Class: Mass::ProfileRPA

Inherits:
Profile
  • Object
show all
Defined in:
lib/first-line/profile_rpa.rb

Constant Summary collapse

@@buffer_driver =
nil

Constants inherited from Profile

Mass::Profile::LOCKFILE_PATH, Mass::Profile::LOCK_TIMEOUT

Instance Attribute Summary collapse

Attributes inherited from Profile

#type

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Profile

#acquire_lock, #child_class_instance, #class_name_from_profile_type, #connectioncheck, #create_s3_folder, #download_image, #download_image_0, #inboxcheck, object_name, #release_lock, #running?, #upload_file_to_s3

Constructor Details

#initialize(desc) ⇒ ProfileRPA

Returns a new instance of ProfileRPA.



16
17
18
19
20
# File 'lib/first-line/profile_rpa.rb', line 16

# Builds the profile from its descriptor and sets the browser defaults.
#
# @param desc the profile descriptor; forwarded unchanged to the parent
#   constructor.
def initialize(desc)
    super(desc)
    # Assign the instance variables read by the `headless` and `read_timeout`
    # readers. A bare `headless = true` only creates a method-local variable
    # and leaves the attribute nil.
    @headless = true
    @read_timeout = 300 # 300 seconds - 5 minutes
end

Instance Attribute Details

#headlessObject

Returns the value of attribute headless.



5
6
7
# File 'lib/first-line/profile_rpa.rb', line 5

# Reader for the @headless instance variable.
# Its value is what #driver forwards as the `headless:` option when it
# creates the browser driver.
def headless
  @headless
end

#read_timeoutObject

Returns the value of attribute read_timeout.



5
6
7
# File 'lib/first-line/profile_rpa.rb', line 5

# Reader for the @read_timeout instance variable.
# Its value is what #driver forwards as the `read_timeout:` option when it
# creates the browser driver (the constructor comment gives it in seconds).
def read_timeout
  @read_timeout
end

Class Method Details

.buffer_driverObject

SELENIUM FUNCTIONS



8
9
10
# File 'lib/first-line/profile_rpa.rb', line 8

# Returns the driver currently cached in the @@buffer_driver class variable,
# or nil when nothing is cached.
# Being a class variable, the cache is shared by this class and its subclasses.
def self.buffer_driver
    @@buffer_driver
end

.buffer_driver=(o) ⇒ Object



12
13
14
# File 'lib/first-line/profile_rpa.rb', line 12

# Stores `o` in the @@buffer_driver class variable, replacing whatever was
# cached before. Passing nil empties the cache.
def self.buffer_driver=(o)
    @@buffer_driver = o
end

.clear_driverObject

Clears the cached Selenium driver by setting the class-level buffer_driver to nil. It does not stop the AdsPower server or close the browser.



106
107
108
# File 'lib/first-line/profile_rpa.rb', line 106

# Empties the driver cache by assigning nil through buffer_driver=.
# Only the cached reference is dropped; nothing here quits the browser or
# stops a server.
def self.clear_driver
    self.buffer_driver = nil
end

.domainObject



22
23
24
# File 'lib/first-line/profile_rpa.rb', line 22

# Abstract hook: this base implementation always raises a RuntimeError,
# so every child class has to provide its own `domain`.
def self.domain
    message = "Method domain of ProfileRPA must be implemented on each child class."
    raise RuntimeError, message
end

.start_headless_server(headless: true, logger: nil) ⇒ Object

start the adspower server for headless mode, if it is not already started and if headless is activated.



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/first-line/profile_rpa.rb', line 77

# Makes sure the AdsPower server is running when headless mode is requested.
#
# Parameters:
# - headless: when false, only a log line is written and nothing is started.
# - logger: object receiving `logs` / `logf` calls. Optional; a
#   BlackStack::DummyLogger is used when nil.
#
# Returns the value of the last `logf` call.
def self.start_headless_server(headless: true, logger:nil)
    log = logger || BlackStack::DummyLogger.new(nil)

    log.logs "Starting AdsPower headless server?... "
    # nothing to start when headless mode is switched off
    return log.logf("no".yellow + " (headless is disabled in the command line)") unless headless
    log.logf "yes".green + " (headless is enabled)"

    # creating an AdsPower client
    log.logs "Creating AdsPower client... "
    client = AdsPowerClient.new(key: ADSPOWER_API_KEY)
    log.logf 'done'.green

    # a server that is already online is left untouched
    log.logs "Is headless server already running?... "
    return log.logf("yes".green) if client.online?
    log.logf "no".yellow

    # starting adspower server
    log.logs "Starting AdsPower server... "
    client.server_start
    log.logf 'done'.green
end

Instance Method Details

#automatic_downloads(status:, domain:, secure: true, www: true) ⇒ Object

Allow or Block a site to perform multiple downloads. Parameters:

  • status: :allow or :block

  • domain: the domain of the site to allow or block.

  • secure: true or false

  • www: true or false

Reference:



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/first-line/profile_rpa.rb', line 154

# Allows or blocks a site from performing multiple (automatic) downloads by
# driving Chrome's own site-details settings page.
#
# Parameters:
# - status: :allow or :block.
# - domain: domain of the site; it must pass `valid_domain?`.
# - secure: true builds an https URL with port 443, false builds an http URL.
# - www: true prefixes the domain with "www.".
#
# Raises a RuntimeError listing every validation problem, one per line.
# Returns the result of the final `click`.
def automatic_downloads(
    status:, 
    domain:,
    secure:true,
    www:true
) 
    # collect every validation problem before failing
    problems = []
    problems << "The status must be :allow or :block." unless [:allow, :block].include?(status)
    problems << "#{domain.to_s} is not a valid domain" unless domain.to_s.strip.downcase.valid_domain?
    raise problems.join("\n") unless problems.empty?

    # origin exactly as Chrome's site-details page expects it
    scheme = secure ? "https://" : "http://"
    prefix = www ? "www." : ""
    port = secure ? ":443" : ""
    site_url = "#{scheme}#{prefix}#{domain.to_s}#{port}"

    self.driver.get("chrome://settings/content/siteDetails?site=#{CGI.escape(site_url)}")

    # the settings UI is built from nested shadow roots, so walk down step by step
    basic_page = self.driver.execute_script(
        "return document.querySelector('settings-ui').shadowRoot.querySelector('settings-main#main').shadowRoot.querySelector('settings-basic-page')"
    )
    rule_list = self.driver.execute_script(
        "return arguments[0].shadowRoot.querySelector('settings-privacy-page').shadowRoot.querySelector('settings-animated-pages#pages settings-subpage site-details').shadowRoot.querySelector('div.list-frame:not(div>div.list-frame)')",
        basic_page
    )
    permission = self.driver.execute_script(
        "return arguments[0].querySelector('site-details-permission[label=\"Automatic downloads\"]').shadowRoot.querySelector('#permission')",
        rule_list
    )

    # open the permission control, then pick the option whose id is the status
    self.driver.execute_script("arguments[0].scrollIntoView()", permission)
    permission.click
    permission.find_element(:id => status.to_s).click
end

#close_popups(logger: nil) ⇒ Object

for internal use only. users should call ‘send_request` instead.



72
73
74
# File 'lib/first-line/profile_rpa.rb', line 72

# Abstract hook: this base implementation always raises a RuntimeError,
# so every child class has to provide its own `close_popups`.
#
# Parameters:
# - logger: accepted for interface compatibility; not used here.
def close_popups(logger:nil)
    message = "Method close_popups of ProfileRPA must be implemented on each child class."
    raise RuntimeError, message
end

#driverObject

Returns a Selenium driver for this profile.



256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
# File 'lib/first-line/profile_rpa.rb', line 256

# Returns the Selenium driver for this profile, creating and caching it in
# the class-level `buffer_driver` when needed.
#
# Steps:
# 1. Ask AdsPower (`check`) about the profile's `ads_power_id`; when the
#    check fails, the cached driver is discarded.
# 2. When no driver is cached, create one with `driver2` (passing this
#    instance's `headless` and `read_timeout`), resize the window to
#    desc['browser_width'] x desc['browser_height'] and reduce it to one tab.
# 3. When the profile type's descriptor has a truthy 'allow_intercept_js',
#    register the intercept.js library plus every *.js file under
#    `Mass.js_path` to be evaluated on each new document.
#
# Returns the cached driver.
def driver()
    # setup a profile_type to work or not with intercept.js
    # References:
    # - https://github.com/VyMECO/mass.public/issues/6
    # - https://github.com/VyMECO/mass.public/issues/13
    allow_intercept_js = self.type.desc['allow_intercept_js']

    c = AdsPowerClient.new(key: ADSPOWER_API_KEY)

    # a failed check invalidates the cached driver
    self.class.buffer_driver = nil if !c.check(self.desc['ads_power_id'])
    sleep(1)

    if self.class.buffer_driver.nil?
        self.class.buffer_driver = c.driver2( self.desc['ads_power_id'],
            headless: headless,
            read_timeout: read_timeout
        )
        self.class.buffer_driver.manage.window.resize_to(self.desc['browser_width'], self.desc['browser_height'])
        self.one_tab

        # The scripts are only downloaded/read when they are going to be
        # injected, so profile types that do not use intercept.js neither hit
        # the network nor touch the disk.
        if allow_intercept_js
            # Source code of the intercept.js library.
            # NOTE(review): this runs JavaScript fetched at runtime from the
            # `main` branch of a GitHub repository; consider pinning a revision.
            uri = URI.parse('https://raw.githubusercontent.com/leandrosardi/intercept/main/lib/intercept.js')
            library_js = Net::HTTP.get(uri)

            # Source code of every *.js file inside Mass.js_path, in name
            # order, each one followed by a blank line. Reading the files from
            # Ruby (instead of shelling out to `ls` / `cat`) keeps this working
            # for paths containing spaces or shell metacharacters.
            scrapers_js = Dir.glob("#{Mass.js_path}/*.js").sort.map { |filename|
                File.read(filename) + "\n\n"
            }.join

            # Inject the scripts so they run before any other script of the page.
            #
            # Reference:
            # - https://stackoverflow.com/questions/31354352/selenium-how-to-inject-execute-a-javascript-in-to-a-page-before-loading-executi
            #
            # Documentation:
            # - https://www.selenium.dev/documentation/webdriver/bidirectional/chrome_devtools/cdp_endpoint/
            self.class.buffer_driver.execute_cdp("Page.addScriptToEvaluateOnNewDocument", source: library_js + scrapers_js)
        end
    end

    self.class.buffer_driver
end

#one_tabObject

Closes all tabs except one. This is done only if there is more than one tab, in order to avoid the browser showing up when it is not necessary.



113
114
115
116
117
118
119
120
121
122
123
# File 'lib/first-line/profile_rpa.rb', line 113

# Leaves the cached browser with a single tab: when more than one window
# handle exists, every handle except the last one is switched to and closed,
# then the remaining one is focused.
#
# Returns nil when there was nothing to close, otherwise the result of the
# final `switch_to.window` call.
def one_tab
    handles = self.class.buffer_driver.window_handles
    return nil unless handles.size > 1

    survivor = handles.pop
    handles.each do |handle|
        self.class.buffer_driver.switch_to.window(handle)
        # close, not quit: quit would also kill the chromedriver
        self.class.buffer_driver.close
    end
    self.class.buffer_driver.switch_to.window(survivor)
end

#resolve_dual_url(lead_descriptor, field_1:, field_2:, logger: nil) ⇒ Object

Resolves the redirection from facebook_2 to facebook, or from linkedin_2 to linkedin. More information here: github.com/VyMECO/mass.public/issues/93

Parameters:

  • lead_descriptor: hash descriptor of a lead

  • field_1: field for storing the URL type 1 that must be realized.

  • field_2: field for storing the URL type 2 that redirects to the URL type 1.

  • logger: object to write log. Optional. Default: nil.



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/first-line/profile_rpa.rb', line 35

# Fills `field_1` of a lead descriptor with the URL that `field_2` redirects to.
#
# The browser is only used when `field_2` is present, `field_1` is still
# empty and no lead with the same `field_2` value is found through
# Mass::Lead.page. In that case the `field_2` URL is opened, and after a
# fixed 5 second wait the browser's current URL is stored in `field_1`.
#
# Parameters:
# - lead_descriptor: hash descriptor of a lead; it is modified in place.
# - field_1: key receiving the resolved URL.
# - field_2: key holding the URL that redirects.
# - logger: object to write log. Optional. Default: nil.
#
# Returns the value of the last `logf` call.
def resolve_dual_url(lead_descriptor,
    field_1:,
    field_2:,
    logger: nil
)
    log = logger || BlackStack::DummyLogger.new(nil)
    redirecting_url = lead_descriptor[field_2]

    log.logs "Resolving dual-url redirection #{redirecting_url.to_s.blue}... "

    # nothing to resolve without a redirecting URL
    return log.logf('skip'.yellow + " (no field_2)") if redirecting_url.nil?
    # the final URL is already known
    return log.logf('skip'.yellow + " (no dual-url redirection needed)") unless lead_descriptor[field_1].nil?

    # look for a lead already stored with the same redirecting URL
    known_lead = Mass::Lead.page(
        id_account: lead_descriptor['id_account'],
        page: 1,
        limit: 1,
        filters: { field_2.to_s => redirecting_url }
    ).first
    return log.logf('done'.green + " (field_2 already registered in the database.)") if known_lead

    # follow the redirection in the browser and keep the landing URL
    driver.get(redirecting_url)
    sleep(5)
    lead_descriptor[field_1] = driver.current_url
    log.logf 'done'.green + " (#{lead_descriptor[field_1].to_s.blue})"
end

#screenshot(dropbox_folder = nil) ⇒ Object

Takes a screenshot of the current page, uploads it to cloud storage (S3), and returns the URL of the screenshot.



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# File 'lib/first-line/profile_rpa.rb', line 213

# Takes a screenshot of the current page and uploads it with
# `upload_file_to_s3`.
#
# Parameters:
# - dropbox_folder: destination folder. Optional; defaults to
#   self.desc['id_account'].
#
# Returns whatever `upload_file_to_s3` returns.
# Raises a RuntimeError when neither dropbox_folder nor
# self.desc['id_account'] is available.
def screenshot(dropbox_folder=nil)
    raise "Either dropbox_folder parameter or self.desc['id_account'] are required." if dropbox_folder.nil? && self.desc['id_account'].nil?
    folder = dropbox_folder.nil? ? self.desc['id_account'] : dropbox_folder

    # a random name avoids collisions between concurrent screenshots
    filename = "#{SecureRandom.uuid}.png"
    tmp_path = "/tmp/#{filename}"
    begin
        # take screenshot using selenium driver and save it into the /tmp folder
        self.driver.save_screenshot(tmp_path)
        create_s3_folder(folder)
        upload_file_to_s3(tmp_path, "#{folder}/#{filename}")
    ensure
        # remove the temporary file even when the screenshot or the upload fails
        File.delete(tmp_path) if File.exist?(tmp_path)
    end
end

#snapshot(dropbox_folder = nil) ⇒ Object

Creates a file in the cloud with the HTML of the current page, and returns the URL of the file.



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/first-line/profile_rpa.rb', line 235

# Saves the HTML source of the current page and uploads it with
# `upload_file_to_s3`.
#
# Parameters:
# - dropbox_folder: destination folder. Optional; defaults to
#   self.desc['id_account'].
#
# Returns whatever `upload_file_to_s3` returns.
# Raises a RuntimeError when neither dropbox_folder nor
# self.desc['id_account'] is available.
def snapshot(dropbox_folder=nil)
    raise "Either dropbox_folder parameter or self.desc['id_account'] are required." if dropbox_folder.nil? && self.desc['id_account'].nil?
    folder = dropbox_folder.nil? ? self.desc['id_account'] : dropbox_folder

    # a random name avoids collisions between concurrent snapshots
    filename = "#{SecureRandom.uuid}.html"
    tmp_path = "/tmp/#{filename}"
    begin
        # dump the page source reported by the selenium driver into the /tmp folder
        File.write(tmp_path, self.driver.page_source)
        create_s3_folder(folder)
        upload_file_to_s3(tmp_path, "#{folder}/#{filename}")
    ensure
        # remove the temporary file even when the dump or the upload fails
        File.delete(tmp_path) if File.exist?(tmp_path)
    end
end

#wait_for_file(file_paths, timeout = 30) ⇒ String

Waits for the presence of a file in specified file paths within a given timeout period. This function checks each file path in the provided array and returns the path of the first file it finds to exist. It raises an exception if none of the files are found within the specified timeout.

Examples:

wait_for_file(['/path/to/file1', '/path/to/file2'], 60)
# => "/path/to/file1" (if the file appears within 60 seconds)

Parameters:

  • file_paths (Array<String>)

    An array of file paths to check for the file’s existence.

  • timeout (Integer) (defaults to: 30)

    The maximum number of seconds to wait for the file to appear. Default is 30 seconds.

Returns:

  • (String)

    The path of the first file found in ‘file_paths`.

Raises:

  • (RuntimeError)

    If no file is found within the specified timeout, raises an error with a message listing the paths that were checked and the timeout duration.



198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/first-line/profile_rpa.rb', line 198

# Waits until one of the given files exists and returns its path.
#
# The paths are polled in the given order, about once per second, against a
# monotonic-clock deadline. No Timeout block is used, so the wait is never
# interrupted at an arbitrary point.
#
# @param file_paths [Array<String>, String] path(s) to look for; a single
#   path is accepted as well.
# @param timeout [Numeric, nil] maximum number of seconds to wait. Default is
#   30. nil or 0 waits indefinitely.
# @return [String] the first path in `file_paths` found to exist.
# @raise [RuntimeError] if no file is found within the timeout; the message
#   lists the paths checked and the timeout.
def wait_for_file(file_paths, timeout = 30)
    paths = Array(file_paths)
    clock = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
    # nil/0 keep their historical meaning: no deadline at all
    deadline = (timeout.nil? || timeout.zero?) ? nil : clock.call + timeout

    loop do
        found = paths.find { |path| File.exist?(path) }
        return found if found

        remaining = deadline ? deadline - clock.call : 1
        raise "Downloaded file not found in paths #{paths.join(',')} after #{timeout} seconds." if remaining <= 0

        # never sleep past the deadline
        sleep([1, remaining].min)
    end
end