Class: Mass::ProfileRPA

Inherits:
Profile
  • Object
show all
Defined in:
lib/first-line/profile_rpa.rb

Constant Summary collapse

@@buffer_driver =
nil

Constants inherited from Profile

Mass::Profile::LOCKFILE_PATH, Mass::Profile::LOCK_TIMEOUT

Instance Attribute Summary collapse

Attributes inherited from Profile

#type

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Profile

#acquire_lock, #child_class_instance, #class_name_from_profile_type, #connectioncheck, #download_image, #download_image_0, #inboxcheck, object_name, #release_lock, #running?, #upload_to_dropbox_with_lock

Constructor Details

#initialize(desc) ⇒ ProfileRPA

Returns a new instance of ProfileRPA.



16
17
18
19
# File 'lib/first-line/profile_rpa.rb', line 16

# Builds a ProfileRPA from its descriptor and turns headless mode on.
#
# @param desc descriptor of the profile; forwarded unchanged to the parent
#   class constructor.
def initialize(desc)
    super(desc)
    # Set the instance variable that #headless reads. A bare `headless = true`
    # would only create a local variable and leave the attribute nil.
    @headless = true
end

Instance Attribute Details

#headlessObject

Returns the value of attribute headless.



5
6
7
# File 'lib/first-line/profile_rpa.rb', line 5

# Reader for the headless attribute: returns the current value of @headless
# (nil if it has never been assigned).
def headless
  @headless
end

Class Method Details

.buffer_driverObject

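Returns the Selenium driver currently buffered at class level, or nil when none is buffered. (In the source file this method sits under the "SELENIUM FUNCTIONS" section banner.)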



8
9
10
# File 'lib/first-line/profile_rpa.rb', line 8

# Returns whatever is currently stored in the @@buffer_driver class variable
# (nil when nothing is buffered). Being a class variable, the value is shared
# by this class and its subclasses.
def self.buffer_driver
    @@buffer_driver
end

.buffer_driver=(o) ⇒ Object



12
13
14
# File 'lib/first-line/profile_rpa.rb', line 12

# Stores `o` in the @@buffer_driver class variable, replacing any previous
# value. Pass nil to discard the buffered object.
def self.buffer_driver=(o)
    @@buffer_driver = o
end

.clear_driverObject

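Clears the buffered Selenium driver by setting buffer_driver to nil. (The source comment above this method reads "stop the adspower server for headless mode, if it is not already stopped and if headless is activated", but the method itself does not stop the server.)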



105
106
107
# File 'lib/first-line/profile_rpa.rb', line 105

# Forgets the buffered driver by assigning nil through buffer_driver=.
# Nothing else is done here: the driver object itself is not closed or quit.
def self.clear_driver
    self.buffer_driver = nil
end

.domainObject



21
22
23
# File 'lib/first-line/profile_rpa.rb', line 21

# Placeholder that child classes are expected to override.
#
# @raise [RuntimeError] always, when called on a class that did not override it.
def self.domain
    raise RuntimeError, "Method domain of ProfileRPA must be implemented on each child class."
end

.start_headless_server(headless: true, logger: nil) ⇒ Object

start the adspower server for headless mode, if it is not already started and if headless is activated.



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/first-line/profile_rpa.rb', line 76

# Starts the AdsPower server unless it is already online. Does nothing
# beyond logging when headless mode is disabled.
#
# @param headless [Boolean] when false, only a "no" line is logged.
# @param logger object responding to logs/logf; a BlackStack::DummyLogger
#   is used when nil.
def self.start_headless_server(headless: true, logger: nil)
    log = logger || BlackStack::DummyLogger.new(nil)

    log.logs "Starting AdsPower headless server?... "
    # headless disabled: report it and stop here
    return log.logf("no".yellow + " (headless is disabled in the command line)") unless headless
    log.logf "yes".green + " (headless is enabled)"

    log.logs "Creating AdsPower client... "
    client = AdsPowerClient.new(key: ADSPOWER_API_KEY)
    log.logf 'done'.green

    log.logs "Is headless server already running?... "
    if client.online?
        log.logf "yes".green
    else
        log.logf "no".yellow

        # server is offline: bring it up
        log.logs "Starting AdsPower server... "
        client.server_start
        log.logf 'done'.green
    end
end

Instance Method Details

#automatic_downloads(status:, domain:, secure: true, www: true) ⇒ Object

Allow or Block a site to perform multiple downloads. Parameters:

  • status: :allow or :block

  • domain: the domain of the site to allow or block.

  • secure: true or false

  • www: true or false

Reference:



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/first-line/profile_rpa.rb', line 153

# Sets the "Automatic downloads" permission of one site by driving Chrome's
# site-details settings page through the Selenium driver.
#
# @param status [Symbol] :allow or :block.
# @param domain domain of the site (converted with to_s).
# @param secure [Boolean] true builds an https:// URL ending in :443,
#   false builds an http:// URL without port.
# @param www [Boolean] true prefixes the domain with "www.".
# @raise [RuntimeError] when status or domain is invalid; every problem
#   found is reported, one per line.
def automatic_downloads(status:, domain:, secure: true, www: true)
    problems = []
    problems << "The status must be :allow or :block." unless [:allow, :block].include?(status)
    problems << "#{domain.to_s} is not a valid domain" unless domain.to_s.strip.downcase.valid_domain?
    raise problems.join("\n") unless problems.empty?

    # assemble the site URL exactly as the settings page expects it
    scheme = secure ? "https://" : "http://"
    host_prefix = www ? "www." : ""
    port_suffix = secure ? ":443" : ""
    site_url = "#{scheme}#{host_prefix}#{domain}#{port_suffix}"

    # self.driver is deliberately called once per step, as before
    self.driver.get("chrome://settings/content/siteDetails?site=#{CGI.escape(site_url)}")

    # walk down the nested shadow roots of the settings UI
    basic_page = self.driver.execute_script(
        "return document.querySelector('settings-ui').shadowRoot.querySelector('settings-main#main').shadowRoot.querySelector('settings-basic-page')"
    )
    permission_list = self.driver.execute_script(
        "return arguments[0].shadowRoot.querySelector('settings-privacy-page').shadowRoot.querySelector('settings-animated-pages#pages settings-subpage site-details').shadowRoot.querySelector('div.list-frame:not(div>div.list-frame)')",
        basic_page
    )
    permission_select = self.driver.execute_script(
        "return arguments[0].querySelector('site-details-permission[label=\"Automatic downloads\"]').shadowRoot.querySelector('#permission')",
        permission_list
    )

    # bring the control into view, open it and pick the requested option
    self.driver.execute_script("arguments[0].scrollIntoView()", permission_select)
    permission_select.click
    permission_select.find_element(id: status.to_s).click
end

#close_popups(logger: nil) ⇒ Object

For internal use only. Users should call `send_request` instead.



71
72
73
# File 'lib/first-line/profile_rpa.rb', line 71

# Placeholder that child classes are expected to override.
#
# @param logger unused here; accepted so overrides share the signature.
# @raise [RuntimeError] always, when the child class did not override it.
def close_popups(logger: nil)
    raise RuntimeError, "Method close_popups of ProfileRPA must be implemented on each child class."
end

#driverObject

Returns a Selenium driver for this profile.



295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
# File 'lib/first-line/profile_rpa.rb', line 295

# Returns the Selenium driver for this profile, creating it when needed.
#
# The driver is buffered at class level (self.class.buffer_driver). On each
# call the buffer is dropped if the AdsPower client's `check` for this
# profile's 'ads_power_id' is falsy; a new driver is then requested from
# AdsPower, resized to the descriptor's browser_width x browser_height and
# reduced to a single tab.
#
# When the profile type's descriptor has a truthy 'allow_intercept_js', the
# intercept.js library plus every *.js file under Mass.js_path is registered
# to be evaluated on each new document, before the page's own scripts.
# References:
# - https://stackoverflow.com/questions/31354352/selenium-how-to-inject-execute-a-javascript-in-to-a-page-before-loading-executi
# - https://www.selenium.dev/documentation/webdriver/bidirectional/chrome_devtools/cdp_endpoint/
# - https://github.com/VyMECO/mass.public/issues/6
# - https://github.com/VyMECO/mass.public/issues/13
#
# @return the buffered driver object.
def driver
    allow_intercept_js = self.type.desc['allow_intercept_js']

    c = AdsPowerClient.new(key: ADSPOWER_API_KEY)

    # discard the buffered driver when the AdsPower check fails
    self.class.buffer_driver = nil if !c.check(self.desc['ads_power_id'])
    sleep(1)

    if self.class.buffer_driver.nil?
        self.class.buffer_driver = c.driver(self.desc['ads_power_id'], headless)
        self.class.buffer_driver.manage.window.resize_to(self.desc['browser_width'], self.desc['browser_height'])
        self.one_tab

        # NOTE: switching automatic downloads on/off here (via
        # automatic_downloads, driven by desc['allow_browser_to_download_multiple_files'])
        # is currently disabled.

        if allow_intercept_js
            # Download the library only when it is going to be injected, so a
            # network failure cannot break profiles that do not use it.
            uri = URI.parse('https://raw.githubusercontent.com/leandrosardi/intercept/main/lib/intercept.js')
            js1 = Net::HTTP.get(uri)

            # Read the local scripts without shelling out to `ls`/`cat`.
            # Sorted to keep the alphabetical order `ls` used to give.
            js2 = Dir.glob("#{Mass.js_path}/*.js").sort.map { |filename| File.read(filename) + "\n\n" }.join

            # NOTE: the `$$.init({ parse: ... })` interceptor bootstrap is
            # currently disabled, so only the library and scripts are injected.
            self.class.buffer_driver.execute_cdp("Page.addScriptToEvaluateOnNewDocument", source: js1 + js2)
        end
    end

    self.class.buffer_driver
end

#one_tabObject

Closes all tabs except one. This is done only if there is more than one tab, in order to avoid the browser showing up when it is not necessary.



112
113
114
115
116
117
118
119
120
121
122
# File 'lib/first-line/profile_rpa.rb', line 112

# Leaves the buffered driver with a single open tab.
#
# When more than one window handle exists, every handle except the last
# one reported is switched to and closed, and focus ends on the last one.
# With zero or one handle nothing happens and nil is returned.
def one_tab
    open_handles = self.class.buffer_driver.window_handles
    return nil unless open_handles.size > 1

    survivor = open_handles.last
    open_handles[0...-1].each do |extra|
        self.class.buffer_driver.switch_to.window(extra)
        # close, not quit: quit would also kill the chromedriver
        self.class.buffer_driver.close
    end
    self.class.buffer_driver.switch_to.window(survivor)
end

#resolve_dual_url(lead_descriptor, field_1:, field_2:, logger: nil) ⇒ Object

Resolves the redirection from facebook_2 to facebook, or from linkedin_2 to linkedin. More information here: github.com/VyMECO/mass.public/issues/93

Parameters:

  • lead_descriptor: hash descriptor of a lead

  • field_1: field for storing the URL type 1 that must be realized.

  • field_2: field for storing the URL type 2 that redirects to the URL type 1.

  • logger: object to write log. Optional. Default: nil.



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/first-line/profile_rpa.rb', line 34

# Fills lead_descriptor[field_1] with the URL that lead_descriptor[field_2]
# redirects to.
#
# Nothing is resolved when field_2 is empty or field_1 is already set.
# Otherwise Mass::Lead.page is queried for a lead with the same field_2
# value; if one is found the browser is not used. If none is found the
# driver opens the field_2 URL, waits 5 seconds and stores the driver's
# current URL into lead_descriptor[field_1] (the hash is mutated in place).
#
# @param lead_descriptor [Hash] descriptor of the lead; 'id_account' is read from it.
# @param field_1 key of the URL to be resolved.
# @param field_2 key of the URL that redirects.
# @param logger object responding to logs/logf; a BlackStack::DummyLogger
#   is used when nil.
def resolve_dual_url(lead_descriptor, field_1:, field_2:, logger: nil)
    log = logger || BlackStack::DummyLogger.new(nil)
    lead_h = lead_descriptor

    log.logs "Resolving dual-url redirection #{lead_h[field_2].to_s.blue}... "

    # guard clauses for the two "nothing to do" situations
    return log.logf('skip'.yellow + " (no field_2)") if lead_h[field_2].nil?
    return log.logf('skip'.yellow + " (no dual-url redirection needed)") unless lead_h[field_1].nil?

    # look for a lead already stored with this field_2 value
    known = Mass::Lead.page(
        id_account: lead_descriptor['id_account'],
        page: 1,
        limit: 1,
        filters: { field_2.to_s => lead_h[field_2] }
    ).first
    return log.logf('done'.green + " (field_2 already registered in the database.)") if known

    # follow the redirection in the browser and keep the landing URL
    driver.get(lead_h[field_2])
    sleep(5)
    lead_h[field_1] = driver.current_url
    log.logf 'done'.green + " (#{lead_h[field_1].to_s.blue})"
end

#screenshot(dropbox_folder = nil) ⇒ Object

Takes a screenshot and uploads it to Dropbox. Returns the URL of the screenshot.



252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# File 'lib/first-line/profile_rpa.rb', line 252

# Takes a screenshot of the current page, uploads it to Dropbox and
# returns its URL.
#
# The image is first written to /tmp/<uuid>.png and then uploaded to
# /massprospecting.rpa/<dropbox_folder>.<year>.<month>/<uuid>.png.
# The temporary file is removed even when the upload fails.
#
# @param dropbox_folder name used to build the Dropbox folder; defaults to
#   self.desc['id_account'] when nil.
# @return [String] the Dropbox URL of the file, with '&dl=1' replaced by '&dl=0'.
# @raise [RuntimeError] when both dropbox_folder and self.desc['id_account'] are nil.
def screenshot(dropbox_folder=nil)
    raise "Either dropbox_folder parameter or self.desc['id_account'] are required." if dropbox_folder.nil? && self.desc['id_account'].nil?
    dropbox_folder = self.desc['id_account'] if dropbox_folder.nil?

    filename = "#{SecureRandom.uuid}.png"
    tmp_path = "/tmp/#{filename}"
    begin
        # take screenshot using selenium driver and save it into the /tmp folder
        self.driver.save_screenshot(tmp_path)

        # read the clock once so year and month always belong to the same instant
        now = Time.now
        year = now.year.to_s.rjust(4,'0')
        month = now.month.to_s.rjust(2,'0')
        folder = "/massprospecting.rpa/#{dropbox_folder}.#{year}.#{month}"
        path = "#{folder}/#{filename}"

        BlackStack::DropBox.dropbox_create_folder(folder)
        upload_to_dropbox_with_lock(tmp_path, path)
    ensure
        # never leave the temporary file behind, even when a step above raised
        File.delete(tmp_path) if File.exist?(tmp_path)
    end

    BlackStack::DropBox.get_file_url(path).gsub('&dl=1', '&dl=0')
end

#snapshot(dropbox_folder = nil) ⇒ Object

Creates a file in the cloud with the HTML of the current page. Returns the URL of the file.



274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# File 'lib/first-line/profile_rpa.rb', line 274

# Saves the HTML source of the current page to Dropbox and returns its URL.
#
# The HTML is first written to /tmp/<uuid>.html and then uploaded to
# /massprospecting.bots/<dropbox_folder>.<year>.<month>/<uuid>.html.
# The temporary file is removed even when the upload fails.
#
# @param dropbox_folder name used to build the Dropbox folder; defaults to
#   self.desc['id_account'] when nil.
# @return [String] the Dropbox URL of the file, with '&dl=1' replaced by '&dl=0'.
# @raise [RuntimeError] when both dropbox_folder and self.desc['id_account'] are nil.
def snapshot(dropbox_folder=nil)
    raise "Either dropbox_folder parameter or self.desc['id_account'] are required." if dropbox_folder.nil? && self.desc['id_account'].nil?
    dropbox_folder = self.desc['id_account'] if dropbox_folder.nil?

    filename = "#{SecureRandom.uuid}.html"
    tmp_path = "/tmp/#{filename}"
    begin
        # dump the page source reported by the selenium driver into the /tmp folder
        File.write(tmp_path, self.driver.page_source)

        # read the clock once so year and month always belong to the same instant
        now = Time.now
        year = now.year.to_s.rjust(4,'0')
        month = now.month.to_s.rjust(2,'0')
        folder = "/massprospecting.bots/#{dropbox_folder}.#{year}.#{month}"
        path = "#{folder}/#{filename}"

        BlackStack::DropBox.dropbox_create_folder(folder)
        upload_to_dropbox_with_lock(tmp_path, path)
    ensure
        # never leave the temporary file behind, even when a step above raised
        File.delete(tmp_path) if File.exist?(tmp_path)
    end

    BlackStack::DropBox.get_file_url(path).gsub('&dl=1', '&dl=0')
end

#wait_for_dropbox_url(path, max_wait: 5, interval: 2) ⇒ String

Retrieves the URL of a file stored in Dropbox with a timeout mechanism. This function continuously attempts to retrieve the file URL until it becomes available in Dropbox, up to a specified maximum wait time.

Examples:

wait_for_dropbox_url('/path/to/file', max_wait: 60, interval: 3)
# => "https://www.dropbox.com/s/yourfile?dl=0"

Parameters:

  • path (String)

    The path to the file in Dropbox.

  • max_wait (Integer) (defaults to: 5)

    Maximum time to wait (in seconds) for the file to appear in Dropbox. Default is 5 seconds.

  • interval (Integer) (defaults to: 2)

    Time interval (in seconds) between each retry attempt. Default is 2 seconds.

Returns:

  • (String)

    The URL of the file in Dropbox, with the ‘&dl=1` parameter replaced by `&dl=0`.

  • nil if the file is not available within the specified maximum wait time (no error is raised; the timeout exception in the source is commented out).



230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/first-line/profile_rpa.rb', line 230

# Polls Dropbox for the URL of a file until it is available or the wait
# budget is spent.
#
# Every failed attempt (any StandardError raised while asking for the URL)
# is followed by a pause of `interval` seconds, unless more than `max_wait`
# seconds have elapsed since the call started, in which case nil is returned.
#
# @param path [String] path of the file in Dropbox.
# @param max_wait [Numeric] seconds to keep retrying (default 5).
# @param interval [Numeric] seconds to sleep between attempts (default 2).
# @return [String, nil] the file URL with '&dl=1' replaced by '&dl=0', or
#   nil when the file did not become available in time.
def wait_for_dropbox_url(path, max_wait: 5, interval: 2)
    # monotonic clock: immune to wall-clock adjustments while waiting
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    loop do
        begin
            # Try to get the file URL
            return BlackStack::DropBox.get_file_url(path).gsub('&dl=1', '&dl=0')
        rescue StandardError
            # Give up quietly once the wait budget is spent
            elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
            return nil if elapsed > max_wait

            # Wait for a short interval before retrying
            sleep(interval)
        end
    end
end

#wait_for_file(file_paths, timeout = 30) ⇒ String

Waits for the presence of a file in specified file paths within a given timeout period. This function checks each file path in the provided array and returns the path of the first file it finds to exist. It raises an exception if none of the files are found within the specified timeout.

Examples:

wait_for_file(['/path/to/file1', '/path/to/file2'], 60)
# => "/path/to/file1" (if the file appears within 60 seconds)

Parameters:

  • file_paths (Array<String>)

    An array of file paths to check for the file’s existence.

  • timeout (Integer) (defaults to: 30)

    The maximum number of seconds to wait for the file to appear. Default is 30 seconds.

Returns:

  • (String)

    The path of the first file found in ‘file_paths`.

Raises:

  • (RuntimeError)

    If no file is found within the specified timeout, raises an error with a message listing the paths that were checked and the timeout duration.



197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/first-line/profile_rpa.rb', line 197

# Waits until one of the given files exists and returns its path.
#
# All candidates are checked in order once per second; the first one found
# is returned. The whole wait is bounded by Timeout.timeout.
#
# @param file_paths [Array<String>] candidate paths, checked in order.
# @param timeout [Numeric] seconds to wait before giving up (default 30).
# @return [String] the first path in file_paths that exists.
# @raise [RuntimeError] when no candidate appears within the timeout.
def wait_for_file(file_paths, timeout = 30)
    begin
        Timeout.timeout(timeout) do
            loop do
                present = file_paths.find { |candidate| File.exist?(candidate) }
                return present if present
                sleep(1)
            end
        end
    rescue Timeout::Error
        raise "Downloaded file not found in paths #{file_paths.join(',')} after #{timeout} seconds."
    end
end