Module: Twitterscraper::Proxy

Defined in:
lib/twitterscraper/proxy.rb

Defined Under Namespace

Classes: Pool, RetryExhausted

Constant Summary collapse

PROXY_URL =
'https://free-proxy-list.net/'

Class Method Summary collapse

Class Method Details

.get_proxies(retries = 3) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/twitterscraper/proxy.rb', line 41

# Fetches the page at PROXY_URL and returns the usable proxy addresses it lists.
#
# The rows of the table with id "proxylisttable" are scanned. A row is kept
# when its anonymity column (index 4) is "elite proxy" or "anonymous" and its
# HTTPS column (index 6) is not "no". Rows that do not have all of the
# expected cells are skipped.
#
# Any StandardError raised while fetching or parsing triggers another attempt
# until the attempt budget is used up.
#
# @param retries [Integer] maximum number of attempts; one attempt is always made
# @return [Array<String>] "ip:port" strings in random order (may be empty)
# @raise [RetryExhausted] when the last allowed attempt failed; the message is
#   the +inspect+ of the last error
def get_proxies(retries = 3)
  response = Twitterscraper::Http.get(PROXY_URL)
  html = Nokogiri::HTML(response)
  table = html.xpath('//table[@id="proxylisttable"]').first
  # Report a changed or unexpected page explicitly rather than through a
  # NoMethodError on nil further down.
  raise "table \"proxylisttable\" not found at #{PROXY_URL}" unless table

  proxies = []

  table.xpath('tbody/tr').each do |tr|
    cells = tr.xpath('td')
    # Index 6 is the highest column read below; a shorter row is malformed and
    # must not invalidate the other rows.
    next if cells.length < 7

    ip, port, anonymity, https = [0, 1, 4, 6].map { |i| cells[i].text.strip }
    next unless ['elite proxy', 'anonymous'].include?(anonymity)
    next if https == 'no'

    proxies << "#{ip}:#{port}"
  end

  proxies.shuffle
rescue => e
  retries -= 1
  retry if retries > 0

  raise RetryExhausted, e.inspect
end