Module: GoldTweets::Client

Defined in:
lib/goldtweets/client.rb

Defined Under Namespace

Classes: Response

Constant Summary collapse

USER_AGENTS =

User agents to present to Twitter search

[ 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:63.0) Gecko/20100101 Firefox/63.0',
  'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0',
  'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:61.0) Gecko/20100101 Firefox/61.0',
  'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
  'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
  'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
  'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15'
].freeze
DEFAULT_HEADERS =

Static list of headers to be sent with API requests

{ 'Host' => 'twitter.com',
  'Accept' => 'application/json, text/javascript, */*; q=0.01',
  'Accept-Language' => 'en-US,en;q=0.5',
  'X-Requested-With' => 'XMLHttpRequest',
  'Connection' => 'keep-alive'
}.freeze
USERNAMES_PER_BATCH =

How many usernames to put in a single search

20
SEARCH_PREFIX =

URLs for searching and generating permalinks back to tweets

'https://twitter.com/i/search/timeline?'
'https://twitter.com'
DEFAULT_PARAMETERS =

Static list of parameters sent with a search

{ 'vertical' => 'news',
  'src' => 'typd',
  'include_available_features' => '1',
  'include_entities' => '1',
  'reset_error_state' => 'false'
}.freeze
TWEETS_SELECTOR =

XPath selectors

"//div[contains(concat(' ', normalize-space(@class), ' '), ' js-stream-tweet ') and not(contains(concat(' ', normalize-space(@class), ' '), ' withheld-tweet '))]"
USERNAMES_SELECTOR =
".//span[contains(concat(' ', normalize-space(@class), ' '), ' username ') and contains(concat(' ', normalize-space(@class), ' '), ' u-dir ')]/b"
AUTHORID_SELECTOR =
".//a[contains(concat(' ', normalize-space(@class), ' '), ' js-user-profile-link ')]"
CONTENT_SELECTOR =
".//p[contains(concat(' ', normalize-space(@class), ' '), ' js-tweet-text ')]"
RETWEETS_SELECTOR =
".//span[contains(concat(' ', normalize-space(@class), ' '), ' ProfileTweet-action--retweet ')]/span[contains(concat(' ', normalize-space(@class), ' '), ' ProfileTweet-actionCount ')]"
FAVORITES_SELECTOR =
".//span[contains(concat(' ', normalize-space(@class), ' '), ' ProfileTweet-action--favorite ')]/span[contains(concat(' ', normalize-space(@class), ' '), ' ProfileTweet-actionCount ')]"
REPLIES_SELECTOR =
".//span[contains(concat(' ', normalize-space(@class), ' '), ' ProfileTweet-action--reply ')]/span[contains(concat(' ', normalize-space(@class), ' '), ' ProfileTweet-actionCount ')]"
TIMESTAMP_SELECTOR =
".//small[contains(concat(' ', normalize-space(@class), ' '), ' time ')]//span[contains(concat(' ', normalize-space(@class), ' '), ' js-short-timestamp ')]"
GEO_SELECTOR =
".//span[contains(concat(' ', normalize-space(@class), ' '), ' Tweet-geo ')]"
".//a"

Class Method Summary collapse

Class Method Details

.get_tweets(criteria) ⇒ Object

Fetch tweets based on a GoldTweets::Search object. This functionality presently lacks several features of the original Python library, among them proxy support, emoji handling, and running a caller-provided block on tweets as they are processed.



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/goldtweets/client.rb', line 64

# Fetch tweets matching the given search criteria.
#
# The usernames from the criteria are split into batches of
# USERNAMES_PER_BATCH. Each batch is searched page by page until the
# response reports no more items or the batch has reached the tweet limit.
#
# @param criteria [#usernames, #usernames=, #maximum_tweets] search
#   criteria; `usernames` is temporarily replaced with each batch while
#   searching and restored to its original value before returning.
#   `maximum_tweets` is coerced with `to_i`; a value that is not positive
#   means "no limit".
# @return [Array] the tweets accumulated by `parse_tweet`, for all batches,
#   in fetch order. The limit applies per batch, so the total may be up to
#   `maximum_tweets` times the number of batches.
def self.get_tweets(criteria)
  user_agent         = USER_AGENTS.sample
  cookie_jar         = ''
  original_usernames = criteria.usernames
  # Coerce once so the "is a limit set?" and "limit reached?" checks agree
  # even when the limit arrives as a numeric string.
  tweet_limit        = criteria.maximum_tweets.to_i
  batches            = usernames_for(original_usernames).each_slice(USERNAMES_PER_BATCH)

  begin
    batches.flat_map do |batch|
      refresh_cursor   = ''
      collected_tweets = []

      criteria.usernames = batch
      # NOTE(review): a response that keeps reporting more_items without
      # yielding tweets keeps this loop running — confirm the endpoint
      # eventually clears more_items.
      loop do
        response       = fetch_tweets(criteria, refresh_cursor, cookie_jar, user_agent)
        cookie_jar     = response.new_cookies if response.new_cookies
        refresh_cursor = response.new_cursor

        page_tweets = response.body.xpath(TWEETS_SELECTOR).reduce([], &method(:parse_tweet))
        collected_tweets.concat(page_tweets)

        break if tweet_limit > 0 && collected_tweets.length >= tweet_limit
        break unless response.more_items
      end

      # Pages are appended whole, so trim any overshoot past the limit.
      tweet_limit > 0 ? collected_tweets.first(tweet_limit) : collected_tweets
    end
  ensure
    # Do not leave the caller's criteria narrowed to the last batch.
    criteria.usernames = original_usernames
  end
end