Class: Twitterscraper::Tweet

Inherits:
Object
  • Object
show all
Defined in:
lib/twitterscraper/tweet.rb

Constant Summary collapse

# Names of the attributes a Tweet exposes; #attrs walks this list in order
# to build its hash. Frozen so the shared list cannot be mutated at runtime.
KEYS = %i[
  screen_name
  name
  user_id
  profile_image_url
  tweet_id
  text
  links
  hashtags
  image_urls
  video_url
  has_media
  likes
  retweets
  replies
  is_replied
  is_reply_to
  parent_tweet_id
  reply_to_users
  tweet_url
  timestamp
  created_at
].freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(attrs) ⇒ Tweet

Returns a new instance of Tweet.



30
31
32
33
34
# File 'lib/twitterscraper/tweet.rb', line 30

# Copies every entry of +attrs+ onto the new object as an instance variable
# named after the entry's key ("@<key>").
#
# @param attrs [#each] key/value pairs; keys may be strings or symbols
def initialize(attrs)
  attrs.each { |name, val| instance_variable_set("@#{name}", val) }
end

Class Method Details

.from_html(text) ⇒ Object

.js-stream-item

.js-stream-tweet{data: {screen-name:, tweet-id:}}
  .stream-item-header
  .js-tweet-text-container
  .stream-item-footer


60
61
62
63
# File 'lib/twitterscraper/tweet.rb', line 60

# Parses +text+ as HTML, selects each div whose class contains
# 'js-stream-tweet' directly under an li whose class contains
# 'js-stream-item', and hands those nodes to from_tweets_html.
#
# @param text [String] HTML markup to parse
# @return [Object] whatever from_tweets_html returns for the matched nodes
def from_html(text)
  tweet_nodes = Nokogiri::HTML(text).xpath("//li[@class[contains(., 'js-stream-item')]]/div[@class[contains(., 'js-stream-tweet')]]")
  from_tweets_html(tweet_nodes)
end

.from_json(text) ⇒ Object



47
48
49
50
51
52
53
# File 'lib/twitterscraper/tweet.rb', line 47

# Rebuilds tweets from a JSON document.
#
# Each element of the parsed document has its 'created_at' value run through
# Time.parse before the element is passed to +new+.
#
# @param text [String] JSON text; its elements are expected to be objects
#   carrying a 'created_at' string
# @return [Array] one object per element of the parsed document
def from_json(text)
  JSON.parse(text).map do |record|
    new(record.merge('created_at' => Time.parse(record['created_at'])))
  end
end

.from_tweet_html(html) ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/twitterscraper/tweet.rb', line 71

# Builds a Tweet from one tweet element and the markup nested inside it.
#
# The element's own data-* attributes supply the author and tweet identity;
# everything else is looked up with XPath in a document re-parsed from the
# element's inner HTML.
#
# @param html [#attr, #inner_html, #to_s] a single tweet element
#   (presumably a Nokogiri node selected by from_html -- confirm against callers)
# @return [Tweet, nil] nil, after logging a warning, when the markup does not
#   contain 'js-tweet-text-container'
#
# NOTE(review): the avatar, text, conversation-id and timestamp lookups still
# call methods on `.first` directly, so markup lacking any of those elements
# would raise NoMethodError rather than being skipped.
def from_tweet_html(html)
  screen_name = html.attr('data-screen-name')
  tweet_id = html.attr('data-tweet-id')&.to_i

  unless html.to_s.include?('js-tweet-text-container')
    Twitterscraper.logger.warn "html doesn't include div.js-tweet-text-container url=https://twitter.com/#{screen_name}/status/#{tweet_id}"
    return nil
  end

  inner_html = Nokogiri::HTML(html.inner_html)

  profile_image_url = inner_html.xpath("//img[@class[contains(., 'js-action-profile-avatar')]]").first.attr('src').gsub(/_bigger/, '')
  text = inner_html.xpath("//div[@class[contains(., 'js-tweet-text-container')]]/p[@class[contains(., 'js-tweet-text')]]").first.text
  # Links whose expanded URL points at pic.twitter are media, not outbound links.
  links = inner_html.xpath("//a[@class[contains(., 'twitter-timeline-link')]]").map { |elem| elem.attr('data-expanded-url') }.select { |link| link && !link.include?('pic.twitter') }
  image_urls = inner_html.xpath("//div[@class[contains(., 'AdaptiveMedia-photoContainer')]]").map { |elem| elem.attr('data-image-url') }
  video_url = inner_html.xpath("//div[@class[contains(., 'PlayableMedia-container')]]/a").map { |elem| elem.attr('href') }.first
  # Always a strict boolean: a nil video_url is treated as "no video" via to_s.
  has_media = !image_urls.empty? || !video_url.to_s.empty?

  actions = inner_html.xpath("//div[@class[contains(., 'ProfileTweet-actionCountList')]]")
  # Reads data-tweet-stat-count for one action class. A missing span or
  # attribute yields 0 (nil.to_i) instead of raising.
  # NOTE(review): the leading // makes the expression absolute, so it probably
  # searches the whole document rather than only inside `actions` -- confirm
  # whether .// was intended.
  stat_count = lambda do |action_class|
    actions.xpath("//span[@class[contains(., '#{action_class}')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first&.attr('data-tweet-stat-count').to_i
  end
  likes = stat_count.call('ProfileTweet-action--favorite')
  retweets = stat_count.call('ProfileTweet-action--retweet')
  replies = stat_count.call('ProfileTweet-action--reply u-hiddenVisually')
  is_replied = replies != 0

  # A tweet whose conversation id equals its own id is treated as the start of
  # the conversation, i.e. not a reply.
  parent_tweet_id = inner_html.xpath('//*[@data-conversation-id]').first.attr('data-conversation-id').to_i
  if tweet_id == parent_tweet_id
    is_reply_to = false
    parent_tweet_id = nil
    reply_to_users = []
  else
    is_reply_to = true
    reply_to_users = inner_html.xpath("//div[@class[contains(., 'ReplyingToContextBelowAuthor')]]/a").map { |user| {screen_name: user.text.delete_prefix('@'), user_id: user.attr('data-user-id')} }
  end

  timestamp = inner_html.xpath("//span[@class[contains(., 'js-short-timestamp')]]").first.attr('data-time').to_i
  new(
      screen_name: screen_name,
      name: html.attr('data-name'),
      user_id: html.attr('data-user-id').to_i,
      profile_image_url: profile_image_url,
      tweet_id: tweet_id,
      text: text,
      links: links,
      hashtags: text.scan(/#\w+/).map { |tag| tag.delete_prefix('#') },
      image_urls: image_urls,
      video_url: video_url,
      has_media: has_media,
      likes: likes,
      retweets: retweets,
      replies: replies,
      is_replied: is_replied,
      is_reply_to: is_reply_to,
      parent_tweet_id: parent_tweet_id,
      reply_to_users: reply_to_users,
      tweet_url: 'https://twitter.com' + html.attr('data-permalink-path'),
      timestamp: timestamp,
      created_at: Time.at(timestamp, in: '+00:00'),
  )
end

.from_tweets_html(html) ⇒ Object



65
66
67
68
69
# File 'lib/twitterscraper/tweet.rb', line 65

# Runs from_tweet_html over every node in +html+ and returns the results
# with nil entries removed.
#
# @param html [#map] collection of tweet nodes
# @return [Array] the non-nil results, in input order
def from_tweets_html(html)
  parsed = html.map { |node| from_tweet_html(node) }
  parsed.compact
end

Instance Method Details

#attrs ⇒ Object



36
37
38
39
40
# File 'lib/twitterscraper/tweet.rb', line 36

# Collects the tweet's attributes into a hash by calling the method named
# after each entry of KEYS.
#
# @return [Hash] KEYS entries mapped to the value returned by the same-named method
def attrs
  KEYS.each_with_object({}) do |key, collected|
    collected[key] = send(key)
  end
end

#to_json(options = {}) ⇒ Object



42
43
44
# File 'lib/twitterscraper/tweet.rb', line 42

# Serializes the hash returned by #attrs as a JSON string.
#
# +options+ is forwarded to Hash#to_json rather than dropped, so generator
# settings supplied by the caller (or by the JSON generator when this object
# is nested inside a structure being generated) are applied here too.
#
# @param options [Object] generator options or state; defaults to an empty hash
# @return [String] JSON text
def to_json(options = {})
  attrs.to_json(options)
end