Class: TildeScraper::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/tilde_scraper/scraper.rb

Class Method Summary collapse

Class Method Details

.scrape_comments(url) ⇒ Object



53
54
55
56
57
58
# File 'lib/tilde_scraper/scraper.rb', line 53

# Loads the page at +url+ and scrapes the contents of its "#comments" node.
#
# @param url [Object] passed unchanged to +open_url+ and +scrape_children+;
#   presumably a URL string
# @return [Object] whatever +scrape_children+ returns for the "#comments"
#   selection (the original code names it +array+)
def self.scrape_comments(url)
  comment_root = open_url(url).css("#comments")
  scrape_children(comment_root, url)
end

.scrape_groups(url) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
# File 'lib/tilde_scraper/scraper.rb', line 41

# Loads the page at +url+ and builds one hash per "tr.group-level-0" row.
#
# @param url [Object] passed unchanged to +open_url+; presumably a URL string
# @return [Array<Hash>] one hash per row with the keys:
#   * +:name+        - text of the row's "a" elements
#   * +:description+ - text of the row's "p" elements
#   * +:subs+        - first whitespace-separated token of the
#     "span.group-subscription-count" text, or nil when that text is blank
def self.scrape_groups(url)
  rows = open_url(url).css("tr.group-level-0")
  rows.map do |row|
    name = row.css("a").text
    description = row.css("p").text
    # Keep only the leading token of the subscription-count text.
    subs, = row.css("span.group-subscription-count").text.split(" ")
    { name: name, description: description, subs: subs }
  end
end

.scrape_page(url) ⇒ Object

Returns an array with two elements: the first is a hash containing general page info; the second is an array of hashes containing topic info.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/tilde_scraper/scraper.rb', line 5

# Loads the page at +url+ and extracts page-level info plus one hash per topic.
#
# @param url [Object] passed unchanged to +open_url+ and stored under +:url+;
#   presumably a URL string
# @return [Array] a two-element array +[page_info, topics]+:
#   * +page_info+ [Hash] - +:url+ plus one +:"<label>_link"+ entry per
#     "a.page-item" element, where +<label>+ is the element's downcased text
#     (a button labelled "Next" yields +:next_link+)
#   * +topics+ [Array<Hash>] - one hash per "article.topic" element with the
#     keys +:title+, +:comment_count+, +:comment_link+, +:group+,
#     +:word_count+, +:age+, +:votes+, and either +:topic_text+ (when the
#     topic has excerpt content) or +:link+ (the title's href otherwise)
def self.scrape_page(url)
  doc = open_url(url)

  page_info = { url: url }
  doc.css("a.page-item").each do |button|
    page_info[:"#{button.text.downcase}_link"] = button.attribute("href").value
  end

  topics = doc.css("article.topic").map do |topic|
    title = topic.css("h1.topic-title a")
    metadata = topic.css("div.topic-metadata")
    # NOTE(review): the href is prefixed with the site root, so it is
    # presumably site-relative - confirm against the live markup.
    comment_path = topic.css("div.topic-info-comments a").attribute("href").value.split(" ").first
    info = {
      title: title.text,
      comment_count: topic.css("div.topic-info-comments").text.strip,
      comment_link: "https://tildes.net" + comment_path,
      group: metadata.css("span.topic-group").text,
      word_count: metadata.css("span.topic-content-metadata").text.split(" ").first,
      age: topic.css("time.time-responsive").attribute("data-abbreviated").value,
      votes: topic.css("div.topic-voting span.topic-voting-votes").text
    }

    # The excerpt's "summary" child is skipped; only the remaining children
    # contribute to the topic text.
    excerpt = topic.css(".topic-text-excerpt").children.reject { |el| el.name == "summary" }
    if excerpt.empty?
      # No excerpt content: record the title's href instead.
      info[:link] = title.attribute("href").value
    else
      info[:topic_text] = excerpt.map(&:text).join.strip
    end
    info
  end

  [page_info, topics]
end