Class: ChanCrawlerGem::Collector

Inherits:
Object
  • Object
show all
Defined in:
lib/chanCrawlerGem.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(boards, key_words) ⇒ Collector

Returns a new instance of Collector.



19
20
21
22
23
24
# File 'lib/chanCrawlerGem.rb', line 19

# Sets up a collector for the given boards and keywords.
#
# Side effect: the class variable @@base_url is (re)assigned from the
# BASE_URL environment variable on every construction, so the value is
# shared by all instances. It is nil when BASE_URL is not set.
#
# @param boards [Object] stored unchanged in @boards
#   (presumably a list of board names — confirm with callers)
# @param key_words [Object] stored unchanged in @key_words
def initialize(boards, key_words)
  @@base_url = ENV['BASE_URL']
  @boards = boards
  @relevant_links = []
  @key_words = key_words
end

Instance Attribute Details

#boards ⇒ Object (readonly)

Returns the value of attribute boards.



17
18
19
# File 'lib/chanCrawlerGem.rb', line 17

# Reader for the @boards instance variable.
#
# @return [Object] whatever is currently stored in @boards
def boards
  @boards
end

#key_words ⇒ Object (readonly)

Returns the value of attribute key_words.



17
18
19
# File 'lib/chanCrawlerGem.rb', line 17

# Reader for the @key_words instance variable.
#
# @return [Object] whatever is currently stored in @key_words
def key_words
  @key_words
end

#relevant_links ⇒ Object (readonly)

Returns the value of attribute relevant_links.



17
18
19
# File 'lib/chanCrawlerGem.rb', line 17

# Reader for the @relevant_links instance variable.
#
# @return [Object] whatever is currently stored in @relevant_links
def relevant_links
  @relevant_links
end

Instance Method Details

#analyze_threads(threads, board) ⇒ Object



45
46
47
48
49
50
51
52
# File 'lib/chanCrawlerGem.rb', line 45

# Appends a link to relevant_links for every thread that passes
# thread_relevant?.
#
# The link is built as "<@@base_url><board>/thread/<thread['no']>"; no
# separator is inserted between @@base_url and the board name.
#
# @param threads [#each] collection of thread records, each read via
#   thread['no'] (presumably hashes parsed from a catalog — confirm)
# @param board [Object] board name interpolated into each link
# @return [Object] the +threads+ argument (the result of #each)
def analyze_threads(threads, board)
  threads.each do |candidate|
    next unless thread_relevant?(candidate)

    relevant_links.push("#{@@base_url}#{board}/thread/#{candidate['no']}")
  end
end

#board_catalog_urls ⇒ Object



26
27
28
29
30
31
# File 'lib/chanCrawlerGem.rb', line 26

# Maps every entry of +boards+ to the URL of its catalog.json document.
#
# @return [Hash] board => catalog URL, in the iteration order of +boards+;
#   a board listed more than once yields a single key
def board_catalog_urls
  boards.each_with_object({}) do |board_name, urls|
    urls[board_name] = "http://a.4cdn.org/#{board_name}/catalog.json"
  end
end

#get_relevant_threads ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/chanCrawlerGem.rb', line 54

# Downloads the catalog of every configured board and hands each page's
# thread list to analyze_threads.
#
# Each URL from board_catalog_urls is fetched with HTTParty and the
# response body is parsed as JSON. Nothing is rescued here, so any error
# raised while fetching or parsing propagates to the caller.
#
# @return [Hash] the board => URL hash produced by board_catalog_urls
def get_relevant_threads
  board_catalog_urls.each do |board, catalog_url|
    pages = JSON.parse(HTTParty.get(catalog_url).body)
    next if pages.count.zero?

    pages.each do |page|
      page_threads = page['threads']
      # Skip pages that carry no threads.
      next if page_threads.count.zero?

      analyze_threads(page_threads, board)
    end
  end
end

#thread_relevant?(thread) ⇒ Boolean

Returns:

  • (Boolean)


33
34
35
36
37
38
39
40
41
42
43
# File 'lib/chanCrawlerGem.rb', line 33

# Decides whether a thread matches the configured keywords.
#
# A thread is relevant when it has a comment ('com'), that comment contains
# every entry of @key_words as a case-insensitive substring, and its
# 'images' count is positive. A missing 'images' value is treated as 0.
# With an empty keyword list every thread that has a comment is relevant.
#
# @param thread [Hash] thread record read via 'com' and 'images'
# @return [Boolean] true when relevant, false otherwise
def thread_relevant?(thread)
  comment = thread['com']
  return false if comment.nil?

  # Both values are loop-invariant, so compute them once up front.
  text = comment.downcase
  has_images = thread['images'].to_i.positive?

  # all? yields a real boolean; the previous each-based loop returned the
  # keyword array itself on success.
  @key_words.all? { |word| has_images && text.include?(word.downcase) }
end