Class: ChanCrawlerGem::Collector
- Inherits:
-
Object
- Object
- ChanCrawlerGem::Collector
- Defined in:
- lib/chanCrawlerGem.rb
Instance Attribute Summary collapse
-
#boards ⇒ Object
readonly
Returns the value of attribute boards.
-
#key_words ⇒ Object
readonly
Returns the value of attribute key_words.
-
#relevant_links ⇒ Object
readonly
Returns the value of attribute relevant_links.
Instance Method Summary collapse
- #analyze_threads(threads, board) ⇒ Object
- #board_catalog_urls ⇒ Object
- #get_relevant_threads ⇒ Object
-
#initialize(boards, key_words) ⇒ Collector
constructor
A new instance of Collector.
- #thread_relevant?(thread) ⇒ Boolean
Constructor Details
#initialize(boards, key_words) ⇒ Collector
Returns a new instance of Collector.
19 20 21 22 23 24 |
# File 'lib/chanCrawlerGem.rb', line 19

# Sets up a collector for the given boards and search terms.
#
# @param boards [Object] stored unchanged in @boards
# @param key_words [Object] stored unchanged in @key_words
def initialize(boards, key_words)
  @boards = boards
  @key_words = key_words
  # Starts empty; analyze_threads appends to this list.
  @relevant_links = []
  # Class variable, reassigned from ENV['BASE_URL'] on every construction
  # (nil when the environment variable is unset).
  @@base_url = ENV['BASE_URL']
end
Instance Attribute Details
#boards ⇒ Object (readonly)
Returns the value of attribute boards.
17 18 19 |
# File 'lib/chanCrawlerGem.rb', line 17 def boards @boards end |
#key_words ⇒ Object (readonly)
Returns the value of attribute key_words.
17 18 19 |
# File 'lib/chanCrawlerGem.rb', line 17 def key_words @key_words end |
#relevant_links ⇒ Object (readonly)
Returns the value of attribute relevant_links.
17 18 19 |
# File 'lib/chanCrawlerGem.rb', line 17 def relevant_links @relevant_links end |
Instance Method Details
#analyze_threads(threads, board) ⇒ Object
45 46 47 48 49 50 51 52 |
# File 'lib/chanCrawlerGem.rb', line 45

# Appends one link to relevant_links for every thread accepted by
# thread_relevant?. Each link is @@base_url followed by
# "<board>/thread/<thread['no']>".
#
# @param threads [#each] thread entries; each one is indexed with 'no'
# @param board [Object] board identifier interpolated into the link
def analyze_threads(threads, board)
  threads.each do |thread|
    next unless thread_relevant?(thread)

    thread_url = "#{@@base_url}#{board}/thread/#{thread['no']}"
    relevant_links << thread_url
  end
end
#board_catalog_urls ⇒ Object
26 27 28 29 30 31 |
# File 'lib/chanCrawlerGem.rb', line 26

# Builds the catalog URL for every configured board.
#
# @return [Hash] maps each board to "http://a.4cdn.org/<board>/catalog.json"
def board_catalog_urls
  # Array() lets a nil or single board name behave like a list instead of
  # raising NoMethodError on #each.
  Array(boards).each_with_object({}) do |board, catalogs|
    catalogs[board] = "http://a.4cdn.org/#{board}/catalog.json"
  end
end
#get_relevant_threads ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/chanCrawlerGem.rb', line 54

# Downloads the catalog of every configured board and passes each page's
# threads to analyze_threads, which records the relevant links.
#
# Performs one HTTP GET per board via HTTParty and parses the body as JSON.
#
# @return [Hash] the board => catalog URL hash produced by board_catalog_urls
def get_relevant_threads
  board_catalog_urls.each do |board, catalog_url|
    pages = JSON.parse(HTTParty.get(catalog_url).body)

    pages.each do |page|
      threads = page['threads']
      # A page with no 'threads' entry (or an empty one) is skipped instead
      # of raising NoMethodError on nil.
      next if threads.nil? || threads.empty?

      analyze_threads(threads, board)
    end
  end
end
#thread_relevant?(thread) ⇒ Boolean
33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/chanCrawlerGem.rb', line 33

# Decides whether a thread matches the configured key words.
#
# A thread without a comment ('com') is never relevant. Otherwise every entry
# of @key_words must occur in the comment (case-insensitive substring match)
# and the thread must report a positive 'images' count. With no key words at
# all, any thread that has a comment is accepted.
#
# @param thread [Hash] thread entry, read through the 'com' and 'images' keys
# @return [Boolean]
def thread_relevant?(thread)
  comment = thread['com']
  return false if comment.nil?

  # A missing 'images' value counts as zero rather than raising on nil.
  has_images = thread['images'].to_i.positive?
  # Downcase the comment once instead of once per key word.
  text = comment.downcase

  @key_words.all? { |word| has_images && text.include?(word.downcase) }
end