Module: Parser

Includes:
Enable
Defined in:
lib/fly_parser/sources/fitness.rb,
lib/fly_parser.rb,
lib/fly_parser/base.rb,
lib/fly_parser/version.rb,
lib/fly_parser/xml_base.rb,
lib/fly_parser/sources/news.rb,
lib/fly_parser/sources/sport.rb,
lib/fly_parser/sources/news-az.rb,
lib/fly_parser/sources/news-fr.rb,
lib/fly_parser/sources/news-nl.rb,
lib/fly_parser/sources/exercise.rb,
lib/fly_parser/sources/astrology.rb

Overview

TODO: Parse all pages first, then download only the newest ones later.

Defined Under Namespace

Classes: Astrology, Base, Exercise, Fitness, News, NewsAZ, NewsFr, NewsNl, Sport, XmlBase

Constant Summary collapse

VERSION =
"0.0.23"

Class Method Summary collapse

Methods included from Enable

#fitness, #iterate_sources, #local, #method_missing, #news, #news_az, #news_fr, #news_nl

Dynamic Method Handling

This module handles dynamic methods through the method_missing method in the module Enable

Class Method Details

.config ⇒ Object



98
99
100
# File 'lib/fly_parser.rb', line 98

# Reads the YAML file located at CONFIG_PATH and returns the parsed result.
#
# @return [Object] whatever YAML.load_file produces for that file
def config
  path = CONFIG_PATH
  YAML.load_file(path)
end

.connect(url) ⇒ Object



47
48
49
50
51
# File 'lib/fly_parser.rb', line 47

# Requests +url+ through a freshly built Mechanize agent.
#
# The agent's default pluggable parser is set to Mechanize::Page before the
# request is issued.
#
# @param url [String] address handed to the agent's +get+
# @return [Object] the value returned by the agent's +get+
def connect(url)
  browser = Mechanize.new.tap do |mech|
    mech.pluggable_parser.default = Mechanize::Page
  end
  browser.get(url)
end

.find_source ⇒ Object



102
103
104
# File 'lib/fly_parser.rb', line 102

# Picks the first entry under the "sources" key of the configuration whose
# "enabled" value is truthy.
#
# @return [Object, nil] the first enabled source entry; nil when +find+
#   matches nothing
def find_source
  candidates = config["sources"]
  candidates.find { |entry| entry["enabled"] }
end

.http(url) ⇒ Object

Get HTTP Source



43
44
45
# File 'lib/fly_parser.rb', line 43

# Fetches +url+ and parses what it returns as HTML.
#
# Uses URI.open rather than the bare Kernel#open: on Ruby 3.0+ open-uri no
# longer makes Kernel#open handle URLs. The block form also closes the
# handle once parsing has finished.
#
# @param url [String] location of the document to fetch
# @return [Object] the result of Nokogiri::HTML for the fetched content
def http(url)
  URI.open(url) { |io| Nokogiri::HTML(io) }
end

.init_parser(source) ⇒ Object



110
111
112
113
114
# File 'lib/fly_parser.rb', line 110

# Dispatches to the +enable_<type>+ method that matches the given source.
#
# The type is read from source["source"]; hyphens are turned into
# underscores so that e.g. "news-az" maps to +enable_news_az+.
#
# @param source [Hash] source entry; its "source" value names the type
# @return [Object] whatever the dispatched +enable_*+ method returns
def init_parser(source)
  type_name = source["source"].tr('-', '_')
  send("enable_#{type_name}", source)
end

.logo ⇒ Object



106
107
108
# File 'lib/fly_parser.rb', line 106

# Returns the contents of the file at LOGO_PATH.
#
# @return [String] the file's contents as read by File.read
def logo
  File.read(LOGO_PATH)
end

.parse_and_save(items) ⇒ Object

Threads are slower in this case, so don’t use them: we need a delay between requests (some sources ban you for making many concurrent requests).



80
81
82
83
84
85
86
# File 'lib/fly_parser.rb', line 80

# Parses and stores every item, one after another.
#
# For each item a progress line is printed with +ap+, then the item is
# parsed with +parse_item+ and the outcome is passed to +save_item+.
#
# @param items [Enumerable<Hash>] item entries; item['type'] is used in the
#   progress line
# @return [Object] the +items+ collection, as returned by +each+
def parse_and_save(items)
  items.each do |entry|
    ap "Parsing #{entry['type']}"
    save_item(entry, parse_item(entry))
  end
end

.parse_item(item) ⇒ Object



94
95
96
# File 'lib/fly_parser.rb', line 94

# Runs the parser attached to an item.
#
# @param item [Hash] item entry holding its parser under the "parser" key
# @return [Object] the result of calling +parse_all+ on that parser
def parse_item(item)
  parser = item["parser"]
  parser.parse_all
end

.save(articles, options) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/fly_parser.rb', line 53

# Builds an Article for each parsed article and saves the valid ones.
#
# Every article is assigned the single category identified by
# options[:category_id]. That category is looked up once, lazily, instead of
# once per article, so an empty +articles+ collection triggers no lookup.
#
# @param articles [Enumerable<Hash>] parsed articles; the keys read here are
#   :title, :content, :poster_image and the optional :tags
# @param options [Hash] must provide :category_id
# @return [Object] the +articles+ collection, as returned by +each+
def save(articles, options)
  category = nil

  articles.each do |article|
    # The lookup does not depend on the article, so resolve it only once.
    category ||= Category.find(options[:category_id])

    item = Article.new(title: article[:title], content: article[:content])
    item.categories = [category]
    item.remote_image_url = article[:poster_image]

    # Invalid articles are skipped and never saved.
    next unless item.valid?
    item.save

    article_tags = article[:tags]
    item.tags << tags(article_tags) if article_tags
  end
end

.save_item(item, result) ⇒ Object



88
89
90
91
92
# File 'lib/fly_parser.rb', line 88

# Stores the parsed result of an item under the item's category.
#
# The category is found or created by a name that is the JSON encoding of
# { en: item["category"] }; a progress line is printed with +ap+ and the
# result is handed to Parser.save together with the category id.
#
# @param item [Hash] item entry; its "category" value names the category
# @param result [Object] parsed articles, passed through to Parser.save
# @return [Object] whatever Parser.save returns
def save_item(item, result)
  encoded_name = JSON.generate(en: item["category"])
  category = Category.find_or_create_by!(name: encoded_name)
  ap "and save to #{category.localized_name} category"
  Parser.save(result, { category_id: category.id })
end

.start ⇒ Object



70
71
72
73
74
75
76
77
# File 'lib/fly_parser.rb', line 70

# Entry point: prints the current time, then runs the source returned by
# +find_source+.
#
# The source is first handed to +init_parser+, after which its "items" are
# passed to +parse_and_save+.
#
# @return [Object] whatever +parse_and_save+ returns
def start
  puts Time.now
  active = find_source
  init_parser(active)
  parse_and_save(active["items"])
end

.tags(tags) ⇒ Object



66
67
68
# File 'lib/fly_parser.rb', line 66

# Maps tag names to Tag records, finding or creating one per name by title.
#
# @param tags [Enumerable<String>] tag names
# @return [Array] one Tag.find_or_create_by! result per name, in input order
def tags(tags)
  tags.map do |title|
    Tag.find_or_create_by!(title: title)
  end
end