Class: SJCBusSchedule::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/sjc_bus_schedule/crawler.rb

Constant Summary collapse

BASE_URL =
"http://servicos2.sjc.sp.gov.br/servicos/horario-e-itinerario.aspx"
FILTERS =
{ number: 0, name: 1, itinerary: 2 }.freeze

Instance Method Summary collapse

Constructor Details

#initialize(http: HTTParty, query: { number: "" }) ⇒ Crawler

Returns a new instance of Crawler.



14
15
16
17
18
# File 'lib/sjc_bus_schedule/crawler.rb', line 14

# Builds a crawler configured from a single query filter.
#
# Only the first key/value pair of +query+ is consulted: its key is handed
# to validate_filter and the pair is splatted into set_url (both defined
# elsewhere in this class — presumably the key is checked against FILTERS
# and @url is derived from BASE_URL; confirm against those helpers).
#
# @param http [Object] HTTP client, stored in @http (defaults to HTTParty)
# @param query [Hash] filter name => filter value
def initialize(http: HTTParty, query: { number: "" })
  @http = http

  filter = query.keys.first
  validate_filter(filter)

  set_url(*query.first)
end

Instance Method Details

#process ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/sjc_bus_schedule/crawler.rb', line 20

# Crawls the listing page at @url and every bus page it links to.
#
# All requests go through the HTTP client injected into the constructor
# (@http), so a substitute client passed as `http:` is honoured here rather
# than being bypassed by a direct HTTParty call.
#
# @return [Array] one build_buses result per link yielded by schedule_links
def process
  index_doc = Nokogiri::HTML(@http.get(@url))

  schedule_links(index_doc).map do |schedule_link|
    # The href is appended to the host verbatim — presumably it is always a
    # host-relative path starting with "/"; confirm against the live markup.
    bus_url = "http://servicos2.sjc.sp.gov.br#{schedule_link["href"]}"
    bus_doc = Nokogiri::HTML(@http.get(bus_url))

    build_buses(
      bus_parser: SJCBusSchedule::Parser::Bus.new(doc: bus_doc),
      schedules_parser: SJCBusSchedule::Parser::Schedules.new(doc: bus_doc)
    )
  end
end