Class: Datahen::CLI::Parser

Inherits:
Thor
  • Object
show all
Defined in:
lib/datahen/cli/parser.rb

Instance Method Summary collapse

Instance Method Details

#batch_exec_parse(scraper_name, config_file) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/datahen/cli/parser.rb', line 76

# Runs the batch parser for a job using the given config file.
#
# The job id is taken from options[:job] when present; otherwise the job is
# looked up by scraper name through Client::ScraperJob and its 'id' is used.
# The workers, max-garbage, dequeue-interval and dequeue-scale options are
# forwarded to Datahen::Scraper::BatchParser.
#
# StandardError raised while building or running the batch parser is not
# re-raised: its message and backtrace are printed to stdout instead.
#
# scraper_name - name used for the job lookup when options[:job] is not set
# config_file  - config file handed to the batch parser as-is
def batch_exec_parse(scraper_name, config_file)
  if options[:job]
    job_id = options[:job]
  else
    job = Client::ScraperJob.new(options).find(scraper_name)
    job_id = job['id']
  end

  # make the stdout and stderr sync to prevent buffering
  old_stdout_sync = $stdout.sync
  old_stderr_sync = $stderr.sync
  $stdout.sync = true
  $stderr.sync = true

  begin
    batch = Datahen::Scraper::BatchParser.new job_id, config_file,
      worker_count: options[:"workers"],
      max_garbage: options[:"max-garbage"],
      dequeue_interval: options[:"dequeue-interval"],
      dequeue_scale: options[:"dequeue-scale"]
    batch.exec_parse true, options[:"keep-outputs"]
  rescue => e
    # splat tolerates a nil backtrace instead of raising TypeError on Array#+
    puts [e.message, *e.backtrace]
  ensure
    # resume whatever state the stdout and stderr sync were; done in `ensure`
    # so the process-global flags are also restored when something that is
    # not a StandardError (e.g. Interrupt from Ctrl-C) unwinds through here
    $stdout.sync = old_stdout_sync
    $stderr.sync = old_stderr_sync
  end
end

#exec_parse(scraper_name, parser_file, *gids) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/datahen/cli/parser.rb', line 45

# Executes a parser file against each of the given page gids, printing
# "Parsing <gid>" followed by whatever exec_parser_page returns.
#
# The job id is taken from options[:job] when present; otherwise the job is
# looked up by scraper name through Client::ScraperJob and its 'id' is used.
#
# options[:vars], when given, must be a JSON document. It is validated once
# before any page is processed: invalid JSON prints a single error (same
# wording as try_parse) and nothing is parsed. A StandardError raised while
# handling one gid is printed and the remaining gids are still processed.
#
# scraper_name - name used for the job lookup when options[:job] is not set
# parser_file  - parser file passed through to exec_parser_page
# gids         - zero or more page gids to parse
def exec_parse(scraper_name, parser_file, *gids)
  if options[:job]
    job_id = options[:job]
  else
    job = Client::ScraperJob.new(options).find(scraper_name)
    job_id = job['id']
  end

  # The vars payload does not depend on the gid, so reject malformed JSON
  # up front instead of reporting the same failure once per gid.
  vars_json = options[:vars]
  if vars_json
    begin
      JSON.parse(vars_json)
    rescue JSON::ParserError
      puts "Error: #{vars_json} on vars is not a valid JSON"
      return
    end
  end

  gids.each do |gid|
    begin
      puts "Parsing #{gid}"

      # parsed per page so every page gets its own vars object
      vars = JSON.parse(vars_json) if vars_json
      puts Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, true, vars, options[:"keep-outputs"])
    rescue => e
      puts e
    end
  end
end

#try_parse(scraper_name, parser_file, gid) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/datahen/cli/parser.rb', line 14

# Tries a parser file against a single page gid and prints the result of
# exec_parser_page (called with `false` as its fourth argument).
#
# Job id resolution: options[:job] wins; with options[:global] the job id
# is nil; otherwise the job is looked up by scraper name through
# Client::ScraperJob and its 'id' is used.
#
# A JSON::ParserError raised anywhere in the method is rescued; the
# "not a valid JSON" message is printed only when options[:vars] was given.
def try_parse(scraper_name, parser_file, gid)
  job_id =
    if options[:job]
      options[:job]
    elsif options[:global]
      nil
    else
      Client::ScraperJob.new(options).find(scraper_name)['id']
    end

  raw_vars = options[:vars]
  vars = raw_vars ? JSON.parse(raw_vars) : nil
  result = Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, false, vars, options[:"keep-outputs"])
  puts result
rescue JSON::ParserError
  puts "Error: #{options[:vars]} on vars is not a valid JSON" if options[:vars]
end