Class: Datahen::CLI::Parser
- Inherits: Thor
- Inheritance chain: Object → Thor → Datahen::CLI::Parser
- Defined in:
- lib/datahen/cli/parser.rb
Instance Method Summary
- #batch_exec_parse(scraper_name, config_file) ⇒ Object
- #exec_parse(scraper_name, parser_file, *gids) ⇒ Object
- #try_parse(scraper_name, parser_file, gid) ⇒ Object
Instance Method Details
#batch_exec_parse(scraper_name, config_file) ⇒ Object
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/datahen/cli/parser.rb', line 76

# Runs a batch parse for a scraper job, driven by +config_file+.
#
# The job id comes from +options[:job]+ when that option is set; otherwise the
# job is looked up by +scraper_name+ through Client::ScraperJob and its 'id'
# is used. While the batch runs, $stdout and $stderr are switched to sync mode
# so output is not buffered; their previous sync settings are put back
# afterwards.
#
# Any StandardError raised while building or running the batch is not
# re-raised: its message and backtrace are printed to stdout instead.
#
# @param scraper_name used to look up the job when +options[:job]+ is not set
# @param config_file passed through to Datahen::Scraper::BatchParser.new
def batch_exec_parse(scraper_name, config_file)
  # `options` is the option hash supplied by Thor for the running command.
  job_id = options[:job] || Client::ScraperJob.new(options).find(scraper_name)['id']

  # make the stdout and stderr sync to prevent buffering
  old_stdout_sync = $stdout.sync
  old_stderr_sync = $stderr.sync
  $stdout.sync = true
  $stderr.sync = true

  begin
    batch = Datahen::Scraper::BatchParser.new(
      job_id, config_file,
      worker_count: options[:workers],
      max_garbage: options[:"max-garbage"],
      dequeue_interval: options[:"dequeue-interval"],
      dequeue_scale: options[:"dequeue-scale"]
    )
    batch.exec_parse(true, options[:"keep-outputs"])
  rescue => e
    puts [e.message] + Array(e.backtrace)
  ensure
    # resume whatever state the stdout and stderr sync were; done in `ensure`
    # so the streams are also restored when something other than a
    # StandardError (for example an Interrupt) unwinds through this method
    $stdout.sync = old_stdout_sync
    $stderr.sync = old_stderr_sync
  end
end
#exec_parse(scraper_name, parser_file, *gids) ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/datahen/cli/parser.rb', line 45

# Runs +parser_file+ against each of the given page GIDs for a scraper job.
#
# The job id comes from +options[:job]+ when that option is set; otherwise the
# job is looked up by +scraper_name+ through Client::ScraperJob and its 'id'
# is used. For every GID a "Parsing <gid>" line is printed, followed by
# whatever Datahen::Scraper::Parser.exec_parser_page returns.
#
# @param scraper_name used to look up the job when +options[:job]+ is not set
# @param parser_file passed through to Datahen::Scraper::Parser.exec_parser_page
# @param gids one or more page GIDs, processed in the order given
def exec_parse(scraper_name, parser_file, *gids)
  # `options` is the option hash supplied by Thor for the running command.
  job_id = options[:job] || Client::ScraperJob.new(options).find(scraper_name)['id']

  gids.each do |gid|
    begin
      puts "Parsing #{gid}"

      # Parsed inside the loop, so each page is handed a freshly parsed vars
      # object; stays nil when no vars option was given.
      vars = JSON.parse(options[:vars]) if options[:vars]
      puts Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, true, vars, options[:"keep-outputs"])
    rescue => e
      # A failure on one page (including invalid vars JSON) is printed and the
      # remaining pages are still attempted.
      puts e
    end
  end
end
#try_parse(scraper_name, parser_file, gid) ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/datahen/cli/parser.rb', line 14

# Runs +parser_file+ against a single page GID and prints the result.
#
# Job selection, in order:
# * +options[:job]+ is used as the job id when set;
# * otherwise, when +options[:global]+ is set, the job id is nil;
# * otherwise the job is looked up by +scraper_name+ through
#   Client::ScraperJob and its 'id' is used.
#
# When +options[:vars]+ is given it must be a JSON string; if it cannot be
# parsed, an error line is printed and the parser is not run.
#
# @param scraper_name used to look up the job when neither the job nor the
#   global option is set
# @param parser_file passed through to Datahen::Scraper::Parser.exec_parser_page
# @param gid the page GID to parse
def try_parse(scraper_name, parser_file, gid)
  # `options` is the option hash supplied by Thor for the running command.
  job_id =
    if options[:job]
      options[:job]
    elsif options[:global]
      nil
    else
      Client::ScraperJob.new(options).find(scraper_name)['id']
    end

  vars = nil
  if options[:vars]
    begin
      vars = JSON.parse(options[:vars])
    rescue JSON::ParserError
      # Only the vars parse is guarded, so a JSON error raised elsewhere is
      # neither blamed on vars nor silently dropped.
      puts "Error: #{options[:vars]} on vars is not a valid JSON"
      return
    end
  end

  puts Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, false, vars, options[:"keep-outputs"])
end