Class: Result2csv::Converter
- Inherits:
- Object
- Inheritance chain: Object → Result2csv::Converter
- Defined in:
- lib/result2csv/converter.rb
Class Method Summary collapse
- .bucket ⇒ Object
- .convert_to_csv(url, user_token, result_id) ⇒ Object
- .create_matrix(result_file, user_token, result_id) ⇒ Object
- .does_not_have_csv?(result_url) ⇒ Boolean
- .file(url) ⇒ Object
- .get_results_url(options) ⇒ Object
- .has_csv?(url) ⇒ Boolean
- .parse_csv_headers(result_file) ⇒ Object
- .parser ⇒ Object
- .report_csv_conversion_progress(user_token, result_id, message) ⇒ Object
- .results_values_to_row(object, columns) ⇒ Object
- .retrieve(options) ⇒ Object
- .s3_csv_file(url) ⇒ Object
- .s3_object(key) ⇒ Object
- .s3_object_key(url) ⇒ Object
- .s3_url(url, content_type = "application/json") ⇒ Object
- .test_csv(result_url) ⇒ Object
- .toggle_downloaded_state(user, id, state) ⇒ Object
- .truncate_to_max_cell_size(string) ⇒ Object
- .write_csv_to_file(object_name, csv) ⇒ Object
- .write_csv_to_s3(object_name, csv) ⇒ Object
Class Method Details
.bucket ⇒ Object
58 59 60 61 |
# File 'lib/result2csv/converter.rb', line 58
#
# Looks up the S3 bucket used for result files.
#
# @return [Object] the "datafiniti-voltron-results" entry of a freshly created
#   AWS::S3 client's bucket collection
def self.bucket
  AWS::S3.new.buckets["datafiniti-voltron-results"]
end
.convert_to_csv(url, user_token, result_id) ⇒ Object
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/result2csv/converter.rb', line 112
#
# Downloads the JSON document at +url+ and turns it into CSV.
#
# The body is parsed in up to three attempts: as-is, then with every "],"
# replaced by ",", then with a comma at the start of any line removed. An
# error raised by the last attempt propagates to the caller.
#
# @param url [String] location fetched with RestClient.get
# @param user_token [Object] forwarded to create_matrix (progress reporting)
# @param result_id [Object] forwarded to create_matrix (progress reporting)
# @return [Object] the result of calling +to_csv+ on the matrix from create_matrix
def self.convert_to_csv(url, user_token, result_id)
  require 'json'
  # NOTE(review): this freezes the JSON module itself; the reason is not visible here — confirm it is intended.
  JSON.freeze
  # report_csv_conversion_progress(user_token, result_id, "downloading")
  cached_result = RestClient.get(url)
  result_file =
    begin
      JSON.parse(cached_result)
    rescue
      begin
        JSON.parse(cached_result.gsub(/\],/, ','))
      rescue
        JSON.parse(cached_result.gsub(/^,/, ''))
      end
    end
  # report_csv_conversion_progress(user_token, result_id, "starting")
  # NOTE(review): presumably relies on an Array#to_csv that handles an array of rows — verify.
  create_matrix(result_file, user_token, result_id).to_csv
end
.create_matrix(result_file, user_token, result_id) ⇒ Object
132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/result2csv/converter.rb', line 132
#
# Builds the row matrix for a parsed result set: the header row first, then
# one row per record. Progress is reported after every record.
#
# @param result_file [Enumerable] parsed records, handed to parse_csv_headers
#   and iterated record by record
# @param user_token [Object] passed through to report_csv_conversion_progress
# @param result_id [Object] passed through to report_csv_conversion_progress
# @return [Array<Array>] header row followed by one row per record
def self.create_matrix(result_file, user_token, result_id)
  headers = parse_csv_headers(result_file)
  rows = [headers]
  total_size = result_file.size
  result_file.each_with_index do |object, index|
    rows << results_values_to_row(object, headers.size)
    # index is zero-based, so index + 1 is the number of records handled so far
    progress = (((index + 1).to_f / total_size.to_f) * 100).to_i
    report_csv_conversion_progress(user_token, result_id, "converting: #{progress}%")# if progress % 10 == 0
  end
  rows
end
.does_not_have_csv?(result_url) ⇒ Boolean
49 50 51 52 |
# File 'lib/result2csv/converter.rb', line 49
#
# Negation of has_csv?.
#
# @param result_url [String, Hash] a URL, or a Hash whose :result_url entry is
#   used in its place
# @return [Boolean] true when has_csv? is falsy for the URL
def self.does_not_have_csv?(result_url)
  target = result_url.is_a?(Hash) ? result_url[:result_url] : result_url
  !has_csv?(target)
end
.file(url) ⇒ Object
54 55 56 |
# File 'lib/result2csv/converter.rb', line 54
#
# Fetches +url+ with RestClient and parses the response as JSON.
#
# @param url [String] location to fetch
# @return [Object] the value produced by JSON.parse
def self.file(url)
  response = RestClient.get(url)
  JSON.parse(response)
end
.get_results_url(options) ⇒ Object
21 22 23 24 25 |
# File 'lib/result2csv/converter.rb', line 21
#
# Asks retrieve for the results of a crawl and returns the parsed body.
#
# @param options [Hash] must provide :crawl_name and :user_token; both are
#   forwarded to retrieve
# @return [Object, String] the JSON-parsed body when the status is below 400,
#   otherwise an empty string
def self.get_results_url(options)
  result = self.retrieve(crawl_name: options[:crawl_name], user_token: options[:user_token])
  return JSON.parse(result[:body]) if result[:status] < 400
  ''
end
.has_csv?(url) ⇒ Boolean
42 43 44 45 46 47 |
# File 'lib/result2csv/converter.rb', line 42
#
# Checks whether the "<key>_csv.csv" object derived from +url+ exists in the
# "datafiniti-voltron-results" bucket.
#
# @param url [String] result URL; its object key comes from s3_object_key
# @return [Boolean] the result of +exists?+, or false when the lookup raises
def self.has_csv?(url)
  s3 = AWS::S3.new
  key_stem = s3_object_key(url).split('.').first
  csv_path = "#{key_stem}_csv.csv"
  begin
    s3.buckets["datafiniti-voltron-results"].objects[csv_path].exists?
  rescue
    # best effort: a failed lookup is treated as "no CSV"
    false
  end
end
.parse_csv_headers(result_file) ⇒ Object
93 94 95 96 97 |
# File 'lib/result2csv/converter.rb', line 93
#
# Derives the CSV header row from the first record of a result set.
#
# Only the first record is inspected; keys that appear solely in later
# records are not included.
#
# @param result_file [#first] parsed records; each record responds to +keys+
# @return [Array] the first record's keys, or an empty array when there are
#   no records
def self.parse_csv_headers(result_file)
  first_record = result_file.first
  # An empty result set has no first record; return no headers instead of raising.
  return [] if first_record.nil?
  first_record.keys.to_a
end
.parser ⇒ Object
99 100 101 |
# File 'lib/result2csv/converter.rb', line 99
#
# Creates a new Yajl parser.
#
# @return [Yajl::Parser] a fresh parser instance on every call
def self.parser
  Yajl::Parser.new
end
.report_csv_conversion_progress(user_token, result_id, message) ⇒ Object
147 148 149 150 |
# File 'lib/result2csv/converter.rb', line 147
#
# Prints a progress message to standard output, preceded by a carriage
# return so successive messages overwrite each other on a terminal.
#
# @param user_token [Object] only used by the disabled realtime publish below
# @param result_id [Object] only used by the disabled realtime publish below
# @param message [#to_s] text to print
# @return [nil]
def self.report_csv_conversion_progress(user_token, result_id, message)
  # RealtimeMessage.publish(user_token, 'conversion-status', {:progress => message, :id => result_id})
  print "\r#{message}"
end
.results_values_to_row(object, columns) ⇒ Object
103 104 105 106 107 108 109 |
# File 'lib/result2csv/converter.rb', line 103
#
# Converts one record into a CSV row by passing each of its values through
# truncate_to_max_cell_size.
#
# @param object [#values] a single record
# @param columns [Integer] accepted but not used by this method
# @return [Array] one cell per value, in the order +values+ yields them
def self.results_values_to_row(object, columns)
  object.values.to_a.map { |value| truncate_to_max_cell_size(value) }
end
.retrieve(options) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
# File 'lib/result2csv/converter.rb', line 4
#
# Requests the result listing for a crawl from api.80legs.com.
#
# The query asks for the "url" field and passes a "dates" value built from
# the date seven days before now.
#
# @param options [Hash] must provide :user_token (placed in the URL's user
#   info) and :crawl_name (placed in the URL path)
# @return [Hash] +:body+ and +:status+ taken from the HTTP response
def self.retrieve(options)
  # 604800 seconds = 7 days
  one_week_ago = Time.at(Time.now.gmtime.to_i - 604800)
  date_string = '{"' + one_week_ago.strftime("%F") + '":""}'
  template = Addressable::Template.new("https://#{options[:user_token]}:@api.80legs.com/v2/results/#{options[:crawl_name]}/{?query*}")
  uri = template.expand({
    "query" => {
      "fields" => ["url"],
      "dates" => date_string
    }
  })
  # Locals instead of class-level instance variables, so one call can never
  # return the body or status captured by a previous call.
  body = nil
  status = nil
  RestClient.get(uri.to_s) do |response, _request|
    status = response.code
    body = response.body
  end
  { body: body, status: status }
end
.s3_csv_file(url) ⇒ Object
37 38 39 40 |
# File 'lib/result2csv/converter.rb', line 37
#
# Builds a GET URL for the "<key>_csv.csv" object derived from +url+,
# requesting an attachment disposition and an "application/csv" content type.
#
# @param url [String] result URL; its object key comes from s3_object_key
# @return [String] the URL returned by +url_for+, as a string
def self.s3_csv_file(url)
  key_stem = s3_object_key(url).split('.').first
  download_url = s3_object("#{key_stem}_csv.csv").url_for(
    :get,
    endpoint: "s3.amazonaws.com",
    :response_content_disposition => "attachment",
    :response_content_type => "application/csv"
  )
  download_url.to_s
end
.s3_object(key) ⇒ Object
32 33 34 35 |
# File 'lib/result2csv/converter.rb', line 32
#
# Looks up an object in the results bucket by key.
#
# @param key [String] object key
# @return [Object] the bucket's object entry for +key+
def self.s3_object(key)
  # bucket creates a new AWS::S3 client and selects "datafiniti-voltron-results"
  bucket.objects[key]
end
.s3_object_key(url) ⇒ Object
27 28 29 30 |
# File 'lib/result2csv/converter.rb', line 27
#
# Derives an object key from a URL's path: the path is split on "/" and the
# elements at positions 1 and 2 are joined back together with "/".
#
# @param url [String] URL parsed with Addressable::URI
# @return [String] the joined path elements
def self.s3_object_key(url)
  path_segments = Addressable::URI.parse(url).path.split('/')
  path_segments[1, 2].join('/')
end
.s3_url(url, content_type = "application/json") ⇒ Object
63 64 65 66 67 68 69 |
# File 'lib/result2csv/converter.rb', line 63
#
# Builds a GET URL for the bucket object that +url+ points at, requesting an
# attachment disposition and the given content type.
#
# @param url [String] result URL; elements 1 and 2 of its "/"-split path form
#   the object key
# @param content_type [String] value sent as :response_content_type
# @return [String] the URL returned by +url_for+, as a string
def self.s3_url(url, content_type="application/json")
  uri = Addressable::URI.parse(url)
  key = uri.path.split('/')[1,2].join('/')
  obj = AWS::S3.new.buckets["datafiniti-voltron-results"].objects[key]
  # The option key was previously misspelled (:resonse_content_type), so
  # content_type never reached url_for under the name s3_csv_file uses.
  obj.url_for(
    :get,
    endpoint: "s3.amazonaws.com",
    :response_content_disposition => "attachment",
    :response_content_type => content_type
  ).to_s
end
.test_csv(result_url) ⇒ Object
159 160 161 162 163 164 |
# File 'lib/result2csv/converter.rb', line 159
#
# Manual smoke test: converts the result at +result_url+ to CSV, uploads it
# under the "<key>_csv.csv" object name, and prints the download URL.
#
# The helpers are called on this class. The earlier code called them on a
# +Result+ constant, although every one of them is defined here.
#
# @param result_url [String] URL of the JSON result to convert
# @return [nil]
def self.test_csv(result_url)
  object_name = "#{s3_object_key(result_url).split('.').first}_csv.csv"
  # 1, 1 are placeholder user_token / result_id values for progress reporting
  csv = convert_to_csv(result_url, 1, 1)
  write_csv_to_s3(object_name, csv)
  puts s3_csv_file(result_url).to_s
end
.toggle_downloaded_state(user, id, state) ⇒ Object
152 153 154 155 156 157 |
# File 'lib/result2csv/converter.rb', line 152
#
# Sends a PUT to api.80legs.com that sets the "downloaded" attribute of a
# result.
#
# @param user [#token] object whose token goes into the URL's user info
# @param id [Object] result identifier, interpolated into the URL path
# @param state [Object] value sent as "downloaded"
# @return [Object] the response code
def self.toggle_downloaded_state(user, id, state)
  # return EightyLegsApi.conn(token: user.token).put("/results/#{id}", {downloaded: state}.to_json, :content_type => :json)
  endpoint = "https://#{user.token}:@api.80legs.com/v2/results/#{id}"
  payload = { downloaded: state }.to_json
  RestClient.put(endpoint, payload, :content_type => :json) do |response|
    # return from the enclosing method with the status code
    return response.code
  end
end
.truncate_to_max_cell_size(string) ⇒ Object
81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/result2csv/converter.rb', line 81
#
# Normalizes a value into CSV cell text.
#
# The value is parsed as JSON when possible and the parsed value's +to_s+ is
# used; otherwise the value's own +to_s+ is used. The text is then cut to at
# most 32767 characters, double quotes are removed and commas become
# semicolons.
#
# The limit applies on both paths. Previously text that parsed as JSON was
# returned at full length.
#
# @param string [Object, nil] raw cell value
# @return [String] cleaned cell text; "" for nil
def self.truncate_to_max_cell_size(string)
  return "" if string.nil?

  max_cell_size = 32767
  text =
    begin
      JSON.parse(string).to_s
    rescue StandardError
      # not JSON (or not a String at all): fall back to the plain text form
      string.to_s
    end
  text[0, max_cell_size].gsub('"', "").gsub(",", ";")
end
.write_csv_to_file(object_name, csv) ⇒ Object
75 76 77 |
# File 'lib/result2csv/converter.rb', line 75
#
# Writes CSV text to a file named after the last "/"-separated part of
# +object_name+. The path is relative, so it resolves against the current
# working directory.
#
# @param object_name [String] object name; only its last segment is used
# @param csv [String] content to write
# @return [Integer] number of bytes written
def self.write_csv_to_file(object_name, csv)
  file_name = "#{object_name.split('/').last}"
  File.write(file_name, csv)
end
.write_csv_to_s3(object_name, csv) ⇒ Object
71 72 73 |
# File 'lib/result2csv/converter.rb', line 71
#
# Writes CSV text to the bucket object named +object_name+.
#
# @param object_name [#to_s] key of the object to write
# @param csv [String] content to write
# @return [Object] whatever the object's +write+ call returns
def self.write_csv_to_s3(object_name, csv)
  target = bucket.objects[object_name.to_s]
  target.write(csv)
end