Class: Result2csv::Converter

Inherits:
Object
  • Object
show all
Defined in:
lib/result2csv/converter.rb

Class Method Summary collapse

Class Method Details

.bucket ⇒ Object



58
59
60
61
# File 'lib/result2csv/converter.rb', line 58

# Looks up the S3 bucket named "datafiniti-voltron-results".
#
# A new AWS::S3 client is instantiated on every call.
#
# @return [Object] whatever `AWS::S3#buckets` yields for that bucket name
def self.bucket
  AWS::S3.new.buckets["datafiniti-voltron-results"]
end

.convert_to_csv(url, user_token, result_id) ⇒ Object



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/result2csv/converter.rb', line 112

# Downloads the JSON document at +url+, parses it, and converts the parsed
# records to CSV by way of create_matrix.
#
# Parsing is attempted up to three times on the downloaded text:
#   1. as-is;
#   2. with every "]," replaced by ",";
#   3. with a comma at the start of any line removed.
# If the third attempt also fails, its exception propagates to the caller.
#
# NOTE(review): `JSON.freeze` freezes the JSON module itself on every call;
# the reason is not visible in this file -- confirm before removing.
# NOTE(review): `to_csv` is called on the array of rows returned by
# create_matrix; presumably something outside this method provides a
# matrix-aware Array#to_csv -- verify.
#
# @param url [String] location passed to RestClient.get
# @param user_token [Object] forwarded to create_matrix
# @param result_id [Object] forwarded to create_matrix
# @return [Object] the result of calling `to_csv` on the matrix
def self.convert_to_csv(url, user_token, result_id)
  require 'json'
  JSON.freeze
  raw_body = RestClient.get(url)
  records =
    begin
      JSON.parse(raw_body)
    rescue
      begin
        JSON.parse(raw_body.gsub(/\],/, ','))
      rescue
        JSON.parse(raw_body.gsub(/^,/, ''))
      end
    end
  create_matrix(records, user_token, result_id).to_csv
end

.create_matrix(result_file, user_token, result_id) ⇒ Object



132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/result2csv/converter.rb', line 132

# Builds the row matrix for a parsed result set: the header row from
# parse_csv_headers comes first, followed by one row per record from
# results_values_to_row.
#
# Progress is reported after every record (no throttling) as
# "converting: N%", where N is the integer percentage of records processed.
#
# @param result_file [Enumerable] parsed records; must respond to `size` and `each`
# @param user_token [Object] forwarded to report_csv_conversion_progress
# @param result_id [Object] forwarded to report_csv_conversion_progress
# @return [Array<Array>] header row plus one row per record
def self.create_matrix(result_file, user_token, result_id)
  headers = parse_csv_headers(result_file)
  rows = [headers]
  total_size = result_file.size
  result_file.each_with_index do |record, index|
    rows << results_values_to_row(record, headers.size)
    percent = (((index + 1).to_f / total_size.to_f) * 100).to_i
    report_csv_conversion_progress(user_token, result_id, "converting: #{percent}%")
  end
  rows
end

.does_not_have_csv?(result_url) ⇒ Boolean

Returns:

  • (Boolean)


49
50
51
52
# File 'lib/result2csv/converter.rb', line 49

# Negation of has_csv?.
#
# @param result_url [String, Hash] a URL, or a Hash whose :result_url entry
#   holds the URL
# @return [Boolean] true when has_csv? reports a falsy value for the URL
def self.does_not_have_csv?(result_url)
  target = result_url.is_a?(Hash) ? result_url[:result_url] : result_url
  !has_csv?(target)
end

.file(url) ⇒ Object



54
55
56
# File 'lib/result2csv/converter.rb', line 54

# Fetches +url+ with RestClient and parses the response as JSON.
#
# @param url [String] location passed to RestClient.get
# @return [Object] the parsed JSON document
def self.file(url)
  response = RestClient.get(url)
  JSON.parse(response)
end

.get_results_url(options) ⇒ Object



21
22
23
24
25
# File 'lib/result2csv/converter.rb', line 21

# Calls retrieve for the given crawl and decodes the response body.
#
# Only the :crawl_name and :user_token entries of +options+ are forwarded.
#
# @param options [Hash] reads :crawl_name and :user_token
# @return [Object, String] the JSON-parsed body when the status is below 400,
#   otherwise an empty string
def self.get_results_url(options)
  response = retrieve(crawl_name: options[:crawl_name], user_token: options[:user_token])
  response[:status] < 400 ? JSON.parse(response[:body]) : ''
end

.has_csv?(url) ⇒ Boolean

Returns:

  • (Boolean)


42
43
44
45
46
47
# File 'lib/result2csv/converter.rb', line 42

# Checks whether the CSV companion object for +url+ exists in the
# "datafiniti-voltron-results" bucket.
#
# The companion key is the part of s3_object_key(url) before the first ".",
# with "_csv.csv" appended.
#
# Any StandardError raised by the S3 existence check itself is swallowed and
# reported as false (best effort). Errors raised while creating the client or
# deriving the key are not rescued.
#
# @param url [String] URL of the original result object
# @return [Boolean] result of `exists?`, or false if the lookup raised
def self.has_csv?(url)
  client = AWS::S3.new
  csv_key = "#{s3_object_key(url).split('.').first}_csv.csv"
  begin
    client.buckets["datafiniti-voltron-results"].objects[csv_key].exists?
  rescue StandardError
    false
  end
end

.parse_csv_headers(result_file) ⇒ Object



93
94
95
96
97
# File 'lib/result2csv/converter.rb', line 93

# Derives the CSV header row from the keys of the first record.
#
# Only the first record is inspected; keys that appear only in later records
# do not contribute to the header row.
#
# An empty result set yields an empty header row instead of raising
# NoMethodError on nil.
#
# @param result_file [#first] parsed records, each responding to `keys`
# @return [Array] a new array holding the first record's keys, or [] when
#   there is no first record
def self.parse_csv_headers(result_file)
  first_record = result_file.first
  first_record ? first_record.keys : []
end

.parser ⇒ Object



99
100
101
# File 'lib/result2csv/converter.rb', line 99

# Builds a Yajl parser.
#
# @return [Yajl::Parser] a new parser instance on every call
def self.parser
  Yajl::Parser.new
end

.report_csv_conversion_progress(user_token, result_id, message) ⇒ Object



147
148
149
150
# File 'lib/result2csv/converter.rb', line 147

# Prints a progress message to standard output, preceded by a carriage
# return and without a trailing newline, so that successive messages
# overwrite one another on a terminal.
#
# +user_token+ and +result_id+ are currently unused; they were the arguments
# of the realtime publish call that is commented out below.
#
# @param user_token [Object] unused
# @param result_id [Object] unused
# @param message [#to_s] text to display
# @return [nil]
def self.report_csv_conversion_progress(user_token, result_id, message)
  # RealtimeMessage.publish(user_token, 'conversion-status', {:progress => message, :id => result_id})
  $stdout.print("\r#{message}")
end

.results_values_to_row(object, columns) ⇒ Object



103
104
105
106
107
108
109
# File 'lib/result2csv/converter.rb', line 103

# Converts one record into a CSV row by passing each of its values, in the
# record's own order, through truncate_to_max_cell_size.
#
# NOTE(review): +columns+ is accepted but never used, so rows are neither
# padded nor aligned to the header row -- confirm whether that is intended.
#
# @param object [#values] a single parsed record
# @param columns [Integer] unused
# @return [Array] one sanitized cell per value of the record
def self.results_values_to_row(object, columns)
  object.values.map { |value| truncate_to_max_cell_size(value) }
end

.retrieve(options) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/result2csv/converter.rb', line 4

# Requests the "url" field of a crawl's results from the 80legs v2 API,
# filtered by a date seven days before now.
#
# The response is captured in a local variable rather than in class-level
# instance variables, so concurrent or repeated calls cannot observe each
# other's state. The date is formatted in UTC; the earlier `.gmtime.to_i`
# did not change the epoch value, so the date was actually rendered in the
# process's local time zone.
#
# Passing a block to RestClient.get hands the response to the block for any
# HTTP status; the status is therefore returned to the caller, not raised.
#
# @param options [Hash] reads :user_token (used as the HTTP basic-auth user)
#   and :crawl_name
# @return [Hash] `{body: <response body>, status: <response code>}`
def self.retrieve(options)
  seconds_per_week = 604_800
  since = Time.at(Time.now.to_i - seconds_per_week).utc.strftime("%F")
  date_filter = %({"#{since}":""})

  template = Addressable::Template.new("https://#{options[:user_token]}:@api.80legs.com/v2/results/#{options[:crawl_name]}/{?query*}")
  request_url = template.expand({
    "query" => {
      "fields" => ["url"],
      "dates" => date_filter
    }
  }).to_s

  result = { body: nil, status: nil }
  RestClient.get(request_url) do |response, _request|
    result = { body: response.body, status: response.code }
  end
  result
end

.s3_csv_file(url) ⇒ Object



37
38
39
40
# File 'lib/result2csv/converter.rb', line 37

# Produces a GET URL for the CSV companion of the result object at +url+.
#
# The companion key is the part of s3_object_key(url) before the first ".",
# with "_csv.csv" appended. The URL is requested with an "attachment"
# content disposition and an "application/csv" content type.
#
# @param url [String] URL of the original result object
# @return [String] string form of whatever `url_for` returns
def self.s3_csv_file(url)
  base_key = s3_object_key(url).split('.').first
  csv_object = s3_object("#{base_key}_csv.csv")
  signed = csv_object.url_for(
    :get,
    endpoint: "s3.amazonaws.com",
    response_content_disposition: "attachment",
    response_content_type: "application/csv"
  )
  signed.to_s
end

.s3_object(key) ⇒ Object



32
33
34
35
# File 'lib/result2csv/converter.rb', line 32

# Looks up the object stored under +key+ in the
# "datafiniti-voltron-results" bucket, using a newly created AWS::S3 client.
#
# @param key [String] object key within the bucket
# @return [Object] whatever the bucket's `objects[key]` lookup yields
def self.s3_object(key)
  AWS::S3.new.buckets["datafiniti-voltron-results"].objects[key]
end

.s3_object_key(url) ⇒ Object



27
28
29
30
# File 'lib/result2csv/converter.rb', line 27

# Derives an object key from a URL by joining the first two segments of its
# path with "/". Anything after the second segment is ignored.
#
# @param url [String] URL to parse with Addressable::URI
# @return [String] up to two path segments joined by "/"
def self.s3_object_key(url)
  path_parts = Addressable::URI.parse(url).path.split('/')
  path_parts.slice(1, 2).join('/')
end

.s3_url(url, content_type = "application/json") ⇒ Object



63
64
65
66
67
68
69
# File 'lib/result2csv/converter.rb', line 63

# Produces a GET URL for the S3 object that +url+ refers to.
#
# The URL is requested with an "attachment" content disposition and the
# given content type. The content-type option was previously misspelled as
# :resonse_content_type, so it never reached `url_for` under the key used
# by s3_csv_file; it is now passed as :response_content_type.
#
# The object lookup goes through s3_object_key and s3_object, which perform
# the same path parsing and bucket lookup that used to be duplicated here.
#
# @param url [String] URL of the result object
# @param content_type [String] value for the :response_content_type option
# @return [String] string form of whatever `url_for` returns
def self.s3_url(url, content_type="application/json")
  s3_object(s3_object_key(url)).url_for(
    :get,
    endpoint: "s3.amazonaws.com",
    response_content_disposition: "attachment",
    response_content_type: content_type
  ).to_s
end

.test_csv(result_url) ⇒ Object



159
160
161
162
163
164
# File 'lib/result2csv/converter.rb', line 159

# Manual smoke test: converts the result at +result_url+ to CSV, uploads it
# to S3 under the companion "_csv.csv" key, and prints the URL from
# s3_csv_file.
#
# The helper calls previously went through `Result`, a constant this file
# does not define. All four helpers exist on this class with the same
# signatures, so they are called directly.
# NOTE(review): if a separate `Result` class with different behaviour is
# expected to be loaded alongside this file, confirm this routing.
#
# The conversion is run with the placeholder values 1 and 1 for user_token
# and result_id.
#
# @param result_url [String] URL of the result object to convert
# @return [nil] the return value of `puts`
def self.test_csv(result_url)
  object_name = "#{s3_object_key(result_url).split('.').first}_csv.csv"
  csv = convert_to_csv(result_url, 1, 1)
  write_csv_to_s3(object_name, csv)
  puts s3_csv_file(result_url).to_s
end

.toggle_downloaded_state(user, id, state) ⇒ Object



152
153
154
155
156
157
# File 'lib/result2csv/converter.rb', line 152

# Sends a PUT to the 80legs v2 results endpoint for +id+ with a JSON body of
# `{"downloaded": state}`, using the user's token as the HTTP basic-auth user.
#
# The `return` inside the block exits this method directly with the response
# code, whatever RestClient.put itself would have returned.
#
# @param user [#token] object supplying the API token
# @param id [Object] result identifier interpolated into the URL
# @param state [Object] value sent as the "downloaded" attribute
# @return [Object] the `code` of the response handed to the block
def self.toggle_downloaded_state(user, id, state)
  # return EightyLegsApi.conn(token: user.token).put("/results/#{id}", {downloaded: state}.to_json, :content_type => :json)
  endpoint = "https://#{user.token}:@api.80legs.com/v2/results/#{id}"
  payload = { downloaded: state }.to_json
  RestClient.put(endpoint, payload, content_type: :json) do |response|
    return response.code
  end
end

.truncate_to_max_cell_size(string) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
# File 'lib/result2csv/converter.rb', line 81

# Turns a record value into sanitized cell text of at most 32767 characters.
#
# The value is first run through JSON.parse; when that succeeds the cell text
# is the string form of the parsed value, otherwise it is the string form of
# the value itself. The text is cut to 32767 characters, then every double
# quote is removed and every comma is replaced by a semicolon.
#
# The JSON-parsed branch previously skipped the length cap, so a long value
# that happened to be valid JSON was returned at full length; both branches
# are now capped.
#
# NOTE(review): because of the JSON round trip, string values that are valid
# JSON scalars are rewritten (for example "null" becomes "") -- confirm that
# this is intended.
#
# @param string [Object, nil] raw record value
# @return [String] sanitized cell text; "" when +string+ is nil
def self.truncate_to_max_cell_size(string)
  return "" if string.nil?

  max_cell_size = 32767
  text =
    begin
      JSON.parse(string).to_s
    rescue
      string.to_s
    end
  text[0, max_cell_size].gsub('"', "").gsub(",", ";")
end

.write_csv_to_file(object_name, csv) ⇒ Object



75
76
77
# File 'lib/result2csv/converter.rb', line 75

# Writes +csv+ to a file in the current working directory. The file is named
# after the last "/"-separated component of +object_name+ and is opened in
# "w" mode.
#
# @param object_name [String] object key or path; only its last component is used
# @param csv [#to_s] content to write
# @return [Integer] the value returned by `write` (number of bytes written)
def self.write_csv_to_file(object_name, csv)
  basename = object_name.split('/').last.to_s
  File.open(basename, 'w') do |out|
    out.write(csv)
  end
end

.write_csv_to_s3(object_name, csv) ⇒ Object



71
72
73
# File 'lib/result2csv/converter.rb', line 71

# Writes +csv+ to the object named +object_name+ in the bucket returned by
# the `bucket` helper.
#
# @param object_name [#to_s] object key; converted to a String before lookup
# @param csv [Object] content passed to the object's `write`
# @return [Object] whatever the object's `write` call returns
def self.write_csv_to_s3(object_name, csv)
  bucket.objects[object_name.to_s].write(csv)
end