Module: Datahunter

Defined in:
lib/datahunter.rb,
lib/datahunter/base.rb,
lib/datahunter/version.rb

Constant Summary collapse

DATASETS_URL =

Alternative value for a local server: DATASETS_URL = "localhost:3000/api/datasets/"

"http://shrouded-harbor-5877.herokuapp.com/api/datasets/"
FEEDBACK_URL =
"https://docs.google.com/forms/d/1yNzZjCCXvWHQCbWz4sx-nui3LafeeLcT7FF9T-vbKvw/viewform"
REQUEST_URL =
"https://docs.google.com/forms/d/1NRKWmb_mcpKJmrutXvZSZnysM_v0rfLhjD897H3Myrw/viewform?usp=send_form"
VERSION =
"0.4.1"

Class Method Summary collapse

Class Method Details

.clean_string(string) ⇒ Object



25
26
27
28
29
30
31
# File 'lib/datahunter/base.rb', line 25

# Flattens a free-text field into a single tidy line for terminal output.
#
# Carriage returns, line feeds and "--" sequences are removed outright, then
# every run of consecutive spaces is collapsed into one space. (Replacing
# pairs of spaces in a single pass used to leave "a    b" with two spaces.)
#
# @param string [String, nil] raw text; nil is treated as an empty string
# @return [String] the cleaned text
def self.clean_string(string)
  string
    .to_s
    .delete("\r\n")
    .gsub("--", "")
    .squeeze(" ")
end

.datasets_url(query) ⇒ Object



33
34
35
# File 'lib/datahunter/base.rb', line 33

# Builds the search URL for a free-text query.
#
# @param query [String] text handed to Datahunter.query_string_builder
# @return [String] DATASETS_URL followed by the generated query string
def self.datasets_url(query)
  query_string = Datahunter.query_string_builder(query)
  "#{DATASETS_URL}#{query_string}"
end

.download_file(url, format = "", alt_url = "") ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/datahunter/base.rb', line 78

# Downloads the resource at +url+ into the current working directory, or
# opens it in the browser when its extension is not listed in @extensions.
#
# Before downloading, the user is shown the target path and may confirm it
# (any other input, including just RET), rename it ("r"), or abort ("q").
# The previously recognised answers "rename" and "n" are still accepted; the
# keys advertised in the prompt used to be ignored and started the download.
#
# @param url [String] address of the resource
# @param format [String] kept for interface compatibility; not used here
# @param alt_url [String] fallback address printed after the download
# @return [Object] whatever the last call made (Launchy.open or the closing
#   message helper) returns
def self.download_file(url, format = "", alt_url = "")
  uri = Addressable::URI.parse(url)
  extension = Datahunter.parse_extension_from_uri(uri)

  # NOTE(review): @extensions is not assigned in this method; presumably it
  # is the list of directly downloadable formats set elsewhere - confirm.
  if @extensions.include?(extension)
    loc = File.join(Dir.pwd, uri.basename)

    puts ("### Create/overwrite #{loc}? (RET) ".colorize(:yellow) +
          "Rename? (r) ".colorize(:cyan) +
          "abort? (q)")

    # Normalise the reply so "R", " r " and "rename" are all understood.
    answer = ask("> ").to_s.strip.downcase
    case answer
    when "r", "rename"
      puts "Path/to/filename: ".colorize(:yellow)
      loc = ask "> "
    when "q", "n"
      abort("Ok then")
    end

    puts "### Start downloading..."
    Downloadr::HTTP.download(url, loc)
    puts "### Your file has been downloaded ;)".colorize(:green)
    Datahunter.print_excuse_and_alternative_url_message alt_url
  else
    Launchy.open(url, {})
  end
end

.download_the_data(dataset) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/datahunter/base.rb', line 108

# Picks one of the dataset's resources and hands it to Datahunter.download_file.
#
# With exactly one resource it is chosen automatically; otherwise the
# resources are listed and the user is asked for an index between 0 and the
# last position.
#
# @param dataset [Hash] must provide "resources" (indexable collection of
#   hashes with "url" and "format") and "uri"
# @return [Object] the result of Datahunter.download_file
def self.download_the_data(dataset)
  resources = dataset["resources"]
  link_count = resources.size

  choice =
    if link_count == 1
      0
    else
      Datahunter.print_downloadable_links(resources)
      puts "### which one? (0/1/...)".colorize(:yellow)
      ask("> ", Integer) { |question| question.in = 0..(link_count - 1) }
    end

  position = choice.to_i
  Datahunter.download_file(resources[position]["url"],
                           resources[position]["format"],
                           dataset["uri"])
end

.ds_url(id) ⇒ Object



37
38
39
# File 'lib/datahunter/base.rb', line 37

# Builds the URL of a single dataset.
#
# @param id [#to_s] dataset identifier appended to DATASETS_URL
# @return [String] DATASETS_URL immediately followed by the identifier
def self.ds_url(id)
  DATASETS_URL + id.to_s
end

.get_dataset(dataset) ⇒ Object



133
134
135
136
137
138
139
# File 'lib/datahunter/base.rb', line 133

# Fetches a dataset: downloads it when it lists at least one resource,
# otherwise opens the dataset's "uri" in the browser.
#
# A missing, nil or empty "resources" entry all lead to the browser
# fallback (a nil value used to raise NoMethodError).
#
# @param dataset [Hash] dataset record with optional "resources" and "uri"
# @return [Object] the result of the helper that was called
def self.get_dataset(dataset)
  resources = dataset["resources"]

  if resources && resources.any?
    Datahunter.download_the_data(dataset)
  else
    Datahunter.open_in_browser(dataset["uri"])
  end
end

.open_in_browser(url) ⇒ Object



124
125
126
127
128
129
130
131
# File 'lib/datahunter/base.rb', line 124

# Opens +url+ in the browser when the whole string matches URI::regexp;
# otherwise prints the "bad URI" message.
#
# @param url [String, nil] address supplied for the dataset
# @return [Object] the result of Launchy.open or of
#   Datahunter.print_bad_uri_message
def self.open_in_browser(url)
  # Anchored so the entire string, not just a fragment, has to be a URI.
  whole_uri = /\A#{URI::regexp}\z/
  return Datahunter.print_bad_uri_message unless url =~ whole_uri

  puts "You can't download this dataset directly, but you should from there"
  Launchy.open(url, {})
end

.parse_extension_from_uri(uri) ⇒ Object



74
75
76
# File 'lib/datahunter/base.rb', line 74

# Extracts the file extension (text after the last ".") from a URI's basename.
#
# Returns nil when the basename contains no dot or ends with one, so an
# extension-less path such as ".../api/csv" is no longer reported as having
# the extension "csv".
#
# @param uri [#basename] parsed URI object exposing the last path segment
# @return [String, nil] the extension without the dot, or nil if there is none
def self.parse_extension_from_uri(uri)
  _stem, dot, extension = uri.basename.to_s.rpartition(".")
  return nil if dot.empty? || extension.empty?

  extension
end


157
158
159
160
# File 'lib/datahunter/base.rb', line 157

# Prints, in red, the notice shown when a publisher-supplied URL is invalid.
#
# @return [nil]
def self.print_bad_uri_message
  notice = "The URL given by the publisher is not valid. We'll try to find out why " \
           "as soon as we can!"
  puts notice.colorize(:red)
end


53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/datahunter/base.rb', line 53

# Prints a compact, numbered overview of several datasets: a coloured
# "N. title id: ..." line, up to five "spatial" entries, and a short preview
# of the cleaned description. A blank line closes the listing.
#
# The preview is shortened only when the description exceeds 101 characters;
# in that case the trailing (possibly cut) word is replaced by "...". Short
# descriptions are printed in full (their last word used to be replaced too).
# Missing "description" or "spatial" values are treated as empty.
#
# @param coll_of_datasets [Enumerable<Hash>] dataset records
# @return [nil]
def self.print_coll_of_datasets_info_light(coll_of_datasets)
  coll_of_datasets.each_with_index do |ds, index|
    desc = clean_string(ds["description"].to_s)
    preview = desc.length > 101 ? desc[0..100].sub(/\w+\s*\z/, "...") : desc
    places = Array(ds["spatial"]).take(5)

    header = "#{index + 1}. ".colorize(:yellow) +
             "#{ds["title"]}".colorize(:green) +
             " id: ".colorize(:blue) +
             "#{ds["_id"]}"
    puts header
    # Interpolated on purpose: prints the list in its bracketed form on one
    # line instead of one element per line.
    puts "#{places}"
    puts preview.colorize(:blue)
  end
  puts
end


41
42
43
44
45
46
47
48
49
50
51
# File 'lib/datahunter/base.rb', line 41

# Prints the detailed view of one dataset: title (green), cleaned description
# (blue), a blank line, then one "label: value" line per metadata field.
#
# @param dataset [Hash] record read via the keys "title", "description",
#   "publisher", "temporal", "spatial", "created", "updated" and "huntscore"
# @return [nil]
def self.print_dataset_info(dataset)
  puts dataset["title"].to_s.colorize(:green)

  description = Datahunter.clean_string(dataset["description"])
  puts description.to_s.colorize(:blue)
  puts

  %w[publisher temporal spatial created updated].each do |field|
    puts "#{field}: ".colorize(:blue) + dataset[field].to_s
  end
  # The score is stored under "huntscore" but labelled "score".
  puts "score: ".colorize(:blue) + dataset["huntscore"].to_s
end


66
67
68
69
70
71
72
# File 'lib/datahunter/base.rb', line 66

# Lists the downloadable resources, one per line, as
# "<index>. <title> - <format>" with a zero-based index.
#
# @param resources [Enumerable<Hash>] entries read via "title" and "format"
# @return [Object] the +resources+ collection that was iterated
def self.print_downloadable_links(resources)
  resources.each_with_index do |resource, position|
    number = "#{position}. ".colorize(:yellow)
    title = "#{resource["title"]} - ".colorize(:blue)
    kind = "#{resource["format"]}".colorize(:green)
    puts number + title + kind
  end
end


149
150
151
152
153
154
155
# File 'lib/datahunter/base.rb', line 149

# Prints a note that the downloaded file may differ from what was expected,
# followed by an alternative address (in blue) for a manual download.
#
# @param alt_url [#to_s] address printed on the second line
# @return [nil]
def self.print_excuse_and_alternative_url_message(alt_url = "")
  notice = [
    "If this is not the file you expected, it's maybe because publisher",
    " don't always keep the metadata up-to-date. We try to clean most of",
    " uri's and check the url. Anyway you may be able to download your",
    " file by hand here:"
  ].join
  puts notice
  puts alt_url.to_s.colorize(:blue)
end

Feedback requests



164
165
166
167
168
169
170
171
172
# File 'lib/datahunter/base.rb', line 164

# Asks whether the user wants to give feedback. An answer of exactly "y"
# opens FEEDBACK_URL through Launchy; anything else prints a goodbye.
#
# @return [Object] the result of Launchy.open, or nil after the goodbye
def self.print_feedback_request
  puts "### give feedback? (y/n)".colorize(:yellow)
  reply = ask("> ")
  return Launchy.open(FEEDBACK_URL, {}) if reply == "y"

  puts "Bye for now!"
end

Messages



142
143
144
145
146
147
# File 'lib/datahunter/base.rb', line 142

# Prints, in red, the notice shown when a query returned no dataset.
#
# @return [nil]
def self.print_no_dataset_message
  notice = "We've found nothing for your query. " \
           "Remember, this is a first prototype, there will be a lot more " \
           "datasets indexed soon. If you want us to find a dataset for you, or " \
           "if you just want to give us a feedback, don't hesitate!"
  puts notice.colorize(:red)
end


174
175
176
177
178
179
180
# File 'lib/datahunter/base.rb', line 174

# Asks whether the user wants to request a dataset and, on an answer of
# exactly "y", opens REQUEST_URL through Launchy. Other answers do nothing.
#
# @return [Object, nil] the result of Launchy.open, or nil when declined
def self.print_request_dataset_message
  puts "### request a dataset? (y/n)".colorize(:yellow)
  reply = ask("> ")
  Launchy.open(REQUEST_URL, {}) if reply == "y"
end

.query_string_builder(string) ⇒ Object



16
17
18
19
20
21
22
23
# File 'lib/datahunter/base.rb', line 16

# Turns free text into the "?q=..." part of a search URL.
#
# Surrounding whitespace is dropped and inner whitespace runs become a single
# separator, which is encoded as "+". All other reserved characters are
# percent-encoded, so a query such as "R&D" can no longer be split into a
# second parameter by its "&".
#
# URI is assumed to be loaded already: this file refers to it elsewhere
# (URI::regexp in open_in_browser).
#
# @param string [String, nil] user-supplied search text; nil yields "?q="
# @return [String] the encoded query string, starting with "?q="
def self.query_string_builder(string)
  terms = string.to_s.split.join(" ")
  "?q=#{URI.encode_www_form_component(terms)}"
end