Module: Datahunter
- Defined in:
- lib/datahunter.rb,
lib/datahunter/base.rb,
lib/datahunter/version.rb
Constant Summary collapse
- DATASETS_URL =
(Source comment, apparently a commented-out local development value: DATASETS_URL = "localhost:3000/api/datasets/")
"http://shrouded-harbor-5877.herokuapp.com/api/datasets/"
- FEEDBACK_URL =
"https://docs.google.com/forms/d/1yNzZjCCXvWHQCbWz4sx-nui3LafeeLcT7FF9T-vbKvw/viewform"
- REQUEST_URL =
"https://docs.google.com/forms/d/1NRKWmb_mcpKJmrutXvZSZnysM_v0rfLhjD897H3Myrw/viewform?usp=send_form"
- VERSION =
"0.4.1"
Class Method Summary collapse
- .clean_string(string) ⇒ Object
- .datasets_url(query) ⇒ Object
- .download_file(url, format = "", alt_url = "") ⇒ Object
- .download_the_data(dataset) ⇒ Object
- .ds_url(id) ⇒ Object
- .get_dataset(dataset) ⇒ Object
- .open_in_browser(url) ⇒ Object
- .parse_extension_from_uri(uri) ⇒ Object
- .print_bad_uri_message ⇒ Object
- .print_coll_of_datasets_info_light(coll_of_datasets) ⇒ Object
- .print_dataset_info(dataset) ⇒ Object
- .print_downloadable_links(resources) ⇒ Object
- .print_excuse_and_alternative_url_message(alt_url = "") ⇒ Object
- .print_feedback_request ⇒ Object
Feedback requests.
- .print_no_dataset_message ⇒ Object
Messages.
- .print_request_dataset_message ⇒ Object
- .query_string_builder(string) ⇒ Object
Class Method Details
.clean_string(string) ⇒ Object
25 26 27 28 29 30 31 |
# File 'lib/datahunter/base.rb', line 25

# Flattens a free-text field so it can be printed on a single line.
#
# Removes newlines and carriage returns, strips "--" sequences, then
# collapses every run of spaces into a single space.
#
# @param string [String, nil] raw text; nil is treated as an empty string
# @return [String] the cleaned text
def self.clean_string(string)
  string.to_s
        .delete("\n\r")
        .gsub("--", "")
        .squeeze(" ")
end
.datasets_url(query) ⇒ Object
33 34 35 |
# File 'lib/datahunter/base.rb', line 33

# Builds the search endpoint URL for a free-text query.
#
# @param query [String] search terms, handed to Datahunter.query_string_builder
# @return [String] DATASETS_URL followed by the generated query string
def self.datasets_url(query)
  query_string = Datahunter.query_string_builder(query)
  [DATASETS_URL, query_string].join
end
.download_file(url, format = "", alt_url = "") ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/datahunter/base.rb', line 78

# Saves the resource at +url+ into the current working directory, or opens
# it in the browser when its extension is not listed in @extensions.
#
# Before downloading, the user is asked whether to keep the default target
# path (RET), rename it (r) or abort (q). The legacy answers "rename" and
# "n" are still accepted.
#
# @param url [String] location of the file to fetch
# @param format [String] declared format of the resource (currently unused)
# @param alt_url [String] fallback page printed after the download
# @return [Object] result of the last helper called
#
# NOTE(review): @extensions is a module-level instance variable that is not
# defined in this view; presumably a list of downloadable extensions.
def self.download_file(url, format = "", alt_url = "")
  uri = Addressable::URI.parse(url)
  extension = Datahunter.parse_extension_from_uri(uri)

  # Anything we do not know how to save is handed to the browser instead.
  return Launchy.open(url) unless @extensions.include?(extension)

  loc = File.join(Dir.pwd, uri.basename)
  puts "### Create/overwrite #{loc}? (RET) ".colorize(:yellow) +
       "Rename? (r) ".colorize(:cyan) +
       "abort? (q)"

  # Match the short answers advertised in the prompt as well as the
  # long/legacy ones.
  case ask("> ").to_s.strip.downcase
  when "r", "rename"
    puts "Path/to/filename: ".colorize(:yellow)
    loc = ask("> ").to_s
  when "q", "n"
    abort("Ok then")
  end

  puts "### Start downloading..."
  Downloadr::HTTP.download(url, loc)
  puts "### Your file has been downloaded ;)".colorize(:green)
  Datahunter.print_excuse_and_alternative_url_message alt_url
end
.download_the_data(dataset) ⇒ Object
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
# File 'lib/datahunter/base.rb', line 108

# Picks one of the dataset's resources and downloads it.
#
# With a single resource it is used directly; otherwise the resources are
# listed and the user is asked for an index within the valid range.
#
# @param dataset [Hash] dataset record with "resources" and "uri" keys
# @return [Object] whatever Datahunter.download_file returns
def self.download_the_data(dataset)
  resources = dataset["resources"]
  last_index = resources.size - 1

  choice =
    if last_index.zero?
      0
    else
      Datahunter.print_downloadable_links resources
      puts "### which one? (0/1/...)".colorize(:yellow)
      ask("> ", Integer) { |question| question.in = 0..last_index }
    end

  position = choice.to_i
  Datahunter.download_file(resources[position]["url"],
                           resources[position]["format"],
                           dataset["uri"])
end
.ds_url(id) ⇒ Object
37 38 39 |
# File 'lib/datahunter/base.rb', line 37

# Builds the endpoint URL of a single dataset.
#
# @param id [#to_s] dataset identifier
# @return [String] DATASETS_URL followed by the identifier
def self.ds_url(id)
  DATASETS_URL + id.to_s
end
.get_dataset(dataset) ⇒ Object
133 134 135 136 137 138 139 |
# File 'lib/datahunter/base.rb', line 133

# Fetches a dataset: downloads it when it lists at least one resource,
# otherwise opens the dataset's "uri" in the browser.
#
# A missing, nil or empty "resources" entry all lead to the browser path.
#
# @param dataset [Hash] dataset record with optional "resources" and "uri"
# @return [Object] result of the helper that was called
def self.get_dataset(dataset)
  resources = dataset["resources"]

  if resources && resources.any?
    Datahunter.download_the_data dataset
  else
    Datahunter.open_in_browser dataset["uri"]
  end
end
.open_in_browser(url) ⇒ Object
124 125 126 127 128 129 130 131 |
# File 'lib/datahunter/base.rb', line 124

# Opens +url+ in the browser when it looks like a URI; otherwise prints the
# "bad URI" message.
#
# @param url [String, nil] address supplied by the dataset publisher
# @return [Object] result of Launchy.open or of the message helper
def self.open_in_browser(url)
  # The whole string must match the URI pattern, hence the \A ... \z anchors.
  if url =~ /\A#{URI::regexp}\z/
    puts "You can't download this dataset directly, but you should from there"
    Launchy.open(url)
  else
    Datahunter.print_bad_uri_message
  end
end
.parse_extension_from_uri(uri) ⇒ Object
74 75 76 |
# File 'lib/datahunter/base.rb', line 74

# Extracts the file extension (without the leading dot) from a URI.
#
# Only the last extension is returned ("archive.tar.gz" gives "gz"). A
# basename without an extension yields an empty string rather than the
# whole name, so extension-less files are never mistaken for a known format.
#
# @param uri [#basename] parsed URI responding to #basename
# @return [String] the extension, or "" when there is none
def self.parse_extension_from_uri(uri)
  File.extname(uri.basename.to_s).sub(/\A\./, "")
end
.print_bad_uri_message ⇒ Object
157 158 159 160 |
# File 'lib/datahunter/base.rb', line 157

# Prints, in red, the notice shown when a publisher-supplied URL is invalid.
#
# @return [nil]
def self.print_bad_uri_message
  puts "The URL given by the publisher is not valid. We'll try to find out why "\
       "as soon as we can!".colorize(:red)
end
.print_coll_of_datasets_info_light(coll_of_datasets) ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/datahunter/base.rb', line 53

# Prints a compact listing of datasets: a numbered title line with the id,
# up to five "spatial" entries, and a short description.
#
# Descriptions are only shortened (and marked with "...") when they exceed
# 101 characters; shorter ones are printed unchanged. Missing "spatial" or
# "description" values are treated as empty.
#
# @param coll_of_datasets [Array<Hash>] dataset records to list
# @return [nil]
def self.print_coll_of_datasets_info_light(coll_of_datasets)
  coll_of_datasets.each_with_index do |ds, index|
    desc = Datahunter.clean_string(ds["description"].to_s)
    # Replace the (possibly cut) trailing word with "..." only when the
    # text was actually truncated.
    summary = desc.length > 101 ? desc[0..100].gsub(/\w+\s*$/, '...') : desc

    puts "#{index + 1}. ".colorize(:yellow) +
         "#{ds["title"]}".colorize(:green) +
         " id: ".colorize(:blue) +
         "#{ds["_id"]}"
    puts Array(ds["spatial"]).take(5).to_s
    puts summary.colorize(:blue)
  end
  puts
end
.print_dataset_info(dataset) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/datahunter/base.rb', line 41

# Prints the detailed view of one dataset: title, cleaned description, a
# blank line, then one labelled line per metadata field.
#
# @param dataset [Hash] dataset record
# @return [nil]
def self.print_dataset_info(dataset)
  # Prints one "label value" line with the label in blue.
  print_field = lambda do |label, key|
    puts label.colorize(:blue) + dataset[key].to_s
  end

  puts dataset["title"].to_s.colorize(:green)
  puts Datahunter.clean_string(dataset["description"]).to_s.colorize(:blue)
  puts
  print_field.call("publisher: ", "publisher")
  print_field.call("temporal: ", "temporal")
  print_field.call("spatial: ", "spatial")
  print_field.call("created: ", "created")
  print_field.call("updated: ", "updated")
  print_field.call("score: ", "huntscore")
end
.print_downloadable_links(resources) ⇒ Object
66 67 68 69 70 71 72 |
# File 'lib/datahunter/base.rb', line 66

# Lists the downloadable resources of a dataset, one per line, as
# "<index>. <title> - <format>" with the index starting at 0.
#
# @param resources [Array<Hash>] resource records with "title" and "format"
# @return [Array<Hash>] the resources that were passed in
def self.print_downloadable_links(resources)
  resources.each.with_index do |resource, position|
    number = "#{position}. ".colorize(:yellow)
    title = "#{resource["title"]} - ".colorize(:blue)
    kind = "#{resource["format"]}".colorize(:green)
    puts(number + title + kind)
  end
end
.print_excuse_and_alternative_url_message(alt_url = "") ⇒ Object
149 150 151 152 153 154 155 |
# File 'lib/datahunter/base.rb', line 149

# Prints a disclaimer about possibly outdated publisher metadata, followed
# by an alternative URL (in blue) where the file may be fetched by hand.
#
# @param alt_url [String] fallback location shown to the user
# @return [nil]
def self.print_excuse_and_alternative_url_message(alt_url = "")
  puts "If this is not the file you expected, it's maybe because publisher"\
       " don't always keep the metadata up-to-date. We try to clean most of"\
       " uri's and check the url. Anyway you may be able to download your"\
       " file by hand here:"
  puts alt_url.to_s.colorize(:blue)
end
.print_feedback_request ⇒ Object
Feedback requests
164 165 166 167 168 169 170 171 172 |
# File 'lib/datahunter/base.rb', line 164

# Asks whether the user wants to give feedback. Answering "y" opens
# FEEDBACK_URL in the browser; any other answer prints a goodbye.
#
# @return [Object] result of Launchy.open, or nil after the goodbye
def self.print_feedback_request
  puts "### give feedback? (y/n)".colorize(:yellow)

  # Normalise the answer so "Y" or " y " count as yes.
  case ask("> ").to_s.strip.downcase
  when "y"
    Launchy.open(FEEDBACK_URL)
  else
    puts "Bye for now!"
  end
end
.print_no_dataset_message ⇒ Object
Messages
142 143 144 145 146 147 |
# File 'lib/datahunter/base.rb', line 142

# Prints, in red, the notice shown when a query matched no dataset.
#
# @return [nil]
def self.print_no_dataset_message
  puts "We've found nothing for your query. "\
       "Remember, this is a first prototype, there will be a lot more "\
       "datasets indexed soon. If you want us to find a dataset for you, or "\
       "if you just want to give us a feedback, don't hesitate!".colorize(:red)
end
.print_request_dataset_message ⇒ Object
174 175 176 177 178 179 180 |
# File 'lib/datahunter/base.rb', line 174

# Asks whether the user wants to request a dataset. Answering "y" opens
# REQUEST_URL in the browser; any other answer does nothing.
#
# @return [Object, nil] result of Launchy.open, or nil when declined
def self.print_request_dataset_message
  puts "### request a dataset? (y/n)".colorize(:yellow)

  # Normalise the answer so "Y" or " y " count as yes.
  case ask("> ").to_s.strip.downcase
  when "y"
    Launchy.open(REQUEST_URL)
  end
end
.query_string_builder(string) ⇒ Object
16 17 18 19 20 21 22 23 |
# File 'lib/datahunter/base.rb', line 16

# Turns free-text search terms into a "?q=..." query string.
#
# Surrounding whitespace is dropped, runs of spaces are collapsed, and the
# result is form-encoded: spaces become "+", and reserved characters such as
# "&", "=" or "#" are percent-escaped so they cannot corrupt the request.
#
# @param string [String, nil] search terms typed by the user
# @return [String] the query string, starting with "?q="
def self.query_string_builder(string)
  terms = string.to_s.strip.squeeze(" ")
  "?q=#{URI.encode_www_form_component(terms)}"
end