Module: SQ
- Defined in: lib/sq.rb
Class Method Summary
- .process(uri, regex = /./, opts = {}) ⇒ Object
- .query(uri, regex = /./) ⇒ Object
- .user_agent ⇒ Object
- .version ⇒ Object
Class Method Details
.process(uri, regex = /./, opts = {}) ⇒ Object
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/sq.rb', line 36
#
# Download every PDF that +query+ finds for +uri+ and +regex+ into a
# local directory.
#
# @param uri [String] page to scan; handed unchanged to +query+
# @param regex [Regexp] URL filter; handed unchanged to +query+
# @param opts [Hash] options
# @option opts [String] :directory output directory, created if it does
#   not exist yet (defaults to the current directory)
# @return [Integer, Array<Hash>] 0 when +query+ found nothing, otherwise
#   the list of +{ :uri, :name }+ hashes that were downloaded
def process(uri, regex = /./, opts = {})
  uris = query(uri, regex)
  return 0 if uris.empty?

  out = File.expand_path(opts[:directory] || '.')
  # Dir.exists? was removed in Ruby 3.2; File.directory? works everywhere.
  Dir.mkdir(out) unless File.directory?(out)

  uris.each do |u|
    # File.open for the local target: Kernel#open would interpret a
    # leading "|" as a command to run.
    File.open(File.join(out, u[:name]), 'wb') do |f|
      # Kernel#open no longer handles URLs (Ruby 3.0+); URI.open does.
      URI.open(u[:uri], 'rb') do |resp|
        # Copy IO to IO instead of building the whole body as one String.
        IO.copy_stream(resp, f)
      end
    end
  end
end
.query(uri, regex = /./) ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/sq.rb', line 19
#
# List the PDF links found on the page at +uri+.
#
# @param uri [String] page address; "http://" is prepended when it does
#   not already start with "http://" or "https://"
# @param regex [Regexp] only links whose absolute URL matches are kept
# @return [Array<Hash>] one hash per matching link, with the absolute URL
#   under +:uri+ and the last path segment under +:name+
def query(uri, regex = /./)
  uri = "http://#{uri}" unless uri.match?(%r{\Ahttps?://})

  # Kernel#open no longer handles URLs (Ruby 3.0+); URI.open does. The
  # block form closes the response once the document has been parsed.
  doc = URI.open(uri, 'User-Agent' => user_agent) { |io| Nokogiri::HTML(io) }

  links = doc.css('a[href]').map do |a|
    begin
      URI.join(uri, a.attr('href'))
    rescue URI::Error
      nil # one malformed href must not abort the whole scan
    end
  end.compact

  # path is nil for opaque URIs such as mailto:, hence to_s.
  pdfs = links.select do |u|
    u.path.to_s.match?(/\.pdf\z/i) && u.to_s.match?(regex)
  end

  pdfs.map { |u| { :uri => u.to_s, :name => u.path.split('/').last } }
end
.user_agent ⇒ Object
15 16 17 |
# File 'lib/sq.rb', line 15
#
# User-Agent string identifying this library and its version.
#
# @return [String] "SQ/<version> +github.com/bfontaine/sq"
def user_agent
  product = "SQ/#{version}"
  "#{product} +github.com/bfontaine/sq"
end
.version ⇒ Object
11 12 13 |
# File 'lib/sq.rb', line 11
#
# Version of the library.
#
# @return [String] the version number as a dotted string
def version
  '0.0.1'
end