Class: Autopagerize
- Inherits:
-
Object
- Object
- Autopagerize
- Includes:
- Enumerable
- Defined in:
- lib/autopagerize.rb,
lib/autopagerize/version.rb
Constant Summary collapse
- VERSION =
"0.1.0"
Instance Attribute Summary collapse
-
#client ⇒ Object
readonly
Returns the value of attribute client.
-
#options ⇒ Object
readonly
Returns the value of attribute options.
-
#siteinfo ⇒ Object
readonly
Returns the value of attribute siteinfo.
-
#url ⇒ Object
readonly
Returns the value of attribute url.
Instance Method Summary collapse
- #document ⇒ Object
- #each {|current| ... } ⇒ Object
- #html ⇒ Object
-
#initialize(url, siteinfo, options = {}) ⇒ Autopagerize
constructor
A new instance of Autopagerize.
- #next ⇒ Object
- #nextlink ⇒ Object
- #page ⇒ Object (also: #page_element)
- #process ⇒ Object
- #processed? ⇒ Boolean
- #processed_document ⇒ Object
- #processed_html ⇒ Object
- #processed_page_elements ⇒ Object
- #site ⇒ Object
Constructor Details
#initialize(url, siteinfo, options = {}) ⇒ Autopagerize
Returns a new instance of Autopagerize.
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/autopagerize.rb', line 11 def initialize(url, siteinfo, options = {}) @url = url @siteinfo = siteinfo @options = { :maxpage => 10, :headers => {}, # for internal/test use :current_page => 1, :httpclient => nil, :site => nil, }.merge(options) @site = @options[:site] end |
Instance Attribute Details
#client ⇒ Object (readonly)
Returns the value of attribute client.
9 10 11 |
# File 'lib/autopagerize.rb', line 9 def client @client end |
#options ⇒ Object (readonly)
Returns the value of attribute options.
9 10 11 |
# File 'lib/autopagerize.rb', line 9 def options @options end |
#siteinfo ⇒ Object (readonly)
Returns the value of attribute siteinfo.
9 10 11 |
# File 'lib/autopagerize.rb', line 9 def siteinfo @siteinfo end |
#url ⇒ Object (readonly)
Returns the value of attribute url.
9 10 11 |
# File 'lib/autopagerize.rb', line 9 def url @url end |
Instance Method Details
#document ⇒ Object
109 110 111 |
# File 'lib/autopagerize.rb', line 109 def document @document ||= Nokogiri::HTML.parse(html) end |
#each {|current| ... } ⇒ Object
41 42 43 44 45 46 47 |
# File 'lib/autopagerize.rb', line 41 def each current = self yield current while current = current.next yield current end end |
#html ⇒ Object
105 106 107 |
# File 'lib/autopagerize.rb', line 105 def html @html ||= client.get_content(url, nil, options[:headers]) end |
#next ⇒ Object
35 36 37 38 39 |
# File 'lib/autopagerize.rb', line 35 def next return nil if options[:maxpage] <= options[:current_page] return nil if nextlink.nil? @next ||= Autopagerize.new(nextlink, siteinfo, options.merge(:current_page => options[:current_page] + 1, :site => site)) end |
#nextlink ⇒ Object
28 29 30 31 32 33 |
# File 'lib/autopagerize.rb', line 28 def nextlink return nil unless site node = document.at_xpath(site["data"]["nextLink"]) return nil unless node Addressable::URI.join(url, node.attributes["href"].to_s).to_s end |
#page ⇒ Object Also known as: page_element
113 114 115 116 |
# File 'lib/autopagerize.rb', line 113 def page return nil unless site document.xpath(site["data"]["pageElement"]).last end |
#process ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/autopagerize.rb', line 49 def process @processed ||= begin return true unless site result = document.dup # Insert rule: # https://autopagerize.jottit.com/details_of_siteinfo_(ja) before = site["data"]["insertBefore"] if before.nil? || before.length == 0 || result.at_xpath(before).nil? page = result.xpath(site["data"]["pageElement"]).last point = Nokogiri::XML::Node.new("dummy_for_autopagerize", result.document) page.after point else point = result.at_xpath(before) end @processed_page_elements = [self.page] current = self while current = current.next point.before(current.page) @processed_page_elements << current.page end point.remove @processed_document = result true end end |
#processed? ⇒ Boolean
77 78 79 |
# File 'lib/autopagerize.rb', line 77 def processed? @processed end |
#processed_document ⇒ Object
81 82 83 84 |
# File 'lib/autopagerize.rb', line 81 def processed_document process @processed_document || document end |
#processed_html ⇒ Object
91 92 93 |
# File 'lib/autopagerize.rb', line 91 def processed_html processed_document.to_xml end |
#processed_page_elements ⇒ Object
86 87 88 89 |
# File 'lib/autopagerize.rb', line 86 def processed_page_elements process @processed_page_elements || [] end |
#site ⇒ Object
95 96 97 98 99 |
# File 'lib/autopagerize.rb', line 95 def site @site ||= siteinfo.find do |site| /#{normalize_regex(site["data"]["url"])}/.match(url) && site["data"]["nextLink"] && document.at_xpath(site["data"]["nextLink"]) end end |