Class: Autopagerize

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/autopagerize.rb,
lib/autopagerize/version.rb

Constant Summary collapse

# Library version string.
VERSION = "0.1.0"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, siteinfo, options = {}) ⇒ Autopagerize

Returns a new instance of Autopagerize.



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/autopagerize.rb', line 11

# Sets up a pager for a single page.
#
# @param url [String] address of the page; #html fetches it and #nextlink
#   resolves relative links against it
# @param siteinfo [Enumerable] candidate SITEINFO entries; #site searches
#   them with +find+ and reads each entry's "data" hash
# @param options [Hash] overrides merged over the defaults below
# @option options [Integer] :maxpage (10) #next stops once :current_page
#   reaches this value
# @option options [Hash] :headers ({}) passed to the client's +get_content+
# @option options [Integer] :current_page (1) internal/test use
# @option options [Object] :httpclient (nil) internal/test use; becomes the
#   object returned by #client
# @option options [Hash] :site (nil) internal/test use; pre-selected
#   SITEINFO entry, skips the lookup in #site
def initialize(url, siteinfo, options = {})
  @url = url
  @siteinfo = siteinfo

  @options = {
    :maxpage => 10,
    :headers => {},

    # for internal/test use
    :current_page => 1,
    :httpclient => nil,
    :site => nil,
  }.merge(options)

  @site = @options[:site]
  # Wire the injected client through to the #client reader; without this the
  # :httpclient option is accepted but never used. Stays nil when not given.
  @client = @options[:httpclient]
end

Instance Attribute Details

#client ⇒ Object (readonly)

Returns the value of attribute client.



9
10
11
# File 'lib/autopagerize.rb', line 9

# Reader for @client, the object #html calls +get_content+ on.
# NOTE(review): confirm that @client is populated before #html is used.
def client
  @client
end

#options ⇒ Object (readonly)

Returns the value of attribute options.



9
10
11
# File 'lib/autopagerize.rb', line 9

# Reader for @options: the constructor's default settings merged with the
# caller's overrides (:maxpage, :headers, :current_page, :httpclient, :site).
def options
  @options
end

#siteinfo ⇒ Object (readonly)

Returns the value of attribute siteinfo.



9
10
11
# File 'lib/autopagerize.rb', line 9

# Reader for @siteinfo, the collection of SITEINFO entries handed to the
# constructor; #site searches it for the entry that applies to #url.
def siteinfo
  @siteinfo
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



9
10
11
# File 'lib/autopagerize.rb', line 9

# Reader for @url, the address handed to the constructor; #html fetches it
# and #nextlink uses it as the base for resolving the next-page link.
def url
  @url
end

Instance Method Details

#document ⇒ Object



109
110
111
# File 'lib/autopagerize.rb', line 109

# Parsed form of #html, built with Nokogiri::HTML.parse on first use and
# cached in @document afterwards.
def document
  return @document if @document

  @document = Nokogiri::HTML.parse(html)
end

#each {|current| ... } ⇒ Object

Yields:

  • (current)


41
42
43
44
45
46
47
# File 'lib/autopagerize.rb', line 41

# Walks this page and every page reachable through #next, in order.
#
# @yield [current] the receiver first, then each following page
# @return [Enumerator] when called without a block
# @return [nil] when called with a block
def each
  # Enumerable hosts conventionally hand back an Enumerator instead of
  # raising LocalJumpError when no block is supplied.
  return enum_for(:each) unless block_given?

  current = self
  yield current
  # #next returns nil at the last page, which ends the walk.
  while (current = current.next)
    yield current
  end
end

#html ⇒ Object



105
106
107
# File 'lib/autopagerize.rb', line 105

# Raw body of the page at #url. Fetched on first use through
# +client.get_content+ (no query, request headers from options[:headers])
# and cached in @html afterwards.
def html
  @html = client.get_content(url, nil, options[:headers]) unless @html
  @html
end

#next ⇒ Object



35
36
37
38
39
# File 'lib/autopagerize.rb', line 35

# Pager for the following page, or nil when there is none.
#
# Returns nil once options[:current_page] has reached options[:maxpage], or
# when #nextlink finds no link. Otherwise builds (once, cached in @next) a
# new Autopagerize for the link, sharing #siteinfo and passing the options
# on with :current_page incremented and :site set to the current #site.
def next
  current_page = options[:current_page]
  return nil if options[:maxpage] <= current_page
  return nil if nextlink.nil?

  @next ||= begin
    link = nextlink
    Autopagerize.new(link, siteinfo, options.merge(:current_page => current_page + 1, :site => site))
  end
end


28
29
30
31
32
33
# File 'lib/autopagerize.rb', line 28

# Absolute URL of the following page, or nil when it cannot be determined.
#
# Looks up the node selected by the matching SITEINFO entry's "nextLink"
# XPath in #document and resolves its +href+ against #url.
#
# @return [String, nil] nil when no SITEINFO entry applies, when the XPath
#   matches nothing, or when the matched node carries no usable href
def nextlink
  return nil unless site

  node = document.at_xpath(site["data"]["nextLink"])
  return nil unless node

  href = node.attributes["href"].to_s
  # A missing or blank href would resolve to the current URL, and the pager
  # would then re-fetch the same page until :maxpage is reached.
  return nil if href.strip.empty?

  Addressable::URI.join(url, href).to_s
end

#page ⇒ Object Also known as: page_element



113
114
115
116
# File 'lib/autopagerize.rb', line 113

# The page element of this page: the last node in #document selected by the
# matching SITEINFO entry's "pageElement" XPath. nil when no entry applies.
def page
  rule = site
  return nil unless rule

  matches = document.xpath(rule["data"]["pageElement"])
  matches.last
end

#process ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/autopagerize.rb', line 49

# Merges the page elements of all following pages into a copy of this
# page's document. Runs once; later calls return the stored result.
#
# Side effects: sets @processed, and (when a SITEINFO entry applies)
# @processed_document and @processed_page_elements.
#
# @return [true]
def process
  return @processed if @processed

  rule = site
  # Record the outcome even when no SITEINFO entry applies. An early
  # `return` from inside `@processed ||= begin ... end` would skip the
  # assignment and leave #processed? unset.
  return @processed = true unless rule

  data = rule["data"]
  merged = document.dup

  # Insert rule:
  # https://autopagerize.jottit.com/details_of_siteinfo_(ja)
  before = data["insertBefore"]
  point = merged.at_xpath(before) unless before.nil? || before.empty?
  if point.nil?
    # No usable insertBefore: put a temporary marker right after the last
    # page element and insert in front of the marker.
    anchor = merged.xpath(data["pageElement"]).last
    if anchor.nil?
      # pageElement matched nothing, so there is nowhere to insert;
      # keep the untouched copy instead of failing on nil.
      @processed_page_elements = []
      @processed_document = merged
      return @processed = true
    end
    point = Nokogiri::XML::Node.new("dummy_for_autopagerize", merged.document)
    anchor.after(point)
  end

  elements = [page].compact
  current = self
  while (current = current.next)
    # Look the element up once so the node that is inserted is also the
    # node that is recorded; skip pages where the XPath found nothing.
    element = current.page
    next if element.nil?

    point.before(element)
    elements << element
  end
  # NOTE(review): this also removes a real insertBefore node when one was
  # found, not only the temporary marker — confirm that is intended.
  point.remove

  @processed_page_elements = elements
  @processed_document = merged
  @processed = true
end

#processed? ⇒ Boolean

Returns:

  • (Boolean)


77
78
79
# File 'lib/autopagerize.rb', line 77

# Truthy once #process has stored its result in @processed; nil before that.
def processed?
  @processed
end

#processed_document ⇒ Object



81
82
83
84
# File 'lib/autopagerize.rb', line 81

# Runs #process, then returns the merged document it stored in
# @processed_document, falling back to the plain #document when none was
# stored.
def processed_document
  process
  return @processed_document if @processed_document

  document
end

#processed_html ⇒ Object



91
92
93
# File 'lib/autopagerize.rb', line 91

# Serializes #processed_document with +to_xml+.
def processed_html
  merged = processed_document
  merged.to_xml
end

#processed_page_elements ⇒ Object



86
87
88
89
# File 'lib/autopagerize.rb', line 86

# Runs #process, then returns the page elements it collected in
# @processed_page_elements, or an empty array when none were collected.
def processed_page_elements
  process
  return @processed_page_elements if @processed_page_elements

  []
end

#site ⇒ Object



95
96
97
98
99
# File 'lib/autopagerize.rb', line 95

# The SITEINFO entry that applies to this page, or nil when none does.
#
# Returns @site when it is already set (the constructor seeds it from the
# :site option). Otherwise picks the first entry in #siteinfo whose
# "url" pattern (passed through normalize_regex) matches #url, that defines
# a "nextLink" XPath, and whose XPath finds a node in #document; a found
# entry is cached in @site.
def site
  return @site if @site

  @site = siteinfo.find do |entry|
    rule = entry["data"]
    next false unless /#{normalize_regex(rule["url"])}/.match(url)

    next_link = rule["nextLink"]
    next_link && document.at_xpath(next_link)
  end
end