Class: SiteData
- Inherits:
-
Struct
- Object
- Struct
- SiteData
- Defined in:
- lib/w3m-autopagerize-server.rb
Defined Under Namespace
Modules: FallbackSetup
Instance Attribute Summary collapse
-
#block ⇒ Object
Returns the value of attribute block.
-
#exampleUrl ⇒ Object
Returns the value of attribute exampleUrl.
-
#insertBefore ⇒ Object
Returns the value of attribute insertBefore.
-
#match ⇒ Object
Returns the value of attribute match.
-
#nextLink ⇒ Object
Returns the value of attribute nextLink.
-
#pageElement ⇒ Object
Returns the value of attribute pageElement.
Class Method Summary collapse
- .fallback_predicate1(text, words = $FALLBACK_WORDS, patterns = $FALLBACK_PATTERNS) ⇒ Object
- .fallback_predicate2(text, start_words = $FALLBACK_START_WORDS) ⇒ Object
-
.fallbacks ⇒ Object
Returns the memoized fallback entries whose XPath expressions locate a link (or form) leading to the next page.
Instance Method Summary collapse
Instance Attribute Details
#block ⇒ Object
Returns the value of attribute block
38 39 40 |
# File 'lib/w3m-autopagerize-server.rb', line 38 def block @block end |
#exampleUrl ⇒ Object
Returns the value of attribute exampleUrl
38 39 40 |
# File 'lib/w3m-autopagerize-server.rb', line 38 def exampleUrl @exampleUrl end |
#insertBefore ⇒ Object
Returns the value of attribute insertBefore
38 39 40 |
# File 'lib/w3m-autopagerize-server.rb', line 38 def insertBefore @insertBefore end |
#match ⇒ Object
Returns the value of attribute match
38 39 40 |
# File 'lib/w3m-autopagerize-server.rb', line 38 def match @match end |
#nextLink ⇒ Object
Returns the value of attribute nextLink
38 39 40 |
# File 'lib/w3m-autopagerize-server.rb', line 38 def nextLink @nextLink end |
#pageElement ⇒ Object
Returns the value of attribute pageElement
38 39 40 |
# File 'lib/w3m-autopagerize-server.rb', line 38 def pageElement @pageElement end |
Class Method Details
.fallback_predicate1(text, words = $FALLBACK_WORDS, patterns = $FALLBACK_PATTERNS) ⇒ Object
40 41 42 43 44 45 46 47 |
# File 'lib/w3m-autopagerize-server.rb', line 40
# Builds an XPath predicate that holds when +text+ equals one of +words+
# or contains one of +patterns+.
#
# Each candidate is interpolated verbatim inside double quotes, so a
# candidate that itself contains a double quote yields a malformed literal.
#
# @param text [String] XPath expression to test, e.g. '.' or '@value'
# @param words [Array<String>, nil] candidates compared with "="
# @param patterns [Array<String>, nil] candidates tested with contains()
# @return [String] the clauses joined with " or "; "" when there are no
#   candidates at all
def self.fallback_predicate1(text, words=$FALLBACK_WORDS, patterns=$FALLBACK_PATTERNS)
  # to_a makes an unset (nil) list behave like an empty one instead of
  # raising NoMethodError on map.
  exact = words.to_a.map { |w| %Q!#{text}="#{w}"! }
  partial = patterns.to_a.map { |w| %Q!contains(#{text},"#{w}")! }
  (exact + partial).join(' or ')
end
.fallback_predicate2(text, start_words = $FALLBACK_START_WORDS) ⇒ Object
49 50 51 |
# File 'lib/w3m-autopagerize-server.rb', line 49
# Builds an XPath predicate that holds when +text+ starts with one of
# +start_words+.
#
# @param text [String] XPath expression to test, e.g. '.' or '@value'
# @param start_words [Array<String>, nil] prefixes tested with starts-with()
# @return [String] the clauses joined with " or "; "" when there are no
#   prefixes
def self.fallback_predicate2(text, start_words=$FALLBACK_START_WORDS)
  # to_a makes an unset (nil) list behave like an empty one instead of
  # raising NoMethodError on map.
  start_words.to_a.map { |w| %Q!starts-with(#{text},"#{w}")! }.join(' or ')
end
.fallbacks ⇒ Object
Returns the memoized fallback entries whose XPath expressions locate a link (or form) leading to the next page.
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/w3m-autopagerize-server.rb', line 54
# Returns the list of fallback entries, building it on first use and
# caching it in @fallbacks afterwards.
#
# Two entries are always created: one XPath selecting <a> elements and one
# selecting <form> elements with a descendant <input>, both filtered by
# fallback_predicate1. When $FALLBACK_START_WORDS is non-empty, two more
# entries filtered by fallback_predicate2 are appended. The resulting array
# is extended with FallbackSetup before being returned.
#
# @return [Array] the cached array of entries created via +new+
def self.fallbacks
  @fallbacks ||= begin
    entries = [
      new("//a[#{fallback_predicate1('.')}]"),
      new("//form[descendant::input[#{fallback_predicate1('@value')}]]"),
    ]
    # The starts-with variants are only meaningful when prefixes are configured.
    unless $FALLBACK_START_WORDS.to_a.empty?
      entries << new("//a[#{fallback_predicate2('.')}]")
      entries << new("//form[descendant::input[#{fallback_predicate2('@value')}]]")
    end
    entries.extend(FallbackSetup)
  end
end
Instance Method Details
#next_url(uri) ⇒ Object
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/w3m-autopagerize-server.rb', line 89
# Resolves the URL of the page that follows +uri+.
#
# When +block+ is set it is always run via instance_exec with the parsed
# URI and +match+. Its return value is only used when +nextLink+ is not
# set; otherwise the first node matched by the +nextLink+ XPath in the
# document cached under $nokogiri_cache[uri] supplies the target through
# its "href", "action" or "value" attribute (first one present).
#
# @param uri [#to_s] address of the current page
# @return [URI, nil] the next page's URI resolved against +uri+, or nil
#   when no target could be determined
def next_url(uri)
  uri = URI(uri.to_s)
  result = instance_exec(uri, match, &block) if block
  xpath = nextLink

  unless xpath
    $logger.info "#{__method__}: result = #{result}"
    # Without a block there is nothing to merge; report "no next page"
    # the same way the XPath branch does instead of raising from merge.
    return result && uri.merge(result)
  end

  nokogiri = $nokogiri_cache[uri.to_s]
  $logger.info "#{__method__}: use xpath #{xpath}"
  nodes = nokogiri.xpath(xpath)
  node = nodes.first
  $logger.debug "#{__method__}: nodes.length = #{nodes.length}"
  # Explicit nil check for the no-match case rather than a blanket
  # `rescue nil`, which would also hide unrelated errors.
  nexturl = node && (node["href"] || node["action"] || node["value"])
  $logger.info "#{__method__}: nexturl = #{nexturl || 'NOT FOUND'}"
  return unless nexturl

  # Some buggy sites do not encode spaces; tr returns a copy so the
  # attribute string is not mutated (and frozen strings are accepted).
  uri.merge(nexturl.tr(' ', '+'))
end