Class: ContentMapping
- Inherits: Object
- Inheritance chain: Object → ContentMapping
- Defined in: lib/content_scrapper/content_mapping.rb
Instance Attribute Summary
- #content_xpaths_list ⇒ Object (readonly): Returns the value of attribute content_xpaths_list.
- #iconv_from ⇒ Object (readonly): Returns the value of attribute iconv_from.
- #iconv_to ⇒ Object (readonly): Returns the value of attribute iconv_to.
- #url_pattern_regexp ⇒ Object (readonly): Returns the value of attribute url_pattern_regexp.
Instance Method Summary
- #content_at(content_xpath) ⇒ Object
- #iconv(args) ⇒ Object
- #initialize ⇒ ContentMapping (constructor): A new instance of ContentMapping.
- #matches_url?(url) ⇒ Boolean
- #scrap_content(doc, content_scrapper = nil) ⇒ Object
- #url_pattern(pattern) ⇒ Object
Constructor Details
#initialize ⇒ ContentMapping
Returns a new instance of ContentMapping.
7 8 9 |
# File 'lib/content_scrapper/content_mapping.rb', line 7
#
# Builds an empty mapping: no content XPaths registered, and no URL
# pattern or character-set conversion configured.
def initialize
  @content_xpaths_list = []
  # Set explicitly so the attribute readers never touch an undefined
  # instance variable (older Rubies warn about that under `ruby -w`) and
  # so the full object state is visible in one place.
  @url_pattern_regexp = nil
  @iconv_from = nil
  @iconv_to = nil
end
Instance Attribute Details
#content_xpaths_list ⇒ Object (readonly)
Returns the value of attribute content_xpaths_list.
5 6 7 |
# File 'lib/content_scrapper/content_mapping.rb', line 5
#
# Reader for the list of XPath expressions; the list is created empty by
# the constructor and appended to by #content_at.
#
# @return [Object] the current value of @content_xpaths_list
def content_xpaths_list
  @content_xpaths_list
end
#iconv_from ⇒ Object (readonly)
Returns the value of attribute iconv_from.
5 6 7 |
# File 'lib/content_scrapper/content_mapping.rb', line 5
#
# Reader for the source character set stored by #iconv (its :from value).
#
# @return [Object] the current value of @iconv_from
def iconv_from
  @iconv_from
end
#iconv_to ⇒ Object (readonly)
Returns the value of attribute iconv_to.
5 6 7 |
# File 'lib/content_scrapper/content_mapping.rb', line 5
#
# Reader for the target character set stored by #iconv (its :to value).
# #scrap_content skips conversion while this is nil.
#
# @return [Object] the current value of @iconv_to
def iconv_to
  @iconv_to
end
#url_pattern_regexp ⇒ Object (readonly)
Returns the value of attribute url_pattern_regexp.
5 6 7 |
# File 'lib/content_scrapper/content_mapping.rb', line 5
#
# Reader for the pattern stored by #url_pattern; #matches_url? matches
# URLs against it.
#
# @return [Object] the current value of @url_pattern_regexp
def url_pattern_regexp
  @url_pattern_regexp
end
Instance Method Details
#content_at(content_xpath) ⇒ Object
15 16 17 |
# File 'lib/content_scrapper/content_mapping.rb', line 15
#
# Registers one more XPath expression. Expressions are kept in insertion
# order, which is the order #scrap_content tries them in.
#
# @param content_xpath [Object] expression later passed to doc.xpath
# @return [Object] the list of registered expressions, after appending
def content_at(content_xpath)
  @content_xpaths_list.push(content_xpath)
end
#iconv(args) ⇒ Object
19 20 21 22 |
# File 'lib/content_scrapper/content_mapping.rb', line 19
#
# Stores the character-set conversion settings that #scrap_content later
# hands to Iconv.conv.
#
# @param args [Object] indexed with :from and :to (e.g. a Hash)
# @return [Object] the value stored for :to
def iconv(args)
  source_charset = args[:from]
  target_charset = args[:to]
  @iconv_from = source_charset
  @iconv_to = target_charset
end
#matches_url?(url) ⇒ Boolean
24 25 26 |
# File 'lib/content_scrapper/content_mapping.rb', line 24
#
# Tells whether the given URL matches the pattern stored by #url_pattern.
#
# @param url [Object] value matched with =~ against @url_pattern_regexp
# @return [Boolean] true when the pattern matches, false otherwise
def matches_url?(url)
  # `=~` yields the match index (possibly 0) or nil; a predicate should
  # answer with a real boolean rather than leak the index to callers.
  (url =~ @url_pattern_regexp) ? true : false
end
#scrap_content(doc, content_scrapper = nil) ⇒ Object
28 29 30 31 32 33 34 35 36 37 |
# File 'lib/content_scrapper/content_mapping.rb', line 28
#
# Extracts content from +doc+ using the first registered XPath expression
# that selects at least one node.
#
# The selected nodes are joined with newlines, optionally cleaned by
# +content_scrapper+, and optionally converted between character sets when
# a target charset has been configured through #iconv.
#
# @param doc [Object] must respond to #xpath; the result must respond to
#   #count and #to_a (presumably a Nokogiri document — confirm with callers)
# @param content_scrapper [Object, nil] when given, its #clean_content is
#   applied to the joined text
# @return [Object, nil] the processed content, or nil when no registered
#   expression selects anything
def scrap_content(doc, content_scrapper = nil)
  @content_xpaths_list.each do |content_xpath|
    content_section = doc.xpath(content_xpath)
    # Skip non-matching expressions before doing any work: cleaning or
    # converting an empty string is wasted effort and needlessly invokes
    # the scrapper / Iconv for every miss.
    next if content_section.count.zero?

    content = content_section.to_a.join("\n")
    content = content_scrapper.clean_content(content) unless content_scrapper.nil?
    # NOTE(review): Iconv left the standard library in Ruby 2.0; on newer
    # Rubies this call needs the iconv gem (or a port to String#encode).
    content = Iconv.conv(iconv_to, iconv_from, content) unless iconv_to.nil?
    return content
  end
  nil
end
#url_pattern(pattern) ⇒ Object
11 12 13 |
# File 'lib/content_scrapper/content_mapping.rb', line 11
#
# Stores the pattern that #matches_url? matches URLs against.
#
# @param pattern [Object] stored as-is in @url_pattern_regexp
#   (presumably a Regexp, given the attribute name — confirm with callers)
# @return [Object] the pattern that was stored
def url_pattern(pattern)
  @url_pattern_regexp = pattern
end