Class: Brief::Document::ContentExtractor::ExtractionRule
- Inherits:
-
Object
- Object
- Brief::Document::ContentExtractor::ExtractionRule
- Defined in:
- lib/brief/document/content_extractor.rb
Instance Attribute Summary collapse
-
#args ⇒ Object
readonly
Returns the value of attribute args.
-
#rule ⇒ Object
readonly
Returns the value of attribute rule.
Instance Method Summary collapse
- #apply_to(document) ⇒ Object
- #deserialize? ⇒ Boolean
- #format ⇒ Object
-
#initialize(rule) ⇒ ExtractionRule
constructor
A new instance of ExtractionRule.
- #options ⇒ Object
- #selector ⇒ Object
Constructor Details
#initialize(rule) ⇒ ExtractionRule
Returns a new instance of ExtractionRule.
52 53 54 55 |
# File 'lib/brief/document/content_extractor.rb', line 52
#
# Builds an extraction rule around +rule+, keeping both the rule object and
# the argument list it exposes.
#
# @param rule [#args] the rule definition; it must respond to +args+
def initialize(rule)
  @rule = rule
  @args = @rule.args
end
Instance Attribute Details
#args ⇒ Object (readonly)
Returns the value of attribute args.
50 51 52 |
# File 'lib/brief/document/content_extractor.rb', line 50
#
# Read-only accessor for the +@args+ instance variable.
#
# @return [Object] the current value of +@args+
def args
  @args
end
#rule ⇒ Object (readonly)
Returns the value of attribute rule.
50 51 52 |
# File 'lib/brief/document/content_extractor.rb', line 50
#
# Read-only accessor for the +@rule+ instance variable.
#
# @return [Object] the current value of +@rule+
def rule
  @rule
end
Instance Method Details
#apply_to(document) ⇒ Object
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/brief/document/content_extractor.rb', line 73
#
# Runs this rule's selector against +document+ and returns the matched
# content in the shape the rule asks for.
#
# @param document [#css] anything whose +css(selector)+ returns a node
#   collection responding to +length+, +text+, +first+ and +map+
#   (presumably a Nokogiri document - confirm against callers)
# @return [nil] when the selector matches nothing
# @return [Object] the result of +to_mash+ on the parsed payload when
#   deserialization is enabled and the format is +:json+ or +:yaml+
# @return [String] the first node's text when the selector contains
#   "first-of-type"
# @return [Array<String>] the text of every matched node otherwise
# @raise [RuntimeError] when the rule has no String selector
def apply_to(document)
  raise 'Must specify a selector' unless selector

  extracted = document.css(selector)
  return nil if extracted.length.zero?

  if deserialize? && %i[json yaml].include?(format)
    raw = extracted.text.to_s
    parsed =
      begin
        # SECURITY(review): YAML.load can instantiate arbitrary objects on
        # older Psych versions; if document content may be untrusted,
        # consider YAML.safe_load here.
        format == :json ? JSON.parse(raw) : YAML.load(raw)
      rescue StandardError
        # Best-effort: malformed payloads degrade to an empty result.
        {}
      end
    # A payload can parse successfully to a non-Hash (empty YAML => false,
    # a JSON array, a bare scalar). Fall back to an empty hash instead of
    # raising NoMethodError on +to_mash+.
    parsed = {} unless parsed.respond_to?(:to_mash)
    parsed.to_mash
  elsif selector.include?('first-of-type')
    extracted.first.text
  else
    extracted.map(&:text)
  end
end
#deserialize? ⇒ Boolean
61 62 63 |
# File 'lib/brief/document/content_extractor.rb', line 61
#
# Whether the rule's options request deserialization, i.e. whether
# +options.serialize+ holds a non-blank value.
#
# +present?+ is not core Ruby (presumably ActiveSupport's - confirm); it
# already yields a strict boolean, so no extra +!!+ coercion is needed.
#
# @return [Boolean]
def deserialize?
  options.serialize.present?
end
#format ⇒ Object
65 66 67 |
# File 'lib/brief/document/content_extractor.rb', line 65
#
# The serialization format named by +options.serialize+, as a Symbol.
#
# @return [Symbol] e.g. +:json+ or +:yaml+
# @raise [NoMethodError] when +options.serialize+ does not respond to +to_sym+
def format
  options.serialize.to_sym
end
#options ⇒ Object
57 58 59 |
# File 'lib/brief/document/content_extractor.rb', line 57
#
# The rule's options: the second element of +args+ when it is truthy,
# otherwise an empty hash converted with +to_mash+.
#
# +to_mash+ is not core Ruby (presumably Hashie's Hash extension - confirm).
#
# @return [Object] +args[1]+, or the +to_mash+ of an empty hash
def options
  args[1] || {}.to_mash
end
#selector ⇒ Object
69 70 71 |
# File 'lib/brief/document/content_extractor.rb', line 69
#
# The selector for this rule: the first element of +args+, provided it is
# a String.
#
# @return [String, nil] the selector, or nil when the first argument is
#   missing or not a String
def selector
  candidate = args.first
  candidate.is_a?(String) ? candidate : nil
end