Class: Scrapes::RuleParser::Extractor

Inherits:
Object
  • Object
show all
Defined in:
lib/scrapes/rule_parser.rb

Overview

:nodoc:all

Constant Summary collapse

REGEX =

TODO: review this. Parse each extractor into three parts: $1 is the function name (excluding parentheses), $2 is the element name, and $3 is the attribute name (including the leading @). If a match is found, the result is either $1 alone, or $2 and/or $3.

/^(\w+)\(\)|([A-Za-z][A-Za-z0-9_\-:]*)?(@[A-Za-z][A-Za-z0-9_\-:]*)?$/

Instance Method Summary collapse

Constructor Details

#initialize(context, statement) ⇒ Extractor

:nodoc:



273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
# File 'lib/scrapes/rule_parser.rb', line 273

# Compiles an extraction statement into an ordered list of extractors.
#
# The statement is split on "|" and every alternative is matched against
# REGEX. Depending on which groups matched, the alternative becomes:
# * function name ("name()"): the method of that name looked up on +context+;
# * element and attribute ("elem@attr"): a proc returning the attribute
#   value when the node's name equals the element name, otherwise nil;
# * element only ("elem"): a proc returning text(node) when the node's name
#   equals the element name (text is a helper defined outside this method);
# * attribute only ("@attr"): a proc returning the attribute value of the
#   node, going through node.all when the node responds to each.
#
# context::   object that provides the methods named by "name()" alternatives.
# statement:: the extraction statement; it is read but never modified, so
#             frozen strings are accepted.
#
# Raises InvalidRuleException when an alternative matches none of the forms
# above, when a named function is not available on +context+, or when the
# statement yields no alternatives at all.
def initialize(context, statement) # :nodoc:
  @extracts = []
  # strip (not strip!) so the caller's string is left untouched; each
  # alternative is stripped too, so "a | @href" parses like "a|@href".
  statement.strip.split('|').each do |token|
    parts = REGEX.match(token.strip)
    # A failed match leaves all three names nil and falls through to the
    # "invalid" branch below.
    function, element, attribute = parts && parts.captures
    extractor =
      if function
        begin
          context.method(function)
        rescue NameError => error
          # Report an unknown function as a rule error, keeping the backtrace.
          raise InvalidRuleException, error.message, error.backtrace
        end
      elsif element && attribute
        attr_name = attribute[1..-1] # drop the leading "@"
        proc { |node| node.attributes[attr_name] if node.name == element }
      elsif element
        proc { |node| text(node) if node.name == element }
      elsif attribute
        attr_name = attribute[1..-1] # drop the leading "@"
        proc do |node|
          if node.respond_to?(:each)
            node.all.attributes.all[attr_name]
          else
            node.attributes[attr_name]
          end
        end
      else
        raise InvalidRuleException, "Invalid extraction statement"
      end
    @extracts << extractor
  end
  raise InvalidRuleException, "Invalid (empty) extraction statement" if @extracts.empty?
end

Instance Method Details

#extract(node) ⇒ Object

:nodoc:



309
310
311
312
313
314
315
# File 'lib/scrapes/rule_parser.rb', line 309

# Runs the stored extractors against +node+ in order and stops at the first
# one that returns a truthy value.
#
# Returns that value. If no extractor returns a truthy value, the result of
# the last extractor called (nil or false) is returned; nil when there are
# no extractors.
def extract(node) # :nodoc:
  result = nil
  @extracts.each do |extractor|
    result = extractor.call(node)
    break if result
  end
  result
end

#inspect ⇒ Object

:nodoc:



318
319
320
# File 'lib/scrapes/rule_parser.rb', line 318

# Renders the extractor list as a single string: the stored extractors are
# joined with "|" between them.
def inspect # :nodoc:
  separator = '|'
  @extracts.join(separator)
end