3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
# File 'lib/scrappy/extractor/selectors/xpath.rb', line 3
# Evaluates every expression listed in rdf::value against the document
# content and returns one flat array of result hashes.
#
# doc - Hash-like object read with two keys:
#       :content - object queried through #search(expression)
#       :uri     - copied unchanged into every result
#
# Selector options read from sc:
#   sc::index     - when present, only the match at that position is kept
#   sc::keyword   - when non-empty, a match is kept only if its cleaned,
#                   downcased text is one of the keywords
#   sc::attribute - when present, one result per (match, attribute) pair is
#                   produced; otherwise one formatted result per match
#
# Returns an Array of Hashes with :uri, :content and :value, plus
# :attribute when attributes are being extracted.
def filter(doc)
  per_expression = rdf::value.map do |xpath|
    # A configured index collapses the range to that single position;
    # without one the whole match list is taken.
    position = sc::index.first
    range = position ? (position.to_i..position.to_i) : (0..-1)
    keywords = sc::keyword

    # Slicing past the end of the match list yields nil, hence the fallback.
    candidates = doc[:content].search(xpath)[range] || []
    accepted = candidates.select do |node|
      !keywords.any? || keywords.include?(node.text.clean.downcase)
    end

    accepted.map do |node|
      attributes = sc::attribute
      if attributes.first
        # NOTE(review): node[name] may be nil when the node lacks the
        # attribute -- confirm that `clean` tolerates that.
        attributes.map do |name|
          { :uri => doc[:uri], :content => node, :value => node[name].clean, :attribute => name }
        end
      else
        [{ :uri => doc[:uri], :content => node, :value => format(node, sc::format, doc[:uri]) }]
      end
    end
  end

  # Collapse the per-expression / per-match nesting into one list.
  per_expression.flatten
end
|