Class: Lorax::Signature
- Inherits: Object
- Inheritance hierarchy: Object → Lorax::Signature
- Defined in:
- lib/lorax/signature.rb
Constant Summary
- SEP =
"\0"
Instance Method Summary
-
#initialize(node = nil) ⇒ Signature
constructor
A new instance of Signature.
- #monogram(node = @node) ⇒ Object
- #nodes(sig = nil) ⇒ Object
- #root ⇒ Object
-
#set_signature(node, value) ⇒ Object
:nodoc: for testing.
-
#set_weight(node, value) ⇒ Object
:nodoc: for testing.
- #signature(node = @node) ⇒ Object
- #size ⇒ Object
- #weight(node = @node) ⇒ Object
Constructor Details
#initialize(node = nil) ⇒ Signature
Returns a new instance of Signature.
7 8 9 10 11 12 13 14 15 |
# File 'lib/lorax/signature.rb', line 7

# Sets up empty lookup tables and, when a node is given, signs it right away
# by calling #signature on it.
#
# @param node [Object, nil] node to remember as @node; when truthy it is
#   passed to #signature immediately
def initialize(node = nil)
  @node       = node
  @size       = 0
  @weights    = {} # node => weight
  @nodes      = {} # signature => [node, ...]
  @monograms  = {} # node => monogram (signature not including children)
  @signatures = {} # node => signature
  signature(node) if node
end
Instance Method Details
#monogram(node = @node) ⇒ Object
85 86 87 88 89 |
# File 'lib/lorax/signature.rb', line 85

# Looks up the monogram stored for +node+. When none has been stored yet,
# #signature is invoked on the node first, and whatever it recorded in
# @monograms (possibly nothing) is returned.
#
# @param node [Object] node to look up; defaults to @node
# @return [Object, nil] the stored monogram, or nil when none was recorded
def monogram(node = @node)
  signature(node) unless @monograms.key?(node)
  @monograms[node]
end
#nodes(sig = nil) ⇒ Object
21 22 23 |
# File 'lib/lorax/signature.rb', line 21

# With a signature, returns the entry stored under it in @nodes (nil when the
# signature is unknown). Without one, returns a one-element array holding @node.
#
# @param sig [Object, nil] signature to look up
# @return [Array, nil]
def nodes(sig = nil)
  return [@node] unless sig

  @nodes[sig]
end
#root ⇒ Object
17 18 19 |
# File 'lib/lorax/signature.rb', line 17

# Reader for the node stored in @node.
#
# @return [Object, nil] the value of @node
def root
  @node
end
#set_signature(node, value) ⇒ Object
:nodoc: for testing
91 92 93 94 |
# File 'lib/lorax/signature.rb', line 91

# Records +value+ as the signature of +node+ directly, without computing
# anything: the node is appended to the list kept under +value+ in @nodes and
# the node => value pair is written to @signatures.
#
# @param node [Object] node to register
# @param value [Object] signature to assign to it
# @return [Object] +value+
def set_signature(node, value) # :nodoc: for testing
  bucket = (@nodes[value] ||= [])
  bucket << node
  @signatures[node] = value
end
#set_weight(node, value) ⇒ Object
:nodoc: for testing
96 97 98 |
# File 'lib/lorax/signature.rb', line 96

# Stores +value+ as the weight of +node+ in @weights, overwriting any
# previous entry.
#
# @param node [Object] node whose weight is being set
# @param value [Object] weight to store
# @return [Object] +value+
def set_weight(node, value) # :nodoc: for testing
  @weights.store(node, value)
end
#signature(node = @node) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/lorax/signature.rb', line 29

# Returns the signature of +node+, computing and memoizing it on first use.
#
# How the value is derived, by node kind (all digests come from +hashify+,
# which is defined elsewhere in the class):
# * text: digest of the stripped content; whitespace-only text returns nil
#   and is neither memoized nor counted in @size
# * CDATA / comment: digest of the raw content
# * entity reference: digest of the node's +to_html+ output
# * element: digest of the name, the name-sorted attribute name/value pairs,
#   and the non-nil signatures of its children; the monogram is the same
#   digest without the children part
#
# For every signed node this also bumps @size, triggers #weight for the node,
# appends the node to @nodes[signature] and records its monogram.
#
# @param node [Nokogiri::XML::Node] node to sign; defaults to @node
# @return [Object, nil] the signature, or nil for whitespace-only text
# @raise [ArgumentError] if +node+ is not a Nokogiri::XML::Node or is of an
#   unsupported node kind
def signature(node = @node)
  return @signatures[node] if @signatures.key?(node)

  unless node.is_a?(Nokogiri::XML::Node)
    raise ArgumentError, "signature expects a Node, but received #{node.inspect}"
  end

  if node.text?
    text = node.content.strip
    return nil if text.empty?

    mono = sig = hashify(text)
  elsif node.cdata? || node.comment?
    mono = sig = hashify(node.content)
  elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
    mono = sig = hashify(node.to_html)
  elsif node.element?
    # Children are signed before the attributes are read, as in the original.
    child_sigs = node.children.map { |child| signature(child) }.compact
    children_sig = hashify(child_sigs)
    attr_pairs = node.attributes.sort.map { |attr_name, attr| [attr_name, attr.value] }
    attr_sig = hashify(attr_pairs.flatten)
    mono = hashify(node.name, attr_sig)
    sig = hashify(node.name, attr_sig, children_sig)
  else
    raise ArgumentError, "signature expects an element, text, cdata or comment node, but received #{node.class}"
  end

  @size += 1
  weight(node)
  (@nodes[sig] ||= []) << node
  @monograms[node] = mono
  @signatures[node] = sig
end
#size ⇒ Object
25 26 27 |
# File 'lib/lorax/signature.rb', line 25 def size @size end |
#weight(node = @node) ⇒ Object
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/lorax/signature.rb', line 61

# Returns the weight of +node+, computing and memoizing it in @weights on
# first use.
#
# Weight by node kind:
# * text: 0 when the stripped content is empty, otherwise
#   1 + Math.log(stripped length)
# * CDATA / comment: 1 + Math.log(content length); empty content weighs 1
# * entity reference: 1
# * element: 1 plus the weights of all of its children
#
# @param node [Nokogiri::XML::Node] node to weigh; defaults to @node
# @return [Numeric] the node's weight (always finite)
# @raise [ArgumentError] if +node+ is not a Nokogiri::XML::Node or is of an
#   unsupported node kind
def weight(node = @node)
  return @weights[node] if @weights.key?(node)

  unless node.is_a?(Nokogiri::XML::Node)
    raise ArgumentError, "weight expects a Node, but received #{node.inspect}"
  end

  calculated_weight =
    if node.text?
      length = node.content.strip.length
      length.zero? ? 0 : 1 + Math.log(length)
    elsif node.cdata? || node.comment?
      length = node.content.length
      # Math.log(0) is -Infinity, which would poison the weight of every
      # ancestor element; an empty comment/CDATA weighs the same as a
      # one-character one (1 + log(1) == 1).
      length.zero? ? 1 : 1 + Math.log(length)
    elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
      1
    elsif node.element?
      node.children.inject(1) { |sum, child| sum + weight(child) }
    else
      raise ArgumentError, "weight expects an element, text, cdata or comment node, but received #{node.class}"
    end

  @weights[node] = calculated_weight
end