Class: Matryoshka::Document::Html::Merge
- Inherits:
-
Object
- Object
- Matryoshka::Document::Html::Merge
- Defined in:
- lib/matryoshka/document/html/merge.rb
Constant Summary collapse
- DEFAULT_MERGE =
[ {:empty=>:full}, {:doc=>:remerge_children}, {:id=>:replace}, {:before_id=>:before}, {:after_id=>:append}, {:single_tag => :replace}, {:header_tag => :end_of_tag}, {:default=>:end_of_tag} ]
- EXTERNAL_MERGE =
[ {:original_id => :replace_id}, {:original_selector => :selector}, {:nochildren=>:inside_body}, {:doc => :remerge_original_children}, # {:id=>:replace}, # {:parent=>:insert}, # This translate to before_id or after_id {:all=>:remerge_children} ]
- @@round =
0
Instance Attribute Summary collapse
-
#additional ⇒ Object
original is the parent document; additional is the document to be merged; corresponding is the matching portion of the original doc; methodologies are the techniques used to merge, in order: [{find_method => what_to_do_if_match}, …].
-
#corresponding ⇒ Object
original is the parent document; additional is the document to be merged; corresponding is the matching portion of the original doc; methodologies are the techniques used to merge, in order: [{find_method => what_to_do_if_match}, …].
-
#methodologies ⇒ Object
original is the parent document; additional is the document to be merged; corresponding is the matching portion of the original doc; methodologies are the techniques used to merge, in order: [{find_method => what_to_do_if_match}, …].
-
#original ⇒ Object
original is the parent document; additional is the document to be merged; corresponding is the matching portion of the original doc; methodologies are the techniques used to merge, in order: [{find_method => what_to_do_if_match}, …].
Instance Method Summary collapse
-
#additional_id ⇒ Object
These are quickly tacked on.
- #after_id ⇒ Object
- #all ⇒ Object
- #append ⇒ Object
- #before ⇒ Object
- #before_id ⇒ Object
- #convert_to_parse_format(data) ⇒ Object
- #corresponding_match(find_method) ⇒ Object
- #default ⇒ Object
- #doc ⇒ Object
-
#empty ⇒ Object
Below are matching and replacing methods. Perhaps move them to a separate module later.
- #end_of_tag ⇒ Object
- #full ⇒ Object
- #header_tag ⇒ Object
-
#id ⇒ Object
End external-specific merging methods.
-
#initialize(orig, add, meth = DEFAULT_MERGE) ⇒ Merge
constructor
A new instance of Merge.
- #inside_body ⇒ Object
- #mergeable? ⇒ Boolean
- #nochildren ⇒ Object
-
#original_id ⇒ Object
Some external-specific merging methods.
- #original_selector ⇒ Object
- #remerge_children ⇒ Object
- #remerge_original_children ⇒ Object
- #replace ⇒ Object
- #replace_id ⇒ Object
- #run ⇒ Object
- #selector ⇒ Object
-
#single_tag ⇒ Object
For when there can be only one of a tag.
Constructor Details
#initialize(orig, add, meth = DEFAULT_MERGE) ⇒ Merge
Returns a new instance of Merge.
32 33 34 35 36 |
# File 'lib/matryoshka/document/html/merge.rb', line 32 def initialize(orig,add, meth = DEFAULT_MERGE) self.original = convert_to_parse_format(orig) self.additional = convert_to_parse_format(add) self.methodologies = meth end |
Instance Attribute Details
#additional ⇒ Object
original is the parent document; additional is the document to be merged; corresponding is the matching portion of the original doc; methodologies are the techniques used to merge, in order:
- {find_method => what_to_do_if_match}, …
30 31 32 |
# File 'lib/matryoshka/document/html/merge.rb', line 30 def additional @additional end |
#corresponding ⇒ Object
original is the parent document; additional is the document to be merged; corresponding is the matching portion of the original doc; methodologies are the techniques used to merge, in order:
- {find_method => what_to_do_if_match}, …
30 31 32 |
# File 'lib/matryoshka/document/html/merge.rb', line 30 def corresponding @corresponding end |
#methodologies ⇒ Object
original is the parent document; additional is the document to be merged; corresponding is the matching portion of the original doc; methodologies are the techniques used to merge, in order:
- {find_method => what_to_do_if_match}, …
30 31 32 |
# File 'lib/matryoshka/document/html/merge.rb', line 30 def methodologies @methodologies end |
#original ⇒ Object
original is the parent document; additional is the document to be merged; corresponding is the matching portion of the original doc; methodologies are the techniques used to merge, in order:
- {find_method => what_to_do_if_match}, …
30 31 32 |
# File 'lib/matryoshka/document/html/merge.rb', line 30 def original @original end |
Instance Method Details
#additional_id ⇒ Object
These are quickly tacked on.
131 132 133 |
# File 'lib/matryoshka/document/html/merge.rb', line 131 def additional_id additional.attributes['id'] if additional.respond_to? :attributes end |
#after_id ⇒ Object
147 148 149 150 151 152 153 |
# File 'lib/matryoshka/document/html/merge.rb', line 147 def after_id if additional_id if additional_id.index('after__') == 0 original.at("##{additional_id.sub('after__','')}") end end end |
#all ⇒ Object
183 184 185 |
# File 'lib/matryoshka/document/html/merge.rb', line 183 def all original end |
#append ⇒ Object
155 156 157 |
# File 'lib/matryoshka/document/html/merge.rb', line 155 def append corresponding.after(additional.to_html) end |
#before ⇒ Object
143 144 145 |
# File 'lib/matryoshka/document/html/merge.rb', line 143 def before corresponding.before(additional.to_html) end |
#before_id ⇒ Object
135 136 137 138 139 140 141 |
# File 'lib/matryoshka/document/html/merge.rb', line 135 def before_id if additional_id if additional_id.index('before__') == 0 original.at("##{additional_id.sub('before__','')}") end end end |
#convert_to_parse_format(data) ⇒ Object
57 58 59 60 61 62 63 64 65 66 |
# File 'lib/matryoshka/document/html/merge.rb', line 57 def convert_to_parse_format(data) if data.respond_to? :content # Html object # This should already be in Hpricot data.content elsif data.respond_to? :to_html # Hpricot object data else # probably string or IO Hpricot data end end |
#corresponding_match(find_method) ⇒ Object
53 54 55 |
# File 'lib/matryoshka/document/html/merge.rb', line 53 def corresponding_match(find_method) self.corresponding = send(find_method) end |
#default ⇒ Object
159 160 161 |
# File 'lib/matryoshka/document/html/merge.rb', line 159 def default original.at('body') or original end |
#doc ⇒ Object
78 79 80 81 |
# File 'lib/matryoshka/document/html/merge.rb', line 78 def doc # original unless additional.kind_of? Nokogiri::HTML::Element original if additional.class == Hpricot::Doc end |
#empty ⇒ Object
Below are matching and replacing methods. Perhaps move them to a separate module later.
70 71 72 |
# File 'lib/matryoshka/document/html/merge.rb', line 70 def empty original if original.inner_html.empty? end |
#end_of_tag ⇒ Object
163 164 165 |
# File 'lib/matryoshka/document/html/merge.rb', line 163 def end_of_tag corresponding.inner_html = corresponding.inner_html + additional.to_html end |
#full ⇒ Object
74 75 76 |
# File 'lib/matryoshka/document/html/merge.rb', line 74 def full corresponding.inner_html = additional.inner_html end |
#header_tag ⇒ Object
187 188 189 |
# File 'lib/matryoshka/document/html/merge.rb', line 187 def header_tag ['link', 'meta','script'].include?(additional.name) and original.at('head') end |
#id ⇒ Object
End external-specific merging methods.
121 122 123 |
# File 'lib/matryoshka/document/html/merge.rb', line 121 def id original.at("##{additional.attributes['id']}") if additional.attributes['id'] end |
#inside_body ⇒ Object
177 178 179 180 181 |
# File 'lib/matryoshka/document/html/merge.rb', line 177 def inside_body corresponding.inner_html = begin additional.at('body') or additional end.to_html end |
#mergeable? ⇒ Boolean
196 197 198 199 |
# File 'lib/matryoshka/document/html/merge.rb', line 196 def mergeable? acceptable_classes_for_merging = [Hpricot::Elem, Hpricot::Doc] acceptable_classes_for_merging.include? additional.class end |
#nochildren ⇒ Object
167 168 169 170 171 172 173 174 175 |
# File 'lib/matryoshka/document/html/merge.rb', line 167 def nochildren return original.at('*') unless original.children original.children.each do |child| if child.class == Hpricot::Elem return false end end original.at('*') end |
#original_id ⇒ Object
Some external-specific merging methods
101 102 103 |
# File 'lib/matryoshka/document/html/merge.rb', line 101 def original_id original if additional.at("##{original.attributes['id']}") if original.attributes['id'] end |
#original_selector ⇒ Object
109 110 111 112 113 |
# File 'lib/matryoshka/document/html/merge.rb', line 109 def original_selector if original.attributes['rel'] == 'selector' original if additional.at(original.attributes['href']) end end |
#remerge_children ⇒ Object
83 84 85 86 87 88 89 90 |
# File 'lib/matryoshka/document/html/merge.rb', line 83 def remerge_children additional.children.each do |elem| # if elem.kind_of? Nokogiri::XML::Element if elem.kind_of? Hpricot::Elem self.class.new(corresponding,elem,methodologies).run end end end |
#remerge_original_children ⇒ Object
92 93 94 95 96 97 98 |
# File 'lib/matryoshka/document/html/merge.rb', line 92 def remerge_original_children corresponding.children.each do |elem| if elem.kind_of? Hpricot::Elem self.class.new(elem, additional, methodologies).run end end end |
#replace ⇒ Object
125 126 127 |
# File 'lib/matryoshka/document/html/merge.rb', line 125 def replace corresponding.swap additional.to_html end |
#replace_id ⇒ Object
105 106 107 |
# File 'lib/matryoshka/document/html/merge.rb', line 105 def replace_id corresponding.swap additional.at("##{corresponding.attributes['id']}").to_html end |
#run ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/matryoshka/document/html/merge.rb', line 38 def run # puts "Round #{@@round += 1}" methodologies.each do |methodology| methodology.each_pair do |find_method, merge_technique| # puts "#{@@round} - Doing #{find_method}:#{merge_technique} on #{additional.to_html[0..50].gsub(/\n/,'')}" if (mergeable? and corresponding_match(find_method)) send merge_technique return original end end end # This wasn't in previous versions ... may be some reason to avoid. return original end |
#selector ⇒ Object
115 116 117 |
# File 'lib/matryoshka/document/html/merge.rb', line 115 def selector corresponding.swap additional.at(corresponding.attributes['href']).to_html end |
#single_tag ⇒ Object
For when there can be only one of a tag
192 193 194 |
# File 'lib/matryoshka/document/html/merge.rb', line 192 def single_tag ['title','head','body'].include?(additional.name) and original.at(additional.name) end |