Class: Bliss::ParserMachine
- Inherits:
-
Nokogiri::XML::SAX::Document
- Object
- Nokogiri::XML::SAX::Document
- Bliss::ParserMachine
- Defined in:
- lib/bliss/parser_machine.rb
Instance Method Summary collapse
- #cdata_block(string) ⇒ Object
-
#characters(string) ⇒ Object
def open_tag_regexps return @open_tag_regexps if @open_tag_regexps.
- #close ⇒ Object
- #concat_content(string) ⇒ Object
- #constraints(constraints) ⇒ Object
- #end_document ⇒ Object
- #end_element(element, attributes = []) ⇒ Object
-
#initialize ⇒ ParserMachine
constructor
A new instance of ParserMachine.
- #is_closed? ⇒ Boolean
- #on_root(&block) ⇒ Object
- #on_tag_close(element, block) ⇒ Object
- #on_tag_open(element, block) ⇒ Object
- #start_element(element, attributes) ⇒ Object
Constructor Details
#initialize ⇒ ParserMachine
Returns a new instance of ParserMachine.
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/bliss/parser_machine.rb', line 3

# Builds a parser with empty state: no root seen yet, no registered
# callbacks or constraints, and the parser open (not closed).
#
# @return [ParserMachine] a new instance of ParserMachine
def initialize
  @depth = [] # names of the elements currently open, outermost first
  # @settings = {} # downcased

  @root = nil
  @nodes = {}
  @current_node = {}

  # Text buffer appended to by #concat_content. Initialised here so that
  # text callbacks arriving before the first #start_element do not hit nil.
  @current_content = ''

  @on_root = nil
  @on_tag_open = {}
  @on_tag_close = {}

  @constraints = []

  @closed = false
end
Instance Method Details
#cdata_block(string) ⇒ Object
122 123 124 125 |
# File 'lib/bliss/parser_machine.rb', line 122

# Receives the text of a CDATA section and appends it to the content
# buffer, exactly like #characters. Ignored once the parser is closed.
#
# @param string [String] raw CDATA text
# @return [String, nil] whatever #concat_content returns, or nil when closed
def cdata_block(string)
  return if is_closed?

  concat_content(string)
end
#characters(string) ⇒ Object
# Memoised list of Regexp objects, one per key registered in @on_tag_open.
# The list is computed on first use and the same Array is returned afterwards.
#
# @return [Array<Regexp>]
def open_tag_regexps
  @open_tag_regexps ||= @on_tag_open.keys.map { |pattern| Regexp.new(pattern) }
end
# Memoised list of Regexp objects, one per key registered in @on_tag_close.
# The list is computed on first use and the same Array is returned afterwards.
#
# @return [Array<Regexp>]
def close_tag_regexps
  @close_tag_regexps ||= @on_tag_close.keys.map { |pattern| Regexp.new(pattern) }
end
117 118 119 120 |
# File 'lib/bliss/parser_machine.rb', line 117

# Receives a chunk of character data and appends it to the content buffer.
# Ignored once the parser is closed.
#
# @param string [String] text chunk
# @return [String, nil] whatever #concat_content returns, or nil when closed
def characters(string)
  return if is_closed?

  concat_content(string)
end
#close ⇒ Object
40 41 42 |
# File 'lib/bliss/parser_machine.rb', line 40

# Marks the parser as closed. The element and text callbacks in this
# class return immediately once this flag is set.
#
# @return [true]
def close
  @closed = true
end
#concat_content(string) ⇒ Object
179 180 181 182 183 184 |
# File 'lib/bliss/parser_machine.rb', line 179

# Appends the whitespace-stripped text to the current content buffer.
#
# The nil check runs before any method is called on the argument, and the
# argument itself is left untouched (non-mutating strip), so frozen strings
# are accepted and the caller's string is not modified.
#
# NOTE: each chunk is stripped on its own, so whitespace at the boundary
# between two consecutive chunks is dropped.
#
# @param string [String, nil] text chunk; nil is ignored
# @return [String, nil] the content buffer, or nil when string is nil
def concat_content(string)
  return if string.nil?

  (@current_content ||= '') << string.strip
end
#constraints(constraints) ⇒ Object
22 23 24 |
# File 'lib/bliss/parser_machine.rb', line 22

# Replaces the list of constraints evaluated in #end_element.
# Each constraint is used there through #state, #depth and #run!.
#
# @param constraints [Array] constraint objects
# @return [Array] the list that was stored
def constraints(constraints)
  @constraints = constraints
end
#end_document ⇒ Object
186 187 188 |
# File 'lib/bliss/parser_machine.rb', line 186

# End-of-document callback. Intentionally does nothing.
#
# @return [nil]
def end_document
  # puts @nodes.inspect
end
#end_element(element, attributes = []) ⇒ Object
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
# File 'lib/bliss/parser_machine.rb', line 127

# Handles the end of an element: stores the buffered text into the node
# tree, fires matching close handlers, runs matching constraints, and pops
# the element off the depth stack.
#
# Nothing is written to stdout; the former hard-coded debugging branch for
# elements named 'ad' has been removed.
#
# @param element [String] name of the element being closed
# @param attributes [Array] unused by this method
# @return [String, nil] the popped element name, or nil if nothing was
#   popped or the parser is closed
def end_element(element, attributes = [])
  return if is_closed?

  # element_transformation

  # NOTE(review): pair_at_chain / value_at_chain are Hash helpers defined
  # outside this file; presumably they return the container holding the
  # current node and the current node's value respectively - confirm.
  current = @nodes.pair_at_chain(@depth)
  value_at = @nodes.value_at_chain(@depth)

  if value_at.is_a?(Hash)
    current[element] = @current_content unless @current_content.empty?
  elsif value_at.nil? && current.is_a?(Array)
    # Repeated element: write into the most recent entry of the list.
    current = current.last
    current[element] = @current_content unless @current_content.empty?
  end
  @current_content = ''

  # TODO search on hash with xpath style
  # for example:
  # keys: */ad/url
  # keys: root/ad/url
  # @on_tag_close.keys.select {|key| @depth.match(key)}
  ##

  search_key = @depth.join('/') # element

  # Matching keys are collected before any handler runs.
  @on_tag_close.keys.select { |r| search_key.match(r) }.each do |reg|
    @on_tag_close[reg].call(value_at, @depth)
  end

  # TODO constraint should return Regexp like depth too

  pending = @constraints.select { |c| [:not_checked, :passed].include?(c.state) }
  matching = pending.select do |c|
    search_key.match(Regexp.new("#{c.depth.split('/').join('/')}$"))
  end
  matching.each { |constraint| constraint.run!(current) }

  @depth.pop if @depth.last == element
end
#is_closed? ⇒ Boolean
44 45 46 |
# File 'lib/bliss/parser_machine.rb', line 44

# Tells whether #close has been called.
#
# Always answers with true or false, even if @closed was never assigned.
#
# @return [Boolean]
def is_closed?
  @closed ? true : false
end
#on_root(&block) ⇒ Object
26 27 28 |
# File 'lib/bliss/parser_machine.rb', line 26

# Registers the block that #start_element calls with the name of the
# first element it sees. A later registration replaces the earlier one.
#
# @yieldparam root [String] name of the root element
# @return [Proc, nil] the stored block (nil when no block is given)
def on_root(&block)
  @on_root = block
end
#on_tag_close(element, block) ⇒ Object
34 35 36 37 38 |
# File 'lib/bliss/parser_machine.rb', line 34

# Registers a handler that #end_element calls when the '/'-joined element
# path ends with the given pattern.
#
# The pattern is interpolated into a Regexp unescaped, so regular
# expression syntax inside it is honoured.
#
# @param element [String] path pattern, anchored at the end with `$`
# @param block [#call] handler; #end_element calls it with the node value
#   and the depth array
# @return [Hash{Regexp => #call}] all registered close handlers
def on_tag_close(element, block)
  # TODO
  # check how do we want to handle on_tag_close depths (xpath, array, another)
  @on_tag_close[Regexp.new("#{element}$")] = block

  # Drop the list memoised by #close_tag_regexps so it is rebuilt with the
  # new key on next use.
  @close_tag_regexps = nil

  @on_tag_close
end
#on_tag_open(element, block) ⇒ Object
30 31 32 |
# File 'lib/bliss/parser_machine.rb', line 30

# Registers a handler that #start_element calls when the '/'-joined element
# path ends with the given pattern.
#
# The pattern is interpolated into a Regexp unescaped, so regular
# expression syntax inside it is honoured.
#
# @param element [String] path pattern, anchored at the end with `$`
# @param block [#call] handler; #start_element calls it with the depth array
# @return [Hash{Regexp => #call}] all registered open handlers
def on_tag_open(element, block)
  @on_tag_open[Regexp.new("#{element}$")] = block

  # Drop the list memoised by #open_tag_regexps so it is rebuilt with the
  # new key on next use.
  @open_tag_regexps = nil

  @on_tag_open
end
#start_element(element, attributes) ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
# File 'lib/bliss/parser_machine.rb', line 48

# Handles the start of an element: records the root on first call, pushes
# the element onto the depth stack, fires matching open handlers, and
# creates the element's slot in the node tree.
#
# @param element [String] name of the element being opened
# @param attributes [Array] unused by this method
# @return [String, nil] the freshly reset content buffer, or nil when the
#   parser is closed
def start_element(element, attributes)
  return if is_closed?

  # element_transformation

  if @root.nil?
    @root = element
    @on_root.call(@root) if @on_root.is_a?(Proc)
  end

  # NOTE(review): an element nested directly inside a same-named parent is
  # not pushed again - confirm this is intended.
  @depth.push(element) if @depth.last != element

  # TODO search on hash with xpath style
  # for example:
  # keys: */ad/url
  # keys: root/ad/url
  # @on_tag_close.keys.select {|key| @depth.match(key)}
  # other example:
  # keys: root/(ad|AD)/description
  ##

  search_key = @depth.join('/') # element

  # Matching keys are collected before any handler runs.
  @on_tag_open.keys.select { |r| search_key.match(r) }.each do |reg|
    @on_tag_open[reg].call(@depth)
  end

  # NOTE(review): pair_at_chain / value_at_chain are Hash helpers defined
  # outside this file; presumably they return the container holding the
  # current node and the current node's value respectively - confirm.
  current = @nodes.pair_at_chain(@depth)
  value_at = @nodes.value_at_chain(@depth)

  if current.is_a?(Hash)
    if value_at.nil?
      # First occurrence: start with an empty node.
      current[element] = {}
    elsif value_at.is_a?(Hash)
      # Repeated occurrence: grow (or create) the list of sibling nodes.
      if current[element].is_a?(Array)
        current[element].concat([{}])
      else
        current[element] = [current[element], {}]
      end
    end
    # Any other value_at (e.g. an Array) is left untouched.
  end

  @current_content = ''
end