Class: Bliss::Parser
- Inherits:
-
Object
- Object
- Bliss::Parser
- Defined in:
- lib/bliss/parser.rb
Instance Method Summary collapse
- #add_format(format) ⇒ Object
- #check_unhandled_bytes ⇒ Object
- #check_unhandled_bytes? ⇒ Boolean
- #close ⇒ Object
- #exceeded? ⇒ Boolean
- #file_close ⇒ Object
- #formats_details ⇒ Object
- #handle_wait_tag_close(chunk) ⇒ Object
-
#initialize(path, filepath = nil) ⇒ Parser
constructor
A new instance of Parser.
- #load_constraints_on_parser_machine ⇒ Object
- #on_max_unhandled_bytes(bytes, &block) ⇒ Object
-
#on_root(&block) ⇒ Object
To be deprecated: prefer the depth argument passed to on_tag_open or on_tag_close instead.
- #on_tag_close(element = '.', &block) ⇒ Object
- #on_tag_open(element = '.', &block) ⇒ Object
- #on_timeout(seconds, &block) ⇒ Object
- #parse ⇒ Object
- #reset_unhandled_bytes ⇒ Object
- #root ⇒ Object
- #secure_close ⇒ Object
- #wait_tag_close(element) ⇒ Object
Constructor Details
#initialize(path, filepath = nil) ⇒ Parser
Returns a new instance of Parser.
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/bliss/parser.rb', line 3

# Builds a parser for the document located at +path+.
#
# @param path [String] location later handed to the HTTP request in #parse
# @param filepath [String, nil] when given, a file opened for writing that
#   receives a copy of the chunks fed to the push parser
def initialize(path, filepath = nil)
  @path = path
  @parser_machine = Bliss::ParserMachine.new
  @push_parser = Nokogiri::XML::SAX::PushParser.new(@parser_machine)

  # The output file is closed explicitly through #file_close, so automatic
  # closing on finalization is switched off.
  @file = File.new(filepath, 'w').tap { |io| io.autoclose = false } if filepath

  @root = @nodes = nil
  @formats = []

  # Register a no-op root handler so @root is still recorded by #on_root.
  on_root {}
end
Instance Method Details
#add_format(format) ⇒ Object
22 23 24 |
# File 'lib/bliss/parser.rb', line 22

# Registers a format with this parser.
#
# @param format [Object] format to append; its +constraints+ are read by
#   #load_constraints_on_parser_machine and its +details+ by #formats_details
# @return [Array] the list of registered formats
def add_format(format)
  @formats << format
end
#check_unhandled_bytes ⇒ Object
86 87 88 89 90 91 92 93 |
# File 'lib/bliss/parser.rb', line 86

# Fires the max-unhandled-bytes callback once the byte counter has grown past
# the configured limit. The callback is one-shot: it is cleared after it runs.
#
# Does nothing when no limit is configured, when the counter has not been
# initialised yet (treated as 0), or when no callback is registered.
#
# @return [nil]
def check_unhandled_bytes
  return unless @max_unhandled_bytes
  return unless (@unhandled_bytes || 0) > @max_unhandled_bytes
  return unless @on_max_unhandled_bytes

  @on_max_unhandled_bytes.call
  @on_max_unhandled_bytes = nil
end
#check_unhandled_bytes? ⇒ Boolean
102 103 104 |
# File 'lib/bliss/parser.rb', line 102

# Tells whether an unhandled-bytes limit has been configured.
#
# @return [Boolean] true when @max_unhandled_bytes is set to a truthy value
def check_unhandled_bytes?
  !!@max_unhandled_bytes
end
#close ⇒ Object
110 111 112 |
# File 'lib/bliss/parser.rb', line 110 def close @parser_machine.close end |
#exceeded? ⇒ Boolean
95 96 97 98 99 100 |
# File 'lib/bliss/parser.rb', line 95

# Tells whether the unhandled-bytes counter is above the configured limit.
#
# Always answers with a real boolean: false when no limit is configured, when
# the counter has not been initialised yet (treated as 0), or when the counter
# is within the limit.
#
# @return [Boolean]
def exceeded?
  return false unless check_unhandled_bytes?

  (@unhandled_bytes || 0) > @max_unhandled_bytes
end
#file_close ⇒ Object
215 216 217 218 219 |
# File 'lib/bliss/parser.rb', line 215

# Closes the output file, if one was opened by the constructor.
#
# @return [nil]
def file_close
  @file.close if @file
end
#formats_details ⇒ Object
30 31 32 33 34 |
# File 'lib/bliss/parser.rb', line 30

# Prints the inspected +details+ of every registered format to standard
# output, one per line.
#
# @return [Array] the list of registered formats
def formats_details
  @formats.each { |fmt| puts fmt.details.inspect }
end
#handle_wait_tag_close(chunk) ⇒ Object
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
# File 'lib/bliss/parser.rb', line 199

# Writes +chunk+ to the output file while waiting for the closing tag set by
# #wait_tag_close. When the closing tag is found, the chunk is cut right after
# it, the root element is closed, and the reactor is shut down via
# #secure_close. Otherwise the whole chunk is written and streaming continues.
#
# The cut position is derived from the actual length of the closing tag, so
# tags of any length are kept intact.
#
# Any StandardError raised while writing is treated as best-effort failure:
# the error is swallowed and the parser is shut down.
#
# @param chunk [String] raw data received from the HTTP stream
# @return [Object] the result of #secure_close, or the output file when the
#   closing tag was not found
def handle_wait_tag_close(chunk)
  tag_start = chunk.index(@wait_tag_close)

  if tag_start
    tag_end = tag_start + @wait_tag_close.length
    @file << chunk[0...tag_end]
    @file << "</#{root}>" # TODO set this by using actual depth, so all tags get closed
    secure_close
  else
    @file << chunk
  end
rescue
  secure_close
end
#load_constraints_on_parser_machine ⇒ Object
26 27 28 |
# File 'lib/bliss/parser.rb', line 26

# Gathers the constraints of every registered format into one flat list and
# hands it to the parser machine.
#
# @return [Object] whatever the parser machine's +constraints+ returns
def load_constraints_on_parser_machine
  all_constraints = @formats.map(&:constraints).flatten
  @parser_machine.constraints(all_constraints)
end
#on_max_unhandled_bytes(bytes, &block) ⇒ Object
67 68 69 70 |
# File 'lib/bliss/parser.rb', line 67 def on_max_unhandled_bytes(bytes, &block) @max_unhandled_bytes = bytes @on_max_unhandled_bytes = block end |
#on_root(&block) ⇒ Object
To be deprecated: prefer the depth argument passed to on_tag_open or on_tag_close instead.
37 38 39 40 41 42 43 |
# File 'lib/bliss/parser.rb', line 37

# Registers a handler for the document root on the parser machine. The root
# is remembered in @root before the handler is called with it.
#
# @param block [Proc] handler receiving the root
# @return [false, Object] false when no block is given, otherwise whatever the
#   parser machine's +on_root+ returns
def on_root(&block)
  return false unless block.is_a?(Proc)

  @parser_machine.on_root do |node|
    @root = node
    block.call(node)
  end
end
#on_tag_close(element = '.', &block) ⇒ Object
58 59 60 61 62 63 64 65 |
# File 'lib/bliss/parser.rb', line 58

# Registers a handler that runs when a matching element is closed. Before the
# handler runs, the unhandled-bytes counter is reset.
#
# @param element [String] element selector passed through to the parser
#   machine; defaults to '.'
# @param block [Proc] handler receiving (hash, depth)
# @return [false, Object] false when no block is given (nothing is
#   registered), otherwise whatever the parser machine's +on_tag_close+ returns
def on_tag_close(element = '.', &block)
  # Without a block the wrapper below could only fail later, inside the
  # parser callback, so refuse up front like #on_root does.
  return false unless block

  wrapped = Proc.new do |hash, depth|
    reset_unhandled_bytes
    block.call(hash, depth)
  end

  @parser_machine.on_tag_close(element, wrapped)
end
#on_tag_open(element = '.', &block) ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/bliss/parser.rb', line 45

# Registers a handler that runs when a matching element is opened. Unless the
# element is 'default', the unhandled-bytes counter is reset before the
# handler runs.
#
# @param element [String] element selector passed through to the parser
#   machine; defaults to '.'
# @param block [Proc] handler taking exactly one argument (depth)
# @return [false, Object] false when no block is given or its arity is not 1,
#   otherwise whatever the parser machine's +on_tag_open+ returns
def on_tag_open(element = '.', &block)
  # A missing block is rejected the same way as a block of the wrong arity,
  # instead of raising NoMethodError on nil.
  return false if block.nil? || block.arity != 1

  wrapped = Proc.new do |depth|
    reset_unhandled_bytes unless element == 'default'
    block.call(depth)
  end

  @parser_machine.on_tag_open(element, wrapped)
end
#on_timeout(seconds, &block) ⇒ Object
72 73 74 75 |
# File 'lib/bliss/parser.rb', line 72 def on_timeout(seconds, &block) @timeout = seconds @on_timeout = block end |
#parse ⇒ Object
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 |
# File 'lib/bliss/parser.rb', line 114

# Streams the document at @path over HTTP inside an EventMachine reactor and
# feeds every received chunk to the push parser (and to the output file, when
# one was configured). Blocks until the reactor is stopped via #secure_close.
#
# Gzip content is detected from the response headers and inflated on the fly.
# Once the parser machine reports it is closed, remaining chunks are either
# dropped (limit exceeded), written up to the awaited closing tag
# (#handle_wait_tag_close), or cause an immediate shutdown.
#
# The output file is closed even when an exception escapes the reactor.
#
# @raise [Bliss::EncodingError] when the XML parser reports a syntax error
#   whose message mentions "encoding"
# @return [nil]
def parse
  reset_unhandled_bytes if check_unhandled_bytes?
  load_constraints_on_parser_machine

  EM.run do
    http =
      if @timeout
        EM::HttpRequest.new(@path, :connect_timeout => @timeout, :inactivity_timeout => @timeout).get
      else
        EM::HttpRequest.new(@path).get
      end

    @autodetect_compression = true
    compression = :none
    if @autodetect_compression
      http.headers do
        header = http.response_header
        gzipped = (/^attachment.+filename.+\.gz/i === header['CONTENT_DISPOSITION']) ||
                  header.compressed? ||
                  ["application/octet-stream", "application/x-gzip"].include?(header['CONTENT_TYPE'])
        if gzipped
          # MAX_WBITS + 16 makes zlib expect a gzip header.
          @zstream = Zlib::Inflate.new(Zlib::MAX_WBITS + 16)
          compression = :gzip
        end
      end
    end

    http.stream do |chunk|
      next unless chunk

      chunk.force_encoding('UTF-8')

      # The counter tracks bytes as received from the network (before inflating).
      if check_unhandled_bytes?
        @unhandled_bytes += chunk.length
        check_unhandled_bytes
      end

      unless @parser_machine.is_closed?
        begin
          if compression == :gzip
            chunk = @zstream.inflate(chunk)
            chunk.force_encoding('UTF-8')
          end
          @push_parser << chunk
          @file << chunk if @file
        rescue Nokogiri::XML::SyntaxError => e
          # Only encoding problems are fatal; other syntax errors are ignored.
          # NOTE(review): the receiver was garbled in the source ("e..include?");
          # e.message is the presumed original - confirm.
          raise Bliss::EncodingError, "Wrong encoding given" if e.message.include?("encoding")
        end
        next
      end

      # Parser machine already closed: decide how to wind down.
      # NOTE(review): on this path gzip chunks are not inflated before being
      # searched/written by #handle_wait_tag_close - confirm whether intended.
      if exceeded?
        secure_close
      elsif @file
        if @wait_tag_close
          handle_wait_tag_close(chunk)
        else
          secure_close
        end
      end
    end

    http.errback do
      # A timeout may be configured without a handler block.
      @on_timeout.call if @timeout && @on_timeout
      secure_close
    end

    http.callback { secure_close }
  end

  nil
ensure
  file_close
end
#reset_unhandled_bytes ⇒ Object
81 82 83 84 |
# File 'lib/bliss/parser.rb', line 81

# Sets the unhandled-bytes counter back to zero, provided a limit has been
# configured.
#
# @return [false, Integer] false when no limit is configured, otherwise 0
def reset_unhandled_bytes
  return false unless check_unhandled_bytes?

  @unhandled_bytes = 0
end
#root ⇒ Object
106 107 108 |
# File 'lib/bliss/parser.rb', line 106 def root @root end |
#secure_close ⇒ Object
221 222 223 224 225 226 227 228 229 230 231 |
# File 'lib/bliss/parser.rb', line 221

# Shuts the parser down: closes the inflate stream when one exists, ignoring
# any StandardError it raises, and always stops the EventMachine reactor.
#
# @return [Object, nil] the result of closing the inflate stream, or nil when
#   there is none or closing failed
def secure_close
  @zstream.close if @zstream
rescue
  # Best effort: a failing stream close must not prevent the reactor stop.
ensure
  EM.stop
end
#wait_tag_close(element) ⇒ Object
77 78 79 |
# File 'lib/bliss/parser.rb', line 77

# Remembers the closing tag of +element+; #handle_wait_tag_close searches
# incoming chunks for it.
#
# @param element [#to_s] element name, without angle brackets
# @return [String] the stored closing tag
def wait_tag_close(element)
  closing_tag = "</#{element}>"
  @wait_tag_close = closing_tag
end