Class: Xsv::SaxParser
- Inherits:
-
Object
- Object
- Xsv::SaxParser
- Defined in:
- lib/xsv/sax_parser.rb
Direct Known Subclasses
RelationshipsHandler, SharedStringsParser, SheetBoundsHandler, SheetRowsHandler, SheetsIdsHandler, StylesHandler
Constant Summary collapse
- ATTR_REGEX =
/((\p{Alnum}+)="(.*?)")/mn
Instance Method Summary collapse
Instance Method Details
#parse(io) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/xsv/sax_parser.rb', line 9

# Streams a minimal, SAX-style parse of an XML document, dispatching events to
# callback methods defined on the receiver.
#
# Callbacks:
# * +start_element(name, attributes)+ is always invoked for an opening tag.
#   +attributes+ is a Hash keyed by Symbol, or +nil+ when the tag carries no
#   attribute section at all.
# * +end_element(name)+ is invoked for closing tags and for self-closing tags
#   (+<foo/>+ and +<foo a="1"/>+), but only if the receiver responds to it.
# * +characters(text)+ is invoked with entity-unescaped text found before a
#   tag, but only if the receiver responds to it and the text is non-empty.
#
# A namespace prefix (+ns:tag+) is stripped from tag names. Only a single
# space is recognised as the separator between a tag name and its attributes.
# Text following the last tag of the document is discarded.
#
# @param io [String, #sysread] either the whole document as a String (it is
#   duplicated, never mutated) or an object whose +sysread+ raises EOFError or
#   returns nil at end of input
# @return [nil]
# @raise [Xsv::Error] if the input ends in the middle of a tag
def parse(io)
  # Probe the optional callbacks once instead of on every event.
  responds_to_end_element = respond_to?(:end_element)
  responds_to_characters = respond_to?(:characters)

  state = :look_start

  if io.is_a?(String)
    # The whole document is already in memory; work on a copy because the
    # buffer is consumed destructively.
    pbuf = io.dup
    eof_reached = true
    must_read = false
  else
    pbuf = String.new(capacity: 8192)
    eof_reached = false
    must_read = true
  end

  loop do
    if must_read
      begin
        pbuf << io.sysread(2048)
      rescue EOFError, TypeError
        # EOFError is thrown by IO, rubyzip returns nil from sysread on EOF
        eof_reached = true
      end

      must_read = false
    end

    if state == :look_start
      if (o = pbuf.index("<"))
        # Everything up to (but excluding) "<" is character data.
        chars = pbuf.slice!(0, o + 1).chop!.force_encoding("utf-8")

        if responds_to_characters && !chars.empty?
          characters(CGI.unescapeHTML(chars))
        end

        state = :look_end
      elsif eof_reached
        # Discard anything after the last tag in the document
        break
      else
        # Continue loop to read more data into the buffer
        must_read = true
        next
      end
    end

    if state == :look_end
      if (o = pbuf.index(">"))
        if (s = pbuf.index(" ")) && s < o
          tag_name = pbuf.slice!(0, s + 1).chop!
          # The slice runs up to and including ">"; drop it so a trailing "/"
          # (self-closing tag) is actually the last character of args.
          args = pbuf.slice!(0, o - s)
          args.chop!
          self_closing = args.end_with?("/")
        else
          tag_name = pbuf.slice!(0, o + 1).chop!
          args = nil
          # <foo/> without attributes: the slash is glued to the tag name.
          self_closing = tag_name.delete_suffix!("/")
        end

        is_close_tag = tag_name.delete_prefix!("/")

        # Strip XML namespace from tag
        if (offset = tag_name.index(":"))
          tag_name.slice!(0, offset + 1)
        end

        if is_close_tag
          end_element(tag_name) if responds_to_end_element
        else
          if args.nil?
            start_element(tag_name, nil)
          else
            attribute_buffer = {}
            # Each match is [whole, name, value].
            args.scan(ATTR_REGEX).each do |attr|
              attribute_buffer[attr[1].to_sym] = attr[2]
            end

            start_element(tag_name, attribute_buffer)
          end

          # A self-closing tag is both the start and the end of its element.
          end_element(tag_name) if self_closing && responds_to_end_element
        end

        state = :look_start
      elsif eof_reached
        raise Xsv::Error, "Malformed XML document, looking for end of tag beyond EOF"
      else
        # The tag is split across reads; fetch more data and retry.
        must_read = true
      end
    end
  end
end