Class: Doc2Text::Markdown::DocxParser
- Inherits: Nokogiri::XML::SAX::Document
- Inheritance chain: Object → Nokogiri::XML::SAX::Document → Doc2Text::Markdown::DocxParser
- Defined in:
- lib/doc2text/docx/markdown_docx_parser.rb
Instance Method Summary
- #characters(string) ⇒ Object
- #close ⇒ Object
- #end_element_namespace(name, prefix = nil, uri = nil) ⇒ Object
- #initialize(output, styles_xml_root = nil) ⇒ DocxParser (constructor): Returns a new instance of DocxParser.
- #logger ⇒ Object
- #print_tree(node) ⇒ Object
- #start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) ⇒ Object
- #xpath(string) ⇒ Object: Selects nodes XPath-style; supports selecting from the root node.
- #xpath_search_nodes(path, xml_root) ⇒ Object
Constructor Details
#initialize(output, styles_xml_root = nil) ⇒ DocxParser
Returns a new instance of DocxParser.
6 7 8 9 10 |
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 6
#
# Stores the collaborators the SAX callbacks work with.
#
# @param output [Object] sink kept in @output; the other callbacks of this
#   class append to it with `<<` and close it with `close`
# @param styles_xml_root [Object, nil] optional second tree root kept in
#   @styles_xml_root; `xpath` searches it in addition to the document tree
def initialize(output, styles_xml_root = nil)
  @output = output
  @styles_xml_root = styles_xml_root
  # Starts out empty; nothing in the methods listed here reads or fills it.
  @automatic_styles = {}
end
Instance Method Details
#characters(string) ⇒ Object
30 31 32 33 34 35 |
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 30
#
# SAX callback for character data. Whitespace-only chunks are dropped;
# anything else is wrapped in a PlainText node and appended to the children
# of the element currently being parsed.
#
# @param string [String] the character data handed over by the SAX parser
# @return [nil, Object] nil for whitespace-only input, otherwise the result
#   of appending to the current node's children
def characters(string)
  return if string.strip.empty?

  text_node = Docx::XmlNodes::PlainText.new(string)
  @current_node.children << text_node
end
#close ⇒ Object
37 38 39 |
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 37
#
# Closes the output sink that was passed to the constructor.
#
# @return [Object] whatever the sink's own `close` returns
def close
  @output.close
end
#end_element_namespace(name, prefix = nil, uri = nil) ⇒ Object
22 23 24 25 26 27 28 |
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 22
#
# SAX callback for a closing tag. When the element being closed is a direct
# child of the body element, its rendered form is appended to the output and
# the node is deleted; in every case the parent becomes the current node.
#
# NOTE(review): in the extracted listing the method name after
# `@current_node.` on the output line was lost, which left the code
# unparseable. `expand` is assumed here (it matches the upstream doc2text
# parser) -- confirm against the real source file.
#
# @param name [String] local name of the element being closed (unused)
# @param prefix [String, nil] namespace prefix (unused)
# @param uri [String, nil] namespace URI (unused)
# @return [Object, nil] the new current node (the parent of the closed one)
def end_element_namespace(name, prefix = nil, uri = nil)
  parent = @current_node.parent
  if parent && parent.body?
    @output << @current_node.expand
    @current_node.delete
  end
  @current_node = parent
end
#logger ⇒ Object
76 77 78 |
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 76
#
# Lazily builds a Logger writing to STDOUT and reuses it on later calls.
#
# @return [Logger] the memoized logger
def logger
  @logger = Logger.new(STDOUT) unless @logger
  @logger
end
#print_tree(node) ⇒ Object
41 42 43 44 45 46 |
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 41
#
# Debug helper: prints +node+ with `puts`, then does the same for each of
# its children recursively, so parents are printed before their children.
#
# @param node [Object] any object responding to `children`
# @return [Object] the children collection of +node+
def print_tree(node)
  puts(node)
  node.children.each { |descendant| print_tree(descendant) }
end
#start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) ⇒ Object
12 13 14 15 16 17 18 19 20 |
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 12
#
# SAX callback for an opening tag. The first element seen becomes both the
# tree root and the current node; every later element is created with the
# current node as its parent, appended to that node's children, and then
# made the current node itself.
#
# @param name [String] local name of the element
# @param attrs [Array] attributes, passed through to the node factory
# @param prefix [String, nil] namespace prefix, passed through to the factory
# @param uri [String, nil] namespace URI (unused)
# @param ns [Array] namespace declarations (unused)
# @return [Object] the node created for this element
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
  if @xml_root
    child = Docx::XmlNodes::Node.create_node(prefix, name, @current_node, attrs, self)
    @current_node.children << child
    @current_node = child
  else
    @xml_root = @current_node = Docx::XmlNodes::Node.create_node(prefix, name, nil, attrs, self)
  end
end
#xpath(string) ⇒ Object
Selects nodes XPath-style. Only absolute paths that start at the root node are supported.
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 50
#
# Select nodes XPath-style. Only a small subset is supported: one or more
# absolute paths made of plain element names (`/w:document/w:body`), joined
# with `|`. Each path is looked up in the parsed document tree and, when one
# was given to the constructor, in the styles tree; the matches are
# concatenated in that order, pattern by pattern.
#
# The whole pattern must match (`\A`/`\z`). With the former `^`/`$` line
# anchors a multi-line string passed validation and its lines were merged
# into a single path by `scan`; a pattern with a trailing newline is
# therefore rejected as well.
#
# A root that is not set (for example the document root before the first
# element has been parsed) is skipped instead of raising NoMethodError.
#
# @param string [String] the path expression
# @return [Array] the matching nodes, possibly empty
# @raise [Doc2Text::XmlError] if the expression is empty or uses unsupported
#   syntax
def xpath(string)
  patterns = string.split('|')
  raise Doc2Text::XmlError, 'it does not support this xpath syntax' if patterns.empty?

  roots = [@xml_root, @styles_xml_root].compact
  patterns.each_with_object([]) do |pattern, result|
    unless pattern =~ %r{\A(/[\w:\-]+)+\z}
      raise Doc2Text::XmlError, 'it does not support this xpath syntax'
    end

    path = pattern.scan(/[\w:\-]+/)
    roots.each { |root| result.concat(xpath_search_nodes(path, root)) }
  end
end
#xpath_search_nodes(path, xml_root) ⇒ Object
66 67 68 69 70 71 72 73 74 |
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 66
#
# Walks +path+ one element name at a time, starting from +xml_root+. At each
# step only the candidates whose `xml_name` equals the current name are
# kept; for every step but the last the search then moves on to the children
# of the kept nodes. The walk stops early once no candidate is left.
#
# @param path [Array<String>] element names from the root downwards
# @param xml_root [Object] node responding to `xml_name` and `children`
# @return [Array] nodes matching the full path; `[xml_root]` for an empty path
def xpath_search_nodes(path, xml_root)
  last_position = path.length - 1
  candidates = [xml_root]
  path.each_with_index do |segment, position|
    candidates = candidates.select { |node| node.xml_name == segment }
    candidates = candidates.map(&:children).flatten if position != last_position
    break if candidates.empty?
  end
  candidates
end