Class: Doc2Text::Markdown::DocxParser

Inherits:
Nokogiri::XML::SAX::Document
  • Object
show all
Defined in:
lib/doc2text/docx/markdown_docx_parser.rb

Instance Method Summary collapse

Constructor Details

#initialize(output, styles_xml_root = nil) ⇒ DocxParser

Returns a new instance of DocxParser.



6
7
8
9
10
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 6

# Builds a parser that writes to +output+.
#
# @param output [#<<, #close] sink that receives expanded nodes via +<<+ and
#   is closed by #close
# @param styles_xml_root [Object, nil] optional second root node that #xpath
#   searches in addition to the parsed document root
def initialize(output, styles_xml_root = nil)
  @output, @styles_xml_root = output, styles_xml_root
  # Starts out empty; nothing in the methods shown here fills it.
  @automatic_styles = {}
end

Instance Method Details

#characters(string) ⇒ Object



30
31
32
33
34
35
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 30

# SAX callback for character data.
#
# Text that is empty or consists only of whitespace is dropped; anything
# else is wrapped in a PlainText node and appended to the children of the
# node currently being built.
#
# @param string [String] the raw character data
# @return [Object, nil] the result of the append, or nil when the text was
#   skipped
def characters(string)
  return if string.strip.empty?

  text_node = Docx::XmlNodes::PlainText.new(string)
  @current_node.children << text_node
end

#close ⇒ Object



37
38
39
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 37

# Closes the output sink that was handed to the constructor by delegating to
# its +close+ method.
#
# @return [Object] whatever +@output.close+ returns
def close
  @output.close
end

#end_element_namespace(name, prefix = nil, uri = nil) ⇒ Object



22
23
24
25
26
27
28
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 22

# SAX callback for a closing tag.
#
# When the node being closed sits directly under a node that reports
# +body?+, its expanded form is appended to the output and the node's
# +delete+ is called. In every case the parser then steps back up so that
# the parent becomes the current node.
#
# @param name [String] element name (not used here)
# @param prefix [String, nil] namespace prefix (not used here)
# @param uri [String, nil] namespace URI (not used here)
# @return [Object, nil] the new current node, nil once the root is closed
def end_element_namespace(name, prefix = nil, uri = nil)
  enclosing = @current_node.parent
  if enclosing && enclosing.body?
    @output << @current_node.expand
    @current_node.delete
  end
  # Read the parent again rather than reusing the local: delete has run in
  # between, and the original re-reads it at this point too.
  @current_node = @current_node.parent
end

#logger ⇒ Object



76
77
78
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 76

# Lazily built logger writing to STDOUT; the same instance is returned on
# every later call.
#
# @return [Logger]
def logger
  return @logger if @logger

  @logger = Logger.new(STDOUT)
end


41
42
43
44
45
46
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 41

# Debug helper: writes +node+ and then all of its descendants to standard
# output, one per line, parents before children (depth-first).
#
# @param node [#children] the subtree to print
# @return [Object] the children collection of +node+
def print_tree(node)
  puts node
  node.children.each { |descendant| print_tree(descendant) }
end

#start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) ⇒ Object



12
13
14
15
16
17
18
19
20
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 12

# SAX callback for an opening tag.
#
# Creates a node for the element and makes it the current node. The first
# element seen becomes the document root; every later element is appended
# to the children of the node that was current when the tag opened.
#
# @param name [String] element name
# @param attrs [Array] element attributes, passed through to the node
# @param prefix [String, nil] namespace prefix, passed through to the node
# @param uri [String, nil] namespace URI (not used here)
# @param ns [Array] namespace declarations (not used here)
# @return [Object] the newly created node
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
  enclosing = @xml_root ? @current_node : nil
  node = Docx::XmlNodes::Node.create_node(prefix, name, enclosing, attrs, self)

  if @xml_root
    enclosing.children << node
  else
    @xml_root = node
  end

  @current_node = node
end

#xpath(string) ⇒ Object

Select nodes xpath style

  • supports selecting from the root node

Raises:

  • (Doc2Text::XmlError) — if the expression is empty or uses unsupported syntax



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 50

# Selects nodes using a small subset of XPath.
#
# Only absolute, slash-separated element paths are understood (for example
# "/root/child"); several of them may be combined with "|". Each path is
# looked up from the parsed document root and, when a styles root was given
# to the constructor, from that root as well.
#
# @param string [String] one or more absolute paths joined by "|"
# @return [Array] the matches for every path, in the order the paths were
#   given; for each path, document matches come before styles matches
# @raise [Doc2Text::XmlError] if the expression is empty or any path is not
#   a plain absolute element path
def xpath(string)
  patterns = string.split('|')
  raise Doc2Text::XmlError, 'it does not support this xpath syntax' if patterns.empty?

  patterns.each_with_object([]) do |pattern, result|
    # \A and \z anchor the whole string. ^ and $ anchor a single line only,
    # so a multi-line pattern with one well-formed line used to pass the
    # check and the scan below then merged every line into one path.
    unless pattern =~ %r{\A(/[\w:\-]+)+\z}
      raise Doc2Text::XmlError, 'it does not support this xpath syntax'
    end

    path = pattern.scan(/[\w:\-]+/)
    result.concat(xpath_search_nodes(path, @xml_root))
    result.concat(xpath_search_nodes(path, @styles_xml_root)) if @styles_xml_root
  end
end

#xpath_search_nodes(path, xml_root) ⇒ Object



66
67
68
69
70
71
72
73
74
# File 'lib/doc2text/docx/markdown_docx_parser.rb', line 66

# Walks +path+ down from +xml_root+, one element name per level.
#
# At each step only the candidates whose +xml_name+ equals the step are
# kept; for every step but the last the search then moves on to their
# children. The walk stops early once no candidates remain.
#
# @param path [Array<String>] element names, outermost first
# @param xml_root [#xml_name, #children] node the first name is matched
#   against
# @return [Array] nodes matching the final step (empty when nothing matches;
#   just +xml_root+ when +path+ is empty)
def xpath_search_nodes(path, xml_root)
  final_depth = path.length - 1
  candidates = [xml_root]

  path.each_with_index do |step_name, depth|
    candidates = candidates.select { |node| node.xml_name == step_name }
    break if candidates.empty?
    next if depth == final_depth

    candidates = candidates.map(&:children).flatten
    break if candidates.empty?
  end

  candidates
end