Class: OoxmlParser::DocumentStructure
- Inherits:
-
CommonDocumentStructure
- Object
- OOXMLDocumentObject
- CommonDocumentStructure
- OoxmlParser::DocumentStructure
- Defined in:
- lib/ooxml_parser/docx_parser/document_structure.rb
Overview
Basic class for DocumentStructure
Instance Attribute Summary collapse
-
#background ⇒ DocumentBackground
Background of document.
-
#comments ⇒ Comments
Comments of document.
-
#comments_document ⇒ CommentsDocument
Comments of whole document.
-
#comments_extended ⇒ CommentsExtended
Extended comments.
-
#default_paragraph_style ⇒ DocxParagraph
Default paragraph style.
-
#default_run_style ⇒ DocxParagraphRun
Default run style.
-
#default_table_paragraph_style ⇒ DocxParagraph
Default table paragraph style.
-
#default_table_run_style ⇒ DocxParagraphRun
Default table run style.
-
#document_properties ⇒ DocumentProperties
Properties of document.
-
#elements ⇒ Array<OOXMLDocumentObject>
List of elements.
-
#notes ⇒ Note
Notes of document.
-
#numbering ⇒ Numbering
Stored numbering data.
-
#page_properties ⇒ PageProperties
Page properties of document.
-
#relationships ⇒ Relationships
Relationships.
-
#settings ⇒ DocumentSettings
Settings.
-
#styles ⇒ Styles
Styles of document.
-
#theme ⇒ PresentationTheme
(also: #theme_colors)
Theme of docx.
Attributes inherited from CommonDocumentStructure
#content_types, #default_font_size, #default_font_style, #default_font_typeface, #file_path, #root_subfolder, #unpacked_folder, #xmls_stack
Attributes inherited from OOXMLDocumentObject
Instance Method Summary collapse
-
#==(other) ⇒ True, False
Compare this object to other.
-
#document_styles ⇒ Array<DocumentStyle>
Styles of the document.
-
#element_by_description(location: :canvas, type: :docx_paragraph) ⇒ OOXMLDocumentObject
Get element by its type.
-
#initialize(params = {}) ⇒ DocumentStructure
constructor
A new instance of DocumentStructure.
-
#note_by_description(type) ⇒ Note
Get note by its description.
-
#outline(location: :canvas, type: :simple, levels_count: 1) ⇒ Array<String,String>
Return outline type.
-
#parse ⇒ DocumentStructure
Parse docx file.
-
#recognize_numbering(location: :canvas, type: :simple, paragraph_number: 0) ⇒ Array<String,String>
Detect numbering type.
Methods included from DocumentStructureHelpers
Methods included from DocumentStyleHelper
#based_on_style, #document_style_by_id, #document_style_by_name, #style_exist?
Methods included from DefaultStyleHelper
#parse_default_style, #parse_styles
Methods inherited from CommonDocumentStructure
#add_to_xmls_stack, #current_xml, #get_link_from_rels
Methods inherited from OOXMLDocumentObject
#boolean_attribute_value, #parse_xml, #with_data?
Methods included from OoxmlObjectAttributeHelper
#attribute_enabled?, #option_enabled?
Methods included from OoxmlDocumentObjectHelper
Constructor Details
#initialize(params = {}) ⇒ DocumentStructure
Returns a new instance of DocumentStructure.
57 58 59 60 61 62 63 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 57

# Create an empty document structure.
# Elements, notes and comments start as empty arrays and the document
# properties start as a fresh DocumentProperties; everything else is left
# to the parent constructor.
# @param params [Hash] passed on unchanged to the parent constructor
# @return [DocumentStructure] a new instance of DocumentStructure
def initialize(params = {})
  @document_properties = DocumentProperties.new
  # Each list gets its own array so they never alias each other
  @comments = []
  @notes = []
  @elements = []
  # Zero-argument super forwards `params` as received
  super
end
Instance Attribute Details
#background ⇒ DocumentBackground
Returns background of document.
29 30 31 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 29 def background @background end |
#comments ⇒ Comments
Returns comments of document.
33 34 35 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 33 def comments @comments end |
#comments_document ⇒ CommentsDocument
Returns comments of whole document.
45 46 47 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 45 def comments_document @comments_document end |
#comments_extended ⇒ CommentsExtended
Returns extended comments.
47 48 49 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 47 def comments_extended @comments_extended end |
#default_paragraph_style ⇒ DocxParagraph
Returns default paragraph style.
49 50 51 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 49 def default_paragraph_style @default_paragraph_style end |
#default_run_style ⇒ DocxParagraphRun
Returns default run style.
51 52 53 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 51 def default_run_style @default_run_style end |
#default_table_paragraph_style ⇒ DocxParagraph
Returns default table paragraph style.
53 54 55 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 53 def default_table_paragraph_style @default_table_paragraph_style end |
#default_table_run_style ⇒ DocxParagraphRun
Returns default table run style.
55 56 57 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 55 def default_table_run_style @default_table_run_style end |
#document_properties ⇒ DocumentProperties
Returns properties of document.
31 32 33 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 31 def document_properties @document_properties end |
#elements ⇒ Array<OOXMLDocumentObject>
Returns list of elements.
23 24 25 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 23 def elements @elements end |
#notes ⇒ Note
Returns notes of document.
27 28 29 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 27 def notes @notes end |
#numbering ⇒ Numbering
Returns stored numbering data.
35 36 37 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 35 def numbering @numbering end |
#page_properties ⇒ PageProperties
Returns page properties of document.
25 26 27 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 25 def page_properties @page_properties end |
#relationships ⇒ Relationships
Returns relationships.
41 42 43 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 41 def relationships @relationships end |
#settings ⇒ DocumentSettings
Returns settings.
43 44 45 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 43 def settings @settings end |
#styles ⇒ Styles
Returns styles of document.
37 38 39 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 37 def styles @styles end |
#theme ⇒ PresentationTheme Also known as: theme_colors
Returns theme of docx.
39 40 41 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 39 def theme @theme end |
Instance Method Details
#==(other) ⇒ True, False
Compare this object to other
70 71 72 73 74 75 76 77 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 70

# Compare this object to other.
# Only elements, page properties, notes, background, document properties
# and comments take part in the comparison.
# @param other [Object] object to compare with
# @return [True, False] result of comparison; false when `other` is not an
#   instance of this object's class (or of a subclass)
def ==(other)
  # Objects of an unrelated type (including nil) are never equal; without
  # this guard the attribute readers below raised NoMethodError on them.
  return false unless other.is_a?(self.class)

  @elements == other.elements &&
    @page_properties == other.page_properties &&
    @notes == other.notes &&
    @background == other.background &&
    @document_properties == other.document_properties &&
    @comments == other.comments
end
#document_styles ⇒ Array<DocumentStyle>
Returns styles of the document.
163 164 165 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 163

# Shortcut to the style list held by the document's styles object.
# @return [Array<DocumentStyle>] result of calling `styles` on the styles object
def document_styles
  style_container = styles
  style_container.styles
end
#element_by_description(location: :canvas, type: :docx_paragraph) ⇒ OOXMLDocumentObject
Get element by its type
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 83

# Get the elements found at a described place of the document.
# @param location [Symbol] one of :canvas, :footer, :header, :comment
# @param type [Symbol] one of :table, :docx_paragraph, :simple, :paragraph, :shape
#   (not consulted when location is :comment)
# @return [OOXMLDocumentObject] elements found at that place
# @raise [RuntimeError] if location or type is not one of the supported values
def element_by_description(location: :canvas, type: :docx_paragraph)
  case location
  when :canvas
    case type
    # NOTE(review): the canvas table is read from elements[1] while header and
    # footer tables are read from elements[0] - confirm this is intended.
    when :table then elements[1].rows[0].cells[0].elements
    when :docx_paragraph, :simple, :paragraph then elements
    when :shape then elements[0].nonempty_runs.first.alternate_content.office2007_content.data.text_box
    # NOTE(review): the message says "location" although it is `type` that is wrong.
    else raise 'Wrong location(Need One of ":table", ":paragraph", ":shape")'
    end
  when :footer, :header
    # Footer and header share the same lookups; only the note type differs.
    # The note is looked up inside each branch so an unsupported type still
    # raises before any note lookup happens.
    note_type = location == :footer ? :footer1 : :header1
    case type
    when :table then note_by_description(note_type).elements[0].rows[0].cells[0].elements
    when :docx_paragraph, :simple, :paragraph then note_by_description(note_type).elements
    when :shape
      note_by_description(note_type).elements[0].nonempty_runs.first.alternate_content.office2007_content.data.text_box
    else raise 'Wrong location(Need One of ":table", ":simple", ":shape")'
    end
  when :comment
    comments[0].paragraphs
  else
    raise 'Wrong global location(Need One of ":canvas", ":footer", ":header", ":comment")'
  end
end
#note_by_description(type) ⇒ Note
Get note by its description
128 129 130 131 132 133 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 128

# Find the first note whose type, converted to a symbol, equals `type`.
# @param type [Symbol] note type to look for
# @return [Note] the first matching note
# @raise [RuntimeError] if no note has this type
def note_by_description(type)
  matching_note = notes.find { |candidate| candidate.type.to_sym == type }
  return matching_note if matching_note

  raise 'There isn\'t this type of the note'
end
#outline(location: :canvas, type: :simple, levels_count: 1) ⇒ Array<String,String>
Return outline type
152 153 154 155 156 157 158 159 160 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 152

# Return outline type.
# For each index below `levels_count`, reads level `index` of the numbering
# attached to element `index` at the described place.
# NOTE(review): every iteration overwrites both slots of the result, so only
# the values of the last visited level are returned - confirm this is intended.
# @param location [Symbol] passed to #element_by_description
# @param type [Symbol] passed to #element_by_description
# @param levels_count [Integer] number of levels to walk
# @return [Array<String,String>] numbering format value and level text value;
#   empty array when `levels_count` is 0
def outline(location: :canvas, type: :simple, levels_count: 1)
  paragraphs = element_by_description(location: location, type: type)
  levels_count.times.each_with_object([]) do |index, format_and_text|
    level = paragraphs[index].numbering.abstruct_numbering.level_list[index]
    format_and_text[0] = level.numbering_format.value
    format_and_text[1] = level.text.value
  end
end
#parse ⇒ DocumentStructure
Parse docx file
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 169

# Parse docx file.
# Loads content types, theme, relationships and styles, walks
# `word/document.xml` to collect the body elements and page properties, and
# then parses document properties, comments and settings.
# @return [DocumentStructure] self, filled with the parsed data
def parse
  @content_types = ContentTypes.new(parent: self).parse
  @root_subfolder = 'word/'
  # Placeholder while the body is parsed; replaced by the parsed comments below
  @comments = []
  # NOTE(review): the default paragraph is built without `parent:` while the
  # default run gets `parent: self` - confirm this is intended.
  @default_paragraph_style = DocxParagraph.new
  @default_run_style = DocxParagraphRun.new(parent: self)
  @theme = PresentationTheme.new(parent: self).parse('word/theme/theme1.xml')
  @relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}word/_rels/document.xml.rels")
  parse_styles
  # Running index handed to paragraph and table parsers ('sdt' does not advance it)
  number = 0
  root_object.add_to_xmls_stack('word/document.xml')
  doc = parse_xml(root_object.current_xml)
  doc.search('//w:document').each do |document|
    document.xpath('w:background').each do |background|
      @background = DocumentBackground.new(parent: self).parse(background)
    end
    document.xpath('w:body').each do |body|
      body.xpath('*').each do |element|
        case element.name
        when 'p'
          child = element.child
          # Skip a paragraph with no child node when the previous element is a Table
          unless child.nil? && @elements.last.instance_of?(Table)
            paragraph_style = default_paragraph_style.dup.parse(element, number, default_run_style, parent: self)
            number += 1
            @elements << paragraph_style.dup
          end
        when 'tbl'
          table = Table.new(parent: self).parse(element, number, TableProperties.new)
          number += 1
          @elements << table
        when 'sdt'
          @elements << StructuredDocumentTag.new(parent: self).parse(element)
        end
      end
      body.xpath('w:sectPr').each do |sect_pr|
        @page_properties = PageProperties.new(parent: self).parse(sect_pr, default_paragraph_style, default_run_style)
        @notes = page_properties.notes # keep a copy of notes for compatibility with previous docx models
      end
    end
  end
  # Undo the add_to_xmls_stack call made before reading the document
  root_object.xmls_stack.pop
  @document_properties = DocumentProperties.new(parent: self).parse
  @comments = Comments.new(parent: self).parse
  @comments_extended = CommentsExtended.new(parent: self).parse
  @comments_document = Comments.new(parent: self,
                                    file: "#{root_object.unpacked_folder}word/#{relationships.target_by_type('commentsDocument').first}")
                               .parse
  @settings = DocumentSettings.new(parent: self).parse
  self
end
#recognize_numbering(location: :canvas, type: :simple, paragraph_number: 0) ⇒ Array<String,String>
Detect numbering type
140 141 142 143 144 145 |
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 140

# Detect numbering type.
# Reads the first level of the numbering attached to the element at
# `paragraph_number` in the described place.
# @param location [Symbol] passed to #element_by_description
# @param type [Symbol] passed to #element_by_description
# @param paragraph_number [Integer] index of the element to inspect
# @return [Array<String,String>] numbering format value and level text value
def recognize_numbering(location: :canvas, type: :simple, paragraph_number: 0)
  paragraphs = element_by_description(location: location, type: type)
  first_level = paragraphs[paragraph_number].numbering.abstruct_numbering.level_list[0]
  level_text = first_level.text.value
  [first_level.numbering_format.value, level_text]
end