Class: Omnidocx::Docx
- Inherits:
-
Object
- Object
- Omnidocx::Docx
- Defined in:
- lib/omnidocx.rb
Constant Summary collapse
- DOCUMENT_FILE_PATH =
'word/document.xml'
- RELATIONSHIP_FILE_PATH =
'word/_rels/document.xml.rels'
- CONTENT_TYPES_FILE =
'[Content_Types].xml'
- HEADER_RELS_FILE_PATH =
'word/_rels/header1.xml.rels'
- FOOTER_RELS_FILE_PATH =
'word/_rels/footer1.xml.rels'
- STYLES_FILE_PATH =
"word/styles.xml"
- HEADER_FILE_PATH =
"word/header1.xml"
- FOOTER_FILE_PATH =
"word/footer1.xml"
- MEDIA_TYPE =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
- EMUSPERINCH =
914400
- EMUSPERCM =
360000
- HORIZONTAL_DPI =
115
- VERTICAL_DPI =
117
- NAMESPACES =
{ "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main", "wp": "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "a": "http://schemas.openxmlformats.org/drawingml/2006/main", "pic": "http://schemas.openxmlformats.org/drawingml/2006/picture", "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships" }
- IMAGE_ELEMENT =
'<w:p xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" w:rsidR="00F127EA" w:rsidRDefault="00F127EA" w:rsidP="00BF4C96"><w:pPr><w:jc w:val="center"/></w:pPr><w:r><w:rPr><w:noProof/><w:lang w:eastAsia="en-IN"/></w:rPr><w:drawing><wp:inline xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" distT="0" distB="0" distL="0" distR="0"><wp:extent cx="" cy=""/><wp:effectExtent l="0" t="0" r="2540" b="1905"/><wp:docPr id="" name=""/><wp:cNvGraphicFramePr><a:graphicFrameLocks xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" noChangeAspect="1"/></wp:cNvGraphicFramePr><a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture"><pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture"><pic:nvPicPr><pic:cNvPr id="" name=""/><pic:cNvPicPr/></pic:nvPicPr><pic:blipFill><a:blip xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" r:embed=""><a:extLst><a:ext uri="{28A0092B-C50C-407E-A947-70E740481C1C}"><a14:useLocalDpi xmlns:a14="http://schemas.microsoft.com/office/drawing/2010/main" val="0"/></a:ext></a:extLst></a:blip><a:stretch><a:fillRect/></a:stretch></pic:blipFill><pic:spPr><a:xfrm><a:off x="0" y="0"/><a:ext cx="" cy=""/></a:xfrm><a:prstGeom prst="rect"><a:avLst/></a:prstGeom></pic:spPr></pic:pic></a:graphicData></a:graphic></wp:inline></w:drawing></w:r></w:p>'
Class Method Summary collapse
- .merge_documents(documents_to_merge = [], final_path, page_break) ⇒ Object
- .replace_doc_content(replacement_hash = {}, template_path, final_path) ⇒ Object
- .replace_footer_content(replacement_hash = {}, template_path, final_path) ⇒ Object
- .replace_header_content(replacement_hash = {}, template_path, final_path) ⇒ Object
- .write_images_to_doc(images_to_write = [], doc_path, final_path) ⇒ Object
Class Method Details
.merge_documents(documents_to_merge = [], final_path, page_break) ⇒ Object
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 |
# File 'lib/omnidocx.rb', line 177

# Merges several docx files into one document written to +final_path+.
#
# The first document is the base: its body receives the body content of every
# following document, and only its non-media parts are copied to the output.
# Images, image relationships, table styles and drawing ids of every document
# are renumbered from shared counters so that they do not clash after merging.
#
# @param documents_to_merge [Array<String>] paths of the docx files, in merge order
# @param final_path [String] path the merged docx is moved to
# @param page_break [Boolean] when truthy, a page-break paragraph is inserted
#   after each document except the last
# @return [String, Object] the message "Pass atleast two documents to be merged"
#   when fewer than two documents are given, otherwise the result of FileUtils.mv
def self.merge_documents(documents_to_merge = [], final_path, page_break)
  documents_to_merge_count = documents_to_merge.count

  # validated before the temp file is created, so the early return leaves nothing behind
  if documents_to_merge_count < 2
    return "Pass atleast two documents to be merged" # minimum two documents required to merge
  end

  temp_file = Tempfile.new('docxedit-')

  begin
    # first document, to which the others will be appended
    @main_document_zip = Zip::File.new(documents_to_merge.first)
    @main_document_xml = Nokogiri::XML(@main_document_zip.read(DOCUMENT_FILE_PATH))
    @main_body = @main_document_xml.xpath("//w:body")
    @rel_doc = ""
    @cont_type_doc = ""
    @style_doc = ""

    doc_cnt = 0
    # counter used for new image names / relationship ids; starts high (100) to avoid duplication
    cnt = 100
    # counter used to renumber table style ids
    tbl_cnt = 10
    # per document: old media file name => new image number
    media_hash = {}
    # per document: old relationship id => new image number (as a string)
    rid_hash = {}
    # per document: old table style id => new table number
    table_hash = {}
    # per document: media file names referenced from the header/footer relationships
    head_foot_media = {}
    # counter for the docPr id of every drawing in the merged body
    doc_pr_id = 100
    # extensions / part names already declared in the content types XML
    default_extensions = []
    override_partnames = []
    # content type nodes found in later documents that the first document lacks
    additional_cont_type_entries = []

    # prepare the initial set of data from the first document; only the three
    # parts that are needed are read
    @main_document_zip.entries.each do |zip_entry|
      case zip_entry.name
      when RELATIONSHIP_FILE_PATH
        @rel_doc = Nokogiri::XML(zip_entry.get_input_stream.read)
      when STYLES_FILE_PATH
        # updated later on with the additional table styles
        @style_doc = Nokogiri::XML(zip_entry.get_input_stream.read)
      when CONTENT_TYPES_FILE
        # updated later on with the additional media type info
        @cont_type_doc = Nokogiri::XML(zip_entry.get_input_stream.read)
        @cont_type_doc.css("Default").each { |node| default_extensions << node["Extension"] }
        @cont_type_doc.css("Override").each { |node| override_partnames << node["PartName"] }
      end
    end

    # opening a new zip for the final document
    Zip::OutputStream.open(temp_file.path) do |zos|
      documents_to_merge.each do |doc_path|
        doc_key = "doc#{doc_cnt}"
        media_hash[doc_key] = {}
        rid_hash[doc_key] = {}
        head_foot_media[doc_key] = []
        table_hash[doc_key] = {}
        zip_file = Zip::File.new(doc_path)

        # first pass: collect header/footer media names and missing content types
        zip_file.entries.each do |e|
          if [HEADER_RELS_FILE_PATH, FOOTER_RELS_FILE_PATH].include?(e.name)
            hf_xml = Nokogiri::XML(e.get_input_stream.read)
            hf_xml.css("Relationship").each do |rel_node|
              # header/footer media keeps its original file name
              head_foot_media[doc_key] << rel_node["Target"].gsub("media/", "")
            end
          end

          next unless e.name == CONTENT_TYPES_FILE

          cont_type_xml = Nokogiri::XML(e.get_input_stream.read)
          cont_type_xml.css("Default").each do |node|
            # only extensions not yet declared are carried over
            if !default_extensions.include?(node["Extension"]) && !node.to_xml.empty?
              additional_cont_type_entries << node
              default_extensions << node["Extension"]
            end
          end
          cont_type_xml.css("Override").each do |node|
            # only part names not yet declared are carried over
            if !override_partnames.include?(node["PartName"]) && !node.to_xml.empty?
              additional_cont_type_entries << node
              # attribute name is case-sensitive: "PartName", not "Partname"
              override_partnames << node["PartName"]
            end
          end
        end

        # second pass: copy entries into the new zip, renaming body images
        zip_file.entries.each do |e|
          next if [DOCUMENT_FILE_PATH, RELATIONSHIP_FILE_PATH, CONTENT_TYPES_FILE, STYLES_FILE_PATH].include?(e.name)

          if e.name.include?("word/media/image")
            if head_foot_media[doc_key].include?(e.name.gsub("word/media/", ""))
              # media referenced by the header/footer is written under its own name
              e_name = e.name
            else
              e_name = e.name.gsub(/image[0-9]*./, "image#{cnt}.")
              # remember old media file name => new image number
              media_hash[doc_key][e.name.gsub("word/media/", "")] = cnt
              cnt += 1
            end
            zos.put_next_entry(e_name)
            zos.print e.get_input_stream.read
          elsif doc_cnt == 0
            # untouched parts are copied from the first document only, to avoid duplication
            zos.put_next_entry(e.name)
            zos.print e.get_input_stream.read
          end
        end

        # renumbering the style ids of the tables present in the document content XML
        doc_content = doc_cnt == 0 ? @main_body : Nokogiri::XML(zip_file.read(DOCUMENT_FILE_PATH))
        doc_content.xpath("//w:tbl").each do |tbl_node|
          style_last = tbl_node.xpath('.//w:tblStyle').last
          next if style_last.nil?

          val_attr = style_last.attributes['val']
          table_hash[doc_key][val_attr.value.to_s] = tbl_cnt
          val_attr.value = val_attr.value.gsub(/[0-9]+/, tbl_cnt.to_s)
          tbl_cnt += 1
        end

        zip_file.entries.each do |e|
          # pointing the image relationships at the renamed media files
          if e.name == RELATIONSHIP_FILE_PATH
            rel_xml = doc_cnt == 0 ? @rel_doc : Nokogiri::XML(e.get_input_stream.read)
            rel_xml.css("Relationship").each do |node|
              next unless node.values.to_s.include?("image")

              i = media_hash[doc_key][node['Target'].to_s.gsub("media/", "")]
              # no entry means the target was not renamed above (e.g. media shared with
              # the header/footer, or a target outside word/media); rewriting it with an
              # empty number would corrupt both the target and the id, so it is left alone
              next if i.nil?

              target_val = node["Target"].gsub(/image[0-9]*./, "image#{i}.")
              rid_hash[doc_key][node['Id'].to_s] = i.to_s
              id_attr = node.attributes["Id"]
              new_id = id_attr.value.gsub(/[0-9]+/, i.to_s)

              if doc_cnt == 0
                node["Target"] = target_val
                id_attr.value = new_id
              else
                # extra relationship node for the media file, built as a node (not as a
                # string with unquoted attribute values) so the resulting XML is well-formed
                new_rel_node = Nokogiri::XML::Node.new("Relationship", @rel_doc)
                new_rel_node["Id"] = new_id
                new_rel_node["Type"] = node["Type"]
                new_rel_node["Target"] = target_val
                @rel_doc.at('Relationships').add_child(new_rel_node)
              end
            end
          end

          # adding the table style information to the styles xml
          next unless e.name == STYLES_FILE_PATH

          style_xml = doc_cnt == 0 ? @style_doc : Nokogiri::XML(e.get_input_stream.read)
          table_nodes = style_xml.xpath('//w:style').select { |n| n.attributes["type"].value == "table" }
          table_nodes = table_nodes.select { |n| n.attributes["styleId"].value != "TableNormal" } if doc_cnt != 0

          table_nodes.each do |table_node|
            style_id_attr = table_node.attributes['styleId']
            tab_val = table_hash[doc_key][style_id_attr.value.to_s]
            style_id_attr.value = style_id_attr.value.gsub(/[0-9]+/, tab_val.to_s)
            # table styles of later documents are appended to the main styles xml
            @style_doc.xpath("//w:styles").children.last.add_next_sibling(table_node.to_xml) if doc_cnt != 0
          end
        end

        # updating the id and rid values of every drawing element with the new counters
        doc_content.xpath("//w:drawing").each do |dr_node|
          doc_pr_node = dr_node.xpath(".//wp:docPr").last
          doc_pr_node['id'] = doc_pr_id.to_s
          doc_pr_id += 1

          blip_node = dr_node.xpath(".//a:blip", NAMESPACES).last
          # not all <w:drawing> are images and only an image has <a:blip>
          next if blip_node.nil?

          embed_attr = blip_node.attributes["embed"]
          next if embed_attr.nil?

          i = rid_hash[doc_key][embed_attr.value]
          # relationship was not renumbered above, so the reference stays as it is
          next if i.nil?

          embed_attr.value = embed_attr.value.gsub(/[0-9]+/, i)
        end

        if doc_cnt > 0
          # everything in the body except its last child (the <w:sectPr> element)
          body_nodes = doc_content.xpath('//w:body').children[0..-2]
          # inserted before the last child of the main document's body
          @main_body.children.last.add_previous_sibling(body_nodes.to_xml)
        end

        # adding a page break after each document being merged, except the last one
        if page_break && doc_cnt < documents_to_merge_count - 1
          @main_body.children.last.add_previous_sibling('<w:p><w:r><w:br w:type="page"/></w:r></w:p>')
        end

        doc_cnt += 1
      end

      # writing the updated styles XML to the new zip
      zos.put_next_entry(STYLES_FILE_PATH)
      zos.print @style_doc.to_xml

      # writing the updated relationships XML to the new zip
      zos.put_next_entry(RELATIONSHIP_FILE_PATH)
      zos.print @rel_doc.to_xml

      # adding the additional content type nodes, then writing the content types XML
      zos.put_next_entry(CONTENT_TYPES_FILE)
      additional_cont_type_entries.each do |node|
        @cont_type_doc.at("Types").add_child(node)
      end
      zos.print @cont_type_doc.to_xml

      # writing the updated document content XML to the new zip
      zos.put_next_entry(DOCUMENT_FILE_PATH)
      zos.print @main_document_xml.to_xml
    end

    # the handle is released before the file is moved to the path given by the caller
    temp_file.close
    FileUtils.mv(temp_file.path, final_path)
  ensure
    # closes the handle and removes the temp file if it is still there (e.g. after an error)
    temp_file.close!
  end
end
.replace_doc_content(replacement_hash = {}, template_path, final_path) ⇒ Object
408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 |
# File 'lib/omnidocx.rb', line 408

# Applies every key => value substitution of +replacement_hash+ to the raw XML of
# the main document part and writes the resulting docx to +final_path+.
#
# @param replacement_hash [Hash] patterns (as accepted by String#gsub!) mapped to their replacements
# @param template_path [String] path of the docx used as template
# @param final_path [String] path the generated docx is moved to
# @return [Object] the result of FileUtils.mv
def self.replace_doc_content(replacement_hash = {}, template_path, final_path)
  @template_zip = Zip::File.new(template_path)
  @template_content = @template_zip.read(DOCUMENT_FILE_PATH)

  # the bytes read from the zip are tagged as UTF-8 once, before any substitution runs
  @template_content.force_encoding("UTF-8")

  # replacing the keys with values in the document content xml
  replacement_hash.each do |key, value|
    @template_content.gsub!(key, value)
  end

  temp_file = Tempfile.new('docxedit-')

  begin
    Zip::OutputStream.open(temp_file.path) do |zos|
      @template_zip.entries.each do |e|
        next if e.name == DOCUMENT_FILE_PATH

        # writing the files not needed to be edited back to the new zip
        zos.put_next_entry(e.name)
        zos.print e.get_input_stream.read
      end

      # writing the updated document content xml to the new zip
      zos.put_next_entry DOCUMENT_FILE_PATH
      zos.print @template_content
    end

    # the handle is released before the file is moved to the path given by the caller
    temp_file.close
    FileUtils.mv(temp_file.path, final_path)
  ensure
    # closes the handle and removes the temp file if it is still there (e.g. after an error)
    temp_file.close!
  end
end
.replace_footer_content(replacement_hash = {}, template_path, final_path) ⇒ Object
473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 |
# File 'lib/omnidocx.rb', line 473

# Applies every key => value substitution of +replacement_hash+ to the raw XML of
# the footer part (FOOTER_FILE_PATH) and writes the resulting docx to +final_path+.
# When the template has no such part, all entries are copied unchanged and no
# footer part is added.
#
# @param replacement_hash [Hash] patterns (as accepted by String#gsub!) mapped to their replacements
# @param template_path [String] path of the docx used as template
# @param final_path [String] path the generated docx is moved to
# @return [Object] the result of FileUtils.mv
def self.replace_footer_content(replacement_hash = {}, template_path, final_path)
  @template_zip = Zip::File.new(template_path)

  @footer_content = ''
  footer_present = false
  @template_zip.entries.each do |e|
    next unless e.name == FOOTER_FILE_PATH

    @footer_content = e.get_input_stream.read
    footer_present = true
  end

  if footer_present
    # the bytes read from the zip are tagged as UTF-8 once, before any substitution runs
    @footer_content.force_encoding("UTF-8")
    replacement_hash.each do |key, value|
      @footer_content.gsub!(key, value)
    end
  end

  temp_file = Tempfile.new('docxedit-')

  begin
    Zip::OutputStream.open(temp_file.path) do |zos|
      @template_zip.entries.each do |e|
        next if e.name == FOOTER_FILE_PATH

        # writing the files not needed to be edited back to the new zip
        zos.put_next_entry(e.name)
        zos.print e.get_input_stream.read
      end

      # writing the updated footer xml to the new zip; skipped when the template
      # had no footer part, so that no empty part is introduced
      if footer_present
        zos.put_next_entry FOOTER_FILE_PATH
        zos.print @footer_content
      end
    end

    # the handle is released before the file is moved to the path given by the caller
    temp_file.close
    FileUtils.mv(temp_file.path, final_path)
  ensure
    # closes the handle and removes the temp file if it is still there (e.g. after an error)
    temp_file.close!
  end
end
.replace_header_content(replacement_hash = {}, template_path, final_path) ⇒ Object
438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 |
# File 'lib/omnidocx.rb', line 438

# Applies every key => value substitution of +replacement_hash+ to the raw XML of
# the header part (HEADER_FILE_PATH) and writes the resulting docx to +final_path+.
# When the template has no such part, all entries are copied unchanged and no
# header part is added.
#
# @param replacement_hash [Hash] patterns (as accepted by String#gsub!) mapped to their replacements
# @param template_path [String] path of the docx used as template
# @param final_path [String] path the generated docx is moved to
# @return [Object] the result of FileUtils.mv
def self.replace_header_content(replacement_hash = {}, template_path, final_path)
  @template_zip = Zip::File.new(template_path)

  @header_content = ''
  header_present = false
  @template_zip.entries.each do |e|
    next unless e.name == HEADER_FILE_PATH

    @header_content = e.get_input_stream.read
    header_present = true
  end

  if header_present
    # the bytes read from the zip are tagged as UTF-8 once, before any substitution runs
    @header_content.force_encoding("UTF-8")
    replacement_hash.each do |key, value|
      @header_content.gsub!(key, value)
    end
  end

  temp_file = Tempfile.new('docxedit-')

  begin
    Zip::OutputStream.open(temp_file.path) do |zos|
      @template_zip.entries.each do |e|
        next if e.name == HEADER_FILE_PATH

        # writing the files not needed to be edited back to the new zip
        zos.put_next_entry(e.name)
        zos.print e.get_input_stream.read
      end

      # writing the updated header xml to the new zip; skipped when the template
      # had no header part, so that no empty part is introduced
      if header_present
        zos.put_next_entry HEADER_FILE_PATH
        zos.print @header_content
      end
    end

    # the handle is released before the file is moved to the path given by the caller
    temp_file.close
    FileUtils.mv(temp_file.path, final_path)
  ensure
    # closes the handle and removes the temp file if it is still there (e.g. after an error)
    temp_file.close!
  end
end
.write_images_to_doc(images_to_write = [], doc_path, final_path) ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
# File 'lib/omnidocx.rb', line 38

# Adds the given images to a docx: each image is stored under word/media, gets a
# relationship entry, and a centered drawing paragraph is inserted before the last
# child of the document body. The result is written to +final_path+.
#
# @param images_to_write [Array<Hash>] one hash per image with the keys
#   :path (local file path or http/https URL), :width and :height
#   (presumably pixels; each is divided by the DPI to obtain inches - confirm with callers)
#   and the optional :hdpi / :vdpi (default HORIZONTAL_DPI / VERTICAL_DPI)
# @param doc_path [String] path of the docx the images are added to
# @param final_path [String] path the generated docx is moved to
# @return [Object] the result of FileUtils.mv
# @raise [ArgumentError] when an image path has no file extension
def self.write_images_to_doc(images_to_write = [], doc_path, final_path)
  temp_file = Tempfile.new('docxedit-')

  begin
    # every docx file is ultimately a zip file with the extension docx
    @document_zip = Zip::File.new(doc_path)

    # reading the document content xml from the zip
    @document_content = @document_zip.read(DOCUMENT_FILE_PATH)
    @document_xml = Nokogiri::XML @document_content

    # the body element, which receives the new drawing paragraphs
    @body = @document_xml.xpath("//w:body")
    @rel_doc = ""
    @cont_type_doc = ""

    # counter used for the image file names, relationship ids and drawing ids
    cnt = 20

    # extension => content type, for every media extension added by this call
    media_content_type_hash = {}

    @document_zip.entries.each do |e|
      case e.name
      when RELATIONSHIP_FILE_PATH
        # Relationships XML
        @rel_doc = Nokogiri::XML(e.get_input_stream.read)
      when CONTENT_TYPES_FILE
        # Content types XML, updated later on with the additional media type info
        @cont_type_doc = Nokogiri::XML(e.get_input_stream.read)
      end
    end

    Zip::OutputStream.open(temp_file.path) do |zos|
      @document_zip.entries.each do |e|
        next if [DOCUMENT_FILE_PATH, RELATIONSHIP_FILE_PATH, CONTENT_TYPES_FILE].include?(e.name)

        # writing the files not needed to be edited back to the new zip
        zos.put_next_entry(e.name)
        zos.print e.get_input_stream.read
      end

      images_to_write.each do |img|
        path = img[:path]

        # a prefix check is used instead of URI.parse, which raises on local
        # paths that are not valid URIs (e.g. paths containing spaces)
        if path.to_s =~ %r{\Ahttps?://}i
          data = begin
            # Kernel#open no longer opens URLs on Ruby 3; URI.open is used when open-uri provides it
            opener = URI.respond_to?(:open) ? URI : Kernel
            opener.open(path) { |io| io.read }
          rescue StandardError
            # an image that cannot be downloaded is skipped below
            nil
          end
        else
          data = File.open(path, 'rb') do |f|
            begin
              f.read
            rescue StandardError
              nil
            end
          end
        end

        # only images whose content could be read are added
        unless data.nil? || data.empty?
          img_url_no_params = path.gsub(/\?.*/, '')
          extension = File.extname(img_url_no_params).split(".").last
          raise ArgumentError, "image path has no file extension: #{path}" if extension.nil?

          unless media_content_type_hash.key?(extension)
            # making an entry for a new media type
            media_content_type_hash[extension] = MIME::Types.type_for(img_url_no_params)[0].to_s
          end

          # storing the image in the new zip
          zos.put_next_entry("word/media/image#{cnt}.#{extension}")
          zos.print data

          # adding a new relationship node to the relationships xml
          new_rel_node = Nokogiri::XML::Node.new("Relationship", @rel_doc)
          new_rel_node["Id"] = "rid#{cnt}"
          new_rel_node["Type"] = MEDIA_TYPE
          new_rel_node["Target"] = "media/image#{cnt}.#{extension}"
          @rel_doc.at('Relationships').add_child(new_rel_node)

          hdpi = img[:hdpi] || HORIZONTAL_DPI
          vdpi = img[:vdpi] || VERTICAL_DPI

          # width and height in EMUs; the multiplication is done before the integer
          # division so that the size is not truncated to whole inches
          width_emus = img[:width].to_i * EMUSPERINCH / hdpi.to_i
          height_emus = img[:height].to_i * EMUSPERINCH / vdpi.to_i

          # creating a new drawing element with info like rid, height, width, etc.
          @image_element_xml = Nokogiri::XML IMAGE_ELEMENT
          @image_element_xml.xpath("//w:drawing", NAMESPACES).each do |dr_node|
            doc_pr = dr_node.xpath(".//wp:docPr", NAMESPACES).last
            doc_pr["name"] = "image#{cnt}.#{extension}"
            doc_pr["id"] = cnt.to_s

            extent = dr_node.xpath(".//wp:extent", NAMESPACES).last
            extent["cx"] = width_emus.to_s
            extent["cy"] = height_emus.to_s

            ext = dr_node.xpath(".//a:ext", NAMESPACES).last
            ext["cx"] = width_emus.to_s
            ext["cy"] = height_emus.to_s

            pic_c_nv_pr = dr_node.xpath(".//pic:cNvPr", NAMESPACES).last
            pic_c_nv_pr["name"] = "image#{cnt}.#{extension}"
            pic_c_nv_pr["id"] = cnt.to_s

            blip = dr_node.xpath(".//a:blip", NAMESPACES).last
            blip.attributes["embed"].value = "rid#{cnt}"
          end

          # inserting the drawing paragraph before the last child of the document's body
          @body.children.last.add_previous_sibling(@image_element_xml.xpath("//w:p").last.to_xml)
        end

        # the counter advances for every requested image, including skipped ones
        cnt += 1
      end

      # updating the content type info; an extension that already has a Default
      # entry is not declared a second time
      declared_extensions = @cont_type_doc.css("Default").map { |node| node["Extension"].to_s.downcase }
      media_content_type_hash.each do |ext, cont_type|
        next if declared_extensions.include?(ext.downcase)

        new_default_node = Nokogiri::XML::Node.new("Default", @cont_type_doc)
        new_default_node["Extension"] = ext
        new_default_node["ContentType"] = cont_type
        @cont_type_doc.at("Types").add_child(new_default_node)
      end

      # writing the content types xml to the new zip
      zos.put_next_entry CONTENT_TYPES_FILE
      zos.print @cont_type_doc.to_xml

      # writing the relationships xml to the new zip
      zos.put_next_entry RELATIONSHIP_FILE_PATH
      zos.print @rel_doc.to_xml

      # writing the updated document content xml to the new zip
      zos.put_next_entry DOCUMENT_FILE_PATH
      zos.print @document_xml.to_xml
    end

    # the handle is released before the file is moved to the path given by the caller
    temp_file.close
    FileUtils.mv(temp_file.path, final_path)
  ensure
    # closes the handle and removes the temp file if it is still there (e.g. after an error)
    temp_file.close!
  end
end