Class: Docx::Cloner::DocxTool

Inherits:

Object

Object
Docx::Cloner::DocxTool

show all

Defined in: lib/docx/cloner.rb

Instance Method Summary collapse

#clone_tag_scope(node, times) ⇒ Object

clone标签所在的范围，例如表格的行。返回一组新的行对象集合。
#generate_paragraph(node) ⇒ Object
#get_tag_scope(tag, type) ⇒ Object

获取标签所在的范围，例如表格的行。简单的考虑，则tags中第一个标签位置即可确定为scope位置；复杂的考虑，则可根据tags中所有标签的共同根（如<w:tr>）确定scope位置，这种情况将允许标签名拥有自己的作用域。这里仅做简单的考虑。
#include_text_tag?(tag) ⇒ Boolean
#initialize(file) ⇒ DocxTool constructor

A new instance of DocxTool.
#read_text_tag_xml(tag) ⇒ Object
#release ⇒ Object
#replace_tag(tag, value, node = nil) ⇒ Object

在指定的范围内替换标签.
#save(path) ⇒ Object
#set_row_tags(tags, values, type) ⇒ Object

根据行标签设置，替换成多行数据，这里考虑表格的一般情况.
#set_text_tag(tag, value) ⇒ Object

替换单个标签为指定值.

Constructor Details

#initialize(file) ⇒ `DocxTool`

Returns a new instance of DocxTool.

# File 'lib/docx/cloner.rb', line 11

# Opens the .docx archive and prepares its main document part for tag
# replacement.
#
# @param file [String] path of the .docx file, handed to Zip::ZipFile.open
def initialize(file)
  @zip = Zip::ZipFile.open(file)
  @doc = Nokogiri::XML(@zip.read("word/document.xml"))
  # Paragraph index of the whole document; the tag lookup methods read it.
  @global_paragraph = generate_paragraph(@doc)
  # Archive entry name => replacement payload, populated by #save.
  @replace = {}
end

Instance Method Details

#clone_tag_scope(node, times) ⇒ `Object`

clone标签所在的范围，例如表格的行。返回一组新的行对象集合。

# File 'lib/docx/cloner.rb', line 176

# Duplicates the scope node (for example a table row) +times+ times and
# inserts every copy directly after the original.
#
# The copies are created from the last index down to the first; because each
# one is inserted immediately after +node+, the siblings end up in the same
# order as the returned array.
#
# @param node [#dup, #add_next_sibling] node to copy
# @param times [Integer] number of copies to create
# @return [Array] the copies, in sibling order
def clone_tag_scope(node, times)
  clones = Array.new(times)
  (times - 1).downto(0) do |slot|
    copy = node.dup
    clones[slot] = copy
    node.add_next_sibling(copy)
  end
  clones
end

#generate_paragraph(node) ⇒ `Object`

# File 'lib/docx/cloner.rb', line 100

# Builds a paragraph index for everything below +node+.
#
# Every <w:p> found under +node+ yields one hash holding the concatenated
# text of its <w:t> descendants and those <w:t> nodes themselves, in
# document order.
#
# @param node [#xpath] node to search below
# @return [Array<Hash>] one { text_content:, text_run: } hash per paragraph
def generate_paragraph(node)
  node.xpath(".//w:p").map do |paragraph|
    entry = { text_content: '', text_run: [] }
    paragraph.xpath(".//w:t").each do |text_node|
      entry[:text_content] << text_node.content
      entry[:text_run] << text_node
    end
    entry
  end
end

#get_tag_scope(tag, type) ⇒ `Object`

获取标签所在的范围，例如表格的行。简单的考虑，则tags中第一个标签位置即可确定为scope位置；复杂的考虑，则可根据tags中所有标签的共同根（如<w:tr>）确定scope位置，这种情况将允许标签名拥有自己的作用域。这里仅做简单的考虑

# File 'lib/docx/cloner.rb', line 86

# Finds the enclosing scope node (for example a table row) of a tag.
#
# Only the first paragraph of the global index whose text contains +tag+ is
# considered, so even row-level tags have to be globally unique. Starting at
# that paragraph's first text run, the ancestors are walked until one is
# named +type+.
#
# @param tag [String] tag text to look for
# @param type [String] node name of the wanted ancestor
# @return [Object, nil, false] the matching ancestor; nil when the walk runs
#   out of parents; false when no paragraph contains the tag
def get_tag_scope(tag, type)
  owner = @global_paragraph.find { |para| para[:text_content].include?(tag) }
  return false unless owner

  candidate = owner[:text_run].first
  loop do
    return nil unless candidate                      # ran out of ancestors
    return candidate if candidate.node_name == type  # found the scope node
    candidate = candidate.parent
  end
end

#include_text_tag?(tag) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/docx/cloner.rb', line 43

# Tells whether any paragraph of the global index contains +tag+.
#
# @param tag [String] tag text to look for
# @return [Boolean]
def include_text_tag?(tag)
  @global_paragraph.any? { |para| para[:text_content].include?(tag) }
end

#read_text_tag_xml(tag) ⇒ `Object`

# File 'lib/docx/cloner.rb', line 52

# Returns the XML of the parents of all text runs that carry +tag+.
#
# Only the first occurrence of the tag in the first paragraph containing it
# is considered. A run is reported when its text overlaps the tag's character
# range, so runs holding only the first or last character of the tag, and
# runs that start before the tag but contain part of it, are included.
#
# @param tag [String] tag text to look for
# @return [String] the parents' XML, each followed by a newline; empty when
#   no paragraph contains the tag
def read_text_tag_xml(tag)
  @global_paragraph.each do |p|
    from = p[:text_content].index(tag)
    next unless from

    stop = from + tag.size # exclusive end offset of the tag
    pos = 0                # start offset of the current run
    dest = ""
    p[:text_run].each do |wt|
      break if pos >= stop # every later run starts after the tag

      run_end = pos + wt.content.size
      dest << wt.parent.to_xml << "\n" if run_end > from
      pos = run_end
    end
    return dest
  end
  ''
end

#release ⇒ `Object`



22
23
24

# File 'lib/docx/cloner.rb', line 22

# Closes the zip archive handle held in @zip.
def release
  @zip.close
end

#replace_tag(tag, value, node = nil) ⇒ `Object`

在指定的范围内替换标签

# File 'lib/docx/cloner.rb', line 120

# Replaces the first occurrence of +tag+ with +value+ inside the first
# paragraph (within the given scope) whose text contains the tag.
#
# Word frequently splits one visible tag across several <w:t> runs, e.g.
# '{name}' stored as '<w:t>{</w:t>' and '<w:t>name}</w:t>'. The tag may also
# sit in the middle of a single run, as in '<w:t>my {name}</w:t>'. Both cases
# are handled the same way: every run whose text overlaps the tag is
# collected, the text before the tag is kept in the first run, the text after
# the tag is kept in the last run, and runs lying entirely inside the tag are
# removed.
#
# The value is always written through +content=+, so it is stored as plain
# text and not parsed as markup. After a successful replacement the
# paragraph's cached text and run list are refreshed so that later lookups in
# the same paragraph use correct offsets.
#
# @param tag [String] tag text to replace
# @param value [#to_s] replacement; converted with to_s
# @param node [#xpath, nil] scope to search; nil means the whole document
# @return [Boolean] true when a replacement was made, false otherwise
def replace_tag(tag, value, node = nil)
  paragraphs = node ? generate_paragraph(node) : @global_paragraph
  replacement = value.to_s

  paragraphs.each do |para|
    from = para[:text_content].index(tag)
    next unless from

    stop = from + tag.size # exclusive end offset of the tag

    # Collect [run, start offset, text] for every run overlapping the tag.
    pos = 0
    hits = []
    para[:text_run].each do |wt|
      break if pos >= stop

      text = wt.content
      run_end = pos + text.size
      hits << [wt, pos, text] if run_end > from
      pos = run_end
    end
    # As before, only the first paragraph containing the tag is attempted.
    return false if hits.empty?

    first_wt, first_pos, first_text = hits.first
    last_wt, last_pos, last_text = hits.last
    prefix = first_text[0, [from - first_pos, 0].max]
    suffix = last_text[(stop - last_pos)..-1] || ""

    dropped = []
    if hits.size == 1
      first_wt.content = prefix + replacement + suffix
    else
      first_wt.content = prefix + replacement
      dropped = hits[1..-2].map(&:first)
      if suffix.empty?
        dropped << last_wt
      else
        last_wt.content = suffix # keep the text that followed the tag
      end
      dropped.each(&:remove)
    end

    # Keep the cached paragraph in sync with the modified runs.
    para[:text_run] = para[:text_run].reject do |wt|
      dropped.any? { |gone| gone.equal?(wt) }
    end
    para[:text_content] = para[:text_run].map(&:content).join
    return true
  end

  false
end

#save(path) ⇒ `Object`

# File 'lib/docx/cloner.rb', line 26

# Writes the document to a zip archive at +path+.
#
# The serialized main document is registered as the replacement for
# "word/document.xml"; every entry of the source archive is then written to
# the target, using the registered replacement when one exists and the
# original bytes otherwise.
#
# @param path [String] target path, handed to Zip::ZipFile.open
def save(path)
  @replace["word/document.xml"] = @doc.serialize(:save_with => 0)

  Zip::ZipFile.open(path, Zip::ZipFile::CREATE) do |archive|
    @zip.each do |entry|
      archive.get_output_stream(entry.name) do |stream|
        payload = @replace[entry.name] || @zip.read(entry.name)
        stream.write(payload)
      end
    end
  end
end

#set_row_tags(tags, values, type) ⇒ `Object`

根据行标签设置，替换成多行数据，这里考虑表格的一般情况

# File 'lib/docx/cloner.rb', line 190

# Expands a template row into one row per entry of +values+.
#
# The scope node (for example the table row) holding the first tag is looked
# up, cloned once per value row, and each clone gets its tags replaced with
# the matching values. The template scope node is removed afterwards.
#
# @param tags [Array<String>] tags that appear in the template row
# @param values [Array<Array>] one array per output row; values[r][c] is the
#   replacement for tags[c] in row r
# @param type [String] node name of the scope to clone
# @return [Boolean] true when the rows were generated; false when no scope
#   node could be found for the first tag
def set_row_tags(tags, values, type)
  # Locate the ancestor node of the row that holds the tags.
  tag_scope_node = get_tag_scope(tags.first, type)
  # get_tag_scope answers nil or false when nothing matches; cloning or
  # removing that would raise, so report failure instead.
  return false unless tag_scope_node

  value_scope_nodes = clone_tag_scope(tag_scope_node, values.size)
  value_scope_nodes.each_with_index do |row_node, r|
    tags.each_with_index do |tag, c|
      replace_tag(tag, values[r][c], row_node)
    end
  end

  # Drop the template row that still carries the raw tags.
  tag_scope_node.remove
  true
end

#set_text_tag(tag, value) ⇒ `Object`

替换单个标签为指定值



78
79
80

# File 'lib/docx/cloner.rb', line 78

# Replaces a single tag with the given value, searching the whole document.
#
# @param tag [String] tag text to replace
# @param value [Object] replacement, passed through to #replace_tag
# @return [Object] whatever #replace_tag returns
def set_text_tag(tag, value)
  replace_tag(tag, value)
end

Class: Docx::Cloner::DocxTool

Instance Method Summary collapse

Constructor Details

#initialize(file) ⇒ DocxTool

Instance Method Details

#clone_tag_scope(node, times) ⇒ Object

#generate_paragraph(node) ⇒ Object

#get_tag_scope(tag, type) ⇒ Object

#include_text_tag?(tag) ⇒ Boolean

#read_text_tag_xml(tag) ⇒ Object

#release ⇒ Object

#replace_tag(tag, value, node = nil) ⇒ Object

#save(path) ⇒ Object

#set_row_tags(tags, values, type) ⇒ Object

#set_text_tag(tag, value) ⇒ Object