Class: Docx::Cloner::DocxTool

Inherits:
Object
  • Object
show all
Defined in:
lib/docx/cloner.rb

Instance Method Summary collapse

Constructor Details

#initialize(file) ⇒ DocxTool

Returns a new instance of DocxTool.



11
12
13
14
15
16
17
18
19
20
# File 'lib/docx/cloner.rb', line 11

def initialize(file)
  @zip = Zip::ZipFile.open(file)
  _xml = @zip.read("word/document.xml")
  @doc = Nokogiri::XML(_xml)
  @global_paragraph = generate_paragraph @doc

  @replace = {}

  #puts @paragraph
end

Instance Method Details

#clone_tag_scope(node, times) ⇒ Object

clone标签所在的范围,例如表格的行 返回一组新的行对象集合



176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/docx/cloner.rb', line 176

def clone_tag_scope node, times
  #puts "clone #{node.node_name} #{times} times"
  nodes = Array.new times
  #puts "被克隆节点:#{node.path}"
  times.downto(1).each do |_i|
    i = _i.to_i - 1
    nodes[i] = node.dup
    node.add_next_sibling nodes[i]
    #puts "第#{i+1}个节点克隆:#{nodes[i].path}"
  end
  return nodes
end

#generate_paragraph(node) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/docx/cloner.rb', line 100

def generate_paragraph node
  paragraphs = []
  #puts "查找范围:#{node.path}"
  wp_set = node.xpath(".//w:p")
  #puts "#{wp_set.size}'s wp"
  wp_set.each do |wp|
    p = {text_content: '', text_run: []}
    wp.xpath(".//w:t").each do |t|
      p[:text_content] << t.content
      p[:text_run] << t
      #puts "node name: #{t.node_name}" if t.content.size > 0
      #puts t.path
    end
    paragraphs << p
    #puts p[:text_content].include? '$名字$'
  end
  return paragraphs
end

#get_tag_scope(tag, type) ⇒ Object

获取标签所在的范围,例如表格的行 简单的考虑,则tags中第一个标签位置即可确定为scope位置 复杂的考虑,则可根据tags中所有标签的共同根(如<w:tr>)确定scope位置,这种情况将允许标签名拥有自己的作用域 这里仅做简单的考虑



86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/docx/cloner.rb', line 86

def get_tag_scope tag, type
  @global_paragraph.each do |p|
    if p[:text_content].include? tag #这里是简单的考虑,即使行内标签也必须全局唯一
      node = p[:text_run].first
      while true
        return unless node                      #查找父节点失败
        return node if node.node_name == type   #查找到匹配的父节点
        node = node.parent
      end
    end
  end
  return false
end

#include_text_tag?(tag) ⇒ Boolean

Returns:

  • (Boolean)


43
44
45
46
47
48
49
50
# File 'lib/docx/cloner.rb', line 43

def include_text_tag?(tag)
  @global_paragraph.each do |p|
    if p[:text_content].include? tag
      return true
    end
  end
  return false
end

#read_text_tag_xml(tag) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/docx/cloner.rb', line 52

def read_text_tag_xml(tag)
  @global_paragraph.each do |p|
    if p[:text_content].include? tag
      from = p[:text_content].index tag
      to = from + tag.size - 1
      #puts "from:#{from}, to:#{to}"
      pos = 0
      dest = ""
      p[:text_run].each do |wt|
        #puts "pos:#{pos}"
        if pos >= from && pos < to
          dest << wt.parent.to_xml << "\n"
        end
        if pos >= to
          return dest
        end
        pos += wt.content.size
      end
      return dest
    end

  end
  return ''
end

#releaseObject



22
23
24
# File 'lib/docx/cloner.rb', line 22

def release
  @zip.close
end

#replace_tag(tag, value, node = nil) ⇒ Object

在指定的范围内替换标签



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/docx/cloner.rb', line 120

def replace_tag tag, value, node=nil
  paragraphs = node ? generate_paragraph(node) : @global_paragraph 
  #puts paragraphs
  paragraphs.each do |p|
    #puts p[:text_content]
    if p[:text_content].include? tag
      from = p[:text_content].index tag
      to = from + tag.size - 1
      #puts "tag:#{tag} | from:#{from}, to:#{to} >> #{p[:text_content]}"
      pos = 0
      dest = []
      #puts p[:text_run]
      p[:text_run].each do |wt|
        #puts "pos:#{pos}"
        #通常情况下,msword会把标签拆分成多个xml标签,如'{name}'被拆分成'<wt>{</wt>'和'<wt>name}</wt>'
        #这可能跟编辑器有关,在处理中文时,这是一种常见的情形
        if pos+1 >= from && pos <= to #通过pos+1修正临界点问题
          dest << wt
        end
        if pos > to
          break
        end
        pos += wt.content.size

        #这里要处理一下标签没有被拆分的情形,而是作为纯文本被包含在某个标签中
        #例如'{name}'包含在'<wt>my {name}</wt>'中
        #puts "pos:#{pos}, to:#{to}, dest.size:#{dest.size}"
        #puts wt
        if pos >= to && dest.size == 0
          #puts "simple_type | pos:#{pos}, to:#{to} >> #{wt.content}"
          wt.inner_html = wt.content.sub(tag, value)
          return true #如果是这种简单情形,就不再需要后续处理了
        end
      end

      if dest.size > 0
        #puts "被替换节点:#{dest.first.path}"
        dest.first.content = value
        dest[1..-1].each do |node|
          #puts node
          node.remove
        end
        #puts "\n"
        return true
      else
        return false
      end
    end

  end
  return false

end

#save(path) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/docx/cloner.rb', line 26

def save(path)
  @replace["word/document.xml"] = @doc.serialize :save_with => 0

  Zip::ZipFile.open(path, Zip::ZipFile::CREATE) do |out|
    @zip.each do |entry|
      out.get_output_stream(entry.name) do |o|
        if @replace[entry.name]
          o.write(@replace[entry.name])
        else
          o.write(@zip.read(entry.name))
        end
      end
    end
  end
end

#set_row_tags(tags, values, type) ⇒ Object

根据行标签设置,替换成多行数据,这里考虑表格的一般情况



190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/docx/cloner.rb', line 190

def set_row_tags tags, values, type
  #puts "tags:#{tags}, values:#{values}, type:#{type}"
  #找到标签所在行的父节点
  tag_scope_node = get_tag_scope tags.first, type
  value_scope_nodes = clone_tag_scope tag_scope_node, values.size
  value_scope_nodes.each_with_index do |node, r|
    #puts "查找范围:#{node.path}"
    tags.each_with_index do |tag, c|
      replace_tag tag, values[r][c], node
    end
  end
  #清除标签
  tag_scope_node.remove
  return true
end

#set_text_tag(tag, value) ⇒ Object

替换单个标签为指定值



78
79
80
# File 'lib/docx/cloner.rb', line 78

def set_text_tag tag, value
  replace_tag tag, value
end