Module: ODT2HTML::AnalyzeContent

Included in:
Base
Defined in:
lib/odt2html-nsi/analyze_content.rb

Instance Method Summary collapse

Instance Method Details

#analyze_content_xmlObject



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/odt2html-nsi/analyze_content.rb', line 4

# Drive the conversion of the parsed content document held in @doc:
# automatic styles first, then list styles, then the body text.
#
# Body content is emitted as children of @body.
def analyze_content_xml
  # Get the namespaces from the root element; this populates the dynamic
  # instance variable names and the namespace hash, so it must run before
  # any of the XPath strings below are interpolated.
  get_namespaces
  create_dispatch_table

  top_level = @doc.root.elements
  automatic_styles = "#{@office_ns}:automatic-styles"

  # style:style elements
  top_level.each( "#{automatic_styles}/#{@style_ns}:style" ) do |style_el|
    process_style_style( style_el )
  end

  # text:list-style elements
  top_level.each( "#{automatic_styles}/#{@text_ns}:list-style" ) do |list_style_el|
    process_text_list_style( list_style_el )
  end

  # document body
  top_level.each( "#{@office_ns}:body/#{@office_ns}:text" ) do |text_el|
    process_children( text_el, @body )
  end
end

#emit_element(output_node, element_name, attr_hash = nil) ⇒ Object

Emit an element with the given element_name and attr_hash (as attributes) as a child of the output_node



303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/odt2html-nsi/analyze_content.rb', line 303

# Emit an element named element_name as a child of output_node.
#
# output_node::  node that receives the new element (must respond to
#                add_element)
# element_name:: tag name of the element to create
# attr_hash::    optional hash of attribute name => value; entries whose
#                value is nil are omitted
#
# Returns whatever output_node.add_element returns.
#
# The caller's attr_hash is never modified: nil-valued entries are filtered
# into a fresh hash instead of being deleted from the argument while it is
# being iterated.
def emit_element( output_node, element_name, attr_hash=nil )
  unless attr_hash.nil?
    attr_hash = attr_hash.reject { |_key, value| value.nil? }
    # An all-nil (or empty) hash is passed on as "no attributes at all".
    attr_hash = nil if attr_hash.empty?
  end
  output_node.add_element( element_name, attr_hash )
end

#modify_style_attribute(output_element, property, value) ⇒ Object

Modify the style attribute of output_element by adding the given property and value

Algorithm:

If there's no style attribute, create it.
If it exists, look for the property.
  If the property doesn't exist, add it and its value
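  If it does exist, (the original description breaks off here; the code as written does not search for the property — it appends "property:value" to any existing style value, separated by a semicolon)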


326
327
328
329
330
331
# File 'lib/odt2html-nsi/analyze_content.rb', line 326

# Add a "property:value" declaration to the style attribute of
# output_element.
#
# If the element has no style attribute yet, one is created holding just
# the new declaration; otherwise the declaration is appended to the
# existing value after a ";". No check is made for an existing declaration
# of the same property.
def modify_style_attribute( output_element, property, value )
  existing = output_element.attribute("style")
  declaration = "#{property}:#{value}"
  output_element.attributes["style"] =
    existing.nil? ? declaration : existing.value + ";" + declaration
end

#process_children(node, output_node, xpath_expr = "node()") ⇒ Object

Process an element’s children. node: the context node; output_node: the node to which to add the children; xpath_expr: which children to process (default is all).

Algorithm: If the node is a text node, output to the destination. If it’s an element, munge its name into process_prefix_elementname. If that method exists, call it to handle the element. Otherwise, process this node’s children recursively.



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/odt2html-nsi/analyze_content.rb', line 44

# Process the children of node, adding the results to output_node.
#
# node::        the context node
# output_node:: the node to which output is added
# xpath_expr::  XPath selecting which children to process (default: all)
#
# Element children are dispatched to a method named
# process_<prefix>_<name> (with ":" and "-" in the element name turned into
# "_"), where <prefix> is looked up in @namespace_urn by the element's
# namespace. If no such method is defined, the element's own children are
# processed recursively into the same output_node. Text children are copied
# to output_node unless they consist entirely of whitespace.
def process_children( node, output_node, xpath_expr="node()" )
  REXML::XPath.each( node, xpath_expr ) do |item|
    if item.kind_of?(REXML::Element)
      handler = "process_" + @namespace_urn[item.namespace] + "_" +
        item.name.tr_s(":-", "__")
      if self.class.method_defined?( handler )
        self.send( handler, item, output_node )
      else
        process_children( item, output_node )
      end
    # \A and \z anchor the whole string. With ^ and $ a single blank line
    # inside otherwise meaningful text would match and the text would be
    # discarded.
    elsif item.kind_of?(REXML::Text) && item.value !~ /\A\s*\z/
      output_node.add_text( item.value )
    end
  end
  #
  # If it's empty, add a null string to force a begin and end
  # tag to be generated
  unless output_node.has_elements? || output_node.has_text?
    output_node.add_text("")
  end
end

#process_table_table(element, output_node) ⇒ Object



198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/odt2html-nsi/analyze_content.rb', line 198

# Convert a table element into a <table> under output_node.
#
# Emits the column elements first, then a <thead> holding the rows found
# under table-header-rows, then a <tbody> holding the table's direct rows.
#
# The <thead> is emitted only when the table actually has a
# table-header-rows child. REXML::XPath.match returns an Array, and an
# empty Array is truthy in Ruby, so the result must be tested for
# emptiness rather than used directly as a condition.
def process_table_table( element, output_node )
  style_name = register_style( element )
  table_el = emit_element( output_node, "table", {"class" => style_name,
    "cellpadding" => "0", "cellspacing" => "0"} )
  process_children( element, table_el, "#{@table_ns}:table-column" )

  header_rows = "#{@table_ns}:table-header-rows"
  unless REXML::XPath.match( element, header_rows ).empty?
    thead = emit_element( table_el, "thead" )
    process_children( element, thead, "#{header_rows}/#{@table_ns}:table-row" )
  end

  tbody = emit_element( table_el, "tbody" )
  process_children( element, tbody, "#{@table_ns}:table-row" )
end

#process_table_table_cell(element, output_node) ⇒ Object



226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# File 'lib/odt2html-nsi/analyze_content.rb', line 226

# Convert a table cell into one or more <td> elements under output_node.
#
# The registered style name (if any) becomes the class attribute;
# number-columns-spanned and number-rows-spanned become colspan and
# rowspan. The cell, including its processed children, is emitted once per
# number-columns-repeated (once when that attribute is absent).
def process_table_table_cell( element, output_node )
  cell_attrs = {}
  css_class = register_style( element )
  cell_attrs["class"] = css_class unless css_class.nil?

  repeated = element.attribute("#{@table_ns}:number-columns-repeated")
  copies = repeated.nil? ? 1 : repeated.value.to_i

  col_span = element.attribute("#{@table_ns}:number-columns-spanned")
  cell_attrs["colspan"] = col_span.value unless col_span.nil?

  row_span = element.attribute("#{@table_ns}:number-rows-spanned")
  cell_attrs["rowspan"] = row_span.value unless row_span.nil?

  (1..copies).each do |_copy|
    cell = emit_element( output_node, "td", cell_attrs )
    process_children( element, cell )
  end
end

#process_table_table_column(element, output_node) ⇒ Object



211
212
213
214
215
216
217
218
# File 'lib/odt2html-nsi/analyze_content.rb', line 211

# Convert a table column into a <col> element under output_node.
#
# The registered style name becomes the class attribute, and the value of
# number-columns-repeated (when present) becomes the span attribute.
def process_table_table_column( element, output_node )
  css_class = register_style( element )
  repeated = element.attribute("#{@table_ns}:number-columns-repeated")
  span_value = repeated.nil? ? nil : repeated.value
  emit_element( output_node, "col", {"class" => css_class, "span" => span_value} )
end

#process_table_table_row(element, output_node) ⇒ Object



220
221
222
223
224
# File 'lib/odt2html-nsi/analyze_content.rb', line 220

# Convert a table row into a <tr> under output_node, then process only its
# table-cell children into that row.
def process_table_table_row( element, output_node )
  row_attrs = {"class" => register_style( element )}
  row = emit_element( output_node, "tr", row_attrs )
  process_children( element, row, "#{@table_ns}:table-cell" )
end

#process_text_a(element, output_node) ⇒ Object



134
135
136
137
138
139
140
# File 'lib/odt2html-nsi/analyze_content.rb', line 134

# Convert a hyperlink element into an <a> under output_node.
#
# The href is taken from the element's href attribute in the @xlink_ns
# namespace; the element is expected to carry that attribute. The link's
# children are processed into the new <a>.
def process_text_a( element, output_node )
  css_class = register_style( element )
  target = element.attribute("#{@xlink_ns}:href").value
  anchor_attrs = {"class" => css_class, "href" => target}
  anchor = emit_element( output_node, "a", anchor_attrs )
  process_children( element, anchor )
end

#process_text_bookmark(element, output_node) ⇒ Object



146
147
148
# File 'lib/odt2html-nsi/analyze_content.rb', line 146

# A bookmark is handled exactly like a bookmark start: delegate to
# process_text_bookmark_start.
def process_text_bookmark( element, output_node )
  process_text_bookmark_start(element, output_node)
end

#process_text_bookmark_start(element, output_node) ⇒ Object



150
151
152
153
154
155
156
# File 'lib/odt2html-nsi/analyze_content.rb', line 150

# Convert a bookmark start into a named anchor (<a name="...">) under
# output_node. The name comes from the element's name attribute in the
# @text_ns namespace, which the element is expected to carry.
def process_text_bookmark_start( element, output_node )
  css_class = register_style( element )
  bookmark_name = element.attribute("#{@text_ns}:name").value
  anchor_attrs = {"class" => css_class, "name" => bookmark_name}
  # Give the anchor an empty text child (presumably so it is written with
  # separate begin and end tags -- confirm against the serializer).
  emit_element( output_node, "a", anchor_attrs ).add_text("")
end

#process_text_h(element, output_node) ⇒ Object

Headings are processed as <hn> elements. The heading level comes from the text:outline-level attribute, with a maximum of 6.



107
108
109
110
111
112
113
114
115
# File 'lib/odt2html-nsi/analyze_content.rb', line 107

# Headings are processed as <hn> elements under output_node.
#
# The heading level comes from the outline-level attribute (in the
# @text_ns namespace) and is clamped to the range 1..6, the only levels
# HTML defines. A heading without an outline-level attribute is treated as
# level 1 instead of raising, and a level below 1 (including the 0 that
# to_i yields for non-numeric text) is raised to 1 rather than producing an
# invalid <h0>.
def process_text_h( element, output_node )
  style_name = register_style( element )
  level_attr = element.attribute("#{@text_ns}:outline-level")
  level = level_attr.nil? ? 1 : level_attr.value.to_i
  level = [[level, 1].max, 6].min
  heading = emit_element( output_node, "h" + level.to_s, {"class" => style_name} )
  process_children( element, heading )
end

#process_text_line_break(element, output_node) ⇒ Object



142
143
144
# File 'lib/odt2html-nsi/analyze_content.rb', line 142

# A line break becomes an attribute-less <br> under output_node.
# The element argument itself is not inspected.
def process_text_line_break( element, output_node )
  emit_element( output_node, "br" )
end

#process_text_list(element, output_node) ⇒ Object



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/odt2html-nsi/analyze_content.rb', line 158

# Convert a list element into a <ul> or <ol> under output_node.
#
# The nesting level is one more than the number of list ancestors. A
# top-level list takes its style name from its own style-name attribute; a
# nested list takes it from an ancestor list (the [last()] predicate is
# presumably meant to select the outermost one -- confirm against REXML's
# handling of the ancestor axis). The CSS class is "<style-name>_<level>"
# with periods turned into underscores.
#
# When @style_info has an entry for that class it is marked as used, and
# the list becomes an <ol> if the entry's list-style-type is anything
# other than disc, circle or square. A style with no @style_info entry no
# longer raises; as in register_style, it is simply left unmarked and the
# list stays a <ul>.
def process_text_list( element, output_node )
  tag = "ul"

  # determine the level
  level = REXML::XPath.match( element, "ancestor::#{@text_ns}:list" ).size + 1
  if level == 1
    style_attr = element.attribute("#{@text_ns}:style-name")
  else
    style_attr = REXML::XPath.match( element,
      "ancestor::#{@text_ns}:list[last()]/@#{@text_ns}:style-name" )[0]
  end

  style_name = nil
  unless style_attr.nil?
    style_name = (style_attr.value + "_" + level.to_s).tr_s('.', '_')
    info = @style_info[style_name]
    unless info.nil?
      info.block_used = true
      #
      # Determine if this is a numbered or bulleted list
      list_type = info.find { |obj| obj.property == "list-style-type" }
      if list_type && !list_type.value.match(/disc|circle|square/)
        tag = "ol"
      end
    end
  end

  list_el = emit_element( output_node, tag, {"class" => style_name} )
  process_children( element, list_el )
end

#process_text_list_item(element, output_node) ⇒ Object

List items are easy; just put the children inside a <li> </li> pair.



192
193
194
195
196
# File 'lib/odt2html-nsi/analyze_content.rb', line 192

# List items are easy: emit an <li> carrying the registered style as its
# class, and process the item's children into it.
def process_text_list_item( element, output_node )
  item_attrs = {"class" => register_style( element )}
  list_item = emit_element( output_node, "li", item_attrs )
  process_children( element, list_item )
end

#process_text_list_style(element) ⇒ Object

Create styles for each level of a <text:list-style> element. For bulleted lists, it sets the bullet type by indexing into the marker array; for numbered lists, it uses the numbering hash to translate OpenDocument’s style:num-format to the corresponding CSS list-style-type.



278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
# File 'lib/odt2html-nsi/analyze_content.rb', line 278

# Create a style for each level of a list-style element.
#
# Each child yields a selector "<list style name>_<level>". For a bullet
# level, the list-style-type is picked from the marker array, cycling by
# level; for a number level, the child's num-format attribute is
# translated through the numbering hash into the CSS list-style-type.
# Children of any other kind only have their level read.
def process_text_list_style( element )
  bullet_markers = ["circle", "disc", "square"]
  number_formats = {
    "1" => "decimal",
    "a" => "lower-alpha", "A" => "upper-alpha",
    "i" => "lower-roman", "I" => "upper-roman"
  }

  base_name = element.attribute( "#{@style_ns}:name" ).value
  element.elements.each do |level_style|
    level = level_style.attribute("#{@text_ns}:level").value
    selector = base_name + "_" + level

    case level_style.name
    when "list-level-style-bullet"
      bullet = bullet_markers[(level.to_i - 1) % 3]
      process_normal_style_attr( selector, "list-style-type", bullet )
    when "list-level-style-number"
      format = level_style.attribute("#{@style_ns}:num-format").value
      process_normal_style_attr( selector, "list-style-type",
        number_formats[format] )
    end
  end
end

#process_text_p(element, output_node) ⇒ Object

Paragraphs are processed as <p> elements. (No longer valid: a <text:p> with no children used to generate a <br />.)



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/odt2html-nsi/analyze_content.rb', line 71

# Paragraphs are processed as <p> elements under output_node, always with
# a class attribute holding the registered style name.
#
# Border merging: if this paragraph has the same (non-nil) style as the
# previously emitted paragraph, that style is known to @style_info and has
# a top border, and the element does not have join-border set to "false",
# the new paragraph gets "border-top: none" and the previous paragraph
# gets "border-bottom:none" so the two appear as one bordered block.
#
# The join-border value is compared with the string "false". Comparing it
# with the boolean false could never match, which made the opt-out
# impossible to trigger. (attribute_value is not defined in this block; it
# is assumed to return the attribute's text, or nil when absent -- confirm
# against its definition.)
#
# A paragraph with no child elements and no text is emitted as an empty
# <p>; it no longer gets a <br />.
def process_text_p( element, output_node )
  style_name = register_style( element )

  # always include class attribute
  attr_hash = {"class" => style_name}

  same_style_as_previous = !style_name.nil? &&
    @previous_para_style == style_name &&
    @style_info.keys.include?( style_name )

  if same_style_as_previous &&
     @style_info[style_name].has_top_border? &&
     element.attribute_value("#{@style_ns}:join-border") != "false"
    attr_hash["style"] = "border-top: none"
    modify_style_attribute( @previous_para, "border-bottom", "none" )
  end

  para = emit_element( output_node, "p", attr_hash )
  @previous_para_style = style_name
  @previous_para = para
  if element.has_elements? || element.has_text?
    process_children( element, para )
  end
end

#process_text_s(element, output_node) ⇒ Object



130
131
132
# File 'lib/odt2html-nsi/analyze_content.rb', line 130

# A space element contributes exactly one space character to output_node.
# The element argument itself is not inspected.
def process_text_s( element, output_node )
  single_space = " "
  output_node.add_text( single_space )
end

#process_text_span(element, output_node) ⇒ Object

Text spans cannot produce a newline after their opening tag, so the extra "" parameter is passed to emit_start_tag



120
121
122
123
124
# File 'lib/odt2html-nsi/analyze_content.rb', line 120

# A text span becomes a <span> under output_node carrying the registered
# style as its class; the span's children are processed into it.
def process_text_span( element, output_node )
  span_attrs = {"class" => register_style( element )}
  span_el = emit_element( output_node, "span", span_attrs )
  process_children( element, span_el )
end

#process_text_tab(element, output_node) ⇒ Object



126
127
128
# File 'lib/odt2html-nsi/analyze_content.rb', line 126

# A tab element is rendered as a single space character in output_node.
# The element argument itself is not inspected.
def process_text_tab( element, output_node )
  tab_replacement = " "
  output_node.add_text( tab_replacement )
end

#register_style(element) ⇒ Object

Return the style name for this element, with periods changed to underscores to make it valid CSS.

Side effect: registers this style as “having been used” in the document



258
259
260
261
262
263
264
265
266
267
268
# File 'lib/odt2html-nsi/analyze_content.rb', line 258

# Return the style name for this element, with periods changed to
# underscores to make it valid CSS, or nil when the element has no
# style-name attribute under its own namespace prefix.
#
# Side effect: if @style_info has an entry for the name, that entry is
# flagged as used (block_used = true).
def register_style( element )
  # the attribute is looked up with the element's own namespace prefix
  raw = element.attribute("#{element.prefix}:style-name")
  return nil if raw.nil?

  css_name = raw.value.tr_s('.','_')
  info = @style_info[css_name]
  info.block_used = true unless info.nil?
  css_name
end