Module: ODT2HTML::AnalyzeContent

Included in:
Base
Defined in:
lib/odt2html/analyze_content.rb

Instance Method Summary collapse

Instance Method Details

#analyze_content_xml ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/odt2html/analyze_content.rb', line 4

# Drive the conversion of the content document held in @doc:
# set up namespaces and the dispatch table, convert the automatic styles
# and list styles, then convert the document text into @body.
def analyze_content_xml
  # The namespace instance variables used in the XPath expressions below
  # are populated from the root element by this call, so the expressions
  # must only be built afterwards.
  get_namespaces

  create_dispatch_table

  automatic_styles = "#{@office_ns}:automatic-styles"

  # style:style elements
  @doc.root.elements.each( "#{automatic_styles}/#{@style_ns}:style" ) do |style_el|
    process_style_style( style_el )
  end

  # text:list-style elements
  @doc.root.elements.each( "#{automatic_styles}/#{@text_ns}:list-style" ) do |list_style_el|
    process_text_list_style( list_style_el )
  end

  # the document text itself; output goes to @body
  @doc.root.elements.each( "#{@office_ns}:body/#{@office_ns}:text" ) do |text_el|
    process_children( text_el, @body )
  end
end

#emit_element(output_node, element_name, attr_hash = nil) ⇒ Object

Emit an element with the given element_name and attr_hash (as attributes) as a child of the output_node



299
300
301
302
303
304
305
306
307
308
309
310
311
# File 'lib/odt2html/analyze_content.rb', line 299

# Add a child element named element_name to output_node and return whatever
# output_node.add_element returns.
#
# attr_hash, when given, supplies the element's attributes. Entries whose
# value is nil are left out, and if no entries remain then nil is passed
# instead of an empty hash.
#
# The filtering is done on a copy: the caller's hash is never modified, and
# no keys are deleted from a hash while it is being iterated.
def emit_element( output_node, element_name, attr_hash=nil )
  unless attr_hash.nil?
    attr_hash = attr_hash.reject { |_key, value| value.nil? }
    attr_hash = nil if attr_hash.empty?
  end
  output_node.add_element( element_name, attr_hash )
end

#modify_style_attribute(output_element, property, value) ⇒ Object

Modify the style attribute of output_element by adding the given property and value

Algorithm:

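If there's no style attribute, create it with the given property and value.
If it exists, append a semicolon followed by the property and its value.
  The existing declarations are not searched, so a property that is already
  present is declared a second time rather than replaced.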


322
323
324
325
326
327
# File 'lib/odt2html/analyze_content.rb', line 322

# Append "property:value" to the style attribute of output_element.
#
# When the element already has a style attribute, the new declaration is
# added after a ";" separator; otherwise the attribute is created holding
# just the new declaration. Existing declarations are not inspected.
def modify_style_attribute( output_element, property, value )
  existing = output_element.attribute("style")
  prefix = existing.nil? ? "" : "#{existing.value};"
  output_element.attributes["style"] = "#{prefix}#{property}:#{value}"
end

#process_children(node, output_node, xpath_expr = "node()") ⇒ Object

Process an element’s children. node: the context node; output_node: the node to which to add the children; xpath_expr: which children to process (default is all).

Algorithm: If the node is a text node, output to the destination. If it’s an element, munge its name into process_prefix_elementname. If that method exists, call it to handle the element. Otherwise, process this node’s children recursively.



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/odt2html/analyze_content.rb', line 44

# Process the children of node selected by xpath_expr (default: all child
# nodes), adding the generated output to output_node.
#
# * An element is handed to a method named process_<prefix>_<name>, where
#   <prefix> is looked up in @namespace_urn by the element's namespace and
#   ":" / "-" in the element name become "_". The handler is called with
#   the element and output_node. If there is no such public method, or the
#   namespace has no entry in @namespace_urn, the element's own children
#   are processed recursively into the same output_node.
# * A text node is copied to output_node unless it consists entirely of
#   whitespace.
#
# Finally, if output_node has neither elements nor text, an empty text node
# is added to force separate begin and end tags to be generated.
def process_children( node, output_node, xpath_expr="node()" )
  REXML::XPath.each( node, xpath_expr ) do |item|
    if item.kind_of?(REXML::Element)
      prefix = @namespace_urn[item.namespace]
      # An unmapped namespace yields no handler name instead of raising
      # on String + nil; such elements are simply descended into.
      handler = prefix && "process_#{prefix}_#{item.name.tr_s(":-", "__")}"
      if handler && self.class.method_defined?( handler )
        send( handler, item, output_node )
      else
        process_children( item, output_node )
      end
    elsif item.kind_of?(REXML::Text) && item.value !~ /\A\s*\z/
      # \A and \z anchor the whole string. With ^ and $ (line anchors) a
      # text node that merely contains a blank line would be discarded.
      output_node.add_text( item.value )
    end
  end

  if !output_node.has_elements? && !output_node.has_text?
    output_node.add_text("")
  end
end

#process_table_table(element, output_node) ⇒ Object



194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/odt2html/analyze_content.rb', line 194

# Convert a table element into an HTML <table> under output_node.
#
# The table gets the element's registered style as its class and zero
# cellpadding/cellspacing. Column definitions are processed first, then the
# header rows (wrapped in <thead>), then the ordinary rows (in <tbody>).
#
# <thead> is emitted only when the table really has a table-header-rows
# child: REXML::XPath.match returns an Array, and an empty Array is still
# truthy in Ruby, so the result has to be tested with empty?.
def process_table_table( element, output_node )
  style_name = register_style( element )
  table_el = emit_element( output_node, "table",
    {"class" => style_name, "cellpadding" => "0", "cellspacing" => "0"} )
  process_children( element, table_el, "#{@table_ns}:table-column" )

  header_rows = "#{@table_ns}:table-header-rows"
  unless REXML::XPath.match( element, header_rows ).empty?
    thead = emit_element( table_el, "thead" )
    process_children( element, thead, "#{header_rows}/#{@table_ns}:table-row" )
  end

  tbody = emit_element( table_el, "tbody" )
  process_children( element, tbody, "#{@table_ns}:table-row" )
end

#process_table_table_cell(element, output_node) ⇒ Object



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# File 'lib/odt2html/analyze_content.rb', line 222

# Convert a table cell into one or more <td> elements under output_node.
#
# The <td> carries the cell's registered style as class (when it has one)
# and colspan/rowspan taken from number-columns-spanned and
# number-rows-spanned. A number-columns-repeated attribute causes that many
# identical <td> elements to be emitted, each with the cell's children.
def process_table_table_cell( element, output_node )
  attr_hash = {}
  style_name = register_style( element )
  attr_hash["class"] = style_name unless style_name.nil?

  repeated = element.attribute("#{@table_ns}:number-columns-repeated")
  repeat = repeated.nil? ? 1 : repeated.value.to_i

  col_span = element.attribute("#{@table_ns}:number-columns-spanned")
  attr_hash["colspan"] = col_span.value unless col_span.nil?

  row_span = element.attribute("#{@table_ns}:number-rows-spanned")
  attr_hash["rowspan"] = row_span.value unless row_span.nil?

  (1..repeat).each do |_copy|
    cell = emit_element( output_node, "td", attr_hash )
    process_children( element, cell )
  end
end

#process_table_table_column(element, output_node) ⇒ Object



207
208
209
210
211
212
213
214
# File 'lib/odt2html/analyze_content.rb', line 207

# Convert a table column definition into a <col> element under output_node.
#
# The column's registered style becomes the class, and the value of
# number-columns-repeated (if present) becomes the span attribute.
def process_table_table_column( element, output_node )
  style_name = register_style(element)
  repeated = element.attribute("#{@table_ns}:number-columns-repeated")
  span = repeated.nil? ? nil : repeated.value
  emit_element( output_node, "col", {"class" => style_name, "span" => span} )
end

#process_table_table_row(element, output_node) ⇒ Object



216
217
218
219
220
# File 'lib/odt2html/analyze_content.rb', line 216

# Convert a table row into a <tr> under output_node, classed with the row's
# registered style, and process only its table-cell children into it.
def process_table_table_row( element, output_node )
  row_attrs = {"class" => register_style( element )}
  row = emit_element( output_node, "tr", row_attrs )
  process_children( element, row, "#{@table_ns}:table-cell" )
end

#process_text_a(element, output_node) ⇒ Object



130
131
132
133
134
135
136
# File 'lib/odt2html/analyze_content.rb', line 130

# Convert a hyperlink element into an <a> under output_node.
#
# The link's registered style becomes the class and the value of its href
# attribute (in the @xlink_ns namespace) becomes the HTML href; the link's
# children are then processed into the new <a>.
def process_text_a( element, output_node )
  style_name = register_style( element )
  target = element.attribute("#{@xlink_ns}:href").value
  anchor_attrs = {"class" => style_name, "href" => target}
  anchor = emit_element( output_node, "a", anchor_attrs )
  process_children( element, anchor )
end

#process_text_bookmark(element, output_node) ⇒ Object



142
143
144
# File 'lib/odt2html/analyze_content.rb', line 142

# A bookmark is emitted exactly like a bookmark-start element, so this
# simply delegates to that handler.
def process_text_bookmark( element, output_node )
  process_text_bookmark_start(element, output_node)
end

#process_text_bookmark_start(element, output_node) ⇒ Object



146
147
148
149
150
151
152
# File 'lib/odt2html/analyze_content.rb', line 146

# Emit a named anchor (<a name="...">) under output_node for a bookmark.
#
# The anchor's name is the value of the element's name attribute (in the
# @text_ns namespace) and its class is the registered style. An empty text
# node is added to the anchor after it is created.
def process_text_bookmark_start( element, output_node )
  anchor_attrs = {
    "class" => register_style( element ),
    "name" => element.attribute("#{@text_ns}:name").value
  }
  emit_element( output_node, "a", anchor_attrs ).add_text("")
end

#process_text_h(element, output_node) ⇒ Object

Headings are processed as <hn> elements. The heading level comes from the text:outline-level attribute, with a maximum of 6.



103
104
105
106
107
108
109
110
111
# File 'lib/odt2html/analyze_content.rb', line 103

# Convert a heading into an <h1>..<h6> element under output_node, classed
# with the heading's registered style, and process its children into it.
#
# The level comes from the outline-level attribute (in the @text_ns
# namespace) and is clamped to the range HTML supports: a missing
# attribute, or a value below 1 (including a non-numeric value, which to_i
# turns into 0), is treated as level 1; anything above 6 becomes 6.
def process_text_h( element, output_node )
  style_name = register_style( element )

  level_attr = element.attribute("#{@text_ns}:outline-level")
  level = level_attr.nil? ? 1 : level_attr.value.to_i
  level = 1 if level < 1
  level = 6 if level > 6

  heading = emit_element( output_node, "h#{level}", {"class" => style_name} )
  process_children( element, heading )
end

#process_text_line_break(element, output_node) ⇒ Object



138
139
140
# File 'lib/odt2html/analyze_content.rb', line 138

# A line break becomes an attribute-less <br> under output_node.
# The element argument is not consulted.
def process_text_line_break( element, output_node )
  emit_element( output_node, "br" )
end

#process_text_list(element, output_node) ⇒ Object



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/odt2html/analyze_content.rb', line 154

# Convert a list into a <ul> or <ol> under output_node and process its
# children into it.
#
# The nesting level is one more than the number of list ancestors. The
# style name is read from the list itself at level 1, otherwise from an
# ancestor list via the ancestor::...[last()] expression below; when found
# it is suffixed with "_<level>" and has periods turned into underscores,
# and the result is used both as the key into @style_info and as the class.
#
# When @style_info has an entry for that key it is marked as used, and the
# tag becomes "ol" if the entry has a list-style-type property whose value
# is not one of disc/circle/square. A style name with no @style_info entry
# is tolerated (as register_style does): the list stays a <ul> and still
# receives the class.
def process_text_list( element, output_node )
  tag = "ul"
  level = REXML::XPath.match( element, "ancestor::#{@text_ns}:list" ).size + 1

  if level == 1
    style_attr = element.attribute("#{@text_ns}:style-name")
  else
    style_attr = REXML::XPath.match( element,
      "ancestor::#{@text_ns}:list[last()]/@#{@text_ns}:style-name" )[0]
  end

  style_name = nil
  unless style_attr.nil?
    style_name = (style_attr.value + "_" + level.to_s).tr_s('.','_')
    info = @style_info[style_name]
    unless info.nil?
      info.block_used = true

      # Decide between a numbered and a bulleted list
      list_type = info.find { |obj| obj.property == "list-style-type" }
      if list_type && list_type.value.to_s !~ /disc|circle|square/
        tag = "ol"
      end
    end
  end

  list_el = emit_element( output_node, tag, {"class" => style_name} )
  process_children( element, list_el )
end

#process_text_list_item(element, output_node) ⇒ Object

List items are easy; just put the children inside a <li> </li> pair.



188
189
190
191
192
# File 'lib/odt2html/analyze_content.rb', line 188

# Wrap a list item's children in an <li> under output_node; the <li> is
# classed with the item's registered style.
def process_text_list_item( element, output_node )
  li_attrs = {"class" => register_style( element )}
  list_item = emit_element( output_node, "li", li_attrs )
  process_children( element, list_item )
end

#process_text_list_style(element) ⇒ Object

Create styles for each level of a <text:list-style> element. For bulleted lists, it sets the bullet type by indexing into the marker array; for numbered lists, it uses the numbering hash to translate OpenDocument’s style:num-format to the corresponding CSS list-style-type.



274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/odt2html/analyze_content.rb', line 274

# Record a list-style-type for every level of a list-style element.
#
# Each child contributes a selector "<list style name>_<level>".
# Bullet levels cycle through circle, disc, square by level number;
# numbered levels translate the num-format attribute through the table
# below. Children of any other kind only have their level read and are
# otherwise skipped. The result is handed to process_normal_style_attr.
def process_text_list_style( element )
  bullet_markers = ["circle", "disc", "square"]
  number_formats = {
    "1" => "decimal",
    "a" => "lower-alpha", "A" => "upper-alpha",
    "i" => "lower-roman", "I" => "upper-roman"
  }

  list_name = element.attribute( "#{@style_ns}:name" ).value
  element.elements.each do |level_style|
    level = level_style.attribute("#{@text_ns}:level").value
    selector = list_name + "_" + level

    list_style_type =
      case level_style.name
      when "list-level-style-bullet"
        bullet_markers[(level.to_i - 1) % 3]
      when "list-level-style-number"
        number_formats[level_style.attribute("#{@style_ns}:num-format").value]
      else
        next
      end

    process_normal_style_attr( selector, "list-style-type", list_style_type )
  end
end

#process_text_p(element, output_node) ⇒ Object

Paragraphs are processed as <p> elements. (The following is no longer valid: a <text:p> with no children will generate a <br />.)



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/odt2html/analyze_content.rb', line 71

# Convert a paragraph into a <p> under output_node and process its children
# into it. The class attribute is always supplied (its value is the
# paragraph's registered style, which may be nil).
#
# Border merging: when this paragraph has the same style as the previously
# emitted one, that style has a top border, and the paragraph does not
# carry a join-border attribute equal to "false", the top border of this
# paragraph and the bottom border of the previous one are suppressed so the
# two appear joined.
#
# The attribute is read with element.attribute, like every other handler
# here, and compared with the string "false": an attribute value is a
# string and can never equal the boolean false. A style name with no entry
# in @style_info is treated as having no top border.
#
# Updates @previous_para_style and @previous_para for the next paragraph.
# A paragraph with no elements and no text yields an empty <p>; the former
# <br /> fallback is disabled.
def process_text_p( element, output_node )
  style_name = register_style( element )

  # always include class attribute
  attr_hash = {"class" => style_name}

  if !style_name.nil? && @previous_para_style == style_name
    info = @style_info[style_name]
    join_attr = element.attribute("#{@style_ns}:join-border")
    join_borders = join_attr.nil? || join_attr.value != "false"
    if info && info.has_top_border? && join_borders
      attr_hash["style"] = "border-top: none"
      modify_style_attribute( @previous_para, "border-bottom", "none" )
    end
  end

  para = emit_element( output_node, "p", attr_hash )
  @previous_para_style = style_name
  @previous_para = para

  if element.has_elements? || element.has_text?
    process_children( element, para )
  end
end

#process_text_s(element, output_node) ⇒ Object



126
127
128
# File 'lib/odt2html/analyze_content.rb', line 126

# A space element contributes exactly one space character of text to
# output_node; the element itself is not consulted.
def process_text_s( element, output_node )
  single_space = " "
  output_node.add_text( single_space )
end

#process_text_span(element, output_node) ⇒ Object

Text spans cannot produce a newline after their opening tag, so the extra "" parameter is passed to emit_start_tag



116
117
118
119
120
# File 'lib/odt2html/analyze_content.rb', line 116

# Convert a text span into a <span> under output_node, classed with the
# span's registered style, and process its children into it.
def process_text_span( element, output_node )
  span_attrs = {"class" => register_style( element )}
  span_el = emit_element( output_node, "span", span_attrs )
  process_children( element, span_el )
end

#process_text_tab(element, output_node) ⇒ Object



122
123
124
# File 'lib/odt2html/analyze_content.rb', line 122

# A tab element is rendered as one space character of text in output_node;
# the element itself is not consulted.
def process_text_tab( element, output_node )
  tab_replacement = " "
  output_node.add_text( tab_replacement )
end

#register_style(element) ⇒ Object

Return the style name for this element, with periods changed to underscores to make it valid CSS.

Side effect: registers this style as “having been used” in the document



254
255
256
257
258
259
260
261
262
263
264
# File 'lib/odt2html/analyze_content.rb', line 254

# Return the element's style name with periods turned into underscores, or
# nil when the element has no style-name attribute under its own namespace
# prefix.
#
# Side effect: if @style_info has an entry for the returned name, that
# entry is flagged as used (block_used = true).
def register_style( element )
  style_attr = element.attribute("#{element.prefix}:style-name")
  return nil if style_attr.nil?

  css_name = style_attr.value.tr_s('.','_')
  info = @style_info[css_name]
  info.block_used = true unless info.nil?
  css_name
end