Class: Html2Doc::IsoDIS

Inherits:
Html2Doc show all
Defined in:
lib/html2doc/lists.rb

Instance Method Summary collapse

Instance Method Details

#cleanup(docxml) ⇒ Object



163
164
165
166
167
168
169
170
# File 'lib/html2doc/lists.rb', line 163

# Runs the inherited cleanup, then removes the class attribute from every
# div whose class is exactly Quote, Example or Note.
#
# @param docxml the document tree being post-processed (presumably a
#   Nokogiri document; confirm against the caller)
# @return the same docxml object that was passed in
def cleanup(docxml)
  super
  styled_divs = "//div[@class = 'Quote' or @class = 'Example' or "\
                "@class = 'Note']"
  docxml.xpath(styled_divs).each { |div| div.delete("class") }
  docxml
end

#indent_lists(docxml) ⇒ Object



91
92
93
94
95
96
97
98
# File 'lib/html2doc/lists.rb', line 91

# Passes every paragraph nested inside a Note, Example or Quote div to
# indent_lists1.
#
# @param docxml the document tree to scan
# @return the collection of matching divs, as returned by the query
def indent_lists(docxml)
  containers = docxml.xpath("//div[@class = 'Note' or @class = 'Example' or "\
                            "@class = 'Quote']")
  containers.each do |div|
    div.xpath(".//p").each { |para| indent_lists1(para) }
  end
end

#indent_lists1(para) ⇒ Object



100
101
102
103
104
105
106
107
# File 'lib/html2doc/lists.rb', line 100

# Rewrites a list paragraph's class one level deeper. A class made of
# ListContinue/ListNumber (optionally prefixed with Mso) plus a single digit
# becomes the un-prefixed name, the digit plus one (capped at 5), and a
# trailing "-". Any other class is left untouched.
#
# @param para an element exposing ["class"] and ["class"]=
# @return [String, nil] the new class value, or nil when nothing matched
def indent_lists1(para)
  matched = /^(ListContinue|ListNumber|MsoListContinue|MsoListNumber)(\d)$/
    .match(para["class"])
  return unless matched

  stem = matched[1].sub(/^Mso/, "")
  depth = [matched[2].to_i + 1, 5].min
  para["class"] = "#{stem}#{depth}-"
end

#list2para(list) ⇒ Object



3
4
5
6
7
8
9
# File 'lib/html2doc/lists.rb', line 3

# Hands each direct li child of the list to list2para_level.
#
# @param list the list element whose items are processed
# @return nil when the list has no li children, otherwise the collection of
#   li children
def list2para(list)
  items = list.xpath("./li")
  return if items.empty?

  items.each { |entry| list2para_level(entry, list) }
end

#list2para_level(item, list) ⇒ Object



11
12
13
14
15
16
# File 'lib/html2doc/lists.rb', line 11

# Turns a list item into a paragraph: reads and removes its "level"
# attribute, renames the element to "p", and, when a level was present,
# passes it on to list2para_nest for styling.
#
# @param item the li element being converted
# @param list the list element that contains the item
# @return nil when the item carried no level, otherwise whatever
#   list2para_nest returns
def list2para_level(item, list)
  depth = item["level"]
  item.delete("level")
  item.name = "p"
  return unless depth

  list2para_nest(item, depth, list)
end

#list2para_nest(item, level, list) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/html2doc/lists.rb', line 18

# Applies the list paragraph style for the given level to the item and to
# each of its direct p children. If the first p child is preceded by exactly
# two nodes and the second is a tab span (style "mso-tab-count:1"), both
# nodes are moved to the front of that p so the label and tab sit inside it.
#
# Safe when the first p has no preceding siblings or no children; neither
# case raises.
#
# @param item the converted list item
# @param level the level value passed to list2para_style
# @param list the enclosing list element; its name selects the style family
# @return not meaningful to callers
def list2para_nest(item, level, list)
  style = list2para_style(list.name, level)
  item["class"] = style
  item.xpath("./p").each { |para| para["class"] = style }

  first_para = item.at("./p")
  return unless first_para

  lead = first_para.xpath("./preceding-sibling::* | "\
                          "./preceding-sibling::text()")
  # Check the size first: indexing an empty result yields nil.
  return unless lead.size == 2

  label, tab = lead[0], lead[1]
  return unless tab.name == "span" && tab["style"] == "mso-tab-count:1"

  # prepend_child also works when the paragraph has no children yet.
  # Insert the tab first, then the label in front of it.
  first_para.prepend_child(tab)
  first_para.prepend_child(label)
end

#list2para_style(listtype, depth) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/html2doc/lists.rb', line 48

# Maps a list element name and a depth to a paragraph style name.
# "ul" uses the ListContinue family and "ol" the ListNumber family. Depth "1"
# gives the bare name, "2" to "4" the Mso-prefixed name with that depth, and
# any other depth the Mso-prefixed name with 5.
#
# @param listtype [String] "ul" or "ol"
# @param depth [String] the depth as a string
# @return [String, nil] the style name, or nil for any other listtype
def list2para_style(listtype, depth)
  stem = { "ul" => "ListContinue", "ol" => "ListNumber" }[listtype]
  return unless stem

  if depth == "1" then "#{stem}1"
  elsif %w[2 3 4].include?(depth) then "Mso#{stem}#{depth}"
  else "Mso#{stem}5"
  end
end

#list2para_unnest_para(para, first_p, last_p) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/html2doc/lists.rb', line 32

# Unwraps a nested paragraph when doing so cannot reorder visible content.
#
# Nothing happens if any sibling after last_p has non-blank text. Otherwise:
# * if nothing (ignoring blank text) precedes first_p, para is replaced by
#   its own children;
# * if exactly two nodes precede first_p and the second is a tab span
#   (style "mso-tab-count:1"), i.e. a label followed by a tab, first_p is
#   replaced by its own children.
#
# @param para the outer element that may be unwrapped
# @param first_p the first nested element within para
# @param last_p the last nested element within para
# @return the result of the replace call, or nil when nothing was changed
def list2para_unnest_para(para, first_p, last_p)
  trailing = last_p.xpath("./following-sibling::* | ./following-sibling::text()")
  return unless trailing.all? { |node| node.text.strip.empty? }

  leading = first_p.xpath("./preceding-sibling::* | "\
                          "./preceding-sibling::text()[normalize-space()]")
  if leading.empty?
    para.replace(para.children)
  elsif leading.size == 2 && leading[-1].name == "span" &&
      leading[-1]["style"] == "mso-tab-count:1"
    # label, tab, paragraph: the label and tab are left in place
    first_p.replace(first_p.children)
  end
end

#list_add(xpath, liststyles, listtype, level) ⇒ Object



109
110
111
112
113
114
115
116
# File 'lib/html2doc/lists.rb', line 109

# Processes each list in the given collection: marks it "seen" when level is
# 1, assigns a random UUID as its id if it has none, then runs
# list_add_number and list_add_tail on it.
#
# @param xpath the collection of list elements to process
# @param liststyles passed through unchanged to the helpers
# @param listtype passed through unchanged to the helpers
# @param level [Integer] the nesting level of these lists
# @return the collection that was passed in
def list_add(xpath, liststyles, listtype, level)
  top_level = level == 1
  xpath.each do |list|
    list["seen"] = true if top_level
    list["id"] = UUIDTools::UUID.random_create unless list["id"]
    list_add_number(list, liststyles, listtype, level)
    list_add_tail(list, liststyles, listtype, level)
  end
end

#list_add_number(list, liststyles, listtype, level) ⇒ Object



118
119
120
121
122
123
124
# File 'lib/html2doc/lists.rb', line 118

# Labels the list's own items, i.e. the li descendants that are not inside a
# nested ol or ul. The running index begins one below the list's "start"
# attribute when present, otherwise at 0. style_list_iso returns the updated
# index for each item, and each item is then passed to list_add1.
#
# @param list the list element being numbered
# @param liststyles passed through unchanged to list_add1
# @param listtype passed through to style_list_iso and list_add1
# @param level passed through to style_list_iso and list_add1
# @return the collection of items that were processed
def list_add_number(list, liststyles, listtype, level)
  start = list["start"]
  counter = start ? start.to_i - 1 : 0
  own_items = list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")
  own_items.each do |item|
    counter = style_list_iso(item, level, listtype, counter)
    list_add1(item, liststyles, listtype, level)
  end
end

#list_add_tail(list, liststyles, listtype, level) ⇒ Object



126
127
128
129
130
131
132
# File 'lib/html2doc/lists.rb', line 126

# Finds the ul/ol descendants of the list that do not sit inside an li which
# itself has an ancestor carrying this list's id, and passes the parent of
# each one to list_add1 with the level reduced by one.
#
# @param list the list element; its "id" attribute is embedded in the query
# @param liststyles passed through unchanged to list_add1
# @param listtype passed through unchanged to list_add1
# @param level [Integer] the current level; list_add1 receives level - 1
# @return the collection of matched sub-lists
def list_add_tail(list, liststyles, listtype, level)
  outside_items = "not(ancestor::li/ancestor::*/@id = '#{list['id']}')"
  query = ".//ul[#{outside_items}] | .//ol[#{outside_items}]"
  list.xpath(query).each do |sublist|
    list_add1(sublist.parent, liststyles, listtype, level - 1)
  end
end

#listidx(idx, level) ⇒ Object



153
154
155
156
157
158
159
160
161
# File 'lib/html2doc/lists.rb', line 153

# Renders a 1-based item index in the numbering scheme named by level:
# "1" decimal, "a"/"A" lower/upper case letters (1 is a/A), "i"/"I"
# lower/upper case Roman numerals.
#
# @param idx [Integer] the item index
# @param level [String, nil] the numbering scheme code
# @return [String, nil] the rendered index, or nil for any other level value
def listidx(idx, level)
  case level
  when "1" then idx.to_s
  # the code point just before "a"/"A" plus idx gives the idx-th letter
  when "a", "A" then (level.ord - 1 + idx).chr
  when "i" then RomanNumerals.to_roman(idx).downcase
  when "I" then RomanNumerals.to_roman(idx).upcase
  end
end

#listlabel(listtype, idx, level) ⇒ Object



146
147
148
149
150
151
# File 'lib/html2doc/lists.rb', line 146

# Builds the visible label for a list item: an em dash for :ul, or the
# listidx rendering followed by ")" for :ol.
#
# @param listtype [Symbol] :ul or :ol
# @param idx the item index, forwarded to listidx
# @param level the numbering scheme, forwarded to listidx
# @return [String, nil] the label, or nil for any other listtype
def listlabel(listtype, idx, level)
  return "—" if listtype == :ul
  return unless listtype == :ol

  "#{listidx(idx, level)})"
end

#lists(docxml, liststyles) ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/html2doc/lists.rb', line 65

# Runs the inherited list processing, then flattens the list markup:
# 1. for every p that directly contains an ol/ul, offers the p together with
#    its first and last such list to list2para_unnest_para;
# 2. replaces every ol and ul by its own children;
# 3. calls unnest_list_paras and then indent_lists on the document.
#
# @param docxml the document tree being processed
# @param liststyles forwarded to the inherited implementation by super
# @return whatever indent_lists returns
def lists(docxml, liststyles)
  super
  docxml.xpath("//p[ol | ul]").each do |para|
    first_list = para.at("./ol | ./ul")
    last_list = para.at("./*[name() = 'ul' or name() = 'ol'][last()]")
    list2para_unnest_para(para, first_list, last_list)
  end
  docxml.xpath("//ol | //ul").each { |list| list.replace(list.children) }
  unnest_list_paras(docxml)
  indent_lists(docxml)
end

#style_list_iso(elem, level, listtype, idx) ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
# File 'lib/html2doc/lists.rb', line 134

# Labels one list item and records its level.
#
# An item that contains a nested list but has no direct p child is skipped
# and the index is returned unchanged. Otherwise the index is incremented, a
# label is built by listlabel (using the "type" attribute of the last ol in
# the item's ancestor query, if there is one), and the label followed by a
# tab span is inserted as the item's first content. The item's "level"
# attribute is then set.
#
# Works for items that have no children at all; the label is simply added as
# the only content.
#
# @param elem the li element to label
# @param level the level stored in the item's "level" attribute
# @param listtype the list type forwarded to listlabel
# @param idx [Integer] the index of the previous labelled item
# @return [Integer] the index to carry forward to the next item
def style_list_iso(elem, level, listtype, idx)
  return idx if elem.at(".//ol | .//ul") && !elem.at("./p")

  idx += 1
  ol = elem.xpath("./ancestor::ol").last
  label = listlabel(listtype, idx, ol ? ol["type"] : nil)
  # prepend_child inserts before the first child, or appends when the item
  # is empty, so an empty li no longer raises.
  elem.prepend_child("#{label}<span style='mso-tab-count:1'>&#xa0;</span>")
  elem["level"] = level
  idx
end

#unnest_list_paras(docxml) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/html2doc/lists.rb', line 78

# Offers nested paragraphs inside first-level list paragraphs (class
# ListContinue1 or ListNumber1, containing at least one p) to
# list2para_unnest_para: first the list paragraph itself when it has a direct
# p child, then every descendant p that has a direct p child. Each call
# passes the element with its first and last direct p child.
#
# @param docxml the document tree being processed
# @return the collection of first-level list paragraphs that matched
def unnest_list_paras(docxml)
  top_items = "//p[@class = 'ListContinue1' or @class = 'ListNumber1'][.//p]"
  docxml.xpath(top_items).each do |outer|
    if outer.at("./p")
      list2para_unnest_para(outer, outer.at("./p"), outer.at("./p[last()]"))
    end
    outer.xpath(".//p[p]").each do |inner|
      list2para_unnest_para(inner, inner.at("./p"), inner.at("./p[last()]"))
    end
  end
end