Class: Html2Doc::IsoDIS
Instance Method Summary
- #cleanup(docxml) ⇒ Object
- #indent_lists(docxml) ⇒ Object
- #indent_lists1(para) ⇒ Object
- #list2para(list) ⇒ Object
- #list2para_level(item, list) ⇒ Object
- #list2para_nest(item, level, list) ⇒ Object
- #list2para_style(listtype, depth) ⇒ Object
- #list2para_unnest_para(para, first_p, last_p) ⇒ Object
- #list_add(xpath, liststyles, listtype, level) ⇒ Object
- #list_add_number(list, liststyles, listtype, level) ⇒ Object
- #list_add_tail(list, liststyles, listtype, level) ⇒ Object
- #listidx(idx, level) ⇒ Object
- #listlabel(listtype, idx, level) ⇒ Object
- #lists(docxml, liststyles) ⇒ Object
- #style_list_iso(elem, level, listtype, idx) ⇒ Object
- #unnest_list_paras(docxml) ⇒ Object
Instance Method Details
#cleanup(docxml) ⇒ Object
165 166 167 168 169 170 171 172 |
# File 'lib/html2doc/lists.rb', line 165
# Runs the inherited cleanup with the same argument, then calls
# +delete("class")+ on every +div+ whose +class+ attribute is exactly
# Quote, Example or Note.
#
# @param docxml document tree responding to +xpath+
#   (presumably a Nokogiri document; confirm against callers)
# @return the +docxml+ object that was passed in
def cleanup(docxml)
  super
  styled_divs = docxml.xpath("//div[@class = 'Quote' or @class = 'Example' or " \
                             "@class = 'Note']")
  styled_divs.each { |div| div.delete("class") }
  docxml
end
#indent_lists(docxml) ⇒ Object
91 92 93 94 95 96 97 98 |
# File 'lib/html2doc/lists.rb', line 91
# Passes every paragraph found anywhere inside a Note, Example or Quote
# +div+ to #indent_lists1.
#
# @param docxml document tree responding to +xpath+
# @return the collection of matching +div+ elements that was iterated
def indent_lists(docxml)
  containers = docxml.xpath("//div[@class = 'Note' or @class = 'Example' or " \
                            "@class = 'Quote']")
  containers.each do |container|
    container.xpath(".//p").each { |para| indent_lists1(para) }
  end
end
#indent_lists1(para) ⇒ Object
100 101 102 103 104 105 106 107 |
# File 'lib/html2doc/lists.rb', line 100
# Rewrites a list paragraph's +class+ one level deeper.
#
# A class of the form (Mso)ListContinueN or (Mso)ListNumberN, with N a single
# digit, becomes the same base name without the "Mso" prefix, followed by
# N + 1 (never more than 5) and a trailing "-". Any other class, or a missing
# one, is left untouched.
#
# @param para element supporting +[]+ and +[]=+ for the "class" attribute
# @return [String, nil] the new class value, or nil when nothing matched
def indent_lists1(para)
  matched = /^(ListContinue|ListNumber|MsoListContinue|MsoListNumber)(\d)$/
    .match(para["class"])
  return unless matched

  style = matched[1].sub(/^Mso/, "")
  depth = [matched[2].to_i + 1, 5].min # levels are capped at 5
  para["class"] = "#{style}#{depth}-"
end
#list2para(list) ⇒ Object
3 4 5 6 7 8 9 |
# File 'lib/html2doc/lists.rb', line 3
# Hands each direct +li+ child of +list+ to #list2para_level.
#
# @param list list element responding to +xpath+
# @return nil when the list has no direct +li+ children, otherwise the
#   collection of +li+ elements that was iterated
def list2para(list)
  items = list.xpath("./li")
  return if items.empty?

  items.each { |item| list2para_level(item, list) }
end
#list2para_level(item, list) ⇒ Object
11 12 13 14 15 16 |
# File 'lib/html2doc/lists.rb', line 11
# Turns a list item into a paragraph: reads its "level" attribute, deletes
# that attribute, renames the element to +p+, and, when a level was present,
# delegates styling to #list2para_nest.
#
# @param item the list item element (mutated in place)
# @param list the list element the item belongs to
# @return the result of #list2para_nest, or nil when the item had no level
def list2para_level(item, list)
  depth = item["level"]
  item.delete("level")
  item.name = "p"
  return unless depth

  list2para_nest(item, depth, list)
end
#list2para_nest(item, level, list) ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/html2doc/lists.rb', line 18
# Applies the list paragraph style for +level+ to +item+ and to each of its
# direct +p+ children, then, if the first +p+ is preceded by exactly two
# nodes ending in a <span style="mso-tab-count:1">, moves those two nodes
# (label and tab) to the start of that +p+.
#
# The sibling count is checked before the last sibling is inspected, so a
# first +p+ with no preceding nodes (no label was inserted) is simply left
# alone instead of raising NoMethodError on nil.
#
# @param item the former list item, already renamed to a paragraph
# @param level the nesting level as read from the item's "level" attribute
# @param list the list element; its +name+ selects the style family
# @return not meaningful; the method is called for its side effects
def list2para_nest(item, level, list)
  style = list2para_style(list.name, level)
  item["class"] = style
  item.xpath("./p").each { |para| para["class"] = style }
  p1 = item.at("./p") or return
  prev = p1.xpath("./preceding-sibling::* | ./preceding-sibling::text()")
  return unless prev.size == 2 && prev[-1].name == "span" &&
    prev[-1]["style"] == "mso-tab-count:1"

  # Insert the tab first and the label second, each at the very front, so
  # the final order inside the paragraph is: label, tab, original content.
  [prev[1], prev[0]].each do |node|
    head = p1.children.first
    if head
      head.previous = node
    else
      # An empty paragraph has no first child to insert before.
      p1.add_child(node)
    end
  end
end
#list2para_style(listtype, depth) ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/html2doc/lists.rb', line 48
# Maps a list element name and a nesting depth to a paragraph style name.
#
# "ul" yields the ListContinue family and "ol" the ListNumber family.
# Depth "1" gives the plain name with a 1 suffix, depths "2" to "4" give the
# "Mso"-prefixed name with that depth, and every other depth value gives the
# "Mso"-prefixed name with 5.
#
# @param listtype [String] "ul" or "ol"
# @param depth [String] nesting depth as a string
# @return [String, nil] the style name, or nil for any other list type
def list2para_style(listtype, depth)
  base = { "ul" => "ListContinue", "ol" => "ListNumber" }[listtype]
  return unless base

  case depth
  when "1" then "#{base}1"
  when "2", "3", "4" then "Mso#{base}#{depth}"
  else "Mso#{base}5"
  end
end
#list2para_unnest_para(para, first_p, last_p) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/html2doc/lists.rb', line 32
# Removes one level of paragraph nesting where that loses nothing.
#
# Nothing happens if any node following +last_p+ has non-blank text.
# Otherwise, when nothing (other than blank text) precedes +first_p+, +para+
# is replaced by its children. When exactly two nodes precede it and the
# second is a <span style="mso-tab-count:1">, +first_p+ is replaced by its
# children instead.
#
# @param para the outer paragraph
# @param first_p the first nested element inside +para+
# @param last_p the last nested element inside +para+
# @return the result of the +replace+ call performed, or nil when none was
def list2para_unnest_para(para, first_p, last_p)
  trailing = last_p.xpath("./following-sibling::* | ./following-sibling::text()")
  return if trailing.any? { |node| !node.text.strip.empty? }

  leading = first_p.xpath("./preceding-sibling::* | " \
                          "./preceding-sibling::text()[normalize-space()]")
  # bullet, tab, paragraph: ignore bullet, tab
  return para.replace(para.children) if leading.empty?

  tab = leading[-1]
  if leading.size == 2 && tab.name == "span" &&
      tab["style"] == "mso-tab-count:1"
    first_p.replace(first_p.children)
  end
end
#list_add(xpath, liststyles, listtype, level) ⇒ Object
109 110 111 112 113 114 115 116 |
# File 'lib/html2doc/lists.rb', line 109
# Processes each list in +xpath+: marks it "seen" when +level+ is 1, gives it
# a random UUID as "id" if it has none, then numbers its items with
# #list_add_number and handles its nested lists with #list_add_tail.
#
# @param xpath collection of list elements to process
# @param liststyles passed through unchanged to the helper methods
# @param listtype passed through unchanged to the helper methods
# @param level nesting level of these lists
# @return the +xpath+ collection that was iterated
def list_add(xpath, liststyles, listtype, level)
  top_level = level == 1
  xpath.each do |list|
    list["seen"] = true if top_level
    list["id"] ||= UUIDTools::UUID.random_create
    list_add_number(list, liststyles, listtype, level)
    list_add_tail(list, liststyles, listtype, level)
  end
end
#list_add_number(list, liststyles, listtype, level) ⇒ Object
118 119 120 121 122 123 124 |
# File 'lib/html2doc/lists.rb', line 118
# Labels the items that belong to +list+ itself, i.e. every descendant +li+
# that is not inside a nested +ol+ or +ul+.
#
# Counting starts from the list's "start" attribute when present (the first
# item then receives that number), otherwise from 1. Each item is passed to
# #style_list_iso, which returns the updated counter, and then to +list_add1+.
#
# @param list the list element
# @param liststyles passed through to +list_add1+
# @param listtype passed through to #style_list_iso and +list_add1+
# @param level nesting level of +list+
# @return the collection of items that was iterated
def list_add_number(list, liststyles, listtype, level)
  start = list["start"]
  counter = start ? start.to_i - 1 : 0
  own_items = list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")
  own_items.each do |item|
    counter = style_list_iso(item, level, listtype, counter)
    list_add1(item, liststyles, listtype, level)
  end
end
#list_add_tail(list, liststyles, listtype, level) ⇒ Object
126 127 128 129 130 131 132 |
# File 'lib/html2doc/lists.rb', line 126
# Finds the +ul+ and +ol+ descendants of +list+ selected by an XPath filter
# on the list's "id", and passes the parent of each to +list_add1+ with
# +level+ reduced by one.
#
# @param list the list element; its "id" attribute is interpolated into the
#   XPath query
# @param liststyles passed through to +list_add1+
# @param listtype passed through to +list_add1+
# @param level nesting level of +list+
# @return the collection of nested lists that was iterated
def list_add_tail(list, liststyles, listtype, level)
  list_id = list["id"]
  sublists = list.xpath(
    ".//ul[not(ancestor::li/ancestor::*/@id = '#{list_id}')] | " \
    ".//ol[not(ancestor::li/ancestor::*/@id = '#{list_id}')]"
  )
  sublists.each do |sublist|
    list_add1(sublist.parent, liststyles, listtype, level - 1)
  end
end
#listidx(idx, level) ⇒ Object
155 156 157 158 159 160 161 162 163 |
# File 'lib/html2doc/lists.rb', line 155
# Renders a 1-based item index in the numbering scheme named by +level+.
#
#   "1" decimal, "a" lower-case letter, "A" upper-case letter,
#   "i" lower-case roman, "I" upper-case roman
#
# Letters are produced by character-code arithmetic, so an index beyond 26
# yields whatever characters follow the alphabet rather than a
# multi-letter label.
#
# @param idx [Integer] 1-based position of the item
# @param level [String] numbering scheme code
# @return [String, nil] the rendered index, or nil for any other +level+
def listidx(idx, level)
  case level
  when "1" then idx.to_s
  when "a" then ("a".ord + idx - 1).chr
  when "A" then ("A".ord + idx - 1).chr
  when "i", "I"
    roman = RomanNumerals.to_roman(idx)
    level == "i" ? roman.downcase : roman.upcase
  end
end
#listlabel(listtype, idx, level) ⇒ Object
148 149 150 151 152 153 |
# File 'lib/html2doc/lists.rb', line 148
# Builds the visible label for a list item.
#
# @param listtype [Symbol] +:ul+ or +:ol+
# @param idx [Integer] item index, used for ordered lists only
# @param level numbering scheme code handed to #listidx
# @return [String, nil] an em dash for +:ul+, the #listidx value followed by
#   ")" for +:ol+, and nil for any other list type
def listlabel(listtype, idx, level)
  return "—" if listtype == :ul
  return "#{listidx(idx, level)})" if listtype == :ol

  nil
end
#lists(docxml, liststyles) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/html2doc/lists.rb', line 65
# Runs the inherited list handling with the same arguments, then flattens
# the result:
#
# 1. every +p+ that directly contains an +ol+ or +ul+ is offered to
#    #list2para_unnest_para together with its first and last such list;
# 2. every +ol+ and +ul+ is replaced by its children;
# 3. nested list paragraphs are unnested (#unnest_list_paras);
# 4. list paragraphs inside notes, examples and quotes are indented
#    (#indent_lists).
#
# @param docxml document tree responding to +xpath+
# @param liststyles forwarded to the inherited implementation via +super+
# @return the result of #indent_lists
def lists(docxml, liststyles)
  super
  docxml.xpath("//p[ol | ul]").each do |para|
    first_list = para.at("./ol | ./ul")
    last_list = para.at("./*[name() = 'ul' or name() = 'ol'][last()]")
    list2para_unnest_para(para, first_list, last_list)
  end
  docxml.xpath("//ol | //ul").each { |list| list.replace(list.children) }
  unnest_list_paras(docxml)
  indent_lists(docxml)
end
#style_list_iso(elem, level, listtype, idx) ⇒ Object
134 135 136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/html2doc/lists.rb', line 134
# Numbers one list item and records its nesting level.
#
# An item that contains a nested list but no direct +p+ child is skipped and
# the counter is returned unchanged. Otherwise the counter is incremented,
# the label for the new value is built with #listlabel (using the "type"
# attribute of the last +ol+ ancestor returned by the query, if any), and
# the label followed by a tab span is inserted at the start of the item,
# unless the item lies inside a div of class "index". The item's "level"
# attribute is set in every non-skipped case.
#
# An item with no children at all receives the label as its only content
# instead of raising NoMethodError on a missing first child.
#
# @param elem the list item element (mutated in place)
# @param level nesting level to record on the item
# @param listtype list kind handed to #listlabel
# @param idx [Integer] counter value before this item
# @return [Integer] counter value after this item
def style_list_iso(elem, level, listtype, idx)
  return idx if elem.at(".//ol | .//ul") && !elem.at("./p")

  idx += 1
  ol = elem.xpath("./ancestor::ol").last
  label = listlabel(listtype, idx, ol ? ol["type"] : nil)
  unless elem.at("./ancestor::div[@class = 'index']") # indexsect
    marker = "#{label}<span style='mso-tab-count:1'> </span>"
    first_child = elem.children.first
    if first_child
      first_child.previous = marker
    else
      # Empty item: there is no node to insert before, so append instead.
      elem.add_child(marker)
    end
  end
  elem["level"] = level
  idx
end
#unnest_list_paras(docxml) ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/html2doc/lists.rb', line 78
# Unnests paragraphs inside first-level list paragraphs.
#
# For every +p+ of class ListContinue1 or ListNumber1 that contains another
# +p+ anywhere below it: first the paragraph itself is offered to
# #list2para_unnest_para (when it has a direct +p+ child), then each
# descendant +p+ that itself has a direct +p+ child is offered in turn.
#
# @param docxml document tree responding to +xpath+
# @return the collection of first-level list paragraphs that was iterated
def unnest_list_paras(docxml)
  outer = docxml.xpath("//p[@class = 'ListContinue1' or @class = 'ListNumber1']" \
                       "[.//p]")
  outer.each do |para|
    if para.at("./p")
      list2para_unnest_para(para, para.at("./p"), para.at("./p[last()]"))
    end
    para.xpath(".//p[p]").each do |nested|
      list2para_unnest_para(nested, nested.at("./p"), nested.at("./p[last()]"))
    end
  end
end