Class: OdtParser
- Inherits:
-
Object
- Object
- OdtParser
- Defined in:
- lib/yesroff/odt_parser.rb
Instance Method Summary
- #default_para_styles ⇒ Object
- #default_text_styles ⇒ Object
- #find_or_create_para_style(name) ⇒ Object
- #find_or_create_text_style(name) ⇒ Object
-
#initialize(odt_path) ⇒ OdtParser
constructor
A new instance of OdtParser.
- #lookup_para_style(name) ⇒ Object
- #lookup_text_style(name) ⇒ Object
- #parse ⇒ Object
- #parse_contents(contents, el) ⇒ Object
- #parse_indent(el) ⇒ Object
- #parse_paragraph(p) ⇒ Object
- #parse_paragraph_styles ⇒ Object
- #parse_paragraphs ⇒ Object
- #parse_span(el) ⇒ Object
- #parse_text_styles ⇒ Object
- #render ⇒ Object
Constructor Details
#initialize(odt_path) ⇒ OdtParser
Returns a new instance of OdtParser.
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/yesroff/odt_parser.rb', line 25

# Builds a parser for the OpenDocument text file at +odt_path+.
#
# If a file named "yesroff.rc" exists in the current working directory it is
# loaded first; note that +load+ executes that file as Ruby code. The
# "content.xml" entry of the archive is then read into a REXML document, and
# the writer, the paragraph list and both default style tables are set up.
#
# @param odt_path [String] path of the .odt archive to read
def initialize(odt_path)
  if File.exist?('yesroff.rc')
    # The message names the file that is actually loaded below.
    log 'loading yesroff.rc'
    load 'yesroff.rc'
  end

  log "Reading #{odt_path}..."
  Zip::ZipFile.open(odt_path) do |zipfile|
    zipfile.file.open('content.xml') do |content|
      @doc = REXML::Document.new(content.read)
    end
  end
  log 'Done'

  @writer = NRWriter.new
  @paras = []
  @text_styles = default_text_styles
  @para_styles = default_para_styles
end
Instance Method Details
#default_para_styles ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/yesroff/odt_parser.rb', line 62

# Builds the table of built-in paragraph styles.
#
# Every entry is registered as ParagraphStyle.new(name, nil, type, flag) in
# the order listed below. Once the built-in entries are in place the table is
# handed to additional_paragraph_styles(styles) and then returned.
#
# @return [StyleHash] the populated paragraph style table
def default_para_styles
  # Each row is [style name, type symbol, fourth ParagraphStyle argument].
  builtin = [
    ['FT',                       :body,        false],
    ['IT',                       :body,        false],
    ['Quotation',                :quote,       true],
    ['CDT1',                     :code,        false],
    ['CDT',                      :code,        false],
    ['CDTX',                     :code,        false],
    ['C1',                       :c1,          true],
    ['C2',                       :c1,          true],
    ['TB',                       :c1,          true],
    ['Free_20_Form',             :c1,          true],
    ['NLC1',                     :code,        false],
    ['NLC',                      :code,        false],
    ['NLCX',                     :code,        false],
    ['NLPara',                   :code,        false],
    ['TX',                       :code,        false],
    ['HA',                       :title,       true],
    ['HB',                       :subtitle,    true],
    ['HC',                       :sec,         true],
    ['HD',                       :subsec,      true],
    ['TH',                       :theading,    true],
    ['LH',                       :ltitle,      true],
    ['LC',                       :listing,     false],
    ['LC2',                      :listing,     false],
    ['LX',                       :listing,     false],
    ['BL1',                      :bullet,      true],
    ['BL',                       :bullet,      true],
    ['BX',                       :bullet,      true],
    ['NL1',                      :list,        true],
    ['NL',                       :list,        true],
    ['NX',                       :list,        true],
    # This name previously contained raw whitespace between "BL" and "Para".
    # The other entries spell a space as "_20_" (e.g. Free_20_Form), so the
    # raw form presumably could never match a style name read from a document.
    ['BL_20_Para',               :bullet,      true],
    ['Quotation_20_Attribution', :attribution, true]
  ]

  styles = StyleHash.new
  builtin.each do |name, type, flag|
    styles.add_style ParagraphStyle.new(name, nil, type, flag)
  end
  additional_paragraph_styles(styles)
  styles
end
#default_text_styles ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/yesroff/odt_parser.rb', line 45

# Builds the table of built-in character (text) styles.
#
# "CD1" is registered first with its +code+ attribute set to true; the
# remaining names are registered as plain TextStyle objects. The table is then
# passed to additional_text_styles before being returned.
#
# @return [StyleHash] the populated text style table
def default_text_styles
  code_style = TextStyle.new("CD1")
  code_style.code = true

  table = StyleHash.new
  table.add_style code_style
  %w[Default C1 C1_20_HD FN Base_20_Font Chapter_20_Word].each do |style_name|
    table.add_style TextStyle.new(style_name)
  end

  additional_text_styles(table)
  table
end
#find_or_create_para_style(name) ⇒ Object
163 164 165 166 167 168 169 170 171 172 |
# File 'lib/yesroff/odt_parser.rb', line 163

# Returns the paragraph style registered under +name+.
#
# Despite its name this method never creates a style: it delegates to
# lookup_para_style, which raises when the style is unknown. The
# warn-and-register statements that used to follow the unconditional return
# could never run and have been dropped, so behaviour is unchanged.
#
# @param name [String] paragraph style name to resolve
# @return [Object] whatever lookup_para_style returns for +name+
# @raise [RuntimeError] from lookup_para_style when no such style exists
def find_or_create_para_style(name)
  lookup_para_style(name)
end
#find_or_create_text_style(name) ⇒ Object
181 182 183 184 185 186 187 188 189 |
# File 'lib/yesroff/odt_parser.rb', line 181

# Returns the text style registered under +name+, creating it when missing.
#
# A missing style produces a warning on STDERR, after which a new
# TextStyle.new(name) is stored in @text_styles under the new style's own
# +name+ and returned.
#
# @param name [String, nil] character style name
# @return [Object] the existing or newly created style
def find_or_create_text_style(name)
  existing = @text_styles[name]
  return existing if existing

  STDERR.puts "Warning: no character style named #{name}"
  created = TextStyle.new(name)
  @text_styles[created.name] = created
  created
end
#lookup_para_style(name) ⇒ Object
156 157 158 159 160 161 |
# File 'lib/yesroff/odt_parser.rb', line 156

# Strict lookup of a paragraph style in @para_styles.
#
# When nothing is registered under +name+ the failure is first reported
# through +log+ and then raised with the same text.
#
# @param name [String] paragraph style name
# @return [Object] the registered style
# @raise [RuntimeError] when no style is registered under +name+
def lookup_para_style(name)
  found = @para_styles[name]
  return found if found

  log "No such para style #{name}"
  raise "No such para style #{name}"
end
#lookup_text_style(name) ⇒ Object
174 175 176 177 178 179 |
# File 'lib/yesroff/odt_parser.rb', line 174

# Strict lookup of a text style in @text_styles.
#
# A nil or empty +name+ is treated as "Default".
#
# @param name [String, nil] character style name
# @return [Object] the registered style
# @raise [RuntimeError] when no style is registered under the resolved name
def lookup_text_style(name)
  # `||` instead of the low-precedence `or` keyword for boolean logic.
  name = 'Default' if name.nil? || name.empty?
  style = @text_styles[name]
  raise "No such text style [[#{name}]]" unless style

  style
end
#parse ⇒ Object
109 110 111 112 113 |
# File 'lib/yesroff/odt_parser.rb', line 109

# Runs the three parsing passes in order: text styles, paragraph styles, then
# paragraphs. The paragraphs are stored in @paras.
#
# @return [Object] the value returned by parse_paragraphs
def parse
  parse_text_styles
  parse_paragraph_styles

  paragraphs = parse_paragraphs
  @paras = paragraphs
end
#parse_contents(contents, el) ⇒ Object
216 217 218 219 220 221 222 223 224 225 226 |
# File 'lib/yesroff/odt_parser.rb', line 216

# Converts the children of +el+ into content objects.
#
# REXML text nodes become Text objects; every other child is handed to
# parse_span.
#
# @param contents [Object] owner passed in by callers; not used here
# @param el [REXML::Element] element whose children are converted
# @return [Array] one entry per child, in document order
def parse_contents(contents, el)
  el.children.map do |kid|
    case kid
    when REXML::Text
      # Text#value is already entity-decoded. Running it through
      # REXML::Text.unnormalize again would decode a second time and turn
      # literal text such as "&lt;" into "<".
      Text.new(kid.value)
    else
      parse_span(kid)
    end
  end
end
#parse_indent(el) ⇒ Object
202 203 |
# File 'lib/yesroff/odt_parser.rb', line 202

# Placeholder with no implementation: ignores +el+ and returns nil.
#
# @param el [Object] ignored
# @return [nil]
def parse_indent(el)
  nil
end
#parse_paragraph(p) ⇒ Object
191 192 193 194 195 196 197 198 199 200 |
# File 'lib/yesroff/odt_parser.rb', line 191

# Builds a Paragraph from one paragraph element.
#
# The style is resolved from the element's "text:style-name" attribute through
# find_or_create_para_style, and the contents come from parse_contents.
#
# @param p [REXML::Element] paragraph element
# @return [Paragraph] the paragraph with its style and contents set
def parse_paragraph(p)
  style_name = p.attributes['text:style-name']
  paragraph = Paragraph.new(find_or_create_para_style(style_name))
  paragraph.contents = parse_contents(paragraph, p)
  paragraph
end
#parse_paragraph_styles ⇒ Object
135 136 137 138 139 140 141 142 143 144 |
# File 'lib/yesroff/odt_parser.rb', line 135

# Registers every paragraph-family style declared in the document.
#
# For each matching style:style element a ParagraphStyle is created from its
# "style:name" attribute, its parent is resolved with
# find_or_create_para_style, and the result is stored in @para_styles under
# the style's own name.
#
# @return [Array] the matched style elements
def parse_paragraph_styles
  log "Parsing paragraph styles"
  xpath = "//style:style[@style:family='paragraph']"
  REXML::XPath.match(@doc, xpath).each do |node|
    declared = ParagraphStyle.new(node.attributes['style:name'])
    # The parent attribute is looked up without a namespace prefix.
    declared.parent = find_or_create_para_style(node.attributes['parent-style-name'])
    @para_styles[declared.name] = declared
  end
end
#parse_paragraphs ⇒ Object
146 147 148 149 150 151 152 153 154 |
# File 'lib/yesroff/odt_parser.rb', line 146

# Parses every text:p element found anywhere in the document.
#
# @return [Array] the results of parse_paragraph, one per element, in the
#   order the XPath match yields them
def parse_paragraphs
  log "Parsing paragraphs"
  REXML::XPath.match(@doc, '//text:p').map { |node| parse_paragraph(node) }
end
#parse_span(el) ⇒ Object
205 206 207 208 209 210 211 212 213 214 |
# File 'lib/yesroff/odt_parser.rb', line 205

# Builds a Span from a non-text child element.
#
# The span's indent is the integer value of the "text:c" attribute, or 0 when
# that attribute is absent. Its style is resolved from "text:style-name" via
# find_or_create_text_style and its contents come from parse_contents. The
# finished span is logged before being returned.
#
# @param el [REXML::Element] element to convert
# @return [Span] the populated span
def parse_span(el)
  attrs = el.attributes
  count = attrs['text:c']
  resolved_style = find_or_create_text_style(attrs['text:style-name'])

  span = Span.new(resolved_style)
  span.indent = count ? count.to_i : 0
  span.contents = parse_contents(span, el)
  log("new span: #{span}")
  span
end
#parse_text_styles ⇒ Object
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# File 'lib/yesroff/odt_parser.rb', line 119

# Registers every text-family style declared in the document.
#
# When a style has a style:text-properties child, +bold+ is set from
# fo:font-weight being "bold". +italic+ is set from a case-insensitive
# "italic" match in style:font-name or, failing that, from fo:font-style being
# "italic". Because the regexp match result is stored as-is, +italic+ may be a
# match offset rather than true. Each style is stored in @text_styles under
# its own name.
#
# @return [Array] the matched style elements
def parse_text_styles
  log "Parsing text styles"
  REXML::XPath.match(@doc, "//style:style[@style:family='text']").each do |node|
    text_style = TextStyle.new(node.attributes['style:name'])
    props = REXML::XPath.first(node, "./style:text-properties")
    if props
      prop_attrs = props.attributes
      text_style.bold = (prop_attrs['fo:font-weight'] == 'bold')
      text_style.italic = (/italic/i =~ prop_attrs['style:font-name']) ||
                          (prop_attrs['fo:font-style'] == 'italic')
    end
    @text_styles[text_style.name] = text_style
  end
end
#render ⇒ Object
115 116 117 |
# File 'lib/yesroff/odt_parser.rb', line 115

# Asks each parsed paragraph, in order, to render itself to @writer.
#
# @return [Array] @paras, as returned by +each+
def render
  @paras.each do |paragraph|
    paragraph.render(@writer)
  end
end