Class: OdtParser

Inherits:
Object
  • Object
show all
Defined in:
lib/yesroff/odt_parser.rb

Instance Method Summary collapse

Constructor Details

#initialize(odt_path) ⇒ OdtParser

Returns a new instance of OdtParser.



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/yesroff/odt_parser.rb', line 25

# Builds a parser for the ODT file at +odt_path+.
#
# If a file named "yesroff.rc" exists in the current directory it is loaded
# first. The archive's "content.xml" entry is then read into a REXML
# document, and the writer, the paragraph list and the default text and
# paragraph style tables are set up.
#
# @param odt_path [String] path of the ODT (zip) archive to open
def initialize(odt_path)
  if File.exist?("yesroff.rc")
    # Report the file that is actually loaded; the message previously named
    # "notroff.rb", which is not what the next line loads.
    log "loading yesroff.rc"
    load 'yesroff.rc'
  end

  log "Reading #{odt_path}..."
  Zip::ZipFile.open(odt_path) do |zipfile|
    zipfile.file.open("content.xml") do |content|
      @doc = REXML::Document.new(content.read)
    end
  end
  log "Done"

  @writer = NRWriter.new
  @paras = []
  @text_styles = default_text_styles
  @para_styles = default_para_styles
end

Instance Method Details

#default_para_styles ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/yesroff/odt_parser.rb', line 62

# Builds the table of built-in paragraph styles.
#
# Each row below becomes ParagraphStyle.new(name, nil, kind, flag) and is
# added in the order listed. The finished table is then passed to
# additional_paragraph_styles so further styles can be registered.
# NOTE(review): the meaning of the boolean fourth constructor argument is
# defined by ParagraphStyle, which is not visible here.
#
# @return [StyleHash] the populated style table
def default_para_styles
  styles = StyleHash.new

  builtin = [
    ['FT',        :body,  false],
    ['IT',        :body,  false],
    ['Quotation', :quote, true],

    ['CDT1',         :code, false],
    ['CDT',          :code, false],
    ['CDTX',         :code, false],
    ['C1',           :c1,   true],
    ['C2',           :c1,   true],
    ['TB',           :c1,   true],
    ['Free_20_Form', :c1,   true],

    ['NLC1',   :code, false],
    ['NLC',    :code, false],
    ['NLCX',   :code, false],
    ['NLPara', :code, false],

    ['TX', :code, false],

    ['HA',  :title,    true],
    ['HB',  :subtitle, true],
    ['HC',  :sec,      true],
    ['HD',  :subsec,   true],
    ['TH',  :theading, true],
    ['LH',  :ltitle,   true],
    ['LC',  :listing,  false],
    ['LC2', :listing,  false],
    ['LX',  :listing,  false],

    ['BL1', :bullet, true],
    ['BL',  :bullet, true],
    ['BX',  :bullet, true],

    ['NL1', :list, true],
    ['NL',  :list, true],
    ['NX',  :list, true],

    ['BL Para',    :bullet, true],
    # Every other multi-word name in this table uses the "_20_" encoding for
    # a space (e.g. Free_20_Form), so the literal-space spelling above cannot
    # match an encoded style name. Register the encoded spelling as well and
    # keep the old key for backward compatibility.
    ['BL_20_Para', :bullet, true],
    ['Quotation_20_Attribution', :attribution, true]
  ]

  builtin.each do |name, kind, flag|
    styles.add_style ParagraphStyle.new(name, nil, kind, flag)
  end

  additional_paragraph_styles(styles)
  styles
end

#default_text_styles ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/yesroff/odt_parser.rb', line 45

# Builds the table of built-in text (character) styles.
#
# "CD1" is registered first with its +code+ attribute set to true; the
# remaining names are registered as plain TextStyle objects. The table is
# then handed to additional_text_styles before being returned.
#
# @return [StyleHash] the populated style table
def default_text_styles
  code_style = TextStyle.new("CD1")
  code_style.code = true

  styles = StyleHash.new
  styles.add_style code_style

  plain_names = ["Default", "C1", "C1_20_HD", "FN", "Base_20_Font", "Chapter_20_Word"]
  plain_names.each { |style_name| styles.add_style TextStyle.new(style_name) }

  additional_text_styles(styles)
  styles
end

#find_or_create_para_style(name) ⇒ Object



163
164
165
166
167
168
169
170
171
172
# File 'lib/yesroff/odt_parser.rb', line 163

# Returns the paragraph style registered under +name+.
#
# Despite the name, nothing is created here: the call is delegated to
# lookup_para_style. The create-on-miss code that used to follow an
# unconditional +return+ could never run and has been removed; the
# observable behaviour is unchanged.
#
# @param name [String] paragraph style name
# @return [Object] whatever lookup_para_style returns for +name+
def find_or_create_para_style(name)
  lookup_para_style(name)
end

#find_or_create_text_style(name) ⇒ Object



181
182
183
184
185
186
187
188
189
# File 'lib/yesroff/odt_parser.rb', line 181

# Returns the text style registered under +name+, creating it on a miss.
#
# On a miss a warning is written to STDERR, a new TextStyle is built from
# +name+ and stored in @text_styles under the new style's own name.
#
# @param name [String, nil] character style name
# @return [Object] the existing or newly created style
def find_or_create_text_style(name)
  existing = @text_styles[name]
  return existing if existing

  STDERR.puts "Warning: no character style named #{name}"
  created = TextStyle.new(name)
  @text_styles[created.name] = created
  created
end

#lookup_para_style(name) ⇒ Object



156
157
158
159
160
161
# File 'lib/yesroff/odt_parser.rb', line 156

# Strict lookup of a paragraph style in @para_styles.
#
# @param name [String] paragraph style name
# @return [Object] the style stored under +name+
# @raise [RuntimeError] when nothing is stored under +name+; the same
#   message is passed to +log+ before raising
def lookup_para_style(name)
  found = @para_styles[name]
  return found if found

  log "No such para style #{name}"
  raise "No such para style #{name}"
end

#lookup_text_style(name) ⇒ Object



174
175
176
177
178
179
# File 'lib/yesroff/odt_parser.rb', line 174

# Strict lookup of a text style in @text_styles.
#
# A nil or empty +name+ is treated as "Default".
#
# @param name [String, nil] character style name
# @return [Object] the style stored under the effective name
# @raise [RuntimeError] when no style is stored under the effective name
def lookup_text_style(name)
  key = (name.nil? || name.empty?) ? "Default" : name
  style = @text_styles[key]
  return style if style

  raise "No such text style [[#{key}]]"
end

#parse ⇒ Object



109
110
111
112
113
# File 'lib/yesroff/odt_parser.rb', line 109

# Runs the three parsing passes in order: text styles, paragraph styles,
# then paragraphs. The paragraphs are stored in @paras.
#
# @return [Object] the value returned by parse_paragraphs
def parse
  parse_text_styles
  parse_paragraph_styles

  paragraphs = parse_paragraphs
  @paras = paragraphs
end

#parse_contents(contents, el) ⇒ Object



216
217
218
219
220
221
222
223
224
225
226
# File 'lib/yesroff/odt_parser.rb', line 216

# Converts the children of +el+ into content objects.
#
# Text nodes become Text objects; every other child is handed to parse_span.
#
# @param contents [Object] not used by this method; kept so existing callers
#   keep working
# @param el [REXML::Element] element whose children are converted
# @return [Array] one entry per child, in iteration order
def parse_contents(contents, el)
  results = []
  el.each_child do |kid|
    results << case kid
               when REXML::Text
                 # REXML::Text#value is already entity-decoded. Decoding it a
                 # second time (as the previous code did) turned literal text
                 # such as "&lt;" into "<".
                 Text.new(kid.value)
               else
                 parse_span(kid)
               end
  end
  results
end

#parse_indent(el) ⇒ Object



202
203
# File 'lib/yesroff/odt_parser.rb', line 202

# Placeholder with an empty body: +el+ is ignored and nil is returned.
def parse_indent(el)
end

#parse_paragraph(p) ⇒ Object



191
192
193
194
195
196
197
198
199
200
# File 'lib/yesroff/odt_parser.rb', line 191

# Builds a Paragraph from one paragraph element.
#
# The style is resolved from the element's 'text:style-name' attribute via
# find_or_create_para_style; the contents come from parse_contents.
#
# @param p [Object] paragraph element; must respond to +attributes+
# @return [Paragraph] the new paragraph with its contents assigned
def parse_paragraph(p)
  style_name = p.attributes['text:style-name']
  Paragraph.new(find_or_create_para_style(style_name)).tap do |paragraph|
    paragraph.contents = parse_contents(paragraph, p)
  end
end

#parse_paragraph_styles ⇒ Object



135
136
137
138
139
140
141
142
143
144
# File 'lib/yesroff/odt_parser.rb', line 135

# Registers every paragraph-family style element found in @doc.
#
# For each matching element a ParagraphStyle is built from its 'style:name'
# attribute, its parent is resolved through find_or_create_para_style using
# the 'parent-style-name' attribute, and the style is stored in @para_styles
# under its own name.
#
# @return [Array] the matched style elements
def parse_paragraph_styles
  log "Parsing paragraph styles"
  REXML::XPath.match(@doc, "//style:style[@style:family='paragraph']").each do |node|
    node_attrs = node.attributes
    para_style = ParagraphStyle.new(node_attrs['style:name'])
    para_style.parent = find_or_create_para_style(node_attrs['parent-style-name'])
    @para_styles[para_style.name] = para_style
  end
end

#parse_paragraphs ⇒ Object



146
147
148
149
150
151
152
153
154
# File 'lib/yesroff/odt_parser.rb', line 146

# Parses every element matched by '//text:p' in @doc.
#
# @return [Array] one parse_paragraph result per matched element, in match
#   order
def parse_paragraphs
  log "Parsing paragraphs"
  REXML::XPath.match(@doc, '//text:p').map { |node| parse_paragraph(node) }
end

#parse_span(el) ⇒ Object



205
206
207
208
209
210
211
212
213
214
# File 'lib/yesroff/odt_parser.rb', line 205

# Builds a Span from a non-text child element.
#
# The span's indent is the integer value of the 'text:c' attribute (0 when
# the attribute is absent). Its style is resolved from 'text:style-name' via
# find_or_create_text_style, and its contents come from parse_contents.
#
# @param el [Object] element; must respond to +attributes+
# @return [Span] the populated span
def parse_span(el)
  el_attrs = el.attributes
  count = el_attrs['text:c']
  indent = count ? count.to_i : 0

  span = Span.new(find_or_create_text_style(el_attrs['text:style-name']))
  span.indent = indent
  span.contents = parse_contents(span, el)
  log("new span: #{span}")
  span
end

#parse_text_styles ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/yesroff/odt_parser.rb', line 119

# Registers every text-family style element found in @doc.
#
# When the element has a style:text-properties child:
# * bold is true exactly when 'fo:font-weight' equals 'bold';
# * italic is truthy when 'style:font-name' contains "italic" (any case) or
#   'fo:font-style' equals 'italic'.
# Without that child, bold and italic are left as TextStyle.new set them.
# Each style is stored in @text_styles under its own name.
#
# @return [Array] the matched style elements
def parse_text_styles
  log "Parsing text styles"
  REXML::XPath.match(@doc, "//style:style[@style:family='text']").each do |node|
    text_style = TextStyle.new(node.attributes['style:name'])
    props = REXML::XPath.first(node, "./style:text-properties")
    unless props.nil? || props == false
      text_style.bold = (props.attributes['fo:font-weight'] == 'bold')
      # Keeps the raw =~ result (a match index) when the font name matches.
      text_style.italic = (/italic/i =~ props.attributes['style:font-name']) ||
        (props.attributes['fo:font-style'] == 'italic')
    end
    @text_styles[text_style.name] = text_style
  end
end

#render ⇒ Object



115
116
117
# File 'lib/yesroff/odt_parser.rb', line 115

# Asks each paragraph in @paras, in order, to render itself to @writer.
#
# @return [Object] @paras (the receiver of +each+)
def render
  @paras.each do |paragraph|
    paragraph.render(@writer)
  end
end