Class: MifToHtmlParser

Inherits:
Object
  • Object
show all
Includes:
MifParserUtils
Defined in:
app/models/mif_to_html_parser.rb

Constant Summary collapse

# Element names rendered as <div>. Stored as a name => true lookup table.
DIV = Hash[%w[ABillTo Abt1 Abt2 Abt3 Abt4
Amendment Amendment_Number Amendment_Text Amendments_Commons Arrangement
AsAm
BillData BillTitle Bpara
CenteredHeading Chapter
ClauseText ClauseTitle Clause_Committee
Clauses ClausesToBeConsidered Clauses_ar
Committee CommitteeShorttitle ChapterTitle
Cover CoverHeading
CrossHeading CrossHeadingSch CrossHeadingTitle CrossHeadingTitle_ar
Date
Definition DefinitionListItem
Enda Endb
Endorse
Footer
Given
Head HeadAmd HeadConsider HeadNotice Head_thin
Heading_ar Heading_text
List ListItem
LongTitle Longtitle_text
MarshalledOrderNote Motion Move
NewClause_Committee NoticeOfAmds
OrderAmendmentText OrderCrossHeading
OrderHeading
OrderPreamble OrderText
Order_Committee Order_House Order_Motion
Part Part_ar PartSch PartTitle
Prelim
Report Resolution ResolutionHead
ResolutionPreamble ResolutionText Rubric
Schedule_ar
Schedules SchedulesTitle
Schedule_Committee SchedulesTitle_ar Schedules_ar
ScheduleTitle Schedule
SectionReference
Shorttitle Stageheader SubSection
Split
TableTitle
Table Text_motion TextContinuation
WordsOfEnactment].map { |element_name| [element_name, true] }]
# Alternation of the DIV names. Each name is followed by `$`, but the single
# `^` only applies to the first alternative, so every other name matches as a
# suffix of the tested string.
# NOTE(review): tightening the anchors would change which element names are
# accepted - confirm against real documents before altering.
DIV_RE = Regexp.new('(^' + DIV.keys.join('$|') + '$)')
# Element names rendered as <table>, and the regex built from them.
TABLE = { 'TableData' => true }
TABLE_RE = Regexp.new('(^' + TABLE.keys.join('$|') + '$)')
# Element names rendered as <tr>.
TR = { 'Row' => true }
TR_RE = Regexp.new('(^' + TR.keys.join('$|') + '$)')
# Element names rendered as <th>.
TH = { 'CellH' => true }
TH_RE = Regexp.new('(^' + TH.keys.join('$|') + '$)')
# Element names rendered as <td>.
TD = { 'Cell' => true }
TD_RE = Regexp.new('(^' + TD.keys.join('$|') + '$)')
# Element names rendered as <span>. Stored as a name => true lookup table.
SPAN = Hash[%w[Amendment_Text_text
Bold Bold_text
ActClauseTitle_text
BillReference
ClauseTitle_text
Date_text Day Definition_text Dropcap
Enact Sbscript
FrameData Formula
Italic
Letter Line Line_text List_text
Move_text
NoteTxt Notehead Number Number_text
Page Page_text Para_sch_text Para_text PartNumber_ar PartNumber_ar_text
PartTitle_ar
PgfNumString Proposer_name
ResolutionDate ResolutionHead_text ResolutionPara_text
ResolutionSubPara_text ResolutionText_text
Roman Roman_text
ScheduleNumber_ar ScheduleNumber_ar_text
ScheduleTitle_ar ScheduleTitle_ar_text ScheduleTitle_text
STCommons STHouse STLords STText SmallCaps
Superscript Superscript_text SmallCaps_text
SubSection_text
WHITESPACE ].map { |element_name| [element_name, true] }]
# Alternation of the SPAN names. Only the first alternative carries `^`; the
# rest are anchored at the end only, so e.g. "SubPara_text" matches through
# the "Para_text$" alternative.
SPAN_RE = Regexp.new('(^' + SPAN.keys.join('$|') + '$)')
# Element names that node_to_html skips without producing any output.
IGNORE = Hash[%w[Jref_text
InternalReference InternalReference_text
Interpretation FileType
Jref ].map { |element_name| [element_name, true] }]
# Same construction as the other *_RE constants: a single leading `^`, and a
# `$` after every name.
IGNORE_RE = Regexp.new('(^' + IGNORE.keys.join('$|') + '$)')
# Element names rendered as <ul>. Unlike the DIV/SPAN regexes, the three
# regexes below carry no anchors at all, so they match anywhere in a name.
UL = { 'Sponsors' => true }
UL_RE = Regexp.new('(' + UL.keys.join('|') + ')')
# Element names rendered as <li>.
LI = { 'Sponsor' => true }
LI_RE = Regexp.new('(' + LI.keys.join('|') + ')')
# Element names rendered as <hr />.
HR = Hash[%w[Separator_thick Separator_thin].map { |element_name| [element_name, true] }]
HR_RE = Regexp.new('(' + HR.keys.join('|') + ')')

Constants included from MifParserUtils

MifParserUtils::AMEND_REF, MifParserUtils::COMPRESS_WHITESPACE, MifParserUtils::COMPRESS_WHITESPACE_2, MifParserUtils::COMPRESS_WHITESPACE_3, MifParserUtils::COMPRESS_WHITESPACE_4, MifParserUtils::COMPRESS_WHITESPACE_5, MifParserUtils::LINK_REGEX, MifParserUtils::NEED_SPACE_BETWEEN_LABEL_AND_NUMBER_REGEX, MifParserUtils::NEED_SPACE_BETWEEN_LABEL_AND_NUMBER_REGEX_2, MifParserUtils::NEED_SPACE_BETWEEN_LABEL_AND_XREF_REGEX, MifParserUtils::NEED_SPACE_BETWEEN_LABEL_AND_XREF_REGEX_2, MifParserUtils::SPAN_REGEX, MifParserUtils::TOGGLE_SHOW_REGEXP, MifParserUtils::TOGGLE_SHOW_REGEXP_2

Instance Method Summary collapse

Methods included from MifParserUtils

#clean, #for_each_match, #format_haml, #get_attributes, #get_char, #get_uid, #make_attr, #postprocess, #preprocess, #start_tag

Instance Method Details

#a_attribute(node, attribute) ⇒ Object


215
216
217
# File 'app/models/mif_to_html_parser.rb', line 215

# Renders one attribute of an anchor tag.
#
# Returns " name='value'" (leading space, single-quoted value) when
# node[attribute] is truthy, and the empty string otherwise. The value is
# inserted as-is, without escaping.
def a_attribute node, attribute
  value = node[attribute]
  return '' unless value

  " #{attribute}='#{value}'"
end

#add(text) ⇒ Object


656
657
658
659
660
661
662
663
# File 'app/models/mif_to_html_parser.rb', line 656

# Appends a fragment to the @html output buffer.
#
# Raises RuntimeError when text is nil. When text is a String with a line
# starting with ';', '.' or ')' and the previous fragment is a single space,
# that space is removed first so the punctuation hugs the preceding text.
def add text
  raise 'text should not be null' if text.nil?

  starts_with_punctuation = text.is_a?(String) && text[/^(;|\.|\))/]
  @html.pop if starts_with_punctuation && @html.last == ' '
  @html << text
end

#add_anchor(node) ⇒ Object


219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'app/models/mif_to_html_parser.rb', line 219

# Emits an <a> element for node: the opening tag with whichever of the rel,
# resource, href and title attributes are present (in that order), the
# rendered children, and the closing tag. @in_hyperlink is true while the
# children are rendered; afterwards any deferred line anchor is flushed.
def add_anchor node
  attributes = %w[rel resource href title].map { |attr_name| a_attribute(node, attr_name) }
  add "<a#{attributes.join('')}>"

  @in_hyperlink = true
  node_children_to_html(node)
  @in_hyperlink = false
  add '</a>'

  add_trailing_para_line_anchor
end

#add_html_element(name, node) ⇒ Object


245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
# File 'app/models/mif_to_html_parser.rb', line 245

# Emits node as an HTML element called name.
#
# The opening tag carries class (from css_class) plus id and colspan when the
# node has them; 'hr' is written self-closed and gets no children or end tag.
# Outside amendments, SubSection and Para nodes also get a named anchor built
# from @parent_href_name and their PgfNumString_1 label, and that reference
# is passed to render_effects when @effects is set. Inside amendments a
# node 'anchor' attribute is emitted as a named anchor before the children.
def add_html_element name, node
  klass = css_class(node)
  self_closing = (name == 'hr')

  opening = %Q|<#{name} class="#{klass}"|
  opening += %Q| id="#{node['id']}"| if node['id']
  opening += %Q| colspan="#{node['colspan']}"| if node['colspan']
  opening += (self_closing ? " />" : ">")
  add opening

  #@in_amendment = (node.parent.name == 'Amendment' || node.parent.parent.name == 'Amendment')
  @in_amendment = check_for_amendment_ancestor(node)

  # Reads the numbering label, e.g. "(3)" -> "3".
  numbering_label = lambda do
    (node/'PgfNumString/PgfNumString_1/text()').first.to_s.delete('()').strip
  end

  node_ref = nil
  unless @in_amendment
    case klass
    when 'SubSection'
      @subsection_number = numbering_label.call
      node_ref = "#{@parent_href_name}-#{@subsection_number}"
      add %Q|<a name="#{node_ref}" />|
    when 'Para'
      @para_ref = numbering_label.call
      node_ref = "#{@parent_href_name}-#{@subsection_number}-#{@para_ref}"
      add %Q|<a name="#{node_ref}" />|
    end
  end
  render_effects(node_ref) if @effects

  unless self_closing
    add %Q|<a name="#{node['anchor']}"/>| if @in_amendment && node['anchor']
    node_children_to_html(node)
    add "</#{name}>"
  end

  # @last_css_class reflects the most recently rendered element at this point.
  @in_para_line = false unless @last_css_class[/^(Bold|Italic|SmallCaps)$/]
end

#add_link_element(node, div = false) ⇒ Object


363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
# File 'app/models/mif_to_html_parser.rb', line 363

# Renders node as a link to an Act or Bill when a URL can be found for its
# text, and as a plain element otherwise.
#
# node - element whose inner text is used as the lookup name and the title.
# div  - when true the output is wrapped in a <div> (with the <a> nested
#        inside it if a URL was found); when false the output is a single
#        <a>, or a <span> if no URL was found.
#
# Text containing "Act" is looked up with find_act_url; otherwise text
# containing "Bill" is looked up with find_bill_url.
def add_link_element node, div=false
  id_attr = get_id_attr node
  label = node.inner_text
  url = case label
    when /Act/
      find_act_url(label)
    when /Bill/
      find_bill_url(label)
    else
      ''
  end
  linked = !url.blank?
  title_attr = label.blank? ? '' : %Q| title="#{label}"|

  if div
    add %Q|<div#{id_attr} class="#{node.name}">|
    add %Q|<a href="#{url}"#{title_attr}>| if linked
  elsif linked
    add %Q|<a#{id_attr} href="#{url}" class="#{node.name}"#{title_attr}>|
  else
    add %Q|<span#{id_attr} class="#{node.name}">|
  end

  @in_hyperlink = true
  node_children_to_html(node)
  @in_hyperlink = false

  add "</a>" if linked
  if div
    add "</div>"
  elsif !linked
    add "</span>"
  end

  add_trailing_para_line_anchor
end

#add_trailing_para_line_anchor ⇒ Object


395
396
397
398
399
400
401
# File 'app/models/mif_to_html_parser.rb', line 395

# Flushes the line anchor that handle_para_line_start deferred while inside a
# hyperlink: emits it followed by a non-breaking space, then clears it.
# Does nothing when no anchor is pending.
def add_trailing_para_line_anchor
  return unless @para_line_anchor

  add @para_line_anchor
  add "&nbsp;"
  @para_line_anchor = nil
end

#add_xref_link(node) ⇒ Object


323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
# File 'app/models/mif_to_html_parser.rb', line 323

# Renders a cross-reference. With an 'anchor-ref' attribute the node becomes
# an in-page link (href "#<anchor-ref>") whose title is derived from the
# reference by parse_xref_link_title; without one it is rendered as a plain
# <span>.
def add_xref_link node
  target = node['anchor-ref']
  return add_html_element('span', node) unless target

  id = get_id_attr node
  title = parse_xref_link_title(target)

  add %Q|<a#{id} class="#{node.name}" href="##{target}" title="#{title}">|
  @in_hyperlink = true
  node_children_to_html(node)
  @in_hyperlink = false
  add "</a>"
end

#check_for_amendment_ancestor(node) ⇒ Object


236
237
238
239
240
241
242
243
# File 'app/models/mif_to_html_parser.rb', line 236

# Reports whether node sits inside an Amendment element.
#
# Walks up the parent chain and returns true at the first ancestor named
# 'Amendment'. The walk stops, returning false, when it reaches an ancestor
# named 'BillData' or runs out of parents. The node itself is not examined.
def check_for_amendment_ancestor(node)
  ancestor = node.parent
  while ancestor && ancestor.name != 'BillData'
    return true if ancestor.name == 'Amendment'
    ancestor = ancestor.parent
  end
  false
end

#css_class(node) ⇒ Object


209
210
211
212
213
# File 'app/models/mif_to_html_parser.rb', line 209

# Computes the CSS class string for node: its element name with '.' replaced
# by '_', followed by a space and the node's own 'class' attribute when that
# attribute is not blank. The result is also remembered in @last_css_class.
def css_class node
  extra = node['class']
  base = node.name.gsub('.','_')
  @last_css_class = extra.blank? ? base : "#{base} #{extra}"
end

#doc_to_html(doc) ⇒ Object


192
193
194
195
196
197
198
199
200
201
# File 'app/models/mif_to_html_parser.rb', line 192

# Resets the per-document rendering state and renders every child of the
# document's root element into @html.
def doc_to_html(doc)
  @in_clauses = @in_schedules = @in_paragraph = false
  @in_amendment = @in_hyperlink = false
  @para_line_anchor = nil
  @pages_rendered = 0

  node_children_to_html(doc.root)
end

#find_act_url(act_name) ⇒ Object


318
319
320
321
# File 'app/models/mif_to_html_parser.rb', line 318

# Looks up the URL for the Act called act_name.
#
# Returns the Act's statutelaw_url when it has one, otherwise its opsi_url.
# Returns nil when Act.from_name yields nothing, so the caller's blank-URL
# handling applies instead of a NoMethodError on nil.
# NOTE(review): whether Act.from_name can actually return nil is not visible
# here; the guard is harmless if it cannot.
def find_act_url act_name
  act = Act.from_name act_name
  act && (act.statutelaw_url || act.opsi_url)
end

#find_bill_url(bill_name) ⇒ Object


312
313
314
315
316
# File 'app/models/mif_to_html_parser.rb', line 312

# Looks up the Bill called bill_name (a trailing " [HL]" is removed first),
# stores it in @bill, and returns its parliament_url.
#
# Returns nil when Bill.from_name yields nothing, instead of raising
# NoMethodError on nil; @bill is still overwritten with the lookup result.
# NOTE(review): whether Bill.from_name can actually return nil is not
# visible here; the guard is harmless if it cannot.
def find_bill_url bill_name
  @bill = Bill.from_name(bill_name.chomp(' [HL]'))
  @bill && @bill.parliament_url
end

#find_clause_explanatory_note ⇒ Object


413
414
415
# File 'app/models/mif_to_html_parser.rb', line 413

# Returns the explanatory note for the current clause (@clause_number) when
# note interleaving is enabled.
#
# Yields a falsy value when @interleave is off or when no bill has been
# recorded in @bill yet; otherwise whatever the bill's
# find_note_for_clause_number returns.
def find_clause_explanatory_note
  @interleave && @bill && @bill.find_note_for_clause_number(@clause_number)
end

#find_schedule_explanatory_note ⇒ Object


417
418
419
# File 'app/models/mif_to_html_parser.rb', line 417

# Returns the explanatory note for the current schedule (@schedule_number)
# when note interleaving is enabled.
#
# Yields a falsy value when @interleave is off or when no bill has been
# recorded in @bill yet; otherwise whatever the bill's
# find_note_for_schedule_number returns.
def find_schedule_explanatory_note
  @interleave && @bill && @bill.find_note_for_schedule_number(@schedule_number)
end

#generate_haml(html, options) ⇒ Object


46
47
48
49
50
51
52
53
54
55
56
# File 'app/models/mif_to_html_parser.rb', line 46

# Converts html to Haml by writing it to a temporary file under
# RAILS_ROOT/tmp, running the external `html2haml` command on that file, and
# passing the command's output through format_haml with @clauses_file.
#
# html    - HTML text to convert.
# options - accepted for signature parity with generate_html; not read here.
#
# The temporary file is removed in an ensure clause, so it is cleaned up even
# when the write or the html2haml invocation raises.
def generate_haml html, options
  html_file = Tempfile.new("#{Time.now.to_i}.html", "#{RAILS_ROOT}/tmp")
  begin
    html_file.write html
    html_file.close
    haml = `html2haml #{html_file.path}`
  ensure
    html_file.delete
  end

  format_haml(haml, @clauses_file)
end

#generate_html(doc, options) ⇒ Object


58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'app/models/mif_to_html_parser.rb', line 58

# Renders doc to an HTML string.
#
# doc     - parsed source document handed to doc_to_html.
# options - :interleave_notes and :effects are stored in @interleave and
#           @effects for the element handlers; :body_only omits the
#           surrounding <html>/<head>/<body> wrapper; :indent returns the
#           markup re-serialised by REXML with two-space indentation.
#
# The generated markup is always parsed with REXML; a parse failure is
# printed to stdout. If indentation was requested but the markup could not be
# parsed, the unindented markup is returned instead of calling `write` on the
# non-REXML input document.
def generate_html doc, options
  @interleave = options[:interleave_notes]
  @effects = options[:effects]

  if options[:body_only]
    @html = []
    doc_to_html(doc)
  else
    @html = ['<html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /></head><body>']
    doc_to_html(doc)
    add '</body></html>'
  end

  result = @html.join('')
  parsed = begin
    REXML::Document.new(result)
  rescue StandardError => e
    # Only ordinary errors (e.g. REXML::ParseException) are reported here;
    # signals and exit requests are no longer swallowed.
    puts e.to_s
    nil
  end

  return result unless options[:indent] && parsed

  indented = ''
  parsed.write(indented, 2)
  indented
end

#get_html_for_char(element) ⇒ Object


86
87
88
89
90
91
92
93
# File 'app/models/mif_to_html_parser.rb', line 86

# Returns the HTML for the character that get_char extracts from element:
# "<br />" for a newline, otherwise the character encoded by HTMLEntities.
def get_html_for_char element
  char = get_char(element)
  return "<br />" if char == "\n"

  HTMLEntities.new.encode(char)
end

#get_id_attr(node) ⇒ Object


359
360
361
# File 'app/models/mif_to_html_parser.rb', line 359

# Returns ' id="<value>"' (with a leading space) when node has an 'id'
# attribute, and the empty string otherwise.
def get_id_attr node
  return '' unless node['id']

  %Q| id="#{node['id']}"|
end

#handle_amendment_reference(node) ⇒ Object


639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
# File 'app/models/mif_to_html_parser.rb', line 639

# Renders an amendment reference as an in-page link. The target is assembled
# from the node's Clause, Schedule, Page and Line attributes, in that order,
# as "clause<C>-schedule<S>-page<P>-line<L>" with absent parts left out and a
# single trailing '-' removed.
def handle_amendment_reference node
  segments = [
    ['Clause', 'clause', '-'],
    ['Schedule', 'schedule', '-'],
    ['Page', 'page', '-'],
    ['Line', 'line', '']
  ]
  pieces = segments.map do |attribute, prefix, separator|
    value = node[attribute]
    value ? "#{prefix}#{value}#{separator}" : ''
  end
  ref = pieces.join('').chomp('-')

  add %Q|<a href="##{ref}" class="#{css_class(node)}">|
  node_children_to_html(node)
  add '</a>'
end

#handle_clause(node) ⇒ Object


421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
# File 'app/models/mif_to_html_parser.rb', line 421

# Renders a Clause element.
#
# A clause that has a number and a HardReference and is not nested inside an
# Amendment is rendered as a <div> with a pending clause anchor (consumed by
# handle_pgf_num_string), optional effects, and - when an explanatory note is
# found - the note appended after the children. Any other clause is rendered
# as a plain <div> via add_html_element.
#
# Statement order matters here: rendering the children can reassign
# @in_amendment (add_html_element does so for every element), which is why it
# is re-checked after node_children_to_html.
def handle_clause node
  # The clause number is only refreshed inside a Clauses section.
  if node['HardReference'] && @in_clauses
    @clause_number = node.at('PgfNumString').inner_text.strip
  end
  clause_id = node['HardReference'].to_s.strip.gsub("&",'_')

  # Walk every ancestor looking for an Amendment element.
  @in_amendment = false
  parent = node.parent
  while !@in_amendment && parent
    @in_amendment = true if parent.name == 'Amendment'
    parent = parent.parent
  end

  unless (@clause_number.blank? || clause_id.blank?) || @in_amendment
    clause_name = "clause#{@clause_number}"
    @parent_href_name = clause_name
    # Opening <a> tag left pending for handle_pgf_num_string to emit.
    @clause_anchor_start = %Q|<a id="clause_#{clause_id}" name="#{clause_name}" href="##{clause_name}">|

    @explanatory_note = find_clause_explanatory_note unless @in_amendment

    add %Q|<div class="#{css_class(node)}" id="#{node['id']}">|
    if @effects
      render_effects(@parent_href_name)
    end
    node_children_to_html(node)
    if @explanatory_note && !@in_amendment
      add %Q|<div class="explanatory_note"><div class="explanatory_note_text"><span class="en_header">Explanatory Note:</span>#{@explanatory_note.html_note_text}</div></div>|
      # Presumably closes the ClauseTextWithExplanatoryNote wrapper opened by
      # handle_clause_text - verify against the rendered output.
      add "</div>"
    end

    add "</div>"

    @explanatory_note = nil unless @in_amendment
  else
    add_html_element 'div', node
  end

  @in_amendment = false
end

#handle_clause_ar(node) ⇒ Object


622
623
624
625
# File 'app/models/mif_to_html_parser.rb', line 622

# Renders a Clause_ar element as a <div>, first remembering its HardReference
# in @clause_ref so handle_clause_ar_text can link to the clause.
def handle_clause_ar node
  @clause_ref = node['HardReference']
  add_html_element('div', node)
end

#handle_clause_ar_text(node) ⇒ Object


627
628
629
630
631
632
633
634
635
636
637
# File 'app/models/mif_to_html_parser.rb', line 627

# Renders a Clause_ar_text element as a <span>, then wraps the fragment just
# before the span's closing tag in a link to the clause recorded in
# @clause_ref within the file named by @clauses_file.
#
# Relies on the last two entries of @html being that fragment and the
# closing tag.
def handle_clause_ar_text node
  add_html_element 'span', node

  closing_tag = @html.pop
  linked_text = @html.pop
  link_open = %Q|<a href="convert?file=#{@clauses_file}#clause_#{@clause_ref}">|

  add link_open
  add linked_text
  add "</a>"
  add closing_tag
end

#handle_clause_text(node) ⇒ Object


461
462
463
464
465
466
# File 'app/models/mif_to_html_parser.rb', line 461

# Renders a ClauseText element as a <div>. When an explanatory note is
# pending and we are not inside an amendment, an extra wrapper <div> (id is
# the node id followed by "en") is opened first; this method does not close
# that wrapper.
def handle_clause_text node
  needs_note_wrapper = @explanatory_note && !@in_amendment
  add %Q|<div class="ClauseTextWithExplanatoryNote" id="#{node['id']}en">| if needs_note_wrapper
  add_html_element 'div', node
end

#handle_clauses(node) ⇒ Object


403
404
405
406
# File 'app/models/mif_to_html_parser.rb', line 403

# Marks the start of the Clauses section (@in_clauses is set and never reset
# here) and renders the element as a <div>.
def handle_clauses node
  @in_clauses = true
  add_html_element('div', node)
end

#handle_page_start(node) ⇒ Object


595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
# File 'app/models/mif_to_html_parser.rb', line 595

# Renders a PageStart element and then rewrites the fragments it just
# produced: the page label ("Page N") becomes a self-referencing named
# anchor, and the very first page gets the extra CSS class "first".
#
# Assumes the last entries of @html are, in order, the element's opening tag,
# a single text fragment and its closing tag - TODO confirm this holds for
# every PageStart in real documents.
def handle_page_start node
  already_in_paragraph = handle_para node
  # When rendered inline (inside a paragraph), append a line break to the
  # closing tag fragment.
  if node.name == 'PageStart' && already_in_paragraph
    line = @html.pop
    line += '<br />'
    add line
  end
  if node.name == 'PageStart'
    end_tag = @html.pop
    text = @html.pop
    # Only the first page rendered in a document is tagged "first".
    if @pages_rendered == 0
      start_tag = @html.pop
      start_tag.gsub!("PageStart", "PageStart first")
      add start_tag
      @pages_rendered += 1
    end
    page = text[/Page (.+)/]
    if page
      # $1 is the capture from the match above; it is remembered for the line
      # anchors built in handle_para_line_start.
      @page_number = $1
      # Only the first space is removed, e.g. "Page 3" -> "page3".
      anchor = page.sub(' ','').downcase
      text.sub!(page, %Q|<a href="##{anchor}" name="#{anchor}">#{page}</a>|)
    end
    add text
    add end_tag
  end
end

#handle_para(node) ⇒ Object


548
549
550
551
552
553
# File 'app/models/mif_to_html_parser.rb', line 548

# Renders a paragraph-like element: as a <span> when already inside a
# paragraph (@in_paragraph), otherwise as a <div>.
#
# Returns the value @in_paragraph had before rendering.
def handle_para node
  was_in_paragraph = @in_paragraph
  add_html_element(was_in_paragraph ? 'span' : 'div', node)
  was_in_paragraph
end

#handle_para_line_start(node) ⇒ Object


555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
# File 'app/models/mif_to_html_parser.rb', line 555

# Handles the start of a printed line inside a paragraph.
#
# Emits a <br /> unless this is the first line (the previous fragment is a
# tag other than '</a>'), followed by named anchors for the line
# ("page<P>-line<L>", plus a "clause<C>-"-prefixed one when a clause number
# is known). Inside a hyperlink the anchors are deferred in
# @para_line_anchor instead of being emitted. A just-opened <span ...>
# fragment is lifted off the buffer first and put back afterwards so the
# break and anchors land before it. A trailing whitespace-only fragment is
# dropped. No anchor is produced when the node has no LineNum.
def handle_para_line_start node
  held_span = nil
  held_span = @html.pop if @html.last && @html.last.include?('<span')

  @html.pop if @html.last && @html.last.strip == ''

  tail = @html.last
  first_line = (tail && tail.include?('<') && tail != '</a>') ? true : false

  line = node['LineNum'].to_s
  anchor_name = "page#{@page_number}-line#{line}"

  add %Q|<br />| unless first_line

  para_line_anchor = %Q|<a name="#{anchor_name}"></a>|
  unless @clause_number.blank?
    para_line_anchor += %Q|<a name="clause#{@clause_number}-#{anchor_name}"></a>|
  end

  unless line.empty?
    if @in_hyperlink
      @para_line_anchor = para_line_anchor
    else
      add para_line_anchor
    end
  end

  add held_span if held_span
  @in_para_line = true
end

#handle_pdf_tag(node) ⇒ Object


537
538
539
540
541
542
543
544
545
546
# File 'app/models/mif_to_html_parser.rb', line 537

# Renders a *_PgfTag element as a <p>, or as a <span> when already inside a
# paragraph. @in_paragraph is true while the element is rendered and is
# switched back off afterwards unless it was already on.
#
# Raises RuntimeError when an element whose class contains "_PgfTag" would
# have to be rendered as a nested <span>.
def handle_pdf_tag node
  nested = @in_paragraph
  tag = nested ? 'span' : 'p'
  klass = css_class(node)
  if nested && klass[/_PgfTag/]
    raise "expecting #{klass} to be a paragraph but: already_in_paragraph -> #{nested} + #{node.inspect}"
  end

  @in_paragraph = true
  add_html_element(tag, node)
  @in_paragraph = false unless nested
end

#handle_pgf_num_string(node) ⇒ Object


512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
# File 'app/models/mif_to_html_parser.rb', line 512

# Renders a PgfNumString_N element (a paragraph's numbering label).
#
# When a clause or schedule anchor is pending (set by handle_clause /
# handle_schedule) the label is wrapped in that anchor inside a <span>, and
# the anchor is cleared once the children have been rendered; a pending
# clause anchor takes precedence. Otherwise the label is a plain <span>,
# padded with &nbsp; when node.at('/text()') yields nothing.
def handle_pgf_num_string node
  if @clause_anchor_start || @schedule_anchor_start
    for_clause = @clause_anchor_start ? true : false

    add %Q|<span class="#{css_class(node)}">|
    add(for_clause ? @clause_anchor_start : @schedule_anchor_start)
    node_children_to_html(node)
    add '</a>'
    add %Q|</span>|

    # Cleared only after the children are rendered, as the anchor must stay
    # pending while they are processed.
    if for_clause
      @clause_anchor_start = nil
    else
      @schedule_anchor_start = nil
    end
  else
    add_html_element 'span', node
    if node.at('/text()').to_s.empty?
      closing_tag = @html.pop
      add "&nbsp;"
      add closing_tag
    end
  end
end

#handle_schedule(node) ⇒ Object


468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
# File 'app/models/mif_to_html_parser.rb', line 468

# Renders a Schedule element.
#
# A schedule that has a number and a HardReference and is not inside an
# Amendment is rendered as a <div> with a pending schedule anchor (consumed
# by handle_pgf_num_string), optional effects, and - when an explanatory
# note is found - the note appended after the children. Any other schedule
# is rendered as a plain <div> via add_html_element.
#
# Statement order matters here: rendering the children can reassign
# @in_amendment (add_html_element does so for every element), which is why it
# is re-checked after node_children_to_html.
def handle_schedule node
  # The schedule number is only refreshed inside a Schedules section.
  if node.at('ScheduleNumber_PgfTag') && @in_schedules
    @schedule_number = node.at('ScheduleNumber_PgfTag/PgfNumString/PgfNumString_0').inner_text.gsub('Schedule ', '').strip
  end
  # schedule_id stays nil when the node has no HardReference.
  if node['HardReference']
    schedule_id = node['HardReference'].to_s.strip.gsub("&",'_')
  end

  # NOTE(review): unlike handle_clause, only the parent and grandparent are
  # inspected, and both are assumed to exist - confirm this is intended.
  @in_amendment = (node.parent.name == 'Amendment') || (node.parent.parent.name == 'Amendment')

  unless (@schedule_number.blank? || schedule_id.blank?) || @in_amendment
    schedule_name = "schedule#{@schedule_number}"
    @parent_href_name = schedule_name
    # Opening <a> tag left pending for handle_pgf_num_string to emit.
    @schedule_anchor_start = %Q|<a id="schedule_#{schedule_id}" name="#{schedule_name}" href="##{schedule_name}">|

    @explanatory_note = find_schedule_explanatory_note unless @in_amendment

    add %Q|<div class="#{css_class(node)}" id="#{node['id']}">|
    if @effects
      render_effects(@parent_href_name)
    end
    node_children_to_html(node)
    if @explanatory_note && !@in_amendment
      add %Q|<div class="explanatory_note"><div class="explanatory_note_text"><span class="en_header">Explanatory Note:</span>#{@explanatory_note.html_note_text}</div></div>|
      # Presumably closes the ScheduleTextWithExplanatoryNote wrapper opened
      # by handle_schedule_text - verify against the rendered output.
      add "</div>"
    end

    add "</div>"

    @explanatory_note = nil unless @in_amendment
  else
    add_html_element 'div', node
  end

  @in_amendment = false
end

#handle_schedule_text(node) ⇒ Object


505
506
507
508
509
510
# File 'app/models/mif_to_html_parser.rb', line 505

# Renders a ScheduleText element as a <div>. When an explanatory note is
# pending and we are not inside an amendment, an extra wrapper <div> (id is
# the node id followed by "_en") is opened first; this method does not close
# that wrapper.
def handle_schedule_text node
  needs_note_wrapper = @explanatory_note && !@in_amendment
  add %Q|<div class="ScheduleTextWithExplanatoryNote" id="#{node['id']}_en">| if needs_note_wrapper
  add_html_element 'div', node
end

#handle_schedules(node) ⇒ Object


408
409
410
411
# File 'app/models/mif_to_html_parser.rb', line 408

# Marks the start of the Schedules section (@in_schedules is set and never
# reset here) and renders the element as a <div>.
def handle_schedules node
  @in_schedules = true
  add_html_element('div', node)
end

#node_children_to_html(node) ⇒ Object


203
204
205
206
207
# File 'app/models/mif_to_html_parser.rb', line 203

# Renders each child of node, in order, with node_to_html. Does nothing when
# node.children is nil.
def node_children_to_html(node)
  children = node.children
  return unless children

  children.each { |child| node_to_html(child) }
end

#node_to_html(node) ⇒ Object


665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
# File 'app/models/mif_to_html_parser.rb', line 665

# Renders a single node into @html.
#
# Element nodes are dispatched on their name (with '.' replaced by '_'):
# specific handlers first, then the DIV/SPAN/UL/LI/HR/TR/TH/TD/TABLE name
# tables; names matched by IGNORE_RE and 'EndRule' produce nothing. The
# order of the `when` clauses is significant because several patterns
# overlap.
#
# Text nodes are appended after normalising ampersands to "&amp;".
#
# Raises RuntimeError for an element name no clause recognises.
def node_to_html(node)
  case node.name.gsub('.','_')
    when 'Citation'
      add_link_element node
    when 'Xref'
      add_xref_link node
    when /BillTitle|Shorttitle/
      add_link_element node, true
    when 'ParaLineStart'
      handle_para_line_start node
    when 'PageStart'
      handle_page_start node
    when 'Clauses'
      handle_clauses node
    when 'Schedules'
      handle_schedules node
    when 'Clause'
      handle_clause node
    when 'Schedule'
      handle_schedule node
    when 'Clause_ar'
      handle_clause_ar node
    when 'Clause_ar_text'
      handle_clause_ar_text node
    when 'AmendmentReference'
      handle_amendment_reference node
    when /_number$/
      add_html_element 'span', node
    when /^PgfNumString_\d+$/
      handle_pgf_num_string node
    when /_PgfTag$/
      handle_pdf_tag node
    when /^(Para|(Sub)+Para|Para_sch|(Sub)+Para_sch|ResolutionPara|ResolutionSubPara|CoverPara|OrderDate|OrderPara|OrderSubPara|OrderSubSubPara|OrderHousePara|OrderHouseSubPara|OrderHouseSubSubPara|RunIntoPara)$/
      handle_para node
    when /^EndRule$/
      #ignore
    when 'ClauseText'
      handle_clause_text node
    when 'ScheduleText'
      handle_schedule_text node
    when DIV_RE
      add_html_element 'div', node
    when SPAN_RE
      add_html_element 'span', node
    when UL_RE
      add_html_element 'ul', node
    when LI_RE
      add_html_element 'li', node
    when HR_RE
      add_html_element 'hr', node
    when TR_RE
      add_html_element 'tr', node
    when TH_RE
      add_html_element 'th', node
    when TD_RE
      add_html_element 'td', node
    when TABLE_RE
      add_html_element 'table', node
    when 'a'
      add_anchor node
    when IGNORE_RE
      # ignore for now
    else
      # The statement that used to follow this raise could never run.
      raise "don't know how to handle: #{node.name}"
  end if node.elem?

  if node.text?
    text = node.to_s
    # NOTE(review): this replaces the two-character sequence "/n", not a
    # newline ("\n") - confirm which was intended before changing it.
    text.gsub!("/n", "<br />")
    # Collapse already-escaped ampersands first so none is escaped twice.
    text.gsub!('&amp;','&')
    text.gsub!('&','&amp;')
    add text
    # @last_css_class is nil until the first element has been rendered.
    if !text.blank? && @last_css_class && @last_css_class[/^(Bold|Italic|SmallCaps|.+_text)$/]
      @in_para_line = true
    end
  end
end

#parse_xml(xml, options = {:format => :html}) ⇒ Object


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'app/models/mif_to_html_parser.rb', line 19

# Parses MIF XML text and converts it to the requested output format.
#
# xml     - XML source text.
# options - :format selects :html, :haml or :text; :clauses_file is stored
#           in @clauses_file; the whole hash is passed on to generate_html.
#
# For :haml the preprocessed HTML is also written to
# RAILS_ROOT/tmp/example.html when RAILS_ENV is 'development'. For :text the
# HTML is broken onto separate lines at <div>, <p> and <br> boundaries before
# the tags are stripped.
#
# Raises RuntimeError for any other format (including a missing :format).
def parse_xml xml, options={:format => :html}
  doc = Hpricot.XML xml
  format = options[:format]
  @clauses_file = options[:clauses_file]

  case format
  when :html
    generate_html doc, options
  when :haml
    html = preprocess(generate_html(doc, options))
    File.open("#{RAILS_ROOT}/tmp/example.html", 'w+') {|f| f.write(html) } if RAILS_ENV == 'development'
    generate_haml html, options
  when :text
    html = generate_html doc, options
    line_break_rules = [
      ["\n", ''],
      ["<div", "\n<div"],
      ["<p", "\n<p"],
      ["<br", "\n<br"],
      ["\n\n", "\n"]
    ]
    line_break_rules.each { |pattern, replacement| html.gsub!(pattern, replacement) }

    html = ActionController::Base.helpers.strip_tags(html)
    html.gsub!(" \n","\n")
    html
  else
    raise "don't know how to generate format: #{format}"
  end
end

#parse_xml_file(xml_file, options) ⇒ Object

e.g. parser.parse_xml_file("pbc0930106a.mif.xml", :format => :html)


12
13
14
15
16
17
# File 'app/models/mif_to_html_parser.rb', line 12

# Reads xml_file and converts it with parse_xml.
#
# xml_file - path of the MIF XML file to read.
# options  - conversion options passed on to parse_xml. Defaults to
#            {:format => :html}, the same default parse_xml uses, so the
#            method can be called with just a path. When the hash has no
#            :clauses_file key, one is added in place, pointing at
#            "Clauses.mif" in the same directory as xml_file.
def parse_xml_file xml_file, options={:format => :html}
  unless options.has_key?(:clauses_file)
    options.merge!({:clauses_file => File.dirname(xml_file)+'/Clauses.mif' })
  end
  parse_xml(IO.read(xml_file), options)
end

#parse_xref_link_title(anchor_ref) ⇒ Object


339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
# File 'app/models/mif_to_html_parser.rb', line 339

# Builds a human-readable title from an anchor reference such as
# "clause1-2-a" => "Clause 1 Subsection 2 paragraph a".
#
# The reference is split on '-' and each part is translated by its prefix:
# "clause…", "schedule…", "amendment…", a part starting with a digit
# (subsection) or one starting with a non-digit (paragraph). Parts matching
# none of these contribute nothing.
def parse_xref_link_title(anchor_ref)
  labels = anchor_ref.split("-").map do |part|
    case part
      when /^clause(.*)/
        "Clause #{$1} "
      when /^schedule(.*)/
        "Schedule #{$1} "
      when /^amendment/
        ", Amendment "
      when /^(\d+.*)$/
        "Subsection #{$1} "
      when /^(\D+)/
        "paragraph #{$1} "
      else
        ""
    end
  end
  labels.join("").gsub(' ,', ',').strip
end

#render_effects(node_ref) ⇒ Object


287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# File 'app/models/mif_to_html_parser.rb', line 287

# Emits one <div class="effect …"> per Effect recorded for the current bill
# at the provision node_ref, all concatenated into a single fragment.
#
# Nothing is emitted when node_ref is nil, when no bill is known or the bill
# reports no effects, or when the lookup finds nothing. The extra CSS class
# (insertion, amendment, substitution or repeal) comes from the first of
# "inserted", "amended", "substituted", "repealed" found in the effect's
# type_of_effect; other types get no extra class.
def render_effects(node_ref)
  return unless node_ref && @bill && @bill.has_effects?

  effects = Effect.find_all_by_bill_id_and_bill_provision(@bill.id, node_ref)
  return unless effects

  css_by_pattern = [
    [/inserted/, 'insertion'],
    [/amended/, 'amendment'],
    [/substituted/, 'substitution'],
    [/repealed/, 'repeal']
  ]
  effect_html = effects.map do |effect|
    kind = effect.type_of_effect
    matched = css_by_pattern.find { |pattern, _css| pattern === kind }
    css_suffix = matched ? " #{matched.last}" : ''
    %Q|<div class="effect#{css_suffix}">affects #{effect.affected_act} at #{effect.affected_act_provision} (#{kind})</div>|
  end.join('')

  add effect_html unless effect_html == ''
end