Class: Slaw::ActGenerator

Inherits:
Object
  • Object
show all
Defined in:
lib/slaw/generator.rb

Overview

Base class for generating Act documents

Constant Summary collapse

@@parsers =
{}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(grammar) ⇒ ActGenerator

Returns a new instance of ActGenerator.



12
13
14
15
16
17
18
19
# File 'lib/slaw/generator.rb', line 12

# Create a generator for the given grammar.
#
# Builds a parser for the grammar, hands it to a new Slaw::Parse::Builder,
# and creates the Slaw::Parse::Cleanser used by #cleanup and #reformat.
#
# @param grammar name of the grammar; it is interpolated into the paths
#   under grammars/ and upcased to find the parser class (see #build_parser)
def initialize(grammar)
  @grammar = grammar

  fresh_parser = build_parser
  @builder = Slaw::Parse::Builder.new(parser: fresh_parser)

  # Expose the parser the builder reports, rather than assuming it is the
  # same object that was passed in.
  @parser = @builder.parser

  @cleanser = Slaw::Parse::Cleanser.new
end

Instance Attribute Details

#builder ⇒ Object

Slaw::Parse::Builder

builder used by the generator



8
9
10
# File 'lib/slaw/generator.rb', line 8

# Builder used by the generator.
#
# @return [Slaw::Parse::Builder] the current value of @builder
def builder
  @builder
end

#parser ⇒ Object

Treetop::Runtime::CompiledParser

compiled parser



5
6
7
# File 'lib/slaw/generator.rb', line 5

# Compiled parser used by the generator.
#
# @return [Treetop::Runtime::CompiledParser] the current value of @parser
def parser
  @parser
end

Instance Method Details

#build_parser ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/slaw/generator.rb', line 21

# Build a new parser instance for this generator's grammar.
#
# The parser class is cached per grammar in @@parsers, so the Treetop
# grammar file is loaded only the first time a given grammar is requested.
# The new instance is stored in @parser and also returned.
#
# @return a new instance of Slaw::Grammars::<GRAMMAR>::ActParser
# @raise [NameError] if no such parser class exists after loading the grammar
def build_parser
  unless @@parsers[@grammar]
    # load the grammar
    grammar_file = File.join(File.dirname(__FILE__), "grammars/#{@grammar}/act.treetop")
    Treetop.load(grammar_file)

    # Look the class up by name instead of eval'ing a string: this resolves
    # the same constant but cannot execute arbitrary code if the grammar
    # name is ever malformed.
    grammar_class = "Slaw::Grammars::#{@grammar.upcase}::ActParser"
    @@parsers[@grammar] = Object.const_get(grammar_class)
  end

  @parser = @@parsers[@grammar].new
end

#cleanup(text) ⇒ Object

Run basic cleanup on text, such as ensuring clean newlines and removing tabs. This is always automatically done before processing.



46
47
48
# File 'lib/slaw/generator.rb', line 46

# Run basic cleanup on text, such as ensuring clean newlines and removing
# tabs, by delegating to the cleanser. #generate_from_text applies this
# automatically before parsing.
#
# @param text the text to clean (presumably a String — confirm against
#   Slaw::Parse::Cleanser#cleanup)
# @return whatever the cleanser's cleanup returns
def cleanup(text)
  @cleanser.cleanup(text)
end

#generate_from_text(text) ⇒ Nokogiri::Document

Generate an XML document from plain text.

Parameters:

  • text (String)

    plain text

Returns:

  • (Nokogiri::Document)

    the resulting xml



39
40
41
# File 'lib/slaw/generator.rb', line 39

# Generate an XML document from plain text.
#
# The text is first passed through #cleanup and the cleaned result is then
# handed to the builder for parsing and processing.
#
# @param text [String] plain text
# @return [Nokogiri::Document] the resulting xml
def generate_from_text(text)
  cleaned_text = cleanup(text)
  @builder.parse_and_process_text(cleaned_text)
end

#guess_section_number_after_title(text) ⇒ Object

Try to determine if section numbers come after titles, rather than before.

eg:

Section title
1. Section content

versus

1. Section title
Section content


69
70
71
72
73
74
# File 'lib/slaw/generator.rb', line 69

# Guess whether section numbers come after their titles rather than before.
#
# Counts two layouts in the text:
#
#   title line followed by a line starting "N. "      (number after title)
#   blank line followed by "N. " and a 4+ letter word (number before title)
#
# and reports true only when the first layout outnumbers the second by
# more than a factor of 1.25.
#
# @param text [String] the document text to inspect
# @return [Boolean] true if numbers appear to follow titles
def guess_section_number_after_title(text)
  title_then_number = text.scan(/^\w{4,}[^\n]+\n\d+\. /).size
  number_then_title = text.scan(/^\s*\n\d+\. \w{4,}/).size

  threshold = number_then_title * 1.25
  title_then_number > threshold
end

#reformat(text) ⇒ Object

Reformat some common errors in text to help make parsing more successful. Optional, and only recommended when processing a document for the first time.



53
54
55
# File 'lib/slaw/generator.rb', line 53

# Reformat some common errors in text to help make parsing more successful,
# by delegating to the cleanser. Unlike #cleanup, nothing in this class calls
# it automatically; it is intended for a document's first processing pass.
#
# @param text the text to reformat (presumably a String — confirm against
#   Slaw::Parse::Cleanser#reformat)
# @return whatever the cleanser's reformat returns
def reformat(text)
  @cleanser.reformat(text)
end

#text_from_act(doc) ⇒ Object

Transform an Akoma Ntoso XML document back into a plain-text version suitable for re-parsing back into XML with no loss of structure.



78
79
80
81
# File 'lib/slaw/generator.rb', line 78

# Transform an Akoma Ntoso XML document back into a plain-text version
# suitable for re-parsing into XML.
#
# Reads the act_text.xsl stylesheet from this grammar's directory, applies
# it to the document, and serialises the child of the transformed result.
#
# @param doc the XML document to transform (presumably a Nokogiri document —
#   confirm against callers)
# @return the result of +to_xml+ on the transformed document's child
def text_from_act(doc)
  stylesheet_path = File.join(File.dirname(__FILE__), "grammars/#{@grammar}/act_text.xsl")
  stylesheet = Nokogiri::XSLT(File.read(stylesheet_path))

  transformed = stylesheet.transform(doc)
  transformed.child.to_xml
end