Class: RSyntaxTree::StringParser

Inherits:

Object

Object
RSyntaxTree::StringParser

show all

Defined in: lib/rsyntaxtree/string_parser.rb

Instance Attribute Summary collapse

#data ⇒ Object

Returns the value of attribute data.
#elist ⇒ Object

Returns the value of attribute elist.
#id ⇒ Object

Returns the value of attribute id.
#level ⇒ Object

Returns the value of attribute level.
#pos ⇒ Object

Returns the value of attribute pos.

Class Method Summary collapse

.valid?(data) ⇒ Boolean

Instance Method Summary collapse

#get_elementlist ⇒ Object
#get_next_token ⇒ Object
#initialize(str, fontset, fontsize, global) ⇒ StringParser constructor

A new instance of StringParser.
#make_tree(parent) ⇒ Object
#parse ⇒ Object

Constructor Details

#initialize(str, fontset, fontsize, global) ⇒ `StringParser`

Returns a new instance of StringParser.

# File 'lib/rsyntaxtree/string_parser.rb', line 19

# Prepares a parser for the given text.
#
# The text is normalized before it is stored in @data:
# * runs of CR/LF collapse to a single newline;
# * a backslash followed by a newline (and any whitespace after it) becomes
#   the two characters backslash + "n";
# * every remaining whitespace run becomes one space;
# * whitespace directly around "[" and "]" is removed;
# * "<N>" becomes WHITESPACE_BLOCK repeated N times ("<>" and "<0>" yield
#   a single WHITESPACE_BLOCK).
#
# @param str [String] raw input text
# @param fontset [Hash] font table; NOTE: it is modified in place -- its
#   :normal entry is overwritten with its :cjk entry when the normalized
#   text answers true to contains_cjk? (a helper defined outside this block)
# @param fontsize [Object] stored unchanged and passed on to each Element
# @param global [Object] stored unchanged and passed on to each Element
def initialize(str, fontset, fontsize, global)
  @global = global

  normalized = str.gsub(/[\n\r]+/m, "\n")
                  .gsub(/\\\n\s*/m, "\\n")
                  .gsub(/\s+/, " ")
                  .gsub(/\]\s+\[/, "][")
                  .gsub(/\s+\[/, "[")
                  .gsub(/\[\s+/, "[")
                  .gsub(/\s+\]/, "]")
                  .gsub(/\]\s+/, "]")
                  .gsub(/<(\d*)>/) do
                    count = Regexp.last_match(1).to_i
                    count.positive? ? WHITESPACE_BLOCK * count : WHITESPACE_BLOCK
                  end

  @data = normalized
  fontset[:normal] = fontset[:cjk] if @data.contains_cjk?

  @elist = ElementList.new # collects every Element that gets created
  @pos = 0                 # read position inside @data
  @id = 1                  # id handed to the next Element
  @level = 0               # current nesting depth
  @fontset = fontset
  @fontsize = fontsize
end

Instance Attribute Details

#data ⇒ `Object`

Returns the value of attribute data.



17
18
19

# File 'lib/rsyntaxtree/string_parser.rb', line 17

# Reader for @data, which #initialize sets to the normalized input string.
def data
  @data
end

#elist ⇒ `Object`

Returns the value of attribute elist.



17
18
19

# File 'lib/rsyntaxtree/string_parser.rb', line 17

# Reader for @elist, the ElementList created in #initialize and filled by
# #make_tree.
def elist
  @elist
end

#id ⇒ `Object`

Returns the value of attribute id.



17
18
19

# File 'lib/rsyntaxtree/string_parser.rb', line 17

# Reader for @id, the id that will be given to the next Element; it starts
# at 1 and #make_tree increments it after each Element it creates.
def id
  @id
end

#level ⇒ `Object`

Returns the value of attribute level.



17
18
19

# File 'lib/rsyntaxtree/string_parser.rb', line 17

# Reader for @level, the nesting depth counter; it starts at 0 and is
# adjusted by #make_tree as it enters and leaves bracket groups.
def level
  @level
end

#pos ⇒ `Object`

Returns the value of attribute pos.



17
18
19

# File 'lib/rsyntaxtree/string_parser.rb', line 17

# Reader for @pos, the current read position inside @data; it starts at 0
# and is advanced by #get_next_token.
def pos
  @pos
end

Class Method Details

.valid?(data) ⇒ `Boolean`

Returns:

(Boolean)

Raises:

(RSTError)

# File 'lib/rsyntaxtree/string_parser.rb', line 53

# Checks that the text contains a balanced set of square brackets.
#
# A backslash escapes the character after it, so "\[" and "\]" are not
# counted as brackets (and "\\" is a literal backslash that escapes nothing).
# Scanning stops at the first point where more brackets have been closed
# than opened.
#
# @param data [String] text to check
# @return [true] when the text has at least one bracket pair and the counts
#   of opening and closing brackets agree
# @raise [RSTError] when the text is empty, contains a bracket pair with
#   nothing but whitespace inside, contains no brackets at all, or has
#   unbalanced brackets
def self.valid?(data)
  raise RSTError, +"Error: input text is empty" if data.empty?
  raise RSTError, +"Error: inside the brackets is empty" if data =~ /\[\s*\]/m

  opened = 0
  closed = 0
  escaped = false

  data.strip.each_char do |chr|
    if chr == "\\"
      # Two backslashes in a row cancel each other out.
      escaped = !escaped
      next
    end

    was_escaped = escaped
    escaped = false
    next if was_escaped && (chr == "[" || chr == "]")

    case chr
    when "["
      opened += 1
    when "]"
      closed += 1
      break if opened < closed
    end
  end

  if opened.zero? && closed.zero?
    raise RSTError, +"Error: input text does not contain paired brackets"
  end
  raise RSTError, +"Error: open and close brackets do not match" unless opened == closed

  true
end

Instance Method Details

#get_elementlist ⇒ `Object`



101
102
103

# File 'lib/rsyntaxtree/string_parser.rb', line 101

# Returns the element list held in @elist (the object created in
# #initialize and filled by #make_tree).
#
# @return [ElementList]
def get_elementlist
  @elist
end

#get_next_token ⇒ `Object`

# File 'lib/rsyntaxtree/string_parser.rb', line 105

# Reads the next token from @data starting at @pos and advances @pos.
#
# A token is one of:
# * a lone "]" (when the read position sits on a closing bracket);
# * "[" followed by the text up to, but not including, the next unescaped
#   bracket;
# * plain text up to, but not including, the next unescaped bracket.
#
# A backslash escapes the following character. Escaped "[", "]", "\" and
# the characters n { } < > ^ + * _ = ~ | - are kept together with their
# backslash; an escaped space is emitted as backslash + "n"; for any other
# character the backslash is dropped.
#
# @return [String] the token, or "" once fewer than two characters remain
def get_next_token
  chars = @data.split(//)
  return "" if (@pos + 1) >= chars.length

  token = ""
  consumed = 0
  escaped = false
  finished = false

  until finished || (@pos + consumed) >= chars.length
    current = chars[@pos + consumed]
    consumed += 1

    if escaped
      escaped = false
      token += case current
               when "[" then '\\['
               when "]" then '\\]'
               when "\\" then '\\\\'
               when " " then '\\n'
               when /[n{}<>^+*_=~|-]/ then '\\' + current
               else current
               end
    elsif current == "\\"
      escaped = true
    elsif current == "["
      # An opening bracket starts a token but also terminates a running one.
      if consumed > 1
        finished = true
      else
        token += current
      end
    elsif current == "]"
      # A closing bracket is a token by itself only when it comes first.
      token += current if consumed == 1
      finished = true
    else
      token += current
    end
  end

  # Leave @pos on the terminating bracket, but always move at least one step.
  @pos += [consumed - 1, 1].max
  token
end

#make_tree(parent) ⇒ `Object`

# File 'lib/rsyntaxtree/string_parser.rb', line 174

# Consumes tokens from #get_next_token and appends the matching Elements to
# @elist, recursing once for every opening bracket.
#
# * A token starting with "[" opens a node. If the rest of the token holds
#   a space, the text before the space becomes the node (child of +parent+)
#   and the text from that space onward becomes a child of the node, one
#   level deeper. Otherwise the whole rest is the node. Tokens that follow
#   are then read as children of the node.
# * Any other non-blank token becomes a plain Element under +parent+. This
#   is also the path taken by tokens that begin with an escaped bracket
#   ("\[" or "\]"): they start with a backslash, not with "[". (The former
#   escaped-bracket test inside the "[" branch could never match for that
#   reason and has been removed.)
# * A "]" or "" token ends the current invocation.
#
# Only the first token read by an invocation is stripped; later tokens are
# used exactly as returned by #get_next_token.
#
# @param parent [Integer] id of the Element that new top-level Elements of
#   this invocation are attached to
# @return [Integer] the value of @level after it has been decremented
def make_tree(parent)
  token = get_next_token.strip

  while token != "" && token != "]"
    if token.start_with?("[")
      body = token[1..-1]
      split_at = body.index(" ")

      if split_at
        node = Element.new(@id, parent, body[0, split_at], @level, @fontset, @fontsize, @global)
        @id += 1
        @elist.add(node)
        subtree_parent = node.id

        # The remainder keeps its leading space and hangs below the node
        # that was just created (whose id is now @id - 1).
        leaf = Element.new(@id, @id - 1, body[split_at..-1], @level + 1, @fontset, @fontsize, @global)
        @id += 1
        @elist.add(leaf)
      else
        node = Element.new(@id, parent, body, @level, @fontset, @fontsize, @global)
        @id += 1
        @elist.add(node)
        subtree_parent = node.id
      end

      @level += 1
      make_tree(subtree_parent)
    elsif token.strip != ""
      text = Element.new(@id, parent, token, @level, @fontset, @fontsize, @global)
      @id += 1
      @elist.add(text)
    end

    token = get_next_token
  end

  # Leaving this bracket group: undo the increment made by the caller.
  @level -= 1
end

#parse ⇒ `Object`

# File 'lib/rsyntaxtree/string_parser.rb', line 96

# Runs the parser: builds all Elements starting from parent id 0, then
# calls set_hierarchy on the element list.
#
# @return [Object] the result of @elist.set_hierarchy
def parse
  make_tree(0)
  @elist.set_hierarchy
end

Class: RSyntaxTree::StringParser

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str, fontset, fontsize, global) ⇒ StringParser

Instance Attribute Details

#data ⇒ Object

#elist ⇒ Object

#id ⇒ Object

#level ⇒ Object

#pos ⇒ Object