Class: OrgParse::StructScanner

Inherits:
Object
  • Object
show all
Includes:
InlineUtils, Utils
Defined in:
lib/org-parse/struct-scanner.rb

Overview

Org-modeの文字列を、構造レベルのトークンに分解する

処理は、行単位で行う

Constant Summary collapse

# Generator string built from the library VERSION and the running Ruby
# version; the constructor uses it as the default :creator option.
GENERATOR =
"OrgParse #{VERSION} Powered by Ruby #{RUBY_VERSION}"
# Maps each list-opening token kind to the token kind that closes it.
# check_nest and exit_nests look up the closing kind here when they pop
# an entry off the nest stack.
ListSymbols =
{
  :UL_START => :UL_END, :OL_START => :OL_END,
  :DL_START => :DL_END,
}

Instance Method Summary collapse

Methods included from InlineUtils

#line_parse, #set_struct_parser

Methods included from Utils

#get_indent

Constructor Details

#initialize(src, opts) ⇒ StructScanner

コンストラクタ

src

ソース文字列(または、文字列の配列)。ソース文字列は、1行単位の配列として @src に保存する。

opts

skip:t の場合にタイトルとして使われる



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/org-parse/struct-scanner.rb', line 28

# Builds a scanner over an Org-mode source and prepares its export options.
#
# src  - the source text. An Array is stored as-is (one element per line);
#        anything that responds to each_line (e.g. a String) is split into
#        an Array of lines; anything else is converted with to_a.
# opts - option overrides laid over the built-in defaults, as key/value
#        pairs. nil is treated as "no overrides".
#
# Once defaults and overrides are merged, read_options is called so that
# option lines found in the source can adjust @options as well.
def initialize(src, opts)
  # String#to_a no longer exists (removed in Ruby 1.9), so line-oriented
  # sources are split with each_line instead.
  @src = if src.is_a?(Array)
           src
         elsif src.respond_to?(:each_line)
           src.each_line.to_a
         else
           src.to_a
         end
  @line_idx = 0
  @outline_level = 0
  @nest_stack = []
  @token_que = []
  @section_stack = []
  @options = { :H => 3, :skip => false, :toc => true, :num => true,
    :author => nil,
    :url => nil,
    :email => nil,
    :creator => GENERATOR, :timestamp => true,
    :title => nil, :text => [], :language => 'ja', :charset => 'utf-8',
    :default_title => '(no title)', :style => '', :uv => true,
    :dot_path => ''
  }
  (opts || {}).each { |k, v| @options[k] = v }
  read_options
end

Instance Method Details

#check_nest(kind, indent, string, dt = '') ⇒ Object

リスト開始ラインのネストをチェックする。(ネストしたリストの開始 / 新規のリストアイテムの開始)



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/org-parse/struct-scanner.rb', line 124

# Queues the tokens needed to start a list item found at the given indent,
# opening or closing list levels on @nest_stack as required.
#
# kind   - list-opening token kind (a key of ListSymbols).
# indent - indentation of the list item line.
# string - text of the item; used as the token payload.
# dt     - definition term for definition-list items ('' otherwise).
#
# Cases, comparing indent with the innermost open list:
# * no open list, or deeper indent: open a new list and its first item;
# * shallower indent: close the innermost list, then re-check against
#   the next enclosing one;
# * same indent: close the previous item and start a new one.
def check_nest(kind, indent, string, dt = '')
  last = @nest_stack.last
  if @nest_stack.empty? || last[1] < indent
    @nest_stack << [kind, indent]
    @token_que << [kind, string]
    @token_que << [:LI_START, [kind, string, dt]]
  elsif last[1] > indent
    nest = @nest_stack.pop
    @token_que << [:LI_END, string]
    @token_que << [ListSymbols[nest[0]], ListSymbols[nest[0]]]
    # Forward dt as well, otherwise a definition item that follows a
    # dedent would lose its term.
    check_nest kind, indent, string, dt
  else
    @token_que << [:LI_END, string]
    @token_que << [:LI_START, [kind, string, dt]]
  end
end

#exit_nests(line) ⇒ Object

if in list then exit list.



151
152
153
154
155
156
157
158
# File 'lib/org-parse/struct-scanner.rb', line 151

# Closes every open list whose indent is at or beyond the indent of +line+.
#
# For each list popped off @nest_stack, an :LI_END token and the matching
# list-end token (looked up in ListSymbols) are queued, both carrying +line+.
def exit_nests(line)
  threshold = get_indent(line)
  loop do
    innermost = @nest_stack.last
    break unless innermost && innermost[1] >= threshold
    closed_kind = @nest_stack.pop[0]
    @token_que.push([:LI_END, line], [ListSymbols[closed_kind], line])
  end
end

#exit_section(level) ⇒ Object



160
161
162
163
164
165
166
# File 'lib/org-parse/struct-scanner.rb', line 160

# Closes every open section whose level is greater than or equal to +level+.
#
# One :SEC_END token is queued per closed section. Its payload is the
# requested +level+, not the level of the section being closed.
def exit_section(level)
  while @section_stack.last && @section_stack.last >= level
    @token_que << [:SEC_END, level]
    @section_stack.pop
  end
end

#next_token ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/org-parse/struct-scanner.rb', line 98

# Removes and returns the next token from the queue, paired with the
# attribute strings collected from the variable lines that preceded it.
#
# Leading :VARIABLELINE tokens named "ATTR_HTML" or "CAPTION" are consumed
# and their values gathered (caption values get a "CAPTION:" prefix).
# Returns [token, vars]. When the queue is empty, or runs dry while
# variable lines are being consumed, returns [[false, false], []].
def next_token
  return [[false, false], []] if @token_que.empty?

  gathered = []
  current = @token_que.shift
  loop do
    break unless current[0] == :VARIABLELINE
    name, value = current[1][0], current[1][1]
    case name
    when "CAPTION"
      gathered << "CAPTION:" + value
    when "ATTR_HTML"
      gathered << value
    else
      break
    end
    return [[false, false], []] if @token_que.empty?
    current = @token_que.shift
  end
  [current, gathered]
end

#read_options ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/org-parse/struct-scanner.rb', line 54

# Extracts document-level settings from the source lines into @options and
# removes the lines that were consumed from @src.
#
# Recognised lines (keywords are matched case-insensitively):
# * #+OPTIONS:  - "H:<n>" sets :H; each of skip/num/toc/author/creator/
#                 timestamp/uv is stored as false when its value is "nil"
#                 and true otherwise. Every setting on the line is applied.
#                 A line with no recognised setting is left in @src.
# * #+TITLE:, #+LANGUAGE:, #+AUTHOR:, #+EMAIL: - stored as strings.
# * #+TEXT:     - parsed with line_parse and appended to :text.
# * #+STYLE:    - appended to :style.
#
# NOTE(review): an "author:"/"creator:" switch stores a boolean in the same
# slot that otherwise holds the author name / generator string - confirm
# that the consumers of @options expect this.
def read_options
  switches = [:skip, :num, :toc, :author, :creator, :timestamp, :uv]
  @src.reject! do |line|
    consumed = true
    case line
    when /^\s*#\+OPTIONS:\s/i
      settings = $'
      consumed = false
      # Check each setting independently: one OPTIONS line usually carries
      # several of them.
      if settings =~ /H:([0-9]+)/
        @options[:H] = $1.to_i
        consumed = true
      end
      switches.each do |name|
        # \w+ so that the whole value ("nil", "t", ...) is compared.
        next unless settings =~ /#{name}:(\w+)/
        @options[name] = $1 != 'nil'
        consumed = true
      end
    when /^\s*#\+TITLE:\s+(.*)$/i
      @options[:title] = $1
    when /^\s*#\+TEXT:\s+(.*)$/i
      @options[:text] += line_parse($1 + "\n")
    when /^\s*#\+LANGUAGE:\s+(.*)/i
      @options[:language] = $1
    when /^\s*#\+AUTHOR:\s*(.*)/i
      @options[:author] = $1
    when /^\s*#\+EMAIL:\s*(.*)/i
      @options[:email] = $1
    when /^\s*#\+STYLE:\s*(.*)$/i
      @options[:style] += $1
    else
      consumed = false
    end
    consumed
  end
end

#scan ⇒ Object

split to tokens from @src array and set to @token_que



194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# File 'lib/org-parse/struct-scanner.rb', line 194

# Walks @src line by line and appends structure-level tokens to @token_que.
#
# The title is resolved first (scan_before_1st_headline), then a
# :DOCUMENT_START token carrying @options is queued. Each remaining line is
# classified by the first matching pattern below - the order of the `when`
# clauses matters. At the end, open lists and sections are closed.
def scan
  @line_idx = 0
  scan_before_1st_headline
  # True while inside an EXAMPLE/HTML/SRC/DOT block, whose lines are passed
  # through as plain text without further classification.
  example_flag = false
  @token_que << [:DOCUMENT_START, @options]
  while @line_idx < @src.size
    line = @src[@line_idx]
    # Inside a verbatim block everything except its #+end_ line is text.
    if example_flag && line !~ /^\s*#\+end_(example|html|src|dot)/i
      @token_que << [:TEXTLINE, [line, get_indent(line)]]
      @line_idx += 1
      next
    end

    case line
    when /^\s*$/ # WHITELINE
      @token_que << [:WHITELINE, line]
    when /^(\*+)(\s+)COMMENT(\s)/
      # COMMENT headline: emit nothing and skip ahead to the next headline
      # of any level. Step back one line when a headline was found so the
      # increment at the bottom of the loop lands on it.
      @line_idx += 1
      @line_idx += 1 while @src[@line_idx] and @src[@line_idx] !~ /^\*+\s/
      @line_idx -= 1 if @src[@line_idx]
    when /^(\*+)(\s+)/
      # Headline: close open lists, close sections at this level or deeper,
      # then open the new section.
      rest = $'
      level = $1
      exit_nests('')
      exit_section(level.size)
      @section_stack << level.size
      @token_que << [:HEADLINE, [level, rest]]
    when /^(\s*)-\s(.+)\s::\s+/
      # Definition LIST
      # $2 is the term; the text after " :: " is the description.
      rest = $'
      check_nest :DL_START, get_indent($1), rest, $2
    when /^(\s*)[-+*]\s/
      # Unordered LIST
      rest = $'
      check_nest :UL_START, get_indent($1), rest
    when /^(\s*)[0-9]+(\.|\))\s+/
      # Ordered LIST
      rest = $'
      check_nest :OL_START, get_indent($1), rest
    when /^\s*#\+HTML:/
      # #+HTML
      # The remainder of the line is queued as a :QUOTE token.
      rest = $'
      @token_que << [:QUOTE, $']
    when /^\s*#\+([^ :]+):\s*(.+)$/
      # Any other "#+NAME: value" line; the name is upcased.
      @token_que << [:VARIABLELINE, [$1.upcase, $2.chomp]]
    when /^(\s*):\s(.*)$/
      # Line starting with ": " - queued as a one-line :EXAMPLE token.
      @token_que << [:EXAMPLE, [$2+"\n", get_indent($1)]]
    when /^\s*#\+BEGIN_([A-Z0-9_]+)/i # BLOCK
      block_name = $1.upcase
      exit_nests line
      @token_que << [:BLOCK_START, [block_name, line, get_indent(line)]]
      # Only these block types switch to verbatim pass-through.
      example_flag = true if ['EXAMPLE', 'HTML', 'SRC', 'DOT'].include? block_name.upcase
    when /^\s*#\+END_([A-Z0-9_]+)/i # BLOCK
      # The block name is passed on as written (not upcased here).
      block_name = $1
      exit_nests line
      @token_que << [:BLOCK_END, [block_name, line]]
      example_flag = false
    when /^\s*\|[-\|\+]*\s*$/ # table separator
      # an org-mode table separator has the first non-whitespace
      # character as a | (pipe), then consists of nothing else other
      # than pipes, hyphens, and pluses.
      exit_nests line
      @token_que << [:TABLE_SEP, line]
    when /^\s*\|/             # table_row
      # the first non-whitespace character is a | (pipe).
      exit_nests line
      @token_que << [:TABLE_ROW, line]
    when /^\s*\[fn:([^\]]+)\]\s+(.+)$/
      # Footnote definition: $1 is the label, $2 the text.
      exit_nests line
      @token_que << [:FOOTNOTE, [$1, $2]]
    else
      # Anything else is ordinary text; exit_nests closes the lists whose
      # indent is at or beyond this line's indent.
      exit_nests line
      @token_que << [:TEXTLINE, [line, get_indent(line)]]
    end
    @line_idx += 1
  end
  # End of input: close whatever is still open.
  exit_nests ''
  exit_section(0)
end

#scan_before_1st_headline ⇒ Object

scan before 1st headline



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/org-parse/struct-scanner.rb', line 173

# Determines the document title and positions @line_idx at the line where
# scanning should start. The title is passed through line_parse and stored
# in @options[:title].
#
# * An explicit :title option wins; with :skip set, lines before the first
#   headline are skipped.
# * With only :skip set, those lines are skipped and :default_title is used.
# * Otherwise the first non-blank line becomes the title and is consumed,
#   unless it starts with "*" (it then stays in place for the scanner), in
#   which case :default_title is used. :default_title is also used when
#   there is no non-blank line at all.
def scan_before_1st_headline
  title = nil
  if @options[:title]
    title = @options[:title]
    skip_to_1st_headline if @options[:skip]
  elsif @options[:skip]
    skip_to_1st_headline
    title = @options[:default_title]
  else
    @line_idx += 1 while @src[@line_idx] =~ /^\s*$/
    # nil here means the source is empty or entirely blank.
    first_line = @src[@line_idx]
    candidate = first_line && first_line.chomp.sub(/^\s*/, '')
    if candidate.nil? || candidate =~ /^\*+\s*/
      title = @options[:default_title]
    else
      title = candidate
      @line_idx += 1
    end
  end
  @options[:title] = line_parse title
end

#skip_to_1st_headline ⇒ Object



168
169
170
# File 'lib/org-parse/struct-scanner.rb', line 168

# Advances @line_idx to the first level-1 headline (a single "*" followed
# by whitespace and a non-blank character). When the source contains no
# such line, @line_idx stops at @src.size rather than running past the end
# indefinitely.
def skip_to_1st_headline
  @line_idx += 1 while @line_idx < @src.size && @src[@line_idx] !~ /^\*\s+[^\s]/
end