Class: BibTeX::Lexer

Inherits:
Object
  • Object
show all
Defined in:
lib/bibtex/lexer.rb

Overview

The BibTeX::Lexer handles the lexical analysis of BibTeX bibliographies.

Constant Summary collapse

# Default lexer options: error tokens are included and strict mode is on.
# The nested :include list is frozen as well as the hash itself, because
# DEFAULTS.merge(options) only copies the hash shallowly; without the inner
# freeze, mutating one lexer's options[:include] would silently change the
# defaults of every other lexer.
DEFAULTS =
{ :include => [:errors].freeze, :strict => true }.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Lexer

Creates a new instance. Possible options and their respective default values are:

  • :include => [:errors] A list that may contain :meta_content and :errors; depending on whether or not these are present, the respective tokens are included in the parse tree.

  • :strict => true In strict mode, objects can start anywhere; therefore the ‘@’ symbol must not appear anywhere except inside literals or @comment objects. For a more lenient lexer, set this option to false; objects are then expected to start at the beginning of a line (leading white space is permitted).



46
47
48
49
# File 'lib/bibtex/lexer.rb', line 46

# Creates a new lexer without a source attached.
#
# @param options [Hash] option overrides; they are merged on top of DEFAULTS
#   (supported keys: :include and :strict)
def initialize(options = {})
  # No input yet: analyse raises ArgumentError unless it is handed data or
  # a source has been assigned beforehand.
  @data = nil
  @options = DEFAULTS.merge(options)
end

Instance Attribute Details

#data ⇒ Object

Returns the value of attribute data.



29
30
31
# File 'lib/bibtex/lexer.rb', line 29

# Reader for @data, the source object the lexer scans (nil until a source
# has been assigned).
def data; @data; end

#mode ⇒ Object

Returns the value of attribute mode.



29
30
31
# File 'lib/bibtex/lexer.rb', line 29

# Reader for @mode, the lexer's current mode (for example :meta, :bibtex,
# :content or :literal).
def mode; @mode; end

#options ⇒ Object (readonly)

Returns the value of attribute options.



29
30
31
# File 'lib/bibtex/lexer.rb', line 29

# Reader for @options, the hash built by the constructor from DEFAULTS and
# the caller's overrides.
def options; @options; end

#stack ⇒ Object (readonly)

Returns the value of attribute stack.



29
30
31
# File 'lib/bibtex/lexer.rb', line 29

# Reader for @stack, the token stack that push fills and next_token drains.
def stack; @stack; end

Instance Method Details

#active?(object) ⇒ Boolean

Returns true if the lexer is currently parsing the given object type.

Returns:

  • (Boolean)


98
99
100
# File 'lib/bibtex/lexer.rb', line 98

# Tells whether the lexer is currently inside an object of the given type.
#
# @param object [Object] the object type to test (callers in this file pass
#   :comment or :entry)
# @return [Boolean] true if it equals the currently active object
def active?(object)
  current = @active_object
  current == object
end

#analyse(data = nil) ⇒ Object

Start the lexical analysis.

Raises:

  • (ArgumentError)


127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/bibtex/lexer.rb', line 127

# Runs the lexical analysis over the given source, or over the previously
# assigned source when +data+ is nil.
#
# The scanner is consumed to the end; each step is delegated to the parse
# routine that matches the current mode. Finally the end-of-input token
# [false, '$end'] is pushed.
#
# @param data [String, nil] the source to analyse
# @return [Object] whatever the final push returns
# @raise [ArgumentError] if no data is given and none was assigned before
def analyse(data = nil)
  if data.nil? && @data.nil?
    raise ArgumentError, 'Lexer: failed to start analysis: no source given!'
  end

  Log.debug('Lexer: starting lexical analysis...')

  # Re-assigning the source goes through the data= writer even when the
  # old source is reused.
  source = data || @data.string
  self.data = source
  @data.reset

  until @data.eos?
    if bibtex_mode?
      parse_bibtex
    elsif meta_mode?
      parse_meta
    elsif content_mode?
      parse_content
    elsif literal_mode?
      parse_literal
    end
  end

  Log.debug('Lexer: finished lexical analysis.')
  push [false, '$end']
end

#backtrace(error) ⇒ Object



311
312
313
314
315
316
# File 'lib/bibtex/lexer.rb', line 311

# Unwinds the token stack after an error and reports it as a single :ERROR
# token.
#
# Tokens are popped (newest first) and collected in their original order
# until the stack is empty or the token popped last is an :AT or
# :META_CONTENT token. The error is appended to the collected tokens and
# the result is handed to push as [:ERROR, trace].
#
# @param error [Array] the error description, e.g. [:E_UNEXPECTED_TOKEN, [token, line]]
def backtrace(error)
  trace = []
  until @stack.empty?
    # Stop once the start of the offending object has been collected.
    break if !trace.empty? && [:AT, :META_CONTENT].include?(trace[0][0])
    trace.unshift(@stack.pop)
  end
  trace << error
  push [:ERROR, trace]
end

#bibtex_mode? ⇒ Boolean

Returns true if the lexer is currently parsing a BibTeX object.

Returns:

  • (Boolean)


87
88
89
# File 'lib/bibtex/lexer.rb', line 87

# Tells whether the lexer is currently parsing a BibTeX object, i.e. whether
# the mode is :bibtex or one of the object modes :comment, :string,
# :preamble and :entry.
#
# @return [Boolean]
def bibtex_mode?
  case mode
  when :bibtex, :comment, :string, :preamble, :entry then true
  else false
  end
end

#enter_object ⇒ Object

Called when the lexer encounters a new BibTeX object.



259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/bibtex/lexer.rb', line 259

# Called when the lexer encounters the start of a new BibTeX object.
#
# Resets the brace level, switches to :bibtex mode and pushes the :AT
# token. The object type following the '@' is then scanned
# (case-insensitively): "string", "preamble" and "comment" select the mode
# of the same name; any other name makes the object an :entry. If no type
# can be scanned the lexer simply stays in :bibtex mode.
def enter_object
  @brace_level = 0
  self.mode = :bibtex
  push [:AT, '@']

  # The keywords must be tried before the generic name pattern, which
  # would match them as well.
  if @data.scan(/string/io)
    self.mode = :string
    push [:STRING, @data.matched]
  elsif @data.scan(/preamble/io)
    self.mode = :preamble
    push [:PREAMBLE, @data.matched]
  elsif @data.scan(/comment/io)
    self.mode = :comment
    push [:COMMENT, @data.matched]
  elsif @data.scan(/[a-z\d:_!\.$%&*-]+/io)
    self.mode = :entry
    push [:NAME, @data.matched]
  end
end

#error_unbalanced_braces ⇒ Object



287
288
289
290
291
# File 'lib/bibtex/lexer.rb', line 287

# Reports unbalanced braces at the current scanner position: logs a warning
# and hands an :E_UNBALANCED_BRACES error (carrying the last matched text
# and the line number) to backtrace.
def error_unbalanced_braces
  line = line_number_at(@data.pos)
  warning = "Lexer: unbalanced braces on line #{line}; brace level #{@brace_level}; mode #{@mode.inspect}."
  Log.warn(warning)
  details = [@data.matched, line]
  backtrace [:E_UNBALANCED_BRACES, details]
end

#error_unexpected_token ⇒ Object



305
306
307
308
309
# File 'lib/bibtex/lexer.rb', line 305

# Reports an unexpected token at the current scanner position: logs a
# warning naming the last matched text and hands an :E_UNEXPECTED_TOKEN
# error (token and line number) to backtrace.
def error_unexpected_token
  line = line_number_at(@data.pos)
  warning = "Lexer: unexpected token `#{@data.matched}' on line #{line}; brace level #{@brace_level}; mode #{@mode.inspect}."
  Log.warn(warning)
  details = [@data.matched, line]
  backtrace [:E_UNEXPECTED_TOKEN, details]
end

#error_unterminated_content ⇒ Object



299
300
301
302
303
# File 'lib/bibtex/lexer.rb', line 299

# Reports content that is not terminated by a closing brace: logs a warning
# and hands an :E_UNTERMINATED_CONTENT error (last matched text and line
# number) to backtrace.
def error_unterminated_content
  line = line_number_at(@data.pos)
  warning = "Lexer: unterminated content on line #{line}; brace level #{@brace_level}; mode #{@mode.inspect}."
  Log.warn(warning)
  details = [@data.matched, line]
  backtrace [:E_UNTERMINATED_CONTENT, details]
end

#error_unterminated_string ⇒ Object



293
294
295
296
297
# File 'lib/bibtex/lexer.rb', line 293

# Reports a string literal that is not closed: logs a warning and hands an
# :E_UNTERMINATED_STRING error (last matched text and line number) to
# backtrace.
def error_unterminated_string
  line = line_number_at(@data.pos)
  warning = "Lexer: unterminated string on line #{line}; brace level #{@brace_level}; mode #{@mode.inspect}."
  Log.warn(warning)
  details = [@data.matched, line]
  backtrace [:E_UNTERMINATED_STRING, details]
end

#leave_object ⇒ Object

Called when the lexer leaves a BibTeX object.



281
282
283
284
# File 'lib/bibtex/lexer.rb', line 281

# Called when the lexer leaves a BibTeX object (after its closing brace or
# after an error has been pushed): switches the mode back to :meta and
# resets the brace level to zero.
def leave_object
	self.mode = :meta
	@brace_level = 0
end

#line_number_at(index) ⇒ Object

Returns the line number at a given position in the source.



65
66
67
# File 'lib/bibtex/lexer.rb', line 65

# Returns the line number at a given position in the source.
#
# NOTE: line tracking is currently disabled, so this always returns 0
# regardless of +index+. The original computation is kept below for
# reference.
#
# @param index [Integer] a position in the source (currently unused)
# @return [Integer] always 0
def line_number_at(index)
  # (@line_breaks.find_index { |n| n >= index } || 0) + 1
  0
end

#next_token ⇒ Object

Returns the next token from the parse stack.



70
71
72
# File 'lib/bibtex/lexer.rb', line 70

# Removes and returns the oldest token on the parse stack (nil once the
# stack is empty).
def next_token; @stack.shift; end

#parse_bibtex ⇒ Object



151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/bibtex/lexer.rb', line 151

# Scans a single token while the lexer is inside a BibTeX object.
#
# White space is skipped; braces adjust the brace level and may switch the
# lexer to :content mode (on entering the body of a comment or a braced
# field value of an entry) or end the object; '=', ',', '#', numbers and
# names are pushed as tokens; a double quote switches to :literal mode.
# Anything else is reported as an unexpected token.
def parse_bibtex
  if @data.scan(/[\t\r\n\s]+/o)
    # white space between tokens carries no meaning
  elsif @data.scan(/\{/o)
    @brace_level += 1
    push [:LBRACE, '{']
    if (@brace_level == 1 && active?(:comment)) || (@brace_level == 2 && active?(:entry))
      self.mode = :content
    end
  elsif @data.scan(/\}/o)
    return error_unbalanced_braces if @brace_level < 1
    @brace_level -= 1
    push [:RBRACE, '}']
    # closing the outermost brace ends the object
    leave_object if @brace_level == 0
  elsif @data.scan(/=/o)
    push [:EQ, '=']
  elsif @data.scan(/,/o)
    push [:COMMA, ',']
  elsif @data.scan(/#/o)
    push [:SHARP, '#']
  elsif @data.scan(/\d+/o)
    push [:NUMBER, @data.matched]
  elsif @data.scan(/[a-z\d\/:_!$\.%&*-]+/io)
    push [:NAME, @data.matched]
  elsif @data.scan(/"/o)
    self.mode = :literal
  elsif @data.scan(/@/o) || @data.scan(/./o)
    # NOTE(review): any other unexpected character is handled exactly like
    # a stray '@', i.e. enter_object runs (and pushes an :AT token) even
    # though no '@' was consumed - confirm this is intended.
    error_unexpected_token
    enter_object
  end
end

#parse_content ⇒ Object



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'lib/bibtex/lexer.rb', line 197

# Scans braced content up to and including the next brace.
#
# An opening brace only raises the brace level and stays part of the
# content. A closing brace lowers it and then either
# * is unbalanced (level below zero): the error is reported,
# * closes the whole object (level zero): the lexer leaves the object,
# * closes a field value of an entry (level one): back to :bibtex mode, or
# * is just a nested brace and stays part of the content.
# If no brace is left, the rest of the input is pushed as content and
# reported as unterminated.
def parse_content
  text = @data.scan_until(/\{|\}/o)
  brace = @data.matched

  if brace == '{'
    @brace_level += 1
    push [:CONTENT, text]
  elsif brace == '}'
    @brace_level -= 1
    if @brace_level < 0
      push [:CONTENT, text.chop]
      error_unbalanced_braces
    elsif @brace_level == 0
      # chop drops the closing brace, which becomes its own token
      push [:CONTENT, text.chop]
      push [:RBRACE, '}']
      leave_object
    elsif @brace_level == 1 && active?(:entry)
      push [:CONTENT, text.chop]
      push [:RBRACE, '}']
      self.mode = :bibtex
    else
      push [:CONTENT, text]
    end
  else
    push [:CONTENT, @data.rest]
    @data.terminate
    error_unterminated_content
  end
end

#parse_literal ⇒ Object



227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# File 'lib/bibtex/lexer.rb', line 227

# Scans a double-quoted string literal up to the next brace, double quote
# or line break.
#
# Braces only adjust the brace level and stay part of the literal, unless a
# closing brace drops the level below one, which is reported as unbalanced.
# A double quote ends the literal only at brace level one; inside nested
# braces it is ordinary text. A line break, or the end of input, is
# reported as an unterminated string.
def parse_literal
  text = @data.scan_until(/[\{\}"\n]/o)
  delimiter = @data.matched

  if delimiter == '{'
    @brace_level += 1
    push [:STRING_LITERAL, text]
  elsif delimiter == '}'
    @brace_level -= 1
    if @brace_level < 1
      push [:STRING_LITERAL, text.chop]
      error_unbalanced_braces
    else
      push [:STRING_LITERAL, text]
    end
  elsif delimiter == '"'
    if @brace_level == 1
      # closing quote: drop it and return to the surrounding object
      push [:STRING_LITERAL, text.chop]
      self.mode = :bibtex
    else
      push [:STRING_LITERAL, text]
    end
  elsif delimiter == "\n"
    push [:STRING_LITERAL, text.chop]
    error_unterminated_string
  else
    push [:STRING_LITERAL, @data.rest]
    @data.terminate
    error_unterminated_string
  end
end

#parse_meta ⇒ Object



186
187
188
189
190
191
192
193
194
195
# File 'lib/bibtex/lexer.rb', line 186

# Scans the text between BibTeX objects up to the next object start.
#
# In strict mode an object starts at any '@'; otherwise the '@' must be the
# first non-blank character of a line. Everything in front of that '@' is
# pushed as :META_CONTENT and the lexer enters the object. If no further
# object is found, the rest of the input is pushed as :META_CONTENT.
def parse_meta
  match = @data.scan_until(strict? ? /@[\t ]*/o : /(^|\n)[\t ]*@[\t ]*/o)
  if match
    # Both patterns end in '@' followed by optional blanks, so the last '@'
    # of the match is the object start. Cutting there (rather than chopping
    # a single character) keeps the '@' out of the meta content even when
    # blanks follow it.
    push [:META_CONTENT, match[0, match.rindex('@')]]
    enter_object
  else
    push [:META_CONTENT, @data.rest]
    @data.terminate
  end
end

#push(value) ⇒ Object

Pushes a value onto the parse stack.



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/bibtex/lexer.rb', line 106

# Pushes a token onto the parse stack.
#
# * Consecutive :CONTENT or :STRING_LITERAL tokens are merged: the text is
#   appended to the token on top of the stack and its line number updated.
# * :ERROR tokens are only kept if the :include option lists :errors; in
#   either case the lexer leaves the current object.
# * :META_CONTENT tokens are only kept if :include lists :meta_content.
# * Every other token is stored as [type, [value, line_number]].
#
# @param value [Array] a [type, value] pair; it is modified in place
# @return [self]
def push(value)
  type = value[0]
  # The emptiness check keeps a content or literal token that arrives on an
  # empty stack from failing on nil; it is then stored like any other token.
  mergeable = [:CONTENT, :STRING_LITERAL].include?(type) &&
              !@stack.empty? && type == @stack.last[0]

  if mergeable
    @stack.last[1][0] << value[1]
    @stack.last[1][1] = line_number_at(@data.pos)
  elsif type == :ERROR
    @stack.push(value) if @options[:include].include?(:errors)
    leave_object
  elsif type == :META_CONTENT
    if @options[:include].include?(:meta_content)
      value[1] = [value[1], line_number_at(@data.pos)]
      @stack.push(value)
    end
  else
    value[1] = [value[1], line_number_at(@data.pos)]
    @stack.push(value)
  end
  self
end

#strict? ⇒ Boolean

Returns true if the lexer is currently in strict mode.

Returns:

  • (Boolean)


103
# File 'lib/bibtex/lexer.rb', line 103

# Tells whether the lexer is in strict mode, i.e. whether the :strict
# option holds a truthy value.
#
# @return [Boolean]
def strict?
  @options[:strict] ? true : false
end