Module: MaRuKu::In::Markdown::SpanLevelParser

Includes:
Helpers
Included in:
BlockLevelParser, CharSourceManual, CharSourceStrscan, MDDocument
Defined in:
lib/maruku.rb,
lib/maruku/attributes.rb,
lib/maruku/input/rubypants.rb,
lib/maruku/input/charsource.rb,
lib/maruku/input/html_helper.rb,
lib/maruku/input/parse_span_better.rb

Defined Under Namespace

Classes: CharSourceDebug, CharSourceManual, CharSourceStrscan, HTMLHelper, SpanContext

Constant Summary collapse

Punct_class =
'[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
Close_class =
%![^\ \t\r\n\\[\{\(\-]!
Rules =
[
[/---/,   :mdash          ],
[/--/,    :ndash          ],
['...',   :hellip         ],
['. . .', :hellip         ],
["``",    :ldquo          ],
["''",    :rdquo          ],
[/<<\s/,  [:laquo, :nbsp] ],
[/\s>>/,  [:nbsp, :raquo] ],
[/<</,    :laquo          ],
[/>>/,    :raquo          ],

#		def educate_single_backticks(str)
#		["`", :lsquo]
#		["'", :rsquo]

# Special case if the very first character is a quote followed by
# punctuation at a non-word-break. Close the quotes by brute
# force:
[/^'(?=#{Punct_class}\B)/, :rsquo],
[/^"(?=#{Punct_class}\B)/, :rdquo],
# Special case for double sets of quotes, e.g.:
#   <p>He said, "'Quoted' words in a larger quote."</p>
[/"'(?=\w)/, [:ldquo, :lsquo]    ],
[/'"(?=\w)/, [:lsquo, :ldquo]    ],
# Special case for decade abbreviations (the '80s):
[/'(?=\d\ds)/, :rsquo            ],
# Get most opening single quotes:
[/(\s)'(?=\w)/, [:one, :lsquo]   ],
# Single closing quotes:
[/(#{Close_class})'/, [:one, :rsquo]],
[/'(\s|s\b|$)/, [:rsquo, :one]],
# Any remaining single quotes should be opening ones:
[/'/, :lsquo],
# Get most opening double quotes:
[/(\s)"(?=\w)/, [:one, :ldquo]],
# Double closing quotes:
[/(#{Close_class})"/, [:one, :rdquo]],
[/"(\s|s\b|$)/, [:rdquo, :one]],
# Any remaining quotes should be opening ones:
[/"/, :ldquo]
	].
	map{|reg, subst| # People should do the thinking, machines should do the work.
reg = Regexp.new(Regexp.escape(reg)) if not reg.kind_of? Regexp
subst = [subst] if not subst.kind_of?Array
[reg, subst]}
CharSource =

Choose!

CharSourceManual
EscapedCharInText =
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
EscapedCharInQuotes =
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]
EscapedCharInInlineCode =
[?\\,?`]
SPACE =

32

?\
R_REF_ID =

Earlier variant, superseded by the value below: R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)

Regexp.compile(/([^\]]*)\]/)

Instance Method Summary collapse

Methods included from Helpers

#md_abbr, #md_abbr_def, #md_ald, #md_br, #md_code, #md_codeblock, #md_el, #md_em, #md_email, #md_emstrong, #md_entity, #md_foot_ref, #md_footnote, #md_header, #md_hrule, #md_html, #md_ial, #md_im_image, #md_im_link, #md_image, #md_li, #md_link, #md_par, #md_quote, #md_ref_def, #md_strong, #md_url, #md_xml_instr

Instance Method Details

#apply_one_rule(reg, subst, input) ⇒ Object

note: input will be destroyed



192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/maruku/input/rubypants.rb', line 192

# Applies a single substitution rule to a mixed list of strings and
# already-built elements.
#
# reg   - Regexp searched for inside every String item.
# subst - Array of replacement tokens: +:one+ stands for the first capture
#         group of the match, any other token is turned into an entity by
#         calling md_entity with its name.
# input - Array to process. NOTE: it is consumed (left empty) by this call.
#
# Returns a new Array holding the rewritten sequence.
def apply_one_rule(reg, subst, input)
	processed = []
	while (item = input.shift)
		match = item.kind_of?(String) ? reg.match(item) : nil
		unless match
			processed << item
			next
		end
		processed << match.pre_match unless match.pre_match.empty?
		# The tail goes back on the queue so further matches in it are found.
		input.unshift(match.post_match) unless match.post_match.empty?
		# Unshifting in reverse leaves the replacements in +subst+ order.
		subst.reverse_each do |token|
			input.unshift(token == :one ? match[1] : md_entity(token.to_s))
		end
	end
	processed
end

#describe_pos(buffer, buffer_index) ⇒ Object



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/maruku/input/charsource.rb', line 154

# Builds a human-readable description of a position inside a buffer, for
# use in error messages.
#
# buffer       - the String being parsed.
# buffer_index - offset of the position to highlight.
#
# The result shows a window of at most 75 characters around the position
# (newlines shown as "N", tabs as "T", "EOF" appended when the window
# reaches the end of the buffer), a ruler with a "|" marker under the
# position, the range of offsets shown, and finally the whole buffer as
# formatted by add_tabs.
#
# Returns a multi-line String.
def describe_pos(buffer, buffer_index)
	len = 75
	num_before_max = buffer_index
	num_after_max = buffer.size - buffer_index

	# Start from half a window after the position, then hand whatever one
	# side cannot use over to the other side.
	num_after = [len / 2, num_after_max].min
	num_before = [num_before_max, len - num_after].min
	num_after  = [num_after_max, len - num_before].min

	index_start = [buffer_index - num_before, 0].max
	index_end   = [buffer_index + num_after, buffer.size].min
	size = index_end - index_start

	# The slice is a fresh string, so the in-place edits leave buffer alone.
	str = buffer[index_start, size]
	str.gsub!("\n", 'N')
	str.gsub!("\t", 'T')
	str += "EOF" if index_end == buffer.size

	pre_s = [buffer_index - index_start, 0].max
	pre_s2 = [len - pre_s, 0].max
	pre = " " * pre_s

	"-" * len + "\n" +
	str + "\n" +
	"-" * pre_s + "|" + "-" * pre_s2 + "\n" +
	pre + "+--- Byte #{buffer_index}\n" +
	# Report the end offset of the window (previously the window length
	# was printed here, which did not match the "start to end" wording).
	"Shown bytes [#{index_start} to #{index_end}] of #{buffer.size}:\n" +
	add_tabs(buffer, 1, ">")
end

#educate(elements) ⇒ Object



207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/maruku/input/rubypants.rb', line 207

# Runs every typographic rule in Rules over the element list, then tidies
# the result.
#
# elements - Array of Strings and span elements.
#
# Each rule is applied in order through apply_one_rule. Afterwards empty
# strings are dropped and runs of adjacent strings are merged into one.
#
# Returns a new Array of Strings and elements.
def educate(elements)
	elements = Rules.inject(elements) do |current, (reg, subst)|
		apply_one_rule(reg, subst, current)
	end

	# strips empty strings
	elements.delete_if { |piece| piece.kind_of?(String) && piece.empty? }

	# join consecutive strings
	elements.each_with_object([]) do |piece, merged|
		if piece.kind_of?(String) && merged.last.kind_of?(String)
			merged.last << piece
		else
			merged << piece
		end
	end
end

#extension_meta(src, con, break_on_chars) ⇒ Object



301
302
303
304
305
306
307
308
309
310
311
312
313
# File 'lib/maruku/input/parse_span_better.rb', line 301

# Reads the contents of a meta-data extension and pushes the resulting
# element onto the context.
#
# src            - character source positioned inside the braces.
# con            - span context receiving the element.
# break_on_chars - characters that terminate the attribute list.
#
# If the text starts with "name:" the attribute list is an ALD (attribute
# list definition): it is stored in doc.ald under that name and an md_ald
# element is pushed. Otherwise it is an anonymous IAL and only an md_ial
# element is pushed.
def extension_meta(src, con, break_on_chars)
	if m = src.read_regexp(/([^\s\:\"\']+):/)
		name = m[1]
		al = read_attribute_list(src, con, break_on_chars)
		self.doc.ald[name] = al
		con.push md_ald(name, al)
	else
		al = read_attribute_list(src, con, break_on_chars)
		# An IAL has no name, so nothing is registered in doc.ald here
		# (this branch used to store the list under a nil key).
		con.push md_ial(al)
	end
end

#interpret_extension(src, con, break_on_chars) ⇒ Object

Start: cursor on the character after ‘{’. End: cursor on ‘}’ or EOF.



277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# File 'lib/maruku/input/parse_span_better.rb', line 277

# Interprets the contents of a "{...}" extension.
#
# src            - character source; the cursor is on the first character
#                  after the opening brace.
# con            - span context receiving the resulting element.
# break_on_chars - characters that terminate the extension.
#
# A leading ":" is skipped and the rest is read as meta-data; a leading
# "#" or "." is read as meta-data directly. Anything else is not a known
# extension: its text is consumed, a recovery message is reported, and
# extension_meta is still called on what follows.
def interpret_extension(src, con, break_on_chars)
	lead = src.cur_char
	if lead == ?:
		src.ignore_char # :
		return extension_meta(src, con, break_on_chars)
	end
	return extension_meta(src, con, break_on_chars) if [?#, ?.].include?(lead)

	stuff = read_simple(src, [?}], break_on_chars, [])
	message = "I will threat this:\n\t{#{stuff}} \n as meta-data.\n"
	if stuff =~ /^(\w+\s|[^\w])/
		extension_id = $1.strip
		message = "I don't know what to do with extension '#{extension_id}'\n" + message
	end
	maruku_recover message, src, con
	extension_meta(src, con, break_on_chars)
end

#is_ial(e) ⇒ Object

We need a helper



196
# File 'lib/maruku/attributes.rb', line 196

# Tells whether +e+ is an inline attribute list element, i.e. an MDElement
# whose node_type is :ial.
def is_ial(e)
	e.kind_of?(MDElement) && e.node_type == :ial
end

#md_al(s = []) ⇒ Object



132
# File 'lib/maruku/attributes.rb', line 132

# Convenience constructor: wraps +s+ (an empty list by default) in a new
# AttributeList.
def md_al(s=[])
	AttributeList.new(s)
end

#merge_ial(elements, src, con) ⇒ Object



198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/maruku/attributes.rb', line 198

# Attaches every inline attribute list (IAL) in +elements+ to a
# neighbouring element and then removes the IALs from the list.
#
# elements - Array of Strings and MDElements; modified in place.
# src, con - passed through to maruku_error for error reporting.
#
# An IAL that is not the first item is assigned to the element before it
# when that is an MDElement, otherwise to the element after it. If neither
# neighbour is an MDElement an error is reported. Unless
# Globals[:debug_keep_ials] is set, IALs are then deleted, except one
# equal to the first item of the list.
def merge_ial(elements, src, con)
	# Apply each IAL to the element before
	elements.each_with_index do |el, idx|
		next unless is_ial(el) && idx >= 1

		before = elements[idx - 1]
		after = elements[idx + 1]
		target = [before, after].find { |candidate| candidate.kind_of? MDElement }
		if target
			target.al = el.ial
		else
			maruku_error "It is not clear to me what element this IAL {:#{el.ial.to_md}} \n"+
			"is referring to. The element before is a #{before.class.to_s}, \n"+
			"the element after is a #{after.class.to_s}.\n"+
			"\n before: #{before.inspect}"+
			"\n after: #{after.inspect}",
			src, con
			# xxx: say whether there is an empty element nearby
		end
	end

	elements.delete_if { |x| x != elements.first && is_ial(x) } unless Globals[:debug_keep_ials]
end

#parse_lines_as_span(lines, parent = nil) ⇒ Object



35
36
37
# File 'lib/maruku/input/parse_span_better.rb', line 35

# Joins +lines+ with newlines and parses the result as span-level
# Markdown. +parent+ is handed on unchanged to parse_span_better.
def parse_lines_as_span(lines, parent=nil)
	joined = lines.join("\n")
	parse_span_better(joined, parent)
end

#parse_span_better(string, parent = nil) ⇒ Object



39
40
41
42
43
44
45
46
47
# File 'lib/maruku/input/parse_span_better.rb', line 39

# Parses +string+ as span-level Markdown.
#
# string - the text to parse; anything that is not a String is reported
#          through +error+.
# parent - passed to CharSource.new together with the text.
#
# The text is copied and frozen before being wrapped in a CharSource, and
# read_span is run over it with the plain-text escape set. The exit list
# [nil] makes the end of input a regular terminator.
#
# Returns whatever read_span returns.
def parse_span_better(string, parent=nil)
	error "Passed #{string.class}." unless string.kind_of? String

	frozen_copy = (string + "").freeze
	read_span(CharSource.new(frozen_copy, parent), EscapedCharInText, [nil])
end

#read_attribute_list(src, con, break_on_chars) ⇒ Object

returns nil or an AttributeList



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/maruku/attributes.rb', line 135

# Reads an attribute list such as `#id .class key=val ref`.
#
# src            - character source positioned at the start of the list.
# con            - span context, used for error reporting.
# break_on_chars - Array of characters that end the list; the terminator
#                  itself is left in the stream.
#
# Items are separated by whitespace. "#x" adds an id, ".x" adds a class,
# "k=v" adds a key/value pair and a bare word adds a reference. Names and
# values may be quoted. Malformed items are reported through maruku_error
# and skipped; reaching EOF returns the partial list.
#
# Returns the AttributeList built so far.
def read_attribute_list(src, con, break_on_chars)
	separators = break_on_chars + [?=,?\ ,?\t]
	escaped = Maruku::EscapedCharInQuotes

	al = AttributeList.new
	while true
		src.consume_whitespace
		break if break_on_chars.include? src.cur_char

		case src.cur_char
		when nil
			maruku_error "Attribute list terminated by EOF:\n "+
			             "#{al.inspect}" , src, con
			tell_user "I try to continue and return partial attribute list:\n"+
				al.inspect
			break
		when ?=     # error
			# src/con are passed like everywhere else in this method so the
			# report carries the input position.
			maruku_error "In attribute lists, cannot start identifier with `=`.",
				src, con
			tell_user "I try to continue"
			src.ignore_char
		when ?#     # id definition
			src.ignore_char
			if id = read_quoted_or_unquoted(src, con, escaped, separators)
				al.push_id id
			else
				maruku_error 'Could not read `id` attribute.', src, con
				tell_user 'Trying to ignore bad `id` attribute.'
			end
		when ?.     # class definition
			src.ignore_char
			if klass = read_quoted_or_unquoted(src, con, escaped, separators)
				al.push_class klass
			else
				maruku_error 'Could not read `class` attribute.', src, con
				tell_user 'Trying to ignore bad `class` attribute.'
			end
		else
			if key = read_quoted_or_unquoted(src, con, escaped, separators)
				if src.cur_char == ?=
					src.ignore_char # skip the =
					if val = read_quoted_or_unquoted(src, con, escaped, separators)
						al.push_key_val(key, val)
					else
						maruku_error "Could not read value for key #{key.inspect}.",
							src, con
						tell_user "Ignoring key #{key.inspect}."
					end
				else
					al.push_ref key
				end
			else
				maruku_error 'Could not read key or reference.', src, con
			end
		end # case
	end # while true
	al
end

#read_em(src, delim) ⇒ Object



421
422
423
424
425
426
# File 'lib/maruku/input/parse_span_better.rb', line 421

# Reads an emphasis span delimited by the one-character +delim+.
# The cursor must be on the opening delimiter; one character is skipped
# before and after the contents.
#
# Returns the md_em element wrapping the parsed contents.
def read_em(src, delim)
	src.ignore_char # opening delimiter
	inner = read_span(src, EscapedCharInText, nil, [delim])
	src.ignore_char # closing delimiter
	md_em inner
end

#read_email_el(src, con) ⇒ Object



323
324
325
326
327
328
329
330
# File 'lib/maruku/input/parse_span_better.rb', line 323

# Reads an e-mail autolink such as <user@example.com> or
# <mailto:user@example.com>. The cursor must be on the opening "<".
# A leading "mailto:" is removed from the address before the md_email
# element is pushed onto +con+.
def read_email_el(src,con)
	src.ignore_char # leading <
	raw = read_simple(src, [], [?>])
	src.ignore_char # closing >

	con.push_element md_email(raw.gsub(/^mailto:/, ''))
end

#read_emstrong(src, delim) ⇒ Object



435
436
437
438
439
440
# File 'lib/maruku/input/parse_span_better.rb', line 435

# Reads a combined emphasis+strong span delimited by the three-character
# +delim+ (for example "***"). The cursor must be on the opening
# delimiter; three characters are skipped before and after the contents.
#
# Returns the md_emstrong element wrapping the parsed contents.
def read_emstrong(src, delim)
	src.ignore_chars(3) # opening delimiter
	inner = read_span(src, EscapedCharInText, nil, [delim])
	src.ignore_chars(3) # closing delimiter
	md_emstrong inner
end

#read_footnote_ref(src, con) ⇒ Object



462
463
464
465
# File 'lib/maruku/input/parse_span_better.rb', line 462

# Reads a bracketed footnote reference through read_ref_id and pushes the
# matching md_foot_ref element onto +con+.
def read_footnote_ref(src,con)
	con.push_element md_foot_ref(read_ref_id(src,con))
end

#read_image(src, con) ⇒ Object

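Reads an image: inline `![alt](url "title")`, reference-style `![alt][ref]`, or a bare `![alt]` whose alt text doubles as the reference id.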



615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
# File 'lib/maruku/input/parse_span_better.rb', line 615

# Reads an image and pushes the resulting element onto +con+.
#
# src - character source with the cursor on the "!" of "![".
# con - span context receiving the element.
#
# Three forms are handled after the bracketed alt text:
#   ![alt](url "title")  inline image, pushed as md_im_image
#   ![alt][ref]          reference image, pushed as md_image
#   ![alt]               alt text doubles as the reference id
# A single space between "]" and the following "(" or "[" is skipped.
#
# Input that ends early is reported through +error+ / +maruku_error+ and
# never raises from here.
def read_image(src, con)
	src.ignore_chars(2) # opening "!["
	alt_text = read_span(src, EscapedCharInText, [?]])
	src.ignore_char # closing bracket
	# ignore space
	if src.cur_char == SPACE && [?[, ?(].include?(src.next_char)
		src.ignore_char
	end
	case src.cur_char
	when ?(
		src.ignore_char # opening (
		src.consume_whitespace
		url = read_url(src, [SPACE,?\t,?)])
		if not url
			error "Could not read url from #{src.cur_chars(10).inspect}",
				src,con
		end
		src.consume_whitespace
		title = nil
		if src.cur_char != ?) # we have a title
			quote_char = src.cur_char
			title = read_quoted(src,con)
			if not title
				maruku_error 'Must quote title',src,con
			else
				# Tries to read a title with quotes: ![a](url "ti"tle")
				# this is the most ugly thing in Markdown
				if not src.next_matches(/\s*\)/)
					# if there is not a closing par ), then read
					# the rest and guess it's title with quotes
					rest = read_simple(src, [], [?)], [])
					# read_simple returns nil when nothing could be read
					# (input ended right after the quoted part): in that
					# case there is nothing to append.
					if rest
						# chop the closing char
						rest.chop!
						title << quote_char << rest
					end
				end
			end
		end
		src.consume_whitespace
		closing = src.shift_char # closing )
		if closing != ?)
			# closing is nil at end of input; name that case explicitly
			# instead of appending nil to the message.
			found = closing ? ("" << closing) : "EOF"
			error("Unclosed link: '#{found}'"+
				" Read url=#{url.inspect} title=#{title.inspect}",src,con)
		end
		con.push_element md_im_image(alt_text, url, title)
	when ?[ # link ref
		ref_id = read_ref_id(src,con)
		if not ref_id # TODO: check around
			error('Reference not closed.', src, con)
			ref_id = ""
		end
		if ref_id.size == 0
			ref_id =  alt_text.to_s
		end

		ref_id = sanitize_ref_id(ref_id)

		con.push_element md_image(alt_text, ref_id)
	else # no stuff
		ref_id =  sanitize_ref_id(alt_text.to_s)
		con.push_element md_image(alt_text, ref_id)
	end
end

#read_inline_code(src, con) ⇒ Object



510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
# File 'lib/maruku/input/parse_span_better.rb', line 510

# Reads an inline code span and pushes an md_code element onto +con+.
#
# src - character source with the cursor on the first backtick.
# con - span context receiving the element.
#
# The span is closed by the same number of backticks that opened it. When
# more than one backtick is used, one leading and one trailing space are
# removed from the code so that code starting or ending with a backtick
# can be written.
#
# If nothing could be read (the input ends right after the opening
# backticks) an empty code span is produced instead of failing on nil.
def read_inline_code(src, con)
	# Count the number of ticks
	num_ticks = 0
	while src.cur_char == ?`
		num_ticks += 1
		src.ignore_char
	end
	# We will read until this string
	end_string = "`" * num_ticks

	# read_simple returns nil when it reads no characters at all
	code = read_simple(src, [], [], [end_string]) || ""

	src.ignore_chars num_ticks

	if num_ticks > 1
		# Ignore at most one space
		code = code[1, code.size - 1] if code[0] == SPACE
		# drop last space
		code = code[0, code.size - 1] if code[-1] == SPACE
	end

	con.push_element md_code(code)
end

#read_inline_html(src, con) ⇒ Object



467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
# File 'lib/maruku/input/parse_span_better.rb', line 467

# Reads a piece of inline HTML and pushes it onto +con+ as an md_html
# element.
#
# src - character source with the cursor on the opening "<".
# con - span context receiving the element.
#
# The remaining buffer is fed to an HTMLHelper one character at a time
# until the helper reports it is finished. If the buffer runs out first,
# a "Malformed HTML" error is reported and whatever the helper collected
# is still pushed. Any exception raised on the way is reported, and the
# current character is pushed as plain text so parsing can go on.
def read_inline_html(src, con)
	h = HTMLHelper.new
	begin
		# This is our current buffer in the context
		pending = src.current_remaining_buffer

		eaten = 0
		while true
			if eaten >= pending.size
				maruku_error "Malformed HTML starting at #{pending.inspect}", src, con
				break
			end

			h.eat_this pending[eaten].chr
			eaten += 1
			break if h.is_finished?
		end
		src.ignore_chars(eaten)
		con.push_element md_html(h.stuff_you_read)
	# NOTE(review): rescuing Exception also traps interrupts and exits;
	# kept as is because the helper's error class is not visible here.
	rescue Exception => e
		maruku_error "Bad html: \n" +
			add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
			src,con
		maruku_recover "I will try to continue after bad HTML.", src, con
		con.push_char src.shift_char
	end
end


541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
# File 'lib/maruku/input/parse_span_better.rb', line 541

# Reads a link and pushes the resulting element onto +con+.
#
# src - character source with the cursor on the opening "[".
# con - span context receiving the element.
#
# Three forms are handled after the bracketed link text:
#   [text](url "title")  inline link, pushed as md_im_link
#   [text][ref]          reference link, pushed as md_link
#   [text]               the text doubles as the reference id
# A single space between "]" and the following "(" or "[" is skipped.
#
# When the inline form is not closed, or the reference id cannot be read,
# an error is reported and the already parsed link text is pushed as
# plain children instead of a link.
def read_link(src, con)
	# we read the string and see what happens
	src.ignore_char # opening bracket
	children = read_span(src, EscapedCharInText, [?]])
	src.ignore_char # closing bracket

	# ignore space
	if src.cur_char == SPACE && [?[, ?(].include?(src.next_char)
		src.shift_char
	end

	case src.cur_char
	when ?(
		src.ignore_char # opening (
		src.consume_whitespace
		url = read_url(src, [SPACE,?\t,?)])
		if not url
			url = '' # no url is ok
		end
		src.consume_whitespace
		title = nil
		if src.cur_char != ?) # we have a title
			quote_char = src.cur_char
			title = read_quoted(src,con)

			if not title
				maruku_error 'Must quote title',src,con
			else
				# Tries to read a title with quotes: ![a](url "ti"tle")
				# this is the most ugly thing in Markdown
				if not src.next_matches(/\s*\)/)
					# if there is not a closing par ), then read
					# the rest and guess it's title with quotes
					rest = read_simple(src, [], [?)], [])
					# read_simple returns nil when nothing could be read
					# (input ended right after the quoted part): in that
					# case there is nothing to append.
					if rest
						# chop the closing char
						rest.chop!
						title << quote_char << rest
					end
				end
			end
		end
		src.consume_whitespace
		closing = src.shift_char # closing )
		if closing != ?)
			maruku_error 'Unclosed link',src,con
			maruku_recover "No closing ): I will not create"+
			" the link for #{children.inspect}", src, con
			con.push_elements children
			return
		end
		con.push_element md_im_link(children,url, title)
	when ?[ # link ref
		ref_id = read_ref_id(src,con)
		if ref_id
			if ref_id.size == 0
				ref_id = sanitize_ref_id(children.to_s)
			else
				ref_id = sanitize_ref_id(ref_id)
			end
			con.push_element md_link(children, ref_id)
		else
			maruku_error "Could not read ref_id", src, con
			maruku_recover "I will not create the link for "+
				"#{children.inspect}", src, con
			con.push_elements children
			return
		end
	else # empty [link]
		id = sanitize_ref_id(children.to_s) #. downcase.gsub(' ','_')
		con.push_element md_link(children, id)
	end
end

#read_quoted(src, con) ⇒ Object

Tries to read a quoted value. If stream does not start with ‘ or “, returns nil.



365
366
367
368
369
370
371
372
373
374
375
376
# File 'lib/maruku/input/parse_span_better.rb', line 365

# Reads a value enclosed in single or double quotes.
#
# src - character source; the cursor must be on the opening quote.
# con - span context (not used here).
#
# The opening quote decides which character closes the value; both quotes
# are consumed.
#
# Returns the text read by read_simple, or nil when the stream does not
# start with a quote.
def read_quoted(src, con)
	return nil unless [?', ?"].include?(src.cur_char)

	quote_char = src.shift_char # opening quote
	string = read_simple(src, EscapedCharInQuotes, [quote_char])
	src.ignore_char # closing quote
	string
end

#read_quoted_or_unquoted(src, con, escaped, exit_on_chars) ⇒ Object



354
355
356
357
358
359
360
361
# File 'lib/maruku/input/parse_span_better.rb', line 354

# Reads either a quoted value (when the cursor is on a single or double
# quote) or a bare value that runs until one of +exit_on_chars+.
#
# Returns the text read, or nil if nothing could be read.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
	if [?', ?"].include?(src.cur_char)
		read_quoted(src, con)
	else
		read_simple(src, escaped, exit_on_chars)
	end
end

#read_ref_id(src, con) ⇒ Object

Reads a bracketed id “[refid]”. Consumes also both brackets.



449
450
451
452
453
454
455
456
457
458
459
460
# File 'lib/maruku/input/parse_span_better.rb', line 449

# Reads a bracketed reference id such as "[refid]".
#
# src - character source with the cursor on the opening "[".
# con - span context (not used here).
#
# The opening bracket and any whitespace after it are skipped, then
# R_REF_ID is matched against the stream.
#
# Returns the first capture of the match, or nil when it does not match.
def read_ref_id(src, con)
	src.ignore_char # [
	src.consume_whitespace
	m = src.read_regexp(R_REF_ID)
	m ? m[1] : nil
end

#read_simple(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ Object

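Reads raw text up to (but not including) one of the exit characters or exit strings, or up to EOF. The delimiter is left in the stream: the `eat_delim` option mentioned in older comments, which would have consumed it, is commented out in the code.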



384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
# File 'lib/maruku/input/parse_span_better.rb', line 384

# Reads raw text from +src+ until a terminator is found.
#
# src             - character source.
# escaped         - collection of characters that may be backslash-escaped;
#                   for "\x" with x in this set only x is kept. Any other
#                   backslash is copied literally.
# exit_on_chars   - Array of characters that stop the read, or nil.
# exit_on_strings - Array of strings that stop the read, or nil.
#
# The terminator is not consumed. Reaching the end of input is reported
# through maruku_error / maruku_recover and also stops the read.
#
# Returns the text read, or nil when no character was read at all.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
	text = ""
	while true
		c = src.cur_char
		break if exit_on_chars && exit_on_chars.include?(c)
		break if exit_on_strings &&
			exit_on_strings.any? {|x| src.cur_chars_are x}

		case c
		when nil
			# exit_on_chars may be nil (see the guard above), so do not
			# call map on it directly when building the message.
			wanted = (exit_on_chars || []).map{|x|""<<x}
			s= "String finished while reading (break on "+
			"#{wanted.inspect})"+
			" already read: #{text.inspect}"
			maruku_error s, src
			maruku_recover "I boldly continue", src
			break
		when ?\\
			d = src.next_char
			if escaped.include? d
				src.ignore_chars(2)
				text << d
			else
				text << src.shift_char
			end
		else
			text << src.shift_char
		end
	end
	text.empty? ? nil : text
end

#read_span(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ Object

This is the main loop for reading span elements

It’s long, but not complex or difficult to understand.



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/maruku/input/parse_span_better.rb', line 54

# Main loop of the span-level parser: reads characters from +src+ and
# turns them into a list of strings and span elements.
#
# src             - character source to read from.
# escaped         - collection of characters that a backslash may escape
#                   in this context.
# exit_on_chars   - Array of characters that end the span (the character
#                   is left in the stream), or nil. It may contain nil so
#                   that the end of input ends the span without an error.
# exit_on_strings - Array of strings that end the span, or nil.
#
# The loop dispatches on the current character: whitespace, inline code,
# "<" constructs (HTML, autolinks, guillemets), backslash escapes, links,
# images, entities, emphasis with "*" or "_", and "{...}" extensions.
# Everything else is accumulated as plain text. If the input ends without
# a terminator an "Unclosed span" error is reported.
#
# After the loop, IALs are merged into their neighbours, one leading and
# one trailing space are removed, and the elements are passed through
# educate.
#
# Returns the Array produced by educate.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
	con = SpanContext.new
	c = d = nil
	while true
		c = src.cur_char

		# This is only an optimization which cuts 50% of the time used.
		# (but you can't use a-zA-z in exit_on_chars)
		if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
			con.cur_string << src.shift_char
			next
		end

		break if exit_on_chars && exit_on_chars.include?(c)
		break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}
		
		# check if there are extensions
		if check_span_extensions(src, con)
			next
		end
		
		case c = src.cur_char	
		when ?\ # it's space (32)
			# two spaces followed by a newline make a hard line break
			if src.cur_chars_are "  \n"
				src.ignore_chars(3)
				con.push_element  md_br()
				next
			else
				src.ignore_char
				con.push_space 
			end
		when ?\n, ?\t 
			src.ignore_char
			con.push_space 
		when ?`
			read_inline_code(src,con)
		when ?<
			# It could be:
			# 1) HTML "<div ..."
			# 2) HTML "<!-- ..."
			# 3) url "<http:// ", "<ftp:// ..."
			# 4) email "<andrea@... ", "<mailto:andrea@..."
			# 5) on itself! "a < b	"
			# 6) Start of <<guillemettes>>
			
			case d = src.next_char
				when ?<;  # guillemettes
					src.ignore_chars(2)
					con.push_char ?<
					con.push_char ?<
				when ?!; 
					if src.cur_chars_are '<!--'
						read_inline_html(src, con)
					else 
						con.push_char src.shift_char
					end
				when ?? 
					read_xml_instr_span(src, con) 
				when ?\ , ?\t 
					con.push_char src.shift_char
				else
					if src.next_matches(/<mailto:/) or
					   src.next_matches(/<[\w\.]+\@/)
						read_email_el(src, con)
					elsif src.next_matches(/<\w+:/)
						read_url_el(src, con)
					elsif src.next_matches(/<\w/)
						#puts "This is HTML: #{src.cur_chars(20)}"
						read_inline_html(src, con)
					else 
						#puts "This is NOT HTML: #{src.cur_chars(20)}"
						con.push_char src.shift_char
					end
			end
		when ?\\
			# escaped quotes become entities; other escapable characters
			# are pushed without the backslash
			d = src.next_char
			if d == ?'
				src.ignore_chars(2)
				con.push_element md_entity('apos')
			elsif d == ?"
				src.ignore_chars(2)
				con.push_element md_entity('quot')
			elsif escaped.include? d
				src.ignore_chars(2)
				con.push_char d
			else
				con.push_char src.shift_char
			end
		when ?[
			if markdown_extra? && src.next_char == ?^
				read_footnote_ref(src,con)
			else
				read_link(src, con)
			end
		when ?!
			if src.next_char == ?[
				read_image(src, con)
			else
				con.push_char src.shift_char
			end
		when ?&
			# named references
			if m = src.read_regexp(/\&([\w\d]+);/)
				con.push_element md_entity(m[1])
			# numeric
			elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/)
				# m[1] is set for the hexadecimal form "&#x...;"
				num = m[1]  ? m[2].hex : m[2].to_i
				con.push_element md_entity(num)
			else
				con.push_char src.shift_char
			end
		when ?*
			if not src.next_char
				maruku_error "Opening * as last char.", src, con
				maruku_recover "Threating as literal"
				con.push_char src.shift_char
			else
				# longest delimiter first: ***, then **, then *
				follows = src.cur_chars(4)
				if follows =~ /^\*\*\*[^\s\*]/
					con.push_element read_emstrong(src,'***')
				elsif follows  =~ /^\*\*[^\s\*]/
					con.push_element read_strong(src,'**')
				elsif follows =~ /^\*[^\s\*]/
					con.push_element read_em(src,'*')
				else # * is just a normal char
					con.push_char src.shift_char
				end
			end
		when ?_
			if not src.next_char
				maruku_error "Opening _ as last char", src, con
				maruku_recover "Threating as literal", src, con
				con.push_char src.shift_char
			else
				# we don't want "mod_ruby" to start an emphasis
				# so we start one only if
				# 1) there's nothing else in the span (first char)
				# or 2) the last char was a space
				# or 3) the current string is empty
				#if con.elements.empty? ||
				if	 (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0)
					# also, we check the next characters
					follows = src.cur_chars(4)
					if  follows =~ /^\_\_\_[^\s\_]/
						con.push_element read_emstrong(src,'___')
					elsif follows  =~ /^\_\_[^\s\_]/
						con.push_element read_strong(src,'__')
					elsif follows =~ /^\_[^\s\_]/
						con.push_element read_em(src,'_')
					else # _ is just a normal char
						con.push_char src.shift_char
					end
				else
					# _ is just a normal char
						con.push_char src.shift_char
				end
			end
		when ?{ # extension
			if [?#, ?., ?:].include? src.next_char
				src.ignore_char # {
				interpret_extension(src, con, [?}])
				src.ignore_char # }
			else
				con.push_char src.shift_char
			end
		when nil
			# end of input that was not listed in exit_on_chars
			maruku_error( ("Unclosed span (waiting for %s"+
			 "#{exit_on_strings.inspect})") % [
					exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
					src,con)
			break
		else # normal text
			con.push_char src.shift_char
		end # end case
	end # end while true
	con.push_string_if_present 

	# Assign IAL to elements
	merge_ial(con.elements, src, con)
	
	
	# Remove leading space
	if (s = con.elements.first).kind_of? String
		if s[0] == ?\ then con.elements[0] = s[1, s.size-1] end
		con.elements.shift if s.size == 0 
	end
	
	# Remove final spaces
	if (s = con.elements.last).kind_of? String
		s.chop! if s[-1] == ?\ # the literal here is an escaped space
		con.elements.pop if s.size == 0 
	end
	
	educated = educate(con.elements)

	educated
end

#read_strong(src, delim) ⇒ Object



428
429
430
431
432
433
# File 'lib/maruku/input/parse_span_better.rb', line 428

# Reads a strong-emphasis span delimited by the two-character +delim+
# (for example "**"). The cursor must be on the opening delimiter; two
# characters are skipped before and after the contents.
#
# Returns the md_strong element wrapping the parsed contents.
def read_strong(src, delim)
	src.ignore_chars(2) # opening delimiter
	inner = read_span(src, EscapedCharInText, nil, [delim])
	src.ignore_chars(2) # closing delimiter
	md_strong inner
end

#read_url(src, break_on) ⇒ Object



332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# File 'lib/maruku/input/parse_span_better.rb', line 332

# Reads a URL that runs until one of the +break_on+ characters.
#
# src      - character source positioned at the start of the URL.
# break_on - Array of characters that end the URL.
#
# A URL starting with a quote is reported through +error+. Enclosing
# angle brackets (<http://...>) are removed.
#
# Returns the URL as a String, or nil when it is empty.
def read_url(src, break_on)
	error 'Invalid char for url', src if [?',?"].include? src.cur_char

	# read_simple gives nil for an empty url
	url = read_simple(src, [], break_on) || ""
	url = url[1, url.size-2] if url[0] == ?< && url[-1] == ?>

	url.size == 0 ? nil : url
end

#read_url_el(src, con) ⇒ Object



315
316
317
318
319
320
321
# File 'lib/maruku/input/parse_span_better.rb', line 315

# Reads a URL autolink such as <http://example.com>. The cursor must be
# on the opening "<"; both angle brackets are consumed and an md_url
# element for the enclosed text is pushed onto +con+.
def read_url_el(src,con)
	src.ignore_char # leading <
	target = read_simple(src, [], [?>])
	src.ignore_char # closing >

	con.push_element md_url(target)
end

#read_xml_instr_span(src, con) ⇒ Object



253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
# File 'lib/maruku/input/parse_span_better.rb', line 253

# Reads an XML processing instruction of the form <?target code ?> and
# pushes an md_xml_instr element onto +con+.
#
# src - character source with the cursor on the opening "<?".
# con - span context receiving the element.
#
# The target is the run of word characters right after "<?" (empty when
# there is none). The code is everything up to the closing "?>", with
# surrounding whitespace stripped.
def read_xml_instr_span(src, con)
	src.ignore_chars(2) # starting <?

	# read target <?target code... ?>
	m = src.read_regexp(/(\w+)/)
	target = m ? m[1] : ''

	delim = "?>"
	code = read_simple(src, [], [], [delim])
	src.ignore_chars delim.size

	con.push_element md_xml_instr(target, (code || "").strip)
end

#unit_tests_for_attribute_listsObject



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/maruku/attributes.rb', line 80

# Builds the fixture table used to test attribute-list parsing.
#
# Each row of the literal table is [source, expected, comment]:
#   source   - the text placed inside the braces.
#   expected - either an Array of attribute entries or :throw.
#   comment  - a short description of the case.
# A row that leaves out +expected+ and/or +comment+ inherits them from the
# previous row; inherited comments get a running number appended.
#
# Returns an Array of [markdown, expected, description] triples, where
# markdown is "{" + MagicChar + source + "}", an Array +expected+ has been
# wrapped as [md_ial(expected)], and description is the comment prefixed
# with "Attributes: ".
#
# NOTE: the mapping keeps its running state in the instance variables
# @expected, @comment and @count.
def unit_tests_for_attribute_lists
	[
		[ "",     [], "Empty lists are allowed" ], 
		[ "=",    :throw, "Bad char to begin a list with." ], 
		[ "a =b", :throw, "No whitespace before `=`." ], 
		[ "a= b", :throw, "No whitespace after `=`." ], 

		[ "a b", [[:ref, 'a'],[:ref, 'b']], "More than one ref" ], 
		[ "a b c", [[:ref, 'a'],[:ref, 'b'],[:ref, 'c']], "More than one ref" ], 
		[ "hello notfound", [[:ref, 'hello'],[:ref, 'notfound']]], 

		[ "'a'",  [[:ref, 'a']], "Quoted value." ], 
		[ '"a"'   ], 

		[ "a=b",  [['a','b']], "Simple key/val" ], 
		[ "'a'=b"   ], 
		[ "'a'='b'" ], 
		[ "a='b'"   ], 

		[ 'a="b\'"',  [['a',"b\'"]], "Key/val with quotes" ],
		[ 'a=b\''],
		[ 'a="\\\'b\'"',  [['a',"\'b\'"]], "Key/val with quotes" ], 
		
		['"', :throw, "Unclosed quotes"],
		["'"],
		["'a "],
		['"a '],
		
		[ "#a",  [[:id, 'a']], "Simple ID" ], 
		[ "#'a'" ], 
		[ '#"a"' ], 

		[ "#",  :throw, "Unfinished '#'." ], 
		[ ".",  :throw, "Unfinished '.'." ], 
		[ "# a",  :throw, "No white-space after '#'." ], 
		[ ". a",  :throw, "No white-space after '.' ." ], 
		
		[ "a=b c=d",  [['a','b'],['c','d']], "Tabbing" ], 
		[ " \ta=b \tc='d' "],
		[ "\t a=b\t c='d'\t\t"],
		
		[ ".\"a'",  :throw, "Mixing quotes is bad." ], 
		
	].map { |s, expected, comment| 
		# Missing fields fall back to the values remembered from the
		# previous row.
		@expected = (expected ||= @expected)
		# +last+ is only assigned when the comment was inherited, so the
		# comparison below is true exactly for inherited comments.
		@comment  = (comment  ||= (last=@comment) )
		# Inherited comment: append the next running number.
		# Explicit comment: restart the numbering at 1.
		(comment == last && (comment += (@count+=1).to_s)) || @count = 1
		expected = [md_ial(expected)] if expected.kind_of? Array
		["{#{MagicChar}#{s}}", expected, "Attributes: #{comment}"]
	}
end