Module: MaRuKu::In::Markdown::SpanLevelParser

Includes:: Helpers

Included in:: BlockLevelParser, CharSourceManual, CharSourceStrscan, MDDocument

Defined in:: lib/maruku.rb,
lib/maruku/attributes.rb,
lib/maruku/input/rubypants.rb,
lib/maruku/input/charsource.rb,
lib/maruku/input/html_helper.rb,
lib/maruku/input/parse_span_better.rb

Defined Under Namespace

Classes: CharSourceDebug, CharSourceManual, CharSourceStrscan, HTMLHelper, SpanContext

Constant Summary collapse

Punct_class =

'[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'

Close_class =

%![^\ \t\r\n\\[\{\(\-]!

Rules =

[
[/---/,   :mdash          ],
[/--/,    :ndash          ],
['...',   :hellip         ],
['. . .', :hellip         ],
["``",    :ldquo          ],
["''",    :rdquo          ],
[/<<\s/,  [:laquo, :nbsp] ],
[/\s>>/,  [:nbsp, :raquo] ],
[/<</,    :laquo          ],
[/>>/,    :raquo          ],

#		def educate_single_backticks(str)
#		["`", :lsquo]
#		["'", :rsquo]

# Special case if the very first character is a quote followed by
# punctuation at a non-word-break. Close the quotes by brute
# force:
[/^'(?=#{Punct_class}\B)/, :rsquo],
[/^"(?=#{Punct_class}\B)/, :rdquo],
# Special case for double sets of quotes, e.g.:
#   <p>He said, "'Quoted' words in a larger quote."</p>
[/"'(?=\w)/, [:ldquo, :lsquo]    ],
[/'"(?=\w)/, [:lsquo, :ldquo]    ],
# Special case for decade abbreviations (the '80s):
[/'(?=\d\ds)/, :rsquo            ],
# Get most opening single quotes:
[/(\s)'(?=\w)/, [:one, :lsquo]   ],
# Single closing quotes:
[/(#{Close_class})'/, [:one, :rsquo]],
[/'(\s|s\b|$)/, [:rsquo, :one]],
# Any remaining single quotes should be opening ones:
[/'/, :lsquo],
# Get most opening double quotes:
[/(\s)"(?=\w)/, [:one, :ldquo]],
# Double closing quotes:
[/(#{Close_class})"/, [:one, :rdquo]],
[/"(\s|s\b|$)/, [:rdquo, :one]],
# Any remaining quotes should be opening ones:
[/"/, :ldquo]
	].
	map{|reg, subst| # People should do the thinking, machines should do the work.
reg = Regexp.new(Regexp.escape(reg)) if not reg.kind_of? Regexp
subst = [subst] if not subst.kind_of?Array
[reg, subst]}

CharSource = Choose!

CharSourceManual

EscapedCharInText =

Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]

EscapedCharInQuotes =

Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]

EscapedCharInInlineCode =

[?\\,?`]

SPACE = 32

?\

R_REF_ID =

Regexp.compile(/([^\]]*)\]/)

Instance Method Summary collapse

#apply_one_rule(reg, subst, input) ⇒ Object

note: input will be destroyed.
#describe_pos(buffer, buffer_index) ⇒ Object
#educate(elements) ⇒ Object
#extension_meta(src, con, break_on_chars) ⇒ Object
#interpret_extension(src, con, break_on_chars) ⇒ Object

Start: cursor on the character after ‘{’. End: cursor on ‘}’ or EOF.
#is_ial(e) ⇒ Object

We need a helper.
#md_al(s = []) ⇒ Object
#merge_ial(elements, src, con) ⇒ Object
#parse_lines_as_span(lines, parent = nil) ⇒ Object
#parse_span_better(string, parent = nil) ⇒ Object
#read_attribute_list(src, con, break_on_chars) ⇒ Object

returns nil or an AttributeList.
#read_em(src, delim) ⇒ Object
#read_email_el(src, con) ⇒ Object
#read_emstrong(src, delim) ⇒ Object
#read_footnote_ref(src, con) ⇒ Object
#read_image(src, con) ⇒ Object

Reads an image (inline or reference-style).
#read_inline_code(src, con) ⇒ Object
#read_inline_html(src, con) ⇒ Object
#read_link(src, con) ⇒ Object
#read_quoted(src, con) ⇒ Object

Tries to read a quoted value.
#read_quoted_or_unquoted(src, con, escaped, exit_on_chars) ⇒ Object
#read_ref_id(src, con) ⇒ Object

Reads a bracketed id “[refid]”.
#read_simple(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ Object

If eat_delim is true, and if the delim is not the EOF, then the delim gets eaten from the stream.
#read_span(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ Object

This is the main loop for reading span elements.
#read_strong(src, delim) ⇒ Object
#read_url(src, break_on) ⇒ Object
#read_url_el(src, con) ⇒ Object
#read_xml_instr_span(src, con) ⇒ Object
#unit_tests_for_attribute_lists ⇒ Object

Methods included from Helpers

#md_abbr, #md_abbr_def, #md_ald, #md_br, #md_code, #md_codeblock, #md_el, #md_em, #md_email, #md_emstrong, #md_entity, #md_foot_ref, #md_footnote, #md_header, #md_hrule, #md_html, #md_ial, #md_im_image, #md_im_link, #md_image, #md_li, #md_link, #md_par, #md_quote, #md_ref_def, #md_strong, #md_url, #md_xml_instr

Instance Method Details

#apply_one_rule(reg, subst, input) ⇒ `Object`

note: input will be destroyed

# File 'lib/maruku/input/rubypants.rb', line 192

# Applies a single substitution rule to a list of span elements.
#
# Every String element is matched against +reg+. On a match the text
# before the match is emitted as-is, while the replacement items and the
# text after the match are pushed back onto the front of +input+, so they
# are examined again by the same rule. Non-String elements and strings
# that do not match are copied to the result untouched.
#
# reg::   Regexp to look for.
# subst:: Array of replacement tokens; +:one+ stands for capture group 1
#         of the match, any other token becomes md_entity(token.to_s).
# input:: Array of elements. Note: it is consumed destructively.
#
# Returns a new Array holding the processed elements.
def apply_one_rule(reg, subst, input)
	result = []
	while (item = input.shift)
		found = item.kind_of?(String) ? reg.match(item) : nil
		unless found
			result.push item
			next
		end
		result.push(found.pre_match) unless found.pre_match.size == 0
		input.unshift(found.post_match) unless found.post_match.size == 0
		# Unshift in reverse so the replacements end up in their listed order.
		subst.reverse.each do |token|
			replacement = token == :one ? found[1] : md_entity(token.to_s)
			input.unshift(replacement)
		end
	end
	result
end

#describe_pos(buffer, buffer_index) ⇒ `Object`

# File 'lib/maruku/input/charsource.rb', line 154

# Builds a multi-line, human-readable description of a position inside a
# buffer, for use in diagnostics.
#
# The description shows a window of at most 75 characters around
# +buffer_index+ (newlines rendered as 'N', tabs as 'T', and "EOF" appended
# when the window reaches the end of the buffer), a ruler marking the
# position, the offsets of the window that is shown, and finally the whole
# buffer as formatted by add_tabs(buffer, 1, ">").
#
# buffer::       the String being parsed (not modified).
# buffer_index:: offset of the position to describe.
#
# Returns the description as a String.
def describe_pos(buffer, buffer_index)
	len = 75
	# Characters available on each side of the position.
	num_before_max = buffer_index
	num_after_max = buffer.size-buffer_index

	# Start with half a window on the right, then give the room that is
	# left over to the left side, and finally back to the right side.
	num_after = [len/2, num_after_max].min
	num_before = [num_before_max, len-num_after].min
	num_after  = [num_after_max, len-num_before].min

	index_start = [buffer_index - num_before, 0].max
	index_end   = [buffer_index + num_after, buffer.size].min
	size = index_end - index_start

	# The slice is a new String: the substitutions below leave +buffer+ alone.
	str = buffer[index_start, size]
	str.gsub!("\n",'N')
	str.gsub!("\t",'T')
	str += "EOF" if index_end == buffer.size

	pre_s = [buffer_index-index_start, 0].max
	pre_s2 = [len-pre_s,0].max
	pre = " "*pre_s

	"-"*len+"\n"+
	str + "\n" +
	"-"*pre_s + "|" + "-"*pre_s2+"\n"+
	pre + "+--- Byte #{buffer_index}\n"+
	# Report the end offset of the window, not its length.
	"Shown bytes [#{index_start} to #{index_end}] of #{buffer.size}:\n"+
	add_tabs(buffer,1,">")
end

#educate(elements) ⇒ `Object`

# File 'lib/maruku/input/rubypants.rb', line 207

# Runs every typographic rule of +Rules+ over a list of span elements and
# tidies up the result.
#
# After all the rules have been applied (through apply_one_rule), empty
# strings are dropped and runs of consecutive strings are merged into one
# (the later strings are appended in place to the first string of the run).
#
# elements:: Array of Strings and other span elements.
#
# Returns a new Array with the processed elements.
def educate(elements)
	processed = elements
	Rules.each { |reg, subst| processed = apply_one_rule(reg, subst, processed) }

	# strips empty strings
	processed.delete_if { |item| item.kind_of?(String) && item.size == 0 }

	# join consecutive strings
	processed.each_with_object([]) do |item, merged|
		if item.kind_of?(String) && merged.last.kind_of?(String)
			merged.last << item
		else
			merged << item
		end
	end
end

#extension_meta(src, con, break_on_chars) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 301

# Reads the meta-data part of a "{...}" extension and pushes the matching
# element onto +con+.
#
# If the source continues with "name:", what follows is an attribute list
# definition: the list is read, registered in the document under that name
# (doc.ald[name]) and pushed as md_ald(name, al). Otherwise the attribute
# list is an inline attribute list and is pushed as md_ial(al).
#
# src::            character source, positioned at the start of the meta-data.
# con::            context receiving the new element (via +push+).
# break_on_chars:: characters that terminate the attribute list.
def extension_meta(src, con, break_on_chars)
	if m = src.read_regexp(/([^\s\:\"\']+):/)
		name = m[1]
		al = read_attribute_list(src, con, break_on_chars)
#			puts "#{name}=#{al.inspect}"
		self.doc.ald[name] = al
		con.push md_ald(name, al)
	else
		# An inline attribute list has no name, so there is nothing to register
		# in doc.ald (the previous code stored it under the key nil).
		al = read_attribute_list(src, con, break_on_chars)
		con.push md_ial(al)
	end
end

#interpret_extension(src, con, break_on_chars) ⇒ `Object`

Start: cursor on the character after ‘{’. End: cursor on ‘}’ or EOF

# File 'lib/maruku/input/parse_span_better.rb', line 277

# Interprets the content of a "{...}" extension.
#
# Start: cursor on the character after the opening brace.
#
# * ':' is skipped and the rest is handed to extension_meta;
# * '#' and '.' are handed to extension_meta directly;
# * anything else is read with read_simple, reported through
#   maruku_recover, and then extension_meta is called as well.
#
# src::            character source.
# con::            context receiving the elements.
# break_on_chars:: characters that terminate the extension.
def interpret_extension(src, con, break_on_chars)
	lead = src.cur_char
	if [?:].include?(lead)
		src.ignore_char # :
		return extension_meta(src, con, break_on_chars)
	end
	return extension_meta(src, con, break_on_chars) if [?#, ?.].include?(lead)

	stuff = read_simple(src, [?}], break_on_chars, [])
	recognized = /^(\w+\s|[^\w])/.match(stuff)
	if recognized
		# No extension is known at this point: everything is reported and
		# then treated as meta-data.
		extension_id = recognized[1].strip
		maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+
			"I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
	else
		maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
	end
	extension_meta(src, con, break_on_chars)
end

#is_ial(e) ⇒ `Object`

We need a helper

# File 'lib/maruku/attributes.rb', line 196

def is_ial(e); e.kind_of? MDElement and e.node_type == :ial end

#md_al(s = []) ⇒ `Object`

# File 'lib/maruku/attributes.rb', line 132

def md_al(s=[]); AttributeList.new(s) end

#merge_ial(elements, src, con) ⇒ `Object`

# File 'lib/maruku/attributes.rb', line 198

# Attaches every inline attribute list (IAL) found in +elements+ to a
# neighbouring element and then removes the IALs from the list.
#
# An IAL that is not the first element is assigned (through +al=+) to the
# element before it if that is an MDElement, otherwise to the element after
# it if that is an MDElement; when neither is, an error is reported.
# Unless Globals[:debug_keep_ials] is set, all IALs except an element equal
# to the first one are then deleted from +elements+ in place.
#
# elements:: Array of span elements, modified in place.
# src, con:: passed on to maruku_error for reporting.
def merge_ial(elements, src, con)
	# Apply each IAL to the element before
	elements.each_with_index do |e, i|
		next unless is_ial(e) && i>= 1

		before = elements[i-1]
		after = elements[i+1]
		target = [before, after].find { |candidate| candidate.kind_of? MDElement }
		if target
			target.al = e.ial
		else
			maruku_error "It is not clear to me what element this IAL {:#{e.ial.to_md}} \n"+
			"is referring to. The element before is a #{before.class.to_s}, \n"+
			"the element after is a #{after.class.to_s}.\n"+
			"\n before: #{before.inspect}"+
			"\n after: #{after.inspect}",
			src, con
		end
	end

	elements.delete_if { |x| !(x == elements.first) && is_ial(x) } unless Globals[:debug_keep_ials]
end

#parse_lines_as_span(lines, parent = nil) ⇒ `Object`



35
36
37

# File 'lib/maruku/input/parse_span_better.rb', line 35

# Parses several lines as a single span: the lines are joined with
# newlines and handed to parse_span_better.
#
# lines::  Array of Strings.
# parent:: passed through to parse_span_better.
def parse_lines_as_span(lines, parent=nil)
	joined = lines.join("\n")
	parse_span_better(joined, parent)
end

#parse_span_better(string, parent = nil) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 39

# Parses a string as span-level Markdown.
#
# A frozen copy of +string+ is wrapped in a CharSource and read with
# read_span, using the text escapes and stopping only at the end of input.
# A non-String argument is reported through +error+ before going on.
#
# string:: the text to parse.
# parent:: passed to CharSource.new.
#
# Returns whatever read_span returns.
def parse_span_better(string, parent=nil)
	unless string.kind_of? String
		error "Passed #{string.class}."
	end

	# Work on a frozen copy, so the caller's string is left as it is.
	frozen_copy = (string + "").freeze
	source = CharSource.new(frozen_copy, parent)
	read_span(source, EscapedCharInText, [nil])
end

#read_attribute_list(src, con, break_on_chars) ⇒ `Object`

returns nil or an AttributeList

# File 'lib/maruku/attributes.rb', line 135

# Reads an attribute list such as `#id .class key=val ref` from +src+.
#
# Items are separated by whitespace. '#' introduces an id, '.' a class,
# `key=value` a key/value pair and any other word a reference. Ids,
# classes, keys and values may be quoted. Reading stops, without consuming
# it, at the first character contained in +break_on_chars+, or at EOF
# (which is reported as an error).
#
# src::            character source.
# con::            current context, used for error reporting.
# break_on_chars:: Array of characters that terminate the list.
#
# Returns the AttributeList that was read (possibly partial after errors).
def read_attribute_list(src, con, break_on_chars)
	
	separators = break_on_chars + [?=,?\ ,?\t]
	escaped = Maruku::EscapedCharInQuotes
		
	al = AttributeList.new
	while true
		src.consume_whitespace
		break if break_on_chars.include? src.cur_char

		case src.cur_char
		when nil 
			maruku_error "Attribute list terminated by EOF:\n "+
			             "#{al.inspect}" , src, con
			tell_user "I try to continue and return partial attribute list:\n"+
				al.inspect
			break
		when ?=     # error
			# src and con are passed like in the other reports of this method,
			# so that the message can point at the position of the problem.
			maruku_error "In attribute lists, cannot start identifier with `=`.",
				src, con
			tell_user "I try to continue"
			src.ignore_char
		when ?#     # id definition
			src.ignore_char
			if id = read_quoted_or_unquoted(src, con, escaped, separators)
				al.push_id id
			else
				maruku_error 'Could not read `id` attribute.', src, con
				tell_user 'Trying to ignore bad `id` attribute.'
			end
		when ?.     # class definition
			src.ignore_char
			if klass = read_quoted_or_unquoted(src, con, escaped, separators)
				al.push_class klass
			else
				maruku_error 'Could not read `class` attribute.', src, con
				tell_user 'Trying to ignore bad `class` attribute.'
			end
		else
			if key = read_quoted_or_unquoted(src, con, escaped, separators)
				if src.cur_char == ?=
					src.ignore_char # skip the =
					if val = read_quoted_or_unquoted(src, con, escaped, separators)
						al.push_key_val(key, val)
					else
						maruku_error "Could not read value for key #{key.inspect}.",
							src, con
						tell_user "Ignoring key #{key.inspect}."
					end
				else
					# a word that is not followed by '=' is a reference
					al.push_ref key
				end
			else
				maruku_error 'Could not read key or reference.', src, con
			end
		end # case
	end # while true
	al
end

#read_em(src, delim) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 421

# Reads an emphasis span.
#
# Skips the one-character opening delimiter, reads the content up to
# +delim+ with read_span, skips the closing delimiter and wraps the content
# with md_em.
#
# src::   character source, positioned on the opening delimiter.
# delim:: the delimiter string that closes the span.
def read_em(src, delim)
	src.ignore_char # opening delimiter
	content = read_span(src, EscapedCharInText, nil, [delim])
	src.ignore_char # closing delimiter
	md_em(content)
end

#read_email_el(src, con) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 323

# Reads an e-mail autolink such as "<mailto:user@host>" or "<user@host>".
#
# The angle brackets are skipped, a "mailto:" prefix at the beginning of a
# line of the address is removed, and the result is pushed onto +con+ as an
# md_email element.
#
# src:: character source, positioned on the leading '<'.
# con:: context receiving the element.
def read_email_el(src,con)
	src.ignore_char # leading <
	raw = read_simple(src, [], [?>])
	src.ignore_char # closing >

	con.push_element md_email(raw.gsub(/^mailto:/,''))
end

#read_emstrong(src, delim) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 435

# Reads a span that is both emphasized and strong.
#
# Skips the three-character opening delimiter, reads the content up to
# +delim+ with read_span, skips the three-character closing delimiter and
# wraps the content with md_emstrong.
#
# src::   character source, positioned on the opening delimiter.
# delim:: the delimiter string that closes the span.
def read_emstrong(src, delim)
	src.ignore_chars(3) # opening delimiter
	content = read_span(src, EscapedCharInText, nil, [delim])
	src.ignore_chars(3) # closing delimiter
	md_emstrong(content)
end

#read_footnote_ref(src, con) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 462

# Reads a footnote reference: the bracketed id is read with read_ref_id
# and pushed onto +con+ as an md_foot_ref element.
#
# src:: character source, positioned on the opening bracket.
# con:: context receiving the element.
def read_footnote_ref(src,con)
	footnote_id = read_ref_id(src,con)
	con.push_element(md_foot_ref(footnote_id))
end

#read_image(src, con) ⇒ `Object`

Reads an image (inline or reference-style)

# File 'lib/maruku/input/parse_span_better.rb', line 615

# Reads an image and pushes the resulting element onto +con+.
#
# Three forms are handled, depending on what follows the "![alt text]":
# * "(url)" or "(url "title")": an inline image, pushed as md_im_image;
# * "[id]": a reference to a definition, pushed as md_image; an empty id
#   means that the alternative text is used as the id;
# * anything else: the alternative text itself is used as the id.
#
# One space is tolerated between the closing bracket and the "(" or "[".
#
# src:: character source, positioned on the '!' of "![".
# con:: context receiving the element; also used for error reporting.
def read_image(src, con)
	src.ignore_chars(2) # opening "!["
	alt_text = read_span(src, EscapedCharInText, [?]])
	src.ignore_char # closing bracket
	# ignore space
	if src.cur_char == SPACE and 
		(src.next_char == ?[ or src.next_char == ?( )
		src.ignore_char
	end
	case src.cur_char
	when ?(
		src.ignore_char # opening (
		src.consume_whitespace
		url = read_url(src, [SPACE,?\t,?)])
		if not url
			error "Could not read url from #{src.cur_chars(10).inspect}",
				src,con
		end
		src.consume_whitespace
		title = nil
		if src.cur_char != ?) # we have a title
			quote_char = src.cur_char
			title = read_quoted(src,con)
			if not title
				maruku_error 'Must quote title',src,con
			else				
				# Tries to read a title with quotes: ![a](url "ti"tle")
				# this is the most ugly thing in Markdown
				if not src.next_matches(/\s*\)/)
					# if there is not a closing par ), then read
					# the rest and guess it's title with quotes
					rest = read_simple(src, [], [?)], [])
					# read_simple returns nil when nothing could be read (the
					# input ended here): in that case there is nothing to add.
					if rest
						# chop the closing char
						rest.chop!
						title << quote_char << rest
					end
				end
			end
		end
		src.consume_whitespace
		closing = src.shift_char # closing )
		if closing != ?)
			# closing is nil at the end of the input, and nil cannot be
			# appended to a String.
			error( ("Unclosed link: '"<<(closing || '')<<"'")+
				" Read url=#{url.inspect} title=#{title.inspect}",src,con)
		end
		con.push_element md_im_image(alt_text, url, title)
	when ?[ # link ref
		ref_id = read_ref_id(src,con)
		if not ref_id # TODO: check around
			error('Reference not closed.', src, con)
			ref_id = ""
		end
		if ref_id.size == 0
			ref_id =  alt_text.to_s
		end

		ref_id = sanitize_ref_id(ref_id)

		con.push_element md_image(alt_text, ref_id)
	else # no stuff
		ref_id =  sanitize_ref_id(alt_text.to_s)
		con.push_element md_image(alt_text, ref_id)
	end
end

#read_inline_code(src, con) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 510

# Reads an inline code span and pushes it onto +con+ as an md_code element.
#
# The span is delimited by a run of backticks: the opening run is counted
# and the code extends up to the next occurrence of a run of the same
# length. When the delimiter is longer than one backtick, one leading and
# one trailing space of the code are dropped.
#
# src:: character source, positioned on the first opening backtick.
# con:: context receiving the element.
def read_inline_code(src, con)
	# Count the number of ticks
	num_ticks = 0
	while src.cur_char == ?` 
		num_ticks += 1
		src.ignore_char
	end
	# We will read until this string
	end_string = "`"*num_ticks

	code = read_simple(src, [], [], [end_string])

	src.ignore_chars num_ticks

	# read_simple returns nil when it could not read anything (for example
	# when the input ends right after the opening ticks): there are no
	# spaces to drop in that case.
	if num_ticks > 1 && code
		# Ignore at most one space
		code = code[1, code.size-1] if code[0] == SPACE
		# drop last space
		code = code[0, code.size-1] if code[-1] == SPACE
	end

	con.push_element md_code(code)
end

#read_inline_html(src, con) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 467

# Reads a piece of inline HTML and pushes it onto +con+ as an md_html
# element.
#
# The remaining buffer is fed to an HTMLHelper one character at a time,
# until the helper says it is finished; the characters used are then
# skipped in +src+. If the buffer ends first, an error is reported and what
# the helper has read so far is pushed anyway. Any exception raised in the
# process is reported as bad HTML, and a single character is then copied
# from +src+ to +con+ as plain text.
#
# src:: character source, positioned on the '<'.
# con:: context receiving the element.
def read_inline_html(src, con)
	helper = HTMLHelper.new
	begin
		# This is our current buffer in the context
		pending = src.current_remaining_buffer

		eaten = 0
		finished = false
		until finished
			if eaten >= pending.size
				maruku_error "Malformed HTML starting at #{pending.inspect}", src, con
				break
			end

			helper.eat_this pending[eaten].chr
			eaten += 1
			finished = helper.is_finished?
		end
		src.ignore_chars(eaten)
		con.push_element md_html(helper.stuff_you_read)
	rescue Exception => e
		details = add_tabs(e.inspect+e.backtrace.join("\n"),1,'>')
		maruku_error "Bad html: \n" + details, src,con
		maruku_recover "I will try to continue after bad HTML.", src, con
		con.push_char src.shift_char
	end
end

#read_link(src, con) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 541

# Reads a link and pushes the resulting element onto +con+.
#
# Three forms are handled, depending on what follows the "[text]":
# * "(url)" or "(url "title")": an inline link, pushed as md_im_link; a
#   missing url is accepted and becomes the empty string;
# * "[id]": a reference to a definition, pushed as md_link; an empty id
#   means that the link text is used as the id;
# * anything else: the link text itself is used as the id.
#
# One space is tolerated between the closing bracket and the "(" or "[".
# When the link cannot be completed (no closing parenthesis, unreadable
# reference id) the problem is reported and the link text is pushed as
# plain elements instead.
#
# src:: character source, positioned on the opening bracket.
# con:: context receiving the element; also used for error reporting.
def read_link(src, con)
	# we read the string and see what happens
	src.ignore_char # opening bracket
	children = read_span(src, EscapedCharInText, [?]])
	src.ignore_char # closing bracket

	# ignore space
	if src.cur_char == SPACE and 
		(src.next_char == ?[ or src.next_char == ?( )
		src.shift_char
	end
	
	case src.cur_char
	when ?(
		src.ignore_char # opening (
		src.consume_whitespace
		url = read_url(src, [SPACE,?\t,?)])
		if not url
			url = '' # no url is ok
		end
		src.consume_whitespace
		title = nil
		if src.cur_char != ?) # we have a title
			quote_char = src.cur_char
			title = read_quoted(src,con)
			
			if not title
				maruku_error 'Must quote title',src,con
			else
				# Tries to read a title with quotes: ![a](url "ti"tle")
				# this is the most ugly thing in Markdown
				if not src.next_matches(/\s*\)/)
					# if there is not a closing par ), then read
					# the rest and guess it's title with quotes
					rest = read_simple(src, [], [?)], [])
					# read_simple returns nil when nothing could be read (the
					# input ended here): in that case there is nothing to add.
					if rest
						# chop the closing char
						rest.chop!
						title << quote_char << rest
					end
				end
			end
		end
		src.consume_whitespace
		closing = src.shift_char # closing )
		if closing != ?)
			maruku_error 'Unclosed link',src,con
			maruku_recover "No closing ): I will not create"+
			" the link for #{children.inspect}", src, con
			con.push_elements children
			return
		end
		con.push_element md_im_link(children,url, title)
	when ?[ # link ref
		ref_id = read_ref_id(src,con)
		if ref_id
			if ref_id.size == 0
				ref_id = sanitize_ref_id(children.to_s)
			else
				ref_id = sanitize_ref_id(ref_id)
			end	
			con.push_element md_link(children, ref_id)
		else 
			maruku_error "Could not read ref_id", src, con
			maruku_recover "I will not create the link for "+
				"#{children.inspect}", src, con
			con.push_elements children
			return
		end
	else # empty [link]
		id = sanitize_ref_id(children.to_s) #. downcase.gsub(' ','_')
		con.push_element md_link(children, id)
	end
end

#read_quoted(src, con) ⇒ `Object`

Tries to read a quoted value. If stream does not start with ‘ or “, returns nil.

# File 'lib/maruku/input/parse_span_better.rb', line 365

# Tries to read a quoted value.
#
# If the source is positioned on a single or double quote, the text up to
# the matching quote is read with read_simple (honouring the escapes of
# EscapedCharInQuotes) and both quotes are consumed. Otherwise nothing is
# consumed.
#
# Returns the text between the quotes (whatever read_simple returns), or
# nil if the source does not start with a quote.
def read_quoted(src, con)
	return nil unless [?', ?"].include?(src.cur_char)

	quote_char = src.shift_char # opening quote
	content = read_simple(src, EscapedCharInQuotes, [quote_char])
	src.ignore_char # closing quote
	content
end

#read_quoted_or_unquoted(src, con, escaped, exit_on_chars) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 354

# Reads a value that may or may not be quoted.
#
# If the source is positioned on a single or double quote the value is read
# with read_quoted, otherwise it is read with read_simple using the given
# escapes and terminators.
#
# src::           character source.
# con::           current context.
# escaped::       characters that may be escaped with a backslash.
# exit_on_chars:: characters terminating an unquoted value.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
	if [?', ?"].include?(src.cur_char)
		read_quoted(src, con)
	else
		read_simple(src, escaped, exit_on_chars)
	end
end

#read_ref_id(src, con) ⇒ `Object`

Reads a bracketed id “[refid]”. Consumes also both brackets.

# File 'lib/maruku/input/parse_span_better.rb', line 449

# Reads a bracketed id "[refid]".
#
# The opening bracket and the whitespace after it are skipped, then the
# rest is read with the regular expression R_REF_ID.
#
# Returns the first capture group of the match, or nil if the regular
# expression does not match.
def read_ref_id(src, con)
	src.ignore_char # [
	src.consume_whitespace
	found = src.read_regexp(R_REF_ID)
	found ? found[1] : nil
end

#read_simple(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ `Object`

If eat_delim is true, and if the delim is not the EOF, then the delim gets eaten from the stream.

# File 'lib/maruku/input/parse_span_better.rb', line 384

# Reads plain text from +src+ up to a terminator, resolving backslash
# escapes. The terminator itself is not consumed.
#
# Reading stops at the first character contained in +exit_on_chars+, or as
# soon as the source continues with one of +exit_on_strings+. Reaching the
# end of the input first is reported as an error, and what has been read so
# far is returned. A backslash followed by a character of +escaped+ yields
# that character alone; any other backslash is kept as it is.
#
# src::             character source.
# escaped::         collection of characters that may be escaped.
# exit_on_chars::   Array of terminating characters, or nil.
# exit_on_strings:: Array of terminating strings, or nil.
#
# Returns the text read, or nil if nothing was read.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
	text = ""
	while true
		c = src.cur_char
		break if exit_on_chars && exit_on_chars.include?(c)
		break if exit_on_strings && 
			exit_on_strings.any? {|x| src.cur_chars_are x}
		
		case c
		when nil
			# exit_on_chars may be nil (see the check above), so it cannot be
			# mapped directly when composing the message.
			expected = (exit_on_chars || []).map{|x|""<<x}
			s= "String finished while reading (break on "+
			"#{expected.inspect})"+
			" already read: #{text.inspect}"
			maruku_error s, src
			maruku_recover "I boldly continue", src
			break
		when ?\\
			d = src.next_char
			if escaped.include? d
				# drop the backslash and keep the escaped character
				src.ignore_chars(2)
				text << d
			else
				text << src.shift_char
			end
		else 
			text << src.shift_char
		end
	end
	text.empty? ? nil : text
end

#read_span(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ `Object`

This is the main loop for reading span elements

It’s long, but not complex or difficult to understand.

# File 'lib/maruku/input/parse_span_better.rb', line 54

# Main loop for reading span-level elements.
#
# Characters are taken from +src+ one at a time and collected in a new
# SpanContext: plain characters are appended to the current string, while
# the characters that can start a construct (backticks, '<', backslash,
# '[', '!', '&', '*', '_', '{') are dispatched to the matching reader.
#
# The loop ends when the current character is contained in
# +exit_on_chars+, when the source continues with one of
# +exit_on_strings+, or at the end of the input (which is reported as an
# unclosed span). The terminator is not consumed.
#
# Afterwards the inline attribute lists are merged into their neighbours
# (merge_ial), one leading space of the first string and one trailing space
# of the last string are removed, and the elements go through +educate+.
#
# src::             character source.
# escaped::         collection of characters that may be escaped with a
#                   backslash.
# exit_on_chars::   Array of terminating characters, or nil. It cannot
#                   usefully contain ASCII letters, because those are
#                   consumed before the terminators are checked.
# exit_on_strings:: Array of terminating strings, or nil.
#
# Returns the Array of elements returned by +educate+.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
	con = SpanContext.new
	c = d = nil
	while true
		c = src.cur_char

		# This is only an optimization which cuts 50% of the time used.
		# (but you can't use a-zA-z in exit_on_chars)
		if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
			con.cur_string << src.shift_char
			next
		end

		break if exit_on_chars && exit_on_chars.include?(c)
		break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}
		
		# check if there are extensions
		if check_span_extensions(src, con)
			next
		end
		
		case c = src.cur_char	
		when ?\ # it's space (32)
			# two spaces followed by a newline are a hard line break
			if src.cur_chars_are "  \n"
				src.ignore_chars(3)
				con.push_element  md_br()
				next
			else
				src.ignore_char
				con.push_space 
			end
		when ?\n, ?\t 
			src.ignore_char
			con.push_space 
		when ?`
			read_inline_code(src,con)
		when ?<
			# It could be:
			# 1) HTML "<div ..."
			# 2) HTML "<!-- ..."
			# 3) url "<http:// ", "<ftp:// ..."
			# 4) email "<andrea@... ", "<mailto:andrea@..."
			# 5) on itself! "a < b	"
			# 6) Start of <<guillemettes>>
			
			case d = src.next_char
				when ?<;  # guillemettes
					src.ignore_chars(2)
					con.push_char ?<
					con.push_char ?<
				when ?!; 
					if src.cur_chars_are '<!--'
						read_inline_html(src, con)
					else 
						con.push_char src.shift_char
					end
				when ?? 
					read_xml_instr_span(src, con) 
				when ?\ , ?\t 
					con.push_char src.shift_char
				else
					if src.next_matches(/<mailto:/) or
					   src.next_matches(/<[\w\.]+\@/)
						read_email_el(src, con)
					elsif src.next_matches(/<\w+:/)
						read_url_el(src, con)
					elsif src.next_matches(/<\w/)
						#puts "This is HTML: #{src.cur_chars(20)}"
						read_inline_html(src, con)
					else 
						#puts "This is NOT HTML: #{src.cur_chars(20)}"
						con.push_char src.shift_char
					end
			end
		when ?\\
			# escaped quotes become entities; the other characters listed in
			# +escaped+ are taken literally, without the backslash
			d = src.next_char
			if d == ?'
				src.ignore_chars(2)
				con.push_element md_entity('apos')
			elsif d == ?"
				src.ignore_chars(2)
				con.push_element md_entity('quot')
			elsif escaped.include? d
				src.ignore_chars(2)
				con.push_char d
			else
				con.push_char src.shift_char
			end
		when ?[
			if markdown_extra? && src.next_char == ?^
				read_footnote_ref(src,con)
			else
				read_link(src, con)
			end
		when ?!
			if src.next_char == ?[
				read_image(src, con)
			else
				con.push_char src.shift_char
			end
		when ?&
			# named references
			if m = src.read_regexp(/\&([\w\d]+);/)
				con.push_element md_entity(m[1])
			# numeric
			elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/)
				num = m[1]  ? m[2].hex : m[2].to_i
				con.push_element md_entity(num)
			else
				con.push_char src.shift_char
			end
		when ?*
			if not src.next_char
				maruku_error "Opening * as last char.", src, con
				maruku_recover "Threating as literal"
				con.push_char src.shift_char
			else
				# longest delimiter first: ***, then **, then *
				follows = src.cur_chars(4)
				if follows =~ /^\*\*\*[^\s\*]/
					con.push_element read_emstrong(src,'***')
				elsif follows  =~ /^\*\*[^\s\*]/
					con.push_element read_strong(src,'**')
				elsif follows =~ /^\*[^\s\*]/
					con.push_element read_em(src,'*')
				else # * is just a normal char
					con.push_char src.shift_char
				end
			end
		when ?_
			if not src.next_char
				maruku_error "Opening _ as last char", src, con
				maruku_recover "Threating as literal", src, con
				con.push_char src.shift_char
			else
				# we don't want "mod_ruby" to start an emphasis
				# so we start one only if
				# 1) there's nothing else in the span (first char)
				# or 2) the last char was a space
				# or 3) the current string is empty
				#if con.elements.empty? ||
				if	 (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0)
					# also, we check the next characters
					follows = src.cur_chars(4)
					if  follows =~ /^\_\_\_[^\s\_]/
						con.push_element read_emstrong(src,'___')
					elsif follows  =~ /^\_\_[^\s\_]/
						con.push_element read_strong(src,'__')
					elsif follows =~ /^\_[^\s\_]/
						con.push_element read_em(src,'_')
					else # _ is just a normal char
						con.push_char src.shift_char
					end
				else
					# _ is just a normal char
						con.push_char src.shift_char
				end
			end
		when ?{ # extension
			if [?#, ?., ?:].include? src.next_char
				src.ignore_char # {
				interpret_extension(src, con, [?}])
				src.ignore_char # }
			else
				con.push_char src.shift_char
			end
		when nil
			# end of input before any of the terminators was found
			maruku_error( ("Unclosed span (waiting for %s"+
			 "#{exit_on_strings.inspect})") % [
					exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
					src,con)
			break
		else # normal text
			con.push_char src.shift_char
		end # end case
	end # end while true
	con.push_string_if_present 

	# Assign IAL to elements
	merge_ial(con.elements, src, con)
	
	
	# Remove leading space
	if (s = con.elements.first).kind_of? String
		if s[0] == ?\ then con.elements[0] = s[1, s.size-1] end
		con.elements.shift if s.size == 0 
	end
	
	# Remove final spaces
	if (s = con.elements.last).kind_of? String
		s.chop! if s[-1] == ?\ # the last character is a space
		con.elements.pop if s.size == 0 
	end
	
	educated = educate(con.elements)

	educated
end

#read_strong(src, delim) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 428

# Reads a strong span.
#
# Skips the two-character opening delimiter, reads the content up to
# +delim+ with read_span, skips the two-character closing delimiter and
# wraps the content with md_strong.
#
# src::   character source, positioned on the opening delimiter.
# delim:: the delimiter string that closes the span.
def read_strong(src, delim)
	src.ignore_chars(2) # opening delimiter
	content = read_span(src, EscapedCharInText, nil, [delim])
	src.ignore_chars(2) # closing delimiter
	md_strong(content)
end

#read_url(src, break_on) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 332

# Reads a URL up to one of the characters in +break_on+.
#
# A URL starting with a quote is reported through +error+ (reading goes on
# anyway). A URL enclosed in angle brackets is returned without them.
#
# src::      character source.
# break_on:: Array of characters that terminate the URL.
#
# Returns the URL as a String, or nil if it is empty.
def read_url(src, break_on)
	error 'Invalid char for url', src if [?',?"].include? src.cur_char

	url = read_simple(src, [], break_on) || ""

	# strip the angle brackets of "<url>"
	bracketed = url[0] == ?< && url[-1] == ?>
	url = url[1, url.size-2] if bracketed

	url.size == 0 ? nil : url
end

#read_url_el(src, con) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 315

# Reads a URL autolink such as "<http://example.com>": the angle brackets
# are skipped and the text between them is pushed onto +con+ as an md_url
# element.
#
# src:: character source, positioned on the leading '<'.
# con:: context receiving the element.
def read_url_el(src,con)
	src.ignore_char # leading <
	address = read_simple(src, [], [?>])
	src.ignore_char # closing >

	con.push_element(md_url(address))
end

#read_xml_instr_span(src, con) ⇒ `Object`

# File 'lib/maruku/input/parse_span_better.rb', line 253

# Reads an XML processing instruction "<?target code ?>" and pushes it onto
# +con+ as an md_xml_instr element.
#
# The target is the word that follows "<?" (the empty string if there is
# none); the code is everything up to the closing "?>", with the
# surrounding whitespace stripped.
#
# src:: character source, positioned on the "<?".
# con:: context receiving the element.
def read_xml_instr_span(src, con)
	src.ignore_chars(2) # starting <?

	# read target <?target code... ?>
	found = src.read_regexp(/(\w+)/)
	target = found ? found[1] : ''

	delim = "?>"
	raw_code = read_simple(src, [], [], [delim])
	src.ignore_chars delim.size

	con.push_element md_xml_instr(target, (raw_code || "").strip)
end

#unit_tests_for_attribute_lists ⇒ `Object`

# File 'lib/maruku/attributes.rb', line 80

# Builds the table of test cases for the parsing of attribute lists.
#
# Every entry of the literal table is [input, expected, comment], where
# +expected+ is either the list of attributes that should be read or
# +:throw+. +expected+ and +comment+ may be omitted: the entry then reuses
# the values of the previous entry, and a counter is appended to the
# comment.
#
# Returns an Array of [markdown, expected, description] triples, where
# +markdown+ is the input wrapped as "{<MagicChar><input>}", an expected
# Array has been replaced by [md_ial(expected)] and +description+ is the
# comment prefixed with "Attributes: ".
#
# Note: the method keeps its bookkeeping in the instance variables
# @expected, @comment and @count.
def unit_tests_for_attribute_lists
	[
		[ "",     [], "Empty lists are allowed" ], 
		[ "=",    :throw, "Bad char to begin a list with." ], 
		[ "a =b", :throw, "No whitespace before `=`." ], 
		[ "a= b", :throw, "No whitespace after `=`." ], 

		[ "a b", [[:ref, 'a'],[:ref, 'b']], "More than one ref" ], 
		[ "a b c", [[:ref, 'a'],[:ref, 'b'],[:ref, 'c']], "More than one ref" ], 
		[ "hello notfound", [[:ref, 'hello'],[:ref, 'notfound']]], 

		[ "'a'",  [[:ref, 'a']], "Quoted value." ], 
		[ '"a"'   ], 

		[ "a=b",  [['a','b']], "Simple key/val" ], 
		[ "'a'=b"   ], 
		[ "'a'='b'" ], 
		[ "a='b'"   ], 

		[ 'a="b\'"',  [['a',"b\'"]], "Key/val with quotes" ],
		[ 'a=b\''],
		[ 'a="\\\'b\'"',  [['a',"\'b\'"]], "Key/val with quotes" ], 
		
		['"', :throw, "Unclosed quotes"],
		["'"],
		["'a "],
		['"a '],
		
		[ "#a",  [[:id, 'a']], "Simple ID" ], 
		[ "#'a'" ], 
		[ '#"a"' ], 

		[ "#",  :throw, "Unfinished '#'." ], 
		[ ".",  :throw, "Unfinished '.'." ], 
		[ "# a",  :throw, "No white-space after '#'." ], 
		[ ". a",  :throw, "No white-space after '.' ." ], 
		
		[ "a=b c=d",  [['a','b'],['c','d']], "Tabbing" ], 
		[ " \ta=b \tc='d' "],
		[ "\t a=b\t c='d'\t\t"],
		
		[ ".\"a'",  :throw, "Mixing quotes is bad." ], 
		
	].map { |s, expected, comment| 
		# a missing expectation is inherited from the previous entry
		@expected = (expected ||= @expected)
		# a missing comment is inherited too; +last+ is set only in that case
		@comment  = (comment  ||= (last=@comment) )
		# inherited comment: append the next value of the counter to it;
		# comment given explicitly: restart the counter from 1
		(comment == last && (comment += (@count+=1).to_s)) || @count = 1
		expected = [md_ial(expected)] if expected.kind_of? Array
		["{#{MagicChar}#{s}}", expected, "Attributes: #{comment}"]
	}
end

Module: MaRuKu::In::Markdown::SpanLevelParser

Defined Under Namespace

Constant Summary collapse

32

Instance Method Summary collapse

Methods included from Helpers

Instance Method Details

#apply_one_rule(reg, subst, input) ⇒ Object

#describe_pos(buffer, buffer_index) ⇒ Object

#educate(elements) ⇒ Object

#extension_meta(src, con, break_on_chars) ⇒ Object

#interpret_extension(src, con, break_on_chars) ⇒ Object

#is_ial(e) ⇒ Object

#md_al(s = []) ⇒ Object

#merge_ial(elements, src, con) ⇒ Object

#parse_lines_as_span(lines, parent = nil) ⇒ Object

#parse_span_better(string, parent = nil) ⇒ Object

#read_attribute_list(src, con, break_on_chars) ⇒ Object

#read_em(src, delim) ⇒ Object

#read_email_el(src, con) ⇒ Object

#read_emstrong(src, delim) ⇒ Object

#read_footnote_ref(src, con) ⇒ Object

#read_image(src, con) ⇒ Object

#read_inline_code(src, con) ⇒ Object

#read_inline_html(src, con) ⇒ Object

#read_link(src, con) ⇒ Object

#read_quoted(src, con) ⇒ Object

#read_quoted_or_unquoted(src, con, escaped, exit_on_chars) ⇒ Object

#read_ref_id(src, con) ⇒ Object

#read_simple(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ Object

#read_span(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ Object

#read_strong(src, delim) ⇒ Object

#read_url(src, break_on) ⇒ Object

#read_url_el(src, con) ⇒ Object

#read_xml_instr_span(src, con) ⇒ Object

#unit_tests_for_attribute_lists ⇒ Object