Module: MaRuKu::In::Markdown::SpanLevelParser

Includes:: Helpers

Included in:: BlockLevelParser, CharSourceManual, CharSourceStrscan, MDDocument

Defined in:: lib/amp-front/third_party/maruku.rb,
lib/amp-front/third_party/maruku/attributes.rb,
lib/amp-front/third_party/maruku/input/rubypants.rb,
lib/amp-front/third_party/maruku/input/charsource.rb,
lib/amp-front/third_party/maruku/input/html_helper.rb,
lib/amp-front/third_party/maruku/input/parse_span_better.rb

Defined Under Namespace

Classes: CharSourceDebug, CharSourceManual, CharSourceStrscan, HTMLHelper, SpanContext

Constant Summary collapse

# Source text of a regexp character class listing ASCII punctuation.
# It is interpolated into the Rules patterns that handle a leading quote
# immediately followed by punctuation.
Punct_class =

'[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'

# Source text of a negated regexp character class: any character that is
# not a space, tab, CR, LF, "[", "{", "(" or "-".  The Rules use it to
# recognise the character that precedes a closing quote.
Close_class =

%![^\ \t\r\n\\[\{\(\-]!

# Ordered table of typographic substitutions used by #educate.
# Each entry is [pattern, substitution]:
# * pattern is a Regexp, or a plain String that is matched literally;
# * substitution is an entity name (Symbol) or an Array of them; the
#   special symbol :one stands for the text captured by group 1.
# #educate applies the rules one after another in this order, so the more
# specific patterns (e.g. /---/) must stay ahead of the general ones (/--/).
Rules =

[
[/---/,   :mdash          ],
[/--/,    :ndash          ],
['...',   :hellip         ],
['. . .', :hellip         ],
["``",    :ldquo          ],
["''",    :rdquo          ],
[/<<\s/,  [:laquo, :nbsp] ],
[/\s>>/,  [:nbsp, :raquo] ],
[/<</,    :laquo          ],
[/>>/,    :raquo          ],

#   def educate_single_backticks(str)
#   ["`", :lsquo]
#   ["'", :rsquo]

# Special case if the very first character is a quote followed by
# punctuation at a non-word-break. Close the quotes by brute
# force:
[/^'(?=#{Punct_class}\B)/, :rsquo],
[/^"(?=#{Punct_class}\B)/, :rdquo],
# Special case for double sets of quotes, e.g.:
#   <p>He said, "'Quoted' words in a larger quote."</p>
[/"'(?=\w)/, [:ldquo, :lsquo]    ],
[/'"(?=\w)/, [:lsquo, :ldquo]    ],
# Special case for decade abbreviations (the '80s):
[/'(?=\d\ds)/, :rsquo            ],
# Get most opening single quotes:
[/(\s)'(?=\w)/, [:one, :lsquo]   ],
# Single closing quotes:
[/(#{Close_class})'/, [:one, :rsquo]],
[/'(\s|s\b|$)/, [:rsquo, :one]],
# Any remaining single quotes should be opening ones:
[/'/, :lsquo],
# Get most opening double quotes:
[/(\s)"(?=\w)/, [:one, :ldquo]],
# Double closing quotes:
[/(#{Close_class})"/, [:one, :rdquo]],
[/"(\s|s\b|$)/, [:rdquo, :one]],
# Any remaining quotes should be opening ones:
[/"/, :ldquo]
  ].
  # Normalise every entry so that consumers always receive
  # [Regexp, Array]: String patterns become escaped (literal) regexps and
  # a single substitution is wrapped in a one-element Array.
  map{|reg, subst| # People should do the thinking, machines should do the work.
reg = Regexp.new(Regexp.escape(reg)) if not reg.kind_of? Regexp
subst = [subst] if not subst.kind_of?Array
[reg, subst]}

CharSource = Choose!

CharSourceManual

# Characters that a backslash may escape in ordinary span text.  The span
# readers pass this set as the `escaped` argument of #read_span, which
# emits the bare character when it follows a backslash.
EscapedCharInText =

Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]

# EscapedCharInText plus the two quote characters; passed to #read_simple
# by #read_quoted when reading a quoted value.
EscapedCharInQuotes =

Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]

# Backslash and backtick.  NOTE(review): not referenced by any method
# shown on this page; presumably intended for inline code -- confirm.
EscapedCharInInlineCode =

[?\\,?`]

SPACE = 32

?\

R_REF_ID =

R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/) R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)

Regexp.compile(/([^\]]*)\]/)

Instance Method Summary collapse

#apply_one_rule(reg, subst, input) ⇒ Object

note: input will be destroyed.
#describe_pos(buffer, buffer_index) ⇒ Object
#educate(elements) ⇒ Object
#extension_meta(src, con, break_on_chars) ⇒ Object
#interpret_extension(src, con, break_on_chars) ⇒ Object

Start: cursor on the character after ‘{’. End: cursor on ‘}’ or EOF.
#is_ial(e) ⇒ Object

We need a helper.
#md_al(s = []) ⇒ Object
#merge_ial(elements, src, con) ⇒ Object
#parse_lines_as_span(lines, parent = nil) ⇒ Object
#parse_span_better(string, parent = nil) ⇒ Object
#read_attribute_list(src, con, break_on_chars) ⇒ Object

returns nil or an AttributeList.
#read_em(src, delim) ⇒ Object
#read_email_el(src, con) ⇒ Object
#read_emstrong(src, delim) ⇒ Object
#read_footnote_ref(src, con) ⇒ Object
#read_image(src, con) ⇒ Object

Reads an image: `![alt](url "title")`, `![alt][ref]` or a bare `![alt]`.
#read_inline_code(src, con) ⇒ Object
#read_inline_html(src, con) ⇒ Object
#read_link(src, con) ⇒ Object
#read_quoted(src, con) ⇒ Object

Tries to read a quoted value.
#read_quoted_or_unquoted(src, con, escaped, exit_on_chars) ⇒ Object
#read_ref_id(src, con) ⇒ Object

Reads a bracketed id “[refid]”.
#read_simple(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ Object

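Reads plain text until one of exit_on_chars or exit_on_strings (or EOF) is reached; the delimiter itself is left in the stream. (The former `eat_delim` option, which consumed the delimiter, is commented out in the code.)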
#read_span(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ Object

This is the main loop for reading span elements.
#read_strong(src, delim) ⇒ Object
#read_url(src, break_on) ⇒ Object
#read_url_el(src, con) ⇒ Object
#read_xml_instr_span(src, con) ⇒ Object
#unit_tests_for_attribute_lists ⇒ Object

Methods included from Helpers

#md_abbr, #md_abbr_def, #md_ald, #md_br, #md_code, #md_codeblock, #md_el, #md_em, #md_email, #md_emstrong, #md_entity, #md_foot_ref, #md_footnote, #md_header, #md_hrule, #md_html, #md_ial, #md_im_image, #md_im_link, #md_image, #md_li, #md_link, #md_par, #md_quote, #md_ref_def, #md_strong, #md_url, #md_xml_instr

Instance Method Details

#apply_one_rule(reg, subst, input) ⇒ `Object`

note: input will be destroyed

# File 'lib/amp-front/third_party/maruku/input/rubypants.rb', line 192

# Applies a single typographic rule to a list of span elements.
#
# reg   - Regexp to look for inside String elements.
# subst - Array of entity names; the symbol :one stands for capture
#         group 1 of the match.
# input - Array of elements.  It is consumed (emptied) by this method.
#
# Every String element is split around the first match; the replacement
# and the remaining text are put back at the front of the queue, so the
# remainder is scanned again.  Non-String elements are passed through.
#
# Returns a new Array with the processed elements.
def apply_one_rule(reg, subst, input)
  result = []
  while (item = input.shift)
    match = item.kind_of?(String) ? reg.match(item) : nil
    unless match
      result << item
      next
    end

    result << match.pre_match unless match.pre_match.size == 0
    # Replacement pieces go back to the head of the queue, followed by
    # whatever text came after the match.
    requeue = subst.map do |token|
      token == :one ? match[1] : md_entity(token.to_s)
    end
    requeue << match.post_match unless match.post_match.size == 0
    input.unshift(*requeue)
  end
  result
end

#describe_pos(buffer, buffer_index) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/charsource.rb', line 154

# Builds a multi-line, human-readable picture of a position in a buffer.
#
# buffer       - the String being parsed.
# buffer_index - offset of the position of interest.
#
# Up to 75 characters around the position are shown (newlines rendered as
# "N", tabs as "T", and "EOF" appended when the window reaches the end of
# the buffer), followed by a marker line pointing at the position, the
# range of offsets displayed, and the whole buffer as formatted by
# add_tabs.
#
# Returns the description as a String.
def describe_pos(buffer, buffer_index)
  len = 75
  num_before_max = buffer_index
  num_after_max = buffer.size - buffer_index

  # Start from half a window after the cursor, then let each side grow
  # into whatever room the other side leaves unused.
  num_after  = [len/2, num_after_max].min
  num_before = [num_before_max, len - num_after].min
  num_after  = [num_after_max, len - num_before].min

  index_start = [buffer_index - num_before, 0].max
  index_end   = [buffer_index + num_after, buffer.size].min
  size = index_end - index_start

  str = buffer[index_start, size]
  str.gsub!("\n",'N')
  str.gsub!("\t",'T')
  str += "EOF" if index_end == buffer.size

  pre_s = [buffer_index - index_start, 0].max
  pre_s2 = [len - pre_s, 0].max
  pre = " " * pre_s

  "-"*len + "\n" +
  str + "\n" +
  "-"*pre_s + "|" + "-"*pre_s2 + "\n" +
  pre + "+--- Byte #{buffer_index}\n" +
  # The label is a range, so it reports the end offset of the window
  # (it used to print the window length instead).
  "Shown bytes [#{index_start} to #{index_end}] of #{buffer.size}:\n" +
  add_tabs(buffer,1,">")
end

#educate(elements) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/rubypants.rb', line 207

# Runs every typographic rule in Rules over the given span elements.
#
# elements - Array of Strings and other elements.
#
# After the rules have been applied, empty strings are dropped and runs
# of adjacent Strings are merged into one (by appending to the first
# String of the run).
#
# Returns the resulting Array.
def educate(elements)
  processed = Rules.inject(elements) do |current, rule|
    apply_one_rule(rule[0], rule[1], current)
  end

  # strips empty strings
  processed.delete_if { |item| String === item && item.size == 0 }

  # join consecutive strings
  processed.inject([]) do |merged, item|
    if String === item && String === merged.last
      merged.last << item
    else
      merged << item
    end
    merged
  end
end

#extension_meta(src, con, break_on_chars) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 301

# Reads the content of a "{...}" meta-data extension and pushes the
# resulting element on the span context.
#
# src            - character source, positioned on the extension content.
# con            - span context receiving the new element.
# break_on_chars - characters that terminate the attribute list.
#
# If the content starts with "name:", it is an attribute list definition:
# the list is stored in doc.ald under that name and an ALD element is
# pushed.  Otherwise it is an inline attribute list and an IAL element is
# pushed.  An inline list has no name, so it is not stored in doc.ald
# (it used to be stored under the nil key).
def extension_meta(src, con, break_on_chars)
  if m = src.read_regexp(/([^\s\:\"\']+):/)
    name = m[1]
    al = read_attribute_list(src, con, break_on_chars)
    self.doc.ald[name] = al
    con.push md_ald(name, al)
  else
    al = read_attribute_list(src, con, break_on_chars)
    con.push md_ial(al)
  end
end

#interpret_extension(src, con, break_on_chars) ⇒ `Object`

Start: cursor on the character after ‘{’. End: cursor on ‘}’ or EOF

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 277

# Dispatches on the first character of a "{...}" extension.
#
# src            - character source; the cursor is on the first character
#                  after the opening brace.
# con            - span context that receives the parsed element.
# break_on_chars - characters that end the extension.
#
# "#" and "." start an attribute list right away; ":" is skipped first.
# Anything else is reported through maruku_recover and then handed to
# extension_meta as meta-data as well.
def interpret_extension(src, con, break_on_chars)
  case src.cur_char
  when ?#, ?.
    extension_meta(src, con, break_on_chars)
  when ?:
    src.ignore_char # :
    extension_meta(src, con, break_on_chars)
  else
    stuff = read_simple(src, [?}], break_on_chars, [])
    notice = "I will threat this:\n\t{#{stuff}} \n as meta-data.\n"
    if stuff =~ /^(\w+\s|[^\w])/
      # No extension is currently known, so name the unknown one.
      extension_id = $1.strip
      notice = "I don't know what to do with extension '#{extension_id}'\n" + notice
    end
    maruku_recover notice, src, con
    extension_meta(src, con, break_on_chars)
  end
end

#is_ial(e) ⇒ `Object`

We need a helper

# File 'lib/amp-front/third_party/maruku/attributes.rb', line 196

# Tells whether +e+ is an inline-attribute-list element, i.e. an
# MDElement whose node_type is :ial.
def is_ial(e)
  e.kind_of?(MDElement) && e.node_type == :ial
end

#md_al(s = []) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/attributes.rb', line 132

# Convenience constructor: builds an AttributeList from +s+
# (an empty Array by default).
def md_al(s=[])
  AttributeList.new(s)
end

#merge_ial(elements, src, con) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/attributes.rb', line 198

# Attaches inline attribute lists (IALs) to their neighbouring elements.
#
# elements - Array of span elements; modified in place.
# src, con - parsing position and context, used only for error reports.
#
# An IAL that is not the first element is applied to the MDElement just
# before it or, failing that, to the MDElement just after it; when
# neither neighbour is an MDElement an error is reported.  Afterwards,
# unless Globals[:debug_keep_ials] is set, the IALs are removed from
# +elements+, except for elements equal to the first one.
def merge_ial(elements, src, con)
  elements.each_with_index do |e, i|
    next unless is_ial(e) && i >= 1

    before = elements[i-1]
    after = elements[i+1]
    if before.kind_of? MDElement
      before.al = e.ial
    elsif after.kind_of? MDElement
      after.al = e.ial
    else
      maruku_error "It is not clear to me what element this IAL {:#{e.ial.to_md}} \n"+
      "is referring to. The element before is a #{before.class.to_s}, \n"+
      "the element after is a #{after.class.to_s}.\n"+
      "\n before: #{before.inspect}"+
      "\n after: #{after.inspect}",
      src, con
      # xxx say whether there is an empty element nearby
    end
  end

  unless Globals[:debug_keep_ials]
    elements.delete_if { |x| x != elements.first && is_ial(x) }
  end
end

#parse_lines_as_span(lines, parent = nil) ⇒ `Object`



35
36
37

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 35

# Parses several lines as one span: the lines are joined with newlines
# and handed to parse_span_better together with +parent+.
def parse_lines_as_span(lines, parent=nil)
  joined = lines.join("\n")
  parse_span_better(joined, parent)
end

#parse_span_better(string, parent = nil) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 39

# Parses +string+ as span-level Markdown.
#
# string - the text to parse; an error is reported if it is not a String.
# parent - passed through to the CharSource.
#
# The parser works on a frozen copy, so the caller's string is neither
# frozen nor modified.  Reading stops at end of input (the nil char).
#
# Returns whatever read_span returns.
def parse_span_better(string, parent=nil)
  error "Passed #{string.class}." unless string.kind_of?(String)

  frozen_copy = (string + "").freeze
  read_span(CharSource.new(frozen_copy, parent), EscapedCharInText, [nil])
end

#read_attribute_list(src, con, break_on_chars) ⇒ `Object`

returns nil or an AttributeList

# File 'lib/amp-front/third_party/maruku/attributes.rb', line 135

# Reads an attribute list such as `#id .class key=val ref`.
#
# src            - character source positioned at the start of the list.
# con            - span context, used for error reporting.
# break_on_chars - characters that end the list (they are not consumed).
#
# Items are separated by whitespace:
# * `#name`    adds an id,
# * `.name`    adds a class,
# * `key=val`  adds a key/value pair,
# * `name`     adds a reference to another attribute list.
# Names and values may be quoted.  Malformed input is reported through
# maruku_error (always with the current position) and skipped as far as
# possible.
#
# Returns the AttributeList read so far (possibly partial after an error).
def read_attribute_list(src, con, break_on_chars)

  separators = break_on_chars + [?=,?\s,?\t]
  escaped = Maruku::EscapedCharInQuotes

  al = AttributeList.new
  while true
    src.consume_whitespace
    break if break_on_chars.include? src.cur_char

    case src.cur_char
    when nil
      maruku_error "Attribute list terminated by EOF:\n "+
                   "#{al.inspect}" , src, con
      tell_user "I try to continue and return partial attribute list:\n"+
        al.inspect
      break
    when ?=     # error
      maruku_error "In attribute lists, cannot start identifier with `=`.",
        src, con
      tell_user "I try to continue"
      src.ignore_char
    when ?#     # id definition
      src.ignore_char
      if id = read_quoted_or_unquoted(src, con, escaped, separators)
        al.push_id id
      else
        maruku_error 'Could not read `id` attribute.', src, con
        tell_user 'Trying to ignore bad `id` attribute.'
      end
    when ?.     # class definition
      src.ignore_char
      if klass = read_quoted_or_unquoted(src, con, escaped, separators)
        al.push_class klass
      else
        maruku_error 'Could not read `class` attribute.', src, con
        tell_user 'Trying to ignore bad `class` attribute.'
      end
    else
      if key = read_quoted_or_unquoted(src, con, escaped, separators)
        if src.cur_char == ?=
          src.ignore_char # skip the =
          if val = read_quoted_or_unquoted(src, con, escaped, separators)
            al.push_key_val(key, val)
          else
            maruku_error "Could not read value for key #{key.inspect}.",
              src, con
            tell_user "Ignoring key #{key.inspect}."
          end
        else
          # a bare name is a reference to another attribute list
          al.push_ref key
        end
      else
        maruku_error 'Could not read key or reference.', src, con
      end
    end # case
  end # while true
  al
end

#read_em(src, delim) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 421

# Reads an emphasised span delimited by the one-character +delim+.
# The cursor must be on the opening delimiter; both delimiters are
# consumed.  Returns the element built by md_em.
def read_em(src, delim)
  src.ignore_char # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char # closing delimiter
  md_em inner
end

#read_email_el(src, con) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 323

# Reads an e-mail autolink such as `<mailto:me@example.com>` or
# `<me@example.com>` and pushes the element built by md_email.
# A "mailto:" prefix is removed from the address.
def read_email_el(src,con)
  src.ignore_char # leading <
  raw = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element md_email(raw.gsub(/^mailto:/,''))
end

#read_emstrong(src, delim) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 435

# Reads a span that is both emphasised and strong, delimited by the
# three-character +delim+.  The cursor must be on the opening
# delimiter; both delimiters are consumed.  Returns the element built
# by md_emstrong.
def read_emstrong(src, delim)
  src.ignore_chars(3) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3) # closing delimiter
  md_emstrong inner
end

#read_footnote_ref(src, con) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 462

# Reads a bracketed footnote reference and pushes the element built by
# md_foot_ref from the id returned by read_ref_id.
def read_footnote_ref(src,con)
  con.push_element md_foot_ref(read_ref_id(src,con))
end

#read_image(src, con) ⇒ `Object`

Reads an image: `![alt](url "title")`, `![alt][ref]` or a bare `![alt]`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 615

# Reads an image and pushes the resulting element on +con+.
#
# src - character source; the cursor is on the "!" of "![".
# con - span context receiving the element.
#
# Three forms are recognised after the alternative text:
# * `![alt](url "title")` - inline image (md_im_image);
# * `![alt][ref]`         - reference image; an empty ref falls back to
#                           the alternative text (md_image);
# * `![alt]`              - reference image named after the alternative
#                           text (md_image).
# A single space between "]" and the following "[" or "(" is skipped.
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket
  # ignore space; the space is tested both as the Integer SPACE and as
  # the character literal, because a char compared to ?x literals
  # elsewhere is a one-character String on Ruby >= 1.9
  if [SPACE, ?\s].include?(src.cur_char) &&
    (src.next_char == ?[ || src.next_char == ?( )
    src.ignore_char
  end
  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE, ?\s, ?\t, ?)])
    if not url
      error "Could not read url from #{src.cur_chars(10).inspect}",
        src,con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)
      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # read_simple returns nil when nothing could be read
          if rest
            # chop the closing char
            rest.chop!
            title << quote_char << rest
          end
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      # closing is nil at end of input; it cannot be appended to a String
      shown = closing.nil? ? "EOF" : closing
      error( ("Unclosed link: '"<<shown<<"'")+
        " Read url=#{url.inspect} title=#{title.inspect}",src,con)
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    if not ref_id # TODO: check around
      error('Reference not closed.', src, con)
      ref_id = ""
    end
    if ref_id.size == 0
      ref_id =  alt_text.to_s
    end

    ref_id = sanitize_ref_id(ref_id)

    con.push_element md_image(alt_text, ref_id)
  else # no stuff
    ref_id =  sanitize_ref_id(alt_text.to_s)
    con.push_element md_image(alt_text, ref_id)
  end
end

#read_inline_code(src, con) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 510

# Reads an inline code span and pushes the element built by md_code.
#
# src - character source; the cursor is on the first backtick.
# con - span context receiving the element.
#
# The span is closed by a run of as many backticks as opened it.  When
# more than one backtick is used, one leading and one trailing space of
# the code are dropped, so that code can begin or end with a backtick.
def read_inline_code(src, con)
  # Count the number of ticks
  num_ticks = 0
  while src.cur_char == ?`
    num_ticks += 1
    src.ignore_char
  end
  # We will read until this string
  end_string = "`"*num_ticks

  code = read_simple(src, [], [], [end_string])

  src.ignore_chars num_ticks

  # read_simple returns nil when it could not read anything
  if num_ticks > 1 && code
    # One-character slices compare equal to ' ' on every Ruby version;
    # code[0] is an Integer on 1.8 but a String on 1.9 and later, where
    # comparing it with the Integer SPACE never matched.
    # Ignore at most one space
    code = code[1, code.size-1] if code[0, 1] == ' '
    # drop last space
    code = code[0, code.size-1] if code[-1, 1] == ' '
  end

  con.push_element md_code(code)
end

#read_inline_html(src, con) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 467

# Reads an inline HTML fragment and pushes the element built by md_html.
#
# src - character source; the cursor is on the start of the fragment.
# con - span context receiving the element.
#
# The remaining buffer is fed to an HTMLHelper one character at a time
# until the helper reports that it is finished.  If the buffer runs out
# first, an error is reported and whatever was read is still pushed.
# Any exception raised on the way is reported, and a single character
# is then copied to the output so that parsing can go on.
def read_inline_html(src, con)
  helper = HTMLHelper.new
  begin
    # This is our current buffer in the context
    pending = src.current_remaining_buffer

    eaten = 0
    complete = false
    while eaten < pending.size
      helper.eat_this pending[eaten].chr
      eaten += 1
      if helper.is_finished?
        complete = true
        break
      end
    end
    unless complete
      maruku_error "Malformed HTML starting at #{pending.inspect}", src, con
    end

    src.ignore_chars(eaten)
    con.push_element md_html(helper.stuff_you_read)
  rescue Exception => e
    # NOTE(review): rescuing Exception also traps signals and exits;
    # kept as is because callers may rely on it.
    maruku_error "Bad html: \n" +
      add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
      src,con
    maruku_recover "I will try to continue after bad HTML.", src, con
    con.push_char src.shift_char
  end
end

#read_link(src, con) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 541

# Reads a link and pushes the resulting element(s) on +con+.
#
# src - character source; the cursor is on the opening "[".
# con - span context receiving the element.
#
# Three forms are recognised after the link text:
# * `[text](url "title")` - inline link (md_im_link); an empty url is
#                           allowed;
# * `[text][ref]`         - reference link; an empty ref falls back to
#                           the link text (md_link);
# * `[text]`              - reference link named after its text (md_link).
# A single space between "]" and the following "[" or "(" is skipped.
# When the link cannot be completed, the problem is reported and the
# already-parsed text is pushed as plain children instead.
def read_link(src, con)
  # we read the string and see what happens
  src.ignore_char # opening bracket
  children = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # ignore space; the space is tested both as the Integer SPACE and as
  # the character literal, because a char compared to ?x literals
  # elsewhere is a one-character String on Ruby >= 1.9
  if [SPACE, ?\s].include?(src.cur_char) &&
    (src.next_char == ?[ || src.next_char == ?( )
    src.shift_char
  end

  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE, ?\s, ?\t, ?)])
    if not url
      url = '' # no url is ok
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)

      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes: [a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # read_simple returns nil when nothing could be read
          if rest
            # chop the closing char
            rest.chop!
            title << quote_char << rest
          end
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      maruku_error 'Unclosed link',src,con
      maruku_recover "No closing ): I will not create"+
      " the link for #{children.inspect}", src, con
      con.push_elements children
      return
    end
    con.push_element md_im_link(children,url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    if ref_id
      if ref_id.size == 0
        ref_id = sanitize_ref_id(children.to_s)
      else
        ref_id = sanitize_ref_id(ref_id)
      end
      con.push_element md_link(children, ref_id)
    else
      maruku_error "Could not read ref_id", src, con
      maruku_recover "I will not create the link for "+
        "#{children.inspect}", src, con
      con.push_elements children
      return
    end
  else # empty [link]
    id = sanitize_ref_id(children.to_s) #. downcase.gsub(' ','_')
    con.push_element md_link(children, id)
  end
end

#read_quoted(src, con) ⇒ `Object`

Tries to read a quoted value. If stream does not start with ‘ or “, returns nil.

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 365

# Tries to read a quoted value.
#
# If the cursor is not on a single or double quote, nothing is consumed
# and nil is returned.  Otherwise the opening quote, the text up to the
# matching quote, and the closing quote are consumed; the text is
# returned as given by read_simple (nil when it is empty).
def read_quoted(src, con)
  return nil unless [?', ?"].include?(src.cur_char)

  quote_char = src.shift_char # opening quote
  text = read_simple(src, EscapedCharInQuotes, [quote_char])
  src.ignore_char # closing quote
  text
end

#read_quoted_or_unquoted(src, con, escaped, exit_on_chars) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 354

# Reads a value that may or may not be quoted.
#
# When the cursor is on a single or double quote the value is read with
# read_quoted; otherwise it is read with read_simple using the given
# +escaped+ set and stopping on +exit_on_chars+.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
  if [?', ?"].include?(src.cur_char)
    read_quoted(src, con)
  else
    read_simple(src, escaped, exit_on_chars)
  end
end

#read_ref_id(src, con) ⇒ `Object`

Reads a bracketed id “[refid]”. Consumes also both brackets.

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 449

# Reads a bracketed id "[refid]".
#
# The opening bracket and any whitespace after it are skipped, then
# R_REF_ID is matched against the source.
#
# Returns the first capture group of the match, or nil when the source
# does not match.
def read_ref_id(src, con)
  src.ignore_char # [
  src.consume_whitespace
  match = src.read_regexp(R_REF_ID)
  match ? match[1] : nil
end

#read_simple(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ `Object`

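Reads plain text until one of exit_on_chars or exit_on_strings (or EOF) is reached; the delimiter itself is left in the stream. (The former `eat_delim` option, which consumed the delimiter, is commented out in the code.)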

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 384

# Reads plain text, resolving backslash escapes.
#
# src             - character source.
# escaped         - collection of characters that a backslash escapes;
#                   for those the backslash is dropped.
# exit_on_chars   - characters that stop the reading, or nil.
# exit_on_strings - strings that stop the reading, or nil.
#
# The terminating character or string is left in the stream.  Reaching
# the end of input before a terminator is reported through maruku_error
# and the text read so far is kept.
#
# Returns the text read, or nil when it is empty.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
  text = ""
  while true
    c = src.cur_char
    break if exit_on_chars && exit_on_chars.include?(c)

    break if exit_on_strings &&
      exit_on_strings.any? {|x| src.cur_chars_are x}

    case c
    when nil
      # exit_on_chars may legitimately be nil (see the guard above), so
      # it must not be dereferenced blindly while building the message.
      waiting_for = (exit_on_chars || []).map{|x| ""<<x}
      s= "String finished while reading (break on "+
      "#{waiting_for.inspect})"+
      " already read: #{text.inspect}"
      maruku_error s, src
      maruku_recover "I boldly continue", src
      break
    when ?\\
      d = src.next_char
      if escaped.include? d
        # escaped character: drop the backslash
        src.ignore_chars(2)
        text << d
      else
        # a backslash in front of anything else is kept literally
        text << src.shift_char
      end
    else
      text << src.shift_char
    end
  end
  text.empty? ? nil : text
end

#read_span(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ `Object`

This is the main loop for reading span elements

It’s long, but not complex or difficult to understand.

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 54

# This is the main loop for reading span elements.
#
# src             - character source.
# escaped         - collection of characters that a backslash escapes.
# exit_on_chars   - characters that end the span, or nil; a nil element
#                   makes the end of input a regular terminator.
# exit_on_strings - strings that end the span, or nil.
#
# The terminator itself is not consumed.  Once the loop ends, inline
# attribute lists are merged into their neighbours, one leading and one
# trailing space are removed, and the result is passed through educate.
#
# Returns the Array of elements produced by educate.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
  con = SpanContext.new
  c = d = nil
  while true
    c = src.cur_char

    # This is only an optimization which cuts 50% of the time used.
    # (but you can't use a-zA-z in exit_on_chars)
    if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
      con.cur_string << src.shift_char
      next
    end

    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}

    # check if there are extensions
    if check_span_extensions(src, con)
      next
    end

    # Space literals are written ?\s: the older "?\ " form silently
    # changes meaning when the character after the backslash is lost.
    case c = src.cur_char
    when ?\s # it's space (32)
      if src.cur_chars_are "  \n"
        # two spaces at the end of a line: hard line break
        src.ignore_chars(3)
        con.push_element  md_br()
        next
      else
        src.ignore_char
        con.push_space
      end
    when ?\n, ?\t
      src.ignore_char
      con.push_space
    when ?`
      read_inline_code(src,con)
    when ?<
      # It could be:
      # 1) HTML "<div ..."
      # 2) HTML "<!-- ..."
      # 3) url "<http:// ", "<ftp:// ..."
      # 4) email "<andrea@... ", "<mailto:andrea@..."
      # 5) on itself! "a < b "
      # 6) Start of <<guillemettes>>

      case d = src.next_char
        when ?<  # guillemettes
          src.ignore_chars(2)
          con.push_char ?<
          con.push_char ?<
        when ?!
          if src.cur_chars_are '<!--'
            read_inline_html(src, con)
          else
            con.push_char src.shift_char
          end
        when ??
          read_xml_instr_span(src, con)
        when ?\s, ?\t
          con.push_char src.shift_char
        else
          if src.next_matches(/<mailto:/) or
             src.next_matches(/<[\w\.]+\@/)
            read_email_el(src, con)
          elsif src.next_matches(/<\w+:/)
            read_url_el(src, con)
          elsif src.next_matches(/<\w/)
            #puts "This is HTML: #{src.cur_chars(20)}"
            read_inline_html(src, con)
          else
            #puts "This is NOT HTML: #{src.cur_chars(20)}"
            con.push_char src.shift_char
          end
      end
    when ?\\
      d = src.next_char
      if d == ?'
        src.ignore_chars(2)
        con.push_element md_entity('apos')
      elsif d == ?"
        src.ignore_chars(2)
        con.push_element md_entity('quot')
      elsif escaped.include? d
        src.ignore_chars(2)
        con.push_char d
      else
        con.push_char src.shift_char
      end
    when ?[
      if markdown_extra? && src.next_char == ?^
        read_footnote_ref(src,con)
      else
        read_link(src, con)
      end
    when ?!
      if src.next_char == ?[
        read_image(src, con)
      else
        con.push_char src.shift_char
      end
    when ?&
      # named references
      if m = src.read_regexp(/\&([\w\d]+);/)
        con.push_element md_entity(m[1])
      # numeric
      elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/)
        num = m[1]  ? m[2].hex : m[2].to_i
        con.push_element md_entity(num)
      else
        con.push_char src.shift_char
      end
    when ?*
      if not src.next_char
        maruku_error "Opening * as last char.", src, con
        # position and context are passed like in every other report
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        follows = src.cur_chars(4)
        if follows =~ /^\*\*\*[^\s\*]/
          con.push_element read_emstrong(src,'***')
        elsif follows  =~ /^\*\*[^\s\*]/
          con.push_element read_strong(src,'**')
        elsif follows =~ /^\*[^\s\*]/
          con.push_element read_em(src,'*')
        else # * is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?_
      if not src.next_char
        maruku_error "Opening _ as last char", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        # we don't want "mod_ruby" to start an emphasis
        # so we start one only if
        # 1) there's nothing else in the span (first char)
        # or 2) the last char was a space
        # or 3) the current string is empty
        #if con.elements.empty? ||
        if   (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0)
          # also, we check the next characters
          follows = src.cur_chars(4)
          if  follows =~ /^\_\_\_[^\s\_]/
            con.push_element read_emstrong(src,'___')
          elsif follows  =~ /^\_\_[^\s\_]/
            con.push_element read_strong(src,'__')
          elsif follows =~ /^\_[^\s\_]/
            con.push_element read_em(src,'_')
          else # _ is just a normal char
            con.push_char src.shift_char
          end
        else
          # _ is just a normal char
            con.push_char src.shift_char
        end
      end
    when ?{ # extension
      if [?#, ?., ?:].include? src.next_char
        src.ignore_char # {
        interpret_extension(src, con, [?}])
        src.ignore_char # }
      else
        con.push_char src.shift_char
      end
    when nil
      maruku_error( ("Unclosed span (waiting for %s"+
       "#{exit_on_strings.inspect})") % [
          exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
          src,con)
      break
    else # normal text
      con.push_char src.shift_char
    end # end case
  end # end while true
  con.push_string_if_present

  # Assign IAL to elements
  merge_ial(con.elements, src, con)


  # Remove leading space
  if (s = con.elements.first).kind_of? String
    if s[0] == ?\s then con.elements[0] = s[1, s.size-1] end
    con.elements.shift if s.size == 0
  end

  # Remove final spaces
  if (s = con.elements.last).kind_of? String
    s.chop! if s[-1] == ?\s
    con.elements.pop if s.size == 0
  end

  educated = educate(con.elements)

  educated
end

#read_strong(src, delim) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 428

# Reads a strong span delimited by the two-character +delim+.
# The cursor must be on the opening delimiter; both delimiters are
# consumed.  Returns the element built by md_strong.
def read_strong(src, delim)
  src.ignore_chars(2) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2) # closing delimiter
  md_strong inner
end

#read_url(src, break_on) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 332

# Reads a URL up to (not including) one of the +break_on+ characters.
#
# A URL starting with a quote character is reported through error.
# Surrounding angle brackets ("<...>") are removed.
#
# Returns the URL, or nil when it is empty.
def read_url(src, break_on)
  error('Invalid char for url', src) if [?',?"].include?(src.cur_char)

  url = read_simple(src, [], break_on) || "" # nil means empty url
  url = url[1, url.size-2] if url[0] == ?< && url[-1] == ?>

  url.size == 0 ? nil : url
end

#read_url_el(src, con) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 315

# Reads a URL autolink such as `<http://example.com>` and pushes the
# element built by md_url.  Both angle brackets are consumed.
def read_url_el(src,con)
  src.ignore_char # leading <
  address = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element md_url(address)
end

#read_xml_instr_span(src, con) ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/input/parse_span_better.rb', line 253

# Reads an XML processing instruction `<?target code ?>` and pushes the
# element built by md_xml_instr.
#
# src - character source; the cursor is on the "<" of "<?".
# con - span context receiving the element.
#
# The target is the run of word characters right after "<?" (the empty
# string when there is none); the code is everything up to "?>", with
# surrounding whitespace removed.
def read_xml_instr_span(src, con)
  src.ignore_chars(2) # starting <?

  # read target <?target code... ?>
  match = src.read_regexp(/(\w+)/)
  target = match ? match[1] : ''

  terminator = "?>"
  body = read_simple(src, [], [], [terminator])
  src.ignore_chars terminator.size

  con.push_element md_xml_instr(target, (body || "").strip)
end

#unit_tests_for_attribute_lists ⇒ `Object`

# File 'lib/amp-front/third_party/maruku/attributes.rb', line 80

# Builds the table of test cases for attribute-list parsing.
#
# Each row of the literal table is [input, expected, comment].  Rows may
# omit `expected` and `comment`; they then reuse the values of the
# closest preceding row that had them.
#
# Returns an Array of [markdown, expected, description] triples, where
# markdown is the input wrapped as "{<MagicChar><input>}", and expected
# is either :throw or a one-element Array holding md_ial(expected).
# NOTE(review): :throw presumably marks inputs that must be rejected --
# confirm against the code that consumes this table.
def unit_tests_for_attribute_lists
  [
    [ "",     [], "Empty lists are allowed" ], 
    [ "=",    :throw, "Bad char to begin a list with." ], 
    [ "a =b", :throw, "No whitespace before `=`." ], 
    [ "a= b", :throw, "No whitespace after `=`." ], 

    [ "a b", [[:ref, 'a'],[:ref, 'b']], "More than one ref" ], 
    [ "a b c", [[:ref, 'a'],[:ref, 'b'],[:ref, 'c']], "More than one ref" ], 
    [ "hello notfound", [[:ref, 'hello'],[:ref, 'notfound']]], 

    [ "'a'",  [[:ref, 'a']], "Quoted value." ], 
    [ '"a"'   ], 

    [ "a=b",  [['a','b']], "Simple key/val" ], 
    [ "'a'=b"   ], 
    [ "'a'='b'" ], 
    [ "a='b'"   ], 

    [ 'a="b\'"',  [['a',"b\'"]], "Key/val with quotes" ],
    [ 'a=b\''],
    [ 'a="\\\'b\'"',  [['a',"\'b\'"]], "Key/val with quotes" ], 
    
    ['"', :throw, "Unclosed quotes"],
    ["'"],
    ["'a "],
    ['"a '],
    
    [ "#a",  [[:id, 'a']], "Simple ID" ], 
    [ "#'a'" ], 
    [ '#"a"' ], 

    [ "#",  :throw, "Unfinished '#'." ], 
    [ ".",  :throw, "Unfinished '.'." ], 
    [ "# a",  :throw, "No white-space after '#'." ], 
    [ ". a",  :throw, "No white-space after '.' ." ], 
    
    [ "a=b c=d",  [['a','b'],['c','d']], "Tabbing" ], 
    [ " \ta=b \tc='d' "],
    [ "\t a=b\t c='d'\t\t"],
    
    [ ".\"a'",  :throw, "Mixing quotes is bad." ], 
    
  ].map { |s, expected, comment| 
    # A missing `expected` is taken from the previous row; the value in
    # use is remembered in @expected for the rows that follow.
    @expected = (expected ||= @expected)
    # Same for `comment`; `last` is set only when the comment had to be
    # inherited, so it tells inherited comments from explicit ones.
    @comment  = (comment  ||= (last=@comment) )
    # Inherited comments get a running number appended (2, 3, ...); an
    # explicit comment resets the counter to 1.
    (comment == last && (comment += (@count+=1).to_s)) || @count = 1
    # Array expectations are wrapped as a single IAL element.
    expected = [md_ial(expected)] if expected.kind_of? Array
    ["{#{MagicChar}#{s}}", expected, "Attributes: #{comment}"]
  }
end

Module: MaRuKu::In::Markdown::SpanLevelParser

Defined Under Namespace

Constant Summary collapse

32

Instance Method Summary collapse

Methods included from Helpers

Instance Method Details

#apply_one_rule(reg, subst, input) ⇒ Object

#describe_pos(buffer, buffer_index) ⇒ Object

#educate(elements) ⇒ Object

#extension_meta(src, con, break_on_chars) ⇒ Object

#interpret_extension(src, con, break_on_chars) ⇒ Object

#is_ial(e) ⇒ Object

#md_al(s = []) ⇒ Object

#merge_ial(elements, src, con) ⇒ Object

#parse_lines_as_span(lines, parent = nil) ⇒ Object

#parse_span_better(string, parent = nil) ⇒ Object

#read_attribute_list(src, con, break_on_chars) ⇒ Object

#read_em(src, delim) ⇒ Object

#read_email_el(src, con) ⇒ Object

#read_emstrong(src, delim) ⇒ Object

#read_footnote_ref(src, con) ⇒ Object

#read_image(src, con) ⇒ Object

#read_inline_code(src, con) ⇒ Object

#read_inline_html(src, con) ⇒ Object

#read_link(src, con) ⇒ Object

#read_quoted(src, con) ⇒ Object

#read_quoted_or_unquoted(src, con, escaped, exit_on_chars) ⇒ Object

#read_ref_id(src, con) ⇒ Object

#read_simple(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ Object

#read_span(src, escaped, exit_on_chars, exit_on_strings = nil) ⇒ Object

#read_strong(src, delim) ⇒ Object

#read_url(src, break_on) ⇒ Object

#read_url_el(src, con) ⇒ Object

#read_xml_instr_span(src, con) ⇒ Object

#unit_tests_for_attribute_lists ⇒ Object