Class: Newstile::Parser::Newstile

Inherits:

Base

Object
Base
Newstile::Parser::Newstile

show all

Includes:: Newstile, Html::Parser

Defined in:: lib/newstile/parser/newstile.rb,
lib/newstile/parser/newstile/eob.rb,
lib/newstile/parser/newstile/html.rb,
lib/newstile/parser/newstile/link.rb,
lib/newstile/parser/newstile/list.rb,
lib/newstile/parser/newstile/math.rb,
lib/newstile/parser/newstile/table.rb,
lib/newstile/parser/newstile/header.rb,
lib/newstile/parser/newstile/autolink.rb,
lib/newstile/parser/newstile/codespan.rb,
lib/newstile/parser/newstile/emphasis.rb,
lib/newstile/parser/newstile/footnote.rb,
lib/newstile/parser/newstile/codeblock.rb,
lib/newstile/parser/newstile/extension.rb,
lib/newstile/parser/newstile/paragraph.rb,
lib/newstile/parser/newstile/blank_line.rb,
lib/newstile/parser/newstile/blockquote.rb,
lib/newstile/parser/newstile/line_break.rb,
lib/newstile/parser/newstile/html_entity.rb,
lib/newstile/parser/newstile/abbreviation.rb,
lib/newstile/parser/newstile/smart_quotes.rb,
lib/newstile/parser/newstile/escaped_chars.rb,
lib/newstile/parser/newstile/attribute_list.rb,
lib/newstile/parser/newstile/block_boundary.rb,
lib/newstile/parser/newstile/horizontal_rule.rb,
lib/newstile/parser/newstile/typographic_symbol.rb

Overview

Used for parsing a document in newstile format.

If you want to extend the functionality of the parser, you need to do the following:

Create a new subclass
add the needed parser methods
modify the @block_parsers and @span_parsers variables and add the names of your parser methods

Here is a small example for an extended parser class that parses ERB style tags as raw text if they are used as span level elements (an equivalent block level parser should probably also be made to handle the block case):

require 'newstile/parser/newstile'

# Example parser extension: ERB style tags (<% ... %>) that appear at span
# level are passed through as raw text instead of being parsed.
#
# The class must derive from the Newstile parser (not from the Kramdown
# parser this library was forked from) so that the Newstile block and span
# parsers are inherited.
class Newstile::Parser::ERBNewstile < Newstile::Parser::Newstile

   # Register the new span parser in front of the inherited span parsers.
   def initialize(doc)
     super(doc)
     @span_parsers.unshift(:erb_tags)
   end

   # Matches a complete ERB tag on a single line.
   ERB_TAGS_START = /<%.*?%>/

   # Consume the matched ERB tag and add it unchanged as a :raw element.
   def parse_erb_tags
     @src.pos += @src.matched_size
     @tree.children << Element.new(:raw, @src.matched)
   end
   define_parser(:erb_tags, ERB_TAGS_START, '<%')

end

The new parser can be used like this:

require 'newstile/document'
# require the file with the above parser class

Newstile::Document.new(input_text, :input => 'ERBNewstile').to_html

Defined Under Namespace

Classes: Data

Constant Summary collapse

EOB_MARKER =

/^\^\s*?\n/

HTML_BLOCK_START =

/^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/

HTML_SPAN_START =

/<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/

PUNCTUATION_CHARS =

"_.:,;!?-"

LINK_ID_CHARS =

/[a-zA-Z0-9 #{PUNCTUATION_CHARS}]/

LINK_ID_NON_CHARS =

/[^a-zA-Z0-9 #{PUNCTUATION_CHARS}]/

LINK_DEFINITION_START =

/^#{OPT_SPACE}\[(#{LINK_ID_CHARS}+)\]:[ \t]*(?:<(.*?)>|([^'"\n]*?\S[^'"\n]*?))[ \t]*?(?:\n?[ \t]*?(["'])(.+?)\4[ \t]*?)?\n/

LINK_TEXT_BRACKET_RE =

/\\\[|\\\]|\[|\]/

LINK_INLINE_ID_RE =

/\s*?\[(#{LINK_ID_CHARS}+)?\]/

LINK_INLINE_TITLE_RE =

/\s*?(["'])(.+?)\1\s*?\)/

LINK_START =

/!?\[(?=[^^])/

NEWSTILE_LINK_START =

/!?([^\\]?)\"([^\"]+)\"\:([\S]+[\w\/])/

LIST_ITEM_IAL =

/^\s*(#{IAL_SPAN_START})?\s*\n/

LIST_START_UL =

/^(#{OPT_SPACE}[+*])([\t| ].*?\n)/

LIST_START_OL =

/^(#{OPT_SPACE}\d+\.)([\t| ].*?\n)/

LIST_START =

/#{LIST_START_UL}|#{LIST_START_OL}/

DEFINITION_LIST_START =

/^(#{OPT_SPACE}:)([\t| ].*?\n)/

BLOCK_MATH_START =

/^#{OPT_SPACE}(\\)?\$\$(.*?)\$\$\s*?\n/m

INLINE_MATH_START =

/\$\$(.*?)\$\$/

TABLE_SEP_LINE =

/^([+|: -]*?-[+|: -]*?)[ \t]*\n/

TABLE_HSEP_ALIGN =

/[ ]?(:?)-+(:?)[ ]?/

TABLE_FSEP_LINE =

/^[+|: =]*?=[+|: =]*?[ \t]*\n/

TABLE_ROW_LINE =

/^(.*?)[ \t]*\n/

TABLE_LINE =

/(?:\||.*?[^\\\n]\|).*?\n/

TABLE_START =

/^#{OPT_SPACE}(?=\S)#{TABLE_LINE}/

HEADER_ID =

/(?:[ \t]\{#(\w[\w-]*)\})?/

SETEXT_HEADER_START =

/^(#{OPT_SPACE}[^ \t].*?)#{HEADER_ID}[ \t]*?\n(-|=)+\s*?\n/

ATX_HEADER_START =

/^\#{1,6}/

ATX_HEADER_MATCH =

/^(\#{1,6})(.+?)\s*?#*#{HEADER_ID}\s*?\n/

NEWSTILE_HEADER_START =

/^!{1,6} /

NEWSTILE_HEADER_MATCH =

/^(!{1,6}) +(.+?)\s*?\!*#{HEADER_ID}\s*?\n/

ACHARS =

'[[:alnum:]]'

AUTOLINK_START_STR =

"<((mailto|https?|ftps?):.+?|[-.#{ACHARS}]+@[-#{ACHARS}]+(\.[-#{ACHARS}]+)*\.[a-z]+)>"

AUTOLINK_START =

/#{AUTOLINK_START_STR}/

CODESPAN_DELIMITER =

/`+/

EMPHASIS_START =

/(?:\*\*?|__?)/

FOOTNOTE_DEFINITION_START =

/^#{OPT_SPACE}\[\^(#{ALD_ID_NAME})\]:\s*?(.*?\n#{CODEBLOCK_MATCH})/

FOOTNOTE_MARKER_START =

/\[\^(#{ALD_ID_NAME})\]/

CODEBLOCK_START =

INDENT

CODEBLOCK_MATCH =

/(?:#{BLANK_LINE}?(?:#{INDENT}[ \t]*\S.*\n)+(?:(?!#{BLANK_LINE} {0,3}\S|#{IAL_BLOCK_START}|#{EOB_MARKER}|^#{OPT_SPACE}#{LAZY_END_HTML_STOP}|^#{OPT_SPACE}#{LAZY_END_HTML_START})^[ \t]*\S.*\n)*)*/

FENCED_CODEBLOCK_START =

/^~{3,}/

FENCED_CODEBLOCK_MATCH =

/^(~{3,})\s*?\n(.*?)^\1~*\s*?\n/m

EXT_STOP_STR =

"\\{:/(%s)?\\}"

EXT_START_STR =

"\\{::(\\w+)(?:\\s(#{ALD_ANY_CHARS}*?)|)(\\/)?\\}"

EXT_SPAN_START =

/#{EXT_START_STR}|#{EXT_STOP_STR % ALD_ID_NAME}/

EXT_BLOCK_START =

/^#{OPT_SPACE}(?:#{EXT_START_STR}|#{EXT_STOP_STR % ALD_ID_NAME})\s*?\n/

EXT_BLOCK_STOP_STR =

"^#{OPT_SPACE}#{EXT_STOP_STR}\s*?\n"

LAZY_END_HTML_SPAN_ELEMENTS =

HTML_SPAN_ELEMENTS + %w{script}

LAZY_END_HTML_START =

/<(?>(?!(?:#{LAZY_END_HTML_SPAN_ELEMENTS.join('|')})\b)#{REXML::Parsers::BaseParser::UNAME_STR})\s*(?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\1)*\s*\/?>/m

LAZY_END_HTML_STOP =

/<\/(?!(?:#{LAZY_END_HTML_SPAN_ELEMENTS.join('|')})\b)#{REXML::Parsers::BaseParser::UNAME_STR}\s*>/m

PARAGRAPH_START =

/^#{OPT_SPACE}[^ \t].*?\n/

PARAGRAPH_MATCH =

/(?:^.*\n)+?(?=#{BLANK_LINE}|#{IAL_BLOCK_START}|#{EOB_MARKER}|#{DEFINITION_LIST_START}|^#{OPT_SPACE}#{LAZY_END_HTML_STOP}|^#{OPT_SPACE}#{LAZY_END_HTML_START}|\Z)/

BLANK_LINE =

/(?:^\s*\n)+/

BLOCKQUOTE_START =

/^#{OPT_SPACE}> ?/

BLOCKQUOTE_MATCH =

/(^.*\n)+?(?=#{BLANK_LINE}|#{IAL_BLOCK_START}|#{EOB_MARKER}|^#{OPT_SPACE}#{LAZY_END_HTML_STOP}|^#{OPT_SPACE}#{LAZY_END_HTML_START}|\Z)/

SUMMARY_START =

/^#{OPT_SPACE}\/\/\. ?/

SUMMARY_MATCH =

/(^.*\n)+?(?=#{BLANK_LINE}|#{IAL_BLOCK_START}|#{EOB_MARKER}|^#{OPT_SPACE}#{LAZY_END_HTML_STOP}|^#{OPT_SPACE}#{LAZY_END_HTML_START}|\Z)/

LINE_BREAK =

/(  |\\\\)(?=\n)/

ABBREV_DEFINITION_START =

/^#{OPT_SPACE}\*\[(.+?)\]:(.*?)\n/

SQ_PUNCT =

'[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'

SQ_CLOSE =

%![^\ \\\\\t\r\n\\[{(-]!

SQ_RULES =

[
 [/("|')(?=#{SQ_PUNCT}\B)/, [:rquote1]],
 # Special case for double sets of quotes, e.g.:
 #   <p>He said, "'Quoted' words in a larger quote."</p>
 [/(\s?)"'(?=\w)/, [1, :ldquo, :lsquo]],
 [/(\s?)'"(?=\w)/, [1, :lsquo, :ldquo]],
 # Special case for decade abbreviations (the '80s):
 [/(\s?)'(?=\d\ds)/, [1, :rsquo]],

 # Get most opening single/double quotes:
 [/(\s)('|")(?=\w)/, [1, :lquote2]],
 # Single/double closing quotes:
 [/(#{SQ_CLOSE})('|")/, [1, :rquote2]],
 # Special case for e.g. "<i>Custer</i>'s Last Stand."
 [/("|')(\s|s\b|$)/, [:rquote1, 2]],
 # Any remaining single quotes should be opening ones:
 [/(.?)'/m, [1, :lsquo]],
 [/(.?)"/m, [1, :ldquo]],
]

SQ_SUBSTS =

{
  [:rquote1, '"'] => :rdquo,
  [:rquote1, "'"] => :rsquo,
  [:rquote2, '"'] => :rdquo,
  [:rquote2, "'"] => :rsquo,
  [:lquote1, '"'] => :ldquo,
  [:lquote1, "'"] => :lsquo,
  [:lquote2, '"'] => :ldquo,
  [:lquote2, "'"] => :lsquo,
}

SMART_QUOTES_RE =

/[^\\]?["']/

ESCAPED_CHARS =

/\\([\\.*_+`()\[\]{}#!:|"'\$=-])/

ALD_ID_CHARS =

/[\w-]/

ALD_ANY_CHARS =

/\\\}|[^\}]/

ALD_ID_NAME =

/\w#{ALD_ID_CHARS}*/

ALD_TYPE_KEY_VALUE_PAIR =

/(#{ALD_ID_NAME})=("|')((?:\\\}|\\\2|[^\}\2])*?)\2/

ALD_TYPE_CLASS_NAME =

/\.(#{ALD_ID_NAME})/

ALD_TYPE_ID_NAME =

/#(\w[\w:-]*)/

ALD_TYPE_REF =

/(#{ALD_ID_NAME})/

ALD_TYPE_ANY =

/(?:\A|\s)(?:#{ALD_TYPE_KEY_VALUE_PAIR}|#{ALD_TYPE_ID_NAME}|#{ALD_TYPE_CLASS_NAME}|#{ALD_TYPE_REF})(?=\s|\Z)/

ALD_START =

/^#{OPT_SPACE}\{:(#{ALD_ID_NAME}):(#{ALD_ANY_CHARS}+)\}\s*?\n/

IAL_BLOCK =

/\{:(?!:|\/)(#{ALD_ANY_CHARS}+)\}\s*?\n/

IAL_BLOCK_START =

/^#{OPT_SPACE}#{IAL_BLOCK}/

IAL_SPAN_START =

/\{:(#{ALD_ANY_CHARS}+)\}/

BLOCK_BOUNDARY =

/#{BLANK_LINE}|#{EOB_MARKER}|#{IAL_BLOCK_START}|\Z/

HR_START =

/^#{OPT_SPACE}(\*|-|_)[ \t]*\1[ \t]*\1[ \t]*(\1|[ \t])*\n/

TYPOGRAPHIC_SYMS =

[['---', :mdash], ['--', :ndash], 
['-. ', :qdash_space], # ['- ', :qdash_space],  
['...', :hellip],
['\\<<', '&lt;&lt;'], ['\\>>', '&gt;&gt;'],
['<< ', :laquo_space], [' >>', :raquo_space],
['<<', :laquo], ['>>', :raquo]]

TYPOGRAPHIC_SYMS_SUBST =

TYPOGRAPHIC_SYMS_RE =

/#{TYPOGRAPHIC_SYMS.map {|k,v| Regexp.escape(k)}.join('|')}/

Constants included from Html::Parser

Html::Parser::HTML_RAW_START

Constants included from Html::Constants

Constants included from Newstile

VERSION

Instance Attribute Summary collapse

#doc ⇒ Object readonly

Returns the value of attribute doc.
#options ⇒ Object readonly

Returns the value of attribute options.
#tree ⇒ Object readonly

Returns the value of attribute tree.

Instance Method Summary collapse

#add_link(el, href, title, alt_text = nil) ⇒ Object

This helper method adds the appropriate attributes to the element el of type a or img and the element itself to the @tree.
#after_block_boundary? ⇒ Boolean

Return true if we are after a block boundary.
#before_block_boundary? ⇒ Boolean

Return true if we are before a block boundary.
#handle_extension(name, opts, body, type) ⇒ Object
#handle_newstile_html_tag(el, closed) ⇒ Object
#html_parse_type(val) ⇒ Object

Return the HTML parse type defined by the string val, i.e.
#initialize(doc) ⇒ Newstile constructor

Create a new Newstile parser object for the Newstile::Document doc.
#parse(source) ⇒ Object

Parse the given source string and return the created element tree.
#parse_abbrev_definition ⇒ Object

Parse the abbreviation definition at the current location.
#parse_ald ⇒ Object

Parse the attribute list definition at the current location.
#parse_attribute_list(str, opts) ⇒ Object

Parse the string str and extract all attributes and add all found attributes to the hash opts.
#parse_atx_header ⇒ Object

Parse the Atx header at the current location.
#parse_autolink ⇒ Object

Parse the autolink at the current location.
#parse_blank_line ⇒ Object

Parse the blank line at the current position.
#parse_block_extension ⇒ Object

Parse the extension block at the current location.
#parse_block_html ⇒ Object

Parse the HTML at the current position as block level HTML.
#parse_block_ial ⇒ Object

Parse the inline attribute list at the current location.
#parse_block_math ⇒ Object

Parse the math block at the current location.
#parse_blockquote ⇒ Object

Parse the blockquote at the current location.
#parse_codeblock ⇒ Object

Parse the indented codeblock at the current location.
#parse_codeblock_fenced ⇒ Object

Parse the fenced codeblock at the current location.
#parse_codespan ⇒ Object

Parse the codespan at the current scanner location.
#parse_definition_list ⇒ Object

Parse the definition list at the current location.
#parse_emphasis ⇒ Object

Parse the emphasis at the current location.
#parse_eob_marker ⇒ Object

Parse the EOB marker at the current location.
#parse_escaped_chars ⇒ Object

Parse the backslash-escaped character at the current location.
#parse_extension_start_tag(type) ⇒ Object
#parse_first_list_line(indentation, content) ⇒ Object

Used for parsing the first line of a list item or a definition, i.e.
#parse_footnote_definition ⇒ Object

Parse the footnote definition at the current location.
#parse_footnote_marker ⇒ Object

Parse the footnote marker at the current location.
#parse_horizontal_rule ⇒ Object

Parse the horizontal rule at the current location.
#parse_html_entity ⇒ Object

Parse the HTML entity at the current location.
#parse_inline_math ⇒ Object

Parse the inline math at the current location.
#parse_line_break ⇒ Object

Parse the line break at the current location.
#parse_link ⇒ Object

Parse the link at the current scanner position.
#parse_link_definition ⇒ Object

Parse the link definition at the current location.
#parse_list ⇒ Object

Parse the ordered or unordered list at the current location.
#parse_newstile_header ⇒ Object

Parse the newstile header at the current location.
#parse_newstile_link ⇒ Object

Parse the newstile link at the current location.
#parse_paragraph ⇒ Object

Parse the paragraph at the current location.
#parse_setext_header ⇒ Object

Parse the Setext header at the current location.
#parse_smart_quotes ⇒ Object

Parse the smart quotes at current location.
#parse_span_extension ⇒ Object

Parse the extension span at the current location.
#parse_span_html ⇒ Object

Parse the HTML at the current position as span level HTML.
#parse_span_ial ⇒ Object

Parse the inline attribute list at the current location.
#parse_summary ⇒ Object

Parse the summary at the current location.
#parse_table ⇒ Object

Parse the table at the current location.
#parse_typographic_syms ⇒ Object

Parse the typographic symbols at the current location.
#replace_abbreviations(el, regexps = nil) ⇒ Object

Replace the abbreviation text with elements.
#update_ial_with_ial(ial, opts) ⇒ Object

Update the ial with the information from the inline attribute list opts.

Constructor Details

#initialize(doc) ⇒ `Newstile`

Create a new Newstile parser object for the Newstile::Document doc.

# File 'lib/newstile/parser/newstile.rb', line 81

# Create a new Newstile parser object for the Newstile::Document +doc+.
#
# Resets the per-parse state, prepares the empty lookup tables stored in
# doc.parse_infos and sets the ordered lists of block and span parser names.
def initialize(doc)
  super(doc)

  # Scanner, current tree node and pending block IAL are unset until parsing.
  @src = nil
  @tree = nil
  @stack = []
  @text_type = :raw_text
  @block_ial = nil

  # Each table gets its own fresh hash.
  [:ald, :link_defs, :abbrev_defs, :footnotes].each do |table|
    @doc.parse_infos[table] = {}
  end

  @block_parsers = [
    :blank_line,
    :codeblock,
    :codeblock_fenced,
    :blockquote,
    :summary,
    :table,
    :atx_header,
    :setext_header,
    :newstile_header,
    :horizontal_rule,
    :list,
    :definition_list,
    :link_definition,
    :block_html,
    :footnote_definition,
    :abbrev_definition,
    :ald,
    :block_math,
    :block_extension,
    :block_ial,
    :eob_marker,
    :paragraph
  ]
  @span_parsers = [
    :emphasis,
    :codespan,
    :autolink,
    :span_html,
    :footnote_marker,
    :link,
    :newstile_link,
    :smart_quotes,
    :inline_math,
    :span_extension,
    :span_ial,
    :html_entity,
    :typographic_syms,
    :line_break,
    :escaped_chars
  ]
end

Instance Attribute Details

#doc ⇒ `Object` (readonly)

Returns the value of attribute doc.



77
78
79

# File 'lib/newstile/parser/newstile.rb', line 77

# Read-only accessor: returns the value of the @doc instance variable.
def doc
  @doc
end

#options ⇒ `Object` (readonly)

Returns the value of attribute options.



78
79
80

# File 'lib/newstile/parser/newstile.rb', line 78

# Read-only accessor: returns the value of the @options instance variable.
def options
  @options
end

#tree ⇒ `Object` (readonly)

Returns the value of attribute tree.



76
77
78

# File 'lib/newstile/parser/newstile.rb', line 76

# Read-only accessor: returns the value of the @tree instance variable.
def tree
  @tree
end

Instance Method Details

#add_link(el, href, title, alt_text = nil) ⇒ `Object`

This helper method adds the appropriate attributes to the element el of type a or img and the element itself to the @tree.

# File 'lib/newstile/parser/newstile/link.rb', line 46

# Set the link attributes on +el+ and append +el+ to the current tree.
#
# For an element of type :a the target goes into 'href'. Any other element
# is treated like an image: the target goes into 'src', +alt_text+ into
# 'alt' and its children are removed. A 'title' attribute is only set when
# +title+ is truthy.
def add_link(el, href, title, alt_text = nil)
  is_anchor = (el.type == :a)
  el.attr[is_anchor ? 'href' : 'src'] = href
  unless is_anchor
    el.attr['alt'] = alt_text
    el.children.clear
  end
  el.attr['title'] = title if title
  @tree.children << el
end

#after_block_boundary? ⇒ `Boolean`

Return true if we are after a block boundary.

Returns:

(Boolean)

# File 'lib/newstile/parser/newstile/block_boundary.rb', line 34

# Return a truthy value if we are after a block boundary.
#
# That is the case when the current tree has no children yet, when the last
# child is a :blank element or an :eob element without a value, or when a
# block IAL is pending (the value of @block_ial is returned in that case).
def after_block_boundary?
  previous = @tree.children.last
  return true unless previous
  return true if previous.type == :blank
  return true if previous.type == :eob && previous.value.nil?
  @block_ial
end

#before_block_boundary? ⇒ `Boolean`

Return true if we are before a block boundary.

Returns:

(Boolean)



40
41
42

# File 'lib/newstile/parser/newstile/block_boundary.rb', line 40

# Return a truthy value if we are before a block boundary.
#
# Uses a non-consuming check of BLOCK_BOUNDARY at the current scanner
# position, so the scan pointer is not advanced.
def before_block_boundary?
  @src.check(BLOCK_BOUNDARY)
end

#handle_extension(name, opts, body, type) ⇒ `Object`

# File 'lib/newstile/parser/newstile/extension.rb', line 67

# Handle the extension +name+ with the attributes +opts+ and the body +body+.
# +type+ is passed on as the :category of created elements and is compared
# with :block for the 'options' extension.
#
# Supported names:
# 'comment'::    adds a :comment element (only if +body+ is a String)
# 'nomarkdown':: adds a :raw element (only if +body+ is a String); the
#                whitespace separated values of opts['type'] become :type
# 'options'::    sets every known document option; unknown keys produce a
#                warning; for block extensions an :eob element is added
#
# Returns true if the extension was handled, false for an unknown name.
def handle_extension(name, opts, body, type)
  case name
  when 'comment'
    if body.kind_of?(String)
      @tree.children << Element.new(:comment, body, nil, :category => type)
    end
  when 'nomarkdown'
    if body.kind_of?(String)
      raw_types = opts['type'].to_s.split(/\s+/)
      @tree.children << Element.new(:raw, body, nil, :category => type, :type => raw_types)
    end
  when 'options'
    # The block returns true only for keys that are not known options.
    unknown = opts.select do |k, v|
      k = k.to_sym
      if Newstile::Options.defined?(k)
        # Best effort: a value that cannot be parsed leaves the option as is.
        @doc.options[k] = Newstile::Options.parse(k, v) rescue @doc.options[k]
        false
      else
        true
      end
    end
    unknown.each do |key, _value|
      warning("Unknown newstile option '#{key}'")
    end
    @tree.children << Element.new(:eob, :extension) if type == :block
  else
    return false
  end
  true
end

#handle_newstile_html_tag(el, closed) ⇒ `Object`

# File 'lib/newstile/parser/newstile/html.rb', line 31

# Decide how the content of the HTML element +el+ is parsed and parse it.
# +closed+ tells whether the tag was already closed; nothing more is consumed
# for a closed tag besides the optional line end of a block tag.
#
# The parse type is stored in el.options[:parse_type]. It is :raw when the
# enclosing tree is itself a raw HTML element or when the document option
# :parse_block_html is not set; otherwise it is looked up in HTML_PARSE_AS.
# A 'markdown' attribute on the element (which is removed from el.attr)
# overrides this via #html_parse_type.
def handle_newstile_html_tag(el, closed)
  parse_type = if @tree.type != :html_element || @tree.options[:parse_type] != :raw
                 (@doc.options[:parse_block_html] ? HTML_PARSE_AS[el.value] : :raw)
               else
                 :raw
               end
  # An explicit markdown attribute wins; :default means "use the HTML_PARSE_AS entry".
  if val = html_parse_type(el.attr.delete('markdown'))
    parse_type = (val == :default ? HTML_PARSE_AS[el.value] : val)
  end

  # For block parsing the rest of the start tag line is skipped.
  @src.scan(/[ \t]*\n/) if parse_type == :block
  el.options[:parse_type] = parse_type

  if !closed
    if parse_type == :block
      end_tag_found = parse_blocks(el)
      if !end_tag_found
        warning("Found no end tag for '#{el.value}' - auto-closing it")
      end
    elsif parse_type == :span
      curpos = @src.pos
      # Scan up to (but not including) the closing tag; the lookahead keeps the tag itself unconsumed.
      if result = @src.scan_until(/(?=<\/#{el.value}\s*>)/m)
        add_text(extract_string(curpos...@src.pos, @src), el)
        @src.scan(HTML_TAG_CLOSE_RE)
      else
        # No closing tag: everything that is left becomes the text of the element.
        add_text(@src.scan(/.*/m), el)
        warning("Found no end tag for '#{el.value}' - auto-closing it")
      end
    else
      # Raw content: nested tags are handled by this same method.
      parse_raw_html(el, &method(:handle_newstile_html_tag))
    end
    # Skip the rest of the end tag line unless we are inside a raw HTML element.
    @src.scan(/[ \t]*\n/) unless (@tree.type == :html_element && @tree.options[:parse_type] == :raw)
  end
end

#html_parse_type(val) ⇒ `Object`

Return the HTML parse type defined by the string val, i.e. :raw when “0”, :default (default parsing) when “1”, :span when “span” and :block when “block”. If val is nil, nil is returned and the default parsing mode is used. Any other value produces a warning and also returns nil.

# File 'lib/newstile/parser/newstile/html.rb', line 69

# Return the HTML parse type defined by the string +val+:
#
# "0"::     :raw
# "1"::     :default
# "span"::  :span
# "block":: :block
#
# nil is returned for a nil +val+. Any other value also yields nil, after a
# warning has been issued.
def html_parse_type(val)
  return nil if val.nil?

  parse_types = {"0" => :raw, "1" => :default, "span" => :span, "block" => :block}
  return parse_types[val] if parse_types.key?(val)

  warning("Invalid markdown attribute val '#{val}', using default")
  nil
end

#parse(source) ⇒ `Object`

Parse the given source string and return the created element tree.

# File 'lib/newstile/parser/newstile.rb', line 113

# Parse +source+ and return the root element of the created tree.
#
# After the block level parse the tree is post-processed with update_tree and
# replace_abbreviations; the content of every collected footnote is passed
# through update_tree as well.
def parse(source)
  configure_parser
  root = Element.new(:root)
  parse_blocks(root, adapt_source(source))
  update_tree(root)
  replace_abbreviations(root)
  @doc.parse_infos[:footnotes].each_value { |data| update_tree(data[:content]) }
  root
end

#parse_abbrev_definition ⇒ `Object`

Parse the abbreviation definition at the current location.

# File 'lib/newstile/parser/newstile/abbreviation.rb', line 30

# Parse the abbreviation definition at the current location.
#
# The abbreviation (capture 1) and its stripped text (capture 2) are stored
# in doc.parse_infos[:abbrev_defs]; redefining an abbreviation issues a
# warning and overwrites the old text. An :eob element is added to the tree.
def parse_abbrev_definition
  @src.pos += @src.matched_size
  abbrev_id = @src[1]
  abbrev_text = @src[2].strip
  definitions = @doc.parse_infos[:abbrev_defs]
  if definitions[abbrev_id]
    warning("Duplicate abbreviation ID '#{abbrev_id}' - overwriting")
  end
  definitions[abbrev_id] = abbrev_text
  @tree.children << Element.new(:eob, :abbrev_def)
  true
end

#parse_ald ⇒ `Object`

Parse the attribute list definition at the current location.

# File 'lib/newstile/parser/newstile/attribute_list.rb', line 67

# Parse the attribute list definition at the current location.
#
# The attributes (capture 2) are merged into the entry of
# doc.parse_infos[:ald] that is named by capture 1; the entry is created on
# first use. An :eob element is added to the tree.
def parse_ald
  @src.pos += @src.matched_size
  definitions = @doc.parse_infos[:ald]
  name = @src[1]
  definitions[name] ||= Utils::OrderedHash.new
  parse_attribute_list(@src[2], definitions[name])
  @tree.children << Element.new(:eob, :ald)
  true
end

#parse_attribute_list(str, opts) ⇒ `Object`

Parse the string str and extract all attributes and add all found attributes to the hash opts.

# File 'lib/newstile/parser/newstile/attribute_list.rb', line 29

# Parse the string +str+ and add all found attributes to the hash +opts+.
#
# * references are collected in the array opts[:refs]
# * class names are appended, separated by a space, to opts['class']
# * an ID name replaces opts['id']
# * key="value" pairs are stored under their key, with backslash escapes of
#   the closing brace and of the used quote character removed
def parse_attribute_list(str, opts)
  matches = str.scan(ALD_TYPE_ANY)
  matches.each do |key, sep, val, id_attr, class_attr, ref|
    if ref
      opts[:refs] ||= []
      opts[:refs] << ref
    elsif class_attr
      current = opts['class'] || ''
      opts['class'] = (current + " #{class_attr}").lstrip
    elsif id_attr
      opts['id'] = id_attr
    else
      opts[key] = val.gsub(/\\(\}|#{sep})/, "\\1")
    end
  end
end

#parse_atx_header ⇒ `Object`

Parse the Atx header at the current location.

# File 'lib/newstile/parser/newstile/header.rb', line 51

# Parse the Atx header at the current location.
#
# Returns false, without consuming any input, if we are not after a block
# boundary or if the line is not a complete Atx header. Otherwise a :header
# element with the options :level and :raw_text (and the optional 'id'
# attribute) is added to the tree and true is returned.
def parse_atx_header
  return false if !after_block_boundary?

  # ATX_HEADER_MATCH requires header text after the '#' characters, so a line
  # that consists only of '#' (e.g. "#\n") does not match. Without this guard
  # the captures are nil and calling strip on the missing text raises.
  return false unless @src.scan(ATX_HEADER_MATCH)

  level, text, id = @src[1], @src[2].strip, @src[3]
  el = new_block_el(:header, nil, nil, :level => level.length, :raw_text => text)
  add_text(text, el)
  el.attr['id'] = id if id
  @tree.children << el
  true
end

#parse_autolink ⇒ `Object`

Parse the autolink at the current location.

# File 'lib/newstile/parser/newstile/autolink.rb', line 42

# Parse the autolink at the current location.
#
# Capture 1 is the link target. When no URL scheme was captured (capture 2 is
# nil) the target is an e-mail address and gets a "mailto:" prefix in the
# href. The link text is the target without a leading "mailto:".
def parse_autolink
  @src.pos += @src.matched_size
  target = @src[1]
  href = (@src[2].nil? ? "mailto:#{target}" : target)
  el = Element.new(:a, nil, {'href' => href})
  add_text(target.sub(/^mailto:/, ''), el)
  @tree.children << el
end

#parse_blank_line ⇒ `Object`

Parse the blank line at the current position.

# File 'lib/newstile/parser/newstile/blank_line.rb', line 30

# Parse the blank line at the current position.
#
# Consecutive blank lines are merged: if the last child of the tree is
# already a :blank element the matched text is appended to its value,
# otherwise a new :blank element is added.
def parse_blank_line
  @src.pos += @src.matched_size
  previous = @tree.children.last
  if previous && previous.type == :blank
    previous.value += @src.matched
  else
    @tree.children << new_block_el(:blank, @src.matched)
  end
  true
end

#parse_block_extension ⇒ `Object`

Parse the extension block at the current location.



102
103
104

# File 'lib/newstile/parser/newstile/extension.rb', line 102

# Parse the extension block at the current location.
#
# Delegates to #parse_extension_start_tag with the type :block and returns
# its result.
def parse_block_extension
  parse_extension_start_tag(:block)
end

#parse_block_html ⇒ `Object`

Parse the HTML at the current position as block level HTML.

# File 'lib/newstile/parser/newstile/html.rb', line 86

# Parse the HTML at the current position as block level HTML.
#
# Tried in this order: an XML comment, a processing instruction, a start tag
# and a closing tag. Tags listed in HTML_SPAN_ELEMENTS are not handled here.
# Returns true if something was parsed and false otherwise; a closing tag
# that matches the HTML element currently being parsed ends the block
# parsing by throwing :stop_block_parsing.
def parse_block_html
  if result = @src.scan(HTML_COMMENT_RE)
    @tree.children << Element.new(:xml_comment, result, nil, :category => :block)
    # Skip the rest of the line after the comment.
    @src.scan(/[ \t]*\n/)
    true
  elsif result = @src.scan(HTML_INSTRUCTION_RE)
    @tree.children << Element.new(:xml_pi, result, nil, :category => :block)
    @src.scan(/[ \t]*\n/)
    true
  else
    # NOTE: because && binds tighter than =, result receives the value of the
    # whole && expression here; it is only used as the condition.
    if result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1])
      @src.pos += @src.matched_size
      handle_html_start_tag(&method(:handle_newstile_html_tag))
      # Optionally convert the just parsed HTML element to native elements.
      Newstile::Parser::Html::ElementConverter.new(@doc).process(@tree.children.last) if @doc.options[:html_to_native]
      true
    elsif result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1])
      name = @src[1]

      # Only the closing tag of the element that is currently parsed is consumed.
      if @tree.type == :html_element && @tree.value == name
        @src.pos += @src.matched_size
        throw :stop_block_parsing, :found
      else
        false
      end
    else
      false
    end
  end
end

#parse_block_ial ⇒ `Object`

Parse the inline attribute list at the current location.

# File 'lib/newstile/parser/newstile/attribute_list.rb', line 80

# Parse the inline attribute list at the current location.
#
# If the tree has a preceding element that is neither :blank nor :eob, the
# attributes are added to the :ial option of that element, and an :eob
# element is added unless another block IAL follows directly. Otherwise the
# attributes are stored in a new @block_ial.
def parse_block_ial
  @src.pos += @src.matched_size
  target = @tree.children.last
  if target.nil? || target.type == :blank || target.type == :eob
    @block_ial = Utils::OrderedHash.new
    parse_attribute_list(@src[1], @block_ial)
  else
    target.options[:ial] ||= Utils::OrderedHash.new
    parse_attribute_list(@src[1], target.options[:ial])
    @tree.children << Element.new(:eob, :ial) unless @src.check(IAL_BLOCK_START)
  end
  true
end

#parse_block_math ⇒ `Object`

Parse the math block at the current location.

# File 'lib/newstile/parser/newstile/math.rb', line 32

# Parse the math block at the current location.
#
# Returns false if we are not after a block boundary. If the block start is
# escaped with a backslash (capture 1), only the optional indentation and
# the backslash are consumed and false is returned. The math content
# (capture 2) becomes a :math element only when the block is followed by a
# block boundary; otherwise the scan position is restored and false is
# returned.
def parse_block_math
  return false if !after_block_boundary?

  if @src[1]
    @src.scan(/^#{OPT_SPACE}\\/)
    return false
  end

  start_pos = @src.pos
  @src.pos += @src.matched_size
  # Read the capture before the boundary check replaces the match data.
  data = @src[2]
  unless before_block_boundary?
    @src.pos = start_pos
    return false
  end

  @tree.children << new_block_el(:math, data)
  true
end

#parse_blockquote ⇒ `Object`

Parse the blockquote at the current location.

# File 'lib/newstile/parser/newstile/blockquote.rb', line 35

# Parse the blockquote at the current location.
#
# The lines matched by BLOCKQUOTE_MATCH are consumed, the blockquote markers
# (BLOCKQUOTE_START) are removed from the start of each line and the
# remaining text is parsed as block level content of a new :blockquote
# element. Always returns true.
def parse_blockquote
  el = new_block_el(:blockquote)
  @tree.children << el
  quoted = @src.scan(BLOCKQUOTE_MATCH)
  # Use gsub instead of gsub!: gsub! returns nil when nothing was replaced,
  # which would hand nil instead of the text to parse_blocks.
  parse_blocks(el, quoted.gsub(BLOCKQUOTE_START, ''))
  true
end

#parse_codeblock ⇒ `Object`

Parse the indented codeblock at the current location.

# File 'lib/newstile/parser/newstile/codeblock.rb', line 36

# Parse the indented codeblock at the current location.
#
# The text matched by CODEBLOCK_MATCH is consumed. Every newline that is
# followed by at most three spaces and a non-space character is replaced by
# a single space, then the indentation (INDENT) is removed from the start of
# each line. The result becomes the value of a new :codeblock element.
# Always returns true.
def parse_codeblock
  raw = @src.scan(CODEBLOCK_MATCH)
  # Use gsub instead of gsub! for the last step: gsub! returns nil when no
  # indentation was removed, which would create a codeblock with a nil value.
  text = raw.gsub(/\n( {0,3}\S)/, ' \\1').gsub(INDENT, '')
  @tree.children << new_block_el(:codeblock, text)
  true
end

#parse_codeblock_fenced ⇒ `Object`

Parse the fenced codeblock at the current location.

# File 'lib/newstile/parser/newstile/codeblock.rb', line 47

# Parse the fenced codeblock at the current location.
#
# Returns false without consuming input if no complete fenced block
# (FENCED_CODEBLOCK_MATCH) starts here. Otherwise the block is consumed, the
# text between the fences (capture 2) becomes a :codeblock element and true
# is returned.
def parse_codeblock_fenced
  return false unless @src.check(FENCED_CODEBLOCK_MATCH)

  @src.pos += @src.matched_size
  @tree.children << new_block_el(:codeblock, @src[2])
  true
end

#parse_codespan ⇒ `Object`

Parse the codespan at the current scanner location.

# File 'lib/newstile/parser/newstile/codespan.rb', line 30

# Parse the codespan at the current scanner location.
#
# The run of backticks at the current position is the delimiter. A single
# backtick that is surrounded by whitespace is added as plain text. If the
# delimiter does not occur again, the scanner is reset to just after the
# opening delimiter and the delimiter is added as plain text. Otherwise a
# :codespan element is created; for delimiters longer than one backtick one
# leading and one trailing space of the content are dropped.
def parse_codespan
  delimiter = @src.scan(CODESPAN_DELIMITER)
  single = (delimiter.length == 1)
  fallback_pos = @src.pos

  if single && @src.pre_match =~ /\s\Z/ && @src.match?(/\s/)
    add_text(delimiter)
    return
  end

  content = @src.scan_until(/#{delimiter}/)
  unless content
    @src.pos = fallback_pos
    return add_text(delimiter)
  end

  # Remove the closing delimiter that scan_until included.
  content.sub!(/#{delimiter}\Z/, '')
  unless single
    content = content[1..-1] if content[0..0] == ' '
    content = content[0..-2] if content[-1..-1] == ' '
  end
  @tree.children << Element.new(:codespan, content)
end

#parse_definition_list ⇒ `Object`

Parse the definition list at the current location.

# File 'lib/newstile/parser/newstile/list.rb', line 147

# Parse the definition list at the current location.
#
# A definition list is only recognized if the element before it is a
# paragraph, optionally separated from the definition by a blank element
# whose value is a single newline. The lines of that paragraph become the
# terms (:dt elements), the definitions that follow become :dd elements.
# Returns false if the preconditions are not met and true otherwise.
def parse_definition_list
  children = @tree.children
  # Bail out unless the previous element is a :p, or a one-line :blank that
  # is itself preceded by a :p.
  if !children.last || (children.length == 1 && children.last.type != :p ) ||
      (children.length >= 2 && children[-1].type != :p && (children[-1].type != :blank || children[-1].value != "\n" || children[-2].type != :p))
    return false
  end

  first_as_para = false
  deflist = new_block_el(:dl)
  # Remove the paragraph with the terms from the tree; a blank element
  # between the terms and the first definition is dropped and means that the
  # first definition is rendered as a paragraph.
  para = @tree.children.pop
  if para.type == :blank
    para = @tree.children.pop
    first_as_para = true
  end
  # Every line of the paragraph text is one term.
  para.children.first.value.split("\n").each do |term|
    el = Element.new(:dt)
    el.children << Element.new(:raw_text, term)
    deflist.children << el
  end

  item = nil
  content_re, lazy_re, indent_re = nil
  def_start_re = DEFINITION_LIST_START
  last_is_blank = false
  # Collect the raw text of each definition in item.value; it is parsed as
  # block level content after the loop.
  while !@src.eos?
    if @src.scan(def_start_re)
      # Start of a new definition.
      item = Element.new(:dd)
      item.options[:first_as_para] = first_as_para
      item.value, indentation, content_re, lazy_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
      deflist.children << item

      # A span IAL at the very start of the definition applies to the item.
      item.value.sub!(/^#{IAL_SPAN_START}\s*/) do |match|
        parse_attribute_list($~[1], item.options[:ial] ||= {})
        ''
      end

      # Following definition markers may be indented by at most three spaces
      # and by less than the content indentation of this definition.
      def_start_re = /^( {0,#{[3, indentation - 1].min}}:)([\t| ].*?\n)/
      first_as_para = false
      last_is_blank = false
    elsif @src.check(EOB_MARKER)
      break
    elsif (result = @src.scan(content_re)) || (!last_is_blank && (result = @src.scan(lazy_re)))
      # Indented content line, or a lazy line that does not follow a blank line.
      result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
      result.sub!(indent_re, '')
      item.value << result
      first_as_para = false
      last_is_blank = false
    elsif result = @src.scan(BLANK_LINE)
      # A blank line makes the next definition start as a paragraph.
      first_as_para = true
      item.value << result
      last_is_blank = true
    else
      break
    end
  end

  last = nil
  deflist.children.each do |it|
    next if it.type == :dt

    parse_blocks(it, it.value)
    it.value = nil
    next if it.children.size == 0

    # Remember a trailing blank element so that it can be re-added after the list.
    if it.children.last.type == :blank
      last = it.children.pop
    else
      last = nil
    end
    # Mark the first paragraph as transparent unless the definition was
    # flagged to start as a paragraph.
    if it.children.first.type == :p && !it.options.delete(:first_as_para)
      it.children.first.children.first.value += "\n" if it.children.size > 1
      it.children.first.options[:transparent] = true
    end
  end

  # Merge with a directly preceding definition list (optionally separated by
  # a blank element) instead of adding a second list.
  if @tree.children.length >= 1 && @tree.children.last.type == :dl
    @tree.children[-1].children += deflist.children
  elsif @tree.children.length >= 2 && @tree.children[-1].type == :blank && @tree.children[-2].type == :dl
    @tree.children.pop
    @tree.children[-1].children += deflist.children
  else
    @tree.children << deflist
  end

  @tree.children << last if !last.nil?

  true
end

#parse_emphasis ⇒ `Object`

Parse the emphasis at the current location.

# File 'lib/newstile/parser/newstile/emphasis.rb', line 30

# Parse the emphasis at the current location.
#
# A delimiter of two characters starts a :strong element, a single character
# an :em element. If no valid emphasis can be built the delimiter is added
# as plain text.
def parse_emphasis
  result = @src.scan(EMPHASIS_START)
  element = (result.length == 2 ? :strong : :em)
  type = (result =~ /_/ ? '_' : '*')
  reset_pos = @src.pos

  # Plain text if: an underscore sits between two letters, the delimiter is
  # followed by whitespace, or we are already inside an element of this type.
  if (type == '_' && @src.pre_match =~ /[[:alpha:]]\z/ && @src.check(/[[:alpha:]]/)) || @src.check(/\s/) ||
      @tree.type == element || @stack.any? {|el, _| el.type == element}
    add_text(result)
    return
  end

  # Parse the spans up to the closing delimiter. The block decides whether an
  # occurrence of the delimiter is accepted as the closing one.
  sub_parse = lambda do |delim, elem|
    el = Element.new(elem)
    stop_re = /#{Regexp.escape(delim)}/
    found = parse_spans(el, stop_re) do
      # Not accepted when: preceded by whitespace; for :em, a doubled
      # delimiter follows; an underscore is followed by a letter; or nothing
      # has been parsed into the element yet.
      (@src.pre_match[-1, 1] !~ /\s/) &&
        (elem != :em || !@src.match?(/#{Regexp.escape(delim*2)}(?!#{Regexp.escape(delim)})/)) &&
        (type != '_' || !@src.match?(/#{Regexp.escape(delim)}[[:alpha:]]/)) && el.children.size > 0
    end
    [found, el, stop_re]
  end

  found, el, stop_re = sub_parse.call(result, element)
  # Fallback: retry a failed :strong as :em, starting one character before
  # reset_pos so that the second delimiter character is part of the content.
  if !found && element == :strong && @tree.type != :em
    @src.pos = reset_pos - 1
    found, el, stop_re = sub_parse.call(type, :em)
  end
  if found
    # Consume the closing delimiter.
    @src.scan(stop_re)
    @tree.children << el
  else
    @src.pos = reset_pos
    add_text(result)
  end
end

#parse_eob_marker ⇒ `Object`

Parse the EOB marker at the current location.

# File 'lib/newstile/parser/newstile/eob.rb', line 30

# Parse the EOB marker at the current location.
#
# Consumes the matched marker and adds an :eob element to the tree. Always
# returns true.
def parse_eob_marker
  consumed = @src.matched_size
  @src.pos += consumed
  @tree.children << new_block_el(:eob)
  true
end

#parse_escaped_chars ⇒ `Object`

Parse the backslash-escaped character at the current location.

# File 'lib/newstile/parser/newstile/escaped_chars.rb', line 30

# Parse the backslash-escaped character at the current location.
#
# Consumes the escape sequence and adds the escaped character (capture 1) as
# text.
def parse_escaped_chars
  escaped = @src[1]
  @src.pos += @src.matched_size
  add_text(escaped)
end

#parse_extension_start_tag(type) ⇒ `Object`

# File 'lib/newstile/parser/newstile/extension.rb', line 29

# Parse the extension start tag that was matched at the current location.
# +type+ is :block or :span and is passed on to #handle_extension.
#
# Captures of the match: 1 = extension name, 2 = attribute list,
# 3 = the '/' of a self-closed tag, 4 = name of a stop tag.
#
# Returns true if the extension was handled. On any error a warning is
# issued, the scan position is reset and false is returned.
def parse_extension_start_tag(type)
  orig_pos = @src.pos
  @src.pos += @src.matched_size

  # Common error handling: warn, rewind to the start of the tag and, for span
  # extensions, add the first character of the tag as text.
  error_block = lambda do |msg|
    warning(msg)
    @src.pos = orig_pos
    add_text(@src.scan(/./)) if type == :span
    false
  end

  # A stop tag without a preceding start tag is invalid.
  if @src[4] || @src.matched == '{:/}'
    name = (@src[4] ? "for '#{@src[4]}' " : '')
    return error_block.call("Invalid extension stop tag #{name}found - ignoring it")
  end

  ext = @src[1]
  opts = {}
  body = nil
  parse_attribute_list(@src[2] || '', opts)

  # Unless the tag is self-closed, everything up to the stop tag is the body.
  if !@src[3]
    stop_re = (type == :block ? /#{EXT_BLOCK_STOP_STR % ext}/ : /#{EXT_STOP_STR % ext}/)
    if result = @src.scan_until(stop_re)
      # Strip the stop tag itself from the scanned text.
      body = result.sub!(stop_re, '')
      body.chomp! if type == :block
    else
      return error_block.call("No stop tag for extension '#{ext}' found - ignoring it")
    end
  end

  if !handle_extension(ext, opts, body, type)
    error_block.call("Invalid extension with name '#{ext}' specified - ignoring it")
  else
    true
  end
end

#parse_first_list_line(indentation, content) ⇒ `Object`

Used for parsing the first line of a list item or a definition, i.e. the line with list item marker or the definition marker.

# File 'lib/newstile/parser/newstile/list.rb', line 36

# Prepare the first line of a list item or definition.
#
# +indentation+ is the starting indentation count (the caller in parse_list passes the
# length of the matched marker text) and +content+ is the text after the marker.
# +content+ is modified in place.
#
# Returns the array [content, indentation, content_re, lazy_re, indent_re]: the content
# with leading whitespace removed, the final indentation, and three regexps built from it.
def parse_first_list_line(indentation, content)
  if content =~ LIST_ITEM_IAL
    indentation = 4
  else
    # Replace leading tabs by spaces: the first tab of a run fills up to the next
    # multiple of four columns (counted including +indentation+), every further tab
    # becomes four spaces.
    while content =~ /^ *\t/
      temp = content.scan(/^ */).first.length + indentation
      content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
    end
    # The leading spaces of the content count towards the indentation.
    indentation += content.scan(/^ */).first.length
  end
  content.sub!(/^\s*/, '')

  # indent_re: exactly +indentation+ leading spaces.
  indent_re = /^ {#{indentation}}/
  # content_re: a non-blank line indented by at least +indentation+ columns, where each
  # group of four columns may be written as a tab or as four spaces.
  content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*\S.*\n/
  # lazy_re: a non-blank line that does not start (after at most min(indentation, 3)
  # spaces) with a block IAL or one of the LAZY_END_HTML patterns.
  lazy_re = /(?!^ {0,#{[indentation, 3].min}}(?:#{IAL_BLOCK}|#{LAZY_END_HTML_STOP}|#{LAZY_END_HTML_START})).*\S.*\n/
  [content, indentation, content_re, lazy_re, indent_re]
end

#parse_footnote_definition ⇒ `Object`

Parse the footnote definition at the current location.

# File 'lib/newstile/parser/newstile/footnote.rb', line 34

# Consume the matched footnote definition: parse its de-indented body (capture group 2)
# into a :footnote_def element, store that element under the footnote name (capture
# group 1) in the document's footnote table and append an :eob element to the tree.
# An already registered name is overwritten after a warning. Always returns true.
def parse_footnote_definition
  @src.pos = @src.pos + @src.matched_size

  definition = Element.new(:footnote_def)
  parse_blocks(definition, @src[2].gsub(INDENT, ''))

  name = @src[1]
  footnotes = @doc.parse_infos[:footnotes]
  warning("Duplicate footnote name '#{name}' - overwriting") if footnotes[name]
  footnotes[name] = {:content => definition}
  @tree.children << Element.new(:eob, :footnote_def)
  true
end

#parse_footnote_marker ⇒ `Object`

Parse the footnote marker at the current location.

# File 'lib/newstile/parser/newstile/footnote.rb', line 50

# Parse the footnote marker that the scanner matched last; capture group 1 is used as
# the footnote name.
#
# A :footnote element is added when a definition with that name exists and no marker
# recorded for it is still reachable through its recorded element stack. In all other
# cases a warning is emitted and the matched text is added as plain text.
def parse_footnote_marker
  @src.pos += @src.matched_size
  fn_def = @doc.parse_infos[:footnotes][@src[1]]
  if fn_def
    # A previously recorded marker counts as valid only if every element of its recorded
    # stack still contains the following element among its children.
    valid = fn_def[:marker] && fn_def[:marker].options[:stack][0..-2].zip(fn_def[:marker].options[:stack][1..-1]).all? do |par, child|
      par.children.include?(child)
    end
    if !fn_def[:marker] || !valid
      fn_def[:marker] = Element.new(:footnote, nil, nil, :name => @src[1])
      # Record the chain of enclosing elements (first entries of @stack), the current tree
      # and the marker itself for the validity check above.
      fn_def[:marker].options[:stack] = [@stack.map {|s| s.first}, @tree, fn_def[:marker]].flatten.compact
      @tree.children << fn_def[:marker]
    else
      warning("Footnote marker '#{@src[1]}' already appeared in document, ignoring newly found marker")
      add_text(@src.matched)
    end
  else
    warning("Footnote definition for '#{@src[1]}' not found")
    add_text(@src.matched)
  end
end

#parse_horizontal_rule ⇒ `Object`

Parse the horizontal rule at the current location.

# File 'lib/newstile/parser/newstile/horizontal_rule.rb', line 30

# Consume the text of the last scanner match and append an :hr block element
# to the current tree. Always returns true.
def parse_horizontal_rule
  @src.pos = @src.pos + @src.matched_size
  rule = new_block_el(:hr)
  @tree.children << rule
  true
end

#parse_html_entity ⇒ `Object`

Parse the HTML entity at the current location.

# File 'lib/newstile/parser/newstile/html_entity.rb', line 30

# Consume the matched HTML entity and append an :entity element for it.
#
# The entity is looked up by name (capture group 1), by decimal code (capture group 2)
# or by hexadecimal code (capture group 3), in that order of preference. The matched
# source text is kept in the element's :original option.
def parse_html_entity
  @src.pos = @src.pos + @src.matched_size
  name, decimal, hexadecimal = @src[1], @src[2], @src[3]
  key = name || (decimal && decimal.to_i) || hexadecimal.hex
  entity = ::Newstile::Utils::Entities.entity(key)
  @tree.children << Element.new(:entity, entity, nil, :original => @src.matched)
end

#parse_inline_math ⇒ `Object`

Parse the inline math at the current location.

# File 'lib/newstile/parser/newstile/math.rb', line 56

# Consume the matched inline math and append a span level :math element whose value is
# the first capture group of the match.
def parse_inline_math
  @src.pos = @src.pos + @src.matched_size
  formula = @src[1]
  @tree.children << Element.new(:math, formula, nil, :category => :span)
end

#parse_line_break ⇒ `Object`

Parse the line break at the current location.

# File 'lib/newstile/parser/newstile/line_break.rb', line 30

# Consume the matched line break and append a :br element to the current tree.
def parse_line_break
  @src.pos = @src.pos + @src.matched_size
  line_break = Element.new(:br)
  @tree.children << line_break
end

#parse_link ⇒ `Object`

Parse the link at the current scanner position. This method is used to parse normal links as well as image links.

# File 'lib/newstile/parser/newstile/link.rb', line 65

# Parse a link or an image link at the current scanner position.
#
# On success a link/image element is handed to +add_link+. Whenever the construct turns
# out not to be a valid link, the scanner is reset to just after the start marker and
# the start marker itself is added as plain text.
def parse_link
  result = @src.scan(LINK_START)
  # Position right after the start marker; used to rewind on failure and to extract
  # the raw link text.
  reset_pos = @src.pos

  # A start marker beginning with "!" denotes an image.
  link_type = (result =~ /^!/ ? :img : :a)

  # no nested links allowed
  if link_type == :a && (@tree.type == :img || @tree.type == :a || @stack.any? {|t,s| t && (t.type == :img || t.type == :a)})
    add_text(result)
    return
  end
  el = Element.new(link_type)

  # Parse the link text up to the closing bracket that balances the opening one.
  # +count+ tracks the bracket nesting depth; the number of :img children parsed so far
  # is subtracted before comparing with zero.
  stop_re = /\]|!?\[/
  count = 1
  found = parse_spans(el, stop_re) do
    case @src.matched
    when "[", "!["
      count += 1
    when "]"
      count -= 1
    end
    count - el.children.select {|c| c.type == :img}.size == 0
  end
  # No closing bracket found, or a normal link without any text.
  if !found || (link_type == :a && el.children.empty?)
    @src.pos = reset_pos
    add_text(result)
    return
  end
  # Raw source of the link text; its normalized form serves as the implicit link ID.
  alt_text = extract_string(reset_pos...@src.pos, @src)
  conv_link_id = alt_text.gsub(/(\s|\n)+/m, ' ').gsub(LINK_ID_NON_CHARS, '').downcase
  # Consume the closing bracket.
  @src.scan(stop_re)

  # reference style link or no link url
  if @src.scan(LINK_INLINE_ID_RE) || !@src.check(/\(/)
    # Use the explicit ID (capture group 1) if given, otherwise the one derived from the text.
    link_id = (@src[1] || conv_link_id).downcase
    if link_id.empty?
      @src.pos = reset_pos
      add_text(result)
    elsif @doc.parse_infos[:link_defs].has_key?(link_id)
      # A link definition is stored as a two element array; its first and last entries are
      # passed as href and title.
      add_link(el, @doc.parse_infos[:link_defs][link_id].first, @doc.parse_infos[:link_defs][link_id].last, alt_text)
    else
      warning("No link definition for link ID '#{link_id}' found")
      @src.pos = reset_pos
      add_text(result)
    end
    return
  end

  # link url in parentheses
  if @src.scan(/\(<(.*?)>/)
    # URL enclosed in angle brackets.
    link_url = @src[1]
    if @src.scan(/\)/)
      add_link(el, link_url, nil, alt_text)
      return
    end
  else
    # Plain URL: collect text while keeping track of nested parentheses. Collection stops
    # at whitespace that is followed by a quote character or at the parenthesis that
    # brings the nesting count back to zero.
    link_url = ''
    re = /\(|\)|\s(?=['"])/
    nr_of_brackets = 0
    while temp = @src.scan_until(re)
      link_url += temp
      case @src.matched
      when /\s/
        break
      when '('
        nr_of_brackets += 1
      when ')'
        nr_of_brackets -= 1
        break if nr_of_brackets == 0
      end
    end
    # Drop the opening parenthesis and the last collected character.
    link_url = link_url[1..-2].strip

    if nr_of_brackets == 0
      add_link(el, link_url, nil, alt_text)
      return
    end
  end

  # Only reached when the URL was not closed yet: expect an inline title (capture group 2).
  if @src.scan(LINK_INLINE_TITLE_RE)
    add_link(el, link_url, @src[2], alt_text)
  else
    @src.pos = reset_pos
    add_text(result)
  end
end

#parse_link_definition ⇒ `Object`

Parse the link definition at the current location.

# File 'lib/newstile/parser/newstile/link.rb', line 33

# Consume the matched link definition and register it in the document's link
# definition table under the downcased link ID (capture group 1).
#
# The URL is taken from capture group 2 or, if that is unset, capture group 3; the title
# from capture group 5. An existing definition with the same ID is overwritten after a
# warning. An :eob element is appended to the tree. Always returns true.
def parse_link_definition
  @src.pos = @src.pos + @src.matched_size
  link_id = @src[1].downcase
  link_url = @src[2] || @src[3]
  link_title = @src[5]
  definitions = @doc.parse_infos[:link_defs]
  warning("Duplicate link ID '#{link_id}' - overwriting") if definitions[link_id]
  definitions[link_id] = [link_url, link_title]
  @tree.children << Element.new(:eob, :link_def)
  true
end

#parse_list ⇒ `Object`

Parse the ordered or unordered list at the current location.

# File 'lib/newstile/parser/newstile/list.rb', line 60

# Parse the ordered or unordered list at the current scanner position and append the
# resulting list element to the current tree. Always returns true.
#
# The work is done in two passes: the first collects the raw text of every list item,
# the second parses each item's text as blocks and post-processes the result.
def parse_list
  # An unordered list is assumed when LIST_START_UL matches, otherwise an ordered list.
  type, list_start_re = (@src.check(LIST_START_UL) ? [:ul, LIST_START_UL] : [:ol, LIST_START_OL])
  list = new_block_el(type)

  item = nil
  content_re, lazy_re, indent_re = nil
  eob_found = false
  nested_list_found = false
  last_is_blank = false
  # First pass: gather the raw text of each item into item.value.
  while !@src.eos?
    if last_is_blank && @src.check(HR_START)
      # A horizontal rule after a blank line ends the list.
      break
    elsif @src.scan(EOB_MARKER)
      eob_found = true
      break
    elsif @src.scan(list_start_re)
      # Start of a new list item: capture group 1 is the marker part, capture group 2
      # the rest of the line.
      item = Element.new(:li)
      item.value, indentation, content_re, lazy_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
      list.children << item

      # A span IAL at the very start of the item text is stored in the item's :ial option
      # and removed from the text.
      item.value.sub!(/^#{IAL_SPAN_START}\s*/) do |match|
        parse_attribute_list($~[1], item.options[:ial] ||= {})
        ''
      end

      # Following item markers may be preceded by at most min(3, indentation - 1) spaces.
      list_start_re = (type == :ul ? /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/ :
                       /^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/)
      nested_list_found = (item.value =~ LIST_START)
      last_is_blank = false
    elsif (result = @src.scan(content_re)) || (!last_is_blank && (result = @src.scan(lazy_re)))
      # Continuation line: either correctly indented or, when the previous line was not
      # blank, a lazy line. Leading tabs become four spaces each before the item
      # indentation is removed.
      result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
      result.sub!(indent_re, '')
      if !nested_list_found && result =~ LIST_START
        # Insert the text "^\n" in front of the first nested list start of this item.
        item.value << "^\n"
        nested_list_found = true
      end
      item.value << result
      last_is_blank = false
    elsif result = @src.scan(BLANK_LINE)
      nested_list_found = true
      last_is_blank = true
      item.value << result
    else
      break
    end
  end

  @tree.children << list

  # Second pass: parse the collected text of each item as block level content.
  last = nil
  list.children.each do |it|
    temp = Element.new(:temp)
    parse_blocks(temp, it.value)
    it.children = temp.children
    it.value = nil
    next if it.children.size == 0

    # Handle the case where an EOB marker is inserted by a block IAL for the first paragraph
    it.children.delete_at(1) if it.children.first.type == :p &&
      it.children.length >= 2 && it.children[1].type == :eob && it.children.first.options[:ial]

    # Decide whether the first paragraph of the item gets the :transparent option. When
    # another non-blank element follows it, a newline is appended to its first child's value.
    if it.children.first.type == :p &&
        (it.children.length < 2 || it.children[1].type != :blank ||
         (it == list.children.last && it.children.length == 2 && !eob_found)) &&
        (list.children.last != it || list.children.size == 1 ||
         list.children[0..-2].any? {|cit| cit.children.first.type != :p || cit.children.first.options[:transparent]})
      it.children.first.children.first.value += "\n" if it.children.size > 1 && it.children[1].type != :blank
      it.children.first.options[:transparent] = true
    end

    # Remove a trailing blank element from the item and remember it; after the loop
    # +last+ refers to the one removed from the final item, if any.
    if it.children.last.type == :blank
      last = it.children.pop
    else
      last = nil
    end
  end

  # Re-add the remembered blank element after the list unless the list was ended by an
  # EOB marker.
  @tree.children << last if !last.nil? && !eob_found

  true
end

#parse_newstile_header ⇒ `Object`

Parse the newstile header at the current location.

# File 'lib/newstile/parser/newstile/header.rb', line 69

# Parse the newstile header at the current location.
#
# Returns false without consuming anything when not located after a block boundary.
# Otherwise scans NEWSTILE_HEADER_MATCH and appends a :header element whose level is the
# length of capture group 1, whose text is capture group 2 and whose 'id' attribute is
# capture group 3 (when present); returns true.
def parse_newstile_header
  return false unless after_block_boundary?

  @src.scan(NEWSTILE_HEADER_MATCH)
  marker = @src[1]
  title = @src[2]
  anchor = @src[3]
  header = new_block_el(:header, nil, nil, :level => marker.length, :raw_text => title)
  add_text(title, header)
  header.attr['id'] = anchor if anchor
  @tree.children << header
  true
end

#parse_newstile_link ⇒ `Object`

Parse the newstile-style link or image at the current location.

# File 'lib/newstile/parser/newstile/link.rb', line 157

# Consume the matched newstile link and append an :img or :a element for it.
#
# An image is assumed when the character at the scanner position (before the match is
# consumed) is "!". Capture group 1, when set, is added as plain text first; capture
# groups 2 and 3 provide the alt text/link text and the source/target. Always returns true.
def parse_newstile_link
  is_image = (@src.string[@src.pos] == '!'[0])
  @src.pos = @src.pos + @src.matched_size
  leading = @src[1]
  add_text(leading) if leading

  label, target = @src[2], @src[3]
  if is_image
    node = Element.new(:img, nil, {'src' => target, 'alt' => label})
  else
    node = Element.new(:a, nil, {'href' => target})
    node.children = [ Element.new(:text, label) ]
  end
  @tree.children << node
  true
end

#parse_paragraph ⇒ `Object`

Parse the paragraph at the current location.

# File 'lib/newstile/parser/newstile/paragraph.rb', line 41

# Scan PARAGRAPH_MATCH at the current location. When the last element of the tree is
# already a paragraph, the scanned text is appended to the value of that paragraph's
# first child (separated by a newline); otherwise a new :p element with a single child
# of type @text_type is created. Always returns true.
def parse_paragraph
  line = @src.scan(PARAGRAPH_MATCH)
  previous = @tree.children.last
  if previous && previous.type == :p
    previous.children.first.value << "\n" << line.chomp
  else
    paragraph = new_block_el(:p)
    @tree.children << paragraph
    paragraph.children << Element.new(@text_type, line.lstrip.chomp)
  end
  true
end

#parse_setext_header ⇒ `Object`

Parse the Setext header at the current location.

# File 'lib/newstile/parser/newstile/header.rb', line 33

# Parse the Setext header that the scanner matched last.
#
# Returns false without consuming anything when not located after a block boundary.
# Otherwise appends a :header element built from the stripped capture group 1 (text),
# capture group 2 (optional id) and capture group 3 (underline character: '-' gives
# level 2, anything else level 1) and returns true.
def parse_setext_header
  return false unless after_block_boundary?

  @src.pos = @src.pos + @src.matched_size
  title = @src[1].strip
  anchor = @src[2]
  underline = @src[3]
  depth = (underline == '-' ? 2 : 1)
  header = new_block_el(:header, nil, nil, :level => depth, :raw_text => title)
  add_text(title, header)
  header.attr['id'] = anchor if anchor
  @tree.children << header
  true
end

#parse_smart_quotes ⇒ `Object`

Parse the smart quotes at the current location.

# File 'lib/newstile/parser/newstile/smart_quotes.rb', line 199

# Scan the first rule of SQ_RULES that matches at the current location and emit its
# substitutions in order: an Integer refers to a capture group whose text is added as
# plain text, any other entry becomes a :smart_quote element. For the latter, the last
# character of the entry's name selects the capture group used, together with the entry
# itself, to look up a replacement in SQ_SUBSTS; the entry is used when nothing is found.
def parse_smart_quotes
  _rule, substs = SQ_RULES.find {|pattern, _| @src.scan(pattern)}
  substs.each do |subst|
    case subst
    when Integer
      add_text(@src[subst].to_s)
    else
      group = subst.to_s[-1,1].to_i
      quote = SQ_SUBSTS[[subst, @src[group]]] || subst
      @tree.children << Element.new(:smart_quote, quote)
    end
  end
end

#parse_span_extension ⇒ `Object`

Parse the extension span at the current location.



109
110
111

# File 'lib/newstile/parser/newstile/extension.rb', line 109

# Span level entry point for extensions: delegates to parse_extension_start_tag with the
# :span type and returns its result.
def parse_span_extension
  level = :span
  parse_extension_start_tag(level)
end

#parse_span_html ⇒ `Object`

Parse the HTML at the current position as span level HTML.

# File 'lib/newstile/parser/newstile/html.rb', line 121

# Parse the HTML at the current position as span level HTML.
#
# Comments and processing instructions become :xml_comment / :xml_pi elements. Stray
# closing tags and block level tags are added as plain text after a warning. Any other
# tag becomes an :html_element whose content is parsed up to the matching closing tag.
# If nothing HTML-like matches, a single character is added as text.
def parse_span_html
  if result = @src.scan(HTML_COMMENT_RE)
    @tree.children << Element.new(:xml_comment, result, nil, :category => :span)
  elsif result = @src.scan(HTML_INSTRUCTION_RE)
    @tree.children << Element.new(:xml_pi, result, nil, :category => :span)
  elsif result = @src.scan(HTML_TAG_CLOSE_RE)
    warning("Found invalidly used HTML closing tag for '#{@src[1]}'")
    add_text(result)
  elsif result = @src.scan(HTML_TAG_RE)
    # Capture group 1 is the tag name, capture group 2 the attribute string; capture
    # group 4 being set is treated below as "tag has no separate body".
    if HTML_BLOCK_ELEMENTS.include?(@src[1])
      warning("Found block HTML tag '#{@src[1]}' in span level text")
      add_text(result)
      return
    end

    reset_pos = @src.pos
    attrs = Utils::OrderedHash.new
    # Newlines inside attribute values are collapsed into a single space.
    @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val.gsub(/\n+/, ' ')}

    # Default: no parsing of the content for raw-only tags or inside a raw parent,
    # otherwise the document option :parse_span_html decides.
    do_parsing = (HTML_PARSE_AS_RAW.include?(@src[1]) || @tree.options[:parse_type] == :raw ? false : @doc.options[:parse_span_html])
    # A 'markdown' attribute on the tag overrides the default and is removed from the
    # attributes.
    if val = html_parse_type(attrs.delete('markdown'))
      if val == :block
        warning("Cannot use block level parsing in span level HTML tag - using default mode")
      elsif val == :span
        do_parsing = true
      elsif val == :default
        do_parsing = !HTML_PARSE_AS_RAW.include?(@src[1])
      elsif val == :raw
        do_parsing = false
      end
    end

    el = Element.new(:html_element, @src[1], attrs, :category => :span, :parse_type => (do_parsing ? :span : :raw))
    @tree.children << el
    stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
    if !@src[4] && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
      warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
    elsif !@src[4]
      # Parse the content up to the closing tag; when content parsing is disabled only
      # the :span_html parser is passed on to parse_spans.
      if parse_spans(el, stop_re, (do_parsing ? nil : [:span_html]))
        @src.scan(stop_re)
      else
        # No closing tag: the whole remaining text becomes the content of the element.
        warning("Found no end tag for '#{el.value}' - auto-closing it")
        add_text(@src.scan(/.*/m), el)
      end
    end
    Newstile::Parser::Html::ElementConverter.new(@doc).process(el) if @doc.options[:html_to_native]
  else
    add_text(@src.scan(/./))
  end
end

#parse_span_ial ⇒ `Object`

Parse the inline attribute list at the current location.

# File 'lib/newstile/parser/newstile/attribute_list.rb', line 96

# Consume the matched span IAL and apply it to the element preceding it.
#
# The attribute list (capture group 1) is parsed and merged into both the :ial option
# and the attributes of the last child of the current tree. When there is no preceding
# element or it is plain text, the IAL is ignored with a warning.
def parse_span_ial
  @src.pos = @src.pos + @src.matched_size
  target = @tree.children.last
  if !target || target.type == :text
    warning("Ignoring span IAL because preceding element is just text")
  else
    attributes = Utils::OrderedHash.new
    parse_attribute_list(@src[1], attributes)
    target.options[:ial] ||= Utils::OrderedHash.new
    update_ial_with_ial(target.options[:ial], attributes)
    update_attr_with_ial(target.attr, attributes)
  end
end

#parse_summary ⇒ `Object`

Parse the summary at the current location.

# File 'lib/newstile/parser/newstile/blockquote.rb', line 48

# Parse the summary at the current location.
#
# Scans SUMMARY_MATCH, removes every occurrence of SUMMARY_START from the scanned text
# and then either appends the text to a directly preceding :summary element (separated
# by a newline) or creates a new :summary element with a single child of type @text_type.
#
# Returns true when text was consumed, false when SUMMARY_MATCH does not match at the
# current position (the scanner is not moved in that case).
def parse_summary
  scanned = @src.scan(SUMMARY_MATCH)
  return false if scanned.nil?

  # Deliberately the non-destructive gsub: gsub! returns nil when nothing is replaced,
  # which would make the chomp/lstrip calls below fail for text that contains no
  # SUMMARY_START occurrence.
  result = scanned.gsub(SUMMARY_START, '')
  if @tree.children.last && @tree.children.last.type == :summary
    @tree.children.last.children.first.value << "\n" << result.chomp
  else
    @tree.children << new_block_el(:summary)
    @tree.children.last.children << Element.new(@text_type, result.lstrip.chomp)
  end
  true
end

#parse_table ⇒ `Object`

Parse the table at the current location.

# File 'lib/newstile/parser/newstile/table.rb', line 37

# Parse the table at the current location.
#
# Returns false when not located after a block boundary, when the table lines are not
# followed by a block boundary, or when no table body results; in the latter two cases
# the scanner is reset to where it started. Otherwise the :table element is appended to
# the current tree and true is returned.
def parse_table
  return false if !after_block_boundary?

  orig_pos = @src.pos
  table = new_block_el(:table, nil, nil, :alignment => [])
  # Whether the first table line starts (after optional whitespace) with a pipe.
  leading_pipe = (@src.check(TABLE_LINE) =~ /^\s*\|/)
  # Skip a separator line at the very start of the table.
  @src.scan(TABLE_SEP_LINE)

  rows = []
  has_footer = false
  columns = 0

  # Move the rows collected so far into a new container element of the given type and
  # start a fresh row list. Without +force+, a :tbody is not created once a footer
  # separator has been seen.
  add_container = lambda do |type, force|
    if force || type != :tbody || !has_footer
      cont = Element.new(type)
      cont.children, rows = rows, []
      table.children << cont
    end
  end

  while !@src.eos?
    break if !@src.check(TABLE_LINE)
    if @src.scan(TABLE_SEP_LINE) && !rows.empty?
      if table.options[:alignment].empty? && !has_footer
        # First separator line with rows before it: those rows form the header and the
        # separator line defines the column alignments.
        add_container.call(:thead, false)
        table.options[:alignment] = @src[1].scan(TABLE_HSEP_ALIGN).map do |left, right|
          (left.empty? && right.empty? && :default) || (right.empty? && :left) || (left.empty? && :right) || :center
        end
      else # treat as normal separator line
        add_container.call(:tbody, false)
      end
    elsif @src.scan(TABLE_FSEP_LINE)
      # Footer separator: rows collected so far become a body, later rows the footer.
      add_container.call(:tbody, true) if !rows.empty?
      has_footer = true
    elsif @src.scan(TABLE_ROW_LINE)
      trow = Element.new(:tr)
      cells = (@src[1] + ' ').split(/\|/)
      # Re-join cells that were split at a pipe preceded by an odd number of
      # backslashes; the last backslash is dropped.
      i = 0
      while i < cells.length - 1
        backslashes = cells[i].scan(/\\+$/).first
        if backslashes && backslashes.length % 2 == 1
          cells[i] = cells[i].chop + '|' + cells[i+1]
          cells.delete_at(i+1)
        else
          i += 1
        end
      end
      # Drop the empty cells produced by a leading and a trailing pipe.
      cells.shift if leading_pipe && cells.first.strip.empty?
      cells.pop if cells.last.strip.empty?
      cells.each do |cell_text|
        tcell = Element.new(:td)
        tcell.children << Element.new(:raw_text, cell_text.strip)
        trow.children << tcell
      end
      columns = [columns, cells.length].max
      rows << trow
    else
      break
    end
  end

  if !before_block_boundary?
    @src.pos = orig_pos
    return false
  end

  # Remaining rows form the footer if a footer separator was seen, else a body.
  add_container.call(has_footer ? :tfoot : :tbody, false) if !rows.empty?

  if !table.children.any? {|c| c.type == :tbody}
    warning("Found table without body - ignoring it")
    @src.pos = orig_pos
    return false
  end

  # adjust all table rows to have equal number of columns, same for alignment defs
  table.children.each do |kind|
    kind.children.each do |row|
      (columns - row.children.length).times do
        row.children << Element.new(:td)
      end
      # Cells of header rows are turned into :th elements.
      row.children.each {|el| el.type = :th} if kind.type == :thead
    end
  end
  if table.options[:alignment].length > columns
    table.options[:alignment] = table.options[:alignment][0...columns]
  else
    table.options[:alignment] += [:default] * (columns - table.options[:alignment].length)
  end

  @tree.children << table

  true
end

#parse_typographic_syms ⇒ `Object`

Parse the typographic symbols at the current location.

# File 'lib/newstile/parser/newstile/typographic_symbol.rb', line 37

# Consume the matched typographic symbol.
#
# When TYPOGRAPHIC_SYMS_SUBST maps the matched text to a Symbol, a :typographic_sym
# element is appended. Otherwise two :entity elements are appended: 'lt' entities for
# the matched text '\<<' and 'gt' entities for everything else.
def parse_typographic_syms
  @src.pos = @src.pos + @src.matched_size
  matched = @src.matched
  subst = TYPOGRAPHIC_SYMS_SUBST[matched]
  return @tree.children << Element.new(:typographic_sym, subst) if subst.kind_of?(Symbol)

  entity_name = (matched == '\\<<' ? 'lt' : 'gt')
  2.times do
    @tree.children << Element.new(:entity, ::Newstile::Utils::Entities.entity(entity_name))
  end
  @tree.children
end

#replace_abbreviations(el, regexps = nil) ⇒ `Object`

Replace the abbreviation text with elements.

# File 'lib/newstile/parser/newstile/abbreviation.rb', line 41

# Replace abbreviations inside the text children of +el+ with :abbreviation elements,
# recursing into all non-text children. The children of +el+ are modified in place.
#
# +regexps+ is only passed on recursive calls; when nil, it is built from the keys of
# the document's abbreviation definitions. Does nothing if no abbreviations are defined.
def replace_abbreviations(el, regexps = nil)
  return if @doc.parse_infos[:abbrev_defs].empty?
  if !regexps
    # regexps.first matches any defined abbreviation literally; regexps.last is a
    # lookahead for an abbreviation preceded by a non-word character or line start and
    # not followed by a word character.
    regexps = [Regexp.union(*@doc.parse_infos[:abbrev_defs].keys.map {|k| /#{Regexp.escape(k)}/})]
    regexps << /(?=(?:\W|^)#{regexps.first}(?!\w))/ # regexp should only match on word boundaries
  end
  el.children.map! do |child|
    if child.type == :text
      # Split the text into alternating :text and :abbreviation elements.
      result = []
      strscan = StringScanner.new(child.value)
      while temp = strscan.scan_until(regexps.last)
        # The lookahead consumed nothing of the abbreviation: take over the separator
        # (if any) in front of it, then the abbreviation itself.
        temp += strscan.scan(/\W|^/)
        abbr = strscan.scan(regexps.first)
        result += [Element.new(:text, temp), Element.new(:abbreviation, abbr)]
      end
      # The remaining text after the last abbreviation.
      result + [Element.new(:text, extract_string(strscan.pos..-1, strscan))]
    else
      replace_abbreviations(child, regexps)
      child
    end
  # Text children were replaced by arrays of elements, so flatten the list again.
  end.flatten!
end

#update_ial_with_ial(ial, opts) ⇒ `Object`

Update the ial with the information from the inline attribute list opts.

# File 'lib/newstile/parser/newstile/attribute_list.rb', line 44

# Merge the inline attribute list +opts+ into +ial+.
#
# The value of opts[:refs] is appended as one entry to ial[:refs] (created on demand).
# Of the remaining entries only those with String keys are taken over: 'class' values
# are appended to an existing class value separated by a space, all others overwrite.
def update_ial_with_ial(ial, opts)
  ial[:refs] ||= []
  ial[:refs] << opts[:refs]
  opts.each do |key, value|
    next unless key.kind_of?(String)
    ial[key] = (key == 'class' ? ((ial[key] || '') + " #{value}").lstrip : value)
  end
end

Class: Newstile::Parser::Newstile

Overview

Defined Under Namespace

Constant Summary collapse

Constants included from Html::Parser

Constants included from Html::Constants

Constants included from Newstile

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Html::Parser

Methods included from Newstile

Methods inherited from Base

Constructor Details

#initialize(doc) ⇒ Newstile

Instance Attribute Details

#doc ⇒ Object (readonly)

#options ⇒ Object (readonly)

#tree ⇒ Object (readonly)

Instance Method Details

#add_link(el, href, title, alt_text = nil) ⇒ Object

#after_block_boundary? ⇒ Boolean

#before_block_boundary? ⇒ Boolean

#handle_extension(name, opts, body, type) ⇒ Object

#handle_newstile_html_tag(el, closed) ⇒ Object

#html_parse_type(val) ⇒ Object

#parse(source) ⇒ Object

#parse_abbrev_definition ⇒ Object

#parse_ald ⇒ Object

#parse_attribute_list(str, opts) ⇒ Object

#parse_atx_header ⇒ Object

#parse_autolink ⇒ Object

#parse_blank_line ⇒ Object

#parse_block_extension ⇒ Object

#parse_block_html ⇒ Object

#parse_block_ial ⇒ Object

#parse_block_math ⇒ Object

#parse_blockquote ⇒ Object

#parse_codeblock ⇒ Object

#parse_codeblock_fenced ⇒ Object

#parse_codespan ⇒ Object

#parse_definition_list ⇒ Object

#parse_emphasis ⇒ Object

#parse_eob_marker ⇒ Object

#parse_escaped_chars ⇒ Object

#parse_extension_start_tag(type) ⇒ Object

#parse_first_list_line(indentation, content) ⇒ Object

#parse_footnote_definition ⇒ Object

#parse_footnote_marker ⇒ Object

#parse_horizontal_rule ⇒ Object

#parse_html_entity ⇒ Object

#parse_inline_math ⇒ Object

#parse_line_break ⇒ Object

#parse_link ⇒ Object

#parse_link_definition ⇒ Object

#parse_list ⇒ Object

#parse_newstile_header ⇒ Object

#parse_newstile_link ⇒ Object

#parse_paragraph ⇒ Object

#parse_setext_header ⇒ Object

#parse_smart_quotes ⇒ Object

#parse_span_extension ⇒ Object

#parse_span_html ⇒ Object

#parse_span_ial ⇒ Object

#parse_summary ⇒ Object

#parse_table ⇒ Object

#parse_typographic_syms ⇒ Object

#replace_abbreviations(el, regexps = nil) ⇒ Object

#update_ial_with_ial(ial, opts) ⇒ Object

#initialize(doc) ⇒ `Newstile`

#doc ⇒ `Object` (readonly)

#options ⇒ `Object` (readonly)

#tree ⇒ `Object` (readonly)

#add_link(el, href, title, alt_text = nil) ⇒ `Object`

#after_block_boundary? ⇒ `Boolean`

#before_block_boundary? ⇒ `Boolean`

#handle_extension(name, opts, body, type) ⇒ `Object`

#handle_newstile_html_tag(el, closed) ⇒ `Object`

#html_parse_type(val) ⇒ `Object`

#parse(source) ⇒ `Object`

#parse_abbrev_definition ⇒ `Object`

#parse_ald ⇒ `Object`

#parse_attribute_list(str, opts) ⇒ `Object`

#parse_atx_header ⇒ `Object`

#parse_autolink ⇒ `Object`

#parse_blank_line ⇒ `Object`

#parse_block_extension ⇒ `Object`

#parse_block_html ⇒ `Object`

#parse_block_ial ⇒ `Object`

#parse_block_math ⇒ `Object`

#parse_blockquote ⇒ `Object`

#parse_codeblock ⇒ `Object`

#parse_codeblock_fenced ⇒ `Object`

#parse_codespan ⇒ `Object`

#parse_definition_list ⇒ `Object`

#parse_emphasis ⇒ `Object`

#parse_eob_marker ⇒ `Object`

#parse_escaped_chars ⇒ `Object`

#parse_extension_start_tag(type) ⇒ `Object`

#parse_first_list_line(indentation, content) ⇒ `Object`

#parse_footnote_definition ⇒ `Object`

#parse_footnote_marker ⇒ `Object`

#parse_horizontal_rule ⇒ `Object`

#parse_html_entity ⇒ `Object`

#parse_inline_math ⇒ `Object`

#parse_line_break ⇒ `Object`

#parse_link ⇒ `Object`

#parse_link_definition ⇒ `Object`

#parse_list ⇒ `Object`

#parse_newstile_header ⇒ `Object`

#parse_newstile_link ⇒ `Object`

#parse_paragraph ⇒ `Object`

#parse_setext_header ⇒ `Object`

#parse_smart_quotes ⇒ `Object`

#parse_span_extension ⇒ `Object`

#parse_span_html ⇒ `Object`

#parse_span_ial ⇒ `Object`

#parse_summary ⇒ `Object`

#parse_table ⇒ `Object`

#parse_typographic_syms ⇒ `Object`

#replace_abbreviations(el, regexps = nil) ⇒ `Object`

#update_ial_with_ial(ial, opts) ⇒ `Object`