Class: Newstile::Parser::Newstile

Inherits:
Base
  • Object
show all
Includes:
Newstile, Html::Parser
Defined in:
lib/newstile/parser/newstile.rb,
lib/newstile/parser/newstile/eob.rb,
lib/newstile/parser/newstile/html.rb,
lib/newstile/parser/newstile/link.rb,
lib/newstile/parser/newstile/list.rb,
lib/newstile/parser/newstile/math.rb,
lib/newstile/parser/newstile/table.rb,
lib/newstile/parser/newstile/header.rb,
lib/newstile/parser/newstile/autolink.rb,
lib/newstile/parser/newstile/codespan.rb,
lib/newstile/parser/newstile/emphasis.rb,
lib/newstile/parser/newstile/footnote.rb,
lib/newstile/parser/newstile/codeblock.rb,
lib/newstile/parser/newstile/extension.rb,
lib/newstile/parser/newstile/paragraph.rb,
lib/newstile/parser/newstile/blank_line.rb,
lib/newstile/parser/newstile/blockquote.rb,
lib/newstile/parser/newstile/line_break.rb,
lib/newstile/parser/newstile/html_entity.rb,
lib/newstile/parser/newstile/abbreviation.rb,
lib/newstile/parser/newstile/smart_quotes.rb,
lib/newstile/parser/newstile/escaped_chars.rb,
lib/newstile/parser/newstile/attribute_list.rb,
lib/newstile/parser/newstile/block_boundary.rb,
lib/newstile/parser/newstile/horizontal_rule.rb,
lib/newstile/parser/newstile/typographic_symbol.rb

Overview

Used for parsing a document in newstile format.

If you want to extend the functionality of the parser, you need to do the following:

  • Create a new subclass

  • add the needed parser methods

  • modify the @block_parsers and @span_parsers variables and add the names of your parser methods

Here is a small example for an extended parser class that parses ERB style tags as raw text if they are used as span level elements (an equivalent block level parser should probably also be made to handle the block case):

require 'newstile/parser/newstile'

# Example of an extended parser: ERB style tags (<% ... %>) that appear at
# span level are kept as raw text instead of being parsed.
class Newstile::Parser::ERBNewstile < Newstile::Parser::Newstile

   # Puts the ERB span parser in front of the inherited span parsers.
   def initialize(doc)
     super(doc)
     @span_parsers.unshift(:erb_tags)
   end

   ERB_TAGS_START = /<%.*?%>/

   # Skips over the matched ERB tag and adds it to the tree as a :raw element.
   def parse_erb_tags
     @src.pos += @src.matched_size
     @tree.children << Element.new(:raw, @src.matched)
   end
   define_parser(:erb_tags, ERB_TAGS_START, '<%')

end

The new parser can be used like this:

require 'newstile/document'
# require the file with the above parser class

Newstile::Document.new(input_text, :input => 'ERBNewstile').to_html

Defined Under Namespace

Classes: Data

Constant Summary collapse

EOB_MARKER =
/^\^\s*?\n/
HTML_BLOCK_START =
/^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
HTML_SPAN_START =
/<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
PUNCTUATION_CHARS =
"_.:,;!?-"
/[a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
/[^a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
/^#{OPT_SPACE}\[(#{LINK_ID_CHARS}+)\]:[ \t]*(?:<(.*?)>|([^'"\n]*?\S[^'"\n]*?))[ \t]*?(?:\n?[ \t]*?(["'])(.+?)\4[ \t]*?)?\n/
/\\\[|\\\]|\[|\]/
/\s*?\[(#{LINK_ID_CHARS}+)?\]/
/\s*?(["'])(.+?)\1\s*?\)/
/!?\[(?=[^^])/
/!?([^\\]?)\"([^\"]+)\"\:([\S]+[\w\/])/
LIST_ITEM_IAL =
/^\s*(#{IAL_SPAN_START})?\s*\n/
LIST_START_UL =
/^(#{OPT_SPACE}[+*])([\t| ].*?\n)/
LIST_START_OL =
/^(#{OPT_SPACE}\d+\.)([\t| ].*?\n)/
LIST_START =
/#{LIST_START_UL}|#{LIST_START_OL}/
DEFINITION_LIST_START =
/^(#{OPT_SPACE}:)([\t| ].*?\n)/
BLOCK_MATH_START =
/^#{OPT_SPACE}(\\)?\$\$(.*?)\$\$\s*?\n/m
INLINE_MATH_START =
/\$\$(.*?)\$\$/
TABLE_SEP_LINE =
/^([+|: -]*?-[+|: -]*?)[ \t]*\n/
TABLE_HSEP_ALIGN =
/[ ]?(:?)-+(:?)[ ]?/
TABLE_FSEP_LINE =
/^[+|: =]*?=[+|: =]*?[ \t]*\n/
TABLE_ROW_LINE =
/^(.*?)[ \t]*\n/
TABLE_LINE =
/(?:\||.*?[^\\\n]\|).*?\n/
TABLE_START =
/^#{OPT_SPACE}(?=\S)#{TABLE_LINE}/
HEADER_ID =
/(?:[ \t]\{#(\w[\w-]*)\})?/
SETEXT_HEADER_START =
/^(#{OPT_SPACE}[^ \t].*?)#{HEADER_ID}[ \t]*?\n(-|=)+\s*?\n/
ATX_HEADER_START =
/^\#{1,6}/
ATX_HEADER_MATCH =
/^(\#{1,6})(.+?)\s*?#*#{HEADER_ID}\s*?\n/
NEWSTILE_HEADER_START =
/^!{1,6} /
NEWSTILE_HEADER_MATCH =
/^(!{1,6}) +(.+?)\s*?\!*#{HEADER_ID}\s*?\n/
ACHARS =
'[[:alnum:]]'
"<((mailto|https?|ftps?):.+?|[-.#{ACHARS}]+@[-#{ACHARS}]+(\.[-#{ACHARS}]+)*\.[a-z]+)>"
/#{AUTOLINK_START_STR}/
CODESPAN_DELIMITER =
/`+/
EMPHASIS_START =
/(?:\*\*?|__?)/
FOOTNOTE_DEFINITION_START =
/^#{OPT_SPACE}\[\^(#{ALD_ID_NAME})\]:\s*?(.*?\n#{CODEBLOCK_MATCH})/
FOOTNOTE_MARKER_START =
/\[\^(#{ALD_ID_NAME})\]/
CODEBLOCK_START =
INDENT
CODEBLOCK_MATCH =
/(?:#{BLANK_LINE}?(?:#{INDENT}[ \t]*\S.*\n)+(?:(?!#{BLANK_LINE} {0,3}\S|#{IAL_BLOCK_START}|#{EOB_MARKER}|^#{OPT_SPACE}#{LAZY_END_HTML_STOP}|^#{OPT_SPACE}#{LAZY_END_HTML_START})^[ \t]*\S.*\n)*)*/
FENCED_CODEBLOCK_START =
/^~{3,}/
FENCED_CODEBLOCK_MATCH =
/^(~{3,})\s*?\n(.*?)^\1~*\s*?\n/m
EXT_STOP_STR =
"\\{:/(%s)?\\}"
EXT_START_STR =
"\\{::(\\w+)(?:\\s(#{ALD_ANY_CHARS}*?)|)(\\/)?\\}"
EXT_SPAN_START =
/#{EXT_START_STR}|#{EXT_STOP_STR % ALD_ID_NAME}/
EXT_BLOCK_START =
/^#{OPT_SPACE}(?:#{EXT_START_STR}|#{EXT_STOP_STR % ALD_ID_NAME})\s*?\n/
EXT_BLOCK_STOP_STR =
"^#{OPT_SPACE}#{EXT_STOP_STR}\s*?\n"
LAZY_END_HTML_SPAN_ELEMENTS =
HTML_SPAN_ELEMENTS + %w{script}
LAZY_END_HTML_START =
/<(?>(?!(?:#{LAZY_END_HTML_SPAN_ELEMENTS.join('|')})\b)#{REXML::Parsers::BaseParser::UNAME_STR})\s*(?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\1)*\s*\/?>/m
LAZY_END_HTML_STOP =
/<\/(?!(?:#{LAZY_END_HTML_SPAN_ELEMENTS.join('|')})\b)#{REXML::Parsers::BaseParser::UNAME_STR}\s*>/m
PARAGRAPH_START =
/^#{OPT_SPACE}[^ \t].*?\n/
PARAGRAPH_MATCH =
/(?:^.*\n)+?(?=#{BLANK_LINE}|#{IAL_BLOCK_START}|#{EOB_MARKER}|#{DEFINITION_LIST_START}|^#{OPT_SPACE}#{LAZY_END_HTML_STOP}|^#{OPT_SPACE}#{LAZY_END_HTML_START}|\Z)/
BLANK_LINE =
/(?:^\s*\n)+/
BLOCKQUOTE_START =
/^#{OPT_SPACE}> ?/
BLOCKQUOTE_MATCH =
/(^.*\n)+?(?=#{BLANK_LINE}|#{IAL_BLOCK_START}|#{EOB_MARKER}|^#{OPT_SPACE}#{LAZY_END_HTML_STOP}|^#{OPT_SPACE}#{LAZY_END_HTML_START}|\Z)/
SUMMARY_START =
/^#{OPT_SPACE}\/\/\. ?/
SUMMARY_MATCH =
/(^.*\n)+?(?=#{BLANK_LINE}|#{IAL_BLOCK_START}|#{EOB_MARKER}|^#{OPT_SPACE}#{LAZY_END_HTML_STOP}|^#{OPT_SPACE}#{LAZY_END_HTML_START}|\Z)/
LINE_BREAK =
/(  |\\\\)(?=\n)/
ABBREV_DEFINITION_START =
/^#{OPT_SPACE}\*\[(.+?)\]:(.*?)\n/
SQ_PUNCT =
'[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
SQ_CLOSE =
%![^\ \\\\\t\r\n\\[{(-]!
SQ_RULES =
[
 [/("|')(?=#{SQ_PUNCT}\B)/, [:rquote1]],
 # Special case for double sets of quotes, e.g.:
 #   <p>He said, "'Quoted' words in a larger quote."</p>
 [/(\s?)"'(?=\w)/, [1, :ldquo, :lsquo]],
 [/(\s?)'"(?=\w)/, [1, :lsquo, :ldquo]],
 # Special case for decade abbreviations (the '80s):
 [/(\s?)'(?=\d\ds)/, [1, :rsquo]],

 # Get most opening single/double quotes:
 [/(\s)('|")(?=\w)/, [1, :lquote2]],
 # Single/double closing quotes:
 [/(#{SQ_CLOSE})('|")/, [1, :rquote2]],
 # Special case for e.g. "<i>Custer</i>'s Last Stand."
 [/("|')(\s|s\b|$)/, [:rquote1, 2]],
 # Any remaining single quotes should be opening ones:
 [/(.?)'/m, [1, :lsquo]],
 [/(.?)"/m, [1, :ldquo]],
]
SQ_SUBSTS =

‘“

{
  [:rquote1, '"'] => :rdquo,
  [:rquote1, "'"] => :rsquo,
  [:rquote2, '"'] => :rdquo,
  [:rquote2, "'"] => :rsquo,
  [:lquote1, '"'] => :ldquo,
  [:lquote1, "'"] => :lsquo,
  [:lquote2, '"'] => :ldquo,
  [:lquote2, "'"] => :lsquo,
}
SMART_QUOTES_RE =
/[^\\]?["']/
ESCAPED_CHARS =
/\\([\\.*_+`()\[\]{}#!:|"'\$=-])/
ALD_ID_CHARS =
/[\w-]/
ALD_ANY_CHARS =
/\\\}|[^\}]/
ALD_ID_NAME =
/\w#{ALD_ID_CHARS}*/
ALD_TYPE_KEY_VALUE_PAIR =
/(#{ALD_ID_NAME})=("|')((?:\\\}|\\\2|[^\}\2])*?)\2/
ALD_TYPE_CLASS_NAME =
/\.(#{ALD_ID_NAME})/
ALD_TYPE_ID_NAME =
/#(\w[\w:-]*)/
ALD_TYPE_REF =
/(#{ALD_ID_NAME})/
ALD_TYPE_ANY =
/(?:\A|\s)(?:#{ALD_TYPE_KEY_VALUE_PAIR}|#{ALD_TYPE_ID_NAME}|#{ALD_TYPE_CLASS_NAME}|#{ALD_TYPE_REF})(?=\s|\Z)/
ALD_START =
/^#{OPT_SPACE}\{:(#{ALD_ID_NAME}):(#{ALD_ANY_CHARS}+)\}\s*?\n/
IAL_BLOCK =
/\{:(?!:|\/)(#{ALD_ANY_CHARS}+)\}\s*?\n/
IAL_BLOCK_START =
/^#{OPT_SPACE}#{IAL_BLOCK}/
IAL_SPAN_START =
/\{:(#{ALD_ANY_CHARS}+)\}/
BLOCK_BOUNDARY =
/#{BLANK_LINE}|#{EOB_MARKER}|#{IAL_BLOCK_START}|\Z/
HR_START =
/^#{OPT_SPACE}(\*|-|_)[ \t]*\1[ \t]*\1[ \t]*(\1|[ \t])*\n/
TYPOGRAPHIC_SYMS =
[['---', :mdash], ['--', :ndash], 
['-. ', :qdash_space], # ['- ', :qdash_space],  
['...', :hellip],
['\\<<', '&lt;&lt;'], ['\\>>', '&gt;&gt;'],
['<< ', :laquo_space], [' >>', :raquo_space],
['<<', :laquo], ['>>', :raquo]]
TYPOGRAPHIC_SYMS_SUBST =
TYPOGRAPHIC_SYMS_RE =
/#{TYPOGRAPHIC_SYMS.map {|k,v| Regexp.escape(k)}.join('|')}/

Constants included from Html::Parser

Html::Parser::HTML_RAW_START

Constants included from Html::Constants

Html::Constants::HTML_ATTRIBUTE_RE, Html::Constants::HTML_BLOCK_ELEMENTS, Html::Constants::HTML_COMMENT_RE, Html::Constants::HTML_DOCTYPE_RE, Html::Constants::HTML_ELEMENTS_WITHOUT_BODY, Html::Constants::HTML_ENTITY_RE, Html::Constants::HTML_INSTRUCTION_RE, Html::Constants::HTML_PARSE_AS, Html::Constants::HTML_PARSE_AS_BLOCK, Html::Constants::HTML_PARSE_AS_RAW, Html::Constants::HTML_PARSE_AS_SPAN, Html::Constants::HTML_SPAN_ELEMENTS, Html::Constants::HTML_TAG_CLOSE_RE, Html::Constants::HTML_TAG_RE

Constants included from Newstile

VERSION

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Html::Parser

#handle_html_script_tag, #handle_html_start_tag, #parse_raw_html

Methods included from Newstile

data_dir

Methods inherited from Base

#adapt_source, #add_text, #extract_string, parse, #warning

Constructor Details

#initialize(doc) ⇒ Newstile

Create a new Newstile parser object for the Newstile::Document doc.



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/newstile/parser/newstile.rb', line 81

# Create a new Newstile parser object for the Newstile::Document +doc+.
#
# Resets the parsing state, stores empty lookup tables in the document's
# parse_infos and sets up the lists of block and span parser names. The
# order of the names in both lists is kept exactly as it was.
def initialize(doc)
  super(doc)

  @src, @tree = nil, nil
  @stack = []
  @text_type = :raw_text
  @block_ial = nil

  # Each table gets its own fresh hash.
  [:ald, :link_defs, :abbrev_defs, :footnotes].each do |table|
    @doc.parse_infos[table] = {}
  end

  @block_parsers = [
    :blank_line, :codeblock, :codeblock_fenced, :blockquote, :summary,
    :table, :atx_header, :setext_header, :newstile_header,
    :horizontal_rule, :list, :definition_list, :link_definition,
    :block_html, :footnote_definition, :abbrev_definition, :ald,
    :block_math, :block_extension, :block_ial, :eob_marker, :paragraph
  ]
  @span_parsers = [
    :emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link,
    :newstile_link, :smart_quotes, :inline_math, :span_extension,
    :span_ial, :html_entity, :typographic_syms, :line_break, :escaped_chars
  ]
end

Instance Attribute Details

#docObject (readonly)

Returns the value of attribute doc.



77
78
79
# File 'lib/newstile/parser/newstile.rb', line 77

# Reader for the @doc instance variable.
def doc
  @doc
end

#optionsObject (readonly)

Returns the value of attribute options.



78
79
80
# File 'lib/newstile/parser/newstile.rb', line 78

# Reader for the @options instance variable.
def options
  @options
end

#treeObject (readonly)

Returns the value of attribute tree.



76
77
78
# File 'lib/newstile/parser/newstile.rb', line 76

# Reader for the @tree instance variable.
def tree
  @tree
end

Instance Method Details

This helper method adds the appropriate attributes to the element el of type a or img and the element itself to the @tree.



46
47
48
49
50
51
52
53
54
55
56
# File 'lib/newstile/parser/newstile/link.rb', line 46

# Set the link attributes on +el+ and append +el+ to the children of @tree.
#
# For an element of type :a the target goes into 'href'. For any other
# element (an image) the target goes into 'src', +alt_text+ into 'alt' and
# the element's children are removed. A 'title' attribute is only set when
# +title+ is truthy.
def add_link(el, href, title, alt_text = nil)
  case el.type
  when :a
    el.attr['href'] = href
  else
    el.attr['src'] = href
    el.attr['alt'] = alt_text
    el.children.clear
  end

  el.attr['title'] = title if title
  @tree.children << el
end

#after_block_boundary?Boolean

Return true if we are after a block boundary.

Returns:

  • (Boolean)


34
35
36
37
# File 'lib/newstile/parser/newstile/block_boundary.rb', line 34

# Return a truthy value if we are after a block boundary.
#
# That is the case when the tree has no children yet, when the last child
# is a :blank element, when it is an :eob element without a value, or when
# a pending block IAL (@block_ial) exists.
def after_block_boundary?
  previous = @tree.children.last
  return true unless previous
  return true if previous.type == :blank
  return true if previous.type == :eob && previous.value.nil?
  @block_ial
end

#before_block_boundary?Boolean

Return true if we are before a block boundary.

Returns:

  • (Boolean)


40
41
42
# File 'lib/newstile/parser/newstile/block_boundary.rb', line 40

# Return a truthy value if the text at the current scanner position matches
# BLOCK_BOUNDARY. The scanner position is not advanced (check, not scan).
def before_block_boundary?
  @src.check(BLOCK_BOUNDARY)
end

#handle_extension(name, opts, body, type) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/newstile/parser/newstile/extension.rb', line 67

# Handle the extension +name+ with the parsed options +opts+, the captured
# +body+ and the +type+ (compared against :block below).
#
# Returns true for the known extensions 'comment', 'nomarkdown' and
# 'options', false for every other name.
def handle_extension(name, opts, body, type)
  case name
  when 'comment'
    # Only emit a comment element when a body string is available.
    @tree.children << Element.new(:comment, body, nil, :category => type) if body.kind_of?(String)
    true
  when 'nomarkdown'
    # The body is kept as raw text; the 'type' option is split on whitespace.
    @tree.children << Element.new(:raw, body, nil, :category => type, :type => opts['type'].to_s.split(/\s+/)) if body.kind_of?(String)
    true
  when 'options'
    # Known options are applied to the document and filtered out; the
    # remaining (unknown) ones are reported via warning.
    opts.select do |k,v|
      k = k.to_sym
      if Newstile::Options.defined?(k)
        # If parsing the value raises, the option keeps its current value.
        @doc.options[k] = Newstile::Options.parse(k, v) rescue @doc.options[k]
        false
      else
        true
      end
    end.each do |k,v|
      warning("Unknown newstile option '#{k}'")
    end
    @tree.children << Element.new(:eob, :extension) if type == :block
    true
  else
    false
  end
end

#handle_newstile_html_tag(el, closed) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/newstile/parser/newstile/html.rb', line 31

# Decide how the content of the HTML element +el+ is parsed (:raw, :span or
# :block), store that decision in el.options[:parse_type] and, unless the
# tag is already +closed+, parse the element's content accordingly.
def handle_newstile_html_tag(el, closed)
  # Inside a raw-parsed HTML element everything stays raw; otherwise the
  # parse type comes from HTML_PARSE_AS when the parse_block_html option is
  # set, and is :raw when it is not.
  parse_type = if @tree.type != :html_element || @tree.options[:parse_type] != :raw
                 (@doc.options[:parse_block_html] ? HTML_PARSE_AS[el.value] : :raw)
               else
                 :raw
               end
  # A 'markdown' attribute on the tag overrides the parse type; the
  # attribute is removed from the element.
  if val = html_parse_type(el.attr.delete('markdown'))
    parse_type = (val == :default ? HTML_PARSE_AS[el.value] : val)
  end

  # For block parsing, skip the rest of the line after the start tag.
  @src.scan(/[ \t]*\n/) if parse_type == :block
  el.options[:parse_type] = parse_type

  if !closed
    if parse_type == :block
      end_tag_found = parse_blocks(el)
      if !end_tag_found
        warning("Found no end tag for '#{el.value}' - auto-closing it")
      end
    elsif parse_type == :span
      curpos = @src.pos
      # Scan up to (but not including) the closing tag of this element.
      if result = @src.scan_until(/(?=<\/#{el.value}\s*>)/m)
        add_text(extract_string(curpos...@src.pos, @src), el)
        @src.scan(HTML_TAG_CLOSE_RE)
      else
        # No closing tag: the whole remaining text becomes the content.
        add_text(@src.scan(/.*/m), el)
        warning("Found no end tag for '#{el.value}' - auto-closing it")
      end
    else
      # Raw content; nested tags are handled by this same method.
      parse_raw_html(el, &method(:handle_newstile_html_tag))
    end
    # Skip the rest of the line unless we are inside a raw-parsed element.
    @src.scan(/[ \t]*\n/) unless (@tree.type == :html_element && @tree.options[:parse_type] == :raw)
  end
end

#html_parse_type(val) ⇒ Object

Return the HTML parse type defined by the string val, i.e. :raw when “0”, :default when “1”, :span when “span” and :block when “block”. If val is nil, nil is returned. Any other value triggers a warning and nil is returned as well.



69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/newstile/parser/newstile/html.rb', line 69

# Map the value +val+ of a 'markdown' attribute to an HTML parse type.
#
# "0" gives :raw, "1" gives :default, "span" gives :span and "block" gives
# :block. nil gives nil without a warning; any other value gives nil after
# a warning has been issued.
def html_parse_type(val)
  return nil if val.nil?

  known_types = {"0" => :raw, "1" => :default, "span" => :span, "block" => :block}
  known_types.fetch(val) do
    warning("Invalid markdown attribute val '#{val}', using default")
    nil
  end
end

#parse(source) ⇒ Object

The given source string is parsed and the created tree is returned.



113
114
115
116
117
118
119
120
121
122
123
# File 'lib/newstile/parser/newstile.rb', line 113

# Parse the string +source+ and return the root element of the created tree.
#
# After the block parsing pass the tree is updated and abbreviations are
# replaced; the content of every footnote stored in the document's
# parse_infos is updated as well.
def parse(source)
  configure_parser

  root = Element.new(:root)
  prepared = adapt_source(source)
  parse_blocks(root, prepared)

  update_tree(root)
  replace_abbreviations(root)

  @doc.parse_infos[:footnotes].each do |_name, footnote|
    update_tree(footnote[:content])
  end

  root
end

#parse_abbrev_definitionObject

Parse the abbreviation definition at the current location.



30
31
32
33
34
35
36
37
# File 'lib/newstile/parser/newstile/abbreviation.rb', line 30

# Parse the abbreviation definition at the current location.
#
# The first capture is the abbreviation ID and the second (stripped) capture
# is its text. The pair is stored in the document's :abbrev_defs table, with
# a warning if the ID was already defined, and an :eob element is appended
# to the tree. Always returns true.
def parse_abbrev_definition
  @src.pos += @src.matched_size

  id = @src[1]
  text = @src[2].strip
  definitions = @doc.parse_infos[:abbrev_defs]

  warning("Duplicate abbreviation ID '#{id}' - overwriting") if definitions[id]
  definitions[id] = text

  @tree.children << Element.new(:eob, :abbrev_def)
  true
end

#parse_aldObject

Parse the attribute list definition at the current location.



67
68
69
70
71
72
# File 'lib/newstile/parser/newstile/attribute_list.rb', line 67

# Parse the attribute list definition at the current location.
#
# The first capture is the name of the definition and the second capture
# holds its attributes. The attributes are merged into the entry for that
# name in the document's :ald table (the entry is created on first use) and
# an :eob element is appended to the tree. Always returns true.
def parse_ald
  @src.pos += @src.matched_size

  definitions = @doc.parse_infos[:ald]
  target = (definitions[@src[1]] ||= Utils::OrderedHash.new)
  parse_attribute_list(@src[2], target)

  @tree.children << Element.new(:eob, :ald)
  true
end

#parse_attribute_list(str, opts) ⇒ Object

Parse the string str and extract all attributes and add all found attributes to the hash opts.



29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/newstile/parser/newstile/attribute_list.rb', line 29

# Extract all attributes from the string +str+ and add them to the hash
# +opts+.
#
# Each match of ALD_TYPE_ANY is one of the following:
# * a reference, collected in the array opts[:refs]
# * a class name, appended (space separated) to opts['class']
# * an ID name, stored in opts['id']
# * a key/value pair, stored under its key after removing the backslash in
#   front of an escaped closing brace or quote character
def parse_attribute_list(str, opts)
  str.scan(ALD_TYPE_ANY).each do |name, quote, raw_value, id_name, class_name, reference|
    if reference
      opts[:refs] ||= []
      opts[:refs] << reference
    elsif class_name
      current = opts['class'] || ''
      opts['class'] = (current + " #{class_name}").lstrip
    elsif id_name
      opts['id'] = id_name
    else
      unescape = /\\(\}|#{quote})/
      opts[name] = raw_value.gsub(unescape, "\\1")
    end
  end
end

#parse_atx_headerObject

Parse the Atx header at the current location.



51
52
53
54
55
56
57
58
59
60
61
# File 'lib/newstile/parser/newstile/header.rb', line 51

# Parse the Atx header at the current location.
#
# Returns false, without moving the scanner, when we are not after a block
# boundary or when the text at the current position is not a complete Atx
# header. Otherwise a :header element with the header level, raw text and
# optional ID is appended to the tree and true is returned.
def parse_atx_header
  return false if !after_block_boundary?

  # A line can start with '#' characters without matching the full header
  # pattern (e.g. a line consisting of a single '#'). In that case scan
  # returns nil and the captures are nil, so decline instead of calling
  # strip on nil.
  return false unless @src.scan(ATX_HEADER_MATCH)

  level, text, id = @src[1], @src[2].strip, @src[3]
  el = new_block_el(:header, nil, nil, :level => level.length, :raw_text => text)
  add_text(text, el)
  el.attr['id'] = id if id
  @tree.children << el
  true
end

Parse the autolink at the current location.



42
43
44
45
46
47
48
49
# File 'lib/newstile/parser/newstile/autolink.rb', line 42

# Parse the autolink at the current location.
#
# The first capture is the link target. When the second capture is nil,
# "mailto:" is put in front of the target to build the href. The link text
# is the target with a leading "mailto:" removed.
def parse_autolink
  @src.pos += @src.matched_size

  target = @src[1]
  href = @src[2].nil? ? "mailto:#{target}" : target

  link = Element.new(:a, nil, {'href' => href})
  add_text(target.sub(/^mailto:/, ''), link)
  @tree.children << link
end

#parse_blank_lineObject

Parse the blank line at the current position.



30
31
32
33
34
35
36
37
38
# File 'lib/newstile/parser/newstile/blank_line.rb', line 30

# Parse the blank line at the current position.
#
# The matched text is appended to the value of a directly preceding :blank
# element; if there is none, a new :blank element is added to the tree.
# Always returns true.
def parse_blank_line
  @src.pos += @src.matched_size

  previous = @tree.children.last
  if previous && previous.type == :blank
    previous.value += @src.matched
  else
    @tree.children << new_block_el(:blank, @src.matched)
  end
  true
end

#parse_block_extensionObject

Parse the extension block at the current location.



102
103
104
# File 'lib/newstile/parser/newstile/extension.rb', line 102

# Parse the extension block at the current location by delegating to
# parse_extension_start_tag with the type :block.
def parse_block_extension
  parse_extension_start_tag(:block)
end

#parse_block_htmlObject

Parse the HTML at the current position as block level HTML.



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/newstile/parser/newstile/html.rb', line 86

# Parse the HTML at the current position as block level HTML.
#
# Handles, in this order: XML comments, processing instructions, start tags
# and closing tags. Returns true if something was consumed and false
# otherwise. A closing tag that matches the HTML element currently being
# parsed stops the block parsing via throw :stop_block_parsing.
def parse_block_html
  if result = @src.scan(HTML_COMMENT_RE)
    @tree.children << Element.new(:xml_comment, result, nil, :category => :block)
    # Skip the rest of the line after the comment.
    @src.scan(/[ \t]*\n/)
    true
  elsif result = @src.scan(HTML_INSTRUCTION_RE)
    @tree.children << Element.new(:xml_pi, result, nil, :category => :block)
    @src.scan(/[ \t]*\n/)
    true
  else
    # Start tag: only handled here if the tag name (first capture) is not
    # listed in HTML_SPAN_ELEMENTS.
    if result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1])
      @src.pos += @src.matched_size
      handle_html_start_tag(&method(:handle_newstile_html_tag))
      # Optionally convert the just added HTML element to native elements.
      Newstile::Parser::Html::ElementConverter.new(@doc).process(@tree.children.last) if @doc.options[:html_to_native]
      true
    elsif result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1])
      name = @src[1]

      # Only a closing tag for the element currently being parsed is
      # consumed; any other closing tag is left for other parsers.
      if @tree.type == :html_element && @tree.value == name
        @src.pos += @src.matched_size
        throw :stop_block_parsing, :found
      else
        false
      end
    else
      false
    end
  end
end

#parse_block_ialObject

Parse the inline attribute list at the current location.



80
81
82
83
84
85
86
87
88
89
# File 'lib/newstile/parser/newstile/attribute_list.rb', line 80

# Parse the inline attribute list at the current location.
#
# If the previous element in the tree exists and is neither a :blank nor an
# :eob element, the attributes are merged into that element's :ial option.
# An :eob element is then appended unless another block IAL follows
# directly. Otherwise the attributes are stored in @block_ial. Always
# returns true.
def parse_block_ial
  @src.pos += @src.matched_size

  attributes = @src[1]
  previous = @tree.children.last

  if !previous || previous.type == :blank || previous.type == :eob
    @block_ial = Utils::OrderedHash.new
    parse_attribute_list(attributes, @block_ial)
  else
    previous.options[:ial] ||= Utils::OrderedHash.new
    parse_attribute_list(attributes, previous.options[:ial])
    @tree.children << Element.new(:eob, :ial) unless @src.check(IAL_BLOCK_START)
  end
  true
end

#parse_block_mathObject

Parse the math block at the current location.



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/newstile/parser/newstile/math.rb', line 32

# Parse the math block at the current location.
#
# Returns true if a :math block element was added to the tree and false
# otherwise.
def parse_block_math
  if !after_block_boundary?
    return false
  elsif @src[1]
    # The first capture is set, presumably the optional leading backslash
    # of BLOCK_MATH_START (confirm against the registered start pattern):
    # consume the backslash and decline.
    @src.scan(/^#{OPT_SPACE}\\/)
    return false
  end
  orig_pos = @src.pos
  @src.pos += @src.matched_size
  # Save the capture now; the boundary check below runs another match.
  data = @src[2]
  if before_block_boundary?
    @tree.children << new_block_el(:math, data)
    true
  else
    # Not followed by a block boundary: rewind and decline.
    @src.pos = orig_pos
    false
  end
end

#parse_blockquoteObject

Parse the blockquote at the current location.



35
36
37
38
39
40
# File 'lib/newstile/parser/newstile/blockquote.rb', line 35

# Parse the blockquote at the current location.
#
# A :blockquote element is appended to the tree, the text matched by
# BLOCKQUOTE_MATCH is stripped of its blockquote markers and then parsed as
# the block level content of the new element. Always returns true.
def parse_blockquote
  el = new_block_el(:blockquote)
  @tree.children << el
  quoted = @src.scan(BLOCKQUOTE_MATCH)
  # gsub instead of gsub!: gsub! returns nil when no marker is replaced,
  # which would hand nil to parse_blocks instead of the matched text.
  parse_blocks(el, quoted.gsub(BLOCKQUOTE_START, ''))
  true
end

#parse_codeblockObject

Parse the indented codeblock at the current location.



36
37
38
39
# File 'lib/newstile/parser/newstile/codeblock.rb', line 36

# Parse the indented codeblock at the current location.
#
# Within the text matched by CODEBLOCK_MATCH, a line that starts with at
# most three spaces followed by a non-space character is joined to the
# previous line with a single space. The leading indentation is then
# removed and the result becomes the value of a new :codeblock element.
# Always returns true.
def parse_codeblock
  raw = @src.scan(CODEBLOCK_MATCH)
  # gsub instead of gsub!: gsub! returns nil when nothing is replaced,
  # which would store nil as the codeblock value.
  code = raw.gsub(/\n( {0,3}\S)/, ' \\1').gsub(INDENT, '')
  @tree.children << new_block_el(:codeblock, code)
  true
end

#parse_codeblock_fencedObject

Parse the fenced codeblock at the current location.



47
48
49
50
51
52
53
54
55
# File 'lib/newstile/parser/newstile/codeblock.rb', line 47

# Parse the fenced codeblock at the current location.
#
# Returns false without consuming anything when the text does not match
# FENCED_CODEBLOCK_MATCH. Otherwise the second capture becomes the value of
# a new :codeblock element and true is returned.
def parse_codeblock_fenced
  return false unless @src.check(FENCED_CODEBLOCK_MATCH)

  @src.pos += @src.matched_size
  @tree.children << new_block_el(:codeblock, @src[2])
  true
end

#parse_codespanObject

Parse the codespan at the current scanner location.



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/newstile/parser/newstile/codespan.rb', line 30

# Parse the codespan at the current scanner location.
#
# A single backtick that is surrounded by whitespace is added as plain
# text. Otherwise the text up to the next occurrence of the same delimiter
# becomes a :codespan element; for delimiters longer than one character a
# single leading and a single trailing space are removed. If no closing
# delimiter is found, the opening delimiter is added as plain text.
def parse_codespan
  delimiter = @src.scan(CODESPAN_DELIMITER)
  single = (delimiter.length == 1)
  restart = @src.pos

  if single && @src.pre_match =~ /\s\Z/ && @src.match?(/\s/)
    add_text(delimiter)
    return
  end

  code = @src.scan_until(/#{delimiter}/)
  unless code
    # No closing delimiter: rewind and keep the opening one as text.
    @src.pos = restart
    return add_text(delimiter)
  end

  code.sub!(/#{delimiter}\Z/, '')
  unless single
    code = code[1..-1] if code[0..0] == ' '
    code = code[0..-2] if code[-1..-1] == ' '
  end
  @tree.children << Element.new(:codespan, code)
end

#parse_definition_listObject

Parse the definition list at the current location.



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'lib/newstile/parser/newstile/list.rb', line 147

# Parse the definition list at the current location.
#
# The preceding paragraph supplies the terms, one :dt element per line. The
# definitions that follow become :dd elements. Returns false when there is
# no suitable preceding paragraph, true otherwise.
def parse_definition_list
  children = @tree.children
  # Decline unless the previous element is a paragraph, or a single blank
  # line ("\n") that is itself preceded by a paragraph.
  if !children.last || (children.length == 1 && children.last.type != :p ) ||
      (children.length >= 2 && children[-1].type != :p && (children[-1].type != :blank || children[-1].value != "\n" || children[-2].type != :p))
    return false
  end

  first_as_para = false
  deflist = new_block_el(:dl)
  # Remove the term paragraph (and a separating blank line) from the tree.
  para = @tree.children.pop
  if para.type == :blank
    para = @tree.children.pop
    first_as_para = true
  end
  # Every line of the paragraph text becomes one term.
  para.children.first.value.split("\n").each do |term|
    el = Element.new(:dt)
    el.children << Element.new(:raw_text, term)
    deflist.children << el
  end

  item = nil
  content_re, lazy_re, indent_re = nil
  def_start_re = DEFINITION_LIST_START
  last_is_blank = false
  # Collect the raw text of each definition in item.value.
  while !@src.eos?
    if @src.scan(def_start_re)
      # Start of a new definition.
      item = Element.new(:dd)
      item.options[:first_as_para] = first_as_para
      item.value, indentation, content_re, lazy_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
      deflist.children << item

      # A leading span IAL is moved into the item's :ial option.
      item.value.sub!(/^#{IAL_SPAN_START}\s*/) do |match|
        parse_attribute_list($~[1], item.options[:ial] ||= {})
        ''
      end

      # Further definition markers are matched with an indentation limit
      # derived from this item's indentation.
      def_start_re = /^( {0,#{[3, indentation - 1].min}}:)([\t| ].*?\n)/
      first_as_para = false
      last_is_blank = false
    elsif @src.check(EOB_MARKER)
      break
    elsif (result = @src.scan(content_re)) || (!last_is_blank && (result = @src.scan(lazy_re)))
      # Continuation line: properly indented content, or a lazy line that
      # does not directly follow a blank line.
      result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
      result.sub!(indent_re, '')
      item.value << result
      first_as_para = false
      last_is_blank = false
    elsif result = @src.scan(BLANK_LINE)
      first_as_para = true
      item.value << result
      last_is_blank = true
    else
      break
    end
  end

  last = nil
  # Parse the collected text of each definition as block level content.
  deflist.children.each do |it|
    next if it.type == :dt

    parse_blocks(it, it.value)
    it.value = nil
    next if it.children.size == 0

    # A trailing blank element is taken out of the definition and remembered.
    if it.children.last.type == :blank
      last = it.children.pop
    else
      last = nil
    end
    # Unless the item was flagged first_as_para, its first paragraph is
    # marked as transparent.
    if it.children.first.type == :p && !it.options.delete(:first_as_para)
      it.children.first.children.first.value += "\n" if it.children.size > 1
      it.children.first.options[:transparent] = true
    end
  end

  # Merge with a directly preceding definition list (optionally separated
  # by a blank element) instead of starting a new one.
  if @tree.children.length >= 1 && @tree.children.last.type == :dl
    @tree.children[-1].children += deflist.children
  elsif @tree.children.length >= 2 && @tree.children[-1].type == :blank && @tree.children[-2].type == :dl
    @tree.children.pop
    @tree.children[-1].children += deflist.children
  else
    @tree.children << deflist
  end

  # Re-append the blank element taken from the last processed definition.
  @tree.children << last if !last.nil?

  true
end

#parse_emphasisObject

Parse the emphasis at the current location.



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/newstile/parser/newstile/emphasis.rb', line 30

# Parse the emphasis at the current location.
#
# A delimiter of two characters starts a :strong element, a delimiter of one
# character an :em element. If no valid closing delimiter is found, the
# delimiter is added as plain text.
def parse_emphasis
  result = @src.scan(EMPHASIS_START)
  element = (result.length == 2 ? :strong : :em)
  type = (result =~ /_/ ? '_' : '*')
  reset_pos = @src.pos

  # Treat the delimiter as plain text if it is an underscore between two
  # letters, if it is followed by whitespace, or if we are already inside
  # an element of the same kind.
  if (type == '_' && @src.pre_match =~ /[[:alpha:]]\z/ && @src.check(/[[:alpha:]]/)) || @src.check(/\s/) ||
      @tree.type == element || @stack.any? {|el, _| el.type == element}
    add_text(result)
    return
  end

  # Tries to parse the spans up to the closing delimiter delim. The block
  # given to parse_spans checks a candidate closing delimiter: it must not
  # be preceded by whitespace, and the element must not be empty. For :em
  # the delimiter must not be a doubled delimiter, and an underscore must
  # not be followed by a letter.
  sub_parse = lambda do |delim, elem|
    el = Element.new(elem)
    stop_re = /#{Regexp.escape(delim)}/
    found = parse_spans(el, stop_re) do
      (@src.pre_match[-1, 1] !~ /\s/) &&
        (elem != :em || !@src.match?(/#{Regexp.escape(delim*2)}(?!#{Regexp.escape(delim)})/)) &&
        (type != '_' || !@src.match?(/#{Regexp.escape(delim)}[[:alpha:]]/)) && el.children.size > 0
    end
    [found, el, stop_re]
  end

  found, el, stop_re = sub_parse.call(result, element)
  # No closing delimiter for :strong: retry as :em, starting one character
  # earlier so that the second delimiter character is part of the content.
  if !found && element == :strong && @tree.type != :em
    @src.pos = reset_pos - 1
    found, el, stop_re = sub_parse.call(type, :em)
  end
  if found
    # Consume the closing delimiter.
    @src.scan(stop_re)
    @tree.children << el
  else
    @src.pos = reset_pos
    add_text(result)
  end
end

#parse_eob_markerObject

Parse the EOB marker at the current location.



30
31
32
33
34
# File 'lib/newstile/parser/newstile/eob.rb', line 30

# Parse the EOB marker at the current location: skip the matched text and
# append an :eob block element to the tree. Always returns true.
def parse_eob_marker
  @src.pos += @src.matched_size
  @tree.children << new_block_el(:eob)
  true
end

#parse_escaped_charsObject

Parse the backslash-escaped character at the current location.



30
31
32
33
# File 'lib/newstile/parser/newstile/escaped_chars.rb', line 30

# Parse the backslash-escaped character at the current location: skip the
# matched text and add only the first capture as text.
def parse_escaped_chars
  @src.pos += @src.matched_size
  add_text(@src[1])
end

#parse_extension_start_tag(type) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/newstile/parser/newstile/extension.rb', line 29

# Parse the extension start tag at the current location. +type+ is :block
# or :span.
#
# Returns true if the extension was handled. In every error case a warning
# is issued, the scanner is reset to the start of the tag and false is
# returned; for type :span one character is additionally added as text.
def parse_extension_start_tag(type)
  orig_pos = @src.pos
  @src.pos += @src.matched_size

  # Common error path, see the method description.
  error_block = lambda do |msg|
    warning(msg)
    @src.pos = orig_pos
    add_text(@src.scan(/./)) if type == :span
    false
  end

  # A stop tag without a matching start tag.
  if @src[4] || @src.matched == '{:/}'
    name = (@src[4] ? "for '#{@src[4]}' " : '')
    return error_block.call("Invalid extension stop tag #{name}found - ignoring it")
  end

  ext = @src[1]
  opts = {}
  body = nil
  parse_attribute_list(@src[2] || '', opts)

  # The third capture is not set: look for the stop tag and take everything
  # before it as the body.
  if !@src[3]
    stop_re = (type == :block ? /#{EXT_BLOCK_STOP_STR % ext}/ : /#{EXT_STOP_STR % ext}/)
    if result = @src.scan_until(stop_re)
      body = result.sub!(stop_re, '')
      body.chomp! if type == :block
    else
      return error_block.call("No stop tag for extension '#{ext}' found - ignoring it")
    end
  end

  if !handle_extension(ext, opts, body, type)
    error_block.call("Invalid extension with name '#{ext}' specified - ignoring it")
  else
    true
  end
end

#parse_first_list_line(indentation, content) ⇒ Object

Used for parsing the first line of a list item or a definition, i.e. the line with list item marker or the definition marker.



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/newstile/parser/newstile/list.rb', line 36

# Used for parsing the first line of a list item or a definition, i.e. the
# line with the list item marker or the definition marker.
#
# +indentation+ is the length of the marker part and +content+ the text
# after it; +content+ is modified in place. Returns the array
# [content, indentation, content_re, lazy_re, indent_re].
def parse_first_list_line(indentation, content)
  if content =~ LIST_ITEM_IAL
    # The content matches LIST_ITEM_IAL: use a fixed indentation of 4.
    indentation = 4
  else
    # Replace tabs in the leading whitespace by spaces (4 column tab stops).
    while content =~ /^ *\t/
      temp = content.scan(/^ */).first.length + indentation
      content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
    end
    # The leading spaces of the content count towards the indentation.
    indentation += content.scan(/^ */).first.length
  end
  content.sub!(/^\s*/, '')

  # indent_re: the item's indentation as spaces
  # content_re: a non-blank line indented at least as far as the item
  # lazy_re: a non-blank line that does not start a block IAL or a lazy-end
  #          HTML tag
  indent_re = /^ {#{indentation}}/
  content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*\S.*\n/
  lazy_re = /(?!^ {0,#{[indentation, 3].min}}(?:#{IAL_BLOCK}|#{LAZY_END_HTML_STOP}|#{LAZY_END_HTML_START})).*\S.*\n/
  [content, indentation, content_re, lazy_re, indent_re]
end

#parse_footnote_definitionObject

Parse the footnote definition at the current location.



34
35
36
37
38
39
40
41
42
43
# File 'lib/newstile/parser/newstile/footnote.rb', line 34

# Parse the footnote definition at the current location.
#
# The first capture is the footnote name and the second its content. The
# content is parsed into a :footnote_def element, which is stored under the
# footnote name in the document's :footnotes table. An :eob element is
# appended to the tree. Always returns true.
def parse_footnote_definition
  @src.pos += @src.matched_size

  el = Element.new(:footnote_def)
  # The indentation of the content is removed before it is parsed.
  parse_blocks(el, @src[2].gsub(INDENT, ''))
  warning("Duplicate footnote name '#{@src[1]}' - overwriting") if @doc.parse_infos[:footnotes][@src[1]]
  # A new entry replaces any existing one.
  (@doc.parse_infos[:footnotes][@src[1]] = {})[:content] = el
  @tree.children << Element.new(:eob, :footnote_def)
  true
end

#parse_footnote_markerObject

Parse the footnote marker at the current location.



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/newstile/parser/newstile/footnote.rb', line 50

# Parse the footnote marker at the current location.
#
# If a definition exists for the footnote name (first capture), a :footnote
# element is added to the tree. If no definition exists, or a still valid
# marker for this footnote was already added, a warning is issued and the
# marker text is added as plain text.
def parse_footnote_marker
  @src.pos += @src.matched_size
  fn_def = @doc.parse_infos[:footnotes][@src[1]]
  if fn_def
    # An earlier marker is valid as long as every element on its recorded
    # stack is still a child of the element before it.
    valid = fn_def[:marker] && fn_def[:marker].options[:stack][0..-2].zip(fn_def[:marker].options[:stack][1..-1]).all? do |par, child|
      par.children.include?(child)
    end
    if !fn_def[:marker] || !valid
      fn_def[:marker] = Element.new(:footnote, nil, nil, :name => @src[1])
      # Record the chain of elements leading to the marker.
      fn_def[:marker].options[:stack] = [@stack.map {|s| s.first}, @tree, fn_def[:marker]].flatten.compact
      @tree.children << fn_def[:marker]
    else
      warning("Footnote marker '#{@src[1]}' already appeared in document, ignoring newly found marker")
      add_text(@src.matched)
    end
  else
    warning("Footnote definition for '#{@src[1]}' not found")
    add_text(@src.matched)
  end
end

#parse_horizontal_ruleObject

Parse the horizontal rule at the current location.



30
31
32
33
34
# File 'lib/newstile/parser/newstile/horizontal_rule.rb', line 30

# Parse the horizontal rule at the current location: skip the matched text
# and append an :hr block element to the tree. Always returns true.
def parse_horizontal_rule
  @src.pos += @src.matched_size
  @tree.children << new_block_el(:hr)
  true
end

#parse_html_entityObject

Parse the HTML entity at the current location.



30
31
32
33
34
# File 'lib/newstile/parser/newstile/html_entity.rb', line 30

# Parse the HTML entity at the current location and append an :entity
# element to the tree; the matched text is kept in the :original option.
#
# The entity is looked up by the first capture if present, else by the
# second capture converted with to_i, else by the third capture converted
# with hex (presumably name, decimal and hexadecimal form; confirm against
# HTML_ENTITY_RE).
def parse_html_entity
  @src.pos += @src.matched_size
  @tree.children << Element.new(:entity, ::Newstile::Utils::Entities.entity(@src[1] || (@src[2] && @src[2].to_i) || @src[3].hex),
                                nil, :original => @src.matched)
end

#parse_inline_mathObject

Parse the inline math at the current location.



56
57
58
59
# File 'lib/newstile/parser/newstile/math.rb', line 56

# Parse the inline math at the current location.
#
# Appends a :math element of category :span whose value is the first capture group.
def parse_inline_math
  @src.pos += @src.matched_size
  formula = Element.new(:math, @src[1], nil, :category => :span)
  @tree.children << formula
end

#parse_line_breakObject

Parse the line break at the current location.



30
31
32
33
# File 'lib/newstile/parser/newstile/line_break.rb', line 30

# Parse the line break at the current location.
#
# Skips the matched text and appends a :br element to the current tree.
def parse_line_break
  @src.pos += @src.matched_size
  line_break = Element.new(:br)
  @tree.children << line_break
end

Parse the link at the current scanner position. This method is used to parse normal links as well as image links.



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/newstile/parser/newstile/link.rb', line 65

# Parse the link at the current scanner position. This method is used to parse normal links as
# well as image links.
#
# The method scans LINK_START, then the bracketed link text, and finally either a reference
# style link ID or an inline URL (optionally followed by a title). On success the element is
# handed to +add_link+. On failure the scanner is reset to the position just after LINK_START
# and the LINK_START text is added as plain text.
def parse_link
  result = @src.scan(LINK_START)
  # Position right after LINK_START; used to rewind when the link turns out to be invalid.
  reset_pos = @src.pos

  # A leading '!' marks an image link.
  link_type = (result =~ /^!/ ? :img : :a)

  # no nested links allowed
  if link_type == :a && (@tree.type == :img || @tree.type == :a || @stack.any? {|t,s| t && (t.type == :img || t.type == :a)})
    add_text(result)
    return
  end
  el = Element.new(link_type)

  # Parse the link text. +count+ tracks the bracket nesting depth; the block result is true once
  # the depth, reduced by the number of :img children parsed so far, reaches zero.
  # NOTE(review): presumably parse_spans treats a true block result as "stop here" - confirm.
  stop_re = /\]|!?\[/
  count = 1
  found = parse_spans(el, stop_re) do
    case @src.matched
    when "[", "!["
      count += 1
    when "]"
      count -= 1
    end
    count - el.children.select {|c| c.type == :img}.size == 0
  end
  # No end of the link text found, or a normal link without any content: output as plain text.
  if !found || (link_type == :a && el.children.empty?)
    @src.pos = reset_pos
    add_text(result)
    return
  end
  # Raw source of the link text; also the basis for the implicit link ID (whitespace runs
  # collapsed to one space, LINK_ID_NON_CHARS removed, downcased).
  alt_text = extract_string(reset_pos...@src.pos, @src)
  conv_link_id = alt_text.gsub(/(\s|\n)+/m, ' ').gsub(LINK_ID_NON_CHARS, '').downcase
  # Consume the stop token that ended the link text.
  @src.scan(stop_re)

  # reference style link or no link url
  if @src.scan(LINK_INLINE_ID_RE) || !@src.check(/\(/)
    # Use the explicit ID if one was scanned, otherwise the ID derived from the link text.
    link_id = (@src[1] || conv_link_id).downcase
    if link_id.empty?
      @src.pos = reset_pos
      add_text(result)
    elsif @doc.parse_infos[:link_defs].has_key?(link_id)
      add_link(el, @doc.parse_infos[:link_defs][link_id].first, @doc.parse_infos[:link_defs][link_id].last, alt_text)
    else
      warning("No link definition for link ID '#{link_id}' found")
      @src.pos = reset_pos
      add_text(result)
    end
    return
  end

  # link url in parentheses
  if @src.scan(/\(<(.*?)>/)
    # URL enclosed in angle brackets.
    link_url = @src[1]
    if @src.scan(/\)/)
      add_link(el, link_url, nil, alt_text)
      return
    end
  else
    # Collect the URL while balancing parentheses; whitespace followed by a quote character
    # ends the URL as well.
    link_url = ''
    re = /\(|\)|\s(?=['"])/
    nr_of_brackets = 0
    while temp = @src.scan_until(re)
      link_url += temp
      case @src.matched
      when /\s/
        break
      when '('
        nr_of_brackets += 1
      when ')'
        nr_of_brackets -= 1
        break if nr_of_brackets == 0
      end
    end
    # Drop the first and the last collected character (the opening parenthesis and the
    # terminating delimiter).
    link_url = link_url[1..-2].strip

    # All parentheses are closed, so there is no title part.
    if nr_of_brackets == 0
      add_link(el, link_url, nil, alt_text)
      return
    end
  end

  # What remains must be a link title; otherwise the whole construct is output as plain text.
  if @src.scan(LINK_INLINE_TITLE_RE)
    add_link(el, link_url, @src[2], alt_text)
  else
    @src.pos = reset_pos
    add_text(result)
  end
end

Parse the link definition at the current location.



33
34
35
36
37
38
39
40
# File 'lib/newstile/parser/newstile/link.rb', line 33

# Parse the link definition at the current location.
#
# Stores the URL and title under the downcased link ID in the document's :link_defs table
# (warning when an entry already exists) and appends an :eob element. Always returns +true+.
def parse_link_definition
  @src.pos += @src.matched_size
  id = @src[1].downcase
  url = @src[2] || @src[3]
  title = @src[5]
  warning("Duplicate link ID '#{id}' - overwriting") if @doc.parse_infos[:link_defs][id]
  @doc.parse_infos[:link_defs][id] = [url, title]
  @tree.children << Element.new(:eob, :link_def)
  true
end

#parse_listObject

Parse the ordered or unordered list at the current location.



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/newstile/parser/newstile/list.rb', line 60

# Parse the ordered or unordered list at the current location.
#
# Works in two passes: first the raw text of every list item is collected into +item.value+,
# then each item's text is parsed as block level content and the resulting children are
# post-processed (transparent paragraphs, trailing blank elements). Always returns +true+.
def parse_list
  # The list type is decided by which list start regexp matches at the current position.
  type, list_start_re = (@src.check(LIST_START_UL) ? [:ul, LIST_START_UL] : [:ol, LIST_START_OL])
  list = new_block_el(type)

  item = nil
  content_re, lazy_re, indent_re = nil
  eob_found = false
  nested_list_found = false
  last_is_blank = false
  # First pass: gather the raw source lines belonging to each list item.
  while !@src.eos?
    if last_is_blank && @src.check(HR_START)
      # A horizontal rule after a blank line ends the list.
      break
    elsif @src.scan(EOB_MARKER)
      eob_found = true
      break
    elsif @src.scan(list_start_re)
      # Start of a new list item.
      item = Element.new(:li)
      item.value, indentation, content_re, lazy_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
      list.children << item

      # An inline attribute list at the start of the item text is moved into the item's :ial option.
      item.value.sub!(/^#{IAL_SPAN_START}\s*/) do |match|
        parse_attribute_list($~[1], item.options[:ial] ||= {})
        ''
      end

      # Markers of following items may be indented by at most min(3, indentation - 1) spaces.
      list_start_re = (type == :ul ? /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/ :
                       /^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/)
      nested_list_found = (item.value =~ LIST_START)
      last_is_blank = false
    elsif (result = @src.scan(content_re)) || (!last_is_blank && (result = @src.scan(lazy_re)))
      # Continuation line of the current item; lazy lines are only accepted directly after
      # a non-blank line.
      # Each leading tab is replaced by four spaces before the item indentation is removed.
      result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
      result.sub!(indent_re, '')
      if !nested_list_found && result =~ LIST_START
        # NOTE(review): the "^" line presumably acts as a block boundary marker so that the
        # nested list is recognized when the item text is parsed - confirm.
        item.value << "^\n"
        nested_list_found = true
      end
      item.value << result
      last_is_blank = false
    elsif result = @src.scan(BLANK_LINE)
      nested_list_found = true
      last_is_blank = true
      item.value << result
    else
      break
    end
  end

  @tree.children << list

  # Second pass: parse the collected text of each item as block level content.
  last = nil
  list.children.each do |it|
    temp = Element.new(:temp)
    parse_blocks(temp, it.value)
    it.children = temp.children
    it.value = nil
    next if it.children.size == 0

    # Handle the case where an EOB marker is inserted by a block IAL for the first paragraph
    it.children.delete_at(1) if it.children.first.type == :p &&
      it.children.length >= 2 && it.children[1].type == :eob && it.children.first.options[:ial]

    # Decide whether the first paragraph of the item is marked as transparent.
    if it.children.first.type == :p &&
        (it.children.length < 2 || it.children[1].type != :blank ||
         (it == list.children.last && it.children.length == 2 && !eob_found)) &&
        (list.children.last != it || list.children.size == 1 ||
         list.children[0..-2].any? {|cit| cit.children.first.type != :p || cit.children.first.options[:transparent]})
      it.children.first.children.first.value += "\n" if it.children.size > 1 && it.children[1].type != :blank
      it.children.first.options[:transparent] = true
    end

    # Remove a trailing blank element from the item; the one removed from the last item is
    # remembered in +last+.
    if it.children.last.type == :blank
      last = it.children.pop
    else
      last = nil
    end
  end

  # Re-add the blank element removed from the last item after the list, unless the list was
  # ended by an EOB marker.
  @tree.children << last if !last.nil? && !eob_found

  true
end

#parse_newstile_headerObject

Parse the newstile header at the current location.



69
70
71
72
73
74
75
76
77
78
79
# File 'lib/newstile/parser/newstile/header.rb', line 69

# Parse the newstile header at the current location.
#
# Returns +false+ without consuming anything when not located after a block boundary.
# Otherwise scans NEWSTILE_HEADER_MATCH, appends a :header element whose level is the length
# of the first capture group, and returns +true+.
def parse_newstile_header
  return false unless after_block_boundary?

  @src.scan(NEWSTILE_HEADER_MATCH)
  marker = @src[1]
  title = @src[2]
  anchor_id = @src[3]
  header = new_block_el(:header, nil, nil, :level => marker.length, :raw_text => title)
  add_text(title, header)
  header.attr['id'] = anchor_id if anchor_id
  @tree.children << header
  true
end

Parse the newstile link (normal link or image link) at the current location.



157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/newstile/parser/newstile/link.rb', line 157

# Parse the newstile link at the current location.
#
# A '!' at the current position selects an image link, anything else a normal link. The first
# capture group, if present, is added as plain text before the link. Capture group two is the
# link text (or the image alt text) and capture group three the target. Always returns +true+.
def parse_newstile_link
  is_image = (@src.string[@src.pos] == '!'[0])
  @src.pos += @src.matched_size
  prefix = @src[1]
  add_text prefix if prefix

  label, target = @src[2], @src[3]
  if is_image
    @tree.children << Element.new(:img, nil, {'src' => target, 'alt' => label})
  else
    link = Element.new(:a, nil, {'href' => target})
    link.children = [ Element.new(:text, label) ]
    @tree.children << link
  end
  true
end

#parse_paragraphObject

Parse the paragraph at the current location.



41
42
43
44
45
46
47
48
49
50
# File 'lib/newstile/parser/newstile/paragraph.rb', line 41

# Parse the paragraph at the current location.
#
# The scanned text is appended (after a newline) to the text of a directly preceding :p element
# if there is one; otherwise a new :p element containing the text is created.
# Always returns +true+.
def parse_paragraph
  line = @src.scan(PARAGRAPH_MATCH)
  previous = @tree.children.last
  if previous && previous.type == :p
    previous.children.first.value << "\n" << line.chomp
  else
    paragraph = new_block_el(:p)
    @tree.children << paragraph
    paragraph.children << Element.new(@text_type, line.lstrip.chomp)
  end
  true
end

#parse_setext_headerObject

Parse the Setext header at the current location.



33
34
35
36
37
38
39
40
41
42
43
# File 'lib/newstile/parser/newstile/header.rb', line 33

# Parse the Setext header at the current location.
#
# Returns +false+ without consuming anything when not located after a block boundary.
# Otherwise appends a :header element (level 2 when the third capture group is '-', level 1
# otherwise) and returns +true+.
def parse_setext_header
  return false unless after_block_boundary?

  @src.pos += @src.matched_size
  title = @src[1].strip
  anchor_id = @src[2]
  underline = @src[3]
  depth = (underline == '-' ? 2 : 1)
  header = new_block_el(:header, nil, nil, :level => depth, :raw_text => title)
  add_text(title, header)
  header.attr['id'] = anchor_id if anchor_id
  @tree.children << header
  true
end

#parse_smart_quotesObject

Parse the smart quotes at current location.



199
200
201
202
203
204
205
206
207
208
209
# File 'lib/newstile/parser/newstile/smart_quotes.rb', line 199

# Parse the smart quotes at current location.
#
# The first rule in SQ_RULES whose regexp scans successfully supplies a list of substitutions:
# an Integer outputs that capture group as text, anything else becomes a :smart_quote element.
# For the latter, the last character of the substitution name selects the capture group used
# for the SQ_SUBSTS lookup; the name itself is used when there is no entry.
def parse_smart_quotes
  _pattern, actions = SQ_RULES.find {|pattern, _| @src.scan(pattern)}
  actions.each do |action|
    case action
    when Integer
      add_text(@src[action].to_s)
    else
      group = action.to_s[-1, 1].to_i
      quote = SQ_SUBSTS[[action, @src[group]]] || action
      @tree.children << Element.new(:smart_quote, quote)
    end
  end
end

#parse_span_extensionObject

Parse the extension span at the current location.



109
110
111
# File 'lib/newstile/parser/newstile/extension.rb', line 109

# Parse the extension span at the current location.
#
# Delegates to +parse_extension_start_tag+ with the type :span and returns its result.
def parse_span_extension
  parse_extension_start_tag(:span)
end

#parse_span_htmlObject

Parse the HTML at the current position as span level HTML.



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/newstile/parser/newstile/html.rb', line 121

# Parse the HTML at the current position as span level HTML.
#
# Handles, in this order: HTML comments, processing instructions, stray closing tags (output as
# text with a warning) and opening tags. If none of these match, a single character is consumed
# and added as text.
def parse_span_html
  if result = @src.scan(HTML_COMMENT_RE)
    @tree.children << Element.new(:xml_comment, result, nil, :category => :span)
  elsif result = @src.scan(HTML_INSTRUCTION_RE)
    @tree.children << Element.new(:xml_pi, result, nil, :category => :span)
  elsif result = @src.scan(HTML_TAG_CLOSE_RE)
    warning("Found invalidly used HTML closing tag for '#{@src[1]}'")
    add_text(result)
  elsif result = @src.scan(HTML_TAG_RE)
    # Block level tags are not allowed here; they are output as plain text.
    if HTML_BLOCK_ELEMENTS.include?(@src[1])
      warning("Found block HTML tag '#{@src[1]}' in span level text")
      add_text(result)
      return
    end

    # NOTE(review): reset_pos is assigned but never read in this method.
    reset_pos = @src.pos
    # Collect the tag attributes; runs of newlines inside attribute values become one space.
    attrs = Utils::OrderedHash.new
    @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val.gsub(/\n+/, ' ')}

    # Default: no parsing of the content for raw tags or inside a raw-parsed parent, otherwise
    # the document option :parse_span_html decides.
    do_parsing = (HTML_PARSE_AS_RAW.include?(@src[1]) || @tree.options[:parse_type] == :raw ? false : @doc.options[:parse_span_html])
    # A 'markdown' attribute on the tag overrides the default; it is removed from the attributes.
    if val = html_parse_type(attrs.delete('markdown'))
      if val == :block
        warning("Cannot use block level parsing in span level HTML tag - using default mode")
      elsif val == :span
        do_parsing = true
      elsif val == :default
        do_parsing = !HTML_PARSE_AS_RAW.include?(@src[1])
      elsif val == :raw
        do_parsing = false
      end
    end

    el = Element.new(:html_element, @src[1], attrs, :category => :span, :parse_type => (do_parsing ? :span : :raw))
    @tree.children << el
    stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
    # NOTE(review): @src[4] is presumably set when the tag is self-closing - confirm against
    # HTML_TAG_RE.
    if !@src[4] && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
      warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
    elsif !@src[4]
      # Parse the element content up to the matching closing tag; if there is none, the rest
      # of the source is added as text of the element.
      if parse_spans(el, stop_re, (do_parsing ? nil : [:span_html]))
        @src.scan(stop_re)
      else
        warning("Found no end tag for '#{el.value}' - auto-closing it")
        add_text(@src.scan(/.*/m), el)
      end
    end
    Newstile::Parser::Html::ElementConverter.new(@doc).process(el) if @doc.options[:html_to_native]
  else
    # Nothing HTML-like matched: consume one character as plain text.
    add_text(@src.scan(/./))
  end
end

#parse_span_ialObject

Parse the inline attribute list at the current location.



96
97
98
99
100
101
102
103
104
105
106
# File 'lib/newstile/parser/newstile/attribute_list.rb', line 96

# Parse the inline attribute list at the current location.
#
# The attribute list is applied to the last child of the current tree, both to its :ial option
# and to its attributes. If there is no last child or it is a :text element, a warning is
# issued and the attribute list is ignored.
def parse_span_ial
  @src.pos += @src.matched_size
  target = @tree.children.last
  if target.nil? || target.type == :text
    warning("Ignoring span IAL because preceding element is just text")
  else
    parsed = Utils::OrderedHash.new
    parse_attribute_list(@src[1], parsed)
    target.options[:ial] ||= Utils::OrderedHash.new
    update_ial_with_ial(target.options[:ial], parsed)
    update_attr_with_ial(target.attr, parsed)
  end
end

#parse_summaryObject

Parse the summary at the current location.



48
49
50
51
52
53
54
55
56
57
# File 'lib/newstile/parser/newstile/blockquote.rb', line 48

# Parse the summary at the current location.
#
# Scans one SUMMARY_MATCH chunk, removes every SUMMARY_START marker from it and either appends
# the text (after a newline) to a directly preceding :summary element or creates a new :summary
# element containing the text. Always returns +true+.
def parse_summary
  # Use the non-destructive gsub here: gsub! returns nil when nothing was replaced, which made
  # the chomp/lstrip calls below raise for a chunk that contains no SUMMARY_START marker.
  result = @src.scan(SUMMARY_MATCH).gsub(SUMMARY_START, '')
  if @tree.children.last && @tree.children.last.type == :summary
    @tree.children.last.children.first.value << "\n" << result.chomp
  else
    @tree.children << new_block_el(:summary)
    @tree.children.last.children << Element.new(@text_type, result.lstrip.chomp)
  end
  true
end

#parse_tableObject

Parse the table at the current location.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/newstile/parser/newstile/table.rb', line 37

# Parse the table at the current location.
#
# Returns +false+ (with the scanner position restored) when the table is not located between
# block boundaries or when it has no body; otherwise the :table element is appended to the
# current tree and +true+ is returned.
def parse_table
  return false if !after_block_boundary?

  orig_pos = @src.pos
  table = new_block_el(:table, nil, nil, :alignment => [])
  # Remember whether the first table line starts with (optional whitespace and) a pipe.
  leading_pipe = (@src.check(TABLE_LINE) =~ /^\s*\|/)
  # Skip a separator line at the very start of the table, if there is one.
  @src.scan(TABLE_SEP_LINE)

  rows = []
  has_footer = false
  columns = 0

  # Moves the rows collected so far into a new container element of the given type. A :tbody
  # container is not created anymore once the footer separator has been seen, unless forced.
  add_container = lambda do |type, force|
    if force || type != :tbody || !has_footer
      cont = Element.new(type)
      cont.children, rows = rows, []
      table.children << cont
    end
  end

  while !@src.eos?
    break if !@src.check(TABLE_LINE)
    if @src.scan(TABLE_SEP_LINE) && !rows.empty?
      if table.options[:alignment].empty? && !has_footer
        # First separator line after some rows: those rows form the table header and the
        # separator line defines the column alignments.
        add_container.call(:thead, false)
        table.options[:alignment] = @src[1].scan(TABLE_HSEP_ALIGN).map do |left, right|
          (left.empty? && right.empty? && :default) || (right.empty? && :left) || (left.empty? && :right) || :center
        end
      else # treat as normal separator line
        add_container.call(:tbody, false)
      end
    elsif @src.scan(TABLE_FSEP_LINE)
      # Footer separator: close the current body; the following rows belong to the footer.
      add_container.call(:tbody, true) if !rows.empty?
      has_footer = true
    elsif @src.scan(TABLE_ROW_LINE)
      trow = Element.new(:tr)
      # NOTE(review): the appended space presumably keeps String#split from dropping a trailing
      # empty cell - confirm.
      cells = (@src[1] + ' ').split(/\|/)
      i = 0
      # A cell ending in an odd number of backslashes means that the pipe was escaped: drop the
      # last backslash and merge the cell with the following one.
      while i < cells.length - 1
        backslashes = cells[i].scan(/\\+$/).first
        if backslashes && backslashes.length % 2 == 1
          cells[i] = cells[i].chop + '|' + cells[i+1]
          cells.delete_at(i+1)
        else
          i += 1
        end
      end
      # Remove the empty cells produced by a leading and a trailing pipe.
      cells.shift if leading_pipe && cells.first.strip.empty?
      cells.pop if cells.last.strip.empty?
      cells.each do |cell_text|
        tcell = Element.new(:td)
        tcell.children << Element.new(:raw_text, cell_text.strip)
        trow.children << tcell
      end
      columns = [columns, cells.length].max
      rows << trow
    else
      break
    end
  end

  # The table has to be followed by a block boundary, otherwise it is not a table.
  if !before_block_boundary?
    @src.pos = orig_pos
    return false
  end

  # The remaining rows form the footer if a footer separator was seen, else the body.
  add_container.call(has_footer ? :tfoot : :tbody, false) if !rows.empty?

  if !table.children.any? {|c| c.type == :tbody}
    warning("Found table without body - ignoring it")
    @src.pos = orig_pos
    return false
  end

  # adjust all table rows to have equal number of columns, same for alignment defs
  table.children.each do |kind|
    kind.children.each do |row|
      (columns - row.children.length).times do
        row.children << Element.new(:td)
      end
      # Cells of header rows are turned into :th elements.
      row.children.each {|el| el.type = :th} if kind.type == :thead
    end
  end
  if table.options[:alignment].length > columns
    table.options[:alignment] = table.options[:alignment][0...columns]
  else
    table.options[:alignment] += [:default] * (columns - table.options[:alignment].length)
  end

  @tree.children << table

  true
end

#parse_typographic_symsObject

Parse the typographic symbols at the current location.



37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/newstile/parser/newstile/typographic_symbol.rb', line 37

# Parse the typographic symbols at the current location.
#
# A Symbol found in TYPOGRAPHIC_SYMS_SUBST for the matched text becomes a :typographic_sym
# element. Otherwise two :entity elements are added: 'lt' entities when the matched text is
# '\<<' and 'gt' entities in every other case.
def parse_typographic_syms
  @src.pos += @src.matched_size
  matched = @src.matched
  substitute = TYPOGRAPHIC_SYMS_SUBST[matched]
  return @tree.children << Element.new(:typographic_sym, substitute) if substitute.kind_of?(Symbol)

  entity_name = (matched == '\\<<' ? 'lt' : 'gt')
  2.times do
    @tree.children << Element.new(:entity, ::Newstile::Utils::Entities.entity(entity_name))
  end
  @tree.children
end

#replace_abbreviations(el, regexps = nil) ⇒ Object

Replace the abbreviation text with elements.



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/newstile/parser/newstile/abbreviation.rb', line 41

# Replace the abbreviation text with elements.
#
# Walks the children of +el+ recursively. Every :text child is split into :text and
# :abbreviation elements wherever one of the abbreviations defined in the document occurs on
# a word boundary. Does nothing when the document has no abbreviation definitions.
#
# +regexps+ is used for the recursive calls: its first entry matches an abbreviation, its last
# entry is the look-ahead used to find the next occurrence.
def replace_abbreviations(el, regexps = nil)
  definitions = @doc.parse_infos[:abbrev_defs]
  return if definitions.empty?

  unless regexps
    any_abbrev = Regexp.union(*definitions.keys.map {|key| /#{Regexp.escape(key)}/})
    # the look-ahead should only match on word boundaries
    regexps = [any_abbrev, /(?=(?:\W|^)#{any_abbrev}(?!\w))/]
  end

  el.children.map! do |child|
    if child.type != :text
      replace_abbreviations(child, regexps)
      next child
    end

    pieces = []
    scanner = StringScanner.new(child.value)
    while (leading = scanner.scan_until(regexps.last))
      # The character in front of the abbreviation (if any) still belongs to the text.
      leading += scanner.scan(/\W|^/)
      abbreviation = scanner.scan(regexps.first)
      pieces << Element.new(:text, leading) << Element.new(:abbreviation, abbreviation)
    end
    pieces << Element.new(:text, extract_string(scanner.pos..-1, scanner))
  end.flatten!
end

#update_ial_with_ial(ial, opts) ⇒ Object

Update the ial with the information from the inline attribute list opts.



44
45
46
47
48
49
50
51
52
53
# File 'lib/newstile/parser/newstile/attribute_list.rb', line 44

# Update the ial with the information from the inline attribute list opts.
#
# The :refs entry of +opts+ is appended as one element to the :refs list of +ial+. A 'class'
# value is appended, separated by a space, to an existing 'class' value; every other entry
# with a String key overwrites the entry in +ial+. Entries with other keys are skipped.
def update_ial_with_ial(ial, opts)
  ial[:refs] = [] unless ial[:refs]
  ial[:refs] << opts[:refs]
  opts.each do |key, value|
    next unless key == 'class' || key.kind_of?(String)
    ial[key] = (key == 'class' ? ((ial[key] || '') + " #{value}").lstrip : value)
  end
end