Class: Newstile::Parser::Newstile
- Includes:
- Newstile, Html::Parser
- Defined in:
- lib/newstile/parser/newstile.rb,
lib/newstile/parser/newstile/eob.rb,
lib/newstile/parser/newstile/html.rb,
lib/newstile/parser/newstile/link.rb,
lib/newstile/parser/newstile/list.rb,
lib/newstile/parser/newstile/math.rb,
lib/newstile/parser/newstile/table.rb,
lib/newstile/parser/newstile/header.rb,
lib/newstile/parser/newstile/autolink.rb,
lib/newstile/parser/newstile/codespan.rb,
lib/newstile/parser/newstile/emphasis.rb,
lib/newstile/parser/newstile/footnote.rb,
lib/newstile/parser/newstile/codeblock.rb,
lib/newstile/parser/newstile/extension.rb,
lib/newstile/parser/newstile/paragraph.rb,
lib/newstile/parser/newstile/blank_line.rb,
lib/newstile/parser/newstile/blockquote.rb,
lib/newstile/parser/newstile/line_break.rb,
lib/newstile/parser/newstile/html_entity.rb,
lib/newstile/parser/newstile/abbreviation.rb,
lib/newstile/parser/newstile/smart_quotes.rb,
lib/newstile/parser/newstile/escaped_chars.rb,
lib/newstile/parser/newstile/attribute_list.rb,
lib/newstile/parser/newstile/block_boundary.rb,
lib/newstile/parser/newstile/horizontal_rule.rb,
lib/newstile/parser/newstile/typographic_symbol.rb
Overview
Used for parsing a document in newstile format.
If you want to extend the functionality of the parser, you need to do the following:
-
Create a new subclass
-
add the needed parser methods
-
modify the @block_parsers and @span_parsers variables and add the names of your parser methods
Here is a small example for an extended parser class that parses ERB style tags as raw text if they are used as span level elements (an equivalent block level parser should probably also be made to handle the block case):
require 'newstile/parser/newstile'
class Newstile::Parser::ERBNewstile < Newstile::Parser::Newstile
def initialize(doc)
super(doc)
@span_parsers.unshift(:erb_tags)
end
ERB_TAGS_START = /<%.*?%>/
def parse_erb_tags
@src.pos += @src.matched_size
@tree.children << Element.new(:raw, @src.matched)
end
define_parser(:erb_tags, ERB_TAGS_START, '<%')
end
The new parser can be used like this:
require 'newstile/document'
# require the file with the above parser class
Newstile::Document.new(input_text, :input => 'ERBNewstile').to_html
Defined Under Namespace
Classes: Data
Constant Summary collapse
- EOB_MARKER =
/^\^\s*?\n/
- HTML_BLOCK_START =
/^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
- HTML_SPAN_START =
/<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
- PUNCTUATION_CHARS =
"_.:,;!?-"
- LINK_ID_CHARS =
/[a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
- LINK_ID_NON_CHARS =
/[^a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
- LINK_DEFINITION_START =
/^#{OPT_SPACE}\[(#{LINK_ID_CHARS}+)\]:[ \t]*(?:<(.*?)>|([^'"\n]*?\S[^'"\n]*?))[ \t]*?(?:\n?[ \t]*?(["'])(.+?)\4[ \t]*?)?\n/
- LINK_TEXT_BRACKET_RE =
/\\\[|\\\]|\[|\]/
- LINK_INLINE_ID_RE =
/\s*?\[(#{LINK_ID_CHARS}+)?\]/
- LINK_INLINE_TITLE_RE =
/\s*?(["'])(.+?)\1\s*?\)/
- LINK_START =
/!?\[(?=[^^])/
- NEWSTILE_LINK_START =
/!?([^\\]?)\"([^\"]+)\"\:([\S]+[\w\/])/
- LIST_ITEM_IAL =
/^\s*(#{IAL_SPAN_START})?\s*\n/
- LIST_START_UL =
/^(#{OPT_SPACE}[+*])([\t| ].*?\n)/
- LIST_START_OL =
/^(#{OPT_SPACE}\d+\.)([\t| ].*?\n)/
- LIST_START =
/#{LIST_START_UL}|#{LIST_START_OL}/
- DEFINITION_LIST_START =
/^(#{OPT_SPACE}:)([\t| ].*?\n)/
- BLOCK_MATH_START =
/^#{OPT_SPACE}(\\)?\$\$(.*?)\$\$\s*?\n/m
- INLINE_MATH_START =
/\$\$(.*?)\$\$/
- TABLE_SEP_LINE =
/^([+|: -]*?-[+|: -]*?)[ \t]*\n/
- TABLE_HSEP_ALIGN =
/[ ]?(:?)-+(:?)[ ]?/
- TABLE_FSEP_LINE =
/^[+|: =]*?=[+|: =]*?[ \t]*\n/
- TABLE_ROW_LINE =
/^(.*?)[ \t]*\n/
- TABLE_LINE =
/(?:\||.*?[^\\\n]\|).*?\n/
- TABLE_START =
/^#{OPT_SPACE}(?=\S)#{TABLE_LINE}/
- HEADER_ID =
/(?:[ \t]\{#(\w[\w-]*)\})?/
- SETEXT_HEADER_START =
/^(#{OPT_SPACE}[^ \t].*?)#{HEADER_ID}[ \t]*?\n(-|=)+\s*?\n/
- ATX_HEADER_START =
/^\#{1,6}/
- ATX_HEADER_MATCH =
/^(\#{1,6})(.+?)\s*?#*#{HEADER_ID}\s*?\n/
- NEWSTILE_HEADER_START =
/^!{1,6} /
- NEWSTILE_HEADER_MATCH =
/^(!{1,6}) +(.+?)\s*?\!*#{HEADER_ID}\s*?\n/
- ACHARS =
'[[:alnum:]]'
- AUTOLINK_START_STR =
"<((mailto|https?|ftps?):.+?|[-.#{ACHARS}]+@[-#{ACHARS}]+(\.[-#{ACHARS}]+)*\.[a-z]+)>"
- AUTOLINK_START =
/#{AUTOLINK_START_STR}/
- CODESPAN_DELIMITER =
/`+/
- EMPHASIS_START =
/(?:\*\*?|__?)/
- FOOTNOTE_DEFINITION_START =
/^#{OPT_SPACE}\[\^(#{ALD_ID_NAME})\]:\s*?(.*?\n#{CODEBLOCK_MATCH})/
- FOOTNOTE_MARKER_START =
/\[\^(#{ALD_ID_NAME})\]/
- CODEBLOCK_START =
INDENT
- CODEBLOCK_MATCH =
/(?:#{BLANK_LINE}?(?:#{INDENT}[ \t]*\S.*\n)+(?:(?!#{BLANK_LINE} {0,3}\S|#{IAL_BLOCK_START}|#{EOB_MARKER}|^#{OPT_SPACE}#{LAZY_END_HTML_STOP}|^#{OPT_SPACE}#{LAZY_END_HTML_START})^[ \t]*\S.*\n)*)*/
- FENCED_CODEBLOCK_START =
/^~{3,}/
- FENCED_CODEBLOCK_MATCH =
/^(~{3,})\s*?\n(.*?)^\1~*\s*?\n/m
- EXT_STOP_STR =
"\\{:/(%s)?\\}"
- EXT_START_STR =
"\\{::(\\w+)(?:\\s(#{ALD_ANY_CHARS}*?)|)(\\/)?\\}"
- EXT_SPAN_START =
/#{EXT_START_STR}|#{EXT_STOP_STR % ALD_ID_NAME}/
- EXT_BLOCK_START =
/^#{OPT_SPACE}(?:#{EXT_START_STR}|#{EXT_STOP_STR % ALD_ID_NAME})\s*?\n/
- EXT_BLOCK_STOP_STR =
"^#{OPT_SPACE}#{EXT_STOP_STR}\s*?\n"
- LAZY_END_HTML_SPAN_ELEMENTS =
HTML_SPAN_ELEMENTS + %w{script}
- LAZY_END_HTML_START =
/<(?>(?!(?:#{LAZY_END_HTML_SPAN_ELEMENTS.join('|')})\b)#{REXML::Parsers::BaseParser::UNAME_STR})\s*(?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\1)*\s*\/?>/m
- LAZY_END_HTML_STOP =
/<\/(?!(?:#{LAZY_END_HTML_SPAN_ELEMENTS.join('|')})\b)#{REXML::Parsers::BaseParser::UNAME_STR}\s*>/m
- PARAGRAPH_START =
/^#{OPT_SPACE}[^ \t].*?\n/
- PARAGRAPH_MATCH =
/(?:^.*\n)+?(?=#{BLANK_LINE}|#{IAL_BLOCK_START}|#{EOB_MARKER}|#{DEFINITION_LIST_START}|^#{OPT_SPACE}#{LAZY_END_HTML_STOP}|^#{OPT_SPACE}#{LAZY_END_HTML_START}|\Z)/
- BLANK_LINE =
/(?:^\s*\n)+/
- BLOCKQUOTE_START =
/^#{OPT_SPACE}> ?/
- BLOCKQUOTE_MATCH =
/(^.*\n)+?(?=#{BLANK_LINE}|#{IAL_BLOCK_START}|#{EOB_MARKER}|^#{OPT_SPACE}#{LAZY_END_HTML_STOP}|^#{OPT_SPACE}#{LAZY_END_HTML_START}|\Z)/
- SUMMARY_START =
/^#{OPT_SPACE}\/\/\. ?/
- SUMMARY_MATCH =
/(^.*\n)+?(?=#{BLANK_LINE}|#{IAL_BLOCK_START}|#{EOB_MARKER}|^#{OPT_SPACE}#{LAZY_END_HTML_STOP}|^#{OPT_SPACE}#{LAZY_END_HTML_START}|\Z)/
- LINE_BREAK =
/( |\\\\)(?=\n)/
- ABBREV_DEFINITION_START =
/^#{OPT_SPACE}\*\[(.+?)\]:(.*?)\n/
- SQ_PUNCT =
'[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
- SQ_CLOSE =
%![^\ \\\\\t\r\n\\[{(-]!
- SQ_RULES =
[
  [/("|')(?=#{SQ_PUNCT}\B)/, [:rquote1]],
  # Special case for double sets of quotes, e.g.:
  #   <p>He said, "'Quoted' words in a larger quote."</p>
  [/(\s?)"'(?=\w)/, [1, :ldquo, :lsquo]],
  [/(\s?)'"(?=\w)/, [1, :lsquo, :ldquo]],
  # Special case for decade abbreviations (the '80s):
  [/(\s?)'(?=\d\ds)/, [1, :rsquo]],
  # Get most opening single/double quotes:
  [/(\s)('|")(?=\w)/, [1, :lquote2]],
  # Single/double closing quotes:
  [/(#{SQ_CLOSE})('|")/, [1, :rquote2]],
  # Special case for e.g. "<i>Custer</i>'s Last Stand."
  [/("|')(\s|s\b|$)/, [:rquote1, 2]],
  # Any remaining single quotes should be opening ones:
  [/(.?)'/m, [1, :lsquo]],
  [/(.?)"/m, [1, :ldquo]],
]
- SQ_SUBSTS =
‘“
{ [:rquote1, '"'] => :rdquo, [:rquote1, "'"] => :rsquo, [:rquote2, '"'] => :rdquo, [:rquote2, "'"] => :rsquo, [:lquote1, '"'] => :ldquo, [:lquote1, "'"] => :lsquo, [:lquote2, '"'] => :ldquo, [:lquote2, "'"] => :lsquo, }
- SMART_QUOTES_RE =
/[^\\]?["']/
- ESCAPED_CHARS =
/\\([\\.*_+`()\[\]{}#!:|"'\$=-])/
- ALD_ID_CHARS =
/[\w-]/
- ALD_ANY_CHARS =
/\\\}|[^\}]/
- ALD_ID_NAME =
/\w#{ALD_ID_CHARS}*/
- ALD_TYPE_KEY_VALUE_PAIR =
/(#{ALD_ID_NAME})=("|')((?:\\\}|\\\2|[^\}\2])*?)\2/
- ALD_TYPE_CLASS_NAME =
/\.(#{ALD_ID_NAME})/
- ALD_TYPE_ID_NAME =
/#(\w[\w:-]*)/
- ALD_TYPE_REF =
/(#{ALD_ID_NAME})/
- ALD_TYPE_ANY =
/(?:\A|\s)(?:#{ALD_TYPE_KEY_VALUE_PAIR}|#{ALD_TYPE_ID_NAME}|#{ALD_TYPE_CLASS_NAME}|#{ALD_TYPE_REF})(?=\s|\Z)/
- ALD_START =
/^#{OPT_SPACE}\{:(#{ALD_ID_NAME}):(#{ALD_ANY_CHARS}+)\}\s*?\n/
- IAL_BLOCK =
/\{:(?!:|\/)(#{ALD_ANY_CHARS}+)\}\s*?\n/
- IAL_BLOCK_START =
/^#{OPT_SPACE}#{IAL_BLOCK}/
- IAL_SPAN_START =
/\{:(#{ALD_ANY_CHARS}+)\}/
- BLOCK_BOUNDARY =
/#{BLANK_LINE}|#{EOB_MARKER}|#{IAL_BLOCK_START}|\Z/
- HR_START =
/^#{OPT_SPACE}(\*|-|_)[ \t]*\1[ \t]*\1[ \t]*(\1|[ \t])*\n/
- TYPOGRAPHIC_SYMS =
[['---', :mdash], ['--', :ndash], ['-. ', :qdash_space], # ['- ', :qdash_space],
 ['...', :hellip], ['\\<<', '<<'], ['\\>>', '>>'],
 ['<< ', :laquo_space], [' >>', :raquo_space], ['<<', :laquo], ['>>', :raquo]]
- TYPOGRAPHIC_SYMS_SUBST =
- TYPOGRAPHIC_SYMS_RE =
/#{TYPOGRAPHIC_SYMS.map {|k,v| Regexp.escape(k)}.join('|')}/
Constants included from Html::Parser
Constants included from Html::Constants
Html::Constants::HTML_ATTRIBUTE_RE, Html::Constants::HTML_BLOCK_ELEMENTS, Html::Constants::HTML_COMMENT_RE, Html::Constants::HTML_DOCTYPE_RE, Html::Constants::HTML_ELEMENTS_WITHOUT_BODY, Html::Constants::HTML_ENTITY_RE, Html::Constants::HTML_INSTRUCTION_RE, Html::Constants::HTML_PARSE_AS, Html::Constants::HTML_PARSE_AS_BLOCK, Html::Constants::HTML_PARSE_AS_RAW, Html::Constants::HTML_PARSE_AS_SPAN, Html::Constants::HTML_SPAN_ELEMENTS, Html::Constants::HTML_TAG_CLOSE_RE, Html::Constants::HTML_TAG_RE
Constants included from Newstile
Instance Attribute Summary collapse
-
#doc ⇒ Object
readonly
Returns the value of attribute doc.
-
#options ⇒ Object
readonly
Returns the value of attribute options.
-
#tree ⇒ Object
readonly
Returns the value of attribute tree.
Instance Method Summary collapse
-
#add_link(el, href, title, alt_text = nil) ⇒ Object
This helper method adds the appropriate attributes to the element
el
of type a
or img
and the element itself to the @tree
. -
#after_block_boundary? ⇒ Boolean
Return
true
if we are after a block boundary. -
#before_block_boundary? ⇒ Boolean
Return
true
if we are before a block boundary. - #handle_extension(name, opts, body, type) ⇒ Object
- #handle_newstile_html_tag(el, closed) ⇒ Object
-
#html_parse_type(val) ⇒ Object
Return the HTML parse type defined by the string
val
, i.e. -
#initialize(doc) ⇒ Newstile
constructor
Create a new Newstile parser object for the Newstile::Document
doc
. -
#parse(source) ⇒ Object
The given source string is parsed and the created
tree
is returned. -
#parse_abbrev_definition ⇒ Object
Parse the abbreviation definition at the current location.
-
#parse_ald ⇒ Object
Parse the attribute list definition at the current location.
-
#parse_attribute_list(str, opts) ⇒ Object
Parse the string
str
and extract all attributes and add all found attributes to the hash opts
. -
#parse_atx_header ⇒ Object
Parse the Atx header at the current location.
-
#parse_autolink ⇒ Object
Parse the autolink at the current location.
-
#parse_blank_line ⇒ Object
Parse the blank line at the current position.
-
#parse_block_extension ⇒ Object
Parse the extension block at the current location.
-
#parse_block_html ⇒ Object
Parse the HTML at the current position as block level HTML.
-
#parse_block_ial ⇒ Object
Parse the inline attribute list at the current location.
-
#parse_block_math ⇒ Object
Parse the math block at the current location.
-
#parse_blockquote ⇒ Object
Parse the blockquote at the current location.
-
#parse_codeblock ⇒ Object
Parse the indented codeblock at the current location.
-
#parse_codeblock_fenced ⇒ Object
Parse the fenced codeblock at the current location.
-
#parse_codespan ⇒ Object
Parse the codespan at the current scanner location.
-
#parse_definition_list ⇒ Object
Parse the definition list at the current location.
-
#parse_emphasis ⇒ Object
Parse the emphasis at the current location.
-
#parse_eob_marker ⇒ Object
Parse the EOB marker at the current location.
-
#parse_escaped_chars ⇒ Object
Parse the backslash-escaped character at the current location.
- #parse_extension_start_tag(type) ⇒ Object
-
#parse_first_list_line(indentation, content) ⇒ Object
Used for parsing the first line of a list item or a definition, i.e.
-
#parse_footnote_definition ⇒ Object
Parse the footnote definition at the current location.
-
#parse_footnote_marker ⇒ Object
Parse the footnote marker at the current location.
-
#parse_horizontal_rule ⇒ Object
Parse the horizontal rule at the current location.
-
#parse_html_entity ⇒ Object
Parse the HTML entity at the current location.
-
#parse_inline_math ⇒ Object
Parse the inline math at the current location.
-
#parse_line_break ⇒ Object
Parse the line break at the current location.
-
#parse_link ⇒ Object
Parse the link at the current scanner position.
-
#parse_link_definition ⇒ Object
Parse the link definition at the current location.
-
#parse_list ⇒ Object
Parse the ordered or unordered list at the current location.
-
#parse_newstile_header ⇒ Object
Parse the newstile header at the current location.
-
#parse_newstile_link ⇒ Object
Parse the newstile link at the current location.
-
#parse_paragraph ⇒ Object
Parse the paragraph at the current location.
-
#parse_setext_header ⇒ Object
Parse the Setext header at the current location.
-
#parse_smart_quotes ⇒ Object
Parse the smart quotes at current location.
-
#parse_span_extension ⇒ Object
Parse the extension span at the current location.
-
#parse_span_html ⇒ Object
Parse the HTML at the current position as span level HTML.
-
#parse_span_ial ⇒ Object
Parse the inline attribute list at the current location.
-
#parse_summary ⇒ Object
Parse the summary block at the current location.
-
#parse_table ⇒ Object
Parse the table at the current location.
-
#parse_typographic_syms ⇒ Object
Parse the typographic symbols at the current location.
-
#replace_abbreviations(el, regexps = nil) ⇒ Object
Replace the abbreviation text with elements.
-
#update_ial_with_ial(ial, opts) ⇒ Object
Update the
ial
with the information from the inline attribute list opts
.
Methods included from Html::Parser
#handle_html_script_tag, #handle_html_start_tag, #parse_raw_html
Methods included from Newstile
Methods inherited from Base
#adapt_source, #add_text, #extract_string, parse, #warning
Constructor Details
#initialize(doc) ⇒ Newstile
Create a new Newstile parser object for the Newstile::Document doc
.
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/newstile/parser/newstile.rb', line 81 def initialize(doc) super(doc) @src = nil @tree = nil @stack = [] @text_type = :raw_text @block_ial = nil @doc.parse_infos[:ald] = {} @doc.parse_infos[:link_defs] = {} @doc.parse_infos[:abbrev_defs] = {} @doc.parse_infos[:footnotes] = {} @block_parsers = [:blank_line, :codeblock, :codeblock_fenced, :blockquote, :summary, :table, :atx_header, :setext_header, :newstile_header, :horizontal_rule, :list, :definition_list, :link_definition, :block_html, :footnote_definition, :abbrev_definition, :ald, :block_math, :block_extension, :block_ial, :eob_marker, :paragraph] @span_parsers = [ :emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link, :newstile_link, :smart_quotes, :inline_math, :span_extension, :span_ial, :html_entity, :typographic_syms, :line_break, :escaped_chars] end |
Instance Attribute Details
#doc ⇒ Object (readonly)
Returns the value of attribute doc.
77 78 79 |
# File 'lib/newstile/parser/newstile.rb', line 77 def doc @doc end |
#options ⇒ Object (readonly)
Returns the value of attribute options.
78 79 80 |
# File 'lib/newstile/parser/newstile.rb', line 78 def options @options end |
#tree ⇒ Object (readonly)
Returns the value of attribute tree.
76 77 78 |
# File 'lib/newstile/parser/newstile.rb', line 76 def tree @tree end |
Instance Method Details
#add_link(el, href, title, alt_text = nil) ⇒ Object
This helper method adds the appropriate attributes to the element el
of type a
or img
and the element itself to the @tree
.
46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/newstile/parser/newstile/link.rb', line 46 def add_link(el, href, title, alt_text = nil) if el.type == :a el.attr['href'] = href else el.attr['src'] = href el.attr['alt'] = alt_text el.children.clear end el.attr['title'] = title if title @tree.children << el end |
#after_block_boundary? ⇒ Boolean
Return true
if we are after a block boundary.
34 35 36 37 |
# File 'lib/newstile/parser/newstile/block_boundary.rb', line 34 def after_block_boundary? !@tree.children.last || @tree.children.last.type == :blank || (@tree.children.last.type == :eob && @tree.children.last.value.nil?) || @block_ial end |
#before_block_boundary? ⇒ Boolean
Return true
if we are before a block boundary.
40 41 42 |
# File 'lib/newstile/parser/newstile/block_boundary.rb', line 40 def before_block_boundary? @src.check(BLOCK_BOUNDARY) end |
#handle_extension(name, opts, body, type) ⇒ Object
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/newstile/parser/newstile/extension.rb', line 67 def handle_extension(name, opts, body, type) case name when 'comment' @tree.children << Element.new(:comment, body, nil, :category => type) if body.kind_of?(String) true when 'nomarkdown' @tree.children << Element.new(:raw, body, nil, :category => type, :type => opts['type'].to_s.split(/\s+/)) if body.kind_of?(String) true when 'options' opts.select do |k,v| k = k.to_sym if Newstile::Options.defined?(k) @doc.options[k] = Newstile::Options.parse(k, v) rescue @doc.options[k] false else true end end.each do |k,v| warning("Unknown newstile option '#{k}'") end @tree.children << Element.new(:eob, :extension) if type == :block true else false end end |
#handle_newstile_html_tag(el, closed) ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/newstile/parser/newstile/html.rb', line 31 def handle_newstile_html_tag(el, closed) parse_type = if @tree.type != :html_element || @tree.options[:parse_type] != :raw (@doc.options[:parse_block_html] ? HTML_PARSE_AS[el.value] : :raw) else :raw end if val = html_parse_type(el.attr.delete('markdown')) parse_type = (val == :default ? HTML_PARSE_AS[el.value] : val) end @src.scan(/[ \t]*\n/) if parse_type == :block el.options[:parse_type] = parse_type if !closed if parse_type == :block end_tag_found = parse_blocks(el) if !end_tag_found warning("Found no end tag for '#{el.value}' - auto-closing it") end elsif parse_type == :span curpos = @src.pos if result = @src.scan_until(/(?=<\/#{el.value}\s*>)/m) add_text(extract_string(curpos...@src.pos, @src), el) @src.scan(HTML_TAG_CLOSE_RE) else add_text(@src.scan(/.*/m), el) warning("Found no end tag for '#{el.value}' - auto-closing it") end else parse_raw_html(el, &method(:handle_newstile_html_tag)) end @src.scan(/[ \t]*\n/) unless (@tree.type == :html_element && @tree.options[:parse_type] == :raw) end end |
#html_parse_type(val) ⇒ Object
Return the HTML parse type defined by the string val
, i.e. raw when “0”, default parsing (return value :default
) when “1”, span parsing when “span” and block parsing when “block”. If val
is nil, then the default parsing mode is used.
69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/newstile/parser/newstile/html.rb', line 69 def html_parse_type(val) case val when "0" then :raw when "1" then :default when "span" then :span when "block" then :block when NilClass then nil else warning("Invalid markdown attribute val '#{val}', using default") nil end end |
#parse(source) ⇒ Object
The given source string is parsed and the created tree
is returned.
113 114 115 116 117 118 119 120 121 122 123 |
# File 'lib/newstile/parser/newstile.rb', line 113 def parse(source) configure_parser tree = Element.new(:root) parse_blocks(tree, adapt_source(source)) update_tree(tree) replace_abbreviations(tree) @doc.parse_infos[:footnotes].each do |name, data| update_tree(data[:content]) end tree end |
#parse_abbrev_definition ⇒ Object
Parse the abbreviation definition at the current location.
30 31 32 33 34 35 36 37 |
# File 'lib/newstile/parser/newstile/abbreviation.rb', line 30 def parse_abbrev_definition @src.pos += @src.matched_size abbrev_id, abbrev_text = @src[1], @src[2].strip warning("Duplicate abbreviation ID '#{abbrev_id}' - overwriting") if @doc.parse_infos[:abbrev_defs][abbrev_id] @doc.parse_infos[:abbrev_defs][abbrev_id] = abbrev_text @tree.children << Element.new(:eob, :abbrev_def) true end |
#parse_ald ⇒ Object
Parse the attribute list definition at the current location.
67 68 69 70 71 72 |
# File 'lib/newstile/parser/newstile/attribute_list.rb', line 67 def parse_ald @src.pos += @src.matched_size parse_attribute_list(@src[2], @doc.parse_infos[:ald][@src[1]] ||= Utils::OrderedHash.new) @tree.children << Element.new(:eob, :ald) true end |
#parse_attribute_list(str, opts) ⇒ Object
Parse the string str
and extract all attributes and add all found attributes to the hash opts
.
29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/newstile/parser/newstile/attribute_list.rb', line 29 def parse_attribute_list(str, opts) str.scan(ALD_TYPE_ANY).each do |key, sep, val, id_attr, class_attr, ref| if ref (opts[:refs] ||= []) << ref elsif class_attr opts['class'] = ((opts['class'] || '') + " #{class_attr}").lstrip elsif id_attr opts['id'] = id_attr else opts[key] = val.gsub(/\\(\}|#{sep})/, "\\1") end end end |
#parse_atx_header ⇒ Object
Parse the Atx header at the current location.
51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/newstile/parser/newstile/header.rb', line 51 def parse_atx_header return false if !after_block_boundary? result = @src.scan(ATX_HEADER_MATCH) level, text, id = @src[1], @src[2].strip, @src[3] el = new_block_el(:header, nil, nil, :level => level.length, :raw_text => text) add_text(text, el) el.attr['id'] = id if id @tree.children << el true end |
#parse_autolink ⇒ Object
Parse the autolink at the current location.
42 43 44 45 46 47 48 49 |
# File 'lib/newstile/parser/newstile/autolink.rb', line 42 def parse_autolink @src.pos += @src.matched_size href = @src[1] href= "mailto:#{href}" if @src[2].nil? el = Element.new(:a, nil, {'href' => href}) add_text(@src[1].sub(/^mailto:/, ''), el) @tree.children << el end |
#parse_blank_line ⇒ Object
Parse the blank line at the current position.
30 31 32 33 34 35 36 37 38 |
# File 'lib/newstile/parser/newstile/blank_line.rb', line 30 def parse_blank_line @src.pos += @src.matched_size if @tree.children.last && @tree.children.last.type == :blank @tree.children.last.value += @src.matched else @tree.children << new_block_el(:blank, @src.matched) end true end |
#parse_block_extension ⇒ Object
Parse the extension block at the current location.
102 103 104 |
# File 'lib/newstile/parser/newstile/extension.rb', line 102 def parse_block_extension parse_extension_start_tag(:block) end |
#parse_block_html ⇒ Object
Parse the HTML at the current position as block level HTML.
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/newstile/parser/newstile/html.rb', line 86 def parse_block_html if result = @src.scan(HTML_COMMENT_RE) @tree.children << Element.new(:xml_comment, result, nil, :category => :block) @src.scan(/[ \t]*\n/) true elsif result = @src.scan(HTML_INSTRUCTION_RE) @tree.children << Element.new(:xml_pi, result, nil, :category => :block) @src.scan(/[ \t]*\n/) true else if result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1]) @src.pos += @src.matched_size handle_html_start_tag(&method(:handle_newstile_html_tag)) Newstile::Parser::Html::ElementConverter.new(@doc).process(@tree.children.last) if @doc.options[:html_to_native] true elsif result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1]) name = @src[1] if @tree.type == :html_element && @tree.value == name @src.pos += @src.matched_size throw :stop_block_parsing, :found else false end else false end end end |
#parse_block_ial ⇒ Object
Parse the inline attribute list at the current location.
80 81 82 83 84 85 86 87 88 89 |
# File 'lib/newstile/parser/newstile/attribute_list.rb', line 80 def parse_block_ial @src.pos += @src.matched_size if @tree.children.last && @tree.children.last.type != :blank && @tree.children.last.type != :eob parse_attribute_list(@src[1], @tree.children.last.options[:ial] ||= Utils::OrderedHash.new) @tree.children << Element.new(:eob, :ial) unless @src.check(IAL_BLOCK_START) else parse_attribute_list(@src[1], @block_ial = Utils::OrderedHash.new) end true end |
#parse_block_math ⇒ Object
Parse the math block at the current location.
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/newstile/parser/newstile/math.rb', line 32 def parse_block_math if !after_block_boundary? return false elsif @src[1] @src.scan(/^#{OPT_SPACE}\\/) return false end orig_pos = @src.pos @src.pos += @src.matched_size data = @src[2] if before_block_boundary? @tree.children << new_block_el(:math, data) true else @src.pos = orig_pos false end end |
#parse_blockquote ⇒ Object
Parse the blockquote at the current location.
35 36 37 38 39 40 |
# File 'lib/newstile/parser/newstile/blockquote.rb', line 35 def parse_blockquote el = new_block_el(:blockquote) @tree.children << el parse_blocks(el, @src.scan(BLOCKQUOTE_MATCH).gsub!(BLOCKQUOTE_START, '')) true end |
#parse_codeblock ⇒ Object
Parse the indented codeblock at the current location.
36 37 38 39 |
# File 'lib/newstile/parser/newstile/codeblock.rb', line 36 def parse_codeblock @tree.children << new_block_el(:codeblock, @src.scan(CODEBLOCK_MATCH).gsub(/\n( {0,3}\S)/, ' \\1').gsub!(INDENT, '')) true end |
#parse_codeblock_fenced ⇒ Object
Parse the fenced codeblock at the current location.
47 48 49 50 51 52 53 54 55 |
# File 'lib/newstile/parser/newstile/codeblock.rb', line 47 def parse_codeblock_fenced if @src.check(FENCED_CODEBLOCK_MATCH) @src.pos += @src.matched_size @tree.children << new_block_el(:codeblock, @src[2]) true else false end end |
#parse_codespan ⇒ Object
Parse the codespan at the current scanner location.
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/newstile/parser/newstile/codespan.rb', line 30 def parse_codespan result = @src.scan(CODESPAN_DELIMITER) simple = (result.length == 1) reset_pos = @src.pos if simple && @src.pre_match =~ /\s\Z/ && @src.match?(/\s/) add_text(result) return end text = @src.scan_until(/#{result}/) if text text.sub!(/#{result}\Z/, '') if !simple text = text[1..-1] if text[0..0] == ' ' text = text[0..-2] if text[-1..-1] == ' ' end @tree.children << Element.new(:codespan, text) else @src.pos = reset_pos add_text(result) end end |
#parse_definition_list ⇒ Object
Parse the definition list at the current location.
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 |
# File 'lib/newstile/parser/newstile/list.rb', line 147 def parse_definition_list children = @tree.children if !children.last || (children.length == 1 && children.last.type != :p ) || (children.length >= 2 && children[-1].type != :p && (children[-1].type != :blank || children[-1].value != "\n" || children[-2].type != :p)) return false end first_as_para = false deflist = new_block_el(:dl) para = @tree.children.pop if para.type == :blank para = @tree.children.pop first_as_para = true end para.children.first.value.split("\n").each do |term| el = Element.new(:dt) el.children << Element.new(:raw_text, term) deflist.children << el end item = nil content_re, lazy_re, indent_re = nil def_start_re = DEFINITION_LIST_START last_is_blank = false while !@src.eos? if @src.scan(def_start_re) item = Element.new(:dd) item.options[:first_as_para] = first_as_para item.value, indentation, content_re, lazy_re, indent_re = parse_first_list_line(@src[1].length, @src[2]) deflist.children << item item.value.sub!(/^#{IAL_SPAN_START}\s*/) do |match| parse_attribute_list($~[1], item.options[:ial] ||= {}) '' end def_start_re = /^( {0,#{[3, indentation - 1].min}}:)([\t| ].*?\n)/ first_as_para = false last_is_blank = false elsif @src.check(EOB_MARKER) break elsif (result = @src.scan(content_re)) || (!last_is_blank && (result = @src.scan(lazy_re))) result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) } result.sub!(indent_re, '') item.value << result first_as_para = false last_is_blank = false elsif result = @src.scan(BLANK_LINE) first_as_para = true item.value << result last_is_blank = true else break end end last = nil deflist.children.each do |it| next if it.type == :dt parse_blocks(it, it.value) it.value = nil next if it.children.size == 0 if it.children.last.type == :blank last = it.children.pop else last = nil end if it.children.first.type == :p && !it.options.delete(:first_as_para) it.children.first.children.first.value += "\n" if it.children.size > 1 it.children.first.options[:transparent] = true end end if @tree.children.length >= 1 && @tree.children.last.type == :dl @tree.children[-1].children += deflist.children elsif @tree.children.length >= 2 && @tree.children[-1].type == :blank && @tree.children[-2].type == :dl @tree.children.pop @tree.children[-1].children += deflist.children else @tree.children << deflist end @tree.children << last if !last.nil? true end |
#parse_emphasis ⇒ Object
Parse the emphasis at the current location.
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/newstile/parser/newstile/emphasis.rb', line 30 def parse_emphasis result = @src.scan(EMPHASIS_START) element = (result.length == 2 ? :strong : :em) type = (result =~ /_/ ? '_' : '*') reset_pos = @src.pos if (type == '_' && @src.pre_match =~ /[[:alpha:]]\z/ && @src.check(/[[:alpha:]]/)) || @src.check(/\s/) || @tree.type == element || @stack.any? {|el, _| el.type == element} add_text(result) return end sub_parse = lambda do |delim, elem| el = Element.new(elem) stop_re = /#{Regexp.escape(delim)}/ found = parse_spans(el, stop_re) do (@src.pre_match[-1, 1] !~ /\s/) && (elem != :em || !@src.match?(/#{Regexp.escape(delim*2)}(?!#{Regexp.escape(delim)})/)) && (type != '_' || !@src.match?(/#{Regexp.escape(delim)}[[:alpha:]]/)) && el.children.size > 0 end [found, el, stop_re] end found, el, stop_re = sub_parse.call(result, element) if !found && element == :strong && @tree.type != :em @src.pos = reset_pos - 1 found, el, stop_re = sub_parse.call(type, :em) end if found @src.scan(stop_re) @tree.children << el else @src.pos = reset_pos add_text(result) end end |
#parse_eob_marker ⇒ Object
Parse the EOB marker at the current location.
30 31 32 33 34 |
# File 'lib/newstile/parser/newstile/eob.rb', line 30 def parse_eob_marker @src.pos += @src.matched_size @tree.children << new_block_el(:eob) true end |
#parse_escaped_chars ⇒ Object
Parse the backslash-escaped character at the current location.
30 31 32 33 |
# File 'lib/newstile/parser/newstile/escaped_chars.rb', line 30 def parse_escaped_chars @src.pos += @src.matched_size add_text(@src[1]) end |
#parse_extension_start_tag(type) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/newstile/parser/newstile/extension.rb', line 29 def parse_extension_start_tag(type) orig_pos = @src.pos @src.pos += @src.matched_size error_block = lambda do |msg| warning(msg) @src.pos = orig_pos add_text(@src.scan(/./)) if type == :span false end if @src[4] || @src.matched == '{:/}' name = (@src[4] ? "for '#{@src[4]}' " : '') return error_block.call("Invalid extension stop tag #{name}found - ignoring it") end ext = @src[1] opts = {} body = nil parse_attribute_list(@src[2] || '', opts) if !@src[3] stop_re = (type == :block ? /#{EXT_BLOCK_STOP_STR % ext}/ : /#{EXT_STOP_STR % ext}/) if result = @src.scan_until(stop_re) body = result.sub!(stop_re, '') body.chomp! if type == :block else return error_block.call("No stop tag for extension '#{ext}' found - ignoring it") end end if !handle_extension(ext, opts, body, type) error_block.call("Invalid extension with name '#{ext}' specified - ignoring it") else true end end |
#parse_first_list_line(indentation, content) ⇒ Object
Used for parsing the first line of a list item or a definition, i.e. the line with list item marker or the definition marker.
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/newstile/parser/newstile/list.rb', line 36 def parse_first_list_line(indentation, content) if content =~ LIST_ITEM_IAL indentation = 4 else while content =~ /^ *\t/ temp = content.scan(/^ */).first.length + indentation content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4} end indentation += content.scan(/^ */).first.length end content.sub!(/^\s*/, '') indent_re = /^ {#{indentation}}/ content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*\S.*\n/ lazy_re = /(?!^ {0,#{[indentation, 3].min}}(?:#{IAL_BLOCK}|#{LAZY_END_HTML_STOP}|#{LAZY_END_HTML_START})).*\S.*\n/ [content, indentation, content_re, lazy_re, indent_re] end |
#parse_footnote_definition ⇒ Object
Parse the footnote definition at the current location.
34 35 36 37 38 39 40 41 42 43 |
# File 'lib/newstile/parser/newstile/footnote.rb', line 34
#
# Parse the footnote definition at the current location.
#
# The de-indented body (second capture group) is parsed as block content into
# a new :footnote_def element, which is stored in the document's footnote
# table under the footnote name (first capture group). A warning is issued if
# the name was already defined. An :eob element is appended to the current
# tree. Always returns true.
def parse_footnote_definition
  @src.pos += @src.matched_size

  definition = Element.new(:footnote_def)
  parse_blocks(definition, @src[2].gsub(INDENT, ''))

  name = @src[1]
  footnotes = @doc.parse_infos[:footnotes]
  warning("Duplicate footnote name '#{name}' - overwriting") if footnotes[name]
  footnotes[name] = {:content => definition}

  @tree.children << Element.new(:eob, :footnote_def)
  true
end
#parse_footnote_marker ⇒ Object
Parse the footnote marker at the current location.
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/newstile/parser/newstile/footnote.rb', line 50
#
# Parse the footnote marker at the current location.
#
# If a definition exists for the footnote name (first capture group) and no
# marker for it is attached to the element tree yet, a :footnote element is
# appended to the current tree and remembered in the definition. Otherwise a
# warning is issued and the matched source is added as plain text.
def parse_footnote_marker
  @src.pos += @src.matched_size
  fn_def = @doc.parse_infos[:footnotes][@src[1]]
  if fn_def
    # An earlier marker only counts if every element on its recorded stack is
    # still a child of its predecessor, i.e. the marker is still in the tree.
    valid = fn_def[:marker] && fn_def[:marker].options[:stack][0..-2].zip(fn_def[:marker].options[:stack][1..-1]).all? do |par, child|
      par.children.include?(child)
    end
    if !fn_def[:marker] || !valid
      fn_def[:marker] = Element.new(:footnote, nil, nil, :name => @src[1])
      # Record the chain of elements from the parser stack down to the marker.
      fn_def[:marker].options[:stack] = [@stack.map {|s| s.first}, @tree, fn_def[:marker]].flatten.compact
      @tree.children << fn_def[:marker]
    else
      warning("Footnote marker '#{@src[1]}' already appeared in document, ignoring newly found marker")
      add_text(@src.matched)
    end
  else
    warning("Footnote definition for '#{@src[1]}' not found")
    add_text(@src.matched)
  end
end
#parse_horizontal_rule ⇒ Object
Parse the horizontal rule at the current location.
30 31 32 33 34 |
# File 'lib/newstile/parser/newstile/horizontal_rule.rb', line 30
#
# Parse the horizontal rule at the current location: skip the matched source
# and append an :hr block element to the current tree. Always returns true.
def parse_horizontal_rule
  @src.pos += @src.matched_size
  rule = new_block_el(:hr)
  @tree.children << rule
  true
end
#parse_html_entity ⇒ Object
Parse the HTML entity at the current location.
30 31 32 33 34 |
# File 'lib/newstile/parser/newstile/html_entity.rb', line 30
#
# Parse the HTML entity at the current location and append an :entity element
# to the current tree. The matched source text is kept in the :original option.
def parse_html_entity
  @src.pos += @src.matched_size
  # Capture 1 is passed on as is, capture 2 is read as a decimal number and
  # capture 3 as a hexadecimal number.
  lookup_key = @src[1] || (@src[2] && @src[2].to_i) || @src[3].hex
  entity = ::Newstile::Utils::Entities.entity(lookup_key)
  @tree.children << Element.new(:entity, entity, nil, :original => @src.matched)
end
#parse_inline_math ⇒ Object
Parse the inline math at the current location.
56 57 58 59 |
# File 'lib/newstile/parser/newstile/math.rb', line 56
#
# Parse the inline math at the current location: the first capture group
# becomes the value of a new :math element of category :span.
def parse_inline_math
  @src.pos += @src.matched_size
  formula = @src[1]
  @tree.children << Element.new(:math, formula, nil, :category => :span)
end
#parse_line_break ⇒ Object
Parse the line break at the current location.
30 31 32 33 |
# File 'lib/newstile/parser/newstile/line_break.rb', line 30
#
# Parse the line break at the current location: skip the matched source and
# append a :br element to the current tree.
def parse_line_break
  @src.pos += @src.matched_size
  line_break = Element.new(:br)
  @tree.children << line_break
end
#parse_link ⇒ Object
Parse the link at the current scanner position. This method is used to parse normal links as well as image links.
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# Parse the link at the current scanner position. Handles normal links and
# image links; a match starting with "!" is treated as an image (:img),
# anything else as a normal link (:a).
#
# Outline of the steps performed by the code below:
# * A normal link inside another link or image is not allowed; the link start
#   is added as plain text.
# * The link text is parsed as span content up to the "]" that balances the
#   opening bracket (nested image brackets are accounted for). If no such
#   bracket is found, or a normal link has no content, the scanner is reset
#   and the link start is added as text.
# * Reference style: when an explicit link ID follows or no "(" follows, the
#   ID (explicit, or derived from the link text) is looked up in the
#   document's link definitions. An empty or unknown ID resets the scanner and
#   adds the link start as text; an unknown ID also issues a warning.
# * Inline style: the URL is read either from "(<...>" or by scanning with
#   balanced parentheses, optionally followed by a title. If neither a closing
#   parenthesis nor a title is found, the scanner is reset and the link start
#   is added as text.
# File 'lib/newstile/parser/newstile/link.rb', line 65 def parse_link result = @src.scan(LINK_START) reset_pos = @src.pos link_type = (result =~ /^!/ ? :img : :a) # no nested links allowed if link_type == :a && (@tree.type == :img || @tree.type == :a || @stack.any? {|t,s| t && (t.type == :img || t.type == :a)}) add_text(result) return end el = Element.new(link_type) stop_re = /\]|!?\[/ count = 1 found = parse_spans(el, stop_re) do case @src.matched when "[", "![" count += 1 when "]" count -= 1 end count - el.children.select {|c| c.type == :img}.size == 0 end if !found || (link_type == :a && el.children.empty?) @src.pos = reset_pos add_text(result) return end alt_text = extract_string(reset_pos...@src.pos, @src) conv_link_id = alt_text.gsub(/(\s|\n)+/m, ' ').gsub(LINK_ID_NON_CHARS, '').downcase @src.scan(stop_re) # reference style link or no link url if @src.scan(LINK_INLINE_ID_RE) || !@src.check(/\(/) link_id = (@src[1] || conv_link_id).downcase if link_id.empty? @src.pos = reset_pos add_text(result) elsif @doc.parse_infos[:link_defs].has_key?(link_id) add_link(el, @doc.parse_infos[:link_defs][link_id].first, @doc.parse_infos[:link_defs][link_id].last, alt_text) else warning("No link definition for link ID '#{link_id}' found") @src.pos = reset_pos add_text(result) end return end # link url in parentheses if @src.scan(/\(<(.*?)>/) link_url = @src[1] if @src.scan(/\)/) add_link(el, link_url, nil, alt_text) return end else link_url = '' re = /\(|\)|\s(?=['"])/ nr_of_brackets = 0 while temp = @src.scan_until(re) link_url += temp case @src.matched when /\s/ break when '(' nr_of_brackets += 1 when ')' nr_of_brackets -= 1 break if nr_of_brackets == 0 end end link_url = link_url[1..-2].strip if nr_of_brackets == 0 add_link(el, link_url, nil, alt_text) return end end if @src.scan(LINK_INLINE_TITLE_RE) add_link(el, link_url, @src[2], alt_text) else @src.pos = reset_pos add_text(result) end end |
#parse_link_definition ⇒ Object
Parse the link definition at the current location.
33 34 35 36 37 38 39 40 |
# File 'lib/newstile/parser/newstile/link.rb', line 33
#
# Parse the link definition at the current location.
#
# The lower-cased link ID (capture 1) is mapped to [url, title] in the
# document's link definitions, where the URL is capture 2 or 3 and the title
# is capture 5. A warning is issued if the ID is already defined. An :eob
# element is appended to the current tree. Always returns true.
def parse_link_definition
  @src.pos += @src.matched_size

  id = @src[1].downcase
  url = @src[2] || @src[3]
  title = @src[5]

  definitions = @doc.parse_infos[:link_defs]
  warning("Duplicate link ID '#{id}' - overwriting") if definitions[id]
  definitions[id] = [url, title]

  @tree.children << Element.new(:eob, :link_def)
  true
end
#parse_list ⇒ Object
Parse the ordered or unordered list at the current location.
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/newstile/parser/newstile/list.rb', line 60
#
# Parse the ordered or unordered list at the current location.
#
# First pass: the raw text of each list item is collected into +item.value+
# (item start lines, indented or lazy continuation lines and blank lines)
# until an EOB marker, a horizontal rule after a blank line, or any other
# non-matching line is found.
# Second pass: the text of every item is parsed as block content and the
# resulting elements are post-processed (see the comments below).
#
# Always returns true.
def parse_list
  type, list_start_re = (@src.check(LIST_START_UL) ? [:ul, LIST_START_UL] : [:ol, LIST_START_OL])
  list = new_block_el(type)

  item = nil
  content_re, lazy_re, indent_re = nil
  eob_found = false
  nested_list_found = false
  last_is_blank = false
  while !@src.eos?
    if last_is_blank && @src.check(HR_START)
      break
    elsif @src.scan(EOB_MARKER)
      eob_found = true
      break
    elsif @src.scan(list_start_re)
      item = Element.new(:li)
      item.value, indentation, content_re, lazy_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
      list.children << item

      # A span IAL directly after the list marker applies to the list item.
      item.value.sub!(/^#{IAL_SPAN_START}\s*/) do |_match|
        parse_attribute_list($~[1], item.options[:ial] ||= {})
        ''
      end

      # Following items must not be indented deeper than this item's content.
      list_start_re = if type == :ul
                        /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/
                      else
                        /^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/
                      end
      nested_list_found = (item.value =~ LIST_START)
      last_is_blank = false
    elsif (result = @src.scan(content_re)) || (!last_is_blank && (result = @src.scan(lazy_re)))
      result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
      result.sub!(indent_re, '')
      if !nested_list_found && result =~ LIST_START
        item.value << "^\n"
        nested_list_found = true
      end
      item.value << result
      last_is_blank = false
    elsif result = @src.scan(BLANK_LINE)
      nested_list_found = true
      last_is_blank = true
      item.value << result
    else
      break
    end
  end

  @tree.children << list

  last = nil
  list.children.each do |li|
    temp = Element.new(:temp)
    parse_blocks(temp, li.value)
    li.children = temp.children
    li.value = nil
    next if li.children.size == 0

    # Handle the case where an EOB marker is inserted by a block IAL for the first paragraph
    li.children.delete_at(1) if li.children.first.type == :p &&
      li.children.length >= 2 && li.children[1].type == :eob && li.children.first.options[:ial]

    # Decide whether the first paragraph of the item is marked as transparent.
    if li.children.first.type == :p &&
        (li.children.length < 2 || li.children[1].type != :blank ||
         (li == list.children.last && li.children.length == 2 && !eob_found)) &&
        (list.children.last != li || list.children.size == 1 ||
         list.children[0..-2].any? {|cit| cit.children.first.type != :p || cit.children.first.options[:transparent]})
      li.children.first.children.first.value += "\n" if li.children.size > 1 && li.children[1].type != :blank
      li.children.first.options[:transparent] = true
    end

    # A trailing blank element is removed from the item; the one of the last
    # item is re-added after the list below.
    if li.children.last.type == :blank
      last = li.children.pop
    else
      last = nil
    end
  end

  @tree.children << last if !last.nil? && !eob_found

  true
end
#parse_newstile_header ⇒ Object
Parse the newstile header at the current location.
69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/newstile/parser/newstile/header.rb', line 69
#
# Parse the newstile header at the current location.
#
# Returns false without consuming anything unless the scanner is located after
# a block boundary. Otherwise a :header element is appended to the current
# tree: its level is the length of the first capture group, its text the
# second capture group and its 'id' attribute the third one (if present).
# Returns true in that case.
def parse_newstile_header
  return false unless after_block_boundary?

  @src.scan(NEWSTILE_HEADER_MATCH)
  marker, text, id = @src[1], @src[2], @src[3]

  header = new_block_el(:header, nil, nil, :level => marker.length, :raw_text => text)
  add_text(text, header)
  header.attr['id'] = id if id
  @tree.children << header
  true
end
#parse_newstile_link ⇒ Object
Parse the newstile link (or image link) at the current location.
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
# File 'lib/newstile/parser/newstile/link.rb', line 157
#
# Parse the newstile link at the current location.
#
# The source at the current position decides the element type: a leading "!"
# yields an image (:img), anything else a normal link (:a). The first capture
# group, if present, is added as plain text before the element. The second
# capture group is the link text (or the image's 'alt' attribute), the third
# one the 'href' (or 'src') attribute. Always returns true.
def parse_newstile_link
  # StringScanner#pos is a byte offset while String#[] indexes by character,
  # so indexing the source string with the position reads the wrong character
  # once multibyte text precedes the link. #peek reads at the scan pointer.
  link_type = (@src.peek(1) == '!' ? :img : :a)
  @src.pos += @src.matched_size
  add_text @src[1] if @src[1]
  case link_type
  when :a
    text, href = @src[2], @src[3]
    el = Element.new(:a, nil, {'href' => href})
    el.children = [ Element.new(:text, text) ]
    @tree.children << el
  when :img
    alt, src = @src[2], @src[3]
    el = Element.new(:img, nil, {'src' => src, 'alt' => alt})
    @tree.children << el
  end
  true
end
#parse_paragraph ⇒ Object
Parse the paragraph at the current location.
41 42 43 44 45 46 47 48 49 50 |
# File 'lib/newstile/parser/newstile/paragraph.rb', line 41
#
# Parse the paragraph at the current location.
#
# If the last element of the current tree is already a paragraph, the scanned
# text is appended to its first child after a newline. Otherwise a new :p
# element with one text child is appended. Always returns true.
def parse_paragraph
  text = @src.scan(PARAGRAPH_MATCH)
  previous = @tree.children.last
  if previous && previous.type == :p
    previous.children.first.value << "\n" << text.chomp
  else
    paragraph = new_block_el(:p)
    @tree.children << paragraph
    paragraph.children << Element.new(@text_type, text.lstrip.chomp)
  end
  true
end
#parse_setext_header ⇒ Object
Parse the Setext header at the current location.
33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/newstile/parser/newstile/header.rb', line 33
#
# Parse the Setext header at the current location.
#
# Returns false without consuming anything unless the scanner is located after
# a block boundary. Otherwise a :header element is appended to the current
# tree: level 2 if the third capture group is '-', level 1 otherwise. The
# stripped first capture group is the header text, the second capture group
# (if present) the 'id' attribute. Returns true in that case.
def parse_setext_header
  return false unless after_block_boundary?

  @src.pos += @src.matched_size
  text = @src[1].strip
  id = @src[2]
  underline = @src[3]

  header_level = (underline == '-' ? 2 : 1)
  header = new_block_el(:header, nil, nil, :level => header_level, :raw_text => text)
  add_text(text, header)
  header.attr['id'] = id if id
  @tree.children << header
  true
end
#parse_smart_quotes ⇒ Object
Parse the smart quotes at the current location.
199 200 201 202 203 204 205 206 207 208 209 |
# File 'lib/newstile/parser/newstile/smart_quotes.rb', line 199
#
# Parse the smart quotes at the current location.
#
# The first rule of SQ_RULES whose regexp scans successfully is applied. Each
# entry of its substitution list is either an Integer (the capture group with
# that number is added as text) or a quote name that becomes a :smart_quote
# element.
def parse_smart_quotes
  _rule_re, replacements = SQ_RULES.find {|re, _| @src.scan(re)}
  replacements.each do |entry|
    if entry.kind_of?(Integer)
      add_text(@src[entry].to_s)
    else
      # The last character of the entry names the capture group used to look
      # up a replacement; without one the entry itself is used.
      group = entry.to_s[-1,1].to_i
      quote = SQ_SUBSTS[[entry, @src[group]]] || entry
      @tree.children << Element.new(:smart_quote, quote)
    end
  end
end
#parse_span_extension ⇒ Object
Parse the extension span at the current location.
109 110 111 |
# File 'lib/newstile/parser/newstile/extension.rb', line 109
#
# Parse the extension span at the current location by delegating to
# #parse_extension_start_tag with type :span.
def parse_span_extension
  parse_extension_start_tag(:span)
end
#parse_span_html ⇒ Object
Parse the HTML at the current position as span level HTML.
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/newstile/parser/newstile/html.rb', line 121
#
# Parse the HTML at the current position as span level HTML.
#
# * XML comments and processing instructions become :xml_comment / :xml_pi
#   elements of category :span.
# * A stray closing tag or a block level tag is added as plain text with a
#   warning.
# * Any other tag becomes an :html_element whose content is parsed up to the
#   matching end tag, either as span content or as raw text.
# * If no tag matches, one character is added as plain text.
def parse_span_html
  if result = @src.scan(HTML_COMMENT_RE)
    @tree.children << Element.new(:xml_comment, result, nil, :category => :span)
  elsif result = @src.scan(HTML_INSTRUCTION_RE)
    @tree.children << Element.new(:xml_pi, result, nil, :category => :span)
  elsif result = @src.scan(HTML_TAG_CLOSE_RE)
    warning("Found invalidly used HTML closing tag for '#{@src[1]}'")
    add_text(result)
  elsif result = @src.scan(HTML_TAG_RE)
    if HTML_BLOCK_ELEMENTS.include?(@src[1])
      warning("Found block HTML tag '#{@src[1]}' in span level text")
      add_text(result)
      return
    end
    attrs = Utils::OrderedHash.new
    @src[2].scan(HTML_ATTRIBUTE_RE).each {|name, _sep, val| attrs[name] = val.gsub(/\n+/, ' ')}

    # Default parse mode: raw for raw-only tags or inside a raw parent,
    # otherwise whatever the document option says.
    do_parsing = (HTML_PARSE_AS_RAW.include?(@src[1]) || @tree.options[:parse_type] == :raw ? false : @doc.options[:parse_span_html])
    # A 'markdown' attribute on the tag overrides the default parse mode.
    if val = html_parse_type(attrs.delete('markdown'))
      if val == :block
        warning("Cannot use block level parsing in span level HTML tag - using default mode")
      elsif val == :span
        do_parsing = true
      elsif val == :default
        do_parsing = !HTML_PARSE_AS_RAW.include?(@src[1])
      elsif val == :raw
        do_parsing = false
      end
    end

    el = Element.new(:html_element, @src[1], attrs, :category => :span, :parse_type => (do_parsing ? :span : :raw))
    @tree.children << el
    stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
    # The fourth capture group is checked to see whether the tag was already
    # closed in the start tag; only tags without it get their content parsed.
    if !@src[4] && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
      warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
    elsif !@src[4]
      if parse_spans(el, stop_re, (do_parsing ? nil : [:span_html]))
        @src.scan(stop_re)
      else
        warning("Found no end tag for '#{el.value}' - auto-closing it")
        add_text(@src.scan(/.*/m), el)
      end
    end
    Newstile::Parser::Html::ElementConverter.new(@doc).process(el) if @doc.options[:html_to_native]
  else
    add_text(@src.scan(/./))
  end
end
#parse_span_ial ⇒ Object
Parse the inline attribute list at the current location.
96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/newstile/parser/newstile/attribute_list.rb', line 96
#
# Parse the inline attribute list at the current location.
#
# The attribute list (first capture group) is applied to the last element of
# the current tree, both to its stored IAL and to its attributes. If there is
# no preceding element or it is plain text, the IAL is ignored with a warning.
def parse_span_ial
  @src.pos += @src.matched_size
  if @tree.children.last && @tree.children.last.type != :text
    attr = Utils::OrderedHash.new
    parse_attribute_list(@src[1], attr)
    update_ial_with_ial(@tree.children.last.options[:ial] ||= Utils::OrderedHash.new, attr)
    update_attr_with_ial(@tree.children.last.attr, attr)
  else
    warning("Ignoring span IAL because preceding element is just text")
  end
end
#parse_summary ⇒ Object
Parse the summary at the current location.
48 49 50 51 52 53 54 55 56 57 |
# File 'lib/newstile/parser/newstile/blockquote.rb', line 48
#
# Parse the summary at the current location.
#
# The scanned text has all SUMMARY_START markers removed. If the last element
# of the current tree is already a summary, the text is appended to its first
# child after a newline. Otherwise a new :summary element with one text child
# is appended. Always returns true.
def parse_summary
  # Non-destructive gsub: gsub! returns nil when nothing was replaced, which
  # would make the calls on +result+ below fail.
  result = @src.scan(SUMMARY_MATCH).gsub(SUMMARY_START, '')
  if @tree.children.last && @tree.children.last.type == :summary
    @tree.children.last.children.first.value << "\n" << result.chomp
  else
    @tree.children << new_block_el(:summary)
    @tree.children.last.children << Element.new(@text_type, result.lstrip.chomp)
  end
  true
end
#parse_table ⇒ Object
Parse the table at the current location.
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
# File 'lib/newstile/parser/newstile/table.rb', line 37
#
# Parse the table at the current location.
#
# Returns false (with the scanner reset where necessary) if the table does not
# start after a block boundary, does not end before one, or has no body.
# Otherwise the :table element, consisting of :thead, :tbody and :tfoot
# containers with :tr rows, is appended to the current tree and true is
# returned.
def parse_table
  return false if !after_block_boundary?

  orig_pos = @src.pos
  table = new_block_el(:table, nil, nil, :alignment => [])
  leading_pipe = (@src.check(TABLE_LINE) =~ /^\s*\|/)
  @src.scan(TABLE_SEP_LINE)

  rows = []
  has_footer = false
  columns = 0

  # Moves the rows collected so far into a new container of the given type.
  # Once the footer separator was seen, :tbody containers are only created
  # when forced.
  add_container = lambda do |type, force|
    if force || type != :tbody || !has_footer
      cont = Element.new(type)
      cont.children, rows = rows, []
      table.children << cont
    end
  end

  while !@src.eos?
    break if !@src.check(TABLE_LINE)
    if @src.scan(TABLE_SEP_LINE) && !rows.empty?
      if table.options[:alignment].empty? && !has_footer
        # The first separator line ends the header and defines the alignment.
        add_container.call(:thead, false)
        table.options[:alignment] = @src[1].scan(TABLE_HSEP_ALIGN).map do |left, right|
          (left.empty? && right.empty? && :default) || (right.empty? && :left) || (left.empty? && :right) || :center
        end
      else # treat as normal separator line
        add_container.call(:tbody, false)
      end
    elsif @src.scan(TABLE_FSEP_LINE)
      add_container.call(:tbody, true) if !rows.empty?
      has_footer = true
    elsif @src.scan(TABLE_ROW_LINE)
      trow = Element.new(:tr)
      cells = (@src[1] + ' ').split(/\|/)
      # Re-join cells that were split at a pipe escaped by an odd number of
      # backslashes.
      i = 0
      while i < cells.length - 1
        backslashes = cells[i].scan(/\\+$/).first
        if backslashes && backslashes.length % 2 == 1
          cells[i] = cells[i].chop + '|' + cells[i+1]
          cells.delete_at(i+1)
        else
          i += 1
        end
      end
      cells.shift if leading_pipe && cells.first.strip.empty?
      cells.pop if cells.last.strip.empty?
      cells.each do |cell_text|
        tcell = Element.new(:td)
        tcell.children << Element.new(:raw_text, cell_text.strip)
        trow.children << tcell
      end
      columns = [columns, cells.length].max
      rows << trow
    else
      break
    end
  end

  if !before_block_boundary?
    @src.pos = orig_pos
    return false
  end

  add_container.call(has_footer ? :tfoot : :tbody, false) if !rows.empty?

  if !table.children.any? {|c| c.type == :tbody}
    warning("Found table without body - ignoring it")
    @src.pos = orig_pos
    return false
  end

  # adjust all table rows to have equal number of columns, same for alignment defs
  table.children.each do |kind|
    kind.children.each do |row|
      (columns - row.children.length).times do
        row.children << Element.new(:td)
      end
      row.children.each {|el| el.type = :th} if kind.type == :thead
    end
  end
  if table.options[:alignment].length > columns
    table.options[:alignment] = table.options[:alignment][0...columns]
  else
    table.options[:alignment] += [:default] * (columns - table.options[:alignment].length)
  end

  @tree.children << table

  true
end
#parse_typographic_syms ⇒ Object
Parse the typographic symbols at the current location.
37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/newstile/parser/newstile/typographic_symbol.rb', line 37
#
# Parse the typographic symbols at the current location.
#
# If the substitution table maps the matched text to a Symbol, a
# :typographic_sym element is appended. Otherwise two 'lt' entities are
# appended when the matched text is '\<<', and two 'gt' entities in all
# remaining cases.
def parse_typographic_syms
  @src.pos += @src.matched_size
  replacement = TYPOGRAPHIC_SYMS_SUBST[@src.matched]
  if replacement.kind_of?(Symbol)
    return @tree.children << Element.new(:typographic_sym, replacement)
  end

  entity_name = (@src.matched == '\\<<' ? 'lt' : 'gt')
  @tree.children << Element.new(:entity, ::Newstile::Utils::Entities.entity(entity_name))
  @tree.children << Element.new(:entity, ::Newstile::Utils::Entities.entity(entity_name))
end
#replace_abbreviations(el, regexps = nil) ⇒ Object
Replace the abbreviation text with elements.
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/newstile/parser/newstile/abbreviation.rb', line 41
#
# Replace the abbreviation text with elements.
#
# Walks the children of +el+ recursively. Every :text child is split into
# alternating :text and :abbreviation elements; all other children are
# processed recursively. Nothing is done if no abbreviations are defined.
# +regexps+ is only passed on recursive calls: its first entry matches any
# defined abbreviation, its last entry locates the next occurrence.
def replace_abbreviations(el, regexps = nil)
  return if @doc.parse_infos[:abbrev_defs].empty?
  unless regexps
    regexps = [Regexp.union(*@doc.parse_infos[:abbrev_defs].keys.map {|k| /#{Regexp.escape(k)}/})]
    regexps << /(?=(?:\W|^)#{regexps.first}(?!\w))/ # regexp should only match on word boundaries
  end

  abbrev_re, boundary_re = regexps.first, regexps.last
  replaced = el.children.map! do |child|
    if child.type != :text
      replace_abbreviations(child, regexps)
      child
    else
      pieces = []
      scanner = StringScanner.new(child.value)
      while prefix = scanner.scan_until(boundary_re)
        prefix += scanner.scan(/\W|^/)
        abbr = scanner.scan(abbrev_re)
        pieces += [Element.new(:text, prefix), Element.new(:abbreviation, abbr)]
      end
      # Whatever follows the last abbreviation stays plain text.
      pieces + [Element.new(:text, extract_string(scanner.pos..-1, scanner))]
    end
  end
  replaced.flatten!
end
#update_ial_with_ial(ial, opts) ⇒ Object
Update the ial with the information from the inline attribute list opts.
44 45 46 47 48 49 50 51 52 53 |
# File 'lib/newstile/parser/newstile/attribute_list.rb', line 44
#
# Update +ial+ with the information from the inline attribute list +opts+.
#
# The value of +opts[:refs]+ is appended as one entry to +ial[:refs]+. Of the
# remaining entries only those with String keys are copied: 'class' values
# are joined to an existing value with a space, all others overwrite.
def update_ial_with_ial(ial, opts)
  refs = (ial[:refs] ||= [])
  refs << opts[:refs]
  opts.each do |key, val|
    if key == 'class'
      existing = ial[key] || ''
      ial[key] = (existing + " #{val}").lstrip
    elsif key.kind_of?(String)
      ial[key] = val
    end
  end
end