Class: Unmarkdown::Parser

Inherits:

Object

Object
Unmarkdown::Parser

show all

Defined in: lib/unmarkdown/parser.rb

Constant Summary collapse

# HTML tag names grouped under the "block element" label.
# NOTE(review): presumably consulted while walking nodes to decide which tags
# get block-level treatment; the usage is not visible in this excerpt.
BLOCK_ELEMENT_NAMES = %w[h1 h2 h3 h4 h5 h6 blockquote pre hr ul ol li p div].freeze

# Captures an http, https or ftp scheme, the colon, and every following
# character up to the first quote or whitespace. Case-insensitive.
AUTOLINK_URL_REGEX = %r{((?:https?|ftp):[^'"\s]+)}i.freeze

# Captures an email-like token: local part, "@", then dot-separated host
# labels ending in an alphabetic label. Case-insensitive.
AUTOLINK_EMAIL_REGEX = /([-.\w]+\@[-a-z0-9]+(?:\.[-a-z0-9]+)*\.[a-z]+)/i.freeze

Instance Method Summary collapse

#initialize(html, options = {}) ⇒ Parser (constructor)

A new instance of Parser.
#parse ⇒ Object

Constructor Details

#initialize(html, options = {}) ⇒ `Parser`

Returns a new instance of Parser.

# File 'lib/unmarkdown/parser.rb', line 9

# Builds a parser around the given HTML. Nothing is parsed here; the
# arguments are only stored on the instance.
#
# @param html [String] the HTML source to be converted
# @param options [Hash] caller-supplied settings, kept as given
def initialize(html, options = {})
  @html, @options = html, options
end

Instance Method Details

#parse ⇒ `Object`

# File 'lib/unmarkdown/parser.rb', line 14

# Parses the stored HTML and returns the text produced by +parse_nodes+ for
# the children of the document's body, after whitespace cleanup.
#
# Resets the list bookkeeping (@list, @list_position) on every call.
#
# @return [String] the converted text with trailing whitespace removed and
#   every run of two or more newlines collapsed to exactly two
def parse
  # If the HTML fragment starts with a comment, it is ignored. Add an
  # enclosing body tag to ensure everything is included.
  html = @html.include?('<body') ? @html : "<body>#{@html}</body>"

  # Setup document
  doc = Nokogiri::HTML(html)
  doc.encoding = 'UTF-8'

  # Reset bookkeeping
  @list = []
  @list_position = []

  # Parse the root node recursively
  root_node = doc.xpath('//body')
  markdown = parse_nodes(root_node.children)

  # Strip trailing whitespace, then collapse any run of 2+ newlines into a
  # single blank line. The quantifier must be {2,}: in Ruby, /\n{2}+/ is a
  # nested repeat (pairs of newlines), so odd-length runs such as "\n\n\n"
  # were left with a stray extra newline.
  markdown.rstrip.gsub(/\n{2,}/, "\n\n")

  # TODO: Strip trailing whitespace
end

Class: Unmarkdown::Parser

Constant Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(html, options = {}) ⇒ Parser

Instance Method Details

#parse ⇒ Object

#initialize(html, options = {}) ⇒ `Parser`

#parse ⇒ `Object`