Class: Unmarkdown::Parser
- Inherits:
-
Object
- Object
- Unmarkdown::Parser
- Defined in:
- lib/unmarkdown/parser.rb
Constant Summary collapse
- BLOCK_ELEMENT_NAMES =
%w{h1 h2 h3 h4 h5 h6 blockquote pre hr ul ol li p div}.freeze
- AUTOLINK_URL_REGEX =
/((?:https?|ftp):[^'"\s]+)/i.freeze
- AUTOLINK_EMAIL_REGEX =
%r{([-.\w]+\@[-a-z0-9]+(?:\.[-a-z0-9]+)*\.[a-z]+)}i.freeze
Instance Method Summary collapse
-
#initialize(html, options = {}) ⇒ Parser
constructor
A new instance of Parser.
- #parse ⇒ Object
Constructor Details
#initialize(html, options = {}) ⇒ Parser
Returns a new instance of Parser.
9 10 11 12 |
# File 'lib/unmarkdown/parser.rb', line 9

# Stores the HTML source and the options hash on the new parser instance.
# No parsing happens here; the stored values are only assigned.
#
# @param html [String] HTML to convert (presumably a String or String-like
#   object; confirm against callers)
# @param options [Hash] optional settings, kept as-is in @options; defaults
#   to an empty hash
def initialize(html, options = {})
  @html = html
  @options = options
end
Instance Method Details
#parse ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/unmarkdown/parser.rb', line 14

# Converts the HTML held in @html to Markdown.
#
# The fragment is wrapped in a <body> element when it does not already
# contain one, parsed with Nokogiri, and the children of the body are handed
# to parse_nodes. The @list and @list_position bookkeeping arrays are reset
# on every call.
#
# @return [String] the result of parse_nodes with trailing whitespace removed
#   and runs of blank lines collapsed (presumably a String, since rstrip and
#   gsub are called on it)
def parse
  # If the HTML fragment starts with a comment, it is ignored. Add an
  # enclosing body tag to ensure everything is included.
  html = @html
  html = "<body>#{@html}</body>" unless html.include?('<body')

  # Setup document
  doc = Nokogiri::HTML(html)
  doc.encoding = 'UTF-8'

  # Reset bookkeeping
  @list = []
  @list_position = []

  # Parse the root node recursively
  root_node = doc.xpath('//body')
  markdown = parse_nodes(root_node.children)

  # Strip whitespace at the end of the document, then collapse every run of
  # three or more newlines into a single blank line. The quantifier is
  # written as {3,} so that odd-length runs are fully collapsed as well.
  # TODO: Strip trailing whitespace
  markdown.rstrip.gsub(/\n{3,}/, "\n\n")
end