Class: Wraptext::Parser

Inherits:

Object

Object
Wraptext::Parser

Defined in: lib/wraptext/parser.rb

Constant Summary collapse

BLOCK_TAGS =

%w"table thead tfoot caption col colgroup tbody tr td th div dl dd dt
ul ol li pre select option form map area blockquote address math style input hr
fieldset legend section article aside hgroup header footer nav p
figure figcaption details menu summary h1 h2 h3 h4 h5 h6 script"

BLOCK_TAGS_LOOKUP =

Hash[*BLOCK_TAGS.map {|e| [e, 1]

NO_WRAP_IN =

%w"h1 h2 h3 h4 h5 h6"

NO_WRAP_IN_LOOKUP =

STRAIGHT_COPY_TAGS =

%w"script pre textarea style"

STRAIGHT_COPY_TAGS_LOOKUP =

MULTIPLE_NEWLINES_REGEX =

/(\r\n|\n){2,}/

Class Method Summary collapse

.parse(text) ⇒ Object

Instance Method Summary collapse

#initialize(text_or_nokogiri_doc) ⇒ Parser constructor

A new instance of Parser.
#to_doc ⇒ Object
#to_html ⇒ Object

Constructor Details

#initialize(text_or_nokogiri_doc) ⇒ `Parser`

Returns a new instance of Parser.

# File 'lib/wraptext/parser.rb', line 20

# Builds a parser from either raw markup or an already-parsed document.
#
# The source is stored in @doc, a fresh document with an empty body is
# stored in @root, and the two body elements are handed to reparent_nodes
# before replace_single_breaks and strip_empty_paragraphs! run.
# NOTE(review): those three helpers are defined elsewhere in the class;
# their exact effect is not visible from here.
#
# @param text_or_nokogiri_doc [String, Nokogiri::XML::Document] source markup
# @raise [RuntimeError] if the argument is neither a String nor a Nokogiri document
def initialize(text_or_nokogiri_doc)
  source = text_or_nokogiri_doc
  @doc =
    case source
    when Nokogiri::XML::Document
      source
    when String
      # An empty paragraph is prepended because Nokogiri would otherwise
      # assume that leading script tags belong in the document head.
      Nokogiri::HTML("<p></p>" + source)
    else
      raise "#initialize requires a string or Nokogiri document"
    end

  @root = Nokogiri::HTML("<body></body>")
  output_body = @root.xpath("/html/body").first
  input_body = @doc.xpath("/html/body").first
  reparent_nodes output_body, input_body

  replace_single_breaks
  strip_empty_paragraphs!
end

Class Method Details

.parse(text) ⇒ `Object`



16
17
18

# File 'lib/wraptext/parser.rb', line 16

# Convenience entry point: constructs a parser for +text+ and returns the
# result of calling #to_html on it.
#
# @param text [Object] forwarded unchanged to the constructor
# @return [Object] whatever #to_html returns for the new instance
def self.parse(text)
  parser = new(text)
  parser.to_html
end

Instance Method Details

#to_doc ⇒ `Object`



40
41
42

# File 'lib/wraptext/parser.rb', line 40

# Returns the first node matching "/html/body" in @root.
# The result is cached in @doc_out, so later calls skip the lookup.
#
# @return [Object] the cached body node
def to_doc
  return @doc_out if @doc_out

  body_nodes = @root.xpath("/html/body")
  @doc_out = body_nodes.first
end

#to_html ⇒ `Object`



36
37
38

# File 'lib/wraptext/parser.rb', line 36

# Returns the inner HTML of the nodes matching "/html/body" in @root.
# The result is cached in @html, so later calls skip the serialization.
#
# @return [Object] the cached inner_html result
def to_html
  return @html if @html

  body_nodes = @root.xpath("/html/body")
  @html = body_nodes.inner_html
end