Class: Pismo::Document

Inherits:
Object
  • Object
show all
Includes:
ExternalAttributes, InternalAttributes
Defined in:
lib/pismo/document.rb

Overview

Pismo::Document represents a single HTML document within Pismo

Constant Summary collapse

# Combined list of the instance methods contributed by the two attribute
# mixins (InternalAttributes first, then ExternalAttributes).
ATTRIBUTE_METHODS =
InternalAttributes.instance_methods + ExternalAttributes.instance_methods
# Baseline options for a Document; #initialize merges the caller's options
# over these, so any key given by the caller wins.
# NOTE(review): the image settings are presumably consumed by the image
# extraction code (units look like pixels) -- confirm against that code.
DEFAULT_OPTIONS =
{
  :image_extractor => false,
  :min_image_width => 100,
  :min_image_height => 100
}

Constants included from InternalAttributes

InternalAttributes::DATETIME_REGEXEN, InternalAttributes::DEFAULT_KEYWORD_OPTIONS, InternalAttributes::FAVICON_MATCHES, InternalAttributes::FEED_MATCHES, InternalAttributes::LEDE_EXTRACTOR, InternalAttributes::MONTHS_REGEX, InternalAttributes::TITLE_SEPARATORS_REGEX

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from InternalAttributes

#author, #authors, #body, #datetime, #description, #descriptions, #favicon, #feed, #feeds, #html_body, #html_title, #images, #keywords, #lede, #ledes, #og_title, #reader_doc, #sentences, #strip_site_name_and_separators_from, #tags, #title, #titles, #videos

Constructor Details

#initialize(handle, options = {}) ⇒ Document

Returns a new instance of Document


22
23
24
25
26
# File 'lib/pismo/document.rb', line 22

# Builds a Document from +handle+ and immediately loads it.
#
# The caller's +options+ are layered over DEFAULT_OPTIONS. The :url entry,
# if present, is removed from the stored options and handed to #load as the
# document's URL instead.
#
# @param handle  the source passed straight through to #load
# @param options [Hash] overrides for DEFAULT_OPTIONS; may carry :url
def initialize(handle, options = {})
  @options = DEFAULT_OPTIONS.merge(options)
  source_url = @options.delete(:url)
  load(handle, source_url)
end

Instance Attribute Details

#doc ⇒ Object (readonly)

Returns the value of attribute doc


10
11
12
# File 'lib/pismo/document.rb', line 10

# Reader for @doc, which #load sets to the result of Nokogiri::HTML.
#
# @return [Object, nil] the current value of @doc
def doc; @doc; end

#options ⇒ Object (readonly)

Returns the value of attribute options


10
11
12
# File 'lib/pismo/document.rb', line 10

# Reader for @options, the hash #initialize builds by merging the caller's
# options over DEFAULT_OPTIONS (with :url removed).
#
# @return [Hash, nil] the current value of @options
def options; @options; end

#url ⇒ Object (readonly)

Returns the value of attribute url


10
11
12
# File 'lib/pismo/document.rb', line 10

# Reader for @url, which #load sets from its +url+ argument or from the
# handle itself when the handle is an http(s) address.
#
# @return [Object, nil] the current value of @url
def url; @url; end

Instance Method Details

#html ⇒ Object

An HTML representation of the document


29
30
31
# File 'lib/pismo/document.rb', line 29

# An HTML representation of the document, produced by calling #to_s on the
# parsed document held in @doc (not the raw input kept in @html).
#
# @return [String] whatever @doc.to_s yields
def html
  parsed = @doc
  parsed.to_s
end

#load(handle, url = nil) ⇒ Object


33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/pismo/document.rb', line 33

# Reads the document source from +handle+, stores the raw markup in @html
# and the Nokogiri-parsed result in @doc.
#
# +handle+ may be:
# * a String starting with "http" (case-insensitive) -- fetched as a URL,
#   and also recorded as @url (overriding the +url+ argument);
# * any object responding to #read (IO, StringIO, Tempfile, ...) -- read;
# * anything else (typically a String of markup) -- used as-is.
#
# @param handle [String, #read] URL, readable stream, or raw markup
# @param url    [String, nil] URL to record for the document, if known
# @return the value returned by Nokogiri::HTML (also stored in @doc)
def load(handle, url = nil)
  # Only Strings can be URLs. Testing the type first avoids calling =~ on
  # IO-like handles, which raises NoMethodError on Ruby 3.2+ where
  # Object#=~ no longer exists.
  remote = handle.is_a?(String) && handle =~ /\Ahttp/i

  @url = url if url
  @url = handle if remote

  @html = if remote
            # Kernel#open stopped fetching URLs in Ruby 3.0; prefer URI.open
            # (open-uri, Ruby 2.5+) and fall back to Kernel#open on older
            # Rubies. Assumes open-uri is already required by this file, as
            # the original call to open with a URL relied on it.
            if URI.respond_to?(:open)
              URI.open(handle) { |f| f.read }
            else
              open(handle) { |f| f.read }
            end
          elsif handle.respond_to?(:read)
            handle.read
          else
            handle
          end

  @doc = Nokogiri::HTML(@html)
end

#match(args = [], all = false) ⇒ Object


48
49
50
51
# File 'lib/pismo/document.rb', line 48

# Forwards a match query to the parsed document.
#
# +args+ is splatted into a fresh array, so a single value, an array, or
# nil all reach @doc.match as an Array.
# NOTE(review): @doc.match is not defined in this file; its semantics (and
# the meaning of +all+) should be confirmed where it is defined.
#
# @param args [Object, Array] query or queries to forward
# @param all  [Boolean] passed through unchanged as the second argument
# @return whatever @doc.match returns
def match(args = [], all = false)
  queries = [*args]
  @doc.match(queries, all)
end