Module: Prism

Included in:
POSH, Pattern::Abbr, Pattern::DateTime, Pattern::TypeValue, Pattern::URL, Pattern::ValueClass
Defined in:
lib/prism.rb,
lib/prism/posh.rb,
lib/prism/pattern.rb,
lib/prism/posh/base.rb,
lib/prism/pattern/url.rb,
lib/prism/microformat.rb,
lib/prism/posh/anchor.rb,
lib/prism/pattern/abbr.rb,
lib/prism/microformat/geo.rb,
lib/prism/microformat/adr.rb,
lib/prism/microformat/xfn.rb,
lib/prism/microformat/xmdp.rb,
lib/prism/microformat/xoxo.rb,
lib/prism/pattern/datetime.rb,
lib/prism/pattern/typevalue.rb,
lib/prism/microformat/hcard.rb,
lib/prism/microformat/hatom.rb,
lib/prism/pattern/valueclass.rb,
lib/prism/microformat/reltag.rb,
lib/prism/posh/definition_list.rb,
lib/prism/microformat/votelinks.rb,
lib/prism/microformat/hcalendar.rb,
lib/prism/microformat/rellicense.rb

Defined Under Namespace

Modules: Microformat, Pattern Classes: POSH

Constant Summary collapse

VERSION =
"0.1.1"
PRODID =
"-//markwunsch.com//Prism #{VERSION}//EN"

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.find(document, format = nil) ⇒ Object

Convenience method for Prism::Microformat.find method


9
10
11
# File 'lib/prism.rb', line 9

# Convenience shortcut: forwards both arguments unchanged to
# Prism::Microformat.find and returns whatever that call returns.
#
# document - passed straight through as the first argument.
# format   - passed straight through as the second argument (defaults to nil).
def self.find(document, format=nil)
  Prism::Microformat.find(document, format)
end

.get(html) ⇒ Object

Get a string of html or a url and convert it to a Nokogiri Document


14
15
16
17
18
19
20
21
22
23
# File 'lib/prism.rb', line 14

# Turn an HTML string, a URL string, or an already-parsed node into a document.
#
# html - a Nokogiri::XML::Node (returned untouched), or a String.
#
# A String that URI.parse recognises as a URI::HTTP (or a subclass of it) is
# fetched through get_url using its normalized form. Every other String,
# including one that URI.parse rejects with URI::InvalidURIError, is handed
# to get_document as markup.
def self.get(html)
  return html if html.is_a?(Nokogiri::XML::Node)

  parsed = URI.parse(html)
  if parsed.is_a?(URI::HTTP)
    get_url(parsed.normalize.to_s)
  else
    get_document(html)
  end
rescue URI::InvalidURIError
  # Not a URI at all, so treat the input as raw markup.
  get_document(html)
end

.get_document(html, url = nil) ⇒ Object

Convert HTML to a Nokogiri Document


36
37
38
# File 'lib/prism.rb', line 36

# Convert HTML to a Nokogiri document.
#
# html - a Nokogiri::XML::Node (returned as-is) or markup to parse.
# url  - optional second argument forwarded to Nokogiri::HTML.parse.
def self.get_document(html, url=nil)
  # Already a node: nothing to parse.
  return html if html.is_a?(Nokogiri::XML::Node)

  Nokogiri::HTML.parse(html, url)
end

.get_url(url) ⇒ Object

Open a URL and convert the contents to a Nokogiri Document


26
27
28
29
30
31
32
33
# File 'lib/prism.rb', line 26

# Open a URL, read its body, and convert it with get_document.
#
# url - String location to fetch; it is also passed to get_document as the
#       second argument.
#
# NOTE(review): relies on the parsed URI responding to #open (presumably
# provided by open-uri, loaded elsewhere in the file) - confirm.
def self.get_url(url)
  # The block's value (the body that was read) is what #open hands back.
  markup = URI.parse(url).open { |io| io.read }
  get_document(markup, url)
end

.map(key) ⇒ Object

Map a key to an element or design pattern


45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/prism.rb', line 45

# Map a key to an element parser or design pattern.
#
# key - anything accepted by normalize (it is compared after normalization,
#       but forwarded to the sub-mappers in its original form).
#
# Returns the result of Prism::Pattern.map or Prism::Microformat.map for the
# keys each of them handles, or Prism::POSH::Base for :base.
# Raises RuntimeError when the key is not recognized.
def self.map(key)
  parser = normalize(key)

  patterns = [:valueclass, :abbr, :uri, :url, :typevalue]
  return Prism::Pattern.map(key) if patterns.include?(parser)

  microformats = [:hcard, :geo, :rellicense, :reltag, :votelinks, :xfn, :xmdp, :xoxo, :adr]
  return Prism::Microformat.map(key) if microformats.include?(parser)

  return Prism::POSH::Base if parser == :base

  raise "#{key} is not a recognized parser."
end

.normalize(name) ⇒ Object


40
41
42
# File 'lib/prism.rb', line 40

# Canonicalize a parser name: stringify it, trim surrounding whitespace,
# lowercase it, and return the result as a Symbol.
def self.normalize(name)
  label = name.to_s
  label = label.strip.downcase
  label.to_sym
end

Instance Method Details

#extract(pattern = nil, &block) ⇒ Object

Define the pattern used to extract contents from a node. Can be a symbol that maps to an Element parser, or a block.


97
98
99
100
101
102
103
104
# File 'lib/prism.rb', line 97

# Get/Set the function used to pull content out of a node.
#
# pattern - optional key; when given (and no block is), the extractor of the
#           parser returned by Prism.map(pattern) is stored.
# block   - optional block; when given it is stored and pattern is ignored.
#
# Returns the stored extractor, or a default lambda that returns the node's
# stripped content when nothing has been stored.
def extract(pattern = nil, &block)
  @extract = block if block
  @extract = Prism.map(pattern).extract if pattern && !block
  return @extract if @extract

  lambda { |node| node.content.strip }
end

#extract_from(node) ⇒ Object

Extract the content from the node


107
108
109
# File 'lib/prism.rb', line 107

# Extract the content from the node by calling the current extractor
# (whatever #extract returns) with it.
#
# node - the value handed to the extractor.
#
# Returns whatever the extractor returns.
def extract_from(node)
  extract.call(node)
end

#find_in(document) ⇒ Object

Search for the element in a document


73
74
75
# File 'lib/prism.rb', line 73

# Search for the element in a document by calling the current search
# function (whatever #search returns) with it.
#
# document - the value handed to the search function.
#
# Returns whatever the search function returns.
def find_in(document)
  search.call(document)
end

#found_in?(node) ⇒ Boolean

Is the element found in node?

Returns:

  • (Boolean)

78
79
80
# File 'lib/prism.rb', line 78

# Is the element found in node?
#
# node - the node (or document) to search with find_in.
#
# The search is run exactly once. The element counts as found when the
# search hands back the node itself, a non-empty collection, or a single
# non-collection result (one that does not respond to empty?). A nil
# result or an empty collection means it was not found.
#
# Returns true or false.
def found_in?(node)
  result = find_in(node)
  return false if result.nil?
  return true if result.eql?(node)

  # A lone match that is not a collection has no notion of "empty".
  result.respond_to?(:empty?) ? !result.empty? : true
end

#parse(document) ⇒ Object

Parse the document, finding every instance of the desired element, and extract their contents


112
113
114
115
116
117
118
119
120
121
# File 'lib/prism.rb', line 112

# Parse the document: find every instance of the desired element and extract
# the content of each.
#
# document - the node or document to search.
#
# Returns nil when found_in? says the element is absent. When the search
# result is a collection (responds to collect and is not exactly a
# Nokogiri::XML::Element) returns one extracted value per member; otherwise
# returns the value extracted from document itself.
def parse(document)
  return nil unless found_in?(document)

  matches = find_in(document)
  many = matches.respond_to?(:collect) && !matches.instance_of?(Nokogiri::XML::Element)
  return extract_from(document) unless many

  matches.collect { |match| extract_from(match) }
end

#parse_first(document) ⇒ Object

Parse the document, extracting the content for the first instance of the element


124
125
126
127
128
129
130
131
132
133
# File 'lib/prism.rb', line 124

# Parse the document, extracting the content of the first instance of the
# element only.
#
# document - the node or document to search.
#
# Returns nil when found_in? says the element is absent; otherwise the value
# extracted from the first match.
def parse_first(document)
  return nil unless found_in?(document)

  target = find_in(document)
  # Fix for https://github.com/mwunsch/prism/issues/5
  # Only unwrap collections; a lone Element is already the match.
  target = target.first if target.respond_to?(:first) && !target.instance_of?(Nokogiri::XML::Element)
  extract_from(target)
end

#search(&block) ⇒ Object

Get/Set a function that defines how to find an element in a node. The search function should return a Nokogiri::XML::NodeSet, e.g. <tt>search {|node| node.css(element) }</tt>


61
62
63
64
# File 'lib/prism.rb', line 61

# Get/Set the function that defines how to find an element in a node.
#
# block - optional; when given it is stored as the search function.
#
# Returns the stored search function, or an identity lambda (returns the
# node it was given) when none has been stored.
def search(&block)
  @search = block if block
  return @search if @search

  lambda { |node| node }
end

#search_for(selector = nil) ⇒ Object

Define a function to search a node with a specific selector


67
68
69
70
# File 'lib/prism.rb', line 67

# Define a search function that queries a node with a CSS selector.
#
# selector - optional; when given, a search function calling
#            node.css(selector) is installed through #search.
#
# Returns the current search function (as reported by #search).
def search_for(selector=nil)
  if selector
    search do |node|
      node.css(selector)
    end
  end
  search
end

#valid?(node) ⇒ Boolean

Is this a valid node?

Returns:

  • (Boolean)

91
92
93
# File 'lib/prism.rb', line 91

# Is this a valid node? Calls the current validation function (whatever
# #validate returns) with the node.
#
# node - the value handed to the validation function.
#
# Returns whatever the validation function returns.
def valid?(node)
  validate.call(node)
end

#validate(&block) ⇒ Object

Get/Set a function that tests to make sure a given node is the element we want. Should return truthy. Default just tests to see if the node passed is a child of its parent node.


85
86
87
88
# File 'lib/prism.rb', line 85

# Get/Set the function that tests whether a given node is the element we
# want. It should return a truthy value for acceptable nodes.
#
# block - optional; when given it is stored as the validation function.
#
# Returns the stored function, or a default lambda that searches the node's
# parent with find_in and checks that the node is among the children of the
# result.
def validate(&block)
  @validate = block if block
  return @validate if @validate

  lambda do |node|
    candidates = find_in(node.parent).children
    candidates.include?(node)
  end
end