Class: RDF::RSS10::Reader

Inherits:
RDF::Reader
  • Object
show all
Defined in:
lib/sasquatch/rss10/reader.rb

Instance Method Summary collapse

Constructor Details

#initialize(input = $stdin, options = {}, &block) ⇒ Reader

copied indiscriminately from gkellogg’s rdf/xml parser



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/sasquatch/rss10/reader.rb', line 6

# Sets up the reader and loads the XML document it will walk.
#
# @param input [Nokogiri::XML::Document, Object] an already-parsed document
#   is used as-is; anything else is handed to Nokogiri::XML.parse
# @param options [Hash] reader options; when :base_uri is present it is
#   converted with +uri+, stored in @base_uri and passed to the parser as
#   the document URL
# @yield [reader] called with this reader once the document is loaded
# @raise [RDF::ReaderError] only when validate? is true: if the parser
#   recorded errors, or the document has no root element
def initialize(input = $stdin, options = {}, &block)
  super do

    @base_uri = uri(options[:base_uri]) if options[:base_uri]

    @doc = case input
    when Nokogiri::XML::Document then input
    else Nokogiri::XML.parse(input, @base_uri.to_s)
    end

    # Parse problems are only fatal for validating readers.
    raise RDF::ReaderError, "Syntax errors:\n#{@doc.errors}" if !@doc.errors.empty? && validate?
    raise RDF::ReaderError, "Empty document" if (@doc.nil? || @doc.root.nil?) && validate?

    block.call(self) if block_given?
  end
end

Instance Method Details

#all_text_nodes?(nodes) ⇒ Boolean

Returns:

  • (Boolean)


135
136
137
138
139
# File 'lib/sasquatch/rss10/reader.rb', line 135

# Reports whether a collection consists solely of text nodes.
#
# @param nodes [#each] the nodes to inspect
# @return [Boolean] false as soon as one member is not a
#   Nokogiri::XML::Text; true otherwise (including for an empty collection)
def all_text_nodes?(nodes)
  nodes.each do |node|
    return false unless node.is_a?(Nokogiri::XML::Text)
  end
  true
end

#each_statement {|statement| ... } ⇒ void

This method returns an undefined value.

Iterates the given block for each RDF statement in the input.

Yields:

  • (statement)

Yield Parameters:

  • statement (RDF::Statement)


28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/sasquatch/rss10/reader.rb', line 28

# Collects the statements found in the document's rss:channel and rss:item
# elements, yields each one to the block (when a block is given) and
# returns an enumerator over them.
#
# Nothing is collected unless the document root matches /rdf:RDF.
#
# @yield [statement] each collected statement, in document order
# @yieldparam statement [RDF::Statement]
# @return [Enumerator] enumerator over the collected statements
def each_statement(&block)
  # Block called from add_statement
  @callback = block

  statements = []
  root = @doc.root
  rdf_ns = RDF.to_uri.to_s
  rss_ns = RDF::RSS.to_s

  # A document without a root element simply produces no statements.
  rdf_nodes = root ? root.xpath("/rdf:RDF", "rdf" => rdf_ns) : []
  rdf_nodes.each do |_rdf|
    root.xpath("//rss:channel", "rss" => rss_ns).each do |channel|
      channel_about = channel.attribute('about')
      channel_uri = channel_about ? RDF::URI.intern(channel_about.value) : RDF::Node.new
      statements << RDF::Statement.new(channel_uri, RDF.type, RDF::RSS.channel)

      channel.children.each do |elem|
        # Whitespace, comments and un-namespaced elements cannot name a predicate.
        next unless elem.is_a?(Nokogiri::XML::Element) && elem.namespace
        predicate = RDF::URI.intern(elem.namespace.href + elem.name)

        if elem.name == 'items'
          elem.children.each do |list|
            next unless list.is_a?(Nokogiri::XML::Element) && list.namespace
            list_about = list.attribute('about')
            list_uri = list_about ? RDF::URI.intern(list_about.value) : RDF::Node.new

            # A fresh statement per container: reusing one mutable statement
            # would rewrite entries that were already collected.
            statements << RDF::Statement.new(channel_uri, predicate, list_uri)
            list_type = RDF::URI.intern(list.namespace.href + list.name)
            unless list_type == RDF.Description
              statements << RDF::Statement.new(list_uri, RDF.type, list_type)
            end

            list.children.each do |li|
              next unless li.is_a?(Nokogiri::XML::Element) && li.namespace
              li_resource = li.attribute('resource')
              object =
                if li_resource
                  RDF::URI.intern(li_resource.value)
                elsif li.children.length == 1 && li.children.first.is_a?(Nokogiri::XML::Text)
                  # Pass the element: language/datatype attributes live on it,
                  # not on its text node.
                  literal(li)
                end
              next unless object
              statements << RDF::Statement.new(list_uri, RDF::URI.intern(li.namespace.href + li.name), object)
            end
          end
        elsif elem.children.length == 1 && elem.children.first.is_a?(Nokogiri::XML::Text)
          statements << RDF::Statement.new(channel_uri, predicate, literal(elem))
        elsif elem.attribute('resource')
          statements << RDF::Statement.new(channel_uri, predicate, RDF::URI.intern(elem.attribute('resource').value))
        end
      end
    end

    root.xpath("/rdf:RDF/rss:item", "rdf" => rdf_ns, "rss" => rss_ns).each do |item|
      item_about = item.attribute('about')
      item_uri = item_about ? RDF::URI.intern(item_about.value) : RDF::Node.new
      statements.concat statements_from_element(item, item_uri)
    end
  end

  # Yield only when a block was supplied; a bare call just gets the enumerator.
  statements.each { |stmt| yield stmt } if block_given?
  statements.to_enum
end

#has_child_elements?(elem) ⇒ Boolean

Returns:

  • (Boolean)


141
142
143
144
145
# File 'lib/sasquatch/rss10/reader.rb', line 141

# Reports whether +elem+ contains at least one element node.
#
# For a Nokogiri::XML::Node the node's children are inspected. Calling
# +each+ on a node walks its attribute name/value pairs rather than its
# children, so iterating the node directly can never find an element.
# Any other enumerable (an Array, a NodeSet) is scanned member by member.
#
# @param elem [Nokogiri::XML::Node, #each] a node, or a collection of nodes
# @return [Boolean] true if an element node was found
def has_child_elements?(elem)
  candidates = elem.is_a?(Nokogiri::XML::Node) ? elem.children : elem
  candidates.each do |child|
    return true if child.is_a?(Nokogiri::XML::Element)
  end
  false
end

#literal(txt) ⇒ Object



147
148
149
150
151
152
153
154
155
156
# File 'lib/sasquatch/rss10/reader.rb', line 147

# Builds an RDF::Literal from a node's text content.
#
# The language (+lang+ attribute) and datatype (rdf:datatype attribute) are
# read from the node itself, or from its parent when the node is a text
# node, since text nodes carry no attributes of their own. A datatype takes
# precedence over a language tag; an empty +lang+ value is ignored.
#
# @param txt [Nokogiri::XML::Node] a text node or the element wrapping it
# @return [RDF::Literal] literal whose value is +txt.inner_text+
def literal(txt)
  holder = txt.is_a?(Nokogiri::XML::Text) ? txt.parent : txt
  options = {}
  if holder
    datatype = holder.attribute_with_ns('datatype', RDF.to_uri.to_s)
    lang = holder.attribute('lang')
    if datatype
      options[:datatype] = RDF::URI.intern(datatype.value)
    elsif lang && !lang.value.empty?
      options[:language] = lang.value.to_sym
    end
  end
  RDF::Literal.new(txt.inner_text, options)
end

#parse_children(elem, stmt) ⇒ Object



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/sasquatch/rss10/reader.rb', line 158

# Walks the children of +elem+ and collects one statement per property
# found, recursing into every child that itself has children.
#
# When +elem+ carries an rdf:about attribute, that URI becomes the subject
# for the statements produced at this level (and an rdf:type statement is
# added unless the element is rdf:Description or rss:item); otherwise the
# subject of the +stmt+ passed in is used.
#
# NOTE(review): blank-node objects created for nested elements are not
# reused as the subject when recursing — confirm whether that is intended.
#
# @param elem [Nokogiri::XML::Node] element whose children are examined
# @param stmt [RDF::Statement] working statement supplying the subject; its
#   predicate and object are overwritten while statements are built
# @return [Array<RDF::Statement>] the statements collected
def parse_children(elem, stmt)
  statements = []
  rdf_ns = RDF.to_uri.to_s

  about = elem.attribute_with_ns("about", rdf_ns)
  if about
    # Always use a URI term as the subject, never the raw attribute string.
    subject = RDF::URI.intern(about.value)
    type_object = RDF::URI.intern(elem.namespace.href+elem.name)
    unless type_object == RDF.Description || type_object == RDF::RSS.item
      statements << RDF::Statement.new(:subject=>subject, :predicate=>RDF.type, :object=>type_object)
    end
    stmt = RDF::Statement.new(:subject=>subject)
  end

  elem.children.each do |el|
    next if el.is_a?(Nokogiri::XML::Text)

    resource = el.attribute_with_ns('resource', rdf_ns) || el.attribute('resource')
    if resource
      stmt.object = RDF::URI.intern(resource.value)
      stmt.predicate = RDF::URI.intern(el.namespace.href+el.name)
      # Store a copy: the working statement may still be referenced by the caller.
      statements << stmt.dup
      stmt = RDF::Statement.new(:subject=>stmt.subject)
    elsif el.children.length == 1 && el.children.first.is_a?(Nokogiri::XML::Text)
      stmt.predicate = RDF::URI.intern(el.namespace.href+el.name)
      # The element (not its text node) carries any lang/datatype attributes.
      stmt.object = literal(el)
      statements << stmt.dup
      stmt = RDF::Statement.new(:subject=>stmt.subject)
    else
      stmt_found = false
      el.children.each do |child|
        next unless child.is_a?(Nokogiri::XML::Element)
        child_about = child.attribute_with_ns("about", rdf_ns)
        stmt.predicate = RDF::URI.intern(el.namespace.href+el.name)
        stmt.object = child_about ? RDF::URI.intern(child_about.value) : RDF::Node.new
        statements << stmt.dup
        stmt = RDF::Statement.new(:subject=>stmt.subject)
        stmt_found = true
      end
      # Diagnostic output for children that produced no statement.
      puts "#{el.name}: #{el.inspect}" unless stmt_found
    end

    statements.concat parse_children(el, stmt) unless el.children.empty?
  end
  statements
end

#statements_from_element(elem, resource) ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/sasquatch/rss10/reader.rb', line 99

# Collects statements describing +resource+ from the child elements of
# +elem+, then recurses into nested resource elements.
#
# Each namespaced child element becomes a predicate:
# * with an rdf:resource attribute, the object is that URI;
# * with only text content (or no content), the object is a literal;
# * otherwise each nested element that has an rdf:about attribute, or that
#   has_child_elements? reports as having element children, becomes the
#   object (URI or blank node), gets an rdf:type statement unless it is
#   rdf:Description or rss:item, and is itself processed recursively.
#
# Non-element children (whitespace, comments) and elements without a
# namespace are skipped, since no predicate URI can be formed for them.
#
# @param elem [Nokogiri::XML::Node] element whose children are properties
# @param resource [RDF::URI, RDF::Node] subject of the generated statements
# @return [Array<RDF::Statement>] the statements collected
def statements_from_element(elem, resource)
  child_elements = {}
  statements = []
  rdf_ns = RDF.to_uri.to_s

  elem.children.each do |el|
    next unless el.is_a?(Nokogiri::XML::Element) && el.namespace
    predicate = RDF::URI.intern(el.namespace.href+el.name)
    target = el.attribute_with_ns('resource', rdf_ns)

    if target
      statements << RDF::Statement.new(:subject=>resource, :predicate=>predicate, :object=>RDF::URI.intern(target.value))
    elsif all_text_nodes?(el.children)
      # Hand over the element itself: it holds the lang/datatype attributes,
      # and an empty element has no first child to pass.
      statements << RDF::Statement.new(:subject=>resource, :predicate=>predicate, :object=>literal(el))
    else
      el.children.each do |e|
        about = e.attribute_with_ns('about', rdf_ns)
        if about
          c = RDF::URI.intern(about.value)
        elsif has_child_elements?(e)
          c = RDF::Node.new
        else
          next
        end

        statements << RDF::Statement.new(:subject=>resource, :predicate=>predicate, :object=>c)
        child_elements[c] = e
        next unless e.namespace
        e_type = RDF::URI.intern(e.namespace.href + e.name)
        # Both comparisons must be against e_type; a bare RDF::RSS.item is
        # always truthy and would suppress every type statement.
        unless e_type == RDF.Description || e_type == RDF::RSS.item
          statements << RDF::Statement.new(:subject=>c, :predicate=>RDF.type, :object=>e_type)
        end
      end
    end
  end

  child_elements.each_pair do |r, e|
    statements.concat statements_from_element(e, r)
  end
  statements
end