Module: SClust::RSS

Defined in:
lib/sclust/util/rss.rb

Overview

NOTE: The name RSS collides with the top-level module ::RSS, so we use the `::` prefix when accessing the ::RSS module that ships with Ruby. :)

Class Method Summary collapse

Class Method Details

.rss_to_documents(rss, &addNewDoc) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/sclust/util/rss.rb', line 33

# Feeds every item of an RSS/Atom source to the supplied block.
#
# +rss+ may be any of:
# * an already-parsed feed (any ::RSS::Element, subclasses included),
# * a URI::HTTP / URI::HTTPS object, or a String holding such a URL; the
#   body is fetched through the global $wwwagent (get_file) and parsed,
# * a String holding the feed XML itself,
# * a File containing the feed XML.
#
# For each item the block is invoked as addNewDoc.call(title, body, item):
# * ::RSS::Rss::Channel::Item -> (title, description), skipped when the
#   description is nil,
# * ::RSS::RDF::Item          -> (title, content_encoded),
# * anything else             -> (title.content, content.content)
#   (presumably Atom entries -- confirm against callers).
#
# Raises ArgumentError when +rss+ is none of the supported inputs (or the
# parser produced nothing). Errors raised while fetching/parsing a URL are
# logged through $logger and re-raised unchanged.
def self.rss_to_documents(rss, &addNewDoc)

    $logger.debug("Operating on #{rss} of type #{rss.class}")

    # is_a? (not instance_of?): parsed feeds are subclasses of ::RSS::Element,
    # so an exact-class check would reject every real document.
    unless rss.is_a?(::RSS::Element)

        # Remember what the caller passed so the error message can show it.
        source = rss

        # Check if we have a URI string...
        if rss.instance_of?(String)
            begin
                rss = URI.parse(rss)
            rescue URI::InvalidURIError => e
                # Not a URL: keep the String so it is parsed as feed XML below.
                $logger.warn("Exception parsing URI: #{e.message}")
            end
        end

        $logger.debug("Rss is now of type #{rss.class}.")

        # Parse it...
        rss =
            case rss
            when URI::HTTP # also matches URI::HTTPS, which subclasses URI::HTTP
                begin
                    ::RSS::Parser.parse($wwwagent.get_file(rss), false)
                rescue StandardError => e
                    # rss is still the URI here; the assignment never happened.
                    $logger.error("Failed to retrieve URL #{rss}: #{e.message}")
                    raise
                end
            when String, File
                ::RSS::Parser.parse(rss, false)
            end

        unless rss
            raise ArgumentError,
                  "RSS was not a URI string, a URI object, an RSS document, or an RSS document string: #{source.inspect}"
        end
    end

    unless rss.items.nil?

        $logger.debug("Adding #{rss.items.size} to document collection.")

        # Add the documents of this feed to the document collection.
        rss.items.each do |item|
            if item.instance_of?(::RSS::Rss::Channel::Item)
                addNewDoc.call(item.title, item.description, item) if item.description
            elsif item.instance_of?(::RSS::RDF::Item)
                addNewDoc.call(item.title, item.content_encoded, item)
            else
                addNewDoc.call(item.title.content, item.content.content, item)
            end
        end
    end
end