Class: Planet::Blog

Inherits:
Object
  • Object
show all
Defined in:
lib/planet/blog.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(attributes = {}) ⇒ Blog

Returns a new instance of Blog.



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/planet/blog.rb', line 21

# Builds a blog from a hash of symbol-keyed attributes.
#
# @param attributes [Hash] optional settings. +:url+, +:feed+, +:type+,
#   +:name+, +:author+, +:image+, +:twitter+ and +:planet+ are copied as-is
#   (nil when the key is missing). +:posts+ falls back to an empty array and
#   +:categories+ / +:tags+ fall back to an empty string, but only when the
#   key itself is absent.
def initialize(attributes = {})
  # Attributes that are copied straight from the hash.
  [:url, :feed, :type, :name, :author, :image, :twitter, :planet].each do |key|
    send("#{key}=", attributes[key])
  end

  # Attributes that get a default when the caller did not supply the key.
  self.posts      = attributes.fetch(:posts, [])
  self.categories = attributes.fetch(:categories, '')
  self.tags       = attributes.fetch(:tags, '')

  # The parsed feed is exposed for callers that need more than the fields
  # extracted here; nothing has been fetched yet, so it starts out as nil.
  self.rss_data = nil

  # Parser manager consulted by #fetch to pick a feed parser.
  @parsers = Parsers.new
end

Instance Attribute Details

#author ⇒ Object

Returns the value of attribute author.



8
9
10
# File 'lib/planet/blog.rb', line 8

# Reader for the blog's author.
#
# @return [Object] the value held in +@author+; the constructor initialises
#   it from +attributes[:author]+, and #on_fetch_success interpolates it into
#   the messages it prints.
def author
  @author
end

#categories ⇒ Object

Returns the value of attribute categories.



8
9
10
# File 'lib/planet/blog.rb', line 8

# Reader for the blog's categories.
#
# @return [Object] the value held in +@categories+; the constructor
#   initialises it from +attributes[:categories]+, or an empty string when
#   that key is absent.
def categories
  @categories
end

#feed ⇒ Object

Returns the value of attribute feed.



8
9
10
# File 'lib/planet/blog.rb', line 8

# Reader for the blog's feed location.
#
# @return [Object] the value held in +@feed+; the constructor initialises it
#   from +attributes[:feed]+, and #fetch hands it to the parser's
#   +fetch_and_parse+.
def feed
  @feed
end

#image ⇒ Object

Returns the value of attribute image.



8
9
10
# File 'lib/planet/blog.rb', line 8

# Reader for the blog's image.
#
# @return [Object] the value held in +@image+; the constructor initialises
#   it from +attributes[:image]+ (nil when absent).
def image
  @image
end

#name ⇒ Object

Returns the value of attribute name.



8
9
10
# File 'lib/planet/blog.rb', line 8

# Reader for the blog's name.
#
# @return [Object] the value held in +@name+; the constructor initialises it
#   from +attributes[:name]+, and #on_fetch_success fills it in from the feed
#   title (or 'the source') when it is still unset.
def name
  @name
end

#planet ⇒ Object

Returns the value of attribute planet.



8
9
10
# File 'lib/planet/blog.rb', line 8

# Reader for the planet this blog belongs to.
#
# @return [Object] the value held in +@planet+; the constructor initialises
#   it from +attributes[:planet]+. Other methods of this class read its
#   +config+ and +whitelisted_tags+.
def planet
  @planet
end

#posts ⇒ Object

Returns the value of attribute posts.



8
9
10
# File 'lib/planet/blog.rb', line 8

# Reader for the posts collected for this blog.
#
# @return [Object] the value held in +@posts+; the constructor initialises
#   it from +attributes[:posts]+ (an empty array when that key is absent),
#   and #on_fetch_success appends one Post per accepted feed entry.
def posts
  @posts
end

#rss_data ⇒ Object

Returns the value of attribute rss_data.



8
9
10
# File 'lib/planet/blog.rb', line 8

# Reader for the raw parsed feed.
#
# @return [Object, nil] the value held in +@rss_data+; nil after
#   construction, and set to the feed object by #on_fetch_success.
def rss_data
  @rss_data
end

#tags ⇒ Object

Returns the value of attribute tags.



8
9
10
# File 'lib/planet/blog.rb', line 8

# Reader for the blog's tags.
#
# @return [Object] the value held in +@tags+; the constructor initialises it
#   from +attributes[:tags]+, or an empty string when that key is absent.
def tags
  @tags
end

#twitter ⇒ Object

Returns the value of attribute twitter.



8
9
10
# File 'lib/planet/blog.rb', line 8

# Reader for the blog's twitter attribute.
#
# @return [Object] the value held in +@twitter+; the constructor initialises
#   it from +attributes[:twitter]+ (nil when absent).
def twitter
  @twitter
end

#type ⇒ Object

Returns the value of attribute type.



8
9
10
# File 'lib/planet/blog.rb', line 8

# Reader for the explicitly configured parser type.
#
# @return [Object, nil] the value held in +@type+; the constructor
#   initialises it from +attributes[:type]+. When it is truthy, #fetch asks
#   the parser manager for that parser instead of inferring one from the feed.
def type
  @type
end

#url ⇒ Object

Returns the value of attribute url.



8
9
10
# File 'lib/planet/blog.rb', line 8

# Reader for the blog's url.
#
# @return [Object] the value held in +@url+; the constructor initialises it
#   from +attributes[:url]+, #on_fetch_success falls back to the feed's url
#   when it is unset, and #sanitize_images prefixes image sources with it.
def url
  @url
end

Instance Method Details

#fetch ⇒ Object



43
44
45
46
47
48
49
50
51
# File 'lib/planet/blog.rb', line 43

# Fetches and parses this blog's feed.
#
# The parser is the one registered for +type+ when a type was given;
# otherwise the parser manager picks one based on the feed. On success the
# parsed feed is handed to #on_fetch_success; on failure a message with the
# url and response code is printed.
#
# @return [Object] whatever the parser's +fetch_and_parse+ returns
def fetch
  parser =
    if self.type
      @parsers.get_parser(self.type)
    else
      @parsers.get_parser_for(self.feed)
    end

  # Callbacks follow the Feedzirra interface that parser instances mimic.
  success_handler = lambda { |_url, feed| on_fetch_success(feed) }
  failure_handler = lambda do |url, response_code, _response_header, _response_body|
    puts "\t=> Failed to fetch #{url} with response_code: #{response_code}"
  end

  parser.fetch_and_parse(self.feed,
                         :on_success => success_handler,
                         :on_failure => failure_handler)
end

#on_fetch_success(feed) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/planet/blog.rb', line 53

# Turns a successfully parsed feed into Post objects appended to +posts+.
#
# Fills in +name+ (feed title, or 'the source') and +url+ (feed url) when
# they are still unset, stores the feed in +rss_data+, and then builds one
# Post per entry accepted by #whitelisted?. Entry content is taken from
# +entry.content+, falling back to +entry.summary+, and is passed through
# #sanitize_images; when the planet config enables 'sanitize_html' it is
# additionally cleaned with Sanitize's RELAXED config.
#
# Terminates the process via +abort+ when no url can be determined or when an
# accepted entry has neither content nor summary.
#
# @param feed [Object] parsed feed responding to +title+, +url+ and +entries+
# @return [Object] the result of iterating +feed.entries+
def on_fetch_success(feed)
  self.name ||= feed.title || 'the source'
  self.url  ||= feed.url

  if self.url.nil?
    abort "#{ self.author }'s blog does not have a url field on it's feed, you will need to specify it on planet.yml"
  end

  self.rss_data = feed

  feed.entries.each do |entry|
    next unless whitelisted?(entry)

    # Prefer the full content; fall back to the summary.
    raw = entry.content || entry.summary
    abort "=> No content found on entry" unless raw

    content = sanitize_images(raw.strip)

    if self.planet.config.fetch('sanitize_html', false)
      content = Sanitize.fragment(content, Sanitize::Config::RELAXED)
    end

    # A local is used on purpose: the post being built is loop scratch and
    # must not leak into the blog's instance state.
    post = Post.new(
      title: entry.title.nil? ? self.name : entry.title,
      content: content,
      date: entry.published,
      url: entry.url,
      blog: self,
      rss_data: entry
    )
    self.posts << post

    puts "=> Found post titled #{ post.title } - by #{ post.blog.author }"
  end
end

#sanitize_images(html) ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/planet/blog.rb', line 90

# Rewrites relative +<img>+ sources in +html+ so they point at this blog.
#
# Every double-quoted +src+ of an +<img>+ tag is prefixed with the blog url,
# unless it is already absolute (carries a URI scheme such as +http:+,
# +https:+ or +data:+) or protocol-relative (+//host/...+). Each tag is
# rewritten exactly once, so a source that appears several times is not
# prefixed repeatedly, and text outside the +src+ attribute is never touched.
# Exactly one slash joins the blog url and the source, whether or not the url
# ends with one or the source starts with one.
#
# @param html [String] markup to rewrite; modified in place
# @return [String] the same +html+ object
def sanitize_images(html)
  base = self.url.to_s.chomp('/')

  # Block form of gsub! visits each <img ... src="..."> once; other
  # attributes may precede src.
  html.gsub!(/(<img\b[^>]*?\ssrc=")([^"]+)"/i) do
    prefix = Regexp.last_match(1)
    src    = Regexp.last_match(2)

    if src =~ %r{\A(?:[a-z][a-z0-9+.\-]*:|//)}i
      # Absolute or protocol-relative: leave untouched.
      "#{prefix}#{src}\""
    else
      "#{prefix}#{base}/#{src.sub(%r{\A/}, '')}\""
    end
  end

  # gsub! returns nil when nothing matched, so return the string explicitly.
  html
end

#whitelisted?(entry) ⇒ Boolean

Returns:

  • (Boolean)


104
105
106
107
108
109
# File 'lib/planet/blog.rb', line 104

# Decides whether a feed entry should be turned into a post.
#
# An entry is accepted when the planet has no whitelisted tags at all, or
# when at least one of the entry's categories is among them. Rejected
# entries are reported on standard output.
#
# @param entry [Object] feed entry responding to +title+ and +categories+
# @return [Boolean] true when the entry is accepted, false otherwise
def whitelisted?(entry)
  allowed_tags = self.planet.whitelisted_tags
  return true if allowed_tags.empty?

  shared = entry.categories & allowed_tags
  return true unless shared.empty?

  puts "\t=> Ignored post titled: #{entry.title} with categories: [#{entry.categories.join(', ')}]"
  false
end