Class: Crawlable::Sitemap

Inherits:
Object
  • Object
show all
Defined in:
lib/crawlable/sitemap.rb

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args, &block) ⇒ Sitemap

Returns a new instance of Sitemap.



52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/crawlable/sitemap.rb', line 52

# Builds a sitemap definition for a host.
#
# The first positional argument becomes the sitemap host. A trailing options
# hash, if present, is applied by calling the same-named method with the value
# for every key this object responds to. The optional block is then evaluated
# with +self+ set to the new object.
#
# @param args [Array] the host, optionally followed by an options hash
# @yield optional definition block, run via instance_eval
# @raise [RuntimeError] if the host is still blank after options and block ran
def initialize(*args, &block)
  self.sitemap_host = args.shift
  options = args.extract_options!

  options.each do |key, value|
    send(key, value) if respond_to?(key)
  end

  # instance_eval with a nil block raises ArgumentError, so only evaluate a
  # block that was actually supplied.
  instance_eval(&block) if block

  raise "Please define a host: 'Sitemap 'http://my-site.com' do ..." if sitemap_host.blank?
end

Class Attribute Details

.instance ⇒ Object

Returns the value of attribute instance.



5
6
7
# File 'lib/crawlable/sitemap.rb', line 5

# Reader for the @instance variable.
#
# @return [Object, nil] the stored value, or nil when nothing has been assigned
def instance
  @instance
end

Instance Attribute Details

#links ⇒ Object

Returns the value of attribute links.



50
51
52
# File 'lib/crawlable/sitemap.rb', line 50

# Reader for the @links variable.
#
# @return [Object, nil] the stored value, or nil when nothing has been assigned
def links
  @links
end

#path(value = nil) ⇒ Object

Returns the value of attribute path.



50
51
52
# File 'lib/crawlable/sitemap.rb', line 50

# Reader for the @path variable.
#
# @return [Object, nil] the stored value, or nil when nothing has been assigned
def path
  @path
end

#ping(*args) ⇒ Object

Returns the value of attribute ping.



50
51
52
# File 'lib/crawlable/sitemap.rb', line 50

# Reader for the @ping variable.
#
# @return [Object, nil] the stored value, or nil when nothing has been assigned
def ping
  @ping
end

#sitemap_host(*args) ⇒ Object

Returns the value of attribute sitemap_host.



50
51
52
# File 'lib/crawlable/sitemap.rb', line 50

# Reader for the @sitemap_host variable.
#
# @return [Object, nil] the stored value, or nil when nothing has been assigned
def sitemap_host
  @sitemap_host
end

#stylesheet(value = nil) ⇒ Object

Returns the value of attribute stylesheet.



50
51
52
# File 'lib/crawlable/sitemap.rb', line 50

# Reader for the @stylesheet variable.
#
# @return [Object, nil] the stored value, or nil when nothing has been assigned
def stylesheet
  @stylesheet
end

#yahoo_app_id(string = nil) ⇒ Object

Returns the value of attribute yahoo_app_id.



50
51
52
# File 'lib/crawlable/sitemap.rb', line 50

# Reader for the @yahoo_app_id variable.
#
# @return [Object, nil] the stored value, or nil when nothing has been assigned
def yahoo_app_id
  @yahoo_app_id
end

Class Method Details

.clear ⇒ Object



43
44
45
46
# File 'lib/crawlable/sitemap.rb', line 43

# Calls clear on the current instance, if there is one, and then forgets it.
#
# Safe to call when no instance has been defined: the nil case is skipped
# instead of raising NoMethodError.
#
# @return [nil]
def clear
  instance.clear if instance
  self.instance = nil
end

.define!(*args, &block) ⇒ Object



7
8
9
# File 'lib/crawlable/sitemap.rb', line 7

# Builds a new object from the given arguments and block and stores it as the
# current instance.
#
# @param args [Array] forwarded unchanged to new
# @yield forwarded unchanged to new
# @return [Object] the object that was just built and stored
def define!(*args, &block)
  built = new(*args, &block)
  self.instance = built
end

.find(path, directory) ⇒ Object



15
16
17
18
19
# File 'lib/crawlable/sitemap.rb', line 15

# Resolves a requested path to the sitemap file inside +directory+.
#
# @param path [String, nil] requested path to test
# @param directory [String] directory to join the sitemap path onto
# @return [String, nil] +directory+ joined with the sitemap path when +path+
#   contains the sitemap path (compared case-insensitively), otherwise nil
def find(path, directory)
  sitemap_path = self.path.to_s
  # An empty sitemap path would build an empty pattern that matches every
  # request, so treat "nothing configured" as "nothing found".
  return if sitemap_path.empty?

  File.join(directory, sitemap_path) if path =~ /#{Regexp.escape(sitemap_path)}/i
end

.inspect ⇒ Object



35
36
37
# File 'lib/crawlable/sitemap.rb', line 35

# Delegates to the inspect output of whatever instance currently holds.
#
# @return [String] result of calling inspect on the current instance
def inspect
  instance.inspect
end

.parse!(path) ⇒ Object



21
22
23
24
# File 'lib/crawlable/sitemap.rb', line 21

# Reads the Ruby file at +path+ and evaluates its contents.
#
# @param path [String, nil] file to evaluate; when nil, falls back to
#   config/sitemap.rb joined onto ::Rails.root
# @return [Object] the value of the last expression evaluated from the file
def parse!(path)
  path ||= File.join(::Rails.root, 'config/sitemap.rb')
  # File.read never interprets a leading "|" as a command to run, unlike
  # IO.read. Passing the file name gives errors raised by the evaluated code
  # a usable backtrace.
  # NOTE(review): eval runs the file with full privileges; only point this at
  # trusted, application-owned files.
  eval(File.read(path), binding, path.to_s)
end

.path ⇒ Object



11
12
13
# File 'lib/crawlable/sitemap.rb', line 11

# Path of the current instance, or an empty string when there is none.
#
# @return [Object] the instance's path, or "" if no instance is set
def path
  return "" unless instance

  instance.path
end

.process!(from, to, compress = false, &block) ⇒ Object



30
31
32
33
# File 'lib/crawlable/sitemap.rb', line 30

# Runs parse! on +from+ and then write on +to+, in that order.
#
# @param from forwarded unchanged to parse!
# @param to forwarded unchanged to write
# @param compress forwarded unchanged to write (defaults to false)
# @return whatever write returns
# NOTE(review): the block parameter is accepted but never used in this method.
def process!(from, to, compress = false, &block)
  parse!(from)
  write(to, compress)
end

.to_xml ⇒ Object



39
40
41
# File 'lib/crawlable/sitemap.rb', line 39

# Delegates to to_xml on the current instance.
#
# @return [Object] whatever the current instance's to_xml returns
def to_xml
  instance.to_xml
end

.write(to, compress = false) ⇒ Object



26
27
28
# File 'lib/crawlable/sitemap.rb', line 26

# Delegates to write on the current instance.
#
# @param to forwarded unchanged as the first argument
# @param compress forwarded unchanged as the second argument (defaults to false)
# @return [Object] whatever the current instance's write returns
def write(to, compress = false)
  instance.write(to, compress)
end

Instance Method Details

#clear ⇒ Object



218
219
220
# File 'lib/crawlable/sitemap.rb', line 218

# Discards the stored links by resetting @links to nil.
#
# @return [nil]
def clear
  @links = nil
end

#image(path, *args, &block) ⇒ Object



127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/crawlable/sitemap.rb', line 127

# Attaches an image entry to the most recently added link.
#
# @param path [String] image location, stored under :path
# @param args [Array] may end with an options hash; the keys read here are
#   :caption, :geo_location, :title and :license
# @return [Array] the :images array of the last link, including the new entry
# @raise [ArgumentError] (from assert_valid_keys) for any other option key
# NOTE(review): the block parameter is accepted but never used in this method.
def image(path, *args, &block)
  options = args.extract_options!
  # The validation used to list only :priority, :changes, :updated_at and
  # :host, so every option this method reads was rejected. Those four keys
  # are still tolerated (and ignored) so existing calls keep working.
  options.assert_valid_keys(
    :caption, :geo_location, :title, :license,
    :priority, :changes, :updated_at, :host
  )

  entry = {
    :path          => path,
    :caption       => options[:caption],
    :geo_location  => options[:geo_location],
    :title         => options[:title],
    :license       => options[:license]
  }

  links.last[:images].push(entry)
end

#inspect ⇒ Object



222
223
224
# File 'lib/crawlable/sitemap.rb', line 222

# One-line summary of the sitemap: host, sitemap path, ping setting and links.
#
# @return [String]
def inspect
  host = sitemap_host
  file = sitemap_path
  "<Sitemap @sitemap_host='#{host}' @sitemap_path='#{file}' @ping='#{ping.inspect}' @links='#{links.inspect}'/>"
end


100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/crawlable/sitemap.rb', line 100

# Registers a URL entry and returns it.
#
# Defaults: priority 0.5, changes 'monthly', updated_at Time.now, and the
# sitemap's own host. When a block is given it is evaluated with +self+ set
# to the sitemap after the entry has been appended to links.
#
# @param path [String] path (or URL) joined onto the host to build :url
# @param args [Array] may end with an options hash; valid keys are
#   :priority, :changes, :updated_at, :host and its alias :sitemap_host
# @return [Hash] the stored entry (:host, :path, :url, :priority, :changes,
#   :updated_at, :images)
# @raise [ArgumentError] (from assert_valid_keys) for any other option key
def link(path, *args, &block)
  options = args.extract_options!
  # Validation used to allow :sitemap_host while the code below reads :host,
  # so :host raised and :sitemap_host was silently dropped. Accept both and
  # treat :sitemap_host as an alias for :host.
  options.assert_valid_keys(:priority, :changes, :updated_at, :host, :sitemap_host)
  host_alias = options.delete(:sitemap_host)
  options[:host] ||= host_alias if host_alias
  options.reverse_merge!(
    :priority => 0.5,
    :changes => 'monthly',
    :updated_at => Time.now,
    :host => sitemap_host
  )

  entry = {
    :host        => options[:host],
    :path        => path,
    :url         => URI.join(options[:host], path).to_s,
    :priority    => options[:priority],
    :changes     => options[:changes],
    :updated_at  => options[:updated_at],
    :images      => []
  }

  links.push(entry)

  instance_eval(&block) if block_given?

  entry
end

#notify ⇒ Object



200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# File 'lib/crawlable/sitemap.rb', line 200

# Pings each search-engine endpoint with the sitemap location and prints the
# outcome of every attempt.
#
# A failing ping is reported on stdout and does not stop the remaining ones.
#
# @return [Hash] engine name => URL that was requested
def notify
  # Escape the interpolated values so characters such as "&", "?" or spaces
  # cannot break the query string of the ping URL.
  sitemap = URI.encode_www_form_component(path.to_s)
  app_id = URI.encode_www_form_component(yahoo_app_id.to_s)

  engines = {
    :google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{sitemap}",
    :yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{sitemap}&appid=#{app_id}",
    :ask => "http://submissions.ask.com/ping?sitemap=#{sitemap}",
    :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{sitemap}",
    :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{sitemap}"
  }

  engines.each do |engine, url|
    begin
      # Kernel#open no longer opens URLs on Ruby 3+, and it treats a leading
      # "|" as a command. URI.open (from open-uri) is the supported entry
      # point; the block form closes the response once it has been read.
      URI.open(url, &:read)
      puts "Successful ping of #{engine.to_s.titleize}"
    rescue Timeout::Error, StandardError => e
      puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect}"
    end
  end
end

#sitemap_path(string = nil) ⇒ Object



96
97
98
# File 'lib/crawlable/sitemap.rb', line 96

# Combined reader/writer for the sitemap file path.
#
# With an argument, stores it. Without one, returns the stored value and
# falls back to "public/sitemap.xml" only when nothing has been set; the
# argument-less call no longer overwrites a previously configured path.
#
# @param string [String, nil] new path, or nil to just read
# @return [String] the current sitemap path
def sitemap_path(string = nil)
  @sitemap_path = string if string
  @sitemap_path ||= "public/sitemap.xml"
end

#to_xml ⇒ Object



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/crawlable/sitemap.rb', line 146

# Renders all links (and their images) as a sitemap XML document.
#
# Each link becomes a <url> element with <loc> and, when present, <lastmod>,
# <changefreq> and <priority>; each image becomes an <image:image> child.
# When a stylesheet is set, an xml-stylesheet processing instruction is
# inserted right after the XML declaration.
#
# @return [String] the XML document
def to_xml
  namespaces = {
    "xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9",
    "xmlns:image" => "http://www.google.com/schemas/sitemap-image/1.1"
  }
  builder = Nokogiri::XML::Builder.new do |xml|
    xml.urlset(namespaces) do
      self.links.each do |link|
        xml.url do
          # The sitemap protocol requires <loc> to be an absolute URL, which
          # is what :url holds. Fall back to :path for entries without it.
          xml.loc(link[:url] || link[:path])
          xml.lastmod    w3c_date(link[:updated_at])  if link[:updated_at]
          xml.changefreq link[:changes]               if link[:changes]
          xml.priority   link[:priority]              if link[:priority]
          link[:images].each do |image|
            xml["image"].image do
              xml["image"].loc          image[:path]
              xml["image"].caption      image[:caption]       if image[:caption]
              xml["image"].geo_location image[:geo_location]  if image[:geo_location]
              xml["image"].title        image[:title]         if image[:title]
              xml["image"].license      image[:license]       if image[:license]
            end
          end
        end
      end
    end
  end
  document = builder.to_xml
  return document unless stylesheet

  # The builder output has no stylesheet instruction, so splice one in after
  # the XML declaration. The block form keeps backslashes in the stylesheet
  # value from being read as replacement escapes.
  document.sub!("<?xml version=\"1.0\"?>") do |head|
    "#{head}\n<?xml-stylesheet type=\"text/xsl\" href=\"#{stylesheet}\"?>"
  end

  document
end

#w3c_date(date) ⇒ Object



142
143
144
# File 'lib/crawlable/sitemap.rb', line 142

# Formats a time as a W3C datetime string in UTC.
#
# @param date [Time, #utc] the moment to format
# @return [String] e.g. "2020-01-02T01:04:05+00:00"
def w3c_date(date)
  # Time#utc converts the receiver in place, which changed the zone of the
  # caller's object. getutc returns a converted copy instead; objects without
  # getutc keep the previous behaviour.
  utc_time = date.respond_to?(:getutc) ? date.getutc : date.utc
  utc_time.strftime("%Y-%m-%dT%H:%M:%S+00:00")
end

#write(path, compress) ⇒ Object



184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/crawlable/sitemap.rb', line 184

# Writes the sitemap XML to disk, optionally gzip-compressed.
#
# @param path [String] destination file; never modified
# @param compress [Boolean] when true the output is gzipped and ".gz" is
#   appended to the file name unless it already ends in ".gz"
# @return [String] the path that was actually written
def write(path, compress = false)
  # Render first so a failure in to_xml cannot leave a truncated file behind.
  xml = to_xml

  unless compress
    File.open(path, 'wb') { |file| file.puts xml }
    return path
  end

  # Build a new string instead of `path << ".gz"`, which changed the caller's
  # argument in place and raised on frozen strings.
  target = File.extname(path) == ".gz" ? path : "#{path}.gz"
  # The block form finishes and closes the gzip stream even if writing fails.
  Zlib::GzipWriter.open(target) { |gz| gz.write(xml) }
  target
end