Module: Googletastic::Sync::Document::ClassMethods

Defined in:
lib/googletastic/sync/document.rb

Instance Method Summary collapse

Instance Method Details

#cleanup(syncables, options) ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/googletastic/sync/document.rb', line 102

# Marks each syncable as synced and removes its locally staged file.
#
# For every syncable this sets +synced_at+ to the current time, persists it
# with +save!+, and then deletes the file at
# <options[:folder]>/<options[:key]>/<syncable.remote.id>, with
# +options[:ext]+ appended when that key is present.
#
# @param syncables [Enumerable] objects responding to +synced_at=+, +save!+
#   and +remote.id+
# @param options [Hash] reads +:folder+, +:key+ and, optionally, +:ext+
# @return [Object] whatever +syncables.each+ returns
def cleanup(syncables, options)
  syncables.each do |syncable|
    syncable.synced_at = Time.now
    syncable.save!
    path = File.join(options[:folder], options[:key], syncable.remote.id)
    path += options[:ext] if options.key?(:ext)
    begin
      File.delete(path)
    rescue SystemCallError
      # Best-effort removal: a missing or undeletable file must not abort the
      # remaining syncables. Only filesystem errors (Errno::*) are ignored, so
      # Interrupt, SystemExit and similar still propagate.
    end
  end
end

#process(documents, options = {}) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/googletastic/sync/document.rb', line 27

# Renders every out-of-date remote document to a local file and pushes the
# staged files in chunks bounded by +options[:max_size]+.
#
# Entries whose +remote.kind+ is not "document" are ignored, and documents
# whose +synced_at+ is at or after +remote.updated_at+ are skipped. For each
# remaining document the content is obtained (converted through RedCloth or
# BlueCloth for ".textile" / ".markdown" extensions), assigned to
# +document.content+, pretty-printed and written to
# <options[:folder]>/<options[:key]>/<remote.id>.
#
# Staged files are pushed via Googletastic::Sync.push and then handed to
# +cleanup+ whenever adding the next document would reach the size limit,
# and once more after the loop so that the final document is included.
#
# @param documents [Enumerable] objects responding to +remote+, +synced_at+,
#   +title+ and +content=+
# @param options [Hash] reads +:folder+, +:username+, +:password+; fills in
#   defaults for +:key+ ("documents") and +:max_size+ (3000000) in place
# @return [Array] the documents that were rendered and staged
def process(documents, options = {})
  options[:key] ||= "documents"
  # defaults to a reasonable limit (3MB) for heroku
  options[:max_size] ||= 3000000
  # documents written to disk since the last push, and their combined size
  staged = []
  staged_size = 0
  # every document rendered during this call
  updated_documents = []

  # Pushes whatever is currently staged, then marks it synced and resets the
  # chunk. Does nothing when no files are waiting.
  push_staged = lambda do
    unless staged.empty?
      Googletastic::Sync.push(options[:username], options[:password], options)
      cleanup(staged, options)
      staged = []
      staged_size = 0
    end
  end

  documents.each do |document|
    remote = document.remote
    next if remote.kind != "document"
    if document.synced_at && document.synced_at >= remote.updated_at
      puts "Skipping Document... #{document.title}"
      next
    end
    ext = remote.ext
    puts "Processing Document... #{document.title}"
    begin
      if ext.nil? || ext.empty?
        # just use the html we have already
        content = remote.content
      else
        # google is putting strange characters at beginning of downloaded files
        content = remote.download("txt").body.gsub(/\357\273\277/, "")
        case ext
        when ".textile"  then content = RedCloth.new(content).to_html
        when ".markdown" then content = BlueCloth.new(content).to_html
        end
      end

      document.content = content
      # size of the raw content, measured before pretty-printing
      size = content.to_s.bytesize

      path = File.join(options[:folder], options[:key])
      Dir.mkdir(path) unless File.exist?(path)
      path = File.join(path, remote.id)

      # if adding this file would pass the 5MB (or our 3MB) limit,
      # push the files staged so far to GAE first
      push_staged.call if size + staged_size >= options[:max_size]

      content = Googletastic::PrettyPrint.xml(content)

      File.open(path, 'w') { |f| f.write(content) }
      staged << document
      staged_size += size

      updated_documents << document
    rescue StandardError => e
      # keep going with the remaining documents
      puts "Error... #{e.inspect}"
    end
  end

  # Push the final chunk, regardless of how the last element was handled.
  begin
    push_staged.call
  rescue StandardError => e
    puts "Error... #{e.inspect}"
  end

  updated_documents
end

#sync(documents, options, &block) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/googletastic/sync/document.rb', line 9

# Processes the given documents and then posts their remote ids as JSON.
#
# +process+ runs first (it also fills in the default +options[:key]+). A
# payload of the form { key => [{:id => ...}, ...] } is then built with one
# entry per document; when a block is given it is yielded the payload, the
# key and the document right after that document's entry has been appended.
#
# @param documents [Enumerable] objects responding to +remote.id+
# @param options [Hash] reads +:key+, +:url+ and +:path+
# @yield [data, key, document] optional hook called once per document
# @return [Object] the result of Googletastic::Sync.post
def sync(documents, options, &block)
  process(documents, options)

  payload_key = options[:key].to_s
  payload = {payload_key => []}

  documents.each do |doc|
    payload[payload_key] << {:id => doc.remote.id}
    next unless block_given?
    yield(payload, payload_key, doc)
  end

  Googletastic::Sync.post(
    :url    => options[:url],
    :path   => options[:path],
    :format => :json,
    :data   => payload
  )
end