27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
# File 'lib/googletastic/sync/document.rb', line 27
# Fetches every out-of-date remote document, writes it to
# <options[:folder]>/<options[:key]>/<remote.id>, and pushes the written files
# in batches through Googletastic::Sync.push followed by cleanup.
#
# Entries whose remote kind is not "document", or whose synced_at is not
# older than the remote's updated_at, are skipped. A failure while handling
# one document is printed and does not stop the remaining documents.
#
# @param documents [#each] records responding to remote, synced_at, title
#   and content=
# @param options [Hash] mutated in place: :key and :max_size receive defaults
# @option options [String] :folder base directory for written files
# @option options [String] :key ("documents") sub-directory under :folder
# @option options [Integer] :max_size (3000000) once the raw byte size of the
#   pending files plus the next document reaches this value, the pending
#   files are pushed before the next one is written
# @option options :username forwarded to Googletastic::Sync.push
# @option options :password forwarded to Googletastic::Sync.push
# @return [Array] the documents that were written during this call
def process(documents, options = {})
  options[:key] ||= "documents"
  options[:max_size] ||= 3_000_000
  pending = []
  pending_size = 0
  updated_documents = []

  # Pushes everything written so far, lets cleanup handle those documents and
  # starts a fresh batch. The batch is only reset when both calls succeed.
  flush = lambda do
    Googletastic::Sync.push(options[:username], options[:password], options)
    cleanup(pending, options)
    pending = []
    pending_size = 0
  end

  documents.each do |document|
    remote = document.remote
    next if remote.kind != "document"
    if document.synced_at && document.synced_at >= remote.updated_at
      puts "Skipping Document... #{document.title}"
      next
    end

    ext = remote.ext
    puts "Processing Document... #{document.title}"
    begin
      # Plain-text export with any UTF-8 byte order mark removed.
      plain_text = lambda { remote.download("txt").body.gsub(/\357\273\277/, "") }
      content =
        if ext.nil? || ext.empty?
          remote.content
        elsif ext == ".textile"
          RedCloth.new(plain_text.call).to_html
        elsif ext == ".markdown"
          BlueCloth.new(plain_text.call).to_html
        else
          plain_text.call
        end
      document.content = content

      # Batches are sized by the raw content, not the pretty-printed output.
      size = content.to_s.bytesize
      directory = File.join(options[:folder], options[:key])
      Dir.mkdir(directory) unless File.exist?(directory)
      target = File.join(directory, remote.id)

      # Push the current batch first so this file starts the next one.
      flush.call if !pending.empty? && pending_size + size >= options[:max_size]

      File.write(target, Googletastic::PrettyPrint.xml(content))
      pending << document
      pending_size += size
      updated_documents << document
    rescue StandardError => e
      puts "Error... #{e.inspect}"
    end
  end

  # Push whatever is still pending, including the last document written, no
  # matter how the final entries of the list were handled.
  unless pending.empty?
    begin
      flush.call
    rescue StandardError => e
      puts "Error... #{e.inspect}"
    end
  end

  updated_documents
end
|