Class: Pedophile::OfflineTree

Inherits:
Object
  • Object
show all
Defined in:
lib/pedophile/offline_tree.rb

Constant Summary collapse

# YAML file holding the analyzed file list (@files); written by
# #save_analyzed and read back by #load_analyzed.
TMP_STRUCTURE_PATH =
File.absolute_path(File.join(Wget::TMP_PATH, "files.yaml"))
# YAML file holding the log of renames and substitutions (@changes);
# written by #save_changes.
TMP_CHANGES_PATH =
File.absolute_path(File.join(Wget::TMP_PATH, "changes.yaml"))
# When true (and the caller passes check_paths), #process_massive_gsub
# rewrites the replacement as a path relative to the file being edited.
FIX_RELATIVE_PATH =
false

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(downloader) ⇒ OfflineTree

Returns a new instance of OfflineTree.



10
11
12
13
14
# File 'lib/pedophile/offline_tree.rb', line 10

# Binds the tree to a downloader and starts with empty bookkeeping lists.
#
# @param downloader [Object] stored as-is and exposed through #downloader
def initialize(downloader)
  @downloader = downloader
  # One Hash per analyzed file (filled by #analyze or #load_analyzed).
  @files = []
  # Log of renames and substitutions (dumped by #save_changes).
  @changes = []
end

Instance Attribute Details

#downloaderObject (readonly)

Returns the value of attribute downloader.



16
17
18
# File 'lib/pedophile/offline_tree.rb', line 16

# Read-only access to the downloader given to the constructor.
#
# @return [Object] the current value of @downloader
def downloader
  @downloader
end

#filesObject (readonly)

Returns the value of attribute files.



16
17
18
# File 'lib/pedophile/offline_tree.rb', line 16

# Read-only access to the list of analyzed file records.
#
# @return [Array<Hash>, nil] the current value of @files
def files
  @files
end

Instance Method Details

#after_processObject

Destructive part



44
45
46
47
48
# File 'lib/pedophile/offline_tree.rb', line 44

# Runs the post-processing steps in a fixed order: load_processed, then
# remove_bad_suffix, then rename_files.
#
# NOTE(review): none of the three helpers is visible in this excerpt, so
# what each one changes on disk has to be confirmed in the full source.
#
# @return [Object] whatever rename_files returns
def after_process
  load_processed
  remove_bad_suffix
  rename_files
end

#analyzeObject



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/pedophile/offline_tree.rb', line 55

# Walks every regular file below #path, records its MIME type and, for
# HTML or plain-text files, the quoted path candidates found inside it,
# then persists the list through #save_analyzed.
#
# Each record appended to @files is a Hash with :path, :mime (nil when the
# output of `file --mime` could not be parsed) and, for text/html and
# text/plain only, :inside (the result of #analyze_file).
#
# @return [Object] whatever #save_analyzed returns
def analyze
  # because I don't want to read all wget options...
  glob_path = "#{path}/**/**"
  puts "offline path #{path.to_s.cyan}"

  Dir.glob(glob_path) do |item|
    next if item == '.' || item == '..' || File.directory?(item)

    puts "analyze file #{item.to_s.yellow}"

    # Pass the file name as its own argv entry instead of interpolating it
    # into a shell command line: names containing spaces, quotes, `?`, `&`
    # and similar characters must reach `file` unchanged.
    output = IO.popen(['file', '--mime', item], &:read)
    mime = output[/(\w+\/\w+);/, 1]

    record = { path: item, mime: mime }
    record[:inside] = analyze_file(item) if mime == 'text/html' || mime == 'text/plain'

    @files << record
  end

  save_analyzed
end

#analyze_file(file) ⇒ Object



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/pedophile/offline_tree.rb', line 103

# Collects every double- or single-quoted string in +file+ that passes
# #is_path_ok? and describes it relative to the file's own directory.
#
# @param file [String] path of the file to scan
# @return [Array<Hash>] one Hash per accepted candidate with
#   :exists  - whether the candidate, joined to the file's directory, exists
#   :is_file - whether it is a regular file
#   :path    - the candidate exactly as written in the file
def analyze_file(file)
  content = File.read(file)

  # Double-quoted strings first, then single-quoted ones; each list is
  # de-duplicated on its own, so a value quoted both ways appears twice.
  candidates = content.scan(/"([^"]+)"/).flatten.uniq
  candidates += content.scan(/'([^']+)'/).flatten.uniq

  containing_dir = File.dirname(file)

  candidates.each_with_object([]) do |candidate, found|
    next unless is_path_ok?(candidate)

    target = File.join(containing_dir, candidate)
    entry = {
      # File.exist? - the File.exists? alias was removed in Ruby 3.2.
      exists: File.exist?(target),
      is_file: File.file?(target),
      path: candidate
    }

    found << entry if should_add_path?(entry)
  end
end

#base_pathObject



142
143
144
145
# File 'lib/pedophile/offline_tree.rb', line 142

# Offline directory reported by the downloader's wget object, looked up on
# first use and cached in @base_path.
#
# @return [Object] downloader.wget.offline_path
def base_path
  @base_path ||= downloader.wget.offline_path
end

#is_path_ok?(pp) ⇒ Boolean

TODO - check whether this string is a correct Unix path

Returns:

  • (Boolean)


131
132
133
134
# File 'lib/pedophile/offline_tree.rb', line 131

# Cheap filter for path candidates: accepts anything shorter than 200
# characters. The stricter pattern below is kept for reference only.
#
# @param pp [String] candidate taken from a scanned file
# @return [Boolean]
def is_path_ok?(pp)
  # pp =~ /\A(?:[0-9a-zA-Z\_\-]+\/?)+\z/
  pp.length < 200
end

#load_analyzedObject



99
100
101
# File 'lib/pedophile/offline_tree.rb', line 99

# Reloads @files from the YAML snapshot at TMP_STRUCTURE_PATH.
#
# The snapshot is produced with Symbol keys (:path, :mime, :inside, ...).
# Since Psych 4 (Ruby 3.1) YAML.load_file is a safe load that rejects
# Symbols, so Symbols are permitted explicitly; aliases are allowed
# because one String object stored in several records is emitted as a
# YAML anchor/alias pair.
#
# @return [Object] the loaded structure, also stored in @files
def load_analyzed
  snapshot = File.read(TMP_STRUCTURE_PATH)
  @files = YAML.safe_load(snapshot, permitted_classes: [Symbol], aliases: true)
end

#make_it_soObject



18
19
20
21
22
23
24
25
26
27
# File 'lib/pedophile/offline_tree.rb', line 18

# Full pipeline: analyze the offline tree, reload the snapshot that
# #analyze saved, run the three clean-up passes, then persist both the
# updated file list and the change log.
#
# @return [Object] whatever #save_changes returns
def make_it_so
  # Build the file list, then read it back from disk.
  analyze
  load_analyzed

  # Clean-up passes; each may rename files and rewrite links.
  process_bad_suffix1
  process_bad_suffix2
  process_bad_filenames

  # Persist the results.
  save_analyzed
  save_changes
end

#pathObject



50
51
52
53
# File 'lib/pedophile/offline_tree.rb', line 50

# Offline directory reported by the downloader's wget object, looked up on
# first use and cached in @path.
#
# @return [Object] downloader.wget.offline_path
def path
  @path ||= downloader.wget.offline_path
end

#process_bad_filenamesObject



188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# File 'lib/pedophile/offline_tree.rb', line 188

# Renames every file whose path contains a character outside
# [0-9A-Za-z.-/:] by replacing each such character with "_".
#
# The same rule is applied to the paths recorded under :inside, but only
# to those for which File.exist? is true.
# NOTE(review): that existence check uses the :inside path exactly as
# stored, so a relative path is resolved against the current working
# directory, not against the containing file - confirm this is intended.
#
# @return [Array<Hash>] @files
def process_bad_filenames
  unsafe_chars = /[^0-9A-Za-z.\-\/:]/

  @files.each do |f|
    old_file = f[:path]
    new_file = old_file.gsub(unsafe_chars, '_')
    process_rename_file(old_file, new_file) unless new_file == old_file

    next unless f[:inside]

    f[:inside].each do |fi|
      old_file = fi[:path]
      # File.exist? - the File.exists? alias was removed in Ruby 3.2.
      next unless File.exist?(old_file)

      new_file = old_file.gsub(unsafe_chars, '_')
      process_rename_file(old_file, new_file) unless new_file == old_file
    end
  end
end

#process_bad_suffix1Object



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/pedophile/offline_tree.rb', line 161

# Removes numeric query suffixes - "?<digits>" and the URL-encoded
# "%3F<digits>" - from file names, then strips the encoded form from the
# contents of every scanned file through #process_massive_gsub.
#
# Paths recorded under :inside are renamed too, but only those for which
# File.exist? is true for the path exactly as stored.
#
# @return [Object] whatever #process_massive_gsub returns
def process_bad_suffix1
  without_suffix = ->(name) { name.gsub(/\?\d+/, '').gsub(/\%3F\d+/, '') }

  @files.each do |f|
    old_file = f[:path]
    new_file = without_suffix.call(old_file)
    process_rename_file(old_file, new_file) unless new_file == old_file

    next unless f[:inside]

    f[:inside].each do |fi|
      old_file = fi[:path]
      # File.exist? - the File.exists? alias was removed in Ruby 3.2.
      next unless File.exist?(old_file)

      new_file = without_suffix.call(old_file)
      process_rename_file(old_file, new_file) unless new_file == old_file
    end
  end

  process_massive_gsub(/\%3F\d+/, "", false)
end

#process_bad_suffix2Object

PROCESSING



148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/pedophile/offline_tree.rb', line 148

# Strips the "?body=1" suffix from file names, then removes its
# URL-encoded form "%3Fbody=1" from the contents of every scanned file
# through #process_massive_gsub.
#
# @return [Object] whatever #process_massive_gsub returns
def process_bad_suffix2
  @files.each do |f|
    current_name = f[:path]
    cleaned_name = current_name.gsub(/\?body=1/, '')
    next if cleaned_name == current_name

    process_rename_file(current_name, cleaned_name)
  end

  process_massive_gsub("%3Fbody=1", "", false)
end

#process_massive_gsub(from, to, check_paths = false) ⇒ Object



262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/pedophile/offline_tree.rb', line 262

# Replaces +from+ with +to+ in the contents of every scanned file (every
# record of @files that has :inside) and in the :inside paths recorded for
# that file. Each file that actually changed is logged in @changes.
#
# @param from [String, Regexp] pattern to replace
# @param to [String] replacement
# @param check_paths [Boolean] together with FIX_RELATIVE_PATH, rewrite the
#   replacement as a path relative to the directory of the file being
#   edited (+to+ is then taken as relative to #path)
# @return [Array<Hash>] @files
# @raise [RuntimeError] when a record with :inside points to a missing file
def process_massive_gsub(from, to, check_paths = false)
  puts "massive gsub #{from.to_s.blue} to #{to.to_s.green}"

  old_from = from.to_s
  old_to = to.to_s

  @files.each do |f|
    # must be proper mime before, so not needed to check
    next unless f[:inside]

    file_path = f[:path].clone

    puts " open #{file_path.to_s.red}"

    # File.exist? - the File.exists? alias was removed in Ruby 3.2.
    raise "file #{file_path} not found" unless File.exist?(file_path)

    # relative path fix
    # Computed into a per-file local: overwriting +to+ here would feed the
    # previous file's relative path into the next file's computation.
    replacement = to
    if check_paths && FIX_RELATIVE_PATH
      containing_dir = Pathname.new(File.absolute_path(File.dirname(file_path)))
      target = Pathname.new(File.absolute_path(File.join(path, to)))
      replacement = target.relative_path_from(containing_dir).to_s
    end

    content = File.read(file_path)

    # gsub! returns nil when nothing matched; only then is the file left
    # alone. Writing with File.write (not puts) keeps the file's ending
    # exactly as it was instead of appending a newline.
    if content.gsub!(from, replacement)
      # logs
      @changes << { gsub: { old: from, new: replacement, file: file_path, old_from: old_from, old_to: old_to } }
      File.write(file_path, content)
    end

    f[:inside].each do |fi|
      fi[:path].gsub!(from, replacement)
    end

    puts " done #{file_path.to_s.red}"
  end
end

#process_rename_file(old_file_path, new_file_path) ⇒ Object

def process_bad_filenames_links

process_massive_gsub(/\%3F/, "_", false)

end



216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/pedophile/offline_tree.rb', line 216

# Renames one file on disk, updates every matching path in @files, logs
# the rename in @changes and rewrites references to the old name inside
# all scanned files.
#
# Both arguments are full paths; the part below #base_path (without the
# leading slash) is what gets substituted inside file contents.
#
# @param old_file_path [String] current full path of the file
# @param new_file_path [String] desired full path
# @return [nil]
def process_rename_file(old_file_path, new_file_path)
  puts "rename from #{old_file_path.to_s.blue} to #{new_file_path.to_s.green}"

  # this will be with full path
  old_file_with_path = old_file_path.clone

  # Names relative to base_path, ignoring the leading slash. Non-mutating
  # calls, so the Strings stored in @files are never modified here.
  old_file = old_file_path.gsub(base_path, '').gsub(/^\//, '')
  new_file = new_file_path.gsub(base_path, '').gsub(/^\//, '')

  # 1. rename 1 file
  new_file_path = old_file_with_path.gsub(old_file, new_file)
  # The sanitized path may be in a directory that does not exist yet
  # (e.g. "my dir/a.html" -> "my_dir/a.html"); File.rename would raise
  # Errno::ENOENT without it.
  Pathname.new(File.dirname(new_file_path)).mkpath
  File.rename(old_file_with_path, new_file_path)

  # internal log-like
  @changes << { rename: { old: old_file_with_path, new: new_file_path } }

  # 2. rename in @files
  @files.each do |f|
    f[:path] = new_file_path if f[:path] == old_file_with_path

    next unless f[:inside]

    f[:inside].each do |fi|
      fi[:path] = new_file_path if fi[:path] == old_file_with_path
    end
  end

  # 3. gsub all files
  # gsub files after renaming
  process_massive_gsub(old_file, new_file, true)
  # Second pass for the URL-encoded form; without a "?" in the name it
  # would just repeat the first pass over every file.
  encoded_old_file = old_file.gsub("?", "%3F")
  process_massive_gsub(encoded_old_file, new_file, true) unless encoded_old_file == old_file

  puts "RENAMED #{old_file.to_s.blue} to #{new_file.to_s.green}"
end

#save_analyzedObject



87
88
89
90
91
# File 'lib/pedophile/offline_tree.rb', line 87

# Writes @files as YAML to TMP_STRUCTURE_PATH, replacing any previous
# snapshot. The block form closes the handle even if serializing or
# writing raises.
#
# @return [nil]
def save_analyzed
  File.open(TMP_STRUCTURE_PATH, "w") { |snapshot| snapshot.puts @files.to_yaml }
end

#save_changesObject



93
94
95
96
97
# File 'lib/pedophile/offline_tree.rb', line 93

# Writes @changes as YAML to TMP_CHANGES_PATH, replacing any previous
# log. The block form closes the handle even if serializing or writing
# raises.
#
# @return [nil]
def save_changes
  File.open(TMP_CHANGES_PATH, "w") { |log| log.puts @changes.to_yaml }
end

#should_add_path?(h) ⇒ Boolean

TODO

Returns:

  • (Boolean)


137
138
139
140
# File 'lib/pedophile/offline_tree.rb', line 137

# Decides whether a candidate found by #analyze_file is kept. Currently
# accepts every candidate; the commented line is the stricter alternative
# left in place by the author.
#
# @param h [Hash] candidate record (:exists, :is_file, :path)
# @return [Boolean] always true
def should_add_path?(h)
  #return h[:is_file]
  true
end

#zip(output_file = 'site.zip') ⇒ Object



29
30
31
32
33
# File 'lib/pedophile/offline_tree.rb', line 29

# Zips the downloaded site directory (downloader.wget.site_last_path)
# from inside Wget::TMP_OFFLINE_PATH.
#
# The zip command is started with an argument vector and a :chdir option
# instead of a shell command line, so names containing spaces or shell
# metacharacters are passed through unchanged, and zip is never run from
# the wrong directory when the change of directory fails.
#
# @param output_file [String] archive name, relative to Wget::TMP_OFFLINE_PATH
#   unless absolute
# @return [String] standard output of zip
def zip(output_file = 'site.zip')
  site_directory = downloader.wget.site_last_path

  # Shown to the operator only; this string is not handed to a shell.
  command = "cd #{Wget::TMP_OFFLINE_PATH}; zip -r #{output_file} #{site_directory}"
  puts command

  IO.popen(['zip', '-r', output_file.to_s, site_directory.to_s],
           chdir: Wget::TMP_OFFLINE_PATH.to_s, &:read)
end

#zip_with_custom_dir(output_path_zip, output_directory_name) ⇒ Object



35
36
37
38
39
40
41
# File 'lib/pedophile/offline_tree.rb', line 35

# Renames the downloaded site directory to +output_directory_name+ and
# zips it, working inside Wget::TMP_SITE_DIRECTORY (resolved against
# Wget::TMP_PATH, as the two chained `cd`s did).
#
# Both commands are started with argument vectors and a :chdir option
# instead of a shell command line, so names containing spaces, quotes or
# `$` are passed through unchanged. As before, zip is attempted even when
# the mv step fails (for example on a second call, when the directory
# already carries the custom name).
#
# @param output_path_zip [String] archive path
# @param output_directory_name [String] new name of the site directory
# @return [String] standard output of zip
def zip_with_custom_dir(output_path_zip, output_directory_name)
  site_directory = downloader.wget.site_last_path

  # Shown to the operator only; this string is not handed to a shell.
  command = "cd #{Wget::TMP_PATH}; cd #{Wget::TMP_SITE_DIRECTORY}; mv \"#{site_directory}\" \"#{output_directory_name}\";"
  command += " zip -r #{output_path_zip} #{output_directory_name}"

  puts command

  work_dir = File.expand_path(Wget::TMP_SITE_DIRECTORY.to_s, Wget::TMP_PATH.to_s)

  system('mv', site_directory.to_s, output_directory_name.to_s, chdir: work_dir)
  IO.popen(['zip', '-r', output_path_zip.to_s, output_directory_name.to_s],
           chdir: work_dir, &:read)
end