Class: Pedophile::OfflineTree
- Inherits:
-
Object
- Object
- Pedophile::OfflineTree
- Defined in:
- lib/pedophile/offline_tree.rb
Constant Summary collapse
- TMP_STRUCTURE_PATH =
File.absolute_path(File.join(Wget::TMP_PATH, "files.yaml"))
- TMP_CHANGES_PATH =
File.absolute_path(File.join(Wget::TMP_PATH, "changes.yaml"))
- FIX_RELATIVE_PATH =
false
Instance Attribute Summary collapse
-
#downloader ⇒ Object
readonly
Returns the value of attribute downloader.
-
#files ⇒ Object
readonly
Returns the value of attribute files.
Instance Method Summary collapse
-
#after_process ⇒ Object
Destructive part.
- #analyze ⇒ Object
- #analyze_file(file) ⇒ Object
- #base_path ⇒ Object
-
#initialize(downloader) ⇒ OfflineTree
constructor
A new instance of OfflineTree.
-
#is_path_ok?(pp) ⇒ Boolean
TODO - check whether this string is a correct Unix path.
- #load_analyzed ⇒ Object
- #make_it_so ⇒ Object
- #path ⇒ Object
- #process_bad_filenames ⇒ Object
- #process_bad_suffix1 ⇒ Object
-
#process_bad_suffix2 ⇒ Object
PROCESSING.
- #process_massive_gsub(from, to, check_paths = false) ⇒ Object
-
#process_rename_file(old_file_path, new_file_path) ⇒ Object
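Renames a file on disk and updates the recorded paths and references to it. (The text YARD picked up here is commented-out code that precedes the method: def process_bad_filenames_links process_massive_gsub(/%3F/, "_", false) end.)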
- #save_analyzed ⇒ Object
- #save_changes ⇒ Object
-
#should_add_path?(h) ⇒ Boolean
TODO.
- #zip(output_file = 'site.zip') ⇒ Object
- #zip_with_custom_dir(output_path_zip, output_directory_name) ⇒ Object
Constructor Details
#initialize(downloader) ⇒ OfflineTree
Returns a new instance of OfflineTree.
10 11 12 13 14 |
# File 'lib/pedophile/offline_tree.rb', line 10
# Builds a tree bound to +downloader+ with empty bookkeeping lists.
#
# @param downloader [Object] stored as-is and exposed through #downloader
def initialize(downloader)
  @downloader = downloader
  @files = []   # per-file entries, exposed through #files
  @changes = [] # log of performed changes
end
Instance Attribute Details
#downloader ⇒ Object (readonly)
Returns the value of attribute downloader.
16 17 18 |
# File 'lib/pedophile/offline_tree.rb', line 16
# Read-only accessor.
#
# @return [Object] the current value of @downloader
def downloader
  @downloader
end
#files ⇒ Object (readonly)
Returns the value of attribute files.
16 17 18 |
# File 'lib/pedophile/offline_tree.rb', line 16
# Read-only accessor.
#
# @return [Object] the current value of @files
def files
  @files
end
Instance Method Details
#after_process ⇒ Object
Destructive part
44 45 46 47 48 |
# File 'lib/pedophile/offline_tree.rb', line 44 def after_process load_processed remove_bad_suffix rename_files end |
#analyze ⇒ Object
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/pedophile/offline_tree.rb', line 55
# Walks every non-directory entry below #path, appends a hash with its path
# and MIME type to @files, scans text/html and text/plain files for quoted
# path candidates (stored under :inside), and finally calls #save_analyzed.
#
# @return [Object] whatever #save_analyzed returns
def analyze
  # because I don't want to read all wget options...
  glob_path = "#{path}/**/**"
  puts "offline path #{path.to_s.cyan}"

  Dir.glob(glob_path) do |item|
    next if item == '.' || item == '..' || File.directory?(item)

    puts "analyze file #{item.to_s.yellow}"

    # Run `file` with an argument vector instead of a shell string: names
    # containing spaces, `?`, `&`, ... would otherwise be split or expanded
    # by the shell before `file` ever sees them.
    output = IO.popen(['file', '--mime', item], &:read)
    # First "type/subtype;" occurrence, or nil when the output has none.
    mime = output[%r{(\w+/\w+);}, 1]

    entry = { path: item, mime: mime }
    entry[:inside] = analyze_file(item) if mime == 'text/html' || mime == 'text/plain'
    @files << entry
  end

  save_analyzed
end
#analyze_file(file) ⇒ Object
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# File 'lib/pedophile/offline_tree.rb', line 103
# Collects every double- or single-quoted string found in +file+ as a path
# candidate and reports, relative to the file's own directory, whether the
# candidate exists and whether it is a regular file.
#
# @param file [String] path of the file to scan
# @return [Array<Hash>] one hash per accepted candidate with the keys
#   :exists, :is_file and :path (the candidate exactly as written in the file)
def analyze_file(file)
  content = File.read(file)

  # Double-quoted candidates first, then single-quoted ones. Each group is
  # de-duplicated on its own, so a value quoted both ways is listed twice.
  candidates = content.scan(/"([^"]+)"/).flatten.uniq +
               content.scan(/'([^']+)'/).flatten.uniq

  directory = File.dirname(file)

  candidates.each_with_object([]) do |candidate, paths|
    next unless is_path_ok?(candidate)

    target = File.join(directory, candidate)
    entry = {
      exists: File.exist?(target), # File.exists? no longer exists in Ruby 3.2+
      is_file: File.file?(target),
      path: candidate
    }
    paths << entry if should_add_path?(entry)
  end
end
#base_path ⇒ Object
142 143 144 145 |
# File 'lib/pedophile/offline_tree.rb', line 142
# Returns downloader.wget.offline_path, memoized in @base_path after the
# first truthy lookup.
#
# @return [Object] the memoized offline path
def base_path
  @base_path ||= downloader.wget.offline_path
end
#is_path_ok?(pp) ⇒ Boolean
TODO - check whether this string is a correct Unix path
131 132 133 134 |
# File 'lib/pedophile/offline_tree.rb', line 131
# TODO: check whether the string is a correct Unix path.
# For now a candidate is accepted purely on length: fewer than 200 characters.
#
# @param pp [String] candidate path
# @return [Boolean] true when the candidate is short enough
def is_path_ok?(pp)
  # pp =~ /\A(?:[0-9a-zA-Z\_\-]+\/?)+\z/
  pp.length < 200
end
#load_analyzed ⇒ Object
99 100 101 |
# File 'lib/pedophile/offline_tree.rb', line 99
# Replaces @files with the structure stored in TMP_STRUCTURE_PATH.
#
# The loader options are spelled out because YAML.load_file is a restricted
# load on Psych 4 (Ruby 3.1+) that refuses aliases. The dump can contain
# aliases whenever one String object is shared by several entries, which
# happens once renames have been recorded. Symbol is permitted because the
# stored hashes use symbol keys.
# NOTE(review): keyword arguments to safe_load need Psych >= 3.1 (Ruby 2.6+).
#
# @return [Object] the loaded structure (also assigned to @files)
def load_analyzed
  @files = YAML.safe_load(
    File.read(TMP_STRUCTURE_PATH),
    permitted_classes: [Symbol],
    aliases: true
  )
end
#make_it_so ⇒ Object
18 19 20 21 22 23 24 25 26 27 |
# File 'lib/pedophile/offline_tree.rb', line 18 def make_it_so analyze load_analyzed process_bad_suffix1 process_bad_suffix2 process_bad_filenames save_analyzed save_changes end |
#path ⇒ Object
50 51 52 53 |
# File 'lib/pedophile/offline_tree.rb', line 50
# Returns downloader.wget.offline_path, memoized in @path after the first
# truthy lookup.
#
# @return [Object] the memoized offline path
def path
  @path ||= downloader.wget.offline_path
end
#process_bad_filenames ⇒ Object
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
# File 'lib/pedophile/offline_tree.rb', line 188
# Renames every tracked file, and every existing file referenced from an
# :inside list, whose path contains characters outside
# 0-9, A-Z, a-z, '.', '-', '/' and ':'. Each such character becomes '_'.
#
# NOTE(review): :inside paths are tested with File.exist? exactly as recorded,
# i.e. relative to the current working directory — confirm that is intended.
#
# @return [Object] the @files collection
def process_bad_filenames
  sanitize = ->(name) { name.gsub(/[^0-9A-Za-z.\-\/:]/, '_') }

  @files.each do |f|
    old_file = f[:path]
    new_file = sanitize.call(old_file)
    process_rename_file(old_file, new_file) unless new_file == old_file

    next unless f[:inside]

    f[:inside].each do |fi|
      old_file = fi[:path]
      # File.exists? no longer exists in Ruby 3.2+
      next unless File.exist?(old_file)

      new_file = sanitize.call(old_file)
      process_rename_file(old_file, new_file) unless new_file == old_file
    end
  end
end
#process_bad_suffix1 ⇒ Object
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
# File 'lib/pedophile/offline_tree.rb', line 161
# Strips numeric query suffixes ("?123" and the escaped form "%3F123") from
# the paths of tracked files and of existing files referenced from :inside
# lists, renaming each affected file. Afterwards the escaped form is removed
# from file contents through #process_massive_gsub.
#
# @return [Object] whatever the final #process_massive_gsub call returns
def process_bad_suffix1
  strip_suffix = ->(name) { name.gsub(/\?\d+/, '').gsub(/\%3F\d+/, '') }

  @files.each do |f|
    old_file = f[:path]
    new_file = strip_suffix.call(old_file)
    process_rename_file(old_file, new_file) unless new_file == old_file

    next unless f[:inside]

    f[:inside].each do |fi|
      old_file = fi[:path]
      # File.exists? no longer exists in Ruby 3.2+
      next unless File.exist?(old_file)

      new_file = strip_suffix.call(old_file)
      process_rename_file(old_file, new_file) unless new_file == old_file
    end
  end

  process_massive_gsub(/\%3F\d+/, "", false)
end
#process_bad_suffix2 ⇒ Object
PROCESSING
148 149 150 151 152 153 154 155 156 157 158 159 |
# File 'lib/pedophile/offline_tree.rb', line 148
# PROCESSING
# Removes the "?body=1" suffix from tracked file paths, renaming each
# affected file, then removes the escaped form "%3Fbody=1" from file
# contents through #process_massive_gsub.
#
# @return [Object] whatever the final #process_massive_gsub call returns
def process_bad_suffix2
  @files.each do |f|
    old_file = f[:path]
    new_file = old_file.gsub(/\?body=1/, '')
    process_rename_file(old_file, new_file) unless new_file == old_file
  end

  process_massive_gsub("%3Fbody=1", "", false)
end
#process_massive_gsub(from, to, check_paths = false) ⇒ Object
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 |
# File 'lib/pedophile/offline_tree.rb', line 262
# Replaces +from+ with +to+ inside every tracked file that has an :inside
# list: the file is rewritten on disk, the same substitution is applied to
# its recorded :inside paths, and every file that contained a match is logged
# in @changes.
#
# @param from [String, Regexp] pattern passed to String#index and String#gsub!
# @param to [String] replacement text
# @param check_paths [Boolean] when true and FIX_RELATIVE_PATH is enabled, the
#   replacement is re-expressed relative to the directory of each edited file
# @return [Object] the @files collection
# @raise [RuntimeError] when a tracked file with an :inside list is missing
def process_massive_gsub(from, to, check_paths = false)
  puts "massive gsub #{from.to_s.blue} to #{to.to_s.green}"

  @files.each do |f|
    # must be proper mime before, so not needed to check
    next unless f[:inside]

    file_path = f[:path].clone
    puts " open #{file_path.to_s.red}"

    old_from = from.to_s
    old_to = to.to_s

    # relative path fix
    # The replacement is computed per file in a local variable. Reassigning
    # the `to` parameter here would carry one file's relative path over into
    # the computation for every following file.
    replacement = to
    if check_paths && FIX_RELATIVE_PATH
      file_dir = Pathname.new(File.absolute_path(File.dirname(file_path)))
      target = Pathname.new(File.absolute_path(File.join(path, to)))
      replacement = target.relative_path_from(file_dir).to_s
    end

    # File.exists? no longer exists in Ruby 3.2+
    raise "file #{file_path} not found" unless File.exist?(file_path)

    s = File.read(file_path)

    # logs
    if s.index(from)
      @changes << {
        gsub: { old: from, new: replacement, file: file_path, old_from: old_from, old_to: old_to }
      }
    end

    s.gsub!(from, replacement)
    # Block form closes the handle even when writing fails. `puts` is kept so
    # a trailing newline is still appended when the content lacks one.
    File.open(file_path, "w") { |io| io.puts(s) }

    f[:inside].each { |fi| fi[:path].gsub!(from, replacement) }

    puts " done #{file_path.to_s.red}"
  end
end
#process_rename_file(old_file_path, new_file_path) ⇒ Object
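Note: the text below is commented-out code that precedes this method in the source file, not a description of process_rename_file:
def process_bad_filenames_links
process_massive_gsub(/\%3F/, "_", false)
end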
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 |
# File 'lib/pedophile/offline_tree.rb', line 216 def process_rename_file(old_file_path, new_file_path) puts "rename from #{old_file_path.to_s.blue} to #{new_file_path.to_s.green}" # clone to not allow modify of @files old_file = old_file_path.clone new_file = new_file_path.clone # this will be with full path old_file_with_path = old_file_path.clone old_file.gsub!(base_path, '') new_file.gsub!(base_path, '') # ignore slashes old_file.gsub!(/^\//, '') new_file.gsub!(/^\//, '') # 1. rename 1 file new_file_path = old_file_with_path.gsub(old_file, new_file) File.rename(old_file_with_path, new_file_path) # internal log-like @changes << { rename: { old: old_file_with_path, new: new_file_path } } # 2. rename in @files @files.each do |f| if f[:path] == old_file_with_path f[:path] = new_file_path end if f[:inside] f[:inside].each do |fi| if fi[:path] == old_file_with_path fi[:path] = new_file_path end end end end # 3. gsub all files # gsub files after renaming process_massive_gsub(old_file, new_file, true) process_massive_gsub(old_file.gsub("?", "%3F"), new_file, true) puts "RENAMED #{old_file.to_s.blue} to #{new_file.to_s.green}" end |
#save_analyzed ⇒ Object
87 88 89 90 91 |
# File 'lib/pedophile/offline_tree.rb', line 87
# Writes @files as YAML to TMP_STRUCTURE_PATH, replacing any previous content.
# The block form of File.open closes the handle even when serialising or
# writing raises.
#
# @return [nil]
def save_analyzed
  File.open(TMP_STRUCTURE_PATH, "w") { |io| io.puts @files.to_yaml }
end
#save_changes ⇒ Object
93 94 95 96 97 |
# File 'lib/pedophile/offline_tree.rb', line 93
# Writes @changes as YAML to TMP_CHANGES_PATH, replacing any previous content.
# The block form of File.open closes the handle even when serialising or
# writing raises.
#
# @return [nil]
def save_changes
  File.open(TMP_CHANGES_PATH, "w") { |io| io.puts @changes.to_yaml }
end
#should_add_path?(h) ⇒ Boolean
TODO
137 138 139 140 |
# File 'lib/pedophile/offline_tree.rb', line 137
# TODO: real filtering. Currently every candidate is accepted.
#
# @param h [Hash] candidate entry (ignored for now)
# @return [Boolean] always true
def should_add_path?(h)
  # return h[:is_file]
  true
end
#zip(output_file = 'site.zip') ⇒ Object
29 30 31 32 33 |
# File 'lib/pedophile/offline_tree.rb', line 29
# Zips the directory named by downloader.wget.site_last_path, working from
# Wget::TMP_OFFLINE_PATH.
#
# The equivalent shell command is still printed for the operator. The
# archiver itself is started with an argument vector and no shell, so names
# containing spaces or shell metacharacters are passed through literally.
# NOTE(review): unlike a `cd ...; zip ...` shell string, a missing working
# directory or zip binary now raises Errno::ENOENT instead of being ignored.
#
# @param output_file [String] archive path; a relative value is resolved
#   against Wget::TMP_OFFLINE_PATH
# @return [String] standard output of zip
def zip(output_file = 'site.zip')
  site_dir = self.downloader.wget.site_last_path
  command = "cd #{Wget::TMP_OFFLINE_PATH}; zip -r #{output_file} #{site_dir}"
  puts command

  IO.popen(['zip', '-r', output_file.to_s, site_dir.to_s],
           chdir: Wget::TMP_OFFLINE_PATH.to_s, &:read)
end
#zip_with_custom_dir(output_path_zip, output_directory_name) ⇒ Object
35 36 37 38 39 40 41 |
# File 'lib/pedophile/offline_tree.rb', line 35
# Renames the downloaded site directory to +output_directory_name+ and zips
# it. Both steps run inside Wget::TMP_SITE_DIRECTORY, resolved against
# Wget::TMP_PATH.
#
# The equivalent shell command is still printed for the operator. `mv` and
# `zip` are started with argument vectors and no shell, so names containing
# spaces, quotes or shell metacharacters are passed through literally.
# NOTE(review): zip still runs when mv fails, as in a `;`-chained command, but
# a missing working directory or zip binary now raises Errno::ENOENT.
#
# @param output_path_zip [String] archive path; a relative value is resolved
#   against the working directory described above
# @param output_directory_name [String] new name for the site directory
# @return [String] standard output of zip
def zip_with_custom_dir(output_path_zip, output_directory_name)
  site_dir = self.downloader.wget.site_last_path
  command = "cd #{Wget::TMP_PATH}; cd #{Wget::TMP_SITE_DIRECTORY}; mv \"#{site_dir}\" \"#{output_directory_name}\";"
  command += " zip -r #{output_path_zip} #{output_directory_name}"
  puts command

  # Same directory the two chained `cd` commands end up in.
  work_dir = File.expand_path(Wget::TMP_SITE_DIRECTORY.to_s, Wget::TMP_PATH.to_s)

  system('mv', site_dir.to_s, output_directory_name.to_s, chdir: work_dir)
  IO.popen(['zip', '-r', output_path_zip.to_s, output_directory_name.to_s],
           chdir: work_dir, &:read)
end