Class: ExtractMetadata
- Inherits:
-
Object
- Object
- ExtractMetadata
- Defined in:
- lib/extractmetadata.rb
Instance Method Summary
-
#extract ⇒ Object
Extract metadata.
-
#extract_file_metadata ⇒ Object
Extract PDF metadata.
-
#get_file_type ⇒ Object
Get file type.
-
#get_folders ⇒ Object
Split relative path and get array of directories.
-
#get_formatted_name ⇒ Object
Get a formatted file name.
-
#get_rel_path ⇒ Object
Get the relative path.
-
#initialize(file, input_dir, output_dir) ⇒ ExtractMetadata
constructor
A new instance of ExtractMetadata.
Constructor Details
#initialize(file, input_dir, output_dir) ⇒ ExtractMetadata
Returns a new instance of ExtractMetadata.
5 6 7 8 9 10 11 12 13 |
# File 'lib/extractmetadata.rb', line 5
#
# Remember the file to inspect and the directories it relates to, and set up
# the whitelist of file extensions for which metadata extraction is attempted.
#
# @param file [String] path of the file to inspect (stored as @path)
# @param input_dir [String] directory prefix used to compute the relative path
# @param output_dir [String] output directory (stored only; not read by the
#   methods shown in this class listing)
def initialize(file, input_dir, output_dir)
  @path = file
  @input_dir = input_dir
  @output_dir = output_dir
  # Frozen so the whitelist cannot be altered by accident after construction.
  @allowed_extensions = %w[
    pdf doc docbook docx txt rtf md csv xls xlsx jpg jpeg png gif svg
  ].freeze
end
Instance Method Details
#extract ⇒ Object
Extract metadata
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/extractmetadata.rb', line 16
#
# Build the metadata hash for the file.
#
# The hash always contains :rel_path, :folders, :formatted_name and
# :filetype. When the file type is in @allowed_extensions, the result of
# #extract_file_metadata is merged in; otherwise a "skipping" notice is
# printed. A failure during extraction is reported on stderr and the path
# information gathered so far is still returned.
#
# @return [Hash] path information, plus document metadata when available
def extract
  # The path helpers below all read @rel_path, so it has to be set first.
  @rel_path = get_rel_path
  outhash = {
    rel_path: @rel_path,
    folders: get_folders,
    formatted_name: get_formatted_name,
    filetype: get_file_type
  }

  if @allowed_extensions.include?(outhash[:filetype])
    begin
      outhash.merge!(extract_file_metadata)
    rescue StandardError => e
      # Best effort: keep the path information, but do not hide the failure.
      warn "could not extract metadata from #{@path}: #{e.message}"
    end
  else
    puts "skipping .#{outhash[:filetype]} file"
  end

  outhash
end
#extract_file_metadata ⇒ Object
Extract PDF metadata
67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/extractmetadata.rb', line 67
#
# Collect document metadata for @path through Docsplit.
#
# Each value is whatever the corresponding Docsplit.extract_* call returns
# for the file; no post-processing is done here.
#
# @return [Hash] with keys :author, :creator, :producer, :title, :subject,
#   :date, :keywords and :length
def extract_file_metadata
  {
    author: Docsplit.extract_author(@path),
    creator: Docsplit.extract_creator(@path),
    producer: Docsplit.extract_producer(@path),
    title: Docsplit.extract_title(@path),
    subject: Docsplit.extract_subject(@path),
    date: Docsplit.extract_date(@path),
    keywords: Docsplit.extract_keywords(@path),
    length: Docsplit.extract_length(@path)
  }
end
#get_file_type ⇒ Object
Get file type
62 63 64 |
# File 'lib/extractmetadata.rb', line 62
#
# Get the file type, i.e. the extension of @rel_path without the leading dot.
#
# Only the last path component is considered, so a dot inside a directory
# name is never mistaken for an extension. A file without an extension
# yields an empty string.
#
# @return [String] the extension (e.g. "pdf"), or "" when there is none
def get_file_type
  File.extname(@rel_path).sub(/\A\./, '')
end
#get_folders ⇒ Object
Split relative path and get array of directories
41 42 43 44 45 46 47 48 49 |
# File 'lib/extractmetadata.rb', line 41
#
# Split the relative path and return the directories it passes through.
#
# The final path component (the file itself) is dropped by position rather
# than by value, so a directory that has the same name as the file is kept.
# Empty components, such as the one produced by a leading "/", are removed.
#
# @return [Array<String>] directory names in path order
def get_folders
  components = @rel_path.split("/")
  components.pop # the last component is the file, not a folder
  components.reject(&:empty?)
end
#get_formatted_name ⇒ Object
Get a formatted file name
57 58 59 |
# File 'lib/extractmetadata.rb', line 57
#
# Get a formatted name derived from @rel_path: the trailing file extension
# is removed, underscores become spaces and path separators are dropped.
#
# Only the final extension is stripped, so dots elsewhere in the path do not
# truncate the name. An empty path yields an empty string.
#
# @return [String] the formatted name
def get_formatted_name
  extension = File.extname(@rel_path)
  stem = extension.empty? ? @rel_path : @rel_path[0...-extension.length]
  stem.tr("_", " ").delete("/")
end
#get_rel_path ⇒ Object
Get the relative path
52 53 54 |
# File 'lib/extractmetadata.rb', line 52
#
# Get the path of the file relative to the input directory.
#
# Only a leading @input_dir prefix is removed; later occurrences of the same
# string inside the path are left untouched. When @path does not start with
# @input_dir, a copy of @path is returned unchanged.
#
# @return [String] @path without the leading @input_dir
def get_rel_path
  @path.start_with?(@input_dir) ? @path[@input_dir.length..-1] : @path.dup
end