Class: Pdfmdstat

Inherits:
Object
  • Object
show all
Includes:
Pdfmdmethods
Defined in:
lib/pdfmd/pdfmdstat.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Pdfmdmethods

#determineValidSetting, #log, #queryHiera

Constructor Details

#initialize(metadata) ⇒ Pdfmdstat

Returns a new instance of Pdfmdstat.



19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/pdfmd/pdfmdstat.rb', line 19

# Builds the per-tag statistics for the given metadata.
#
# @param metadata [Object] collection that is handed unchanged to
#   #count_values; presumably one entry per analysed file - confirm
#   against the caller.
def initialize(metadata)
  @default_tags = ['author', 'title', 'subject', 'createdate', 'keywords']

  # No placeholder hash is assigned first: the result of count_values
  # replaces @statdata wholesale, so a pre-filled value would be dead code.
  @statdata = count_values(metadata, @default_tags)
end

Instance Attribute Details

#metadata ⇒ Object

Returns the value of attribute metadata.



12
13
14
# File 'lib/pdfmd/pdfmdstat.rb', line 12

# Reader for the +metadata+ attribute.
#
# @return [Object, nil] the current value of @metadata
def metadata
  @metadata
end

Instance Method Details

#analyse_metadata ⇒ Object

Runs a statistical overview of the metadata: counts all values in the metatags and sums them up.



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/pdfmd/pdfmdstat.rb', line 95

# Prepares the collected statistics for output and stores the result in
# @metadata_hash.
#
# Tags are taken from @default_tags in alphabetical order and their names
# are capitalized. For every tag the counted values are sorted, and an
# empty value is shown as '*empty*'.
#
# @return [Hash] the hash that was stored in @metadata_hash
def analyse_metadata
  @metadata_hash = @default_tags.sort.each_with_object({}) do |tagname, overview|
    # A tag without counted data (e.g. one that was selected after the
    # statistics were built) yields an empty section instead of failing
    # on nil.
    counts = @statdata[tagname] || {}

    overview[tagname.capitalize] =
      counts.sort.each_with_object({}) do |(title, amount), sorted|
        label = title.empty? ? '*empty*' : title
        sorted[label] = amount
      end
  end
end

#count_values(metadata, keys = '') ⇒ Object

Counts all values provided as a hash in metadata. Optional key names can be handed over as an array.



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/pdfmd/pdfmdstat.rb', line 48

# Counts how often each value occurs per metadata tag.
#
# @param metadata [#each] collection whose entries carry, at index 1, a
#   String of Ruby source that evaluates to a Hash of tag name => value
#   (presumably a Hash of filename => stringified tag hash - confirm
#   against the caller).
# @param keys [Array<String>, String] tag names to count. The default ''
#   selects author, title, createdate, subject and keywords.
# @return [Hash{String => Hash}] one entry per requested tag, mapping each
#   value seen to its number of occurrences.
#
# Prints 'invalid keys provided' and exits with status 1 when +keys+ is
# neither '' nor an Array.
def count_values(metadata, keys = '')
  tagnames =
    if keys == ''
      ['author', 'title', 'createdate', 'subject', 'keywords']
    elsif keys.is_a?(Array)
      keys
    else
      puts 'invalid keys provided'
      exit 1
    end

  data = {}
  tagnames.each { |tagname| data[tagname] = {} }

  # Iterate through all metadata and count how often each value shows up
  # in each category.
  metadata.each do |entry|
    # NOTE(review): eval executes whatever code the string contains. This
    # is only safe while the strings are produced by this program itself;
    # consider a data-only format if they can come from elsewhere.
    datahash = eval entry[1]

    datahash.each do |tagkey, tagvalue|
      next if tagvalue.nil?

      # Tags that were not requested have no counter; skip them instead
      # of failing on nil.
      counts = data[tagkey]
      next if counts.nil?

      counts[tagvalue] = counts.fetch(tagvalue, 0) + 1
    end
  end

  data
end

#output_metadata(format = 'yaml') ⇒ Object

Outputs the metadata in one of several formats. Default: yaml. Alternatives: json, hash.


125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/pdfmd/pdfmdstat.rb', line 125

# Prints the prepared statistics (@metadata_hash) to standard output.
#
# @param format [String] 'json' prints JSON, 'hash' prints the Ruby hash
#   as is, any other value (default 'yaml') prints YAML without the
#   leading document marker.
# @return [nil] the result of puts
def output_metadata(format = 'yaml')
  case format
  when 'json'
    require 'json'
    puts @metadata_hash.to_json
  when 'hash'
    puts @metadata_hash
  else
    # Loaded here, like 'json' above, so the branch does not depend on
    # another file having required the YAML library already.
    require 'yaml'
    puts @metadata_hash.to_yaml.gsub(/---\n/,'')
  end
end

#tags(metatagnames) ⇒ Object

Method to set tags



34
35
36
37
38
39
40
41
42
43
44
# File 'lib/pdfmd/pdfmdstat.rb', line 34

# Selects the tags that the statistic covers.
#
# @param metatagnames [String, nil] comma separated tag names. Whitespace
#   around each name is ignored and blank entries are dropped. nil leaves
#   the current selection untouched.
#
# Any other argument type is logged as an error and the process exits
# with status 1.
def tags(metatagnames)
  if metatagnames.is_a?(String)
    # Strip each name so that input such as 'author, title' still matches
    # the tag names used as statistic keys.
    @default_tags = metatagnames.split(',').map(&:strip).reject(&:empty?)
    log('debug', "Setting tags for statistic to '#{metatagnames}'.")
  elsif !metatagnames.nil?
    log('error', 'Unknown Tag definition. Exit.')
    exit 1
  end
end