Module: Phash

Extended by:
FFI::Library
Defined in:
lib/phash.rb,
lib/phash/text.rb,
lib/phash/audio.rb,
lib/phash/image.rb,
lib/phash/video.rb

Defined Under Namespace

Classes: Audio, AudioHash, Data, FileHash, HashData, Image, ImageHash, Text, TextHash, TxtHashPoint, TxtMatch, Video, VideoHash

Constant Summary collapse

DEFAULT_SAMPLE_RATE =
8000

Class Method Summary collapse

Class Method Details

.audio_data(path, length = 0, sample_rate = nil) ⇒ Object

Read audio file specified by path and optional length using ph_readaudio



58
59
60
61
62
63
64
65
66
67
# File 'lib/phash/audio.rb', line 58

# Reads audio samples from the file at +path+ through ph_readaudio.
#
# path        - converted with #to_s before being passed to the native call.
# length      - converted with #to_f and passed as the last argument of
#               ph_readaudio (presumably a limit on how much audio to read;
#               defaults to 0).
# sample_rate - used as given; DEFAULT_SAMPLE_RATE is substituted when it is
#               nil or false.
#
# Returns a Data built from the value returned by ph_readaudio and the int
# written to the length out-parameter, or nil when ph_readaudio returns a
# falsy value.
#
# NOTE(review): if ph_readaudio is attached with a plain :pointer return type,
# FFI may hand back a null Pointer object (which is truthy) instead of nil on
# failure - confirm how failure is signalled.
def audio_data(path, length = 0, sample_rate = nil)
  rate = sample_rate || DEFAULT_SAMPLE_RATE
  count_out = FFI::MemoryPointer.new(:int)
  samples = ph_readaudio(path.to_s, rate, 1, nil, count_out, length.to_f)
  return unless samples

  sample_count = count_out.get_int(0)
  # The out-parameter is released explicitly once its value has been read.
  count_out.free

  Data.new(samples, sample_count)
end

.audio_data_hash(audio_data, sample_rate = nil) ⇒ Object

Get hash of audio data using ph_audiohash



70
71
72
73
74
75
76
77
78
79
# File 'lib/phash/audio.rb', line 70

# Computes the hash of already-loaded audio data through ph_audiohash.
#
# audio_data  - object exposing #data and #length; both are forwarded to the
#               native call.
# sample_rate - used as given; DEFAULT_SAMPLE_RATE is substituted when it is
#               nil or false.
#
# Returns an AudioHash built from the value returned by ph_audiohash and the
# int written to the length out-parameter, or nil when ph_audiohash returns a
# falsy value.
def audio_data_hash(audio_data, sample_rate = nil)
  rate = sample_rate || DEFAULT_SAMPLE_RATE
  size_out = FFI::MemoryPointer.new(:int)
  digest = ph_audiohash(audio_data.data, audio_data.length, rate, size_out)
  return unless digest

  digest_size = size_out.get_int(0)
  # The out-parameter is released explicitly once its value has been read.
  size_out.free

  AudioHash.new(digest, digest_size)
end

.audio_distance_ber(hash_a, hash_b, threshold = 0.25, block_size = 256) ⇒ Object

Get distance between two audio hashes using ph_audio_distance_ber



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/phash/audio.rb', line 90

# Computes the distance vector between two audio hashes through
# ph_audio_distance_ber.
#
# hash_a, hash_b - AudioHash instances; their #data and #length are forwarded.
# threshold      - converted with #to_f before the native call.
# block_size     - converted with #to_i and capped at the shorter of the two
#                  hash lengths.
#
# Returns an Array of doubles read from the returned vector (as many entries
# as the int written to the length out-parameter), or nil when
# ph_audio_distance_ber returns a falsy value.
#
# Raises ArgumentError when either argument is not an AudioHash.
#
# NOTE(review): if ph_audio_distance_ber is attached with a plain :pointer
# return type, FFI may return a null Pointer object (truthy) instead of nil on
# failure - confirm how failure is signalled.
def audio_distance_ber(hash_a, hash_b, threshold = 0.25, block_size = 256)
  raise ArgumentError, 'hash_a is not an AudioHash' unless hash_a.is_a?(AudioHash)
  raise ArgumentError, 'hash_b is not an AudioHash' unless hash_b.is_a?(AudioHash)

  distance_vector_length_p = FFI::MemoryPointer.new :int
  block_size = [block_size.to_i, hash_a.length, hash_b.length].min
  distance_vector = ph_audio_distance_ber(hash_a.data, hash_a.length, hash_b.data, hash_b.length, threshold.to_f, block_size, distance_vector_length_p)
  return unless distance_vector

  distance_vector.get_array_of_double(0, distance_vector_length_p.get_int(0))
ensure
  # Cleanup lives in ensure so the vector handed back by the native call is
  # released even when reading it raises, and the out-parameter is released on
  # the failure path too. Both locals are nil if an ArgumentError was raised
  # before they were assigned.
  free(distance_vector) if distance_vector
  distance_vector_length_p.free if distance_vector_length_p
end

.audio_hash(path, length = nil, sample_rate = nil) ⇒ Object

Use audio_data and audio_data_hash to compute the hash for the file at path; optionally specify the maximum length in seconds to read



82
83
84
85
86
87
# File 'lib/phash/audio.rb', line 82

# Computes the audio hash for the file at +path+ by chaining audio_data and
# audio_data_hash.
#
# path        - forwarded unchanged to audio_data.
# length      - forwarded unchanged to audio_data (nil by default).
# sample_rate - used for both steps; DEFAULT_SAMPLE_RATE is substituted when
#               it is nil or false.
#
# Returns whatever audio_data_hash returns, or nil when audio_data returns a
# falsy value.
def audio_hash(path, length = nil, sample_rate = nil)
  rate = sample_rate || DEFAULT_SAMPLE_RATE
  samples = audio_data(path, length, rate)
  return unless samples

  audio_data_hash(samples, rate)
end

.audio_similarity(hash_a, hash_b, *args) ⇒ Object

Get similarity from audio_distance_ber



107
108
109
# File 'lib/phash/audio.rb', line 107

# Derives a similarity score from audio_distance_ber.
#
# hash_a, hash_b - forwarded to audio_distance_ber.
# args           - optional extra arguments, forwarded unchanged.
#
# Returns the largest element of the array returned by audio_distance_ber, or
# nil when audio_distance_ber returns nil (it does so when the native call
# yields no result), instead of failing with NoMethodError on nil.
def audio_similarity(hash_a, hash_b, *args)
  distances = audio_distance_ber(hash_a, hash_b, *args)
  distances && distances.max
end

.image_hamming_distance(hash_a, hash_b) ⇒ Object

Get distance between two image hashes using ph_hamming_distance



33
34
35
36
37
38
# File 'lib/phash/image.rb', line 33

# Computes the distance between two image hashes through ph_hamming_distance.
#
# hash_a, hash_b - ImageHash instances; their #data values are passed to the
#                  native call.
#
# Returns the value returned by ph_hamming_distance.
#
# Raises ArgumentError when either argument is not an ImageHash (hash_a is
# checked first).
def image_hamming_distance(hash_a, hash_b)
  raise ArgumentError, 'hash_a is not an ImageHash' unless hash_a.is_a?(ImageHash)
  raise ArgumentError, 'hash_b is not an ImageHash' unless hash_b.is_a?(ImageHash)

  ph_hamming_distance(hash_a.data, hash_b.data)
end

.image_hash(path) ⇒ Object

Get image file hash using ph_dct_imagehash



22
23
24
25
26
27
28
29
30
# File 'lib/phash/image.rb', line 22

# Computes the hash of the image file at +path+ through ph_dct_imagehash.
#
# path - converted with #to_s before being passed to the native call.
#
# Returns an ImageHash wrapping the unsigned 64-bit value written to the
# out-parameter, or nil when ph_dct_imagehash returns -1.
def image_hash(path)
  value_out = FFI::MemoryPointer.new(:ulong_long)
  status = ph_dct_imagehash(path.to_s, value_out)
  return if status == -1

  value = value_out.get_uint64(0)
  # The out-parameter is released explicitly once its value has been read.
  value_out.free

  ImageHash.new(value)
end

.image_similarity(hash_a, hash_b) ⇒ Object

Get similarity from image_hamming_distance



41
42
43
# File 'lib/phash/image.rb', line 41

# Converts the result of image_hamming_distance into a similarity score.
#
# hash_a, hash_b - forwarded to image_hamming_distance.
#
# Returns 1 minus the distance divided by 64.0, so a distance of 0 gives 1.0
# and a distance of 64 gives 0.0 (64 is presumably the hash width in bits).
def image_similarity(hash_a, hash_b)
  differing_bits = image_hamming_distance(hash_a, hash_b)
  mismatch_ratio = differing_bits / 64.0
  1 - mismatch_ratio
end

.text_hash(path) ⇒ Object

Get text file hash using ph_texthash



42
43
44
45
46
47
48
49
50
# File 'lib/phash/text.rb', line 42

# Computes the hash of the text file at +path+ through ph_texthash.
#
# path - converted with #to_s before being passed to the native call.
#
# Returns a TextHash built from the value returned by ph_texthash and the int
# written to the length out-parameter, or nil when ph_texthash returns a falsy
# value.
def text_hash(path)
  count_out = FFI::MemoryPointer.new(:int)
  points = ph_texthash(path.to_s, count_out)
  return unless points

  point_count = count_out.get_int(0)
  # The out-parameter is released explicitly once its value has been read.
  count_out.free

  TextHash.new(points, point_count)
end

.text_similarity(hash_a, hash_b) ⇒ Object

Get similarity between two text hashes using ph_compare_text_hashes (the average of the fractions of each hash covered by matches)



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/phash/text.rb', line 53

# Computes a similarity score for two text hashes from the matches reported by
# ph_compare_text_hashes.
#
# hash_a, hash_b - TextHash instances; their #data and #length are forwarded.
#
# Every match marks match[:length] consecutive positions, starting at
# match[:index_a] in hash_a and match[:index_b] in hash_b. The coverage of a
# hash is the number of marked positions divided by its length, and the result
# is the mean of the two coverages.
#
# Returns the similarity as a Float, or nil when ph_compare_text_hashes
# returns a falsy value.
#
# Raises ArgumentError when either argument is not a TextHash.
#
# NOTE(review): a hash whose length is 0 makes its coverage 0 / 0.0 (NaN) -
# confirm whether empty hashes can reach this method.
def text_similarity(hash_a, hash_b)
  raise ArgumentError, 'hash_a is not a TextHash' unless hash_a.is_a?(TextHash)
  raise ArgumentError, 'hash_b is not a TextHash' unless hash_b.is_a?(TextHash)

  matches_length_p = FFI::MemoryPointer.new :int
  data = ph_compare_text_hashes(hash_a.data, hash_a.length, hash_b.data, hash_b.length, matches_length_p)
  return unless data

  matched_a = Array.new(hash_a.length)
  matched_b = Array.new(hash_b.length)
  matches_length_p.get_int(0).times do |n|
    # Matches are laid out back to back, TxtMatch.size apart, starting at data.
    match = TxtMatch.new(data + n * TxtMatch.size)
    index_a = match[:index_a]
    index_b = match[:index_b]
    match[:length].times do |i|
      matched_a[index_a + i] = true
      matched_b[index_b + i] = true
    end
  end

  # Entries are either true or nil, so counting true equals counting non-nil.
  coverage_a = matched_a.count(true) / hash_a.length.to_f
  coverage_b = matched_b.count(true) / hash_b.length.to_f
  (coverage_a + coverage_b) * 0.5
ensure
  # Cleanup lives in ensure so the match buffer handed back by the native call
  # is released even when processing the matches raises. Both locals are nil
  # if an ArgumentError was raised before they were assigned.
  free(data) if data
  matches_length_p.free if matches_length_p
end

.video_dct_distance(hash_a, hash_b, threshold = 21) ⇒ Object Also known as: video_similarity

Get distance between two video hashes using ph_dct_videohash_dist



29
30
31
32
33
34
# File 'lib/phash/video.rb', line 29

# Computes the distance between two video hashes through
# ph_dct_videohash_dist.
#
# hash_a, hash_b - VideoHash instances; their #data and #length are forwarded.
# threshold      - converted with #to_i before the native call (21 by default).
#
# Returns the value returned by ph_dct_videohash_dist.
#
# Raises ArgumentError when either argument is not a VideoHash (hash_a is
# checked first).
def video_dct_distance(hash_a, hash_b, threshold = 21)
  raise ArgumentError, 'hash_a is not a VideoHash' unless hash_a.is_a?(VideoHash)
  raise ArgumentError, 'hash_b is not a VideoHash' unless hash_b.is_a?(VideoHash)

  limit = threshold.to_i
  ph_dct_videohash_dist(hash_a.data, hash_a.length, hash_b.data, hash_b.length, limit)
end

.video_hash(path) ⇒ Object

Get video hash using ph_dct_videohash



18
19
20
21
22
23
24
25
26
# File 'lib/phash/video.rb', line 18

# Computes the hash of the video file at +path+ through ph_dct_videohash.
#
# path - converted with #to_s before being passed to the native call.
#
# Returns a VideoHash built from the value returned by ph_dct_videohash and
# the int written to the length out-parameter, or nil when ph_dct_videohash
# returns a falsy value.
def video_hash(path)
  count_out = FFI::MemoryPointer.new(:int)
  frames = ph_dct_videohash(path.to_s, count_out)
  return unless frames

  frame_count = count_out.get_int(0)
  # The out-parameter is released explicitly once its value has been read.
  count_out.free

  VideoHash.new(frames, frame_count)
end