Module: Phash
- Extended by:
- FFI::Library
- Defined in:
- lib/phash.rb,
lib/phash/text.rb,
lib/phash/audio.rb,
lib/phash/image.rb,
lib/phash/video.rb
Defined Under Namespace
Classes: Audio, AudioHash, Data, FileHash, HashData, Image, ImageHash, Text, TextHash, TxtHashPoint, TxtMatch, Video, VideoHash
Constant Summary collapse
- DEFAULT_SAMPLE_RATE =
8000
Class Method Summary collapse
-
.audio_data(path, length = 0, sample_rate = nil) ⇒ Object
Read audio file specified by path and optional length using
ph_readaudio
. -
.audio_data_hash(audio_data, sample_rate = nil) ⇒ Object
Get hash of audio data using
ph_audiohash
. -
.audio_distance_ber(hash_a, hash_b, threshold = 0.25, block_size = 256) ⇒ Object
Get distance between two audio hashes using
ph_audio_distance_ber
. -
.audio_hash(path, length = nil, sample_rate = nil) ⇒ Object
Use
audio_data
and audio_data_hash
to compute hash for file at path, specify max length in seconds to read. -
.audio_similarity(hash_a, hash_b, *args) ⇒ Object
Get similarity from audio_distance_ber.
-
.image_hamming_distance(hash_a, hash_b) ⇒ Object
Get distance between two image hashes using
ph_hamming_distance
. -
.image_hash(path) ⇒ Object
Get image file hash using
ph_dct_imagehash
. -
.image_similarity(hash_a, hash_b) ⇒ Object
Get similarity from hamming_distance.
-
.text_hash(path) ⇒ Object
Get text file hash using
ph_texthash
. -
.text_similarity(hash_a, hash_b) ⇒ Object
Get similarity between two text hashes using
ph_compare_text_hashes
. -
.video_dct_distance(hash_a, hash_b, threshold = 21) ⇒ Object
(also: video_similarity)
Get distance between two video hashes using
ph_dct_videohash_dist
. -
.video_hash(path) ⇒ Object
Get video hash using
ph_dct_videohash
.
Class Method Details
.audio_data(path, length = 0, sample_rate = nil) ⇒ Object
Read audio file specified by path and optional length using ph_readaudio
58 59 60 61 62 63 64 65 66 67 |
# File 'lib/phash/audio.rb', line 58
#
# Read the audio file at +path+ using <tt>ph_readaudio</tt>.
#
# @param path [#to_s] file to read; passed to ph_readaudio as a string
# @param length [#to_f] passed to ph_readaudio as a float (defaults to 0)
# @param sample_rate [Integer, nil] falls back to DEFAULT_SAMPLE_RATE when nil
# @return [Data, nil] the returned pointer wrapped together with the length
#   that ph_readaudio wrote to the out-parameter, or nil when ph_readaudio
#   yields nil or a NULL pointer
def audio_data(path, length = 0, sample_rate = nil)
  sample_rate ||= DEFAULT_SAMPLE_RATE
  audio_data_length_p = FFI::MemoryPointer.new :int
  begin
    samples = ph_readaudio(path.to_s, sample_rate, 1, nil, audio_data_length_p, length.to_f)
    # A function attached with a :pointer return type hands back a truthy NULL
    # pointer object instead of nil, so a plain truthiness test cannot detect
    # failure.
    # NOTE(review): the attach_function declaration is not visible here — confirm the return type.
    return nil if samples.nil? || samples.null?

    Data.new(samples, audio_data_length_p.get_int(0))
  ensure
    # Release the out-parameter on every path, not only on success.
    audio_data_length_p.free
  end
end
.audio_data_hash(audio_data, sample_rate = nil) ⇒ Object
Get hash of audio data using ph_audiohash
70 71 72 73 74 75 76 77 78 79 |
# File 'lib/phash/audio.rb', line 70
#
# Get the hash of previously read audio data using <tt>ph_audiohash</tt>.
#
# @param audio_data [#data, #length] object whose +data+ and +length+ are
#   handed to ph_audiohash (audio_data returns such an object)
# @param sample_rate [Integer, nil] falls back to DEFAULT_SAMPLE_RATE when nil
# @return [AudioHash, nil] the returned pointer wrapped together with the
#   length that ph_audiohash wrote to the out-parameter, or nil when
#   ph_audiohash yields nil or a NULL pointer
def audio_data_hash(audio_data, sample_rate = nil)
  sample_rate ||= DEFAULT_SAMPLE_RATE
  hash_data_length_p = FFI::MemoryPointer.new :int
  begin
    hash_data = ph_audiohash(audio_data.data, audio_data.length, sample_rate, hash_data_length_p)
    # A function attached with a :pointer return type hands back a truthy NULL
    # pointer object instead of nil, so check for it explicitly.
    # NOTE(review): the attach_function declaration is not visible here — confirm the return type.
    return nil if hash_data.nil? || hash_data.null?

    AudioHash.new(hash_data, hash_data_length_p.get_int(0))
  ensure
    # Release the out-parameter on every path, not only on success.
    hash_data_length_p.free
  end
end
.audio_distance_ber(hash_a, hash_b, threshold = 0.25, block_size = 256) ⇒ Object
Get distance between two audio hashes using ph_audio_distance_ber
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/phash/audio.rb', line 90
#
# Get the distance vector between two audio hashes using
# <tt>ph_audio_distance_ber</tt>.
#
# @param hash_a [AudioHash]
# @param hash_b [AudioHash]
# @param threshold [#to_f] passed to ph_audio_distance_ber as a float
# @param block_size [#to_i] capped at the shorter of the two hash lengths
#   before being passed to ph_audio_distance_ber
# @return [Array<Float>, nil] the doubles read from the returned vector, or
#   nil when ph_audio_distance_ber yields nil or a NULL pointer
# @raise [ArgumentError] when either argument is not an AudioHash
def audio_distance_ber(hash_a, hash_b, threshold = 0.25, block_size = 256)
  raise ArgumentError, 'hash_a is not an AudioHash' unless hash_a.is_a?(AudioHash)
  raise ArgumentError, 'hash_b is not an AudioHash' unless hash_b.is_a?(AudioHash)

  block_size = [block_size.to_i, hash_a.length, hash_b.length].min
  distance_vector_length_p = FFI::MemoryPointer.new :int
  begin
    distance_vector = ph_audio_distance_ber(hash_a.data, hash_a.length, hash_b.data, hash_b.length, threshold.to_f, block_size, distance_vector_length_p)
    # A function attached with a :pointer return type hands back a truthy NULL
    # pointer object instead of nil, so check for it explicitly.
    # NOTE(review): the attach_function declaration is not visible here — confirm the return type.
    return nil if distance_vector.nil? || distance_vector.null?

    begin
      distance_vector.get_array_of_double(0, distance_vector_length_p.get_int(0))
    ensure
      # The vector is handed to free after its contents were copied into a
      # Ruby array — also when copying raises.
      free(distance_vector)
    end
  ensure
    # Release the out-parameter on every path, not only on success.
    distance_vector_length_p.free
  end
end
.audio_hash(path, length = nil, sample_rate = nil) ⇒ Object
Use audio_data
and audio_data_hash
to compute hash for file at path, specify max length in seconds to read
82 83 84 85 86 87 |
# File 'lib/phash/audio.rb', line 82
#
# Compute the hash for the audio file at +path+ by chaining audio_data and
# audio_data_hash.
#
# @param path [#to_s] forwarded to audio_data
# @param length [#to_f, nil] forwarded to audio_data
# @param sample_rate [Integer, nil] falls back to DEFAULT_SAMPLE_RATE when nil;
#   the same rate is used for reading and for hashing
# @return [Object, nil] whatever audio_data_hash returns, or nil when
#   audio_data returned nothing
def audio_hash(path, length = nil, sample_rate = nil)
  rate = sample_rate || DEFAULT_SAMPLE_RATE
  samples = audio_data(path, length, rate)
  return nil unless samples

  audio_data_hash(samples, rate)
end
.audio_similarity(hash_a, hash_b, *args) ⇒ Object
Get similarity from audio_distance_ber
107 108 109 |
# File 'lib/phash/audio.rb', line 107
#
# Get similarity from audio_distance_ber: the largest value of the vector it
# returns.
#
# @param hash_a [AudioHash] forwarded to audio_distance_ber
# @param hash_b [AudioHash] forwarded to audio_distance_ber
# @param args extra arguments forwarded unchanged to audio_distance_ber
# @return [Float, nil] maximum of the vector, or nil when audio_distance_ber
#   returned nil (or an empty vector)
def audio_similarity(hash_a, hash_b, *args)
  distances = audio_distance_ber(hash_a, hash_b, *args)
  # audio_distance_ber returns nil on failure; propagate that instead of
  # raising NoMethodError on nil.max.
  distances && distances.max
end
.image_hamming_distance(hash_a, hash_b) ⇒ Object
Get distance between two image hashes using ph_hamming_distance
33 34 35 36 37 38 |
# File 'lib/phash/image.rb', line 33
#
# Get the distance between two image hashes using <tt>ph_hamming_distance</tt>.
#
# @param hash_a [ImageHash]
# @param hash_b [ImageHash]
# @return [Object] whatever ph_hamming_distance returns for the two +data+ values
# @raise [ArgumentError] when either argument is not an ImageHash
def image_hamming_distance(hash_a, hash_b)
  raise ArgumentError, 'hash_a is not an ImageHash' unless hash_a.is_a?(ImageHash)
  raise ArgumentError, 'hash_b is not an ImageHash' unless hash_b.is_a?(ImageHash)

  ph_hamming_distance(hash_a.data, hash_b.data)
end
.image_hash(path) ⇒ Object
Get image file hash using ph_dct_imagehash
22 23 24 25 26 27 28 29 30 |
# File 'lib/phash/image.rb', line 22
#
# Get the hash of the image file at +path+ using <tt>ph_dct_imagehash</tt>.
#
# @param path [#to_s] passed to ph_dct_imagehash as a string
# @return [ImageHash, nil] wraps the unsigned 64-bit value that
#   ph_dct_imagehash wrote to the out-parameter, or nil when it returned -1
def image_hash(path)
  hash_p = FFI::MemoryPointer.new :ulong_long
  begin
    return nil if -1 == ph_dct_imagehash(path.to_s, hash_p)

    ImageHash.new(hash_p.get_uint64(0))
  ensure
    # Release the out-parameter on every path, not only on success.
    hash_p.free
  end
end
.image_similarity(hash_a, hash_b) ⇒ Object
Get similarity from hamming_distance
41 42 43 |
# File 'lib/phash/image.rb', line 41
#
# Get similarity from image_hamming_distance: one minus the distance divided
# by 64.0.
#
# @param hash_a [ImageHash] forwarded to image_hamming_distance
# @param hash_b [ImageHash] forwarded to image_hamming_distance
# @return [Float] 1.0 for a distance of 0, 0.0 for a distance of 64
def image_similarity(hash_a, hash_b)
  distance = image_hamming_distance(hash_a, hash_b)
  1 - distance / 64.0
end
.text_hash(path) ⇒ Object
Get text file hash using ph_texthash
42 43 44 45 46 47 48 49 50 |
# File 'lib/phash/text.rb', line 42
#
# Get the hash of the text file at +path+ using <tt>ph_texthash</tt>.
#
# @param path [#to_s] passed to ph_texthash as a string
# @return [TextHash, nil] the returned pointer wrapped together with the
#   length that ph_texthash wrote to the out-parameter, or nil when
#   ph_texthash yields nil or a NULL pointer
def text_hash(path)
  hash_data_length_p = FFI::MemoryPointer.new :int
  begin
    hash_data = ph_texthash(path.to_s, hash_data_length_p)
    # A function attached with a :pointer return type hands back a truthy NULL
    # pointer object instead of nil, so check for it explicitly.
    # NOTE(review): the attach_function declaration is not visible here — confirm the return type.
    return nil if hash_data.nil? || hash_data.null?

    TextHash.new(hash_data, hash_data_length_p.get_int(0))
  ensure
    # Release the out-parameter on every path, not only on success.
    hash_data_length_p.free
  end
end
.text_similarity(hash_a, hash_b) ⇒ Object
Get similarity between two text hashes using ph_compare_text_hashes
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/phash/text.rb', line 53
#
# Get the similarity of two text hashes from the matches reported by
# <tt>ph_compare_text_hashes</tt>.
#
# Every match marks +length+ consecutive positions as covered, starting at
# +index_a+ in the first hash and +index_b+ in the second. The result is the
# mean of the two covered fractions.
#
# @param hash_a [TextHash]
# @param hash_b [TextHash]
# @return [Float, nil] mean coverage of both hashes, or nil when
#   ph_compare_text_hashes yields nil or a NULL pointer
# @raise [ArgumentError] when either argument is not a TextHash
def text_similarity(hash_a, hash_b)
  raise ArgumentError, 'hash_a is not a TextHash' unless hash_a.is_a?(TextHash)
  raise ArgumentError, 'hash_b is not a TextHash' unless hash_b.is_a?(TextHash)

  matches_length_p = FFI::MemoryPointer.new :int
  begin
    data = ph_compare_text_hashes(hash_a.data, hash_a.length, hash_b.data, hash_b.length, matches_length_p)
    # A function attached with a :pointer return type hands back a truthy NULL
    # pointer object instead of nil, so check for it explicitly.
    # NOTE(review): the attach_function declaration is not visible here — confirm the return type.
    return nil if data.nil? || data.null?

    begin
      matches_length = matches_length_p.get_int(0)
      # The result is read as a contiguous array of TxtMatch structs.
      matches = matches_length.times.map { |i| TxtMatch.new(data + i * TxtMatch.size) }

      matched_a = Array.new(hash_a.length)
      matched_b = Array.new(hash_b.length)
      matches.each do |match|
        index_a = match[:index_a]
        index_b = match[:index_b]
        match[:length].times do |i|
          matched_a[index_a + i] = true
          matched_b[index_b + i] = true
        end
      end

      # Entries are either nil or true, so counting true counts covered positions.
      coverage_a = matched_a.count(true) / hash_a.length.to_f
      coverage_b = matched_b.count(true) / hash_b.length.to_f
      (coverage_a + coverage_b) * 0.5
    ensure
      # The match structs read from this buffer, so hand it to free only after
      # processing — and also when processing raises.
      free(data)
    end
  ensure
    # Release the out-parameter on every path, not only on success.
    matches_length_p.free
  end
end
.video_dct_distance(hash_a, hash_b, threshold = 21) ⇒ Object Also known as: video_similarity
Get distance between two video hashes using ph_dct_videohash_dist
29 30 31 32 33 34 |
# File 'lib/phash/video.rb', line 29
#
# Get the distance between two video hashes using
# <tt>ph_dct_videohash_dist</tt>.
#
# @param hash_a [VideoHash]
# @param hash_b [VideoHash]
# @param threshold [#to_i] passed to ph_dct_videohash_dist as an integer
# @return [Object] whatever ph_dct_videohash_dist returns
# @raise [ArgumentError] when either argument is not a VideoHash
def video_dct_distance(hash_a, hash_b, threshold = 21)
  raise ArgumentError, 'hash_a is not a VideoHash' unless hash_a.is_a?(VideoHash)
  raise ArgumentError, 'hash_b is not a VideoHash' unless hash_b.is_a?(VideoHash)

  ph_dct_videohash_dist(hash_a.data, hash_a.length, hash_b.data, hash_b.length, threshold.to_i)
end
.video_hash(path) ⇒ Object
Get video hash using ph_dct_videohash
18 19 20 21 22 23 24 25 26 |
# File 'lib/phash/video.rb', line 18
#
# Get the hash of the video file at +path+ using <tt>ph_dct_videohash</tt>.
#
# @param path [#to_s] passed to ph_dct_videohash as a string
# @return [VideoHash, nil] the returned pointer wrapped together with the
#   length that ph_dct_videohash wrote to the out-parameter, or nil when
#   ph_dct_videohash yields nil or a NULL pointer
def video_hash(path)
  hash_data_length_p = FFI::MemoryPointer.new :int
  begin
    hash_data = ph_dct_videohash(path.to_s, hash_data_length_p)
    # A function attached with a :pointer return type hands back a truthy NULL
    # pointer object instead of nil, so check for it explicitly.
    # NOTE(review): the attach_function declaration is not visible here — confirm the return type.
    return nil if hash_data.nil? || hash_data.null?

    VideoHash.new(hash_data, hash_data_length_p.get_int(0))
  ensure
    # Release the out-parameter on every path, not only on success.
    hash_data_length_p.free
  end
end