Class: Jebediah

Inherits:
Object
  • Object
show all
Defined in:
lib/jebediah.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dictPaths = nil) ⇒ Jebediah

Returns a new instance of Jebediah.



6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/jebediah.rb', line 6

# Creates a Jebediah instance and loads its dictionaries.
#
# @param dictPaths [Array<String>, nil] dictionary file paths, one per word
#   position in a phrase; when nil, adverbs.txt, verbs.txt and animals.txt
#   from the ../dictionaries directory (relative to this file) are used
def initialize(dictPaths=nil)
	if dictPaths.nil?
		# No explicit paths: fall back to the 3-word nomenclature
		# (adverb, verb, animal), e.g. "ridiculously elaborated parrot".
		dictionaryDir = File.expand_path(File.dirname(__FILE__) + '/../dictionaries')
		dictPaths = ["adverbs.txt", "verbs.txt", "animals.txt"].map do |fileName|
			File.join(dictionaryDir, fileName)
		end
	end

	loadDictionaries(dictPaths)
end

Class Method Details

.version ⇒ Object



2
3
4
# File 'lib/jebediah.rb', line 2

# Reports the library version.
#
# @return [String] the version string
def self.version
	"1.0.7"
end

Instance Method Details

#dehyphenatePhrase(phrase) ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/jebediah.rb', line 167

# Splits a hyphen-joined phrase into one term per dictionary.
#
# Dictionary entries may themselves contain hyphens, so the phrase cannot
# simply be split on "-". Instead, for each dictionary in order, the longest
# leading run of words found in that dictionary is consumed as one term.
#
# @param phrase [String, Array<String>] a hyphen-separated string, or the
#   already-split list of words
# @return [Array<String>, nil] one term per dictionary, or nil when some
#   dictionary has no matching term or words are left over at the end
def dehyphenatePhrase(phrase)
	remaining = phrase.is_a?(String) ? phrase.split("-") : phrase
	terms = []

	@dictionaries.each do |dict|
		# Words ran out before every dictionary got a term.
		return nil if remaining.empty?

		matched = longestMatchInDictionary(remaining, dict)
		return nil if matched.nil?

		terms.push(matched.join("-"))
		remaining = remaining[matched.count .. -1]
	end

	# Every word must have been consumed by some dictionary.
	remaining.empty? ? terms : nil
end

#hashForPhrase(phrase) ⇒ Object

Convert a phrase into a hash, e.g. “disobligingly hypnotized grizzly” -> “0123abc”. Returns nil if the phrase cannot be converted. The phrase can be supplied as a string or an array. If it is a string, the words must be separated by whitespace or hyphens.



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/jebediah.rb', line 119

# Converts a phrase into its hexadecimal hash string.
#
# @param phrase [String, Array<String>] the phrase; a string is either
#   dehyphenated or split on whitespace, an array is used as given
# @return [String, nil] the hash as a zero-padded 7-digit hex string, or nil
#   when the word count does not match the number of dictionaries or a word
#   is missing from its dictionary
def hashForPhrase(phrase)
	# Suppose we have n+1 dictionaries in our nomenclature.
	# Let L_i be the length of the ith dictionary for 0 <= i <= n.
	# Let W_i be the ith word in the phrase.
	# Let K_i be the zero-based index of W_i in the ith dictionary (0 <= K_i < L_i).
	# The hash is the mixed-radix number
	#   H = K0 + L0 K1 + L0 L1 K2 + ... + L0 L1 ... L_(n-1) K_n
	# rendered in hexadecimal.

	if phrase.is_a?(String)
		phrase =
			if isHyphenated?(phrase)
				dehyphenatePhrase(phrase)
			else
				phrase.gsub(/\s+/m, ' ').strip.split(' ')
			end
	end

	# Without exactly one word per dictionary the phrase cannot be converted.
	return nil if phrase.nil? || phrase.length != @dictionaries.length

	multiplier = 1 # L0 L1 ... L_(i-1) for the current position i
	total = 0

	@dictionaries.each_with_index do |dict, position|
		index = dict.index(phrase[position])
		return nil if index.nil?

		total += index * multiplier
		multiplier *= dict.length
	end

	# Render the hash as a 7-digit hex string (suitable for git)
	"%07x" % total
end

#hashFromString(str) ⇒ Object

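Converts a hexadecimal string (optionally prefixed with “0x”) to an integer, after truncating it to the maximum number of digits addressable in the dictionary space.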



34
35
36
37
38
# File 'lib/jebediah.rb', line 34

# Converts a hash into its integer value.
#
# An Integer is returned unchanged. Previously an Integer raised
# NoMethodError because start_with? was called before to_s. Anything else is
# converted with to_s, stripped of a leading "0x", truncated to
# maxStringLength hex digits and parsed as hexadecimal.
#
# @param str [String, Integer] hexadecimal string (optionally prefixed with
#   "0x"), or the hash value itself as an Integer
# @return [Integer] the numeric hash; Integer input is not range-checked here
# @raise [ArgumentError] if the (truncated) string is not valid hexadecimal
def hashFromString(str)
	return str if str.is_a?(Integer)

	digits = str.to_s
	digits = digits[2..-1] if digits.start_with?("0x")
	# Keep only as many leading digits as the dictionary space can address.
	digits = digits[0..maxStringLength-1]
	Integer("0x" + digits)
end

#isHash?(str) ⇒ Boolean

Test if a string is a valid hash

Returns:

  • (Boolean)


63
64
65
# File 'lib/jebediah.rb', line 63

# Tests whether a string consists solely of hexadecimal digits.
#
# The pattern is anchored with \A and \z so the whole string must match. The
# previous ^ and $ anchors match per line, which let a multi-line string
# through as long as one of its lines was hexadecimal.
#
# @param str [String] the candidate hash
# @return [Integer, nil] 0 (truthy) when str is a non-empty run of hex
#   digits, nil otherwise
def isHash?(str)
	str =~ /\A[0-9a-fA-F]+\z/
end

#isHyphenated?(str) ⇒ Boolean

Returns:

  • (Boolean)


67
68
69
# File 'lib/jebediah.rb', line 67

# Tests whether a string is made up only of word characters and hyphens,
# i.e. it looks like a hyphen-joined phrase (a single bare word also matches).
#
# The pattern is anchored with \A and \z so the whole string must match. With
# the previous ^ and $ anchors, a whitespace-separated phrase containing a
# newline (e.g. "one\ntwo three") matched on its first line and was wrongly
# treated as hyphenated.
#
# @param str [String] the candidate phrase
# @return [Integer, nil] 0 (truthy) on a match, nil otherwise
def isHyphenated?(str)
	str =~ /\A[\w-]+\z/
end

#loadDictionaries(dictPaths) ⇒ Object

Load in dictionaries from paths



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/jebediah.rb', line 41

# Loads the dictionaries from the given paths into @dictionaries.
#
# Each dictionary is stored as an array of its lines with the trailing
# newline removed, in the same order as dictPaths. If a path does not exist,
# is not a regular file, or is not readable, a message is printed and loading
# stops; @dictionaries then holds only the dictionaries loaded so far.
#
# @param dictPaths [Array<String>] paths of the dictionary files
# @return [Array<String>, nil] dictPaths when every file was loaded, nil when
#   loading stopped early
def loadDictionaries(dictPaths)
	@dictionaries = []
	dictPaths.each do |dictPath|
		# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
		unless File.exist?(dictPath)
			puts "Dictionary does not exist: #{dictPath}"
			return
		end

		# The two checks below used to print and then fall through to the
		# read, which raised anyway; they now stop like the check above.
		unless File.file?(dictPath)
			puts "Dictionary is not a regular file: #{dictPath}"
			return
		end

		unless File.readable?(dictPath)
			puts "Dictionary is not readable: #{dictPath}"
			return
		end

		# Read dictionary lines into an array, no trailing newline
		@dictionaries.push File.readlines(dictPath).map { |line| line.chomp }
	end
end

#longestMatchInDictionary(words, dict) ⇒ Object



161
162
163
164
165
# File 'lib/jebediah.rb', line 161

# Finds the longest leading run of words that, joined with hyphens, is an
# entry of the dictionary.
#
# Candidates are tried from longest to shortest so the search stops at the
# first hit. The matched words are returned as given rather than re-split on
# "-", so a word that already contains a hyphen still counts as one word.
# The no-match case is handled explicitly instead of through `rescue nil`.
#
# @param words [Array<String>] the words still to be matched
# @param dict [Array<String>] the dictionary entries
# @return [Array<String>, nil] the leading words forming the longest match,
#   or nil when no prefix of words is in the dictionary
def longestMatchInDictionary(words, dict)
	words.count.downto(1) do |length|
		candidate = words.first(length)
		return candidate if dict.include?(candidate.join("-"))
	end

	nil
end

#maxStringLength ⇒ Object

Maximum hexadecimal string length that can safely be addressed by the dictionary set



27
28
29
30
31
# File 'lib/jebediah.rb', line 27

# Maximum hexadecimal string length that can safely be addressed by the
# dictionary set.
#
# The dictionary space holds `weight` phrases (the product of the dictionary
# lengths). floor(log2(weight)) whole bits fit in it, and each hex digit
# needs 4 bits. The bit count uses Integer#bit_length, which is exact;
# dividing floating-point logarithms can misround near powers of two.
#
# @return [Integer] number of hex digits; 0 when the space is empty (some
#   dictionary has no entries) or too small for one digit
def maxStringLength
	weight = @dictionaries.inject(1) { |product, dict| product * dict.length }
	# An empty dictionary used to raise FloatDomainError via Math.log(0).
	return 0 if weight < 1

	(weight.bit_length - 1) / 4
end

#phraseForHash(hash) ⇒ Object

Convert a hash into a phrase, e.g. “abc4321” -> “rightward succeeded seal”. The hash can be supplied as an integer or as a hexadecimal string.

Returns nil if the hash cannot be converted



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/jebediah.rb', line 187

# Converts a hash into a phrase, e.g. "abc4321" -> ["rightward", "succeeded", "seal"].
#
# Two defects are fixed here. An Integer hash is now used directly instead of
# always yielding nil. A hash outside the dictionary space now returns nil
# instead of raising NoMethodError while the phrase is assembled.
#
# @param hash [Integer, String] the hash value, or a hexadecimal string
#   (parsed by hashFromString)
# @return [Array<String>, nil] one word per dictionary, or nil when the hash
#   cannot be parsed or lies outside the dictionary space
def phraseForHash(hash)
	# As noted in hashForPhrase, our hash is just an integer index, of the form
	#  H = K0 + L0 K1 + L0 L1 K2 + ... + L0 L1 ... L_(n-1) K_n
	#
	# The key insight in reversing this is to realize that if we write,
	#   c_0 = 1,  c_n = L0 L1 ... L_(n-1)
	#   s_n = c_n K_n
	#   S_n = s0 + s1 + ... + s_n,
	# then we must have,
	#   c_(n+1) > S_n.    (see proof below)
	#
	# So if we consider
	#   H = S_n = c_n K_n + S_(n-1)
	# then
	#   S_n / c_n = K_n + S_(n-1) / c_n
	# We know that 0 <= S_(n-1) / c_n < 1, so
	#   int(S_n/c_n) = K_n
	#
	# We can use this information to recurse:
	#   s_n = c_n K_n = c_n * int(S_n/c_n)
	#   S_n - s_n = S_(n-1),
	#
	#     Proof of c_(n+1) > S_n
	#
	# The following is an inductive proof.
	# Base case:  (c1 > S0)
	#   c1 > S0   <=>   L0 > K0, which we know by definition (0 <= K_i < L_i)
	#
	# Inductive step:  (c_n > S_(n-1)  =>  c_(n+1) > S_n)
	# Recall that,
	#   c_n = L0 L1 ... L_(n-1)
	# Notice that (L_n - K_n) >= 1, so
	#   c_n <= L0 L1 ... L_(n-1) * (L_n - K_n) = c_(n+1) - s_n
	# So,
	#         c_n > S_(n-1)
	#    =>  c_(n+1) - s_n > S_(n-1)
	#    =>  c_(n+1) > S_(n-1) + s_n = S_n
	# Therefore, c_n > S_(n-1) => c_(n+1) > S_n.
	# Since we have c1 > S0,
	#   c_(n+1) > S_n for all n >= 0.

	begin
		weight = @dictionaries.inject(1) { |x, dict| dict.length * x } # L0 L1 L2 ... L_n
		sum = hash.is_a?(Integer) ? hash : hashFromString(hash)
	rescue StandardError
		# Unparseable input (or unusable dictionaries) cannot be converted.
		return nil
	end

	# Only 0 <= H < L0 L1 ... L_n names a phrase. This also covers an empty
	# dictionary (weight 0), so the divisions below never divide by zero.
	return nil if sum < 0 || sum >= weight

	lines = [ 0 ] * @dictionaries.length # We fill from the end backwards, so allocate the total size up front

	(@dictionaries.length-1).downto(0) do |n|
		weight /= @dictionaries[n].length # c_n = L0 L1 .. L_(n-1)
		# K_n = int(S_n / c_n),  S_(n-1) = S_n - c_n K_n
		lines[n], sum = sum.divmod(weight)
	end

	@dictionaries.each_with_index.map { |dict, i| dict[lines[i]].strip }
end

#phraseLength ⇒ Object

Returns number of words expected in a phrase for this Jebediah instance



22
23
24
# File 'lib/jebediah.rb', line 22

# Number of words expected in a phrase for this Jebediah instance, which is
# the number of loaded dictionaries.
#
# @return [Integer] the word count
def phraseLength
	@dictionaries.size
end

#process(input) ⇒ Object

Process an arbitrary string or array, and guess its meaning. Returns a hash with the following keys:

:result = hash of phrase (if :type == 'hash'), phrase for hash (if :type == 'phrase'), or undefined (otherwise)
:type = 'hash', 'phrase', 'error'


97
98
99
100
101
102
103
104
# File 'lib/jebediah.rb', line 97

# Processes an arbitrary string or array and guesses its meaning.
#
# Strings go to processString and arrays to processArray. Any other input now
# produces an error result; previously it raised NoMethodError because no
# result hash had been created.
#
# @param input [String, Array] a hash or a phrase
# @return [Hash] :type is 'hash', 'phrase' or 'error'; :result holds the
#   converted value and is present only when :type is not 'error'
def process(input)
	r =
		case input
		when String then processString(input)
		when Array then processArray(input)
		else
			{ :type => 'error' }
		end

	# A result without a usable value is an error, whatever type was reported.
	r[:type] = 'error' if !r.has_key?(:result) || r[:result].nil?
	r.delete(:result) if r[:type] == 'error'

	r
end

#processArray(arr) ⇒ Object

Processes arbitrary input formatted as an array



87
88
89
90
91
# File 'lib/jebediah.rb', line 87

# Processes arbitrary input formatted as an array.
#
# A one-element array is handled as a string (its only element). An array
# with one element per dictionary is treated as a phrase and converted to a
# hash. Anything else is an error.
#
# @param arr [Array] the input words
# @return [Hash] a result hash with :type and, where applicable, :result
def processArray(arr)
	if arr.length == 1
		processString(arr[0])
	elsif arr.length == phraseLength
		{ :type => 'hash', :result => hashForPhrase(arr) }
	else
		{ :type => 'error' }
	end
end

#processString(str) ⇒ Object

Processes arbitrary input formatted as a string



72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/jebediah.rb', line 72

# Processes arbitrary input formatted as a string.
#
# A hexadecimal string is converted to a phrase. Otherwise the string is read
# as a phrase, hyphen-joined or whitespace-separated, and converted to a hash.
#
# @param str [String] a hash or a phrase
# @return [Hash] :type 'phrase' or 'hash' together with :result, or
#   :type 'unreadable' when the string cannot be split into a phrase
def processString(str)
	return { :type => 'phrase', :result => phraseForHash(str) } if isHash?(str)

	if isHyphenated?(str)
		terms = dehyphenatePhrase(str)
		readable = !terms.nil?
	else
		terms = str.split(' ')
		readable = (phraseLength == terms.length)
	end

	return { :type => 'unreadable' } unless readable

	{ :type => 'hash', :result => hashForPhrase(terms) }
end

#renderResult(result) ⇒ Object

Renders a result from process() as a string



107
108
109
110
111
112
113
114
# File 'lib/jebediah.rb', line 107

# Renders a result from process() as a string.
#
# The key check uses && rather than `and`. `and` binds more loosely than `=`,
# so the original assignment only recorded whether :type was present and
# never checked :result.
#
# @param result [Hash] a result hash with :type and :result keys
# @return [String] the hash string, the phrase joined with spaces, or an
#   error message
def renderResult(result)
	has_keys = result.has_key?(:type) && result.has_key?(:result)
	return "Error processing input" if !has_keys || result[:type] == 'error' || result[:type].nil?

	rendered = result[:result]
	return rendered if rendered.is_a?(String)
	return rendered.join(" ") if rendered.is_a?(Array)

	"Error processing result"
end