Class: Wordlist::Builder

Inherits:
Object
  • Object
show all
Includes:
Parsers
Defined in:
lib/wordlist/builder.rb

Direct Known Subclasses

Wordlist::Builders::Website

Instance Attribute Summary collapse

Attributes included from Parsers

#ignore_case, #ignore_phone_numbers, #ignore_punctuation, #ignore_references, #ignore_urls

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path, options = {}) {|_self| ... } ⇒ Builder

Creates a new word-list Builder object.

Parameters:

  • path (String)

    The path of the word-list file.

  • options (Hash) (defaults to: {})

    Additional options.

Options Hash (options):

  • :min_words (Integer) — default: 1

    The minimum number of words each line of the word-list must contain.

  • :max_words (Integer)

    The maximum number of words each line of the word-list may contain. Defaults to the value of :min_words, if not given.

Yields:

  • (_self)

Yield Parameters:



43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/wordlist/builder.rb', line 43

# Creates a new word-list Builder.
#
# @param [String] path
#   The path of the word-list file; stored in expanded (absolute) form.
#
# @param [Hash] options
#   Additional options.
#
# @option options [Integer] :min_words (1)
#   The minimum number of words each line of the word-list must contain.
#
# @option options [Integer] :max_words
#   The maximum number of words each line may contain. Falls back to the
#   resolved value of `:min_words` when not given.
#
# @yield [_self]
#   If a block is given, it is passed the newly initialized builder.
def initialize(path,options={})
  super()

  @path = File.expand_path(path)

  # :max_words is resolved after :min_words so it can default to it
  @min_words = options.fetch(:min_words) { 1 }
  @max_words = options.fetch(:max_words) { @min_words }

  # no word-list file is open yet
  @file       = nil
  @word_queue = []
  @filter     = UniqueFilter.new

  yield(self) if block_given?
end

Instance Attribute Details

#fileObject (readonly)

File for the word-list



19
20
21
# File 'lib/wordlist/builder.rb', line 19

# @return [Object]
#   The word-list file currently held in `@file`, or `nil` when none is set.
def file
  return @file
end

#filterObject (readonly)

The unique word filter



22
23
24
# File 'lib/wordlist/builder.rb', line 22

# @return [Object]
#   The unique word filter held in `@filter`.
def filter
  return @filter
end

#max_wordsObject (readonly)

Maximum number of words



16
17
18
# File 'lib/wordlist/builder.rb', line 16

# @return [Object]
#   The maximum number of words, as held in `@max_words`.
def max_words
  return @max_words
end

#min_wordsObject (readonly)

Minimum number of words



13
14
15
# File 'lib/wordlist/builder.rb', line 13

# @return [Object]
#   The minimum number of words, as held in `@min_words`.
def min_words
  return @min_words
end

#pathObject (readonly)

Path of the word-list



10
11
12
# File 'lib/wordlist/builder.rb', line 10

# @return [Object]
#   The path of the word-list, as held in `@path`.
def path
  return @path
end

#word_queueObject (readonly)

The queue of words awaiting processing



25
26
27
# File 'lib/wordlist/builder.rb', line 25

# @return [Object]
#   The queue of words awaiting processing, as held in `@word_queue`.
def word_queue
  return @word_queue
end

Class Method Details

.build(*arguments) {|builder| ... } ⇒ Builder

Creates a new Builder object with the given arguments, opens the word-list file, passes the builder object to the given block, then finally closes the word-list file.

Examples:

Builder.build('some/path') do |builder|
  builder.parse(readline)
end

Parameters:

  • arguments (Array)

    Additional arguments to pass to #initialize.

Yields:

  • (builder)

    If a block is given, it will be passed the new builder.

Yield Parameters:

  • builder (Builder)

    The newly created builder object.

Returns:

  • (Builder)

    The newly created builder object.



80
81
82
83
84
85
86
# File 'lib/wordlist/builder.rb', line 80

# Creates a new builder with the given arguments, opens the word-list
# file, passes the builder to the given block, then closes the word-list
# file.
#
# The file is closed even when building raises, so a failing block does
# not leak the open file handle; the exception still propagates.
#
# @param [Array] arguments
#   Additional arguments to pass to `new`.
#
# @yield [builder]
#   If a block is given, it will be passed the new builder.
#
# @return
#   Whatever `new` returns (the newly created builder object).
def self.build(*arguments,&block)
  self.new(*arguments) do |builder|
    builder.open!

    begin
      builder.build!(&block)
    ensure
      # always release the file, even if the block raised
      builder.close!
    end
  end
end

Instance Method Details

#+(words) ⇒ Builder

Add the given words to the word-list.

Parameters:

  • words (Array<String>)

    The words to add to the list.

Returns:

  • (Builder)

    The builder object.



203
204
205
206
# File 'lib/wordlist/builder.rb', line 203

# Adds each of the given words to the word-list via `<<`.
#
# @param [Array<String>] words
#   The words to add to the list.
#
# @return
#   The receiver, so calls can be chained.
def +(words)
  words.each do |entry|
    self << entry
  end

  self
end

#<<(word) ⇒ Builder

Appends the given word to the word-list file, only if it has not been previously seen.

Parameters:

  • word (String)

    The word to append.

Returns:

  • (Builder)

    The builder object.



180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/wordlist/builder.rb', line 180

# Enqueues the given word and, when a word-list file is open, writes every
# combination that passes the unique filter to that file.
#
# @param [String] word
#   The word to append.
#
# @return
#   The receiver, so calls can be chained.
def <<(word)
  enqueue(word)

  # the word is still queued when no file is open; nothing is written
  return self unless @file

  word_combinations do |combination|
    @filter.pass(combination) { |unique| @file.puts unique }
  end

  self
end

#build! {|builder| ... } ⇒ Object

Default method to be called when the word-list is to be built.

Yields:

  • (builder)

    If a block is given, it will be passed the new builder object.



113
114
115
# File 'lib/wordlist/builder.rb', line 113

# Default build step: hands the builder to the given block, if any.
#
# @yield [builder]
#   If a block is given, it will be passed the builder object.
#
# @return
#   The block's result, or `nil` when no block is given.
def build!
  return unless block_given?

  yield(self)
end

#close!Object

Closes the word-list file.



236
237
238
239
240
241
242
243
244
# File 'lib/wordlist/builder.rb', line 236

# Closes the word-list file, then clears the unique filter and the word
# queue. Does nothing when no file is open.
def close!
  return unless @file

  @file.close
  @file = nil

  # forget the filter and queue state gathered while the file was open
  @filter.clear
  @word_queue.clear
end

#enqueue(word) ⇒ String

Enqueues a given word for processing.

Parameters:

  • word (String)

    The word to enqueue.

Returns:

  • (String)

    The enqueued word.



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/wordlist/builder.rb', line 126

# Enqueues a given word for processing.
#
# The word is stored in its String form. With a single-word limit the
# queue's only slot is overwritten; otherwise the word is appended and the
# oldest entry is dropped once the queue exceeds `@max_words`.
#
# @param [String] word
#   The word to enqueue.
#
# @return
#   The word exactly as it was given (not converted).
def enqueue(word)
  text = word.to_s

  if @max_words == 1
    @word_queue[0] = text
  else
    @word_queue.push(text)

    # keep the queue bounded to the newest @max_words entries
    @word_queue.shift if @word_queue.length > @max_words
  end

  word
end

#open!File

Opens the word-list file for writing. If the file already exists, the previous words will be used to filter future duplicate words.

Returns:

  • (File)

    The open word-list file.



95
96
97
98
99
100
101
102
103
104
105
# File 'lib/wordlist/builder.rb', line 95

# Opens the word-list file for writing. If the file already exists, each
# of its lines (without the trailing newline) is first registered with the
# filter so that future duplicates are rejected.
#
# @return [File]
#   The open word-list file, also stored in `@file`.
def open!
  # seed the filter from any previously written word-list
  if File.file?(@path)
    File.foreach(@path) { |line| @filter.saw!(line.chomp) }
  end

  # read/write, created on demand, every write appended
  @file = File.new(@path,File::RDWR | File::CREAT | File::APPEND)
end

#parse(text) ⇒ Object

Parses the given text, adding each unique word to the word-list file.

Parameters:

  • text (String)

    The text to parse.



214
215
216
# File 'lib/wordlist/builder.rb', line 214

# Parses the given text with the inherited `parse`, then adds each
# resulting word to the word-list via `<<`.
#
# @param [String] text
#   The text to parse.
#
# @return
#   The collection of words returned by the inherited `parse`.
def parse(text)
  words = super(text)

  words.each { |word| self << word }
end

#parse_file(path) ⇒ Object

Parses the contents of the file at the given path, adding each unique word to the word-list file.

Parameters:

  • path (String)

    The path of the file to parse.



225
226
227
228
229
230
231
# File 'lib/wordlist/builder.rb', line 225

# Parses the contents of the file at the given path, passing it to `parse`
# one line at a time.
#
# @param [String] path
#   The path of the file to parse.
def parse_file(path)
  File.open(path) do |input|
    input.each_line { |line| parse(line) }
  end
end

#word_combinations {|combination| ... } ⇒ Object

Enumerates over the combinations of previously seen words.

Yields:

  • (combination)

    The given block will be passed the combinations of previously seen words.

Yield Parameters:

  • combination (String)

    A combination of one or more space-separated words.



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/wordlist/builder.rb', line 152

# Enumerates over the combinations of previously seen words.
#
# With a single-word limit, the sole queued word is yielded as-is.
# Otherwise every run of queued words that ends at the newest word and is
# at least `@min_words` long is yielded, shortest first.
#
# @yield [combination]
#   The given block will be passed the combinations of previously seen
#   words.
#
# @yieldparam [String] combination
#   A combination of one or more space-separated words.
def word_combinations
  return yield(@word_queue[0]) if @max_words == 1

  queued = @word_queue.length

  # we must have at least the minimum amount of words
  return if queued < @min_words

  # widen the window from the newest words back to the oldest
  (queued - @min_words).downto(0) do |start|
    yield @word_queue[start..-1].join(' ')
  end
end