Module: Treat::Entities::Entity::Buildable

Included in:
Treat::Entities::Entity
Defined in:
lib/treat/entities/entity/buildable.rb

Overview

Represents an object that can be built from a folder of files, a specific file, a string, or a numeric object. This module is largely self-explanatory. FIXME: how can we make this language-independent?

Constant Summary collapse

WordRegexp =

Simple regexps to match common entities.

/^[[:alpha:]\-']+$/
NumberRegexp =
/^#?([0-9]+)(\.[0-9]+)?$/
PunctRegexp =
/^[[:punct:]\$]+$/
UriRegexp =
/^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$/ix
EmailRegexp =
/.+\@.+\..+/
Enclitics =
%w['ll 'm 're 's 't 've 'nt]
AcceptedFormats =

Accepted formats of serialized files

['.xml', '.yml', '.yaml', '.mongo']
Reserved =

Reserved folder names

['.index']

Instance Method Summary collapse

Instance Method Details

#anything_from_string(string) ⇒ Object

Build any kind of entity from a string.



283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# File 'lib/treat/entities/entity/buildable.rb', line 283

# Build any kind of entity from a string, dispatching on the
# lower-cased short name (+mn+) of the receiver.
#
# * document   - writes the string to a time-stamped .txt file under
#                Treat.paths.files, then reads it via from_raw_file.
# * collection - always raises; a collection needs a file or folder.
# * phrase / token / zone - delegate to the matching *_from_string.
# * entity     - guesses the type: no space => token; at most one run
#                of '.', '!' or '?' and no newline => group;
#                anything else => zone.
# * otherwise  - instantiates the receiver directly with the string.
#
# The argument itself is never modified: the document branch tags a
# copy as UTF-8, so frozen strings are accepted too.
#
# @param string [String] the text to build from
# @return [Object] whatever the selected builder returns
# @raise [Treat::Exception] when the receiver is a collection
def anything_from_string(string)
  case self.mn.downcase.intern
  when :document
    # Drop a single trailing slash so the path below has exactly one.
    folder = Treat.paths.files.chomp('/')
    doc_file = "#{folder}/#{Time.now.to_f}.txt"

    # force_encoding mutates its receiver (and raises on frozen
    # strings), so operate on a duplicate of the caller's string.
    text = string.dup.force_encoding('UTF-8')
    File.open(doc_file, 'w') do |f|
      f.puts text
    end

    from_raw_file(doc_file)
  when :collection
    raise Treat::Exception,
    "Cannot create a " +
    "collection from a string " +
    "(need a readable file/folder)."
  when :phrase
    group_from_string(string)
  when :token
    token_from_string(string)
  when :zone
    zone_from_string(string)
  when :entity
    if string.count(' ') == 0
      token_from_string(string)
    elsif string.gsub(/[\.\!\?]+/, '.').count('.') <= 1 &&
          string.count("\n") == 0
      # Runs of sentence-ending marks are collapsed so that "?!" or
      # "..." count as a single terminator.
      group_from_string(string)
    else
      zone_from_string(string)
    end
  else
    self.new(string)
  end
end

#build(*args) ⇒ Object

Build an entity from anything (can be a string, numeric, folder, or file name representing a raw or serialized file).



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/treat/entities/entity/buildable.rb', line 29

# Build an entity from whatever was supplied.
#
# Argument normalisation:
# * no arguments           => empty string (yields a blank entity)
# * first argument a Hash  => that hash (handed to from_db)
# * a single Entity        => wrapped in a one-element array
# * a single other value   => used as is
# * several arguments      => the argument array itself
#
# Dispatch then runs in this order: blank => new; Array => from_array;
# Hash => from_db; Document receiver or serialized file => from_url
# or from_file; Collection receiver => from_folder or
# create_collection; String => from_string; Numeric => from_numeric.
#
# @raise [Treat::Exception] when the input matches none of the above
def build(*args)
  first = args[0]
  file_or_value =
    if args.size == 0
      ''
    elsif first.is_a?(Hash)
      first
    elsif args.size == 1
      first.is_a?(Treat::Entities::Entity) ? [first] : first
    else
      args
    end

  fv = file_or_value.to_s

  return self.new if fv == ''
  return from_array(file_or_value) if file_or_value.is_a?(Array)
  return from_db(file_or_value) if file_or_value.is_a?(Hash)

  if self == Treat::Entities::Document || is_serialized_file?(fv)
    return fv =~ UriRegexp ? from_url(fv) : from_file(fv)
  end

  if self == Treat::Entities::Collection
    return FileTest.directory?(fv) ? from_folder(fv) : create_collection(fv)
  end

  # from_string checks caller_method == :build, so this call is kept
  # directly in the method body rather than in a block or helper.
  return from_string(file_or_value) if file_or_value.is_a?(String)
  return from_numeric(file_or_value) if file_or_value.is_a?(Numeric)

  raise Treat::Exception,
  "Unrecognizable input '#{fv}'. "+
  "Please supply a folder, " +
  "filename, string or number."
end

#check_encoding(string) ⇒ Object

This should be improved on.



329
330
331
# File 'lib/treat/entities/entity/buildable.rb', line 329

# Return a UTF-8 transcoded copy of +string+; characters with no
# UTF-8 equivalent are substituted with the replacement character.
# The argument is left untouched, so callers must use the return
# value to benefit from the conversion.
#
# @param string [String] text in any encoding
# @return [String] the transcoded copy
def check_encoding(string)
  target = 'UTF-8'
  string.encode(target, :undef => :replace)
end

#create_collection(fv) ⇒ Object



389
390
391
392
# File 'lib/treat/entities/entity/buildable.rb', line 389

# Create the folder +fv+ on disk and return a new collection for it.
#
# Missing parent directories are created as well, so a nested path
# such as "corpus/2012/news" works even when "corpus" does not exist
# yet. An already existing directory is reused.
#
# @param fv [String] path of the folder backing the collection
# @return [Treat::Entities::Collection]
def create_collection(fv)
  FileUtils.mkdir_p(fv)
  Treat::Entities::Collection.new(fv)
end

#from_array(array) ⇒ Object

Build a document from an array of builders.



101
102
103
104
105
106
107
108
# File 'lib/treat/entities/entity/buildable.rb', line 101

# Build an instance of the receiver and append every element of
# +array+ to it. Elements that are not already entities are first
# converted with their own +to_entity+ method.
#
# @param array [Array] entities and/or objects responding to to_entity
# @return [Object] the newly built container
def from_array(array)
  self.new.tap do |container|
    array.each do |member|
      entity = member.is_a?(Treat::Entities::Entity) ? member : member.to_entity
      container << entity
    end
  end
end

#from_db(hash) ⇒ Object



269
270
271
272
273
274
275
276
277
278
279
280
# File 'lib/treat/entities/entity/buildable.rb', line 269

# Build an entity by unserializing it from a database.
#
# The adapter is taken from the :adapter key of +hash+ or, when that
# is absent, from Treat.databases.default.adapter. The remaining
# entries are passed to +unserialize+ as is. The caller's hash is
# left untouched; the :adapter key is removed from a copy.
#
# @param hash [Hash] lookup options, optionally including :adapter
# @return [Object] the result of unserialize on a fresh instance
# @raise [Treat::Exception] when no adapter can be determined
def from_db(hash)
  options = hash.dup
  adapter = options.delete(:adapter) ||
            Treat.databases.default.adapter
  unless adapter
    raise Treat::Exception,
    "You must supply which database " +
    "adapter to use by passing the :adapter " +
    "option or setting configuration option " +
    "Treat.databases.default.adapter"
  end
  self.new.unserialize(adapter, options)
end

#from_file(file, def_fmt = nil) ⇒ Object

Build a document from a raw or serialized file.



207
208
209
210
211
212
213
214
215
216
# File 'lib/treat/entities/entity/buildable.rb', line 207

# Build a document from a file on disk. Serialized files (as decided
# by is_serialized_file?) go through from_serialized_file; any other
# file has its format detected by the Autoselect reader and is then
# read with from_raw_file.
#
# @param file [String] path of the file
# @param def_fmt [String, nil] fallback format handed to detect_format
# @return [Object] the built entity
def from_file(file, def_fmt = nil)
  return from_serialized_file(file) if is_serialized_file?(file)

  detector = Treat::Workers::Formatters::Readers::Autoselect
  detected = detector.detect_format(file, def_fmt)
  from_raw_file(file, detected)
end

#from_folder(folder) ⇒ Object

Build an entity from a folder with documents. Folders will be searched recursively.



157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/treat/entities/entity/buildable.rb', line 157

# Build a collection from a folder, descending into sub-folders.
#
# Returns nil (so recursive callers can skip it) when the folder's
# own name is one of the Reserved names. The check looks at the last
# path component, so "corpus/.index" is recognised as reserved as
# well as a bare ".index".
#
# Every entry matched by "<folder>/*" is added to the collection:
# sub-folders as nested collections, everything else as a document
# built with Document.from_file.
#
# @param folder [String] path of the folder to load
# @return [Treat::Entities::Collection, nil]
# @raise [Treat::Exception] if the path is not a readable directory
#   or the receiver is not Treat::Entities::Collection
def from_folder(folder)

  return if Reserved.include?(File.basename(folder))

  unless FileTest.directory?(folder)
    raise Treat::Exception,
    "Path '#{folder}' does " +
    "not point to a folder."
  end

  unless File.readable?(folder)
    raise Treat::Exception,
    "Folder '#{folder}' is not readable."
  end

  unless self ==
    Treat::Entities::Collection
    raise Treat::Exception,
    "Cannot create something " +
    "else than a collection " +
    "from folder '#{folder}'."
  end

  c = Treat::Entities::Collection.new(folder)

  # From here on the path always carries exactly one trailing slash.
  folder += '/' unless folder[-1] == '/'
  c.set :folder, folder

  # The slash is already part of +folder+, so none is added here.
  index = folder + '.index'
  c.set :index, index if FileTest.directory?(index)

  Dir[folder + '*'].each do |f|
    if FileTest.directory?(f)
      c2 = Treat::Entities::Collection.
      from_folder(f)
      c.<<(c2, false) if c2
    else
      c.<<(Treat::Entities::Document.
      from_file(f), false)
    end
  end

  c

end

#from_numeric(numeric) ⇒ Object

Build an entity from a Numeric object.



141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/treat/entities/entity/buildable.rb', line 141

# Build a Number entity from a numeric value. Only the Number, Token
# and Entity classes may act as receiver; the value is stored in its
# string form.
#
# @param numeric [#to_s] the value to wrap
# @return [Treat::Entities::Number]
# @raise [Treat::Exception] for any other receiver
def from_numeric(numeric)
  if self != Treat::Entities::Number &&
     self != Treat::Entities::Token &&
     self != Treat::Entities::Entity
    raise Treat::Exception,
    "Cannot create something " +
    "else than a number/token from " +
    "a numeric object."
  end

  Treat::Entities::Number.new(numeric.to_s)
end

#from_raw_file(file, def_fmt = 'txt') ⇒ Object

Build a document from a raw file.



219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/treat/entities/entity/buildable.rb', line 219

# Build a document from a raw (non-serialized) file: a new Document
# has its :file feature set and is then read with the :autoselect
# reader, using +def_fmt+ as the default format.
#
# @param file [String] path of a readable file
# @param def_fmt [String] value passed as the :default_format option
# @return [Object] whatever Document#read returns
# @raise [Treat::Exception] if the receiver is not Document or the
#   file is not readable
def from_raw_file(file, def_fmt = 'txt')
  if self != Treat::Entities::Document
    raise Treat::Exception,
    "Cannot create something else than a " +
    "document from raw file '#{file}'."
  end

  if !File.readable?(file)
    raise Treat::Exception,
    "Path '#{file}' does not "+
    "point to a readable file."
  end

  reader_options = {default_format: def_fmt}
  document = Treat::Entities::Document.new
  document.set :file, file
  document.read(:autoselect, reader_options)
end

#from_serialized_file(file) ⇒ Object

Build an entity from a serialized file.



241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/treat/entities/entity/buildable.rb', line 241

# Build an entity from a serialized file. The format is chosen from
# the file extension (.yml/.yaml => :yaml, .xml => :xml); a temporary
# Document is unserialized from the file and its first child is
# promoted to root and returned.
#
# @param file [String] path of a readable serialized file
# @return [Object] the first child of the unserialized document
# @raise [Treat::Exception] if the file is unreadable or has another
#   extension
def from_serialized_file(file)
  if !File.readable?(file)
    raise Treat::Exception,
    "Path '#{file}' does not "+
    "point to a readable file."
  end

  doc = Treat::Entities::Document.new
  doc.set :file, file

  format =
    case File.extname(file)
    when '.yml', '.yaml' then :yaml
    when '.xml' then :xml
    else
      raise Treat::Exception,
      "Unreadable serialized format for #{file}."
    end

  doc.unserialize(format)
  # The child is looked up again after set_as_root! on purpose: the
  # promotion may alter doc.children, and the original code returns
  # whatever is first afterwards.
  doc.children[0].set_as_root!              # Fix this
  doc.children[0]
end

#from_string(string, enforce_type = false) ⇒ Object

Build an entity from a string. Type is enforced only if requested or if the entity is user-created (i.e. by calling build instead of from_string directly).



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/treat/entities/entity/buildable.rb', line 83

# Build an entity from a string.
#
# The type is enforced when +enforce_type+ is set or when the caller
# is +build+ (detected through caller_method). With enforcement, any
# receiver other than Entity is instantiated directly; otherwise the
# type is guessed by anything_from_string.
#
# @param string [String] the text to build from
# @param enforce_type [Boolean] insist on the receiver's own type
# @return [Object] the built entity
# @raise [RuntimeError] if enforcement was requested and the guessed
#   entity is not an instance of the receiver
def from_string(string, enforce_type = false)
  # Must be evaluated right here: caller_method is asked who called
  # this method.
  enforce_type = true if caller_method == :build

  if enforce_type && self != Treat::Entities::Entity
    return self.new(string)
  end

  detected = anything_from_string(string)
  return detected unless enforce_type && !detected.is_a?(self)

  raise "Asked to build a #{self.mn.downcase} "+
  "from \"#{string}\" and to enforce type, "+
  "but type detected was #{detected.class.mn.downcase}."
end

#from_url(url) ⇒ Object

Build a document from a URL.



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/treat/entities/entity/buildable.rb', line 111

# Build a document from a URL: the resource is downloaded with
# Schiphol into Treat.paths.files (up to three tries), read through
# from_file with 'html' as default format, and tagged with the
# originating :url.
#
# @param url [#to_s] the address to fetch
# @return [Object] the entity returned by from_file
# @raise [Treat::Exception] if the receiver is not Document, or if
#   anything raised while preparing or running the download
def from_url(url)
  if self != Treat::Entities::Document
    raise Treat::Exception,
    'Cannot create something ' +
    'else than a document from a url.'
  end

  downloaded =
    begin
      target = Treat.paths.files
      # Strip one trailing slash from the download folder.
      target = target[0..-2] if target[-1] == '/'
      Schiphol.download(url,
        download_folder: target,
        show_progress: !Treat.core.verbosity.silence,
        rectify_extensions: true,
        max_tries: 3)
    rescue
      raise Treat::Exception,
      "Couldn't download file at #{url}."
    end

  from_file(downloaded, 'html').tap do |entity|
    entity.set :url, url.to_s
  end
end

#group_from_string(string) ⇒ Object

Build a phrase from a string.



334
335
336
337
338
339
340
341
342
343
# File 'lib/treat/entities/entity/buildable.rb', line 334

# Build the right kind of group (fragment, sentence or phrase) from
# a string:
#
# * no letter at all                      => Fragment
# * at least one '.', '!' or '?'          => Sentence
# * otherwise                             => Phrase
#
# Letters are detected with the [[:alpha:]] class, the same one used
# by WordRegexp, so text in non-Latin scripts is not mistaken for a
# letterless fragment.
#
# @param string [String] the text of the group
# @return [Treat::Entities::Fragment, Treat::Entities::Sentence,
#   Treat::Entities::Phrase]
def group_from_string(string)
  # NOTE(review): the transcoded copy returned here is discarded, as
  # in the other *_from_string builders.
  check_encoding(string)
  if !(string =~ /[[:alpha:]]/)
    Treat::Entities::Fragment.new(string)
  elsif string.count('.!?') >= 1
    Treat::Entities::Sentence.new(string)
  else
    Treat::Entities::Phrase.new(string)
  end
end

#is_serialized_file?(path_to_check) ⇒ Boolean

Returns:

  • (Boolean)


265
266
267
# File 'lib/treat/entities/entity/buildable.rb', line 265

# Tell whether a path designates a serialized file: its extension
# must be one of AcceptedFormats and it must be an existing regular
# file.
#
# @param path_to_check [String] the path to examine
# @return [Boolean]
def is_serialized_file?(path_to_check)
  extension = File.extname(path_to_check)
  AcceptedFormats.include?(extension) && File.file?(path_to_check)
end

#token_from_string(string) ⇒ Object

Build the right type of token corresponding to a string.



347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
# File 'lib/treat/entities/entity/buildable.rb', line 347

# Build the right type of token for a string. The candidates are
# tried in this order, first match wins:
#
# 1. Enclitic    - lower-cased string listed in Enclitics
# 2. Word        - matches WordRegexp, has no space, is not "-"
# 3. Number      - matches NumberRegexp (built through from_numeric)
# 4. Punctuation - matches PunctRegexp
# 5. Url         - contains a '.' and matches UriRegexp
# 6. Email       - contains a '@' and matches EmailRegexp
# 7. Symbol      - anything else
#
# @param string [String] the text of the token
# @return [Object] the token entity
def token_from_string(string)
  check_encoding(string)

  if Enclitics.include?(string.downcase)
    return Treat::Entities::Enclitic.new(string)
  end

  word_like = string =~ WordRegexp &&
              string.count(' ') == 0 &&
              string != '-'
  return Treat::Entities::Word.new(string) if word_like
  return from_numeric(string) if string =~ NumberRegexp
  return Treat::Entities::Punctuation.new(string) if string =~ PunctRegexp

  if string.count('.') > 0 && string =~ UriRegexp
    return Treat::Entities::Url.new(string)
  end

  if string.count('@') > 0 && string =~ EmailRegexp
    return Treat::Entities::Email.new(string)
  end

  Treat::Entities::Symbol.new(string)
end

#zone_from_string(string) ⇒ Object

Build the right type of zone corresponding to the string.



374
375
376
377
378
379
380
381
382
383
384
385
386
387
# File 'lib/treat/entities/entity/buildable.rb', line 374

# Build the right type of zone for a string:
#
# * Section   - at least one '.', '!' or '?' and at least one newline
# * Title     - no '.' and fewer than 45 characters
# * Paragraph - anything else
#
# @param string [String] the text of the zone
# @return [Treat::Entities::Section, Treat::Entities::Title,
#   Treat::Entities::Paragraph]
def zone_from_string(string)
  check_encoding(string)

  terminators = string.count('.!?')
  zone_class =
    if terminators >= 1 && string.count("\n") > 0
      Treat::Entities::Section
    elsif string.count('.') == 0 && string.size < 45
      Treat::Entities::Title
    else
      Treat::Entities::Paragraph
    end

  zone_class.new(string)
end