Class: LanguageDetector::Detector

Inherits:
Object
  • Object
show all
Defined in:
lib/language_detector.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.train ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/language_detector.rb', line 30

# Builds one LanguageDetector::Profile per bundled training corpus and
# serialises the resulting list as YAML to "model.yml" in the directory of
# this source file.
#
# The built profiles are also kept in the receiver's @profiles.
#
# @return [Object] whatever the File.open block yields, i.e. the result of
#   YAML.dump
def self.train
  # Each row: language code, training file name, human-readable label.
  # Only the first two columns are consumed here.
  corpora = [
    [ "en", "english.txt", "english" ],
    [ "es", "spanish.txt", "spanish" ]
  ]

  @profiles = []

  corpora.each do |language_code, corpus_file, _label|
    trained = LanguageDetector::Profile.new(language_code)
    trained.init_with_training_file(corpus_file)
    @profiles.push(trained)
  end

  model_path = File.expand_path("model.yml", File.dirname(__FILE__))
  File.open(model_path, 'w') do |out|
    YAML.dump(@profiles, out)
  end
end

Instance Method Details

#detect_language(file_name) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/language_detector.rb', line 7

# Guesses the language of a text file by comparing it against every known
# language profile and picking the one with the smallest distance.
#
# Profiles are loaded through load_model on first use and cached in
# @profiles. The file content is lower-cased before it is profiled.
#
# @param file_name [String] path of the file to classify
# @return [Object] the name of the closest profile, or 'unknown' when there
#   are no profiles to compare against
def detect_language(file_name)
  @profiles ||= load_model

  lowered_text = File.read(file_name).downcase

  sample = LanguageDetector::Profile.new("")
  sample.init_with_string(lowered_text)

  closest_name = 'unknown'
  smallest = nil

  @profiles.each do |candidate|
    distance = candidate.compute_distance(sample)

    # Strictly-smaller comparison: on a tie the earlier profile wins.
    next unless smallest.nil? || distance < smallest

    smallest     = distance
    closest_name = candidate.name
  end

  closest_name
end

#load_model ⇒ Object



50
51
52
53
# File 'lib/language_detector.rb', line 50

# Loads the serialised language profiles from "model.yml" in the directory of
# this source file and caches them in @profiles.
#
# The model contains full Ruby objects rather than plain scalars. Since
# Psych 4 (bundled with Ruby 3.1+), YAML.load_file is a *safe* load that
# raises Psych::DisallowedClass for such objects, so the unrestricted loader
# is used when available. Older Psych versions do not define
# unsafe_load_file; there YAML.load_file already deserialises arbitrary
# objects.
#
# NOTE: unrestricted YAML loading can instantiate arbitrary classes. It is
# only acceptable here because the path is fixed to the model file sitting
# beside this source; never route untrusted input through this method.
#
# @return [Object] the deserialised content of model.yml (also stored in
#   @profiles)
# @raise [Errno::ENOENT] if model.yml does not exist
def load_model
  filename = File.expand_path(File.join(File.dirname(__FILE__), "model.yml"))
  @profiles =
    if YAML.respond_to?(:unsafe_load_file)
      YAML.unsafe_load_file(filename)
    else
      YAML.load_file(filename)
    end
end