Class: PdfExtract::Names::NamesDatabase
- Inherits:
-
Object
- Object
- PdfExtract::Names::NamesDatabase
- Defined in:
- lib/names.rb
Constant Summary collapse
- @@ambiguous_weighting =
0.0
- @@unambiguous_weighting =
1.0
- @@db =
SQLite3::Database.new(path_to_data("familynames.db"), {:readonly => true})
- @@stop_words =
File.open(path_to_data("stopwords.txt")).read.split(",")
Class Method Summary collapse
Class Method Details
.detect_names(content) ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/names.rb', line 20

# Scores how "name-like" a piece of text is by looking each word up in the
# names database.
#
# Every whitespace-separated token is lower-cased; stop words and tokens of
# one character or fewer are skipped. Remaining tokens are re-capitalized
# (including the letter after each hyphen, e.g. "smith-jones" becomes
# "Smith-Jones") and matched against the `name` column of the `names` table.
# Each matching row adds a weighting to the running total: the ambiguous
# weighting when the row's third column equals 1, otherwise the unambiguous
# weighting.
# NOTE(review): the third column is presumably an "ambiguous" flag; confirm
# against the database schema.
#
# @param content [String] text to inspect
# @return [Hash] `{:name_frequency => value}` where value is the summed
#   weighting divided by the total number of tokens in `content` (skipped
#   tokens still count toward the denominator), or 0 when nothing matched
def self.detect_names(content)
  words = content.split
  sum = 0.0

  words.each do |word|
    word = word.downcase

    # The call must be parenthesized: `include? word && word.length > 1`
    # would pass the boolean result of `&&` to include?, which never matches
    # a stop word and therefore lets every token through.
    next if word.length <= 1 || @@stop_words.include?(word)

    # Capitalize the first letter and any letter following a hyphen.
    query_word = word.capitalize.gsub(/-(.)/) { |s| "-" + s[1].capitalize }

    @@db.execute("select * from names where name = ?", query_word) do |row|
      sum += row[2] == 1 ? @@ambiguous_weighting : @@unambiguous_weighting
    end
  end

  if sum == 0
    {:name_frequency => 0}
  else
    {:name_frequency => (sum / words.length.to_f)}
  end
end
.path_to_data(data_filename) ⇒ Object
13 14 15 |
# File 'lib/names.rb', line 13

# Builds the path to a file in the data directory that sits next to the
# directory containing this source file.
#
# The directory of this file is resolved to an absolute path first, so the
# result does not depend on the current working directory. The "../data/"
# segment is joined as-is and is not collapsed.
#
# @param data_filename [String] file name inside the data directory
# @return [String] path of the form "<dir of this file>/../data/<data_filename>"
def self.path_to_data(data_filename)
  # `File.(__FILE__)` would invoke File.call and raise NoMethodError;
  # expand_path is what resolves this file's location to an absolute path.
  File.join(File.dirname(File.expand_path(__FILE__)), "../data/" + data_filename)
end