Class: PdfExtract::Names::NamesDatabase

Inherits:
Object
  • Object
show all
Defined in:
lib/names.rb

Constant Summary

# Weight added to the score by detect_names for each matching database row
# whose third column equals 1.
@@ambiguous_weighting = 0.0

# Weight added to the score by detect_names for every other matching row.
@@unambiguous_weighting = 1.0

# Read-only handle on the bundled family-names SQLite database.
@@db = SQLite3::Database.new(path_to_data("familynames.db"), {:readonly => true})

# Comma-separated stop words loaded from the bundled data file. File.read
# closes the file once it has been read, unlike File.open(...).read, which
# leaves the handle open until it is garbage collected.
# NOTE(review): entries are not stripped, so any whitespace or trailing
# newline in stopwords.txt stays part of a word -- confirm the file format.
@@stop_words = File.read(path_to_data("stopwords.txt")).split(",")

Class Method Summary

Class Method Details

.detect_names(content) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/names.rb', line 20

# Scores how name-like a piece of text is.
#
# Splits +content+ on whitespace, looks every eligible word up in the names
# database and adds a weight for each matching row. The total is divided by
# the number of whitespace-separated words in +content+ (stop words and
# single-character words included in that count).
#
# @param content [String] text to score
# @return [Hash] +{:name_frequency => score}+; the score is the Integer 0
#   when nothing matched, otherwise a Float
def self.detect_names content
  words = content.split
  sum = 0.0

  words.each do |word|
    word = word.downcase

    # Explicit parentheses matter here. Written as
    # `include? word && word.length > 1`, the whole `&&` expression was
    # passed to include? as its argument, so neither the stop-word filter
    # nor the length filter ever took effect.
    next if word.length <= 1 || @@stop_words.include?(word)

    # Look names up capitalised, also capitalising the character that
    # follows each hyphen (e.g. "smith-jones" -> "Smith-Jones").
    query_word = word.capitalize.gsub(/-(.)/) do
      "-" + Regexp.last_match(1).capitalize
    end

    # Bind values are passed as an array, the form sqlite3 documents.
    @@db.execute("select * from names where name = ?", [query_word]) do |row|
      # NOTE(review): the third column being 1 appears to flag an ambiguous
      # name -- confirm against the familynames.db schema.
      if row[2] == 1
        sum += @@ambiguous_weighting
      else
        sum += @@unambiguous_weighting
      end
    end
  end

  return {:name_frequency => 0} if sum == 0

  {:name_frequency => (sum / words.length.to_f)}
end

.path_to_data(data_filename) ⇒ Object



13
14
15
# File 'lib/names.rb', line 13

# Builds the path of a bundled data file.
#
# The result is the directory containing this source file, joined with
# "../data/" followed by +data_filename+.
#
# @param data_filename [String] file name inside the data directory
# @return [String] path to the data file
def self.path_to_data data_filename
  source_dir = File.dirname(File.expand_path(__FILE__))
  relative_path = "../data/" + data_filename
  File.join(source_dir, relative_path)
end