Class: Markov::DB

Inherits:
Object
  • Object
show all
Defined in:
lib/markov/db.rb

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ DB

Returns a new instance of DB.



4
5
6
7
8
9
10
11
# File 'lib/markov/db.rb', line 4

# Stores connection settings and the chunk size, falling back to a default
# for every key that is missing from +options+ (or whose value is nil/false).
#
# @param options [Hash] optional settings, read by symbol key:
#   :host (default "localhost"), :user ("postgres"), :port (5432),
#   :dbname (""), :password ("") and :chunk (4)
def initialize(options = {})
  fallbacks = {
    host: "localhost",
    user: "postgres",
    port: 5432,
    dbname: "",
    password: "",
    chunk: 4
  }
  # `||` (not Hash#fetch) so that an explicit nil/false also gets the default.
  fallbacks.each do |key, fallback|
    instance_variable_set("@#{key}", options[key] || fallback)
  end
end

Instance Method Details

#config ⇒ Object



13
14
15
16
17
18
19
20
21
# File 'lib/markov/db.rb', line 13

# Collects the stored connection settings into a hash.
#
# @return [Hash] the keys :host, :port, :user, :dbname and :password (in
#   that order) mapped to the matching instance variables
def config
  %i[host port user dbname password].each_with_object({}) do |key, settings|
    settings[key] = instance_variable_get("@#{key}")
  end
end

#csv_sources ⇒ Object



68
69
70
71
72
# File 'lib/markov/db.rb', line 68

# Lists the distinct values of the +source+ column of +word_groups+.
#
# The SQL text is kept in @query before being handed to +connection+.
#
# @return [Array] the flattened +values+ of the query result
def csv_sources
  @query = "SELECT DISTINCT source
            FROM word_groups"
  rows = connection(@query).values
  rows.flatten
end

#import_csv(name, source) ⇒ Object



48
49
50
51
52
53
54
55
56
57
# File 'lib/markov/db.rb', line 48

# Writes the word groups of +source+ to a temporary CSV file (via +tmp_csv+)
# and loads that file into the +word_groups+ table with a COPY statement.
#
# The temporary directory is always removed afterwards and the memoised
# @word_groups is cleared, whether or not the import succeeded.
#
# @param name [String] file name of the temporary CSV inside the temp dir
# @param source [Object] passed through unchanged to +tmp_csv+
# @return [Object] whatever +connection+ returns for the COPY statement
def import_csv(name, source)
  # Forget state from any earlier call: a stale @csv would make the cleanup
  # below try to delete an already-removed directory and mask the real error.
  @csv = nil
  @dir = nil
  @csv = tmp_csv(name, source)
  @query = "COPY word_groups FROM '#{@dir}/#{name}' DELIMITER ',' CSV"
  connection(@query)
ensure
  # If tmp_csv failed part-way, @csv is still nil but @dir may already exist,
  # so fall back to @dir to avoid leaking the temp directory.
  tmp_dir = @csv ? File.dirname(@csv) : @dir
  FileUtils.remove_entry(tmp_dir) if tmp_dir && File.directory?(tmp_dir)
  @word_groups = nil
end

#json_sources ⇒ Object



74
75
76
77
78
# File 'lib/markov/db.rb', line 74

# Lists the distinct +word_groups->'source'+ values found in the
# +word_groups_jsonb+ table.
#
# The SQL text is kept in @query before being handed to +connection+.
#
# @return [Array] the flattened +values+ of the query result
def json_sources
  @query = "SELECT DISTINCT word_groups->'source'
            FROM word_groups_jsonb"
  rows = connection(@query).values
  rows.flatten
end

#lookup(word, source) ⇒ Object



59
60
61
62
63
64
65
66
# File 'lib/markov/db.rb', line 59

# Counts the suffixes recorded in +word_groups+ for rows whose fifth prefix
# element equals +word+ and whose source equals +source+.
#
# Both values are embedded in the SQL text as string literals, so any single
# quote in them is doubled first; otherwise a word such as "don't" would
# terminate the literal early and break (or alter) the statement.
# NOTE(review): this assumes the server runs with standard_conforming_strings
# on, so that backslashes are not escape characters — confirm.
# NOTE(review): the index 5 is hard-coded; presumably it depends on the chunk
# size used when the rows were written — verify against the importer.
#
# @param word [#to_s] value compared against prefix[5]
# @param source [#to_s] value compared against the source column
# @return [Array] the +values+ of the query result (suffix, count rows)
def lookup(word, source)
  quoted_word = word.to_s.gsub("'", "''")
  quoted_source = source.to_s.gsub("'", "''")
  @query = "SELECT suffix, count(*) AS count
            FROM word_groups
            WHERE prefix[5] = '#{quoted_word}'
            AND source = '#{quoted_source}'
            GROUP BY suffix"
  connection(@query).values
end

#split(input) ⇒ Object



27
28
29
30
31
# File 'lib/markov/db.rb', line 27

# Turns one or more "word/tag" tokens into "word,tag" text.
#
# Each token is cut at "/"; the first part is CGI-escaped and the second
# part is used as-is, or replaced by a pair of double quotes ("") when the
# token has no second part. The pieces for several tokens are concatenated
# with no separator between them.
#
# @param input [String, Array<String>] a token or (nested) list of tokens
# @return [String] the concatenated "word,tag" pieces
def split(input)
  [input].flatten.each_with_object("") do |token, out|
    word, tag = token.split('/')
    out << [CGI.escape(word), tag || "\"\""].join(",")
  end
end

#tmp_csv(name, source) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/markov/db.rb', line 33

# Writes the tagged word groups of +source+ to a CSV file inside a freshly
# created temporary directory.
#
# Every row has three cells: +name+, the group's prefix tokens (each passed
# through +split+, comma-joined and wrapped in braces), and the group's
# suffix passed through +split+. The directory is remembered in @dir and the
# file path in @path; the caller is responsible for removing the directory.
#
# @param name [String] file name to create inside the temp dir; also written
#   as the first cell of every row
# @param source [Object] passed through unchanged to +word_groups+
# @return [String] path of the CSV file that was written
def tmp_csv(name, source)
  @dir = Dir.mktmpdir
  @path = [@dir, name].join("/")
  @tmp_csv = CSV.open(@path, "wb") do |out|
    word_groups(source, { tagged: true }).each do |group|
      prefix_cells = group[:prefix].map { |token| split(token) }
      prefix_literal = "{#{prefix_cells.join(",")}}"
      out << [name, prefix_literal, split(group[:suffix])]
    end
  end
  @path
end

#word_groups(source, options = {}) ⇒ Object



23
24
25
# File 'lib/markov/db.rb', line 23

# Returns the word groups parsed from +source+, memoised in @word_groups.
#
# Once a value is cached it is returned for every later call, regardless of
# the +source+ or +options+ passed; set @word_groups to nil to force a
# fresh parse.
#
# @param source [Object] handed to Parser.new
# @param options [Hash] handed to Parser#groups together with @chunk
# @return [Object] the (possibly cached) result of Parser#groups
def word_groups(source, options = {})
  return @word_groups if @word_groups

  @word_groups = Parser.new(source).groups(@chunk, options)
end