Class: Concordancer

Inherits:
Object
  • Object
show all
Defined in:
lib/lumix/concordancer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(db_uri, options = {}) ⇒ Concordancer

Returns a new instance of Concordancer.



55
56
57
58
59
60
61
62
# File 'lib/lumix/concordancer.rb', line 55

# Builds a concordancer on top of the database reachable at +db_uri+.
#
# @param db_uri the value handed unchanged to +connect+
# @param options [Hash] optional settings read by the constructor:
#   * +:progress_proc+ - stored as-is in +@progress_proc+
#   * +:recreate+      - when truthy, every table reported by the connection
#     is dropped inside the block passed to +connect+
def initialize(db_uri, options = {})
  @progress_proc = options[:progress_proc]

  @db = connect(db_uri) do |connection|
    if options[:recreate]
      connection.tables.each { |table| connection.drop_table(table) }
    end
  end

  # Remember the ids of all texts that are already stored.
  @ids = db[:texts].map { |row| row[:id] }
  @tp = TextProcessing.new
end

Instance Attribute Details

#db ⇒ Object (readonly)

Returns the value of attribute db.



52
53
54
# File 'lib/lumix/concordancer.rb', line 52

# Reader for the database handle.
#
# @return [Object] the value of +@db+, which the constructor assigns from the
#   result of +connect(db_uri)+
def db
  @db
end

#progress_proc ⇒ Object

Returns the value of attribute progress_proc.



53
54
55
# File 'lib/lumix/concordancer.rb', line 53

# Reader for the progress callback.
#
# @return [Object, nil] the value of +@progress_proc+; the constructor
#   initialises it from +options[:progress_proc]+, so it is nil when that
#   option was not given
def progress_proc
  @progress_proc
end

#tp ⇒ Object (readonly)

Returns the value of attribute tp.



52
53
54
# File 'lib/lumix/concordancer.rb', line 52

# Reader for the text-processing helper.
#
# @return [TextProcessing] the value of +@tp+, which the constructor sets to
#   +TextProcessing.new+
def tp
  @tp
end

Instance Method Details

#all ⇒ Object



101
102
103
# File 'lib/lumix/concordancer.rb', line 101

# Lists the ids of every row in the +texts+ table.
#
# Only the +id+ column is selected, and each row is reduced to its values, so
# the result holds one single-element array per row (e.g. +[[1], [2]]+).
#
# @return [Array<Array>] one array of column values per text row
def all
  id_rows = db[:texts].select(:id)
  id_rows.map(&:values)
end

#fallback? ⇒ Boolean

Returns:

  • (Boolean)


64
65
66
# File 'lib/lumix/concordancer.rb', line 64

# Reports the fallback flag.
#
# Returns the raw value of +@fallback+ without coercing it to true/false.
# NOTE(review): +@fallback+ is not assigned anywhere in the visible part of
# this file, so this is nil unless it is set elsewhere - confirm.
#
# @return [Object, nil] the value of +@fallback+
def fallback?
  @fallback
end

#find(filter) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/lumix/concordancer.rb', line 126

# Searches the tagged form of every stored text for +filter+ and reports each
# hit as a pair of snippets: one over the original text, one over the tagged
# text.
#
# For every row of the +texts+ table the tagged string is scanned with the
# regular expression produced by +Filter.to_re(filter)+. Each match position
# is passed to +find_range+, and the returned +src_*+ / +tagged_*+ bounds are
# used to build the two +TextSnippet+ objects.
#
# Progress is reported through +progress+ once before the search starts and
# once after each text has been handled.
#
# NOTE(review): the bounds are converted with +to_i+, so a nil bound coming
# back from +find_range+ would silently become 0 - confirm whether that can
# happen for texts without association rows.
#
# @param filter the search expression, passed to +Filter.to_re+ and to
#   +Progress.new+
# @yield [text_snippet, tagged_snippet] called once per hit; when no block is
#   given both snippets are printed with +puts+, followed by an empty line
# @return [Integer] the total number of hits over all texts
def find(filter)
  texts = db[:texts]
  prog = Progress.new(:search, texts.count, filter)
  progress(prog)

  pattern = Filter.to_re(filter)
  hits = 0

  texts.each_with_index do |row, position|
    text_id = row[:id]
    plain = row[:text]
    tagged = row[:tagged]

    # First pass: turn every regexp match into a range record.
    ranges = []
    tagged.scan(pattern) do
      match = Regexp.last_match
      # TODO decouple database operations for performance
      ranges << find_range(text_id, match.begin(0), match.end(0))
    end

    # Second pass: build the snippets and hand them to the caller.
    ranges.each do |range|
      text_snippet = TextSnippet.new(File.basename(row[:filename]), plain, range[:src_begin].to_i, range[:src_end].to_i)
      tagged_snippet = TextSnippet.new(File.basename(row[:tagged_filename]), tagged, range[:tagged_begin].to_i, range[:tagged_end].to_i)

      if block_given?
        yield text_snippet, tagged_snippet
      else
        puts text_snippet
        puts tagged_snippet
        puts
      end

      hits += 1
    end

    progress(prog, position + 1)
  end

  hits
end

#find_range(t_id, t_begin, t_end) ⇒ Object



163
164
165
166
# File 'lib/lumix/concordancer.rb', line 163

# Looks up the association rows of one text that overlap a span of the tagged
# string and collapses them into a single record.
#
# Rows of the +assoc+ table are kept when their +text_id+ equals +t_id+, their
# +tagged_end+ is greater than or equal to +t_begin+ and their +tagged_begin+
# is less than +t_end+. The result aggregates the smallest begin and largest
# end for both the source and the tagged coordinates.
#
# @param t_id the id of the text whose associations are searched
# @param t_begin start offset of the span within the tagged text
# @param t_end end offset of the span within the tagged text
# @return the first row of the aggregate query, exposing +:src_begin+,
#   +:src_end+, +:tagged_begin+ and +:tagged_end+
def find_range(t_id, t_begin, t_end)
  overlapping = db[:assoc].filter(:text_id => t_id)
  overlapping = overlapping.filter { tagged_end >= t_begin }
  overlapping = overlapping.filter { tagged_begin < t_end }

  bounds = overlapping.select {
    [
      { min(:src_begin) => :src_begin },
      { max(:src_end) => :src_end },
      { min(:tagged_begin) => :tagged_begin },
      { max(:tagged_end) => :tagged_end }
    ]
  }
  bounds.first
end

#get_id(file) ⇒ Object



68
69
70
71
72
# File 'lib/lumix/concordancer.rb', line 68

# Finds the id under which the contents of +file+ are already stored.
#
# The file is read and its +digest+ is looked up in the +texts+ table.
#
# @param file [String] path of the file to look up
# @return the +:id+ of the matching row, or nil when no row has that digest
def get_id(file)
  digest = File.read(file).digest
  row = db[:texts][:digest => digest]
  row[:id] if row
end


#link(*ids) ⇒ Object



111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/lumix/concordancer.rb', line 111

# Makes sure association rows exist for the given texts.
#
# With no arguments every id returned by +all+ is processed. The id list is
# flattened, so ids may be passed individually or as (nested) arrays.
#
# For each id the matching +assoc+ rows are selected; the dataset is yielded
# to the optional block first, and +link_text+ is called only when the
# dataset is empty afterwards. Progress is reported through +progress+ once
# up front and once after each id.
#
# @param ids ids of the texts to link; all texts when omitted
# @yield [dataset] the +assoc+ rows of the current text, before the emptiness
#   check
# @return [Array] the flattened list of ids that were processed
def link(*ids)
  ids = all if ids.empty?
  ids.flatten!

  prog = Progress.new(:link, ids.size)
  progress(prog)

  ids.each_with_index do |text_id, position|
    associations = db[:assoc].filter(:text_id => text_id)
    yield associations if block_given?

    # Only texts without any association rows get (re)linked.
    link_text(text_id) if associations.empty?
    progress(prog, position + 1)
  end
end

#link!(*ids) ⇒ Object



105
106
107
108
109
# File 'lib/lumix/concordancer.rb', line 105

# Forced variant of +link+: the association rows of every processed text are
# deleted in the block passed to +link+, before +link+ checks whether the
# dataset is empty.
#
# @param ids ids of the texts to relink, forwarded unchanged to +link+
# @return whatever +link+ returns
def link!(*ids)
  link(*ids) { |associations| associations.delete }
end

#read(*files) ⇒ Object



74
75
76
77
78
79
80
81
82
83
# File 'lib/lumix/concordancer.rb', line 74

# Reads a batch of files and then links the stored texts.
#
# The arguments are expanded with +tp.to_filelist+, each resulting file is
# passed to +tp.read_file+, and progress is reported after every file.
# Finally +link+ is called without arguments.
#
# NOTE(review): this calls +tp.read_file+, not this object's own +read_file+
# (the one that inserts into the +texts+ table) - confirm that is intended.
#
# @param files files to read, in whatever form +tp.to_filelist+ accepts
# @return the result of +link+
def read(*files)
  paths = tp.to_filelist(files)
  total = paths.size
  prog = Progress.new(:read, total)
  puts "Reading #{total} files"

  paths.each_with_index do |path, position|
    tp.read_file(path)
    progress(prog, position + 1)
  end

  link
end

#read_file(file) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/lumix/concordancer.rb', line 85

# Stores the contents of +file+ together with its tagged version, unless a
# text with the same digest is already present in the +texts+ table.
#
# For a new text the tagged file name is derived with
# +create_tagged_filename+; when that file does not exist yet it is produced
# by +process_file+. The tagged file is then read, passed through +retag+,
# and a row is inserted into +texts+. The new row id is appended to +@ids+.
#
# @param file [String] path of the raw text file
# @return [Array, nil] +@ids+ after the new id was appended, or nil when the
#   text was already stored
def read_file(file)
  # read the raw text
  text = File.read(file)
  digest = text.digest
  return if db[:texts][:digest => digest]

  # retrieve the tagged version
  tagged_file = create_tagged_filename(file)
  # File.exist? instead of File.exists?: the latter is deprecated and no
  # longer available in Ruby 3.2+.
  process_file(file, tagged_file) unless File.exist?(tagged_file)

  tagged = retag(File.read(tagged_file))
  id = db[:texts].insert(:digest => digest, :text => text, :tagged => tagged, :filename => file, :tagged_filename => tagged_file)
  @ids << id
end