Class: Lumix::Concordancer

Inherits:
Object
  • Object
show all
Defined in:
lib/lumix/concordancer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(db_uri, options = {}) ⇒ Concordancer

Returns a new instance of Concordancer.



44
45
46
47
48
49
50
51
52
53
54
# File 'lib/lumix/concordancer.rb', line 44

# Connects to the database and prepares the concordancer for use.
#
# @param db_uri the value handed to connect to open the database
# @param options [Hash] recognised keys:
#   :progress_proc - stored and later passed to SearchStrategy.new
#   :recreate      - when truthy, every existing table is dropped and
#                    migrate is run against the fresh database
def initialize(db_uri, options = {})
  @progress_proc = options[:progress_proc]
  @db = connect(db_uri)

  if options[:recreate]
    # Start from an empty schema: drop everything, then rebuild it.
    db.tables.each do |table|
      db.drop_table(table)
    end
    migrate(db)
  end

  @ids = all
  @tp = TextProcessing.new
end

Instance Attribute Details

#db ⇒ Object (readonly)

Returns the value of attribute db.



40
41
42
# File 'lib/lumix/concordancer.rb', line 40

# Reader for @db, which the constructor assigns from connect(db_uri).
def db
  @db
end

#link_on_import=(value) ⇒ Object (writeonly)

Sets the attribute link_on_import.

Parameters:

  • value

    the value to set the attribute link_on_import to.



42
43
44
# File 'lib/lumix/concordancer.rb', line 42

# Writer for the @link_on_import flag. The read method consults this flag
# (through link_on_import?) to decide whether freshly read files get linked.
#
# @param value the new flag value
def link_on_import=(value)
  @link_on_import = value
end

#progress_proc ⇒ Object

Returns the value of attribute progress_proc.



41
42
43
# File 'lib/lumix/concordancer.rb', line 41

# Reader for @progress_proc, which the constructor takes from
# options[:progress_proc] and which is handed to SearchStrategy.new when the
# strategy is first built.
def progress_proc
  @progress_proc
end

#tp ⇒ Object (readonly)

Returns the value of attribute tp.



40
41
42
# File 'lib/lumix/concordancer.rb', line 40

# Reader for @tp, the TextProcessing instance created by the constructor.
def tp
  @tp
end

Instance Method Details

#all ⇒ Object



164
165
166
# File 'lib/lumix/concordancer.rb', line 164

# Returns TaggedText.ids. The constructor seeds @ids with it, and link and
# correct fall back to it when they are called without any ids.
# NOTE(review): presumably the ids of every stored text - confirm against
# TaggedText.ids.
def all
  TaggedText.ids
end

#correct(*ids) ⇒ Object



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/lumix/concordancer.rb', line 143

# Re-reads the source file of each given text and refreshes the stored copy.
#
# For every id that resolves to a TaggedText record, the file named by the
# record is read again, its text is stored on the record, the digest is
# replaced when it no longer matches, and the record is saved. Ids without a
# matching record are skipped.
#
# @param ids ids (possibly nested in arrays) to correct; when none are given
#   the list returned by all is used
# @return [Array] the flattened list of ids that was iterated
def correct(*ids)
  targets = ids.empty? ? all : ids
  targets.flatten.each do |raw_id|
    record = TaggedText[raw_id.to_i]
    next unless record

    path = record.filename
    contents = File.read(path).to_utf
    record.text = contents

    current_digest = contents.digest
    if record.digest != current_digest
      puts "Correcting text #{path}"
      record.digest = current_digest
    end
    # The record is saved even when the digest was already up to date,
    # because its text has been reassigned above.
    record.save
  end
end

#create_filter(f, &block) ⇒ Object



198
199
200
# File 'lib/lumix/concordancer.rb', line 198

# Delegates to strategy.create_filter, forwarding f and the optional block
# unchanged, and returns whatever the strategy returns.
def create_filter(f, &block)
  strategy.create_filter(f, &block)
end


60
61
62
# File 'lib/lumix/concordancer.rb', line 60

# Builds the pool used for linking jobs: four workers when the strategy
# reports concurrent_link?, otherwise a single worker.
#
# @return [Pool] a new pool sized for the current strategy
def create_link_pool
  workers = if strategy.concurrent_link?
    4
  else
    1
  end
  Pool.new(workers)
end

#find(filters) ⇒ Object



202
203
204
# File 'lib/lumix/concordancer.rb', line 202

# Delegates the search to strategy.find, passing filters through unchanged,
# and returns whatever the strategy returns.
def find(filters)
  strategy.find(filters)
end

#get_id(file) ⇒ Object



72
73
74
75
76
# File 'lib/lumix/concordancer.rb', line 72

# Looks up the stored text whose digest matches the current contents of file.
#
# @param file [String] path of the file to read
# @return the id of the matching TaggedText record, or nil when none exists
def get_id(file)
  digest = File.read(file).to_utf.digest
  record = TaggedText[:digest => digest]
  record.id if record
end


178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/lumix/concordancer.rb', line 178

# Schedules strategy.link_text for each given text id on a link pool and
# reports progress after every text.
#
# @param ids ids (possibly nested in arrays) to link; when none are given the
#   list returned by all is used
# @return whatever the pool's shutdown returns
def link(*ids)
  targets = ids.empty? ? all : ids
  targets.flatten!
  tracker = Progress.new(:link, targets.size)
  progress(tracker)

  pool = create_link_pool
  targets.each_with_index do |text_id, position|
    # TODO implement force: the per-text db[:assoc] dataset used to be looked
    # up and yielded here; with that disabled, a block passed to link is
    # ignored and every text is linked unconditionally.
    pool.schedule do
      strategy.link_text(text_id)
      progress(tracker, position + 1)
    end
  end
  pool.shutdown
end

#link!(*ids) ⇒ Object



172
173
174
175
176
# File 'lib/lumix/concordancer.rb', line 172

# Forced variant of link: forwards ids to link together with a block that
# deletes the dataset it is handed.
# NOTE(review): link currently has its yield commented out ("TODO implement
# force"), so this block does not appear to be invoked - confirm before
# relying on link! to discard existing associations.
#
# @param ids ids forwarded unchanged to link
# @return whatever link returns
def link!(*ids)
  link(*ids) { |ds| ds.delete }
end


68
69
70
# File 'lib/lumix/concordancer.rb', line 68

# Switches linking-on-import on by setting @link_on_import to true.
#
# @return [true]
def link_on_import!
  @link_on_import = true
end

#link_on_import? ⇒ Boolean

Returns:

  • (Boolean)


64
65
66
# File 'lib/lumix/concordancer.rb', line 64

# Tells whether read should link each file as soon as it has been imported.
#
# The raw flag is coerced so the predicate always answers with a real
# Boolean, including before the flag has ever been assigned.
#
# @return [Boolean] true when @link_on_import is truthy, false otherwise
def link_on_import?
  @link_on_import ? true : false
end

#read(*files) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/lumix/concordancer.rb', line 78

# Imports the given files, optionally linking each one as soon as it is read.
#
# The file arguments are expanded with tp.to_filelist. Paths already listed in
# 'unprocessed.lst' (relative to the current working directory) are skipped.
# When importing a file raises a StandardError, the error is printed, the path
# is remembered in @unprocessed and appended to 'unprocessed.lst', and the run
# continues with the remaining files.
#
# @param files files to import; passed through to tp.to_filelist
# @return whatever the link pool's shutdown returns
def read(*files)
  files = tp.to_filelist(*files)
  prog = Progress.new(:read, files.size)
  puts "Reading #{files.size} files"
  # File.exist? instead of the deprecated File.exists? alias, which no longer
  # exists in Ruby 3.2 and later.
  @unprocessed = if File.exist?('unprocessed.lst')
    File.readlines('unprocessed.lst').map(&:chomp)
  else
    []
  end

  File.open('unprocessed.lst', 'a') do |up|
    linkers = create_link_pool
    readers = Pool.new(WORKERS)

    linkers.schedule { link! } if RELINK

    files.each_with_index do |file, index|
      if @unprocessed.member?(file)
        puts "Ignoring #{file}"
        next
      end
      readers.schedule do
        begin
          tt = read_file(file)
          # read_file returns the created record (or nil when the file was
          # already imported), whereas link works on ids - hand it the id.
          linkers.schedule { link tt.id } if tt && link_on_import?
        rescue => e
          puts "Error on file #{file}: #{e}", e.backtrace
          @unprocessed << file
          up.puts file
        end
        progress(prog, index + 1)
      end
    end
    # NOTE(review): this catch-all link is queued before the read pool is shut
    # down; whether it can run before all reads finished depends on Pool's
    # semantics - confirm.
    linkers.schedule { link } if link_on_import? # make sure everything is linked
    readers.shutdown
    linkers.shutdown
  end
end

#read_file(file) ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/lumix/concordancer.rb', line 117

# Imports a single file as a TaggedText record, unless a record with the same
# filename and digest already exists.
#
# The tagged version is read from the file named by
# tp.create_tagged_filename(file) when that file exists; otherwise the text is
# run through tp.process and the result is written there for later runs. The
# tagged text is passed through retag before it is stored. The new record's id
# is appended to @ids.
#
# @param file [String] path of the text file to import
# @yield [tt] the newly created record, when a block is given
# @return the created record, or nil when the file was already imported
def read_file(file)
  text = File.read(file).to_utf
  digest = text.digest
  return nil if TaggedText.exists?(:filename => file, :digest => digest)

  puts "Reading file #{file}"
  # retrieve the tagged version
  tagged_file = tp.create_tagged_filename(file)
  # File.exist? instead of the deprecated File.exists? alias, which no longer
  # exists in Ruby 3.2 and later.
  tagged = if File.exist?(tagged_file)
    File.read(tagged_file)
  else
    fresh = tp.process(text)
    File.open(tagged_file, 'w') do |out|
      out.write fresh
    end
    fresh
  end

  retagged = retag(tagged)
  tt = TaggedText.create(:digest => digest, :text => text, :tagged => retagged, :filename => file, :tagged_filename => tagged_file)
  @ids << tt.id
  yield tt if block_given?
  tt
end

#simulate! ⇒ Object



168
169
170
# File 'lib/lumix/concordancer.rb', line 168

# Delegates to strategy.simulate! and returns whatever the strategy returns.
def simulate!
  strategy.simulate!
end

#strategy ⇒ Object



56
57
58
# File 'lib/lumix/concordancer.rb', line 56

# Returns the search strategy, building it on first use from @db and
# @progress_proc and reusing the same instance afterwards.
#
# @return [SearchStrategy] the memoized strategy
def strategy
  return @strategy if @strategy

  @strategy = SearchStrategy.new(@db, @progress_proc)
end