Class: Lumix::Concordancer
- Inherits:
-
Object
- Object
- Lumix::Concordancer
- Defined in:
- lib/lumix/concordancer.rb
Instance Attribute Summary
-
#db ⇒ Object
readonly
Returns the value of attribute db.
-
#link_on_import ⇒ Object
writeonly
Sets the attribute link_on_import.
-
#progress_proc ⇒ Object
Returns the value of attribute progress_proc.
-
#tp ⇒ Object
readonly
Returns the value of attribute tp.
Instance Method Summary
- #all ⇒ Object
- #correct(*ids) ⇒ Object
- #create_filter(f, &block) ⇒ Object
- #create_link_pool ⇒ Object
- #find(filters) ⇒ Object
- #get_id(file) ⇒ Object
-
#initialize(db_uri, options = {}) ⇒ Concordancer
constructor
A new instance of Concordancer.
- #link(*ids) ⇒ Object
- #link!(*ids) ⇒ Object
- #link_on_import! ⇒ Object
- #link_on_import? ⇒ Boolean
- #read(*files) ⇒ Object
- #read_file(file) ⇒ Object
- #simulate! ⇒ Object
- #strategy ⇒ Object
Constructor Details
#initialize(db_uri, options = {}) ⇒ Concordancer
Returns a new instance of Concordancer.
44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/lumix/concordancer.rb', line 44
# Sets up a concordancer backed by the database at +db_uri+.
#
# @param db_uri  handed unchanged to +connect+
# @param options [Hash] keys read here:
#   :progress_proc - stored in @progress_proc
#   :recreate      - when truthy, every table is dropped and +migrate+ is run
def initialize(db_uri, options = {})
  @progress_proc = options[:progress_proc]
  @db = connect(db_uri)

  if options[:recreate]
    # Start from an empty schema: drop everything, then rebuild it.
    db.tables.each { |t| db.drop_table t }
    migrate(db)
  end

  @ids = all
  @tp = TextProcessing.new
end
Instance Attribute Details
#db ⇒ Object (readonly)
Returns the value of attribute db.
40 41 42 |
# File 'lib/lumix/concordancer.rb', line 40
# Read-only accessor.
#
# @return the object stored in @db
def db
  @db
end
#link_on_import=(value) ⇒ Object (writeonly)
Sets the attribute link_on_import.
42 43 44 |
# File 'lib/lumix/concordancer.rb', line 42
# Write-only accessor for the flag reported by #link_on_import?.
#
# @param value the new value of @link_on_import (its truthiness is what matters)
def link_on_import=(value)
  @link_on_import = value
end
#progress_proc ⇒ Object
Returns the value of attribute progress_proc.
41 42 43 |
# File 'lib/lumix/concordancer.rb', line 41
# Reader for the progress callback.
#
# @return the object stored in @progress_proc (nil if none was set)
def progress_proc
  @progress_proc
end
#tp ⇒ Object (readonly)
Returns the value of attribute tp.
40 41 42 |
# File 'lib/lumix/concordancer.rb', line 40
# Read-only accessor.
#
# @return the object stored in @tp
def tp
  @tp
end
Instance Method Details
#all ⇒ Object
164 165 166 |
# File 'lib/lumix/concordancer.rb', line 164
# Delegates to TaggedText.ids.
#
# @return whatever TaggedText.ids returns (presumably the ids of all
#   stored texts - confirm against TaggedText)
def all
  TaggedText.ids
end
#correct(*ids) ⇒ Object
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
# File 'lib/lumix/concordancer.rb', line 143
# Re-reads the source file of each given text and stores its current
# content; the stored digest is refreshed when it no longer matches.
#
# @param ids ids (or nested arrays of ids) to process; every id from #all
#   when none are given. Each id is converted with +to_i+.
def correct(*ids)
  ids = all if ids.empty?

  ids.flatten.each do |id|
    record = TaggedText[id.to_i]
    next unless record # no row for this id; nothing to correct

    file = record.filename
    text = File.read(file).to_utf
    record.text = text

    expected = text.digest
    if record.digest != expected
      puts "Correcting text #{file}"
      record.digest = expected
    end

    record.save
  end
end
#create_filter(f, &block) ⇒ Object
198 199 200 |
# File 'lib/lumix/concordancer.rb', line 198
# Forwards to the search strategy's +create_filter+.
#
# @param f the filter definition, passed through unchanged
# @param block optional block, passed through unchanged
# @return whatever the strategy returns
def create_filter(f, &block)
  strategy.create_filter(f, &block)
end
#create_link_pool ⇒ Object
60 61 62 |
# File 'lib/lumix/concordancer.rb', line 60
# Builds the pool on which linking jobs are scheduled.
#
# The pool is created with 4 when the strategy reports +concurrent_link?+,
# otherwise with 1 (presumably the worker count - confirm against Pool).
#
# @return [Pool]
def create_link_pool
  size = strategy.concurrent_link? ? 4 : 1
  Pool.new(size)
end
#find(filters) ⇒ Object
202 203 204 |
# File 'lib/lumix/concordancer.rb', line 202
# Forwards to the search strategy's +find+.
#
# @param filters passed through unchanged
# @return whatever the strategy returns
def find(filters)
  strategy.find(filters)
end
#get_id(file) ⇒ Object
72 73 74 75 76 |
# File 'lib/lumix/concordancer.rb', line 72
# Looks up the stored text whose digest matches the current content of +file+.
#
# @param file path of the file to read
# @return the id of the matching TaggedText, or nil when there is none
def get_id(file)
  content = File.read(file).to_utf
  saved = TaggedText[:digest => content.digest]
  return nil unless saved

  saved.id
end
#link(*ids) ⇒ Object
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/lumix/concordancer.rb', line 178
# Schedules +strategy.link_text+ for each given id on a fresh link pool and
# reports progress after each one.
#
# A block passed to this method is currently ignored: the lines that would
# yield to it are commented out below.
#
# @param ids ids (or nested arrays of ids); every id from #all when none given
# @return whatever the pool's +shutdown+ returns
def link(*ids)
  ids = all if ids.empty?
  ids.flatten!

  prog = Progress.new(:link, ids.size)
  progress(prog)

  pool = create_link_pool
  ids.each_with_index do |id, index|
    #ds = db[:assoc].filter(:text_id => id)
    #yield ds if block_given?
    # TODO implement force
    pool.schedule do
      strategy.link_text(id) #if ds.empty?
      progress(prog, index + 1)
    end
  end
  pool.shutdown
end
#link!(*ids) ⇒ Object
172 173 174 175 176 |
# File 'lib/lumix/concordancer.rb', line 172
# Calls #link with a block that deletes the dataset it is given.
#
# NOTE(review): #link currently has its yield commented out, so this block
# is never invoked and this method behaves like plain #link. Confirm whether
# the forced re-link is still meant to be implemented.
#
# @param ids forwarded to #link
# @return whatever #link returns
def link!(*ids)
  link(*ids) do |ds|
    ds.delete
  end
end
#link_on_import! ⇒ Object
68 69 70 |
# File 'lib/lumix/concordancer.rb', line 68
# Switches the link-on-import flag on.
#
# @return [true]
def link_on_import!
  @link_on_import = true
end
#link_on_import? ⇒ Boolean
64 65 66 |
# File 'lib/lumix/concordancer.rb', line 64
# Reports the link-on-import flag.
#
# @return the raw value of @link_on_import; nil until it has been set, so
#   callers should rely on truthiness only
def link_on_import?
  @link_on_import
end
#read(*files) ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# File 'lib/lumix/concordancer.rb', line 78
# Imports the given files via #read_file on a pool of WORKERS, optionally
# linking each imported text.
#
# Files listed in 'unprocessed.lst' (in the current directory) are skipped.
# Any file whose import raises is appended to that list, so it is skipped on
# later runs as well.
#
# @param files passed to +tp.to_filelist+ to obtain the list of files
def read(*files)
  files = tp.to_filelist(*files)
  prog = Progress.new(:read, files.size)
  puts "Reading #{files.size} files"

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  @unprocessed =
    if File.exist?('unprocessed.lst')
      File.readlines('unprocessed.lst').map(&:chomp)
    else
      []
    end

  File.open('unprocessed.lst', 'a') do |up|
    link_pool = create_link_pool
    read_pool = Pool.new(WORKERS)
    link_pool.schedule { link! } if RELINK

    files.each_with_index do |file, index|
      if @unprocessed.member?(file)
        puts "Ignoring #{file}"
        next
      end

      read_pool.schedule do
        begin
          # NOTE(review): #read_file returns the created record rather than a
          # bare id - confirm that #link accepts it.
          id = read_file(file)
          link_pool.schedule { link id } if id && link_on_import?
        rescue => e
          puts "Error on file #{file}: #{e}", e.backtrace
          @unprocessed << file
          up.puts file
        end
        progress(prog, index + 1)
      end
    end

    # NOTE(review): this is scheduled before read_pool.shutdown; whether all
    # reads have finished by the time it runs depends on Pool - confirm.
    link_pool.schedule { link } if link_on_import? # make sure everything is linked
    read_pool.shutdown
    link_pool.shutdown
  end
end
#read_file(file) ⇒ Object
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
# File 'lib/lumix/concordancer.rb', line 117
# Imports a single file unless a TaggedText with the same filename and
# digest already exists.
#
# The tagged version is read from the file named by
# +tp.create_tagged_filename+ when that file exists; otherwise it is
# produced with +tp.process+ and written there.
#
# @param file path of the text to import
# @yield [tt] the newly created TaggedText, when a block is given
# @return the created TaggedText, or nil when the text was already stored
def read_file(file)
  text = File.read(file).to_utf
  digest = text.digest
  return if TaggedText.exists?(:filename => file, :digest => digest)

  puts "Reading file #{file}"

  # retrieve the tagged version
  tagged_file = tp.create_tagged_filename(file)
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if File.exist?(tagged_file)
    tagged = File.read(tagged_file)
  else
    tagged = tp.process(text)
    File.open(tagged_file, 'w') do |out|
      out.write tagged
    end
  end

  retagged = retag(tagged)
  tt = TaggedText.create(:digest => digest, :text => text, :tagged => retagged,
                         :filename => file, :tagged_filename => tagged_file)
  @ids << tt.id
  yield tt if block_given?
  tt
end
#simulate! ⇒ Object
168 169 170 |
# File 'lib/lumix/concordancer.rb', line 168
# Forwards to the search strategy's +simulate!+.
#
# @return whatever the strategy returns
def simulate!
  strategy.simulate!
end
#strategy ⇒ Object
56 57 58 |
# File 'lib/lumix/concordancer.rb', line 56
# Returns the search strategy, creating it on first use from @db and
# @progress_proc and reusing it afterwards.
#
# @return [SearchStrategy]
def strategy
  @strategy ||= SearchStrategy.new(@db, @progress_proc)
end