Class: Concordancer
- Inherits:
-
Object
- Object
- Concordancer
- Defined in:
- lib/lumix/concordancer.rb
Instance Attribute Summary collapse
-
#db ⇒ Object
readonly
Returns the value of attribute db.
-
#progress_proc ⇒ Object
Returns the value of attribute progress_proc.
-
#tp ⇒ Object
readonly
Returns the value of attribute tp.
Instance Method Summary collapse
- #all ⇒ Object
- #fallback? ⇒ Boolean
- #find(filter) ⇒ Object
- #find_range(t_id, t_begin, t_end) ⇒ Object
- #get_id(file) ⇒ Object
-
#initialize(db_uri, options = {}) ⇒ Concordancer
constructor
A new instance of Concordancer.
- #link(*ids) ⇒ Object
- #link!(*ids) ⇒ Object
- #read(*files) ⇒ Object
- #read_file(file) ⇒ Object
Constructor Details
#initialize(db_uri, options = {}) ⇒ Concordancer
Returns a new instance of Concordancer.
55 56 57 58 59 60 61 62 |
# File 'lib/lumix/concordancer.rb', line 55
#
# Creates a concordancer backed by the database at +db_uri+.
#
# @param db_uri the database location, handed unchanged to +connect+
# @param options [Hash] optional settings read by this constructor:
#   * +:progress_proc+ - stored as the progress callback
#   * +:recreate+      - when truthy, every table of the connected database
#     is dropped inside the block passed to +connect+
def initialize(db_uri, options = {})
  @progress_proc = options[:progress_proc]
  # NOTE(review): +connect+ is defined outside this listing; it presumably
  # yields the fresh connection before any schema setup - confirm.
  @db = connect(db_uri) do |db|
    db.tables.each { |t| db.drop_table t } if options[:recreate]
  end
  # Remember the ids of all texts already stored.
  @ids = db[:texts].map { |v| v[:id] }
  @tp = TextProcessing.new
end
Instance Attribute Details
#db ⇒ Object (readonly)
Returns the value of attribute db.
52 53 54 |
# File 'lib/lumix/concordancer.rb', line 52
#
# Reader for the +@db+ instance variable (the value returned by +connect+
# in the constructor).
#
# @return [Object] the current value of +@db+
def db
  @db
end
#progress_proc ⇒ Object
Returns the value of attribute progress_proc.
53 54 55 |
# File 'lib/lumix/concordancer.rb', line 53
#
# Reader for the +@progress_proc+ instance variable, which the constructor
# fills from +options[:progress_proc]+.
#
# @return [Object] the current value of +@progress_proc+
def progress_proc
  @progress_proc
end
#tp ⇒ Object (readonly)
Returns the value of attribute tp.
52 53 54 |
# File 'lib/lumix/concordancer.rb', line 52
#
# Reader for the +@tp+ instance variable (a +TextProcessing+ instance
# created by the constructor).
#
# @return [Object] the current value of +@tp+
def tp
  @tp
end
Instance Method Details
#all ⇒ Object
101 102 103 |
# File 'lib/lumix/concordancer.rb', line 101
#
# Lists the ids of all stored texts.
#
# Each row of the +:id+ selection is converted with +values+, so the result
# is an array of per-row value arrays (e.g. +[[1], [2]]+), not a flat list.
#
# @return [Array<Array>] one values array per row of the texts table
def all
  db[:texts].select(:id).map(&:values)
end
#fallback? ⇒ Boolean
64 65 66 |
# File 'lib/lumix/concordancer.rb', line 64
#
# Reports the value of the +@fallback+ instance variable.
#
# @return [Object, nil] whatever +@fallback+ holds; nil when it was never set
def fallback?
  @fallback
end
#find(filter) ⇒ Object
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
# File 'lib/lumix/concordancer.rb', line 126
#
# Searches the tagged version of every stored text for +filter+.
#
# For each regexp match the matching character range is looked up through
# +find_range+; a plain-text snippet and a tagged snippet are then built for
# every looked-up range. With a block, each snippet pair is yielded;
# without one, both snippets are printed followed by an empty line.
# Progress is reported once before the search and once after each text.
#
# @param filter the search expression, converted with +Filter.to_re+
# @yield [text_snippet, tagged_snippet] one pair per hit
# @return [Integer] the total number of hits over all texts
def find(filter)
  texts = db[:texts]
  prog = Progress.new(:search, texts.count, filter)
  progress(prog)
  re = Filter.to_re(filter)

  hits = 0
  texts.each_with_index do |t, index|
    t_id, text, tagged = t[:id], t[:text], t[:tagged]

    # Map every match to its source/tagged ranges before emitting anything,
    # so all lookups for a text happen ahead of the first yield.
    ranges = []
    tagged.scan(re) do
      match = Regexp.last_match
      # TODO decouple database operations for performance
      ranges << find_range(t_id, match.begin(0), match.end(0))
    end

    ranges.each do |f|
      text_snippet = TextSnippet.new(File.basename(t[:filename]), text,
                                     f[:src_begin].to_i, f[:src_end].to_i)
      tagged_snippet = TextSnippet.new(File.basename(t[:tagged_filename]), tagged,
                                       f[:tagged_begin].to_i, f[:tagged_end].to_i)
      if block_given?
        yield text_snippet, tagged_snippet
      else
        puts text_snippet
        puts tagged_snippet
        puts
      end
    end

    hits += ranges.size
    progress prog, index + 1
  end
  hits
end
#find_range(t_id, t_begin, t_end) ⇒ Object
163 164 165 166 |
# File 'lib/lumix/concordancer.rb', line 163
#
# Looks up the association rows of text +t_id+ that touch the tagged range
# +t_begin+..+t_end+ and returns the first row of an aggregate selection
# over them (min/max of the +src_*+ and +tagged_*+ columns).
#
# @param t_id the id of the text (matched against +:text_id+)
# @param t_begin start offset of the match in the tagged text
# @param t_end end offset of the match in the tagged text
# @return the first row of the aggregate selection
def find_range(t_id, t_begin, t_end)
  # Keep rows whose tagged range ends at or after the match start and
  # begins before the match end.
  # NOTE(review): the lower bound uses >= while the upper uses < - confirm
  # that including a row ending exactly at t_begin is intended.
  ds = db[:assoc].filter(:text_id => t_id).filter{tagged_end >= t_begin}.filter{tagged_begin < t_end}
  # NOTE(review): the hashes presumably alias each aggregate to its column
  # name; verify against the Sequel version in use.
  ds.select{[{min(:src_begin) => :src_begin},{ max(:src_end) => :src_end}, {min(:tagged_begin) => :tagged_begin}, {max(:tagged_end) => :tagged_end}]}.first
end
#get_id(file) ⇒ Object
68 69 70 71 72 |
# File 'lib/lumix/concordancer.rb', line 68
#
# Finds the id under which the content of +file+ is already stored.
#
# The file is read and the texts table is queried by the digest of its
# content (+String#digest+ is provided outside this listing).
#
# @param file [String] path of the file to look up
# @return the stored id, or nil when no text with that digest exists
def get_id(file)
  text = File.read(file)
  saved = db[:texts][:digest => text.digest]
  saved && saved[:id]
end
#link(*ids) ⇒ Object
111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/lumix/concordancer.rb', line 111
#
# Links the given texts, or every stored text when no ids are given.
#
# For each id the association rows of that text are selected. The dataset
# is yielded to the block (if any) first; afterwards +link_text+ is called
# when the dataset is empty. Progress is reported once up front and once
# after each id.
#
# @param ids ids (or nested arrays of ids) of the texts to link
# @yield [ds] the association dataset of each text, before the empty check
# @return [Array] the flattened list of processed ids
def link(*ids)
  # +all+ returns nested arrays, hence the flatten.
  ids = (ids.empty? ? all : ids).flatten

  prog = Progress.new(:link, ids.size)
  progress(prog)

  ids.each_with_index do |id, index|
    ds = db[:assoc].filter(:text_id => id)
    yield ds if block_given?
    link_text(id) if ds.empty?
    progress(prog, index + 1)
  end
end
#link!(*ids) ⇒ Object
105 106 107 108 109 |
# File 'lib/lumix/concordancer.rb', line 105
#
# Like +link+, but calls +delete+ on each text's association dataset as it
# is yielded, i.e. before +link+ checks whether the dataset is empty.
#
# @param ids ids of the texts to relink; forwarded unchanged to +link+
# @return whatever +link+ returns
def link!(*ids)
  link(*ids, &:delete)
end
#read(*files) ⇒ Object
74 75 76 77 78 79 80 81 82 83 |
# File 'lib/lumix/concordancer.rb', line 74
#
# Reads the given files and links the stored texts afterwards.
#
# The arguments are expanded with +tp.to_filelist+, the number of files is
# printed, each file is passed to +tp.read_file+ with a progress report
# after it, and finally +link+ is called without arguments.
#
# @param files paths (or whatever +tp.to_filelist+ accepts)
# @return whatever +link+ returns
def read(*files)
  files = tp.to_filelist(files)
  prog = Progress.new(:read, files.size)
  puts "Reading #{files.size} files"

  files.each_with_index do |file, index|
    # NOTE(review): this calls read_file on +tp+, not this object's own
    # #read_file - confirm which one is meant.
    tp.read_file(file)
    progress(prog, index + 1)
  end

  link
end
#read_file(file) ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
# File 'lib/lumix/concordancer.rb', line 85
#
# Stores +file+ in the texts table unless a text with the same digest is
# already there.
#
# For a new text the tagged companion file is created through
# +process_file+ when it does not exist yet; its content is passed through
# +retag+ and inserted together with the raw text, digest and both file
# names. The new row id is appended to +@ids+.
#
# @param file [String] path of the raw text file
# @return [Array, nil] +@ids+ after the insert, or nil when the text was
#   already stored
def read_file(file)
  # read the raw text
  text = File.read(file)
  digest = text.digest
  return if db[:texts][:digest => digest]

  # retrieve the tagged version
  tagged_file = create_tagged_filename(file)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  process_file(file, tagged_file) unless File.exist?(tagged_file)
  tagged = retag(File.read(tagged_file))

  id = db[:texts].insert(:digest => digest, :text => text, :tagged => tagged,
                         :filename => file, :tagged_filename => tagged_file)
  @ids << id
end