GrnMini
Groonga(Rroonga) wrapper for use as the KVS.
Groonga(Rroonga) for using easily. You can add the data in the column without specifying. You can easy to use and persistence, advanced search query, sort, grouping (drill down), snippets, and pagination. You can make an immediate search engine.
Installation
$ gem install grn_mini
Basic Usage
Create a database with the name "test.db".
require 'grn_mini'
array = GrnMini::Array.new("test.db")
Add the record with the number column and text column.
Determine the column type when you first call "GrnMini::Array#add". (Add inverted index if data type is "string".)
array.add(text: "aaa", number: 1)
It is also possible to use the '<<'
array << {text: "bbb", number: 2}
array << {text: "ccc", number: 3}
array.size #=> 3
Open an existing database
require 'grn_mini'
array = GrnMini::Array.new("test.db")
array.size #=> 3
Create a temporary database. (Useful for testing)
GrnMini::Array.tmpdb do |array|
array << {text: "aaa", number: 1}
array << {text: "bbb", number: 2}
array << {text: "ccc", number: 3}
end
# Delete temporary database
Data Type
GrnMini::Array.tmpdb do |array|
array << {filename: "a.txt", int: 1, float: 1.5, time: Time.at(1999)}
array << {filename: "b.doc", int: 2, float: 2.5, time: Time.at(2000)}
# ShortText
array[1].filename #=> "a.txt"
array[2].filename #=> "b.doc"
# Int32
array[1].int #=> 1
array[2].int #=> 2
# Float
array[1].float #=> 1.5
array[2].float #=> 2.5
# Time
array[1].time #=> 1999-01-01
array[2].time #=> 2000-01-01
end
See also 8.4. Data type — Groonga documentation.
Access Record
Add
require 'grn_mini'
array = GrnMini::Array.new("test2.db")
array << {name:"Tanaka", age: 11, height: 162.5}
array << {name:"Suzuki", age: 31, height: 170.0}
Read
record = array[1] # Read from id (> 0)
record.id #=> 1
Access function with the same name as the column name
record.name #=> "Tanaka
record.age #=> 11
record.height #=> 162.5
Groonga::Record#attributes is useful for debug
record.attributes #=> {"_id"=>1, "age"=>11, "height"=>162.5, "name"=>"Tanaka"}
Update
array[2].name = "Hayashi"
array[2].attributes #=> {"_id"=>2, "age"=>31, "height"=>170.0, "name"=>"Hayashi"}
Delete
Delete by passing id.
array.delete(1)
# It returns 'nil' value when you access a deleted record
array[1].attributes #=> {"_id"=>1, "age"=>0, "height"=>0.0, "name"=>nil}
# Can't see deleted records if access from Enumerable
array.first.id #=> 2
array.first.attributes #=> {"_id"=>2, "age"=>31, "height"=>170.0, "name"=>"Hayashi"}
It is also possible to pass the block.
GrnMini::Array.tmpdb do |array|
array << {name:"Tanaka", age: 11, height: 162.5}
array << {name:"Suzuki", age: 31, height: 170.0}
array << {name:"Hayashi", age: 20, height: 165.0}
array.delete do |record|
record.age <= 20
end
array.size #=> 1
array.first.attributes #=> {"_id"=>2, "age"=>31, "height"=>170.0, "name"=>"Suzuki"}
end
Search
Use GrnMini::Array#select method.
:text
column is set to the :default_column
implicitly.
GrnMini::Array.tmpdb do |array|
array << {text:"aaa", number:1}
array << {text:"bbb", number:20}
array << {text:"bbb ccc", number:2}
array << {text:"bbb", number:15}
array << {text:"ccc", number:3}
results = array.select("aaa")
results.map {|record| record.attributes} #=> [{"_id"=>1, "_key"=>{"_id"=>1, "number"=>1, "text"=>"aaa"}, "_score"=>1}]
# AND
results = array.select("bbb ccc")
results.map {|record| record.attributes} #=> [{"_id"=>2, "_key"=>{"_id"=>3, "number"=>2, "text"=>"bbb ccc"}, "_score"=>2}]
# Specify column
results = array.select("bbb number:<10")
results.map {|record| record.attributes} #=> [{"_id"=>2, "_key"=>{"_id"=>3, "number"=>2, "text"=>"bbb ccc"}, "_score"=>2}]
# AND, OR, Grouping
results = array.select("bbb (number:<= 10 OR number:>=20)")
results.map {|record| record.attributes} #=> [{"_id"=>2, "_key"=>{"_id"=>3, "number"=>2, "text"=>"bbb ccc"}, "_score"=>2}, {"_id"=>4, "_key"=>{"_id"=>2, "number"=>20, "text"=>"bbb"}, "_score"=>2}]
# NOT
results = array.select("bbb - ccc")
results.map {|record| record.attributes} #=> [{"_id"=>1, "_key"=>{"_id"=>2, "number"=>20, "text"=>"bbb"}, "_score"=>1}, {"_id"=>3, "_key"=>{"_id"=>4, "number"=>15, "text"=>"bbb"}, "_score"=>1}]
end
Change :default_column
to :filename
column.
GrnMini::Array.tmpdb do |array|
array << {text: "txt", filename:"a.txt"}
array << {text: "txt", filename:"a.doc"}
array << {text: "txt", filename:"a.rb"}
# Specify column
results = array.select("filename:@txt")
results.first.attributes #=> {"_id"=>1, "_key"=>{"_id"=>1, "filename"=>"a.txt", "text"=>"txt"}, "_score"=>1}
# Change default_column
results = array.select("txt", default_column: "filename")
results.first.attributes #=> {"_id"=>1, "_key"=>{"_id"=>1, "filename"=>"a.txt", "text"=>"txt"}, "_score"=>1}
end
See also 8.10.1. Query syntax, Groonga::Table#select
Sort
Specify column name to sort.
GrnMini::Array.tmpdb do |array|
array << {name:"Tanaka", age: 11, height: 162.5}
array << {name:"Suzuki", age: 31, height: 170.0}
array << {name:"Hayashi", age: 21, height: 175.4}
array << {name:"Suzuki", age: 5, height: 110.0}
sorted = array.sort(["age"])
sorted.map {|r| {name: r.name, age: r.age}}
#=> [{:name=>"Suzuki", :age=> 5},
# {:name=>"Tanaka", :age=>11},
# {:name=>"Hayashi", :age=>21},
# {:name=>"Suzuki", :age=>31}]
end
Combination sort.
sorted = array.sort([{key: "name", order: :ascending},
{key: "age" , order: :descending}])
sorted.map {|r| {name: r.name, age: r.age}}
#=> [{:name=>"Hayashi", :age=>21},
# {:name=>"Suzuki", :age=>31},
# {:name=>"Suzuki", :age=> 5},
# {:name=>"Tanaka", :age=>11}]
Grouping
Drill down aka.
GrnMini::Array.tmpdb do |array|
array << {text:"aaaa.txt", suffix:"txt", type:1}
array << {text:"aaaa.doc", suffix:"doc", type:2}
array << {text:"aabb.txt", suffix:"txt", type:2}
groups = GrnMini::Util::group_with_sort(array, "suffix")
groups.size #=> 2
[groups[0].key, groups[0].n_sub_records] #=> ["txt", 2]
[groups[1].key, groups[1].n_sub_records] #=> ["doc", 1]
end
Grouping from selection results.
GrnMini::Array.tmpdb do |array|
array << {text:"aaaa", suffix:"txt"}
array << {text:"aaaa", suffix:"doc"}
array << {text:"aaaa", suffix:"txt"}
array << {text:"cccc", suffix:"txt"}
results = array.select("aa")
groups = GrnMini::Util::group_with_sort(results, "suffix")
groups.size #=> 2
[groups[0].key, groups[0].n_sub_records] #=> ["txt", 2]
[groups[1].key, groups[1].n_sub_records] #=> ["doc", 1]
end
Snippet
Display of keyword surrounding text. It is often used in search engine.
Use GrnMini::Util::text_snippet_from_selection_results
.
GrnMini::Array.tmpdb do |array|
array << {text: <<EOF, filename: "aaa.txt"}
[1] This is a pen pep pea pek pet.
------------------------------
------------------------------
------------------------------
------------------------------
[2] This is a pen pep pea pek pet.
------------------------------
------------------------------
------------------------------
------------------------------
EOF
results = array.select("This pen")
snippet = GrnMini::Util::text_snippet_from_selection_results(results)
record = results.first
segments = snippet.execute(record.text)
segments.size #=> 2
segments[0] #=> "[1] <<This>> is a <<pen>> pep pea pek pet.\n------------------------------\n------------------------------\n---"
segments[1] #=> "--------\n------------------------------\n[2] <<This>> is a <<pen>> pep pea pek pet.\n-------------------------"
end
GrnMini::Util::html_snippet_from_selection_results
is HTML escaped.
GrnMini::Array.tmpdb do |array|
array << {text: <<EOF, filename: "aaa.txt"}
<html>
<div>This is a pen pep pea pek pet.</div>
</html>
EOF
results = array.select("This pen")
snippet = GrnMini::Util::html_snippet_from_selection_results(results, '<span class="strong">', '</span>') # Default value is '<strong>', '</strong>'
record = results.first
segments = snippet.execute(record.text)
segments.size #=> 1
segments.first #=> "<html>\n <div><span class=\"strong\">This</span> is a <span class=\"strong\">pen</span> pep pea pek pet.</div>\n</html>\n"
end
See also Groonga::Expression#snippet
Pagination
#paginate is more convenient than #sort if you want a pagination.
GrnMini::Array.tmpdb do |array|
array << {text: "aaaa", filename: "1.txt"}
array << {text: "aaaa aaaa", filename: "2a.txt"}
array << {text: "aaaa aaaa aaaa", filename: "3.txt"}
array << {text: "aaaa aaaa", filename: "2b.txt"}
array << {text: "aaaa aaaa", filename: "2c.txt"}
array << {text: "aaaa aaaa", filename: "2d.txt"}
array << {text: "aaaa aaaa", filename: "2e.txt"}
array << {text: "aaaa aaaa", filename: "2f.txt"}
results = array.select("aaaa")
# -- page1 --
page_entries = results.paginate([["_score", :desc]], :page => 1, :size => 5)
# Total number of record
page_entries.n_records #=> 8
# Page offset
page_entries.start_offset #=> 1
page_entries.end_offset #=> 5
# Page entries
page_entries.size #=> 5
# -- page2 --
page_entries = results.paginate([["_score", :desc]], :page => 2, :size => 5)
# Sample page content display
puts "#{page_entries.n_records} hit. (#{page_entries.start_offset} - #{page_entries.end_offset})"
page_entries.each do |record|
puts "#{record.filename}: #{record.text}"
end
#=> 8 hit. (6 - 8)
# 2b.txt: aaaa aaaa
# 2f.txt: aaaa aaaa
# 1.txt: aaaa
end
See also Groonga::Table#pagenate