Class: RDig::Search::Searcher

Inherits:
Object
  • Object
show all
Includes:
Ferret::Search
Defined in:
lib/rdig/search.rb

Overview

This class is used to search the index. Call RDig::searcher to retrieve an instance ready for use.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(settings) ⇒ Searcher

takes the index section of the rdig configuration as a parameter.



13
14
15
16
17
# File 'lib/rdig/search.rb', line 13

# Creates a searcher from the index section of the RDig configuration.
#
# settings - index configuration object; its marshal_dump result is handed
#            to the Ferret query parser, and ferret_searcher later reads its
#            path.
def initialize(settings)
  @ferret_config = settings
  parser_options = settings.marshal_dump
  @query_parser = Ferret::QueryParser.new(parser_options)
  # Open the index searcher right away; doing so also assigns the parser's
  # field list (see ferret_searcher).
  ferret_searcher
end

Instance Attribute Details

#query_parserObject (readonly)

the query parser used to parse query strings



10
11
12
# File 'lib/rdig/search.rb', line 10

# Read-only accessor for the query parser used to parse query strings.
#
# Returns the value of @query_parser.
def query_parser
  @query_parser
end

Instance Method Details

#build_extract(data) ⇒ Object



96
97
98
# File 'lib/rdig/search.rb', line 96

# Builds a short extract from a document's text.
#
# data       - the text to shorten; may be nil.
# max_length - maximum number of characters to keep (default: 200).
#
# Returns data unchanged when it is nil or no longer than max_length
# characters, otherwise its first max_length characters.
def build_extract(data, max_length = 200)
  return data unless data && data.length > max_length

  # Start/length slicing yields exactly max_length characters; an inclusive
  # 0..max_length range would keep one character too many.
  data[0, max_length]
end

#ferret_searcherObject

returns the Ferret::Search::IndexSearcher instance used internally.



20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/rdig/search.rb', line 20

# Returns the Ferret searcher instance used internally, opening it on first
# use and reopening it when its reader reports that it is no longer the
# latest.
#
# Whenever a new searcher is opened, the query parser's fields are set to
# the field names reported by the new searcher's reader.
def ferret_searcher
  current = @ferret_searcher
  if current && !current.reader.latest?
    # Stale reader: close it and drop the reference so a new one is opened.
    current.close
    @ferret_searcher = nil
  end
  return @ferret_searcher if @ferret_searcher

  @ferret_searcher = Ferret::Search::Searcher.new(@ferret_config.path)
  @query_parser.fields = @ferret_searcher.reader.field_names.to_a
  @ferret_searcher
end

#get_maximum_score(query, options) ⇒ Object



33
34
35
36
37
38
# File 'lib/rdig/search.rb', line 33

# Returns the score of the first hit for the given query, or 0 when the
# search yields no hits.
#
# query   - the query passed on to the searcher's search_each.
# options - search options Hash; :limit and :offset are overridden (1 and 0)
#           on a merged copy, so the caller's Hash is left untouched.
def get_maximum_score(query, options)
  first_hit_options = options.merge(:limit => 1, :offset => 0)
  ferret_searcher.search_each(query, first_hit_options) do |_doc_id, score|
    # Leave the method as soon as the first hit is yielded.
    return score
  end
  0
end

#process_query(query) ⇒ Object



91
92
93
94
# File 'lib/rdig/search.rb', line 91

# Turns a query string into a parsed query using the query parser.
#
# query - a String to parse; any other object is returned as given.
#
# Returns the result of query_parser.parse for Strings, otherwise query
# itself.
def process_query(query)
  return query unless query.is_a?(String)

  query_parser.parse(query)
end

#search(query, options = {}) ⇒ Object

run a search.

query usually will be a user-entered string. See the Ferret query language for more information on queries. A Ferret::Search::Query instance may be given, too.

Some of the more often used options are:

offset

first document in result list to retrieve (0-based). The default is 0.

limit

number of documents to retrieve. The default is 10.

highlight

hash to configure excerpt highlighting, e.g.

:highlight => { :pre_tag => '<b>',
                :post_tag => '</b>',
                :ellipsis => '&hellip;',
                :excerpt_length => 50,
                :num_excerpts => 3 }

You may just set :highlight => true to go with the defaults, or use a hash to override those default values.

Please see the Ferret::Search::Searcher API for more options.



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/rdig/search.rb', line 60

# Runs a search against the index.
#
# query   - usually a user-entered query String (parsed via process_query);
#           other objects, e.g. an already built query, are used as given.
# options - Hash of search options handed to the searcher's search_each
#           (e.g. :offset, :limit). If :highlight is set, each hit's
#           :extract is replaced by the result of the searcher's highlight
#           call on the :data field; pass true for the defaults below or a
#           Hash to override individual values.
#
# Returns a Hash with :hitcount (the return value of search_each) and :list,
# an Array of hit Hashes with the keys :score, :title, :url, :extract,
# :relative_score and :doc_id.
def search(query, options={})
  query = process_query(query)
  RDig.logger.info "Query: #{query}"

  # Determine the top score before taking our own searcher reference:
  # get_maximum_score goes through ferret_searcher, which may close the
  # current searcher and open a new one. Fetching the reference afterwards
  # means the searcher used below is the one ferret_searcher returned last,
  # not one it has closed in the meantime.
  maximum_score = get_maximum_score(query, options)
  searcher = ferret_searcher

  results = []
  hitcount = searcher.search_each(query, options) do |doc_id, score|
    doc = searcher[doc_id]
    results << { :score => score,
                 :title => doc[:title],
                 :url => doc[:url],
                 :extract => build_extract(doc[:data]),
                 :relative_score => (score / maximum_score),
                 :doc_id => doc_id
               }
  end

  highlight = options[:highlight]
  if highlight
    # :highlight => true means "use the defaults"; only a Hash overrides them.
    overrides = Hash === highlight ? highlight : {}
    highlight_opts = { :pre_tag => '<b>',
                       :post_tag => '</b>',
                       :ellipsis => '&hellip;',
                       :excerpt_length => 50,
                       :num_excerpts => 3 }.merge(overrides)
    results.each do |r|
      r[:extract] = searcher.highlight(query, r[:doc_id], :data, highlight_opts)
    end
  end

  { :hitcount => hitcount, :list => results }
end