Class: Utility::Elasticsearch::Index::TextAnalysisSettings

Inherits:
Object
  • Object
show all
Defined in:
lib/utility/elasticsearch/index/text_analysis_settings.rb

Defined Under Namespace

Classes: UnsupportedLanguageCode

Constant Summary collapse

# Language code the constructor falls back to when it is given no language_code.
DEFAULT_LANGUAGE =
:en
# Upper gram length used as max_gram by the front_ngram filter in GENERIC_FILTERS.
FRONT_NGRAM_MAX_GRAM =
12
# Path to language_data.yml, resolved relative to the directory of this source file.
LANGUAGE_DATA_FILE_PATH =
File.join(File.dirname(__FILE__), 'language_data.yml')
# Token filter definitions keyed by filter name.
# NOTE: freeze is shallow here; the outer hash is frozen, the nested per-filter hashes are not.
GENERIC_FILTERS =
{
  # edge_ngram filter with gram lengths from 1 up to FRONT_NGRAM_MAX_GRAM.
  front_ngram: {
    type: 'edge_ngram',
    min_gram: 1,
    max_gram: FRONT_NGRAM_MAX_GRAM
  },
  # word_delimiter_graph filter: word/number parts and all catenated forms are enabled,
  # splitting on case changes and numerics is on, and the original token is not preserved.
  delimiter: {
    type: 'word_delimiter_graph',
    generate_word_parts: true,
    generate_number_parts: true,
    catenate_words: true,
    catenate_numbers: true,
    catenate_all: true,
    preserve_original: false,
    split_on_case_change: true,
    split_on_numerics: true,
    stem_english_possessive: true
  },
  # shingle filter of size 2 with an empty separator (adjacent tokens are glued together);
  # unigrams are not emitted.
  bigram_joiner: {
    type: 'shingle',
    token_separator: '',
    max_shingle_size: 2,
    output_unigrams: false
  },
  # Same shingle configuration as bigram_joiner, except unigrams are emitted as well.
  bigram_joiner_unigrams: {
    type: 'shingle',
    token_separator: '',
    max_shingle_size: 2,
    output_unigrams: true
  },
  # length filter bounded to min 0 / max 16.
  bigram_max_size: {
    type: 'length',
    min: 0,
    max: 16
  }
}.freeze
# Tokenizer name and folding filter names for the non-ICU configuration.
NON_ICU_ANALYSIS_SETTINGS = {
  tokenizer_name: 'standard',
  folding_filters: %w[cjk_width lowercase asciifolding]
}.freeze
# Tokenizer name and folding filter names for the ICU configuration.
ICU_ANALYSIS_SETTINGS = {
  tokenizer_name: 'icu_tokenizer',
  folding_filters: %w[icu_folding]
}.freeze

Instance Method Summary collapse

Constructor Details

#initialize(language_code: nil, analysis_icu: false) ⇒ TextAnalysisSettings

Returns a new instance of TextAnalysisSettings.



66
67
68
69
70
71
72
73
# File 'lib/utility/elasticsearch/index/text_analysis_settings.rb', line 66

# Resolves the language to use and stores the analysis configuration for it.
#
# @param language_code [String, Symbol, nil] language to configure; DEFAULT_LANGUAGE is used when nil
# @param analysis_icu [Boolean] flag stored on the instance and forwarded to icu_settings
# @raise [UnsupportedLanguageCode] when language_data has no entry for the resolved language code
def initialize(language_code: nil, analysis_icu: false)
  @language_code = (language_code || DEFAULT_LANGUAGE).to_sym

  # Report the resolved code rather than the raw argument, so the message still names
  # the language when the caller passed nil and the default was applied.
  unless language_data[@language_code]
    raise UnsupportedLanguageCode, "Language '#{@language_code}' is not supported"
  end

  @analysis_icu = analysis_icu
  @analysis_settings = icu_settings(analysis_icu)
end

Instance Method Details

#to_h ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/utility/elasticsearch/index/text_analysis_settings.rb', line 75

# Assembles the settings hash: an :analysis section holding the analyzer and filter
# definitions, and an :index section whose default similarity type is BM25.
#
# @return [Hash] hash with :analysis and :index keys, in that order
def to_h
  analysis_section = { analyzer: analyzer_definitions, filter: filter_definitions }
  default_similarity = { type: 'BM25' }
  index_section = { similarity: { default: default_similarity } }

  { analysis: analysis_section, index: index_section }
end