Module: Eco::Data::FuzzyMatch::StringHelpers

Included in:
ClassMethods
Defined in:
lib/eco/data/fuzzy_match/string_helpers.rb

Instance Method Summary collapse

Instance Method Details

#get_words(str, normalized: false) ⇒ Object



17
18
19
20
21
# File 'lib/eco/data/fuzzy_match/string_helpers.rb', line 17

# Splits `str` into its words: runs of ASCII letters, apostrophes and hyphens.
# Any other character (digits, blanks, punctuation) acts as a separator.
#
# @param str [String, nil] the text to split.
# @param normalized [Boolean] when `false`, `str` is first passed through
#   `normalize_string`; when `true`, it is scanned as given.
# @return [Array<String>] the words found; `[]` when `str` is `nil` or `false`.
def get_words(str, normalized: false)
  return [] unless str

  text = normalized ? str : normalize_string(str)
  text.scan(/[a-zA-Z'-]+/).compact
end

#no_blanks(str) ⇒ Object



61
62
63
64
# File 'lib/eco/data/fuzzy_match/string_helpers.rb', line 61

# Removes every space character (`' '`) from `str`. Other whitespace, such
# as tabs or newlines, is left untouched.
#
# @param str [String, Object] the value to clean.
# @return [String, nil] a copy of `str` without spaces, or `nil` when `str`
#   is not a String.
def no_blanks(str)
  str.delete(' ') if str.is_a?(String)
end

#normalize_string(value) ⇒ Object

Downcases the value and strips its leading and trailing whitespace.



6
7
8
9
10
11
12
13
14
15
# File 'lib/eco/data/fuzzy_match/string_helpers.rb', line 6

# Downcases `value` and strips its leading and trailing whitespace.
#
# @param value [String, Symbol, Array, Object] the value to normalize.
#   Arrays are normalized element by element (recursively).
# @return [String, Array, nil] the normalized String (Symbols are converted
#   to String), an Array of normalized elements, or `nil` for any other type.
def normalize_string(value)
  case value
  when Array
    value.map { |val| normalize_string(val) }
  when Symbol
    # Convert to String before recursing: `Symbol#to_sym` returns the very
    # same Symbol, so recursing on it never reached the String branch.
    normalize_string(value.to_s)
  when String
    value.downcase.strip
  end
end

#remove_matching_words(str1, str2, normalized: false) ⇒ Array<String>

Deletes the words of str1 and str2 that match

Returns:

  • (Array<String>)

    pair of strings: the words of str1 that are not in str2, and the words of str2 that are not in str1.



68
69
70
71
72
73
74
75
76
77
# File 'lib/eco/data/fuzzy_match/string_helpers.rb', line 68

# Removes from each string the words that also appear in the other one.
#
# @param str1 [String, nil] first string.
# @param str2 [String, nil] second string.
# @param normalized [Boolean] when `false`, both inputs are first passed
#   through `normalize_string`.
# @return [Array<String>] a two-element Array: the words of `str1` missing
#   from `str2` and the words of `str2` missing from `str1`, each joined with
#   a single space. When either input is `nil` or empty, the (possibly
#   normalized) inputs are returned as they are.
def remove_matching_words(str1, str2, normalized: false)
  str1, str2 = [str1, str2].map { |str| normalize_string(str) } unless normalized

  # Nothing to compare when one side is missing or empty.
  either_missing = !(str1 && str2)
  return [str1, str2] if either_missing || str1.empty? || str2.empty?

  words1 = get_words(str1)
  words2 = get_words(str2)
  [words1 - words2, words2 - words1].map { |rest| rest.join(" ") }
end

#string_combinations(str, range = 2..3, normalized: false) ⇒ Array<String>

Keeps the original order of the words of the input string. It does not keep consecutive words together (it can jump/skip words).

Parameters:

  • str (String)

    the input string with the words.

  • range (Integer, Range) (defaults to: 2..3)

    determines the length of the generated values.

Returns:

  • (Array<String>)

    combinations of range length of words



36
37
38
39
# File 'lib/eco/data/fuzzy_match/string_helpers.rb', line 36

# Builds word combinations of `str` by extracting its words with `get_words`
# and handing them, together with `range`, to `combinations`; each resulting
# group is joined with a single space.
#
# @param str [String] the input string with the words.
# @param range [Integer, Range] determines the length of the generated values.
# @param normalized [Boolean] forwarded to `get_words`.
# @return [Array<String>] the joined combinations.
def string_combinations(str, range = 2..3, normalized: false)
  words = get_words(str, normalized: normalized)
  combinations(words, range).map { |group| group.join(' ') }
end

#string_ngrams(str, range = 2..3, normalized: false) ⇒ Array<String>

Keeps the original order of the words and keeps consecutive words together.

Parameters:

  • str (String)

    the input string with the words.

  • range (Integer, Range) (defaults to: 2..3)

    determines the length of the generated values.

Returns:

  • (Array<String>)

    combinations of range length of words.



27
28
29
# File 'lib/eco/data/fuzzy_match/string_helpers.rb', line 27

# Builds word n-grams of `str` by extracting its words with `get_words` and
# handing them, together with `range`, to `ngrams`. The result of `ngrams`
# is returned untouched.
#
# @param str [String] the input string with the words.
# @param range [Integer, Range] determines the length of the generated values.
# @param normalized [Boolean] forwarded to `get_words`.
# @return [Array<String>] whatever `ngrams` produces for the words.
def string_ngrams(str, range = 2..3, normalized: false)
  words = get_words(str, normalized: normalized)
  ngrams(words, range)
end

#string_permutations(str, range = 2..3, normalized: false) ⇒ Array<String>

It includes combinations that break the original order of the words. It does not keep consecutive words together (it can jump/skip words).

Parameters:

  • str (String)

    the input string with the words.

  • range (Integer, Range) (defaults to: 2..3)

    determines the length of the generated values.

Returns:

  • (Array<String>)

    permutations of range length of words



46
47
48
49
# File 'lib/eco/data/fuzzy_match/string_helpers.rb', line 46

# Builds word permutations of `str` by extracting its words with `get_words`
# and handing them, together with `range`, to `permutations`; each resulting
# group is joined with a single space.
#
# @param str [String] the input string with the words.
# @param range [Integer, Range] determines the length of the generated values.
# @param normalized [Boolean] forwarded to `get_words`.
# @return [Array<String>] the joined permutations.
def string_permutations(str, range = 2..3, normalized: false)
  words = get_words(str, normalized: normalized)
  permutations(words, range).map { |group| group.join(' ') }
end

#word_ngrams(str, range = 2..3, normalized: false) ⇒ Array<String>

Keeps the original order of the characters and keeps consecutive characters together.

Parameters:

  • str (String)

    the input word string.

  • range (Integer, Range) (defaults to: 2..3)

    determines the length of the generated values.

Returns:

  • (Array<String>)

    combinations of range length of characters.



55
56
57
58
59
# File 'lib/eco/data/fuzzy_match/string_helpers.rb', line 55

# Builds character n-grams of a single word: the characters of `str` are
# handed, together with `range`, to `ngrams`, and every resulting value is
# passed through `no_blanks`.
#
# @param str [String] the input word string.
# @param range [Integer, Range] determines the length of the generated values.
# @param normalized [Boolean] when `false`, `str` is first passed through
#   `normalize_string`.
# @return [Array<String>] the n-grams with spaces removed.
#   NOTE(review): assumes `ngrams` yields Strings; `no_blanks` returns `nil`
#   for anything else — confirm against `ngrams`.
def word_ngrams(str, range = 2..3, normalized: false)
  word  = normalized ? str : normalize_string(str)
  chars = word.to_s.chars
  ngrams(chars, range).map { |gram| no_blanks(gram) }
end