Module: Linguakit

Defined in:
lib/linguakit_ruby.rb

Constant Summary

# Format template for a linguakit invocation: module, language, then input.
# It has no %{options} placeholder, so an :options entry in the hash given to
# String#% is not part of the resulting command.
DEFAULT_COMMAND =
"linguakit %{module} %{lang} %{input}"
# Variant template that wraps the input in single quotes and appends
# %{options} after it.
DEFAULT_COMMAND_STR =
"linguakit %{module} %{lang} '%{input}' %{options}"

Class Method Summary

Class Method Details

.get_phrases(item) ⇒ Object



90
91
92
93
94
95
96
97
# File 'lib/linguakit_ruby.rb', line 90

# Resolves an item descriptor into a list of phrases.
#
# The descriptor is normalised once through +item_config+, so a missing
# +:data+ becomes "" and a missing +:type+ becomes +:str+ for every branch.
#
# @param item [Hash] descriptor with optional +:data+ and +:type+ keys
# @return [Array, nil] for +:str+, the +:phrase+ values of the +keyword+
#   results computed from the data; for +:arr+, the data itself; +nil+ for
#   any other type
def get_phrases(item)
  config = item_config(item)
  case config[:type]
  when :str
    # Use the normalised data so the "" default also applies on this path.
    items_to_array keyword(config[:data])
  when :arr
    config[:data]
  end
end

.get_score(principal_items, secondary_items, **args) ⇒ Object



99
100
101
102
103
104
105
106
107
108
# File 'lib/linguakit_ruby.rb', line 99

# Scores how well the secondary phrases are covered by the principal phrases.
#
# Each secondary phrase is fuzzy-matched against the principal phrases; the
# scores of matches at or above the threshold are summed, multiplied by 100
# and divided by the number of principal phrases.
#
# @param principal_items [Hash] item descriptor accepted by +get_phrases+
# @param secondary_items [Hash] item descriptor accepted by +get_phrases+
# @param args [Hash] +:score+ is the minimum match score to count (default 0.8)
# @return [Float] the resulting score; 0.0 when there are no principal phrases
def get_score(principal_items, secondary_items, **args)
  threshold = args[:score] || 0.8
  # Array() turns a nil phrase list (unknown item type) into an empty one.
  principal_phrases = Array(get_phrases(principal_items))
  secondary_phrases = Array(get_phrases(secondary_items))
  # Nothing to compare against: avoid dividing by zero below.
  return 0.0 if principal_phrases.empty?

  # Build the matcher once instead of once per secondary phrase.
  matcher = FuzzyMatch.new(principal_phrases)
  matched_scores = secondary_phrases.map { |phrase|
    match = matcher.find(phrase, {find_with_score: true})
    match[1] if match && match[1] >= threshold
  }.compact
  # Float seed keeps the return type stable even when nothing matched.
  (matched_scores.sum(0.0) * 100) / principal_phrases.length
end

.item_config(item) ⇒ Object



83
84
85
86
87
88
# File 'lib/linguakit_ruby.rb', line 83

# Normalises an item descriptor, filling in defaults for falsy entries.
#
# @param item [Hash] descriptor that may carry +:data+ and +:type+
# @return [Hash] +{ data:, type: }+ where data falls back to "" and type
#   falls back to +:str+
def item_config(item)
  data = item[:data]
  type = item[:type]
  { data: data || "", type: type || :str }
end

.items_to_array(items) ⇒ Object



72
73
74
# File 'lib/linguakit_ruby.rb', line 72

# Extracts the +:phrase+ entry from each item.
#
# @param items [Array<Hash>] items to read from
# @return [Array] one +:phrase+ value per item, in the same order
def items_to_array(items)
  items.map do |entry|
    entry[:phrase]
  end
end

.keyphrases(input, **args) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/linguakit_ruby.rb', line 29

# Runs linguakit's multiword-expression module ('mwe') over a text.
#
# The text is written to a file via +str_to_file+ and the file path is
# handed to the command, followed by the measure option.
#
# Measure options noted by the original author:
#   -chi   = chi-square co-occurrence measure
#   -log   = loglikelihood
#   -scp   = symmetrical conditional probability
#   -mi    = mutual information
#   -cooc  = co-occurrence counting
#
# @param input [String] text to analyse
# @param args [Hash] +:lang+ language code (default 'es'); +:opts+ option
#   string appended after the input (default '-chi')
# @return [Array<Hash>] one +{ phrase:, rank:, composition: }+ hash per
#   non-empty, tab-separated output line; +rank+ is converted with +to_f+
def keyphrases(input, **args)
  lang = args[:lang] || 'es'
  options = args[:opts] || '-chi'
  # Pass an argument vector rather than one interpolated string: the options
  # actually reach linguakit, and a path or language containing spaces or
  # shell characters cannot split or alter the command.
  argv = ['linguakit', 'mwe', lang.to_s, str_to_file(input), *options.to_s.split]
  stdout, = Open3.capture3(*argv)
  stdout.split("\n").reject(&:empty?).map do |line|
    phrase, rank, composition = line.split("\t")
    { phrase: phrase, rank: rank.to_f, composition: composition }
  end
end

.keyword(input, **args) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/linguakit_ruby.rb', line 54

# Runs linguakit's keyword module ('key') over a text.
#
# The text is written to a file via +str_to_file+ and the file path is
# handed to the command.
#
# @param input [String] text to analyse
# @param args [Hash] +:lang+ language code (default 'es')
# @return [Array<Hash>] one +{ phrase:, rank:, composition: }+ hash per
#   non-empty, tab-separated output line; +rank+ is converted with +to_f+
def keyword(input, **args)
  lang = args[:lang] || 'es'
  # An argument vector keeps a path or language containing spaces or shell
  # characters from being split or interpreted.
  stdout, = Open3.capture3('linguakit', 'key', lang.to_s, str_to_file(input))
  stdout.split("\n").reject(&:empty?).map do |line|
    phrase, rank, composition = line.split("\t")
    { phrase: phrase, rank: rank.to_f, composition: composition }
  end
end

.sentiment(input, **args) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/linguakit_ruby.rb', line 13

# Runs linguakit's sentiment module ('sent').
#
# @param input [String] the value handed to linguakit as its input argument;
#   with +opts: '-s'+ it is treated as the text itself rather than a file
#   (per the original author's note on -s)
# @param args [Hash] +:lang+ language code (default 'es'); +:opts+ is only
#   forwarded when it is exactly '-s'
# @return [Hash] +{ emotion:, point: }+ taken from the second and third
#   tab-separated fields of the output; when the output lacks them,
#   +emotion+ is nil and +point+ is 0.0
def sentiment(input, **args)
  argv = ['linguakit', 'sent', (args[:lang] || 'es').to_s, input.to_s]
  argv << '-s' if args[:opts] == '-s'
  # Argument vector instead of a quoted shell string: apostrophes or other
  # shell characters in the text can no longer break or alter the command.
  stdout, = Open3.capture3(*argv)
  fields = stdout.split("\t")
  {
    emotion: fields[1],
    # to_s guards against a missing third field (empty/unexpected output).
    point: fields[2].to_s.split("\n").first.to_f
  }
end

.str_to_file(str) ⇒ Object



76
77
78
79
80
81
# File 'lib/linguakit_ruby.rb', line 76

# Writes a string to a new UTF-8 temporary file under "<cwd>/tmp" and
# returns the file's path.
#
# The "tmp" directory is created when it does not exist yet.
#
# NOTE: only the path is returned; the Tempfile object itself goes out of
# scope. Per Ruby's Tempfile documentation the file is removed when that
# object is garbage-collected or the interpreter exits, so callers should
# consume the path promptly.
#
# @param str [String] content to write
# @return [String] path of the written file (prefix "data", suffix ".txt")
def str_to_file(str)
  dir = "#{ Dir.pwd}/tmp"
  Dir.mkdir(dir) unless Dir.exist?(dir)
  file = Tempfile.new(['data', '.txt'], dir, encoding: 'utf-8')
  begin
    file.write str
  ensure
    # Release the handle even if the write fails.
    file.close
  end
  file.path
end