Module: Linguakit
- Defined in:
- lib/linguakit_ruby.rb
Constant Summary collapse
- DEFAULT_COMMAND =
"linguakit %{module} %{lang} %{input}"
- DEFAULT_COMMAND_STR =
"linguakit %{module} %{lang} '%{input}' %{options}"
Class Method Summary collapse
- .get_phrases(item) ⇒ Object
- .get_score(principal_items, secondary_items, **args) ⇒ Object
- .item_config(item) ⇒ Object
- .items_to_array(items) ⇒ Object
- .keyphrases(input, **args) ⇒ Object
- .keyword(input, **args) ⇒ Object
- .sentiment(input, **args) ⇒ Object
- .str_to_file(str) ⇒ Object
Class Method Details
.get_phrases(item) ⇒ Object
90 91 92 93 94 95 96 97 |
# File 'lib/linguakit_ruby.rb', line 90
#
# Resolves an item descriptor into its phrases.
#
# The descriptor is normalized through +item_config+ and its +:type+ picks
# the strategy:
# * +:str+ - +item[:data]+ is passed to +keyword+ and the +:phrase+ of every
#   returned entry is collected with +items_to_array+.
# * +:arr+ - the normalized +:data+ value is returned untouched.
#
# Any other +:type+ falls through and yields +nil+.
def get_phrases(item)
  settings = item_config(item)
  kind = settings[:type]

  if kind == :str
    items_to_array(keyword(item[:data]))
  elsif kind == :arr
    settings[:data]
  end
end
.get_score(principal_items, secondary_items, **args) ⇒ Object
99 100 101 102 103 104 105 106 107 108 |
# File 'lib/linguakit_ruby.rb', line 99
#
# Scores how well the secondary phrases are covered by the principal ones.
#
# Both arguments are item descriptors resolved through +get_phrases+. Every
# secondary phrase is looked up in the principal phrases with FuzzyMatch;
# the scores that reach the threshold are summed, multiplied by 100 and
# divided by the number of principal phrases.
#
# @param principal_items [Hash] descriptor of the reference phrases
# @param secondary_items [Hash] descriptor of the phrases to look up
# @param args [Hash] +:score+ is the minimum score a match needs in order to
#   be counted (defaults to 0.8)
# @return [Float] aggregated score; 0.0 when there are no principal phrases
def get_score(principal_items, secondary_items, **args)
  threshold = args[:score] || 0.8
  principal_phrases = get_phrases(principal_items)
  secondary_phrases = get_phrases(secondary_items)

  # Nothing to match against; this also avoids dividing by zero below.
  return 0.0 if principal_phrases.empty?

  # The matcher depends only on the principal phrases, so it is built once
  # rather than once per secondary phrase.
  matcher = FuzzyMatch.new(principal_phrases)

  scores = secondary_phrases.map do |phrase|
    match = matcher.find(phrase, { find_with_score: true })
    # NOTE(review): match[1] is presumably the similarity score returned
    # alongside the matched record - confirm against the fuzzy_match docs.
    match[1] if match && match[1] >= threshold
  end
  final_score = scores.compact.sum(0.0)

  (final_score * 100) / principal_phrases.length
end
.item_config(item) ⇒ Object
83 84 85 86 87 88 |
# File 'lib/linguakit_ruby.rb', line 83
#
# Normalizes an item descriptor so that +:data+ and +:type+ are always set.
#
# @param item [Hash, nil] descriptor with optional +:data+ and +:type+ keys;
#   +nil+ is treated like an empty descriptor
# @return [Hash] +{ data:, type: }+ where +:type+ defaults to +:str+ and
#   +:data+ defaults to an empty array for +:arr+ items and to "" otherwise
def item_config(item)
  source = item || {}
  type = source[:type] || :str
  # An :arr item without data must still be a collection; a String default
  # would break callers that iterate over the phrases.
  fallback = type == :arr ? [] : ""

  { data: source[:data] || fallback, type: type }
end
.items_to_array(items) ⇒ Object
72 73 74 |
# File 'lib/linguakit_ruby.rb', line 72
#
# Extracts the +:phrase+ value of every entry.
#
# Entries without a phrase are skipped, so a blank line in the tool output
# (which is parsed into an entry whose +:phrase+ is +nil+) does not end up
# in the phrase list. A +nil+ collection yields an empty array.
#
# @param items [Array<Hash>, nil] entries carrying a +:phrase+ key
# @return [Array] the non-nil phrases, in their original order
def items_to_array(items)
  (items || []).map { |item| item[:phrase] }.compact
end
.keyphrases(input, **args) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/linguakit_ruby.rb', line 29
#
# Runs the linguakit 'mwe' module over +input+ and parses its output.
#
# The text is first written to a file through +str_to_file+ and the file
# path is handed to the command. Each non-empty, tab-separated output line
# becomes one entry.
#
# @param input [String] text to analyse
# @param args [Hash] +:lang+ (defaults to 'es') and +:opts+, the measure
#   flag (defaults to '-chi')
# @return [Array<Hash>] entries of the form
#   +{ phrase:, rank:, composition: }+ with +:rank+ converted to Float
def keyphrases(input, **args)
  # Measure flags accepted in args[:opts]:
  # -chi = chi-square co-occurrence measure
  # -log = loglikelihood
  # -scp = symmetrical conditional probability
  # -mi = mutual information
  # -cooc = co-occurrence counting
  config = {
    module: 'mwe',
    input: str_to_file(input),
    lang: args[:lang] || 'es',
    options: args[:opts] || '-chi'
  }

  # DEFAULT_COMMAND has no options placeholder, so the measure flag is
  # appended explicitly; formatting the template alone would drop it.
  command = "#{DEFAULT_COMMAND % config} #{config[:options]}"
  stdout, = Open3.capture3(command)

  # Blank lines would otherwise turn into entries with a nil phrase.
  stdout.split("\n").reject(&:empty?).map do |line|
    phrase, rank, composition = line.split("\t")
    { phrase: phrase, rank: rank.to_f, composition: composition }
  end
end
.keyword(input, **args) ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/linguakit_ruby.rb', line 54
#
# Runs the linguakit 'key' module over +input+ and parses its output.
#
# The text is written to a file through +str_to_file+; the resulting path,
# the module name and the language (+args[:lang]+, 'es' when absent) are
# substituted into DEFAULT_COMMAND. Every line of standard output is split
# on tabs into an entry +{ phrase:, rank:, composition: }+, with the rank
# converted to Float.
def keyword(input, **args)
  params = {
    module: 'key',
    input: str_to_file(input),
    lang: args[:lang] || 'es'
  }

  # Only standard output is used; stderr and the exit status are ignored.
  stdout, = Open3.capture3(DEFAULT_COMMAND % params)

  stdout.split("\n").map do |line|
    phrase, rank, composition = line.split("\t")
    { phrase: phrase, rank: rank.to_f, composition: composition }
  end
end
.sentiment(input, **args) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/linguakit_ruby.rb', line 13
#
# Runs the linguakit 'sent' module and returns the detected sentiment.
#
# With +opts: '-s'+ the input is passed as literal text using
# DEFAULT_COMMAND_STR; otherwise it is substituted unchanged into
# DEFAULT_COMMAND.
#
# @param input [String] the text (with '-s') or the value placed in the
#   command's input slot
# @param args [Hash] +:lang+ (defaults to 'es') and +:opts+
# @return [Hash] +{ emotion:, point: }+ taken from the second and third
#   tab-separated fields of standard output; +:emotion+ is +nil+ and
#   +:point+ is 0.0 when those fields are missing
def sentiment(input, **args)
  # -s = input is a string and not a file
  string_mode = args[:opts] == '-s'

  # In string mode the template wraps the text in single quotes for the
  # shell. Close, escape and reopen the quote around every embedded
  # apostrophe so it can neither break nor inject into the command line.
  text = string_mode ? input.to_s.gsub("'") { "'\\''" } : input

  config = {
    module: 'sent',
    input: text,
    lang: args[:lang] || 'es',
    options: args[:opts]
  }
  template = string_mode ? DEFAULT_COMMAND_STR : DEFAULT_COMMAND
  stdout, = Open3.capture3(template % config)

  # Split once and tolerate output that lacks the expected fields instead
  # of failing with NoMethodError on nil.
  fields = stdout.split("\t")
  {
    emotion: fields[1],
    point: fields[2].to_s.split("\n").first.to_f
  }
end
.str_to_file(str) ⇒ Object
76 77 78 79 80 81 |
# File 'lib/linguakit_ruby.rb', line 76
#
# Writes +str+ to a new UTF-8 temporary file under "<cwd>/tmp" and returns
# the file's path.
#
# The directory is created when it does not exist yet, and the file handle
# is closed even if writing fails.
#
# NOTE(review): only the path is returned, so the Tempfile object becomes
# unreferenced; Tempfile removes its file when the object is finalized, so
# callers should consume the path promptly.
#
# @param str [String] content to write
# @return [String] path of the file that was written
def str_to_file(str)
  dir = File.join(Dir.pwd, 'tmp')
  Dir.mkdir(dir) unless File.directory?(dir)

  file = Tempfile.new(['data', '.txt'], dir, encoding: 'utf-8')
  begin
    file.write(str)
  ensure
    file.close
  end
  file.path
end