Class: DGaffML::Model
- Inherits:
-
Object
- Object
- DGaffML::Model
- Defined in:
- lib/dgaff_ml/model.rb
Instance Attribute Summary collapse
-
#model ⇒ Object
Returns the value of attribute model.
Instance Method Summary collapse
- #cast_val(value, directive) ⇒ Object
- #clean_str(string) ⇒ Object
- #convert(rows, model_keys, model_classes) ⇒ Object
-
#initialize(client, model_response) ⇒ Model
constructor
A new instance of Model.
- #predict(obs) ⇒ Object
- #translate_obs(obs) ⇒ Object
Constructor Details
#initialize(client, model_response) ⇒ Model
Returns a new instance of Model.
4 5 6 7 8 9 |
# File 'lib/dgaff_ml/model.rb', line 4
#
# Stores the API client and the model payload, and caches the payload's
# "id" and "user_id" entries in their own instance variables.
#
# @param client [Object] kept in @client for later calls
# @param model_response [#[]] model payload; read with the string keys
#   "id" and "user_id" (presumably a Hash -- confirm against the caller)
# @return [Model] a new instance of Model
def initialize(client, model_response)
  @client, @model = client, model_response
  @dataset_id, @user_id = @model["id"], @model["user_id"]
end
Instance Attribute Details
#model ⇒ Object
Returns the value of attribute model.
3 4 5 |
# File 'lib/dgaff_ml/model.rb', line 3
#
# Reader for the model payload held in @model.
#
# @return [Object] the current value of @model (nil if it was never set)
def model
  @model
end
Instance Method Details
#cast_val(value, directive) ⇒ Object
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/dgaff_ml/model.rb', line 83
#
# Casts one raw observation cell to the representation selected by
# +directive+.
#
# nil is passed through untouched so that #convert can apply its own
# missing-value handling (column average / empty token list) instead of
# receiving a coerced 0 or crashing on a nil receiver.
#
# @param value [Object, nil] raw cell value
# @param directive [String] one of "Integer", "Float", "Time", "Text",
#   "Phrase" or "Categorical"
# @return [Integer, Float, Array<String>, Object, nil]
#   - "Integer"       -> value.to_i
#   - "Float"         -> value.to_f
#   - "Time"          -> epoch seconds (Integer)
#   - "Text"/"Phrase" -> stemmed tokens of the cleaned string
#   - "Categorical"   -> value unchanged
#   - anything else, or a nil value -> nil
def cast_val(value, directive)
  return nil if value.nil?

  case directive
  when "Integer"
    value.to_i
  when "Float"
    value.to_f
  when "Time"
    # Work on the string form so Integer timestamps are accepted too.
    raw = value.to_s
    if raw =~ /\A\d{10}\z/
      # Ten digits: already epoch seconds. (Time.at rejects a String,
      # so the digits are converted directly.)
      raw.to_i
    elsif raw =~ /\A\d{13}\z/
      # Thirteen digits: epoch milliseconds, scaled down to seconds so
      # every "Time" value uses the same unit as the Chronic branch.
      raw.to_i / 1000
    else
      # Free-form date text; nil.to_i yields 0 if Chronic cannot parse it.
      Chronic.parse(raw).to_i
    end
  when "Text", "Phrase"
    clean_str(value).split(" ").collect { |word| Stemmer::stem_word(word) }
  when "Categorical"
    value
  end
end
#clean_str(string) ⇒ Object
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# Normalises free text before it is split into tokens.
#
# Steps, in the order the chained calls run:
#   1. Every character that is not a letter, a digit or one of ( ) , ! ? ' `
#      is replaced by a space.
#   2. The contraction suffixes 's 've n't 're 'd 'll are separated from the
#      preceding word by a space.
#   3. "," "!" and "?" are padded with a space on each side; parentheses are
#      padded as well, with a follow-up substitution for runs of three.
#   4. Runs of two or more whitespace characters collapse to a single space.
#   5. A URL pattern is replaced by "<URL/>", then the substrings "www",
#      "com" and "org" are replaced by a space.
#   6. The result is stripped and downcased.
#
# string must respond to gsub; nil is not handled here.
#
# NOTE(review): by the time the URL substitution runs, step 1 has already
# turned ":" and "/" into spaces, so the "http[s]?://" pattern looks
# unreachable -- confirm before relying on "<URL/>" tokens.
# NOTE(review): "www"/"com"/"org" are removed wherever they occur, including
# inside longer words, and the match happens before downcase, so only
# lowercase occurrences are affected.
# NOTE(review): gsub(" ", " ") and gsub("", "") are no-ops as rendered here;
# this listing may have lost whitespace inside the literals -- verify against
# lib/dgaff_ml/model.rb.
# NOTE(review): the output presumably has to stay in step with the model's
# "unique_terms"; confirm before changing any substitution.
# File 'lib/dgaff_ml/model.rb', line 56 def clean_str(string) string. gsub(/[^A-Za-z0-9(),!?\'\`]/, " "). gsub(" ", " "). gsub("\'s", " \'s"). gsub("", ""). gsub("\'ve", " \'ve"). gsub("n\'t", " n\'t"). gsub("\'re", " \'re"). gsub("\'d", " \'d"). gsub("\'ll", " \'ll"). gsub(",", " , "). gsub("!", " ! "). gsub("\(", " \\( "). gsub("\)", " \\) "). gsub(" \\\( \\\( \\\( ", " \(\(\( "). gsub(" \\\) \\\) \\\) ", " \)\)\) "). gsub("\?", " \? "). gsub(/\s{2,}/, " "). gsub(Regexp.new("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"), "<URL/>"). gsub("www", " "). gsub("com", " "). gsub("org", " "). strip. downcase end |
#convert(rows, model_keys, model_classes) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/dgaff_ml/model.rb', line 29
#
# Turns already-cast observation rows into numeric feature rows using the
# per-column settings stored under model["conversion_pipeline"].
#
# Column handling, chosen by model_classes[i]:
#   - "Phrase"/"Text": one output column per entry of the column's
#     "unique_terms", holding how many times that term occurs in each
#     row's token list (a nil cell is treated as [""]).
#   - "Categorical": one output column holding the position of val.to_s
#     within "unique_terms" (nil when the value is not listed).
#   - anything else: the value itself, or the column's "average" when the
#     value is nil/false.
#
# @param rows [Array<Array>] observations, one inner array per row; all
#   rows must have the same length (Array#transpose raises otherwise)
# @param model_keys [Array<String>] conversion_pipeline key per column
# @param model_classes [Array<String>] class name per column
# @return [Array<Array>] converted rows, one inner array per input row
def convert(rows, model_keys, model_classes)
  feature_columns = []

  rows.transpose.each_with_index do |col, i|
    case model_classes[i]
    when "Phrase", "Text"
      terms = model["conversion_pipeline"][model_keys[i]]["unique_terms"]
      # Normalise missing cells once per column rather than once per term.
      token_lists = col.map { |tokens| tokens.nil? ? [""] : tokens }
      terms.each do |term|
        feature_columns << token_lists.map { |tokens| tokens.count(term) }
      end
    when "Categorical"
      terms = model["conversion_pipeline"][model_keys[i]]["unique_terms"]
      feature_columns << col.map { |val| terms.index(val.to_s) }
    else
      # The pipeline entry is only consulted when a value is actually missing.
      feature_columns << col.map do |val|
        val || model["conversion_pipeline"][model_keys[i]]["average"]
      end
    end
  end

  feature_columns.transpose
end
#predict(obs) ⇒ Object
11 12 13 |
# File 'lib/dgaff_ml/model.rb', line 11
#
# Translates the observations and forwards them, together with the cached
# dataset id, to the client's predict call.
#
# @param obs [Array<Array>] raw observations, as accepted by #translate_obs
# @return [Object] whatever @client.predict returns
def predict(obs)
  features = translate_obs(obs)
  @client.predict(@dataset_id, features)
end
#translate_obs(obs) ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/dgaff_ml/model.rb', line 15
#
# Casts every cell of every observation according to the model's column
# classes, then hands the cast rows to #convert.
#
# The conversion_pipeline keys are ordered by their integer value; the class
# for each key is read from model["col_classes"] at that integer index, and
# the i-th cell of a row is cast with the i-th class in that ordering.
#
# @param obs [Array<Array>] raw observations, one inner array per row
# @return [Array<Array>] the result of #convert for the cast rows
def translate_obs(obs)
  pipeline_keys = model["conversion_pipeline"].keys.sort_by(&:to_i)
  column_classes = pipeline_keys.map { |key| model["col_classes"][key.to_i] }

  cast_rows = obs.map do |row|
    row.each_with_index.map { |cell, idx| cast_val(cell, column_classes[idx]) }
  end

  convert(cast_rows, pipeline_keys, column_classes)
end