Class: Word2vec::Model
- Inherits:
-
Object
- Object
- Word2vec::Model
- Defined in:
- ext/word2vec/word2vec.c
Class Method Summary collapse
-
.build_vocab(rb_train_file_name, rb_vocab_file_name) ⇒ Object
Build the vocabulary file from the training file.
-
.load(rb_filename) ⇒ Object
Load the vectors.bin model file from disk.
-
.tokenize(rb_train_file_name, rb_vocab_file_name, rb_output_file_name) ⇒ Object
tokenize a file.
Instance Method Summary collapse
-
#accuracy(rb_file_name) ⇒ Object
Evaluate the model's accuracy using the given file.
-
#analogy(rb_wordx1, rb_wordy1, rb_wordx2) ⇒ Hash<String, Float>
Find the words that complete the analogy formed by the three given words.
-
#distance(rb_word) ⇒ Hash<String, Float>
Find the words nearest to the given word.
-
#vector_dim ⇒ Integer
model vector dimensionality.
-
#word_count ⇒ Integer
model vocabulary length.
Class Method Details
.build_vocab(rb_train_file_name, rb_vocab_file_name) ⇒ Object
Build the vocabulary file from the training file.
135 136 137 138 139 140 141 142 |
# File 'ext/word2vec/word2vec.c', line 135
/*
 * Build the vocabulary file from the training file.
 *
 * mod                - receiver (not used by this function).
 * rb_train_file_name - Ruby string naming the training file.
 * rb_vocab_file_name - Ruby string naming the vocabulary file.
 *
 * Both arguments are converted to C strings with StringValueCStr and
 * forwarded to word2vec_build_vocab. Always returns Qtrue.
 */
static VALUE build_vocab(VALUE mod, VALUE rb_train_file_name, VALUE rb_vocab_file_name) {
    char *train_path;
    char *vocab_path;

    train_path = StringValueCStr(rb_train_file_name);
    vocab_path = StringValueCStr(rb_vocab_file_name);
    word2vec_build_vocab(train_path, vocab_path);

    return Qtrue;
}
|
.load(rb_filename) ⇒ Object
Load the vectors.bin model file from disk.
40 41 42 43 44 45 46 47 48 |
# File 'ext/word2vec/word2vec.c', line 40
/*
 * Load a model from the file named by rb_filename and wrap it in a Ruby
 * object of class `mod`.
 *
 * mod         - receiver; used as the class of the returned object.
 * rb_filename - Ruby string naming the model file to load.
 *
 * Returns a Data object owning the loaded word2vec_model; the struct is
 * released through model_deallocate when the object is collected.
 */
static VALUE model_load(VALUE mod, VALUE rb_filename) {
    /*
     * Convert the argument before allocating: StringValueCStr raises when
     * the value is not string-like or contains a NUL byte, and raising after
     * ZALLOC would leak the struct because nothing owns it yet.
     */
    char *filename = StringValueCStr(rb_filename);
    word2vec_model *model = ZALLOC(word2vec_model);

    word2vec_model_load(model, filename);

    /*
     * filename points into rb_filename's buffer; keep the Ruby string
     * visible to the GC until the C pointer is no longer used.
     */
    RB_GC_GUARD(rb_filename);

    return Data_Wrap_Struct(mod, NULL, model_deallocate, model);
}
|
.tokenize(rb_train_file_name, rb_vocab_file_name, rb_output_file_name) ⇒ Object
tokenize a file
150 151 152 153 154 155 156 157 158 |
# File 'ext/word2vec/word2vec.c', line 150
/*
 * Tokenize a file.
 *
 * mod                 - receiver (not used by this function).
 * rb_train_file_name  - Ruby string naming the training file.
 * rb_vocab_file_name  - Ruby string naming the vocabulary file.
 * rb_output_file_name - Ruby string naming the output file.
 *
 * The three arguments are converted to C strings with StringValueCStr and
 * forwarded, in that order, to word2vec_tokenize. Always returns Qtrue.
 */
static VALUE tokenize(VALUE mod, VALUE rb_train_file_name, VALUE rb_vocab_file_name, VALUE rb_output_file_name) {
    char *train_path;
    char *vocab_path;
    char *output_path;

    train_path = StringValueCStr(rb_train_file_name);
    vocab_path = StringValueCStr(rb_vocab_file_name);
    output_path = StringValueCStr(rb_output_file_name);
    word2vec_tokenize(train_path, vocab_path, output_path);

    return Qtrue;
}
|
Instance Method Details
#accuracy(rb_file_name) ⇒ Object
Evaluate the model's accuracy using the given file.
120 121 122 123 124 125 126 127 128 |
# File 'ext/word2vec/word2vec.c', line 120
/*
 * Run word2vec_model_accuracy on the wrapped model with the given file.
 *
 * mod          - receiver; the Data object wrapping a word2vec_model.
 * rb_file_name - Ruby string naming the file passed to the accuracy routine.
 *
 * Always returns Qtrue; nothing returned by word2vec_model_accuracy is
 * passed back to the caller.
 */
static VALUE model_accuracy(VALUE mod, VALUE rb_file_name) {
    word2vec_model *loaded;
    char *path;

    Data_Get_Struct(mod, word2vec_model, loaded);
    path = StringValueCStr(rb_file_name);
    word2vec_model_accuracy(loaded, path);

    return Qtrue;
}
|
#analogy(rb_wordx1, rb_wordy1, rb_wordx2) ⇒ Hash<String, Float>
Find the words that complete the analogy formed by the three given words.
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'ext/word2vec/word2vec.c', line 100
/*
 * Query the wrapped model for analogy results for three words.
 *
 * mod       - receiver; the Data object wrapping a word2vec_model.
 * rb_wordx1 - Ruby string, first word passed to word2vec_model_analogy.
 * rb_wordy1 - Ruby string, second word passed to word2vec_model_analogy.
 * rb_wordx2 - Ruby string, third word passed to word2vec_model_analogy.
 *
 * word2vec_model_analogy fills a stack buffer of N WordSimilarity entries
 * and reports how many it produced; that many entries are converted with
 * wordSimilarotyToHash and the resulting value is returned.
 */
static VALUE model_analogy(VALUE mod, VALUE rb_wordx1, VALUE rb_wordy1, VALUE rb_wordx2) {
    word2vec_model *loaded;
    char *x1;
    char *y1;
    char *x2;
    WordSimilarity matches[N];
    size_t match_count;

    Data_Get_Struct(mod, word2vec_model, loaded);
    x1 = StringValueCStr(rb_wordx1);
    y1 = StringValueCStr(rb_wordy1);
    x2 = StringValueCStr(rb_wordx2);

    match_count = word2vec_model_analogy(loaded, x1, y1, x2, matches);
    return wordSimilarotyToHash(loaded, matches, match_count);
}
|
#distance(rb_word) ⇒ Hash<String, Float>
Find the words nearest to the given word.
79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'ext/word2vec/word2vec.c', line 79
/*
 * Query the wrapped model for the words nearest to the given word.
 *
 * mod     - receiver; the Data object wrapping a word2vec_model.
 * rb_word - Ruby string holding the query word.
 *
 * word2vec_model_distance fills a stack buffer of N WordSimilarity entries
 * and reports how many it produced; that many entries are converted with
 * wordSimilarotyToHash and the resulting value is returned.
 */
static VALUE model_distance(VALUE mod, VALUE rb_word) {
    word2vec_model *loaded;
    char *query;
    WordSimilarity matches[N];
    size_t match_count;

    Data_Get_Struct(mod, word2vec_model, loaded);
    query = StringValueCStr(rb_word);

    match_count = word2vec_model_distance(loaded, query, matches);
    return wordSimilarotyToHash(loaded, matches, match_count);
}
|
#vector_dim ⇒ Integer
model vector dimensionality
66 67 68 69 70 71 72 |
# File 'ext/word2vec/word2vec.c', line 66
/*
 * Return the wrapped model's vector_dim field as a Ruby Integer.
 *
 * mod - receiver; the Data object wrapping a word2vec_model.
 */
static VALUE model_vector_dim(VALUE mod) {
    word2vec_model *loaded;

    Data_Get_Struct(mod, word2vec_model, loaded);

    return SIZET2NUM(loaded->vector_dim);
}
|
#word_count ⇒ Integer
model vocabulary length
54 55 56 57 58 59 60 |
# File 'ext/word2vec/word2vec.c', line 54
/*
 * Return the wrapped model's word_count field as a Ruby Integer.
 *
 * mod - receiver; the Data object wrapping a word2vec_model.
 */
static VALUE model_word_count(VALUE mod) {
    word2vec_model *loaded;

    Data_Get_Struct(mod, word2vec_model, loaded);

    return SIZET2NUM(loaded->word_count);
}
|