Class: SVMLight::Model
- Inherits:
-
Object
- Object
- SVMLight::Model
- Defined in:
- lib/svmredlight/model.rb,
ext/svmredlight.c
Overview
A model is the product of training an SVM. Once created, it can take documents as input and act on them (for instance, by classifying them). Models can also be read from files created by svm_learn.
Constant Summary collapse
- TYPES =
[:classification]
Class Method Summary collapse
-
.from_file(filename) ⇒ Object
Reads an svm_light model from a file generated by svm_learn. Receives the filename as its argument. Make sure the file exists before calling this; otherwise exit(1) might be called and the Ruby interpreter will die.
-
.learn_classification(r_docs_and_classes, learn_params, kernel_params, use_cache, alpha) ⇒ Object
Learns a classification model from labeled documents; use_cache is only meaningful for non-linear kernels.
-
.new(type, documents_and_lables, learn_params, kernel_params, alphas = nil) ⇒ Object
Learns a model from a set of labeled documents.
-
.read_from_file(pahtofile) ⇒ Object
Loads an existing model from a file.
Instance Method Summary collapse
-
#classify(example) ⇒ Object
Classify, takes an example (instance of Document) and returns its classification.
- #maxdiff ⇒ Object
- #support_vectors_count ⇒ Object
- #to_file(pahtofile) ⇒ Object
- #total_words ⇒ Object
- #totdoc ⇒ Object
-
#write_to_file(pahtofile) ⇒ Object
Creates a file containing the model info; the model info can be turned back into a model by using Model.read_from_file.
Class Method Details
.from_file(filename) ⇒ Object
Reads an svm_light model from a file generated by svm_learn. Receives the filename as its argument. Make sure the file exists before calling this; otherwise exit(1) might be called and the Ruby interpreter will die.
36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'ext/svmredlight.c', line 36
/*
 * Model.from_file(filename)
 *
 * Reads a model from the file named by `filename` (a Ruby String) via
 * read_model and wraps the resulting MODEL in a Ruby object of class `klass`,
 * registering model_free as its free function. For linear models the weight
 * vector is added before wrapping.
 *
 * Raises TypeError if `filename` is not a String and ArgumentError if it
 * contains an embedded NUL byte.
 *
 * NOTE: per this method's documentation the file must exist before calling;
 * otherwise exit(1) might be called and the interpreter dies.
 */
static VALUE
model_read_from_file(VALUE klass, VALUE filename){
  MODEL *m;

  Check_Type(filename, T_STRING);

  // StringValueCStr, unlike StringValuePtr, guarantees a NUL-terminated
  // buffer without embedded NULs, which is what a C path argument needs.
  m = read_model(StringValueCStr(filename));

  if(is_linear(m)){
    add_weight_vector_to_linear_model(m);
  }

  return Data_Wrap_Struct(klass, 0, model_free, m);
}
|
.learn_classification(r_docs_and_classes, learn_params, kernel_params, use_cache, alpha) ⇒ Object
Learns a classification model from labeled documents; use_cache is only meaningful for non-linear kernels
485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 |
# File 'ext/svmredlight.c', line 485
/*
 * Model.learn_classification(docs_and_classes, learn_params, kernel_params,
 *                            use_cache, alpha)
 *
 * Trains a classification model with svm_learn_classification and wraps the
 * resulting MODEL in a Ruby object of class `klass`.
 *
 *   r_docs_and_classes - non-empty Array of [Document, Numeric] pairs
 *                        (document and its label).
 *   learn_params       - Hash passed to setup_learn_params.
 *   kernel_params      - Hash passed to setup_kernel_params.
 *   use_cache          - currently ignored; the kernel type is forced to LINEAR.
 *   alpha              - nil, or an Array of Float/Fixnum with at least one
 *                        entry per document.
 *
 * Raises TypeError when an argument has the wrong Ruby type and ArgumentError
 * (with a descriptive message) when validation of the contents fails.
 */
static VALUE
model_learn_classification(VALUE klass,
                           VALUE r_docs_and_classes, // Docs + labels array of arrays
                           VALUE learn_params,       // Options hash with learning options
                           VALUE kernel_params,      // Options hash with kernel options
                           VALUE use_cache,          // Unused for now: only the linear kernel is supported
                           VALUE alpha               // nil or numeric array of initial alphas
                           ){
  long i;  // long, to match RARRAY_LEN() and totdocs
  double *labels = NULL, *alpha_in = NULL;
  long totdocs, totwords = 0, fnum = 0;
  MODEL *m = NULL;
  DOC **c_docs = NULL;
  LEARN_PARM c_learn_param;
  KERNEL_PARM c_kernel_param;
  VALUE temp_ary;
  char error_msg[300];

  Check_Type(r_docs_and_classes, T_ARRAY);
  Check_Type(learn_params, T_HASH);
  Check_Type(kernel_params, T_HASH);

  if(!(TYPE(alpha) == T_ARRAY || NIL_P(alpha) ))
    rb_raise(rb_eTypeError, "alpha must be an numeric array or nil");

  if(TYPE(alpha) == T_ARRAY){
    alpha_in = my_malloc(sizeof(double) * RARRAY_LEN(alpha));

    for(i = 0; i < RARRAY_LEN(alpha); i++){
      if(TYPE(RARRAY_PTR(alpha)[i]) != T_FLOAT &&
         TYPE(RARRAY_PTR(alpha)[i]) != T_FIXNUM ){
        strncpy(error_msg, "All elements of the alpha array must be numeric ", sizeof(error_msg));
        goto bail;
      }
      alpha_in[i] = NUM2DBL(RARRAY_PTR(alpha)[i]);
    }
  }

  if(setup_learn_params(&c_learn_param, learn_params, error_msg) != 0){
    goto bail;
  }
  c_learn_param.type = CLASSIFICATION;

  if(setup_kernel_params(&c_kernel_param, kernel_params, error_msg) != 0){
    goto bail;
  }
  //TODO Setup kernel cache when we support non linear kernels
  c_kernel_param.kernel_type = LINEAR;

  if(check_kernel_and_learn_params_logic(&c_kernel_param, &c_learn_param, error_msg) != 0){
    goto bail;
  }

  totdocs = (long)RARRAY_LEN(r_docs_and_classes);
  if (totdocs == 0){
    strncpy(error_msg, "Cannot create Model from empty Documents array", sizeof(error_msg));
    goto bail;
  }

  // The learner indexes the alpha buffer once per document, so a shorter
  // array would make it read and write past the end of alpha_in.
  if(alpha_in != NULL && (long)RARRAY_LEN(alpha) < totdocs){
    strncpy(error_msg, "The alpha array must contain one value per document", sizeof(error_msg));
    goto bail;
  }

  c_docs = (DOC **)my_malloc(sizeof(DOC *)*(totdocs));
  labels = (double*)my_malloc(sizeof(double)*totdocs);

  for(i = 0; i < totdocs; i++){
    // Just one of the documents and classes arrays, we expect temp_ary to have a Document
    // and a label (long)
    temp_ary = RARRAY_PTR(r_docs_and_classes)[i] ;

    if( TYPE(temp_ary) != T_ARRAY ||
        RARRAY_LEN(temp_ary) < 2 ||
        rb_obj_class(RARRAY_PTR(temp_ary)[0]) != rb_cDocument ||
        (TYPE(RARRAY_PTR(temp_ary)[1]) != T_FLOAT && TYPE(RARRAY_PTR(temp_ary)[1]) != T_FIXNUM )){
      strncpy(error_msg, "All elements of documents and labels should be arrays,"
              "where the first element is a document and the second a number", sizeof(error_msg));
      goto bail;
    }

    Data_Get_Struct(RARRAY_PTR(temp_ary)[0], DOC, c_docs[i]);
    labels[i] = NUM2DBL(RARRAY_PTR(temp_ary)[1]);

    fnum = 0;
    // Increase feature number while there are still words in the vector
    // (the words array is terminated by an entry whose wnum is 0)
    while(c_docs[i]->fvec->words[fnum].wnum) {
      fnum++;
    }

    // The last word carries the highest feature number seen for this document.
    // Guard fnum > 0 so a document without features never reads words[-1].
    if(fnum > 0 && c_docs[i]->fvec->words[fnum - 1].wnum > totwords)
      totwords = c_docs[i]->fvec->words[fnum - 1].wnum;

    if(totwords > MAXFEATNUM){
      strncpy(error_msg, "The number of features exceeds MAXFEATNUM the maximun "
              "number of features defined for this version of SVMLight", sizeof(error_msg));
      goto bail;
    }
  }

  m = (MODEL *)my_malloc(sizeof(MODEL));
  svm_learn_classification(c_docs, labels, totdocs, totwords,
                           &c_learn_param, &c_kernel_param, NULL, m, alpha_in);

  free(alpha_in);
  free(labels);
  // If need arises to free the data do a deep copy of m and create the ruby object with
  // that data.
  // free(c_docs);
  return Data_Wrap_Struct(klass, 0, model_free, m);

 bail:
  // free(NULL) is a no-op, so this is safe no matter how far we got.
  free(alpha_in);
  free(labels);
  free(c_docs);
  // The message is passed as an argument, never as the format string, so a
  // '%' inside error_msg cannot be interpreted as a conversion.
  rb_raise(rb_eArgError, "%s", error_msg);
  return Qnil; // not reached, rb_raise does not return
}
|
.new(type, documents_and_lables, learn_params, kernel_params, alphas = nil) ⇒ Object
Learns a model from a set of labeled documents.
16 17 18 19 20 |
# File 'lib/svmredlight/model.rb', line 16
# Learns a model from a set of labeled documents.
#
# type                 - must be one of TYPES, otherwise ArgumentError is raised.
# documents_and_lables - labeled documents, forwarded to learn_classification.
# learn_params         - learning options, forwarded to learn_classification.
# kernel_params        - kernel options, forwarded to learn_classification.
# alphas               - optional alphas (default nil), forwarded as-is.
def self.new(type, documents_and_lables, learn_params, kernel_params, alphas = nil)
  unless TYPES.include?(type)
    raise ArgumentError, "Supporte types are (for now) #{TYPES}"
  end

  # The cache flag is always passed as false from here.
  use_cache = false
  learn_classification(documents_and_lables, learn_params, kernel_params, use_cache, alphas)
end
.read_from_file(pahtofile) ⇒ Object
Loads an existing model from a file
38 39 40 41 42 43 44 45 46 |
# File 'lib/svmredlight/model.rb', line 38
# Loads an existing model from a file by delegating to from_file.
#
# pahtofile - path of the model file to load.
#
# Raises MissingModelFile when pahtofile does not name an existing regular
# file, so from_file is never called with a missing path.
def self.read_from_file(pahtofile)
  # File.file? is already false for paths that do not exist, so it covers the
  # former File.exists? check too. File.exists? was deprecated and then removed
  # in Ruby 3.2, where calling it raises NoMethodError.
  if File.file?(pahtofile)
    from_file(pahtofile)
  else
    raise MissingModelFile, "the #{pahtofile} does not exists or is not a file"
  end
end
Instance Method Details
#classify(example) ⇒ Object
Classify, takes an example (instance of Document) and returns its classification
611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 |
# File 'ext/svmredlight.c', line 611
static VALUE
model_classify_example(VALUE self, VALUE example){
DOC *ex;
MODEL *m;
double result;
Data_Get_Struct(example, DOC, ex);
Data_Get_Struct(self, MODEL, m);
/* Apparently unnecessary code
if(is_linear(m))
result = classify_example_linear(m, ex);
else
*/
result = classify_example(m, ex);
return rb_float_new((float)result);
}
|
#maxdiff ⇒ Object
668 669 670 671 672 673 674 |
# File 'ext/svmredlight.c', line 668
static VALUE
model_maxdiff(VALUE self){
MODEL *m;
Data_Get_Struct(self, MODEL, m);
return DBL2NUM(m->maxdiff);
}
|
#support_vectors_count ⇒ Object
632 633 634 635 636 637 638 |
# File 'ext/svmredlight.c', line 632
static VALUE
model_support_vectors_count(VALUE self){
MODEL *m;
Data_Get_Struct(self, MODEL, m);
return INT2FIX(m->sv_num);
}
|
#to_file(pahtofile) ⇒ Object
640 641 642 643 644 645 646 647 648 649 650 |
# File 'ext/svmredlight.c', line 640
static VALUE
model_write_to_file(VALUE self, VALUE pahtofile){
Check_Type(pahtofile, T_STRING);
MODEL *m;
Data_Get_Struct(self, MODEL, m);
write_model(StringValuePtr(pahtofile), m);
return Qnil;
}
|
#total_words ⇒ Object
652 653 654 655 656 657 658 |
# File 'ext/svmredlight.c', line 652
static VALUE
model_total_words(VALUE self){
MODEL *m;
Data_Get_Struct(self, MODEL, m);
return INT2FIX(m->totwords);
}
|
#totdoc ⇒ Object
660 661 662 663 664 665 666 |
# File 'ext/svmredlight.c', line 660
static VALUE
model_totdoc(VALUE self){
MODEL *m;
Data_Get_Struct(self, MODEL, m);
return INT2FIX(m->totdoc);
}
|
#write_to_file(pahtofile) ⇒ Object
Creates a file containing the model info; the model info can be turned back into a model by using Model.read_from_file
53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/svmredlight/model.rb', line 53
# Writes the model info to pahtofile by delegating to to_file; the file can
# later be turned back into a model with Model.read_from_file.
#
# pahtofile - destination path for the model file.
#
# Raises ModelWriteError when the parent directory of pahtofile is not an
# existing, writable directory.
def write_to_file(pahtofile)
  target_dir = File.dirname(pahtofile)

  unless File.directory?(target_dir) && File.writable?(target_dir)
    raise ModelWriteError, "impossible to write #{pahtofile}"
  end

  to_file(pahtofile)
end