Class: SVMLight::Document
- Inherits:
-
Object
- Object
- SVMLight::Document
- Defined in:
- lib/svmredlight/document.rb,
ext/svmredlight.c
Overview
A document is the Ruby representation of a DOC structure in SVMlight. It contains a queryid, a slackid, a costfactor (c), and a vector of feature numbers with their corresponding weights.
Class Method Summary collapse
-
.create(id, cost, slackid, queryid, words_ary) ⇒ Object
Creates a DOC from an array of words. It also takes an id (-1 is normally OK for that value when filtering) and the C (cost) parameter for the SVM.
- .new(vector, opts = {}) ⇒ Object
Instance Method Summary collapse
Class Method Details
.create(id, cost, slackid, queryid, words_ary) ⇒ Object
Creates a DOC from an array of words. It also takes an id (-1 is normally OK for that value when filtering) and the C (cost) parameter for the SVM. words_ary is an array of arrays like this:
- [wnum, weight], [wnum, weight], …
-
so that we do not waste memory (which would defeat the svec implementation) and do
not introduce a bunch of 0's, which seem to be OK when classifying but break training.
683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 |
# File 'ext/svmredlight.c', line 683
/*
 * Document.create(id, cost, slackid, queryid, words_ary) -> Document
 *
 * Builds an SVMlight DOC from words_ary and wraps it in a Ruby object of
 * class klass (released through doc_free).
 *
 *   id         document number; converted with NUM2INT
 *   cost       cost factor (C); any value NUM2DBL accepts
 *   slackid    Fixnum
 *   queryid    Fixnum
 *   words_ary  non-empty Array of [wnum, weight] pairs, where wnum is a
 *              Fixnum greater than zero and weight is a Float or Fixnum
 *
 * Raises TypeError or ArgumentError when an argument does not satisfy the
 * constraints above. All validation happens before any C memory is
 * allocated, because rb_raise() does not return and would otherwise leak
 * the temporary word buffer.
 */
static VALUE
doc_create(VALUE klass, VALUE id, VALUE cost, VALUE slackid, VALUE queryid, VALUE words_ary)
{
    long docnum, c_slackid, c_queryid, nwords, i;
    double c;
    WORD *words;
    SVECTOR *vec;
    DOC *d;
    VALUE pair, wnum, weight;

    Check_Type(words_ary, T_ARRAY);
    Check_Type(slackid, T_FIXNUM);
    Check_Type(queryid, T_FIXNUM);

    nwords = (long)RARRAY_LEN(words_ary);
    if (nwords == 0)
        rb_raise(rb_eArgError, "Cannot create Document from empty arrays");

    /* Scalar conversions may raise, so do them while nothing is allocated. */
    c = NUM2DBL(cost);
    docnum = NUM2INT(id);
    c_slackid = FIX2LONG(slackid);
    c_queryid = FIX2LONG(queryid);

    /* Pass 1: validate every [wnum, weight] pair. */
    for (i = 0; i < nwords; i++) {
        pair = rb_ary_entry(words_ary, i);
        Check_Type(pair, T_ARRAY);
        /* Guard against reading past the end of a short inner array. */
        if (RARRAY_LEN(pair) < 2)
            rb_raise(rb_eArgError, "Each feature must be a [wnum, weight] pair");

        wnum = rb_ary_entry(pair, 0);
        weight = rb_ary_entry(pair, 1);

        Check_Type(wnum, T_FIXNUM);
        if (!(TYPE(weight) == T_FLOAT || TYPE(weight) == T_FIXNUM))
            rb_raise(rb_eArgError, "Feature weights must be numeric");
        if (FIX2LONG(wnum) <= 0)
            rb_raise(rb_eArgError, "Feature number has to be greater than zero");
    }

    /* Pass 2: fill the word list; nothing in this loop can raise. */
    words = (WORD *) my_malloc(sizeof(WORD) * (nwords + 1));
    for (i = 0; i < nwords; i++) {
        pair = rb_ary_entry(words_ary, i);
        words[i].wnum = FIX2LONG(rb_ary_entry(pair, 0));
        words[i].weight = (FVAL)(NUM2DBL(rb_ary_entry(pair, 1)));
    }
    /* A wnum of 0 terminates the word list. */
    words[nwords].wnum = 0;
    words[nwords].weight = 0;

    vec = create_svector(words, (char *)"", 1.0);
    /* SVMlight's create_svector() copies the word list into the SVECTOR, so
     * the temporary buffer is no longer needed and must be released here. */
    free(words);

    d = create_example(docnum, c_queryid, c_slackid, c, vec);
    return Data_Wrap_Struct(klass, 0, doc_free, d);
}
|
.new(vector, opts = {}) ⇒ Object
11 12 13 14 15 16 17 18 19 |
# File 'lib/svmredlight/document.rb', line 11 def self.new(vector, opts={}) opts.default = 0 docnum = opts[:docnum] costfactor = opts[:costfactor] slackid = opts[:slackid] queryid = opts[:queryid] create(docnum, costfactor, slackid, queryid, vector.to_a) end |
Instance Method Details
#costfactor ⇒ Object
750 751 752 753 754 755 756 |
# File 'ext/svmredlight.c', line 750
/*
 * Document#costfactor
 *
 * Unwraps the DOC held by self and returns its costfactor field converted
 * to a Ruby numeric with DBL2NUM.
 */
static VALUE
doc_get_costfactor(VALUE self)
{
    DOC *doc;

    Data_Get_Struct(self, DOC, doc);
    return DBL2NUM(doc->costfactor);
}
|
#docnum ⇒ Object
726 727 728 729 730 731 732 |
# File 'ext/svmredlight.c', line 726
/*
 * Document#docnum
 *
 * Unwraps the DOC held by self and returns its docnum field converted to a
 * Ruby Fixnum with INT2FIX.
 */
static VALUE
doc_get_docnum(VALUE self)
{
    DOC *doc;

    Data_Get_Struct(self, DOC, doc);
    return INT2FIX(doc->docnum);
}
|
#queryid ⇒ Object
742 743 744 745 746 747 748 |
# File 'ext/svmredlight.c', line 742
/*
 * Document#queryid
 *
 * Unwraps the DOC held by self and returns its queryid field converted to a
 * Ruby Fixnum with INT2FIX.
 */
static VALUE
doc_get_queryid(VALUE self)
{
    DOC *doc;

    Data_Get_Struct(self, DOC, doc);
    return INT2FIX(doc->queryid);
}
|
#slackid ⇒ Object
734 735 736 737 738 739 740 |
# File 'ext/svmredlight.c', line 734
/*
 * Document#slackid
 *
 * Unwraps the DOC held by self and returns its slackid field converted to a
 * Ruby Fixnum with INT2FIX.
 */
static VALUE
doc_get_slackid(VALUE self)
{
    DOC *doc;

    Data_Get_Struct(self, DOC, doc);
    return INT2FIX(doc->slackid);
}
|