Class: Wapiti::Model
- Inherits:
-
Object
- Object
- Wapiti::Model
- Defined in:
- lib/wapiti/model.rb,
ext/wapiti/native.c
Instance Attribute Summary collapse
- #options ⇒ Object readonly
-
#path ⇒ Object
Returns the value of attribute path.
-
#sequence_count ⇒ Object
readonly
Returns the value of attribute sequence_count.
-
#sequence_errors ⇒ Object
readonly
Returns the value of attribute sequence_errors.
-
#token_count ⇒ Object
readonly
Returns the value of attribute token_count.
-
#token_errors ⇒ Object
readonly
Returns the value of attribute token_errors.
Class Method Summary collapse
Instance Method Summary collapse
- #clear_counters ⇒ Object (also: #clear)
- #compact ⇒ Object
- #initialize(*args) ⇒ Object constructor
- #label(data) ⇒ Object
-
#labels ⇒ Object
Returns a sorted list of all labels in the Model’s label database.
- #load(*args) ⇒ Object
- #native_label ⇒ Object
- #nftr ⇒ Object (also: #features)
-
#nlbl ⇒ Object
Native accessors.
- #nobs ⇒ Object (also: #observations)
- #pattern ⇒ Object
- #pattern=(filename) ⇒ Object
-
#save(*args) ⇒ Object
Saves the model to the file at the Model’s current path when called without an argument; otherwise uses the passed-in argument as the Model’s path.
- #statistics ⇒ Object (also: #stats)
-
#sync ⇒ Object
Instance methods.
- #total ⇒ Object
- #train(data) ⇒ Object
Constructor Details
#initialize(*args) ⇒ Object
660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 |
# File 'ext/wapiti/native.c', line 660
/*
 * Constructor for Wapiti::Model. Accepts zero or one argument:
 *
 *   - no argument: a fresh options object is built with cOptions.new;
 *   - a Hash:      it is forwarded to cOptions.new;
 *   - otherwise:   the argument must be an instance of cOptions (or of one
 *                  of its subclasses) and is used as-is.
 *
 * If a block is given, the options object is yielded to it before being
 * attached to the model. When the options name a model file, "load" is
 * called on the new instance. Finally "clear_counters" is called.
 *
 * Raises ArgumentError for more than one argument and cNativeError when the
 * single argument is neither a Hash nor an options instance.
 * Returns self.
 */
static VALUE initialize_model(int argc, VALUE *argv, VALUE self) {
  VALUE options;

  if (argc > 1) {
    rb_raise(rb_const_get(rb_mKernel, rb_intern("ArgumentError")),
      "wrong number of arguments (%d for 0..1)", argc);
  }

  if (argc) {
    if (TYPE(argv[0]) == T_HASH) {
      options = rb_funcall(cOptions, rb_intern("new"), 1, argv[0]);
    }
    else {
      // Test real class membership instead of comparing a prefix of the
      // class name: a name prefix also matches unrelated classes such as
      // "Wapiti::OptionsFoo" (whose instances would then be handed to
      // get_options) and rejects legitimately named subclasses.
      // NOTE(review): assumes cOptions is the Wapiti::Options class - confirm.
      if (!RTEST(rb_obj_is_kind_of(argv[0], cOptions))) {
        rb_raise(cNativeError, "argument must be a hash or an options instance");
      }
      options = argv[0];
    }
  }
  else {
    options = rb_funcall(cOptions, rb_intern("new"), 0);
  }

  // yield options if block_given?
  if (rb_block_given_p()) {
    rb_yield(options);
  }

  model_set_options(self, options);

  // Load a previous model if specified by options
  if (get_options(options)->model) {
    rb_funcall(self, rb_intern("load"), 0);
  }

  // initialize counters
  rb_funcall(self, rb_intern("clear_counters"), 0);

  return self;
}
|
Instance Attribute Details
#options ⇒ Object (readonly)
#path ⇒ Object
Returns the value of attribute path.
31 32 33 |
# File 'lib/wapiti/model.rb', line 31 def path @path end |
#sequence_count ⇒ Object (readonly)
Returns the value of attribute sequence_count.
33 34 35 |
# File 'lib/wapiti/model.rb', line 33 def sequence_count @sequence_count end |
#sequence_errors ⇒ Object (readonly)
Returns the value of attribute sequence_errors.
33 34 35 |
# File 'lib/wapiti/model.rb', line 33 def sequence_errors @sequence_errors end |
#token_count ⇒ Object (readonly)
Returns the value of attribute token_count.
33 34 35 |
# File 'lib/wapiti/model.rb', line 33 def token_count @token_count end |
#token_errors ⇒ Object (readonly)
Returns the value of attribute token_errors.
33 34 35 |
# File 'lib/wapiti/model.rb', line 33 def token_errors @token_errors end |
Class Method Details
.load(filename) ⇒ Object
22 23 24 25 26 27 |
# File 'lib/wapiti/model.rb', line 22 def load(filename) m = new m.path = filename m.load m end |
.train(data, options, &block) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/wapiti/model.rb', line 7 def train(data, , &block) config = Options.new(, &block) # check configuration # if config.pattern.empty? # raise ConfigurationError, 'invalid options: no pattern specified' # end unless config.valid? raise ConfigurationError, "invalid options: #{ config.validate.join('; ') }" end new(config).train(data) end |
Instance Method Details
#clear_counters ⇒ Object Also known as: clear
65 66 67 |
# File 'lib/wapiti/model.rb', line 65 def clear_counters @token_count = @token_errors = @sequence_count = @sequence_errors = 0 end |
#compact ⇒ Object
728 729 730 731 |
# File 'ext/wapiti/native.c', line 728 static VALUE model_compact(VALUE self) { mdl_compact(get_model(self)); return self; } |
#label(data) ⇒ Object
1133 1134 1135 1136 |
# File 'ext/wapiti/native.c', line 1133 def label(input, opts = nil) .update(opts) unless opts.nil? block_given? ? native_label(input, &Proc.new) : native_label(input) end |
#labels ⇒ Object
Returns a sorted list of all labels in the Model’s label database.
947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 |
# File 'ext/wapiti/native.c', line 947
/*
 * Collects the string of every label id below the model's label count
 * from the reader's label database into a Ruby array, sorts that array in
 * place with Array#sort! and returns it.
 */
static VALUE model_labels(VALUE self) {
  mdl_t *mdl = get_model(self);
  const size_t count = mdl->nlbl;
  qrk_t *label_db = mdl->reader->lbl;

  VALUE result = rb_ary_new2(count);

  unsigned int id = 0;
  while (id < count) {
    VALUE name = rb_str_new2(qrk_id2str(label_db, id));
    rb_ary_push(result, name);
    ++id;
  }

  rb_funcall(result, rb_intern("sort!"), 0);

  return result;
}
|
#load(*args) ⇒ Object
771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 |
# File 'ext/wapiti/native.c', line 771
/*
 * Loads a model from a file and returns self.
 *
 * With one String argument, that argument is stored in @path first. The
 * file named by @path is then opened for reading and handed to mdl_load.
 *
 * Raises ArgumentError for more than one argument, and cNativeError when
 * @path is nil or the file cannot be opened.
 */
static VALUE model_load(int argc, VALUE *argv, VALUE self) {
  if (argc > 1) {
    rb_raise(rb_const_get(rb_mKernel, rb_intern("ArgumentError")),
      "wrong number of arguments (%d for 0..1)", argc);
  }

  mdl_t *mdl = get_model(self);

  // remember an explicitly passed path on the instance
  if (argc != 0) {
    Check_Type(argv[0], T_STRING);
    rb_ivar_set(self, rb_intern("@path"), argv[0]);
  }

  VALUE path = rb_ivar_get(self, rb_intern("@path"));
  if (NIL_P(path)) {
    rb_raise(cNativeError, "failed to load model: no path given");
  }

  // open the model file
  FILE *input = fopen(StringValueCStr(path), "r");
  if (input == 0) {
    rb_raise(cNativeError, "failed to load model: failed to open model file");
  }

  mdl_load(mdl, input);
  fclose(input);

  return self;
}
|
#native_label ⇒ Object
43 |
# File 'lib/wapiti/model.rb', line 43 alias native_label label |
#nftr ⇒ Object Also known as: features
712 713 714 |
# File 'ext/wapiti/native.c', line 712
/* Returns the native model's nftr field converted with INT2FIX. */
static VALUE model_nftr(VALUE self) {
  const VALUE count = INT2FIX(get_model(self)->nftr);

  return count;
}
|
#nlbl ⇒ Object
Native accessors
704 705 706 |
# File 'ext/wapiti/native.c', line 704
/* Returns the native model's nlbl field converted with INT2FIX. */
static VALUE model_nlbl(VALUE self) {
  const VALUE count = INT2FIX(get_model(self)->nlbl);

  return count;
}
|
#nobs ⇒ Object Also known as: observations
708 709 710 |
# File 'ext/wapiti/native.c', line 708
/* Returns the native model's nobs field converted with INT2FIX. */
static VALUE model_nobs(VALUE self) {
  const VALUE count = INT2FIX(get_model(self)->nobs);

  return count;
}
|
#pattern ⇒ Object
35 36 37 |
# File 'lib/wapiti/model.rb', line 35 def pattern .pattern end |
#pattern=(filename) ⇒ Object
39 40 41 |
# File 'lib/wapiti/model.rb', line 39 def pattern=(filename) .pattern = filename end |
#save(*args) ⇒ Object
Saves the model to the file at the Model’s current path when called without an argument; otherwise uses the passed-in argument as the Model’s path.
739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 |
# File 'ext/wapiti/native.c', line 739
/*
 * Saves the model to a file and returns self.
 *
 * With one String argument, that argument is stored in @path first. The
 * file named by @path is then opened for writing and handed to mdl_save.
 *
 * Raises ArgumentError for more than one argument, and cNativeError when
 * @path is nil, the file cannot be opened, or closing the file fails.
 */
static VALUE model_save(int argc, VALUE *argv, VALUE self) {
  if (argc > 1) {
    rb_raise(rb_const_get(rb_mKernel, rb_intern("ArgumentError")),
      "wrong number of arguments (%d for 0..1)", argc);
  }

  mdl_t *model = get_model(self);

  // save passed-in path in options
  if (argc) {
    Check_Type(argv[0], T_STRING);
    rb_ivar_set(self, rb_intern("@path"), argv[0]);
  }

  // open the output file
  FILE *file = 0;
  VALUE path = rb_ivar_get(self, rb_intern("@path"));

  if (NIL_P(path)) {
    rb_raise(cNativeError, "failed to save model: no path given");
  }

  if (!(file = fopen(StringValueCStr(path), "w"))) {
    rb_raise(cNativeError, "failed to save model: failed to open model file");
  }

  mdl_save(model, file);

  // fclose flushes any buffered output, so a failure here means the model
  // file on disk may be incomplete; report it instead of returning success.
  if (fclose(file) != 0) {
    rb_raise(cNativeError, "failed to save model: failed to write model file");
  }

  return self;
}
|
#statistics ⇒ Object Also known as: stats
50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/wapiti/model.rb', line 50 def statistics s = {} s[:tokens] = { :total => token_count, :errors => @token_errors, :rate => token_errors / (token_count * 100.0) } s[:sequences] = { :total => sequence_count, :errors => sequence_errors, :rate => sequence_errors / (sequence_count * 100.0) } s end |
#sync ⇒ Object
Instance methods
723 724 725 726 |
# File 'ext/wapiti/native.c', line 723 static VALUE model_sync(VALUE self) { mdl_sync(get_model(self)); return self; } |
#total ⇒ Object
716 717 718 |
# File 'ext/wapiti/native.c', line 716
/* Returns the native model's total field as a Ruby Float. */
static VALUE model_total(VALUE self) {
  const VALUE total = rb_float_new(get_model(self)->total);

  return total;
}
|
#train(data) ⇒ Object
853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 |
# File 'ext/wapiti/native.c', line 853
/*
 * Trains the model on the given data and returns self.
 *
 * data is either a String, used as the path of a training data file, or an
 * Array, which is converted with to_dat. The training algorithm, the
 * optional pattern file, the optional development file and the compact
 * flag are all taken from the model's options.
 *
 * Raises cNativeError when the algorithm name is unknown, when the pattern,
 * training or development file cannot be opened, when data is neither a
 * String nor an Array, or when no training sequences were loaded.
 *
 * NOTE(review): the FILE handles are only closed on the normal path; if the
 * reader functions can raise a Ruby exception the handle is leaked - confirm.
 */
static VALUE model_train(VALUE self, VALUE data) {
  mdl_t *model = get_model(self);

  // Find the trainer whose name matches the configured algorithm.
  int trn;
  for (trn = 0; trn < trn_cnt; trn++) {
    if (!strcmp(model->opt->algo, trn_lst[trn].name)) break;
  }

  if (trn == trn_cnt) {
    rb_raise(cNativeError, "failed to train model: unknown algorithm '%s'", model->opt->algo);
  }

  FILE *file = NULL;

  // Load the pattern file. This will unlock the database if previously
  // locked by loading a model. A missing pattern is not treated as an error.
  if (model->opt->pattern) {
    file = fopen(model->opt->pattern, "r");

    if (!file) {
      rb_raise(cNativeError, "failed to train model: failed to load pattern file '%s'", model->opt->pattern);
    }

    rdr_loadpat(model->reader, file);
    fclose(file);
  }

  qrk_lock(model->reader->obs, false);

  // Load the training data. When this is done we lock the quarks as we
  // don't want to put in the model information present only in the
  // development set.
  switch (TYPE(data)) {
    case T_STRING:
      if (!(file = fopen(StringValuePtr(data), "r"))) {
        rb_raise(cNativeError, "failed to train model: failed to open training data '%s'", StringValuePtr(data));
      }

      model->train = rdr_readdat(model->reader, file, true);
      fclose(file);

      break;
    case T_ARRAY:
      model->train = to_dat(model->reader, data, true);

      break;
    default:
      rb_raise(cNativeError, "failed to train model: invalid training data type (expected instance of String or Array)");
  }

  qrk_lock(model->reader->lbl, true);
  qrk_lock(model->reader->obs, true);

  if (!model->train || model->train->nseq == 0) {
    rb_raise(cNativeError, "failed to train model: no training data loaded");
  }

  // If present, load the development set in the model. If not specified,
  // the training dataset will be used instead.
  if (model->opt->devel) {
    if (!(file = fopen(model->opt->devel, "r"))) {
      rb_raise(cNativeError, "failed to train model: cannot open development file '%s'", model->opt->devel);
    }

    model->devel = rdr_readdat(model->reader, file, true);
    fclose(file);
  }

  // Initialize the model. If a previous model was loaded, this will be
  // just a resync, else the model structure will be created.
  rb_funcall(self, rb_intern("sync"), 0);

  // Train the model.
  uit_setup(model);
  trn_lst[trn].train(model);
  uit_cleanup(model);

  // If requested compact the model.
  if (model->opt->compact) {
    rb_funcall(self, rb_intern("compact"), 0);
  }

  return self;
}
|