Class: Wapiti::Model

Inherits:
Object
  • Object
show all
Defined in:
lib/wapiti/model.rb,
ext/wapiti/native.c

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Object



660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
# File 'ext/wapiti/native.c', line 660

/*
 * Model#initialize: sets up the options for a new model and optionally
 * loads a previously saved model.
 *
 * Accepts zero or one argument:
 *   - no argument: a default options object is created via cOptions.new
 *   - a Hash:      passed to cOptions.new to build the options object
 *   - otherwise:   must be an instance of cOptions, which is used directly
 *
 * If a block is given, the options object is yielded to it before being
 * attached to the model. When the options name a model file, `load` is
 * called on the new instance. Counters are reset via `clear_counters`.
 *
 * Raises ArgumentError for more than one argument, and cNativeError when
 * the argument is neither a Hash nor an options instance.
 *
 * Returns self.
 */
static VALUE initialize_model(int argc, VALUE *argv, VALUE self) {
	VALUE options;
	
	if (argc > 1) {
		rb_raise(rb_eArgError,
			"wrong number of arguments (%d for 0..1)", argc);
	}

	if (argc) {
		if (TYPE(argv[0]) == T_HASH) {
			options = rb_funcall(cOptions, rb_intern("new"), 1, argv[0]);			
		}
		else {
			// Check the actual class rather than comparing a prefix of the class
			// name: a name comparison accepts any class whose name merely starts
			// with "Wapiti::Options" and rejects genuine subclasses.
			// NOTE(review): assumes cOptions is the Wapiti::Options class the
			// former name comparison targeted -- confirm against its definition.
			if (!RTEST(rb_obj_is_kind_of(argv[0], cOptions))) {
				rb_raise(cNativeError, "argument must be a hash or an options instance");
			}
			options = argv[0];
		}
	}
	else {
		options = rb_funcall(cOptions, rb_intern("new"), 0);
	}
	
	// yield options if block_given?
	if (rb_block_given_p()) {
	 	rb_yield(options);
	}

	model_set_options(self, options);

	// Load a previous model if specified by options
	if (get_options(options)->model) {
		rb_funcall(self, rb_intern("load"), 0);
	}
	
	// initialize counters
	rb_funcall(self, rb_intern("clear_counters"), 0);

	return self;
}

Instance Attribute Details

#optionsObject (readonly)

#pathObject

Returns the value of attribute path.



31
32
33
# File 'lib/wapiti/model.rb', line 31

# Reader for the +@path+ instance variable.
#
# @return [Object, nil] whatever is currently stored in +@path+
def path
  @path
end

#sequence_countObject (readonly)

Returns the value of attribute sequence_count.



33
34
35
# File 'lib/wapiti/model.rb', line 33

# Reader for the +@sequence_count+ counter.
#
# @return [Object, nil] the current value of +@sequence_count+
def sequence_count
  @sequence_count
end

#sequence_errorsObject (readonly)

Returns the value of attribute sequence_errors.



33
34
35
# File 'lib/wapiti/model.rb', line 33

# Reader for the +@sequence_errors+ counter.
#
# @return [Object, nil] the current value of +@sequence_errors+
def sequence_errors
  @sequence_errors
end

#token_countObject (readonly)

Returns the value of attribute token_count.



33
34
35
# File 'lib/wapiti/model.rb', line 33

# Reader for the +@token_count+ counter.
#
# @return [Object, nil] the current value of +@token_count+
def token_count
  @token_count
end

#token_errorsObject (readonly)

Returns the value of attribute token_errors.



33
34
35
# File 'lib/wapiti/model.rb', line 33

# Reader for the +@token_errors+ counter.
#
# @return [Object, nil] the current value of +@token_errors+
def token_errors
  @token_errors
end

Class Method Details

.load(filename) ⇒ Object



22
23
24
25
26
27
# File 'lib/wapiti/model.rb', line 22

# Creates a new instance, points it at +filename+ and loads it.
#
# @param filename the value assigned to the new instance's +path+
# @return the new instance, after +load+ has been called on it
def load(filename)
	new.tap do |model|
		model.path = filename
		model.load
	end
end

.train(data, options, &block) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/wapiti/model.rb', line 7

# Builds an Options object from +options+ (and the optional block),
# validates it, and trains a new model on +data+.
#
# @param data handed unchanged to the new instance's +train+
# @param options passed to +Options.new+
# @param block forwarded to +Options.new+
# @return whatever the instance-level +train+ returns
# @raise [ConfigurationError] when the options are not valid; the message
#   lists the entries returned by +validate+, joined with '; '
def train(data, options, &block)
	config = Options.new(options, &block)

	# check configuration
	# if config.pattern.empty?
	# 	raise ConfigurationError, 'invalid options: no pattern specified'
	# end

	if config.valid?
		new(config).train(data)
	else
		raise ConfigurationError, "invalid options: #{ config.validate.join('; ') }"
	end
end

Instance Method Details

#clear_countersObject Also known as: clear



65
66
67
# File 'lib/wapiti/model.rb', line 65

# Resets the four token/sequence counters to zero.
#
# @return [Integer] 0
def clear_counters
	@sequence_errors = 0
	@sequence_count = 0
	@token_errors = 0
	@token_count = 0
end

#compactObject



728
729
730
731
# File 'ext/wapiti/native.c', line 728

/*
 * Model#compact: runs mdl_compact on the native model wrapped by self.
 *
 * Returns self.
 */
static VALUE model_compact(VALUE self) {
	mdl_compact(get_model(self));
	return self;
}

#label(data) ⇒ Object



1133
1134
1135
1136
# File 'ext/wapiti/native.c', line 1133

# Labels +input+, optionally updating the model's options first.
#
# @param input passed through unchanged to +native_label+
# @param opts [Object, nil] when not nil, handed to +options.update+ before labelling
# @param block forwarded as-is to +native_label+ (no block is passed when none is given)
# @return whatever +native_label+ returns
def label(input, opts = nil, &block)
	options.update(opts) unless opts.nil?
	# Forward the block explicitly. Capturing the caller's block with a bare
	# +Proc.new+ raises ArgumentError on Ruby 3.0 and later.
	native_label(input, &block)
end

#labelsObject

Returns a sorted list of all labels in the Model’s label database.



947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
# File 'ext/wapiti/native.c', line 947

/*
 * Model#labels: returns a sorted list of all labels in the Model's label
 * database.
 *
 * Collects the string for every label id in [0, nlbl) from the reader's
 * label quark into a new Array, then sorts that Array in place with `sort!`.
 */
static VALUE model_labels(VALUE self) {
	mdl_t *model = get_model(self);
	const size_t Y = model->nlbl;
	
	qrk_t *lp = model->reader->lbl;
	
	// Y is only a capacity hint for the new array.
	VALUE labels = rb_ary_new2((long)Y);

	// The index has the same type as its bound, so the comparison is not
	// made between integers of different widths.
	for (size_t i = 0; i < Y; ++i) {
		rb_ary_push(labels, rb_str_new2(qrk_id2str(lp, i)));
	}
	
	rb_funcall(labels, rb_intern("sort!"), 0);
	
	return labels;
}

#load(*args) ⇒ Object



771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
# File 'ext/wapiti/native.c', line 771

/*
 * Model#load: reads a model from a file.
 *
 * With one String argument, stores it in @path first. The file named by
 * @path is then opened for reading and handed to mdl_load.
 *
 * Raises ArgumentError for more than one argument, TypeError (via
 * Check_Type) when the argument is not a String, and cNativeError when no
 * path is set or the file cannot be opened.
 *
 * Returns self.
 */
static VALUE model_load(int argc, VALUE *argv, VALUE self) {
	if (argc > 1) {
		rb_raise(rb_eArgError,
			"wrong number of arguments (%d for 0..1)", argc);
	}
	
	mdl_t *model = get_model(self);
	
	// remember a passed-in path in @path
	if (argc) {
		Check_Type(argv[0], T_STRING);
		rb_ivar_set(self, rb_intern("@path"), argv[0]);
	}
	
	// open the model file
	FILE *file = 0;
	VALUE path = rb_ivar_get(self, rb_intern("@path"));
	
	if (NIL_P(path)) {
		rb_raise(cNativeError, "failed to load model: no path given");
	}
	
	if (!(file = fopen(StringValueCStr(path), "r"))) {
		rb_raise(cNativeError, "failed to load model: failed to open model file");
	}
	
	mdl_load(model, file);
	fclose(file);
	
	return self;
}

#native_labelObject



43
# File 'lib/wapiti/model.rb', line 43

# Makes +label+, as defined at this point in the class, also reachable as
# +native_label+.
# NOTE(review): presumably this keeps the native implementation callable once
# a Ruby-level +label+ is defined -- confirm against the full file.
alias native_label label

#nftrObject Also known as: features



712
713
714
# File 'ext/wapiti/native.c', line 712

/*
 * Model#nftr: returns the native model's nftr field as a Ruby Integer.
 *
 * Uses SIZET2NUM rather than INT2FIX: INT2FIX performs no range check, so a
 * count outside the Fixnum range would be silently corrupted.
 * NOTE(review): assumes nftr fits in size_t, as other code in this file
 * treats it -- confirm against the mdl_t declaration.
 */
static VALUE model_nftr(VALUE self) {
	return SIZET2NUM(get_model(self)->nftr);
}

#nlblObject

Native accessors



704
705
706
# File 'ext/wapiti/native.c', line 704

/*
 * Model#nlbl: returns the native model's nlbl field as a Ruby Integer.
 *
 * Uses SIZET2NUM rather than INT2FIX: INT2FIX performs no range check, so a
 * count outside the Fixnum range would be silently corrupted.
 * NOTE(review): assumes nlbl fits in size_t, as other code in this file
 * treats it -- confirm against the mdl_t declaration.
 */
static VALUE model_nlbl(VALUE self) {
	return SIZET2NUM(get_model(self)->nlbl);
}

#nobsObject Also known as: observations



708
709
710
# File 'ext/wapiti/native.c', line 708

/*
 * Model#nobs: returns the native model's nobs field as a Ruby Integer.
 *
 * Uses SIZET2NUM rather than INT2FIX: INT2FIX performs no range check, so a
 * count outside the Fixnum range would be silently corrupted.
 * NOTE(review): assumes nobs fits in size_t, as other code in this file
 * treats it -- confirm against the mdl_t declaration.
 */
static VALUE model_nobs(VALUE self) {
	return SIZET2NUM(get_model(self)->nobs);
}

#patternObject



35
36
37
# File 'lib/wapiti/model.rb', line 35

# Convenience reader that delegates to the model's options.
#
# @return the value of +options.pattern+
def pattern
	options.pattern
end

#pattern=(filename) ⇒ Object



39
40
41
# File 'lib/wapiti/model.rb', line 39

# Convenience writer that delegates to the model's options.
#
# @param filename the value assigned to +options.pattern+
def pattern=(filename)
	options.pattern = filename
end

#save(*args) ⇒ Object

Saves the model to a file. Uses the Model’s current path if no argument is given; otherwise uses the passed-in argument as the Model’s path.



739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
# File 'ext/wapiti/native.c', line 739

/*
 * Model#save: writes the model to a file.
 *
 * With one String argument, stores it in @path first. The file named by
 * @path is then opened for writing and handed to mdl_save.
 *
 * Raises ArgumentError for more than one argument, TypeError (via
 * Check_Type) when the argument is not a String, and cNativeError when no
 * path is set, the file cannot be opened, or closing the file fails.
 *
 * Returns self.
 */
static VALUE model_save(int argc, VALUE *argv, VALUE self) {
	if (argc > 1) {
		rb_raise(rb_eArgError,
			"wrong number of arguments (%d for 0..1)", argc);
	}
	
	mdl_t *model = get_model(self);
	
	// remember a passed-in path in @path
	if (argc) {
		Check_Type(argv[0], T_STRING);
		rb_ivar_set(self, rb_intern("@path"), argv[0]);
	}

	// open the output file
	FILE *file = 0;
	VALUE path = rb_ivar_get(self, rb_intern("@path"));
	
	if (NIL_P(path)) {
		rb_raise(cNativeError, "failed to save model: no path given");
	}
	
	if (!(file = fopen(StringValueCStr(path), "w"))) {
		rb_raise(cNativeError, "failed to save model: failed to open model file");
	}
	
	mdl_save(model, file);

	// Buffered output is flushed on close; a failure here means the model
	// file may be incomplete, so report it instead of ignoring it.
	if (fclose(file) != 0) {
		rb_raise(cNativeError, "failed to save model: failed to write model file");
	}

	return self;
}

#statisticsObject Also known as: stats



50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/wapiti/model.rb', line 50

# Summarises the token and sequence counters.
#
# @return [Hash] a Hash with the keys +:tokens+ and +:sequences+; each maps
#   to a Hash holding +:total+ (the count), +:errors+ (the error count) and
#   +:rate+ (errors as a percentage of the total, 0.0 when the total is zero)
def statistics
	# errors / total * 100; guarded so that zeroed counters give 0.0, not NaN
	percentage = lambda do |errors, total|
		total.zero? ? 0.0 : errors * 100.0 / total
	end

	{
		:tokens => {
			:total => token_count, :errors => token_errors,
			:rate => percentage.call(token_errors, token_count)
		},
		:sequences => {
			:total => sequence_count, :errors => sequence_errors,
			:rate => percentage.call(sequence_errors, sequence_count)
		}
	}
end

#syncObject

Instance methods



723
724
725
726
# File 'ext/wapiti/native.c', line 723

/*
 * Model#sync: runs mdl_sync on the native model wrapped by self.
 *
 * Returns self.
 */
static VALUE model_sync(VALUE self) {
	mdl_sync(get_model(self));
	return self;
}

#totalObject



716
717
718
# File 'ext/wapiti/native.c', line 716

/*
 * Model#total: returns the native model's `total` field as a Ruby Float.
 */
static VALUE model_total(VALUE self) {
	return rb_float_new(get_model(self)->total);
}

#train(data) ⇒ Object



853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
# File 'ext/wapiti/native.c', line 853

/*
 * Model#train: trains the model on the given data.
 *
 * `data` is either a String (the name of a training data file) or an Array
 * (converted with to_dat). Steps, in order:
 *   1. look up the training algorithm named by the options in trn_lst
 *   2. load the pattern file, if the options name one
 *   3. read the training data, then lock the label and observation quarks
 *   4. read the development data, if the options name a file
 *   5. call `sync` on self, run the trainer, and call `compact` on self when
 *      the compact option is set
 *
 * Raises cNativeError when the algorithm is unknown, a file cannot be
 * opened, `data` has an unsupported type, or no training data was loaded.
 *
 * Returns self.
 */
static VALUE model_train(VALUE self, VALUE data) {
	
	mdl_t* model = get_model(self);
	
	// Find the entry in trn_lst whose name matches the configured algorithm.
	int trn;
	for (trn = 0; trn < trn_cnt; trn++) {
		if (!strcmp(model->opt->algo, trn_lst[trn].name)) break;
	}
	
	if (trn == trn_cnt) {
		rb_raise(cNativeError, "failed to train model: unknown algorithm '%s'", model->opt->algo);
	}
	
	FILE *file;
	
	// Load the pattern file. This will unlock the database if previously
	// locked by loading a model.
	if (model->opt->pattern) {
		file = fopen(model->opt->pattern, "r");
		
		if (!file) {
			rb_raise(cNativeError, "failed to train model: failed to load pattern file '%s'", model->opt->pattern);
		}

		rdr_loadpat(model->reader, file);
		fclose(file);
	}
	else {
		// rb_raise(cNativeError, "failed to train model: no pattern given");
	}

	qrk_lock(model->reader->obs, false);
	
	
	// Load the training data. When this is done we lock the quarks as we
	// don't want to put in the model information present only in the
	// development set.
	
	switch (TYPE(data)) {
		case T_STRING:
			// StringValueCStr rejects strings with embedded NUL bytes, which
			// would otherwise silently truncate the file name given to fopen.
			if (!(file = fopen(StringValueCStr(data), "r"))) {
				rb_raise(cNativeError, "failed to train model: failed to open training data '%s'", StringValueCStr(data));
			}

			model->train = rdr_readdat(model->reader, file, true);
			fclose(file);
			
			break;
		case T_ARRAY:
			model->train = to_dat(model->reader, data, true);

			break;
		default:
			rb_raise(cNativeError, "failed to train model: invalid training data type (expected instance of String or Array)");
	}

	qrk_lock(model->reader->lbl, true);
	qrk_lock(model->reader->obs, true);
	
	if (!model->train || model->train->nseq == 0) {
		rb_raise(cNativeError, "failed to train model: no training data loaded");
	}

	// If present, load the development set in the model. If not specified,
	// the training dataset will be used instead.
	if (model->opt->devel) {
		if (!(file = fopen(model->opt->devel, "r"))) {
			rb_raise(cNativeError, "failed to train model: cannot open development file '%s'", model->opt->devel);
		}
		
		model->devel = rdr_readdat(model->reader, file, true);
		fclose(file);
	}
	
	// Initialize the model. If a previous model was loaded, this will be
	// just a resync, else the model structure will be created.
	rb_funcall(self, rb_intern("sync"), 0);

	// Train the model.
	uit_setup(model);
	trn_lst[trn].train(model);
	uit_cleanup(model);
	
	// If requested compact the model.
	if (model->opt->compact) {
		rb_funcall(self, rb_intern("compact"), 0);
	}	
	
	return self;
}