Class: Wapiti::Model

Inherits:
Object
  • Object
show all
Defined in:
lib/wapiti/model.rb,
ext/wapiti/native.c

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Object



660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
# File 'ext/wapiti/native.c', line 660

// Model#initialize(options = nil) { |options| ... }
//
// Accepts no argument (a fresh cOptions instance is created), a Hash (wrapped
// in a new cOptions instance), or an existing cOptions instance (used as is).
// The options object is yielded to the block, if one is given, before it is
// attached to the model with model_set_options.
//
// Raises ArgumentError when more than one argument is passed and cNativeError
// when the argument is neither a Hash nor an options instance. Returns self.
static VALUE initialize_model(int argc, VALUE *argv, VALUE self) {
  VALUE options;

  if (argc > 1) {
    rb_raise(rb_const_get(rb_mKernel, rb_intern("ArgumentError")),
      "wrong number of arguments (%d for 0..1)", argc);
  }

  if (argc) {
    if (TYPE(argv[0]) == T_HASH) {
      options = rb_funcall(cOptions, rb_intern("new"), 1, argv[0]);
    }
    else {
      // Check the real class hierarchy instead of comparing class names.
      // A name-prefix comparison lets any object whose class name merely
      // starts with "Wapiti::Options" through, and get_options() below would
      // then unwrap an object that does not hold an options struct.
      if (!RTEST(rb_obj_is_kind_of(argv[0], cOptions))) {
        rb_raise(cNativeError, "argument must be a hash or an options instance");
      }
      options = argv[0];
    }
  }
  else {
    options = rb_funcall(cOptions, rb_intern("new"), 0);
  }

  // yield options if block_given?
  if (rb_block_given_p()) {
    rb_yield(options);
  }

  model_set_options(self, options);

  // Load a previous model if the native options carry a model entry
  if (get_options(options)->model) {
    rb_funcall(self, rb_intern("load"), 0);
  }

  // initialize counters
  rb_funcall(self, rb_intern("clear_counters"), 0);

  return self;
}

Instance Attribute Details

#options ⇒ Object (readonly)

#path ⇒ Object

Returns the value of attribute path.



31
32
33
# File 'lib/wapiti/model.rb', line 31

# Reader for the @path instance variable, the location the native #load and
# #save methods open when they are called without an argument.
#
# @return [Object] the current value of @path
def path
  @path
end

#sequence_count ⇒ Object (readonly)

Returns the value of attribute sequence_count.



33
34
35
# File 'lib/wapiti/model.rb', line 33

# Reader for the @sequence_count counter. The counter is reset to 0 by
# #clear_counters and is the divisor used by #sequence_error_rate.
#
# @return [Object] the current value of @sequence_count
def sequence_count
  @sequence_count
end

#sequence_errors ⇒ Object (readonly)

Returns the value of attribute sequence_errors.



33
34
35
# File 'lib/wapiti/model.rb', line 33

# Reader for the @sequence_errors counter. The counter is reset to 0 by
# #clear_counters and is the dividend used by #sequence_error_rate.
#
# @return [Object] the current value of @sequence_errors
def sequence_errors
  @sequence_errors
end

#token_count ⇒ Object (readonly)

Returns the value of attribute token_count.



33
34
35
# File 'lib/wapiti/model.rb', line 33

# Reader for the @token_count counter. The counter is reset to 0 by
# #clear_counters and is the divisor used by #token_error_rate.
#
# @return [Object] the current value of @token_count
def token_count
  @token_count
end

#token_errors ⇒ Object (readonly)

Returns the value of attribute token_errors.



33
34
35
# File 'lib/wapiti/model.rb', line 33

# Reader for the @token_errors counter. The counter is reset to 0 by
# #clear_counters and is the dividend used by #token_error_rate.
#
# @return [Object] the current value of @token_errors
def token_errors
  @token_errors
end

Class Method Details

.load(filename) ⇒ Object



22
23
24
25
26
27
# File 'lib/wapiti/model.rb', line 22

# Builds a new instance with default arguments, points it at +filename+ and
# asks it to load itself.
#
# @param filename [Object] value assigned to the new instance's path
# @return [Object] the freshly created instance, after #load has been called
def load(filename)
  new.tap do |model|
    model.path = filename
    model.load
  end
end

.train(data, options, &block) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/wapiti/model.rb', line 7

# Validates the given options and, when they are valid, trains a new instance
# on +data+.
#
# @param data [Object] training input handed to the instance-level #train
# @param options [Object] passed to Options.new together with the block
# @yield forwarded to Options.new
# @return [Object] whatever the instance-level #train returns
# @raise [ConfigurationError] when the resulting options are not valid; the
#   message lists the entries of Options#validate joined by '; '
def train(data, options, &block)
  config = Options.new(options, &block)

  # Disabled pattern check kept for reference:
  # if config.pattern.empty?
  #   raise ConfigurationError, 'invalid options: no pattern specified'
  # end

  if config.valid?
    new(config).train(data)
  else
    raise ConfigurationError, "invalid options: #{ config.validate.join('; ') }"
  end
end

Instance Method Details

#clear_counters ⇒ Object Also known as: clear



71
72
73
# File 'lib/wapiti/model.rb', line 71

# Resets all four evaluation counters (@sequence_errors, @sequence_count,
# @token_errors and @token_count) to zero.
#
# @return [Integer] 0, the value of the final assignment
def clear_counters
  @sequence_errors = 0
  @sequence_count = 0
  @token_errors = 0
  @token_count = 0
end

#compact ⇒ Object



728
729
730
731
# File 'ext/wapiti/native.c', line 728

// Model#compact
//
// Calls the native mdl_compact routine on the model wrapped by self and
// returns self, so the call can be chained from Ruby.
static VALUE model_compact(VALUE self) {
  mdl_compact(get_model(self));
  return self;
}

#label(data) ⇒ Object



1140
1141
1142
1143
# File 'ext/wapiti/native.c', line 1140

# Labels +input+ with the native labeller.
#
# The block is captured explicitly and forwarded. The previous implicit
# capture via a bare Proc.new was deprecated in Ruby 2.7 and raises
# ArgumentError on Ruby 3.0+, which broke every call that passed a block.
#
# @param input [Object] handed to native_label unchanged
# @param opts [Object, nil] when not nil, applied to the model's options via
#   options.update before labelling
# @yield forwarded to native_label when a block is given
# @return [Object] whatever native_label returns
def label(input, opts = nil, &block)
  options.update(opts) unless opts.nil?
  # Passing a nil block is the same as passing no block at all.
  native_label(input, &block)
end

#labels ⇒ Object

Returns a sorted list of all labels in the Model’s label database.



947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
# File 'ext/wapiti/native.c', line 947

// Model#labels
//
// Builds a Ruby Array holding one String per label id in [0, nlbl), each
// looked up with qrk_id2str in the reader's label database, sorts the array
// in place with Array#sort! and returns it.
static VALUE model_labels(VALUE self) {
  mdl_t *mdl = get_model(self);
  qrk_t *database = mdl->reader->lbl;
  const uint32_t count = mdl->nlbl;

  VALUE result = rb_ary_new2(count);
  unsigned int id = 0;

  while (id < count) {
    rb_ary_push(result, rb_str_new2(qrk_id2str(database, id)));
    ++id;
  }

  rb_funcall(result, rb_intern("sort!"), 0);

  return result;
}

#load(*args) ⇒ Object



771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
# File 'ext/wapiti/native.c', line 771

// Model#load(path = nil)
//
// Reads a model from a file. When an argument is given it must be a String
// and is stored in @path first; the file named by @path is then opened for
// reading and handed to mdl_load.
//
// Raises ArgumentError for more than one argument and cNativeError when no
// path is set or the file cannot be opened. Returns self.
static VALUE model_load(int argc, VALUE *argv, VALUE self) {
  FILE *stream = 0;
  VALUE location;
  mdl_t *target;

  if (argc > 1) {
    rb_raise(rb_const_get(rb_mKernel, rb_intern("ArgumentError")),
      "wrong number of arguments (%d for 0..1)", argc);
  }

  target = get_model(self);

  // remember an explicitly passed path in @path
  if (argc) {
    Check_Type(argv[0], T_STRING);
    rb_ivar_set(self, rb_intern("@path"), argv[0]);
  }

  // resolve the path to read from
  location = rb_ivar_get(self, rb_intern("@path"));
  if (NIL_P(location)) {
    rb_raise(cNativeError, "failed to load model: no path given");
  }

  // open the model file
  stream = fopen(StringValueCStr(location), "r");
  if (!stream) {
    rb_raise(cNativeError, "failed to load model: failed to open model file");
  }

  mdl_load(target, stream);
  fclose(stream);

  return self;
}

#nftr ⇒ Object Also known as: features



712
713
714
# File 'ext/wapiti/native.c', line 712

// Model#nftr
//
// Returns the nftr field of the native model converted with INT2FIX.
// Presumably the feature count, since the method is also exposed as
// `features` -- confirm against the native model definition.
// NOTE(review): INT2FIX performs no range check; confirm nftr always fits in
// a Fixnum on every supported platform.
static VALUE model_nftr(VALUE self) {
  return INT2FIX(get_model(self)->nftr);
}

#nlbl ⇒ Object

Native accessor: returns the native model's label count (`nlbl`) as an Integer.



704
705
706
# File 'ext/wapiti/native.c', line 704

// Model#nlbl
//
// Returns the nlbl field of the native model converted with INT2FIX. The same
// field bounds the label ids enumerated by model_labels.
static VALUE model_nlbl(VALUE self) {
  return INT2FIX(get_model(self)->nlbl);
}

#nobs ⇒ Object Also known as: observations



708
709
710
# File 'ext/wapiti/native.c', line 708

// Model#nobs
//
// Returns the nobs field of the native model converted with INT2FIX.
// Presumably the observation count, since the method is also exposed as
// `observations` -- confirm against the native model definition.
// NOTE(review): INT2FIX performs no range check; confirm nobs always fits in
// a Fixnum on every supported platform.
static VALUE model_nobs(VALUE self) {
  return INT2FIX(get_model(self)->nobs);
}

#pattern ⇒ Object



35
36
37
# File 'lib/wapiti/model.rb', line 35

# Convenience reader that delegates to the model's options.
#
# @return [Object] the value of options.pattern
def pattern
  options.pattern
end

#pattern=(filename) ⇒ Object



39
40
41
# File 'lib/wapiti/model.rb', line 39

# Convenience writer that delegates to the model's options.
#
# @param filename [Object] value assigned to options.pattern
def pattern=(filename)
  options.pattern = filename
end

#save(*args) ⇒ Object

Saves the model to the file named by the Model’s current path when called without an argument; otherwise uses the passed-in argument as the Model’s path.



739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
# File 'ext/wapiti/native.c', line 739

// Model#save(path = nil)
//
// Writes the model to a file. When an argument is given it must be a String
// and is stored in @path first; the file named by @path is then opened for
// writing and handed to mdl_save.
//
// Raises ArgumentError for more than one argument and cNativeError when no
// path is set, the file cannot be opened, or the data could not be written
// out completely. Returns self.
static VALUE model_save(int argc, VALUE *argv, VALUE self) {
  if (argc > 1) {
    rb_raise(rb_const_get(rb_mKernel, rb_intern("ArgumentError")),
      "wrong number of arguments (%d for 0..1)", argc);
  }

  mdl_t *model = get_model(self);

  // remember an explicitly passed path in @path
  if (argc) {
    Check_Type(argv[0], T_STRING);
    rb_ivar_set(self, rb_intern("@path"), argv[0]);
  }

  // open the output file
  FILE *file = 0;
  VALUE path = rb_ivar_get(self, rb_intern("@path"));

  if (NIL_P(path)) {
    rb_raise(cNativeError, "failed to save model: no path given");
  }

  if (!(file = fopen(StringValueCStr(path), "w"))) {
    rb_raise(cNativeError, "failed to save model: failed to open model file");
  }

  mdl_save(model, file);

  // stdio buffers output, so a write failure (full disk, I/O error) may only
  // show up in the stream's error flag or when fclose flushes the buffer.
  // Always close the stream, then report either kind of failure rather than
  // silently leaving a truncated model file behind.
  int failed = ferror(file);
  if (fclose(file) != 0) {
    failed = 1;
  }
  if (failed) {
    rb_raise(cNativeError, "failed to save model: failed to write model file");
  }

  return self;
}

#sequence_error_rate ⇒ Object



82
83
84
85
# File 'lib/wapiti/model.rb', line 82

# Percentage of sequences counted as errors.
#
# @return [Integer, Float] the Integer 0 when sequence_errors is zero,
#   otherwise sequence_errors divided by sequence_count, times 100, as a Float
def sequence_error_rate
  if sequence_errors.zero?
    0
  else
    (sequence_errors / sequence_count.to_f) * 100.0
  end
end

#statistics ⇒ Object Also known as: stats



58
59
60
61
62
63
64
65
66
67
# File 'lib/wapiti/model.rb', line 58

# Summarises the evaluation counters as a nested Hash.
#
# @return [Hash] with the keys :tokens and :sequences, each mapping to a Hash
#   with :total, :errors and :rate taken from the matching count, error and
#   error-rate methods
def statistics
  {
    :tokens => {
      :total => token_count,
      :errors => token_errors,
      :rate => token_error_rate
    },
    :sequences => {
      :total => sequence_count,
      :errors => sequence_errors,
      :rate => sequence_error_rate
    }
  }
end

#sync ⇒ Object

Instance method: calls the native `mdl_sync` routine on the underlying model and returns self.



723
724
725
726
# File 'ext/wapiti/native.c', line 723

// Model#sync
//
// Calls the native mdl_sync routine on the model wrapped by self and returns
// self, so the call can be chained from Ruby.
static VALUE model_sync(VALUE self) {
  mdl_sync(get_model(self));
  return self;
}

#token_error_rate ⇒ Object



77
78
79
80
# File 'lib/wapiti/model.rb', line 77

# Percentage of tokens counted as errors.
#
# @return [Integer, Float] the Integer 0 when token_errors is zero, otherwise
#   token_errors divided by token_count, times 100, as a Float
def token_error_rate
  if token_errors.zero?
    0
  else
    (token_errors / token_count.to_f) * 100.0
  end
end

#total ⇒ Object



716
717
718
# File 'ext/wapiti/native.c', line 716

// Model#total
//
// Returns the total field of the native model as a Ruby Float.
static VALUE model_total(VALUE self) {
  return rb_float_new(get_model(self)->total);
}

#train(data) ⇒ Object



853
854
855
856
# File 'ext/wapiti/native.c', line 853

# Trains the model on +input+ with the native trainer.
#
# The block is captured explicitly and forwarded. The previous implicit
# capture via a bare Proc.new was deprecated in Ruby 2.7 and raises
# ArgumentError on Ruby 3.0+, which broke every call that passed a block.
#
# @param input [Object] handed to native_train unchanged
# @param opts [Object, nil] when not nil, applied to the model's options via
#   options.update before training
# @yield forwarded to native_train when a block is given
# @return [Object] whatever native_train returns
def train(input, opts = nil, &block)
  options.update(opts) unless opts.nil?
  # Passing a nil block is the same as passing no block at all.
  native_train(input, &block)
end