Class: Statsample::Regression::Multiple::BaseEngine

Inherits:
Object
  • Object
show all
Includes:
Summarizable
Defined in:
lib/statsample/regression/multiple/baseengine.rb

Overview

Base class for Multiple Regression Engines

Direct Known Subclasses

AlglibEngine, GslEngine, MatrixEngine

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Summarizable

#summary

Constructor Details

#initialize(ds, y_var, opts = Hash.new) ⇒ BaseEngine

Returns a new instance of BaseEngine.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/statsample/regression/multiple/baseengine.rb', line 20

# Sets up the state shared by every multiple-regression engine.
#
# Stores the dataset and the dependent-variable name, derives the number of
# predictors (vector count minus one) and the case counts from the dataset,
# builds a default analysis name, and finally applies +opts+.
#
# @param ds [Object] dataset; must respond to +vectors+ (which in turn
#   responds to +size+ and +to_a+) and to +nrows+
# @param y_var [Object] name of the dependent variable
# @param opts [Hash] options merged over the default <tt>{:digits=>3}</tt>;
#   every key that has a public writer (+key=+) on this object is assigned
# @return [BaseEngine]
def initialize(ds, y_var, opts = Hash.new)
  @ds = ds
  @predictors_n = @ds.vectors.size - 1
  @total_cases = @ds.nrows
  @cases = @ds.nrows
  @y_var = y_var
  @r2 = nil
  @name = _("Multiple Regression:  %s over %s") % [ds.vectors.to_a.join(","), @y_var]

  opts_default = {:digits => 3}
  @opts = opts_default.merge opts

  @opts.each do |k, v|
    # Probe for the writer rather than the reader: a read-only attribute
    # responds to +k+ but not to +k=+, which previously raised NoMethodError
    # when such a key was supplied in +opts+.
    writer = "#{k}="
    send(writer, v) if respond_to?(writer)
  end
end

Instance Attribute Details

#cases ⇒ Object (readonly)

Minimum number of valid cases for pairs of correlation



10
11
12
# File 'lib/statsample/regression/multiple/baseengine.rb', line 10

# Reader for the case count held in @cases.
#
# @return [Object] the current value of @cases
def cases
  @cases
end

#digits ⇒ Object

Returns the value of attribute digits.



16
17
18
# File 'lib/statsample/regression/multiple/baseengine.rb', line 16

# Reader for the value held in @digits.
#
# @return [Object] the current value of @digits
def digits
  @digits
end

#name ⇒ Object

Name of analysis



8
9
10
# File 'lib/statsample/regression/multiple/baseengine.rb', line 8

# Reader for the analysis name held in @name.
#
# @return [Object] the current value of @name
def name
  @name
end

#total_cases ⇒ Object (readonly)

Number of total cases (dataset.cases)



14
15
16
# File 'lib/statsample/regression/multiple/baseengine.rb', line 14

# Reader for the total case count held in @total_cases.
#
# @return [Object] the current value of @total_cases
def total_cases
  @total_cases
end

#valid_cases ⇒ Object (readonly)

Number of valid cases (listwise)



12
13
14
# File 'lib/statsample/regression/multiple/baseengine.rb', line 12

# Reader for the listwise-valid case count held in @valid_cases.
#
# @return [Object] the current value of @valid_cases
def valid_cases
  @valid_cases
end

Class Method Details

.univariate? ⇒ Boolean

Returns:

  • (Boolean)


17
18
19
# File 'lib/statsample/regression/multiple/baseengine.rb', line 17

# Class-level predicate; this implementation unconditionally answers true.
#
# @return [Boolean] always +true+
def self.univariate?
  true
end

Instance Method Details

#anova ⇒ Object

Calculate F Test



37
38
39
# File 'lib/statsample/regression/multiple/baseengine.rb', line 37

# F test for the regression, built once and cached in @anova.
#
# The one-way ANOVA object is fed the regression sum of squares and degrees
# of freedom as numerator and the error sum of squares and degrees of
# freedom as denominator.
#
# @return [Statsample::Anova::OneWay] the cached ANOVA object
def anova
  return @anova if @anova

  @anova = Statsample::Anova::OneWay.new(
    :ss_num => ssr,
    :ss_den => sse,
    :df_num => df_r,
    :df_den => df_e,
    :name_numerator => _("Regression"),
    :name_denominator => _("Error"),
    :name => "ANOVA"
  )
end

#assign_names(c) ⇒ Object



212
213
214
215
216
217
218
# File 'lib/statsample/regression/multiple/baseengine.rb', line 212

# Pairs every field name in @fields with the element of +c+ found at the
# same position.
#
# @param c [#[]] positionally indexed values, one per field
# @return [Hash] field name => +c[position]+
def assign_names(c)
  @fields.each_with_index.each_with_object({}) do |(field, position), named|
    named[field] = c[position]
  end
end

#coeffs_se ⇒ Object

Standard Error for coefficients



149
150
151
152
153
154
155
156
# File 'lib/statsample/regression/multiple/baseengine.rb', line 149

# Standard error of every coefficient.
#
# For each predictor the mean square error (+sse+ divided by +df_e+) is
# divided by the predictor's sum of squares times its tolerance, and the
# square root of that ratio is stored.
#
# @return [Hash] predictor name => standard error
def coeffs_se
  mean_square_error = sse.quo(df_e)
  coeffs.each_with_object({}) do |(predictor, _coefficient), errors|
    spread = @ds[predictor].sum_of_squares * tolerance(predictor)
    errors[predictor] = Math.sqrt(mean_square_error / spread)
  end
end

#coeffs_t ⇒ Object

T values for coeffs



101
102
103
104
105
106
107
108
# File 'lib/statsample/regression/multiple/baseengine.rb', line 101

# t value of every coefficient: the coefficient divided by its standard
# error as reported by +coeffs_se+.
#
# @return [Hash] predictor name => t value
def coeffs_t
  standard_errors = coeffs_se
  coeffs.each_with_object({}) do |(predictor, coefficient), t_values|
    t_values[predictor] = coefficient / standard_errors[predictor]
  end
end

#coeffs_tolerances ⇒ Object

Tolerances for each coefficient



142
143
144
145
146
147
# File 'lib/statsample/regression/multiple/baseengine.rb', line 142

# Tolerance of every field listed in @fields.
#
# @return [Hash] field name => result of +tolerance(field)+
def coeffs_tolerances
  @fields.each_with_object({}) do |field, tolerances|
    tolerances[field] = tolerance(field)
  end
end

#constant_se ⇒ Object

Standard error for constant



182
183
184
# File 'lib/statsample/regression/multiple/baseengine.rb', line 182

# Standard error of the constant: the top-left (row 0, column 0) entry of
# the matrix returned by +estimated_variance_covariance_matrix+.
#
# @return [Object] that entry, as produced by the matrix
def constant_se
  standard_errors = estimated_variance_covariance_matrix
  standard_errors[0, 0]
end

#constant_t ⇒ Object

T for constant



178
179
180
# File 'lib/statsample/regression/multiple/baseengine.rb', line 178

# t value of the constant: the constant divided by its standard error.
#
# @return [Float, nil] the t value, or +nil+ when +constant_se+ is +nil+
def constant_t
  standard_error = constant_se
  # estimated_variance_covariance_matrix maps negative entries to nil, so
  # the standard error can be missing; answer nil instead of letting the
  # division raise TypeError (report_building already checks for nil).
  return nil if standard_error.nil?

  constant.to_f / standard_error
end

#df_e ⇒ Object

Degrees of freedom for error



122
123
124
# File 'lib/statsample/regression/multiple/baseengine.rb', line 122

# Degrees of freedom for the error term: valid cases minus the number of
# predictors, minus one.
#
# @return [Numeric]
def df_e
  cases_after_predictors = @valid_cases - @predictors_n
  cases_after_predictors - 1
end

#df_r ⇒ Object

Degrees of freedom for regression



118
119
120
# File 'lib/statsample/regression/multiple/baseengine.rb', line 118

# Degrees of freedom for the regression: the predictor count held in
# @predictors_n.
#
# @return [Object] the current value of @predictors_n
def df_r
  @predictors_n
end

#estimated_variance_covariance_matrix ⇒ Object

Estimated variance-covariance matrix. Used to calculate the standard error of the constant.



165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/statsample/regression/multiple/baseengine.rb', line 165

# Element-wise square roots of the estimated variance-covariance matrix of
# the coefficients.
#
# A design matrix is assembled from a leading column of ones (one per valid
# case) followed by every vector of @ds_valid except the dependent
# variable. The inverse of its cross-product is scaled by +mse+, and each
# entry is replaced by its square root.
#
# @return [Matrix] square roots of the scaled inverse; entries that were
#   negative become +nil+
def estimated_variance_covariance_matrix
  design_columns = [[1.0] * @valid_cases]
  @ds_valid.vectors.each do |vector_name|
    next if vector_name == @y_var

    design_columns << @ds_valid[vector_name].to_a
  end

  design = ::Matrix.columns(design_columns)
  scaled_inverse = (design.transpose * design).inverse * mse
  scaled_inverse.collect { |entry| entry >= 0 ? Math.sqrt(entry) : nil }
end

#f ⇒ Object

Fisher's F statistic from the ANOVA



126
127
128
# File 'lib/statsample/regression/multiple/baseengine.rb', line 126

# F statistic, as reported by the object returned from +anova+.
#
# @return [Object] whatever +anova.f+ answers
def f
  f_test = anova
  f_test.f
end

#mse ⇒ Object

Mean Square Error



114
115
116
# File 'lib/statsample/regression/multiple/baseengine.rb', line 114

# Mean square error: +sse+ divided (via +quo+) by +df_e+.
#
# @return [Numeric]
def mse
  error_sum_of_squares = sse
  error_sum_of_squares.quo(df_e)
end

#msr ⇒ Object

Mean square Regression



110
111
112
# File 'lib/statsample/regression/multiple/baseengine.rb', line 110

# Mean square of the regression: +ssr+ divided (via +quo+) by +df_r+.
#
# @return [Numeric]
def msr
  regression_sum_of_squares = ssr
  regression_sum_of_squares.quo(df_r)
end

#predicted ⇒ Object

Retrieves a vector with predicted values for y



45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/statsample/regression/multiple/baseengine.rb', line 45

# Predicted value of y for every case.
#
# For each case index the predictor values are gathered from @dep_columns;
# when any of them is nil the prediction is nil, otherwise it is whatever
# +process+ returns for that row.
#
# @return [Daru::Vector] one entry per case, nil where a predictor is missing
def predicted
  fitted = @total_cases.times.map do |row_index|
    inputs = @dep_columns.map { |column| column[row_index] }
    inputs.any?(&:nil?) ? nil : process(inputs)
  end
  Daru::Vector.new(fitted)
end

#probability ⇒ Object

p-value of the Fisher F test



130
131
132
# File 'lib/statsample/regression/multiple/baseengine.rb', line 130

# p-value of the F test, as reported by the object returned from +anova+.
#
# @return [Object] whatever +anova.probability+ answers
def probability
  f_test = anova
  f_test.probability
end

#process(v) ⇒ Object



240
241
242
243
244
245
246
247
# File 'lib/statsample/regression/multiple/baseengine.rb', line 240

# Evaluates the regression equation for one row of predictor values.
#
# Starts from +constant+ and adds, for every field in @fields, the field's
# coefficient times the value of +v+ at the same position.
#
# @param v [#[]] predictor values, ordered like @fields
# @return [Numeric] the value of the equation for that row
def process(v)
  weights = coeffs
  @fields.each_with_index.inject(constant) do |sum, (field, position)|
    sum + weights[field] * v[position]
  end
end

#r ⇒ Object

R Multiple



77
78
79
# File 'lib/statsample/regression/multiple/baseengine.rb', line 77

# Multiple R. This base implementation is only a placeholder and always
# raises.
#
# @raise [RuntimeError] always, with the message "You should implement this"
def r
  raise RuntimeError, "You should implement this"
end

#r2_adjusted ⇒ Object

Adjusted R^2. Estimates the population R^2 using the Ezekiel formula. Always lower than the sample R^2.

Reference:

  • Leach, L. & Henson, R. (2007). The Use and Impact of Adjusted R2 Effects in Published Regression Research. Multiple Linear Regression Viewpoints, 33(1), 1-11.



89
90
91
# File 'lib/statsample/regression/multiple/baseengine.rb', line 89

# Adjusted R^2: +r2+ reduced by (1 - r2) times the number of predictors,
# divided (via +quo+) by +df_e+.
#
# @return [Numeric]
def r2_adjusted
  shrinkage = ((1 - r2) * @predictors_n).quo(df_e)
  r2 - shrinkage
end

#report_building(b) ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/statsample/regression/multiple/baseengine.rb', line 185

# Writes the regression report into the report builder +b+.
#
# Inside one section named after @name it emits: the engine class, the case
# counts, R, R^2, adjusted R^2, the standard error of estimate, the fitted
# equation, the ANOVA element, and a "Beta coefficients" table with one row
# for the constant followed by one row per field.
#
# @param b [Object] report builder; must respond to +section+ and yield an
#   object responding to +text+, +parse_element+ and +table+
# @return [Object] whatever +b.section+ returns
def report_building(b)
  number_format = "%0.#{digits}f"
  b.section(:name => @name) do |section|
    betas = coeffs
    section.text(_("Engine: %s") % self.class)
    section.text(_("Cases(listwise)=%d(%d)") % [@total_cases, @valid_cases])
    section.text(_("R=") + (number_format % r))
    section.text(_("R^2=") + (number_format % r2))
    section.text(_("R^2 Adj=") + (number_format % r2_adjusted))
    section.text(_("Std.Error R=") + (number_format % se_estimate))

    # Each term is the formatted coefficient immediately followed by its field name.
    terms = @fields.collect { |field| sprintf("#{number_format}%s", betas[field], field) }
    section.text(_("Equation") + "=" + sprintf(number_format, constant) + " + " + terms.join(' + '))

    section.parse_element(anova)
    standardized_betas = standarized_coeffs
    standard_errors = coeffs_se

    column_titles = %w{coeff b beta se t}.collect { |title| _(title) }
    section.table(:name => _("Beta coefficients"), :header => column_titles) do |table|
      # The constant's standard error and t value may be nil; show them blank.
      se_cell = constant_se.nil? ? "" : sprintf(number_format, constant_se)
      t_cell = constant_t.nil? ? "" : sprintf(number_format, constant_t)
      table.row([_("Constant"), sprintf(number_format, constant), "-", se_cell, t_cell])

      @fields.each do |field|
        table.row([
          field,
          sprintf(number_format, betas[field]),
          sprintf(number_format, standardized_betas[field]),
          sprintf(number_format, standard_errors[field]),
          sprintf(number_format, betas[field].quo(standard_errors[field]))
        ])
      end
    end
  end
end

#residuals ⇒ Object

Retrieves a vector with residual values for y



63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/statsample/regression/multiple/baseengine.rb', line 63

# Residual (observed y minus the value from +process+) for every case.
#
# A case yields nil when any of its predictor values in @dep_columns is nil
# or when its observed y value is nil.
#
# @return [Daru::Vector] one entry per case
def residuals
  errors = (0...@total_cases).map do |row_index|
    inputs = @dep_columns.map { |column| column[row_index] }
    if inputs.any?(&:nil?)
      nil
    else
      observed = @ds[@y_var][row_index]
      observed.nil? ? nil : observed - process(inputs)
    end
  end
  Daru::Vector.new(errors)
end

#se_estimate ⇒ Object

Standard error of estimate



41
42
43
# File 'lib/statsample/regression/multiple/baseengine.rb', line 41

# Standard error of estimate: the square root of +sse+ divided (via +quo+)
# by +df_e+.
#
# @return [Float]
def se_estimate
  error_variance = sse.quo(df_e)
  Math.sqrt(error_variance)
end

#se_r2 ⇒ Object

Standard error of R^2 ????



159
160
161
# File 'lib/statsample/regression/multiple/baseengine.rb', line 159

# Standard error of R^2, computed as
#   sqrt( 4 * r2 * (1 - r2)^2 * df_e^2 / ((cases^2 - 1) * (cases + 3)) )
# where +cases+ is the value held in @cases.
#
# @return [Float]
def se_r2
  numerator = 4 * r2 * (1 - r2)**2 * df_e**2
  denominator = (@cases**2 - 1) * (@cases + 3)
  Math.sqrt(numerator.quo(denominator))
end

#sse ⇒ Object

Sum of squares (Error)



97
98
99
# File 'lib/statsample/regression/multiple/baseengine.rb', line 97

# Error sum of squares: +sst+ minus +ssr+.
#
# @return [Numeric]
def sse
  total_sum_of_squares = sst
  total_sum_of_squares - ssr
end

#sse_direct ⇒ Object



237
238
239
# File 'lib/statsample/regression/multiple/baseengine.rb', line 237

# Error sum of squares, computed here exactly like +sse+: +sst+ minus +ssr+.
#
# @return [Numeric]
def sse_direct
  total_sum_of_squares = sst
  total_sum_of_squares - ssr
end

#ssr ⇒ Object

Sum of squares (regression)



93
94
95
# File 'lib/statsample/regression/multiple/baseengine.rb', line 93

# Regression sum of squares: +r2+ times +sst+.
#
# @return [Numeric]
def ssr
  explained_share = r2
  explained_share * sst
end

#ssr_direct ⇒ Object

Sum of squares of regression using the predicted value minus y mean



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/statsample/regression/multiple/baseengine.rb', line 222

# Regression sum of squares computed directly: the sum, over every case
# whose predictor values are all present, of the squared difference between
# the value from +process+ and the mean of @dy.
#
# Cases with a nil in any column of @dep_columns are skipped.
#
# @return [Numeric] the accumulated sum of squares (0 when no case qualifies)
def ssr_direct
  y_mean = @dy.mean
  # Walk the rows by @total_cases (set from the dataset's +nrows+), as
  # +residuals+ does; the dataset is not asked for +cases+, which the rest
  # of this class never relies on.
  (0...@total_cases).inject(0) do |sum, row_index|
    inputs = @dep_columns.collect { |column| column[row_index] }
    if inputs.any?(&:nil?)
      sum
    else
      sum + (process(inputs) - y_mean)**2
    end
  end
end

#sst ⇒ Object

Sum of squares Total



81
82
83
# File 'lib/statsample/regression/multiple/baseengine.rb', line 81

# Total sum of squares. This base implementation is only a placeholder and
# always raises.
#
# @raise [RuntimeError] always, with the message "You should implement this"
def sst
  raise RuntimeError, "You should implement this"
end

#standarized_predicted ⇒ Object

Retrieves a vector with standardized predicted values for y



59
60
61
# File 'lib/statsample/regression/multiple/baseengine.rb', line 59

# Standardized version of the predicted values: calls +standarized+ on the
# vector returned by +predicted+.
#
# @return [Object] whatever +predicted.standarized+ answers
def standarized_predicted
  fitted_values = predicted
  fitted_values.standarized
end

#tolerance(var) ⇒ Object

Tolerance for a given variable. See: talkstats.com/showthread.php?t=5056



135
136
137
138
139
140
# File 'lib/statsample/regression/multiple/baseengine.rb', line 135

# Tolerance of +var+: one minus the R^2 of an auxiliary regression.
#
# The columns in @dep_columns are named via +assign_names+, each wrapped in
# a Daru::Vector, and collected into a Daru::DataFrame. A new engine of the
# same class is then built on that frame with +var+ as the dependent
# variable, and its +r2+ is subtracted from 1.
#
# @param var [Object] name of the variable whose tolerance is wanted
# @return [Numeric] 1 - r2 of the auxiliary regression
def tolerance(var)
  named_columns = assign_names(@dep_columns)
  frame_data = named_columns.each_with_object({}) do |(field, values), vectors|
    vectors[field] = Daru::Vector.new(values)
  end
  auxiliary = self.class.new(Daru::DataFrame.new(frame_data), var)
  1 - auxiliary.r2
end