Class: Statsample::Crosstab

Inherits:
Object show all
Includes:
Summarizable
Defined in:
lib/statsample/crosstab.rb

Overview

Class to create a crosstab of data. With this, you can create reports and perform a chi-square test. The first vector supplies the rows and the second supplies the columns.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Summarizable

#summary

Constructor Details

#initialize(v1, v2, opts = Hash.new) ⇒ Crosstab

Returns a new instance of Crosstab.

Raises:

  • (ArgumentError)


10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/statsample/crosstab.rb', line 10

# Builds a crosstab from two vectors of equal size.
#
# @param v1 [#size, #name] vector whose values form the rows
# @param v2 [#size, #name] vector whose values form the columns
# @param opts [Hash] attribute name => value pairs; each pair is applied
#   through the matching public writer (e.g. +:percentage_row => true+).
#   Keys without a public writer are ignored.
# @raise [ArgumentError] if the two vectors differ in size
def initialize(v1, v2, opts=Hash.new)
  raise ArgumentError, "Vectors should be the same size" unless v1.size==v2.size
  # NOTE(review): only_valid_clone presumably drops cases with missing
  # data in either vector -- confirm against Statsample.
  @v_rows, @v_cols = Statsample.only_valid_clone(
    Daru::Vector.new(v1),
    Daru::Vector.new(v2))
  @cases          = @v_rows.size
  @row_label      = v1.name
  @column_label   = v2.name
  @name           = nil
  @percentage_row = @percentage_column = @percentage_total=false
  opts.each do |k,v|
    # Probe for the writer rather than the reader: a read-only attribute
    # (such as v_rows) answers to the reader but has no writer, and
    # calling it would raise NoMethodError.
    setter = "#{k}="
    public_send(setter, v) if respond_to?(setter)
  end
  # Fall back to a generated title when no :name option was supplied.
  @name ||= _("Crosstab %s - %s") % [@row_label, @column_label]
end

Instance Attribute Details

#column_labelObject

Returns the value of attribute column_label.



9
10
11
# File 'lib/statsample/crosstab.rb', line 9

# Reader for the label of the column variable. The constructor seeds it
# from the name of its second vector argument; report_building prints it
# when it is not nil.
#
# @return [Object] the current value of @column_label
def column_label
  @column_label
end

#nameObject

Returns the value of attribute name.



9
10
11
# File 'lib/statsample/crosstab.rb', line 9

# Reader for the crosstab's title. When no name is supplied through the
# constructor options, the constructor generates
# "Crosstab <row_label> - <column_label>". report_building uses it as the
# section name and as the prefix of each table name.
#
# @return [Object] the current value of @name
def name
  @name
end

#percentage_columnObject

Returns the value of attribute percentage_column.



9
10
11
# File 'lib/statsample/crosstab.rb', line 9

# Reader for the column-percentage flag. The constructor defaults it to
# false; when truthy, report_building appends a column-percentage table.
#
# @return [Object] the current value of @percentage_column
def percentage_column
  @percentage_column
end

#percentage_rowObject

Returns the value of attribute percentage_row.



9
10
11
# File 'lib/statsample/crosstab.rb', line 9

# Reader for the row-percentage flag. The constructor defaults it to
# false; when truthy, report_building appends a row-percentage table.
#
# @return [Object] the current value of @percentage_row
def percentage_row
  @percentage_row
end

#percentage_totalObject

Returns the value of attribute percentage_total.



9
10
11
# File 'lib/statsample/crosstab.rb', line 9

# Reader for the total-percentage flag. The constructor defaults it to
# false; when truthy, report_building appends a total-percentage table.
#
# @return [Object] the current value of @percentage_total
def percentage_total
  @percentage_total
end

#row_labelObject

Returns the value of attribute row_label.



9
10
11
# File 'lib/statsample/crosstab.rb', line 9

# Reader for the label of the row variable. The constructor seeds it from
# the name of its first vector argument; report_building prints it when
# it is not nil.
#
# @return [Object] the current value of @row_label
def row_label
  @row_label
end

#v_colsObject (readonly)

Returns the value of attribute v_cols.



8
9
10
# File 'lib/statsample/crosstab.rb', line 8

# Read-only access to the column vector: the second of the two values the
# constructor receives from Statsample.only_valid_clone.
#
# @return [Object] the current value of @v_cols
def v_cols
  @v_cols
end

#v_rowsObject (readonly)

Returns the value of attribute v_rows.



8
9
10
# File 'lib/statsample/crosstab.rb', line 8

# Read-only access to the row vector: the first of the two values the
# constructor receives from Statsample.only_valid_clone. Its size is the
# case count used by matrix_expected.
#
# @return [Object] the current value of @v_rows
def v_rows
  @v_rows
end

Instance Method Details

#chi_squareObject

Chi square, based on expected and real matrix



70
71
72
73
# File 'lib/statsample/crosstab.rb', line 70

# Chi-square test comparing the observed counts (to_matrix) with the
# expected counts (matrix_expected).
#
# @return [Object] whatever Statsample::Test.chi_square returns
def chi_square
  # Loaded on demand so the test module is only pulled in when needed.
  require 'statsample/test'
  observed = self.to_matrix
  expected = matrix_expected
  Statsample::Test.chi_square(observed, expected)
end

#cols_empty_hashObject



88
89
90
# File 'lib/statsample/crosstab.rb', line 88

# Builds a fresh counter hash with one zero entry per column name, in the
# order given by cols_names.
#
# @return [Hash] column name => 0
def cols_empty_hash
  counters = {}
  cols_names.each { |col| counters[col] = 0 }
  counters
end

#cols_namesObject



28
29
30
# File 'lib/statsample/crosstab.rb', line 28

# Sorted factors of the column vector, re-indexed after sorting.
#
# @return [Object] the result of reset_index! on the sorted factors
def cols_names
  sorted_factors = @v_cols.factors.sort
  sorted_factors.reset_index!
end

#cols_totalObject



34
35
36
# File 'lib/statsample/crosstab.rb', line 34

# Frequencies of the column vector, converted to a hash.
#
# @return [Hash] the column vector's frequencies via to_h
def cols_total
  column_counts = @v_cols.frequencies
  column_counts.to_h
end

#frequenciesObject



38
39
40
41
42
43
44
45
46
# File 'lib/statsample/crosstab.rb', line 38

# Counts of every (row, column) combination.
#
# Every pair drawn from rows_names x cols_names is first seeded with 0 in
# row-major order, so combinations that never occur are still present.
# The seeded hash is then updated with the frequencies of the paired
# values produced by Statsample.vector_cols_matrix.
#
# @return [Hash] [row, col] => count
def frequencies
  # cols_names sorts on every call, so fetch it once rather than per row.
  cols = cols_names
  base = {}
  rows_names.each do |row|
    cols.each { |col| base[[row, col]] = 0 }
  end
  pairs = Statsample::vector_cols_matrix(@v_rows, @v_cols).to_a
  base.update(Daru::Vector.new(pairs).frequencies.to_h)
end

#frequencies_by_colObject



62
63
64
65
66
67
68
# File 'lib/statsample/crosstab.rb', line 62

# Regroups the counts from frequencies by column.
#
# @return [Hash] column name => { row name => count }
def frequencies_by_col
  counts   = frequencies
  # rows_names sorts on every call, so fetch it once rather than per column.
  row_keys = rows_names
  by_col   = {}
  cols_names.each do |col|
    per_row = {}
    row_keys.each { |row| per_row[row] = counts[[row, col]] }
    by_col[col] = per_row
  end
  by_col
end

#frequencies_by_rowObject



55
56
57
58
59
60
61
# File 'lib/statsample/crosstab.rb', line 55

# Regroups the counts from frequencies by row.
#
# @return [Hash] row name => { column name => count }
def frequencies_by_row
  counts   = frequencies
  # cols_names sorts on every call, so fetch it once rather than per row.
  col_keys = cols_names
  by_row   = {}
  rows_names.each do |row|
    per_col = {}
    col_keys.each { |col| per_col[col] = counts[[row, col]] }
    by_row[row] = per_col
  end
  by_row
end

#matrix_expectedObject

Useful to obtain chi square



75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/statsample/crosstab.rb', line 75

# Matrix of expected cell counts, used by chi_square.
#
# Each cell is (row total * column total) / number of cases, computed
# with quo so that integer totals are not truncated.
#
# @return [Matrix] one row per row name, one column per column name
def matrix_expected
  row_keys   = rows_names
  col_keys   = cols_names
  row_totals = rows_total
  col_totals = cols_total
  case_count = @v_rows.size
  expected = row_keys.collect do |row|
    col_keys.collect do |col|
      (row_totals[row] * col_totals[col]).quo(case_count)
    end
  end
  Matrix.rows(expected)
end

#report_building(builder) ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/statsample/crosstab.rb', line 91

# Writes the crosstab report into +builder+.
#
# Inside a section named after @name it emits the chi-square line, the
# row and column labels (when set), and a raw-count table with a total
# column and a total row. Percentage tables follow for each of the
# @percentage_row, @percentage_column and @percentage_total flags that
# is truthy.
#
# @param builder [#section] report builder that yields a generator
# @return [Object] whatever builder.section returns
def report_building(builder)
  builder.section(:name => @name) do |generator|
    counts      = frequencies
    row_keys    = rows_names
    col_keys    = cols_names
    grand_total = 0
    col_totals  = cols_empty_hash

    generator.text "Chi Square: #{chi_square}"
    generator.text(_("Rows: %s") % @row_label) unless @row_label.nil?
    generator.text(_("Columns: %s") % @column_label) unless @column_label.nil?

    title  = @name + " - " + _("Raw")
    header = [""] + cols_names.collect { |col| @v_cols.index_of(col) } + [_("Total")]
    table  = ReportBuilder::Table.new(:name => title, :header => header)

    row_keys.each do |row|
      row_total = 0
      cells     = [@v_rows.index_of(row)]
      col_keys.each do |col|
        count = counts[[row, col]]
        # Feed the same count into the row, column and grand totals.
        row_total       += count
        grand_total     += count
        col_totals[col] += count
        cells << count
      end
      cells << row_total
      table.row(cells)
    end

    table.hr
    footer = [_("Total")]
    col_keys.each { |col| footer << col_totals[col] }
    footer << grand_total
    table.row(footer)
    generator.parse_element(table)

    table_percentage(generator, :row) if @percentage_row
    table_percentage(generator, :column) if @percentage_column
    table_percentage(generator, :total) if @percentage_total
  end
end

#rows_namesObject



25
26
27
# File 'lib/statsample/crosstab.rb', line 25

# Sorted factors of the row vector, re-indexed after sorting.
#
# @return [Object] the result of reset_index! on the sorted factors
def rows_names
  sorted_factors = @v_rows.factors.sort
  sorted_factors.reset_index!
end

#rows_totalObject



31
32
33
# File 'lib/statsample/crosstab.rb', line 31

# Frequencies of the row vector, converted to a hash.
#
# @return [Hash] the row vector's frequencies via to_h
def rows_total
  row_counts = @v_rows.frequencies
  row_counts.to_h
end

#table_percentage(generator, type) ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/statsample/crosstab.rb', line 139

# Adds one percentage table to the report.
#
# The denominator of each body cell depends on +type+: the row total for
# :row, the column total for :column, and @cases for :total. The
# right-hand margin shows each row total as a share of its own total
# (:row) or of @cases (:column, :total). The bottom margin shows each
# column total as a share of its own total (:column) or of @cases
# (:row, :total). The bottom-right cell is always the literal "100%".
#
# @param generator [#parse_element] report generator receiving the table
# @param type [Symbol] :row, :column or :total
# @return [Object] whatever generator.parse_element returns
def table_percentage(generator, type)
  counts     = frequencies
  col_keys   = cols_names
  row_keys   = rows_names
  row_totals = rows_total
  col_totals = cols_total

  type_name =
    case type
    when :row    then _("% Row")
    when :column then _("% Column")
    when :total  then _("% Total")
    end

  # Formats part/whole as a percentage with two decimals.
  percent = lambda { |part, whole| sprintf("%0.2f%%", part * 100.0 / whole) }

  title  = @name + " - " + _(type_name)
  header = [""] + cols_names.collect { |col| @v_cols.index_of(col) } + [_("Total")]
  table  = ReportBuilder::Table.new(:name => title, :header => header)

  row_keys.each do |row|
    cells = [@v_rows.index_of(row)]
    col_keys.each do |col|
      cell_base =
        case type
        when :row    then row_totals[row]
        when :column then col_totals[col]
        when :total  then @cases
        end
      cells << percent.call(counts[[row, col]], cell_base)
    end
    margin_base =
      case type
      when :row            then row_totals[row]
      when :column, :total then @cases
      end
    cells << percent.call(row_totals[row], margin_base)
    table.row(cells)
  end

  table.hr
  footer = [_("Total")]
  col_keys.each do |col|
    footer_base =
      case type
      when :column      then col_totals[col]
      when :row, :total then @cases
      end
    footer << percent.call(col_totals[col], footer_base)
  end
  footer << "100%"
  table.row(footer)
  generator.parse_element(table)
end

#to_matrixObject



47
48
49
50
51
52
53
54
# File 'lib/statsample/crosstab.rb', line 47

# Observed counts as a matrix: one row per row name and one column per
# column name, in the order given by rows_names and cols_names.
#
# @return [Matrix] the counts from frequencies
def to_matrix
  counts   = frequencies
  row_keys = rows_names
  col_keys = cols_names
  observed = row_keys.collect do |row|
    col_keys.collect { |col| counts[[row, col]] }
  end
  Matrix.rows(observed)
end