Class: Statsample::Crosstab

Inherits:
Object show all
Includes:
Summarizable
Defined in:
lib/statsample/crosstab.rb

Overview

Class to create a crosstab of data. With this, you can create reports and perform a chi-square test. The first vector will be on the rows and the second will be on the columns.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Summarizable

#summary

Constructor Details

#initialize(v1, v2, opts = Hash.new) ⇒ Crosstab

Returns a new instance of Crosstab.

Raises:

  • (ArgumentError)
[View source] [View on GitHub]

10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/statsample/crosstab.rb', line 10

# Builds a crosstab from two vectors of equal size.
#
# Both inputs are wrapped in Daru::Vector and handed to
# Statsample.only_valid_clone; the two vectors it returns become the
# row and column variables, and the size of the row vector is kept as
# the number of cases.
#
# v1   - data for the rows; must respond to #size.
# v2   - data for the columns; must respond to #size.
# opts - Hash of attribute name => value. Each pair is applied through
#        the matching public writer (e.g. :name, :percentage_row);
#        keys without a writer are ignored.
#
# Raises ArgumentError if v1 and v2 differ in size.
def initialize(v1, v2, opts=Hash.new)
  raise ArgumentError, "Vectors should be the same size" unless v1.size==v2.size
  @v_rows, @v_cols = Statsample.only_valid_clone(
    Daru::Vector.new(v1),
    Daru::Vector.new(v2))
  @cases          = @v_rows.size
  # The inputs are coerced with Daru::Vector.new above, so they are not
  # guaranteed to respond to #name themselves (e.g. a plain Array).
  @row_label      = v1.respond_to?(:name) ? v1.name : nil
  @column_label   = v2.respond_to?(:name) ? v2.name : nil
  @name           = nil
  @percentage_row = @percentage_column = @percentage_total = false
  opts.each do |key, value|
    writer = "#{key}="
    # Test for the writer that is actually invoked. Testing the reader
    # lets read-only attributes (v_rows, v_cols) and any other public
    # method name through, which then fails with NoMethodError.
    send(writer, value) if respond_to?(writer)
  end
  @name ||= _("Crosstab %s - %s") % [@row_label, @column_label]
end

Instance Attribute Details

#column_labelObject

Returns the value of attribute column_label.

[View on GitHub]

9
10
11
# File 'lib/statsample/crosstab.rb', line 9

# Label of the column variable. The constructor takes it from the
# second vector's name; report_building prints it as "Columns: ..."
# unless it is nil.
def column_label
  @column_label
end

#nameObject

Returns the value of attribute name.

[View on GitHub]

9
10
11
# File 'lib/statsample/crosstab.rb', line 9

# Name of this crosstab. Used as the report section name and as the
# prefix of every table name. When none is supplied the constructor
# builds one from the row and column labels ("Crosstab %s - %s").
def name
  @name
end

#percentage_columnObject

Returns the value of attribute percentage_column.

[View on GitHub]

9
10
11
# File 'lib/statsample/crosstab.rb', line 9

# Flag, false after construction. When truthy, report_building appends
# a table of percentages of type :column.
def percentage_column
  @percentage_column
end

#percentage_rowObject

Returns the value of attribute percentage_row.

[View on GitHub]

9
10
11
# File 'lib/statsample/crosstab.rb', line 9

# Flag, false after construction. When truthy, report_building appends
# a table of percentages of type :row.
def percentage_row
  @percentage_row
end

#percentage_totalObject

Returns the value of attribute percentage_total.

[View on GitHub]

9
10
11
# File 'lib/statsample/crosstab.rb', line 9

# Flag, false after construction. When truthy, report_building appends
# a table of percentages of type :total.
def percentage_total
  @percentage_total
end

#row_labelObject

Returns the value of attribute row_label.

[View on GitHub]

9
10
11
# File 'lib/statsample/crosstab.rb', line 9

# Label of the row variable. The constructor takes it from the first
# vector's name; report_building prints it as "Rows: ..." unless it
# is nil.
def row_label
  @row_label
end

#v_colsObject (readonly)

Returns the value of attribute v_cols.

[View on GitHub]

8
9
10
# File 'lib/statsample/crosstab.rb', line 8

# Vector used for the columns (read-only). It is the second of the two
# vectors returned by Statsample.only_valid_clone in the constructor.
def v_cols
  @v_cols
end

#v_rowsObject (readonly)

Returns the value of attribute v_rows.

[View on GitHub]

8
9
10
# File 'lib/statsample/crosstab.rb', line 8

# Vector used for the rows (read-only). It is the first of the two
# vectors returned by Statsample.only_valid_clone in the constructor;
# its size is the number of cases.
def v_rows
  @v_rows
end

Instance Method Details

#chi_squareObject

Chi square, based on expected and real matrix

[View source] [View on GitHub]

70
71
72
73
# File 'lib/statsample/crosstab.rb', line 70

# Chi square test of the crosstab: hands the observed matrix
# (to_matrix) and the expected matrix (matrix_expected) to
# Statsample::Test.chi_square and returns whatever that call returns.
def chi_square
  # Loaded here, on first use, rather than at file load time.
  require 'statsample/test'
  observed = to_matrix
  expected = matrix_expected
  Statsample::Test.chi_square(observed, expected)
end

#cols_empty_hashObject

[View source] [View on GitHub]

88
89
90
# File 'lib/statsample/crosstab.rb', line 88

# Hash with every column name mapped to 0. report_building uses it as
# the starting point for accumulating column totals.
def cols_empty_hash
  cols_names.each_with_object({}) { |col, zeros| zeros[col] = 0 }
end

#cols_namesObject

[View source] [View on GitHub]

28
29
30
# File 'lib/statsample/crosstab.rb', line 28

# Distinct values of the column vector: its factors, sorted, with the
# index of the sorted result reset.
def cols_names
  sorted_factors = @v_cols.factors.sort
  sorted_factors.reset_index!
end

#cols_totalObject

[View source] [View on GitHub]

34
35
36
# File 'lib/statsample/crosstab.rb', line 34

# Frequencies of the column vector converted to a Hash. Callers look
# the totals up by column value (see matrix_expected).
def cols_total
  counts = @v_cols.frequencies
  counts.to_h
end

#frequenciesObject

[View source] [View on GitHub]

38
39
40
41
42
43
44
45
46
# File 'lib/statsample/crosstab.rb', line 38

# Cell counts of the crosstab.
#
# The result holds one entry per combination of a row name and a
# column name, keyed by the [row, col] pair and initialised to 0. It is
# then updated with the frequencies of the row/column pairs produced by
# Statsample.vector_cols_matrix, so combinations that never occur keep
# their 0.
#
# Returns a Hash.
def frequencies
  rows = rows_names
  # Hoisted: the column names do not depend on the row, so compute
  # (and sort) them once instead of once per row.
  cols = cols_names
  base = {}
  rows.each do |row|
    cols.each { |col| base[[row, col]] = 0 }
  end
  pairs = Statsample::vector_cols_matrix(@v_rows, @v_cols).to_a
  base.update(Daru::Vector.new(pairs).frequencies.to_h)
end

#frequencies_by_colObject

[View source] [View on GitHub]

62
63
64
65
66
67
68
# File 'lib/statsample/crosstab.rb', line 62

# Cell counts grouped by column.
#
# Returns a Hash of column name => { row name => count }, with the
# counts taken from #frequencies.
def frequencies_by_col
  counts = frequencies
  # Hoisted: row names are the same for every column.
  rows = rows_names
  cols_names.each_with_object({}) do |col, by_col|
    by_col[col] = rows.each_with_object({}) { |row, by_row| by_row[row] = counts[[row, col]] }
  end
end

#frequencies_by_rowObject

[View source] [View on GitHub]

55
56
57
58
59
60
61
# File 'lib/statsample/crosstab.rb', line 55

def frequencies_by_row
f=frequencies
rows_names.inject({}){|sr,row|
  sr[row]=cols_names.inject({}) {|sc,col| sc[col]=f[[row,col]]; sc}
  sr
}
end

#matrix_expectedObject

Useful to obtain chi square

[View source] [View on GitHub]

75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/statsample/crosstab.rb', line 75

# Matrix of expected cell counts, used by chi_square.
#
# Each cell is (row total * column total) / number of cases, computed
# with #quo so integer totals are not truncated. Rows and columns
# follow the order of rows_names and cols_names.
def matrix_expected
  row_names  = rows_names
  col_names  = cols_names
  row_totals = rows_total
  col_totals = cols_total
  n_cases    = @v_rows.size
  expected = row_names.map do |row|
    col_names.map { |col| (row_totals[row] * col_totals[col]).quo(n_cases) }
  end
  Matrix.rows(expected)
end

#report_building(builder) ⇒ Object

[View source] [View on GitHub]

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/statsample/crosstab.rb', line 91

# Adds this crosstab to a report.
#
# Opens a section named after the crosstab and writes, in order: the
# chi square line, the row and column labels (each only when not nil),
# a "Raw" table of cell counts with row, column and grand totals, and
# then one percentage table for each of the percentage_row,
# percentage_column and percentage_total flags that is set.
#
# builder - object responding to #section; the generator it yields
#           must respond to #text and #parse_element.
def report_building(builder)
  builder.section(:name=>@name) do |generator|
    counts      = frequencies
    row_names   = rows_names
    col_names   = cols_names
    grand_total = 0
    col_totals  = cols_empty_hash
    generator.text "Chi Square: #{chi_square}"
    generator.text(_("Rows: %s") % @row_label) unless @row_label.nil?
    generator.text(_("Columns: %s") % @column_label) unless @column_label.nil?

    raw_name = @name+" - "+_("Raw")
    # NOTE(review): header cells and row captions are whatever
    # index_of returns for each value, not the value itself - confirm
    # this is the intended caption.
    header = [""] + cols_names.map { |c| @v_cols.index_of(c) } + [_("Total")]
    table = ReportBuilder::Table.new(:name=>raw_name, :header=>header)

    row_names.each do |row|
      row_total = 0
      cells = [@v_rows.index_of(row)]
      col_names.each do |col|
        count = counts[[row, col]]
        row_total       += count
        grand_total     += count
        col_totals[col] += count
        cells << count
      end
      cells << row_total
      table.row(cells)
    end

    table.hr
    totals_row = [_("Total")]
    col_names.each { |col| totals_row << col_totals[col] }
    totals_row << grand_total
    table.row(totals_row)
    generator.parse_element(table)

    table_percentage(generator, :row) if @percentage_row
    table_percentage(generator, :column) if @percentage_column
    table_percentage(generator, :total) if @percentage_total
  end
end

#rows_namesObject

[View source] [View on GitHub]

25
26
27
# File 'lib/statsample/crosstab.rb', line 25

# Distinct values of the row vector: its factors, sorted, with the
# index of the sorted result reset.
def rows_names
  sorted_factors = @v_rows.factors.sort
  sorted_factors.reset_index!
end

#rows_totalObject

[View source] [View on GitHub]

31
32
33
# File 'lib/statsample/crosstab.rb', line 31

# Frequencies of the row vector converted to a Hash. Callers look the
# totals up by row value (see matrix_expected).
def rows_total
  counts = @v_rows.frequencies
  counts.to_h
end

#table_percentage(generator, type) ⇒ Object

[View source] [View on GitHub]

139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/statsample/crosstab.rb', line 139

# Adds a percentage table to the report.
#
# generator - report generator; must respond to #parse_element.
# type      - which total every count is divided by:
#             :row    - cells by their row total, column totals by the
#                       number of cases;
#             :column - cells by their column total, row totals by the
#                       number of cases;
#             :total  - everything by the number of cases.
#
# Every value is formatted as "%0.2f%%"; the bottom-right cell is the
# literal "100%".
#
# Raises ArgumentError for any other type.
def table_percentage(generator,type)
  # Translated exactly once here; the result is used as-is in the
  # table name (it used to be passed through _() a second time).
  type_name = case type
    when :row     then _("% Row")
    when :column  then _("% Column")
    when :total   then _("% Total")
    else raise ArgumentError, "Unknown percentage type: #{type.inspect}"
  end

  fq = frequencies
  cn = cols_names
  rn = rows_names
  rt = rows_total
  ct = cols_total
  percent = lambda { |count, base| sprintf("%0.2f%%", count*100.0/base) }

  header = [""] + cn.collect { |c| @v_cols.index_of(c) } + [_("Total")]
  t = ReportBuilder::Table.new(:name=>@name+" - "+type_name, :header=>header)
  rn.each do |row|
    # Denominator for the row's "Total" cell: the row's own total in
    # :row mode (always 100%), the number of cases otherwise.
    row_base = type == :row ? rt[row] : @cases
    t_row = [@v_rows.index_of(row)]
    cn.each do |col|
      cell_base = case type
        when :row    then rt[row]
        when :column then ct[col]
        else              @cases
      end
      t_row.push(percent.call(fq[[row,col]], cell_base))
    end
    t_row.push(percent.call(rt[row], row_base))
    t.row(t_row)
  end

  t.hr
  t_row = [_("Total")]
  cn.each do |col|
    # Mirror of row_base for the bottom "Total" row.
    col_base = type == :column ? ct[col] : @cases
    t_row.push(percent.call(ct[col], col_base))
  end
  t_row.push("100%")
  t.row(t_row)
  generator.parse_element(t)
end

#to_matrixObject

[View source] [View on GitHub]

47
48
49
50
51
52
53
54
# File 'lib/statsample/crosstab.rb', line 47

# Observed cell counts as a Matrix: one matrix row per row name, one
# column per column name, values taken from #frequencies.
def to_matrix
  counts    = frequencies
  row_names = rows_names
  col_names = cols_names
  cells = row_names.map do |row|
    col_names.map { |col| counts[[row, col]] }
  end
  Matrix.rows(cells)
end