Module: Glymour::Statistics

Included in:
Glymour::StructureLearning::LearningNet
Defined in:
lib/stats_module.rb

Defined Under Namespace

Classes: Variable, VariableContainer

Instance Method Summary collapse

Instance Method Details

#coindependent?(p_val, *variables) ⇒ Boolean

Takes two or more Variables. Returns true if the first two variables are coindependent given the rest.

Returns:

  • (Boolean)


66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/stats_module.rb', line 66

# Tests whether the first two variables are coindependent given the rest.
#
# Every variable's values are assigned into R under the variable's name
# (booleans are converted to 1/0 first), and a contingency table over all
# of them is built in R. With no conditioning variables the p-value of a
# single chi-squared test on that table is used. Otherwise a chi-squared
# test is run for every combination of conditioning-variable levels, the
# statistics and degrees of freedom are summed, and the p-value of the
# summed statistic is used.
#
# @param p_val [Numeric] threshold; the result is true when the observed
#   p-value is strictly greater than this value
# @param variables [Array<Variable>] two or more objects responding to
#   +name+ and +values+; the first two are tested, the rest are conditioned on
# @return [Boolean] true if the observed p-value exceeds +p_val+
# @raise [ArgumentError] if fewer than two variables are given
def coindependent?(p_val, *variables)
  if variables.length < 2
    raise ArgumentError, "coindependent? requires at least two variables (got #{variables.length})"
  end

  #TODO: Raise an exception if variables have different tables?
  R.echo(false)
  # Push variable data into R
  variables.each do |var|
    # Rinruby can't handle true and false values, so use 1 and 0 resp. instead
    sanitized_values = var.values.map do |value|
      case value
        when true  then 1
        when false then 0
        else value
      end
    end

    R.assign var.name, sanitized_values
  end

  R.eval <<-EOF
    cond_data <- data.frame(#{variables.map(&:name).join(', ')})
    t <-table(cond_data)
  EOF

  cond_vars = variables.drop(2)

  # If no conditioning variables are given, just return the chi square test for the first two
  if cond_vars.empty?
    R.eval "chisq <- chisq.test(t)"
    observed_p = R.pull "chisq$p.value"
    return observed_p > p_val
  end

  # The table built above has one dimension per variable; each conditioning
  # dimension is indexed positionally by 1..(number of distinct values).
  level_indices = cond_vars.map { |var| (1..var.values.uniq.length).to_a }

  # Cartesian product of the level indices: one array of indices per joint
  # state of the conditioning variables (also correct for a single variable,
  # where each state is a one-element array).
  cond_states = level_indices.first.product(*level_indices.drop(1))

  # Find the chi-squared statistic for every state of the conditioning variables and sum them
  chisq_sum = 0
  df = 0
  cond_states.each do |state|
    # Columns are selected with a logical mask rather than -(which(...)):
    # when no column sums to zero, which() is empty and a negative empty
    # index would select no columns at all.
    R.eval <<-EOF
      partial_table <- t[,,#{state.join(',')}]
      nonzero_columns <- colSums(partial_table) != 0
      table_without_zero_columns <- partial_table[, nonzero_columns, drop = FALSE]
      chisq <- chisq.test(table_without_zero_columns)
      s <- chisq$statistic
    EOF

    chisq_sum += R.pull("s").to_f
    df += R.pull("chisq$parameter").to_i
  end

  # Compute the p-value of the sum of statistics
  observed_p = 1 - R.pull("pchisq(#{chisq_sum}, #{df})").to_f
  observed_p > p_val
end