Module: PubliSci::Analyzer

Included in:
DataSet::ORM::DataCube, DataSet::ORM::DataCube, Readers::Base, Writers::Base
Defined in:
lib/bio-publisci/analyzer.rb

Overview

Handles analysis of R expressions to extract properties and recognize potential ambiguity.

Instance Method Summary

Instance Method Details

#check_integrity(obs, dimensions, measures) ⇒ Object



50
51
52
53
54
55
# File 'lib/bio-publisci/analyzer.rb', line 50

# Verifies that every observation carries exactly the expected properties.
#
# The expected property set is the union of +dimensions+ and +measures+.
#
# @param obs [Array<Hash>] observations, each keyed by property name
# @param dimensions [Array] dimension property names
# @param measures [Array] measure property names
# @return [Array<nil>] one +nil+ per observation when all checks pass
# @raise [RuntimeError] "MissingValues ..." when an observation lacks an
#   expected property; "UnknownProperty ..." when it has an unexpected one
def check_integrity(obs, dimensions, measures)
  obs.map do |observation|
    expected = dimensions | measures
    present = observation.keys

    # Missing properties are reported before unknown ones.
    missing = expected - present
    raise "MissingValues for #{missing}" unless missing.empty?

    unknown = present - expected
    raise "UnknownProperty #{unknown}" unless unknown.empty?
  end
end

#dirty?(data) ⇒ Boolean

Returns:

  • (Boolean)


6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/bio-publisci/analyzer.rb', line 6

# Reports whether +data+ contains a "dirty" character (a period or a space)
# anywhere in its string form.
#
# Hashes are dirty when any key or value is dirty; Arrays are dirty when any
# element is dirty; both are inspected recursively. Any other object is
# converted with +to_s+ and searched for the dirty characters.
#
# @param data [Hash, Array, Object] value to inspect
# @return [Boolean] true if a dirty character is found, false otherwise
def dirty?(data)
  case data
  when Hash
    data.any? { |key, value| dirty?(key) || dirty?(value) }
  when Array
    data.any? { |datum| dirty?(datum) }
  else
    dirty_characters = ['.', ' ']
    text = data.to_s
    # An Array intersection is always truthy in Ruby (even when empty), so
    # test for actual presence of each character instead.
    dirty_characters.any? { |char| text.include?(char) }
  end
end

#recommend_range(data) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/bio-publisci/analyzer.rb', line 26

# Suggests a range for a collection of values based on their Ruby classes.
#
# When every element has the same class: integers yield "xsd:int", Floats
# yield "xsd:double", and Strings are delegated to +recommend_range_strings+.
# Any other class, a mix of classes, or an empty collection yields +:coded+.
#
# @param data [Array] values to inspect
# @return [String, Symbol] "xsd:int", "xsd:double", the result of
#   +recommend_range_strings+, or +:coded+
def recommend_range(data)
  classes = data.map(&:class).uniq
  return :coded unless classes.size == 1

  klass = classes.first
  # `Fixnum` was folded into Integer in Ruby 2.4 and removed in 3.2; comparing
  # with `<= Integer` matches Integer on current Rubies and Fixnum on old ones.
  if klass <= Integer
    "xsd:int"
  elsif klass == Float
    "xsd:double"
  elsif klass == String
    recommend_range_strings(data)
  else
    :coded
  end
end

#recommend_range_strings(data) ⇒ Object



44
45
46
47
48
# File 'lib/bio-publisci/analyzer.rb', line 44

# Suggests a range for a collection of strings by attempting numeric parsing.
#
# @param data [Array<String>] string values to inspect
# @return [String, Symbol] "xsd:int" when every value parses with +Integer()+,
#   "xsd:double" when every value parses with +Float()+, otherwise +:coded+
def recommend_range_strings(data)
  # Integer()/Float() raise on unparsable input; a failed conversion simply
  # means the value does not fit that numeric type.
  return "xsd:int" if data.all? { |d| Integer(d) rescue nil }
  # Values that only parse as floats are doubles, not ints.
  return "xsd:double" if data.all? { |d| Float(d) rescue nil }
  :coded
end