Class: Statsample::Graph::Histogram

Inherits:
Object
  • Object
show all
Includes:
Summarizable
Defined in:
lib/statsample/graph/histogram.rb

Overview

In statistics, a histogram is a graphical representation, showing a visual impression of the distribution of experimental data. It is an estimate of the probability distribution of a continuous variable and was first introduced by Karl Pearson [1]. A histogram consists of tabular frequencies, shown as adjacent rectangles, erected over discrete intervals (bins), with an area equal to the frequency of the observations in the interval. The height of a rectangle is also equal to the frequency density of the interval, i.e., the frequency divided by the width of the interval. The total area of the histogram is equal to the number of data.

Usage

Svg output

a = Daru::Vector.new([1,2,3,4])
puts Statsample::Graph::Histogram.new(a).to_svg

Using ReportBuilder

a = Daru::Vector.new([1,2,3,4])
rb=ReportBuilder.new
rb.add(Statsample::Graph::Histogram.new(a))
rb.save_html('histogram.html')

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Summarizable

#summary

Constructor Details

#initialize(data, opts = Hash.new) ⇒ Histogram

data can be a Daru::Vector or a Statsample::Histogram

[View source] [View on GitHub]

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/statsample/graph/histogram.rb', line 47

# Creates a histogram graph for +data+.
#
# data:: object to plot; when it responds to +name+, that name is
#        interpolated into the default title.
# opts:: Hash overriding any of the defaults below. Keys that are not among
#        the defaults stay in @opts but are not passed to a writer.
def initialize(data, opts = {})
  source_name = data.respond_to?(:name) ? data.name : ""

  defaults = {
    :name                     => _("Histograma (%s)") % source_name,
    :width                    => 400,
    :height                   => 300,
    :margin_top               => 10,
    :margin_bottom            => 20,
    :margin_left              => 30,
    :margin_right             => 20,
    :minimum_x                => nil,
    :maximum_x                => nil,
    :minimum_y                => nil,
    :maximum_y                => nil,
    :bins                     => nil,
    :line_normal_distribution => false
  }

  @opts = defaults.merge(opts)

  # Route every known option through its attribute writer, in declaration order.
  defaults.each_key do |option|
    send("#{option}=", @opts[option])
  end

  @data = data
end

Instance Attribute Details

#binsObject

Could be an array of ranges or number of bins

[View on GitHub]

35
36
37
# File 'lib/statsample/graph/histogram.rb', line 35

# Reader for @bins: either an array of ranges or the number of bins.
def bins
  @bins
end

#heightObject

Total height

[View on GitHub]

24
25
26
# File 'lib/statsample/graph/histogram.rb', line 24

# Reader for @height, the total height of the graph.
def height
  @height
end

#histObject (readonly)

Returns the value of attribute hist.

[View on GitHub]

33
34
35
# File 'lib/statsample/graph/histogram.rb', line 33

# Reader for @hist, the histogram object assigned by pre_vis.
def hist
  @hist
end

#line_normal_distributionObject

Add a line showing normal distribution

[View on GitHub]

45
46
47
# File 'lib/statsample/graph/histogram.rb', line 45

# Reader for @line_normal_distribution; when truthy, rubyvis_panel adds a
# normal distribution line.
def line_normal_distribution
  @line_normal_distribution
end

#margin_bottomObject

Bottom margin

[View on GitHub]

28
29
30
# File 'lib/statsample/graph/histogram.rb', line 28

# Reader for @margin_bottom, the bottom margin of the panel.
def margin_bottom
  @margin_bottom
end

#margin_leftObject

Left margin

[View on GitHub]

30
31
32
# File 'lib/statsample/graph/histogram.rb', line 30

# Reader for @margin_left, the left margin of the panel.
def margin_left
  @margin_left
end

#margin_rightObject

Right margin

[View on GitHub]

32
33
34
# File 'lib/statsample/graph/histogram.rb', line 32

# Reader for @margin_right, the right margin of the panel.
def margin_right
  @margin_right
end

#margin_topObject

Top margin

[View on GitHub]

26
27
28
# File 'lib/statsample/graph/histogram.rb', line 26

# Reader for @margin_top, the top margin of the panel.
def margin_top
  @margin_top
end

#maximum_xObject

Maximum value on x axis. Calculated automatically from data if not set

[View on GitHub]

39
40
41
# File 'lib/statsample/graph/histogram.rb', line 39

# Reader for @maximum_x, the upper limit of the x axis
# (rubyvis_panel falls back to @hist.max when it is unset).
def maximum_x
  @maximum_x
end

#maximum_yObject

Maximum value on y axis. Calculated automatically from data if not set.

[View on GitHub]

43
44
45
# File 'lib/statsample/graph/histogram.rb', line 43

# Reader for @maximum_y, the upper limit of the y axis
# (rubyvis_panel falls back to @hist.max_val when it is unset).
def maximum_y
  @maximum_y
end

#minimum_xObject

Minimum value on x axis. Calculated automatically from data if not set

[View on GitHub]

37
38
39
# File 'lib/statsample/graph/histogram.rb', line 37

# Reader for @minimum_x, the lower limit of the x axis
# (rubyvis_panel falls back to @hist.min when it is unset).
def minimum_x
  @minimum_x
end

#minimum_yObject

Minimum value on y axis. Set to 0 if not set

[View on GitHub]

41
42
43
# File 'lib/statsample/graph/histogram.rb', line 41

# Reader for @minimum_y, the lower limit of the y axis
# (rubyvis_panel falls back to 0 when it is unset).
def minimum_y
  @minimum_y
end

#nameObject

Histogram name

[View on GitHub]

20
21
22
# File 'lib/statsample/graph/histogram.rb', line 20

# Reader for @name, the histogram's name (used as the report section name).
def name
  @name
end

#widthObject

Total width

[View on GitHub]

22
23
24
# File 'lib/statsample/graph/histogram.rb', line 22

# Reader for @width, the total width of the graph.
def width
  @width
end

Instance Method Details

#pre_visObject

:nodoc:

[View source] [View on GitHub]

68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/statsample/graph/histogram.rb', line 68

# Prepares @hist, @mean and @sd from @data before anything is drawn.
#
# * A Statsample::Histogram is used as is, with its estimated mean and
#   standard deviation.
# * A Daru::Vector supplies its own mean and sd, and is binned into @bins
#   bins; when @bins is unset it defaults to floor(sqrt(size)).
#
# Any other kind of @data leaves the instance variables untouched.
def pre_vis # :nodoc:
  case @data
  when Statsample::Histogram
    @hist = @data
    @mean = @hist.estimated_mean
    @sd   = @hist.estimated_standard_deviation
  when Daru::Vector
    @mean = @data.mean
    @sd   = @data.sd
    @bins ||= Math.sqrt(@data.size).floor
    @hist = @data.histogram(@bins)
  end
end

#report_building(builder) ⇒ Object

:nodoc:

[View source] [View on GitHub]

181
182
183
184
185
# File 'lib/statsample/graph/histogram.rb', line 181

# Adds this histogram to +builder+ as a section named after the graph,
# containing the SVG image at the graph's width and height.
def report_building(builder) # :nodoc:
  builder.section(:name => name) do |section|
    svg = to_svg
    section.image(svg, :type => 'svg', :width => width, :height => height)
  end
end

#report_building_text(generator) ⇒ Object

[View source] [View on GitHub]

186
187
188
189
190
191
192
193
194
195
# File 'lib/statsample/graph/histogram.rb', line 186

# Writes a plain-text rendering of the histogram to +generator+: one line per
# bin, showing the bin's lower edge followed by a bar of asterisks.
#
# Bars are scaled so that the tallest one is about 40 characters wide:
# each asterisk stands for +step+ observations.
#
# generator:: object responding to +text(String)+.
def report_building_text(generator)
  pre_vis
  #anchor=generator.toc_entry(_("Histogram %s") % [@name])
  peak = @hist.max_val
  # Divide by 40.0, not 40: with Integer counts an integer division would
  # truncate before +ceil+ and leave bars wider than 40 characters.
  step = peak > 40 ? (peak / 40.0).ceil : 1

  @hist.range.each_with_index do |edge, i|
    # +range+ appears to hold one more edge than there are bins; skip the
    # closing edge, which has no bin of its own.
    next if i == @hist.bins
    stars = (@hist.bin[i] / step).floor
    generator.text(sprintf("%5.2f : %s", edge, "*" * stars))
  end
end

#rubyvis_normal_distribution(pan) ⇒ Object

[View source] [View on GitHub]

80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/statsample/graph/histogram.rb', line 80

# Overlays the frequencies expected under a normal distribution, using
# @mean and @sd, as a black line on +pan+.
#
# The x range [@minimum_x, @maximum_x] is cut into intervals as wide as the
# histogram's first bin. Each interval contributes one point: x is the
# interval midpoint, y is the normal probability of the interval multiplied
# by the histogram's total count.
#
# pan:: panel-like object responding to +line+ (a Rubyvis panel in
#       rubyvis_panel).
def rubyvis_normal_distribution(pan)
  # Copied into locals so the blocks below can reach the scales.
  x_scale = @x_scale
  y_scale = @y_scale

  first_bin = @hist.get_range(0)
  bin_width = first_bin[1] - first_bin[0]

  interval_count = ((@maximum_x - @minimum_x) / bin_width.to_f).floor
  total = @hist.sum

  expected = (0...interval_count).map do |i|
    lower = @minimum_x + i * bin_width
    upper = @minimum_x + (i + 1) * bin_width
    upper_cdf = Distribution::Normal.cdf((upper - @mean) / @sd)
    lower_cdf = Distribution::Normal.cdf((lower - @mean) / @sd)
    {:x => (lower + upper) / 2.0, :y => (upper_cdf - lower_cdf) * total}
  end

  pan.line do |curve|
    curve.data expected
    curve.interpolate "cardinal"
    curve.stroke_style "black"
    curve.bottom { |point| y_scale[point[:y]] }
    curve.left { |point| x_scale[point[:x]] }
  end
end

#rubyvis_panelObject

Returns a Rubyvis panel with the histogram

[View source] [View on GitHub]

106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/statsample/graph/histogram.rb', line 106

# Builds the Rubyvis panel that draws this histogram: Y and X axis rules, one
# bar per bin and, when @line_normal_distribution is set, the overlay added
# by rubyvis_normal_distribution. The panel is returned without calling
# render on it.
def rubyvis_panel # :nodoc:
  # Populate @hist (and @mean / @sd) from @data.
  pre_vis
  #that=self
  
  # Any axis limit left unset falls back to the histogram's own extent
  # (and to 0 for the lower y limit).
  @minimum_x||=@hist.min
  @maximum_x||=@hist.max
  @minimum_y||=0
  @maximum_y||=@hist.max_val
  
  margin_hor=margin_left + margin_right
  margin_vert=margin_top  + margin_bottom

  # Linear scales mapping data values onto the drawing area left inside the margins.
  # NOTE(review): the x scale goes through pv.Scale while the y scale uses
  # Rubyvis::Scale -- presumably the same module; confirm and unify.
  x_scale = pv.Scale.linear(@minimum_x, @maximum_x).range(0, width - margin_hor)

  y_scale=Rubyvis::Scale.linear(@minimum_y, @maximum_y).range(0, height - margin_vert)
  
  # NOTE(review): presumably extends the y domain to round tick values -- confirm in the Rubyvis docs.
  y_scale.nice
  
  # One hash per bin: lower edge, upper edge and count.
  # This local shadows the +bins+ reader for the rest of the method.
  bins=@hist.bins.times.map {|i|
    {
     :low =>@hist.get_range(i)[0],
     :high=>@hist.get_range(i)[1],
     :value=>@hist.bin[i]
    }
  }
  # Keep the scales in instance variables: rubyvis_normal_distribution reads
  # them from @x_scale / @y_scale.
  @x_scale=x_scale
  @y_scale=y_scale
  # cache data
  vis=Rubyvis::Panel.new do |pan| 
    pan.width  width  - margin_hor
    pan.height height - margin_vert
    pan.bottom margin_bottom
    pan.left   margin_left
    pan.right  margin_right
    pan.top    margin_top
     # Y axis: one rule per y tick; the rule at 0 is drawn in "#000", the others in "#eee".
     # NOTE(review): the rule blocks take no argument, so bare calls such as
     # data / bottom / height presumably resolve against the rule mark rather
     # than this object -- confirm before renaming or moving anything in them.
    pan.rule do
      data y_scale.ticks
      bottom y_scale
      stroke_style {|d| d!=0 ? "#eee" : "#000"}
      label(:anchor=>'left') do
        text y_scale.tick_format
      end
    end
    # X axis: a tick mark of height 5 at bottom -5 for every x tick, labelled with the tick value.
    pan.rule do
      data x_scale.ticks
      left x_scale
      stroke_style "black"
      height 5
      bottom(-5)
      label(:anchor=>'bottom') do
        text x_scale.tick_format
      end
    end
   
    # One bar per bin, spanning the bin's low..high edges on x and rising
    # from the baseline to the scaled count.
    pan.bar do |bar|
      bar.data(bins)
      bar.left {|v| x_scale[v[:low]]}
      bar.width {|v| x_scale[v[:high]] - x_scale[v[:low]]}
      bar.bottom 0
      bar.height {|v| y_scale[v[:value]]}
      bar.stroke_style "black"
      bar.line_width 1
    end
     # Optional overlay of the expected normal curve.
     rubyvis_normal_distribution(pan) if @line_normal_distribution
  end
  vis
end

#to_svgObject

Returns the histogram rendered as SVG

[View source] [View on GitHub]

176
177
178
179
180
# File 'lib/statsample/graph/histogram.rb', line 176

# Builds the histogram panel, renders it and returns the result of the
# panel's own +to_svg+.
def to_svg
  rubyvis_panel.tap { |panel| panel.render }.to_svg
end