Module: FeldtRuby::Statistics::Plotting

Included in:: RCommunicator

Defined in:: lib/feldtruby/statistics.rb

Overview

Plot data sets in R with ggplot2 and save the plots to files.

Constant Summary collapse

# Default parameters handed to the R graphics device for each supported
# output format. The keys are the format names, which are also used as the
# name of the R device function.
GfxFormatToGfxParams = {
  "pdf" => {
    :width => 7,
    :height => 5,
    :paper => 'special'
  },
  "png" => {
    :units => "cm",
    :width => 12,
    :height => 8
  },
  "tiff" => {
    :units => "cm",
    :width => 12,
    :height => 8
  }
}

Instance Method Summary collapse

#density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d") ⇒ Object
#filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour") ⇒ Object
#gfx_device(format, width = nil, height = nil) ⇒ Object
#ggplot2_setup_and_theme ⇒ Object
#hash_to_R_params(hash) ⇒ Object

Map a ruby hash of objects to parameters in R code/script.
#hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50) ⇒ Object
#load_csv_files_as_data(dataMap, columnName = nil) ⇒ Object

Data can be specified in two ways, either directly in Ruby arrays, or as strings with the path to a csv file to be loaded.
#overlaid_densities(dataMap, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ Object

Overlaid density graph of the observations (sampled distributions) in each data series of dataMap.
#overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ Object

Plot the densities of the data found in the column named columnName in the csv files in csvFiles.
#plot_2dims(csvFilePath, plotCommand, xlabel, ylabel, title = "scatterplot") ⇒ Object
#save_graph(filename, width = nil, height = nil) ⇒ Object

Wrap a save_graph call around a block that draws a diagram, and the diagram will be saved to a file.
#scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot") ⇒ Object
#smooth_scatter_plot(csvFilePath, xlabel, ylabel, title = "smoothscatter") ⇒ Object

Instance Method Details

#density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d") ⇒ `Object`

# File 'lib/feldtruby/statistics.rb', line 359

# Draws a 2D density tile plot of two columns of a csv file.
#
# Only the ggplot2 commands for the tile layer are built here; reading the
# csv file, labelling and evaluation are delegated to plot_2dims.
#
# @param csvFilePath [String] path of the csv file, forwarded to plot_2dims
# @param xlabel [#to_s] interpolated into aes(x=...) and used as the x axis label
# @param ylabel [#to_s] interpolated into aes(y=...) and used as the y axis label
# @param title [String] title of the plot
# @return [Object] whatever plot_2dims returns
def density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d")
  aesthetics = "x=#{xlabel}, y=#{ylabel}"

  tile_commands = <<-EOS
    f <- ggplot(data, aes(#{aesthetics}))
    f <- f + stat_density2d(geom="tile", aes(fill=..density..), contour=FALSE) + scale_fill_gradient(high="red", low="white")
  EOS

  plot_2dims(csvFilePath, tile_commands, xlabel.to_s, ylabel.to_s, title)
end

#filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour") ⇒ `Object`

# File 'lib/feldtruby/statistics.rb', line 275

# Draws a filled contour plot of a 2D kernel density estimate (MASS::kde2d)
# of two columns of a csv file.
#
# The column names are interpolated directly into the R script, while the
# title and axis labels are filled in through the _title_, _xlabel_ and
# _ylabel_ placeholders by subst_eval.
#
# @param csvFilePath [String] path of the csv file read with read.csv
# @param xlabel [#to_s] first column passed to kde2d, also the x axis label
# @param ylabel [#to_s] second column passed to kde2d, also the y axis label
# @param title [String] main title of the plot
# @return [Object] whatever subst_eval returns
def filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour")
  include_library("MASS")

  contour_script = <<-EOS
    data <- read.csv(#{csvFilePath.inspect})
    k <- with(data, MASS::kde2d(#{xlabel}, #{ylabel}))
    f <- filled.contour(k, color=topo.colors, 
           plot.title=title(main = _title_),
           xlab=_xlabel_, ylab=_ylabel_)
    f
  EOS

  substitutions = {
    :title => title,
    :xlabel => xlabel.to_s,
    :ylabel => ylabel.to_s
  }

  subst_eval(contour_script, substitutions)
end

#gfx_device(format, width = nil, height = nil) ⇒ `Object`

# File 'lib/feldtruby/statistics.rb', line 236

# Builds the R call that opens a graphics device for the given format.
#
# The default parameters for the format are taken from GfxFormatToGfxParams;
# an explicit width and/or height overrides the corresponding default, in the
# same way save_graph applies them.
#
# @param format [String, Symbol] graphics format, a key of GfxFormatToGfxParams
# @param width [Numeric, nil] overrides the default width when given
# @param height [Numeric, nil] overrides the default height when given
# @return [String] R code of the form "format(param = value, ...)"
# @raise [ArgumentError] if the format is not a key of GfxFormatToGfxParams
def gfx_device(format, width = nil, height = nil)
  format = format.to_s            # If given as a symbol instead of a string

  unless GfxFormatToGfxParams.has_key?(format)
    raise ArgumentError.new("Don't now about gfx format #{format}")
  end

  # Work on a copy so the overrides never leak into the shared defaults.
  params = GfxFormatToGfxParams[format].dup
  params[:width] = width if width
  params[:height] = height if height

  "#{format}(#{hash_to_R_params(params)})"
end

#ggplot2_setup_and_theme ⇒ `Object`

# File 'lib/feldtruby/statistics.rb', line 398

# Includes the ggplot2 and reshape2 R libraries and returns the R commands
# that add title, axis labels and a black-and-white theme to the plot held in
# the R variable f.
#
# The returned commands still contain the _title_, _xlabel_ and _ylabel_
# placeholders; the callers in this module pass the script on to subst_eval
# together with values for them.
#
# @return [String] R script fragment
def ggplot2_setup_and_theme
  %w[ggplot2 reshape2].each { |library| include_library(library) }

  <<-EOS
    f <- f + ggtitle(_title_) + xlab(_xlabel_) + ylab(_ylabel_)
    f <- f + theme_bw()
    f <- f + theme(
            plot.title = element_text(face="bold", size=12), 
            axis.title.x = element_text(face="bold", size=10),
            axis.title.y = element_text(face="bold", size=10)
          )
  EOS
end

#hash_to_R_params(hash) ⇒ `Object`

Map a ruby hash of objects to parameters in R code/script.

# File 'lib/feldtruby/statistics.rb', line 251

# Map a ruby hash of objects to parameters in R code/script.
#
# The keys are emitted in sorted order as "key = value" pairs separated by
# ", "; each value is converted with ruby_object_to_R_string.
#
# @param hash [Hash] parameter names mapped to their Ruby values
# @return [String] the R parameter list, empty for an empty hash
def hash_to_R_params(hash)
  sorted_names = hash.keys.sort

  assignments = sorted_names.map { |name| "#{name} = #{ruby_object_to_R_string(hash[name])}" }

  assignments.join(", ")
end

#hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50) ⇒ `Object`

# File 'lib/feldtruby/statistics.rb', line 303

# Draws a hexagonally binned heatmap (ggplot2 geom_hex) of two columns of a
# csv file.
#
# Reading the csv file, labelling and evaluation are delegated to plot_2dims.
# The axis labels are converted with to_s before being handed over, like the
# other 2D plot methods in this module do, so symbols and strings are treated
# alike.
#
# @param csvFilePath [String] path of the csv file, forwarded to plot_2dims
# @param xlabel [#to_s] column plotted on the x axis, also the x axis label
# @param ylabel [#to_s] column plotted on the y axis, also the y axis label
# @param title [String] title of the plot
# @param bins [Integer] value interpolated into geom_hex( bins = ... )
# @return [Object] whatever plot_2dims returns
def hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50)
  include_library "grid"
  include_library "lattice"
  include_library "hexbin"

  plot_2dims(csvFilePath,
    "f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_hex( bins = #{bins} )",
    xlabel.to_s, ylabel.to_s, title)
end

#load_csv_files_as_data(dataMap, columnName = nil) ⇒ `Object`

Data can be specified in two ways, either directly in Ruby arrays, or as strings with the path to a csv file to be loaded. In the latter case a column name must be given.

# File 'lib/feldtruby/statistics.rb', line 328

# Builds the R script that loads a number of data sets and combines them into
# a data frame named data.
#
# Data can be specified in two ways, either directly in Ruby arrays, or as
# strings with the path to a csv file to be loaded. In the latter case the
# column named columnName is picked out of the loaded csv file.
#
# The data sets are processed in sorted key order. Each one is stored in an R
# variable named d_<key>, and the first column of the data frame is the
# sequence 1:length(d_<first key>).
#
# @param dataMap [Hash] data set names mapped to an Array of values or to a csv file path
# @param columnName [String, nil] column to take from each loaded csv file
# @return [String] the R script
def load_csv_files_as_data(dataMap, columnName = nil)
  names = dataMap.keys.sort

  loaders = []
  columns = ["1:length(d_#{names.first})"]

  names.each do |name|
    source = dataMap[name]
    variable = "d_#{name}"

    if source.is_a?(Array)
      loaders << "#{variable} <- #{ruby_object_to_R_string(source)};\n"
      columns << "#{name} = #{variable}"
    else
      loaders << "#{variable} <- read.csv(#{source.inspect});\n"
      columns << "#{name} = #{variable}$#{columnName}"
    end
  end

  "#{loaders.join}data <- data.frame(#{columns.join(', ')});"
end

#overlaid_densities(dataMap, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ `Object`

Overlaid density graph of the observations (sampled distributions) in each data series of dataMap. The dataMap maps the name of each data series to an array with its observations.

# File 'lib/feldtruby/statistics.rb', line 418

# Overlaid density graph of the observations (sampled distributions) in the
# data series of dataMap.
#
# All series must have the same number of observations, since they become
# columns of one R data frame next to an index column. The data frame is
# melted and plotted with geom_density, filled per series.
#
# @param dataMap [Hash] name of each data series mapped to an array with its observations
# @param title [String] title of the plot
# @param datasetsName [String] name given to the melted series column and used as fill
# @param xlabel [String] x axis label
# @param ylabel [String] y axis label
# @return [Object] whatever subst_eval returns
# @raise [ArgumentError] unless all series have one and the same length
def overlaid_densities(dataMap, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density")
  sizes = dataMap.values.map(&:length).uniq

  raise ArgumentError.new("Must have same cardinality") if sizes.length != 1

  densities_script = <<-EOS
    df <- data.frame(index = (1:#{sizes.first}), #{hash_to_R_params(dataMap)})
    df.m <- melt(df, id = "index")
    names(df.m)[2] <- _datasetsName_
    f <- ggplot(df.m, aes(value, fill=#{datasetsName}))
    f <- f + geom_density(alpha = 0.2, size = 0.5) + scale_color_brewer()
    #{ggplot2_setup_and_theme()}
    f
  EOS

  substitutions = {
    :title => title,
    :datasetsName => datasetsName,
    :xlabel => xlabel,
    :ylabel => ylabel
  }

  subst_eval(densities_script, substitutions)
end

#overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ `Object`

Plot the densities of the data found in the column named columnName in the csv files in csvFiles.

# File 'lib/feldtruby/statistics.rb', line 445

# Plot the densities of the data found in the column named columnName in the
# csv files in csvFiles.
#
# The loading script comes from load_csv_files_as_data, which leaves an R data
# frame named data whose first column is a row index and whose remaining
# columns hold one data set each. That data frame is melted on its first
# column and plotted with geom_density, filled per data set.
#
# @param columnName [String] column to pick out of every csv file
# @param csvFiles [Hash] data set names mapped to csv file paths (or arrays of values)
# @param title [String] title of the plot
# @param datasetsName [String] name given to the melted data set column and used as fill
# @param xlabel [String] x axis label
# @param ylabel [String] y axis label
# @return [Object] whatever subst_eval returns
def overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density")

  # The column name must be handed on, otherwise the generated R code selects
  # an empty column name from each loaded csv file.
  load_csvs = load_csv_files_as_data(csvFiles, columnName)

  # Column 1 of data is the index column, so it is the id variable for melt.
  # Note: Ruby interpolates inside heredocs even behind an R comment marker,
  # so no interpolation of names unknown to this method may appear here.
  script = <<-EOS
    #{load_csvs}
    df.m <- melt(data, id = 1)
    names(df.m)[2] <- _datasetsName_
    f <- ggplot(df.m, aes(value, fill=#{datasetsName}))
    f <- f + geom_density(alpha = 0.2, size = 0.5) + scale_color_brewer()
    #{ggplot2_setup_and_theme()}
    f
  EOS

  subst_eval script, {:title => title, :datasetsName => datasetsName,
    :xlabel => xlabel, :ylabel => ylabel}

end

#plot_2dims(csvFilePath, plotCommand, xlabel, ylabel, title = "scatterplot") ⇒ `Object`

# File 'lib/feldtruby/statistics.rb', line 261

# Shared driver of the two-dimensional plots in this module.
#
# Assembles an R script that reads the csv file into the R variable data,
# runs the given plot command (which is expected to leave the plot in the R
# variable f), applies the common title/labels/theme commands and finally
# evaluates f. The title and labels are filled in by subst_eval.
#
# @param csvFilePath [String] path of the csv file read with read.csv
# @param plotCommand [String] R commands creating the plot f from data
# @param xlabel [String] x axis label
# @param ylabel [String] y axis label
# @param title [String] title of the plot
# @return [Object] whatever subst_eval returns
def plot_2dims(csvFilePath, plotCommand, xlabel, ylabel, title = "scatterplot")
  csv_literal = csvFilePath.inspect

  full_script = <<-EOS
    data <- read.csv(#{csv_literal})
    #{plotCommand}
    #{ggplot2_setup_and_theme()}
    f
  EOS

  substitutions = {:title => title, :xlabel => xlabel, :ylabel => ylabel}

  subst_eval(full_script, substitutions)
end

#save_graph(filename, width = nil, height = nil) ⇒ `Object`

Wrap a save_graph call around a block that draws a diagram, and the diagram will be saved to a file. The file type is given by the file ending of the file name.

# File 'lib/feldtruby/statistics.rb', line 379

# Wrap a save_graph call around a block that draws a diagram and the diagram
# will be saved to a file. The file type is given by the file ending of the
# file name.
#
# An R graphics device for the file is opened, the block is run, and the
# device is closed again with dev.off(). The device is closed even when the
# block raises, so a failing plot does not leave the device open.
#
# @param filename [String] file to save to; its ending must be a key of GfxFormatToGfxParams
# @param width [Numeric, nil] overrides the default width of the format when given
# @param height [Numeric, nil] overrides the default height of the format when given
# @yield the drawing code whose output goes to the file
# @return [Object] the result of evaluating dev.off()
# @raise [RuntimeError] if the file ending is not a known graphics format
def save_graph(filename, width = nil, height = nil)

  file_ending = filename.split(".").last

  raise "Don't now about graphics format #{file_ending}" unless GfxFormatToGfxParams.has_key?(file_ending)

  # Work on a copy so the overrides never leak into the shared defaults.
  params = GfxFormatToGfxParams[file_ending].dup

  params[:width] = width if width
  params[:height] = height if height

  RC.eval("#{file_ending}(#{filename.inspect}, #{hash_to_R_params(params)})")

  closed = nil

  begin
    yield() # Just be sure not to nest these save_graph calls within each other...
  ensure
    # Always close the device that was opened above.
    closed = RC.eval("dev.off()")
  end

  closed

end

#scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot") ⇒ `Object`

# File 'lib/feldtruby/statistics.rb', line 313

# Draws a scatter plot of two columns of a csv file with a smoothed trend
# line on top.
#
# The generated R code picks the smoothing method from the number of rows:
# 'gam' for more than 1000 rows, 'loess' otherwise. Reading the csv file,
# labelling and evaluation are delegated to plot_2dims.
#
# @param csvFilePath [String] path of the csv file, forwarded to plot_2dims
# @param xlabel [#to_s] column plotted on the x axis, also the x axis label
# @param ylabel [#to_s] column plotted on the y axis, also the y axis label
# @param title [String] title of the plot
# @return [Object] whatever plot_2dims returns
def scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot")
  columns = "#{xlabel}, #{ylabel}"

  point_commands = <<-EOS
    smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}
    f <- ggplot(data, aes(#{columns})) + geom_point(shape = 1)
    f <- f + stat_smooth(method = smoothing_method)
  EOS

  plot_2dims(csvFilePath, point_commands, xlabel.to_s, ylabel.to_s, title)
end

#smooth_scatter_plot(csvFilePath, xlabel, ylabel, title = "smoothscatter") ⇒ `Object`

# File 'lib/feldtruby/statistics.rb', line 292

# Draws a scatter plot of two columns of a csv file with a loess smoothing
# line drawn without its confidence band (se = FALSE).
#
# Reading the csv file, labelling and evaluation are delegated to plot_2dims.
#
# @param csvFilePath [String] path of the csv file, forwarded to plot_2dims
# @param xlabel [#to_s] column plotted on the x axis, also the x axis label
# @param ylabel [#to_s] column plotted on the y axis, also the y axis label
# @param title [String] title of the plot
# @return [Object] whatever plot_2dims returns
def smooth_scatter_plot(csvFilePath, xlabel, ylabel, title = "smoothscatter")
  include_library("graphics")

  columns = "#{xlabel}, #{ylabel}"

  smooth_commands = <<-EOS
    f <- ggplot(data, aes(#{columns})) +
           geom_point() + geom_smooth( method="loess", se = FALSE )
  EOS

  plot_2dims(csvFilePath, smooth_commands, xlabel.to_s, ylabel.to_s, title)
end

Module: FeldtRuby::Statistics::Plotting

Overview

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d") ⇒ Object

#filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour") ⇒ Object

#gfx_device(format, width = nil, height = nil) ⇒ Object

#ggplot2_setup_and_theme ⇒ Object

#hash_to_R_params(hash) ⇒ Object

#hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50) ⇒ Object

#load_csv_files_as_data(dataMap, columnName = nil) ⇒ Object

#overlaid_densities(dataMap, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ Object

#overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ Object

#plot_2dims(csvFilePath, plotCommand, xlabel, ylabel, title = "scatterplot") ⇒ Object

#save_graph(filename, width = nil, height = nil) ⇒ Object

#scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot") ⇒ Object

#smooth_scatter_plot(csvFilePath, xlabel, ylabel, title = "smoothscatter") ⇒ Object