Module: FeldtRuby::Statistics::Plotting
- Included in:
- RCommunicator
- Defined in:
- lib/feldtruby/statistics.rb
Overview
Plot data sets in R with ggplot2 and save the plots to files.
Constant Summary collapse
- GfxFormatToGfxParams =
{ "pdf" => {:width => 7, :height => 5, :paper => 'special'}, "png" => {:units => "cm", :width => 12, :height => 8}, "tiff" => {:units => "cm", :width => 12, :height => 8}, }
Instance Method Summary collapse
- #density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d") ⇒ Object
- #filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour") ⇒ Object
- #gfx_device(format, width = nil, height = nil) ⇒ Object
- #ggplot2_setup_and_theme ⇒ Object
-
#hash_to_R_params(hash) ⇒ Object
Map a ruby hash of objects to parameters in R code/script.
- #hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50) ⇒ Object
-
#load_csv_files_as_data(dataMap, columnName = nil) ⇒ Object
Data can be specified in two ways, either directly in Ruby arrays, or as strings with the path to a csv file to be loaded.
-
#overlaid_densities(dataMap, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ Object
Overlaid density graph of the observations (sampled distributions) in each data series of dataMap.
-
#overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ Object
Plot the densities of the data found in the column named columnName in the csv files in csvFiles.
- #plot_2dims(csvFilePath, plotCommand, xlabel, ylabel, title = "scatterplot") ⇒ Object
-
#save_graph(filename, width = nil, height = nil) ⇒ Object
Wrap a save_graph call around a block that draws a diagram and this will save the diagram to a file.
- #scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot") ⇒ Object
- #smooth_scatter_plot(csvFilePath, xlabel, ylabel, title = "smoothscatter") ⇒ Object
Instance Method Details
#density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d") ⇒ Object
359 360 361 362 363 364 365 366 367 368 |
# File 'lib/feldtruby/statistics.rb', line 359
#
# Plot two columns of a csv file as a 2D density tile map, shaded from
# white (low density) to red (high density).
#
# The R code built here only creates the ggplot object `f`; reading the csv
# file into `data` and applying titles/theme is delegated to plot_2dims.
#
# @param csvFilePath path of the csv file, forwarded to plot_2dims
# @param xlabel name of the column used for the x aesthetic (also the x axis label)
# @param ylabel name of the column used for the y aesthetic (also the y axis label)
# @param title  title of the plot
# @return whatever plot_2dims returns
def density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d")
  tile_plot_command = <<-EOS
    f <- ggplot(data, aes(x=#{xlabel}, y=#{ylabel}))
    f <- f + stat_density2d(geom="tile", aes(fill=..density..), contour=FALSE) + scale_fill_gradient(high="red", low="white")
  EOS

  # Labels are always handed on as strings, even if given as symbols.
  x_axis_label = xlabel.to_s
  y_axis_label = ylabel.to_s

  plot_2dims(csvFilePath, tile_plot_command, x_axis_label, y_axis_label, title)
end
#filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour") ⇒ Object
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 |
# File 'lib/feldtruby/statistics.rb', line 275
#
# Draw a filled contour plot of a 2D kernel density estimate (MASS::kde2d)
# computed over two columns of a csv file.
#
# Unlike the ggplot2-based plots this one reads the csv file itself and uses
# base R's filled.contour. The _title_, _xlabel_ and _ylabel_ placeholders
# in the script are handed to subst_eval together with their values
# (presumably substituted there before evaluation -- subst_eval is defined
# elsewhere).
#
# @param csvFilePath path of the csv file to read
# @param xlabel name of the first column given to kde2d (also the x axis label)
# @param ylabel name of the second column given to kde2d (also the y axis label)
# @param title  title of the plot
# @return whatever subst_eval returns
def filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour")
  include_library("MASS")

  contour_script = <<-EOS
    data <- read.csv(#{csvFilePath.inspect})
    k <- with(data, MASS::kde2d(#{xlabel}, #{ylabel}))
    f <- filled.contour(k, color=topo.colors, plot.title=title(main = _title_), xlab=_xlabel_, ylab=_ylabel_)
    f
  EOS

  placeholder_values = {
    :title  => title,
    :xlabel => xlabel.to_s,
    :ylabel => ylabel.to_s
  }

  subst_eval(contour_script, placeholder_values)
end
#gfx_device(format, width = nil, height = nil) ⇒ Object
236 237 238 239 240 241 242 243 244 245 246 247 248 |
# File 'lib/feldtruby/statistics.rb', line 236
#
# Build the R call (as a string) that opens a graphics device for the given
# format, using the default parameters in GfxFormatToGfxParams.
#
# @param format [String, Symbol] graphics format; must be a key of
#   GfxFormatToGfxParams
# @param width  optional override of the format's default width
# @param height optional override of the format's default height
# @return [String] R code such as `png(height = 8, units = "cm", width = 12)`
# @raise [ArgumentError] if the format is not known
def gfx_device(format, width = nil, height = nil)
  format = format.to_s # If given as a symbol instead of a string

  unless GfxFormatToGfxParams.has_key?(format)
    raise ArgumentError.new("Don't know about gfx format #{format}")
  end

  # Work on a copy so per-call overrides never leak into the shared defaults.
  params = GfxFormatToGfxParams[format].clone

  # Honour the explicit size arguments (same rule as save_graph); previously
  # they were accepted but silently ignored.
  params[:width] = width if width
  params[:height] = height if height

  "#{format}(#{hash_to_R_params(params)})"
end
#ggplot2_setup_and_theme ⇒ Object
398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 |
# File 'lib/feldtruby/statistics.rb', line 398
#
# Load the R libraries needed for ggplot2 based plots and return the R code
# snippet that adds title, axis labels and the shared theme to an existing
# ggplot object named `f`.
#
# The snippet contains the placeholders _title_, _xlabel_ and _ylabel_; the
# callers in this module pass matching values to subst_eval.
#
# @return [String] R code to be embedded in a larger script
def ggplot2_setup_and_theme
  %w[ggplot2 reshape2].each { |library_name| include_library(library_name) }

  <<-EOS
    f <- f + ggtitle(_title_) + xlab(_xlabel_) + ylab(_ylabel_)
    f <- f + theme_bw()
    f <- f + theme(
          plot.title = element_text(face="bold", size=12),
          axis.title.x = element_text(face="bold", size=10),
          axis.title.y = element_text(face="bold", size=10)
        )
  EOS
end
#hash_to_R_params(hash) ⇒ Object
Map a ruby hash of objects to parameters in R code/script.
251 252 253 254 255 256 257 258 259 |
# File 'lib/feldtruby/statistics.rb', line 251
#
# Map a ruby hash of objects to parameters in R code/script.
#
# Each entry becomes `name = value`, where the value is converted with
# ruby_object_to_R_string. Entries are emitted in the order of their key
# names so the generated code is deterministic.
#
# @param hash [Hash] parameter name (String or Symbol) => Ruby value
# @return [String] comma separated R parameters, "" for an empty hash
def hash_to_R_params(hash)
  # Sort on the string form of the keys: a plain `sort` raises when Symbol
  # and String keys are mixed, and gives the same order when they are not.
  ordered_keys = hash.keys.sort_by { |key| key.to_s }

  ordered_keys.map do |key|
    "#{key} = #{ruby_object_to_R_string(hash[key])}"
  end.join(", ")
end
#hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50) ⇒ Object
303 304 305 306 307 308 309 310 311 |
# File 'lib/feldtruby/statistics.rb', line 303
#
# Plot two columns of a csv file as a hexagonally binned heatmap
# (ggplot2's geom_hex).
#
# @param csvFilePath path of the csv file, forwarded to plot_2dims
# @param xlabel name of the column for the x axis (also the x axis label)
# @param ylabel name of the column for the y axis (also the y axis label)
# @param title  title of the plot
# @param bins   value passed as the `bins` argument of geom_hex
# @return whatever plot_2dims returns
def hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50)
  include_library "grid"
  include_library "lattice"
  include_library "hexbin"

  plot_command = "f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_hex( bins = #{bins} )"

  # Hand the labels on as strings, like the other plot methods in this
  # module do, so symbols and strings are treated the same way.
  plot_2dims(csvFilePath, plot_command, xlabel.to_s, ylabel.to_s, title)
end
#load_csv_files_as_data(dataMap, columnName = nil) ⇒ Object
Data can be specified in two ways, either directly in Ruby arrays, or as strings with the path to a csv file to be loaded. In the latter case a column name must be given.
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 |
# File 'lib/feldtruby/statistics.rb', line 328
#
# Build R code that loads several data series and combines them into one
# data frame called `data`.
#
# Data can be specified in two ways, either directly in Ruby arrays, or as
# strings with the path to a csv file to be loaded. In the latter case a
# column name must be given; that column of each csv file is used.
#
# The first column of `data` is an (unnamed) 1..N index whose length is taken
# from the first series in sorted key order; each further column is named
# after its key in dataMap.
#
# @param dataMap [Hash] series name => Array of observations or csv file path
# @param columnName name of the column to pick from every csv file
# @return [String] R code that defines one `d_<name>` variable per series
#   and then `data`
# @raise [ArgumentError] if dataMap is empty, or if a csv file path is given
#   without a columnName
def load_csv_files_as_data(dataMap, columnName = nil)
  keys = dataMap.keys.sort
  raise ArgumentError.new("Must give at least one data set") if keys.empty?

  load_statements = []
  columns = [] # [series name, R expression yielding its observations]

  keys.each do |key|
    value = dataMap[key]
    set_name = "d_#{key}"

    if Array === value
      load_statements << "#{set_name} <- #{ruby_object_to_R_string(value)};\n"
      columns << [key, set_name]
    else
      if columnName.nil? || columnName.to_s.empty?
        raise ArgumentError.new("Must give a column name when loading data from csv file #{value.inspect}")
      end
      load_statements << "#{set_name} <- read.csv(#{value.inspect});\n"
      columns << [key, "#{set_name}$#{columnName}"]
    end
  end

  # Size the index from the observations themselves. For a csv file the
  # variable d_<name> is a whole data frame, and length() of a data frame is
  # its number of columns, not the number of observations.
  index_column = "1:length(#{columns.first.last})"
  named_columns = columns.map { |name, expression| "#{name} = #{expression}" }
  data_frame = "data.frame(#{([index_column] + named_columns).join(", ")})"

  "#{load_statements.join}data <- #{data_frame};"
end
#overlaid_densities(dataMap, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ Object
Overlaid density graph of the observations (sampled distributions) in each data series of dataMap. The dataMap maps the name of each data series to an array with its observations.
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 |
# File 'lib/feldtruby/statistics.rb', line 418
#
# Draw the density curves of several data series on top of each other in a
# single ggplot2 graph.
#
# The series are put in one R data frame together with an index column,
# melted into long format and plotted with geom_density, filled per series.
#
# @param dataMap [Hash] name of each data series => array with its
#   observations; all arrays must have the same length
# @param title        title of the plot
# @param datasetsName name given to the melted "which series" column, also
#   used as the fill aesthetic
# @param xlabel       label of the x axis
# @param ylabel       label of the y axis
# @return whatever subst_eval returns
# @raise [ArgumentError] unless all series have one and the same length
def overlaid_densities(dataMap, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density")
  distinct_sizes = dataMap.values.map { |observations| observations.length }.uniq
  raise ArgumentError.new("Must have same cardinality") if distinct_sizes.length != 1

  density_script = <<-EOS
    df <- data.frame(index = (1:#{distinct_sizes.first}), #{hash_to_R_params(dataMap)})
    df.m <- melt(df, id = "index")
    names(df.m)[2] <- _datasetsName_
    f <- ggplot(df.m, aes(value, fill=#{datasetsName}))
    f <- f + geom_density(alpha = 0.2, size = 0.5) + scale_color_brewer()
    #{ggplot2_setup_and_theme()}
    f
  EOS

  placeholder_values = {
    :title        => title,
    :datasetsName => datasetsName,
    :xlabel       => xlabel,
    :ylabel       => ylabel
  }

  subst_eval(density_script, placeholder_values)
end
#overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ Object
Plot the densities of the data found in the column named columnName in the csv files in csvFiles.
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 |
# File 'lib/feldtruby/statistics.rb', line 445
#
# Plot the densities of the data found in the column named columnName in the
# csv files in csvFiles.
#
# csvFiles is handed unchanged to load_csv_files_as_data, which treats it as
# a map from the name of each data series to the path of its csv file. The
# generated R code defines a data frame `data` whose first column is an
# index; it is rebuilt here as `df` with a column named "index" so that it
# can be melted and plotted the same way as in overlaid_densities.
#
# @param columnName   name of the column to read from every csv file
# @param csvFiles     series name => csv file path
# @param title        title of the plot
# @param datasetsName name given to the melted "which series" column, also
#   used as the fill aesthetic
# @param xlabel       label of the x axis
# @param ylabel       label of the y axis
# @return whatever subst_eval returns
def overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density")
  # The column name must reach the loader, or it cannot pick the data out of
  # the csv files.
  load_csvs = load_csv_files_as_data(csvFiles, columnName)

  # NOTE: everything inside #{...} is evaluated by Ruby even on lines that
  # are comments in R, so no Ruby expressions may be left in R comments here.
  script = <<-EOS
    #{load_csvs}
    df <- data.frame(index = (1:nrow(data)), data[, -1, drop = FALSE])
    df.m <- melt(df, id = "index")
    names(df.m)[2] <- _datasetsName_
    f <- ggplot(df.m, aes(value, fill=#{datasetsName}))
    f <- f + geom_density(alpha = 0.2, size = 0.5) + scale_color_brewer()
    #{ggplot2_setup_and_theme()}
    f
  EOS

  subst_eval script, {:title => title, :datasetsName => datasetsName, :xlabel => xlabel, :ylabel => ylabel}
end
#plot_2dims(csvFilePath, plotCommand, xlabel, ylabel, title = "scatterplot") ⇒ Object
261 262 263 264 265 266 267 268 269 270 271 272 273 |
# File 'lib/feldtruby/statistics.rb', line 261
#
# Shared driver for the two-dimensional ggplot2 plots: reads the csv file
# into the R variable `data`, runs the given plot command (which is expected
# to create a ggplot object named `f`), adds titles/theme and evaluates the
# resulting plot.
#
# @param csvFilePath path of the csv file to read
# @param plotCommand [String] R code that builds `f` from `data`
# @param xlabel label of the x axis
# @param ylabel label of the y axis
# @param title  title of the plot
# @return whatever subst_eval returns
def plot_2dims(csvFilePath, plotCommand, xlabel, ylabel, title = "scatterplot")
  plot_script = <<-EOS
    data <- read.csv(#{csvFilePath.inspect})
    #{plotCommand}
    #{ggplot2_setup_and_theme()}
    f
  EOS

  label_values = {
    :title  => title,
    :xlabel => xlabel,
    :ylabel => ylabel
  }

  subst_eval(plot_script, label_values)
end
#save_graph(filename, width = nil, height = nil) ⇒ Object
Wrap a save_graph call around a block that draws a diagram and this will save the diagram to a file. The filetype is given by the file ending of the file name.
379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 |
# File 'lib/feldtruby/statistics.rb', line 379
#
# Wrap a save_graph call around a block that draws a diagram and this will
# save the diagram to a file. The filetype is given by the file ending of
# the file name.
#
# An R graphics device for the file is opened, the block is run, and the
# device is closed again with dev.off() -- also when the block raises, so a
# failed plot does not leave the device open.
#
# @param filename [String] file to save to; its ending must be a key of
#   GfxFormatToGfxParams
# @param width  optional override of the format's default width
# @param height optional override of the format's default height
# @yield the code that draws the diagram
# @return the result of evaluating dev.off() in R
# @raise [RuntimeError] if the file ending is not a known graphics format
def save_graph(filename, width = nil, height = nil)
  file_ending = filename.split(".").last
  raise "Don't know about graphics format #{file_ending}" unless GfxFormatToGfxParams.has_key?(file_ending)

  # Copy the defaults so the overrides below stay local to this call.
  params = GfxFormatToGfxParams[file_ending].clone
  params[:width] = width if width
  params[:height] = height if height

  RC.eval("#{file_ending}(#{filename.inspect}, #{hash_to_R_params(params)})")

  begin
    yield() # Just be sure not to nest these save_graph calls within each other...
  ensure
    # Always close the device that was opened above.
    device_closed = RC.eval("dev.off()")
  end

  device_closed
end
#scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot") ⇒ Object
313 314 315 316 317 318 319 320 321 322 323 |
# File 'lib/feldtruby/statistics.rb', line 313
#
# Plot two columns of a csv file as a scatter plot (open circles) with a
# smoothed trend line on top.
#
# The smoothing method is chosen in R based on the size of the data:
# 'gam' when there are more than 1000 rows, 'loess' otherwise.
#
# @param csvFilePath path of the csv file, forwarded to plot_2dims
# @param xlabel name of the column for the x axis (also the x axis label)
# @param ylabel name of the column for the y axis (also the y axis label)
# @param title  title of the plot
# @return whatever plot_2dims returns
def scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot")
  scatter_command = <<-EOS
    smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}
    f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_point(shape = 1)
    f <- f + stat_smooth(method = smoothing_method)
  EOS

  # Labels are always handed on as strings, even if given as symbols.
  x_axis_label = xlabel.to_s
  y_axis_label = ylabel.to_s

  plot_2dims(csvFilePath, scatter_command, x_axis_label, y_axis_label, title)
end
#smooth_scatter_plot(csvFilePath, xlabel, ylabel, title = "smoothscatter") ⇒ Object
292 293 294 295 296 297 298 299 300 301 |
# File 'lib/feldtruby/statistics.rb', line 292
#
# Plot two columns of a csv file as a scatter plot with a loess smoothing
# curve drawn without its confidence band (se = FALSE).
#
# @param csvFilePath path of the csv file, forwarded to plot_2dims
# @param xlabel name of the column for the x axis (also the x axis label)
# @param ylabel name of the column for the y axis (also the y axis label)
# @param title  title of the plot
# @return whatever plot_2dims returns
def smooth_scatter_plot(csvFilePath, xlabel, ylabel, title = "smoothscatter")
  include_library("graphics")

  smooth_command = <<-EOS
    f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_point() + geom_smooth( method="loess", se = FALSE )
  EOS

  # Labels are always handed on as strings, even if given as symbols.
  x_axis_label = xlabel.to_s
  y_axis_label = ylabel.to_s

  plot_2dims(csvFilePath, smooth_command, x_axis_label, y_axis_label, title)
end