Goldmine
Data mining made easy... the Ruby way.
Turn any list into a treasure trove.
Goldmine allows you to apply pivot table logic to any list for powerful data mining capabilities.
In the nomenclature of Goldmine, we call this digging for data. So we've added a dig
method to Array
.
More reasons to love it
- Provides ETL like functionality... but simple and elegant
- Supports method chaining for deep data mining
- Handles values that are lists themselves
- Allows you to name your pivots
What does this all mean for you? Lets have a look.
The Basics
Pivot a simple list of numbers based on whether or not they are less than 5
list = [1,2,3,4,5,6,7,8,9]
data = list.dig { |i| i < 5 }
# {
# true => [1, 2, 3, 4],
# false => [5, 6, 7, 8, 9]
# }
The same pivot as above but explicitly named
list = [1,2,3,4,5,6,7,8,9]
data = list.dig("less than 5") { |i| i < 5 }
# {
# "less than 5: true" => [1, 2, 3, 4],
# "less than 5: false" => [5, 6, 7, 8, 9]
# }
Next Steps
Chain pivots together
list = [1,2,3,4,5,6,7,8,9]
data = list.dig { |i| i < 5 }.dig { |i| i % 2 == 0 }
# {
# [true, false] => [1, 3],
# [true, true] => [2, 4],
# [false, false] => [5, 7, 9],
# [false, true] => [6, 8]
# }
The same pivot as above but explicitly named
list = [1,2,3,4,5,6,7,8,9]
data = list.dig("less than 5") { |i| i < 5 }.dig("divisible by 2") { |i| i % 2 == 0 }
# {
# ["less than 5: true", "divisible by 2: false"] => [1, 3],
# ["less than 5: true", "divisible by 2: true"] => [2, 4],
# ["less than 5: false", "divisible by 2: false"] => [5, 7, 9],
# ["less than 5: false", "divisible by 2: true"] => [6, 8]
# }
Deep Cuts
Pivot a list of users based on a value that is itself a list
list = [
{ :name => "Nathan", :projects => [:a, :b] },
{ :name => "Eric", :projects => [:a, :d, :g] },
{ :name => "Brian", :projects => [:b, :c, :e, :f] },
{ :name => "Mark", :projects => [:g] },
{ :name => "Josh", :projects => [:a, :c] },
{ :name => "Matthew", :projects => [:b, :c, :d] }
]
data = list.dig { |record| record[:projects] }
# {
# :a => [ { :name => "Nathan", :projects => [:a, :b] },
# { :name => "Eric", :projects => [:a, :d, :g] },
# { :name => "Josh", :projects => [:a, :c] } ],
# :b => [ { :name => "Nathan", :projects => [:a, :b] },
# { :name => "Brian", :projects => [:b, :c, :e, :f] },
# { :name => "Matthew", :projects => [:b, :c, :d] } ],
# :d => [ { :name => "Eric", :projects => [:a, :d, :g] },
# { :name => "Matthew", :projects => [:b, :c, :d] } ],
# :g => [ { :name => "Eric", :projects => [:a, :d, :g] },
# { :name => "Mark", :projects => [:g] } ],
# :c => [ { :name => "Brian", :projects => [:b, :c, :e, :f] },
# { :name => "Josh", :projects => [:a, :c] },
# { :name => "Matthew", :projects => [:b, :c, :d] } ],
# :e => [ { :name => "Brian", :projects => [:b, :c, :e, :f] } ],
# :f => [ { :name => "Brian", :projects => [:b, :c, :e, :f] } ]
# }
Pivot a list of users based on lang and number of projects owned
list = [
{ :name => "Nathan", :langs => [:ruby, :javascript], :projects => [:a, :b] },
{ :name => "Eric", :langs => [:ruby, :javascript, :groovy], :projects => [:a, :d, :g] },
{ :name => "Brian", :langs => [:ruby, :javascript, :c, :go], :projects => [:b, :c, :e, :f] },
{ :name => "Mark", :langs => [:ruby, :java, :scala], :projects => [:g] },
{ :name => "Josh", :langs => [:ruby, :lisp, :clojure], :projects => [:a, :c] },
{ :name => "Matthew", :langs => [:ruby, :c, :clojure], :projects => [:b, :c, :d] }
]
data = list
.dig("lang") { |rec| rec[:langs] }
.dig("project count") { |rec| rec[:projects].length }
# {
# ["lang: ruby", "project count: 2"] => [ { :name => "Nathan", ... }, { :name => "Josh", ... } ],
# ["lang: ruby", "project count: 3"] => [ { :name => "Eric", ... }, { :name => "Matthew", ... } ],
# ["lang: ruby", "project count: 4"] => [ { :name => "Brian", ... } ],
# ["lang: ruby", "project count: 1"] => [ { :name => "Mark", ... } ],
# ["lang: javascript", "project count: 2"] => [ { :name => "Nathan", ... } ],
# ["lang: javascript", "project count: 3"] => [ { :name => "Eric", ... } ],
# ["lang: javascript", "project count: 4"] => [ { :name => "Brian", ... } ],
# ["lang: groovy", "project count: 3"] => [ { :name => "Eric", ... } ],
# ["lang: c", "project count: 4"] => [ { :name => "Brian", ... } ],
# ["lang: c", "project count: 3"] => [ { :name => "Matthew", ... } ],
# ["lang: go", "project count: 4"] => [ { :name => "Brian", ... } ],
# ["lang: java", "project count: 1"] => [ { :name => "Mark", ... } ],
# ["lang: scala", "project count: 1"] => [ { :name => "Mark", ... } ],
# ["lang: lisp", "project count: 2"] => [ { :name => "Josh", ... } ],
# ["lang: clojure", "project count: 2"] => [ { :name => "Josh", ... } ],
# ["lang: clojure", "project count: 3"] => [ { :name => "Matthew", ... } ]
# }
Pivot a list of users based on whether or not they know javascript, what other languages they know, and whether or not their name contains the letter 'a'
Pretty contrived example here, but hopefully illustrates the type of power thats available.
list = [
{ :name => "Nathan", :langs => [:ruby, :javascript], :projects => [:a, :b] },
{ :name => "Eric", :langs => [:ruby, :javascript, :groovy], :projects => [:a, :d, :g] },
{ :name => "Brian", :langs => [:ruby, :javascript, :c, :go], :projects => [:b, :c, :e, :f] },
{ :name => "Mark", :langs => [:ruby, :java, :scala], :projects => [:g] },
{ :name => "Josh", :langs => [:ruby, :lisp, :clojure], :projects => [:a, :c] },
{ :name => "Matthew", :langs => [:ruby, :c, :clojure], :projects => [:b, :c, :d] }
]
data = list
.dig("knows javascript") { |rec| rec[:langs].include?(:javascript) }
.dig("lang") { |rec| rec[:langs] }
.dig("name includes 'a'") { |rec| rec[:name].include?("a") }
# {
# ["knows javascript: true", "lang: ruby", "name includes 'a': true"] => [ { :name => "Nathan", ... }, { :name => "Brian", ... } ],
# ["knows javascript: true", "lang: ruby", "name includes 'a': false"] => [ { :name => "Eric", ... } ],
# ["knows javascript: true", "lang: javascript", "name includes 'a': true"] => [ { :name => "Nathan", ... }, { :name => "Brian", ... } ],
# ["knows javascript: true", "lang: javascript", "name includes 'a': false"] => [ { :name => "Eric", ... } ],
# ["knows javascript: true", "lang: groovy", "name includes 'a': false"] => [ { :name => "Eric", ... } ],
# ["knows javascript: true", "lang: c", "name includes 'a': true"] => [ { :name => "Brian", ... } ],
# ["knows javascript: true", "lang: go", "name includes 'a': true"] => [ { :name => "Brian", ... } ],
# ["knows javascript: false", "lang: ruby", "name includes 'a': true"] => [ { :name => "Mark", ... }, { :name => "Matthew", ... } ],
# ["knows javascript: false", "lang: ruby", "name includes 'a': false"] => [ { :name => "Josh", ... } ],
# ["knows javascript: false", "lang: java", "name includes 'a': true"] => [ { :name => "Mark", ... } ],
# ["knows javascript: false", "lang: scala", "name includes 'a': true"] => [ { :name => "Mark", ... } ],
# ["knows javascript: false", "lang: lisp", "name includes 'a': false"] => [ { :name => "Josh", ... } ],
# ["knows javascript: false", "lang: clojure", "name includes 'a': false"] => [ { :name => "Josh", ... } ],
# ["knows javascript: false", "lang: clojure", "name includes 'a': true"] => [ { :name => "Matthew", ... } ],
# ["knows javascript: false", "lang: c", "name includes 'a': true"] => [ { :name => "Matthew", ... } ]
# }