Class: PheldItunesDataMiner
- Inherits:
-
Object
- Object
- PheldItunesDataMiner
- Defined in:
- lib/pheld_itunes_data_miner.rb
Overview
Student Name: Peter Held. Homework Week: 8.
Constant Summary collapse
- VERSION =
'1.0.0'
Class Method Summary collapse
Instance Method Summary collapse
- #correlation(x, y) ⇒ Object
- #get_average_year(tracks) ⇒ Object
- #get_bit_rate(tracks) ⇒ Object
- #get_bitrate_playcount_correlation(tracks) ⇒ Object
- #get_most_popular_artists(tracks, list_depth) ⇒ Object
- #get_most_popular_genres(tracks, list_depth) ⇒ Object
- #get_most_popular_years(tracks, list_depth) ⇒ Object
- #get_number_of_albums(tracks) ⇒ Object
- #get_number_of_artists(tracks) ⇒ Object
- #get_number_of_genres(tracks) ⇒ Object
- #get_number_of_tracks(tracks) ⇒ Object
- #get_play_count(tracks) ⇒ Object
- #get_play_counts_normalized_for_date_added(tracks) ⇒ Object
- #get_playcount_normalized_for_date_added(track) ⇒ Object
- #get_rating(tracks) ⇒ Object
- #get_rating_playcount_correlation(tracks) ⇒ Object
- #get_total_playtime(tracks) ⇒ Object
- #get_tracks_added_by_year(tracks) ⇒ Object
- #guess_age(tracks) ⇒ Object
- #parse_file(file_name) ⇒ Object
- #parse_library(library_xml) ⇒ Object
- #print_stats(tracks) ⇒ Object
-
#seconds_fraction_to_time(seconds) ⇒ Object
Converts seconds to an array with days, hours, minutes and seconds.
- #sum(list) ⇒ Object
Class Method Details
.run(file_name) ⇒ Object
19 20 21 22 23 24 25 26 |
# File 'lib/pheld_itunes_data_miner.rb', line 19
#
# Entry point: parses the given iTunes library file and prints its statistics.
#
# @param file_name [String] path to the iTunes library XML file
# @return [nil]
def self.run(file_name)
  # read the tracks
  idm = PheldItunesDataMiner.new
  tracks = idm.parse_file(file_name)

  # parse_file returns nil when the file does not exist; report that instead
  # of handing nil to print_stats.
  if tracks.nil?
    warn "Could not read iTunes library file: #{file_name}"
    return nil
  end

  # print the stats
  idm.print_stats(tracks)
end
Instance Method Details
#correlation(x, y) ⇒ Object
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 |
# File 'lib/pheld_itunes_data_miner.rb', line 372
#
# Computes the correlation coefficient between two equally sized lists of
# numbers using the sums-of-products formula:
#
#   (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx^2) * (n*Syy - Sy^2))
#
# @param x [Array<Numeric>] first series
# @param y [Array<Numeric>] second series, same length as x
# @return [Float] the coefficient; NaN when the lists are empty or when
#   either series has no variance (the denominator is zero)
# @raise [ArgumentError] if x and y differ in length
def correlation(x, y)
  n = x.size
  raise ArgumentError, "x and y must have the same number of elements" unless n == y.size
  return Float::NAN if n.zero?

  # Accumulate every needed sum in a single pass over the paired values.
  sum_x = sum_y = sum_x_squared = sum_y_squared = sum_xy = 0
  x.zip(y) do |a, b|
    sum_x += a
    sum_y += b
    sum_x_squared += a * a
    sum_y_squared += b * b
    sum_xy += a * b
  end

  # Calculate the correlation value
  left = n * sum_xy - sum_x * sum_y
  right = ((n * sum_x_squared - sum_x**2) * (n * sum_y_squared - sum_y**2))**0.5

  left / right
end
#get_average_year(tracks) ⇒ Object
290 291 292 293 294 295 296 297 298 299 300 |
# File 'lib/pheld_itunes_data_miner.rb', line 290
#
# Averages the release year over all tracks that have one. Tracks without a
# year are ignored; the caller's array is left untouched.
#
# @param tracks [Array] objects responding to #year
# @return [Integer, nil] the average year (integer division), or nil when no
#   track carries a year
def get_average_year(tracks)
  years = tracks.map { |track| track.year }.compact
  return nil if years.empty?

  years.inject(0) { |total, year| total + year } / years.length
end
#get_bit_rate(tracks) ⇒ Object
354 355 356 357 358 359 360 361 362 |
# File 'lib/pheld_itunes_data_miner.rb', line 354
#
# Collects the bit rate of every track, in track order.
#
# @param tracks [Array] objects responding to #bit_rate
# @return [Array] one bit rate per track (nil entries are kept)
def get_bit_rate(tracks)
  tracks.map { |track| track.bit_rate }
end
#get_bitrate_playcount_correlation(tracks) ⇒ Object
263 264 265 266 267 268 269 270 271 |
# File 'lib/pheld_itunes_data_miner.rb', line 263
#
# Correlates bit rate with age-normalized play count. Tracks missing a bit
# rate, play count or date added are left out of the calculation; the
# caller's array is not modified.
#
# @param tracks [Array] objects responding to #bit_rate, #play_count and #date_added
# @return [Object] whatever #correlation returns for the two series
def get_bitrate_playcount_correlation(tracks)
  # scrub the tracks for nils (non-destructively)
  scrubbed_tracks = tracks.reject do |track|
    track.bit_rate.nil? || track.play_count.nil? || track.date_added.nil?
  end

  bit_rates = get_bit_rate(scrubbed_tracks)
  play_counts = get_play_counts_normalized_for_date_added(scrubbed_tracks)

  correlation(bit_rates, play_counts)
end
#get_most_popular_artists(tracks, list_depth) ⇒ Object
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
# File 'lib/pheld_itunes_data_miner.rb', line 173
#
# Ranks artists by how many tracks they have and returns the top entries.
# Tracks without an artist are skipped; the caller's array is not modified.
#
# @param tracks [Array] objects responding to #artist
# @param list_depth [Integer] maximum number of entries to return
# @return [Array<Array>] [artist, track_count] pairs, most tracks first;
#   ties keep the order in which the artists were first seen
def get_most_popular_artists(tracks, list_depth)
  return [] unless list_depth > 0

  # get the list of artist occurrence counts
  counts = Hash.new(0)
  tracks.each do |track|
    counts[track.artist] += 1 unless track.artist.nil?
  end

  # sort by the occurrence counts; the index makes tie order deterministic
  ranked = counts.sort_by.with_index { |(_artist, count), position| [-count, position] }
  ranked.first(list_depth)
end
#get_most_popular_genres(tracks, list_depth) ⇒ Object
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 |
# File 'lib/pheld_itunes_data_miner.rb', line 233
#
# Ranks genres by how many tracks they have and returns the top entries.
# Tracks without a genre are skipped; the caller's array is not modified.
#
# @param tracks [Array] objects responding to #genre
# @param list_depth [Integer] maximum number of entries to return
# @return [Array<Array>] [genre, track_count] pairs, most tracks first;
#   ties keep the order in which the genres were first seen
def get_most_popular_genres(tracks, list_depth)
  return [] unless list_depth > 0

  # get the list of genre occurrence counts
  counts = Hash.new(0)
  tracks.each do |track|
    counts[track.genre] += 1 unless track.genre.nil?
  end

  # sort by the occurrence counts; the index makes tie order deterministic
  ranked = counts.sort_by.with_index { |(_genre, count), position| [-count, position] }
  ranked.first(list_depth)
end
#get_most_popular_years(tracks, list_depth) ⇒ Object
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 |
# File 'lib/pheld_itunes_data_miner.rb', line 203
#
# Ranks release years by how many tracks they have and returns the top
# entries. Tracks without a year are skipped; the caller's array is not
# modified.
#
# @param tracks [Array] objects responding to #year
# @param list_depth [Integer] maximum number of entries to return
# @return [Array<Array>] [year, track_count] pairs, most tracks first;
#   ties keep the order in which the years were first seen
def get_most_popular_years(tracks, list_depth)
  return [] unless list_depth > 0

  # get the list of year occurrence counts
  counts = Hash.new(0)
  tracks.each do |track|
    counts[track.year] += 1 unless track.year.nil?
  end

  # sort by the occurrence counts; the index makes tie order deterministic.
  # first() copes with a list shorter than list_depth.
  ranked = counts.sort_by.with_index { |(_year, count), position| [-count, position] }
  ranked.first(list_depth)
end
#get_number_of_albums(tracks) ⇒ Object
121 122 123 124 125 126 |
# File 'lib/pheld_itunes_data_miner.rb', line 121
#
# Counts the distinct album names among the tracks. Tracks without an album
# are ignored; the caller's array is not modified.
#
# @param tracks [Array] objects responding to #album
# @return [Integer] number of distinct albums
def get_number_of_albums(tracks)
  tracks.map { |track| track.album }.compact.uniq.length
end
#get_number_of_artists(tracks) ⇒ Object
114 115 116 117 118 119 |
# File 'lib/pheld_itunes_data_miner.rb', line 114
#
# Counts the distinct artist names among the tracks. Tracks without an
# artist are ignored; the caller's array is not modified.
#
# @param tracks [Array] objects responding to #artist
# @return [Integer] number of distinct artists
def get_number_of_artists(tracks)
  tracks.map { |track| track.artist }.compact.uniq.length
end
#get_number_of_genres(tracks) ⇒ Object
128 129 130 131 132 133 |
# File 'lib/pheld_itunes_data_miner.rb', line 128
#
# Counts the distinct genres among the tracks. Tracks without a genre are
# ignored; the caller's array is not modified.
#
# @param tracks [Array] objects responding to #genre
# @return [Integer] number of distinct genres
def get_number_of_genres(tracks)
  tracks.map { |track| track.genre }.compact.uniq.length
end
#get_number_of_tracks(tracks) ⇒ Object
135 136 137 |
# File 'lib/pheld_itunes_data_miner.rb', line 135
#
# Reports how many tracks are in the list.
#
# @param tracks [Array] the track list
# @return [Integer] number of entries in the list
def get_number_of_tracks(tracks)
  track_total = tracks.length
  track_total
end
#get_play_count(tracks) ⇒ Object
324 325 326 327 328 329 330 331 332 |
# File 'lib/pheld_itunes_data_miner.rb', line 324
#
# Collects the play count of every track, in track order.
#
# @param tracks [Array] objects responding to #play_count
# @return [Array] one play count per track (nil entries are kept)
def get_play_count(tracks)
  tracks.map { |track| track.play_count }
end
#get_play_counts_normalized_for_date_added(tracks) ⇒ Object
334 335 336 337 338 339 340 341 342 |
# File 'lib/pheld_itunes_data_miner.rb', line 334
#
# Maps every track to its age-normalized play count, in track order.
#
# @param tracks [Array] tracks accepted by #get_playcount_normalized_for_date_added
# @return [Array] one normalized play count per track
def get_play_counts_normalized_for_date_added(tracks)
  tracks.map { |track| get_playcount_normalized_for_date_added(track) }
end
#get_playcount_normalized_for_date_added(track) ⇒ Object
364 365 366 367 368 369 |
# File 'lib/pheld_itunes_data_miner.rb', line 364
#
# Normalizes a track's play count for its age in days, so older tracks are
# not favoured simply for having been in the library longer.
#
# The result is multiplied by 10000 so that the values aren't fractions and
# the correlation() method can use them.
#
# @param track [#play_count, #date_added] play_count is numeric, date_added a Date
# @return [Integer] plays per day, scaled by 10000 and rounded
def get_playcount_normalized_for_date_added(track)
  # A track added today (or dated in the future) would give an age of zero or
  # less; count it as one day old so the division is always defined.
  age_in_days = [Date.today - track.date_added, 1].max

  (10000 * track.play_count / age_in_days).round
end
#get_rating(tracks) ⇒ Object
344 345 346 347 348 349 350 351 352 |
# File 'lib/pheld_itunes_data_miner.rb', line 344
#
# Collects the rating of every track, in track order.
#
# NOTE(review): the identifiers in this method were lost in the extracted
# source; `rating` is reconstructed from the method name and the "Rating"
# library key — confirm against the track class.
#
# @param tracks [Array] objects responding to #rating
# @return [Array] one rating per track (nil entries are kept)
def get_rating(tracks)
  tracks.map { |track| track.rating }
end
#get_rating_playcount_correlation(tracks) ⇒ Object
273 274 275 276 277 278 279 280 281 |
# File 'lib/pheld_itunes_data_miner.rb', line 273
#
# Correlates age-normalized play count with rating. Tracks missing a play
# count, rating or date added are left out of the calculation; the caller's
# array is not modified.
#
# NOTE(review): the `rating` identifiers were lost in the extracted source
# and are reconstructed here — confirm against the track class.
#
# @param tracks [Array] objects responding to #play_count, #rating and #date_added
# @return [Object] whatever #correlation returns for the two series
def get_rating_playcount_correlation(tracks)
  # scrub the tracks for nils (non-destructively)
  scrubbed_tracks = tracks.reject do |track|
    track.play_count.nil? || track.rating.nil? || track.date_added.nil?
  end

  play_counts = get_play_counts_normalized_for_date_added(scrubbed_tracks)
  ratings = get_rating(scrubbed_tracks)

  correlation(play_counts, ratings)
end
#get_total_playtime(tracks) ⇒ Object
139 140 141 142 143 144 145 146 147 148 149 150 151 |
# File 'lib/pheld_itunes_data_miner.rb', line 139
#
# Adds up the length of every track and converts the total into days, hours,
# minutes and seconds. Tracks without a total time are ignored; the caller's
# array is not modified.
#
# @param tracks [Array] objects responding to #total_time (milliseconds)
# @return [Array] the result of #seconds_fraction_to_time for the total
def get_total_playtime(tracks)
  total_playtime = tracks.map { |track| track.total_time }
                         .compact
                         .inject(0) { |total, milliseconds| total + milliseconds }

  # takes seconds, but iTunes stores in milliseconds
  seconds_fraction_to_time(total_playtime / 1000)
end
#get_tracks_added_by_year(tracks) ⇒ Object
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
# File 'lib/pheld_itunes_data_miner.rb', line 153
#
# Counts how many tracks were added to the library in each calendar year.
# Tracks without a date added are ignored; the caller's array is not
# modified.
#
# @param tracks [Array] objects responding to #date_added (a Date or nil)
# @return [Array<Array>] [year, track_count] pairs sorted by year
def get_tracks_added_by_year(tracks)
  tracks_added_by_year = Hash.new(0) # year added => number of tracks

  tracks.each do |track|
    next if track.date_added.nil?

    tracks_added_by_year[track.date_added.year] += 1
  end

  tracks_added_by_year.sort # sort by year
end
#guess_age(tracks) ⇒ Object
284 285 286 287 288 |
# File 'lib/pheld_itunes_data_miner.rb', line 284
#
# Guesses the library owner's age from the average release year of the
# tracks: the number of years since that average, plus 14.
#
# NOTE(review): the 14-year offset is not explained in the code; presumably
# it is the assumed listener age at the time of a track's release — confirm.
#
# @param tracks [Array] tracks accepted by #get_average_year
# @return [Integer, nil] the guessed age, or nil when no average year is available
def guess_age(tracks)
  average_year = get_average_year(tracks)
  return nil if average_year.nil?

  Time.now.year - average_year + 14
end
#parse_file(file_name) ⇒ Object
68 69 70 71 72 73 74 75 76 77 |
# File 'lib/pheld_itunes_data_miner.rb', line 68
#
# Reads an iTunes library file from disk and parses it into tracks.
#
# @param file_name [String] path to the library XML file
# @return [Object, nil] the result of #parse_library, or nil when the file
#   does not exist
def parse_file(file_name)
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  return nil unless File.exist?(file_name)

  # File.read opens, reads and closes the file in one call, so no handle is
  # left open.
  parse_library(File.read(file_name))
end
#parse_library(library_xml) ⇒ Object
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/pheld_itunes_data_miner.rb', line 79
#
# Parses iTunes library XML into a list of track objects. Every node matched
# by /plist/dict/dict/dict is treated as one track whose children alternate
# between a <key> element and the element holding that key's value.
#
# NOTE(review): `track.rating=` is reconstructed — the attribute name was
# lost in the extracted source; confirm against PheldItunesTrack.
#
# @param library_xml [String] contents of the library XML file
# @return [Array<PheldItunesTrack>] one track per matched node
def parse_library(library_xml)
  doc = Nokogiri::XML.parse(library_xml)

  # get an array of the track xmls
  doc.xpath('/plist/dict/dict/dict').map do |track_xml|
    track = PheldItunesTrack.new
    last_key = nil # reset per track so a key never leaks into the next one

    # Only element children are walked: whitespace text nodes between the
    # elements must not be mistaken for a value.
    track_xml.element_children.each do |element|
      if element.name == "key"
        last_key = element.text
        next
      end

      case last_key
      when "Track ID" then track.track_id = element.text.to_i
      when "Name" then track.name = element.text.strip
      when "Artist" then track.artist = element.text.strip
      when "Album" then track.album = element.text.strip
      when "Total Time" then track.total_time = element.text.to_i
      when "Year" then track.year = element.text.to_i
      when "Bit Rate" then track.bit_rate = element.text.to_i
      when "Play Count" then track.play_count = element.text.to_i
      when "Rating" then track.rating = element.text.to_i
      when "Date Added" then track.date_added = Date.parse(element.text)
      when "Genre" then track.genre = element.text.strip
      end

      # Each key is paired with exactly one value element.
      last_key = nil
    end

    track
  end
end
#print_stats(tracks) ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/pheld_itunes_data_miner.rb', line 28
#
# Prints a report about the library to standard output: totals, the ten most
# popular artists, years and genres, tracks added per year, two correlations
# and an age guess.
#
# NOTE(review): the rating/play-count correlation call and its variable were
# lost in the extracted source and are reconstructed from the method list.
#
# @param tracks [Array] the parsed tracks
# @return [nil]
def print_stats(tracks)
  # totals
  puts "Track count: #{tracks.length}"
  puts "Number of artists: #{get_number_of_artists(tracks)}"
  puts "Number of albums: #{get_number_of_albums(tracks)}"
  days, hours, minutes, seconds = get_total_playtime(tracks)
  puts "Total playtime: #{days} days, #{hours} hours, #{minutes} minutes, #{seconds} seconds"

  # popular stuff
  puts "Ten most popular artists:"
  get_most_popular_artists(tracks, 10).each do |artist, count|
    puts "\t\"#{artist}\" - #{count} tracks"
  end

  puts "Ten most popular years:"
  get_most_popular_years(tracks, 10).each do |year, count|
    puts "\t\"#{year}\" - #{count} tracks"
  end

  puts "Ten most popular genres:"
  get_most_popular_genres(tracks, 10).each do |genre, count|
    puts "\t\"#{genre}\" - #{count} tracks"
  end

  # other interesting statistics
  puts "The number of tracks added each year was:"
  get_tracks_added_by_year(tracks).each do |year, count|
    puts "\t\"#{year}\" - #{count} tracks"
  end

  bitrate_playcount_correlation = get_bitrate_playcount_correlation(tracks)
  puts "The correlation between bit rate and play count is: #{bitrate_playcount_correlation}."

  rating_playcount_correlation = get_rating_playcount_correlation(tracks)
  puts "The correlation between rating and play count is: #{rating_playcount_correlation}."

  age_guess = guess_age(tracks)
  puts "According to my calculations/assumptions and other peoples' research, your age is #{age_guess}."
end
#seconds_fraction_to_time(seconds) ⇒ Object
Converts seconds to an array with days, hours, minutes and seconds
303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 |
# File 'lib/pheld_itunes_data_miner.rb', line 303
#
# Converts seconds to an array with days, hours, minutes and seconds.
# Any fractional part of the input is dropped, whatever its magnitude.
#
# @param seconds [Numeric] a duration in seconds
# @return [Array<Integer>] [days, hours, minutes, seconds]
def seconds_fraction_to_time(seconds)
  whole_seconds = seconds.to_i

  # Anything under a minute (including negative input) has no larger units.
  return [0, 0, 0, whole_seconds] if whole_seconds < 60

  mins, secs = whole_seconds.divmod(60)
  hours, mins = mins.divmod(60)
  days, hours = hours.divmod(24)

  [days, hours, mins, secs]
end
#sum(list) ⇒ Object
400 401 402 |
# File 'lib/pheld_itunes_data_miner.rb', line 400
#
# Adds up the elements of a list with +.
#
# @param list [Enumerable] the values to add
# @return [Object, nil] the running total, or nil for an empty list
def sum(list)
  list.inject(nil) do |running_total, item|
    if running_total
      running_total + item
    else
      # nothing accumulated yet: start from the current item
      item
    end
  end
end