Module: Disco::Data

Included in:
Disco
Defined in:
lib/disco/data.rb

Instance Method Summary collapse

Instance Method Details

#load_movielens ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/disco/data.rb', line 3

# Builds the MovieLens 100k rating list from the `u.item` and `u.data` files.
#
# Both files are obtained through `download_file` (defined outside this
# method; presumably it fetches/caches the file and checks it against
# `file_hash` -- confirm against its definition). The value it returns is
# used as a local file path.
#
# @return [Array<Hash>] one hash per line of `u.data`, with keys
#   `:user_id` (Integer), `:item_id` (the second `|`-separated field of the
#   matching `u.item` line, or nil when the id is not present there) and
#   `:rating` (Integer)
def load_movielens
  titles_file = download_file(
    "ml-100k/u.item",
    "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
    file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701"
  )
  ratings_file = download_file(
    "ml-100k/u.data",
    "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
    file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490"
  )

  # u.item is pipe-delimited; each line is transcoded from ISO-8859-1 to
  # UTF-8 before splitting. Only the first two fields are kept.
  title_by_id = File.foreach(titles_file).each_with_object({}) do |record, lookup|
    id, title = record.encode("UTF-8", "ISO-8859-1").split("|")
    lookup[id] = title
  end

  # u.data is tab-delimited; fields past the third are ignored.
  File.foreach(ratings_file).map do |record|
    user, item, score = record.split("\t")
    {
      user_id: user.to_i,
      item_id: title_by_id[item],
      rating: score.to_i
    }
  end
end