Class: Datasets::QuoraDuplicateQuestionPair

Inherits:
Dataset
  • Object
show all
Defined in:
lib/datasets/quora-duplicate-question-pair.rb

Defined Under Namespace

Classes: Record

Instance Attribute Summary

Attributes inherited from Dataset

#metadata

Instance Method Summary collapse

Methods inherited from Dataset

#clear_cache!, #to_table

Constructor Details

#initialize ⇒ QuoraDuplicateQuestionPair

Returns a new instance of QuoraDuplicateQuestionPair.



16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/datasets/quora-duplicate-question-pair.rb', line 16

# Builds the dataset object and fills in its descriptive metadata
# (identifier, display name, home page URL and license list).
#
# The parent initializer is invoked with no arguments first; it is
# presumably what creates @metadata -- confirm against Dataset.
def initialize
  super()

  terms_of_service = {
    name: "Quora's Terms of Service",
    url: "https://www.quora.com/about/tos",
  }

  @metadata.id = "quora-duplicate-question-pair"
  @metadata.name = "Quora's duplicated question pair dataset"
  @metadata.url = "https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs"
  @metadata.licenses = [terms_of_service]
end

Instance Method Details

#each ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
# File 'lib/datasets/quora-duplicate-question-pair.rb', line 29

# Walks every row handed over by open_data and yields one Record per row.
#
# Before a Record is built, the row's "is_duplicate" cell is overwritten
# in place with a boolean: true only when the cell compares equal to the
# Integer 1, false for anything else.
#
# @yield [record] a Record constructed from the row's fields
# @return [Enumerator] when called without a block
# @return [Object] otherwise, whatever open_data returns
def each
  if block_given?
    open_data do |csv|
      csv.each do |row|
        duplicated = row["is_duplicate"] == 1
        row["is_duplicate"] = duplicated
        yield Record.new(*row.fields)
      end
    end
  else
    to_enum(__method__)
  end
end