Class: Datasets::ITACorpus

Inherits:
Dataset < Object
Defined in:
lib/datasets/ita-corpus.rb

Defined Under Namespace

Classes: Record

Instance Attribute Summary

Attributes inherited from Dataset

#metadata

Instance Method Summary

Methods inherited from Dataset

#clear_cache!, #to_table

Constructor Details

#initialize(type: :emotion) ⇒ ITACorpus

Returns a new instance of ITACorpus.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/datasets/ita-corpus.rb', line 8

# Returns a new instance of ITACorpus.
#
# @param type [Symbol] which transcript to use; must be +:emotion+ or
#   +:recitation+ (defaults to +:emotion+)
# @raise [ArgumentError] if +type+ is neither +:emotion+ nor +:recitation+
def initialize(type: :emotion)
  accepted_types = %i[emotion recitation]
  # Reject unknown types before any dataset state is set up.
  raise ArgumentError, "Please set type :emotion or :recitation: #{type.inspect}" unless accepted_types.include?(type)

  super()
  @type = type

  meta = @metadata
  meta.id = 'ita-corpus'
  meta.name = 'ITA-corpus'
  meta.url = 'https://github.com/mmorise/ita-corpus'
  meta.licenses = ['Unlicense']
  # The description is stored as a lambda, so fetch_readme is not invoked here.
  meta.description = -> { fetch_readme }
end

Instance Method Details

#each(&block) ⇒ Object



24
25
26
27
28
29
30
31
32
# File 'lib/datasets/ita-corpus.rb', line 24

# Downloads the transcript file for the configured type and passes it, together
# with the given block, to parse_data.
#
# @return [Enumerator] when called without a block
# @return [Object] otherwise, whatever parse_data returns
def each(&block)
  unless block_given?
    return to_enum(__method__)
  end

  # The cached file and the remote file share the same name.
  transcript_name = "#{@type}_transcript_utf8.txt"
  local_path = cache_dir_path + transcript_name
  remote_url = "#{download_base_url}/#{transcript_name}"

  download(local_path, remote_url)
  parse_data(local_path, &block)
end