Class: Dbtools::Converter::Csv2rdf_converter

Inherits:
Object
  • Object
show all
Defined in:
lib/dbtools/converter/csv2rdf_converter.rb

Instance Method Summary collapse

Constructor Details

#initialize(filename, uri, default_vocabulary: "http://geophy.io/", options: {}) ⇒ Csv2rdf_converter

Constructor for the csv2rdf converter.

Parameters:

  • filename

    Filename of the csv file that needs to be converted.

  • uri

    RDF URI for the subject. The row (line) number is appended to it as a fragment. Example:

    uri = 'http://example.org/fileid'
    <http://example.org/fileid#123> <predicate> "value"
    
  • default_vocabulary (defaults to: "http://geophy.io/")

    Base vocabulary for the column names. Example:

    default_vocabulary = "http://geophy.io/"
    <subject> <http://geophy.io/column1> "value"
    


20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/dbtools/converter/csv2rdf_converter.rb', line 20

# Opens +filename+ as a CSV source whose rows can be turned into RDF triples.
#
# The column separator comes from options[:col_sep] when present; otherwise it
# is guessed from the file contents via #guess_delimiter.
#
# NOTE: this replaces the process-wide CSV::Converters[:integer] and
# CSV::Converters[:float] entries. Any CSV reader created afterwards that uses
# those converters will also keep values with a leading zero (e.g. "0123")
# as strings instead of converting them to numbers.
#
# @param filename [String] path of the CSV file that needs to be converted
# @param uri [String] base URI for the subjects; "#<line number>" is appended per row
# @param default_vocabulary [String] base URI used to build predicates from column names
# @param options [Hash] additional CSV options; they take precedence over the defaults below
def initialize(filename, uri, default_vocabulary: "http://geophy.io/", options: {})
  @uri = uri
  @default_vocabulary = default_vocabulary
  delimiter = options[:col_sep] || guess_delimiter(filename)

  # Numeric converters that leave "0123"-style values untouched: a leading
  # zero not followed by a decimal point would be lost by a numeric conversion.
  CSV::Converters[:integer] = lambda do |s|
    d = s.to_s
    return d if d.size > 1 && d[0] == '0' && d[1] != '.'
    begin
      Integer(s.encode(CSV::ConverterEncoding))
    rescue StandardError
      s # not an integer (or not transcodable): keep the raw field
    end
  end
  CSV::Converters[:float] = lambda do |s|
    d = s.to_s
    return d if d.size > 1 && d[0] == '0' && d[1] != '.'
    begin
      Float(s.encode(CSV::ConverterEncoding))
    rescue StandardError
      s # not a float (or not transcodable): keep the raw field
    end
  end

  opts = {
    headers: true,
    header_converters: :symbol,
    converters: :all,
    col_sep: delimiter,
    skip_blanks: true
  }.merge(options)

  # The options must be passed as keywords: on Ruby 3+ a positional hash
  # would be bound to CSV.open's +mode+ parameter instead.
  @csv = CSV.open(filename, **opts)
end

Instance Method Details

#each_triple ⇒ Object

Converts every row of the CSV file to RDF triples and yields them one at a time in N-Triples form.



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/dbtools/converter/csv2rdf_converter.rb', line 46

# Walks over every row of the CSV file and yields its triples one by one,
# each serialized through the statement's #to_ntriples.
#
# For every row, a "rid" triple whose object is the CSV line number is
# yielded first, followed by one triple per column that has a non-empty value.
# The subject of all triples of a row is "<uri>#<line number>".
#
# @yield [triple] the #to_ntriples serialization of one statement
# @return [Enumerator] when called without a block
def each_triple
  return enum_for(:each_triple) unless block_given?

  @csv.each do |row|
    lineno = @csv.lineno
    subject_iri = "#{@uri}##{lineno}"

    # Emit a triple carrying the row id.
    row_id = RDF::Statement({ subject:   RDF::URI.new(subject_iri),
                              predicate: RDF::URI.new("#{@default_vocabulary}rid"),
                              object:    lineno
                            })
    yield row_id.to_ntriples

    row.each do |colname, colvalue|
      # nil.to_s is "", so this skips both missing and empty values.
      next if colvalue.to_s.empty?

      # File.join puts exactly one "/" between the vocabulary and the column name.
      cell = RDF::Statement({ subject:   RDF::URI.new(subject_iri),
                              predicate: RDF::URI.new(File.join(@default_vocabulary, colname.to_s)),
                              object:    colvalue
                            })
      yield cell.to_ntriples
    end
  end
end

#guess_delimiter(filename) ⇒ Object

Attempts to guess the delimiter based on how often each candidate occurs in the first 10 lines of the file.



69
70
71
72
73
74
75
76
77
78
# File 'lib/dbtools/converter/csv2rdf_converter.rb', line 69

# Guesses the column separator of a CSV file by counting how often each
# candidate character appears in the first 10 lines.
#
# Candidates are tried in the order comma, pipe, tab, semicolon; when several
# occur equally often, the earliest one in that order wins.
#
# @param filename [#to_s] path of the file to inspect
# @return [String] the most frequent candidate delimiter
# @raise [RuntimeError] when none of the candidates occurs in the sample
def guess_delimiter(filename)
  sample = File.foreach(filename.to_s).take(10).join

  # Pairs of [delimiter, number of occurrences in the sample].
  tallies = [',', '|', "\t", ';'].map { |sep| [sep, sample.count(sep)] }
  winner, hits = tallies.max_by(&:last)

  # A best score of zero means no candidate was seen at all.
  raise "No delimiter detected. " if hits.zero?

  winner
end