Class: Dbtools::Converter::Csv2rdf_converter
- Inherits:
-
Object
- Object
- Dbtools::Converter::Csv2rdf_converter
- Defined in:
- lib/dbtools/converter/csv2rdf_converter.rb
Instance Method Summary collapse
-
#each_triple ⇒ Object
Converts each row of the CSV to RDF triples and yields every triple as an N-Triples string.
-
#guess_delimiter(filename) ⇒ Object
Attempts to guess the delimiter by counting how often each candidate (comma, pipe, tab, semicolon) occurs in the first 10 lines of the file.
-
#initialize(filename, uri, default_vocabulary: "http://geophy.io/", options: {}) ⇒ Csv2rdf_converter
constructor
Constructor for the csv2rdf converter.
Constructor Details
#initialize(filename, uri, default_vocabulary: "http://geophy.io/", options: {}) ⇒ Csv2rdf_converter
Constructor for the csv2rdf converter.
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/dbtools/converter/csv2rdf_converter.rb', line 20
#
# Constructor for the csv2rdf converter. Opens the CSV file and keeps the
# reader in @csv; the file is not closed here.
#
# Side effect: replaces the process-wide CSV::Converters[:integer] and
# CSV::Converters[:float] entries so that values with a leading zero
# (e.g. "007") are left as strings instead of being turned into numbers.
#
# @param filename [String] path of the CSV file to open
# @param uri [String] stored in @uri
# @param default_vocabulary [String] stored in @default_vocabulary
# @param options [Hash] extra CSV options; they override the defaults below.
#   When :col_sep is absent (or nil) the delimiter is guessed from the file.
def initialize(filename, uri, default_vocabulary: "http://geophy.io/", options: {})
  @uri = uri
  @default_vocabulary = default_vocabulary

  # An explicitly supplied separator wins; otherwise sniff it from the file.
  delimiter = options[:col_sep] || guess_delimiter(filename)

  # True for values such as "007" or "01234" that must stay textual;
  # "0.5" is excluded so that it can still become a Float.
  keep_as_text = ->(text) { text.size > 1 && text[0] == '0' && text[1] != '.' }

  CSV::Converters[:integer] = lambda do |s|
    d = s.to_s
    return d if keep_as_text.call(d)

    begin
      Integer(s.encode(CSV::ConverterEncoding))
    rescue StandardError
      s # not an integer: hand the field back unchanged
    end
  end

  CSV::Converters[:float] = lambda do |s|
    d = s.to_s
    return d if keep_as_text.call(d)

    begin
      Float(s.encode(CSV::ConverterEncoding))
    rescue StandardError
      s # not a float: hand the field back unchanged
    end
  end

  opts = {
    :headers => true,
    :header_converters => :symbol,
    :converters => :all,
    :col_sep => delimiter,
    :skip_blanks => true
  }.merge(options)

  # Options are passed as keywords: a positional Hash would be bound to the
  # `mode` parameter of CSV.open on Ruby 3.
  @csv = CSV.open(filename, **opts)
end
Instance Method Details
#each_triple ⇒ Object
Converts each row of the CSV to RDF triples and yields every triple as an N-Triples string.
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/dbtools/converter/csv2rdf_converter.rb', line 46
#
# Walks over every row read from @csv and yields RDF triples as strings
# produced by RDF::Statement#to_ntriples.
#
# For each row one "rid" triple carrying the CSV line number is yielded
# first, followed by one triple per non-empty column. The subject of all
# triples of a row is "<@uri>#<line number>".
#
# NOTE(review): the rid predicate is built by plain concatenation while the
# column predicates use File.join, so the two differ when
# @default_vocabulary has no trailing slash - confirm which is intended.
#
# @yieldparam triple [String] result of RDF::Statement#to_ntriples
# @return [Enumerator] when called without a block
def each_triple
  return enum_for(:each_triple) unless block_given?

  @csv.each do |row|
    lineno = @csv.lineno

    # Emit a triple with the row id.
    rid_statement = RDF::Statement({
      subject: RDF::URI.new("#{@uri}##{lineno}"),
      predicate: RDF::URI.new("#{@default_vocabulary}rid"),
      object: lineno
    })
    yield rid_statement.to_ntriples

    row.each do |colname, colvalue|
      # Columns without a value produce no triple.
      next if colvalue.nil? || colvalue.to_s.empty?

      column_statement = RDF::Statement({
        subject: RDF::URI.new("#{@uri}##{lineno}"),
        predicate: RDF::URI.new(File.join(@default_vocabulary, colname.to_s)),
        object: colvalue
      })
      yield column_statement.to_ntriples
    end
  end
end
#guess_delimiter(filename) ⇒ Object
Attempts to guess the delimiter by counting how often each candidate (comma, pipe, tab, semicolon) occurs in the first 10 lines of the file.
69 70 71 72 73 74 75 76 77 78 |
# File 'lib/dbtools/converter/csv2rdf_converter.rb', line 69
#
# Guesses the column delimiter of a file by counting how often each
# candidate character occurs in its first 10 lines.
#
# Candidates are comma, pipe, tab and semicolon; on a tie the one listed
# first wins.
#
# @param filename [#to_s] path of the file to inspect
# @return [String] the candidate with the highest occurrence count
# @raise [RuntimeError] when none of the candidates occurs in the sample
def guess_delimiter(filename)
  candidates = [',', '|', "\t", ';']
  sample = File.foreach(filename.to_s).first(10).join

  # Pair every candidate with its occurrence count and keep the best one.
  delimiter, occurrences =
    candidates.map { |candidate| [candidate, sample.count(candidate)] }
              .max_by { |_, count| count }

  # A zero count means not a single candidate was seen in the sample.
  raise "No delimiter detected. " if occurrences.zero?

  delimiter
end