Class: RDF::NTriples::Reader

Inherits:
Reader
  • Object
show all
Defined in:
lib/rdf/ntriples/reader.rb

Overview

N-Triples parser.

Examples:

Obtaining an NTriples reader class

# Look up the reader class by format symbol, file name, file extension,
# or content type:
RDF::Reader.for(:ntriples)     #=> RDF::NTriples::Reader
RDF::Reader.for("etc/doap.nt")
RDF::Reader.for(:file_name      => "etc/doap.nt")
RDF::Reader.for(:file_extension => "nt")
RDF::Reader.for(:content_type   => "text/plain")

Parsing RDF statements from an NTriples file

# Open the file with the reader and print each statement it yields:
RDF::NTriples::Reader.open("etc/doap.nt") do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end

Parsing RDF statements from an NTriples string

# Wrap the file's contents in a StringIO and hand that to the reader,
# then print each statement it yields:
data = StringIO.new(File.read("etc/doap.nt"))
RDF::NTriples::Reader.new(data) do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end

See Also:

Direct Known Subclasses

RDF::NQuads::Reader

Constant Summary collapse

# A comment line; captures the text following the `#`.
COMMENT =
/^#\s*(.*)$/.freeze
# A blank node label (`_:name`); captures the name.
NODEID =
/^_:([A-Za-z][A-Za-z0-9\-_]*)/.freeze
# A `<...>` URI reference; captures the text between the angle brackets.
URIREF =
/^<([^>]+)>/.freeze
# A double-quoted literal; captures the still-escaped text between the quotes.
# A backslash always consumes the character that follows it, so an escaped
# backslash directly before the closing quote (`"a\\"`) terminates the
# literal there instead of being read as an escaped quote.
LITERAL_PLAIN =
/^"((?:\\.|[^"\\])*)"/.freeze
# A quoted literal followed by `@lang`; captures the text and the language tag.
LITERAL_WITH_LANGUAGE =
/^"((?:\\.|[^"\\])*)"@([a-z]+[\-A-Za-z0-9]*)/.freeze
# A quoted literal followed by `^^<uri>`; captures the text and the datatype URI.
LITERAL_WITH_DATATYPE =
/^"((?:\\.|[^"\\])*)"\^\^<([^>]+)>/.freeze
# A leading `@lang` suffix; captures the language tag.
LANGUAGE_TAG =
/^@([a-z]+[\-A-Za-z0-9]*)/.freeze
# A leading `^^<uri>` suffix; captures the datatype URI.
DATATYPE_URI =
/^\^\^<([^>]+)>/.freeze
# Any literal form. The suffixed forms come first because the plain pattern
# would otherwise match just the quoted prefix of a suffixed literal.
LITERAL =
Regexp.union(LITERAL_WITH_LANGUAGE, LITERAL_WITH_DATATYPE, LITERAL_PLAIN).freeze
# Terms accepted in subject position: URI reference or blank node.
SUBJECT =
Regexp.union(URIREF, NODEID).freeze
# Terms accepted in predicate position: URI reference only.
PREDICATE =
Regexp.union(URIREF).freeze
# Terms accepted in object position: URI reference, blank node, or literal.
OBJECT =
Regexp.union(URIREF, NODEID, LITERAL).freeze
# The characters that have a single-character backslash escape.
ESCAPE_CHARS =
["\t", "\n", "\r", "\"", "\\"].freeze
# A `\uXXXX` escape; captures the four hex digits.
ESCAPE_CHAR4 =
/\\u([0-9A-Fa-f]{4,4})/.freeze
# A `\UXXXXXXXX` escape; captures the eight hex digits.
ESCAPE_CHAR8 =
/\\U([0-9A-Fa-f]{8,8})/.freeze
# Either numeric escape form.
ESCAPE_CHAR =
Regexp.union(ESCAPE_CHAR4, ESCAPE_CHAR8).freeze
# Two consecutive `\uXXXX` escapes (a candidate surrogate pair); captures both
# groups of hex digits.
ESCAPE_SURROGATE =
/\\u([0-9A-Fa-f]{4,4})\\u([0-9A-Fa-f]{4,4})/.freeze
# Code unit range of the first (high) half of a surrogate pair.
ESCAPE_SURROGATE1 =
(0xD800..0xDBFF).freeze
# Code unit range of the second (low) half of a surrogate pair.
ESCAPE_SURROGATE2 =
(0xDC00..0xDFFF).freeze

Instance Attribute Summary

Attributes inherited from Reader

#options

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Reader

#base_uri, #close, each, #each_statement, #each_triple, for, format, #initialize, open, #prefix, #prefixes, #prefixes=, #rewind, #to_sym, to_sym

Methods included from Util::Aliasing::LateBound

#alias_method

Methods included from Enumerable

#contexts, #dump, #each_context, #each_graph, #each_object, #each_predicate, #each_quad, #each_statement, #each_subject, #each_triple, #enum_context, #enum_graph, #enum_object, #enum_predicate, #enum_quad, #enum_statement, #enum_subject, #enum_triple, #has_context?, #has_object?, #has_predicate?, #has_quad?, #has_statement?, #has_subject?, #has_triple?, #objects, #predicates, #quads, #statements, #subjects, #supports?, #to_a, #to_hash, #to_set, #triples

Methods included from Countable

#count, #empty?, #enum_for

Methods included from Readable

#readable?

Constructor Details

This class inherits a constructor from RDF::Reader

Class Method Details

.parse_literal(input) ⇒ RDF::Literal

Parameters:

  • input (String)

Returns:



109
110
111
112
113
114
115
116
117
118
# File 'lib/rdf/ntriples/reader.rb', line 109

##
# Builds a literal from its serialized form, trying the language-tagged,
# datatyped, and plain patterns in that order. Returns `nil` when none match.
#
# @param  [String] input
# @return [RDF::Literal, nil]
def self.parse_literal(input)
  case input
  when LITERAL_WITH_LANGUAGE
    text, language = $1, $2
    RDF::Literal.new(unescape(text), :language => language)
  when LITERAL_WITH_DATATYPE
    text, datatype = $1, $2
    RDF::Literal.new(unescape(text), :datatype => datatype)
  when LITERAL_PLAIN
    text = $1
    RDF::Literal.new(unescape(text))
  end
end

.parse_node(input) ⇒ RDF::Node

Parameters:

  • input (String)

Returns:



91
92
93
94
95
# File 'lib/rdf/ntriples/reader.rb', line 91

##
# Builds a blank node from a `_:label` token; returns `nil` when the input
# does not start with one.
#
# @param  [String] input
# @return [RDF::Node, nil]
def self.parse_node(input)
  return nil unless input =~ NODEID
  RDF::Node.new(Regexp.last_match(1))
end

.parse_object(input) ⇒ RDF::Term

Parameters:

  • input (String)

Returns:



84
85
86
# File 'lib/rdf/ntriples/reader.rb', line 84

##
# Parses a term in object position: tries a URI, then a blank node, then a
# literal, and returns the first result that is not nil/false.
#
# @param  [String] input
# @return [RDF::Term]
def self.parse_object(input)
  term = parse_uri(input)
  term ||= parse_node(input)
  term ||= parse_literal(input)
  term
end

.parse_predicate(input) ⇒ RDF::URI

Parameters:

  • input (String)

Returns:



77
78
79
# File 'lib/rdf/ntriples/reader.rb', line 77

##
# Parses a term in predicate position by delegating to `parse_uri` with the
# `:intern` option enabled.
#
# @param  [String] input
# @return [RDF::URI]
def self.parse_predicate(input)
  options = { :intern => true }
  parse_uri(input, options)
end

.parse_subject(input) ⇒ RDF::Resource

Parameters:

  • input (String)

Returns:



70
71
72
# File 'lib/rdf/ntriples/reader.rb', line 70

##
# Parses a term in subject position: tries a URI first, then a blank node.
#
# @param  [String] input
# @return [RDF::Resource]
def self.parse_subject(input)
  resource = parse_uri(input)
  resource ||= parse_node(input)
  resource
end

.parse_uri(input, options = {}) ⇒ RDF::URI

Parameters:

  • input (String)

Returns:



100
101
102
103
104
# File 'lib/rdf/ntriples/reader.rb', line 100

##
# Builds a URI from a `<...>` token; returns `nil` when the input does not
# start with one.
#
# @param  [String] input
# @param  [Hash]   options
#   when `options[:intern]` is truthy the URI is created through
#   `RDF::URI.intern`, otherwise through `RDF::URI.new`
# @return [RDF::URI, nil]
def self.parse_uri(input, options = {})
  return nil unless input =~ URIREF
  constructor = options[:intern] ? :intern : :new
  RDF::URI.send(constructor, Regexp.last_match(1))
end

.unescape(string) ⇒ String



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/rdf/ntriples/reader.rb', line 126

##
# Decodes the N-Triples escape sequences contained in `string`.
#
# Recognized escapes are the single-character forms (`\t`, `\n`, `\r`, `\"`,
# `\\`), `\uXXXX` and `\UXXXXXXXX` code points, and `\uXXXX\uXXXX` UTF-16
# surrogate pairs (which are combined into one code point).
#
# Every escape is decoded in a single left-to-right pass, so the output of
# one escape is never re-read as the start of another: `\\n` decodes to a
# backslash followed by `n`, not to a backslash followed by a newline.
#
# The string is decoded in place and the same object is returned; a frozen
# string is copied first, and the decoded copy is returned instead.
#
# @param  [String] string
# @return [String] the decoded string, tagged as UTF-8 where encodings exist
def self.unescape(string)
  string = string.dup if string.frozen?
  has_encoding = string.respond_to?(:force_encoding) # false on Ruby 1.8.x

  # Work on raw bytes so decoded UTF-8 sequences can be spliced in whatever
  # encoding the input happened to be tagged with.
  string.force_encoding(Encoding::ASCII_8BIT) if has_encoding

  # Alternatives, in order: surrogate pair, \uXXXX, \UXXXXXXXX, single char.
  string.gsub!(/\\(?:u([Dd][89ABab][0-9A-Fa-f]{2})\\u([Dd][C-Fc-f][0-9A-Fa-f]{2})|u([0-9A-Fa-f]{4})|U([0-9A-Fa-f]{8})|([tnr"\\]))/) do
    high, low, short, long, char = $1, $2, $3, $4, $5
    if char
      case char
        when 't' then "\t"
        when 'n' then "\n"
        when 'r' then "\r"
        else char # \" and \\ decode to the escaped character itself
      end
    else
      codepoint = if high
        # Combine the UTF-16 surrogate halves into one code point.
        0x10000 + ((high.hex - 0xD800) << 10) + (low.hex - 0xDC00)
      else
        (short || long).hex
      end
      utf8 = [codepoint].pack('U')
      # Retag as binary so it is compatible with the binary-tagged target.
      has_encoding ? utf8.force_encoding(Encoding::ASCII_8BIT) : utf8
    end
  end

  string.force_encoding(Encoding::UTF_8) if has_encoding
  string
end

.unserialize(input) ⇒ RDF::Term

Reconstructs an RDF value from its serialized N-Triples representation.

Parameters:

  • input (String)

Returns:



60
61
62
63
64
65
# File 'lib/rdf/ntriples/reader.rb', line 60

##
# Reconstructs an RDF value from its serialized N-Triples representation by
# feeding it to a new reader and calling `read_value`. A `nil` input yields
# `nil`.
#
# @param  [String] input
# @return [RDF::Term]
def self.unserialize(input)
  return nil if input.nil?
  new(input).read_value
end

Instance Method Details

#read_comment ⇒ Boolean

Returns:

  • (Boolean)

See Also:



192
193
194
# File 'lib/rdf/ntriples/reader.rb', line 192

##
# Attempts to match the COMMENT pattern via `match` and returns that call's
# result.
# NOTE(review): `match` is defined outside this listing; presumably it
# consumes the matched text from the current line -- confirm.
#
# @return [Boolean]
def read_comment
  match(COMMENT)
end

#read_literal ⇒ RDF::Literal

Returns:

See Also:



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/rdf/ntriples/reader.rb', line 222

##
# Reads a literal: a quoted string optionally followed by a language tag or
# by `^^` and a datatype URI. Returns `nil` when no quoted string matches.
#
# The literal is validated and/or canonicalized when the reader's
# `validate?` / `canonicalize?` flags say so.
#
# @return [RDF::Literal, nil]
def read_literal
  quoted = match(LITERAL_PLAIN)
  return nil unless quoted

  text = self.class.unescape(quoted)
  literal =
    if (language = match(LANGUAGE_TAG))
      RDF::Literal.new(text, :language => language)
    elsif match(/^(\^\^)/) # FIXME
      # The datatype URI must follow the `^^` marker.
      RDF::Literal.new(text, :datatype => (read_uriref || fail_object))
    else
      RDF::Literal.new(text) # plain string literal
    end

  literal.validate! if validate?
  literal.canonicalize! if canonicalize?
  literal
end

#read_node ⇒ RDF::Node

Returns:

See Also:



212
213
214
215
216
217
# File 'lib/rdf/ntriples/reader.rb', line 212

##
# Reads a blank node. Nodes are cached by label in `@nodes`, so a label that
# occurs again on this reader yields the same `RDF::Node` object. Returns
# `nil` when no blank node label matches.
#
# @return [RDF::Node, nil]
def read_node
  label = match(NODEID)
  return nil unless label

  @nodes ||= {}
  @nodes[label] ||= RDF::Node.new(label)
end

#read_triple ⇒ Array



170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/rdf/ntriples/reader.rb', line 170

##
# Reads lines until one yields a triple, skipping blank lines and comments.
#
# When a term cannot be read, `@line` is restored to the line as it was
# before parsing began and the `RDF::ReaderError` is re-raised.
#
# @return [Array] `[subject, predicate, object]`
def read_triple
  loop do
    readline.strip! # EOFError thrown on end of input
    saved_line = @line # kept so the input can be rewound on a parse error

    begin
      next if blank? || read_comment

      subject   = read_uriref || read_node || fail_subject
      predicate = read_uriref(:intern => true) || fail_predicate
      object    = read_uriref || read_node || read_literal || fail_object
      return [subject, predicate, object]
    rescue RDF::ReaderError
      @line = saved_line # this allows #read_value to work
      raise
    end
  end
end

#read_uriref(options = {}) ⇒ RDF::URI

Returns:

See Also:



199
200
201
202
203
204
205
206
207
# File 'lib/rdf/ntriples/reader.rb', line 199

##
# Reads a `<...>` URI reference. Returns `nil` when none matches.
#
# The URI is created through `RDF::URI.intern` only when both the reader's
# `intern?` flag and `options[:intern]` are set; otherwise through
# `RDF::URI.new`. It is validated and/or canonicalized when the reader's
# `validate?` / `canonicalize?` flags say so.
#
# @param  [Hash] options
# @return [RDF::URI, nil]
def read_uriref(options = {})
  escaped = match(URIREF)
  return nil unless escaped

  text = self.class.unescape(escaped)
  constructor = (intern? && options[:intern]) ? :intern : :new
  uri = RDF::URI.send(constructor, text)
  uri.validate! if validate?
  uri.canonicalize! if canonicalize?
  uri
end

#read_value ⇒ RDF::Term

Returns:



159
160
161
162
163
164
165
# File 'lib/rdf/ntriples/reader.rb', line 159

##
# Reads the next value: a whole statement if one can be read, otherwise
# (on `RDF::ReaderError`) a single URI, blank node, or literal.
#
# @return [RDF::Term]
def read_value
  read_statement
rescue RDF::ReaderError
  # Not a full statement: fall back to reading one bare term.
  read_uriref || read_node || read_literal
end