Class: OpenTox::Parser::Owl::Dataset
- Inherits:
-
Object
- Object
- OpenTox::Parser::Owl::Dataset
- Includes:
- OpenTox::Parser::Owl
- Defined in:
- lib/parser.rb
Overview
OWL-DL parser for datasets
Instance Attribute Summary collapse
-
#uri ⇒ Object
writeonly
Sets the attribute uri.
Instance Method Summary collapse
-
#initialize(uri, subjectid = nil) ⇒ OpenTox::Parser::Owl::Dataset
constructor
Create a new OWL-DL dataset parser.
-
#load_features(subjectid = nil) ⇒ Hash
Read only features from a dataset service.
-
#load_uri(subjectid = nil) ⇒ Hash
Read data from dataset service.
Methods included from OpenTox::Parser::Owl
Constructor Details
#initialize(uri, subjectid = nil) ⇒ OpenTox::Parser::Owl::Dataset
Create a new OWL-DL dataset parser
146 147 148 149 |
# File 'lib/parser.rb', line 146

# Create a new OWL-DL dataset parser.
#
# The URI is handed to the parent initializer; the dataset object that the
# load_* methods fill is then created from @uri.
# NOTE(review): @uri is presumably assigned by the parent initializer - confirm.
#
# @param [String] uri Dataset URI
# @param [String] subjectid passed through to OpenTox::Dataset.new
# @return [OpenTox::Parser::Owl::Dataset]
def initialize(uri, subjectid=nil)
  super(uri)
  @dataset = ::OpenTox::Dataset.new(@uri, subjectid)
end
Instance Attribute Details
#uri=(value) ⇒ Object (writeonly)
Sets the attribute uri
141 142 143 |
# File 'lib/parser.rb', line 141

# Sets the attribute uri.
#
# @param value the new value stored in @uri
# @return the assigned value
def uri=(value)
  @uri = value
end
Instance Method Details
#load_features(subjectid = nil) ⇒ Hash
Read only features from a dataset service.
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 |
# File 'lib/parser.rb', line 239

# Read only features from a dataset service.
#
# If @uri names an existing local file it is parsed directly; otherwise the
# RDF/XML is downloaded into a temporary file first. The file is converted to
# N-Triples with the external `rapper` command, and every statement whose
# subject has an rdf:type matching /Feature|Substructure/ is stored in
# @dataset.features (rdf:type values are collected in an Array, all other
# predicates keep a single value with the "^^datatype" suffix removed).
#
# @param [String] subjectid sent as :subjectid with the HTTP request
# @return [Hash] @dataset.features
def load_features(subjectid=nil)
  require "shellwords" # stdlib; used to quote the path handed to the shell

  tempfile = nil
  if File.exist?(@uri)
    # rapper reads the file itself, so only the path is needed (no open handle)
    path = @uri
  else
    tempfile = Tempfile.new("ot-rdfxml")
    # do not concat /features to uri string, this would not work for dataset/R401577?max=3
    uri = URI::parse(@uri)
    # PENDING
    # ambit models return http://host/dataset/id?feature_uris[]=sth but
    # ambit dataset services do not support http://host/dataset/id/features?feature_uris[]=sth
    # and features are not included in http://host/dataset/id/features
    # -> load features from complete dataset
    uri.path = File.join(uri.path,"features") unless @uri=~/\?(feature_uris|page|pagesize)/
    tempfile.puts OpenTox::RestClientWrapper.get(uri.to_s,{:subjectid => subjectid,:accept => "application/rdf+xml"},nil,false)
    tempfile.close
    path = tempfile.path
  end

  statements = []
  features = Set.new
  # The path is shell-escaped so file names with spaces or metacharacters are safe.
  `rapper -i rdfxml -o ntriples #{Shellwords.escape(path)} 2>/dev/null`.each_line do |line|
    # Split into subject, predicate and "object ." (same tokenisation as load_uri);
    # this also copes with blank-node subjects and literals containing "> ".
    triple = line.chomp.split(' ',3).map{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}
    next unless triple.size == 3 # skip blank or malformed lines
    statements << triple
    features << triple[0] if triple[1] == RDF.type && triple[2] =~ /Feature|Substructure/
  end

  statements.each do |subject, predicate, object|
    next unless features.include?(subject)
    entry = (@dataset.features[subject] ||= {})
    value = object.split('^^').first
    if predicate == RDF.type
      # a feature may have several types
      (entry[predicate] ||= []) << value
    else
      entry[predicate] = value
    end
  end
  @dataset.features
ensure
  # remove the download even when fetching or parsing failed
  tempfile.close! if tempfile
end
#load_uri(subjectid = nil) ⇒ Hash
Read data from dataset service. Files can be parsed by setting #uri to a filename (after initialization with a real URI)
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 |
# File 'lib/parser.rb', line 162

# Read data from dataset service. Files can be parsed by setting #uri to a
# filename (after initialization with a real URI).
#
# The RDF/XML is converted to N-Triples with the external `rapper` command.
# Data entries, their feature values and acceptValue statements are collected,
# added to @dataset, and finally features and metadata are loaded.
#
# @param [String] subjectid sent as :subjectid with the HTTP request and
#   passed on to load_features
# @return the populated @dataset
def load_uri(subjectid=nil)
  require "shellwords" # stdlib; used to quote the path handed to the shell

  # rapper is not pointed at the URI directly because of 2 reasons:
  # * http errors won't be noticed
  # * subjectid cannot be sent as header
  tempfile = nil
  if File.exist?(@uri)
    # rapper reads the file itself, so only the path is needed (no open handle)
    path = @uri
  else
    tempfile = Tempfile.new("ot-rdfxml")
    tempfile.puts OpenTox::RestClientWrapper.get(@uri,{:subjectid => subjectid,:accept => "application/rdf+xml"},nil,false)
    tempfile.close
    path = tempfile.path
  end

  data = {}                  # entry id => {:compound => uri, :values => [value ids]}
  feature_values = {}        # value id => raw literal ("1.5^^datatype")
  feature = {}               # value id => feature uri
  feature_accept_values = {} # feature uri => [raw literals]

  # Predicate patterns are built once instead of once per line.
  # The order of the `when` clauses matters: OT.value is a prefix of OT.values.
  values_re       = /#{OT.values}/i
  value_re        = /#{OT.value}/i
  compound_re     = /#{OT.compound}/i
  feature_re      = /#{OT.feature}/i
  type_re         = /#{RDF.type}/i
  compound_type_re = /#{OT.Compound}/i
  accept_value_re = /#{OT.acceptValue}/i

  # The path is shell-escaped so file names with spaces or metacharacters are safe.
  `rapper -i rdfxml -o ntriples #{Shellwords.escape(path)} 2>/dev/null`.each_line do |line|
    triple = line.chomp.split(' ',3).map{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}
    next unless triple.size == 3 # skip blank or malformed lines
    subject, predicate, object = triple
    case predicate
    when values_re
      data[subject] ||= {:compound => "", :values => []}
      data[subject][:values] << object
    when value_re
      feature_values[subject] = object
    when compound_re
      data[subject] ||= {:compound => "", :values => []}
      data[subject][:compound] = object
    when feature_re
      feature[subject] = object
    when type_re
      if object =~ compound_type_re && !data[subject]
        data[subject] = {:compound => subject, :values => []}
      end
    when accept_value_re
      # acceptValue in ambit datasets is only provided in dataset/<id> not in dataset/<id>/features
      (feature_accept_values[subject] ||= []) << object
    end
  end

  data.each do |id,entry|
    if entry[:values].size==0
      # no feature values: add plain compounds
      @dataset.add_compound(entry[:compound])
    else
      entry[:values].each do |value_id|
        value = nil # stays nil when no value statement was seen for this id
        raw = feature_values[value_id]
        if raw
          parts = raw.split(/\^\^/)
          literal = parts.first
          value = case parts[-1]
                  when XSD.double, XSD.float then literal.to_f
                  when XSD.boolean then (literal =~ /true/i) ? true : false
                  else literal
                  end
        end
        @dataset.add entry[:compound],feature[value_id],value
      end
    end
  end

  load_features subjectid
  feature_accept_values.each do |feature_uri, values|
    # the feature may be missing from the features listing; create it instead of failing on nil
    (@dataset.features[feature_uri] ||= {})[OT.acceptValue] = values
  end
  # NOTE(review): this statement was garbled in the extracted listing
  # ("@dataset. = (subjectid)"); reconstructed as a metadata load - confirm
  # against lib/parser.rb.
  @dataset.metadata = load_metadata(subjectid)
  @dataset
ensure
  # remove the download even when fetching or parsing failed
  tempfile.close! if tempfile
end