Class: YARRRML_Transform

Inherits:
Object
  • Object
show all
Defined in:
lib/yarrrml_template_builder/yarrrml_transform.rb

Overview

Note that SDMrdfizer needs to be running on port 4000 with the ./data folder mounted as /data, e.g.: docker run --name rdfizer --rm -d -p 4000:4000 -v $PWD/data:/data fairdatasystems/sdmrdfizer:0.1.0

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(params = {}) ⇒ YARRRML_Transform

Parameters:

  • params (Hash) (defaults to: {})

    a customizable set of options

Options Hash (params):

  • :outputrdffolder (String) — default: /data/triples/
    • defaults to /data/triples - this folder must exist, even if left at the default. NOTE - this path is not relative to the host; it is relative to the rdfizer Docker container, so it begins with /data, not ./data.



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 55

# Resolves every path and service URL from +params+ (falling back to the
# docker-oriented defaults below), then writes the rdfizer .ini file and
# fills in the YARRRML template.
#
# If no :datatype_tag is supplied, a warning is printed, +failure+ is set to
# true and nothing else is initialised.
#
# @param params [Hash] a customizable set of options
# @option params [String] :datatype_tag (required) base name used for all default file names
# @option params [String] :data_path_client ("/data") data folder from the client perspective
# @option params [String] :data_path_server (data_path_client) data folder from the server perspective
# @option params [String] :config_path_client ("/config") config folder from the client perspective
# @option params [String] :config_path_server (config_path_client) config folder from the server perspective
# @option params [String] :rdfizer_base_url ("http://rdfizer:4000")
# @option params [String] :yarrrml_transform_base_url ("http://yarrrml_transform:3000")
# @option params [String] :formulation ("csv")
# @option params [String] :datafile ("<data_path_server>/<datatype_tag>.csv")
# @option params [String] :baseURI (ENV['baseURI'], else "http://example.org/data/")
# @option params [String] :outputrdffolder ("<data_path_server>/triples")
# @option params [String] :outputrmlfile ("<data_path_server>/<datatype_tag>_rml.ttl")
# @option params [String] :yarrrmlfilename overrides BOTH the client and server YARRRML file names
# @option params [String] :yarrrmltemplate ("<config_path_client>/<datatype_tag>_yarrrml_template.yaml")
# @option params [String] :inifile_client ("<data_path_client>/<datatype_tag>.ini")
# @option params [String] :inifile_server ("<data_path_server>/<datatype_tag>.ini")
def initialize(params = {})
  @datatype_tag = params.fetch(:datatype_tag, nil)
  unless @datatype_tag
    warn "must have a datatype_tag parameter.  Aborting"
    self.failure = true
    return
  end

  # Strip trailing slashes (they are re-added where needed). A Regexp is
  # required here: a String pattern such as '/$' is matched literally and
  # never removes anything. Non-mutating +sub+ also leaves the caller's
  # strings (and frozen literals) untouched.
  trim = ->(value) { value.sub(%r{/+\z}, "") }

  @data_path_client = trim.call(params.fetch(:data_path_client, "/data"))            # from client perspective (default for docker)
  @data_path_server = trim.call(params.fetch(:data_path_server, @data_path_client))  # potentially from Docker image perspective
  @config_path_client = trim.call(params.fetch(:config_path_client, "/config"))      # from client perspective (default for docker)
  @config_path_server = trim.call(params.fetch(:config_path_server, @config_path_client))

  @rdfizer_base_url = trim.call(params.fetch(:rdfizer_base_url, "http://rdfizer:4000"))
  @yarrrml_transform_base_url = trim.call(params.fetch(:yarrrml_transform_base_url, "http://yarrrml_transform:3000"))

  @formulation = params.fetch(:formulation, "csv")
  @datafile = params.fetch(:datafile, "#{@data_path_server}/#{@datatype_tag}.csv")
  @baseURI = params.fetch(:baseURI, ENV['baseURI']) || "http://example.org/data/"

  @outputrdffolder = params.fetch(:outputrdffolder, "#{@data_path_server}/triples")
  @outputrmlfile = params.fetch(:outputrmlfile, "#{@data_path_server}/#{@datatype_tag}_rml.ttl")
  # NOTE(review): both names below are read from the same :yarrrmlfilename key,
  # so an explicit value makes the client and server paths identical - confirm
  # whether separate :yarrrmlfilename_client / _server keys were intended.
  @yarrrmlfilename_client = params.fetch(:yarrrmlfilename, "#{@data_path_client}/#{@datatype_tag}_yarrrml.yaml")
  @yarrrmlfilename_server = params.fetch(:yarrrmlfilename, "#{@data_path_server}/#{@datatype_tag}_yarrrml.yaml")
  @yarrrmltemplate = params.fetch(:yarrrmltemplate, "#{@config_path_client}/#{@datatype_tag}_yarrrml_template.yaml")
  @inifile_client = params.fetch(:inifile_client, "#{@data_path_client}/#{@datatype_tag}.ini")
  @inifile_server = params.fetch(:inifile_server, "#{@data_path_server}/#{@datatype_tag}.ini")  # this will be a docker image in almost all cases

  write_ini
  transform_template
end

Instance Attribute Details

#baseURIObject

Returns the value of attribute baseURI.



17
18
19
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 17

# Base URI substituted for the |||BASE||| placeholder by transform_template.
# initialize takes it from params[:baseURI], then ENV['baseURI'], and
# otherwise uses "http://example.org/data/".
#
# @return [Object] the value of @baseURI
def baseURI
  @baseURI
end

#config_path_clientObject

Returns the value of attribute config_path_client.



12
13
14
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 12

# Config folder from the client perspective. initialize defaults it to
# "/config" and uses it to build the default YARRRML template path.
#
# @return [Object] the value of @config_path_client
def config_path_client
  @config_path_client
end

#config_path_serverObject

Returns the value of attribute config_path_server.



11
12
13
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 11

# Config folder from the server (potentially Docker image) perspective.
# initialize defaults it to the client-side config path.
#
# @return [Object] the value of @config_path_server
def config_path_server
  @config_path_server
end

#data_path_clientObject

Returns the value of attribute data_path_client.



10
11
12
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 10

# Data folder from the client perspective. initialize defaults it to "/data"
# and uses it to build the default client-side YARRRML and .ini file paths.
#
# @return [Object] the value of @data_path_client
def data_path_client
  @data_path_client
end

#data_path_serverObject

Returns the value of attribute data_path_server.



9
10
11
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 9

# Data folder from the server (potentially Docker image) perspective.
# initialize defaults it to the client-side data path; write_ini uses it as
# the default main_directory.
#
# @return [Object] the value of @data_path_server
def data_path_server
  @data_path_server
end

#datafileObject

Returns the value of attribute datafile.



16
17
18
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 16

# Value substituted for the |||DATA||| placeholder by transform_template.
# initialize defaults it to "<data_path_server>/<datatype_tag>.csv".
#
# @return [Object] the value of @datafile
def datafile
  @datafile
end

#datatype_tagObject

Returns the value of attribute datatype_tag.



15
16
17
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 15

# The required :datatype_tag option passed to initialize; it is the base name
# of every default file name built there.
#
# @return [Object] the value of @datatype_tag
def datatype_tag
  @datatype_tag
end

#failureObject

Returns the value of attribute failure.



27
28
29
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 27

# Failure flag. initialize sets it to true when no :datatype_tag was given;
# no other assignment is visible in this listing.
#
# @return [Object] the value of @failure
def failure
  @failure
end

#formulationObject

Returns the value of attribute formulation.



23
24
25
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 23

# Value substituted for the |||FORMULATION||| placeholder by
# transform_template. initialize defaults it to "csv".
#
# @return [Object] the value of @formulation
def formulation
  @formulation
end

#inifile_clientObject

Returns the value of attribute inifile_client.



25
26
27
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 25

# Path of the .ini file from the client perspective; the default file that
# write_ini writes. initialize defaults it to
# "<data_path_client>/<datatype_tag>.ini".
#
# @return [Object] the value of @inifile_client
def inifile_client
  @inifile_client
end

#inifile_serverObject

Returns the value of attribute inifile_server.



26
27
28
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 26

# Path of the .ini file from the server perspective; make_fair_data appends
# it to the rdfizer graph_creation URL. initialize defaults it to
# "<data_path_server>/<datatype_tag>.ini".
#
# @return [Object] the value of @inifile_server
def inifile_server
  @inifile_server
end

#outputrdffolderObject

Returns the value of attribute outputrdffolder.



24
25
26
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 24

# Folder written as output_folder in the .ini file by write_ini. initialize
# defaults it to "<data_path_server>/triples".
#
# @return [Object] the value of @outputrdffolder
def outputrdffolder
  @outputrdffolder
end

#outputrmlfileObject

Returns the value of attribute outputrmlfile.



22
23
24
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 22

# Path of the RML file: sent as the parserout query value by
# yarrrml_transform and written as the mapping entry by write_ini.
# initialize defaults it to "<data_path_server>/<datatype_tag>_rml.ttl".
#
# @return [Object] the value of @outputrmlfile
def outputrmlfile
  @outputrmlfile
end

#rdfizer_base_urlObject

Returns the value of attribute rdfizer_base_url.



13
14
15
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 13

# Base URL of the rdfizer service called by make_fair_data. initialize
# defaults it to "http://rdfizer:4000".
#
# @return [Object] the value of @rdfizer_base_url
def rdfizer_base_url
  @rdfizer_base_url
end

#yarrrml_transform_base_urlObject

Returns the value of attribute yarrrml_transform_base_url.



14
15
16
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 14

# Base URL of the YARRRML-to-RML service called by yarrrml_transform.
# initialize defaults it to "http://yarrrml_transform:3000".
#
# @return [Object] the value of @yarrrml_transform_base_url
def yarrrml_transform_base_url
  @yarrrml_transform_base_url
end

#yarrrmlfilename_clientObject

Returns the value of attribute yarrrmlfilename_client.



20
21
22
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 20

# Client-side path of the YARRRML file that transform_template writes.
# initialize reads it from params[:yarrrmlfilename], defaulting to
# "<data_path_client>/<datatype_tag>_yarrrml.yaml".
#
# @return [Object] the value of @yarrrmlfilename_client
def yarrrmlfilename_client
  @yarrrmlfilename_client
end

#yarrrmlfilename_serverObject

Returns the value of attribute yarrrmlfilename_server.



21
22
23
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 21

# Server-side path of the YARRRML file; sent as the parserin query value by
# yarrrml_transform. initialize reads it from params[:yarrrmlfilename],
# defaulting to "<data_path_server>/<datatype_tag>_yarrrml.yaml".
#
# @return [Object] the value of @yarrrmlfilename_server
def yarrrmlfilename_server
  @yarrrmlfilename_server
end

#yarrrmltemplateObject

Returns the value of attribute yarrrmltemplate.



19
20
21
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 19

# Path of the YARRRML template that transform_template reads. initialize
# defaults it to "<config_path_client>/<datatype_tag>_yarrrml_template.yaml".
#
# @return [Object] the value of @yarrrmltemplate
def yarrrmltemplate
  @yarrrmltemplate
end

Instance Method Details

#make_fair_dataObject

Executes the CSV to RDF based on the RML

no parameters

executes the sdmrdfizer transformation using the .ini file created by the ‘initialize’ routine



173
174
175
176
177
178
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 173

# Executes the CSV to RDF transformation based on the RML.
#
# Sends GET <rdfizer_base_url>/graph_creation/<inifile_server> to the
# sdmrdfizer service and logs the HTTP status code to stderr. Takes no
# parameters.
#
# @return [nil] the result of the final +warn+
def make_fair_data
  # Join with exactly one slash whether or not inifile_server starts with
  # one, and log the very same URL that is requested.
  endpoint = "#{rdfizer_base_url}/graph_creation/#{inifile_server.sub(%r{\A/+}, "")}"
  warn "making FAIR data with #{endpoint}"  # this is sdmrdfizer
  # NOTE(review): the timeout is deliberately huge, presumably because the
  # transformation can run for a long time - confirm the intended unit.
  response = RestClient::Request.execute(method: :get, url: endpoint, timeout: 900_000_000)
  warn response.code
  warn "FAIR data is available in .#{outputrdffolder}/#{datatype_tag}.nt"
end

#transform_templateObject



105
106
107
108
109
110
111
112
113
114
115
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 105

# Fills in the YARRRML template for this datatype.
#
# Reads the file at +yarrrmltemplate+ into @template, replaces the
# |||DATA|||, |||FORMULATION||| and |||BASE||| placeholders with +datafile+,
# +formulation+ and +baseURI+, and writes the result to
# +yarrrmlfilename_client+.
#
# @return [nil] the result of the final +warn+
def transform_template
  @template = File.read(yarrrmltemplate)

  substitutions = {
    "|||DATA|||" => datafile,
    "|||FORMULATION|||" => formulation,
    "|||BASE|||" => baseURI
  }
  # Block form inserts the value verbatim; a String replacement argument
  # would interpret backslash sequences such as \1 or \\ inside the value.
  substitutions.each do |placeholder, value|
    @template.gsub!(placeholder) { value }
  end

  File.open(yarrrmlfilename_client, "w") { |f| f.puts @template }
  warn "Ready to yarrrml transform #{datatype_tag} from #{yarrrmlfilename_client} "
end

#write_ini(inifile = self.inifile_client, path = self.data_path_server, rdffolder = self.outputrdffolder, rmlfile = self.outputrmlfile, datatype = self.datatype_tag) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 120

# Writes the rdfizer configuration (.ini) file.
#
# @param inifile [String] path of the file to write (default: inifile_client)
# @param path [String] value of main_directory (default: data_path_server)
# @param rdffolder [String] value of output_folder (default: outputrdffolder)
# @param rmlfile [String] value of the dataset mapping entry (default: outputrmlfile)
# @param datatype [String] dataset name, written in both sections (default: datatype_tag)
# @return [nil] the result of +puts+
def write_ini(inifile = self.inifile_client,
              path = self.data_path_server,
              rdffolder = self.outputrdffolder,
              rmlfile = self.outputrmlfile,
              datatype = self.datatype_tag)
  ini_text = <<~CONFIG
    [default]
    main_directory: #{path}

    [datasets]
    number_of_datasets: 1
    output_folder: #{rdffolder}
    all_in_one_file: yes
    remove_duplicate: yes
    enrichment: yes
    name: #{datatype}
    ordered: yes
    large_file: true

    [dataset1]
    name: #{datatype}
    mapping: #{rmlfile}

  CONFIG

  File.open(inifile, "w") do |out|
    out.puts ini_text
  end
end

#yarrrml_transformObject

Executes the yarrrml to rml transformation

no parameters



157
158
159
160
161
162
163
164
# File 'lib/yarrrml_template_builder/yarrrml_transform.rb', line 157

# Executes the YARRRML to RML transformation.
#
# Sends GET <yarrrml_transform_base_url>/?parserin=<yarrrmlfilename_server>&parserout=<outputrmlfile>
# and logs progress to stderr. Takes no parameters.
#
# @return [nil] the result of the final +warn+
def yarrrml_transform
  service = self.yarrrml_transform_base_url
  yarrrml_in = self.yarrrmlfilename_server
  rml_out = self.outputrmlfile

  warn "running docker yarrrml-parser:ejp-latest"
  warn "yarrrml to rml starting with: #{service} PARSERIN=#{yarrrml_in} -e PARSEROUT=#{rml_out}"

  target = "#{service}/?parserin=#{yarrrml_in}&parserout=#{rml_out}"
  resp = RestClient::Request.execute(method: :get, url: target, timeout: 9000000)

  warn "#{resp}: rml file has been created in #{rml_out} - ready to make FAIR data"
end