Class: TensorStream::Evaluator::OpenclEvaluator
- Inherits:
-
BaseEvaluator
- Object
- BaseEvaluator
- TensorStream::Evaluator::OpenclEvaluator
- Includes:
- ArrayOpsHelper, CLEventHelpers, MathHelper, OpHelper, OpenCLHelpers::ArrayOps, OpenCLHelpers::ImagesOps, OpenCLHelpers::MathOps, OpenCLHelpers::NNOps, OpenCLHelpers::RandomOps
- Defined in:
- lib/tensor_stream/opencl/opencl_evaluator.rb
Overview
OpenCL hardware accelerated evaluator
Constant Summary
Constants included from OpenCLHelpers::RandomOps
OpenCLHelpers::RandomOps::RAND_TABLE_SIZE
Instance Attribute Summary collapse
-
#context ⇒ Object
writeonly
Sets the attribute context.
-
#opencl_context ⇒ Object
readonly
Returns the value of attribute opencl_context.
-
#opencl_device ⇒ Object
readonly
Returns the value of attribute opencl_device.
-
#retain ⇒ Object
Returns the value of attribute retain.
Class Method Summary collapse
-
.default_device ⇒ Object
Select the best device available in the system for this evaluator.
- .fetch_device(query = []) ⇒ Object
- .getset_global_opencl_context(platform) ⇒ Object
- .opencl_to_device(dev) ⇒ Object
- .query_devices_with_score ⇒ Object
- .query_supported_devices ⇒ Object
Instance Method Summary collapse
- #complete_eval(tensor, context) ⇒ Object
-
#convert_from_buffer(tensor, result) ⇒ Object
Converts a buffer that comes from a non-OpenCL evaluator.
-
#enqueue_buffer_read(tensor, context) ⇒ Object
Generate OpenCL instruction to read back from GPU memory to Host memory for a tensor.
-
#initialize(session, device, thread_pool: nil, log_intermediates: false) ⇒ OpenclEvaluator
constructor
A new instance of OpenclEvaluator.
-
#run(tensor, execution_context) ⇒ Object
OpenCL evaluator main entry point.
- #run_with_buffer(tensor, context, execution_context) ⇒ Object
Methods included from CLEventHelpers
Methods included from OpenCLHelpers::RandomOps
Methods included from OpenCLHelpers::ArrayOps
Methods included from OpenCLHelpers::ImagesOps
Methods included from OpenCLHelpers::NNOps
Methods included from OpenCLHelpers::MathOps
Constructor Details
#initialize(session, device, thread_pool: nil, log_intermediates: false) ⇒ OpenclEvaluator
Returns a new instance of OpenclEvaluator.
56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 56 def initialize(session, device, thread_pool: nil, log_intermediates: false) super _create_opencl_context @opencl_device = device.native_device @max_work_item_dimensions = @opencl_device.max_work_item_dimensions @max_work_item_sizes = @opencl_device.max_work_item_sizes @max_work_group_size = @opencl_device.max_work_group_size @local_mem_size = @opencl_device.local_mem_size @device_type = @opencl_device.type.to_s.downcase create_command_queue end |
Instance Attribute Details
#context=(value) ⇒ Object (writeonly)
Sets the attribute context
44 45 46 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 44 def context=(value) @context = value end |
#opencl_context ⇒ Object (readonly)
Returns the value of attribute opencl_context.
43 44 45 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 43 def opencl_context @opencl_context end |
#opencl_device ⇒ Object (readonly)
Returns the value of attribute opencl_device.
43 44 45 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 43 def opencl_device @opencl_device end |
#retain ⇒ Object
Returns the value of attribute retain.
42 43 44 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 42 def retain @retain end |
Class Method Details
.default_device ⇒ Object
Select the best device available in the system for this evaluator
100 101 102 103 104 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 100 def default_device devices = OpenclEvaluator.query_devices_with_score device = devices.max { |a, b| a[1] <=> b[1] } opencl_to_device(device) end |
.fetch_device(query = []) ⇒ Object
79 80 81 82 83 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 79 def fetch_device(query = []) devices = query_devices_with_score platform_devices = devices.select { |d| d[0].platform.to_s.tr(' ', '_').downcase =~ /#{query[0].downcase}/ } opencl_to_device(platform_devices[[query[1].to_i, platform_devices.size - 1].min]) end |
.getset_global_opencl_context(platform) ⇒ Object
106 107 108 109 110 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 106 def getset_global_opencl_context(platform) @global_opencl_context ||= {} @global_opencl_context[platform] ||= yield @global_opencl_context[platform] end |
.opencl_to_device(dev) ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 85 def opencl_to_device(dev) device = dev[0] index = dev[3] platform_name = device.platform.name.tr(' ', '_').downcase uri = [platform_name, index].join(':') device_type = device.type.to_s == 'GPU' ? :gpu : :cpu OpenclDevice.new(uri, device_type, self).tap do |d| d.native_device = device end end |
.query_devices_with_score ⇒ Object
191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 191 def self.query_devices_with_score OpenCL.platforms.flat_map do |p| p.devices.select { |d| d.available > 0 }.each_with_index.collect do |d, index| score = 0 if d.type.to_s == 'CPU' score += 1 elsif d.type.to_s == 'GPU' score += 4 end score += 1000 if d.platform.name == 'NVIDIA CUDA' score += d.max_compute_units * d.max_clock_frequency [d, score, p.name, index] end end end |
.query_supported_devices ⇒ Object
72 73 74 75 76 77 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 72 def query_supported_devices devices = query_devices_with_score devices.sort_by { |a| a[1] }.map do |d| opencl_to_device(d) end end |
Instance Method Details
#complete_eval(tensor, context) ⇒ Object
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 173 def complete_eval(tensor, context) return nil if tensor.nil? buffers = if tensor.is_a?(Array) tensor.map { |t| enqueue_buffer_read(t, context) } else [enqueue_buffer_read(tensor, context)] end events = build_event_wait_list(buffers) # puts "** wait #{tensor.name} **" OpenCL.wait_for_events(events) unless events.empty? # puts "** done #{tensor.name} **" tensor.is_a?(Array) ? buffers : buffers.first end |
#convert_from_buffer(tensor, result) ⇒ Object
Converts a buffer that comes from a non-OpenCL evaluator.
138 139 140 141 142 143 144 145 146 147 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 138 def convert_from_buffer(tensor, result) if result.buffer.is_a?(TensorStream::Evaluator::OutputGroup) converted_outputs = result.buffer.outputs.zip(result.buffer.data_types).map do |output, data_type| convert_to_opencl([output].flatten, shape_eval(output), data_type: data_type, name: tensor.name) end TensorStream::Evaluator::OutputGroup.new(converted_outputs, result.buffer.data_types) else convert_to_opencl([result.buffer].flatten, shape_eval(result.buffer), data_type: result.data_type, name: tensor.name) end end |
#enqueue_buffer_read(tensor, context) ⇒ Object
Generate OpenCL instruction to read back from GPU memory to Host memory for a tensor
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 150 def enqueue_buffer_read(tensor, context) buffer = _run(tensor, context) if buffer.is_a?(Array) buffer.collect do |b| next b if b.buffer.size.zero? b.op = _opencl_queue.enqueue_read_buffer(b.cl_buffer, b.buffer, event_wait_list: build_event_wait_list([b])) b end else return buffer.outputs[0] if buffer.is_a?(OutputGroup) return buffer if buffer.nil? return [] if buffer.buffer.nil? return buffer if buffer.buffer.size.zero? # lazy allocate buffer.buffer = OpenCLBuffer.allocate_narray_for_type(buffer.buffer.data_type, buffer.buffer.size) if buffer.buffer.is_a?(OpenCLBuffer::LazyBuffer) buffer.op = _opencl_queue.enqueue_read_buffer(buffer.cl_buffer, buffer.buffer, event_wait_list: build_event_wait_list([buffer])) buffer end end |
#run(tensor, execution_context) ⇒ Object
OpenCL evaluator main entry point.
114 115 116 117 118 119 120 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 114 def run(tensor, execution_context) result = complete_eval(tensor, execution_context) # puts "-------------------wait finish------------------------" _opencl_queue.finish # puts "-------------------done finish------------------------" read_final_result(result) end |
#run_with_buffer(tensor, context, execution_context) ⇒ Object
122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 122 def run_with_buffer(tensor, context, execution_context) @context = context @context[:_cache][:_cl_buffers] ||= {} if context[:_cache] if tensor.is_a?(Array) tensor.collect do |t| value = run(t, execution_context) Buffer.new(data_type: t.data_type, buffer: value) end else value = run(tensor, execution_context) Buffer.new(data_type: tensor.data_type, buffer: value) end end |