Class: ComputeUnit::NvidiaGpu
- Inherits:
-
Gpu
- Object
- Device
- ComputeBase
- Gpu
- ComputeUnit::NvidiaGpu
- Defined in:
- lib/compute_unit/gpus/nvidia_gpu.rb
Constant Summary collapse
- VENDOR_ID =
'10de'
- MAKE =
'Nvidia'
- SUBTYPE =
'nvidia'
- NVIDIA_SMI =
'/usr/bin/nvidia-smi'
- NVIDIA_PROC_PATH =
ENV['NVIDIA_PROC_PATH'] || File.join(ComputeUnit::Device::PROC_PATH, 'driver', 'nvidia', 'gpus')
Constants inherited from Gpu
Gpu::DEVICE_CLASS, Gpu::DEVICE_CLASS_NAME
Constants inherited from ComputeBase
Constants inherited from Device
Device::PROC_PATH, Device::SYSFS_DEVICES_PATH
Instance Attribute Summary
Attributes inherited from Gpu
#bios, #name, #pci_loc, #use_opencl
Attributes inherited from ComputeBase
#compute_type, #index, #power_offset, #serial, #timestamp, #type, #uuid
Attributes inherited from Device
#device_class_id, #device_id, #device_path, #device_vendor_id, #make, #model, #subsystem_device_id, #subsystem_vendor_id, #vendor
Class Method Summary collapse
- .blank_data ⇒ Object
- .create_from_path(device_path, index, use_opencl = false) ⇒ Object
-
.devices ⇒ Array
-
returns a list of device paths of all devices specific to the vendor id.
-
-
.find_all(use_opencl = false) ⇒ Array
-
returns an array of gpu instances of NVIDIA type only.
-
-
.read_information_file(device_path) ⇒ Hash
Parses the driver's information file into a hash, e.g. {:model=>“GeForce GTX 1070”, :irq=>“130”, :gpu_uuid=>“GPU-0116fb5c-66f4-1cba-c216-97f4600a8152”, :video_bios=>“86.04.50.40.4a”, :bus_type=>“PCIe”, :dma_size=>“47 bits”, :dma_mask=>“0x7fffffffffff”, :bus_location=>“0000:0d:00.0”, :device_minor=>“7”}.
Instance Method Summary collapse
-
#core_clock ⇒ Integer
The current core clock speed.
-
#fan ⇒ Integer
The fan speed.
- #information_file ⇒ Object
-
#initialize(device_path, opts = {}) ⇒ NvidiaGpu
constructor
A new instance of NvidiaGpu.
-
#memory_clock ⇒ Integer
The current memory clock speed.
- #memory_free ⇒ Object
- #memory_total ⇒ Object
- #memory_used ⇒ Object
-
#meta ⇒ Hash
return cached data or fetch new data.
-
#metadata ⇒ Hash
Data returned from nvidia-smi, e.g. {“memory.used [MiB]”: “2578 MiB”, “memory.free [MiB]”: “5534 MiB”, “memory.total [MiB]”: “8112 MiB”, “utilization.gpu [%]”: “100”, “temperature.gpu”: “53”, “power.draw [W]”: “129.21”, “power.limit [W]”: “130.00”, “power.max_limit [W]”: “217.00”, “pstate”: 2, “fan.speed [%]”: “75”}.
-
#power ⇒ Float
The power being used by the gpu.
- #power_limit ⇒ Object
- #power_limit=(value) ⇒ Object
- #power_max_limit ⇒ Object
- #pstate ⇒ Object
- #reset_metadata ⇒ Object
-
#set_fan_limit(_value, _type = 'current') ⇒ Numeric
-
original passed in value after being set.
-
- #set_mem_clock_and_vddc(_mem_clock, _mem_volt) ⇒ Object
- #subtype ⇒ Object
- #temp ⇒ Object
- #utilization ⇒ Object
Methods inherited from Gpu
#asic_temp, attached_processes, #compute_type, #configured_core_voltage, #core_voltage, #fan_limit, #fan_max_limit, #fan_min_limit, found_devices, #hardware_info, #mem_info, #mem_temp, #memory_volt, #opencl_board_name, opencl_cache, #opencl_device, opencl_devices, opencl_devices_from_cache, opencl_devices_from_platform, #opencl_name, #opencl_units, #status, #status_info, #to_h, #vddgfx, #voltage_table
Methods inherited from ComputeBase
#attached_processes, compute_classes, #device_class_name, #expired_metadata?, #top_processes
Methods included from Logger
color, log_file, log_level, logger, #logger
Methods inherited from Device
#base_hwmon_path, device, device_class, device_lookup, device_vendor, #expired_metadata?, #generic_model, #hwmon_path, #lock_rom, logger, manual_device_database, manual_device_lookup, manual_vendor_lookup, manual_vendors, name_map, name_translation, pci_database, #read_file, #read_hwmon_data, #read_kernel_setting, read_kernel_setting, #rom_data, #rom_path, subsystem_device, subsystem_device_lookup, subsystem_vendor, subsystem_vendor_lookup, #sysfs_model_name, system_checksum, #to_h, #to_json, #unlock_rom, vendor_lookup, #write_hwmon_data, #write_kernel_setting, write_kernel_setting
Methods included from Utils
Constructor Details
#initialize(device_path, opts = {}) ⇒ NvidiaGpu
Returns a new instance of NvidiaGpu.
13 14 15 16 17 18 19 20 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 13

# Builds an Nvidia GPU from the attributes found in the driver's information
# file for the device, combined with any caller supplied options.
#
# @param device_path [String] path of the device; also stored as :pci_loc
# @param opts [Hash] extra attributes; they win over same-named keys read
#   from the information file, but not over the derived keys set below
# @return [NvidiaGpu] a new instance of NvidiaGpu
def initialize(device_path, opts = {})
  attributes = self.class.read_information_file(device_path).merge(opts)
  video_bios = attributes[:video_bios]

  attributes[:pci_loc] = device_path
  attributes[:busid] = attributes[:bus_location]
  # the bios version is only normalised when the information file reported one
  attributes[:bios] = video_bios.upcase if video_bios
  attributes[:uuid] = attributes[:gpu_uuid]

  super(device_path, attributes)
end
Class Method Details
.blank_data ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 41

# Placeholder readings that use the same column names as the nvidia-smi
# query. Every reading is the string '0' except 'pstate', which is the
# Integer 7. The hash is built once and memoized.
#
# @return [Hash] column name => zeroed reading
def self.blank_data
  return @blank_data if @blank_data

  columns = [
    'memory.used [MiB]',
    'memory.free [MiB]',
    'memory.total [MiB]',
    'utilization.gpu [%]',
    'temperature.gpu',
    'power.draw [W]',
    'power.limit [W]',
    'power.max_limit [W]',
    'pstate',
    'fan.speed [%]',
    'clocks.current.memory [MHz]',
    'clocks.current.sm [MHz]'
  ]
  @blank_data = columns.map { |column| [column, column == 'pstate' ? 7 : '0'] }.to_h
end
.create_from_path(device_path, index, use_opencl = false) ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 174

# Creates an instance for the device at +device_path+, looking up the PCI
# identifiers for that path with the class level lookup helpers.
#
# @param device_path [String] path of the device
# @param index [Integer] index to assign to the instance
# @param use_opencl [Boolean] passed through to the instance unchanged
# @return [NvidiaGpu]
def self.create_from_path(device_path, index, use_opencl = false)
  pci_ids = {
    device_class_id: device_class(device_path),
    device_id: device(device_path),
    device_vendor_id: device_vendor(device_path),
    subsystem_vendor_id: subsystem_vendor(device_path),
    subsystem_device_id: subsystem_device(device_path)
  }
  new(device_path, pci_ids.merge(use_opencl: use_opencl, index: index))
end
.devices ⇒ Array
Returns a list of device paths of all devices specific to the vendor id.
168 169 170 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 168

# Narrows the GPU device paths down to those whose vendor id is VENDOR_ID.
#
# @return [Array] device paths of the matching devices
def self.devices
  nvidia_vendor = ->(path) { device_vendor(path) == VENDOR_ID }
  ComputeUnit::Gpu.devices.find_all(&nvidia_vendor)
end
.find_all(use_opencl = false) ⇒ Array
Returns an array of gpu instances of NVIDIA type only.
188 189 190 191 192 193 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 188

# Builds one instance per Nvidia device path. Each instance is indexed by the
# position of its path within ComputeUnit::Gpu.found_devices, not by its
# position in the Nvidia-only list.
#
# @param use_opencl [Boolean] passed through to create_from_path
# @return [Array] one instance per path returned by .devices
def self.find_all(use_opencl = false)
  devices.map do |device_path|
    create_from_path(device_path, ComputeUnit::Gpu.found_devices.index(device_path), use_opencl)
  end
end
.read_information_file(device_path) ⇒ Hash
Example return value:
{:model=>"GeForce GTX 1070",
:irq=>"130",
:gpu_uuid=>"GPU-0116fb5c-66f4-1cba-c216-97f4600a8152",
:video_bios=>"86.04.50.40.4a",
:bus_type=>"PCIe",
:dma_size=>"47 bits",
:dma_mask=>"0x7fffffffffff",
:bus_location=>"0000:0d:00.0",
:device_minor=>"7"}
211 212 213 214 215 216 217 218 219 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 211

# Reads <NVIDIA_PROC_PATH>/<basename of device_path>/information and turns
# each "Label: value" line into a hash entry. Labels are downcased, spaces
# become underscores, and the result is used as a Symbol key.
#
# @param device_path [String] path of the device; only its basename is used
# @return [Hash] e.g. "GPU UUID: GPU-01" becomes { gpu_uuid: "GPU-01" }
def self.read_information_file(device_path)
  info_path = File.join(NVIDIA_PROC_PATH, File.basename(device_path), 'information')
  pairs = File.read(info_path).scan(/\n?([\w\s]*):\s+(.*)/)
  pairs.each_with_object({}) do |(label, value), info|
    info[label.downcase.tr(' ', '_').to_sym] = value
  end
end
Instance Method Details
#core_clock ⇒ Integer
Returns the current core clock speed.
94 95 96 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 94

# Reads the 'clocks.current.sm [MHz]' column from the cached nvidia-smi data.
#
# @return [Integer] the current core clock speed
def core_clock
  meta['clocks.current.sm [MHz]'].to_i
end
#fan ⇒ Integer
Returns the fan speed.
99 100 101 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 99

# Reads the 'fan.speed [%]' column from the cached nvidia-smi data.
#
# @return [Integer] the fan speed
def fan
  meta['fan.speed [%]'].to_i
end
#information_file ⇒ Object
160 161 162 163 164 165 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 160

# Path of this device's 'information' file below NVIDIA_PROC_PATH. The
# directory name is the basename of device_path. The path is memoized.
#
# @return [String]
def information_file
  @information_file ||= File.join(NVIDIA_PROC_PATH, File.basename(device_path), 'information')
end
#memory_clock ⇒ Integer
Returns the current memory clock speed.
89 90 91 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 89

# Reads the 'clocks.current.memory [MHz]' column from the cached nvidia-smi data.
#
# @return [Integer] the current memory clock speed
def memory_clock
  meta['clocks.current.memory [MHz]'].to_i
end
#memory_free ⇒ Object
152 153 154 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 152

# Reads the 'memory.free [MiB]' column from the cached nvidia-smi data.
#
# @return [Object] the raw reading, returned without any conversion
def memory_free
  meta['memory.free [MiB]']
end
#memory_total ⇒ Object
144 145 146 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 144

# Reads the 'memory.total [MiB]' column from the cached nvidia-smi data.
#
# @return [Object] the raw reading, returned without any conversion
def memory_total
  meta['memory.total [MiB]']
end
#memory_used ⇒ Object
148 149 150 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 148

# Reads the 'memory.used [MiB]' column from the cached nvidia-smi data.
#
# @return [Object] the raw reading, returned without any conversion
def memory_used
  meta['memory.used [MiB]']
end
#meta ⇒ Hash
return cached data or fetch new data
32 33 34 35 36 37 38 39 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 32

# Returns the cached nvidia-smi data, fetching it again through #metadata
# when expired_metadata? reports the cache as stale or nothing is cached yet.
#
# @return [Hash] return cached data or fetch new data
def meta
  if expired_metadata?
    logger.debug("Expired Nvidia Data for #{uuid} ")
    @meta = metadata
  else
    @meta ||= metadata
  end
end
#metadata ⇒ Hash
data returned from nvidia-smi
Example:
{"memory.used [MiB]": "2578 MiB",
"memory.free [MiB]": "5534 MiB",
"memory.total [MiB]": "8112 MiB",
"utilization.gpu [%]": "100",
"temperature.gpu": "53",
"power.draw [W]": "129.21",
"power.limit [W]": "130.00",
"power.max_limit [W]": "217.00",
"pstate": 2,
"fan.speed [%]": "75"}
73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 73

# Queries nvidia-smi for this GPU (selected with -i index) and returns the
# first CSV row as a hash keyed by the stripped column header.
#
# Falls back to the class level blank_data when nvidia-smi exits with a
# failure status (its output is logged as an error) or when the output
# contains no data row, so callers always receive a Hash.
#
# @return [Hash] data returned from nvidia-smi
def metadata
  logger.debug("Calling #{NVIDIA_SMI}")
  # stderr is folded into stdout so a failure message can be logged below
  data = `#{NVIDIA_SMI} --query-gpu=gpu_name,vbios_version,uuid,memory.used,memory.free,memory.total,utilization.gpu,temperature.gpu,power.draw,power.limit,power.max_limit,fan.speed,pstate,clocks.current.memory,clocks.current.sm -i #{index} --format=csv,nounits 2>&1`
  unless $CHILD_STATUS.success?
    # error code 15
    logger.error(data.delete("\n"))
    return self.class.blank_data
  end

  cards = CSV.parse(data,
                    headers: true,
                    header_converters: ->(f) { f.strip },
                    converters: ->(f) { f ? f.strip : nil }).map(&:to_h)
  # a header-only (or empty) response has no first row
  cards.first || self.class.blank_data
end
#power ⇒ Float
Returns the power being used by the gpu.
104 105 106 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 104

# Reads the 'power.draw [W]' column from the cached nvidia-smi data and adds
# the configured power_offset to it.
#
# @return [Float] the power being used by the gpu
def power
  meta['power.draw [W]'].strip.to_f + power_offset
end
#power_limit ⇒ Object
116 117 118 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 116

# Reads the 'power.limit [W]' column from the cached nvidia-smi data.
#
# @return [Float] the current power limit
def power_limit
  meta['power.limit [W]'].strip.to_f
end
#power_limit=(value) ⇒ Object
125 126 127 128 129 130 131 132 133 134 135 136 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 125

# Asks nvidia-smi to set the power limit of this GPU (selected with -i index).
# The outcome is only logged: a failing nvidia-smi call produces a warning,
# not an exception.
#
# @param value [#to_i, #to_s] the requested limit in Watts; its integer part
#   must lie between 1 and power_max_limit
# @return [Integer] the integer part of the requested value
# @raise [ArgumentError] when the value is outside the allowed range
def power_limit=(value)
  watts = value.to_i
  unless watts.between?(1, power_max_limit.to_i)
    raise ArgumentError, "Power value #{watts} must be between 1 and #{power_max_limit}"
  end

  # The arguments are handed over as an array so no shell ever parses the
  # caller supplied value; "100; some-command" can no longer run a command.
  output = IO.popen([NVIDIA_SMI, '-i', index.to_s, '-pl', value.to_s], &:read)
  if $CHILD_STATUS.success?
    logger.info("GPU#{index} power set to #{value} Watts")
  else
    logger.warn("GPU#{index} failed setting power to #{value}\n#{output}")
  end
  watts
end
#power_max_limit ⇒ Object
120 121 122 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 120

# Reads the 'power.max_limit [W]' column from the cached nvidia-smi data.
#
# @return [Float] the highest power limit that may be configured
def power_max_limit
  meta['power.max_limit [W]'].strip.to_f
end
#pstate ⇒ Object
112 113 114 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 112

# Reads the 'pstate' column from the cached nvidia-smi data as a number.
# The first run of digits is used, so "P2", "2" and the Integer 7 found in
# blank_data are all accepted. A reading without digits yields 0.
#
# @return [Integer] the performance state number
def pstate
  meta['pstate'].to_s[/\d+/].to_i
end
#reset_metadata ⇒ Object
26 27 28 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 26

# Drops the cached nvidia-smi data so the next #meta call fetches it again.
#
# @return [nil]
def reset_metadata
  @meta = nil
end
#set_fan_limit(_value, _type = 'current') ⇒ Numeric
Returns - original passed in value after being set.
140 141 142 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 140

# Setting a fan limit is not available for Nvidia GPUs; this always raises.
#
# @param _value [Numeric] ignored
# @param _type [String] ignored
# @raise [NotImplementedError] always
def set_fan_limit(_value, _type = 'current')
  raise NotImplementedError, 'Not implemented for Nvidia'
end
#set_mem_clock_and_vddc(_mem_clock, _mem_volt) ⇒ Object
195 196 197 198 199 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 195

# Nothing is changed on the GPU here. When experimental_on? is true a
# warning is logged; otherwise the call does nothing.
#
# @param _mem_clock [Object] ignored
# @param _mem_volt [Object] ignored
# @return [Object, nil] the result of the log call, or nil when not experimental
def set_mem_clock_and_vddc(_mem_clock, _mem_volt)
  logger.warn('Feature not enabled for nvidia') if experimental_on?
end
#subtype ⇒ Object
22 23 24 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 22

# The subtype of this device.
#
# @return [String] the SUBTYPE constant
def subtype
  SUBTYPE
end
#temp ⇒ Object
108 109 110 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 108

# Reads the 'temperature.gpu' column from the cached nvidia-smi data.
#
# @return [Integer] the gpu temperature
def temp
  meta['temperature.gpu'].to_i
end
#utilization ⇒ Object
156 157 158 |
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 156

# Reads the 'utilization.gpu [%]' column from the cached nvidia-smi data,
# dropping a percent sign when one is present.
#
# @return [Integer] the gpu utilization
def utilization
  meta['utilization.gpu [%]'].sub(/%/, '').to_i
end