Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
iperov
GitHub Repository: iperov/deepfacelab
Path: blob/master/core/leras/device.py
628 views
1
import sys
2
import ctypes
3
import os
4
import multiprocessing
5
import json
6
import time
7
from pathlib import Path
8
from core.interact import interact as io
9
10
11
class Device(object):
    """A single compute device (GPU/DML) discovered by TensorFlow,
    together with its memory statistics in bytes and GiB."""

    def __init__(self, index, tf_dev_type, name, total_mem, free_mem):
        # Bytes per GiB, used for the *_gb convenience attributes.
        GB = 1024 ** 3

        self.index = index              # ordinal index of the device
        self.tf_dev_type = tf_dev_type  # TF device type string, e.g. 'GPU' or 'DML'
        self.name = name                # human-readable device name

        self.total_mem = total_mem
        self.total_mem_gb = total_mem / GB
        self.free_mem = free_mem
        self.free_mem_gb = free_mem / GB

    def __str__(self):
        # e.g. "[0]:[GeForce RTX 3090][23.1/24.0]" — free/total, 3 significant digits.
        return f"[{self.index}]:[{self.name}][{self.free_mem_gb:.3}/{self.total_mem_gb:.3}]"
24
25
class Devices(object):
    """Collection of Device objects with query helpers.

    Device discovery runs TensorFlow in a child process (so the heavy TF
    import stays out of the parent) and publishes the results through
    NN_DEVICE_* environment variables; getDevices() rebuilds Device
    objects from those variables in any process.
    """

    # Process-wide cache, populated lazily by getDevices().
    all_devices = None

    def __init__(self, devices):
        # devices: list of Device (or Device-like) objects.
        self.devices = devices

    def __len__(self):
        return len(self.devices)

    def __getitem__(self, key):
        result = self.devices[key]
        # Slicing returns a Devices wrapper so the helper methods keep working.
        if isinstance(key, slice):
            return Devices(result)
        return result

    def __iter__(self):
        return iter(self.devices)

    def get_best_device(self):
        """Return the device with the largest total memory.

        Returns None for an empty collection (or when every device reports
        zero total memory — preserves the original comparison against 0).
        """
        best = None
        best_mem = 0
        for device in self.devices:
            if device.total_mem > best_mem:
                best, best_mem = device, device.total_mem
        return best

    def get_worst_device(self):
        """Return the device with the smallest total memory, or None if empty."""
        worst = None
        worst_mem = sys.maxsize
        for device in self.devices:
            if device.total_mem < worst_mem:
                worst, worst_mem = device, device.total_mem
        return worst

    def get_device_by_index(self, idx):
        """Return the device whose .index equals idx, or None if not found."""
        for device in self.devices:
            if device.index == idx:
                return device
        return None

    def get_devices_from_index_list(self, idx_list):
        """Return a Devices of all devices whose .index is in idx_list."""
        return Devices([device for device in self.devices if device.index in idx_list])

    def get_equal_devices(self, device):
        """Return a Devices of all devices sharing the given device's name."""
        # Capture the name first; do not shadow the parameter in the loop.
        target_name = device.name
        return Devices([d for d in self.devices if d.name == target_name])

    def get_devices_at_least_mem(self, totalmemsize_gb):
        """Return a Devices of devices with at least totalmemsize_gb GiB total memory."""
        min_bytes = totalmemsize_gb * (1024 ** 3)
        return Devices([device for device in self.devices if device.total_mem >= min_bytes])

    @staticmethod
    def _get_tf_devices_proc(q : multiprocessing.Queue):
        """Child-process worker: enumerate TF devices and put a dict on q.

        Puts {dev_idx: (dev_type, dev_name, memory_limit)} for every
        GPU/DML device TensorFlow reports. Runs in a separate process so
        the TensorFlow import does not affect the parent.
        """
        if sys.platform[0:3] == 'win':
            # Redirect the CUDA kernel cache to a single shared per-user path.
            compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache_ALL')
            os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path)
            if not compute_cache_path.exists():
                io.log_info("Caching GPU kernels...")
                compute_cache_path.mkdir(parents=True, exist_ok=True)

        import tensorflow

        tf_version = tensorflow.version.VERSION
        #if tf_version is None:
        #    tf_version = tensorflow.version.GIT_VERSION
        if tf_version[0] == 'v':
            tf_version = tf_version[1:]
        if tf_version[0] == '2':
            tf = tensorflow.compat.v1
        else:
            tf = tensorflow

        import logging
        # Disable tensorflow warnings
        tf_logger = logging.getLogger('tensorflow')
        tf_logger.setLevel(logging.ERROR)

        from tensorflow.python.client import device_lib

        physical_devices = device_lib.list_local_devices()
        physical_devices_f = {}
        for dev in physical_devices:
            dev_type = dev.device_type
            dev_tf_name = dev.name
            # Strip the "/device:" prefix, keeping e.g. "GPU:0".
            dev_tf_name = dev_tf_name[ dev_tf_name.index(dev_type) : ]

            dev_idx = int(dev_tf_name.split(':')[-1])

            if dev_type in ['GPU','DML']:
                dev_name = dev_tf_name

                dev_desc = dev.physical_device_desc
                if len(dev_desc) != 0:
                    if dev_desc[0] == '{':
                        # DML backend: description is a JSON object.
                        dev_desc_json = json.loads(dev_desc)
                        dev_desc_json_name = dev_desc_json.get('name', None)
                        if dev_desc_json_name is not None:
                            dev_name = dev_desc_json_name
                    else:
                        # CUDA backend: "key: value, key: value, ..." pairs.
                        for param, value in ( v.split(':') for v in dev_desc.split(',') ):
                            param = param.strip()
                            value = value.strip()
                            if param == 'name':
                                dev_name = value
                                break

                physical_devices_f[dev_idx] = (dev_type, dev_name, dev.memory_limit)

        q.put(physical_devices_f)
        # Give the queue's feeder thread time to flush before the process exits.
        time.sleep(0.1)

    @staticmethod
    def initialize_main_env():
        """Discover devices once in the main process and export NN_DEVICE_* env vars.

        Must be called in the main process before getDevices(). No-op if
        already initialized (NN_DEVICES_INITIALIZED set).
        """
        if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 0:
            return

        # Let TF see every GPU during enumeration.
        if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
            os.environ.pop('CUDA_VISIBLE_DEVICES')

        os.environ['TF_DIRECTML_KERNEL_CACHE_SIZE'] = '2500'
        # NOTE: original key was corrupted with zero-width characters
        # ('CUDA_<ZWSP>CACHE_<ZWSP>MAXSIZE'), which would set the wrong variable.
        os.environ['CUDA_CACHE_MAXSIZE'] = '2147483647'
        os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2'
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # tf log errors only

        # Enumerate in a daemon child process so TF never loads here.
        q = multiprocessing.Queue()
        p = multiprocessing.Process(target=Devices._get_tf_devices_proc, args=(q,), daemon=True)
        p.start()
        p.join()

        visible_devices = q.get()

        os.environ['NN_DEVICES_INITIALIZED'] = '1'
        os.environ['NN_DEVICES_COUNT'] = str(len(visible_devices))

        for i, (dev_type, name, total_mem) in visible_devices.items():
            os.environ[f'NN_DEVICE_{i}_TF_DEV_TYPE'] = dev_type
            os.environ[f'NN_DEVICE_{i}_NAME'] = name
            os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(total_mem)
            # Free memory is not queried separately; assume fully free at startup.
            os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(total_mem)

    @staticmethod
    def getDevices():
        """Return the cached Devices collection built from NN_DEVICE_* env vars.

        Raises:
            Exception: if initialize_main_env() has not been run in the main process.
        """
        if Devices.all_devices is None:
            if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 1:
                raise Exception("nn devices are not initialized. Run initialize_main_env() in main process.")
            devices = []
            for i in range( int(os.environ['NN_DEVICES_COUNT']) ):
                devices.append( Device(index=i,
                                       tf_dev_type=os.environ[f'NN_DEVICE_{i}_TF_DEV_TYPE'],
                                       name=os.environ[f'NN_DEVICE_{i}_NAME'],
                                       total_mem=int(os.environ[f'NN_DEVICE_{i}_TOTAL_MEM']),
                                       free_mem=int(os.environ[f'NN_DEVICE_{i}_FREE_MEM']), )
                               )
            Devices.all_devices = Devices(devices)

        return Devices.all_devices
205
206
"""
207
208
209
# {'name' : name.split(b'\0', 1)[0].decode(),
210
# 'total_mem' : totalMem.value
211
# }
212
213
214
215
216
217
return
218
219
220
221
222
min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35))
223
libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
224
for libname in libnames:
225
try:
226
cuda = ctypes.CDLL(libname)
227
except:
228
continue
229
else:
230
break
231
else:
232
return Devices([])
233
234
nGpus = ctypes.c_int()
235
name = b' ' * 200
236
cc_major = ctypes.c_int()
237
cc_minor = ctypes.c_int()
238
freeMem = ctypes.c_size_t()
239
totalMem = ctypes.c_size_t()
240
241
result = ctypes.c_int()
242
device = ctypes.c_int()
243
context = ctypes.c_void_p()
244
error_str = ctypes.c_char_p()
245
246
devices = []
247
248
if cuda.cuInit(0) == 0 and \
249
cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0:
250
for i in range(nGpus.value):
251
if cuda.cuDeviceGet(ctypes.byref(device), i) != 0 or \
252
cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device) != 0 or \
253
cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) != 0:
254
continue
255
256
if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device) == 0:
257
if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0:
258
cc = cc_major.value * 10 + cc_minor.value
259
if cc >= min_cc:
260
devices.append ( {'name' : name.split(b'\0', 1)[0].decode(),
261
'total_mem' : totalMem.value,
262
'free_mem' : freeMem.value,
263
'cc' : cc
264
})
265
cuda.cuCtxDetach(context)
266
267
os.environ['NN_DEVICES_COUNT'] = str(len(devices))
268
for i, device in enumerate(devices):
269
os.environ[f'NN_DEVICE_{i}_NAME'] = device['name']
270
os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(device['total_mem'])
271
os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(device['free_mem'])
272
os.environ[f'NN_DEVICE_{i}_CC'] = str(device['cc'])
273
"""
274