Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
iperov
GitHub Repository: iperov/deepfacelab
Path: blob/master/core/leras/device.py
628 views
1
import sys
2
import ctypes
3
import os
4
import multiprocessing
5
import json
6
import time
7
from pathlib import Path
8
from core.interact import interact as io
9
10
11
class Device(object):
    """A single compute device (GPU/DML) discovered by TensorFlow,
    together with its memory statistics in bytes and GiB."""

    def __init__(self, index, tf_dev_type, name, total_mem, free_mem):
        # Bytes per GiB, used for the *_gb convenience attributes.
        GB = 1024 ** 3

        self.index = index              # ordinal index of the device
        self.tf_dev_type = tf_dev_type  # TF device type string, e.g. 'GPU' or 'DML'
        self.name = name                # human-readable device name

        self.total_mem = total_mem
        self.total_mem_gb = total_mem / GB
        self.free_mem = free_mem
        self.free_mem_gb = free_mem / GB

    def __str__(self):
        # e.g. "[0]:[GeForce RTX 3090][23.1/24.0]" — free/total, 3 significant digits.
        return f"[{self.index}]:[{self.name}][{self.free_mem_gb:.3}/{self.total_mem_gb:.3}]"
24
25
class Devices(object):
    """Collection of Device objects with query helpers.

    Device discovery runs TensorFlow in a child process (so the heavy TF
    import stays out of the parent) and publishes the results through
    NN_DEVICE_* environment variables; getDevices() rebuilds Device
    objects from those variables in any process.
    """

    # Process-wide cache, populated lazily by getDevices().
    all_devices = None

    def __init__(self, devices):
        # devices: list of Device (or Device-like) objects.
        self.devices = devices

    def __len__(self):
        return len(self.devices)

    def __getitem__(self, key):
        result = self.devices[key]
        # Slicing returns a Devices wrapper so the helper methods keep working.
        if isinstance(key, slice):
            return Devices(result)
        return result

    def __iter__(self):
        return iter(self.devices)

    def get_best_device(self):
        """Return the device with the largest total memory.

        Returns None for an empty collection (or when every device reports
        zero total memory — preserves the original comparison against 0).
        """
        best = None
        best_mem = 0
        for device in self.devices:
            if device.total_mem > best_mem:
                best, best_mem = device, device.total_mem
        return best

    def get_worst_device(self):
        """Return the device with the smallest total memory, or None if empty."""
        worst = None
        worst_mem = sys.maxsize
        for device in self.devices:
            if device.total_mem < worst_mem:
                worst, worst_mem = device, device.total_mem
        return worst

    def get_device_by_index(self, idx):
        """Return the device whose .index equals idx, or None if not found."""
        for device in self.devices:
            if device.index == idx:
                return device
        return None

    def get_devices_from_index_list(self, idx_list):
        """Return a Devices of all devices whose .index is in idx_list."""
        return Devices([device for device in self.devices if device.index in idx_list])

    def get_equal_devices(self, device):
        """Return a Devices of all devices sharing the given device's name."""
        # Capture the name first; do not shadow the parameter in the loop.
        target_name = device.name
        return Devices([d for d in self.devices if d.name == target_name])

    def get_devices_at_least_mem(self, totalmemsize_gb):
        """Return a Devices of devices with at least totalmemsize_gb GiB total memory."""
        min_bytes = totalmemsize_gb * (1024 ** 3)
        return Devices([device for device in self.devices if device.total_mem >= min_bytes])

    @staticmethod
    def _get_tf_devices_proc(q : multiprocessing.Queue):
        """Child-process worker: enumerate TF devices and put a dict on q.

        Puts {dev_idx: (dev_type, dev_name, memory_limit)} for every
        GPU/DML device TensorFlow reports. Runs in a separate process so
        the TensorFlow import does not affect the parent.
        """
        if sys.platform[0:3] == 'win':
            # Redirect the CUDA kernel cache to a single shared per-user path.
            compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache_ALL')
            os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path)
            if not compute_cache_path.exists():
                io.log_info("Caching GPU kernels...")
                compute_cache_path.mkdir(parents=True, exist_ok=True)

        import tensorflow

        tf_version = tensorflow.version.VERSION
        #if tf_version is None:
        #    tf_version = tensorflow.version.GIT_VERSION
        if tf_version[0] == 'v':
            tf_version = tf_version[1:]
        if tf_version[0] == '2':
            tf = tensorflow.compat.v1
        else:
            tf = tensorflow

        import logging
        # Disable tensorflow warnings
        tf_logger = logging.getLogger('tensorflow')
        tf_logger.setLevel(logging.ERROR)

        from tensorflow.python.client import device_lib

        physical_devices = device_lib.list_local_devices()
        physical_devices_f = {}
        for dev in physical_devices:
            dev_type = dev.device_type
            dev_tf_name = dev.name
            # Strip the "/device:" prefix, keeping e.g. "GPU:0".
            dev_tf_name = dev_tf_name[ dev_tf_name.index(dev_type) : ]

            dev_idx = int(dev_tf_name.split(':')[-1])

            if dev_type in ['GPU','DML']:
                dev_name = dev_tf_name

                dev_desc = dev.physical_device_desc
                if len(dev_desc) != 0:
                    if dev_desc[0] == '{':
                        # DML backend: description is a JSON object.
                        dev_desc_json = json.loads(dev_desc)
                        dev_desc_json_name = dev_desc_json.get('name', None)
                        if dev_desc_json_name is not None:
                            dev_name = dev_desc_json_name
                    else:
                        # CUDA backend: "key: value, key: value, ..." pairs.
                        for param, value in ( v.split(':') for v in dev_desc.split(',') ):
                            param = param.strip()
                            value = value.strip()
                            if param == 'name':
                                dev_name = value
                                break

                physical_devices_f[dev_idx] = (dev_type, dev_name, dev.memory_limit)

        q.put(physical_devices_f)
        # Give the queue's feeder thread time to flush before the process exits.
        time.sleep(0.1)

    @staticmethod
    def initialize_main_env():
        """Discover devices once in the main process and export NN_DEVICE_* env vars.

        Must be called in the main process before getDevices(). No-op if
        already initialized (NN_DEVICES_INITIALIZED set).
        """
        if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 0:
            return

        # Let TF see every GPU during enumeration.
        if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
            os.environ.pop('CUDA_VISIBLE_DEVICES')

        os.environ['TF_DIRECTML_KERNEL_CACHE_SIZE'] = '2500'
        # NOTE: original key was corrupted with zero-width characters
        # ('CUDA_<ZWSP>CACHE_<ZWSP>MAXSIZE'), which would set the wrong variable.
        os.environ['CUDA_CACHE_MAXSIZE'] = '2147483647'
        os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2'
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # tf log errors only

        # Enumerate in a daemon child process so TF never loads here.
        q = multiprocessing.Queue()
        p = multiprocessing.Process(target=Devices._get_tf_devices_proc, args=(q,), daemon=True)
        p.start()
        p.join()

        visible_devices = q.get()

        os.environ['NN_DEVICES_INITIALIZED'] = '1'
        os.environ['NN_DEVICES_COUNT'] = str(len(visible_devices))

        for i, (dev_type, name, total_mem) in visible_devices.items():
            os.environ[f'NN_DEVICE_{i}_TF_DEV_TYPE'] = dev_type
            os.environ[f'NN_DEVICE_{i}_NAME'] = name
            os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(total_mem)
            # Free memory is not queried separately; assume fully free at startup.
            os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(total_mem)

    @staticmethod
    def getDevices():
        """Return the cached Devices collection built from NN_DEVICE_* env vars.

        Raises:
            Exception: if initialize_main_env() has not been run in the main process.
        """
        if Devices.all_devices is None:
            if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 1:
                raise Exception("nn devices are not initialized. Run initialize_main_env() in main process.")
            devices = []
            for i in range( int(os.environ['NN_DEVICES_COUNT']) ):
                devices.append( Device(index=i,
                                       tf_dev_type=os.environ[f'NN_DEVICE_{i}_TF_DEV_TYPE'],
                                       name=os.environ[f'NN_DEVICE_{i}_NAME'],
                                       total_mem=int(os.environ[f'NN_DEVICE_{i}_TOTAL_MEM']),
                                       free_mem=int(os.environ[f'NN_DEVICE_{i}_FREE_MEM']), )
                               )
            Devices.all_devices = Devices(devices)

        return Devices.all_devices
205
206
"""
207
208
209
# {'name' : name.split(b'\0', 1)[0].decode(),
210
# 'total_mem' : totalMem.value
211
# }
212
213
214
215
216
217
return
218
219
220
221
222
min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35))
223
libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
224
for libname in libnames:
225
try:
226
cuda = ctypes.CDLL(libname)
227
except:
228
continue
229
else:
230
break
231
else:
232
return Devices([])
233
234
nGpus = ctypes.c_int()
235
name = b' ' * 200
236
cc_major = ctypes.c_int()
237
cc_minor = ctypes.c_int()
238
freeMem = ctypes.c_size_t()
239
totalMem = ctypes.c_size_t()
240
241
result = ctypes.c_int()
242
device = ctypes.c_int()
243
context = ctypes.c_void_p()
244
error_str = ctypes.c_char_p()
245
246
devices = []
247
248
if cuda.cuInit(0) == 0 and \
249
cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0:
250
for i in range(nGpus.value):
251
if cuda.cuDeviceGet(ctypes.byref(device), i) != 0 or \
252
cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device) != 0 or \
253
cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) != 0:
254
continue
255
256
if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device) == 0:
257
if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0:
258
cc = cc_major.value * 10 + cc_minor.value
259
if cc >= min_cc:
260
devices.append ( {'name' : name.split(b'\0', 1)[0].decode(),
261
'total_mem' : totalMem.value,
262
'free_mem' : freeMem.value,
263
'cc' : cc
264
})
265
cuda.cuCtxDetach(context)
266
267
os.environ['NN_DEVICES_COUNT'] = str(len(devices))
268
for i, device in enumerate(devices):
269
os.environ[f'NN_DEVICE_{i}_NAME'] = device['name']
270
os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(device['total_mem'])
271
os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(device['free_mem'])
272
os.environ[f'NN_DEVICE_{i}_CC'] = str(device['cc'])
273
"""
274