Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
52867 views
1
/*****************************************************************************
2
* opencl.c: OpenCL initialization and kernel compilation
3
*****************************************************************************
4
* Copyright (C) 2012-2016 x264 project
5
*
6
* Authors: Steve Borho <[email protected]>
7
* Anton Mitrofanov <[email protected]>
8
*
9
* This program is free software; you can redistribute it and/or modify
10
* it under the terms of the GNU General Public License as published by
11
* the Free Software Foundation; either version 2 of the License, or
12
* (at your option) any later version.
13
*
14
* This program is distributed in the hope that it will be useful,
15
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
* GNU General Public License for more details.
18
*
19
* You should have received a copy of the GNU General Public License
20
* along with this program; if not, write to the Free Software
21
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22
*
23
* This program is also available under a commercial proprietary license.
24
* For more information, contact us at [email protected].
25
*****************************************************************************/
26
27
#include "common.h"
28
29
#ifdef _WIN32
30
#include <windows.h>
31
#define ocl_open LoadLibraryW( L"OpenCL" )
32
#define ocl_close FreeLibrary
33
#define ocl_address GetProcAddress
34
#else
35
#include <dlfcn.h> //dlopen, dlsym, dlclose
36
#if SYS_MACOSX
37
#define ocl_open dlopen( "/System/Library/Frameworks/OpenCL.framework/OpenCL", RTLD_NOW )
38
#else
39
#define ocl_open dlopen( "libOpenCL.so", RTLD_NOW )
40
#endif
41
#define ocl_close dlclose
42
#define ocl_address dlsym
43
#endif
44
45
#define LOAD_OCL_FUNC(name, continue_on_fail)\
46
{\
47
ocl->name = (void*)ocl_address( ocl->library, #name );\
48
if( !continue_on_fail && !ocl->name )\
49
goto fail;\
50
}
51
52
/* load the library and functions we require from it */
53
x264_opencl_function_t *x264_opencl_load_library( void )
54
{
55
x264_opencl_function_t *ocl;
56
#undef fail
57
#define fail fail0
58
CHECKED_MALLOCZERO( ocl, sizeof(x264_opencl_function_t) );
59
#undef fail
60
#define fail fail1
61
ocl->library = ocl_open;
62
if( !ocl->library )
63
goto fail;
64
#undef fail
65
#define fail fail2
66
LOAD_OCL_FUNC( clBuildProgram, 0 );
67
LOAD_OCL_FUNC( clCreateBuffer, 0 );
68
LOAD_OCL_FUNC( clCreateCommandQueue, 0 );
69
LOAD_OCL_FUNC( clCreateContext, 0 );
70
LOAD_OCL_FUNC( clCreateImage2D, 0 );
71
LOAD_OCL_FUNC( clCreateKernel, 0 );
72
LOAD_OCL_FUNC( clCreateProgramWithBinary, 0 );
73
LOAD_OCL_FUNC( clCreateProgramWithSource, 0 );
74
LOAD_OCL_FUNC( clEnqueueCopyBuffer, 0 );
75
LOAD_OCL_FUNC( clEnqueueMapBuffer, 0 );
76
LOAD_OCL_FUNC( clEnqueueNDRangeKernel, 0 );
77
LOAD_OCL_FUNC( clEnqueueReadBuffer, 0 );
78
LOAD_OCL_FUNC( clEnqueueWriteBuffer, 0 );
79
LOAD_OCL_FUNC( clFinish, 0 );
80
LOAD_OCL_FUNC( clGetCommandQueueInfo, 0 );
81
LOAD_OCL_FUNC( clGetDeviceIDs, 0 );
82
LOAD_OCL_FUNC( clGetDeviceInfo, 0 );
83
LOAD_OCL_FUNC( clGetKernelWorkGroupInfo, 0 );
84
LOAD_OCL_FUNC( clGetPlatformIDs, 0 );
85
LOAD_OCL_FUNC( clGetProgramBuildInfo, 0 );
86
LOAD_OCL_FUNC( clGetProgramInfo, 0 );
87
LOAD_OCL_FUNC( clGetSupportedImageFormats, 0 );
88
LOAD_OCL_FUNC( clReleaseCommandQueue, 0 );
89
LOAD_OCL_FUNC( clReleaseContext, 0 );
90
LOAD_OCL_FUNC( clReleaseKernel, 0 );
91
LOAD_OCL_FUNC( clReleaseMemObject, 0 );
92
LOAD_OCL_FUNC( clReleaseProgram, 0 );
93
LOAD_OCL_FUNC( clSetKernelArg, 0 );
94
return ocl;
95
#undef fail
96
fail2:
97
ocl_close( ocl->library );
98
fail1:
99
x264_free( ocl );
100
fail0:
101
return NULL;
102
}
103
104
void x264_opencl_close_library( x264_opencl_function_t *ocl )
105
{
106
if( !ocl )
107
return;
108
ocl_close( ocl->library );
109
x264_free( ocl );
110
}
111
112
/* define from recent cl_ext.h, copied here in case headers are old */
113
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
114
115
/* Requires full include path in case of out-of-tree builds */
116
#include "common/oclobj.h"
117
118
static int x264_detect_switchable_graphics( void );
119
120
/* Try to load the cached compiled program binary, verify the device context is
121
* still valid before reuse */
122
static cl_program x264_opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version )
123
{
124
/* try to load cached program binary */
125
FILE *fp = x264_fopen( h->param.psz_clbin_file, "rb" );
126
if( !fp )
127
return NULL;
128
129
x264_opencl_function_t *ocl = h->opencl.ocl;
130
cl_program program = NULL;
131
uint8_t *binary = NULL;
132
133
fseek( fp, 0, SEEK_END );
134
size_t size = ftell( fp );
135
rewind( fp );
136
CHECKED_MALLOC( binary, size );
137
138
if ( fread( binary, 1, size, fp ) != size )
139
goto fail;
140
const uint8_t *ptr = (const uint8_t*)binary;
141
142
#define CHECK_STRING( STR )\
143
do {\
144
size_t len = strlen( STR );\
145
if( size <= len || strncmp( (char*)ptr, STR, len ) )\
146
goto fail;\
147
else {\
148
size -= (len+1); ptr += (len+1);\
149
}\
150
} while( 0 )
151
152
CHECK_STRING( dev_name );
153
CHECK_STRING( dev_vendor );
154
CHECK_STRING( driver_version );
155
CHECK_STRING( x264_opencl_source_hash );
156
#undef CHECK_STRING
157
158
cl_int status;
159
program = ocl->clCreateProgramWithBinary( h->opencl.context, 1, &h->opencl.device, &size, &ptr, NULL, &status );
160
if( status != CL_SUCCESS )
161
program = NULL;
162
163
fail:
164
fclose( fp );
165
x264_free( binary );
166
return program;
167
}
168
169
/* Save the compiled program binary to a file for later reuse. Device context
170
* is also saved in the cache file so we do not reuse stale binaries */
171
static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version )
172
{
173
FILE *fp = x264_fopen( h->param.psz_clbin_file, "wb" );
174
if( !fp )
175
{
176
x264_log( h, X264_LOG_INFO, "OpenCL: unable to open clbin file for write\n" );
177
return;
178
}
179
180
x264_opencl_function_t *ocl = h->opencl.ocl;
181
uint8_t *binary = NULL;
182
183
size_t size = 0;
184
cl_int status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL );
185
if( status != CL_SUCCESS || !size )
186
{
187
x264_log( h, X264_LOG_INFO, "OpenCL: Unable to query program binary size, no cache file generated\n" );
188
goto fail;
189
}
190
191
CHECKED_MALLOC( binary, size );
192
status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &binary, NULL );
193
if( status != CL_SUCCESS )
194
{
195
x264_log( h, X264_LOG_INFO, "OpenCL: Unable to query program binary, no cache file generated\n" );
196
goto fail;
197
}
198
199
fputs( dev_name, fp );
200
fputc( '\n', fp );
201
fputs( dev_vendor, fp );
202
fputc( '\n', fp );
203
fputs( driver_version, fp );
204
fputc( '\n', fp );
205
fputs( x264_opencl_source_hash, fp );
206
fputc( '\n', fp );
207
fwrite( binary, 1, size, fp );
208
209
fail:
210
fclose( fp );
211
x264_free( binary );
212
return;
213
}
214
215
/* The OpenCL source under common/opencl will be merged into common/oclobj.h by
216
* the Makefile. It defines a x264_opencl_source byte array which we will pass
217
* to clCreateProgramWithSource(). We also attempt to use a cache file for the
218
* compiled binary, stored in the current working folder. */
219
static cl_program x264_opencl_compile( x264_t *h )
220
{
221
x264_opencl_function_t *ocl = h->opencl.ocl;
222
cl_program program = NULL;
223
char *build_log = NULL;
224
225
char dev_name[64];
226
char dev_vendor[64];
227
char driver_version[64];
228
cl_int status;
229
status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(dev_name), dev_name, NULL );
230
status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_VENDOR, sizeof(dev_vendor), dev_vendor, NULL );
231
status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DRIVER_VERSION, sizeof(driver_version), driver_version, NULL );
232
if( status != CL_SUCCESS )
233
return NULL;
234
235
// Most AMD GPUs have vector registers
236
int vectorize = !strcmp( dev_vendor, "Advanced Micro Devices, Inc." );
237
h->opencl.b_device_AMD_SI = 0;
238
239
if( vectorize )
240
{
241
/* Disable OpenCL on Intel/AMD switchable graphics devices */
242
if( x264_detect_switchable_graphics() )
243
{
244
x264_log( h, X264_LOG_INFO, "OpenCL acceleration disabled, switchable graphics detected\n" );
245
return NULL;
246
}
247
248
/* Detect AMD SouthernIsland or newer device (single-width registers) */
249
cl_uint simdwidth = 4;
250
status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
251
if( status == CL_SUCCESS && simdwidth == 1 )
252
{
253
vectorize = 0;
254
h->opencl.b_device_AMD_SI = 1;
255
}
256
}
257
258
x264_log( h, X264_LOG_INFO, "OpenCL acceleration enabled with %s %s %s\n", dev_vendor, dev_name, h->opencl.b_device_AMD_SI ? "(SI)" : "" );
259
260
program = x264_opencl_cache_load( h, dev_name, dev_vendor, driver_version );
261
if( !program )
262
{
263
/* clCreateProgramWithSource() requires a pointer variable, you cannot just use &x264_opencl_source */
264
x264_log( h, X264_LOG_INFO, "Compiling OpenCL kernels...\n" );
265
const char *strptr = (const char*)x264_opencl_source;
266
size_t size = sizeof(x264_opencl_source);
267
program = ocl->clCreateProgramWithSource( h->opencl.context, 1, &strptr, &size, &status );
268
if( status != CL_SUCCESS || !program )
269
{
270
x264_log( h, X264_LOG_WARNING, "OpenCL: unable to create program\n" );
271
return NULL;
272
}
273
}
274
275
/* Build the program binary for the OpenCL device */
276
const char *buildopts = vectorize ? "-DVECTORIZE=1" : "";
277
status = ocl->clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL );
278
if( status == CL_SUCCESS )
279
{
280
x264_opencl_cache_save( h, program, dev_name, dev_vendor, driver_version );
281
return program;
282
}
283
284
/* Compile failure, should not happen with production code. */
285
286
size_t build_log_len = 0;
287
status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_len );
288
if( status != CL_SUCCESS || !build_log_len )
289
{
290
x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to query build log\n" );
291
goto fail;
292
}
293
294
build_log = x264_malloc( build_log_len );
295
if( !build_log )
296
{
297
x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to alloc build log\n" );
298
goto fail;
299
}
300
301
status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
302
if( status != CL_SUCCESS )
303
{
304
x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to get build log\n" );
305
goto fail;
306
}
307
308
FILE *log_file = x264_fopen( "x264_kernel_build_log.txt", "w" );
309
if( !log_file )
310
{
311
x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" );
312
goto fail;
313
}
314
fwrite( build_log, 1, build_log_len, log_file );
315
fclose( log_file );
316
x264_log( h, X264_LOG_WARNING, "OpenCL: kernel build errors written to x264_kernel_build_log.txt\n" );
317
318
fail:
319
x264_free( build_log );
320
if( program )
321
ocl->clReleaseProgram( program );
322
return NULL;
323
}
324
325
static int x264_opencl_lookahead_alloc( x264_t *h )
326
{
327
if( !h->param.rc.i_lookahead )
328
return -1;
329
330
static const char *kernelnames[] = {
331
"mb_intra_cost_satd_8x8",
332
"sum_intra_cost",
333
"downscale_hpel",
334
"downscale1",
335
"downscale2",
336
"memset_int16",
337
"weightp_scaled_images",
338
"weightp_hpel",
339
"hierarchical_motion",
340
"subpel_refine",
341
"mode_selection",
342
"sum_inter_cost"
343
};
344
345
cl_kernel *kernels[] = {
346
&h->opencl.intra_kernel,
347
&h->opencl.rowsum_intra_kernel,
348
&h->opencl.downscale_hpel_kernel,
349
&h->opencl.downscale_kernel1,
350
&h->opencl.downscale_kernel2,
351
&h->opencl.memset_kernel,
352
&h->opencl.weightp_scaled_images_kernel,
353
&h->opencl.weightp_hpel_kernel,
354
&h->opencl.hme_kernel,
355
&h->opencl.subpel_refine_kernel,
356
&h->opencl.mode_select_kernel,
357
&h->opencl.rowsum_inter_kernel
358
};
359
360
x264_opencl_function_t *ocl = h->opencl.ocl;
361
cl_int status;
362
363
h->opencl.lookahead_program = x264_opencl_compile( h );
364
if( !h->opencl.lookahead_program )
365
goto fail;
366
367
for( int i = 0; i < ARRAY_SIZE(kernelnames); i++ )
368
{
369
*kernels[i] = ocl->clCreateKernel( h->opencl.lookahead_program, kernelnames[i], &status );
370
if( status != CL_SUCCESS )
371
{
372
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to compile kernel '%s' (%d)\n", kernelnames[i], status );
373
goto fail;
374
}
375
}
376
377
h->opencl.page_locked_buffer = ocl->clCreateBuffer( h->opencl.context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status );
378
if( status != CL_SUCCESS )
379
{
380
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to allocate page-locked buffer, error '%d'\n", status );
381
goto fail;
382
}
383
h->opencl.page_locked_ptr = ocl->clEnqueueMapBuffer( h->opencl.queue, h->opencl.page_locked_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
384
0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status );
385
if( status != CL_SUCCESS )
386
{
387
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to map page-locked buffer, error '%d'\n", status );
388
goto fail;
389
}
390
391
return 0;
392
fail:
393
x264_opencl_lookahead_delete( h );
394
return -1;
395
}
396
397
static void CL_CALLBACK x264_opencl_error_notify( const char *errinfo, const void *private_info, size_t cb, void *user_data )
398
{
399
/* Any error notification can be assumed to be fatal to the OpenCL context.
400
* We need to stop using it immediately to prevent further damage. */
401
x264_t *h = (x264_t*)user_data;
402
h->param.b_opencl = 0;
403
h->opencl.b_fatal_error = 1;
404
x264_log( h, X264_LOG_ERROR, "OpenCL: %s\n", errinfo );
405
x264_log( h, X264_LOG_ERROR, "OpenCL: fatal error, aborting encode\n" );
406
}
407
408
int x264_opencl_lookahead_init( x264_t *h )
409
{
410
x264_opencl_function_t *ocl = h->opencl.ocl;
411
cl_platform_id *platforms = NULL;
412
cl_device_id *devices = NULL;
413
cl_image_format *imageType = NULL;
414
cl_context context = NULL;
415
int ret = -1;
416
417
cl_uint numPlatforms = 0;
418
cl_int status = ocl->clGetPlatformIDs( 0, NULL, &numPlatforms );
419
if( status != CL_SUCCESS || !numPlatforms )
420
{
421
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n" );
422
goto fail;
423
}
424
platforms = (cl_platform_id*)x264_malloc( sizeof(cl_platform_id) * numPlatforms );
425
if( !platforms )
426
{
427
x264_log( h, X264_LOG_WARNING, "OpenCL: malloc of installed platforms buffer failed\n" );
428
goto fail;
429
}
430
status = ocl->clGetPlatformIDs( numPlatforms, platforms, NULL );
431
if( status != CL_SUCCESS )
432
{
433
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n" );
434
goto fail;
435
}
436
437
/* Select the first OpenCL platform with a GPU device that supports our
438
* required image (texture) formats */
439
for( cl_uint i = 0; i < numPlatforms; i++ )
440
{
441
cl_uint gpu_count = 0;
442
status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
443
if( status != CL_SUCCESS || !gpu_count )
444
continue;
445
446
x264_free( devices );
447
devices = x264_malloc( sizeof(cl_device_id) * gpu_count );
448
if( !devices )
449
continue;
450
451
status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL );
452
if( status != CL_SUCCESS )
453
continue;
454
455
/* Find a GPU device that supports our image formats */
456
for( cl_uint gpu = 0; gpu < gpu_count; gpu++ )
457
{
458
h->opencl.device = devices[gpu];
459
460
/* if the user has specified an exact device ID, skip all other
461
* GPUs. If this device matches, allow it to continue through the
462
* checks for supported images, etc. */
463
if( h->param.opencl_device_id && devices[gpu] != (cl_device_id)h->param.opencl_device_id )
464
continue;
465
466
cl_bool image_support = 0;
467
status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL );
468
if( status != CL_SUCCESS || !image_support )
469
continue;
470
471
if( context )
472
ocl->clReleaseContext( context );
473
context = ocl->clCreateContext( NULL, 1, &h->opencl.device, (void*)x264_opencl_error_notify, (void*)h, &status );
474
if( status != CL_SUCCESS || !context )
475
continue;
476
477
cl_uint imagecount = 0;
478
status = ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
479
if( status != CL_SUCCESS || !imagecount )
480
continue;
481
482
x264_free( imageType );
483
imageType = x264_malloc( sizeof(cl_image_format) * imagecount );
484
if( !imageType )
485
continue;
486
487
status = ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
488
if( status != CL_SUCCESS )
489
continue;
490
491
int b_has_r = 0;
492
int b_has_rgba = 0;
493
for( cl_uint j = 0; j < imagecount; j++ )
494
{
495
if( imageType[j].image_channel_order == CL_R &&
496
imageType[j].image_channel_data_type == CL_UNSIGNED_INT32 )
497
b_has_r = 1;
498
else if( imageType[j].image_channel_order == CL_RGBA &&
499
imageType[j].image_channel_data_type == CL_UNSIGNED_INT8 )
500
b_has_rgba = 1;
501
}
502
if( !b_has_r || !b_has_rgba )
503
{
504
char dev_name[64];
505
status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(dev_name), dev_name, NULL );
506
if( status == CL_SUCCESS )
507
{
508
/* emit warning if we are discarding the user's explicit choice */
509
int level = h->param.opencl_device_id ? X264_LOG_WARNING : X264_LOG_DEBUG;
510
x264_log( h, level, "OpenCL: %s does not support required image formats\n", dev_name );
511
}
512
continue;
513
}
514
515
/* user selection of GPU device, skip N first matches */
516
if( h->param.i_opencl_device )
517
{
518
h->param.i_opencl_device--;
519
continue;
520
}
521
522
h->opencl.queue = ocl->clCreateCommandQueue( context, h->opencl.device, 0, &status );
523
if( status != CL_SUCCESS || !h->opencl.queue )
524
continue;
525
526
h->opencl.context = context;
527
context = NULL;
528
529
ret = 0;
530
break;
531
}
532
533
if( !ret )
534
break;
535
}
536
537
if( !h->param.psz_clbin_file )
538
h->param.psz_clbin_file = "x264_lookahead.clbin";
539
540
if( ret )
541
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to find a compatible device\n" );
542
else
543
ret = x264_opencl_lookahead_alloc( h );
544
545
fail:
546
if( context )
547
ocl->clReleaseContext( context );
548
x264_free( imageType );
549
x264_free( devices );
550
x264_free( platforms );
551
return ret;
552
}
553
554
static void x264_opencl_lookahead_free( x264_t *h )
555
{
556
x264_opencl_function_t *ocl = h->opencl.ocl;
557
558
#define RELEASE( a, f ) do { if( a ) { ocl->f( a ); a = NULL; } } while( 0 )
559
RELEASE( h->opencl.downscale_hpel_kernel, clReleaseKernel );
560
RELEASE( h->opencl.downscale_kernel1, clReleaseKernel );
561
RELEASE( h->opencl.downscale_kernel2, clReleaseKernel );
562
RELEASE( h->opencl.weightp_hpel_kernel, clReleaseKernel );
563
RELEASE( h->opencl.weightp_scaled_images_kernel, clReleaseKernel );
564
RELEASE( h->opencl.memset_kernel, clReleaseKernel );
565
RELEASE( h->opencl.intra_kernel, clReleaseKernel );
566
RELEASE( h->opencl.rowsum_intra_kernel, clReleaseKernel );
567
RELEASE( h->opencl.hme_kernel, clReleaseKernel );
568
RELEASE( h->opencl.subpel_refine_kernel, clReleaseKernel );
569
RELEASE( h->opencl.mode_select_kernel, clReleaseKernel );
570
RELEASE( h->opencl.rowsum_inter_kernel, clReleaseKernel );
571
572
RELEASE( h->opencl.lookahead_program, clReleaseProgram );
573
574
RELEASE( h->opencl.page_locked_buffer, clReleaseMemObject );
575
RELEASE( h->opencl.luma_16x16_image[0], clReleaseMemObject );
576
RELEASE( h->opencl.luma_16x16_image[1], clReleaseMemObject );
577
for( int i = 0; i < NUM_IMAGE_SCALES; i++ )
578
RELEASE( h->opencl.weighted_scaled_images[i], clReleaseMemObject );
579
RELEASE( h->opencl.weighted_luma_hpel, clReleaseMemObject );
580
RELEASE( h->opencl.row_satds[0], clReleaseMemObject );
581
RELEASE( h->opencl.row_satds[1], clReleaseMemObject );
582
RELEASE( h->opencl.mv_buffers[0], clReleaseMemObject );
583
RELEASE( h->opencl.mv_buffers[1], clReleaseMemObject );
584
RELEASE( h->opencl.lowres_mv_costs, clReleaseMemObject );
585
RELEASE( h->opencl.mvp_buffer, clReleaseMemObject );
586
RELEASE( h->opencl.lowres_costs[0], clReleaseMemObject );
587
RELEASE( h->opencl.lowres_costs[1], clReleaseMemObject );
588
RELEASE( h->opencl.frame_stats[0], clReleaseMemObject );
589
RELEASE( h->opencl.frame_stats[1], clReleaseMemObject );
590
#undef RELEASE
591
}
592
593
void x264_opencl_lookahead_delete( x264_t *h )
594
{
595
x264_opencl_function_t *ocl = h->opencl.ocl;
596
597
if( !ocl )
598
return;
599
600
if( h->opencl.queue )
601
ocl->clFinish( h->opencl.queue );
602
603
x264_opencl_lookahead_free( h );
604
605
if( h->opencl.queue )
606
{
607
ocl->clReleaseCommandQueue( h->opencl.queue );
608
h->opencl.queue = NULL;
609
}
610
if( h->opencl.context )
611
{
612
ocl->clReleaseContext( h->opencl.context );
613
h->opencl.context = NULL;
614
}
615
}
616
617
void x264_opencl_frame_delete( x264_frame_t *frame )
618
{
619
x264_opencl_function_t *ocl = frame->opencl.ocl;
620
621
if( !ocl )
622
return;
623
624
#define RELEASEBUF(mem) do { if( mem ) { ocl->clReleaseMemObject( mem ); mem = NULL; } } while( 0 )
625
for( int j = 0; j < NUM_IMAGE_SCALES; j++ )
626
RELEASEBUF( frame->opencl.scaled_image2Ds[j] );
627
RELEASEBUF( frame->opencl.luma_hpel );
628
RELEASEBUF( frame->opencl.inv_qscale_factor );
629
RELEASEBUF( frame->opencl.intra_cost );
630
RELEASEBUF( frame->opencl.lowres_mvs0 );
631
RELEASEBUF( frame->opencl.lowres_mvs1 );
632
RELEASEBUF( frame->opencl.lowres_mv_costs0 );
633
RELEASEBUF( frame->opencl.lowres_mv_costs1 );
634
#undef RELEASEBUF
635
}
636
637
/* OpenCL misbehaves on hybrid laptops with Intel iGPU and AMD dGPU, so
638
* we consult AMD's ADL interface to detect this situation and disable
639
* OpenCL on these machines (Linux and Windows) */
640
#ifdef _WIN32
641
#define ADL_API_CALL
642
#define ADL_CALLBACK __stdcall
643
#define adl_close FreeLibrary
644
#define adl_address GetProcAddress
645
#else
646
#define ADL_API_CALL
647
#define ADL_CALLBACK
648
#define adl_close dlclose
649
#define adl_address dlsym
650
#endif
651
652
typedef void* ( ADL_CALLBACK *ADL_MAIN_MALLOC_CALLBACK )( int );
653
typedef int ( ADL_API_CALL *ADL_MAIN_CONTROL_CREATE )( ADL_MAIN_MALLOC_CALLBACK, int );
654
typedef int ( ADL_API_CALL *ADL_ADAPTER_NUMBEROFADAPTERS_GET )( int * );
655
typedef int ( ADL_API_CALL *ADL_POWERXPRESS_SCHEME_GET )( int, int *, int *, int * );
656
typedef int ( ADL_API_CALL *ADL_MAIN_CONTROL_DESTROY )( void );
657
658
#define ADL_OK 0
659
#define ADL_PX_SCHEME_DYNAMIC 2
660
661
static void* ADL_CALLBACK adl_malloc_wrapper( int iSize )
662
{
663
return x264_malloc( iSize );
664
}
665
666
static int x264_detect_switchable_graphics( void )
667
{
668
void *hDLL;
669
ADL_MAIN_CONTROL_CREATE ADL_Main_Control_Create;
670
ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get;
671
ADL_POWERXPRESS_SCHEME_GET ADL_PowerXpress_Scheme_Get;
672
ADL_MAIN_CONTROL_DESTROY ADL_Main_Control_Destroy;
673
int ret = 0;
674
675
#ifdef _WIN32
676
hDLL = LoadLibraryW( L"atiadlxx.dll" );
677
if( !hDLL )
678
hDLL = LoadLibraryW( L"atiadlxy.dll" );
679
#else
680
hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
681
#endif
682
if( !hDLL )
683
goto fail0;
684
685
ADL_Main_Control_Create = (ADL_MAIN_CONTROL_CREATE)adl_address(hDLL, "ADL_Main_Control_Create");
686
ADL_Main_Control_Destroy = (ADL_MAIN_CONTROL_DESTROY)adl_address(hDLL, "ADL_Main_Control_Destroy");
687
ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)adl_address(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
688
ADL_PowerXpress_Scheme_Get = (ADL_POWERXPRESS_SCHEME_GET)adl_address(hDLL, "ADL_PowerXpress_Scheme_Get");
689
if( !ADL_Main_Control_Create || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
690
!ADL_PowerXpress_Scheme_Get )
691
goto fail1;
692
693
if( ADL_OK != ADL_Main_Control_Create( adl_malloc_wrapper, 1 ) )
694
goto fail1;
695
696
int numAdapters = 0;
697
if( ADL_OK != ADL_Adapter_NumberOfAdapters_Get( &numAdapters ) )
698
goto fail2;
699
700
for( int i = 0; i < numAdapters; i++ )
701
{
702
int PXSchemeRange, PXSchemeCurrentState, PXSchemeDefaultState;
703
if( ADL_OK != ADL_PowerXpress_Scheme_Get( i, &PXSchemeRange, &PXSchemeCurrentState, &PXSchemeDefaultState) )
704
break;
705
706
if( PXSchemeRange >= ADL_PX_SCHEME_DYNAMIC )
707
{
708
ret = 1;
709
break;
710
}
711
}
712
713
fail2:
714
ADL_Main_Control_Destroy();
715
fail1:
716
adl_close( hDLL );
717
fail0:
718
return ret;
719
}
720
721