opencl.c 25.1 KB
Newer Older
Steve Borho's avatar
Steve Borho committed
1 2 3
/*****************************************************************************
 * opencl.c: OpenCL initialization and kernel compilation
 *****************************************************************************
Henrik Gramner's avatar
Henrik Gramner committed
4
 * Copyright (C) 2012-2014 x264 project
Steve Borho's avatar
Steve Borho committed
5 6
 *
 * Authors: Steve Borho <sborho@multicorewareinc.com>
7
 *          Anton Mitrofanov <BugMaster@narod.ru>
Steve Borho's avatar
Steve Borho committed
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

#include "common.h"
28 29

#ifdef _WIN32
Steve Borho's avatar
Steve Borho committed
30
#include <windows.h>
Henrik Gramner's avatar
Henrik Gramner committed
31
#define ocl_open LoadLibraryW( L"OpenCL" )
32 33
#define ocl_close FreeLibrary
#define ocl_address GetProcAddress
Steve Borho's avatar
Steve Borho committed
34 35
#else
#include <dlfcn.h> //dlopen, dlsym, dlclose
36
#if SYS_MACOSX
37
#define ocl_open dlopen( "/System/Library/Frameworks/OpenCL.framework/OpenCL", RTLD_NOW )
38 39 40 41 42
#else
#define ocl_open dlopen( "libOpenCL.so", RTLD_NOW )
#endif
#define ocl_close dlclose
#define ocl_address dlsym
Steve Borho's avatar
Steve Borho committed
43 44
#endif

45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
#define LOAD_OCL_FUNC(name, continue_on_fail)\
{\
    ocl->name = (void*)ocl_address( ocl->library, #name );\
    if( !continue_on_fail && !ocl->name )\
        goto fail;\
}

/* load the library and functions we require from it */
x264_opencl_function_t *x264_opencl_load_library( void )
{
    x264_opencl_function_t *ocl;
#undef fail
#define fail fail0
    CHECKED_MALLOCZERO( ocl, sizeof(x264_opencl_function_t) );
#undef fail
#define fail fail1
    ocl->library = ocl_open;
    if( !ocl->library )
        goto fail;
#undef fail
#define fail fail2
    LOAD_OCL_FUNC( clBuildProgram, 0 );
    LOAD_OCL_FUNC( clCreateBuffer, 0 );
    LOAD_OCL_FUNC( clCreateCommandQueue, 0 );
    LOAD_OCL_FUNC( clCreateContext, 0 );
    LOAD_OCL_FUNC( clCreateImage2D, 0 );
    LOAD_OCL_FUNC( clCreateKernel, 0 );
    LOAD_OCL_FUNC( clCreateProgramWithBinary, 0 );
    LOAD_OCL_FUNC( clCreateProgramWithSource, 0 );
    LOAD_OCL_FUNC( clEnqueueCopyBuffer, 0 );
    LOAD_OCL_FUNC( clEnqueueMapBuffer, 0 );
    LOAD_OCL_FUNC( clEnqueueNDRangeKernel, 0 );
    LOAD_OCL_FUNC( clEnqueueReadBuffer, 0 );
    LOAD_OCL_FUNC( clEnqueueWriteBuffer, 0 );
    LOAD_OCL_FUNC( clFinish, 0 );
    LOAD_OCL_FUNC( clGetCommandQueueInfo, 0 );
    LOAD_OCL_FUNC( clGetDeviceIDs, 0 );
    LOAD_OCL_FUNC( clGetDeviceInfo, 0 );
    LOAD_OCL_FUNC( clGetKernelWorkGroupInfo, 0 );
    LOAD_OCL_FUNC( clGetPlatformIDs, 0 );
    LOAD_OCL_FUNC( clGetProgramBuildInfo, 0 );
    LOAD_OCL_FUNC( clGetProgramInfo, 0 );
    LOAD_OCL_FUNC( clGetSupportedImageFormats, 0 );
    LOAD_OCL_FUNC( clReleaseCommandQueue, 0 );
    LOAD_OCL_FUNC( clReleaseContext, 0 );
    LOAD_OCL_FUNC( clReleaseKernel, 0 );
    LOAD_OCL_FUNC( clReleaseMemObject, 0 );
    LOAD_OCL_FUNC( clReleaseProgram, 0 );
    LOAD_OCL_FUNC( clSetKernelArg, 0 );
    return ocl;
#undef fail
fail2:
    ocl_close( ocl->library );
fail1:
    x264_free( ocl );
fail0:
    return NULL;
}

void x264_opencl_close_library( x264_opencl_function_t *ocl )
{
    if( !ocl )
        return;
    ocl_close( ocl->library );
    x264_free( ocl );
}

Steve Borho's avatar
Steve Borho committed
112 113 114 115 116 117
/* define from recent cl_ext.h, copied here in case headers are old */
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD        0x4042

/* Requires full include path in case of out-of-tree builds */
#include "common/oclobj.h"

118
static int x264_detect_switchable_graphics( void );
Steve Borho's avatar
Steve Borho committed
119 120 121

/* Try to load the cached compiled program binary, verify the device context is
 * still valid before reuse */
122
static cl_program x264_opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version )
Steve Borho's avatar
Steve Borho committed
123 124
{
    /* try to load cached program binary */
Henrik Gramner's avatar
Henrik Gramner committed
125
    FILE *fp = x264_fopen( h->param.psz_clbin_file, "rb" );
Steve Borho's avatar
Steve Borho committed
126 127 128
    if( !fp )
        return NULL;

129 130
    x264_opencl_function_t *ocl = h->opencl.ocl;
    cl_program program = NULL;
Anton Mitrofanov's avatar
Anton Mitrofanov committed
131
    uint8_t *binary = NULL;
132 133

    fseek( fp, 0, SEEK_END );
Steve Borho's avatar
Steve Borho committed
134 135 136 137
    size_t size = ftell( fp );
    rewind( fp );
    CHECKED_MALLOC( binary, size );

138 139
    if ( fread( binary, 1, size, fp ) != size )
        goto fail;
Steve Borho's avatar
Steve Borho committed
140 141 142 143 144 145 146 147 148 149 150 151
    const uint8_t *ptr = (const uint8_t*)binary;

#define CHECK_STRING( STR )\
    do {\
        size_t len = strlen( STR );\
        if( size <= len || strncmp( (char*)ptr, STR, len ) )\
            goto fail;\
        else {\
            size -= (len+1); ptr += (len+1);\
        }\
    } while( 0 )

152 153 154
    CHECK_STRING( dev_name );
    CHECK_STRING( dev_vendor );
    CHECK_STRING( driver_version );
Steve Borho's avatar
Steve Borho committed
155 156 157
    CHECK_STRING( x264_opencl_source_hash );
#undef CHECK_STRING

Anton Mitrofanov's avatar
Anton Mitrofanov committed
158
    cl_int status;
159
    program = ocl->clCreateProgramWithBinary( h->opencl.context, 1, &h->opencl.device, &size, &ptr, NULL, &status );
Steve Borho's avatar
Steve Borho committed
160 161 162 163 164 165 166 167 168 169 170
    if( status != CL_SUCCESS )
        program = NULL;

fail:
    fclose( fp );
    x264_free( binary );
    return program;
}

/* Save the compiled program binary to a file for later reuse.  Device context
 * is also saved in the cache file so we do not reuse stale binaries */
171
static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version )
Steve Borho's avatar
Steve Borho committed
172
{
Henrik Gramner's avatar
Henrik Gramner committed
173
    FILE *fp = x264_fopen( h->param.psz_clbin_file, "wb" );
Steve Borho's avatar
Steve Borho committed
174 175
    if( !fp )
    {
Anton Mitrofanov's avatar
Anton Mitrofanov committed
176
        x264_log( h, X264_LOG_INFO, "OpenCL: unable to open clbin file for write\n" );
Steve Borho's avatar
Steve Borho committed
177 178 179
        return;
    }

180
    x264_opencl_function_t *ocl = h->opencl.ocl;
Anton Mitrofanov's avatar
Anton Mitrofanov committed
181 182 183
    uint8_t *binary = NULL;

    size_t size = 0;
184
    cl_int status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL );
Anton Mitrofanov's avatar
Anton Mitrofanov committed
185
    if( status != CL_SUCCESS || !size )
Steve Borho's avatar
Steve Borho committed
186
    {
Anton Mitrofanov's avatar
Anton Mitrofanov committed
187 188
        x264_log( h, X264_LOG_INFO, "OpenCL: Unable to query program binary size, no cache file generated\n" );
        goto fail;
Steve Borho's avatar
Steve Borho committed
189
    }
Anton Mitrofanov's avatar
Anton Mitrofanov committed
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207

    CHECKED_MALLOC( binary, size );
    status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &binary, NULL );
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_INFO, "OpenCL: Unable to query program binary, no cache file generated\n" );
        goto fail;
    }

    fputs( dev_name, fp );
    fputc( '\n', fp );
    fputs( dev_vendor, fp );
    fputc( '\n', fp );
    fputs( driver_version, fp );
    fputc( '\n', fp );
    fputs( x264_opencl_source_hash, fp );
    fputc( '\n', fp );
    fwrite( binary, 1, size, fp );
Steve Borho's avatar
Steve Borho committed
208 209

fail:
Anton Mitrofanov's avatar
Anton Mitrofanov committed
210 211
    fclose( fp );
    x264_free( binary );
Steve Borho's avatar
Steve Borho committed
212 213 214 215 216 217 218 219 220
    return;
}

/* The OpenCL source under common/opencl will be merged into common/oclobj.h by
 * the Makefile. It defines a x264_opencl_source byte array which we will pass
 * to clCreateProgramWithSource().  We also attempt to use a cache file for the
 * compiled binary, stored in the current working folder. */
static cl_program x264_opencl_compile( x264_t *h )
{
221
    x264_opencl_function_t *ocl = h->opencl.ocl;
Anton Mitrofanov's avatar
Anton Mitrofanov committed
222 223
    cl_program program = NULL;
    char *build_log = NULL;
Steve Borho's avatar
Steve Borho committed
224

225 226 227
    char dev_name[64];
    char dev_vendor[64];
    char driver_version[64];
Anton Mitrofanov's avatar
Anton Mitrofanov committed
228
    cl_int status;
229 230 231
    status  = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME,    sizeof(dev_name), dev_name, NULL );
    status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_VENDOR,  sizeof(dev_vendor), dev_vendor, NULL );
    status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DRIVER_VERSION, sizeof(driver_version), driver_version, NULL );
Steve Borho's avatar
Steve Borho committed
232 233 234 235
    if( status != CL_SUCCESS )
        return NULL;

    // Most AMD GPUs have vector registers
236
    int vectorize = !strcmp( dev_vendor, "Advanced Micro Devices, Inc." );
Steve Borho's avatar
Steve Borho committed
237 238 239 240 241 242 243 244 245 246 247 248 249
    h->opencl.b_device_AMD_SI = 0;

    if( vectorize )
    {
        /* Disable OpenCL on Intel/AMD switchable graphics devices */
        if( x264_detect_switchable_graphics() )
        {
            x264_log( h, X264_LOG_INFO, "OpenCL acceleration disabled, switchable graphics detected\n" );
            return NULL;
        }

        /* Detect AMD SouthernIsland or newer device (single-width registers) */
        cl_uint simdwidth = 4;
250
        status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
Steve Borho's avatar
Steve Borho committed
251 252 253 254 255 256 257
        if( status == CL_SUCCESS && simdwidth == 1 )
        {
            vectorize = 0;
            h->opencl.b_device_AMD_SI = 1;
        }
    }

258
    x264_log( h, X264_LOG_INFO, "OpenCL acceleration enabled with %s %s %s\n", dev_vendor, dev_name, h->opencl.b_device_AMD_SI ? "(SI)" : "" );
Steve Borho's avatar
Steve Borho committed
259

260
    program = x264_opencl_cache_load( h, dev_name, dev_vendor, driver_version );
Steve Borho's avatar
Steve Borho committed
261 262 263 264 265 266
    if( !program )
    {
        /* clCreateProgramWithSource() requires a pointer variable, you cannot just use &x264_opencl_source */
        x264_log( h, X264_LOG_INFO, "Compiling OpenCL kernels...\n" );
        const char *strptr = (const char*)x264_opencl_source;
        size_t size = sizeof(x264_opencl_source);
267
        program = ocl->clCreateProgramWithSource( h->opencl.context, 1, &strptr, &size, &status );
Steve Borho's avatar
Steve Borho committed
268 269 270 271 272 273 274 275 276
        if( status != CL_SUCCESS || !program )
        {
            x264_log( h, X264_LOG_WARNING, "OpenCL: unable to create program\n" );
            return NULL;
        }
    }

    /* Build the program binary for the OpenCL device */
    const char *buildopts = vectorize ? "-DVECTORIZE=1" : "";
277
    status = ocl->clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL );
Steve Borho's avatar
Steve Borho committed
278 279
    if( status == CL_SUCCESS )
    {
280
        x264_opencl_cache_save( h, program, dev_name, dev_vendor, driver_version );
Steve Borho's avatar
Steve Borho committed
281 282 283 284 285 286
        return program;
    }

    /* Compile failure, should not happen with production code. */

    size_t build_log_len = 0;
Anton Mitrofanov's avatar
Anton Mitrofanov committed
287 288
    status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, 0, NULL, &build_log_len );
    if( status != CL_SUCCESS || !build_log_len )
Steve Borho's avatar
Steve Borho committed
289 290
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to query build log\n" );
Anton Mitrofanov's avatar
Anton Mitrofanov committed
291
        goto fail;
Steve Borho's avatar
Steve Borho committed
292 293
    }

Anton Mitrofanov's avatar
Anton Mitrofanov committed
294
    build_log = x264_malloc( build_log_len );
Steve Borho's avatar
Steve Borho committed
295 296 297
    if( !build_log )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to alloc build log\n" );
Anton Mitrofanov's avatar
Anton Mitrofanov committed
298
        goto fail;
Steve Borho's avatar
Steve Borho committed
299 300
    }

301
    status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
Steve Borho's avatar
Steve Borho committed
302 303 304
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to get build log\n" );
Anton Mitrofanov's avatar
Anton Mitrofanov committed
305
        goto fail;
Steve Borho's avatar
Steve Borho committed
306 307
    }

Henrik Gramner's avatar
Henrik Gramner committed
308
    FILE *log_file = x264_fopen( "x264_kernel_build_log.txt", "w" );
Anton Mitrofanov's avatar
Anton Mitrofanov committed
309
    if( !log_file )
Steve Borho's avatar
Steve Borho committed
310
    {
Anton Mitrofanov's avatar
Anton Mitrofanov committed
311 312
        x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" );
        goto fail;
Steve Borho's avatar
Steve Borho committed
313
    }
Anton Mitrofanov's avatar
Anton Mitrofanov committed
314 315 316
    fwrite( build_log, 1, build_log_len, log_file );
    fclose( log_file );
    x264_log( h, X264_LOG_WARNING, "OpenCL: kernel build errors written to x264_kernel_build_log.txt\n" );
Steve Borho's avatar
Steve Borho committed
317 318

fail:
Anton Mitrofanov's avatar
Anton Mitrofanov committed
319 320 321
    x264_free( build_log );
    if( program )
        ocl->clReleaseProgram( program );
Steve Borho's avatar
Steve Borho committed
322 323 324
    return NULL;
}

325
static int x264_opencl_lookahead_alloc( x264_t *h )
Steve Borho's avatar
Steve Borho committed
326 327 328 329
{
    if( !h->param.rc.i_lookahead )
        return -1;

Anton Mitrofanov's avatar
Anton Mitrofanov committed
330
    static const char *kernelnames[] = {
Steve Borho's avatar
Steve Borho committed
331 332 333 334 335 336 337 338 339 340 341 342 343
        "mb_intra_cost_satd_8x8",
        "sum_intra_cost",
        "downscale_hpel",
        "downscale1",
        "downscale2",
        "memset_int16",
        "weightp_scaled_images",
        "weightp_hpel",
        "hierarchical_motion",
        "subpel_refine",
        "mode_selection",
        "sum_inter_cost"
    };
344

Steve Borho's avatar
Steve Borho committed
345 346 347 348 349 350 351 352 353 354 355 356 357 358
    cl_kernel *kernels[] = {
        &h->opencl.intra_kernel,
        &h->opencl.rowsum_intra_kernel,
        &h->opencl.downscale_hpel_kernel,
        &h->opencl.downscale_kernel1,
        &h->opencl.downscale_kernel2,
        &h->opencl.memset_kernel,
        &h->opencl.weightp_scaled_images_kernel,
        &h->opencl.weightp_hpel_kernel,
        &h->opencl.hme_kernel,
        &h->opencl.subpel_refine_kernel,
        &h->opencl.mode_select_kernel,
        &h->opencl.rowsum_inter_kernel
    };
359 360

    x264_opencl_function_t *ocl = h->opencl.ocl;
Steve Borho's avatar
Steve Borho committed
361 362 363 364
    cl_int status;

    h->opencl.lookahead_program = x264_opencl_compile( h );
    if( !h->opencl.lookahead_program )
365
        goto fail;
Steve Borho's avatar
Steve Borho committed
366 367 368

    for( int i = 0; i < ARRAY_SIZE(kernelnames); i++ )
    {
369
        *kernels[i] = ocl->clCreateKernel( h->opencl.lookahead_program, kernelnames[i], &status );
Steve Borho's avatar
Steve Borho committed
370 371 372
        if( status != CL_SUCCESS )
        {
            x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to compile kernel '%s' (%d)\n", kernelnames[i], status );
373
            goto fail;
Steve Borho's avatar
Steve Borho committed
374 375 376
        }
    }

377
    h->opencl.page_locked_buffer = ocl->clCreateBuffer( h->opencl.context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status );
Steve Borho's avatar
Steve Borho committed
378 379 380
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to allocate page-locked buffer, error '%d'\n", status );
381
        goto fail;
Steve Borho's avatar
Steve Borho committed
382
    }
383 384
    h->opencl.page_locked_ptr = ocl->clEnqueueMapBuffer( h->opencl.queue, h->opencl.page_locked_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
                                                         0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status );
Steve Borho's avatar
Steve Borho committed
385 386 387
    if( status != CL_SUCCESS )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to map page-locked buffer, error '%d'\n", status );
388
        goto fail;
Steve Borho's avatar
Steve Borho committed
389 390 391
    }

    return 0;
392 393 394
fail:
    x264_opencl_lookahead_delete( h );
    return -1;
Steve Borho's avatar
Steve Borho committed
395 396
}

397
static void CL_CALLBACK x264_opencl_error_notify( const char *errinfo, const void *private_info, size_t cb, void *user_data )
Steve Borho's avatar
Steve Borho committed
398 399 400 401 402 403 404 405 406 407
{
    /* Any error notification can be assumed to be fatal to the OpenCL context.
     * We need to stop using it immediately to prevent further damage. */
    x264_t *h = (x264_t*)user_data;
    h->param.b_opencl = 0;
    h->opencl.b_fatal_error = 1;
    x264_log( h, X264_LOG_ERROR, "OpenCL: %s\n", errinfo );
    x264_log( h, X264_LOG_ERROR, "OpenCL: fatal error, aborting encode\n" );
}

408
int x264_opencl_lookahead_init( x264_t *h )
Steve Borho's avatar
Steve Borho committed
409
{
410
    x264_opencl_function_t *ocl = h->opencl.ocl;
Anton Mitrofanov's avatar
Anton Mitrofanov committed
411 412 413 414
    cl_platform_id *platforms = NULL;
    cl_device_id *devices = NULL;
    cl_image_format *imageType = NULL;
    cl_context context = NULL;
Steve Borho's avatar
Steve Borho committed
415 416
    int ret = -1;

Anton Mitrofanov's avatar
Anton Mitrofanov committed
417 418 419
    cl_uint numPlatforms = 0;
    cl_int status = ocl->clGetPlatformIDs( 0, NULL, &numPlatforms );
    if( status != CL_SUCCESS || !numPlatforms )
Steve Borho's avatar
Steve Borho committed
420
    {
Anton Mitrofanov's avatar
Anton Mitrofanov committed
421 422 423 424 425 426 427 428
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n" );
        goto fail;
    }
    platforms = (cl_platform_id*)x264_malloc( sizeof(cl_platform_id) * numPlatforms );
    if( !platforms )
    {
        x264_log( h, X264_LOG_WARNING, "OpenCL: malloc of installed platforms buffer failed\n" );
        goto fail;
Steve Borho's avatar
Steve Borho committed
429
    }
430
    status = ocl->clGetPlatformIDs( numPlatforms, platforms, NULL );
Steve Borho's avatar
Steve Borho committed
431 432
    if( status != CL_SUCCESS )
    {
Anton Mitrofanov's avatar
Anton Mitrofanov committed
433 434
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n" );
        goto fail;
Steve Borho's avatar
Steve Borho committed
435 436 437 438
    }

    /* Select the first OpenCL platform with a GPU device that supports our
     * required image (texture) formats */
Anton Mitrofanov's avatar
Anton Mitrofanov committed
439
    for( cl_uint i = 0; i < numPlatforms; i++ )
Steve Borho's avatar
Steve Borho committed
440 441
    {
        cl_uint gpu_count = 0;
442
        status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
Steve Borho's avatar
Steve Borho committed
443 444 445
        if( status != CL_SUCCESS || !gpu_count )
            continue;

Anton Mitrofanov's avatar
Anton Mitrofanov committed
446 447
        x264_free( devices );
        devices = x264_malloc( sizeof(cl_device_id) * gpu_count );
Steve Borho's avatar
Steve Borho committed
448 449 450
        if( !devices )
            continue;

451
        status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL );
Steve Borho's avatar
Steve Borho committed
452 453 454 455 456 457 458 459 460 461 462
        if( status != CL_SUCCESS )
            continue;

        /* Find a GPU device that supports our image formats */
        for( cl_uint gpu = 0; gpu < gpu_count; gpu++ )
        {
            h->opencl.device = devices[gpu];

            /* if the user has specified an exact device ID, skip all other
             * GPUs.  If this device matches, allow it to continue through the
             * checks for supported images, etc.  */
Anton Mitrofanov's avatar
Anton Mitrofanov committed
463
            if( h->param.opencl_device_id && devices[gpu] != (cl_device_id)h->param.opencl_device_id )
Steve Borho's avatar
Steve Borho committed
464 465
                continue;

Anton Mitrofanov's avatar
Anton Mitrofanov committed
466 467 468
            cl_bool image_support = 0;
            status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL );
            if( status != CL_SUCCESS || !image_support )
Steve Borho's avatar
Steve Borho committed
469 470
                continue;

Anton Mitrofanov's avatar
Anton Mitrofanov committed
471 472 473 474
            if( context )
                ocl->clReleaseContext( context );
            context = ocl->clCreateContext( NULL, 1, &h->opencl.device, (void*)x264_opencl_error_notify, (void*)h, &status );
            if( status != CL_SUCCESS || !context )
Steve Borho's avatar
Steve Borho committed
475 476 477
                continue;

            cl_uint imagecount = 0;
Anton Mitrofanov's avatar
Anton Mitrofanov committed
478 479
            status = ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
            if( status != CL_SUCCESS || !imagecount )
Steve Borho's avatar
Steve Borho committed
480 481
                continue;

Anton Mitrofanov's avatar
Anton Mitrofanov committed
482 483
            x264_free( imageType );
            imageType = x264_malloc( sizeof(cl_image_format) * imagecount );
Steve Borho's avatar
Steve Borho committed
484 485 486
            if( !imageType )
                continue;

Anton Mitrofanov's avatar
Anton Mitrofanov committed
487 488 489
            status = ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
            if( status != CL_SUCCESS )
                continue;
Steve Borho's avatar
Steve Borho committed
490 491 492 493 494 495 496 497 498 499 500 501 502 503

            int b_has_r = 0;
            int b_has_rgba = 0;
            for( cl_uint j = 0; j < imagecount; j++ )
            {
                if( imageType[j].image_channel_order == CL_R &&
                    imageType[j].image_channel_data_type == CL_UNSIGNED_INT32 )
                    b_has_r = 1;
                else if( imageType[j].image_channel_order == CL_RGBA &&
                         imageType[j].image_channel_data_type == CL_UNSIGNED_INT8 )
                    b_has_rgba = 1;
            }
            if( !b_has_r || !b_has_rgba )
            {
504 505
                char dev_name[64];
                status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(dev_name), dev_name, NULL );
Steve Borho's avatar
Steve Borho committed
506 507 508 509
                if( status == CL_SUCCESS )
                {
                    /* emit warning if we are discarding the user's explicit choice */
                    int level = h->param.opencl_device_id ? X264_LOG_WARNING : X264_LOG_DEBUG;
Anton Mitrofanov's avatar
Anton Mitrofanov committed
510
                    x264_log( h, level, "OpenCL: %s does not support required image formats\n", dev_name );
Steve Borho's avatar
Steve Borho committed
511 512 513 514 515 516 517 518 519 520 521
                }
                continue;
            }

            /* user selection of GPU device, skip N first matches */
            if( h->param.i_opencl_device )
            {
                h->param.i_opencl_device--;
                continue;
            }

522
            h->opencl.queue = ocl->clCreateCommandQueue( context, h->opencl.device, 0, &status );
Anton Mitrofanov's avatar
Anton Mitrofanov committed
523
            if( status != CL_SUCCESS || !h->opencl.queue )
Steve Borho's avatar
Steve Borho committed
524 525 526
                continue;

            h->opencl.context = context;
Anton Mitrofanov's avatar
Anton Mitrofanov committed
527
            context = NULL;
Steve Borho's avatar
Steve Borho committed
528 529 530 531 532 533 534 535 536 537 538 539 540

            ret = 0;
            break;
        }

        if( !ret )
            break;
    }

    if( !h->param.psz_clbin_file )
        h->param.psz_clbin_file = "x264_lookahead.clbin";

    if( ret )
Anton Mitrofanov's avatar
Anton Mitrofanov committed
541
        x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to find a compatible device\n" );
Steve Borho's avatar
Steve Borho committed
542
    else
543
        ret = x264_opencl_lookahead_alloc( h );
Steve Borho's avatar
Steve Borho committed
544

Anton Mitrofanov's avatar
Anton Mitrofanov committed
545 546 547 548 549 550
fail:
    if( context )
        ocl->clReleaseContext( context );
    x264_free( imageType );
    x264_free( devices );
    x264_free( platforms );
Steve Borho's avatar
Steve Borho committed
551 552 553
    return ret;
}

554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616
static void x264_opencl_lookahead_free( x264_t *h )
{
    x264_opencl_function_t *ocl = h->opencl.ocl;

#define RELEASE( a, f ) do { if( a ) { ocl->f( a ); a = NULL; } } while( 0 )
    RELEASE( h->opencl.downscale_hpel_kernel, clReleaseKernel );
    RELEASE( h->opencl.downscale_kernel1, clReleaseKernel );
    RELEASE( h->opencl.downscale_kernel2, clReleaseKernel );
    RELEASE( h->opencl.weightp_hpel_kernel, clReleaseKernel );
    RELEASE( h->opencl.weightp_scaled_images_kernel, clReleaseKernel );
    RELEASE( h->opencl.memset_kernel, clReleaseKernel );
    RELEASE( h->opencl.intra_kernel, clReleaseKernel );
    RELEASE( h->opencl.rowsum_intra_kernel, clReleaseKernel );
    RELEASE( h->opencl.hme_kernel, clReleaseKernel );
    RELEASE( h->opencl.subpel_refine_kernel, clReleaseKernel );
    RELEASE( h->opencl.mode_select_kernel, clReleaseKernel );
    RELEASE( h->opencl.rowsum_inter_kernel, clReleaseKernel );

    RELEASE( h->opencl.lookahead_program, clReleaseProgram );

    RELEASE( h->opencl.page_locked_buffer, clReleaseMemObject );
    RELEASE( h->opencl.luma_16x16_image[0], clReleaseMemObject );
    RELEASE( h->opencl.luma_16x16_image[1], clReleaseMemObject );
    for( int i = 0; i < NUM_IMAGE_SCALES; i++ )
        RELEASE( h->opencl.weighted_scaled_images[i], clReleaseMemObject );
    RELEASE( h->opencl.weighted_luma_hpel, clReleaseMemObject );
    RELEASE( h->opencl.row_satds[0], clReleaseMemObject );
    RELEASE( h->opencl.row_satds[1], clReleaseMemObject );
    RELEASE( h->opencl.mv_buffers[0], clReleaseMemObject );
    RELEASE( h->opencl.mv_buffers[1], clReleaseMemObject );
    RELEASE( h->opencl.lowres_mv_costs, clReleaseMemObject );
    RELEASE( h->opencl.mvp_buffer, clReleaseMemObject );
    RELEASE( h->opencl.lowres_costs[0], clReleaseMemObject );
    RELEASE( h->opencl.lowres_costs[1], clReleaseMemObject );
    RELEASE( h->opencl.frame_stats[0], clReleaseMemObject );
    RELEASE( h->opencl.frame_stats[1], clReleaseMemObject );
#undef RELEASE
}

void x264_opencl_lookahead_delete( x264_t *h )
{
    x264_opencl_function_t *ocl = h->opencl.ocl;

    if( !ocl )
        return;

    if( h->opencl.queue )
        ocl->clFinish( h->opencl.queue );

    x264_opencl_lookahead_free( h );

    if( h->opencl.queue )
    {
        ocl->clReleaseCommandQueue( h->opencl.queue );
        h->opencl.queue = NULL;
    }
    if( h->opencl.context )
    {
        ocl->clReleaseContext( h->opencl.context );
        h->opencl.context = NULL;
    }
}

Steve Borho's avatar
Steve Borho committed
617 618
void x264_opencl_frame_delete( x264_frame_t *frame )
{
619 620 621 622 623 624
    x264_opencl_function_t *ocl = frame->opencl.ocl;

    if( !ocl )
        return;

#define RELEASEBUF(mem) do { if( mem ) { ocl->clReleaseMemObject( mem ); mem = NULL; } } while( 0 )
Steve Borho's avatar
Steve Borho committed
625 626 627 628 629 630 631 632 633 634 635 636 637 638 639
    for( int j = 0; j < NUM_IMAGE_SCALES; j++ )
        RELEASEBUF( frame->opencl.scaled_image2Ds[j] );
    RELEASEBUF( frame->opencl.luma_hpel );
    RELEASEBUF( frame->opencl.inv_qscale_factor );
    RELEASEBUF( frame->opencl.intra_cost );
    RELEASEBUF( frame->opencl.lowres_mvs0 );
    RELEASEBUF( frame->opencl.lowres_mvs1 );
    RELEASEBUF( frame->opencl.lowres_mv_costs0 );
    RELEASEBUF( frame->opencl.lowres_mv_costs1 );
#undef RELEASEBUF
}

/* OpenCL misbehaves on hybrid laptops with Intel iGPU and AMD dGPU, so
 * we consult AMD's ADL interface to detect this situation and disable
 * OpenCL on these machines (Linux and Windows) */
640 641 642 643 644 645 646 647 648 649
#ifdef _WIN32
#define ADL_API_CALL
#define ADL_CALLBACK __stdcall
#define adl_close FreeLibrary
#define adl_address GetProcAddress
#else
#define ADL_API_CALL
#define ADL_CALLBACK
#define adl_close dlclose
#define adl_address dlsym
Steve Borho's avatar
Steve Borho committed
650
#endif
651 652 653 654 655 656 657

typedef void* ( ADL_CALLBACK *ADL_MAIN_MALLOC_CALLBACK )( int );
typedef int   ( ADL_API_CALL *ADL_MAIN_CONTROL_CREATE )( ADL_MAIN_MALLOC_CALLBACK, int );
typedef int   ( ADL_API_CALL *ADL_ADAPTER_NUMBEROFADAPTERS_GET )( int * );
typedef int   ( ADL_API_CALL *ADL_POWERXPRESS_SCHEME_GET )( int, int *, int *, int * );
typedef int   ( ADL_API_CALL *ADL_MAIN_CONTROL_DESTROY )( void );

Steve Borho's avatar
Steve Borho committed
658 659 660
#define ADL_OK 0
#define ADL_PX_SCHEME_DYNAMIC 2

661 662 663 664
static void* ADL_CALLBACK adl_malloc_wrapper( int iSize )
{
    return x264_malloc( iSize );
}
Steve Borho's avatar
Steve Borho committed
665

666
static int x264_detect_switchable_graphics( void )
Steve Borho's avatar
Steve Borho committed
667
{
668
    void *hDLL;
Steve Borho's avatar
Steve Borho committed
669 670 671 672 673 674
    ADL_MAIN_CONTROL_CREATE          ADL_Main_Control_Create;
    ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get;
    ADL_POWERXPRESS_SCHEME_GET       ADL_PowerXpress_Scheme_Get;
    ADL_MAIN_CONTROL_DESTROY         ADL_Main_Control_Destroy;
    int ret = 0;

675
#ifdef _WIN32
Henrik Gramner's avatar
Henrik Gramner committed
676
    hDLL = LoadLibraryW( L"atiadlxx.dll" );
Steve Borho's avatar
Steve Borho committed
677
    if( !hDLL )
Henrik Gramner's avatar
Henrik Gramner committed
678
        hDLL = LoadLibraryW( L"atiadlxy.dll" );
Steve Borho's avatar
Steve Borho committed
679 680 681 682
#else
    hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
#endif
    if( !hDLL )
683
        goto fail0;
Steve Borho's avatar
Steve Borho committed
684

685 686 687 688
    ADL_Main_Control_Create          = (ADL_MAIN_CONTROL_CREATE)adl_address(hDLL, "ADL_Main_Control_Create");
    ADL_Main_Control_Destroy         = (ADL_MAIN_CONTROL_DESTROY)adl_address(hDLL, "ADL_Main_Control_Destroy");
    ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)adl_address(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
    ADL_PowerXpress_Scheme_Get       = (ADL_POWERXPRESS_SCHEME_GET)adl_address(hDLL, "ADL_PowerXpress_Scheme_Get");
689
    if( !ADL_Main_Control_Create || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
Steve Borho's avatar
Steve Borho committed
690
        !ADL_PowerXpress_Scheme_Get )
691
        goto fail1;
Steve Borho's avatar
Steve Borho committed
692

693 694
    if( ADL_OK != ADL_Main_Control_Create( adl_malloc_wrapper, 1 ) )
        goto fail1;
Steve Borho's avatar
Steve Borho committed
695 696 697

    int numAdapters = 0;
    if( ADL_OK != ADL_Adapter_NumberOfAdapters_Get( &numAdapters ) )
698
        goto fail2;
Steve Borho's avatar
Steve Borho committed
699 700 701 702 703 704 705 706 707 708 709 710 711 712

    for( int i = 0; i < numAdapters; i++ )
    {
        int PXSchemeRange, PXSchemeCurrentState, PXSchemeDefaultState;
        if( ADL_OK != ADL_PowerXpress_Scheme_Get( i, &PXSchemeRange, &PXSchemeCurrentState, &PXSchemeDefaultState) )
            break;

        if( PXSchemeRange >= ADL_PX_SCHEME_DYNAMIC )
        {
            ret = 1;
            break;
        }
    }

713
fail2:
Steve Borho's avatar
Steve Borho committed
714
    ADL_Main_Control_Destroy();
715 716 717
fail1:
    adl_close( hDLL );
fail0:
Steve Borho's avatar
Steve Borho committed
718 719
    return ret;
}