Commit 3aa9a67b authored by Anton Mitrofanov's avatar Anton Mitrofanov Committed by Fiona Glaser
Browse files

OpenCL support improvement/refactoring

Autoload the OpenCL library so that it's not required to run an openCL-enabled
build of x264.

Update X264_BUILD, which should have been changed with the first patch.
parent 0b2c3d35
...@@ -147,29 +147,7 @@ OBJSO += $(if $(RC), x264res.dll.o) ...@@ -147,29 +147,7 @@ OBJSO += $(if $(RC), x264res.dll.o)
endif endif
endif endif
QUOTED_CFLAGS := $(CFLAGS)
ifeq ($(HAVE_OPENCL),yes) ifeq ($(HAVE_OPENCL),yes)
empty:=
space:=$(empty) $(empty)
escaped:=\ $(empty)
open:=(
escopen:=\(
close:=)
escclose:=\)
SAFE_INC_DIR := $(subst $(space),$(escaped),$(OPENCL_INC_DIR))
SAFE_INC_DIR := $(subst $(open),$(escopen),$(SAFE_INC_DIR))
SAFE_INC_DIR := $(subst $(close),$(escclose),$(SAFE_INC_DIR))
SAFE_LIB_DIR := $(subst $(space),$(escaped),$(OPENCL_LIB_DIR))
SAFE_LIB_DIR := $(subst $(open),$(escopen),$(SAFE_LIB_DIR))
SAFE_LIB_DIR := $(subst $(close),$(escclose),$(SAFE_LIB_DIR))
# For normal CFLAGS and LDFLAGS, we must escape spaces with a backslash to
# make gcc happy
CFLAGS += -I$(SAFE_INC_DIR) -DCL_USE_DEPRECATED_OPENCL_1_1_APIS
LDFLAGS += -l$(OPENCL_LIB) -L$(SAFE_LIB_DIR)
# For the CFLAGS used by the .depend rule, we must add quotes because
# the rule does an extra level of shell expansions
QUOTED_CFLAGS += -I"$(OPENCL_INC_DIR)" -DCL_USE_DEPRECATED_OPENCL_1_1_APIS
common/oclobj.h: common/opencl/x264-cl.h $(wildcard $(SRCPATH)/common/opencl/*.cl) common/oclobj.h: common/opencl/x264-cl.h $(wildcard $(SRCPATH)/common/opencl/*.cl)
cat $^ | perl $(SRCPATH)/tools/cltostr.pl x264_opencl_source > $@ cat $^ | perl $(SRCPATH)/tools/cltostr.pl x264_opencl_source > $@
GENERATED += common/oclobj.h GENERATED += common/oclobj.h
...@@ -224,7 +202,7 @@ $(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK): .depend ...@@ -224,7 +202,7 @@ $(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK): .depend
.depend: config.mak .depend: config.mak
@rm -f .depend @rm -f .depend
@$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO)), $(CC) $(QUOTED_CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%.o) $(DEPMM) 1>> .depend;) @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO)), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%.o) $(DEPMM) 1>> .depend;)
config.mak: config.mak:
./configure ./configure
......
...@@ -1293,8 +1293,8 @@ char *x264_param2string( x264_param_t *p, int b_res ) ...@@ -1293,8 +1293,8 @@ char *x264_param2string( x264_param_t *p, int b_res )
} }
if( p->b_opencl ) if( p->b_opencl )
s += sprintf( s, "opencl=%d", p->b_opencl ); s += sprintf( s, "opencl=%d ", p->b_opencl );
s += sprintf( s, " cabac=%d", p->b_cabac ); s += sprintf( s, "cabac=%d", p->b_cabac );
s += sprintf( s, " ref=%d", p->i_frame_reference ); s += sprintf( s, " ref=%d", p->i_frame_reference );
s += sprintf( s, " deblock=%d:%d:%d", p->b_deblocking_filter, s += sprintf( s, " deblock=%d:%d:%d", p->b_deblocking_filter,
p->i_deblocking_filter_alphac0, p->i_deblocking_filter_beta ); p->i_deblocking_filter_alphac0, p->i_deblocking_filter_beta );
......
...@@ -96,10 +96,6 @@ do {\ ...@@ -96,10 +96,6 @@ do {\
#include <assert.h> #include <assert.h>
#include <limits.h> #include <limits.h>
#if HAVE_OPENCL
#include "opencl.h"
#endif
#if HAVE_INTERLACED #if HAVE_INTERLACED
# define MB_INTERLACED h->mb.b_interlaced # define MB_INTERLACED h->mb.b_interlaced
# define SLICE_MBAFF h->sh.b_mbaff # define SLICE_MBAFF h->sh.b_mbaff
...@@ -209,6 +205,9 @@ static const uint8_t x264_scan8[16*3 + 3] = ...@@ -209,6 +205,9 @@ static const uint8_t x264_scan8[16*3 + 3] =
}; };
#include "x264.h" #include "x264.h"
#if HAVE_OPENCL
#include "opencl.h"
#endif
#include "cabac.h" #include "cabac.h"
#include "bitstream.h" #include "bitstream.h"
#include "set.h" #include "set.h"
......
...@@ -261,6 +261,10 @@ static x264_frame_t *x264_frame_new( x264_t *h, int b_fdec ) ...@@ -261,6 +261,10 @@ static x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
if( x264_pthread_cond_init( &frame->cv, NULL ) ) if( x264_pthread_cond_init( &frame->cv, NULL ) )
goto fail; goto fail;
#if HAVE_OPENCL
frame->opencl.ocl = h->opencl.ocl;
#endif
return frame; return frame;
fail: fail:
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
* Copyright (C) 2012-2013 x264 project * Copyright (C) 2012-2013 x264 project
* *
* Authors: Steve Borho <sborho@multicorewareinc.com> * Authors: Steve Borho <sborho@multicorewareinc.com>
* Anton Mitrofanov <BugMaster@narod.ru>
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
...@@ -24,33 +25,112 @@ ...@@ -24,33 +25,112 @@
*****************************************************************************/ *****************************************************************************/
#include "common.h" #include "common.h"
#if _WIN32
#ifdef _WIN32
#include <windows.h> #include <windows.h>
#define ocl_open LoadLibrary( "OpenCL" )
#define ocl_close FreeLibrary
#define ocl_address GetProcAddress
#else #else
#include <dlfcn.h> //dlopen, dlsym, dlclose #include <dlfcn.h> //dlopen, dlsym, dlclose
#if SYS_MACOSX
#define ocl_open dlopen( "libOpenCL.dylib", RTLD_NOW )
#else
#define ocl_open dlopen( "libOpenCL.so", RTLD_NOW )
#endif
#define ocl_close dlclose
#define ocl_address dlsym
#endif #endif
#define LOAD_OCL_FUNC(name, continue_on_fail)\
{\
ocl->name = (void*)ocl_address( ocl->library, #name );\
if( !continue_on_fail && !ocl->name )\
goto fail;\
}
/* load the library and functions we require from it */
x264_opencl_function_t *x264_opencl_load_library( void )
{
x264_opencl_function_t *ocl;
#undef fail
#define fail fail0
CHECKED_MALLOCZERO( ocl, sizeof(x264_opencl_function_t) );
#undef fail
#define fail fail1
ocl->library = ocl_open;
if( !ocl->library )
goto fail;
#undef fail
#define fail fail2
LOAD_OCL_FUNC( clBuildProgram, 0 );
LOAD_OCL_FUNC( clCreateBuffer, 0 );
LOAD_OCL_FUNC( clCreateCommandQueue, 0 );
LOAD_OCL_FUNC( clCreateContext, 0 );
LOAD_OCL_FUNC( clCreateImage2D, 0 );
LOAD_OCL_FUNC( clCreateKernel, 0 );
LOAD_OCL_FUNC( clCreateProgramWithBinary, 0 );
LOAD_OCL_FUNC( clCreateProgramWithSource, 0 );
LOAD_OCL_FUNC( clEnqueueCopyBuffer, 0 );
LOAD_OCL_FUNC( clEnqueueMapBuffer, 0 );
LOAD_OCL_FUNC( clEnqueueNDRangeKernel, 0 );
LOAD_OCL_FUNC( clEnqueueReadBuffer, 0 );
LOAD_OCL_FUNC( clEnqueueWriteBuffer, 0 );
LOAD_OCL_FUNC( clFinish, 0 );
LOAD_OCL_FUNC( clGetCommandQueueInfo, 0 );
LOAD_OCL_FUNC( clGetDeviceIDs, 0 );
LOAD_OCL_FUNC( clGetDeviceInfo, 0 );
LOAD_OCL_FUNC( clGetKernelWorkGroupInfo, 0 );
LOAD_OCL_FUNC( clGetPlatformIDs, 0 );
LOAD_OCL_FUNC( clGetProgramBuildInfo, 0 );
LOAD_OCL_FUNC( clGetProgramInfo, 0 );
LOAD_OCL_FUNC( clGetSupportedImageFormats, 0 );
LOAD_OCL_FUNC( clReleaseCommandQueue, 0 );
LOAD_OCL_FUNC( clReleaseContext, 0 );
LOAD_OCL_FUNC( clReleaseKernel, 0 );
LOAD_OCL_FUNC( clReleaseMemObject, 0 );
LOAD_OCL_FUNC( clReleaseProgram, 0 );
LOAD_OCL_FUNC( clSetKernelArg, 0 );
return ocl;
#undef fail
fail2:
ocl_close( ocl->library );
fail1:
x264_free( ocl );
fail0:
return NULL;
}
void x264_opencl_close_library( x264_opencl_function_t *ocl )
{
if( !ocl )
return;
ocl_close( ocl->library );
x264_free( ocl );
}
/* define from recent cl_ext.h, copied here in case headers are old */ /* define from recent cl_ext.h, copied here in case headers are old */
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 #define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
/* Requires full include path in case of out-of-tree builds */ /* Requires full include path in case of out-of-tree builds */
#include "common/oclobj.h" #include "common/oclobj.h"
static int x264_detect_switchable_graphics(); static int x264_detect_switchable_graphics( void );
/* Try to load the cached compiled program binary, verify the device context is /* Try to load the cached compiled program binary, verify the device context is
* still valid before reuse */ * still valid before reuse */
static cl_program x264_opencl_cache_load( x264_t *h, char *devname, char *devvendor, char *driverversion ) static cl_program x264_opencl_cache_load( x264_t *h, char *devname, char *devvendor, char *driverversion )
{ {
cl_program program = NULL;
cl_int status;
/* try to load cached program binary */ /* try to load cached program binary */
FILE *fp = fopen( h->param.psz_clbin_file, "rb" ); FILE *fp = fopen( h->param.psz_clbin_file, "rb" );
if( !fp ) if( !fp )
return NULL; return NULL;
fseek( fp, 0L, SEEK_END ); x264_opencl_function_t *ocl = h->opencl.ocl;
cl_program program = NULL;
cl_int status;
fseek( fp, 0, SEEK_END );
size_t size = ftell( fp ); size_t size = ftell( fp );
rewind( fp ); rewind( fp );
uint8_t *binary; uint8_t *binary;
...@@ -75,7 +155,7 @@ static cl_program x264_opencl_cache_load( x264_t *h, char *devname, char *devven ...@@ -75,7 +155,7 @@ static cl_program x264_opencl_cache_load( x264_t *h, char *devname, char *devven
CHECK_STRING( x264_opencl_source_hash ); CHECK_STRING( x264_opencl_source_hash );
#undef CHECK_STRING #undef CHECK_STRING
program = clCreateProgramWithBinary( h->opencl.context, 1, &h->opencl.device, &size, &ptr, NULL, &status ); program = ocl->clCreateProgramWithBinary( h->opencl.context, 1, &h->opencl.device, &size, &ptr, NULL, &status );
if( status != CL_SUCCESS ) if( status != CL_SUCCESS )
program = NULL; program = NULL;
...@@ -96,13 +176,14 @@ static void x264_opencl_cache_save( x264_t *h, cl_program program, char *devname ...@@ -96,13 +176,14 @@ static void x264_opencl_cache_save( x264_t *h, cl_program program, char *devname
return; return;
} }
x264_opencl_function_t *ocl = h->opencl.ocl;
size_t size; size_t size;
cl_int status = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL ); cl_int status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL );
if( status == CL_SUCCESS ) if( status == CL_SUCCESS )
{ {
uint8_t *binary; uint8_t *binary;
CHECKED_MALLOC( binary, size ); CHECKED_MALLOC( binary, size );
status = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &binary, NULL ); status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &binary, NULL );
if( status == CL_SUCCESS ) if( status == CL_SUCCESS )
{ {
fputs( devname, fp ); fputs( devname, fp );
...@@ -133,15 +214,16 @@ fail: ...@@ -133,15 +214,16 @@ fail:
* compiled binary, stored in the current working folder. */ * compiled binary, stored in the current working folder. */
static cl_program x264_opencl_compile( x264_t *h ) static cl_program x264_opencl_compile( x264_t *h )
{ {
x264_opencl_function_t *ocl = h->opencl.ocl;
cl_program program; cl_program program;
cl_int status; cl_int status;
char devname[64]; char devname[64];
char devvendor[64]; char devvendor[64];
char driverversion[64]; char driverversion[64];
status = clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(devname), devname, NULL ); status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(devname), devname, NULL );
status |= clGetDeviceInfo( h->opencl.device, CL_DEVICE_VENDOR, sizeof(devvendor), devvendor, NULL ); status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_VENDOR, sizeof(devvendor), devvendor, NULL );
status |= clGetDeviceInfo( h->opencl.device, CL_DRIVER_VERSION, sizeof(driverversion), driverversion, NULL ); status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DRIVER_VERSION, sizeof(driverversion), driverversion, NULL );
if( status != CL_SUCCESS ) if( status != CL_SUCCESS )
return NULL; return NULL;
...@@ -160,7 +242,7 @@ static cl_program x264_opencl_compile( x264_t *h ) ...@@ -160,7 +242,7 @@ static cl_program x264_opencl_compile( x264_t *h )
/* Detect AMD SouthernIsland or newer device (single-width registers) */ /* Detect AMD SouthernIsland or newer device (single-width registers) */
cl_uint simdwidth = 4; cl_uint simdwidth = 4;
status = clGetDeviceInfo( h->opencl.device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL ); status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
if( status == CL_SUCCESS && simdwidth == 1 ) if( status == CL_SUCCESS && simdwidth == 1 )
{ {
vectorize = 0; vectorize = 0;
...@@ -177,7 +259,7 @@ static cl_program x264_opencl_compile( x264_t *h ) ...@@ -177,7 +259,7 @@ static cl_program x264_opencl_compile( x264_t *h )
x264_log( h, X264_LOG_INFO, "Compiling OpenCL kernels...\n" ); x264_log( h, X264_LOG_INFO, "Compiling OpenCL kernels...\n" );
const char *strptr = (const char*)x264_opencl_source; const char *strptr = (const char*)x264_opencl_source;
size_t size = sizeof(x264_opencl_source); size_t size = sizeof(x264_opencl_source);
program = clCreateProgramWithSource( h->opencl.context, 1, &strptr, &size, &status ); program = ocl->clCreateProgramWithSource( h->opencl.context, 1, &strptr, &size, &status );
if( status != CL_SUCCESS || !program ) if( status != CL_SUCCESS || !program )
{ {
x264_log( h, X264_LOG_WARNING, "OpenCL: unable to create program\n" ); x264_log( h, X264_LOG_WARNING, "OpenCL: unable to create program\n" );
...@@ -187,7 +269,7 @@ static cl_program x264_opencl_compile( x264_t *h ) ...@@ -187,7 +269,7 @@ static cl_program x264_opencl_compile( x264_t *h )
/* Build the program binary for the OpenCL device */ /* Build the program binary for the OpenCL device */
const char *buildopts = vectorize ? "-DVECTORIZE=1" : ""; const char *buildopts = vectorize ? "-DVECTORIZE=1" : "";
status = clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL ); status = ocl->clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL );
if( status == CL_SUCCESS ) if( status == CL_SUCCESS )
{ {
x264_opencl_cache_save( h, program, devname, devvendor, driverversion ); x264_opencl_cache_save( h, program, devname, devvendor, driverversion );
...@@ -198,7 +280,7 @@ static cl_program x264_opencl_compile( x264_t *h ) ...@@ -198,7 +280,7 @@ static cl_program x264_opencl_compile( x264_t *h )
size_t build_log_len = 0; size_t build_log_len = 0;
status = clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, NULL, &build_log_len ); status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, NULL, &build_log_len );
if( status != CL_SUCCESS ) if( status != CL_SUCCESS )
{ {
x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to query build log\n" ); x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to query build log\n" );
...@@ -213,7 +295,7 @@ static cl_program x264_opencl_compile( x264_t *h ) ...@@ -213,7 +295,7 @@ static cl_program x264_opencl_compile( x264_t *h )
return NULL; return NULL;
} }
status = clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL ); status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
if( status != CL_SUCCESS ) if( status != CL_SUCCESS )
{ {
x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to get build log\n" ); x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to get build log\n" );
...@@ -234,42 +316,7 @@ fail: ...@@ -234,42 +316,7 @@ fail:
return NULL; return NULL;
} }
static void x264_opencl_free_lookahead( x264_t *h ) static int x264_opencl_lookahead_alloc( x264_t *h )
{
#define RELEASE( a, f ) if( a ) f( a );
RELEASE( h->opencl.intra_kernel, clReleaseKernel )
RELEASE( h->opencl.rowsum_intra_kernel, clReleaseKernel )
RELEASE( h->opencl.downscale_kernel1, clReleaseKernel )
RELEASE( h->opencl.downscale_kernel2, clReleaseKernel )
RELEASE( h->opencl.downscale_hpel_kernel, clReleaseKernel )
RELEASE( h->opencl.weightp_hpel_kernel, clReleaseKernel )
RELEASE( h->opencl.weightp_scaled_images_kernel, clReleaseKernel )
RELEASE( h->opencl.memset_kernel, clReleaseKernel )
RELEASE( h->opencl.hme_kernel, clReleaseKernel )
RELEASE( h->opencl.subpel_refine_kernel, clReleaseKernel )
RELEASE( h->opencl.mode_select_kernel, clReleaseKernel )
RELEASE( h->opencl.rowsum_inter_kernel, clReleaseKernel )
RELEASE( h->opencl.lookahead_program, clReleaseProgram )
RELEASE( h->opencl.row_satds[0], clReleaseMemObject )
RELEASE( h->opencl.row_satds[1], clReleaseMemObject )
RELEASE( h->opencl.frame_stats[0], clReleaseMemObject )
RELEASE( h->opencl.frame_stats[1], clReleaseMemObject )
RELEASE( h->opencl.mv_buffers[0], clReleaseMemObject )
RELEASE( h->opencl.mv_buffers[1], clReleaseMemObject )
RELEASE( h->opencl.mvp_buffer, clReleaseMemObject )
RELEASE( h->opencl.luma_16x16_image[0], clReleaseMemObject )
RELEASE( h->opencl.luma_16x16_image[1], clReleaseMemObject )
RELEASE( h->opencl.lowres_mv_costs, clReleaseMemObject )
RELEASE( h->opencl.lowres_costs[0], clReleaseMemObject )
RELEASE( h->opencl.lowres_costs[1], clReleaseMemObject )
RELEASE( h->opencl.page_locked_buffer, clReleaseMemObject )
RELEASE( h->opencl.weighted_luma_hpel, clReleaseMemObject )
for( int i = 0; i < NUM_IMAGE_SCALES; i++ )
RELEASE( h->opencl.weighted_scaled_images[i], clReleaseMemObject )
#undef RELEASE
}
int x264_opencl_init_lookahead( x264_t *h )
{ {
if( !h->param.rc.i_lookahead ) if( !h->param.rc.i_lookahead )
return -1; return -1;
...@@ -288,6 +335,7 @@ int x264_opencl_init_lookahead( x264_t *h ) ...@@ -288,6 +335,7 @@ int x264_opencl_init_lookahead( x264_t *h )
"mode_selection", "mode_selection",
"sum_inter_cost" "sum_inter_cost"
}; };
cl_kernel *kernels[] = { cl_kernel *kernels[] = {
&h->opencl.intra_kernel, &h->opencl.intra_kernel,
&h->opencl.rowsum_intra_kernel, &h->opencl.rowsum_intra_kernel,
...@@ -302,43 +350,42 @@ int x264_opencl_init_lookahead( x264_t *h ) ...@@ -302,43 +350,42 @@ int x264_opencl_init_lookahead( x264_t *h )
&h->opencl.mode_select_kernel, &h->opencl.mode_select_kernel,
&h->opencl.rowsum_inter_kernel &h->opencl.rowsum_inter_kernel
}; };
x264_opencl_function_t *ocl = h->opencl.ocl;
cl_int status; cl_int status;
h->opencl.lookahead_program = x264_opencl_compile( h ); h->opencl.lookahead_program = x264_opencl_compile( h );
if( !h->opencl.lookahead_program ) if( !h->opencl.lookahead_program )
{ goto fail;
x264_opencl_free_lookahead( h );
return -1;
}
for( int i = 0; i < ARRAY_SIZE(kernelnames); i++ ) for( int i = 0; i < ARRAY_SIZE(kernelnames); i++ )
{ {
*kernels[i] = clCreateKernel( h->opencl.lookahead_program, kernelnames[i], &status ); *kernels[i] = ocl->clCreateKernel( h->opencl.lookahead_program, kernelnames[i], &status );
if( status != CL_SUCCESS ) if( status != CL_SUCCESS )
{ {
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to compile kernel '%s' (%d)\n", kernelnames[i], status ); x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to compile kernel '%s' (%d)\n", kernelnames[i], status );
x264_opencl_free_lookahead( h ); goto fail;
return -1;
} }
} }
h->opencl.page_locked_buffer = clCreateBuffer( h->opencl.context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status ); h->opencl.page_locked_buffer = ocl->clCreateBuffer( h->opencl.context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status );
if( status != CL_SUCCESS ) if( status != CL_SUCCESS )
{ {
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to allocate page-locked buffer, error '%d'\n", status ); x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to allocate page-locked buffer, error '%d'\n", status );
x264_opencl_free_lookahead( h ); goto fail;
return -1;
} }
h->opencl.page_locked_ptr = clEnqueueMapBuffer( h->opencl.queue, h->opencl.page_locked_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, h->opencl.page_locked_ptr = ocl->clEnqueueMapBuffer( h->opencl.queue, h->opencl.page_locked_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status ); 0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status );
if( status != CL_SUCCESS ) if( status != CL_SUCCESS )
{ {
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to map page-locked buffer, error '%d'\n", status ); x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to map page-locked buffer, error '%d'\n", status );
x264_opencl_free_lookahead( h ); goto fail;
return -1;
} }
return 0; return 0;
fail:
x264_opencl_lookahead_delete( h );
return -1;
} }
static void x264_opencl_error_notify( const char *errinfo, const void *private_info, size_t cb, void *user_data ) static void x264_opencl_error_notify( const char *errinfo, const void *private_info, size_t cb, void *user_data )
...@@ -352,13 +399,14 @@ static void x264_opencl_error_notify( const char *errinfo, const void *private_i ...@@ -352,13 +399,14 @@ static void x264_opencl_error_notify( const char *errinfo, const void *private_i
x264_log( h, X264_LOG_ERROR, "OpenCL: fatal error, aborting encode\n" ); x264_log( h, X264_LOG_ERROR, "OpenCL: fatal error, aborting encode\n" );
} }
int x264_opencl_init( x264_t *h ) int x264_opencl_lookahead_init( x264_t *h )
{ {
x264_opencl_function_t *ocl = h->opencl.ocl;
cl_int status; cl_int status;
cl_uint numPlatforms; cl_uint numPlatforms;
int ret = -1; int ret = -1;
status = clGetPlatformIDs( 0, NULL, &numPlatforms ); status = ocl->clGetPlatformIDs( 0, NULL, &numPlatforms );
if( status != CL_SUCCESS || numPlatforms == 0 ) if( status != CL_SUCCESS || numPlatforms == 0 )
{ {
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n"); x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n");
...@@ -366,7 +414,7 @@ int x264_opencl_init( x264_t *h ) ...@@ -366,7 +414,7 @@ int x264_opencl_init( x264_t *h )
} }
cl_platform_id *platforms = (cl_platform_id*)x264_malloc( numPlatforms * sizeof(cl_platform_id) ); cl_platform_id *platforms = (cl_platform_id*)x264_malloc( numPlatforms * sizeof(cl_platform_id) );
status = clGetPlatformIDs( numPlatforms, platforms, NULL ); status = ocl->clGetPlatformIDs( numPlatforms, platforms, NULL );
if( status != CL_SUCCESS ) if( status != CL_SUCCESS )
{ {
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n"); x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n");
...@@ -379,7 +427,7 @@ int x264_opencl_init( x264_t *h ) ...@@ -379,7 +427,7 @@ int x264_opencl_init( x264_t *h )
for( cl_uint i = 0; i < numPlatforms; ++i ) for( cl_uint i = 0; i < numPlatforms; ++i )
{ {
cl_uint gpu_count = 0; cl_uint gpu_count = 0;
status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count ); status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
if( status != CL_SUCCESS || !gpu_count ) if( status != CL_SUCCESS || !gpu_count )
continue; continue;
...@@ -387,7 +435,7 @@ int x264_opencl_init( x264_t *h ) ...@@ -387,7 +435,7 @@ int x264_opencl_init( x264_t *h )
if( !devices ) if( !devices )
continue; continue;
status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL ); status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL );
if( status != CL_SUCCESS ) if( status != CL_SUCCESS )
{ {
x264_free( devices ); x264_free( devices );
...@@ -406,30 +454,30 @@ int x264_opencl_init( x264_t *h ) ...@@ -406,30 +454,30 @@ int x264_opencl_init( x264_t *h )
continue; continue;
cl_bool image_support; cl_bool image_support;
clGetDeviceInfo( h->opencl.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL ); ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool),