/*****************************************************************************
 * win32thread.c: windows threading
 *****************************************************************************
 * Copyright (C) 2010-2019 x264 project
 *
 * Authors: Steven Walters <kemuri9@gmail.com>
 *          Pegasys Inc. <http://www.pegasys-inc.com>
 *          Henrik Gramner <henrik@gramner.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

/* Microsoft's way of supporting systems with >64 logical cpus can be found at
 * http://www.microsoft.com/whdc/system/Sysinternals/MoreThan64proc.mspx */

/* Based on the agreed standing that x264 does not need to utilize >64 logical cpus,
 * this API neither detects nor utilizes more than 64 cpus on systems that have them. */

#include "base.h"
Henrik Gramner's avatar
Henrik Gramner committed
35 36 37 38 39 40 41 42 43

#if HAVE_WINRT
/* WinRT shims: map the desktop-only calls used in this file onto their WinRT counterparts.
 * _beginthreadex() is technically the correct way to start a thread, but it only exists for
 * Desktop applications. CreateThread() is a workable substitute on Windows Store and
 * Windows Phone 8.1+ provided the MSVCRT is dynamically linked, which WinRT applications
 * require anyway. */
#define _beginthreadex CreateThread
#define InitializeCriticalSectionAndSpinCount(cs, spin) InitializeCriticalSectionEx(cs, spin, CRITICAL_SECTION_NO_DEBUG_INFO)
#define WaitForSingleObject(obj, timeout) WaitForSingleObjectEx(obj, timeout, FALSE)
#else
#include <process.h>
#endif
46 47 48 49

/* Spin count handed to InitializeCriticalSectionAndSpinCount() by x264_pthread_mutex_init():
 * the number of times to spin a thread about to block on a locked mutex before retrying
 * and sleeping if still locked. Currently 0, i.e. no spin iterations are requested. */
#define X264_SPIN_COUNT 0

Henrik Gramner's avatar
Henrik Gramner committed
50 51
/* global mutex for replacing MUTEX_INITIALIZER instances:
 * x264_pthread_mutex_lock() holds it while lazily initializing a mutex that still
 * matches X264_PTHREAD_MUTEX_INITIALIZER. It is set up by x264_win32_threading_init()
 * and torn down by x264_win32_threading_destroy(). */
static x264_pthread_mutex_t static_mutex;
52 53

/* _beginthreadex requires that the start routine is __stdcall */
54
static unsigned __stdcall win32thread_worker( void *arg )
55 56
{
    x264_pthread_t *h = arg;
57
    *h->p_ret = h->func( h->arg );
58 59 60 61 62 63 64 65
    return 0;
}

/* Start a new thread running start_routine( arg ).
 * The thread descriptor is filled in completely before the thread is started, because
 * the worker reads func, arg and p_ret from it. attr is not used.
 * Returns 0 on success, 1 if the thread could not be created. */
int x264_pthread_create( x264_pthread_t *thread, const x264_pthread_attr_t *attr,
                         void *(*start_routine)( void* ), void *arg )
{
    /* the result slot starts out empty and p_ret points at it */
    thread->ret   = NULL;
    thread->p_ret = &thread->ret;
    thread->arg   = arg;
    thread->func  = start_routine;

    thread->handle = (void*)_beginthreadex( NULL, 0, win32thread_worker, thread, 0, NULL );
    return thread->handle ? 0 : 1;
}

/* Wait for a thread to finish and release its handle.
 * If value_ptr is non-NULL it receives the value stored through thread.p_ret.
 * Returns 0 on success, -1 if the wait did not complete with WAIT_OBJECT_0
 * (the handle is not closed in that case). */
int x264_pthread_join( x264_pthread_t thread, void **value_ptr )
{
    if( WaitForSingleObject( thread.handle, INFINITE ) != WAIT_OBJECT_0 )
        return -1;

    if( value_ptr != NULL )
        *value_ptr = *thread.p_ret;

    CloseHandle( thread.handle );
    return 0;
}

/* Initialize a mutex as a critical section with X264_SPIN_COUNT spins; attr is not used.
 * Returns 0 on success, 1 on failure. */
int x264_pthread_mutex_init( x264_pthread_mutex_t *mutex, const x264_pthread_mutexattr_t *attr )
{
    if( InitializeCriticalSectionAndSpinCount( mutex, X264_SPIN_COUNT ) )
        return 0;
    return 1;
}

/* Release the critical section backing the mutex. Always returns 0. */
int x264_pthread_mutex_destroy( x264_pthread_mutex_t *mutex )
{
    DeleteCriticalSection( mutex );

    return 0;
}

/* Lock a mutex, initializing it first if it still holds the static initializer pattern.
 * A mutex whose bytes equal X264_PTHREAD_MUTEX_INITIALIZER has not been set up yet:
 * it is initialized under the global static_mutex, with the comparison repeated once
 * that lock is held so that only one thread performs the initialization.
 * Returns 0 on success, or the error from x264_pthread_mutex_init(). */
int x264_pthread_mutex_lock( x264_pthread_mutex_t *mutex )
{
    static const x264_pthread_mutex_t pristine = X264_PTHREAD_MUTEX_INITIALIZER;

    if( memcmp( mutex, &pristine, sizeof(*mutex) ) == 0 )
    {
        int err = 0;

        EnterCriticalSection( &static_mutex );
        /* another thread may have initialized it while we waited for static_mutex */
        if( memcmp( mutex, &pristine, sizeof(*mutex) ) == 0 )
            err = x264_pthread_mutex_init( mutex, NULL );
        LeaveCriticalSection( &static_mutex );

        if( err != 0 )
            return err;
    }

    EnterCriticalSection( mutex );
    return 0;
}

/* Leave the critical section backing the mutex. Always returns 0. */
int x264_pthread_mutex_unlock( x264_pthread_mutex_t *mutex )
{
    LeaveCriticalSection( mutex );

    return 0;
}

Henrik Gramner's avatar
Henrik Gramner committed
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
/* Tear down the global state of this module: destroy static_mutex and zero its storage. */
void x264_win32_threading_destroy( void )
{
    (void)x264_pthread_mutex_destroy( &static_mutex );
    memset( &static_mutex, 0, sizeof static_mutex );
}

#if HAVE_WINRT
/* WinRT: initialize a native condition variable. attr is not used. Always returns 0. */
int x264_pthread_cond_init( x264_pthread_cond_t *cond, const x264_pthread_condattr_t *attr )
{
    (void)attr;
    InitializeConditionVariable( cond );

    return 0;
}

/* WinRT: nothing is released for a native condition variable. Always returns 0. */
int x264_pthread_cond_destroy( x264_pthread_cond_t *cond )
{
    (void)cond;
    return 0;
}

/* WinRT: wake all threads waiting on the native condition variable. Always returns 0. */
int x264_pthread_cond_broadcast( x264_pthread_cond_t *cond )
{
    WakeAllConditionVariable( cond );

    return 0;
}

/* WinRT: wake one thread waiting on the native condition variable. Always returns 0. */
int x264_pthread_cond_signal( x264_pthread_cond_t *cond )
{
    WakeConditionVariable( cond );

    return 0;
}

/* WinRT: sleep on the native condition variable with no timeout, using mutex as the
 * associated critical section. Returns 0 on success, 1 if the sleep call failed. */
int x264_pthread_cond_wait( x264_pthread_cond_t *cond, x264_pthread_mutex_t *mutex )
{
    if( SleepConditionVariableCS( cond, mutex, INFINITE ) )
        return 0;
    return 1;
}

/* WinRT: the only global state to set up is static_mutex.
 * Returns the result of x264_pthread_mutex_init() (0 on success). */
int x264_win32_threading_init( void )
{
    int err = x264_pthread_mutex_init( &static_mutex, NULL );
    return err;
}

/* WinRT: return the processor count reported by GetNativeSystemInfo(). */
int x264_pthread_num_processors_np( void )
{
    SYSTEM_INFO info;

    GetNativeSystemInfo( &info );
    return (int)info.dwNumberOfProcessors;
}

#else

/* Dispatch table for the native condition variable API.
 * The entries are looked up in kernel32.dll by x264_win32_threading_init(); as a static
 * object they are NULL until then. Each x264_pthread_cond_* function tests its entry
 * (x264_pthread_cond_destroy tests cond_init) and uses the emulated implementation
 * when it is NULL. */
static struct
{
    /* function pointers to conditional variable API on windows 6.0+ kernels */
    void (WINAPI *cond_broadcast)( x264_pthread_cond_t *cond ); /* "WakeAllConditionVariable" */
    void (WINAPI *cond_init)( x264_pthread_cond_t *cond );      /* "InitializeConditionVariable" */
    void (WINAPI *cond_signal)( x264_pthread_cond_t *cond );    /* "WakeConditionVariable" */
    BOOL (WINAPI *cond_wait)( x264_pthread_cond_t *cond, x264_pthread_mutex_t *mutex, DWORD milliseconds ); /* "SleepConditionVariableCS" */
} thread_control;

175 176 177 178 179
/* for pre-Windows 6.0 platforms we need to define and use our own condition variable and api */
typedef struct
{
    x264_pthread_mutex_t mtx_broadcast;
    x264_pthread_mutex_t mtx_waiter_count;
180
    volatile int waiter_count;
181 182
    HANDLE semaphore;
    HANDLE waiters_done;
183
    volatile int is_broadcast;
184 185 186 187 188 189 190 191 192 193 194 195 196 197
} x264_win32_cond_t;

/* Initialize a condition variable.
 *
 * When the native API was found by x264_win32_threading_init(), cond is initialized
 * through it. Otherwise an emulated condition variable (x264_win32_cond_t) is
 * allocated and published in cond->Ptr once it is fully constructed.
 *
 * attr is not used.
 *
 * Returns 0 on success and -1 on failure. On failure every resource acquired so far
 * is released again; if the failure happened after the allocation, cond->Ptr is
 * reset to NULL. */
int x264_pthread_cond_init( x264_pthread_cond_t *cond, const x264_pthread_condattr_t *attr )
{
    if( thread_control.cond_init )
    {
        thread_control.cond_init( cond );
        return 0;
    }

    /* non native condition variables */
    x264_win32_cond_t *win32_cond = calloc( 1, sizeof(x264_win32_cond_t) );
    if( !win32_cond )
        return -1;

    win32_cond->semaphore = CreateSemaphoreW( NULL, 0, 0x7fffffff, NULL );
    if( !win32_cond->semaphore )
        goto fail_alloc;

    if( x264_pthread_mutex_init( &win32_cond->mtx_waiter_count, NULL ) )
        goto fail_semaphore;
    if( x264_pthread_mutex_init( &win32_cond->mtx_broadcast, NULL ) )
        goto fail_mtx_waiter_count;

    win32_cond->waiters_done = CreateEventW( NULL, FALSE, FALSE, NULL );
    if( !win32_cond->waiters_done )
        goto fail_mtx_broadcast;

    cond->Ptr = win32_cond;
    return 0;

    /* unwind in reverse order of acquisition */
fail_mtx_broadcast:
    x264_pthread_mutex_destroy( &win32_cond->mtx_broadcast );
fail_mtx_waiter_count:
    x264_pthread_mutex_destroy( &win32_cond->mtx_waiter_count );
fail_semaphore:
    CloseHandle( win32_cond->semaphore );
fail_alloc:
    free( win32_cond );
    cond->Ptr = NULL;
    return -1;
}

/* Destroy a condition variable.
 *
 * Native condition variables need no cleanup. For the emulated implementation the
 * semaphore, the event, both mutexes and the x264_win32_cond_t itself are released.
 * cond->Ptr is cleared afterwards so that a stale pointer is never left behind, and
 * a condition variable whose Ptr is already NULL is treated as having nothing to
 * release.
 *
 * Always returns 0. */
int x264_pthread_cond_destroy( x264_pthread_cond_t *cond )
{
    /* native condition variables do not destroy */
    if( thread_control.cond_init )
        return 0;

    /* non native condition variables */
    x264_win32_cond_t *win32_cond = cond->Ptr;
    if( !win32_cond )
        return 0;

    CloseHandle( win32_cond->semaphore );
    CloseHandle( win32_cond->waiters_done );
    x264_pthread_mutex_destroy( &win32_cond->mtx_broadcast );
    x264_pthread_mutex_destroy( &win32_cond->mtx_waiter_count );
    free( win32_cond );
    cond->Ptr = NULL;

    return 0;
}

/* Wake every thread waiting on the condition variable.
 *
 * Uses the native API when available. In the emulated implementation mtx_broadcast is
 * held for the whole call. If any waiters are registered, is_broadcast is raised, the
 * semaphore is released once per registered waiter, and the caller then blocks on
 * waiters_done before lowering is_broadcast again.
 *
 * Returns 0 for the native path, otherwise the result of unlocking mtx_broadcast. */
int x264_pthread_cond_broadcast( x264_pthread_cond_t *cond )
{
    if( thread_control.cond_broadcast )
    {
        thread_control.cond_broadcast( cond );
        return 0;
    }

    /* non native condition variables */
    x264_win32_cond_t *cv = cond->Ptr;
    x264_pthread_mutex_lock( &cv->mtx_broadcast );
    x264_pthread_mutex_lock( &cv->mtx_waiter_count );

    if( !cv->waiter_count )
    {
        /* nobody is waiting, nothing to release */
        x264_pthread_mutex_unlock( &cv->mtx_waiter_count );
    }
    else
    {
        cv->is_broadcast = 1;
        ReleaseSemaphore( cv->semaphore, cv->waiter_count, NULL );
        x264_pthread_mutex_unlock( &cv->mtx_waiter_count );
        /* block until a waiter reports completion through the event */
        WaitForSingleObject( cv->waiters_done, INFINITE );
        cv->is_broadcast = 0;
    }

    return x264_pthread_mutex_unlock( &cv->mtx_broadcast );
}

/* Wake one thread waiting on the condition variable.
 *
 * Uses the native API when available. In the emulated implementation mtx_broadcast is
 * held for the whole call. The waiter count is sampled under mtx_waiter_count; if it
 * is nonzero, one semaphore count is released and the caller blocks on waiters_done
 * until the woken waiter sets it.
 *
 * Returns 0 for the native path, otherwise the result of unlocking mtx_broadcast. */
int x264_pthread_cond_signal( x264_pthread_cond_t *cond )
{
    if( thread_control.cond_signal )
    {
        thread_control.cond_signal( cond );
        return 0;
    }

    /* non-native condition variables */
    x264_win32_cond_t *cv = cond->Ptr;

    x264_pthread_mutex_lock( &cv->mtx_broadcast );

    x264_pthread_mutex_lock( &cv->mtx_waiter_count );
    int waiters = cv->waiter_count;
    x264_pthread_mutex_unlock( &cv->mtx_waiter_count );

    /* nothing to wake */
    if( !waiters )
        return x264_pthread_mutex_unlock( &cv->mtx_broadcast );

    ReleaseSemaphore( cv->semaphore, 1, NULL );
    WaitForSingleObject( cv->waiters_done, INFINITE );

    return x264_pthread_mutex_unlock( &cv->mtx_broadcast );
}

/* Wait on the condition variable; mutex is released while waiting and re-acquired
 * before returning.
 *
 * Uses the native API when available (returns 0 on success, 1 on failure).
 * In the emulated implementation the caller registers itself in waiter_count (taking
 * mtx_broadcast and mtx_waiter_count to do so), unlocks mutex and blocks on the
 * semaphore. Once woken it deregisters and sets waiters_done unless a broadcast is in
 * progress and other waiters are still registered. The return value is then the
 * result of re-locking mutex. */
int x264_pthread_cond_wait( x264_pthread_cond_t *cond, x264_pthread_mutex_t *mutex )
{
    if( thread_control.cond_wait )
        return !thread_control.cond_wait( cond, mutex, INFINITE );

    /* non native condition variables */
    x264_win32_cond_t *cv = cond->Ptr;

    /* register as a waiter */
    x264_pthread_mutex_lock( &cv->mtx_broadcast );
    x264_pthread_mutex_lock( &cv->mtx_waiter_count );
    cv->waiter_count++;
    x264_pthread_mutex_unlock( &cv->mtx_waiter_count );
    x264_pthread_mutex_unlock( &cv->mtx_broadcast );

    /* release the caller's mutex and sleep until signalled */
    x264_pthread_mutex_unlock( mutex );
    WaitForSingleObject( cv->semaphore, INFINITE );

    /* deregister and decide whether this thread has to notify the signaller */
    x264_pthread_mutex_lock( &cv->mtx_waiter_count );
    cv->waiter_count--;
    int notify_signaller = !( cv->waiter_count && cv->is_broadcast );
    x264_pthread_mutex_unlock( &cv->mtx_waiter_count );

    if( notify_signaller )
        SetEvent( cv->waiters_done );

    /* re-acquire the caller's mutex */
    return x264_pthread_mutex_lock( mutex );
}

int x264_win32_threading_init( void )
{
    /* find function pointers to API functions, if they exist */
Henrik Gramner's avatar
Henrik Gramner committed
322
    HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
323 324 325 326 327 328 329 330
    thread_control.cond_init = (void*)GetProcAddress( kernel_dll, "InitializeConditionVariable" );
    if( thread_control.cond_init )
    {
        /* we're on a windows 6.0+ kernel, acquire the rest of the functions */
        thread_control.cond_broadcast = (void*)GetProcAddress( kernel_dll, "WakeAllConditionVariable" );
        thread_control.cond_signal = (void*)GetProcAddress( kernel_dll, "WakeConditionVariable" );
        thread_control.cond_wait = (void*)GetProcAddress( kernel_dll, "SleepConditionVariableCS" );
    }
Henrik Gramner's avatar
Henrik Gramner committed
331
    return x264_pthread_mutex_init( &static_mutex, NULL );
332 333
}

334
int x264_pthread_num_processors_np( void )
335
{
336 337 338 339 340 341 342
    DWORD_PTR system_cpus, process_cpus = 0;
    int cpus = 0;

    /* GetProcessAffinityMask returns affinities of 0 when the process has threads in multiple processor groups.
     * On platforms that support processor grouping, use GetThreadGroupAffinity to get the current thread's affinity instead. */
#if ARCH_X86_64
    /* find function pointers to API functions specific to x86_64 platforms, if they exist */
Henrik Gramner's avatar
Henrik Gramner committed
343
    HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
Henrik Gramner's avatar
Henrik Gramner committed
344
    BOOL (*get_thread_affinity)( HANDLE thread, void *group_affinity ) = (void*)GetProcAddress( kernel_dll, "GetThreadGroupAffinity" );
345
    if( get_thread_affinity )
346
    {
347
        /* running on a platform that supports >64 logical cpus */
Henrik Gramner's avatar
Henrik Gramner committed
348 349 350 351 352 353
        struct /* GROUP_AFFINITY */
        {
            ULONG_PTR mask; // KAFFINITY = ULONG_PTR
            USHORT group;
            USHORT reserved[3];
        } thread_affinity;
354 355
        if( get_thread_affinity( GetCurrentThread(), &thread_affinity ) )
            process_cpus = thread_affinity.mask;
356
    }
357 358 359 360 361 362 363
#endif
    if( !process_cpus )
        GetProcessAffinityMask( GetCurrentProcess(), &process_cpus, &system_cpus );
    for( DWORD_PTR bit = 1; bit; bit <<= 1 )
        cpus += !!(process_cpus & bit);

    return cpus ? cpus : 1;
364
}
Henrik Gramner's avatar
Henrik Gramner committed
365
#endif