Skip to content
Commits on Source (91)
......@@ -6,3 +6,4 @@ tags
.DS_Store
/tests/dav1d-test-data
*.snap
/tools/output/xxhash.h
......@@ -91,6 +91,7 @@ x86inc-check:
- git diff --exit-code x86inc/master:x86inc.asm src/ext/x86/x86inc.asm
allow_failure: true
build-debian:
extends: .debian-amd64-common
tags:
......@@ -138,6 +139,30 @@ build-debian-examples:
-Denable_examples=true
- ninja -C build
build-debian-no-tools:
extends: .debian-amd64-common
script:
- meson build --buildtype release
--werror
-Denable_tools=false
- ninja -C build
build-debian-bitdepth-8:
extends: .debian-amd64-common
script:
- meson build --buildtype release
--werror
-Dbitdepths=8
- ninja -C build
build-debian-bitdepth-16:
extends: .debian-amd64-common
script:
- meson build --buildtype release
--werror
-Dbitdepths=16
- ninja -C build
build-win32:
extends: .debian-amd64-common
script:
......@@ -290,7 +315,8 @@ build-debian-aarch64-clang-5:
build-macos:
stage: build
tags:
- macos
- amd64
- catalina
script:
- meson build --buildtype release
-Ddefault_library=both
......@@ -400,6 +426,8 @@ test-debian:
- ninja coverage-xml
- grep -Eo 'line-rate="[^"]+"' meson-logs/coverage.xml | head -n 1 |
grep -Eo '[0-9.]+' | awk '{ print "coverage:", $1 * 100 } '
- time meson test -v --suite testdata_seek-stress --test-args "--tilethreads 2 --framethreads 1 --pfthreads=2"
- time meson test -v --suite testdata_seek-stress --test-args "--tilethreads 2 --framethreads 2 --pfthreads=2"
coverage: '/^coverage: (\d+.\d+)$/'
artifacts:
expose_as: 'Coverage HTML report'
......@@ -452,7 +480,7 @@ test-debian-asan:
- ninja -C build
- cd build
- exit_code=0
- time meson test -v --setup=sanitizer --test-args "--cpumask 0" || exit_code=$((exit_code + $?))
- time meson test -v --setup=sanitizer --test-args "--cpumask 0" || exit_code=$((exit_code + $?))
- time meson test -v --setup=sanitizer --test-args "--cpumask 0xff" || exit_code=$((exit_code + $?))
- if [ $exit_code -ne 0 ]; then exit $exit_code; fi
......@@ -509,9 +537,14 @@ test-debian-tsan:
- ninja -C build
- cd build
- exit_code=0
- time meson test -v --setup=sanitizer --suite testdata-8 --suite testdata-10 --suite testdata-12 --test-args "--tilethreads 1 --framethreads 2" || exit_code=$((exit_code + $?))
- time meson test -v --setup=sanitizer --suite testdata-8 --suite testdata-10 --suite testdata-12 --test-args "--tilethreads 2 --framethreads 1" || exit_code=$((exit_code + $?))
- time meson test -v --setup=sanitizer --suite testdata-8 --suite testdata-10 --suite testdata-12 --test-args "--tilethreads 2 --framethreads 2" || exit_code=$((exit_code + $?))
- time meson test -v --setup=sanitizer --suite testdata-8 --suite testdata-10 --suite testdata-12 --test-args "--tilethreads 1 --framethreads 2 --pfthreads 1" || exit_code=$((exit_code + $?))
- time meson test -v --setup=sanitizer --suite testdata-8 --suite testdata-10 --suite testdata-12 --test-args "--tilethreads 2 --framethreads 1 --pfthreads 1" || exit_code=$((exit_code + $?))
- time meson test -v --setup=sanitizer --suite testdata-8 --suite testdata-10 --suite testdata-12 --test-args "--tilethreads 2 --framethreads 2 --pfthreads 1" || exit_code=$((exit_code + $?))
- time meson test -v --setup=sanitizer --suite testdata-8 --suite testdata-10 --suite testdata-12 --test-args "--tilethreads 2 --framethreads 1 --pfthreads 2" || exit_code=$((exit_code + $?))
- time meson test -v --setup=sanitizer --suite testdata-8 --suite testdata-10 --suite testdata-12 --test-args "--tilethreads 2 --framethreads 2 --pfthreads 2" || exit_code=$((exit_code + $?))
- time meson test -v --setup=sanitizer --suite testdata_seek-stress --test-args "--tilethreads 2 --framethreads 1 --pfthreads 2" || exit_code=$((exit_code + $?))
- time meson test -v --setup=sanitizer --suite testdata_seek-stress --test-args "--tilethreads 2 --framethreads 2 --pfthreads 2" || exit_code=$((exit_code + $?))
- time meson test -v --setup=sanitizer --suite oss-fuzz-asan --suite oss-fuzz-msan --suite oss-fuzz-ubsan || exit_code=$((exit_code + $?))
- if [ $exit_code -ne 0 ]; then exit $exit_code; fi
test-win64:
......
Changes for 0.8.1 'Eurasian hobby":
Changes for 0.8.2 'Eurasian hobby':
-----------------------------------
0.8.2 is a middle-size update of the 0.8.0 branch:
- ARM32 optimizations for ipred and itx in 10/12bits,
completing the 10b/12b work on ARM64 and ARM32
- Give the post-filters their own threads
- ARM64: rewrite the wiener functions
- Speed up coefficient decoding, 0.5%-3% global decoding gain
- x86 optimizations for CDEF_filter and wiener in 10/12bit
- x86: rewrite the SGR AVX2 asm
- x86: improve msac speed on SSE2+ machines
- ARM32: improve speed of ipred and warp
- ARM64: improve speed of ipred, cdef_dir, cdef_filter, warp_motion and itx16
- ARM32/64: improve speed of looprestoration
- Add seeking, pausing to the player
- Update the player for rendering of 10b/12b
- Misc speed improvements and fixes on all platforms
- Add a xxh3 muxer in the dav1d application
Changes for 0.8.1 'Eurasian hobby':
-----------------------------------
0.8.1 is a minor update on 0.8.0:
......@@ -10,7 +31,7 @@ Changes for 0.8.1 'Eurasian hobby":
- x86 optimizations for wiener in SSE2/SSSE3/AVX2
Changes for 0.8.0 'Eurasian hobby":
Changes for 0.8.0 'Eurasian hobby':
-----------------------------------
0.8.0 is a major update for dav1d:
......
......@@ -39,6 +39,11 @@
#include "dp_fifo.h"
#include "dp_renderer.h"
// Convert a frame index into a timestamp in nanoseconds (rounded to nearest).
// NOTE: expands to an expression that reads `rd_ctx->spf` (seconds per frame),
// so a `rd_ctx` pointer must be in scope wherever this macro is used.
// The whole replacement list is parenthesized so the cast cannot re-associate
// with operators at the expansion site.
#define FRAME_OFFSET_TO_PTS(foff) \
    ((uint64_t)(((foff) * rd_ctx->spf) * 1000000000.0 + .5))
// Convert a timestamp expressed in timebase units into nanoseconds (rounded
// to nearest). Reads `rd_ctx->timebase`, so `rd_ctx` must be in scope.
#define TS_TO_PTS(ts) \
    ((uint64_t)(((ts) * rd_ctx->timebase) * 1000000000.0 + .5))
// Selected renderer callbacks and cookie
static const Dav1dPlayRenderInfo *renderer_info = { NULL };
......@@ -59,27 +64,43 @@ typedef struct render_context
// Lock to protect access to the context structure
SDL_mutex *lock;
// Timestamp of previous decoded frame
int64_t last_pts;
// Timestamp of current decoded frame
int64_t current_pts;
// Timestamp of last displayed frame (in timebase unit)
int64_t last_ts;
// Timestamp of last decoded frame (in timebase unit)
int64_t current_ts;
// Ticks when last frame was received
uint32_t last_ticks;
// PTS time base
double timebase;
// Seconds per frame
double spf;
// Number of frames
uint32_t total;
// Fifo
Dav1dPlayPtrFifo *fifo;
// Custom SDL2 event type
uint32_t renderer_event_type;
// Custom SDL2 event types
uint32_t event_types;
// User pause state
uint8_t user_paused;
// Internal pause state
uint8_t paused;
// Start of internal pause state
uint32_t pause_start;
// Duration of internal pause state
uint32_t pause_time;
// Seek accumulator
int seek;
// Indicates if termination of the decoder thread was requested
uint8_t dec_should_terminate;
} Dav1dPlayRenderContext;
static void dp_settings_print_usage(const char *const app,
const char *const reason, ...)
const char *const reason, ...)
{
if (reason) {
va_list args;
......@@ -95,6 +116,7 @@ static void dp_settings_print_usage(const char *const app,
" --untimed/-u: ignore PTS, render as fast as possible\n"
" --framethreads $num: number of frame threads (default: 1)\n"
" --tilethreads $num: number of tile threads (default: 1)\n"
" --pfthreads $num: number of postfilter threads(default: 1)\n"
" --highquality: enable high quality rendering\n"
" --zerocopy/-z: enable zero copy upload path\n"
" --gpugrain/-g: enable GPU grain synthesis\n"
......@@ -115,7 +137,7 @@ static unsigned parse_unsigned(const char *const optarg, const int option,
}
static void dp_rd_ctx_parse_args(Dav1dPlayRenderContext *rd_ctx,
const int argc, char *const *const argv)
const int argc, char *const *const argv)
{
int o;
Dav1dPlaySettings *settings = &rd_ctx->settings;
......@@ -127,6 +149,7 @@ static void dp_rd_ctx_parse_args(Dav1dPlayRenderContext *rd_ctx,
enum {
ARG_FRAME_THREADS = 256,
ARG_TILE_THREADS,
ARG_POSTFILTER_THREADS,
ARG_HIGH_QUALITY,
};
......@@ -137,6 +160,7 @@ static void dp_rd_ctx_parse_args(Dav1dPlayRenderContext *rd_ctx,
{ "untimed", 0, NULL, 'u' },
{ "framethreads", 1, NULL, ARG_FRAME_THREADS },
{ "tilethreads", 1, NULL, ARG_TILE_THREADS },
{ "pfthreads", 1, NULL, ARG_POSTFILTER_THREADS },
{ "highquality", 0, NULL, ARG_HIGH_QUALITY },
{ "zerocopy", 0, NULL, 'z' },
{ "gpugrain", 0, NULL, 'g' },
......@@ -175,6 +199,10 @@ static void dp_rd_ctx_parse_args(Dav1dPlayRenderContext *rd_ctx,
lib_settings->n_tile_threads =
parse_unsigned(optarg, ARG_TILE_THREADS, argv[0]);
break;
case ARG_POSTFILTER_THREADS:
lib_settings->n_postfilter_threads =
parse_unsigned(optarg, ARG_POSTFILTER_THREADS, argv[0]);
break;
default:
dp_settings_print_usage(argv[0], NULL);
}
......@@ -213,16 +241,16 @@ static Dav1dPlayRenderContext *dp_rd_ctx_create(int argc, char **argv)
Dav1dPlayRenderContext *rd_ctx;
// Alloc
rd_ctx = malloc(sizeof(Dav1dPlayRenderContext));
rd_ctx = calloc(1, sizeof(Dav1dPlayRenderContext));
if (rd_ctx == NULL) {
return NULL;
}
// Register a custom event to notify our SDL main thread
// about new frames
rd_ctx->renderer_event_type = SDL_RegisterEvents(1);
if (rd_ctx->renderer_event_type == UINT32_MAX) {
fprintf(stderr, "Failure to create custom SDL event type!\n");
rd_ctx->event_types = SDL_RegisterEvents(3);
if (rd_ctx->event_types == UINT32_MAX) {
fprintf(stderr, "Failure to create custom SDL event types!\n");
free(rd_ctx);
return NULL;
}
......@@ -265,24 +293,17 @@ static Dav1dPlayRenderContext *dp_rd_ctx_create(int argc, char **argv)
return NULL;
}
rd_ctx->last_pts = 0;
rd_ctx->last_ticks = 0;
rd_ctx->current_pts = 0;
rd_ctx->timebase = 0;
rd_ctx->dec_should_terminate = 0;
return rd_ctx;
}
/**
* Notify about new available frame
* Notify about new event
*/
static void dp_rd_ctx_post_event(Dav1dPlayRenderContext *rd_ctx, uint32_t code)
static void dp_rd_ctx_post_event(Dav1dPlayRenderContext *rd_ctx, uint32_t type)
{
SDL_Event event;
SDL_zero(event);
event.type = rd_ctx->renderer_event_type;
event.user.code = code;
event.type = type;
SDL_PushEvent(&event);
}
......@@ -294,10 +315,137 @@ static void dp_rd_ctx_post_event(Dav1dPlayRenderContext *rd_ctx, uint32_t code)
* new picture.
*/
static void dp_rd_ctx_update_with_dav1d_picture(Dav1dPlayRenderContext *rd_ctx,
Dav1dPicture *dav1d_pic)
Dav1dPicture *dav1d_pic)
{
rd_ctx->current_ts = dav1d_pic->m.timestamp;
renderer_info->update_frame(rd_ctx->rd_priv, dav1d_pic, &rd_ctx->settings);
rd_ctx->current_pts = dav1d_pic->m.timestamp;
}
/**
 * Flip the user-requested pause state.
 *
 * The user's wish is always recorded. The effective (internal) pause state
 * and the pause bookkeeping are only touched when no seek is pending; while
 * a seek is pending they are left as they are.
 */
static void dp_rd_ctx_toggle_pause(Dav1dPlayRenderContext *rd_ctx)
{
    SDL_LockMutex(rd_ctx->lock);

    rd_ctx->user_paused = !rd_ctx->user_paused;

    if (!rd_ctx->seek) {
        const uint32_t ticks = SDL_GetTicks();

        rd_ctx->paused = rd_ctx->user_paused;
        if (!rd_ctx->paused) {
            // Resuming: account for the time spent paused and restart the
            // frame pacing reference from "now".
            rd_ctx->pause_time += ticks - rd_ctx->pause_start;
            rd_ctx->pause_start = 0;
            rd_ctx->last_ticks = ticks;
        } else {
            // Pausing: remember when the pause began.
            rd_ctx->pause_start = ticks;
        }
    }

    SDL_UnlockMutex(rd_ctx->lock);
}
/**
 * Report the internal pause state.
 *
 * Returns the value of the context's `paused` flag, read while holding the
 * context lock.
 */
static int dp_rd_ctx_is_paused(Dav1dPlayRenderContext *rd_ctx)
{
    SDL_LockMutex(rd_ctx->lock);
    const int is_paused = rd_ctx->paused;
    SDL_UnlockMutex(rd_ctx->lock);

    return is_paused;
}
/**
 * Queue a relative seek of `sec` seconds (negative values seek backwards).
 *
 * Requests accumulate in the context's seek counter. Playback is put into
 * the internal pause state; the pause start time is only stamped if
 * playback was not already paused.
 */
static void dp_rd_ctx_seek(Dav1dPlayRenderContext *rd_ctx, int sec)
{
    SDL_LockMutex(rd_ctx->lock);

    const int was_paused = rd_ctx->paused;

    rd_ctx->paused = 1;
    rd_ctx->seek += sec;
    if (!was_paused)
        rd_ctx->pause_start = SDL_GetTicks();

    SDL_UnlockMutex(rd_ctx->lock);
}
static int decode_frame(Dav1dPicture **p, Dav1dContext *c,
Dav1dData *data, DemuxerContext *in_ctx);
static inline void destroy_pic(void *a);
/**
 * Seek the stream, if requested
 *
 * Does nothing (returns 0) when no seek is pending. Otherwise:
 *  1. computes the target time from the current timestamp plus the
 *     accumulated seek offset, clamped to [0, last frame];
 *  2. flushes the decoder and searches backwards, in steps of up to five
 *     frames, for a packet carrying a sequence header at or before the
 *     target;
 *  3. decodes forward, discarding pictures until the target is reached; the
 *     first picture at/after the target is queued and announced with a
 *     DAV1D_EVENT_SEEK_FRAME event.
 * On exit the internal pause state is restored from the user pause state and
 * the seek accumulator is cleared.
 *
 * Returns 0 on success, or the non-zero result of the failing
 * input/decoder call.
 */
static int dp_rd_ctx_handle_seek(Dav1dPlayRenderContext *rd_ctx,
                                 DemuxerContext *in_ctx,
                                 Dav1dContext *c, Dav1dData *data)
{
    int res = 0;
    SDL_LockMutex(rd_ctx->lock);
    if (!rd_ctx->seek)
        goto out;
    // Seek offset in nanoseconds, computed in signed 64-bit arithmetic
    int64_t seek = (int64_t)rd_ctx->seek * 1000000000LL;
    uint64_t pts = TS_TO_PTS(rd_ctx->current_ts);
    // Do not seek before the start of the stream
    pts = ((int64_t)pts > -seek) ? pts + seek : 0;
    // Do not seek past the last frame; guard `total - 1` against wrapping
    // around when the demuxer reported zero frames
    if (pts >= FRAME_OFFSET_TO_PTS(rd_ctx->total))
        pts = FRAME_OFFSET_TO_PTS(rd_ctx->total ? rd_ctx->total - 1 : 0);
    uint64_t target_pts = pts;
    dav1d_flush(c);
    uint64_t shift = FRAME_OFFSET_TO_PTS(5);
    // NOTE(review): input_read() below is called repeatedly on `data` without
    // releasing what it previously held - confirm against the demuxer
    // whether that leaks the earlier packet.
    while (1) {
        if (shift > pts)
            shift = pts;
        if ((res = input_seek(in_ctx, pts - shift)))
            goto out;
        Dav1dSequenceHeader seq;
        uint64_t cur_pts = 0;
        // Read forward until a packet with a sequence header shows up or we
        // have walked past the search point
        do {
            if ((res = input_read(in_ctx, data)))
                break;
            cur_pts = TS_TO_PTS(data->m.timestamp);
            res = dav1d_parse_sequence_header(&seq, data->data, data->sz);
        } while (res && cur_pts < pts);
        if (!res && cur_pts <= pts)
            break;
        if (!pts) {
            // The search point is already the very start of the stream, so
            // stepping further back is impossible. Retrying would repeat the
            // same seek forever while holding the lock.
            if (!res)
                break;    // a header was found, just later than requested
            goto out;     // nothing usable: report the failure
        }
        if (shift > pts)
            shift = pts;
        pts -= shift;
    }
    if (!res) {
        pts = TS_TO_PTS(data->m.timestamp);
        // Decode forward, discarding pictures until the target is reached
        while (pts < target_pts) {
            Dav1dPicture *p = NULL;
            if ((res = decode_frame(&p, c, data, in_ctx)))
                break;
            if (p) {
                pts = TS_TO_PTS(p->m.timestamp);
                if (pts < target_pts)
                    destroy_pic(p);
                else {
                    dp_fifo_push(rd_ctx->fifo, p);
                    uint32_t type = rd_ctx->event_types + DAV1D_EVENT_SEEK_FRAME;
                    dp_rd_ctx_post_event(rd_ctx, type);
                }
            }
        }
        if (!res) {
            // Pretend the previous frame was shown exactly one frame
            // duration (in timebase units) before the current packet
            rd_ctx->last_ts = data->m.timestamp - rd_ctx->spf / rd_ctx->timebase;
            rd_ctx->current_ts = data->m.timestamp;
        }
    }
out:
    // Leave the seek-induced pause; fall back to what the user asked for
    rd_ctx->paused = rd_ctx->user_paused;
    if (!rd_ctx->paused && rd_ctx->seek) {
        uint32_t now = SDL_GetTicks();
        rd_ctx->pause_time += now - rd_ctx->pause_start;
        rd_ctx->pause_start = 0;
        rd_ctx->last_ticks = now;
    }
    rd_ctx->seek = 0;
    SDL_UnlockMutex(rd_ctx->lock);
    if (res)
        fprintf(stderr, "Error seeking, aborting\n");
    return res;
}
/**
......@@ -329,14 +477,15 @@ static int dp_rd_ctx_should_terminate(Dav1dPlayRenderContext *rd_ctx)
*/
static void dp_rd_ctx_render(Dav1dPlayRenderContext *rd_ctx)
{
SDL_LockMutex(rd_ctx->lock);
// Calculate time since last frame was received
uint32_t ticks_now = SDL_GetTicks();
uint32_t ticks_diff = (rd_ctx->last_ticks != 0) ? ticks_now - rd_ctx->last_ticks : 0;
// Calculate when to display the frame
int64_t pts_diff = rd_ctx->current_pts - rd_ctx->last_pts;
int32_t wait_time = (pts_diff * rd_ctx->timebase) * 1000 - ticks_diff;
rd_ctx->last_pts = rd_ctx->current_pts;
int64_t ts_diff = rd_ctx->current_ts - rd_ctx->last_ts;
int32_t pts_diff = (ts_diff * rd_ctx->timebase) * 1000.0 + .5;
int32_t wait_time = pts_diff - ticks_diff;
// In untimed mode, simply don't wait
if (rd_ctx->settings.untimed)
......@@ -347,13 +496,59 @@ static void dp_rd_ctx_render(Dav1dPlayRenderContext *rd_ctx)
// accurate player this would need to be done in a better way.
if (wait_time > 0) {
SDL_Delay(wait_time);
} else if (wait_time < -10) { // Do not warn for minor time drifts
fprintf(stderr, "Frame displayed %f seconds too late\n", wait_time/(float)1000);
} else if (wait_time < -10 && !rd_ctx->paused) { // Do not warn for minor time drifts
fprintf(stderr, "Frame displayed %f seconds too late\n", wait_time / 1000.0);
}
renderer_info->render(rd_ctx->rd_priv, &rd_ctx->settings);
rd_ctx->last_ts = rd_ctx->current_ts;
rd_ctx->last_ticks = SDL_GetTicks();
SDL_UnlockMutex(rd_ctx->lock);
}
/**
 * Feed the pending packet to the decoder and try to fetch one picture.
 *
 * On return `*p` is either NULL (no picture available, or failure) or a
 * heap-allocated picture the caller now owns. `*p` is always written, so
 * callers never observe an indeterminate pointer.
 *
 * Returns 0 on success. Otherwise returns the decoder error, or, when the
 * packet was fully consumed, the non-zero result of reading the next packet
 * from the demuxer. Whenever a non-zero value is returned `*p` is NULL.
 */
static int decode_frame(Dav1dPicture **p, Dav1dContext *c,
                        Dav1dData *data, DemuxerContext *in_ctx)
{
    int res;
    *p = NULL;
    // Send data packets we got from the demuxer to dav1d
    if ((res = dav1d_send_data(c, data)) < 0) {
        // On EAGAIN, dav1d can not consume more data and
        // dav1d_get_picture needs to be called first, which
        // will happen below, so just keep going in that case
        // and do not error out.
        if (res != DAV1D_ERR(EAGAIN)) {
            dav1d_data_unref(data);
            goto err;
        }
    }
    *p = calloc(1, sizeof(**p));
    if (*p == NULL) {
        // Out of memory: report it instead of handing NULL to the decoder
        res = DAV1D_ERR(ENOMEM);
        goto err;
    }
    // Try to get a decoded frame
    if ((res = dav1d_get_picture(c, *p)) < 0) {
        // In all error cases, even EAGAIN, p needs to be freed as
        // it is never added to the queue and would leak.
        free(*p);
        *p = NULL;
        // On EAGAIN, it means dav1d has not enough data to decode
        // therefore this is not a decoding error but just means
        // we need to feed it more data, which happens in the next
        // run of the decoder loop.
        if (res != DAV1D_ERR(EAGAIN))
            goto err;
    }
    // Packet not fully consumed yet: keep it for the next call
    if (data->sz != 0)
        return 0;
    // Packet consumed: fetch the next one
    res = input_read(in_ctx, data);
    if (res && *p) {
        // The callers discard *p whenever a non-zero value is returned, so
        // release the picture here rather than leak it.
        dav1d_picture_unref(*p);
        free(*p);
        *p = NULL;
    }
    return res;
err:
    fprintf(stderr, "Error decoding frame: %s\n",
            strerror(-res));
    return res;
}
// Release a heap-allocated Dav1dPicture: drop its picture reference, then
// free the structure itself. Takes a void pointer so it can be passed as the
// element destructor to dp_fifo_flush().
static inline void destroy_pic(void *a)
{
    Dav1dPicture *const pic = a;

    dav1d_picture_unref(pic);
    free(pic);
}
/* Decoder thread "main" function */
......@@ -366,10 +561,7 @@ static int decoder_thread_main(void *cookie)
Dav1dData data;
DemuxerContext *in_ctx = NULL;
int res = 0;
unsigned n_out = 0, total, timebase[2], fps[2];
// Store current ticks for stats calculation
uint32_t decoder_start = SDL_GetTicks();
unsigned total, timebase[2], fps[2];
Dav1dPlaySettings settings = rd_ctx->settings;
......@@ -382,8 +574,9 @@ static int decoder_thread_main(void *cookie)
goto cleanup;
}
double timebase_d = timebase[1]/(double)timebase[0];
rd_ctx->timebase = timebase_d;
rd_ctx->timebase = (double)timebase[1] / timebase[0];
rd_ctx->spf = (double)fps[1] / fps[0];
rd_ctx->total = total;
if ((res = dav1d_open(&c, &rd_ctx->lib_settings))) {
fprintf(stderr, "Failed opening dav1d decoder\n");
......@@ -398,55 +591,29 @@ static int decoder_thread_main(void *cookie)
}
// Decoder loop
do {
if (dp_rd_ctx_should_terminate(rd_ctx))
while (1) {
if (dp_rd_ctx_should_terminate(rd_ctx) ||
(res = dp_rd_ctx_handle_seek(rd_ctx, in_ctx, c, &data)) ||
(res = decode_frame(&p, c, &data, in_ctx)))
{
break;
// Send data packets we got from the demuxer to dav1d
if ((res = dav1d_send_data(c, &data)) < 0) {
// On EAGAIN, dav1d can not consume more data and
// dav1d_get_picture needs to be called first, which
// will happen below, so just keep going in that case
// and do not error out.
if (res != DAV1D_ERR(EAGAIN)) {
dav1d_data_unref(&data);
fprintf(stderr, "Error decoding frame: %s\n",
strerror(-res));
break;
}
}
p = calloc(1, sizeof(*p));
// Try to get a decoded frame
if ((res = dav1d_get_picture(c, p)) < 0) {
// In all error cases, even EAGAIN, p needs to be freed as
// it is never added to the queue and would leak.
free(p);
// On EAGAIN, it means dav1d has not enough data to decode
// therefore this is not a decoding error but just means
// we need to feed it more data, which happens in the next
// run of this decoder loop.
if (res != DAV1D_ERR(EAGAIN)) {
fprintf(stderr, "Error decoding frame: %s\n",
strerror(-res));
break;
}
res = 0;
} else {
else if (p) {
// Queue frame
dp_fifo_push(rd_ctx->fifo, p);
dp_rd_ctx_post_event(rd_ctx, DAV1D_EVENT_NEW_FRAME);
n_out++;
SDL_LockMutex(rd_ctx->lock);
int seek = rd_ctx->seek;
SDL_UnlockMutex(rd_ctx->lock);
if (!seek) {
dp_fifo_push(rd_ctx->fifo, p);
uint32_t type = rd_ctx->event_types + DAV1D_EVENT_NEW_FRAME;
dp_rd_ctx_post_event(rd_ctx, type);
}
}
} while ((data.sz > 0 || !input_read(in_ctx, &data)));
}
// Release remaining data
if (data.sz > 0) dav1d_data_unref(&data);
if (data.sz > 0)
dav1d_data_unref(&data);
// Do not drain in case an error occurred and caused us to leave the
// decoding loop early.
if (res < 0)
......@@ -461,7 +628,6 @@ static int decoder_thread_main(void *cookie)
do {
if (dp_rd_ctx_should_terminate(rd_ctx))
break;
p = calloc(1, sizeof(*p));
res = dav1d_get_picture(c, p);
if (res < 0) {
......@@ -474,19 +640,13 @@ static int decoder_thread_main(void *cookie)
} else {
// Queue frame
dp_fifo_push(rd_ctx->fifo, p);
dp_rd_ctx_post_event(rd_ctx, DAV1D_EVENT_NEW_FRAME);
n_out++;
uint32_t type = rd_ctx->event_types + DAV1D_EVENT_NEW_FRAME;
dp_rd_ctx_post_event(rd_ctx, type);
}
} while (res != DAV1D_ERR(EAGAIN));
// Print stats
uint32_t decoding_time_ms = SDL_GetTicks() - decoder_start;
printf("Decoded %u frames in %d seconds, avg %.02f fps\n",
n_out, decoding_time_ms/1000, n_out / (decoding_time_ms / 1000.0));
cleanup:
dp_rd_ctx_post_event(rd_ctx, DAV1D_EVENT_DEC_QUIT);
dp_rd_ctx_post_event(rd_ctx, rd_ctx->event_types + DAV1D_EVENT_DEC_QUIT);
if (in_ctx)
input_close(in_ctx);
......@@ -543,41 +703,84 @@ int main(int argc, char **argv)
decoder_thread = SDL_CreateThread(decoder_thread_main, "Decoder thread", rd_ctx);
// Main loop
#define NUM_MAX_EVENTS 8
SDL_Event events[NUM_MAX_EVENTS];
int num_frame_events = 0;
uint32_t start_time = 0, n_out = 0;
while (1) {
SDL_Event e;
if (SDL_WaitEvent(&e)) {
if (e.type == SDL_QUIT) {
int num_events = 0;
SDL_WaitEvent(NULL);
while (num_events < NUM_MAX_EVENTS && SDL_PollEvent(&events[num_events++]))
break;
for (int i = 0; i < num_events; ++i) {
SDL_Event *e = &events[i];
if (e->type == SDL_QUIT) {
dp_rd_ctx_request_shutdown(rd_ctx);
} else if (e.type == SDL_WINDOWEVENT) {
if (e.window.event == SDL_WINDOWEVENT_SIZE_CHANGED) {
dp_fifo_flush(rd_ctx->fifo, destroy_pic);
SDL_FlushEvent(rd_ctx->event_types + DAV1D_EVENT_NEW_FRAME);
SDL_FlushEvent(rd_ctx->event_types + DAV1D_EVENT_SEEK_FRAME);
num_frame_events = 0;
} else if (e->type == SDL_WINDOWEVENT) {
if (e->window.event == SDL_WINDOWEVENT_SIZE_CHANGED) {
// TODO: Handle window resizes
} else if(e->window.event == SDL_WINDOWEVENT_EXPOSED) {
dp_rd_ctx_render(rd_ctx);
}
} else if (e.type == rd_ctx->renderer_event_type) {
if (e.user.code == DAV1D_EVENT_NEW_FRAME) {
// Dequeue frame and update the render context with it
Dav1dPicture *p = dp_fifo_shift(rd_ctx->fifo);
// Do not update textures during termination
if (!dp_rd_ctx_should_terminate(rd_ctx))
dp_rd_ctx_update_with_dav1d_picture(rd_ctx, p);
dav1d_picture_unref(p);
free(p);
} else if (e.user.code == DAV1D_EVENT_DEC_QUIT) {
break;
} else if (e->type == SDL_KEYDOWN) {
SDL_KeyboardEvent *kbde = (SDL_KeyboardEvent *)e;
if (kbde->keysym.sym == SDLK_SPACE) {
dp_rd_ctx_toggle_pause(rd_ctx);
} else if (kbde->keysym.sym == SDLK_LEFT ||
kbde->keysym.sym == SDLK_RIGHT)
{
if (kbde->keysym.sym == SDLK_LEFT)
dp_rd_ctx_seek(rd_ctx, -5);
else if (kbde->keysym.sym == SDLK_RIGHT)
dp_rd_ctx_seek(rd_ctx, +5);
dp_fifo_flush(rd_ctx->fifo, destroy_pic);
SDL_FlushEvent(rd_ctx->event_types + DAV1D_EVENT_NEW_FRAME);
num_frame_events = 0;
}
} else if (e->type == rd_ctx->event_types + DAV1D_EVENT_NEW_FRAME) {
num_frame_events++;
// Store current ticks for stats calculation
if (start_time == 0)
start_time = SDL_GetTicks();
} else if (e->type == rd_ctx->event_types + DAV1D_EVENT_SEEK_FRAME) {
// Dequeue frame and update the render context with it
Dav1dPicture *p = dp_fifo_shift(rd_ctx->fifo);
// Do not update textures during termination
if (!dp_rd_ctx_should_terminate(rd_ctx)) {
dp_rd_ctx_update_with_dav1d_picture(rd_ctx, p);
n_out++;
}
destroy_pic(p);
} else if (e->type == rd_ctx->event_types + DAV1D_EVENT_DEC_QUIT) {
goto out;
}
}
// Do not render during termination
if (!dp_rd_ctx_should_terminate(rd_ctx))
dp_rd_ctx_render(rd_ctx);
if (num_frame_events && !dp_rd_ctx_is_paused(rd_ctx)) {
// Dequeue frame and update the render context with it
Dav1dPicture *p = dp_fifo_shift(rd_ctx->fifo);
// Do not update textures during termination
if (!dp_rd_ctx_should_terminate(rd_ctx)) {
dp_rd_ctx_update_with_dav1d_picture(rd_ctx, p);
dp_rd_ctx_render(rd_ctx);
n_out++;
}
destroy_pic(p);
num_frame_events--;
}
}
out:;
// Print stats
uint32_t time_ms = SDL_GetTicks() - start_time - rd_ctx->pause_time;
printf("Decoded %u frames in %d seconds, avg %.02f fps\n",
n_out, time_ms / 1000, n_out/ (time_ms / 1000.0));
int decoder_ret = 0;
SDL_WaitThread(decoder_thread, &decoder_ret);
dp_rd_ctx_destroy(rd_ctx);
return decoder_ret;
}
......@@ -37,6 +37,8 @@ struct dp_fifo
size_t capacity;
size_t count;
void **entries;
int push_wait;
int flush;
};
......@@ -54,6 +56,8 @@ Dav1dPlayPtrFifo *dp_fifo_create(size_t capacity)
fifo->capacity = capacity;
fifo->count = 0;
fifo->push_wait = 0;
fifo->flush = 0;
fifo->lock = SDL_CreateMutex();
if (fifo->lock == NULL) {
......@@ -90,8 +94,16 @@ void dp_fifo_destroy(Dav1dPlayPtrFifo *fifo)
void dp_fifo_push(Dav1dPlayPtrFifo *fifo, void *element)
{
SDL_LockMutex(fifo->lock);
while (fifo->count == fifo->capacity)
while (fifo->count == fifo->capacity) {
fifo->push_wait = 1;
SDL_CondWait(fifo->cond_change, fifo->lock);
fifo->push_wait = 0;
if (fifo->flush) {
SDL_CondSignal(fifo->cond_change);
SDL_UnlockMutex(fifo->lock);
return;
}
}
fifo->entries[fifo->count++] = element;
if (fifo->count == 1)
SDL_CondSignal(fifo->cond_change);
......@@ -120,4 +132,16 @@ void *dp_fifo_shift(Dav1dPlayPtrFifo *fifo)
return res;
}
/*
 * Empty the fifo, handing every queued element to destroy_elem.
 *
 * If a producer is currently blocked in dp_fifo_push() waiting for space, it
 * is woken first; with the flush flag set it returns without storing its
 * element. The queue is only emptied once that producer has left its wait.
 */
void dp_fifo_flush(Dav1dPlayPtrFifo *fifo, void (*destroy_elem)(void *))
{
    SDL_LockMutex(fifo->lock);
    fifo->flush = 1;
    // Handshake with a blocked producer. The wait is re-checked against
    // push_wait so that a wakeup not caused by the producer's signal does
    // not let the flush proceed while the producer is still parked.
    while (fifo->push_wait) {
        SDL_CondSignal(fifo->cond_change);
        SDL_CondWait(fifo->cond_change, fifo->lock);
    }
    // Destroy queued elements, newest first
    while (fifo->count)
        destroy_elem(fifo->entries[--fifo->count]);
    fifo->flush = 0;
    SDL_UnlockMutex(fifo->lock);
}
......@@ -59,3 +59,5 @@ void *dp_fifo_shift(Dav1dPlayPtrFifo *fifo);
* other thread will call dp_fifo_shift will lead to a deadlock.
*/
void dp_fifo_push(Dav1dPlayPtrFifo *fifo, void *element);
void dp_fifo_flush(Dav1dPlayPtrFifo *fifo, void (*destroy_elem)(void *));
......@@ -66,8 +66,11 @@ typedef struct {
#define WINDOW_WIDTH 910
#define WINDOW_HEIGHT 512
#define DAV1D_EVENT_NEW_FRAME 1
#define DAV1D_EVENT_DEC_QUIT 2
// Offsets of the player's custom SDL events. The actual SDL event type is
// the base value obtained from SDL_RegisterEvents() plus one of these
// offsets, so the enumerators must stay contiguous and start at zero.
enum {
// Posted by the decoder thread after it queued a regular decoded frame
DAV1D_EVENT_NEW_FRAME,
// Posted after a seek for the first frame at or past the seek target
DAV1D_EVENT_SEEK_FRAME,
// Posted when the decoder thread is finishing; ends the main event loop
DAV1D_EVENT_DEC_QUIT
};
/**
* Renderer info
......@@ -84,7 +87,7 @@ typedef struct rdr_info
void (*destroy_renderer)(void *cookie);
// Callback to the render function that renders a previously sent frame
void (*render)(void *cookie, const Dav1dPlaySettings *settings);
// Callback to the send frame function
// Callback to the send frame function, _may_ also unref dav1d_pic!
int (*update_frame)(void *cookie, Dav1dPicture *dav1d_pic,
const Dav1dPlaySettings *settings);
// Callback for alloc/release pictures (optional)
......
......@@ -30,7 +30,7 @@
#include <assert.h>
#include <libplacebo/renderer.h>
#include <libplacebo/utils/upload.h>
#include <libplacebo/utils/dav1d.h>
#ifdef HAVE_PLACEBO_VULKAN
# include <libplacebo/vulkan.h>
......@@ -72,7 +72,7 @@ typedef struct renderer_priv_ctx
// Lock protecting access to the texture
SDL_mutex *lock;
// Image to render, and planes backing them
struct pl_image image;
struct pl_frame image;
const struct pl_tex *plane_tex[3];
} Dav1dPlayRendererPrivateContext;
......@@ -319,22 +319,15 @@ static void placebo_render(void *cookie, const Dav1dPlaySettings *settings)
if (settings->highquality)
render_params = pl_render_default_params;
struct pl_render_target target;
pl_render_target_from_swapchain(&target, &frame);
target.profile = (struct pl_icc_profile) {
.data = NULL,
.len = 0,
};
#if PL_API_VER >= 66
pl_rect2df_aspect_copy(&target.dst_rect, &rd_priv_ctx->image.src_rect, 0.0);
if (pl_render_target_partial(&target))
pl_tex_clear(rd_priv_ctx->gpu, target.fbo, (float[4]){ 0.0 });
#endif
struct pl_frame target;
pl_frame_from_swapchain(&target, &frame);
pl_rect2df_aspect_copy(&target.crop, &rd_priv_ctx->image.crop, 0.0);
if (pl_frame_is_cropped(&target))
pl_tex_clear(rd_priv_ctx->gpu, frame.fbo, (float[4]){ 0.0 });
if (!pl_render_image(rd_priv_ctx->renderer, &rd_priv_ctx->image, &target, &render_params)) {
fprintf(stderr, "Failed rendering frame!\n");
pl_tex_clear(rd_priv_ctx->gpu, target.fbo, (float[4]){ 1.0 });
pl_tex_clear(rd_priv_ctx->gpu, frame.fbo, (float[4]){ 1.0 });
}
ok = pl_swapchain_submit_frame(rd_priv_ctx->swapchain);
......@@ -351,320 +344,37 @@ static void placebo_render(void *cookie, const Dav1dPlaySettings *settings)
static int placebo_upload_image(void *cookie, Dav1dPicture *dav1d_pic,
const Dav1dPlaySettings *settings)
{
Dav1dPlayRendererPrivateContext *rd_priv_ctx = cookie;
assert(rd_priv_ctx != NULL);
SDL_LockMutex(rd_priv_ctx->lock);
if (dav1d_pic == NULL) {
SDL_UnlockMutex(rd_priv_ctx->lock);
return 0;
}
int width = dav1d_pic->p.w;
int height = dav1d_pic->p.h;
int sub_x = 0, sub_y = 0;
int bytes = (dav1d_pic->p.bpc + 7) / 8; // rounded up
enum pl_chroma_location chroma_loc = PL_CHROMA_UNKNOWN;
struct pl_image *image = &rd_priv_ctx->image;
*image = (struct pl_image) {
.num_planes = 3,
.width = width,
.height = height,
.src_rect = {0, 0, width, height},
.repr = {
.bits = {
.sample_depth = bytes * 8,
.color_depth = dav1d_pic->p.bpc,
},
},
Dav1dPlayRendererPrivateContext *p = cookie;
assert(p != NULL);
int ret = 0;
if (!dav1d_pic)
return ret;
struct pl_dav1d_upload_params params = {
.picture = dav1d_pic,
.film_grain = settings->gpugrain,
.gpu_allocated = settings->zerocopy,
.asynchronous = true,
};
// Figure out the correct plane dimensions/count
switch (dav1d_pic->p.layout) {
case DAV1D_PIXEL_LAYOUT_I400:
image->num_planes = 1;
break;
case DAV1D_PIXEL_LAYOUT_I420:
sub_x = sub_y = 1;
break;
case DAV1D_PIXEL_LAYOUT_I422:
sub_x = 1;
break;
case DAV1D_PIXEL_LAYOUT_I444:
break;
}
// Set the right colorspace metadata etc.
switch (dav1d_pic->seq_hdr->pri) {
case DAV1D_COLOR_PRI_UNKNOWN: image->color.primaries = PL_COLOR_PRIM_UNKNOWN; break;
case DAV1D_COLOR_PRI_BT709: image->color.primaries = PL_COLOR_PRIM_BT_709; break;
case DAV1D_COLOR_PRI_BT470M: image->color.primaries = PL_COLOR_PRIM_BT_470M; break;
case DAV1D_COLOR_PRI_BT470BG: image->color.primaries = PL_COLOR_PRIM_BT_601_625; break;
case DAV1D_COLOR_PRI_BT601: image->color.primaries = PL_COLOR_PRIM_BT_601_625; break;
case DAV1D_COLOR_PRI_BT2020: image->color.primaries = PL_COLOR_PRIM_BT_2020; break;
case DAV1D_COLOR_PRI_XYZ:
// Handled below
assert(dav1d_pic->seq_hdr->mtrx == DAV1D_MC_IDENTITY);
break;
default:
printf("warning: unknown dav1d color primaries %d.. ignoring, picture "
"may be very incorrect\n", dav1d_pic->seq_hdr->pri);
break;
}
switch (dav1d_pic->seq_hdr->trc) {
case DAV1D_TRC_BT709:
case DAV1D_TRC_BT470M:
case DAV1D_TRC_BT470BG:
case DAV1D_TRC_BT601:
case DAV1D_TRC_SMPTE240:
case DAV1D_TRC_BT2020_10BIT:
case DAV1D_TRC_BT2020_12BIT:
// These all map to the effective "SDR" CRT-based EOTF, BT.1886
image->color.transfer = PL_COLOR_TRC_BT_1886;
break;
case DAV1D_TRC_UNKNOWN: image->color.transfer = PL_COLOR_TRC_UNKNOWN; break;
case DAV1D_TRC_LINEAR: image->color.transfer = PL_COLOR_TRC_LINEAR; break;
case DAV1D_TRC_SRGB: image->color.transfer = PL_COLOR_TRC_SRGB; break;
case DAV1D_TRC_SMPTE2084: image->color.transfer = PL_COLOR_TRC_PQ; break;
case DAV1D_TRC_HLG: image->color.transfer = PL_COLOR_TRC_HLG; break;
default:
printf("warning: unknown dav1d color transfer %d.. ignoring, picture "
"may be very incorrect\n", dav1d_pic->seq_hdr->trc);
break;
}
switch (dav1d_pic->seq_hdr->mtrx) {
case DAV1D_MC_IDENTITY:
// This is going to be either RGB or XYZ
if (dav1d_pic->seq_hdr->pri == DAV1D_COLOR_PRI_XYZ) {
image->repr.sys = PL_COLOR_SYSTEM_XYZ;
} else {
image->repr.sys = PL_COLOR_SYSTEM_RGB;
}
break;
case DAV1D_MC_UNKNOWN:
// PL_COLOR_SYSTEM_UNKNOWN maps to RGB, so hard-code this one
image->repr.sys = pl_color_system_guess_ycbcr(width, height);
break;
case DAV1D_MC_BT709: image->repr.sys = PL_COLOR_SYSTEM_BT_709; break;
case DAV1D_MC_BT601: image->repr.sys = PL_COLOR_SYSTEM_BT_601; break;
case DAV1D_MC_SMPTE240: image->repr.sys = PL_COLOR_SYSTEM_SMPTE_240M; break;
case DAV1D_MC_SMPTE_YCGCO: image->repr.sys = PL_COLOR_SYSTEM_YCGCO; break;
case DAV1D_MC_BT2020_NCL: image->repr.sys = PL_COLOR_SYSTEM_BT_2020_NC; break;
case DAV1D_MC_BT2020_CL: image->repr.sys = PL_COLOR_SYSTEM_BT_2020_C; break;
case DAV1D_MC_ICTCP:
// This one is split up based on the actual HDR curve in use
if (dav1d_pic->seq_hdr->trc == DAV1D_TRC_HLG) {
image->repr.sys = PL_COLOR_SYSTEM_BT_2100_HLG;
} else {
image->repr.sys = PL_COLOR_SYSTEM_BT_2100_PQ;
}
break;
default:
printf("warning: unknown dav1d color matrix %d.. ignoring, picture "
"may be very incorrect\n", dav1d_pic->seq_hdr->mtrx);
break;
}
if (dav1d_pic->seq_hdr->color_range) {
image->repr.levels = PL_COLOR_LEVELS_PC;
} else {
image->repr.levels = PL_COLOR_LEVELS_TV;
}
switch (dav1d_pic->seq_hdr->chr) {
case DAV1D_CHR_UNKNOWN: chroma_loc = PL_CHROMA_UNKNOWN; break;
case DAV1D_CHR_VERTICAL: chroma_loc = PL_CHROMA_LEFT; break;
case DAV1D_CHR_COLOCATED: chroma_loc = PL_CHROMA_TOP_LEFT; break;
}
#if PL_API_VER >= 63
if (settings->gpugrain && dav1d_pic->frame_hdr->film_grain.present) {
Dav1dFilmGrainData *src = &dav1d_pic->frame_hdr->film_grain.data;
struct pl_av1_grain_data *dst = &image->av1_grain;
*dst = (struct pl_av1_grain_data) {
.grain_seed = src->seed,
.num_points_y = src->num_y_points,
.chroma_scaling_from_luma = src->chroma_scaling_from_luma,
.num_points_uv = { src->num_uv_points[0], src->num_uv_points[1] },
.scaling_shift = src->scaling_shift,
.ar_coeff_lag = src->ar_coeff_lag,
.ar_coeff_shift = (int)src->ar_coeff_shift,
.grain_scale_shift = src->grain_scale_shift,
.uv_mult = { src->uv_mult[0], src->uv_mult[1] },
.uv_mult_luma = { src->uv_luma_mult[0], src->uv_luma_mult[1] },
.uv_offset = { src->uv_offset[0], src->uv_offset[1] },
.overlap = src->overlap_flag,
};
assert(sizeof(dst->points_y) == sizeof(src->y_points));
assert(sizeof(dst->points_uv) == sizeof(src->uv_points));
assert(sizeof(dst->ar_coeffs_y) == sizeof(src->ar_coeffs_y));
memcpy(dst->points_y, src->y_points, sizeof(src->y_points));
memcpy(dst->points_uv, src->uv_points, sizeof(src->uv_points));
memcpy(dst->ar_coeffs_y, src->ar_coeffs_y, sizeof(src->ar_coeffs_y));
// this one has different row sizes for alignment
for (int c = 0; c < 2; c++) {
for (int i = 0; i < 25; i++)
dst->ar_coeffs_uv[c][i] = src->ar_coeffs_uv[c][i];
}
}
#endif
// Upload the actual planes
struct pl_plane_data data[3] = {
{
// Y plane
.type = PL_FMT_UNORM,
.width = width,
.height = height,
.pixel_stride = bytes,
.row_stride = dav1d_pic->stride[0],
.component_size = {bytes * 8},
.component_map = {0},
}, {
// U plane
.type = PL_FMT_UNORM,
.width = width >> sub_x,
.height = height >> sub_y,
.pixel_stride = bytes,
.row_stride = dav1d_pic->stride[1],
.component_size = {bytes * 8},
.component_map = {1},
}, {
// V plane
.type = PL_FMT_UNORM,
.width = width >> sub_x,
.height = height >> sub_y,
.pixel_stride = bytes,
.row_stride = dav1d_pic->stride[1],
.component_size = {bytes * 8},
.component_map = {2},
},
};
bool ok = true;
for (int i = 0; i < image->num_planes; i++) {
if (settings->zerocopy) {
const struct pl_buf *buf = dav1d_pic->allocator_data;
assert(buf);
data[i].buf = buf;
data[i].buf_offset = (uintptr_t) dav1d_pic->data[i] - (uintptr_t) buf->data;
} else {
data[i].pixels = dav1d_pic->data[i];
}
ok &= pl_upload_plane(rd_priv_ctx->gpu, &image->planes[i], &rd_priv_ctx->plane_tex[i], &data[i]);
}
// Apply the correct chroma plane shift. This has to be done after pl_upload_plane
#if PL_API_VER >= 67
pl_image_set_chroma_location(image, chroma_loc);
#else
pl_chroma_location_offset(chroma_loc, &image->planes[1].shift_x, &image->planes[1].shift_y);
pl_chroma_location_offset(chroma_loc, &image->planes[2].shift_x, &image->planes[2].shift_y);
#endif
if (!ok) {
SDL_LockMutex(p->lock);
if (!pl_upload_dav1dpicture(p->gpu, &p->image, p->plane_tex, &params)) {
fprintf(stderr, "Failed uploading planes!\n");
*image = (struct pl_image) {0};
p->image = (struct pl_frame) {0};
ret = -1;
}
SDL_UnlockMutex(rd_priv_ctx->lock);
return !ok;
SDL_UnlockMutex(p->lock);
return ret;
}
// Align to power of 2
#define ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1))
static int placebo_alloc_pic(Dav1dPicture *const p, void *cookie)
static int placebo_alloc_pic(Dav1dPicture *const pic, void *cookie)
{
Dav1dPlayRendererPrivateContext *rd_priv_ctx = cookie;
assert(rd_priv_ctx != NULL);
SDL_LockMutex(rd_priv_ctx->lock);
const struct pl_gpu *gpu = rd_priv_ctx->gpu;
int ret = DAV1D_ERR(ENOMEM);
// Copied from dav1d_default_picture_alloc
const int hbd = p->p.bpc > 8;
const int aligned_w = ALIGN2(p->p.w, 128);
const int aligned_h = ALIGN2(p->p.h, 128);
const int has_chroma = p->p.layout != DAV1D_PIXEL_LAYOUT_I400;
const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444;
p->stride[0] = aligned_w << hbd;
p->stride[1] = has_chroma ? (aligned_w >> ss_hor) << hbd : 0;
// Align strides up to multiples of the GPU performance hints
p->stride[0] = ALIGN2(p->stride[0], gpu->limits.align_tex_xfer_stride);
p->stride[1] = ALIGN2(p->stride[1], gpu->limits.align_tex_xfer_stride);
// Aligning offsets to 4 also implicitly aligns to the texel size (1 or 2)
size_t off_align = ALIGN2(gpu->limits.align_tex_xfer_offset, 4);
const size_t y_sz = ALIGN2(p->stride[0] * aligned_h, off_align);
const size_t uv_sz = ALIGN2(p->stride[1] * (aligned_h >> ss_ver), off_align);
// The extra DAV1D_PICTURE_ALIGNMENTs are to brute force plane alignment,
// even in the case that the driver gives us insane alignments
const size_t pic_size = y_sz + 2 * uv_sz;
const size_t total_size = pic_size + DAV1D_PICTURE_ALIGNMENT * 4;
// Validate size limitations
if (total_size > gpu->limits.max_xfer_size) {
printf("alloc of %zu bytes exceeds limits\n", total_size);
goto err;
}
const struct pl_buf *buf = pl_buf_create(gpu, &(struct pl_buf_params) {
.type = PL_BUF_TEX_TRANSFER,
.host_mapped = true,
.size = total_size,
.memory_type = PL_BUF_MEM_HOST,
.user_data = p,
});
if (!buf) {
printf("alloc of GPU mapped buffer failed\n");
goto err;
}
assert(buf->data);
uintptr_t base = (uintptr_t) buf->data, data[3];
data[0] = ALIGN2(base, DAV1D_PICTURE_ALIGNMENT);
data[1] = ALIGN2(data[0] + y_sz, DAV1D_PICTURE_ALIGNMENT);
data[2] = ALIGN2(data[1] + uv_sz, DAV1D_PICTURE_ALIGNMENT);
// Sanity check offset alignment for the sake of debugging
if (data[0] - base != ALIGN2(data[0] - base, off_align) ||
data[1] - base != ALIGN2(data[1] - base, off_align) ||
data[2] - base != ALIGN2(data[2] - base, off_align))
{
printf("GPU buffer horribly misaligned, expect slowdown!\n");
}
p->allocator_data = (void *) buf;
p->data[0] = (void *) data[0];
p->data[1] = (void *) data[1];
p->data[2] = (void *) data[2];
ret = 0;
// fall through
err:
SDL_LockMutex(rd_priv_ctx->lock);
int ret = pl_allocate_dav1dpicture(pic, rd_priv_ctx->gpu);
SDL_UnlockMutex(rd_priv_ctx->lock);
return ret;
}
......@@ -673,11 +383,9 @@ static void placebo_release_pic(Dav1dPicture *pic, void *cookie)
{
Dav1dPlayRendererPrivateContext *rd_priv_ctx = cookie;
assert(rd_priv_ctx != NULL);
assert(pic->allocator_data);
SDL_LockMutex(rd_priv_ctx->lock);
const struct pl_gpu *gpu = rd_priv_ctx->gpu;
pl_buf_destroy(gpu, (const struct pl_buf **) &pic->allocator_data);
pl_release_dav1dpicture(pic, rd_priv_ctx->gpu);
SDL_UnlockMutex(rd_priv_ctx->lock);
}
......@@ -690,10 +398,7 @@ const Dav1dPlayRenderInfo rdr_placebo_vk = {
.update_frame = placebo_upload_image,
.alloc_pic = placebo_alloc_pic,
.release_pic = placebo_release_pic,
# if PL_API_VER >= 63
.supports_gpu_grain = 1,
# endif
};
#else
const Dav1dPlayRenderInfo rdr_placebo_vk = { NULL };
......@@ -706,12 +411,7 @@ const Dav1dPlayRenderInfo rdr_placebo_gl = {
.destroy_renderer = placebo_renderer_destroy,
.render = placebo_render,
.update_frame = placebo_upload_image,
.alloc_pic = placebo_alloc_pic,
.release_pic = placebo_release_pic,
# if PL_API_VER >= 63
.supports_gpu_grain = 1,
# endif
};
#else
const Dav1dPlayRenderInfo rdr_placebo_gl = { NULL };
......
......@@ -43,10 +43,10 @@ dav1dplay_sources = files(
sdl2_dependency = dependency('sdl2', version: '>= 2.0.1', required: true)
if sdl2_dependency.found()
dav1dplay_deps = [sdl2_dependency]
dav1dplay_deps = [sdl2_dependency, libm_dependency]
dav1dplay_cflags = []
placebo_dependency = dependency('libplacebo', version: '>= 1.18.0', required: false)
placebo_dependency = dependency('libplacebo', version: '>= 3.110.0', required: false)
if placebo_dependency.found()
dav1dplay_deps += placebo_dependency
......
......@@ -116,8 +116,8 @@
# define dav1d_uninit(x) x
#endif
#ifdef _MSC_VER
#include <intrin.h>
#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h>
static inline int ctz(const unsigned int mask) {
unsigned long idx;
......
/*
* Copyright © 2021, VideoLAN and dav1d authors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DAV1D_COMMON_FRAME_H
#define DAV1D_COMMON_FRAME_H
/*
 * Frame-type predicates. Each macro takes a pointer to a frame header and
 * inspects its frame_type field.
 *
 * The INTER and SWITCH values of Dav1dFrameType are the odd numbers {1, 3},
 * so the least significant bit of frame_type is enough to tell them apart
 * from KEY and INTRA frames.
 * See also: AV1 spec 6.8.2
 */

/* Evaluates to 1 when Dav1dFrameType == INTER || == SWITCH, otherwise 0. */
#define IS_INTER_OR_SWITCH(frame_header) \
    (1 & (frame_header)->frame_type)

/* Evaluates to 1 when Dav1dFrameType == KEY || == INTRA, otherwise 0. */
#define IS_KEY_OR_INTRA(frame_header) \
    (0 == IS_INTER_OR_SWITCH(frame_header))
#endif /* DAV1D_COMMON_FRAME_H */
......@@ -45,6 +45,7 @@ typedef struct Dav1dRef Dav1dRef;
#define DAV1D_MAX_FRAME_THREADS 256
#define DAV1D_MAX_TILE_THREADS 64
#define DAV1D_MAX_POSTFILTER_THREADS 256
typedef struct Dav1dLogger {
void *cookie; ///< Custom data to pass to the callback.
......@@ -67,7 +68,8 @@ typedef struct Dav1dSettings {
unsigned frame_size_limit; ///< maximum frame size, in pixels (0 = unlimited)
Dav1dPicAllocator allocator; ///< Picture allocator callback.
Dav1dLogger logger; ///< Logger callback.
uint8_t reserved[32]; ///< reserved for future use
int n_postfilter_threads;
uint8_t reserved[28]; ///< reserved for future use
} Dav1dSettings;
/**
......
......@@ -25,9 +25,7 @@
# Revision file (vcs_version.h) generation
dav1d_git_dir = join_paths(dav1d_src_root, '.git')
rev_target = vcs_tag(command: [
'git', '--git-dir', dav1d_git_dir,
'describe', '--tags', '--long',
'--match', '?.*.*', '--always'
'git', '--git-dir', dav1d_git_dir, 'describe', '--long', '--always'
],
input: 'vcs_version.h.in',
output: 'vcs_version.h'
......
# Copyright © 2018-2020, VideoLAN and dav1d authors
# Copyright © 2018-2021, VideoLAN and dav1d authors
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
......@@ -23,14 +23,14 @@
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
project('dav1d', ['c'],
version: '0.8.1',
version: '0.8.2',
default_options: ['c_std=c99',
'warning_level=2',
'buildtype=release',
'b_ndebug=if-release'],
meson_version: '>= 0.49.0')
dav1d_soname_version = '5.0.0'
dav1d_soname_version = '5.0.1'
dav1d_api_version_array = dav1d_soname_version.split('.')
dav1d_api_version_major = dav1d_api_version_array[0]
dav1d_api_version_minor = dav1d_api_version_array[1]
......@@ -128,7 +128,7 @@ if host_machine.system() == 'windows'
rc_data.set('API_VERSION_MAJOR', dav1d_api_version_major)
rc_data.set('API_VERSION_MINOR', dav1d_api_version_minor)
rc_data.set('API_VERSION_REVISION', dav1d_api_version_revision)
rc_data.set('COPYRIGHT_YEARS', '2020')
rc_data.set('COPYRIGHT_YEARS', '2021')
else
thread_dependency = dependency('threads')
thread_compat_dep = []
......@@ -168,6 +168,8 @@ if host_machine.system() == 'linux'
endif
endif
libm_dependency = cc.find_library('m', required: false)
# Header checks
......@@ -257,6 +259,7 @@ if cc.get_argument_syntax() != 'msvc'
else
optional_arguments += [
'-wd4028', # parameter different from declaration
'-wd4090', # broken with arrays of pointers
'-wd4996' # use of POSIX functions
]
endif
......
......@@ -53,3 +53,7 @@ option('fuzzer_ldflags',
option('stack_alignment',
type: 'integer',
value: 0)
option('xxhash_muxer',
type : 'feature',
value : 'auto')
......@@ -40,8 +40,7 @@ function ipred_dc_128_8bpc_neon, export=1
adr r2, L(ipred_dc_128_tbl)
sub r3, r3, #25
ldr r3, [r2, r3, lsl #2]
mov lr, #128
vdup.8 q0, lr
vmov.i8 q0, #128
add r2, r2, r3
add r12, r0, r1
lsl r1, r1, #1
......@@ -79,7 +78,7 @@ L(ipred_dc_128_tbl):
bgt 16b
pop {r4, pc}
320:
vdup.8 q1, lr
vmov.i8 q1, #128
32:
vst1.8 {d0, d1, d2, d3}, [r0, :128], r1
vst1.8 {d0, d1, d2, d3}, [r12, :128], r1
......@@ -89,20 +88,18 @@ L(ipred_dc_128_tbl):
bgt 32b
pop {r4, pc}
640:
vdup.8 q1, lr
vdup.8 q2, lr
vdup.8 q3, lr
vmov.i8 q1, #128
sub r1, r1, #32
64:
vst1.8 {d0, d1, d2, d3}, [r0, :128]!
vst1.8 {d0, d1, d2, d3}, [r12, :128]!
vst1.8 {d4, d5, d6, d7}, [r0, :128], r1
vst1.8 {d4, d5, d6, d7}, [r12, :128], r1
vst1.8 {d0, d1, d2, d3}, [r0, :128], r1
vst1.8 {d0, d1, d2, d3}, [r12, :128], r1
subs r4, r4, #4
vst1.8 {d0, d1, d2, d3}, [r0, :128]!
vst1.8 {d0, d1, d2, d3}, [r12, :128]!
vst1.8 {d4, d5, d6, d7}, [r0, :128], r1
vst1.8 {d4, d5, d6, d7}, [r12, :128], r1
vst1.8 {d0, d1, d2, d3}, [r0, :128], r1
vst1.8 {d0, d1, d2, d3}, [r12, :128], r1
bgt 64b
pop {r4, pc}
endfunc
......@@ -401,19 +398,17 @@ L(ipred_dc_top_tbl):
vrshrn.u16 d18, q0, #6
vdup.8 q0, d18[0]
vdup.8 q1, d18[0]
vdup.8 q2, d18[0]
vdup.8 q3, d18[0]
sub r1, r1, #32
64:
vst1.8 {d0, d1, d2, d3}, [r0, :128]!
vst1.8 {d0, d1, d2, d3}, [r12, :128]!
vst1.8 {d4, d5, d6, d7}, [r0, :128], r1
vst1.8 {d4, d5, d6, d7}, [r12, :128], r1
vst1.8 {d0, d1, d2, d3}, [r0, :128], r1
vst1.8 {d0, d1, d2, d3}, [r12, :128], r1
subs r4, r4, #4
vst1.8 {d0, d1, d2, d3}, [r0, :128]!
vst1.8 {d0, d1, d2, d3}, [r12, :128]!
vst1.8 {d4, d5, d6, d7}, [r0, :128], r1
vst1.8 {d4, d5, d6, d7}, [r12, :128], r1
vst1.8 {d0, d1, d2, d3}, [r0, :128], r1
vst1.8 {d0, d1, d2, d3}, [r12, :128], r1
bgt 64b
pop {r4-r5, pc}
endfunc
......@@ -538,20 +533,18 @@ L(ipred_dc_left_h64):
vdup.8 q0, d0[0]
bx r3
L(ipred_dc_left_w64):
sub r1, r1, #32
vmov.8 q1, q0
vmov.8 q2, q0
vmov.8 q3, q0
sub r1, r1, #32
1:
vst1.8 {d0, d1, d2, d3}, [r0, :128]!
vst1.8 {d0, d1, d2, d3}, [r12, :128]!
vst1.8 {d4, d5, d6, d7}, [r0, :128], r1
vst1.8 {d4, d5, d6, d7}, [r12, :128], r1
vst1.8 {d0, d1, d2, d3}, [r0, :128], r1
vst1.8 {d0, d1, d2, d3}, [r12, :128], r1
subs r4, r4, #4
vst1.8 {d0, d1, d2, d3}, [r0, :128]!
vst1.8 {d0, d1, d2, d3}, [r12, :128]!
vst1.8 {d4, d5, d6, d7}, [r0, :128], r1
vst1.8 {d4, d5, d6, d7}, [r12, :128], r1
vst1.8 {d0, d1, d2, d3}, [r0, :128], r1
vst1.8 {d0, d1, d2, d3}, [r12, :128], r1
bgt 1b
pop {r4-r5, pc}
endfunc
......@@ -600,10 +593,10 @@ L(ipred_dc_tbl):
L(ipred_dc_h4):
vld1.32 {d0[]}, [r2, :32]!
vpaddl.u8 d0, d0
add r2, r2, #1
vpadd.u16 d0, d0
bx r3
L(ipred_dc_w4):
add r2, r2, #1
vld1.32 {d1[]}, [r2]
vadd.s16 d0, d0, d30
vpaddl.u8 d1, d1
......@@ -635,10 +628,10 @@ L(ipred_dc_h8):
vld1.8 {d0}, [r2, :64]!
vpaddl.u8 d0, d0
vpadd.u16 d0, d0
add r2, r2, #1
vpadd.u16 d0, d0
bx r3
L(ipred_dc_w8):
add r2, r2, #1
vld1.8 {d2}, [r2]
vadd.s16 d0, d0, d30
vpaddl.u8 d2, d2
......@@ -672,10 +665,10 @@ L(ipred_dc_h16):
vaddl.u8 q0, d0, d1
vadd.u16 d0, d0, d1
vpadd.u16 d0, d0
add r2, r2, #1
vpadd.u16 d0, d0
bx r3
L(ipred_dc_w16):
add r2, r2, #1
vld1.8 {d2, d3}, [r2]
vadd.s16 d0, d0, d30
vaddl.u8 q1, d2, d3
......@@ -712,10 +705,10 @@ L(ipred_dc_h32):
vadd.u16 q0, q0, q1
vadd.u16 d0, d0, d1
vpadd.u16 d0, d0
add r2, r2, #1
vpadd.u16 d0, d0
bx r3
L(ipred_dc_w32):
add r2, r2, #1
vld1.8 {d2, d3, d4, d5}, [r2]
vadd.s16 d0, d0, d30
vaddl.u8 q1, d2, d3
......@@ -760,10 +753,10 @@ L(ipred_dc_h64):
vadd.u16 q0, q0, q1
vadd.u16 d0, d0, d1
vpadd.u16 d0, d0
add r2, r2, #1
vpadd.u16 d0, d0
bx r3
L(ipred_dc_w64):
add r2, r2, #1
vld1.8 {d2, d3, d4, d5}, [r2]!
vadd.s16 d0, d0, d30
vaddl.u8 q2, d4, d5
......@@ -789,11 +782,11 @@ L(ipred_dc_w64):
vadd.s16 d0, d0, d2
vadd.s16 d0, d0, d3
vshl.u16 d18, d0, d28
beq 1f // h = 16/32
beq 1f
// h = 16/32
movw lr, #(0x5556/2)
movt lr, #(0x3334/2)
mov r5, r4
and r5, r5, #31
and r5, r4, #31
lsr lr, lr, r5
vdup.16 d30, lr
vqdmulh.s16 d18, d18, d30
......@@ -801,18 +794,16 @@ L(ipred_dc_w64):
sub r1, r1, #32
vdup.8 q0, d18[0]
vdup.8 q1, d18[0]
vdup.8 q2, d18[0]
vdup.8 q3, d18[0]
2:
vst1.8 {d0, d1, d2, d3}, [r0, :128]!
vst1.8 {d0, d1, d2, d3}, [r12, :128]!
vst1.8 {d4, d5, d6, d7}, [r0, :128], r1
vst1.8 {d4, d5, d6, d7}, [r12, :128], r1
vst1.8 {d0, d1, d2, d3}, [r0, :128], r1
vst1.8 {d0, d1, d2, d3}, [r12, :128], r1
subs r4, r4, #4
vst1.8 {d0, d1, d2, d3}, [r0, :128]!
vst1.8 {d0, d1, d2, d3}, [r12, :128]!
vst1.8 {d4, d5, d6, d7}, [r0, :128], r1
vst1.8 {d4, d5, d6, d7}, [r12, :128], r1
vst1.8 {d0, d1, d2, d3}, [r0, :128], r1
vst1.8 {d0, d1, d2, d3}, [r12, :128], r1
bgt 2b
pop {r4-r6, pc}
endfunc
......@@ -1444,6 +1435,8 @@ function ipred_filter_8bpc_neon, export=1
vmovl.s8 q13, d28
vmovl.s8 q14, d29
add r8, r2, #1
sub r2, r2, #2
mov r7, #-2
bx r5
.align 2
......@@ -1455,8 +1448,6 @@ L(ipred_filter_tbl):
40:
vld1.32 {d0[]}, [r8] // top (0-3)
sub r2, r2, #2
mov r7, #-2
vmovl.u8 q0, d0 // top (0-3)
4:
vld1.32 {d2[]}, [r2], r7 // left (0-1) + topleft (2)
......@@ -1473,13 +1464,11 @@ L(ipred_filter_tbl):
vst1.32 {d4[0]}, [r0, :32], r1
vmovl.u8 q0, d4
vst1.32 {d4[1]}, [r6, :32], r1
vext.8 q0, q0, q0, #8 // move top from [4-7] to [0-3]
vmov d0, d1 // move top from [4-7] to [0-3]
bgt 4b
pop {r4-r8, pc}
80:
vld1.8 {d0}, [r8] // top (0-7)
sub r2, r2, #2
mov r7, #-2
vmovl.u8 q0, d0 // top (0-7)
8:
vld1.32 {d2[]}, [r2], r7 // left (0-1) + topleft (2)
......@@ -1503,16 +1492,14 @@ L(ipred_filter_tbl):
vqrshrun.s16 d5, q3, #4
vzip.32 d4, d5
subs r4, r4, #2
vst1.64 {d4}, [r0, :64], r1
vst1.8 {d4}, [r0, :64], r1
vmovl.u8 q0, d5
vst1.64 {d5}, [r6, :64], r1
vst1.8 {d5}, [r6, :64], r1
bgt 8b
pop {r4-r8, pc}
160:
320:
vpush {q4-q5}
sub r2, r2, #2
mov r7, #-2
sub r1, r1, r3
mov lr, r3
......@@ -2003,10 +1990,10 @@ L(ipred_cfl_tbl):
L(ipred_cfl_h4):
vld1.32 {d0[]}, [r2, :32]!
vpaddl.u8 d0, d0
add r2, r2, #1
vpadd.i16 d0, d0
bx r12
L(ipred_cfl_w4):
add r2, r2, #1
vld1.32 {d1[]}, [r2]
vadd.i16 d0, d0, d16
vpaddl.u8 d1, d1
......@@ -2031,10 +2018,10 @@ L(ipred_cfl_h8):
vld1.8 {d0}, [r2, :64]!
vpaddl.u8 d0, d0
vpadd.i16 d0, d0
add r2, r2, #1
vpadd.i16 d0, d0
bx r12
L(ipred_cfl_w8):
add r2, r2, #1
vld1.8 {d1}, [r2]
vadd.i16 d0, d0, d16
vpaddl.u8 d1, d1
......@@ -2061,10 +2048,10 @@ L(ipred_cfl_h16):
vaddl.u8 q0, d0, d1
vadd.i16 d0, d0, d1
vpadd.i16 d0, d0
add r2, r2, #1
vpadd.i16 d0, d0
bx r12
L(ipred_cfl_w16):
add r2, r2, #1
vld1.8 {q2}, [r2]
vadd.i16 d0, d0, d16
vaddl.u8 q2, d4, d5
......@@ -2094,10 +2081,10 @@ L(ipred_cfl_h32):
vadd.i16 q0, q2, q3
vadd.i16 d0, d0, d1
vpadd.i16 d0, d0
add r2, r2, #1
vpadd.i16 d0, d0
bx r12
L(ipred_cfl_w32):
add r2, r2, #1
vld1.8 {q2, q3}, [r2]
vadd.i16 d0, d0, d16
vaddl.u8 q2, d4, d5
......
This diff is collapsed.
......@@ -706,7 +706,7 @@ def_fn_4x4 identity, flipadst
vrshrn_8h \r14, \r15, q4, q5, #12 // t7a
vmull_vmlal_8h q2, q3, \r10, \r11, \r6, \r7, d1[3], d1[2] // -> t6a
vrshrn_8h \r6, \r7, q6, q7, #12 // t5a
vrshrn_8h \r10, \r11, q2, q3, #12 // taa
vrshrn_8h \r10, \r11, q2, q3, #12 // t6a
vqadd.s16 q2, \q1, \q3 // t4
vqsub.s16 \q1, \q1, \q3 // t5a
......@@ -1173,7 +1173,7 @@ function inv_dct_4h_x16_neon, export=1
vrshrn.i32 d6, q3, #12 // t11
vrshrn.i32 d7, q4, #12 // t12
vmull_vmlal q4, d25, d21, d0[0], d0[0] // -> t10a
vmull_vmlal q4, d25, d21, d0[0], d0[0] // -> t13a
vrshrn.i32 d4, q2, #12 // t10a
vrshrn.i32 d5, q4, #12 // t13a
......@@ -1480,53 +1480,6 @@ function inv_txfm_add_vert_4x16_neon
pop {pc}
endfunc
// sub_sp_align space
//
// Rounds sp down to a 16-byte boundary, saves the number of bytes that were
// dropped by the rounding in a 16-byte slot on the stack, and then reserves
// "space" further bytes below that slot. The reservation is undone by
// add_sp_align with the same "space" value.
//
// Clobbers: r7 (always), r12 (only on the _WIN32 path when space > 4096).
.macro sub_sp_align space
#if CONFIG_THUMB
// NOTE(review): sp is copied into r7 before masking, presumably because
// the Thumb encoding of AND cannot take sp as an operand - confirm.
mov r7, sp
and r7, r7, #15
#else
and r7, sp, #15
#endif
// r7 = sp & 15, i.e. the distance from sp down to the 16-byte boundary.
sub sp, sp, r7
// Now the stack is aligned, store the amount of adjustment back
// on the stack, as we don't want to waste a register as frame
// pointer.
// The pre-decrement is a full 16 bytes so that sp stays 16-byte aligned.
str r7, [sp, #-16]!
#ifdef _WIN32
.if \space > 8192
// Here, we'd need to touch two (or more) pages while decrementing
// the stack pointer.
.error "sub_sp_align doesn't support values over 8K at the moment"
.elseif \space > 4096
// Read one word 4096 bytes below sp before moving sp past it.
// NOTE(review): presumably this is a stack probe that touches the
// intermediate page so the OS can grow the stack - confirm.
sub r7, sp, #4096
ldr r12, [r7]
sub r7, r7, #(\space - 4096)
mov sp, r7
.else
sub sp, sp, #\space
.endif
#else
// The subtraction is split into a multiple-of-4096 part and a remainder.
// NOTE(review): presumably so that each part fits an immediate operand
// the assembler can encode - confirm.
.if \space >= 4096
sub sp, sp, #(\space)/4096*4096
.endif
.if (\space % 4096) != 0
sub sp, sp, #(\space)%4096
.endif
#endif
.endm
// add_sp_align space
//
// Counterpart of sub_sp_align: releases "space" bytes of stack, pops the
// alignment adjustment that sub_sp_align stored in its 16-byte slot, and
// adds it back to sp. It has to be invoked with the same "space" value as
// the matching sub_sp_align, otherwise the load below reads the wrong slot.
//
// Clobbers: r7.
.macro add_sp_align space
// The addition is split into a multiple-of-4096 part and a remainder,
// mirroring the non-Windows subtraction in sub_sp_align.
.if \space >= 4096
add sp, sp, #(\space)/4096*4096
.endif
.if (\space % 4096) != 0
add sp, sp, #(\space)%4096
.endif
// Load the saved adjustment and step sp over its 16-byte slot.
ldr r7, [sp], #16
// Add back the original stack adjustment
add sp, sp, r7
.endm
function inv_txfm_add_16x16_neon
sub_sp_align 512
ldrh r11, [r10], #2
......@@ -3248,7 +3201,9 @@ function inv_txfm_add_dct_dct_32x64_8bpc_neon, export=1
mov r8, #(32 - \i)
cmp r3, r11
blt 1f
.if \i < 28
ldrh r11, [r10], #2
.endif
.endif
add r7, r2, #(\i*2)
mov r8, #32*2
......@@ -3304,7 +3259,7 @@ function inv_txfm_add_dct_dct_64x16_8bpc_neon, export=1
add r6, r4, #(\i*64*2)
mov r9, #-2 // shift
bl inv_txfm_horz_dct_64x4_neon
.if \i < 8
.if \i < 12
ldrh r11, [r10], #2
.endif
.endr
......@@ -3353,7 +3308,9 @@ function inv_txfm_add_dct_dct_16x64_8bpc_neon, export=1
mov r8, #(32 - \i)
cmp r3, r11
blt 1f
.if \i < 28
ldrh r11, [r10], #2
.endif
.endif
add r7, r2, #(\i*2)
mov r8, #32*2
......
This diff is collapsed.
......@@ -141,13 +141,12 @@ function lpf_4_wd\wd\()_neon
vmov.i16 d6, #3
vbic d0, d1, d0 // (fm && wd >= 4 && !hev)
vmul.i16 d2, d2, d6
vmov.i16 d6, #4
vmov.i16 d7, #4
vadd.i16 d2, d2, d4
vmin.s16 d2, d2, d3 // f = iclip_diff()
vmov.i16 d7, #3
vmax.s16 d2, d2, d9 // f = iclip_diff()
vqadd.s16 d4, d6, d2 // f + 4
vqadd.s16 d5, d7, d2 // f + 3
vqadd.s16 d4, d7, d2 // f + 4
vqadd.s16 d5, d6, d2 // f + 3
vmin.s16 d4, d4, d3 // imin(f + 4, 128 << bitdepth_min_8 - 1)
vmin.s16 d5, d5, d3 // imin(f + 3, 128 << bitdepth_min_8 - 1)
vshr.s16 d4, d4, #3 // f1
......