...
 
Commits (9)
......@@ -50,8 +50,6 @@ extern int optreset;
* specific block, only when *not* __UNISTD_H_SOURCED__, in which
* to declare the extended API.
*/
#endif /* !defined(__GETOPT_H__) */
#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
#define __GETOPT_LONG_H__
......@@ -93,3 +91,5 @@ extern int getopt_long_only(int nargc, char * const *nargv, const char *options,
#endif
#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */
#endif /* !defined(__GETOPT_H__) */
/*
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MSCVER_STDATOMIC_H_
#define MSCVER_STDATOMIC_H_

/*
 * Minimal <stdatomic.h> stand-in.
 *
 * - If the build system defines HAVE_STDATOMIC_H, the real header is used.
 * - Otherwise, when compiling C with a compiler that defines _MSC_VER:
 *     - with clang (clang-cl), the next <stdatomic.h> on the include path is
 *       used if __has_include_next() finds one;
 *     - else a small subset of the C11 atomics API is emulated on top of the
 *       Win32 Interlocked* functions.
 * - In any other configuration this header declares nothing.
 *
 * Only the names defined below are provided by the emulation.
 */
#ifdef HAVE_STDATOMIC_H
# include <stdatomic.h>
#else /* !HAVE_STDATOMIC_H */

#if !defined(__cplusplus) && defined(_MSC_VER)

#pragma warning(push)
#pragma warning(disable:4067) /* newline for __has_include_next */

#if defined(__clang__) && __has_include_next(<stdatomic.h>)
/* use the clang stdatomic.h with clang-cl */
# include_next <stdatomic.h>
#else /* ! stdatomic.h */

#include <windows.h>

#include "common/attributes.h"

/* Atomic integer types, backed by the LONG/ULONG types that the
 * Interlocked* functions operate on. */
typedef volatile LONG  __declspec(align(32)) atomic_int;
typedef volatile ULONG __declspec(align(32)) atomic_uint;

/* Only these two orderings are declared. */
typedef enum {
    memory_order_relaxed,
    memory_order_acquire
} msvc_atomic_memory_order;

/*
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. `base + idx`, `cond ? a : b`) are cast and evaluated as a whole
 * instead of having the cast bind to their first operand only.
 */

/* Plain (non-interlocked) initial assignment. */
#define atomic_init(p_a, v)           do { *(p_a) = (v); } while(0)
/* Expands to InterlockedExchange(); unlike C11 atomic_store() the expression
 * therefore has a value, which callers should not rely on. */
#define atomic_store(p_a, v)          InterlockedExchange((LONG*)(p_a), (v))
/* Read implemented as a compare-exchange of 0 with 0. */
#define atomic_load(p_a)              InterlockedCompareExchange((LONG*)(p_a), 0, 0)
/* The memory order argument is accepted but ignored. */
#define atomic_load_explicit(p_a, mo) atomic_load(p_a)

/*
 * TODO use a special call to increment/decrement
 * using InterlockedIncrement/InterlockedDecrement
 */
#define atomic_fetch_add(p_a, inc)    InterlockedExchangeAdd((p_a), (inc))
/* Subtraction is expressed as adding the negated operand. */
#define atomic_fetch_sub(p_a, dec)    InterlockedExchangeAdd((p_a), -(dec))

#endif /* ! stdatomic.h */

#pragma warning(pop)

#endif /* !defined(__cplusplus) && defined(_MSC_VER) */

#endif /* !HAVE_STDATOMIC_H */

#endif /* MSCVER_STDATOMIC_H_ */
......@@ -39,7 +39,9 @@ else
thread_dependency = declare_dependency(sources: ['src/win32/thread.c'])
endif
dav1d_inc_dirs = include_directories(['include', 'include/dav1d'])
if cc.has_function('getopt_long', prefix : '#include <getopt.h>')
cdata.set('HAVE_GETOPT_H',1)
endif
#
# Option handling
......@@ -66,7 +68,9 @@ if host_machine.system() == 'windows'
]
endif
if not cc.check_header('stdatomic.h')
if cc.check_header('stdatomic.h')
cdata.set('HAVE_STDATOMIC_H',1)
elif cc.get_id() != 'msvc'
error('Atomics not supported')
endif
......@@ -165,6 +169,11 @@ cdata.set10('HAVE_ASM', is_asm_enabled)
#
# Generate config headers
#
if cdata.has('HAVE_GETOPT_H') and cdata.has('HAVE_STDATOMIC_H')
dav1d_inc_dirs = include_directories(['include', 'include/dav1d'])
else
dav1d_inc_dirs = include_directories(['include', 'include/dav1d', 'include/compat'])
endif
config_h_target = configure_file(output: 'config.h', configuration: cdata)
......@@ -309,7 +318,7 @@ dav1d_sources = files(
'tools/output/yuv.c'
)
if cc.get_id() == 'msvc'
if not cdata.has('HAVE_GETOPT_H')
dav1d_sources += files('tools/compat/getopt.c')
endif
......
......@@ -387,7 +387,7 @@ static const CdfModeContext av1_default_cdf = {
{ { AOM_CDF2(28015) }, { AOM_CDF2(21546) }, { AOM_CDF2(14400) }, },
{ { AOM_CDF2(28165) }, { AOM_CDF2(22401) }, { AOM_CDF2(16088) }, },
}, .txtp_inter = {
{}, {
{ 0 }, {
{ AOM_CDF16(4458, 5560, 7695, 9709, 13330, 14789, 17537, 20266,
21504, 22848, 23934, 25474, 27727, 28915, 30631) },
{ AOM_CDF16(1645, 2573, 4778, 5711, 7807, 8622, 10522, 15357,
......@@ -412,7 +412,7 @@ static const CdfModeContext av1_default_cdf = {
{ AOM_CDF2(748) },
},
}, .txtp_intra = {
{}, {
{ 0 }, {
{
{ AOM_CDF7(1535, 8035, 9461, 12751, 23467, 27825) },
{ AOM_CDF7(564, 3335, 9709, 10870, 18143, 28094) },
......
......@@ -449,20 +449,20 @@ static void read_pal_plane(Dav1dTileContext *const t, Av1Block *const b,
if (DEBUG_BLOCK_INFO) {
printf("Post-pal[pl=%d,sz=%d,cache_size=%d,used_cache=%d]: r=%d, cache=",
pl, b->pal_sz[pl], n_cache, n_used_cache, ts->msac.rng);
pl, pal_sz, n_cache, n_used_cache, ts->msac.rng);
for (int n = 0; n < n_cache; n++)
printf("%c%02x", n ? ' ' : '[', cache[n]);
printf("%s, pal=", n_cache ? "]" : "[]");
for (int n = 0; n < b->pal_sz[0]; n++)
for (int n = 0; n < pal_sz; n++)
printf("%c%02x", n ? ' ' : '[', pal[n]);
printf("]\n");
}
}
static void read_pal_uv(Dav1dTileContext *const t, Av1Block *const b,
const int sz_ctx, const int cbx4, const int cby4)
const int sz_ctx, const int bx4, const int by4)
{
read_pal_plane(t, b, 1, sz_ctx, cbx4, cby4);
read_pal_plane(t, b, 1, sz_ctx, bx4, by4);
// V pal coding
Dav1dTileState *const ts = t->ts;
......@@ -1013,8 +1013,8 @@ static void decode_b(Dav1dTileContext *const t,
msac_decode_bool_adapt(&ts->msac, ts->cdf.m.pal_uv[pal_ctx]);
if (DEBUG_BLOCK_INFO)
printf("Post-uv_pal[%d]: r=%d\n", use_uv_pal, ts->msac.rng);
if (use_uv_pal)
read_pal_uv(t, b, sz_ctx, cbx4, cby4);
if (use_uv_pal) // see aomedia bug 2183 for why we use luma coordinates
read_pal_uv(t, b, sz_ctx, bx4, by4);
}
}
......@@ -1118,17 +1118,22 @@ static void decode_b(Dav1dTileContext *const t,
if (has_chroma) {
memset(&t->l.uvmode[cby4], b->uv_mode, cbh4);
memset(&t->a->uvmode[cbx4], b->uv_mode, cbw4);
memset(&t->pal_sz_uv[1][cby4], b->pal_sz[1], cbh4);
memset(&t->pal_sz_uv[0][cbx4], b->pal_sz[1], cbw4);
// see aomedia bug 2183 for why we use luma coordinates here
memset(&t->pal_sz_uv[1][by4], b->pal_sz[1], bh4);
memset(&t->pal_sz_uv[0][bx4], b->pal_sz[1], bw4);
if (b->pal_sz[1]) for (int pl = 1; pl < 3; pl++) {
uint16_t *const pal = f->frame_thread.pass ?
f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
((t->bx >> 1) + (t->by & 1))][pl] : t->pal[pl];
for (int x = 0; x < cbw4; x++)
memcpy(t->al_pal[0][cbx4 + x][pl], pal, 16);
for (int y = 0; y < cbh4; y++)
memcpy(t->al_pal[1][cby4 + y][pl], pal, 16);
// see aomedia bug 2183 for why we use luma coordinates here
for (int x = 0; x < bw4; x++)
memcpy(t->al_pal[0][bx4 + x][pl], pal, 16);
for (int y = 0; y < bh4; y++)
memcpy(t->al_pal[1][by4 + y][pl], pal, 16);
}
} else { // see aomedia bug 2183 for why we reset this
memset(&t->pal_sz_uv[1][by4], 0, bh4);
memset(&t->pal_sz_uv[0][bx4], 0, bw4);
}
if ((f->frame_hdr.frame_type & 1) || f->frame_hdr.allow_intrabc) {
memset(&t->a->tx[bx4], t_dim->lw, bw4);
......@@ -1196,11 +1201,12 @@ static void decode_b(Dav1dTileContext *const t,
memset(&t->a->mode[bx4], DC_PRED, bw4);
memset(&t->l.pal_sz[by4], 0, bh4);
memset(&t->a->pal_sz[bx4], 0, bw4);
// see aomedia bug 2183 for why this is outside if (has_chroma)
memset(&t->pal_sz_uv[1][by4], 0, bh4);
memset(&t->pal_sz_uv[0][bx4], 0, bw4);
if (has_chroma) {
memset(&t->l.uvmode[cby4], DC_PRED, cbh4);
memset(&t->a->uvmode[cbx4], DC_PRED, cbw4);
memset(&t->pal_sz_uv[1][cby4], 0, cbh4);
memset(&t->pal_sz_uv[0][cbx4], 0, cbw4);
}
} else {
// inter-specific mode/mv coding
......@@ -1696,11 +1702,12 @@ static void decode_b(Dav1dTileContext *const t,
}
memset(&t->l.pal_sz[by4], 0, bh4);
memset(&t->a->pal_sz[bx4], 0, bw4);
// see aomedia bug 2183 for why this is outside if (has_chroma)
memset(&t->pal_sz_uv[1][by4], 0, bh4);
memset(&t->pal_sz_uv[0][bx4], 0, bw4);
if (has_chroma) {
memset(&t->l.uvmode[cby4], DC_PRED, cbh4);
memset(&t->a->uvmode[cbx4], DC_PRED, cbw4);
memset(&t->pal_sz_uv[1][cby4], 0, cbh4);
memset(&t->pal_sz_uv[0][cbx4], 0, cbw4);
}
memset(&t->a->tx_intra[bx4], b_dim[2], bw4);
memset(&t->l.tx_intra[by4], b_dim[3], bh4);
......
......@@ -145,23 +145,23 @@ dc_lfn(width, height,, unsigned dc = (width + height) >> 1; \
#endif
dc2d_lfn( 4, 4, dc >>= 3)
dc2d_lfn( 4, 8, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 2)))
dc2d_lfn( 4, 16, dc = iclip_pixel(MULTIPLIER_1x4 * dc >> (BASE_SHIFT + 2)))
dc2d_lfn( 8, 4, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 2)))
dc2d_lfn( 4, 8, dc = iclip_pixel(MULTIPLIER_1x2 * (dc >> 2) >> BASE_SHIFT))
dc2d_lfn( 4, 16, dc = iclip_pixel(MULTIPLIER_1x4 * (dc >> 2) >> BASE_SHIFT))
dc2d_lfn( 8, 4, dc = iclip_pixel(MULTIPLIER_1x2 * (dc >> 2) >> BASE_SHIFT))
dc2d_lfn( 8, 8, dc >>= 4)
dc2d_lfn( 8, 16, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 3)))
dc2d_lfn( 8, 32, dc = iclip_pixel(MULTIPLIER_1x4 * dc >> (BASE_SHIFT + 3)))
dc2d_lfn(16, 4, dc = iclip_pixel(MULTIPLIER_1x4 * dc >> (BASE_SHIFT + 2)))
dc2d_lfn(16, 8, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 3)))
dc2d_lfn( 8, 16, dc = iclip_pixel(MULTIPLIER_1x2 * (dc >> 3) >> BASE_SHIFT))
dc2d_lfn( 8, 32, dc = iclip_pixel(MULTIPLIER_1x4 * (dc >> 3) >> BASE_SHIFT))
dc2d_lfn(16, 4, dc = iclip_pixel(MULTIPLIER_1x4 * (dc >> 2) >> BASE_SHIFT))
dc2d_lfn(16, 8, dc = iclip_pixel(MULTIPLIER_1x2 * (dc >> 3) >> BASE_SHIFT))
dc2d_lfn(16, 16, dc >>= 5)
dc2d_lfn(16, 32, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 4)))
dc2d_lfn(16, 64, dc = iclip_pixel(MULTIPLIER_1x4 * dc >> (BASE_SHIFT + 4)))
dc2d_lfn(32, 8, dc = iclip_pixel(MULTIPLIER_1x4 * dc >> (BASE_SHIFT + 3)))
dc2d_lfn(32, 16, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 4)))
dc2d_lfn(16, 32, dc = iclip_pixel(MULTIPLIER_1x2 * (dc >> 4) >> BASE_SHIFT))
dc2d_lfn(16, 64, dc = iclip_pixel(MULTIPLIER_1x4 * (dc >> 4) >> BASE_SHIFT))
dc2d_lfn(32, 8, dc = iclip_pixel(MULTIPLIER_1x4 * (dc >> 3) >> BASE_SHIFT))
dc2d_lfn(32, 16, dc = iclip_pixel(MULTIPLIER_1x2 * (dc >> 4) >> BASE_SHIFT))
dc2d_lfn(32, 32, dc >>= 6)
dc2d_lfn(32, 64, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 5)))
dc2d_lfn(64, 16, dc = iclip_pixel(MULTIPLIER_1x4 * dc >> (BASE_SHIFT + 4)))
dc2d_lfn(64, 32, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 5)))
dc2d_lfn(32, 64, dc = iclip_pixel(MULTIPLIER_1x2 * (dc >> 5) >> BASE_SHIFT))
dc2d_lfn(64, 16, dc = iclip_pixel(MULTIPLIER_1x4 * (dc >> 4) >> BASE_SHIFT))
dc2d_lfn(64, 32, dc = iclip_pixel(MULTIPLIER_1x2 * (dc >> 5) >> BASE_SHIFT))
dc2d_lfn(64, 64, dc >>= 7)
#undef MULTIPLIER_1x2
......
......@@ -121,14 +121,16 @@ static void wiener_c(pixel *p, const ptrdiff_t p_stride,
{
// padding is 3 pixels above and 3 pixels below
const ptrdiff_t tmp_stride = sizeof(pixel) * (w + 6);
pixel tmp[(h + 6) * PXSTRIDE(tmp_stride)];
pixel *tmp = malloc(sizeof(*tmp) * ((h + 6) * PXSTRIDE(tmp_stride)));
if (tmp == NULL) return;
pixel *tmp_ptr = tmp;
padding(tmp, tmp_stride, p, p_stride, lpf, lpf_stride, w, h, edges);
// Values stored between horizontal and vertical filtering don't
// fit in a uint8_t.
uint16_t hor[(h + 6 /*padding*/) * w];
uint16_t *hor = malloc(sizeof(*hor) * ((h + 6 /*padding*/) * w));
if (hor == NULL) goto done;
uint16_t *hor_ptr = hor;
const int round_bits_h = 3 + (BITDEPTH == 12) * 2;
......@@ -164,6 +166,9 @@ static void wiener_c(pixel *p, const ptrdiff_t p_stride,
iclip_pixel((sum + rounding_off_v) >> round_bits_v);
}
}
done:
free(hor);
free(tmp);
}
// Sum over a 3x3 area
......@@ -412,12 +417,13 @@ static void selfguided_filter(int32_t *dst, const ptrdiff_t dst_stride,
{
const int tmp_stride = w + 6;
// FIXME Replace array with scratch memory
int32_t A_[(h + 6) * tmp_stride];
int32_t *A_ = malloc(sizeof(*A_) * ((h + 6) * tmp_stride));
int32_t *A = A_ + 3 * tmp_stride + 3;
// By inverting A and B after the boxsums, B can be of size coef instead
// of int32_t
coef B_[(h + 6) * tmp_stride];
coef *B_ = malloc(sizeof(*B_) * ((h + 6) * tmp_stride));
coef *B = B_ + 3 * tmp_stride + 3;
if (A_ == NULL || B_ == NULL) goto done;
const int step = (n == 25) + 1;
if (n == 25) {
......@@ -510,6 +516,9 @@ static void selfguided_filter(int32_t *dst, const ptrdiff_t dst_stride,
A += tmp_stride;
}
}
done:
free(A_);
free(B_);
}
static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
......@@ -519,13 +528,16 @@ static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
{
// padding is 3 pixels above and 3 pixels below
const int tmp_stride = sizeof(pixel) * (w + 6);
pixel tmp[(h + 6) * PXSTRIDE(tmp_stride)];
pixel *tmp = malloc(sizeof(*tmp) * ((h + 6) * PXSTRIDE(tmp_stride)));
if (tmp == NULL) return;
int32_t *dst1 = NULL;
int32_t *dst = malloc(sizeof(*dst) * (h * w));
if (dst == NULL) goto done;
padding(tmp, tmp_stride, p, p_stride, lpf, lpf_stride, w, h, edges);
// both r1 and r0 can't be zero
if (!sgr_params[sgr_idx][0]) {
int32_t dst[h * w];
const int s1 = sgr_params[sgr_idx][3];
selfguided_filter(dst, w, tmp, tmp_stride, w, h, 9, s1);
const int w1 = (1 << 7) - sgr_w[1];
......@@ -538,7 +550,6 @@ static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
p += PXSTRIDE(p_stride);
}
} else if (!sgr_params[sgr_idx][1]) {
int32_t dst[h * w];
const int s0 = sgr_params[sgr_idx][2];
selfguided_filter(dst, w, tmp, tmp_stride, w, h, 25, s0);
const int w0 = sgr_w[0];
......@@ -551,8 +562,9 @@ static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
p += PXSTRIDE(p_stride);
}
} else {
int32_t dst0[h * w];
int32_t dst1[h * w];
int32_t *dst0 = dst;
int32_t *dst1 = malloc(sizeof(*dst1) * (h * w));
if (dst1 == NULL) goto done;
const int s0 = sgr_params[sgr_idx][2];
const int s1 = sgr_params[sgr_idx][3];
const int w0 = sgr_w[0];
......@@ -569,6 +581,10 @@ static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
p += PXSTRIDE(p_stride);
}
}
done:
free(dst1);
free(dst);
free(tmp);
}
void bitfn(dav1d_loop_restoration_dsp_init)(Dav1dLoopRestorationDSPContext *const c) {
......
......@@ -227,8 +227,9 @@ static void lr_sbrow(const Dav1dFrameContext *const f, pixel *p, const int y,
const int filter_h =
imin(((1 << (6 + f->seq_hdr.sb128)) - 8 * !y) >> ss_ver, h - y);
pixel pre_lr_border[filter_h * 3];
pixel post_lr_border[filter_h * 3];
pixel *pre_lr_border = malloc(sizeof(*pre_lr_border) * (filter_h * 3));
pixel *post_lr_border = malloc(sizeof(*post_lr_border) * (filter_h * 3));
if (pre_lr_border == NULL || post_lr_border == NULL) goto done;
int unit_w = unit_size;
......@@ -270,6 +271,9 @@ static void lr_sbrow(const Dav1dFrameContext *const f, pixel *p, const int y,
}
p += unit_w;
}
done:
free(pre_lr_border);
free(post_lr_border);
}
void bytefn(dav1d_lr_sbrow)(Dav1dFrameContext *const f, pixel *const dst[3],
......
......@@ -1248,7 +1248,11 @@ void bytefn(recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize bs,
t->tl_4x4_filter = filter_2d;
} else {
const enum Filter2d filter_2d = b->filter2d;
#ifdef _MSC_VER
coef **tmp = (coef(**)) t->scratch.compinter;
#else /* !_MSC_VER */
coef (*tmp)[bw4 * bh4 * 16] = (coef (*)[bw4 * bh4 * 16]) t->scratch.compinter;
#endif /* !_MSC_VER */
int jnt_weight;
uint8_t *const seg_mask = t->scratch_seg_mask;
const uint8_t *mask;
......
......@@ -173,9 +173,15 @@ static void fill2d_16x2(uint8_t *dst, const int w, const int h,
const enum BlockSize bs,
const uint8_t (*const master)[64 * 64],
const wedge_code_type *const cb,
#ifdef _MSC_VER
uint8_t (*masks_444)[16][1024],
uint8_t (*masks_422)[16][512],
uint8_t (*masks_420)[16][256],
#else /* !_MSC_VER */
uint8_t (*masks_444)[16][w * h],
uint8_t (*masks_422)[16][w * h >> 1],
uint8_t (*masks_420)[16][w * h >> 2],
#endif
const unsigned signs)
{
uint8_t *ptr = dst;
......