Commit e10b855c authored by David Michael Barr, committed by Henrik Gramner

Pass dimensions to cfl_ac and derive log2sz

parent d401106b
......@@ -52,7 +52,7 @@ typedef decl_angular_ipred_fn(*angular_ipred_fn);
*/
#define decl_cfl_ac_fn(name) \
void (name)(int16_t *ac, const pixel *y, ptrdiff_t stride, \
int w_pad, int h_pad)
int w_pad, int h_pad, int cw, int ch)
typedef decl_cfl_ac_fn(*cfl_ac_fn);
/*
......@@ -77,7 +77,7 @@ typedef struct Dav1dIntraPredDSPContext {
angular_ipred_fn intra_pred[N_IMPL_INTRA_PRED_MODES];
// chroma-from-luma
cfl_ac_fn cfl_ac[3 /* 420, 422, 444 */][N_RECT_TX_SIZES /* chroma tx size */];
cfl_ac_fn cfl_ac[3 /* 420, 422, 444 */];
cfl_pred_fn cfl_pred[DC_128_PRED + 1];
// palette
......
......@@ -614,7 +614,7 @@ static void ipred_filter_c(pixel *dst, const ptrdiff_t stride,
static NOINLINE void
cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
const int w_pad, const int h_pad, const int width, const int height,
const int ss_hor, const int ss_ver, const int log2sz)
const int ss_hor, const int ss_ver)
{
int y, x;
int16_t *const ac_orig = ac;
......@@ -642,6 +642,7 @@ cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
ac += width;
}
const int log2sz = ctz(width) + ctz(height);
int sum = (1 << log2sz) >> 1;
for (ac = ac_orig, y = 0; y < height; y++) {
for (x = 0; x < width; x++)
......@@ -658,49 +659,17 @@ cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
}
}
#define cfl_ac_fn(lw, lh, cw, ch, ss_hor, ss_ver, log2sz) \
static void cfl_ac_##lw##x##lh##_to_##cw##x##ch##_c(int16_t *const ac, \
const pixel *const ypx, \
const ptrdiff_t stride, \
const int w_pad, \
const int h_pad) \
#define cfl_ac_fn(fmt, ss_hor, ss_ver) \
static void cfl_ac_##fmt##_c(int16_t *const ac, const pixel *const ypx, \
const ptrdiff_t stride, const int w_pad, \
const int h_pad, const int cw, const int ch) \
{ \
cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver, log2sz); \
cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver); \
}
cfl_ac_fn( 8, 8, 4, 4, 1, 1, 4)
cfl_ac_fn( 8, 16, 4, 8, 1, 1, 5)
cfl_ac_fn( 8, 32, 4, 16, 1, 1, 6)
cfl_ac_fn(16, 8, 8, 4, 1, 1, 5)
cfl_ac_fn(16, 16, 8, 8, 1, 1, 6)
cfl_ac_fn(16, 32, 8, 16, 1, 1, 7)
cfl_ac_fn(32, 8, 16, 4, 1, 1, 6)
cfl_ac_fn(32, 16, 16, 8, 1, 1, 7)
cfl_ac_fn(32, 32, 16, 16, 1, 1, 8)
cfl_ac_fn( 8, 4, 4, 4, 1, 0, 4)
cfl_ac_fn( 8, 8, 4, 8, 1, 0, 5)
cfl_ac_fn(16, 4, 8, 4, 1, 0, 5)
cfl_ac_fn(16, 8, 8, 8, 1, 0, 6)
cfl_ac_fn(16, 16, 8, 16, 1, 0, 7)
cfl_ac_fn(32, 8, 16, 8, 1, 0, 7)
cfl_ac_fn(32, 16, 16, 16, 1, 0, 8)
cfl_ac_fn(32, 32, 16, 32, 1, 0, 9)
cfl_ac_fn( 4, 4, 4, 4, 0, 0, 4)
cfl_ac_fn( 4, 8, 4, 8, 0, 0, 5)
cfl_ac_fn( 4, 16, 4, 16, 0, 0, 6)
cfl_ac_fn( 8, 4, 8, 4, 0, 0, 5)
cfl_ac_fn( 8, 8, 8, 8, 0, 0, 6)
cfl_ac_fn( 8, 16, 8, 16, 0, 0, 7)
cfl_ac_fn( 8, 32, 8, 32, 0, 0, 8)
cfl_ac_fn(16, 4, 16, 4, 0, 0, 6)
cfl_ac_fn(16, 8, 16, 8, 0, 0, 7)
cfl_ac_fn(16, 16, 16, 16, 0, 0, 8)
cfl_ac_fn(16, 32, 16, 32, 0, 0, 9)
cfl_ac_fn(32, 8, 32, 8, 0, 0, 8)
cfl_ac_fn(32, 16, 32, 16, 0, 0, 9)
cfl_ac_fn(32, 32, 32, 32, 0, 0, 10)
cfl_ac_fn(420, 1, 1)
cfl_ac_fn(422, 1, 0)
cfl_ac_fn(444, 0, 0)
static void pal_pred_c(pixel *dst, const ptrdiff_t stride,
const uint16_t *const pal, const uint8_t *idx,
......@@ -730,40 +699,9 @@ void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
c->intra_pred[Z3_PRED ] = ipred_z3_c;
c->intra_pred[FILTER_PRED ] = ipred_filter_c;
// cfl functions are split per chroma subsampling type
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_4X4 ] = cfl_ac_8x8_to_4x4_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_4X8 ] = cfl_ac_8x16_to_4x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_4X16 ] = cfl_ac_8x32_to_4x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_8X4 ] = cfl_ac_16x8_to_8x4_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_8X8 ] = cfl_ac_16x16_to_8x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_8X16 ] = cfl_ac_16x32_to_8x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_16X4 ] = cfl_ac_32x8_to_16x4_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_16X8 ] = cfl_ac_32x16_to_16x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_16X16] = cfl_ac_32x32_to_16x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_4X4 ] = cfl_ac_8x4_to_4x4_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_4X8 ] = cfl_ac_8x8_to_4x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X4 ] = cfl_ac_16x4_to_8x4_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_8X8 ] = cfl_ac_16x8_to_8x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X16 ] = cfl_ac_16x16_to_8x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_16X8 ] = cfl_ac_32x8_to_16x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_16X16] = cfl_ac_32x16_to_16x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_16X32] = cfl_ac_32x32_to_16x32_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_4X4 ] = cfl_ac_4x4_to_4x4_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_4X8 ] = cfl_ac_4x8_to_4x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_4X16 ] = cfl_ac_4x16_to_4x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X4 ] = cfl_ac_8x4_to_8x4_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_8X8 ] = cfl_ac_8x8_to_8x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X16 ] = cfl_ac_8x16_to_8x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X32 ] = cfl_ac_8x32_to_8x32_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X4 ] = cfl_ac_16x4_to_16x4_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X8 ] = cfl_ac_16x8_to_16x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_16X16] = cfl_ac_16x16_to_16x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X32] = cfl_ac_16x32_to_16x32_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X8 ] = cfl_ac_32x8_to_32x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X16] = cfl_ac_32x16_to_32x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_32X32] = cfl_ac_32x32_to_32x32_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c;
c->cfl_pred[DC_PRED ] = ipred_cfl_c;
c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c;
......
......@@ -856,10 +856,10 @@ void bytefn(dav1d_recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize
((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
const int furthest_b =
((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
dsp->ipred.cfl_ac[f->cur.p.p.layout - 1]
[b->uvtx](ac, y_src, f->cur.p.stride[0],
cbw4 - (furthest_r >> ss_hor),
cbh4 - (furthest_b >> ss_ver));
dsp->ipred.cfl_ac[f->cur.p.p.layout - 1](ac, y_src, f->cur.p.stride[0],
cbw4 - (furthest_r >> ss_hor),
cbh4 - (furthest_b >> ss_ver),
cbw4 * 4, cbh4 * 4);
for (int pl = 0; pl < 2; pl++) {
if (!b->cfl_alpha[pl]) continue;
int angle = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment