Commit a9a09b70 authored by Ronald S. Bultje

cdef: simplify sec_taps

Also reduce scope of tables to inside the function where they are used.
parent 7775e15a
......@@ -34,19 +34,6 @@
#include "src/cdef.h"
/*
 * Offset pairs for the CDEF filter: one row per direction (8 rows) and one
 * entry per pass (2 columns). Each entry is written as row * 12 + column;
 * the factor 12 equals tmp_stride in cdef_filter_block_c, so the values are
 * presumably linear offsets into that function's padded tmp buffer.
 * NOTE(review): the code that indexes with these offsets is not visible
 * here - confirm against the filter loop.
 */
static const int8_t cdef_directions[8 /* dir */][2 /* pass */] = {
{ -1 * 12 + 1, -2 * 12 + 2 },
{ 0 * 12 + 1, -1 * 12 + 2 },
{ 0 * 12 + 1, 0 * 12 + 2 },
{ 0 * 12 + 1, 1 * 12 + 2 },
{ 1 * 12 + 1, 2 * 12 + 2 },
{ 1 * 12 + 0, 2 * 12 + 1 },
{ 1 * 12 + 0, 2 * 12 + 0 },
{ 1 * 12 + 0, 2 * 12 - 1 }
};
/*
 * Two sets of two primary tap weights. cdef_filter_block_c picks the row
 * with (pri_strength >> (BITDEPTH - 8)) & 1.
 */
static const uint8_t cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
/*
 * Secondary tap weights. Both rows hold the same values ({ 2, 1 }), so the
 * row index used to select them has no effect on the result.
 */
static const uint8_t cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
static inline int constrain(const int diff, const int threshold,
const int damping)
{
......@@ -112,12 +99,23 @@ cdef_filter_block_c(pixel *dst, const ptrdiff_t dst_stride,
const int sec_strength, const int dir,
const int damping, const enum CdefEdgeFlags edges)
{
static const int8_t cdef_directions[8 /* dir */][2 /* pass */] = {
{ -1 * 12 + 1, -2 * 12 + 2 },
{ 0 * 12 + 1, -1 * 12 + 2 },
{ 0 * 12 + 1, 0 * 12 + 2 },
{ 0 * 12 + 1, 1 * 12 + 2 },
{ 1 * 12 + 1, 2 * 12 + 2 },
{ 1 * 12 + 0, 2 * 12 + 1 },
{ 1 * 12 + 0, 2 * 12 + 0 },
{ 1 * 12 + 0, 2 * 12 - 1 }
};
static const uint8_t cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
static const uint8_t sec_taps[2] = { 2, 1 };
const ptrdiff_t tmp_stride = 12;
assert((w == 4 || w == 8) && (h == 4 || h == 8));
uint16_t tmp_buf[144]; // 12*12 is the maximum value of tmp_stride * (h + 4)
uint16_t *tmp = tmp_buf + 2 * tmp_stride + 2;
const uint8_t *const pri_taps = cdef_pri_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
const uint8_t *const sec_taps = cdef_sec_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
padding(tmp, tmp_stride, dst, dst_stride, left, top, w, h, edges);
......
......@@ -37,7 +37,7 @@ shufw_6543210x: db 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15
shufw_210xxxxx: db 4, 5, 2, 3, 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
pw_128: times 2 dw 128
pw_2048: times 2 dw 2048
tap_table: dw 4, 2, 3, 3, 2, 1, 2, 1
tap_table: dw 4, 2, 3, 3, 2, 1
db -1 * 16 + 1, -2 * 16 + 2
db 0 * 16 + 1, -1 * 16 + 2
db 0 * 16 + 1, 0 * 16 + 2
......@@ -371,15 +371,14 @@ cglobal cdef_filter_%1x%2, 4, 9, 16, 2 * 16 + (%2+4)*%3, \
vpbroadcastw m0, xm0 ; pri_strength
vpbroadcastw m1, xm1 ; sec_strength
and prid, 1
and secd, 1
lea tapq, [tap_table]
lea priq, [tapq+priq*4] ; pri_taps
lea secq, [tapq+secq*4+8] ; sec_taps
lea secq, [tapq+8] ; sec_taps
; off1/2/3[k] [6 total] from [tapq+16+(dir+0/2/6)*2+k]
; off1/2/3[k] [6 total] from [tapq+12+(dir+0/2/6)*2+k]
DEFINE_ARGS dst, stride, tap, dir, pri, sec, stride3
mov dird, r6m
lea tapq, [tapq+dirq*2+16]
lea tapq, [tapq+dirq*2+12]
%if %1*%2*2/mmsize > 1
DEFINE_ARGS dst, stride, dir, stk, pri, sec, h, off, k
mov hd, %1*%2*2/mmsize
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment