Commit f813285c authored by Henrik Gramner

Shrink dav1d_dr_intra_derivative[]

parent a440af4a
......@@ -422,7 +422,7 @@ static void ipred_z1_c(pixel *dst, const ptrdiff_t stride,
const int enable_intra_edge_filter = angle >> 10;
angle &= 511;
assert(angle < 90);
int dx = dav1d_dr_intra_derivative[angle];
int dx = dav1d_dr_intra_derivative[angle >> 1];
pixel top_out[(64 + 64) * 2];
const pixel *top;
int max_base_x;
......@@ -476,8 +476,8 @@ static void ipred_z2_c(pixel *dst, const ptrdiff_t stride,
const int enable_intra_edge_filter = angle >> 10;
angle &= 511;
assert(angle > 90 && angle < 180);
int dy = dav1d_dr_intra_derivative[angle - 90];
int dx = dav1d_dr_intra_derivative[180 - angle];
int dy = dav1d_dr_intra_derivative[(angle - 90) >> 1];
int dx = dav1d_dr_intra_derivative[(180 - angle) >> 1];
const int upsample_left = enable_intra_edge_filter ?
get_upsample(width + height, 180 - angle, is_sm) : 0;
const int upsample_above = enable_intra_edge_filter ?
......@@ -557,7 +557,7 @@ static void ipred_z3_c(pixel *dst, const ptrdiff_t stride,
const int enable_intra_edge_filter = angle >> 10;
angle &= 511;
assert(angle > 180);
int dy = dav1d_dr_intra_derivative[270 - angle];
int dy = dav1d_dr_intra_derivative[(270 - angle) >> 1];
pixel left_out[(64 + 64) * 2];
const pixel *left;
int max_base_y;
......
......@@ -775,37 +775,36 @@ const uint8_t dav1d_sm_weights[128] = {
7, 6, 6, 5, 5, 4, 4, 4
};
const int16_t dav1d_dr_intra_derivative[90] = {
// More evenly spread out angles and limited to 10-bit
const uint16_t dav1d_dr_intra_derivative[44] = {
// Values that are 0 will never be used
0, 0, 0, // Approx angle
1023, 0, 0, // 3, ...
547, 0, 0, // 6, ...
372, 0, 0, 0, 0, // 9, ...
273, 0, 0, // 14, ...
215, 0, 0, // 17, ...
178, 0, 0, // 20, ...
151, 0, 0, // 23, ... (113 & 203 are base angles)
132, 0, 0, // 26, ...
116, 0, 0, // 29, ...
102, 0, 0, 0, // 32, ...
90, 0, 0, // 36, ...
80, 0, 0, // 39, ...
71, 0, 0, // 42, ...
64, 0, 0, // 45, ... (45 & 135 are base angles)
57, 0, 0, // 48, ...
51, 0, 0, // 51, ...
45, 0, 0, 0, // 54, ...
40, 0, 0, // 58, ...
35, 0, 0, // 61, ...
31, 0, 0, // 64, ...
27, 0, 0, // 67, ... (67 & 157 are base angles)
23, 0, 0, // 70, ...
19, 0, 0, // 73, ...
15, 0, 0, 0, 0, // 76, ...
11, 0, 0, // 81, ...
7, 0, 0, // 84, ...
3, 0, 0, // 87, ...
0, // Angles:
1023, 0, // 3, 93, 183
547, // 6, 96, 186
372, 0, 0, // 9, 99, 189
273, // 14, 104, 194
215, 0, // 17, 107, 197
178, // 20, 110, 200
151, 0, // 23, 113, 203 (113 & 203 are base angles)
132, // 26, 116, 206
116, 0, // 29, 119, 209
102, 0, // 32, 122, 212
90, // 36, 126, 216
80, 0, // 39, 129, 219
71, // 42, 132, 222
64, 0, // 45, 135, 225 (45 & 135 are base angles)
57, // 48, 138, 228
51, 0, // 51, 141, 231
45, 0, // 54, 144, 234
40, // 58, 148, 238
35, 0, // 61, 151, 241
31, // 64, 154, 244
27, 0, // 67, 157, 247 (67 & 157 are base angles)
23, // 70, 160, 250
19, 0, // 73, 163, 253
15, 0, // 76, 166, 256
11, 0, // 81, 171, 261
7, // 84, 174, 264
3 // 87, 177, 267
};
const int8_t ALIGN(dav1d_filter_intra_taps[5][64], 16) = {
......
......@@ -114,7 +114,7 @@ extern const int8_t dav1d_mc_warp_filter[193][8];
extern const int16_t dav1d_resize_filter[64][8];
extern const uint8_t dav1d_sm_weights[128];
extern const int16_t dav1d_dr_intra_derivative[90];
extern const uint16_t dav1d_dr_intra_derivative[44];
extern const int8_t dav1d_filter_intra_taps[5][64];
extern const uint8_t dav1d_obmc_masks[64];
......
......@@ -1308,9 +1308,10 @@ cglobal ipred_z1, 3, 8, 0, dst, stride, tl, w, h, angle, dx, maxbase
inc tlq
movsxd wq, [r6+wq*4]
add wq, r6
movzx dxd, angleb
mov dxd, angled
and dxd, 0x7e
add angled, 165 ; ~90
movzx dxd, word [r7+dxq*2]
movzx dxd, word [r7+dxq]
xor angled, 0x4ff ; d = 90 - angle
vpbroadcastd m3, [pw_512]
vpbroadcastd m4, [pw_62]
......@@ -2130,15 +2131,16 @@ cglobal ipred_z3, 4, 9, 0, dst, stride, tl, w, h, angle, dy, org_w, maxbase
lea r6, [ipred_z3_avx2_table]
tzcnt hd, hm
movifnidn angled, anglem
lea r7, [dr_intra_derivative+90*2]
lea r7, [dr_intra_derivative+45*2-1]
dec tlq
movsxd hq, [r6+hq*4]
sub angled, 180
add hq, r6
movzx dyd, angleb
mov dyd, angled
neg dyd
xor angled, 0x400
neg dyq
movzx dyd, word [r7+dyq*2]
or dyq, ~0x7e
movzx dyd, word [r7+dyq]
vpbroadcastd m3, [pw_512]
vpbroadcastd m4, [pw_62]
vpbroadcastd m5, [pw_64]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment