Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • videolan/dav1d
  • ePirat/dav1d
  • magsoft/dav1d
  • chouquette/dav1d
  • shiz/dav1d
  • tdaede/dav1d
  • tmatth/dav1d
  • dwbuiten/dav1d
  • mstorsjo/dav1d
  • janne/dav1d
  • ltrudeau/dav1d
  • rzumer/dav1d
  • lu_zero/dav1d
  • rbultje/dav1d
  • tbr/dav1d
  • thresh/dav1d
  • haasn/dav1d
  • midtskogen/dav1d
  • SmilingWolf/dav1d
  • lotharkript/dav1d
  • jamrial/dav1d
  • barrbrain/dav1d
  • robUx4/dav1d
  • jbk/dav1d
  • skal65535/dav1d
  • tappara/dav1d
  • dalecurtis/dav1d
  • montytyper/dav1d
  • TLaurent/dav1d
  • liwei/dav1d
  • CounterPillow/dav1d
  • rswarbrick-argon/dav1d
  • mjbshaw/dav1d
  • fcartegnie/dav1d
  • jyavenard/dav1d
  • xuefeng/dav1d
  • licao/dav1d
  • FredB/dav1d
  • jn7163/dav1d
  • bherman.aconspart/dav1d
  • anisse/dav1d
  • koda/dav1d
  • mihulet88/dav1d
  • sabdfl/dav1d
  • brion/dav1d
  • tj_davies/dav1d
  • EwoutH/dav1d
  • KyleSiefring/dav1d
  • manass3018/dav1d
  • krish-iyer/dav1d
  • stebler/dav1d
  • hchen1506/dav1d
  • f3ndot/dav1d
  • linkmauve/dav1d
  • malvanos/dav1d
  • rcss/dav1d
  • DonDiego/dav1d
  • ledyba-z/dav1d
  • seiqan2/dav1d
  • t0934812955/dav1d
  • xclaesse/dav1d
  • lynne/dav1d
  • loveingpowellalways/dav1d
  • govind.sharma/dav1d
  • kossh1/dav1d
  • davidandsabrina4ever2014/dav1d
  • abdouseck664/dav1d
  • jennifer.derrick61583/dav1d
  • msaas01925/dav1d
  • akymaster/dav1d
  • sylvestre/dav1d
  • morgan.shenkin/dav1d
  • B3rn4arD/dav1d
  • evzien/dav1d
  • mwozniak/dav1d
  • TompSciGit/dav1d
  • namse/dav1d
  • kkourin/dav1d
  • nico/dav1d
  • galad/dav1d
  • ltnokiago/dav1d
  • mindfreeze/dav1d
  • DmitriySychov/dav1d
  • oddstone/dav1d
  • nasirhemed/dav1d
  • richselwood/dav1d
  • longervision/dav1d
  • kurosu/dav1d
  • heitbaum/dav1d
  • Opiyonag/dav1d
  • salomethirot-arm/dav1d
  • dillmo71/dav1d
  • jwright-arm/dav1d
  • stonef385/dav1d
  • y-guyon/dav1d
  • andrekempe-arm/dav-1-d-reloaded
  • joedrago/dav1d
  • Rtytry/dav1d
  • altanai/dav1d
  • beiluo97/dav1d
  • wtc/dav1d
  • Asilx21/dav1d
  • DarioSucic/dav1d
  • Siberiawind/dav1d
  • edelmirocove17/dav1d
  • Mtndude/dav1d
  • dconrad/dav1d
  • ChildSoap/dav1d
  • kalan5269/dav1d
  • Jolincai/dav1d
  • kawiddoes/dav1d
  • ledyba/dav1d
  • minhhien231186/dav1d
  • beiluo971/dav1d
  • hakantezgoren34/dav1d
  • chigita73/dav1d
  • slomo/dav1d
  • Starbuck5/dav1d
  • jbeich/dav1d
  • berrylcm/dav1d
  • philip584521/dav1d
  • IgorKey/dav1d
  • shekar007/dav1d
  • jdek/dav1d
  • oldsssteveo/dav1d
  • Jingwiw/dav1d
  • vigneshv/dav1d
  • andrey.semashev/dav1d
  • v.cvetkov/dav1d
  • kattmedhatt/dav1d
  • ccawley2011/dav1d
  • rportalez/dav1d
  • Skantes/dav1d
  • arpadpanyik-arm/dav1d
  • asenat/dav1d
  • pcc/dav1d
  • nickg/dav1d
  • BogdanW3/dav1d
  • brad/dav1d
  • MARBEAN2/dav1d
  • yintong.ustc/dav1d
  • cosmin/dav1d
  • kasper93/dav1d
  • HecaiYuan/dav1d
  • jerrytsai569/dav1d
  • ttwuandes/dav1d
  • OctopusET/dav1d
  • maryla-uc/dav1d
  • Un1q32/dav1d
  • pranavk/dav1d
  • twulz/dav1d
  • gianni-r/dav1d
152 results
Show changes
Commits on Source (62)
/build*
/Session.vim
[._]*.swp
*~
tags
.DS_Store
# CI pipeline: a single "build" stage with three release-build jobs
# (native Debian amd64, plus win32 and win64 builds that pass a mingw-w64
# Meson cross file). All three jobs use the same container image.
stages:
- build
# Native build on a runner tagged debian/amd64.
build-debian:
image: registry.videolan.org:5000/dav1d-debian-unstable:20180927123816
stage: build
tags:
- debian
- amd64
script:
- meson build --buildtype release
# Build the include/version.h target explicitly before the full build.
- ninja -v -C build include/version.h
- ninja -v -C build
# 32-bit Windows build using the i686-w64-mingw32 cross file.
build-win32:
image: registry.videolan.org:5000/dav1d-debian-unstable:20180927123816
stage: build
tags:
- win32
script:
- meson build --buildtype release --cross-file /opt/crossfiles/i686-w64-mingw32.meson
- ninja -v -C build include/version.h
- ninja -v -C build
# 64-bit Windows build using the x86_64-w64-mingw32 cross file.
build-win64:
image: registry.videolan.org:5000/dav1d-debian-unstable:20180927123816
stage: build
tags:
- win64
script:
- meson build --buildtype release --cross-file /opt/crossfiles/x86_64-w64-mingw32.meson
- ninja -v -C build include/version.h
- ninja -v -C build
# Dav1d contribution guide
# dav1d contribution guide
## CoC
The [VideoLAN Code of Conduct](https://wiki.videolan.org/CoC) applies to this project.
## ToDo
Todo list can be found [on the wiki](https://code.videolan.org/videolan/dav1d/wikis/task-list).
## Codebase language
The codebase is developed with the following assumptions:
For the library:
- C language, C99 version, without the VLA or the Complex (*\_\_STDC_NO_COMPLEX__*) features, and without compiler extensions,
- asm in .asm files, using the NASM syntax,
- x86 asm in .asm files, using the NASM syntax,
- arm/arm64 asm in .S files, using the GAS syntax, limited to the subset supported by LLVM 5.0's internal assembler,
- no C++ is allowed, whatever the version.
For the tools and utils:
......@@ -44,4 +49,4 @@ Please read [How to Write a Git Commit Message](https://chris.beams.io/posts/git
## Patent license
You need to read and understand the [AV1 patents license](doc/PATENTS), before committing.
You need to read, understand, and agree to the [AV1 patents license](doc/PATENTS), before committing.
......@@ -58,8 +58,9 @@ The [VideoLAN Code of Conduct](https://wiki.videolan.org/CoC) applies to this pr
# Compile
1. Install [Meson](https://mesonbuild.com/)
2. Run `meson build`
1. Install [Meson](https://mesonbuild.com/) (0.47 or higher)
2. Run `meson build --buildtype release`
3. Build with `ninja -C build`
# Support
......
This diff is collapsed.
......@@ -72,4 +72,15 @@ static inline void coef_dump(const coef *buf, const int w, const int h,
}
}
/*
 * Debug helper: print the label `what` on its own line, then dump `h` rows
 * of `w` 16-bit values read from `buf`, one row per output line.
 * Rows are read back to back (the row stride equals `w`) and every value is
 * formatted as " %03d".
 */
static inline void ac_dump(const int16_t *buf, int w, int h, const char *what)
{
    printf("%s\n", what);
    for (; h != 0; h--, buf += w) {
        int col = 0;
        while (col < w) {
            printf(" %03d", buf[col]);
            col++;
        }
        printf("\n");
    }
}
#endif /* __DAV1D_COMMON_DUMP_H__ */
......@@ -37,11 +37,11 @@
#define debug_abort abort
#endif
#define validate_input_or_ret_with_msg(x, r, msg...) \
#define validate_input_or_ret_with_msg(x, r, ...) \
if (!(x)) { \
fprintf(stderr, "Input validation check \'%s\' failed in %s!\n", \
#x, __PRETTY_FUNCTION__); \
fprintf(stderr, msg); \
#x, __func__); \
fprintf(stderr, __VA_ARGS__); \
debug_abort(); \
return r; \
}
......@@ -49,7 +49,7 @@
#define validate_input_or_ret(x, r) \
if (!(x)) { \
fprintf(stderr, "Input validation check \'%s\' failed in %s!\n", \
#x, __PRETTY_FUNCTION__); \
#x, __func__); \
debug_abort(); \
return r; \
}
......
......@@ -80,8 +80,8 @@ DAV1D_API int dav1d_open(Dav1dContext **c_out, const Dav1dSettings *s);
DAV1D_API int dav1d_decode(Dav1dContext *c, Dav1dData *in, Dav1dPicture *out);
/**
* Close decoder instance, free all associated memory.
* Close decoder instance, free all associated memory, and set $c_out to NULL.
*/
DAV1D_API void dav1d_close(Dav1dContext *c);
DAV1D_API void dav1d_close(Dav1dContext **c_out);
#endif /* __DAV1D_H__ */
......@@ -24,20 +24,19 @@
project('dav1d', ['c'],
version: '0.0.1',
default_options: ['c_std=c11'],
default_options: ['c_std=c99'],
meson_version: '>= 0.47.0')
dav1d_src_root = meson.current_source_dir()
cdata = configuration_data()
cdata_asm = configuration_data()
cc = meson.get_compiler('c')
if not meson.is_cross_build()
# On windows, we use a compatibility layer to emulate pthread
if host_machine.system() != 'windows'
thread_dependency = dependency('threads')
else
thread_dependency = cc.find_library('pthread', required: false)
endif
if thread_dependency.found()
cdata.set('HAVE_PTHREAD_H', 1)
thread_dependency = declare_dependency(sources: ['src/win32/thread.c'])
endif
dav1d_inc_dirs = include_directories(['include', 'include/dav1d'])
......@@ -55,12 +54,7 @@ endforeach
#
feature_defines = [
['_REENTRANT', 1], # Define so that reentrant versions of several functions get declared
['_THREAD_SAFE', 1], # Same as _REENTANT for some other OSes
['_GNU_SOURCE', 1], # Enable GNU extensions on systems that have them
['_POSIX_PTHREAD_SEMANTICS', 1], # Enable threading extensions on Solaris
['__EXTENSIONS__', 1], # Enable general extensions on Solaris
['_FILE_OFFSET_BITS', 64], # Define to 64 for large files support
['_POSIX_C_SOURCE', '200112L'], # POSIX.1–2001 (IEEE Std 1003.1-2001)
]
if host_machine.system() == 'windows'
......@@ -69,14 +63,6 @@ if host_machine.system() == 'windows'
['UNICODE', 1], # Define to 1 for Unicode (Wide Chars) APIs
['_UNICODE', 1], # Define to 1 for Unicode (Wide Chars) APIs
['__USE_MINGW_ANSI_STDIO', 1], # Define to force use of MinGW printf
['_ISOC99_SOURCE', 1], # Extensions to ISO C89 from ISO C99
['_ISOC11_SOURCE', 1], # Extensions to ISO C99 from ISO C11
['_POSIX_SOURCE', 1], # IEEE Std 1003.1
['_POSIX_C_SOURCE', '200809L'], #IEEE Std 1003.1
['_XOPEN_SOURCE', 700], # POSIX and XPG 7th edition
['_XOPEN_SOURCE_EXTENDED', 1], # XPG things and X/Open Unix extensions
['_BSD_SOURCE', 1], # ISO C, POSIX, and 4.3BSD things
['_SVID_SOURCE', 1], # ISO C, POSIX, and SVID things
]
endif
......@@ -84,21 +70,59 @@ if not cc.check_header('stdatomic.h')
error('Atomics not supported')
endif
if cc.has_argument('-mpreferred-stack-boundary=5')
stackalign_flag = '-mpreferred-stack-boundary=5'
stackrealign_flag = '-mincoming-stack-boundary=4'
# When cross compiling for win64 gcc refuses to use -mpreferred-stack-boundary
# with a value which isn't 3 or 4. However, when cross compiling with clang, 5 is
# accepted.
elif (host_machine.system() == 'windows' and host_machine.cpu_family() == 'x86_64'
and cc.has_argument('-mpreferred-stack-boundary=4'))
stackalign_flag = '-mpreferred-stack-boundary=4'
stackrealign_flag = '-mincoming-stack-boundary=4'
elif cc.has_argument('-mstack-alignment=32')
stackalign_flag = '-mstack-alignment=32'
stackrealign_flag = '-mstackrealign'
stackalign_flag = []
stackrealign_flag = []
if host_machine.cpu_family().startswith('x86')
if cc.has_argument('-mpreferred-stack-boundary=5')
stackalign_flag = ['-mpreferred-stack-boundary=5']
stackrealign_flag = ['-mincoming-stack-boundary=4']
cdata_asm.set('STACK_ALIGNMENT', 32)
cdata.set('STACK_ALIGNMENT', 32)
elif cc.has_argument('-mpreferred-stack-boundary=4')
stackalign_flag = ['-mpreferred-stack-boundary=4']
stackrealign_flag = ['-mincoming-stack-boundary=4']
cdata_asm.set('STACK_ALIGNMENT', 16)
cdata.set('STACK_ALIGNMENT', 16)
elif cc.has_argument('-mstack-alignment=32')
stackalign_flag = ['-mstack-alignment=32']
stackrealign_flag = ['-mstackrealign']
cdata_asm.set('STACK_ALIGNMENT', 32)
cdata.set('STACK_ALIGNMENT', 32)
else
if host_machine.cpu_family() == 'x86_64'
cdata_asm.set('STACK_ALIGNMENT', 16)
cdata.set('STACK_ALIGNMENT', 16)
else
cdata_asm.set('STACK_ALIGNMENT', 4)
cdata.set('STACK_ALIGNMENT', 4)
endif
endif
endif
if host_machine.cpu_family().startswith('x86')
cdata.set10('ARCH_X86', true)
if host_machine.cpu_family() == 'x86_64'
cdata_asm.set10('ARCH_X86_64', true)
cdata.set10('ARCH_X86_64', true)
cdata_asm.set10('ARCH_X86_32', false)
cdata.set10('ARCH_X86_32', false)
cdata_asm.set10('PIC', true)
else
cdata_asm.set10('ARCH_X86_64', false)
cdata.set10('ARCH_X86_64', false)
cdata_asm.set10('ARCH_X86_32', true)
cdata.set10('ARCH_X86_32', true)
endif
else
error('Failed to specify stack alignment')
cdata.set10('ARCH_X86', false)
cdata.set10('ARCH_X86_64', false)
cdata.set10('ARCH_X86_32', false)
endif
if cc.symbols_have_underscore_prefix()
cdata_asm.set10('PREFIX', true)
endif
if cc.has_argument('-fvisibility=hidden')
......@@ -107,29 +131,43 @@ else
warning('Compiler does not support -fvisibility=hidden, all symbols will be public!')
endif
if cc.has_function('posix_memalign', prefix: '#include <stdlib.h>', args: ['-D_POSIX_C_SOURCE=200112'])
feature_defines += [['POSIX_C_SOURCE', 200112]]
if cc.has_function('posix_memalign', prefix: '#include <stdlib.h>', args: ['-D_POSIX_C_SOURCE=200112L'])
cdata.set('HAVE_POSIX_MEMALIGN', 1)
elif cc.has_function('_aligned_malloc', prefix: '#include <malloc.h>')
cdata.set('HAVE_ALIGNED_MALLOC', 1)
endif
add_project_arguments('-fomit-frame-pointer', '-ffast-math',
language: 'c')
if (get_option('buildtype') != 'debug' and
get_option('buildtype') != 'plain')
add_project_arguments('-fomit-frame-pointer', '-ffast-math',
language: 'c')
endif
warning_flags = [
'-Wundef',
'-Wvla', # should be '-Werror=vla
]
add_project_arguments('-Wall', '-Wundef',
language: 'c')
add_project_arguments(cc.get_supported_arguments(warning_flags), language: 'c')
foreach f : feature_defines
cdata.set(f.get(0), f.get(1))
endforeach
is_asm_enabled = (get_option('build_asm') == true and
host_machine.cpu_family().startswith('x86'))
cdata.set10('HAVE_ASM', is_asm_enabled)
#
# Generate config headers
#
config_h_target = configure_file(output: 'config.h', configuration: cdata)
if is_asm_enabled
config_asm_target = configure_file(output: 'config.asm', output_format: 'nasm', configuration: cdata_asm)
endif
subdir('include')
#
......@@ -149,20 +187,6 @@ libdav1d_tmpl_sources = files(
'src/recon.c'
)
# Build a helper library for each bitdepth
bitdepth_objs = []
foreach bitdepth : dav1d_bitdepths
bitdepth_lib = static_library(
'dav1d_bitdepth_@0@'.format(bitdepth),
libdav1d_tmpl_sources, config_h_target,
include_directories: dav1d_inc_dirs,
c_args: ['-DBITDEPTH=@0@'.format(bitdepth), stackalign_flag],
install: false,
build_by_default: false,
)
bitdepth_objs += bitdepth_lib.extract_all_objects()
endforeach
entrypoints_src = files(
'src/lib.c',
'src/thread_task.c'
......@@ -171,8 +195,9 @@ entrypoints_lib = static_library(
'libdav1dentrypoint',
entrypoints_src,
include_directories: dav1d_inc_dirs,
c_args: [stackrealign_flag],
c_args: stackrealign_flag,
install: false,
build_by_default: false,
)
entrypoints_objs = entrypoints_lib.extract_all_objects()
......@@ -196,12 +221,66 @@ libdav1d_sources = files(
'src/qm.c',
)
if host_machine.system() == 'windows'
libdav1d_sources += files('src/win32/thread.c')
if is_asm_enabled
libdav1d_sources += files(
'src/x86/cpu.c',
)
libdav1d_tmpl_sources += files(
'src/x86/mc_init.c',
)
libdav1d_sources_asm = files(
'src/x86/cpuid.asm',
'src/x86/mc.asm',
)
nasm = find_program('nasm')
if host_machine.system() == 'windows'
nasm_format = 'win'
elif host_machine.system() == 'darwin'
nasm_format = 'macho'
else
nasm_format = 'elf'
endif
if host_machine.cpu_family() == 'x86_64'
nasm_format += '64'
else
nasm_format += '32'
endif
nasm_gen = generator(nasm,
output: '@BASENAME@.obj',
depfile: '@BASENAME@.obj.ndep',
arguments: [
'-f', nasm_format,
'-I', '@CURRENT_SOURCE_DIR@/',
'-MQ', '@OUTPUT@', '-MF', '@DEPFILE@',
'@EXTRA_ARGS@',
'@INPUT@',
'-o', '@OUTPUT@'
])
nasm_objs = nasm_gen.process(libdav1d_sources_asm)
else
nasm_objs = []
endif
# Build a helper library for each bitdepth
bitdepth_objs = []
foreach bitdepth : dav1d_bitdepths
bitdepth_lib = static_library(
'dav1d_bitdepth_@0@'.format(bitdepth),
libdav1d_tmpl_sources, config_h_target,
include_directories: dav1d_inc_dirs,
c_args: ['-DBITDEPTH=@0@'.format(bitdepth)] + stackalign_flag,
install: false,
build_by_default: false,
)
bitdepth_objs += bitdepth_lib.extract_all_objects()
endforeach
libdav1d = library('dav1d',
libdav1d_sources, rev_target,
libdav1d_sources, rev_target, nasm_objs,
version: '0.0.1',
objects: [bitdepth_objs, entrypoints_objs],
include_directories: dav1d_inc_dirs,
......@@ -229,7 +308,8 @@ dav1d_sources = files(
dav1d = executable('dav1d',
dav1d_sources, rev_target,
link_with: libdav1d,
include_directories: [dav1d_inc_dirs, include_directories('tools')]
include_directories: [dav1d_inc_dirs, include_directories('tools')],
install: true,
)
#
......
......@@ -4,3 +4,8 @@ option('bitdepths',
type: 'array',
choices: ['8', '10'],
description: 'Enable only specified bitdepths')
# Boolean switch for the assembly sources (enabled by default).
# meson.build only turns assembly on when this option is true AND the host
# cpu_family starts with 'x86' (see is_asm_enabled).
option('build_asm',
type: 'boolean',
value: true,
description: 'Build asm files, if available')
......@@ -25,23 +25,46 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "config.h"
#include <assert.h>
#include <stdlib.h>
#include "common/intops.h"
#include "src/cdef.h"
static const int8_t cdef_directions[8 /* dir */][2 /* pass */][2 /* y, x */] = {
{ { -1, 1 }, { -2, 2 } },
{ { 0, 1 }, { -1, 2 } },
{ { 0, 1 }, { 0, 2 } },
{ { 0, 1 }, { 1, 2 } },
{ { 1, 1 }, { 2, 2 } },
{ { 1, 0 }, { 2, 1 } },
{ { 1, 0 }, { 2, 0 } },
{ { 1, 0 }, { 2, -1 } }
/*
 * Tap offsets used by cdef_filter_block_c() when w == 4, indexed by
 * [dir][pass]. Each entry is a (dy, dx) displacement flattened into a single
 * index, dy * 8 + dx, where 8 is the row stride of the temporary buffer for
 * 4-wide blocks (tmp_stride = 16 >> (w == 4)). The filter adds the entry to,
 * and subtracts it from, the current position to reach the two opposite taps.
 */
static const int8_t cdef_directions4[8 /* dir */][2 /* pass */] = {
{ -1 * 8 + 1, -2 * 8 + 2 },
{ 0 * 8 + 1, -1 * 8 + 2 },
{ 0 * 8 + 1, 0 * 8 + 2 },
{ 0 * 8 + 1, 1 * 8 + 2 },
{ 1 * 8 + 1, 2 * 8 + 2 },
{ 1 * 8 + 0, 2 * 8 + 1 },
{ 1 * 8 + 0, 2 * 8 + 0 },
{ 1 * 8 + 0, 2 * 8 - 1 }
};
/*
 * Tap offsets used by cdef_filter_block_c() when w == 8, indexed by
 * [dir][pass]. Same layout as the 4-wide table, but each (dy, dx)
 * displacement is flattened as dy * 16 + dx, because the temporary buffer
 * has a row stride of 16 for 8-wide blocks.
 */
static const int8_t cdef_directions8[8 /* dir */][2 /* pass */] = {
{ -1 * 16 + 1, -2 * 16 + 2 },
{ 0 * 16 + 1, -1 * 16 + 2 },
{ 0 * 16 + 1, 0 * 16 + 2 },
{ 0 * 16 + 1, 1 * 16 + 2 },
{ 1 * 16 + 1, 2 * 16 + 2 },
{ 1 * 16 + 0, 2 * 16 + 1 },
{ 1 * 16 + 0, 2 * 16 + 0 },
{ 1 * 16 + 0, 2 * 16 - 1 }
};
static const uint8_t cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
static const uint8_t cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
......@@ -78,10 +101,16 @@ static void cdef_filter_block_c(pixel *const dst, const ptrdiff_t dst_stride,
const int sec_strength, const int dir,
const int damping, const enum CdefEdgeFlags edges)
{
const ptrdiff_t tmp_stride = w + 4;
uint16_t tmp[tmp_stride * (h + 4)];
const ptrdiff_t tmp_stride = 16 >> (w == 4);
assert((w == 4 || w == 8) && (h == 4 || h == 8));
uint16_t tmp[192]; // 16*12 is the maximum value of tmp_stride * (h + 4)
uint16_t *tmp2 = tmp + 2 * tmp_stride + 2;
const uint8_t *const pri_taps = cdef_pri_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
const uint8_t *const sec_taps = cdef_sec_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
const int8_t (*cdef_directions)[2];
assert(w == 4 || w == 8);
cdef_directions = w == 4 ? cdef_directions4 : cdef_directions8;
// fill extended input buffer
int x_start = -2, x_end = w + 2, y_start = -2, y_end = h + 2;
......@@ -104,10 +133,10 @@ static void cdef_filter_block_c(pixel *const dst, const ptrdiff_t dst_stride,
}
for (int y = y_start; y < 0; y++)
for (int x = x_start; x < x_end; x++)
tmp[(y + 2) * tmp_stride + (x + 2)] = top[y & 1][x];
tmp2[y * tmp_stride + x] = top[y & 1][x];
for (int y = 0; y < y_end; y++)
for (int x = x_start; x < x_end; x++)
tmp[(y + 2) * tmp_stride + (x + 2)] = dst[y * PXSTRIDE(dst_stride) + x];
tmp2[y * tmp_stride + x] = dst[y * PXSTRIDE(dst_stride) + x];
// run actual filter
for (int y = 0; y < h; y++) {
......@@ -116,23 +145,21 @@ static void cdef_filter_block_c(pixel *const dst, const ptrdiff_t dst_stride,
const int px = dst[y * PXSTRIDE(dst_stride) + x];
int max = px, min = px;
for (int k = 0; k < 2; k++) {
#define extpx(y, x) tmp[((y) + 2) * tmp_stride + ((x) + 2)]
const int8_t *const off1 = cdef_directions[dir][k];
const int p0 = extpx(y + off1[0], x + off1[1]);
const int p1 = extpx(y - off1[0], x - off1[1]);
const int8_t off1 = cdef_directions[dir][k];
const int p0 = tmp2[y * tmp_stride + x + off1];
const int p1 = tmp2[y * tmp_stride + x - off1];
sum += pri_taps[k] * constrain(p0 - px, pri_strength, damping);
sum += pri_taps[k] * constrain(p1 - px, pri_strength, damping);
if (p0 != CDEF_VERY_LARGE) max = imax(p0, max);
if (p1 != CDEF_VERY_LARGE) max = imax(p1, max);
min = imin(p0, min);
min = imin(p1, min);
const int8_t *const off2 = cdef_directions[(dir + 2) & 7][k];
const int s0 = extpx(y + off2[0], x + off2[1]);
const int s1 = extpx(y - off2[0], x - off2[1]);
const int8_t *const off3 = cdef_directions[(dir + 6) & 7][k];
const int s2 = extpx(y + off3[0], x + off3[1]);
const int s3 = extpx(y - off3[0], x - off3[1]);
#undef extpx
const int8_t off2 = cdef_directions[(dir + 2) & 7][k];
const int s0 = tmp2[y * tmp_stride + x + off2];
const int s1 = tmp2[y * tmp_stride + x - off2];
const int8_t off3 = cdef_directions[(dir + 6) & 7][k];
const int s2 = tmp2[y * tmp_stride + x + off3];
const int s3 = tmp2[y * tmp_stride + x - off3];
if (s0 != CDEF_VERY_LARGE) max = imax(s0, max);
if (s1 != CDEF_VERY_LARGE) max = imax(s1, max);
if (s2 != CDEF_VERY_LARGE) max = imax(s2, max);
......
......@@ -850,7 +850,7 @@ static void decode_b(Dav1dTileContext *const t,
!(t->by & (31 >> !f->seq_hdr.sb128)))
{
const int prev_qidx = ts->last_qidx;
const int have_delta_q = f->frame_hdr.delta_q_present &&
const int have_delta_q = f->frame_hdr.delta.q.present &&
(bs != (f->seq_hdr.sb128 ? BS_128x128 : BS_64x64) || !b->skip);
if (have_delta_q) {
int delta_q = msac_decode_symbol_adapt(&ts->msac, ts->cdf.m.delta_q, 4);
......@@ -860,7 +860,7 @@ static void decode_b(Dav1dTileContext *const t,
}
if (delta_q) {
if (msac_decode_bool(&ts->msac, 128 << 7)) delta_q = -delta_q;
delta_q *= 1 << f->frame_hdr.delta_q_res_log2;
delta_q *= 1 << f->frame_hdr.delta.q.res_log2;
}
ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255);
if (have_delta_q && DEBUG_BLOCK_INFO)
......@@ -879,20 +879,20 @@ static void decode_b(Dav1dTileContext *const t,
// delta_lf
int8_t prev_delta_lf[4];
memcpy(prev_delta_lf, ts->last_delta_lf, 4);
if (have_delta_q && f->frame_hdr.delta_lf_present) {
const int n_lfs = f->frame_hdr.delta_lf_multi ?
if (have_delta_q && f->frame_hdr.delta.lf.present) {
const int n_lfs = f->frame_hdr.delta.lf.multi ?
f->seq_hdr.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1;
for (int i = 0; i < n_lfs; i++) {
int delta_lf = msac_decode_symbol_adapt(&ts->msac,
ts->cdf.m.delta_lf[i + f->frame_hdr.delta_lf_multi], 4);
ts->cdf.m.delta_lf[i + f->frame_hdr.delta.lf.multi], 4);
if (delta_lf == 3) {
const int n_bits = 1 + msac_decode_bools(&ts->msac, 3);
delta_lf = msac_decode_bools(&ts->msac, n_bits) + 1 + (1 << n_bits);
}
if (delta_lf) {
if (msac_decode_bool(&ts->msac, 128 << 7)) delta_lf = -delta_lf;
delta_lf *= 1 << f->frame_hdr.delta_lf_res_log2;
delta_lf *= 1 << f->frame_hdr.delta.lf.res_log2;
}
ts->last_delta_lf[i] = iclip(ts->last_delta_lf[i] + delta_lf, -63, 63);
if (have_delta_q && DEBUG_BLOCK_INFO)
......@@ -2786,8 +2786,8 @@ int submit_frame(Dav1dContext *const c) {
dav1d_thread_picture_ref(out_delayed, &f->cur);
}
f->bw = (f->frame_hdr.width + 3) >> 2;
f->bh = (f->frame_hdr.height + 3) >> 2;
f->bw = ((f->frame_hdr.width + 7) >> 3) << 1;
f->bh = ((f->frame_hdr.height + 7) >> 3) << 1;
f->sb128w = (f->bw + 31) >> 5;
f->sb128h = (f->bh + 31) >> 5;
f->sb_shift = 4 + f->seq_hdr.sb128;
......
......@@ -307,7 +307,7 @@ static inline int get_jnt_comp_ctx(const int order_hint_n_bits,
const int yb4, const int xb4)
{
const unsigned d0 = abs(get_poc_diff(order_hint_n_bits, ref0poc, poc));
const unsigned d1 = abs(get_poc_diff(order_hint_n_bits, ref1poc, poc));
const unsigned d1 = abs(get_poc_diff(order_hint_n_bits, poc, ref1poc));
const int offset = d0 == d1;
const int a_ctx = a->comp_type[xb4] >= COMP_INTER_AVG ||
a->ref[0][xb4] == 6;
......
......@@ -36,24 +36,24 @@
#include "src/ipred.h"
#define sz_grid(l_fn) \
l_fn( 4, 4); \
l_fn( 4, 8); \
l_fn( 4, 16); \
l_fn( 8, 4); \
l_fn( 8, 8); \
l_fn( 8, 16); \
l_fn( 8, 32); \
l_fn(16, 4); \
l_fn(16, 8); \
l_fn(16, 16); \
l_fn(16, 32); \
l_fn(16, 64); \
l_fn(32, 8); \
l_fn(32, 16); \
l_fn(32, 32); \
l_fn(32, 64); \
l_fn(64, 16); \
l_fn(64, 32); \
l_fn( 4, 4) \
l_fn( 4, 8) \
l_fn( 4, 16) \
l_fn( 8, 4) \
l_fn( 8, 8) \
l_fn( 8, 16) \
l_fn( 8, 32) \
l_fn(16, 4) \
l_fn(16, 8) \
l_fn(16, 16) \
l_fn(16, 32) \
l_fn(16, 64) \
l_fn(32, 8) \
l_fn(32, 16) \
l_fn(32, 32) \
l_fn(32, 64) \
l_fn(64, 16) \
l_fn(64, 32) \
l_fn(64, 64)
static __attribute__((noinline)) void
......@@ -99,31 +99,31 @@ static void dc##dir##_##w##x##h##_c(pixel *dst, const ptrdiff_t stride, \
dc_lfn(width, height, top, unsigned dc = width >> 1; \
for (int i = 0; i < width; i++) \
dc += topleft[1 + i]; \
dc >>= sh1); \
dc >>= sh1) \
dc_lfn(width, height, left, unsigned dc = height >> 1; \
for (int i = 0; i < height; i++) \
dc += topleft[-(1 + i)]; \
dc >>= sh2)
dc1d_lfns( 4, 4, 2, 2);
dc1d_lfns( 4, 8, 2, 3);
dc1d_lfns( 4, 16, 2, 4);
dc1d_lfns( 8, 4, 3, 2);
dc1d_lfns( 8, 8, 3, 3);
dc1d_lfns( 8, 16, 3, 4);
dc1d_lfns( 8, 32, 3, 5);
dc1d_lfns(16, 4, 4, 2);
dc1d_lfns(16, 8, 4, 3);
dc1d_lfns(16, 16, 4, 4);
dc1d_lfns(16, 32, 4, 5);
dc1d_lfns(16, 64, 4, 6);
dc1d_lfns(32, 8, 5, 3);
dc1d_lfns(32, 16, 5, 4);
dc1d_lfns(32, 32, 5, 5);
dc1d_lfns(32, 64, 5, 6);
dc1d_lfns(64, 16, 6, 4);
dc1d_lfns(64, 32, 6, 5);
dc1d_lfns(64, 64, 6, 6);
dc1d_lfns( 4, 4, 2, 2)
dc1d_lfns( 4, 8, 2, 3)
dc1d_lfns( 4, 16, 2, 4)
dc1d_lfns( 8, 4, 3, 2)
dc1d_lfns( 8, 8, 3, 3)
dc1d_lfns( 8, 16, 3, 4)
dc1d_lfns( 8, 32, 3, 5)
dc1d_lfns(16, 4, 4, 2)
dc1d_lfns(16, 8, 4, 3)
dc1d_lfns(16, 16, 4, 4)
dc1d_lfns(16, 32, 4, 5)
dc1d_lfns(16, 64, 4, 6)
dc1d_lfns(32, 8, 5, 3)
dc1d_lfns(32, 16, 5, 4)
dc1d_lfns(32, 32, 5, 5)
dc1d_lfns(32, 64, 5, 6)
dc1d_lfns(64, 16, 6, 4)
dc1d_lfns(64, 32, 6, 5)
dc1d_lfns(64, 64, 6, 6)
#define dc2d_lfn(width, height, dc_gen) \
dc_lfn(width, height,, unsigned dc = (width + height) >> 1; \
......@@ -133,30 +133,44 @@ dc_lfn(width, height,, unsigned dc = (width + height) >> 1; \
dc += topleft[-(i + 1)]; \
dc_gen)
dc2d_lfn( 4, 4, dc >>= 3);
dc2d_lfn( 4, 8, dc = iclip_pixel(0x5556 * dc >> 18));
dc2d_lfn( 4, 16, dc = iclip_pixel(0x3334 * dc >> 18));
dc2d_lfn( 8, 4, dc = iclip_pixel(0x5556 * dc >> 18));
dc2d_lfn( 8, 8, dc >>= 4);
dc2d_lfn( 8, 16, dc = iclip_pixel(0x5556 * dc >> 19));
dc2d_lfn( 8, 32, dc = iclip_pixel(0x3334 * dc >> 19));
dc2d_lfn(16, 4, dc = iclip_pixel(0x3334 * dc >> 18));
dc2d_lfn(16, 8, dc = iclip_pixel(0x5556 * dc >> 19));
dc2d_lfn(16, 16, dc >>= 5);
dc2d_lfn(16, 32, dc = iclip_pixel(0x5556 * dc >> 20));
dc2d_lfn(16, 64, dc = iclip_pixel(0x3334 * dc >> 20));
dc2d_lfn(32, 8, dc = iclip_pixel(0x3334 * dc >> 19));
dc2d_lfn(32, 16, dc = iclip_pixel(0x5556 * dc >> 20));
dc2d_lfn(32, 32, dc >>= 6);
dc2d_lfn(32, 64, dc = iclip_pixel(0x5556 * dc >> 21));
dc2d_lfn(64, 16, dc = iclip_pixel(0x3334 * dc >> 20));
dc2d_lfn(64, 32, dc = iclip_pixel(0x5556 * dc >> 21));
dc2d_lfn(64, 64, dc >>= 7);
/*
 * Fixed-point reciprocal constants for the non-square dc2d_lfn() instances
 * below. There, width + height is 3 * 2^k (1:2 and 2:1 blocks) or 5 * 2^k
 * (1:4 and 4:1 blocks), so the average cannot be taken with a plain shift.
 *   MULTIPLIER_1x2 == ceil(2^BASE_SHIFT / 3)
 *   MULTIPLIER_1x4 == ceil(2^BASE_SHIFT / 5)
 * Each use shifts right by BASE_SHIFT + k to fold in the power-of-two factor.
 * Builds with BITDEPTH != 8 use one more fractional bit (BASE_SHIFT 17).
 */
#if BITDEPTH == 8
#define MULTIPLIER_1x2 0x5556
#define MULTIPLIER_1x4 0x3334
#define BASE_SHIFT 16
#else
#define MULTIPLIER_1x2 0xAAAB
#define MULTIPLIER_1x4 0x6667
#define BASE_SHIFT 17
#endif
dc2d_lfn( 4, 4, dc >>= 3)
dc2d_lfn( 4, 8, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 2)))
dc2d_lfn( 4, 16, dc = iclip_pixel(MULTIPLIER_1x4 * dc >> (BASE_SHIFT + 2)))
dc2d_lfn( 8, 4, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 2)))
dc2d_lfn( 8, 8, dc >>= 4)
dc2d_lfn( 8, 16, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 3)))
dc2d_lfn( 8, 32, dc = iclip_pixel(MULTIPLIER_1x4 * dc >> (BASE_SHIFT + 3)))
dc2d_lfn(16, 4, dc = iclip_pixel(MULTIPLIER_1x4 * dc >> (BASE_SHIFT + 2)))
dc2d_lfn(16, 8, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 3)))
dc2d_lfn(16, 16, dc >>= 5)
dc2d_lfn(16, 32, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 4)))
dc2d_lfn(16, 64, dc = iclip_pixel(MULTIPLIER_1x4 * dc >> (BASE_SHIFT + 4)))
dc2d_lfn(32, 8, dc = iclip_pixel(MULTIPLIER_1x4 * dc >> (BASE_SHIFT + 3)))
dc2d_lfn(32, 16, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 4)))
dc2d_lfn(32, 32, dc >>= 6)
dc2d_lfn(32, 64, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 5)))
dc2d_lfn(64, 16, dc = iclip_pixel(MULTIPLIER_1x4 * dc >> (BASE_SHIFT + 4)))
dc2d_lfn(64, 32, dc = iclip_pixel(MULTIPLIER_1x2 * dc >> (BASE_SHIFT + 5)))
dc2d_lfn(64, 64, dc >>= 7)
#undef MULTIPLIER_1x2
#undef MULTIPLIER_1x4
#undef BASE_SHIFT
#define dc128_lfn(width, height) \
dc_lfn(width, height, 128, const unsigned dc = (1 << BITDEPTH) >> 1)
sz_grid(dc128_lfn);
sz_grid(dc128_lfn)
static __attribute__((noinline)) void
v_c(pixel *dst, const ptrdiff_t stride,
......@@ -175,7 +189,7 @@ static void v_##width##x##height##_##c(pixel *dst, const ptrdiff_t stride, \
v_c(dst, stride, topleft, width, height); \
}
sz_grid(v_lfn);
sz_grid(v_lfn)
static __attribute__((noinline)) void
h_c(pixel *dst, const ptrdiff_t stride,
......@@ -194,7 +208,7 @@ static void h_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
h_c(dst, stride, topleft, width, height); \
}
sz_grid(h_lfn);
sz_grid(h_lfn)
static __attribute__((noinline)) void
paeth_c(pixel *dst, const ptrdiff_t stride, const pixel *const tl_ptr,
......@@ -225,7 +239,7 @@ static void paeth_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
paeth_c(dst, stride, topleft, width, height); \
}
sz_grid(paeth_lfn);
sz_grid(paeth_lfn)
static const uint8_t sm_weight_arrays[] = {
// Unused, because we always offset by bs, which is at least 2.
......@@ -276,7 +290,7 @@ static void smooth_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
smooth_c(dst, stride, topleft, width, height); \
}
sz_grid(smooth_lfn);
sz_grid(smooth_lfn)
static __attribute__((noinline)) void
smooth_v_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft,
......@@ -303,7 +317,7 @@ static void smooth_v_##width##x##height##_c(pixel *dst, const ptrdiff_t stride,
smooth_v_c(dst, stride, topleft, width, height); \
}
sz_grid(smooth_v_lfn);
sz_grid(smooth_v_lfn)
static __attribute__((noinline)) void
smooth_h_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft,
......@@ -330,7 +344,7 @@ static void smooth_h_##width##x##height##_c(pixel *dst, const ptrdiff_t stride,
smooth_h_c(dst, stride, topleft, width, height); \
}
sz_grid(smooth_h_lfn);
sz_grid(smooth_h_lfn)
static const int16_t dr_intra_derivative[90] = {
// More evenly spread out angles and limited to 10-bit
......@@ -454,7 +468,7 @@ z1_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in,
angle &= 511;
assert(angle < 90);
const int dx = dr_intra_derivative[angle];
pixel top_out[(width + height) * 2];
pixel top_out[(64 + 64) * 2];
const pixel *top;
int max_base_x;
const int upsample_above = get_upsample(width + height, 90 - angle, is_sm);
......@@ -506,7 +520,7 @@ static void z1_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
z1_c(dst, stride, topleft, angle, width, height); \
}
sz_grid(z1_lfn);
sz_grid(z1_lfn)
static __attribute__((noinline)) void
z2_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in,
......@@ -519,7 +533,7 @@ z2_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in,
const int dx = dr_intra_derivative[180 - angle];
const int upsample_left = get_upsample(width + height, 180 - angle, is_sm);
const int upsample_above = get_upsample(width + height, angle - 90, is_sm);
pixel edge[width * 2 + height * 2 + 1];
pixel edge[64 * 2 + 64 * 2 + 1];
pixel *const topleft = &edge[height * 2];
if (upsample_above) {
......@@ -587,7 +601,7 @@ static void z2_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
z2_c(dst, stride, topleft, angle, width, height); \
}
sz_grid(z2_lfn);
sz_grid(z2_lfn)
static __attribute__((noinline)) void
z3_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in,
......@@ -597,7 +611,7 @@ z3_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in,
angle &= 511;
assert(angle > 180);
const int dy = dr_intra_derivative[270 - angle];
pixel left_out[(width + height) * 2];
pixel left_out[(64 + 64) * 2];
const pixel *left;
int max_base_y;
const int upsample_left = get_upsample(width + height, angle - 180, is_sm);
......@@ -652,7 +666,7 @@ static void z3_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
z3_c(dst, stride, topleft, angle, width, height); \
}
sz_grid(z3_lfn);
sz_grid(z3_lfn)
static const int8_t av1_filter_intra_taps[5][8][8] = {
{
......@@ -758,20 +772,20 @@ static void filter_##width##x##height##_c(pixel *const dst, \
filter_intra_c(dst, stride, topleft, filt_idx, width, height); \
}
filter_lfn( 4, 4);
filter_lfn( 8, 4);
filter_lfn(16, 4);
filter_lfn( 4, 8);
filter_lfn( 8, 8);
filter_lfn(16, 8);
filter_lfn(32, 8);
filter_lfn( 4, 16);
filter_lfn( 8, 16);
filter_lfn(16, 16);
filter_lfn(32, 16);
filter_lfn( 8, 32);
filter_lfn(16, 32);
filter_lfn(32, 32);
filter_lfn( 4, 4)
filter_lfn( 8, 4)
filter_lfn(16, 4)
filter_lfn( 4, 8)
filter_lfn( 8, 8)
filter_lfn(16, 8)
filter_lfn(32, 8)
filter_lfn( 4, 16)
filter_lfn( 8, 16)
filter_lfn(16, 16)
filter_lfn(32, 16)
filter_lfn( 8, 32)
filter_lfn(16, 32)
filter_lfn(32, 32)
static __attribute__((noinline)) void
cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
......@@ -800,7 +814,7 @@ cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
ypx += PXSTRIDE(stride) << ss_ver;
}
for (; y < height; y++) {
memcpy(ac, &ac[-32], width * sizeof(*ac));
memcpy(ac, &ac[-width], width * sizeof(*ac));
ac += width;
}
......@@ -830,39 +844,39 @@ static void cfl_ac_##lw##x##lh##_to_##cw##x##ch##_c(int16_t *const ac, \
cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver, log2sz); \
}
cfl_ac_fn( 8, 8, 4, 4, 1, 1, 4);
cfl_ac_fn( 8, 16, 4, 8, 1, 1, 5);
cfl_ac_fn( 8, 32, 4, 16, 1, 1, 6);
cfl_ac_fn(16, 8, 8, 4, 1, 1, 5);
cfl_ac_fn(16, 16, 8, 8, 1, 1, 6);
cfl_ac_fn(16, 32, 8, 16, 1, 1, 7);
cfl_ac_fn(32, 8, 16, 4, 1, 1, 6);
cfl_ac_fn(32, 16, 16, 8, 1, 1, 7);
cfl_ac_fn(32, 32, 16, 16, 1, 1, 8);
cfl_ac_fn( 8, 4, 4, 4, 1, 0, 4);
cfl_ac_fn( 8, 8, 4, 8, 1, 0, 5);
cfl_ac_fn(16, 4, 8, 4, 1, 0, 5);
cfl_ac_fn(16, 8, 8, 8, 1, 0, 6);
cfl_ac_fn(16, 16, 8, 16, 1, 0, 7);
cfl_ac_fn(32, 8, 16, 8, 1, 0, 7);
cfl_ac_fn(32, 16, 16, 16, 1, 0, 8);
cfl_ac_fn(32, 32, 16, 32, 1, 0, 9);
cfl_ac_fn( 4, 4, 4, 4, 0, 0, 4);
cfl_ac_fn( 4, 8, 4, 8, 0, 0, 5);
cfl_ac_fn( 4, 16, 4, 16, 0, 0, 6);
cfl_ac_fn( 8, 4, 8, 4, 0, 0, 5);
cfl_ac_fn( 8, 8, 8, 8, 0, 0, 6);
cfl_ac_fn( 8, 16, 8, 16, 0, 0, 7);
cfl_ac_fn( 8, 32, 8, 32, 0, 0, 8);
cfl_ac_fn(16, 4, 16, 4, 0, 0, 6);
cfl_ac_fn(16, 8, 16, 8, 0, 0, 7);
cfl_ac_fn(16, 16, 16, 16, 0, 0, 8);
cfl_ac_fn(16, 32, 16, 32, 0, 0, 9);
cfl_ac_fn(32, 8, 32, 8, 0, 0, 8);
cfl_ac_fn(32, 16, 32, 16, 0, 0, 9);
cfl_ac_fn(32, 32, 32, 32, 0, 0, 10);
cfl_ac_fn( 8, 8, 4, 4, 1, 1, 4)
cfl_ac_fn( 8, 16, 4, 8, 1, 1, 5)
cfl_ac_fn( 8, 32, 4, 16, 1, 1, 6)
cfl_ac_fn(16, 8, 8, 4, 1, 1, 5)
cfl_ac_fn(16, 16, 8, 8, 1, 1, 6)
cfl_ac_fn(16, 32, 8, 16, 1, 1, 7)
cfl_ac_fn(32, 8, 16, 4, 1, 1, 6)
cfl_ac_fn(32, 16, 16, 8, 1, 1, 7)
cfl_ac_fn(32, 32, 16, 16, 1, 1, 8)
cfl_ac_fn( 8, 4, 4, 4, 1, 0, 4)
cfl_ac_fn( 8, 8, 4, 8, 1, 0, 5)
cfl_ac_fn(16, 4, 8, 4, 1, 0, 5)
cfl_ac_fn(16, 8, 8, 8, 1, 0, 6)
cfl_ac_fn(16, 16, 8, 16, 1, 0, 7)
cfl_ac_fn(32, 8, 16, 8, 1, 0, 7)
cfl_ac_fn(32, 16, 16, 16, 1, 0, 8)
cfl_ac_fn(32, 32, 16, 32, 1, 0, 9)
cfl_ac_fn( 4, 4, 4, 4, 0, 0, 4)
cfl_ac_fn( 4, 8, 4, 8, 0, 0, 5)
cfl_ac_fn( 4, 16, 4, 16, 0, 0, 6)
cfl_ac_fn( 8, 4, 8, 4, 0, 0, 5)
cfl_ac_fn( 8, 8, 8, 8, 0, 0, 6)
cfl_ac_fn( 8, 16, 8, 16, 0, 0, 7)
cfl_ac_fn( 8, 32, 8, 32, 0, 0, 8)
cfl_ac_fn(16, 4, 16, 4, 0, 0, 6)
cfl_ac_fn(16, 8, 16, 8, 0, 0, 7)
cfl_ac_fn(16, 16, 16, 16, 0, 0, 8)
cfl_ac_fn(16, 32, 16, 32, 0, 0, 9)
cfl_ac_fn(32, 8, 32, 8, 0, 0, 8)
cfl_ac_fn(32, 16, 32, 16, 0, 0, 9)
cfl_ac_fn(32, 32, 32, 32, 0, 0, 10)
static __attribute__((noinline)) void
cfl_pred_c(pixel *dstU, pixel *dstV, const ptrdiff_t stride,
......@@ -896,10 +910,10 @@ static void cfl_pred_##width##xN_c(pixel *const dstU, \
cfl_pred_c(dstU, dstV, stride, ac, dc_pred, alphas, width, height); \
}
cfl_pred_fn( 4);
cfl_pred_fn( 8);
cfl_pred_fn(16);
cfl_pred_fn(32);
cfl_pred_fn( 4)
cfl_pred_fn( 8)
cfl_pred_fn(16)
cfl_pred_fn(32)
static void pal_pred_c(pixel *dst, const ptrdiff_t stride,
const uint16_t *const pal, const uint8_t *idx,
......@@ -979,9 +993,9 @@ void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_4X4 ] = cfl_ac_8x4_to_4x4_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_4X8 ] = cfl_ac_8x8_to_4x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X4 ] = cfl_ac_16x4_to_8x4_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_8X8 ] = cfl_ac_16x8_to_8x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X16] = cfl_ac_16x16_to_8x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X4 ] = cfl_ac_16x4_to_8x4_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_8X8 ] = cfl_ac_16x8_to_8x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X16 ] = cfl_ac_16x16_to_8x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_16X8 ] = cfl_ac_32x8_to_16x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_16X16] = cfl_ac_32x16_to_16x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_16X32] = cfl_ac_32x32_to_16x32_c;
......
......@@ -40,7 +40,7 @@
* see ipred_prepare.h for more detailed documentation.
*/
#define decl_angular_ipred_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const pixel *topleft, int angle);
void (name)(pixel *dst, ptrdiff_t stride, const pixel *topleft, int angle)
typedef decl_angular_ipred_fn(*angular_ipred_fn);
/*
......
......@@ -49,7 +49,9 @@ inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
{
int i, j;
const ptrdiff_t sh = imin(h, 32), sw = imin(w, 32);
coef tmp[w * h], out[h], in_mem[w];
// The scratch buffers below are sized for the largest transform (64x64), so
// both dimensions must really lie inside [4, 64]. Each range check has to be
// a conjunction: "x >= 4 || x <= 64" holds for every integer and would never
// fire.
assert(h >= 4 && h <= 64 && w >= 4 && w <= 64);
// Fixed worst-case sizes (w * h <= 64 * 64, h <= 64, w <= 64) replace the
// former variable-length arrays.
coef tmp[4096 /* w * h */], out[64 /* h */], in_mem[64 /* w */];
const int is_rect2 = w * 2 == h || h * 2 == w;
if (w != sw) memset(&in_mem[sw], 0, (w - sw) * sizeof(*in_mem));
......@@ -96,48 +98,48 @@ inv_txfm_add_##type1##_##type2##_##w##x##h##_c(pixel *dst, \
inv_txfm_fn(dct, dct, w, h, shift1, shift2)
#define inv_txfm_fn32(w, h, shift1, shift2) \
inv_txfm_fn64(w, h, shift1, shift2); \
inv_txfm_fn64(w, h, shift1, shift2) \
inv_txfm_fn(identity, identity, w, h, shift1, shift2)
#define inv_txfm_fn16(w, h, shift1, shift2) \
inv_txfm_fn32(w, h, shift1, shift2); \
inv_txfm_fn(adst, dct, w, h, shift1, shift2); \
inv_txfm_fn(dct, adst, w, h, shift1, shift2); \
inv_txfm_fn(adst, adst, w, h, shift1, shift2); \
inv_txfm_fn(dct, flipadst, w, h, shift1, shift2); \
inv_txfm_fn(flipadst, dct, w, h, shift1, shift2); \
inv_txfm_fn(adst, flipadst, w, h, shift1, shift2); \
inv_txfm_fn(flipadst, adst, w, h, shift1, shift2); \
inv_txfm_fn(flipadst, flipadst, w, h, shift1, shift2); \
inv_txfm_fn(identity, dct, w, h, shift1, shift2); \
inv_txfm_fn(dct, identity, w, h, shift1, shift2); \
inv_txfm_fn32(w, h, shift1, shift2) \
inv_txfm_fn(adst, dct, w, h, shift1, shift2) \
inv_txfm_fn(dct, adst, w, h, shift1, shift2) \
inv_txfm_fn(adst, adst, w, h, shift1, shift2) \
inv_txfm_fn(dct, flipadst, w, h, shift1, shift2) \
inv_txfm_fn(flipadst, dct, w, h, shift1, shift2) \
inv_txfm_fn(adst, flipadst, w, h, shift1, shift2) \
inv_txfm_fn(flipadst, adst, w, h, shift1, shift2) \
inv_txfm_fn(flipadst, flipadst, w, h, shift1, shift2) \
inv_txfm_fn(identity, dct, w, h, shift1, shift2) \
inv_txfm_fn(dct, identity, w, h, shift1, shift2) \
#define inv_txfm_fn84(w, h, shift1, shift2) \
inv_txfm_fn16(w, h, shift1, shift2); \
inv_txfm_fn(identity, flipadst, w, h, shift1, shift2); \
inv_txfm_fn(flipadst, identity, w, h, shift1, shift2); \
inv_txfm_fn(identity, adst, w, h, shift1, shift2); \
inv_txfm_fn(adst, identity, w, h, shift1, shift2); \
inv_txfm_fn84( 4, 4, 0, 4);
inv_txfm_fn84( 4, 8, 0, 4);
inv_txfm_fn84( 4, 16, 1, 4);
inv_txfm_fn84( 8, 4, 0, 4);
inv_txfm_fn84( 8, 8, 1, 4);
inv_txfm_fn84( 8, 16, 1, 4);
inv_txfm_fn32( 8, 32, 2, 4);
inv_txfm_fn84(16, 4, 1, 4);
inv_txfm_fn84(16, 8, 1, 4);
inv_txfm_fn16(16, 16, 2, 4);
inv_txfm_fn32(16, 32, 1, 4);
inv_txfm_fn64(16, 64, 2, 4);
inv_txfm_fn32(32, 8, 2, 4);
inv_txfm_fn32(32, 16, 1, 4);
inv_txfm_fn32(32, 32, 2, 4);
inv_txfm_fn64(32, 64, 1, 4);
inv_txfm_fn64(64, 16, 2, 4);
inv_txfm_fn64(64, 32, 1, 4);
inv_txfm_fn64(64, 64, 2, 4);
inv_txfm_fn16(w, h, shift1, shift2) \
inv_txfm_fn(identity, flipadst, w, h, shift1, shift2) \
inv_txfm_fn(flipadst, identity, w, h, shift1, shift2) \
inv_txfm_fn(identity, adst, w, h, shift1, shift2) \
inv_txfm_fn(adst, identity, w, h, shift1, shift2) \
inv_txfm_fn84( 4, 4, 0, 4)
inv_txfm_fn84( 4, 8, 0, 4)
inv_txfm_fn84( 4, 16, 1, 4)
inv_txfm_fn84( 8, 4, 0, 4)
inv_txfm_fn84( 8, 8, 1, 4)
inv_txfm_fn84( 8, 16, 1, 4)
inv_txfm_fn32( 8, 32, 2, 4)
inv_txfm_fn84(16, 4, 1, 4)
inv_txfm_fn84(16, 8, 1, 4)
inv_txfm_fn16(16, 16, 2, 4)
inv_txfm_fn32(16, 32, 1, 4)
inv_txfm_fn64(16, 64, 2, 4)
inv_txfm_fn32(32, 8, 2, 4)
inv_txfm_fn32(32, 16, 1, 4)
inv_txfm_fn32(32, 32, 2, 4)
inv_txfm_fn64(32, 64, 1, 4)
inv_txfm_fn64(64, 16, 2, 4)
inv_txfm_fn64(64, 32, 1, 4)
inv_txfm_fn64(64, 64, 2, 4)
static void inv_txfm_add_wht_wht_4x4_c(pixel *dst, const ptrdiff_t stride,
coef *const coeff, const int eob)
......
......@@ -802,9 +802,9 @@ static void inv_flipadst##sz##_1d(const coef *const in, const ptrdiff_t in_s, \
inv_adst##sz##_1d(in, in_s, &out[(sz - 1) * out_s], -out_s); \
}
flip_inv_adst(4);
flip_inv_adst(8);
flip_inv_adst(16);
flip_inv_adst(4)
flip_inv_adst(8)
flip_inv_adst(16)
#undef flip_inv_adst
......
......@@ -465,11 +465,17 @@ typedef struct Av1FrameHeader {
Av1SegmentationDataSet seg_data;
int lossless[NUM_SEGMENTS], qidx[NUM_SEGMENTS];
} segmentation;
int delta_q_present;
int delta_q_res_log2;
int delta_lf_present;
int delta_lf_res_log2;
int delta_lf_multi;
/* Per-frame "delta" settings, grouped by target: `q` and `lf` each carry a
 * `present` flag and a `res_log2` value; `lf` additionally carries `multi`.
 * NOTE(review): presumably these mirror the AV1 frame-header
 * delta_q_* / delta_lf_* syntax elements -- confirm against the parser. */
struct {
/* `q` group -- presumably quantizer-index deltas (TODO confirm). */
struct {
int present;
int res_log2;
} q;
/* `lf` group -- consumed by the loop-filter level computation. */
struct {
int present;
int res_log2;
/* When non-zero, dav1d_calc_lf_values() picks lf_delta[1], [2] and [3]
 * for its second, third and fourth level computations instead of
 * reusing lf_delta[0] for all of them. */
int multi;
} lf;
} delta;
int all_lossless;
struct {
int level_y[2];
......
......@@ -220,12 +220,14 @@ void bytefn(dav1d_loopfilter_sbrow)(const Dav1dFrameContext *const f,
int sby, const int start_of_tile_row)
{
int x, have_left;
// Don't filter outside the frame
const int hy4 = (f->cur.p.p.h + 3) >> 2;
const int have_top = sby > 0;
const int is_sb64 = !f->seq_hdr.sb128;
const int starty4 = (sby & is_sb64) << 4;
const int sbsz = 32 >> is_sb64;
const int sbl2 = 5 - is_sb64;
const int endy4 = starty4 + imin(f->bh - sby * f->sb_step, sbsz);
const int endy4 = starty4 + imin(hy4 - sby * f->sb_step, sbsz);
const int halign = (f->bh + 31) & ~31;
const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
......
......@@ -390,13 +390,13 @@ void dav1d_calc_lf_values(uint8_t (*const lflvl_values)[4][8][2],
dav1d_calc_lf_value(lflvl_values[s][0], 0, hdr->loopfilter.level_y[0],
lf_delta[0], segd ? segd->delta_lf_y_v : 0, mr_deltas);
dav1d_calc_lf_value(lflvl_values[s][1], 0, hdr->loopfilter.level_y[1],
lf_delta[hdr->delta_lf_multi ? 1 : 0],
lf_delta[hdr->delta.lf.multi ? 1 : 0],
segd ? segd->delta_lf_y_h : 0, mr_deltas);
dav1d_calc_lf_value(lflvl_values[s][2], 1, hdr->loopfilter.level_u,
lf_delta[hdr->delta_lf_multi ? 2 : 0],
lf_delta[hdr->delta.lf.multi ? 2 : 0],
segd ? segd->delta_lf_u : 0, mr_deltas);
dav1d_calc_lf_value(lflvl_values[s][3], 1, hdr->loopfilter.level_v,
lf_delta[hdr->delta_lf_multi ? 3 : 0],
lf_delta[hdr->delta.lf.multi ? 3 : 0],
segd ? segd->delta_lf_v : 0, mr_deltas);
}
}