Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • videolan/x264
  • EwoutH/x264
  • gramner/x264
  • BugMaster/x264
  • MaskRay/x264
  • thresh/x264
  • tpm/x264
  • wolfired/x264
  • ifb/x264
  • robinstorm/x264
  • ltnokiago/x264
  • janne/x264
  • Kromjunya/x264
  • trisnaayu0596/x264
  • felipegarcia1402/x264
  • coder2004/x264
  • philou/x264
  • walagnatalia/x264
  • DonDiego/x264
  • JHammler/x264
  • qyot27/x264
  • dwbuiten/x264
  • Kagami/x264
  • andriy-andreyev/x264
  • gxw/x264
  • trofi/x264
  • kierank/x264
  • aureliendavid/x264
  • galad/x264
  • roommini/x264
  • ocrete/x264
  • mstorsjo/x264
  • yinsj0116/x264
  • mamonet/x264
  • 1div0/x264
  • ko1265/x264
  • sergiomb2/x264
  • xutongda/x264
  • wenzhiwu/x264
  • arrowd/x264
  • FranceBB/x264
  • ziemek99/x264
  • longervision/x264
  • xopok/x264
  • jbk/x264
  • szatmary/x264
  • pekdon/x264
  • Jiangguyu/x264
  • jrtc27/x264
  • kankanol1/x264
  • gxwLite/x264
  • brad/x264
  • Gc6026/x264
  • jdek/x264
  • appcrash/x264
  • tguillem/x264
  • As/x264
  • wevian/x264
  • wangluls/x264
  • RellikJaeger/x264
  • hum/x264
  • rogerhardiman/x264
  • jankowalski12611/x264
  • zhijie1996/x264
  • yinshiyou/x264
  • Freed-Wu/x264
  • yajcoca/x264
  • bUd/x264
  • chienvannguyen2020/x264
  • nurbinakhatun386/x264
  • Siberiawind/x-264-meson
  • HecaiYuan/x264
  • david.chen/x264
  • Ytsejam76/x264
  • robUx4/x264
  • zhaoshiz/x-264-arm64ec
  • yintong.ustc/x-264-bd-ventana
  • nekobasu/x264
  • Courmisch/x264
  • BD-qjy/x264
  • quink/x264
  • markos/x264
82 results
Show changes
Commits on Source (4)
......@@ -229,6 +229,7 @@ ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),)
OBJASM += $(SRCASM_X:%.S=%-10.o)
endif
OBJCHK += tools/checkasm-loongarch.o
endif
endif
......
/*********************************************************************
* Copyright (c) 2022 Loongson Technology Corporation Limited
* Copyright (c) 2022-2024 Loongson Technology Corporation Limited
* Contributed by Xiwei Gu <guxiwei-hf@loongson.cn>
* Shiyou Yin <yinshiyou-hf@loongson.cn>
*
......@@ -31,12 +31,19 @@
*/
#define LML_VERSION_MAJOR 0
#define LML_VERSION_MINOR 2
#define LML_VERSION_MICRO 2
#define LML_VERSION_MINOR 4
#define LML_VERSION_MICRO 0
#define ASM_PREF
#define DEFAULT_ALIGN 5
/*
*============================================================================
* macros for specific project, set them as needed.
* Following LoongML macros for your reference.
*============================================================================
*/
.macro function name, align=DEFAULT_ALIGN
.macro endfunc
jirl $r0, $r1, 0x0
......@@ -99,6 +106,39 @@ ASM_PREF\name: ;
#define sp $sp
#define ra $ra
#define fa0 $fa0
#define fa1 $fa1
#define fa2 $fa2
#define fa3 $fa3
#define fa4 $fa4
#define fa5 $fa5
#define fa6 $fa6
#define fa7 $fa7
#define ft0 $ft0
#define ft1 $ft1
#define ft2 $ft2
#define ft3 $ft3
#define ft4 $ft4
#define ft5 $ft5
#define ft6 $ft6
#define ft7 $ft7
#define ft8 $ft8
#define ft9 $ft9
#define ft10 $ft10
#define ft11 $ft11
#define ft12 $ft12
#define ft13 $ft13
#define ft14 $ft14
#define ft15 $ft15
#define fs0 $fs0
#define fs1 $fs1
#define fs2 $fs2
#define fs3 $fs3
#define fs4 $fs4
#define fs5 $fs5
#define fs6 $fs6
#define fs7 $fs7
#define f0 $f0
#define f1 $f1
#define f2 $f2
......@@ -272,18 +312,17 @@ ASM_PREF\name: ;
.endm
/*
* Description : Range each element of vector
* Description : Range element vj[i] to vk[i] ~ va[i]
* clip: vj > vk ? vj : vk && vj < va ? vj : va
* clip255: vj < 255 ? vj : 255 && vj > 0 ? vj : 0
*/
.macro vclip.h vd, vj, vk, va
vmax.h \vd, \vj, \vk
vmin.h \vd, \vd, \va
.endm
.macro vclip255.w vd, vj
vmaxi.w \vd, \vj, 0
vsat.wu \vd, \vd, 7
.macro vclip.w vd, vj, vk, va
vmax.w \vd, \vj, \vk
vmin.w \vd, \vd, \va
.endm
.macro xvclip.h xd, xj, xk, xa
......@@ -291,6 +330,25 @@ ASM_PREF\name: ;
xvmin.h \xd, \xd, \xa
.endm
// xd[i] = min( max( xj[i], xk[i] ), xa[i] ) for every .w element:
// xk holds the lower bounds, xa the upper bounds.
.macro xvclip.w xd, xj, xk, xa
xvmax.w \xd, \xj, \xk
xvmin.w \xd, \xd, \xa
.endm
/*
* Description : Range element vj[i] to 0 ~ 255
* clip255: vj < 255 ? vj : 255 && vj > 0 ? vj : 0
*/
// vclip255.h: clamp every .h element of vj to 0 ~ 255, result in vd.
// The max against 0 removes negative values; the unsigned saturate with
// immediate 7 then applies the upper bound of 255 (range as documented above).
.macro vclip255.h vd, vj
vmaxi.h \vd, \vj, 0
vsat.hu \vd, \vd, 7
.endm
// vclip255.w: same clamp to 0 ~ 255, applied to every .w element of vj.
.macro vclip255.w vd, vj
vmaxi.w \vd, \vj, 0
vsat.wu \vd, \vd, 7
.endm
.macro xvclip255.h xd, xj
xvmaxi.h \xd, \xj, 0
xvsat.hu \xd, \xd, 7
......
......@@ -1438,6 +1438,9 @@ endfunc_x264
* const Pixel *pix2, intptr_t i_pix2)
*/
function_x264 pixel_sa8d_16x16_lasx
addi.d sp, sp, -8
fst.d f24, sp, 0
slli.d t2, a1, 1
slli.d t3, a3, 1
add.d t4, a1, t2
......@@ -1753,6 +1756,9 @@ function_x264 pixel_sa8d_16x16_lasx
add.d t4, t4, t5
addi.d t4, t4, 2
srli.d a0, t4, 2
fld.d f24, sp, 0
addi.d sp, sp, 8
endfunc_x264
/*
......
......@@ -984,3 +984,248 @@ function_x264 decimate_score64_lsx
jirl $r0, $r1, 0x0
.END_SCORE_64_LSX:
endfunc_x264
/*
 * Scalar back end shared by all coeff_level_run* entry points below.
 * Each entry point only differs in how it builds the non-zero map in t8.
 *
 * In:   a0 = dct       (16-bit coefficients, indexed with a byte offset of 2*i)
 *       a1 = runlevel  (last at offset 0, mask at offset 4, level[] at the
 *                       first 16-byte boundary at or after offset 8)
 *       t8 = non-zero map, one 4-bit group per coefficient, coefficient 0 in
 *            the lowest group; a group is 0xF when dct[i] != 0 and 0 otherwise
 * Out:  a0 = number of levels written (non-zero coefficients)
 *       runlevel->last, runlevel->mask and runlevel->level[] are stored
 * Uses: t0 - t8 as scratch
 *
 * \tag only makes the two local labels unique for each expansion.
 * The loop body is executed at least once, even when t8 is zero.
 */
.macro coeff_level_run_extract tag
    addi.w          t0,     zero,   15
    clz.d           t1,     t8                  // Leading zero bits of the map
    srai.w          t1,     t1,     2           // ... converted to leading zero coefficients
    sub.w           t0,     t0,     t1          // Index of the highest non-zero coefficient
    st.w            t0,     a1,     0x00        // Store runlevel->last
    addi.d          t3,     a1,     23
    addi.d          t2,     zero,   -16
    and             t3,     t3,     t2          // runlevel->level = (runlevel + 23) & ~15
    move            t4,     zero                // mask
    move            t5,     zero                // total: number of non-zero elements
    addi.w          t6,     zero,   1           // const 1
.LOOP_COEFF_LEVEL_\tag:
    slli.w          t7,     t0,     1           // Byte offset of dct[index]
    ldx.h           t2,     a0,     t7
    st.h            t2,     t3,     0           // level[total] = dct[index]
    addi.d          t3,     t3,     2
    addi.w          t5,     t5,     1           // total++
    sll.w           t2,     t6,     t0
    or              t4,     t4,     t2          // mask |= 1 << index
    // mask is only <= 0 here when bit 31 was set, which needs a negative
    // index (map was empty on entry); leave after this single iteration.
    bge             zero,   t4,     .END_COEFF_LEVEL_\tag
    addi.w          t0,     t0,     -1
    slli.w          t1,     t1,     2
    addi.w          t1,     t1,     4
    sll.d           t8,     t8,     t1          // Drop the skipped zero groups and the one just consumed
    // NOTE: a shift count of 64 (t1 was 15) leaves t8 unchanged, but the
    // index is already -1 in that case, so the loop terminates below.
    clz.d           t1,     t8
    srai.w          t1,     t1,     2
    sub.w           t0,     t0,     t1          // Index of the next non-zero coefficient, < 0 when none is left
    bge             t0,     zero,   .LOOP_COEFF_LEVEL_\tag
.END_COEFF_LEVEL_\tag:
    st.w            t4,     a1,     4           // Store runlevel->mask
    move            a0,     t5                  // Return total
.endm

/*
 * int coeff_level_run16( dctcoef *dct, x264_run_level_t *runlevel )
 */
function_x264 coeff_level_run16_lasx
    xvld            xr0,    a0,     0           // dct[0..15]
    xvldi           xr2,    1                   // Constant 1 in every byte
    xvssrlni.bu.h   xr0,    xr0,    0           // Saturating narrow to bytes: non-zero stays non-zero
    xvpermi.d       xr1,    xr0,    0xd8        // Gather the 16 narrowed bytes into the low 128 bits
    xvsle.bu        xr3,    xr2,    xr1         // 0xFF for every non-zero byte
    xvsrlni.b.h     xr3,    xr3,    4           // Keep 4 bits per coefficient
    xvpickve2gr.du  t8,     xr3,    0           // t8 = 64-bit non-zero map
    coeff_level_run_extract RUN16_LASX
endfunc_x264

/*
 * int coeff_level_run15( dctcoef *dct, x264_run_level_t *runlevel )
 * A full 16 coefficients are loaded; the 16th is forced to zero.
 */
function_x264 coeff_level_run15_lasx
    vld             vr0,    a0,     0           // dct[0..7]
    vld             vr1,    a0,     16          // dct[8..15]
    xvldi           xr3,    1                   // Constant 1 in every byte
    vinsgr2vr.h     vr1,    zero,   7           // Ignore element 15
    xvpermi.q       xr1,    xr0,    0x20        // Join both halves into one 256-bit vector
    xvssrlni.bu.h   xr1,    xr1,    0           // Saturating narrow to bytes: non-zero stays non-zero
    xvpermi.d       xr2,    xr1,    0xd8        // Gather the 16 narrowed bytes into the low 128 bits
    xvsle.bu        xr4,    xr3,    xr2         // 0xFF for every non-zero byte
    xvsrlni.b.h     xr4,    xr4,    4           // Keep 4 bits per coefficient
    xvpickve2gr.du  t8,     xr4,    0           // t8 = 64-bit non-zero map
    coeff_level_run_extract RUN15_LASX
endfunc_x264

/*
 * int coeff_level_run16( dctcoef *dct, x264_run_level_t *runlevel )
 */
function_x264 coeff_level_run16_lsx
    vld             vr0,    a0,     0           // dct[0..7]
    vld             vr1,    a0,     16          // dct[8..15]
    vldi            vr2,    1                   // Constant 1 in every byte
    vssrlni.bu.h    vr0,    vr0,    0           // Saturating narrow to bytes: non-zero stays non-zero
    vssrlni.bu.h    vr1,    vr1,    0
    vpermi.w        vr1,    vr0,    0x44        // vr1 = narrowed dct[0..7] : narrowed dct[8..15]
    vsle.bu         vr3,    vr2,    vr1         // 0xFF for every non-zero byte
    vsrlni.b.h      vr3,    vr3,    4           // Keep 4 bits per coefficient
    vpickve2gr.du   t8,     vr3,    0           // t8 = 64-bit non-zero map
    coeff_level_run_extract RUN16_LSX
endfunc_x264

/*
 * int coeff_level_run15( dctcoef *dct, x264_run_level_t *runlevel )
 * A full 16 coefficients are loaded; the 16th is forced to zero.
 */
function_x264 coeff_level_run15_lsx
    vld             vr0,    a0,     0           // dct[0..7]
    vld             vr1,    a0,     16          // dct[8..15]
    vldi            vr2,    1                   // Constant 1 in every byte
    vinsgr2vr.h     vr1,    zero,   7           // Ignore element 15
    vssrlni.bu.h    vr0,    vr0,    0           // Saturating narrow to bytes: non-zero stays non-zero
    vssrlni.bu.h    vr1,    vr1,    0
    vpermi.w        vr1,    vr0,    0x44        // vr1 = narrowed dct[0..7] : narrowed dct[8..15]
    vsle.bu         vr3,    vr2,    vr1         // 0xFF for every non-zero byte
    vsrlni.b.h      vr3,    vr3,    4           // Keep 4 bits per coefficient
    vpickve2gr.du   t8,     vr3,    0           // t8 = 64-bit non-zero map
    coeff_level_run_extract RUN15_LSX
endfunc_x264

/*
 * int coeff_level_run8( dctcoef *dct, x264_run_level_t *runlevel )
 * Only 8 coefficients are loaded; the upper half of the map is zero.
 */
function_x264 coeff_level_run8_lsx
    vld             vr0,    a0,     0           // dct[0..7]
    vxor.v          vr1,    vr1,    vr1         // Upper 8 map entries are empty
    vldi            vr2,    1                   // Constant 1 in every byte
    vssrlni.bu.h    vr0,    vr0,    0           // Saturating narrow to bytes: non-zero stays non-zero
    vpermi.w        vr1,    vr0,    0x44        // vr1 = narrowed dct[0..7] : zeros
    vsle.bu         vr3,    vr2,    vr1         // 0xFF for every non-zero byte
    vsrlni.b.h      vr3,    vr3,    4           // Keep 4 bits per coefficient
    vpickve2gr.du   t8,     vr3,    0           // t8 = 64-bit non-zero map
    coeff_level_run_extract RUN8_LSX
endfunc_x264
......@@ -81,4 +81,16 @@ void x264_dequant_8x8_lasx( dctcoef dct[64], int dequant_mf[6][64], int i_qp );
#define x264_dequant_4x4_dc_lasx x264_template(dequant_4x4_dc_lasx)
void x264_dequant_4x4_dc_lasx( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
#define x264_coeff_level_run16_lasx x264_template(coeff_level_run16_lasx)
int x264_coeff_level_run16_lasx( dctcoef *, x264_run_level_t * );
#define x264_coeff_level_run15_lasx x264_template(coeff_level_run15_lasx)
int x264_coeff_level_run15_lasx( dctcoef *, x264_run_level_t * );
#define x264_coeff_level_run16_lsx x264_template(coeff_level_run16_lsx)
int x264_coeff_level_run16_lsx( dctcoef *, x264_run_level_t * );
#define x264_coeff_level_run15_lsx x264_template(coeff_level_run15_lsx)
int x264_coeff_level_run15_lsx( dctcoef *, x264_run_level_t * );
#define x264_coeff_level_run8_lsx x264_template(coeff_level_run8_lsx)
int x264_coeff_level_run8_lsx( dctcoef *, x264_run_level_t * );
#endif/* X264_LOONGARCH_QUANT_H */
......@@ -848,11 +848,17 @@ void x264_quant_init( x264_t *h, uint32_t cpu, x264_quant_function_t *pf )
pf->dequant_4x4 = x264_dequant_4x4_lsx;
pf->dequant_8x8 = x264_dequant_8x8_lsx;
pf->dequant_4x4_dc = x264_dequant_4x4_dc_lsx;
pf->coeff_last4 = x264_coeff_last4_lsx;
pf->coeff_last8 = x264_coeff_last8_lsx;
pf->decimate_score15 = x264_decimate_score15_lsx;
pf->decimate_score16 = x264_decimate_score16_lsx;
pf->decimate_score64 = x264_decimate_score64_lsx;
pf->coeff_last4 = x264_coeff_last4_lsx;
pf->coeff_last8 = x264_coeff_last8_lsx;
pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_lsx;
pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_lsx;
pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_lsx;
pf->coeff_level_run8 = x264_coeff_level_run8_lsx;
pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_lsx;
pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_lsx;
}
if( cpu&X264_CPU_LASX )
{
......@@ -863,6 +869,8 @@ void x264_quant_init( x264_t *h, uint32_t cpu, x264_quant_function_t *pf )
pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_lasx;
pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_lasx;
pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_lasx;
pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_lasx;
pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_lasx;
}
#endif
......
/****************************************************************************
* checkasm-loongarch.S: assembly check tool
*****************************************************************************
* Copyright (C) 2024 x264 project
*
* Authors: Xiwei Gu <guxiwei-hf@loongson.cn>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
#include "../common/loongarch/loongson_asm.S"
const register_init, align=3
.quad 0x21f86d66c8ca00ce
.quad 0x75b6ba21077c48ad
.quad 0xed56bb2dcb3c7736
.quad 0x8bda43d3fd1a7e06
.quad 0xb64a9c9e5d318408
.quad 0xdf9a54b303f1d3a3
.quad 0x4a75479abd64e097
.quad 0x249214109d5d1c88
.quad 0x1a1b2550a612b48c
.quad 0x79445c159ce79064
.quad 0x2eed899d5a28ddcd
.quad 0x86b2536fcd8cf636
.quad 0xb0856806085e7943
.quad 0x3f2bf84fc0fcca4e
.quad 0xacbd382dcf5b8de2
.quad 0xd229e1f5b281303f
.quad 0x71aeaff20b095fd9
endconst
const error_message
.asciz "failed to preserve register"
endconst
.text
// max number of args used by any x264 asm function.
#define MAX_ARGS 15
#define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15)
// Fill dirty data at stack space
/*
 * void x264_checkasm_stack_clobber( uint64_t clobber, ... )
 *
 * Overwrites CLOBBER_STACK bytes of stack, from sp + 0 downwards, with the
 * values passed in a0 and a1 (16 bytes per iteration). sp itself is never
 * moved; t0 and t1 are used as scratch.
 */
function x264_checkasm_stack_clobber
    move     t0,    sp                      // t0 = write cursor
    addi.d   t1,    zero,   CLOBBER_STACK   // t1 = bytes still to fill
1:
    st.d     a0,    t0,     0x00
    st.d     a1,    t0,     -0x08
    addi.d   t0,    t0,     -0x10
    addi.d   t1,    t1,     -0x10
    blt      zero,  t1,     1b
endfunc
#define ARG_STACK ((8*(MAX_ARGS - 8) + 15) & ~15)
// Bytes reserved for spilling the callee-saved registers this wrapper
// overwrites: s0 - s8 and fs0 - fs7 (17 * 8 = 136), rounded up to a multiple
// of 16 so that sp stays 16-byte aligned while the function under test runs.
#define SAVED_REGS_STACK ((8*(9 + 8) + 15) & ~15)

/*
 * intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... )
 *
 * Calls func with the variadic arguments and verifies that it preserved
 * s0 - s8 and fs0 - fs7. The call_a1 wrapper pads the register arguments
 * with six zeros, so every real argument of func is found on the stack at
 * the incoming sp.
 *
 * In:   a0 = func, a1 = ok, [sp + 8*i] = argument i of func
 * Out:  a0 = value returned by func (also kept when the check fails);
 *       *ok is set to 0 and a message is printed if a register changed
 * Uses: t0 - t4 as scratch; s0 - s8 and fs0 - fs7 are restored on exit
 */
function x264_checkasm_call
    // Save s0 - s8, fs0 - fs7
    move     t4,    sp                      // t4 = base of the arguments for func
    addi.d   sp,    sp,     -SAVED_REGS_STACK
    st.d     s0,    sp,     0
    st.d     s1,    sp,     8
    st.d     s2,    sp,     16
    st.d     s3,    sp,     24
    st.d     s4,    sp,     32
    st.d     s5,    sp,     40
    st.d     s6,    sp,     48
    st.d     s7,    sp,     56
    st.d     s8,    sp,     64
    fst.d    fs0,   sp,     72
    fst.d    fs1,   sp,     80
    fst.d    fs2,   sp,     88
    fst.d    fs3,   sp,     96
    fst.d    fs4,   sp,     104
    fst.d    fs5,   sp,     112
    fst.d    fs6,   sp,     120
    fst.d    fs7,   sp,     128

    // Fill every callee-saved register with a known pattern
    la.local t1,    register_init
    ld.d     s0,    t1,     0
    ld.d     s1,    t1,     8
    ld.d     s2,    t1,     16
    ld.d     s3,    t1,     24
    ld.d     s4,    t1,     32
    ld.d     s5,    t1,     40
    ld.d     s6,    t1,     48
    ld.d     s7,    t1,     56
    ld.d     s8,    t1,     64
    fld.d    fs0,   t1,     72
    fld.d    fs1,   t1,     80
    fld.d    fs2,   t1,     88
    fld.d    fs3,   t1,     96
    fld.d    fs4,   t1,     104
    fld.d    fs5,   t1,     112
    fld.d    fs6,   t1,     120
    fld.d    fs7,   t1,     128

    addi.d   sp,    sp,     -16
    st.d     a1,    sp,     0               // ok
    st.d     ra,    sp,     8               // Ret address

    // Outgoing stack arguments of func
    addi.d   sp,    sp,     -ARG_STACK
    addi.d   t0,    zero,   8*8
    xor      t1,    t1,     t1
.rept MAX_ARGS - 8
    // Skip the first 8 args, that are loaded into registers
    ldx.d    t2,    t4,     t0
    stx.d    t2,    sp,     t1
    addi.d   t0,    t0,     8
    addi.d   t1,    t1,     8
.endr
    move     t3,    a0                      // Func
    ld.d     a0,    t4,     0
    ld.d     a1,    t4,     8
    ld.d     a2,    t4,     16
    ld.d     a3,    t4,     24
    ld.d     a4,    t4,     32
    ld.d     a5,    t4,     40
    ld.d     a6,    t4,     48
    ld.d     a7,    t4,     56
    jirl     ra,    t3,     0

    addi.d   sp,    sp,     ARG_STACK
    ld.d     t2,    sp,     0               // ok
    ld.d     ra,    sp,     8               // Ret address
    addi.d   sp,    sp,     16

    // t3 accumulates the differences between the registers and the pattern
    la.local t1,    register_init
    xor      t3,    t3,     t3
.macro check_reg_gr reg1
    ld.d     t0,    t1,     0
    xor      t0,    $s\reg1, t0
    or       t3,    t3,     t0
    addi.d   t1,    t1,     8
.endm
    check_reg_gr 0
    check_reg_gr 1
    check_reg_gr 2
    check_reg_gr 3
    check_reg_gr 4
    check_reg_gr 5
    check_reg_gr 6
    check_reg_gr 7
    check_reg_gr 8
.macro check_reg_fr reg1
    ld.d     t0,    t1,     0
    movfr2gr.d t4,  $fs\reg1
    xor      t0,    t0,     t4
    or       t3,    t3,     t0
    addi.d   t1,    t1,     8
.endm
    check_reg_fr 0
    check_reg_fr 1
    check_reg_fr 2
    check_reg_fr 3
    check_reg_fr 4
    check_reg_fr 5
    check_reg_fr 6
    check_reg_fr 7

    beqz     t3,    0f
    st.w     zero,  t2,     0x00            // Set OK to 0 (ok is an int, 4 bytes)
    addi.d   sp,    sp,     -16             // Keep sp 16-byte aligned for puts
    st.d     ra,    sp,     0
    st.d     a0,    sp,     8               // Keep the return value of func
    la.local a0,    error_message
    bl       puts
    ld.d     a0,    sp,     8
    ld.d     ra,    sp,     0
    addi.d   sp,    sp,     16
0:
    ld.d     s0,    sp,     0
    ld.d     s1,    sp,     8
    ld.d     s2,    sp,     16
    ld.d     s3,    sp,     24
    ld.d     s4,    sp,     32
    ld.d     s5,    sp,     40
    ld.d     s6,    sp,     48
    ld.d     s7,    sp,     56
    ld.d     s8,    sp,     64
    fld.d    fs0,   sp,     72
    fld.d    fs1,   sp,     80
    fld.d    fs2,   sp,     88
    fld.d    fs3,   sp,     96
    fld.d    fs4,   sp,     104
    fld.d    fs5,   sp,     112
    fld.d    fs6,   sp,     120
    fld.d    fs7,   sp,     128
    addi.d   sp,    sp,     SAVED_REGS_STACK
endfunc
......@@ -274,6 +274,10 @@ intptr_t x264_checkasm_call_noneon( intptr_t (*func)(), int *ok, ... );
intptr_t (*x264_checkasm_call)( intptr_t (*func)(), int *ok, ... ) = x264_checkasm_call_noneon;
#endif
#if ARCH_LOONGARCH
intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
#endif
#define call_c1(func,...) func(__VA_ARGS__)
#if HAVE_MMX && ARCH_X86_64
......@@ -300,6 +304,12 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... );
x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, 0, 0, __VA_ARGS__ ); })
#elif HAVE_MMX || HAVE_ARMV6
#define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
#elif ARCH_LOONGARCH && HAVE_LSX
void x264_checkasm_stack_clobber( uint64_t clobber, ... );
#define call_a1(func,...) ({ \
uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+8 */ \
x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, 0, 0, __VA_ARGS__ ); })
#else
#define call_a1 call_c1
#endif
......