Commit 8e213b47 authored by Rémi Denis-Courmont

swscale_omap: remove dead OMAP2 (really ARMv6) CPU scaler plugin

This plugin had become useless since the removal of the OMAP
framebuffer video output.
parent 2792431a
......@@ -682,7 +682,7 @@ AC_CHECK_LIB(m,sqrt,[
VLC_ADD_LIBS([compressor headphone_channel_mixer normvol audiobargraph_a speex mono colorthres extract ball],[-lm])
])
AC_CHECK_LIB(m,ceil,[
VLC_ADD_LIBS([access_imem hotkeys mosaic swscale_omap],[-lm])
VLC_ADD_LIBS([access_imem hotkeys mosaic],[-lm])
])
AC_CHECK_LIB(m,exp,[
VLC_ADD_LIBS([gaussianblur],[-lm])
......
......@@ -316,7 +316,6 @@ $Id$
* svcdsub: SVCD subtitles decoder
* svg: a svg renderer module
* swscale: Video scaling filter
* swscale_omap: Video scaling filter for maemo/omap platform
* t140: T.140 text encoder
* taglib: Taglib tags parser and writer
* telepathy: Telepathy Presence information using MissionControl notification
......
......@@ -46,11 +46,6 @@ SOURCES_blendbench = blendbench.c
SOURCES_chain = chain.c
SOURCES_postproc = postproc.c
SOURCES_swscale = swscale.c ../codec/avcodec/chroma.c
SOURCES_swscale_omap = swscale_omap.c \
libswscale_nokia770/arm_jit_swscale.c \
libswscale_nokia770/arm_colorconv.S \
libswscale_nokia770/arm_jit_swscale.h \
libswscale_nokia770/arm_colorconv.h
SOURCES_scene = scene.c
SOURCES_sepia = sepia.c
SOURCES_yuvp = yuvp.c
......
/*
* ARM assembly optimized color format conversion functions
* (YV12 -> YUY2, YV12 -> some custom YUV420 format used by
* Epson graphics chip in Nokia N800)
*
* Copyright (C) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
.text
/*******************************************************/
.align
/*
 * void yv12_to_yuy2_line_arm(dst, src_y, src_u, src_v, w)
 *
 * Packs one line of planar Y/U/V bytes into 32-bit words. Each output word
 * covers two pixels and is built as
 *     Y0 | (U << 8) | (Y1 << 16) | (V << 24)
 * so two luma bytes and one byte from each chroma plane are consumed per word.
 *
 * The first four arguments arrive in r0-r3; the fifth (w) is read from the
 * top of the caller's stack. The width is rounded down to an even number of
 * pixels. The main loop emits 8 pixels (four words) per iteration and the
 * tail loop emits 2 pixels (one word) per iteration.
 */
.global yv12_to_yuy2_line_arm
.func yv12_to_yuy2_line_arm
yv12_to_yuy2_line_arm:
#define DST r0
#define SRC_Y r1
#define SRC_U r2
#define SRC_V r3
#define WIDTH ip
/* Fetch the stack-passed width before the push below moves sp */
ldr ip, [sp], #0
/* r10 is saved as well because it is used as TMP2 */
stmfd sp!, {r4-r8, r10, lr}
#define TMP1 r8
/*
 * TMP2 must not be r12: ip is an alias of r12 and holds WIDTH, so using r12
 * as a scratch register would destroy the pixel counter on every byte load.
 */
#define TMP2 r10
#define TMP3 lr
/* Drop an odd trailing pixel, then bias the counter by -8 for the main loop */
bic WIDTH, #1
subs WIDTH, #8
blt 2f
1: /* Main loop: 8 pixels in, four 32-bit words out */
ldrb r4, [SRC_Y], #1
ldrb TMP1, [SRC_U], #1
ldrb TMP2, [SRC_Y], #1
ldrb TMP3, [SRC_V], #1
add r4, r4, TMP1, lsl #8
add r4, r4, TMP2, lsl #16
add r4, r4, TMP3, lsl #24
ldrb r5, [SRC_Y], #1
ldrb TMP1, [SRC_U], #1
ldrb TMP2, [SRC_Y], #1
ldrb TMP3, [SRC_V], #1
add r5, r5, TMP1, lsl #8
add r5, r5, TMP2, lsl #16
add r5, r5, TMP3, lsl #24
ldrb r6, [SRC_Y], #1
ldrb TMP1, [SRC_U], #1
ldrb TMP2, [SRC_Y], #1
ldrb TMP3, [SRC_V], #1
add r6, r6, TMP1, lsl #8
add r6, r6, TMP2, lsl #16
add r6, r6, TMP3, lsl #24
ldrb r7, [SRC_Y], #1
ldrb TMP1, [SRC_U], #1
ldrb TMP2, [SRC_Y], #1
ldrb TMP3, [SRC_V], #1
add r7, r7, TMP1, lsl #8
add r7, r7, TMP2, lsl #16
add r7, r7, TMP3, lsl #24
stmia DST!, {r4-r7}
subs WIDTH, WIDTH, #8
bge 1b
2: /* Undo the -8 bias; WIDTH is now the number of pixels still to convert */
adds WIDTH, WIDTH, #8
ble 4f
3: /* Tail loop: 2 pixels in, one 32-bit word out */
ldrb r4, [SRC_Y], #1
ldrb TMP1, [SRC_U], #1
ldrb TMP2, [SRC_Y], #1
ldrb TMP3, [SRC_V], #1
add r4, r4, TMP1, lsl #8
add r4, r4, TMP2, lsl #16
add r4, r4, TMP3, lsl #24
str r4, [DST], #4
subs WIDTH, WIDTH, #2
bgt 3b
4: /* Restore callee-saved registers and return by popping into pc */
ldmfd sp!, {r4-r8, r10, pc}
#undef DST
#undef SRC_Y
#undef SRC_U
#undef SRC_V
#undef WIDTH
#undef TMP1
#undef TMP2
#undef TMP3
.endfunc
/*******************************************************/
/*
 * Register assignment shared by the YV12 -> YUV420 line converters that the
 * template below generates. The chroma pointer is called SRC_U here whatever
 * plane the caller passes in. TMP1-TMP3 are scratch registers; r10 and lr are
 * saved and restored by the generated prologue and epilogue.
 */
#define DST r0
#define SRC_Y r1
#define SRC_U r2
#define WIDTH r3
#define TMP1 r10
#define TMP2 r12
#define TMP3 lr
/*
 * Emits one complete converter function.
 *   function_name - global symbol to define
 *   USE_PLD       - non-zero: emit pld cache preload hints
 *   USE_ARMV6     - non-zero: use the ldrh/rev16 based macroblock variants
 *
 * Output order, as built by the byte-wise variants below, for every 8 source
 * pixels (12 output bytes, lowest register byte first):
 *   Y0 U0 U1 Y1 | Y3 Y2 Y4 U2 | U3 Y5 Y7 Y6
 */
.macro YUV420_function_template function_name, USE_PLD, USE_ARMV6
.align
.global \function_name
.func \function_name
\function_name:
/* Read information about 4 pixels, convert them to YUV420 and store into 6 bytes using 16-bit writes */
.macro CONVERT_4_PIXELS_MACROBLOCK
ldrb r4, [SRC_Y], #1
ldrb TMP1, [SRC_U], #1
ldrb r5, [SRC_U], #1
ldrb TMP2, [SRC_Y], #1
ldrb r6, [SRC_Y, #1]
ldrb TMP3, [SRC_Y], #2
add r4, r4, TMP1, lsl #8
add r5, r5, TMP2, lsl #8
add r6, r6, TMP3, lsl #8
strh r4, [DST], #2
strh r5, [DST], #2
strh r6, [DST], #2
.endm
.if \USE_ARMV6
/*
 * ARMv6 variants. Each macro finishes the word in DST_REG1 and can overlap
 * the loads for the following macro:
 *   FLAG1 == 0 - nothing was preloaded, so DST_REG1, TMP1 and TMP2 are loaded here
 *   FLAG1 == 1 - DST_REG1, TMP1 and TMP2 were already loaded by the FLAG2
 *                sections of the preceding macro
 *   FLAG2 == 1 - preload DST_REG2, TMP1 and TMP2 for the macro that follows
 * The word is assembled with its byte pairs swapped and rev16 then puts each
 * 16-bit half into the final order. Luma is partly read with ldrh.
 */
.macro CONVERT_8_PIXELS_MACROBLOCK_1 DST_REG1, DST_REG2, FLAG1, FLAG2, PLD_FLAG
.if \FLAG1 == 0
ldrb \DST_REG1, [SRC_U], #1
ldrh TMP1, [SRC_Y], #2
ldrb TMP2, [SRC_U], #1
.endif
.if \FLAG2 == 1
ldrh \DST_REG2, [SRC_Y], #2
.endif
.if \PLD_FLAG == 1
pld [SRC_Y, #48]
.endif
add \DST_REG1, \DST_REG1, TMP1, lsl #8
add \DST_REG1, \DST_REG1, TMP2, lsl #24
.if \FLAG2 == 1
ldrb TMP1, [SRC_U], #1
ldrb TMP2, [SRC_Y], #1
.endif
rev16 \DST_REG1, \DST_REG1
.endm
/* Second output word of an 8 pixel group (ARMv6 variant) */
.macro CONVERT_8_PIXELS_MACROBLOCK_2 DST_REG1, DST_REG2, FLAG1, FLAG2, DUMMY1
.if \FLAG1 == 0
ldrh \DST_REG1, [SRC_Y], #2
ldrb TMP1, [SRC_U], #1
ldrb TMP2, [SRC_Y], #1
.endif
.if \FLAG2 == 1
ldrb \DST_REG2, [SRC_Y], #1
.endif
add \DST_REG1, \DST_REG1, TMP1, lsl #16
add \DST_REG1, \DST_REG1, TMP2, lsl #24
.if \FLAG2 == 1
ldrb TMP1, [SRC_U], #1
ldrh TMP2, [SRC_Y], #2
.endif
rev16 \DST_REG1, \DST_REG1
.endm
/* Third output word of an 8 pixel group (ARMv6 variant) */
.macro CONVERT_8_PIXELS_MACROBLOCK_3 DST_REG1, DST_REG2, FLAG1, FLAG2, DUMMY1
.if \FLAG1 == 0
ldrb \DST_REG1, [SRC_Y], #1
ldrb TMP1, [SRC_U], #1
ldrh TMP2, [SRC_Y], #2
.endif
.if \FLAG2 == 1
ldrb \DST_REG2, [SRC_U], #1
.endif
add \DST_REG1, \DST_REG1, TMP1, lsl #8
add \DST_REG1, \DST_REG1, TMP2, lsl #16
.if \FLAG2 == 1
ldrh TMP1, [SRC_Y], #2
ldrb TMP2, [SRC_U], #1
.endif
rev16 \DST_REG1, \DST_REG1
.endm
.else
/*
 * Byte-wise variants. They take the same five arguments as the ARMv6 macros
 * so that the call sites are shared, but only the first argument and, for
 * macroblock 1, PLD_FLAG are used.
 */
/* Prepare the first 32-bit output value for 8 pixels macroblock */
.macro CONVERT_8_PIXELS_MACROBLOCK_1 DST_REG, DUMMY1, DUMMY2, DUMMY3, PLD_FLAG
ldrb \DST_REG, [SRC_Y], #1
ldrb TMP1, [SRC_U], #1
ldrb TMP2, [SRC_U], #1
ldrb TMP3, [SRC_Y], #1
.if \USE_PLD && (\PLD_FLAG == 1)
pld [SRC_Y, #48]
.endif
add \DST_REG, \DST_REG, TMP1, lsl #8
add \DST_REG, \DST_REG, TMP2, lsl #16
add \DST_REG, \DST_REG, TMP3, lsl #24
.endm
/* Prepare the second 32-bit output value for 8 pixels macroblock */
.macro CONVERT_8_PIXELS_MACROBLOCK_2 DST_REG, DUMMY1, DUMMY2, DUMMY3, DUMMY4
ldrb \DST_REG, [SRC_Y, #1]
ldrb TMP1, [SRC_Y], #2
ldrb TMP2, [SRC_Y], #1
ldrb TMP3, [SRC_U], #1
add \DST_REG, \DST_REG, TMP1, lsl #8
add \DST_REG, \DST_REG, TMP2, lsl #16
add \DST_REG, \DST_REG, TMP3, lsl #24
.endm
/* Prepare the third 32-bit output value for 8 pixels macroblock */
.macro CONVERT_8_PIXELS_MACROBLOCK_3 DST_REG, DUMMY1, DUMMY2, DUMMY3, DUMMY4
ldrb \DST_REG, [SRC_U], #1
ldrb TMP1, [SRC_Y], #1
ldrb TMP2, [SRC_Y, #1]
ldrb TMP3, [SRC_Y], #2
add \DST_REG, \DST_REG, TMP1, lsl #8
add \DST_REG, \DST_REG, TMP2, lsl #16
add \DST_REG, \DST_REG, TMP3, lsl #24
.endm
.endif
.if \USE_PLD
pld [SRC_Y]
.endif
stmfd sp!, {r4-r8, r10, lr}
/* Destination buffer should be at least 16-bit aligned, image width should be multiple of 4 */
bic DST, #1
bic WIDTH, #3
/* Ensure 32-bit alignment of the destination buffer */
tst DST, #2
beq 1f
/* Emit one 4 pixel (6 byte) group with 16-bit stores so DST becomes 32-bit aligned */
subs WIDTH, #4
blt 6f
CONVERT_4_PIXELS_MACROBLOCK
1:
/* Bias the counter by -32 so the sign flag tells whether a full iteration remains */
subs WIDTH, #32
blt 3f
2: /* Convert 32 pixels per loop iteration */
CONVERT_8_PIXELS_MACROBLOCK_1 r4, r6, 0, 1, 1 /* Also do cache preload for SRC_Y */
CONVERT_8_PIXELS_MACROBLOCK_2 r6, r7, 1, 1, 0
CONVERT_8_PIXELS_MACROBLOCK_3 r7, r8, 1, 1, 0
CONVERT_8_PIXELS_MACROBLOCK_1 r8, r5, 1, 1, 0
stmia DST!, {r4, r6, r7, r8}
/* The flags set here are consumed by the bge at the end of the iteration; nothing in between writes them */
subs WIDTH, #32
CONVERT_8_PIXELS_MACROBLOCK_2 r5, r6, 1, 1, 0
CONVERT_8_PIXELS_MACROBLOCK_3 r6, r7, 1, 1, 0
CONVERT_8_PIXELS_MACROBLOCK_1 r7, r8, 1, 1, 0
CONVERT_8_PIXELS_MACROBLOCK_2 r8, r4, 1, 1, 0
stmia DST!, {r5, r6, r7, r8}
.if \USE_PLD
/* Do cache preload for SRC_U */
pld [SRC_U, #48]
.endif
CONVERT_8_PIXELS_MACROBLOCK_3 r4, r6, 1, 1, 0
CONVERT_8_PIXELS_MACROBLOCK_1 r6, r7, 1, 1, 0
CONVERT_8_PIXELS_MACROBLOCK_2 r7, r8, 1, 1, 0
/* FLAG2 is 0 on the last macro so that nothing is read ahead past this 32 pixel group */
CONVERT_8_PIXELS_MACROBLOCK_3 r8, r4, 1, 0, 0
stmia DST!, {r4, r6, r7, r8}
bge 2b
3:
/* Undo the -32 bias; WIDTH is now the number of pixels still to convert */
adds WIDTH, WIDTH, #32
ble 6f
subs WIDTH, WIDTH, #8
blt 5f
4: /* Convert remaining pixels processing them 8 per iteration */
CONVERT_8_PIXELS_MACROBLOCK_1 r4, r5, 0, 1, 0
CONVERT_8_PIXELS_MACROBLOCK_2 r5, r6, 1, 1, 0
CONVERT_8_PIXELS_MACROBLOCK_3 r6, r7, 1, 0, 0
stmia DST!, {r4-r6}
subs WIDTH, WIDTH, #8
bge 4b
5: /* Convert the last 4 pixels if needed */
adds WIDTH, WIDTH, #8
ble 6f
CONVERT_4_PIXELS_MACROBLOCK
/* NOTE(review): WIDTH is a multiple of 4 below 8 at this point, so this branch looks never taken - confirm */
subs WIDTH, #4
bgt 4b
6: /* Restore all registers and return */
ldmfd sp!, {r4-r8, r10, pc}
/* Remove the helper macros so the next template instantiation can define them again */
.purgem CONVERT_4_PIXELS_MACROBLOCK
.purgem CONVERT_8_PIXELS_MACROBLOCK_1
.purgem CONVERT_8_PIXELS_MACROBLOCK_2
.purgem CONVERT_8_PIXELS_MACROBLOCK_3
#undef DST
#undef SRC_Y
#undef SRC_U
#undef WIDTH
#undef TMP1
#undef TMP2
#undef TMP3
.endfunc
.endm
/* Instantiate the three variants: plain ARM, with pld hints, and with pld plus ARMv6 instructions */
YUV420_function_template yv12_to_yuv420_line_arm, 0, 0
YUV420_function_template yv12_to_yuv420_line_armv5, 1, 0
YUV420_function_template yv12_to_yuv420_line_armv6, 1, 1
/*
* ARM assembly optimized color format conversion functions
* (YV12 -> YUY2, YV12 -> some custom YUV420 format used by
* Epson graphics chip in Nokia N800)
*
* Copyright (C) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
#ifndef __ARM_COLORCONV_H__
#define __ARM_COLORCONV_H__
#include <stdint.h>
/**
 * Convert a line of pixels from YV12 to YUY2 color format.
 * Every two pixels produce one 32-bit output word built from two luma bytes
 * and one byte from each chroma plane.
 * @param dst - destination buffer for YUY2 pixel data, it should be 32-bit aligned
 * @param src_y - pointer to Y plane
 *                NOTE(review): declared as a uint16_t pointer although no alignment
 *                requirement is documented here and the non-ARMv6 siblings below
 *                take uint8_t pointers - confirm the intended type against callers
 * @param src_u - pointer to U plane
 * @param src_v - pointer to V plane
 * @param w - number of pixels to convert (should be multiple of 2; the
 *            implementation clears the lowest bit, so an odd trailing pixel
 *            is not converted)
 */
void yv12_to_yuy2_line_arm(uint32_t *dst, const uint16_t *src_y, const uint8_t *src_u, const uint8_t *src_v, int w);
/**
 * Convert a line of pixels from YV12 to YUV420 color format (generic ARM variant).
 * Every 4 pixels produce 6 output bytes.
 * @param dst - destination buffer for YUV420 pixel data, it should be at least 16-bit aligned
 *              (the implementation clears the lowest address bit)
 * @param src_y - pointer to Y plane
 * @param src_c - pointer to chroma plane (U for even lines, V for odd lines)
 * @param w - number of pixels to convert (should be multiple of 4; the
 *            implementation clears the two lowest bits)
 */
void yv12_to_yuv420_line_arm(uint16_t *dst, const uint8_t *src_y, const uint8_t *src_c, int w);
/**
 * Convert a line of pixels from YV12 to YUV420 color format.
 * Same contract as yv12_to_yuv420_line_arm(); this variant additionally
 * issues cache preload (pld) hints for the source planes.
 * @param dst - destination buffer for YUV420 pixel data, it should be at least 16-bit aligned
 * @param src_y - pointer to Y plane
 * @param src_c - pointer to chroma plane (U for even lines, V for odd lines)
 * @param w - number of pixels to convert (should be multiple of 4)
 */
void yv12_to_yuv420_line_armv5(uint16_t *dst, const uint8_t *src_y, const uint8_t *src_c, int w);
/**
 * Convert a line of pixels from YV12 to YUV420 color format.
 * This variant uses ARMv6 instructions and reads part of the Y plane with
 * 16-bit loads, hence the alignment requirement on src_y.
 * @param dst - destination buffer for YUV420 pixel data, it should be at least 16-bit aligned
 * @param src_y - pointer to Y plane, it should be 16-bit aligned
 * @param src_c - pointer to chroma plane (U for even lines, V for odd lines)
 * @param w - number of pixels to convert (should be multiple of 4)
 */
void yv12_to_yuv420_line_armv6(uint16_t *dst, const uint16_t *src_y, const uint8_t *src_c, int w);
#endif
/*
* Fast JIT powered scaler for ARM
*
* Copyright (C) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
#include <stdio.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <string.h>
#include "arm_jit_swscale.h"
#include "arm_colorconv.h"
/* Size of cpu instructions cache, we should never exceed it in generated code */
/* NOTE(review): presumably expressed in bytes - confirm where the limit is enforced */
#define INSTRUCTIONS_CACHE_SIZE 32768
/* Supported output formats */
#define FMT_OMAPFB_YUV422 1
#define FMT_OMAPFB_YUV420 2
/*
 * Declared by hand rather than through a header.
 * NOTE(review): presumably the toolchain-provided helper that flushes the
 * instruction cache for a range of freshly generated code - confirm against
 * the compiler documentation.
 */
extern void __clear_cache (char *beg, char *end);
/*
 * Scaler context.
 * API is similar to API from ffmpeg libswscale
 *
 * The users of this structure are not visible next to its definition, so the
 * field notes below are assumptions to be confirmed against them.
 */
typedef struct SwsContextArmJit {
int fmt; /* presumably one of the FMT_OMAPFB_* output formats - confirm */
int source_w; /* presumably source image width in pixels - confirm */
int source_h; /* presumably source image height in pixels - confirm */
int target_w; /* presumably scaled image width in pixels - confirm */
int target_h; /* presumably scaled image height in pixels - confirm */
uint32_t *codebuffer; /* presumably the buffer of generated 32-bit ARM instructions - confirm */
int *linebuffer; /* purpose not visible here - TODO document */
int armv6_is_supported; /* presumably non-zero when the ARMv6 code paths may be used - confirm */
} SwsContextArmJit;
//#define JIT_DEBUG
#define INTERPOLATE_COPY_FIRST 0
#define INTERPOLATE_AVERAGE_1_3 1
#define INTERPOLATE_AVERAGE_2_2 2
#define INTERPOLATE_AVERAGE_3_1 3
/**
* Get two nearest pixels from the source image
*
* @todo get rid of the floating point math
*/
static inline int get_pix(int quality, int orig_w, int dest_w, int x, int *p1, int *p2)
{
double offs = ((double)x + 0.5) / (double)dest_w * (double)orig_w;
double dist;
int pix1 = floor(offs - 0.5);
int pix2 = ceil(offs - 0.5);
// Special boundary cases
if (pix1 < 0) {
*p1 = *p2 = 0;
return INTERPOLATE_COPY_FIRST;
}
if (pix2 >= orig_w) {
*p1 = *p2 = orig_w - 1;
return INTERPOLATE_COPY_FIRST;
}
dist = offs - ((double)pix1 + 0.5);
#if 0
if (quality >= 3) {
if (dist > 0.125 && dist < 0.375) {
*p1 = pix1;
*p2 = pix2;
return INTERPOLATE_AVERAGE_3_1;
}
if (dist > 0.625 && dist < 0.875) {
*p1 = pix1;
*p2 = pix2;
return INTERPOLATE_AVERAGE_1_3;
}
}
#endif
if (quality >= 2) {
if (dist > 0.25 && dist < 0.75) {
*p1 = pix1;
*p2 = pix2;
return INTERPOLATE_AVERAGE_2_2;
}
}
if (dist < 0.5) {
*p1 = *p2 = pix1;
return INTERPOLATE_COPY_FIRST;
} else {
*p1 = *p2 = pix2;
return INTERPOLATE_COPY_FIRST;
}
}
/**
 * Append an "ldrb r<dstreg>, [r<basereg>, #<offset>]" instruction.
 *
 * The operands are OR-ed into the opcode as given; no masking or range
 * checking is performed.
 *
 * @param cmdbuffer - position in the code buffer to write the instruction to
 * @param dstreg - number of the register receiving the loaded byte
 * @param basereg - number of the register holding the base address
 * @param offset - immediate byte offset added to the base address
 * @return position right after the written instruction
 */
static uint32_t *generate_arm_cmd_ldrb_r_r_offs(uint32_t *cmdbuffer, int dstreg, int basereg, int offset)
{
    uint32_t insn = 0xE5D00000;

#ifdef JIT_DEBUG
    printf("ldrb r%d, [r%d, #%d]\n", dstreg, basereg, offset);
#endif
    insn |= (basereg << 16);
    insn |= (dstreg << 12);
    insn |= (offset);
    cmdbuffer[0] = insn;
    return cmdbuffer + 1;
}
/**
 * Append an "add r<dstreg>, r<r1>, r<r2>, lsl #<r2_shift>" instruction.
 *
 * The operands are OR-ed into the opcode as given; no masking or range
 * checking is performed.
 *
 * @param cmdbuffer - position in the code buffer to write the instruction to
 * @param dstreg - number of the destination register
 * @param r1 - number of the first operand register
 * @param r2 - number of the second operand register (shifted left before adding)
 * @param r2_shift - left shift amount applied to r2
 * @return position right after the written instruction
 */
static uint32_t *generate_arm_cmd_add_r_r_r_lsl(uint32_t *cmdbuffer, int dstreg, int r1, int r2, int r2_shift)
{
    uint32_t insn = 0xE0800000;

#ifdef JIT_DEBUG
    printf("add r%d, r%d, r%d, lsl #%d\n", dstreg, r1, r2, r2_shift);
#endif
    insn |= (r1 << 16);
    insn |= (dstreg << 12);
    insn |= (r2_shift << 7);
    insn |= (r2);
    cmdbuffer[0] = insn;
    return cmdbuffer + 1;
}
/**
 * Append a "mov r<dstreg>, r<r>, lsr #<shift>" instruction.
 *
 * The operands are OR-ed into the opcode as given; no masking or range
 * checking is performed.
 *
 * @param cmdbuffer - position in the code buffer to write the instruction to
 * @param dstreg - number of the destination register
 * @param r - number of the source register
 * @param shift - logical right shift amount applied to the source register
 * @return position right after the written instruction
 */
static uint32_t *generate_arm_cmd_mov_r_r_lsr(uint32_t *cmdbuffer, int dstreg, int r, int shift)
{
    uint32_t insn = 0xE1A00020;

#ifdef JIT_DEBUG
    printf("mov r%d, r%d, lsr #%d\n", dstreg, r, shift);
#endif
    insn |= (dstreg << 12);
    insn |= (shift << 7);
    insn |= (r);
    cmdbuffer[0] = insn;
    return cmdbuffer + 1;
}
/**
* Generation of 32-bit output scaled data
* @param quality - scaling quality level
* @param buf1reg - register that holds a pointer to the buffer with data for the first output byte
* @param buf2reg - register that holds a pointer to the buffer with data for the second output byte
* @param buf3reg - register that holds a pointer to the buffer with data for the third output byte
* @param buf4reg - register that holds a pointer to the buffer with data for the fourth output byte
*/
static uint32_t *generate_32bit_scaled_data_write(
uint32_t *p,
int quality, int orig_w, int dest_w,
int buf1reg, int size1, int offs1,
int buf2reg, int size2, int offs2,
int buf3reg, int size3, int offs3,
int buf4reg, int size4, int offs4)
{
int p1, p2;
int type_y1, type_y2, type_u, type_v;
// First stage: perform data loading
type_y1 = get_pix(quality, orig_w / size1, dest_w / size1, offs1 / size1, &p1, &p2);
if (type_y1 == INTERPOLATE_COPY_FIRST) {
// Special case, no interpolation is needed, so load this data
// directly into destination register
p = generate_arm_cmd_ldrb_r_r_offs(p, 4, buf1reg, p1);
} else {
p = generate_arm_cmd_ldrb_r_r_offs(p, 5, buf1reg, p1);
p = generate_arm_cmd_ldrb_r_r_offs(p, 6, buf1reg, p2);
}
// u
type_u = get_pix(quality, orig_w / size2, dest_w / size2, offs2 / size2, &p1, &p2);
p = generate_arm_cmd_ldrb_r_r_offs(p, 7, buf2reg, p1);
if (type_u != INTERPOLATE_COPY_FIRST) p = generate_arm_cmd_ldrb_r_r_offs(p, 8, buf2reg, p2);
// y2
type_y2 = get_pix(quality, orig_w / size3, dest_w / size3, offs3 / size3, &p1, &p2);
p = generate_arm_cmd_ldrb_r_r_offs(p, 9, buf3reg, p1);
if (type_y2 != INTERPOLATE_COPY_FIRST) p = generate_arm_cmd_ldrb_r_r_offs(p, 10, buf3reg, p2);
// v
type_v = get_pix(quality, orig_w / size4, dest_w / size4, offs4 / size4, &p1, &p2);
p = generate_arm_cmd_ldrb_r_r_offs(p, 11, buf4reg, p1);
if (type_v != INTERPOLATE_COPY_FIRST) p = generate_arm_cmd_ldrb_r_r_offs(p, 12, buf4reg, p2);
// Second stage: perform data shuffling
if (type_y1 == INTERPOLATE_AVERAGE_2_2) {
p = generate_arm_cmd_add_r_r_r_lsl(p, 14, 5, 6, 0);
p = generate_arm_cmd_mov_r_r_lsr(p, 4, 14, 1);