Commit f9f0bbc1, authored by Steve Lhomme, committed by Jean-Baptiste Kempf

copy: only copy the src_pitch pixels on each line not the whole width

parent 1bea49c2
......@@ -121,8 +121,7 @@ static int Extract( vlc_va_t *va, picture_t *p_picture, uint8_t *data )
pp_plane[i] = (uint8_t*)p_base + image.offsets[i_src_plane];
pi_pitch[i] = image.pitches[i_src_plane];
}
CopyFromYv12( p_picture, pp_plane, pi_pitch, sys->width, sys->height,
&sys->image_cache );
CopyFromYv12( p_picture, pp_plane, pi_pitch, sys->height, &sys->image_cache );
}
else
{
......@@ -135,8 +134,7 @@ static int Extract( vlc_va_t *va, picture_t *p_picture, uint8_t *data )
pp_plane[i] = (uint8_t*)p_base + image.offsets[i];
pi_pitch[i] = image.pitches[i];
}
CopyFromNv12( p_picture, pp_plane, pi_pitch, sys->width, sys->height,
&sys->image_cache );
CopyFromNv12( p_picture, pp_plane, pi_pitch, sys->height, &sys->image_cache );
}
vaUnmapBuffer(sys->hw_ctx.display, image.buf);
......
......@@ -69,7 +69,7 @@ static void copy420YpCbCr8Planar(picture_t *p_pic,
pi_pitch[i] = CVPixelBufferGetBytesPerRowOfPlane(buffer, i);
}
CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_width, i_height);
CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_height);
CVPixelBufferUnlockBaseAddress(buffer, 0);
}
......
......@@ -223,7 +223,7 @@ void CopyOmxPicture( int i_color_format, picture_t *p_pic,
copy_cache_t *p_surface_cache = (copy_cache_t*)p_architecture_specific->data;
uint8_t *ppi_src_pointers[2] = { p_src, p_src + i_src_stride * i_slice_height };
size_t pi_src_strides[2] = { i_src_stride, i_src_stride };
CopyFromNv12( p_pic, ppi_src_pointers, pi_src_strides, i_src_stride, i_slice_height, p_surface_cache );
CopyFromNv12( p_pic, ppi_src_pointers, pi_src_strides, i_slice_height, p_surface_cache );
return;
}
#endif
......
......@@ -264,14 +264,14 @@ static void SSE_SplitUV(uint8_t *dstu, size_t dstu_pitch,
static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
const uint8_t *src, size_t src_pitch,
uint8_t *cache, size_t cache_size,
unsigned width, unsigned height, unsigned cpu)
unsigned height, unsigned cpu)
{
const unsigned w16 = (width+15) & ~15;
const unsigned w16 = (src_pitch+15) & ~15;
const unsigned hstep = cache_size / w16;
assert(hstep > 0);
if (src_pitch == dst_pitch)
memcpy(dst, src, width * height);
memcpy(dst, src, src_pitch * height);
else
for (unsigned y = 0; y < height; y += hstep) {
const unsigned hblock = __MIN(hstep, height - y);
......@@ -279,12 +279,12 @@ static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
/* Copy a bunch of line into our cache */
CopyFromUswc(cache, w16,
src, src_pitch,
width, hblock, cpu);
src_pitch, hblock, cpu);
/* Copy from our cache to the destination */
Copy2d(dst, dst_pitch,
cache, w16,
width, hblock);
src_pitch, hblock);
/* */
src += src_pitch * hblock;
......@@ -296,9 +296,9 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
uint8_t *dstv, size_t dstv_pitch,
const uint8_t *src, size_t src_pitch,
uint8_t *cache, size_t cache_size,
unsigned width, unsigned height, unsigned cpu)
unsigned height, unsigned cpu)
{
const unsigned w16 = (2*width+15) & ~15;
const unsigned w16 = (2*src_pitch+15) & ~15;
const unsigned hstep = cache_size / w16;
assert(hstep > 0);
......@@ -307,11 +307,11 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
/* Copy a bunch of line into our cache */
CopyFromUswc(cache, w16, src, src_pitch,
2*width, hblock, cpu);
2*src_pitch, hblock, cpu);
/* Copy from our cache to the destination */
SSE_SplitUV(dstu, dstu_pitch, dstv, dstv_pitch,
cache, w16, width, hblock, cpu);
cache, w16, src_pitch, hblock, cpu);
/* */
src += src_pitch * hblock;
......@@ -322,24 +322,24 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
static void SSE_CopyFromNv12(picture_t *dst,
uint8_t *src[2], size_t src_pitch[2],
unsigned width, unsigned height,
unsigned height,
copy_cache_t *cache, unsigned cpu)
{
SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
src[0], src_pitch[0],
cache->buffer, cache->size,
width, height, cpu);
height, cpu);
SSE_SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
dst->p[1].p_pixels, dst->p[1].i_pitch,
src[1], src_pitch[1],
cache->buffer, cache->size,
(width+1)/2, (height+1)/2, cpu);
(height+1)/2, cpu);
asm volatile ("emms");
}
static void SSE_CopyFromYv12(picture_t *dst,
uint8_t *src[3], size_t src_pitch[3],
unsigned width, unsigned height,
unsigned height,
copy_cache_t *cache, unsigned cpu)
{
for (unsigned n = 0; n < 3; n++) {
......@@ -347,7 +347,7 @@ static void SSE_CopyFromYv12(picture_t *dst,
SSE_CopyPlane(dst->p[n].p_pixels, dst->p[n].i_pitch,
src[n], src_pitch[n],
cache->buffer, cache->size,
(width+d-1)/d, (height+d-1)/d, cpu);
(height+d-1)/d, cpu);
}
asm volatile ("emms");
}
......@@ -355,33 +355,33 @@ static void SSE_CopyFromYv12(picture_t *dst,
static void SSE_CopyFromNv12ToNv12(picture_t *dst,
uint8_t *src[2], size_t src_pitch[2],
unsigned width, unsigned height,
unsigned height,
copy_cache_t *cache, unsigned cpu)
{
SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
src[0], src_pitch[0],
cache->buffer, cache->size,
width, height, cpu);
height, cpu);
SSE_CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
src[1], src_pitch[1],
cache->buffer, cache->size,
width, height/2, cpu);
height/2, cpu);
asm volatile ("emms");
}
static void SSE_CopyFromI420ToNv12(picture_t *dst,
uint8_t *src[2], size_t src_pitch[2],
unsigned width, unsigned height,
unsigned height,
copy_cache_t *cache, unsigned cpu)
{
SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
src[0], src_pitch[0],
cache->buffer, cache->size,
width, height, cpu);
height, cpu);
/* TODO optimise the plane merging */
const unsigned copy_lines = height / 2;
const unsigned copy_pitch = width / 2;
const unsigned copy_pitch = src_pitch[1];
const int i_extra_pitch_uv = dst->p[1].i_pitch - 2 * copy_pitch;
const int i_extra_pitch_u = src_pitch[U_PLANE] - copy_pitch;
......@@ -408,13 +408,13 @@ static void SSE_CopyFromI420ToNv12(picture_t *dst,
static void CopyPlane(uint8_t *dst, size_t dst_pitch,
const uint8_t *src, size_t src_pitch,
unsigned width, unsigned height)
unsigned height)
{
if (src_pitch == dst_pitch)
memcpy(dst, src, width * height);
memcpy(dst, src, src_pitch * height);
else
for (unsigned y = 0; y < height; y++) {
memcpy(dst, src, width);
memcpy(dst, src, src_pitch);
src += src_pitch;
dst += dst_pitch;
}
......@@ -423,10 +423,10 @@ static void CopyPlane(uint8_t *dst, size_t dst_pitch,
static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
uint8_t *dstv, size_t dstv_pitch,
const uint8_t *src, size_t src_pitch,
unsigned width, unsigned height)
unsigned height)
{
for (unsigned y = 0; y < height; y++) {
for (unsigned x = 0; x < width; x++) {
for (unsigned x = 0; x < src_pitch; x++) {
dstu[x] = src[2*x+0];
dstv[x] = src[2*x+1];
}
......@@ -437,79 +437,69 @@ static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
}
void CopyFromNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
unsigned width, unsigned height,
copy_cache_t *cache)
unsigned height, copy_cache_t *cache)
{
#ifdef CAN_COMPILE_SSE2
unsigned cpu = vlc_CPU();
if (vlc_CPU_SSE2())
return SSE_CopyFromNv12(dst, src, src_pitch, width, height,
return SSE_CopyFromNv12(dst, src, src_pitch, height,
cache, cpu);
#else
(void) cache;
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
src[0], src_pitch[0],
width, height);
src[0], src_pitch[0], height);
SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
dst->p[1].p_pixels, dst->p[1].i_pitch,
src[1], src_pitch[1],
width/2, height/2);
src[1], src_pitch[1], height/2);
}
void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
unsigned width, unsigned height,
copy_cache_t *cache)
unsigned height, copy_cache_t *cache)
{
#ifdef CAN_COMPILE_SSE2
unsigned cpu = vlc_CPU();
if (vlc_CPU_SSE2())
return SSE_CopyFromNv12ToNv12(dst, src, src_pitch, width, height,
return SSE_CopyFromNv12ToNv12(dst, src, src_pitch, height,
cache, cpu);
#else
(void) cache;
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
src[0], src_pitch[0],
width, height);
src[0], src_pitch[0], height);
CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
src[1], src_pitch[1],
width, height/2);
src[1], src_pitch[1], height/2);
}
void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
unsigned width, unsigned height)
unsigned height)
{
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
src[0], src_pitch[0],
width, height);
src[0], src_pitch[0], height);
SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
dst->p[2].p_pixels, dst->p[2].i_pitch,
src[1], src_pitch[1],
width/2, height/2);
src[1], src_pitch[1], height/2);
}
void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
unsigned width, unsigned height,
copy_cache_t *cache)
unsigned height, copy_cache_t *cache)
{
#ifdef CAN_COMPILE_SSE2
unsigned cpu = vlc_CPU();
if (vlc_CPU_SSE2())
return SSE_CopyFromI420ToNv12(dst, src, src_pitch, width, height,
return SSE_CopyFromI420ToNv12(dst, src, src_pitch, height,
cache, cpu);
#else
(void) cache;
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
src[0], src_pitch[0],
width, height);
src[0], src_pitch[0], height);
const unsigned copy_lines = height / 2;
const unsigned copy_pitch = width / 2;
const unsigned copy_pitch = src_pitch[1];
const int i_extra_pitch_uv = dst->p[1].i_pitch - 2 * copy_pitch;
const int i_extra_pitch_u = src_pitch[U_PLANE] - copy_pitch;
......@@ -533,22 +523,21 @@ void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
unsigned width, unsigned height,
copy_cache_t *cache)
unsigned height, copy_cache_t *cache)
{
#ifdef CAN_COMPILE_SSE2
unsigned cpu = vlc_CPU();
if (vlc_CPU_SSE2())
return SSE_CopyFromYv12(dst, src, src_pitch, width, height,
return SSE_CopyFromYv12(dst, src, src_pitch, height,
cache, cpu);
#else
(void) cache;
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
src[0], src_pitch[0], width, height);
src[0], src_pitch[0], height);
CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
src[1], src_pitch[1], width / 2, height / 2);
src[1], src_pitch[1], height / 2);
CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch,
src[2], src_pitch[2], width / 2, height / 2);
src[2], src_pitch[2], height / 2);
}
......@@ -36,22 +36,18 @@ void CopyCleanCache(copy_cache_t *cache);
/* Copy planes from NV12 to YV12 */
void CopyFromNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
unsigned width, unsigned height,
copy_cache_t *cache);
unsigned height, copy_cache_t *cache);
/* Copy planes from YV12 to YV12 */
void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
unsigned width, unsigned height,
copy_cache_t *cache);
unsigned height, copy_cache_t *cache);
void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
unsigned width, unsigned height,
copy_cache_t *cache);
unsigned height, copy_cache_t *cache);
void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
unsigned width, unsigned height);
unsigned height);
void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
unsigned width, unsigned height,
copy_cache_t *cache);
unsigned height, copy_cache_t *cache);
#endif
......@@ -89,7 +89,6 @@ static void CVPX_I420(filter_t *p_filter, picture_t *sourcePicture, picture_t *d
}
CopyFromNv12ToI420(destinationPicture, pp_plane, pi_pitch,
sourcePicture->format.i_width,
sourcePicture->format.i_height);
CVPixelBufferUnlockBaseAddress(picsys->pixelBuffer, 0);
......
......@@ -153,8 +153,7 @@ static void D3D11_YUY2(filter_t *p_filter, picture_t *src, picture_t *dst)
+ pitch[1] * src->format.i_height / 2,
};
CopyFromYv12(dst, plane, pitch, src->format.i_width,
src->format.i_height, &sys->cache);
CopyFromYv12(dst, plane, pitch, src->format.i_height, &sys->cache);
} else if (desc.Format == DXGI_FORMAT_NV12) {
uint8_t *plane[2] = {
lock.pData,
......@@ -164,8 +163,7 @@ static void D3D11_YUY2(filter_t *p_filter, picture_t *src, picture_t *dst)
lock.RowPitch,
lock.RowPitch,
};
CopyFromNv12(dst, plane, pitch, src->format.i_width,
src->format.i_height, &sys->cache);
CopyFromNv12(dst, plane, pitch, src->format.i_height, &sys->cache);
} else {
msg_Err(p_filter, "Unsupported D3D11VA conversion from 0x%08X to YV12", desc.Format);
}
......@@ -223,8 +221,7 @@ static void D3D11_NV12(filter_t *p_filter, picture_t *src, picture_t *dst)
lock.RowPitch,
lock.RowPitch,
};
CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_width,
src->format.i_height, &sys->cache);
CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_height, &sys->cache);
} else {
msg_Err(p_filter, "Unsupported D3D11VA conversion from 0x%08X to NV12", desc.Format);
}
......
......@@ -108,8 +108,7 @@ static void DXA9_YV12(filter_t *p_filter, picture_t *src, picture_t *dst)
plane[1] = plane[2];
plane[2] = V;
}
CopyFromYv12(dst, plane, pitch, src->format.i_width,
src->format.i_height, p_copy_cache);
CopyFromYv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
} else if (desc.Format == MAKEFOURCC('N','V','1','2')) {
uint8_t *plane[2] = {
lock.pBits,
......@@ -119,8 +118,7 @@ static void DXA9_YV12(filter_t *p_filter, picture_t *src, picture_t *dst)
lock.Pitch,
lock.Pitch,
};
CopyFromNv12(dst, plane, pitch, src->format.i_width,
src->format.i_height, p_copy_cache);
CopyFromNv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
} else {
msg_Err(p_filter, "Unsupported DXA9 conversion from 0x%08X to YV12", desc.Format);
}
......@@ -153,8 +151,7 @@ static void DXA9_NV12(filter_t *p_filter, picture_t *src, picture_t *dst)
lock.Pitch,
lock.Pitch,
};
CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_width,
src->format.i_height, p_copy_cache);
CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
} else {
msg_Err(p_filter, "Unsupported DXA9 conversion from 0x%08X to NV12", desc.Format);
}
......
......@@ -131,7 +131,6 @@ static void I420_YUV( filter_sys_t *p_sys, picture_t *p_src, picture_t *p_dst, b
};
CopyFromI420ToNv12( p_dst, plane, pitch,
p_src->format.i_x_offset + p_src->format.i_visible_width,
p_src->format.i_y_offset + p_src->format.i_visible_height,
&p_sys->cache );
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment