diff --git a/modules/video_output/win32/d3d_dynamic_shader.c b/modules/video_output/win32/d3d_dynamic_shader.c index 132160e8be9c0126d97a6274d8369239993abf9e..e41a818063dadfb5fc7a0c58ca38bc6e404935e2 100644 --- a/modules/video_output/win32/d3d_dynamic_shader.c +++ b/modules/video_output/win32/d3d_dynamic_shader.c @@ -38,7 +38,6 @@ static const char globPixelShaderDefault[] = "\ cbuffer PS_CONSTANT_BUFFER : register(b0)\n\ {\n\ - float4x4 WhitePoint;\n\ float4x4 Colorspace;\n\ float4x4 Primaries;\n\ float Opacity;\n\ @@ -267,7 +266,7 @@ float4 main( PS_INPUT In ) : SV_TARGET\n\ sample = sampleTexture( borderSampler, In.uv );\n\ else\n\ sample = sampleTexture( normalSampler, In.uv );\n\ - float4 rgba = max(mul(mul(sample, WhitePoint), Colorspace),0);\n\ + float4 rgba = max(mul(sample, Colorspace),0);\n\ float opacity = rgba.a * Opacity;\n\ float4 rgb = rgba; rgb.a = 0;\n\ rgb = sourceToLinear(rgb);\n\ diff --git a/modules/video_output/win32/d3d_shaders.c b/modules/video_output/win32/d3d_shaders.c index b1dc10435f7e1b3e605b1680aa8548fb7342c197..527724632eb6ade95874453dea4ca56e695e21bd 100644 --- a/modules/video_output/win32/d3d_shaders.c +++ b/modules/video_output/win32/d3d_shaders.c @@ -283,6 +283,45 @@ bool D3D_UpdateQuadLuminanceScale(d3d_quad_t *quad, float luminanceScale) return true; } +static void MultMat4(FLOAT dst[4*4], const FLOAT left[4*4], const FLOAT right[4*4]) +{ + // Cache the invariants in registers + FLOAT x = left[0*4 + 0]; + FLOAT y = left[0*4 + 1]; + FLOAT z = left[0*4 + 2]; + FLOAT w = left[0*4 + 3]; + // Perform the operation on the first row + dst[0*4 + 0] = (right[0*4 + 0] * x) + (right[1*4 + 0] * y) + (right[2*4 + 0] * z) + (right[3*4 + 0] * w); + dst[0*4 + 1] = (right[0*4 + 1] * x) + (right[1*4 + 1] * y) + (right[2*4 + 1] * z) + (right[3*4 + 1] * w); + dst[0*4 + 2] = (right[0*4 + 2] * x) + (right[1*4 + 2] * y) + (right[2*4 + 2] * z) + (right[3*4 + 2] * w); + dst[0*4 + 3] = (right[0*4 + 3] * x) + (right[1*4 + 3] * y) + (right[2*4 + 3] * z) + (right[3*4 + 3] * w); + // Repeat for all the other rows + x = left[1*4 + 0]; + y = left[1*4 + 1]; + z = left[1*4 + 2]; + w = left[1*4 + 3]; + dst[1*4 + 0] = (right[0*4 + 0] * x) + (right[1*4 + 0] * y) + (right[2*4 + 0] * z) + (right[3*4 + 0] * w); + dst[1*4 + 1] = (right[0*4 + 1] * x) + (right[1*4 + 1] * y) + (right[2*4 + 1] * z) + (right[3*4 + 1] * w); + dst[1*4 + 2] = (right[0*4 + 2] * x) + (right[1*4 + 2] * y) + (right[2*4 + 2] * z) + (right[3*4 + 2] * w); + dst[1*4 + 3] = (right[0*4 + 3] * x) + (right[1*4 + 3] * y) + (right[2*4 + 3] * z) + (right[3*4 + 3] * w); + x = left[2*4 + 0]; + y = left[2*4 + 1]; + z = left[2*4 + 2]; + w = left[2*4 + 3]; + dst[2*4 + 0] = (right[0*4 + 0] * x) + (right[1*4 + 0] * y) + (right[2*4 + 0] * z) + (right[3*4 + 0] * w); + dst[2*4 + 1] = (right[0*4 + 1] * x) + (right[1*4 + 1] * y) + (right[2*4 + 1] * z) + (right[3*4 + 1] * w); + dst[2*4 + 2] = (right[0*4 + 2] * x) + (right[1*4 + 2] * y) + (right[2*4 + 2] * z) + (right[3*4 + 2] * w); + dst[2*4 + 3] = (right[0*4 + 3] * x) + (right[1*4 + 3] * y) + (right[2*4 + 3] * z) + (right[3*4 + 3] * w); + x = left[3*4 + 0]; + y = left[3*4 + 1]; + z = left[3*4 + 2]; + w = left[3*4 + 3]; + dst[3*4 + 0] = (right[0*4 + 0] * x) + (right[1*4 + 0] * y) + (right[2*4 + 0] * z) + (right[3*4 + 0] * w); + dst[3*4 + 1] = (right[0*4 + 1] * x) + (right[1*4 + 1] * y) + (right[2*4 + 1] * z) + (right[3*4 + 1] * w); + dst[3*4 + 2] = (right[0*4 + 2] * x) + (right[1*4 + 2] * y) + (right[2*4 + 2] * z) + (right[3*4 + 2] * w); + dst[3*4 + 3] = (right[0*4 + 3] * x) + (right[1*4 + 3] * y) + (right[2*4 + 3] * z) + (right[3*4 + 3] * w); +} + void D3D_SetupQuad(vlc_object_t *o, const video_format_t *fmt, d3d_quad_t *quad, const display_info_t *displayFormat) { @@ -368,7 +407,8 @@ void D3D_SetupQuad(vlc_object_t *o, const video_format_t *fmt, d3d_quad_t *quad, 0.f, 0.f, 0.f, 1.f, }; - memcpy(quad->shaderConstants->WhitePoint, IDENTITY_4X4, sizeof(quad->shaderConstants->WhitePoint)); + FLOAT WhitePoint[4*4]; + memcpy(WhitePoint, IDENTITY_4X4, sizeof(WhitePoint)); const FLOAT *ppColorspace; if (RGB_src_shader == DxgiIsRGBFormat(displayFormat->pixelFormat)) @@ -378,9 +418,9 @@ void D3D_SetupQuad(vlc_object_t *o, const video_format_t *fmt, d3d_quad_t *quad, else if (RGB_src_shader) { ppColorspace = COLORSPACE_FULL_RGBA_TO_BT601_YUV; - quad->shaderConstants->WhitePoint[0*4 + 3] = -itu_black_level; - quad->shaderConstants->WhitePoint[1*4 + 3] = itu_achromacy; - quad->shaderConstants->WhitePoint[2*4 + 3] = itu_achromacy; + WhitePoint[0*4 + 3] = -itu_black_level; + WhitePoint[1*4 + 3] = itu_achromacy; + WhitePoint[2*4 + 3] = itu_achromacy; } else { @@ -406,13 +446,14 @@ void D3D_SetupQuad(vlc_object_t *o, const video_format_t *fmt, d3d_quad_t *quad, } break; } + /* all matrices work in studio range and output in full range */ - quad->shaderConstants->WhitePoint[0*4 + 3] = -itu_black_level; - quad->shaderConstants->WhitePoint[1*4 + 3] = -itu_achromacy; - quad->shaderConstants->WhitePoint[2*4 + 3] = -itu_achromacy; + WhitePoint[0*4 + 3] = -itu_black_level; + WhitePoint[1*4 + 3] = -itu_achromacy; + WhitePoint[2*4 + 3] = -itu_achromacy; } - memcpy(quad->shaderConstants->Colorspace, ppColorspace, sizeof(quad->shaderConstants->Colorspace)); + MultMat4(quad->shaderConstants->Colorspace, ppColorspace, WhitePoint); if (fmt->primaries != displayFormat->primaries) { diff --git a/modules/video_output/win32/d3d_shaders.h b/modules/video_output/win32/d3d_shaders.h index 2c4c5cd5f16c8ae275c87cfdcf5f8df98391391d..70c90f631a48394f630b24f4b208aee9556f321e 100644 --- a/modules/video_output/win32/d3d_shaders.h +++ b/modules/video_output/win32/d3d_shaders.h @@ -45,14 +45,13 @@ typedef struct { /* structures passed to the pixel shader */ typedef struct { - FLOAT WhitePoint[4*4]; FLOAT Colorspace[4*4]; FLOAT Primaries[4*4]; FLOAT Opacity; FLOAT LuminanceScale; FLOAT BoundaryX; FLOAT BoundaryY; - FLOAT padding[12]; // 256 bytes alignment + FLOAT padding[28]; // 256 bytes alignment } PS_CONSTANT_BUFFER; typedef struct {