Initial commit

2020-04-26 18:03:54 -05:00 · 2020-04-26 18:03:54 -05:00 · 8f79b7ce83
commit 8f79b7ce83
parent 49f2368e03
3242 changed files with 1313774 additions and 0 deletions
--- a/3dfx/3dfx_4x1.slangp
+++ b/3dfx/3dfx_4x1.slangp
@ -0,0 +1,18 @@
+shaders = 6
+shader0 = shaders/3dfx_pass_0.slang
+shader1 = shaders/3dfx_pass_1.slang
+shader2 = shaders/3dfx_pass_1.slang
+shader3 = shaders/3dfx_pass_1.slang
+shader4 = shaders/3dfx_pass_1.slang
+shader5 = shaders/3dfx_pass_2.slang
+
+filter_linear0 = true
+filter_linear1 = true
+filter_linear2 = true
+filter_linear3 = true
+filter_linear4 = true 
+
+scale_type_x0 = "source"
+scale_x0 = "1.000000"
+scale_type_y0 = "source"
+scale_y0 = "1.000000"
--- a/3dfx/shaders/3dfx_pass_0.frag
+++ b/3dfx/shaders/3dfx_pass_0.frag
@ -0,0 +1,154 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+float erroredtable[16] = float[16](float(16), float(4), float(13), float(1), float(8), float(12), float(5), float(9), float(14), float(2), float(15), float(3), float(6), float(10), float(7), float(11
+));
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float LEIFX_LINES;
+}params;
+
+#pragma parameterLEIFX_LINES¡0.050.001.000.01
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+void main()
+{
+   // Sample the source texel and express its position in source-pixel units.
+   vec3 texel = texture(Source, vTexCoord).rgb;
+   vec2 srcPos = vTexCoord.xy * params.SourceSize.xy;
+
+   // Which of the 16 dither-table entries this pixel uses:
+   // (x mod 4) selects a group of four, (y mod 4) the entry inside it.
+   int cell = int(mod(srcPos.x, 4.0)) * 4 + int(mod(srcPos.y, 4.0));
+
+   // Table lookup; the value stays 0.0 when the index falls outside 0..15.
+   float dither = 0.0;
+   if (cell >= 0 && cell < 16)
+      dither = erroredtable[cell];
+
+   // Invert the table value, halve it, then bias it down by one.
+   dither = 17.0 - (dither - 1.0);
+   dither *= 0.5;
+   dither += -1.0;
+
+   // Add the dither in 0..255 space (green receives half), then rescale by 1/255.
+   vec3 scaled = texel * 255.0;
+   vec3 dithered = scaled + vec3(dither, dither / 2.0, dither);
+   texel = dithered * 0.003921568627451;
+
+   // Quantise: 32 steps for red and blue, 64 for green.
+   // pow() brackets the quantisation with an exponent that is currently 1.0.
+   vec3 exponent = vec3(1.0);
+   vec3 steps = vec3(32.0, 32.0 * 2.0, 32.0);
+   vec3 reduced = pow(texel, exponent);
+   reduced *= steps;
+   reduced = vec3(ivec3(floor(reduced)));
+   reduced /= steps;
+   reduced = pow(reduced, exponent);
+   texel = reduced;
+
+   // On source rows where (y mod 2) >= 1, lift red and blue by LEIFX_LINES / 10.
+   float lineLift = (mod(srcPos.y, 2.0) < 1.0) ? 0.0 : (params.LEIFX_LINES / 10.0);
+   texel.r += lineLift;
+   texel.b += lineLift;
+
+   FragColor = vec4(texel, 1.0);
+}
--- a/3dfx/shaders/3dfx_pass_0.gsh
+++ b/3dfx/shaders/3dfx_pass_0.gsh
--- a/3dfx/shaders/3dfx_pass_0.ppslang
+++ b/3dfx/shaders/3dfx_pass_0.ppslang
@ -0,0 +1,167 @@
+#version 450
+
+
+
+
+
+
+
+
+
+
+
+float erroredtable[16]= {
+ 16, 4, 13, 1,
+ 8, 12, 5, 9,
+ 14, 2, 15, 3,
+ 6, 10, 7, 11
+};
+
+layout(push_constant)uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+ float LEIFX_LINES;
+} params;
+
+#pragma parameterLEIFX_LINES¡0.050.001.000.01
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+ mat4 MVP;
+} global;
+
+
+
+
+#pragma stagevertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the incoming texture coordinate to the vTexCoord output unchanged.
+   vTexCoord = TexCoord;
+   // Transform the vertex position by the MVP matrix from the UBO.
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stagefragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+void main()
+{
+   // Sample the source texel and express its position in source-pixel units.
+   vec3 texel = texture(Source, vTexCoord).rgb;
+   vec2 srcPos = vTexCoord.xy * params.SourceSize.xy;
+
+   // Which of the 16 dither-table entries this pixel uses:
+   // (x mod 4) selects a group of four, (y mod 4) the entry inside it.
+   int cell = int(mod(srcPos.x, 4.0)) * 4 + int(mod(srcPos.y, 4.0));
+
+   // Table lookup; the value stays 0.0 when the index falls outside 0..15.
+   float dither = 0.0;
+   if (cell >= 0 && cell < 16)
+      dither = erroredtable[cell];
+
+   // Invert the table value, halve it, then bias it down by one.
+   dither = 17.0 - (dither - 1.0);
+   dither *= 0.5;
+   dither += -1.0;
+
+   // Add the dither in 0..255 space (green receives half), then rescale by 1/255.
+   vec3 scaled = texel * 255.0;
+   vec3 dithered = scaled + vec3(dither, dither / 2.0, dither);
+   texel = dithered * 0.003921568627451;
+
+   // Quantise: 32 steps for red and blue, 64 for green.
+   // pow() brackets the quantisation with an exponent that is currently 1.0.
+   vec3 exponent = vec3(1.0);
+   vec3 steps = vec3(32.0, 32.0 * 2.0, 32.0);
+   vec3 reduced = pow(texel, exponent);
+   reduced *= steps;
+   reduced = vec3(ivec3(floor(reduced)));
+   reduced /= steps;
+   reduced = pow(reduced, exponent);
+   texel = reduced;
+
+   // On source rows where (y mod 2) >= 1, lift red and blue by LEIFX_LINES / 10.
+   float lineLift = (mod(srcPos.y, 2.0) < 1.0) ? 0.0 : (params.LEIFX_LINES / 10.0);
+   texel.r += lineLift;
+   texel.b += lineLift;
+
+   FragColor = vec4(texel, 1.0);
+}
--- a/3dfx/shaders/3dfx_pass_0.slang
+++ b/3dfx/shaders/3dfx_pass_0.slang
@ -0,0 +1,167 @@
+#version 450
+
+// "LeiFX" shader - "dither" and reduction process
+// 
+// 	Copyright (C) 2013-2014 leilei
+// 
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 2 of the License, or (at your option)
+// any later version.
+
+// This table came from the wikipedia article about Ordered Dithering. NOT MAME.  Just to clarify.
+float erroredtable[16] = {
+	16,4,13,1,   
+	8,12,5,9,
+	14,2,15,3,
+	6,10,7,11		
+};
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+	float LEIFX_LINES;
+} params;
+
+#pragma parameter LEIFX_LINES "LeiFX Line Intensity" 0.05 0.00 1.00 0.01
+
+#define LEIFX_LINES params.LEIFX_LINES
+
+// HLSL-style helper names mapped onto their GLSL built-ins.
+#define saturate(c) clamp(c, 0.0, 1.0)
+// mix() needs all three operands, so lerp forwards three arguments.
+#define lerp(a,b,t) mix(a,b,t)
+#define mul(a,b) (b*a)
+// mod() needs two operands, so fmod forwards two arguments.
+// NOTE: GLSL mod() is floor-based; results differ from HLSL fmod for negative operands.
+#define fmod(a,b) mod(a,b)
+#define frac(c) fract(c)
+#define tex2D(c,d) texture(c,d)
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+#define int2 ivec2
+#define int3 ivec3
+#define int4 ivec4
+#define bool2 bvec2
+#define bool3 bvec3
+#define bool4 bvec4
+#define float2x2 mat2x2
+#define float3x3 mat3x3
+#define float4x4 mat4x4
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+#define DITHERAMOUNT		0.5f // was 0.33f
+#define DITHERBIAS		-1  // 0 to 16, biases the value of the dither up.  - was 8
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the incoming texture coordinate to the vTexCoord output unchanged.
+   vTexCoord = TexCoord;
+   // Transform the vertex position by the MVP matrix from the UBO.
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+void main()
+{
+   // Sample the source texel and express its position in source-pixel units.
+   float3 texel = tex2D(Source, vTexCoord).rgb;
+   float2 srcPos = vTexCoord.xy * params.SourceSize.xy;
+
+   // Dither: which of the 16 table entries does this pixel use?
+   // (x mod 4) selects a group of four, (y mod 4) the entry inside it.
+   int cell = int(mod(srcPos.x, 4.0)) * 4 + int(mod(srcPos.y, 4.0));
+
+   // Table lookup; the value stays 0.0 when the index falls outside 0..15.
+   float dither = 0.0;
+   if (cell >= 0 && cell < 16)
+      dither = erroredtable[cell];
+
+   // Adjust the dither value: invert, scale by DITHERAMOUNT, offset by DITHERBIAS.
+   dither = 17.0 - (dither - 1.0);
+   dither *= DITHERAMOUNT;
+   dither += DITHERBIAS;
+
+   // Add the dither in 0..255 space (green receives half), then rescale by 1/255.
+   float3 scaled = texel * 255.0;
+   float3 dithered = scaled + float3(dither, dither / 2.0, dither);
+   texel = dithered * 0.003921568627451;
+
+   //
+   // Reduce to 16-bit color: 32 steps for red and blue, 64 for green.
+   // pow() brackets the quantisation with an exponent that is currently 1.0.
+   //
+   float3 exponent = float3(1.0);
+   float3 steps = float3(32.0, 32.0 * 2.0, 32.0);
+   float3 reduced = pow(texel, exponent);
+   reduced *= steps;
+   reduced = float3(int3(floor(reduced)));
+   reduced /= steps;
+   reduced = pow(reduced, exponent);
+   texel = reduced;
+
+   // Purple line: on source rows where (y mod 2) >= 1, lift red and blue by
+   // LEIFX_LINES / 10. It is added in this pass so the later passes process it too.
+   float lineLift = (mod(srcPos.y, 2.0) < 1.0) ? 0.0 : (LEIFX_LINES / 10.0);
+   texel.r += lineLift;
+   texel.b += lineLift;
+
+   FragColor = vec4(texel, 1.0);
+}
--- a/3dfx/shaders/3dfx_pass_0.vert
+++ b/3dfx/shaders/3dfx_pass_0.vert
@ -0,0 +1,68 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+float erroredtable[16] = float[16](float(16), float(4), float(13), float(1), float(8), float(12), float(5), float(9), float(14), float(2), float(15), float(3), float(6), float(10), float(7), float(11
+));
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float LEIFX_LINES;
+}params;
+
+#pragma parameterLEIFX_LINES¡0.050.001.000.01
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the incoming texture coordinate to the vTexCoord output unchanged.
+   vTexCoord = TexCoord;
+   // Transform the vertex position by the MVP matrix from the UBO.
+   gl_Position = global.MVP * Position;
+}
+
--- a/3dfx/shaders/3dfx_pass_1.frag
+++ b/3dfx/shaders/3dfx_pass_1.frag
@ -0,0 +1,100 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+}params;
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+void main()
+{
+   vec3 centre = texture(Source, vTexCoord).rgb;
+
+   // Horizontal sampling offset taken from SourceSize.z
+   // (presumably one source texel, i.e. 1/width -- confirm against the host).
+   vec2 stepX = vec2(params.SourceSize.z, 0.0);
+   vec3 texelPlusX = texture(Source, vTexCoord + stepX).rgb;
+   vec3 texelMinusX = texture(Source, vTexCoord - stepX).rgb;
+
+   // Per-channel cap on how far a neighbour may pull this pixel; green gets half the cap.
+   vec3 cap = vec3(0.04, 0.04 / 2.0, 0.04);
+
+   // Difference to each horizontal neighbour, limited to [-cap, +cap].
+   vec3 pullMinusX = clamp(texelMinusX - centre, -cap, cap);
+   vec3 pullPlusX = clamp(texelPlusX - centre, -cap, cap);
+
+   // Blend in a quarter of the -x difference and a sixteenth of the +x difference.
+   centre += (pullMinusX / 4.0) + (pullPlusX / 16.0);
+
+   FragColor = vec4(centre, 1.0);
+}
--- a/3dfx/shaders/3dfx_pass_1.gsh
+++ b/3dfx/shaders/3dfx_pass_1.gsh
--- a/3dfx/shaders/3dfx_pass_1.ppslang
+++ b/3dfx/shaders/3dfx_pass_1.ppslang
@ -0,0 +1,109 @@
+#version 450
+
+
+
+
+
+
+
+
+
+
+layout(push_constant)uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+ mat4 MVP;
+} global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#pragma stagevertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the incoming texture coordinate to the vTexCoord output unchanged.
+   vTexCoord = TexCoord;
+   // Transform the vertex position by the MVP matrix from the UBO.
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stagefragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+void main()
+{
+   vec3 centre = texture(Source, vTexCoord).rgb;
+
+   // Horizontal sampling offset taken from SourceSize.z
+   // (presumably one source texel, i.e. 1/width -- confirm against the host).
+   vec2 stepX = vec2(params.SourceSize.z, 0.0);
+   vec3 texelPlusX = texture(Source, vTexCoord + stepX).rgb;
+   vec3 texelMinusX = texture(Source, vTexCoord - stepX).rgb;
+
+   // Per-channel cap on how far a neighbour may pull this pixel; green gets half the cap.
+   vec3 cap = vec3(0.04, 0.04 / 2.0, 0.04);
+
+   // Difference to each horizontal neighbour, limited to [-cap, +cap].
+   vec3 pullMinusX = clamp(texelMinusX - centre, -cap, cap);
+   vec3 pullPlusX = clamp(texelPlusX - centre, -cap, cap);
+
+   // Blend in a quarter of the -x difference and a sixteenth of the +x difference.
+   centre += (pullMinusX / 4.0) + (pullPlusX / 16.0);
+
+   FragColor = vec4(centre, 1.0);
+}
--- a/3dfx/shaders/3dfx_pass_1.slang
+++ b/3dfx/shaders/3dfx_pass_1.slang
@ -0,0 +1,109 @@
+#version 450
+
+// "LeiFX" shader - Pixel filtering process
+// 
+// 	Copyright (C) 2013-2014 leilei
+// 
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 2 of the License, or (at your option)
+// any later version.
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+// HLSL-style helper names mapped onto their GLSL built-ins.
+#define saturate(c) clamp(c, 0.0, 1.0)
+// mix() needs all three operands, so lerp forwards three arguments.
+#define lerp(a,b,t) mix(a,b,t)
+#define mul(a,b) (b*a)
+// mod() needs two operands, so fmod forwards two arguments.
+// NOTE: GLSL mod() is floor-based; results differ from HLSL fmod for negative operands.
+#define fmod(a,b) mod(a,b)
+#define frac(c) fract(c)
+#define tex2D(c,d) texture(c,d)
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+#define int2 ivec2
+#define int3 ivec3
+#define int4 ivec4
+#define bool2 bvec2
+#define bool3 bvec3
+#define bool4 bvec4
+#define float2x2 mat2x2
+#define float3x3 mat3x3
+#define float4x4 mat4x4
+
+#define		FILTCAP		0.04	// filtered pixel should not exceed this 
+#define		FILTCAPG	(FILTCAP / 2)
+#define		LEIFX_PIXELWIDTH	0.50f
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the incoming texture coordinate to the vTexCoord output unchanged.
+   vTexCoord = TexCoord;
+   // Transform the vertex position by the MVP matrix from the UBO.
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+void main()
+{
+   float3 centre = tex2D(Source, vTexCoord).rgb;
+
+   // Horizontal sampling offset taken from SourceSize.z
+   // (presumably one source texel, i.e. 1/width -- confirm against the host).
+   float2 stepX = float2(params.SourceSize.z, 0.0);
+   float3 texelPlusX = tex2D(Source, vTexCoord + stepX).rgb;
+   float3 texelMinusX = tex2D(Source, vTexCoord - stepX).rgb;
+
+   // Per-channel cap on how far a neighbour may pull this pixel;
+   // green uses FILTCAPG, which is half of FILTCAP.
+   float3 cap = float3(FILTCAP, FILTCAPG, FILTCAP);
+
+   // New filter: difference to each horizontal neighbour, limited to [-cap, +cap].
+   float3 pullMinusX = clamp(texelMinusX - centre, -cap, cap);
+   float3 pullPlusX = clamp(texelPlusX - centre, -cap, cap);
+
+   // Blend in a quarter of the -x difference and a sixteenth of the +x difference.
+   centre += (pullMinusX / 4.0) + (pullPlusX / 16.0);
+
+   FragColor = vec4(centre, 1.0);
+}
--- a/3dfx/shaders/3dfx_pass_1.vert
+++ b/3dfx/shaders/3dfx_pass_1.vert
@ -0,0 +1,60 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+}params;
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the incoming texture coordinate to the vTexCoord output unchanged.
+   vTexCoord = TexCoord;
+   // Transform the vertex position by the MVP matrix from the UBO.
+   gl_Position = global.MVP * Position;
+}
+
--- a/3dfx/shaders/3dfx_pass_2.frag
+++ b/3dfx/shaders/3dfx_pass_2.frag
@ -0,0 +1,81 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float GAMMA_LEVEL;
+}params;
+
+#pragma parameterGAMMA_LEVEL¡1.30.002.000.01
+
+
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Floored remainder of x / y: x minus y times the floor of their quotient.
+float mod2(float x, float y)
+{
+   float wholeParts = floor(x / y);
+   return x - y * wholeParts;
+}
+
+
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+void main()
+{
+   vec3 outcolor = texture(Source, vTexCoord).rgb;
+
+   // Gamma: raise every channel to the power 1 / GAMMA_LEVEL.
+   // A GAMMA_LEVEL of 0.0 would make the exponent 1.0 / 0.0, so the divisor is
+   // floored at 0.01; any setting of 0.01 or above is unaffected.
+   float invGamma = 1.0 / max(params.GAMMA_LEVEL, 0.01);
+   outcolor = pow(outcolor, vec3(invGamma));
+
+   FragColor = vec4(outcolor, 1.0);
+}
--- a/3dfx/shaders/3dfx_pass_2.gsh
+++ b/3dfx/shaders/3dfx_pass_2.gsh
--- a/3dfx/shaders/3dfx_pass_2.ppslang
+++ b/3dfx/shaders/3dfx_pass_2.ppslang
@ -0,0 +1,90 @@
+#version 450
+
+
+
+
+
+
+
+
+
+
+
+layout(push_constant)uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+ float GAMMA_LEVEL;
+} params;
+
+#pragma parameterGAMMA_LEVEL¡1.30.002.000.01
+
+
+
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+ mat4 MVP;
+} global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Floored remainder of x / y: x minus y times the floor of their quotient.
+float mod2(float x, float y)
+{
+   float wholeParts = floor(x / y);
+   return x - y * wholeParts;
+}
+
+
+
+#pragma stagevertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the incoming texture coordinate to the vTexCoord output unchanged.
+   vTexCoord = TexCoord;
+   // Transform the vertex position by the MVP matrix from the UBO.
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stagefragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+void main()
+{
+   vec3 outcolor = texture(Source, vTexCoord).rgb;
+
+   // Gamma: raise every channel to the power 1 / GAMMA_LEVEL.
+   // A GAMMA_LEVEL of 0.0 would make the exponent 1.0 / 0.0, so the divisor is
+   // floored at 0.01; any setting of 0.01 or above is unaffected.
+   float invGamma = 1.0 / max(params.GAMMA_LEVEL, 0.01);
+   outcolor = pow(outcolor, vec3(invGamma));
+
+   FragColor = vec4(outcolor, 1.0);
+}
--- a/3dfx/shaders/3dfx_pass_2.slang
+++ b/3dfx/shaders/3dfx_pass_2.slang
@ -0,0 +1,90 @@
+#version 450
+
+
+// "LeiFX" shader - Gamma process
+// 
+// 	Copyright (C) 2013-2014 leilei
+// 
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 2 of the License, or (at your option)
+// any later version.
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+	float GAMMA_LEVEL;
+} params;
+
+#pragma parameter GAMMA_LEVEL "LeiFX Gamma Correction" 1.3 0.00 2.00 0.01
+
+#define GAMMA_LEVEL params.GAMMA_LEVEL
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+// HLSL-style helper names mapped onto their GLSL built-ins.
+#define saturate(c) clamp(c, 0.0, 1.0)
+// mix() needs all three operands, so lerp forwards three arguments.
+#define lerp(a,b,t) mix(a,b,t)
+#define mul(a,b) (b*a)
+// mod() needs two operands, so fmod forwards two arguments.
+// NOTE: GLSL mod() is floor-based; results differ from HLSL fmod for negative operands.
+#define fmod(a,b) mod(a,b)
+#define frac(c) fract(c)
+#define tex2D(c,d) texture(c,d)
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+#define int2 ivec2
+#define int3 ivec3
+#define int4 ivec4
+#define bool2 bvec2
+#define bool3 bvec3
+#define bool4 bvec4
+#define float2x2 mat2x2
+#define float3x3 mat3x3
+#define float4x4 mat4x4
+
+// Floored remainder of x / y: x minus y times the floor of their quotient.
+float mod2(float x, float y)
+{
+   float wholeParts = floor(x / y);
+   return x - y * wholeParts;
+}
+
+//float	GAMMA_LEVEL = 1.0;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the incoming texture coordinate to the vTexCoord output unchanged.
+   vTexCoord = TexCoord;
+   // Transform the vertex position by the MVP matrix from the UBO.
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+void main()
+{
+   float3 outcolor = tex2D(Source, vTexCoord).rgb;
+
+   // Gamma
+   // the Voodoo drivers usually supply a 1.3 gamma setting whether people liked it or not
+   // but it was enough to brainwash the competition for looking 'too dark'
+   //
+   // Every channel is raised to the power 1 / GAMMA_LEVEL. A GAMMA_LEVEL of 0.0
+   // would make the exponent 1.0 / 0.0, so the divisor is floored at 0.01;
+   // any setting of 0.01 or above is unaffected.
+   float invGamma = 1.0 / max(GAMMA_LEVEL, 0.01);
+   outcolor = pow(outcolor, float3(invGamma));
+
+   FragColor = vec4(outcolor, 1.0);
+}
--- a/3dfx/shaders/3dfx_pass_2.vert
+++ b/3dfx/shaders/3dfx_pass_2.vert
@ -0,0 +1,69 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float GAMMA_LEVEL;
+}params;
+
+#pragma parameterGAMMA_LEVEL¡1.30.002.000.01
+
+
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Floored remainder of x / y: x minus y times the floor of their quotient.
+float mod2(float x, float y)
+{
+   float wholeParts = floor(x / y);
+   return x - y * wholeParts;
+}
+
+
+
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the incoming texture coordinate to the vTexCoord output unchanged.
+   vTexCoord = TexCoord;
+   // Transform the vertex position by the MVP matrix from the UBO.
+   gl_Position = global.MVP * Position;
+}
+
--- a/3dfx/shaders/old/3dfx_4x1.slangp
+++ b/3dfx/shaders/old/3dfx_4x1.slangp
@ -0,0 +1,18 @@
+shaders = 6
+shader0 = 3dfx_pass_0.slang
+shader1 = 3dfx_pass_1.slang
+shader2 = 3dfx_pass_2.slang
+shader3 = 3dfx_pass_2.slang
+shader4 = 3dfx_pass_2.slang
+shader5 = 3dfx_pass_2.slang
+
+filter_linear0 = true
+filter_linear1 = true
+filter_linear2 = true
+filter_linear3 = true
+filter_linear4 = true 
+
+scale_type_x0 = "source"
+scale_x0 = "1.000000"
+scale_type_y0 = "source"
+scale_y0 = "1.000000"
--- a/3dfx/shaders/old/3dfx_pass_0.frag
+++ b/3dfx/shaders/old/3dfx_pass_0.frag
@ -0,0 +1,134 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+float erroredtable[16] = float[16](float(16), float(4), float(13), float(1), float(8), float(12), float(5), float(9), float(14), float(2), float(15), float(3), float(6), float(10), float(7), float(11
+));
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+}params;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+void main()
+{
+   // Sample the source texel and express its position in source-pixel units.
+   vec3 texel = texture(Source, vTexCoord).rgb;
+   vec2 srcPos = vTexCoord.xy * params.SourceSize.xy;
+
+   // Which of the 16 dither-table entries this pixel uses:
+   // (x mod 4) selects a group of four, (y mod 4) the entry inside it.
+   int cell = int(mod(srcPos.x, 4.0)) * 4 + int(mod(srcPos.y, 4.0));
+
+   // Table lookup; the value stays 0.0 when the index falls outside 0..15.
+   float dither = 0.0;
+   if (cell >= 0 && cell < 16)
+      dither = erroredtable[cell];
+
+   // Add the raw table value in 0..255 space (green receives half), then rescale by 1/255.
+   vec3 scaled = texel * 255.0;
+   vec3 dithered = scaled + vec3(dither, dither / 2.0, dither);
+   texel = dithered * 0.003921568627451;
+
+   // Quantise: 32 steps for red and blue, 64 for green.
+   // pow() brackets the quantisation with an exponent that is currently 1.0.
+   vec3 exponent = vec3(1.0);
+   vec3 steps = vec3(32.0, 32.0 * 2.0, 32.0);
+   vec3 reduced = pow(texel, exponent);
+   reduced *= steps;
+   reduced = vec3(ivec3(floor(reduced)));
+   reduced /= steps;
+   reduced = pow(reduced, exponent);
+
+   FragColor = vec4(reduced, 1.0);
+}
--- a/3dfx/shaders/old/3dfx_pass_0.gsh
+++ b/3dfx/shaders/old/3dfx_pass_0.gsh
--- a/3dfx/shaders/old/3dfx_pass_0.ppslang
+++ b/3dfx/shaders/old/3dfx_pass_0.ppslang
@ -0,0 +1,147 @@
+#version 450
+
+
+
+
+
+
+
+
+
+
+
+float erroredtable[16]= {
+ 16, 4, 13, 1,
+ 8, 12, 5, 9,
+ 14, 2, 15, 3,
+ 6, 10, 7, 11
+};
+
+layout(push_constant)uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+} params;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+ mat4 MVP;
+} global;
+
+
+
+
+#pragma stagevertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the incoming texture coordinate to the vTexCoord output unchanged.
+   vTexCoord = TexCoord;
+   // Transform the vertex position by the MVP matrix from the UBO.
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stagefragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+void main()
+{
+   // Sample the source texel and express its position in source-pixel units.
+   vec3 texel = texture(Source, vTexCoord).rgb;
+   vec2 srcPos = vTexCoord.xy * params.SourceSize.xy;
+
+   // Which of the 16 dither-table entries this pixel uses:
+   // (x mod 4) selects a group of four, (y mod 4) the entry inside it.
+   int cell = int(mod(srcPos.x, 4.0)) * 4 + int(mod(srcPos.y, 4.0));
+
+   // Table lookup; the value stays 0.0 when the index falls outside 0..15.
+   float dither = 0.0;
+   if (cell >= 0 && cell < 16)
+      dither = erroredtable[cell];
+
+   // Add the raw table value in 0..255 space (green receives half), then rescale by 1/255.
+   vec3 scaled = texel * 255.0;
+   vec3 dithered = scaled + vec3(dither, dither / 2.0, dither);
+   texel = dithered * 0.003921568627451;
+
+   // Quantise: 32 steps for red and blue, 64 for green.
+   // pow() brackets the quantisation with an exponent that is currently 1.0.
+   vec3 exponent = vec3(1.0);
+   vec3 steps = vec3(32.0, 32.0 * 2.0, 32.0);
+   vec3 reduced = pow(texel, exponent);
+   reduced *= steps;
+   reduced = vec3(ivec3(floor(reduced)));
+   reduced /= steps;
+   reduced = pow(reduced, exponent);
+
+   FragColor = vec4(reduced, 1.0);
+}
--- a/3dfx/shaders/old/3dfx_pass_0.slang
+++ b/3dfx/shaders/old/3dfx_pass_0.slang
@ -0,0 +1,147 @@
+#version 450
+
+// "LeiFX" shader - "dither" and reduction process
+// 
+// 	Copyright (C) 2013-2014 leilei
+// 
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 2 of the License, or (at your option)
+// any later version.
+
+// This table came from the wikipedia article about Ordered Dithering. NOT MAME.  Just to clarify.
+// Flat 16-entry array holding the values 1..16 once each; the fragment stage
+// reads one entry per fragment.
+float erroredtable[16] = float[16](
+	16.0,  4.0, 13.0,  1.0,
+	 8.0, 12.0,  5.0,  9.0,
+	14.0,  2.0, 15.0,  3.0,
+	 6.0, 10.0,  7.0, 11.0
+);
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+// Cg/HLSL spellings mapped onto their GLSL counterparts so ported code can
+// keep its original names.
+#define saturate(c) clamp(c, 0.0, 1.0)
+// lerp takes three arguments and fmod two; the macros must accept the same
+// number or any use of them fails to expand.
+#define lerp(a,b,t) mix(a,b,t)
+// Operands are swapped on purpose; both are parenthesised so expression
+// arguments keep their own precedence.
+#define mul(a,b) ((b)*(a))
+// NOTE(review): GLSL mod() floors while HLSL fmod() truncates, so results
+// differ for negative operands - confirm before relying on it.
+#define fmod(a,b) mod(a,b)
+#define frac(c) fract(c)
+#define tex2D(c,d) texture(c,d)
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+#define int2 ivec2
+#define int3 ivec3
+#define int4 ivec4
+#define bool2 bvec2
+#define bool3 bvec3
+#define bool4 bvec4
+#define float2x2 mat2x2
+#define float3x3 mat3x3
+#define float4x4 mat4x4
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+#define DITHERAMOUNT		0.5f // was 0.33f
+#define DITHERBIAS		-1  // 0 to 16, biases the value of the dither up.  - was 8
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the texture coordinate on unchanged through the vTexCoord output.
+   vTexCoord = TexCoord;
+   // Output position: the input vertex multiplied by the MVP matrix of the UBO.
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+void main()
+{
+	// Colour of the source texture at this fragment.
+	float3 outcolor = tex2D(Source, vTexCoord).rgb;
+
+	// Texture coordinate scaled by the source size; its value modulo 4 in x and
+	// y selects one of the 16 table entries (x picks the group of four, y the
+	// entry inside it).
+	float2 ditheu = vTexCoord.xy * params.SourceSize.xy;
+	int ditdex = int(mod(ditheu.x, 4.0)) * 4 + int(mod(ditheu.y, 4.0)); // 4x4!
+
+	// Dither offset for this fragment. A direct lookup replaces the former
+	// 16-branch if/else chain; like that chain, an index outside 0..15 leaves
+	// the offset at 0.0.
+	float ohyes = 0.0;
+	if (ditdex >= 0 && ditdex < 16)
+		ohyes = erroredtable[ditdex];
+
+	// Add the offset on a 0..255 scale (green receives half of it), then scale
+	// back by 1/255.
+	float3 colord = outcolor * 255.0 + float3(ohyes, ohyes / 2.0, ohyes);
+	outcolor = colord * 0.003921568627451; // divide by 255
+
+	//
+	// Reduce to 16-bit color
+	//
+
+	// Exponent applied before and after quantising; 1.0 leaves the curve as is.
+	float3 why = float3(1.0);
+	float radooct = 32.0;	// 32 is usually the proper value
+	// Quantisation steps per channel: green gets twice as many as red and blue.
+	float3 levels = float3(radooct, radooct * 2.0, radooct);
+
+	float3 reduceme = pow(outcolor, why);
+	reduceme = floor(reduceme * levels) / levels;
+	reduceme = pow(reduceme, why);
+
+	outcolor = reduceme;
+
+	FragColor = vec4(outcolor, 1.0);
+}
--- a/3dfx/shaders/old/3dfx_pass_0.vert
+++ b/3dfx/shaders/old/3dfx_pass_0.vert
@ -0,0 +1,63 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+// Flat 16-entry array holding the values 1..16 once each. main() in this
+// vertex shader file does not read it.
+float erroredtable[16] = float[16](
+   16.0,  4.0, 13.0,  1.0,
+    8.0, 12.0,  5.0,  9.0,
+   14.0,  2.0, 15.0,  3.0,
+    6.0, 10.0,  7.0, 11.0
+);
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+}params;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Forward the texture coordinate unchanged through the vTexCoord output.
+   vTexCoord = TexCoord;
+   // Output position: the input vertex multiplied by the MVP matrix of the UBO.
+   gl_Position = global.MVP * Position;
+}
+
--- a/3dfx/shaders/old/3dfx_pass_1.frag
+++ b/3dfx/shaders/old/3dfx_pass_1.frag
@ -0,0 +1,83 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+}params;
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Floored remainder of x divided by y: x minus y times floor(x / y).
+float mod2(float x, float y)
+{
+   float wholeSteps = floor(x / y);
+   return x - y * wholeSteps;
+}
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+void main()
+{
+   // Vertical texture coordinate scaled by the source height.
+   float row = vTexCoord.y * params.SourceSize.y;
+
+   // Extra gamma given only to rows whose remainder modulo 2 is 1 or more.
+   float gammaed = 0.15;
+   float lineBoost = (mod2(row, 2.0) < 1.0) ? 0.0 : gammaed;
+
+   // Effective gamma is 1.15 or 1.30 depending on the row.
+   float leifx_gamma = 1.3 - gammaed + lineBoost;
+
+   // Apply the inverse gamma to all three channels at once.
+   vec3 graded = pow(texture(Source, vTexCoord).rgb, vec3(1.0 / leifx_gamma));
+
+   FragColor = vec4(graded, 1.0);
+}
--- a/3dfx/shaders/old/3dfx_pass_1.gsh
+++ b/3dfx/shaders/old/3dfx_pass_1.gsh
--- a/3dfx/shaders/old/3dfx_pass_1.ppslang
+++ b/3dfx/shaders/old/3dfx_pass_1.ppslang
@ -0,0 +1,92 @@
+#version 450
+
+
+
+
+
+
+
+
+
+
+layout(push_constant)uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+ mat4 MVP;
+} global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Floored remainder: subtracts from x the largest whole multiple of y below it.
+float mod2(float x, float y)
+{
+   float quotientFloor = floor(x / y);
+   return x - y * quotientFloor;
+}
+
+// Start of the vertex-stage section; "stage" and "vertex" are separate words.
+#pragma stage vertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+
+void main()
+{
+   // Texture coordinate goes to the next stage unmodified.
+   vTexCoord = TexCoord;
+   // Output position: the input vertex multiplied by the MVP matrix of the UBO.
+   gl_Position = global.MVP * Position;
+}
+
+// Start of the fragment-stage section; "stage" and "fragment" are separate words.
+#pragma stage fragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+void main()
+{
+   // Vertical texture coordinate scaled by the source height.
+   float row = vTexCoord.y * params.SourceSize.y;
+
+   // Rows with a modulo-2 remainder below 1 get no boost; the others get 0.15.
+   float gammaed = 0.15;
+   float lineBoost = (mod2(row, 2.0) < 1.0) ? 0.0 : gammaed;
+
+   // Effective gamma is 1.15 or 1.30 depending on the row.
+   float leifx_gamma = 1.3 - gammaed + lineBoost;
+
+   // One vector pow() applies the inverse gamma to r, g and b alike.
+   vec3 graded = pow(texture(Source, vTexCoord).rgb, vec3(1.0 / leifx_gamma));
+
+   FragColor = vec4(graded, 1.0);
+}
--- a/3dfx/shaders/old/3dfx_pass_1.slang
+++ b/3dfx/shaders/old/3dfx_pass_1.slang
@ -0,0 +1,92 @@
+#version 450
+
+// "LeiFX" shader - Gamma process
+// 
+// 	Copyright (C) 2013-2014 leilei
+// 
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 2 of the License, or (at your option)
+// any later version.
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+// Cg/HLSL spellings mapped onto their GLSL counterparts so ported code can
+// keep its original names.
+#define saturate(c) clamp(c, 0.0, 1.0)
+// lerp takes three arguments and fmod two; the macros must accept the same
+// number or any use of them fails to expand.
+#define lerp(a,b,t) mix(a,b,t)
+// Operands are swapped on purpose; both are parenthesised so expression
+// arguments keep their own precedence.
+#define mul(a,b) ((b)*(a))
+// NOTE(review): GLSL mod() floors while HLSL fmod() truncates, so results
+// differ for negative operands - confirm before relying on it.
+#define fmod(a,b) mod(a,b)
+#define frac(c) fract(c)
+#define tex2D(c,d) texture(c,d)
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+#define int2 ivec2
+#define int3 ivec3
+#define int4 ivec4
+#define bool2 bvec2
+#define bool3 bvec3
+#define bool4 bvec4
+#define float2x2 mat2x2
+#define float3x3 mat3x3
+#define float4x4 mat4x4
+
+// Floored remainder of x divided by y: x minus y times floor(x / y).
+float mod2(float x, float y)
+{
+	float wholeSteps = floor(x / y);
+	return x - y * wholeSteps;
+}
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the texture coordinate on unchanged through the vTexCoord output.
+   vTexCoord = TexCoord;
+   // Output position: the input vertex multiplied by the MVP matrix of the UBO.
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+void main()
+{
+	// Vertical texture coordinate scaled by the source height.
+	float row = vTexCoord.y * params.SourceSize.y;
+
+	// Gamma scanlines: a base gamma of 1.3 is lowered by 0.15 on every row
+	// whose remainder modulo 2 is below 1.
+	float gammaed = 0.15;
+	float lineBoost = (mod2(row, 2.0) < 1.0) ? 0.0 : gammaed;
+	float leifx_gamma = 1.3 - gammaed + lineBoost;
+
+	// Apply the inverse gamma to all three channels at once.
+	float3 graded = pow(tex2D(Source, vTexCoord).rgb, float3(1.0 / leifx_gamma));
+
+	FragColor = vec4(graded, 1.0);
+}
--- a/3dfx/shaders/old/3dfx_pass_1.vert
+++ b/3dfx/shaders/old/3dfx_pass_1.vert
@ -0,0 +1,61 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+}params;
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Floored remainder of x divided by y. main() in this vertex shader file does
+// not call it.
+float mod2(float x, float y)
+{
+   float quotientFloor = floor(x / y);
+   return x - y * quotientFloor;
+}
+
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Forward the texture coordinate unchanged through the vTexCoord output.
+   vTexCoord = TexCoord;
+   // Output position: the input vertex multiplied by the MVP matrix of the UBO.
+   gl_Position = global.MVP * Position;
+}
+
--- a/3dfx/shaders/old/3dfx_pass_2.frag
+++ b/3dfx/shaders/old/3dfx_pass_2.frag
@ -0,0 +1,105 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float LEIFX_BLURFACTOR;
+}params;
+
+// Parameter declaration with its separators restored: name, quoted label, then
+// four numbers (presumably default, minimum, maximum and step).
+#pragma parameter LEIFX_BLURFACTOR "LeiFX Blur Factor" 0.69 0.00 1.00 0.01
+
+
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+void main()
+{
+   // Unfiltered colour at this fragment; it doubles as the centre tap of the
+   // blur, so the texture is not sampled a second time at the same coordinate.
+   vec3 outcolor = texture(Source, vTexCoord).rgb;
+
+   // Horizontal step for the neighbour taps (presumably 1 / source width).
+   float stepx = params.SourceSize.z;
+
+   // Two asymmetric horizontal taps at +0.15 and -0.22 of that step.
+   vec3 pixel1 = texture(Source, vTexCoord + vec2(stepx * 0.15, 0.0)).rgb;
+   vec3 pixel2 = texture(Source, vTexCoord + vec2(-stepx * 0.22, 0.0)).rgb;
+
+   // Difference between the channel sums of the two taps, limited to 0..1.
+   float gary1 = dot(pixel1, vec3(1.0));
+   float gary2 = dot(pixel2, vec3(1.0));
+   float mean = clamp(abs(gary1 - gary2), 0.0, 1.0);
+
+   // GLSL leaves pow() undefined for a zero base with a non-positive exponent,
+   // and the blur factor is a uniform that may be 0. A zero difference therefore
+   // bypasses pow(); for every positive exponent pow(0, e) is 0 anyway.
+   if (mean > 0.0)
+      mean = min(pow(mean, params.LEIFX_BLURFACTOR), 1.0);
+
+   // blendy weights the three-tap average, blenda the unfiltered colour.
+   float blendy = 1.0 - mean;
+   float blenda = 1.0 - blendy;
+   vec3 pixelblend = outcolor / 3.0 + pixel1 / 3.0 + pixel2 / 3.0;
+   outcolor = (pixelblend * blendy) + (outcolor * blenda);
+
+   FragColor = vec4(outcolor, 1.0);
+}
--- a/3dfx/shaders/old/3dfx_pass_2.gsh
+++ b/3dfx/shaders/old/3dfx_pass_2.gsh
--- a/3dfx/shaders/old/3dfx_pass_2.ppslang
+++ b/3dfx/shaders/old/3dfx_pass_2.ppslang
@ -0,0 +1,114 @@
+#version 450
+
+
+
+
+
+
+
+
+
+
+
+layout(push_constant)uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+ float LEIFX_BLURFACTOR;
+} params;
+
+// Parameter declaration with its separators restored: name, quoted label, then
+// four numbers (presumably default, minimum, maximum and step).
+#pragma parameter LEIFX_BLURFACTOR "LeiFX Blur Factor" 0.69 0.00 1.00 0.01
+
+
+
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+ mat4 MVP;
+} global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Start of the vertex-stage section; "stage" and "vertex" are separate words.
+#pragma stage vertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+
+void main()
+{
+   // Texture coordinate goes to the next stage unmodified.
+   vTexCoord = TexCoord;
+   // Output position: the input vertex multiplied by the MVP matrix of the UBO.
+   gl_Position = global.MVP * Position;
+}
+
+// Start of the fragment-stage section; "stage" and "fragment" are separate words.
+#pragma stage fragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+void main()
+{
+   // Unfiltered colour at this fragment; it doubles as the centre tap of the
+   // blur, so the texture is not sampled a second time at the same coordinate.
+   vec3 outcolor = texture(Source, vTexCoord).rgb;
+
+   // Horizontal step for the neighbour taps (presumably 1 / source width).
+   float stepx = params.SourceSize.z;
+
+   // Two asymmetric horizontal taps at +0.15 and -0.22 of that step.
+   vec3 pixel1 = texture(Source, vTexCoord + vec2(stepx * 0.15, 0.0)).rgb;
+   vec3 pixel2 = texture(Source, vTexCoord + vec2(-stepx * 0.22, 0.0)).rgb;
+
+   // Difference between the channel sums of the two taps, limited to 0..1.
+   float gary1 = dot(pixel1, vec3(1.0));
+   float gary2 = dot(pixel2, vec3(1.0));
+   float mean = clamp(abs(gary1 - gary2), 0.0, 1.0);
+
+   // GLSL leaves pow() undefined for a zero base with a non-positive exponent,
+   // and the blur factor is a uniform that may be 0. A zero difference therefore
+   // bypasses pow(); for every positive exponent pow(0, e) is 0 anyway.
+   if (mean > 0.0)
+      mean = min(pow(mean, params.LEIFX_BLURFACTOR), 1.0);
+
+   // blendy weights the three-tap average, blenda the unfiltered colour.
+   float blendy = 1.0 - mean;
+   float blenda = 1.0 - blendy;
+   vec3 pixelblend = outcolor / 3.0 + pixel1 / 3.0 + pixel2 / 3.0;
+   outcolor = (pixelblend * blendy) + (outcolor * blenda);
+
+   FragColor = vec4(outcolor, 1.0);
+}
--- a/3dfx/shaders/old/3dfx_pass_2.slang
+++ b/3dfx/shaders/old/3dfx_pass_2.slang
@ -0,0 +1,114 @@
+#version 450
+
+
+// "LeiFX" shader - Pixel filtering process
+// 
+// 	Copyright (C) 2013-2014 leilei
+// 
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 2 of the License, or (at your option)
+// any later version.
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+	float LEIFX_BLURFACTOR;
+} params;
+
+#pragma parameter LEIFX_BLURFACTOR "LeiFX Blur Factor" 0.69 0.00 1.00 0.01
+
+#define LEIFX_BLURFACTOR params.LEIFX_BLURFACTOR
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+// Cg/HLSL spellings mapped onto their GLSL counterparts so ported code can
+// keep its original names.
+#define saturate(c) clamp(c, 0.0, 1.0)
+// lerp takes three arguments and fmod two; the macros must accept the same
+// number or any use of them fails to expand.
+#define lerp(a,b,t) mix(a,b,t)
+// Operands are swapped on purpose; both are parenthesised so expression
+// arguments keep their own precedence.
+#define mul(a,b) ((b)*(a))
+// NOTE(review): GLSL mod() floors while HLSL fmod() truncates, so results
+// differ for negative operands - confirm before relying on it.
+#define fmod(a,b) mod(a,b)
+#define frac(c) fract(c)
+#define tex2D(c,d) texture(c,d)
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+#define int2 ivec2
+#define int3 ivec3
+#define int4 ivec4
+#define bool2 bvec2
+#define bool3 bvec3
+#define bool4 bvec4
+#define float2x2 mat2x2
+#define float3x3 mat3x3
+#define float4x4 mat4x4
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Hand the texture coordinate on unchanged through the vTexCoord output.
+   vTexCoord = TexCoord;
+   // Output position: the input vertex multiplied by the MVP matrix of the UBO.
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+void main()
+{
+	// Unfiltered colour at this fragment; it doubles as the centre tap of the
+	// blur, so the texture is not sampled a second time at the same coordinate.
+	float3 outcolor = tex2D(Source, vTexCoord).rgb;
+
+	// Horizontal step for the neighbour taps (presumably 1 / source width).
+	float stepx = params.SourceSize.z;
+
+	// Sample things: two asymmetric horizontal taps at +0.15 and -0.22 of that step.
+	float3 pixel1 = tex2D(Source, vTexCoord + float2(stepx * 0.15, 0.0)).rgb;
+	float3 pixel2 = tex2D(Source, vTexCoord + float2(-stepx * 0.22, 0.0)).rgb;
+
+	// Difference between the channel sums of the two taps, limited to 0..1.
+	float gary1 = dot(pixel1, float3(1.0));
+	float gary2 = dot(pixel2, float3(1.0));
+	float mean = clamp(abs(gary1 - gary2), 0.0, 1.0);
+
+	// GLSL leaves pow() undefined for a zero base with a non-positive exponent,
+	// and LEIFX_BLURFACTOR is declared with a minimum of 0.00. A zero difference
+	// therefore bypasses pow(); for every positive exponent pow(0, e) is 0 anyway.
+	if (mean > 0.0)
+		mean = min(pow(mean, LEIFX_BLURFACTOR), 1.0);
+
+	// Variable blend: blendy weights the three-tap average, blenda the
+	// unfiltered colour.
+	float blendy = 1.0 - mean;
+	float blenda = 1.0 - blendy;
+	float3 pixelblend = outcolor / 3.0 + pixel1 / 3.0 + pixel2 / 3.0;
+	outcolor = (pixelblend * blendy) + (outcolor * blenda);
+
+	FragColor = vec4(outcolor, 1.0);
+}
--- a/3dfx/shaders/old/3dfx_pass_2.vert
+++ b/3dfx/shaders/old/3dfx_pass_2.vert
@ -0,0 +1,62 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float LEIFX_BLURFACTOR;
+}params;
+
+// Parameter declaration with its separators restored: name, quoted label, then
+// four numbers (presumably default, minimum, maximum and step).
+#pragma parameter LEIFX_BLURFACTOR "LeiFX Blur Factor" 0.69 0.00 1.00 0.01
+
+
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Forward the texture coordinate unchanged through the vTexCoord output.
+   vTexCoord = TexCoord;
+   // Output position: the input vertex multiplied by the MVP matrix of the UBO.
+   gl_Position = global.MVP * Position;
+}
+
--- a/14
+++ b/14
@ -0,0 +1,14 @@
+PREFIX := /usr
+INSTALLDIR := $(PREFIX)/share/libretro/shaders/shaders_slang
+
+# None of these targets creates a file of its own name. Declaring them phony
+# keeps a stray file or directory called "all", "install" or "test-install"
+# from making the target look up to date.
+.PHONY: all install test-install
+
+# Default target: there is nothing to build.
+all:
+	@echo "Nothing to make for slang-shaders."
+
+# Copy everything in the current directory into $(DESTDIR)$(INSTALLDIR), then
+# delete the Makefile and configure entries from the destination.
+install:
+	mkdir -p $(DESTDIR)$(INSTALLDIR)
+	cp -ar -t $(DESTDIR)$(INSTALLDIR) *
+	rm -f $(DESTDIR)$(INSTALLDIR)/Makefile \
+		$(DESTDIR)$(INSTALLDIR)/configure
+
+# Trial run of the install target with /tmp/build as the destination root.
+test-install: all
+	DESTDIR=/tmp/build $(MAKE) install
--- a/README.md
+++ b/README.md
@ -0,0 +1,779 @@
+# Vulkan GLSL RetroArch shader system
+
+This document is a draft of RetroArch's new GPU shader system.
+It will outline the features in the new shader subsystem and describe details for how it will work in practice.
+
+In addition, this document will contain various musings on why certain design choices are made and which compromises have been made to arrive at the conclusion. This is mostly for discussion and deliberation while the new system is under development.
+
+## Introduction
+
+### Target shader languages
+ - Vulkan
+ - GL 2.x (legacy desktop)
+ - GL 3.x+ (modern desktop)
+ - GLES2 (legacy mobile)
+ - GLES3 (modern mobile)
+ - (HLSL, potentially)
+ - (Metal, potentially)
+
+RetroArch is still expected to run on GLES2 and GL2 systems.
+GL2 is mostly not relevant any longer, but GLES2 is certainly a very relevant platform still and having GLES2 compatibility makes GL2 very easy.
+We therefore want to avoid speccing out a design which deliberately ruins GLES2 compatibility.
+
+However, we also do not want to artificially limit ourselves to shader features which are only available in GLES2.
+There are many shader builtins for example which only work in GLES3/GL3 and we should not hold back support in these cases.
+When we want to consider GLES2 compat we should not spec out high level features which do not make much sense in the context of GLES2.
+
+### Why a new spec?
+
+The current shader subsystem in RetroArch is quite mature with a large body of shaders written for it.
+While it has served us well, it is not forward-compatible.
+
+The current state of writing high-level shading languages that work "everywhere" is very challenging.
+There was no good ready-made solution for this.
+Up until now, we have relied on nVidia Cg to serve as a basic foundation for shaders, but Cg has been discontinued for years and is closed source.
+This is very problematic since Cg is not a forward compatible platform.
+It has many warts which are heavily tied in to legacy APIs and systems.
+For this reason, we cannot use Cg for newer APIs such as Vulkan and potentially D3D12 and Metal.
+
+Cg cross compilation to GLSL is barely working and it is horribly unmaintainable with several unfixable issues.
+The output is so horribly mangled and unoptimized that it is clearly not the approach we should be taking.
+We also cannot do the Cg transform in runtime on mobile due to lack of open source Cg runtime, so there's that as well.
+
+Another alternative is to write straight-up GLSL, but this too has some severe problems.
+All the different GL versions and GLSL variants are different enough that it becomes painful to write portable GLSL code that works without modification.
+Examples include:
+
+ - varying/attribute vs in/out (legacy vs modern)
+ - precision qualifiers (GLSL vs ESSL)
+ - texture2D vs texture (legacy vs modern)
+ - Lack of standard support for #include to reduce copy-pasta
+
+The problem really is that GLSL shaders are dependent on the runtime GL version, which makes it very annoying and hard to test all shader variants.
+
+We do not want to litter every shader with heaps of #ifdefs everywhere to combat this problem.
+We also want to avoid having to write pseudo-GLSL with some text based replacement behind the scenes.
+
+#### Vulkan GLSL as the portable solution
+
+Fortunately, there is now a forward looking and promising solution to our problems.
+Vulkan GLSL is a GLSL dialect designed for Vulkan and SPIR-V intermediate representation.
+The good part is that we can use whatever GLSL version we want when writing shaders, as it is decoupled from the GL runtime.
+
+In runtime, we can have a vendor-neutral mature compiler,
+[glslang](https://github.com/KhronosGroup/glslang) which compiles our Vulkan GLSL to SPIR-V.
+Using [SPIRV-Cross](https://github.com/KhronosGroup/SPIRV-Cross), we can then do reflection on the SPIR-V binary to deduce our filter chain layout.
+We can also disassemble back to our desired GLSL dialect in the GL backend based on which GL version we're running,
+which effectively means we can completely sidestep all our current problems with a pure GLSL based shading system.
+
+Another upside of this is that we no longer have to deal with vendor-specific quirks in the GLSL frontend.
+A common problem when people write for nVidia is that people mistakenly use float2/float3/float4 types from Cg/HLSL, which is supported
+as an extension in their GLSL frontend.
+
+##### Why not SPIR-V directly?
+
+This was considered, but there are several convenience problems with having a shading spec around pure SPIR-V.
+The first problem is metadata. In GLSL, we can quite easily extend with custom #pragmas or similar, but there is no trivial way to do this in SPIR-V
+outside writing custom tools to emit special metadata as debug information or similar with OpSource.
+
+We could also have this metadata outside in a separate file, but juggling more files means more churn, which we should try to avoid.
+The other problem is convenience. If RetroArch only accepts SPIR-V, we would need an explicit build step outside RetroArch first before we could
+test a shader. This gets very annoying during shader development,
+so it is clear that we need to support GLSL anyways, making SPIR-V support kinda redundant.
+
+The main argument for supporting SPIR-V would be to allow new shading languages to be used. This is a reasonable thing to consider, which is why
+the goal is to not design ourselves into a corner where it's only Vulkan GLSL that can possibly work down the line. We are open to the idea that
+new shading languages that target SPIR-V will emerge.
+
+### Warts in old shader system
+
+While the old shader system is functional it has some severe warts which have accumulated over time.
+In hindsight, some of the early design decisions were misguided and need to be properly fixed.
+
+#### Forced POT with padding
+
+This is arguably the largest wart of them all. The original reason behind this design decision was caused by a misguided effort to combat FP precision issues with texture sampling. The idea at the time was to avoid cases where nearest neighbor sampling at texel edges would cause artifacts. This is a typical case when textures are scaled with non-integer factors. However, the problem to begin with is naive nearest neighbor and non-integer scaling factors, and not FP precision. It was pure luck that POT tended to give better results with broken shaders, but we should not make this mistake again. POT padding has some severe issues which are not just cleanliness related either.
+
+Technically, GLES2 doesn't require non-POT support, but in practice, all GPUs support this.
+
+##### No proper UV wrapping
+Since the texture "ends" at UV coords < 1.0, we cannot properly
+use sampler wrapping modes. We can only fake `CLAMP_TO_BORDER` by padding with black color, but this filtering mode is not available by default in GLES2 and even GLES3!
+`CLAMP_TO_BORDER` isn't necessarily what we want either. `CLAMP_TO_EDGE` is usually a far more sane default.
+
+##### Extra arguments for actual width vs. texture width
+
+With normalized coordinates we need to think in both real resolution (e.g. 320x240) vs. POT padded resolutions (512x512) to deal with normalized UV coords. This complicates things massively and
+we were passing an insane amount of attributes and varyings to deal with this because the ratios between the two needn't be the same for two different textures.
+
+#### Arbitrary limits
+The way the old shader system deals with limits is quite naive.
+There is a hard limit of 8 when referencing other passes and older frames.
+There is no reason why we should have arbitrary limits like these.
+Part of the reason is C where dealing with dynamic memory is more painful than it should be so it was easier to take the lazy way out.
+
+#### Tacked on format handling
+
+In more complex shaders we need to consider more than just the plain `RGBA8_UNORM` format.
+The old shader system tacked on these things after the fact by adding booleans for SRGB and FP support, but this obviously doesn't scale.
+This point does get problematic since GLES2 has terrible support for render target formats, but we should allow complex shaders to use complex RT formats
+and rather just allow some shader presets to drop GLES2 compat.
+
+#### PASS vs PASSPREV
+
+Ugly. We do not need two ways to access previous passes, the actual solution is to have aliases for passes instead and access by name.
+
+#### Inconsistencies in parameter passing
+
+MVP matrices are passed in with weird conventions in the Cg spec, and its casing is weird.
+The source texture is passed with magic TEXUNIT0 semantic while other textures are passed via uniform struct members, etc.
+This is the result of tacking on feature support slowly over time without proper forethought.
+
+## High level Overview
+
+The RetroArch shader format outlines a filter chain/graph, a series of shader passes which operate on previously generated data to produce a final result.
+The goal is for every individual pass to access information from *all* previous shader passes, even across frames, easily.
+
+ - The filter chain specifies a number of shader passes to be executed one after the other.
+ - Each pass renders a full-screen quad to a texture of a certain resolution and format.
+ - The resolution can be dependent on external information.
+ - All filter chains begin at an input texture, which is created by a libretro core or similar.
+ - All filter chains terminate by rendering to the "backbuffer".
+
+The backbuffer is somewhat special since the resolution of it cannot be controlled by the shader.
+It can also not be fed back into the filter chain later
+because the frontend (here RetroArch) will render UI elements and such on top of the final pass output.
+
+Let's first look at what we mean by filter chains and how far we can expand this idea.
+
+### Simplest filter chain
+
+The simplest filter chain we can specify is a single pass.
+
+```
+(Input) -> [ Shader Pass #0 ] -> (Backbuffer)
+```
+
+In this case there are no offscreen render targets necessary since our input is rendered directly to screen.
+
+### Multiple passes
+
+A trivial extension is to keep our straight line view of the world where each pass looks at the previous output.
+
+```
+(Input) -> [ Shader Pass #0 ] -> (Framebuffer) -> [ Shader Pass #1 ] -> (Backbuffer)
+```
+
+Framebuffer here might have a different resolution than both Input and Backbuffer.
+A very common scenario for this is separable filters where we first scale horizontally, then vertically.
+
+### Multiple passes and multiple inputs
+
+There is no reason why we should restrict ourselves to a straight-line view.
+
+```
+     /------------------------------------------------\
+    /                                                  v
+(Input) -> [ Shader Pass #0 ] -> (Framebuffer #0) -> [ Shader Pass #1 ] -> (Backbuffer)
+```
+
+In this scenario, we have two inputs to shader pass #1, both the original, untouched input as well as the result of a pass in-between.
+All the inputs to a pass can have different resolutions.
+We have a way to query the resolution of individual textures to allow highly controlled sampling.
+
+We are now at a point where we can express an arbitrarily complex filter graph, but we can do better.
+For certain effects, time (or rather, results from earlier frames) can be an important factor.
+
+### Multiple passes, multiple inputs, with history
+
+We now extend our filter graph, where we also have access to information from earlier frames. Note that this is still a causal filter system.
+
+```
+Frame N:        (Input     N, Input N - 1, Input N - 2) -> [ Shader Pass #0 ] -> (Framebuffer     N, Framebuffer N - 1, Input N - 3) -> [ Shader Pass #1 ] -> (Backbuffer)
+Frame N - 1:    (Input N - 1, Input N - 2, Input N - 3) -> [ Shader Pass #0 ] -> (Framebuffer N - 1, Framebuffer N - 2, Input N - 4) -> [ Shader Pass #1 ] -> (Backbuffer)
+Frame N - 2:    (Input N - 2, Input N - 3, Input N - 4) -> [ Shader Pass #0 ] -> (Framebuffer N - 2, Framebuffer N - 3, Input N - 5) -> [ Shader Pass #1 ] -> (Backbuffer)
+```
+
+For framebuffers we can read the previous frame's framebuffer. We don't really need more than one frame of history since we have a feedback effect in place.
+Just like IIR filters, the "response" of such a feedback in the filter graph gives us essentially "infinite" history back in time,
+although it is mostly useful for long-lasting blurs and ghosting effects. Supporting more than one frame of feedback would also be extremely memory intensive since framebuffers tend to be
+much higher resolution than their input counterparts. One frame is also a nice "clean" limit. Once we go beyond just 1, the floodgate opens to arbitrary numbers, which we would want to avoid.
+It is also possible to fake as many feedback frames of history as we want anyways,
+since we can copy a feedback frame to a separate pass anyways which effectively creates a "shift register" of feedback framebuffers in memory.
+
+Input textures can have arbitrary number of textures as history (just limited by memory).
+They cannot feedback since the filter chain cannot render into it, so it effectively is finite response (FIR).
+
+For the very first frames, frames with frame N < 0 are transparent black (all values 0).
+
+### No POT padding
+
+No texture in the filter chain is padded at any time. It is possible for resolutions in the filter chain to vary over time which is common with certain emulated systems.
+In these scenarios, the textures and framebuffers are simply resized appropriately.
+Older frames still keep their old resolution in the brief moment that the resolution is changing.
+
+It is very important that shaders do not blindly sample with nearest filter with any scale factor. If naive nearest neighbor sampling is to be used, shaders must make sure that
+the filter chain is configured with integer scaling factors so that ambiguous texel-edge sampling is avoided.
+
+### Deduce shader inputs by reflection
+
+We want to have as much useful information in the shader source as possible. We want to avoid having to explicitly write out metadata in shaders wherever we can.
+The biggest hurdle to overcome is how we describe our pipeline layout. The pipeline layout contains information about how we access resources such as uniforms and textures.
+There are three main types of inputs in this shader system.
+
+ - Texture samplers (sampler2D)
+ - Look-up textures for static input data
+ - Uniform data describing dimensions of textures
+ - Uniform ancillary data for render target dimensions, backbuffer target dimensions, frame count, etc
+ - Uniform user-defined parameters
+ - Uniform MVP for vertex shader
+
+#### Deduction by name
+
+There are two main approaches to deduce what a sampler2D uniform wants to sample from.
+The first way is to explicitly state somewhere else what that particular sampler needs, e.g.
+
+```
+uniform sampler2D geeWhatAmI;
+
+// Metadata somewhere else
+SAMPLER geeWhatAmI = Input[-2]; // Input frame from 2 frames ago
+```
+
+The other approach is to have built-in identifiers which correspond to certain textures.
+
+```
+// Source here being defined as the texture from previous framebuffer pass or the input texture if this is the first pass in the chain.
+uniform sampler2D Source;
+```
+
+In SPIR-V, we can use `OpName` to describe these names, so we do not require the original Vulkan GLSL source to perform this reflection.
+We use this approach throughout the specification. An identifier is mapped to an internal meaning (semantic). The shader backend looks at these semantics and constructs
+a filter chain based on all shaders in the chain.
+
+Identifiers can also have user defined meaning, either as an alias to existing identifiers or mapping to user defined parameters.
+
+### Combining vertex and fragment into a single shader file
+
+One strength of Cg is its ability to contain multiple shader stages in the same .cg file.
+This is very convenient since we always want to consider vertex and fragment together.
+This is especially needed when trying to mix and match shaders in a GUI window for example.
+We don't want to require users to load first a vertex shader, then fragment manually.
+
+GLSL however does not support this out of the box. This means we need to define a light-weight system for preprocessing
+one GLSL source file into multiple stages.
+
+#### Should we make vertex optional?
+
+In most cases, the vertex shader will remain the same.
+This leaves us with the option to provide a "default" vertex stage if the shader stage is not defined.
+
+### #include support
+
+With complex filter chains there is a lot of opportunity to reuse code.
+We therefore want light support for the #include directive.
+
+### User parameter support
+
+Since we already have a "preprocessor" of sorts, we can also trivially extend this idea with user parameters.
+In the shader source we can specify which uniform inputs are user controlled, GUI visible name, their effective range, etc.
+
+### Lookup textures
+
+A handy feature to have is reading from lookup textures.
+We can specify that some sampler inputs are loaded from a PNG file on disk as a plain RGBA8 texture.
+
+#### Do we want to support complex reinterpretation?
+
+There could be valid use cases for supporting other formats than plain `RGBA8_UNORM`.
+`SRGB` and `UINT` might be valid cases as well and maybe even 2x16-bit, 1x32-bit integer formats.
+
+#### Lookup buffers
+
+Do we want to support lookup buffers as UBOs as well?
+This wouldn't be doable in GLES2, but it could be useful as a more modern feature.
+If the LUT is small enough, we could realize it via plain old uniforms as well perhaps.
+
+This particular feature could be very interesting for generic polyphase lookup banks with different LUT files for different filters.
+
+## Vulkan GLSL specification
+
+This part of the spec considers how Vulkan GLSL shaders are written. The frontend uses the glslang frontend to compile GLSL sources.
+This ensures that we do not end up with vendor-specific extensions.
+The #version string should be as recent as possible, e.g. `#version 450` or `#version 310 es`.
+It is recommended to use 310 es since it allows mediump which can help on mobile.
+Note that after the Vulkan GLSL is turned into SPIR-V, the original #version string does not matter anymore.
+Also note that SPIR-V cannot be generated from legacy shader versions such as #version 100 (ES 2.0) or #version 120 (GL 2.1).
+
+The frontend will use reflection on the resulting SPIR-V file in order to deduce what each element in the UBO or what each texture means.
+The main types of data passed to shaders are read-only and can be classified as:
+
+ - `uniform sampler2D`: This is used for input textures, framebuffer results and lookup-textures.
+ - `uniform Block { };`: This is used for any constant data which is passed to the shader.
+ - `layout(push_constant) uniform Push {} name;`: This is used for any push constant data which is passed to the shader.
+
+### Resource usage rules
+
+Certain rules must be adhered to in order to make it easier for the frontend to dynamically set up bindings to resources.
+
+ - All resources must be using descriptor set #0, or don't use layout(set = #N) at all.
+ - layout(binding = #N) must be declared for all UBOs and sampler2Ds.
+ - All resources must use different bindings.
+ - There can be only one UBO.
+ - There can be only one push constant block.
+ - It is possible to have one regular UBO and one push constant UBO.
+ - If a UBO is used in both vertex and fragment, their binding number must match.
+ - If a UBO is used in both vertex and fragment, members with the same name must have the same offset/binary interface.
+   This problem is easily avoided by having the same UBO visible to both vertex and fragment as "common" code.
+ - If a push constant block is used in both vertex and fragment, members with the same name must have the same offset/binary interface.
+ - sampler2D cannot be used in vertex, although the size parameters of samplers can be used in vertex.
+ - Other resource types such as SSBOs, images, atomic counters, etc, etc, are not allowed.
+ - Every member of the UBOs and push constant blocks as well as every texture must be meaningful
+   to the frontend in some way, or an error is generated.
+
+### Initial preprocess of slang files
+
+The very first line of a `.slang` file must contain a `#version` statement.
+
+The first process which takes place is dealing with `#include` statements.
+A slang file is preprocessed by scanning through the slang and resolving all `#include` statements.
+The include process does not consider any preprocessor defines or conditional expressions.
+The include path must always be relative, and it will be relative to the file path of the current file.
+Nested includes are allowed, but includes in a cycle are undefined as preprocessor guards are not considered.
+
+E.g.:
+```
+#include "common.inc"
+```
+
+After includes have been resolved, the frontend scans through all lines of the shader and considers `#pragma` statements.
+These pragmas build up ancillary reflection information and otherwise meaningful metadata.
+
+#### `#pragma stage`
+This pragma controls which part of a `.slang` file are visible to certain shader stages.
+Currently, two variants of this pragma are supported:
+
+ - `#pragma stage vertex`
+ - `#pragma stage fragment`
+
+If no `#pragma stage` has been encountered yet, lines of code in a shader belong to all shader stages.
+If a `#pragma stage` statement has been encountered, that stage is considered active, and the following lines of shader code will only be used when building source for that particular shader stage. A new `#pragma stage` can override which stage is active.
+
+#### `#pragma name`
+This pragma lets a shader set its identifier. This identifier can be used to create simple aliases for other passes.
+
+E.g.:
+```
+#pragma name HorizontalPass
+```
+
+#### `#pragma format`
+This pragma controls the format of the framebuffer which this shader will render to.
+The default render target format is `R8G8B8A8_UNORM`.
+
+Supported render target formats are listed below. From a portability perspective,
+please be aware that GLES2 has abysmal render target format support,
+and GLES3/GL3 may have restricted floating point render target support.
+
+If rendering to uint/int formats, make sure your fragment shader output target is uint/int.
+
+#### 8-bit
+ - `R8_UNORM`
+ - `R8_UINT`
+ - `R8_SINT`
+ - `R8G8_UNORM`
+ - `R8G8_UINT`
+ - `R8G8_SINT`
+ - `R8G8B8A8_UNORM`
+ - `R8G8B8A8_UINT`
+ - `R8G8B8A8_SINT`
+ - `R8G8B8A8_SRGB`
+
+#### 10-bit
+ - `A2B10G10R10_UNORM_PACK32`
+ - `A2B10G10R10_UINT_PACK32`
+
+#### 16-bit
+ - `R16_UINT`
+ - `R16_SINT`
+ - `R16_SFLOAT`
+ - `R16G16_UINT`
+ - `R16G16_SINT`
+ - `R16G16_SFLOAT`
+ - `R16G16B16A16_UINT`
+ - `R16G16B16A16_SINT`
+ - `R16G16B16A16_SFLOAT`
+
+#### 32-bit
+ - `R32_UINT`
+ - `R32_SINT`
+ - `R32_SFLOAT`
+ - `R32G32_UINT`
+ - `R32G32_SINT`
+ - `R32G32_SFLOAT`
+ - `R32G32B32A32_UINT`
+ - `R32G32B32A32_SINT`
+ - `R32G32B32A32_SFLOAT`
+
+E.g.:
+```
+#pragma format R16_SFLOAT
+```
+#### `#pragma parameter`
+
+Shader parameters allow shaders to take user-defined inputs as uniform values.
+This makes shaders more configurable.
+
+The format is:
+```
+#pragma parameter IDENTIFIER "DESCRIPTION" INITIAL MINIMUM MAXIMUM [STEP]
+```
+The step parameter is optional.
+INITIAL, MINIMUM and MAXIMUM are floating point values.
+IDENTIFIER is the meaningful string which is the name of the uniform which will be used in a UBO or push constant block.
+DESCRIPTION is a string which is human readable representation of IDENTIFIER.
+
+E.g:
+```
+layout(push_constant) uniform Push {
+   float DummyVariable;
+} registers;
+#pragma parameter DummyVariable "This is a dummy variable" 1.0 0.2 2.0 0.1
+```
+
+### I/O interface variables
+
+The slang shader spec specifies two vertex inputs and one fragment output.
+Varyings between vertex and fragment shaders are user-defined.
+
+#### Vertex inputs
+Two attributes are provided and must be present in a shader.
+It is only the layout(location = #N) which is actually significant.
+The particular names of input and output variables are ignored, but should be consistent for readability.
+
+##### `layout(location = 0) in vec4 Position;`
+This attribute is a 2D position in the form `vec4(x, y, 0.0, 1.0);`.
+Shaders should not try to extract meaning from the x, y.
+`gl_Position` must be assigned as:
+
+```
+gl_Position = MVP * Position;
+```
+##### `layout(location = 1) in vec2 TexCoord;`
+The texture coordinate is semantically such that (0.0, 0.0) is top-left and (1.0, 1.0) is bottom right.
+If TexCoord is passed to a varying unmodified, the interpolated varying will be `uv = 0.5 / OutputSize` when rendering the upper left pixel as expected and `uv = 1.0 - 0.5 / OutputSize` when rendering the bottom-right pixel.
+
+#### Vertex/Fragment interface
+Vertex outputs and fragment inputs link by location, and not name.
+
+E.g.:
+```
+// Vertex
+layout(location = 0) out vec4 varying;
+// Fragment
+layout(location = 0) in vec4 some_other_name;
+```
+will still link fine, although using the same names is encouraged for readability.
+
+#### Fragment outputs
+
+##### `layout(location = 0) out vec4 FragColor;`
+Fragment shaders must have a single output to location = 0.
+Multiple render targets are not allowed. The type of the output depends on the render target format.
+int/uint type must be used if UINT/INT render target formats are used, otherwise float type.
+
+### Builtin variables
+
+#### Builtin texture variables
+Input textures get their meaning from their names.
+
+ - Original: This accesses the input of the filter chain, accessible from any pass.
+ - Source: This accesses the input from previous shader pass, or Original if accessed in the first pass of the filter chain.
+ - OriginalHistory#: This accesses the input # frames back in time.
+   There is no limit on #, except larger numbers will consume more VRAM.
+   OriginalHistory0 is an alias for Original, OriginalHistory1 is the previous frame and so on.
+ - PassOutput#: This accesses the output from pass # in this frame.
+   PassOutput# must be causal, it is an error to access PassOutputN in pass M if N >= M.
+   PassOutput# will typically be aliased to a more readable value.
+ - PassFeedback#: This accesses PassOutput# from the previous frame.
+   Any pass can read the feedback of any pass, since it is causal.
+   PassFeedback# will typically be aliased to a more readable value.
+ - User#: This accesses look-up textures.
+   However, the direct use of User# is discouraged and should always be accessed via aliases.
+
+#### Builtin texture size uniform variables
+
+If a member of a UBO or a push constant block is called ???Size# where ???# is the name of a texture variable,
+that member must be a vec4, which will receive these values:
+ - X: Horizontal size of texture
+ - Y: Vertical size of texture
+ - Z: 1.0 / (Horizontal size of texture)
+ - W: 1.0 / (Vertical size of texture)
+
+It is valid to use a size variable without declaring the texture itself. This is useful for vertex shading.
+It is valid (although probably not useful) for a variable to be present in both a push constant block and a UBO block at the same time.
+
+#### Builtin uniform variables
+
+Other than uniforms related to textures, there are other special uniforms available.
+These builtin variables may be part of a UBO block and/or a push constant block.
+
+ - MVP: mat4 model view projection matrix.
+ - OutputSize: a vec4(x, y, 1.0 / x, 1.0 / y) variable describing the render target size (x, y) for this pass.
+ - FinalViewportSize: a vec4(x, y, 1.0 / x, 1.0 / y) variable describing the render target size for the final pass.
+   Accessible from any pass.
+ - FrameCount: a uint variable taking a value which increases by one every frame.
+   This value could be pre-wrapped by modulo if specified in preset.
+   This is useful for creating time-dependent effects.
+
+#### Aliases
+Aliases can give meaning to arbitrary names in a slang file.
+This is mostly relevant for LUT textures, shader parameters and accessing other passes by name.
+
+If a shader pass has a `#pragma name NAME` associated with it, meaning is given to the shader:
+ - NAME, is a sampler2D.
+ - NAMESize is a vec4 size uniform associated with NAME.
+ - NAMEFeedback is a sampler2D for the previous frame.
+ - NAMEFeedbackSize is a vec4 size uniform associated with NAMEFeedback.
+
+#### Example slang shader
+
+```
+#version 450
+// 450 or 310 es are recommended
+
+layout(set = 0, binding = 0, std140) uniform UBO
+{
+   mat4 MVP;
+   vec4 SourceSize; // Not used here, but doesn't hurt
+   float ColorMod;
+};
+
+#pragma name StockShader
+#pragma format R8G8B8A8_UNORM
+#pragma parameter ColorMod "Color intensity" 1.0 0.1 2.0 0.1
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+void main()
+{
+   gl_Position = MVP * Position;
+   vTexCoord = TexCoord;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(binding = 1) uniform sampler2D Source;
+void main()
+{
+   FragColor = texture(Source, vTexCoord) * ColorMod;
+}
+```
+
+### Push constants vs uniform blocks
+Push constants are fast-access uniform data which on some GPUs will improve performance over plain UBOs.
+It is encouraged to use push constant data as much as possible.
+
+```
+layout(push_constant) uniform Push
+{
+   vec4 SourceSize;
+   vec4 FinalViewportSize;
+} registers;
+```
+
+However, be aware that there is a limit to how large push constant blocks can be used.
+Vulkan puts a minimum required size of 128 bytes, which equals 8 vec4s.
+It is an error to use more than 128 bytes.
+If you're running out of space, you can move the MVP to a UBO instead, which frees up 64 bytes.
+Always prioritize push constants for data used in fragment shaders as there are many more fragment threads than vertex.
+Also note that like UBOs, the push constant space is shared across vertex and fragment.
+
+If you need more than 8 vec4s, you can spill uniforms over to plain UBOs,
+but more than 8 vec4s should be quite rare in practice.
+
+E.g.:
+
+```
+layout(binding = 0, std140) uniform UBO
+{
+   mat4 MVP; // Only used in vertex
+   vec4 SpilledUniform;
+} global;
+
+layout(push_constant) uniform Push
+{
+   vec4 SourceSize;
+   vec4 BlurPassSize;
+   // ...
+} registers;
+```
+
+### Samplers
+Which samplers are used for textures is specified by the preset format.
+The sampler remains constant throughout the frame; there is currently no way to select samplers on a frame-by-frame basis.
+This is mostly to make it possible to use the spec in GLES2 as GLES2 has no concept of separate samplers and images.
+
+### sRGB
+The input to the filter chain will not be of an sRGB format.
+This is due to many reasons, the main one being that it is very difficult for the frontend to get "free" passthrough of sRGB. It is possible to have a first pass which linearizes the input to a proper sRGB render target. In this way, custom gammas can be used as well.
+
+Similarly, the final pass will not be an sRGB backbuffer for similar reasons.
+
+### Caveats
+
+#### Frag Coord
+TexCoord also replaces `gl_FragCoord`. Do not use `gl_FragCoord` as it doesn't consider the viewports correctly.
+If you need `gl_FragCoord` use `vTexCoord * OutputSize.xy` instead.
+
+#### Derivatives
+Be careful with derivatives of vTexCoord. The screen might have been rotated by the vertex shader, which will also rotate the derivatives, especially in the final pass which hits the backbuffer.
+However, derivatives are fortunately never really needed, since w = 1 (we render flat 2D quads),
+which means derivatives of varyings are constant. You can do some trivial replacements which will be faster and more robust.
+
+```
+dFdx(vTexCoord) = vec2(OutputSize.z, 0.0);
+dFdy(vTexCoord) = vec2(0.0, OutputSize.w);
+fwidth(vTexCoord) = max(OutputSize.z, OutputSize.w);
+```
+To avoid issues with rotation or unexpected derivatives in case derivatives are really needed,
+off-screen passes will not have rotation and
+dFdx and dFdy will behave as expected.
+
+#### Correctly sampling textures
+A common mistake made by shaders is that they aren't careful enough about sampling textures correctly.
+There are three major cases to consider:
+
+##### Bilinear sampling
+If bilinear is used, it is always safe to sample a texture.
+
+##### Nearest, with integer scale
+If the OutputSize / InputSize is integer,
+the interpolated vTexCoord will always fall inside the texel safely, so no special precautions have to be used.
+For very particular shaders which rely on nearest neighbor sampling, using integer scale to a framebuffer and upscaling that
+with more stable upscaling filters like bicubic for example is usually a great choice.
+
+##### Nearest, with non-integer scale
+Sometimes, it is necessary to upscale images to the backbuffer which have an arbitrary size.
+Bilinear is not always good enough here, so we must deal with a complicated case.
+
+If we interpolate vTexCoord over a frame with non-integer scale, it is possible that we end up just between two texels.
+Nearest neighbor will have to find a texel which is nearest, but there is no clear "nearest" texel. In this scenario, we end up having lots of failure cases which are typically observed as weird glitches in the image which change based on the resolution.
+
+To correctly sample nearest textures with non-integer scale, we must pre-quantize our texture coordinates.
+Here's a snippet which lets us safely sample a nearest filtered texture and emulate bilinear filtering.
+
+```
+   vec2 uv = vTexCoord * global.SourceSize.xy - 0.5; // Shift by 0.5 since the texel sampling points are in the texel center.
+   vec2 a = fract(uv);
+   vec2 tex = (floor(uv) + 0.5) * global.SourceSize.zw; // Build a sampling point which is in the center of the texel.
+
+   // Sample the bilinear footprint.
+   vec4 t0 = textureLodOffset(Source, tex, 0.0, ivec2(0, 0));
+   vec4 t1 = textureLodOffset(Source, tex, 0.0, ivec2(1, 0));
+   vec4 t2 = textureLodOffset(Source, tex, 0.0, ivec2(0, 1));
+   vec4 t3 = textureLodOffset(Source, tex, 0.0, ivec2(1, 1));
+
+   // Bilinear filter.
+   vec4 result = mix(mix(t0, t1, a.x), mix(t2, t3, a.x), a.y);
+```
+
+The concept of splitting up the integer texel along with the fractional texel helps us
+do arbitrary non-integer scaling safely.
+The uv variable could also be passed pre-computed from vertex to avoid the extra computation in fragment.
+
+### Preset format (.slangp)
+
+The preset format is essentially unchanged from the old .cgp and .glslp, except the new preset format is called .slangp.
+
+## Porting guide from legacy Cg spec
+
+### Common functions
+ - mul(mat, vec) -> mat * vec
+ - lerp() -> mix()
+ - ddx() -> dFdx()
+ - ddy() -> dFdy()
+ - tex2D() -> texture()
+ - frac() -> fract()
+
+### Types
+
+ - floatN -> vecN
+ - boolN -> bvecN
+ - intN -> ivecN
+ - uintN -> uvecN
+ - float4x4 -> mat4
+
+### Builtin uniforms and misc
+
+ - modelViewProj -> MVP
+ - IN.video\_size -> SourceSize.xy
+ - IN.texture\_size -> SourceSize.xy (no POT shenanigans, so they are the same)
+ - IN.output\_size -> OutputSize.xy
+ - IN.frame\_count -> FrameCount (uint instead of float)
+ - \*.tex\_coord -> TexCoord (no POT shenanigans, so they are all the same)
+ - \*.lut\_tex\_coord -> TexCoord
+ - ORIG -> `Original`
+ - PASS# -> PassOutput#
+ - PASSPREV# -> No direct analog, PassOutput(CurrentPass - #), but prefer aliases
+
+### Cg semantics
+
+ - POSITION -> gl\_Position
+ - float2 texcoord : TEXCOORD0 -> layout(location = 1) in vec2 TexCoord;
+ - float4 varying : TEXCOORD# -> layout(location = #) out vec4 varying;
+ - uniform float4x4 modelViewProj -> uniform UBO { mat4 MVP; };
+
+Output structs should be flattened into separate varyings.
+
+E.g. instead of
+```
+struct VertexData
+{
+   float4 pos : POSITION;
+   float4 tex0 : TEXCOORD0;
+   float4 tex1 : TEXCOORD1;
+};
+
+void main_vertex(out VertexData vout)
+{
+   vout.pos = ...;
+   vout.tex0 = ...;
+   vout.tex1 = ...;
+}
+
+void main_fragment(in VertexData vout)
+{
+   ...
+}
+```
+
+do this
+
+```
+#pragma stage vertex
+layout(location = 0) out vec4 tex0;
+layout(location = 1) out vec4 tex1;
+void main()
+{
+   gl_Position = ...;
+   tex0 = ...;
+   tex1 = ...;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec4 tex0;
+layout(location = 1) in vec4 tex1;
+void main()
+{
+}
+```
+
+Instead of returning a float4 from main\_fragment, have an output in fragment:
+
+```
+layout(location = 0) out vec4 FragColor;
+```
--- a/anti-aliasing/aa-shader-4.0-level2.slangp
+++ b/anti-aliasing/aa-shader-4.0-level2.slangp
@ -0,0 +1,15 @@
+# Anti-aliasing preset: pass 0 and pass 1 each render at 2.0x their source
+# size with filter_linear disabled.
+shaders = 2
+
+shader0 = shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass1.slang
+filter_linear0 = false
+scale_type0 = source
+scale0 = 2.0
+
+shader1 = shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass2.slang
+filter_linear1 = false
+scale_type1 = source
+scale1 = 2.0
+
+# NOTE(review): "shaders = 2" only counts shader0 and shader1, so this third
+# entry looks unused, and "scale_type_2" does not match the scale_typeN
+# spelling used above. Confirm whether a sharpen pass was intended.
+shader2 = ../sharpen/shaders/adaptive-sharpen.slang
+filter_linear2 = false
+scale_type_2 = source
--- a/anti-aliasing/aa-shader-4.0.slangp
+++ b/anti-aliasing/aa-shader-4.0.slangp
@ -0,0 +1,10 @@
+# Two passes: aa-shader-4.0 scaled to the viewport, then adaptive-sharpen.
+shaders = 2
+
+shader0 = shaders/aa-shader-4.0.slang
+filter_linear0 = false
+scale_type0 = viewport
+scale0 = 1.0
+
+# No scale1 is given for this pass; presumably the frontend default applies.
+shader1 = ../sharpen/shaders/adaptive-sharpen.slang
+filter_linear1 = false
+scale_type1 = source
--- a/anti-aliasing/advanced-aa.slangp
+++ b/anti-aliasing/advanced-aa.slangp
@ -0,0 +1,11 @@
+# Two passes: advanced-aa at 2.0x source size on both axes, then stock.slang
+# with linear filtering.
+shaders = 2
+
+shader0 = shaders/advanced-aa.slang
+filter_linear0 = false
+scale_type0 = source
+scale_x0 = 2.0
+scale_y0 = 2.0
+
+shader1 = ../stock.slang
+filter_linear1 = true
+# NOTE(review): "scale_type_1" does not match the scale_typeN spelling used
+# for pass 0, so it is presumably ignored. Changing it would alter how the
+# last pass is scaled; confirm the intent first.
+scale_type_1 = source
--- a/anti-aliasing/fxaa.slangp
+++ b/anti-aliasing/fxaa.slangp
@ -0,0 +1,4 @@
+# Single-pass preset: fxaa.slang with linear filtering, no explicit scale.
+shaders = 1
+
+shader0 = shaders/fxaa.slang
+filter_linear0 = true
--- a/anti-aliasing/reverse-aa.slangp
+++ b/anti-aliasing/reverse-aa.slangp
@ -0,0 +1,7 @@
+# Single-pass preset: reverse-aa.slang rendered at 2.0x source size with
+# filter_linear disabled.
+shaders = 1
+
+shader0 = shaders/reverse-aa.slang
+filter_linear0 = false
+scale_type0 = source
+scale0 = 2.0
+
--- a/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass1.frag
+++ b/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass1.frag
@ -0,0 +1,90 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Uniform block read by main(): SourceSize.xy and AAOFFSET are used there.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float AAOFFSET;
+}params;
+
+// Restored from the garbled "parameterAAOFFSET¡1.00.252.00.05" so that the
+// name, description and numeric fields are separate tokens again.
+#pragma parameter AAOFFSET "AA offset first pass" 1.0 0.25 2.0 0.05
+
+// MVP is not referenced by this fragment shader.
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+layout(location = 0) in vec2 vTexCoord;    // texture coordinate read by main()
+layout(location = 0) out vec4 FragColor;   // colour written by main()
+uniform sampler2D Source;                  // texture sampled by main()
+
+// Reciprocal of the summed per-channel absolute difference of two colours.
+// The 0.00001 bias keeps the divisor non-zero when both colours match.
+float inv_diff(vec3 a, vec3 b)
+{
+   return 1.0 / (dot(abs(a - b), vec3(1.0, 1.0, 1.0)) + 0.00001);
+}
+
+// Builds three diagonal blends around the current coordinate (square,
+// x-stretched and y-stretched footprints) and mixes them, giving more weight
+// to the blends that stay closest to the centre sample.
+void main()
+{
+   vec2 uv = vTexCoord;
+   vec2 src_dim = params.SourceSize.xy;
+   float off_x = params.AAOFFSET / src_dim.x;
+   float off_y = params.AAOFFSET / src_dim.y;
+
+   // .xy holds the positive step, .zw the negated one.
+   vec4 offs_sq = vec4(off_x, off_y, -off_x, -off_y);
+   vec4 offs_hz = offs_sq * vec4(4.0, 1.5, 4.0, 1.5);
+   vec4 offs_vt = offs_sq * vec4(1.5, 4.0, 1.5, 4.0);
+
+   vec3 mid = texture(Source, uv).xyz;
+
+   // Suffix letters give the sign of the x and y offset (n = minus, p = plus).
+   vec3 sq_nn = texture(Source, uv + offs_sq.zw).xyz;
+   vec3 sq_pn = texture(Source, uv + offs_sq.xw).xyz;
+   vec3 sq_pp = texture(Source, uv + offs_sq.xy).xyz;
+   vec3 sq_np = texture(Source, uv + offs_sq.zy).xyz;
+   vec3 hz_nn = texture(Source, uv + offs_hz.zw).xyz;
+   vec3 hz_pn = texture(Source, uv + offs_hz.xw).xyz;
+   vec3 hz_pp = texture(Source, uv + offs_hz.xy).xyz;
+   vec3 hz_np = texture(Source, uv + offs_hz.zy).xyz;
+   vec3 vt_nn = texture(Source, uv + offs_vt.zw).xyz;
+   vec3 vt_pn = texture(Source, uv + offs_vt.xw).xyz;
+   vec3 vt_pp = texture(Source, uv + offs_vt.xy).xyz;
+   vec3 vt_np = texture(Source, uv + offs_vt.zy).xyz;
+
+   // Per-pair weights: the more alike a pair is, the larger its weight.
+   float w_sq_a = inv_diff(sq_nn, sq_pp);
+   float w_sq_b = inv_diff(sq_np, sq_pn);
+   float w_hz_a = inv_diff(sq_nn, hz_pp);
+   float w_hz_b = inv_diff(sq_np, hz_pn);
+   float w_hz_c = inv_diff(hz_nn, sq_pp);
+   float w_hz_d = inv_diff(hz_np, sq_pn);
+   float w_vt_a = inv_diff(sq_nn, vt_pp);
+   float w_vt_b = inv_diff(sq_np, vt_pn);
+   float w_vt_c = inv_diff(vt_nn, sq_pp);
+   float w_vt_d = inv_diff(vt_np, sq_pn);
+
+   vec3 blend_sq = 0.5 * (w_sq_a * (sq_nn + sq_pp) + w_sq_b * (sq_np + sq_pn)) / (w_sq_a + w_sq_b);
+   vec3 blend_hz = 0.5 * (w_hz_a * (sq_nn + hz_pp) + w_hz_b * (sq_np + hz_pn) + w_hz_c * (hz_nn + sq_pp) + w_hz_d * (hz_np + sq_pn)) / (w_hz_a + w_hz_b + w_hz_c + w_hz_d);
+   vec3 blend_vt = 0.5 * (w_vt_a * (sq_nn + vt_pp) + w_vt_b * (sq_np + vt_pn) + w_vt_c * (vt_nn + sq_pp) + w_vt_d * (vt_np + sq_pn)) / (w_vt_a + w_vt_b + w_vt_c + w_vt_d);
+
+   // Final mix: each blend is weighted by its closeness to the centre sample.
+   float k_sq = inv_diff(blend_sq, mid);
+   float k_hz = inv_diff(blend_hz, mid);
+   float k_vt = inv_diff(blend_vt, mid);
+
+   FragColor = vec4((k_sq * blend_sq + k_hz * blend_hz + k_vt * blend_vt) / (k_sq + k_hz + k_vt), 1.0);
+}
--- a/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass1.gsh
+++ b/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass1.gsh
--- a/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass1.ppslang
+++ b/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass1.ppslang
@ -0,0 +1,99 @@
+#version 450
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Push-constant block shared by both stages; the fragment stage reads
+// SourceSize.xy and AAOFFSET.
+layout(push_constant)uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+ float AAOFFSET;
+} params;
+
+// Restored from the garbled "parameterAAOFFSET¡1.00.252.00.05" so that the
+// name, description and numeric fields are separate tokens again.
+#pragma parameter AAOFFSET "AA offset first pass" 1.0 0.25 2.0 0.05
+
+// MVP is applied to Position in the vertex stage.
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+ mat4 MVP;
+} global;
+
+// Stage marker restored from the fused "stagevertex" token.
+#pragma stage vertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+
+// Vertex stage: transforms the vertex by MVP and forwards the texture
+// coordinate scaled by 1.000001.
+void main()
+{
+   gl_Position = global . MVP * Position;
+   // NOTE(review): the 1.000001 factor presumably nudges coordinates off
+   // exact texel boundaries; confirm before changing it.
+   vTexCoord = TexCoord * 1.000001;
+}
+
+// Stage marker restored from the fused "stagefragment" token.
+#pragma stage fragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+// Reciprocal of the summed per-channel absolute difference of two colours.
+// The 0.00001 bias keeps the divisor non-zero when both colours match.
+float inv_diff(vec3 a, vec3 b)
+{
+   return 1.0 / (dot(abs(a - b), vec3(1.0, 1.0, 1.0)) + 0.00001);
+}
+
+// Fragment stage: builds three diagonal blends around the current coordinate
+// (square, x-stretched and y-stretched footprints) and mixes them, giving
+// more weight to the blends that stay closest to the centre sample.
+void main()
+{
+   vec2 uv = vTexCoord;
+   vec2 src_dim = params.SourceSize.xy;
+   float off_x = params.AAOFFSET / src_dim.x;
+   float off_y = params.AAOFFSET / src_dim.y;
+
+   // .xy holds the positive step, .zw the negated one.
+   vec4 offs_sq = vec4(off_x, off_y, -off_x, -off_y);
+   vec4 offs_hz = offs_sq * vec4(4.0, 1.5, 4.0, 1.5);
+   vec4 offs_vt = offs_sq * vec4(1.5, 4.0, 1.5, 4.0);
+
+   vec3 mid = texture(Source, uv).xyz;
+
+   // Suffix letters give the sign of the x and y offset (n = minus, p = plus).
+   vec3 sq_nn = texture(Source, uv + offs_sq.zw).xyz;
+   vec3 sq_pn = texture(Source, uv + offs_sq.xw).xyz;
+   vec3 sq_pp = texture(Source, uv + offs_sq.xy).xyz;
+   vec3 sq_np = texture(Source, uv + offs_sq.zy).xyz;
+   vec3 hz_nn = texture(Source, uv + offs_hz.zw).xyz;
+   vec3 hz_pn = texture(Source, uv + offs_hz.xw).xyz;
+   vec3 hz_pp = texture(Source, uv + offs_hz.xy).xyz;
+   vec3 hz_np = texture(Source, uv + offs_hz.zy).xyz;
+   vec3 vt_nn = texture(Source, uv + offs_vt.zw).xyz;
+   vec3 vt_pn = texture(Source, uv + offs_vt.xw).xyz;
+   vec3 vt_pp = texture(Source, uv + offs_vt.xy).xyz;
+   vec3 vt_np = texture(Source, uv + offs_vt.zy).xyz;
+
+   // Per-pair weights: the more alike a pair is, the larger its weight.
+   float w_sq_a = inv_diff(sq_nn, sq_pp);
+   float w_sq_b = inv_diff(sq_np, sq_pn);
+   float w_hz_a = inv_diff(sq_nn, hz_pp);
+   float w_hz_b = inv_diff(sq_np, hz_pn);
+   float w_hz_c = inv_diff(hz_nn, sq_pp);
+   float w_hz_d = inv_diff(hz_np, sq_pn);
+   float w_vt_a = inv_diff(sq_nn, vt_pp);
+   float w_vt_b = inv_diff(sq_np, vt_pn);
+   float w_vt_c = inv_diff(vt_nn, sq_pp);
+   float w_vt_d = inv_diff(vt_np, sq_pn);
+
+   vec3 blend_sq = 0.5 * (w_sq_a * (sq_nn + sq_pp) + w_sq_b * (sq_np + sq_pn)) / (w_sq_a + w_sq_b);
+   vec3 blend_hz = 0.5 * (w_hz_a * (sq_nn + hz_pp) + w_hz_b * (sq_np + hz_pn) + w_hz_c * (hz_nn + sq_pp) + w_hz_d * (hz_np + sq_pn)) / (w_hz_a + w_hz_b + w_hz_c + w_hz_d);
+   vec3 blend_vt = 0.5 * (w_vt_a * (sq_nn + vt_pp) + w_vt_b * (sq_np + vt_pn) + w_vt_c * (vt_nn + sq_pp) + w_vt_d * (vt_np + sq_pn)) / (w_vt_a + w_vt_b + w_vt_c + w_vt_d);
+
+   // Final mix: each blend is weighted by its closeness to the centre sample.
+   float k_sq = inv_diff(blend_sq, mid);
+   float k_hz = inv_diff(blend_hz, mid);
+   float k_vt = inv_diff(blend_vt, mid);
+
+   FragColor = vec4((k_sq * blend_sq + k_hz * blend_hz + k_vt * blend_vt) / (k_sq + k_hz + k_vt), 1.0);
+}
--- a/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass1.slang
+++ b/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass1.slang
@ -0,0 +1,99 @@
+#version 450
+
+/*
+   Copyright (C) 2016 guest(r) - guest.r@gmail.com
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+*/
+
+// Push-constant block shared by both stages; the fragment stage reads
+// SourceSize.xy and AAOFFSET.
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+	float AAOFFSET;
+} params;
+
+// Declares AAOFFSET as a tunable parameter with its description and numeric fields.
+#pragma parameter AAOFFSET "AA offset first pass" 1.0 0.25 2.0 0.05 
+
+// MVP is applied to Position in the vertex stage.
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+// Vertex stage: forwards the texture coordinate scaled by 1.000001 and
+// transforms the vertex by MVP.
+void main()
+{
+   // NOTE(review): the 1.000001 factor presumably nudges coordinates off
+   // exact texel boundaries; confirm before changing it.
+   vTexCoord = TexCoord * 1.000001;
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+// Reciprocal of the summed per-channel absolute difference of two colours.
+// The 0.00001 bias keeps the divisor non-zero when both colours match.
+float inv_diff(vec3 a, vec3 b)
+{
+   return 1.0 / (dot(abs(a - b), vec3(1.0, 1.0, 1.0)) + 0.00001);
+}
+
+// Fragment stage: builds three diagonal blends around the current coordinate
+// (square, x-stretched and y-stretched footprints) and mixes them, giving
+// more weight to the blends that stay closest to the centre sample.
+void main()
+{
+   vec2 uv = vTexCoord;
+   vec2 src_dim = params.SourceSize.xy;
+   float off_x = params.AAOFFSET / src_dim.x;
+   float off_y = params.AAOFFSET / src_dim.y;
+
+   // .xy holds the positive step, .zw the negated one.
+   vec4 offs_sq = vec4(off_x, off_y, -off_x, -off_y);
+   vec4 offs_hz = offs_sq * vec4(4.0, 1.5, 4.0, 1.5);
+   vec4 offs_vt = offs_sq * vec4(1.5, 4.0, 1.5, 4.0);
+
+   vec3 mid = texture(Source, uv).xyz;
+
+   // Suffix letters give the sign of the x and y offset (n = minus, p = plus).
+   vec3 sq_nn = texture(Source, uv + offs_sq.zw).xyz;
+   vec3 sq_pn = texture(Source, uv + offs_sq.xw).xyz;
+   vec3 sq_pp = texture(Source, uv + offs_sq.xy).xyz;
+   vec3 sq_np = texture(Source, uv + offs_sq.zy).xyz;
+   vec3 hz_nn = texture(Source, uv + offs_hz.zw).xyz;
+   vec3 hz_pn = texture(Source, uv + offs_hz.xw).xyz;
+   vec3 hz_pp = texture(Source, uv + offs_hz.xy).xyz;
+   vec3 hz_np = texture(Source, uv + offs_hz.zy).xyz;
+   vec3 vt_nn = texture(Source, uv + offs_vt.zw).xyz;
+   vec3 vt_pn = texture(Source, uv + offs_vt.xw).xyz;
+   vec3 vt_pp = texture(Source, uv + offs_vt.xy).xyz;
+   vec3 vt_np = texture(Source, uv + offs_vt.zy).xyz;
+
+   // Per-pair weights: the more alike a pair is, the larger its weight.
+   float w_sq_a = inv_diff(sq_nn, sq_pp);
+   float w_sq_b = inv_diff(sq_np, sq_pn);
+   float w_hz_a = inv_diff(sq_nn, hz_pp);
+   float w_hz_b = inv_diff(sq_np, hz_pn);
+   float w_hz_c = inv_diff(hz_nn, sq_pp);
+   float w_hz_d = inv_diff(hz_np, sq_pn);
+   float w_vt_a = inv_diff(sq_nn, vt_pp);
+   float w_vt_b = inv_diff(sq_np, vt_pn);
+   float w_vt_c = inv_diff(vt_nn, sq_pp);
+   float w_vt_d = inv_diff(vt_np, sq_pn);
+
+   vec3 blend_sq = 0.5 * (w_sq_a * (sq_nn + sq_pp) + w_sq_b * (sq_np + sq_pn)) / (w_sq_a + w_sq_b);
+   vec3 blend_hz = 0.5 * (w_hz_a * (sq_nn + hz_pp) + w_hz_b * (sq_np + hz_pn) + w_hz_c * (hz_nn + sq_pp) + w_hz_d * (hz_np + sq_pn)) / (w_hz_a + w_hz_b + w_hz_c + w_hz_d);
+   vec3 blend_vt = 0.5 * (w_vt_a * (sq_nn + vt_pp) + w_vt_b * (sq_np + vt_pn) + w_vt_c * (vt_nn + sq_pp) + w_vt_d * (vt_np + sq_pn)) / (w_vt_a + w_vt_b + w_vt_c + w_vt_d);
+
+   // Final mix: each blend is weighted by its closeness to the centre sample.
+   float k_sq = inv_diff(blend_sq, mid);
+   float k_hz = inv_diff(blend_hz, mid);
+   float k_vt = inv_diff(blend_vt, mid);
+
+   FragColor = vec4((k_sq * blend_sq + k_hz * blend_hz + k_vt * blend_vt) / (k_sq + k_hz + k_vt), 1.0);
+}
--- a/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass1.vert
+++ b/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass1.vert
@ -0,0 +1,49 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Uniform block declared for this pass; main() below does not read it.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float AAOFFSET;
+}params;
+
+// Restored from the garbled "parameterAAOFFSET¡1.00.252.00.05" so that the
+// name, description and numeric fields are separate tokens again.
+#pragma parameter AAOFFSET "AA offset first pass" 1.0 0.25 2.0 0.05
+
+// MVP is applied to Position in main().
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+layout(location = 0) in vec4 Position;     // vertex position, transformed by MVP
+layout(location = 1) in vec2 TexCoord;     // incoming texture coordinate
+layout(location = 0) out vec2 vTexCoord;   // coordinate handed to the next stage
+
+// Forwards the texture coordinate scaled by 1.000001 and transforms the
+// vertex by MVP.
+void main()
+{
+   // NOTE(review): the 1.000001 factor presumably nudges coordinates off
+   // exact texel boundaries; confirm before changing it.
+   vTexCoord = TexCoord * 1.000001;
+   gl_Position = global.MVP * Position;
+}
+
--- a/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass2.frag
+++ b/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass2.frag
@ -0,0 +1,65 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Uniform block read by main(): SourceSize.xy and AAOFFSET2 are used there.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float AAOFFSET2;
+}params;
+
+// Restored from the garbled "parameterAAOFFSET2¡0.50.252.00.05" so that the
+// name, description and numeric fields are separate tokens again.
+#pragma parameter AAOFFSET2 "AA offset second pass" 0.5 0.25 2.0 0.05
+
+// MVP is not referenced by this fragment shader.
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+layout(location = 0) in vec2 vTexCoord;    // texture coordinate read by main()
+layout(location = 0) out vec4 FragColor;   // colour written by main()
+uniform sampler2D Source;                  // texture sampled by main()
+
+// Samples the four diagonal neighbours of the current coordinate and blends
+// the two diagonals, weighting each diagonal's sum by the contrast measured
+// along the other one.
+void main()
+{
+   vec2 src_dim = params.SourceSize.xy;
+   float reach_x = pow(src_dim.x, -1.0) * params.AAOFFSET2;
+   float reach_y = pow(src_dim.y, -1.0) * params.AAOFFSET2;
+   vec3 ones = vec3(1.0, 1.0, 1.0);
+
+   vec3 tap_ul = texture(Source, vTexCoord.xy + vec2(-reach_x, -reach_y)).xyz;
+   vec3 tap_ur = texture(Source, vTexCoord.xy + vec2(reach_x, -reach_y)).xyz;
+   vec3 tap_dl = texture(Source, vTexCoord.xy + vec2(-reach_x, reach_y)).xyz;
+   vec3 tap_dr = texture(Source, vTexCoord.xy + vec2(reach_x, reach_y)).xyz;
+
+   // Summed absolute channel difference along each diagonal, biased by 0.001.
+   float diff_main = dot(abs(tap_ul - tap_dr), ones) + 0.001;
+   float diff_anti = dot(abs(tap_dl - tap_ur), ones) + 0.001;
+
+   vec3 blended = (diff_main * (tap_dl + tap_ur) + diff_anti * (tap_dr + tap_ul)) / (2.0 * (diff_main + diff_anti));
+   FragColor = vec4(blended, 1.0);
+}
--- a/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass2.gsh
+++ b/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass2.gsh
--- a/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass2.ppslang
+++ b/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass2.ppslang
@ -0,0 +1,74 @@
+#version 450
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Push-constant block shared by both stages; the fragment stage reads
+// SourceSize.xy and AAOFFSET2.
+layout(push_constant)uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+ float AAOFFSET2;
+} params;
+
+// Restored from the garbled "parameterAAOFFSET2¡0.50.252.00.05" so that the
+// name, description and numeric fields are separate tokens again.
+#pragma parameter AAOFFSET2 "AA offset second pass" 0.5 0.25 2.0 0.05
+
+// MVP is applied to Position in the vertex stage.
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+ mat4 MVP;
+} global;
+
+// Stage marker restored from the fused "stagevertex" token.
+#pragma stage vertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+
+// Vertex stage: transforms the vertex by MVP and forwards the texture
+// coordinate scaled by 1.00001.
+void main()
+{
+   gl_Position = global . MVP * Position;
+   // NOTE(review): the 1.00001 factor presumably nudges coordinates off
+   // exact texel boundaries; confirm before changing it.
+   vTexCoord = TexCoord * 1.00001;
+}
+
+// Stage marker restored from the fused "stagefragment" token.
+#pragma stage fragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+// Fragment stage: samples the four diagonal neighbours of the current
+// coordinate and blends the two diagonals, weighting each diagonal's sum by
+// the contrast measured along the other one.
+void main()
+{
+   vec2 src_dim = params.SourceSize.xy;
+   float reach_x = pow(src_dim.x, -1.0) * params.AAOFFSET2;
+   float reach_y = pow(src_dim.y, -1.0) * params.AAOFFSET2;
+   vec3 ones = vec3(1.0, 1.0, 1.0);
+
+   vec3 tap_ul = texture(Source, vTexCoord.xy + vec2(-reach_x, -reach_y)).xyz;
+   vec3 tap_ur = texture(Source, vTexCoord.xy + vec2(reach_x, -reach_y)).xyz;
+   vec3 tap_dl = texture(Source, vTexCoord.xy + vec2(-reach_x, reach_y)).xyz;
+   vec3 tap_dr = texture(Source, vTexCoord.xy + vec2(reach_x, reach_y)).xyz;
+
+   // Summed absolute channel difference along each diagonal, biased by 0.001.
+   float diff_main = dot(abs(tap_ul - tap_dr), ones) + 0.001;
+   float diff_anti = dot(abs(tap_dl - tap_ur), ones) + 0.001;
+
+   vec3 blended = (diff_main * (tap_dl + tap_ur) + diff_anti * (tap_dr + tap_ul)) / (2.0 * (diff_main + diff_anti));
+   FragColor = vec4(blended, 1.0);
+}
--- a/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass2.slang
+++ b/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass2.slang
@ -0,0 +1,74 @@
+#version 450
+
+/*
+   Copyright (C) 2007 guest(r) - guest.r@gmail.com
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+*/
+
+// Push-constant block shared by both stages; the fragment stage reads
+// SourceSize.xy and AAOFFSET2.
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+	float AAOFFSET2;
+} params;
+
+// Declares AAOFFSET2 as a tunable parameter with its description and numeric fields.
+#pragma parameter AAOFFSET2 "AA offset second pass" 0.5 0.25 2.0 0.05 
+
+// MVP is applied to Position in the vertex stage.
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+// Vertex stage: forwards the texture coordinate scaled by 1.00001 and
+// transforms the vertex by MVP.
+void main()
+{
+   // NOTE(review): the 1.00001 factor presumably nudges coordinates off
+   // exact texel boundaries; confirm before changing it.
+   vTexCoord = TexCoord * 1.00001;
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+// Fragment stage: samples the four diagonal neighbours of the current
+// coordinate and blends the two diagonals, weighting each diagonal's sum by
+// the contrast measured along the other one.
+void main()
+{
+   vec2 src_dim = params.SourceSize.xy;
+   float reach_x = pow(src_dim.x, -1.0) * params.AAOFFSET2;
+   float reach_y = pow(src_dim.y, -1.0) * params.AAOFFSET2;
+   vec3 ones = vec3(1.0, 1.0, 1.0);
+
+   vec3 tap_ul = texture(Source, vTexCoord.xy + vec2(-reach_x, -reach_y)).xyz;
+   vec3 tap_ur = texture(Source, vTexCoord.xy + vec2(reach_x, -reach_y)).xyz;
+   vec3 tap_dl = texture(Source, vTexCoord.xy + vec2(-reach_x, reach_y)).xyz;
+   vec3 tap_dr = texture(Source, vTexCoord.xy + vec2(reach_x, reach_y)).xyz;
+
+   // Summed absolute channel difference along each diagonal, biased by 0.001.
+   float diff_main = dot(abs(tap_ul - tap_dr), ones) + 0.001;
+   float diff_anti = dot(abs(tap_dl - tap_ur), ones) + 0.001;
+
+   vec3 blended = (diff_main * (tap_dl + tap_ur) + diff_anti * (tap_dr + tap_ul)) / (2.0 * (diff_main + diff_anti));
+   FragColor = vec4(blended, 1.0);
+}
--- a/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass2.vert
+++ b/anti-aliasing/shaders/aa-shader-4.0-level2/aa-shader-4.0-level2-pass2.vert
@ -0,0 +1,49 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Uniform block declared for this pass; main() below does not read it.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float AAOFFSET2;
+}params;
+
+// Restored from the garbled "parameterAAOFFSET2¡0.50.252.00.05" so that the
+// name, description and numeric fields are separate tokens again.
+#pragma parameter AAOFFSET2 "AA offset second pass" 0.5 0.25 2.0 0.05
+
+// MVP is applied to Position in main().
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+layout(location = 0) in vec4 Position;     // vertex position, transformed by MVP
+layout(location = 1) in vec2 TexCoord;     // incoming texture coordinate
+layout(location = 0) out vec2 vTexCoord;   // coordinate handed to the next stage
+
+// Forwards the texture coordinate scaled by 1.00001 and transforms the
+// vertex by MVP.
+void main()
+{
+   // NOTE(review): the 1.00001 factor presumably nudges coordinates off
+   // exact texel boundaries; confirm before changing it.
+   vTexCoord = TexCoord * 1.00001;
+   gl_Position = global.MVP * Position;
+}
+
--- a/anti-aliasing/shaders/aa-shader-4.0.frag
+++ b/anti-aliasing/shaders/aa-shader-4.0.frag
@ -0,0 +1,93 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+// Uniform block read by main(): SourceSize.xy and INTERNAL_RES are used there.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float INTERNAL_RES;
+}params;
+
+// Restored from the garbled "parameterINTERNAL_RES¡1.01.08.01.0" so that the
+// name, description and numeric fields are separate tokens again.
+#pragma parameter INTERNAL_RES "Internal Resolution" 1.0 1.0 8.0 1.0
+
+// MVP is not referenced by this fragment shader.
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+// All-ones vector: dot(abs(a - b), dt) sums the absolute channel differences.
+vec3 dt = vec3(1.0, 1.0, 1.0);
+
+// Samples tex at coord offset by the (z,w), (x,w), (x,y) and (z,y) pairs of
+// yx, then blends the two opposite pairs, weighting each pair's sum by the
+// contrast measured across the other pair.
+vec3 texture2d_(sampler2D tex, vec2 coord, vec4 yx)
+{
+   vec3 tap_nn = texture(tex, coord + yx.zw).xyz;
+   vec3 tap_pn = texture(tex, coord + yx.xw).xyz;
+   vec3 tap_pp = texture(tex, coord + yx.xy).xyz;
+   vec3 tap_np = texture(tex, coord + yx.zy).xyz;
+
+   // The 0.001 bias keeps the divisor non-zero for flat regions.
+   float diff_a = dot(abs(tap_nn - tap_pp), dt) + 0.001;
+   float diff_b = dot(abs(tap_np - tap_pn), dt) + 0.001;
+
+   return 0.5 * (diff_b * (tap_nn + tap_pp) + diff_a * (tap_np + tap_pn)) / (diff_a + diff_b);
+}
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+// Blends two colour pairs, weighting each pair's sum by the other pair's
+// summed absolute difference (biased by 0.001).
+vec3 cross_blend(vec3 a0, vec3 a1, vec3 b0, vec3 b1)
+{
+   float diff_a = dot(abs(a0 - a1), dt) + 0.001;
+   float diff_b = dot(abs(b0 - b1), dt) + 0.001;
+   return (diff_b * (a0 + a1) + diff_a * (b0 + b1)) / (diff_a + diff_b);
+}
+
+// Reads a 3x3 neighbourhood on a grid of 4 * SourceSize / INTERNAL_RES cells,
+// derives one blended colour per quadrant and interpolates between the four
+// using the fractional position inside the cell.
+void main()
+{
+   vec2 grid = 4.0 * params.SourceSize.xy / params.INTERNAL_RES;
+   vec2 cell = 1.0 / grid;
+   vec4 taps = vec4(cell, -cell);
+
+   vec2 pos = vTexCoord * grid;
+   vec2 frac_pos = fract(pos);
+   vec2 origin = floor(pos) * cell;
+
+   vec2 step_x = vec2(cell.x, 0.0);
+   vec2 step_y = vec2(0.0, cell.y);
+   vec2 diag_a = vec2(cell.x, cell.y);
+   vec2 diag_b = vec2(-cell.x, cell.y);
+
+   // n0..n8 cover the neighbourhood row by row; n4 is the cell itself.
+   vec3 n0 = texture2d_(Source, origin - diag_a, taps);
+   vec3 n1 = texture2d_(Source, origin - step_y, taps);
+   vec3 n2 = texture2d_(Source, origin - diag_b, taps);
+   vec3 n3 = texture2d_(Source, origin - step_x, taps);
+   vec3 n4 = texture2d_(Source, origin, taps);
+   vec3 n5 = texture2d_(Source, origin + step_x, taps);
+   vec3 n6 = texture2d_(Source, origin + diag_b, taps);
+   vec3 n7 = texture2d_(Source, origin + step_y, taps);
+   vec3 n8 = texture2d_(Source, origin + diag_a, taps);
+
+   vec3 quad_ul = cross_blend(n0, n4, n1, n3);
+   vec3 quad_ur = cross_blend(n1, n5, n2, n4);
+   vec3 quad_dl = cross_blend(n3, n7, n6, n4);
+   vec3 quad_dr = cross_blend(n4, n8, n5, n7);
+
+   // Interpolate along x within each row of quadrants, then along y.
+   float wx = frac_pos.x;
+   float wy = frac_pos.y;
+   vec3 lower = quad_dr * wx + quad_dl * (1.0 - wx);
+   vec3 upper = quad_ur * wx + quad_ul * (1.0 - wx);
+   vec3 result = 0.5 * (lower * wy + upper * (1.0 - wy));
+
+   FragColor = vec4(result, 1.0);
+}
--- a/anti-aliasing/shaders/aa-shader-4.0.gsh
+++ b/anti-aliasing/shaders/aa-shader-4.0.gsh
--- a/anti-aliasing/shaders/aa-shader-4.0.ppslang
+++ b/anti-aliasing/shaders/aa-shader-4.0.ppslang
@ -0,0 +1,102 @@
+#version 450
+
+// Push-constant block shared by both stages; the fragment stage reads
+// SourceSize.xy and INTERNAL_RES.
+layout(push_constant)uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+ float INTERNAL_RES;
+} params;
+
+// Restored from the garbled "parameterINTERNAL_RES¡1.01.08.01.0" so that the
+// name, description and numeric fields are separate tokens again.
+#pragma parameter INTERNAL_RES "Internal Resolution" 1.0 1.0 8.0 1.0
+
+// MVP is applied to Position in the vertex stage.
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+ mat4 MVP;
+} global;
+
+// All-ones vector: dot(abs(a - b), dt) sums the absolute channel differences.
+const vec3 dt = vec3(1.0, 1.0, 1.0);
+
+// Samples tex at coord offset by the (z,w), (x,w), (x,y) and (z,y) pairs of
+// yx, then blends the two opposite pairs, weighting each pair's sum by the
+// contrast measured across the other pair.
+vec3 texture2d_(sampler2D tex, vec2 coord, vec4 yx)
+{
+   vec3 tap_nn = texture(tex, coord + yx.zw).xyz;
+   vec3 tap_pn = texture(tex, coord + yx.xw).xyz;
+   vec3 tap_pp = texture(tex, coord + yx.xy).xyz;
+   vec3 tap_np = texture(tex, coord + yx.zy).xyz;
+
+   // The 0.001 bias keeps the divisor non-zero for flat regions.
+   float diff_a = dot(abs(tap_nn - tap_pp), dt) + 0.001;
+   float diff_b = dot(abs(tap_np - tap_pn), dt) + 0.001;
+
+   return 0.5 * (diff_b * (tap_nn + tap_pp) + diff_a * (tap_np + tap_pn)) / (diff_a + diff_b);
+}
+
+// Stage marker restored from the fused "stagevertex" token.
+#pragma stage vertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+
+// Vertex stage: transforms the vertex by MVP and passes the texture
+// coordinate through unchanged.
+void main()
+{
+   gl_Position = global . MVP * Position;
+   vTexCoord = TexCoord;
+}
+
+// Stage marker restored from the fused "stagefragment" token.
+#pragma stage fragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+// Blends two colour pairs, weighting each pair's sum by the other pair's
+// summed absolute difference (biased by 0.001).
+vec3 cross_blend(vec3 a0, vec3 a1, vec3 b0, vec3 b1)
+{
+   float diff_a = dot(abs(a0 - a1), dt) + 0.001;
+   float diff_b = dot(abs(b0 - b1), dt) + 0.001;
+   return (diff_b * (a0 + a1) + diff_a * (b0 + b1)) / (diff_a + diff_b);
+}
+
+// Fragment stage: reads a 3x3 neighbourhood on a grid of
+// 4 * SourceSize / INTERNAL_RES cells, derives one blended colour per
+// quadrant and interpolates between the four using the fractional position
+// inside the cell.
+void main()
+{
+   vec2 grid = 4.0 * params.SourceSize.xy / params.INTERNAL_RES;
+   vec2 cell = 1.0 / grid;
+   vec4 taps = vec4(cell, -cell);
+
+   vec2 pos = vTexCoord * grid;
+   vec2 frac_pos = fract(pos);
+   vec2 origin = floor(pos) * cell;
+
+   vec2 step_x = vec2(cell.x, 0.0);
+   vec2 step_y = vec2(0.0, cell.y);
+   vec2 diag_a = vec2(cell.x, cell.y);
+   vec2 diag_b = vec2(-cell.x, cell.y);
+
+   // n0..n8 cover the neighbourhood row by row; n4 is the cell itself.
+   vec3 n0 = texture2d_(Source, origin - diag_a, taps);
+   vec3 n1 = texture2d_(Source, origin - step_y, taps);
+   vec3 n2 = texture2d_(Source, origin - diag_b, taps);
+   vec3 n3 = texture2d_(Source, origin - step_x, taps);
+   vec3 n4 = texture2d_(Source, origin, taps);
+   vec3 n5 = texture2d_(Source, origin + step_x, taps);
+   vec3 n6 = texture2d_(Source, origin + diag_b, taps);
+   vec3 n7 = texture2d_(Source, origin + step_y, taps);
+   vec3 n8 = texture2d_(Source, origin + diag_a, taps);
+
+   vec3 quad_ul = cross_blend(n0, n4, n1, n3);
+   vec3 quad_ur = cross_blend(n1, n5, n2, n4);
+   vec3 quad_dl = cross_blend(n3, n7, n6, n4);
+   vec3 quad_dr = cross_blend(n4, n8, n5, n7);
+
+   // Interpolate along x within each row of quadrants, then along y.
+   float wx = frac_pos.x;
+   float wy = frac_pos.y;
+   vec3 lower = quad_dr * wx + quad_dl * (1.0 - wx);
+   vec3 upper = quad_ur * wx + quad_ul * (1.0 - wx);
+   vec3 result = 0.5 * (lower * wy + upper * (1.0 - wy));
+
+   FragColor = vec4(result, 1.0);
+}
--- a/anti-aliasing/shaders/aa-shader-4.0.slang
+++ b/anti-aliasing/shaders/aa-shader-4.0.slang
@ -0,0 +1,102 @@
+#version 450
+
+// Push-constant block shared by both stages; the fragment stage reads
+// SourceSize.xy and INTERNAL_RES.
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+	float INTERNAL_RES;
+} params;
+
+// Declares INTERNAL_RES as a tunable parameter with its description and numeric fields.
+#pragma parameter INTERNAL_RES "Internal Resolution" 1.0 1.0 8.0 1.0
+
+// MVP is applied to Position in the vertex stage.
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+// All-ones vector: dot(abs(a - b), dt) sums the absolute channel differences.
+const vec3 dt = vec3(1.0,1.0,1.0);
+
+// Samples tex at coord offset by the (z,w), (x,w), (x,y) and (z,y) pairs of
+// yx, then blends the two opposite pairs, weighting each pair's sum by the
+// contrast measured across the other pair.
+vec3 texture2d_(sampler2D tex, vec2 coord, vec4 yx)
+{
+   vec3 tap_nn = texture(tex, coord + yx.zw).xyz;
+   vec3 tap_pn = texture(tex, coord + yx.xw).xyz;
+   vec3 tap_pp = texture(tex, coord + yx.xy).xyz;
+   vec3 tap_np = texture(tex, coord + yx.zy).xyz;
+
+   // The 0.001 bias keeps the divisor non-zero for flat regions.
+   float diff_a = dot(abs(tap_nn - tap_pp), dt) + 0.001;
+   float diff_b = dot(abs(tap_np - tap_pn), dt) + 0.001;
+
+   return 0.5 * (diff_b * (tap_nn + tap_pp) + diff_a * (tap_np + tap_pn)) / (diff_a + diff_b);
+}
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+// Vertex stage: passes the texture coordinate through unchanged and
+// transforms the vertex by MVP.
+void main()
+{
+   vTexCoord = TexCoord;
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+// Blends two colour pairs, weighting each pair's sum by the other pair's
+// summed absolute difference (biased by 0.001).
+vec3 cross_blend(vec3 a0, vec3 a1, vec3 b0, vec3 b1)
+{
+   float diff_a = dot(abs(a0 - a1), dt) + 0.001;
+   float diff_b = dot(abs(b0 - b1), dt) + 0.001;
+   return (diff_b * (a0 + a1) + diff_a * (b0 + b1)) / (diff_a + diff_b);
+}
+
+// Fragment stage: reads a 3x3 neighbourhood on a grid of
+// 4 * SourceSize / INTERNAL_RES cells, derives one blended colour per
+// quadrant and interpolates between the four using the fractional position
+// inside the cell.
+void main()
+{
+   // Grid resolution used for the texel coordinates.
+   // Alternative kept from the original source (disabled):
+   //   size = X * (params.OutputSize.xy * params.SourceSize.zw) * params.SourceSize.xy;
+   vec2 grid = 4.0 * params.SourceSize.xy / params.INTERNAL_RES;
+   vec2 cell = 1.0 / grid;
+   vec4 taps = vec4(cell, -cell);
+
+   vec2 pos = vTexCoord * grid;
+   vec2 frac_pos = fract(pos);
+   vec2 origin = floor(pos) * cell;
+
+   vec2 step_x = vec2(cell.x, 0.0);
+   vec2 step_y = vec2(0.0, cell.y);
+   vec2 diag_a = vec2(cell.x, cell.y);
+   vec2 diag_b = vec2(-cell.x, cell.y);
+
+   // n0..n8 cover the neighbourhood row by row; n4 is the cell itself.
+   vec3 n0 = texture2d_(Source, origin - diag_a, taps);
+   vec3 n1 = texture2d_(Source, origin - step_y, taps);
+   vec3 n2 = texture2d_(Source, origin - diag_b, taps);
+   vec3 n3 = texture2d_(Source, origin - step_x, taps);
+   vec3 n4 = texture2d_(Source, origin, taps);
+   vec3 n5 = texture2d_(Source, origin + step_x, taps);
+   vec3 n6 = texture2d_(Source, origin + diag_b, taps);
+   vec3 n7 = texture2d_(Source, origin + step_y, taps);
+   vec3 n8 = texture2d_(Source, origin + diag_a, taps);
+
+   vec3 quad_ul = cross_blend(n0, n4, n1, n3);
+   vec3 quad_ur = cross_blend(n1, n5, n2, n4);
+   vec3 quad_dl = cross_blend(n3, n7, n6, n4);
+   vec3 quad_dr = cross_blend(n4, n8, n5, n7);
+
+   // Interpolate along x within each row of quadrants, then along y.
+   float wx = frac_pos.x;
+   float wy = frac_pos.y;
+   vec3 lower = quad_dr * wx + quad_dl * (1.0 - wx);
+   vec3 upper = quad_ur * wx + quad_ul * (1.0 - wx);
+   vec3 result = 0.5 * (lower * wy + upper * (1.0 - wy));
+
+   FragColor = vec4(result, 1.0);
+}
--- a/anti-aliasing/shaders/aa-shader-4.0.vert
+++ b/anti-aliasing/shaders/aa-shader-4.0.vert
@ -0,0 +1,46 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+// Uniform block declared for this pass; main() below does not read it.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float INTERNAL_RES;
+}params;
+
+// Restored from the garbled "parameterINTERNAL_RES¡1.01.08.01.0" so that the
+// name, description and numeric fields are separate tokens again.
+#pragma parameter INTERNAL_RES "Internal Resolution" 1.0 1.0 8.0 1.0
+
+// MVP is applied to Position in main().
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+// All-ones vector: dot(abs(a - b), dt) sums the absolute channel differences.
+vec3 dt = vec3(1.0, 1.0, 1.0);
+
+// Samples tex at coord offset by the (z,w), (x,w), (x,y) and (z,y) pairs of
+// yx, then blends the two opposite pairs, weighting each pair's sum by the
+// contrast measured across the other pair. The main() of this vertex shader
+// does not call it.
+vec3 texture2d_(sampler2D tex, vec2 coord, vec4 yx)
+{
+   vec3 tap_nn = texture(tex, coord + yx.zw).xyz;
+   vec3 tap_pn = texture(tex, coord + yx.xw).xyz;
+   vec3 tap_pp = texture(tex, coord + yx.xy).xyz;
+   vec3 tap_np = texture(tex, coord + yx.zy).xyz;
+
+   // The 0.001 bias keeps the divisor non-zero for flat regions.
+   float diff_a = dot(abs(tap_nn - tap_pp), dt) + 0.001;
+   float diff_b = dot(abs(tap_np - tap_pn), dt) + 0.001;
+
+   return 0.5 * (diff_b * (tap_nn + tap_pp) + diff_a * (tap_np + tap_pn)) / (diff_a + diff_b);
+}
+
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+// Passes the texture coordinate through unchanged and transforms the vertex
+// by MVP.
+void main()
+{
+   vTexCoord = TexCoord;
+   gl_Position = global.MVP * Position;
+}
+
--- a/anti-aliasing/shaders/advanced-aa.frag
+++ b/anti-aliasing/shaders/advanced-aa.frag
@ -0,0 +1,88 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float AA_RESOLUTION_X;
+   float AA_RESOLUTION_Y;
+}params;
+
+#pragma parameterAA_RESOLUTION_X¡0.00.01920.01.0
+#pragma parameterAA_RESOLUTION_Y¡0.00.01920.01.0
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+vec3 dt = vec3(1, 1, 1);
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 1) in vec4 t1;
+layout(location = 2) in vec4 t2;
+layout(location = 3) in vec4 t3;
+layout(location = 4) in vec4 t4;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+// Fragment stage: blends the centre sample with its eight neighbours. Each
+// opposing pair is weighted by the contrast measured across the other pair of
+// the same group (axis-aligned or diagonal), and the two group results are
+// then mixed by how far each one moved away from the centre colour.
+void main()
+{
+    // Neighbourhood samples. Suffix = sign of the x offset, then of the y
+    // offset, as set up by the vertex stage (M = minus, Z = zero, P = plus).
+    vec3 pMM = texture(Source, t1.zw).xyz;
+    vec3 pZM = texture(Source, t3.xy).xyz;
+    vec3 pPM = texture(Source, t3.zw).xyz;
+    vec3 pMZ = texture(Source, t1.xy).xyz;
+    vec3 pZZ = texture(Source, vTexCoord).xyz;
+    vec3 pPZ = texture(Source, t2.xy).xyz;
+    vec3 pMP = texture(Source, t2.zw).xyz;
+    vec3 pZP = texture(Source, t4.xy).xyz;
+    vec3 pPP = texture(Source, t4.zw).xyz;
+
+    // Contrast across each opposing pair; the bias keeps the divisors non-zero.
+    float diffA = dot(abs(pMM - pPP), dt) + 0.0001;
+    float diffB = dot(abs(pPM - pMP), dt) + 0.0001;
+    float diffH = dot(abs(pMZ - pPZ), dt) + 0.0001;
+    float diffV = dot(abs(pZM - pZP), dt) + 0.0001;
+
+    float centreAxis = 0.5 * (diffH + diffV);
+    float centreDiag = 0.5 * (diffA + diffB);
+
+    // Locals deliberately do not reuse the names of the t1/t2 inputs.
+    vec3 axisBlend = (diffH * (pZM + pZP) + diffV * (pMZ + pPZ) + centreAxis * pZZ) / (2.5 * (diffH + diffV));
+    vec3 diagBlend = (diffA * (pPM + pMP) + diffB * (pMM + pPP) + centreDiag * pZZ) / (2.5 * (diffA + diffB));
+
+    float axisErr = dot(abs(axisBlend - pZZ), dt) + 0.0001;
+    float diagErr = dot(abs(diagBlend - pZZ), dt) + 0.0001;
+
+    FragColor = vec4((axisErr * diagBlend + diagErr * axisBlend) / (axisErr + diagErr), 1);
+}
--- a/anti-aliasing/shaders/advanced-aa.gsh
+++ b/anti-aliasing/shaders/advanced-aa.gsh
--- a/anti-aliasing/shaders/advanced-aa.ppslang
+++ b/anti-aliasing/shaders/advanced-aa.ppslang
@ -0,0 +1,113 @@
+#version 450
+
+layout(push_constant)uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+ float AA_RESOLUTION_X;
+ float AA_RESOLUTION_Y;
+} params;
+
+#pragma parameterAA_RESOLUTION_X¡0.00.01920.01.0
+#pragma parameterAA_RESOLUTION_Y¡0.00.01920.01.0
+
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+ mat4 MVP;
+} global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+vec3 dt = vec3(1, 1, 1);
+
+#pragma stagevertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+layout(location = 1)out vec4 t1;
+layout(location = 2)out vec4 t2;
+layout(location = 3)out vec4 t3;
+layout(location = 4)out vec4 t4;
+
+// Vertex stage: transforms the vertex and precomputes the eight neighbour
+// sampling coordinates, each offset by half of 1/resolution from the centre.
+void main()
+{
+    gl_Position = global.MVP * Position;
+    vTexCoord   = TexCoord;
+
+    // A parameter value of 0 selects the source texture size instead.
+    float resX = (params.AA_RESOLUTION_X == 0) ? params.SourceSize.x : params.AA_RESOLUTION_X;
+    float resY = (params.AA_RESOLUTION_Y == 0) ? params.SourceSize.y : params.AA_RESOLUTION_Y;
+
+    float dx = (1.0 / resX) * 0.5;
+    float dy = (1.0 / resY) * 0.5;
+
+    // .xy carries the axis-aligned neighbours, .zw the diagonal ones.
+    t1 = vec4(vTexCoord + vec2(-dx, 0), vTexCoord + vec2(-dx, -dy));
+    t2 = vec4(vTexCoord + vec2( dx, 0), vTexCoord + vec2(-dx,  dy));
+    t3 = vec4(vTexCoord + vec2(0, -dy), vTexCoord + vec2( dx, -dy));
+    t4 = vec4(vTexCoord + vec2(0,  dy), vTexCoord + vec2( dx,  dy));
+}
+
+#pragma stagefragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 1)in vec4 t1;
+layout(location = 2)in vec4 t2;
+layout(location = 3)in vec4 t3;
+layout(location = 4)in vec4 t4;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+// Fragment stage: blends the centre sample with its eight neighbours. Each
+// opposing pair is weighted by the contrast measured across the other pair of
+// the same group (axis-aligned or diagonal), and the two group results are
+// then mixed by how far each one moved away from the centre colour.
+void main()
+{
+    // Neighbourhood samples. Suffix = sign of the x offset, then of the y
+    // offset, as set up by the vertex stage (M = minus, Z = zero, P = plus).
+    vec3 pMM = texture(Source, t1.zw).xyz;
+    vec3 pZM = texture(Source, t3.xy).xyz;
+    vec3 pPM = texture(Source, t3.zw).xyz;
+    vec3 pMZ = texture(Source, t1.xy).xyz;
+    vec3 pZZ = texture(Source, vTexCoord).xyz;
+    vec3 pPZ = texture(Source, t2.xy).xyz;
+    vec3 pMP = texture(Source, t2.zw).xyz;
+    vec3 pZP = texture(Source, t4.xy).xyz;
+    vec3 pPP = texture(Source, t4.zw).xyz;
+
+    // Contrast across each opposing pair; the bias keeps the divisors non-zero.
+    float diffA = dot(abs(pMM - pPP), dt) + 0.0001;
+    float diffB = dot(abs(pPM - pMP), dt) + 0.0001;
+    float diffH = dot(abs(pMZ - pPZ), dt) + 0.0001;
+    float diffV = dot(abs(pZM - pZP), dt) + 0.0001;
+
+    float centreAxis = 0.5 * (diffH + diffV);
+    float centreDiag = 0.5 * (diffA + diffB);
+
+    // Locals deliberately do not reuse the names of the t1/t2 inputs.
+    vec3 axisBlend = (diffH * (pZM + pZP) + diffV * (pMZ + pPZ) + centreAxis * pZZ) / (2.5 * (diffH + diffV));
+    vec3 diagBlend = (diffA * (pPM + pMP) + diffB * (pMM + pPP) + centreDiag * pZZ) / (2.5 * (diffA + diffB));
+
+    float axisErr = dot(abs(axisBlend - pZZ), dt) + 0.0001;
+    float diagErr = dot(abs(diagBlend - pZZ), dt) + 0.0001;
+
+    FragColor = vec4((axisErr * diagBlend + diagErr * axisBlend) / (axisErr + diagErr), 1);
+}
--- a/anti-aliasing/shaders/advanced-aa.slang
+++ b/anti-aliasing/shaders/advanced-aa.slang
@ -0,0 +1,113 @@
+#version 450
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+	float AA_RESOLUTION_X;
+	float AA_RESOLUTION_Y;
+} params;
+
+#pragma parameter AA_RESOLUTION_X "AA Input Res X" 0.0 0.0 1920.0 1.0
+#pragma parameter AA_RESOLUTION_Y "AA Input Res Y" 0.0 0.0 1920.0 1.0
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+/*
+   Copyright (C) 2006 guest(r) - guest.r@gmail.com
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+*/
+
+/*
+   The AdvancedAA shader is well suited to:
+   - anti-aliasing 2x-scaled graphics up to their 4x absolute size,
+   - anti-aliasing hi-res "screens" (640x480) up to 2x their size, or
+   - anti-aliasing graphics back to their original size (looks nice above 640x480; set scaling to 1.0)
+*/
+
+#define AA_RESOLUTION_X_DEF params.SourceSize.x
+#define AA_RESOLUTION_Y_DEF params.SourceSize.y
+vec3  dt = vec3(1,1,1);
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+layout(location = 1) out vec4 t1;
+layout(location = 2) out vec4 t2;
+layout(location = 3) out vec4 t3;
+layout(location = 4) out vec4 t4;
+
+// Vertex stage: transforms the vertex and precomputes the eight neighbour
+// sampling coordinates, each offset by half of 1/resolution from the centre.
+void main()
+{
+    gl_Position = global.MVP * Position;
+    vTexCoord   = TexCoord;
+
+    // A parameter value of 0 selects the default (source texture size).
+    float resX = (params.AA_RESOLUTION_X == 0) ? AA_RESOLUTION_X_DEF : params.AA_RESOLUTION_X;
+    float resY = (params.AA_RESOLUTION_Y == 0) ? AA_RESOLUTION_Y_DEF : params.AA_RESOLUTION_Y;
+
+    float dx = (1.0 / resX) * 0.5;
+    float dy = (1.0 / resY) * 0.5;
+
+    // .xy carries the axis-aligned neighbours, .zw the diagonal ones.
+    t1 = vec4(vTexCoord + vec2(-dx, 0), vTexCoord + vec2(-dx, -dy));
+    t2 = vec4(vTexCoord + vec2( dx, 0), vTexCoord + vec2(-dx,  dy));
+    t3 = vec4(vTexCoord + vec2(0, -dy), vTexCoord + vec2( dx, -dy));
+    t4 = vec4(vTexCoord + vec2(0,  dy), vTexCoord + vec2( dx,  dy));
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 1) in vec4 t1;
+layout(location = 2) in vec4 t2;
+layout(location = 3) in vec4 t3;
+layout(location = 4) in vec4 t4;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+// Fragment stage: blends the centre sample with its eight neighbours. Each
+// opposing pair is weighted by the contrast measured across the other pair of
+// the same group (axis-aligned or diagonal), and the two group results are
+// then mixed by how far each one moved away from the centre colour.
+void main()
+{
+    // Neighbourhood samples. Suffix = sign of the x offset, then of the y
+    // offset, as set up by the vertex stage (M = minus, Z = zero, P = plus).
+    vec3 pMM = texture(Source, t1.zw).xyz;
+    vec3 pZM = texture(Source, t3.xy).xyz;
+    vec3 pPM = texture(Source, t3.zw).xyz;
+    vec3 pMZ = texture(Source, t1.xy).xyz;
+    vec3 pZZ = texture(Source, vTexCoord).xyz;
+    vec3 pPZ = texture(Source, t2.xy).xyz;
+    vec3 pMP = texture(Source, t2.zw).xyz;
+    vec3 pZP = texture(Source, t4.xy).xyz;
+    vec3 pPP = texture(Source, t4.zw).xyz;
+
+    // Contrast across each opposing pair; the bias keeps the divisors non-zero.
+    float diffA = dot(abs(pMM - pPP), dt) + 0.0001;
+    float diffB = dot(abs(pPM - pMP), dt) + 0.0001;
+    float diffH = dot(abs(pMZ - pPZ), dt) + 0.0001;
+    float diffV = dot(abs(pZM - pZP), dt) + 0.0001;
+
+    float centreAxis = 0.5 * (diffH + diffV);
+    float centreDiag = 0.5 * (diffA + diffB);
+
+    // Locals deliberately do not reuse the names of the t1/t2 inputs.
+    vec3 axisBlend = (diffH * (pZM + pZP) + diffV * (pMZ + pPZ) + centreAxis * pZZ) / (2.5 * (diffH + diffV));
+    vec3 diagBlend = (diffA * (pPM + pMP) + diffB * (pMM + pPP) + centreDiag * pZZ) / (2.5 * (diffA + diffB));
+
+    float axisErr = dot(abs(axisBlend - pZZ), dt) + 0.0001;
+    float diagErr = dot(abs(diagBlend - pZZ), dt) + 0.0001;
+
+    FragColor = vec4((axisErr * diagBlend + diagErr * axisBlend) / (axisErr + diagErr), 1);
+}
--- a/anti-aliasing/shaders/advanced-aa.vert
+++ b/anti-aliasing/shaders/advanced-aa.vert
@ -0,0 +1,78 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float AA_RESOLUTION_X;
+   float AA_RESOLUTION_Y;
+}params;
+
+#pragma parameterAA_RESOLUTION_X¡0.00.01920.01.0
+#pragma parameterAA_RESOLUTION_Y¡0.00.01920.01.0
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+vec3 dt = vec3(1, 1, 1);
+
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+layout(location = 1) out vec4 t1;
+layout(location = 2) out vec4 t2;
+layout(location = 3) out vec4 t3;
+layout(location = 4) out vec4 t4;
+
+// Vertex stage: transforms the vertex and precomputes the eight neighbour
+// sampling coordinates, each offset by half of 1/resolution from the centre.
+void main()
+{
+    gl_Position = global.MVP * Position;
+    vTexCoord   = TexCoord;
+
+    // A parameter value of 0 selects the source texture size instead.
+    float resX = (params.AA_RESOLUTION_X == 0) ? params.SourceSize.x : params.AA_RESOLUTION_X;
+    float resY = (params.AA_RESOLUTION_Y == 0) ? params.SourceSize.y : params.AA_RESOLUTION_Y;
+
+    float dx = (1.0 / resX) * 0.5;
+    float dy = (1.0 / resY) * 0.5;
+
+    // .xy carries the axis-aligned neighbours, .zw the diagonal ones.
+    t1 = vec4(vTexCoord + vec2(-dx, 0), vTexCoord + vec2(-dx, -dy));
+    t2 = vec4(vTexCoord + vec2( dx, 0), vTexCoord + vec2(-dx,  dy));
+    t3 = vec4(vTexCoord + vec2(0, -dy), vTexCoord + vec2( dx, -dy));
+    t4 = vec4(vTexCoord + vec2(0,  dy), vTexCoord + vec2( dx,  dy));
+}
+
--- a/anti-aliasing/shaders/ewa_curvature.frag
+++ b/anti-aliasing/shaders/ewa_curvature.frag
@ -0,0 +1,188 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float distortion;
+}params;
+
+#pragma parameterdistortion¡0.150.01.00.01
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Box filter: returns a constant weight of 1.0; r2 is not used.
+float boxFilter(float r2){
+    const float uniformWeight = 1.0;
+    return uniformWeight;
+}
+
+// Gaussian falloff exp(-sharpness * r2) with the sharpness fixed at 1.0.
+float gaussFilter(float r2){
+    const float sharpness = 1.0;
+    float exponent = -sharpness * r2;
+    return exp(exponent);
+}
+
+// Triangle (tent) filter: the weight falls linearly from 1 at r = 0 to 0 at
+// r >= 1, where r = sqrt(r2). The unused local was dropped and max() now
+// receives two float arguments instead of relying on int-to-float conversion.
+float triFilter(float r2){
+    float r = sqrt(r2);
+    return max(0.0, 1.0 - r);
+}
+
+// Normalised sinc: sin(pi * x) / (pi * x). At x == 0 the quotient is 0/0, so
+// the limit value 1.0 is returned explicitly instead of producing NaN.
+float sinc(float x){
+    if(x == 0.0)
+        return 1.0;
+    float px = 3.14159265358979323846 * x;
+    return sin(px) / px;
+}
+
+// Product of two sinc lobes, sinc(r) * sinc(r / 1.3), with r = sqrt(r2).
+// A squared distance of exactly zero yields a weight of 1.
+float lanczosFilter(float r2){
+    float weight = 1.;
+    if(r2 != 0){
+        float r = sqrt(r2);
+        weight = sinc(r) * sinc(r / 1.3);
+    }
+    return weight;
+}
+
+
+// Piecewise-cubic kernel in r = sqrt(r2): one polynomial for r < 1, another
+// for 1 <= r < 2, and zero from r = 2 onwards.
+float crFilter(float r2){
+    float r = sqrt(r2);
+    if(r >= 2.)
+        return .0;
+    if(r < 1.)
+        return 3. * r * r2 - 5. * r2 + 2.;
+    return - r * r2 + 5. * r2 - 8 * r + 4.;
+}
+
+// Quadratic falloff 1 - r2 / width^2 with the width fixed at 1.0.
+float quadraticFilter(float r2){
+    const float width = 1.0;
+    float scaled = r2 / (width * width);
+    return 1.0 - scaled;
+}
+
+// Cubic falloff 1 - 3 r^2 / w^2 + 2 r^3 / w^3 with r = sqrt(r2) and w = 1.0.
+float cubicFilter(float r2){
+    const float width = 1.0;
+    float r = sqrt(r2);
+    float quadTerm  = 3 * r2 / (width * width);
+    float cubicTerm = 2 * r * r2 / (width * width * width);
+    return 1.0 - quadTerm + cubicTerm;
+}
+
+
+
+
+
+
+
+
+
+// Elliptical weighted average (EWA) texture lookup.
+//   Source - texture to filter
+//   p0     - normalised coordinate of the filter centre
+//   du, dv - derivatives of the coordinate (the caller in this file passes
+//            dFdx/dFdy of the coordinate)
+//   scale  - factor converting normalised coordinates to sample-grid units
+//            (the caller in this file passes the texture width)
+// Returns the plain texture() sample when scale < 2; otherwise the
+// Gaussian-weighted average of the grid samples that fall inside the ellipse.
+vec4 ewaFilter(sampler2D Source, vec2 p0, vec2 du, vec2 dv, int scale){
+
+    vec4 foo = texture(Source, p0);
+
+    // Skip the elliptical filter entirely for very small scales.
+    if(scale < 2)
+        return foo;
+
+    // Shift by half a grid cell, then convert to grid units.
+    p0 -= vec2(0.5, 0.5)/ scale;
+    vec2 p = scale * p0;
+
+    // Derivatives in grid units, scaled by the 0.8 filter-width factor.
+    float ux = 0.8 * du . s * scale;
+    float vx = 0.8 * du . t * scale;
+    float uy = 0.8 * dv . s * scale;
+    float vy = 0.8 * dv . t * scale;
+
+    // Ellipse coefficients: A*x*x + B*x*y + C*y*y = F.
+
+    float A = vx * vx + vy * vy + 1;
+    float B = - 2 *(ux * vx + uy * vy);
+    float C = ux * ux + uy * uy + 1;
+    float F = A * C - B * B / 4.;
+
+    // Half-extents of the ellipse's bounding box.
+    float bbox_du = 2. /(- B * B + 4.0 * C * A)* sqrt((- B * B + 4.0 * C * A)* C * F);
+    float bbox_dv = 2. /(- B * B + 4.0 * C * A)* sqrt(A *(- B * B + 4.0 * C * A)* F);
+
+    // Integer bounds of the box to scan.
+    int u0 = int(floor(p . s - bbox_du));
+    int u1 = int(ceil(p . s + bbox_du));
+    int v0 = int(floor(p . t - bbox_dv));
+    int v1 = int(ceil(p . t + bbox_dv));
+
+    // Scan the box; q is the ellipse quadratic form at (U, V) and is advanced
+    // along u with first (dq) and second (ddq) differences. Samples with
+    // q < F lie inside the ellipse.
+    // NOTE(review): the alpha accumulator starts at 1.0 while rgb starts at
+    // 0.0 - confirm this is intended.
+
+    vec4 num = vec4(0., 0., 0., 1.);
+    float den = 0;
+    float ddq = 2 * A;
+    float U = u0 - p . s;
+
+    for(int v = v0;v <= v1;++ v){
+        float V = v - p . t;
+        float dq = A *(2 * U + 1)+ B * V;
+        float q =(C * V + B * U)* V + A * U * U;
+
+        for(int u = u0;u <= u1;++ u){
+            if(q < F)
+            {
+                float r2 = q / F;
+                float weight = gaussFilter(r2);
+
+                // Sample at the centre of grid cell (u, v).
+                num += weight * texture(Source, vec2(u + 0.5, v + 0.5)/ scale);
+                den += weight;
+            }
+            q += dq;
+            dq += ddq;
+        }
+
+    }
+
+    // Normalise by the accumulated weight.
+    vec4 color = num *(1. / den);
+    return color;
+}
+
+// Convenience wrapper: takes the screen-space derivatives of `coords` and the
+// width of mip level 0 of `tex`, then runs the EWA filter with them.
+vec4 texture2DEWA(sampler2D tex, vec2 coords){
+    int texWidth = textureSize(tex, 0).x;
+    return ewaFilter(tex, coords, dFdx(coords), dFdy(coords), texWidth);
+}
+
+// Pushes `coord` away from the centre (0.5, 0.5) by an amount derived from
+// its squared distance to the centre and the `distortion` parameter.
+vec2 radialDistortion(vec2 coord){
+    vec2 fromCentre = coord - vec2(0.5);
+    float strength  = dot(fromCentre, fromCentre) * params.distortion;
+    vec2 offset     = fromCentre * (1.0 - strength) * strength;
+    return coord + offset;
+}
+
+// Fragment stage: distorts the incoming coordinate radially, then samples the
+// source through the EWA filter.
+void main()
+{
+    vec2 warped = radialDistortion(vTexCoord);
+    FragColor = texture2DEWA(Source, warped);
+}
--- a/anti-aliasing/shaders/ewa_curvature.gsh
+++ b/anti-aliasing/shaders/ewa_curvature.gsh
--- a/anti-aliasing/shaders/ewa_curvature.ppslang
+++ b/anti-aliasing/shaders/ewa_curvature.ppslang
@ -0,0 +1,197 @@
+#version 450
+
+
+
+
+
+
+
+
+layout(push_constant)uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+ float distortion;
+} params;
+
+#pragma parameterdistortion¡0.150.01.00.01
+
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+ mat4 MVP;
+} global;
+
+#pragma stagevertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+
+// Vertex stage: forwards the texture coordinate unchanged and transforms the
+// vertex position by the MVP matrix.
+void main()
+{
+    vTexCoord   = TexCoord;
+    gl_Position = global.MVP * Position;
+}
+
+#pragma stagefragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Box filter: returns a constant weight of 1.0; r2 is not used.
+float boxFilter(float r2){
+    const float uniformWeight = 1.0;
+    return uniformWeight;
+}
+
+// Gaussian falloff exp(-sharpness * r2) with the sharpness fixed at 1.0.
+float gaussFilter(float r2){
+    const float sharpness = 1.0;
+    float exponent = -sharpness * r2;
+    return exp(exponent);
+}
+
+// Triangle (tent) filter: the weight falls linearly from 1 at r = 0 to 0 at
+// r >= 1, where r = sqrt(r2). The unused local was dropped and max() now
+// receives two float arguments instead of relying on int-to-float conversion.
+float triFilter(float r2){
+    float r = sqrt(r2);
+    return max(0.0, 1.0 - r);
+}
+
+// Normalised sinc: sin(pi * x) / (pi * x). At x == 0 the quotient is 0/0, so
+// the limit value 1.0 is returned explicitly instead of producing NaN.
+float sinc(float x){
+    if(x == 0.0)
+        return 1.0;
+    float px = 3.14159265358979323846 * x;
+    return sin(px) / px;
+}
+
+// Product of two sinc lobes, sinc(r) * sinc(r / 1.3), with r = sqrt(r2).
+// A squared distance of exactly zero yields a weight of 1.
+float lanczosFilter(float r2){
+    float weight = 1.;
+    if(r2 != 0){
+        float r = sqrt(r2);
+        weight = sinc(r) * sinc(r / 1.3);
+    }
+    return weight;
+}
+
+
+// Piecewise-cubic kernel in r = sqrt(r2): one polynomial for r < 1, another
+// for 1 <= r < 2, and zero from r = 2 onwards.
+float crFilter(float r2){
+    float r = sqrt(r2);
+    if(r >= 2.)
+        return .0;
+    if(r < 1.)
+        return 3. * r * r2 - 5. * r2 + 2.;
+    return - r * r2 + 5. * r2 - 8 * r + 4.;
+}
+
+// Quadratic falloff 1 - r2 / width^2 with the width fixed at 1.0.
+float quadraticFilter(float r2){
+    const float width = 1.0;
+    float scaled = r2 / (width * width);
+    return 1.0 - scaled;
+}
+
+// Cubic falloff 1 - 3 r^2 / w^2 + 2 r^3 / w^3 with r = sqrt(r2) and w = 1.0.
+float cubicFilter(float r2){
+    const float width = 1.0;
+    float r = sqrt(r2);
+    float quadTerm  = 3 * r2 / (width * width);
+    float cubicTerm = 2 * r * r2 / (width * width * width);
+    return 1.0 - quadTerm + cubicTerm;
+}
+
+
+
+
+
+
+
+
+
+// Elliptical weighted average (EWA) texture lookup.
+//   Source - texture to filter
+//   p0     - normalised coordinate of the filter centre
+//   du, dv - derivatives of the coordinate (the caller in this file passes
+//            dFdx/dFdy of the coordinate)
+//   scale  - factor converting normalised coordinates to sample-grid units
+//            (the caller in this file passes the texture width)
+// Returns the plain texture() sample when scale < 2; otherwise the
+// Gaussian-weighted average of the grid samples that fall inside the ellipse.
+vec4 ewaFilter(sampler2D Source, vec2 p0, vec2 du, vec2 dv, int scale){
+
+    vec4 foo = texture(Source, p0);
+
+    // Skip the elliptical filter entirely for very small scales.
+    if(scale < 2)
+        return foo;
+
+    // Shift by half a grid cell, then convert to grid units.
+    p0 -= vec2(0.5, 0.5)/ scale;
+    vec2 p = scale * p0;
+
+    // Derivatives in grid units, scaled by the 0.8 filter-width factor.
+    float ux = 0.8 * du . s * scale;
+    float vx = 0.8 * du . t * scale;
+    float uy = 0.8 * dv . s * scale;
+    float vy = 0.8 * dv . t * scale;
+
+    // Ellipse coefficients: A*x*x + B*x*y + C*y*y = F.
+
+    float A = vx * vx + vy * vy + 1;
+    float B = - 2 *(ux * vx + uy * vy);
+    float C = ux * ux + uy * uy + 1;
+    float F = A * C - B * B / 4.;
+
+    // Half-extents of the ellipse's bounding box.
+    float bbox_du = 2. /(- B * B + 4.0 * C * A)* sqrt((- B * B + 4.0 * C * A)* C * F);
+    float bbox_dv = 2. /(- B * B + 4.0 * C * A)* sqrt(A *(- B * B + 4.0 * C * A)* F);
+
+    // Integer bounds of the box to scan.
+    int u0 = int(floor(p . s - bbox_du));
+    int u1 = int(ceil(p . s + bbox_du));
+    int v0 = int(floor(p . t - bbox_dv));
+    int v1 = int(ceil(p . t + bbox_dv));
+
+    // Scan the box; q is the ellipse quadratic form at (U, V) and is advanced
+    // along u with first (dq) and second (ddq) differences. Samples with
+    // q < F lie inside the ellipse.
+    // NOTE(review): the alpha accumulator starts at 1.0 while rgb starts at
+    // 0.0 - confirm this is intended.
+
+    vec4 num = vec4(0., 0., 0., 1.);
+    float den = 0;
+    float ddq = 2 * A;
+    float U = u0 - p . s;
+
+    for(int v = v0;v <= v1;++ v){
+        float V = v - p . t;
+        float dq = A *(2 * U + 1)+ B * V;
+        float q =(C * V + B * U)* V + A * U * U;
+
+        for(int u = u0;u <= u1;++ u){
+            if(q < F)
+            {
+                float r2 = q / F;
+                float weight = gaussFilter(r2);
+
+                // Sample at the centre of grid cell (u, v).
+                num += weight * texture(Source, vec2(u + 0.5, v + 0.5)/ scale);
+                den += weight;
+            }
+            q += dq;
+            dq += ddq;
+        }
+
+    }
+
+    // Normalise by the accumulated weight.
+    vec4 color = num *(1. / den);
+    return color;
+}
+
+// Convenience wrapper: takes the screen-space derivatives of `coords` and the
+// width of mip level 0 of `tex`, then runs the EWA filter with them.
+vec4 texture2DEWA(sampler2D tex, vec2 coords){
+    int texWidth = textureSize(tex, 0).x;
+    return ewaFilter(tex, coords, dFdx(coords), dFdy(coords), texWidth);
+}
+
+// Pushes `coord` away from the centre (0.5, 0.5) by an amount derived from
+// its squared distance to the centre and the `distortion` parameter.
+vec2 radialDistortion(vec2 coord){
+    vec2 fromCentre = coord - vec2(0.5);
+    float strength  = dot(fromCentre, fromCentre) * params.distortion;
+    vec2 offset     = fromCentre * (1.0 - strength) * strength;
+    return coord + offset;
+}
+
+// Fragment stage: distorts the incoming coordinate radially, then samples the
+// source through the EWA filter.
+void main()
+{
+    vec2 warped = radialDistortion(vTexCoord);
+    FragColor = texture2DEWA(Source, warped);
+}
--- a/anti-aliasing/shaders/ewa_curvature.slang
+++ b/anti-aliasing/shaders/ewa_curvature.slang
@ -0,0 +1,197 @@
+#version 450
+
+/**
+* Practical Elliptical Texture Filtering on the GPU
+* Copyright 2010-2011 Pavlos Mavridis, All rights reserved.
+*
+* Version: 0.6 - 12 / 7 / 2011 (DD/MM/YY)
+*/
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+	float distortion;
+} params;
+
+#pragma parameter distortion "EWA Curvature" 0.15 0.0 1.0 0.01
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+// Vertex stage: forwards the texture coordinate unchanged and transforms the
+// vertex position by the MVP matrix.
+void main()
+{
+    vTexCoord   = TexCoord;
+    gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+//{========= TEXTURE FILTERING (EWA) PARAMETERS =========
+#define MAX_ECCENTRICITY 1
+#define FILTER_WIDTH 0.8
+#define FILTER_SHARPNESS 1.0
+#define TEXELS_PER_PIXEL 1.0
+#define TEXEL_LIMIT 32
+#define FILTER_FUNC gaussFilter
+//}======================================================
+
+#define M_PI 3.14159265358979323846
+
+#define SourceImage Source
+
+//{========================= FILTER FUNCTIONS =======================
+// We only use the Gaussian filter function. The other filters give
+// very similar results.
+ 
+// Box filter: returns a constant weight of 1.0; r2 is not used.
+float boxFilter(float r2){
+    const float uniformWeight = 1.0;
+    return uniformWeight;
+}
+ 
+// Gaussian falloff exp(-FILTER_SHARPNESS * r2).
+float gaussFilter(float r2){
+    const float sharpness = FILTER_SHARPNESS;
+    float exponent = -sharpness * r2;
+    return exp(exponent);
+}
+ 
+// Triangle (tent) filter: the weight falls linearly from 1 at r = 0 to 0 at
+// r >= 1, where r = sqrt(r2). The sharpness local was never used (its
+// division was commented out), so it is dropped; max() now receives two float
+// arguments instead of relying on int-to-float conversion.
+float triFilter(float r2){
+    float r = sqrt(r2);
+    return max(0.0, 1.0 - r);
+}
+ 
+// Normalised sinc: sin(pi * x) / (pi * x). At x == 0 the quotient is 0/0, so
+// the limit value 1.0 is returned explicitly instead of producing NaN.
+float sinc(float x){
+    if(x == 0.0)
+        return 1.0;
+    float px = M_PI * x;
+    return sin(px) / px;
+}
+ 
+// Product of two sinc lobes, sinc(r) * sinc(r / 1.3), with r = sqrt(r2).
+// A squared distance of exactly zero yields a weight of 1.
+float lanczosFilter(float r2){
+    float weight = 1.;
+    if (r2 != 0) {
+        float r = sqrt(r2);
+        weight = sinc(r) * sinc(r / 1.3);
+    }
+    return weight;
+}
+ 
+// Catmull-Rom style piecewise-cubic kernel in r = sqrt(r2): one polynomial
+// for r < 1, another for 1 <= r < 2, and zero from r = 2 onwards.
+float crFilter(float r2){
+    float r = sqrt(r2);
+    if (r >= 2.)
+        return .0;
+    if (r < 1.)
+        return 3.*r*r2 - 5.*r2 + 2.;
+    return -r*r2 + 5.*r2 - 8*r + 4.;
+}
+ 
+// Quadratic falloff 1 - r2 / width^2 with width = FILTER_SHARPNESS.
+float quadraticFilter(float r2){
+    const float width = FILTER_SHARPNESS;
+    float scaled = r2 / (width * width);
+    return 1.0 - scaled;
+}
+ 
+// Cubic falloff 1 - 3 r^2 / w^2 + 2 r^3 / w^3 with r = sqrt(r2) and
+// w = FILTER_SHARPNESS.
+float cubicFilter(float r2){
+    const float width = FILTER_SHARPNESS;
+    float r = sqrt(r2);
+    float quadTerm  = 3 * r2 / (width * width);
+    float cubicTerm = 2 * r * r2 / (width * width * width);
+    return 1.0 - quadTerm + cubicTerm;
+}
+
+//}
+
+//==================== EWA ( reference / 2-tex / 4-tex) ====================
+ 
+/**
+*   EWA filter
+*   Adapted from an ANSI C implementation from Matt Pharr
+*
+*   Source - texture to filter
+*   p0     - normalised coordinate of the filter centre
+*   du, dv - derivatives of the coordinate (the caller in this file passes
+*            dFdx/dFdy of the coordinate)
+*   scale  - factor converting normalised coordinates to sample-grid units
+*            (the caller in this file passes the texture width)
+*
+*   Returns the plain texture() sample when scale < 2; otherwise the
+*   FILTER_FUNC-weighted average of the grid samples inside the ellipse.
+*/
+vec4 ewaFilter(sampler2D Source, vec2 p0, vec2 du, vec2 dv, int scale){
+
+    vec4 foo = texture(Source,p0);
+   
+    //don't bother with elliptical filtering if the scale is very small
+    if(scale<2)
+        return foo;
+ 
+    // shift by half a grid cell, then convert to grid units
+    p0 -=vec2(0.5,0.5)/scale;
+    vec2 p = scale * p0;
+ 
+    // derivatives in grid units, scaled by the filter width
+    float ux = FILTER_WIDTH * du.s * scale;
+    float vx = FILTER_WIDTH * du.t * scale;
+    float uy = FILTER_WIDTH * dv.s * scale;
+    float vy = FILTER_WIDTH * dv.t * scale;
+ 
+    // compute ellipse coefficients
+    // A*x*x + B*x*y + C*y*y = F.
+    float A = vx*vx+vy*vy+1;
+    float B = -2*(ux*vx+uy*vy);
+    float C = ux*ux+uy*uy+1;
+    float F = A*C-B*B/4.;
+ 
+    // Compute the ellipse's (u,v) bounding box in texture space
+    float bbox_du = 2. / (-B*B+4.0*C*A) * sqrt((-B*B+4.0*C*A)*C*F);
+    float bbox_dv = 2. / (-B*B+4.0*C*A) * sqrt(A*(-B*B+4.0*C*A)*F);
+ 
+    //the ellipse bbox             
+    int u0 = int(floor(p.s - bbox_du));
+    int u1 = int(ceil (p.s + bbox_du));
+    int v0 = int(floor(p.t - bbox_dv));
+    int v1 = int(ceil (p.t + bbox_dv));
+ 
+    // Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
+    // and incrementally update the value of Ax^2+Bxy+Cy^2; when this
+    // value, q, is less than F, we're inside the ellipse so we filter
+    // away. q is advanced along u with first (dq) and second (ddq)
+    // differences.
+    // NOTE(review): the alpha accumulator starts at 1.0 while rgb starts at
+    // 0.0 - confirm this is intended.
+    vec4 num= vec4(0., 0., 0., 1.);
+    float den = 0;
+    float ddq = 2 * A;
+    float U = u0 - p.s;
+   
+    for (int v = v0; v <= v1; ++v) {
+        float V = v - p.t;
+        float dq = A*(2*U+1) + B*V;
+        float q = (C*V + B*U)*V + A*U*U;
+
+        for (int u = u0; u <= u1; ++u) {
+            if (q < F)
+            {
+                float r2 = q / F;
+                float weight = FILTER_FUNC(r2);
+           
+                // sample at the centre of grid cell (u, v)
+                num += weight* texture(Source, vec2(u+0.5,v+0.5)/scale);
+                den += weight;
+            }
+            q += dq;
+            dq += ddq;
+        }
+
+    }
+ 
+    // normalise by the accumulated weight
+    vec4 color = num*(1./den);
+    return color;
+}
+
+// Convenience wrapper: takes the screen-space derivatives of `coords` and the
+// width of mip level 0 of `tex`, then runs the EWA filter with them.
+vec4 texture2DEWA(sampler2D tex, vec2 coords){
+    int texWidth = textureSize(tex, 0).x;
+    return ewaFilter(tex, coords, dFdx(coords), dFdy(coords), texWidth);
+}
+
+// Pushes `coord` away from the centre (0.5, 0.5) by an amount derived from
+// its squared distance to the centre and the `distortion` parameter.
+vec2 radialDistortion(vec2 coord) {
+    vec2 fromCentre = coord - vec2(0.5);
+    float strength  = dot(fromCentre, fromCentre) * params.distortion;
+    vec2 offset     = fromCentre * (1.0 - strength) * strength;
+    return coord + offset;
+}
+
+// Fragment stage: distorts the incoming coordinate radially, then samples the
+// source image through the EWA filter.
+void main()
+{
+    vec2 warped = radialDistortion(vTexCoord);
+    FragColor = texture2DEWA(SourceImage, warped);
+}
--- a/anti-aliasing/shaders/ewa_curvature.vert
+++ b/anti-aliasing/shaders/ewa_curvature.vert
@ -0,0 +1,38 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float distortion;
+}params;
+
+#pragma parameterdistortion¡0.150.01.00.01
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+// Vertex stage: forwards the texture coordinate unchanged and transforms the
+// vertex position by the MVP matrix.
+void main()
+{
+    vTexCoord   = TexCoord;
+    gl_Position = global.MVP * Position;
+}
+
--- a/anti-aliasing/shaders/fxaa.frag
+++ b/anti-aliasing/shaders/fxaa.frag
@ -0,0 +1,262 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+}params;
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Luminance estimate from the red and green channels only:
+// green * (0.587 / 0.299) + red. Blue is ignored.
+float FxaaLuma(vec3 rgb){
+    float greenWeight = 0.587 / 0.299;
+    return rgb.g * greenWeight + rgb.r;
+}
+
+// Blends b towards a by amountOfA, evaluated as
+// (-amountOfA * b) + (a * amountOfA + b).
+vec3 FxaaLerp3(vec3 a, vec3 b, float amountOfA){
+    vec3 scaledA  = a * vec3(amountOfA);
+    vec3 removedB = vec3(- amountOfA) * b;
+    return removedB + (scaledA + b);
+}
+
+// Samples `tex` at `pos` shifted by the integer offset `off`, where one
+// offset unit equals `rcpFrame` in texture coordinates.
+vec4 FxaaTexOff(sampler2D tex, vec2 pos, ivec2 off, vec2 rcpFrame){
+    vec2 shifted = vec2(pos.x + float(off.x) * rcpFrame.x,
+                        pos.y + float(off.y) * rcpFrame.y);
+    return texture(tex, shifted);
+}
+
+
+
+
+// FXAA for a single pixel.
+//   pos      - texture coordinate of the pixel being shaded
+//   tex      - texture to anti-alias
+//   rcpFrame - step between adjacent samples in texture coordinates
+//              (the caller in this file passes params.SourceSize.zw)
+// Returns the centre colour untouched when local contrast is low; otherwise
+// blends a sample shifted across the detected edge with the 3x3 box average.
+vec3 FxaaPixelShader(vec2 pos, sampler2D tex, vec2 rcpFrame)
+{
+    // Centre pixel and its four direct neighbours.
+    vec3 rgbN = FxaaTexOff(tex, pos.xy, ivec2(0, -1), rcpFrame).xyz;
+    vec3 rgbW = FxaaTexOff(tex, pos.xy, ivec2(-1, 0), rcpFrame).xyz;
+    vec3 rgbM = FxaaTexOff(tex, pos.xy, ivec2(0, 0), rcpFrame).xyz;
+    vec3 rgbE = FxaaTexOff(tex, pos.xy, ivec2(1, 0), rcpFrame).xyz;
+    vec3 rgbS = FxaaTexOff(tex, pos.xy, ivec2(0, 1), rcpFrame).xyz;
+
+    float lumaN = FxaaLuma(rgbN);
+    float lumaW = FxaaLuma(rgbW);
+    float lumaM = FxaaLuma(rgbM);
+    float lumaE = FxaaLuma(rgbE);
+    float lumaS = FxaaLuma(rgbS);
+    float rangeMin = min(lumaM, min(min(lumaN, lumaW), min(lumaS, lumaE)));
+    float rangeMax = max(lumaM, max(max(lumaN, lumaW), max(lumaS, lumaE)));
+
+    // Early out: local contrast is below both the absolute (1/24) and the
+    // relative (1/8 of the brightest luma) threshold.
+    float range = rangeMax - rangeMin;
+    if(range < max((1.0 / 24.0), rangeMax * (1.0 / 8.0)))
+    {
+        return rgbM;
+    }
+
+    // Running sum for the 3x3 box average; completed once the corners are read.
+    vec3 rgbL = rgbN + rgbW + rgbM + rgbE + rgbS;
+
+    // Blend amount towards the box average: based on how far the centre luma
+    // is from its neighbours' mean, trimmed by 1/4 and capped at 3/4.
+    float lumaL = (lumaN + lumaW + lumaE + lumaS) * 0.25;
+    float rangeL = abs(lumaL - lumaM);
+    float blendL = max(0.0, (rangeL / range) - (1.0 / 4.0)) * (1.0 / (1.0 - (1.0 / 4.0)));
+    blendL = min((3.0 / 4.0), blendL);
+
+    // Corner neighbours.
+    vec3 rgbNW = FxaaTexOff(tex, pos.xy, ivec2(-1, -1), rcpFrame).xyz;
+    vec3 rgbNE = FxaaTexOff(tex, pos.xy, ivec2(1, -1), rcpFrame).xyz;
+    vec3 rgbSW = FxaaTexOff(tex, pos.xy, ivec2(-1, 1), rcpFrame).xyz;
+    vec3 rgbSE = FxaaTexOff(tex, pos.xy, ivec2(1, 1), rcpFrame).xyz;
+    rgbL += (rgbNW + rgbNE + rgbSW + rgbSE);
+    rgbL *= vec3(1.0 / 9.0);
+
+    float lumaNW = FxaaLuma(rgbNW);
+    float lumaNE = FxaaLuma(rgbNE);
+    float lumaSW = FxaaLuma(rgbSW);
+    float lumaSE = FxaaLuma(rgbSE);
+
+    // Second-difference edge measures along rows and along columns.
+    float edgeVert =
+        abs((0.25 * lumaNW) + (-0.5 * lumaN) + (0.25 * lumaNE)) +
+        abs((0.50 * lumaW) + (-1.0 * lumaM) + (0.50 * lumaE)) +
+        abs((0.25 * lumaSW) + (-0.5 * lumaS) + (0.25 * lumaSE));
+    float edgeHorz =
+        abs((0.25 * lumaNW) + (-0.5 * lumaW) + (0.25 * lumaSW)) +
+        abs((0.50 * lumaN) + (-1.0 * lumaM) + (0.50 * lumaS)) +
+        abs((0.25 * lumaNE) + (-0.5 * lumaE) + (0.25 * lumaSE));
+
+    bool horzSpan = edgeHorz >= edgeVert;
+    float lengthSign = horzSpan ? -rcpFrame.y : -rcpFrame.x;
+
+    // For a non-horizontal span, reuse the N/S variables for the W/E pair.
+    if(!horzSpan)
+    {
+        lumaN = lumaW;
+        lumaS = lumaE;
+    }
+
+    float gradientN = abs(lumaN - lumaM);
+    float gradientS = abs(lumaS - lumaM);
+    lumaN = (lumaN + lumaM) * 0.5;
+    lumaS = (lumaS + lumaM) * 0.5;
+
+    // Keep the side with the steeper gradient in the "N" variables and flip
+    // the offset direction when that is the S side. (The original assigned
+    // lumaN twice here; the redundant statement has been removed.)
+    if(gradientN < gradientS)
+    {
+        lumaN = lumaS;
+        gradientN = gradientS;
+        lengthSign *= -1.0;
+    }
+
+    // Start half a step towards the chosen side, perpendicular to the span.
+    vec2 posN;
+    posN.x = pos.x + (horzSpan ? 0.0 : lengthSign * 0.5);
+    posN.y = pos.y + (horzSpan ? lengthSign * 0.5 : 0.0);
+
+    gradientN *= (1.0 / 4.0);
+
+    vec2 posP = posN;
+    vec2 offNP = horzSpan ? vec2(rcpFrame.x, 0.0) : vec2(0.0, rcpFrame.y);
+    float lumaEndN = lumaN;
+    float lumaEndP = lumaN;
+    bool doneN = false;
+    bool doneP = false;
+    posN += offNP * vec2(-1.0, -1.0);
+    posP += offNP * vec2(1.0, 1.0);
+
+    // Walk along the span in both directions (at most 32 steps) until the
+    // luma departs from the edge luma by at least the scaled gradient.
+    for(int i = 0; i < 32; i++){
+        if(!doneN)
+        {
+            lumaEndN = FxaaLuma(texture(tex, posN.xy).xyz);
+        }
+        if(!doneP)
+        {
+            lumaEndP = FxaaLuma(texture(tex, posP.xy).xyz);
+        }
+
+        doneN = doneN || (abs(lumaEndN - lumaN) >= gradientN);
+        doneP = doneP || (abs(lumaEndP - lumaN) >= gradientN);
+
+        if(doneN && doneP)
+        {
+            break;
+        }
+        if(!doneN)
+        {
+            posN -= offNP;
+        }
+        if(!doneP)
+        {
+            posP += offNP;
+        }
+    }
+
+    // Distance to each span end; use the luma of the nearer end.
+    float dstN = horzSpan ? pos.x - posN.x : pos.y - posN.y;
+    float dstP = horzSpan ? posP.x - pos.x : posP.y - pos.y;
+    bool directionN = dstN < dstP;
+    lumaEndN = directionN ? lumaEndN : lumaEndP;
+
+    // No shift when the centre and the nearer span end lie on the same side
+    // of the edge luma.
+    if(((lumaM - lumaN) < 0.0) == ((lumaEndN - lumaN) < 0.0))
+    {
+        lengthSign = 0.0;
+    }
+
+    // Sub-pixel shift across the edge, scaled by how close the nearer span
+    // end is relative to the whole span.
+    float spanLength = (dstP + dstN);
+    dstN = directionN ? dstN : dstP;
+    float subPixelOffset = (0.5 + (dstN * (-1.0 / spanLength))) * lengthSign;
+    vec3 rgbF = texture(tex, vec2(
+        pos.x + (horzSpan ? 0.0 : subPixelOffset),
+        pos.y + (horzSpan ? subPixelOffset : 0.0))).xyz;
+    return FxaaLerp3(rgbL, rgbF, blendL);
+}
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+// Fragment stage: runs FXAA on the source at this pixel and writes the result
+// with an alpha of 1.0.
+void main()
+{
+    vec2 rcpFrame = vec2(params.SourceSize.z, params.SourceSize.w);
+    vec3 smoothed = FxaaPixelShader(vTexCoord, Source, rcpFrame);
+    FragColor = vec4(smoothed, 1.0);
+}
--- a/anti-aliasing/shaders/fxaa.gsh
+++ b/anti-aliasing/shaders/fxaa.gsh
--- a/anti-aliasing/shaders/fxaa.ppslang
+++ b/anti-aliasing/shaders/fxaa.ppslang
@ -0,0 +1,271 @@
+#version 450
+
+layout(push_constant)uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+ mat4 MVP;
+} global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Luminance estimate from the red and green channels only:
+// green * (0.587 / 0.299) + red. Blue is ignored.
+float FxaaLuma(vec3 rgb){
+    float greenWeight = 0.587 / 0.299;
+    return rgb.g * greenWeight + rgb.r;
+}
+
+// Blends b towards a by amountOfA, evaluated as
+// (-amountOfA * b) + (a * amountOfA + b).
+vec3 FxaaLerp3(vec3 a, vec3 b, float amountOfA){
+    vec3 scaledA  = a * vec3(amountOfA);
+    vec3 removedB = vec3(- amountOfA) * b;
+    return removedB + (scaledA + b);
+}
+
+// Samples `tex` at `pos` shifted by the integer offset `off`, where one
+// offset unit equals `rcpFrame` in texture coordinates.
+vec4 FxaaTexOff(sampler2D tex, vec2 pos, ivec2 off, vec2 rcpFrame){
+    vec2 shifted = vec2(pos.x + float(off.x) * rcpFrame.x,
+                        pos.y + float(off.y) * rcpFrame.y);
+    return texture(tex, shifted);
+}
+
+
+
+
+// FXAA for a single pixel.
+//   pos      - texture coordinate of the pixel being shaded
+//   tex      - texture to anti-alias
+//   rcpFrame - step between adjacent samples in texture coordinates
+//              (the caller in this file passes params.SourceSize.zw)
+// Returns the centre colour untouched when local contrast is low; otherwise
+// blends a sample shifted across the detected edge with the 3x3 box average.
+vec3 FxaaPixelShader(vec2 pos, sampler2D tex, vec2 rcpFrame)
+{
+    // Centre pixel and its four direct neighbours.
+    vec3 rgbN = FxaaTexOff(tex, pos.xy, ivec2(0, -1), rcpFrame).xyz;
+    vec3 rgbW = FxaaTexOff(tex, pos.xy, ivec2(-1, 0), rcpFrame).xyz;
+    vec3 rgbM = FxaaTexOff(tex, pos.xy, ivec2(0, 0), rcpFrame).xyz;
+    vec3 rgbE = FxaaTexOff(tex, pos.xy, ivec2(1, 0), rcpFrame).xyz;
+    vec3 rgbS = FxaaTexOff(tex, pos.xy, ivec2(0, 1), rcpFrame).xyz;
+
+    float lumaN = FxaaLuma(rgbN);
+    float lumaW = FxaaLuma(rgbW);
+    float lumaM = FxaaLuma(rgbM);
+    float lumaE = FxaaLuma(rgbE);
+    float lumaS = FxaaLuma(rgbS);
+    float rangeMin = min(lumaM, min(min(lumaN, lumaW), min(lumaS, lumaE)));
+    float rangeMax = max(lumaM, max(max(lumaN, lumaW), max(lumaS, lumaE)));
+
+    // Early out: local contrast is below both the absolute (1/24) and the
+    // relative (1/8 of the brightest luma) threshold.
+    float range = rangeMax - rangeMin;
+    if(range < max((1.0 / 24.0), rangeMax * (1.0 / 8.0)))
+    {
+        return rgbM;
+    }
+
+    // Running sum for the 3x3 box average; completed once the corners are read.
+    vec3 rgbL = rgbN + rgbW + rgbM + rgbE + rgbS;
+
+    // Blend amount towards the box average: based on how far the centre luma
+    // is from its neighbours' mean, trimmed by 1/4 and capped at 3/4.
+    float lumaL = (lumaN + lumaW + lumaE + lumaS) * 0.25;
+    float rangeL = abs(lumaL - lumaM);
+    float blendL = max(0.0, (rangeL / range) - (1.0 / 4.0)) * (1.0 / (1.0 - (1.0 / 4.0)));
+    blendL = min((3.0 / 4.0), blendL);
+
+    // Corner neighbours.
+    vec3 rgbNW = FxaaTexOff(tex, pos.xy, ivec2(-1, -1), rcpFrame).xyz;
+    vec3 rgbNE = FxaaTexOff(tex, pos.xy, ivec2(1, -1), rcpFrame).xyz;
+    vec3 rgbSW = FxaaTexOff(tex, pos.xy, ivec2(-1, 1), rcpFrame).xyz;
+    vec3 rgbSE = FxaaTexOff(tex, pos.xy, ivec2(1, 1), rcpFrame).xyz;
+    rgbL += (rgbNW + rgbNE + rgbSW + rgbSE);
+    rgbL *= vec3(1.0 / 9.0);
+
+    float lumaNW = FxaaLuma(rgbNW);
+    float lumaNE = FxaaLuma(rgbNE);
+    float lumaSW = FxaaLuma(rgbSW);
+    float lumaSE = FxaaLuma(rgbSE);
+
+    // Second-difference edge measures along rows and along columns.
+    float edgeVert =
+        abs((0.25 * lumaNW) + (-0.5 * lumaN) + (0.25 * lumaNE)) +
+        abs((0.50 * lumaW) + (-1.0 * lumaM) + (0.50 * lumaE)) +
+        abs((0.25 * lumaSW) + (-0.5 * lumaS) + (0.25 * lumaSE));
+    float edgeHorz =
+        abs((0.25 * lumaNW) + (-0.5 * lumaW) + (0.25 * lumaSW)) +
+        abs((0.50 * lumaN) + (-1.0 * lumaM) + (0.50 * lumaS)) +
+        abs((0.25 * lumaNE) + (-0.5 * lumaE) + (0.25 * lumaSE));
+
+    bool horzSpan = edgeHorz >= edgeVert;
+    float lengthSign = horzSpan ? -rcpFrame.y : -rcpFrame.x;
+
+    // For a non-horizontal span, reuse the N/S variables for the W/E pair.
+    if(!horzSpan)
+    {
+        lumaN = lumaW;
+        lumaS = lumaE;
+    }
+
+    float gradientN = abs(lumaN - lumaM);
+    float gradientS = abs(lumaS - lumaM);
+    lumaN = (lumaN + lumaM) * 0.5;
+    lumaS = (lumaS + lumaM) * 0.5;
+
+    // Keep the side with the steeper gradient in the "N" variables and flip
+    // the offset direction when that is the S side. (The original assigned
+    // lumaN twice here; the redundant statement has been removed.)
+    if(gradientN < gradientS)
+    {
+        lumaN = lumaS;
+        gradientN = gradientS;
+        lengthSign *= -1.0;
+    }
+
+    // Start half a step towards the chosen side, perpendicular to the span.
+    vec2 posN;
+    posN.x = pos.x + (horzSpan ? 0.0 : lengthSign * 0.5);
+    posN.y = pos.y + (horzSpan ? lengthSign * 0.5 : 0.0);
+
+    gradientN *= (1.0 / 4.0);
+
+    vec2 posP = posN;
+    vec2 offNP = horzSpan ? vec2(rcpFrame.x, 0.0) : vec2(0.0, rcpFrame.y);
+    float lumaEndN = lumaN;
+    float lumaEndP = lumaN;
+    bool doneN = false;
+    bool doneP = false;
+    posN += offNP * vec2(-1.0, -1.0);
+    posP += offNP * vec2(1.0, 1.0);
+
+    // Walk along the span in both directions (at most 32 steps) until the
+    // luma departs from the edge luma by at least the scaled gradient.
+    for(int i = 0; i < 32; i++){
+        if(!doneN)
+        {
+            lumaEndN = FxaaLuma(texture(tex, posN.xy).xyz);
+        }
+        if(!doneP)
+        {
+            lumaEndP = FxaaLuma(texture(tex, posP.xy).xyz);
+        }
+
+        doneN = doneN || (abs(lumaEndN - lumaN) >= gradientN);
+        doneP = doneP || (abs(lumaEndP - lumaN) >= gradientN);
+
+        if(doneN && doneP)
+        {
+            break;
+        }
+        if(!doneN)
+        {
+            posN -= offNP;
+        }
+        if(!doneP)
+        {
+            posP += offNP;
+        }
+    }
+
+    // Distance to each span end; use the luma of the nearer end.
+    float dstN = horzSpan ? pos.x - posN.x : pos.y - posN.y;
+    float dstP = horzSpan ? posP.x - pos.x : posP.y - pos.y;
+    bool directionN = dstN < dstP;
+    lumaEndN = directionN ? lumaEndN : lumaEndP;
+
+    // No shift when the centre and the nearer span end lie on the same side
+    // of the edge luma.
+    if(((lumaM - lumaN) < 0.0) == ((lumaEndN - lumaN) < 0.0))
+    {
+        lengthSign = 0.0;
+    }
+
+    // Sub-pixel shift across the edge, scaled by how close the nearer span
+    // end is relative to the whole span.
+    float spanLength = (dstP + dstN);
+    dstN = directionN ? dstN : dstP;
+    float subPixelOffset = (0.5 + (dstN * (-1.0 / spanLength))) * lengthSign;
+    vec3 rgbF = texture(tex, vec2(
+        pos.x + (horzSpan ? 0.0 : subPixelOffset),
+        pos.y + (horzSpan ? subPixelOffset : 0.0))).xyz;
+    return FxaaLerp3(rgbL, rgbF, blendL);
+}
+
+#pragma stagevertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+
+// Vertex stage: forwards the texture coordinate unchanged and transforms the
+// vertex position by the MVP matrix.
+void main()
+{
+    vTexCoord   = TexCoord;
+    gl_Position = global.MVP * Position;
+}
+
+#pragma stagefragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+// Fragment stage: runs FXAA on the source at this pixel and writes the result
+// with an alpha of 1.0.
+void main()
+{
+    vec2 rcpFrame = vec2(params.SourceSize.z, params.SourceSize.w);
+    vec3 smoothed = FxaaPixelShader(vTexCoord, Source, rcpFrame);
+    FragColor = vec4(smoothed, 1.0);
+}
--- a/anti-aliasing/shaders/fxaa.slang
+++ b/anti-aliasing/shaders/fxaa.slang
@ -0,0 +1,271 @@
+#version 450
+
+// Per-pass values delivered as push constants.
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;   // .zw is passed to FxaaPixelShader() as rcpFrame by the fragment stage
+	vec4 OriginalSize; // not read by this shader
+	vec4 OutputSize;   // not read by this shader
+	uint FrameCount;   // not read by this shader
+} params;
+
+// Uniform buffer holding the matrix the vertex stage multiplies Position by.
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+/**
+ * @license
+ * Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
+ *
+ * TO  THE MAXIMUM  EXTENT PERMITTED  BY APPLICABLE  LAW, THIS SOFTWARE  IS PROVIDED
+ * *AS IS*  AND NVIDIA AND  ITS SUPPLIERS DISCLAIM  ALL WARRANTIES,  EITHER  EXPRESS
+ * OR IMPLIED, INCLUDING, BUT NOT LIMITED  TO, NONINFRINGEMENT,IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL  NVIDIA 
+ * OR ITS SUPPLIERS BE  LIABLE  FOR  ANY  DIRECT, SPECIAL,  INCIDENTAL,  INDIRECT,  OR  
+ * CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION,  DAMAGES FOR LOSS 
+ * OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR ANY 
+ * OTHER PECUNIARY LOSS) ARISING OUT OF THE  USE OF OR INABILITY  TO USE THIS SOFTWARE, 
+ * EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+ */
+
+/*
+FXAA_PRESET - Choose compile-in knob preset 3-5 (only these are defined below).
+------------------------------------------------------------------------------
+FXAA_EDGE_THRESHOLD - The minimum amount of local contrast required 
+                      to apply algorithm.
+                      1.0/3.0  - too little
+                      1.0/4.0  - good start
+                      1.0/8.0  - applies to more edges
+                      1.0/16.0 - overkill
+------------------------------------------------------------------------------
+FXAA_EDGE_THRESHOLD_MIN - Trims the algorithm from processing darks.
+                          Perf optimization.
+                          1.0/32.0 - visible limit (smaller isn't visible)
+                          1.0/16.0 - good compromise
+                          1.0/12.0 - upper limit (seeing artifacts)
+------------------------------------------------------------------------------
+FXAA_SEARCH_STEPS - Maximum number of search steps for end of span.
+------------------------------------------------------------------------------
+FXAA_SEARCH_THRESHOLD - Controls when to stop searching.
+                        1.0/4.0 - seems to be the best quality wise
+------------------------------------------------------------------------------
+FXAA_SUBPIX_TRIM - Controls sub-pixel aliasing removal.
+                   1.0/2.0 - low removal
+                   1.0/3.0 - medium removal
+                   1.0/4.0 - default removal
+                   1.0/8.0 - high removal
+                   0.0 - complete removal
+------------------------------------------------------------------------------
+FXAA_SUBPIX_CAP - Ensures fine detail is not completely removed.
+                  This is important for the transition of sub-pixel detail,
+                  like fences and wires.
+                  3.0/4.0 - default (medium amount of filtering)
+                  7.0/8.0 - high amount of filtering
+                  1.0 - no capping of sub-pixel aliasing removal
+*/
+
+// Preset selection: defaults to 5 unless FXAA_PRESET was defined beforehand.
+#ifndef FXAA_PRESET
+    #define FXAA_PRESET 5
+#endif
+// Preset 3: 16 search steps.
+#if (FXAA_PRESET == 3)
+    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
+    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/16.0)
+    #define FXAA_SEARCH_STEPS        16
+    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
+    #define FXAA_SUBPIX_CAP          (3.0/4.0)
+    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
+#endif
+// Preset 4: 24 search steps, lower minimum edge threshold.
+#if (FXAA_PRESET == 4)
+    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
+    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/24.0)
+    #define FXAA_SEARCH_STEPS        24
+    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
+    #define FXAA_SUBPIX_CAP          (3.0/4.0)
+    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
+#endif
+// Preset 5: 32 search steps.
+#if (FXAA_PRESET == 5)
+    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
+    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/24.0)
+    #define FXAA_SEARCH_STEPS        32
+    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
+    #define FXAA_SUBPIX_CAP          (3.0/4.0)
+    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
+#endif
+
+// Stop with a readable message when the chosen preset left a knob undefined,
+// instead of failing later on undeclared identifiers.
+#if !defined(FXAA_EDGE_THRESHOLD) || !defined(FXAA_EDGE_THRESHOLD_MIN) || !defined(FXAA_SEARCH_STEPS) || !defined(FXAA_SEARCH_THRESHOLD) || !defined(FXAA_SUBPIX_CAP) || !defined(FXAA_SUBPIX_TRIM)
+    #error Unsupported FXAA_PRESET, only presets 3, 4 and 5 are defined
+#endif
+
+// Rescales the trimmed sub-pixel blend amount back to a 0..1 range.
+#define FXAA_SUBPIX_TRIM_SCALE (1.0/(1.0 - FXAA_SUBPIX_TRIM))
+
+// Cheap luma estimate built from red and green only: blue is ignored and the
+// result is not normalized, so it costs a single multiply-add.
+// For channel values in 0.0..1.0 the result lies in 0.0..2.963210702.
+float FxaaLuma(vec3 rgb) {
+    const float greenWeight = 0.587/0.299;
+    return rgb.g * greenWeight + rgb.r;
+}
+
+// Blend of a and b with weight amountOfA on a: gives b for 0.0 and
+// (up to rounding) a for 1.0. Evaluated as (-w * b) + ((a * w) + b).
+vec3 FxaaLerp3(vec3 a, vec3 b, float amountOfA) {
+    vec3 w = vec3(amountOfA);
+    vec3 weightedA = a * w;
+    vec3 removedB = -w * b;
+    return removedB + (weightedA + b);
+}
+
+// Samples tex at pos moved by off steps, each step being rcpFrame wide/high.
+vec4 FxaaTexOff(sampler2D tex, vec2 pos, ivec2 off, vec2 rcpFrame) {
+    vec2 shifted = pos + vec2(off) * rcpFrame;
+    return texture(tex, shifted);
+}
+
+// FXAA for a single pixel.
+//   pos      - texture position of the pixel, xy in {0.0 to 1.0}
+//   tex      - texture to filter
+//   rcpFrame - should be a uniform equal to {1.0/frameWidth, 1.0/frameHeight}
+// Returns the centre texel unchanged when the local luma contrast is below the
+// edge thresholds; otherwise a blend of the 3x3 average and a sample shifted
+// across the detected edge.
+vec3 FxaaPixelShader(vec2 pos, sampler2D tex, vec2 rcpFrame)
+{
+    // Centre texel and its four direct neighbours.
+    vec3 rgbN = FxaaTexOff(tex, pos.xy, ivec2( 0,-1), rcpFrame).xyz;
+    vec3 rgbW = FxaaTexOff(tex, pos.xy, ivec2(-1, 0), rcpFrame).xyz;
+    vec3 rgbM = FxaaTexOff(tex, pos.xy, ivec2( 0, 0), rcpFrame).xyz;
+    vec3 rgbE = FxaaTexOff(tex, pos.xy, ivec2( 1, 0), rcpFrame).xyz;
+    vec3 rgbS = FxaaTexOff(tex, pos.xy, ivec2( 0, 1), rcpFrame).xyz;
+
+    float lumaN = FxaaLuma(rgbN);
+    float lumaW = FxaaLuma(rgbW);
+    float lumaM = FxaaLuma(rgbM);
+    float lumaE = FxaaLuma(rgbE);
+    float lumaS = FxaaLuma(rgbS);
+    float rangeMin = min(lumaM, min(min(lumaN, lumaW), min(lumaS, lumaE)));
+    float rangeMax = max(lumaM, max(max(lumaN, lumaW), max(lumaS, lumaE)));
+
+    // Early out: contrast too low to count as an edge.
+    float range = rangeMax - rangeMin;
+    if(range < max(FXAA_EDGE_THRESHOLD_MIN, rangeMax * FXAA_EDGE_THRESHOLD))
+    {
+        return rgbM;
+    }
+
+    // Running sum of the 3x3 neighbourhood (corners are added further down).
+    vec3 rgbL = rgbN + rgbW + rgbM + rgbE + rgbS;
+
+    // Sub-pixel blend amount: distance of the centre luma from the average of
+    // its four neighbours, relative to the local range, then trimmed, rescaled
+    // and capped.
+    float lumaL = (lumaN + lumaW + lumaE + lumaS) * 0.25;
+    float rangeL = abs(lumaL - lumaM);
+    float blendL = max(0.0, (rangeL / range) - FXAA_SUBPIX_TRIM) * FXAA_SUBPIX_TRIM_SCALE;
+    blendL = min(FXAA_SUBPIX_CAP, blendL);
+
+    // Corner texels complete the 3x3 box average.
+    vec3 rgbNW = FxaaTexOff(tex, pos.xy, ivec2(-1,-1), rcpFrame).xyz;
+    vec3 rgbNE = FxaaTexOff(tex, pos.xy, ivec2( 1,-1), rcpFrame).xyz;
+    vec3 rgbSW = FxaaTexOff(tex, pos.xy, ivec2(-1, 1), rcpFrame).xyz;
+    vec3 rgbSE = FxaaTexOff(tex, pos.xy, ivec2( 1, 1), rcpFrame).xyz;
+    rgbL += (rgbNW + rgbNE + rgbSW + rgbSE);
+    rgbL *= vec3(1.0/9.0);
+
+    float lumaNW = FxaaLuma(rgbNW);
+    float lumaNE = FxaaLuma(rgbNE);
+    float lumaSW = FxaaLuma(rgbSW);
+    float lumaSE = FxaaLuma(rgbSE);
+
+    // edgeVert sums horizontal second differences of the three rows,
+    // edgeHorz sums vertical second differences of the three columns.
+    float edgeVert =
+        abs((0.25 * lumaNW) + (-0.5 * lumaN) + (0.25 * lumaNE)) +
+        abs((0.50 * lumaW ) + (-1.0 * lumaM) + (0.50 * lumaE )) +
+        abs((0.25 * lumaSW) + (-0.5 * lumaS) + (0.25 * lumaSE));
+    float edgeHorz =
+        abs((0.25 * lumaNW) + (-0.5 * lumaW) + (0.25 * lumaSW)) +
+        abs((0.50 * lumaN ) + (-1.0 * lumaM) + (0.50 * lumaS )) +
+        abs((0.25 * lumaNE) + (-0.5 * lumaE) + (0.25 * lumaSE));
+
+    bool horzSpan = edgeHorz >= edgeVert;
+    // One texel across the span, initially towards N (or W for a vertical span).
+    float lengthSign = horzSpan ? -rcpFrame.y : -rcpFrame.x;
+
+    // From here on lumaN / lumaS stand for the two neighbours across the span.
+    if(!horzSpan)
+    {
+        lumaN = lumaW;
+        lumaS = lumaE;
+    }
+
+    float gradientN = abs(lumaN - lumaM);
+    float gradientS = abs(lumaS - lumaM);
+    // lumaN / lumaS now hold the average luma of each neighbour/centre pair.
+    lumaN = (lumaN + lumaM) * 0.5;
+    lumaS = (lumaS + lumaM) * 0.5;
+
+    // Keep the steeper side in lumaN / gradientN and flip the step direction
+    // when that is the S (or E) side.
+    if (gradientN < gradientS)
+    {
+        lumaN = lumaS;
+        gradientN = gradientS;
+        lengthSign *= -1.0;
+    }
+
+    // Search starts half a texel towards the chosen neighbour.
+    vec2 posN;
+    posN.x = pos.x + (horzSpan ? 0.0 : lengthSign * 0.5);
+    posN.y = pos.y + (horzSpan ? lengthSign * 0.5 : 0.0);
+
+    // The search stops once the luma differs from the pair average by this much.
+    gradientN *= FXAA_SEARCH_THRESHOLD;
+
+    // Walk along the span in both directions, one texel per step.
+    vec2 posP = posN;
+    vec2 offNP = horzSpan ? vec2(rcpFrame.x, 0.0) : vec2(0.0, rcpFrame.y);
+    float lumaEndN = lumaN;
+    float lumaEndP = lumaN;
+    bool doneN = false;
+    bool doneP = false;
+    posN += offNP * vec2(-1.0, -1.0);
+    posP += offNP * vec2( 1.0,  1.0);
+
+    for(int i = 0; i < FXAA_SEARCH_STEPS; i++) {
+        // A finished direction keeps its last position and luma.
+        if(!doneN)
+        {
+            lumaEndN = FxaaLuma(texture(tex, posN.xy).xyz);
+        }
+        if(!doneP)
+        {
+            lumaEndP = FxaaLuma(texture(tex, posP.xy).xyz);
+        }
+
+        doneN = doneN || (abs(lumaEndN - lumaN) >= gradientN);
+        doneP = doneP || (abs(lumaEndP - lumaN) >= gradientN);
+
+        if(doneN && doneP)
+        {
+            break;
+        }
+        if(!doneN)
+        {
+            posN -= offNP;
+        }
+        if(!doneP)
+        {
+            posP += offNP;
+        }
+    }
+
+    // Distance from pos to each end of the span; the nearer end decides.
+    float dstN = horzSpan ? pos.x - posN.x : pos.y - posN.y;
+    float dstP = horzSpan ? posP.x - pos.x : posP.y - pos.y;
+    bool directionN = dstN < dstP;
+    lumaEndN = directionN ? lumaEndN : lumaEndP;
+
+    // No shift when the centre and the nearer end lie on the same side of the
+    // pair average.
+    if(((lumaM - lumaN) < 0.0) == ((lumaEndN - lumaN) < 0.0))
+    {
+        lengthSign = 0.0;
+    }
+
+    // Shift across the span: 0.5 minus the nearer distance over the span
+    // length, in units of lengthSign.
+    float spanLength = (dstP + dstN);
+    dstN = directionN ? dstN : dstP;
+    float subPixelOffset = (0.5 + (dstN * (-1.0/spanLength))) * lengthSign;
+    vec3 rgbF = texture(tex, vec2(
+        pos.x + (horzSpan ? 0.0 : subPixelOffset),
+        pos.y + (horzSpan ? subPixelOffset : 0.0))).xyz;
+    // blendL is the weight of the 3x3 average, the rest comes from rgbF.
+    return FxaaLerp3(rgbL, rgbF, blendL);
+}
+
+#pragma stage vertex
+// Vertex inputs and the texture coordinate handed on to the fragment stage.
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Forward the texture coordinate unchanged and transform the vertex by MVP.
+   vTexCoord = TexCoord;
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+// Interpolated texture coordinate, colour output and the texture being filtered.
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+void main()
+{
+   // SourceSize.zw plays the role of rcpFrame for the FXAA routine.
+   vec2 rcpFrame = params.SourceSize.zw;
+   vec3 filtered = FxaaPixelShader(vTexCoord, Source, rcpFrame);
+   // Alpha is fixed at 1.0.
+   FragColor = vec4(filtered, 1.0);
+}
--- a/anti-aliasing/shaders/fxaa.vert
+++ b/anti-aliasing/shaders/fxaa.vert
@ -0,0 +1,264 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+// Per-pass values, declared as a plain uniform block in this "#version 150" variant.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+}params;
+
+// Holds the matrix main() multiplies Position by.
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Cheap luma estimate from red and green only; blue is ignored and the
+// result is not normalized.
+float FxaaLuma(vec3 rgb){
+    const float greenWeight = 0.587 / 0.299;
+    return rgb.g * greenWeight + rgb.r;
+}
+
+// Blend of a and b with weight amountOfA on a, evaluated as
+// (-w * b) + ((a * w) + b).
+vec3 FxaaLerp3(vec3 a, vec3 b, float amountOfA){
+    vec3 w = vec3(amountOfA);
+    vec3 weightedA = a * w;
+    vec3 removedB = -w * b;
+    return removedB + (weightedA + b);
+}
+
+// Samples tex at pos moved by off steps, each step being rcpFrame wide/high.
+vec4 FxaaTexOff(sampler2D tex, vec2 pos, ivec2 off, vec2 rcpFrame){
+    vec2 shifted = pos + vec2(off) * rcpFrame;
+    return texture(tex, shifted);
+}
+
+
+
+
+// FXAA for a single pixel: pos is the texture position, tex the texture to
+// filter, rcpFrame the step of one texel in texture coordinates.
+// Returns the centre texel unchanged when the local luma contrast is too low,
+// otherwise a blend of the 3x3 average and a sample shifted across the edge.
+vec3 FxaaPixelShader(vec2 pos, sampler2D tex, vec2 rcpFrame)
+{
+    // Centre texel and its four direct neighbours.
+    vec3 rgbN = FxaaTexOff(tex, pos . xy, ivec2(0, - 1), rcpFrame). xyz;
+    vec3 rgbW = FxaaTexOff(tex, pos . xy, ivec2(- 1, 0), rcpFrame). xyz;
+    vec3 rgbM = FxaaTexOff(tex, pos . xy, ivec2(0, 0), rcpFrame). xyz;
+    vec3 rgbE = FxaaTexOff(tex, pos . xy, ivec2(1, 0), rcpFrame). xyz;
+    vec3 rgbS = FxaaTexOff(tex, pos . xy, ivec2(0, 1), rcpFrame). xyz;
+
+    float lumaN = FxaaLuma(rgbN);
+    float lumaW = FxaaLuma(rgbW);
+    float lumaM = FxaaLuma(rgbM);
+    float lumaE = FxaaLuma(rgbE);
+    float lumaS = FxaaLuma(rgbS);
+    float rangeMin = min(lumaM, min(min(lumaN, lumaW), min(lumaS, lumaE)));
+    float rangeMax = max(lumaM, max(max(lumaN, lumaW), max(lumaS, lumaE)));
+
+    // Early out: contrast too low to count as an edge.
+    float range = rangeMax - rangeMin;
+    if(range < max((1.0 / 24.0), rangeMax *(1.0 / 8.0)))
+    {
+        return rgbM;
+    }
+
+    // Running sum of the 3x3 neighbourhood (corners are added further down).
+    vec3 rgbL = rgbN + rgbW + rgbM + rgbE + rgbS;
+
+    // Sub-pixel blend amount: trimmed, rescaled and capped.
+    float lumaL =(lumaN + lumaW + lumaE + lumaS)* 0.25;
+    float rangeL = abs(lumaL - lumaM);
+    float blendL = max(0.0,(rangeL / range)-(1.0 / 4.0))*(1.0 /(1.0 -(1.0 / 4.0)));
+    blendL = min((3.0 / 4.0), blendL);
+
+    // Corner texels complete the 3x3 box average.
+    vec3 rgbNW = FxaaTexOff(tex, pos . xy, ivec2(- 1, - 1), rcpFrame). xyz;
+    vec3 rgbNE = FxaaTexOff(tex, pos . xy, ivec2(1, - 1), rcpFrame). xyz;
+    vec3 rgbSW = FxaaTexOff(tex, pos . xy, ivec2(- 1, 1), rcpFrame). xyz;
+    vec3 rgbSE = FxaaTexOff(tex, pos . xy, ivec2(1, 1), rcpFrame). xyz;
+    rgbL +=(rgbNW + rgbNE + rgbSW + rgbSE);
+    rgbL *= vec3(1.0 / 9.0);
+
+    float lumaNW = FxaaLuma(rgbNW);
+    float lumaNE = FxaaLuma(rgbNE);
+    float lumaSW = FxaaLuma(rgbSW);
+    float lumaSE = FxaaLuma(rgbSE);
+
+    // edgeVert sums horizontal second differences of the three rows,
+    // edgeHorz sums vertical second differences of the three columns.
+    float edgeVert =
+        abs((0.25 * lumaNW)+(- 0.5 * lumaN)+(0.25 * lumaNE))+
+        abs((0.50 * lumaW)+(- 1.0 * lumaM)+(0.50 * lumaE))+
+        abs((0.25 * lumaSW)+(- 0.5 * lumaS)+(0.25 * lumaSE));
+    float edgeHorz =
+        abs((0.25 * lumaNW)+(- 0.5 * lumaW)+(0.25 * lumaSW))+
+        abs((0.50 * lumaN)+(- 1.0 * lumaM)+(0.50 * lumaS))+
+        abs((0.25 * lumaNE)+(- 0.5 * lumaE)+(0.25 * lumaSE));
+
+    bool horzSpan = edgeHorz >= edgeVert;
+    // One texel across the span, initially towards N (or W for a vertical span).
+    float lengthSign = horzSpan ? - rcpFrame . y : - rcpFrame . x;
+
+    // From here on lumaN / lumaS stand for the two neighbours across the span.
+    if(! horzSpan)
+    {
+        lumaN = lumaW;
+        lumaS = lumaE;
+    }
+
+    float gradientN = abs(lumaN - lumaM);
+    float gradientS = abs(lumaS - lumaM);
+    // lumaN / lumaS now hold the average luma of each neighbour/centre pair.
+    lumaN =(lumaN + lumaM)* 0.5;
+    lumaS =(lumaS + lumaM)* 0.5;
+
+    // Keep the steeper side in lumaN / gradientN and flip the step direction
+    // when that is the S (or E) side.
+    if(gradientN < gradientS)
+    {
+        lumaN = lumaS;
+        gradientN = gradientS;
+        lengthSign *= - 1.0;
+    }
+
+    // Search starts half a texel towards the chosen neighbour.
+    vec2 posN;
+    posN . x = pos . x +(horzSpan ? 0.0 : lengthSign * 0.5);
+    posN . y = pos . y +(horzSpan ? lengthSign * 0.5 : 0.0);
+
+    // The search stops once the luma differs from the pair average by this much.
+    gradientN *=(1.0 / 4.0);
+
+    // Walk along the span in both directions, one texel per step.
+    vec2 posP = posN;
+    vec2 offNP = horzSpan ? vec2(rcpFrame . x, 0.0): vec2(0.0, rcpFrame . y);
+    float lumaEndN = lumaN;
+    float lumaEndP = lumaN;
+    bool doneN = false;
+    bool doneP = false;
+    posN += offNP * vec2(- 1.0, - 1.0);
+    posP += offNP * vec2(1.0, 1.0);
+
+    for(int i = 0;i < 32;i ++){
+        // A finished direction keeps its last position and luma.
+        if(! doneN)
+        {
+            lumaEndN = FxaaLuma(texture(tex, posN . xy). xyz);
+        }
+        if(! doneP)
+        {
+            lumaEndP = FxaaLuma(texture(tex, posP . xy). xyz);
+        }
+
+        doneN = doneN ||(abs(lumaEndN - lumaN)>= gradientN);
+        doneP = doneP ||(abs(lumaEndP - lumaN)>= gradientN);
+
+        if(doneN && doneP)
+        {
+            break;
+        }
+        if(! doneN)
+        {
+            posN -= offNP;
+        }
+        if(! doneP)
+        {
+            posP += offNP;
+        }
+    }
+
+    // Distance from pos to each end of the span; the nearer end decides.
+    float dstN = horzSpan ? pos . x - posN . x : pos . y - posN . y;
+    float dstP = horzSpan ? posP . x - pos . x : posP . y - pos . y;
+    bool directionN = dstN < dstP;
+    lumaEndN = directionN ? lumaEndN : lumaEndP;
+
+    // No shift when the centre and the nearer end lie on the same side of the
+    // pair average.
+    if(((lumaM - lumaN)< 0.0)==((lumaEndN - lumaN)< 0.0))
+    {
+        lengthSign = 0.0;
+    }
+
+    // Shift across the span, in units of lengthSign.
+    float spanLength =(dstP + dstN);
+    dstN = directionN ? dstN : dstP;
+    float subPixelOffset =(0.5 +(dstN *(- 1.0 / spanLength)))* lengthSign;
+    vec3 rgbF = texture(tex, vec2(
+        pos . x +(horzSpan ? 0.0 : subPixelOffset),
+        pos . y +(horzSpan ? subPixelOffset : 0.0))). xyz;
+    // blendL is the weight of the 3x3 average, the rest comes from rgbF.
+    return FxaaLerp3(rgbL, rgbF, blendL);
+}
+
+// Vertex inputs and the texture coordinate written by main().
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Forward the texture coordinate unchanged and transform the vertex by MVP.
+   vTexCoord = TexCoord;
+   gl_Position = global.MVP * Position;
+}
+
--- a/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass0.frag
+++ b/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass0.frag
@ -0,0 +1,167 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Per-pass values plus the three user-tunable rAA parameters.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float RAA_SHR0;   // scales the tilt magnitude limit in res2x()
+   float RAA_SMT0;   // exponent of the smoothness weight in main()
+   float RAA_DVT0;   // exponent of the deviation weight in res2x()
+}params;
+
+// Parameter declarations: name, description, initial value, minimum, maximum, step.
+#pragma parameter RAA_SHR0 "rAA-3x 0 Sharpness"  2.0 0.00 10.0 0.05
+#pragma parameter RAA_SMT0 "rAA-3x 0 Smoothness" 0.5 0.05 10.0 0.05
+#pragma parameter RAA_DVT0 "rAA-3x 0 Deviation"  1.0 0.05 10.0 0.05
+
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+// Interpolated texture coordinate, colour output and the texture being filtered.
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+const int scl = 3; // scale factor
+const int rad = 7; // search radius
+
+
+// Core of rAA: tilt of the pixel px given its two chosen predecessors
+// (pre2, pre1) and successors (pos1, pos2).
+vec3 res2x(vec3 pre2, vec3 pre1, vec3 px, vec3 pos1, vec3 pos2)
+{
+    // differences between consecutive samples, from pre2 through pos2
+    vec3 dfA = pre1 - pre2;
+    vec3 dfB = px   - pre1;
+    vec3 dfC = pos1 - px;
+    vec3 dfD = pos2 - pos1;
+
+    // per-channel distance of px to the nearer end of [0,1]
+    vec3 room;
+    for(int c = 0; c < 3; c++){
+        if(px[c] < 0.5) room[c] = px[c];
+        else            room[c] = 1.0 - px[c];
+    }
+
+    // magnitude limit and raw tilt
+    vec3 limit = params.RAA_SHR0 * min(room, min(abs(dfB), abs(dfC)));
+    vec3 tilt  = (7 * (dfB + dfC) - 3 * (dfA + dfD)) / 16;
+
+    // per-channel limit/|tilt| ratio, 1.0 where the tilt is zero
+    vec3 ratio;
+    for(int c = 0; c < 3; c++){
+        if(tilt[c] == 0.0) ratio[c] = 1.0;
+        else               ratio[c] = limit[c] / abs(tilt[c]);
+    }
+
+    vec3 byChannel = clamp(tilt, -limit, limit);                           // limit channels
+    vec3 byLength  = min(1.0, min(min(ratio.x, ratio.y), ratio.z)) * tilt; // limit length
+
+    // distance of the channel-limited tilt from the lines along dfB and dfC
+    float lenB = length(dfB);
+    float lenC = length(dfC);
+    float devB = 0.0;
+    float devC = 0.0;
+    if(lenB != 0.0) devB = length(cross(dfB, byChannel)) / lenB;
+    if(lenC != 0.0) devC = length(cross(dfC, byChannel)) / lenC;
+
+    // deviation weight, saturated at 1.0
+    float w = min(1.0, max(devB, devC) / 0.8125);
+
+    return mix(byChannel, byLength, pow(w, params.RAA_DVT0));
+}
+
+// Horizontal rAA pass: reads 2*rad+1 texels of the current row, searches both
+// sides for candidate edges and shifts the centre colour by the weighted tilt.
+void main()
+{
+    // read texels: tx[n + rad] holds the texel n steps along +x from the centre
+    vec3 tx[2 * rad + 1];
+
+    tx[(0) + rad] = texture(Source, vTexCoord).rgb;
+
+    // Neighbouring texels of Source are SourceSize.zw apart (the same value as
+    // OutputSize.zw whenever this pass renders at 1x scale).
+    for(int i = 1; i <= rad; i++){
+        tx[(-i) + rad] = texture(Source, vTexCoord + vec2(-i, 0) * params.SourceSize.zw).rgb;
+        tx[( i) + rad] = texture(Source, vTexCoord + vec2( i, 0) * params.SourceSize.zw).rgb;
+    }
+
+    // prepare variables for candidate search
+    // i1 / i2: distance of the first / second candidate (.x negative side,
+    // .y positive side); 0 means none found
+    ivec2 i1 = ivec2(0), i2 = ivec2(0);
+    vec3 df1, df2;
+    vec2 d1, d2, d3;
+    bvec2 cn;
+
+    df1 = tx[(1) + rad] - tx[(0) + rad];
+    df2 = tx[(0) + rad] - tx[(-1) + rad];
+
+    d2 = vec2(length(df1), length(df2));
+    d3 = d2.yx;
+
+    // smoothness weight, protects smooth gradients
+    float sw = d2.x + d2.y;
+    sw = sw == 0.0 ? 1.0 : pow(length(df1 - df2) / sw, params.RAA_SMT0);
+
+    // look for proper candidates: a step whose colour distance exceeds that of
+    // both adjacent steps
+    for(int i = 1; i < rad; i++){
+        d1 = d2;
+        d2 = d3;
+        d3 = vec2(distance(tx[(-i - 1) + rad], tx[(-i) + rad]), distance(tx[(i) + rad], tx[(i + 1) + rad]));
+        cn.x = max(d1.x, d3.x) < d2.x;
+        cn.y = max(d1.y, d3.y) < d2.y;
+        // i2 is updated before i1 so a single hit never fills both slots
+        i2.x = cn.x && i2.x == 0 && i1.x != 0 ? i : i2.x;
+        i2.y = cn.y && i2.y == 0 && i1.y != 0 ? i : i2.y;
+        i1.x = cn.x && i1.x == 0 ? i : i1.x;
+        i1.y = cn.y && i1.y == 0 ? i : i1.y;
+    }
+
+    // without a second candidate use the texel right after the first one
+    i2.x = i2.x == 0 ? i1.x + 1 : i2.x;
+    i2.y = i2.y == 0 ? i1.y + 1 : i2.y;
+
+    // rAA core with the candidates found above
+    vec3 t = res2x(tx[(-i2.x) + rad], tx[(-i1.x) + rad], tx[(0) + rad], tx[(i1.y) + rad], tx[(i2.y) + rad]);
+
+    // distance weight; zero when either side has no candidate. Both sides cannot
+    // match at i == 1 (their conditions contradict), so the divisor is non-zero.
+    float dw = (i1.x == 0 || i1.y == 0) ? 0.0 : 2.0 * ((i1.x - 1.0) / (i1.x + i1.y - 2.0)) - 1.0;
+
+    // result
+    vec3 res = tx[(0) + rad] + (scl - 1.0) / scl * sw * dw * t;
+
+    // prevent ringing: stay within the range of the three centre texels
+    vec3 lo = min(min(tx[(-1) + rad], tx[(0) + rad]), tx[(1) + rad]);
+    vec3 hi = max(max(tx[(-1) + rad], tx[(0) + rad]), tx[(1) + rad]);
+
+    FragColor = vec4(clamp(res, lo, hi), 1.0);
+}
--- a/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass0.gsh
+++ b/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass0.gsh
--- a/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass0.ppslang
+++ b/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass0.ppslang
@ -0,0 +1,176 @@
+#version 450
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Per-pass values plus the three user-tunable rAA parameters.
+layout(push_constant) uniform Push
+{
+ vec4 SourceSize;
+ vec4 OriginalSize;
+ vec4 OutputSize;
+ uint FrameCount;
+ float RAA_SHR0;   // scales the tilt magnitude limit in res2x()
+ float RAA_SMT0;   // exponent of the smoothness weight in main()
+ float RAA_DVT0;   // exponent of the deviation weight in res2x()
+} params;
+
+// Parameter declarations: name, description, initial value, minimum, maximum, step.
+#pragma parameter RAA_SHR0 "rAA-3x 0 Sharpness"  2.0 0.00 10.0 0.05
+#pragma parameter RAA_SMT0 "rAA-3x 0 Smoothness" 0.5 0.05 10.0 0.05
+#pragma parameter RAA_DVT0 "rAA-3x 0 Deviation"  1.0 0.05 10.0 0.05
+
+// Holds the matrix the vertex stage multiplies Position by.
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+ mat4 MVP;
+} global;
+
+#pragma stage vertex
+// Vertex inputs and the texture coordinate handed on to the fragment stage.
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+ // Forward the texture coordinate unchanged and transform the vertex by MVP.
+ vTexCoord = TexCoord;
+ gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+// Interpolated texture coordinate, colour output and the texture being filtered.
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+const int scl = 3; // scale factor
+const int rad = 7; // search radius
+
+
+// Core of rAA: tilt of the pixel px given its two chosen predecessors
+// (pre2, pre1) and successors (pos1, pos2).
+vec3 res2x(vec3 pre2, vec3 pre1, vec3 px, vec3 pos1, vec3 pos2)
+{
+    // differences between consecutive samples, from pre2 through pos2
+    vec3 dfA = pre1 - pre2;
+    vec3 dfB = px   - pre1;
+    vec3 dfC = pos1 - px;
+    vec3 dfD = pos2 - pos1;
+
+    // per-channel distance of px to the nearer end of [0,1]
+    vec3 room;
+    for(int c = 0; c < 3; c++){
+        if(px[c] < 0.5) room[c] = px[c];
+        else            room[c] = 1.0 - px[c];
+    }
+
+    // magnitude limit and raw tilt
+    vec3 limit = params.RAA_SHR0 * min(room, min(abs(dfB), abs(dfC)));
+    vec3 tilt  = (7 * (dfB + dfC) - 3 * (dfA + dfD)) / 16;
+
+    // per-channel limit/|tilt| ratio, 1.0 where the tilt is zero
+    vec3 ratio;
+    for(int c = 0; c < 3; c++){
+        if(tilt[c] == 0.0) ratio[c] = 1.0;
+        else               ratio[c] = limit[c] / abs(tilt[c]);
+    }
+
+    vec3 byChannel = clamp(tilt, -limit, limit);                           // limit channels
+    vec3 byLength  = min(1.0, min(min(ratio.x, ratio.y), ratio.z)) * tilt; // limit length
+
+    // distance of the channel-limited tilt from the lines along dfB and dfC
+    float lenB = length(dfB);
+    float lenC = length(dfC);
+    float devB = 0.0;
+    float devC = 0.0;
+    if(lenB != 0.0) devB = length(cross(dfB, byChannel)) / lenB;
+    if(lenC != 0.0) devC = length(cross(dfC, byChannel)) / lenC;
+
+    // deviation weight, saturated at 1.0
+    float w = min(1.0, max(devB, devC) / 0.8125);
+
+    return mix(byChannel, byLength, pow(w, params.RAA_DVT0));
+}
+
+// Horizontal rAA pass: reads 2*rad+1 texels of the current row, searches both
+// sides for candidate edges and shifts the centre colour by the weighted tilt.
+void main()
+{
+    // read texels: tx[n + rad] holds the texel n steps along +x from the centre
+    vec3 tx[2 * rad + 1];
+
+    tx[(0) + rad] = texture(Source, vTexCoord).rgb;
+
+    // Neighbouring texels of Source are SourceSize.zw apart (the same value as
+    // OutputSize.zw whenever this pass renders at 1x scale).
+    for(int i = 1; i <= rad; i++){
+        tx[(-i) + rad] = texture(Source, vTexCoord + vec2(-i, 0) * params.SourceSize.zw).rgb;
+        tx[( i) + rad] = texture(Source, vTexCoord + vec2( i, 0) * params.SourceSize.zw).rgb;
+    }
+
+    // prepare variables for candidate search
+    // i1 / i2: distance of the first / second candidate (.x negative side,
+    // .y positive side); 0 means none found
+    ivec2 i1 = ivec2(0), i2 = ivec2(0);
+    vec3 df1, df2;
+    vec2 d1, d2, d3;
+    bvec2 cn;
+
+    df1 = tx[(1) + rad] - tx[(0) + rad];
+    df2 = tx[(0) + rad] - tx[(-1) + rad];
+
+    d2 = vec2(length(df1), length(df2));
+    d3 = d2.yx;
+
+    // smoothness weight, protects smooth gradients
+    float sw = d2.x + d2.y;
+    sw = sw == 0.0 ? 1.0 : pow(length(df1 - df2) / sw, params.RAA_SMT0);
+
+    // look for proper candidates: a step whose colour distance exceeds that of
+    // both adjacent steps
+    for(int i = 1; i < rad; i++){
+        d1 = d2;
+        d2 = d3;
+        d3 = vec2(distance(tx[(-i - 1) + rad], tx[(-i) + rad]), distance(tx[(i) + rad], tx[(i + 1) + rad]));
+        cn.x = max(d1.x, d3.x) < d2.x;
+        cn.y = max(d1.y, d3.y) < d2.y;
+        // i2 is updated before i1 so a single hit never fills both slots
+        i2.x = cn.x && i2.x == 0 && i1.x != 0 ? i : i2.x;
+        i2.y = cn.y && i2.y == 0 && i1.y != 0 ? i : i2.y;
+        i1.x = cn.x && i1.x == 0 ? i : i1.x;
+        i1.y = cn.y && i1.y == 0 ? i : i1.y;
+    }
+
+    // without a second candidate use the texel right after the first one
+    i2.x = i2.x == 0 ? i1.x + 1 : i2.x;
+    i2.y = i2.y == 0 ? i1.y + 1 : i2.y;
+
+    // rAA core with the candidates found above
+    vec3 t = res2x(tx[(-i2.x) + rad], tx[(-i1.x) + rad], tx[(0) + rad], tx[(i1.y) + rad], tx[(i2.y) + rad]);
+
+    // distance weight; zero when either side has no candidate. Both sides cannot
+    // match at i == 1 (their conditions contradict), so the divisor is non-zero.
+    float dw = (i1.x == 0 || i1.y == 0) ? 0.0 : 2.0 * ((i1.x - 1.0) / (i1.x + i1.y - 2.0)) - 1.0;
+
+    // result
+    vec3 res = tx[(0) + rad] + (scl - 1.0) / scl * sw * dw * t;
+
+    // prevent ringing: stay within the range of the three centre texels
+    vec3 lo = min(min(tx[(-1) + rad], tx[(0) + rad]), tx[(1) + rad]);
+    vec3 hi = max(max(tx[(-1) + rad], tx[(0) + rad]), tx[(1) + rad]);
+
+    FragColor = vec4(clamp(res, lo, hi), 1.0);
+}
--- a/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass0.slang
+++ b/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass0.slang
@ -0,0 +1,176 @@
+#version 450
+
+/*
+	rAA post-3x - Pass 0
+	by Sp00kyFox, 2018-10-20
+
+Filter:	Nearest
+Scale:	1x
+
+This is a generalized continuation of the reverse antialiasing filter by
+Christoph Feck. Unlike the original filter, this one is meant to be used on an
+already upscaled image, which makes it possible to combine rAA with other
+filters such as ScaleFX, xBR or others.
+
+Pass 0 does the horizontal filtering.
+
+
+
+Copyright (c) 2018 Sp00kyFox - ScaleFX@web.de
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/ 
+
+// Per-pass values plus the three user-tunable rAA parameters.
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+	float RAA_SHR0; // scales the tilt magnitude limit in res2x()
+	float RAA_SMT0; // exponent of the smoothness weight in main()
+	float RAA_DVT0; // exponent of the deviation weight in res2x()
+} params;
+
+// One declaration per parameter: name, description, then four numeric values.
+#pragma parameter RAA_SHR0 "rAA-3x 0 Sharpness"  2.0 0.00 10.0 0.05
+#pragma parameter RAA_SMT0 "rAA-3x 0 Smoothness" 0.5 0.05 10.0 0.05
+#pragma parameter RAA_DVT0 "rAA-3x 0 Deviation"  1.0 0.05 10.0 0.05
+
+// Holds the matrix the vertex stage multiplies Position by.
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+#pragma stage vertex
+// Vertex inputs and the texture coordinate handed on to the fragment stage.
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+	// Forward the texture coordinate unchanged and transform the vertex by MVP.
+	vTexCoord = TexCoord;
+	gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+// Interpolated texture coordinate, colour output and the texture being filtered.
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+// scl enters the final result as the factor (scl-1)/scl; rad is the number of
+// texels read on each side of the centre.
+const int scl = 3; // scale factor
+const int rad = 7; // search radius
+
+// core function of rAA - tilt of a pixel
+// px is the centre sample, pre2/pre1 its chosen predecessors and pos1/pos2 its
+// chosen successors; the return value is the limited colour tilt for px.
+vec3 res2x(vec3 pre2, vec3 pre1, vec3 px, vec3 pos1, vec3 pos2)
+{
+    // differences between consecutive samples, from pre2 through pos2
+    vec3 dfA = pre1 - pre2;
+    vec3 dfB = px   - pre1;
+    vec3 dfC = pos1 - px;
+    vec3 dfD = pos2 - pos1;
+
+    // per-channel distance of px to the nearer end of [0,1]
+    vec3 room;
+    for(int c = 0; c < 3; c++){
+        if(px[c] < 0.5) room[c] = px[c];
+        else            room[c] = 1.0 - px[c];
+    }
+
+    // magnitude limit and raw tilt
+    vec3 limit = params.RAA_SHR0 * min(room, min(abs(dfB), abs(dfC)));
+    vec3 tilt  = (7 * (dfB + dfC) - 3 * (dfA + dfD)) / 16;
+
+    // per-channel limit/|tilt| ratio, 1.0 where the tilt is zero
+    vec3 ratio;
+    for(int c = 0; c < 3; c++){
+        if(tilt[c] == 0.0) ratio[c] = 1.0;
+        else               ratio[c] = limit[c] / abs(tilt[c]);
+    }
+
+    vec3 byChannel = clamp(tilt, -limit, limit);                           // limit channels
+    vec3 byLength  = min(1.0, min(min(ratio.x, ratio.y), ratio.z)) * tilt; // limit length
+
+    // distance between line (px, pre1) resp. (px, pos1) and the tilted point
+    float lenB = length(dfB);
+    float lenC = length(dfC);
+    float devB = 0.0;
+    float devC = 0.0;
+    if(lenB != 0.0) devB = length(cross(dfB, byChannel)) / lenB;
+    if(lenC != 0.0) devC = length(cross(dfC, byChannel)) / lenC;
+
+    // color deviation from optimal value, saturated at 1.0
+    float w = min(1.0, max(devB, devC) / 0.8125);
+
+    return mix(byChannel, byLength, pow(w, params.RAA_DVT0));
+}
+
+// Horizontal rAA pass: reads 2*rad+1 texels of the current row, searches both
+// sides for candidate edges and shifts the centre colour by the weighted tilt.
+void main()
+{
+	// read texels
+
+	vec3 tx[2*rad+1];
+
+	// TX(n) is the texel n steps along +x from the centre (negative n: -x)
+	#define TX(n) tx[(n)+rad]
+
+	TX(0) = texture(Source, vTexCoord).rgb;
+
+	// Neighbouring texels of Source are SourceSize.zw apart (the same value as
+	// OutputSize.zw at the 1x scale this pass is meant to run at).
+	for(int i=1; i<=rad; i++){
+		TX(-i) = texture(Source, vTexCoord + vec2(-i,0)*params.SourceSize.zw).rgb;
+		TX( i) = texture(Source, vTexCoord + vec2( i,0)*params.SourceSize.zw).rgb;
+	}
+
+
+	// prepare variables for candidate search
+
+	// i1 / i2: distance of the first / second candidate (.x negative side,
+	// .y positive side); 0 means none found
+	ivec2 i1 = ivec2(0), i2 = ivec2(0);
+	vec3 df1, df2;
+	vec2 d1, d2, d3;
+	bvec2 cn;
+
+	df1 = TX(1)-TX(0); df2 = TX(0)-TX(-1);
+
+	d2 = vec2(length(df1), length(df2));
+	d3 = d2.yx;
+
+
+	// smoothness weight, protects smooth gradients
+	float sw = d2.x + d2.y;
+	sw = sw == 0.0 ? 1.0 : pow(length(df1-df2)/sw, params.RAA_SMT0);
+
+
+	// look for proper candidates: a step whose colour distance exceeds that of
+	// both adjacent steps
+	for(int i=1; i<rad; i++){
+		d1 = d2;
+		d2 = d3;
+		d3 = vec2(distance(TX(-i-1), TX(-i)), distance(TX(i), TX(i+1)));
+		cn.x = max(d1.x,d3.x)<d2.x;
+		cn.y = max(d1.y,d3.y)<d2.y;
+		// i2 is updated before i1 so a single hit never fills both slots
+		i2.x = cn.x && i2.x==0 && i1.x!=0 ? i : i2.x;
+		i2.y = cn.y && i2.y==0 && i1.y!=0 ? i : i2.y;
+		i1.x = cn.x && i1.x==0 ? i : i1.x;
+		i1.y = cn.y && i1.y==0 ? i : i1.y;
+	}
+
+	// without a second candidate use the texel right after the first one
+	i2.x = i2.x == 0 ? i1.x+1 : i2.x;
+	i2.y = i2.y == 0 ? i1.y+1 : i2.y;
+
+
+	// rAA core with the candidates found above
+	vec3 t = res2x(TX(-i2.x), TX(-i1.x), TX(0), TX(i1.y), TX(i2.y));
+
+	// distance weight; zero when either side has no candidate. Both sides cannot
+	// match at i == 1 (their conditions contradict), so the divisor is non-zero.
+	float dw = (i1.x == 0 || i1.y == 0) ? 0.0 : 2.0 * ((i1.x-1.0)/(i1.x+i1.y-2.0)) - 1.0;
+
+	// result
+	vec3 res = TX(0) + (scl-1.0)/scl * sw*dw * t;
+
+
+	// prevent ringing: stay within the range of the three centre texels
+	vec3 lo  = min(min(TX(-1),TX(0)),TX(1));
+	vec3 hi  = max(max(TX(-1),TX(0)),TX(1));
+
+	FragColor = vec4(clamp(res, lo, hi), 1.0);
+}
--- a/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass0.vert
+++ b/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass0.vert
@ -0,0 +1,73 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Per-pass values plus the three user-tunable rAA parameters.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float RAA_SHR0;
+   float RAA_SMT0;
+   float RAA_DVT0;
+}params;
+
+// Parameter declarations: name, description, initial value, minimum, maximum, step.
+#pragma parameter RAA_SHR0 "rAA-3x 0 Sharpness"  2.0 0.00 10.0 0.05
+#pragma parameter RAA_SMT0 "rAA-3x 0 Smoothness" 0.5 0.05 10.0 0.05
+#pragma parameter RAA_DVT0 "rAA-3x 0 Deviation"  1.0 0.05 10.0 0.05
+
+// Holds the matrix main() multiplies Position by.
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+// Vertex inputs and the texture coordinate written by main().
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+ // Forward the texture coordinate unchanged and transform the vertex by MVP.
+ vTexCoord = TexCoord;
+ gl_Position = global.MVP * Position;
+}
+
--- a/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass1.frag
+++ b/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass1.frag
@ -0,0 +1,167 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Per-pass values plus the three rAA parameters of this pass.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float RAA_SHR1;   // scales the tilt magnitude limit in res2x()
+   float RAA_SMT1;   // exponent of the smoothness weight in main()
+   float RAA_DVT1;   // exponent of the deviation weight in res2x()
+}params;
+
+// NOTE(review): the three pragma lines below look garbled (no separators and a
+// stray character where a description string would be) - presumably they were
+// "name, description, then numeric values"; confirm against the original source.
+#pragma parameterRAA_SHR1¡2.00.0010.00.05
+#pragma parameterRAA_SMT1¡0.50.0510.00.05
+#pragma parameterRAA_DVT1¡1.00.0510.00.05
+
+// Declared here but not referenced by the code of this fragment shader.
+layout(std140) uniform UBO
+{
+   mat4 MVP;
+}global;
+
+// Interpolated texture coordinate, colour output and the texture being filtered.
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+// scl enters the final result as (scl - 1)/scl; rad is the number of texels
+// read on each side of the centre.
+const int scl = 3;
+const int rad = 7;
+
+
+// Core of rAA: tilt of the pixel px given its two chosen predecessors
+// (pre2, pre1) and successors (pos1, pos2).
+vec3 res2x(vec3 pre2, vec3 pre1, vec3 px, vec3 pos1, vec3 pos2)
+{
+    // differences between consecutive samples, from pre2 through pos2
+    vec3 dfA = pre1 - pre2;
+    vec3 dfB = px   - pre1;
+    vec3 dfC = pos1 - px;
+    vec3 dfD = pos2 - pos1;
+
+    // per-channel distance of px to the nearer end of [0,1]
+    vec3 room;
+    for(int c = 0; c < 3; c++){
+        if(px[c] < 0.5) room[c] = px[c];
+        else            room[c] = 1.0 - px[c];
+    }
+
+    // magnitude limit and raw tilt
+    vec3 limit = params.RAA_SHR1 * min(room, min(abs(dfB), abs(dfC)));
+    vec3 tilt  = (7 * (dfB + dfC) - 3 * (dfA + dfD)) / 16;
+
+    // per-channel limit/|tilt| ratio, 1.0 where the tilt is zero
+    vec3 ratio;
+    for(int c = 0; c < 3; c++){
+        if(tilt[c] == 0.0) ratio[c] = 1.0;
+        else               ratio[c] = limit[c] / abs(tilt[c]);
+    }
+
+    vec3 byChannel = clamp(tilt, -limit, limit);                           // limit channels
+    vec3 byLength  = min(1.0, min(min(ratio.x, ratio.y), ratio.z)) * tilt; // limit length
+
+    // distance of the channel-limited tilt from the lines along dfB and dfC
+    float lenB = length(dfB);
+    float lenC = length(dfC);
+    float devB = 0.0;
+    float devC = 0.0;
+    if(lenB != 0.0) devB = length(cross(dfB, byChannel)) / lenB;
+    if(lenC != 0.0) devC = length(cross(dfC, byChannel)) / lenC;
+
+    // deviation weight, saturated at 1.0
+    float w = min(1.0, max(devB, devC) / 0.8125);
+
+    return mix(byChannel, byLength, pow(w, params.RAA_DVT1));
+}
+
+// Vertical rAA pass: reads 2*rad+1 texels of the current column, searches both
+// sides for candidate edges and shifts the centre colour by the weighted tilt.
+void main()
+{
+
+
+ // tx[n + rad] holds the texel n steps along +y from the centre
+ vec3 tx[2 * rad + 1];
+
+
+
+     tx[(0)+ rad]= texture(Source, vTexCoord). rgb;
+
+ // neighbours are stepped by SourceSize.zw in y only
+ for(int i = 1;i <= rad;i ++){
+       tx[(- i)+ rad]= texture(Source, vTexCoord + vec2(0, - i)* params . SourceSize . zw). rgb;
+       tx[(i)+ rad]= texture(Source, vTexCoord + vec2(0, i)* params . SourceSize . zw). rgb;
+ }
+
+
+
+
+ // i1 / i2: distance of the first / second candidate (.x negative side,
+ // .y positive side); 0 means none found
+ ivec2 i1 = ivec2(0), i2 = ivec2(0);
+ vec3 df1, df2;
+ vec2 d1, d2, d3;
+ bvec2 cn;
+
+ df1 = tx[(1)+ rad]- tx[(0)+ rad];df2 = tx[(0)+ rad]- tx[(- 1)+ rad];
+
+ d2 = vec2(length(df1), length(df2));
+ d3 = d2 . yx;
+
+
+
+ // smoothness weight: 1.0 when both centre differences are zero
+ float sw = d2 . x + d2 . y;
+ sw = sw == 0.0 ? 1.0 : pow(length(df1 - df2)/ sw, params . RAA_SMT1);
+
+
+
+ // candidate search: a step whose colour distance exceeds that of both
+ // adjacent steps
+ for(int i = 1;i < rad;i ++){
+  d1 = d2;
+  d2 = d3;
+  d3 = vec2(distance(tx[(- i - 1)+ rad], tx[(- i)+ rad]), distance(tx[(i)+ rad], tx[(i + 1)+ rad]));
+  cn . x = max(d1 . x, d3 . x)< d2 . x;
+  cn . y = max(d1 . y, d3 . y)< d2 . y;
+  // i2 is updated before i1 so a single hit never fills both slots
+  i2 . x = cn . x && i2 . x == 0 && i1 . x != 0 ? i : i2 . x;
+  i2 . y = cn . y && i2 . y == 0 && i1 . y != 0 ? i : i2 . y;
+  i1 . x = cn . x && i1 . x == 0 ? i : i1 . x;
+  i1 . y = cn . y && i1 . y == 0 ? i : i1 . y;
+ }
+
+ // without a second candidate use the texel right after the first one
+ i2 . x = i2 . x == 0 ? i1 . x + 1 : i2 . x;
+ i2 . y = i2 . y == 0 ? i1 . y + 1 : i2 . y;
+
+
+
+ // tilt computed from the candidates found above
+ vec3 t = res2x(tx[(- i2 . x)+ rad], tx[(- i1 . x)+ rad], tx[(0)+ rad], tx[(i1 . y)+ rad], tx[(i2 . y)+ rad]);
+
+
+ // distance weight; zero when either side has no candidate
+ float dw =(i1 . x == 0 || i1 . y == 0)? 0.0 : 2.0 *((i1 . x - 1.0)/(i1 . x + i1 . y - 2.0))- 1.0;
+
+
+ vec3 res = tx[(0)+ rad]+(scl - 1.0)/ scl * sw * dw * t;
+
+
+
+ // clamp the result to the range of the three centre texels
+ vec3 lo = min(min(tx[(- 1)+ rad], tx[(0)+ rad]), tx[(1)+ rad]);
+    vec3 hi = max(max(tx[(- 1)+ rad], tx[(0)+ rad]), tx[(1)+ rad]);
+
+    FragColor = vec4(clamp(res, lo, hi), 1.0);
+}
--- a/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass1.gsh
+++ b/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass1.gsh
--- a/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass1.ppslang
+++ b/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass1.ppslang
@ -0,0 +1,176 @@
+#version 450
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Per-pass push constants, read below as params.*
+layout(push_constant) uniform Push
+{
+ vec4 SourceSize;   // .zw: per-texel step used when sampling neighbours in main()
+ vec4 OriginalSize; // not referenced by the code in this file
+ vec4 OutputSize;   // not referenced by the code in this file
+ uint FrameCount;   // not referenced by the code in this file
+ float RAA_SHR1;    // scales the tilt magnitude limit in res2x()
+ float RAA_SMT1;    // exponent of the smoothness weight in the fragment main()
+ float RAA_DVT1;    // exponent of the deviation weight in res2x()
+} params;
+
+// Directive layout: name, label, default, minimum, maximum, step.
+#pragma parameter RAA_SHR1 "rAA-3x 1 Sharpness"  2.0 0.00 10.0 0.05
+#pragma parameter RAA_SMT1 "rAA-3x 1 Smoothness" 0.5 0.05 10.0 0.05
+#pragma parameter RAA_DVT1 "rAA-3x 1 Deviation"  1.0 0.05 10.0 0.05
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+ mat4 MVP; // multiplied with Position in the vertex stage
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+ // Pass the texture coordinate through untouched and transform the vertex by MVP.
+ vTexCoord = TexCoord;
+ gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+const int scl = 3; // scale factor
+const int rad = 7; // search radius
+
+
+// Tilt of the centre texel `px`, derived from a five-texel run
+// (pre2, pre1, px, pos1, pos2) and limited by params.RAA_SHR1 / RAA_DVT1.
+vec3 res2x(vec3 pre2, vec3 pre1, vec3 px, vec3 pos1, vec3 pos2)
+{
+    // Successive differences along the run.
+    vec3 g0 = pre1 - pre2;
+    vec3 g1 = px - pre1;
+    vec3 g2 = pos1 - px;
+    vec3 g3 = pos2 - pos1;
+
+    // Per channel: px where px < 0.5, otherwise 1 - px.
+    vec3 room = vec3((px.x < 0.5) ? px.x : (1.0 - px.x),
+                     (px.y < 0.5) ? px.y : (1.0 - px.y),
+                     (px.z < 0.5) ? px.z : (1.0 - px.z));
+
+    // Magnitude limit and raw tilt.
+    vec3 limit = params.RAA_SHR1 * min(room, min(abs(g1), abs(g2)));
+    vec3 tilt = (7 * (g1 + g2) - 3 * (g0 + g3)) / 16;
+
+    // Variant 1: clamp every channel on its own.
+    vec3 byChannel = clamp(tilt, -limit, limit);
+
+    // Variant 2: shrink the whole vector by the tightest limit/tilt ratio.
+    vec3 ratio = vec3(tilt.x == 0.0 ? 1.0 : limit.x / abs(tilt.x),
+                      tilt.y == 0.0 ? 1.0 : limit.y / abs(tilt.y),
+                      tilt.z == 0.0 ? 1.0 : limit.z / abs(tilt.z));
+    vec3 byLength = min(1.0, min(min(ratio.x, ratio.y), ratio.z)) * tilt;
+
+    // |g x byChannel| / |g|: offset of the channel-clamped tilt from the
+    // direction of each neighbouring step (0 when that step is zero).
+    float len1 = length(g1);
+    float len2 = length(g2);
+    float off1 = (len1 == 0.0) ? 0.0 : length(cross(g1, byChannel)) / len1;
+    float off2 = (len2 == 0.0) ? 0.0 : length(cross(g2, byChannel)) / len2;
+
+    // Blend factor between the two variants, capped at 1.
+    float w = min(1.0, max(off1, off2) / 0.8125);
+
+    return mix(byChannel, byLength, pow(w, params.RAA_DVT1));
+}
+
+void main()
+{
+ // Fetch the vertical run of texels: the centre plus `rad` neighbours on each side.
+ vec3 tx[2 * rad + 1];
+ tx[rad] = texture(Source, vTexCoord).rgb;
+ for (int k = 1; k <= rad; k++) {
+  tx[rad - k] = texture(Source, vTexCoord + vec2(0, - k) * params.SourceSize.zw).rgb;
+  tx[rad + k] = texture(Source, vTexCoord + vec2(0, k) * params.SourceSize.zw).rgb;
+ }
+
+ // Colour steps from the centre texel to its two direct neighbours.
+ vec3 stepFwd = tx[rad + 1] - tx[rad];
+ vec3 stepBwd = tx[rad] - tx[rad - 1];
+
+ // Three-entry sliding window of step lengths; .x walks towards negative
+ // offsets and .y towards positive offsets.
+ vec2 dPrev;
+ vec2 dCur = vec2(length(stepFwd), length(stepBwd));
+ vec2 dNext = dCur.yx;
+
+ // Smoothness weight, evaluated before the window starts moving.
+ float stepSum = dCur.x + dCur.y;
+ float smoothW = (stepSum == 0.0) ? 1.0 : pow(length(stepFwd - stepBwd) / stepSum, params.RAA_SMT1);
+
+ // Candidate search: remember the first and second offsets at which the middle
+ // window entry is strictly larger than both of its neighbours.
+ ivec2 first = ivec2(0);
+ ivec2 second = ivec2(0);
+ for (int k = 1; k < rad; k++) {
+  dPrev = dCur;
+  dCur = dNext;
+  dNext = vec2(distance(tx[rad - k - 1], tx[rad - k]), distance(tx[rad + k], tx[rad + k + 1]));
+
+  bvec2 peak = lessThan(max(dPrev, dNext), dCur);
+  if (peak.x) {
+   if (first.x == 0) first.x = k;
+   else if (second.x == 0) second.x = k;
+  }
+  if (peak.y) {
+   if (first.y == 0) first.y = k;
+   else if (second.y == 0) second.y = k;
+  }
+ }
+
+ // Without a second candidate, use the texel right after the first one.
+ if (second.x == 0) second.x = first.x + 1;
+ if (second.y == 0) second.y = first.y + 1;
+
+ // Tilt computed by res2x() from the candidate texels.
+ vec3 tilt = res2x(tx[rad - second.x], tx[rad - first.x], tx[rad], tx[rad + first.y], tx[rad + second.y]);
+
+ // Distance weight; zero unless a candidate was found on both sides.
+ float distW = 0.0;
+ if (first.x != 0 && first.y != 0)
+  distW = 2.0 * ((first.x - 1.0) / (first.x + first.y - 2.0)) - 1.0;
+
+ vec3 shifted = tx[rad] + (scl - 1.0) / scl * smoothW * distW * tilt;
+
+ // Keep the result inside the range spanned by the three central texels.
+ vec3 lo = min(min(tx[rad - 1], tx[rad]), tx[rad + 1]);
+ vec3 hi = max(max(tx[rad - 1], tx[rad]), tx[rad + 1]);
+ FragColor = vec4(clamp(shifted, lo, hi), 1.0);
+}
--- a/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass1.slang
+++ b/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass1.slang
@ -0,0 +1,176 @@
+#version 450
+
+/*
+	rAA post-3x - Pass 1
+	by Sp00kyFox, 2018-10-20
+
+Filter:	Nearest
+Scale:	1x
+
+This is a generalized continuation of the reverse antialiasing filter by
+Christoph Feck. Unlike the original filter, this one is meant to be used on an
+already upscaled image, which makes it possible to combine rAA with other
+filters such as ScaleFX, xBR or others.
+
+Pass 1 does the vertical filtering.
+
+
+
+Copyright (c) 2018 Sp00kyFox - ScaleFX@web.de
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/ 
+
+layout(push_constant) uniform Push // per-pass values, read below as params.*
+{
+	vec4 SourceSize;   // .zw: per-texel step used when sampling neighbours in the fragment main()
+	vec4 OriginalSize; // not referenced by the code in this file
+	vec4 OutputSize;   // not referenced by the code in this file
+	uint FrameCount;   // not referenced by the code in this file
+	float RAA_SHR1;    // scales the tilt magnitude limit in res2x()
+	float RAA_SMT1;    // exponent of the smoothness weight in the fragment main()
+	float RAA_DVT1;    // exponent of the deviation weight in res2x()
+} params;
+// One directive per RAA_* member above: name, label, then four numeric fields.
+#pragma parameter RAA_SHR1 "rAA-3x 1 Sharpness"  2.0 0.00 10.0 0.05
+#pragma parameter RAA_SMT1 "rAA-3x 1 Smoothness" 0.5 0.05 10.0 0.05
+#pragma parameter RAA_DVT1 "rAA-3x 1 Deviation"  1.0 0.05 10.0 0.05
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP; // multiplied with Position in the vertex main()
+} global;
+// Everything from here to the next stage pragma belongs to the vertex stage.
+#pragma stage vertex
+layout(location = 0) in vec4 Position;   // vertex position, transformed by global.MVP
+layout(location = 1) in vec2 TexCoord;   // copied unchanged to vTexCoord
+layout(location = 0) out vec2 vTexCoord; // handed on to the fragment stage
+
+void main()
+{
+	// Pass the texture coordinate through untouched and transform the vertex by MVP.
+	vTexCoord = TexCoord;
+	gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;   // texture coordinate written by the vertex stage
+layout(location = 0) out vec4 FragColor;  // colour written at the end of main()
+layout(set = 0, binding = 2) uniform sampler2D Source; // texture sampled by main()
+
+const int scl = 3; // scale factor; main() weights the tilt by (scl-1)/scl
+const int rad = 7; // search radius: texels read on each side of the centre (array holds 2*rad+1)
+
+// core function of rAA - tilt of a pixel
+vec3 res2x(vec3 pre2, vec3 pre1, vec3 px, vec3 pos1, vec3 pos2)
+{
+	// successive differences along the run pre2, pre1, px, pos1, pos2
+	vec3 g0 = pre1 - pre2;
+	vec3 g1 = px - pre1;
+	vec3 g2 = pos1 - px;
+	vec3 g3 = pos2 - pos1;
+
+	// per channel: px where px < 0.5, otherwise 1 - px
+	vec3 room = vec3((px.x < 0.5) ? px.x : (1.0-px.x),
+	                 (px.y < 0.5) ? px.y : (1.0-px.y),
+	                 (px.z < 0.5) ? px.z : (1.0-px.z));
+
+	vec3 limit = params.RAA_SHR1 * min(room, min(abs(g1), abs(g2))); // magnitude
+	vec3 tilt  = (7 * (g1 + g2) - 3 * (g0 + g3)) / 16;               // tilt
+
+	// variant 1: limit channels
+	vec3 byChannel = clamp(tilt, -limit, limit);
+
+	// variant 2: limit length, using the tightest limit/tilt ratio
+	vec3 ratio = vec3(tilt.x == 0.0 ? 1.0 : limit.x/abs(tilt.x),
+	                  tilt.y == 0.0 ? 1.0 : limit.y/abs(tilt.y),
+	                  tilt.z == 0.0 ? 1.0 : limit.z/abs(tilt.z));
+	vec3 byLength = min(1.0, min(min(ratio.x, ratio.y), ratio.z)) * tilt;
+
+	// distance between the lines (px, pre1) / (px, pos1) and the points px-t1 / px+t1
+	float len1 = length(g1);
+	float len2 = length(g2);
+	float off1 = (len1 == 0.0) ? 0.0 : length(cross(g1, byChannel))/len1;
+	float off2 = (len2 == 0.0) ? 0.0 : length(cross(g2, byChannel))/len2;
+
+	// color deviation from optimal value
+	float w = min(1.0, max(off1, off2)/0.8125);
+
+	return mix(byChannel, byLength, pow(w, params.RAA_DVT1));
+}
+
+void main()
+{
+	vec3 tx[2*rad+1];
+
+	#define TX(n) tx[(n)+rad]
+
+	// read the centre texel and `rad` neighbours above and below it
+	TX(0) = texture(Source, vTexCoord).rgb;
+	for (int k = 1; k <= rad; k++) {
+		TX(-k) = texture(Source, vTexCoord + vec2(0,-k)*params.SourceSize.zw).rgb;
+		TX( k) = texture(Source, vTexCoord + vec2(0, k)*params.SourceSize.zw).rgb;
+	}
+
+	// colour steps from the centre texel to its two direct neighbours
+	vec3 stepFwd = TX(1) - TX(0);
+	vec3 stepBwd = TX(0) - TX(-1);
+
+	// sliding window of three step lengths; .x walks towards negative
+	// offsets, .y towards positive offsets
+	vec2 dPrev;
+	vec2 dCur  = vec2(length(stepFwd), length(stepBwd));
+	vec2 dNext = dCur.yx;
+
+	// smoothness weight, protects smooth gradients
+	float stepSum = dCur.x + dCur.y;
+	float smoothW = (stepSum == 0.0) ? 1.0 : pow(length(stepFwd - stepBwd)/stepSum, params.RAA_SMT1);
+
+	// look for proper candidates: first and second offset where the middle
+	// window entry is strictly larger than both neighbours
+	ivec2 first  = ivec2(0);
+	ivec2 second = ivec2(0);
+	for (int k = 1; k < rad; k++) {
+		dPrev = dCur;
+		dCur  = dNext;
+		dNext = vec2(distance(TX(-k-1), TX(-k)), distance(TX(k), TX(k+1)));
+
+		bvec2 peak = lessThan(max(dPrev, dNext), dCur);
+		if (peak.x) {
+			if (first.x == 0) first.x = k;
+			else if (second.x == 0) second.x = k;
+		}
+		if (peak.y) {
+			if (first.y == 0) first.y = k;
+			else if (second.y == 0) second.y = k;
+		}
+	}
+
+	// no second candidate: use the texel right after the first one
+	if (second.x == 0) second.x = first.x + 1;
+	if (second.y == 0) second.y = first.y + 1;
+
+	// rAA core with the candidates found above
+	vec3 tilt = res2x(TX(-second.x), TX(-first.x), TX(0), TX(first.y), TX(second.y));
+
+	// distance weight, zero unless both sides produced a candidate
+	float distW = 0.0;
+	if (first.x != 0 && first.y != 0)
+		distW = 2.0 * ((first.x-1.0)/(first.x+first.y-2.0)) - 1.0;
+
+	// result
+	vec3 shifted = TX(0) + (scl-1.0)/scl * smoothW*distW * tilt;
+
+	// prevent ringing
+	vec3 lo = min(min(TX(-1), TX(0)), TX(1));
+	vec3 hi = max(max(TX(-1), TX(0)), TX(1));
+	FragColor = vec4(clamp(shifted, lo, hi), 1.0);
+}
--- a/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass1.vert
+++ b/anti-aliasing/shaders/reverse-aa-post3x/reverse-aa-post3x-pass1.vert
@ -0,0 +1,73 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Per-pass uniform block, read below as params.*; the vertex stage in this
+// file does not read any of its members.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float RAA_SHR1;
+   float RAA_SMT1;
+   float RAA_DVT1;
+}params;
+
+// Directive layout: name, label, default, minimum, maximum, step.
+#pragma parameter RAA_SHR1 "rAA-3x 1 Sharpness"  2.0 0.00 10.0 0.05
+#pragma parameter RAA_SMT1 "rAA-3x 1 Smoothness" 0.5 0.05 10.0 0.05
+#pragma parameter RAA_DVT1 "rAA-3x 1 Deviation"  1.0 0.05 10.0 0.05
+
+layout(std140) uniform UBO
+{
+   mat4 MVP; // multiplied with Position in main()
+}global;
+
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+ // Pass the texture coordinate through untouched and transform the vertex by MVP.
+ vTexCoord = TexCoord;
+ gl_Position = global.MVP * Position;
+}
+
--- a/anti-aliasing/shaders/reverse-aa.frag
+++ b/anti-aliasing/shaders/reverse-aa.frag
@ -0,0 +1,91 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Per-pass uniform block, read below as params.*
+uniform Push
+{
+   vec4 SourceSize;           // .xy scales vTexCoord before fract(); .zw is the per-texel offset
+   vec4 OutputSize;           // not referenced by the code in this file
+   float REVERSEAA_SHARPNESS; // scales the clamp bound in res2x()
+}params;
+
+// Directive layout: name, label, default, minimum, maximum, step.
+#pragma parameter REVERSEAA_SHARPNESS "ReverseAA Sharpness" 2.0 0.0 10.0 0.01
+
+layout(std140) uniform UBO
+{
+   mat4 MVP; // not referenced by the fragment code in this file
+}global;
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+
+
+// Clamped tilt of the centre texel `px` within the run pre2, pre1, px, pos1, pos2.
+vec3 res2x(vec3 pre2, vec3 pre1, vec3 px, vec3 pos1, vec3 pos2)
+{
+    // Successive differences along the run.
+    vec3 g0 = pre1 - pre2;
+    vec3 g1 = px - pre1;
+    vec3 g2 = pos1 - px;
+    vec3 g3 = pos2 - pos1;
+
+    // Clamp bound: the smallest of 0.5 - |px - 0.5| and the two inner step
+    // magnitudes, scaled by the sharpness parameter.
+    vec3 bound = 0.5 - abs(px - 0.5);
+    bound = params.REVERSEAA_SHARPNESS * min(bound, min(abs(g1), abs(g2)));
+
+    // Weighted combination of inner and outer steps, limited per channel.
+    vec3 tilt = (7 * (g1 + g2) - 3 * (g0 + g3)) / 16;
+    return clamp(tilt, -bound, bound);
+}
+
+void main()
+{
+   vec2 texel = params.SourceSize.zw;
+
+   // Offset of this fragment from its texel centre, then the centre itself.
+   vec2 pos = fract(vTexCoord * params.SourceSize.xy) - vec2(0.5, 0.5);
+   vec2 coord = vTexCoord - pos * texel;
+
+   // Centre texel plus two neighbours on each side, along x and along y.
+   vec3 E = texture(Source, coord + texel * vec2(0, 0)).rgb;
+   vec3 xm2 = texture(Source, coord + texel * vec2(- 2, 0)).rgb;
+   vec3 xm1 = texture(Source, coord + texel * vec2(- 1, 0)).rgb;
+   vec3 xp1 = texture(Source, coord + texel * vec2(1, 0)).rgb;
+   vec3 xp2 = texture(Source, coord + texel * vec2(2, 0)).rgb;
+   vec3 ym2 = texture(Source, coord + texel * vec2(0, - 2)).rgb;
+   vec3 ym1 = texture(Source, coord + texel * vec2(0, - 1)).rgb;
+   vec3 yp1 = texture(Source, coord + texel * vec2(0, 1)).rgb;
+   vec3 yp2 = texture(Source, coord + texel * vec2(0, 2)).rgb;
+
+   // Tilt along each axis, weighted by the sub-texel offset on that axis.
+   vec3 _tx = res2x(xm2, xm1, E, xp1, xp2) * pos.x;
+   vec3 _ty = res2x(ym2, ym1, E, yp1, yp2) * pos.y;
+
+   FragColor = vec4(clamp(E + 2.0 * (_tx + _ty), 0.0, 1.0), 1.0);
+}
--- a/anti-aliasing/shaders/reverse-aa.gsh
+++ b/anti-aliasing/shaders/reverse-aa.gsh
--- a/anti-aliasing/shaders/reverse-aa.ppslang
+++ b/anti-aliasing/shaders/reverse-aa.ppslang
@ -0,0 +1,100 @@
+#version 450
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Per-pass push constants, read below as params.*
+layout(push_constant) uniform Push
+{
+ vec4 SourceSize;           // .xy scales vTexCoord before fract(); .zw is the per-texel offset
+ vec4 OutputSize;           // not referenced by the code in this file
+ float REVERSEAA_SHARPNESS; // scales the clamp bound in res2x()
+} params;
+
+// Directive layout: name, label, default, minimum, maximum, step.
+#pragma parameter REVERSEAA_SHARPNESS "ReverseAA Sharpness" 2.0 0.0 10.0 0.01
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+ mat4 MVP; // multiplied with Position in the vertex stage
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Pass the texture coordinate through untouched and transform the vertex by MVP.
+   vTexCoord = TexCoord;
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;   // texture coordinate written by the vertex stage
+layout(location = 0) out vec4 FragColor;  // colour written at the end of main()
+layout(set = 0, binding = 2) uniform sampler2D Source; // texture sampled by main()
+
+
+
+// Clamped tilt of the centre texel `px` within the run pre2, pre1, px, pos1, pos2.
+vec3 res2x(vec3 pre2, vec3 pre1, vec3 px, vec3 pos1, vec3 pos2)
+{
+    // Successive differences along the run.
+    vec3 g0 = pre1 - pre2;
+    vec3 g1 = px - pre1;
+    vec3 g2 = pos1 - px;
+    vec3 g3 = pos2 - pos1;
+
+    // Clamp bound: the smallest of 0.5 - |px - 0.5| and the two inner step
+    // magnitudes, scaled by the sharpness parameter.
+    vec3 bound = 0.5 - abs(px - 0.5);
+    bound = params.REVERSEAA_SHARPNESS * min(bound, min(abs(g1), abs(g2)));
+
+    // Weighted combination of inner and outer steps, limited per channel.
+    vec3 tilt = (7 * (g1 + g2) - 3 * (g0 + g3)) / 16;
+    return clamp(tilt, -bound, bound);
+}
+
+void main()
+{
+   vec2 texel = params.SourceSize.zw;
+
+   // Offset of this fragment from its texel centre, then the centre itself.
+   vec2 pos = fract(vTexCoord * params.SourceSize.xy) - vec2(0.5, 0.5);
+   vec2 coord = vTexCoord - pos * texel;
+
+   // Centre texel plus two neighbours on each side, along x and along y.
+   vec3 E = texture(Source, coord + texel * vec2(0, 0)).rgb;
+   vec3 xm2 = texture(Source, coord + texel * vec2(- 2, 0)).rgb;
+   vec3 xm1 = texture(Source, coord + texel * vec2(- 1, 0)).rgb;
+   vec3 xp1 = texture(Source, coord + texel * vec2(1, 0)).rgb;
+   vec3 xp2 = texture(Source, coord + texel * vec2(2, 0)).rgb;
+   vec3 ym2 = texture(Source, coord + texel * vec2(0, - 2)).rgb;
+   vec3 ym1 = texture(Source, coord + texel * vec2(0, - 1)).rgb;
+   vec3 yp1 = texture(Source, coord + texel * vec2(0, 1)).rgb;
+   vec3 yp2 = texture(Source, coord + texel * vec2(0, 2)).rgb;
+
+   // Tilt along each axis, weighted by the sub-texel offset on that axis.
+   vec3 _tx = res2x(xm2, xm1, E, xp1, xp2) * pos.x;
+   vec3 _ty = res2x(ym2, ym1, E, yp1, yp2) * pos.y;
+
+   FragColor = vec4(clamp(E + 2.0 * (_tx + _ty), 0.0, 1.0), 1.0);
+}
--- a/anti-aliasing/shaders/reverse-aa.slang
+++ b/anti-aliasing/shaders/reverse-aa.slang
@ -0,0 +1,100 @@
+#version 450
+
+/*
+   Reverse Antialiasing Shader
+
+   Adapted from the C source (see Copyright below) to shader
+   cg language by Hyllian/Jararaca - sergiogdb@gmail.com
+
+   This shader works best in 2x scale.
+
+*/
+
+/*
+ *
+ *  Copyright (c) 2012, Christoph Feck <christoph@maxiom.de>
+ *  All Rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *
+ *    * Redistributions of source code must retain the above copyright notice,
+ *      this list of conditions and the following disclaimer.
+ *
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+layout(push_constant) uniform Push // per-pass values, read below as params.*
+{
+	vec4 SourceSize;           // .xy scales vTexCoord before fract(); .zw is the per-texel offset (see P)
+	vec4 OutputSize;           // not referenced by the code in this file
+	float REVERSEAA_SHARPNESS; // scales the clamp bound in res2x()
+} params;
+// Directive for the member above: name, label, then four numeric fields.
+#pragma parameter REVERSEAA_SHARPNESS "ReverseAA Sharpness" 2.0 0.0 10.0 0.01
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP; // multiplied with Position in the vertex main()
+} global;
+// Everything from here to the next stage pragma belongs to the vertex stage.
+#pragma stage vertex
+layout(location = 0) in vec4 Position;   // vertex position, transformed by global.MVP
+layout(location = 1) in vec2 TexCoord;   // copied unchanged to vTexCoord
+layout(location = 0) out vec2 vTexCoord; // handed on to the fragment stage
+
+void main()
+{
+   // Pass the texture coordinate through untouched and transform the vertex by MVP.
+   vTexCoord = TexCoord;
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;   // texture coordinate written by the vertex stage
+layout(location = 0) out vec4 FragColor;  // colour written at the end of main()
+layout(set = 0, binding = 2) uniform sampler2D Source; // texture sampled through P()
+// P(x,y): RGB of Source at `coord` shifted by (x, y) texel steps; expects a variable named `coord` in scope at the call site.
+#define P(x,y) texture(Source, coord + params.SourceSize.zw * vec2(x, y)).rgb
+
+// Clamped tilt of the centre texel `px` within the run pre2, pre1, px, pos1, pos2.
+vec3 res2x(vec3 pre2, vec3 pre1, vec3 px, vec3 pos1, vec3 pos2)
+{
+    // Successive differences along the run.
+    vec3 g0 = pre1 - pre2;
+    vec3 g1 = px - pre1;
+    vec3 g2 = pos1 - px;
+    vec3 g3 = pos2 - pos1;
+
+    // Clamp bound: the smallest of 0.5 - |px - 0.5| and the two inner step
+    // magnitudes, scaled by the sharpness parameter.
+    vec3 bound = 0.5 - abs(px - 0.5);
+    bound = params.REVERSEAA_SHARPNESS * min(bound, min(abs(g1), abs(g2)));
+
+    // Weighted combination of inner and outer steps, limited per channel.
+    vec3 tilt = (7 * (g1 + g2) - 3 * (g0 + g3)) / 16;
+    return clamp(tilt, -bound, bound);
+}
+
+void main()
+{
+   // Offset of this fragment from its texel centre, then the centre itself.
+   // `coord` keeps its name because the P() macro refers to it.
+   vec2 pos = fract(vTexCoord * params.SourceSize.xy) - vec2(0.5, 0.5);
+   vec2 coord = vTexCoord - pos * params.SourceSize.zw;
+
+   // Centre texel plus two neighbours on each side, along x and along y.
+   vec3 E = P(0, 0);
+   vec3 xm2 = P(-2, 0);
+   vec3 xm1 = P(-1, 0);
+   vec3 xp1 = P(1, 0);
+   vec3 xp2 = P(2, 0);
+   vec3 ym2 = P(0, -2);
+   vec3 ym1 = P(0, -1);
+   vec3 yp1 = P(0, 1);
+   vec3 yp2 = P(0, 2);
+
+   // Tilt along each axis, weighted by the sub-texel offset on that axis.
+   vec3 _tx = res2x(xm2, xm1, E, xp1, xp2) * pos.x;
+   vec3 _ty = res2x(ym2, ym1, E, yp1, yp2) * pos.y;
+
+   FragColor = vec4(clamp(E + 2.0*(_tx + _ty), 0.0, 1.0), 1.0);
+}
--- a/anti-aliasing/shaders/reverse-aa.vert
+++ b/anti-aliasing/shaders/reverse-aa.vert
@ -0,0 +1,68 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Per-pass uniform block, read below as params.*; the vertex stage in this
+// file does not read any of its members.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OutputSize;
+   float REVERSEAA_SHARPNESS;
+}params;
+
+// Directive layout: name, label, default, minimum, maximum, step.
+#pragma parameter REVERSEAA_SHARPNESS "ReverseAA Sharpness" 2.0 0.0 10.0 0.01
+
+layout(std140) uniform UBO
+{
+   mat4 MVP; // multiplied with Position in main()
+}global;
+
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Pass the texture coordinate through untouched and transform the vertex by MVP.
+   vTexCoord = TexCoord;
+   gl_Position = global.MVP * Position;
+}
+
--- a/anti-aliasing/shaders/shock.frag
+++ b/anti-aliasing/shaders/shock.frag
@ -0,0 +1,60 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+// Per-pass uniform block, read below as params.*
+uniform Push
+{
+   vec4 SourceSize;      // not referenced by the code in this file
+   vec4 OriginalSize;    // not referenced by the code in this file
+   vec4 OutputSize;      // .zw supplies the neighbour sampling offsets in main()
+   uint FrameCount;      // not referenced by the code in this file
+   float shockMagnitude; // length the Diffusion vector is rescaled to in main()
+}params;
+
+// Directive layout: name, label, default, minimum, maximum, step.
+#pragma parameter shockMagnitude "Shock Magnitude" 0.0 0.0 4.0 0.1
+
+layout(std140) uniform UBO
+{
+   mat4 MVP; // not referenced by the fragment code in this file
+}global;
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+// All-ones vector; dot() with it sums the components of the other operand.
+vec3 ones = vec3(1.0, 1.0, 1.0);
+
+void main()
+{
+    // Sampling offsets built from params.OutputSize.zw (presumably the
+    // reciprocal output dimensions - confirm against the host): one step
+    // along x and one step along y.
+    vec3 inc = vec3(params.OutputSize.zw, 0.0);
+    vec2 stepX = inc.xz;
+    vec2 stepY = inc.zy;
+
+    // Centre sample and its four axis neighbours.
+    vec3 centre = texture(Source, vTexCoord).xyz;
+    vec3 plusY = texture(Source, vTexCoord + stepY).xyz;
+    vec3 minusY = texture(Source, vTexCoord - stepY).xyz;
+    vec3 plusX = texture(Source, vTexCoord + stepX).xyz;
+    vec3 minusX = texture(Source, vTexCoord - stepX).xyz;
+
+    // Four times the centre minus the four neighbours.
+    vec3 Convexity = 4.0 * centre - plusX - minusX - plusY - minusY;
+
+    // Per-axis central difference weighted by the convexity, summed over channels.
+    vec2 flow = vec2(dot((plusX - minusX) * Convexity, ones),
+                     dot((plusY - minusY) * Convexity, ones));
+
+    // Rescale to length shockMagnitude; the epsilon avoids a division by zero.
+    flow *= params.shockMagnitude / (length(flow) + 0.00001);
+
+    // On each axis, blend in the neighbour selected by the sign of the flow.
+    vec3 pullX;
+    if (flow.x > 0.0)
+        pullX = flow.x * plusX;
+    else
+        pullX = - flow.x * minusX;
+
+    vec3 pullY;
+    if (flow.y > 0.0)
+        pullY = flow.y * plusY;
+    else
+        pullY = - flow.y * minusY;
+
+    centre += pullX + pullY;
+
+    // Divide by one plus the total weight that was added.
+    FragColor = vec4(centre / (1.0 + dot(abs(flow), ones.xy)), 1.0);
+}
--- a/anti-aliasing/shaders/shock.gsh
+++ b/anti-aliasing/shaders/shock.gsh
--- a/anti-aliasing/shaders/shock.ppslang
+++ b/anti-aliasing/shaders/shock.ppslang
@ -0,0 +1,69 @@
+#version 450
+
+
+
+
+
+// Per-pass push constants, read below as params.*
+layout(push_constant) uniform Push
+{
+ vec4 SourceSize;      // not referenced by the code in this file
+ vec4 OriginalSize;    // not referenced by the code in this file
+ vec4 OutputSize;      // .zw supplies the neighbour sampling offsets in the fragment main()
+ uint FrameCount;      // not referenced by the code in this file
+ float shockMagnitude; // length the Diffusion vector is rescaled to in the fragment main()
+} params;
+
+// Directive layout: name, label, default, minimum, maximum, step.
+#pragma parameter shockMagnitude "Shock Magnitude" 0.0 0.0 4.0 0.1
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+ mat4 MVP; // multiplied with Position in the vertex stage
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Pass the texture coordinate through untouched and transform the vertex by MVP.
+   vTexCoord = TexCoord;
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;   // texture coordinate written by the vertex stage
+layout(location = 0) out vec4 FragColor;  // colour written at the end of main()
+layout(set = 0, binding = 2) uniform sampler2D Source; // texture sampled by main()
+
+// All-ones vector; dot() with it sums the components of the other operand.
+const vec3 ones = vec3(1.0, 1.0, 1.0);
+
+void main()
+{
+    // Sampling offsets built from params.OutputSize.zw (presumably the
+    // reciprocal output dimensions - confirm against the host): one step
+    // along x and one step along y.
+    vec3 inc = vec3(params.OutputSize.zw, 0.0);
+    vec2 stepX = inc.xz;
+    vec2 stepY = inc.zy;
+
+    // Centre sample and its four axis neighbours.
+    vec3 centre = texture(Source, vTexCoord).xyz;
+    vec3 plusY = texture(Source, vTexCoord + stepY).xyz;
+    vec3 minusY = texture(Source, vTexCoord - stepY).xyz;
+    vec3 plusX = texture(Source, vTexCoord + stepX).xyz;
+    vec3 minusX = texture(Source, vTexCoord - stepX).xyz;
+
+    // Four times the centre minus the four neighbours.
+    vec3 Convexity = 4.0 * centre - plusX - minusX - plusY - minusY;
+
+    // Per-axis central difference weighted by the convexity, summed over channels.
+    vec2 flow = vec2(dot((plusX - minusX) * Convexity, ones),
+                     dot((plusY - minusY) * Convexity, ones));
+
+    // Rescale to length shockMagnitude; the epsilon avoids a division by zero.
+    flow *= params.shockMagnitude / (length(flow) + 0.00001);
+
+    // On each axis, blend in the neighbour selected by the sign of the flow.
+    vec3 pullX;
+    if (flow.x > 0.0)
+        pullX = flow.x * plusX;
+    else
+        pullX = - flow.x * minusX;
+
+    vec3 pullY;
+    if (flow.y > 0.0)
+        pullY = flow.y * plusY;
+    else
+        pullY = - flow.y * minusY;
+
+    centre += pullX + pullY;
+
+    // Divide by one plus the total weight that was added.
+    FragColor = vec4(centre / (1.0 + dot(abs(flow), ones.xy)), 1.0);
+}
--- a/anti-aliasing/shaders/shock.slang
+++ b/anti-aliasing/shaders/shock.slang
@ -0,0 +1,69 @@
+#version 450
+
+// based on Nvidia's GPU Gems article:
+// https://developer.nvidia.com/gpugems/GPUGems2/gpugems2_chapter27.html
+// slang shader by torridgristle and hunterk
+
+layout(push_constant) uniform Push // per-pass values, read below as params.*
+{
+	vec4 SourceSize;      // not referenced by the code in this file
+	vec4 OriginalSize;    // not referenced by the code in this file
+	vec4 OutputSize;      // .zw supplies the neighbour sampling offsets in the fragment main()
+	uint FrameCount;      // not referenced by the code in this file
+	float shockMagnitude; // length the Diffusion vector is rescaled to in the fragment main()
+} params;
+// Directive for the member above: name, label, then four numeric fields.
+#pragma parameter shockMagnitude "Shock Magnitude" 0.0 0.0 4.0 0.1
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP; // multiplied with Position in the vertex main()
+} global;
+// Everything from here to the next stage pragma belongs to the vertex stage.
+#pragma stage vertex
+layout(location = 0) in vec4 Position;   // vertex position, transformed by global.MVP
+layout(location = 1) in vec2 TexCoord;   // copied unchanged to vTexCoord
+layout(location = 0) out vec2 vTexCoord; // handed on to the fragment stage
+
+void main()
+{
+   // Pass the texture coordinate through untouched and transform the vertex by MVP.
+   vTexCoord = TexCoord;
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;   // texture coordinate written by the vertex stage
+layout(location = 0) out vec4 FragColor;  // colour written at the end of main()
+layout(set = 0, binding = 2) uniform sampler2D Source; // texture sampled by main()
+// Euler is defined here but not referenced by the code in this file.
+#define Euler 2.718281828459045
+// All-ones vector; dot() with it sums the components of the other operand.
+const vec3 ones = vec3(1.0, 1.0, 1.0);
+
+void main()
+{
+    // Sampling offsets built from params.OutputSize.zw (presumably the
+    // reciprocal output dimensions - confirm against the host): one step
+    // along x and one step along y.
+    vec3 inc = vec3(params.OutputSize.zw, 0.0);
+    vec2 stepX = inc.xz;
+    vec2 stepY = inc.zy;
+
+    // Centre sample and its four axis neighbours.
+    vec3 centre = texture(Source, vTexCoord).xyz;
+    vec3 plusY  = texture(Source, vTexCoord + stepY).xyz;
+    vec3 minusY = texture(Source, vTexCoord - stepY).xyz;
+    vec3 plusX  = texture(Source, vTexCoord + stepX).xyz;
+    vec3 minusX = texture(Source, vTexCoord - stepX).xyz;
+
+    // Four times the centre minus the four neighbours.
+    vec3 Convexity = 4.0 * centre - plusX - minusX - plusY - minusY;
+
+    // Per-axis central difference weighted by the convexity, summed over channels.
+    vec2 flow = vec2(dot((plusX - minusX) * Convexity, ones),
+                     dot((plusY - minusY) * Convexity, ones));
+
+    // Rescale to length shockMagnitude; the epsilon avoids a division by zero.
+    flow *= params.shockMagnitude/(length(flow) + 0.00001);
+
+    // On each axis, blend in the neighbour selected by the sign of the flow.
+    vec3 pullX;
+    if (flow.x > 0.0)
+        pullX = flow.x * plusX;
+    else
+        pullX = -flow.x * minusX;
+
+    vec3 pullY;
+    if (flow.y > 0.0)
+        pullY = flow.y * plusY;
+    else
+        pullY = -flow.y * minusY;
+
+    centre += pullX + pullY;
+
+    // Divide by one plus the total weight that was added.
+    FragColor = vec4(centre/(1.0 + dot(abs(flow), ones.xy)), 1.0);
+}
--- a/anti-aliasing/shaders/shock.vert
+++ b/anti-aliasing/shaders/shock.vert
@ -0,0 +1,35 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+// Per-pass uniform block, read below as params.*; the vertex stage in this
+// file does not read any of its members.
+uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float shockMagnitude;
+}params;
+
+// Directive layout: name, label, default, minimum, maximum, step.
+#pragma parameter shockMagnitude "Shock Magnitude" 0.0 0.0 4.0 0.1
+
+layout(std140) uniform UBO
+{
+   mat4 MVP; // multiplied with Position in main()
+}global;
+
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   // Pass the texture coordinate through untouched and transform the vertex by MVP.
+   vTexCoord = TexCoord;
+   gl_Position = global.MVP * Position;
+}
+
--- a/anti-aliasing/shaders/smaa/AreaTex.png
+++ b/anti-aliasing/shaders/smaa/AreaTex.png
--- a/anti-aliasing/shaders/smaa/SearchTex.png
+++ b/anti-aliasing/shaders/smaa/SearchTex.png
--- a/anti-aliasing/shaders/smaa/msaa.slang
+++ b/anti-aliasing/shaders/smaa/msaa.slang
@ -0,0 +1,667 @@
+#version 450
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+/**
+ * Copyright (C) 2013 Jorge Jimenez (jorge@iryoku.com)
+ * Copyright (C) 2013 Jose I. Echevarria (joseignacioechevarria@gmail.com)
+ * Copyright (C) 2013 Belen Masia (bmasia@unizar.es)
+ * Copyright (C) 2013 Fernando Navarro (fernandn@microsoft.com)
+ * Copyright (C) 2013 Diego Gutierrez (diegog@unizar.es)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to
+ * do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software. As clarification, there
+ * is no requirement that the copyright notice and permission be included in
+ * binary distributions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+/**
+ *                  _______  ___  ___       ___           ___
+ *                 /       ||   \/   |     /   \         /   \
+ *                |   (---- |  \  /  |    /  ^  \       /  ^  \
+ *                 \   \    |  |\/|  |   /  /_\  \     /  /_\  \
+ *              ----)   |   |  |  |  |  /  _____  \   /  _____  \
+ *             |_______/    |__|  |__| /__/     \__\ /__/     \__\
+ * 
+ *                               E N H A N C E D
+ *       S U B P I X E L   M O R P H O L O G I C A L   A N T I A L I A S I N G
+ *
+ *                         http://www.iryoku.com/smaa/
+ *
+ * Hi, welcome aboard!
+ * 
+ * Here you'll find instructions to get the shader up and running as fast as
+ * possible.
+ *
+ * IMPORTANT NOTICE: when updating, remember to update both this file and the
+ * precomputed textures! They may change from version to version.
+ *
+ * The shader has three passes, chained together as follows:
+ *
+ *                           |input|------------------+
+ *                              v                     |
+ *                    [ SMAA*EdgeDetection ]          |
+ *                              v                     |
+ *                          |edgesTex|                |
+ *                              v                     |
+ *              [ SMAABlendingWeightCalculation ]     |
+ *                              v                     |
+ *                          |blendTex|                |
+ *                              v                     |
+ *                [ SMAANeighborhoodBlending ] <------+
+ *                              v
+ *                           |output|
+ *
+ * Note that each [pass] has its own vertex and pixel shader. Remember to use
+ * oversized triangles instead of quads to avoid overshading along the
+ * diagonal.
+ *
+ * You've three edge detection methods to choose from: luma, color or depth.
+ * They represent different quality/performance and anti-aliasing/sharpness
+ * tradeoffs, so our recommendation is for you to choose the one that best
+ * suits your particular scenario:
+ *
+ * - Depth edge detection is usually the fastest but it may miss some edges.
+ *
+ * - Luma edge detection is usually more expensive than depth edge detection,
+ *   but catches visible edges that depth edge detection can miss.
+ *
+ * - Color edge detection is usually the most expensive one but catches
+ *   chroma-only edges.
+ *
+ * For quickstarters: just use luma edge detection.
+ *
+ * The general advice is to not rush the integration process and ensure each
+ * step is done correctly (don't try to integrate SMAA T2x with predicated edge
+ * detection from the start!). Ok then, let's go!
+ *
+ *  1. The first step is to create two RGBA temporal render targets for holding
+ *     |edgesTex| and |blendTex|.
+ *
+ *     In DX10 or DX11, you can use a RG render target for the edges texture.
+ *     In the case of NVIDIA GPUs, using RG render targets seems to actually be
+ *     slower.
+ *
+ *     On the Xbox 360, you can use the same render target for resolving both
+ *     |edgesTex| and |blendTex|, as they aren't needed simultaneously.
+ *
+ *  2. Both temporal render targets |edgesTex| and |blendTex| must be cleared
+ *     each frame. Do not forget to clear the alpha channel!
+ *
+ *  3. The next step is loading the two supporting precalculated textures,
+ *     'areaTex' and 'searchTex'. You'll find them in the 'Textures' folder as
+ *     C++ headers, and also as regular DDS files. They'll be needed for the
+ *     'SMAABlendingWeightCalculation' pass.
+ *
+ *     If you use the C++ headers, be sure to load them in the format specified
+ *     inside of them.
+ *
+ *     You can also compress 'areaTex' and 'searchTex' using BC5 and BC4
+ *     respectively, if you have that option in your content processor pipeline.
+ *     When compressing them, you get a non-perceptible quality decrease, and a
+ *     marginal performance increase.
+ *
+ *  4. All samplers must be set to linear filtering and clamp.
+ *
+ *     After you get the technique working, remember that 64-bit inputs have
+ *     half-rate linear filtering on GCN.
+ *
+ *     If SMAA is applied to 64-bit color buffers, switching to point filtering
+ *     when accessing them will increase the performance. Search for
+ *     'SMAASamplePoint' to see which textures may benefit from point
+ *     filtering, and where (which is basically the color input in the edge
+ *     detection and resolve passes).
+ *
+ *  5. All texture reads and buffer writes must be non-sRGB, with the exception
+ *     of the input read and the output write in
+ *     'SMAANeighborhoodBlending' (and only in this pass!). If sRGB reads in
+ *     this last pass are not possible, the technique will work anyway, but
+ *     will perform antialiasing in gamma space.
+ *
+ *     IMPORTANT: for best results the input read for the color/luma edge 
+ *     detection should *NOT* be sRGB.
+ *
+ *  6. Before including SMAA.h you'll have to setup the render target metrics,
+ *     the target and any optional configuration defines. Optionally you can
+ *     use a preset.
+ *
+ *     You have the following targets available: 
+ *         SMAA_HLSL_3
+ *         SMAA_HLSL_4
+ *         SMAA_HLSL_4_1
+ *         SMAA_GLSL_3 *
+ *         SMAA_GLSL_4 *
+ *
+ *         * (See SMAA_INCLUDE_VS and SMAA_INCLUDE_PS below).
+ *
+ *     And four presets:
+ *         SMAA_PRESET_LOW          (60% of the quality)
+ *         SMAA_PRESET_MEDIUM       (80% of the quality)
+ *         SMAA_PRESET_HIGH         (95% of the quality)
+ *         SMAA_PRESET_ULTRA        (99% of the quality)
+ *
+ *     For example:
+ *         #define SMAA_RT_METRICS vec4(1.0 / 1280.0, 1.0 / 720.0, 1280.0, 720.0)
+ *         #define SMAA_GLSL_4
+ *         #define SMAA_PRESET_HIGH
+ *         #include "SMAA.h"
+ *
+ *     Note that SMAA_RT_METRICS doesn't need to be a macro, it can be a
+ *     uniform variable. The code is designed to minimize the impact of not
+ *     using a constant value, but it is still better to hardcode it.
+ *
+ *     Depending on how you encoded 'areaTex' and 'searchTex', you may have to
+ *     add (and customize) the following defines before including SMAA.h:
+ *          #define SMAA_AREATEX_SELECT(sample) sample.rg
+ *          #define SMAA_SEARCHTEX_SELECT(sample) sample.r
+ *
+ *     If your engine is already using porting macros, you can define
+ *     SMAA_CUSTOM_SL, and define the porting functions by yourself.
+ *
+ *  7. Then, you'll have to setup the passes as indicated in the scheme above.
+ *     You can take a look into SMAA.fx, to see how we did it for our demo.
+ *     Checkout the function wrappers, you may want to copy-paste them!
+ *
+ *  8. It's recommended to validate the produced |edgesTex| and |blendTex|.
+ *     You can use a screenshot from your engine to compare the |edgesTex|
+ *     and |blendTex| produced inside of the engine with the results obtained
+ *     with the reference demo.
+ *
+ *  9. After you get the last pass to work, it's time to optimize. You'll have
+ *     to initialize a stencil buffer in the first pass (discard is already in
+ *     the code), then mask execution by using it the second pass. The last
+ *     pass should be executed in all pixels.
+ *
+ *
+ * After this point you can choose to enable predicated thresholding,
+ * temporal supersampling and motion blur integration:
+ *
+ * a) If you want to use predicated thresholding, take a look into
+ *    SMAA_PREDICATION; you'll need to pass an extra texture in the edge
+ *    detection pass.
+ *
+ * b) If you want to enable temporal supersampling (SMAA T2x):
+ *
+ * 1. The first step is to render using subpixel jitters. I won't go into
+ *    detail, but it's as simple as moving each vertex position in the
+ *    vertex shader, you can check how we do it in our DX10 demo.
+ *
+ * 2. Then, you must setup the temporal resolve. You may want to take a look
+ *    into SMAAResolve for resolving 2x modes. After you get it working, you'll
+ *    probably see ghosting everywhere. But fear not, you can enable the
+ *    CryENGINE temporal reprojection by setting the SMAA_REPROJECTION macro.
+ *    Check out SMAA_DECODE_VELOCITY if your velocity buffer is encoded.
+ *
+ * 3. The next step is to apply SMAA to each subpixel jittered frame, just as
+ *    done for 1x.
+ *
+ * 4. At this point you should already have something usable, but for best
+ *    results the proper area textures must be set depending on current jitter.
+ *    For this, the parameter 'subsampleIndices' of
+ *    'SMAABlendingWeightCalculationPS' must be set as follows, for our T2x
+ *    mode:
+ *
+ *    @SUBSAMPLE_INDICES
+ *
+ *    | S# |  Camera Jitter   |  subsampleIndices    |
+ *    +----+------------------+---------------------+
+ *    |  0 |  ( 0.25, -0.25)  |  float4(1, 1, 1, 0)  |
+ *    |  1 |  (-0.25,  0.25)  |  float4(2, 2, 2, 0)  |
+ *
+ *    These jitter positions assume a bottom-to-top y axis. S# stands for the
+ *    sample number.
+ *
+ * More information about temporal supersampling here:
+ *    http://iryoku.com/aacourse/downloads/13-Anti-Aliasing-Methods-in-CryENGINE-3.pdf
+ *
+ * c) If you want to enable spatial multisampling (SMAA S2x):
+ *
+ * 1. The scene must be rendered using MSAA 2x. The MSAA 2x buffer must be
+ *    created with:
+ *      - DX10:     see below (*)
+ *      - DX10.1:   D3D10_STANDARD_MULTISAMPLE_PATTERN or
+ *      - DX11:     D3D11_STANDARD_MULTISAMPLE_PATTERN
+ *
+ *    This allows to ensure that the subsample order matches the table in
+ *    @SUBSAMPLE_INDICES.
+ *
+ *    (*) In the case of DX10, we refer the reader to:
+ *      - SMAA::detectMSAAOrder and
+ *      - SMAA::msaaReorder
+ *
+ *    These functions allow to match the standard multisample patterns by
+ *    detecting the subsample order for a specific GPU, and reordering
+ *    them appropriately.
+ *
+ * 2. A shader must be run to output each subsample into a separate buffer
+ *    (DX10 is required). You can use SMAASeparate for this purpose, or just do
+ *    it in an existing pass (for example, in the tone mapping pass, which has
+ *    the advantage of feeding tone mapped subsamples to SMAA, which will yield
+ *    better results).
+ *
+ * 3. The full SMAA 1x pipeline must be run for each separated buffer, storing
+ *    the results in the final buffer. The second run should alpha blend with
+ *    the existing final buffer using a blending factor of 0.5.
+ *    'subsampleIndices' must be adjusted as in the SMAA T2x case (see point
+ *    b).
+ *
+ * d) If you want to enable temporal supersampling on top of SMAA S2x
+ *    (which actually is SMAA 4x):
+ *
+ * 1. SMAA 4x consists of temporally jittering SMAA S2x, so the first step is
+ *    to calculate SMAA S2x for current frame. In this case, 'subsampleIndices'
+ *    must be set as follows:
+ *
+ *    | F# | S# |   Camera Jitter    |    Net Jitter     |   subsampleIndices   |
+ *    +----+----+--------------------+-------------------+----------------------+
+ *    |  0 |  0 |  ( 0.125,  0.125)  |  ( 0.375, -0.125) |  float4(5, 3, 1, 3)  |
+ *    |  0 |  1 |  ( 0.125,  0.125)  |  (-0.125,  0.375) |  float4(4, 6, 2, 3)  |
+ *    +----+----+--------------------+-------------------+----------------------+
+ *    |  1 |  2 |  (-0.125, -0.125)  |  ( 0.125, -0.375) |  float4(3, 5, 1, 4)  |
+ *    |  1 |  3 |  (-0.125, -0.125)  |  (-0.375,  0.125) |  float4(6, 4, 2, 4)  |
+ *
+ *    These jitter positions assume a bottom-to-top y axis. F# stands for the
+ *    frame number. S# stands for the sample number.
+ *
+ * 2. After calculating SMAA S2x for current frame (with the new subsample
+ *    indices), previous frame must be reprojected as in SMAA T2x mode (see
+ *    point b).
+ *
+ * e) If motion blur is used, you may want to do the edge detection pass
+ *    together with motion blur. This has two advantages:
+ *
+ * 1. Pixels under heavy motion can be omitted from the edge detection process.
+ *    For these pixels we can just store "no edge", as motion blur will take
+ *    care of them.
+ * 2. The center pixel tap is reused.
+ *
+ * Note that in this case depth testing should be used instead of stenciling,
+ * as we have to write all the pixels in the motion blur pass.
+ *
+ * That's it!
+ */
+
+//-----------------------------------------------------------------------------
+// SMAA Presets
+
+/**
+ * Note that if you use one of these presets, the following configuration
+ * macros will be ignored if set in the "Configurable Defines" section.
+ */
+
+#if defined(SMAA_PRESET_LOW)
+#define SMAA_THRESHOLD 0.15
+#define SMAA_MAX_SEARCH_STEPS 4
+#define SMAA_DISABLE_DIAG_DETECTION
+#define SMAA_DISABLE_CORNER_DETECTION
+#elif defined(SMAA_PRESET_MEDIUM)
+#define SMAA_THRESHOLD 0.1
+#define SMAA_MAX_SEARCH_STEPS 8
+#define SMAA_DISABLE_DIAG_DETECTION
+#define SMAA_DISABLE_CORNER_DETECTION
+#elif defined(SMAA_PRESET_HIGH)
+#define SMAA_THRESHOLD 0.1
+#define SMAA_MAX_SEARCH_STEPS 16
+#define SMAA_MAX_SEARCH_STEPS_DIAG 8
+#define SMAA_CORNER_ROUNDING 25
+#elif defined(SMAA_PRESET_ULTRA)
+#define SMAA_THRESHOLD 0.05
+#define SMAA_MAX_SEARCH_STEPS 32
+#define SMAA_MAX_SEARCH_STEPS_DIAG 16
+#define SMAA_CORNER_ROUNDING 25
+#endif
+
+//-----------------------------------------------------------------------------
+// Configurable Defines
+
+/**
+ * SMAA_THRESHOLD specifies the threshold or sensitivity to edges.
+ * Lowering this value you will be able to detect more edges at the expense of
+ * performance. 
+ *
+ * Range: [0, 0.5]
+ *   0.1 is a reasonable value, and allows to catch most visible edges.
+ *   0.05 is a rather overkill value, that allows to catch 'em all.
+ *
+ *   If temporal supersampling is used, 0.2 could be a reasonable value, as low
+ *   contrast edges are properly filtered by just 2x.
+ */
+#ifndef SMAA_THRESHOLD
+#define SMAA_THRESHOLD 0.1
+#endif
+
+/**
+ * SMAA_DEPTH_THRESHOLD specifies the threshold for depth edge detection.
+ * 
+ * Range: depends on the depth range of the scene.
+ */
+#ifndef SMAA_DEPTH_THRESHOLD
+#define SMAA_DEPTH_THRESHOLD (0.1 * SMAA_THRESHOLD)
+#endif
+
+/**
+ * SMAA_MAX_SEARCH_STEPS specifies the maximum steps performed in the
+ * horizontal/vertical pattern searches, at each side of the pixel.
+ *
+ * In number of pixels, it's actually the double. So the maximum line length
+ * perfectly handled by, for example 16, is 64 (by perfectly, we meant that
+ * longer lines won't look as good, but still antialiased).
+ *
+ * Range: [0, 112]
+ */
+#ifndef SMAA_MAX_SEARCH_STEPS
+#define SMAA_MAX_SEARCH_STEPS 16
+#endif
+
+/**
+ * SMAA_MAX_SEARCH_STEPS_DIAG specifies the maximum steps performed in the
+ * diagonal pattern searches, at each side of the pixel. In this case we jump
+ * one pixel at time, instead of two.
+ *
+ * Range: [0, 20]
+ *
+ * On high-end machines it is cheap (between a 0.8x and 0.9x slower for 16 
+ * steps), but it can have a significant impact on older machines.
+ *
+ * Define SMAA_DISABLE_DIAG_DETECTION to disable diagonal processing.
+ */
+#ifndef SMAA_MAX_SEARCH_STEPS_DIAG
+#define SMAA_MAX_SEARCH_STEPS_DIAG 8
+#endif
+
+/**
+ * SMAA_CORNER_ROUNDING specifies how much sharp corners will be rounded.
+ *
+ * Range: [0, 100]
+ *
+ * Define SMAA_DISABLE_CORNER_DETECTION to disable corner processing.
+ */
+#ifndef SMAA_CORNER_ROUNDING
+#define SMAA_CORNER_ROUNDING 25
+#endif
+
+/**
+ * If there is a neighbor edge whose contrast is
+ * SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR times bigger than that of the current
+ * edge, the current edge will be discarded.
+ *
+ * This allows to eliminate spurious crossing edges, and is based on the fact
+ * that, if there is too much contrast in a direction, that will hide
+ * perceptually contrast in the other neighbors.
+ */
+#ifndef SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR
+#define SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR 2.0
+#endif
+
+/**
+ * Predicated thresholding allows to better preserve texture details and to
+ * improve performance, by decreasing the number of detected edges using an
+ * additional buffer like the light accumulation buffer, object ids or even the
+ * depth buffer (the depth buffer usage may be limited to indoor or short range
+ * scenes).
+ *
+ * It locally decreases the luma or color threshold if an edge is found in an
+ * additional buffer (so the global threshold can be higher).
+ *
+ * This method was developed by Playstation EDGE MLAA team, and used in 
+ * Killzone 3, by using the light accumulation buffer. More information here:
+ *     http://iryoku.com/aacourse/downloads/06-MLAA-on-PS3.pptx 
+ */
+#ifndef SMAA_PREDICATION
+#define SMAA_PREDICATION 0
+#endif
+
+/**
+ * Threshold to be used in the additional predication buffer. 
+ *
+ * Range: depends on the input, so you'll have to find the magic number that
+ * works for you.
+ */
+#ifndef SMAA_PREDICATION_THRESHOLD
+#define SMAA_PREDICATION_THRESHOLD 0.01
+#endif
+
+/**
+ * How much to scale the global threshold used for luma or color edge
+ * detection when using predication.
+ *
+ * Range: [1, 5]
+ */
+#ifndef SMAA_PREDICATION_SCALE
+#define SMAA_PREDICATION_SCALE 2.0
+#endif
+
+/**
+ * How much to locally decrease the threshold.
+ *
+ * Range: [0, 1]
+ */
+#ifndef SMAA_PREDICATION_STRENGTH
+#define SMAA_PREDICATION_STRENGTH 0.4
+#endif
+
+/**
+ * Temporal reprojection allows to remove ghosting artifacts when using
+ * temporal supersampling. We use the CryEngine 3 method which also introduces
+ * velocity weighting. This feature is of extreme importance for totally
+ * removing ghosting. More information here:
+ *    http://iryoku.com/aacourse/downloads/13-Anti-Aliasing-Methods-in-CryENGINE-3.pdf
+ *
+ * Note that you'll need to setup a velocity buffer for enabling reprojection.
+ * For static geometry, saving the previous depth buffer is a viable
+ * alternative.
+ */
+#ifndef SMAA_REPROJECTION
+#define SMAA_REPROJECTION 0
+#endif
+
+/**
+ * SMAA_REPROJECTION_WEIGHT_SCALE controls the velocity weighting. It allows to
+ * remove ghosting trails behind the moving object, which are not removed by
+ * just using reprojection. Using low values will exhibit ghosting, while using
+ * high values will disable temporal supersampling under motion.
+ *
+ * Behind the scenes, velocity weighting removes temporal supersampling when
+ * the velocity of the subsamples differs (meaning they are different objects).
+ *
+ * Range: [0, 80]
+ */
+#ifndef SMAA_REPROJECTION_WEIGHT_SCALE
+#define SMAA_REPROJECTION_WEIGHT_SCALE 30.0
+#endif
+
+/**
+ * On some compilers, discard cannot be used in vertex shaders. Thus, they need
+ * to be compiled separately.
+ */
+#ifndef SMAA_INCLUDE_VS
+#define SMAA_INCLUDE_VS 1
+#endif
+#ifndef SMAA_INCLUDE_PS
+#define SMAA_INCLUDE_PS 1
+#endif
+
+//-----------------------------------------------------------------------------
+// Texture Access Defines
+
+#ifndef SMAA_AREATEX_SELECT
+#if defined(SMAA_HLSL_3)
+#define SMAA_AREATEX_SELECT(sample) sample.ra
+#else
+#define SMAA_AREATEX_SELECT(sample) sample.rg
+#endif
+#endif
+
+#ifndef SMAA_SEARCHTEX_SELECT
+#define SMAA_SEARCHTEX_SELECT(sample) sample.r
+#endif
+
+#ifndef SMAA_DECODE_VELOCITY
+#define SMAA_DECODE_VELOCITY(sample) sample.rg
+#endif
+
+//-----------------------------------------------------------------------------
+// Non-Configurable Defines
+
+#define SMAA_AREATEX_MAX_DISTANCE 16
+#define SMAA_AREATEX_MAX_DISTANCE_DIAG 20
+#define SMAA_AREATEX_PIXEL_SIZE (1.0 / float2(160.0, 560.0))
+#define SMAA_AREATEX_SUBTEX_SIZE (1.0 / 7.0)
+#define SMAA_SEARCHTEX_SIZE float2(66.0, 33.0)
+#define SMAA_SEARCHTEX_PACKED_SIZE float2(64.0, 16.0)
+#define SMAA_CORNER_ROUNDING_NORM (float(SMAA_CORNER_ROUNDING) / 100.0)
+
+//-----------------------------------------------------------------------------
+// Porting Functions
+
+#if defined(SMAA_HLSL_3)
+#define SMAATexture2D(tex) sampler2D tex
+#define SMAATexturePass2D(tex) tex
+#define SMAASampleLevelZero(tex, coord) tex2Dlod(tex, float4(coord, 0.0, 0.0))
+#define SMAASampleLevelZeroPoint(tex, coord) tex2Dlod(tex, float4(coord, 0.0, 0.0))
+#define SMAASampleLevelZeroOffset(tex, coord, offset) tex2Dlod(tex, float4(coord + offset * SMAA_RT_METRICS.xy, 0.0, 0.0))
+#define SMAASample(tex, coord) tex2D(tex, coord)
+#define SMAASamplePoint(tex, coord) tex2D(tex, coord)
+#define SMAASampleOffset(tex, coord, offset) tex2D(tex, coord + offset * SMAA_RT_METRICS.xy)
+#define SMAA_FLATTEN [flatten]
+#define SMAA_BRANCH [branch]
+#endif
+#if defined(SMAA_HLSL_4) || defined(SMAA_HLSL_4_1)
+SamplerState LinearSampler { Filter = MIN_MAG_LINEAR_MIP_POINT; AddressU = Clamp; AddressV = Clamp; };
+SamplerState PointSampler { Filter = MIN_MAG_MIP_POINT; AddressU = Clamp; AddressV = Clamp; };
+#define SMAATexture2D(tex) Texture2D tex
+#define SMAATexturePass2D(tex) tex
+#define SMAASampleLevelZero(tex, coord) tex.SampleLevel(LinearSampler, coord, 0)
+#define SMAASampleLevelZeroPoint(tex, coord) tex.SampleLevel(PointSampler, coord, 0)
+#define SMAASampleLevelZeroOffset(tex, coord, offset) tex.SampleLevel(LinearSampler, coord, 0, offset)
+#define SMAASample(tex, coord) tex.Sample(LinearSampler, coord)
+#define SMAASamplePoint(tex, coord) tex.Sample(PointSampler, coord)
+#define SMAASampleOffset(tex, coord, offset) tex.Sample(LinearSampler, coord, offset)
+#define SMAA_FLATTEN [flatten]
+#define SMAA_BRANCH [branch]
+#define SMAATexture2DMS2(tex) Texture2DMS<float4, 2> tex
+#define SMAALoad(tex, pos, sample) tex.Load(pos, sample)
+#if defined(SMAA_HLSL_4_1)
+#define SMAAGather(tex, coord) tex.Gather(LinearSampler, coord, 0)
+#endif
+#endif
+#if defined(SMAA_GLSL_3) || defined(SMAA_GLSL_4)
+#define SMAATexture2D(tex) sampler2D tex
+#define SMAATexturePass2D(tex) tex
+#define SMAASampleLevelZero(tex, coord) textureLod(tex, coord, 0.0)
+#define SMAASampleLevelZeroPoint(tex, coord) textureLod(tex, coord, 0.0)
+#define SMAASampleLevelZeroOffset(tex, coord, offset) textureLodOffset(tex, coord, 0.0, offset)
+#define SMAASample(tex, coord) texture(tex, coord)
+#define SMAASamplePoint(tex, coord) texture(tex, coord)
+#define SMAASampleOffset(tex, coord, offset) texture(tex, coord, offset)
+#define SMAA_FLATTEN  
+#define SMAA_BRANCH
+#define lerp(a, b, t) mix(a, b, t)
+#define saturate(a) clamp(a, 0.0, 1.0)
+#if defined(SMAA_GLSL_4)
+#define mad(a, b, c) fma(a, b, c)
+#define SMAAGather(tex, coord) textureGather(tex, coord)
+#else
+#define mad(a, b, c) (a * b + c)
+#endif
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+#define int2 ivec2
+#define int3 ivec3
+#define int4 ivec4
+#define bool2 bvec2
+#define bool3 bvec3
+#define bool4 bvec4
+#endif
+
+// Target / preset selection and render-target metrics for this file.
+// NOTE(review): these defines appear AFTER the "SMAA Presets" conditional
+// (#if defined(SMAA_PRESET_LOW) ...) and the "Porting Functions" conditional
+// (#if defined(SMAA_GLSL_3) || defined(SMAA_GLSL_4)) earlier in this file, so
+// neither conditional sees SMAA_PRESET_MEDIUM or SMAA_GLSL_4 as defined when it
+// is evaluated. As written they only satisfy the "#error you must define the
+// shading language" guard below - confirm whether they were meant to sit above
+// the presets section.
+// NOTE(review): SMAA_RT_METRICS is hard-coded to a 1280x720 target here rather
+// than derived from the params sizes - confirm this is intentional.
+#define SMAA_GLSL_4
+#define SMAA_PRESET_MEDIUM
+//#include "SMAA.h"
+#define SMAA_RT_METRICS vec4(1.0 / 1280.0, 1.0 / 720.0, 1280.0, 720.0)
+
+#if !defined(SMAA_HLSL_3) && !defined(SMAA_HLSL_4) && !defined(SMAA_HLSL_4_1) && !defined(SMAA_GLSL_3) && !defined(SMAA_GLSL_4) && !defined(SMAA_CUSTOM_SL)
+#error you must define the shading language: SMAA_HLSL_*, SMAA_GLSL_* or SMAA_CUSTOM_SL
+#endif
+
+/**
+ * Samples the red channel of 'tex' at the current coordinate and at the two
+ * coordinates packed into offset[0] (.xy = left tap, .zw = top tap).
+ *
+ * Returns vec3(current, left, top).
+ */
+ vec3 SMAAGatherNeighbours(vec2 coord, vec4 offset[3], sampler2D tex)
+ {
+	return vec3(texture(tex, coord).r,
+	            texture(tex, offset[0].xy).r,
+	            texture(tex, offset[0].zw).r);
+ }
+
+/**
+ * Adjusts the threshold by means of predication.
+ *
+ * Samples the predication texture at the current pixel and at its left/top
+ * taps (via SMAAGatherNeighbours), flags an edge on each axis where the
+ * difference reaches SMAA_PREDICATION_THRESHOLD, and lowers the scaled
+ * threshold by SMAA_PREDICATION_STRENGTH on the flagged axes.
+ *
+ * Returns one threshold per axis: .x from the left tap, .y from the top tap.
+ *
+ * The return type is vec2: the returned expression is built from the
+ * two-component 'edges' vector, and GLSL has no implicit vec2 -> vec3
+ * conversion, so the former vec3 declaration could not compile.
+ */
+ vec2 SMAACalculatePredicatedThreshold(vec2 coord, vec4 offset[3], sampler2D predicationTex)
+ {
+	vec3 neighbours = SMAAGatherNeighbours(coord, offset, predicationTex);
+	// (current - left, current - top), as magnitudes.
+	vec2 delta = abs(neighbours.xx - neighbours.yz);
+	// 1.0 where the predication buffer shows an edge, 0.0 elsewhere.
+	vec2 edges = step(SMAA_PREDICATION_THRESHOLD, delta);
+	return SMAA_PREDICATION_SCALE * SMAA_THRESHOLD * (1.0 - SMAA_PREDICATION_STRENGTH * edges);
+ }
+
+/**
+ * Conditional move: every component of 'variable' whose flag in 'cond' is
+ * true is overwritten with the matching component of 'value'; components
+ * whose flag is false are left unchanged.
+ */
+ void SMAAMovc(bvec2 cond, inout vec2 variable, vec2 value) {
+    // mix() with a boolean selector picks 'value' where cond is true and
+    // 'variable' where it is false, component by component.
+    variable = mix(variable, value, cond);
+}
+ 
+// Four-component conditional move: each component of 'variable' whose flag in
+// 'cond' is true takes the matching component of 'value'; the rest are kept.
+void SMAAMovc(bvec4 cond, inout vec4 variable, vec4 value) {
+    // Component-wise boolean select (true -> value, false -> variable).
+    variable = mix(variable, value, cond);
+}
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+// Vertex stage: hands the texture coordinate to the fragment stage and
+// transforms the vertex position by the MVP matrix.
+void main()
+{
+   vTexCoord = TexCoord;
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+// Fragment stage: copies the RGB of Source at vTexCoord and forces alpha to 1.
+void main()
+{
+   vec3 rgb = texture(Source, vTexCoord).rgb;
+   FragColor = vec4(rgb, 1.0);
+}
--- a/anti-aliasing/shaders/smaa/smaa-blend-weight-calculation.slang
+++ b/anti-aliasing/shaders/smaa/smaa-blend-weight-calculation.slang
@ -0,0 +1,469 @@
+#version 450
+
+// Push constants for this pass. SourceSize is the member read by the
+// SMAA_RT_METRICS macro in smaa-common.h.
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+// Uniform buffer holding the matrix applied to Position in the vertex stage.
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+#include "smaa-common.h"
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 texcoord;
+layout(location = 1) out vec2 pixcoord;
+layout(location = 2) out vec4 offset[3];
+
+// Vertex stage of the blending-weight pass: besides the usual position and
+// texture coordinate it precomputes the sampling offsets and the search limits
+// consumed by the fragment stage.
+void main()
+{
+    // SMAA_RT_METRICS comes from smaa-common.h; presumably laid out as
+    // (1/width, 1/height, width, height) -- confirm against SourceSize.
+    vec4 rt = SMAA_RT_METRICS;
+    vec4 uv = TexCoord.xyxy;
+
+    gl_Position = global.MVP * Position;
+    texcoord = TexCoord;
+
+    // Texture coordinate scaled by rt.zw.
+    pixcoord = TexCoord * rt.zw;
+
+    // Start coordinates of the searches (see @PSEUDO_GATHER4):
+    //   offset[0].xy / .zw feed the left / right searches,
+    //   offset[1].xy / .zw feed the up / down searches.
+    offset[0] = fma(rt.xyxy, vec4(-0.25, -0.125,  1.25, -0.125), uv);
+    offset[1] = fma(rt.xyxy, vec4(-0.125, -0.25, -0.125,  1.25), uv);
+
+    // Coordinates at which each search loop must stop:
+    // (left, right, up, down) = start -/+ 2 * SMAA_MAX_SEARCH_STEPS texels.
+    vec4 searchSpan  = vec4(-2.0, 2.0, -2.0, 2.0) * float(SMAA_MAX_SEARCH_STEPS);
+    vec4 searchStart = vec4(offset[0].xz, offset[1].yw);
+    offset[2] = fma(rt.xxyy, searchSpan, searchStart);
+}
+
+#pragma stage fragment
+// Inputs written by the vertex stage of this pass.
+layout(location = 0) in vec2 texcoord;
+layout(location = 1) in vec2 pixcoord;
+layout(location = 2) in vec4 offset[3];
+layout(location = 0) out vec4 FragColor;
+// Source is handed to SMAABlendingWeightCalculationPS as its edges texture;
+// areaTex and searchTex are the two lookup textures that function reads.
+layout(set = 0, binding = 2) uniform sampler2D Source;
+layout(set = 0, binding = 3) uniform sampler2D areaTex;
+layout(set = 0, binding = 4) uniform sampler2D searchTex;
+
+//-----------------------------------------------------------------------------
+// Blending Weight Calculation Pixel Shader (Second Pass)
+
+/**
+ * Recovers two binary edge values from a single bilinear-filtered fetch.
+ *
+ * The fetch for 'e' is taken with a 0.25 offset so that it blends the R and G
+ * edges of two neighbouring texels:
+ *
+ *   +---G---+-------+
+ *   |   x o R   x   |
+ *   +-------+-------+
+ *
+ * When one of these edges is enabled the filtered values are:
+ *   Red:   (0.75 * X + 0.25 * 1) => 0.25 or 1.0
+ *   Green: (0.75 * 1 + 0.25 * X) => 0.75 or 1.0
+ *
+ * Red is remapped with r * |5r - 3.75| before rounding; green is rounded as-is.
+ * (wolframalpha.com: round(x * abs(5 * x - 5 * 0.75)) plot 0 to 1)
+ */
+vec2 SMAADecodeDiagBilinearAccess(vec2 e) {
+    float red = e.r;
+    vec2 remapped = vec2(red * abs(5.0 * red - 5.0 * 0.75), e.g);
+    return round(remapped);
+}
+
+// Four-component variant: the R and B channels get the r * |5r - 3.75| remap,
+// G and A are passed through, and the whole vector is then rounded.
+vec4 SMAADecodeDiagBilinearAccess(vec4 e) {
+    vec2 rb = e.rb;
+    vec2 remapped = rb * abs(5.0 * rb - 5.0 * 0.75);
+    return round(vec4(remapped.x, e.g, remapped.y, e.a));
+}
+
+/**
+ * Diagonal pattern search, plain variant.
+ *
+ * Steps from `texcoord` along `dir` (one texel per step) while both fetched
+ * edges stay set and the step budget SMAA_MAX_SEARCH_STEPS_DIAG is not used
+ * up. `e` receives the last fetched edge pair. Returns
+ * vec2(step counter, mean of the last edge pair).
+ */
+vec2 SMAASearchDiag1(sampler2D edgesTex, vec2 texcoord, vec2 dir, out vec2 e) {
+    // cursor.xy: sampling position; cursor.z: step counter (starts at -1);
+    // cursor.w: mean of the last fetched edges (starts above the 0.9 cutoff).
+    vec4 cursor = vec4(texcoord, -1.0, 1.0);
+    vec3 stride = vec3(SMAA_RT_METRICS.xy, 1.0);
+    float lastStep = float(SMAA_MAX_SEARCH_STEPS_DIAG - 1);
+
+    for (;;) {
+        if (!(cursor.z < lastStep) || !(cursor.w > 0.9))
+            break;
+        cursor.xyz = fma(stride, vec3(dir, 1.0), cursor.xyz);
+        e = textureLod(edgesTex, cursor.xy, 0.0).rg;
+        cursor.w = dot(e, vec2(0.5, 0.5));
+    }
+    return cursor.zw;
+}
+
+// Diagonal pattern search, bilinear variant (@SearchDiag2Optimization).
+// The start position is shifted by a quarter texel in x so that one filtered
+// fetch carries both edges; SMAADecodeDiagBilinearAccess separates them again.
+// (The non-optimized form would fetch .g at the position and .r one texel to
+// the right with two separate reads.)
+// `e` receives the last decoded edge pair. Returns
+// vec2(step counter, mean of the last decoded edge pair).
+vec2 SMAASearchDiag2(sampler2D edgesTex, vec2 texcoord, vec2 dir, out vec2 e) {
+    vec4 cursor = vec4(texcoord, -1.0, 1.0);
+    cursor.x += 0.25 * SMAA_RT_METRICS.x;
+    vec3 stride = vec3(SMAA_RT_METRICS.xy, 1.0);
+    float lastStep = float(SMAA_MAX_SEARCH_STEPS_DIAG - 1);
+
+    for (;;) {
+        if (!(cursor.z < lastStep) || !(cursor.w > 0.9))
+            break;
+        cursor.xyz = fma(stride, vec3(dir, 1.0), cursor.xyz);
+
+        // Fetch both edges at once using bilinear filtering, then decode:
+        vec2 filtered = textureLod(edgesTex, cursor.xy, 0.0).rg;
+        e = SMAADecodeDiagBilinearAccess(filtered);
+
+        cursor.w = dot(e, vec2(0.5, 0.5));
+    }
+    return cursor.zw;
+}
+
+/**
+ * Diagonal counterpart of SMAAArea: looks up the area for a diagonal distance
+ * pair `dist` and the crossing-edge code `e`. `offset` selects the
+ * sub-texture (subpixel offset).
+ */
+vec2 SMAAAreaDiag(sampler2D areaTex, vec2 dist, vec2 e, float offset) {
+    vec2 maxDist = vec2(SMAA_AREATEX_MAX_DISTANCE_DIAG, SMAA_AREATEX_MAX_DISTANCE_DIAG);
+    vec2 texel = fma(maxDist, e, dist);
+
+    // Scale and bias into texel space:
+    vec2 uv = fma(SMAA_AREATEX_PIXEL_SIZE, texel, 0.5 * SMAA_AREATEX_PIXEL_SIZE);
+
+    // x: diagonal areas live in the second half of the texture.
+    // y: move to the sub-texture chosen by the subpixel offset.
+    uv += vec2(0.5, SMAA_AREATEX_SUBTEX_SIZE * offset);
+
+    return SMAA_AREATEX_SELECT(textureLod(areaTex, uv, 0.0));
+}
+
+/**
+ * This searches for diagonal patterns and returns the corresponding weights.
+ *
+ * edgesTex:         edge texture sampled by the diagonal searches.
+ * areaTex:          area lookup texture, read through SMAAAreaDiag.
+ * texcoord:         coordinate the searches start from.
+ * e:                edges of the current pixel; e.r gates the first search.
+ * subsampleIndices: .z and .w select the area sub-texture used for the first
+ *                   and second diagonal direction respectively.
+ *
+ * Returns the accumulated weights of both diagonal directions; stays at
+ * vec2(0.0) when neither `d.x + d.y > 2.0` test passes.
+ */
+vec2 SMAACalculateDiagWeights(sampler2D edgesTex, sampler2D areaTex, vec2 texcoord, vec2 e, vec4 subsampleIndices) {
+    vec2 weights = vec2(0.0, 0.0);
+
+    // Search for the line ends:
+    // d.xy hold the two search distances, d.zw the second value returned by
+    // each search (mean of the last fetched edges).
+    vec4 d;
+    vec2 end;
+    if (e.r > 0.0) {
+        d.xz = SMAASearchDiag1(edgesTex, texcoord, vec2(-1.0,  1.0), end);
+        // Add one more step when the last fetched edge pair has its .y set.
+        d.x += float(end.y > 0.9);
+    } else
+        d.xz = vec2(0.0, 0.0);
+    d.yw = SMAASearchDiag1(edgesTex, texcoord, vec2(1.0, -1.0), end);
+
+//    SMAA_BRANCH
+    if (d.x + d.y > 2.0) { // d.x + d.y + 1 > 3
+        // Fetch the crossing edges:
+        // (the +/-0.25 shifts let one bilinear fetch carry two edges, which
+        // SMAADecodeDiagBilinearAccess separates below)
+        vec4 coords = fma(vec4(-d.x + 0.25, d.x, d.y, -d.y - 0.25), SMAA_RT_METRICS.xyxy, texcoord.xyxy);
+        vec4 c;
+        c.xy = textureLodOffset(edgesTex, coords.xy, 0.0, ivec2(-1,  0)).rg;
+        c.zw = textureLodOffset(edgesTex, coords.zw, 0.0, ivec2( 1,  0)).rg;
+        c.yxwz = SMAADecodeDiagBilinearAccess(c.xyzw);
+
+        // Non-optimized version:
+        // vec4 coords = fma(vec4(-d.x, d.x, d.y, -d.y), SMAA_RT_METRICS.xyxy, texcoord.xyxy);
+        // vec4 c;
+        // c.x = textureLodOffset(edgesTex, coords.xy, 0.0, ivec2(-1,  0)).g;
+        // c.y = textureLodOffset(edgesTex, coords.xy, 0.0, ivec2( 0,  0)).r;
+        // c.z = textureLodOffset(edgesTex, coords.zw, 0.0, ivec2( 1,  0)).g;
+        // c.w = textureLodOffset(edgesTex, coords.zw, 0.0, ivec2( 1, -1)).r;
+
+        // Merge crossing edges at each side into a single value:
+        // cc = 2 * c.xz + c.yw
+        vec2 cc = fma(vec2(2.0, 2.0), c.xz, c.yw);
+
+        // Remove the crossing edge if we didn't find the end of the line:
+        // (d.zw >= 0.9 means the matching search stopped with its edges still set)
+        SMAAMovc(bvec2(step(0.9, d.zw)), cc, vec2(0.0, 0.0));
+
+        // Fetch the areas for this line:
+        weights += SMAAAreaDiag(areaTex, d.xy, cc, subsampleIndices.z);
+    }
+
+    // Search for the line ends:
+    d.xz = SMAASearchDiag2(edgesTex, texcoord, vec2(-1.0, -1.0), end);
+    // The second search only runs when the texel one step to the right has
+    // its .r edge set.
+    if (textureLodOffset(edgesTex, texcoord, 0.0, ivec2(1, 0)).r > 0.0) {
+        d.yw = SMAASearchDiag2(edgesTex, texcoord, vec2(1.0, 1.0), end);
+        d.y += float(end.y > 0.9);
+    } else
+        d.yw = vec2(0.0, 0.0);
+
+//    SMAA_BRANCH
+    if (d.x + d.y > 2.0) { // d.x + d.y + 1 > 3
+        // Fetch the crossing edges:
+        vec4 coords = fma(vec4(-d.x, -d.x, d.y, d.y), SMAA_RT_METRICS.xyxy, texcoord.xyxy);
+        vec4 c;
+        c.x  = textureLodOffset(edgesTex, coords.xy, 0.0, ivec2(-1,  0)).g;
+        c.y  = textureLodOffset(edgesTex, coords.xy, 0.0, ivec2( 0, -1)).r;
+        c.zw = textureLodOffset(edgesTex, coords.zw, 0.0, ivec2( 1,  0)).gr;
+        vec2 cc = fma(vec2(2.0, 2.0), c.xz, c.yw);
+
+        // Remove the crossing edge if we didn't find the end of the line:
+        SMAAMovc(bvec2(step(0.9, d.zw)), cc, vec2(0.0, 0.0));
+
+        // Fetch the areas for this line:
+        // (.gr: the two area components are swapped for this direction)
+        weights += SMAAAreaDiag(areaTex, d.xy, cc, subsampleIndices.w).gr;
+    }
+
+    return weights;
+}
+
+//-----------------------------------------------------------------------------
+// Horizontal/Vertical Search Functions
+
+/**
+ * Determines how much length the last step of a search should add, by
+ * looking up the search texture with the bilinearly interpolated edge `e`
+ * (see @PSEUDO_GATHER4). `offset` is the horizontal bias into the texture,
+ * expressed as a fraction of its width (the callers in this file pass 0.0 or
+ * 0.5).
+ */
+float SMAASearchLength(sampler2D searchTex, vec2 e, float offset) {
+    // Converts pixel coordinates into texcoords; the packed size is used
+    // because the texture is cropped.
+    vec2 toTexcoord = 1.0 / SMAA_SEARCHTEX_PACKED_SIZE;
+
+    // Scale: the texture is flipped vertically, with the left and right cases
+    // each taking half of the width; then corrected to hit texel centers.
+    vec2 texScale = SMAA_SEARCHTEX_SIZE * vec2(0.5, -1.0);
+    texScale += vec2(-1.0,  1.0);
+    texScale *= toTexcoord;
+
+    // Bias: same treatment, starting from the requested horizontal offset.
+    vec2 texBias = SMAA_SEARCHTEX_SIZE * vec2(offset, 1.0);
+    texBias += vec2( 0.5, -0.5);
+    texBias *= toTexcoord;
+
+    return SMAA_SEARCHTEX_SELECT(textureLod(searchTex, fma(texScale, e, texBias), 0.0));
+}
+
+/**
+ * Horizontal/vertical search functions for the 2nd pass.
+ *
+ * SMAASearchXLeft walks to the left in steps of two texels, starting at
+ * `texcoord`, until the x coordinate reaches `end`, the line is interrupted,
+ * or a crossing edge is found. It returns the resulting x coordinate after
+ * the correction looked up through SMAASearchLength.
+ */
+float SMAASearchXLeft(sampler2D edgesTex, sampler2D searchTex, vec2 texcoord, float end) {
+    /**
+     * @PSEUDO_GATHER4
+     * This texcoord has been offset by (-0.25, -0.125) in the vertex shader to
+     * sample between edge, thus fetching four edges in a row.
+     * Sampling with different offsets in each direction allows to disambiguate
+     * which edges are active from the four fetched ones.
+     */
+    // Seeded so that the loop condition holds before the first fetch.
+    vec2 e = vec2(0.0, 1.0);
+    while (texcoord.x > end && 
+           e.g > 0.8281 && // Is there some edge not activated?
+           e.r == 0.0) { // Or is there a crossing edge that breaks the line?
+        e = textureLod(edgesTex, texcoord, 0.0).rg;
+        texcoord = fma(-vec2(2.0, 0.0), SMAA_RT_METRICS.xy, texcoord);
+    }
+
+    // Folds the three corrections spelled out in the non-optimized version
+    // below (0.25 + 1 + 2 = 3.25 texels, minus the looked-up length) into one fma.
+    float offset = fma(-(255.0 / 127.0), SMAASearchLength(searchTex, e, 0.0), 3.25);
+    return fma(SMAA_RT_METRICS.x, offset, texcoord.x);
+
+    // Non-optimized version:
+    // We correct the previous (-0.25, -0.125) offset we applied:
+    // texcoord.x += 0.25 * SMAA_RT_METRICS.x;
+
+    // The searches are bias by 1, so adjust the coords accordingly:
+    // texcoord.x += SMAA_RT_METRICS.x;
+
+    // Disambiguate the length added by the last step:
+    // texcoord.x += 2.0 * SMAA_RT_METRICS.x; // Undo last step
+    // texcoord.x -= SMAA_RT_METRICS.x * (255.0 / 127.0) * SMAASearchLength(searchTex, e, 0.0);
+    // return fma(SMAA_RT_METRICS.x, offset, texcoord.x);
+}
+
+// Walks to the right in steps of two texels, starting at `texcoord`, until the
+// x coordinate reaches `end`, the line is interrupted, or a crossing edge is
+// found. Returns the x coordinate after the search-texture length correction.
+float SMAASearchXRight(sampler2D edgesTex, sampler2D searchTex, vec2 texcoord, float end) {
+    // Seeded so that the loop condition holds before the first fetch.
+    vec2 edges = vec2(0.0, 1.0);
+    for (;;) {
+        bool inRange       = texcoord.x < end;
+        bool lineContinues = edges.g > 0.8281; // false once some edge is not activated
+        bool noCrossing    = edges.r == 0.0;   // false once a crossing edge breaks the line
+        if (!(inRange && lineContinues && noCrossing))
+            break;
+        edges = textureLod(edgesTex, texcoord, 0.0).rg;
+        texcoord = fma(vec2(2.0, 0.0), SMAA_RT_METRICS.xy, texcoord);
+    }
+    float lengthFix = fma(-(255.0 / 127.0), SMAASearchLength(searchTex, edges, 0.5), 3.25);
+    return fma(-SMAA_RT_METRICS.x, lengthFix, texcoord.x);
+}
+
+// Walks towards smaller y in steps of two texels, starting at `texcoord`,
+// until the y coordinate reaches `end`, the line is interrupted, or a crossing
+// edge is found. Returns the y coordinate after the search-texture length
+// correction (the edges are passed swizzled as .gr).
+float SMAASearchYUp(sampler2D edgesTex, sampler2D searchTex, vec2 texcoord, float end) {
+    // Seeded so that the loop condition holds before the first fetch.
+    vec2 edges = vec2(1.0, 0.0);
+    for (;;) {
+        bool inRange       = texcoord.y > end;
+        bool lineContinues = edges.r > 0.8281; // false once some edge is not activated
+        bool noCrossing    = edges.g == 0.0;   // false once a crossing edge breaks the line
+        if (!(inRange && lineContinues && noCrossing))
+            break;
+        edges = textureLod(edgesTex, texcoord, 0.0).rg;
+        texcoord = fma(-vec2(0.0, 2.0), SMAA_RT_METRICS.xy, texcoord);
+    }
+    float lengthFix = fma(-(255.0 / 127.0), SMAASearchLength(searchTex, edges.gr, 0.0), 3.25);
+    return fma(SMAA_RT_METRICS.y, lengthFix, texcoord.y);
+}
+
+// Walks towards larger y in steps of two texels, starting at `texcoord`, until
+// the y coordinate reaches `end`, the line is interrupted, or a crossing edge
+// is found. Returns the y coordinate after the search-texture length
+// correction (the edges are passed swizzled as .gr).
+float SMAASearchYDown(sampler2D edgesTex, sampler2D searchTex, vec2 texcoord, float end) {
+    // Seeded so that the loop condition holds before the first fetch.
+    vec2 edges = vec2(1.0, 0.0);
+    for (;;) {
+        bool inRange       = texcoord.y < end;
+        bool lineContinues = edges.r > 0.8281; // false once some edge is not activated
+        bool noCrossing    = edges.g == 0.0;   // false once a crossing edge breaks the line
+        if (!(inRange && lineContinues && noCrossing))
+            break;
+        edges = textureLod(edgesTex, texcoord, 0.0).rg;
+        texcoord = fma(vec2(0.0, 2.0), SMAA_RT_METRICS.xy, texcoord);
+    }
+    float lengthFix = fma(-(255.0 / 127.0), SMAASearchLength(searchTex, edges.gr, 0.5), 3.25);
+    return fma(-SMAA_RT_METRICS.y, lengthFix, texcoord.y);
+}
+
+/**
+ * Given the distances to both line ends (`dist`) and the two crossing edges
+ * (`e1`, `e2`), looks up the areas at each side of the current edge.
+ * `offset` selects the sub-texture (subpixel offset).
+ */
+vec2 SMAAArea(sampler2D areaTex, vec2 dist, float e1, float e2, float offset) {
+    vec2 maxDist = vec2(SMAA_AREATEX_MAX_DISTANCE, SMAA_AREATEX_MAX_DISTANCE);
+
+    // Rounding prevents precision errors of bilinear filtering:
+    vec2 crossing = round(4.0 * vec2(e1, e2));
+    vec2 texel = fma(maxDist, crossing, dist);
+
+    // Scale and bias into texel space:
+    vec2 uv = fma(SMAA_AREATEX_PIXEL_SIZE, texel, 0.5 * SMAA_AREATEX_PIXEL_SIZE);
+
+    // Move to the sub-texture chosen by the subpixel offset:
+    uv.y = fma(SMAA_AREATEX_SUBTEX_SIZE, offset, uv.y);
+
+    return SMAA_AREATEX_SELECT(textureLod(areaTex, uv, 0.0));
+}
+
+//-----------------------------------------------------------------------------
+// Corner Detection Functions
+
+// Attenuates `weights` where .r edges are found next to the two ends of a
+// horizontal line (texcoord.xy and texcoord.zw). `d` holds the distances to
+// both ends. Does nothing when SMAA_DISABLE_CORNER_DETECTION is defined.
+void SMAADetectHorizontalCornerPattern(sampler2D edgesTex, inout vec2 weights, vec4 texcoord, vec2 d) {
+    #if !defined(SMAA_DISABLE_CORNER_DETECTION)
+    // 1.0 for each end whose distance is not larger than the other end's.
+    vec2 nearer = step(d.xy, d.yx);
+    vec2 attenuation = (1.0 - SMAA_CORNER_ROUNDING_NORM) * nearer;
+
+    // Reduce blending for pixels in the center of a line.
+    attenuation /= nearer.x + nearer.y;
+
+    float firstPlus1   = textureLodOffset(edgesTex, texcoord.xy, 0.0, ivec2(0,  1)).r;
+    float secondPlus1  = textureLodOffset(edgesTex, texcoord.zw, 0.0, ivec2(1,  1)).r;
+    float firstMinus2  = textureLodOffset(edgesTex, texcoord.xy, 0.0, ivec2(0, -2)).r;
+    float secondMinus2 = textureLodOffset(edgesTex, texcoord.zw, 0.0, ivec2(1, -2)).r;
+
+    vec2 factor = vec2(1.0 - attenuation.x * firstPlus1  - attenuation.y * secondPlus1,
+                       1.0 - attenuation.x * firstMinus2 - attenuation.y * secondMinus2);
+
+    weights *= clamp(factor, 0.0, 1.0);
+    #endif
+}
+
+// Attenuates `weights` where .g edges are found next to the two ends of a
+// vertical line (texcoord.xy and texcoord.zw). `d` holds the distances to
+// both ends. Does nothing when SMAA_DISABLE_CORNER_DETECTION is defined.
+void SMAADetectVerticalCornerPattern(sampler2D edgesTex, inout vec2 weights, vec4 texcoord, vec2 d) {
+    #if !defined(SMAA_DISABLE_CORNER_DETECTION)
+    // 1.0 for each end whose distance is not larger than the other end's.
+    vec2 nearer = step(d.xy, d.yx);
+    vec2 attenuation = (1.0 - SMAA_CORNER_ROUNDING_NORM) * nearer;
+
+    attenuation /= nearer.x + nearer.y;
+
+    float firstPlus1   = textureLodOffset(edgesTex, texcoord.xy, 0.0, ivec2( 1, 0)).g;
+    float secondPlus1  = textureLodOffset(edgesTex, texcoord.zw, 0.0, ivec2( 1, 1)).g;
+    float firstMinus2  = textureLodOffset(edgesTex, texcoord.xy, 0.0, ivec2(-2, 0)).g;
+    float secondMinus2 = textureLodOffset(edgesTex, texcoord.zw, 0.0, ivec2(-2, 1)).g;
+
+    vec2 factor = vec2(1.0 - attenuation.x * firstPlus1  - attenuation.y * secondPlus1,
+                       1.0 - attenuation.x * firstMinus2 - attenuation.y * secondMinus2);
+
+    weights *= clamp(factor, 0.0, 1.0);
+    #endif
+}
+
+// Blending-weight calculation for one pixel.
+//
+// texcoord / pixcoord / offset: the values produced by the vertex stage
+//     (offset[0] and offset[1] are search start coordinates, offset[2] holds
+//     the four search end coordinates).
+// edgesTex, areaTex, searchTex: the edge texture and the two lookup textures.
+// subsampleIndices: selects the area sub-textures (.y horizontal, .x vertical,
+//     .zw diagonals via SMAACalculateDiagWeights).
+//
+// Returns weights whose .rg come from the north-edge (horizontal or diagonal)
+// processing and whose .ba come from the west-edge (vertical) processing;
+// components stay 0.0 when the matching edge is absent.
+vec4 SMAABlendingWeightCalculationPS(vec2 texcoord,
+                                       vec2 pixcoord,
+                                       vec4 offset[3],
+                                       sampler2D edgesTex,
+                                       sampler2D areaTex,
+                                       sampler2D searchTex,
+                                       vec4 subsampleIndices) { // Just pass zero for SMAA 1x, see @SUBSAMPLE_INDICES.
+    vec4 weights = vec4(0.0, 0.0, 0.0, 0.0);
+
+    vec2 e = texture(edgesTex, texcoord).rg;
+
+//    SMAA_BRANCH
+    if (e.g > 0.0) { // Edge at north
+        #if !defined(SMAA_DISABLE_DIAG_DETECTION)
+        // Diagonals have both north and west edges, so searching for them in
+        // one of the boundaries is enough.
+        weights.rg = SMAACalculateDiagWeights(edgesTex, areaTex, texcoord, e, subsampleIndices);
+
+        // We give priority to diagonals, so if we find a diagonal we skip 
+        // horizontal/vertical processing.
+//        SMAA_BRANCH
+        if (weights.r == -weights.g) { // weights.r + weights.g == 0.0
+        #endif
+
+        vec2 d;
+
+        // Find the distance to the left:
+        vec3 coords;
+        coords.x = SMAASearchXLeft(edgesTex, searchTex, offset[0].xy, offset[2].x);
+        coords.y = offset[1].y; // offset[1].y = texcoord.y - 0.25 * SMAA_RT_METRICS.y (@CROSSING_OFFSET)
+        d.x = coords.x;
+
+        // Now fetch the left crossing edges, two at a time using bilinear
+        // filtering. Sampling at -0.25 (see @CROSSING_OFFSET) enables to
+        // discern what value each edge has:
+        float e1 = textureLod(edgesTex, coords.xy, 0.0).r;
+
+        // Find the distance to the right:
+        coords.z = SMAASearchXRight(edgesTex, searchTex, offset[0].zw, offset[2].y);
+        d.y = coords.z;
+
+        // We want the distances to be in pixel units (doing this here allow to
+        // better interleave arithmetic and memory accesses):
+        d = abs(round(fma(SMAA_RT_METRICS.zz, d, -pixcoord.xx)));
+
+        // SMAAArea below needs a sqrt, as the areas texture is compressed
+        // quadratically:
+        vec2 sqrt_d = sqrt(d);
+
+        // Fetch the right crossing edges:
+        float e2 = textureLodOffset(edgesTex, coords.zy, 0.0, ivec2(1, 0)).r;
+
+        // Ok, we know how this pattern looks like, now it is time for getting
+        // the actual area:
+        weights.rg = SMAAArea(areaTex, sqrt_d, e1, e2, subsampleIndices.y);
+
+        // Fix corners:
+        coords.y = texcoord.y;
+        SMAADetectHorizontalCornerPattern(edgesTex, weights.rg, coords.xyzy, d);
+
+        #if !defined(SMAA_DISABLE_DIAG_DETECTION)
+        } else
+            e.r = 0.0; // Skip vertical processing.
+        #endif
+    }
+
+//    SMAA_BRANCH
+    if (e.r > 0.0) { // Edge at west
+        vec2 d;
+
+        // Find the distance to the top:
+        vec3 coords;
+        coords.y = SMAASearchYUp(edgesTex, searchTex, offset[1].xy, offset[2].z);
+        coords.x = offset[0].x; // offset[0].x = texcoord.x - 0.25 * SMAA_RT_METRICS.x;
+        d.x = coords.y;
+
+        // Fetch the top crossing edges:
+        float e1 = textureLod(edgesTex, coords.xy, 0.0).g;
+
+        // Find the distance to the bottom:
+        coords.z = SMAASearchYDown(edgesTex, searchTex, offset[1].zw, offset[2].w);
+        d.y = coords.z;
+
+        // We want the distances to be in pixel units:
+        d = abs(round(fma(SMAA_RT_METRICS.ww, d, -pixcoord.yy)));
+
+        // SMAAArea below needs a sqrt, as the areas texture is compressed 
+        // quadratically:
+        vec2 sqrt_d = sqrt(d);
+
+        // Fetch the bottom crossing edges:
+        float e2 = textureLodOffset(edgesTex, coords.xz, 0.0, ivec2(0, 1)).g;
+
+        // Get the area for this direction:
+        weights.ba = SMAAArea(areaTex, sqrt_d, e1, e2, subsampleIndices.x);
+
+        // Fix corners:
+        coords.x = texcoord.x;
+        SMAADetectVerticalCornerPattern(edgesTex, weights.ba, coords.xyxz, d);
+    }
+
+    return weights;
+}
+
+// Fragment entry point: runs the blending-weight calculation on Source (used
+// as the edges texture) with all subsample indices set to zero.
+void main()
+{
+   vec4 subsampleIndices = vec4(0.0);
+   FragColor = SMAABlendingWeightCalculationPS(texcoord, pixcoord, offset,
+                                               Source, areaTex, searchTex,
+                                               subsampleIndices);
+}
--- a/anti-aliasing/shaders/smaa/smaa-common.h
+++ b/anti-aliasing/shaders/smaa/smaa-common.h
@ -0,0 +1,631 @@
+/**
+ * Copyright (C) 2013 Jorge Jimenez (jorge@iryoku.com)
+ * Copyright (C) 2013 Jose I. Echevarria (joseignacioechevarria@gmail.com)
+ * Copyright (C) 2013 Belen Masia (bmasia@unizar.es)
+ * Copyright (C) 2013 Fernando Navarro (fernandn@microsoft.com)
+ * Copyright (C) 2013 Diego Gutierrez (diegog@unizar.es)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to
+ * do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software. As clarification, there
+ * is no requirement that the copyright notice and permission be included in
+ * binary distributions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+// Configuration for this port: GLSL 4 target with the HIGH preset. The
+// original SMAA.h include is left commented out.
+ #define SMAA_GLSL_4
+#define SMAA_PRESET_HIGH
+//#include "SMAA.h"
+// Render-target metrics read from the pass's push constants, packed as
+// (SourceSize.z, SourceSize.w, SourceSize.x, SourceSize.y).
+#define SMAA_RT_METRICS vec4(params.SourceSize.z, params.SourceSize.w, params.SourceSize.x, params.SourceSize.y)
+
+/**
+ *                  _______  ___  ___       ___           ___
+ *                 /       ||   \/   |     /   \         /   \
+ *                |   (---- |  \  /  |    /  ^  \       /  ^  \
+ *                 \   \    |  |\/|  |   /  /_\  \     /  /_\  \
+ *              ----)   |   |  |  |  |  /  _____  \   /  _____  \
+ *             |_______/    |__|  |__| /__/     \__\ /__/     \__\
+ * 
+ *                               E N H A N C E D
+ *       S U B P I X E L   M O R P H O L O G I C A L   A N T I A L I A S I N G
+ *
+ *                         http://www.iryoku.com/smaa/
+ *
+ * Hi, welcome aboard!
+ * 
+ * Here you'll find instructions to get the shader up and running as fast as
+ * possible.
+ *
+ * IMPORTANT NOTICE: when updating, remember to update both this file and the
+ * precomputed textures! They may change from version to version.
+ *
+ * The shader has three passes, chained together as follows:
+ *
+ *                           |input|------------------+
+ *                              v                     |
+ *                    [ SMAA*EdgeDetection ]          |
+ *                              v                     |
+ *                          |edgesTex|                |
+ *                              v                     |
+ *              [ SMAABlendingWeightCalculation ]     |
+ *                              v                     |
+ *                          |blendTex|                |
+ *                              v                     |
+ *                [ SMAANeighborhoodBlending ] <------+
+ *                              v
+ *                           |output|
+ *
+ * Note that each [pass] has its own vertex and pixel shader. Remember to use
+ * oversized triangles instead of quads to avoid overshading along the
+ * diagonal.
+ *
+ * You've three edge detection methods to choose from: luma, color or depth.
+ * They represent different quality/performance and anti-aliasing/sharpness
+ * tradeoffs, so our recommendation is for you to choose the one that best
+ * suits your particular scenario:
+ *
+ * - Depth edge detection is usually the fastest but it may miss some edges.
+ *
+ * - Luma edge detection is usually more expensive than depth edge detection,
+ *   but catches visible edges that depth edge detection can miss.
+ *
+ * - Color edge detection is usually the most expensive one but catches
+ *   chroma-only edges.
+ *
+ * For quickstarters: just use luma edge detection.
+ *
+ * The general advice is to not rush the integration process and ensure each
+ * step is done correctly (don't try to integrate SMAA T2x with predicated edge
+ * detection from the start!). Ok then, let's go!
+ *
+ *  1. The first step is to create two RGBA temporal render targets for holding
+ *     |edgesTex| and |blendTex|.
+ *
+ *     In DX10 or DX11, you can use a RG render target for the edges texture.
+ *     In the case of NVIDIA GPUs, using RG render targets seems to actually be
+ *     slower.
+ *
+ *     On the Xbox 360, you can use the same render target for resolving both
+ *     |edgesTex| and |blendTex|, as they aren't needed simultaneously.
+ *
+ *  2. Both temporal render targets |edgesTex| and |blendTex| must be cleared
+ *     each frame. Do not forget to clear the alpha channel!
+ *
+ *  3. The next step is loading the two supporting precalculated textures,
+ *     'areaTex' and 'searchTex'. You'll find them in the 'Textures' folder as
+ *     C++ headers, and also as regular DDS files. They'll be needed for the
+ *     'SMAABlendingWeightCalculation' pass.
+ *
+ *     If you use the C++ headers, be sure to load them in the format specified
+ *     inside of them.
+ *
+ *     You can also compress 'areaTex' and 'searchTex' using BC5 and BC4
+ *     respectively, if you have that option in your content processor pipeline.
+ *     When compressing then, you get a non-perceptible quality decrease, and a
+ *     marginal performance increase.
+ *
+ *  4. All samplers must be set to linear filtering and clamp.
+ *
+ *     After you get the technique working, remember that 64-bit inputs have
+ *     half-rate linear filtering on GCN.
+ *
+ *     If SMAA is applied to 64-bit color buffers, switching to point filtering
+ *     when accessing them will increase the performance. Search for
+ *     'SMAASamplePoint' to see which textures may benefit from point
+ *     filtering, and where (which is basically the color input in the edge
+ *     detection and resolve passes).
+ *
+ *  5. All texture reads and buffer writes must be non-sRGB, with the exception
+ *     of the input read and the output write in
+ *     'SMAANeighborhoodBlending' (and only in this pass!). If sRGB reads in
+ *     this last pass are not possible, the technique will work anyway, but
+ *     will perform antialiasing in gamma space.
+ *
+ *     IMPORTANT: for best results the input read for the color/luma edge 
+ *     detection should *NOT* be sRGB.
+ *
+ *  6. Before including SMAA.h you'll have to setup the render target metrics,
+ *     the target and any optional configuration defines. Optionally you can
+ *     use a preset.
+ *
+ *     You have the following targets available: 
+ *         SMAA_HLSL_3
+ *         SMAA_HLSL_4
+ *         SMAA_HLSL_4_1
+ *         SMAA_GLSL_3 *
+ *         SMAA_GLSL_4 *
+ *
+ *         * (See SMAA_INCLUDE_VS and SMAA_INCLUDE_PS below).
+ *
+ *     And four presets:
+ *         SMAA_PRESET_LOW          (60% of the quality)
+ *         SMAA_PRESET_MEDIUM       (80% of the quality)
+ *         SMAA_PRESET_HIGH         (95% of the quality)
+ *         SMAA_PRESET_ULTRA        (99% of the quality)
+ *
+ *     For example:
+ *         #define SMAA_RT_METRICS vec4(1.0 / 1280.0, 1.0 / 720.0, 1280.0, 720.0)
+ *         #define SMAA_GLSL_4
+ *         #define SMAA_PRESET_HIGH
+ *         #include "SMAA.h"
+ *
+ *     Note that SMAA_RT_METRICS doesn't need to be a macro, it can be a
+ *     uniform variable. The code is designed to minimize the impact of not
+ *     using a constant value, but it is still better to hardcode it.
+ *
+ *     Depending on how you encoded 'areaTex' and 'searchTex', you may have to
+ *     add (and customize) the following defines before including SMAA.h:
+ *          #define SMAA_AREATEX_SELECT(sample) sample.rg
+ *          #define SMAA_SEARCHTEX_SELECT(sample) sample.r
+ *
+ *     If your engine is already using porting macros, you can define
+ *     SMAA_CUSTOM_SL, and define the porting functions by yourself.
+ *
+ *  7. Then, you'll have to setup the passes as indicated in the scheme above.
+ *     You can take a look into SMAA.fx, to see how we did it for our demo.
+ *     Checkout the function wrappers, you may want to copy-paste them!
+ *
+ *  8. It's recommended to validate the produced |edgesTex| and |blendTex|.
+ *     You can use a screenshot from your engine to compare the |edgesTex|
+ *     and |blendTex| produced inside of the engine with the results obtained
+ *     with the reference demo.
+ *
+ *  9. After you get the last pass to work, it's time to optimize. You'll have
+ *     to initialize a stencil buffer in the first pass (discard is already in
+ *     the code), then mask execution by using it the second pass. The last
+ *     pass should be executed in all pixels.
+ *
+ *
+ * After this point you can choose to enable predicated thresholding,
+ * temporal supersampling and motion blur integration:
+ *
+ * a) If you want to use predicated thresholding, take a look into
+ *    SMAA_PREDICATION; you'll need to pass an extra texture in the edge
+ *    detection pass.
+ *
+ * b) If you want to enable temporal supersampling (SMAA T2x):
+ *
+ * 1. The first step is to render using subpixel jitters. I won't go into
+ *    detail, but it's as simple as moving each vertex position in the
+ *    vertex shader, you can check how we do it in our DX10 demo.
+ *
+ * 2. Then, you must setup the temporal resolve. You may want to take a look
+ *    into SMAAResolve for resolving 2x modes. After you get it working, you'll
+ *    probably see ghosting everywhere. But fear not, you can enable the
+ *    CryENGINE temporal reprojection by setting the SMAA_REPROJECTION macro.
+ *    Check out SMAA_DECODE_VELOCITY if your velocity buffer is encoded.
+ *
+ * 3. The next step is to apply SMAA to each subpixel jittered frame, just as
+ *    done for 1x.
+ *
+ * 4. At this point you should already have something usable, but for best
+ *    results the proper area textures must be set depending on current jitter.
+ *    For this, the parameter 'subsampleIndices' of
+ *    'SMAABlendingWeightCalculationPS' must be set as follows, for our T2x
+ *    mode:
+ *
+ *    @SUBSAMPLE_INDICES
+ *
+ *    | S# |  Camera Jitter   |  subsampleIndices    |
+ *    +----+------------------+---------------------+
+ *    |  0 |  ( 0.25, -0.25)  |  float4(1, 1, 1, 0)  |
+ *    |  1 |  (-0.25,  0.25)  |  float4(2, 2, 2, 0)  |
+ *
+ *    These jitter positions assume a bottom-to-top y axis. S# stands for the
+ *    sample number.
+ *
+ * More information about temporal supersampling here:
+ *    http://iryoku.com/aacourse/downloads/13-Anti-Aliasing-Methods-in-CryENGINE-3.pdf
+ *
+ * c) If you want to enable spatial multisampling (SMAA S2x):
+ *
+ * 1. The scene must be rendered using MSAA 2x. The MSAA 2x buffer must be
+ *    created with:
+ *      - DX10:     see below (*)
+ *      - DX10.1:   D3D10_STANDARD_MULTISAMPLE_PATTERN or
+ *      - DX11:     D3D11_STANDARD_MULTISAMPLE_PATTERN
+ *
+ *    This allows to ensure that the subsample order matches the table in
+ *    @SUBSAMPLE_INDICES.
+ *
+ *    (*) In the case of DX10, we refer the reader to:
+ *      - SMAA::detectMSAAOrder and
+ *      - SMAA::msaaReorder
+ *
+ *    These functions allow to match the standard multisample patterns by
+ *    detecting the subsample order for a specific GPU, and reordering
+ *    them appropriately.
+ *
+ * 2. A shader must be run to output each subsample into a separate buffer
+ *    (DX10 is required). You can use SMAASeparate for this purpose, or just do
+ *    it in an existing pass (for example, in the tone mapping pass, which has
+ *    the advantage of feeding tone mapped subsamples to SMAA, which will yield
+ *    better results).
+ *
+ * 3. The full SMAA 1x pipeline must be run for each separated buffer, storing
+ *    the results in the final buffer. The second run should alpha blend with
+ *    the existing final buffer using a blending factor of 0.5.
+ *    'subsampleIndices' must be adjusted as in the SMAA T2x case (see point
+ *    b).
+ *
+ * d) If you want to enable temporal supersampling on top of SMAA S2x
+ *    (which actually is SMAA 4x):
+ *
+ * 1. SMAA 4x consists of temporally jittering SMAA S2x, so the first step is
+ *    to calculate SMAA S2x for current frame. In this case, 'subsampleIndices'
+ *    must be set as follows:
+ *
+ *    | F# | S# |   Camera Jitter    |    Net Jitter     |   subsampleIndices   |
+ *    +----+----+--------------------+-------------------+----------------------+
+ *    |  0 |  0 |  ( 0.125,  0.125)  |  ( 0.375, -0.125) |  float4(5, 3, 1, 3)  |
+ *    |  0 |  1 |  ( 0.125,  0.125)  |  (-0.125,  0.375) |  float4(4, 6, 2, 3)  |
+ *    +----+----+--------------------+-------------------+----------------------+
+ *    |  1 |  2 |  (-0.125, -0.125)  |  ( 0.125, -0.375) |  float4(3, 5, 1, 4)  |
+ *    |  1 |  3 |  (-0.125, -0.125)  |  (-0.375,  0.125) |  float4(6, 4, 2, 4)  |
+ *
+ *    These jitter positions assume a bottom-to-top y axis. F# stands for the
+ *    frame number. S# stands for the sample number.
+ *
+ * 2. After calculating SMAA S2x for current frame (with the new subsample
+ *    indices), previous frame must be reprojected as in SMAA T2x mode (see
+ *    point b).
+ *
+ * e) If motion blur is used, you may want to do the edge detection pass
+ *    together with motion blur. This has two advantages:
+ *
+ * 1. Pixels under heavy motion can be omitted from the edge detection process.
+ *    For these pixels we can just store "no edge", as motion blur will take
+ *    care of them.
+ * 2. The center pixel tap is reused.
+ *
+ * Note that in this case depth testing should be used instead of stenciling,
+ * as we have to write all the pixels in the motion blur pass.
+ *
+ * That's it!
+ */
+
+//-----------------------------------------------------------------------------
+// SMAA Presets
+
+/**
+ * Note that if you use one of these presets, the following configuration
+ * macros will be ignored if set in the "Configurable Defines" section.
+ */
+
+// Each preset fixes SMAA_THRESHOLD and SMAA_MAX_SEARCH_STEPS. LOW and MEDIUM
+// also define the switches that disable diagonal and corner detection; HIGH
+// and ULTRA instead set the diagonal search budget and the corner rounding.
+// At most one branch is taken; with no preset defined nothing is set here.
+#if defined(SMAA_PRESET_LOW)
+#define SMAA_THRESHOLD 0.15
+#define SMAA_MAX_SEARCH_STEPS 4
+#define SMAA_DISABLE_DIAG_DETECTION
+#define SMAA_DISABLE_CORNER_DETECTION
+#elif defined(SMAA_PRESET_MEDIUM)
+#define SMAA_THRESHOLD 0.1
+#define SMAA_MAX_SEARCH_STEPS 8
+#define SMAA_DISABLE_DIAG_DETECTION
+#define SMAA_DISABLE_CORNER_DETECTION
+#elif defined(SMAA_PRESET_HIGH)
+#define SMAA_THRESHOLD 0.1
+#define SMAA_MAX_SEARCH_STEPS 16
+#define SMAA_MAX_SEARCH_STEPS_DIAG 8
+#define SMAA_CORNER_ROUNDING 25
+#elif defined(SMAA_PRESET_ULTRA)
+#define SMAA_THRESHOLD 0.05
+#define SMAA_MAX_SEARCH_STEPS 32
+#define SMAA_MAX_SEARCH_STEPS_DIAG 16
+#define SMAA_CORNER_ROUNDING 25
+#endif
+
+//-----------------------------------------------------------------------------
+// Configurable Defines
+
+/**
+ * SMAA_THRESHOLD specifies the threshold or sensitivity to edges.
+ * Lowering this value you will be able to detect more edges at the expense of
+ * performance. 
+ *
+ * Range: [0, 0.5]
+ *   0.1 is a reasonable value, and allows to catch most visible edges.
+ *   0.05 is a rather overkill value, that allows to catch 'em all.
+ *
+ *   If temporal supersampling is used, 0.2 could be a reasonable value, as low
+ *   contrast edges are properly filtered by just 2x.
+ */
+#ifndef SMAA_THRESHOLD
+#define SMAA_THRESHOLD 0.1
+#endif
+
+/**
+ * SMAA_DEPTH_THRESHOLD specifies the threshold for depth edge detection.
+ * 
+ * Range: depends on the depth range of the scene.
+ */
+#ifndef SMAA_DEPTH_THRESHOLD
+#define SMAA_DEPTH_THRESHOLD (0.1 * SMAA_THRESHOLD)
+#endif
+
+/**
+ * SMAA_MAX_SEARCH_STEPS specifies the maximum steps performed in the
+ * horizontal/vertical pattern searches, at each side of the pixel.
+ *
+ * In number of pixels, it's actually the double. So the maximum line length
+ * perfectly handled by, for example 16, is 64 (by perfectly, we meant that
+ * longer lines won't look as good, but still antialiased).
+ *
+ * Range: [0, 112]
+ */
+#ifndef SMAA_MAX_SEARCH_STEPS
+#define SMAA_MAX_SEARCH_STEPS 16
+#endif
+
+/**
+ * SMAA_MAX_SEARCH_STEPS_DIAG specifies the maximum steps performed in the
+ * diagonal pattern searches, at each side of the pixel. In this case we jump
+ * one pixel at time, instead of two.
+ *
+ * Range: [0, 20]
+ *
+ * On high-end machines it is cheap (between a 0.8x and 0.9x slower for 16 
+ * steps), but it can have a significant impact on older machines.
+ *
+ * Define SMAA_DISABLE_DIAG_DETECTION to disable diagonal processing.
+ */
+#ifndef SMAA_MAX_SEARCH_STEPS_DIAG
+#define SMAA_MAX_SEARCH_STEPS_DIAG 8
+#endif
+
+/**
+ * SMAA_CORNER_ROUNDING specifies how much sharp corners will be rounded.
+ *
+ * Range: [0, 100]
+ *
+ * Define SMAA_DISABLE_CORNER_DETECTION to disable corner processing.
+ */
+#ifndef SMAA_CORNER_ROUNDING
+#define SMAA_CORNER_ROUNDING 25
+#endif
+
+/**
+ * If there is a neighbor edge that has SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR
+ * times bigger contrast than current edge, current edge will be discarded.
+ *
+ * This allows to eliminate spurious crossing edges, and is based on the fact
+ * that, if there is too much contrast in a direction, that will hide
+ * perceptually contrast in the other neighbors.
+ */
+#ifndef SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR
+#define SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR 2.0
+#endif
+
+/**
+ * Predicated thresholding allows to better preserve texture details and to
+ * improve performance, by decreasing the number of detected edges using an
+ * additional buffer like the light accumulation buffer, object ids or even the
+ * depth buffer (the depth buffer usage may be limited to indoor or short range
+ * scenes).
+ *
+ * It locally decreases the luma or color threshold if an edge is found in an
+ * additional buffer (so the global threshold can be higher).
+ *
+ * This method was developed by Playstation EDGE MLAA team, and used in 
+ * Killzone 3, by using the light accumulation buffer. More information here:
+ *     http://iryoku.com/aacourse/downloads/06-MLAA-on-PS3.pptx 
+ */
+#ifndef SMAA_PREDICATION
+#define SMAA_PREDICATION 0
+#endif
+
+/**
+ * Threshold to be used in the additional predication buffer. 
+ *
+ * Range: depends on the input, so you'll have to find the magic number that
+ * works for you.
+ */
+#ifndef SMAA_PREDICATION_THRESHOLD
+#define SMAA_PREDICATION_THRESHOLD 0.01
+#endif
+
+/**
+ * How much to scale the global threshold used for luma or color edge
+ * detection when using predication.
+ *
+ * Range: [1, 5]
+ */
+#ifndef SMAA_PREDICATION_SCALE
+#define SMAA_PREDICATION_SCALE 2.0
+#endif
+
+/**
+ * How much to locally decrease the threshold.
+ *
+ * Range: [0, 1]
+ */
+#ifndef SMAA_PREDICATION_STRENGTH
+#define SMAA_PREDICATION_STRENGTH 0.4
+#endif
+
+/**
+ * Temporal reprojection allows to remove ghosting artifacts when using
+ * temporal supersampling. We use the CryEngine 3 method which also introduces
+ * velocity weighting. This feature is of extreme importance for totally
+ * removing ghosting. More information here:
+ *    http://iryoku.com/aacourse/downloads/13-Anti-Aliasing-Methods-in-CryENGINE-3.pdf
+ *
+ * Note that you'll need to setup a velocity buffer for enabling reprojection.
+ * For static geometry, saving the previous depth buffer is a viable
+ * alternative.
+ */
+#ifndef SMAA_REPROJECTION
+#define SMAA_REPROJECTION 0
+#endif
+
+/**
+ * SMAA_REPROJECTION_WEIGHT_SCALE controls the velocity weighting. It allows to
+ * remove ghosting trails behind the moving object, which are not removed by
+ * just using reprojection. Using low values will exhibit ghosting, while using
+ * high values will disable temporal supersampling under motion.
+ *
+ * Behind the scenes, velocity weighting removes temporal supersampling when
+ * the velocity of the subsamples differs (meaning they are different objects).
+ *
+ * Range: [0, 80]
+ */
+#ifndef SMAA_REPROJECTION_WEIGHT_SCALE
+#define SMAA_REPROJECTION_WEIGHT_SCALE 30.0
+#endif
+
+/**
+ * On some compilers, discard cannot be used in vertex shaders. Thus, they need
+ * to be compiled separately.
+ */
+#ifndef SMAA_INCLUDE_VS
+#define SMAA_INCLUDE_VS 1
+#endif
+#ifndef SMAA_INCLUDE_PS
+#define SMAA_INCLUDE_PS 1
+#endif
+
+//-----------------------------------------------------------------------------
+// Texture Access Defines
+
+// Channel selectors applied to fetched samples. Each one can be overridden by
+// defining the macro before this point.
+
+// Area texture: channels .ra under SMAA_HLSL_3, .rg for every other target.
+#ifndef SMAA_AREATEX_SELECT
+#if defined(SMAA_HLSL_3)
+#define SMAA_AREATEX_SELECT(sample) sample.ra
+#else
+#define SMAA_AREATEX_SELECT(sample) sample.rg
+#endif
+#endif
+
+// Search texture: red channel only.
+#ifndef SMAA_SEARCHTEX_SELECT
+#define SMAA_SEARCHTEX_SELECT(sample) sample.r
+#endif
+
+// Velocity sample: the default decode just takes the .rg channels.
+#ifndef SMAA_DECODE_VELOCITY
+#define SMAA_DECODE_VELOCITY(sample) sample.rg
+#endif
+
+//-----------------------------------------------------------------------------
+// Non-Configurable Defines
+
+// Fixed constants; unlike the configurable section these have no #ifndef
+// guard. SMAA_AREATEX_PIXEL_SIZE is the reciprocal of vec2(160.0, 560.0) and
+// SMAA_CORNER_ROUNDING_NORM rescales SMAA_CORNER_ROUNDING by 1/100.
+// NOTE(review): these are spelled with vec2, which the porting section below
+// only maps for GLSL targets -- confirm the HLSL paths are not expected to
+// compile against this header.
+#define SMAA_AREATEX_MAX_DISTANCE 16
+#define SMAA_AREATEX_MAX_DISTANCE_DIAG 20
+#define SMAA_AREATEX_PIXEL_SIZE (1.0 / vec2(160.0, 560.0))
+#define SMAA_AREATEX_SUBTEX_SIZE (1.0 / 7.0)
+#define SMAA_SEARCHTEX_SIZE vec2(66.0, 33.0)
+#define SMAA_SEARCHTEX_PACKED_SIZE vec2(64.0, 16.0)
+#define SMAA_CORNER_ROUNDING_NORM (float(SMAA_CORNER_ROUNDING) / 100.0)
+
+//-----------------------------------------------------------------------------
+// Porting Functions
+
+// HLSL 3 mapping. The "Point" variants expand to the same call as the regular
+// ones, and the offset variants add offset * SMAA_RT_METRICS.xy to the
+// coordinate by hand.
+#if defined(SMAA_HLSL_3)
+#define SMAATexture2D(tex) sampler2D tex
+#define SMAATexturePass2D(tex) tex
+#define SMAASampleLevelZero(tex, coord) tex2Dlod(tex, float4(coord, 0.0, 0.0))
+#define SMAASampleLevelZeroPoint(tex, coord) tex2Dlod(tex, float4(coord, 0.0, 0.0))
+#define SMAASampleLevelZeroOffset(tex, coord, offset) tex2Dlod(tex, float4(coord + offset * SMAA_RT_METRICS.xy, 0.0, 0.0))
+#define SMAASample(tex, coord) tex2D(tex, coord)
+#define SMAASamplePoint(tex, coord) tex2D(tex, coord)
+#define SMAASampleOffset(tex, coord, offset) tex2D(tex, coord + offset * SMAA_RT_METRICS.xy)
+#define SMAA_FLATTEN [flatten]
+#define SMAA_BRANCH [branch]
+#endif
+// HLSL 4 / 4.1 mapping. Declares a linear and a point sampler state (both
+// clamped) and routes the macros through them; the offset variants pass the
+// offset straight to Sample/SampleLevel. SMAAGather exists only for 4.1.
+#if defined(SMAA_HLSL_4) || defined(SMAA_HLSL_4_1)
+SamplerState LinearSampler { Filter = MIN_MAG_LINEAR_MIP_POINT; AddressU = Clamp; AddressV = Clamp; };
+SamplerState PointSampler { Filter = MIN_MAG_MIP_POINT; AddressU = Clamp; AddressV = Clamp; };
+#define SMAATexture2D(tex) Texture2D tex
+#define SMAATexturePass2D(tex) tex
+#define SMAASampleLevelZero(tex, coord) tex.SampleLevel(LinearSampler, coord, 0)
+#define SMAASampleLevelZeroPoint(tex, coord) tex.SampleLevel(PointSampler, coord, 0)
+#define SMAASampleLevelZeroOffset(tex, coord, offset) tex.SampleLevel(LinearSampler, coord, 0, offset)
+#define SMAASample(tex, coord) tex.Sample(LinearSampler, coord)
+#define SMAASamplePoint(tex, coord) tex.Sample(PointSampler, coord)
+#define SMAASampleOffset(tex, coord, offset) tex.Sample(LinearSampler, coord, offset)
+#define SMAA_FLATTEN [flatten]
+#define SMAA_BRANCH [branch]
+#define SMAATexture2DMS2(tex) Texture2DMS<float4, 2> tex
+#define SMAALoad(tex, pos, sample) tex.Load(pos, sample)
+#if defined(SMAA_HLSL_4_1)
+#define SMAAGather(tex, coord) tex.Gather(LinearSampler, coord, 0)
+#endif
+#endif
+// GLSL 3 / 4 mapping. Besides the sampling macros this supplies the HLSL
+// spellings (lerp, saturate, mad, floatN/intN/boolN) in terms of GLSL
+// built-ins. The "Point" variants expand to the same call as the regular ones.
+#if defined(SMAA_GLSL_3) || defined(SMAA_GLSL_4)
+#define SMAATexture2D(tex) sampler2D tex
+#define SMAATexturePass2D(tex) tex
+#define SMAASampleLevelZero(tex, coord) textureLod(tex, coord, 0.0)
+#define SMAASampleLevelZeroPoint(tex, coord) textureLod(tex, coord, 0.0)
+#define SMAASampleLevelZeroOffset(tex, coord, offset) textureLodOffset(tex, coord, 0.0, offset)
+#define SMAASample(tex, coord) texture(tex, coord)
+#define SMAASamplePoint(tex, coord) texture(tex, coord)
+// The optional third argument of texture() is an LOD bias, not a texel
+// offset, so the offset variant has to go through textureOffset().
+#define SMAASampleOffset(tex, coord, offset) textureOffset(tex, coord, offset)
+#define SMAA_FLATTEN
+#define SMAA_BRANCH
+#define lerp(a, b, t) mix(a, b, t)
+#define saturate(a) clamp(a, 0.0, 1.0)
+#if defined(SMAA_GLSL_4)
+#define mad(a, b, c) fma(a, b, c)
+#define SMAAGather(tex, coord) textureGather(tex, coord)
+#else
+// Arguments are parenthesised so that compound expressions such as
+// mad(x + y, z, w) expand with the intended precedence.
+#define mad(a, b, c) ((a) * (b) + (c))
+#endif
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+#define int2 ivec2
+#define int3 ivec3
+#define int4 ivec4
+#define bool2 bvec2
+#define bool3 bvec3
+#define bool4 bvec4
+#endif
+
+#if !defined(SMAA_HLSL_3) && !defined(SMAA_HLSL_4) && !defined(SMAA_HLSL_4_1) && !defined(SMAA_GLSL_3) && !defined(SMAA_GLSL_4) && !defined(SMAA_CUSTOM_SL)
+#error you must define the shading language: SMAA_HLSL_*, SMAA_GLSL_* or SMAA_CUSTOM_SL
+#endif
+
+/**
+ * Reads the red channel of 'tex' at the current coordinate and at the two
+ * coordinates packed in offset[0] (.xy, then .zw), and returns the three
+ * values in that order.
+ */
+vec3 SMAAGatherNeighbours(vec2 coord, vec4 offset[3], sampler2D tex)
+{
+    vec3 taps;
+    taps.x = texture(tex, coord).r;
+    taps.y = texture(tex, offset[0].xy).r;
+    taps.z = texture(tex, offset[0].zw).r;
+    return taps;
+}
+
+/**
+ * Derives a per-direction threshold from the predication texture.
+ *
+ * Where the predication buffer differs from its neighbour by at least
+ * SMAA_PREDICATION_THRESHOLD, the scaled global threshold is reduced by
+ * SMAA_PREDICATION_STRENGTH. The two thresholds are returned in .xy; .z is
+ * always 1.0.
+ */
+vec3 SMAACalculatePredicatedThreshold(vec2 coord, vec4 offset[3], sampler2D predicationTex)
+{
+    vec3 taps = SMAAGatherNeighbours(coord, offset, predicationTex);
+    vec2 predicationDelta = abs(taps.xx - taps.yz);
+    vec2 predicationEdges = step(SMAA_PREDICATION_THRESHOLD, predicationDelta);
+    vec2 scaledThreshold = SMAA_PREDICATION_SCALE * SMAA_THRESHOLD * (1.0 - SMAA_PREDICATION_STRENGTH * predicationEdges);
+    return vec3(scaledThreshold, 1.0);
+}
+
+/**
+ * Conditional move (two components): every component of 'variable' whose
+ * matching 'cond' component is true is overwritten with the corresponding
+ * component of 'value'; the others are left as they are.
+ */
+void SMAAMovc(bvec2 cond, inout vec2 variable, vec2 value) {
+    variable.x = cond.x ? value.x : variable.x;
+    variable.y = cond.y ? value.y : variable.y;
+}
+ 
+// Four-component conditional move: each component of 'variable' is replaced
+// by the matching component of 'value' where 'cond' is true.
+void SMAAMovc(bvec4 cond, inout vec4 variable, vec4 value) {
+    if (cond.x) variable.x = value.x;
+    if (cond.y) variable.y = value.y;
+    if (cond.z) variable.z = value.z;
+    if (cond.w) variable.w = value.w;
+}
--- a/anti-aliasing/shaders/smaa/smaa-edge-detection.slang
+++ b/anti-aliasing/shaders/smaa/smaa-edge-detection.slang
@ -0,0 +1,185 @@
+#version 450
+
+// Push-constant block: three vec4 size fields and a frame counter.
+// NOTE(review): presumably filled in per pass by the host front end -- confirm.
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+// Uniform buffer at set 0, binding 0, holding the matrix applied to Position
+// in the vertex stage.
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+#include "smaa-common.h"
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 texcoord;
+layout(location = 1) out vec4 offset[3];
+
+// Edge-detection vertex stage: transforms the position, forwards the texture
+// coordinate and precomputes three pairs of neighbour coordinates. Each
+// offset[i] packs two coordinates (.xy and .zw), displaced from the current
+// one by SMAA_RT_METRICS.xy times (-1, 0)/(0, -1), (1, 0)/(0, 1) and
+// (-2, 0)/(0, -2) respectively.
+void main()
+{
+    vec4 metrics = SMAA_RT_METRICS.xyxy;
+    vec4 uv = TexCoord.xyxy;
+
+    gl_Position = global.MVP * Position;
+    texcoord = TexCoord;
+
+    offset[0] = fma(metrics, vec4(-1.0, 0.0, 0.0, -1.0), uv);
+    offset[1] = fma(metrics, vec4( 1.0, 0.0, 0.0,  1.0), uv);
+    offset[2] = fma(metrics, vec4(-2.0, 0.0, 0.0, -2.0), uv);
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 texcoord;
+layout(location = 1) in vec4 offset[3];
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+/**
+ * Luma Edge Detection
+ *
+ * Compares the luma of the current pixel with its left and top neighbours
+ * (offset[0]) and flags an edge wherever the difference reaches the
+ * threshold. The right/bottom (offset[1]) and left-left/top-top (offset[2])
+ * neighbours feed the local contrast adaptation, which can clear a flagged
+ * edge again.
+ *
+ * Returns (left edge, top edge), each 0.0 or 1.0. The fragment is discarded
+ * when neither edge passes the initial threshold.
+ *
+ * IMPORTANT NOTICE: luma edge detection requires gamma-corrected colors, and
+ * thus 'colorTex' should be a non-sRGB texture.
+ */
+vec2 SMAALumaEdgeDetectionPS(vec2 texcoord, vec4 offset[3], sampler2D colorTex
+                               #if SMAA_PREDICATION
+                               , sampler2D predicationTex
+                               #endif
+                               ) {
+    // Calculate the threshold:
+    #if SMAA_PREDICATION
+    // The helper returns a vec3; only .xy carries the two thresholds, and
+    // GLSL has no implicit vec3 -> vec2 conversion.
+    vec2 threshold = SMAACalculatePredicatedThreshold(texcoord, offset, predicationTex).xy;
+    #else
+    vec2 threshold = vec2(SMAA_THRESHOLD, SMAA_THRESHOLD);
+    #endif
+
+    // Calculate lumas:
+    vec3 weights = vec3(0.2126, 0.7152, 0.0722);
+    float L = dot(texture(colorTex, texcoord).rgb, weights);
+
+    float Lleft = dot(texture(colorTex, offset[0].xy).rgb, weights);
+    float Ltop  = dot(texture(colorTex, offset[0].zw).rgb, weights);
+
+    // We do the usual threshold:
+    vec4 delta;
+    delta.xy = abs(L - vec2(Lleft, Ltop));
+    vec2 edges = step(threshold, delta.xy);
+
+    // Then discard if there is no edge:
+    if (dot(edges, vec2(1.0, 1.0)) == 0.0)
+        discard;
+
+    // Calculate right and bottom deltas:
+    float Lright = dot(texture(colorTex, offset[1].xy).rgb, weights);
+    float Lbottom  = dot(texture(colorTex, offset[1].zw).rgb, weights);
+    delta.zw = abs(L - vec2(Lright, Lbottom));
+
+    // Calculate the maximum delta in the direct neighborhood:
+    vec2 maxDelta = max(delta.xy, delta.zw);
+
+    // Calculate left-left and top-top deltas:
+    float Lleftleft = dot(texture(colorTex, offset[2].xy).rgb, weights);
+    float Ltoptop = dot(texture(colorTex, offset[2].zw).rgb, weights);
+    delta.zw = abs(vec2(Lleft, Ltop) - vec2(Lleftleft, Ltoptop));
+
+    // Calculate the final maximum delta:
+    maxDelta = max(maxDelta.xy, delta.zw);
+    float finalDelta = max(maxDelta.x, maxDelta.y);
+
+    // Local contrast adaptation: keep an edge only if its own delta, scaled
+    // by the adaptation factor, still reaches the strongest nearby delta.
+    edges.xy *= step(finalDelta, SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR * delta.xy);
+
+    return edges;
+}
+
+/**
+ * Color Edge Detection
+ *
+ * Same scheme as the luma variant, but each delta is the largest per-channel
+ * absolute RGB difference between two samples instead of a luma difference.
+ *
+ * Returns (left edge, top edge), each 0.0 or 1.0. The fragment is discarded
+ * when neither edge passes the initial threshold.
+ *
+ * IMPORTANT NOTICE: color edge detection requires gamma-corrected colors, and
+ * thus 'colorTex' should be a non-sRGB texture.
+ */
+vec2 SMAAColorEdgeDetectionPS(vec2 texcoord,
+                                vec4 offset[3],
+                                sampler2D colorTex
+                                #if SMAA_PREDICATION
+                                , sampler2D predicationTex
+                                #endif
+                                ) {
+    // Calculate the threshold:
+    #if SMAA_PREDICATION
+    // The helper returns a vec3; only .xy carries the two thresholds, and
+    // GLSL has no implicit vec3 -> vec2 conversion.
+    vec2 threshold = SMAACalculatePredicatedThreshold(texcoord, offset, predicationTex).xy;
+    #else
+    vec2 threshold = vec2(SMAA_THRESHOLD, SMAA_THRESHOLD);
+    #endif
+
+    // Calculate color deltas:
+    vec4 delta;
+    vec3 C = texture(colorTex, texcoord).rgb;
+
+    vec3 Cleft = texture(colorTex, offset[0].xy).rgb;
+    vec3 t = abs(C - Cleft);
+    delta.x = max(max(t.r, t.g), t.b);
+
+    vec3 Ctop  = texture(colorTex, offset[0].zw).rgb;
+    t = abs(C - Ctop);
+    delta.y = max(max(t.r, t.g), t.b);
+
+    // We do the usual threshold:
+    vec2 edges = step(threshold, delta.xy);
+
+    // Then discard if there is no edge:
+    if (dot(edges, vec2(1.0, 1.0)) == 0.0)
+        discard;
+
+    // Calculate right and bottom deltas:
+    vec3 Cright = texture(colorTex, offset[1].xy).rgb;
+    t = abs(C - Cright);
+    delta.z = max(max(t.r, t.g), t.b);
+
+    vec3 Cbottom  = texture(colorTex, offset[1].zw).rgb;
+    t = abs(C - Cbottom);
+    delta.w = max(max(t.r, t.g), t.b);
+
+    // Calculate the maximum delta in the direct neighborhood:
+    vec2 maxDelta = max(delta.xy, delta.zw);
+
+    // Calculate left-left and top-top deltas. These measure the contrast of
+    // the neighbouring edge, so they are taken between the left (top) sample
+    // and the one beyond it, exactly as the luma variant does.
+    vec3 Cleftleft  = texture(colorTex, offset[2].xy).rgb;
+    t = abs(Cleft - Cleftleft);
+    delta.z = max(max(t.r, t.g), t.b);
+
+    vec3 Ctoptop = texture(colorTex, offset[2].zw).rgb;
+    t = abs(Ctop - Ctoptop);
+    delta.w = max(max(t.r, t.g), t.b);
+
+    // Calculate the final maximum delta:
+    maxDelta = max(maxDelta.xy, delta.zw);
+    float finalDelta = max(maxDelta.x, maxDelta.y);
+
+    // Local contrast adaptation: keep an edge only if its own delta, scaled
+    // by the adaptation factor, still reaches the strongest nearby delta.
+    edges.xy *= step(finalDelta, SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR * delta.xy);
+
+    return edges;
+}
+
+/**
+ * Depth Edge Detection
+ *
+ * Flags a left and/or top edge where the red channel of 'depthTex' differs
+ * from the corresponding neighbour by at least SMAA_DEPTH_THRESHOLD. Returns
+ * (left edge, top edge) as 0.0/1.0 and discards the fragment when both are 0.
+ */
+vec2 SMAADepthEdgeDetectionPS(vec2 texcoord,
+                                vec4 offset[3],
+                                sampler2D depthTex) {
+    vec3 depth = SMAAGatherNeighbours(texcoord, offset, depthTex);
+    vec2 depthDelta = abs(depth.xx - depth.yz);
+    vec2 found = step(SMAA_DEPTH_THRESHOLD, depthDelta);
+
+    bool noEdge = dot(found, vec2(1.0, 1.0)) == 0.0;
+    if (noEdge) {
+        discard;
+    }
+
+    return found;
+}
+
+// Fragment entry point: runs luma edge detection on Source and writes the two
+// edge flags to the red and green channels; blue and alpha are 0.0.
+void main()
+{
+    vec2 edgeFlags = SMAALumaEdgeDetectionPS(texcoord, offset, Source);
+    FragColor = vec4(edgeFlags, 0.0, 0.0);
+}
--- a/anti-aliasing/shaders/smaa/smaa-neighborhood-blending.slang
+++ b/anti-aliasing/shaders/smaa/smaa-neighborhood-blending.slang
@ -0,0 +1,102 @@
+#version 450
+
+// Push-constant block: three vec4 size fields and a frame counter.
+// NOTE(review): presumably filled in per pass by the host front end -- confirm.
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+// Uniform buffer at set 0, binding 0, holding the matrix applied to Position
+// in the vertex stage.
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+#include "smaa-common.h"
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 texcoord;
+layout(location = 1) out vec4 offset;
+
+// Neighborhood-blending vertex stage: transforms the position, forwards the
+// texture coordinate and packs two neighbour coordinates into 'offset',
+// displaced by SMAA_RT_METRICS.xy times (1, 0) in .xy and (0, 1) in .zw.
+void main()
+{
+    vec4 metrics = SMAA_RT_METRICS.xyxy;
+    vec4 uv = TexCoord.xyxy;
+
+    gl_Position = global.MVP * Position;
+    texcoord = TexCoord;
+    offset = fma(metrics, vec4( 1.0, 0.0, 0.0,  1.0), uv);
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 texcoord;
+layout(location = 1) in vec4 offset;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+layout(set = 0, binding = 3) uniform sampler2D Original;
+
+//-----------------------------------------------------------------------------
+// Neighborhood Blending Pixel Shader (Third Pass)
+
+/**
+ * Blends the current pixel with a neighbour according to the weights stored
+ * in 'blendTex'.
+ *
+ * When all four weights are (almost) zero the colour at 'texcoord' is
+ * returned unchanged. Otherwise the stronger of the horizontal and vertical
+ * weight pairs is chosen, normalised, and used to mix two samples of
+ * 'colorTex' taken at coordinates shifted by those weights.
+ *
+ * With SMAA_REPROJECTION enabled, alpha carries sqrt(5 * |velocity|).
+ */
+vec4 SMAANeighborhoodBlendingPS(vec2 texcoord,
+                                  vec4 offset,
+                                  sampler2D colorTex,
+                                  sampler2D blendTex
+                                  #if SMAA_REPROJECTION
+                                  , SMAATexture2D(velocityTex)
+                                  #endif
+                                  ) {
+    // Blending weights around the current pixel, packed as
+    // (right, top, left, bottom):
+    vec4 weights;
+    weights.x = texture(blendTex, offset.xy).a;
+    weights.y = texture(blendTex, offset.zw).g;
+    weights.wz = texture(blendTex, texcoord).xz;
+
+    // Nothing to blend: pass the colour through.
+    if (dot(weights, vec4(1.0, 1.0, 1.0, 1.0)) < 1e-5) {
+        vec4 passthrough = textureLod(colorTex, texcoord, 0.0);
+
+        #if SMAA_REPROJECTION
+        vec2 velocity = SMAA_DECODE_VELOCITY(textureLod(velocityTex, texcoord, 0.0));
+
+        // Pack velocity into the alpha channel:
+        passthrough.a = sqrt(5.0 * length(velocity));
+        #endif
+
+        return passthrough;
+    }
+
+    // max(horizontal) > max(vertical)
+    bool horizontal = max(weights.x, weights.z) > max(weights.y, weights.w);
+
+    // Pick the offsets and weights of the dominant direction:
+    vec4 blendingOffset = horizontal ? vec4(weights.x, 0.0, weights.z, 0.0)
+                                     : vec4(0.0, weights.y, 0.0, weights.w);
+    vec2 blendingWeight = horizontal ? weights.xz : weights.yw;
+    blendingWeight /= dot(blendingWeight, vec2(1.0, 1.0));
+
+    // Texture coordinates of the two taps (.xy and .zw):
+    vec4 signedMetrics = vec4(SMAA_RT_METRICS.xy, -SMAA_RT_METRICS.xy);
+    vec4 blendingCoord = fma(blendingOffset, signedMetrics, texcoord.xyxy);
+
+    // Bilinear filtering at the shifted coordinates mixes the current pixel
+    // with the chosen neighbor:
+    vec4 blended = blendingWeight.x * textureLod(colorTex, blendingCoord.xy, 0.0);
+    blended += blendingWeight.y * textureLod(colorTex, blendingCoord.zw, 0.0);
+
+    #if SMAA_REPROJECTION
+    // Antialias velocity for proper reprojection in a later stage:
+    vec2 velocity = blendingWeight.x * SMAA_DECODE_VELOCITY(textureLod(velocityTex, blendingCoord.xy, 0.0));
+    velocity += blendingWeight.y * SMAA_DECODE_VELOCITY(textureLod(velocityTex, blendingCoord.zw, 0.0));
+
+    // Pack velocity into the alpha channel:
+    blended.a = sqrt(5.0 * length(velocity));
+    #endif
+
+    return blended;
+}
+
+// Fragment entry point: 'Original' supplies the colour and 'Source' supplies
+// the blending weights.
+void main()
+{
+    vec4 resolved = SMAANeighborhoodBlendingPS(texcoord, offset, Original, Source);
+    FragColor = resolved;
+}
--- a/anti-aliasing/smaa.slangp
+++ b/anti-aliasing/smaa.slangp
@ -0,0 +1,18 @@
+shaders = 3
+
+shader0 = shaders/smaa/smaa-edge-detection.slang
+filter_linear0 = true
+scale_type0 = source
+scale0 = 1.0
+
+shader1 = shaders/smaa/smaa-blend-weight-calculation.slang
+filter_linear1 = true
+scale_type1 = source
+scale1 = 1.0
+
+shader2 = shaders/smaa/smaa-neighborhood-blending.slang
+filter_linear2 = true
+
+textures = "areaTex;searchTex"
+areaTex = shaders/smaa/AreaTex.png
+searchTex = shaders/smaa/SearchTex.png
--- a/auto-box/box-center.frag
+++ b/auto-box/box-center.frag
@ -0,0 +1,29 @@
+#version 150
+#define float2 vec2
+#define float3 vec3
+#define float4 vec4
+
+
+
+
+
+
+
+
+// Single std140 uniform block carrying the three size vectors, the frame
+// counter and the MVP matrix. The fragment stage in this file reads none of
+// its members.
+layout(std140) uniform UBO
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   mat4 MVP;
+}global;
+
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+uniform sampler2D Source;
+
+// Writes the RGB of Source at vTexCoord with alpha forced to 1.0.
+void main()
+{
+    vec3 rgb = texture(Source, vTexCoord).rgb;
+    FragColor = vec4(rgb, 1.0);
+}
--- a/auto-box/box-center.gsh
+++ b/auto-box/box-center.gsh
--- a/auto-box/box-center.ppslang
+++ b/auto-box/box-center.ppslang
@ -0,0 +1,44 @@
+#version 450
+
+
+
+
+
+
+
+
+// std140 uniform block at set 0, binding 0. The vertex stage below reads
+// OutputSize.xy, SourceSize.xy and MVP.
+layout(std140, set = 0, binding = 0)uniform UBO
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   mat4 MVP;
+} global;
+
+#pragma stagevertex
+layout(location = 0)in vec4 Position;
+layout(location = 1)in vec2 TexCoord;
+layout(location = 0)out vec2 vTexCoord;
+
+// Vertex stage: transforms the position and rescales the texture coordinate
+// about the centre (0.5, 0.5) by OutputSize.xy / SourceSize.xy, divided by a
+// box scale that is fixed at 1.0.
+void main()
+{
+    vec2 center = vec2(0.5);
+    vec2 boxScale = vec2(1.0);
+    vec2 ratio = (global.OutputSize.xy / global.SourceSize.xy) / boxScale;
+
+    gl_Position = global.MVP * Position;
+
+    vec2 fromCenter = TexCoord - center;
+    vTexCoord = center + fromCenter * ratio;
+}
+
+#pragma stagefragment
+layout(location = 0)in vec2 vTexCoord;
+layout(location = 0)out vec4 FragColor;
+layout(set = 0, binding = 2)uniform sampler2D Source;
+
+// Writes the RGB of Source at vTexCoord with alpha forced to 1.0.
+void main()
+{
+    vec3 rgb = texture(Source, vTexCoord).rgb;
+    FragColor = vec4(rgb, 1.0);
+}
--- a/Show more
+++ b/Show more