#version 450
layout(local_size_x = 8, local_size_y = 8) in;

#include "common.h"

// Resolve pass between an "unscaled" integer texture and a "scaled" RGBA8 image.
//
// - SCALED defined:     reads uUnscaled (usampler2D), writes the scaled rgba8 image.
// - SCALED not defined: reads the scaled texture, writes uUnscaled (r32ui image),
//                       optionally box-filtering SCALE x SCALE texels per output pixel.
// - MSAA defined:       the scaled resource is multisampled.

#if defined(SCALED)
#if defined(MSAA)
layout(set = 0, binding = 0, rgba8) uniform writeonly image2DMS uScaledMS;
#else
layout(set = 0, binding = 0, rgba8) uniform writeonly image2D uScaled;
#endif
layout(set = 0, binding = 1) uniform usampler2D uUnscaled;
#else
layout(set = 0, binding = 0, r32ui) uniform writeonly uimage2D uUnscaled;
#if defined(MSAA)
layout(set = 0, binding = 1) uniform sampler2DMS uScaledMS;
#else
layout(set = 0, binding = 1) uniform sampler2D uScaled;
#endif
// Specialization constants, only used by the scaled -> unscaled direction.
layout(constant_id = 0) const int NUM_SAMPLES = 1; // Samples averaged per texel in the filtered MSAA path.
layout(constant_id = 1) const int FILTER_MODE = 0; // 0: fetch a single texel/sample, otherwise: box filter.
layout(constant_id = 3) const int SCALE = 1;       // Scaled-to-unscaled size ratio.
#endif

layout(set = 1, binding = 0, std140) uniform Rects
{
	// std140 pads array elements to 16 bytes, hence uvec4. Only .xy is read here.
	// One entry per workgroup Z slice.
	uvec4 offsets[1024];
};

layout(push_constant, std430) uniform PushMe
{
	vec2 inv_source_size; // Multiplied with (coord + 0.5) to form normalized sampling coordinates.
	uint scale;           // Multiplier applied to the rect offset in the SCALED path.
} registers;

void main()
{
#if defined(SCALED)
	// Unscaled -> scaled.
	uvec2 coord = gl_GlobalInvocationID.xy + offsets[gl_WorkGroupID.z].xy * registers.scale;
	vec2 uv = (vec2(coord) + 0.5) * registers.inv_source_size;
	uint value = textureLod(uUnscaled, uv, 0.0).x;
	// abgr1555() is not defined in this file; presumably it comes from common.h.
	vec4 unorm = abgr1555(value);

#if defined(MSAA)
	// Every sample of the destination texel receives the same color.
	int samples = imageSamples(uScaledMS);
	for (int i = 0; i < samples; i++)
		imageStore(uScaledMS, ivec2(coord), i, unorm);
#else
	imageStore(uScaled, ivec2(coord), unorm);
#endif

#else
	// Scaled -> unscaled.
	uvec2 coord = gl_GlobalInvocationID.xy + offsets[gl_WorkGroupID.z].xy;
	vec4 value;

	if (FILTER_MODE == 0)
	{
		// Unfiltered: take the top-left texel (sample 0 for MSAA) of the SCALE x SCALE footprint.
#if defined(MSAA)
		value = texelFetch(uScaledMS, ivec2(coord * SCALE), 0);
#else
		value = texelFetch(uScaled, ivec2(coord * SCALE), 0);
#endif
	}
	else
	{
#if defined(MSAA)
		// Average every sample of every texel in the SCALE x SCALE footprint.
		value = vec4(0.0);
		ivec2 uv = ivec2(coord * SCALE);
		for (int y = 0; y < SCALE; y++)
			for (int x = 0; x < SCALE; x++)
				for (int i = 0; i < NUM_SAMPLES; i++)
					value += texelFetch(uScaledMS, uv + ivec2(x, y), i);
		value /= SCALE * SCALE * NUM_SAMPLES;
#else
		// Box filter built from textureLod taps. Each tap is expected to average a
		// 2x2 quad, which relies on the sampler using linear filtering.
		// NOTE(review): the sampler is configured outside this file - confirm.
		// The hard-coded 1024 / 512 (and 8192 / 4096 below) presumably mirror a
		// 1024x512 unscaled source - confirm against the host code.
		vec2 uv = (vec2(coord) + 0.5) * registers.inv_source_size;

		if (SCALE == 2)
		{
			// Grabs 4 texels and averages them box-style.
			value = textureLod(uScaled, uv, 0.0);
		}
		else if (SCALE == 4)
		{
			// Grabs 16 texels and averages them box-style: 2x2 taps, 2 texels apart.
			uv -= vec2(0.25 / 1024.0, 0.25 / 512.0);
			value = textureLod(uScaled, uv, 0.0);
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(2, 0));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(0, 2));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(2, 2));
			value /= 4.0;
		}
		else if (SCALE == 8)
		{
			// Grabs 64 texels and averages them: 4x4 taps, 2 texels apart.
			uv -= vec2(0.375 / 1024.0, 0.375 / 512.0);
			value = textureLod(uScaled, uv, 0.0);
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(2, 0));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(4, 0));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(6, 0));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(0, 2));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(2, 2));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(4, 2));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(6, 2));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(0, 4));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(2, 4));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(4, 4));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(6, 4));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(0, 6));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(2, 6));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(4, 6));
			value += textureLodOffset(uScaled, uv, 0.0, ivec2(6, 6));
			value /= 16.0;
		}
		else if (SCALE == 16)
		{
			// 8x8 taps covering a 16x16 footprint. Offsets are applied in UV space
			// via a loop instead of textureLodOffset.
			// Should probably split this shader up and use LDS to do the final mip.
			value = vec4(0.0);
			uv -= vec2(0.4375 / 1024.0, 0.4375 / 512.0);
			for (int y = 0; y < 8; y++)
				for (int x = 0; x < 8; x++)
					value += textureLod(uScaled, uv + vec2(x, y) * vec2(1.0 / 8192.0, 1.0 / 4096.0), 0.0);
			value /= 64.0;
		}
		else
		{
			// Any other SCALE (including the default of 1) used to leave `value`
			// uninitialized, so an undefined color was packed and stored.
			// Fall back to a single tap at the center of the footprint.
			value = textureLod(uScaled, uv, 0.0);
		}
#endif
	}

	// pack_abgr1555() is not defined in this file; presumably it comes from common.h.
	uint packed_color = pack_abgr1555(value);
	imageStore(uUnscaled, ivec2(coord), uvec4(packed_color));
#endif
}