/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
// NOTE(review): the system header names were missing from the reviewed text (bare "#include").
// <algorithm> (std::find_if), <assert.h> (assert), <locale.h> (localeconv) and <langinfo.h>
// (nl_langinfo) are required by code visible in this file; the remaining names should be
// confirmed against the upstream file.
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>
#include <array>

#ifndef _WIN32
#include <langinfo.h>
#endif
#include <locale.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

enum ExtraSubExpressionType
{
	// Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
	EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
	EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
};

// Returns true when `op` is one of the unsigned-integer SPIR-V opcodes listed below.
static bool is_unsigned_opcode(Op op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case OpShiftRightLogical:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
	case OpULessThan:
	case OpULessThanEqual:
	case OpUConvert:
	case OpUDiv:
	case OpUMod:
	case OpUMulExtended:
	case OpConvertUToF:
	case OpConvertFToU:
		return true;

	default:
		return false;
	}
}

// Returns true when `op` is one of the unsigned GLSL.std.450 extended instructions listed below.
static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case GLSLstd450UClamp:
	case GLSLstd450UMin:
	case GLSLstd450UMax:
	case GLSLstd450FindUMsb:
		return true;

	default:
		return false;
	}
}

// True for the HLSL cbuffer and std140 packing standards (plain and offset/enhanced variants).
static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
	case BufferPackingStd140:
	case BufferPackingStd140EnhancedLayout:
		return true;

	default:
		return false;
	}
}

// True for the two HLSL cbuffer packing standards.
static bool packing_is_hlsl(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
		return true;

	default:
		return false;
	}
}

// False for the four base standards listed below; every other standard reports true.
static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140:
	case BufferPackingStd430:
	case BufferPackingScalar:
	case BufferPackingHLSLCbuffer:
		return false;

	default:
		return true;
	}
}

// True for the scalar packing standards (plain and enhanced-layout).
static bool packing_is_scalar(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingScalar:
	case BufferPackingScalarEnhancedLayout:
		return true;

	default:
		return false;
	}
}

// Maps an enhanced-layout / pack-offset standard to its base standard; other values pass through unchanged.
static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140EnhancedLayout:
		return BufferPackingStd140;
	case BufferPackingStd430EnhancedLayout:
		return BufferPackingStd430;
	case BufferPackingHLSLCbufferPackOffset:
		return BufferPackingHLSLCbuffer;
	case BufferPackingScalarEnhancedLayout:
		return BufferPackingScalar;
	default:
		return packing;
	}
}

// Seeds the ES/version options from the module's source info (when known) and records the
// decimal-point character of the current locale.
void CompilerGLSL::init()
{
	if (ir.source.known)
	{
		options.es = ir.source.es;
		options.version = ir.source.version;
	}

	// Query the locale to see what the decimal point is.
	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
	// rather than setting locales ourselves. Settings locales in a safe and isolated way is rather
	// tricky.
#ifdef _WIN32
	// On Windows, localeconv uses thread-local storage, so it should be fine.
const struct lconv *conv = localeconv(); if (conv && conv->decimal_point) current_locale_radix_character = *conv->decimal_point; #elif defined(__ANDROID__) && __ANDROID_API__ < 26 // nl_langinfo is not supported on this platform, fall back to the worse alternative. const struct lconv *conv = localeconv(); if (conv && conv->decimal_point) current_locale_radix_character = *conv->decimal_point; #else // localeconv, the portable function is not MT safe ... const char *decimal_point = nl_langinfo(RADIXCHAR); if (decimal_point && *decimal_point != '\0') current_locale_radix_character = *decimal_point; #endif } static const char *to_pls_layout(PlsFormat format) { switch (format) { case PlsR11FG11FB10F: return "layout(r11f_g11f_b10f) "; case PlsR32F: return "layout(r32f) "; case PlsRG16F: return "layout(rg16f) "; case PlsRGB10A2: return "layout(rgb10_a2) "; case PlsRGBA8: return "layout(rgba8) "; case PlsRG16: return "layout(rg16) "; case PlsRGBA8I: return "layout(rgba8i)"; case PlsRG16I: return "layout(rg16i) "; case PlsRGB10A2UI: return "layout(rgb10_a2ui) "; case PlsRGBA8UI: return "layout(rgba8ui) "; case PlsRG16UI: return "layout(rg16ui) "; case PlsR32UI: return "layout(r32ui) "; default: return ""; } } static SPIRType::BaseType pls_format_to_basetype(PlsFormat format) { switch (format) { default: case PlsR11FG11FB10F: case PlsR32F: case PlsRG16F: case PlsRGB10A2: case PlsRGBA8: case PlsRG16: return SPIRType::Float; case PlsRGBA8I: case PlsRG16I: return SPIRType::Int; case PlsRGB10A2UI: case PlsRGBA8UI: case PlsRG16UI: case PlsR32UI: return SPIRType::UInt; } } static uint32_t pls_format_to_components(PlsFormat format) { switch (format) { default: case PlsR32F: case PlsR32UI: return 1; case PlsRG16F: case PlsRG16: case PlsRG16UI: case PlsRG16I: return 2; case PlsR11FG11FB10F: return 3; case PlsRGB10A2: case PlsRGBA8: case PlsRGBA8I: case PlsRGB10A2UI: case PlsRGBA8UI: return 4; } } const char *CompilerGLSL::vector_swizzle(int vecsize, int index) { static const char 
*const swizzle[4][4] = { { ".x", ".y", ".z", ".w" }, { ".xy", ".yz", ".zw", nullptr }, { ".xyz", ".yzw", nullptr, nullptr }, #if defined(__GNUC__) && (__GNUC__ == 9) // This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947. // This array ends up being compiled as all nullptrs, tripping the assertions below. { "", nullptr, nullptr, "$" }, #else { "", nullptr, nullptr, nullptr }, #endif }; assert(vecsize >= 1 && vecsize <= 4); assert(index >= 0 && index < 4); assert(swizzle[vecsize - 1][index]); return swizzle[vecsize - 1][index]; } void CompilerGLSL::reset(uint32_t iteration_count) { // Sanity check the iteration count to be robust against a certain class of bugs where // we keep forcing recompilations without making clear forward progress. // In buggy situations we will loop forever, or loop for an unbounded number of iterations. // Certain types of recompilations are considered to make forward progress, // but in almost all situations, we'll never see more than 3 iterations. // It is highly context-sensitive when we need to force recompilation, // and it is not practical with the current architecture // to resolve everything up front. if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress) SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!"); // We do some speculative optimizations which should pretty much always work out, // but just in case the SPIR-V is rather weird, recompile until it's happy. // This typically only means one extra pass. clear_force_recompile(); // Clear invalid expression tracking. invalid_expressions.clear(); composite_insert_overwritten.clear(); current_function = nullptr; // Clear temporary usage tracking. 
expression_usage_counts.clear(); forwarded_temporaries.clear(); suppressed_usage_tracking.clear(); // Ensure that we declare phi-variable copies even if the original declaration isn't deferred flushed_phi_variables.clear(); current_emitting_switch_stack.clear(); reset_name_caches(); ir.for_each_typed_id([&](uint32_t, SPIRFunction &func) { func.active = false; func.flush_undeclared = true; }); ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); }); ir.reset_all_of_type(); ir.reset_all_of_type(); statement_count = 0; indent = 0; current_loop_level = 0; } void CompilerGLSL::remap_pls_variables() { for (auto &input : pls_inputs) { auto &var = get(input.id); bool input_is_target = false; if (var.storage == StorageClassUniformConstant) { auto &type = get(var.basetype); input_is_target = type.image.dim == DimSubpassData; } if (var.storage != StorageClassInput && !input_is_target) SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs."); var.remapped_variable = true; } for (auto &output : pls_outputs) { auto &var = get(output.id); if (var.storage != StorageClassOutput) SPIRV_CROSS_THROW("Can only use out variables for PLS outputs."); var.remapped_variable = true; } } void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent) { subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location }); inout_color_attachments.push_back({ color_location, coherent }); } bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const { return std::find_if(begin(inout_color_attachments), end(inout_color_attachments), [&](const std::pair &elem) { return elem.first == location; }) != end(inout_color_attachments); } bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const { return std::find_if(begin(inout_color_attachments), end(inout_color_attachments), [&](const std::pair &elem) { return elem.first == location && 
!elem.second; }) != end(inout_color_attachments); } void CompilerGLSL::find_static_extensions() { ir.for_each_typed_id([&](uint32_t, const SPIRType &type) { if (type.basetype == SPIRType::Double) { if (options.es) SPIRV_CROSS_THROW("FP64 not supported in ES profile."); if (!options.es && options.version < 400) require_extension_internal("GL_ARB_gpu_shader_fp64"); } else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64) { if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310. SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310."); require_extension_internal("GL_ARB_gpu_shader_int64"); } else if (type.basetype == SPIRType::Half) { require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16"); if (options.vulkan_semantics) require_extension_internal("GL_EXT_shader_16bit_storage"); } else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte) { require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8"); if (options.vulkan_semantics) require_extension_internal("GL_EXT_shader_8bit_storage"); } else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort) { require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16"); if (options.vulkan_semantics) require_extension_internal("GL_EXT_shader_16bit_storage"); } }); auto &execution = get_entry_point(); switch (execution.model) { case ExecutionModelGLCompute: if (!options.es && options.version < 430) require_extension_internal("GL_ARB_compute_shader"); if (options.es && options.version < 310) SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders."); break; case ExecutionModelGeometry: if (options.es && options.version < 320) require_extension_internal("GL_EXT_geometry_shader"); if (!options.es && options.version < 150) require_extension_internal("GL_ARB_geometry_shader4"); if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 
1) { // Instanced GS is part of 400 core or this extension. if (!options.es && options.version < 400) require_extension_internal("GL_ARB_gpu_shader5"); } break; case ExecutionModelTessellationEvaluation: case ExecutionModelTessellationControl: if (options.es && options.version < 320) require_extension_internal("GL_EXT_tessellation_shader"); if (!options.es && options.version < 400) require_extension_internal("GL_ARB_tessellation_shader"); break; case ExecutionModelRayGenerationKHR: case ExecutionModelIntersectionKHR: case ExecutionModelAnyHitKHR: case ExecutionModelClosestHitKHR: case ExecutionModelMissKHR: case ExecutionModelCallableKHR: // NV enums are aliases. if (options.es || options.version < 460) SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above."); if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics."); // Need to figure out if we should target KHR or NV extension based on capabilities. for (auto &cap : ir.declared_capabilities) { if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR || cap == CapabilityRayTraversalPrimitiveCullingKHR) { ray_tracing_is_khr = true; break; } } if (ray_tracing_is_khr) { // In KHR ray tracing we pass payloads by pointer instead of location, // so make sure we assign locations properly. 
ray_tracing_khr_fixup_locations(); require_extension_internal("GL_EXT_ray_tracing"); } else require_extension_internal("GL_NV_ray_tracing"); break; case ExecutionModelMeshEXT: case ExecutionModelTaskEXT: if (options.es || options.version < 450) SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above."); if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics."); require_extension_internal("GL_EXT_mesh_shader"); break; default: break; } if (!pls_inputs.empty() || !pls_outputs.empty()) { if (execution.model != ExecutionModelFragment) SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders."); require_extension_internal("GL_EXT_shader_pixel_local_storage"); } if (!inout_color_attachments.empty()) { if (execution.model != ExecutionModelFragment) SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders."); if (options.vulkan_semantics) SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL."); bool has_coherent = false; bool has_incoherent = false; for (auto &att : inout_color_attachments) { if (att.second) has_coherent = true; else has_incoherent = true; } if (has_coherent) require_extension_internal("GL_EXT_shader_framebuffer_fetch"); if (has_incoherent) require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent"); } if (options.separate_shader_objects && !options.es && options.version < 410) require_extension_internal("GL_ARB_separate_shader_objects"); if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) { if (!options.vulkan_semantics) SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL."); if (options.es && options.version < 320) SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320."); else if (!options.es && options.version < 450) SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450."); require_extension_internal("GL_EXT_buffer_reference"); } else if (ir.addressing_model != 
AddressingModelLogical) { SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported."); } // Check for nonuniform qualifier and passthrough. // Instead of looping over all decorations to find this, just look at capabilities. for (auto &cap : ir.declared_capabilities) { switch (cap) { case CapabilityShaderNonUniformEXT: if (!options.vulkan_semantics) require_extension_internal("GL_NV_gpu_shader5"); else require_extension_internal("GL_EXT_nonuniform_qualifier"); break; case CapabilityRuntimeDescriptorArrayEXT: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL."); require_extension_internal("GL_EXT_nonuniform_qualifier"); break; case CapabilityGeometryShaderPassthroughNV: if (execution.model == ExecutionModelGeometry) { require_extension_internal("GL_NV_geometry_shader_passthrough"); execution.geometry_passthrough = true; } break; case CapabilityVariablePointers: case CapabilityVariablePointersStorageBuffer: SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL."); case CapabilityMultiView: if (options.vulkan_semantics) require_extension_internal("GL_EXT_multiview"); else { require_extension_internal("GL_OVR_multiview2"); if (options.ovr_multiview_view_count == 0) SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2."); if (get_execution_model() != ExecutionModelVertex) SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders."); } break; case CapabilityRayQueryKHR: if (options.es || options.version < 460 || !options.vulkan_semantics) SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460."); require_extension_internal("GL_EXT_ray_query"); ray_tracing_is_khr = true; break; case CapabilityRayTraversalPrimitiveCullingKHR: if (options.es || options.version < 460 || !options.vulkan_semantics) SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460."); 
require_extension_internal("GL_EXT_ray_flags_primitive_culling"); ray_tracing_is_khr = true; break; default: break; } } if (options.ovr_multiview_view_count) { if (options.vulkan_semantics) SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics."); if (get_execution_model() != ExecutionModelVertex) SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders."); require_extension_internal("GL_OVR_multiview2"); } // KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR. for (auto &ext : ir.declared_extensions) if (ext == "SPV_NV_fragment_shader_barycentric") barycentric_is_nv = true; } void CompilerGLSL::require_polyfill(Polyfill polyfill, bool relaxed) { uint32_t &polyfills = (relaxed && options.es) ? required_polyfills_relaxed : required_polyfills; if ((polyfills & polyfill) == 0) { polyfills |= polyfill; force_recompile(); } } void CompilerGLSL::ray_tracing_khr_fixup_locations() { uint32_t location = 0; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { // Incoming payload storage can also be used for tracing. 
if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR && var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR) return; if (is_hidden_variable(var)) return; set_decoration(var.self, DecorationLocation, location++); }); } string CompilerGLSL::compile() { ir.fixup_reserved_names(); if (!options.vulkan_semantics) { // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers backend.nonuniform_qualifier = ""; backend.needs_row_major_load_workaround = options.enable_row_major_load_workaround; } backend.allow_precision_qualifiers = options.vulkan_semantics || options.es; backend.force_gl_in_out_block = true; backend.supports_extensions = true; backend.use_array_constructor = true; backend.workgroup_size_is_hidden = true; backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics; backend.support_precise_qualifier = (!options.es && options.version >= 400) || (options.es && options.version >= 320); if (is_legacy_es()) backend.support_case_fallthrough = false; // Scan the SPIR-V to find trivial uses of extensions. fixup_anonymous_struct_names(); fixup_type_alias(); reorder_type_alias(); build_function_control_flow_graphs_and_analyze(); find_static_extensions(); fixup_image_load_store_access(); update_active_builtins(); analyze_image_and_sampler_usage(); analyze_interlocked_resource_usage(); if (!inout_color_attachments.empty()) emit_inout_fragment_outputs_copy_to_subpass_inputs(); // Shaders might cast unrelated data to pointers of non-block types. // Find all such instances and make sure we can cast the pointers to a synthesized block type. 
if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) analyze_non_block_pointer_types(); uint32_t pass_count = 0; do { reset(pass_count); buffer.reset(); emit_header(); emit_resources(); emit_extension_workarounds(get_execution_model()); if (required_polyfills != 0) emit_polyfills(required_polyfills, false); if (options.es && required_polyfills_relaxed != 0) emit_polyfills(required_polyfills_relaxed, true); emit_function(get(ir.default_entry_point), Bitset()); pass_count++; } while (is_forcing_recompilation()); // Implement the interlocked wrapper function at the end. // The body was implemented in lieu of main(). if (interlocked_is_complex) { statement("void main()"); begin_scope(); statement("// Interlocks were used in a way not compatible with GLSL, this is very slow."); statement("SPIRV_Cross_beginInvocationInterlock();"); statement("spvMainInterlockedBody();"); statement("SPIRV_Cross_endInvocationInterlock();"); end_scope(); } // Entry point in GLSL is always main(). 
get_entry_point().name = "main"; return buffer.str(); } std::string CompilerGLSL::get_partial_source() { return buffer.str(); } void CompilerGLSL::build_workgroup_size(SmallVector &arguments, const SpecializationConstant &wg_x, const SpecializationConstant &wg_y, const SpecializationConstant &wg_z) { auto &execution = get_entry_point(); bool builtin_workgroup = execution.workgroup_size.constant != 0; bool use_local_size_id = !builtin_workgroup && execution.flags.get(ExecutionModeLocalSizeId); if (wg_x.id) { if (options.vulkan_semantics) arguments.push_back(join("local_size_x_id = ", wg_x.constant_id)); else arguments.push_back(join("local_size_x = ", get(wg_x.id).specialization_constant_macro_name)); } else if (use_local_size_id && execution.workgroup_size.id_x) arguments.push_back(join("local_size_x = ", get(execution.workgroup_size.id_x).scalar())); else arguments.push_back(join("local_size_x = ", execution.workgroup_size.x)); if (wg_y.id) { if (options.vulkan_semantics) arguments.push_back(join("local_size_y_id = ", wg_y.constant_id)); else arguments.push_back(join("local_size_y = ", get(wg_y.id).specialization_constant_macro_name)); } else if (use_local_size_id && execution.workgroup_size.id_y) arguments.push_back(join("local_size_y = ", get(execution.workgroup_size.id_y).scalar())); else arguments.push_back(join("local_size_y = ", execution.workgroup_size.y)); if (wg_z.id) { if (options.vulkan_semantics) arguments.push_back(join("local_size_z_id = ", wg_z.constant_id)); else arguments.push_back(join("local_size_z = ", get(wg_z.id).specialization_constant_macro_name)); } else if (use_local_size_id && execution.workgroup_size.id_z) arguments.push_back(join("local_size_z = ", get(execution.workgroup_size.id_z).scalar())); else arguments.push_back(join("local_size_z = ", execution.workgroup_size.z)); } void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature) { if (options.vulkan_semantics) { auto khr_extension = 
ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature); require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension)); } else { if (!shader_subgroup_supporter.is_feature_requested(feature)) force_recompile(); shader_subgroup_supporter.request_feature(feature); } } void CompilerGLSL::emit_header() { auto &execution = get_entry_point(); statement("#version ", options.version, options.es && options.version > 100 ? " es" : ""); if (!options.es && options.version < 420) { // Needed for binding = # on UBOs, etc. if (options.enable_420pack_extension) { statement("#ifdef GL_ARB_shading_language_420pack"); statement("#extension GL_ARB_shading_language_420pack : require"); statement("#endif"); } // Needed for: layout(early_fragment_tests) in; if (execution.flags.get(ExecutionModeEarlyFragmentTests)) require_extension_internal("GL_ARB_shader_image_load_store"); } // Needed for: layout(post_depth_coverage) in; if (execution.flags.get(ExecutionModePostDepthCoverage)) require_extension_internal("GL_ARB_post_depth_coverage"); // Needed for: layout({pixel,sample}_interlock_[un]ordered) in; bool interlock_used = execution.flags.get(ExecutionModePixelInterlockOrderedEXT) || execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) || execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) || execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT); if (interlock_used) { if (options.es) { if (options.version < 310) SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock."); require_extension_internal("GL_NV_fragment_shader_interlock"); } else { if (options.version < 420) require_extension_internal("GL_ARB_shader_image_load_store"); require_extension_internal("GL_ARB_fragment_shader_interlock"); } } for (auto &ext : forced_extensions) { if (ext == "GL_ARB_gpu_shader_int64") { statement("#if defined(GL_ARB_gpu_shader_int64)"); statement("#extension GL_ARB_gpu_shader_int64 : require"); if 
(!options.vulkan_semantics || options.es) { statement("#elif defined(GL_NV_gpu_shader5)"); statement("#extension GL_NV_gpu_shader5 : require"); } statement("#else"); statement("#error No extension available for 64-bit integers."); statement("#endif"); } else if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") { // Special case, this extension has a potential fallback to another vendor extension in normal GLSL. // GL_AMD_gpu_shader_half_float is a superset, so try that first. statement("#if defined(GL_AMD_gpu_shader_half_float)"); statement("#extension GL_AMD_gpu_shader_half_float : require"); if (!options.vulkan_semantics) { statement("#elif defined(GL_NV_gpu_shader5)"); statement("#extension GL_NV_gpu_shader5 : require"); } else { statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)"); statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require"); } statement("#else"); statement("#error No extension available for FP16."); statement("#endif"); } else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int8") { if (options.vulkan_semantics) statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require"); else { statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int8)"); statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require"); statement("#elif defined(GL_NV_gpu_shader5)"); statement("#extension GL_NV_gpu_shader5 : require"); statement("#else"); statement("#error No extension available for Int8."); statement("#endif"); } } else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16") { if (options.vulkan_semantics) statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require"); else { statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int16)"); statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require"); statement("#elif defined(GL_AMD_gpu_shader_int16)"); statement("#extension GL_AMD_gpu_shader_int16 : require"); 
statement("#elif defined(GL_NV_gpu_shader5)"); statement("#extension GL_NV_gpu_shader5 : require"); statement("#else"); statement("#error No extension available for Int16."); statement("#endif"); } } else if (ext == "GL_ARB_post_depth_coverage") { if (options.es) statement("#extension GL_EXT_post_depth_coverage : require"); else { statement("#if defined(GL_ARB_post_depth_coverge)"); statement("#extension GL_ARB_post_depth_coverage : require"); statement("#else"); statement("#extension GL_EXT_post_depth_coverage : require"); statement("#endif"); } } else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters") { // Soft-enable this extension on plain GLSL. statement("#ifdef ", ext); statement("#extension ", ext, " : enable"); statement("#endif"); } else if (ext == "GL_EXT_control_flow_attributes") { // These are just hints so we can conditionally enable and fallback in the shader. statement("#if defined(GL_EXT_control_flow_attributes)"); statement("#extension GL_EXT_control_flow_attributes : require"); statement("#define SPIRV_CROSS_FLATTEN [[flatten]]"); statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]"); statement("#define SPIRV_CROSS_UNROLL [[unroll]]"); statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]"); statement("#else"); statement("#define SPIRV_CROSS_FLATTEN"); statement("#define SPIRV_CROSS_BRANCH"); statement("#define SPIRV_CROSS_UNROLL"); statement("#define SPIRV_CROSS_LOOP"); statement("#endif"); } else if (ext == "GL_NV_fragment_shader_interlock") { statement("#extension GL_NV_fragment_shader_interlock : require"); statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()"); statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()"); } else if (ext == "GL_ARB_fragment_shader_interlock") { statement("#ifdef GL_ARB_fragment_shader_interlock"); statement("#extension GL_ARB_fragment_shader_interlock : enable"); statement("#define SPIRV_Cross_beginInvocationInterlock() 
beginInvocationInterlockARB()"); statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()"); statement("#elif defined(GL_INTEL_fragment_shader_ordering)"); statement("#extension GL_INTEL_fragment_shader_ordering : enable"); statement("#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()"); statement("#define SPIRV_Cross_endInvocationInterlock()"); statement("#endif"); } else statement("#extension ", ext, " : require"); } if (!options.vulkan_semantics) { using Supp = ShaderSubgroupSupportHelper; auto result = shader_subgroup_supporter.resolve(); for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++) { auto feature = static_cast(feature_index); if (!shader_subgroup_supporter.is_feature_requested(feature)) continue; auto exts = Supp::get_candidates_for_feature(feature, result); if (exts.empty()) continue; statement(""); for (auto &ext : exts) { const char *name = Supp::get_extension_name(ext); const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext); auto extra_names = Supp::get_extra_required_extension_names(ext); statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")", (*extra_predicate != '\0' ? 
" && " : ""), extra_predicate); for (const auto &e : extra_names) statement("#extension ", e, " : enable"); statement("#extension ", name, " : require"); } if (!Supp::can_feature_be_implemented_without_extensions(feature)) { statement("#else"); statement("#error No extensions available to emulate requested subgroup feature."); } statement("#endif"); } } for (auto &header : header_lines) statement(header); SmallVector inputs; SmallVector outputs; switch (execution.model) { case ExecutionModelVertex: if (options.ovr_multiview_view_count) inputs.push_back(join("num_views = ", options.ovr_multiview_view_count)); break; case ExecutionModelGeometry: if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1) inputs.push_back(join("invocations = ", execution.invocations)); if (execution.flags.get(ExecutionModeInputPoints)) inputs.push_back("points"); if (execution.flags.get(ExecutionModeInputLines)) inputs.push_back("lines"); if (execution.flags.get(ExecutionModeInputLinesAdjacency)) inputs.push_back("lines_adjacency"); if (execution.flags.get(ExecutionModeTriangles)) inputs.push_back("triangles"); if (execution.flags.get(ExecutionModeInputTrianglesAdjacency)) inputs.push_back("triangles_adjacency"); if (!execution.geometry_passthrough) { // For passthrough, these are implies and cannot be declared in shader. 
outputs.push_back(join("max_vertices = ", execution.output_vertices)); if (execution.flags.get(ExecutionModeOutputTriangleStrip)) outputs.push_back("triangle_strip"); if (execution.flags.get(ExecutionModeOutputPoints)) outputs.push_back("points"); if (execution.flags.get(ExecutionModeOutputLineStrip)) outputs.push_back("line_strip"); } break; case ExecutionModelTessellationControl: if (execution.flags.get(ExecutionModeOutputVertices)) outputs.push_back(join("vertices = ", execution.output_vertices)); break; case ExecutionModelTessellationEvaluation: if (execution.flags.get(ExecutionModeQuads)) inputs.push_back("quads"); if (execution.flags.get(ExecutionModeTriangles)) inputs.push_back("triangles"); if (execution.flags.get(ExecutionModeIsolines)) inputs.push_back("isolines"); if (execution.flags.get(ExecutionModePointMode)) inputs.push_back("point_mode"); if (!execution.flags.get(ExecutionModeIsolines)) { if (execution.flags.get(ExecutionModeVertexOrderCw)) inputs.push_back("cw"); if (execution.flags.get(ExecutionModeVertexOrderCcw)) inputs.push_back("ccw"); } if (execution.flags.get(ExecutionModeSpacingFractionalEven)) inputs.push_back("fractional_even_spacing"); if (execution.flags.get(ExecutionModeSpacingFractionalOdd)) inputs.push_back("fractional_odd_spacing"); if (execution.flags.get(ExecutionModeSpacingEqual)) inputs.push_back("equal_spacing"); break; case ExecutionModelGLCompute: case ExecutionModelTaskEXT: case ExecutionModelMeshEXT: { if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId)) { SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); // If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro // declarations before we can emit the work group size. 
if (options.vulkan_semantics || ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0)))) build_workgroup_size(inputs, wg_x, wg_y, wg_z); } else { inputs.push_back(join("local_size_x = ", execution.workgroup_size.x)); inputs.push_back(join("local_size_y = ", execution.workgroup_size.y)); inputs.push_back(join("local_size_z = ", execution.workgroup_size.z)); } if (execution.model == ExecutionModelMeshEXT) { outputs.push_back(join("max_vertices = ", execution.output_vertices)); outputs.push_back(join("max_primitives = ", execution.output_primitives)); if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) outputs.push_back("triangles"); else if (execution.flags.get(ExecutionModeOutputLinesEXT)) outputs.push_back("lines"); else if (execution.flags.get(ExecutionModeOutputPoints)) outputs.push_back("points"); } break; } case ExecutionModelFragment: if (options.es) { switch (options.fragment.default_float_precision) { case Options::Lowp: statement("precision lowp float;"); break; case Options::Mediump: statement("precision mediump float;"); break; case Options::Highp: statement("precision highp float;"); break; default: break; } switch (options.fragment.default_int_precision) { case Options::Lowp: statement("precision lowp int;"); break; case Options::Mediump: statement("precision mediump int;"); break; case Options::Highp: statement("precision highp int;"); break; default: break; } } if (execution.flags.get(ExecutionModeEarlyFragmentTests)) inputs.push_back("early_fragment_tests"); if (execution.flags.get(ExecutionModePostDepthCoverage)) inputs.push_back("post_depth_coverage"); if (interlock_used) statement("#if defined(GL_ARB_fragment_shader_interlock)"); if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT)) statement("layout(pixel_interlock_ordered) in;"); else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT)) statement("layout(pixel_interlock_unordered) in;"); else if 
(execution.flags.get(ExecutionModeSampleInterlockOrderedEXT)) statement("layout(sample_interlock_ordered) in;"); else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT)) statement("layout(sample_interlock_unordered) in;"); if (interlock_used) { statement("#elif !defined(GL_INTEL_fragment_shader_ordering)"); statement("#error Fragment Shader Interlock/Ordering extension missing!"); statement("#endif"); } if (!options.es && execution.flags.get(ExecutionModeDepthGreater)) statement("layout(depth_greater) out float gl_FragDepth;"); else if (!options.es && execution.flags.get(ExecutionModeDepthLess)) statement("layout(depth_less) out float gl_FragDepth;"); break; default: break; } for (auto &cap : ir.declared_capabilities) if (cap == CapabilityRayTraversalPrimitiveCullingKHR) statement("layout(primitive_culling);"); if (!inputs.empty()) statement("layout(", merge(inputs), ") in;"); if (!outputs.empty()) statement("layout(", merge(outputs), ") out;"); statement(""); } bool CompilerGLSL::type_is_empty(const SPIRType &type) { return type.basetype == SPIRType::Struct && type.member_types.empty(); } void CompilerGLSL::emit_struct(SPIRType &type) { // Struct types can be stamped out multiple times // with just different offsets, matrix layouts, etc ... // Type-punning with these types is legal, which complicates things // when we are storing struct and array types in an SSBO for example. // If the type master is packed however, we can no longer assume that the struct declaration will be redundant. if (type.type_alias != TypeID(0) && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) return; add_resource_name(type.self); auto name = type_to_glsl(type); statement(!backend.explicit_struct_type ? 
"struct " : "", name); begin_scope(); type.member_name_cache.clear(); uint32_t i = 0; bool emitted = false; for (auto &member : type.member_types) { add_member_name(type, i); emit_struct_member(type, member, i); i++; emitted = true; } // Don't declare empty structs in GLSL, this is not allowed. if (type_is_empty(type) && !backend.supports_empty_struct) { statement("int empty_struct_member;"); emitted = true; } if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget)) emit_struct_padding_target(type); end_scope_decl(); if (emitted) statement(""); } string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) { string res; //if (flags & (1ull << DecorationSmooth)) // res += "smooth "; if (flags.get(DecorationFlat)) res += "flat "; if (flags.get(DecorationNoPerspective)) { if (options.es) { if (options.version < 300) SPIRV_CROSS_THROW("noperspective requires ESSL 300."); require_extension_internal("GL_NV_shader_noperspective_interpolation"); } else if (is_legacy_desktop()) require_extension_internal("GL_EXT_gpu_shader4"); res += "noperspective "; } if (flags.get(DecorationCentroid)) res += "centroid "; if (flags.get(DecorationPatch)) res += "patch "; if (flags.get(DecorationSample)) { if (options.es) { if (options.version < 300) SPIRV_CROSS_THROW("sample requires ESSL 300."); else if (options.version < 320) require_extension_internal("GL_OES_shader_multisample_interpolation"); } res += "sample "; } if (flags.get(DecorationInvariant) && (options.es || options.version >= 120)) res += "invariant "; if (flags.get(DecorationPerPrimitiveEXT)) res += "perprimitiveEXT "; if (flags.get(DecorationExplicitInterpAMD)) { require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); res += "__explicitInterpAMD "; } if (flags.get(DecorationPerVertexKHR)) { if (options.es && options.version < 320) SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320."); else if (!options.es && options.version < 450) SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450."); 
if (barycentric_is_nv) { require_extension_internal("GL_NV_fragment_shader_barycentric"); res += "pervertexNV "; } else { require_extension_internal("GL_EXT_fragment_shader_barycentric"); res += "pervertexEXT "; } } return res; } string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) { if (is_legacy()) return ""; bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); if (!is_block) return ""; auto &memb = ir.meta[type.self].members; if (index >= memb.size()) return ""; auto &dec = memb[index]; SmallVector attr; if (has_member_decoration(type.self, index, DecorationPassthroughNV)) attr.push_back("passthrough"); // We can only apply layouts on members in block interfaces. // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly. // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL. // // We would like to go from (SPIR-V style): // // struct Foo { layout(row_major) mat4 matrix; }; // buffer UBO { Foo foo; }; // // to // // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations. // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level. auto flags = combined_decoration_for_member(type, index); if (flags.get(DecorationRowMajor)) attr.push_back("row_major"); // We don't emit any global layouts, so column_major is default. //if (flags & (1ull << DecorationColMajor)) // attr.push_back("column_major"); if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true)) attr.push_back(join("location = ", dec.location)); // Can only declare component if we can declare location. 
if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true)) { if (!options.es) { if (options.version < 440 && options.version >= 140) require_extension_internal("GL_ARB_enhanced_layouts"); else if (options.version < 140) SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); attr.push_back(join("component = ", dec.component)); } else SPIRV_CROSS_THROW("Component decoration is not supported in ES targets."); } // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers. // This is only done selectively in GLSL as needed. if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) && dec.decoration_flags.get(DecorationOffset)) attr.push_back(join("offset = ", dec.offset)); else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset)) attr.push_back(join("xfb_offset = ", dec.offset)); if (attr.empty()) return ""; string res = "layout("; res += merge(attr); res += ") "; return res; } const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format) { if (options.es && is_desktop_only_format(format)) SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile."); switch (format) { case ImageFormatRgba32f: return "rgba32f"; case ImageFormatRgba16f: return "rgba16f"; case ImageFormatR32f: return "r32f"; case ImageFormatRgba8: return "rgba8"; case ImageFormatRgba8Snorm: return "rgba8_snorm"; case ImageFormatRg32f: return "rg32f"; case ImageFormatRg16f: return "rg16f"; case ImageFormatRgba32i: return "rgba32i"; case ImageFormatRgba16i: return "rgba16i"; case ImageFormatR32i: return "r32i"; case ImageFormatRgba8i: return "rgba8i"; case ImageFormatRg32i: return "rg32i"; case ImageFormatRg16i: return "rg16i"; case ImageFormatRgba32ui: return "rgba32ui"; case ImageFormatRgba16ui: return "rgba16ui"; case ImageFormatR32ui: return "r32ui"; case ImageFormatRgba8ui: return "rgba8ui"; case 
ImageFormatRg32ui: return "rg32ui"; case ImageFormatRg16ui: return "rg16ui"; case ImageFormatR11fG11fB10f: return "r11f_g11f_b10f"; case ImageFormatR16f: return "r16f"; case ImageFormatRgb10A2: return "rgb10_a2"; case ImageFormatR8: return "r8"; case ImageFormatRg8: return "rg8"; case ImageFormatR16: return "r16"; case ImageFormatRg16: return "rg16"; case ImageFormatRgba16: return "rgba16"; case ImageFormatR16Snorm: return "r16_snorm"; case ImageFormatRg16Snorm: return "rg16_snorm"; case ImageFormatRgba16Snorm: return "rgba16_snorm"; case ImageFormatR8Snorm: return "r8_snorm"; case ImageFormatRg8Snorm: return "rg8_snorm"; case ImageFormatR8ui: return "r8ui"; case ImageFormatRg8ui: return "rg8ui"; case ImageFormatR16ui: return "r16ui"; case ImageFormatRgb10a2ui: return "rgb10_a2ui"; case ImageFormatR8i: return "r8i"; case ImageFormatRg8i: return "rg8i"; case ImageFormatR16i: return "r16i"; default: case ImageFormatUnknown: return nullptr; } }

// Returns the size in bytes of one scalar component of `type` (8, 4, 2 or 1).
// Throws for base types that have no scalar size here. The packing argument is unused.
uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
{
	switch (type.basetype)
	{
	case SPIRType::Double:
	case SPIRType::Int64:
	case SPIRType::UInt64:
		return 8;
	case SPIRType::Float:
	case SPIRType::Int:
	case SPIRType::UInt:
		return 4;
	case SPIRType::Half:
	case SPIRType::Short:
	case SPIRType::UShort:
		return 2;
	case SPIRType::SByte:
	case SPIRType::UByte:
		return 1;

	default:
		SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
	}
}

// Returns the alignment in bytes that `type` must have under the given packing standard.
// `flags` supplies the member decorations (row/column major) that affect matrix layout.
// Throws if no rule matches the type.
uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
                                                BufferPackingStandard packing)
{
	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
	// and is 64-bit.
	if (type_is_top_level_physical_pointer(type))
	{
		if (!type.pointer)
			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");

		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		{
			if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
				return 16;
			else
				return 8;
		}
		else
			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
	}
	else if (type_is_top_level_array(type))
	{
		uint32_t minimum_alignment = 1;
		if (packing_is_vec4_padded(packing))
			minimum_alignment = 16;

		// Strip all array dimensions to find the element type.
		auto *tmp = &get<SPIRType>(type.parent_type);
		while (!tmp->array.empty())
			tmp = &get<SPIRType>(tmp->parent_type);

		// Get the alignment of the base type, then maybe round up.
		return max<uint32_t>(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
	}

	if (type.basetype == SPIRType::Struct)
	{
		// Rule 9. Structs alignments are maximum alignment of its members.
		uint32_t alignment = 1;
		for (uint32_t i = 0; i < type.member_types.size(); i++)
		{
			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
			alignment = max<uint32_t>(
			    alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
		}

		// In std140, struct alignment is rounded up to 16.
		if (packing_is_vec4_padded(packing))
			alignment = max<uint32_t>(alignment, 16u);

		return alignment;
	}
	else
	{
		const uint32_t base_alignment = type_to_packed_base_size(type, packing);

		// Alignment requirement for scalar block layout is always the alignment for the most basic component.
		if (packing_is_scalar(packing))
			return base_alignment;

		// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
		// a vec4, this is handled outside since that part knows our current offset.
		if (type.columns == 1 && packing_is_hlsl(packing))
			return base_alignment;

		// From 7.6.2.2 in GL 4.5 core spec.
		// Rule 1
		if (type.vecsize == 1 && type.columns == 1)
			return base_alignment;

		// Rule 2
		if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
			return type.vecsize * base_alignment;

		// Rule 3
		if (type.vecsize == 3 && type.columns == 1)
			return 4 * base_alignment;

		// Rule 4 implied. Alignment does not change in std430.

		// Rule 5. Column-major matrices are stored as arrays of
		// vectors.
		if (flags.get(DecorationColMajor) && type.columns > 1)
		{
			if (packing_is_vec4_padded(packing))
				return 4 * base_alignment;
			else if (type.vecsize == 3)
				return 4 * base_alignment;
			else
				return type.vecsize * base_alignment;
		}

		// Rule 6 implied.

		// Rule 7.
		if (flags.get(DecorationRowMajor) && type.vecsize > 1)
		{
			if (packing_is_vec4_padded(packing))
				return 4 * base_alignment;
			else if (type.columns == 3)
				return 4 * base_alignment;
			else
				return type.columns * base_alignment;
		}

		// Rule 8 implied.
	}

	SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
}

// Returns the array stride of the array type `type`: the packed size of its element
// (parent) type rounded up to the array's packed alignment.
uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
                                                   BufferPackingStandard packing)
{
	// Array stride is equal to aligned size of the underlying type.
	uint32_t parent = type.parent_type;
	assert(parent);

	auto &tmp = get<SPIRType>(parent);

	uint32_t size = type_to_packed_size(tmp, flags, packing);
	uint32_t alignment = type_to_packed_alignment(type, flags, packing);
	// Mask-based round-up; relies on alignment being a power of two.
	return (size + alignment - 1) & ~(alignment - 1);
}

// Returns the size in bytes that `type` occupies under the given packing standard.
// `flags` supplies the member decorations (row/column major) that affect matrix layout.
uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
{
	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
	// and is 64-bit.
	if (type_is_top_level_physical_pointer(type))
	{
		if (!type.pointer)
			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");

		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
			return 8;
		else
			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
	}
	else if (type_is_top_level_array(type))
	{
		uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);

		// For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
		// so that it is possible to pack other vectors into the last element.
		if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
			packed_size -= (4 - type.vecsize) * (type.width / 8);

		return packed_size;
	}

	uint32_t size = 0;

	if (type.basetype == SPIRType::Struct)
	{
		uint32_t pad_alignment = 1;

		for (uint32_t i = 0; i < type.member_types.size(); i++)
		{
			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
			auto &member_type = get<SPIRType>(type.member_types[i]);

			uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
			uint32_t alignment = max<uint32_t>(packed_alignment, pad_alignment);

			// The next member following a struct member is aligned to the base alignment of the struct that came before.
			// GL 4.5 spec, 7.6.2.2.
			if (member_type.basetype == SPIRType::Struct)
				pad_alignment = packed_alignment;
			else
				pad_alignment = 1;

			size = (size + alignment - 1) & ~(alignment - 1);
			size += type_to_packed_size(member_type, member_flags, packing);
		}
	}
	else
	{
		const uint32_t base_alignment = type_to_packed_base_size(type, packing);

		if (packing_is_scalar(packing))
		{
			size = type.vecsize * type.columns * base_alignment;
		}
		else
		{
			if (type.columns == 1)
				size = type.vecsize * base_alignment;

			if (flags.get(DecorationColMajor) && type.columns > 1)
			{
				if (packing_is_vec4_padded(packing))
					size = type.columns * 4 * base_alignment;
				else if (type.vecsize == 3)
					size = type.columns * 4 * base_alignment;
				else
					size = type.columns * type.vecsize * base_alignment;
			}

			if (flags.get(DecorationRowMajor) && type.vecsize > 1)
			{
				if (packing_is_vec4_padded(packing))
					size = type.vecsize * 4 * base_alignment;
				else if (type.columns == 3)
					size = type.vecsize * 4 * base_alignment;
				else
					size = type.vecsize * type.columns * base_alignment;
			}

			// For matrices in HLSL, the last element has a size which depends on its vector size,
			// so that it is possible to pack other vectors into the last element.
			if (packing_is_hlsl(packing) && type.columns > 1)
				size -= (4 - type.vecsize) * (type.width / 8);
		}
	}

	return size;
}

// Checks whether the declared member offsets and array strides of struct `type` are
// consistent with `packing`. Only members whose offset lies in [start_offset, end_offset)
// are validated. On failure, returns false and, if `failed_validation_index` is non-null,
// stores the index of the first offending member there.
bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
                                              uint32_t *failed_validation_index, uint32_t start_offset,
                                              uint32_t end_offset)
{
	// This is very tricky and error prone, but try to be exhaustive and correct here.
	// SPIR-V doesn't directly say if we're using std430 or std140.
	// SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
	// so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
	// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
	//
	// It is almost certain that we're using std430, but it gets tricky with arrays in particular.
	// We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
	//
	// The only two differences between std140 and std430 are related to padding alignment/array stride
	// in arrays and structs. In std140 they take minimum vec4 alignment.
	// std430 only removes the vec4 requirement.

	uint32_t offset = 0;
	uint32_t pad_alignment = 1;

	bool is_top_level_block =
	    has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);

	for (uint32_t i = 0; i < type.member_types.size(); i++)
	{
		auto &memb_type = get<SPIRType>(type.member_types[i]);
		auto member_flags = ir.meta[type.self].members[i].decoration_flags;

		// Verify alignment rules.
		uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);

		// This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
		// layout(constant_id = 0) const int s = 10;
		// const int S = s + 5; // SpecConstantOp
		// buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
		// we would need full implementation of compile-time constant folding. :(
		// If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
		// for our analysis (e.g. unsized arrays).
		// This lets us simply ignore that there are spec constant op sized arrays in our buffers.
		// Querying size of this member will fail, so just don't call it unless we have to.
		//
		// This is likely "best effort" we can support without going into unacceptably complicated workarounds.
		bool member_can_be_unsized =
		    is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();

		uint32_t packed_size = 0;
		if (!member_can_be_unsized || packing_is_hlsl(packing))
			packed_size = type_to_packed_size(memb_type, member_flags, packing);

		// We only need to care about this if we have non-array types which can straddle the vec4 boundary.
		uint32_t actual_offset = type_struct_member_offset(type, i);

		if (packing_is_hlsl(packing))
		{
			// If a member straddles across a vec4 boundary, alignment is actually vec4.
			uint32_t begin_word = actual_offset / 16;
			uint32_t end_word = (actual_offset + packed_size - 1) / 16;
			if (begin_word != end_word)
				packed_alignment = max<uint32_t>(packed_alignment, 16u);
		}

		// Field is not in the specified range anymore and we can ignore any further fields.
		if (actual_offset >= end_offset)
			break;

		uint32_t alignment = max<uint32_t>(packed_alignment, pad_alignment);
		offset = (offset + alignment - 1) & ~(alignment - 1);

		// The next member following a struct member is aligned to the base alignment of the struct that came before.
		// GL 4.5 spec, 7.6.2.2.
		if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
			pad_alignment = packed_alignment;
		else
			pad_alignment = 1;

		// Only care about packing if we are in the given range
		if (actual_offset >= start_offset)
		{
			// We only care about offsets in std140, std430, etc ...
			// For EnhancedLayout variants, we have the flexibility to choose our own offsets.
			if (!packing_has_flexible_offset(packing))
			{
				if (actual_offset != offset) // This cannot be the packing we're looking for.
				{
					if (failed_validation_index)
						*failed_validation_index = i;
					return false;
				}
			}
			else if ((actual_offset & (alignment - 1)) != 0)
			{
				// We still need to verify that alignment rules are observed, even if we have explicit offset.
				if (failed_validation_index)
					*failed_validation_index = i;
				return false;
			}

			// Verify array stride rules.
			if (type_is_top_level_array(memb_type) &&
			    type_to_packed_array_stride(memb_type, member_flags, packing) !=
			        type_struct_member_array_stride(type, i))
			{
				if (failed_validation_index)
					*failed_validation_index = i;
				return false;
			}

			// Verify that sub-structs also follow packing rules.
			// We cannot use enhanced layouts on substructs, so they better be up to spec.
			auto substruct_packing = packing_to_substruct_packing(packing);

			if (!memb_type.pointer && !memb_type.member_types.empty() &&
			    !buffer_is_packing_standard(memb_type, substruct_packing))
			{
				if (failed_validation_index)
					*failed_validation_index = i;
				return false;
			}
		}

		// Bump size.
		offset = actual_offset + packed_size;
	}

	return true;
}

bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block) { // Location specifiers are must have in SPIR-V, but they aren't really supported in earlier versions of GLSL. // Be very explicit here about how to solve the issue. if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) || (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput)) { uint32_t minimum_desktop_version = block ? 440 : 410; // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects) return false; else if (options.es && options.version < 310) return false; } if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) || (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput)) { if (options.es && options.version < 300) return false; else if (!options.es && options.version < 330) return false; } if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant) { if (options.es && options.version < 310) return false; else if (!options.es && options.version < 430) return false; } return true; } string CompilerGLSL::layout_for_variable(const SPIRVariable &var) { // FIXME: Come up with a better solution for when to disable layouts. // Having layouts depend on extensions as well as which types // of layouts are used. For now, the simple solution is to just disable // layouts for legacy versions. if (is_legacy()) return ""; if (subpass_input_is_framebuffer_fetch(var.self)) return ""; SmallVector attr; auto &type = get(var.basetype); auto &flags = get_decoration_bitset(var.self); auto &typeflags = get_decoration_bitset(type.self); if (flags.get(DecorationPassthroughNV)) attr.push_back("passthrough"); if (options.vulkan_semantics && var.storage == StorageClassPushConstant) attr.push_back("push_constant"); else if (var.storage == StorageClassShaderRecordBufferKHR) attr.push_back(ray_tracing_is_khr ? 
"shaderRecordEXT" : "shaderRecordNV"); if (flags.get(DecorationRowMajor)) attr.push_back("row_major"); if (flags.get(DecorationColMajor)) attr.push_back("column_major"); if (options.vulkan_semantics) { if (flags.get(DecorationInputAttachmentIndex)) attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex))); } bool is_block = has_decoration(type.self, DecorationBlock); if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block)) { Bitset combined_decoration; for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++) combined_decoration.merge_or(combined_decoration_for_member(type, i)); // If our members have location decorations, we don't need to // emit location decorations at the top as well (looks weird). if (!combined_decoration.get(DecorationLocation)) attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation))); } if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput && location_is_non_coherent_framebuffer_fetch(get_decoration(var.self, DecorationLocation))) { attr.push_back("noncoherent"); } // Transform feedback bool uses_enhanced_layouts = false; if (is_block && var.storage == StorageClassOutput) { // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself, // since all members must match the same xfb_buffer. The only thing we will declare for members of the block // is the xfb_offset. 
uint32_t member_count = uint32_t(type.member_types.size()); bool have_xfb_buffer_stride = false; bool have_any_xfb_offset = false; bool have_geom_stream = false; uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride)) { have_xfb_buffer_stride = true; xfb_buffer = get_decoration(var.self, DecorationXfbBuffer); xfb_stride = get_decoration(var.self, DecorationXfbStride); } if (flags.get(DecorationStream)) { have_geom_stream = true; geom_stream = get_decoration(var.self, DecorationStream); } // Verify that none of the members violate our assumption. for (uint32_t i = 0; i < member_count; i++) { if (has_member_decoration(type.self, i, DecorationStream)) { uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream); if (have_geom_stream && member_geom_stream != geom_stream) SPIRV_CROSS_THROW("IO block member Stream mismatch."); have_geom_stream = true; geom_stream = member_geom_stream; } // Only members with an Offset decoration participate in XFB. 
if (!has_member_decoration(type.self, i, DecorationOffset)) continue; have_any_xfb_offset = true; if (has_member_decoration(type.self, i, DecorationXfbBuffer)) { uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer); if (have_xfb_buffer_stride && buffer_index != xfb_buffer) SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); have_xfb_buffer_stride = true; xfb_buffer = buffer_index; } if (has_member_decoration(type.self, i, DecorationXfbStride)) { uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride); if (have_xfb_buffer_stride && stride != xfb_stride) SPIRV_CROSS_THROW("IO block member XfbStride mismatch."); have_xfb_buffer_stride = true; xfb_stride = stride; } } if (have_xfb_buffer_stride && have_any_xfb_offset) { attr.push_back(join("xfb_buffer = ", xfb_buffer)); attr.push_back(join("xfb_stride = ", xfb_stride)); uses_enhanced_layouts = true; } if (have_geom_stream) { if (get_execution_model() != ExecutionModelGeometry) SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); if (options.es) SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); if (options.version < 400) require_extension_internal("GL_ARB_transform_feedback3"); attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream))); } } else if (var.storage == StorageClassOutput) { if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset)) { // XFB for standalone variables, we can emit all decorations. 
attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer))); attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride))); attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset))); uses_enhanced_layouts = true; } if (flags.get(DecorationStream)) { if (get_execution_model() != ExecutionModelGeometry) SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); if (options.es) SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); if (options.version < 400) require_extension_internal("GL_ARB_transform_feedback3"); attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream))); } } // Can only declare Component if we can declare location. if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block)) { uses_enhanced_layouts = true; attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent))); } if (uses_enhanced_layouts) { if (!options.es) { if (options.version < 440 && options.version >= 140) require_extension_internal("GL_ARB_enhanced_layouts"); else if (options.version < 140) SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40."); if (!options.es && options.version < 440) require_extension_internal("GL_ARB_enhanced_layouts"); } else if (options.es) SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL."); } if (flags.get(DecorationIndex)) attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex))); // Do not emit set = decoration in regular GLSL output, but // we need to preserve it in Vulkan GLSL mode. 
if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR) { if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics) attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet))); } bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant; bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR || (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock)); bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer; bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock); // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ... bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140); // pretend no UBOs when options say so if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms) can_use_buffer_blocks = false; bool can_use_binding; if (options.es) can_use_binding = options.version >= 310; else can_use_binding = options.enable_420pack_extension || (options.version >= 420); // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30. if (!can_use_buffer_blocks && var.storage == StorageClassUniform) can_use_binding = false; if (var.storage == StorageClassShaderRecordBufferKHR) can_use_binding = false; if (can_use_binding && flags.get(DecorationBinding)) attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding))); if (var.storage != StorageClassOutput && flags.get(DecorationOffset)) attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset))); // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430. // If SPIR-V does not comply with either layout, we cannot really work around it. 
if (can_use_buffer_blocks && (ubo_block || emulated_ubo)) { attr.push_back(buffer_to_packing_standard(type, false)); } else if (can_use_buffer_blocks && (push_constant_block || ssbo_block)) { attr.push_back(buffer_to_packing_standard(type, true)); } // For images, the type itself adds a layout qualifer. // Only emit the format for storage images. if (type.basetype == SPIRType::Image && type.image.sampled == 2) { const char *fmt = format_to_glsl(type.image.format); if (fmt) attr.push_back(fmt); } if (attr.empty()) return ""; string res = "layout("; res += merge(attr); res += ") "; return res; } string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout) { if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430)) return "std430"; else if (buffer_is_packing_standard(type, BufferPackingStd140)) return "std140"; else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar)) { require_extension_internal("GL_EXT_scalar_block_layout"); return "scalar"; } else if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) { if (options.es && !options.vulkan_semantics) SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " "not support GL_ARB_enhanced_layouts."); if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension_internal("GL_ARB_enhanced_layouts"); set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); return "std430"; } else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) { // Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference, // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout. 
// Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there. if (options.es && !options.vulkan_semantics) SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " "not support GL_ARB_enhanced_layouts."); if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension_internal("GL_ARB_enhanced_layouts"); set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); return "std140"; } else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout)) { set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); require_extension_internal("GL_EXT_scalar_block_layout"); return "scalar"; } else if (!support_std430_without_scalar_layout && options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingStd430)) { // UBOs can support std430 with GL_EXT_scalar_block_layout. require_extension_internal("GL_EXT_scalar_block_layout"); return "std430"; } else if (!support_std430_without_scalar_layout && options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) { // UBOs can support std430 with GL_EXT_scalar_block_layout. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); require_extension_internal("GL_EXT_scalar_block_layout"); return "std430"; } else { SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced " "layouts. 
You can try flattening this block to support a more flexible layout."); } } void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var) { if (flattened_buffer_blocks.count(var.self)) emit_buffer_block_flattened(var); else if (options.vulkan_semantics) emit_push_constant_block_vulkan(var); else if (options.emit_push_constant_as_uniform_buffer) emit_buffer_block_native(var); else emit_push_constant_block_glsl(var); } void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var) { emit_buffer_block(var); } void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) { // OpenGL has no concept of push constant blocks, implement it as a uniform struct. auto &type = get(var.basetype); unset_decoration(var.self, DecorationBinding); unset_decoration(var.self, DecorationDescriptorSet); #if 0 if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet))) SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " "Remap to location with reflection API first or disable these decorations."); #endif // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. 
bool block_flag = has_decoration(type.self, DecorationBlock);
	unset_decoration(type.self, DecorationBlock);

	emit_struct(type);

	// Put the Block decoration back only if the type carried it before.
	if (block_flag)
		set_decoration(type.self, DecorationBlock);

	emit_uniform(var);
	statement("");
}

// Emits a buffer block, choosing between three strategies:
//  - flattened: the variable is listed in flattened_buffer_blocks,
//  - legacy: legacy target, desktop GLSL 1.30, or a UBO when emit_uniform_buffer_as_plain_uniforms is set,
//  - native: a real interface block otherwise.
void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
{
	auto &type = get(var.basetype);
	bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);

	if (flattened_buffer_blocks.count(var.self))
		emit_buffer_block_flattened(var);
	else if (is_legacy() || (!options.es && options.version == 130) ||
	         (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
		emit_buffer_block_legacy(var);
	else
		emit_buffer_block_native(var);
}

// Legacy path: declares the block as a plain struct followed by a uniform of that struct type.
// Throws for SSBOs (StorageBuffer storage class or a BufferBlock-decorated type).
void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
{
	auto &type = get(var.basetype);
	bool ssbo = var.storage == StorageClassStorageBuffer ||
	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
	if (ssbo)
		SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");

	// We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
	auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
	bool block_flag = block_flags.get(DecorationBlock);
	block_flags.clear(DecorationBlock);
	emit_struct(type);
	// Restore the Block flag only if it was set on entry.
	if (block_flag)
		block_flags.set(DecorationBlock);
	emit_uniform(var);
	statement("");
}

// Emits a buffer_reference block for type_id.
// forward_declaration == true: picks and records a unique block name, then emits only
//   "layout(buffer_reference) buffer <name>;".
// forward_declaration == false: emits the full block declaration, including its members.
void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration)
{
	auto &type = get(type_id);
	string buffer_name;

	if (forward_declaration)
	{
		// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
		// Allow aliased name since we might be declaring the block twice. Once with buffer reference (forward declared) and one proper declaration.
		// The names must match up.
		buffer_name = to_name(type.self, false);

		// Shaders never use the block by interface name, so we don't
		// have to track this other than updating name caches.
		// If we have a collision for any reason, just fallback immediately.
		if (ir.meta[type.self].decoration.alias.empty() ||
		    block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
		    resource_names.find(buffer_name) != end(resource_names))
		{
			buffer_name = join("_", type.self);
		}

		// Make sure we get something unique for both global name scope and block name scope.
		// See GLSL 4.5 spec: section 4.3.9 for details.
		add_variable(block_ssbo_names, resource_names, buffer_name);

		// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
		// This cannot conflict with anything else, so we're safe now.
		// We cannot reuse this fallback name in neither global scope (blocked by block_names) nor block name scope.
		if (buffer_name.empty())
			buffer_name = join("_", type.self);

		block_names.insert(buffer_name);
		block_ssbo_names.insert(buffer_name);

		// Ensure we emit the correct name when emitting non-forward pointer type.
ir.meta[type.self].decoration.alias = buffer_name;
	}
	else if (type.basetype != SPIRType::Struct)
		buffer_name = type_to_glsl(type);
	else
		buffer_name = to_name(type.self, false);

	if (!forward_declaration)
	{
		// Alignment stays 0 (and is then omitted from the layout) when no entry is recorded for this type.
		auto itr = physical_storage_type_to_alignment.find(type_id);
		uint32_t alignment = 0;
		if (itr != physical_storage_type_to_alignment.end())
			alignment = itr->second.alignment;

		if (type.basetype == SPIRType::Struct)
		{
			SmallVector attributes;
			attributes.push_back("buffer_reference");
			if (alignment)
				attributes.push_back(join("buffer_reference_align = ", alignment));
			attributes.push_back(buffer_to_packing_standard(type, true));

			// Memory qualifiers are appended after the layout(...) in this fixed order.
			auto flags = ir.get_buffer_block_type_flags(type);
			string decorations;
			if (flags.get(DecorationRestrict))
				decorations += " restrict";
			if (flags.get(DecorationCoherent))
				decorations += " coherent";
			if (flags.get(DecorationNonReadable))
				decorations += " writeonly";
			if (flags.get(DecorationNonWritable))
				decorations += " readonly";

			statement("layout(", merge(attributes), ")", decorations, " buffer ", buffer_name);
		}
		else if (alignment)
			statement("layout(buffer_reference, buffer_reference_align = ", alignment, ") buffer ", buffer_name);
		else
			statement("layout(buffer_reference) buffer ", buffer_name);

		begin_scope();

		if (type.basetype == SPIRType::Struct)
		{
			type.member_name_cache.clear();

			uint32_t i = 0;
			for (auto &member : type.member_types)
			{
				add_member_name(type, i);
				emit_struct_member(type, member, i);
				i++;
			}
		}
		else
		{
			// Non-struct pointee: the block wraps a single member named "value".
			auto &pointee_type = get_pointee_type(type);
			statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
		}

		end_scope_decl();
		statement("");
	}
	else
	{
		statement("layout(buffer_reference) buffer ", buffer_name, ";");
	}
}

// Emits a UBO or SSBO as a native GLSL interface block ("uniform"/"buffer" Name { members } instance;).
// Memory qualifiers (coherent/restrict/writeonly/readonly) are only emitted for SSBOs.
// The chosen block name is stored in declared_block_names[var.self].
void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
{
	auto &type = get(var.basetype);

	Bitset flags = ir.get_buffer_block_flags(var);
	bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
	bool is_restrict = ssbo && flags.get(DecorationRestrict);
	bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
	bool is_readonly = ssbo && flags.get(DecorationNonWritable);
	bool is_coherent = ssbo && flags.get(DecorationCoherent);

	// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
	auto buffer_name = to_name(type.self, false);

	// SSBO and UBO block names are tracked in separate name sets.
	auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;

	// Shaders never use the block by interface name, so we don't
	// have to track this other than updating name caches.
	// If we have a collision for any reason, just fallback immediately.
	if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
	    resource_names.find(buffer_name) != end(resource_names))
	{
		buffer_name = get_block_fallback_name(var.self);
	}

	// Make sure we get something unique for both global name scope and block name scope.
	// See GLSL 4.5 spec: section 4.3.9 for details.
	add_variable(block_namespace, resource_names, buffer_name);

	// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
	// This cannot conflict with anything else, so we're safe now.
	// We cannot reuse this fallback name in neither global scope (blocked by block_names) nor block name scope.
	if (buffer_name.empty())
		buffer_name = join("_", get(var.basetype).self, "_", var.self);

	block_names.insert(buffer_name);
	block_namespace.insert(buffer_name);

	// Save for post-reflection later.
	declared_block_names[var.self] = buffer_name;

	statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
	          is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
	          buffer_name);

	begin_scope();

	type.member_name_cache.clear();

	uint32_t i = 0;
	for (auto &member : type.member_types)
	{
		add_member_name(type, i);
		emit_struct_member(type, member, i);
		i++;
	}

	// var.self can be used as a backup name for the block name,
	// so we need to make sure we don't disturb the name here on a recompile.
	// It will need to be reset if we have to recompile.
	preserve_alias_on_reset(var.self);
	add_resource_name(var.self);
	end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
	statement("");
}

// Emits a buffer block as a single uniform array of 4-component vectors named after the block type.
// The array length is the declared struct size rounded up to a multiple of 16 bytes, divided by 16.
// Throws unless all members share one basic type and that type is float, int or uint.
void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
{
	auto &type = get(var.basetype);

	// Block names should never alias.
	auto buffer_name = to_name(type.self, false);
	size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;

	SPIRType::BaseType basic_type;
	if (get_common_basic_type(type, basic_type))
	{
		SPIRType tmp;
		tmp.basetype = basic_type;
		tmp.vecsize = 4;
		if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
			SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");

		auto flags = ir.get_buffer_block_flags(var);
		statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
		          buffer_size, "];");
	}
	else
		SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
}

// Returns the GLSL storage qualifier keyword for a variable, including its trailing space,
// or an empty string when none applies. The returned pointer always refers to a string literal.
const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
{
	auto &execution = get_entry_point();

	if (subpass_input_is_framebuffer_fetch(var.self))
		return "";

	if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
	{
		if (is_legacy() && execution.model == ExecutionModelVertex)
			return var.storage == StorageClassInput ? "attribute " : "varying ";
		else if (is_legacy() && execution.model == ExecutionModelFragment)
			return "varying "; // Fragment outputs are renamed so they never hit this case.
else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput) { uint32_t loc = get_decoration(var.self, DecorationLocation); bool is_inout = location_is_framebuffer_fetch(loc); if (is_inout) return "inout "; else return "out "; } else return var.storage == StorageClassInput ? "in " : "out "; } else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform || var.storage == StorageClassPushConstant) { return "uniform "; } else if (var.storage == StorageClassRayPayloadKHR) { return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV "; } else if (var.storage == StorageClassIncomingRayPayloadKHR) { return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV "; } else if (var.storage == StorageClassHitAttributeKHR) { return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV "; } else if (var.storage == StorageClassCallableDataKHR) { return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV "; } else if (var.storage == StorageClassIncomingCallableDataKHR) { return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV "; } return ""; } void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual, const SmallVector &indices) { uint32_t member_type_id = type.self; const SPIRType *member_type = &type; const SPIRType *parent_type = nullptr; auto flattened_name = basename; for (auto &index : indices) { flattened_name += "_"; flattened_name += to_member_name(*member_type, index); parent_type = member_type; member_type_id = member_type->member_types[index]; member_type = &get(member_type_id); } assert(member_type->basetype != SPIRType::Struct); // We're overriding struct member names, so ensure we do so on the primary type. if (parent_type->type_alias) parent_type = &get(parent_type->type_alias); // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row, // which is not allowed. 
ParsedIR::sanitize_underscores(flattened_name); uint32_t last_index = indices.back(); // Pass in the varying qualifier here so it will appear in the correct declaration order. // Replace member name while emitting it so it encodes both struct name and member name. auto backup_name = get_member_name(parent_type->self, last_index); auto member_name = to_member_name(*parent_type, last_index); set_member_name(parent_type->self, last_index, flattened_name); emit_struct_member(*parent_type, member_type_id, last_index, qual); // Restore member name. set_member_name(parent_type->self, last_index, member_name); } void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual, const SmallVector &indices) { auto sub_indices = indices; sub_indices.push_back(0); const SPIRType *member_type = &type; for (auto &index : indices) member_type = &get(member_type->member_types[index]); assert(member_type->basetype == SPIRType::Struct); if (!member_type->array.empty()) SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks."); for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++) { sub_indices.back() = i; if (get(member_type->member_types[i]).basetype == SPIRType::Struct) emit_flattened_io_block_struct(basename, type, qual, sub_indices); else emit_flattened_io_block_member(basename, type, qual, sub_indices); } } void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual) { auto &var_type = get(var.basetype); if (!var_type.array.empty()) SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings."); // Emit flattened types based on the type alias. Normally, we are never supposed to emit // struct declarations for aliased types. auto &type = var_type.type_alias ? get(var_type.type_alias) : var_type; auto old_flags = ir.meta[type.self].decoration.decoration_flags; // Emit the members as if they are part of a block to get all qualifiers. 
ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);

	type.member_name_cache.clear();

	// Index chain handed to the flatten helpers; its single entry is rewritten for each top-level member.
	SmallVector member_indices;
	member_indices.push_back(0);
	auto basename = to_name(var.self);

	uint32_t i = 0;
	for (auto &member : type.member_types)
	{
		add_member_name(type, i);
		auto &membertype = get(member);

		member_indices.back() = i;
		if (membertype.basetype == SPIRType::Struct)
			emit_flattened_io_block_struct(basename, type, qual, member_indices);
		else
			emit_flattened_io_block_member(basename, type, qual, member_indices);

		i++;
	}

	// Undo the temporary Block decoration set above.
	ir.meta[type.self].decoration.decoration_flags = old_flags;

	// Treat this variable as fully flattened from now on.
	flattened_structs[var.self] = true;
}

// Emits the declaration of a stage input/output variable: either an I/O block, a set of flattened
// varyings (where blocks or struct-typed I/O are not available or flattening is forced), or a plain variable.
void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
{
	auto &type = get(var.basetype);

	if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
	    !options.es && options.version < 410)
	{
		require_extension_internal("GL_ARB_vertex_attrib_64bit");
	}

	// Either make it plain in/out or in/out blocks depending on what shader is doing ...
	bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
	const char *qual = to_storage_qualifiers_glsl(var);

	if (block)
	{
		// ESSL earlier than 310 and GLSL earlier than 150 did not support
		// I/O variables which are struct types.
		// To support this, flatten the struct into separate varyings instead.
		if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
		    (!options.es && options.version < 150))
		{
			// I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
			// On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
			emit_flattened_io_block(var, qual);
		}
		else
		{
			if (options.es && options.version < 320)
			{
				// Geometry and tessellation extensions imply this extension.
				if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
					require_extension_internal("GL_EXT_shader_io_blocks");
			}

			// Workaround to make sure we can emit "patch in/out" correctly.
			fixup_io_block_patch_primitive_qualifiers(var);

			// Block names should never alias.
			auto block_name = to_name(type.self, false);

			// The namespace for I/O blocks is separate from other variables in GLSL.
			auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;

			// Shaders never use the block by interface name, so we don't
			// have to track this other than updating name caches.
			if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
				block_name = get_fallback_name(type.self);
			else
				block_namespace.insert(block_name);

			// If for some reason block_name is an illegal name, make a final fallback to a workaround name.
			// This cannot conflict with anything else, so we're safe now.
			if (block_name.empty())
				block_name = join("_", get(var.basetype).self, "_", var.self);

			// Instance names cannot alias block names.
			resource_names.insert(block_name);

			// At most one of these auxiliary qualifiers is emitted; Patch takes precedence.
			const char *block_qualifier;
			if (has_decoration(var.self, DecorationPatch))
				block_qualifier = "patch ";
			else if (has_decoration(var.self, DecorationPerPrimitiveEXT))
				block_qualifier = "perprimitiveEXT ";
			else
				block_qualifier = "";

			statement(layout_for_variable(var), block_qualifier, qual, block_name);
			begin_scope();

			type.member_name_cache.clear();

			uint32_t i = 0;
			for (auto &member : type.member_types)
			{
				add_member_name(type, i);
				emit_struct_member(type, member, i);
				i++;
			}

			add_resource_name(var.self);
			end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
			statement("");
		}
	}
	else
	{
		// ESSL earlier than 310 and GLSL earlier than 150 did not support
		// I/O variables which are struct types.
		// To support this, flatten the struct into separate varyings instead.
if (type.basetype == SPIRType::Struct && (options.force_flattened_io_blocks || (options.es && options.version < 310) || (!options.es && options.version < 150))) { emit_flattened_io_block(var, qual); } else { add_resource_name(var.self); // Legacy GLSL did not support int attributes, we automatically // declare them as float and cast them on load/store SPIRType newtype = type; if (is_legacy() && var.storage == StorageClassInput && type.basetype == SPIRType::Int) newtype.basetype = SPIRType::Float; // Tessellation control and evaluation shaders must have either // gl_MaxPatchVertices or unsized arrays for input arrays. // Opt for unsized as it's the more "correct" variant to use. if (type.storage == StorageClassInput && !type.array.empty() && !has_decoration(var.self, DecorationPatch) && (get_entry_point().model == ExecutionModelTessellationControl || get_entry_point().model == ExecutionModelTessellationEvaluation)) { newtype.array.back() = 0; newtype.array_size_literal.back() = true; } statement(layout_for_variable(var), to_qualifiers_glsl(var.self), variable_decl(newtype, to_name(var.self), var.self), ";"); } } } void CompilerGLSL::emit_uniform(const SPIRVariable &var) { auto &type = get(var.basetype); if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData) { if (!options.es && options.version < 420) require_extension_internal("GL_ARB_shader_image_load_store"); else if (options.es && options.version < 310) SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store."); } add_resource_name(var.self); statement(layout_for_variable(var), variable_decl(var), ";"); } string CompilerGLSL::constant_value_macro_name(uint32_t id) { return join("SPIRV_CROSS_CONSTANT_ID_", id); } void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant) { auto &type = get(constant.basetype); add_resource_name(constant.self); auto name = to_name(constant.self); statement("const ", variable_decl(type, name), " 
= ", constant_op_expression(constant), ";"); } int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const { auto &entry_point = get_entry_point(); int index = -1; // Need to redirect specialization constants which are used as WorkGroupSize to the builtin, // since the spec constant declarations are never explicitly declared. if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(ExecutionModeLocalSizeId)) { if (c.self == entry_point.workgroup_size.id_x) index = 0; else if (c.self == entry_point.workgroup_size.id_y) index = 1; else if (c.self == entry_point.workgroup_size.id_z) index = 2; } return index; } void CompilerGLSL::emit_constant(const SPIRConstant &constant) { auto &type = get(constant.constant_type); SpecializationConstant wg_x, wg_y, wg_z; ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); // This specialization constant is implicitly declared by emitting layout() in; if (constant.self == workgroup_size_id) return; // These specialization constants are implicitly declared by emitting layout() in; // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration // later can use macro overrides for work group size. bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id || ConstantID(constant.self) == wg_z.id; if (options.vulkan_semantics && is_workgroup_size_constant) { // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout(). return; } else if (!options.vulkan_semantics && is_workgroup_size_constant && !has_decoration(constant.self, DecorationSpecId)) { // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros. return; } add_resource_name(constant.self); auto name = to_name(constant.self); // Only scalars have constant IDs. 
if (has_decoration(constant.self, DecorationSpecId)) { if (options.vulkan_semantics) { statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ", variable_decl(type, name), " = ", constant_expression(constant), ";"); } else { const string ¯o_name = constant.specialization_constant_macro_name; statement("#ifndef ", macro_name); statement("#define ", macro_name, " ", constant_expression(constant)); statement("#endif"); // For workgroup size constants, only emit the macros. if (!is_workgroup_size_constant) statement("const ", variable_decl(type, name), " = ", macro_name, ";"); } } else { statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";"); } } void CompilerGLSL::emit_entry_point_declarations() { } void CompilerGLSL::replace_illegal_names(const unordered_set &keywords) { ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { if (is_hidden_variable(var)) return; auto *meta = ir.find_meta(var.self); if (!meta) return; auto &m = meta->decoration; if (keywords.find(m.alias) != end(keywords)) m.alias = join("_", m.alias); }); ir.for_each_typed_id([&](uint32_t, const SPIRFunction &func) { auto *meta = ir.find_meta(func.self); if (!meta) return; auto &m = meta->decoration; if (keywords.find(m.alias) != end(keywords)) m.alias = join("_", m.alias); }); ir.for_each_typed_id([&](uint32_t, const SPIRType &type) { auto *meta = ir.find_meta(type.self); if (!meta) return; auto &m = meta->decoration; if (keywords.find(m.alias) != end(keywords)) m.alias = join("_", m.alias); for (auto &memb : meta->members) if (keywords.find(memb.alias) != end(keywords)) memb.alias = join("_", memb.alias); }); } void CompilerGLSL::replace_illegal_names() { // clang-format off static const unordered_set keywords = { "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh", "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement", "atomicExchange", "atomicMax", 
"atomicMin", "atomicOr", "atomicXor", "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse", "ceil", "cos", "cosh", "cross", "degrees", "dFdx", "dFdxCoarse", "dFdxFine", "dFdy", "dFdyCoarse", "dFdyFine", "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2", "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract", "frexp", "fwidth", "fwidthCoarse", "fwidthFine", "greaterThan", "greaterThanEqual", "groupMemoryBarrier", "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor", "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample", "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2", "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared", "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual", "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8", "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow", "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step", "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets", "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad", "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize", "transpose", "trunc", 
"uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16", "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow", "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer", "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard", "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4", "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float", "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray", "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube", "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect", "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant", "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect", "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp", "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump", "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly", "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow", "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray", "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", 
"sampler3DRect", "samplerBuffer", "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static", "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D", "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube", "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray", "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube", "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile", "while", "writeonly", }; // clang-format on replace_illegal_names(keywords); } void CompilerGLSL::replace_fragment_output(SPIRVariable &var) { auto &m = ir.meta[var.self].decoration; uint32_t location = 0; if (m.decoration_flags.get(DecorationLocation)) location = m.location; // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will // do the access chain part of this for us. auto &type = get(var.basetype); if (type.array.empty()) { // Redirect the write to a specific render target in legacy GLSL. m.alias = join("gl_FragData[", location, "]"); if (is_legacy_es() && location != 0) require_extension_internal("GL_EXT_draw_buffers"); } else if (type.array.size() == 1) { // If location is non-zero, we probably have to add an offset. // This gets really tricky since we'd have to inject an offset in the access chain. // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now. m.alias = "gl_FragData"; if (location != 0) SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. " "This is unimplemented in SPIRV-Cross."); if (is_legacy_es()) require_extension_internal("GL_EXT_draw_buffers"); } else SPIRV_CROSS_THROW("Array-of-array output variable used. 
This cannot be implemented in legacy GLSL."); var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is. } void CompilerGLSL::replace_fragment_outputs() { ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput) replace_fragment_output(var); }); } string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr) { if (out_type.vecsize == input_components) return expr; else if (input_components == 1 && !backend.can_swizzle_scalar) return join(type_to_glsl(out_type), "(", expr, ")"); else { // FIXME: This will not work with packed expressions. auto e = enclose_expression(expr) + "."; // Just clamp the swizzle index if we have more outputs than inputs. for (uint32_t c = 0; c < out_type.vecsize; c++) e += index_to_swizzle(min(c, input_components - 1)); if (backend.swizzle_is_function && out_type.vecsize > 1) e += "()"; remove_duplicate_swizzle(e); return e; } } void CompilerGLSL::emit_pls() { auto &execution = get_entry_point(); if (execution.model != ExecutionModelFragment) SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders."); if (!options.es) SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES."); if (options.version < 300) SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above."); if (!pls_inputs.empty()) { statement("__pixel_local_inEXT _PLSIn"); begin_scope(); for (auto &input : pls_inputs) statement(pls_decl(input), ";"); end_scope_decl(); statement(""); } if (!pls_outputs.empty()) { statement("__pixel_local_outEXT _PLSOut"); begin_scope(); for (auto &output : pls_outputs) statement(pls_decl(output), ";"); end_scope_decl(); statement(""); } } void CompilerGLSL::fixup_image_load_store_access() { if (!options.enable_storage_image_qualifier_deduction) return; 
ir.for_each_typed_id([&](uint32_t var, const SPIRVariable &) { auto &vartype = expression_type(var); if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2) { // Very old glslangValidator and HLSL compilers do not emit required qualifiers here. // Solve this by making the image access as restricted as possible and loosen up if we need to. // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing. if (!has_decoration(var, DecorationNonWritable) && !has_decoration(var, DecorationNonReadable)) { set_decoration(var, DecorationNonWritable); set_decoration(var, DecorationNonReadable); } } }); } static bool is_block_builtin(BuiltIn builtin) { return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || builtin == BuiltInCullDistance; } bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) { // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block. if (storage != StorageClassOutput) return false; bool should_force = false; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { if (should_force) return; auto &type = this->get(var.basetype); bool block = has_decoration(type.self, DecorationBlock); if (var.storage == storage && block && is_builtin_variable(var)) { uint32_t member_count = uint32_t(type.member_types.size()); for (uint32_t i = 0; i < member_count; i++) { if (has_member_decoration(type.self, i, DecorationBuiltIn) && is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) && has_member_decoration(type.self, i, DecorationOffset)) { should_force = true; } } } else if (var.storage == storage && !block && is_builtin_variable(var)) { if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) && has_decoration(var.self, DecorationOffset)) { should_force = true; } } }); // If we're declaring clip/cull planes with control points we need to force block declaration. 
if ((get_execution_model() == ExecutionModelTessellationControl || get_execution_model() == ExecutionModelMeshEXT) && (clip_distance_count || cull_distance_count)) { should_force = true; } return should_force; } void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model) { ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); bool block = has_decoration(type.self, DecorationBlock); if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block && is_builtin_variable(var)) { if (model != ExecutionModelMeshEXT) { // Make sure the array has a supported name in the code. if (var.storage == StorageClassOutput) set_name(var.self, "gl_out"); else if (var.storage == StorageClassInput) set_name(var.self, "gl_in"); } else { auto flags = get_buffer_block_flags(var.self); if (flags.get(DecorationPerPrimitiveEXT)) { set_name(var.self, "gl_MeshPrimitivesEXT"); set_name(type.self, "gl_MeshPerPrimitiveEXT"); } else { set_name(var.self, "gl_MeshVerticesEXT"); set_name(type.self, "gl_MeshPerVertexEXT"); } } } if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block) { auto *m = ir.find_meta(var.self); if (m && m->decoration.builtin) { auto builtin_type = m->decoration.builtin_type; if (builtin_type == BuiltInPrimitivePointIndicesEXT) set_name(var.self, "gl_PrimitivePointIndicesEXT"); else if (builtin_type == BuiltInPrimitiveLineIndicesEXT) set_name(var.self, "gl_PrimitiveLineIndicesEXT"); else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT) set_name(var.self, "gl_PrimitiveTriangleIndicesEXT"); } } }); } void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model) { Bitset emitted_builtins; Bitset global_builtins; const SPIRVariable *block_var = nullptr; bool emitted_block = false; // Need to use declared size in the type. // These variables might have been declared, but not statically used, so we haven't deduced their size yet. 
uint32_t cull_distance_size = 0; uint32_t clip_distance_size = 0; bool have_xfb_buffer_stride = false; bool have_geom_stream = false; bool have_any_xfb_offset = false; uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; std::unordered_map builtin_xfb_offsets; const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool { return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || builtin == BuiltInCullDistance; }; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); bool block = has_decoration(type.self, DecorationBlock); Bitset builtins; if (var.storage == storage && block && is_builtin_variable(var)) { uint32_t index = 0; for (auto &m : ir.meta[type.self].members) { if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) { builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) cull_distance_size = to_array_size_literal(this->get(type.member_types[index])); else if (m.builtin_type == BuiltInClipDistance) clip_distance_size = to_array_size_literal(this->get(type.member_types[index])); if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset)) { have_any_xfb_offset = true; builtin_xfb_offsets[m.builtin_type] = m.offset; } if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) { uint32_t stream = m.stream; if (have_geom_stream && geom_stream != stream) SPIRV_CROSS_THROW("IO block member Stream mismatch."); have_geom_stream = true; geom_stream = stream; } } index++; } if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) && has_decoration(var.self, DecorationXfbStride)) { uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer); uint32_t stride = get_decoration(var.self, DecorationXfbStride); if (have_xfb_buffer_stride && buffer_index != xfb_buffer) SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); if (have_xfb_buffer_stride && stride != xfb_stride) 
SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); have_xfb_buffer_stride = true; xfb_buffer = buffer_index; xfb_stride = stride; } if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream)) { uint32_t stream = get_decoration(var.self, DecorationStream); if (have_geom_stream && geom_stream != stream) SPIRV_CROSS_THROW("IO block member Stream mismatch."); have_geom_stream = true; geom_stream = stream; } } else if (var.storage == storage && !block && is_builtin_variable(var)) { // While we're at it, collect all declared global builtins (HLSL mostly ...). auto &m = ir.meta[var.self].decoration; if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) { global_builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) cull_distance_size = to_array_size_literal(type); else if (m.builtin_type == BuiltInClipDistance) clip_distance_size = to_array_size_literal(type); if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) && m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset)) { have_any_xfb_offset = true; builtin_xfb_offsets[m.builtin_type] = m.offset; uint32_t buffer_index = m.xfb_buffer; uint32_t stride = m.xfb_stride; if (have_xfb_buffer_stride && buffer_index != xfb_buffer) SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); if (have_xfb_buffer_stride && stride != xfb_stride) SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); have_xfb_buffer_stride = true; xfb_buffer = buffer_index; xfb_stride = stride; } if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) { uint32_t stream = get_decoration(var.self, DecorationStream); if (have_geom_stream && geom_stream != stream) SPIRV_CROSS_THROW("IO block member Stream mismatch."); have_geom_stream = true; geom_stream = stream; } } } if (builtins.empty()) return; if (emitted_block) SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block."); emitted_builtins = builtins; 
emitted_block = true; block_var = &var; }); global_builtins = Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) | (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance))); // Try to collect all other declared builtins. if (!emitted_block) emitted_builtins = global_builtins; // Can't declare an empty interface block. if (emitted_builtins.empty()) return; if (storage == StorageClassOutput) { SmallVector attr; if (have_xfb_buffer_stride && have_any_xfb_offset) { if (!options.es) { if (options.version < 440 && options.version >= 140) require_extension_internal("GL_ARB_enhanced_layouts"); else if (options.version < 140) SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); if (!options.es && options.version < 440) require_extension_internal("GL_ARB_enhanced_layouts"); } else if (options.es) SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer."); attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride)); } if (have_geom_stream) { if (get_execution_model() != ExecutionModelGeometry) SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); if (options.es) SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); if (options.version < 400) require_extension_internal("GL_ARB_transform_feedback3"); attr.push_back(join("stream = ", geom_stream)); } if (model == ExecutionModelMeshEXT) statement("out gl_MeshPerVertexEXT"); else if (!attr.empty()) statement("layout(", merge(attr), ") out gl_PerVertex"); else statement("out gl_PerVertex"); } else { // If we have passthrough, there is no way PerVertex cannot be passthrough. 
if (get_entry_point().geometry_passthrough) statement("layout(passthrough) in gl_PerVertex"); else statement("in gl_PerVertex"); } begin_scope(); if (emitted_builtins.get(BuiltInPosition)) { auto itr = builtin_xfb_offsets.find(BuiltInPosition); if (itr != end(builtin_xfb_offsets)) statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;"); else statement("vec4 gl_Position;"); } if (emitted_builtins.get(BuiltInPointSize)) { auto itr = builtin_xfb_offsets.find(BuiltInPointSize); if (itr != end(builtin_xfb_offsets)) statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;"); else statement("float gl_PointSize;"); } if (emitted_builtins.get(BuiltInClipDistance)) { auto itr = builtin_xfb_offsets.find(BuiltInClipDistance); if (itr != end(builtin_xfb_offsets)) statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];"); else statement("float gl_ClipDistance[", clip_distance_size, "];"); } if (emitted_builtins.get(BuiltInCullDistance)) { auto itr = builtin_xfb_offsets.find(BuiltInCullDistance); if (itr != end(builtin_xfb_offsets)) statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];"); else statement("float gl_CullDistance[", cull_distance_size, "];"); } bool builtin_array = model == ExecutionModelTessellationControl || (model == ExecutionModelMeshEXT && storage == StorageClassOutput) || (model == ExecutionModelGeometry && storage == StorageClassInput) || (model == ExecutionModelTessellationEvaluation && storage == StorageClassInput); if (builtin_array) { const char *instance_name; if (model == ExecutionModelMeshEXT) instance_name = "gl_MeshVerticesEXT"; // Per primitive is never synthesized. else instance_name = storage == StorageClassInput ? 
"gl_in" : "gl_out"; if (model == ExecutionModelTessellationControl && storage == StorageClassOutput) end_scope_decl(join(instance_name, "[", get_entry_point().output_vertices, "]")); else end_scope_decl(join(instance_name, "[]")); } else end_scope_decl(); statement(""); } bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const { bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable; if (statically_assigned) { auto *constant = maybe_get(var.static_expression); if (constant && constant->is_used_as_lut) return true; } return false; } void CompilerGLSL::emit_resources() { auto &execution = get_entry_point(); replace_illegal_names(); // Legacy GL uses gl_FragData[], redeclare all fragment outputs // with builtins. if (execution.model == ExecutionModelFragment && is_legacy()) replace_fragment_outputs(); // Emit PLS blocks if we have such variables. if (!pls_inputs.empty() || !pls_outputs.empty()) emit_pls(); switch (execution.model) { case ExecutionModelGeometry: case ExecutionModelTessellationControl: case ExecutionModelTessellationEvaluation: case ExecutionModelMeshEXT: fixup_implicit_builtin_block_names(execution.model); break; default: break; } // Emit custom gl_PerVertex for SSO compatibility. 
if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment) { switch (execution.model) { case ExecutionModelGeometry: case ExecutionModelTessellationControl: case ExecutionModelTessellationEvaluation: emit_declared_builtin_block(StorageClassInput, execution.model); emit_declared_builtin_block(StorageClassOutput, execution.model); break; case ExecutionModelVertex: case ExecutionModelMeshEXT: emit_declared_builtin_block(StorageClassOutput, execution.model); break; default: break; } } else if (should_force_emit_builtin_block(StorageClassOutput)) { emit_declared_builtin_block(StorageClassOutput, execution.model); } else if (execution.geometry_passthrough) { // Need to declare gl_in with Passthrough. // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass. emit_declared_builtin_block(StorageClassInput, execution.model); } else { // Need to redeclare clip/cull distance with explicit size to use them. // SPIR-V mandates these builtins have a size declared. const char *storage = execution.model == ExecutionModelFragment ? "in" : "out"; if (clip_distance_count != 0) statement(storage, " float gl_ClipDistance[", clip_distance_count, "];"); if (cull_distance_count != 0) statement(storage, " float gl_CullDistance[", cull_distance_count, "];"); if (clip_distance_count != 0 || cull_distance_count != 0) statement(""); } if (position_invariant && (options.es || options.version >= 120)) { statement("invariant gl_Position;"); statement(""); } bool emitted = false; // If emitted Vulkan GLSL, // emit specialization constants as actual floats, // spec op expressions will redirect to the constant name. 
// { auto loop_lock = ir.create_loop_hard_lock(); for (auto &id_ : ir.ids_for_constant_undef_or_type) { auto &id = ir.ids[id_]; if (id.get_type() == TypeConstant) { auto &c = id.get(); bool needs_declaration = c.specialization || c.is_used_as_lut; if (needs_declaration) { if (!options.vulkan_semantics && c.specialization) { c.specialization_constant_macro_name = constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); } emit_constant(c); emitted = true; } } else if (id.get_type() == TypeConstantOp) { emit_specialization_constant_op(id.get()); emitted = true; } else if (id.get_type() == TypeType) { auto *type = &id.get(); bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer && (!has_decoration(type->self, DecorationBlock) && !has_decoration(type->self, DecorationBufferBlock)); // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs. if (type->basetype == SPIRType::Struct && type->pointer && has_decoration(type->self, DecorationBlock) && (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR || type->storage == StorageClassHitAttributeKHR)) { type = &get(type->parent_type); is_natural_struct = true; } if (is_natural_struct) { if (emitted) statement(""); emitted = false; emit_struct(*type); } } else if (id.get_type() == TypeUndef) { auto &undef = id.get(); auto &type = this->get(undef.basetype); // OpUndef can be void for some reason ... if (type.basetype == SPIRType::Void) return; string initializer; if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); // FIXME: If used in a constant, we must declare it as one. statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); emitted = true; } } } if (emitted) statement(""); // If we needed to declare work group size late, check here. 
// If the work group size depends on a specialization constant, we need to declare the layout() block // after constants (and their macros) have been declared. if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics && (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))) { SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0))) { SmallVector inputs; build_workgroup_size(inputs, wg_x, wg_y, wg_z); statement("layout(", merge(inputs), ") in;"); statement(""); } } emitted = false; if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) { for (auto type : physical_storage_non_block_pointer_types) { emit_buffer_reference_block(type, false); } // Output buffer reference blocks. // Do this in two stages, one with forward declaration, // and one without. Buffer reference blocks can reference themselves // to support things like linked lists. 
ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { if (type.basetype == SPIRType::Struct && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) && type.storage == StorageClassPhysicalStorageBufferEXT) { emit_buffer_reference_block(self, true); } }); ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { if (type.basetype == SPIRType::Struct && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) && type.storage == StorageClassPhysicalStorageBufferEXT) { emit_buffer_reference_block(self, false); } }); } // Output UBOs and SSBOs ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform || type.storage == StorageClassShaderRecordBufferKHR; bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && has_block_flags) { emit_buffer_block(var); } }); // Output push constant blocks ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && !is_hidden_variable(var)) { emit_push_constant_block(var); } }); bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics; // Output Uniform Constants (values, samplers, images, etc). ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); // If we're remapping separate samplers and images, only emit the combined samplers. if (skip_separate_image_sampler) { // Sampler buffers are always used without a sampler, and they will also work in regular GL. 
bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; bool separate_sampler = type.basetype == SPIRType::Sampler; if (!sampler_buffer && (separate_image || separate_sampler)) return; } if (var.storage != StorageClassFunction && type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter || type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR || type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR || type.storage == StorageClassHitAttributeKHR) && !is_hidden_variable(var)) { emit_uniform(var); emitted = true; } }); if (emitted) statement(""); emitted = false; bool emitted_base_instance = false; // Output in/out interfaces. ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); bool is_hidden = is_hidden_variable(var); // Unused output I/O variables might still be required to implement framebuffer fetch. if (var.storage == StorageClassOutput && !is_legacy() && location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0) { is_hidden = false; } if (var.storage != StorageClassFunction && type.pointer && (var.storage == StorageClassInput || var.storage == StorageClassOutput) && interface_variable_exists_in_entry_point(var.self) && !is_hidden) { if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput && type.array.size() == 1) { SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader."); } emit_interface_block(var); emitted = true; } else if (is_builtin_variable(var)) { auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); // For gl_InstanceIndex emulation on GLES, the API user needs to // supply this uniform. 
// The draw parameter extension is soft-enabled on GL with some fallbacks. if (!options.vulkan_semantics) { if (!emitted_base_instance && ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) || (builtin == BuiltInBaseInstance))) { statement("#ifdef GL_ARB_shader_draw_parameters"); statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB"); statement("#else"); // A crude, but simple workaround which should be good enough for non-indirect draws. statement("uniform int SPIRV_Cross_BaseInstance;"); statement("#endif"); emitted = true; emitted_base_instance = true; } else if (builtin == BuiltInBaseVertex) { statement("#ifdef GL_ARB_shader_draw_parameters"); statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB"); statement("#else"); // A crude, but simple workaround which should be good enough for non-indirect draws. statement("uniform int SPIRV_Cross_BaseVertex;"); statement("#endif"); } else if (builtin == BuiltInDrawIndex) { statement("#ifndef GL_ARB_shader_draw_parameters"); // Cannot really be worked around. statement("#error GL_ARB_shader_draw_parameters is not supported."); statement("#endif"); } } } }); // Global variables. 
for (auto global : global_variables) { auto &var = get(global); if (is_hidden_variable(var, true)) continue; if (var.storage != StorageClassOutput) { if (!variable_is_lut(var)) { add_resource_name(var.self); string initializer; if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate && !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var))) { initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var))); } statement(variable_decl(var), initializer, ";"); emitted = true; } } else if (var.initializer && maybe_get(var.initializer) != nullptr) { emit_output_variable_initializer(var); } } if (emitted) statement(""); } void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var) { // If a StorageClassOutput variable has an initializer, we need to initialize it in main(). auto &entry_func = this->get(ir.default_entry_point); auto &type = get(var.basetype); bool is_patch = has_decoration(var.self, DecorationPatch); bool is_block = has_decoration(type.self, DecorationBlock); bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch; if (is_block) { uint32_t member_count = uint32_t(type.member_types.size()); bool type_is_array = type.array.size() == 1; uint32_t array_size = 1; if (type_is_array) array_size = to_array_size_literal(type); uint32_t iteration_count = is_control_point ? 1 : array_size; // If the initializer is a block, we must initialize each block member one at a time. for (uint32_t i = 0; i < member_count; i++) { // These outputs might not have been properly declared, so don't initialize them in that case. 
if (has_member_decoration(type.self, i, DecorationBuiltIn)) { if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance && !cull_distance_count) continue; if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance && !clip_distance_count) continue; } // We need to build a per-member array first, essentially transposing from AoS to SoA. // This code path hits when we have an array of blocks. string lut_name; if (type_is_array) { lut_name = join("_", var.self, "_", i, "_init"); uint32_t member_type_id = get(var.basetype).member_types[i]; auto &member_type = get(member_type_id); auto array_type = member_type; array_type.parent_type = member_type_id; array_type.array.push_back(array_size); array_type.array_size_literal.push_back(true); SmallVector exprs; exprs.reserve(array_size); auto &c = get(var.initializer); for (uint32_t j = 0; j < array_size; j++) exprs.push_back(to_expression(get(c.subconstants[j]).subconstants[i])); statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ", type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");"); } for (uint32_t j = 0; j < iteration_count; j++) { entry_func.fixup_hooks_in.push_back([=, &var]() { AccessChainMeta meta; auto &c = this->get(var.initializer); uint32_t invocation_id = 0; uint32_t member_index_id = 0; if (is_control_point) { uint32_t ids = ir.increase_bound_by(3); SPIRType uint_type; uint_type.basetype = SPIRType::UInt; uint_type.width = 32; set(ids, uint_type); set(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true); set(ids + 2, ids, i, false); invocation_id = ids + 1; member_index_id = ids + 2; } if (is_patch) { statement("if (gl_InvocationID == 0)"); begin_scope(); } if (type_is_array && !is_control_point) { uint32_t indices[2] = { j, i }; auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); statement(chain, " = ", lut_name, "[", j, "];"); } 
else if (is_control_point) { uint32_t indices[2] = { invocation_id, member_index_id }; auto chain = access_chain_internal(var.self, indices, 2, 0, &meta); statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];"); } else { auto chain = access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); statement(chain, " = ", to_expression(c.subconstants[i]), ";"); } if (is_patch) end_scope(); }); } } } else if (is_control_point) { auto lut_name = join("_", var.self, "_init"); statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type), " = ", to_expression(var.initializer), ";"); entry_func.fixup_hooks_in.push_back([&, lut_name]() { statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];"); }); } else if (has_decoration(var.self, DecorationBuiltIn) && BuiltIn(get_decoration(var.self, DecorationBuiltIn)) == BuiltInSampleMask) { // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_< entry_func.fixup_hooks_in.push_back([&] { auto &c = this->get(var.initializer); uint32_t num_constants = uint32_t(c.subconstants.size()); for (uint32_t i = 0; i < num_constants; i++) { // Don't use to_expression on constant since it might be uint, just fish out the raw int. 
statement(to_expression(var.self), "[", i, "] = ", convert_to_string(this->get(c.subconstants[i]).scalar_i32()), ";"); } }); } else { auto lut_name = join("_", var.self, "_init"); statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type), " = ", to_expression(var.initializer), ";"); entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() { if (is_patch) { statement("if (gl_InvocationID == 0)"); begin_scope(); } statement(to_expression(var.self), " = ", lut_name, ";"); if (is_patch) end_scope(); }); } } void CompilerGLSL::emit_subgroup_arithmetic_workaround(const std::string &func, Op op, GroupOperation group_op) { std::string result; switch (group_op) { case GroupOperationReduce: result = "reduction"; break; case GroupOperationExclusiveScan: result = "excl_scan"; break; case GroupOperationInclusiveScan: result = "incl_scan"; break; default: SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation"); } struct TypeInfo { std::string type; std::string identity; }; std::vector type_infos; switch (op) { case OpGroupNonUniformIAdd: { type_infos.emplace_back(TypeInfo{ "uint", "0u" }); type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(0u)" }); type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(0u)" }); type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(0u)" }); type_infos.emplace_back(TypeInfo{ "int", "0" }); type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(0)" }); type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(0)" }); type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(0)" }); break; } case OpGroupNonUniformFAdd: { type_infos.emplace_back(TypeInfo{ "float", "0.0f" }); type_infos.emplace_back(TypeInfo{ "vec2", "vec2(0.0f)" }); type_infos.emplace_back(TypeInfo{ "vec3", "vec3(0.0f)" }); type_infos.emplace_back(TypeInfo{ "vec4", "vec4(0.0f)" }); // ARB_gpu_shader_fp64 is required in GL4.0 which in turn is required by NV_thread_shuffle type_infos.emplace_back(TypeInfo{ "double", "0.0LF" }); type_infos.emplace_back(TypeInfo{ "dvec2", 
"dvec2(0.0LF)" }); type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(0.0LF)" }); type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(0.0LF)" }); break; } case OpGroupNonUniformIMul: { type_infos.emplace_back(TypeInfo{ "uint", "1u" }); type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(1u)" }); type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(1u)" }); type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(1u)" }); type_infos.emplace_back(TypeInfo{ "int", "1" }); type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(1)" }); type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(1)" }); type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(1)" }); break; } case OpGroupNonUniformFMul: { type_infos.emplace_back(TypeInfo{ "float", "1.0f" }); type_infos.emplace_back(TypeInfo{ "vec2", "vec2(1.0f)" }); type_infos.emplace_back(TypeInfo{ "vec3", "vec3(1.0f)" }); type_infos.emplace_back(TypeInfo{ "vec4", "vec4(1.0f)" }); type_infos.emplace_back(TypeInfo{ "double", "0.0LF" }); type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(1.0LF)" }); type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(1.0LF)" }); type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(1.0LF)" }); break; } default: SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation"); } const bool op_is_addition = op == OpGroupNonUniformIAdd || op == OpGroupNonUniformFAdd; const bool op_is_multiplication = op == OpGroupNonUniformIMul || op == OpGroupNonUniformFMul; std::string op_symbol; if (op_is_addition) { op_symbol = "+="; } else if (op_is_multiplication) { op_symbol = "*="; } for (const TypeInfo &t : type_infos) { statement(t.type, " ", func, "(", t.type, " v)"); begin_scope(); statement(t.type, " ", result, " = ", t.identity, ";"); statement("uvec4 active_threads = subgroupBallot(true);"); statement("if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)"); begin_scope(); statement("uint total = gl_SubgroupSize / 2u;"); statement(result, " = v;"); statement("for (uint i = 1u; i <= total; i <<= 1u)"); begin_scope(); 
statement("bool valid;"); if (group_op == GroupOperationReduce) { statement(t.type, " s = shuffleXorNV(", result, ", i, gl_SubgroupSize, valid);"); } else if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan) { statement(t.type, " s = shuffleUpNV(", result, ", i, gl_SubgroupSize, valid);"); } if (op_is_addition || op_is_multiplication) { statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";"); } end_scope(); if (group_op == GroupOperationExclusiveScan) { statement(result, " = shuffleUpNV(", result, ", 1u, gl_SubgroupSize);"); statement("if (subgroupElect())"); begin_scope(); statement(result, " = ", t.identity, ";"); end_scope(); } end_scope(); statement("else"); begin_scope(); if (group_op == GroupOperationExclusiveScan) { statement("uint total = subgroupBallotBitCount(gl_SubgroupLtMask);"); } else if (group_op == GroupOperationInclusiveScan) { statement("uint total = subgroupBallotBitCount(gl_SubgroupLeMask);"); } statement("for (uint i = 0u; i < gl_SubgroupSize; ++i)"); begin_scope(); statement("bool valid = subgroupBallotBitExtract(active_threads, i);"); statement(t.type, " s = shuffleNV(v, i, gl_SubgroupSize);"); if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan) { statement("valid = valid && (i < total);"); } if (op_is_addition || op_is_multiplication) { statement(result, " ", op_symbol, " valid ? 
s : ", t.identity, ";"); } end_scope(); end_scope(); statement("return ", result, ";"); end_scope(); } } void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) { static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4", "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" }; if (!options.vulkan_semantics) { using Supp = ShaderSubgroupSupportHelper; auto result = shader_subgroup_supporter.resolve(); if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask)) { auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result); for (auto &e : exts) { const char *name = Supp::get_extension_name(e); statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); switch (e) { case Supp::NV_shader_thread_group: statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)"); statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)"); statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)"); statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)"); statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)"); break; case Supp::ARB_shader_ballot: statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)"); statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)"); statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)"); statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)"); statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)"); break; default: break; } } statement("#endif"); statement(""); } if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize)) { auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result); for (auto &e : exts) { const char *name = Supp::get_extension_name(e); 
statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");

				switch (e)
				{
				case Supp::NV_shader_thread_group:
					statement("#define gl_SubgroupSize gl_WarpSizeNV");
					break;
				case Supp::ARB_shader_ballot:
					statement("#define gl_SubgroupSize gl_SubGroupSizeARB");
					break;
				case Supp::AMD_gcn_shader:
					statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
					break;
				default:
					break;
				}
			}
			statement("#endif");
			statement("");
		}

		// gl_SubgroupInvocationID: alias onto the vendor built-in.
		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
		{
			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);

			for (auto &e : exts)
			{
				const char *name = Supp::get_extension_name(e);
				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");

				switch (e)
				{
				case Supp::NV_shader_thread_group:
					statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
					break;
				case Supp::ARB_shader_ballot:
					statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
					break;
				default:
					break;
				}
			}
			statement("#endif");
			statement("");
		}

		// gl_SubgroupID: only an NV_shader_thread_group mapping is emitted here.
		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
		{
			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);

			for (auto &e : exts)
			{
				const char *name = Supp::get_extension_name(e);
				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");

				switch (e)
				{
				case Supp::NV_shader_thread_group:
					statement("#define gl_SubgroupID gl_WarpIDNV");
					break;
				default:
					break;
				}
			}
			statement("#endif");
			statement("");
		}

		// gl_NumSubgroups: only an NV_shader_thread_group mapping is emitted here.
		if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
		{
			auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);

			for (auto &e : exts)
			{
				const char *name = Supp::get_extension_name(e);
				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");

				switch (e)
				{
				case Supp::NV_shader_thread_group:
					statement("#define gl_NumSubgroups gl_WarpsPerSMNV");
					break;
				default:
					break;
				}
			}
			statement("#endif");
			statement("");
		}

		// subgroupBroadcastFirst / subgroupBroadcast: one overload per entry of workaround_types.
		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First))
		{
			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result);

			for (auto &e : exts)
			{
				const char *name = Supp::get_extension_name(e);
				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");

				switch (e)
				{
				case Supp::NV_shader_thread_shuffle:
					for (const char *t : workaround_types)
					{
						statement(t, " subgroupBroadcastFirst(", t,
						          " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
					}
					for (const char *t : workaround_types)
					{
						statement(t, " subgroupBroadcast(", t,
						          " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
					}
					break;
				case Supp::ARB_shader_ballot:
					for (const char *t : workaround_types)
					{
						statement(t, " subgroupBroadcastFirst(", t,
						          " value) { return readFirstInvocationARB(value); }");
					}
					for (const char *t : workaround_types)
					{
						statement(t, " subgroupBroadcast(", t,
						          " value, uint id) { return readInvocationARB(value, id); }");
					}
					break;
				default:
					break;
				}
			}
			statement("#endif");
			statement("");
		}

		// subgroupBallotFindLSB / subgroupBallotFindMSB.
		// The extension-specific variants only inspect value.x; the "#else" fallback
		// emitted after the loop inspects both value.x and value.y.
		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
		{
			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);

			for (auto &e : exts)
			{
				const char *name = Supp::get_extension_name(e);
				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");

				switch (e)
				{
				case Supp::NV_shader_thread_group:
					statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
					statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
					break;
				default:
					break;
				}
			}
			statement("#else");
			statement("uint subgroupBallotFindLSB(uvec4 value)");
			begin_scope();
			statement("int firstLive = findLSB(value.x);");
			statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
			end_scope();
			statement("uint subgroupBallotFindMSB(uvec4 value)");
			begin_scope();
			statement("int firstLive = findMSB(value.y);");
			statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
			end_scope();
			statement("#endif");
			statement("");
		}

		// subgroupAll / subgroupAny / subgroupAllEqual for bool.
		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
		{
			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);

			for (auto &e : exts)
			{
				const char *name = Supp::get_extension_name(e);
				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");

				switch (e)
				{
				case Supp::NV_gpu_shader_5:
					statement("bool subgroupAll(bool value) { return allThreadsNV(value); }");
					statement("bool subgroupAny(bool value) { return anyThreadNV(value); }");
					statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
					break;
				case Supp::ARB_shader_group_vote:
					statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }");
					statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }");
					statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
					break;
				case Supp::AMD_gcn_shader:
					statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
					statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
					statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
					          "b == ballotAMD(true); }");
					break;
				default:
					break;
				}
			}
			statement("#endif");
			statement("");
		}

		// subgroupAllEqual for non-bool types: generated through a temporary macro that is
		// instantiated once per entry of workaround_types and then #undef'd.
		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
		{
			statement("#ifndef GL_KHR_shader_subgroup_vote");
			statement(
			    "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
			    "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
			for (const char *t : workaround_types)
				statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
			statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
			statement("#endif");
			statement("");
		}

		// subgroupBallot: widen the vendor ballot result to uvec4.
		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
		{
			auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);

			for (auto &e : exts)
			{
				const char *name = Supp::get_extension_name(e);
				statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");

				switch (e)
				{
				case Supp::NV_shader_thread_group:
					statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
					break;
				case Supp::ARB_shader_ballot:
					statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
					break;
				default:
					break;
				}
			}
			statement("#endif");
			statement("");
		}

		// subgroupElect: the emitted function compares gl_SubgroupInvocationID against the
		// lowest set bit of subgroupBallot(true).
		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
		{
			statement("#ifndef GL_KHR_shader_subgroup_basic");
			statement("bool subgroupElect()");
			begin_scope();
			statement("uvec4 activeMask = subgroupBallot(true);");
			statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
			statement("return gl_SubgroupInvocationID == firstLive;");
			end_scope();
			statement("#endif");
			statement("");
		}

		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
		{
			// Extensions we're using in place of GL_KHR_shader_subgroup_basic state
			// that subgroup execute in lockstep so this barrier is implicit.
			// However the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
			// and a specific test of optimizing scans by leveraging lock-step invocation execution,
			// has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
			// https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
			statement("#ifndef GL_KHR_shader_subgroup_basic");
			statement("void subgroupBarrier() { memoryBarrierShared(); }");
			statement("#endif");
			statement("");
		}

		// subgroupMemoryBarrier*: the compute model gets group/shared barriers (including a
		// Shared variant); every other model gets the plain memoryBarrier* forms and no Shared variant.
		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
		{
			if (model == spv::ExecutionModelGLCompute)
			{
				statement("#ifndef GL_KHR_shader_subgroup_basic");
				statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
				statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
				statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
				statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
				statement("#endif");
			}
			else
			{
				statement("#ifndef GL_KHR_shader_subgroup_basic");
				statement("void subgroupMemoryBarrier() { memoryBarrier(); }");
				statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
				statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
				statement("#endif");
			}
			statement("");
		}

		// subgroupInverseBallot and the inclusive / exclusive ballot bit counts.
		// The emitted bit counts return only c.x under GL_NV_shader_thread_group and c.x + c.y otherwise.
		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
		{
			statement("#ifndef GL_KHR_shader_subgroup_ballot");
			statement("bool subgroupInverseBallot(uvec4 value)");
			begin_scope();
			statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
			end_scope();

			statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
			begin_scope();
			statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
			statement("ivec2 c = bitCount(v);");
			statement_no_indent("#ifdef GL_NV_shader_thread_group");
			statement("return uint(c.x);");
			statement_no_indent("#else");
			statement("return uint(c.x + c.y);");
			statement_no_indent("#endif");
			end_scope();

			statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
			begin_scope();
			statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
			statement("ivec2 c = bitCount(v);");
statement_no_indent("#ifdef GL_NV_shader_thread_group");
			statement("return uint(c.x);");
			statement_no_indent("#else");
			statement("return uint(c.x + c.y);");
			statement_no_indent("#endif");
			end_scope();
			statement("#endif");
			statement("");
		}

		// subgroupBallotBitCount: emitted code counts value.x only under GL_NV_shader_thread_group,
		// and value.x plus value.y otherwise.
		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
		{
			statement("#ifndef GL_KHR_shader_subgroup_ballot");
			statement("uint subgroupBallotBitCount(uvec4 value)");
			begin_scope();
			statement("ivec2 c = bitCount(value.xy);");
			statement_no_indent("#ifdef GL_NV_shader_thread_group");
			statement("return uint(c.x);");
			statement_no_indent("#else");
			statement("return uint(c.x + c.y);");
			statement_no_indent("#endif");
			end_scope();
			statement("#endif");
			statement("");
		}

		// subgroupBallotBitExtract: the non-NV variant selects the 32-bit word with index >> 5
		// and the bit within it with index & 0x1f.
		if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
		{
			statement("#ifndef GL_KHR_shader_subgroup_ballot");
			statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
			begin_scope();
			statement_no_indent("#ifdef GL_NV_shader_thread_group");
			statement("uint shifted = value.x >> index;");
			statement_no_indent("#else");
			statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
			statement_no_indent("#endif");
			statement("return (shifted & 1u) != 0u;");
			end_scope();
			statement("#endif");
			statement("");
		}

		// Shared driver for the arithmetic reduce / scan shims: if `feat` was requested, wraps a call
		// to emit_subgroup_arithmetic_workaround() in the usual #if / #elif / #endif chain.
		// Only NV_shader_thread_shuffle produces an implementation.
		auto arithmetic_feature_helper =
		    [&](Supp::Feature feat, std::string func_name, spv::Op op, spv::GroupOperation group_op)
		{
			if (shader_subgroup_supporter.is_feature_requested(feat))
			{
				auto exts = Supp::get_candidates_for_feature(feat, result);
				for (auto &e : exts)
				{
					const char *name = Supp::get_extension_name(e);
					statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");

					switch (e)
					{
					case Supp::NV_shader_thread_shuffle:
						emit_subgroup_arithmetic_workaround(func_name, op, group_op);
						break;
					default:
						break;
					}
				}
				statement("#endif");
				statement("");
			}
		};

		arithmetic_feature_helper(Supp::SubgroupArithmeticIAddReduce, "subgroupAdd", OpGroupNonUniformIAdd,
		                          GroupOperationReduce);
		arithmetic_feature_helper(Supp::SubgroupArithmeticIAddExclusiveScan, "subgroupExclusiveAdd",
		                          OpGroupNonUniformIAdd, GroupOperationExclusiveScan);
		arithmetic_feature_helper(Supp::SubgroupArithmeticIAddInclusiveScan, "subgroupInclusiveAdd",
		                          OpGroupNonUniformIAdd, GroupOperationInclusiveScan);
		arithmetic_feature_helper(Supp::SubgroupArithmeticFAddReduce, "subgroupAdd", OpGroupNonUniformFAdd,
		                          GroupOperationReduce);
		arithmetic_feature_helper(Supp::SubgroupArithmeticFAddExclusiveScan, "subgroupExclusiveAdd",
		                          OpGroupNonUniformFAdd, GroupOperationExclusiveScan);
		arithmetic_feature_helper(Supp::SubgroupArithmeticFAddInclusiveScan, "subgroupInclusiveAdd",
		                          OpGroupNonUniformFAdd, GroupOperationInclusiveScan);

		arithmetic_feature_helper(Supp::SubgroupArithmeticIMulReduce, "subgroupMul", OpGroupNonUniformIMul,
		                          GroupOperationReduce);
		arithmetic_feature_helper(Supp::SubgroupArithmeticIMulExclusiveScan, "subgroupExclusiveMul",
		                          OpGroupNonUniformIMul, GroupOperationExclusiveScan);
		arithmetic_feature_helper(Supp::SubgroupArithmeticIMulInclusiveScan, "subgroupInclusiveMul",
		                          OpGroupNonUniformIMul, GroupOperationInclusiveScan);
		arithmetic_feature_helper(Supp::SubgroupArithmeticFMulReduce, "subgroupMul", OpGroupNonUniformFMul,
		                          GroupOperationReduce);
		arithmetic_feature_helper(Supp::SubgroupArithmeticFMulExclusiveScan, "subgroupExclusiveMul",
		                          OpGroupNonUniformFMul, GroupOperationExclusiveScan);
		arithmetic_feature_helper(Supp::SubgroupArithmeticFMulInclusiveScan, "subgroupInclusiveMul",
		                          OpGroupNonUniformFMul, GroupOperationInclusiveScan);
	}

	// Pass-through spvWorkaroundRowMajor() wrappers, one per recorded type.
	if (!workaround_ubo_load_overload_types.empty())
	{
		for (auto &type_id : workaround_ubo_load_overload_types)
		{
			auto &type = get(type_id);

			if (options.es && is_matrix(type))
			{
				// Need both variants.
				// GLSL cannot overload on precision, so need to dispatch appropriately.
				statement("highp ", type_to_glsl(type), " spvWorkaroundRowMajor(highp ", type_to_glsl(type), " wrap) { return wrap; }");
				statement("mediump ", type_to_glsl(type), " spvWorkaroundRowMajorMP(mediump ", type_to_glsl(type), " wrap) { return wrap; }");
			}
			else
			{
				statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type), " wrap) { return wrap; }");
			}
		}

		statement("");
	}
}

// Emits GLSL helper functions (spvTranspose, spvDeterminant, spvInverse for mat2 / mat3 / mat4)
// for every Polyfill* bit set in `polyfills`.
// On ES targets each helper's parameter and return type carry a precision qualifier:
// "mediump " when `relaxed` is true, "highp " otherwise. Relaxed ES helpers also get an "MP"
// name suffix. On non-ES targets no qualifier or suffix is emitted.
void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed)
{
	const char *qual = "";
	const char *suffix = (options.es && relaxed) ? "MP" : "";
	if (options.es)
		qual = relaxed ? "mediump " : "highp ";

	if (polyfills & PolyfillTranspose2x2)
	{
		statement(qual, "mat2 spvTranspose", suffix, "(", qual, "mat2 m)");
		begin_scope();
		statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
		end_scope();
		statement("");
	}

	if (polyfills & PolyfillTranspose3x3)
	{
		statement(qual, "mat3 spvTranspose", suffix, "(", qual, "mat3 m)");
		begin_scope();
		statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
		end_scope();
		statement("");
	}

	if (polyfills & PolyfillTranspose4x4)
	{
		statement(qual, "mat4 spvTranspose", suffix, "(", qual, "mat4 m)");
		begin_scope();
		statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
		          "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
		end_scope();
		statement("");
	}

	if (polyfills & PolyfillDeterminant2x2)
	{
		statement(qual, "float spvDeterminant", suffix, "(", qual, "mat2 m)");
		begin_scope();
		statement("return m[0][0] * m[1][1] - m[0][1] * m[1][0];");
		end_scope();
		statement("");
	}

	if (polyfills & PolyfillDeterminant3x3)
	{
		statement(qual, "float spvDeterminant", suffix, "(", qual, "mat3 m)");
		begin_scope();
		statement("return dot(m[0], vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], "
		          "m[1][2] * m[2][0] - m[1][0] * m[2][2], "
		          "m[1][0] * m[2][1] - m[1][1] * m[2][0]));");
		end_scope();
		statement("");
	}

	if (polyfills & PolyfillDeterminant4x4)
	{
		statement(qual, "float spvDeterminant", suffix, "(", qual, "mat4 m)");
		begin_scope();
		statement("return dot(m[0], vec4("
		          "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], "
		          "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], "
		          "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], "
		          "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]));");
		end_scope();
		statement("");
	}

	if (polyfills & PolyfillMatrixInverse2x2)
	{
		statement(qual, "mat2 spvInverse", suffix, "(", qual, "mat2 m)");
		begin_scope();
		statement("return mat2(m[1][1], -m[0][1], -m[1][0], m[0][0]) "
		          "* (1.0 / (m[0][0] * m[1][1] - m[1][0] * m[0][1]));");
		end_scope();
		statement("");
	}

	if (polyfills & PolyfillMatrixInverse3x3)
	{
		statement(qual, "mat3 spvInverse", suffix, "(", qual, "mat3 m)");
		begin_scope();
		// The emitted `t` is reused both as three entries of the result and, via dot(m[0], t),
		// as the divisor of the final scale.
		statement(qual, "vec3 t = vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], m[1][2] * m[2][0] - m[1][0] * m[2][2], m[1][0] * m[2][1] - m[1][1] * m[2][0]);");
		statement("return mat3(t[0], "
		          "m[0][2] * m[2][1] - m[0][1] * m[2][2], "
		          "m[0][1] * m[1][2] - m[0][2] * m[1][1], "
		          "t[1], "
		          "m[0][0] * m[2][2] - m[0][2] * m[2][0], "
		          "m[0][2] * m[1][0] - m[0][0] * m[1][2], "
		          "t[2], "
		          "m[0][1] * m[2][0] - m[0][0] * m[2][1], "
		          "m[0][0] * m[1][1] - m[0][1] * m[1][0]) "
		          "* (1.0 / dot(m[0], t));");
		end_scope();
		statement("");
	}

	if (polyfills & PolyfillMatrixInverse4x4)
	{
		statement(qual, "mat4 spvInverse", suffix, "(", qual, "mat4 m)");
		begin_scope();
		// Same scheme as the 3x3 case: `t` feeds four entries of the result and the divisor.
		statement(qual, "vec4 t = vec4("
		          "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], "
		          "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], "
		          "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], "
		          "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]);");
		statement("return mat4("
		          "t[0], "
		          "m[3][1] * m[2][2] * m[0][3] - m[2][1] * m[3][2] * m[0][3] - m[3][1] * m[0][2] * m[2][3] + m[0][1] * m[3][2] * m[2][3] + m[2][1] * m[0][2] * m[3][3] - m[0][1] * m[2][2] * m[3][3], "
		          "m[1][1] * m[3][2] * m[0][3] - m[3][1] * m[1][2] * m[0][3] + m[3][1] * m[0][2] * m[1][3] - m[0][1] * m[3][2] * m[1][3] - m[1][1] * m[0][2] * m[3][3] + m[0][1] * m[1][2] * m[3][3], "
		          "m[2][1] * m[1][2] * m[0][3] - m[1][1] * m[2][2] * m[0][3] - m[2][1] * m[0][2] * m[1][3] + m[0][1] * m[2][2] * m[1][3] + m[1][1] * m[0][2] * m[2][3] - m[0][1] * m[1][2] * m[2][3], "
		          "t[1], "
		          "m[2][0] * m[3][2] * m[0][3] - m[3][0] * m[2][2] * m[0][3] + m[3][0] * m[0][2] * m[2][3] - m[0][0] * m[3][2] * m[2][3] - m[2][0] * m[0][2] * m[3][3] + m[0][0] * m[2][2] * m[3][3], "
		          "m[3][0] * m[1][2] * m[0][3] - m[1][0] * m[3][2] * m[0][3] - m[3][0] * m[0][2] * m[1][3] + m[0][0] * m[3][2] * m[1][3] + m[1][0] * m[0][2] * m[3][3] - m[0][0] * m[1][2] * m[3][3], "
		          "m[1][0] * m[2][2] * m[0][3] - m[2][0] * m[1][2] * m[0][3] + m[2][0] * m[0][2] * m[1][3] - m[0][0] * m[2][2] * m[1][3] - m[1][0] * m[0][2] * m[2][3] + m[0][0] * m[1][2] * m[2][3], "
		          "t[2], "
		          "m[3][0] * m[2][1] * m[0][3] - m[2][0] * m[3][1] * m[0][3] - m[3][0] * m[0][1] * m[2][3] + m[0][0] * m[3][1] * m[2][3] + m[2][0] * m[0][1] * m[3][3] - m[0][0] * m[2][1] * m[3][3], "
		          "m[1][0] * m[3][1] * m[0][3] - m[3][0] * m[1][1] * m[0][3] + m[3][0] * m[0][1] * m[1][3] - m[0][0] * m[3][1] * m[1][3] - m[1][0] * m[0][1] * m[3][3] + m[0][0] * m[1][1] * m[3][3], "
		          "m[2][0] * m[1][1] * m[0][3] - m[1][0] * m[2][1] * m[0][3] - m[2][0] * m[0][1] * m[1][3] + m[0][0] * m[2][1] * m[1][3] + m[1][0] * m[0][1] * m[2][3] - m[0][0] * m[1][1] * m[2][3], "
		          "t[3], "
		          "m[2][0] * m[3][1] * m[0][2] - m[3][0] * m[2][1] * m[0][2] + m[3][0] * m[0][1] * m[2][2] - m[0][0] * m[3][1] * m[2][2] - m[2][0] * m[0][1] * m[3][2] + m[0][0] * m[2][1] * m[3][2], "
		          "m[3][0] * m[1][1] * m[0][2] - m[1][0] * m[3][1] * m[0][2] - m[3][0] * m[0][1] * m[1][2] + m[0][0] * m[3][1] * m[1][2] + m[1][0] * m[0][1] * m[3][2] - m[0][0] * m[1][1] * m[3][2], "
		          "m[1][0] * m[2][1] * m[0][2] - m[2][0] * m[1][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] - m[0][0] * m[2][1] * m[1][2] - m[1][0] * m[0][1] * m[2][2] + m[0][0] * m[1][1] * m[2][2]) "
		          "* (1.0 / dot(m[0], t));");
		end_scope();
		statement("");
	}
}

// Returns a string representation of the ID, usable as a function arg.
// Default is to simply return the expression representation of the arg ID.
// Subclasses may override to modify the return value.
// The SPIRFunction::Parameter argument is unused by this default implementation.
string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
{
	// Make sure that we use the name of the original variable, and not the parameter alias.
	uint32_t name_id = id;
	auto *var = maybe_get(id);
	if (var && var->basevariable)
		name_id = var->basevariable;
	return to_expression(name_id);
}

// Records `id` in forced_temporaries and requests a recompile. The stronger
// "guarantee forward progress" variant is requested only when `id` was newly inserted.
void CompilerGLSL::force_temporary_and_recompile(uint32_t id)
{
	auto res = forced_temporaries.insert(id);

	// Forcing new temporaries guarantees forward progress.
if (res.second) force_recompile_guarantee_forward_progress(); else force_recompile(); } uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision) { // Constants do not have innate precision. auto handle_type = ir.ids[id].get_type(); if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef) return id; // Ignore anything that isn't 32-bit values. auto &type = get(type_id); if (type.pointer) return id; if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int) return id; if (precision == Options::DontCare) { // If precision is consumed as don't care (operations only consisting of constants), // we need to bind the expression to a temporary, // otherwise we have no way of controlling the precision later. auto itr = forced_temporaries.insert(id); if (itr.second) force_recompile_guarantee_forward_progress(); return id; } auto current_precision = has_decoration(id, DecorationRelaxedPrecision) ? 
Options::Mediump : Options::Highp; if (current_precision == precision) return id; auto itr = temporary_to_mirror_precision_alias.find(id); if (itr == temporary_to_mirror_precision_alias.end()) { uint32_t alias_id = ir.increase_bound_by(1); auto &m = ir.meta[alias_id]; if (auto *input_m = ir.find_meta(id)) m = *input_m; const char *prefix; if (precision == Options::Mediump) { set_decoration(alias_id, DecorationRelaxedPrecision); prefix = "mp_copy_"; } else { unset_decoration(alias_id, DecorationRelaxedPrecision); prefix = "hp_copy_"; } auto alias_name = join(prefix, to_name(id)); ParsedIR::sanitize_underscores(alias_name); set_name(alias_id, alias_name); emit_op(type_id, alias_id, to_expression(id), true); temporary_to_mirror_precision_alias[id] = alias_id; forced_temporaries.insert(id); forced_temporaries.insert(alias_id); force_recompile_guarantee_forward_progress(); id = alias_id; } else { id = itr->second; } return id; } void CompilerGLSL::handle_invalid_expression(uint32_t id) { // We tried to read an invalidated expression. // This means we need another pass at compilation, but next time, // force temporary variables so that they cannot be invalidated. force_temporary_and_recompile(id); // If the invalid expression happened as a result of a CompositeInsert // overwrite, we must block this from happening next iteration. if (composite_insert_overwritten.count(id)) block_composite_insert_overwrite.insert(id); } // Converts the format of the current expression from packed to unpacked, // by wrapping the expression in a constructor of the appropriate type. // GLSL does not support packed formats, so simply return the expression. // Subclasses that do will override. string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool) { return expr_str; } // Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all. 
void CompilerGLSL::strip_enclosed_expression(string &expr) { if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')') return; // Have to make sure that our first and last parens actually enclose everything inside it. uint32_t paren_count = 0; for (auto &c : expr) { if (c == '(') paren_count++; else if (c == ')') { paren_count--; // If we hit 0 and this is not the final char, our first and final parens actually don't // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d). if (paren_count == 0 && &c != &expr.back()) return; } } expr.erase(expr.size() - 1, 1); expr.erase(begin(expr)); } bool CompilerGLSL::needs_enclose_expression(const std::string &expr) { bool need_parens = false; // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back // unary expressions. if (!expr.empty()) { auto c = expr.front(); if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*') need_parens = true; } if (!need_parens) { uint32_t paren_count = 0; for (auto c : expr) { if (c == '(' || c == '[') paren_count++; else if (c == ')' || c == ']') { assert(paren_count); paren_count--; } else if (c == ' ' && paren_count == 0) { need_parens = true; break; } } assert(paren_count == 0); } return need_parens; } string CompilerGLSL::enclose_expression(const string &expr) { // If this expression contains any spaces which are not enclosed by parentheses, // we need to enclose it so we can treat the whole string as an expression. // This happens when two expressions have been part of a binary op earlier. if (needs_enclose_expression(expr)) return join('(', expr, ')'); else return expr; } string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr) { // If this expression starts with an address-of operator ('&'), then // just return the part after the operator. // TODO: Strip parens if unnecessary? 
if (expr.front() == '&') return expr.substr(1); else if (backend.native_pointers) return join('*', expr); else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct && expr_type.pointer_depth == 1) { return join(enclose_expression(expr), ".value"); } else return expr; } string CompilerGLSL::address_of_expression(const std::string &expr) { if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')') { // If we have an expression which looks like (*foo), taking the address of it is the same as stripping // the first two and last characters. We might have to enclose the expression. // This doesn't work for cases like (*foo + 10), // but this is an r-value expression which we cannot take the address of anyways. return enclose_expression(expr.substr(2, expr.size() - 3)); } else if (expr.front() == '*') { // If this expression starts with a dereference operator ('*'), then // just return the part after the operator. return expr.substr(1); } else return join('&', enclose_expression(expr)); } // Just like to_expression except that we enclose the expression inside parentheses if needed. string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read) { return enclose_expression(to_expression(id, register_expression_read)); } // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans. // need_transpose must be forced to false. string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id) { return unpack_expression_type(to_expression(id), expression_type(id), get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true); } string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read) { // If we need to transpose, it will also take care of unpacking rules. 
auto *e = maybe_get(id); bool need_transpose = e && e->need_transpose; bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); if (!need_transpose && (is_remapped || is_packed)) { return unpack_expression_type(to_expression(id, register_expression_read), get_pointee_type(expression_type_id(id)), get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false); } else return to_expression(id, register_expression_read); } string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read) { return enclose_expression(to_unpacked_expression(id, register_expression_read)); } string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read) { auto &type = expression_type(id); if (type.pointer && should_dereference(id)) return dereference_expression(type, to_enclosed_expression(id, register_expression_read)); else return to_expression(id, register_expression_read); } string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read) { auto &type = expression_type(id); if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) return address_of_expression(to_enclosed_expression(id, register_expression_read)); else return to_unpacked_expression(id, register_expression_read); } string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read) { auto &type = expression_type(id); if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) return address_of_expression(to_enclosed_expression(id, register_expression_read)); else return to_enclosed_unpacked_expression(id, register_expression_read); } string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index) { auto expr = to_enclosed_expression(id); if (has_extended_decoration(id, 
SPIRVCrossDecorationPhysicalTypePacked)) return join(expr, "[", index, "]"); else return join(expr, ".", index_to_swizzle(index)); } string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c, const uint32_t *chain, uint32_t length) { // It is kinda silly if application actually enter this path since they know the constant up front. // It is useful here to extract the plain constant directly. SPIRConstant tmp; tmp.constant_type = result_type; auto &composite_type = get(c.constant_type); assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty()); assert(!c.specialization); if (is_matrix(composite_type)) { if (length == 2) { tmp.m.c[0].vecsize = 1; tmp.m.columns = 1; tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]]; } else { assert(length == 1); tmp.m.c[0].vecsize = composite_type.vecsize; tmp.m.columns = 1; tmp.m.c[0] = c.m.c[chain[0]]; } } else { assert(length == 1); tmp.m.c[0].vecsize = 1; tmp.m.columns = 1; tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]]; } return constant_expression(tmp); } string CompilerGLSL::to_rerolled_array_expression(const SPIRType &parent_type, const string &base_expr, const SPIRType &type) { bool remapped_boolean = parent_type.basetype == SPIRType::Struct && type.basetype == SPIRType::Boolean && backend.boolean_in_struct_remapped_type != SPIRType::Boolean; SPIRType tmp_type; if (remapped_boolean) { tmp_type = get(type.parent_type); tmp_type.basetype = backend.boolean_in_struct_remapped_type; } else if (type.basetype == SPIRType::Boolean && backend.boolean_in_struct_remapped_type != SPIRType::Boolean) { // It's possible that we have an r-value expression that was OpLoaded from a struct. // We have to reroll this and explicitly cast the input to bool, because the r-value is short. 
tmp_type = get(type.parent_type); remapped_boolean = true; } uint32_t size = to_array_size_literal(type); auto &parent = get(type.parent_type); string expr = "{ "; for (uint32_t i = 0; i < size; i++) { auto subexpr = join(base_expr, "[", convert_to_string(i), "]"); if (!type_is_top_level_array(parent)) { if (remapped_boolean) subexpr = join(type_to_glsl(tmp_type), "(", subexpr, ")"); expr += subexpr; } else expr += to_rerolled_array_expression(parent_type, subexpr, parent); if (i + 1 < size) expr += ", "; } expr += " }"; return expr; } string CompilerGLSL::to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type) { auto &type = expression_type(id); bool reroll_array = false; bool remapped_boolean = parent_type.basetype == SPIRType::Struct && type.basetype == SPIRType::Boolean && backend.boolean_in_struct_remapped_type != SPIRType::Boolean; if (type_is_top_level_array(type)) { reroll_array = !backend.array_is_value_type || (block_like_type && !backend.array_is_value_type_in_buffer_blocks); if (remapped_boolean) { // Forced to reroll if we have to change bool[] to short[]. reroll_array = true; } } if (reroll_array) { // For this case, we need to "re-roll" an array initializer from a temporary. // We cannot simply pass the array directly, since it decays to a pointer and it cannot // participate in a struct initializer. E.g. // float arr[2] = { 1.0, 2.0 }; // Foo foo = { arr }; must be transformed to // Foo foo = { { arr[0], arr[1] } }; // The array sizes cannot be deduced from specialization constants since we cannot use any loops. // We're only triggering one read of the array expression, but this is fine since arrays have to be declared // as temporaries anyways. 
return to_rerolled_array_expression(parent_type, to_enclosed_expression(id), type); } else { auto expr = to_unpacked_expression(id); if (remapped_boolean) { auto tmp_type = type; tmp_type.basetype = backend.boolean_in_struct_remapped_type; expr = join(type_to_glsl(tmp_type), "(", expr, ")"); } return expr; } } string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id) { string expr = to_expression(id); if (has_decoration(id, DecorationNonUniform)) convert_non_uniform_expression(expr, id); return expr; } string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) { auto itr = invalid_expressions.find(id); if (itr != end(invalid_expressions)) handle_invalid_expression(id); if (ir.ids[id].get_type() == TypeExpression) { // We might have a more complex chain of dependencies. // A possible scenario is that we // // %1 = OpLoad // %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1. // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that. // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions. // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before. // // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store, // and see that we should not forward reads of the original variable. 
auto &expr = get(id); for (uint32_t dep : expr.expression_dependencies) if (invalid_expressions.find(dep) != end(invalid_expressions)) handle_invalid_expression(dep); } if (register_expression_read) track_expression_read(id); switch (ir.ids[id].get_type()) { case TypeExpression: { auto &e = get(id); if (e.base_expression) return to_enclosed_expression(e.base_expression) + e.expression; else if (e.need_transpose) { // This should not be reached for access chains, since we always deal explicitly with transpose state // when consuming an access chain expression. uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); bool relaxed = has_decoration(id, DecorationRelaxedPrecision); return convert_row_major_matrix(e.expression, get(e.expression_type), physical_type_id, is_packed, relaxed); } else if (flattened_structs.count(id)) { return load_flattened_struct(e.expression, get(e.expression_type)); } else { if (is_forcing_recompilation()) { // During first compilation phase, certain expression patterns can trigger exponential growth of memory. // Avoid this by returning dummy expressions during this phase. // Do not use empty expressions here, because those are sentinels for other cases. return "_"; } else return e.expression; } } case TypeConstant: { auto &c = get(id); auto &type = get(c.constant_type); // WorkGroupSize may be a constant. 
if (has_decoration(c.self, DecorationBuiltIn)) return builtin_to_glsl(BuiltIn(get_decoration(c.self, DecorationBuiltIn)), StorageClassGeneric); else if (c.specialization) { if (backend.workgroup_size_is_hidden) { int wg_index = get_constant_mapping_to_workgroup_component(c); if (wg_index >= 0) { auto wg_size = join(builtin_to_glsl(BuiltInWorkgroupSize, StorageClassInput), vector_swizzle(1, wg_index)); if (type.basetype != SPIRType::UInt) wg_size = bitcast_expression(type, SPIRType::UInt, wg_size); return wg_size; } } if (expression_is_forwarded(id)) return constant_expression(c); return to_name(id); } else if (c.is_used_as_lut) return to_name(id); else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) return to_name(id); else if (!type.array.empty() && !backend.can_declare_arrays_inline) return to_name(id); else return constant_expression(c); } case TypeConstantOp: return to_name(id); case TypeVariable: { auto &var = get(id); // If we try to use a loop variable before the loop header, we have to redirect it to the static expression, // the variable has not been declared yet. if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable)) { // We might try to load from a loop variable before it has been initialized. // Prefer static expression and fallback to initializer. if (var.static_expression) return to_expression(var.static_expression); else if (var.initializer) return to_expression(var.initializer); else { // We cannot declare the variable yet, so have to fake it. 
uint32_t undef_id = ir.increase_bound_by(1); return emit_uninitialized_temporary_expression(get_variable_data_type_id(var), undef_id).expression; } } else if (var.deferred_declaration) { var.deferred_declaration = false; return variable_decl(var); } else if (flattened_structs.count(id)) { return load_flattened_struct(to_name(id), get(var.basetype)); } else { auto &dec = ir.meta[var.self].decoration; if (dec.builtin) return builtin_to_glsl(dec.builtin_type, var.storage); else return to_name(id); } } case TypeCombinedImageSampler: // This type should never be taken the expression of directly. // The intention is that texture sampling functions will extract the image and samplers // separately and take their expressions as needed. // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler // expression ala sampler2D(texture, sampler). SPIRV_CROSS_THROW("Combined image samplers have no default expression representation."); case TypeAccessChain: // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad. 
SPIRV_CROSS_THROW("Access chains have no default expression representation."); default: return to_name(id); } } SmallVector CompilerGLSL::get_composite_constant_ids(ConstantID const_id) { if (auto *constant = maybe_get(const_id)) { const auto &type = get(constant->constant_type); if (is_array(type) || type.basetype == SPIRType::Struct) return constant->subconstants; if (is_matrix(type)) return SmallVector(constant->m.id); if (is_vector(type)) return SmallVector(constant->m.c[0].id); SPIRV_CROSS_THROW("Unexpected scalar constant!"); } if (!const_composite_insert_ids.count(const_id)) SPIRV_CROSS_THROW("Unimplemented for this OpSpecConstantOp!"); return const_composite_insert_ids[const_id]; } void CompilerGLSL::fill_composite_constant(SPIRConstant &constant, TypeID type_id, const SmallVector &initializers) { auto &type = get(type_id); constant.specialization = true; if (is_array(type) || type.basetype == SPIRType::Struct) { constant.subconstants = initializers; } else if (is_matrix(type)) { constant.m.columns = type.columns; for (uint32_t i = 0; i < type.columns; ++i) { constant.m.id[i] = initializers[i]; constant.m.c[i].vecsize = type.vecsize; } } else if (is_vector(type)) { constant.m.c[0].vecsize = type.vecsize; for (uint32_t i = 0; i < type.vecsize; ++i) constant.m.c[0].id[i] = initializers[i]; } else SPIRV_CROSS_THROW("Unexpected scalar in SpecConstantOp CompositeInsert!"); } void CompilerGLSL::set_composite_constant(ConstantID const_id, TypeID type_id, const SmallVector &initializers) { if (maybe_get(const_id)) { const_composite_insert_ids[const_id] = initializers; return; } auto &constant = set(const_id, type_id); fill_composite_constant(constant, type_id, initializers); forwarded_temporaries.insert(const_id); } TypeID CompilerGLSL::get_composite_member_type(TypeID type_id, uint32_t member_idx) { auto &type = get(type_id); if (is_array(type)) return type.parent_type; if (type.basetype == SPIRType::Struct) return type.member_types[member_idx]; if 
(is_matrix(type)) return type.parent_type; if (is_vector(type)) return type.parent_type; SPIRV_CROSS_THROW("Shouldn't reach lower than vector handling OpSpecConstantOp CompositeInsert!"); } string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) { auto &type = get(cop.basetype); bool binary = false; bool unary = false; string op; if (is_legacy() && is_unsigned_opcode(cop.opcode)) SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); // TODO: Find a clean way to reuse emit_instruction. switch (cop.opcode) { case OpSConvert: case OpUConvert: case OpFConvert: op = type_to_glsl_constructor(type); break; #define GLSL_BOP(opname, x) \ case Op##opname: \ binary = true; \ op = x; \ break #define GLSL_UOP(opname, x) \ case Op##opname: \ unary = true; \ op = x; \ break GLSL_UOP(SNegate, "-"); GLSL_UOP(Not, "~"); GLSL_BOP(IAdd, "+"); GLSL_BOP(ISub, "-"); GLSL_BOP(IMul, "*"); GLSL_BOP(SDiv, "/"); GLSL_BOP(UDiv, "/"); GLSL_BOP(UMod, "%"); GLSL_BOP(SMod, "%"); GLSL_BOP(ShiftRightLogical, ">>"); GLSL_BOP(ShiftRightArithmetic, ">>"); GLSL_BOP(ShiftLeftLogical, "<<"); GLSL_BOP(BitwiseOr, "|"); GLSL_BOP(BitwiseXor, "^"); GLSL_BOP(BitwiseAnd, "&"); GLSL_BOP(LogicalOr, "||"); GLSL_BOP(LogicalAnd, "&&"); GLSL_UOP(LogicalNot, "!"); GLSL_BOP(LogicalEqual, "=="); GLSL_BOP(LogicalNotEqual, "!="); GLSL_BOP(IEqual, "=="); GLSL_BOP(INotEqual, "!="); GLSL_BOP(ULessThan, "<"); GLSL_BOP(SLessThan, "<"); GLSL_BOP(ULessThanEqual, "<="); GLSL_BOP(SLessThanEqual, "<="); GLSL_BOP(UGreaterThan, ">"); GLSL_BOP(SGreaterThan, ">"); GLSL_BOP(UGreaterThanEqual, ">="); GLSL_BOP(SGreaterThanEqual, ">="); case OpSRem: { uint32_t op0 = cop.arguments[0]; uint32_t op1 = cop.arguments[1]; return join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); } case OpSelect: { if (cop.arguments.size() < 3) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); // This one is 
pretty annoying. It's triggered from // uint(bool), int(bool) from spec constants. // In order to preserve its compile-time constness in Vulkan GLSL, // we need to reduce the OpSelect expression back to this simplified model. // If we cannot, fail. if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0])) { // Implement as a simple cast down below. } else { // Implement a ternary and pray the compiler understands it :) return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]); } break; } case OpVectorShuffle: { string expr = type_to_glsl_constructor(type); expr += "("; uint32_t left_components = expression_type(cop.arguments[0]).vecsize; string left_arg = to_enclosed_expression(cop.arguments[0]); string right_arg = to_enclosed_expression(cop.arguments[1]); for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++) { uint32_t index = cop.arguments[i]; if (index == 0xFFFFFFFF) { SPIRConstant c; c.constant_type = type.parent_type; assert(type.parent_type != ID(0)); expr += constant_expression(c); } else if (index >= left_components) { expr += right_arg + "." + "xyzw"[index - left_components]; } else { expr += left_arg + "." + "xyzw"[index]; } if (i + 1 < uint32_t(cop.arguments.size())) expr += ", "; } expr += ")"; return expr; } case OpCompositeExtract: { auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); return expr; } case OpCompositeInsert: { SmallVector new_init = get_composite_constant_ids(cop.arguments[1]); uint32_t idx; uint32_t target_id = cop.self; uint32_t target_type_id = cop.basetype; // We have to drill down to the part we want to modify, and create new // constants for each containing part. 
for (idx = 2; idx < cop.arguments.size() - 1; ++idx) { uint32_t new_const = ir.increase_bound_by(1); uint32_t old_const = new_init[cop.arguments[idx]]; new_init[cop.arguments[idx]] = new_const; set_composite_constant(target_id, target_type_id, new_init); new_init = get_composite_constant_ids(old_const); target_id = new_const; target_type_id = get_composite_member_type(target_type_id, cop.arguments[idx]); } // Now replace the initializer with the one from this instruction. new_init[cop.arguments[idx]] = cop.arguments[0]; set_composite_constant(target_id, target_type_id, new_init); SPIRConstant tmp_const(cop.basetype); fill_composite_constant(tmp_const, cop.basetype, const_composite_insert_ids[cop.self]); return constant_expression(tmp_const); } default: // Some opcodes are unimplemented here, these are currently not possible to test from glslang. SPIRV_CROSS_THROW("Unimplemented spec constant op."); } uint32_t bit_width = 0; if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert) bit_width = expression_type(cop.arguments[0]).width; SPIRType::BaseType input_type; bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode); switch (cop.opcode) { case OpIEqual: case OpINotEqual: input_type = to_signed_basetype(bit_width); break; case OpSLessThan: case OpSLessThanEqual: case OpSGreaterThan: case OpSGreaterThanEqual: case OpSMod: case OpSDiv: case OpShiftRightArithmetic: case OpSConvert: case OpSNegate: input_type = to_signed_basetype(bit_width); break; case OpULessThan: case OpULessThanEqual: case OpUGreaterThan: case OpUGreaterThanEqual: case OpUMod: case OpUDiv: case OpShiftRightLogical: case OpUConvert: input_type = to_unsigned_basetype(bit_width); break; default: input_type = type.basetype; break; } #undef GLSL_BOP #undef GLSL_UOP if (binary) { if (cop.arguments.size() < 2) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); string cast_op0; string cast_op1; auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, 
input_type, cop.arguments[0], cop.arguments[1], skip_cast_if_equal_type); if (type.basetype != input_type && type.basetype != SPIRType::Boolean) { expected_type.basetype = input_type; auto expr = bitcast_glsl_op(type, expected_type); expr += '('; expr += join(cast_op0, " ", op, " ", cast_op1); expr += ')'; return expr; } else return join("(", cast_op0, " ", op, " ", cast_op1, ")"); } else if (unary) { if (cop.arguments.size() < 1) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); // Auto-bitcast to result type as needed. // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants. return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")"); } else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert) { if (cop.arguments.size() < 1) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); auto &arg_type = expression_type(cop.arguments[0]); if (arg_type.width < type.width && input_type != arg_type.basetype) { auto expected = arg_type; expected.basetype = input_type; return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")"); } else return join(op, "(", to_expression(cop.arguments[0]), ")"); } else { if (cop.arguments.size() < 1) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); return join(op, "(", to_expression(cop.arguments[0]), ")"); } } string CompilerGLSL::constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope, bool inside_struct_scope) { auto &type = get(c.constant_type); if (type_is_top_level_pointer(type)) { return backend.null_pointer_literal; } else if (!c.subconstants.empty()) { // Handles Arrays and structures. string res; // Only consider the decay if we are inside a struct scope where we are emitting a member with Offset decoration. // Outside a block-like struct declaration, we can always bind to a constant array with templated type. 
// Should look at ArrayStride here as well, but it's possible to declare a constant struct // with Offset = 0, using no ArrayStride on the enclosed array type. // A particular CTS test hits this scenario. bool array_type_decays = inside_block_like_struct_scope && type_is_top_level_array(type) && !backend.array_is_value_type_in_buffer_blocks; // Allow Metal to use the array template to make arrays a value type bool needs_trailing_tracket = false; if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct && !type_is_top_level_array(type)) { res = type_to_glsl_constructor(type) + "{ "; } else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type && type_is_top_level_array(type) && !array_type_decays) { const auto *p_type = &type; SPIRType tmp_type; if (inside_struct_scope && backend.boolean_in_struct_remapped_type != SPIRType::Boolean && type.basetype == SPIRType::Boolean) { tmp_type = type; tmp_type.basetype = backend.boolean_in_struct_remapped_type; p_type = &tmp_type; } res = type_to_glsl_constructor(*p_type) + "({ "; needs_trailing_tracket = true; } else if (backend.use_initializer_list) { res = "{ "; } else { res = type_to_glsl_constructor(type) + "("; } uint32_t subconstant_index = 0; for (auto &elem : c.subconstants) { if (auto *op = maybe_get(elem)) { res += constant_op_expression(*op); } else if (maybe_get(elem) != nullptr) { res += to_name(elem); } else { auto &subc = get(elem); if (subc.specialization && !expression_is_forwarded(elem)) res += to_name(elem); else { if (!type_is_top_level_array(type) && type.basetype == SPIRType::Struct) { // When we get down to emitting struct members, override the block-like information. // For constants, we can freely mix and match block-like state. 
inside_block_like_struct_scope = has_member_decoration(type.self, subconstant_index, DecorationOffset); } if (type.basetype == SPIRType::Struct) inside_struct_scope = true; res += constant_expression(subc, inside_block_like_struct_scope, inside_struct_scope); } } if (&elem != &c.subconstants.back()) res += ", "; subconstant_index++; } res += backend.use_initializer_list ? " }" : ")"; if (needs_trailing_tracket) res += ")"; return res; } else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0) { // Metal tessellation likes empty structs which are then constant expressions. if (backend.supports_empty_struct) return "{ }"; else if (backend.use_typed_initializer_list) return join(type_to_glsl(type), "{ 0 }"); else if (backend.use_initializer_list) return "{ 0 }"; else return join(type_to_glsl(type), "(0)"); } else if (c.columns() == 1) { auto res = constant_expression_vector(c, 0); if (inside_struct_scope && backend.boolean_in_struct_remapped_type != SPIRType::Boolean && type.basetype == SPIRType::Boolean) { SPIRType tmp_type = type; tmp_type.basetype = backend.boolean_in_struct_remapped_type; res = join(type_to_glsl(tmp_type), "(", res, ")"); } return res; } else { string res = type_to_glsl(type) + "("; for (uint32_t col = 0; col < c.columns(); col++) { if (c.specialization_constant_id(col) != 0) res += to_name(c.specialization_constant_id(col)); else res += constant_expression_vector(c, col); if (col + 1 < c.columns()) res += ", "; } res += ")"; if (inside_struct_scope && backend.boolean_in_struct_remapped_type != SPIRType::Boolean && type.basetype == SPIRType::Boolean) { SPIRType tmp_type = type; tmp_type.basetype = backend.boolean_in_struct_remapped_type; res = join(type_to_glsl(tmp_type), "(", res, ")"); } return res; } } #ifdef _MSC_VER // snprintf does not exist or is buggy on older MSVC versions, some of them // being used by MinGW. Use sprintf instead and disable corresponding warning. 
#pragma warning(push) #pragma warning(disable : 4996) #endif string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { string res; float float_value = c.scalar_f16(col, row); // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots // of complicated workarounds, just value-cast to the half type always. if (std::isnan(float_value) || std::isinf(float_value)) { SPIRType type; type.basetype = SPIRType::Half; type.vecsize = 1; type.columns = 1; if (float_value == numeric_limits::infinity()) res = join(type_to_glsl(type), "(1.0 / 0.0)"); else if (float_value == -numeric_limits::infinity()) res = join(type_to_glsl(type), "(-1.0 / 0.0)"); else if (std::isnan(float_value)) res = join(type_to_glsl(type), "(0.0 / 0.0)"); else SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); } else { SPIRType type; type.basetype = SPIRType::Half; type.vecsize = 1; type.columns = 1; res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")"); } return res; } string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { string res; float float_value = c.scalar_f32(col, row); if (std::isnan(float_value) || std::isinf(float_value)) { // Use special representation. 
if (!is_legacy()) { SPIRType out_type; SPIRType in_type; out_type.basetype = SPIRType::Float; in_type.basetype = SPIRType::UInt; out_type.vecsize = 1; in_type.vecsize = 1; out_type.width = 32; in_type.width = 32; char print_buffer[32]; #ifdef _WIN32 sprintf(print_buffer, "0x%xu", c.scalar(col, row)); #else snprintf(print_buffer, sizeof(print_buffer), "0x%xu", c.scalar(col, row)); #endif const char *comment = "inf"; if (float_value == -numeric_limits::infinity()) comment = "-inf"; else if (std::isnan(float_value)) comment = "nan"; res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)"); } else { if (float_value == numeric_limits::infinity()) { if (backend.float_literal_suffix) res = "(1.0f / 0.0f)"; else res = "(1.0 / 0.0)"; } else if (float_value == -numeric_limits::infinity()) { if (backend.float_literal_suffix) res = "(-1.0f / 0.0f)"; else res = "(-1.0 / 0.0)"; } else if (std::isnan(float_value)) { if (backend.float_literal_suffix) res = "(0.0f / 0.0f)"; else res = "(0.0 / 0.0)"; } else SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); } } else { res = convert_to_string(float_value, current_locale_radix_character); if (backend.float_literal_suffix) res += "f"; } return res; } std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { string res; double double_value = c.scalar_f64(col, row); if (std::isnan(double_value) || std::isinf(double_value)) { // Use special representation. if (!is_legacy()) { SPIRType out_type; SPIRType in_type; out_type.basetype = SPIRType::Double; in_type.basetype = SPIRType::UInt64; out_type.vecsize = 1; in_type.vecsize = 1; out_type.width = 64; in_type.width = 64; uint64_t u64_value = c.scalar_u64(col, row); if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310. 
SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310."); require_extension_internal("GL_ARB_gpu_shader_int64"); char print_buffer[64]; #ifdef _WIN32 sprintf(print_buffer, "0x%llx%s", static_cast(u64_value), backend.long_long_literal_suffix ? "ull" : "ul"); #else snprintf(print_buffer, sizeof(print_buffer), "0x%llx%s", static_cast(u64_value), backend.long_long_literal_suffix ? "ull" : "ul"); #endif const char *comment = "inf"; if (double_value == -numeric_limits::infinity()) comment = "-inf"; else if (std::isnan(double_value)) comment = "nan"; res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)"); } else { if (options.es) SPIRV_CROSS_THROW("FP64 not supported in ES profile."); if (options.version < 400) require_extension_internal("GL_ARB_gpu_shader_fp64"); if (double_value == numeric_limits::infinity()) { if (backend.double_literal_suffix) res = "(1.0lf / 0.0lf)"; else res = "(1.0 / 0.0)"; } else if (double_value == -numeric_limits::infinity()) { if (backend.double_literal_suffix) res = "(-1.0lf / 0.0lf)"; else res = "(-1.0 / 0.0)"; } else if (std::isnan(double_value)) { if (backend.double_literal_suffix) res = "(0.0lf / 0.0lf)"; else res = "(0.0 / 0.0)"; } else SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); } } else { res = convert_to_string(double_value, current_locale_radix_character); if (backend.double_literal_suffix) res += "lf"; } return res; } #ifdef _MSC_VER #pragma warning(pop) #endif string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) { auto type = get(c.constant_type); type.columns = 1; auto scalar_type = type; scalar_type.vecsize = 1; string res; bool splat = backend.use_constructor_splatting && c.vector_size() > 1; bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1; if (!type_is_floating_point(type)) { // Cannot swizzle literal integers as a special case. 
swizzle_splat = false; } if (splat || swizzle_splat) { // Cannot use constant splatting if we have specialization constants somewhere in the vector. for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.specialization_constant_id(vector, i) != 0) { splat = false; swizzle_splat = false; break; } } } if (splat || swizzle_splat) { if (type.width == 64) { uint64_t ident = c.scalar_u64(vector, 0); for (uint32_t i = 1; i < c.vector_size(); i++) { if (ident != c.scalar_u64(vector, i)) { splat = false; swizzle_splat = false; break; } } } else { uint32_t ident = c.scalar(vector, 0); for (uint32_t i = 1; i < c.vector_size(); i++) { if (ident != c.scalar(vector, i)) { splat = false; swizzle_splat = false; } } } } if (c.vector_size() > 1 && !swizzle_splat) res += type_to_glsl(type) + "("; switch (type.basetype) { case SPIRType::Half: if (splat || swizzle_splat) { res += convert_half_to_string(c, vector, 0); if (swizzle_splat) res = remap_swizzle(get(c.constant_type), 1, res); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_expression(c.specialization_constant_id(vector, i)); else res += convert_half_to_string(c, vector, i); if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Float: if (splat || swizzle_splat) { res += convert_float_to_string(c, vector, 0); if (swizzle_splat) res = remap_swizzle(get(c.constant_type), 1, res); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_expression(c.specialization_constant_id(vector, i)); else res += convert_float_to_string(c, vector, i); if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Double: if (splat || swizzle_splat) { res += convert_double_to_string(c, vector, 0); if (swizzle_splat) res = remap_swizzle(get(c.constant_type), 1, res); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && 
c.specialization_constant_id(vector, i) != 0) res += to_expression(c.specialization_constant_id(vector, i)); else res += convert_double_to_string(c, vector, i); if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Int64: { auto tmp = type; tmp.vecsize = 1; tmp.columns = 1; auto int64_type = type_to_glsl(tmp); if (splat) { res += convert_to_string(c.scalar_i64(vector, 0), int64_type, backend.long_long_literal_suffix); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_expression(c.specialization_constant_id(vector, i)); else res += convert_to_string(c.scalar_i64(vector, i), int64_type, backend.long_long_literal_suffix); if (i + 1 < c.vector_size()) res += ", "; } } break; } case SPIRType::UInt64: if (splat) { res += convert_to_string(c.scalar_u64(vector, 0)); if (backend.long_long_literal_suffix) res += "ull"; else res += "ul"; } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_expression(c.specialization_constant_id(vector, i)); else { res += convert_to_string(c.scalar_u64(vector, i)); if (backend.long_long_literal_suffix) res += "ull"; else res += "ul"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::UInt: if (splat) { res += convert_to_string(c.scalar(vector, 0)); if (is_legacy()) { // Fake unsigned constant literals with signed ones if possible. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. 
if (c.scalar_i32(vector, 0) < 0) SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); } else if (backend.uint32_t_literal_suffix) res += "u"; } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_expression(c.specialization_constant_id(vector, i)); else { res += convert_to_string(c.scalar(vector, i)); if (is_legacy()) { // Fake unsigned constant literals with signed ones if possible. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. if (c.scalar_i32(vector, i) < 0) SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made " "the literal negative."); } else if (backend.uint32_t_literal_suffix) res += "u"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Int: if (splat) res += convert_to_string(c.scalar_i32(vector, 0)); else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_expression(c.specialization_constant_id(vector, i)); else res += convert_to_string(c.scalar_i32(vector, i)); if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::UShort: if (splat) { res += convert_to_string(c.scalar(vector, 0)); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_expression(c.specialization_constant_id(vector, i)); else { if (*backend.uint16_t_literal_suffix) { res += convert_to_string(c.scalar_u16(vector, i)); res += backend.uint16_t_literal_suffix; } else { // If backend doesn't have a literal suffix, we need to value cast. 
res += type_to_glsl(scalar_type); res += "("; res += convert_to_string(c.scalar_u16(vector, i)); res += ")"; } } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Short: if (splat) { res += convert_to_string(c.scalar_i16(vector, 0)); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_expression(c.specialization_constant_id(vector, i)); else { if (*backend.int16_t_literal_suffix) { res += convert_to_string(c.scalar_i16(vector, i)); res += backend.int16_t_literal_suffix; } else { // If backend doesn't have a literal suffix, we need to value cast. res += type_to_glsl(scalar_type); res += "("; res += convert_to_string(c.scalar_i16(vector, i)); res += ")"; } } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::UByte: if (splat) { res += convert_to_string(c.scalar_u8(vector, 0)); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_expression(c.specialization_constant_id(vector, i)); else { res += type_to_glsl(scalar_type); res += "("; res += convert_to_string(c.scalar_u8(vector, i)); res += ")"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::SByte: if (splat) { res += convert_to_string(c.scalar_i8(vector, 0)); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_expression(c.specialization_constant_id(vector, i)); else { res += type_to_glsl(scalar_type); res += "("; res += convert_to_string(c.scalar_i8(vector, i)); res += ")"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Boolean: if (splat) res += c.scalar(vector, 0) ? 
"true" : "false"; else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_expression(c.specialization_constant_id(vector, i)); else res += c.scalar(vector, i) ? "true" : "false"; if (i + 1 < c.vector_size()) res += ", "; } } break; default: SPIRV_CROSS_THROW("Invalid constant expression basetype."); } if (c.vector_size() > 1 && !swizzle_splat) res += ")"; return res; } SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id) { forced_temporaries.insert(id); emit_uninitialized_temporary(type, id); return set(id, to_name(id), type, true); } void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id) { // If we're declaring temporaries inside continue blocks, // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id)) { auto &header = get(current_continue_block->loop_dominator); if (find_if(begin(header.declare_temporary), end(header.declare_temporary), [result_type, result_id](const pair &tmp) { return tmp.first == result_type && tmp.second == result_id; }) == end(header.declare_temporary)) { header.declare_temporary.emplace_back(result_type, result_id); hoisted_temporaries.insert(result_id); force_recompile(); } } else if (hoisted_temporaries.count(result_id) == 0) { auto &type = get(result_type); auto &flags = get_decoration_bitset(result_id); // The result_id has not been made into an expression yet, so use flags interface. 
// Builds the text that must precede the right-hand side when a value is assigned to the
// temporary `result_id`. Every returned string ends in " = ".
//
// - While emitting a continue block (and block_temporary_hoisting is not set), a temporary that has
//   not been hoisted yet is queued on the loop dominator's declare_temporary list, marked as hoisted,
//   and a recompile is requested. Only the bare name is returned.
// - A temporary that is already in hoisted_temporaries is simply written to (bare name).
// - Anything else gets a full declaration: qualifiers followed by the variable declaration.
string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
{
    auto &type = get(result_type);

    const bool already_hoisted = hoisted_temporaries.count(result_id) != 0;

    // Temporaries needed by a continue block have to be declared in the loop header,
    // so that the continue block itself never has to declare new variables.
    if (!block_temporary_hoisting && current_continue_block && !already_hoisted)
    {
        auto &header = get(current_continue_block->loop_dominator);
        auto &pending = header.declare_temporary;

        auto last = end(pending);
        auto hit = find_if(begin(pending), last, [result_type, result_id](const pair &tmp) {
            return tmp.first == result_type && tmp.second == result_id;
        });

        // Only queue the (type, id) pair once; queueing it changes the output, so recompile.
        if (hit == last)
        {
            pending.emplace_back(result_type, result_id);
            hoisted_temporaries.insert(result_id);
            force_recompile_guarantee_forward_progress();
        }

        return join(to_name(result_id), " = ");
    }

    // Declared earlier already: "declaring" it now just means assigning to it.
    if (already_hoisted)
        return join(to_name(result_id), " = ");

    // The result_id is not an expression yet, so the decoration flags are queried by ID.
    add_local_variable_name(result_id);
    auto &flags = get_decoration_bitset(result_id);
    return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
}
statement(declare_temporary(result_type, result_id), rhs, ";"); return set(result_id, to_name(result_id), result_type, true); } } void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { bool forward = should_forward(op0); emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward); inherit_expression_dependencies(result_id, op0); } void CompilerGLSL::emit_unary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { auto &type = get(result_type); bool forward = should_forward(op0); emit_op(result_type, result_id, join(type_to_glsl(type), "(", op, to_enclosed_unpacked_expression(op0), ")"), forward); inherit_expression_dependencies(result_id, op0); } void CompilerGLSL::emit_mesh_tasks(SPIRBlock &block) { statement("EmitMeshTasksEXT(", to_unpacked_expression(block.mesh.groups[0]), ", ", to_unpacked_expression(block.mesh.groups[1]), ", ", to_unpacked_expression(block.mesh.groups[2]), ");"); } void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { // Various FP arithmetic opcodes such as add, sub, mul will hit this. 
bool force_temporary_precise = backend.support_precise_qualifier && has_decoration(result_id, DecorationNoContraction) && type_is_floating_point(get(result_type)); bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise; emit_op(result_type, result_id, join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op) { auto &type = get(result_type); auto expr = type_to_glsl_constructor(type); expr += '('; for (uint32_t i = 0; i < type.vecsize; i++) { // Make sure to call to_expression multiple times to ensure // that these expressions are properly flushed to temporaries if needed. expr += op; expr += to_extract_component_expression(operand, i); if (i + 1 < type.vecsize) expr += ", "; } expr += ')'; emit_op(result_type, result_id, expr, should_forward(operand)); inherit_expression_dependencies(result_id, operand); } void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, bool negate, SPIRType::BaseType expected_type) { auto &type0 = expression_type(op0); auto &type1 = expression_type(op1); SPIRType target_type0 = type0; SPIRType target_type1 = type1; target_type0.basetype = expected_type; target_type1.basetype = expected_type; target_type0.vecsize = 1; target_type1.vecsize = 1; auto &type = get(result_type); auto expr = type_to_glsl_constructor(type); expr += '('; for (uint32_t i = 0; i < type.vecsize; i++) { // Make sure to call to_expression multiple times to ensure // that these expressions are properly flushed to temporaries if needed. 
if (negate) expr += "!("; if (expected_type != SPIRType::Unknown && type0.basetype != expected_type) expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i)); else expr += to_extract_component_expression(op0, i); expr += ' '; expr += op; expr += ' '; if (expected_type != SPIRType::Unknown && type1.basetype != expected_type) expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i)); else expr += to_extract_component_expression(op1, i); if (negate) expr += ")"; if (i + 1 < type.vecsize) expr += ", "; } expr += ')'; emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type, uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type) { auto &type0 = expression_type(op0); auto &type1 = expression_type(op1); // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs. // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected // since equality test is exactly the same. bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type); // Create a fake type so we can bitcast to it. // We only deal with regular arithmetic types here like int, uints and so on. SPIRType expected_type; expected_type.basetype = input_type; expected_type.vecsize = type0.vecsize; expected_type.columns = type0.columns; expected_type.width = type0.width; if (cast) { cast_op0 = bitcast_glsl(expected_type, op0); cast_op1 = bitcast_glsl(expected_type, op1); } else { // If we don't cast, our actual input type is that of the first (or second) argument. 
cast_op0 = to_enclosed_unpacked_expression(op0); cast_op1 = to_enclosed_unpacked_expression(op1); input_type = type0.basetype; } return expected_type; } bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) { // Some bitcasts may require complex casting sequences, and are implemented here. // Otherwise a simply unary function will do with bitcast_glsl_op. auto &output_type = get(result_type); auto &input_type = expression_type(op0); string expr; if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1) expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))"); else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half && input_type.vecsize == 2) expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))"); else return false; emit_op(result_type, id, expr, should_forward(op0)); return true; } void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type, bool implicit_integer_promotion) { string cast_op0, cast_op1; auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); auto &out_type = get(result_type); // We might have casted away from the result type, so bitcast again. // For example, arithmetic right shift with uint inputs. // Special case boolean outputs since relational opcodes output booleans instead of int/uint. auto bitop = join(cast_op0, " ", op, " ", cast_op1); string expr; if (implicit_integer_promotion) { // Simple value cast. 
expr = join(type_to_glsl(out_type), '(', bitop, ')'); } else if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) { expected_type.basetype = input_type; expr = join(bitcast_glsl_op(out_type, expected_type), '(', bitop, ')'); } else { expr = std::move(bitop); } emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { bool forward = should_forward(op0); emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward); inherit_expression_dependencies(result_id, op0); } void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { // Opaque types (e.g. OpTypeSampledImage) must always be forwarded in GLSL const auto &type = get_type(result_type); bool must_forward = type_is_opaque_value(type); bool forward = must_forward || (should_forward(op0) && should_forward(op1)); emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { auto &type = get(result_type); if (type_is_floating_point(type)) { if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Floating point atomics requires Vulkan semantics."); if (options.es) SPIRV_CROSS_THROW("Floating point atomics requires desktop GLSL."); require_extension_internal("GL_EXT_shader_atomic_float"); } forced_temporaries.insert(result_id); emit_op(result_type, result_id, join(op, "(", to_non_uniform_aware_expression(op0), ", ", to_unpacked_expression(op1), ")"), false); 
flush_all_atomic_capable_variables(); } void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op) { forced_temporaries.insert(result_id); emit_op(result_type, result_id, join(op, "(", to_non_uniform_aware_expression(op0), ", ", to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false); flush_all_atomic_capable_variables(); } void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type) { auto &out_type = get(result_type); auto &expr_type = expression_type(op0); auto expected_type = out_type; // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends. expected_type.basetype = input_type; expected_type.width = expr_type.width; string cast_op; if (expr_type.basetype != input_type) { if (expr_type.basetype == SPIRType::Boolean) cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")"); else cast_op = bitcast_glsl(expected_type, op0); } else cast_op = to_unpacked_expression(op0); string expr; if (out_type.basetype != expected_result_type) { expected_type.basetype = expected_result_type; expected_type.width = out_type.width; if (out_type.basetype == SPIRType::Boolean) expr = type_to_glsl(out_type); else expr = bitcast_glsl_op(out_type, expected_type); expr += '('; expr += join(op, "(", cast_op, ")"); expr += ')'; } else { expr += join(op, "(", cast_op, ")"); } emit_op(result_type, result_id, expr, should_forward(op0)); inherit_expression_dependencies(result_id, op0); } // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs // and different vector sizes all at once. Need a special purpose method here. 
void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op, SPIRType::BaseType expected_result_type, SPIRType::BaseType input_type0, SPIRType::BaseType input_type1, SPIRType::BaseType input_type2) { auto &out_type = get(result_type); auto expected_type = out_type; expected_type.basetype = input_type0; string cast_op0 = expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); auto op1_expr = to_unpacked_expression(op1); auto op2_expr = to_unpacked_expression(op2); // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit. expected_type.basetype = input_type1; expected_type.vecsize = 1; string cast_op1 = expression_type(op1).basetype != input_type1 ? join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") : op1_expr; expected_type.basetype = input_type2; expected_type.vecsize = 1; string cast_op2 = expression_type(op2).basetype != input_type2 ? 
join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") : op2_expr; string expr; if (out_type.basetype != expected_result_type) { expected_type.vecsize = out_type.vecsize; expected_type.basetype = expected_result_type; expr = bitcast_glsl_op(out_type, expected_type); expr += '('; expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); expr += ')'; } else { expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); } emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); inherit_expression_dependencies(result_id, op2); } void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op, SPIRType::BaseType input_type) { auto &out_type = get(result_type); auto expected_type = out_type; expected_type.basetype = input_type; string cast_op0 = expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); string cast_op1 = expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1); string cast_op2 = expression_type(op2).basetype != input_type ? 
bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2); string expr; if (out_type.basetype != input_type) { expr = bitcast_glsl_op(out_type, expected_type); expr += '('; expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); expr += ')'; } else { expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); } emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); inherit_expression_dependencies(result_id, op2); } void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, SPIRType::BaseType input_type) { // Special purpose method for implementing clustered subgroup opcodes. // Main difference is that op1 does not participate in any casting, it needs to be a literal. auto &out_type = get(result_type); auto expected_type = out_type; expected_type.basetype = input_type; string cast_op0 = expression_type(op0).basetype != input_type ? 
bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); string expr; if (out_type.basetype != input_type) { expr = bitcast_glsl_op(out_type, expected_type); expr += '('; expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")"); expr += ')'; } else { expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")"); } emit_op(result_type, result_id, expr, should_forward(op0)); inherit_expression_dependencies(result_id, op0); } void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) { string cast_op0, cast_op1; auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); auto &out_type = get(result_type); // Special case boolean outputs since relational opcodes output booleans instead of int/uint. string expr; if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) { expected_type.basetype = input_type; expr = bitcast_glsl_op(out_type, expected_type); expr += '('; expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); expr += ')'; } else { expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); } emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op) { bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2); emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); inherit_expression_dependencies(result_id, op2); } void CompilerGLSL::emit_quaternary_func_op(uint32_t 
result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, uint32_t op3, const char *op) { bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); inherit_expression_dependencies(result_id, op2); inherit_expression_dependencies(result_id, op3); } void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, uint32_t op3, const char *op, SPIRType::BaseType offset_count_type) { // Only need to cast offset/count arguments. Types of base/insert must be same as result type, // and bitfieldInsert is sign invariant. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); auto op0_expr = to_unpacked_expression(op0); auto op1_expr = to_unpacked_expression(op1); auto op2_expr = to_unpacked_expression(op2); auto op3_expr = to_unpacked_expression(op3); SPIRType target_type; target_type.vecsize = 1; target_type.basetype = offset_count_type; if (expression_type(op2).basetype != offset_count_type) { // Value-cast here. Input might be 16-bit. GLSL requires int. op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")"); } if (expression_type(op3).basetype != offset_count_type) { // Value-cast here. Input might be 16-bit. GLSL requires int. 
// Maps a modern texture function name (`op`, e.g. "texture", "textureLod", "texelFetch") to the
// function name used by legacy GLSL targets for the image type `imgtype`.
//
// The result is assembled as <prefix><dimension><suffix>, where
//  - the prefix is "shadow" when is_depth_image(imgtype, tex) reports a comparison sampler and
//    "texture" otherwise ("textureSize" and "texelFetch" keep their own name as prefix),
//  - the dimension string is derived from imgtype.image.dim,
//  - the suffix depends on `op` and on whether an EXT/ARB extension variant is in use.
//
// Side effects: may call require_extension_internal() for the extensions named below.
// Throws (SPIRV_CROSS_THROW) for operations that the legacy target cannot express and for
// unknown `op` strings.
string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
{
    // Dimension part of the legacy function name.
    const char *type;
    switch (imgtype.image.dim)
    {
    case spv::Dim1D:
        // Force 2D path for ES.
        // NOTE(review): inside the ES branch `!options.es` is always false, so this always
        // selects "2D"; in the non-ES branch the same test is always true.
        if (options.es)
            type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
        else
            type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
        break;
    case spv::Dim2D:
        // Array variants are only selected on non-ES targets.
        type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
        break;
    case spv::Dim3D:
        type = "3D";
        break;
    case spv::DimCube:
        type = "Cube";
        break;
    case spv::DimRect:
        type = "2DRect";
        break;
    case spv::DimBuffer:
        type = "Buffer";
        break;
    case spv::DimSubpassData:
        type = "2D";
        break;
    default:
        type = "";
        break;
    }

    // In legacy GLSL, an extension is required for textureLod in the fragment
    // shader or textureGrad anywhere.
    // legacy_lod_ext remembers that the ES extension was requested, which later selects the
    // "...EXT" suffix for the Lod variants.
    bool legacy_lod_ext = false;
    auto &execution = get_entry_point();
    if (op == "textureGrad" || op == "textureProjGrad" ||
        ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
    {
        if (is_legacy_es())
        {
            legacy_lod_ext = true;
            require_extension_internal("GL_EXT_shader_texture_lod");
        }
        else if (is_legacy_desktop())
            require_extension_internal("GL_ARB_shader_texture_lod");
    }

    // Offset variants are rejected on legacy ES and need an extension otherwise.
    if (op == "textureLodOffset" || op == "textureProjLodOffset")
    {
        if (is_legacy_es())
            SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));

        require_extension_internal("GL_EXT_gpu_shader4");
    }

    // GLES has very limited support for shadow samplers.
    // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers,
    // everything else can just throw
    bool is_comparison = is_depth_image(imgtype, tex);
    if (is_comparison && is_legacy_es())
    {
        if (op == "texture" || op == "textureProj")
            require_extension_internal("GL_EXT_shadow_samplers");
        else
            SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));

        // Cube shadow lookups on legacy ES use a fixed name and bypass the assembly below.
        if (imgtype.image.dim == spv::DimCube)
            return "shadowCubeNV";
    }

    // textureSize: unavailable on legacy ES and on shadow samplers, extension-gated otherwise.
    if (op == "textureSize")
    {
        if (is_legacy_es())
            SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
        if (is_comparison)
            SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
        require_extension_internal("GL_EXT_gpu_shader4");
    }

    if (op == "texelFetch" && is_legacy_es())
        SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");

    // Depth lookups on legacy ES use the EXT-suffixed names.
    bool is_es_and_depth = is_legacy_es() && is_comparison;
    std::string type_prefix = is_comparison ? "shadow" : "texture";

    // Assemble the final name; the suffix is chosen per operation.
    if (op == "texture")
        return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
    else if (op == "textureLod")
        return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
    else if (op == "textureProj")
        return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
    else if (op == "textureGrad")
        return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
    else if (op == "textureProjLod")
        return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
    else if (op == "textureLodOffset")
        return join(type_prefix, type, "LodOffset");
    else if (op == "textureProjGrad")
        return join(type_prefix, type,
                    is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
    else if (op == "textureProjLodOffset")
        return join(type_prefix, type, "ProjLodOffset");
    else if (op == "textureSize")
        return join("textureSize", type);
    else if (op == "texelFetch")
        return join("texelFetch", type);
    else
    {
        SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
    }
}
bool ret = true; for (uint32_t row = 0; ret && row < value_type.vecsize; row++) { switch (type.basetype) { case SPIRType::Short: case SPIRType::UShort: ret = cleft->scalar_u16(0, row) == 0 && cright->scalar_u16(0, row) == 1; break; case SPIRType::Int: case SPIRType::UInt: ret = cleft->scalar(0, row) == 0 && cright->scalar(0, row) == 1; break; case SPIRType::Half: ret = cleft->scalar_f16(0, row) == 0.0f && cright->scalar_f16(0, row) == 1.0f; break; case SPIRType::Float: ret = cleft->scalar_f32(0, row) == 0.0f && cright->scalar_f32(0, row) == 1.0f; break; case SPIRType::Double: ret = cleft->scalar_f64(0, row) == 0.0 && cright->scalar_f64(0, row) == 1.0; break; case SPIRType::Int64: case SPIRType::UInt64: ret = cleft->scalar_u64(0, row) == 0 && cright->scalar_u64(0, row) == 1; break; default: ret = false; break; } } if (ret) op = type_to_glsl_constructor(type); return ret; } string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value, uint32_t false_value) { string expr; auto &lerptype = expression_type(select); if (lerptype.vecsize == 1) expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ", to_enclosed_pointer_expression(false_value)); else { auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); }; expr = type_to_glsl_constructor(restype); expr += "("; for (uint32_t i = 0; i < restype.vecsize; i++) { expr += swiz(select, i); expr += " ? "; expr += swiz(true_value, i); expr += " : "; expr += swiz(false_value, i); if (i + 1 < restype.vecsize) expr += ", "; } expr += ")"; } return expr; } void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp) { auto &lerptype = expression_type(lerp); auto &restype = get(result_type); // If this results in a variable pointer, assume it may be written through. 
if (restype.pointer) { register_write(left); register_write(right); } string mix_op; bool has_boolean_mix = *backend.boolean_mix_function && ((options.es && options.version >= 310) || (!options.es && options.version >= 450)); bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp); // Cannot use boolean mix when the lerp argument is just one boolean, // fall back to regular trinary statements. if (lerptype.vecsize == 1) has_boolean_mix = false; // If we can reduce the mix to a simple cast, do so. // This helps for cases like int(bool), uint(bool) which is implemented with // OpSelect bool 1 0. if (trivial_mix) { emit_unary_func_op(result_type, id, lerp, mix_op.c_str()); } else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean) { // Boolean mix not supported on desktop without extension. // Was added in OpenGL 4.5 with ES 3.1 compat. // // Could use GL_EXT_shader_integer_mix on desktop at least, // but Apple doesn't support it. :( // Just implement it as ternary expressions. auto expr = to_ternary_expression(get(result_type), lerp, right, left); emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp)); inherit_expression_dependencies(id, left); inherit_expression_dependencies(id, right); inherit_expression_dependencies(id, lerp); } else if (lerptype.basetype == SPIRType::Boolean) emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function); else emit_trinary_func_op(result_type, id, left, right, lerp, "mix"); } string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id) { // Keep track of the array indices we have used to load the image. // We'll need to use the same array index into the combined image sampler array. 
auto image_expr = to_non_uniform_aware_expression(image_id); string array_expr; auto array_index = image_expr.find_first_of('['); if (array_index != string::npos) array_expr = image_expr.substr(array_index, string::npos); auto &args = current_function->arguments; // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect // all possible combinations into new sampler2D uniforms. auto *image = maybe_get_backing_variable(image_id); auto *samp = maybe_get_backing_variable(samp_id); if (image) image_id = image->self; if (samp) samp_id = samp->self; auto image_itr = find_if(begin(args), end(args), [image_id](const SPIRFunction::Parameter ¶m) { return image_id == param.id; }); auto sampler_itr = find_if(begin(args), end(args), [samp_id](const SPIRFunction::Parameter ¶m) { return samp_id == param.id; }); if (image_itr != end(args) || sampler_itr != end(args)) { // If any parameter originates from a parameter, we will find it in our argument list. bool global_image = image_itr == end(args); bool global_sampler = sampler_itr == end(args); VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args))); VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args))); auto &combined = current_function->combined_parameters; auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid && p.sampler_id == sid; }); if (itr != end(combined)) return to_expression(itr->id) + array_expr; else { SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was " "build_combined_image_samplers() used " "before compile() was called?"); } } else { // For global sampler2D, look directly at the global remapping table. 
auto &mapping = combined_image_samplers; auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) { return combined.image_id == image_id && combined.sampler_id == samp_id; }); if (itr != end(combined_image_samplers)) return to_expression(itr->combined_id) + array_expr; else { SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used " "before compile() was called?"); } } } bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op, const uint32_t *ops) { switch (op) { case OpGroupNonUniformElect: case OpGroupNonUniformBallot: case OpGroupNonUniformBallotFindLSB: case OpGroupNonUniformBallotFindMSB: case OpGroupNonUniformBroadcast: case OpGroupNonUniformBroadcastFirst: case OpGroupNonUniformAll: case OpGroupNonUniformAny: case OpGroupNonUniformAllEqual: case OpControlBarrier: case OpMemoryBarrier: case OpGroupNonUniformBallotBitCount: case OpGroupNonUniformBallotBitExtract: case OpGroupNonUniformInverseBallot: return true; case OpGroupNonUniformIAdd: case OpGroupNonUniformFAdd: case OpGroupNonUniformIMul: case OpGroupNonUniformFMul: { const GroupOperation operation = static_cast(ops[3]); if (operation == GroupOperationReduce || operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan) { return true; } else { return false; } } default: return false; } } void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) { if (options.vulkan_semantics && combined_image_samplers.empty()) { emit_binary_func_op(result_type, result_id, image_id, samp_id, type_to_glsl(get(result_type), result_id).c_str()); } else { // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); } // Make sure to suppress usage tracking and any expression invalidation. 
// It is illegal to create temporaries of opaque types. forwarded_temporaries.erase(result_id); } static inline bool image_opcode_is_sample_no_dref(Op op) { switch (op) { case OpImageSampleExplicitLod: case OpImageSampleImplicitLod: case OpImageSampleProjExplicitLod: case OpImageSampleProjImplicitLod: case OpImageFetch: case OpImageRead: case OpImageSparseSampleExplicitLod: case OpImageSparseSampleImplicitLod: case OpImageSparseSampleProjExplicitLod: case OpImageSparseSampleProjImplicitLod: case OpImageSparseFetch: case OpImageSparseRead: return true; default: return false; } } void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id, uint32_t &texel_id) { // Need to allocate two temporaries. if (options.es) SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL."); require_extension_internal("GL_ARB_sparse_texture2"); auto &temps = extra_sub_expressions[id]; if (temps == 0) temps = ir.increase_bound_by(2); feedback_id = temps + 0; texel_id = temps + 1; auto &return_type = get(result_type_id); if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2) SPIRV_CROSS_THROW("Invalid return type for sparse feedback."); emit_uninitialized_temporary(return_type.member_types[0], feedback_id); emit_uninitialized_temporary(return_type.member_types[1], texel_id); } uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const { auto itr = extra_sub_expressions.find(id); if (itr == extra_sub_expressions.end()) return 0; else return itr->second + 1; } void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse) { auto *ops = stream(i); auto op = static_cast(i.op); SmallVector inherited_expressions; uint32_t result_type_id = ops[0]; uint32_t id = ops[1]; auto &return_type = get(result_type_id); uint32_t sparse_code_id = 0; uint32_t sparse_texel_id = 0; if (sparse) emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id); bool forward = 
false; string expr = to_texture_op(i, sparse, &forward, inherited_expressions); if (sparse) { statement(to_expression(sparse_code_id), " = ", expr, ";"); expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id), ")"); forward = true; inherited_expressions.clear(); } emit_op(result_type_id, id, expr, forward); for (auto &inherit : inherited_expressions) inherit_expression_dependencies(id, inherit); // Do not register sparse ops as control dependent as they are always lowered to a temporary. switch (op) { case OpImageSampleDrefImplicitLod: case OpImageSampleImplicitLod: case OpImageSampleProjImplicitLod: case OpImageSampleProjDrefImplicitLod: register_control_dependent_expression(id); break; default: break; } } std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward, SmallVector &inherited_expressions) { auto *ops = stream(i); auto op = static_cast(i.op); uint32_t length = i.length; uint32_t result_type_id = ops[0]; VariableID img = ops[2]; uint32_t coord = ops[3]; uint32_t dref = 0; uint32_t comp = 0; bool gather = false; bool proj = false; bool fetch = false; bool nonuniform_expression = false; const uint32_t *opt = nullptr; auto &result_type = get(result_type_id); inherited_expressions.push_back(coord); if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img)) nonuniform_expression = true; switch (op) { case OpImageSampleDrefImplicitLod: case OpImageSampleDrefExplicitLod: case OpImageSparseSampleDrefImplicitLod: case OpImageSparseSampleDrefExplicitLod: dref = ops[4]; opt = &ops[5]; length -= 5; break; case OpImageSampleProjDrefImplicitLod: case OpImageSampleProjDrefExplicitLod: case OpImageSparseSampleProjDrefImplicitLod: case OpImageSparseSampleProjDrefExplicitLod: dref = ops[4]; opt = &ops[5]; length -= 5; proj = true; break; case OpImageDrefGather: case OpImageSparseDrefGather: dref = ops[4]; opt = &ops[5]; length -= 5; gather = true; if (options.es 
&& options.version < 310) SPIRV_CROSS_THROW("textureGather requires ESSL 310."); else if (!options.es && options.version < 400) SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400."); break; case OpImageGather: case OpImageSparseGather: comp = ops[4]; opt = &ops[5]; length -= 5; gather = true; if (options.es && options.version < 310) SPIRV_CROSS_THROW("textureGather requires ESSL 310."); else if (!options.es && options.version < 400) { if (!expression_is_constant_null(comp)) SPIRV_CROSS_THROW("textureGather with component requires GLSL 400."); require_extension_internal("GL_ARB_texture_gather"); } break; case OpImageFetch: case OpImageSparseFetch: case OpImageRead: // Reads == fetches in Metal (other langs will not get here) opt = &ops[4]; length -= 4; fetch = true; break; case OpImageSampleProjImplicitLod: case OpImageSampleProjExplicitLod: case OpImageSparseSampleProjImplicitLod: case OpImageSparseSampleProjExplicitLod: opt = &ops[4]; length -= 4; proj = true; break; default: opt = &ops[4]; length -= 4; break; } // Bypass pointers because we need the real image struct auto &type = expression_type(img); auto &imgtype = get(type.self); uint32_t coord_components = 0; switch (imgtype.image.dim) { case spv::Dim1D: coord_components = 1; break; case spv::Dim2D: coord_components = 2; break; case spv::Dim3D: coord_components = 3; break; case spv::DimCube: coord_components = 3; break; case spv::DimBuffer: coord_components = 1; break; default: coord_components = 2; break; } if (dref) inherited_expressions.push_back(dref); if (proj) coord_components++; if (imgtype.image.arrayed) coord_components++; uint32_t bias = 0; uint32_t lod = 0; uint32_t grad_x = 0; uint32_t grad_y = 0; uint32_t coffset = 0; uint32_t offset = 0; uint32_t coffsets = 0; uint32_t sample = 0; uint32_t minlod = 0; uint32_t flags = 0; if (length) { flags = *opt++; length--; } auto test = [&](uint32_t &v, uint32_t flag) { if (length && (flags & flag)) { v = *opt++; 
inherited_expressions.push_back(v); length--; } }; test(bias, ImageOperandsBiasMask); test(lod, ImageOperandsLodMask); test(grad_x, ImageOperandsGradMask); test(grad_y, ImageOperandsGradMask); test(coffset, ImageOperandsConstOffsetMask); test(offset, ImageOperandsOffsetMask); test(coffsets, ImageOperandsConstOffsetsMask); test(sample, ImageOperandsSampleMask); test(minlod, ImageOperandsMinLodMask); TextureFunctionBaseArguments base_args = {}; base_args.img = img; base_args.imgtype = &imgtype; base_args.is_fetch = fetch != 0; base_args.is_gather = gather != 0; base_args.is_proj = proj != 0; string expr; TextureFunctionNameArguments name_args = {}; name_args.base = base_args; name_args.has_array_offsets = coffsets != 0; name_args.has_offset = coffset != 0 || offset != 0; name_args.has_grad = grad_x != 0 || grad_y != 0; name_args.has_dref = dref != 0; name_args.is_sparse_feedback = sparse; name_args.has_min_lod = minlod != 0; name_args.lod = lod; expr += to_function_name(name_args); expr += "("; uint32_t sparse_texel_id = 0; if (sparse) sparse_texel_id = get_sparse_feedback_texel_id(ops[1]); TextureFunctionArguments args = {}; args.base = base_args; args.coord = coord; args.coord_components = coord_components; args.dref = dref; args.grad_x = grad_x; args.grad_y = grad_y; args.lod = lod; if (coffsets) args.offset = coffsets; else if (coffset) args.offset = coffset; else args.offset = offset; args.bias = bias; args.component = comp; args.sample = sample; args.sparse_texel = sparse_texel_id; args.min_lod = minlod; args.nonuniform_expression = nonuniform_expression; expr += to_function_args(args, forward); expr += ")"; // texture(samplerXShadow) returns float. shadowX() returns vec4, but only in desktop GLSL. Swizzle here. if (is_legacy() && !options.es && is_depth_image(imgtype, img)) expr += ".r"; // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. // Remap back to 4 components as sampling opcodes expect. 
if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op)) { bool image_is_depth = false; const auto *combined = maybe_get(img); VariableID image_id = combined ? combined->image : img; if (combined && is_depth_image(imgtype, combined->image)) image_is_depth = true; else if (is_depth_image(imgtype, img)) image_is_depth = true; // We must also check the backing variable for the image. // We might have loaded an OpImage, and used that handle for two different purposes. // Once with comparison, once without. auto *image_variable = maybe_get_backing_variable(image_id); if (image_variable && is_depth_image(get(image_variable->basetype), image_variable->self)) image_is_depth = true; if (image_is_depth) expr = remap_swizzle(result_type, 1, expr); } if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32) { // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically. // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision. expr = join(type_to_glsl_constructor(result_type), "(", expr, ")"); } // Deals with reads from MSL. We might need to downconvert to fewer components. 
if (op == OpImageRead) expr = remap_swizzle(result_type, 4, expr); return expr; } bool CompilerGLSL::expression_is_constant_null(uint32_t id) const { auto *c = maybe_get(id); if (!c) return false; return c->constant_is_null(); } bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr) { auto &type = expression_type(ptr); if (!type_is_top_level_array(get_pointee_type(type))) return false; if (!backend.array_is_value_type) return true; auto *var = maybe_get_backing_variable(ptr); if (!var) return false; auto &backed_type = get(var->basetype); return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct && has_member_decoration(backed_type.self, 0, DecorationOffset); } // Returns the function name for a texture sampling function for the specified image and sampling characteristics. // For some subclasses, the function is a method on the specified image. string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args) { if (args.has_min_lod) { if (options.es) SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL."); require_extension_internal("GL_ARB_sparse_texture_clamp"); } string fname; auto &imgtype = *args.base.imgtype; VariableID tex = args.base.img; // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. // To emulate this, we will have to use textureGrad with a constant gradient of 0. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. bool workaround_lod_array_shadow_as_grad = false; if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && is_depth_image(imgtype, tex) && args.lod && !args.base.is_fetch) { if (!expression_is_constant_null(args.lod)) { SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. 
This cannot be " "expressed in GLSL."); } workaround_lod_array_shadow_as_grad = true; } if (args.is_sparse_feedback) fname += "sparse"; if (args.base.is_fetch) fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch"; else { fname += args.is_sparse_feedback ? "Texture" : "texture"; if (args.base.is_gather) fname += "Gather"; if (args.has_array_offsets) fname += "Offsets"; if (args.base.is_proj) fname += "Proj"; if (args.has_grad || workaround_lod_array_shadow_as_grad) fname += "Grad"; if (args.lod != 0 && !workaround_lod_array_shadow_as_grad) fname += "Lod"; } if (args.has_offset) fname += "Offset"; if (args.has_min_lod) fname += "Clamp"; if (args.is_sparse_feedback || args.has_min_lod) fname += "ARB"; return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname; } std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) { auto *var = maybe_get_backing_variable(id); // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL. // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions. if (var) { auto &type = get(var->basetype); if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) { if (options.vulkan_semantics) { if (dummy_sampler_id) { // Don't need to consider Shadow state since the dummy sampler is always non-shadow. auto sampled_type = type; sampled_type.basetype = SPIRType::SampledImage; return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ", to_expression(dummy_sampler_id), ")"); } else { // Newer glslang supports this extension to deal with texture2D as argument to texture functions. require_extension_internal("GL_EXT_samplerless_texture_functions"); } } else { if (!dummy_sampler_id) SPIRV_CROSS_THROW("Cannot find dummy sampler ID. 
Was " "build_dummy_sampler_for_combined_images() called?"); return to_combined_image_sampler(id, dummy_sampler_id); } } } return to_non_uniform_aware_expression(id); } // Returns the function args for a texture sampling function for the specified image and sampling characteristics. string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward) { VariableID img = args.base.img; auto &imgtype = *args.base.imgtype; string farg_str; if (args.base.is_fetch) farg_str = convert_separate_image_to_expression(img); else farg_str = to_non_uniform_aware_expression(img); if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos) { // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way. farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")"); } bool swizz_func = backend.swizzle_is_function; auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { if (comps == in_comps) return ""; switch (comps) { case 1: return ".x"; case 2: return swizz_func ? ".xy()" : ".xy"; case 3: return swizz_func ? ".xyz()" : ".xyz"; default: return ""; } }; bool forward = should_forward(args.coord); // The IR can give us more components than we need, so chop them off as needed. auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize); // Only enclose the UV expression if needed. auto coord_expr = (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr); // texelFetch only takes int, not uint. auto &coord_type = expression_type(args.coord); if (coord_type.basetype == SPIRType::UInt) { auto expected_type = coord_type; expected_type.vecsize = args.coord_components; expected_type.basetype = SPIRType::Int; coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr); } // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. 
// To emulate this, we will have to use textureGrad with a constant gradient of 0. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. bool workaround_lod_array_shadow_as_grad = ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && is_depth_image(imgtype, img) && args.lod != 0 && !args.base.is_fetch; if (args.dref) { forward = forward && should_forward(args.dref); // SPIR-V splits dref and coordinate. if (args.base.is_gather || args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather. { farg_str += ", "; farg_str += to_expression(args.coord); farg_str += ", "; farg_str += to_expression(args.dref); } else if (args.base.is_proj) { // Have to reshuffle so we get vec4(coord, dref, proj), special case. // Other shading languages splits up the arguments for coord and compare value like SPIR-V. // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow. farg_str += ", vec4("; if (imgtype.image.dim == Dim1D) { // Could reuse coord_expr, but we will mess up the temporary usage checking. farg_str += to_enclosed_expression(args.coord) + ".x"; farg_str += ", "; farg_str += "0.0, "; farg_str += to_expression(args.dref); farg_str += ", "; farg_str += to_enclosed_expression(args.coord) + ".y)"; } else if (imgtype.image.dim == Dim2D) { // Could reuse coord_expr, but we will mess up the temporary usage checking. farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy"); farg_str += ", "; farg_str += to_expression(args.dref); farg_str += ", "; farg_str += to_enclosed_expression(args.coord) + ".z)"; } else SPIRV_CROSS_THROW("Invalid type for textureProj with shadow."); } else { // Create a composite which merges coord/dref into a single vector. 
auto type = expression_type(args.coord); type.vecsize = args.coord_components + 1; if (imgtype.image.dim == Dim1D && options.es) type.vecsize++; farg_str += ", "; farg_str += type_to_glsl_constructor(type); farg_str += "("; if (imgtype.image.dim == Dim1D && options.es) { if (imgtype.image.arrayed) { farg_str += enclose_expression(coord_expr) + ".x"; farg_str += ", 0.0, "; farg_str += enclose_expression(coord_expr) + ".y"; } else { farg_str += coord_expr; farg_str += ", 0.0"; } } else farg_str += coord_expr; farg_str += ", "; farg_str += to_expression(args.dref); farg_str += ")"; } } else { if (imgtype.image.dim == Dim1D && options.es) { // Have to fake a second coordinate. if (type_is_floating_point(coord_type)) { // Cannot mix proj and array. if (imgtype.image.arrayed || args.base.is_proj) { coord_expr = join("vec3(", enclose_expression(coord_expr), ".x, 0.0, ", enclose_expression(coord_expr), ".y)"); } else coord_expr = join("vec2(", coord_expr, ", 0.0)"); } else { if (imgtype.image.arrayed) { coord_expr = join("ivec3(", enclose_expression(coord_expr), ".x, 0, ", enclose_expression(coord_expr), ".y)"); } else coord_expr = join("ivec2(", coord_expr, ", 0)"); } } farg_str += ", "; farg_str += coord_expr; } if (args.grad_x || args.grad_y) { forward = forward && should_forward(args.grad_x); forward = forward && should_forward(args.grad_y); farg_str += ", "; farg_str += to_expression(args.grad_x); farg_str += ", "; farg_str += to_expression(args.grad_y); } if (args.lod) { if (workaround_lod_array_shadow_as_grad) { // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0. // Implementing this as plain texture() is not safe on some implementations. if (imgtype.image.dim == Dim2D) farg_str += ", vec2(0.0), vec2(0.0)"; else if (imgtype.image.dim == DimCube) farg_str += ", vec3(0.0), vec3(0.0)"; } else { forward = forward && should_forward(args.lod); farg_str += ", "; // Lod expression for TexelFetch in GLSL must be int, and only int. 
if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) farg_str += bitcast_expression(SPIRType::Int, args.lod); else farg_str += to_expression(args.lod); } } else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) { // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. farg_str += ", 0"; } if (args.offset) { forward = forward && should_forward(args.offset); farg_str += ", "; farg_str += bitcast_expression(SPIRType::Int, args.offset); } if (args.sample) { farg_str += ", "; farg_str += bitcast_expression(SPIRType::Int, args.sample); } if (args.min_lod) { farg_str += ", "; farg_str += to_expression(args.min_lod); } if (args.sparse_texel) { // Sparse texel output parameter comes after everything else, except it's before the optional, component/bias arguments. farg_str += ", "; farg_str += to_expression(args.sparse_texel); } if (args.bias) { forward = forward && should_forward(args.bias); farg_str += ", "; farg_str += to_expression(args.bias); } if (args.component && !expression_is_constant_null(args.component)) { forward = forward && should_forward(args.component); farg_str += ", "; farg_str += bitcast_expression(SPIRType::Int, args.component); } *p_forward = forward; return farg_str; } Op CompilerGLSL::get_remapped_spirv_op(Op op) const { if (options.relax_nan_checks) { switch (op) { case OpFUnordLessThan: op = OpFOrdLessThan; break; case OpFUnordLessThanEqual: op = OpFOrdLessThanEqual; break; case OpFUnordGreaterThan: op = OpFOrdGreaterThan; break; case OpFUnordGreaterThanEqual: op = OpFOrdGreaterThanEqual; break; case OpFUnordEqual: op = OpFOrdEqual; break; case OpFOrdNotEqual: op = OpFUnordNotEqual; break; default: break; } } return op; } GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const { // Relax to non-NaN aware opcodes. 
if (options.relax_nan_checks) { switch (std450_op) { case GLSLstd450NClamp: std450_op = GLSLstd450FClamp; break; case GLSLstd450NMin: std450_op = GLSLstd450FMin; break; case GLSLstd450NMax: std450_op = GLSLstd450FMax; break; default: break; } } return std450_op; } void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length) { auto op = static_cast(eop); if (is_legacy() && is_unsigned_glsl_opcode(op)) SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets."); // If we need to do implicit bitcasts, make sure we do it with the correct type. uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length); auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); op = get_remapped_glsl_op(op); switch (op) { // FP fiddling case GLSLstd450Round: if (!is_legacy()) emit_unary_func_op(result_type, id, args[0], "round"); else { auto op0 = to_enclosed_expression(args[0]); auto &op0_type = expression_type(args[0]); auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))"); bool forward = should_forward(args[0]); emit_op(result_type, id, expr, forward); inherit_expression_dependencies(id, args[0]); } break; case GLSLstd450RoundEven: if (!is_legacy()) emit_unary_func_op(result_type, id, args[0], "roundEven"); else if (!options.es) { // This extension provides round() with round-to-even semantics. require_extension_internal("GL_EXT_gpu_shader4"); emit_unary_func_op(result_type, id, args[0], "round"); } else SPIRV_CROSS_THROW("roundEven supported only in ESSL 300."); break; case GLSLstd450Trunc: if (!is_legacy()) emit_unary_func_op(result_type, id, args[0], "trunc"); else { // Implement by value-casting to int and back. 
bool forward = should_forward(args[0]); auto op0 = to_unpacked_expression(args[0]); auto &op0_type = expression_type(args[0]); auto via_type = op0_type; via_type.basetype = SPIRType::Int; auto expr = join(type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(", op0, "))"); emit_op(result_type, id, expr, forward); inherit_expression_dependencies(id, args[0]); } break; case GLSLstd450SAbs: emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type); break; case GLSLstd450FAbs: emit_unary_func_op(result_type, id, args[0], "abs"); break; case GLSLstd450SSign: emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type); break; case GLSLstd450FSign: emit_unary_func_op(result_type, id, args[0], "sign"); break; case GLSLstd450Floor: emit_unary_func_op(result_type, id, args[0], "floor"); break; case GLSLstd450Ceil: emit_unary_func_op(result_type, id, args[0], "ceil"); break; case GLSLstd450Fract: emit_unary_func_op(result_type, id, args[0], "fract"); break; case GLSLstd450Radians: emit_unary_func_op(result_type, id, args[0], "radians"); break; case GLSLstd450Degrees: emit_unary_func_op(result_type, id, args[0], "degrees"); break; case GLSLstd450Fma: if ((!options.es && options.version < 400) || (options.es && options.version < 320)) { auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ", to_enclosed_expression(args[2])); emit_op(result_type, id, expr, should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2])); for (uint32_t i = 0; i < 3; i++) inherit_expression_dependencies(id, args[i]); } else emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma"); break; case GLSLstd450Modf: register_call_out_argument(args[1]); if (!is_legacy()) { forced_temporaries.insert(id); emit_binary_func_op(result_type, id, args[0], args[1], "modf"); } else { //NB. 
legacy GLSL doesn't have trunc() either, so we do a value cast auto &op1_type = expression_type(args[1]); auto via_type = op1_type; via_type.basetype = SPIRType::Int; statement(to_expression(args[1]), " = ", type_to_glsl(op1_type), "(", type_to_glsl(via_type), "(", to_expression(args[0]), "));"); emit_binary_op(result_type, id, args[0], args[1], "-"); } break; case GLSLstd450ModfStruct: { auto &type = get(result_type); emit_uninitialized_temporary_expression(result_type, id); if (!is_legacy()) { statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ", to_expression(id), ".", to_member_name(type, 1), ");"); } else { //NB. legacy GLSL doesn't have trunc() either, so we do a value cast auto &op0_type = expression_type(args[0]); auto via_type = op0_type; via_type.basetype = SPIRType::Int; statement(to_expression(id), ".", to_member_name(type, 1), " = ", type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(", to_expression(args[0]), "));"); statement(to_expression(id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(args[0]), " - ", to_expression(id), ".", to_member_name(type, 1), ";"); } break; } // Minmax case GLSLstd450UMin: emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false); break; case GLSLstd450SMin: emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false); break; case GLSLstd450FMin: emit_binary_func_op(result_type, id, args[0], args[1], "min"); break; case GLSLstd450FMax: emit_binary_func_op(result_type, id, args[0], args[1], "max"); break; case GLSLstd450UMax: emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false); break; case GLSLstd450SMax: emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false); break; case GLSLstd450FClamp: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); break; case GLSLstd450UClamp: emit_trinary_func_op_cast(result_type, id, args[0], 
args[1], args[2], "clamp", uint_type); break; case GLSLstd450SClamp: emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type); break; // Trig case GLSLstd450Sin: emit_unary_func_op(result_type, id, args[0], "sin"); break; case GLSLstd450Cos: emit_unary_func_op(result_type, id, args[0], "cos"); break; case GLSLstd450Tan: emit_unary_func_op(result_type, id, args[0], "tan"); break; case GLSLstd450Asin: emit_unary_func_op(result_type, id, args[0], "asin"); break; case GLSLstd450Acos: emit_unary_func_op(result_type, id, args[0], "acos"); break; case GLSLstd450Atan: emit_unary_func_op(result_type, id, args[0], "atan"); break; case GLSLstd450Sinh: if (!is_legacy()) emit_unary_func_op(result_type, id, args[0], "sinh"); else { bool forward = should_forward(args[0]); auto expr = join("(exp(", to_expression(args[0]), ") - exp(-", to_enclosed_expression(args[0]), ")) * 0.5"); emit_op(result_type, id, expr, forward); inherit_expression_dependencies(id, args[0]); } break; case GLSLstd450Cosh: if (!is_legacy()) emit_unary_func_op(result_type, id, args[0], "cosh"); else { bool forward = should_forward(args[0]); auto expr = join("(exp(", to_expression(args[0]), ") + exp(-", to_enclosed_expression(args[0]), ")) * 0.5"); emit_op(result_type, id, expr, forward); inherit_expression_dependencies(id, args[0]); } break; case GLSLstd450Tanh: if (!is_legacy()) emit_unary_func_op(result_type, id, args[0], "tanh"); else { // Create temporaries to store the result of exp(arg) and exp(-arg). uint32_t &ids = extra_sub_expressions[id]; if (!ids) { ids = ir.increase_bound_by(2); // Inherit precision qualifier (legacy has no NoContraction). 
if (has_decoration(id, DecorationRelaxedPrecision)) { set_decoration(ids, DecorationRelaxedPrecision); set_decoration(ids + 1, DecorationRelaxedPrecision); } } uint32_t epos_id = ids; uint32_t eneg_id = ids + 1; emit_op(result_type, epos_id, join("exp(", to_expression(args[0]), ")"), false); emit_op(result_type, eneg_id, join("exp(-", to_enclosed_expression(args[0]), ")"), false); inherit_expression_dependencies(epos_id, args[0]); inherit_expression_dependencies(eneg_id, args[0]); auto expr = join("(", to_enclosed_expression(epos_id), " - ", to_enclosed_expression(eneg_id), ") / " "(", to_enclosed_expression(epos_id), " + ", to_enclosed_expression(eneg_id), ")"); emit_op(result_type, id, expr, true); inherit_expression_dependencies(id, epos_id); inherit_expression_dependencies(id, eneg_id); } break; case GLSLstd450Asinh: if (!is_legacy()) emit_unary_func_op(result_type, id, args[0], "asinh"); else emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Asinh); break; case GLSLstd450Acosh: if (!is_legacy()) emit_unary_func_op(result_type, id, args[0], "acosh"); else emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Acosh); break; case GLSLstd450Atanh: if (!is_legacy()) emit_unary_func_op(result_type, id, args[0], "atanh"); else emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Atanh); break; case GLSLstd450Atan2: emit_binary_func_op(result_type, id, args[0], args[1], "atan"); break; // Exponentials case GLSLstd450Pow: emit_binary_func_op(result_type, id, args[0], args[1], "pow"); break; case GLSLstd450Exp: emit_unary_func_op(result_type, id, args[0], "exp"); break; case GLSLstd450Log: emit_unary_func_op(result_type, id, args[0], "log"); break; case GLSLstd450Exp2: emit_unary_func_op(result_type, id, args[0], "exp2"); break; case GLSLstd450Log2: emit_unary_func_op(result_type, id, args[0], "log2"); break; case GLSLstd450Sqrt: emit_unary_func_op(result_type, id, args[0], "sqrt"); break; case GLSLstd450InverseSqrt: 
emit_unary_func_op(result_type, id, args[0], "inversesqrt"); break; // Matrix math case GLSLstd450Determinant: { // No need to transpose - it doesn't affect the determinant auto *e = maybe_get(args[0]); bool old_transpose = e && e->need_transpose; if (old_transpose) e->need_transpose = false; if (options.version < 150) // also matches ES 100 { auto &type = expression_type(args[0]); assert(type.vecsize >= 2 && type.vecsize <= 4); assert(type.vecsize == type.columns); // ARB_gpu_shader_fp64 needs GLSL 150, other types are not valid if (type.basetype != SPIRType::Float) SPIRV_CROSS_THROW("Unsupported type for matrix determinant"); bool relaxed = has_decoration(id, DecorationRelaxedPrecision); require_polyfill(static_cast(PolyfillDeterminant2x2 << (type.vecsize - 2)), relaxed); emit_unary_func_op(result_type, id, args[0], (options.es && relaxed) ? "spvDeterminantMP" : "spvDeterminant"); } else emit_unary_func_op(result_type, id, args[0], "determinant"); if (old_transpose) e->need_transpose = true; break; } case GLSLstd450MatrixInverse: { // The inverse of the transpose is the same as the transpose of // the inverse, so we can just flip need_transpose of the result. auto *a = maybe_get(args[0]); bool old_transpose = a && a->need_transpose; if (old_transpose) a->need_transpose = false; const char *func = "inverse"; if (options.version < 140) // also matches ES 100 { auto &type = get(result_type); assert(type.vecsize >= 2 && type.vecsize <= 4); assert(type.vecsize == type.columns); // ARB_gpu_shader_fp64 needs GLSL 150, other types are invalid if (type.basetype != SPIRType::Float) SPIRV_CROSS_THROW("Unsupported type for matrix inverse"); bool relaxed = has_decoration(id, DecorationRelaxedPrecision); require_polyfill(static_cast(PolyfillMatrixInverse2x2 << (type.vecsize - 2)), relaxed); func = (options.es && relaxed) ? 
"spvInverseMP" : "spvInverse"; } bool forward = should_forward(args[0]); auto &e = emit_op(result_type, id, join(func, "(", to_unpacked_expression(args[0]), ")"), forward); inherit_expression_dependencies(id, args[0]); if (old_transpose) { e.need_transpose = true; a->need_transpose = true; } break; } // Lerping case GLSLstd450FMix: case GLSLstd450IMix: { emit_mix_op(result_type, id, args[0], args[1], args[2]); break; } case GLSLstd450Step: emit_binary_func_op(result_type, id, args[0], args[1], "step"); break; case GLSLstd450SmoothStep: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep"); break; // Packing case GLSLstd450Frexp: register_call_out_argument(args[1]); forced_temporaries.insert(id); emit_binary_func_op(result_type, id, args[0], args[1], "frexp"); break; case GLSLstd450FrexpStruct: { auto &type = get(result_type); emit_uninitialized_temporary_expression(result_type, id); statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ", to_expression(id), ".", to_member_name(type, 1), ");"); break; } case GLSLstd450Ldexp: { bool forward = should_forward(args[0]) && should_forward(args[1]); auto op0 = to_unpacked_expression(args[0]); auto op1 = to_unpacked_expression(args[1]); auto &op1_type = expression_type(args[1]); if (op1_type.basetype != SPIRType::Int) { // Need a value cast here. 
auto target_type = op1_type; target_type.basetype = SPIRType::Int; op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")"); } auto expr = join("ldexp(", op0, ", ", op1, ")"); emit_op(result_type, id, expr, forward); inherit_expression_dependencies(id, args[0]); inherit_expression_dependencies(id, args[1]); break; } case GLSLstd450PackSnorm4x8: emit_unary_func_op(result_type, id, args[0], "packSnorm4x8"); break; case GLSLstd450PackUnorm4x8: emit_unary_func_op(result_type, id, args[0], "packUnorm4x8"); break; case GLSLstd450PackSnorm2x16: emit_unary_func_op(result_type, id, args[0], "packSnorm2x16"); break; case GLSLstd450PackUnorm2x16: emit_unary_func_op(result_type, id, args[0], "packUnorm2x16"); break; case GLSLstd450PackHalf2x16: emit_unary_func_op(result_type, id, args[0], "packHalf2x16"); break; case GLSLstd450UnpackSnorm4x8: emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8"); break; case GLSLstd450UnpackUnorm4x8: emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8"); break; case GLSLstd450UnpackSnorm2x16: emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16"); break; case GLSLstd450UnpackUnorm2x16: emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16"); break; case GLSLstd450UnpackHalf2x16: emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16"); break; case GLSLstd450PackDouble2x32: emit_unary_func_op(result_type, id, args[0], "packDouble2x32"); break; case GLSLstd450UnpackDouble2x32: emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32"); break; // Vector math case GLSLstd450Length: emit_unary_func_op(result_type, id, args[0], "length"); break; case GLSLstd450Distance: emit_binary_func_op(result_type, id, args[0], args[1], "distance"); break; case GLSLstd450Cross: emit_binary_func_op(result_type, id, args[0], args[1], "cross"); break; case GLSLstd450Normalize: emit_unary_func_op(result_type, id, args[0], "normalize"); break; case GLSLstd450FaceForward: emit_trinary_func_op(result_type, id, 
args[0], args[1], args[2], "faceforward"); break; case GLSLstd450Reflect: emit_binary_func_op(result_type, id, args[0], args[1], "reflect"); break; case GLSLstd450Refract: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract"); break; // Bit-fiddling case GLSLstd450FindILsb: // findLSB always returns int. emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type); break; case GLSLstd450FindSMsb: emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type); break; case GLSLstd450FindUMsb: emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type, int_type); // findMSB always returns int. break; // Multisampled varying case GLSLstd450InterpolateAtCentroid: emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid"); break; case GLSLstd450InterpolateAtSample: emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample"); break; case GLSLstd450InterpolateAtOffset: emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset"); break; case GLSLstd450NMin: case GLSLstd450NMax: { emit_nminmax_op(result_type, id, args[0], args[1], op); break; } case GLSLstd450NClamp: { // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX]; if (!max_id) max_id = ir.increase_bound_by(1); // Inherit precision qualifiers. ir.meta[max_id] = ir.meta[id]; emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax); emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin); break; } default: statement("// unimplemented GLSL op ", eop); break; } } void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op) { // Need to emulate this call. 
uint32_t &ids = extra_sub_expressions[id]; if (!ids) { ids = ir.increase_bound_by(5); auto btype = get(result_type); btype.basetype = SPIRType::Boolean; set(ids, btype); } uint32_t btype_id = ids + 0; uint32_t left_nan_id = ids + 1; uint32_t right_nan_id = ids + 2; uint32_t tmp_id = ids + 3; uint32_t mixed_first_id = ids + 4; // Inherit precision qualifiers. ir.meta[tmp_id] = ir.meta[id]; ir.meta[mixed_first_id] = ir.meta[id]; if (!is_legacy()) { emit_unary_func_op(btype_id, left_nan_id, op0, "isnan"); emit_unary_func_op(btype_id, right_nan_id, op1, "isnan"); } else if (expression_type(op0).vecsize > 1) { // If the number doesn't equal itself, it must be NaN emit_binary_func_op(btype_id, left_nan_id, op0, op0, "notEqual"); emit_binary_func_op(btype_id, right_nan_id, op1, op1, "notEqual"); } else { emit_binary_op(btype_id, left_nan_id, op0, op0, "!="); emit_binary_op(btype_id, right_nan_id, op1, op1, "!="); } emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max"); emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id); emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id); } void CompilerGLSL::emit_emulated_ahyper_op(uint32_t result_type, uint32_t id, uint32_t op0, GLSLstd450 op) { const char *one = backend.float_literal_suffix ? "1.0f" : "1.0"; std::string expr; bool forward = should_forward(op0); switch (op) { case GLSLstd450Asinh: expr = join("log(", to_enclosed_expression(op0), " + sqrt(", to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " + ", one, "))"); emit_op(result_type, id, expr, forward); break; case GLSLstd450Acosh: expr = join("log(", to_enclosed_expression(op0), " + sqrt(", to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " - ", one, "))"); break; case GLSLstd450Atanh: expr = join("log((", one, " + ", to_enclosed_expression(op0), ") / " "(", one, " - ", to_enclosed_expression(op0), ")) * 0.5", backend.float_literal_suffix ? 
"f" : ""); break; default: SPIRV_CROSS_THROW("Invalid op."); } emit_op(result_type, id, expr, forward); inherit_expression_dependencies(id, op0); } void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { require_extension_internal("GL_AMD_shader_ballot"); enum AMDShaderBallot { SwizzleInvocationsAMD = 1, SwizzleInvocationsMaskedAMD = 2, WriteInvocationAMD = 3, MbcntAMD = 4 }; auto op = static_cast(eop); switch (op) { case SwizzleInvocationsAMD: emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD"); register_control_dependent_expression(id); break; case SwizzleInvocationsMaskedAMD: emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD"); register_control_dependent_expression(id); break; case WriteInvocationAMD: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD"); register_control_dependent_expression(id); break; case MbcntAMD: emit_unary_func_op(result_type, id, args[0], "mbcntAMD"); register_control_dependent_expression(id); break; default: statement("// unimplemented SPV AMD shader ballot op ", eop); break; } } void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); enum AMDShaderExplicitVertexParameter { InterpolateAtVertexAMD = 1 }; auto op = static_cast(eop); switch (op) { case InterpolateAtVertexAMD: emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD"); break; default: statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop); break; } } void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { require_extension_internal("GL_AMD_shader_trinary_minmax"); enum AMDShaderTrinaryMinMax { FMin3AMD = 1, 
UMin3AMD = 2, SMin3AMD = 3, FMax3AMD = 4, UMax3AMD = 5, SMax3AMD = 6, FMid3AMD = 7, UMid3AMD = 8, SMid3AMD = 9 }; auto op = static_cast(eop); switch (op) { case FMin3AMD: case UMin3AMD: case SMin3AMD: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3"); break; case FMax3AMD: case UMax3AMD: case SMax3AMD: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3"); break; case FMid3AMD: case UMid3AMD: case SMid3AMD: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3"); break; default: statement("// unimplemented SPV AMD shader trinary minmax op ", eop); break; } } void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { require_extension_internal("GL_AMD_gcn_shader"); enum AMDGCNShader { CubeFaceIndexAMD = 1, CubeFaceCoordAMD = 2, TimeAMD = 3 }; auto op = static_cast(eop); switch (op) { case CubeFaceIndexAMD: emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD"); break; case CubeFaceCoordAMD: emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD"); break; case TimeAMD: { string expr = "timeAMD()"; emit_op(result_type, id, expr, true); register_control_dependent_expression(id); break; } default: statement("// unimplemented SPV AMD gcn shader op ", eop); break; } } void CompilerGLSL::emit_subgroup_op(const Instruction &i) { const uint32_t *ops = stream(i); auto op = static_cast(i.op); if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op, ops)) SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics."); // If we need to do implicit bitcasts, make sure we do it with the correct type. 
uint32_t integer_width = get_integer_width_for_instruction(i); auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); switch (op) { case OpGroupNonUniformElect: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect); break; case OpGroupNonUniformBallotBitCount: { const GroupOperation operation = static_cast(ops[3]); if (operation == GroupOperationReduce) request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount); else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan) request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); } break; case OpGroupNonUniformBallotBitExtract: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract); break; case OpGroupNonUniformInverseBallot: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); break; case OpGroupNonUniformBallot: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot); break; case OpGroupNonUniformBallotFindLSB: case OpGroupNonUniformBallotFindMSB: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB); break; case OpGroupNonUniformBroadcast: case OpGroupNonUniformBroadcastFirst: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First); break; case OpGroupNonUniformShuffle: case OpGroupNonUniformShuffleXor: require_extension_internal("GL_KHR_shader_subgroup_shuffle"); break; case OpGroupNonUniformShuffleUp: case OpGroupNonUniformShuffleDown: require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative"); break; case OpGroupNonUniformAll: case OpGroupNonUniformAny: case OpGroupNonUniformAllEqual: { const SPIRType &type = expression_type(ops[3]); if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u) request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool); else 
request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT); } break; // clang-format off #define GLSL_GROUP_OP(OP)\ case OpGroupNonUniform##OP:\ {\ auto operation = static_cast(ops[3]);\ if (operation == GroupOperationClusteredReduce)\ require_extension_internal("GL_KHR_shader_subgroup_clustered");\ else if (operation == GroupOperationReduce)\ request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##Reduce);\ else if (operation == GroupOperationExclusiveScan)\ request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##ExclusiveScan);\ else if (operation == GroupOperationInclusiveScan)\ request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##InclusiveScan);\ else\ SPIRV_CROSS_THROW("Invalid group operation.");\ break;\ } GLSL_GROUP_OP(IAdd) GLSL_GROUP_OP(FAdd) GLSL_GROUP_OP(IMul) GLSL_GROUP_OP(FMul) #undef GLSL_GROUP_OP // clang-format on case OpGroupNonUniformFMin: case OpGroupNonUniformFMax: case OpGroupNonUniformSMin: case OpGroupNonUniformSMax: case OpGroupNonUniformUMin: case OpGroupNonUniformUMax: case OpGroupNonUniformBitwiseAnd: case OpGroupNonUniformBitwiseOr: case OpGroupNonUniformBitwiseXor: case OpGroupNonUniformLogicalAnd: case OpGroupNonUniformLogicalOr: case OpGroupNonUniformLogicalXor: { auto operation = static_cast(ops[3]); if (operation == GroupOperationClusteredReduce) { require_extension_internal("GL_KHR_shader_subgroup_clustered"); } else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan || operation == GroupOperationReduce) { require_extension_internal("GL_KHR_shader_subgroup_arithmetic"); } else SPIRV_CROSS_THROW("Invalid group operation."); break; } case OpGroupNonUniformQuadSwap: case OpGroupNonUniformQuadBroadcast: require_extension_internal("GL_KHR_shader_subgroup_quad"); break; default: SPIRV_CROSS_THROW("Invalid opcode for subgroup."); } uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto scope = 
static_cast(evaluate_constant_u32(ops[2])); if (scope != ScopeSubgroup) SPIRV_CROSS_THROW("Only subgroup scope is supported."); switch (op) { case OpGroupNonUniformElect: emit_op(result_type, id, "subgroupElect()", true); break; case OpGroupNonUniformBroadcast: emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast"); break; case OpGroupNonUniformBroadcastFirst: emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst"); break; case OpGroupNonUniformBallot: emit_unary_func_op(result_type, id, ops[3], "subgroupBallot"); break; case OpGroupNonUniformInverseBallot: emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot"); break; case OpGroupNonUniformBallotBitExtract: emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract"); break; case OpGroupNonUniformBallotFindLSB: emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB"); break; case OpGroupNonUniformBallotFindMSB: emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB"); break; case OpGroupNonUniformBallotBitCount: { auto operation = static_cast(ops[3]); if (operation == GroupOperationReduce) emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount"); else if (operation == GroupOperationInclusiveScan) emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount"); else if (operation == GroupOperationExclusiveScan) emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount"); else SPIRV_CROSS_THROW("Invalid BitCount operation."); break; } case OpGroupNonUniformShuffle: emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle"); break; case OpGroupNonUniformShuffleXor: emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor"); break; case OpGroupNonUniformShuffleUp: emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp"); break; case OpGroupNonUniformShuffleDown: emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown"); 
break; case OpGroupNonUniformAll: emit_unary_func_op(result_type, id, ops[3], "subgroupAll"); break; case OpGroupNonUniformAny: emit_unary_func_op(result_type, id, ops[3], "subgroupAny"); break; case OpGroupNonUniformAllEqual: emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual"); break; // clang-format off #define GLSL_GROUP_OP(op, glsl_op) \ case OpGroupNonUniform##op: \ { \ auto operation = static_cast(ops[3]); \ if (operation == GroupOperationReduce) \ emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \ else if (operation == GroupOperationInclusiveScan) \ emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \ else if (operation == GroupOperationExclusiveScan) \ emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \ else if (operation == GroupOperationClusteredReduce) \ emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \ else \ SPIRV_CROSS_THROW("Invalid group operation."); \ break; \ } #define GLSL_GROUP_OP_CAST(op, glsl_op, type) \ case OpGroupNonUniform##op: \ { \ auto operation = static_cast(ops[3]); \ if (operation == GroupOperationReduce) \ emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \ else if (operation == GroupOperationInclusiveScan) \ emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \ else if (operation == GroupOperationExclusiveScan) \ emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \ else if (operation == GroupOperationClusteredReduce) \ emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \ else \ SPIRV_CROSS_THROW("Invalid group operation."); \ break; \ } GLSL_GROUP_OP(FAdd, Add) GLSL_GROUP_OP(FMul, Mul) GLSL_GROUP_OP(FMin, Min) GLSL_GROUP_OP(FMax, Max) GLSL_GROUP_OP(IAdd, Add) GLSL_GROUP_OP(IMul, Mul) GLSL_GROUP_OP_CAST(SMin, Min, int_type) GLSL_GROUP_OP_CAST(SMax, Max, 
int_type) GLSL_GROUP_OP_CAST(UMin, Min, uint_type) GLSL_GROUP_OP_CAST(UMax, Max, uint_type) GLSL_GROUP_OP(BitwiseAnd, And) GLSL_GROUP_OP(BitwiseOr, Or) GLSL_GROUP_OP(BitwiseXor, Xor) GLSL_GROUP_OP(LogicalAnd, And) GLSL_GROUP_OP(LogicalOr, Or) GLSL_GROUP_OP(LogicalXor, Xor) #undef GLSL_GROUP_OP #undef GLSL_GROUP_OP_CAST // clang-format on case OpGroupNonUniformQuadSwap: { uint32_t direction = evaluate_constant_u32(ops[4]); if (direction == 0) emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal"); else if (direction == 1) emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical"); else if (direction == 2) emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal"); else SPIRV_CROSS_THROW("Invalid quad swap direction."); break; } case OpGroupNonUniformQuadBroadcast: { emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast"); break; } default: SPIRV_CROSS_THROW("Invalid opcode for subgroup."); } register_control_dependent_expression(id); } string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) { // OpBitcast can deal with pointers. if (out_type.pointer || in_type.pointer) { if (out_type.vecsize == 2 || in_type.vecsize == 2) require_extension_internal("GL_EXT_buffer_reference_uvec2"); return type_to_glsl(out_type); } if (out_type.basetype == in_type.basetype) return ""; assert(out_type.basetype != SPIRType::Boolean); assert(in_type.basetype != SPIRType::Boolean); bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); bool same_size_cast = out_type.width == in_type.width; // Trivial bitcast case, casts between integers. if (integral_cast && same_size_cast) return type_to_glsl(out_type); // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types). 
if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1) return "unpack8"; else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1) return "pack16"; else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1) return "pack32"; // Floating <-> Integer special casts. Just have to enumerate all cases. :( // 16-bit, 32-bit and 64-bit floats. if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) { if (is_legacy_es()) SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL."); else if (!options.es && options.version < 330) require_extension_internal("GL_ARB_shader_bit_encoding"); return "floatBitsToUint"; } else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) { if (is_legacy_es()) SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL."); else if (!options.es && options.version < 330) require_extension_internal("GL_ARB_shader_bit_encoding"); return "floatBitsToInt"; } else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) { if (is_legacy_es()) SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL."); else if (!options.es && options.version < 330) require_extension_internal("GL_ARB_shader_bit_encoding"); return "uintBitsToFloat"; } else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) { if (is_legacy_es()) SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL."); else if (!options.es && options.version < 330) require_extension_internal("GL_ARB_shader_bit_encoding"); return "intBitsToFloat"; } else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) return "doubleBitsToInt64"; else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) return "doubleBitsToUint64"; else if (out_type.basetype == SPIRType::Double && in_type.basetype == 
SPIRType::Int64) return "int64BitsToDouble"; else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) return "uint64BitsToDouble"; else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half) return "float16BitsToInt16"; else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) return "float16BitsToUint16"; else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short) return "int16BitsToFloat16"; else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) return "uint16BitsToFloat16"; // And finally, some even more special purpose casts. if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2) return "packUint2x32"; else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2) return "unpackUint2x32"; else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) return "unpackFloat2x16"; else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) return "packFloat2x16"; else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2) return "packInt2x16"; else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1) return "unpackInt2x16"; else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2) return "packUint2x16"; else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) return "unpackUint2x16"; else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4) return "packInt4x16"; else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1) return 
"unpackInt4x16"; else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4) return "packUint4x16"; else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1) return "unpackUint4x16"; return ""; } string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument) { auto op = bitcast_glsl_op(result_type, expression_type(argument)); if (op.empty()) return to_enclosed_unpacked_expression(argument); else return join(op, "(", to_unpacked_expression(argument), ")"); } std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg) { auto expr = to_expression(arg); auto &src_type = expression_type(arg); if (src_type.basetype != target_type) { auto target = src_type; target.basetype = target_type; expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")"); } return expr; } std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type, const std::string &expr) { if (target_type.basetype == expr_type) return expr; auto src_type = target_type; src_type.basetype = expr_type; return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")"); } string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) { switch (builtin) { case BuiltInPosition: return "gl_Position"; case BuiltInPointSize: return "gl_PointSize"; case BuiltInClipDistance: { if (options.es) require_extension_internal("GL_EXT_clip_cull_distance"); return "gl_ClipDistance"; } case BuiltInCullDistance: { if (options.es) require_extension_internal("GL_EXT_clip_cull_distance"); return "gl_CullDistance"; } case BuiltInVertexId: if (options.vulkan_semantics) SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. 
This shader was created " "with GL semantics."); return "gl_VertexID"; case BuiltInInstanceId: if (options.vulkan_semantics) { auto model = get_entry_point().model; switch (model) { case spv::ExecutionModelIntersectionKHR: case spv::ExecutionModelAnyHitKHR: case spv::ExecutionModelClosestHitKHR: // gl_InstanceID is allowed in these shaders. break; default: SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was " "created with GL semantics."); } } if (!options.es && options.version < 140) { require_extension_internal("GL_ARB_draw_instanced"); } return "gl_InstanceID"; case BuiltInVertexIndex: if (options.vulkan_semantics) return "gl_VertexIndex"; else return "gl_VertexID"; // gl_VertexID already has the base offset applied. case BuiltInInstanceIndex: if (options.vulkan_semantics) return "gl_InstanceIndex"; if (!options.es && options.version < 140) { require_extension_internal("GL_ARB_draw_instanced"); } if (options.vertex.support_nonzero_base_instance) { if (!options.vulkan_semantics) { // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported. require_extension_internal("GL_ARB_shader_draw_parameters"); } return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID. 
} else return "gl_InstanceID"; case BuiltInPrimitiveId: if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry) return "gl_PrimitiveIDIn"; else return "gl_PrimitiveID"; case BuiltInInvocationId: return "gl_InvocationID"; case BuiltInLayer: return "gl_Layer"; case BuiltInViewportIndex: return "gl_ViewportIndex"; case BuiltInTessLevelOuter: return "gl_TessLevelOuter"; case BuiltInTessLevelInner: return "gl_TessLevelInner"; case BuiltInTessCoord: return "gl_TessCoord"; case BuiltInFragCoord: return "gl_FragCoord"; case BuiltInPointCoord: return "gl_PointCoord"; case BuiltInFrontFacing: return "gl_FrontFacing"; case BuiltInFragDepth: return "gl_FragDepth"; case BuiltInNumWorkgroups: return "gl_NumWorkGroups"; case BuiltInWorkgroupSize: return "gl_WorkGroupSize"; case BuiltInWorkgroupId: return "gl_WorkGroupID"; case BuiltInLocalInvocationId: return "gl_LocalInvocationID"; case BuiltInGlobalInvocationId: return "gl_GlobalInvocationID"; case BuiltInLocalInvocationIndex: return "gl_LocalInvocationIndex"; case BuiltInHelperInvocation: return "gl_HelperInvocation"; case BuiltInBaseVertex: if (options.es) SPIRV_CROSS_THROW("BaseVertex not supported in ES profile."); if (options.vulkan_semantics) { if (options.version < 460) { require_extension_internal("GL_ARB_shader_draw_parameters"); return "gl_BaseVertexARB"; } return "gl_BaseVertex"; } // On regular GL, this is soft-enabled and we emit ifdefs in code. require_extension_internal("GL_ARB_shader_draw_parameters"); return "SPIRV_Cross_BaseVertex"; case BuiltInBaseInstance: if (options.es) SPIRV_CROSS_THROW("BaseInstance not supported in ES profile."); if (options.vulkan_semantics) { if (options.version < 460) { require_extension_internal("GL_ARB_shader_draw_parameters"); return "gl_BaseInstanceARB"; } return "gl_BaseInstance"; } // On regular GL, this is soft-enabled and we emit ifdefs in code. 
require_extension_internal("GL_ARB_shader_draw_parameters"); return "SPIRV_Cross_BaseInstance"; case BuiltInDrawIndex: if (options.es) SPIRV_CROSS_THROW("DrawIndex not supported in ES profile."); if (options.vulkan_semantics) { if (options.version < 460) { require_extension_internal("GL_ARB_shader_draw_parameters"); return "gl_DrawIDARB"; } return "gl_DrawID"; } // On regular GL, this is soft-enabled and we emit ifdefs in code. require_extension_internal("GL_ARB_shader_draw_parameters"); return "gl_DrawIDARB"; case BuiltInSampleId: if (is_legacy()) SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL."); else if (options.es && options.version < 320) require_extension_internal("GL_OES_sample_variables"); else if (!options.es && options.version < 400) require_extension_internal("GL_ARB_sample_shading"); return "gl_SampleID"; case BuiltInSampleMask: if (is_legacy()) SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL."); else if (options.es && options.version < 320) require_extension_internal("GL_OES_sample_variables"); else if (!options.es && options.version < 400) require_extension_internal("GL_ARB_sample_shading"); if (storage == StorageClassInput) return "gl_SampleMaskIn"; else return "gl_SampleMask"; case BuiltInSamplePosition: if (is_legacy()) SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL."); else if (options.es && options.version < 320) require_extension_internal("GL_OES_sample_variables"); else if (!options.es && options.version < 400) require_extension_internal("GL_ARB_sample_shading"); return "gl_SamplePosition"; case BuiltInViewIndex: if (options.vulkan_semantics) return "gl_ViewIndex"; else return "gl_ViewID_OVR"; case BuiltInNumSubgroups: request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups); return "gl_NumSubgroups"; case BuiltInSubgroupId: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID); return "gl_SubgroupID"; case BuiltInSubgroupSize: 
request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize); return "gl_SubgroupSize"; case BuiltInSubgroupLocalInvocationId: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID); return "gl_SubgroupInvocationID"; case BuiltInSubgroupEqMask: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); return "gl_SubgroupEqMask"; case BuiltInSubgroupGeMask: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); return "gl_SubgroupGeMask"; case BuiltInSubgroupGtMask: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); return "gl_SubgroupGtMask"; case BuiltInSubgroupLeMask: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); return "gl_SubgroupLeMask"; case BuiltInSubgroupLtMask: request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); return "gl_SubgroupLtMask"; case BuiltInLaunchIdKHR: return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV"; case BuiltInLaunchSizeKHR: return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV"; case BuiltInWorldRayOriginKHR: return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV"; case BuiltInWorldRayDirectionKHR: return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV"; case BuiltInObjectRayOriginKHR: return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV"; case BuiltInObjectRayDirectionKHR: return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV"; case BuiltInRayTminKHR: return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV"; case BuiltInRayTmaxKHR: return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV"; case BuiltInInstanceCustomIndexKHR: return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV"; case BuiltInObjectToWorldKHR: return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV"; case BuiltInWorldToObjectKHR: return ray_tracing_is_khr ? 
"gl_WorldToObjectEXT" : "gl_WorldToObjectNV"; case BuiltInHitTNV: // gl_HitTEXT is an alias of RayTMax in KHR. return "gl_HitTNV"; case BuiltInHitKindKHR: return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV"; case BuiltInIncomingRayFlagsKHR: return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV"; case BuiltInBaryCoordKHR: { if (options.es && options.version < 320) SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320."); else if (!options.es && options.version < 450) SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450."); if (barycentric_is_nv) { require_extension_internal("GL_NV_fragment_shader_barycentric"); return "gl_BaryCoordNV"; } else { require_extension_internal("GL_EXT_fragment_shader_barycentric"); return "gl_BaryCoordEXT"; } } case BuiltInBaryCoordNoPerspNV: { if (options.es && options.version < 320) SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320."); else if (!options.es && options.version < 450) SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450."); if (barycentric_is_nv) { require_extension_internal("GL_NV_fragment_shader_barycentric"); return "gl_BaryCoordNoPerspNV"; } else { require_extension_internal("GL_EXT_fragment_shader_barycentric"); return "gl_BaryCoordNoPerspEXT"; } } case BuiltInFragStencilRefEXT: { if (!options.es) { require_extension_internal("GL_ARB_shader_stencil_export"); return "gl_FragStencilRefARB"; } else SPIRV_CROSS_THROW("Stencil export not supported in GLES."); } case BuiltInPrimitiveShadingRateKHR: { if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL."); require_extension_internal("GL_EXT_fragment_shading_rate"); return "gl_PrimitiveShadingRateEXT"; } case BuiltInShadingRateKHR: { if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL."); require_extension_internal("GL_EXT_fragment_shading_rate"); return "gl_ShadingRateEXT"; } case BuiltInDeviceIndex: if (!options.vulkan_semantics) 
SPIRV_CROSS_THROW("Need Vulkan semantics for device group support."); require_extension_internal("GL_EXT_device_group"); return "gl_DeviceIndex"; case BuiltInFullyCoveredEXT: if (!options.es) require_extension_internal("GL_NV_conservative_raster_underestimation"); else SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation."); return "gl_FragFullyCoveredNV"; case BuiltInPrimitiveTriangleIndicesEXT: return "gl_PrimitiveTriangleIndicesEXT"; case BuiltInPrimitiveLineIndicesEXT: return "gl_PrimitiveLineIndicesEXT"; case BuiltInPrimitivePointIndicesEXT: return "gl_PrimitivePointIndicesEXT"; case BuiltInCullPrimitiveEXT: return "gl_CullPrimitiveEXT"; default: return join("gl_BuiltIn_", convert_to_string(builtin)); } }

// Maps a vector component index (0..3) to its swizzle letter.
const char *CompilerGLSL::index_to_swizzle(uint32_t index)
{
    static const char *const component_names[] = { "x", "y", "z", "w" };

    // Don't crash on out-of-range indices, but engage the "undefined behavior"
    // described for out-of-bounds logical addressing in spec: fall back to "x".
    return index < 4 ? component_names[index] : "x";
}

// Appends one array subscript to the access chain expression `expr`.
//
// flags selects whether `index` is a literal value or an ID to be turned into an
// expression, whether this is a pointer chain, and whether expression reads are
// registered. The base ID and type arguments are unused here.
void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
                                                      AccessChainFlags flags, bool &access_chain_is_arrayed,
                                                      uint32_t index)
{
    const bool literal_index = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
    const bool pointer_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
    const bool register_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;

    string index_text;
    if (literal_index)
        index_text = convert_to_string(index);
    else
        index_text = to_unpacked_expression(index, register_read);

    // Common case: we can just use the index directly as a new subscript.
    if (!(pointer_chain && access_chain_is_arrayed))
    {
        expr += "[";
        expr += index_text;
        expr += "]";
        return;
    }

    // For the case where the base of an OpPtrAccessChain already ends in [n],
    // the index is used as an offset to the existing index: "...[n + (index)]".
    const size_t closing_bracket = expr.find_last_of(']');
    const string before_bracket = expr.substr(0, closing_bracket);
    const string from_bracket = expr.substr(closing_bracket);
    expr = before_bracket + " + " + enclose_expression(index_text) + from_bracket;
}

// Reports whether struct member builtins reached through an access chain should be
// replaced by their builtin names. This implementation always answers yes.
bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
{
    return true;
}

string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, AccessChainMeta *meta) { string expr; bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0; bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0; bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0; if (!chain_only) { // We handle transpose explicitly, so don't resolve that here. auto *e = maybe_get(base); bool old_transpose = e && e->need_transpose; if (e) e->need_transpose = false; expr = to_enclosed_expression(base, register_expression_read); if (e) e->need_transpose = old_transpose; } // Start traversing type hierarchy at the proper non-pointer types, // but keep type_id referencing the original pointer for use below. uint32_t type_id = expression_type_id(base); if (!backend.native_pointers) { if (ptr_chain) SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain."); // Wrapped buffer reference pointer types will need to poke into the internal "value" member before // continuing the access chain.
if (should_dereference(base)) { auto &type = get(type_id); expr = dereference_expression(type, expr); } } const auto *type = &get_pointee_type(type_id); bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos; bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base); bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked); uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID); bool is_invariant = has_decoration(base, DecorationInvariant); bool relaxed_precision = has_decoration(base, DecorationRelaxedPrecision); bool pending_array_enclose = false; bool dimension_flatten = false; bool access_meshlet_position_y = false; if (auto *base_expr = maybe_get(base)) { access_meshlet_position_y = base_expr->access_meshlet_position_y; } // If we are translating access to a structured buffer, the first subscript '._m0' must be hidden bool hide_first_subscript = count > 1 && is_user_type_structured(base); const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) { AccessChainFlags mod_flags = flags; if (!is_literal) mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT; if (!is_ptr_chain) mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT; access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index); check_physical_type_cast(expr, type, physical_type); }; for (uint32_t i = 0; i < count; i++) { uint32_t index = indices[i]; bool is_literal = index_is_literal; if (is_literal && msb_is_id && (index >> 31u) != 0u) { is_literal = false; index &= 0x7fffffffu; } // Pointer chains if (ptr_chain && i == 0) { // If we are flattening multidimensional arrays, only create opening bracket on first // array index. 
if (options.flatten_multidimensional_arrays) { dimension_flatten = type->array.size() >= 1; pending_array_enclose = dimension_flatten; if (pending_array_enclose) expr += "["; } if (options.flatten_multidimensional_arrays && dimension_flatten) { // If we are flattening multidimensional arrays, do manual stride computation. if (is_literal) expr += convert_to_string(index); else expr += to_enclosed_expression(index, register_expression_read); for (auto j = uint32_t(type->array.size()); j; j--) { expr += " * "; expr += enclose_expression(to_array_size(*type, j - 1)); } if (type->array.empty()) pending_array_enclose = false; else expr += " + "; if (!pending_array_enclose) expr += "]"; } else { append_index(index, is_literal, true); } if (type->basetype == SPIRType::ControlPointArray) { type_id = type->parent_type; type = &get(type_id); } access_chain_is_arrayed = true; } // Arrays else if (!type->array.empty()) { // If we are flattening multidimensional arrays, only create opening bracket on first // array index. if (options.flatten_multidimensional_arrays && !pending_array_enclose) { dimension_flatten = type->array.size() > 1; pending_array_enclose = dimension_flatten; if (pending_array_enclose) expr += "["; } assert(type->parent_type); auto *var = maybe_get(base); if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) && !has_decoration(type->self, DecorationBlock)) { // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared. // Normally, these variables live in blocks when compiled from GLSL, // but HLSL seems to just emit straight arrays here. // We must pretend this access goes through gl_in/gl_out arrays // to be able to access certain builtins as arrays. // Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT. 
auto builtin = ir.meta[base].decoration.builtin_type; bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT; switch (builtin) { // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. // case BuiltInClipDistance: case BuiltInPosition: case BuiltInPointSize: if (mesh_shader) expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr); else if (var->storage == StorageClassInput) expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr); else if (var->storage == StorageClassOutput) expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr); else append_index(index, is_literal); break; case BuiltInPrimitiveId: case BuiltInLayer: case BuiltInViewportIndex: case BuiltInCullPrimitiveEXT: case BuiltInPrimitiveShadingRateKHR: if (mesh_shader) expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); else append_index(index, is_literal); break; default: append_index(index, is_literal); break; } } else if (backend.force_merged_mesh_block && i == 0 && var && !is_builtin_variable(*var) && var->storage == StorageClassOutput) { if (is_per_primitive_variable(*var)) expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); else expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr); } else if (options.flatten_multidimensional_arrays && dimension_flatten) { // If we are flattening multidimensional arrays, do manual stride computation. 
auto &parent_type = get(type->parent_type); if (is_literal) expr += convert_to_string(index); else expr += to_enclosed_expression(index, register_expression_read); for (auto j = uint32_t(parent_type.array.size()); j; j--) { expr += " * "; expr += enclose_expression(to_array_size(parent_type, j - 1)); } if (parent_type.array.empty()) pending_array_enclose = false; else expr += " + "; if (!pending_array_enclose) expr += "]"; } // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal. // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask. else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn)))) { append_index(index, is_literal); } if (var && has_decoration(var->self, DecorationBuiltIn) && get_decoration(var->self, DecorationBuiltIn) == BuiltInPosition && get_execution_model() == ExecutionModelMeshEXT) { access_meshlet_position_y = true; } type_id = type->parent_type; type = &get(type_id); access_chain_is_arrayed = true; } // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. 
else if (type->basetype == SPIRType::Struct) { if (!is_literal) index = evaluate_constant_u32(index); if (index < uint32_t(type->member_type_index_redirection.size())) index = type->member_type_index_redirection[index]; if (index >= type->member_types.size()) SPIRV_CROSS_THROW("Member index is out of bounds!"); if (hide_first_subscript) { // First "._m0" subscript has been hidden, subsequent fields must be emitted even for structured buffers hide_first_subscript = false; } else { BuiltIn builtin = BuiltInMax; if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base)) { if (access_chain_is_arrayed) { expr += "."; expr += builtin_to_glsl(builtin, type->storage); } else expr = builtin_to_glsl(builtin, type->storage); if (builtin == BuiltInPosition && get_execution_model() == ExecutionModelMeshEXT) { access_meshlet_position_y = true; } } else { // If the member has a qualified name, use it as the entire chain string qual_mbr_name = get_member_qualified_name(type_id, index); if (!qual_mbr_name.empty()) expr = qual_mbr_name; else if (flatten_member_reference) expr += join("_", to_member_name(*type, index)); else { // Any pointer de-refences for values are handled in the first access chain. // For pointer chains, the pointer-ness is resolved through an array access. // The only time this is not true is when accessing array of SSBO/UBO. // This case is explicitly handled. 
expr += to_member_reference(base, *type, index, ptr_chain || i != 0); } } } if (has_member_decoration(type->self, index, DecorationInvariant)) is_invariant = true; if (has_member_decoration(type->self, index, DecorationRelaxedPrecision)) relaxed_precision = true; is_packed = member_is_packed_physical_type(*type, index); if (member_is_remapped_physical_type(*type, index)) physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID); else physical_type = 0; row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); type = &get(type->member_types[index]); } // Matrix -> Vector else if (type->columns > 1) { // If we have a row-major matrix here, we need to defer any transpose in case this access chain // is used to store a column. We can resolve it right here and now if we access a scalar directly, // by flipping indexing order of the matrix. expr += "["; if (is_literal) expr += convert_to_string(index); else expr += to_unpacked_expression(index, register_expression_read); expr += "]"; type_id = type->parent_type; type = &get(type_id); } // Vector -> Scalar else if (type->vecsize > 1) { string deferred_index; if (row_major_matrix_needs_conversion) { // Flip indexing order. auto column_index = expr.find_last_of('['); if (column_index != string::npos) { deferred_index = expr.substr(column_index); expr.resize(column_index); } } // Internally, access chain implementation can also be used on composites, // ignore scalar access workarounds in this case. StorageClass effective_storage = StorageClassGeneric; bool ignore_potential_sliced_writes = false; if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0) { if (expression_type(base).pointer) effective_storage = get_expression_effective_storage_class(base); // Special consideration for control points. // Control points can only be written by InvocationID, so there is no need // to consider scalar access chains here. 
// Cleans up some cases where it's very painful to determine the accurate storage class // since blocks can be partially masked ... auto *var = maybe_get_backing_variable(base); if (var && var->storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationControl && !has_decoration(var->self, DecorationPatch)) { ignore_potential_sliced_writes = true; } } else ignore_potential_sliced_writes = true; if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes) { // On some backends, we might not be able to safely access individual scalars in a vector. // To work around this, we might have to cast the access chain reference to something which can, // like a pointer to scalar, which we can then index into. prepare_access_chain_for_scalar_access(expr, get(type->parent_type), effective_storage, is_packed); } if (is_literal) { bool out_of_bounds = (index >= type->vecsize); if (!is_packed && !row_major_matrix_needs_conversion) { expr += "."; expr += index_to_swizzle(out_of_bounds ? 0 : index); } else { // For packed vectors, we can only access them as an array, not by swizzle. expr += join("[", out_of_bounds ? 0 : index, "]"); } } else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion) { auto &c = get(index); bool out_of_bounds = (c.scalar() >= type->vecsize); if (c.specialization) { // If the index is a spec constant, we cannot turn extract into a swizzle. expr += join("[", out_of_bounds ? "0" : to_expression(index), "]"); } else { expr += "."; expr += index_to_swizzle(out_of_bounds ? 
0 : c.scalar()); } } else { expr += "["; expr += to_unpacked_expression(index, register_expression_read); expr += "]"; } if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes) { prepare_access_chain_for_scalar_access(expr, get(type->parent_type), effective_storage, is_packed); } if (access_meshlet_position_y) { if (is_literal) { access_meshlet_position_y = index == 1; } else { const auto *c = maybe_get(index); if (c) access_meshlet_position_y = c->scalar() == 1; else { // We don't know, but we have to assume no. // Flip Y in mesh shaders is an opt-in horrible hack, so we'll have to assume shaders try to behave. access_meshlet_position_y = false; } } } expr += deferred_index; row_major_matrix_needs_conversion = false; is_packed = false; physical_type = 0; type_id = type->parent_type; type = &get(type_id); } else if (!backend.allow_truncated_access_chain) SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); } if (pending_array_enclose) { SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, " "but the access chain was terminated in the middle of a multidimensional array. 
" "This is not supported."); } if (meta) { meta->need_transpose = row_major_matrix_needs_conversion; meta->storage_is_packed = is_packed; meta->storage_is_invariant = is_invariant; meta->storage_physical_type = physical_type; meta->relaxed_precision = relaxed_precision; meta->access_meshlet_position_y = access_meshlet_position_y; } return expr; } void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t) { } void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &) { } string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index) { auto ret = join(basename, "_", to_member_name(type, index)); ParsedIR::sanitize_underscores(ret); return ret; } string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, AccessChainMeta *meta, bool ptr_chain) { if (flattened_buffer_blocks.count(base)) { uint32_t matrix_stride = 0; uint32_t array_stride = 0; bool need_transpose = false; flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride, &array_stride, ptr_chain); if (meta) { meta->need_transpose = target_type.columns > 1 && need_transpose; meta->storage_is_packed = false; } return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride, need_transpose); } else if (flattened_structs.count(base) && count > 0) { AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; if (ptr_chain) flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; if (flattened_structs[base]) { flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT; if (meta) meta->flattened_struct = target_type.basetype == SPIRType::Struct; } auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1); if (meta) { meta->need_transpose = false; meta->storage_is_packed = false; } auto basename = 
to_flattened_access_chain_expression(base); auto ret = join(basename, "_", chain); ParsedIR::sanitize_underscores(ret); return ret; } else { AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; if (ptr_chain) flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; return access_chain_internal(base, indices, count, flags, meta); } } string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type) { auto expr = type_to_glsl_constructor(type); expr += '('; for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) { if (i) expr += ", "; auto &member_type = get(type.member_types[i]); if (member_type.basetype == SPIRType::Struct) expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type); else expr += to_flattened_struct_member(basename, type, i); } expr += ')'; return expr; } std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id) { // Do not use to_expression as that will unflatten access chains. string basename; if (const auto *var = maybe_get(id)) basename = to_name(var->self); else if (const auto *expr = maybe_get(id)) basename = expr->expression; else basename = to_expression(id); return basename; } void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type, const SmallVector &indices) { SmallVector sub_indices = indices; sub_indices.push_back(0); auto *member_type = &type; for (auto &index : indices) member_type = &get(member_type->member_types[index]); for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++) { sub_indices.back() = i; auto lhs = join(basename, "_", to_member_name(*member_type, i)); ParsedIR::sanitize_underscores(lhs); if (get(member_type->member_types[i]).basetype == SPIRType::Struct) { store_flattened_struct(lhs, rhs_id, type, sub_indices); } else { auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices); statement(lhs, " = ", rhs, ";"); } } } void 
CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value) { auto &type = expression_type(lhs_id); auto basename = to_flattened_access_chain_expression(lhs_id); store_flattened_struct(basename, value, type, {}); } std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, uint32_t /* array_stride */, bool need_transpose) { if (!target_type.array.empty()) SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened"); else if (target_type.basetype == SPIRType::Struct) return flattened_access_chain_struct(base, indices, count, target_type, offset); else if (target_type.columns > 1) return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose); else return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose); } std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset) { std::string expr; if (backend.can_declare_struct_inline) { expr += type_to_glsl_constructor(target_type); expr += "("; } else expr += "{"; for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i) { if (i != 0) expr += ", "; const SPIRType &member_type = get(target_type.member_types[i]); uint32_t member_offset = type_struct_member_offset(target_type, i); // The access chain terminates at the struct, so we need to find matrix strides and row-major information // ahead of time. 
bool need_transpose = false; bool relaxed = false; uint32_t matrix_stride = 0; if (member_type.columns > 1) { auto decorations = combined_decoration_for_member(target_type, i); need_transpose = decorations.get(DecorationRowMajor); relaxed = decorations.get(DecorationRelaxedPrecision); matrix_stride = type_struct_member_matrix_stride(target_type, i); } auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride, 0 /* array_stride */, need_transpose); // Cannot forward transpositions, so resolve them here. if (need_transpose) expr += convert_row_major_matrix(tmp, member_type, 0, false, relaxed); else expr += tmp; } expr += backend.can_declare_struct_inline ? ")" : "}"; return expr; } std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, bool need_transpose) { assert(matrix_stride); SPIRType tmp_type = target_type; if (need_transpose) swap(tmp_type.vecsize, tmp_type.columns); std::string expr; expr += type_to_glsl_constructor(tmp_type); expr += "("; for (uint32_t i = 0; i < tmp_type.columns; i++) { if (i != 0) expr += ", "; expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride, /* need_transpose= */ false); } expr += ")"; return expr; } std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, bool need_transpose) { auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16); auto buffer_name = to_name(expression_type(base).self); if (need_transpose) { std::string expr; if (target_type.vecsize > 1) { expr += type_to_glsl_constructor(target_type); expr += "("; } for (uint32_t i = 0; i < target_type.vecsize; ++i) { if (i != 0) expr += ", "; uint32_t component_offset = result.second + i 
* matrix_stride; assert(component_offset % (target_type.width / 8) == 0); uint32_t index = component_offset / (target_type.width / 8); expr += buffer_name; expr += "["; expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + expr += convert_to_string(index / 4); expr += "]"; expr += vector_swizzle(1, index % 4); } if (target_type.vecsize > 1) { expr += ")"; } return expr; } else { assert(result.second % (target_type.width / 8) == 0); uint32_t index = result.second / (target_type.width / 8); std::string expr; expr += buffer_name; expr += "["; expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + expr += convert_to_string(index / 4); expr += "]"; expr += vector_swizzle(target_type.vecsize, index % 4); return expr; } } std::pair CompilerGLSL::flattened_access_chain_offset( const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride, bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain) { // Start traversing type hierarchy at the proper non-pointer types. const auto *type = &get_pointee_type(basetype); std::string expr; // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout. bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false; uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0; uint32_t array_stride = out_array_stride ? *out_array_stride : 0; for (uint32_t i = 0; i < count; i++) { uint32_t index = indices[i]; // Pointers if (ptr_chain && i == 0) { // Here, the pointer type will be decorated with an array stride. array_stride = get_decoration(basetype.self, DecorationArrayStride); if (!array_stride) SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); auto *constant = maybe_get(index); if (constant) { // Constant array access. 
offset += constant->scalar() * array_stride; } else { // Dynamic array access. if (array_stride % word_stride) { SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " "of a 4-component vector. " "Likely culprit here is a float or vec2 array inside a push " "constant block which is std430. " "This cannot be flattened. Try using std140 layout instead."); } expr += to_enclosed_expression(index); expr += " * "; expr += convert_to_string(array_stride / word_stride); expr += " + "; } } // Arrays else if (!type->array.empty()) { auto *constant = maybe_get(index); if (constant) { // Constant array access. offset += constant->scalar() * array_stride; } else { // Dynamic array access. if (array_stride % word_stride) { SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " "of a 4-component vector. " "Likely culprit here is a float or vec2 array inside a push " "constant block which is std430. " "This cannot be flattened. Try using std140 layout instead."); } expr += to_enclosed_expression(index, false); expr += " * "; expr += convert_to_string(array_stride / word_stride); expr += " + "; } uint32_t parent_type = type->parent_type; type = &get(parent_type); if (!type->array.empty()) array_stride = get_decoration(parent_type, DecorationArrayStride); } // For structs, the index refers to a constant, which indexes into the members. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. 
else if (type->basetype == SPIRType::Struct) { index = evaluate_constant_u32(index); if (index >= type->member_types.size()) SPIRV_CROSS_THROW("Member index is out of bounds!"); offset += type_struct_member_offset(*type, index); auto &struct_type = *type; type = &get(type->member_types[index]); if (type->columns > 1) { matrix_stride = type_struct_member_matrix_stride(struct_type, index); row_major_matrix_needs_conversion = combined_decoration_for_member(struct_type, index).get(DecorationRowMajor); } else row_major_matrix_needs_conversion = false; if (!type->array.empty()) array_stride = type_struct_member_array_stride(struct_type, index); } // Matrix -> Vector else if (type->columns > 1) { auto *constant = maybe_get(index); if (constant) { index = evaluate_constant_u32(index); offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride); } else { uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride; // Dynamic array access. if (indexing_stride % word_stride) { SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a " "4-component vector. " "Likely culprit here is a row-major matrix being accessed dynamically. " "This cannot be flattened. Try using std140 layout instead."); } expr += to_enclosed_expression(index, false); expr += " * "; expr += convert_to_string(indexing_stride / word_stride); expr += " + "; } type = &get(type->parent_type); } // Vector -> Scalar else if (type->vecsize > 1) { auto *constant = maybe_get(index); if (constant) { index = evaluate_constant_u32(index); offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8)); } else { uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8); // Dynamic array access. if (indexing_stride % word_stride) { SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the " "size of a 4-component vector. 
" "This cannot be flattened in legacy targets."); } expr += to_enclosed_expression(index, false); expr += " * "; expr += convert_to_string(indexing_stride / word_stride); expr += " + "; } type = &get(type->parent_type); } else SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); } if (need_transpose) *need_transpose = row_major_matrix_needs_conversion; if (out_matrix_stride) *out_matrix_stride = matrix_stride; if (out_array_stride) *out_array_stride = array_stride; return std::make_pair(expr, offset); } bool CompilerGLSL::should_dereference(uint32_t id) { const auto &type = expression_type(id); // Non-pointer expressions don't need to be dereferenced. if (!type.pointer) return false; // Handles shouldn't be dereferenced either. if (!expression_is_lvalue(id)) return false; // If id is a variable but not a phi variable, we should not dereference it. if (auto *var = maybe_get(id)) return var->phi_variable; if (auto *expr = maybe_get(id)) { // If id is an access chain, we should not dereference it. if (expr->access_chain) return false; // If id is a forwarded copy of a variable pointer, we should not dereference it. SPIRVariable *var = nullptr; while (expr->loaded_from && expression_is_forwarded(expr->self)) { auto &src_type = expression_type(expr->loaded_from); // To be a copy, the pointer and its source expression must be the // same type. Can't check type.self, because for some reason that's // usually the base type with pointers stripped off. This check is // complex enough that I've hoisted it out of the while condition. if (src_type.pointer != type.pointer || src_type.pointer_depth != type.pointer_depth || src_type.parent_type != type.parent_type) break; if ((var = maybe_get(expr->loaded_from))) break; if (!(expr = maybe_get(expr->loaded_from))) break; } return !var || var->phi_variable; } // Otherwise, we should dereference this pointer expression. 
return true; } bool CompilerGLSL::should_forward(uint32_t id) const { // If id is a variable we will try to forward it regardless of force_temporary check below // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL auto *var = maybe_get(id); if (var) { // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation. return !(has_decoration(id, DecorationBuiltIn) && has_decoration(id, DecorationVolatile)); } // For debugging emit temporary variables for all expressions if (options.force_temporary) return false; // If an expression carries enough dependencies we need to stop forwarding at some point, // or we explode compilers. There are usually limits to how much we can nest expressions. auto *expr = maybe_get(id); const uint32_t max_expression_dependencies = 64; if (expr && expr->expression_dependencies.size() >= max_expression_dependencies) return false; if (expr && expr->loaded_from && has_decoration(expr->loaded_from, DecorationBuiltIn) && has_decoration(expr->loaded_from, DecorationVolatile)) { // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation. return false; } // Immutable expression can always be forwarded. if (is_immutable(id)) return true; return false; } bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const { // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion. 
return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id); } void CompilerGLSL::track_expression_read(uint32_t id) { switch (ir.ids[id].get_type()) { case TypeExpression: { auto &e = get(id); for (auto implied_read : e.implied_read_expressions) track_expression_read(implied_read); break; } case TypeAccessChain: { auto &e = get(id); for (auto implied_read : e.implied_read_expressions) track_expression_read(implied_read); break; } default: break; } // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id)) { auto &v = expression_usage_counts[id]; v++; // If we create an expression outside a loop, // but access it inside a loop, we're implicitly reading it multiple times. // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion // working inside the backend compiler. if (expression_read_implies_multiple_reads(id)) v++; if (v >= 2) { //if (v == 2) // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id); // Force a recompile after this pass to avoid forwarding this variable. force_temporary_and_recompile(id); } } } bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure) { if (forced_temporaries.find(id) != end(forced_temporaries)) return false; for (uint32_t i = 0; i < num_args; i++) if (!should_forward(args[i])) return false; // We need to forward globals as well. 
if (!pure) { for (auto global : global_variables) if (!should_forward(global)) return false; for (auto aliased : aliased_variables) if (!should_forward(aliased)) return false; } return true; } void CompilerGLSL::register_impure_function_call() { // Impure functions can modify globals and aliased variables, so invalidate them as well. for (auto global : global_variables) flush_dependees(get(global)); for (auto aliased : aliased_variables) flush_dependees(get(aliased)); } void CompilerGLSL::register_call_out_argument(uint32_t id) { register_write(id); auto *var = maybe_get(id); if (var) flush_variable_declaration(var->self); } string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) { // These variables are always function local, // so make sure we emit the variable without storage qualifiers. // Some backends will inject custom variables locally in a function // with a storage qualifier which is not function-local. auto old_storage = var.storage; var.storage = StorageClassFunction; auto expr = variable_decl(var); var.storage = old_storage; return expr; } void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) { // Ensure that we declare phi-variable copies even if the original declaration isn't deferred if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self)) { auto &type = get(var.basetype); auto &flags = get_decoration_bitset(var.self); statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); flushed_phi_variables.insert(var.self); } } void CompilerGLSL::flush_variable_declaration(uint32_t id) { // Ensure that we declare phi-variable copies even if the original declaration isn't deferred auto *var = maybe_get(id); if (var && var->deferred_declaration) { string initializer; if (options.force_zero_initialized_variables && (var->storage == StorageClassFunction || var->storage == StorageClassGeneric || var->storage == StorageClassPrivate) && !var->initializer && 
type_can_zero_initialize(get_variable_data_type(*var))) { initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var))); } statement(variable_decl_function_local(*var), initializer, ";"); var->deferred_declaration = false; } if (var) { emit_variable_temporary_copies(*var); } } bool CompilerGLSL::remove_duplicate_swizzle(string &op) { auto pos = op.find_last_of('.'); if (pos == string::npos || pos == 0) return false; string final_swiz = op.substr(pos + 1, string::npos); if (backend.swizzle_is_function) { if (final_swiz.size() < 2) return false; if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") final_swiz.erase(final_swiz.size() - 2, string::npos); else return false; } // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. // If so, and previous swizzle is of same length, // we can drop the final swizzle altogether. for (uint32_t i = 0; i < final_swiz.size(); i++) { static const char expected[] = { 'x', 'y', 'z', 'w' }; if (i >= 4 || final_swiz[i] != expected[i]) return false; } auto prevpos = op.find_last_of('.', pos - 1); if (prevpos == string::npos) return false; prevpos++; // Make sure there are only swizzles here ... for (auto i = prevpos; i < pos; i++) { if (op[i] < 'w' || op[i] > 'z') { // If swizzles are foo.xyz() like in C++ backend for example, check for that. if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')') break; return false; } } // If original swizzle is large enough, just carve out the components we need. // E.g. foobar.wyx.xy will turn into foobar.wy. if (pos - prevpos >= final_swiz.size()) { op.erase(prevpos + final_swiz.size(), string::npos); // Add back the function call ... if (backend.swizzle_is_function) op += "()"; } return true; } // Optimizes away vector swizzles where we have something like // vec3 foo; // foo.xyz <-- swizzle expression does nothing. // This is a very common pattern after OpCompositeCombine. 
bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) { auto pos = op.find_last_of('.'); if (pos == string::npos || pos == 0) return false; string final_swiz = op.substr(pos + 1, string::npos); if (backend.swizzle_is_function) { if (final_swiz.size() < 2) return false; if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") final_swiz.erase(final_swiz.size() - 2, string::npos); else return false; } // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. // If so, and previous swizzle is of same length, // we can drop the final swizzle altogether. for (uint32_t i = 0; i < final_swiz.size(); i++) { static const char expected[] = { 'x', 'y', 'z', 'w' }; if (i >= 4 || final_swiz[i] != expected[i]) return false; } auto &type = expression_type(base); // Sanity checking ... assert(type.columns == 1 && type.array.empty()); if (type.vecsize == final_swiz.size()) op.erase(pos, string::npos); return true; } string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) { ID base = 0; string op; string subop; // Can only merge swizzles for vectors. auto &type = get(return_type); bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1; bool swizzle_optimization = false; for (uint32_t i = 0; i < length; i++) { auto *e = maybe_get(elems[i]); // If we're merging another scalar which belongs to the same base // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible! if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base) { // Only supposed to be used for vector swizzle -> scalar. assert(!e->expression.empty() && e->expression.front() == '.'); subop += e->expression.substr(1, string::npos); swizzle_optimization = true; } else { // We'll likely end up with duplicated swizzles, e.g. 
// foobar.xyz.xyz from patterns like // OpVectorShuffle // OpCompositeExtract x 3 // OpCompositeConstruct 3x + other scalar. // Just modify op in-place. if (swizzle_optimization) { if (backend.swizzle_is_function) subop += "()"; // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles. // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on. // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize. // Essentially, we can only remove one set of swizzles, since that's what we have control over ... // Case 1: // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done. // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo. // Case 2: // foo.xyz: Duplicate swizzle won't kick in. // If foo is vec3, we can remove xyz, giving just foo. if (!remove_duplicate_swizzle(subop)) remove_unity_swizzle(base, subop); // Strips away redundant parens if we created them during component extraction. strip_enclosed_expression(subop); swizzle_optimization = false; op += subop; } else op += subop; if (i) op += ", "; bool uses_buffer_offset = type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset); subop = to_composite_constructor_expression(type, elems[i], uses_buffer_offset); } base = e ? e->base_expression : ID(0); } if (swizzle_optimization) { if (backend.swizzle_is_function) subop += "()"; if (!remove_duplicate_swizzle(subop)) remove_unity_swizzle(base, subop); // Strips away redundant parens if we created them during component extraction. 
strip_enclosed_expression(subop); } op += subop; return op; } bool CompilerGLSL::skip_argument(uint32_t id) const { if (!combined_image_samplers.empty() || !options.vulkan_semantics) { auto &type = expression_type(id); if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1)) return true; } return false; } bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs) { // Do this with strings because we have a very clear pattern we can check for and it avoids // adding lots of special cases to the code emission. if (rhs.size() < lhs.size() + 3) return false; // Do not optimize matrices. They are a bit awkward to reason about in general // (in which order does operation happen?), and it does not work on MSL anyways. if (type.vecsize > 1 && type.columns > 1) return false; auto index = rhs.find(lhs); if (index != 0) return false; // TODO: Shift operators, but it's not important for now. auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1); if (op != lhs.size() + 1) return false; // Check that the op is followed by space. This excludes && and ||. if (rhs[op + 1] != ' ') return false; char bop = rhs[op]; auto expr = rhs.substr(lhs.size() + 3); // Avoids false positives where we get a = a * b + c. // Normally, these expressions are always enclosed, but unexpected code paths may end up hitting this. if (needs_enclose_expression(expr)) return false; // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code. // Find some common patterns which are equivalent. 
if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)")) statement(lhs, bop, bop, ";"); else statement(lhs, " ", bop, "= ", expr, ";"); return true; } void CompilerGLSL::register_control_dependent_expression(uint32_t expr) { if (forwarded_temporaries.find(expr) == end(forwarded_temporaries)) return; assert(current_emitting_block); current_emitting_block->invalidate_expressions.push_back(expr); } void CompilerGLSL::emit_block_instructions(SPIRBlock &block) { current_emitting_block = █ if (backend.requires_relaxed_precision_analysis) { // If PHI variables are consumed in unexpected precision contexts, copy them here. for (size_t i = 0, n = block.phi_variables.size(); i < n; i++) { auto &phi = block.phi_variables[i]; // Ensure we only copy once. We know a-priori that this array will lay out // the same function variables together. if (i && block.phi_variables[i - 1].function_variable == phi.function_variable) continue; auto itr = temporary_to_mirror_precision_alias.find(phi.function_variable); if (itr != temporary_to_mirror_precision_alias.end()) { // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject, // so it helps to have handle_instruction_precision() on the outside of emit_instruction(). EmbeddedInstruction inst; inst.op = OpCopyObject; inst.length = 3; inst.ops.push_back(expression_type_id(itr->first)); inst.ops.push_back(itr->second); inst.ops.push_back(itr->first); emit_instruction(inst); } } } for (auto &op : block.ops) { auto temporary_copy = handle_instruction_precision(op); emit_instruction(op); if (temporary_copy.dst_id) { // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject, // so it helps to have handle_instruction_precision() on the outside of emit_instruction(). 
EmbeddedInstruction inst; inst.op = OpCopyObject; inst.length = 3; inst.ops.push_back(expression_type_id(temporary_copy.src_id)); inst.ops.push_back(temporary_copy.dst_id); inst.ops.push_back(temporary_copy.src_id); // Never attempt to hoist mirrored temporaries. // They are hoisted in lock-step with their parents. block_temporary_hoisting = true; emit_instruction(inst); block_temporary_hoisting = false; } } current_emitting_block = nullptr; } void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr) { // Allow trivially forwarded expressions like OpLoad or trivial shuffles, // these will be marked as having suppressed usage tracking. // Our only concern is to make sure arithmetic operations are done in similar ways. if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) && forced_invariant_temporaries.count(expr.self) == 0) { force_temporary_and_recompile(expr.self); forced_invariant_temporaries.insert(expr.self); for (auto &dependent : expr.expression_dependencies) disallow_forwarding_in_expression_chain(get(dependent)); } } void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id) { // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary // in one translation unit, but not another, e.g. due to multiple use of an expression. // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent // expressions to be temporaries. // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough // for all reasonable uses of invariant. 
if (!has_decoration(store_id, DecorationInvariant)) return; auto *expr = maybe_get(value_id); if (!expr) return; disallow_forwarding_in_expression_chain(*expr); } void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) { auto rhs = to_pointer_expression(rhs_expression); // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null. if (!rhs.empty()) { handle_store_to_invariant_variable(lhs_expression, rhs_expression); if (!unroll_array_to_complex_store(lhs_expression, rhs_expression)) { auto lhs = to_dereferenced_expression(lhs_expression); if (has_decoration(lhs_expression, DecorationNonUniform)) convert_non_uniform_expression(lhs, lhs_expression); // We might need to cast in order to store to a builtin. cast_to_variable_store(lhs_expression, rhs, expression_type(rhs_expression)); // Tries to optimize assignments like " = op expr". // While this is purely cosmetic, this is important for legacy ESSL where loop // variable increments must be in either i++ or i += const-expr. // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) statement(lhs, " = ", rhs, ";"); } register_write(lhs_expression); } } uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const { if (instr.length < 3) return 32; auto *ops = stream(instr); switch (instr.op) { case OpSConvert: case OpConvertSToF: case OpUConvert: case OpConvertUToF: case OpIEqual: case OpINotEqual: case OpSLessThan: case OpSLessThanEqual: case OpSGreaterThan: case OpSGreaterThanEqual: case OpULessThan: case OpULessThanEqual: case OpUGreaterThan: case OpUGreaterThanEqual: return expression_type(ops[2]).width; default: { // We can look at result type which is more robust. 
auto *type = maybe_get(ops[0]); if (type && type_is_integral(*type)) return type->width; else return 32; } } } uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const { if (length < 1) return 32; switch (op) { case GLSLstd450SAbs: case GLSLstd450SSign: case GLSLstd450UMin: case GLSLstd450SMin: case GLSLstd450UMax: case GLSLstd450SMax: case GLSLstd450UClamp: case GLSLstd450SClamp: case GLSLstd450FindSMsb: case GLSLstd450FindUMsb: return expression_type(ops[0]).width; default: { // We don't need to care about other opcodes, just return 32. return 32; } } } void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length) { // Only GLSL supports RelaxedPrecision directly. // We cannot implement this in HLSL or MSL because it is tied to the type system. // In SPIR-V, everything must masquerade as 32-bit. if (!backend.requires_relaxed_precision_analysis) return; auto input_precision = analyze_expression_precision(args, length); // For expressions which are loaded or directly forwarded, we inherit mediump implicitly. // For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id. if (input_precision == Options::Mediump) set_decoration(dst_id, DecorationRelaxedPrecision); } CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const { // Now, analyze the precision at which the arguments would run. // GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision // for the inputs. Constants do not have inherent precision and do not contribute to this decision. // If all inputs are constants, they inherit precision from outer expressions, including an l-value. // In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with // correct precision. 
bool expression_has_highp = false; bool expression_has_mediump = false; for (uint32_t i = 0; i < length; i++) { uint32_t arg = args[i]; auto handle_type = ir.ids[arg].get_type(); if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef) continue; if (has_decoration(arg, DecorationRelaxedPrecision)) expression_has_mediump = true; else expression_has_highp = true; } if (expression_has_highp) return Options::Highp; else if (expression_has_mediump) return Options::Mediump; else return Options::DontCare; } void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length) { if (!backend.requires_relaxed_precision_analysis) return; auto &type = get(type_id); // RelaxedPrecision only applies to 32-bit values. if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt) return; bool operation_is_highp = !has_decoration(dst_id, DecorationRelaxedPrecision); auto input_precision = analyze_expression_precision(args, length); if (input_precision == Options::DontCare) { consume_temporary_in_precision_context(type_id, dst_id, input_precision); return; } // In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined. // In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit. // However, if the expression is not, inputs must be expanded to 32-bit first, // since the operation must run at high precision. // This is the awkward part, because if we have mediump inputs, or expressions which derived from mediump, // we might have to forcefully bind the source IDs to highp temporaries. This is done by clearing decorations // and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables. 
if ((operation_is_highp && input_precision == Options::Mediump) || (!operation_is_highp && input_precision == Options::Highp)) { auto precision = operation_is_highp ? Options::Highp : Options::Mediump; for (uint32_t i = 0; i < length; i++) { // Rewrites the opcode so that we consume an ID in correct precision context. // This is pretty hacky, but it's the most straight forward way of implementing this without adding // lots of extra passes to rewrite all code blocks. args[i] = consume_temporary_in_precision_context(expression_type_id(args[i]), args[i], precision); } } } // This is probably not exhaustive ... static bool opcode_is_precision_sensitive_operation(Op op) { switch (op) { case OpFAdd: case OpFSub: case OpFMul: case OpFNegate: case OpIAdd: case OpISub: case OpIMul: case OpSNegate: case OpFMod: case OpFDiv: case OpFRem: case OpSMod: case OpSDiv: case OpSRem: case OpUMod: case OpUDiv: case OpVectorTimesMatrix: case OpMatrixTimesVector: case OpMatrixTimesMatrix: case OpDPdx: case OpDPdy: case OpDPdxCoarse: case OpDPdyCoarse: case OpDPdxFine: case OpDPdyFine: case OpFwidth: case OpFwidthCoarse: case OpFwidthFine: case OpVectorTimesScalar: case OpMatrixTimesScalar: case OpOuterProduct: case OpFConvert: case OpSConvert: case OpUConvert: case OpConvertSToF: case OpConvertUToF: case OpConvertFToU: case OpConvertFToS: return true; default: return false; } } // Instructions which just load data but don't do any arithmetic operation should just inherit the decoration. // SPIR-V doesn't require this, but it's somewhat implied it has to work this way, relaxed precision is only // relevant when operating on the IDs, not when shuffling things around. 
static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count) { switch (op) { case OpLoad: case OpAccessChain: case OpInBoundsAccessChain: case OpCompositeExtract: case OpVectorExtractDynamic: case OpSampledImage: case OpImage: case OpCopyObject: case OpImageRead: case OpImageFetch: case OpImageSampleImplicitLod: case OpImageSampleProjImplicitLod: case OpImageSampleDrefImplicitLod: case OpImageSampleProjDrefImplicitLod: case OpImageSampleExplicitLod: case OpImageSampleProjExplicitLod: case OpImageSampleDrefExplicitLod: case OpImageSampleProjDrefExplicitLod: case OpImageGather: case OpImageDrefGather: case OpImageSparseRead: case OpImageSparseFetch: case OpImageSparseSampleImplicitLod: case OpImageSparseSampleProjImplicitLod: case OpImageSparseSampleDrefImplicitLod: case OpImageSparseSampleProjDrefImplicitLod: case OpImageSparseSampleExplicitLod: case OpImageSparseSampleProjExplicitLod: case OpImageSparseSampleDrefExplicitLod: case OpImageSparseSampleProjDrefExplicitLod: case OpImageSparseGather: case OpImageSparseDrefGather: arg_count = 1; return true; case OpVectorShuffle: arg_count = 2; return true; case OpCompositeConstruct: return true; default: break; } return false; } CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction) { auto ops = stream_mutable(instruction); auto opcode = static_cast(instruction.op); uint32_t length = instruction.length; if (backend.requires_relaxed_precision_analysis) { if (length > 2) { uint32_t forwarding_length = length - 2; if (opcode_is_precision_sensitive_operation(opcode)) analyze_precision_requirements(ops[0], ops[1], &ops[2], forwarding_length); else if (opcode == OpExtInst && length >= 5 && get(ops[2]).ext == SPIRExtension::GLSL) analyze_precision_requirements(ops[0], ops[1], &ops[4], forwarding_length - 2); else if (opcode_is_precision_forwarding_instruction(opcode, forwarding_length)) forward_relaxed_precision(ops[1], &ops[2], forwarding_length); } uint32_t 
result_type = 0, result_id = 0; if (instruction_to_result_type(result_type, result_id, opcode, ops, length)) { auto itr = temporary_to_mirror_precision_alias.find(ops[1]); if (itr != temporary_to_mirror_precision_alias.end()) return { itr->second, itr->first }; } } return {}; } void CompilerGLSL::emit_instruction(const Instruction &instruction) { auto ops = stream(instruction); auto opcode = static_cast(instruction.op); uint32_t length = instruction.length; #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) #define GLSL_BOP_CAST(op, type) \ emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, \ opcode_is_sign_invariant(opcode), implicit_integer_promotion) #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) #define GLSL_UOP_CAST(op) emit_unary_op_cast(ops[0], ops[1], ops[2], #op) #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) #define GLSL_BFOP_CAST(op, type) \ emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) // If we need to do implicit bitcasts, make sure we do it with the correct type. uint32_t integer_width = get_integer_width_for_instruction(instruction); auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); // Handle C implicit integer promotion rules. // If we get implicit promotion to int, need to make sure we cast by value to intended return type, // otherwise, future sign-dependent operations and bitcasts will break. 
bool implicit_integer_promotion = integer_width < 32 && backend.implicit_c_integer_promotion_rules && opcode_can_promote_integer_implicitly(opcode) && get(ops[0]).vecsize == 1; opcode = get_remapped_spirv_op(opcode); switch (opcode) { // Dealing with memory case OpLoad: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t ptr = ops[2]; flush_variable_declaration(ptr); // If we're loading from memory that cannot be changed by the shader, // just forward the expression directly to avoid needless temporaries. // If an expression is mutable and forwardable, we speculate that it is immutable. bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); // If loading a non-native row-major matrix, mark the expression as need_transpose. bool need_transpose = false; bool old_need_transpose = false; auto *ptr_expression = maybe_get(ptr); if (forward) { // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while // taking the expression. if (ptr_expression && ptr_expression->need_transpose) { old_need_transpose = true; ptr_expression->need_transpose = false; need_transpose = true; } else if (is_non_native_row_major_matrix(ptr)) need_transpose = true; } // If we are forwarding this load, // don't register the read to access chain here, defer that to when we actually use the expression, // using the add_implied_read_expression mechanism. string expr; bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked); bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID); if (forward || (!is_packed && !is_remapped)) { // For the simple case, we do not need to deal with repacking. expr = to_dereferenced_expression(ptr, false); } else { // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before // storing the expression to a temporary. 
expr = to_unpacked_expression(ptr); } auto &type = get(result_type); auto &expr_type = expression_type(ptr); // If the expression has more vector components than the result type, insert // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might // happen with e.g. the MSL backend replacing the type of an input variable. if (expr_type.vecsize > type.vecsize) expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0)); if (forward && ptr_expression) ptr_expression->need_transpose = old_need_transpose; // We might need to cast in order to load from a builtin. cast_from_variable_load(ptr, expr, type); if (forward && ptr_expression) ptr_expression->need_transpose = false; // We might be trying to load a gl_Position[N], where we should be // doing float4[](gl_in[i].gl_Position, ...) instead. // Similar workarounds are required for input arrays in tessellation. // Also, loading from gl_SampleMask array needs special unroll. unroll_array_from_complex_load(id, ptr, expr); if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform)) { // If we're loading something non-opaque, we need to handle non-uniform descriptor access. convert_non_uniform_expression(expr, ptr); } if (forward && ptr_expression) ptr_expression->need_transpose = old_need_transpose; bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0; if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened) rewrite_load_for_wrapped_row_major(expr, result_type, ptr); // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. // However, if we try to load a complex, composite object from a flattened buffer, // we should avoid emitting the same code over and over and lower the result to a temporary. 
bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1)); SPIRExpression *e = nullptr; if (!forward && expression_is_non_value_type_array(ptr)) { // Complicated load case where we need to make a copy of ptr, but we cannot, because // it is an array, and our backend does not support arrays as value types. // Emit the temporary, and copy it explicitly. e = &emit_uninitialized_temporary_expression(result_type, id); emit_array_copy(nullptr, id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr)); } else e = &emit_op(result_type, id, expr, forward, !usage_tracking); e->need_transpose = need_transpose; register_read(id, ptr, forward); if (forward) { // Pass through whether the result is of a packed type and the physical type ID. if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked)) set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)) { set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)); } } else { // This might have been set on an earlier compilation iteration, force it to be unset. unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); } inherit_expression_dependencies(id, ptr); if (forward) add_implied_read_expression(*e, ptr); break; } case OpInBoundsAccessChain: case OpAccessChain: case OpPtrAccessChain: { auto *var = maybe_get(ops[2]); if (var) flush_variable_declaration(var->self); // If the base is immutable, the access chain pointer must also be. // If an expression is mutable and forwardable, we speculate that it is immutable. 
AccessChainMeta meta; bool ptr_chain = opcode == OpPtrAccessChain; auto &target_type = get(ops[0]); auto e = access_chain(ops[2], &ops[3], length - 3, target_type, &meta, ptr_chain); // If the base is flattened UBO of struct type, the expression has to be a composite. // In that case, backends which do not support inline syntax need it to be bound to a temporary. // Otherwise, invalid expressions like ({UBO[0].xyz, UBO[0].w, UBO[1]}).member are emitted. bool requires_temporary = false; if (flattened_buffer_blocks.count(ops[2]) && target_type.basetype == SPIRType::Struct) requires_temporary = !backend.can_declare_struct_inline; auto &expr = requires_temporary ? emit_op(ops[0], ops[1], std::move(e), false) : set(ops[1], std::move(e), ops[0], should_forward(ops[2])); auto *backing_variable = maybe_get_backing_variable(ops[2]); expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]); expr.need_transpose = meta.need_transpose; expr.access_chain = true; expr.access_meshlet_position_y = meta.access_meshlet_position_y; // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. if (meta.storage_is_packed) set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked); if (meta.storage_physical_type != 0) set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); if (meta.storage_is_invariant) set_decoration(ops[1], DecorationInvariant); if (meta.flattened_struct) flattened_structs[ops[1]] = true; if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) set_decoration(ops[1], DecorationRelaxedPrecision); // If we have some expression dependencies in our access chain, this access chain is technically a forwarded // temporary which could be subject to invalidation. // Need to assume we're forwarded while calling inherit_expression_depdendencies. 
forwarded_temporaries.insert(ops[1]); // The access chain itself is never forced to a temporary, but its dependencies might. suppressed_usage_tracking.insert(ops[1]); for (uint32_t i = 2; i < length; i++) { inherit_expression_dependencies(ops[1], ops[i]); add_implied_read_expression(expr, ops[i]); } // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries, // we're not forwarded after all. if (expr.expression_dependencies.empty()) forwarded_temporaries.erase(ops[1]); break; } case OpStore: { auto *var = maybe_get(ops[0]); if (var && var->statically_assigned) var->static_expression = ops[1]; else if (var && var->loop_variable && !var->loop_variable_enable) var->static_expression = ops[1]; else if (var && var->remapped_variable && var->static_expression) { // Skip the write. } else if (flattened_structs.count(ops[0])) { store_flattened_struct(ops[0], ops[1]); register_write(ops[0]); } else { emit_store_statement(ops[0], ops[1]); } // Storing a pointer results in a variable pointer, so we must conservatively assume // we can write through it. if (expression_type(ops[1]).pointer) register_write(ops[1]); break; } case OpArrayLength: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); if (has_decoration(ops[2], DecorationNonUniform)) convert_non_uniform_expression(e, ops[2]); set(id, join(type_to_glsl(get(result_type)), "(", e, ".length())"), result_type, true); break; } // Function calls case OpFunctionCall: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t func = ops[2]; const auto *arg = &ops[3]; length -= 3; auto &callee = get(func); auto &return_type = get(callee.return_type); bool pure = function_is_pure(callee); bool callee_has_out_variables = false; bool emit_return_value_as_argument = false; // Invalidate out variables passed to functions since they can be OpStore'd to. 
for (uint32_t i = 0; i < length; i++) { if (callee.arguments[i].write_count) { register_call_out_argument(arg[i]); callee_has_out_variables = true; } flush_variable_declaration(arg[i]); } if (!return_type.array.empty() && !backend.can_return_array) { callee_has_out_variables = true; emit_return_value_as_argument = true; } if (!pure) register_impure_function_call(); string funexpr; SmallVector arglist; funexpr += to_name(func) + "("; if (emit_return_value_as_argument) { statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";"); arglist.push_back(to_name(id)); } for (uint32_t i = 0; i < length; i++) { // Do not pass in separate images or samplers if we're remapping // to combined image samplers. if (skip_argument(arg[i])) continue; arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i])); } for (auto &combined : callee.combined_parameters) { auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]); auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]); arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); } append_global_func_args(callee, length, arglist); funexpr += merge(arglist); funexpr += ")"; // Check for function call constraints. check_function_call_constraints(arg, length); if (return_type.basetype != SPIRType::Void) { // If the function actually writes to an out variable, // take the conservative route and do not forward. // The problem is that we might not read the function // result (and emit the function) before an out variable // is read (common case when return value is ignored! // In order to avoid start tracking invalid variables, // just avoid the forwarding problem altogether. 
bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure && (forced_temporaries.find(id) == end(forced_temporaries)); if (emit_return_value_as_argument) { statement(funexpr, ";"); set(id, to_name(id), result_type, true); } else emit_op(result_type, id, funexpr, forward); // Function calls are implicit loads from all variables in question. // Set dependencies for them. for (uint32_t i = 0; i < length; i++) register_read(id, arg[i], forward); // If we're going to forward the temporary result, // put dependencies on every variable that must not change. if (forward) register_global_read_dependencies(callee, id); } else statement(funexpr, ";"); break; } // Composite munging case OpCompositeConstruct: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; const auto *const elems = &ops[2]; length -= 2; bool forward = true; for (uint32_t i = 0; i < length; i++) forward = forward && should_forward(elems[i]); auto &out_type = get(result_type); auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr; // Only splat if we have vector constructors. // Arrays and structs must be initialized properly in full. bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; bool splat = false; bool swizzle_splat = false; if (in_type) { splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting; swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar; if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type)) { // Cannot swizzle literal integers as a special case. 
swizzle_splat = false; } } if (splat || swizzle_splat) { uint32_t input = elems[0]; for (uint32_t i = 0; i < length; i++) { if (input != elems[i]) { splat = false; swizzle_splat = false; } } } if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) forward = false; if (!out_type.array.empty() && !backend.can_declare_arrays_inline) forward = false; if (type_is_empty(out_type) && !backend.supports_empty_struct) forward = false; string constructor_op; if (backend.use_initializer_list && composite) { bool needs_trailing_tracket = false; // Only use this path if we are building composites. // This path cannot be used for arithmetic. if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) constructor_op += type_to_glsl_constructor(get(result_type)); else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty()) { // MSL path. Array constructor is baked into type here, do not use _constructor variant. 
constructor_op += type_to_glsl_constructor(get(result_type)) + "("; needs_trailing_tracket = true; } constructor_op += "{ "; if (type_is_empty(out_type) && !backend.supports_empty_struct) constructor_op += "0"; else if (splat) constructor_op += to_unpacked_expression(elems[0]); else constructor_op += build_composite_combiner(result_type, elems, length); constructor_op += " }"; if (needs_trailing_tracket) constructor_op += ")"; } else if (swizzle_splat && !composite) { constructor_op = remap_swizzle(get(result_type), 1, to_unpacked_expression(elems[0])); } else { constructor_op = type_to_glsl_constructor(get(result_type)) + "("; if (type_is_empty(out_type) && !backend.supports_empty_struct) constructor_op += "0"; else if (splat) constructor_op += to_unpacked_expression(elems[0]); else constructor_op += build_composite_combiner(result_type, elems, length); constructor_op += ")"; } if (!constructor_op.empty()) { emit_op(result_type, id, constructor_op, forward); for (uint32_t i = 0; i < length; i++) inherit_expression_dependencies(id, elems[i]); } break; } case OpVectorInsertDynamic: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t vec = ops[2]; uint32_t comp = ops[3]; uint32_t index = ops[4]; flush_variable_declaration(vec); // Make a copy, then use access chain to store the variable. 
statement(declare_temporary(result_type, id), to_expression(vec), ";"); set(id, to_name(id), result_type, true); auto chain = access_chain_internal(id, &index, 1, 0, nullptr); statement(chain, " = ", to_unpacked_expression(comp), ";"); break; } case OpVectorExtractDynamic: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr); emit_op(result_type, id, expr, should_forward(ops[2])); inherit_expression_dependencies(id, ops[2]); inherit_expression_dependencies(id, ops[3]); break; } case OpCompositeExtract: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; length -= 3; auto &type = get(result_type); // We can only split the expression here if our expression is forwarded as a temporary. bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries); // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case. auto &composite_type = expression_type(ops[2]); bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty(); if (composite_type_is_complex) allow_base_expression = false; // Packed expressions or physical ID mapped expressions cannot be split up. if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) || has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID)) allow_base_expression = false; // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern // into the base expression. 
if (is_non_native_row_major_matrix(ops[2])) allow_base_expression = false; AccessChainMeta meta; SPIRExpression *e = nullptr; auto *c = maybe_get(ops[2]); if (c && !c->specialization && !composite_type_is_complex) { auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length); e = &emit_op(result_type, id, expr, true, true); } else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) { // Only apply this optimization if result is scalar. // We want to split the access chain from the base. // This is so we can later combine different CompositeExtract results // with CompositeConstruct without emitting code like // // vec3 temp = texture(...).xyz // vec4(temp.x, temp.y, temp.z, 1.0). // // when we actually wanted to emit this // vec4(texture(...).xyz, 1.0). // // Including the base will prevent this and would trigger multiple reads // from expression causing it to be forced to an actual temporary in GLSL. auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2])); inherit_expression_dependencies(id, ops[2]); e->base_expression = ops[2]; if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) set_decoration(ops[1], DecorationRelaxedPrecision); } else { auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2])); inherit_expression_dependencies(id, ops[2]); } // Pass through some meta information to the loaded expression. // We can still end up loading a buffer type to a variable, then CompositeExtract from it // instead of loading everything through an access chain. 
e->need_transpose = meta.need_transpose; if (meta.storage_is_packed) set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); if (meta.storage_physical_type != 0) set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); if (meta.storage_is_invariant) set_decoration(id, DecorationInvariant); break; } case OpCompositeInsert: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t obj = ops[2]; uint32_t composite = ops[3]; const auto *elems = &ops[4]; length -= 4; flush_variable_declaration(composite); // CompositeInsert requires a copy + modification, but this is very awkward code in HLL. // Speculate that the input composite is no longer used, and we can modify it in-place. // There are various scenarios where this is not possible to satisfy. bool can_modify_in_place = true; forced_temporaries.insert(id); // Cannot safely RMW PHI variables since they have no way to be invalidated, // forcing temporaries is not going to help. // This is similar for Constant and Undef inputs. // The only safe thing to RMW is SPIRExpression. // If the expression has already been used (i.e. used in a continue block), we have to keep using // that loop variable, since we won't be able to override the expression after the fact. // If the composite is hoisted, we might never be able to properly invalidate any usage // of that composite in a subsequent loop iteration. if (invalid_expressions.count(composite) || block_composite_insert_overwrite.count(composite) || hoisted_temporaries.count(id) || hoisted_temporaries.count(composite) || maybe_get(composite) == nullptr) { can_modify_in_place = false; } else if (backend.requires_relaxed_precision_analysis && has_decoration(composite, DecorationRelaxedPrecision) != has_decoration(id, DecorationRelaxedPrecision) && get(result_type).basetype != SPIRType::Struct) { // Similarly, if precision does not match for input and output, // we cannot alias them. 
If we write a composite into a relaxed precision // ID, we might get a false truncation. can_modify_in_place = false; } if (can_modify_in_place) { // Have to make sure the modified SSA value is bound to a temporary so we can modify it in-place. if (!forced_temporaries.count(composite)) force_temporary_and_recompile(composite); auto chain = access_chain_internal(composite, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); statement(chain, " = ", to_unpacked_expression(obj), ";"); set(id, to_expression(composite), result_type, true); invalid_expressions.insert(composite); composite_insert_overwritten.insert(composite); } else { if (maybe_get(composite) != nullptr) { emit_uninitialized_temporary_expression(result_type, id); } else { // Make a copy, then use access chain to store the variable. statement(declare_temporary(result_type, id), to_expression(composite), ";"); set(id, to_name(id), result_type, true); } auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); statement(chain, " = ", to_unpacked_expression(obj), ";"); } break; } case OpCopyMemory: { uint32_t lhs = ops[0]; uint32_t rhs = ops[1]; if (lhs != rhs) { uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET]; if (!tmp_id) tmp_id = ir.increase_bound_by(1); uint32_t tmp_type_id = expression_type(rhs).parent_type; EmbeddedInstruction fake_load, fake_store; fake_load.op = OpLoad; fake_load.length = 3; fake_load.ops.push_back(tmp_type_id); fake_load.ops.push_back(tmp_id); fake_load.ops.push_back(rhs); fake_store.op = OpStore; fake_store.length = 2; fake_store.ops.push_back(lhs); fake_store.ops.push_back(tmp_id); // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible. // Synthesize a fake Load and Store pair for CopyMemory. 
emit_instruction(fake_load); emit_instruction(fake_store); } break; } case OpCopyLogical: { // This is used for copying object of different types, arrays and structs. // We need to unroll the copy, element-by-element. uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t rhs = ops[2]; emit_uninitialized_temporary_expression(result_type, id); emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {}); break; } case OpCopyObject: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t rhs = ops[2]; bool pointer = get(result_type).pointer; auto *chain = maybe_get(rhs); auto *imgsamp = maybe_get(rhs); if (chain) { // Cannot lower to a SPIRExpression, just copy the object. auto &e = set(id, *chain); e.self = id; } else if (imgsamp) { // Cannot lower to a SPIRExpression, just copy the object. // GLSL does not currently use this type and will never get here, but MSL does. // Handled here instead of CompilerMSL for better integration and general handling, // and in case GLSL or other subclasses require it in the future. auto &e = set(id, *imgsamp); e.self = id; } else if (expression_is_lvalue(rhs) && !pointer) { // Need a copy. // For pointer types, we copy the pointer itself. emit_op(result_type, id, to_unpacked_expression(rhs), false); } else { // RHS expression is immutable, so just forward it. // Copying these things really make no sense, but // seems to be allowed anyways. auto &e = emit_op(result_type, id, to_expression(rhs), true, true); if (pointer) { auto *var = maybe_get_backing_variable(rhs); e.loaded_from = var ? var->self : ID(0); } // If we're copying an access chain, need to inherit the read expressions. 
auto *rhs_expr = maybe_get(rhs); if (rhs_expr) { e.implied_read_expressions = rhs_expr->implied_read_expressions; e.expression_dependencies = rhs_expr->expression_dependencies; } } break; } case OpVectorShuffle: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t vec0 = ops[2]; uint32_t vec1 = ops[3]; const auto *elems = &ops[4]; length -= 4; auto &type0 = expression_type(vec0); // If we have the undefined swizzle index -1, we need to swizzle in undefined data, // or in our case, T(0). bool shuffle = false; for (uint32_t i = 0; i < length; i++) if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu) shuffle = true; // Cannot use swizzles with packed expressions, force shuffle path. if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked)) shuffle = true; string expr; bool should_fwd, trivial_forward; if (shuffle) { should_fwd = should_forward(vec0) && should_forward(vec1); trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1); // Constructor style and shuffling from two different vectors. SmallVector args; for (uint32_t i = 0; i < length; i++) { if (elems[i] == 0xffffffffu) { // Use a constant 0 here. // We could use the first component or similar, but then we risk propagating // a value we might not need, and bog down codegen. SPIRConstant c; c.constant_type = type0.parent_type; assert(type0.parent_type != ID(0)); args.push_back(constant_expression(c)); } else if (elems[i] >= type0.vecsize) args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize)); else args.push_back(to_extract_component_expression(vec0, elems[i])); } expr += join(type_to_glsl_constructor(get(result_type)), "(", merge(args), ")"); } else { should_fwd = should_forward(vec0); trivial_forward = should_suppress_usage_tracking(vec0); // We only source from first vector, so can use swizzle. 
// If the vector is packed, unpack it before applying a swizzle (needed for MSL) expr += to_enclosed_unpacked_expression(vec0); expr += "."; for (uint32_t i = 0; i < length; i++) { assert(elems[i] != 0xffffffffu); expr += index_to_swizzle(elems[i]); } if (backend.swizzle_is_function && length > 1) expr += "()"; } // A shuffle is trivial in that it doesn't actually *do* anything. // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. emit_op(result_type, id, expr, should_fwd, trivial_forward); inherit_expression_dependencies(id, vec0); if (vec0 != vec1) inherit_expression_dependencies(id, vec1); break; } // ALU case OpIsNan: if (!is_legacy()) GLSL_UFOP(isnan); else { // Check if the number doesn't equal itself auto &type = get(ops[0]); if (type.vecsize > 1) emit_binary_func_op(ops[0], ops[1], ops[2], ops[2], "notEqual"); else emit_binary_op(ops[0], ops[1], ops[2], ops[2], "!="); } break; case OpIsInf: if (!is_legacy()) GLSL_UFOP(isinf); else { // inf * 2 == inf by IEEE 754 rules, note this also applies to 0.0 // This is more reliable than checking if product with zero is NaN uint32_t result_type = ops[0]; uint32_t result_id = ops[1]; uint32_t operand = ops[2]; auto &type = get(result_type); std::string expr; if (type.vecsize > 1) { expr = type_to_glsl_constructor(type); expr += '('; for (uint32_t i = 0; i < type.vecsize; i++) { auto comp = to_extract_component_expression(operand, i); expr += join(comp, " != 0.0 && 2.0 * ", comp, " == ", comp); if (i + 1 < type.vecsize) expr += ", "; } expr += ')'; } else { // Register an extra read to force writing out a temporary auto oper = to_enclosed_expression(operand); track_expression_read(operand); expr += join(oper, " != 0.0 && 2.0 * ", oper, " == ", oper); } emit_op(result_type, result_id, expr, should_forward(operand)); inherit_expression_dependencies(result_id, operand); } break; case OpSNegate: if (implicit_integer_promotion || expression_type_id(ops[2]) != 
ops[0]) GLSL_UOP_CAST(-); else GLSL_UOP(-); break; case OpFNegate: GLSL_UOP(-); break; case OpIAdd: { // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. auto type = get(ops[0]).basetype; GLSL_BOP_CAST(+, type); break; } case OpFAdd: GLSL_BOP(+); break; case OpISub: { auto type = get(ops[0]).basetype; GLSL_BOP_CAST(-, type); break; } case OpFSub: GLSL_BOP(-); break; case OpIMul: { auto type = get(ops[0]).basetype; GLSL_BOP_CAST(*, type); break; } case OpVectorTimesMatrix: case OpMatrixTimesVector: { // If the matrix needs transpose, just flip the multiply order. auto *e = maybe_get(ops[opcode == OpMatrixTimesVector ? 2 : 3]); if (e && e->need_transpose) { e->need_transpose = false; string expr; if (opcode == OpMatrixTimesVector) expr = join(to_enclosed_unpacked_expression(ops[3]), " * ", enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); else expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", to_enclosed_unpacked_expression(ops[2])); bool forward = should_forward(ops[2]) && should_forward(ops[3]); emit_op(ops[0], ops[1], expr, forward); e->need_transpose = true; inherit_expression_dependencies(ops[1], ops[2]); inherit_expression_dependencies(ops[1], ops[3]); } else GLSL_BOP(*); break; } case OpMatrixTimesMatrix: { auto *a = maybe_get(ops[2]); auto *b = maybe_get(ops[3]); // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. // a^T * b^T = (b * a)^T. 
if (a && b && a->need_transpose && b->need_transpose) { a->need_transpose = false; b->need_transpose = false; auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); bool forward = should_forward(ops[2]) && should_forward(ops[3]); auto &e = emit_op(ops[0], ops[1], expr, forward); e.need_transpose = true; a->need_transpose = true; b->need_transpose = true; inherit_expression_dependencies(ops[1], ops[2]); inherit_expression_dependencies(ops[1], ops[3]); } else GLSL_BOP(*); break; } case OpMatrixTimesScalar: { auto *a = maybe_get(ops[2]); // If the matrix need transpose, just mark the result as needing so. if (a && a->need_transpose) { a->need_transpose = false; auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])), " * ", to_enclosed_unpacked_expression(ops[3])); bool forward = should_forward(ops[2]) && should_forward(ops[3]); auto &e = emit_op(ops[0], ops[1], expr, forward); e.need_transpose = true; a->need_transpose = true; inherit_expression_dependencies(ops[1], ops[2]); inherit_expression_dependencies(ops[1], ops[3]); } else GLSL_BOP(*); break; } case OpFMul: case OpVectorTimesScalar: GLSL_BOP(*); break; case OpOuterProduct: if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00 { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t a = ops[2]; uint32_t b = ops[3]; auto &type = get(result_type); string expr = type_to_glsl_constructor(type); expr += "("; for (uint32_t col = 0; col < type.columns; col++) { expr += to_enclosed_expression(a); expr += " * "; expr += to_extract_component_expression(b, col); if (col + 1 < type.columns) expr += ", "; } expr += ")"; emit_op(result_type, id, expr, should_forward(a) && should_forward(b)); inherit_expression_dependencies(id, a); inherit_expression_dependencies(id, b); } else GLSL_BFOP(outerProduct); break; case OpDot: GLSL_BFOP(dot); break; case OpTranspose: if 
(options.version < 120) // Matches GLSL 1.10 / ESSL 1.00 { // transpose() is not available, so instead, flip need_transpose, // which can later be turned into an emulated transpose op by // convert_row_major_matrix(), if necessary. uint32_t result_type = ops[0]; uint32_t result_id = ops[1]; uint32_t input = ops[2]; // Force need_transpose to false temporarily to prevent // to_expression() from doing the transpose. bool need_transpose = false; auto *input_e = maybe_get(input); if (input_e) swap(need_transpose, input_e->need_transpose); bool forward = should_forward(input); auto &e = emit_op(result_type, result_id, to_expression(input), forward); e.need_transpose = !need_transpose; // Restore the old need_transpose flag. if (input_e) input_e->need_transpose = need_transpose; } else GLSL_UFOP(transpose); break; case OpSRem: { uint32_t result_type = ops[0]; uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; // Needs special handling. bool forward = should_forward(op0) && should_forward(op1); auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); if (implicit_integer_promotion) expr = join(type_to_glsl(get(result_type)), '(', expr, ')'); emit_op(result_type, result_id, expr, forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); break; } case OpSDiv: GLSL_BOP_CAST(/, int_type); break; case OpUDiv: GLSL_BOP_CAST(/, uint_type); break; case OpIAddCarry: case OpISubBorrow: { if (options.es && options.version < 310) SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); else if (!options.es && options.version < 400) SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400."); uint32_t result_type = ops[0]; uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; auto &type = get(result_type); 
emit_uninitialized_temporary_expression(result_type, result_id); const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow"; statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");"); break; } case OpUMulExtended: case OpSMulExtended: { if (options.es && options.version < 310) SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); else if (!options.es && options.version < 400) SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000."); uint32_t result_type = ops[0]; uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; auto &type = get(result_type); emit_uninitialized_temporary_expression(result_type, result_id); const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended"; statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");"); break; } case OpFDiv: GLSL_BOP(/); break; case OpShiftRightLogical: GLSL_BOP_CAST(>>, uint_type); break; case OpShiftRightArithmetic: GLSL_BOP_CAST(>>, int_type); break; case OpShiftLeftLogical: { auto type = get(ops[0]).basetype; GLSL_BOP_CAST(<<, type); break; } case OpBitwiseOr: { auto type = get(ops[0]).basetype; GLSL_BOP_CAST(|, type); break; } case OpBitwiseXor: { auto type = get(ops[0]).basetype; GLSL_BOP_CAST(^, type); break; } case OpBitwiseAnd: { auto type = get(ops[0]).basetype; GLSL_BOP_CAST(&, type); break; } case OpNot: if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0]) GLSL_UOP_CAST(~); else GLSL_UOP(~); break; case OpUMod: GLSL_BOP_CAST(%, uint_type); break; case OpSMod: GLSL_BOP_CAST(%, int_type); break; case OpFMod: GLSL_BFOP(mod); break; case OpFRem: { uint32_t result_type = ops[0]; uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = 
ops[3]; // Needs special handling. bool forward = should_forward(op0) && should_forward(op1); std::string expr; if (!is_legacy()) { expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(", to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); } else { // Legacy GLSL has no trunc, emulate by casting to int and back auto &op0_type = expression_type(op0); auto via_type = op0_type; via_type.basetype = SPIRType::Int; expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(", to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), "))"); } emit_op(result_type, result_id, expr, forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); break; } // Relational case OpAny: GLSL_UFOP(any); break; case OpAll: GLSL_UFOP(all); break; case OpSelect: emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]); break; case OpLogicalOr: { // No vector variant in GLSL for logical OR. auto result_type = ops[0]; auto id = ops[1]; auto &type = get(result_type); if (type.vecsize > 1) emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown); else GLSL_BOP(||); break; } case OpLogicalAnd: { // No vector variant in GLSL for logical AND. 
auto result_type = ops[0]; auto id = ops[1]; auto &type = get(result_type); if (type.vecsize > 1) emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown); else GLSL_BOP(&&); break; } case OpLogicalNot: { auto &type = get(ops[0]); if (type.vecsize > 1) GLSL_UFOP(not ); else GLSL_UOP(!); break; } case OpIEqual: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(equal, int_type); else GLSL_BOP_CAST(==, int_type); break; } case OpLogicalEqual: case OpFOrdEqual: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(equal); else GLSL_BOP(==); break; } case OpINotEqual: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(notEqual, int_type); else GLSL_BOP_CAST(!=, int_type); break; } case OpLogicalNotEqual: case OpFOrdNotEqual: case OpFUnordNotEqual: { // GLSL is fuzzy on what to do with ordered vs unordered not equal. // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE, // but this means we have no easy way of implementing ordered not equal. if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(notEqual); else GLSL_BOP(!=); break; } case OpUGreaterThan: case OpSGreaterThan: { auto type = opcode == OpUGreaterThan ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(greaterThan, type); else GLSL_BOP_CAST(>, type); break; } case OpFOrdGreaterThan: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(greaterThan); else GLSL_BOP(>); break; } case OpUGreaterThanEqual: case OpSGreaterThanEqual: { auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(greaterThanEqual, type); else GLSL_BOP_CAST(>=, type); break; } case OpFOrdGreaterThanEqual: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(greaterThanEqual); else GLSL_BOP(>=); break; } case OpULessThan: case OpSLessThan: { auto type = opcode == OpULessThan ? 
uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(lessThan, type); else GLSL_BOP_CAST(<, type); break; } case OpFOrdLessThan: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(lessThan); else GLSL_BOP(<); break; } case OpULessThanEqual: case OpSLessThanEqual: { auto type = opcode == OpULessThanEqual ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(lessThanEqual, type); else GLSL_BOP_CAST(<=, type); break; } case OpFOrdLessThanEqual: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(lessThanEqual); else GLSL_BOP(<=); break; } // Conversion case OpSConvert: case OpConvertSToF: case OpUConvert: case OpConvertUToF: { auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type; uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto &type = get(result_type); auto &arg_type = expression_type(ops[2]); auto func = type_to_glsl_constructor(type); if (arg_type.width < type.width || type_is_floating_point(type)) emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype); else emit_unary_func_op(result_type, id, ops[2], func.c_str()); break; } case OpConvertFToU: case OpConvertFToS: { // Cast to expected arithmetic type, then potentially bitcast away to desired signedness. uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto &type = get(result_type); auto expected_type = type; auto &float_type = expression_type(ops[2]); expected_type.basetype = opcode == OpConvertFToS ? 
to_signed_basetype(type.width) : to_unsigned_basetype(type.width); auto func = type_to_glsl_constructor(expected_type); emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype); break; } case OpFConvert: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto func = type_to_glsl_constructor(get(result_type)); emit_unary_func_op(result_type, id, ops[2], func.c_str()); break; } case OpBitcast: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t arg = ops[2]; if (!emit_complex_bitcast(result_type, id, arg)) { auto op = bitcast_glsl_op(get(result_type), expression_type(arg)); emit_unary_func_op(result_type, id, arg, op.c_str()); } break; } case OpQuantizeToF16: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t arg = ops[2]; string op; auto &type = get(result_type); switch (type.vecsize) { case 1: op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x"); break; case 2: op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))"); break; case 3: { auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x"); op = join("vec3(", op0, ", ", op1, ")"); break; } case 4: { auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))"); op = join("vec4(", op0, ", ", op1, ")"); break; } default: SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); } emit_op(result_type, id, op, should_forward(arg)); inherit_expression_dependencies(id, arg); break; } // Derivatives case OpDPdx: GLSL_UFOP(dFdx); if (is_legacy_es()) require_extension_internal("GL_OES_standard_derivatives"); register_control_dependent_expression(ops[1]); break; case OpDPdy: GLSL_UFOP(dFdy); if (is_legacy_es()) require_extension_internal("GL_OES_standard_derivatives"); register_control_dependent_expression(ops[1]); break; 
case OpDPdxFine: GLSL_UFOP(dFdxFine); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension_internal("GL_ARB_derivative_control"); register_control_dependent_expression(ops[1]); break; case OpDPdyFine: GLSL_UFOP(dFdyFine); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension_internal("GL_ARB_derivative_control"); register_control_dependent_expression(ops[1]); break; case OpDPdxCoarse: if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } GLSL_UFOP(dFdxCoarse); if (options.version < 450) require_extension_internal("GL_ARB_derivative_control"); register_control_dependent_expression(ops[1]); break; case OpDPdyCoarse: GLSL_UFOP(dFdyCoarse); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension_internal("GL_ARB_derivative_control"); register_control_dependent_expression(ops[1]); break; case OpFwidth: GLSL_UFOP(fwidth); if (is_legacy_es()) require_extension_internal("GL_OES_standard_derivatives"); register_control_dependent_expression(ops[1]); break; case OpFwidthCoarse: GLSL_UFOP(fwidthCoarse); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension_internal("GL_ARB_derivative_control"); register_control_dependent_expression(ops[1]); break; case OpFwidthFine: GLSL_UFOP(fwidthFine); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension_internal("GL_ARB_derivative_control"); register_control_dependent_expression(ops[1]); break; // Bitfield case OpBitFieldInsert: { emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int); break; } case 
OpBitFieldSExtract: { emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type, SPIRType::Int, SPIRType::Int); break; } case OpBitFieldUExtract: { emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type, SPIRType::Int, SPIRType::Int); break; } case OpBitReverse: // BitReverse does not have issues with sign since result type must match input type. GLSL_UFOP(bitfieldReverse); break; case OpBitCount: { auto basetype = expression_type(ops[2]).basetype; emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type); break; } // Atomics case OpAtomicExchange: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t ptr = ops[2]; // Ignore semantics for now, probably only relevant to CL. uint32_t val = ops[5]; const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; emit_atomic_func_op(result_type, id, ptr, val, op); break; } case OpAtomicCompareExchange: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t ptr = ops[2]; uint32_t val = ops[6]; uint32_t comp = ops[7]; const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap"; emit_atomic_func_op(result_type, id, ptr, comp, val, op); break; } case OpAtomicLoad: { // In plain GLSL, we have no atomic loads, so emulate this by fetch adding by 0 and hope compiler figures it out. // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. auto &type = expression_type(ops[2]); forced_temporaries.insert(ops[1]); bool atomic_image = check_atomic_image(ops[2]); bool unsigned_type = (type.basetype == SPIRType::UInt) || (atomic_image && get(type.image.type).basetype == SPIRType::UInt); const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; const char *increment = unsigned_type ? 
"0u" : "0"; emit_op(ops[0], ops[1], join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false); flush_all_atomic_capable_variables(); break; } case OpAtomicStore: { // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result. // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. uint32_t ptr = ops[0]; // Ignore semantics for now, probably only relevant to CL. uint32_t val = ops[3]; const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");"); flush_all_atomic_capable_variables(); break; } case OpAtomicIIncrement: case OpAtomicIDecrement: { forced_temporaries.insert(ops[1]); auto &type = expression_type(ops[2]); if (type.storage == StorageClassAtomicCounter) { // Legacy GLSL stuff, not sure if this is relevant to support. if (opcode == OpAtomicIIncrement) GLSL_UFOP(atomicCounterIncrement); else GLSL_UFOP(atomicCounterDecrement); } else { bool atomic_image = check_atomic_image(ops[2]); bool unsigned_type = (type.basetype == SPIRType::UInt) || (atomic_image && get(type.image.type).basetype == SPIRType::UInt); const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; const char *increment = nullptr; if (opcode == OpAtomicIIncrement && unsigned_type) increment = "1u"; else if (opcode == OpAtomicIIncrement) increment = "1"; else if (unsigned_type) increment = "uint(-1)"; else increment = "-1"; emit_op(ops[0], ops[1], join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false); } flush_all_atomic_capable_variables(); break; } case OpAtomicIAdd: case OpAtomicFAddEXT: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } case OpAtomicISub: { const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicAdd" : "atomicAdd"; forced_temporaries.insert(ops[1]); auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); flush_all_atomic_capable_variables(); break; } case OpAtomicSMin: case OpAtomicUMin: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin"; emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } case OpAtomicSMax: case OpAtomicUMax: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax"; emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } case OpAtomicAnd: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd"; emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } case OpAtomicOr: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } case OpAtomicXor: { const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicXor" : "atomicXor"; emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } // Geometry shaders case OpEmitVertex: statement("EmitVertex();"); break; case OpEndPrimitive: statement("EndPrimitive();"); break; case OpEmitStreamVertex: { if (options.es) SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES."); else if (!options.es && options.version < 400) SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400."); auto stream_expr = to_expression(ops[0]); if (expression_type(ops[0]).basetype != SPIRType::Int) stream_expr = join("int(", stream_expr, ")"); statement("EmitStreamVertex(", stream_expr, ");"); break; } case OpEndStreamPrimitive: { if (options.es) SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES."); else if (!options.es && options.version < 400) SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400."); auto stream_expr = to_expression(ops[0]); if (expression_type(ops[0]).basetype != SPIRType::Int) stream_expr = join("int(", stream_expr, ")"); statement("EndStreamPrimitive(", stream_expr, ");"); break; } // Textures case OpImageSampleExplicitLod: case OpImageSampleProjExplicitLod: case OpImageSampleDrefExplicitLod: case OpImageSampleProjDrefExplicitLod: case OpImageSampleImplicitLod: case OpImageSampleProjImplicitLod: case OpImageSampleDrefImplicitLod: case OpImageSampleProjDrefImplicitLod: case OpImageFetch: case OpImageGather: case OpImageDrefGather: // Gets a bit hairy, so move this to a separate instruction. 
emit_texture_op(instruction, false); break; case OpImageSparseSampleExplicitLod: case OpImageSparseSampleProjExplicitLod: case OpImageSparseSampleDrefExplicitLod: case OpImageSparseSampleProjDrefExplicitLod: case OpImageSparseSampleImplicitLod: case OpImageSparseSampleProjImplicitLod: case OpImageSparseSampleDrefImplicitLod: case OpImageSparseSampleProjDrefImplicitLod: case OpImageSparseFetch: case OpImageSparseGather: case OpImageSparseDrefGather: // Gets a bit hairy, so move this to a separate instruction. emit_texture_op(instruction, true); break; case OpImageSparseTexelsResident: if (options.es) SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL."); require_extension_internal("GL_ARB_sparse_texture2"); emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean); break; case OpImage: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; // Suppress usage tracking. auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); // When using the image, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); e.loaded_from = var ? var->self : ID(0); break; } case OpImageQueryLod: { const char *op = nullptr; if (!options.es && options.version < 400) { require_extension_internal("GL_ARB_texture_query_lod"); // For some reason, the ARB spec is all-caps. 
op = "textureQueryLOD"; } else if (options.es) { if (options.version < 300) SPIRV_CROSS_THROW("textureQueryLod not supported in legacy ES"); require_extension_internal("GL_EXT_texture_query_lod"); op = "textureQueryLOD"; } else op = "textureQueryLod"; auto sampler_expr = to_expression(ops[2]); if (has_decoration(ops[2], DecorationNonUniform)) { if (maybe_get_backing_variable(ops[2])) convert_non_uniform_expression(sampler_expr, ops[2]); else if (*backend.nonuniform_qualifier != '\0') sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")"); } bool forward = should_forward(ops[3]); emit_op(ops[0], ops[1], join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"), forward); inherit_expression_dependencies(ops[1], ops[2]); inherit_expression_dependencies(ops[1], ops[3]); register_control_dependent_expression(ops[1]); break; } case OpImageQueryLevels: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (!options.es && options.version < 430) require_extension_internal("GL_ARB_texture_query_levels"); if (options.es) SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile."); auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")"); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); emit_op(result_type, id, expr, true); break; } case OpImageQuerySamples: { auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (options.es) SPIRV_CROSS_THROW("textureSamples and imageSamples not supported in ES profile."); else if (options.version < 450) require_extension_internal("GL_ARB_texture_query_samples"); string expr; if (type.image.sampled == 2) expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")"); else expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")"); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); emit_op(result_type, id, expr, true); 
break; } case OpSampledImage: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_sampled_image_op(result_type, id, ops[2], ops[3]); inherit_expression_dependencies(id, ops[2]); inherit_expression_dependencies(id, ops[3]); break; } case OpImageQuerySizeLod: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t img = ops[2]; auto &type = expression_type(img); auto &imgtype = get(type.self); std::string fname = "textureSize"; if (is_legacy_desktop()) { fname = legacy_tex_op(fname, imgtype, img); } else if (is_legacy_es()) SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100."); auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ", bitcast_expression(SPIRType::Int, ops[3]), ")"); // ES needs to emulate 1D images as 2D. if (type.image.dim == Dim1D && options.es) expr = join(expr, ".x"); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); emit_op(result_type, id, expr, true); break; } // Image load/store case OpImageRead: case OpImageSparseRead: { // We added Nonreadable speculatively to the OpImage variable due to glslangValidator // not adding the proper qualifiers. // If it turns out we need to read the image after all, remove the qualifier and recompile. 
auto *var = maybe_get_backing_variable(ops[2]); if (var) { auto &flags = get_decoration_bitset(var->self); if (flags.get(DecorationNonReadable)) { unset_decoration(var->self, DecorationNonReadable); force_recompile(); } } uint32_t result_type = ops[0]; uint32_t id = ops[1]; bool pure; string imgexpr; auto &type = expression_type(ops[2]); if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code { if (type.image.ms) SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible."); auto itr = find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; }); if (itr == end(pls_inputs)) { // For non-PLS inputs, we rely on subpass type remapping information to get it right // since ImageRead always returns 4-component vectors and the backing type is opaque. if (!var->remapped_components) SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); imgexpr = remap_swizzle(get(result_type), var->remapped_components, to_expression(ops[2])); } else { // PLS input could have different number of components than what the SPIR expects, swizzle to // the appropriate vector size. uint32_t components = pls_format_to_components(itr->format); imgexpr = remap_swizzle(get(result_type), components, to_expression(ops[2])); } pure = true; } else if (type.image.dim == DimSubpassData) { if (var && subpass_input_is_framebuffer_fetch(var->self)) { imgexpr = to_expression(var->self); } else if (options.vulkan_semantics) { // With Vulkan semantics, use the proper Vulkan GLSL construct. 
if (type.image.ms) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " "operand mask was used."); uint32_t samples = ops[5]; imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")"); } else imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")"); } else { if (type.image.ms) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " "operand mask was used."); uint32_t samples = ops[5]; imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", to_expression(samples), ")"); } else { // Implement subpass loads via texture barrier style sampling. imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); } } imgexpr = remap_swizzle(get(result_type), 4, imgexpr); pure = true; } else { bool sparse = opcode == OpImageSparseRead; uint32_t sparse_code_id = 0; uint32_t sparse_texel_id = 0; if (sparse) emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id); // imageLoad only accepts int coords, not uint. auto coord_expr = to_expression(ops[3]); auto target_coord_type = expression_type(ops[3]); target_coord_type.basetype = SPIRType::Int; coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); // ES needs to emulate 1D images as 2D. if (type.image.dim == Dim1D && options.es) coord_expr = join("ivec2(", coord_expr, ", 0)"); // Plain image load/store. 
if (sparse) { if (type.image.ms) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " "operand mask was used."); uint32_t samples = ops[5]; statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");"); } else { statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(sparse_texel_id), ");"); } imgexpr = join(type_to_glsl(get(result_type)), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id), ")"); } else { if (type.image.ms) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " "operand mask was used."); uint32_t samples = ops[5]; imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); } else imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")"); } if (!sparse) imgexpr = remap_swizzle(get(result_type), 4, imgexpr); pure = false; } if (var) { bool forward = forced_temporaries.find(id) == end(forced_temporaries); auto &e = emit_op(result_type, id, imgexpr, forward); // We only need to track dependencies if we're reading from image load/store. 
if (!pure) { e.loaded_from = var->self; if (forward) var->dependees.push_back(id); } } else emit_op(result_type, id, imgexpr, false); inherit_expression_dependencies(id, ops[2]); if (type.image.ms) inherit_expression_dependencies(id, ops[5]); break; } case OpImageTexelPointer: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto coord_expr = to_expression(ops[3]); auto target_coord_type = expression_type(ops[3]); target_coord_type.basetype = SPIRType::Int; coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); auto expr = join(to_expression(ops[2]), ", ", coord_expr); auto &e = set(id, expr, result_type, true); // When using the pointer, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); e.loaded_from = var ? var->self : ID(0); inherit_expression_dependencies(id, ops[3]); break; } case OpImageWrite: { // We added Nonwritable speculatively to the OpImage variable due to glslangValidator // not adding the proper qualifiers. // If it turns out we need to write to the image after all, remove the qualifier and recompile. auto *var = maybe_get_backing_variable(ops[0]); if (var) { if (has_decoration(var->self, DecorationNonWritable)) { unset_decoration(var->self, DecorationNonWritable); force_recompile(); } } auto &type = expression_type(ops[0]); auto &value_type = expression_type(ops[2]); auto store_type = value_type; store_type.vecsize = 4; // imageStore only accepts int coords, not uint. auto coord_expr = to_expression(ops[1]); auto target_coord_type = expression_type(ops[1]); target_coord_type.basetype = SPIRType::Int; coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr); // ES needs to emulate 1D images as 2D. 
if (type.image.dim == Dim1D && options.es) coord_expr = join("ivec2(", coord_expr, ", 0)"); if (type.image.ms) { uint32_t operands = ops[3]; if (operands != ImageOperandsSampleMask || length != 5) SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); uint32_t samples = ops[4]; statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); } else statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); if (var && variable_storage_is_aliased(*var)) flush_all_aliased_variables(); break; } case OpImageQuerySize: { auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (type.basetype == SPIRType::Image) { string expr; if (type.image.sampled == 2) { if (!options.es && options.version < 430) require_extension_internal("GL_ARB_shader_image_size"); else if (options.es && options.version < 310) SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize."); // The size of an image is always constant. expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")"); } else { // This path is hit for samplerBuffers and multisampled images which do not have LOD. 
std::string fname = "textureSize"; if (is_legacy()) { auto &imgtype = get(type.self); fname = legacy_tex_op(fname, imgtype, ops[2]); } expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")"); } auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); emit_op(result_type, id, expr, true); } else SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); break; } // Compute case OpControlBarrier: case OpMemoryBarrier: { uint32_t execution_scope = 0; uint32_t memory; uint32_t semantics; if (opcode == OpMemoryBarrier) { memory = evaluate_constant_u32(ops[0]); semantics = evaluate_constant_u32(ops[1]); } else { execution_scope = evaluate_constant_u32(ops[0]); memory = evaluate_constant_u32(ops[1]); semantics = evaluate_constant_u32(ops[2]); } if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup) { // OpControlBarrier with ScopeSubgroup is subgroupBarrier() if (opcode != OpControlBarrier) { request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier); } else { request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier); } } if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl) { // Control shaders only have barriers, and it implies memory barriers. if (opcode == OpControlBarrier) statement("barrier();"); break; } // We only care about these flags, acquire/release and friends are not relevant to GLSL. semantics = mask_relevant_memory_semantics(semantics); if (opcode == OpMemoryBarrier) { // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier // does what we need, so we avoid redundant barriers. 
const Instruction *next = get_next_instruction_in_block(instruction); if (next && next->op == OpControlBarrier) { auto *next_ops = stream(*next); uint32_t next_memory = evaluate_constant_u32(next_ops[1]); uint32_t next_semantics = evaluate_constant_u32(next_ops[2]); next_semantics = mask_relevant_memory_semantics(next_semantics); bool memory_scope_covered = false; if (next_memory == memory) memory_scope_covered = true; else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) { // If we only care about workgroup memory, either Device or Workgroup scope is fine, // scope does not have to match. if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && (memory == ScopeDevice || memory == ScopeWorkgroup)) { memory_scope_covered = true; } } else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) { // The control barrier has device scope, but the memory barrier just has workgroup scope. memory_scope_covered = true; } // If we have the same memory scope, and all memory types are covered, we're good. if (memory_scope_covered && (semantics & next_semantics) == semantics) break; } } // We are synchronizing some memory or syncing execution, // so we cannot forward any loads beyond the memory barrier. if (semantics || opcode == OpControlBarrier) { assert(current_emitting_block); flush_control_dependent_expressions(current_emitting_block->self); flush_all_active_variables(); } if (memory == ScopeWorkgroup) // Only need to consider memory within a group { if (semantics == MemorySemanticsWorkgroupMemoryMask) { // OpControlBarrier implies a memory barrier for shared memory as well. 
bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup; if (!implies_shared_barrier) statement("memoryBarrierShared();"); } else if (semantics != 0) statement("groupMemoryBarrier();"); } else if (memory == ScopeSubgroup) { const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) { // These are not relevant for GLSL, but assume it means memoryBarrier(). // memoryBarrier() does everything, so no need to test anything else. statement("subgroupMemoryBarrier();"); } else if ((semantics & all_barriers) == all_barriers) { // Short-hand instead of emitting 3 barriers. statement("subgroupMemoryBarrier();"); } else { // Pick out individual barriers. if (semantics & MemorySemanticsWorkgroupMemoryMask) statement("subgroupMemoryBarrierShared();"); if (semantics & MemorySemanticsUniformMemoryMask) statement("subgroupMemoryBarrierBuffer();"); if (semantics & MemorySemanticsImageMemoryMask) statement("subgroupMemoryBarrierImage();"); } } else { const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) { // These are not relevant for GLSL, but assume it means memoryBarrier(). // memoryBarrier() does everything, so no need to test anything else. statement("memoryBarrier();"); } else if ((semantics & all_barriers) == all_barriers) { // Short-hand instead of emitting 4 barriers. statement("memoryBarrier();"); } else { // Pick out individual barriers. 
if (semantics & MemorySemanticsWorkgroupMemoryMask) statement("memoryBarrierShared();"); if (semantics & MemorySemanticsUniformMemoryMask) statement("memoryBarrierBuffer();"); if (semantics & MemorySemanticsImageMemoryMask) statement("memoryBarrierImage();"); } } if (opcode == OpControlBarrier) { if (execution_scope == ScopeSubgroup) statement("subgroupBarrier();"); else statement("barrier();"); } break; } case OpExtInst: { uint32_t extension_set = ops[2]; auto ext = get(extension_set).ext; if (ext == SPIRExtension::GLSL) { emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (ext == SPIRExtension::SPV_AMD_shader_ballot) { emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) { emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax) { emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (ext == SPIRExtension::SPV_AMD_gcn_shader) { emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (ext == SPIRExtension::SPV_debug_info || ext == SPIRExtension::NonSemanticShaderDebugInfo || ext == SPIRExtension::NonSemanticGeneric) { break; // Ignore SPIR-V debug information extended instructions. } else if (ext == SPIRExtension::NonSemanticDebugPrintf) { // Operation 1 is printf. if (ops[3] == 1) { if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.\n"); require_extension_internal("GL_EXT_debug_printf"); auto &format_string = get(ops[4]).str; string expr = join("debugPrintfEXT(\"", format_string, "\""); for (uint32_t i = 5; i < length; i++) { expr += ", "; expr += to_expression(ops[i]); } statement(expr, ");"); } } else { statement("// unimplemented ext op ", instruction.op); break; } break; } // Legacy sub-group stuff ... 
case OpSubgroupBallotKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; string expr; expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)"); emit_op(result_type, id, expr, should_forward(ops[2])); require_extension_internal("GL_ARB_shader_ballot"); inherit_expression_dependencies(id, ops[2]); register_control_dependent_expression(ops[1]); break; } case OpSubgroupFirstInvocationKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB"); require_extension_internal("GL_ARB_shader_ballot"); register_control_dependent_expression(ops[1]); break; } case OpSubgroupReadInvocationKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB"); require_extension_internal("GL_ARB_shader_ballot"); register_control_dependent_expression(ops[1]); break; } case OpSubgroupAllKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB"); require_extension_internal("GL_ARB_shader_group_vote"); register_control_dependent_expression(ops[1]); break; } case OpSubgroupAnyKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB"); require_extension_internal("GL_ARB_shader_group_vote"); register_control_dependent_expression(ops[1]); break; } case OpSubgroupAllEqualKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB"); require_extension_internal("GL_ARB_shader_group_vote"); register_control_dependent_expression(ops[1]); break; } case OpGroupIAddNonUniformAMD: case OpGroupFAddNonUniformAMD: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD"); require_extension_internal("GL_AMD_shader_ballot"); 
register_control_dependent_expression(ops[1]); break; } case OpGroupFMinNonUniformAMD: case OpGroupUMinNonUniformAMD: case OpGroupSMinNonUniformAMD: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD"); require_extension_internal("GL_AMD_shader_ballot"); register_control_dependent_expression(ops[1]); break; } case OpGroupFMaxNonUniformAMD: case OpGroupUMaxNonUniformAMD: case OpGroupSMaxNonUniformAMD: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD"); require_extension_internal("GL_AMD_shader_ballot"); register_control_dependent_expression(ops[1]); break; } case OpFragmentMaskFetchAMD: { auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (type.image.dim == spv::DimSubpassData) { emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD"); } else { emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD"); } require_extension_internal("GL_AMD_shader_fragment_mask"); break; } case OpFragmentFetchAMD: { auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (type.image.dim == spv::DimSubpassData) { emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD"); } else { emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD"); } require_extension_internal("GL_AMD_shader_fragment_mask"); break; } // Vulkan 1.1 sub-group stuff ... 
case OpGroupNonUniformElect: case OpGroupNonUniformBroadcast: case OpGroupNonUniformBroadcastFirst: case OpGroupNonUniformBallot: case OpGroupNonUniformInverseBallot: case OpGroupNonUniformBallotBitExtract: case OpGroupNonUniformBallotBitCount: case OpGroupNonUniformBallotFindLSB: case OpGroupNonUniformBallotFindMSB: case OpGroupNonUniformShuffle: case OpGroupNonUniformShuffleXor: case OpGroupNonUniformShuffleUp: case OpGroupNonUniformShuffleDown: case OpGroupNonUniformAll: case OpGroupNonUniformAny: case OpGroupNonUniformAllEqual: case OpGroupNonUniformFAdd: case OpGroupNonUniformIAdd: case OpGroupNonUniformFMul: case OpGroupNonUniformIMul: case OpGroupNonUniformFMin: case OpGroupNonUniformFMax: case OpGroupNonUniformSMin: case OpGroupNonUniformSMax: case OpGroupNonUniformUMin: case OpGroupNonUniformUMax: case OpGroupNonUniformBitwiseAnd: case OpGroupNonUniformBitwiseOr: case OpGroupNonUniformBitwiseXor: case OpGroupNonUniformLogicalAnd: case OpGroupNonUniformLogicalOr: case OpGroupNonUniformLogicalXor: case OpGroupNonUniformQuadSwap: case OpGroupNonUniformQuadBroadcast: emit_subgroup_op(instruction); break; case OpFUnordEqual: case OpFUnordLessThan: case OpFUnordGreaterThan: case OpFUnordLessThanEqual: case OpFUnordGreaterThanEqual: { // GLSL doesn't specify if floating point comparisons are ordered or unordered, // but glslang always emits ordered floating point compares for GLSL. // To get unordered compares, we can test the opposite thing and invert the result. // This way, we force true when there is any NaN present. 
uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; string expr; if (expression_type(op0).vecsize > 1) { const char *comp_op = nullptr; switch (opcode) { case OpFUnordEqual: comp_op = "notEqual"; break; case OpFUnordLessThan: comp_op = "greaterThanEqual"; break; case OpFUnordLessThanEqual: comp_op = "greaterThan"; break; case OpFUnordGreaterThan: comp_op = "lessThanEqual"; break; case OpFUnordGreaterThanEqual: comp_op = "lessThan"; break; default: assert(0); break; } expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))"); } else { const char *comp_op = nullptr; switch (opcode) { case OpFUnordEqual: comp_op = " != "; break; case OpFUnordLessThan: comp_op = " >= "; break; case OpFUnordLessThanEqual: comp_op = " > "; break; case OpFUnordGreaterThan: comp_op = " <= "; break; case OpFUnordGreaterThanEqual: comp_op = " < "; break; default: assert(0); break; } expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")"); } emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1)); inherit_expression_dependencies(ops[1], op0); inherit_expression_dependencies(ops[1], op1); break; } case OpReportIntersectionKHR: // NV is same opcode. forced_temporaries.insert(ops[1]); if (ray_tracing_is_khr) GLSL_BFOP(reportIntersectionEXT); else GLSL_BFOP(reportIntersectionNV); flush_control_dependent_expressions(current_emitting_block->self); break; case OpIgnoreIntersectionNV: // KHR variant is a terminator. statement("ignoreIntersectionNV();"); flush_control_dependent_expressions(current_emitting_block->self); break; case OpTerminateRayNV: // KHR variant is a terminator. 
statement("terminateRayNV();"); flush_control_dependent_expressions(current_emitting_block->self); break; case OpTraceNV: statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", to_expression(ops[9]), ", ", to_expression(ops[10]), ");"); flush_control_dependent_expressions(current_emitting_block->self); break; case OpTraceRayKHR: if (!has_decoration(ops[10], DecorationLocation)) SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR."); statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");"); flush_control_dependent_expressions(current_emitting_block->self); break; case OpExecuteCallableNV: statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); flush_control_dependent_expressions(current_emitting_block->self); break; case OpExecuteCallableKHR: if (!has_decoration(ops[1], DecorationLocation)) SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR."); statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");"); flush_control_dependent_expressions(current_emitting_block->self); break; // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects. 
case OpRayQueryInitializeKHR: flush_variable_declaration(ops[0]); statement("rayQueryInitializeEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", to_expression(ops[6]), ", ", to_expression(ops[7]), ");"); break; case OpRayQueryProceedKHR: flush_variable_declaration(ops[0]); emit_op(ops[0], ops[1], join("rayQueryProceedEXT(", to_expression(ops[2]), ")"), false); break; case OpRayQueryTerminateKHR: flush_variable_declaration(ops[0]); statement("rayQueryTerminateEXT(", to_expression(ops[0]), ");"); break; case OpRayQueryGenerateIntersectionKHR: flush_variable_declaration(ops[0]); statement("rayQueryGenerateIntersectionEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); break; case OpRayQueryConfirmIntersectionKHR: flush_variable_declaration(ops[0]); statement("rayQueryConfirmIntersectionEXT(", to_expression(ops[0]), ");"); break; #define GLSL_RAY_QUERY_GET_OP(op) \ case OpRayQueryGet##op##KHR: \ flush_variable_declaration(ops[2]); \ emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \ break #define GLSL_RAY_QUERY_GET_OP2(op) \ case OpRayQueryGet##op##KHR: \ flush_variable_declaration(ops[2]); \ emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \ break GLSL_RAY_QUERY_GET_OP(RayTMin); GLSL_RAY_QUERY_GET_OP(RayFlags); GLSL_RAY_QUERY_GET_OP(WorldRayOrigin); GLSL_RAY_QUERY_GET_OP(WorldRayDirection); GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque); GLSL_RAY_QUERY_GET_OP2(IntersectionType); GLSL_RAY_QUERY_GET_OP2(IntersectionT); GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex); GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId); GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset); GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex); GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex); 
GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics); GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace); GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection); GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin); GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld); GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject); #undef GLSL_RAY_QUERY_GET_OP #undef GLSL_RAY_QUERY_GET_OP2 case OpConvertUToAccelerationStructureKHR: { require_extension_internal("GL_EXT_ray_tracing"); bool elide_temporary = should_forward(ops[2]) && forced_temporaries.count(ops[1]) == 0 && !hoisted_temporaries.count(ops[1]); if (elide_temporary) { GLSL_UFOP(accelerationStructureEXT); } else { // Force this path in subsequent iterations. forced_temporaries.insert(ops[1]); // We cannot declare a temporary acceleration structure in GLSL. // If we get to this point, we'll have to emit a temporary uvec2, // and cast to RTAS on demand. statement(declare_temporary(expression_type_id(ops[2]), ops[1]), to_unpacked_expression(ops[2]), ";"); // Use raw SPIRExpression interface to block all usage tracking. 
set(ops[1], join("accelerationStructureEXT(", to_name(ops[1]), ")"), ops[0], true); } break; } case OpConvertUToPtr: { auto &type = get(ops[0]); if (type.storage != StorageClassPhysicalStorageBufferEXT) SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr."); auto &in_type = expression_type(ops[2]); if (in_type.vecsize == 2) require_extension_internal("GL_EXT_buffer_reference_uvec2"); auto op = type_to_glsl(type); emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); break; } case OpConvertPtrToU: { auto &type = get(ops[0]); auto &ptr_type = expression_type(ops[2]); if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT) SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU."); if (type.vecsize == 2) require_extension_internal("GL_EXT_buffer_reference_uvec2"); auto op = type_to_glsl(type); emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); break; } case OpUndef: // Undefined value has been declared. break; case OpLine: { emit_line_directive(ops[0], ops[1]); break; } case OpNoLine: break; case OpDemoteToHelperInvocationEXT: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); require_extension_internal("GL_EXT_demote_to_helper_invocation"); statement(backend.demote_literal, ";"); break; case OpIsHelperInvocationEXT: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); require_extension_internal("GL_EXT_demote_to_helper_invocation"); // Helper lane state with demote is volatile by nature. // Do not forward this. emit_op(ops[0], ops[1], "helperInvocationEXT()", false); break; case OpBeginInvocationInterlockEXT: // If the interlock is complex, we emit this elsewhere. 
if (!interlocked_is_complex) { statement("SPIRV_Cross_beginInvocationInterlock();"); flush_all_active_variables(); // Make sure forwarding doesn't propagate outside interlock region. } break; case OpEndInvocationInterlockEXT: // If the interlock is complex, we emit this elsewhere. if (!interlocked_is_complex) { statement("SPIRV_Cross_endInvocationInterlock();"); flush_all_active_variables(); // Make sure forwarding doesn't propagate outside interlock region. } break; case OpSetMeshOutputsEXT: statement("SetMeshOutputsEXT(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");"); break; case OpReadClockKHR: { auto &type = get(ops[0]); auto scope = static_cast(evaluate_constant_u32(ops[2])); const char *op = nullptr; // Forwarding clock statements leads to a scenario where an SSA value can take on different // values every time it's evaluated. Block any forwarding attempt. // We also might want to invalidate all expressions to function as a sort of optimization // barrier, but might be overkill for now. if (scope == ScopeDevice) { require_extension_internal("GL_EXT_shader_realtime_clock"); if (type.basetype == SPIRType::BaseType::UInt64) op = "clockRealtimeEXT()"; else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2) op = "clockRealtime2x32EXT()"; else SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode."); } else if (scope == ScopeSubgroup) { require_extension_internal("GL_ARB_shader_clock"); if (type.basetype == SPIRType::BaseType::UInt64) op = "clockARB()"; else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2) op = "clock2x32ARB()"; else SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode."); } else SPIRV_CROSS_THROW("Unsupported scope for OpReadClockKHR opcode."); emit_op(ops[0], ops[1], op, false); break; } default: statement("// unimplemented op ", instruction.op); break; } } // Appends function arguments, mapped from global variables, beyond the specified arg index. 
// This is used when a function call uses fewer arguments than the function defines. // This situation may occur if the function signature has been dynamically modified to // extract global variables referenced from within the function, and convert them to // function arguments. This is necessary for shader languages that do not support global // access to shader input content from within a function (eg. Metal). Each additional // function args uses the name of the global variable. Function nesting will modify the // functions and function calls all the way up the nesting chain. void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist) { auto &args = func.arguments; uint32_t arg_cnt = uint32_t(args.size()); for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++) { auto &arg = args[arg_idx]; assert(arg.alias_global_variable); // If the underlying variable needs to be declared // (ie. a local variable with deferred declaration), do so now. 
uint32_t var_id = get(arg.id).basevariable; if (var_id) flush_variable_declaration(var_id); arglist.push_back(to_func_call_arg(arg, arg.id)); } } string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) { if (type.type_alias != TypeID(0) && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) { return to_member_name(get(type.type_alias), index); } auto &memb = ir.meta[type.self].members; if (index < memb.size() && !memb[index].alias.empty()) return memb[index].alias; else return join("_m", index); } string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool) { return join(".", to_member_name(type, index)); } string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector &indices) { string ret; auto *member_type = &type; for (auto &index : indices) { ret += join(".", to_member_name(*member_type, index)); member_type = &get(member_type->member_types[index]); } return ret; } void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index) { auto &memb = ir.meta[type.self].members; if (index < memb.size() && !memb[index].alias.empty()) { auto &name = memb[index].alias; if (name.empty()) return; ParsedIR::sanitize_identifier(name, true, true); update_name_cache(type.member_name_cache, name); } } // Checks whether the ID is a row_major matrix that requires conversion before use bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id) { // Natively supported row-major matrices do not need to be converted. // Legacy targets do not support row major. 
if (backend.native_row_major_matrix && !is_legacy()) return false; auto *e = maybe_get(id); if (e) return e->need_transpose; else return has_decoration(id, DecorationRowMajor); } // Checks whether the member is a row_major matrix that requires conversion before use bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) { // Natively supported row-major matrices do not need to be converted. if (backend.native_row_major_matrix && !is_legacy()) return false; // Non-matrix or column-major matrix types do not need to be converted. if (!has_member_decoration(type.self, index, DecorationRowMajor)) return false; // Only square row-major matrices can be converted at this time. // Converting non-square matrices will require defining custom GLSL function that // swaps matrix elements while retaining the original dimensional form of the matrix. const auto mbr_type = get(type.member_types[index]); if (mbr_type.columns != mbr_type.vecsize) SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); return true; } // Checks if we need to remap physical type IDs when declaring the type in a buffer. bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const { return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID); } // Checks whether the member is in packed data type, that might need to be unpacked. bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const { return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked); } // Wraps the expression string in a function call that converts the // row_major matrix result of the expression to a column_major matrix. // Base implementation uses the standard library transpose() function. // Subclasses may override to use a different function. 
string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */, bool /*is_packed*/, bool relaxed) { strip_enclosed_expression(exp_str); if (!is_matrix(exp_type)) { auto column_index = exp_str.find_last_of('['); if (column_index == string::npos) return exp_str; auto column_expr = exp_str.substr(column_index); exp_str.resize(column_index); auto transposed_expr = type_to_glsl_constructor(exp_type) + "("; // Loading a column from a row-major matrix. Unroll the load. for (uint32_t c = 0; c < exp_type.vecsize; c++) { transposed_expr += join(exp_str, '[', c, ']', column_expr); if (c + 1 < exp_type.vecsize) transposed_expr += ", "; } transposed_expr += ")"; return transposed_expr; } else if (options.version < 120) { // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that // these GLSL versions do not support non-square matrices. if (exp_type.vecsize == 2 && exp_type.columns == 2) require_polyfill(PolyfillTranspose2x2, relaxed); else if (exp_type.vecsize == 3 && exp_type.columns == 3) require_polyfill(PolyfillTranspose3x3, relaxed); else if (exp_type.vecsize == 4 && exp_type.columns == 4) require_polyfill(PolyfillTranspose4x4, relaxed); else SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose."); return join("spvTranspose", (options.es && relaxed) ? "MP" : "", "(", exp_str, ")"); } else return join("transpose(", exp_str, ")"); } string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) { string type_name = type_to_glsl(type, id); remap_variable_type_name(type, name, type_name); return join(type_name, " ", name, type_to_array_glsl(type)); } bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const { return var.storage == storage; } // Emit a structure member. Subclasses may override to modify output, // or to dynamically add a padding member if needed. 
void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const string &qualifier, uint32_t) { auto &membertype = get(member_type_id); Bitset memberflags; auto &memb = ir.meta[type.self].members; if (index < memb.size()) memberflags = memb[index].decoration_flags; string qualifiers; bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); if (is_block) qualifiers = to_interpolation_qualifiers(memberflags); statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags), variable_decl(membertype, to_member_name(type, index)), ";"); } void CompilerGLSL::emit_struct_padding_target(const SPIRType &) { } string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) { // GL_EXT_buffer_reference variables can be marked as restrict. if (flags.get(DecorationRestrictPointerEXT)) return "restrict "; string qual; if (type_is_floating_point(type) && flags.get(DecorationNoContraction) && backend.support_precise_qualifier) qual = "precise "; // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). 
bool type_supports_precision = type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt || type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler; if (!type_supports_precision) return qual; if (options.es) { auto &execution = get_entry_point(); if (flags.get(DecorationRelaxedPrecision)) { bool implied_fmediump = type.basetype == SPIRType::Float && options.fragment.default_float_precision == Options::Mediump && execution.model == ExecutionModelFragment; bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && options.fragment.default_int_precision == Options::Mediump && execution.model == ExecutionModelFragment; qual += (implied_fmediump || implied_imediump) ? "" : "mediump "; } else { bool implied_fhighp = type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && execution.model == ExecutionModelFragment) || (execution.model != ExecutionModelFragment)); bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && ((options.fragment.default_int_precision == Options::Highp && execution.model == ExecutionModelFragment) || (execution.model != ExecutionModelFragment)); qual += (implied_fhighp || implied_ihighp) ? "" : "highp "; } } else if (backend.allow_precision_qualifiers) { // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. // The default is highp however, so only emit mediump in the rare case that a shader has these. if (flags.get(DecorationRelaxedPrecision)) qual += "mediump "; } return qual; } string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) { auto &type = expression_type(id); bool use_precision_qualifiers = backend.allow_precision_qualifiers; if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) { // Force mediump for the sampler type. 
We cannot declare 16-bit or smaller image types. auto &result_type = get(type.image.type); if (result_type.width < 32) return "mediump "; } return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); } void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var) { // Works around weird behavior in glslangValidator where // a patch out block is translated to just block members getting the decoration. // To make glslang not complain when we compile again, we have to transform this back to a case where // the variable itself has Patch decoration, and not members. // Same for perprimitiveEXT. auto &type = get(var.basetype); if (has_decoration(type.self, DecorationBlock)) { uint32_t member_count = uint32_t(type.member_types.size()); Decoration promoted_decoration = {}; bool do_promote_decoration = false; for (uint32_t i = 0; i < member_count; i++) { if (has_member_decoration(type.self, i, DecorationPatch)) { promoted_decoration = DecorationPatch; do_promote_decoration = true; break; } else if (has_member_decoration(type.self, i, DecorationPerPrimitiveEXT)) { promoted_decoration = DecorationPerPrimitiveEXT; do_promote_decoration = true; break; } } if (do_promote_decoration) { set_decoration(var.self, promoted_decoration); for (uint32_t i = 0; i < member_count; i++) unset_member_decoration(type.self, i, promoted_decoration); } } } string CompilerGLSL::to_qualifiers_glsl(uint32_t id) { auto &flags = get_decoration_bitset(id); string res; auto *var = maybe_get(id); if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) res += "shared "; else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT && !backend.shared_is_implied) res += "taskPayloadSharedEXT "; res += to_interpolation_qualifiers(flags); if (var) res += to_storage_qualifiers_glsl(*var); auto &type = expression_type(id); if (type.image.dim != DimSubpassData && type.image.sampled == 2) { if (flags.get(DecorationCoherent)) res += 
"coherent "; if (flags.get(DecorationRestrict)) res += "restrict "; if (flags.get(DecorationNonWritable)) res += "readonly "; bool formatted_load = type.image.format == ImageFormatUnknown; if (flags.get(DecorationNonReadable)) { res += "writeonly "; formatted_load = false; } if (formatted_load) { if (!options.es) require_extension_internal("GL_EXT_shader_image_load_formatted"); else SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL."); } } res += to_precision_qualifiers_glsl(id); return res; } string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) { // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ... auto &type = expression_type(arg.id); const char *direction = ""; if (type.pointer) { if (arg.write_count && arg.read_count) direction = "inout "; else if (arg.write_count) direction = "out "; } return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id)); } string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) { return to_unpacked_expression(var.initializer); } string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id) { #ifndef NDEBUG auto &type = get(type_id); assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction || type.storage == StorageClassGeneric); #endif uint32_t id = ir.increase_bound_by(1); ir.make_constant_null(id, type_id, false); return constant_expression(get(id)); } bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const { if (type.pointer) return false; if (!type.array.empty() && options.flatten_multidimensional_arrays) return false; for (auto &literal : type.array_size_literal) if (!literal) return false; for (auto &memb : type.member_types) if (!type_can_zero_initialize(get(memb))) return false; return true; } string CompilerGLSL::variable_decl(const SPIRVariable &variable) { // Ignore the pointer type since GLSL doesn't have pointers. 
auto &type = get_variable_data_type(variable); if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer) SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types."); auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self)); if (variable.loop_variable && variable.static_expression) { uint32_t expr = variable.static_expression; if (ir.ids[expr].get_type() != TypeUndef) res += join(" = ", to_unpacked_expression(variable.static_expression)); else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); } else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup)) { uint32_t expr = variable.initializer; if (ir.ids[expr].get_type() != TypeUndef) res += join(" = ", to_initializer_expression(variable)); else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); } return res; } const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) { auto &flags = get_decoration_bitset(variable.self); if (flags.get(DecorationRelaxedPrecision)) return "mediump "; else return "highp "; } string CompilerGLSL::pls_decl(const PlsRemap &var) { auto &variable = get(var.id); SPIRType type; type.vecsize = pls_format_to_components(var.format); type.basetype = pls_format_to_basetype(var.format); return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", to_name(variable.self)); } uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const { return to_array_size_literal(type, uint32_t(type.array.size() - 1)); } uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const { assert(type.array.size() == type.array_size_literal.size()); if (type.array_size_literal[index]) { 
return type.array[index]; } else { // Use the default spec constant value. // This is the best we can do. return evaluate_constant_u32(type.array[index]); } } string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) { assert(type.array.size() == type.array_size_literal.size()); auto &size = type.array[index]; if (!type.array_size_literal[index]) return to_expression(size); else if (size) return convert_to_string(size); else if (!backend.unsized_array_supported) { // For runtime-sized arrays, we can work around // lack of standard support for this by simply having // a single element array. // // Runtime length arrays must always be the last element // in an interface block. return "1"; } else return ""; } string CompilerGLSL::type_to_array_glsl(const SPIRType &type) { if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) { // We are using a wrapped pointer type, and we should not emit any array declarations here. return ""; } if (type.array.empty()) return ""; if (options.flatten_multidimensional_arrays) { string res; res += "["; for (auto i = uint32_t(type.array.size()); i; i--) { res += enclose_expression(to_array_size(type, i - 1)); if (i > 1) res += " * "; } res += "]"; return res; } else { if (type.array.size() > 1) { if (!options.es && options.version < 430) require_extension_internal("GL_ARB_arrays_of_arrays"); else if (options.es && options.version < 310) SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. 
" "Try using --flatten-multidimensional-arrays or set " "options.flatten_multidimensional_arrays to true."); } string res; for (auto i = uint32_t(type.array.size()); i; i--) { res += "["; res += to_array_size(type, i - 1); res += "]"; } return res; } } string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) { auto &imagetype = get(type.image.type); string res; switch (imagetype.basetype) { case SPIRType::Int: case SPIRType::Short: case SPIRType::SByte: res = "i"; break; case SPIRType::UInt: case SPIRType::UShort: case SPIRType::UByte: res = "u"; break; default: break; } // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation. // We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter. if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) return res + "subpassInput" + (type.image.ms ? "MS" : ""); else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && subpass_input_is_framebuffer_fetch(id)) { SPIRType sampled_type = get(type.image.type); sampled_type.vecsize = 4; return type_to_glsl(sampled_type); } // If we're emulating subpassInput with samplers, force sampler2D // so we don't have to specify format. if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) { // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. if (type.image.dim == DimBuffer && type.image.sampled == 1) res += "sampler"; else res += type.image.sampled == 2 ? "image" : "texture"; } else res += "sampler"; switch (type.image.dim) { case Dim1D: // ES doesn't support 1D. Fake it with 2D. res += options.es ? 
"2D" : "1D"; break; case Dim2D: res += "2D"; break; case Dim3D: res += "3D"; break; case DimCube: res += "Cube"; break; case DimRect: if (options.es) SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES."); if (is_legacy_desktop()) require_extension_internal("GL_ARB_texture_rectangle"); res += "2DRect"; break; case DimBuffer: if (options.es && options.version < 320) require_extension_internal("GL_EXT_texture_buffer"); else if (!options.es && options.version < 300) require_extension_internal("GL_EXT_texture_buffer_object"); res += "Buffer"; break; case DimSubpassData: res += "2D"; break; default: SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported."); } if (type.image.ms) res += "MS"; if (type.image.arrayed) { if (is_legacy_desktop()) require_extension_internal("GL_EXT_texture_array"); res += "Array"; } // "Shadow" state in GLSL only exists for samplers and combined image samplers. if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && is_depth_image(type, id)) { res += "Shadow"; if (type.image.dim == DimCube && is_legacy()) { if (!options.es) require_extension_internal("GL_EXT_gpu_shader4"); else { require_extension_internal("GL_NV_shadow_samplers_cube"); res += "NV"; } } } return res; } string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) { if (backend.use_array_constructor && type.array.size() > 1) { if (options.flatten_multidimensional_arrays) SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, " "e.g. 
float[][]()."); else if (!options.es && options.version < 430) require_extension_internal("GL_ARB_arrays_of_arrays"); else if (options.es && options.version < 310) SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310."); } auto e = type_to_glsl(type); if (backend.use_array_constructor) { for (uint32_t i = 0; i < type.array.size(); i++) e += "[]"; } return e; } // The optional id parameter indicates the object whose type we are trying // to find the description for. It is optional. Most type descriptions do not // depend on a specific object's use of that type. string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) { if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) { // Need to create a magic type name which compacts the entire type information. string name = type_to_glsl(get_pointee_type(type)); for (size_t i = 0; i < type.array.size(); i++) { if (type.array_size_literal[i]) name += join(type.array[i], "_"); else name += join("id", type.array[i], "_"); } name += "Pointer"; return name; } switch (type.basetype) { case SPIRType::Struct: // Need OpName lookup here to get a "sensible" name for a struct. if (backend.explicit_struct_type) return join("struct ", to_name(type.self)); else return to_name(type.self); case SPIRType::Image: case SPIRType::SampledImage: return image_type_glsl(type, id); case SPIRType::Sampler: // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing // this distinction into the type system. return comparison_ids.count(id) ? "samplerShadow" : "sampler"; case SPIRType::AccelerationStructure: return ray_tracing_is_khr ? 
"accelerationStructureEXT" : "accelerationStructureNV"; case SPIRType::RayQuery: return "rayQueryEXT"; case SPIRType::Void: return "void"; default: break; } if (type.basetype == SPIRType::UInt && is_legacy()) { if (options.es) SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy ESSL."); else require_extension_internal("GL_EXT_gpu_shader4"); } if (type.basetype == SPIRType::AtomicCounter) { if (options.es && options.version < 310) SPIRV_CROSS_THROW("At least ESSL 3.10 required for atomic counters."); else if (!options.es && options.version < 420) require_extension_internal("GL_ARB_shader_atomic_counters"); } if (type.vecsize == 1 && type.columns == 1) // Scalar builtin { switch (type.basetype) { case SPIRType::Boolean: return "bool"; case SPIRType::SByte: return backend.basic_int8_type; case SPIRType::UByte: return backend.basic_uint8_type; case SPIRType::Short: return backend.basic_int16_type; case SPIRType::UShort: return backend.basic_uint16_type; case SPIRType::Int: return backend.basic_int_type; case SPIRType::UInt: return backend.basic_uint_type; case SPIRType::AtomicCounter: return "atomic_uint"; case SPIRType::Half: return "float16_t"; case SPIRType::Float: return "float"; case SPIRType::Double: return "double"; case SPIRType::Int64: return "int64_t"; case SPIRType::UInt64: return "uint64_t"; default: return "???"; } } else if (type.vecsize > 1 && type.columns == 1) // Vector builtin { switch (type.basetype) { case SPIRType::Boolean: return join("bvec", type.vecsize); case SPIRType::SByte: return join("i8vec", type.vecsize); case SPIRType::UByte: return join("u8vec", type.vecsize); case SPIRType::Short: return join("i16vec", type.vecsize); case SPIRType::UShort: return join("u16vec", type.vecsize); case SPIRType::Int: return join("ivec", type.vecsize); case SPIRType::UInt: return join("uvec", type.vecsize); case SPIRType::Half: return join("f16vec", type.vecsize); case SPIRType::Float: return join("vec", type.vecsize); case SPIRType::Double: 
return join("dvec", type.vecsize); case SPIRType::Int64: return join("i64vec", type.vecsize); case SPIRType::UInt64: return join("u64vec", type.vecsize); default: return "???"; } } else if (type.vecsize == type.columns) // Simple Matrix builtin { switch (type.basetype) { case SPIRType::Boolean: return join("bmat", type.vecsize); case SPIRType::Int: return join("imat", type.vecsize); case SPIRType::UInt: return join("umat", type.vecsize); case SPIRType::Half: return join("f16mat", type.vecsize); case SPIRType::Float: return join("mat", type.vecsize); case SPIRType::Double: return join("dmat", type.vecsize); // Matrix types not supported for int64/uint64. default: return "???"; } } else { switch (type.basetype) { case SPIRType::Boolean: return join("bmat", type.columns, "x", type.vecsize); case SPIRType::Int: return join("imat", type.columns, "x", type.vecsize); case SPIRType::UInt: return join("umat", type.columns, "x", type.vecsize); case SPIRType::Half: return join("f16mat", type.columns, "x", type.vecsize); case SPIRType::Float: return join("mat", type.columns, "x", type.vecsize); case SPIRType::Double: return join("dmat", type.columns, "x", type.vecsize); // Matrix types not supported for int64/uint64. 
default: return "???"; } } } void CompilerGLSL::add_variable(unordered_set &variables_primary, const unordered_set &variables_secondary, string &name) { if (name.empty()) return; ParsedIR::sanitize_underscores(name); if (ParsedIR::is_globally_reserved_identifier(name, true)) { name.clear(); return; } update_name_cache(variables_primary, variables_secondary, name); } void CompilerGLSL::add_local_variable_name(uint32_t id) { add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias); } void CompilerGLSL::add_resource_name(uint32_t id) { add_variable(resource_names, block_names, ir.meta[id].decoration.alias); } void CompilerGLSL::add_header_line(const std::string &line) { header_lines.push_back(line); } bool CompilerGLSL::has_extension(const std::string &ext) const { auto itr = find(begin(forced_extensions), end(forced_extensions), ext); return itr != end(forced_extensions); } void CompilerGLSL::require_extension(const std::string &ext) { if (!has_extension(ext)) forced_extensions.push_back(ext); } const SmallVector &CompilerGLSL::get_required_extensions() const { return forced_extensions; } void CompilerGLSL::require_extension_internal(const string &ext) { if (backend.supports_extensions && !has_extension(ext)) { forced_extensions.push_back(ext); force_recompile(); } } void CompilerGLSL::flatten_buffer_block(VariableID id) { auto &var = get(id); auto &type = get(var.basetype); auto name = to_name(type.self, false); auto &flags = get_decoration_bitset(type.self); if (!type.array.empty()) SPIRV_CROSS_THROW(name + " is an array of UBOs."); if (type.basetype != SPIRType::Struct) SPIRV_CROSS_THROW(name + " is not a struct."); if (!flags.get(DecorationBlock)) SPIRV_CROSS_THROW(name + " is not a block."); if (type.member_types.empty()) SPIRV_CROSS_THROW(name + " is an empty struct."); flattened_buffer_blocks.insert(id); } bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const { return false; // GLSL itself does not need to 
translate array builtin types to non-array builtin types } bool CompilerGLSL::is_user_type_structured(uint32_t /*id*/) const { return false; // GLSL itself does not have structured user type, but HLSL does with StructuredBuffer and RWStructuredBuffer resources. } bool CompilerGLSL::check_atomic_image(uint32_t id) { auto &type = expression_type(id); if (type.storage == StorageClassImage) { if (options.es && options.version < 320) require_extension_internal("GL_OES_shader_image_atomic"); auto *var = maybe_get_backing_variable(id); if (var) { if (has_decoration(var->self, DecorationNonWritable) || has_decoration(var->self, DecorationNonReadable)) { unset_decoration(var->self, DecorationNonWritable); unset_decoration(var->self, DecorationNonReadable); force_recompile(); } } return true; } else return false; } void CompilerGLSL::add_function_overload(const SPIRFunction &func) { Hasher hasher; for (auto &arg : func.arguments) { // Parameters can vary with pointer type or not, // but that will not change the signature in GLSL/HLSL, // so strip the pointer type before hashing. uint32_t type_id = get_pointee_type_id(arg.type); auto &type = get(type_id); if (!combined_image_samplers.empty()) { // If we have combined image samplers, we cannot really trust the image and sampler arguments // we pass down to callees, because they may be shuffled around. // Ignore these arguments, to make sure that functions need to differ in some other way // to be considered different overloads. if (type.basetype == SPIRType::SampledImage || (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler) { continue; } } hasher.u32(type_id); } uint64_t types_hash = hasher.get(); auto function_name = to_name(func.self); auto itr = function_overloads.find(function_name); if (itr != end(function_overloads)) { // There exists a function with this name already. 
auto &overloads = itr->second; if (overloads.count(types_hash) != 0) { // Overload conflict, assign a new name. add_resource_name(func.self); function_overloads[to_name(func.self)].insert(types_hash); } else { // Can reuse the name. overloads.insert(types_hash); } } else { // First time we see this function name. add_resource_name(func.self); function_overloads[to_name(func.self)].insert(types_hash); } } void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) { if (func.self != ir.default_entry_point) add_function_overload(func); // Avoid shadow declarations. local_variable_names = resource_names; string decl; auto &type = get(func.return_type); decl += flags_to_qualifiers_glsl(type, return_flags); decl += type_to_glsl(type); decl += type_to_array_glsl(type); decl += " "; if (func.self == ir.default_entry_point) { // If we need complex fallback in GLSL, we just wrap main() in a function // and interlock the entire shader ... if (interlocked_is_complex) decl += "spvMainInterlockedBody"; else decl += "main"; processing_entry_point = true; } else decl += to_name(func.self); decl += "("; SmallVector arglist; for (auto &arg : func.arguments) { // Do not pass in separate images or samplers if we're remapping // to combined image samplers. if (skip_argument(arg.id)) continue; // Might change the variable name if it already exists in this function. // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation // to use same name for variables. // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. add_local_variable_name(arg.id); arglist.push_back(argument_decl(arg)); // Hold a pointer to the parameter so we can invalidate the readonly field if needed. auto *var = maybe_get(arg.id); if (var) var->parameter = &arg; } for (auto &arg : func.shadow_arguments) { // Might change the variable name if it already exists in this function. 
// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation // to use same name for variables. // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. add_local_variable_name(arg.id); arglist.push_back(argument_decl(arg)); // Hold a pointer to the parameter so we can invalidate the readonly field if needed. auto *var = maybe_get(arg.id); if (var) var->parameter = &arg; } decl += merge(arglist); decl += ")"; statement(decl); } void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) { // Avoid potential cycles. if (func.active) return; func.active = true; // If we depend on a function, emit that function before we emit our own function. for (auto block : func.blocks) { auto &b = get(block); for (auto &i : b.ops) { auto ops = stream(i); auto op = static_cast(i.op); if (op == OpFunctionCall) { // Recursively emit functions which are called. uint32_t id = ops[2]; emit_function(get(id), ir.meta[ops[1]].decoration.decoration_flags); } } } if (func.entry_line.file_id != 0) emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal); emit_function_prototype(func, return_flags); begin_scope(); if (func.self == ir.default_entry_point) emit_entry_point_declarations(); current_function = &func; auto &entry_block = get(func.entry_block); sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack)); for (auto &array : func.constant_arrays_needed_on_stack) { auto &c = get(array); auto &type = get(c.constant_type); statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";"); } for (auto &v : func.local_variables) { auto &var = get(v); var.deferred_declaration = false; if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup)) { // Special variable type which cannot have initializer, // need to be declared as standalone variables. 
// Comes from MSL which can push global variables as local variables in main function.
			add_local_variable_name(var.self);
			statement(variable_decl(var), ";");
			var.deferred_declaration = false;
		}
		else if (var.storage == StorageClassPrivate)
		{
			// These variables will not have had their CFG usage analyzed, so move it to the entry block.
			// Comes from MSL which can push global variables as local variables in main function.
			// We could just declare them right now, but we would miss out on an important initialization case which is
			// LUT declaration in MSL.
			// If we don't declare the variable when it is assigned we're forced to go through a helper function
			// which copies elements one by one.
			add_local_variable_name(var.self);

			if (var.initializer)
			{
				statement(variable_decl(var), ";");
				var.deferred_declaration = false;
			}
			else
			{
				// Register the variable with the entry block (once) so its declaration is flushed from there.
				auto &dominated = entry_block.dominated_variables;
				if (find(begin(dominated), end(dominated), var.self) == end(dominated))
					entry_block.dominated_variables.push_back(var.self);
				var.deferred_declaration = true;
			}
		}
		else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
		{
			// No need to declare this variable, it has a static expression.
			var.deferred_declaration = false;
		}
		else if (expression_is_lvalue(v))
		{
			add_local_variable_name(var.self);

			// Loop variables should never be declared early, they are explicitly emitted in a loop.
			if (var.initializer && !var.loop_variable)
				statement(variable_decl_function_local(var), ";");
			else
			{
				// Don't declare variable until first use to declutter the GLSL output quite a lot.
				// If we don't touch the variable before first branch,
				// declare it then since we need variable declaration to be in top scope.
				var.deferred_declaration = true;
			}
		}
		else
		{
			// HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
			// For these types (non-lvalue), we enforce forwarding through a shadowed variable.
			// This means that when we OpStore to these variables, we just write in the expression ID directly.
			// This breaks any kind of branching, since the variable must be statically assigned.
			// Branching on samplers and images would be pretty much impossible to fake in GLSL.
			var.statically_assigned = true;
		}

		var.loop_variable_enable = false;

		// Loop variables are never declared outside their for-loop, so block any implicit declaration.
		if (var.loop_variable)
		{
			var.deferred_declaration = false;
			// Need to reset the static expression so we can fallback to initializer if need be.
			var.static_expression = 0;
		}
	}

	// Enforce declaration order for regression testing purposes.
	for (auto &block_id : func.blocks)
	{
		auto &block = get(block_id);
		sort(begin(block.dominated_variables), end(block.dominated_variables));
	}

	// Invoke every registered entry hook before the function body is emitted.
	for (auto &line : current_function->fixup_hooks_in)
		line();

	emit_block_chain(entry_block);

	end_scope();
	processing_entry_point = false;
	statement("");

	// Make sure deferred declaration state for local variables is cleared when we are done with function.
	// We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
	for (auto &v : func.local_variables)
	{
		auto &var = get(v);
		var.deferred_declaration = false;
	}
}

// Emits the optional gl_Position fixups. Nothing is emitted unless is_vertex_like_shader() is true.
// - options.vertex.fixup_clipspace: emits "gl_Position.z = 2.0 * gl_Position.z - gl_Position.w;"
//   (with an "f" suffix on the literal when backend.float_literal_suffix is set).
// - options.vertex.flip_vert_y: emits "gl_Position.y = -gl_Position.y;".
void CompilerGLSL::emit_fixup()
{
	if (is_vertex_like_shader())
	{
		if (options.vertex.fixup_clipspace)
		{
			const char *suffix = backend.float_literal_suffix ? "f" : "";
			statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
		}

		if (options.vertex.flip_vert_y)
			statement("gl_Position.y = -gl_Position.y;");
	}
}

// Flushes the Phi inputs of block `to` that arrive along the edge from block `from`.
// For every entry in to's phi_variables whose parent is `from`:
// - if the Phi variable is a loop variable that is not enabled yet, the incoming value is only recorded
//   as its static expression;
// - otherwise an assignment "function_variable = local_variable" is emitted
//   (or a read-modify-write form when optimize_read_modify_write() handles it).
// In both cases register_write() is called for the Phi variable.
// Nothing happens if `to` is marked with ignore_phi_from_block == from.
// May call force_recompile() the first time a "_<id>_copy" temporary is found to be needed.
void CompilerGLSL::flush_phi(BlockID from, BlockID to)
{
	auto &child = get(to);
	if (child.ignore_phi_from_block == from)
		return;

	// Phi variables that were saved to "_<id>_copy" by this call; later reads must use the copy.
	unordered_set temporary_phi_variables;

	for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
	{
		auto &phi = *itr;

		if (phi.parent == from)
		{
			auto &var = get(phi.function_variable);

			// A Phi variable might be a loop variable, so flush to static expression.
			if (var.loop_variable && !var.loop_variable_enable)
				var.static_expression = phi.local_variable;
			else
			{
				flush_variable_declaration(phi.function_variable);

				// Check if we are going to write to a Phi variable that another statement will read from
				// as part of another Phi node in our target block.
				// For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
				// This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
				bool need_saved_temporary =
				    find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
					    return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
				    }) != end(child.phi_variables);

				if (need_saved_temporary)
				{
					// Need to make sure we declare the phi variable with a copy at the right scope.
					// We cannot safely declare a temporary here since we might be inside a continue block.
					if (!var.allocate_temporary_copy)
					{
						var.allocate_temporary_copy = true;
						force_recompile();
					}
					statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
					temporary_phi_variables.insert(phi.function_variable);
				}

				// This might be called in continue block, so make sure we
				// use this to emit ESSL 1.0 compliant increments/decrements.
				auto lhs = to_expression(phi.function_variable);

				string rhs;
				if (temporary_phi_variables.count(phi.local_variable))
					rhs = join("_", phi.local_variable, "_copy");
				else
					rhs = to_pointer_expression(phi.local_variable);

				if (!optimize_read_modify_write(get(var.basetype), lhs, rhs))
					statement(lhs, " = ", rhs, ";");
			}

			register_write(phi.function_variable);
		}
	}
}

// Emits the code for a branch from block `from` to the continue block `to`.
// A branch to itself (from == to) emits nothing.
// If `to` is a complex continue block, its block chain is emitted inline; otherwise a "continue;"
// statement is emitted unless the CFG query below shows that `from` terminates control flow in its loop.
void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
{
	auto &to_block = get(to);
	if (from == to)
		return;

	assert(is_continue(to));
	if (to_block.complex_continue)
	{
		// Just emit the whole block chain as is.
auto usage_counts = expression_usage_counts;

		emit_block_chain(to_block);

		// Expression usage counts are moot after returning from the continue block.
		expression_usage_counts = usage_counts;
	}
	else
	{
		auto &from_block = get(from);
		bool outside_control_flow = false;
		uint32_t loop_dominator = 0;

		// FIXME: Refactor this to not use the old loop_dominator tracking.
		if (from_block.merge_block)
		{
			// If we are a loop header, we don't set the loop dominator,
			// so just use "self" here.
			loop_dominator = from;
		}
		else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
		{
			loop_dominator = from_block.loop_dominator;
		}

		// loop_dominator stays 0 when neither case above applied; the CFG query is skipped then
		// and an explicit "continue;" is emitted below.
		if (loop_dominator != 0)
		{
			auto &cfg = get_cfg_for_current_function();

			// For non-complex continue blocks, we implicitly branch to the continue block
			// by having the continue block be part of the loop header in for (; ; continue-block).
			outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
		}

		// Some simplification for for-loops. We always end up with a useless continue;
		// statement since we branch to a loop block.
		// Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block,
		// we can avoid writing out an explicit continue statement.
		// Similar optimization to return statements if we know we're outside flow control.
		if (!outside_control_flow)
			statement("continue;");
	}
}

// Emits an unconditional branch from block `from` to block `to`.
// Phi inputs for the edge and the control-dependent expressions of `from` are flushed first.
// Then exactly one of the following happens, checked in this order:
// 1. `to` is a loop header and is the loop dominator of `from`: emit "continue;".
// 2. `to` is a break target (and from != to): emit "break;", first setting the "_<id>_ladder_break"
//    flag of enclosing switch blocks when the break leaves a loop from inside a switch.
// 3. `to` is a continue block, or from == to: defer to branch_to_continue(), unless `to` is both
//    a continue block and a merge block.
// 4. `to` is not a conditional target: emit the block chain of `to` inline.
// Otherwise nothing further is emitted.
void CompilerGLSL::branch(BlockID from, BlockID to)
{
	flush_phi(from, to);
	flush_control_dependent_expressions(from);

	bool to_is_continue = is_continue(to);

	// This is only a continue if we branch to our loop dominator.
	if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get(from).loop_dominator == to)
	{
		// This can happen if we had a complex continue block which was emitted.
		// Once the continue block tries to branch to the loop header, just emit continue;
		// and end the chain here.
		statement("continue;");
	}
	else if (from != to && is_break(to))
	{
		// We cannot break to ourselves, so check explicitly for from != to.
		// This case can trigger if a loop header is all three of these things:
		// - Continue block
		// - Loop header
		// - Break merge target all at once ...

		// Very dirty workaround.
		// Switch constructs are able to break, but they cannot break out of a loop at the same time,
		// yet SPIR-V allows it.
		// Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
		// write to the ladder here, and defer the break.
		// The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
		if (is_loop_break(to))
		{
			// Walk the switch stack from the innermost switch outwards and stop at the first one
			// that is not inside the loop whose merge block is `to`.
			for (size_t n = current_emitting_switch_stack.size(); n; n--)
			{
				auto *current_emitting_switch = current_emitting_switch_stack[n - 1];

				if (current_emitting_switch &&
				    current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
				    get(current_emitting_switch->loop_dominator).merge_block == to)
				{
					// The ladder variable is only declared when need_ladder_break is already set,
					// so the first time around a recompile is requested.
					if (!current_emitting_switch->need_ladder_break)
					{
						force_recompile();
						current_emitting_switch->need_ladder_break = true;
					}

					statement("_", current_emitting_switch->self, "_ladder_break = true;");
				}
				else
					break;
			}
		}
		statement("break;");
	}
	else if (to_is_continue || from == to)
	{
		// For from == to case can happen for a do-while loop which branches into itself.
		// We don't mark these cases as continue blocks, but the only possible way to branch into
		// ourselves is through means of continue blocks.

		// If we are merging to a continue block, there is no need to emit the block chain for continue here.
		// We can branch to the continue block after we merge execution.

		// Here we make use of structured control flow rules from spec:
		// 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
		//       - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
		// If we are branching to a merge block, we must be inside a construct which dominates the merge block.
		auto &block_meta = ir.block_meta[to];
		bool branching_to_merge =
		    (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
		                   ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
		if (!to_is_continue || !branching_to_merge)
			branch_to_continue(from, to);
	}
	else if (!is_conditional(to))
		emit_block_chain(get(to));

	// It is important that we check for break before continue.
	// A block might serve two purposes, a break block for the inner scope, and
	// a continue block in the outer scope.
	// Inner scope always takes precedence.
}

// Emits a conditional branch from block `from` on expression `cond` as an if / else construct.
// A side needs code unless it targets the selection merge block of `from` and requires no Phi flush
// for that edge. If neither side needs code, nothing is emitted. If only the false side needs code,
// a single negated "if (!cond)" is emitted.
void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
{
	auto &from_block = get(from);
	// BlockID(0) stands for "no selection merge block" here.
	BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);

	// If we branch directly to our selection merge target, we don't need a code path.
	bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
	bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);

	if (!true_block_needs_code && !false_block_needs_code)
		return;

	// We might have a loop merge here. Only consider selection flattening constructs.
	// Loop hints are handled explicitly elsewhere.
if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten) emit_block_hints(from_block); if (true_block_needs_code) { statement("if (", to_expression(cond), ")"); begin_scope(); branch(from, true_block); end_scope(); if (false_block_needs_code) { statement("else"); begin_scope(); branch(from, false_block); end_scope(); } } else if (false_block_needs_code) { // Only need false path, use negative conditional. statement("if (!", to_enclosed_expression(cond), ")"); begin_scope(); branch(from, false_block); end_scope(); } } // FIXME: This currently cannot handle complex continue blocks // as in do-while. // This should be seen as a "trivial" continue block. string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block) { auto *block = &get(continue_block); // While emitting the continue block, declare_temporary will check this // if we have to emit temporaries. current_continue_block = block; SmallVector statements; // Capture all statements into our list. auto *old = redirect_statement; redirect_statement = &statements; // Stamp out all blocks one after each other. while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0) { // Write out all instructions we have in this block. emit_block_instructions(*block); // For plain branchless for/while continue blocks. if (block->next_block) { flush_phi(continue_block, block->next_block); block = &get(block->next_block); } // For do while blocks. The last block will be a select block. else if (block->true_block && follow_true_block) { flush_phi(continue_block, block->true_block); block = &get(block->true_block); } else if (block->false_block && follow_false_block) { flush_phi(continue_block, block->false_block); block = &get(block->false_block); } else { SPIRV_CROSS_THROW("Invalid continue block detected!"); } } // Restore old pointer. 
redirect_statement = old; // Somewhat ugly, strip off the last ';' since we use ',' instead. // Ideally, we should select this behavior in statement(). for (auto &s : statements) { if (!s.empty() && s.back() == ';') s.erase(s.size() - 1, 1); } current_continue_block = nullptr; return merge(statements); } void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block) { // While loops do not take initializers, so declare all of them outside. for (auto &loop_var : block.loop_variables) { auto &var = get(loop_var); statement(variable_decl(var), ";"); } } string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block) { if (block.loop_variables.empty()) return ""; bool same_types = for_loop_initializers_are_same_type(block); // We can only declare for loop initializers if all variables are of same type. // If we cannot do this, declare individual variables before the loop header. // We might have a loop variable candidate which was not assigned to for some reason. uint32_t missing_initializers = 0; for (auto &variable : block.loop_variables) { uint32_t expr = get(variable).static_expression; // Sometimes loop variables are initialized with OpUndef, but we can just declare // a plain variable without initializer in this case. if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) missing_initializers++; } if (block.loop_variables.size() == 1 && missing_initializers == 0) { return variable_decl(get(block.loop_variables.front())); } else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size())) { for (auto &loop_var : block.loop_variables) statement(variable_decl(get(loop_var)), ";"); return ""; } else { // We have a mix of loop variables, either ones with a clear initializer, or ones without. // Separate the two streams. 
string expr; for (auto &loop_var : block.loop_variables) { uint32_t static_expr = get(loop_var).static_expression; if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef) { statement(variable_decl(get(loop_var)), ";"); } else { auto &var = get(loop_var); auto &type = get_variable_data_type(var); if (expr.empty()) { // For loop initializers are of the form (block.true_block), get(block.merge_block))) condition = join("!", enclose_expression(condition)); statement("while (", condition, ")"); break; } default: block.disable_block_optimization = true; force_recompile(); begin_scope(); // We'll see an end_scope() later. return false; } begin_scope(); return true; } else { block.disable_block_optimization = true; force_recompile(); begin_scope(); // We'll see an end_scope() later. return false; } } else if (method == SPIRBlock::MergeToDirectForLoop) { auto &child = get(block.next_block); // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. flush_undeclared_variables(child); uint32_t current_count = statement_count; // If we're trying to create a true for loop, // we need to make sure that all opcodes before branch statement do not actually emit any code. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. emit_block_instructions_with_masked_debug(child); bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); if (current_count == statement_count && condition_is_temporary) { uint32_t target_block = child.true_block; switch (continue_type) { case SPIRBlock::ForLoop: { // Important that we do this in this order because // emitting the continue block can invalidate the condition expression. auto initializer = emit_for_loop_initializers(block); auto condition = to_expression(child.condition); // Condition might have to be inverted. 
if (execution_is_noop(get(child.true_block), get(block.merge_block)))
				{
					condition = join("!", enclose_expression(condition));
					target_block = child.false_block;
				}

				auto continue_block = emit_continue_block(block.continue_block, false, false);
				emit_block_hints(block);
				statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
				break;
			}

			case SPIRBlock::WhileLoop:
			{
				emit_while_loop_initializers(block);
				emit_block_hints(block);

				auto condition = to_expression(child.condition);
				// Condition might have to be inverted.
				if (execution_is_noop(get(child.true_block), get(block.merge_block)))
				{
					condition = join("!", enclose_expression(condition));
					target_block = child.false_block;
				}

				statement("while (", condition, ")");
				break;
			}

			default:
				block.disable_block_optimization = true;
				force_recompile();
				begin_scope(); // We'll see an end_scope() later.
				return false;
			}

			begin_scope();
			branch(child.self, target_block);
			return true;
		}
		else
		{
			block.disable_block_optimization = true;
			force_recompile();
			begin_scope(); // We'll see an end_scope() later.
			return false;
		}
	}
	else
		return false;
}

// Calls flush_variable_declaration() for every variable listed in block.dominated_variables.
void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
{
	for (auto &v : block.dominated_variables)
		flush_variable_declaration(v);
}

// Declares the given temporaries up front as local variables.
// Each entry is a pair whose `first` is used as the type ID and whose `second` is the ID of the temporary.
// The list is sorted in place by `second`. Entries with a pointer type are skipped unless
// backend.native_pointers is set.
// Every declared ID is added to hoisted_temporaries and forced_temporaries and gets an expression
// registered under its name. If the ID has an entry in temporary_to_mirror_precision_alias,
// the mirror ID is declared and registered the same way (added to hoisted_temporaries only).
void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &temporaries)
{
	// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
	// Need to sort these to ensure that reference output is stable.
	sort(begin(temporaries), end(temporaries),
	     [](const pair &a, const pair &b) { return a.second < b.second; });

	for (auto &tmp : temporaries)
	{
		auto &type = get(tmp.first);

		// There are some rare scenarios where we are asked to declare pointer types as hoisted temporaries.
		// This should be ignored unless we're doing actual variable pointers and backend supports it.
		// Access chains cannot normally be lowered to temporaries in GLSL and HLSL.
		if (type.pointer && !backend.native_pointers)
			continue;

		add_local_variable_name(tmp.second);
		auto &flags = get_decoration_bitset(tmp.second);

		// Not all targets support pointer literals, so don't bother with that case.
		// The initializer stays empty unless zero-initialization is both requested and possible for this type.
		string initializer;
		if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
			initializer = join(" = ", to_zero_initialized_expression(tmp.first));

		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");

		hoisted_temporaries.insert(tmp.second);
		forced_temporaries.insert(tmp.second);

		// The temporary might be read from before it's assigned, set up the expression now.
		set(tmp.second, to_name(tmp.second), tmp.first, true);

		// If we have hoisted temporaries in multi-precision contexts, emit that here too ...
		// We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here.
		auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(tmp.second);
		if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end())
		{
			uint32_t mirror_id = mirrored_precision_itr->second;
			auto &mirror_flags = get_decoration_bitset(mirror_id);
			statement(flags_to_qualifiers_glsl(type, mirror_flags), variable_decl(type, to_name(mirror_id)), initializer, ";");
			// The temporary might be read from before it's assigned, set up the expression now.
			set(mirror_id, to_name(mirror_id), tmp.first, true);
			hoisted_temporaries.insert(mirror_id);
		}
	}
}

void CompilerGLSL::emit_block_chain(SPIRBlock &block)
{
	bool select_branch_to_true_block = false;
	bool select_branch_to_false_block = false;
	bool skip_direct_branch = false;
	bool emitted_loop_header_variables = false;
	bool force_complex_continue_block = false;
	ValueSaver loop_level_saver(current_loop_level);

	if (block.merge == SPIRBlock::MergeLoop)
		add_loop_level();

	// If we're emitting PHI variables with precision aliases, we have to emit them as hoisted temporaries.
for (auto var_id : block.dominated_variables) { auto &var = get(var_id); if (var.phi_variable) { auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(var_id); if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end() && find_if(block.declare_temporary.begin(), block.declare_temporary.end(), [mirrored_precision_itr](const std::pair &p) { return p.second == mirrored_precision_itr->second; }) == block.declare_temporary.end()) { block.declare_temporary.push_back({ var.basetype, mirrored_precision_itr->second }); } } } emit_hoisted_temporaries(block.declare_temporary); SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone; if (block.continue_block) { continue_type = continue_block_type(get(block.continue_block)); // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles. if (continue_type == SPIRBlock::ComplexLoop) block.complex_continue = true; } // If we have loop variables, stop masking out access to the variable now. for (auto var_id : block.loop_variables) { auto &var = get(var_id); var.loop_variable_enable = true; // We're not going to declare the variable directly, so emit a copy here. emit_variable_temporary_copies(var); } // Remember deferred declaration state. We will restore it before returning. SmallVector rearm_dominated_variables(block.dominated_variables.size()); for (size_t i = 0; i < block.dominated_variables.size(); i++) { uint32_t var_id = block.dominated_variables[i]; auto &var = get(var_id); rearm_dominated_variables[i] = var.deferred_declaration; } // This is the method often used by spirv-opt to implement loops. // The loop header goes straight into the continue block. // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block, // it *MUST* be used in the continue block. This loop method will not work. 
if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop)) { flush_undeclared_variables(block); if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop)) { if (execution_is_noop(get(block.true_block), get(block.merge_block))) select_branch_to_false_block = true; else select_branch_to_true_block = true; emitted_loop_header_variables = true; force_complex_continue_block = true; } } // This is the older loop behavior in glslang which branches to loop body directly from the loop header. else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop)) { flush_undeclared_variables(block); if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop)) { // The body of while, is actually just the true (or false) block, so always branch there unconditionally. if (execution_is_noop(get(block.true_block), get(block.merge_block))) select_branch_to_false_block = true; else select_branch_to_true_block = true; emitted_loop_header_variables = true; } } // This is the newer loop behavior in glslang which branches from Loop header directly to // a new block, which in turn has a OpBranchSelection without a selection merge. else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop)) { flush_undeclared_variables(block); if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop)) { skip_direct_branch = true; emitted_loop_header_variables = true; } } else if (continue_type == SPIRBlock::DoWhileLoop) { flush_undeclared_variables(block); emit_while_loop_initializers(block); emitted_loop_header_variables = true; // We have some temporaries where the loop header is the dominator. // We risk a case where we have code like: // for (;;) { create-temporary; break; } consume-temporary; // so force-declare temporaries here. 
emit_hoisted_temporaries(block.potential_declare_temporary); statement("do"); begin_scope(); emit_block_instructions(block); } else if (block.merge == SPIRBlock::MergeLoop) { flush_undeclared_variables(block); emit_while_loop_initializers(block); emitted_loop_header_variables = true; // We have a generic loop without any distinguishable pattern like for, while or do while. get(block.continue_block).complex_continue = true; continue_type = SPIRBlock::ComplexLoop; // We have some temporaries where the loop header is the dominator. // We risk a case where we have code like: // for (;;) { create-temporary; break; } consume-temporary; // so force-declare temporaries here. emit_hoisted_temporaries(block.potential_declare_temporary); emit_block_hints(block); statement("for (;;)"); begin_scope(); emit_block_instructions(block); } else { emit_block_instructions(block); } // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem // as writes to said loop variables might have been masked out, we need a recompile. if (!emitted_loop_header_variables && !block.loop_variables.empty()) { force_recompile_guarantee_forward_progress(); for (auto var : block.loop_variables) get(var).loop_variable = false; block.loop_variables.clear(); } flush_undeclared_variables(block); bool emit_next_block = true; // Handle end of block. switch (block.terminator) { case SPIRBlock::Direct: // True when emitting complex continue block. if (block.loop_dominator == block.next_block) { branch(block.self, block.next_block); emit_next_block = false; } // True if MergeToDirectForLoop succeeded. else if (skip_direct_branch) emit_next_block = false; else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block)) { branch(block.self, block.next_block); emit_next_block = false; } break; case SPIRBlock::Select: // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded. 
if (select_branch_to_true_block) { if (force_complex_continue_block) { assert(block.true_block == block.continue_block); // We're going to emit a continue block directly here, so make sure it's marked as complex. auto &complex_continue = get(block.continue_block).complex_continue; bool old_complex = complex_continue; complex_continue = true; branch(block.self, block.true_block); complex_continue = old_complex; } else branch(block.self, block.true_block); } else if (select_branch_to_false_block) { if (force_complex_continue_block) { assert(block.false_block == block.continue_block); // We're going to emit a continue block directly here, so make sure it's marked as complex. auto &complex_continue = get(block.continue_block).complex_continue; bool old_complex = complex_continue; complex_continue = true; branch(block.self, block.false_block); complex_continue = old_complex; } else branch(block.self, block.false_block); } else branch(block.self, block.condition, block.true_block, block.false_block); break; case SPIRBlock::MultiSelect: { auto &type = expression_type(block.condition); bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64; if (block.merge == SPIRBlock::MergeNone) SPIRV_CROSS_THROW("Switch statement is not structured"); if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)) { // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages. 
SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors."); } const char *label_suffix = ""; if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix) label_suffix = "u"; else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch) label_suffix = "l"; else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch) label_suffix = "ul"; else if (type.basetype == SPIRType::UShort) label_suffix = backend.uint16_t_literal_suffix; else if (type.basetype == SPIRType::Short) label_suffix = backend.int16_t_literal_suffix; current_emitting_switch_stack.push_back(&block); if (block.need_ladder_break) statement("bool _", block.self, "_ladder_break = false;"); // Find all unique case constructs. unordered_map> case_constructs; SmallVector block_declaration_order; SmallVector literals_to_merge; // If a switch case branches to the default block for some reason, we can just remove that literal from consideration // and let the default: block handle it. // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here. // We only need to consider possible fallthrough if order[i] branches to order[i + 1]. auto &cases = get_case_list(block); for (auto &c : cases) { if (c.block != block.next_block && c.block != block.default_block) { if (!case_constructs.count(c.block)) block_declaration_order.push_back(c.block); case_constructs[c.block].push_back(c.value); } else if (c.block == block.next_block && block.default_block != block.next_block) { // We might have to flush phi inside specific case labels. // If we can piggyback on default:, do so instead. literals_to_merge.push_back(c.value); } } // Empty literal array -> default. if (block.default_block != block.next_block) { auto &default_block = get(block.default_block); // We need to slide in the default block somewhere in this chain // if there are fall-through scenarios since the default is declared separately in OpSwitch. 
// Only consider trivial fall-through cases here. size_t num_blocks = block_declaration_order.size(); bool injected_block = false; for (size_t i = 0; i < num_blocks; i++) { auto &case_block = get(block_declaration_order[i]); if (execution_is_direct_branch(case_block, default_block)) { // Fallthrough to default block, we must inject the default block here. block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block); injected_block = true; break; } else if (execution_is_direct_branch(default_block, case_block)) { // Default case is falling through to another case label, we must inject the default block here. block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block); injected_block = true; break; } } // Order does not matter. if (!injected_block) block_declaration_order.push_back(block.default_block); else if (is_legacy_es()) SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0."); case_constructs[block.default_block] = {}; } size_t num_blocks = block_declaration_order.size(); const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string { if (is_unsigned_case) return convert_to_string(literal); // For smaller cases, the literals are compiled as 32 bit wide // literals so we don't need to care for all sizes specifically. if (width <= 32) { return convert_to_string(int64_t(int32_t(literal))); } return convert_to_string(int64_t(literal)); }; const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector &labels, const char *suffix) -> string { string ret; size_t count = labels.size(); for (size_t i = 0; i < count; i++) { if (i) ret += " || "; ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix, count > 1 ? ")" : ""); } return ret; }; // We need to deal with a complex scenario for OpPhi. 
If we have case-fallthrough and Phi in the picture, // we need to flush phi nodes outside the switch block in a branch, // and skip any Phi handling inside the case label to make fall-through work as expected. // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this // inside the case label if at all possible. for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++) { if (flush_phi_required(block.self, block_declaration_order[i]) && flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i])) { uint32_t target_block = block_declaration_order[i]; // Make sure we flush Phi, it might have been marked to be ignored earlier. get(target_block).ignore_phi_from_block = 0; auto &literals = case_constructs[target_block]; if (literals.empty()) { // Oh boy, gotta make a complete negative test instead! o.o // Find all possible literals that would *not* make us enter the default block. // If none of those literals match, we flush Phi ... SmallVector conditions; for (size_t j = 0; j < num_blocks; j++) { auto &negative_literals = case_constructs[block_declaration_order[j]]; for (auto &case_label : negative_literals) conditions.push_back(join(to_enclosed_expression(block.condition), " != ", to_case_label(case_label, type.width, unsigned_case))); } statement("if (", merge(conditions, " && "), ")"); begin_scope(); flush_phi(block.self, target_block); end_scope(); } else { SmallVector conditions; conditions.reserve(literals.size()); for (auto &case_label : literals) conditions.push_back(join(to_enclosed_expression(block.condition), " == ", to_case_label(case_label, type.width, unsigned_case))); statement("if (", merge(conditions, " || "), ")"); begin_scope(); flush_phi(block.self, target_block); end_scope(); } // Mark the block so that we don't flush Phi from header to case label. 
get(target_block).ignore_phi_from_block = block.self; } } // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate // non-structured exits with the help of a switch block. // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic. bool block_like_switch = cases.empty(); // If this is true, the switch is completely meaningless, and we should just avoid it. bool collapsed_switch = block_like_switch && block.default_block == block.next_block; if (!collapsed_switch) { if (block_like_switch || is_legacy_es()) { // ESSL 1.0 is not guaranteed to support do/while. if (is_legacy_es()) { uint32_t counter = statement_count; statement("for (int spvDummy", counter, " = 0; spvDummy", counter, " < 1; spvDummy", counter, "++)"); } else statement("do"); } else { emit_block_hints(block); statement("switch (", to_unpacked_expression(block.condition), ")"); } begin_scope(); } for (size_t i = 0; i < num_blocks; i++) { uint32_t target_block = block_declaration_order[i]; auto &literals = case_constructs[target_block]; if (literals.empty()) { // Default case. if (!block_like_switch) { if (is_legacy_es()) statement("else"); else statement("default:"); } } else { if (is_legacy_es()) { statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix), ")"); } else { for (auto &case_literal : literals) { // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here. statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":"); } } } auto &case_block = get(target_block); if (backend.support_case_fallthrough && i + 1 < num_blocks && execution_is_direct_branch(case_block, get(block_declaration_order[i + 1]))) { // We will fall through here, so just terminate the block chain early. // We still need to deal with Phi potentially. 
// No need for a stack-like thing here since we only do fall-through when there is a // single trivial branch to fall-through target.. current_emitting_switch_fallthrough = true; } else current_emitting_switch_fallthrough = false; if (!block_like_switch) begin_scope(); branch(block.self, target_block); if (!block_like_switch) end_scope(); current_emitting_switch_fallthrough = false; } // Might still have to flush phi variables if we branch from loop header directly to merge target. // This is supposed to emit all cases where we branch from header to merge block directly. // There are two main scenarios where cannot rely on default fallthrough. // - There is an explicit default: label already. // In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block. // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there. bool header_merge_requires_phi = flush_phi_required(block.self, block.next_block); bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty(); if (!collapsed_switch && ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty())) { for (auto &case_literal : literals_to_merge) statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":"); if (block.default_block == block.next_block) { if (is_legacy_es()) statement("else"); else statement("default:"); } begin_scope(); flush_phi(block.self, block.next_block); statement("break;"); end_scope(); } if (!collapsed_switch) { if (block_like_switch && !is_legacy_es()) end_scope_decl("while(false)"); else end_scope(); } else flush_phi(block.self, block.next_block); if (block.need_ladder_break) { statement("if (_", block.self, "_ladder_break)"); begin_scope(); statement("break;"); end_scope(); } current_emitting_switch_stack.pop_back(); break; } case SPIRBlock::Return: { for (auto &line : current_function->fixup_hooks_out) 
line(); if (processing_entry_point) emit_fixup(); auto &cfg = get_cfg_for_current_function(); if (block.return_value) { auto &type = expression_type(block.return_value); if (!type.array.empty() && !backend.can_return_array) { // If we cannot return arrays, we will have a special out argument we can write to instead. // The backend is responsible for setting this up, and redirection the return values as appropriate. if (ir.ids[block.return_value].get_type() != TypeUndef) { emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction, get_expression_effective_storage_class(block.return_value)); } if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || block.loop_dominator != BlockID(SPIRBlock::NoDominator)) { statement("return;"); } } else { // OpReturnValue can return Undef, so don't emit anything for this case. if (ir.ids[block.return_value].get_type() != TypeUndef) statement("return ", to_unpacked_expression(block.return_value), ";"); } } else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || block.loop_dominator != BlockID(SPIRBlock::NoDominator)) { // If this block is the very final block and not called from control flow, // we do not need an explicit return which looks out of place. Just end the function here. // In the very weird case of for(;;) { return; } executing return is unconditional, // but we actually need a return here ... statement("return;"); } break; } // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement. case SPIRBlock::Kill: statement(backend.discard_literal, ";"); if (block.return_value) statement("return ", to_unpacked_expression(block.return_value), ";"); break; case SPIRBlock::Unreachable: { // Avoid emitting false fallthrough, which can happen for // if (cond) break; else discard; inside a case label. // Discard is not always implementable as a terminator. 
auto &cfg = get_cfg_for_current_function(); bool inner_dominator_is_switch = false; ID id = block.self; while (id) { auto &iter_block = get(id); if (iter_block.terminator == SPIRBlock::MultiSelect || iter_block.merge == SPIRBlock::MergeLoop) { ID next_block = iter_block.merge == SPIRBlock::MergeLoop ? iter_block.merge_block : iter_block.next_block; bool outside_construct = next_block && cfg.find_common_dominator(next_block, block.self) == next_block; if (!outside_construct) { inner_dominator_is_switch = iter_block.terminator == SPIRBlock::MultiSelect; break; } } if (cfg.get_preceding_edges(id).empty()) break; id = cfg.get_immediate_dominator(id); } if (inner_dominator_is_switch) statement("break; // unreachable workaround"); emit_next_block = false; break; } case SPIRBlock::IgnoreIntersection: statement("ignoreIntersectionEXT;"); break; case SPIRBlock::TerminateRay: statement("terminateRayEXT;"); break; case SPIRBlock::EmitMeshTasks: emit_mesh_tasks(block); break; default: SPIRV_CROSS_THROW("Unimplemented block terminator."); } if (block.next_block && emit_next_block) { // If we hit this case, we're dealing with an unconditional branch, which means we will output // that block after this. If we had selection merge, we already flushed phi variables. if (block.merge != SPIRBlock::MergeSelection) { flush_phi(block.self, block.next_block); // For a direct branch, need to remember to invalidate expressions in the next linear block instead. get(block.next_block).invalidate_expressions = block.invalidate_expressions; } // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. if (!current_emitting_switch_fallthrough) { // For merge selects we might have ignored the fact that a merge target // could have been a break; or continue; // We will need to deal with it here. if (is_loop_break(block.next_block)) { // Cannot check for just break, because switch statements will also use break. 
assert(block.merge == SPIRBlock::MergeSelection); statement("break;"); } else if (is_continue(block.next_block)) { assert(block.merge == SPIRBlock::MergeSelection); branch_to_continue(block.self, block.next_block); } else if (BlockID(block.self) != block.next_block) emit_block_chain(get(block.next_block)); } } if (block.merge == SPIRBlock::MergeLoop) { if (continue_type == SPIRBlock::DoWhileLoop) { // Make sure that we run the continue block to get the expressions set, but this // should become an empty string. // We have no fallbacks if we cannot forward everything to temporaries ... const auto &continue_block = get(block.continue_block); bool positive_test = execution_is_noop(get(continue_block.true_block), get(continue_block.loop_dominator)); uint32_t current_count = statement_count; auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test); if (statement_count != current_count) { // The DoWhile block has side effects, force ComplexLoop pattern next pass. get(block.continue_block).complex_continue = true; force_recompile(); } // Might have to invert the do-while test here. auto condition = to_expression(continue_block.condition); if (!positive_test) condition = join("!", enclose_expression(condition)); end_scope_decl(join("while (", condition, ")")); } else end_scope(); loop_level_saver.release(); // We cannot break out of two loops at once, so don't check for break; here. // Using block.self as the "from" block isn't quite right, but it has the same scope // and dominance structure, so it's fine. if (is_continue(block.merge_block)) branch_to_continue(block.self, block.merge_block); else emit_block_chain(get(block.merge_block)); } // Forget about control dependent expressions now. block.invalidate_expressions.clear(); // After we return, we must be out of scope, so if we somehow have to re-emit this function, // re-declare variables if necessary. 
assert(rearm_dominated_variables.size() == block.dominated_variables.size()); for (size_t i = 0; i < block.dominated_variables.size(); i++) { uint32_t var = block.dominated_variables[i]; get(var).deferred_declaration = rearm_dominated_variables[i]; } // Just like for deferred declaration, we need to forget about loop variable enable // if our block chain is reinstantiated later. for (auto &var_id : block.loop_variables) get(var_id).loop_variable_enable = false; } void CompilerGLSL::begin_scope() { statement("{"); indent++; } void CompilerGLSL::end_scope() { if (!indent) SPIRV_CROSS_THROW("Popping empty indent stack."); indent--; statement("}"); } void CompilerGLSL::end_scope(const string &trailer) { if (!indent) SPIRV_CROSS_THROW("Popping empty indent stack."); indent--; statement("}", trailer); } void CompilerGLSL::end_scope_decl() { if (!indent) SPIRV_CROSS_THROW("Popping empty indent stack."); indent--; statement("};"); } void CompilerGLSL::end_scope_decl(const string &decl) { if (!indent) SPIRV_CROSS_THROW("Popping empty indent stack."); indent--; statement("} ", decl, ";"); } void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length) { // If our variable is remapped, and we rely on type-remapping information as // well, then we cannot pass the variable as a function parameter. // Fixing this is non-trivial without stamping out variants of the same function, // so for now warn about this and suggest workarounds instead. for (uint32_t i = 0; i < length; i++) { auto *var = maybe_get(args[i]); if (!var || !var->remapped_variable) continue; auto &type = get(var->basetype); if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) { SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. " "This will not work correctly because type-remapping information is lost. 
" "To workaround, please consider not passing the subpass input as a function parameter, " "or use in/out variables instead which do not need type remapping information."); } } } const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) { // FIXME: This is kind of hacky. There should be a cleaner way. auto offset = uint32_t(&instr - current_emitting_block->ops.data()); if ((offset + 1) < current_emitting_block->ops.size()) return ¤t_emitting_block->ops[offset + 1]; else return nullptr; } uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) { return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask | MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); } bool CompilerGLSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, StorageClass, StorageClass) { string lhs; if (expr) lhs = expr; else lhs = to_expression(lhs_id); statement(lhs, " = ", to_expression(rhs_id), ";"); return true; } bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id) { if (!backend.force_gl_in_out_block) return false; // This path is only relevant for GL backends. 
auto *var = maybe_get(target_id); if (!var || var->storage != StorageClassOutput) return false; if (!is_builtin_variable(*var) || BuiltIn(get_decoration(var->self, DecorationBuiltIn)) != BuiltInSampleMask) return false; auto &type = expression_type(source_id); string array_expr; if (type.array_size_literal.back()) { array_expr = convert_to_string(type.array.back()); if (type.array.back() == 0) SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); } else array_expr = to_expression(type.array.back()); SPIRType target_type; target_type.basetype = SPIRType::Int; statement("for (int i = 0; i < int(", array_expr, "); i++)"); begin_scope(); statement(to_expression(target_id), "[i] = ", bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")), ";"); end_scope(); return true; } void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr) { if (!backend.force_gl_in_out_block) return; // This path is only relevant for GL backends. auto *var = maybe_get(source_id); if (!var) return; if (var->storage != StorageClassInput && var->storage != StorageClassOutput) return; auto &type = get_variable_data_type(*var); if (type.array.empty()) return; auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); bool is_builtin = is_builtin_variable(*var) && (builtin == BuiltInPointSize || builtin == BuiltInPosition || builtin == BuiltInSampleMask); bool is_tess = is_tessellation_shader(); bool is_patch = has_decoration(var->self, DecorationPatch); bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask; // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it. // We must unroll the array load. // For builtins, we couldn't catch this case normally, // because this is resolved in the OpAccessChain in most cases. // If we load the entire array, we have no choice but to unroll here. 
if (!is_patch && (is_builtin || is_tess)) { auto new_expr = join("_", target_id, "_unrolled"); statement(variable_decl(type, new_expr, target_id), ";"); string array_expr; if (type.array_size_literal.back()) { array_expr = convert_to_string(type.array.back()); if (type.array.back() == 0) SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); } else array_expr = to_expression(type.array.back()); // The array size might be a specialization constant, so use a for-loop instead. statement("for (int i = 0; i < int(", array_expr, "); i++)"); begin_scope(); if (is_builtin && !is_sample_mask) statement(new_expr, "[i] = gl_in[i].", expr, ";"); else if (is_sample_mask) { SPIRType target_type; target_type.basetype = SPIRType::Int; statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";"); } else statement(new_expr, "[i] = ", expr, "[i];"); end_scope(); expr = std::move(new_expr); } } void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) { // We will handle array cases elsewhere. if (!expr_type.array.empty()) return; auto *var = maybe_get_backing_variable(source_id); if (var) source_id = var->self; // Only interested in standalone builtin variables. if (!has_decoration(source_id, DecorationBuiltIn)) { // Except for int attributes in legacy GLSL, which are cast from float. if (is_legacy() && expr_type.basetype == SPIRType::Int && var && var->storage == StorageClassInput) expr = join(type_to_glsl(expr_type), "(", expr, ")"); return; } auto builtin = static_cast(get_decoration(source_id, DecorationBuiltIn)); auto expected_type = expr_type.basetype; // TODO: Fill in for more builtins. 
switch (builtin) { case BuiltInLayer: case BuiltInPrimitiveId: case BuiltInViewportIndex: case BuiltInInstanceId: case BuiltInInstanceIndex: case BuiltInVertexId: case BuiltInVertexIndex: case BuiltInSampleId: case BuiltInBaseVertex: case BuiltInBaseInstance: case BuiltInDrawIndex: case BuiltInFragStencilRefEXT: case BuiltInInstanceCustomIndexNV: case BuiltInSampleMask: case BuiltInPrimitiveShadingRateKHR: case BuiltInShadingRateKHR: expected_type = SPIRType::Int; break; case BuiltInGlobalInvocationId: case BuiltInLocalInvocationId: case BuiltInWorkgroupId: case BuiltInLocalInvocationIndex: case BuiltInWorkgroupSize: case BuiltInNumWorkgroups: case BuiltInIncomingRayFlagsNV: case BuiltInLaunchIdNV: case BuiltInLaunchSizeNV: case BuiltInPrimitiveTriangleIndicesEXT: case BuiltInPrimitiveLineIndicesEXT: case BuiltInPrimitivePointIndicesEXT: expected_type = SPIRType::UInt; break; default: break; } if (expected_type != expr_type.basetype) expr = bitcast_expression(expr_type, expected_type, expr); } void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) { auto *var = maybe_get_backing_variable(target_id); if (var) target_id = var->self; // Only interested in standalone builtin variables. if (!has_decoration(target_id, DecorationBuiltIn)) return; auto builtin = static_cast(get_decoration(target_id, DecorationBuiltIn)); auto expected_type = expr_type.basetype; // TODO: Fill in for more builtins. 
switch (builtin) { case BuiltInLayer: case BuiltInPrimitiveId: case BuiltInViewportIndex: case BuiltInFragStencilRefEXT: case BuiltInSampleMask: case BuiltInPrimitiveShadingRateKHR: case BuiltInShadingRateKHR: expected_type = SPIRType::Int; break; default: break; } if (expected_type != expr_type.basetype) { auto type = expr_type; type.basetype = expected_type; expr = bitcast_expression(type, expr_type.basetype, expr); } } void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id) { if (*backend.nonuniform_qualifier == '\0') return; auto *var = maybe_get_backing_variable(ptr_id); if (!var) return; if (var->storage != StorageClassUniformConstant && var->storage != StorageClassStorageBuffer && var->storage != StorageClassUniform) return; auto &backing_type = get(var->basetype); if (backing_type.array.empty()) return; // If we get here, we know we're accessing an arrayed resource which // might require nonuniform qualifier. auto start_array_index = expr.find_first_of('['); if (start_array_index == string::npos) return; // We've opened a bracket, track expressions until we can close the bracket. // This must be our resource index. size_t end_array_index = string::npos; unsigned bracket_count = 1; for (size_t index = start_array_index + 1; index < expr.size(); index++) { if (expr[index] == ']') { if (--bracket_count == 0) { end_array_index = index; break; } } else if (expr[index] == '[') bracket_count++; } assert(bracket_count == 0); // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's // nothing we can do here to express that. 
if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index) return; start_array_index++; expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(", expr.substr(start_array_index, end_array_index - start_array_index), ")", expr.substr(end_array_index, string::npos)); } void CompilerGLSL::emit_block_hints(const SPIRBlock &block) { if ((options.es && options.version < 310) || (!options.es && options.version < 140)) return; switch (block.hint) { case SPIRBlock::HintFlatten: require_extension_internal("GL_EXT_control_flow_attributes"); statement("SPIRV_CROSS_FLATTEN"); break; case SPIRBlock::HintDontFlatten: require_extension_internal("GL_EXT_control_flow_attributes"); statement("SPIRV_CROSS_BRANCH"); break; case SPIRBlock::HintUnroll: require_extension_internal("GL_EXT_control_flow_attributes"); statement("SPIRV_CROSS_UNROLL"); break; case SPIRBlock::HintDontUnroll: require_extension_internal("GL_EXT_control_flow_attributes"); statement("SPIRV_CROSS_LOOP"); break; default: break; } } void CompilerGLSL::preserve_alias_on_reset(uint32_t id) { preserved_aliases[id] = get_name(id); } void CompilerGLSL::reset_name_caches() { for (auto &preserved : preserved_aliases) set_name(preserved.first, preserved.second); preserved_aliases.clear(); resource_names.clear(); block_input_names.clear(); block_output_names.clear(); block_ubo_names.clear(); block_ssbo_names.clear(); block_names.clear(); function_overloads.clear(); } void CompilerGLSL::fixup_anonymous_struct_names(std::unordered_set &visited, const SPIRType &type) { if (visited.count(type.self)) return; visited.insert(type.self); for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) { auto &mbr_type = get(type.member_types[i]); if (mbr_type.basetype == SPIRType::Struct) { // If there are multiple aliases, the output might be somewhat unpredictable, // but the only real alternative in that case is to do nothing, which isn't any better. 
// This check should be fine in practice. if (get_name(mbr_type.self).empty() && !get_member_name(type.self, i).empty()) { auto anon_name = join("anon_", get_member_name(type.self, i)); ParsedIR::sanitize_underscores(anon_name); set_name(mbr_type.self, anon_name); } fixup_anonymous_struct_names(visited, mbr_type); } } } void CompilerGLSL::fixup_anonymous_struct_names() { // HLSL codegen can often end up emitting anonymous structs inside blocks, which // breaks GL linking since all names must match ... // Try to emit sensible code, so attempt to find such structs and emit anon_$member. // Breaks exponential explosion with weird type trees. std::unordered_set visited; ir.for_each_typed_id([&](uint32_t, SPIRType &type) { if (type.basetype == SPIRType::Struct && (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))) { fixup_anonymous_struct_names(visited, type); } }); } void CompilerGLSL::fixup_type_alias() { // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { if (!type.type_alias) return; if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) { // Top-level block types should never alias anything else. type.type_alias = 0; } else if (type_is_block_like(type) && type.self == ID(self)) { // A block-like type is any type which contains Offset decoration, but not top-level blocks, // i.e. blocks which are placed inside buffers. // Become the master. ir.for_each_typed_id([&](uint32_t other_id, SPIRType &other_type) { if (other_id == self) return; if (other_type.type_alias == type.type_alias) other_type.type_alias = self; }); this->get(type.type_alias).type_alias = self; type.type_alias = 0; } }); } void CompilerGLSL::reorder_type_alias() { // Reorder declaration of types so that the master of the type alias is always emitted first. 
// We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here. auto loop_lock = ir.create_loop_hard_lock(); auto &type_ids = ir.ids_for_type[TypeType]; for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr) { auto &type = get(*alias_itr); if (type.type_alias != TypeID(0) && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) { // We will skip declaring this type, so make sure the type_alias type comes before. auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias)); assert(master_itr != end(type_ids)); if (alias_itr < master_itr) { // Must also swap the type order for the constant-type joined array. auto &joined_types = ir.ids_for_constant_undef_or_type; auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr); auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr); assert(alt_alias_itr != end(joined_types)); assert(alt_master_itr != end(joined_types)); swap(*alias_itr, *master_itr); swap(*alt_alias_itr, *alt_master_itr); } } } } void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) { // If we are redirecting statements, ignore the line directive. // Common case here is continue blocks. if (redirect_statement) return; // If we're emitting code in a sensitive context such as condition blocks in for loops, don't emit // any line directives, because it's not possible. 
if (block_debug_directives) return; if (options.emit_line_directives) { require_extension_internal("GL_GOOGLE_cpp_style_line_directive"); statement_no_indent("#line ", line_literal, " \"", get(file_id).str, "\""); } } void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id, SmallVector chain) { // Fully unroll all member/array indices one by one. auto &lhs_type = get(lhs_type_id); auto &rhs_type = get(rhs_type_id); if (!lhs_type.array.empty()) { // Could use a loop here to support specialization constants, but it gets rather complicated with nested array types, // and this is a rather obscure opcode anyways, keep it simple unless we are forced to. uint32_t array_size = to_array_size_literal(lhs_type); chain.push_back(0); for (uint32_t i = 0; i < array_size; i++) { chain.back() = i; emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain); } } else if (lhs_type.basetype == SPIRType::Struct) { chain.push_back(0); uint32_t member_count = uint32_t(lhs_type.member_types.size()); for (uint32_t i = 0; i < member_count; i++) { chain.back() = i; emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain); } } else { // Need to handle unpack/packing fixups since this can differ wildly between the logical types, // particularly in MSL. // To deal with this, we emit access chains and go through emit_store_statement // to deal with all the special cases we can encounter. 
AccessChainMeta lhs_meta, rhs_meta; auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()), ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta); auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()), ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta); uint32_t id = ir.increase_bound_by(2); lhs_id = id; rhs_id = id + 1; { auto &lhs_expr = set(lhs_id, std::move(lhs), lhs_type_id, true); lhs_expr.need_transpose = lhs_meta.need_transpose; if (lhs_meta.storage_is_packed) set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked); if (lhs_meta.storage_physical_type != 0) set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type); forwarded_temporaries.insert(lhs_id); suppressed_usage_tracking.insert(lhs_id); } { auto &rhs_expr = set(rhs_id, std::move(rhs), rhs_type_id, true); rhs_expr.need_transpose = rhs_meta.need_transpose; if (rhs_meta.storage_is_packed) set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked); if (rhs_meta.storage_physical_type != 0) set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type); forwarded_temporaries.insert(rhs_id); suppressed_usage_tracking.insert(rhs_id); } emit_store_statement(lhs_id, rhs_id); } } bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const { if (!has_decoration(id, DecorationInputAttachmentIndex)) return false; uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex); for (auto &remap : subpass_to_framebuffer_fetch_attachment) if (remap.first == input_attachment_index) return true; return false; } const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const { const SPIRVariable *ret = nullptr; ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { if (has_decoration(var.self, DecorationInputAttachmentIndex) && get_decoration(var.self, DecorationInputAttachmentIndex) == index) { ret = &var; } 
}); return ret; } const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const { const SPIRVariable *ret = nullptr; ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location) ret = &var; }); return ret; } void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs() { for (auto &remap : subpass_to_framebuffer_fetch_attachment) { auto *subpass_var = find_subpass_input_by_attachment_index(remap.first); auto *output_var = find_color_output_by_location(remap.second); if (!subpass_var) continue; if (!output_var) SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able " "to read from it."); if (is_array(get(output_var->basetype))) SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs."); auto &func = get(get_entry_point().self); func.fixup_hooks_in.push_back([=]() { if (is_legacy()) { statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[", get_decoration(output_var->self, DecorationLocation), "];"); } else { uint32_t num_rt_components = this->get(output_var->basetype).vecsize; statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ", to_expression(output_var->self), ";"); } }); } } bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const { return is_depth_image(get(get(id).basetype), id); } const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c) { static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot", "GL_KHR_shader_subgroup_basic", "GL_KHR_shader_subgroup_vote", "GL_KHR_shader_subgroup_arithmetic", "GL_NV_gpu_shader_5", "GL_NV_shader_thread_group", "GL_NV_shader_thread_shuffle", "GL_ARB_shader_ballot", "GL_ARB_shader_group_vote", "GL_AMD_gcn_shader" }; return retval[c]; } SmallVector 
CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c) { switch (c) { case ARB_shader_ballot: return { "GL_ARB_shader_int64" }; case AMD_gcn_shader: return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" }; default: return {}; } } const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c) { switch (c) { case ARB_shader_ballot: return "defined(GL_ARB_shader_int64)"; case AMD_gcn_shader: return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))"; default: return ""; } } CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper:: get_feature_dependencies(Feature feature) { switch (feature) { case SubgroupAllEqualT: return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool }; case SubgroupElect: return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID }; case SubgroupInverseBallot_InclBitCount_ExclBitCout: return { SubgroupMask }; case SubgroupBallotBitCount: return { SubgroupBallot }; case SubgroupArithmeticIAddReduce: case SubgroupArithmeticIAddInclusiveScan: case SubgroupArithmeticFAddReduce: case SubgroupArithmeticFAddInclusiveScan: case SubgroupArithmeticIMulReduce: case SubgroupArithmeticIMulInclusiveScan: case SubgroupArithmeticFMulReduce: case SubgroupArithmeticFMulInclusiveScan: return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupBallotBitExtract }; case SubgroupArithmeticIAddExclusiveScan: case SubgroupArithmeticFAddExclusiveScan: case SubgroupArithmeticIMulExclusiveScan: case SubgroupArithmeticFMulExclusiveScan: return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupElect, SubgroupBallotBitExtract }; default: return {}; } } CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper:: get_feature_dependency_mask(Feature feature) { return build_mask(get_feature_dependencies(feature)); } bool 
CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature) { static const bool retval[FeatureCount] = { false, false, false, false, false, false, true, // SubgroupBalloFindLSB_MSB false, false, false, false, true, // SubgroupMemBarrier - replaced with workgroup memory barriers false, false, true, false, false, false, false, false, false, false, // iadd, fadd false, false, false, false, false, false, // imul , fmul }; return retval[feature]; } CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper:: get_KHR_extension_for_feature(Feature feature) { static const Candidate extensions[FeatureCount] = { KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote, KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, }; return extensions[feature]; } void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature) { feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature); } bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const { return (feature_mask & (1u << feature)) != 0; } CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const { Result res; for (uint32_t i = 
0u; i < FeatureCount; ++i) { if (feature_mask & (1u << i)) { auto feature = static_cast(i); std::unordered_set unique_candidates; auto candidates = get_candidates_for_feature(feature); unique_candidates.insert(candidates.begin(), candidates.end()); auto deps = get_feature_dependencies(feature); for (Feature d : deps) { candidates = get_candidates_for_feature(d); if (!candidates.empty()) unique_candidates.insert(candidates.begin(), candidates.end()); } for (uint32_t c : unique_candidates) ++res.weights[static_cast(c)]; } } return res; } CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: get_candidates_for_feature(Feature ft, const Result &r) { auto c = get_candidates_for_feature(ft); auto cmp = [&r](Candidate a, Candidate b) { if (r.weights[a] == r.weights[b]) return a < b; // Prefer candidates with lower enum value return r.weights[a] > r.weights[b]; }; std::sort(c.begin(), c.end(), cmp); return c; } CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: get_candidates_for_feature(Feature feature) { switch (feature) { case SubgroupMask: return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; case SubgroupSize: return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot }; case SubgroupInvocationID: return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot }; case SubgroupID: return { KHR_shader_subgroup_basic, NV_shader_thread_group }; case NumSubgroups: return { KHR_shader_subgroup_basic, NV_shader_thread_group }; case SubgroupBroadcast_First: return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot }; case SubgroupBallotFindLSB_MSB: return { KHR_shader_subgroup_ballot, NV_shader_thread_group }; case SubgroupAll_Any_AllEqualBool: return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader }; case SubgroupAllEqualT: return {}; // 
depends on other features only case SubgroupElect: return {}; // depends on other features only case SubgroupBallot: return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; case SubgroupBarrier: return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader }; case SubgroupMemBarrier: return { KHR_shader_subgroup_basic }; case SubgroupInverseBallot_InclBitCount_ExclBitCout: return {}; case SubgroupBallotBitExtract: return { NV_shader_thread_group }; case SubgroupBallotBitCount: return {}; case SubgroupArithmeticIAddReduce: case SubgroupArithmeticIAddExclusiveScan: case SubgroupArithmeticIAddInclusiveScan: case SubgroupArithmeticFAddReduce: case SubgroupArithmeticFAddExclusiveScan: case SubgroupArithmeticFAddInclusiveScan: case SubgroupArithmeticIMulReduce: case SubgroupArithmeticIMulExclusiveScan: case SubgroupArithmeticIMulInclusiveScan: case SubgroupArithmeticFMulReduce: case SubgroupArithmeticFMulExclusiveScan: case SubgroupArithmeticFMulInclusiveScan: return { KHR_shader_subgroup_arithmetic, NV_shader_thread_shuffle }; default: return {}; } } CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask( const SmallVector &features) { FeatureMask mask = 0; for (Feature f : features) mask |= FeatureMask(1) << f; return mask; } CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result() { for (auto &weight : weights) weight = 0; // Make sure KHR_shader_subgroup extensions are always prefered. const uint32_t big_num = FeatureCount; weights[KHR_shader_subgroup_ballot] = big_num; weights[KHR_shader_subgroup_basic] = big_num; weights[KHR_shader_subgroup_vote] = big_num; weights[KHR_shader_subgroup_arithmetic] = big_num; } void CompilerGLSL::request_workaround_wrapper_overload(TypeID id) { // Must be ordered to maintain deterministic output, so vector is appropriate. 
if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) == end(workaround_ubo_load_overload_types)) { force_recompile(); workaround_ubo_load_overload_types.push_back(id); } } void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr) { // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic. // To load these types correctly, we must first wrap them in a dummy function which only purpose is to // ensure row_major decoration is actually respected. auto *var = maybe_get_backing_variable(ptr); if (!var) return; auto &backing_type = get(var->basetype); bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform && has_decoration(backing_type.self, DecorationBlock); if (!is_ubo) return; auto *type = &get(loaded_type); bool rewrite = false; bool relaxed = options.es; if (is_matrix(*type)) { // To avoid adding a lot of unnecessary meta tracking to forward the row_major state, // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state. // If there is any row-major action going on, we apply the workaround. // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution. // If an access chain occurred, the workaround is not required, so loading vectors or scalars don't need workaround. type = &backing_type; } else { // If we're loading a composite, we don't have overloads like these. relaxed = false; } if (type->basetype == SPIRType::Struct) { // If we're loading a struct where any member is a row-major matrix, apply the workaround. for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++) { auto decorations = combined_decoration_for_member(*type, i); if (decorations.get(DecorationRowMajor)) rewrite = true; // Since we decide on a per-struct basis, only use mediump wrapper if all candidates are mediump. 
if (!decorations.get(DecorationRelaxedPrecision)) relaxed = false; } } if (rewrite) { request_workaround_wrapper_overload(loaded_type); expr = join("spvWorkaroundRowMajor", (relaxed ? "MP" : ""), "(", expr, ")"); } } void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component) { masked_output_locations.insert({ location, component }); } void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin) { masked_output_builtins.insert(builtin); } bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const { auto &type = get(var.basetype); bool is_block = has_decoration(type.self, DecorationBlock); // Blocks by themselves are never masked. Must be masked per-member. if (is_block) return false; bool is_builtin = has_decoration(var.self, DecorationBuiltIn); if (is_builtin) { return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn))); } else { if (!has_decoration(var.self, DecorationLocation)) return false; return is_stage_output_location_masked( get_decoration(var.self, DecorationLocation), get_decoration(var.self, DecorationComponent)); } } bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const { auto &type = get(var.basetype); bool is_block = has_decoration(type.self, DecorationBlock); if (!is_block) return false; BuiltIn builtin = BuiltInMax; if (is_member_builtin(type, index, &builtin)) { return is_stage_output_builtin_masked(builtin); } else { uint32_t location = get_declared_member_location(var, index, strip_array); uint32_t component = get_member_decoration(type.self, index, DecorationComponent); return is_stage_output_location_masked(location, component); } } bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const { if (has_decoration(var.self, DecorationPerPrimitiveEXT)) return true; auto &type = get(var.basetype); if (!has_decoration(type.self, DecorationBlock)) return false; for (uint32_t i = 0, 
n = uint32_t(type.member_types.size()); i < n; i++) if (!has_member_decoration(type.self, i, DecorationPerPrimitiveEXT)) return false; return true; } bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const { return masked_output_locations.count({ location, component }) != 0; } bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const { return masked_output_builtins.count(builtin) != 0; } uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const { auto &block_type = get(var.basetype); if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation)) return get_member_decoration(block_type.self, mbr_idx, DecorationLocation); else return get_accumulated_member_location(var, mbr_idx, strip_array); } uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const { auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); uint32_t location = get_decoration(var.self, DecorationLocation); for (uint32_t i = 0; i < mbr_idx; i++) { auto &mbr_type = get(type.member_types[i]); // Start counting from any place we have a new location decoration. if (has_member_decoration(type.self, mbr_idx, DecorationLocation)) location = get_member_decoration(type.self, mbr_idx, DecorationLocation); uint32_t location_count = type_to_location_count(mbr_type); location += location_count; } return location; } StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr) { auto *var = maybe_get_backing_variable(ptr); // If the expression has been lowered to a temporary, we need to use the Generic storage class. // We're looking for the effective storage class of a given expression. 
// An access chain or forwarded OpLoads from such access chains // will generally have the storage class of the underlying variable, but if the load was not forwarded // we have lost any address space qualifiers. bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get(ptr).access_chain && (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0); if (var && !forced_temporary) { if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup)) return StorageClassWorkgroup; if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer)) return StorageClassStorageBuffer; // Normalize SSBOs to StorageBuffer here. if (var->storage == StorageClassUniform && has_decoration(get(var->basetype).self, DecorationBufferBlock)) return StorageClassStorageBuffer; else return var->storage; } else return expression_type(ptr).storage; } uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const { uint32_t count; if (type.basetype == SPIRType::Struct) { uint32_t mbr_count = uint32_t(type.member_types.size()); count = 0; for (uint32_t i = 0; i < mbr_count; i++) count += type_to_location_count(get(type.member_types[i])); } else { count = type.columns > 1 ? type.columns : 1; } uint32_t dim_count = uint32_t(type.array.size()); for (uint32_t i = 0; i < dim_count; i++) count *= to_array_size_literal(type, i); return count; }