From 6db69990da9f232e6d982cdcb69c2e27d93075cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Feb 2021 18:10:18 -0300 Subject: spirv: Add lower fp16 to fp32 pass --- .../backend/spirv/emit_spirv_convert.cpp | 89 ++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp new file mode 100644 index 000000000..76ccaffce --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -0,0 +1,89 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +Id EmitConvertS16F16(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); +} + +Id EmitConvertS16F32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); +} + +Id EmitConvertS16F64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); +} + +Id EmitConvertS32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS32F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertU16F16(EmitContext& ctx, Id value) { + return 
ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); +} + +Id EmitConvertU16F32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); +} + +Id EmitConvertU16F64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); +} + +Id EmitConvertU32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64U32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U64, value); +} + +Id EmitConvertU32U64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], value); +} + +} // namespace Shader::Backend::SPIRV -- cgit v1.2.3 From 4006929c986a2e0e52429fe21201a7ad5ca3fea9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 3 Mar 2021 03:07:19 -0300 Subject: shader: Implement HADD2 --- src/shader_recompiler/CMakeLists.txt | 1 + src/shader_recompiler/backend/spirv/emit_spirv.h | 28 +++- .../backend/spirv/emit_spirv_composite.cpp | 72 ++++++-- .../backend/spirv/emit_spirv_convert.cpp | 16 ++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 90 +++++++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.inc | 17 +- .../translate/impl/half_floating_point_add.cpp | 184 +++++++++++++++++++++ .../maxwell/translate/impl/load_store_memory.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 16 -- .../global_memory_to_storage_buffer_pass.cpp | 2 +- 
.../ir_opt/lower_fp16_to_fp32.cpp | 10 ++ 12 files changed, 400 insertions(+), 42 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6506413a8..cb73e03fb 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -71,6 +71,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/floating_point_multi_function.cpp frontend/maxwell/translate/impl/floating_point_multiply.cpp frontend/maxwell/translate/impl/floating_point_range_reduction.cpp + frontend/maxwell/translate/impl/half_floating_point_add.cpp frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 5446d6ab6..bed43c094 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -90,24 +90,36 @@ Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); -void EmitCompositeConstructF16x2(EmitContext& ctx); -void EmitCompositeConstructF16x3(EmitContext& ctx); -void EmitCompositeConstructF16x4(EmitContext& ctx); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); +Id 
EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); -void EmitCompositeConstructF32x2(EmitContext& ctx); -void EmitCompositeConstructF32x3(EmitContext& ctx); -void EmitCompositeConstructF32x4(EmitContext& ctx); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); void EmitCompositeExtractF64x2(EmitContext& ctx); void EmitCompositeExtractF64x3(EmitContext& ctx); void EmitCompositeExtractF64x4(EmitContext& ctx); +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, 
Id object, u32 index); Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); @@ -270,5 +282,9 @@ Id EmitConvertU64F32(EmitContext& ctx, Id value); Id EmitConvertU64F64(EmitContext& ctx, Id value); Id EmitConvertU64U32(EmitContext& ctx, Id value); Id EmitConvertU32U64(EmitContext& ctx, Id value); +Id EmitConvertF16F32(EmitContext& ctx, Id value); +Id EmitConvertF32F16(EmitContext& ctx, Id value); +Id EmitConvertF32F64(EmitContext& ctx, Id value); +Id EmitConvertF64F32(EmitContext& ctx, Id value); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index c950854a0..616e63676 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -30,16 +30,28 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) { return ctx.OpCompositeExtract(ctx.U32[1], composite, index); } -void EmitCompositeConstructF16x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index); } -void EmitCompositeConstructF16x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index); } -void EmitCompositeConstructF16x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); +} + +Id 
EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); +} + +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); +} + +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); } Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { @@ -54,16 +66,28 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { return ctx.OpCompositeExtract(ctx.F16[1], composite, index); } -void EmitCompositeConstructF32x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index); } -void EmitCompositeConstructF32x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index); } -void EmitCompositeConstructF32x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); +} + +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); +} + +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); +} + +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); } Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { @@ -78,6 +102,18 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { 
return ctx.OpCompositeExtract(ctx.F32[1], composite, index); } +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index); +} + +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index); +} + +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); +} + void EmitCompositeConstructF64x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } @@ -102,4 +138,16 @@ void EmitCompositeExtractF64x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index); +} + +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index); +} + +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index 76ccaffce..edcc2a1cc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -86,4 +86,20 @@ Id EmitConvertU32U64(EmitContext& ctx, Id value) { return ctx.OpUConvert(ctx.U32[1], value); } +Id EmitConvertF16F32(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F16[1], value); +} + +Id EmitConvertF32F16(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F32[1], value); +} + +Id EmitConvertF32F64(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F32[1], value); +} 
+ +Id EmitConvertF64F32(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F64[1], value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0f1cab57a..186920d8f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -334,12 +334,12 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu } Value IREmitter::CompositeExtract(const Value& vector, size_t element) { - const auto read = [&](Opcode opcode, size_t limit) -> Value { + const auto read{[&](Opcode opcode, size_t limit) -> Value { if (element >= limit) { throw InvalidArgument("Out of bounds element {}", element); } return Inst(opcode, vector, Value{static_cast(element)}); - }; + }}; switch (vector.Type()) { case Type::U32x2: return read(Opcode::CompositeExtractU32x2, 2); @@ -370,6 +370,43 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) { } } +Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) { + const auto insert{[&](Opcode opcode, size_t limit) { + if (element >= limit) { + throw InvalidArgument("Out of bounds element {}", element); + } + return Inst(opcode, vector, object, Value{static_cast(element)}); + }}; + switch (vector.Type()) { + case Type::U32x2: + return insert(Opcode::CompositeInsertU32x2, 2); + case Type::U32x3: + return insert(Opcode::CompositeInsertU32x3, 3); + case Type::U32x4: + return insert(Opcode::CompositeInsertU32x4, 4); + case Type::F16x2: + return insert(Opcode::CompositeInsertF16x2, 2); + case Type::F16x3: + return insert(Opcode::CompositeInsertF16x3, 3); + case Type::F16x4: + return insert(Opcode::CompositeInsertF16x4, 4); + case Type::F32x2: + return insert(Opcode::CompositeInsertF32x2, 2); + case Type::F32x3: + return insert(Opcode::CompositeInsertF32x3, 3); + case Type::F32x4: + return 
insert(Opcode::CompositeInsertF32x4, 4); + case Type::F64x2: + return insert(Opcode::CompositeInsertF64x2, 2); + case Type::F64x3: + return insert(Opcode::CompositeInsertF64x3, 3); + case Type::F64x4: + return insert(Opcode::CompositeInsertF64x4, 4); + default: + ThrowInvalidType(vector.Type()); + } +} + Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { if (true_value.Type() != false_value.Type()) { throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); @@ -433,7 +470,7 @@ U32 IREmitter::PackFloat2x16(const Value& vector) { } Value IREmitter::UnpackFloat2x16(const U32& value) { - return Inst(Opcode::UnpackFloat2x16, value); + return Inst(Opcode::UnpackFloat2x16, value); } F64 IREmitter::PackDouble2x32(const Value& vector) { @@ -968,7 +1005,7 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v } } -U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { +U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { switch (result_bitsize) { case 32: switch (value.Type()) { @@ -995,4 +1032,49 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); } +F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { + switch (result_bitsize) { + case 16: + switch (value.Type()) { + case Type::F16: + // Nothing to do + return value; + case Type::F32: + return Inst<F16>(Opcode::ConvertF16F32, value); + case Type::F64: + throw LogicError("Illegal conversion from F64 to F16"); + default: + break; + } + break; + case 32: + switch (value.Type()) { + case Type::F16: + return Inst<F32>(Opcode::ConvertF32F16, value); + case Type::F32: + // Nothing to do + return value; + case Type::F64: + return Inst<F32>(Opcode::ConvertF32F64, value); + default: + break; + } + break; + case 64: + switch (value.Type()) { + case Type::F16: 
+ throw LogicError("Illegal conversion from F16 to F64"); + case Type::F32: + return Inst<F64>(Opcode::ConvertF64F32, value); + case Type::F64: + // Nothing to do + return value; + default: + break; + } + break; + } + throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 03a67985f..5beb99895 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -97,6 +97,7 @@ public: [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, const Value& e4); [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); + [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); @@ -186,7 +187,8 @@ public: [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); - [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value); + [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); + [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index aedbc5c3e..acfc0a829 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -83,24 +83,36 @@ OPCODE(CompositeConstructU32x4, U32x4, U32, OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) 
+OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) +OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) +OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) +OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) +OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) +OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) +OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) +OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) +OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) // Select operations OPCODE(SelectU8, U8, U1, U8, U8, ) @@ -277,6 +289,9 @@ OPCODE(ConvertU32F64, U32, F64, OPCODE(ConvertU64F16, U64, F16, ) OPCODE(ConvertU64F32, U64, F32, ) OPCODE(ConvertU64F64, U64, F64, ) - OPCODE(ConvertU64U32, U64, U32, ) OPCODE(ConvertU32U64, U32, U64, ) +OPCODE(ConvertF16F32, F16, F32, ) +OPCODE(ConvertF32F16, F32, F16, ) +OPCODE(ConvertF32F64, F32, F64, ) +OPCODE(ConvertF64F32, F64, F32, ) diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..6965adfb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -0,0 +1,184 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Merge : u64 { + H1_H0, + F32, + MRG_H0, + MRG_H1, +}; + +enum class Swizzle : u64 { + H1_H0, + F32, + H0_H0, + H1_H1, +}; + +std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { + switch (swizzle) { + case Swizzle::H1_H0: { + const IR::Value vector{ir.UnpackFloat2x16(value)}; + return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; + } + case Swizzle::H0_H0: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; + return {scalar, scalar}; + } + case Swizzle::H1_H1: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; + return {scalar, scalar}; + } + case Swizzle::F32: { + const IR::F32 scalar{ir.BitCast(value)}; + return {scalar, scalar}; + } + } + throw InvalidArgument("Invalid swizzle {}", swizzle); +} + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge) { + switch (merge) { + case Merge::H1_H0: + return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); + case Merge::F32: + return ir.BitCast(ir.FPConvert(32, lhs)); + case Merge::MRG_H0: + case Merge::MRG_H1: { + const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; + const bool h0{merge == Merge::MRG_H0}; + const 
IR::F16& insert{h0 ? lhs : rhs}; + return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); + } + } + throw InvalidArgument("Invalid merge {}", merge); +} + +void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, + Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hadd2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + const bool promotion{lhs_a.Type() != lhs_b.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); + rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{ftz ? 
IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); +} +} // Anonymous namespace + +void TranslatorVisitor::HADD2_reg(u64 insn) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<39, 1, u64> ftz; + BitField<32, 1, u64> sat; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> abs_b; + BitField<28, 2, Swizzle> swizzle_b; + } const hadd2{insn}; + + HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, + hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, + GetReg20(insn)); +} + +void TranslatorVisitor::HADD2_cbuf(u64 insn) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<39, 1, u64> ftz; + BitField<52, 1, u64> sat; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + BitField<56, 1, u64> neg_b; + BitField<54, 1, u64> abs_b; + } const hadd2{insn}; + + HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, + hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, + GetCbuf(insn)); +} + +void TranslatorVisitor::HADD2_imm(u64 insn) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<39, 1, u64> ftz; + BitField<52, 1, u64> sat; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hadd2{insn}; + + const u32 imm{static_cast(hadd2.low << 6) | 
((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; + HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, + hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); +} + +void TranslatorVisitor::HADD2_32I(u64 insn) { + union { + u64 raw; + BitField<55, 1, u64> ftz; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<53, 2, Swizzle> swizzle_a; + BitField<20, 32, u64> imm32; + } const hadd2{insn}; + + const u32 imm{static_cast(hadd2.imm32)}; + HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0, + hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 727524284..748b856c9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -59,7 +59,7 @@ IR::U64 Address(TranslatorVisitor& v, u64 insn) { const IR::U64 address{[&]() -> IR::U64 { if (mem.e == 0) { // LDG/STG without .E uses a 32-bit pointer, zero-extend it - return v.ir.ConvertU(64, v.X(mem.addr_reg)); + return v.ir.UConvert(64, v.X(mem.addr_reg)); } if (!IR::IsAligned(mem.addr_reg, 2)) { throw NotImplementedException("Unaligned address register"); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a0535f1c2..c24f29ff7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -265,22 +265,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void 
TranslatorVisitor::HADD2_reg(u64) { - ThrowNotImplemented(Opcode::HADD2_reg); -} - -void TranslatorVisitor::HADD2_cbuf(u64) { - ThrowNotImplemented(Opcode::HADD2_cbuf); -} - -void TranslatorVisitor::HADD2_imm(u64) { - ThrowNotImplemented(Opcode::HADD2_imm); -} - -void TranslatorVisitor::HADD2_32I(u64) { - ThrowNotImplemented(Opcode::HADD2_32I); -} - void TranslatorVisitor::HFMA2_reg(u64) { ThrowNotImplemented(Opcode::HFMA2_reg); } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 98e3dfef7..965e52135 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -298,7 +298,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); } } else { - offset = ir.ConvertU(32, IR::U64{inst.Arg(0)}); + offset = ir.UConvert(32, IR::U64{inst.Arg(0)}); } // Subtract the least significant 32 bits from the guest offset. The result is the storage // buffer offset in bytes. 
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index c7032f168..14a5cb50f 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -44,6 +44,12 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::CompositeExtractF32x3; case IR::Opcode::CompositeExtractF16x4: return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::CompositeInsertF16x2: + return IR::Opcode::CompositeInsertF32x2; + case IR::Opcode::CompositeInsertF16x3: + return IR::Opcode::CompositeInsertF32x3; + case IR::Opcode::CompositeInsertF16x4: + return IR::Opcode::CompositeInsertF32x4; case IR::Opcode::ConvertS16F16: return IR::Opcode::ConvertS16F32; case IR::Opcode::ConvertS32F16: @@ -60,6 +66,10 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::PackHalf2x16; case IR::Opcode::UnpackFloat2x16: return IR::Opcode::UnpackHalf2x16; + case IR::Opcode::ConvertF32F16: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF16F32: + return IR::Opcode::Identity; default: return op; } -- cgit v1.2.3 From ab463712474de5f99eec137a9c6233e55fe184f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Mar 2021 18:31:53 -0300 Subject: shader: Initial support for textures and TEX --- src/shader_recompiler/CMakeLists.txt | 3 + .../backend/spirv/emit_context.cpp | 69 ++- src/shader_recompiler/backend/spirv/emit_context.h | 7 + src/shader_recompiler/backend/spirv/emit_spirv.cpp | 12 + src/shader_recompiler/backend/spirv/emit_spirv.h | 32 +- .../backend/spirv/emit_spirv_convert.cpp | 48 ++ .../backend/spirv/emit_spirv_image.cpp | 146 ++++++ .../backend/spirv/emit_spirv_memory.cpp | 18 +- src/shader_recompiler/environment.h | 2 + src/shader_recompiler/file_environment.cpp | 4 + src/shader_recompiler/file_environment.h | 4 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 133 ++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 21 +- 
.../frontend/ir/microinstruction.cpp | 73 ++- .../frontend/ir/microinstruction.h | 22 +- src/shader_recompiler/frontend/ir/modifiers.h | 10 + src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/ir/opcodes.inc | 569 +++++++++++---------- src/shader_recompiler/frontend/ir/reg.h | 11 + src/shader_recompiler/frontend/ir/value.h | 1 + src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_sample.cpp | 232 +++++++++ .../ir_opt/collect_shader_info_pass.cpp | 19 + .../global_memory_to_storage_buffer_pass.cpp | 15 +- src/shader_recompiler/ir_opt/passes.h | 2 + src/shader_recompiler/ir_opt/texture_pass.cpp | 199 +++++++ src/shader_recompiler/shader_info.h | 52 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 101 ++++ .../renderer_vulkan/vk_compute_pipeline.h | 4 + .../renderer_vulkan/vk_pipeline_cache.cpp | 4 + src/video_core/renderer_vulkan/vk_rasterizer.cpp | 3 +- 33 files changed, 1489 insertions(+), 342 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_image.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp create mode 100644 src/shader_recompiler/ir_opt/texture_pass.cpp (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index cc38b28ed..fa268d38f 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -9,6 +9,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_control_flow.cpp backend/spirv/emit_spirv_convert.cpp backend/spirv/emit_spirv_floating_point.cpp + backend/spirv/emit_spirv_image.cpp backend/spirv/emit_spirv_integer.cpp backend/spirv/emit_spirv_logical.cpp backend/spirv/emit_spirv_memory.cpp @@ -100,6 +101,7 @@ add_library(shader_recompiler STATIC 
frontend/maxwell/translate/impl/predicate_set_predicate.cpp frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp + frontend/maxwell/translate/impl/texture_sample.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp @@ -110,6 +112,7 @@ add_library(shader_recompiler STATIC ir_opt/lower_fp16_to_fp32.cpp ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp + ir_opt/texture_pass.cpp ir_opt/verification_pass.cpp object_pool.h profile.h diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index d2dbd56d4..21900d387 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -12,6 +12,43 @@ #include "shader_recompiler/backend/spirv/emit_context.h" namespace Shader::Backend::SPIRV { +namespace { +Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + const Id type{ctx.F32[1]}; + switch (desc.type) { + case TextureType::Color1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 1, format); + case TextureType::ColorArray1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 1, format); + case TextureType::Color2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 1, format); + case TextureType::ColorArray2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 1, format); + case TextureType::Color3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 1, format); + case TextureType::ColorCube: + return ctx.TypeImage(type, spv::Dim::Cube, false, false, false, 1, format); + case TextureType::ColorArrayCube: + return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format); + case TextureType::Shadow1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, true, false, 
false, 1, format); + case TextureType::ShadowArray1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, true, true, false, 1, format); + case TextureType::Shadow2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, true, false, false, 1, format); + case TextureType::ShadowArray2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, true, true, false, 1, format); + case TextureType::Shadow3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, true, false, false, 1, format); + case TextureType::ShadowCube: + return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format); + case TextureType::ShadowArrayCube: + return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format); + } + throw InvalidArgument("Invalid texture type {}", desc.type); +} +} // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { defs[0] = sirit_ctx.Name(base_type, name); @@ -35,6 +72,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program) u32 binding{}; DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); + DefineTextures(program.info, binding); DefineLabels(program); } @@ -46,6 +84,10 @@ Id EmitContext::Def(const IR::Value& value) { return value.Inst()->Definition(); } switch (value.Type()) { + case IR::Type::Void: + // Void instructions are used for optional arguments (e.g. texture offsets) + // They are not meant to be used in the SPIR-V module + return Id{}; case IR::Type::U1: return value.U1() ? 
true_value : false_value; case IR::Type::U32: @@ -122,7 +164,7 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); u32 index{}; - for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); @@ -152,7 +194,7 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); u32 index{}; - for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); @@ -163,6 +205,29 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { } } +void EmitContext::DefineTextures(const Info& info, u32& binding) { + textures.reserve(info.texture_descriptors.size()); + for (const TextureDescriptor& desc : info.texture_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of textures"); + } + const Id type{TypeSampledImage(ImageType(*this, desc))}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("tex{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); + for (u32 index = 0; index < desc.count; ++index) { + // TODO: Pass count info + textures.push_back(TextureDefinition{ + .id{id}, + .type{type}, + 
}); + } + binding += desc.count; + } +} + void EmitContext::DefineLabels(IR::Program& program) { for (const IR::Function& function : program.functions) { for (IR::Block* const block : function.blocks) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index d20cf387e..8b3109eb8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -29,6 +29,11 @@ private: std::array defs{}; }; +struct TextureDefinition { + Id id; + Id type; +}; + class EmitContext final : public Sirit::Module { public: explicit EmitContext(const Profile& profile, IR::Program& program); @@ -56,6 +61,7 @@ public: std::array cbufs{}; std::array ssbos{}; + std::vector textures; Id workgroup_id{}; Id local_invocation_id{}; @@ -66,6 +72,7 @@ private: void DefineSpecialVariables(const Info& info); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); + void DefineTextures(const Info& info, u32& binding); void DefineLabels(IR::Program& program); }; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 8097fe82d..a94e9cb2d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -221,6 +221,14 @@ std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program workgroup_size[2]); SetupDenormControl(profile, program, ctx, func); + if (info.uses_sampled_1d) { + ctx.AddCapability(spv::Capability::Sampled1D); + } + if (info.uses_sparse_residency) { + ctx.AddCapability(spv::Capability::SparseResidency); + } + // TODO: Track this usage + ctx.AddCapability(spv::Capability::ImageGatherExtended); return ctx.Assemble(); } @@ -259,4 +267,8 @@ void EmitGetOverflowFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } +void EmitGetSparseFromOp(EmitContext&) { + 
throw LogicError("Unreachable instruction"); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 92387ca28..69698c478 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -83,7 +83,8 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Va Id value); void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); -void EmitWriteStorage128(EmitContext& ctx); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); @@ -145,6 +146,7 @@ void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); Id EmitFPAbs16(EmitContext& ctx, Id value); Id EmitFPAbs32(EmitContext& ctx, Id value); Id EmitFPAbs64(EmitContext& ctx, Id value); @@ -291,5 +293,33 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value); Id EmitConvertF32F16(EmitContext& ctx, Id value); Id EmitConvertF32F64(EmitContext& ctx, Id value); Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S32(EmitContext& ctx, Id value); +Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U32(EmitContext& ctx, Id value); +Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S32(EmitContext& ctx, Id value); +Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U32(EmitContext& ctx, Id value); +Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S32(EmitContext& ctx, Id value); +Id 
EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U32(EmitContext& ctx, Id value); +Id EmitConvertF64U64(EmitContext& ctx, Id value); +Id EmitBindlessImageSampleImplicitLod(EmitContext&); +Id EmitBindlessImageSampleExplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +Id EmitBoundImageSampleImplicitLod(EmitContext&); +Id EmitBoundImageSampleExplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, Id offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod_lc, Id offset); +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, Id offset); +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod_lc, Id offset); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index edcc2a1cc..2aff673aa 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -102,4 +102,52 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) { return ctx.OpFConvert(ctx.F64[1], value); } +Id EmitConvertF16S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF16U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id 
EmitConvertF32S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF32U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF64S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + +Id EmitConvertF64U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp new file mode 100644 index 000000000..5f4783c95 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -0,0 +1,146 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +class ImageOperands { +public: + explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, + Id lod, Id offset) { + if (has_bias) { + const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; + Add(spv::ImageOperandsMask::Bias, bias); + } + if (has_lod) { + const Id lod_value{has_lod_clamp ? 
ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; + Add(spv::ImageOperandsMask::Lod, lod_value); + } + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + if (has_lod_clamp) { + const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod}; + Add(spv::ImageOperandsMask::MinLod, lod_clamp); + } + } + + void Add(spv::ImageOperandsMask new_mask, Id value) { + mask = static_cast(static_cast(mask) | + static_cast(new_mask)); + operands.push_back(value); + } + + std::span Span() const noexcept { + return std::span{operands.data(), operands.size()}; + } + + spv::ImageOperandsMask Mask() const noexcept { + return mask; + } + +private: + boost::container::static_vector operands; + spv::ImageOperandsMask mask{}; +}; + +Id Texture(EmitContext& ctx, const IR::Value& index) { + if (index.IsImmediate()) { + const TextureDefinition def{ctx.textures.at(index.U32())}; + return ctx.OpLoad(def.type, def.id); + } + throw NotImplementedException("Indirect texture sample"); +} + +template +Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst, + Id result_type, Args&&... 
args) { + IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (!sparse) { + return (ctx.*non_sparse_ptr)(result_type, std::forward(args)...); + } + const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)}; + const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward(args)...)}; + const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)}; + sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code)); + sparse->Invalidate(); + return ctx.OpCompositeExtract(result_type, sample, 1U); +} +} // Anonymous namespace + +Id EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, Id offset) { + const auto info{inst->Flags()}; + const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, + offset); + return Emit(&EmitContext::OpImageSparseSampleImplicitLod, + &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), + coords, operands.Mask(), operands.Span()); +} + +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const 
IR::Value& index, Id coords, + Id lod_lc, Id offset) { + const auto info{inst->Flags()}; + const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), + coords, operands.Mask(), operands.Span()); +} + +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, Id offset) { + const auto info{inst->Flags()}; + const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, + offset); + return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, + &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], + Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); +} + +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod_lc, Id offset) { + const auto info{inst->Flags()}; + const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); + return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, + &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], + Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 808c1b401..7d3efc741 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -154,8 +154,22 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); } -void EmitWriteStorage128(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorage128(EmitContext& ctx, const 
IR::Value& binding, const IR::Value& offset, + Id value) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + // TODO: Support reinterpreting bindings, guaranteed to be aligned + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + for (u32 element = 0; element < 4; ++element) { + Id index = base_index; + if (element > 0) { + index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); + } + const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; + ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, element)); + } } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 0ba681fb9..0fcb68050 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -12,6 +12,8 @@ public: [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + [[nodiscard]] virtual u32 TextureBoundBuffer() = 0; + [[nodiscard]] virtual std::array WorkgroupSize() = 0; }; diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp index 5127523f9..21700c72b 100644 --- a/src/shader_recompiler/file_environment.cpp +++ b/src/shader_recompiler/file_environment.cpp @@ -39,6 +39,10 @@ u64 FileEnvironment::ReadInstruction(u32 offset) { return data[offset / 8]; } +u32 FileEnvironment::TextureBoundBuffer() { + throw NotImplementedException("Texture bound buffer serialization"); +} + std::array FileEnvironment::WorkgroupSize() { return {1, 1, 1}; } diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index b8c4bbadd..62302bc8e 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -3,7 +3,7 @@ #include #include "common/common_types.h" -#include "environment.h" +#include 
"shader_recompiler/environment.h" namespace Shader { @@ -14,6 +14,8 @@ public: u64 ReadInstruction(u32 offset) override; + u32 TextureBoundBuffer() override; + std::array WorkgroupSize() override; private: diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f38b46bac..ae3354c66 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -7,11 +7,24 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { - -[[noreturn]] static void ThrowInvalidType(Type type) { +namespace { +[[noreturn]] void ThrowInvalidType(Type type) { throw InvalidArgument("Invalid type {}", type); } +Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { + if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { + return ir.CompositeConstruct(bias_lod, lod_clamp); + } else if (!bias_lod.IsEmpty()) { + return bias_lod; + } else if (!lod_clamp.IsEmpty()) { + return lod_clamp; + } else { + return Value{}; + } +} +} // Anonymous namespace + U1 IREmitter::Imm1(bool value) const { return U1{Value{value}}; } @@ -261,6 +274,10 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { return Inst(Opcode::GetOverflowFromOp, op); } +U1 IREmitter::GetSparseFromOp(const Value& op) { + return Inst(Opcode::GetSparseFromOp, op); +} + F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { if (a.Type() != a.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); @@ -1035,6 +1052,82 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v } } +F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF16S32, value); + case Type::U64: + return Inst(Opcode::ConvertF16S64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch 
(value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF32S32, value); + case Type::U64: + return Inst(Opcode::ConvertF32S64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF64S32, value); + case Type::U64: + return Inst(Opcode::ConvertF64S64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF16U32, value); + case Type::U64: + return Inst(Opcode::ConvertF16U64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF32U32, value); + case Type::U64: + return Inst(Opcode::ConvertF32U64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF64U32, value); + case Type::U64: + return Inst(Opcode::ConvertF64U64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) { + if (is_signed) { + return ConvertSToF(bitsize, value); + } else { + return ConvertUToF(bitsize, value); + } +} + U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { switch (result_bitsize) { case 32: @@ -1107,4 +1200,40 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); } +Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const 
Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod + : Opcode::BindlessImageSampleImplicitLod}; + return Inst(op, Flags{info}, handle, coords, bias_lc, offset); +} + +Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod + : Opcode::BindlessImageSampleExplicitLod}; + return Inst(op, Flags{info}, handle, coords, lod_lc, offset); +} + +F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& bias, const Value& offset, + const F32& lod_clamp, TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod + : Opcode::BindlessImageSampleDrefImplicitLod}; + return Inst(op, Flags{info}, handle, coords, dref, bias_lc, offset); +} + +F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& lod, const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? 
Opcode::BoundImageSampleDrefExplicitLod + : Opcode::BindlessImageSampleDrefExplicitLod}; + return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 6e29bf0e2..cb2a7710a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -91,6 +91,7 @@ public: [[nodiscard]] U1 GetSignFromOp(const Value& op); [[nodiscard]] U1 GetCarryFromOp(const Value& op); [[nodiscard]] U1 GetOverflowFromOp(const Value& op); + [[nodiscard]] U1 GetSparseFromOp(const Value& op); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); @@ -159,7 +160,7 @@ public: [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, const U32& count); [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, - bool is_signed); + bool is_signed = false); [[nodiscard]] U32 BitReverse(const U32& value); [[nodiscard]] U32 BitCount(const U32& value); [[nodiscard]] U32 BitwiseNot(const U32& value); @@ -186,10 +187,28 @@ public: [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); + [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value); + [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value); + [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); + [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& 
coords, + const F32& bias, const Value& offset, + const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, + const F32& lod, const Value& offset, + const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, + const F32& dref, const F32& bias, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info); + [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, + const F32& dref, const F32& lod, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index d6a9be87d..88e186f21 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -10,26 +10,27 @@ #include "shader_recompiler/frontend/ir/type.h" namespace Shader::IR { - -static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { +namespace { +void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { if (inst && inst->Opcode() != opcode) { throw LogicError("Invalid pseudo-instruction"); } } -static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { +void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { if (dest_inst) { throw LogicError("Only one of each type of pseudo-op allowed"); } dest_inst = pseudo_inst; } -static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { +void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { if (inst->Opcode() != expected_opcode) { throw LogicError("Undoing use of invalid pseudo-op"); } inst = nullptr; } +} // Anonymous namespace Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { if (op == Opcode::Phi) { @@ -82,6 
+83,7 @@ bool Inst::IsPseudoInstruction() const noexcept { case Opcode::GetSignFromOp: case Opcode::GetCarryFromOp: case Opcode::GetOverflowFromOp: + case Opcode::GetSparseFromOp: return true; default: return false; @@ -96,25 +98,26 @@ bool Inst::AreAllArgsImmediates() const { [](const IR::Value& value) { return value.IsImmediate(); }); } -bool Inst::HasAssociatedPseudoOperation() const noexcept { - return zero_inst || sign_inst || carry_inst || overflow_inst; -} - Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { - // This is faster than doing a search through the block. + if (!associated_insts) { + return nullptr; + } switch (opcode) { case Opcode::GetZeroFromOp: - CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp); - return zero_inst; + CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp); + return associated_insts->zero_inst; case Opcode::GetSignFromOp: - CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp); - return sign_inst; + CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp); + return associated_insts->sign_inst; case Opcode::GetCarryFromOp: - CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp); - return carry_inst; + CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp); + return associated_insts->carry_inst; case Opcode::GetOverflowFromOp: - CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); - return overflow_inst; + CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp); + return associated_insts->overflow_inst; + case Opcode::GetSparseFromOp: + CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); + return associated_insts->sparse_inst; default: throw InvalidArgument("{} is not a pseudo-instruction", opcode); } @@ -220,22 +223,37 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } +void AllocAssociatedInsts(std::unique_ptr& associated_insts) { + if (!associated_insts) { + 
associated_insts = std::make_unique(); + } +} + void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; + std::unique_ptr& assoc_inst{inst->associated_insts}; switch (op) { case Opcode::GetZeroFromOp: - SetPseudoInstruction(inst->zero_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->zero_inst, this); break; case Opcode::GetSignFromOp: - SetPseudoInstruction(inst->sign_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->sign_inst, this); break; case Opcode::GetCarryFromOp: - SetPseudoInstruction(inst->carry_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->carry_inst, this); break; case Opcode::GetOverflowFromOp: - SetPseudoInstruction(inst->overflow_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->overflow_inst, this); + break; + case Opcode::GetSparseFromOp: + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->sparse_inst, this); break; default: break; @@ -246,18 +264,23 @@ void Inst::UndoUse(const Value& value) { Inst* const inst{value.Inst()}; --inst->use_count; + std::unique_ptr& assoc_inst{inst->associated_insts}; switch (op) { case Opcode::GetZeroFromOp: - RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp); break; case Opcode::GetSignFromOp: - RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp); break; case Opcode::GetCarryFromOp: - RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp); break; case Opcode::GetOverflowFromOp: - RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp); + 
AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); break; default: break; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 321393dd7..d5336c438 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -22,7 +22,7 @@ namespace Shader::IR { class Block; -constexpr size_t MAX_ARG_COUNT = 4; +struct AssociatedInsts; class Inst : public boost::intrusive::list_base_hook<> { public: @@ -50,6 +50,11 @@ public: return op; } + /// Determines if there is a pseudo-operation associated with this instruction. + [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { + return associated_insts != nullptr; + } + /// Determines whether or not this instruction may have side effects. [[nodiscard]] bool MayHaveSideEffects() const noexcept; @@ -60,8 +65,6 @@ public: /// Determines if all arguments of this instruction are immediates. [[nodiscard]] bool AreAllArgsImmediates() const; - /// Determines if there is a pseudo-operation associated with this instruction. 
- [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; /// Gets a pseudo-operation associated with this instruction [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); @@ -122,14 +125,21 @@ private: u32 definition{}; union { NonTriviallyDummy dummy{}; - std::array args; std::vector> phi_args; + std::array args; + }; + std::unique_ptr associated_insts; +}; +static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); + +struct AssociatedInsts { + union { + Inst* sparse_inst; + Inst* zero_inst{}; }; - Inst* zero_inst{}; Inst* sign_inst{}; Inst* carry_inst{}; Inst* overflow_inst{}; }; -static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size"); } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 44652eae7..ad07700ae 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -4,7 +4,9 @@ #pragma once +#include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/shader_info.h" namespace Shader::IR { @@ -30,4 +32,12 @@ struct FpControl { }; static_assert(sizeof(FpControl) <= sizeof(u32)); +union TextureInstInfo { + u32 raw; + BitField<0, 8, TextureType> type; + BitField<8, 1, u32> has_bias; + BitField<16, 1, u32> has_lod_clamp; +}; +static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 1f188411a..8492a13d5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -14,7 +14,7 @@ namespace { struct OpcodeMeta { std::string_view name; Type type; - std::array arg_types; + std::array arg_types; }; using enum Type; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index c4e72c84d..aa011fab1 
100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -2,301 +2,330 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... -OPCODE(Phi, Opaque, ) -OPCODE(Identity, Opaque, Opaque, ) -OPCODE(Void, Void, ) +// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg4 type, ... +OPCODE(Phi, Opaque, ) +OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Void, Void, ) // Control flow -OPCODE(Branch, Void, Label, ) -OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(LoopMerge, Void, Label, Label, ) -OPCODE(SelectionMerge, Void, Label, ) -OPCODE(Return, Void, ) +OPCODE(Branch, Void, Label, ) +OPCODE(BranchConditional, Void, U1, Label, Label, ) +OPCODE(LoopMerge, Void, Label, Label, ) +OPCODE(SelectionMerge, Void, Label, ) +OPCODE(Return, Void, ) // Context getters/setters -OPCODE(GetRegister, U32, Reg, ) -OPCODE(SetRegister, Void, Reg, U32, ) -OPCODE(GetPred, U1, Pred, ) -OPCODE(SetPred, Void, Pred, U1, ) -OPCODE(GetGotoVariable, U1, U32, ) -OPCODE(SetGotoVariable, Void, U32, U1, ) -OPCODE(GetCbuf, U32, U32, U32, ) -OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, Void, Attribute, U32, ) -OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, Void, U32, U32, ) -OPCODE(GetZFlag, U1, Void, ) -OPCODE(GetSFlag, U1, Void, ) -OPCODE(GetCFlag, U1, Void, ) -OPCODE(GetOFlag, U1, Void, ) -OPCODE(SetZFlag, Void, U1, ) -OPCODE(SetSFlag, Void, U1, ) -OPCODE(SetCFlag, Void, U1, ) -OPCODE(SetOFlag, Void, U1, ) -OPCODE(WorkgroupId, U32x3, ) -OPCODE(LocalInvocationId, U32x3, ) +OPCODE(GetRegister, U32, Reg, ) +OPCODE(SetRegister, Void, Reg, U32, ) +OPCODE(GetPred, U1, Pred, ) +OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetGotoVariable, U1, U32, ) +OPCODE(SetGotoVariable, Void, U32, U1, ) +OPCODE(GetCbuf, U32, U32, U32, ) +OPCODE(GetAttribute, U32, Attribute, ) 
+OPCODE(SetAttribute, Void, Attribute, U32, ) +OPCODE(GetAttributeIndexed, U32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, U32, ) +OPCODE(GetZFlag, U1, Void, ) +OPCODE(GetSFlag, U1, Void, ) +OPCODE(GetCFlag, U1, Void, ) +OPCODE(GetOFlag, U1, Void, ) +OPCODE(SetZFlag, Void, U1, ) +OPCODE(SetSFlag, Void, U1, ) +OPCODE(SetCFlag, Void, U1, ) +OPCODE(SetOFlag, Void, U1, ) +OPCODE(WorkgroupId, U32x3, ) +OPCODE(LocalInvocationId, U32x3, ) // Undefined -OPCODE(UndefU1, U1, ) -OPCODE(UndefU8, U8, ) -OPCODE(UndefU16, U16, ) -OPCODE(UndefU32, U32, ) -OPCODE(UndefU64, U64, ) +OPCODE(UndefU1, U1, ) +OPCODE(UndefU8, U8, ) +OPCODE(UndefU16, U16, ) +OPCODE(UndefU32, U32, ) +OPCODE(UndefU64, U64, ) // Memory operations -OPCODE(LoadGlobalU8, U32, U64, ) -OPCODE(LoadGlobalS8, U32, U64, ) -OPCODE(LoadGlobalU16, U32, U64, ) -OPCODE(LoadGlobalS16, U32, U64, ) -OPCODE(LoadGlobal32, U32, U64, ) -OPCODE(LoadGlobal64, U32x2, U64, ) -OPCODE(LoadGlobal128, U32x4, U64, ) -OPCODE(WriteGlobalU8, Void, U64, U32, ) -OPCODE(WriteGlobalS8, Void, U64, U32, ) -OPCODE(WriteGlobalU16, Void, U64, U32, ) -OPCODE(WriteGlobalS16, Void, U64, U32, ) -OPCODE(WriteGlobal32, Void, U64, U32, ) -OPCODE(WriteGlobal64, Void, U64, U32x2, ) -OPCODE(WriteGlobal128, Void, U64, U32x4, ) +OPCODE(LoadGlobalU8, U32, U64, ) +OPCODE(LoadGlobalS8, U32, U64, ) +OPCODE(LoadGlobalU16, U32, U64, ) +OPCODE(LoadGlobalS16, U32, U64, ) +OPCODE(LoadGlobal32, U32, U64, ) +OPCODE(LoadGlobal64, U32x2, U64, ) +OPCODE(LoadGlobal128, U32x4, U64, ) +OPCODE(WriteGlobalU8, Void, U64, U32, ) +OPCODE(WriteGlobalS8, Void, U64, U32, ) +OPCODE(WriteGlobalU16, Void, U64, U32, ) +OPCODE(WriteGlobalS16, Void, U64, U32, ) +OPCODE(WriteGlobal32, Void, U64, U32, ) +OPCODE(WriteGlobal64, Void, U64, U32x2, ) +OPCODE(WriteGlobal128, Void, U64, U32x4, ) // Storage buffer operations -OPCODE(LoadStorageU8, U32, U32, U32, ) -OPCODE(LoadStorageS8, U32, U32, U32, ) -OPCODE(LoadStorageU16, U32, U32, U32, ) -OPCODE(LoadStorageS16, U32, U32, U32, ) 
-OPCODE(LoadStorage32, U32, U32, U32, ) -OPCODE(LoadStorage64, U32x2, U32, U32, ) -OPCODE(LoadStorage128, U32x4, U32, U32, ) -OPCODE(WriteStorageU8, Void, U32, U32, U32, ) -OPCODE(WriteStorageS8, Void, U32, U32, U32, ) -OPCODE(WriteStorageU16, Void, U32, U32, U32, ) -OPCODE(WriteStorageS16, Void, U32, U32, U32, ) -OPCODE(WriteStorage32, Void, U32, U32, U32, ) -OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) -OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) +OPCODE(LoadStorageU8, U32, U32, U32, ) +OPCODE(LoadStorageS8, U32, U32, U32, ) +OPCODE(LoadStorageU16, U32, U32, U32, ) +OPCODE(LoadStorageS16, U32, U32, U32, ) +OPCODE(LoadStorage32, U32, U32, U32, ) +OPCODE(LoadStorage64, U32x2, U32, U32, ) +OPCODE(LoadStorage128, U32x4, U32, U32, ) +OPCODE(WriteStorageU8, Void, U32, U32, U32, ) +OPCODE(WriteStorageS8, Void, U32, U32, U32, ) +OPCODE(WriteStorageU16, Void, U32, U32, U32, ) +OPCODE(WriteStorageS16, Void, U32, U32, U32, ) +OPCODE(WriteStorage32, Void, U32, U32, U32, ) +OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) +OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) // Vector utility -OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) -OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) -OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) -OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) -OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) -OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) -OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) -OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) -OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) -OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) -OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) -OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) -OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) -OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) -OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) -OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) -OPCODE(CompositeInsertF16x3, 
F16x3, F16x3, F16, U32, ) -OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) -OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) -OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) -OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) -OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) -OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) -OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) -OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) -OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) -OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) -OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) -OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) -OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) -OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) -OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) -OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) -OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) -OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) -OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) +OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) +OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) +OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) +OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) +OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) +OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) +OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) +OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) +OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) +OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) +OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) +OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) +OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) 
+OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) +OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) +OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) +OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) +OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) +OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) +OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) +OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) +OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) +OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) +OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) +OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) +OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) +OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) +OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) +OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) +OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) +OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) // Select operations -OPCODE(SelectU1, U1, U1, U1, U1, ) -OPCODE(SelectU8, U8, U1, U8, U8, ) -OPCODE(SelectU16, U16, U1, U16, U16, ) -OPCODE(SelectU32, U32, U1, U32, U32, ) -OPCODE(SelectU64, U64, U1, U64, U64, ) -OPCODE(SelectF16, F16, U1, F16, F16, ) -OPCODE(SelectF32, F32, U1, F32, F32, ) +OPCODE(SelectU1, U1, U1, U1, U1, ) +OPCODE(SelectU8, U8, U1, U8, U8, ) +OPCODE(SelectU16, U16, U1, U16, U16, ) +OPCODE(SelectU32, U32, U1, U32, U32, ) +OPCODE(SelectU64, U64, U1, U64, U64, ) +OPCODE(SelectF16, F16, U1, F16, F16, ) +OPCODE(SelectF32, F32, U1, F32, F32, ) // Bitwise conversions -OPCODE(BitCastU16F16, U16, F16, ) -OPCODE(BitCastU32F32, U32, F32, ) -OPCODE(BitCastU64F64, U64, F64, ) -OPCODE(BitCastF16U16, F16, U16, ) -OPCODE(BitCastF32U32, F32, U32, ) -OPCODE(BitCastF64U64, F64, U64, ) -OPCODE(PackUint2x32, U64, U32x2, ) -OPCODE(UnpackUint2x32, U32x2, U64, ) -OPCODE(PackFloat2x16, U32, F16x2, ) -OPCODE(UnpackFloat2x16, F16x2, U32, ) 
-OPCODE(PackHalf2x16, U32, F32x2, ) -OPCODE(UnpackHalf2x16, F32x2, U32, ) -OPCODE(PackDouble2x32, F64, U32x2, ) -OPCODE(UnpackDouble2x32, U32x2, F64, ) +OPCODE(BitCastU16F16, U16, F16, ) +OPCODE(BitCastU32F32, U32, F32, ) +OPCODE(BitCastU64F64, U64, F64, ) +OPCODE(BitCastF16U16, F16, U16, ) +OPCODE(BitCastF32U32, F32, U32, ) +OPCODE(BitCastF64U64, F64, U64, ) +OPCODE(PackUint2x32, U64, U32x2, ) +OPCODE(UnpackUint2x32, U32x2, U64, ) +OPCODE(PackFloat2x16, U32, F16x2, ) +OPCODE(UnpackFloat2x16, F16x2, U32, ) +OPCODE(PackHalf2x16, U32, F32x2, ) +OPCODE(UnpackHalf2x16, F32x2, U32, ) +OPCODE(PackDouble2x32, F64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, F64, ) // Pseudo-operation, handled specially at final emit -OPCODE(GetZeroFromOp, U1, Opaque, ) -OPCODE(GetSignFromOp, U1, Opaque, ) -OPCODE(GetCarryFromOp, U1, Opaque, ) -OPCODE(GetOverflowFromOp, U1, Opaque, ) +OPCODE(GetZeroFromOp, U1, Opaque, ) +OPCODE(GetSignFromOp, U1, Opaque, ) +OPCODE(GetCarryFromOp, U1, Opaque, ) +OPCODE(GetOverflowFromOp, U1, Opaque, ) +OPCODE(GetSparseFromOp, U1, Opaque, ) // Floating-point operations -OPCODE(FPAbs16, F16, F16, ) -OPCODE(FPAbs32, F32, F32, ) -OPCODE(FPAbs64, F64, F64, ) -OPCODE(FPAdd16, F16, F16, F16, ) -OPCODE(FPAdd32, F32, F32, F32, ) -OPCODE(FPAdd64, F64, F64, F64, ) -OPCODE(FPFma16, F16, F16, F16, F16, ) -OPCODE(FPFma32, F32, F32, F32, F32, ) -OPCODE(FPFma64, F64, F64, F64, F64, ) -OPCODE(FPMax32, F32, F32, F32, ) -OPCODE(FPMax64, F64, F64, F64, ) -OPCODE(FPMin32, F32, F32, F32, ) -OPCODE(FPMin64, F64, F64, F64, ) -OPCODE(FPMul16, F16, F16, F16, ) -OPCODE(FPMul32, F32, F32, F32, ) -OPCODE(FPMul64, F64, F64, F64, ) -OPCODE(FPNeg16, F16, F16, ) -OPCODE(FPNeg32, F32, F32, ) -OPCODE(FPNeg64, F64, F64, ) -OPCODE(FPRecip32, F32, F32, ) -OPCODE(FPRecip64, F64, F64, ) -OPCODE(FPRecipSqrt32, F32, F32, ) -OPCODE(FPRecipSqrt64, F64, F64, ) -OPCODE(FPSqrt, F32, F32, ) -OPCODE(FPSin, F32, F32, ) -OPCODE(FPExp2, F32, F32, ) -OPCODE(FPCos, F32, F32, ) -OPCODE(FPLog2, F32, F32, ) 
-OPCODE(FPSaturate16, F16, F16, ) -OPCODE(FPSaturate32, F32, F32, ) -OPCODE(FPSaturate64, F64, F64, ) -OPCODE(FPRoundEven16, F16, F16, ) -OPCODE(FPRoundEven32, F32, F32, ) -OPCODE(FPRoundEven64, F64, F64, ) -OPCODE(FPFloor16, F16, F16, ) -OPCODE(FPFloor32, F32, F32, ) -OPCODE(FPFloor64, F64, F64, ) -OPCODE(FPCeil16, F16, F16, ) -OPCODE(FPCeil32, F32, F32, ) -OPCODE(FPCeil64, F64, F64, ) -OPCODE(FPTrunc16, F16, F16, ) -OPCODE(FPTrunc32, F32, F32, ) -OPCODE(FPTrunc64, F64, F64, ) +OPCODE(FPAbs16, F16, F16, ) +OPCODE(FPAbs32, F32, F32, ) +OPCODE(FPAbs64, F64, F64, ) +OPCODE(FPAdd16, F16, F16, F16, ) +OPCODE(FPAdd32, F32, F32, F32, ) +OPCODE(FPAdd64, F64, F64, F64, ) +OPCODE(FPFma16, F16, F16, F16, F16, ) +OPCODE(FPFma32, F32, F32, F32, F32, ) +OPCODE(FPFma64, F64, F64, F64, F64, ) +OPCODE(FPMax32, F32, F32, F32, ) +OPCODE(FPMax64, F64, F64, F64, ) +OPCODE(FPMin32, F32, F32, F32, ) +OPCODE(FPMin64, F64, F64, F64, ) +OPCODE(FPMul16, F16, F16, F16, ) +OPCODE(FPMul32, F32, F32, F32, ) +OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPNeg16, F16, F16, ) +OPCODE(FPNeg32, F32, F32, ) +OPCODE(FPNeg64, F64, F64, ) +OPCODE(FPRecip32, F32, F32, ) +OPCODE(FPRecip64, F64, F64, ) +OPCODE(FPRecipSqrt32, F32, F32, ) +OPCODE(FPRecipSqrt64, F64, F64, ) +OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPSin, F32, F32, ) +OPCODE(FPExp2, F32, F32, ) +OPCODE(FPCos, F32, F32, ) +OPCODE(FPLog2, F32, F32, ) +OPCODE(FPSaturate16, F16, F16, ) +OPCODE(FPSaturate32, F32, F32, ) +OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPRoundEven16, F16, F16, ) +OPCODE(FPRoundEven32, F32, F32, ) +OPCODE(FPRoundEven64, F64, F64, ) +OPCODE(FPFloor16, F16, F16, ) +OPCODE(FPFloor32, F32, F32, ) +OPCODE(FPFloor64, F64, F64, ) +OPCODE(FPCeil16, F16, F16, ) +OPCODE(FPCeil32, F32, F32, ) +OPCODE(FPCeil64, F64, F64, ) +OPCODE(FPTrunc16, F16, F16, ) +OPCODE(FPTrunc32, F32, F32, ) +OPCODE(FPTrunc64, F64, F64, ) -OPCODE(FPOrdEqual16, U1, F16, F16, ) -OPCODE(FPOrdEqual32, U1, F32, F32, ) -OPCODE(FPOrdEqual64, U1, F64, F64, ) 
-OPCODE(FPUnordEqual16, U1, F16, F16, ) -OPCODE(FPUnordEqual32, U1, F32, F32, ) -OPCODE(FPUnordEqual64, U1, F64, F64, ) -OPCODE(FPOrdNotEqual16, U1, F16, F16, ) -OPCODE(FPOrdNotEqual32, U1, F32, F32, ) -OPCODE(FPOrdNotEqual64, U1, F64, F64, ) -OPCODE(FPUnordNotEqual16, U1, F16, F16, ) -OPCODE(FPUnordNotEqual32, U1, F32, F32, ) -OPCODE(FPUnordNotEqual64, U1, F64, F64, ) -OPCODE(FPOrdLessThan16, U1, F16, F16, ) -OPCODE(FPOrdLessThan32, U1, F32, F32, ) -OPCODE(FPOrdLessThan64, U1, F64, F64, ) -OPCODE(FPUnordLessThan16, U1, F16, F16, ) -OPCODE(FPUnordLessThan32, U1, F32, F32, ) -OPCODE(FPUnordLessThan64, U1, F64, F64, ) -OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) -OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) -OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) -OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) -OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) -OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) -OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) -OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) -OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) -OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) -OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) -OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) -OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) -OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) -OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) -OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) -OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) -OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdEqual16, U1, F16, F16, ) +OPCODE(FPOrdEqual32, U1, F32, F32, ) +OPCODE(FPOrdEqual64, U1, F64, F64, ) +OPCODE(FPUnordEqual16, U1, F16, F16, ) +OPCODE(FPUnordEqual32, U1, F32, F32, ) +OPCODE(FPUnordEqual64, U1, F64, F64, ) +OPCODE(FPOrdNotEqual16, U1, F16, F16, ) +OPCODE(FPOrdNotEqual32, U1, F32, F32, ) +OPCODE(FPOrdNotEqual64, U1, F64, F64, ) +OPCODE(FPUnordNotEqual16, U1, F16, F16, ) +OPCODE(FPUnordNotEqual32, U1, F32, F32, ) +OPCODE(FPUnordNotEqual64, U1, F64, F64, ) +OPCODE(FPOrdLessThan16, U1, F16, 
F16, ) +OPCODE(FPOrdLessThan32, U1, F32, F32, ) +OPCODE(FPOrdLessThan64, U1, F64, F64, ) +OPCODE(FPUnordLessThan16, U1, F16, F16, ) +OPCODE(FPUnordLessThan32, U1, F32, F32, ) +OPCODE(FPUnordLessThan64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) +OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) // Integer operations -OPCODE(IAdd32, U32, U32, U32, ) -OPCODE(IAdd64, U64, U64, U64, ) -OPCODE(ISub32, U32, U32, U32, ) -OPCODE(ISub64, U64, U64, U64, ) -OPCODE(IMul32, U32, U32, U32, ) -OPCODE(INeg32, U32, U32, ) -OPCODE(INeg64, U64, U64, ) -OPCODE(IAbs32, U32, U32, ) -OPCODE(ShiftLeftLogical32, U32, U32, U32, ) -OPCODE(ShiftLeftLogical64, U64, U64, U32, ) -OPCODE(ShiftRightLogical32, U32, U32, U32, ) -OPCODE(ShiftRightLogical64, U64, U64, U32, ) -OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) -OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) -OPCODE(BitwiseAnd32, U32, U32, U32, ) -OPCODE(BitwiseOr32, U32, U32, U32, ) -OPCODE(BitwiseXor32, U32, U32, U32, ) -OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) -OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) -OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) -OPCODE(BitReverse32, U32, U32, ) -OPCODE(BitCount32, U32, U32, ) -OPCODE(BitwiseNot32, U32, U32, ) +OPCODE(IAdd32, 
U32, U32, U32, ) +OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(ISub32, U32, U32, U32, ) +OPCODE(ISub64, U64, U64, U64, ) +OPCODE(IMul32, U32, U32, U32, ) +OPCODE(INeg32, U32, U32, ) +OPCODE(INeg64, U64, U64, ) +OPCODE(IAbs32, U32, U32, ) +OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftLeftLogical64, U64, U64, U32, ) +OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical64, U64, U64, U32, ) +OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) +OPCODE(BitwiseAnd32, U32, U32, U32, ) +OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseXor32, U32, U32, U32, ) +OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) +OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) +OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) +OPCODE(BitReverse32, U32, U32, ) +OPCODE(BitCount32, U32, U32, ) +OPCODE(BitwiseNot32, U32, U32, ) -OPCODE(FindSMsb32, U32, U32, ) -OPCODE(FindUMsb32, U32, U32, ) -OPCODE(SMin32, U32, U32, U32, ) -OPCODE(UMin32, U32, U32, U32, ) -OPCODE(SMax32, U32, U32, U32, ) -OPCODE(UMax32, U32, U32, U32, ) -OPCODE(SLessThan, U1, U32, U32, ) -OPCODE(ULessThan, U1, U32, U32, ) -OPCODE(IEqual, U1, U32, U32, ) -OPCODE(SLessThanEqual, U1, U32, U32, ) -OPCODE(ULessThanEqual, U1, U32, U32, ) -OPCODE(SGreaterThan, U1, U32, U32, ) -OPCODE(UGreaterThan, U1, U32, U32, ) -OPCODE(INotEqual, U1, U32, U32, ) -OPCODE(SGreaterThanEqual, U1, U32, U32, ) -OPCODE(UGreaterThanEqual, U1, U32, U32, ) +OPCODE(FindSMsb32, U32, U32, ) +OPCODE(FindUMsb32, U32, U32, ) +OPCODE(SMin32, U32, U32, U32, ) +OPCODE(UMin32, U32, U32, U32, ) +OPCODE(SMax32, U32, U32, U32, ) +OPCODE(UMax32, U32, U32, U32, ) +OPCODE(SLessThan, U1, U32, U32, ) +OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(IEqual, U1, U32, U32, ) +OPCODE(SLessThanEqual, U1, U32, U32, ) +OPCODE(ULessThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThan, U1, U32, U32, ) +OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(INotEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual, U1, U32, U32, ) 
+OPCODE(UGreaterThanEqual, U1, U32, U32, ) // Logical operations -OPCODE(LogicalOr, U1, U1, U1, ) -OPCODE(LogicalAnd, U1, U1, U1, ) -OPCODE(LogicalXor, U1, U1, U1, ) -OPCODE(LogicalNot, U1, U1, ) +OPCODE(LogicalOr, U1, U1, U1, ) +OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalXor, U1, U1, U1, ) +OPCODE(LogicalNot, U1, U1, ) // Conversion operations -OPCODE(ConvertS16F16, U32, F16, ) -OPCODE(ConvertS16F32, U32, F32, ) -OPCODE(ConvertS16F64, U32, F64, ) -OPCODE(ConvertS32F16, U32, F16, ) -OPCODE(ConvertS32F32, U32, F32, ) -OPCODE(ConvertS32F64, U32, F64, ) -OPCODE(ConvertS64F16, U64, F16, ) -OPCODE(ConvertS64F32, U64, F32, ) -OPCODE(ConvertS64F64, U64, F64, ) -OPCODE(ConvertU16F16, U32, F16, ) -OPCODE(ConvertU16F32, U32, F32, ) -OPCODE(ConvertU16F64, U32, F64, ) -OPCODE(ConvertU32F16, U32, F16, ) -OPCODE(ConvertU32F32, U32, F32, ) -OPCODE(ConvertU32F64, U32, F64, ) -OPCODE(ConvertU64F16, U64, F16, ) -OPCODE(ConvertU64F32, U64, F32, ) -OPCODE(ConvertU64F64, U64, F64, ) -OPCODE(ConvertU64U32, U64, U32, ) -OPCODE(ConvertU32U64, U32, U64, ) -OPCODE(ConvertF16F32, F16, F32, ) -OPCODE(ConvertF32F16, F32, F16, ) -OPCODE(ConvertF32F64, F32, F64, ) -OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertS16F16, U32, F16, ) +OPCODE(ConvertS16F32, U32, F32, ) +OPCODE(ConvertS16F64, U32, F64, ) +OPCODE(ConvertS32F16, U32, F16, ) +OPCODE(ConvertS32F32, U32, F32, ) +OPCODE(ConvertS32F64, U32, F64, ) +OPCODE(ConvertS64F16, U64, F16, ) +OPCODE(ConvertS64F32, U64, F32, ) +OPCODE(ConvertS64F64, U64, F64, ) +OPCODE(ConvertU16F16, U32, F16, ) +OPCODE(ConvertU16F32, U32, F32, ) +OPCODE(ConvertU16F64, U32, F64, ) +OPCODE(ConvertU32F16, U32, F16, ) +OPCODE(ConvertU32F32, U32, F32, ) +OPCODE(ConvertU32F64, U32, F64, ) +OPCODE(ConvertU64F16, U64, F16, ) +OPCODE(ConvertU64F32, U64, F32, ) +OPCODE(ConvertU64F64, U64, F64, ) +OPCODE(ConvertU64U32, U64, U32, ) +OPCODE(ConvertU32U64, U32, U64, ) +OPCODE(ConvertF16F32, F16, F32, ) +OPCODE(ConvertF32F16, F32, F16, ) +OPCODE(ConvertF32F64, F32, F64, ) 
+OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertF16S32, F16, U32, ) +OPCODE(ConvertF16S64, F16, U64, ) +OPCODE(ConvertF16U32, F16, U32, ) +OPCODE(ConvertF16U64, F16, U64, ) +OPCODE(ConvertF32S32, F32, U32, ) +OPCODE(ConvertF32S64, F32, U64, ) +OPCODE(ConvertF32U32, F32, U32, ) +OPCODE(ConvertF32U64, F32, U64, ) +OPCODE(ConvertF64S32, F64, U32, ) +OPCODE(ConvertF64S64, F64, U64, ) +OPCODE(ConvertF64U32, F64, U32, ) +OPCODE(ConvertF64U64, F64, U64, ) + +// Image operations +OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 771094eb9..8fea05f7b 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -293,6 +293,17 @@ constexpr size_t NUM_REGS = 256; return reg + (-num); } +[[nodiscard]] constexpr Reg operator++(Reg& reg) { + reg = reg + 1; + return reg; +} + +[[nodiscard]] constexpr Reg operator++(Reg& reg, int) { + const Reg copy{reg}; + reg = reg + 1; + return copy; +} + [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { return 
static_cast(reg); } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 9b7e1480b..3602883d6 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -75,6 +75,7 @@ private: f64 imm_f64; }; }; +static_assert(std::is_trivially_copyable_v); template class TypedValue : public Value { diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 5d0b91598..f2a2ff331 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -249,8 +249,8 @@ INST(SULD, "SULD", "1110 1011 000- ----") INST(SURED, "SURED", "1110 1011 010- ----") INST(SUST, "SUST", "1110 1011 001- ----") INST(SYNC, "SYNC", "1111 0000 1111 1---") -INST(TEX, "TEX", "1100 00-- --11 1---") -INST(TEX_b, "TEX (b)", "1101 1110 1011 1---") +INST(TEX, "TEX", "1100 0--- ---- ----") +INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") INST(TEXS, "TEXS", "1101 -00- ---- ----") INST(TLD, "TLD", "1101 1100 --11 1---") INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index dbfc04f75..b270bbccd 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -62,6 +62,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Blod : u64 { + None, + LZ, + LB, + LL, + INVALIDBLOD4, + INVALIDBLOD5, + LBA, + LLA, +}; + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) 
{ + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { + 
switch (blod) { + case Blod::None: + return v.ir.Imm32(0.0f); + case Blod::LZ: + return v.ir.Imm32(0.0f); + case Blod::LB: + case Blod::LL: + case Blod::LBA: + case Blod::LLA: + return v.F(reg++); + case Blod::INVALIDBLOD4: + case Blod::INVALIDBLOD5: + break; + } + throw NotImplementedException("Invalid blod {}", blod); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +bool HasExplicitLod(Blod blod) { + switch (blod) { + case Blod::LL: + case Blod::LLA: + case Blod::LZ: + return true; + default: + return false; + } +} + +void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, + std::optional cbuf_offset) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + } const tex{insn}; + + if (lc) { + throw NotImplementedException("LC"); + } + const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; + + IR::Reg 
meta_reg{tex.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::F32 dref; + IR::F32 lod_clamp; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(meta_reg++); + } + const IR::F32 lod{MakeLod(v, meta_reg, blod)}; + if (aoffi) { + offset = MakeOffset(v, meta_reg, tex.type); + } + if (tex.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tex.type, tex.dc != 0)); + info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); + info.has_lod_clamp.Assign(lc ? 1 : 0); + + const IR::Value sample{[&]() -> IR::Value { + if (tex.dc == 0) { + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); + } else { + return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); + } + } + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } else { + return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } + }()}; + + for (int element = 0; element < 4; ++element) { + if (((tex.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value; + if (tex.dc != 0) { + value = element < 3 ? 
IR::F32{sample} : v.ir.Imm32(1.0f); + } else { + value = IR::F32{v.ir.CompositeExtract(sample, element)}; + } + v.F(tex.dest_reg + element, value); + } + if (tex.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEX(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> aoffi; + BitField<55, 3, Blod> blod; + BitField<58, 1, u64> lc; + BitField<36, 13, u64> cbuf_offset; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); +} + +void TranslatorVisitor::TEX_b(u64 insn) { + union { + u64 raw; + BitField<36, 1, u64> aoffi; + BitField<37, 3, Blod> blod; + BitField<40, 1, u64> lc; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6662ef4cd..960beadd4 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -82,6 +82,25 @@ void VisitUsages(Info& info, IR::Inst& inst) { throw NotImplementedException("Constant buffer with non-immediate index"); } break; + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + case IR::Opcode::BoundImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::ImageSampleImplicitLod: + case IR::Opcode::ImageSampleExplicitLod: + case IR::Opcode::ImageSampleDrefImplicitLod: + case IR::Opcode::ImageSampleDrefExplicitLod: { + const TextureType type{inst.Flags().type}; + 
info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || + type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } default: break; } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 965e52135..2625c0bb2 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -226,6 +226,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, } // Reversed loops are more likely to find the right result for (size_t arg = inst->NumArgs(); arg--;) { + IR::Block* inst_block{block}; if (inst->Opcode() == IR::Opcode::Phi) { // If we are going through a phi node, mark the current block as visited visited.insert(block); @@ -235,15 +236,11 @@ std::optional Track(IR::Block* block, const IR::Value& value, // Already visited, skip continue; } - const std::optional storage_buffer{Track(phi_block, inst->Arg(arg), bias, visited)}; - if (storage_buffer) { - return *storage_buffer; - } - } else { - const std::optional storage_buffer{Track(block, inst->Arg(arg), bias, visited)}; - if (storage_buffer) { - return *storage_buffer; - } + inst_block = phi_block; + } + const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)}; + if (storage_buffer) { + return *storage_buffer; } } return std::nullopt; diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 38106308c..3b7e7306b 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -6,6 +6,7 @@ #include +#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" 
#include "shader_recompiler/frontend/ir/program.h" @@ -26,6 +27,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Function& function); void LowerFp16ToFp32(IR::Program& program); void SsaRewritePass(std::span post_order_blocks); +void TexturePass(Environment& env, IR::Program& program); void VerificationPass(const IR::Function& function); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp new file mode 100644 index 000000000..80e4ad6a9 --- /dev/null +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -0,0 +1,199 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include +#include + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/ir_opt/passes.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { +namespace { +struct ConstBufferAddr { + u32 index; + u32 offset; +}; + +struct TextureInst { + ConstBufferAddr cbuf; + IR::Inst* inst; + IR::Block* block; +}; + +using TextureInstVector = boost::container::small_vector; + +using VisitedBlocks = boost::container::flat_set, + boost::container::small_vector>; + +IR::Opcode IndexedInstruction(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleImplicitLod: + return IR::Opcode::ImageSampleImplicitLod; + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + return IR::Opcode::ImageSampleExplicitLod; + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + return IR::Opcode::ImageSampleDrefImplicitLod; + case IR::Opcode::BoundImageSampleDrefExplicitLod: + 
case IR::Opcode::BindlessImageSampleDrefExplicitLod: + return IR::Opcode::ImageSampleDrefExplicitLod; + default: + return IR::Opcode::Void; + } +} + +bool IsBindless(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + return true; + case IR::Opcode::BoundImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BoundImageSampleDrefExplicitLod: + return false; + default: + throw InvalidArgument("Invalid opcode {}", inst.Opcode()); + } +} + +bool IsTextureInstruction(const IR::Inst& inst) { + return IndexedInstruction(inst) != IR::Opcode::Void; +} + +std::optional Track(IR::Block* block, const IR::Value& value, + VisitedBlocks& visited) { + if (value.IsImmediate()) { + // Immediates can't be a storage buffer + return std::nullopt; + } + const IR::Inst* const inst{value.InstRecursive()}; + if (inst->Opcode() == IR::Opcode::GetCbuf) { + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Reading a bindless texture from variable indices is valid + // but not supported here at the moment + return std::nullopt; + } + if (!offset.IsImmediate()) { + // TODO: Support arrays of textures + return std::nullopt; + } + return ConstBufferAddr{ + .index{index.U32()}, + .offset{offset.U32()}, + }; + } + // Reversed loops are more likely to find the right result + for (size_t arg = inst->NumArgs(); arg--;) { + IR::Block* inst_block{block}; + if (inst->Opcode() == IR::Opcode::Phi) { + // If we are going through a phi node, mark the current block as visited + visited.insert(block); + // and skip already visited blocks to avoid looping forever + IR::Block* const phi_block{inst->PhiBlock(arg)}; + if (visited.contains(phi_block)) { + // 
Already visited, skip + continue; + } + inst_block = phi_block; + } + const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), visited)}; + if (storage_buffer) { + return *storage_buffer; + } + } + return std::nullopt; +} + +TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { + ConstBufferAddr addr; + if (IsBindless(inst)) { + VisitedBlocks visited; + const std::optional track_addr{Track(block, IR::Value{&inst}, visited)}; + if (!track_addr) { + throw NotImplementedException("Failed to track bindless texture constant buffer"); + } + addr = *track_addr; + } else { + addr = ConstBufferAddr{ + .index{env.TextureBoundBuffer()}, + .offset{inst.Arg(0).U32()}, + }; + } + return TextureInst{ + .cbuf{addr}, + .inst{&inst}, + .block{block}, + }; +} + +class Descriptors { +public: + explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {} + + u32 Add(const TextureDescriptor& descriptor) { + // TODO: Handle arrays + auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) { + return descriptor.cbuf_index == existing.cbuf_index && + descriptor.cbuf_offset == existing.cbuf_offset && + descriptor.type == existing.type; + })}; + if (it != descriptors.end()) { + return static_cast(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(descriptor); + return static_cast(descriptors.size()) - 1; + } + +private: + TextureDescriptors& descriptors; +}; +} // Anonymous namespace + +void TexturePass(Environment& env, IR::Program& program) { + TextureInstVector to_replace; + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsTextureInstruction(inst)) { + continue; + } + to_replace.push_back(MakeInst(env, block, inst)); + } + } + } + // Sort instructions to visit textures by constant buffer index, then by offset + std::ranges::sort(to_replace, [](const auto& lhs, 
const auto& rhs) { + return lhs.cbuf.offset < rhs.cbuf.offset; + }); + std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { + return lhs.cbuf.index < rhs.cbuf.index; + }); + Descriptors descriptors{program.info.texture_descriptors}; + for (TextureInst& texture_inst : to_replace) { + // TODO: Handle arrays + IR::Inst* const inst{texture_inst.inst}; + const u32 index{descriptors.Add(TextureDescriptor{ + .type{inst->Flags().type}, + .cbuf_index{texture_inst.cbuf.index}, + .cbuf_offset{texture_inst.cbuf.offset}, + .count{1}, + })}; + inst->ReplaceOpcode(IndexedInstruction(*inst)); + inst->SetArg(0, IR::Value{index}); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 8766bf13e..103a2f0b4 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -8,25 +8,51 @@ #include "common/common_types.h" +#include #include namespace Shader { +enum class TextureType : u32 { + Color1D, + ColorArray1D, + Color2D, + ColorArray2D, + Color3D, + ColorCube, + ColorArrayCube, + Shadow1D, + ShadowArray1D, + Shadow2D, + ShadowArray2D, + Shadow3D, + ShadowCube, + ShadowArrayCube, +}; + +struct TextureDescriptor { + TextureType type; + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; +using TextureDescriptors = boost::container::small_vector; + +struct ConstantBufferDescriptor { + u32 index; + u32 count; +}; + +struct StorageBufferDescriptor { + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; + struct Info { static constexpr size_t MAX_CBUFS{18}; static constexpr size_t MAX_SSBOS{16}; - struct ConstantBufferDescriptor { - u32 index; - u32 count; - }; - - struct StorageBufferDescriptor { - u32 cbuf_index; - u32 cbuf_offset; - u32 count; - }; - bool uses_workgroup_id{}; bool uses_local_invocation_id{}; bool uses_fp16{}; @@ -35,12 +61,16 @@ struct Info { bool uses_fp16_denorms_preserve{}; bool uses_fp32_denorms_flush{}; bool 
uses_fp32_denorms_preserve{}; + bool uses_image_1d{}; + bool uses_sampled_1d{}; + bool uses_sparse_residency{}; u32 constant_buffer_mask{}; boost::container::static_vector constant_buffer_descriptors; boost::container::static_vector storage_buffers_descriptors; + TextureDescriptors texture_descriptors; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index a658a3276..ef8bef6ff 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -40,6 +40,16 @@ vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Sh }); ++binding; } + for (const auto& desc : info.texture_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } return device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -79,6 +89,18 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( ++binding; offset += sizeof(DescriptorUpdateEntry); } + for (const auto& desc : info.texture_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, .pNext = nullptr, @@ -92,6 +114,44 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( .set = 0, }); } + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + const Tegra::Texture::TextureHandle 
handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); + } + + u32 image; + u32 sampler; +}; + +VideoCommon::ImageViewType CastType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Shadow1D: + return VideoCommon::ImageViewType::e1D; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ShadowArray1D: + return VideoCommon::ImageViewType::e1DArray; + case Shader::TextureType::Color2D: + case Shader::TextureType::Shadow2D: + return VideoCommon::ImageViewType::e2D; + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::ShadowArray2D: + return VideoCommon::ImageViewType::e2DArray; + case Shader::TextureType::Color3D: + case Shader::TextureType::Shadow3D: + return VideoCommon::ImageViewType::e3D; + case Shader::TextureType::ColorCube: + case Shader::TextureType::ShadowCube: + return VideoCommon::ImageViewType::Cube; + case Shader::TextureType::ColorArrayCube: + case Shader::TextureType::ShadowArrayCube: + return VideoCommon::ImageViewType::CubeArray; + } + UNREACHABLE_MSG("Invalid texture type {}", type); +} } // Anonymous namespace ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, @@ -143,6 +203,47 @@ void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { buffer_cache.BindHostComputeBuffers(); } +void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, + TextureCache& texture_cache) { + texture_cache.SynchronizeComputeDescriptors(); + + static constexpr size_t max_elements = 64; + std::array image_view_ids; + boost::container::static_vector image_view_indices; + boost::container::static_vector sampler_handles; + + const auto& launch_desc{kepler_compute.launch_description}; + const auto& cbufs{launch_desc.const_buffer_config}; + const bool via_header_index{launch_desc.linked_tsc}; + for (const auto& desc : 
info.texture_descriptors) { + const u32 cbuf_index{desc.cbuf_index}; + const u32 cbuf_offset{desc.cbuf_offset}; + ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); + + const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; + const u32 raw_handle{gpu_memory.Read(addr)}; + + const TextureHandle handle(raw_handle, via_header_index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + } + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + size_t index{}; + for (const auto& desc : info.texture_descriptors) { + const VkSampler vk_sampler{sampler_handles[index]}; + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); + ++index; + } +} + VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index dc045d524..08d73a2a4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -6,9 +6,11 @@ #include "common/common_types.h" #include "shader_recompiler/shader_info.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include 
"video_core/vulkan_common/vulkan_wrapper.h" @@ -30,6 +32,8 @@ public: ComputePipeline(const ComputePipeline&) = delete; void ConfigureBufferCache(BufferCache& buffer_cache); + void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache); [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2497c2385..bcb7dd2eb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -76,6 +76,10 @@ public: return gpu_memory.Read(program_base + address); } + u32 TextureBoundBuffer() override { + return kepler_compute.regs.tex_cb_index; + } + std::array WorkgroupSize() override { const auto& qmd{kepler_compute.launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1b662f9f3..c94419d29 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -241,9 +241,10 @@ void RasterizerVulkan::DispatchCompute() { if (!pipeline) { return; } - std::scoped_lock lock{buffer_cache.mutex}; + std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; update_descriptor_queue.Acquire(); pipeline->ConfigureBufferCache(buffer_cache); + pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache); const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; const auto& qmd{kepler_compute.launch_description}; -- cgit v1.2.3 From f91859efd259995806c2944f7941b105b58300d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Mar 2021 05:04:12 -0300 Subject: shader: Implement I2F --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 2 + 
src/shader_recompiler/backend/spirv/emit_spirv.h | 13 ++ .../backend/spirv/emit_spirv_convert.cpp | 48 ++++++ .../backend/spirv/emit_spirv_integer.cpp | 4 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 145 +++++++++++------ src/shader_recompiler/frontend/ir/ir_emitter.h | 14 +- src/shader_recompiler/frontend/ir/opcodes.inc | 13 ++ .../frontend/maxwell/translate/impl/impl.cpp | 21 +++ .../frontend/maxwell/translate/impl/impl.h | 2 + .../impl/integer_floating_point_conversion.cpp | 173 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 -- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../ir_opt/collect_shader_info_pass.cpp | 28 ++++ .../ir_opt/lower_fp16_to_fp32.cpp | 16 ++ .../renderer_vulkan/vk_pipeline_cache.cpp | 3 +- 17 files changed, 429 insertions(+), 70 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 31c394106..d0f0ec775 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -84,6 +84,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_add_three_input.cpp frontend/maxwell/translate/impl/integer_compare.cpp frontend/maxwell/translate/impl/integer_compare_and_set.cpp + frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp frontend/maxwell/translate/impl/integer_funnel_shift.cpp frontend/maxwell/translate/impl/integer_minimum_maximum.cpp frontend/maxwell/translate/impl/integer_popcount.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 6c79b611b..6c8f16562 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ 
b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -89,6 +89,8 @@ Id EmitContext::Def(const IR::Value& value) { return value.U1() ? true_value : false_value; case IR::Type::U32: return Constant(U32[1], value.U32()); + case IR::Type::U64: + return Constant(U64, value.U64()); case IR::Type::F32: return Constant(F32[1], value.F32()); case IR::Type::F64: diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index ae121f534..1fe65f8a9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -243,6 +243,7 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b); Id EmitINeg32(EmitContext& ctx, Id value); Id EmitINeg64(EmitContext& ctx, Id value); Id EmitIAbs32(EmitContext& ctx, Id value); +Id EmitIAbs64(EmitContext& ctx, Id value); Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); @@ -302,16 +303,28 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value); Id EmitConvertF32F16(EmitContext& ctx, Id value); Id EmitConvertF32F64(EmitContext& ctx, Id value); Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S8(EmitContext& ctx, Id value); +Id EmitConvertF16S16(EmitContext& ctx, Id value); Id EmitConvertF16S32(EmitContext& ctx, Id value); Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U8(EmitContext& ctx, Id value); +Id EmitConvertF16U16(EmitContext& ctx, Id value); Id EmitConvertF16U32(EmitContext& ctx, Id value); Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S8(EmitContext& ctx, Id value); +Id EmitConvertF32S16(EmitContext& ctx, Id value); Id EmitConvertF32S32(EmitContext& ctx, Id value); Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U8(EmitContext& ctx, Id value); +Id EmitConvertF32U16(EmitContext& ctx, Id value); Id 
EmitConvertF32U32(EmitContext& ctx, Id value); Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S8(EmitContext& ctx, Id value); +Id EmitConvertF64S16(EmitContext& ctx, Id value); Id EmitConvertF64S32(EmitContext& ctx, Id value); Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U8(EmitContext& ctx, Id value); +Id EmitConvertF64U16(EmitContext& ctx, Id value); Id EmitConvertF64U32(EmitContext& ctx, Id value); Id EmitConvertF64U64(EmitContext& ctx, Id value); Id EmitBindlessImageSampleImplicitLod(EmitContext&); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index 2aff673aa..757165626 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -102,6 +102,14 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) { return ctx.OpFConvert(ctx.F64[1], value); } +Id EmitConvertF16S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + Id EmitConvertF16S32(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F16[1], value); } @@ -110,6 +118,14 @@ Id EmitConvertF16S64(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F16[1], value); } +Id EmitConvertF16U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF16U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + Id EmitConvertF16U32(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F16[1], value); } @@ -118,6 +134,14 @@ Id EmitConvertF16U64(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F16[1], value); } +Id EmitConvertF32S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ctx.OpUConvert(ctx.U8, value)); +} + +Id EmitConvertF32S16(EmitContext& ctx, Id value) { + 
return ctx.OpConvertSToF(ctx.F32[1], ctx.OpUConvert(ctx.U16, value)); +} + Id EmitConvertF32S32(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F32[1], value); } @@ -126,6 +150,14 @@ Id EmitConvertF32S64(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F32[1], value); } +Id EmitConvertF32U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ctx.OpUConvert(ctx.U8, value)); +} + +Id EmitConvertF32U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ctx.OpUConvert(ctx.U16, value)); +} + Id EmitConvertF32U32(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F32[1], value); } @@ -134,6 +166,14 @@ Id EmitConvertF32U64(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F32[1], value); } +Id EmitConvertF64S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ctx.OpUConvert(ctx.U8, value)); +} + +Id EmitConvertF64S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ctx.OpUConvert(ctx.U16, value)); +} + Id EmitConvertF64S32(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F64[1], value); } @@ -142,6 +182,14 @@ Id EmitConvertF64S64(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F64[1], value); } +Id EmitConvertF64U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ctx.OpUConvert(ctx.U8, value)); +} + +Id EmitConvertF64U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ctx.OpUConvert(ctx.U16, value)); +} + Id EmitConvertF64U32(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F64[1], value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index c9de204b0..a9c5e9cca 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -70,6 +70,10 @@ Id EmitIAbs32(EmitContext& ctx, Id value) { return ctx.OpSAbs(ctx.U32[1], value); } +Id 
EmitIAbs64(EmitContext& ctx, Id value) { + return ctx.OpSAbs(ctx.U64, value); +} + Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 672836c0b..652f6949e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -53,6 +53,10 @@ U64 IREmitter::Imm64(u64 value) const { return U64{Value{value}}; } +U64 IREmitter::Imm64(s64 value) const { + return U64{Value{static_cast(value)}}; +} + F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } @@ -363,7 +367,7 @@ U1 IREmitter::GetSparseFromOp(const Value& op) { } F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { - if (a.Type() != a.Type()) { + if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { @@ -974,8 +978,15 @@ U32U64 IREmitter::INeg(const U32U64& value) { } } -U32 IREmitter::IAbs(const U32& value) { - return Inst(Opcode::IAbs32, value); +U32U64 IREmitter::IAbs(const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::IAbs32, value); + case Type::U64: + return Inst(Opcode::IAbs64, value); + default: + ThrowInvalidType(value.Type()); + } } U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { @@ -1074,8 +1085,25 @@ U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? 
Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } -U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) { - return Inst(Opcode::IEqual, lhs, rhs); +U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst(Opcode::IEqual, lhs, rhs); + case Type::U64: { + // Manually compare the unpacked values + const Value lhs_vector{UnpackUint2x32(lhs)}; + const Value rhs_vector{UnpackUint2x32(rhs)}; + return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)}, + IR::U32{CompositeExtract(rhs_vector, 0)}), + IEqual(IR::U32{CompositeExtract(lhs_vector, 1)}, + IR::U32{CompositeExtract(rhs_vector, 1)})); + } + default: + ThrowInvalidType(lhs.Type()); + } } U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { @@ -1198,79 +1226,96 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v } } -F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) { - switch (bitsize) { +F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { + switch (dest_bitsize) { case 16: - switch (value.Type()) { - case Type::U32: + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF16S8, value); + case 16: + return Inst(Opcode::ConvertF16S16, value); + case 32: return Inst(Opcode::ConvertF16S32, value); - case Type::U64: + case 64: return Inst(Opcode::ConvertF16S64, value); - default: - ThrowInvalidType(value.Type()); } + break; case 32: - switch (value.Type()) { - case Type::U32: + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF32S8, value); + case 16: + return Inst(Opcode::ConvertF32S16, value); + case 32: return Inst(Opcode::ConvertF32S32, value); - case Type::U64: + case 64: return Inst(Opcode::ConvertF32S64, value); - default: - ThrowInvalidType(value.Type()); } + break; case 64: - switch 
(value.Type()) { - case Type::U32: - return Inst(Opcode::ConvertF64S32, value); - case Type::U64: - return Inst(Opcode::ConvertF64S64, value); - default: - ThrowInvalidType(value.Type()); + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF64S8, value); + case 16: + return Inst(Opcode::ConvertF64S16, value); + case 32: + return Inst(Opcode::ConvertF64S32, value); + case 64: + return Inst(Opcode::ConvertF64S64, value); } - default: - throw InvalidArgument("Invalid destination bitsize {}", bitsize); + break; } + throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); } -F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) { - switch (bitsize) { +F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { + switch (dest_bitsize) { case 16: - switch (value.Type()) { - case Type::U32: + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF16U8, value); + case 16: + return Inst(Opcode::ConvertF16U16, value); + case 32: return Inst(Opcode::ConvertF16U32, value); - case Type::U64: + case 64: return Inst(Opcode::ConvertF16U64, value); - default: - ThrowInvalidType(value.Type()); } + break; case 32: - switch (value.Type()) { - case Type::U32: + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF32U8, value); + case 16: + return Inst(Opcode::ConvertF32U16, value); + case 32: return Inst(Opcode::ConvertF32U32, value); - case Type::U64: + case 64: return Inst(Opcode::ConvertF32U64, value); - default: - ThrowInvalidType(value.Type()); } + break; case 64: - switch (value.Type()) { - case Type::U32: - return Inst(Opcode::ConvertF64U32, value); - case Type::U64: - return Inst(Opcode::ConvertF64U64, value); - default: - ThrowInvalidType(value.Type()); + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF64U8, value); + case 16: + return Inst(Opcode::ConvertF64U16, value); + case 32: + return Inst(Opcode::ConvertF64U32, value); + case 64: + 
return Inst(Opcode::ConvertF64U64, value); } - default: - throw InvalidArgument("Invalid destination bitsize {}", bitsize); + break; } + throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); } -F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) { +F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, + const Value& value) { if (is_signed) { - return ConvertSToF(bitsize, value); + return ConvertSToF(dest_bitsize, src_bitsize, value); } else { - return ConvertUToF(bitsize, value); + return ConvertUToF(dest_bitsize, src_bitsize, value); } } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 72af5db37..8edb11154 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -29,6 +29,7 @@ public: [[nodiscard]] U32 Imm32(s32 value) const; [[nodiscard]] F32 Imm32(f32 value) const; [[nodiscard]] U64 Imm64(u64 value) const; + [[nodiscard]] U64 Imm64(s64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; void Branch(Block* label); @@ -170,7 +171,7 @@ public: [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); [[nodiscard]] U32U64 INeg(const U32U64& value); - [[nodiscard]] U32 IAbs(const U32& value); + [[nodiscard]] U32U64 IAbs(const U32U64& value); [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); @@ -193,7 +194,7 @@ public: [[nodiscard]] U32 UMax(const U32& a, const U32& b); [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); - [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); [[nodiscard]] U1 
ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); @@ -207,9 +208,12 @@ public: [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); - [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value); - [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value); - [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value); + [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, + const Value& value); + [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, + const Value& value); + [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, + const Value& value); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 593faca52..8471db7b9 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -247,6 +247,7 @@ OPCODE(IMul32, U32, U32, OPCODE(INeg32, U32, U32, ) OPCODE(INeg64, U64, U64, ) OPCODE(IAbs32, U32, U32, ) +OPCODE(IAbs64, U64, U64, ) OPCODE(ShiftLeftLogical32, U32, U32, U32, ) OPCODE(ShiftLeftLogical64, U64, U64, U32, ) OPCODE(ShiftRightLogical32, U32, U32, U32, ) @@ -311,16 +312,28 @@ OPCODE(ConvertF16F32, F16, F32, OPCODE(ConvertF32F16, F32, F16, ) OPCODE(ConvertF32F64, F32, F64, ) OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertF16S8, F16, U32, ) +OPCODE(ConvertF16S16, F16, U32, ) OPCODE(ConvertF16S32, F16, U32, ) 
OPCODE(ConvertF16S64, F16, U64, ) +OPCODE(ConvertF16U8, F16, U32, ) +OPCODE(ConvertF16U16, F16, U32, ) OPCODE(ConvertF16U32, F16, U32, ) OPCODE(ConvertF16U64, F16, U64, ) +OPCODE(ConvertF32S8, F32, U32, ) +OPCODE(ConvertF32S16, F32, U32, ) OPCODE(ConvertF32S32, F32, U32, ) OPCODE(ConvertF32S64, F32, U64, ) +OPCODE(ConvertF32U8, F32, U32, ) +OPCODE(ConvertF32U16, F32, U32, ) OPCODE(ConvertF32U32, F32, U32, ) OPCODE(ConvertF32U64, F32, U64, ) +OPCODE(ConvertF64S8, F64, U32, ) +OPCODE(ConvertF64S16, F64, U32, ) OPCODE(ConvertF64S32, F64, U32, ) OPCODE(ConvertF64S64, F64, U64, ) +OPCODE(ConvertF64U8, F64, U32, ) +OPCODE(ConvertF64U16, F64, U32, ) OPCODE(ConvertF64U32, F64, U32, ) OPCODE(ConvertF64U64, F64, U64, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index e444dcd4f..c9af83010 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -121,6 +121,22 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); } +IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + if (cbuf.unaligned != 0) { + throw NotImplementedException("Unaligned packed constant buffer read"); + } + const auto [binding, lower_offset]{CbufAddr(insn)}; + const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)}; + const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)}; + const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)}; + return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value)); +} + IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; @@ -158,6 +174,11 @@ IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { return ir.Imm64(Common::BitCast(value | sign_bit)); } +IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) { + const 
s64 value{GetImm20(insn).U32()}; + return ir.Imm64(static_cast(static_cast(value) << 32)); +} + IR::U32 TranslatorVisitor::GetImm32(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index ed81d9c36..cb66cca25 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -356,10 +356,12 @@ public: [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); + [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); + [[nodiscard]] IR::U64 GetPackedImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp new file mode 100644 index 000000000..e8b5ae1d2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -0,0 +1,173 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class IntFormat : u64 { + U8 = 0, + U16 = 1, + U32 = 2, + U64 = 3, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, FloatFormat> float_format; + BitField<10, 2, IntFormat> int_format; + BitField<13, 1, u64> is_signed; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 2, u64> selector; + BitField<47, 1, u64> cc; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; +}; + +bool Is64(u64 insn) { + return Encoding{insn}.int_format == IntFormat::U64; +} + +int BitSize(FloatFormat format) { + switch (format) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + } + throw NotImplementedException("Invalid float format {}", format); +} + +IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { + const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))}; + const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))}; + const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)}; + const IR::U1 is_least{v.ir.IEqual(value, least_value)}; + return IR::U32{v.ir.Select(is_least, value, absolute)}; +} + +void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { + const Encoding i2f{insn}; + if (i2f.cc != 0) { + throw NotImplementedException("CC"); + } + const bool is_signed{i2f.is_signed != 0}; + int src_bitsize{}; + switch (i2f.int_format) { + case IntFormat::U8: + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast(i2f.selector) * 8), + v.ir.Imm32(8), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 8); + } + src_bitsize = 8; + break; + case IntFormat::U16: + if 
(i2f.selector == 1 || i2f.selector == 3) { + throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value()); + } + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast(i2f.selector) * 8), + v.ir.Imm32(16), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 16); + } + src_bitsize = 16; + break; + case IntFormat::U32: + case IntFormat::U64: + if (i2f.selector != 0) { + throw NotImplementedException("Unexpected selector {}", i2f.selector.Value()); + } + if (i2f.abs != 0 && is_signed) { + src = v.ir.IAbs(src); + } + src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32; + break; + } + const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32}; + const int dst_bitsize{BitSize(i2f.float_format)}; + IR::F16F32F64 value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src)}; + if (i2f.neg != 0) { + if (i2f.abs != 0 || !is_signed) { + // We know the value is positive + value = v.ir.FPNeg(value); + } else { + // Only negate if the input isn't the lowest value + IR::U1 is_least; + if (src_bitsize == 64) { + is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits::min())); + } else { + const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; + is_least = v.ir.IEqual(src, least_value); + } + value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))}; + } + } + switch (i2f.float_format) { + case FloatFormat::F16: { + const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); + break; + } + case FloatFormat::F32: + v.F(i2f.dest_reg, value); + break; + case FloatFormat::F64: { + if (!IR::IsAligned(i2f.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); + } + const IR::Value vector{v.ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; ++i) { + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)}); + } + break; + } + default: + throw 
NotImplementedException("Invalid float format {}", i2f.float_format.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::I2F_reg(u64 insn) { + if (Is64(insn)) { + union { + u64 raw; + BitField<20, 8, IR::Reg> reg; + } const value{insn}; + const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))}; + I2F(*this, insn, ir.PackUint2x32(regs)); + } else { + I2F(*this, insn, GetReg20(insn)); + } +} + +void TranslatorVisitor::I2F_cbuf(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedCbuf(insn)); + } else { + I2F(*this, insn, GetCbuf(insn)); + } +} + +void TranslatorVisitor::I2F_imm(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedImm20(insn)); + } else { + I2F(*this, insn, GetImm20(insn)); + } +} + +} // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a4367fc5a..4078feafa 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -241,18 +241,6 @@ void TranslatorVisitor::HSETP2_imm(u64) { ThrowNotImplemented(Opcode::HSETP2_imm); } -void TranslatorVisitor::I2F_reg(u64) { - ThrowNotImplemented(Opcode::I2F_reg); -} - -void TranslatorVisitor::I2F_cbuf(u64) { - ThrowNotImplemented(Opcode::I2F_cbuf); -} - -void TranslatorVisitor::I2F_imm(u64) { - ThrowNotImplemented(Opcode::I2F_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 0fbb87ec4..b691b4d1f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -56,7 +56,7 @@ 
Shader::TextureType GetType(TextureType type, bool dc) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { - const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; switch (type) { case TextureType::_1D: return v.F(reg); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index 54f0df754..d5fda20f4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -65,7 +65,7 @@ IR::Value Composite(TranslatorVisitor& v, Args... regs) { } IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { - return v.ir.ConvertUToF(32, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); + return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); } IR::Value Sample(TranslatorVisitor& v, u64 insn) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index fbbe28632..e72505d61 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -79,6 +79,14 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertU16F16: case IR::Opcode::ConvertU32F16: case IR::Opcode::ConvertU64F16: + case IR::Opcode::ConvertF16S8: + case IR::Opcode::ConvertF16S16: + case IR::Opcode::ConvertF16S32: + case IR::Opcode::ConvertF16S64: + case IR::Opcode::ConvertF16U8: + case IR::Opcode::ConvertF16U16: + case IR::Opcode::ConvertF16U32: + case IR::Opcode::ConvertF16U64: case IR::Opcode::FPAbs16: case IR::Opcode::FPAdd16: case IR::Opcode::FPCeil16: @@ -105,6 +113,14 @@ void VisitUsages(Info& info, IR::Inst& 
inst) { case IR::Opcode::FPRoundEven64: case IR::Opcode::FPSaturate64: case IR::Opcode::FPTrunc64: + case IR::Opcode::ConvertF64S8: + case IR::Opcode::ConvertF64S16: + case IR::Opcode::ConvertF64S32: + case IR::Opcode::ConvertF64S64: + case IR::Opcode::ConvertF64U8: + case IR::Opcode::ConvertF64U16: + case IR::Opcode::ConvertF64U32: + case IR::Opcode::ConvertF64U64: info.uses_fp64 = true; break; default: @@ -123,6 +139,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::WriteStorageU8: case IR::Opcode::WriteStorageS8: case IR::Opcode::SelectU8: + case IR::Opcode::ConvertF16S8: + case IR::Opcode::ConvertF16U8: + case IR::Opcode::ConvertF32S8: + case IR::Opcode::ConvertF32U8: + case IR::Opcode::ConvertF64S8: + case IR::Opcode::ConvertF64U8: info.uses_int8 = true; break; default: @@ -149,6 +171,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertU16F16: case IR::Opcode::ConvertU16F32: case IR::Opcode::ConvertU16F64: + case IR::Opcode::ConvertF16S16: + case IR::Opcode::ConvertF16U16: + case IR::Opcode::ConvertF32S16: + case IR::Opcode::ConvertF32U16: + case IR::Opcode::ConvertF64S16: + case IR::Opcode::ConvertF64U16: info.uses_int16 = true; break; default: diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 74acb8bb6..baa3d22df 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -70,6 +70,22 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::Identity; case IR::Opcode::ConvertF16F32: return IR::Opcode::Identity; + case IR::Opcode::ConvertF16S8: + return IR::Opcode::ConvertF32S8; + case IR::Opcode::ConvertF16S16: + return IR::Opcode::ConvertF32S16; + case IR::Opcode::ConvertF16S32: + return IR::Opcode::ConvertF32S32; + case IR::Opcode::ConvertF16S64: + return IR::Opcode::ConvertF32S64; + case IR::Opcode::ConvertF16U8: + return IR::Opcode::ConvertF32U8; + case 
IR::Opcode::ConvertF16U16: + return IR::Opcode::ConvertF32U16; + case IR::Opcode::ConvertF16U32: + return IR::Opcode::ConvertF32U32; + case IR::Opcode::ConvertF16U64: + return IR::Opcode::ConvertF32U64; default: return op; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c9da2080d..d1399a46d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -227,6 +227,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { const auto& float_control{device.FloatControlProperties()}; + const VkDriverIdKHR driver_id{device.GetDriverID()}; profile = Shader::Profile{ .unified_descriptor_binding = true, .support_float_controls = true, @@ -242,7 +243,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, .support_fp32_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, - .has_broken_spirv_clamp = true, // TODO: is_intel + .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } -- cgit v1.2.3 From 7b03b9711815d0c4c39bb26f83ada9f6957bb269 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:08:58 -0300 Subject: spirv: Implement int8 and int16 conversion fallbacks --- .../backend/spirv/emit_spirv_convert.cpp | 99 +++++++++++++++++----- 1 file changed, 80 insertions(+), 19 deletions(-) (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index 757165626..acb8957fe 100644 --- 
a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -5,17 +5,62 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { +namespace { +Id ExtractU16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U16, value); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); + } +} + +Id ExtractS16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.S16, value); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); + } +} + +Id ExtractU8(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U8, value); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); + } +} + +Id ExtractS8(EmitContext& ctx, Id value) { + if (ctx.profile.support_int8) { + return ctx.OpSConvert(ctx.S8, value); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); + } +} +} // Anonymous namespace Id EmitConvertS16F16(EmitContext& ctx, Id value) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } } Id EmitConvertS16F32(EmitContext& ctx, Id value) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } } Id EmitConvertS16F64(EmitContext& ctx, Id value) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], 
ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } } Id EmitConvertS32F16(EmitContext& ctx, Id value) { @@ -23,7 +68,11 @@ Id EmitConvertS32F16(EmitContext& ctx, Id value) { } Id EmitConvertS32F32(EmitContext& ctx, Id value) { - return ctx.OpConvertFToS(ctx.U32[1], value); + if (ctx.profile.has_broken_signed_operations) { + return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value)); + } else { + return ctx.OpConvertFToS(ctx.U32[1], value); + } } Id EmitConvertS32F64(EmitContext& ctx, Id value) { @@ -43,15 +92,27 @@ Id EmitConvertS64F64(EmitContext& ctx, Id value) { } Id EmitConvertU16F16(EmitContext& ctx, Id value) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } } Id EmitConvertU16F32(EmitContext& ctx, Id value) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } } Id EmitConvertU16F64(EmitContext& ctx, Id value) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } } Id EmitConvertU32F16(EmitContext& ctx, Id value) { @@ -103,11 +164,11 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) { } Id EmitConvertF16S8(EmitContext& ctx, Id value) { - return ctx.OpConvertSToF(ctx.F16[1], value); + return ctx.OpConvertSToF(ctx.F16[1], ExtractS8(ctx, value)); } Id EmitConvertF16S16(EmitContext& ctx, Id value) { - return ctx.OpConvertSToF(ctx.F16[1], value); + return 
ctx.OpConvertSToF(ctx.F16[1], ExtractS16(ctx, value)); } Id EmitConvertF16S32(EmitContext& ctx, Id value) { @@ -119,11 +180,11 @@ Id EmitConvertF16S64(EmitContext& ctx, Id value) { } Id EmitConvertF16U8(EmitContext& ctx, Id value) { - return ctx.OpConvertUToF(ctx.F16[1], value); + return ctx.OpConvertUToF(ctx.F16[1], ExtractU8(ctx, value)); } Id EmitConvertF16U16(EmitContext& ctx, Id value) { - return ctx.OpConvertUToF(ctx.F16[1], value); + return ctx.OpConvertUToF(ctx.F16[1], ExtractU16(ctx, value)); } Id EmitConvertF16U32(EmitContext& ctx, Id value) { @@ -135,11 +196,11 @@ Id EmitConvertF16U64(EmitContext& ctx, Id value) { } Id EmitConvertF32S8(EmitContext& ctx, Id value) { - return ctx.OpConvertSToF(ctx.F32[1], ctx.OpUConvert(ctx.U8, value)); + return ctx.OpConvertSToF(ctx.F32[1], ExtractS8(ctx, value)); } Id EmitConvertF32S16(EmitContext& ctx, Id value) { - return ctx.OpConvertSToF(ctx.F32[1], ctx.OpUConvert(ctx.U16, value)); + return ctx.OpConvertSToF(ctx.F32[1], ExtractS16(ctx, value)); } Id EmitConvertF32S32(EmitContext& ctx, Id value) { @@ -151,11 +212,11 @@ Id EmitConvertF32S64(EmitContext& ctx, Id value) { } Id EmitConvertF32U8(EmitContext& ctx, Id value) { - return ctx.OpConvertUToF(ctx.F32[1], ctx.OpUConvert(ctx.U8, value)); + return ctx.OpConvertUToF(ctx.F32[1], ExtractU8(ctx, value)); } Id EmitConvertF32U16(EmitContext& ctx, Id value) { - return ctx.OpConvertUToF(ctx.F32[1], ctx.OpUConvert(ctx.U16, value)); + return ctx.OpConvertUToF(ctx.F32[1], ExtractU16(ctx, value)); } Id EmitConvertF32U32(EmitContext& ctx, Id value) { @@ -167,11 +228,11 @@ Id EmitConvertF32U64(EmitContext& ctx, Id value) { } Id EmitConvertF64S8(EmitContext& ctx, Id value) { - return ctx.OpConvertSToF(ctx.F64[1], ctx.OpUConvert(ctx.U8, value)); + return ctx.OpConvertSToF(ctx.F64[1], ExtractS8(ctx, value)); } Id EmitConvertF64S16(EmitContext& ctx, Id value) { - return ctx.OpConvertSToF(ctx.F64[1], ctx.OpUConvert(ctx.U16, value)); + return ctx.OpConvertSToF(ctx.F64[1], 
ExtractS16(ctx, value)); } Id EmitConvertF64S32(EmitContext& ctx, Id value) { @@ -183,11 +244,11 @@ Id EmitConvertF64S64(EmitContext& ctx, Id value) { } Id EmitConvertF64U8(EmitContext& ctx, Id value) { - return ctx.OpConvertUToF(ctx.F64[1], ctx.OpUConvert(ctx.U8, value)); + return ctx.OpConvertUToF(ctx.F64[1], ExtractU8(ctx, value)); } Id EmitConvertF64U16(EmitContext& ctx, Id value) { - return ctx.OpConvertUToF(ctx.F64[1], ctx.OpUConvert(ctx.U16, value)); + return ctx.OpConvertUToF(ctx.F64[1], ExtractU16(ctx, value)); } Id EmitConvertF64U32(EmitContext& ctx, Id value) { -- cgit v1.2.3 From bed090807afd3364ed6ef18a031a0ffd95a1b89b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 3 May 2021 20:53:00 -0300 Subject: Move SPIR-V emission functions to their own header --- src/shader_recompiler/CMakeLists.txt | 2 + src/shader_recompiler/backend/bindings.h | 19 + src/shader_recompiler/backend/spirv/emit_context.h | 9 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 1 + src/shader_recompiler/backend/spirv/emit_spirv.h | 571 +------------------- .../backend/spirv/emit_spirv_atomic.cpp | 1 + .../backend/spirv/emit_spirv_barriers.cpp | 1 + .../spirv/emit_spirv_bitwise_conversion.cpp | 1 + .../backend/spirv/emit_spirv_composite.cpp | 1 + .../backend/spirv/emit_spirv_context_get_set.cpp | 1 + .../backend/spirv/emit_spirv_control_flow.cpp | 1 + .../backend/spirv/emit_spirv_convert.cpp | 1 + .../backend/spirv/emit_spirv_floating_point.cpp | 1 + .../backend/spirv/emit_spirv_image.cpp | 1 + .../backend/spirv/emit_spirv_image_atomic.cpp | 1 + .../backend/spirv/emit_spirv_instructions.h | 583 +++++++++++++++++++++ .../backend/spirv/emit_spirv_integer.cpp | 1 + .../backend/spirv/emit_spirv_logical.cpp | 1 + .../backend/spirv/emit_spirv_memory.cpp | 1 + .../backend/spirv/emit_spirv_select.cpp | 1 + .../backend/spirv/emit_spirv_shared_memory.cpp | 1 + .../backend/spirv/emit_spirv_special.cpp | 1 + .../backend/spirv/emit_spirv_undefined.cpp | 1 + 
.../backend/spirv/emit_spirv_warp.cpp | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 5 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 8 +- 26 files changed, 637 insertions(+), 579 deletions(-) create mode 100644 src/shader_recompiler/backend/bindings.h create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_instructions.h (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 0bcd714d6..6523615aa 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(shader_recompiler STATIC + backend/bindings.h backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp @@ -13,6 +14,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_floating_point.cpp backend/spirv/emit_spirv_image.cpp backend/spirv/emit_spirv_image_atomic.cpp + backend/spirv/emit_spirv_instructions.h backend/spirv/emit_spirv_integer.cpp backend/spirv/emit_spirv_logical.cpp backend/spirv/emit_spirv_memory.cpp diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h new file mode 100644 index 000000000..35503000c --- /dev/null +++ b/src/shader_recompiler/backend/bindings.h @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_types.h" + +namespace Shader::Backend { + +struct Bindings { + u32 unified{}; + u32 uniform_buffer{}; + u32 storage_buffer{}; + u32 texture{}; + u32 image{}; +}; + +} // namespace Shader::Backend diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 30b08104d..8b000f1ec 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -9,6 +9,7 @@ #include +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" #include "shader_recompiler/shader_info.h" @@ -17,14 +18,6 @@ namespace Shader::Backend::SPIRV { using Sirit::Id; -struct Bindings { - u32 unified{}; - u32 uniform_buffer{}; - u32 storage_buffer{}; - u32 texture{}; - u32 image{}; -}; - class VectorTypes { public: void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3f9adc902..0681dfd16 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -9,6 +9,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/program.h" diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 47d62b190..d8ab2d8ed 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -4,9 +4,12 @@ #pragma once +#include + #include #include "common/common_types.h" +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/spirv/emit_context.h" #include 
"shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" @@ -16,569 +19,9 @@ namespace Shader::Backend::SPIRV { [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, IR::Program& program, Bindings& binding); -// Microinstruction emitters -Id EmitPhi(EmitContext& ctx, IR::Inst* inst); -void EmitVoid(EmitContext& ctx); -Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitBranch(EmitContext& ctx, Id label); -void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); -void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); -void EmitSelectionMerge(EmitContext& ctx, Id merge_label); -void EmitReturn(EmitContext& ctx); -void EmitJoin(EmitContext& ctx); -void EmitUnreachable(EmitContext& ctx); -void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); -void EmitBarrier(EmitContext& ctx); -void EmitWorkgroupMemoryBarrier(EmitContext& ctx); -void EmitDeviceMemoryBarrier(EmitContext& ctx); -void EmitPrologue(EmitContext& ctx); -void EmitEpilogue(EmitContext& ctx); -void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); -void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); -void EmitGetRegister(EmitContext& ctx); -void EmitSetRegister(EmitContext& ctx); -void EmitGetPred(EmitContext& ctx); -void EmitSetPred(EmitContext& ctx); -void EmitSetGotoVariable(EmitContext& ctx); -void EmitGetGotoVariable(EmitContext& ctx); -void EmitSetIndirectBranchVariable(EmitContext& ctx); -void EmitGetIndirectBranchVariable(EmitContext& ctx); -Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const 
IR::Value& offset); -Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); -Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); -void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); -Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); -void EmitSetSampleMask(EmitContext& ctx, Id value); -void EmitSetFragDepth(EmitContext& ctx, Id value); -void EmitGetZFlag(EmitContext& ctx); -void EmitGetSFlag(EmitContext& ctx); -void EmitGetCFlag(EmitContext& ctx); -void EmitGetOFlag(EmitContext& ctx); -void EmitSetZFlag(EmitContext& ctx); -void EmitSetSFlag(EmitContext& ctx); -void EmitSetCFlag(EmitContext& ctx); -void EmitSetOFlag(EmitContext& ctx); -Id EmitWorkgroupId(EmitContext& ctx); -Id EmitLocalInvocationId(EmitContext& ctx); -Id EmitInvocationId(EmitContext& ctx); -Id EmitSampleId(EmitContext& ctx); -Id EmitIsHelperInvocation(EmitContext& ctx); -Id EmitYDirection(EmitContext& ctx); -Id EmitLoadLocal(EmitContext& ctx, Id word_offset); -void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); -Id EmitUndefU1(EmitContext& ctx); -Id EmitUndefU8(EmitContext& ctx); -Id EmitUndefU16(EmitContext& ctx); -Id EmitUndefU32(EmitContext& ctx); -Id EmitUndefU64(EmitContext& ctx); -void EmitLoadGlobalU8(EmitContext& ctx); -void EmitLoadGlobalS8(EmitContext& ctx); -void EmitLoadGlobalU16(EmitContext& ctx); -void EmitLoadGlobalS16(EmitContext& ctx); -Id EmitLoadGlobal32(EmitContext& ctx, Id address); -Id EmitLoadGlobal64(EmitContext& ctx, Id address); -Id EmitLoadGlobal128(EmitContext& ctx, Id address); -void 
EmitWriteGlobalU8(EmitContext& ctx); -void EmitWriteGlobalS8(EmitContext& ctx); -void EmitWriteGlobalU16(EmitContext& ctx); -void EmitWriteGlobalS16(EmitContext& ctx); -void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); -void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); -void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); -Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitLoadSharedU8(EmitContext& ctx, Id offset); -Id EmitLoadSharedS8(EmitContext& ctx, Id offset); -Id EmitLoadSharedU16(EmitContext& ctx, Id offset); -Id EmitLoadSharedS16(EmitContext& ctx, Id offset); -Id 
EmitLoadSharedU32(EmitContext& ctx, Id offset); -Id EmitLoadSharedU64(EmitContext& ctx, Id offset); -Id EmitLoadSharedU128(EmitContext& ctx, Id offset); -void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id 
EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); -void EmitCompositeConstructF64x2(EmitContext& ctx); -void EmitCompositeConstructF64x3(EmitContext& ctx); -void EmitCompositeConstructF64x4(EmitContext& ctx); -void EmitCompositeExtractF64x2(EmitContext& ctx); -void EmitCompositeExtractF64x3(EmitContext& ctx); -void EmitCompositeExtractF64x4(EmitContext& ctx); -Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -void EmitBitCastU16F16(EmitContext& ctx); -Id EmitBitCastU32F32(EmitContext& ctx, Id value); -void EmitBitCastU64F64(EmitContext& ctx); -void EmitBitCastF16U16(EmitContext& ctx); -Id EmitBitCastF32U32(EmitContext& ctx, Id value); -void EmitBitCastF64U64(EmitContext& 
ctx); -Id EmitPackUint2x32(EmitContext& ctx, Id value); -Id EmitUnpackUint2x32(EmitContext& ctx, Id value); -Id EmitPackFloat2x16(EmitContext& ctx, Id value); -Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); -Id EmitPackHalf2x16(EmitContext& ctx, Id value); -Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); -Id EmitPackDouble2x32(EmitContext& ctx, Id value); -Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); -void EmitGetZeroFromOp(EmitContext& ctx); -void EmitGetSignFromOp(EmitContext& ctx); -void EmitGetCarryFromOp(EmitContext& ctx); -void EmitGetOverflowFromOp(EmitContext& ctx); -void EmitGetSparseFromOp(EmitContext& ctx); -void EmitGetInBoundsFromOp(EmitContext& ctx); -Id EmitFPAbs16(EmitContext& ctx, Id value); -Id EmitFPAbs32(EmitContext& ctx, Id value); -Id EmitFPAbs64(EmitContext& ctx, Id value); -Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPMax32(EmitContext& ctx, Id a, Id b); -Id EmitFPMax64(EmitContext& ctx, Id a, Id b); -Id EmitFPMin32(EmitContext& ctx, Id a, Id b); -Id EmitFPMin64(EmitContext& ctx, Id a, Id b); -Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPNeg16(EmitContext& ctx, Id value); -Id EmitFPNeg32(EmitContext& ctx, Id value); -Id EmitFPNeg64(EmitContext& ctx, Id value); -Id EmitFPSin(EmitContext& ctx, Id value); -Id EmitFPCos(EmitContext& ctx, Id value); -Id EmitFPExp2(EmitContext& ctx, Id value); -Id EmitFPLog2(EmitContext& ctx, Id value); -Id EmitFPRecip32(EmitContext& ctx, Id value); -Id EmitFPRecip64(EmitContext& ctx, Id value); 
-Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); -Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); -Id EmitFPSqrt(EmitContext& ctx, Id value); -Id EmitFPSaturate16(EmitContext& ctx, Id value); -Id EmitFPSaturate32(EmitContext& ctx, Id value); -Id EmitFPSaturate64(EmitContext& ctx, Id value); -Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPRoundEven16(EmitContext& ctx, Id value); -Id EmitFPRoundEven32(EmitContext& ctx, Id value); -Id EmitFPRoundEven64(EmitContext& ctx, Id value); -Id EmitFPFloor16(EmitContext& ctx, Id value); -Id EmitFPFloor32(EmitContext& ctx, Id value); -Id EmitFPFloor64(EmitContext& ctx, Id value); -Id EmitFPCeil16(EmitContext& ctx, Id value); -Id EmitFPCeil32(EmitContext& ctx, Id value); -Id EmitFPCeil64(EmitContext& ctx, Id value); -Id EmitFPTrunc16(EmitContext& ctx, Id value); -Id EmitFPTrunc32(EmitContext& ctx, Id value); -Id EmitFPTrunc64(EmitContext& ctx, Id value); -Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id 
EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPIsNan16(EmitContext& ctx, Id value); -Id EmitFPIsNan32(EmitContext& ctx, Id value); -Id EmitFPIsNan64(EmitContext& ctx, Id value); -Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitIAdd64(EmitContext& ctx, Id a, Id b); -Id EmitISub32(EmitContext& ctx, Id a, Id b); -Id EmitISub64(EmitContext& ctx, Id a, Id b); -Id EmitIMul32(EmitContext& ctx, Id a, Id b); -Id EmitINeg32(EmitContext& ctx, Id value); -Id EmitINeg64(EmitContext& ctx, Id value); -Id EmitIAbs32(EmitContext& ctx, Id value); -Id EmitIAbs64(EmitContext& ctx, Id value); -Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); -Id 
EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); -Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); -Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); -Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); -Id EmitBitReverse32(EmitContext& ctx, Id value); -Id EmitBitCount32(EmitContext& ctx, Id value); -Id EmitBitwiseNot32(EmitContext& ctx, Id value); -Id EmitFindSMsb32(EmitContext& ctx, Id value); -Id EmitFindUMsb32(EmitContext& ctx, Id value); -Id EmitSMin32(EmitContext& ctx, Id a, Id b); -Id EmitUMin32(EmitContext& ctx, Id a, Id b); -Id EmitSMax32(EmitContext& ctx, Id a, Id b); -Id EmitUMax32(EmitContext& ctx, Id a, Id b); -Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id 
pointer_offset, Id value); -Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicExchange32(EmitContext& ctx, 
const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitGlobalAtomicIAdd32(EmitContext& ctx); -Id EmitGlobalAtomicSMin32(EmitContext& ctx); -Id EmitGlobalAtomicUMin32(EmitContext& ctx); -Id 
EmitGlobalAtomicSMax32(EmitContext& ctx); -Id EmitGlobalAtomicUMax32(EmitContext& ctx); -Id EmitGlobalAtomicInc32(EmitContext& ctx); -Id EmitGlobalAtomicDec32(EmitContext& ctx); -Id EmitGlobalAtomicAnd32(EmitContext& ctx); -Id EmitGlobalAtomicOr32(EmitContext& ctx); -Id EmitGlobalAtomicXor32(EmitContext& ctx); -Id EmitGlobalAtomicExchange32(EmitContext& ctx); -Id EmitGlobalAtomicIAdd64(EmitContext& ctx); -Id EmitGlobalAtomicSMin64(EmitContext& ctx); -Id EmitGlobalAtomicUMin64(EmitContext& ctx); -Id EmitGlobalAtomicSMax64(EmitContext& ctx); -Id EmitGlobalAtomicUMax64(EmitContext& ctx); -Id EmitGlobalAtomicInc64(EmitContext& ctx); -Id EmitGlobalAtomicDec64(EmitContext& ctx); -Id EmitGlobalAtomicAnd64(EmitContext& ctx); -Id EmitGlobalAtomicOr64(EmitContext& ctx); -Id EmitGlobalAtomicXor64(EmitContext& ctx); -Id EmitGlobalAtomicExchange64(EmitContext& ctx); -Id EmitGlobalAtomicAddF32(EmitContext& ctx); -Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); -Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); -Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); -Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); -Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); -Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); -Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); -Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); -Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); -Id EmitLogicalNot(EmitContext& ctx, Id value); -Id EmitConvertS16F16(EmitContext& ctx, Id value); -Id EmitConvertS16F32(EmitContext& ctx, Id value); -Id EmitConvertS16F64(EmitContext& ctx, Id value); -Id EmitConvertS32F16(EmitContext& ctx, Id value); -Id EmitConvertS32F32(EmitContext& ctx, Id value); -Id EmitConvertS32F64(EmitContext& ctx, Id value); -Id EmitConvertS64F16(EmitContext& ctx, Id value); -Id EmitConvertS64F32(EmitContext& ctx, Id value); -Id EmitConvertS64F64(EmitContext& ctx, Id value); -Id EmitConvertU16F16(EmitContext& ctx, Id value); -Id EmitConvertU16F32(EmitContext& ctx, Id value); -Id EmitConvertU16F64(EmitContext& ctx, 
Id value); -Id EmitConvertU32F16(EmitContext& ctx, Id value); -Id EmitConvertU32F32(EmitContext& ctx, Id value); -Id EmitConvertU32F64(EmitContext& ctx, Id value); -Id EmitConvertU64F16(EmitContext& ctx, Id value); -Id EmitConvertU64F32(EmitContext& ctx, Id value); -Id EmitConvertU64F64(EmitContext& ctx, Id value); -Id EmitConvertU64U32(EmitContext& ctx, Id value); -Id EmitConvertU32U64(EmitContext& ctx, Id value); -Id EmitConvertF16F32(EmitContext& ctx, Id value); -Id EmitConvertF32F16(EmitContext& ctx, Id value); -Id EmitConvertF32F64(EmitContext& ctx, Id value); -Id EmitConvertF64F32(EmitContext& ctx, Id value); -Id EmitConvertF16S8(EmitContext& ctx, Id value); -Id EmitConvertF16S16(EmitContext& ctx, Id value); -Id EmitConvertF16S32(EmitContext& ctx, Id value); -Id EmitConvertF16S64(EmitContext& ctx, Id value); -Id EmitConvertF16U8(EmitContext& ctx, Id value); -Id EmitConvertF16U16(EmitContext& ctx, Id value); -Id EmitConvertF16U32(EmitContext& ctx, Id value); -Id EmitConvertF16U64(EmitContext& ctx, Id value); -Id EmitConvertF32S8(EmitContext& ctx, Id value); -Id EmitConvertF32S16(EmitContext& ctx, Id value); -Id EmitConvertF32S32(EmitContext& ctx, Id value); -Id EmitConvertF32S64(EmitContext& ctx, Id value); -Id EmitConvertF32U8(EmitContext& ctx, Id value); -Id EmitConvertF32U16(EmitContext& ctx, Id value); -Id EmitConvertF32U32(EmitContext& ctx, Id value); -Id EmitConvertF32U64(EmitContext& ctx, Id value); -Id EmitConvertF64S8(EmitContext& ctx, Id value); -Id EmitConvertF64S16(EmitContext& ctx, Id value); -Id EmitConvertF64S32(EmitContext& ctx, Id value); -Id EmitConvertF64S64(EmitContext& ctx, Id value); -Id EmitConvertF64U8(EmitContext& ctx, Id value); -Id EmitConvertF64U16(EmitContext& ctx, Id value); -Id EmitConvertF64U32(EmitContext& ctx, Id value); -Id EmitConvertF64U64(EmitContext& ctx, Id value); -Id EmitBindlessImageSampleImplicitLod(EmitContext&); -Id EmitBindlessImageSampleExplicitLod(EmitContext&); -Id 
EmitBindlessImageSampleDrefImplicitLod(EmitContext&); -Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); -Id EmitBindlessImageGather(EmitContext&); -Id EmitBindlessImageGatherDref(EmitContext&); -Id EmitBindlessImageFetch(EmitContext&); -Id EmitBindlessImageQueryDimensions(EmitContext&); -Id EmitBindlessImageQueryLod(EmitContext&); -Id EmitBindlessImageGradient(EmitContext&); -Id EmitBindlessImageRead(EmitContext&); -Id EmitBindlessImageWrite(EmitContext&); -Id EmitBoundImageSampleImplicitLod(EmitContext&); -Id EmitBoundImageSampleExplicitLod(EmitContext&); -Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); -Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); -Id EmitBoundImageGather(EmitContext&); -Id EmitBoundImageGatherDref(EmitContext&); -Id EmitBoundImageFetch(EmitContext&); -Id EmitBoundImageQueryDimensions(EmitContext&); -Id EmitBoundImageQueryLod(EmitContext&); -Id EmitBoundImageGradient(EmitContext&); -Id EmitBoundImageRead(EmitContext&); -Id EmitBoundImageWrite(EmitContext&); -Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id bias_lc, const IR::Value& offset); -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id lod_lc, const IR::Value& offset); -Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id bias_lc, const IR::Value& offset); -Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id lod_lc, const IR::Value& offset); -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - const IR::Value& offset, const IR::Value& offset2); -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - const IR::Value& offset, const IR::Value& offset2, Id dref); -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, - 
Id lod, Id ms); -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); -Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); -Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id derivates, Id offset, Id lod_clamp); -Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); -Id EmitBindlessImageAtomicIAdd32(EmitContext&); -Id EmitBindlessImageAtomicSMin32(EmitContext&); -Id EmitBindlessImageAtomicUMin32(EmitContext&); -Id EmitBindlessImageAtomicSMax32(EmitContext&); -Id EmitBindlessImageAtomicUMax32(EmitContext&); -Id EmitBindlessImageAtomicInc32(EmitContext&); -Id EmitBindlessImageAtomicDec32(EmitContext&); -Id EmitBindlessImageAtomicAnd32(EmitContext&); -Id EmitBindlessImageAtomicOr32(EmitContext&); -Id EmitBindlessImageAtomicXor32(EmitContext&); -Id EmitBindlessImageAtomicExchange32(EmitContext&); -Id EmitBoundImageAtomicIAdd32(EmitContext&); -Id EmitBoundImageAtomicSMin32(EmitContext&); -Id EmitBoundImageAtomicUMin32(EmitContext&); -Id EmitBoundImageAtomicSMax32(EmitContext&); -Id EmitBoundImageAtomicUMax32(EmitContext&); -Id EmitBoundImageAtomicInc32(EmitContext&); -Id EmitBoundImageAtomicDec32(EmitContext&); -Id EmitBoundImageAtomicAnd32(EmitContext&); -Id EmitBoundImageAtomicOr32(EmitContext&); -Id EmitBoundImageAtomicXor32(EmitContext&); -Id EmitBoundImageAtomicExchange32(EmitContext&); -Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); 
-Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitLaneId(EmitContext& ctx); -Id EmitVoteAll(EmitContext& ctx, Id pred); -Id EmitVoteAny(EmitContext& ctx, Id pred); -Id EmitVoteEqual(EmitContext& ctx, Id pred); -Id EmitSubgroupBallot(EmitContext& ctx, Id pred); -Id EmitSubgroupEqMask(EmitContext& ctx); -Id EmitSubgroupLtMask(EmitContext& ctx); -Id EmitSubgroupLeMask(EmitContext& ctx); -Id EmitSubgroupGtMask(EmitContext& ctx); -Id EmitSubgroupGeMask(EmitContext& ctx); -Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); - -Id EmitDPdxFine(EmitContext& ctx, Id op_a); - -Id EmitDPdyFine(EmitContext& ctx, Id op_a); - -Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); - -Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); +[[nodiscard]] inline std::vector EmitSPIRV(const Profile& profile, IR::Program& program) { + Bindings binding; + 
return EmitSPIRV(profile, program, binding); +} } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 6e17d1c7e..053800eb7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 705aebd81..e0b52a001 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 93a45d834..bb11f4f4e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 079e226de..10ff4ecab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index ef32184ea..8e57ff070 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -6,6 +6,7 @@ #include <utility> #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index b4a6fbb93..6154c46be 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index acb8957fe..fd74e475f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index b3afbef25..61cf25f9c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 6680cf1b3..5832104df 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -5,6 +5,7 @@ #include <boost/container/static_vector.hpp> #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp index 05bed22b9..d7f1a365a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h new file mode 100644 index 000000000..b5eec3cd1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -0,0 +1,583 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <sirit/sirit.h> + +#include "common/common_types.h" + +namespace IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace IR + +namespace Shader::Backend::SPIRV { + +using Sirit::Id; + +class EmitContext; + +// Microinstruction emitters +Id EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +Id EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitBranch(EmitContext& ctx, Id label); +void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); +void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); +void EmitSelectionMerge(EmitContext& ctx, Id merge_label); +void EmitReturn(EmitContext& ctx); +void EmitJoin(EmitContext& ctx); +void EmitUnreachable(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const 
IR::Value& offset); +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetSampleMask(EmitContext& ctx, Id value); +void EmitSetFragDepth(EmitContext& ctx, Id value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitInvocationId(EmitContext& ctx); +Id EmitSampleId(EmitContext& ctx); +Id EmitIsHelperInvocation(EmitContext& ctx); +Id EmitYDirection(EmitContext& ctx); +Id EmitLoadLocal(EmitContext& ctx, Id word_offset); +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +Id EmitLoadGlobal32(EmitContext& ctx, Id address); +Id EmitLoadGlobal64(EmitContext& ctx, Id address); +Id EmitLoadGlobal128(EmitContext& ctx, Id address); +void 
EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitLoadSharedU8(EmitContext& ctx, Id offset); +Id EmitLoadSharedS8(EmitContext& ctx, Id offset); +Id EmitLoadSharedU16(EmitContext& ctx, Id offset); +Id EmitLoadSharedS16(EmitContext& ctx, Id offset); +Id 
EmitLoadSharedU32(EmitContext& ctx, Id offset); +Id EmitLoadSharedU64(EmitContext& ctx, Id offset); +Id EmitLoadSharedU128(EmitContext& ctx, Id offset); +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id 
EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void EmitBitCastF64U64(EmitContext& 
ctx); +Id EmitPackUint2x32(EmitContext& ctx, Id value); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value); +Id EmitPackFloat2x16(EmitContext& ctx, Id value); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); +Id EmitPackHalf2x16(EmitContext& ctx, Id value); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); +Id EmitPackDouble2x32(EmitContext& ctx, Id value); +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +Id EmitFPAbs16(EmitContext& ctx, Id value); +Id EmitFPAbs32(EmitContext& ctx, Id value); +Id EmitFPAbs64(EmitContext& ctx, Id value); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b); +Id EmitFPMax64(EmitContext& ctx, Id a, Id b); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b); +Id EmitFPMin64(EmitContext& ctx, Id a, Id b); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPNeg16(EmitContext& ctx, Id value); +Id EmitFPNeg32(EmitContext& ctx, Id value); +Id EmitFPNeg64(EmitContext& ctx, Id value); +Id EmitFPSin(EmitContext& ctx, Id value); +Id EmitFPCos(EmitContext& ctx, Id value); +Id EmitFPExp2(EmitContext& ctx, Id value); +Id EmitFPLog2(EmitContext& ctx, Id value); +Id EmitFPRecip32(EmitContext& ctx, Id value); +Id EmitFPRecip64(EmitContext& ctx, Id value); 
+Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); +Id EmitFPSqrt(EmitContext& ctx, Id value); +Id EmitFPSaturate16(EmitContext& ctx, Id value); +Id EmitFPSaturate32(EmitContext& ctx, Id value); +Id EmitFPSaturate64(EmitContext& ctx, Id value); +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPRoundEven16(EmitContext& ctx, Id value); +Id EmitFPRoundEven32(EmitContext& ctx, Id value); +Id EmitFPRoundEven64(EmitContext& ctx, Id value); +Id EmitFPFloor16(EmitContext& ctx, Id value); +Id EmitFPFloor32(EmitContext& ctx, Id value); +Id EmitFPFloor64(EmitContext& ctx, Id value); +Id EmitFPCeil16(EmitContext& ctx, Id value); +Id EmitFPCeil32(EmitContext& ctx, Id value); +Id EmitFPCeil64(EmitContext& ctx, Id value); +Id EmitFPTrunc16(EmitContext& ctx, Id value); +Id EmitFPTrunc32(EmitContext& ctx, Id value); +Id EmitFPTrunc64(EmitContext& ctx, Id value); +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id 
EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPIsNan16(EmitContext& ctx, Id value); +Id EmitFPIsNan32(EmitContext& ctx, Id value); +Id EmitFPIsNan64(EmitContext& ctx, Id value); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitIAdd64(EmitContext& ctx, Id a, Id b); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +Id EmitISub64(EmitContext& ctx, Id a, Id b); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +Id EmitINeg32(EmitContext& ctx, Id value); +Id EmitINeg64(EmitContext& ctx, Id value); +Id EmitIAbs32(EmitContext& ctx, Id value); +Id EmitIAbs64(EmitContext& ctx, Id value); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +Id 
EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); +Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitReverse32(EmitContext& ctx, Id value); +Id EmitBitCount32(EmitContext& ctx, Id value); +Id EmitBitwiseNot32(EmitContext& ctx, Id value); +Id EmitFindSMsb32(EmitContext& ctx, Id value); +Id EmitFindUMsb32(EmitContext& ctx, Id value); +Id EmitSMin32(EmitContext& ctx, Id a, Id b); +Id EmitUMin32(EmitContext& ctx, Id a, Id b); +Id EmitSMax32(EmitContext& ctx, Id a, Id b); +Id EmitUMax32(EmitContext& ctx, Id a, Id b); +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id 
pointer_offset, Id value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange32(EmitContext& ctx, 
const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitGlobalAtomicIAdd32(EmitContext& ctx); +Id EmitGlobalAtomicSMin32(EmitContext& ctx); +Id EmitGlobalAtomicUMin32(EmitContext& ctx); +Id 
EmitGlobalAtomicSMax32(EmitContext& ctx); +Id EmitGlobalAtomicUMax32(EmitContext& ctx); +Id EmitGlobalAtomicInc32(EmitContext& ctx); +Id EmitGlobalAtomicDec32(EmitContext& ctx); +Id EmitGlobalAtomicAnd32(EmitContext& ctx); +Id EmitGlobalAtomicOr32(EmitContext& ctx); +Id EmitGlobalAtomicXor32(EmitContext& ctx); +Id EmitGlobalAtomicExchange32(EmitContext& ctx); +Id EmitGlobalAtomicIAdd64(EmitContext& ctx); +Id EmitGlobalAtomicSMin64(EmitContext& ctx); +Id EmitGlobalAtomicUMin64(EmitContext& ctx); +Id EmitGlobalAtomicSMax64(EmitContext& ctx); +Id EmitGlobalAtomicUMax64(EmitContext& ctx); +Id EmitGlobalAtomicInc64(EmitContext& ctx); +Id EmitGlobalAtomicDec64(EmitContext& ctx); +Id EmitGlobalAtomicAnd64(EmitContext& ctx); +Id EmitGlobalAtomicOr64(EmitContext& ctx); +Id EmitGlobalAtomicXor64(EmitContext& ctx); +Id EmitGlobalAtomicExchange64(EmitContext& ctx); +Id EmitGlobalAtomicAddF32(EmitContext& ctx); +Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); +Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); +Id EmitLogicalNot(EmitContext& ctx, Id value); +Id EmitConvertS16F16(EmitContext& ctx, Id value); +Id EmitConvertS16F32(EmitContext& ctx, Id value); +Id EmitConvertS16F64(EmitContext& ctx, Id value); +Id EmitConvertS32F16(EmitContext& ctx, Id value); +Id EmitConvertS32F32(EmitContext& ctx, Id value); +Id EmitConvertS32F64(EmitContext& ctx, Id value); +Id EmitConvertS64F16(EmitContext& ctx, Id value); +Id EmitConvertS64F32(EmitContext& ctx, Id value); +Id EmitConvertS64F64(EmitContext& ctx, Id value); +Id EmitConvertU16F16(EmitContext& ctx, Id value); +Id EmitConvertU16F32(EmitContext& ctx, Id value); +Id EmitConvertU16F64(EmitContext& ctx, 
Id value); +Id EmitConvertU32F16(EmitContext& ctx, Id value); +Id EmitConvertU32F32(EmitContext& ctx, Id value); +Id EmitConvertU32F64(EmitContext& ctx, Id value); +Id EmitConvertU64F16(EmitContext& ctx, Id value); +Id EmitConvertU64F32(EmitContext& ctx, Id value); +Id EmitConvertU64F64(EmitContext& ctx, Id value); +Id EmitConvertU64U32(EmitContext& ctx, Id value); +Id EmitConvertU32U64(EmitContext& ctx, Id value); +Id EmitConvertF16F32(EmitContext& ctx, Id value); +Id EmitConvertF32F16(EmitContext& ctx, Id value); +Id EmitConvertF32F64(EmitContext& ctx, Id value); +Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S8(EmitContext& ctx, Id value); +Id EmitConvertF16S16(EmitContext& ctx, Id value); +Id EmitConvertF16S32(EmitContext& ctx, Id value); +Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U8(EmitContext& ctx, Id value); +Id EmitConvertF16U16(EmitContext& ctx, Id value); +Id EmitConvertF16U32(EmitContext& ctx, Id value); +Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S8(EmitContext& ctx, Id value); +Id EmitConvertF32S16(EmitContext& ctx, Id value); +Id EmitConvertF32S32(EmitContext& ctx, Id value); +Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U8(EmitContext& ctx, Id value); +Id EmitConvertF32U16(EmitContext& ctx, Id value); +Id EmitConvertF32U32(EmitContext& ctx, Id value); +Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S8(EmitContext& ctx, Id value); +Id EmitConvertF64S16(EmitContext& ctx, Id value); +Id EmitConvertF64S32(EmitContext& ctx, Id value); +Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U8(EmitContext& ctx, Id value); +Id EmitConvertF64U16(EmitContext& ctx, Id value); +Id EmitConvertF64U32(EmitContext& ctx, Id value); +Id EmitConvertF64U64(EmitContext& ctx, Id value); +Id EmitBindlessImageSampleImplicitLod(EmitContext&); +Id EmitBindlessImageSampleExplicitLod(EmitContext&); +Id 
EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +Id EmitBindlessImageGather(EmitContext&); +Id EmitBindlessImageGatherDref(EmitContext&); +Id EmitBindlessImageFetch(EmitContext&); +Id EmitBindlessImageQueryDimensions(EmitContext&); +Id EmitBindlessImageQueryLod(EmitContext&); +Id EmitBindlessImageGradient(EmitContext&); +Id EmitBindlessImageRead(EmitContext&); +Id EmitBindlessImageWrite(EmitContext&); +Id EmitBoundImageSampleImplicitLod(EmitContext&); +Id EmitBoundImageSampleExplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); +Id EmitBoundImageGather(EmitContext&); +Id EmitBoundImageGatherDref(EmitContext&); +Id EmitBoundImageFetch(EmitContext&); +Id EmitBoundImageQueryDimensions(EmitContext&); +Id EmitBoundImageQueryLod(EmitContext&); +Id EmitBoundImageGradient(EmitContext&); +Id EmitBoundImageRead(EmitContext&); +Id EmitBoundImageWrite(EmitContext&); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, const IR::Value& offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod_lc, const IR::Value& offset); +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, const IR::Value& offset); +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod_lc, const IR::Value& offset); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2); +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2, Id dref); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + 
Id lod, Id ms); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivates, Id offset, Id lod_clamp); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); +Id EmitBindlessImageAtomicIAdd32(EmitContext&); +Id EmitBindlessImageAtomicSMin32(EmitContext&); +Id EmitBindlessImageAtomicUMin32(EmitContext&); +Id EmitBindlessImageAtomicSMax32(EmitContext&); +Id EmitBindlessImageAtomicUMax32(EmitContext&); +Id EmitBindlessImageAtomicInc32(EmitContext&); +Id EmitBindlessImageAtomicDec32(EmitContext&); +Id EmitBindlessImageAtomicAnd32(EmitContext&); +Id EmitBindlessImageAtomicOr32(EmitContext&); +Id EmitBindlessImageAtomicXor32(EmitContext&); +Id EmitBindlessImageAtomicExchange32(EmitContext&); +Id EmitBoundImageAtomicIAdd32(EmitContext&); +Id EmitBoundImageAtomicSMin32(EmitContext&); +Id EmitBoundImageAtomicUMin32(EmitContext&); +Id EmitBoundImageAtomicSMax32(EmitContext&); +Id EmitBoundImageAtomicUMax32(EmitContext&); +Id EmitBoundImageAtomicInc32(EmitContext&); +Id EmitBoundImageAtomicDec32(EmitContext&); +Id EmitBoundImageAtomicAnd32(EmitContext&); +Id EmitBoundImageAtomicOr32(EmitContext&); +Id EmitBoundImageAtomicXor32(EmitContext&); +Id EmitBoundImageAtomicExchange32(EmitContext&); +Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); 
+Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitLaneId(EmitContext& ctx); +Id EmitVoteAll(EmitContext& ctx, Id pred); +Id EmitVoteAny(EmitContext& ctx, Id pred); +Id EmitVoteEqual(EmitContext& ctx, Id pred); +Id EmitSubgroupBallot(EmitContext& ctx, Id pred); +Id EmitSubgroupEqMask(EmitContext& ctx); +Id EmitSubgroupLtMask(EmitContext& ctx); +Id EmitSubgroupLeMask(EmitContext& ctx); +Id EmitSubgroupGtMask(EmitContext& ctx); +Id EmitSubgroupGeMask(EmitContext& ctx); +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); +Id EmitDPdxFine(EmitContext& ctx, Id op_a); +Id EmitDPdyFine(EmitContext& ctx, Id op_a); +Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); +Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp 
b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 86e6a4f3b..06ab23b1d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index bb434def2..b9a9500fc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index a6a3f3351..37a66095f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 0b45db45e..c5b4f4720 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp index 710d1cd25..9a79fc7a2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index d5430e905..ba948f3c9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index 19b06dbe4..c9f469e90 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 239e2ecab..78b1e1ba7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c9ca1f005..6585817bc 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -254,7 +254,7 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( OGLProgram gl_program; gl_program.handle = glCreateProgram(); - Shader::Backend::SPIRV::Bindings binding; + Shader::Backend::Bindings binding; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -297,8 +297,7 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - Shader::Backend::SPIRV::Bindings binding; - const std::vector code{EmitSPIRV(profile, program, binding)}; + const std::vector code{EmitSPIRV(profile, program)}; OGLProgram gl_program; gl_program.handle = glCreateProgram(); AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30b71bdbc..a5edcd072 100644 --- 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,8 +315,9 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; - Shader::Backend::SPIRV::Bindings binding; - for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { + Shader::Backend::Bindings binding; + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; + ++index) { if (key.unique_hashes[index] == 0) { continue; } @@ -388,8 +389,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - Shader::Backend::SPIRV::Bindings binding; - const std::vector code{EmitSPIRV(base_profile, program, binding)}; + const std::vector code{EmitSPIRV(base_profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { -- cgit v1.2.3 From 8554a644df7ad909e418f3e96016e95abc55712f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 11 Jun 2021 00:18:24 -0300 Subject: spirv/convert: Catch more broken signed operations on Nvidia OpenGL BitCast U32 to S32 before converting to float on drivers with broken signed operations. 
--- src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index fd74e475f..2c4250a0c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -205,6 +205,9 @@ Id EmitConvertF32S16(EmitContext& ctx, Id value) { } Id EmitConvertF32S32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + value = ctx.OpBitcast(ctx.S32[1], value); + } return ctx.OpConvertSToF(ctx.F32[1], value); } @@ -237,6 +240,9 @@ Id EmitConvertF64S16(EmitContext& ctx, Id value) { } Id EmitConvertF64S32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + value = ctx.OpBitcast(ctx.S32[1], value); + } return ctx.OpConvertSToF(ctx.F64[1], value); } -- cgit v1.2.3 From d52bacf6f035ddbc4b2333953709cbd3993e4817 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 11 Jun 2021 01:11:59 -0400 Subject: spirv/convert: Catch more signed operations oversights The sign bit on integers of size < 32 was not properly preserved in casts --- src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index 2c4250a0c..fd42b7a16 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -17,14 +17,14 @@ Id ExtractU16(EmitContext& ctx, Id value) { Id ExtractS16(EmitContext& ctx, Id value) { if (ctx.profile.support_int16) { - return 
ctx.OpUConvert(ctx.S16, value); + return ctx.OpSConvert(ctx.S16, value); } else { return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); } } Id ExtractU8(EmitContext& ctx, Id value) { - if (ctx.profile.support_int16) { + if (ctx.profile.support_int8) { return ctx.OpUConvert(ctx.U8, value); } else { return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); @@ -42,7 +42,7 @@ Id ExtractS8(EmitContext& ctx, Id value) { Id EmitConvertS16F16(EmitContext& ctx, Id value) { if (ctx.profile.support_int16) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); } else { return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); } @@ -50,7 +50,7 @@ Id EmitConvertS16F16(EmitContext& ctx, Id value) { Id EmitConvertS16F32(EmitContext& ctx, Id value) { if (ctx.profile.support_int16) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); } else { return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); } @@ -58,7 +58,7 @@ Id EmitConvertS16F32(EmitContext& ctx, Id value) { Id EmitConvertS16F64(EmitContext& ctx, Id value) { if (ctx.profile.support_int16) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); } else { return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); } -- cgit v1.2.3