Skip to content
This repository was archived by the owner on Sep 15, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 2 additions & 0 deletions .typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,5 @@ dne = "dne"
offen = "offen"
varing = "varing"
Derivate = "Derivate"
TESE = "TESE"
SER = "SER"
8 changes: 7 additions & 1 deletion cmake/vkgc.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
##
#######################################################################################################################
#
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
Expand Down Expand Up @@ -38,6 +38,12 @@ if(LLPC_BUILD_GFX11)
endif()
#endif

#if LLPC_BUILD_GFX12
# When the GFX12 build option is on, publish LLPC_BUILD_GFX12 as an INTERFACE compile definition of vkgc_headers so
# that every target consuming vkgc_headers is compiled with the macro defined.
if(LLPC_BUILD_GFX12)
target_compile_definitions(vkgc_headers INTERFACE LLPC_BUILD_GFX12)
endif()
#endif

#if LLPC_RAY_TRACING
if(LLPC_RAY_TRACING)
if(NOT LLPC_IS_STANDALONE)
Expand Down
2 changes: 1 addition & 1 deletion compilerutils/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
##
#######################################################################################################################
#
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
Expand Down
2 changes: 1 addition & 1 deletion compilerutils/plugin/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
##
#######################################################################################################################
#
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
Expand Down
2 changes: 1 addition & 1 deletion compilerutils/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
##
#######################################################################################################################
#
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
Expand Down
2 changes: 1 addition & 1 deletion gfxruntime/src/shaders/AdvancedBlend.hlsl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
* Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
Expand Down
85 changes: 78 additions & 7 deletions include/vkgcDefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,20 @@ struct CompileTimeConst {
} values; ///< The compile-time values for this slot.
};

#if LLPC_BUILD_GFX12
/// Identifies the kind of memory operation whose temporal hint can be handled/overridden.
///
/// The enumerator values are spaced 4 apart (0, 4, ..., 28).
/// NOTE(review): each value looks like the bit offset of that operation's 4-bit field inside a 32-bit temporal hint
/// control word -- confirm against the code that consumes these values.
enum TemporalHintOpType {
TemporalHintAtmWrite = 0, ///< Presumably attributes-through-memory write -- TODO confirm the "Atm" expansion
TemporalHintImageRead = 4, ///< Image read
TemporalHintImageWrite = 8, ///< Image write
TemporalHintTessFactorWrite = 12, ///< Tessellation factor write
TemporalHintTessRead = 16, ///< Tessellation read
TemporalHintTessWrite = 20, ///< Tessellation write
TemporalHintBufferRead = 24, ///< Buffer read
TemporalHintBufferWrite = 28, ///< Buffer write
};
#endif

/// Represents info of compile-time constants within a shader of a specified stage.
struct CompileConstInfo {
unsigned numCompileTimeConstants; ///< Number of compile time constants.
Expand Down Expand Up @@ -496,7 +510,11 @@ struct PipelineOptions {
bool reverseThreadGroup; ///< If set, enable thread group reversing
bool internalRtShaders; ///< Whether this pipeline has internal raytracing shaders
unsigned forceNonUniformResourceIndexStageMask; ///< Mask of the stage to force using non-uniform resource index.
#if LLPC_BUILD_GFX12
bool expertSchedulingMode;
#else
bool reserved16;
#endif

struct GLState {
bool replaceSetWithResourceType; ///< For OGL only, replace 'set' with resource type during spirv translate
Expand All @@ -519,14 +537,27 @@ struct PipelineOptions {
} glState;
const auto &getGlState() const { return glState; }

#if LLPC_BUILD_GFX12
unsigned cacheScopePolicyControl; ///< Control cache scope policy. attributes-through-memory read/write is
/// available.
#else
unsigned reserved20;
#endif
bool enablePrimGeneratedQuery; ///< If set, primitive generated query is enabled
bool disablePerCompFetch; ///< Disable per component fetch in uber fetch shader.
bool reserved21;
bool optimizePointSizeWrite; ///< If set, the write of PointSize in the last vertex processing stage will be
///< eliminated if the write value is 1.0.
CompileConstInfo *compileConstInfo; ///< Compile time constant data.
#if LLPC_BUILD_GFX12
unsigned temporalHintControl; ///< Override value for temporal hint. A load/store occupies 4 bits. The highest bit
/// of 4 bits marks whether to override temporal hint.
/// Arrange from the low bit to high bit in the following order:
/// TemporalHintAtmWrite, TemporalHintImageRead, TemporalHintImageWrite,
/// TemporalHintTessFactorWrite, TemporalHintTessRead, TemporalHintTessWrite,
/// TemporalHintBufferRead, TemporalHintBufferWrite
#else
unsigned reserved22;
#endif
bool padBufferSizeToNextDword; ///< Vulkan only, set if the driver rounds the buffer size up the next dword
};

Expand Down Expand Up @@ -776,6 +807,28 @@ inline unsigned compact32(ShaderHash hash) {
/// Represent a pipeline option which can be automatic as well as explicitly set.
enum InvariantLoads : unsigned { Auto = 0, EnableOptimization = 1, DisableOptimization = 2, ClearInvariants = 3 };

#if LLPC_BUILD_GFX12
/// Control cache policy: whether to use LLC (last level cache, aka set noAlloc).
///
/// Holds a pointer to an array of per-resource 32-bit entries together with the number of resources.
struct CachePolicyLlc {
/// 32-bit packed view of one resource entry. The same dword can be accessed as individual fields, as a combined
/// resource ID, or as a raw value.
union NoAllocResource {
struct {
unsigned set : 5; ///< Resource set
unsigned binding : 16; ///< Resource binding
unsigned noAlloc : 1; ///< llc_noAlloc policy
unsigned : 10; ///< Unnamed padding up to 32 bits
};
struct {
unsigned resourceId : 21; ///< Combined resource ID: overlays the 5-bit set and the 16-bit binding above
unsigned : 11; ///< Unnamed padding up to 32 bits (overlays noAlloc and the padding above)
};
unsigned u32All; ///< All 32 bits as a single value
};

const unsigned *noAllocs; // Set for each resource. NOTE(review): presumably each element is a packed
                          // NoAllocResource::u32All value -- confirm against the producer.
unsigned resourceCount; // The count of resources
};
#endif

/// Represents per shader stage options.
struct PipelineShaderOptions {
ShaderHash clientHash; ///< Client-supplied unique shader hash. A value of zero indicates that LLPC should
Expand Down Expand Up @@ -918,6 +971,10 @@ struct PipelineShaderOptions {
/// Application workaround: forward propagate NoContraction decoration to any related FAdd operation.
bool forwardPropagateNoContract;

#if LLPC_BUILD_GFX12
/// Enable round-robin mode for waves in workgroup.
bool workgroupRoundRobin;
#endif
/// Binding ID offset of default uniform block
unsigned constantBufferBindingOffset;

Expand All @@ -931,6 +988,15 @@ struct PipelineShaderOptions {
/// will be assigned values as if they were decorated as DeviceIndex.
bool viewIndexFromDeviceIndex;

#if LLPC_BUILD_GFX12
/// Control LLC cache policy
CachePolicyLlc cachePolicyLlc;

/// Override value for temporal hint. A load/store occupies 4 bits. The highest bit of 4 bits marks whether to
/// override temporal hint.
unsigned temporalHintShaderControl;
#endif

/// Indicate whether the vertex shader is used by transform pipeline
bool enableTransformShader;

Expand Down Expand Up @@ -1471,13 +1537,18 @@ struct RayTracingPipelineBuildInfo {
unsigned pipelineLibStageMask; ///< Pipeline library stage mask
//@}

unsigned payloadSizeMaxInLib; ///< Pipeline library maxPayloadSize
unsigned attributeSizeMaxInLib; ///< Pipeline library maxAttributeSize
bool isReplay; ///< Pipeline is created for replaying
const void *pClientMetadata; ///< Pointer to (optional) client-defined data to be
/// stored inside the ELF
size_t clientMetadataSize; ///< Size (in bytes) of the client-defined data
unsigned cpsFlags; ///< Cps feature flags
unsigned payloadSizeMaxInLib; ///< Pipeline library maxPayloadSize
unsigned attributeSizeMaxInLib; ///< Pipeline library maxAttributeSize
bool isReplay; ///< Pipeline is created for replaying
const void *pClientMetadata; ///< Pointer to (optional) client-defined data to be
/// stored inside the ELF
size_t clientMetadataSize; ///< Size (in bytes) of the client-defined data
unsigned cpsFlags; ///< Cps feature flags
#if LLPC_BUILD_GFX12
bool disableDynamicVgpr; ///< Whether to disable dynamic VGPR mode for continuations. If not set, dVGPR mode is
/// enabled by default.
unsigned dynamicVgprBlockSize; ///< The size of the VGPR allocation granule used in dVGPR mode.
#endif
GpurtOption *pGpurtOptions; ///< Array of GPURT options
unsigned gpurtOptionCount; ///< Number of GPURT options
bool rtIgnoreDeclaredPayloadSize; ///< Ignore the declared payload size in the shader to address issues with Proton.
Expand Down
12 changes: 12 additions & 0 deletions lgc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,12 @@ target_sources(LLVMlgc PRIVATE
lowering/LowerRayQueryWrapper.cpp
)

#if LLPC_BUILD_GFX12
# AddBufferOperationMetadata.cpp is added to LLVMlgc only when the GFX12 build option is enabled.
if(LLPC_BUILD_GFX12)
target_sources(LLVMlgc PRIVATE lowering/AddBufferOperationMetadata.cpp)
endif()
#endif

# include/lgc/lowering
target_sources(LLVMlgc PRIVATE
include/lgc/lowering/AddLoopMetadata.h
Expand Down Expand Up @@ -235,6 +241,12 @@ target_sources(LLVMlgc PRIVATE
include/lgc/lowering/WorkaroundDsSubdwordWrite.h
)

#if LLPC_BUILD_GFX12
# AddBufferOperationMetadata.h is added to LLVMlgc only when the GFX12 build option is enabled.
if(LLPC_BUILD_GFX12)
target_sources(LLVMlgc PRIVATE include/lgc/lowering/AddBufferOperationMetadata.h)
endif()
#endif

# lgc/state
target_sources(LLVMlgc PRIVATE
state/Compiler.cpp
Expand Down
56 changes: 56 additions & 0 deletions lgc/builder/BuilderImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,51 @@ Type *BuilderBase::getConditionallyVectorizedTy(Type *elementTy, Type *maybeVecT
// @param vector2 : The float vector 2
// @param instName : Name to give instruction(s)
Value *BuilderImpl::CreateDotProduct(Value *const vector1, Value *const vector2, const Twine &instName) {
#if LLPC_BUILD_GFX12
if (getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 12) {
// Use a chain of v_dot2_f16_f16/v_dot2_bf16_bf16 on gfx12+.
//
// Note: GFX11 has this instruction, but its precision doesn't satisfy Vulkan requirements.
//
// Note: GFX10 chips may have v_dot2_f32_f16, which we could consider generating in cases where bitexact results
// are not required.
//
// Note: v_dot2_f16_f16/v_dot2_bf16_bf16 only respects RTE mode according to HW spec. We must check the
// specified rounding mode before using it. Also, v_dot2_f16_f16/v_dot2_bf16_bf16 is not IEEE compliant
// so we must check NSZ as well.
const auto fp16RoundMode =
getPipelineState()->getShaderModes()->getCommonShaderMode(m_shaderStage.value()).fp16RoundMode;
const auto vectorTy = dyn_cast<FixedVectorType>(vector1->getType());
if (vectorTy && (vectorTy->getScalarSizeInBits() == 16) &&
(fp16RoundMode == FpRoundMode::DontCare || fp16RoundMode == FpRoundMode::Even) &&
getFastMathFlags().noSignedZeros()) {
int compCount = vectorTy->getNumElements();
Value *result = nullptr;
Type *basicType = getHalfTy();
Intrinsic::AMDGCNIntrinsics inst = Intrinsic::amdgcn_fdot2_f16_f16;
if (vectorTy->getScalarType()->isBFloatTy()) {
basicType = getBFloatTy();
inst = Intrinsic::amdgcn_fdot2_bf16_bf16;
}

if (compCount % 2 == 0) {
result = ConstantFP::get(basicType, 0.0);
} else {
// If the component count is odd, prefer feeding the last product (odd one out) as initial value.
Value *lhs = CreateExtractElement(vector1, compCount - 1);
Value *rhs = CreateExtractElement(vector2, compCount - 1);
result = CreateFMul(lhs, rhs);
}

for (int i = 0; i + 1 < compCount; i += 2) {
Value *lhs = CreateShuffleVector(vector1, {i, i + 1});
Value *rhs = CreateShuffleVector(vector2, {i, i + 1});
result = CreateIntrinsic(basicType, inst, {lhs, rhs, result});
}
return result;
}
}
#endif

Value *product = CreateFMul(vector1, vector2);
if (!isa<VectorType>(product->getType()))
Expand Down Expand Up @@ -254,6 +299,9 @@ Value *BuilderImpl::CreateIntegerDotProduct(Value *vector1, Value *vector2, Valu
bool BuilderImpl::supportWaveWideBPermute(ShaderStageEnum shaderStage) const {
auto gfxIp = getPipelineState()->getTargetInfo().getGfxIpVersion().major;
auto supportBPermute = gfxIp == 8 || gfxIp == 9;
#if LLPC_BUILD_GFX12
supportBPermute = supportBPermute || (gfxIp == 12);
#endif
auto waveSize = getPipelineState()->getShaderWaveSize(shaderStage);
supportBPermute = supportBPermute || waveSize == 32;
return supportBPermute;
Expand All @@ -265,6 +313,14 @@ bool BuilderImpl::supportPermLane64Dpp() const {
return getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 11;
}

#if LLPC_BUILD_GFX12
// =====================================================================================================================
// Reports whether the target we are building for supports the permute lane var operations.
//
// @returns : True when the target's major graphics IP version is 12 or later.
bool BuilderImpl::supportPermLaneVar() const {
  const auto gfxIpMajor = getPipelineState()->getTargetInfo().getGfxIpVersion().major;
  return gfxIpMajor >= 12;
}
#endif

// =====================================================================================================================
// Create an "if..endif" or "if..else..endif" structure. The current basic block becomes the "endif" block, and all
// instructions in that block before the insert point are moved to the "if" block. The insert point is moved to
Expand Down
11 changes: 10 additions & 1 deletion lgc/builder/DescBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,16 @@ Value *BuilderImpl::buildBufferCompactDesc(Value *desc, Value *stride) {
sqBufRsrcWord3.gfx11.format = BUF_FORMAT_32_UINT;
sqBufRsrcWord3.gfx11.oobSelect = stride ? 3 : 2;
assert(sqBufRsrcWord3.u32All == 0x20014FAC || sqBufRsrcWord3.u32All == 0x30014FAC);
} else {
}
#if LLPC_BUILD_GFX12
else if (gfxIp.major == 12) {
sqBufRsrcWord3.gfx12.format = BUF_FORMAT_32_UINT;
sqBufRsrcWord3.gfx12.compressionEn = 1;
sqBufRsrcWord3.gfx12.oobSelect = stride ? 3 : 2;
assert(sqBufRsrcWord3.u32All == 0x22014FAC || sqBufRsrcWord3.u32All == 0x32014FAC);
}
#endif
else {
llvm_unreachable("Not implemented!");
}
bufDesc = CreateInsertElement(bufDesc, getInt32(sqBufRsrcWord3.u32All), 3);
Expand Down
Loading
Loading