Currently, some strange unoptimized code is appearing, and I suspect it's because I'm using a multi-.hlsl and .h compilation pattern similar to C++, where I compile a bunch of .dxil files first and then merge them into a single .dxil.
I feel that DXC often neglects optimization during the merging phase, simply performing a naive function insertion.
Is there any way to further optimize DXIL or enable an optimization level higher than -O3?
This is clearly a deterministic value.
Tip:
The last member of this struct.Storage is a zero-length array. I've found that as long as the array size is zero, I can still read from and write to it normally. Furthermore, this avoids the issue where the compiler loads all of the array's elements and then stores them back unchanged, even though the stored values are never used and a ret void immediately follows.
For example:
; Function Attrs: alwaysinline nounwind
; Entry function from the disassembled DXIL, quoted as an example of the
; unoptimized output. It takes no arguments and returns void; its visible
; effects are one raw buffer store plus stores to a 32-element i32 global.
define void @main() #3 {
; Load elements 6..31 of the [32 x i32] global into %1..%26. In this function
; these values are used only by the write-back stores near the end.
%1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 6), align 4
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 7), align 4
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 8), align 4
%4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 9), align 4
%5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 10), align 4
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 11), align 4
%7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 12), align 4
%8 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 13), align 4
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 14), align 4
%10 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 15), align 4
%11 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 16), align 4
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 17), align 4
%13 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 18), align 4
%14 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 19), align 4
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 20), align 4
%16 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 21), align 4
%17 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 22), align 4
%18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 23), align 4
%19 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 24), align 4
%20 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 25), align 4
%21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 26), align 4
%22 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 27), align 4
%23 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 28), align 4
%24 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 29), align 4
%25 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 30), align 4
%26 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 31), align 4
; Fetch element 0 of the LaunchMemory handle array, then turn it into an
; annotated handle (%29) for the buffer store below.
%27 = load %dx.types.Handle, %dx.types.Handle* getelementptr inbounds ([0 x %dx.types.Handle], [0 x %dx.types.Handle]* @"\01?LaunchMemory@@3PAURWByteAddressBuffer@@A", i32 0, i32 0), align 4, !noalias !18
%28 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %27) ; CreateHandleForLib(Resource)
%29 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %28, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer
; Store three i64 values (4294967296 == 1 << 32, 0, 0) at index 16 with mask 7.
; Both elementOffset and value3 are `undef` here.
; NOTE(review): mask 7 presumably selects value0..value2 only, which would make
; the undef value3 unused - confirm against the DXIL RawBufferStore spec.
call void @dx.op.rawBufferStore.i64(i32 140, %dx.types.Handle %29, i32 16, i32 undef, i64 4294967296, i64 0, i64 0, i64 undef, i8 7, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)
; Write constants to elements 0..5 of the global (0, 1, 0, 0, 0, 0).
store i32 0, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 0), align 4
store i32 1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 1), align 4
store i32 0, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 2), align 4
store i32 0, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 3), align 4
store i32 0, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 4), align 4
store i32 0, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 5), align 4
; Write %1..%26 back to elements 6..31, i.e. each value goes to the same
; element it was loaded from at the top of the function. These are the
; load/store-back pairs the report describes as unoptimized.
store i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 6), align 4
store i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 7), align 4
store i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 8), align 4
store i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 9), align 4
store i32 %5, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 10), align 4
store i32 %6, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 11), align 4
store i32 %7, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 12), align 4
store i32 %8, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 13), align 4
store i32 %9, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 14), align 4
store i32 %10, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 15), align 4
store i32 %11, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 16), align 4
store i32 %12, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 17), align 4
store i32 %13, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 18), align 4
store i32 %14, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 19), align 4
store i32 %15, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 20), align 4
store i32 %16, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 21), align 4
store i32 %17, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 22), align 4
store i32 %18, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 23), align 4
store i32 %19, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 24), align 4
store i32 %20, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 25), align 4
store i32 %21, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 26), align 4
store i32 %22, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 27), align 4
store i32 %23, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 28), align 4
store i32 %24, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 29), align 4
store i32 %25, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 30), align 4
store i32 %26, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @"D:\5CE\5CProject_VS\5CMindUnion\5CXV_WorkGraphsEngine\5CProjects\5CGPULauch\5CNode0.c9f50a3039c7a1db.dxi.LaunchMemory_GlobalInstance.4.0", i32 0, i32 31), align 4
ret void
}
Description
Currently, some strange unoptimized code is appearing, and I suspect it's because I'm using a multi-.hlsl and .h compilation pattern similar to C++, where I compile a bunch of .dxil files first and then merge them into a single .dxil.
I feel that DXC often neglects optimization during the merging phase, simply performing a naive function insertion.
Query
Is there any way to further optimize DXIL or enable an optimization level higher than -O3?
Steps to Reproduce
If needed, I will put together a minimal reproduction.
Actual Behavior
Because of that select, the validation layer is throwing an error, stating that undef cannot be passed to rawBufferStore. Or:
This is clearly a deterministic value.
Tip:
The last member of this struct.Storage is a zero-length array. I've found that as long as the array size is zero, I can still read from and write to it normally. Furthermore, this avoids the issue where the compiler loads all of the array's elements and then stores them back unchanged, even though the stored values are never used and a ret void immediately follows.
For example:
Environment