Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion zstd/zstdgpu/zstdgpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ ZSTDGPU_ENUM(Status) zstdgpu_CreatePersistentContext(zstdgpu_PersistentContext *
// Nvidia
ZSTDGPU_KERNEL_MAP(DecompressLiterals, DecompressLiterals_LdsStoreCache32_16);
context->DecompressLiterals_LdsStoreCache_StreamsPerGroup = 16;
- ZSTDGPU_KERNEL_MAP(DecompressSequences, DecompressSequences_LdsFseCache32);
+ ZSTDGPU_KERNEL_MAP(DecompressSequences, DecompressSequences_SingleStream_LdsFseCache32);
ZSTDGPU_KERNEL_MAP(ExecuteSequences, ExecuteSequences64);
}
else if (featureOptions1.WaveLaneCountMax == 128)
Expand Down
1 change: 0 additions & 1 deletion zstd/zstdgpu/zstdgpu_shaders.h
Original file line number Diff line number Diff line change
Expand Up @@ -3872,7 +3872,6 @@ static void zstdgpu_ShaderEntry_DecompressSequences_MultiStream_LdsOutCache(ZSTD
uint32_t stateOffs = ZSTDGPU_BACKWARD_BITBUF(GetNoRefill)(bitBuffer, initBitcntOffs);
uint32_t stateMLen = ZSTDGPU_BACKWARD_BITBUF(GetNoRefill)(bitBuffer, initBitcntMLen);

const uint32_t seqMemStart = seqRefDst.offs;
const uint32_t storeCacheThreadOffset = seqStreamIdxInGroup * cacheDwordsPerStream;

const uint32_t laneCnt = zstdgpu_MinU32(WaveGetLaneCount(), tgSize);
Expand Down
14 changes: 12 additions & 2 deletions zstd/zstdgpu_demo/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,7 @@ int WINAPI wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE, _In_ LPWSTR lp
bool nextPrfLevel = false;
bool nextMinFrame = false;
bool nextMaxFrame = false;
bool badArg = false;
for (argi = 1; argi < argc; ++argi)
{
if (nextZst)
Expand Down Expand Up @@ -988,8 +989,13 @@ int WINAPI wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE, _In_ LPWSTR lp
{
nextMaxFrame = true;
}
else
{
debugPrint(L"Unknown argv[%d] %s\n", argi, argv[argi]);
badArg = true;
}
}
- if (1 == argc)
+ if (1 == argc || badArg)
{
debugPrint(L"USAGE:\n");
debugPrint(L"\t--zst <path to .zst file> [Required] Specifies a file path to .zst file to decompress. Could be absolute or relative path.\n");
Expand All @@ -1004,6 +1010,10 @@ int WINAPI wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE, _In_ LPWSTR lp
debugPrint(L"\t--ext-mem [Optional] Enables external heaps so the library doesn't create them.\n");
debugPrint(L"\t--prf-lvl <0, 1, 2> [Optional] Chooses the level of profiling: 0 - overall bandwidth in GB/s, 1 - stage cost, 2 - internal pass cost.\n");
debugPrint(L"\t--idx-{min,max} <number> [Optional] Chooses the {minimal, maximal} index of the frame to decompress in multi-frame .zst file. Both values are clamped to the number of available frames.\n");
if (badArg)
{
return 1;
}
}
if (NULL == zstFilePathStorage)
{
Expand Down Expand Up @@ -1206,7 +1216,7 @@ int WINAPI wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE, _In_ LPWSTR lp
// NOTE(pamartis): This variable is needed to support '--ext-mem' demo mode supplying into zstdgpu library
// 'compressed' data and 'meta' (references) to zstd frames -- as pre-loaded into VMEM buffers
// TODO(pamartis): Expose this option as command line option
- static const uint32_t testSourceInGpuMemory = 0u;
+ const volatile uint32_t testSourceInGpuMemory = 0u;

d3d12aid_MappedBuffer zstdCompressedFramesMemory;
d3d12aid_MappedBuffer zstdCompressedFramesRefs;
Expand Down