Skip to content

Commit 2c2a532

Browse files
MichaelLettrich authored and chiarazampolli committed
[CTF] Fix 0 Bit Packing
A bug in `ctf::EncodedBlocks` caused data to be packed incorrectly. The problem occurs for sequences in which all input values are identical, i.e. every element of the array has the same value v (a[i] = v for each index i). In broken CTFs, the metadata entry `opt` is `PACK`, while `alphabetRangeBits`, `min` and `max` are `0`. The payload is a single word containing the value. During unpacking, no data is written into the decode buffer. * For incorrectly written CTFs, the original data can be restored by writing the payload word `metadata.messageLength` times into the decode buffer. * For new CTFs where all input values are identical, no payload is written. Instead, `metadata.alphabetRangeBits` is set to 0 and `metadata.min` contains the repeating value. * When unpacking this special case, `metadata.min` is written `messageLength` times. The `rans::pack` and `rans::unpack` functions contain additional guards that throw a `PackingError` if asked to pack data into, or unpack data from, 0-bit-wide blocks. (cherry picked from commit bb1d6da)
1 parent 099fa61 commit 2c2a532

File tree

3 files changed

+82
-26
lines changed

3 files changed

+82
-26
lines changed

DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/EncodedBlocks.h

Lines changed: 61 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -284,8 +284,10 @@ struct Block {
284284
// resize block and free up unused buffer space.
285285
void realignBlock()
286286
{
287-
size_t sz = estimateSize(getNStored());
288-
registry->offsFreeStart = (reinterpret_cast<char*>(payload) - registry->head) + sz;
287+
if (payload) {
288+
size_t sz = estimateSize(getNStored());
289+
registry->offsFreeStart = (reinterpret_cast<char*>(payload) - registry->head) + sz;
290+
}
289291
}
290292

291293
/// store binary blob data (buffer filled from head to tail)
@@ -608,13 +610,17 @@ class EncodedBlocks
608610
using source_type = typename std::iterator_traits<input_IT>::value_type;
609611

610612
rans::Metrics<source_type> metrics{};
613+
metrics.getDatasetProperties().numSamples = std::distance(srcBegin, srcEnd);
611614

612-
const auto [minIter, maxIter] = std::minmax_element(srcBegin, srcEnd);
613-
if (minIter != maxIter) {
615+
if (metrics.getDatasetProperties().numSamples != 0) {
616+
const auto [minIter, maxIter] = std::minmax_element(srcBegin, srcEnd);
614617
metrics.getDatasetProperties().min = *minIter;
615618
metrics.getDatasetProperties().max = *maxIter;
616-
metrics.getDatasetProperties().alphabetRangeBits = rans::utils::getRangeBits(metrics.getDatasetProperties().min,
617-
metrics.getDatasetProperties().max);
619+
620+
// special case: if min === max, the range is 0 and the data can be reconstructed just via the metadata.
621+
metrics.getDatasetProperties().alphabetRangeBits =
622+
rans::utils::getRangeBits(metrics.getDatasetProperties().min,
623+
metrics.getDatasetProperties().max);
618624
}
619625

620626
return pack(srcBegin, srcEnd, slot, metrics, buffer);
@@ -638,7 +644,7 @@ class EncodedBlocks
638644
CTFIOSize decodeCopyImpl(dst_IT dest, int slot) const;
639645

640646
ClassDefNV(EncodedBlocks, 3);
641-
};
647+
}; // namespace ctf
642648

643649
///_____________________________________________________________________________
644650
/// read from tree to non-flat object
@@ -877,7 +883,6 @@ inline auto EncodedBlocks<H, N, W>::create(VD& v)
877883
template <typename H, int N, typename W>
878884
void EncodedBlocks<H, N, W>::print(const std::string& prefix, int verbosity) const
879885
{
880-
verbosity = 5;
881886
if (verbosity > 0) {
882887
LOG(info) << prefix << "Container of " << N << " blocks, size: " << size() << " bytes, unused: " << getFreeSize();
883888
for (int i = 0; i < N; i++) {
@@ -922,22 +927,26 @@ CTFIOSize EncodedBlocks<H, N, W>::decode(D_IT dest, // it
922927
const auto& ansVersion = getANSHeader();
923928
const auto& block = mBlocks[slot];
924929
const auto& md = mMetadata[slot];
925-
926-
if (!block.getNStored()) {
927-
return {0, md.getUncompressedSize(), md.getCompressedSize()};
928-
}
930+
LOGP(debug, "Slot{} | NStored={} Ndict={} nData={}, MD: messageLength:{} opt:{} min:{} max:{} offs:{} width:{} ", slot, block.getNStored(), block.getNDict(), block.getNData(), md.messageLength, (int)md.opt, md.min, md.max, md.literalsPackingOffset, md.literalsPackingWidth);
929931

930932
if (ansVersion == ANSVersionCompat) {
933+
if (!block.getNStored()) {
934+
return {0, md.getUncompressedSize(), md.getCompressedSize()};
935+
}
931936
if (md.opt == Metadata::OptStore::EENCODE) {
932937
return decodeCompatImpl(dest, slot, decoderExt);
933938
} else {
934939
return decodeCopyImpl(dest, slot);
935940
}
936941
} else if (ansVersion == ANSVersion1) {
942+
if (md.opt == Metadata::OptStore::PACK) {
943+
return decodeUnpackImpl(dest, slot);
944+
}
945+
if (!block.getNStored()) {
946+
return {0, md.getUncompressedSize(), md.getCompressedSize()};
947+
}
937948
if (md.opt == Metadata::OptStore::EENCODE) {
938949
return decodeRansV1Impl(dest, slot, decoderExt);
939-
} else if (md.opt == Metadata::OptStore::PACK) {
940-
return decodeUnpackImpl(dest, slot);
941950
} else {
942951
return decodeCopyImpl(dest, slot);
943952
}
@@ -1048,9 +1057,26 @@ CTFIOSize EncodedBlocks<H, N, W>::decodeUnpackImpl(dst_IT dest, int slot) const
10481057
const auto& block = mBlocks[slot];
10491058
const auto& md = mMetadata[slot];
10501059

1060+
const size_t messageLength = md.messageLength;
10511061
const size_t packingWidth = md.probabilityBits;
10521062
const dest_t offset = md.min;
1053-
rans::unpack(block.getData(), md.messageLength, dest, packingWidth, offset);
1063+
const auto* srcIt = block.getData();
1064+
// we have a vector of one and the same value. All information is in the metadata
1065+
if (packingWidth == 0) {
1066+
const dest_t value = [&]() {
1067+
// Bugfix: We tried packing values with a width of 0 Bits;
1068+
if (md.nDataWords > 0) {
1069+
return static_cast<dest_t>(*srcIt);
1070+
}
1071+
// normal case:
1072+
return offset;
1073+
}();
1074+
for (size_t i = 0; i < messageLength; ++i) {
1075+
*dest++ = value;
1076+
}
1077+
} else {
1078+
rans::unpack(srcIt, messageLength, dest, packingWidth, offset);
1079+
}
10541080
return {0, md.getUncompressedSize(), md.getCompressedSize()};
10551081
};
10561082

@@ -1440,20 +1466,29 @@ o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::pack(const input_IT srcBegin, const i
14401466
using storageBuffer_t = W;
14411467
using input_t = typename std::iterator_traits<input_IT>::value_type;
14421468

1443-
const size_t messageLength = std::distance(srcBegin, srcEnd);
1444-
1445-
internal::Packer<input_t> packer{metrics};
1446-
size_t packingBufferWords = packer.template getPackingBufferSize<storageBuffer_t>(messageLength);
1447-
auto [thisBlock, thisMetadata] = expandStorage(slot, packingBufferWords, buffer);
1469+
const size_t messageLength = metrics.getDatasetProperties().numSamples;
1470+
const auto alphabetRangeBits = metrics.getDatasetProperties().alphabetRangeBits;
14481471

1449-
auto packedMessageEnd = packer.pack(srcBegin, srcEnd, thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1450-
const size_t packeSize = std::distance(thisBlock->getCreateData(), packedMessageEnd);
1451-
thisBlock->setNData(packeSize);
1452-
thisBlock->realignBlock();
1472+
auto* thisBlock = &mBlocks[slot];
1473+
auto* thisMetadata = &mMetadata[slot];
1474+
size_t packedSize = 0;
14531475

1454-
LOGP(info, "StoreData {} bytes, offs: {}:{}", packeSize * sizeof(storageBuffer_t), thisBlock->getOffsData(), thisBlock->getOffsData() + packeSize * sizeof(storageBuffer_t));
1476+
if (messageLength == 0) {
1477+
*thisMetadata = detail::makeMetadataPack<input_t>(0, 0, 0, 0);
1478+
} else if (metrics.getDatasetProperties().alphabetRangeBits == 0) {
1479+
*thisMetadata = detail::makeMetadataPack<input_t>(messageLength, 0, *srcBegin, 0);
1480+
} else {
1481+
internal::Packer<input_t> packer{metrics};
1482+
size_t packingBufferWords = packer.template getPackingBufferSize<storageBuffer_t>(messageLength);
1483+
std::tie(thisBlock, thisMetadata) = expandStorage(slot, packingBufferWords, buffer);
1484+
auto packedMessageEnd = packer.pack(srcBegin, srcEnd, thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1485+
packedSize = std::distance(thisBlock->getCreateData(), packedMessageEnd);
1486+
*thisMetadata = detail::makeMetadataPack<input_t>(messageLength, packer.getPackingWidth(), packer.getOffset(), packedSize);
1487+
thisBlock->setNData(packedSize);
1488+
thisBlock->realignBlock();
1489+
}
14551490

1456-
*thisMetadata = detail::makeMetadataPack<input_t>(messageLength, packer.getPackingWidth(), packer.getOffset(), packeSize);
1491+
LOGP(info, "StoreData {} bytes, offs: {}:{}", packedSize * sizeof(storageBuffer_t), thisBlock->getOffsData(), thisBlock->getOffsData() + packedSize * sizeof(storageBuffer_t));
14571492
return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize()};
14581493
};
14591494

Utilities/rANS/include/rANS/internal/common/exceptions.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,11 @@ class DecodingError : public CodingError
9292
using CodingError::CodingError;
9393
};
9494

95+
class PackingError : public CodingError
96+
{
97+
using CodingError::CodingError;
98+
};
99+
95100
} // namespace o2::rans
96101

97102
#endif /* RANS_INTERNAL_COMMON_EXCEPTIONS_ */

Utilities/rANS/include/rANS/internal/pack/pack.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,13 @@ inline constexpr BitPtr pack(const input_T* __restrict inputBegin, size_t extent
202202
assert(outputBegin != nullptr);
203203

204204
switch (packingWidth) {
205+
case 0:
206+
if (extent > 0) {
207+
throw PackingError("Cannot pack data into 0 Bit wide blocks");
208+
} else {
209+
return BitPtr(outputBegin);
210+
}
211+
break;
205212
case 1:
206213
return packStreamImpl<input_T, output_T, 1>(inputBegin, extent, outputBegin, offset);
207214
break;
@@ -342,6 +349,15 @@ inline void unpack(const input_T* __restrict inputBegin, size_t extent, output_I
342349
using namespace utils;
343350
using dst_type = typename std::iterator_traits<output_IT>::value_type;
344351

352+
// cannot unpack into 0 bits
353+
if (packingWidth == 0) {
354+
if (extent == 0) {
355+
return;
356+
} else {
357+
throw PackingError("Cannot unpack into 0 Bit wide data");
358+
}
359+
}
360+
345361
auto unpackImpl = [&](auto packer) {
346362
output_IT outputIt = outputBegin;
347363
BitPtr iter{inputBegin};

0 commit comments

Comments
 (0)