Skip to content

Commit c47baa3

Browse files
committed
Streamlined byte extraction by removing get()/advance() calls.
We also replace extract_until() with extract(), as the latter is more standard. There is no need to call advance() before extract(), which is simpler to use. Also, directly extract file headers into string buffers for simplicity.
1 parent 9e3cc44 commit c47baa3

4 files changed

Lines changed: 26 additions & 63 deletions

File tree

include/rds2cpp/parse_rda.hpp

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -59,35 +59,16 @@ RdaFile parse_rda(Reader_& reader, const ParseRdaOptions& options) {
5959

6060
RdaFile output;
6161

62-
// Reading the header first. This is the first and only time that
63-
// we need to do a src.valid() check, as we're using the current
64-
// position of the source; in all other cases, it can be assumed
65-
// that the source needs to be advance()'d before get().
6662
{
6763
try {
68-
if (!src.valid()) {
69-
throw empty_error();
70-
}
71-
72-
std::string header;
73-
header += as_char(src.get());
74-
for (int i = 0; i < 4; ++i) {
75-
if (!src.advance()) {
76-
throw empty_error();
77-
}
78-
header += as_char(src.get());
79-
}
64+
std::string header(5, '\0');
65+
quick_extract(src, 5, reinterpret_cast<unsigned char*>(header.data()));
8066
if (header != "RDX2\n" && header != "RDX3\n") {
81-
throw std::runtime_error("unsupported format are currently supported");
67+
throw std::runtime_error("only RDX2 or RDX3 formats are currently supported");
8268
}
8369

84-
header.clear();
85-
for (int i = 0; i < 2; ++i) {
86-
if (!src.advance()) {
87-
throw empty_error();
88-
}
89-
header += as_char(src.get());
90-
}
70+
header.resize(2);
71+
quick_extract(src, 2, reinterpret_cast<unsigned char*>(header.data()));
9172
if (header != "X\n") {
9273
throw std::runtime_error("only RDA files in XDR format are currently supported");
9374
}

include/rds2cpp/parse_rds.hpp

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -59,23 +59,10 @@ RdsFile parse_rds(Reader_& reader, const ParseRdsOptions& options) {
5959

6060
RdsFile output;
6161

62-
// Reading the header first. This is the first and only time that
63-
// we need to do a src.valid() check, as we're using the current
64-
// position of the source; in all other cases, it can be assumed
65-
// that the source needs to be advance()'d before get().
6662
{
6763
try {
68-
if (!src.valid()) {
69-
throw empty_error();
70-
}
71-
72-
std::string header;
73-
header += as_char(src.get());
74-
if (!src.advance()) {
75-
throw empty_error();
76-
}
77-
header += as_char(src.get());
78-
64+
std::string header(2, '\0');
65+
quick_extract(src, 2, reinterpret_cast<unsigned char*>(header.data()));
7966
if (header != "X\n") {
8067
throw std::runtime_error("only RDS files in XDR format are currently supported");
8168
}

include/rds2cpp/utils_parse.hpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,15 @@ inline void quick_extract(byteme::BufferedReader<unsigned char>& src, std::size_
2626
if (len == 0) {
2727
return;
2828
}
29-
if (!src.advance()) {
29+
if (!src.valid()) {
3030
throw empty_error();
3131
}
32-
auto extracted = src.extract_until(len, output);
33-
if (extracted < len) {
32+
auto extracted = src.extract(len, output);
33+
if (extracted.first < len) {
3434
throw empty_error();
3535
}
3636
}
3737

38-
inline char as_char(unsigned char val) {
39-
return *reinterpret_cast<const char*>(&val); // make sure we interpret this as a char.
40-
}
41-
4238
inline bool little_endian() {
4339
const std::uint32_t value = 1;
4440
auto lsb = reinterpret_cast<const unsigned char *>(&value);

tests/tests/testthat/test-other.R

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,25 @@
11
# library(testthat); library(rds2cpp); source("setup.R"); source("test-other.R")
22

33
test_that("length extraction works correctly", {
4-
# Remember this advances past the first byte.
5-
expect_identical(rds2cpp::parse_length(as.raw(c(0, 0, 0, 0, 16))), 16)
6-
expect_identical(rds2cpp::parse_length(as.raw(c(0, 0, 0, 255, 16))), 255 * 256 + 16)
7-
expect_identical(rds2cpp::parse_length(as.raw(c(0, 0, 10, 255, 16))), 10 * 256^2 + 255 * 256 + 16)
8-
expect_identical(rds2cpp::parse_length(as.raw(c(0, 5, 10, 255, 16))), 5 * 256^3 + 10 * 256^2 + 255 * 256 + 16)
9-
10-
expect_error(rds2cpp::parse_length(as.raw(c(0, 255, 0, 0, 0)), "should be non-negative"))
11-
expect_identical(rds2cpp::parse_length(as.raw(c(0, 255, 255, 255, 255, 0, 0, 0, 1, 0, 0, 0, 1))), 2^32 + 1)
12-
expect_identical(rds2cpp::parse_length(as.raw(c(0, 255, 255, 255, 255, 0, 2, 0, 4, 0, 6, 0, 8))), 2 * 256^ 6 + 4 * 256^4 + 6 * 256^2 + 8)
4+
expect_identical(rds2cpp::parse_length(as.raw(c(0, 0, 0, 16))), 16)
5+
expect_identical(rds2cpp::parse_length(as.raw(c(0, 0, 255, 16))), 255 * 256 + 16)
6+
expect_identical(rds2cpp::parse_length(as.raw(c(0, 10, 255, 16))), 10 * 256^2 + 255 * 256 + 16)
7+
expect_identical(rds2cpp::parse_length(as.raw(c(5, 10, 255, 16))), 5 * 256^3 + 10 * 256^2 + 255 * 256 + 16)
8+
9+
expect_error(rds2cpp::parse_length(as.raw(c(255, 0, 0, 0)), "should be non-negative"))
10+
expect_identical(rds2cpp::parse_length(as.raw(c(255, 255, 255, 255, 0, 0, 0, 1, 0, 0, 0, 1))), 2^32 + 1)
11+
expect_identical(rds2cpp::parse_length(as.raw(c(255, 255, 255, 255, 0, 2, 0, 4, 0, 6, 0, 8))), 2 * 256^ 6 + 4 * 256^4 + 6 * 256^2 + 8)
1312
})
1413

1514
test_that("string extraction works correctly", {
16-
expect_identical(rds2cpp::parse_single_string(as.raw(c(0, 0, 0, 2^7, 9, 0, 0, 0, 5, charToRaw('ABCDE')))), list("ABCDE", "UTF-8"))
17-
expect_identical(rds2cpp::parse_single_string(as.raw(c(0, 0, 0, 2^6, 9, 0, 0, 0, 5, charToRaw('ABCDE')))), list("ABCDE", "latin1"))
18-
expect_identical(rds2cpp::parse_single_string(as.raw(c(0, 0, 2^2, 0, 9, 0, 0, 0, 5, charToRaw('ABCDE')))), list("ABCDE", "unknown"))
19-
expect_identical(rds2cpp::parse_single_string(as.raw(c(0, 0, 0, 2^5, 9, 0, 0, 0, 5, charToRaw('ABCDE')))), list("ABCDE", "bytes"))
20-
21-
expect_identical(rds2cpp::parse_single_string(as.raw(c(0, 0, 0, 2^7, 9, 0, 0, 0, 0))), list("", "UTF-8"))
22-
expect_identical(rds2cpp::parse_single_string(as.raw(c(0, 0, 0, 2^5, 9, 255, 255, 255, 255))), NA_character_)
23-
expect_error(rds2cpp::parse_single_string(as.raw(c(0, 0, 0, 2^5, 9, 255, 0, 0, 0))), "non-negative");
15+
expect_identical(rds2cpp::parse_single_string(as.raw(c(0, 0, 2^7, 9, 0, 0, 0, 5, charToRaw('ABCDE')))), list("ABCDE", "UTF-8"))
16+
expect_identical(rds2cpp::parse_single_string(as.raw(c(0, 0, 2^6, 9, 0, 0, 0, 5, charToRaw('ABCDE')))), list("ABCDE", "latin1"))
17+
expect_identical(rds2cpp::parse_single_string(as.raw(c(0, 2^2, 0, 9, 0, 0, 0, 5, charToRaw('ABCDE')))), list("ABCDE", "unknown"))
18+
expect_identical(rds2cpp::parse_single_string(as.raw(c(0, 0, 2^5, 9, 0, 0, 0, 5, charToRaw('ABCDE')))), list("ABCDE", "bytes"))
19+
20+
expect_identical(rds2cpp::parse_single_string(as.raw(c(0, 0, 2^7, 9, 0, 0, 0, 0))), list("", "UTF-8"))
21+
expect_identical(rds2cpp::parse_single_string(as.raw(c(0, 0, 2^5, 9, 255, 255, 255, 255))), NA_character_)
22+
expect_error(rds2cpp::parse_single_string(as.raw(c(0, 0, 2^5, 9, 255, 0, 0, 0))), "non-negative");
2423
})
2524

2625
test_that("RDS preamble extraction works correctly", {

0 commit comments

Comments
 (0)