Skip to content

Commit f029fe7

Browse files
committed
Use PerByte::advance_and_extract() to copy byte chunks from the input.
This should be faster than a get()/advance() loop, as it uses std::copy() under the hood. However, it does require upgrading to the latest version of byteme (2.0.1 or later). Note that we don't do this for strings, as we don't have a buffer that we can safely copy the byte chunks into: std::string::data() only exposes a const pointer (prior to C++17), so the strings are still filled one byte at a time.
1 parent c31af34 commit f029fe7

9 files changed

Lines changed: 30 additions & 63 deletions

File tree

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
cmake_minimum_required(VERSION 3.24)
22

33
project(rds2cpp
4-
VERSION 1.1.0
4+
VERSION 1.2.0
55
DESCRIPTION "Standalone C++ library for reading RDS files"
66
LANGUAGES CXX)
77

@@ -19,7 +19,7 @@ option(RDS2CPP_FETCH_EXTERN "Automatically fetch rds2cpp's external dependencies
1919
if(RDS2CPP_FETCH_EXTERN)
2020
add_subdirectory(extern)
2121
else()
22-
find_package(ltla_byteme CONFIG REQUIRED)
22+
find_package(ltla_byteme 2.0.1 CONFIG REQUIRED)
2323
endif()
2424

2525
target_link_libraries(rds2cpp INTERFACE ltla::byteme)

cmake/Config.cmake.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
@PACKAGE_INIT@
22

33
include(CMakeFindDependencyMacro)
4-
find_dependency(ltla_byteme CONFIG REQUIRED)
4+
find_dependency(ltla_byteme 2.0.1 CONFIG REQUIRED)
55

66
include("${CMAKE_CURRENT_LIST_DIR}/ltla_rds2cppTargets.cmake")

extern/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ include(FetchContent)
55
FetchContent_Declare(
66
byteme
77
GIT_REPOSITORY https://github.com/LTLA/byteme
8-
GIT_TAG master
8+
GIT_TAG master # ^2.0.1
99
)
1010

1111
FetchContent_MakeAvailable(byteme)

include/rds2cpp/parse_atomic.hpp

Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,8 @@ Vector parse_integer_or_logical_body(Source_& src) {
2121
constexpr size_t width = 4;
2222
static_assert(width == sizeof(decltype(output.data[0])));
2323
size_t byte_length = width * len;
24-
2524
auto ptr = reinterpret_cast<unsigned char*>(output.data.data());
26-
for (size_t i = 0; i < byte_length; ++i) {
27-
if (!src.advance()) {
28-
throw empty_error();
29-
}
30-
ptr[i] = src.get();
31-
}
25+
quick_extract(src, byte_length, ptr);
3226

3327
// Flipping endianness.
3428
if (little_endian()) {
@@ -65,14 +59,8 @@ DoubleVector parse_double_body(Source_& src) try {
6559
constexpr size_t width = 8;
6660
static_assert(width == sizeof(decltype(output.data[0])));
6761
size_t byte_length = width * len;
68-
6962
auto ptr = reinterpret_cast<unsigned char*>(output.data.data());
70-
for (size_t i = 0; i < byte_length; ++i) {
71-
if (!src.advance()) {
72-
throw empty_error();
73-
}
74-
ptr[i] = src.get();
75-
}
63+
quick_extract(src, byte_length, ptr);
7664

7765
// Flipping endianness.
7866
if (little_endian()) {
@@ -93,12 +81,7 @@ RawVector parse_raw_body(Source_& src) try {
9381
RawVector output(len);
9482

9583
auto ptr = reinterpret_cast<unsigned char*>(output.data.data());
96-
for (size_t i = 0; i < len; ++i) {
97-
if (!src.advance()) {
98-
throw empty_error();
99-
}
100-
ptr[i] = src.get();
101-
}
84+
quick_extract(src, len, ptr);
10285

10386
return output;
10487
} catch (std::exception& e) {
@@ -113,14 +96,8 @@ ComplexVector parse_complex_body(Source_& src) try {
11396
constexpr size_t width = 16;
11497
static_assert(width == sizeof(decltype(output.data[0])));
11598
size_t byte_length = width * len;
116-
11799
auto ptr = reinterpret_cast<unsigned char*>(output.data.data());
118-
for (size_t b = 0; b < byte_length; ++b) {
119-
if (!src.advance()) {
120-
throw empty_error();
121-
}
122-
ptr[b] = src.get();
123-
}
100+
quick_extract(src, byte_length, ptr);
124101

125102
// Flipping endianness for each double.
126103
if (little_endian()) {

include/rds2cpp/parse_builtin.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@ BuiltInFunction parse_builtin_body(Source_& src) try {
1414
size_t len = get_length(src);
1515

1616
BuiltInFunction output;
17-
output.name.resize(len);
17+
output.name.reserve(len); // don't resize and use extract() on string::data, as that pointer is read-only AFAICT.
1818
for (size_t i = 0; i < len; ++i) {
1919
if (!src.advance()) {
2020
throw empty_error();
2121
}
22-
output.name[i] = src.get();
22+
output.name.push_back(as_char(src.get()));
2323
}
2424

2525
return output;

include/rds2cpp/parse_environment.hpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,7 @@ EnvironmentIndex parse_new_environment_body(Source_& src, SharedParseInfo& share
4545

4646
// The next 4 bytes describe the parent environment.
4747
std::array<unsigned char, 4> parent;
48-
for (int i = 0; i < 4; ++i) {
49-
if (!src.advance()) {
50-
throw empty_error();
51-
}
52-
parent[i] = src.get();
53-
}
48+
quick_extract(src, parent.size(), parent.data());
5449

5550
auto lastbit = parent[3];
5651
if (lastbit == static_cast<unsigned char>(SEXPType::REF)) {

include/rds2cpp/parse_rds.hpp

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,7 @@ RdsFile parse_rds(Reader_& reader, const ParseRdsOptions& options) {
9999
if (!src.advance()) {
100100
throw empty_error();
101101
}
102-
for (int pos = 0; pos < 3; ++pos) {
103-
if (!src.advance()) {
104-
throw empty_error();
105-
}
106-
output.writer_version[pos] = src.get();
107-
}
102+
quick_extract(src, output.writer_version.size(), output.writer_version.data());
108103
} catch (std::exception& e) {
109104
throw traceback("failed to read the writer version number from the RDS preamble", e);
110105
}
@@ -113,12 +108,7 @@ RdsFile parse_rds(Reader_& reader, const ParseRdsOptions& options) {
113108
if (!src.advance()) {
114109
throw empty_error();
115110
}
116-
for (int pos = 0; pos < 3; ++pos) {
117-
if (!src.advance()) {
118-
throw empty_error();
119-
}
120-
output.reader_version[pos] = src.get();
121-
}
111+
quick_extract(src, output.reader_version.size(), output.reader_version.data());
122112
} catch (std::exception& e) {
123113
throw traceback("failed to read the reader version number from the RDS preamble", e);
124114
}
@@ -140,12 +130,12 @@ RdsFile parse_rds(Reader_& reader, const ParseRdsOptions& options) {
140130
}
141131

142132
try {
143-
output.encoding.reserve(encoding_length);
133+
output.encoding.reserve(encoding_length); // don't resize and use extract() on string::data, as that pointer is read-only AFAICT.
144134
for (size_t b = 0; b < encoding_length; ++b) {
145135
if (!src.advance()) {
146136
throw empty_error();
147137
}
148-
output.encoding.push_back(src.get());
138+
output.encoding.push_back(as_char(src.get()));
149139
}
150140
} catch (std::exception& e) {
151141
throw traceback("failed to read the encoding string from the RDS preamble", e);

include/rds2cpp/parse_single_string.hpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,12 @@ StringInfo parse_single_string(Source_& src) try {
3737
output.missing = (strlen == static_cast<uint32_t>(-1));
3838

3939
if (!output.missing) {
40-
auto& str = output.value;
41-
str.resize(strlen);
40+
output.value.reserve(strlen); // don't resize and use extract() on string::data, as that pointer is read-only AFAICT.
4241
for (size_t i = 0; i < strlen; ++i) {
4342
if (!src.advance()) {
4443
throw empty_error();
4544
}
46-
str[i] = src.get();
45+
output.value.push_back(as_char(src.get()));
4746
}
4847

4948
/* String encoding is stored in the gp field, from bits 12 to 27 in the header.

include/rds2cpp/utils_parse.hpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,18 @@ inline std::runtime_error empty_error() {
1616
return std::runtime_error("no more bytes to read");
1717
}
1818

19+
// Bulk-read helper: asks `src` to advance and copy `len` bytes into `output`
// through a single advance_and_extract() call, instead of a per-byte
// advance()/get() loop.
//
// src    - byte source; must provide advance_and_extract(len, output).
// len    - number of bytes requested.
// output - destination buffer; presumably must have room for at least `len`
//          bytes (the caller is responsible for this - not checked here).
//
// Throws empty_error() when the value returned by advance_and_extract() is not
// equal to `len`, i.e. the request could not be satisfied in full.
template<class Source_>
20+
void quick_extract(Source_& src, size_t len, unsigned char* output) {
21+
auto extracted = src.advance_and_extract(len, output);
22+
// Anything other than a full read is reported as running out of input.
if (extracted != len) {
23+
throw empty_error();
24+
}
25+
}
26+
27+
// Converts a raw byte into a plain `char` by reading the byte's storage
// through a `const char*`, rather than performing an arithmetic conversion.
// Used when appending bytes obtained from the source to a std::string.
inline char as_char(unsigned char val) {
28+
return *reinterpret_cast<const char*>(&val); // make sure we interpret this as a char.
29+
}
30+
1931
template<class Source_>
2032
size_t get_length(Source_& src) {
2133
uint32_t initial = 0;
@@ -63,13 +75,7 @@ typedef std::array<unsigned char, 4> Header;
6375
template<class Source_>
6476
Header parse_header(Source_& src) try {
6577
Header details;
66-
int i = 0;
67-
for (int b = 0; b < 4; ++b, ++i) {
68-
if (!src.advance()) {
69-
throw empty_error();
70-
}
71-
details[i] = src.get();
72-
}
78+
quick_extract(src, details.size(), details.data());
7379
return details;
7480
} catch (std::exception& e) {
7581
throw traceback("failed to parse the R object header", e);

0 commit comments

Comments
 (0)