Skip to content

Commit 11ca5da

Browse files
authored
AVRO-4067: Optimize First Byte of Long Decode (#3183)
1 parent a7d27e4 commit 11ca5da

File tree

1 file changed

+18
-5
lines changed

1 file changed

+18
-5
lines changed

lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -184,10 +184,25 @@ public int readInt() throws IOException {
184184
@Override
185185
public long readLong() throws IOException {
186186
ensureBounds(10);
187-
int b = buf[pos++] & 0xff;
188-
int n = b & 0x7f;
187+
188+
/*
189+
* Long values are used for many different areas of the spec, for example: a
190+
* string is encoded as a long followed by that many bytes of UTF-8 encoded
191+
* character data. Because of this, long values actually tend to be pretty small
192+
* on average, and so can often fit within the first byte of the variable-length
193+
* array. Therefore, the first byte is prioritized. For the first byte, if the
194+
* high-order bit is set, this indicates there are more bytes to read, but also
195+
* this means a first byte that is >= 0 when read as a signed Java byte has no following bytes.
196+
*/
189197
long l;
190-
if (b > 0x7f) {
198+
int b, n;
199+
if ((b = buf[pos++]) == 0) {
200+
return 0;
201+
} else if (b > 0) {
202+
// back to two's-complement (zig-zag)
203+
return (b >>> 1) ^ -(b & 1);
204+
} else {
205+
n = b & 0x7f;
191206
b = buf[pos++] & 0xff;
192207
n ^= (b & 0x7f) << 7;
193208
if (b > 0x7f) {
@@ -209,8 +224,6 @@ public long readLong() throws IOException {
209224
} else {
210225
l = n;
211226
}
212-
} else {
213-
l = n;
214227
}
215228
if (pos > limit) {
216229
throw new EOFException();

0 commit comments

Comments
 (0)