Skip to content

Commit 93b69b7

Browse files
Update vendored DuckDB sources to ee6e77c3a4
1 parent dfa5e72 commit 93b69b7

597 files changed

Lines changed: 25186 additions & 21047 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

src/duckdb/extension/icu/third_party/icu/common/appendable.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Appendable::~Appendable() {}
2525
UBool
2626
Appendable::appendCodePoint(UChar32 c) {
2727
if(c<=0xffff) {
28-
return appendCodeUnit((char16_t)c);
28+
return appendCodeUnit(static_cast<char16_t>(c));
2929
} else {
3030
return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c));
3131
}

src/duckdb/extension/icu/third_party/icu/common/bmpset.cpp

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
7575
int32_t trail=start&0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits.
7676

7777
// Set one bit indicating an all-one block.
78-
uint32_t bits=(uint32_t)1<<lead;
78+
uint32_t bits = static_cast<uint32_t>(1) << lead;
7979
if((start+1)==limit) { // Single-character shortcut.
8080
table[trail]|=bits;
8181
return;
@@ -100,9 +100,9 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
100100
++lead;
101101
}
102102
if(lead<limitLead) {
103-
bits=~(((unsigned)1<<lead)-1);
103+
bits = ~((static_cast<unsigned>(1) << lead) - 1);
104104
if(limitLead<0x20) {
105-
bits&=((unsigned)1<<limitLead)-1;
105+
bits &= (static_cast<unsigned>(1) << limitLead) - 1;
106106
}
107107
for(trail=0; trail<64; ++trail) {
108108
table[trail]|=bits;
@@ -111,7 +111,7 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
111111
// limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
112112
// In that case, bits=1<<limitLead is undefined but the bits value
113113
// is not used because trail<limitTrail is already false.
114-
bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
114+
bits = static_cast<uint32_t>(1) << ((limitLead == 0x20) ? (limitLead - 1) : limitLead);
115115
for(trail=0; trail<limitTrail; ++trail) {
116116
table[trail]|=bits;
117117
}
@@ -290,22 +290,22 @@ int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
290290

291291
UBool
292292
BMPSet::contains(UChar32 c) const {
293-
if((uint32_t)c<=0xff) {
294-
return (UBool)latin1Contains[c];
295-
} else if((uint32_t)c<=0x7ff) {
296-
return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
297-
} else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
293+
if (static_cast<uint32_t>(c) <= 0xff) {
294+
return latin1Contains[c];
295+
} else if (static_cast<uint32_t>(c) <= 0x7ff) {
296+
return (table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0;
297+
} else if (static_cast<uint32_t>(c) < 0xd800 || (c >= 0xe000 && c <= 0xffff)) {
298298
int lead=c>>12;
299299
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
300300
if(twoBits<=1) {
301301
// All 64 code points with the same bits 15..6
302302
// are either in the set or not.
303-
return (UBool)twoBits;
303+
return twoBits;
304304
} else {
305305
// Look up the code point in its 4k block of code points.
306306
return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
307307
}
308-
} else if((uint32_t)c<=0x10ffff) {
308+
} else if (static_cast<uint32_t>(c) <= 0x10ffff) {
309309
// surrogate or supplementary code point
310310
return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
311311
} else {
@@ -332,7 +332,7 @@ BMPSet::span(const char16_t *s, const char16_t *limit, USetSpanCondition spanCon
332332
break;
333333
}
334334
} else if(c<=0x7ff) {
335-
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
335+
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) == 0) {
336336
break;
337337
}
338338
} else if(c<0xd800 || c>=0xe000) {
@@ -372,7 +372,7 @@ BMPSet::span(const char16_t *s, const char16_t *limit, USetSpanCondition spanCon
372372
break;
373373
}
374374
} else if(c<=0x7ff) {
375-
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
375+
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) {
376376
break;
377377
}
378378
} else if(c<0xd800 || c>=0xe000) {
@@ -421,7 +421,7 @@ BMPSet::spanBack(const char16_t *s, const char16_t *limit, USetSpanCondition spa
421421
break;
422422
}
423423
} else if(c<=0x7ff) {
424-
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
424+
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) == 0) {
425425
break;
426426
}
427427
} else if(c<0xd800 || c>=0xe000) {
@@ -464,7 +464,7 @@ BMPSet::spanBack(const char16_t *s, const char16_t *limit, USetSpanCondition spa
464464
break;
465465
}
466466
} else if(c<=0x7ff) {
467-
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
467+
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) {
468468
break;
469469
}
470470
} else if(c<0xd800 || c>=0xe000) {
@@ -527,7 +527,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
527527
b=*s;
528528
} while(U8_IS_SINGLE(b));
529529
}
530-
length=(int32_t)(limit-s);
530+
length = static_cast<int32_t>(limit - s);
531531
}
532532

533533
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
@@ -547,7 +547,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
547547
* the truncated sequence.
548548
*/
549549
b=*(limit-1);
550-
if((int8_t)b<0) {
550+
if (static_cast<int8_t>(b) < 0) {
551551
// b>=0x80: lead or trail byte
552552
if(b<0xc0) {
553553
// single trail byte, check for preceding 3- or 4-byte lead byte
@@ -602,15 +602,15 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
602602
if(b>=0xe0) {
603603
if(b<0xf0) {
604604
if( /* handle U+0000..U+FFFF inline */
605-
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
606-
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f
605+
(t1 = static_cast<uint8_t>(s[0] - 0x80)) <= 0x3f &&
606+
(t2 = static_cast<uint8_t>(s[1] - 0x80)) <= 0x3f
607607
) {
608608
b&=0xf;
609609
uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
610610
if(twoBits<=1) {
611611
// All 64 code points with this lead byte and middle trail byte
612612
// are either in the set or not.
613-
if(twoBits!=(uint32_t)spanCondition) {
613+
if (twoBits != static_cast<uint32_t>(spanCondition)) {
614614
return s-1;
615615
}
616616
} else {
@@ -624,12 +624,12 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
624624
continue;
625625
}
626626
} else if( /* handle U+10000..U+10FFFF inline */
627-
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
628-
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
629-
(t3=(uint8_t)(s[2]-0x80)) <= 0x3f
627+
(t1 = static_cast<uint8_t>(s[0] - 0x80)) <= 0x3f &&
628+
(t2 = static_cast<uint8_t>(s[1] - 0x80)) <= 0x3f &&
629+
(t3 = static_cast<uint8_t>(s[2] - 0x80)) <= 0x3f
630630
) {
631631
// Give an illegal sequence the same value as the result of contains(FFFD).
632-
UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
632+
UChar32 c = (static_cast<UChar32>(b - 0xf0) << 18) | (static_cast<UChar32>(t1) << 12) | (t2 << 6) | t3;
633633
if( ( (0x10000<=c && c<=0x10ffff) ?
634634
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
635635
containsFFFD
@@ -643,9 +643,9 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
643643
} else {
644644
if( /* handle U+0000..U+07FF inline */
645645
b>=0xc0 &&
646-
(t1=(uint8_t)(*s-0x80)) <= 0x3f
646+
(t1 = static_cast<uint8_t>(*s - 0x80)) <= 0x3f
647647
) {
648-
if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
648+
if (static_cast<USetSpanCondition>((table7FF[t1] & (static_cast<uint32_t>(1) << (b & 0x1f))) != 0) != spanCondition) {
649649
return s-1;
650650
}
651651
++s;
@@ -711,7 +711,7 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
711711
c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
712712
// c is a valid code point, not ASCII, not a surrogate
713713
if(c<=0x7ff) {
714-
if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
714+
if (static_cast<USetSpanCondition>((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) != spanCondition) {
715715
return prev+1;
716716
}
717717
} else if(c<=0xffff) {
@@ -720,7 +720,7 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
720720
if(twoBits<=1) {
721721
// All 64 code points with the same bits 15..6
722722
// are either in the set or not.
723-
if(twoBits!=(uint32_t)spanCondition) {
723+
if (twoBits != static_cast<uint32_t>(spanCondition)) {
724724
return prev+1;
725725
}
726726
} else {

src/duckdb/extension/icu/third_party/icu/common/bmpset.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ class BMPSet : public UMemory {
156156
};
157157

158158
inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
159-
return (UBool)(findCodePoint(c, lo, hi) & 1);
159+
return findCodePoint(c, lo, hi) & 1;
160160
}
161161

162162
U_NAMESPACE_END

src/duckdb/extension/icu/third_party/icu/common/bytesinkutil.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
6464
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
6565
return false;
6666
}
67-
return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
67+
return appendChange(static_cast<int32_t>(limit - s), s16, s16Length, sink, edits, errorCode);
6868
}
6969

7070
void
@@ -81,15 +81,15 @@ ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *
8181
namespace {
8282

8383
// See unicode/utf8.h U8_APPEND_UNSAFE().
84-
inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
85-
inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
84+
inline uint8_t getTwoByteLead(UChar32 c) { return static_cast<uint8_t>((c >> 6) | 0xc0); }
85+
inline uint8_t getTwoByteTrail(UChar32 c) { return static_cast<uint8_t>((c & 0x3f) | 0x80); }
8686

8787
} // namespace
8888

8989
void
9090
ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
9191
U_ASSERT(0x80 <= c && c <= 0x7ff); // 2-byte UTF-8
92-
char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) };
92+
char s8[2] = {static_cast<char>(getTwoByteLead(c)), static_cast<char>(getTwoByteTrail(c))};
9393
sink.Append(s8, 2);
9494
}
9595

@@ -114,7 +114,7 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
114114
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
115115
return false;
116116
}
117-
int32_t length = (int32_t)(limit - s);
117+
int32_t length = static_cast<int32_t>(limit - s);
118118
if (length > 0) {
119119
appendNonEmptyUnchanged(s, length, sink, options, edits);
120120
}

src/duckdb/extension/icu/third_party/icu/common/bytesinkutil.h

Lines changed: 91 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,52 @@
77
#ifndef BYTESINKUTIL_H
88
#define BYTESINKUTIL_H
99

10+
#include <type_traits>
11+
1012
#include "unicode/utypes.h"
1113
#include "unicode/bytestream.h"
1214
#include "unicode/edits.h"
15+
#include "charstr.h"
1316
#include "cmemory.h"
1417
#include "uassert.h"
18+
#include "ustr_imp.h"
1519

1620
U_NAMESPACE_BEGIN
1721

1822
class ByteSink;
19-
class CharString;
2023
class Edits;
2124

25+
class U_COMMON_API CharStringByteSink : public ByteSink {
26+
public:
27+
CharStringByteSink(CharString* dest);
28+
~CharStringByteSink() override;
29+
30+
CharStringByteSink() = delete;
31+
CharStringByteSink(const CharStringByteSink&) = delete;
32+
CharStringByteSink& operator=(const CharStringByteSink&) = delete;
33+
34+
void Append(const char* bytes, int32_t n) override;
35+
36+
char* GetAppendBuffer(int32_t min_capacity,
37+
int32_t desired_capacity_hint,
38+
char* scratch,
39+
int32_t scratch_capacity,
40+
int32_t* result_capacity) override;
41+
42+
private:
43+
CharString& dest_;
44+
};
45+
46+
// CharString doesn't provide the public API that StringByteSink requires a
47+
// string class to have so this template specialization replaces the default
48+
// implementation of StringByteSink<CharString> with CharStringByteSink.
49+
template<>
50+
class StringByteSink<CharString> : public CharStringByteSink {
51+
public:
52+
StringByteSink(CharString* dest) : CharStringByteSink(dest) { }
53+
StringByteSink(CharString* dest, int32_t /*initialAppendCapacity*/) : CharStringByteSink(dest) { }
54+
};
55+
2256
class U_COMMON_API ByteSinkUtil {
2357
public:
2458
ByteSinkUtil() = delete; // all static
@@ -39,7 +73,7 @@ class U_COMMON_API ByteSinkUtil {
3973
/** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */
4074
static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
4175
ByteSink &sink, Edits *edits = nullptr) {
42-
appendCodePoint((int32_t)(nextSrc - src), c, sink, edits);
76+
appendCodePoint(static_cast<int32_t>(nextSrc - src), c, sink, edits);
4377
}
4478

4579
/** Append the two-byte character (U+0080..U+07FF). */
@@ -57,30 +91,64 @@ class U_COMMON_API ByteSinkUtil {
5791
ByteSink &sink, uint32_t options, Edits *edits,
5892
UErrorCode &errorCode);
5993

60-
private:
61-
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
62-
ByteSink &sink, uint32_t options, Edits *edits);
63-
};
64-
65-
class U_COMMON_API CharStringByteSink : public ByteSink {
66-
public:
67-
CharStringByteSink(CharString* dest);
68-
~CharStringByteSink() override;
69-
70-
CharStringByteSink() = delete;
71-
CharStringByteSink(const CharStringByteSink&) = delete;
72-
CharStringByteSink& operator=(const CharStringByteSink&) = delete;
73-
74-
void Append(const char* bytes, int32_t n) override;
94+
/**
95+
* Calls a lambda that writes to a ByteSink with a CheckedArrayByteSink
96+
* and then returns through u_terminateChars(), in order to implement
97+
* the classic ICU4C C API writing to a fixed-size buffer on top of a
98+
* contemporary C++ API.
99+
*
100+
* @param buffer receiving buffer
101+
* @param capacity capacity of receiving buffer
102+
* @param lambda that gets called with the sink as an argument
103+
* @param status set to U_BUFFER_OVERFLOW_ERROR on overflow
104+
* @return number of bytes written, or needed (in case of overflow)
105+
* @internal
106+
*/
107+
template <typename F,
108+
typename = std::enable_if_t<
109+
std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
110+
static int32_t viaByteSinkToTerminatedChars(char* buffer, int32_t capacity,
111+
F&& lambda,
112+
UErrorCode& status) {
113+
if (U_FAILURE(status)) { return 0; }
114+
CheckedArrayByteSink sink(buffer, capacity);
115+
lambda(sink, status);
116+
if (U_FAILURE(status)) { return 0; }
117+
118+
int32_t reslen = sink.NumberOfBytesAppended();
119+
120+
if (sink.Overflowed()) {
121+
status = U_BUFFER_OVERFLOW_ERROR;
122+
return reslen;
123+
}
124+
125+
return u_terminateChars(buffer, capacity, reslen, &status);
126+
}
75127

76-
char* GetAppendBuffer(int32_t min_capacity,
77-
int32_t desired_capacity_hint,
78-
char* scratch,
79-
int32_t scratch_capacity,
80-
int32_t* result_capacity) override;
128+
/**
129+
* Calls a lambda that writes to a ByteSink with a CharStringByteSink and
130+
* then returns a CharString, in order to implement a contemporary C++ API
131+
* on top of a C/C++ compatibility ByteSink API.
132+
*
133+
* @param lambda that gets called with the sink as an argument
134+
* @param status to check and report
135+
* @return the resulting string, or an empty string (in case of error)
136+
* @internal
137+
*/
138+
template <typename F,
139+
typename = std::enable_if_t<
140+
std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
141+
static CharString viaByteSinkToCharString(F&& lambda, UErrorCode& status) {
142+
if (U_FAILURE(status)) { return {}; }
143+
CharString result;
144+
CharStringByteSink sink(&result);
145+
lambda(sink, status);
146+
return result;
147+
}
81148

82149
private:
83-
CharString& dest_;
150+
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
151+
ByteSink &sink, uint32_t options, Edits *edits);
84152
};
85153

86154
U_NAMESPACE_END

0 commit comments

Comments
 (0)