Skip to content

Commit 39a9c8b

Browse files
authored
feat: add UpdateSchema interface skeleton (#460)
- Define `UpdateSchema` class interface with full API documentation
1 parent b9ce88f commit 39a9c8b

File tree

11 files changed

+605
-0
lines changed

11 files changed

+605
-0
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ set(ICEBERG_SOURCES
8383
update/pending_update.cc
8484
update/update_partition_spec.cc
8585
update/update_properties.cc
86+
update/update_schema.cc
8687
update/update_sort_order.cc
8788
util/bucket_util.cc
8889
util/content_file_util.cc

src/iceberg/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ iceberg_sources = files(
104104
'update/pending_update.cc',
105105
'update/update_partition_spec.cc',
106106
'update/update_properties.cc',
107+
'update/update_schema.cc',
107108
'update/update_sort_order.cc',
108109
'util/bucket_util.cc',
109110
'util/content_file_util.cc',

src/iceberg/table.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "iceberg/transaction.h"
3333
#include "iceberg/update/update_partition_spec.h"
3434
#include "iceberg/update/update_properties.h"
35+
#include "iceberg/update/update_schema.h"
3536
#include "iceberg/util/macros.h"
3637

3738
namespace iceberg {
@@ -171,6 +172,13 @@ Result<std::shared_ptr<UpdateSortOrder>> Table::NewUpdateSortOrder() {
171172
return transaction->NewUpdateSortOrder();
172173
}
173174

175+
// Opens a kUpdate transaction on this table with auto_commit enabled and returns the
// UpdateSchema created by that transaction. A failure from Transaction::Make is
// handed to ICEBERG_ASSIGN_OR_RAISE (presumably propagated to the caller).
Result<std::shared_ptr<UpdateSchema>> Table::NewUpdateSchema() {
  ICEBERG_ASSIGN_OR_RAISE(
      auto txn, Transaction::Make(shared_from_this(), Transaction::Kind::kUpdate,
                                  /*auto_commit=*/true));
  return txn->NewUpdateSchema();
}
181+
174182
Result<std::shared_ptr<StagedTable>> StagedTable::Make(
175183
TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
176184
std::string metadata_location, std::shared_ptr<FileIO> io,
@@ -221,4 +229,8 @@ Result<std::shared_ptr<UpdateProperties>> StaticTable::NewUpdateProperties() {
221229
return NotSupported("Cannot create an update properties for a static table");
222230
}
223231

232+
// Schema updates are rejected for static tables: this override always returns a
// NotSupported error and never creates an UpdateSchema.
Result<std::shared_ptr<UpdateSchema>> StaticTable::NewUpdateSchema() {
  return NotSupported("Cannot create an update schema for a static table");
}
235+
224236
} // namespace iceberg

src/iceberg/table.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ class ICEBERG_EXPORT Table : public std::enable_shared_from_this<Table> {
140140
/// changes.
141141
virtual Result<std::shared_ptr<UpdateSortOrder>> NewUpdateSortOrder();
142142

143+
/// \brief Create a new UpdateSchema to alter the columns of this table and commit the
144+
/// changes.
145+
virtual Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema();
146+
143147
protected:
144148
Table(TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
145149
std::string metadata_location, std::shared_ptr<FileIO> io,
@@ -187,6 +191,8 @@ class ICEBERG_EXPORT StaticTable final : public Table {
187191

188192
Result<std::shared_ptr<UpdateProperties>> NewUpdateProperties() override;
189193

194+
Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema() override;
195+
190196
private:
191197
using Table::Table;
192198
};

src/iceberg/transaction.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <memory>
2323

2424
#include "iceberg/catalog.h"
25+
#include "iceberg/schema.h"
2526
#include "iceberg/table.h"
2627
#include "iceberg/table_metadata.h"
2728
#include "iceberg/table_requirement.h"
@@ -30,6 +31,7 @@
3031
#include "iceberg/update/pending_update.h"
3132
#include "iceberg/update/update_partition_spec.h"
3233
#include "iceberg/update/update_properties.h"
34+
#include "iceberg/update/update_schema.h"
3335
#include "iceberg/update/update_sort_order.h"
3436
#include "iceberg/util/checked_cast.h"
3537
#include "iceberg/util/macros.h"
@@ -105,6 +107,12 @@ Status Transaction::Apply(PendingUpdate& update) {
105107
metadata_builder_->AddPartitionSpec(std::move(result.spec));
106108
}
107109
} break;
110+
case PendingUpdate::Kind::kUpdateSchema: {
111+
auto& update_schema = internal::checked_cast<UpdateSchema&>(update);
112+
ICEBERG_ASSIGN_OR_RAISE(auto result, update_schema.Apply());
113+
metadata_builder_->SetCurrentSchema(std::move(result.schema),
114+
result.new_last_column_id);
115+
} break;
108116
default:
109117
return NotSupported("Unsupported pending update: {}",
110118
static_cast<int32_t>(update.kind()));
@@ -178,4 +186,11 @@ Result<std::shared_ptr<UpdateSortOrder>> Transaction::NewUpdateSortOrder() {
178186
return update_sort_order;
179187
}
180188

189+
// Builds an UpdateSchema bound to this transaction, registers it through AddUpdate,
// and returns it. Errors from either step go through the ICEBERG_* macros
// (presumably returned to the caller).
Result<std::shared_ptr<UpdateSchema>> Transaction::NewUpdateSchema() {
  ICEBERG_ASSIGN_OR_RAISE(auto pending, UpdateSchema::Make(shared_from_this()));
  ICEBERG_RETURN_UNEXPECTED(AddUpdate(pending));
  return pending;
}
195+
181196
} // namespace iceberg

src/iceberg/transaction.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ class ICEBERG_EXPORT Transaction : public std::enable_shared_from_this<Transacti
6868
/// changes.
6969
Result<std::shared_ptr<UpdateSortOrder>> NewUpdateSortOrder();
7070

71+
/// \brief Create a new UpdateSchema to alter the columns of this table and commit the
72+
/// changes.
73+
Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema();
74+
7175
private:
7276
Transaction(std::shared_ptr<Table> table, Kind kind, bool auto_commit,
7377
std::unique_ptr<TableMetadataBuilder> metadata_builder);

src/iceberg/type_fwd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ class Transaction;
190190
class PendingUpdate;
191191
class UpdatePartitionSpec;
192192
class UpdateProperties;
193+
class UpdateSchema;
193194
class UpdateSortOrder;
194195

195196
/// ----------------------------------------------------------------------------

src/iceberg/update/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ install_headers(
1919
[
2020
'pending_update.h',
2121
'update_partition_spec.h',
22+
'update_schema.h',
2223
'update_sort_order.h',
2324
'update_properties.h',
2425
],

src/iceberg/update/pending_update.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class ICEBERG_EXPORT PendingUpdate : public ErrorCollector {
4444
enum class Kind : uint8_t {
4545
kUpdatePartitionSpec,
4646
kUpdateProperties,
47+
kUpdateSchema,
4748
kUpdateSortOrder,
4849
};
4950

src/iceberg/update/update_schema.cc

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/update/update_schema.h"
21+
22+
#include <memory>
23+
#include <optional>
24+
#include <ranges>
25+
#include <string>
26+
#include <string_view>
27+
#include <unordered_set>
28+
#include <utility>
29+
30+
#include "iceberg/schema.h"
31+
#include "iceberg/table_metadata.h"
32+
#include "iceberg/transaction.h"
33+
#include "iceberg/type.h"
34+
#include "iceberg/util/error_collector.h"
35+
#include "iceberg/util/macros.h"
36+
37+
namespace iceberg {
38+
39+
// Factory for UpdateSchema. Checks that `transaction` is non-null via
// ICEBERG_PRECHECK, then constructs the instance with `new` (rather than
// std::make_shared) and wraps it in a shared_ptr.
Result<std::shared_ptr<UpdateSchema>> UpdateSchema::Make(
    std::shared_ptr<Transaction> transaction) {
  ICEBERG_PRECHECK(transaction != nullptr,
                   "Cannot create UpdateSchema without transaction");
  std::shared_ptr<UpdateSchema> instance(new UpdateSchema(std::move(transaction)));
  return instance;
}
45+
46+
// Seeds the update from the transaction's current table metadata: the current
// schema, the last assigned column id, and the schema's identifier field names.
// If a lookup fails, the error is recorded with AddError and construction stops
// early, leaving the members not yet assigned at their defaults.
UpdateSchema::UpdateSchema(std::shared_ptr<Transaction> transaction)
    : PendingUpdate(std::move(transaction)) {
  const TableMetadata& metadata = transaction_->current();

  // Starting schema.
  auto current_schema = metadata.Schema();
  if (!current_schema.has_value()) {
    AddError(current_schema.error());
    return;
  }
  schema_ = std::move(current_schema.value());

  // Column id high-water mark, taken from the base metadata.
  last_column_id_ = metadata.last_column_id;

  // Identifier fields start out as whatever the current schema declares.
  auto identifier_names = schema_->IdentifierFieldNames();
  if (!identifier_names.has_value()) {
    AddError(identifier_names.error());
    return;
  }
  identifier_field_names_ =
      identifier_names.value() | std::ranges::to<std::unordered_set<std::string>>();
}
70+
71+
// Destructor is defaulted here in the .cc file rather than in the header.
UpdateSchema::~UpdateSchema() = default;
72+
73+
// Sets the allow_incompatible_changes_ flag to true and returns *this for chaining.
// The flag is only written here; there is no way to clear it through this call.
UpdateSchema& UpdateSchema::AllowIncompatibleChanges() {
  allow_incompatible_changes_ = true;
  return *this;
}
77+
78+
// Stores `case_sensitive` in case_sensitive_ and returns *this for chaining.
UpdateSchema& UpdateSchema::CaseSensitive(bool case_sensitive) {
  case_sensitive_ = case_sensitive;
  return *this;
}
82+
83+
// Adds an optional top-level column. A name containing '.' fails the
// ICEBERG_BUILDER_CHECK below (nested columns must use the overload that takes the
// parent explicitly); otherwise the call is forwarded to AddColumnInternal with no
// parent and is_optional=true.
UpdateSchema& UpdateSchema::AddColumn(std::string_view name, std::shared_ptr<Type> type,
                                      std::string_view doc) {
  ICEBERG_BUILDER_CHECK(name.find('.') == std::string_view::npos,
                        "Cannot add column with ambiguous name: {}, use "
                        "AddColumn(parent, name, type, doc)",
                        name);
  return AddColumnInternal(/*parent=*/std::nullopt, name, /*is_optional=*/true,
                           std::move(type), doc);
}
93+
94+
// Adds an optional column under `parent` (or at the top level when `parent` is
// empty) by forwarding to AddColumnInternal with is_optional=true. No validation
// happens in this overload itself.
UpdateSchema& UpdateSchema::AddColumn(std::optional<std::string_view> parent,
                                      std::string_view name, std::shared_ptr<Type> type,
                                      std::string_view doc) {
  // std::optional<std::string_view> is cheap to copy, so it is passed on by value.
  return AddColumnInternal(parent, name, /*is_optional=*/true, std::move(type), doc);
}
100+
101+
// Adds a required top-level column. A name containing '.' fails the
// ICEBERG_BUILDER_CHECK below (nested columns must use the overload that takes the
// parent explicitly); otherwise the call is forwarded to AddColumnInternal with no
// parent and is_optional=false.
UpdateSchema& UpdateSchema::AddRequiredColumn(std::string_view name,
                                              std::shared_ptr<Type> type,
                                              std::string_view doc) {
  ICEBERG_BUILDER_CHECK(name.find('.') == std::string_view::npos,
                        "Cannot add column with ambiguous name: {}, use "
                        "AddRequiredColumn(parent, name, type, doc)",
                        name);
  return AddColumnInternal(/*parent=*/std::nullopt, name, /*is_optional=*/false,
                           std::move(type), doc);
}
112+
113+
// Adds a required column under `parent` (or at the top level when `parent` is
// empty) by forwarding to AddColumnInternal with is_optional=false. No validation
// happens in this overload itself.
UpdateSchema& UpdateSchema::AddRequiredColumn(std::optional<std::string_view> parent,
                                              std::string_view name,
                                              std::shared_ptr<Type> type,
                                              std::string_view doc) {
  // std::optional<std::string_view> is cheap to copy, so it is passed on by value.
  return AddColumnInternal(parent, name, /*is_optional=*/false, std::move(type), doc);
}
120+
121+
// Stub: changing a column's type is not implemented yet. `name` and `new_type` are
// ignored; a NotImplemented error is recorded via AddError and *this is returned.
UpdateSchema& UpdateSchema::UpdateColumn(std::string_view name,
                                         std::shared_ptr<PrimitiveType> new_type) {
  // TODO(Guotao Yu): Implement UpdateColumn
  AddError(NotImplemented("UpdateSchema::UpdateColumn not implemented"));
  return *this;
}
127+
128+
// Stub: changing a column's doc string is not implemented yet. `name` and `new_doc`
// are ignored; a NotImplemented error is recorded via AddError and *this is returned.
UpdateSchema& UpdateSchema::UpdateColumnDoc(std::string_view name,
                                            std::string_view new_doc) {
  // TODO(Guotao Yu): Implement UpdateColumnDoc
  AddError(NotImplemented("UpdateSchema::UpdateColumnDoc not implemented"));
  return *this;
}
134+
135+
// Shared back end of the AddColumn / AddRequiredColumn overloads. Currently a stub:
// every argument is ignored, a NotImplemented error is recorded via AddError, and
// *this is returned, so all four public overloads report that error once their own
// checks pass.
UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> parent,
                                              std::string_view name, bool is_optional,
                                              std::shared_ptr<Type> type,
                                              std::string_view doc) {
  // TODO(Guotao Yu): Implement AddColumnInternal logic
  // (locating the parent, validating the new field, and so on belongs here).
  AddError(NotImplemented("UpdateSchema::AddColumnInternal not implemented"));
  return *this;
}
144+
145+
// Stub: renaming a column is not implemented yet. `name` and `new_name` are ignored;
// a NotImplemented error is recorded via AddError and *this is returned.
UpdateSchema& UpdateSchema::RenameColumn(std::string_view name,
                                         std::string_view new_name) {
  // TODO(Guotao Yu): Implement RenameColumn
  AddError(NotImplemented("UpdateSchema::RenameColumn not implemented"));
  return *this;
}
151+
152+
// Stub: relaxing a column to optional is not implemented yet. `name` is ignored;
// a NotImplemented error is recorded via AddError and *this is returned.
UpdateSchema& UpdateSchema::MakeColumnOptional(std::string_view name) {
  // TODO(Guotao Yu): Implement MakeColumnOptional
  AddError(NotImplemented("UpdateSchema::MakeColumnOptional not implemented"));
  return *this;
}
157+
158+
// Stub: making a column required is not implemented yet. `name` is ignored;
// a NotImplemented error is recorded via AddError and *this is returned.
UpdateSchema& UpdateSchema::RequireColumn(std::string_view name) {
  // TODO(Guotao Yu): Implement RequireColumn
  AddError(NotImplemented("UpdateSchema::RequireColumn not implemented"));
  return *this;
}
163+
164+
// Stub: deleting a column is not implemented yet. `name` is ignored;
// a NotImplemented error is recorded via AddError and *this is returned.
UpdateSchema& UpdateSchema::DeleteColumn(std::string_view name) {
  // TODO(Guotao Yu): Implement DeleteColumn
  AddError(NotImplemented("UpdateSchema::DeleteColumn not implemented"));
  return *this;
}
169+
170+
// Stub: moving a column to the first position is not implemented yet. `name` is
// ignored; a NotImplemented error is recorded via AddError and *this is returned.
UpdateSchema& UpdateSchema::MoveFirst(std::string_view name) {
  // TODO(Guotao Yu): Implement MoveFirst
  AddError(NotImplemented("UpdateSchema::MoveFirst not implemented"));
  return *this;
}
175+
176+
// Stub: moving a column before another is not implemented yet. `name` and
// `before_name` are ignored; a NotImplemented error is recorded via AddError and
// *this is returned.
UpdateSchema& UpdateSchema::MoveBefore(std::string_view name,
                                       std::string_view before_name) {
  // TODO(Guotao Yu): Implement MoveBefore
  AddError(NotImplemented("UpdateSchema::MoveBefore not implemented"));
  return *this;
}
182+
183+
// Stub: moving a column after another is not implemented yet. `name` and
// `after_name` are ignored; a NotImplemented error is recorded via AddError and
// *this is returned.
UpdateSchema& UpdateSchema::MoveAfter(std::string_view name,
                                      std::string_view after_name) {
  // TODO(Guotao Yu): Implement MoveAfter
  AddError(NotImplemented("UpdateSchema::MoveAfter not implemented"));
  return *this;
}
189+
190+
// Stub: union-by-name with another schema is not implemented yet. `new_schema` is
// ignored; a NotImplemented error is recorded via AddError and *this is returned.
UpdateSchema& UpdateSchema::UnionByNameWith(std::shared_ptr<Schema> new_schema) {
  // TODO(Guotao Yu): Implement UnionByNameWith
  AddError(NotImplemented("UpdateSchema::UnionByNameWith not implemented"));
  return *this;
}
195+
196+
// Replaces the pending identifier field names with `names`. Each view is copied
// into an owned std::string; duplicates collapse because the destination is a set.
// No validation against the schema is performed here.
UpdateSchema& UpdateSchema::SetIdentifierFields(
    const std::span<std::string_view>& names) {
  std::unordered_set<std::string> replacement;
  for (std::string_view field_name : names) {
    replacement.emplace(field_name);
  }
  identifier_field_names_ = std::move(replacement);
  return *this;
}
201+
202+
// Stub: producing the updated schema is not implemented yet, so this always returns
// a NotImplemented error and never yields an ApplyResult.
Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
  // TODO(Guotao Yu): Implement Apply
  return NotImplemented("UpdateSchema::Apply not implemented");
}
206+
207+
} // namespace iceberg

0 commit comments

Comments
 (0)