3636namespace iceberg {
3737
3838Schema::Schema (std::vector<SchemaField> fields, int32_t schema_id)
39- : StructType(std::move(fields)), schema_id_(schema_id) {}
39+ : StructType(std::move(fields)),
40+ schema_id_ (schema_id),
41+ cache_(std::make_unique<SchemaCache>(this )) {}
4042
4143Result<std::unique_ptr<Schema>> Schema::Make (std::vector<SchemaField> fields,
4244 int32_t schema_id,
@@ -156,54 +158,24 @@ bool Schema::Equals(const Schema& other) const {
156158Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName (
157159 std::string_view name, bool case_sensitive) const {
158160 if (case_sensitive) {
159- ICEBERG_ASSIGN_OR_RAISE (auto name_id_map, name_id_map_. Get (* this ));
161+ ICEBERG_ASSIGN_OR_RAISE (auto name_id_map, cache_-> GetNameIdMap ( ));
160162 auto it = name_id_map.get ().name_to_id .find (name);
161163 if (it == name_id_map.get ().name_to_id .end ()) {
162164 return std::nullopt ;
163165 };
164166 return FindFieldById (it->second );
165167 }
166- ICEBERG_ASSIGN_OR_RAISE (auto lowercase_name_to_id, lowercase_name_to_id_. Get (* this ));
168+ ICEBERG_ASSIGN_OR_RAISE (auto lowercase_name_to_id, cache_-> GetLowercaseNameToIdMap ( ));
167169 auto it = lowercase_name_to_id.get ().find (StringUtils::ToLower (name));
168170 if (it == lowercase_name_to_id.get ().end ()) {
169171 return std::nullopt ;
170172 }
171173 return FindFieldById (it->second );
172174}
173175
174- Result<std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>>>
175- Schema::InitIdToFieldMap (const Schema& self) {
176- std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>> id_to_field;
177- IdToFieldVisitor visitor (id_to_field);
178- ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (self, &visitor));
179- return id_to_field;
180- }
181-
182- Result<Schema::NameIdMap> Schema::InitNameIdMap (const Schema& self) {
183- NameIdMap name_id_map;
184- NameToIdVisitor visitor (name_id_map.name_to_id , &name_id_map.id_to_name ,
185- /* case_sensitive=*/ true );
186- ICEBERG_RETURN_UNEXPECTED (
187- VisitTypeInline (self, &visitor, /* path=*/ " " , /* short_path=*/ " " ));
188- visitor.Finish ();
189- return name_id_map;
190- }
191-
192- Result<std::unordered_map<std::string, int32_t , StringHash, std::equal_to<>>>
193- Schema::InitLowerCaseNameToIdMap (const Schema& self) {
194- std::unordered_map<std::string, int32_t , StringHash, std::equal_to<>>
195- lowercase_name_to_id;
196- NameToIdVisitor visitor (lowercase_name_to_id, /* id_to_name=*/ nullptr ,
197- /* case_sensitive=*/ false );
198- ICEBERG_RETURN_UNEXPECTED (
199- VisitTypeInline (self, &visitor, /* path=*/ " " , /* short_path=*/ " " ));
200- visitor.Finish ();
201- return lowercase_name_to_id;
202- }
203-
204176Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldById (
205177 int32_t field_id) const {
206- ICEBERG_ASSIGN_OR_RAISE (auto id_to_field, id_to_field_. Get (* this ));
178+ ICEBERG_ASSIGN_OR_RAISE (auto id_to_field, cache_-> GetIdToFieldMap ( ));
207179 auto it = id_to_field.get ().find (field_id);
208180 if (it == id_to_field.get ().end ()) {
209181 return std::nullopt ;
@@ -213,38 +185,17 @@ Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFie
213185
214186Result<std::optional<std::string_view>> Schema::FindColumnNameById (
215187 int32_t field_id) const {
216- ICEBERG_ASSIGN_OR_RAISE (auto name_id_map, name_id_map_. Get (* this ));
188+ ICEBERG_ASSIGN_OR_RAISE (auto name_id_map, cache_-> GetNameIdMap ( ));
217189 auto it = name_id_map.get ().id_to_name .find (field_id);
218190 if (it == name_id_map.get ().id_to_name .end ()) {
219191 return std::nullopt ;
220192 }
221193 return it->second ;
222194}
223195
224- Result<std::unordered_map<int32_t , std::vector<size_t >>> Schema::InitIdToPositionPath (
225- const Schema& self) {
226- PositionPathVisitor visitor;
227- ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (self, &visitor));
228- return visitor.Finish ();
229- }
230-
231- Result<int32_t > Schema::InitHighestFieldId (const Schema& self) {
232- ICEBERG_ASSIGN_OR_RAISE (auto id_to_field, self.id_to_field_ .Get (self));
233-
234- if (id_to_field.get ().empty ()) {
235- return kInitialColumnId ;
236- }
237-
238- auto max_it = std::ranges::max_element (
239- id_to_field.get (),
240- [](const auto & lhs, const auto & rhs) { return lhs.first < rhs.first ; });
241-
242- return max_it->first ;
243- }
244-
245196Result<std::unique_ptr<StructLikeAccessor>> Schema::GetAccessorById (
246197 int32_t field_id) const {
247- ICEBERG_ASSIGN_OR_RAISE (auto id_to_position_path, id_to_position_path_. Get (* this ));
198+ ICEBERG_ASSIGN_OR_RAISE (auto id_to_position_path, cache_-> GetIdToPositionPathMap ( ));
248199 if (auto it = id_to_position_path.get ().find (field_id);
249200 it != id_to_position_path.get ().cend ()) {
250201 ICEBERG_ASSIGN_OR_RAISE (auto field, FindFieldById (field_id));
@@ -322,15 +273,15 @@ Result<std::vector<std::string>> Schema::IdentifierFieldNames() const {
322273 return names;
323274}
324275
325- Result<int32_t > Schema::HighestFieldId () const { return highest_field_id_. Get (* this ); }
276+ Result<int32_t > Schema::HighestFieldId () const { return cache_-> GetHighestFieldId ( ); }
326277
327278bool Schema::SameSchema (const Schema& other) const {
328279 return fields_ == other.fields_ && identifier_field_ids_ == other.identifier_field_ids_ ;
329280}
330281
331282Status Schema::Validate (int32_t format_version) const {
332283 // Get all fields including nested ones
333- ICEBERG_ASSIGN_OR_RAISE (auto id_to_field, id_to_field_. Get (* this ));
284+ ICEBERG_ASSIGN_OR_RAISE (auto id_to_field, cache_-> GetIdToFieldMap ( ));
334285
335286 // Check each field's type and defaults
336287 for (const auto & [field_id, field_ref] : id_to_field.get ()) {
@@ -351,4 +302,75 @@ Status Schema::Validate(int32_t format_version) const {
351302 return {};
352303}
353304
305+ Result<SchemaCache::IdToFieldMapRef> SchemaCache::GetIdToFieldMap () const {
306+ return id_to_field_.Get (schema_);
307+ }
308+
309+ Result<SchemaCache::NameIdMapRef> SchemaCache::GetNameIdMap () const {
310+ return name_id_map_.Get (schema_);
311+ }
312+
313+ Result<SchemaCache::LowercaseNameToIdMapRef> SchemaCache::GetLowercaseNameToIdMap ()
314+ const {
315+ return lowercase_name_to_id_.Get (schema_);
316+ }
317+
318+ Result<SchemaCache::IdToPositionPathMapRef> SchemaCache::GetIdToPositionPathMap () const {
319+ return id_to_position_path_.Get (schema_);
320+ }
321+
322+ Result<int32_t > SchemaCache::GetHighestFieldId () const {
323+ return highest_field_id_.Get (schema_);
324+ }
325+
326+ Result<SchemaCache::IdToFieldMap> SchemaCache::InitIdToFieldMap (const Schema* schema) {
327+ std::unordered_map<int32_t , std::reference_wrapper<const SchemaField>> id_to_field;
328+ IdToFieldVisitor visitor (id_to_field);
329+ ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*schema, &visitor));
330+ return id_to_field;
331+ }
332+
333+ Result<SchemaCache::NameIdMap> SchemaCache::InitNameIdMap (const Schema* schema) {
334+ NameIdMap name_id_map;
335+ NameToIdVisitor visitor (name_id_map.name_to_id , &name_id_map.id_to_name ,
336+ /* case_sensitive=*/ true );
337+ ICEBERG_RETURN_UNEXPECTED (
338+ VisitTypeInline (*schema, &visitor, /* path=*/ " " , /* short_path=*/ " " ));
339+ visitor.Finish ();
340+ return name_id_map;
341+ }
342+
343+ Result<SchemaCache::LowercaseNameToIdMap> SchemaCache::InitLowerCaseNameToIdMap (
344+ const Schema* schema) {
345+ std::unordered_map<std::string, int32_t , StringHash, std::equal_to<>>
346+ lowercase_name_to_id;
347+ NameToIdVisitor visitor (lowercase_name_to_id, /* id_to_name=*/ nullptr ,
348+ /* case_sensitive=*/ false );
349+ ICEBERG_RETURN_UNEXPECTED (
350+ VisitTypeInline (*schema, &visitor, /* path=*/ " " , /* short_path=*/ " " ));
351+ visitor.Finish ();
352+ return lowercase_name_to_id;
353+ }
354+
355+ Result<SchemaCache::IdToPositionPathMap> SchemaCache::InitIdToPositionPath (
356+ const Schema* schema) {
357+ PositionPathVisitor visitor;
358+ ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*schema, &visitor));
359+ return visitor.Finish ();
360+ }
361+
362+ Result<int32_t > SchemaCache::InitHighestFieldId (const Schema* schema) {
363+ ICEBERG_ASSIGN_OR_RAISE (auto id_to_field, InitIdToFieldMap (schema));
364+
365+ if (id_to_field.empty ()) {
366+ return Schema::kInitialColumnId ;
367+ }
368+
369+ auto max_it = std::ranges::max_element (
370+ id_to_field,
371+ [](const auto & lhs, const auto & rhs) { return lhs.first < rhs.first ; });
372+
373+ return max_it->first ;
374+ }
375+
354376} // namespace iceberg
0 commit comments