Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 174 additions & 0 deletions schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -922,3 +922,177 @@ func TestHighestFieldIDListType(t *testing.T) {
)
assert.Equal(t, 2, tableSchema.HighestFieldID())
}

// TestSchemaWithGeometryGeographyTypes verifies that a schema mixing a bare
// geometry column with parameterized geometry and geography columns marshals
// to the expected JSON document and compares equal to itself after a
// marshal/unmarshal round trip.
func TestSchemaWithGeometryGeographyTypes(t *testing.T) {
	// Expected serialized form; JSONEq compares it structurally.
	const wantJSON = `{
"type": "struct",
"schema-id": 1,
"identifier-field-ids": [],
"fields": [
{"id": 1, "name": "id", "type": "long", "required": true},
{"id": 2, "name": "simple_point", "type": "geometry", "required": false},
{"id": 3, "name": "location", "type": "geometry(srid:4326)", "required": false},
{"id": 4, "name": "service_area", "type": "geography(srid:4269, karney)", "required": false}
]
}`

	geometryWithCRS, err := iceberg.GeometryTypeOf("srid:4326")
	require.NoError(t, err)
	geographyWithCRS, err := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmKarney)
	require.NoError(t, err)

	columns := []iceberg.NestedField{
		{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true},
		{ID: 2, Name: "simple_point", Type: iceberg.GeometryType{}, Required: false},
		{ID: 3, Name: "location", Type: geometryWithCRS, Required: false},
		{ID: 4, Name: "service_area", Type: geographyWithCRS, Required: false},
	}
	original := iceberg.NewSchema(1, columns...)

	encoded, err := json.Marshal(original)
	require.NoError(t, err)
	assert.JSONEq(t, wantJSON, string(encoded))

	// Decode what was just encoded and make sure nothing was lost.
	roundTripped := new(iceberg.Schema)
	require.NoError(t, json.Unmarshal(encoded, roundTripped))
	assert.True(t, original.Equals(roundTripped))
}

// TestNestedFieldToStringGeographyGeometry checks NestedField.String for
// geometry and geography columns, both with and without type parameters.
// Each case runs as a subtest named after the field.
func TestNestedFieldToStringGeographyGeometry(t *testing.T) {
	parameterizedGeometry, err := iceberg.GeometryTypeOf("srid:3857")
	require.NoError(t, err)
	parameterizedGeography, err := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmKarney)
	require.NoError(t, err)

	cases := []struct {
		field    iceberg.NestedField
		expected string
	}{
		{
			field:    iceberg.NestedField{ID: 1, Name: "point", Type: iceberg.GeometryType{}, Required: false},
			expected: "1: point: optional geometry",
		},
		{
			field:    iceberg.NestedField{ID: 2, Name: "location", Type: parameterizedGeometry, Required: true},
			expected: "2: location: required geometry(srid:3857)",
		},
		{
			field:    iceberg.NestedField{ID: 3, Name: "area", Type: iceberg.GeographyType{}, Required: false},
			expected: "3: area: optional geography",
		},
		{
			field:    iceberg.NestedField{ID: 4, Name: "region", Type: parameterizedGeography, Required: false},
			expected: "4: region: optional geography(srid:4269, karney)",
		},
	}

	for _, tc := range cases {
		// Copy into per-iteration locals so the closure does not depend on
		// the loop variable's lifetime.
		field, want := tc.field, tc.expected
		t.Run(field.Name, func(t *testing.T) {
			assert.Equal(t, want, field.String())
		})
	}
}

// TestSchemaWithGeometryInNestedStructures places geometry and geography
// types inside a list element, a map value and a struct field, then checks
// that the schema survives a JSON round trip and that the list and map
// columns render the expected string form.
func TestSchemaWithGeometryInNestedStructures(t *testing.T) {
	pointType, err := iceberg.GeometryTypeOf("srid:4326")
	require.NoError(t, err)
	regionType, err := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmSpherical)
	require.NoError(t, err)

	// Build each nested container up front so the schema literal stays flat.
	locationsList := &iceberg.ListType{
		ElementID:       2,
		Element:         pointType,
		ElementRequired: true,
	}
	regionMap := &iceberg.MapType{
		KeyID:         4,
		KeyType:       iceberg.PrimitiveTypes.String,
		ValueID:       5,
		ValueType:     regionType,
		ValueRequired: false,
	}
	placeStruct := &iceberg.StructType{
		FieldList: []iceberg.NestedField{
			{ID: 7, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: true},
			{ID: 8, Name: "coords", Type: iceberg.GeometryType{}, Required: false},
		},
	}

	original := iceberg.NewSchema(1,
		iceberg.NestedField{ID: 1, Name: "locations", Type: locationsList, Required: true},
		iceberg.NestedField{ID: 3, Name: "region_data", Type: regionMap, Required: false},
		iceberg.NestedField{ID: 6, Name: "place", Type: placeStruct, Required: false},
	)

	encoded, err := json.Marshal(original)
	require.NoError(t, err)

	roundTripped := new(iceberg.Schema)
	require.NoError(t, json.Unmarshal(encoded, roundTripped))
	assert.True(t, original.Equals(roundTripped))

	listColumn := original.Field(0)
	assert.Equal(t, "1: locations: required list<geometry(srid:4326)>", listColumn.String())
	mapColumn := original.Field(1)
	assert.Equal(t, "3: region_data: optional map<string, geography(srid:4269, spherical)>", mapColumn.String())
}

// TestPruneColumnsWithGeometry selects field IDs 1 and 3 from a three-column
// schema and expects PruneColumns to return a schema holding only the id and
// geometry columns.
func TestPruneColumnsWithGeometry(t *testing.T) {
	locationType, err := iceberg.GeometryTypeOf("srid:4326")
	require.NoError(t, err)

	// The same field values feed both the input and the expected schema.
	idField := iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true}
	nameField := iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false}
	locationField := iceberg.NestedField{ID: 3, Name: "location", Type: locationType, Required: false}

	full := iceberg.NewSchema(1, idField, nameField, locationField)
	want := iceberg.NewSchema(1, idField, locationField)

	selected := map[int]iceberg.Void{1: {}, 3: {}}
	got, err := iceberg.PruneColumns(full, selected, false)
	require.NoError(t, err)

	assert.True(t, got.Equals(want))
}

// TestSchemaIndexByIDWithGeography indexes a two-column schema by field ID
// and checks that the geography column is found under ID 2 with its type and
// name intact.
func TestSchemaIndexByIDWithGeography(t *testing.T) {
	areaType, err := iceberg.GeographyTypeOf("srid:4269", iceberg.EdgeAlgorithmKarney)
	require.NoError(t, err)

	columns := []iceberg.NestedField{
		{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true},
		{ID: 2, Name: "area", Type: areaType, Required: false},
	}

	byID, err := iceberg.IndexByID(iceberg.NewSchema(1, columns...))
	require.NoError(t, err)

	assert.Len(t, byID, 2)

	areaField := byID[2]
	assert.Equal(t, areaType, areaField.Type)
	assert.Equal(t, "area", areaField.Name)
}

// TestSchemaFindColumnNameWithGeometryGeography looks up the column names of
// a bare geometry column and a bare geography column by field ID.
func TestSchemaFindColumnNameWithGeometryGeography(t *testing.T) {
	schema := iceberg.NewSchema(1,
		iceberg.NestedField{ID: 1, Name: "point", Type: iceberg.GeometryType{}, Required: false},
		iceberg.NestedField{ID: 2, Name: "region", Type: iceberg.GeographyType{}, Required: false},
	)

	// Lookups are checked in field-ID order.
	lookups := []struct {
		id   int
		name string
	}{
		{id: 1, name: "point"},
		{id: 2, name: "region"},
	}

	for _, want := range lookups {
		got, found := schema.FindColumnName(want.id)
		assert.True(t, found)
		assert.Equal(t, want.name, got)
	}
}
82 changes: 82 additions & 0 deletions table/metadata_builder_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1205,6 +1205,8 @@ func TestUnsupportedTypes(t *testing.T) {
TestTypes := []iceberg.Type{
iceberg.TimestampNsType{},
iceberg.TimestampTzNsType{},
iceberg.GeometryType{},
iceberg.GeographyType{},
}
for _, typ := range TestTypes {
for unsupportedVersion := 1; unsupportedVersion < minFormatVersionForType(typ); unsupportedVersion++ {
Expand Down Expand Up @@ -1309,3 +1311,83 @@ func generateTypeSchema(typ iceberg.Type) *iceberg.Schema {

return sc
}

// TestGeometryGeographyNullOnlyDefaults exercises checkSchemaCompatibility
// for geometry and geography columns: at format version 3 a non-null initial
// or write default must be rejected with a "columns must default to null"
// error, a column without defaults must pass, and at format version 2 the
// error must report that the type is not supported until v3.
func TestGeometryGeographyNullOnlyDefaults(t *testing.T) {
	// locationSchema builds a schema with one optional "location" column of
	// the given type, letting the caller tweak the field before it is wrapped.
	locationSchema := func(typ iceberg.Type, adjust func(*iceberg.NestedField)) *iceberg.Schema {
		field := iceberg.NestedField{
			Type:     typ,
			ID:       1,
			Name:     "location",
			Required: false,
		}
		if adjust != nil {
			adjust(&field)
		}

		return iceberg.NewSchema(0, field)
	}

	// Each helper attaches a pointer to a fresh string as the default value.
	withInitialDefault := func(f *iceberg.NestedField) {
		wkt := "POINT(0 0)"
		f.InitialDefault = &wkt
	}
	withWriteDefault := func(f *iceberg.NestedField) {
		wkt := "POINT(0 0)"
		f.WriteDefault = &wkt
	}

	// requireSchemaError asserts err is an invalid-schema error whose message
	// contains fragment.
	requireSchemaError := func(t *testing.T, err error, fragment string) {
		t.Helper()
		require.Error(t, err)
		require.ErrorContains(t, err, fragment)
		require.ErrorIs(t, err, iceberg.ErrInvalidSchema)
	}

	kinds := []struct {
		name string
		typ  iceberg.Type
	}{
		{name: "geometry", typ: iceberg.GeometryType{}},
		{name: "geography", typ: iceberg.GeographyType{}},
	}

	for _, kind := range kinds {
		t.Run(kind.name+" with non-null initial default", func(t *testing.T) {
			sc := locationSchema(kind.typ, withInitialDefault)
			requireSchemaError(t, checkSchemaCompatibility(sc, 3), "columns must default to null")
		})

		t.Run(kind.name+" with non-null write default", func(t *testing.T) {
			sc := locationSchema(kind.typ, withWriteDefault)
			requireSchemaError(t, checkSchemaCompatibility(sc, 3), "columns must default to null")
		})

		t.Run(kind.name+" with null defaults", func(t *testing.T) {
			sc := locationSchema(kind.typ, nil)
			require.NoError(t, checkSchemaCompatibility(sc, 3))
		})

		t.Run(kind.name+" in v2 with non-null initial default", func(t *testing.T) {
			sc := locationSchema(kind.typ, withInitialDefault)
			requireSchemaError(t, checkSchemaCompatibility(sc, 2), "is not supported until v3")
		})
	}
}
44 changes: 36 additions & 8 deletions table/metadata_schema_comptability.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,17 @@ func (e ErrIncompatibleSchema) Error() string {
problems.WriteString(fmt.Sprintf("\n- invalid type for %s: %s is not supported until v%d", f.ColName, f.Field.Type, f.UnsupportedType.MinFormatVersion))
}
if f.InvalidDefault != nil {
problems.WriteString(fmt.Sprintf("\n- invalid initial default for %s: non-null default (%v) is not supported until v%d", f.ColName, f.Field.InitialDefault, f.InvalidDefault.MinFormatVersion))
switch f.Field.Type.(type) {
case iceberg.GeometryType, iceberg.GeographyType:
if f.Field.InitialDefault != nil {
problems.WriteString(fmt.Sprintf("\n- invalid initial default for %s: %s columns must default to null", f.ColName, f.Field.Type))
}
if f.Field.WriteDefault != nil {
problems.WriteString(fmt.Sprintf("\n- invalid write default for %s: %s columns must default to null", f.ColName, f.Field.Type))
}
default:
problems.WriteString(fmt.Sprintf("\n- invalid initial default for %s: non-null default (%v) is not supported until v%d", f.ColName, f.Field.InitialDefault, f.InvalidDefault.MinFormatVersion))
}
}
}

Expand Down Expand Up @@ -96,12 +106,30 @@ func checkSchemaCompatibility(sc *iceberg.Schema, formatVersion int) error {
})
}

if field.InitialDefault != nil && formatVersion < defaultValuesMinFormatVersion {
problems = append(problems, IncompatibleField{
Field: field,
ColName: colName,
InvalidDefault: &InvalidDefault{MinFormatVersion: defaultValuesMinFormatVersion, WriteDefault: field.InitialDefault},
})
switch field.Type.(type) {
case iceberg.GeometryType, iceberg.GeographyType:
if field.InitialDefault != nil {
problems = append(problems, IncompatibleField{
Field: field,
ColName: colName,
InvalidDefault: &InvalidDefault{MinFormatVersion: formatVersion, WriteDefault: field.InitialDefault},
})
}
if field.WriteDefault != nil {
problems = append(problems, IncompatibleField{
Field: field,
ColName: colName,
InvalidDefault: &InvalidDefault{MinFormatVersion: formatVersion, WriteDefault: field.WriteDefault},
})
}
default:
if field.InitialDefault != nil && formatVersion < defaultValuesMinFormatVersion {
problems = append(problems, IncompatibleField{
Field: field,
ColName: colName,
InvalidDefault: &InvalidDefault{MinFormatVersion: defaultValuesMinFormatVersion, WriteDefault: field.InitialDefault},
})
}
}
}

Expand All @@ -117,7 +145,7 @@ func checkSchemaCompatibility(sc *iceberg.Schema, formatVersion int) error {
// version number for types that require newer format versions.
func minFormatVersionForType(t iceberg.Type) int {
switch t.(type) {
case iceberg.TimestampNsType, iceberg.TimestampTzNsType:
case iceberg.TimestampNsType, iceberg.TimestampTzNsType, iceberg.GeometryType, iceberg.GeographyType:
return 3
default:
// All other types supported in v1+
Expand Down
9 changes: 7 additions & 2 deletions transforms.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,14 @@ func (t IdentityTransform) MarshalText() ([]byte, error) {
// String returns the textual name of the identity transform.
func (IdentityTransform) String() string {
	const name = "identity"

	return name
}

// CanTransform reports whether the identity transform may be applied to a
// value of type t. Geometry and geography types are always rejected; any
// other type is accepted only if it is a PrimitiveType.
func (IdentityTransform) CanTransform(t Type) bool {
	// Reject the geospatial types before the generic primitive check.
	// NOTE(review): presumably they would otherwise satisfy PrimitiveType,
	// which is why the explicit guard is needed — confirm against types.go.
	switch t.(type) {
	case GeometryType, GeographyType:
		return false
	}

	_, ok := t.(PrimitiveType)

	return ok
}
// ResultType returns the type produced by the identity transform, which is
// the input type t unchanged.
func (IdentityTransform) ResultType(t Type) Type {
	return t
}
// PreservesOrder reports that the identity transform preserves ordering; it
// always returns true.
func (IdentityTransform) PreservesOrder() bool {
	return true
}
Expand Down
4 changes: 4 additions & 0 deletions transforms_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,8 @@ func TestCanTransform(t *testing.T) {
},
notAllowed: []iceberg.Type{
&iceberg.StructType{}, &iceberg.ListType{}, &iceberg.MapType{},
iceberg.GeometryType{},
iceberg.GeographyType{},
},
},
{
Expand All @@ -284,6 +286,8 @@ func TestCanTransform(t *testing.T) {
notAllowed: []iceberg.Type{
iceberg.PrimitiveTypes.Bool, iceberg.PrimitiveTypes.Float32, iceberg.PrimitiveTypes.Float64,
&iceberg.StructType{}, &iceberg.ListType{}, &iceberg.MapType{},
iceberg.GeometryType{},
iceberg.GeographyType{},
},
},
{
Expand Down
Loading
Loading