Skip to content

Commit 00bbef0

Browse files
Use structured types instead of raw props for logical type schemas (this seems to be what the Avro framework intends)
1 parent 956e867 commit 00bbef0

File tree

2 files changed

+76
-67
lines changed

2 files changed

+76
-67
lines changed

adapter/avro/src/main/java/org/apache/arrow/adapter/avro/ArrowToAvroUtils.java

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@
101101
import org.apache.arrow.vector.types.Types;
102102
import org.apache.arrow.vector.types.pojo.ArrowType;
103103
import org.apache.arrow.vector.types.pojo.Field;
104+
import org.apache.avro.LogicalType;
105+
import org.apache.avro.LogicalTypes;
104106
import org.apache.avro.Schema;
105107
import org.apache.avro.SchemaBuilder;
106108

@@ -296,30 +298,31 @@ private static <T> T buildBaseTypeSchema(
296298

297299
case Decimal:
298300
ArrowType.Decimal decimalType = (ArrowType.Decimal) field.getType();
299-
return builder
300-
.fixed(field.getName())
301-
.prop("logicalType", "decimal")
302-
.prop("precision", decimalType.getPrecision())
303-
.prop("scale", decimalType.getScale())
304-
.size(decimalType.getBitWidth() / 8);
301+
return builder.type(
302+
LogicalTypes.decimal(decimalType.getPrecision(), decimalType.getScale())
303+
.addToSchema(
304+
Schema.createFixed(
305+
field.getName(), namespace, "", decimalType.getBitWidth() / 8)));
305306

306307
case Date:
307-
return builder.intBuilder().prop("logicalType", "date").endInt();
308+
return builder.type(LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT)));
308309

309310
case Time:
310311
ArrowType.Time timeType = (ArrowType.Time) field.getType();
311312
if ((timeType.getUnit() == TimeUnit.SECOND || timeType.getUnit() == TimeUnit.MILLISECOND)) {
312313
// Second and millisecond time types are encoded as time-millis (INT)
313-
return builder.intBuilder().prop("logicalType", "time-millis").endInt();
314+
return builder.type(
315+
LogicalTypes.timeMillis().addToSchema(Schema.create(Schema.Type.INT)));
314316
} else {
315317
// All other time types (micro, nano) are encoded as time-micros (LONG)
316-
return builder.longBuilder().prop("logicalType", "time-micros").endLong();
318+
return builder.type(
319+
LogicalTypes.timeMicros().addToSchema(Schema.create(Schema.Type.LONG)));
317320
}
318321

319322
case Timestamp:
320323
ArrowType.Timestamp timestampType = (ArrowType.Timestamp) field.getType();
321-
String timestampLogicalType = timestampLogicalType(timestampType);
322-
return builder.longBuilder().prop("logicalType", timestampLogicalType).endLong();
324+
LogicalType timestampLogicalType = timestampLogicalType(timestampType);
325+
return builder.type(timestampLogicalType.addToSchema(Schema.create(Schema.Type.LONG)));
323326

324327
case Struct:
325328
String childNamespace =
@@ -348,15 +351,15 @@ private static <T> T addTypesToUnion(
348351
return accumulator.endUnion();
349352
}
350353

351-
private static String timestampLogicalType(ArrowType.Timestamp timestampType) {
354+
private static LogicalType timestampLogicalType(ArrowType.Timestamp timestampType) {
352355
boolean zoneAware = timestampType.getTimezone() != null;
353356
if (timestampType.getUnit() == TimeUnit.NANOSECOND) {
354-
return zoneAware ? "timestamp-nanos" : "local-timestamp-nanos";
357+
return zoneAware ? LogicalTypes.timestampNanos() : LogicalTypes.localTimestampNanos();
355358
} else if (timestampType.getUnit() == TimeUnit.MICROSECOND) {
356-
return zoneAware ? "timestamp-micros" : "local-timestamp-micros";
359+
return zoneAware ? LogicalTypes.timestampMicros() : LogicalTypes.localTimestampMicros();
357360
} else {
358361
// Timestamp in seconds will be cast to milliseconds, Avro does not support seconds
359-
return zoneAware ? "timestamp-millis" : "local-timestamp-millis";
362+
return zoneAware ? LogicalTypes.timestampMillis() : LogicalTypes.localTimestampMillis();
360363
}
361364
}
362365

0 commit comments

Comments
 (0)