-
Notifications
You must be signed in to change notification settings - Fork 4k
GH-36518: [Java] Fix ArrowFlightJdbcTimeStampVectorAccessor to return Timestamp objects with date and time that corresponds with local time instead of UTC date and time #36519
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9d02bfd
77e037b
5887433
1ff3fc6
f408486
beb4ea6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,7 +25,7 @@ | |
| import java.sql.Time; | ||
| import java.sql.Timestamp; | ||
| import java.time.LocalDateTime; | ||
| import java.time.temporal.ChronoUnit; | ||
| import java.time.ZoneId; | ||
| import java.util.Calendar; | ||
| import java.util.TimeZone; | ||
| import java.util.concurrent.TimeUnit; | ||
|
|
@@ -45,13 +45,13 @@ public class ArrowFlightJdbcTimeStampVectorAccessor extends ArrowFlightJdbcAcces | |
| private final TimeZone timeZone; | ||
| private final Getter getter; | ||
| private final TimeUnit timeUnit; | ||
| private final LongToLocalDateTime longToLocalDateTime; | ||
| private final LongToUTCDateTime longToUTCDateTime; | ||
| private final Holder holder; | ||
|
|
||
| /** | ||
| * Functional interface used to convert a number (in any time resolution) to LocalDateTime. | ||
| */ | ||
| interface LongToLocalDateTime { | ||
| interface LongToUTCDateTime { | ||
| LocalDateTime fromLong(long value); | ||
| } | ||
|
|
||
|
|
@@ -67,7 +67,7 @@ public ArrowFlightJdbcTimeStampVectorAccessor(TimeStampVector vector, | |
|
|
||
| this.timeZone = getTimeZoneForVector(vector); | ||
| this.timeUnit = getTimeUnitForVector(vector); | ||
| this.longToLocalDateTime = getLongToLocalDateTimeForVector(vector, this.timeZone); | ||
| this.longToUTCDateTime = getLongToUTCDateTimeForVector(vector); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -90,15 +90,19 @@ private LocalDateTime getLocalDateTime(Calendar calendar) { | |
|
|
||
| long value = holder.value; | ||
|
|
||
| LocalDateTime localDateTime = this.longToLocalDateTime.fromLong(value); | ||
| LocalDateTime localDateTime = this.longToUTCDateTime.fromLong(value); | ||
| ZoneId defaultTimeZone = Calendar.getInstance().getTimeZone().toZoneId(); | ||
| ZoneId sourceTimeZone; | ||
|
|
||
| if (calendar != null) { | ||
| TimeZone timeZone = calendar.getTimeZone(); | ||
| long millis = this.timeUnit.toMillis(value); | ||
| localDateTime = localDateTime | ||
| .minus(timeZone.getOffset(millis) - this.timeZone.getOffset(millis), ChronoUnit.MILLIS); | ||
| if (this.timeZone != null) { | ||
| sourceTimeZone = this.timeZone.toZoneId(); | ||
| } else if (calendar != null) { | ||
| sourceTimeZone = calendar.getTimeZone().toZoneId(); | ||
| } else { | ||
| sourceTimeZone = defaultTimeZone; | ||
| } | ||
| return localDateTime; | ||
|
|
||
| return localDateTime.atZone(sourceTimeZone).withZoneSameInstant(defaultTimeZone).toLocalDateTime(); | ||
|
Comment on lines
+97
to
+105
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure this is quite right: if the Arrow vector has a timezone, the underlying timestamp is always relative to UTC. |
||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -149,23 +153,22 @@ protected static TimeUnit getTimeUnitForVector(TimeStampVector vector) { | |
| } | ||
| } | ||
|
|
||
| protected static LongToLocalDateTime getLongToLocalDateTimeForVector(TimeStampVector vector, | ||
| TimeZone timeZone) { | ||
| String timeZoneID = timeZone.getID(); | ||
| protected static LongToUTCDateTime getLongToUTCDateTimeForVector(TimeStampVector vector) { | ||
| String timeZoneID = "UTC"; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Make final |
||
|
|
||
| ArrowType.Timestamp arrowType = | ||
| (ArrowType.Timestamp) vector.getField().getFieldType().getType(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the switch block below, you may use the overload without timeZoneID, which is much simpler: |
||
|
|
||
| switch (arrowType.getUnit()) { | ||
| case NANOSECOND: | ||
| return nanoseconds -> DateUtility.getLocalDateTimeFromEpochNano(nanoseconds, timeZoneID); | ||
| return DateUtility::getLocalDateTimeFromEpochNano; | ||
| case MICROSECOND: | ||
| return microseconds -> DateUtility.getLocalDateTimeFromEpochMicro(microseconds, timeZoneID); | ||
| return DateUtility::getLocalDateTimeFromEpochMicro; | ||
| case MILLISECOND: | ||
| return milliseconds -> DateUtility.getLocalDateTimeFromEpochMilli(milliseconds, timeZoneID); | ||
| return DateUtility::getLocalDateTimeFromEpochMilli; | ||
| case SECOND: | ||
| return seconds -> DateUtility.getLocalDateTimeFromEpochMilli( | ||
| TimeUnit.SECONDS.toMillis(seconds), timeZoneID); | ||
| TimeUnit.SECONDS.toMillis(seconds)); | ||
| default: | ||
| throw new UnsupportedOperationException("Invalid Arrow time unit"); | ||
| } | ||
|
|
@@ -177,7 +180,7 @@ protected static TimeZone getTimeZoneForVector(TimeStampVector vector) { | |
|
|
||
| String timezoneName = arrowType.getTimezone(); | ||
| if (timezoneName == null) { | ||
| return TimeZone.getTimeZone("UTC"); | ||
| return null; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. This change seems to be the right one: an Arrow timestamp has no defined timezone if the timezone name is not set.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
| } | ||
|
|
||
| return TimeZone.getTimeZone(timezoneName); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,9 +20,11 @@ | |
| import static org.apache.calcite.avatica.util.DateTimeUtils.MILLIS_PER_DAY; | ||
|
|
||
| import java.sql.Timestamp; | ||
| import java.time.Instant; | ||
| import java.time.LocalDate; | ||
| import java.time.LocalDateTime; | ||
| import java.time.LocalTime; | ||
| import java.time.ZonedDateTime; | ||
| import java.util.Calendar; | ||
| import java.util.TimeZone; | ||
| import java.util.concurrent.TimeUnit; | ||
|
|
@@ -36,21 +38,17 @@ private DateTimeUtils() { | |
| } | ||
|
|
||
| /** | ||
| * Subtracts given Calendar's TimeZone offset from epoch milliseconds. | ||
| * Apply calendar timezone to epoch milliseconds. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we document this function and what steps it's doing in detail? |
||
| */ | ||
| public static long applyCalendarOffset(long milliseconds, Calendar calendar) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, looking at how it's used, now I'm thinking both implementations are wrong. For instance, it's used in DateVectorAccessor and is applied to the value from the vector and the user-supplied calendar. This function appears to assume the given milliseconds value is a naive timestamp in the given (or default) timezone, and converts it to a UTC timestamp. This is backwards. The value from the Arrow vector is ALREADY a UTC timestamp! That said, this function does seem to do the right thing for other places where it is used. So I think we need to name and properly document it, and remove it from places where it should NOT be applied. |
||
| if (calendar == null) { | ||
| calendar = Calendar.getInstance(); | ||
| } | ||
|
|
||
| final TimeZone tz = calendar.getTimeZone(); | ||
| final TimeZone defaultTz = TimeZone.getDefault(); | ||
|
|
||
| if (tz != defaultTz) { | ||
| milliseconds -= tz.getOffset(milliseconds) - defaultTz.getOffset(milliseconds); | ||
| } | ||
|
|
||
| return milliseconds; | ||
| Instant currInstant = Instant.ofEpochMilli(milliseconds); | ||
| LocalDateTime getTimestampWithoutTZ = LocalDateTime.ofInstant(currInstant, | ||
| TimeZone.getTimeZone("UTC").toZoneId()); | ||
|
Comment on lines
+47
to
+49
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we directly use ofEpochSecond instead of bouncing through an Instant and a timezone conversion? |
||
| ZonedDateTime parsedTime = getTimestampWithoutTZ.atZone(calendar.getTimeZone().toZoneId()); | ||
| return parsedTime.toEpochSecond() * 1000; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This appears to truncate the milliseconds?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. parsedTime.toInstant().toEpochMilli() seems like it would avoid this problem. |
||
| } | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ | |
|
|
||
| package org.apache.arrow.driver.jdbc.accessor.impl.calendar; | ||
|
|
||
| import static java.util.Optional.ofNullable; | ||
| import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorAccessor.getTimeUnitForVector; | ||
| import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorAccessor.getTimeZoneForVector; | ||
| import static org.hamcrest.CoreMatchers.equalTo; | ||
|
|
@@ -25,7 +26,9 @@ | |
| import java.sql.Date; | ||
| import java.sql.Time; | ||
| import java.sql.Timestamp; | ||
| import java.time.Instant; | ||
| import java.time.LocalDateTime; | ||
| import java.time.ZonedDateTime; | ||
| import java.util.Arrays; | ||
| import java.util.Calendar; | ||
| import java.util.Collection; | ||
|
|
@@ -170,21 +173,22 @@ public void testShouldGetTimestampReturnValidTimestampWithoutCalendar() throws E | |
|
|
||
| @Test | ||
| public void testShouldGetTimestampReturnValidTimestampWithCalendar() throws Exception { | ||
| TimeZone.setDefault(TimeZone.getTimeZone("Asia/Hong_Kong")); | ||
|
|
||
| TimeZone timeZone = TimeZone.getTimeZone(AMERICA_SAO_PAULO); | ||
| Calendar calendar = Calendar.getInstance(timeZone); | ||
|
|
||
| TimeZone timeZoneForVector = getTimeZoneForVector(vector); | ||
| TimeZone finalTimeZoneForResultWithoutCalendar = ofNullable(getTimeZoneForVector(vector)) | ||
| .orElse(TimeZone.getTimeZone("Asia/Hong_Kong")); | ||
|
|
||
| accessorIterator.iterate(vector, (accessor, currentRow) -> { | ||
| final Timestamp resultWithoutCalendar = accessor.getTimestamp(null); | ||
| final Timestamp result = accessor.getTimestamp(calendar); | ||
|
|
||
| long offset = timeZone.getOffset(resultWithoutCalendar.getTime()) - | ||
| timeZoneForVector.getOffset(resultWithoutCalendar.getTime()); | ||
|
|
||
| collector.checkThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset)); | ||
| collector.checkThat(accessor.wasNull(), is(false)); | ||
| assertOffsetIsConsistentWithAccessorGetters(timeZone, finalTimeZoneForResultWithoutCalendar, result.getTime(), | ||
| resultWithoutCalendar.getTime(), accessor); | ||
| }); | ||
| TimeZone.setDefault(null); | ||
| } | ||
|
|
||
| @Test | ||
|
|
@@ -206,17 +210,15 @@ public void testShouldGetDateReturnValidDateWithCalendar() throws Exception { | |
| TimeZone timeZone = TimeZone.getTimeZone(AMERICA_SAO_PAULO); | ||
| Calendar calendar = Calendar.getInstance(timeZone); | ||
|
|
||
| TimeZone timeZoneForVector = getTimeZoneForVector(vector); | ||
| TimeZone finalTimeZoneForResultWithoutCalendar = ofNullable(getTimeZoneForVector(vector)) | ||
| .orElse(TimeZone.getDefault()); | ||
|
|
||
| accessorIterator.iterate(vector, (accessor, currentRow) -> { | ||
| final Date resultWithoutCalendar = accessor.getDate(null); | ||
| final Date result = accessor.getDate(calendar); | ||
|
|
||
| long offset = timeZone.getOffset(resultWithoutCalendar.getTime()) - | ||
| timeZoneForVector.getOffset(resultWithoutCalendar.getTime()); | ||
|
|
||
| collector.checkThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset)); | ||
| collector.checkThat(accessor.wasNull(), is(false)); | ||
| assertOffsetIsConsistentWithAccessorGetters(timeZone, finalTimeZoneForResultWithoutCalendar, result.getTime(), | ||
| resultWithoutCalendar.getTime(), accessor); | ||
| }); | ||
| } | ||
|
|
||
|
|
@@ -239,17 +241,15 @@ public void testShouldGetTimeReturnValidTimeWithCalendar() throws Exception { | |
| TimeZone timeZone = TimeZone.getTimeZone(AMERICA_SAO_PAULO); | ||
| Calendar calendar = Calendar.getInstance(timeZone); | ||
|
|
||
| TimeZone timeZoneForVector = getTimeZoneForVector(vector); | ||
| TimeZone finalTimeZoneForResultWithoutCalendar = ofNullable(getTimeZoneForVector(vector)) | ||
| .orElse(TimeZone.getDefault()); | ||
|
|
||
| accessorIterator.iterate(vector, (accessor, currentRow) -> { | ||
| final Time resultWithoutCalendar = accessor.getTime(null); | ||
| final Time result = accessor.getTime(calendar); | ||
|
|
||
| long offset = timeZone.getOffset(resultWithoutCalendar.getTime()) - | ||
| timeZoneForVector.getOffset(resultWithoutCalendar.getTime()); | ||
|
|
||
| collector.checkThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset)); | ||
| collector.checkThat(accessor.wasNull(), is(false)); | ||
| assertOffsetIsConsistentWithAccessorGetters(timeZone, finalTimeZoneForResultWithoutCalendar, result.getTime(), | ||
| resultWithoutCalendar.getTime(), accessor); | ||
| }); | ||
| } | ||
|
|
||
|
|
@@ -270,8 +270,11 @@ private Timestamp getTimestampForVector(int currentRow) { | |
| } else if (object instanceof Long) { | ||
| TimeUnit timeUnit = getTimeUnitForVector(vector); | ||
| long millis = timeUnit.toMillis((Long) object); | ||
| long offset = TimeZone.getTimeZone(timeZone).getOffset(millis); | ||
| expectedTimestamp = new Timestamp(millis + offset); | ||
|
|
||
| ZonedDateTime sourceTZDateTime = LocalDateTime | ||
| .ofInstant(Instant.ofEpochMilli(millis), TimeZone.getTimeZone("UTC").toZoneId()) | ||
| .atZone(TimeZone.getTimeZone(timeZone).toZoneId()); | ||
| expectedTimestamp = new Timestamp(sourceTZDateTime.toEpochSecond() * 1000); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks to truncate millis as well. |
||
| } | ||
| return expectedTimestamp; | ||
| } | ||
|
|
@@ -319,4 +322,18 @@ private void assertGetStringIsConsistentWithVarCharAccessor(Calendar calendar) t | |
| }); | ||
| } | ||
| } | ||
|
|
||
| private void assertOffsetIsConsistentWithAccessorGetters(TimeZone timeZone, | ||
| TimeZone finalTimeZoneForResultWithoutCalendar, long result, | ||
| long resultWithoutCalendar, | ||
| ArrowFlightJdbcTimeStampVectorAccessor accessor) { | ||
| final TimeZone timeZoneForResult = getTimeZoneForVector(vector) == null ? timeZone : | ||
| finalTimeZoneForResultWithoutCalendar; | ||
|
|
||
| long offset = timeZoneForResult.getOffset(result) - | ||
| finalTimeZoneForResultWithoutCalendar.getOffset(resultWithoutCalendar); | ||
|
|
||
| collector.checkThat(resultWithoutCalendar - result, is(offset)); | ||
| collector.checkThat(accessor.wasNull(), is(false)); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As per the comment from @lidavidm, sourceTimeZone should be UTC if the vector has provided a valid timezone.