Skip to content
Closed
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,22 @@
import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcDateVectorGetter.Getter;
import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcDateVectorGetter.Holder;
import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcDateVectorGetter.createGetter;
import static org.apache.arrow.driver.jdbc.utils.DateTimeUtils.getTimestampValue;
import static org.apache.calcite.avatica.util.DateTimeUtils.MILLIS_PER_DAY;
import static org.apache.calcite.avatica.util.DateTimeUtils.unixDateToString;

import java.sql.Date;
import java.sql.Timestamp;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.Calendar;
import java.util.concurrent.TimeUnit;
import java.util.function.IntSupplier;
import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor;
import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory;
import org.apache.arrow.driver.jdbc.utils.DateTimeUtils;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.util.DateUtility;

/** Accessor for the Arrow types: {@link DateDayVector} and {@link DateMilliVector}. */
public class ArrowFlightJdbcDateVectorAccessor extends ArrowFlightJdbcAccessor {
Expand Down Expand Up @@ -87,17 +88,12 @@ public Object getObject() {

@Override
public Date getDate(Calendar calendar) {
fillHolder();
if (this.wasNull) {
final LocalDateTime localDateTime = getLocalDateTime(calendar);
if (localDateTime == null) {
return null;
}

long value = holder.value;
long milliseconds = this.timeUnit.toMillis(value);

long millisWithCalendar = DateTimeUtils.applyCalendarOffset(milliseconds, calendar);

return new Date(getTimestampValue(millisWithCalendar).getTime());
return new Date(Timestamp.valueOf(localDateTime).getTime());
}

private void fillHolder() {
Expand All @@ -108,11 +104,36 @@ private void fillHolder() {

@Override
public Timestamp getTimestamp(Calendar calendar) {
Date date = getDate(calendar);
if (date == null) {
final LocalDateTime localDateTime = getLocalDateTime(calendar);
if (localDateTime == null) {
return null;
}

return Timestamp.valueOf(localDateTime);
}

private LocalDateTime getLocalDateTime(Calendar calendar) {
getter.get(getCurrentRow(), holder);
this.wasNull = holder.isSet == 0;
this.wasNullConsumer.setWasNull(this.wasNull);
if (this.wasNull) {
return null;
}
return new Timestamp(date.getTime());

final LocalDateTime localDateTime =
DateUtility.getLocalDateTimeFromEpochMilli(this.timeUnit.toMillis(holder.value));
final ZoneId defaultTimeZone = Calendar.getInstance().getTimeZone().toZoneId();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we want to use the system locale? It should just be UTC (even that is strictly wrong; the date is unzoned by default and only zoned when a calendar comes into play)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or does JDBC expect that it will be in the system timezone?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, problem here is that java.sql.Timestamp and java.sql.Date only contains a long value with milliseconds. You have to tell java how to print this long, in what timezone.

So, the method has to follow the same approach done at org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorAccessor#getLocalDateTime

  • first putting the received value (which is in UTC) at the expected time zone. The expected time zone is the default one for the JVM or the time zone specified when calling to java.sql.ResultSet#getDate(java.lang.String, java.util.Calendar). This is done on this line.
    .atZone(sourceTimeZone)
  • then, the value is adjusted to the JVM timezone.
    .withZoneSameInstant(defaultTimeZone)

When this method is used java.sql.ResultSet#getDate(java.lang.String, java.util.Calendar), it is expected that the caller prints the date returned on the specified time zone. Something like:

final Date dateValue = rs.getDate("date_field", Calendar.getInstance(TimeZone.getTimeZone("Asia/Tokyo")));
final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
sdf.setTimeZone(TimeZone.getTimeZone("Asia/Tokyo"));
System.out.println("Received date: " +sdf.format(dateValue));

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I found a little time to look into this finally.

I still don't see why the system timezone has to come into play.

Arrow date is with reference to the UNIX epoch, i.e. UTC. Java Date is with reference to GMT. As the Java docs themselves state, they are technically different but practically the same.

(1) If there is no calendar given, I would expect there to be no datetime arithmetic done, because they are in the same time zone (effectively). It happens to work out here because we convert to/from the system timezone, so the arithmetic here should cancel itself out, but I'm not sure if that will actually be the case around daylight savings time boundaries.

(2) If there is a calendar given, JDBC is frustratingly vague about the intention of the parameter. It seems most sources interpret it as, "treat the date value as a local date (without reference to any timezone) and give me a timestamp that would display that date (at midnight) in the given timezone". In that case, again, I don't see why the system timezone should come into play.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi,
initially, you have a timestamp at database which does not have an associated time zone. It can be in any of them.

Check the example I attached before. You have '2005-06-29 19:19:41' at database.

  • your JVM is in America/Los_Angeles and you invoke the getTimestamp(int columnIndex). At ArrowFlightJdbcTimeStampVectorAccessor you receives that exact timestamp in UTC. But, you have to translate it to the same timestamp value but at America/Los_Angeles (the default JVM timezone).

    If you print the obtained timestamp, you will see the same value that was at database.
    Something like:
    2005-06-29 19:19:41.0

  • your JVM is in America/Los_Angeles and you invoke the getTimestamp(int columnIndex, Calendar cal) specifying Asia/Tokyo. Same as before, you receive the timestamp in UTC at ArrowFlightJdbcTimeStampVectorAccessor, but now you have to put that same timestamp in Asia/Tokyo time zone, because is the one specified at the getTimestamp invokation.

    When you print the obtained timestamp, you will see this:
    2005-06-29 03:19:41.0

    This is because the timestamp is printed following your JVM timezone (America/Los_Angeles). But note that it is the expected value because you retrieved the timestamp using getTimestamp(index, Asia/Tokyo-calendar). So, you specified that the timestamp '2005-06-29 19:19:41' is at the time zone Asia/Tokyo.

    If you check the difference, there are 16 hours less, because America/Los_Angeles is at GMT-7 and Asia/Tokyo is at UTC+9. This is why the timestamp is printed as 2005-06-29 03:19:41.0, America/Los_Angeles value, which is '2005-06-29 19:19:41.0' at Asia/Tokyo

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeesh, system timezone appears to be a convention (e.g. this StackOverflow answer) but doesn't appear to be formally specified. I still think that, given Arrow actually specifies the epoch for its values, introducing the system timezone merely to be consistent with databases that appear not to store this information is strictly wrong.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose it could depend on how you want to read this sentence...

To conform with the definition of SQL DATE, the millisecond values wrapped by a java.sql.Date instance must be 'normalized' by setting the hours, minutes, seconds, and milliseconds to zero in the particular time zone with which the instance is associated.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ResultSet#getDate never specifies the time zone when a calendar is not provided, so I guess the question is whether we should use the underlying value's time zone or the system time zone

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, I will be out for a few days. I will try to answer questions when I return.
In the meantime, I would like to comment:

Yeesh, system timezone appears to be a convention (e.g. this StackOverflow answer) but doesn't appear to be formally specified. I still think that, given Arrow actually specifies the epoch for its values, introducing the system timezone merely to be consistent with databases that appear not to store this information is strictly wrong.

It is true that documentation is not 100% clear, but it is also true that other JDBC drivers from important vendors work like that, using the default JVM timezone. So, a generic JDBC client will expect the data on that way.

I'm talking about Date here still.
(1) Arrow's Date is defined in terms of the Unix epoch (i.e. UTC), and (2) java.sql.Date is also defined in terms of GMT. So it doesn't seem like the system timezone should be relevant.

I will check again the code. But the problem is the same. The java.sql.Date has also hours, minutes and seconds. So, the Arrow JDBC driver retrieves the value in UTC, ok, but when a new Date is created it is done on the default JVM timezone. So, it can generate a different date than the expected. This is why a similar translation than the applied with Timestamp is required.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also note that python adbc driver is also working as proposed fix.

(venv) >test_dates.py

SQL: SELECT * FROM ts_table
  c1                  c2
0  1 **2005-06-29 19:19:41**
Time 0 seconds

test_date.zip

final ZoneId sourceTimeZone;
if (calendar != null) {
sourceTimeZone = calendar.getTimeZone().toZoneId();
} else {
sourceTimeZone = defaultTimeZone;
}

return localDateTime
.atZone(sourceTimeZone)
.withZoneSameInstant(defaultTimeZone)
.toLocalDateTime();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import java.sql.Time;
import java.sql.Timestamp;
import java.time.LocalDateTime;
import java.time.temporal.ChronoUnit;
import java.time.ZoneId;
import java.util.Calendar;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
Expand All @@ -40,12 +40,11 @@ public class ArrowFlightJdbcTimeStampVectorAccessor extends ArrowFlightJdbcAcces

private final TimeZone timeZone;
private final Getter getter;
private final TimeUnit timeUnit;
private final LongToLocalDateTime longToLocalDateTime;
private final LongToUTCDateTime longToUTCDateTime;
private final Holder holder;

/** Functional interface used to convert a number (in any time resolution) to LocalDateTime. */
interface LongToLocalDateTime {
interface LongToUTCDateTime {
LocalDateTime fromLong(long value);
}

Expand All @@ -59,8 +58,7 @@ public ArrowFlightJdbcTimeStampVectorAccessor(
this.getter = createGetter(vector);

this.timeZone = getTimeZoneForVector(vector);
this.timeUnit = getTimeUnitForVector(vector);
this.longToLocalDateTime = getLongToLocalDateTimeForVector(vector, this.timeZone);
this.longToUTCDateTime = getLongToUTCDateTimeForVector(vector);
}

@Override
Expand All @@ -83,16 +81,22 @@ private LocalDateTime getLocalDateTime(Calendar calendar) {

long value = holder.value;

LocalDateTime localDateTime = this.longToLocalDateTime.fromLong(value);
LocalDateTime localDateTime = this.longToUTCDateTime.fromLong(value);
ZoneId defaultTimeZone = Calendar.getInstance().getTimeZone().toZoneId();
ZoneId sourceTimeZone;

if (calendar != null) {
TimeZone timeZone = calendar.getTimeZone();
long millis = this.timeUnit.toMillis(value);
localDateTime =
localDateTime.minus(
timeZone.getOffset(millis) - this.timeZone.getOffset(millis), ChronoUnit.MILLIS);
if (this.timeZone != null) {
sourceTimeZone = this.timeZone.toZoneId();
} else if (calendar != null) {
sourceTimeZone = calendar.getTimeZone().toZoneId();
} else {
sourceTimeZone = defaultTimeZone;
}
return localDateTime;

return localDateTime
.atZone(sourceTimeZone)
.withZoneSameInstant(defaultTimeZone)
.toLocalDateTime();
}

@Override
Expand Down Expand Up @@ -143,24 +147,20 @@ protected static TimeUnit getTimeUnitForVector(TimeStampVector vector) {
}
}

protected static LongToLocalDateTime getLongToLocalDateTimeForVector(
TimeStampVector vector, TimeZone timeZone) {
String timeZoneID = timeZone.getID();

protected static LongToUTCDateTime getLongToUTCDateTimeForVector(TimeStampVector vector) {
ArrowType.Timestamp arrowType =
(ArrowType.Timestamp) vector.getField().getFieldType().getType();

switch (arrowType.getUnit()) {
case NANOSECOND:
return nanoseconds -> DateUtility.getLocalDateTimeFromEpochNano(nanoseconds, timeZoneID);
return DateUtility::getLocalDateTimeFromEpochNano;
case MICROSECOND:
return microseconds -> DateUtility.getLocalDateTimeFromEpochMicro(microseconds, timeZoneID);
return DateUtility::getLocalDateTimeFromEpochMicro;
case MILLISECOND:
return milliseconds -> DateUtility.getLocalDateTimeFromEpochMilli(milliseconds, timeZoneID);
return DateUtility::getLocalDateTimeFromEpochMilli;
case SECOND:
return seconds ->
DateUtility.getLocalDateTimeFromEpochMilli(
TimeUnit.SECONDS.toMillis(seconds), timeZoneID);
DateUtility.getLocalDateTimeFromEpochMilli(TimeUnit.SECONDS.toMillis(seconds));
default:
throw new UnsupportedOperationException("Invalid Arrow time unit");
}
Expand All @@ -172,7 +172,7 @@ protected static TimeZone getTimeZoneForVector(TimeStampVector vector) {

String timezoneName = arrowType.getTimezone();
if (timezoneName == null) {
return TimeZone.getTimeZone("UTC");
return null;
}

return TimeZone.getTimeZone(timezoneName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
import static org.apache.calcite.avatica.util.DateTimeUtils.MILLIS_PER_DAY;

import java.sql.Timestamp;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.ZonedDateTime;
import java.util.Calendar;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
Expand All @@ -32,20 +34,16 @@ private DateTimeUtils() {
// Prevent instantiation.
}

/** Subtracts given Calendar's TimeZone offset from epoch milliseconds. */
/** Apply calendar timezone to epoch milliseconds. */
public static long applyCalendarOffset(long milliseconds, Calendar calendar) {
if (calendar == null) {
calendar = Calendar.getInstance();
}

final TimeZone tz = calendar.getTimeZone();
final TimeZone defaultTz = TimeZone.getDefault();

if (tz != defaultTz) {
milliseconds -= tz.getOffset(milliseconds) - defaultTz.getOffset(milliseconds);
}

return milliseconds;
Instant currInstant = Instant.ofEpochMilli(milliseconds);
LocalDateTime getTimestampWithoutTZ =
LocalDateTime.ofInstant(currInstant, TimeZone.getTimeZone("UTC").toZoneId());
ZonedDateTime parsedTime = getTimestampWithoutTZ.atZone(calendar.getTimeZone().toZoneId());
return parsedTime.toInstant().toEpochMilli();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.arrow.driver.jdbc.accessor.impl.calendar;

import static java.util.Optional.ofNullable;
import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorAccessor.getTimeUnitForVector;
import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorAccessor.getTimeZoneForVector;
import static org.hamcrest.CoreMatchers.equalTo;
Expand All @@ -25,7 +26,9 @@
import java.sql.Date;
import java.sql.Time;
import java.sql.Timestamp;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZonedDateTime;
import java.util.Calendar;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -179,24 +182,28 @@ public void testShouldGetTimestampReturnValidTimestampWithoutCalendar(
public void testShouldGetTimestampReturnValidTimestampWithCalendar(
Supplier<TimeStampVector> vectorSupplier) throws Exception {
setup(vectorSupplier);
TimeZone.setDefault(TimeZone.getTimeZone("Asia/Hong_Kong"));

TimeZone timeZone = TimeZone.getTimeZone(AMERICA_SAO_PAULO);
Calendar calendar = Calendar.getInstance(timeZone);

TimeZone timeZoneForVector = getTimeZoneForVector(vector);
TimeZone finalTimeZoneForResultWithoutCalendar =
ofNullable(getTimeZoneForVector(vector)).orElse(TimeZone.getTimeZone("Asia/Hong_Kong"));

accessorIterator.iterate(
vector,
(accessor, currentRow) -> {
final Timestamp resultWithoutCalendar = accessor.getTimestamp(null);
final Timestamp result = accessor.getTimestamp(calendar);

long offset =
(long) timeZone.getOffset(resultWithoutCalendar.getTime())
- timeZoneForVector.getOffset(resultWithoutCalendar.getTime());

assertThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset));
assertThat(accessor.wasNull(), is(false));
assertOffsetIsConsistentWithAccessorGetters(
timeZone,
finalTimeZoneForResultWithoutCalendar,
result.getTime(),
resultWithoutCalendar.getTime(),
accessor);
});
TimeZone.setDefault(null);
}

@ParameterizedTest
Expand Down Expand Up @@ -230,20 +237,21 @@ public void testShouldGetDateReturnValidDateWithCalendar(Supplier<TimeStampVecto
TimeZone timeZone = TimeZone.getTimeZone(AMERICA_SAO_PAULO);
Calendar calendar = Calendar.getInstance(timeZone);

TimeZone timeZoneForVector = getTimeZoneForVector(vector);
TimeZone finalTimeZoneForResultWithoutCalendar =
ofNullable(getTimeZoneForVector(vector)).orElse(TimeZone.getDefault());

accessorIterator.iterate(
vector,
(accessor, currentRow) -> {
final Date resultWithoutCalendar = accessor.getDate(null);
final Date result = accessor.getDate(calendar);

long offset =
(long) timeZone.getOffset(resultWithoutCalendar.getTime())
- timeZoneForVector.getOffset(resultWithoutCalendar.getTime());

assertThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset));
assertThat(accessor.wasNull(), is(false));
assertOffsetIsConsistentWithAccessorGetters(
timeZone,
finalTimeZoneForResultWithoutCalendar,
result.getTime(),
resultWithoutCalendar.getTime(),
accessor);
});
}

Expand Down Expand Up @@ -278,20 +286,21 @@ public void testShouldGetTimeReturnValidTimeWithCalendar(Supplier<TimeStampVecto
TimeZone timeZone = TimeZone.getTimeZone(AMERICA_SAO_PAULO);
Calendar calendar = Calendar.getInstance(timeZone);

TimeZone timeZoneForVector = getTimeZoneForVector(vector);
TimeZone finalTimeZoneForResultWithoutCalendar =
ofNullable(getTimeZoneForVector(vector)).orElse(TimeZone.getDefault());

accessorIterator.iterate(
vector,
(accessor, currentRow) -> {
final Time resultWithoutCalendar = accessor.getTime(null);
final Time result = accessor.getTime(calendar);

long offset =
(long) timeZone.getOffset(resultWithoutCalendar.getTime())
- timeZoneForVector.getOffset(resultWithoutCalendar.getTime());

assertThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset));
assertThat(accessor.wasNull(), is(false));
assertOffsetIsConsistentWithAccessorGetters(
timeZone,
finalTimeZoneForResultWithoutCalendar,
result.getTime(),
resultWithoutCalendar.getTime(),
accessor);
});
}

Expand All @@ -314,8 +323,12 @@ private Timestamp getTimestampForVector(int currentRow, String timeZone) {
} else if (object instanceof Long) {
TimeUnit timeUnit = getTimeUnitForVector(vector);
long millis = timeUnit.toMillis((Long) object);
long offset = TimeZone.getTimeZone(timeZone).getOffset(millis);
expectedTimestamp = new Timestamp(millis + offset);

ZonedDateTime sourceTZDateTime =
LocalDateTime.ofInstant(
Instant.ofEpochMilli(millis), TimeZone.getTimeZone("UTC").toZoneId())
.atZone(TimeZone.getTimeZone(timeZone).toZoneId());
expectedTimestamp = new Timestamp(sourceTZDateTime.toInstant().toEpochMilli());
}
return expectedTimestamp;
}
Expand Down Expand Up @@ -373,4 +386,21 @@ private void assertGetStringIsConsistentWithVarCharAccessor(Calendar calendar) t
});
}
}

private void assertOffsetIsConsistentWithAccessorGetters(
TimeZone timeZone,
TimeZone finalTimeZoneForResultWithoutCalendar,
long result,
long resultWithoutCalendar,
ArrowFlightJdbcTimeStampVectorAccessor accessor) {
final TimeZone timeZoneForResult =
getTimeZoneForVector(vector) == null ? timeZone : finalTimeZoneForResultWithoutCalendar;

long offset =
timeZoneForResult.getOffset(result)
- finalTimeZoneForResultWithoutCalendar.getOffset(resultWithoutCalendar);

assertThat(resultWithoutCalendar - result, is(offset));
assertThat(accessor.wasNull(), is(false));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ public void testShouldGetOffsetWithDifferentTimeZone() {
TimeZone.setDefault(alternateTimezone);

try { // Trying to guarantee timezone returns to its original value
final long expectedEpochMillis = epochMillis + offset;
final long expectedEpochMillis = epochMillis - offset;
final long actualEpochMillis =
DateTimeUtils.applyCalendarOffset(epochMillis, Calendar.getInstance(defaultTimezone));
DateTimeUtils.applyCalendarOffset(epochMillis, Calendar.getInstance(alternateTimezone));

assertThat(actualEpochMillis, is(expectedEpochMillis));
} finally {
Expand Down