Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion doc/user/content/sql/create-source/mysql.md
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,10 @@ CREATE SOURCE mz_source

If you're replicating tables that use [data types unsupported](#supported-types)
by Materialize, use the `TEXT COLUMNS` option to decode data as `text` for the
affected columns. This option expects the upstream fully-qualified names of the
affected columns. `TEXT COLUMNS` should also be used for columns that contain
MySQL zero-value `DATE`, `DATETIME`, or `TIMESTAMP` data.

This option expects the upstream fully-qualified names of the
replicated table and column (i.e. as defined in your MySQL database).

```mzsql
Expand Down
14 changes: 14 additions & 0 deletions doc/user/data/mysql_source_details.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,20 @@
- Use the [`EXCLUDE COLUMNS`](/sql/create-source/mysql/#excluding-columns)
option to exclude any columns that contain unsupported data types.

#### Zero values for `date`, `datetime`, and `timestamp`

MySQL allows the special "zero" values `0000-00-00` and
`0000-00-00 00:00:00` in `date`, `datetime`, and `timestamp` columns when
the server `sql_mode` does not include `NO_ZERO_DATE`, and allows dates with
a zero month or day part (e.g. `2024-00-01`) when it does not include
`NO_ZERO_IN_DATE`. These values are not representable in Materialize's
corresponding native types, so they will cause ingestion to fail for the
affected column.

To ingest columns that contain zero values, use
[`TEXT COLUMNS`](/sql/create-source/mysql/#handling-unsupported-types) to
decode the affected columns as `text`. The zero values for `date`,
`datetime`, and `timestamp` are then preserved verbatim as strings
(e.g. `"0000-00-00 00:00:00"`). `year` columns, which have no native
mapping in Materialize and can only be ingested via `TEXT COLUMNS`,
render their zero value as `"0000"`.

- name: mysql-truncation-restriction
content: |
Avoid truncating upstream tables that are being replicated into Materialize.
Expand Down
12 changes: 10 additions & 2 deletions src/mysql-util/src/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,16 @@ fn pack_val_as_datum(
}
}
Some(MySqlColumnMeta::Year) => {
let val = from_value_opt::<u16>(value)?;
packer.push(Datum::String(&val.to_string()));
let mut val = from_value_opt::<u16>(value)?;
// mysql_common incorrectly handles MySQL YEAR type, which has a valid range
// of 1901-2155 (https://dev.mysql.com/doc/refman/8.0/en/year.html)
//
// We treat the value 1900 as the zero-value year - "0000"
// https://github.com/blackbeam/rust_mysql_common/blob/v0.35.5/src/binlog/value.rs#L124-L129
if val == 1900 {
val = 0;
}
packer.push(Datum::String(&format!("{val:04}")));
}
Some(MySqlColumnMeta::Date) => {
// Some MySQL dates are invalid in chrono/NaiveDate (e.g. 0000-00-00), so
Expand Down
5 changes: 4 additions & 1 deletion test/mysql-cdc/30-text-columns.td
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ $ mysql-connect name=mysql url=mysql://root@mysql password=${arg.mysql-root-pass

# Insert data into MySQL that can't be decoded using native types and must be decoded
# as a TEXT COLUMN. DATE-type coverage lives in text-columns-date.td;
# TIMESTAMP/DATETIME coverage lives in text-columns-timestamp.td.
# TIMESTAMP/DATETIME coverage lives in text-columns-timestamp.td;
# YEAR coverage (including the zero-year sentinel) lives in text-columns-year.td.
# The YEAR usage retained below is intentional, as part of the multi-column
# TEXT COLUMNS integration check (combined-clause SHOW CREATE TABLE rewrite).

$ mysql-execute name=mysql
DROP DATABASE IF EXISTS public;
Expand Down
15 changes: 15 additions & 0 deletions test/mysql-cdc/text-columns-date.td
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ COMMIT;
WITH (TEXT COLUMNS = (event_date));
> COMMIT

# Block until the snapshot is fully ingested before issuing the
# post-snapshot inserts, so those rows go through the binlog decode
# path rather than being absorbed into the snapshot.
> SELECT id, event_date FROM events ORDER BY id;
1 "2024-04-03"
2 "0000-00-00"
3 <null>
4 "1000-01-01"
5 "9999-12-31"
11 "2024-00-01"
12 "2024-01-00"

# Post-snapshot rows exercise the replication / binlog decode path.
$ mysql-execute name=mysql
USE public;
Expand Down Expand Up @@ -136,6 +148,9 @@ INSERT INTO reports VALUES (1, '2024-04-03');
> CREATE TABLE reports FROM SOURCE da (REFERENCE public.reports);
> COMMIT

# Block until the snapshot lands before issuing the binlog-path insert,
# so the zero-date below is decoded as a replication event, not a
# snapshot row.
> SELECT * FROM reports;
1 "2024-04-03"

Expand Down
10 changes: 10 additions & 0 deletions test/mysql-cdc/text-columns-timestamp.td
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,16 @@ COMMIT;
WITH (TEXT COLUMNS = (created_at, updated_at, archived_at, born_at, mid_at));
> COMMIT

# Block until the snapshot is fully ingested before issuing the
# post-snapshot inserts, so those rows go through the binlog decode
# path rather than being absorbed into the snapshot.
> SELECT id, created_at, updated_at, archived_at, born_at, mid_at FROM products ORDER BY id;
1 "2024-04-03 10:15:13" "2024-04-03 10:15:13.123456" "2024-04-03 10:15:13" "2024-04-03 10:15:13.123456" "2024-04-03 10:15:13.1234"
2 "0000-00-00 00:00:00" "0000-00-00 00:00:00.000000" "0000-00-00 00:00:00" "0000-00-00 00:00:00.000000" "0000-00-00 00:00:00.0000"
3 <null> <null> <null> <null> <null>
7 <null> <null> "1001-01-01 00:00:00" "1001-01-01 00:00:00.000001" "1001-01-01 00:00:00.0001"
8 <null> <null> "9999-12-31 23:59:59" "9999-12-31 23:59:59.999999" "9999-12-31 23:59:59.9999"

# Post-snapshot rows exercise the replication / binlog decode path,
# which uses a different mysql_common::Value variant than the snapshot.
$ mysql-execute name=mysql
Expand Down
120 changes: 120 additions & 0 deletions test/mysql-cdc/text-columns-year.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

#
# Regression test for MySQL YEAR columns ingested via TEXT COLUMNS,
# with a mix of valid values and the zero-year sentinel.
#
# YEAR is one of the MySQL types that cannot be ingested natively in
# Materialize (see schemas.rs: YEAR is only mapped in parse_as_text_column,
# not in the native parser). CREATE TABLE FROM SOURCE on a YEAR column
# without TEXT COLUMNS errors with "unsupported type"; declaring the column
# in TEXT COLUMNS is the documented workaround.
#
# Per https://dev.mysql.com/doc/refman/8.0/en/year.html:
# * YEAR range: 1901 to 2155
# * Zero value: 0000 (allowed when sql_mode lacks NO_ZERO_DATE)
#
# YEAR values are rendered zero-padded to four digits, matching the
# literal MySQL form and the DATE/TIMESTAMP zero-value convention
# ("0000-00-00", "0000-00-00 00:00:00"). The binlog decode path
# additionally remaps mysql_common's 1900-on-the-wire representation
# of the zero-year back to 0; the snapshot row (id 2) and the binlog
# row (id 7) below pin both paths.

# Credentials and connection shared by both scenarios in this file.
> CREATE SECRET mysqlpass AS '${arg.mysql-root-password}'

> CREATE CONNECTION mysqc TO MYSQL (
HOST mysql,
USER root,
PASSWORD SECRET mysqlpass
)

$ mysql-connect name=mysql url=mysql://root@mysql password=${arg.mysql-root-password}

# sql_mode = '' is required so MySQL accepts the zero-year that motivates
# the use of TEXT COLUMNS in the first place.
$ mysql-execute name=mysql
DROP DATABASE IF EXISTS public;
CREATE DATABASE public;
USE public;
SET SESSION sql_mode = '';
CREATE TABLE events (id INT PRIMARY KEY, event_year YEAR NULL);
START TRANSACTION;
INSERT INTO events VALUES (1, '2024'), (2, '0000'), (3, NULL);
# Boundary rows: min and max valid YEAR values.
INSERT INTO events VALUES (4, '1901'), (5, '2155');
COMMIT;

> BEGIN
> CREATE SOURCE da
FROM MYSQL CONNECTION mysqc;
> CREATE TABLE events FROM SOURCE da (REFERENCE public.events)
WITH (TEXT COLUMNS = (event_year));
> COMMIT

# Block until the snapshot is fully ingested before issuing the
# post-snapshot inserts, so those rows go through the binlog decode
# path rather than being absorbed into the snapshot. Row 2 pins the
# snapshot-path rendering of the zero-year as "0000".
> SELECT id, event_year FROM events ORDER BY id;
1 "2024"
2 "0000"
3 <null>
4 "1901"
5 "2155"

# Post-snapshot rows exercise the replication / binlog decode path.
# Row 7 is the zero-year on this path; it is inserted as the numeric
# literal 0, whereas the snapshot row (id 2) used the string '0000'.
$ mysql-execute name=mysql
USE public;
SET SESSION sql_mode = '';
START TRANSACTION;
INSERT INTO events VALUES (6, '2025'), (7, 0), (8, NULL);
INSERT INTO events VALUES (9, '1901'), (10, '2155');
COMMIT;

# Snapshot rows (1-5) and binlog rows (6-10) must both render YEAR as
# zero-padded, four-digit text; NULLs stay NULL on both paths.
> SELECT id, event_year FROM events ORDER BY id;
1 "2024"
2 "0000"
3 <null>
4 "1901"
5 "2155"
6 "2025"
7 "0000"
8 <null>
9 "1901"
10 "2155"

# Verify the column type was rewritten to text by TEXT COLUMNS.
> SELECT pg_typeof(event_year) FROM events LIMIT 1;
text

# None of the data above should have caused the source to go into a stalled state.
> SELECT name, status, error IS NULL FROM mz_internal.mz_source_statuses WHERE name IN ('da', 'events') ORDER BY name;
da running true
events running true

# Tear down so the negative-path scenario below can reuse the source name `da`.
> DROP SOURCE da CASCADE;

#
# Negative path: a YEAR column that is NOT declared as a TEXT COLUMN cannot be
# ingested. YEAR has no native mapping in Materialize, so CREATE TABLE FROM
# SOURCE must error. This guards the documented workaround: declare YEAR
# columns in TEXT COLUMNS.
#

# Only a valid year is inserted here, so this block does not override sql_mode.
$ mysql-execute name=mysql
DROP DATABASE IF EXISTS public;
CREATE DATABASE public;
USE public;
CREATE TABLE reports (id INT PRIMARY KEY, reported_year YEAR NULL);
INSERT INTO reports VALUES (1, '2024');

# Creating the source itself is expected to succeed; only the CREATE TABLE
# that follows, which omits TEXT COLUMNS, is expected to fail.
> CREATE SOURCE da
FROM MYSQL CONNECTION mysqc;
! CREATE TABLE reports FROM SOURCE da (REFERENCE public.reports);
contains: unsupported type

> DROP SOURCE da CASCADE;
Loading