Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -114,23 +114,34 @@ private static void clearTimeFormats() {

public CsvEnumerator(Source source, AtomicBoolean cancelFlag,
List<RelDataType> fieldTypes, List<Integer> fields) {
this(source, cancelFlag, fieldTypes, fields, ',');
}

public CsvEnumerator(Source source, AtomicBoolean cancelFlag,
List<RelDataType> fieldTypes, List<Integer> fields, char separator) {
//noinspection unchecked
this(source, cancelFlag, false, null,
(RowConverter<E>) converter(fieldTypes, fields));
(RowConverter<E>) converter(fieldTypes, fields), separator);
}

public CsvEnumerator(Source source, AtomicBoolean cancelFlag, boolean stream,
@Nullable String @Nullable [] filterValues, RowConverter<E> rowConverter) {
this(source, cancelFlag, stream, filterValues, rowConverter, ',');
}

public CsvEnumerator(Source source, AtomicBoolean cancelFlag, boolean stream,
@Nullable String @Nullable [] filterValues, RowConverter<E> rowConverter,
char separator) {
this.cancelFlag = cancelFlag;
this.rowConverter = rowConverter;
this.filterValues =
filterValues == null ? null
: ImmutableNullableList.copyOf(filterValues);
try {
if (stream) {
this.reader = new CsvStreamReader(source);
this.reader = new CsvStreamReader(source, separator);
} else {
this.reader = openCsv(source);
this.reader = openCsv(source, separator);
}
this.reader.readNext(); // skip header row
} catch (IOException e) {
Expand All @@ -157,13 +168,21 @@ private static RowConverter<?> converter(List<RelDataType> fieldTypes,
* of a CSV file. */
public static RelDataType deduceRowType(JavaTypeFactory typeFactory,
Source source, @Nullable List<RelDataType> fieldTypes, Boolean stream) {
return deduceRowType(typeFactory, source, fieldTypes, stream, ',');
}

/** Deduces the names and types of a table's columns by reading the first line
* of a CSV file. */
public static RelDataType deduceRowType(JavaTypeFactory typeFactory,
Source source, @Nullable List<RelDataType> fieldTypes, Boolean stream,
char separator) {
final List<RelDataType> types = new ArrayList<>();
final List<String> names = new ArrayList<>();
if (stream) {
names.add(FileSchemaFactory.ROWTIME_COLUMN_NAME);
types.add(typeFactory.createSqlType(SqlTypeName.TIMESTAMP));
}
try (CSVReader reader = openCsv(source)) {
try (CSVReader reader = openCsv(source, separator)) {
String[] strings = reader.readNext();
if (strings == null) {
strings = new String[]{"EmptyFileHasNoColumns:boolean"};
Expand Down Expand Up @@ -247,9 +266,9 @@ public static RelDataType deduceRowType(JavaTypeFactory typeFactory,
return typeFactory.createStructType(Pair.zip(names, types));
}

static CSVReader openCsv(Source source) throws IOException {
static CSVReader openCsv(Source source, char separator) throws IOException {
requireNonNull(source, "source");
return new CSVReader(source.reader());
return new CSVReader(source.reader(), separator);
}

@Override public E current() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ class CsvStreamReader extends CSVReader implements Closeable {
*/
public static final long DEFAULT_MONITOR_DELAY = 2000;

CsvStreamReader(Source source) {
CsvStreamReader(Source source, char separator) {
this(source,
CSVParser.DEFAULT_SEPARATOR,
separator,
CSVParser.DEFAULT_QUOTE_CHARACTER,
CSVParser.DEFAULT_ESCAPE_CHARACTER,
Comment thread
Diveyam-Mishra marked this conversation as resolved.
DEFAULT_SKIP_LINES,
Expand Down Expand Up @@ -106,7 +106,7 @@ private CsvStreamReader(Source source, char separator, char quoteChar,
/**
* Reads the next line from the buffer and converts to a string array.
*
* @return a string array with each comma-separated element as a separate entry.
* @return a string array with each delimited element as a separate entry.
*
* @throws IOException if bad things happen during the read
*/
Expand Down
11 changes: 7 additions & 4 deletions file/src/main/java/org/apache/calcite/adapter/file/CsvTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,16 @@
public abstract class CsvTable extends AbstractTable {
protected final Source source;
protected final @Nullable RelProtoDataType protoRowType;
protected final char separator;
private @Nullable RelDataType rowType;
private @Nullable List<RelDataType> fieldTypes;

/** Creates a CsvTable. */
CsvTable(Source source, @Nullable RelProtoDataType protoRowType) {
/** Creates a CsvTable with a custom separator. */
CsvTable(Source source, @Nullable RelProtoDataType protoRowType,
char separator) {
this.source = source;
this.protoRowType = protoRowType;
this.separator = separator;
}

@Override public RelDataType getRowType(RelDataTypeFactory typeFactory) {
Expand All @@ -53,7 +56,7 @@ public abstract class CsvTable extends AbstractTable {
if (rowType == null) {
rowType =
CsvEnumerator.deduceRowType((JavaTypeFactory) typeFactory, source,
null, isStream());
null, isStream(), separator);
}
return rowType;
}
Expand All @@ -63,7 +66,7 @@ public List<RelDataType> getFieldTypes(RelDataTypeFactory typeFactory) {
if (fieldTypes == null) {
fieldTypes = new ArrayList<>();
CsvEnumerator.deduceRowType((JavaTypeFactory) typeFactory, source,
fieldTypes, isStream());
fieldTypes, isStream(), separator);
}
return fieldTypes;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import org.apache.calcite.util.Source;
import org.apache.calcite.util.Sources;

import au.com.bytecode.opencsv.CSVParser;

import org.checkerframework.checker.nullness.qual.Nullable;

import java.io.File;
Expand All @@ -50,6 +52,17 @@ public CsvTableFactory() {
final Source source = Sources.file(base, fileName);
final RelProtoDataType protoRowType =
rowType != null ? RelDataTypeImpl.proto(rowType) : null;
return new CsvTranslatableTable(source, protoRowType);
final String separatorStr = (String) operand.get("separator");
final char separator;
if (separatorStr == null) {
separator = CSVParser.DEFAULT_SEPARATOR;
} else if (separatorStr.length() == 1) {
separator = separatorStr.charAt(0);
} else {
throw new IllegalArgumentException(
"Invalid separator '" + separatorStr
+ "'. Separator must be a single character.");
}
return new CsvTranslatableTable(source, protoRowType, separator);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,10 @@
*/
public class CsvTranslatableTable extends CsvTable
implements QueryableTable, TranslatableTable {
/** Creates a CsvTable. */
CsvTranslatableTable(Source source, @Nullable RelProtoDataType protoRowType) {
super(source, protoRowType);
/** Creates a CsvTranslatableTable with a custom separator. */
CsvTranslatableTable(Source source, @Nullable RelProtoDataType protoRowType,
char separator) {
super(source, protoRowType, separator);
}

@Override public String toString() {
Expand All @@ -65,7 +66,8 @@ public Enumerable<Object> project(final DataContext root,
@Override public Enumerator<Object> enumerator() {
JavaTypeFactory typeFactory = root.getTypeFactory();
return new CsvEnumerator<>(source, cancelFlag,
getFieldTypes(typeFactory), ImmutableIntList.of(fields));
getFieldTypes(typeFactory), ImmutableIntList.of(fields),
separator);
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import org.apache.calcite.util.Sources;
import org.apache.calcite.util.Util;

import au.com.bytecode.opencsv.CSVParser;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

Expand Down Expand Up @@ -142,7 +144,8 @@ private static boolean addTable(ImmutableMap.Builder<String, Table> builder,
}
final Source sourceSansCsv = sourceSansGz.trimOrNull(".csv");
if (sourceSansCsv != null) {
final Table table = new CsvTranslatableTable(source, null);
final Table table =
new CsvTranslatableTable(source, null, CSVParser.DEFAULT_SEPARATOR);
builder.put(Util.first(tableName, sourceSansCsv.path()), table);
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,91 @@ private static void checkEmpty(ResultSet resultSet) {
sql("model-with-custom-table", "select * from CUSTOM_TABLE.EMPS").ok();
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-4460">[CALCITE-4460]
* Support custom delimiter when parsing CSV tables</a>.
*
* <p>Reads a pipe-delimited file via CsvTableFactory with a custom
* separator. */
@Test void testCsvCustomSeparatorPipe() {
final String sql = "select * from CUSTOM_SEPARATOR.PIPE_DEPTS";
sql("custom-separator", sql)
.returns("DEPTNO=10; NAME=Sales",
"DEPTNO=20; NAME=Marketing",
"DEPTNO=30; NAME=Accounts",
"DEPTNO=40; NAME=tic|tac|toe")
.ok();
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-4460">[CALCITE-4460]
* Support custom delimiter when parsing CSV tables</a>.
*
* <p>Verifies quoted content is parsed correctly when it contains the custom
* separator character. */
@Test void testCsvCustomSeparatorEscaping() {
final String sql = "select * from CUSTOM_SEPARATOR.PIPE_DEPTS "
+ "where NAME = 'tic|tac|toe'";
sql("custom-separator", sql)
.returns("DEPTNO=40; NAME=tic|tac|toe")
.ok();
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-4460">[CALCITE-4460]
* Support custom delimiter when parsing CSV tables</a>.
*
* <p>Verifies that a multi-character separator is rejected. */
@Test void testCsvCustomSeparatorInvalidMultiChar() throws SQLException {
Properties info = new Properties();
info.put("model",
"inline:"
+ "{\n"
+ " version: '1.0',\n"
+ " defaultSchema: 'TEST',\n"
+ " schemas: [\n"
+ " {\n"
+ " name: 'TEST',\n"
+ " tables: [\n"
+ " {\n"
+ " name: 'BAD',\n"
+ " type: 'custom',\n"
+ " factory: 'org.apache.calcite.adapter.file.CsvTableFactory',\n"
+ " operand: {\n"
+ " file: 'sales-csv/DEPTS.csv',\n"
+ " separator: '||'\n"
+ " }\n"
+ " }\n"
+ " ]\n"
+ " }\n"
+ " ]\n"
+ "}");
try {
Connection connection =
DriverManager.getConnection("jdbc:calcite:", info);
connection.close();
throw new AssertionError("expected error");
} catch (RuntimeException e) {
Throwable cause = e;
while (cause.getCause() != null) {
cause = cause.getCause();
}
assertThat(cause.getMessage(),
is("Invalid separator '||'. "
+ "Separator must be a single character."));
}
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-4460">[CALCITE-4460]
* Support custom delimiter when parsing CSV tables</a>.
*
* <p>Verifies that omitting the separator defaults to comma. */
@Test void testCsvDefaultSeparatorBackwardCompat() {
final String sql = "select * from CUSTOM_TABLE.EMPS";
sql("model-with-custom-table", sql).ok();
}

@Test void testPushDownProject() {
final String sql = "explain plan for select * from EMPS";
final String expected = "PLAN=CsvTableScan(table=[[SALES, EMPS]], "
Expand Down
36 changes: 36 additions & 0 deletions file/src/test/resources/custom-separator.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
"version": "1.0",
"defaultSchema": "CUSTOM_SEPARATOR",
"schemas": [
{
"name": "CUSTOM_SEPARATOR",
"tables": [
{
"name": "PIPE_DEPTS",
"type": "custom",
"factory": "org.apache.calcite.adapter.file.CsvTableFactory",
"operand": {
"file": "sales-csv/PIPE_DELIMITED.csv",
"separator": "|"
}
}
]
}
]
}
5 changes: 5 additions & 0 deletions file/src/test/resources/sales-csv/PIPE_DELIMITED.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
DEPTNO:int|NAME:string
10|"Sales"
20|"Marketing"
30|"Accounts"
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a test record containing an escape character? I'd like to verify whether the custom delimiter and escape character function correctly in combination. Additionally, I had another thought: regarding support for custom escape characters—I'm not sure if Calcite supports this, though naturally, this doesn't necessarily need to be implemented in the current PR. Overall, this PR looks good.

40|"tic|tac|toe"
20 changes: 20 additions & 0 deletions site/_docs/file_adapter.md
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,26 @@ sqlline> select distinct deptno from depts;
3 rows selected (0.985 seconds)
{% endhighlight %}

### CSV Custom Separator

When using `CsvTableFactory` to define a table in a model, you can specify an
optional `separator` operand to use a custom delimiter.

{% highlight json %}
{
"name": "PIPE_DEPTS",
"type": "custom",
"factory": "org.apache.calcite.adapter.file.CsvTableFactory",
"operand": {
"file": "sales-csv/PIPE_DELIMITED.csv",
"separator": "|"
}
}
{% endhighlight %}

The separator must be a single character. If not specified, it defaults to a
comma.

## JSON files and model-free browsing

Some files describe their own schema, and for these files, we do not need a model. For example, `DEPTS.json` has an integer `DEPTNO` column and a string `NAME` column:
Expand Down
Loading