Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/source/python/parquet.rst
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,9 @@ also supported:
Snappy generally results in better performance, while Gzip may yield smaller
files.

``'lz4_raw'`` is also accepted as an alias for ``'lz4'``. Both use the
LZ4_RAW codec as defined in the Parquet specification.

These settings can also be set on a per-column basis:

.. code-block:: python
Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/_parquet.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1524,7 +1524,7 @@ cdef compression_name_from_enum(ParquetCompression compression_):

# Validate a user-supplied Parquet compression codec name.
#
# The comparison is case-insensitive: ``name`` is upper-cased before the
# membership test. Returns 0 when the name is supported and raises
# ArrowException otherwise; the ``except -1`` clause is what allows that
# exception to propagate out of this C-level function.
cdef int check_compression_name(name) except -1:
    # 'LZ4_RAW' is listed next to 'LZ4' so that both spellings pass
    # validation.
    if name.upper() not in {'NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4',
                            'LZ4_RAW', 'ZSTD'}:
        raise ArrowException("Unsupported compression: " + name)
    return 0

Expand All @@ -1539,7 +1539,7 @@ cdef ParquetCompression compression_from_name(name):
return ParquetCompression_LZO
elif name == 'BROTLI':
return ParquetCompression_BROTLI
elif name == 'LZ4':
elif name == 'LZ4' or name == 'LZ4_RAW':
return ParquetCompression_LZ4
elif name == 'ZSTD':
return ParquetCompression_ZSTD
Expand Down
4 changes: 3 additions & 1 deletion python/pyarrow/parquet/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,9 @@ def _sanitize_table(table, new_schema, flavor):
doesn't support dictionary encoding.
compression : str or dict, default 'snappy'
Specify the compression codec, either on a general basis or per-column.
Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}.
Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'LZ4_RAW', 'ZSTD'}.
'LZ4_RAW' is accepted as an alias for 'LZ4' (both use the LZ4_RAW
codec as defined in the Parquet specification).
write_statistics : bool or list, default True
Specify if we should write statistics in general (default is True) or only
for some columns.
Expand Down
8 changes: 8 additions & 0 deletions python/pyarrow/tests/parquet/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,14 @@ def test_compression_level():
compression_level=level)


def test_lz4_raw_compression_alias():
    """Round-trip a small table using both spellings of the 'lz4_raw' name."""
    # GH-41863: lz4_raw should be accepted as a compression name alias
    values = pa.array(list(range(1000)))
    tbl = pa.Table.from_arrays([values, values], names=['a', 'b'])
    # The lower- and upper-case spellings must both be accepted.
    for codec in ("lz4_raw", "LZ4_RAW"):
        _check_roundtrip(tbl, expected=tbl, compression=codec)


def test_sanitized_spark_field_names():
a0 = pa.array([0, 1, 2, 3, 4])
name = 'prohib; ,\t{}'
Expand Down
Loading