Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions sdks/python/apache_beam/io/tfrecordio.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
import struct
from functools import partial

import crcmod

from apache_beam import coders
from apache_beam.io import filebasedsink
from apache_beam.io.filebasedsource import FileBasedSource
Expand All @@ -35,6 +33,16 @@
from apache_beam.io.iobase import Write
from apache_beam.transforms import PTransform

try:
import crcmod
except ImportError:
logging.warning(
'crcmod package not found. This package is required if '
'python-snappy or google-crc32c are not installed. To ensure crcmod is '
'installed, install the tfrecord extra: pip install '
'apache-beam[tfrecord]')
crcmod = None

__all__ = ['ReadFromTFRecord', 'ReadAllFromTFRecord', 'WriteToTFRecord']

_LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -67,6 +75,11 @@ def _default_crc32c_fn(value):
pass

if not _default_crc32c_fn.fn:
if crcmod is None:
raise RuntimeError(
'Could not find python-snappy, google-crc32c, or crcmod. To allow '
'execution to succeed, make sure that one of these packages is '
'installed or pip install apache-beam[tfrecord]')
_LOGGER.warning(
'Couldn\'t find python-snappy or google-crc32c so the '
'implementation of _TFRecordUtil._masked_crc32c is not as fast '
Expand Down
7 changes: 6 additions & 1 deletion sdks/python/apache_beam/io/tfrecordio_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import zlib
from datetime import datetime

import crcmod
import pytz

import apache_beam as beam
Expand Down Expand Up @@ -61,6 +60,11 @@
tf = None # pylint: disable=invalid-name
logging.warning('Tensorflow is not installed, so skipping some tests.')

try:
import crcmod
except ImportError:
crcmod = None

# Created by running following code in python:
# >>> import tensorflow as tf
# >>> import base64
Expand Down Expand Up @@ -121,6 +125,7 @@ def test_masked_crc32c(self):
0xe4999b0,
_TFRecordUtil._masked_crc32c(b'\x03\x00\x00\x00\x00\x00\x00\x00'))

@unittest.skipIf(crcmod is None, 'crcmod not installed.')
def test_masked_crc32c_crcmod(self):
crc32c_fn = crcmod.predefined.mkPredefinedCrcFun('crc-32c')
self.assertEqual(
Expand Down
6 changes: 3 additions & 3 deletions sdks/python/container/common.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
"${files(configurations.sdkSourceTarball.files).singleFile} " +
"base_image_requirements.txt " +
"container " +
"[gcp,dataframe,test] " +
"[gcp,dataframe,test,tfrecord] " +
"${pipExtraOptions}"
}
// Generate versions for ML dependencies
Expand All @@ -53,7 +53,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
"${files(configurations.sdkSourceTarball.files).singleFile} " +
"base_image_requirements.txt " +
"container/ml " +
"[gcp,dataframe,test,ml_cpu] " +
"[gcp,dataframe,test,ml_cpu,tfrecord] " +
"${pipExtraOptions}"
}
// TODO(https://github.com/apache/beam/issues/36637)
Expand All @@ -73,7 +73,7 @@ def generatePythonRequirements = tasks.register("generatePythonRequirements") {
"${files(configurations.sdkSourceTarball.files).singleFile} " +
"gpu_image_requirements.txt " +
"container/ml " +
"[gcp,dataframe,test,tensorflow,torch,transformers,vllm] " +
"[gcp,dataframe,test,tensorflow,tfrecord,torch,transformers,vllm] " +
"${pipExtraOptions}"
}
}
Expand Down
2 changes: 1 addition & 1 deletion sdks/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,6 @@ def get_portability_package_data():
},
ext_modules=extensions,
install_requires=[
'crcmod>=1.7,<2.0',
'cryptography>=39.0.0,<48.0.0',
'fastavro>=0.23.6,<2',
'fasteners>=0.3,<1.0',
Expand Down Expand Up @@ -596,6 +595,7 @@ def get_portability_package_data():
,
'dill'
],
'tfrecord': ['crcmod>=1.7,<2.0'],
'onnx': [
'onnxruntime==1.13.1',
'torch==1.13.1',
Expand Down
2 changes: 1 addition & 1 deletion sdks/python/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pip_pre = True
# allow apps that support color to use it.
passenv=TERM,CLOUDSDK_CONFIG,DOCKER_*,TESTCONTAINERS_*,TC_*,ALLOYDB_PASSWORD
# Set [] options for pip installation of apache-beam tarball.
extras = test,dataframe,yaml
extras = test,dataframe,tfrecord,yaml
# Don't warn that these commands aren't installed.
allowlist_externals =
false
Expand Down
Loading