Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions sdks/python/container/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ ARG TARGETOS
ARG TARGETARCH

COPY target/base_image_requirements.txt /tmp/base_image_requirements.txt
COPY target/license_scripts/upgrade_bundled_pip.py /tmp/upgrade_bundled_pip.py
COPY target/apache-beam.tar.gz /opt/apache/beam/tars/
COPY target/launcher/${TARGETOS}_${TARGETARCH}/boot target/LICENSE target/NOTICE target/LICENSE.python /opt/apache/beam/

Expand Down Expand Up @@ -84,16 +85,20 @@ RUN \
# Remove pip cache.
rm -rf /root/.cache/pip && \

# Update ensurepip to use most recent versions of setuptools and pip. This avoids some vulnerabilities which won't be fixed on older versions of python.
pip install upgrade_ensurepip; \
python3 -m upgrade_ensurepip; \
# setuptools is not bundled with ensurepip in Python 3.12+
# Update ensurepip bundled wheels to patched pip/setuptools (security).
# setuptools is not bundled with ensurepip in Python 3.12+; upgrade_ensurepip expects both.
if [ "${py_version}" = "3.10" ] || [ "${py_version}" = "3.11" ]; then \
find /usr/local/lib/python${py_version}/ensurepip/_bundled/setuptools-* -type f ! -name $(basename $(ls -v /usr/local/lib/python${py_version}/ensurepip/_bundled/setuptools-*-py3-none-any.whl | tail -n 1)) -delete; \
fi; \
find /usr/local/lib/python${py_version}/ensurepip/_bundled/pip-* -type f ! -name $(basename $(ls -v /usr/local/lib/python${py_version}/ensurepip/_bundled/pip-*-py3-none-any.whl | tail -n 1)) -delete; \
pip uninstall upgrade_ensurepip -y; \
python3 -m ensurepip;
pip install upgrade_ensurepip && \
python3 -m upgrade_ensurepip && \
find /usr/local/lib/python${py_version}/ensurepip/_bundled/setuptools-* -type f ! -name $(basename $(ls -v /usr/local/lib/python${py_version}/ensurepip/_bundled/setuptools-*-py3-none-any.whl | tail -n 1)) -delete && \
find /usr/local/lib/python${py_version}/ensurepip/_bundled/pip-* -type f ! -name $(basename $(ls -v /usr/local/lib/python${py_version}/ensurepip/_bundled/pip-*-py3-none-any.whl | tail -n 1)) -delete && \
pip uninstall upgrade_ensurepip -y && \
python3 -m ensurepip; \
else \
python3 /tmp/upgrade_bundled_pip.py && \
find /usr/local/lib/python${py_version}/ensurepip/_bundled/pip-* -type f ! -name $(basename $(ls -v /usr/local/lib/python${py_version}/ensurepip/_bundled/pip-*-py3-none-any.whl | tail -n 1)) -delete && \
python3 -m ensurepip; \
fi
Comment on lines 90 to +101
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Instead of maintaining separate logic for Python 3.10/3.11 (using the external upgrade_ensurepip package) and Python 3.12+ (using a custom script), we can unify both cases by extending upgrade_bundled_pip.py to handle setuptools when it is present. This completely eliminates the external upgrade_ensurepip dependency, avoids complex shell-based find/ls -v cleanup commands, and simplifies the Dockerfile significantly.

    python3 /tmp/upgrade_bundled_pip.py && \
    python3 -m ensurepip;


ENTRYPOINT ["/opt/apache/beam/boot"]

Expand Down
65 changes: 65 additions & 0 deletions sdks/python/container/license_scripts/upgrade_bundled_pip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Upgrade the pip wheel bundled in ensurepip for Python 3.12+.

The script is executed within Docker after the image pip has been upgraded.
upgrade_ensurepip expects setuptools to be bundled as well, but CPython 3.12+
only ships pip in ensurepip/_bundled.
"""

import subprocess
import sys
from pathlib import Path

import ensurepip


def main():
ep_path = Path(ensurepip.__file__)
wheel_dir = ep_path.parent / '_bundled'
pip_version = subprocess.check_output(
[sys.executable, '-m', 'pip', '--version'],
text=True).split()[1]
subprocess.check_call([
sys.executable,
'-m',
'pip',
'download',
'pip=={}'.format(pip_version),
'-d',
str(wheel_dir),
'--no-deps',
])
lines = ep_path.read_text().splitlines()
pip_line = None
for idx, line in enumerate(lines):
if line.startswith('_PIP_VERSION = '):
pip_line = idx
break
if pip_line is None:
sys.exit('ensurepip _PIP_VERSION not found')
org = ep_path.with_suffix('.py.org')
if not org.exists():
ep_path.rename(org)
lines[pip_line] = '_PIP_VERSION = "{}"'.format(pip_version)
ep_path.write_text('\n'.join(lines) + '\n')


if __name__ == '__main__':
main()
Comment on lines +33 to +65
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

We can enhance upgrade_bundled_pip.py to support both pip and setuptools (if bundled), and handle the cleanup of old wheels directly in Python. This makes the script fully self-contained, robust, and reusable across all Python versions (3.10+), eliminating the need for complex shell logic in the Dockerfile.

def main():
  ep_path = Path(ensurepip.__file__)
  wheel_dir = ep_path.parent / '_bundled'
  
  # Get current pip version
  pip_version = subprocess.check_output(
      [sys.executable, '-m', 'pip', '--version'],
      text=True).split()[1]
  
  # Download new pip wheel
  subprocess.check_call([
      sys.executable,
      '-m',
      'pip',
      'download',
      f'pip=={pip_version}',
      '-d',
      str(wheel_dir),
      '--no-deps',
  ])
  
  org = ep_path.with_suffix('.py.org')
  if org.exists():
    lines = org.read_text().splitlines()
  else:
    lines = ep_path.read_text().splitlines()
    ep_path.rename(org)
  
  # Update pip version in ensurepip/__init__.py
  pip_line = None
  for idx, line in enumerate(lines):
    if line.startswith('_PIP_VERSION = '):
      pip_line = idx
      break
  if pip_line is None:
    sys.exit('ensurepip _PIP_VERSION not found')
  lines[pip_line] = f'_PIP_VERSION = "{pip_version}"'
  
  # Check if setuptools is also bundled (Python < 3.12)
  setuptools_line = None
  setuptools_version = None
  for idx, line in enumerate(lines):
    if line.startswith('_SETUPTOOLS_VERSION = '):
      setuptools_line = idx
      break
      
  if setuptools_line is not None:
    try:
      setuptools_version = subprocess.check_output(
          [sys.executable, '-c', 'import setuptools; print(setuptools.__version__)'],
          text=True).strip()
      subprocess.check_call([
          sys.executable,
          '-m',
          'pip',
          'download',
          f'setuptools=={setuptools_version}',
          '-d',
          str(wheel_dir),
          '--no-deps',
      ])
      lines[setuptools_line] = f'_SETUPTOOLS_VERSION = "{setuptools_version}"'
    except Exception as e:
      print(f"Failed to upgrade bundled setuptools: {e}", file=sys.stderr)
    
  # Write modified ensurepip/__init__.py
  ep_path.write_text('\n'.join(lines) + '\n')
  
  # Clean up old wheels in _bundled
  for path in wheel_dir.glob('pip-*.whl'):
    if pip_version not in path.name:
      path.unlink()
      
  if setuptools_version is not None:
    for path in wheel_dir.glob('setuptools-*.whl'):
      if setuptools_version not in path.name:
        path.unlink()


if __name__ == '__main__':
  main()

Loading