Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
279 changes: 279 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,158 @@ jobs:
flags: amber
fail_ci_if_error: false

amber-rest:
  # Re-runs the amber/common Scala tests against an Iceberg REST catalog
  # (Lakekeeper backed by MinIO) so the REST code paths exercised by
  # IcebergUtil.createRestCatalog and friends get end-to-end coverage.
  # The existing `amber` job continues to cover the Postgres-catalog path.
  if: ${{ inputs.run_amber }}
  runs-on: ubuntu-22.04
  env:
    JAVA_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M -Dfile.encoding=UTF-8
    JVM_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M -Dfile.encoding=UTF-8
  services:
    postgres:
      image: postgres
      env:
        POSTGRES_PASSWORD: postgres
      ports:
        # Quoted: digit-and-colon values hit YAML 1.1's sexagesimal-int trap
        # in generic parsers; the string form is unambiguous everywhere.
        - "5432:5432"
      options: >-
        --health-cmd="pg_isready -U postgres"
        --health-interval=10s
        --health-timeout=5s
        --health-retries=5
  steps:
    - name: Checkout
      uses: actions/checkout@v5
      with:
        ref: ${{ inputs.checkout_ref || github.sha }}
        fetch-depth: 0
    - name: Prepare backport workspace
      if: ${{ inputs.backport_target_branch != '' }}
      working-directory: ${{ github.workspace }}
      run: bash ./.github/scripts/prepare-backport-checkout.sh "${{ inputs.backport_target_branch }}" "${{ inputs.backport_commit_range }}"
    - name: Setup JDK
      uses: actions/setup-java@v5
      with:
        distribution: "temurin"
        java-version: 11
    - name: Create Databases
      run: |
        psql -h localhost -U postgres -f sql/texera_ddl.sql
        psql -h localhost -U postgres -f sql/iceberg_postgres_catalog.sql
        psql -h localhost -U postgres -f sql/texera_lakefs.sql
        psql -h localhost -U postgres -f sql/texera_lakekeeper.sql
      env:
        PGPASSWORD: postgres
    - name: Setup sbt launcher
      uses: sbt/setup-sbt@508b753e53cb6095967669e0911487d2b9bc9f41 # v1.1.22
    - uses: coursier/cache-action@90c37294538be80a558fd665531fcdc2b467b475 # v8.1.0
      with:
        extraSbtFiles: '["*.sbt", "project/**.{scala,sbt}", "project/build.properties" ]'
    - name: Create texera_db_for_test_cases
      run: psql -h localhost -U postgres -v DB_NAME=texera_db_for_test_cases -f sql/texera_ddl.sql
      env:
        PGPASSWORD: postgres
    - name: Start MinIO
      run: |
        docker run -d --name minio --network host \
          -e MINIO_ROOT_USER=texera_minio \
          -e MINIO_ROOT_PASSWORD=password \
          minio/minio:RELEASE.2025-02-28T09-55-16Z server /data
        for i in $(seq 1 30); do
          curl -sf http://localhost:9000/minio/health/live && break
          echo "Waiting for MinIO... (attempt $i)"
          sleep 2
        done
        # Fail fast if MinIO never became healthy — mirrors the Lakekeeper
        # step below. Without this, an exhausted wait loop exits 0 and a dead
        # MinIO only surfaces as a confusing failure in a later step.
        curl -sf http://localhost:9000/minio/health/live || {
          echo "MinIO failed to start. Container logs:"
          docker logs minio
          exit 1
        }
    - name: Start Lakekeeper
      # Lakekeeper splits the catalog DB into separate read/write URLs to
      # support primary + read-replica deployments. CI has a single Postgres
      # so both env vars point at the same database; declared once via the
      # step env block and re-used across the migrate + serve invocations.
      env:
        LAKEKEEPER__PG_DATABASE_URL_READ: postgres://postgres:postgres@localhost:5432/texera_lakekeeper
        LAKEKEEPER__PG_DATABASE_URL_WRITE: postgres://postgres:postgres@localhost:5432/texera_lakekeeper
        LAKEKEEPER__PG_ENCRYPTION_KEY: texera_key
      run: |
        docker run --rm --network host \
          -e LAKEKEEPER__PG_DATABASE_URL_READ \
          -e LAKEKEEPER__PG_DATABASE_URL_WRITE \
          -e LAKEKEEPER__PG_ENCRYPTION_KEY \
          vakamo/lakekeeper:v0.11.0 migrate
        docker run -d --name lakekeeper --network host \
          -e LAKEKEEPER__PG_DATABASE_URL_READ \
          -e LAKEKEEPER__PG_DATABASE_URL_WRITE \
          -e LAKEKEEPER__PG_ENCRYPTION_KEY \
          -e LAKEKEEPER__METRICS_PORT=9091 \
          vakamo/lakekeeper:v0.11.0 serve
        for i in $(seq 1 30); do
          docker exec lakekeeper /home/nonroot/lakekeeper healthcheck && break
          echo "Waiting for Lakekeeper... (attempt $i)"
          sleep 2
        done
        docker exec lakekeeper /home/nonroot/lakekeeper healthcheck || {
          echo "Lakekeeper failed to start. Container logs:"
          docker logs lakekeeper
          exit 1
        }
    - name: Initialize Lakekeeper warehouse
      run: |
        docker run --rm --network host --entrypoint sh minio/mc -c \
          "mc alias set minio http://localhost:9000 texera_minio password && \
           mc mb --ignore-existing minio/texera-iceberg"
        curl -sf -X POST -H 'Content-Type: application/json' \
          -d '{"project-id":"00000000-0000-0000-0000-000000000000","project-name":"default"}' \
          http://localhost:8181/management/v1/project || true
        curl -sf -X POST -H 'Content-Type: application/json' -d '{
          "warehouse-name": "texera",
          "project-id": "00000000-0000-0000-0000-000000000000",
          "storage-profile": {
            "type": "s3",
            "bucket": "texera-iceberg",
            "region": "us-west-2",
            "endpoint": "http://localhost:9000",
            "flavor": "s3-compat",
            "path-style-access": true,
            "sts-enabled": false
          },
          "storage-credential": {
            "type": "s3",
            "credential-type": "access-key",
            "aws-access-key-id": "texera_minio",
            "aws-secret-access-key": "password"
          }
        }' http://localhost:8181/management/v1/warehouse
    - name: Set docker-java API version
      run: |
        echo "api.version=1.52" >> ~/.docker-java.properties
        cat ~/.docker-java.properties
    - name: Run amber and common module tests against REST catalog
      env:
        AMBER_TEST_FILTER: skip-integration
        STORAGE_ICEBERG_CATALOG_TYPE: rest
        STORAGE_ICEBERG_CATALOG_REST_URI: http://localhost:8181/catalog/
        STORAGE_ICEBERG_CATALOG_REST_WAREHOUSE_NAME: texera
        STORAGE_S3_ENDPOINT: http://localhost:9000
        STORAGE_S3_REGION: us-west-2
        STORAGE_S3_AUTH_USERNAME: texera_minio
        STORAGE_S3_AUTH_PASSWORD: password
      run: |
        sbt "DAO/jacoco" \
            "PyBuilder/jacoco" \
            "WorkflowCore/jacoco" \
            "WorkflowOperator/jacoco" \
            "WorkflowExecutionService/jacoco"
    - name: Upload amber-rest coverage to Codecov
      if: always()
      uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        files: ./**/target/scala-2.13/jacoco/report/jacoco.xml
        flags: amber-rest
        fail_ci_if_error: false

amber-integration:
# Runs Scala tests tagged @org.apache.texera.amber.tags.IntegrationTest —
# currently the e2e specs that spawn Python UDF workers. Provisions
Expand Down Expand Up @@ -534,6 +686,133 @@ jobs:
flags: python
fail_ci_if_error: false

python-rest:
  # Re-runs the Python pytest suite with STORAGE_ICEBERG_CATALOG_TYPE=rest
  # so the Iceberg REST catalog client paths (pyiceberg's REST loader,
  # iceberg_utils.create_rest_catalog) get end-to-end coverage. The
  # existing `python` job continues to cover the Postgres-catalog path
  # across the supported Python versions; this job pins to 3.12 since it's
  # an integration check, not a version-compat matrix.
  if: ${{ inputs.run_python }}
  runs-on: ubuntu-latest
  services:
    postgres:
      image: postgres
      env:
        POSTGRES_PASSWORD: postgres
      ports:
        # Quoted: digit-and-colon values hit YAML 1.1's sexagesimal-int trap
        # in generic parsers; the string form is unambiguous everywhere.
        - "5432:5432"
      options: >-
        --health-cmd="pg_isready -U postgres"
        --health-interval=10s
        --health-timeout=5s
        --health-retries=5
  steps:
    - name: Checkout Texera
      uses: actions/checkout@v5
      with:
        ref: ${{ inputs.checkout_ref || github.sha }}
        fetch-depth: 0
    - name: Prepare backport workspace
      if: ${{ inputs.backport_target_branch != '' }}
      run: bash ./.github/scripts/prepare-backport-checkout.sh "${{ inputs.backport_target_branch }}" "${{ inputs.backport_commit_range }}"
    - name: Set up Python 3.12
      uses: actions/setup-python@v6
      with:
        python-version: "3.12"
    - name: Install dependencies
      run: |
        python -m pip install uv
        if [ -f amber/requirements.txt ]; then uv pip install --system --index-strategy unsafe-best-match -r amber/requirements.txt; fi
        if [ -f amber/operator-requirements.txt ]; then uv pip install --system --index-strategy unsafe-best-match -r amber/operator-requirements.txt; fi
        if [ -f amber/dev-requirements.txt ]; then uv pip install --system -r amber/dev-requirements.txt; fi
    - name: Create iceberg catalog database
      run: |
        psql -h localhost -U postgres -f sql/iceberg_postgres_catalog.sql
        psql -h localhost -U postgres -f sql/texera_lakekeeper.sql
      env:
        PGPASSWORD: postgres
    - name: Start MinIO
      run: |
        docker run -d --name minio --network host \
          -e MINIO_ROOT_USER=texera_minio \
          -e MINIO_ROOT_PASSWORD=password \
          minio/minio:RELEASE.2025-02-28T09-55-16Z server /data
        for i in $(seq 1 30); do
          curl -sf http://localhost:9000/minio/health/live && break
          echo "Waiting for MinIO... (attempt $i)"
          sleep 2
        done
        # Fail fast if MinIO never became healthy — mirrors the Lakekeeper
        # step below. Without this, an exhausted wait loop exits 0 and a dead
        # MinIO only surfaces as a confusing failure in a later step.
        curl -sf http://localhost:9000/minio/health/live || {
          echo "MinIO failed to start. Container logs:"
          docker logs minio
          exit 1
        }
    - name: Start Lakekeeper
      # See amber-rest job for why both READ and WRITE URLs are set to the
      # same DB.
      env:
        LAKEKEEPER__PG_DATABASE_URL_READ: postgres://postgres:postgres@localhost:5432/texera_lakekeeper
        LAKEKEEPER__PG_DATABASE_URL_WRITE: postgres://postgres:postgres@localhost:5432/texera_lakekeeper
        LAKEKEEPER__PG_ENCRYPTION_KEY: texera_key
      run: |
        docker run --rm --network host \
          -e LAKEKEEPER__PG_DATABASE_URL_READ \
          -e LAKEKEEPER__PG_DATABASE_URL_WRITE \
          -e LAKEKEEPER__PG_ENCRYPTION_KEY \
          vakamo/lakekeeper:v0.11.0 migrate
        docker run -d --name lakekeeper --network host \
          -e LAKEKEEPER__PG_DATABASE_URL_READ \
          -e LAKEKEEPER__PG_DATABASE_URL_WRITE \
          -e LAKEKEEPER__PG_ENCRYPTION_KEY \
          -e LAKEKEEPER__METRICS_PORT=9091 \
          vakamo/lakekeeper:v0.11.0 serve
        for i in $(seq 1 30); do
          docker exec lakekeeper /home/nonroot/lakekeeper healthcheck && break
          echo "Waiting for Lakekeeper... (attempt $i)"
          sleep 2
        done
        docker exec lakekeeper /home/nonroot/lakekeeper healthcheck || {
          echo "Lakekeeper failed to start. Container logs:"
          docker logs lakekeeper
          exit 1
        }
    - name: Initialize Lakekeeper warehouse
      run: |
        docker run --rm --network host --entrypoint sh minio/mc -c \
          "mc alias set minio http://localhost:9000 texera_minio password && \
           mc mb --ignore-existing minio/texera-iceberg"
        curl -sf -X POST -H 'Content-Type: application/json' \
          -d '{"project-id":"00000000-0000-0000-0000-000000000000","project-name":"default"}' \
          http://localhost:8181/management/v1/project || true
        curl -sf -X POST -H 'Content-Type: application/json' -d '{
          "warehouse-name": "texera",
          "project-id": "00000000-0000-0000-0000-000000000000",
          "storage-profile": {
            "type": "s3",
            "bucket": "texera-iceberg",
            "region": "us-west-2",
            "endpoint": "http://localhost:9000",
            "flavor": "s3-compat",
            "path-style-access": true,
            "sts-enabled": false
          },
          "storage-credential": {
            "type": "s3",
            "credential-type": "access-key",
            "aws-access-key-id": "texera_minio",
            "aws-secret-access-key": "password"
          }
        }' http://localhost:8181/management/v1/warehouse
    - name: Test with pytest against REST catalog
      # NOTE(review): unlike the amber-rest job, only CATALOG_TYPE is
      # overridden here — presumably the Python test fixtures default the
      # REST URI / S3 settings to these localhost services; confirm that the
      # suite does not silently fall back to the Postgres catalog.
      env:
        STORAGE_ICEBERG_CATALOG_TYPE: rest
      run: |
        cd amber && pytest --cov=src/main/python --cov-report=xml -sv
    - name: Upload python-rest coverage to Codecov
      if: always()
      uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5.5.4
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        files: ./amber/coverage.xml
        flags: python-rest
        fail_ci_if_error: false

agent-service:
if: ${{ inputs.run_agent_service }}
name: ${{ format('agent-service{0} ({1})', inputs.job_name_suffix, matrix.os) }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.

import datetime
import os
import pytest
import random
import tempfile
Expand All @@ -42,7 +43,7 @@
# user-resources/..."` value silently relied on CWD = amber/src/main/python
# and broke when the cwd moved up to amber/).
StorageConfig.initialize(
catalog_type="postgres",
catalog_type=os.environ.get("STORAGE_ICEBERG_CATALOG_TYPE", "postgres"),
postgres_uri_without_scheme="localhost:5432/texera_iceberg_catalog",
postgres_username="texera",
postgres_password="password",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

import os
import pytest
from unittest.mock import patch, MagicMock
from pytexera.storage import large_binary_manager
Expand All @@ -27,7 +28,7 @@ def setup_storage_config(self):
"""Initialize StorageConfig for tests."""
if not StorageConfig._initialized:
StorageConfig.initialize(
catalog_type="postgres",
catalog_type=os.environ.get("STORAGE_ICEBERG_CATALOG_TYPE", "postgres"),
postgres_uri_without_scheme="localhost:5432/test",
postgres_username="test",
postgres_password="test",
Expand Down
7 changes: 7 additions & 0 deletions common/workflow-core/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ ThisBuild / conflictManager := ConflictManager.latestRevision
// Restrict parallel execution of tests to avoid conflicts
Global / concurrentRestrictions += Tags.limit(Tags.Test, 1)

// Iceberg-aws S3FileIO trips a ClassCastException under sbt's layered
// classloader, so fork the test JVM when the REST catalog is exercised
// (i.e. the amber-rest CI job). The Postgres-catalog path is unaffected
// and continues to run in-process.
Test / fork := sys.env.get("STORAGE_ICEBERG_CATALOG_TYPE").contains("rest")
// NOTE(review): pins the forked JVM's working directory to the build root
// rather than this subproject's directory — presumably so relative paths
// used by tests resolve the same way as in-process runs; confirm which
// test resources rely on this.
Test / baseDirectory := (ThisBuild / baseDirectory).value


/////////////////////////////////////////////////////////////////////////////
// Compiler Options
Expand Down
Loading