Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 154 additions & 0 deletions .github/workflows/docker-release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

name: Docker release - tika-server and tika-grpc

# Publishes release images to Docker Hub when a version-like tag is pushed.
# The trailing '*' in the pattern means suffixed tags also start this workflow.
on:
  push:
    tags:
      - '[0-9]+.[0-9]+.[0-9]+*'

# Least privilege for the automatic GITHUB_TOKEN: the jobs below only read the
# repository. Docker Hub access comes from the DOCKERHUB_* secrets instead.
permissions:
  contents: read

jobs:
  # Builds the tika-server "minimal" and "full" images from the Dockerfiles
  # under tika-server/docker-build and pushes them to apache/tika.
  release-tika-server:
    runs-on: ubuntu-latest
    timeout-minutes: 60

    steps:
      - uses: actions/checkout@v4

      # Strips the refs/tags/ prefix and exposes the bare tag name as the
      # step output "tag".
      - name: Extract version from tag
        id: version
        run: |
          TAG_NAME="${GITHUB_REF#refs/tags/}"
          echo "tag=${TAG_NAME}" >> "$GITHUB_OUTPUT"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Set up QEMU for multi-arch
        uses: docker/setup-qemu-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # The tag name is handed to the Dockerfile as the TIKA_VERSION build
      # argument and is also used as the image tag, next to "latest".
      - name: Build and push tika-server minimal
        uses: docker/build-push-action@v6
        with:
          file: tika-server/docker-build/minimal/Dockerfile
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/s390x
          push: true
          build-args: |
            TIKA_VERSION=${{ steps.version.outputs.tag }}
          tags: |
            apache/tika:${{ steps.version.outputs.tag }}
            apache/tika:latest

      # Same as the minimal image, but built from the "full" Dockerfile and
      # published with a "-full" suffix on both tags.
      - name: Build and push tika-server full
        uses: docker/build-push-action@v6
        with:
          file: tika-server/docker-build/full/Dockerfile
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/s390x
          push: true
          build-args: |
            TIKA_VERSION=${{ steps.version.outputs.tag }}
          tags: |
            apache/tika:${{ steps.version.outputs.tag }}-full
            apache/tika:latest-full

  # Builds the project with Maven, assembles a Docker build context from the
  # produced artifacts and pushes the apache/tika-grpc image.
  release-tika-grpc:
    runs-on: ubuntu-latest
    timeout-minutes: 120

    steps:
      - uses: actions/checkout@v4

      # Strips the refs/tags/ prefix and exposes the bare tag name as the
      # step output "tag".
      - name: Extract version from tag
        id: version
        run: |
          TAG_NAME="${GITHUB_REF#refs/tags/}"
          echo "tag=${TAG_NAME}" >> "$GITHUB_OUTPUT"

      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          distribution: 'temurin'
          java-version: '17'
          cache: 'maven'

      # The extra -D option lowers the log level of Maven's transfer listener
      # to "warn".
      - name: Build with Maven (skip tests)
        run: mvn clean install -DskipTests -B "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Set up QEMU for multi-arch
        uses: docker/setup-qemu-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Lays out libs/, plugins/, config/ and bin/ plus the Dockerfile under
      # target/tika-grpc-docker, which is the build context of the next step.
      - name: Prepare tika-grpc Docker build context
        env:
          # Passed through the environment rather than templated into the
          # script, so the tag text is never parsed as shell syntax.
          TIKA_VERSION: ${{ steps.version.outputs.tag }}
        run: |
          OUT_DIR=target/tika-grpc-docker

          mkdir -p "${OUT_DIR}/libs" "${OUT_DIR}/plugins" "${OUT_DIR}/config" "${OUT_DIR}/bin"

          cp "tika-grpc/target/tika-grpc-${TIKA_VERSION}.jar" "${OUT_DIR}/libs/"

          # Copy tika-pipes plugin zip files (plugins without a zip are skipped)
          for dir in tika-pipes/tika-pipes-plugins/*/; do
            plugin_name=$(basename "$dir")
            zip_file="${dir}target/${plugin_name}-${TIKA_VERSION}.zip"
            if [ -f "$zip_file" ]; then
              cp "$zip_file" "${OUT_DIR}/plugins/"
            fi
          done

          # Copy parser packages (packages without a jar are skipped)
          for parser_package in \
            "tika-parsers/tika-parsers-standard/tika-parsers-standard-package" \
            "tika-parsers/tika-parsers-extended/tika-parser-scientific-package" \
            "tika-parsers/tika-parsers-extended/tika-parser-sqlite3-package" \
            "tika-parsers/tika-parsers-ml/tika-parser-nlp-package"; do
            package_name=$(basename "$parser_package")
            jar_file="${parser_package}/target/${package_name}-${TIKA_VERSION}.jar"
            if [ -f "$jar_file" ]; then
              cp "$jar_file" "${OUT_DIR}/plugins/"
            fi
          done

          cp "tika-grpc/docker-build/start-tika-grpc.sh" "${OUT_DIR}/bin/"
          cp "tika-grpc/docker-build/Dockerfile" "${OUT_DIR}/Dockerfile"

      - name: Build and push tika-grpc
        uses: docker/build-push-action@v6
        with:
          context: target/tika-grpc-docker
          platforms: linux/amd64,linux/arm64
          push: true
          build-args: |
            VERSION=${{ steps.version.outputs.tag }}
          tags: |
            apache/tika-grpc:${{ steps.version.outputs.tag }}
            apache/tika-grpc:latest
147 changes: 147 additions & 0 deletions .github/workflows/docker-snapshot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

name: Docker snapshot - tika-server and tika-grpc

# Publishes snapshot images on every push to main, except for pushes that only
# touch documentation paths.
on:
  push:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - '*.md'

# Least privilege for the automatic GITHUB_TOKEN: the job below only reads the
# repository. Docker Hub access comes from the DOCKERHUB_* secrets instead.
permissions:
  contents: read

jobs:
  # Builds the project once with Maven, then assembles and pushes three images
  # from the build output: tika-server minimal, tika-server full and tika-grpc.
  build:
    runs-on: ubuntu-latest
    timeout-minutes: 120

    steps:
      - uses: actions/checkout@v4

      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          distribution: 'temurin'
          java-version: '17'
          cache: 'maven'

      # Reads project.version from the pom and exposes it as the step output
      # "tika_version"; it names the artifacts and tags the images below.
      - name: Extract version from pom
        id: version
        run: |
          TIKA_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
          echo "tika_version=${TIKA_VERSION}" >> "$GITHUB_OUTPUT"

      # The extra -D option lowers the log level of Maven's transfer listener
      # to "warn".
      - name: Build with Maven (skip tests)
        run: mvn clean install -DskipTests -B "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Set up QEMU for multi-arch
        uses: docker/setup-qemu-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # --- tika-server (minimal) ---
      # Unpacks the tika-server-standard binary tarball into the build context
      # and installs the minimal Dockerfile.snapshot as its Dockerfile.
      - name: Prepare tika-server minimal build context
        env:
          # Passed through the environment rather than templated into the
          # script, so the version text is never parsed as shell syntax.
          TIKA_VERSION: ${{ steps.version.outputs.tika_version }}
        run: |
          OUT_DIR=target/tika-server-minimal-docker
          mkdir -p "${OUT_DIR}/tika-server"
          tar xzf "tika-server/tika-server-standard/target/tika-server-standard-${TIKA_VERSION}-bin.tgz" -C "${OUT_DIR}/tika-server"
          cp "tika-server/docker-build/minimal/Dockerfile.snapshot" "${OUT_DIR}/Dockerfile"

      - name: Build and push tika-server minimal snapshot
        uses: docker/build-push-action@v6
        with:
          context: target/tika-server-minimal-docker
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/s390x
          push: true
          build-args: |
            TIKA_VERSION=${{ steps.version.outputs.tika_version }}
          tags: |
            apache/tika:${{ steps.version.outputs.tika_version }}

      # --- tika-server (full) ---
      # Same tarball as the minimal image; only the Dockerfile.snapshot differs.
      - name: Prepare tika-server full build context
        env:
          # Passed through the environment rather than templated into the
          # script, so the version text is never parsed as shell syntax.
          TIKA_VERSION: ${{ steps.version.outputs.tika_version }}
        run: |
          OUT_DIR=target/tika-server-full-docker
          mkdir -p "${OUT_DIR}/tika-server"
          tar xzf "tika-server/tika-server-standard/target/tika-server-standard-${TIKA_VERSION}-bin.tgz" -C "${OUT_DIR}/tika-server"
          cp "tika-server/docker-build/full/Dockerfile.snapshot" "${OUT_DIR}/Dockerfile"

      - name: Build and push tika-server full snapshot
        uses: docker/build-push-action@v6
        with:
          context: target/tika-server-full-docker
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/s390x
          push: true
          build-args: |
            TIKA_VERSION=${{ steps.version.outputs.tika_version }}
          tags: |
            apache/tika:${{ steps.version.outputs.tika_version }}-full

      # --- tika-grpc ---
      # Lays out libs/, plugins/, config/ and bin/ plus the Dockerfile under
      # target/tika-grpc-docker, which is the build context of the next step.
      - name: Prepare tika-grpc Docker build context
        env:
          # Passed through the environment rather than templated into the
          # script, so the version text is never parsed as shell syntax.
          TIKA_VERSION: ${{ steps.version.outputs.tika_version }}
        run: |
          OUT_DIR=target/tika-grpc-docker

          mkdir -p "${OUT_DIR}/libs" "${OUT_DIR}/plugins" "${OUT_DIR}/config" "${OUT_DIR}/bin"

          cp "tika-grpc/target/tika-grpc-${TIKA_VERSION}.jar" "${OUT_DIR}/libs/"

          # Copy tika-pipes plugin zip files (plugins without a zip are skipped)
          for dir in tika-pipes/tika-pipes-plugins/*/; do
            plugin_name=$(basename "$dir")
            zip_file="${dir}target/${plugin_name}-${TIKA_VERSION}.zip"
            if [ -f "$zip_file" ]; then
              cp "$zip_file" "${OUT_DIR}/plugins/"
            fi
          done

          # Copy parser packages (packages without a jar are skipped)
          for parser_package in \
            "tika-parsers/tika-parsers-standard/tika-parsers-standard-package" \
            "tika-parsers/tika-parsers-extended/tika-parser-scientific-package" \
            "tika-parsers/tika-parsers-extended/tika-parser-sqlite3-package" \
            "tika-parsers/tika-parsers-ml/tika-parser-nlp-package"; do
            package_name=$(basename "$parser_package")
            jar_file="${parser_package}/target/${package_name}-${TIKA_VERSION}.jar"
            if [ -f "$jar_file" ]; then
              cp "$jar_file" "${OUT_DIR}/plugins/"
            fi
          done

          cp "tika-grpc/docker-build/start-tika-grpc.sh" "${OUT_DIR}/bin/"
          cp "tika-grpc/docker-build/Dockerfile" "${OUT_DIR}/Dockerfile"

      - name: Build and push tika-grpc snapshot
        uses: docker/build-push-action@v6
        with:
          context: target/tika-grpc-docker
          platforms: linux/amd64,linux/arm64
          push: true
          build-args: |
            VERSION=${{ steps.version.outputs.tika_version }}
          tags: |
            apache/tika-grpc:${{ steps.version.outputs.tika_version }}
62 changes: 62 additions & 0 deletions tika-grpc/docker-build/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.

# "random" uid/gid hopefully not used anywhere else
# This needs to be set globally and then referenced in
# the subsequent stages -- see TIKA-3912
# Default uid:gid the container runs as; declared before FROM so it is global.
ARG UID_GID="35002:35002"

# Single-stage image based on Ubuntu "noble".
FROM ubuntu:noble

# Re-declare the global ARG (no value) so it is visible inside this stage.
ARG UID_GID
# The build context must provide libs/, plugins/, config/ and bin/.
# No --chown is given, so the copied files keep the default (root) ownership.
COPY libs/ /tika/libs/
COPY plugins/ /tika/plugins/
COPY config/ /tika/config/
COPY bin/ /tika/bin
# apt package name of the Java runtime installed by the RUN instruction below.
ARG JRE='openjdk-21-jre-headless'
# VERSION and the TIKA_GRPC_* arguments are exported as ENV values further
# down in this Dockerfile; the TIKA_GRPC_* ones carry defaults.
ARG VERSION
ARG TIKA_GRPC_MAX_INBOUND_MESSAGE_SIZE=104857600
ARG TIKA_GRPC_MAX_OUTBOUND_MESSAGE_SIZE=104857600
ARG TIKA_GRPC_NUM_THREADS=4
# One layer that installs the JRE, gdal-bin and tesseract with the eng, ita,
# fra, spa and deu language packs, pre-accepts the mscorefonts EULA through
# debconf so the font installer runs unattended, installs the font packages,
# and finally removes the apt lists and temp files.
RUN set -eux \
&& apt-get update \
&& apt-get install --yes --no-install-recommends gnupg2 software-properties-common \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends $JRE \
gdal-bin \
tesseract-ocr \
tesseract-ocr-eng \
tesseract-ocr-ita \
tesseract-ocr-fra \
tesseract-ocr-spa \
tesseract-ocr-deu \
&& echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | debconf-set-selections \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
xfonts-utils \
fonts-freefont-ttf \
fonts-liberation \
ttf-mscorefonts-installer \
wget \
cabextract \
&& apt-get clean -y \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# From here on, build instructions and the container itself run as the
# unprivileged uid:gid instead of root.
USER $UID_GID

# Documents port 9090 -- presumably the port the gRPC server listens on;
# confirm against start-tika-grpc.sh.
EXPOSE 9090
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the ARG suggested previously to run as a nonroot user

Suggested change
EXPOSE 9090
USER $UID_GID
EXPOSE 9090

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I pushed a fix for this on the updated branch. Could you please repeat the analysis when you have a moment?

# Publish the build arguments as environment variables of the image --
# presumably read by start-tika-grpc.sh at container start (script not shown
# here).
ENV TIKA_VERSION=$VERSION
ENV TIKA_GRPC_MAX_INBOUND_MESSAGE_SIZE=$TIKA_GRPC_MAX_INBOUND_MESSAGE_SIZE
ENV TIKA_GRPC_MAX_OUTBOUND_MESSAGE_SIZE=$TIKA_GRPC_MAX_OUTBOUND_MESSAGE_SIZE
ENV TIKA_GRPC_NUM_THREADS=$TIKA_GRPC_NUM_THREADS
# The start script was copied in without --chown and is therefore owned by
# root, while this stage has already switched to the unprivileged UID_GID
# user, which may not chmod files it does not own. Become root for this one
# instruction only, then drop privileges again so the container still runs
# as a non-root user.
USER root
RUN chmod +x "/tika/bin/start-tika-grpc.sh"
USER $UID_GID
ENTRYPOINT ["/tika/bin/start-tika-grpc.sh"]

LABEL maintainer="Apache Tika Developers dev@tika.apache.org"
Loading
Loading