# llm service fix. #515

# Workflow name and triggers for the llm-service pipeline.
name: LLM Service CI/CD
on:
  # Automatic run: pushes to master that touch the service sources or this
  # workflow file.
  push:
    branches: [master]
    paths:
      - "llm-service/**"
      - ".github/workflows/llm-service-ci-cd.yml"
  # Manual run: the caller picks the target environment (defaults to prod).
  workflow_dispatch:
    inputs:
      environment:
        description: "Environment to deploy to"
        required: true
        default: "prod"
        type: choice
        options:
          - prod
          - debug
jobs:
  # Builds the llm-service jar with Maven, publishes a linux/arm64 image to
  # Docker Hub, then replaces the running container on the VPS over SSH and
  # waits until the service reports itself healthy.
  build-and-deploy:
    runs-on: self-hosted
    env:
      OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
    steps:
      - uses: actions/checkout@v3
      - name: Set up JDK
        uses: actions/setup-java@v3
        with:
          java-version: "21"
          distribution: "temurin"
      - name: Cache Maven packages
        uses: actions/cache@v3
        with:
          path: ~/.m2
          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
          restore-keys: ${{ runner.os }}-m2
      - name: Grant execute permission for mvnw
        run: |
          cd llm-service
          chmod +x mvnw
      - name: Build
        run: |
          cd llm-service
          ./mvnw clean package -DskipTests
      # QEMU + Buildx are set up so the image can be built for arm64.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
        with:
          platforms: arm64
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Build and Push Docker image
        uses: docker/build-push-action@v4
        with:
          context: ./llm-service
          platforms: linux/arm64
          push: true
          tags: ${{ secrets.DOCKERHUB_USERNAME }}/llm-service:latest-arm64
      - name: Deploy to VPS
        # Pinned to a release tag: this third-party action receives the SSH
        # private key, and @master is a mutable ref.
        uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.VPS_HOST }}
          username: ${{ secrets.SSH_USERNAME }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          script: |
            mkdir -p /opt/craftpilot/tmp/netty

            image="${{ secrets.DOCKERHUB_USERNAME }}/llm-service:latest-arm64"

            # Abort before touching the running container if the new image
            # cannot be fetched; otherwise the stale local image would be
            # redeployed and the release reported as successful.
            if ! docker pull "$image"; then
              echo "Failed to pull image - leaving the current container untouched"
              exit 1
            fi

            docker stop llm-service || true
            docker rm llm-service || true

            # JVM flags shared by every deployment.
            java_tool_options="-XX:+UseContainerSupport -XX:MaxRAMPercentage=75.0"
            java_tool_options="$java_tool_options -Dio.netty.noNative=true -Dio.netty.tryReflectionSetAccessible=true"
            java_tool_options="$java_tool_options -Dnetty.native.workdir=/tmp/netty"

            # Debug deployments additionally publish port 5005 and attach the
            # JDWP agent. Push-triggered runs carry no inputs, so the
            # comparison is false and the non-debug variant is deployed.
            debug_publish=""
            if [[ "${{ github.event.inputs.environment }}" == "debug" ]]; then
              debug_publish="--publish=5005:5005"
              java_tool_options="$java_tool_options -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005"
            fi

            # NOTE(review): the Eureka, Spring Security and Redis passwords
            # below are committed in plain text. Consider moving them to
            # repository secrets (as is already done for OPENROUTER_API_KEY)
            # and rotating them.
            #
            # $debug_publish is intentionally unquoted so that it vanishes
            # from the argument list when empty.
            docker run -d \
              --name llm-service \
              --network craftpilot-network \
              --restart unless-stopped \
              -p 8062:8062 \
              $debug_publish \
              -v /opt/craftpilot/gcp-credentials.json:/gcp-credentials.json:ro \
              -v /opt/craftpilot/tmp/netty:/tmp/netty \
              -e SPRING_PROFILES_ACTIVE=prod \
              -e OPENROUTER_API_KEY="${{ secrets.OPENROUTER_API_KEY }}" \
              -e KAFKA_BOOTSTRAP_SERVERS=kafka:9092 \
              -e EUREKA_CLIENT_SERVICEURL_DEFAULTZONE=http://craftpilot:13579ada@eureka-server:8761/eureka/ \
              -e GOOGLE_APPLICATION_CREDENTIALS=/gcp-credentials.json \
              -e SPRING_SECURITY_USER_NAME=craftpilot \
              -e SPRING_SECURITY_USER_PASSWORD=13579ada \
              -e SPRING_REDIS_HOST=redis \
              -e SPRING_REDIS_PORT=6379 \
              -e SPRING_REDIS_PASSWORD=13579ada \
              -e MANAGEMENT_ENDPOINTS_WEB_BASE_PATH=/actuator \
              -e "MANAGEMENT_ENDPOINTS_WEB_EXPOSURE_INCLUDE=*" \
              -e MANAGEMENT_ENDPOINT_HEALTH_SHOW_DETAILS=always \
              -e MANAGEMENT_ENDPOINT_HEALTH_PROBES_ENABLED=true \
              -e MANAGEMENT_HEALTH_LIVENESSSTATE_ENABLED=true \
              -e MANAGEMENT_HEALTH_READINESSSTATE_ENABLED=true \
              -e MANAGEMENT_HEALTH_DEFAULTS_ENABLED=true \
              -e SPRING_KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT \
              -e LOGGING_LEVEL_COM_CRAFTPILOT=DEBUG \
              -e "JAVA_TOOL_OPTIONS=$java_tool_options" \
              --ulimit nofile=65536:65536 \
              --cap-drop ALL \
              --security-opt no-new-privileges \
              --health-cmd="curl -f http://localhost:8062/actuator/health/liveness || exit 1" \
              --health-interval=30s \
              --health-timeout=10s \
              --health-retries=3 \
              --health-start-period=120s \
              "$image"

            # Poll for up to max_attempts * 15 seconds. Success requires, in
            # order: the startup log line, the Eureka registration log line,
            # and an UP response from the actuator health endpoint.
            echo "=== Waiting for service startup ==="
            max_attempts=20
            counter=0
            while [ $counter -lt $max_attempts ]; do
              echo "Health check attempt $((counter + 1))/$max_attempts"
              # Container status check: fail fast if the container has exited.
              if ! docker ps --filter "name=llm-service" --format '{{.Status}}' | grep -q "Up"; then
                echo "Container is not running anymore. Checking logs..."
                docker logs llm-service
                exit 1
              fi
              # Application log check
              if docker logs llm-service 2>&1 | grep -q "Started LlmServiceApplication"; then
                echo "Application startup completed"
                # Eureka registration check
                if docker logs llm-service 2>&1 | grep -q "DiscoveryClient.*registration status: 204"; then
                  echo "Successfully registered with Eureka"
                  # Final health check
                  HEALTH_CHECK=$(curl -s http://localhost:8062/actuator/health)
                  if echo "$HEALTH_CHECK" | grep -q '"status":"UP"'; then
                    echo "✓ Service is healthy"
                    echo "=== Deployment completed successfully ==="
                    exit 0
                  fi
                fi
              fi
              echo "Waiting for service to start... ($((counter + 1))/$max_attempts)"
              sleep 15
              counter=$((counter + 1))
            done

            # All attempts exhausted: dump diagnostics and fail the step.
            echo "=== Health check failed - Debug Information ==="
            echo "Docker Status:"
            docker ps -a | grep llm-service
            echo "Container Logs:"
            docker logs llm-service --tail 100
            echo "Health Check Response:"
            curl -v http://localhost:8062/actuator/health || true
            exit 1