Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,4 @@ logs/

pkgs/bay/ship_data/
pkgs/bay/scripts/
pkgs/bay/tests/k8s/k8s-deploy-local.yaml
91 changes: 74 additions & 17 deletions pkgs/bay/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,27 +1,84 @@
# ============================================
# Stage 1: Build frontend (Vue.js Dashboard)
# ============================================
FROM node:22-alpine AS frontend-builder

WORKDIR /app/dashboard

# Copy package files first for better caching
COPY dashboard/package.json dashboard/package-lock.json ./

# Install dependencies
RUN npm ci

# Copy source files
COPY dashboard/ ./

# Build for production
RUN npm run build

# ============================================
# Stage 2: Build Python dependencies
# ============================================
FROM python:3.11-slim AS python-builder

WORKDIR /app

# Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libc6-dev \
&& rm -rf /var/lib/apt/lists/*

# Copy requirements and install dependencies
COPY requirements.txt ./
RUN pip wheel --no-cache-dir --wheel-dir /app/wheels -r requirements.txt

# ============================================
# Stage 3: Final image with Nginx + Python
# ============================================
FROM python:3.11-slim

# Install system dependencies
# RUN apt-get update && apt-get install -y \
# gcc \
# libc6-dev \
# libffi-dev \
# bash \
# && rm -rf /var/lib/apt/lists/*
# Install Nginx and curl (for health checks)
RUN apt-get update && apt-get install -y --no-install-recommends \
nginx \
curl \
&& rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy all project files
COPY pyproject.toml requirements.txt alembic.ini run.py ./
# Copy Python wheels and install
COPY --from=python-builder /app/wheels /app/wheels
COPY requirements.txt ./
RUN pip install --no-cache-dir --no-index --find-links=/app/wheels -r requirements.txt \
&& rm -rf /app/wheels

# Copy Python application files
COPY pyproject.toml alembic.ini run.py ./
COPY app/ ./app/
COPY alembic/ ./alembic/

# Install dependencies and create data directory in one layer
RUN pip install -r requirements.txt --no-cache-dir && \
mkdir -p /app/data
# Create data directory
RUN mkdir -p /app/data

# Copy built frontend to Nginx html directory
COPY --from=frontend-builder /app/dashboard/dist /usr/share/nginx/html

# Copy Nginx configuration
COPY nginx.conf /etc/nginx/nginx.conf

# Copy and prepare entrypoint script
COPY docker-entrypoint.sh /docker-entrypoint.sh
RUN chmod +x /docker-entrypoint.sh

# Expose ports:
# - 8156: API (Python backend, can be exposed publicly)
# - 8157: Dashboard (Nginx, can be hidden behind NAT)
EXPOSE 8156 8157

# Expose port
EXPOSE 8156
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
CMD curl -f http://localhost:8157/nginx-health && curl -f http://localhost:8156/health || exit 1

# Start the application
CMD ["python", "run.py"]
# Start both services
ENTRYPOINT ["/docker-entrypoint.sh"]
149 changes: 130 additions & 19 deletions pkgs/bay/app/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from sqlalchemy.pool import StaticPool
from typing import Optional, List
from app.config import settings
from app.models import Ship, SessionShip
from app.models import Ship, SessionShip, ShipStatus
from datetime import datetime, timezone


Expand Down Expand Up @@ -63,10 +63,12 @@ async def update_ship(self, ship: Ship) -> Ship:
ship.updated_at = datetime.now(timezone.utc)
session = self.get_session()
try:
session.add(ship)
# Use merge() instead of add() to handle detached objects
# merge() copies the state of the given instance into a persistent instance
merged_ship = await session.merge(ship)
await session.commit()
await session.refresh(ship)
return ship
await session.refresh(merged_ship)
return merged_ship
finally:
await session.close()

Expand All @@ -87,10 +89,23 @@ async def delete_ship(self, ship_id: str) -> bool:
await session.close()

async def list_active_ships(self) -> List[Ship]:
"""List all active ships"""
"""List all active ships (running and creating)"""
session = self.get_session()
try:
# Include both RUNNING and CREATING status ships
statement = select(Ship).where(
(Ship.status == ShipStatus.RUNNING) | (Ship.status == ShipStatus.CREATING)
)
result = await session.execute(statement)
return list(result.scalars().all())
finally:
await session.close()

async def list_all_ships(self) -> List[Ship]:
"""List all ships (including stopped)"""
session = self.get_session()
try:
statement = select(Ship).where(Ship.status == 1)
statement = select(Ship).order_by(Ship.created_at.desc())
result = await session.execute(statement)
return list(result.scalars().all())
finally:
Expand Down Expand Up @@ -163,20 +178,21 @@ async def update_session_ship(self, session_ship: SessionShip) -> SessionShip:
"""Update session-ship relationship"""
session = self.get_session()
try:
session.add(session_ship)
# Use merge() instead of add() to handle detached objects
merged_session_ship = await session.merge(session_ship)
await session.commit()
await session.refresh(session_ship)
return session_ship
await session.refresh(merged_session_ship)
return merged_session_ship
finally:
await session.close()

async def find_available_ship(self, session_id: str) -> Optional[Ship]:
"""Find an available ship that can accept a new session"""
session = self.get_session()
try:
# Find ships that have available session slots
# Find ships that have available session slots (only RUNNING ships)
statement = select(Ship).where(
Ship.status == 1, Ship.current_session_num < Ship.max_session_num
Ship.status == ShipStatus.RUNNING, Ship.current_session_num < Ship.max_session_num
)
result = await session.execute(statement)
ships = list(result.scalars().all())
Expand All @@ -193,38 +209,50 @@ async def find_available_ship(self, session_id: str) -> Optional[Ship]:
await session.close()

async def find_active_ship_for_session(self, session_id: str) -> Optional[Ship]:
"""Find an active running ship that this session has access to"""
"""Find an active running ship that this session has access to.

If the session has access to multiple running ships, returns the most recently updated one.
"""
session = self.get_session()
try:
# Find active ships that this session has access to
# Find RUNNING ships that this session has access to
# Order by updated_at desc to get the most recently used one
statement = (
select(Ship)
.join(SessionShip, Ship.id == SessionShip.ship_id)
.where(
SessionShip.session_id == session_id,
Ship.status == 1,
Ship.status == ShipStatus.RUNNING,
)
.order_by(Ship.updated_at.desc())
)
result = await session.execute(statement)
return result.scalar_one_or_none()
# Use scalars().first() instead of scalar_one_or_none() to handle multiple results
return result.scalars().first()
finally:
await session.close()

async def find_stopped_ship_for_session(self, session_id: str) -> Optional[Ship]:
"""Find a stopped ship that belongs to this session"""
"""Find a stopped ship that belongs to this session.

If the session has access to multiple stopped ships, returns the most recently updated one.
"""
session = self.get_session()
try:
# Find stopped ships that this session has access to
# Find STOPPED ships that this session has access to
# Order by updated_at desc to get the most recently stopped one
statement = (
select(Ship)
.join(SessionShip, Ship.id == SessionShip.ship_id)
.where(
SessionShip.session_id == session_id,
Ship.status == 0,
Ship.status == ShipStatus.STOPPED,
)
.order_by(Ship.updated_at.desc())
)
result = await session.execute(statement)
return result.scalar_one_or_none()
# Use scalars().first() instead of scalar_one_or_none() to handle multiple results
return result.scalars().first()
finally:
await session.close()

Expand Down Expand Up @@ -266,5 +294,88 @@ async def decrement_ship_session_count(self, ship_id: str) -> Optional[Ship]:
finally:
await session.close()

async def delete_sessions_for_ship(self, ship_id: str) -> List[str]:
"""Delete all session-ship relationships for a ship and return deleted session IDs"""
session = self.get_session()
try:
# First, get all session IDs for this ship
statement = select(SessionShip).where(SessionShip.ship_id == ship_id)
result = await session.execute(statement)
session_ships = list(result.scalars().all())

deleted_session_ids = [ss.session_id for ss in session_ships]

# Delete all session-ship relationships
for ss in session_ships:
await session.delete(ss)

await session.commit()
return deleted_session_ids
finally:
await session.close()

async def extend_session_ttl(
self, session_id: str, ttl: int
) -> Optional[SessionShip]:
"""Extend the TTL for a session by updating expires_at"""
from datetime import timedelta

session = self.get_session()
try:
statement = select(SessionShip).where(SessionShip.session_id == session_id)
result = await session.execute(statement)
session_ship = result.scalar_one_or_none()

if session_ship:
now = datetime.now(timezone.utc)
session_ship.expires_at = now + timedelta(seconds=ttl)
session_ship.last_activity = now
session.add(session_ship)
await session.commit()
await session.refresh(session_ship)

return session_ship
finally:
await session.close()

async def expire_sessions_for_ship(self, ship_id: str) -> int:
"""Mark all sessions for a ship as expired by setting expires_at to current time.

This is called when a ship is stopped to ensure session status
reflects the actual container state.

Args:
ship_id: The ship ID

Returns:
Number of sessions updated
"""
session = self.get_session()
try:
statement = select(SessionShip).where(SessionShip.ship_id == ship_id)
result = await session.execute(statement)
session_ships = list(result.scalars().all())

now = datetime.now(timezone.utc)
updated_count = 0

for ss in session_ships:
# Only update if session is still active (expires_at > now)
expires_at = ss.expires_at
if expires_at is not None:
if expires_at.tzinfo is None:
expires_at = expires_at.replace(tzinfo=timezone.utc)
if expires_at > now:
ss.expires_at = now
session.add(ss)
updated_count += 1

if updated_count > 0:
await session.commit()

return updated_count
finally:
await session.close()


db_service = DatabaseService()
3 changes: 2 additions & 1 deletion pkgs/bay/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from app.database import db_service
from app.drivers import initialize_driver, close_driver
from app.services.status import status_checker
from app.routes import health, ships, stat
from app.routes import health, ships, stat, sessions

# Configure logging
logging.basicConfig(
Expand Down Expand Up @@ -85,6 +85,7 @@ def create_app() -> FastAPI:
app.include_router(health.router, tags=["health"])
app.include_router(ships.router, tags=["ships"])
app.include_router(stat.router, tags=["stat"])
app.include_router(sessions.router, tags=["sessions"])

return app

Expand Down
20 changes: 19 additions & 1 deletion pkgs/bay/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,18 @@
import uuid


# Ship status constants
class ShipStatus:
"""Ship status constants"""
STOPPED = 0 # Ship is stopped, container not running
RUNNING = 1 # Ship is running, container active
CREATING = 2 # Ship is being created, container not yet ready


# Database Models
class ShipBase(SQLModel):
id: str = Field(default_factory=lambda: str(uuid.uuid4()), primary_key=True)
status: int = Field(default=1, description="1: running, 0: stopped")
status: int = Field(default=ShipStatus.CREATING, description="0: stopped, 1: running, 2: creating")
created_at: datetime = Field(
default_factory=lambda: datetime.now(timezone.utc),
sa_column=Column(DateTime(timezone=True)),
Expand Down Expand Up @@ -87,6 +95,10 @@ class CreateShipRequest(BaseModel):
max_session_num: int = Field(
default=1, gt=0, description="Maximum number of sessions that can use this ship"
)
force_create: bool = Field(
default=False,
description="If True, skip all reuse logic and always create a new container"
)


class ShipResponse(BaseModel):
Expand Down Expand Up @@ -127,6 +139,12 @@ class ExtendTTLRequest(BaseModel):
ttl: int = Field(..., gt=0, description="New TTL in seconds")


class StartShipRequest(BaseModel):
model_config = ConfigDict(extra="forbid")

ttl: int = Field(default=3600, gt=0, description="TTL in seconds for the started ship")


class ErrorResponse(BaseModel):
detail: str

Expand Down
Loading