Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
# Ignore version control files
.git
.gitignore
.github
.claude

# Ignore build artifacts and cache
__pycache__
*.pyc
*.log
*.egg-info

# Ignore dependency directories
node_modules
vendor
frontend/node_modules
frontend/.next

# Ignore sensitive environment files
.env
.aws

# You can also list the Dockerfile and .dockerignore files themselves;
# Docker still receives them for the build process but won't copy them into the final image.
Dockerfile
docker-compose*.yml
.dockerignore

docs
README.md
start.sh
31 changes: 31 additions & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: Deploy to EC2

on:
push:
branches: [develop]

jobs:
deploy:
runs-on: ubuntu-latest
environment: Develop
steps:
- name: Deploy via SSH
uses: appleboy/ssh-action@v1
with:
host: ${{ secrets.EC2_HOST }}
username: ${{ secrets.EC2_USER }}
key: ${{ secrets.EC2_SSH_KEY }}
command_timeout: 15m
script: |
cd ~/akd-debugger
git pull origin develop

# Amazon Linux may have docker-compose (v1) or docker compose (v2)
if docker compose version &>/dev/null; then
COMPOSE="docker compose"
else
COMPOSE="docker-compose"
fi

$COMPOSE -f docker-compose.prod.yml up -d --build
$COMPOSE -f docker-compose.prod.yml exec app sh -c "uv run alembic upgrade head"
20 changes: 16 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
FROM python:3.13-slim
### Build stage — install dependencies
FROM python:3.13-slim AS build

WORKDIR /app

# Install uv
RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*

COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

COPY pyproject.toml .
COPY pyproject.toml uv.lock ./
RUN uv sync --no-dev

COPY . .

### Runtime stage
FROM python:3.13-slim

WORKDIR /app

RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*

COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
COPY --from=build /app /app

EXPOSE 8000

CMD ["uv", "run", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["uv", "run", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2", "--timeout-keep-alive", "120"]
67 changes: 9 additions & 58 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,67 +1,18 @@
# AKD

LLM Agent Benchmarking Platform — define suites, run agents, grade outputs, and compare results.
LLM agent benchmarking platform.

Make sure Docker is installed and running.
## Quick Start

Install npm:
Requirements:
- Docker (running)
- Node.js

`curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.4/install.sh | bash`

`nvm install node`


### Run Application

start.sh

Visit: [localhost:3000](http://localhost:3000)

Backend API: [localhost:8000/docs](http://localhost:8000/docs)

### PostgreSQL backup and restore

Create a backup dump from your current database:

```bash
./scripts/pg_dump_to_file.sh
```

Optional custom output filename:

```bash
./scripts/pg_dump_to_file.sh backup_20260213.dump
```

Transfer dump file to your AWS VM:
Run:

```bash
scp backup_20260213.dump ec2-user@<EC2_PUBLIC_IP>:/tmp/
./start.sh
```

Restore on AWS VM (set connection vars if your DB host/port/user differs):

```bash
PGHOST=127.0.0.1 PGPORT=5432 PGDATABASE=benchmark PGUSER=postgres PGPASSWORD=postgres \
./scripts/pg_restore_from_file.sh /tmp/backup_20260213.dump
```

### Docker-only backup and restore (easiest)

Create dump from the docker-compose Postgres service:

```bash
./scripts/pg_dump_docker.sh backup_20260213.dump
```

Copy dump to EC2:

```bash
scp backup_20260213.dump ec2-user@<EC2_PUBLIC_IP>:/tmp/
```

On EC2, in project directory (where `docker-compose.yml` exists), restore to docker Postgres:

```bash
./scripts/pg_restore_docker.sh /tmp/backup_20260213.dump
```
App: [http://localhost:3000](http://localhost:3000)
API docs: [http://localhost:8000/docs](http://localhost:8000/docs)
69 changes: 64 additions & 5 deletions api/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from models.organization import Organization
from models.organization_membership import OrganizationMembership
from models.password_reset_token import PasswordResetToken
from models.project import Project
from models.project_membership import ProjectMembership
from models.user import User
from schemas.schemas import (
Expand All @@ -33,7 +34,7 @@
set_session_cookies,
)
from services.context import WorkspaceContext, require_org_context
from services.permissions import require_permission
from services.permissions import get_role_by_id, get_role_by_slug, require_permission
from services.security import generate_token, hash_password, hash_token, normalize_email
from services.workspaces import create_organization_with_defaults

Expand Down Expand Up @@ -130,6 +131,29 @@ async def signup(

if invitation:
role_id = invitation.org_role_id
if role_id is None:
default_org_role = await get_role_by_slug(
db,
organization_id=invitation.organization_id,
role_type="organization",
slug="org_user",
)
role_id = default_org_role.id if default_org_role else None
else:
scoped_org_role = await get_role_by_id(
db,
organization_id=invitation.organization_id,
role_type="organization",
role_id=role_id,
)
if not scoped_org_role:
default_org_role = await get_role_by_slug(
db,
organization_id=invitation.organization_id,
role_type="organization",
slug="org_user",
)
role_id = default_org_role.id if default_org_role else None
existing_membership = (
await db.execute(
select(OrganizationMembership).where(
Expand All @@ -147,12 +171,42 @@ async def signup(
is_active=True,
)
)
else:
if not existing_membership.is_active:
existing_membership.is_active = True
if existing_membership.role_id is None and role_id is not None:
existing_membership.role_id = role_id

default_project_role_id: int | None = None

for assignment in invitation.project_assignments or []:
project_id = assignment.get("project_id")
project_role_id = assignment.get("role_id")
if not isinstance(project_id, int):
continue
project = await db.get(Project, project_id)
if not project or project.organization_id != invitation.organization_id:
continue
resolved_project_role_id: int | None = project_role_id if isinstance(project_role_id, int) else None
if resolved_project_role_id is not None:
scoped_project_role = await get_role_by_id(
db,
organization_id=invitation.organization_id,
role_type="project",
role_id=resolved_project_role_id,
)
if not scoped_project_role:
resolved_project_role_id = None
if resolved_project_role_id is None:
if default_project_role_id is None:
default_project_role = await get_role_by_slug(
db,
organization_id=invitation.organization_id,
role_type="project",
slug="project_user",
)
default_project_role_id = default_project_role.id if default_project_role else None
resolved_project_role_id = default_project_role_id
existing_pm = (
await db.execute(
select(ProjectMembership).where(
Expand All @@ -168,18 +222,23 @@ async def signup(
organization_id=invitation.organization_id,
project_id=project_id,
user_id=user.id,
role_id=project_role_id if isinstance(project_role_id, int) else None,
role_id=resolved_project_role_id,
is_active=True,
)
)
else:
if not existing_pm.is_active:
existing_pm.is_active = True
if existing_pm.role_id is None and resolved_project_role_id is not None:
existing_pm.role_id = resolved_project_role_id

invitation.accepted_at = _utcnow()

await db.commit()
await db.refresh(user)

pair = await issue_session_pair(db, user, request=request)
set_session_cookies(response, pair)
set_session_cookies(response, pair, request=request)
return await _session_payload(db, user)


Expand All @@ -192,7 +251,7 @@ async def login(
):
user = await authenticate_credentials(db, normalize_email(body.email), body.password)
pair = await issue_session_pair(db, user, request=request)
set_session_cookies(response, pair)
set_session_cookies(response, pair, request=request)
return await _session_payload(db, user)


Expand All @@ -218,7 +277,7 @@ async def refresh_session(
if not refresh_token:
raise HTTPException(401, "Refresh cookie is missing")
user, pair = await rotate_access_from_refresh(db, refresh_token, request=request)
set_session_cookies(response, pair)
set_session_cookies(response, pair, request=request)
return await _session_payload(db, user)


Expand Down
Loading