-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile.bench
More file actions
55 lines (46 loc) · 1.85 KB
/
Dockerfile.bench
File metadata and controls
55 lines (46 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# bad bench — full benchmark runner in Docker (no GUI needed)
#
# Uses Xvfb (virtual display) so Chrome runs "headed" with full stealth
# fingerprinting, without requiring an actual monitor or X server.
#
# Build:
# docker build --platform linux/amd64 -f Dockerfile.bench -t bad-bench .
#
# Run full WEBBENCH-50:
#   docker run --platform linux/amd64 --env-file .env \
#     -v "$(pwd)/agent-results:/app/agent-results" \
#     bad-bench
#
# Run specific cases:
#   docker run --platform linux/amd64 --env-file .env \
#     -v "$(pwd)/agent-results:/app/agent-results" \
#     bad-bench --cases bench/scenarios/cases/webbench-reachable4-max20-timeout120.json
FROM mcr.microsoft.com/playwright:v1.58.2-noble
# Install Xvfb for a virtual display (Chrome needs a display for full stealth).
# xauth is listed explicitly because the xvfb-run wrapper used in ENTRYPOINT
# calls it; --no-install-recommends keeps apt from pulling in unrelated extras.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        xauth \
        xvfb \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Install deps from the lockfile. The manifests are copied on their own so this
# layer is rebuilt only when they change.
# --ignore-scripts skips postinstall patches (only needed for the
# claude-code/codex providers). npm's download cache is dropped in the same
# layer so it does not end up in the image.
COPY package.json package-lock.json ./
RUN npm ci --ignore-scripts && npm cache clean --force
# Install Chrome via patchright (real TLS fingerprint for stealth/anti-bot bypass)
RUN npx patchright install chrome
# Build step: copy the compiler config and sources, then run the package's
# "build" script (presumably a TypeScript compile given tsconfig.json — the
# script itself is defined in package.json; confirm there).
# Kept after the dependency install so source edits do not invalidate that layer.
COPY tsconfig.json ./
COPY src/ ./src/
RUN npm run build
# Copy benchmark infrastructure: scripts/ holds the runner started by
# ENTRYPOINT, bench/ holds the scenario case files referenced by CMD.
# Copied after the build so editing them does not re-run `npm run build`.
COPY scripts/ ./scripts/
COPY bench/ ./bench/
# Default output directory; the run examples at the top of this file bind-mount
# a host directory over it so results survive the container.
RUN mkdir -p /app/agent-results
# Directory where Playwright-managed browsers are looked up.
# NOTE(review): the Playwright base image presumably sets this already —
# confirm before removing.
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
# Launch the runner under xvfb-run so Chrome gets a virtual 1920x1080x24
# display and can run headed (no --headless flag) — full stealth fingerprinting
# with real GPU/plugin signals.
# Exec form: arguments given to `docker run <image> ...` replace CMD and are
# appended to this command line.
ENTRYPOINT [ \
    "xvfb-run", \
    "--auto-servernum", \
    "--server-args=-screen 0 1920x1080x24", \
    "node", \
    "scripts/run-scenario-track.mjs" \
]
# Default arguments: the full 50-case WEBBENCH run with the stealth profile.
CMD [ \
    "--cases", \
    "bench/scenarios/cases/webbench-full50-max20-timeout240.json", \
    "--benchmark-profile", \
    "webbench-stealth", \
    "--model", \
    "gpt-5.4", \
    "--modes", \
    "fast-explore", \
    "--concurrency", \
    "3" \
]