Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 90 additions & 15 deletions mise-tasks/infra/ensure-dev-cert
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,30 @@ set -euo pipefail
CERT_DIR="${BOXEL_DEV_CERT_DIR:-$HOME/.local/share/boxel/dev-certs}"
CERT_FILE="$CERT_DIR/localhost.pem"
KEY_FILE="$CERT_DIR/localhost-key.pem"
# Persists the set of env-mode slugs (BOXEL_ENVIRONMENT values) that any
# worktree on this machine has used, so the leaf cert can carry a
# `*.<slug>.localhost` SAN for each. RFC 6125 wildcards only cover one
# DNS label, so `*.localhost` does NOT match `host.<slug>.localhost`;
# we have to pre-declare the slug's wildcard explicitly.
SLUGS_FILE="$CERT_DIR/known-env-slugs.txt"

# Source the shared slug-computation helper. Resolved relative to this
# script's location: mise-tasks/infra/ensure-dev-cert → scripts/env-slug.sh.
_REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
. "$_REPO_ROOT/scripts/env-slug.sh"
unset _REPO_ROOT

CURRENT_SLUG="$(resolve_env_slug 2>/dev/null || true)"

# Stamp the current slug into the persistent slug list so later
# regenerations keep covering it. This runs up-front (before the
# idempotent skip check) so adding a fresh worktree triggers exactly
# one regen.
mkdir -p "$CERT_DIR"
touch "$SLUGS_FILE"
# `--` ends grep's option parsing and printf writes its argument
# verbatim, so a slug that happens to begin with `-` is treated as data
# rather than as a command-line flag by either tool.
if [ -n "$CURRENT_SLUG" ] && ! grep -qxF -- "$CURRENT_SLUG" "$SLUGS_FILE"; then
  printf '%s\n' "$CURRENT_SLUG" >> "$SLUGS_FILE"
  echo "[ensure-dev-cert] Recorded new env-mode slug '$CURRENT_SLUG' in $SLUGS_FILE"
fi

# mkcert is required even on the idempotent-skip path: env-vars.sh needs
# `mkcert -CAROOT` to populate `NODE_EXTRA_CA_CERTS` so Node clients
Expand Down Expand Up @@ -158,17 +182,29 @@ echo "[ensure-dev-cert] mkcert root CA already trusted (system + NSS DB)"

# Idempotent skip when the leaf cert already exists, isn't within 7
# days of expiry, AND covers the subdomains the dev + matrix-harness
# publish-realm flows use.
# publish-realm flows + env-mode services use.
#
# SAN layout note: published-realm tenant subdomains in standard dev
# mode use a 3-label wildcard (`*.boxel-dev.localhost`). RFC 6125 §7.2
# — which Node's tls.checkServerIdentity and every mainstream browser
# enforce — refuses 2-label wildcards (e.g. `*.localhost`), so a
# single-label `.localhost` SAN cannot wildcard-cover dev tenant
# subdomains. The second label `boxel-dev` exists purely as the
# RFC-required filler and reads as obviously-local-only in browser
# tabs. Source realms in the dev stack (`user.localhost`) are covered
# as an explicit SAN entry, no wildcard needed.
#
# SAN layout note: published-realm tenant subdomains use a 3-label
# wildcard (`*.boxel-dev.localhost`). RFC 6125 §7.2 — which Node's
# tls.checkServerIdentity and every mainstream browser enforce —
# refuses 2-label wildcards (e.g. `*.localhost`), so a single-label
# `.localhost` SAN cannot wildcard-cover dev tenant subdomains. The
# second label `boxel-dev` exists purely as the RFC-required filler
# and reads as obviously-local-only in browser tabs. Source realms
# in the dev stack (`user.localhost`) are covered as an explicit
# SAN entry, no wildcard needed.
# Env mode (BOXEL_ENVIRONMENT set, see lib/env-vars.sh) layers two
# additional SANs per recorded slug:
# * `*.<slug>.localhost` — env-mode services routed by
# Traefik (host.<slug>.localhost,
# matrix.<slug>.localhost, etc.)
# * `*.realm-server.<slug>.localhost` — published realms living one
# label deeper, under the
# realm-server hostname
# Slugs persist in $SLUGS_FILE (stamped above) so the cert keeps
# covering older worktrees too — see $SLUGS_FILE stamping for why
# we accumulate rather than overwrite.
#
# Older certs were issued for `localhost 127.0.0.1 ::1` only, or with
# the no-longer-effective `*.localhost`; the SAN check below forces a
Expand All @@ -184,11 +220,31 @@ if [ -f "$CERT_FILE" ] && [ -f "$KEY_FILE" ]; then
# Succeeds when the certificate text held in $CERT_TEXT contains a match
# for the basic-regex pattern passed as $1 (callers backslash-escape
# literal `.` and `*`).
#
# The text is fed to grep through a here-string instead of a
# `printf | grep -q` pipeline: this script runs with `set -o pipefail`,
# and `grep -q` exits on its first match, so a producer that is still
# writing can be killed by SIGPIPE and turn a successful match into a
# non-zero pipeline status (i.e. a spurious "SAN missing" result).
cert_has_san() {
  grep -q -- "$1" <<<"$CERT_TEXT"
}
if cert_has_san 'DNS:localhost' \
needs_regen=
if ! { cert_has_san 'DNS:localhost' \
&& cert_has_san 'DNS:user\.localhost' \
&& cert_has_san 'DNS:\*\.boxel-dev\.localhost' \
&& cert_has_san 'DNS:published\.realm' \
&& cert_has_san 'IP Address:127\.0\.0\.1'; then
&& cert_has_san 'IP Address:127\.0\.0\.1'; }; then
needs_regen=1
fi
if [ -z "$needs_regen" ]; then
while IFS= read -r slug; do
[ -z "$slug" ] && continue
# Each env-mode slug needs both a service wildcard and the
# deeper publish-realm wildcard. RFC 6125 wildcards only
# match one label, so we can't fold these into a single SAN.
if ! cert_has_san "DNS:\\*\\.${slug}\\.localhost"; then
needs_regen=1
break
fi
if ! cert_has_san "DNS:\\*\\.realm-server\\.${slug}\\.localhost"; then
needs_regen=1
break
fi
done < "$SLUGS_FILE"
fi
if [ -z "$needs_regen" ]; then
exit 0
fi
echo "[ensure-dev-cert] Existing cert at $CERT_FILE is missing one or more required SANs; regenerating."
Expand All @@ -197,10 +253,29 @@ if [ -f "$CERT_FILE" ] && [ -f "$KEY_FILE" ]; then
fi
fi

mkdir -p "$CERT_DIR"

echo "[ensure-dev-cert] Generating cert at $CERT_FILE"
# Build SAN list:
# * `localhost` / `127.0.0.1` / `::1` — base loopback names
# * `user.localhost` — standard-mode source realm
# * `*.boxel-dev.localhost` — standard-mode published-realm
# tenants (3-label wildcard so
# RFC 6125 §7.2 accepts it; see
# comment block above)
# * `published.realm` — matrix-harness host-resolver
# custom domain
# * Per env-mode slug in $SLUGS_FILE:
# - `*.<slug>.localhost` env-mode services
# - `*.realm-server.<slug>.localhost` env-mode published realms
# The leaf is shared across worktrees, so we accumulate slugs rather
# than overwriting them — see $SLUGS_FILE stamping above.
# Fixed names first; the per-slug wildcards are appended below.
SANS=(
  localhost
  user.localhost
  127.0.0.1
  ::1
  "*.boxel-dev.localhost"
  published.realm
)
while IFS= read -r slug; do
  # Blank lines in the slug list contribute nothing.
  if [ -n "$slug" ]; then
    SANS+=("*.$slug.localhost" "*.realm-server.$slug.localhost")
  fi
done < "$SLUGS_FILE"

mkcert \
-cert-file "$CERT_FILE" \
-key-file "$KEY_FILE" \
localhost user.localhost 127.0.0.1 ::1 "*.boxel-dev.localhost" published.realm
"${SANS[@]}"
19 changes: 17 additions & 2 deletions mise-tasks/lib/dev-common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,13 @@ kill_from_pidfile() {
#
# Implementation notes:
# - `setsid` puts the guardian in its own session so it doesn't get
# SIGHUP'd when dev-all's session dies.
# SIGHUP'd when dev-all's session dies. macOS doesn't ship setsid in
# the base system (it's a Linux util-linux tool); we fall through to
# plain `&` + `disown` + nohup-style ignored SIGHUP. The trap path is
# still the primary cleanup signal — this guardian is a safety net
# that only matters when the trap is denied a chance to fire — so
# the marginal session-isolation guarantee setsid adds isn't worth
# a hard dependency on util-linux.
# - stdin/stdout/stderr are redirected away from the terminal so the
# guardian can survive after the user's shell exits.
# - Output goes to a log file so the user can audit what fired.
Expand All @@ -120,7 +126,16 @@ spawn_cleanup_guardian() {
_scg_parent_pid="$1"
_scg_log="${BOXEL_DEV_ALL_GUARDIAN_LOG:-${XDG_RUNTIME_DIR:-/tmp}/boxel-dev-all-guardian.log}"
_scg_lib="$(cd "$(dirname "$0")" && pwd)/lib/dev-common.sh"
setsid sh -c "
if command -v setsid >/dev/null 2>&1; then
_scg_session_prefix="setsid"
else
# macOS fallback: no session-leader detachment, but `trap '' HUP` in
# the guardian body makes SIGHUP a no-op so a terminal hangup on the
# parent shell doesn't kill us before cleanup runs.
_scg_session_prefix=""
fi
$_scg_session_prefix sh -c "
trap '' HUP
exec </dev/null >>'$_scg_log' 2>&1
echo \"[guardian \$(date +%H:%M:%S)] watching dev-all pid $_scg_parent_pid (pidfile $PIDFILE)\"
while kill -0 $_scg_parent_pid 2>/dev/null; do
Expand Down
10 changes: 10 additions & 0 deletions mise-tasks/lib/env-vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,16 @@ if [ -n "${BOXEL_ENVIRONMENT:-}" ]; then
export ENV_SLUG
export ENV_MODE=true

# Drop standard-mode TLS env vars if they leaked in from a parent
# shell or a prior mise activation. In env mode Traefik terminates
# TLS in front of plain-HTTP services; leaving these set tells vite
# and the realm-server to terminate TLS themselves, so Traefik then
# speaks HTTP to a TLS-expecting upstream and every request fails
# with "HTTP/0.9 when not allowed". NODE_EXTRA_CA_CERTS is kept —
# Node clients still need to trust Traefik's mkcert leaf.
unset REALM_SERVER_TLS_CERT_FILE
unset REALM_SERVER_TLS_KEY_FILE

# Service URLs (Traefik hostnames). Traefik terminates TLS on :443
# with the mkcert leaf (`infra:ensure-dev-cert` provisioned;
# traefik/dynamic/tls.yml references). Plain :80 routes 308-redirect
Expand Down
29 changes: 22 additions & 7 deletions packages/host/config/environment.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ function environmentDefaults() {
realmServerURL: 'https://localhost:4201/',
realmHost: 'localhost:4201',
iconsURL: 'http://localhost:4206',
matrixURL: 'http://localhost:8008',
baseRealmURL: 'https://localhost:4201/base/',
catalogRealmURL: 'https://localhost:4201/catalog/',
skillsRealmURL: 'https://localhost:4201/skills/',
Expand All @@ -70,14 +71,22 @@ function environmentDefaults() {
}
let slug = getEnvSlug();
let realmHost = `realm-server.${slug}.localhost`;
// Env-mode services sit behind Traefik, which terminates TLS on :443
// with the mkcert leaf and 308-redirects :80 to https. The host page
// is loaded over https, so the realm URLs the host bundle fetches
// must match — http URLs trigger mixed-content blocking, and the
// CORS preflight refuses to follow Traefik's http→https redirect
// ("Redirect is not allowed for a preflight request"). Mirrors the
// standard-mode `https://localhost:4201` defaults above.
return {
realmServerURL: `http://${realmHost}/`,
realmServerURL: `https://${realmHost}/`,
realmHost,
iconsURL: `http://icons.${slug}.localhost`,
baseRealmURL: `http://${realmHost}/base/`,
catalogRealmURL: `http://${realmHost}/catalog/`,
skillsRealmURL: `http://${realmHost}/skills/`,
openRouterRealmURL: `http://${realmHost}/openrouter/`,
iconsURL: `https://icons.${slug}.localhost`,
matrixURL: `https://matrix.${slug}.localhost`,
baseRealmURL: `https://${realmHost}/base/`,
catalogRealmURL: `https://${realmHost}/catalog/`,
skillsRealmURL: `https://${realmHost}/skills/`,
openRouterRealmURL: `https://${realmHost}/openrouter/`,
};
}

Expand Down Expand Up @@ -108,7 +117,13 @@ module.exports = function (environment) {
},
logLevels:
process.env.LOG_LEVELS || '*=info,matrix=info,realm:events=debug',
matrixURL: process.env.MATRIX_URL || 'http://localhost:8008',
// In environment mode, use computed Traefik hostname (not env var,
// which may be stale from mise's shell-activation cache in standard
// mode and would otherwise force an http:// matrix URL onto an
// https:// host page).
matrixURL: process.env.BOXEL_ENVIRONMENT
? defaults.matrixURL
: process.env.MATRIX_URL || defaults.matrixURL,
matrixServerName: process.env.MATRIX_SERVER_NAME || 'localhost',
autoSaveDelayMs: 500,
monacoDebounceMs: 500,
Expand Down
19 changes: 19 additions & 0 deletions packages/host/scripts/traefik-helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,25 @@ function registerWithTraefik(slug, hostname, port) {
const tmpPath = configPath + '.tmp';
fs.writeFileSync(tmpPath, entry, 'utf-8');
fs.renameSync(tmpPath, configPath);
kickTraefikIfNeeded();
}

/**
 * Bounce the `boxel-traefik` container after a config write (macOS only).
 *
 * See the matching helper in
 * packages/realm-server/lib/dev-service-registry.ts for the rationale:
 * Docker Desktop's bind mounts don't propagate inotify, and the Traefik
 * v3 file provider has no polling option.
 *
 * Fire-and-forget: the `docker restart` child is detached, has no stdio
 * attached and is unref'd, so it never keeps this process alive. On any
 * platform other than darwin this function does nothing.
 */
function kickTraefikIfNeeded() {
  if (process.platform === 'darwin') {
    const childProcess = require('child_process');
    const restart = childProcess.spawn('docker', ['restart', 'boxel-traefik'], {
      detached: true,
      stdio: 'ignore',
    });
    // Spawn errors are intentionally ignored — readiness probes through
    // Traefik will surface the underlying problem.
    restart.on('error', () => {});
    restart.unref();
  }
}

module.exports = { getEnvSlug, getTraefikDynamicDir, registerWithTraefik };
18 changes: 17 additions & 1 deletion packages/host/vite.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ import { boxelUIChecksumPlugin } from './lib/build/boxel-ui-checksum-plugin.mjs'
// vars whenever the cert exists; absent the cert, the dev stack stays
// on HTTP end-to-end and this falls through to Vite's default.
function devHttpsConfig() {
// Env mode: Traefik terminates TLS in front of a plain-HTTP vite, so
// we must NOT enable HTTPS here even if the TLS env vars are still
// set (e.g. inherited from a previous standard-mode shell session,
// or from a parent zsh that ran env-vars.sh before BOXEL_ENVIRONMENT
// was exported). Without this guard, vite expects a TLS handshake on
// its upstream port and Traefik's plain-HTTP proxy hits
// "HTTP/0.9 when not allowed" → 502 Bad Gateway in the browser.
if (process.env.BOXEL_ENVIRONMENT) return undefined;
let certPath = process.env.REALM_SERVER_TLS_CERT_FILE;
let keyPath = process.env.REALM_SERVER_TLS_KEY_FILE;
if (!certPath || !keyPath) return undefined;
Expand Down Expand Up @@ -326,7 +334,15 @@ export default defineConfig(({ mode }) => ({
allowedHosts: [envHostname],
hmr: {
host: envHostname,
clientPort: 80,
// The page is served by Traefik over https on :443, so the
// HMR client must connect via wss:// on the same port. With
// clientPort: 80 the browser opens `wss://host.<slug>.localhost:80/`,
// and Traefik's :80 entrypoint answers that with a 404 — the HMR
// WebSocket handshake fails, the prerender's standby load
// never finishes initializing, and realm-server boot stalls
// waiting on the prerender.
clientPort: 443,
protocol: 'wss',
},
}),
},
Expand Down
40 changes: 40 additions & 0 deletions packages/matrix/helpers/environment-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,35 @@ export function registerSynapseWithTraefik(hostPort: number): void {
let routerKey = `${serviceName}-${slug}`;
let hostname = `${serviceName}.${slug}.${DOMAIN}`;

// Mirror dev-service-registry.ts: two routers per service. `websecure`
// (port 443) terminates TLS at Traefik using the mkcert leaf in
// traefik/dynamic/tls.yml; the sibling `-http` router on :80
// 308-redirects to https. The browser hits the host bundle over https,
// so matrix login fetches (`https://matrix.<slug>.localhost/`) need
// the websecure router or every CORS preflight 404s.
let redirectMiddleware = `${routerKey}-https-redirect`;
let config: any = {
http: {
routers: {
[routerKey]: {
rule: `Host(\`${hostname}\`)`,
service: routerKey,
entryPoints: ['websecure'],
tls: {},
},
[`${routerKey}-http`]: {
rule: `Host(\`${hostname}\`)`,
entryPoints: ['web'],
middlewares: [redirectMiddleware],
service: routerKey,
},
},
middlewares: {
[redirectMiddleware]: {
redirectScheme: {
scheme: 'https',
permanent: true,
},
},
},
services: {
Expand All @@ -116,6 +138,24 @@ export function registerSynapseWithTraefik(hostPort: number): void {

atomicWrite(configPath, yaml.stringify(config));
console.log(`Registered Synapse at ${hostname} -> localhost:${hostPort}`);
kickTraefikIfNeeded();
}

/**
 * Bounce the `boxel-traefik` container after a config write (macOS only).
 *
 * Docker Desktop's bind mounts don't propagate inotify, and the Traefik
 * v3 file provider has no polling option. See dev-service-registry.ts
 * for the full rationale.
 *
 * Fire-and-forget: the `docker restart` child is detached, has no stdio
 * attached and is unref'd. On any platform other than darwin this
 * function does nothing.
 */
function kickTraefikIfNeeded(): void {
  if (process.platform === 'darwin') {
    const childProcess = require('child_process') as typeof import('child_process');
    const restart = childProcess.spawn('docker', ['restart', 'boxel-traefik'], {
      detached: true,
      stdio: 'ignore',
    });
    // Spawn errors are intentionally ignored — readiness probes through
    // Traefik will surface the underlying problem.
    restart.on('error', () => {});
    restart.unref();
  }
}

export function deregisterSynapseFromTraefik(): void {
Expand Down
12 changes: 12 additions & 0 deletions packages/matrix/scripts/assert-synapse-running.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ RUNNING=$(docker ps -f "name=^${CONTAINER_NAME}$" --format '{{.Names}}')

if [ "$RUNNING" = "$CONTAINER_NAME" ]; then
  echo "synapse is already running (${CONTAINER_NAME})"
  # Re-register with Traefik: a prior dev-all's shutdown (via
  # deregisterEnvironment) deletes every `${slug}-*.yml` from the
  # Traefik dynamic dir, but the synapse container survives across
  # dev-all restarts. Without this, `https://matrix.<slug>.localhost/`
  # has no Traefik route and every login fetch fails the CORS
  # preflight with 404.
  #
  # `${BOXEL_ENVIRONMENT:-}` keeps the check safe when the variable is
  # unset and the shell runs with `set -u`.
  if [ -n "${BOXEL_ENVIRONMENT:-}" ]; then
    # Best-effort lookup of the host port mapped to the container's
    # 8008/tcp; `|| true` keeps a failed lookup from aborting the script
    # should errexit/pipefail be active.
    HOST_PORT=$(docker port "$CONTAINER_NAME" 8008/tcp 2>/dev/null | head -1 | awk -F: '{print $NF}') || true
    # The port is spliced into a TypeScript snippet below, so only a
    # purely numeric value is accepted.
    case "$HOST_PORT" in
      '' | *[!0-9]*)
        echo "warning: could not determine the host port for ${CONTAINER_NAME} 8008/tcp; skipping Traefik re-registration" >&2
        ;;
      *)
        pnpm exec ts-node --transpileOnly -e "import { registerSynapseWithTraefik } from './helpers/environment-config'; registerSynapseWithTraefik($HOST_PORT);"
        ;;
    esac
  fi
else
  pnpm run start:synapse
fi
Loading
Loading