e2b-dev · mishushakov · Mar 12, 2026 · Mar 12, 2026 · Mar 12, 2026 · Mar 12, 2026
@@ -0,0 +1,67 @@
+import { expect } from 'vitest'
+import { sandboxTest, wait } from './setup'
+
+/** Minimal surface of the sandbox that the health probe depends on. */
+type HealthProbeTarget = {
+  commands: { run(cmd: string): Promise<{ stdout: string }> }
+}
+
+/**
+ * Polls the code-interpreter health endpoint from inside the sandbox.
+ *
+ * Each attempt runs `curl` in the sandbox and compares the printed HTTP
+ * status code with `200`. A rejected command (for example because nothing is
+ * listening on the port yet) counts as a failed attempt and is retried.
+ *
+ * @param sandbox - Sandbox whose `commands.run` executes the probe.
+ * @param maxRetries - Maximum number of probe attempts.
+ * @param intervalMs - Delay passed to `wait` after every failed attempt.
+ * @returns `true` as soon as one probe reports `200`, otherwise `false` once
+ *   all attempts are used up.
+ */
+async function waitForHealth(
+  sandbox: HealthProbeTarget,
+  maxRetries = 10,
+  intervalMs = 100
+): Promise<boolean> {
+  for (let i = 0; i < maxRetries; i++) {
+    try {
+      const result = await sandbox.commands.run(
+        'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health'
+      )
+      if (result.stdout.trim() === '200') {
+        return true
+      }
+    } catch {
+      // Connection refused or other error, retry
+    }
+    await wait(intervalMs)
+  }
+  return false
+}
+
+sandboxTest('restart after jupyter kill', async ({ sandbox }) => {
+  // Verify health is up initially
+  const initialHealth = await waitForHealth(sandbox)
+  expect(initialHealth).toBe(true)
+
+  // Kill the jupyter process as root.
+  // `pgrep -f` matches against full command lines, and the shell that executes
+  // this command presumably carries the pattern text in its own command line.
+  // A bracketed pattern never matches its own literal text, so the shell is not
+  // selected; `[ -]` additionally covers the `jupyter-server` executable name.
+  // The command can still fail (e.g. when nothing matched), so errors are tolerated.
+  try {
+    await sandbox.commands.run(
+      "kill -9 $(pgrep -f '[j]upyter[ -]server')",
+      {
+        user: 'root',
+      }
+    )
+  } catch {
+    // Best effort — recovery is what the assertions below verify
+  }
+
+  // Wait for supervisord to restart both services (jupyter startup + code-interpreter startup)
+  const recovered = await waitForHealth(sandbox, 60, 500)
+  expect(recovered).toBe(true)
+
+  // Verify code execution works after recovery
+  const result = await sandbox.runCode('x = 1; x')
+  expect(result.text).toEqual('1')
+})
+
+sandboxTest('restart after code-interpreter kill', async ({ sandbox }) => {
+  // The service has to be healthy before it is taken down
+  expect(await waitForHealth(sandbox)).toBe(true)
+
+  // SIGKILL the code-interpreter as root, using the PID stored in its pid file
+  const killCommand = 'kill -9 $(cat /var/run/code-interpreter.pid)'
+  try {
+    await sandbox.commands.run(killCommand, { user: 'root' })
+  } catch {
+    // Tolerated — killing code-interpreter may terminate the command handle
+  }
+
+  // Give supervisord up to 60 probes, 500 ms apart, to bring the service back
+  expect(await waitForHealth(sandbox, 60, 500)).toBe(true)
+
+  // Code execution must work again once the service has recovered
+  const execution = await sandbox.runCode('x = 1; x')
+  expect(execution.text).toEqual('1')
+})
@@ -0,0 +1,59 @@
+import asyncio
+
+from e2b_code_interpreter.code_interpreter_async import AsyncSandbox
+
+
+async def wait_for_health(sandbox: AsyncSandbox, max_retries=10, interval_ms=100):
+    for _ in range(max_retries):
+        try:
+            result = await sandbox.commands.run(
+                'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health'
+            )
+            if result.stdout.strip() == "200":
+                return True
+        except Exception:
+            pass
+        await asyncio.sleep(interval_ms / 1000)
+    return False
+
+
+async def test_restart_after_jupyter_kill(async_sandbox: AsyncSandbox):
+    # Verify health is up initially
+    assert await wait_for_health(async_sandbox)
+
+    # Kill the jupyter process as root
+    # The command handle may get killed too (killing jupyter cascades to code-interpreter),
+    # so we catch the error.
+    try:
+        await async_sandbox.commands.run(
+            "kill -9 $(pgrep -f 'jupyter server')", user="root"
+        )
+    except Exception:
+        pass
+
+    # Wait for supervisord to restart both services
+    assert await wait_for_health(async_sandbox, 60, 500)
+
+    # Verify code execution works after recovery
+    result = await async_sandbox.run_code("x = 1; x")
+    assert result.text == "1"
+
+
+async def test_restart_after_code_interpreter_kill(async_sandbox: AsyncSandbox):
+    # Verify health is up initially
+    assert await wait_for_health(async_sandbox)
+
+    # Kill the code-interpreter process as root
+    try:
+        await async_sandbox.commands.run(
+            "kill -9 $(cat /var/run/code-interpreter.pid)", user="root"
+        )
+    except Exception:
+        pass
+
+    # Wait for supervisord to restart it and health to come back
+    assert await wait_for_health(async_sandbox, 60, 500)
+
+    # Verify code execution works after recovery
+    result = await async_sandbox.run_code("x = 1; x")
+    assert result.text == "1"
@@ -0,0 +1,57 @@
+import time
+
+from e2b_code_interpreter.code_interpreter_sync import Sandbox
+
+
+def wait_for_health(sandbox: Sandbox, max_retries=10, interval_ms=100):
+    for _ in range(max_retries):
+        try:
+            result = sandbox.commands.run(
+                'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health'
+            )
+            if result.stdout.strip() == "200":
+                return True
+        except Exception:
+            pass
+        time.sleep(interval_ms / 1000)
+    return False
+
+
+def test_restart_after_jupyter_kill(sandbox: Sandbox):
+    # Verify health is up initially
+    assert wait_for_health(sandbox)
+
+    # Kill the jupyter process as root
+    # The command handle may get killed too (killing jupyter cascades to code-interpreter),
+    # so we catch the error.
+    try:
+        sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", user="root")
+    except Exception:
+        pass
+
+    # Wait for supervisord to restart both services
+    assert wait_for_health(sandbox, 60, 500)
+
+    # Verify code execution works after recovery
+    result = sandbox.run_code("x = 1; x")
+    assert result.text == "1"
+
+
+def test_restart_after_code_interpreter_kill(sandbox: Sandbox):
+    # Verify health is up initially
+    assert wait_for_health(sandbox)
+
+    # Kill the code-interpreter process as root
+    try:
+        sandbox.commands.run(
+            "kill -9 $(cat /var/run/code-interpreter.pid)", user="root"
+        )
+    except Exception:
+        pass
+
+    # Wait for supervisord to restart it and health to come back
+    assert wait_for_health(sandbox, 60, 500)
+
+    # Verify code execution works after recovery
+    result = sandbox.run_code("x = 1; x")
+    assert result.text == "1"
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Block until the Jupyter status endpoint reports HTTP 200, polling every 0.5 s.
+echo "Waiting for Jupyter server to be ready..."
+while ! curl -s -o /dev/null -w '%{http_code}' http://localhost:8888/api/status | grep -q '200'; do
+  sleep 0.5
+done
+echo "Jupyter server is ready, starting Code Interpreter..."
+
+# Record this shell's PID, then replace the shell with uvicorn via exec so the
+# PID stored in the file is the PID of the server process.
+echo $$ > /var/run/code-interpreter.pid
+exec /root/.server/.venv/bin/uvicorn main:app --host 0.0.0.0 --port 49999 --workers 1 --no-access-log --no-use-colors --timeout-keep-alive 640
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+/usr/local/bin/jupyter server --IdentityProvider.token=""
+
+# Jupyter exited — kill code-interpreter so supervisord restarts both
+echo "Jupyter exited, killing code-interpreter..."
+kill "$(cat /var/run/code-interpreter.pid)" 2>/dev/null
@@ -1,22 +1,4 @@
 #!/bin/bash
 
-function start_jupyter_server() {
-	counter=0
-	response=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:8888/api/status")
-	while [[ ${response} -ne 200 ]]; do
-		let counter++
-		if ((counter % 20 == 0)); then
-			echo "Waiting for Jupyter Server to start..."
-			sleep 0.1
-		fi
-
-		response=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:8888/api/status")
-	done
-
-	cd /root/.server/
-	.venv/bin/uvicorn main:app --host 0.0.0.0 --port 49999 --workers 1 --no-access-log --no-use-colors --timeout-keep-alive 640
-}
-
 echo "Starting Code Interpreter server..."
-start_jupyter_server &
-MATPLOTLIBRC=/root/.config/matplotlib/.matplotlibrc jupyter server --IdentityProvider.token="" >/dev/null 2>&1
+supervisord -c /etc/supervisord.conf
@@ -0,0 +1,28 @@
+; Supervisor configuration for the two long-running services in the sandbox:
+; the Jupyter server and the code-interpreter API.
+[supervisord]
+; Stay in the foreground instead of daemonizing.
+nodaemon=true
+logfile=/var/log/supervisord.log
+pidfile=/var/run/supervisord.pid
+
+; Jupyter server, launched through its wrapper script.
+[program:jupyter]
+command=/root/.jupyter/start-jupyter.sh
+environment=MATPLOTLIBRC="/root/.config/matplotlib/.matplotlibrc"
+; stdout is discarded; stderr is written to file descriptor 1 with log
+; rotation disabled (maxbytes=0).
+stdout_logfile=/dev/null
+stderr_logfile=/dev/fd/1
+stderr_logfile_maxbytes=0
+autorestart=true
+; Stop and kill signals are delivered to the whole process group, i.e. the
+; wrapper script together with the processes it started.
+stopasgroup=true
+killasgroup=true
+; Lower priority value than code-interpreter (10 vs 20).
+priority=10
+
+; code-interpreter API; its start script waits for Jupyter before launching.
+[program:code-interpreter]
+command=/root/.jupyter/start-code-interpreter.sh
+directory=/root/.server
+; Both output streams go to file descriptor 1 with log rotation disabled.
+stdout_logfile=/dev/fd/1
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/fd/1
+stderr_logfile_maxbytes=0
+autorestart=true
+stopasgroup=true
+killasgroup=true
+priority=20
+; No minimum run time is required for the start to count as successful.
+startsecs=0
@@ -38,6 +38,7 @@ def make_template(
                 "sudo",
                 "fonts-noto-cjk",
                 "ca-certificates",
+                "supervisor",
             ]
         )
         .run_cmd("curl -fsSL https://deb.nodesource.com/setup_20.x | bash -")
@@ -111,11 +112,17 @@ def make_template(
     template = (
         template.copy("matplotlibrc", ".config/matplotlib/.matplotlibrc")
         .copy("start-up.sh", ".jupyter/start-up.sh")
-        .run_cmd("chmod +x .jupyter/start-up.sh")
+        .copy("start-code-interpreter.sh", ".jupyter/start-code-interpreter.sh")
+        .copy("start-jupyter.sh", ".jupyter/start-jupyter.sh")
+        .run_cmd(
+            "chmod +x .jupyter/start-code-interpreter.sh .jupyter/start-up.sh .jupyter/start-jupyter.sh"
+        )
         .copy("jupyter_server_config.py", ".jupyter/")
         .make_dir(".ipython/profile_default/startup")
         .copy("ipython_kernel_config.py", ".ipython/profile_default/")
         .copy("startup_scripts", ".ipython/profile_default/startup")
+        # Install supervisord config
+        .copy("supervisord.conf", "/etc/supervisord.conf")
     )
 
     if is_docker: