Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions lmdeploy/serve/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ class VariableInterface:
api_server_url: str | None = None
allow_terminate_by_client: bool = False
enable_abort_handling: bool = False
api_keys: list[str] | None = None
response_parser_cls: type[ResponseParser] | None = None

@classmethod
Expand Down Expand Up @@ -1213,6 +1214,8 @@ async def startup_event():
url = f'{VariableInterface.proxy_url}/nodes/add'
data = {'url': VariableInterface.api_server_url, 'status': {'models': get_model_list(), 'role': engine_role}}
headers = {'accept': 'application/json', 'Content-Type': 'application/json'}
if isinstance(VariableInterface.api_keys, list) and len(VariableInterface.api_keys) > 0:
headers['Authorization'] = f'Bearer {VariableInterface.api_keys[0]}'
response = requests.post(url, headers=headers, json=data)

if response.status_code != 200:
Expand Down Expand Up @@ -1394,6 +1397,9 @@ def serve(model_path: str,

VariableInterface.allow_terminate_by_client = allow_terminate_by_client
VariableInterface.enable_abort_handling = enable_abort_handling
if isinstance(api_keys, str):
api_keys = [api_keys]
VariableInterface.api_keys = api_keys

ssl_keyfile, ssl_certfile, http_or_https = None, None, 'http'
if ssl:
Expand Down
2 changes: 2 additions & 0 deletions lmdeploy/serve/proxy/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,8 @@ def proxy(server_name: str = '0.0.0.0',
with_gdr=True,
)
node_manager.cache_status = not disable_cache_status
if isinstance(api_keys, str):
api_keys = [api_keys]
if api_keys is not None and (tokens := [key for key in api_keys if key]):
from lmdeploy.serve.utils.server_utils import AuthenticationMiddleware

Expand Down
5 changes: 3 additions & 2 deletions lmdeploy/serve/utils/server_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,13 @@ class AuthenticationMiddleware:
def __init__(self, app: ASGIApp, tokens: list[str]) -> None:
self.app = app
self.api_tokens = [hashlib.sha256(t.encode('utf-8')).digest() for t in tokens]
# Path prefixes that bypass authentication
# Path prefixes that bypass authentication. Keep this list limited to
# passive public endpoints; proxy node-management routes mutate routing
# state and must stay behind bearer-token authentication.
self.skip_prefixes = [
'/health', # Health check endpoints
'/docs', # Swagger UI documentation
'/redoc', # ReDoc documentation
'/nodes', # Endpoints about node operation between proxy and api_server
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These node management endpoints are only intended for internal communication between the api_server and the proxy. In that context, do we really need to enforce authentication on them?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for raising this. I agree these endpoints are intended for api_server ↔ proxy coordination, but today that is only an architectural convention, not an enforced boundary.

The proxy defaults to binding on 0.0.0.0, and when api_keys are configured the public inference routes are protected while /nodes/* was still skipped by auth. If the proxy port is reachable, an unauthenticated caller can:

  • read /nodes/status to learn the current node URLs/models;
  • call /nodes/remove or /nodes/terminate* to disrupt serving; and
  • call /nodes/add with a non-empty status.models, which NodeManager.add() accepts directly without first querying the backend.

That last case lets an attacker register their own backend for an existing model and potentially receive subsequent inference requests. Since the proxy forwards the original request headers to the selected backend, that can also expose client request metadata/authorization headers to the malicious node.

So the issue is exploitable whenever the proxy listener is reachable outside the trusted internal network. If deployments enforce isolation externally (localhost/private listener/firewall/service mesh), the risk is reduced, but when operators configure api_keys on a network-reachable proxy, leaving /nodes/* unauthenticated creates a separate unauthenticated management plane.

This patch keeps unauthenticated behavior for deployments without api_keys, and for authenticated deployments it makes the existing api_server registration path send the configured bearer token so the intended internal flow still works.

]

def verify_token(self, headers: Headers) -> bool:
Expand Down
59 changes: 59 additions & 0 deletions tests/test_lmdeploy/serve/test_authentication_middleware.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import importlib.util
from pathlib import Path

from fastapi import FastAPI
from fastapi.testclient import TestClient


def _load_authentication_middleware():
repo_root = Path(__file__).resolve().parents[3]
module_path = repo_root / 'lmdeploy' / 'serve' / 'utils' / 'server_utils.py'
spec = importlib.util.spec_from_file_location('server_utils_for_test', module_path)
module = importlib.util.module_from_spec(spec)
assert spec.loader is not None
spec.loader.exec_module(module)
return module.AuthenticationMiddleware


AuthenticationMiddleware = _load_authentication_middleware()


def _make_client():
app = FastAPI()

@app.get('/health')
def health():
return {'ok': True}

@app.post('/nodes/add')
def add_node():
return {'added': True}

@app.get('/nodes/status')
def node_status():
return {'nodes': []}

@app.post('/v1/chat/completions')
def chat_completions():
return {'ok': True}

app.add_middleware(AuthenticationMiddleware, tokens=['secret'])
return TestClient(app)


def test_auth_middleware_protects_node_management_routes():
client = _make_client()

assert client.post('/nodes/add').status_code == 401
assert client.get('/nodes/status').status_code == 401

headers = {'Authorization': 'Bearer secret'}
assert client.post('/nodes/add', headers=headers).status_code == 200
assert client.get('/nodes/status', headers=headers).status_code == 200


def test_auth_middleware_keeps_passive_health_public():
client = _make_client()

assert client.get('/health').status_code == 200
assert client.post('/v1/chat/completions').status_code == 401
Loading