This guide explains how to diagnose issues when using nbs-ssh.
nbs-ssh uses structured JSONL (JSON Lines) event logging. Each line in the log is a complete JSON object representing a single event.
from nbs_ssh import SSHConnection, EventCollector
# Option 1: Log to file
async with SSHConnection(
"example.com",
username="alice",
auth=auth,
event_log_path="session.jsonl", # Events written here
) as conn:
await conn.exec("command")
# Option 2: In-memory collection
collector = EventCollector()
async with SSHConnection(
"example.com",
username="alice",
auth=auth,
event_collector=collector,
) as conn:
await conn.exec("command")
# Access events
for event in collector.events:
print(event.to_json())
# Option 3: Both (file + in-memory)
collector = EventCollector()
async with SSHConnection(
"example.com",
username="alice",
auth=auth,
event_collector=collector,
event_log_path="session.jsonl",
) as conn:
await conn.exec("command")

# Log events to stderr (redirect to file)
nbs-ssh --events alice@example.com "ls" 2>session.jsonl
# View events in real-time
nbs-ssh --events alice@example.com "ls"

Each line is a standalone JSON object:
{"event_type": "CONNECT", "timestamp": 1234567890.123, "data": {"status": "initiating", "host": "example.com", "port": 22}}
{"event_type": "AUTH", "timestamp": 1234567890.234, "data": {"status": "success", "method": "private_key", "duration_ms": 45.2}}
{"event_type": "EXEC", "timestamp": 1234567890.345, "data": {"command": "ls", "exit_code": 0, "duration_ms": 12.3}}
{"event_type": "DISCONNECT", "timestamp": 1234567890.456, "data": {"reason": "normal"}}

# Pretty-print all events
cat session.jsonl | jq .
# Filter by event type
cat session.jsonl | jq 'select(.event_type == "ERROR")'
# Extract connection timing
cat session.jsonl | jq 'select(.event_type == "AUTH") | .data.duration_ms'
# Find failed operations
cat session.jsonl | jq 'select(.data.status == "failed")'

import json
def read_events(path):
with open(path) as f:
for line in f:
yield json.loads(line)
# Find errors
for event in read_events("session.jsonl"):
if event["event_type"] == "ERROR":
print(f"Error: {event['data']['message']}")
elif event.get("data", {}).get("status") == "failed":
print(f"Failed: {event}")

Connection initiation and establishment:
{
"event_type": "CONNECT",
"timestamp": 1234567890.123,
"data": {
"status": "initiating", // or "connected"
"host": "example.com",
"port": 22,
"username": "alice",
"auth_method": "private_key" // Only on success
}
}

Authentication attempts and results:
{
"event_type": "AUTH",
"timestamp": 1234567890.234,
"data": {
"status": "success", // or "failed"
"method": "password", // "private_key", "ssh_agent"
"username": "alice",
"duration_ms": 45.2,
"error_type": "PermissionDenied", // On failure
"error_message": "..."
}
}

Command execution:
{
"event_type": "EXEC",
"timestamp": 1234567890.345,
"data": {
"command": "ls -la",
"streaming": false,
"duration_ms": 123.4,
"exit_code": 0,
"stdout_len": 1024,
"stderr_len": 0
}
}

Any error during operation:
{
"event_type": "ERROR",
"timestamp": 1234567890.456,
"data": {
"error_type": "ConnectionTimeout",
"message": "Connection timed out after 30.0 seconds",
"host": "example.com",
"port": 22
}
}

Supervisor state transitions:
{
"event_type": "STATE_CHANGE",
"timestamp": 1234567890.567,
"data": {
"from_state": "connecting",
"to_state": "connected",
"reconnection_count": 0
}
}

Port forwarding events:
{
"event_type": "FORWARD",
"timestamp": 1234567890.678,
"data": {
"status": "established", // "establishing", "closed", "failed"
"forward_type": "local",
"local_host": "localhost",
"local_port": 3306,
"remote_host": "db.server",
"remote_port": 3306
}
}

Evidence bundles are comprehensive diagnostic packages created when issues occur.
from nbs_ssh import SSHConnection, EventCollector
collector = EventCollector()
try:
async with SSHConnection(
"example.com",
username="alice",
auth=auth,
event_collector=collector,
) as conn:
await conn.exec("command")
except Exception as e:
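# Create bundle with all diagnostic info.
# Note: `conn` is only bound once the connection has been established; if the
# failure happens while connecting, `conn` is undefined in this handler.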
bundle = conn.get_evidence_bundle()
# Save with secrets redacted (safe for sharing)
bundle.to_file("debug.json", redact=True)
# Save unredacted (internal debugging only)
bundle.to_file("debug_raw.json", redact=False)

from nbs_ssh import EvidenceBundle
bundle = EvidenceBundle.from_file("debug.json")
# Timing information
print(f"Connect time: {bundle.timing.connect_duration_ms}ms")
print(f"Auth time: {bundle.timing.auth_duration_ms}ms")
print(f"Total time: {bundle.timing.total_duration_ms}ms")
# Negotiated algorithms
print(f"KEX: {bundle.algorithms.kex}")
print(f"Cipher: {bundle.algorithms.cipher_cs}")
print(f"MAC: {bundle.algorithms.mac_cs}")
# Why did connection end?
print(f"Disconnect reason: {bundle.disconnect_reason}")
# Error details
print(f"Error context: {bundle.error_context}")
# All events in the session
for event in bundle.events:
print(f"{event.event_type}: {event.data}")

| Field | Description |
|---|---|
| `events` | List of all JSONL events from the session |
| `transcript` | Automation transcript (if expect/respond was used) |
| `algorithms` | Negotiated SSH algorithms (KEX, ciphers, MACs, compression) |
| `timing` | Connection, auth, and session timing |
| `host_info` | Target host, port, username |
| `disconnect_reason` | Why the connection ended (normal, keepalive_timeout, etc.) |
| `error_context` | Additional error details |
| `version` | Bundle format version |
| `created_ms` | When the bundle was created |
Symptom: ConnectionRefused exception
Causes:
- SSH server not running on target host
- Wrong port number
- Firewall blocking the connection
Diagnosis:
# Check if port is open
nc -zv example.com 22
# Check from the same network as your application
ssh -v -p 22 alice@example.com

Symptom: ConnectionTimeout exception
Causes:
- Host unreachable (network issue)
- Firewall dropping packets silently
- Very slow network
Diagnosis:
# Check network connectivity
ping example.com
# Check routing
traceroute example.com
# Try with longer timeout
async with SSHConnection(..., connect_timeout=60.0) as conn:
...

Symptom: AuthFailed exception
Causes:
- Wrong password
- Wrong username
- Key not accepted by server
- Key not in authorized_keys
Diagnosis:
# Check which method was tried
for event in collector.get_by_type("AUTH"):
print(f"Method: {event.data['method']}, Status: {event.data['status']}")

# Verify key is correct
ssh-keygen -lf ~/.ssh/id_ed25519.pub
# Check server logs (on server)
tail -f /var/log/auth.log

Symptom: KeyLoadError exception
Causes:
- Key file not found
- Wrong permissions on key file
- Wrong passphrase for encrypted key
- Corrupted key file
Diagnosis:
try:
async with SSHConnection(...) as conn:
...
except KeyLoadError as e:
print(f"Key path: {e.context.key_path}")
print(f"Reason: {e.context.extra.get('reason')}")

# Check key file exists and permissions
ls -la ~/.ssh/id_ed25519
# Should be -rw------- (600)
# Verify key is valid
ssh-keygen -yf ~/.ssh/id_ed25519

Symptom: HostKeyMismatch exception
Causes:
- Server was reinstalled/reconfigured
- Potential man-in-the-middle attack
- Connecting to wrong server
Resolution:
# If server legitimately changed, remove old key
ssh-keygen -R example.com
# Then reconnect to accept new key
ssh alice@example.com

Warning: Only do this if you're certain the server legitimately changed!
Symptom: NoMutualKex exception
Causes:
- Server and client have no compatible encryption algorithms
- Very old or very new server with different algorithm support
Diagnosis:
# Check which key-exchange algorithms the local OpenSSH client supports
ssh -Q kex
# Check which algorithms the server offers
nmap --script ssh2-enum-algos example.com

Symptom: AgentError exception
Causes:
- SSH agent not running
- SSH_AUTH_SOCK not set
- Agent socket not accessible
Diagnosis:
from nbs_ssh import check_agent_available
if not check_agent_available():
print("SSH agent not available")

# Check agent is running
echo $SSH_AUTH_SOCK
ssh-add -l

Symptom: Commands fail mid-execution
Causes:
- Network instability
- Server timeout
- Keepalive not configured
Solution: Use SSHSupervisor with keepalive:
from nbs_ssh import SSHSupervisor, KeepaliveConfig, RetryPolicy
keepalive = KeepaliveConfig(
interval_sec=15.0,
max_count=3,
)
retry = RetryPolicy(
max_retries=5,
base_delay_sec=2.0,
)
async with SSHSupervisor(
...,
keepalive=keepalive,
retry_policy=retry,
) as supervisor:
# Auto-reconnects on transient failures
await supervisor.exec("command")

When reporting issues, include the following information:
try:
async with SSHConnection(..., event_collector=collector) as conn:
await conn.exec("command")
except Exception as e:
bundle = conn.get_evidence_bundle()
bundle.to_file("bug_report.json", redact=True) # Safe for sharing

python --version
pip show nbs-ssh asyncssh
uname -a # or systeminfo on Windows

Provide the smallest code that reproduces the issue:
import asyncio
from nbs_ssh import SSHConnection, create_password_auth
async def main():
auth = create_password_auth("password")
async with SSHConnection(
"example.com",
username="alice",
auth=auth,
) as conn:
result = await conn.exec("whoami")
print(result.stdout)
asyncio.run(main())

- Expected behaviour: "Command should execute and print username"
- Actual behaviour: "ConnectionTimeout after 30 seconds"
Include the JSONL event log (with secrets redacted) or relevant portions of it.
When troubleshooting, work through this checklist:
- Can you SSH to the target manually? (`ssh user@host`)
- Is the port correct? (default is 22)
- Is the username correct?
- For key auth: Does the key file exist and have correct permissions (600)?
- For key auth: Is the public key in server's authorized_keys?
- For password auth: Is password authentication enabled on server?
- Are there firewall rules blocking the connection?
- Is known_hosts configured correctly (or disabled for testing)?
- Check the JSONL event log for specific error messages
- Generate an evidence bundle for detailed diagnostics