Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,499 changes: 196 additions & 1,303 deletions CLAUDE.md

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion playbooks/terraform.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
---
- name: Manage infrastructure lifecycle and SSH access with Terraform
hosts: localhost
gather_facts: false
roles:
- role: terraform
35 changes: 35 additions & 0 deletions scripts/check-ssh-key-migration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# SPDX-License-Identifier: copyleft-next-0.3.1
#
# Check if SSH keys need migration from old (unhashed) to new (hashed) paths.
# This helps users upgrading from older kdevops versions that used a fixed
# SSH key path to the new per-directory hashed paths.

# Abort on the first failing command.
set -e

# $1 is the kdevops top-level directory path; fall back to "." if omitted.
TOPDIR_PATH="${1:-.}"
# Per-directory key suffix: the first 8 hex digits of the SHA-256 of the path
# as printed by echo. terraform/Kconfig.ssh derives its default public key
# file name with the same pipeline, so the two must stay in sync.
HASH=$(echo "$TOPDIR_PATH" | sha256sum | cut -c1-8)

# Legacy fixed-name key pair, followed by the new hash-suffixed key pair.
OLD_KEY="$HOME/.ssh/kdevops_terraform"
OLD_PUBKEY="$HOME/.ssh/kdevops_terraform.pub"
NEW_KEY="$HOME/.ssh/kdevops_terraform_${HASH}"
NEW_PUBKEY="$HOME/.ssh/kdevops_terraform_${HASH}.pub"

# Only show notice if old key exists but new key doesn't
# (only the public key files are tested). This script just prints the
# notice; it never moves, creates, or deletes a key itself.
if [ -f "$OLD_PUBKEY" ] && [ ! -f "$NEW_PUBKEY" ]; then
# The heredoc delimiter is unquoted, so the key path variables below are
# expanded and the user sees ready-to-copy mv commands.
cat <<EOF
--------------------------------------------------------------------------------
NOTE: SSH key path has changed

kdevops now uses directory-specific SSH key paths. An old-style key exists:
Old: $OLD_PUBKEY
New: $NEW_PUBKEY

If you have RUNNING VMs that need the old key, migrate it:
mv "$OLD_KEY" "$NEW_KEY"
mv "$OLD_PUBKEY" "$NEW_PUBKEY"

Otherwise, a new key will be generated automatically.
--------------------------------------------------------------------------------
EOF
fi
5 changes: 5 additions & 0 deletions scripts/terraform.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,14 @@ ifeq (y,$(CONFIG_TERRAFORM_SSH_CONFIG_GENKEY_OVERWRITE))
DEFAULT_DEPS += remove-ssh-key
endif

DEFAULT_DEPS += ssh-key-migration-check
DEFAULT_DEPS += $(KDEVOPS_SSH_PRIVKEY)
endif # CONFIG_TERRAFORM_SSH_CONFIG_GENKEY

# Run the SSH key migration check script, handing it the top directory path
# so it can derive the per-directory key name. The script only prints a
# notice; it does not touch any key files.
# NOTE(review): $(TOPDIR_PATH) is passed unquoted, so a path containing
# whitespace would be split into several arguments -- confirm such paths are
# unsupported.
PHONY += ssh-key-migration-check
ssh-key-migration-check:
	$(Q)$(TOPDIR)/scripts/check-ssh-key-migration.sh $(TOPDIR_PATH)

ANSIBLE_EXTRA_ARGS += $(TERRAFORM_EXTRA_VARS)

# Lambda Labs SSH key management
Expand Down
7 changes: 5 additions & 2 deletions terraform/Kconfig.ssh
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,17 @@ config TERRAFORM_SSH_CONFIG_USER

config TERRAFORM_SSH_CONFIG_PUBKEY_FILE
string "File containing Ansible's ssh public key"
default "~/.ssh/kdevops_terraform_$(shell, echo $(TOPDIR_PATH) | sha256sum | cut -c1-8).pub" if TERRAFORM_LAMBDALABS
default "~/.ssh/kdevops_terraform.pub"
default "~/.ssh/kdevops_terraform_$(shell, echo $(TOPDIR_PATH) | sha256sum | cut -c1-8).pub"
help
The filename of the file containing an ssh public key
Ansible is to use to manage its target nodes. The
matching private key should be located in a file using
the same basename (without the ".pub").

The filename includes an 8-character hash of the current
directory path, allowing multiple kdevops installations to
use separate SSH keys without conflicts.

config TERRAFORM_SSH_CONFIG_GENKEY
bool "Should we create a new random key for you?"
default y
Expand Down
30 changes: 30 additions & 0 deletions terraform/aws/scripts/aws_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@
from botocore.exceptions import ClientError, NoCredentialsError


class AwsNotConfiguredError(Exception):
    """Signal that no AWS credentials could be found.

    Callers catch this to distinguish "AWS is simply not set up" from
    other failures and decide for themselves whether that is fatal.
    """


def get_default_region():
"""
Get the default AWS region from the ~/.aws/config file.
Expand Down Expand Up @@ -128,6 +134,30 @@ def handle_aws_credentials_error(quiet=False):
return False


def require_aws_credentials():
    """
    Verify that AWS credentials are available by asking STS who we are.

    Intended to run near the top of main(). Rather than probing for
    credential files by hand (which is racy), this performs a real
    STS GetCallerIdentity request and lets boto3 locate the credentials.

    Returns:
        dict: The STS GetCallerIdentity response for the active credentials

    Raises:
        AwsNotConfiguredError: If boto3 reports that no credentials exist.
            Any other exception raised by boto3 propagates unchanged.
    """
    try:
        identity = boto3.client("sts").get_caller_identity()
    except NoCredentialsError as exc:
        # Translate the SDK-specific error so callers need not import botocore.
        raise AwsNotConfiguredError("AWS credentials not found") from exc
    return identity


def get_all_regions(quiet=False):
"""
Retrieve the list of all AWS regions.
Expand Down
17 changes: 11 additions & 6 deletions terraform/aws/scripts/gen_kconfig_ami
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ from collections import defaultdict
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed

import boto3
from botocore.exceptions import ClientError, NoCredentialsError
from botocore.exceptions import ClientError

from aws_common import (
AwsNotConfiguredError,
get_default_region,
get_jinja2_environment,
create_ec2_client,
handle_aws_client_error,
handle_aws_credentials_error,
require_aws_credentials,
)


Expand Down Expand Up @@ -257,9 +257,6 @@ def discover_ami_patterns(

return discovered_patterns

except NoCredentialsError:
handle_aws_credentials_error(quiet)
return {}
except ClientError as e:
handle_aws_client_error(e, f"discovering AMI patterns for {owner_name}", quiet)
return {}
Expand Down Expand Up @@ -822,6 +819,14 @@ def main():
output_owners_raw(owners, args.quiet)
return

# Allow make dynconfig to succeed without AWS credentials
try:
require_aws_credentials()
except AwsNotConfiguredError:
if not args.quiet:
print("AWS not configured - skipping (optional)", file=sys.stderr)
sys.exit(0)

if args.region:
region = args.region
else:
Expand Down
14 changes: 10 additions & 4 deletions terraform/aws/scripts/gen_kconfig_instance
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,12 @@ construct the "instance" Kconfig menu.
import sys
import argparse

from botocore.exceptions import ClientError, NoCredentialsError

from aws_common import (
AwsNotConfiguredError,
get_default_region,
get_all_instance_types,
get_jinja2_environment,
handle_aws_client_error,
handle_aws_credentials_error,
require_aws_credentials,
)


Expand Down Expand Up @@ -316,6 +314,14 @@ def main():
"""Main function to run the program."""
args = parse_arguments()

# Allow make dynconfig to succeed without AWS credentials
try:
require_aws_credentials()
except AwsNotConfiguredError:
if not args.quiet:
print("AWS not configured - skipping (optional)", file=sys.stderr)
sys.exit(0)

if args.region:
region = args.region
else:
Expand Down
14 changes: 10 additions & 4 deletions terraform/aws/scripts/gen_kconfig_location
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,15 @@ import sys
import argparse

from concurrent.futures import ThreadPoolExecutor, as_completed
from botocore.exceptions import ClientError, NoCredentialsError

from aws_common import (
AwsNotConfiguredError,
get_default_region,
get_all_regions,
get_region_availability_zones,
get_jinja2_environment,
get_region_kconfig_name,
create_ec2_client,
handle_aws_client_error,
handle_aws_credentials_error,
require_aws_credentials,
)


Expand Down Expand Up @@ -214,6 +212,14 @@ def main():
"""Main function to run the program."""
args = parse_arguments()

# Allow make dynconfig to succeed without AWS credentials
try:
require_aws_credentials()
except AwsNotConfiguredError:
if not args.quiet:
print("AWS not configured - skipping (optional)", file=sys.stderr)
sys.exit(0)

if not args.quiet:
print("Fetching list of all AWS regions...", file=sys.stderr)
regions = get_all_regions()
Expand Down
48 changes: 48 additions & 0 deletions terraform/azure/scripts/azure_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
from jinja2 import Environment, FileSystemLoader


class AzureNotConfiguredError(Exception):
    """Signal that no Azure credentials could be found.

    Callers catch this to distinguish "Azure is simply not set up" from
    other failures and decide for themselves whether that is fatal.
    """


def get_default_region():
"""
Get the default Azure region from Azure configuration.
Expand Down Expand Up @@ -371,3 +377,45 @@ def exit_on_empty_result(result, context, quiet=False):
)
print("Run 'az login' to authenticate with Azure.", file=sys.stderr)
sys.exit(1)


def require_azure_credentials():
    """
    Verify that Azure CLI login credentials are available.

    Intended to run near the top of main(). Rather than probing for
    credential files by hand (which is racy), this asks the Azure CLI
    profile for login credentials for the management endpoint.

    Returns:
        The subscription ID reported by the Azure CLI profile

    Raises:
        AzureNotConfiguredError: If the Azure SDK cannot be imported, or if
            the failure message looks like a login/authentication problem.
            Any other exception is re-raised unchanged.
    """
    try:
        # Imported lazily so a missing SDK is reported as "not configured".
        from azure.common.credentials import get_cli_profile

        login_info = get_cli_profile().get_login_credentials(
            resource="https://management.azure.com"
        )
        _, subscription_id, _ = login_info
    except ImportError as exc:
        raise AzureNotConfiguredError("Azure SDK not installed") from exc
    except Exception as exc:
        # The SDK does not expose one dedicated "not logged in" exception
        # here, so classify the failure by keywords in its message.
        auth_markers = (
            "login",
            "logged in",
            "authenticate",
            "credential",
            "az login",
        )
        lowered = str(exc).lower()
        if not any(marker in lowered for marker in auth_markers):
            # Not an authentication problem: let the real error surface.
            raise
        raise AzureNotConfiguredError("Azure credentials not found") from exc
    else:
        return subscription_id
11 changes: 10 additions & 1 deletion terraform/azure/scripts/gen_kconfig_image
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,12 @@ from collections import defaultdict
from functools import lru_cache

from azure_common import (
AzureNotConfiguredError,
get_default_region,
get_jinja2_environment,
get_all_regions,
get_region_kconfig_name,
get_all_offers_and_skus,
require_azure_credentials,
)


Expand Down Expand Up @@ -730,6 +731,14 @@ def main():
output_publishers_raw(args.quiet)
return

# Allow make dynconfig to succeed without Azure credentials
try:
require_azure_credentials()
except AzureNotConfiguredError:
if not args.quiet:
print("Azure not configured - skipping (optional)", file=sys.stderr)
sys.exit(0)

publishers = get_known_publishers()

# Filter to specific publisher if requested
Expand Down
12 changes: 10 additions & 2 deletions terraform/azure/scripts/gen_kconfig_location
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@ import sys
import argparse

from azure_common import (
AzureNotConfiguredError,
get_default_region,
get_all_regions,
get_jinja2_environment,
get_region_kconfig_name,
exit_on_empty_result,
require_azure_credentials,
)


Expand Down Expand Up @@ -191,11 +192,18 @@ def main():
"""Main function to run the program."""
args = parse_arguments()

# Allow make dynconfig to succeed without Azure credentials
try:
require_azure_credentials()
except AzureNotConfiguredError:
if not args.quiet:
print("Azure not configured - skipping (optional)", file=sys.stderr)
sys.exit(0)

if not args.quiet:
print("Fetching list of all Azure regions...", file=sys.stderr)

regions = get_all_regions(args.quiet)
exit_on_empty_result(regions, "Azure region query", args.quiet)

if args.regions:
if args.format == "kconfig":
Expand Down
13 changes: 10 additions & 3 deletions terraform/azure/scripts/gen_kconfig_size
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ import os
import yaml

from azure_common import (
AzureNotConfiguredError,
get_default_region,
get_all_regions,
get_jinja2_environment,
get_vm_sizes_and_skus,
exit_on_empty_result,
require_azure_credentials,
)


Expand Down Expand Up @@ -602,21 +603,27 @@ def main():
"""Main function to run the program."""
args = parse_arguments()

# Allow make dynconfig to succeed without Azure credentials
try:
require_azure_credentials()
except AzureNotConfiguredError:
if not args.quiet:
print("Azure not configured - skipping (optional)", file=sys.stderr)
sys.exit(0)

# Determine which regions to query
if args.region:
# Query specific region only
regions = [args.region]
elif args.all_regions:
# Query all regions
regions = get_all_regions(args.quiet)
exit_on_empty_result(regions, "Azure region query", args.quiet)
else:
# Query default region only
regions = [get_default_region()]

# Get VM sizes and capabilities in a single API call
sizes, sku_capabilities = get_all_vm_sizes_and_capabilities(regions, args.quiet)
exit_on_empty_result(sizes, "Azure VM size query", args.quiet)

if args.families:
output_families_raw(sizes, args.quiet)
Expand Down
Loading
Loading