18 commits
0bb7651
nrl-2002 Initial set up of dynamo export
jackleary Mar 5, 2026
704b380
nrl-2002 Sort lambdas for deployment
jackleary Mar 5, 2026
28522c6
Merge branch 'develop' into feature/jale13-nrl-2002-dynamo-export
jackleary Mar 5, 2026
c115f48
NRL-2002 Update table name
jackleary Mar 5, 2026
b14cade
Merge branch 'develop' into feature/jale13-nrl-2002-dynamo-export
mattdean3-nhs Apr 20, 2026
4ef535a
NRL-2002 Remove references to patient flags
anjalitrace2-nhs Apr 29, 2026
6cdfa8b
NRL-2002 Remove unused variables
anjalitrace2-nhs Apr 30, 2026
9f50f2e
NRL-2002 Add new dynamo export lambdas to build
anjalitrace2-nhs Apr 30, 2026
09c2166
Merge branch 'develop' of github.com:NHSDigital/NRLF into NRL-2002-ge…
anjalitrace2-nhs Apr 30, 2026
6566ba1
NRL-2002 Update handler names
anjalitrace2-nhs May 5, 2026
7cdd3b7
NRL-2002 Add instructions for enabling point-in-time recovery mode re…
anjalitrace2-nhs May 5, 2026
68b4d56
NRL-2002 Use KMS key arn rather than uuid to prevent export failing o…
anjalitrace2-nhs May 6, 2026
ce68b0d
NRL-2002 Give dyanmo export trigger lambda perms to access the table …
anjalitrace2-nhs May 6, 2026
2f019a3
NRL-2002 Comment out unused required arg for slack notification on error
anjalitrace2-nhs May 6, 2026
3d89d76
NRL-2002 initialise export_type in case no from_times calculated
anjalitrace2-nhs May 6, 2026
3164a0f
NRL-2002 logs for debugging
anjalitrace2-nhs May 6, 2026
b6638c3
NRL-2002 remove prints no longer needed to debug
anjalitrace2-nhs May 6, 2026
40696ac
NRL-2002 rename last updated column to be valid
anjalitrace2-nhs May 6, 2026
6 changes: 5 additions & 1 deletion Makefile
@@ -61,12 +61,16 @@ check-deploy: ## check the deploy environment is setup correctly
check-deploy-warn:
@SHOULD_WARN_ONLY=true ./scripts/check-deploy-environment.sh

build: check-warn build-api-packages build-layers build-dependency-layer build-seed-sandbox-lambda ## Build the project
build: check-warn build-api-packages build-layers build-dependency-layer build-seed-sandbox-lambda build-dynamo-export-lambdas ## Build the project

build-seed-sandbox-lambda:
@echo "Building seed_sandbox Lambda"
@cd lambdas/seed_sandbox && make build

build-dynamo-export-lambdas:
@echo "Building dynamo_export Lambdas"
@cd lambdas/dynamo_export && make build

build-dependency-layer:
@echo "Building Lambda dependency layer"
@mkdir -p $(DIST_PATH)
33 changes: 33 additions & 0 deletions lambdas/dynamo_export/Makefile
@@ -0,0 +1,33 @@
.PHONY: *

FILE_TO_PACKAGE?=

clean:
@echo "Cleaning build artifacts..."
rm -rf build
@echo "✓ Clean complete"

build-lambda: clean
$(eval LAMBDA_NAME := $(basename ${FILE_TO_PACKAGE}))
@echo "Building $(LAMBDA_NAME) Lambda deployment package..."
mkdir -p build

# Copy the handler
cp $(FILE_TO_PACKAGE) build/

# Create the zip file in root dist
mkdir -p ../../dist
cd build && zip -r "../../../dist/${LAMBDA_NAME}.zip" . -x "*.pyc" -x "__pycache__/*" -x ".DS_Store"

@echo "✓ Lambda package created: ../../dist/${LAMBDA_NAME}.zip"

build-dynamo-export-trigger:
FILE_TO_PACKAGE=dynamo_export_trigger.py make build-lambda

build-dynamo-export-poll:
FILE_TO_PACKAGE=dynamo_export_poll.py make build-lambda

build-ssm-put-param:
FILE_TO_PACKAGE=ssm_put_param.py make build-lambda

build: build-dynamo-export-trigger build-dynamo-export-poll build-ssm-put-param
31 changes: 31 additions & 0 deletions lambdas/dynamo_export/dynamo_export_poll.py
@@ -0,0 +1,31 @@
import boto3
from botocore.config import Config

ddb = boto3.client(
"dynamodb",
config=Config(connect_timeout=5, read_timeout=5),
)


def lambda_handler(event, _context):
completed = []
for arn in event["export_arns"]:
response = ddb.describe_export(ExportArn=arn)
if response["ExportDescription"]["ExportStatus"] == "FAILED":
return {
"status": "FAILED",
"export_to_time": event["export_to_time"],
"export_arns": event["export_arns"],
"export_type": event["export_type"],
}

completed.append(response["ExportDescription"]["ExportStatus"])

status = "COMPLETED" if all(s == "COMPLETED" for s in completed) else "IN_PROGRESS"

return {
"status": status,
"export_to_time": event["export_to_time"],
"export_arns": event["export_arns"],
"export_type": event["export_type"],
}
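For illustration, a minimal sketch of the payload this poller receives and passes through (the ARN, account ID and timestamps are invented placeholders; a real invocation would call `describe_export` against whatever ARNs are supplied):

```python
event = {
    "export_to_time": "2026-05-06T10:00:00+00:00",
    "export_arns": [
        "arn:aws:dynamodb:eu-west-2:000000000000:table/example-table/export/01234567890123-abcdef01"
    ],
    "export_type": "INCREMENTAL_EXPORT",
}

result = lambda_handler(event, None)
# result["status"] is "COMPLETED", "IN_PROGRESS" or "FAILED"; the other
# fields are echoed back unchanged for the next state-machine step.
```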
85 changes: 85 additions & 0 deletions lambdas/dynamo_export/dynamo_export_trigger.py
@@ -0,0 +1,85 @@
import os
from datetime import datetime, timedelta, timezone

import boto3
from botocore.config import Config
from botocore.exceptions import ClientError

bucket = os.environ["BUCKET"]
ddb_table_arn = os.environ["DDB_TABLE_ARN"]
kms_key = os.environ["KMS_KEY"]
env = os.environ["ENVIRONMENT"]
ddb_table_name = os.environ["DDB_TABLE_NAME"]

SSM_PARAM = "/exports/DynamoExportRuntime"

ddb_client = boto3.client(
"dynamodb",
config=Config(connect_timeout=5, read_timeout=5),
)
ssm = boto3.client(
"ssm",
config=Config(connect_timeout=5, read_timeout=5),
)


def lambda_handler(_event, _context):
to_time = datetime.now(timezone.utc).replace(microsecond=0, second=0, minute=0)
export_arns = []
export_type = None

try:
from_time = ssm.get_parameter(Name=SSM_PARAM)["Parameter"]["Value"]
from_time = datetime.fromisoformat(from_time).replace(
microsecond=0, second=0, minute=0
)

# Handle exports longer than 24 hours by splitting into multiple exports
earliest_pitr = ddb_client.describe_continuous_backups(
TableName=ddb_table_name
)["ContinuousBackupsDescription"]["PointInTimeRecoveryDescription"][
"EarliestRestorableDateTime"
]

from_time = max(from_time, earliest_pitr)
days_difference = (to_time - from_time).days + 1
from_times = [from_time + timedelta(days=i) for i in range(days_difference)]

for base_time in from_times:
end_time = min(base_time + timedelta(days=1), to_time)
if end_time == base_time:
continue
response = ddb_client.export_table_to_point_in_time(
TableArn=ddb_table_arn,
S3Bucket=bucket,
S3SseAlgorithm="KMS",
S3SseKmsKeyId=kms_key,
ExportFormat="DYNAMODB_JSON",
ExportType="INCREMENTAL_EXPORT",
IncrementalExportSpecification={
"ExportFromTime": base_time,
"ExportToTime": end_time,
"ExportViewType": "NEW_AND_OLD_IMAGES",
},
)
export_arns.append(response["ExportDescription"]["ExportArn"])
export_type = response["ExportDescription"]["ExportType"]
except ClientError as e:
if e.response["Error"]["Code"] != "ParameterNotFound":
raise
response = ddb_client.export_table_to_point_in_time(
TableArn=ddb_table_arn,
S3Bucket=bucket,
S3SseAlgorithm="KMS",
S3SseKmsKeyId=kms_key,
ExportFormat="DYNAMODB_JSON",
ExportType="FULL_EXPORT",
)
export_arns.append(response["ExportDescription"]["ExportArn"])
export_type = response["ExportDescription"]["ExportType"]

return {
"export_to_time": to_time.isoformat(),
"export_arns": export_arns,
"export_type": export_type,
}
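To make the window-splitting in the trigger concrete, a standalone sketch (times invented) of how a stored from-time roughly two and a half days old becomes three daily export windows, the last clipped to `to_time`:

```python
from datetime import datetime, timedelta, timezone

to_time = datetime(2026, 5, 6, 10, tzinfo=timezone.utc)
from_time = datetime(2026, 5, 3, 22, tzinfo=timezone.utc)

days_difference = (to_time - from_time).days + 1  # 2 full days + remainder -> 3
for base_time in (from_time + timedelta(days=i) for i in range(days_difference)):
    end_time = min(base_time + timedelta(days=1), to_time)
    if end_time > base_time:  # skip empty windows
        print(base_time.isoformat(), "->", end_time.isoformat())
# 2026-05-03T22:00:00+00:00 -> 2026-05-04T22:00:00+00:00
# 2026-05-04T22:00:00+00:00 -> 2026-05-05T22:00:00+00:00
# 2026-05-05T22:00:00+00:00 -> 2026-05-06T10:00:00+00:00
```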
18 changes: 18 additions & 0 deletions lambdas/dynamo_export/ssm_put_param.py
@@ -0,0 +1,18 @@
import boto3
from botocore.config import Config

ssm = boto3.client(
"ssm",
config=Config(connect_timeout=5, read_timeout=5),
)


def lambda_handler(event, _context):
param_name = "/exports/DynamoExportRuntime"
param_value = event["export_to_time"]
ssm.put_parameter(Name=param_name, Value=param_value, Type="String", Overwrite=True)
return {
"to_time": param_value,
"export_arns": event["export_arns"],
"export_type": event["export_type"],
}
11 changes: 10 additions & 1 deletion terraform/account-wide-infrastructure/README.md
@@ -9,7 +9,7 @@ Each subdirectory corresponds to each AWS account (`mgmt`, `prod`, `test` and `d
## Table of Contents

1. [Prerequisites](#prerequisites)
2. [Initialise shell environment](#initialise-shell-environment)
2. [Deploy mgmt resources](#deploy-mgmt-resources)
3. [Deploy account wide resources](#deploy-account-wide-resources)
4. [Tear down account wide resources](#tear-down-account-wide-resources)

@@ -160,6 +160,15 @@ To disable the PowerBI Gateway from the account:
1. Set the `enable_powerbi_auto_push` variable to `false` in `./ACCOUNT_NAME/vars.tf`
2. Deploy the account-wide infrastructure to the account

#### Dynamo Export

Data from DynamoDB is exported to S3 and processed by an AWS Glue job so it can be queried in Athena for reporting, orchestrated by a step function.

This requires point-in-time recovery (PITR) to be enabled on the desired DynamoDB table.
This is already enabled on the prod and int pointer tables, but not on any others.

If you'd like to trigger this step function on any other pointer table (e.g. dev, perftest), you will need to temporarily [enable point-in-time recovery via the console](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/PointInTimeRecovery_Howitworks.html#howitworks-enable-pitr-console).
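Alternatively, PITR can be enabled programmatically; a minimal sketch with boto3 (the table name below is a placeholder):

```python
import boto3

ddb = boto3.client("dynamodb")

# Enable point-in-time recovery on the target table (placeholder name).
ddb.update_continuous_backups(
    TableName="nhsd-nrlf--dev--pointers-table",
    PointInTimeRecoverySpecification={"PointInTimeRecoveryEnabled": True},
)
```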

## Tear down account wide resources

WARNING - This action will destroy all account-wide resources from the AWS account. This should
7 changes: 7 additions & 0 deletions terraform/account-wide-infrastructure/dev/dynamo_export.tf
@@ -0,0 +1,7 @@
module "dynamo_export" {
source = "../modules/dynamo_export"
name_prefix = "nhsd-nrlf--dev"
environment = "dev"
pointer_table_name = module.dev-pointers-table.table_name
pointer_table_kms_key_arn = module.dev-pointers-table.kms_key_arn
}
@@ -0,0 +1,87 @@
{
"Comment": "execute lambdas",
"StartAt": "DynamoExport",
"States": {
"DynamoExport": {
"Type": "Task",
"Resource": "arn:aws:states:::lambda:invoke",
"OutputPath": "$.Payload",
"Parameters": {
"FunctionName": "${lambda_export_trigger_function_name}",
"Payload.$": "$"
},
"Next": "DynamoExportStatusCheck"
},
"DynamoExportStatusCheck": {
"Type": "Task",
"Resource": "arn:aws:states:::lambda:invoke",
"OutputPath": "$.Payload",
"Parameters": {
"FunctionName": "${lambda_export_poll_function_name}",
"Payload.$": "$"
},
"Next": "Choice"
},
"Choice": {
"Type": "Choice",
"Choices": [
{
"Variable": "$.status",
"StringEquals": "COMPLETED",
"Next": "SSMPut"
},
{
"Variable": "$.status",
"StringEquals": "IN_PROGRESS",
"Next": "WaitState"
},
{
"Variable": "$.status",
"StringEquals": "FAILED",
"Next": "ExportFailure"
}
],
"Default": "FailState"
},
"FailState": {
"Type": "Fail",
"Error": "UnhandledStatus",
"Cause": "Status not recognised"
},
"ExportFailure": {
"Type": "Fail",
"Error": "DynamoExportFailed",
"Cause": "DynamoDB Export Failed"
},
"WaitState": {
"Type": "Wait",
"Seconds": 120,
"Next": "DynamoExportStatusCheck"
},
"SSMPut": {
"Type": "Task",
"Resource": "arn:aws:states:::lambda:invoke",
"OutputPath": "$.Payload",
"Parameters": {
"FunctionName": "${lambda_ssm_put_param_function_name}",
"Payload.$": "$"
},
"Next": "GlueJobTrigger"
},
"GlueJobTrigger": {
"Type": "Task",
"Resource": "arn:aws:states:::glue:startJobRun.sync",
"Parameters": {
"JobName": "${glue_job_name}",
"Arguments": {
"--SOURCE_BUCKET": "${dynamo_export_s3_bucket_name}",
"--TARGET_BUCKET": "${dynamo_export_processed_s3_bucket_name}",
"--DDB_TABLE_ARN": "${ddb_table_arn}",
"--GLUE_CRAWLER_NAME": "${glue_crawler_name}",
"--EXPORT_TYPE.$": "$.export_type"
}
},
"End": true
}
}
}
@@ -0,0 +1,77 @@
resource "aws_s3_bucket" "dynamodb_output" {
bucket = "${var.name_prefix}-${var.environment}-dynamo-output-bucket"
}

# May need to restrict access to specific IAM roles/principals in future, but helps with testing for now.
data "aws_iam_policy_document" "dynamodb_output" {
statement {
sid = "HTTPSOnly"
effect = "Deny"
actions = ["s3:*"]

principals {
type = "AWS"
identifiers = ["*"]
}

resources = [
aws_s3_bucket.dynamodb_output.arn,
"${aws_s3_bucket.dynamodb_output.arn}/*"
]

condition {
test = "Bool"
variable = "aws:SecureTransport"
values = ["false"]
}
}
}

resource "aws_s3_bucket_policy" "dynamodb_output" {
bucket = aws_s3_bucket.dynamodb_output.id
policy = data.aws_iam_policy_document.dynamodb_output.json
}


resource "aws_s3_bucket_server_side_encryption_configuration" "dynamodb_output" {
bucket = aws_s3_bucket.dynamodb_output.bucket

rule {
apply_server_side_encryption_by_default {
kms_master_key_id = aws_kms_key.dynamo.arn
sse_algorithm = "aws:kms"
}
}
}


resource "aws_s3_bucket_public_access_block" "dynamodb_output_public_access_block" {
bucket = aws_s3_bucket.dynamodb_output.id

block_public_acls = true
block_public_policy = true
ignore_public_acls = true
restrict_public_buckets = true
}

resource "aws_s3_bucket_lifecycle_configuration" "dynamodb_output_lifecycle" {
bucket = aws_s3_bucket.dynamodb_output.id


rule {
id = "object-auto-delete-rule"
status = "Enabled"
filter {}

expiration {
days = 2
}
}
}

resource "aws_s3_bucket_versioning" "dynamodb_output_versioning" {
bucket = aws_s3_bucket.dynamodb_output.id
versioning_configuration {
status = "Enabled"
}
}