-
Notifications
You must be signed in to change notification settings - Fork 1
test: grafana SLO #185
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feat/grafana-dashboards
Are you sure you want to change the base?
test: grafana SLO #185
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,225 @@ | ||
| import { it } from 'node:test'; | ||
| import * as assert from 'node:assert'; | ||
| import { | ||
| GetRoleCommand, | ||
| GetRolePolicyCommand, | ||
| ListRolePoliciesCommand, | ||
| } from '@aws-sdk/client-iam'; | ||
| import type { Dispatcher } from 'undici'; | ||
| import { request } from 'undici'; | ||
| import { Unwrap } from '@pulumi/pulumi'; | ||
| import { backOff } from '../util'; | ||
| import { GrafanaTestContext } from './test-context'; | ||
|
|
||
/** Retry options passed to every `backOff` call in this file. */
const backOffConfig = {
  numOfAttempts: 15,
};
|
|
||
/**
 * Registers the `node:test` cases that verify the deployed Grafana SLO setup:
 * the Prometheus data source, the SLO dashboard and its panels, metric data
 * being queryable through Grafana, and the inline AMP policy on the IAM role.
 *
 * `ctx.outputs` must be populated before the cases run; they read it with
 * non-null assertions.
 *
 * @param ctx Shared test context (config, AWS clients and stack outputs).
 */
export function testGrafanaSloDashboard(ctx: GrafanaTestContext) {
  it('should have created the Prometheus data source', async () => {
    const grafana = ctx.outputs!.grafanaSloComponent;
    const prometheusDataSource = grafana.prometheusDataSource!;
    const prometheusDataSourceName =
      prometheusDataSource.name as unknown as Unwrap<
        typeof prometheusDataSource.name
      >;
    const workspace = ctx.outputs!.prometheusWorkspace;
    const prometheusEndpoint =
      workspace.prometheusEndpoint as unknown as Unwrap<
        typeof workspace.prometheusEndpoint
      >;
    // Compare without the trailing slash so both URL spellings match.
    const expectedEndpoint = prometheusEndpoint.replace(/\/$/, '');

    await backOff(async () => {
      const response = await grafanaRequest(
        ctx,
        'GET',
        `/api/datasources/name/${encodeURIComponent(prometheusDataSourceName)}`,
      );
      await assertStatusCode(response, 200, 'Expected data source to exist');

      const data = (await response.body.json()) as Record<string, unknown>;
      assert.strictEqual(
        data.type,
        'grafana-amazonprometheus-datasource',
        'Expected Amazon Prometheus data source type',
      );

      // Guard the type so a missing `url` fails the assertion instead of
      // throwing a TypeError.
      assert.ok(
        typeof data.url === 'string' && data.url.includes(expectedEndpoint),
        'Expected data source URL to contain the AMP workspace endpoint',
      );
    }, backOffConfig);
  });

  it('should have created the dashboard with expected panels', async () => {
    const dashboard = ctx.outputs!.grafanaSloComponent.dashboards[0];
    const dashboardUid = dashboard.uid as unknown as Unwrap<
      typeof dashboard.uid
    >;
    const expectedPanels = [
      'Availability',
      'Availability Burn Rate',
      'Success Rate',
      'Success Rate Burn Rate',
      'HTTP Request Success Rate',
      'Request % below 250ms',
      'Latency Burn Rate',
      '99th Percentile Latency',
      'Request percentage below 250ms',
      'Custom Panel',
    ].sort();

    await backOff(async () => {
      const response = await grafanaRequest(
        ctx,
        'GET',
        // Encoded for consistency with the data source lookups.
        `/api/dashboards/uid/${encodeURIComponent(dashboardUid)}`,
      );
      await assertStatusCode(response, 200, 'Expected dashboard to exist');

      const data = (await response.body.json()) as {
        dashboard: { title: string; panels: Array<{ title: string }> };
      };
      assert.strictEqual(
        data.dashboard.title,
        'ICB Grafana Test SLO',
        'Expected dashboard title to match',
      );

      // Panel order is irrelevant; compare sorted titles.
      const panelTitles = data.dashboard.panels.map(p => p.title).sort();
      assert.deepStrictEqual(
        panelTitles,
        expectedPanels,
        'Dashboard panels do not match expected panels',
      );
    }, backOffConfig);
  });

  it('should display metrics data in the dashboard', async () => {
    // Hit the app first so it emits metrics that can be queried below.
    await requestEndpointWithExpectedStatus(ctx, ctx.config.usersPath, 200);

    const prometheusDataSource =
      ctx.outputs!.grafanaSloComponent.prometheusDataSource!;
    const prometheusDataSourceName =
      prometheusDataSource.name as unknown as Unwrap<
        typeof prometheusDataSource.name
      >;
    const dsResponse = await grafanaRequest(
      ctx,
      'GET',
      `/api/datasources/name/${encodeURIComponent(prometheusDataSourceName)}`,
    );
    // Fail here with a clear message rather than querying with an undefined
    // uid and exhausting every retry below.
    await assertStatusCode(
      dsResponse,
      200,
      'Expected data source lookup to succeed',
    );
    const dsData = (await dsResponse.body.json()) as Record<string, unknown>;
    const dataSourceUid = dsData.uid;
    assert.ok(
      typeof dataSourceUid === 'string' && dataSourceUid !== '',
      'Expected data source lookup to return a uid',
    );

    await backOff(async () => {
      const response = await grafanaRequest(ctx, 'POST', '/api/ds/query', {
        queries: [
          {
            datasource: {
              type: 'grafana-amazonprometheus-datasource',
              uid: dataSourceUid,
            },
            // Match any metric whose name starts with the test namespace.
            expr: `{__name__=~"${ctx.config.prometheusNamespace}_.*"}`,
            instant: true,
            refId: 'A',
          },
        ],
        from: 'now-5m',
        to: 'now',
      });
      await assertStatusCode(response, 200, 'Expected query to succeed');

      const data = (await response.body.json()) as {
        results: Record<string, { frames: Array<unknown> }>;
      };
      const frames = data.results?.A?.frames ?? [];
      assert.ok(
        frames.length > 0,
        `Expected Grafana to return metric frames for namespace '${ctx.config.prometheusNamespace}'`,
      );
    }, backOffConfig);
  });

  it('should have created the IAM role with AMP inline policy', async () => {
    const iamRole = ctx.outputs!.grafanaSloComponent.grafanaIamRole;
    const grafanaAmpRoleArn = iamRole.arn as unknown as Unwrap<
      typeof iamRole.arn
    >;
    // The role name is the last path segment of the ARN.
    const roleName = grafanaAmpRoleArn.split('/').pop()!;
    const { Role } = await ctx.clients.iam.send(
      new GetRoleCommand({ RoleName: roleName }),
    );
    assert.ok(Role, 'Grafana IAM role should exist');

    const { PolicyNames } = await ctx.clients.iam.send(
      new ListRolePoliciesCommand({ RoleName: roleName }),
    );
    assert.ok(
      PolicyNames && PolicyNames.length > 0,
      'IAM role should have at least one inline policy',
    );

    const { PolicyDocument } = await ctx.clients.iam.send(
      new GetRolePolicyCommand({
        RoleName: roleName,
        PolicyName: PolicyNames[0],
      }),
    );
    assert.ok(PolicyDocument, 'Inline policy should include a document');

    // IAM returns the policy document URL-encoded.
    const policy = JSON.parse(decodeURIComponent(PolicyDocument)) as {
      // IAM allows `Action` to be a single string or a list; `flatMap`
      // below handles both.
      Statement: Array<{ Action: string | string[] }>;
    };
    const actions = policy.Statement.flatMap(s => s.Action).sort();
    const expectedActions = [
      'aps:GetSeries',
      'aps:GetLabels',
      'aps:GetMetricMetadata',
      'aps:QueryMetrics',
    ].sort();
    assert.deepStrictEqual(
      actions,
      expectedActions,
      'AMP policy actions do not match expected actions',
    );
  });
}

/**
 * Asserts the response status and, on a mismatch, first discards the unread
 * body so the connection is not left held while the caller retries.
 *
 * @param response Response returned by `grafanaRequest`.
 * @param expected Status code the response must carry.
 * @param message Assertion message used when the status differs.
 */
async function assertStatusCode(
  { body, statusCode }: Dispatcher.ResponseData,
  expected: number,
  message: string,
): Promise<void> {
  if (statusCode !== expected) {
    await body.dump();
  }
  assert.strictEqual(statusCode, expected, message);
}
|
|
||
/**
 * Requests `path` on the deployed web server's load balancer over plain HTTP,
 * retrying via `backOff` until the response carries `expectedStatus`.
 *
 * @param ctx Shared test context; `ctx.outputs` must already be populated.
 * @param path Path appended to the load balancer DNS name (e.g. `/users`).
 * @param expectedStatus Status code the endpoint must return.
 */
async function requestEndpointWithExpectedStatus(
  ctx: GrafanaTestContext,
  path: string,
  expectedStatus: number,
): Promise<void> {
  // The endpoint does not change between attempts, so resolve it once.
  const webServer = ctx.outputs!.webServer;
  const dnsName = webServer.lb.lb.dnsName as unknown as Unwrap<
    typeof webServer.lb.lb.dnsName
  >;
  const endpoint = `http://${dnsName}${path}`;

  await backOff(async () => {
    const { body, statusCode } = await request(endpoint);
    // Only the status matters; discard the payload so undici can release the
    // connection instead of holding it across retries.
    await body.dump();
    assert.strictEqual(
      statusCode,
      expectedStatus,
      `Endpoint ${endpoint} should return ${expectedStatus}`,
    );
  }, backOffConfig);
}
|
|
||
/**
 * Sends a bearer-authenticated JSON request to the Grafana HTTP API.
 *
 * @param ctx Shared test context supplying `grafanaUrl` and `grafanaAuth`.
 * @param method HTTP method to use.
 * @param path API path appended to the Grafana base URL.
 * @param body Optional payload; JSON-serialized when provided.
 * @returns The undici response for the request.
 */
async function grafanaRequest(
  ctx: GrafanaTestContext,
  method: Dispatcher.HttpMethod,
  path: string,
  body?: unknown,
) {
  // Drop one trailing slash so the base URL and path join cleanly.
  const baseUrl = ctx.config.grafanaUrl.replace(/\/$/, '');
  const headers = {
    Authorization: `Bearer ${ctx.config.grafanaAuth}`,
    'Content-Type': 'application/json',
  };
  const payload = body === undefined ? undefined : JSON.stringify(body);

  return request(`${baseUrl}${path}`, { method, headers, body: payload });
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| import { before, describe, after } from 'node:test'; | ||
| import { InlineProgramArgs, OutputMap } from '@pulumi/pulumi/automation'; | ||
| import { IAMClient } from '@aws-sdk/client-iam'; | ||
| import * as automation from '../automation'; | ||
| import { requireEnv, unwrapOutputs } from '../util'; | ||
| import { testGrafanaSloDashboard } from './grafana-slo-dashboard.test'; | ||
| import * as infraConfig from './infrastructure/config'; | ||
| import { GrafanaTestContext, ProgramOutput } from './test-context'; | ||
|
|
||
/** Inline Pulumi program deployed before, and destroyed after, the suite. */
const programArgs: InlineProgramArgs = {
  projectName: 'icb-test-grafana',
  stackName: 'dev',
  // Loaded lazily so the infrastructure module is only evaluated when run.
  program: () => import('./infrastructure'),
};
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should the other required environment variables also be explicitly required at this stage, or can they be omitted? |
||
// Environment-provided settings, read once at module load.
const region = requireEnv('AWS_REGION');
const grafanaUrl = requireEnv('GRAFANA_URL');
const grafanaAuth = requireEnv('GRAFANA_AUTH');

/** Context shared with the test cases; `outputs` is filled in after deploy. */
const ctx: GrafanaTestContext = {
  config: {
    region,
    usersPath: infraConfig.usersPath,
    appName: infraConfig.appName,
    prometheusNamespace: infraConfig.prometheusNamespace,
    grafanaUrl,
    grafanaAuth,
  },
  clients: {
    iam: new IAMClient({ region }),
  },
};
|
|
||
describe('Grafana component deployment', () => {
  // Deploy the stack once and expose its unwrapped outputs to the cases.
  before(async () => {
    const rawOutputs: OutputMap = await automation.deploy(programArgs);
    ctx.outputs = unwrapOutputs<ProgramOutput>(rawOutputs);
  });

  // Tear the stack down once all cases have run.
  after(async () => {
    await automation.destroy(programArgs);
  });

  describe('SLO dashboard', () => {
    testGrafanaSloDashboard(ctx);
  });
});
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
/** Base name used for the test stack's resources. */
export const appName = 'grafana-test';

/** Container image run by the test web server. */
export const appImage = 'studiondev/observability-sample-app';

/** Port passed to the web server container configuration. */
export const appPort = 3000;
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add an empty line above to be consistent with the rest of the file. |
||
|
|
||
/** Path requested on the web server to generate traffic for the tests. */
export const usersPath = '/users';

/** Namespace prefix of the metrics exported to Prometheus. */
export const prometheusNamespace = 'icb_grafana_integration';

/** Label matcher passed to the success-rate and latency SLO panels. */
export const apiFilter = 'http_route=~"/.*"';
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| import * as aws from '@pulumi/aws'; | ||
| import * as pulumi from '@pulumi/pulumi'; | ||
| import * as studion from '@studion/infra-code-blocks'; | ||
| import { getCommonVpc } from '../../util'; | ||
| import { | ||
| appImage, | ||
| appPort, | ||
| appName, | ||
| prometheusNamespace, | ||
| apiFilter, | ||
| } from './config'; | ||
|
|
||
const stackName = pulumi.getStack();

// Root component that the resources created below are parented to.
const parent = new pulumi.ComponentResource(
  'studion:grafana:TestGroup',
  `${appName}-root`,
);
const tags = { Env: stackName, Project: appName };

const vpc = getCommonVpc();
const cluster = new aws.ecs.Cluster(`${appName}-cluster`, { tags }, { parent });

// Amazon Managed Prometheus workspace used by the collector and by Grafana.
const prometheusWorkspace = new aws.amp.Workspace(
  `${appName}-workspace`,
  { tags },
  { parent },
);

const logGroupName = `/grafana/test/${appName}-${stackName}`;
const cloudWatchLogGroup = new aws.cloudwatch.LogGroup(
  `${appName}-log-group`,
  { name: logGroupName, tags },
  { parent },
);

// Region is needed by both the collector and the Grafana data source.
const region = aws.config.requireRegion();

const otelCollectorBuilder = new studion.openTelemetry.OtelCollectorBuilder(
  appName,
  stackName,
);
const otelCollector = otelCollectorBuilder
  .withDefault({
    prometheusNamespace,
    prometheusWorkspace,
    region,
    logGroup: cloudWatchLogGroup,
    logStreamName: `${appName}-stream`,
  })
  .build();

const ecsConfig = {
  cluster,
  desiredCount: 1,
  size: 'small' as const,
  autoscaling: { enabled: false },
};

// OpenTelemetry settings passed to the app container as environment variables.
const otelEnvironment = [
  { name: 'OTEL_SERVICE_NAME', value: appName },
  { name: 'OTEL_EXPORTER_OTLP_ENDPOINT', value: 'http://127.0.0.1:4318' },
  { name: 'OTEL_EXPORTER_OTLP_PROTOCOL', value: 'http/json' },
];

const webServer = new studion.WebServerBuilder(appName)
  .withContainer(appImage, appPort, { environment: otelEnvironment })
  .withEcsConfig(ecsConfig)
  .withVpc(vpc.vpc)
  .withOtelCollector(otelCollector)
  .build({ parent });

// SLO dashboard: availability, success rate and latency sections plus one
// custom panel added through `addPanel`.
const sloDashboardBuilder =
  new studion.grafana.dashboard.WebServerSloDashboardBuilder(
    `${appName}-slo-dashboard`,
    { title: 'ICB Grafana Test SLO' },
  );
const grafanaSloDashboard = sloDashboardBuilder
  .withAvailability(0.99, '1d', prometheusNamespace)
  .withSuccessRate(0.95, '1d', '1h', apiFilter, prometheusNamespace)
  .withLatency(0.95, 250, '1d', '1h', apiFilter, prometheusNamespace)
  .addPanel(dataSource => {
    return {
      title: 'Custom Panel',
      type: 'timeseries',
      gridPos: { x: 12, y: 24, w: 12, h: 8 },
      datasource: dataSource.prometheus!,
      targets: [{ expr: 'up', legendFormat: 'Up' }],
      fieldConfig: { defaults: {} },
    };
  })
  .build();

const grafanaSloComponent = new studion.grafana.GrafanaBuilder(`${appName}-slo`)
  .withPrometheus({
    endpoint: prometheusWorkspace.prometheusEndpoint,
    region,
  })
  .addDashboard(grafanaSloDashboard)
  .build({ parent });

export { webServer, prometheusWorkspace, grafanaSloComponent };
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems that the creation of the Prometheus data source, the AMP role policy, and the Grafana IAM role should be tested inside
`index.test.ts` using a dedicated resource (e.g. `grafanaMinimal`), as that is common logic.