Skip to content

Commit 97699b8

Browse files
committed
productionize
1 parent 513bf9d commit 97699b8

2 files changed

Lines changed: 56 additions & 29 deletions

File tree

src/lib.rs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,13 @@ use axum::middleware::{Next, from_fn_with_state};
2424
use axum::response::{IntoResponse, Response};
2525
use axum::routing::{get, post};
2626
use sqlx::PgPool;
27-
use tower_http::trace::{MakeSpan, TraceLayer};
27+
use tower::ServiceBuilder;
28+
use tower_http::{
29+
catch_panic::CatchPanicLayer,
30+
request_id::{MakeRequestUuid, PropagateRequestIdLayer, SetRequestIdLayer},
31+
timeout::TimeoutLayer,
32+
trace::{MakeSpan, TraceLayer},
33+
};
2834

2935
use config::Config;
3036
use error::{ApiError, DbPoolTimeout};
@@ -296,7 +302,17 @@ pub fn build_router(state: AppState) -> Router {
296302
.route("/metrics", get(metrics_handler))
297303
.route("/SimpleNotificationService.pem", get(serve_signing_cert))
298304
.route_layer(from_fn_with_state(state.clone(), record_metrics))
299-
.layer(TraceLayer::new_for_http().make_span_with(OtelMakeSpan))
305+
.layer(
306+
ServiceBuilder::new()
307+
.layer(SetRequestIdLayer::x_request_id(MakeRequestUuid))
308+
.layer(PropagateRequestIdLayer::x_request_id())
309+
.layer(TraceLayer::new_for_http().make_span_with(OtelMakeSpan))
310+
.layer(TimeoutLayer::with_status_code(
311+
axum::http::StatusCode::REQUEST_TIMEOUT,
312+
Duration::from_secs(30),
313+
))
314+
.layer(CatchPanicLayer::new()),
315+
)
300316
.with_state(state)
301317
}
302318

@@ -318,11 +334,11 @@ async fn record_metrics(State(state): State<AppState>, req: Request, next: Next)
318334
let response = next.run(req).await;
319335

320336
state.metrics.http_connections_active.dec();
321-
let status = response.status().as_u16().to_string();
337+
let status = response.status().as_u16();
322338
state
323339
.metrics
324340
.http_requests_total
325-
.with_label_values(&[method.as_str(), &path, &status])
341+
.with_label_values(&[method.as_str(), &path, &status.to_string()])
326342
.inc();
327343
timer.observe(start.elapsed().as_secs_f64());
328344
let pool_size = state.pool.size() as usize;
@@ -337,6 +353,8 @@ async fn record_metrics(State(state): State<AppState>, req: Request, next: Next)
337353
state.metrics.db_pool_acquire_timeouts_total.inc();
338354
}
339355

356+
tracing::Span::current().record("http.status_code", status);
357+
340358
response
341359
}
342360

src/metrics.rs

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ macro_rules! define_metrics {
1111
$(
1212
$metric_type:ident $field:ident($metric_name:literal)
1313
$([$($label:literal),+ $(,)?])?
14-
$(buckets = [$($bucket:expr),+ $(,)?])?
14+
$(buckets = $buckets:expr)?
1515
=> $help:literal
1616
),* $(,)?
1717
}
@@ -29,7 +29,7 @@ macro_rules! define_metrics {
2929
let $field = define_metrics!(
3030
@create $metric_type $metric_name $help
3131
$([$($label),+])?
32-
$(buckets = [$($bucket),+])?
32+
$(buckets = $buckets)?
3333
);
3434
registry.register(Box::new($field.clone())).expect("metric not yet registered");
3535
)*
@@ -86,68 +86,77 @@ macro_rules! define_metrics {
8686
(@create histogram $name:literal $help:literal) => {
8787
Histogram::with_opts(HistogramOpts::new($name, $help)).expect("valid metric")
8888
};
89-
(@create histogram $name:literal $help:literal buckets = [$($bucket:expr),+]) => {
89+
(@create histogram $name:literal $help:literal buckets = $buckets:expr) => {
9090
Histogram::with_opts(
91-
HistogramOpts::new($name, $help).buckets(vec![$($bucket),+])
91+
HistogramOpts::new($name, $help).buckets($buckets.to_vec())
9292
).expect("valid metric")
9393
};
9494
(@create histogram $name:literal $help:literal [$($label:literal),+]) => {
9595
HistogramVec::new(HistogramOpts::new($name, $help), &[$($label),+]).expect("valid metric")
9696
};
97-
(@create histogram $name:literal $help:literal [$($label:literal),+] buckets = [$($bucket:expr),+]) => {
97+
(@create histogram $name:literal $help:literal [$($label:literal),+] buckets = $buckets:expr) => {
9898
HistogramVec::new(
99-
HistogramOpts::new($name, $help).buckets(vec![$($bucket),+]),
99+
HistogramOpts::new($name, $help).buckets($buckets.to_vec()),
100100
&[$($label),+],
101101
).expect("valid metric")
102102
};
103103
(@create histogram_vec $name:literal $help:literal [$($label:literal),+]) => {
104104
HistogramVec::new(HistogramOpts::new($name, $help), &[$($label),+]).expect("valid metric")
105105
};
106-
(@create histogram_vec $name:literal $help:literal [$($label:literal),+] buckets = [$($bucket:expr),+]) => {
106+
(@create histogram_vec $name:literal $help:literal [$($label:literal),+] buckets = $buckets:expr) => {
107107
HistogramVec::new(
108-
HistogramOpts::new($name, $help).buckets(vec![$($bucket),+]),
108+
HistogramOpts::new($name, $help).buckets($buckets.to_vec()),
109109
&[$($label),+],
110110
).expect("valid metric")
111111
};
112112
}
113113

114+
// Bucket sets grouped by operation latency profile.
115+
116+
/// HTTP request latency buckets, in seconds — from the 5 ms fast path up to 2.5 s slow requests.
117+
const HTTP_BUCKETS: &[f64] = &[0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5];
118+
/// Database-backed operation latency buckets, in seconds — from the 1 ms fast path through 1 s slow queries.
119+
const DB_OP_BUCKETS: &[f64] = &[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0];
120+
/// Outbound HTTP webhook delivery latency buckets, in seconds — from a 50 ms minimum round-trip up to the 10 s timeout.
121+
const DELIVERY_BUCKETS: &[f64] = &[0.05, 0.1, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0];
122+
114123
define_metrics! {
115124
pub struct Metrics {
116125
counter_vec http_requests_total("http_requests_total")["method", "path", "status"]
117126
=> "Total HTTP requests",
118127
histogram_vec http_request_duration_seconds("http_request_duration_seconds")["method", "path"]
119-
buckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5]
128+
buckets = HTTP_BUCKETS
120129
=> "HTTP request duration in seconds",
121130
gauge http_connections_active("http_connections_active")
122131
=> "HTTP requests currently in flight",
123-
counter_vec messages_sent_total("messages_sent_total")["queue"]
132+
counter_vec messages_sent_total("queue_messages_sent_total")["queue"]
124133
=> "Total messages enqueued",
125-
counter_vec messages_received_total("messages_received_total")["queue"]
134+
counter_vec messages_received_total("queue_messages_received_total")["queue"]
126135
=> "Total messages delivered to consumers",
127-
counter_vec messages_deleted_total("messages_deleted_total")["queue"]
136+
counter_vec messages_deleted_total("queue_messages_deleted_total")["queue"]
128137
=> "Total messages deleted (acknowledged)",
129-
counter_vec messages_redelivered_total("messages_redelivered_total")["queue"]
138+
counter_vec messages_redelivered_total("queue_messages_redelivered_total")["queue"]
130139
=> "Total messages received with read_count > 1 (consumer did not ack before vt expiry)",
131-
histogram_vec message_send_duration_seconds("message_send_duration_seconds")["queue"]
132-
buckets = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]
140+
histogram_vec message_send_duration_seconds("queue_message_send_duration_seconds")["queue"]
141+
buckets = DB_OP_BUCKETS
133142
=> "Message send operation duration in seconds",
134-
histogram_vec message_receive_duration_seconds("message_receive_duration_seconds")["queue"]
135-
buckets = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]
143+
histogram_vec message_receive_duration_seconds("queue_message_receive_duration_seconds")["queue"]
144+
buckets = DB_OP_BUCKETS
136145
=> "Message receive operation duration in seconds",
137-
histogram_vec message_delete_duration_seconds("message_delete_duration_seconds")["queue"]
138-
buckets = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]
146+
histogram_vec message_delete_duration_seconds("queue_message_delete_duration_seconds")["queue"]
147+
buckets = DB_OP_BUCKETS
139148
=> "Message delete operation duration in seconds",
140-
histogram_vec message_age_at_receive_seconds("message_age_at_receive_seconds")["queue"]
149+
histogram_vec message_age_at_receive_seconds("queue_message_age_at_receive_seconds")["queue"]
141150
buckets = [0.1, 0.5, 1.0, 5.0, 15.0, 30.0, 60.0, 300.0, 900.0, 3600.0]
142151
=> "Message age when received (time from enqueue to first delivery) in seconds",
143-
counter_vec delivery_attempts_total("delivery_attempts_total")["outcome"]
152+
counter_vec delivery_attempts_total("queue_delivery_attempts_total")["outcome"]
144153
=> "HTTP webhook delivery attempts by outcome (success|failure)",
145-
histogram_vec delivery_attempt_duration_seconds("delivery_attempt_duration_seconds")["outcome"]
146-
buckets = [0.05, 0.1, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0]
154+
histogram_vec delivery_attempt_duration_seconds("queue_delivery_attempt_duration_seconds")["outcome"]
155+
buckets = DELIVERY_BUCKETS
147156
=> "HTTP webhook delivery attempt duration in seconds",
148-
counter delivery_exhausted_total("delivery_exhausted_total")
157+
counter delivery_exhausted_total("queue_delivery_exhausted_total")
149158
=> "Webhook deliveries permanently abandoned after exhausting max_attempts",
150-
histogram coalescer_flush_batch_size("coalescer_flush_batch_size")
159+
histogram coalescer_flush_batch_size("queue_coalescer_flush_batch_size")
151160
buckets = [1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 500.0, 1000.0]
152161
=> "Number of messages per coalescer flush batch",
153162
gauge_vec queue_depth("queue_depth")["queue"]

0 commit comments

Comments
 (0)