Skip to content

Commit 349a45a

Browse files
Bug 1997631 - Add telemetry for database loads and migrations to Nimbus SDK
There are now two new events, `nimbus_events.database_load`, which is emitted every time we attempt to load the database (i.e., once per session), and `nimbus_events.database_migration`, which is emitted every time we attempt a database migration. These events should give us insight into how often we are experiencing database corruption leading to enrollment state being dropped.
1 parent dd910ef commit 349a45a

14 files changed

Lines changed: 926 additions & 102 deletions

File tree

components/nimbus/android/src/main/java/org/mozilla/experiments/nimbus/Nimbus.kt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ import org.mozilla.experiments.nimbus.GleanMetrics.NimbusHealth
3232
import org.mozilla.experiments.nimbus.GleanMetrics.Pings
3333
import org.mozilla.experiments.nimbus.internal.AppContext
3434
import org.mozilla.experiments.nimbus.internal.AvailableExperiment
35+
import org.mozilla.experiments.nimbus.internal.DatabaseLoadExtraDef
36+
import org.mozilla.experiments.nimbus.internal.DatabaseMigrationExtraDef
3537
import org.mozilla.experiments.nimbus.internal.EnrolledExperiment
3638
import org.mozilla.experiments.nimbus.internal.EnrollmentChangeEvent
3739
import org.mozilla.experiments.nimbus.internal.EnrollmentChangeEventType
@@ -83,6 +85,27 @@ open class Nimbus(
8385
private val logger = delegate.logger
8486

8587
private val metricsHandler = object : MetricsHandler {
88+
/**
 * Records a `nimbus_events.database_load` Glean event, copying every extra
 * from the SDK-provided [event] across unchanged.
 */
override fun recordDatabaseLoad(event: DatabaseLoadExtraDef) {
    NimbusEvents.databaseLoad.record(
        NimbusEvents.DatabaseLoadExtra(
            corrupt = event.corrupt,
            // Named arguments are case-sensitive: this must be `initialVersion`,
            // in line with the other camel-cased extras below.
            initialVersion = event.initialVersion,
            error = event.error,
            migratedVersion = event.migratedVersion,
            migrationError = event.migrationError,
        )
    )
}
99+
/**
 * Records a `nimbus_events.database_migration` Glean event, copying every
 * extra from the SDK-provided [event] across unchanged.
 */
override fun recordDatabaseMigration(event: DatabaseMigrationExtraDef) {
    // The metric is declared as `database_migration`, so the generated names are
    // `databaseMigration` / `DatabaseMigrationExtra` (not `...Migrated`).
    NimbusEvents.databaseMigration.record(
        NimbusEvents.DatabaseMigrationExtra(
            reason = event.reason,
            fromVersion = event.fromVersion,
            toVersion = event.toVersion,
            error = event.error,
        )
    )
}
86109
override fun recordEnrollmentStatuses(enrollmentStatusExtras: List<EnrollmentStatusExtraDef>) {
87110
for (extra in enrollmentStatusExtras) {
88111
NimbusEvents.enrollmentStatus.record(

components/nimbus/metrics.yaml

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,89 @@ nimbus_events:
167167
expires: never
168168
# Disabled by default. This needs a server-knobs rollout to ensure the volume is not overwhelming.
169169
disabled: true
170+
171+
database_load:
  type: event
  description: >
    An event recorded when the Nimbus database is loaded.

  extra_keys:
    corrupt:
      type: boolean
      description: >
        Whether or not the database was corrupted when we tried to load it.

        This field may be empty, e.g., if the database could not be loaded.

    initial_version:
      type: quantity
      description: >
        The version of the database on disk at load time.

        This field may be empty, e.g., if we cannot create a database.

    error:
      type: string
      description: >
        If an error occurred during the initial load of the database this field
        will contain the relevant error code.

    migrated_version:
      type: quantity
      description: >
        The version of the database that was migrated to.

        This field will be empty if no migration(s) occurred.

    migration_error:
      type: string
      description: >
        If an error occurred while migrating the database this field will
        contain the relevant error code.

  bugs:
    - https://bugzilla.mozilla.org/show_bug.cgi?id=1997631
  data_reviews:
    - https://bugzilla.mozilla.org/show_bug.cgi?id=1997631
  data_sensitivity:
    - technical
  notification_emails:
    - beth@mozilla.com
    - project-nimbus@mozilla.com
  expires: never
220+
221+
database_migration:
  # Glean's metric type for events is `event` (singular), as used by the
  # sibling `database_load` metric.
  type: event
  description: >
    An event recorded when a database migration occurs.

  extra_keys:
    reason:
      type: string
      description: >
        The reason the migration occurred.

        This is one of:

        - "upgrade"
        - "invalid_version"

    from_version:
      type: quantity
      description: >
        The original version of the database. A value of 0 indicates that
        the db_version was not set in the meta store.

    to_version:
      type: quantity
      description: >
        The target version of the migration that was performed.

    error:
      type: string
      description: >
        If an error occurred during the migration, the relevant error code.

  # Metric metadata mirrored from `database_load`, which was added under the
  # same bug; confirm the data review reference before landing.
  bugs:
    - https://bugzilla.mozilla.org/show_bug.cgi?id=1997631
  data_reviews:
    - https://bugzilla.mozilla.org/show_bug.cgi?id=1997631
  data_sensitivity:
    - technical
  notification_emails:
    - beth@mozilla.com
    - project-nimbus@mozilla.com
  expires: never
252+
170253
exposure:
171254
type: event
172255
description: >

components/nimbus/src/error.rs

Lines changed: 88 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
//! TODO: Implement proper error handling, this would include defining the error enum,
99
//! impl std::error::Error using `thiserror` and ensuring all errors are handled appropriately
1010
11+
use std::borrow::Cow;
1112
use std::num::{ParseIntError, TryFromIntError};
1213

1314
// reexport logging helpers.
@@ -71,12 +72,12 @@ pub enum NimbusError {
7172
#[error("TryInto error: {0}")]
7273
TryFromSliceError(#[from] std::array::TryFromSliceError),
7374

74-
#[error("Error parsing URL: {0}")]
75-
UrlParsingError(#[from] url::ParseError),
76-
7775
#[error("UniFFI callback error: {0}")]
7876
UniFFICallbackError(#[from] uniffi::UnexpectedUniFFICallbackError),
7977

78+
#[error("Error parsing URL: {0}")]
79+
UrlParsingError(#[from] url::ParseError),
80+
8081
#[error("UUID parsing error: {0}")]
8182
UuidError(#[from] uuid::Error),
8283

@@ -92,14 +93,14 @@ pub enum NimbusError {
9293
#[error("Error with Remote Settings client: {0}")]
9394
ClientError(#[from] remote_settings::RemoteSettingsError),
9495

95-
#[cfg(feature = "stateful")]
96-
#[error("Rkv error: {0}")]
97-
RkvError(#[from] rkv::StoreError),
98-
9996
#[cfg(feature = "stateful")]
10097
#[error("Regex error: {0}")]
10198
RegexError(#[from] regex::Error),
10299

100+
#[cfg(feature = "stateful")]
101+
#[error("Rkv error: {0}")]
102+
RkvError(#[from] rkv::StoreError),
103+
103104
// Cirrus-only errors.
104105
#[cfg(not(feature = "stateful"))]
105106
#[error("Error in Cirrus: {0}")]
@@ -162,3 +163,83 @@ impl From<VersionParsingError> for NimbusError {
162163
}
163164

164165
pub type Result<T, E = NimbusError> = std::result::Result<T, E>;
166+
167+
/// An Error extension trait that allows simplified error codes to be submitted
168+
/// in telemetry.
169+
pub trait ErrorCode: std::error::Error {
    /// Produce the simplified code that identifies this error in telemetry.
    ///
    /// The result is a `Cow` so implementations can hand back either a static
    /// name or a string built at call time.
    fn error_code(&self) -> Cow<'static, str>;
}
173+
174+
#[cfg(feature = "stateful")]
impl ErrorCode for NimbusError {
    /// Maps each `NimbusError` variant to its telemetry code.
    ///
    /// Most variants report only their own name. `BehaviorError` and
    /// `RkvError` additionally embed the wrapped error's `error_code()`, and
    /// `IOError` embeds the `Debug` form of the wrapped error's `kind()`.
    fn error_code(&self) -> Cow<'static, str> {
        let name: &'static str = match self {
            // Codes that carry detail from the wrapped error are built as
            // owned strings and returned immediately.
            Self::BehaviorError(inner) => {
                return Cow::Owned(format!("BehaviorError({})", inner.error_code()));
            }
            Self::IOError(inner) => {
                return Cow::Owned(format!("IOError({:?})", inner.kind()));
            }
            Self::RkvError(inner) => {
                return Cow::Owned(format!("RkvError({})", inner.error_code()));
            }

            // Every remaining variant maps to a fixed, static name.
            Self::ClientError(..) => "ClientError",
            Self::DatabaseNotReady => "DatabaseNotReady",
            Self::EmptyRatiosError => "EmptyRatiosError",
            Self::EvaluationError(..) => "EvaluationError",
            Self::InternalError(..) => "InternalError",
            Self::InvalidExperimentFormat => "InvalidExperimentFormat",
            Self::InvalidExpression => "InvalidExpression",
            Self::InvalidFraction => "InvalidFraction",
            Self::InvalidPath(..) => "InvalidPath",
            Self::InvalidPersistedData => "InvalidPersistedData",
            Self::JSONError(..) => "JSONError",
            Self::NoSuchBranch(..) => "NoSuchBranch",
            Self::NoSuchExperiment(..) => "NoSuchExperiment",
            Self::OutOfBoundsError => "OutOfBoundsError",
            Self::ParseIntError(..) => "ParseIntError",
            Self::RegexError(..) => "RegexError",
            Self::TransformParameterError(..) => "TransformParameterError",
            Self::TryFromIntError(..) => "TryFromIntError",
            Self::TryFromSliceError(..) => "TryFromSliceError",
            Self::UniFFICallbackError(..) => "UniFFICallbackError",
            Self::UrlParsingError(..) => "UrlParsingError",
            Self::UuidError(..) => "UuidError",
            Self::VersionParsingError(..) => "VersionParsingError",
        };
        Cow::Borrowed(name)
    }
}
207+
208+
#[cfg(feature = "stateful")]
impl ErrorCode for rkv::StoreError {
    /// Maps each `rkv::StoreError` variant to its telemetry code.
    ///
    /// All variants report their own name, except `IoError`, which also
    /// embeds the `Debug` form of the wrapped error's `kind()`.
    fn error_code(&self) -> Cow<'static, str> {
        // The only code that needs formatting is handled first; the rest are
        // static names.
        if let Self::IoError(inner) = self {
            return Cow::Owned(format!("IoError({:?})", inner.kind()));
        }

        let name: &'static str = match self {
            Self::ManagerPoisonError => "ManagerPoisonError",
            Self::DatabaseCorrupted => "DatabaseCorrupted",
            Self::KeyValuePairNotFound => "KeyValuePairNotFound",
            Self::KeyValuePairBadSize => "KeyValuePairBadSize",
            Self::FileInvalid => "FileInvalid",
            Self::MapFull => "MapFull",
            Self::DbsFull => "DbsFull",
            Self::ReadersFull => "ReadersFull",
            // Already returned above; listed so the match stays exhaustive
            // without a wildcard arm.
            Self::IoError(inner) => return Cow::Owned(format!("IoError({:?})", inner.kind())),
            Self::UnsuitableEnvironmentPath(..) => "UnsuitableEnvironmentPath",
            Self::DataError(..) => "DataError",
            Self::SafeModeError(..) => "SafeModeError",
            Self::ReadTransactionAlreadyExists(..) => "ReadTransactionAlreadyExists",
            Self::OpenAttemptedDuringTransaction(..) => "OpenAttemptedDuringTransaction",
        };
        Cow::Borrowed(name)
    }
}
229+
230+
#[cfg(feature = "stateful")]
impl ErrorCode for BehaviorError {
    /// Maps each `BehaviorError` variant to its name; every code is a static
    /// string, so nothing is allocated.
    fn error_code(&self) -> Cow<'static, str> {
        let name: &'static str = match self {
            Self::EventQueryParseError(..) => "EventQueryParseError",
            Self::EventQueryTypeParseError(..) => "EventQueryTypeParseError",
            Self::IntervalParseError(..) => "IntervalParseError",
            Self::InvalidDuration(..) => "InvalidDuration",
            Self::InvalidState(..) => "InvalidState",
            Self::MissingEventStore => "MissingEventStore",
            Self::MissingRecordedContext => "MissingRecordedContext",
            Self::TypeError(..) => "TypeError",
        };
        Cow::Borrowed(name)
    }
}

components/nimbus/src/metrics.rs

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ mod detail {
7070

7171
#[uniffi::trait_interface]
7272
pub trait MetricsHandler: Send + Sync {
73+
fn record_database_load(&self, event: DatabaseLoadExtraDef);
74+
75+
fn record_database_migration(&self, event: DatabaseMigrationExtraDef);
76+
7377
fn record_enrollment_statuses(
7478
&self,
7579
enrollment_status_extras: Vec<EnrollmentStatusExtraDef>,
@@ -101,8 +105,8 @@ mod detail {
101105
}
102106
}
103107

104-
#[derive(Clone, Default)]
105-
#[cfg_attr(test, derive(Debug, Eq, PartialEq))]
108+
#[derive(Default)]
109+
#[cfg_attr(test, derive(Clone, Debug, Eq, PartialEq))]
106110
pub struct MalformedFeatureConfigExtraDef {
107111
pub slug: Option<String>,
108112
pub branch: Option<String>,
@@ -128,6 +132,24 @@ mod detail {
128132
}
129133
}
130134
}
135+
136+
/// Extras passed to `MetricsHandler::record_database_load`.
///
/// Every field is optional; a field left as `None` is simply absent from the
/// recorded event.
#[derive(Default)]
#[cfg_attr(test, derive(Clone, Debug, Eq, PartialEq))]
pub struct DatabaseLoadExtraDef {
    /// Whether the database was corrupted when it was loaded.
    pub corrupt: Option<bool>,
    /// Error code for a failure during the initial load of the database.
    pub error: Option<String>,
    /// Version of the database on disk at load time.
    pub initial_version: Option<u16>,
    /// Version the database was migrated to, if a migration occurred.
    pub migrated_version: Option<u16>,
    /// Error code for a failed migration.
    /// NOTE(review): inferred from the field name; confirm against the code
    /// that populates it.
    pub migration_error: Option<String>,
}
145+
146+
/// Extras passed to `MetricsHandler::record_database_migration`.
#[cfg_attr(test, derive(Clone, Debug, Eq, PartialEq))]
pub struct DatabaseMigrationExtraDef {
    /// Why the migration occurred; the metric definition documents the values
    /// "upgrade" and "invalid_version".
    pub reason: String,
    /// Version of the database before the migration. The metric definition
    /// documents 0 as meaning the version was not set in the meta store.
    pub from_version: u16,
    /// Target version of the migration.
    pub to_version: u16,
    /// Error code if the migration failed, otherwise `None`.
    pub error: Option<String>,
}
131153
}
132154

133155
#[cfg(not(feature = "stateful"))]

components/nimbus/src/nimbus.udl

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ enum EnrollmentChangeEventType {
8585

8686
[Trait, WithForeign]
8787
interface MetricsHandler {
88+
void record_database_load(DatabaseLoadExtraDef event);
89+
90+
void record_database_migration(DatabaseMigrationExtraDef event);
91+
8892
void record_enrollment_statuses(sequence<EnrollmentStatusExtraDef> enrollment_status_extras);
8993
/// Feature activation is the pre-cursor to feature exposure: it is defined as the first time
9094
/// the feature configuration is asked for.
@@ -97,6 +101,21 @@ interface MetricsHandler {
97101
void submit_targeting_context();
98102
};
99103

104+
/// Extras passed to `MetricsHandler.record_database_load`. All fields are optional.
dictionary DatabaseLoadExtraDef {
    boolean? corrupt;
    string? error;
    u16? initial_version;
    u16? migrated_version;
    string? migration_error;
};
111+
112+
/// Extras passed to `MetricsHandler.record_database_migration`. Only `error` is optional.
dictionary DatabaseMigrationExtraDef {
    string reason;
    u16 from_version;
    u16 to_version;
    string? error;
};
118+
100119
dictionary EnrollmentStatusExtraDef {
101120
string? branch;
102121
string? conflict_slug;
@@ -107,7 +126,6 @@ dictionary EnrollmentStatusExtraDef {
107126
sequence<PreviousGeckoPrefState>? prev_gecko_pref_states;
108127
};
109128

110-
111129
dictionary PreviousGeckoPrefState {
112130
OriginalGeckoPref original_value;
113131
string feature_id;

components/nimbus/src/stateful/nimbus_client.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ use crate::evaluator::{
2525
};
2626
use crate::json::{JsonObject, PrefValue};
2727
use crate::metrics::{
28-
EnrollmentStatusExtraDef, FeatureExposureExtraDef, MalformedFeatureConfigExtraDef,
29-
MetricsHandler,
28+
DatabaseLoadExtraDef, DatabaseMigrationExtraDef, EnrollmentStatusExtraDef,
29+
FeatureExposureExtraDef, MalformedFeatureConfigExtraDef, MetricsHandler,
3030
};
3131
use crate::schema::parse_experiments;
3232
use crate::stateful::behavior::EventStore;
@@ -663,7 +663,8 @@ impl NimbusClient {
663663
}
664664

665665
pub(crate) fn db(&self) -> Result<&Database> {
666-
self.db.get_or_try_init(|| Database::new(&self.db_path))
666+
self.db
667+
.get_or_try_init(|| Database::new(&self.db_path, self.metrics_handler.clone()))
667668
}
668669

669670
fn merge_additional_context(&self, context: Option<JsonObject>) -> Result<Value> {

0 commit comments

Comments
 (0)