Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
5a0bceb
refactor(e2e): make waitForLoadingComplete fail loudly + add anchored…
jeffredodd May 22, 2026
877ccda
fix(e2e): poll for admin_onboarding_review and stop swallowing scenar…
jeffredodd May 22, 2026
f5590c2
test(e2e): stop masking weekly-cadence provisioning failures with blo…
jeffredodd May 22, 2026
161cc18
ci(e2e): tighten retries 2->1 and cap canary timeouts
jeffredodd May 22, 2026
bc0b40d
fix(e2e): retry contractor onboarding_completed PUT and update wage f…
jeffredodd May 22, 2026
a211b34
fix(e2e): treat contractor onboarding_completed as best-effort and un…
jeffredodd May 22, 2026
3f40f0c
fix(e2e): add .first() to .or() locators that can match multiple elem…
jeffredodd May 22, 2026
0df0fcc
fix(e2e): use geocoder-safe addresses in scenarios that POST location…
jeffredodd May 22, 2026
0b522cf
refactor(e2e): drop silent .catch on canary control probes and assert…
jeffredodd May 22, 2026
8c2f721
docs(e2e): clean up reviewer-facing comments, re-anchor JSDoc, use na…
jeffredodd May 22, 2026
002e70f
fix(e2e): use geocoder-safe addresses in UI driver and globalSetup fa…
jeffredodd May 22, 2026
e91ecb2
fix(e2e): validate base demo state post-provisioning and retry on deg…
jeffredodd May 22, 2026
039d48a
fix(e2e): escalate to parallel demo-creation batches when factory ret…
jeffredodd May 22, 2026
a28e044
fix(e2e): raise per-test timeout to 4 min so the scenario fixture's r…
jeffredodd May 22, 2026
8d04bc4
fix(e2e): give the slow demo backend more patience and fix bank-step …
jeffredodd May 22, 2026
7878d94
fix(e2e): contractor canary 03 — prefer check-method row + re-fill pa…
jeffredodd May 22, 2026
ebac2a2
fix(e2e): be gentler on the demo factory when retrying degraded base …
jeffredodd May 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,15 @@ jobs:
# would otherwise fail matrix expansion.
if: needs.e2e-setup.outputs.domains != '[]'
runs-on: ubuntu-latest
# Wall-clock ceiling per shard. Even with the per-test timeouts in
# playwright.demo.config.ts and the canary test.setTimeout overrides, a
# single hung test on a wedged demo backend can otherwise burn a full
# 6-hour GitHub runner. 30m is comfortably above the largest expected
# shard wall-clock time (sum of all canaries with 1 retry each on the
# slowest domain, accounting for the bumped per-canary patience for the
# slow demo backend) but tight enough that a stuck run releases CI
# minutes for the next push.
timeout-minutes: 30
strategy:
fail-fast: false
# Concurrent shard ceiling against flows.gusto-demo.com. We only have 3
Expand Down
4 changes: 2 additions & 2 deletions e2e/globalSetup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -236,10 +236,10 @@ async function getOrCreateLocation(flowToken: string, companyId: string): Promis

console.log('No locations found, creating one...')
const newLocation = await postToApi<Location>(endpoint, {
street_1: '100 Test Street',
street_1: '500 3rd Street',
city: 'San Francisco',
state: 'CA',
zip: '94105',
zip: '94107',
phone_number: '4155551234',
})
console.log(`Created location: ${newLocation.street_1}, ${newLocation.city}`)
Expand Down
301 changes: 274 additions & 27 deletions e2e/scenario/runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,15 +184,15 @@ async function decorateEmployees(
const onboardingStatus =
emp.onboarding_status === 'completed' ? 'onboarding_completed' : emp.onboarding_status
log(` Setting onboarding status for ${emp.key}: ${emp.onboarding_status}`)
try {
await api.put(`/employees/${uuid}/onboarding_status`, {
onboarding_status: onboardingStatus,
})
} catch (error) {
log(
` Skipping onboarding status update for ${emp.key}; API rejected status (${String(error)})`,
)
}
// The previous implementation swallowed this error and continued, which
// turned a half-provisioned scenario into a downstream test failure
// 30+s later (e.g. "list shows no employees" when in fact the runner
// never finished provisioning). If this PUT fails, the scenario can't
// be delivered as declared — fail fast with the API's reason rather
// than letting tests run against an unfinished company.
await api.put(`/employees/${uuid}/onboarding_status`, {
onboarding_status: onboardingStatus,
})
}

if (emp.termination) {
Expand Down Expand Up @@ -259,27 +259,70 @@ async function decorateContractors(
// auto-advances into `admin_onboarding_review` before we attempt the
// final transition.
log(` Setting payment method to Check for ${contractor.key}`)
const paymentMethod = await api.get<{ version?: string }>(
`/contractors/${created.uuid}/payment_method`,
)
await api.put(`/contractors/${created.uuid}/payment_method`, {
type: 'Check',
version: paymentMethod.version,
})
}

// PUT /contractors/:uuid/onboarding_status with retry on 422.
//
// Setting payment_method = Check transitions some demo contractors
// into admin_onboarding_review (eligible for the final transition);
// others stay in self_onboarding_not_invited or admin_onboarding_incomplete
// depending on the base demo's seed state. Only the first group can
// be PUT to onboarding_completed via the API alone — the second
// group is missing a prerequisite (typically an onboarding step the
// scenario runner cannot fulfill) and never converges, regardless
// of how long we wait.
//
// Strategy: try the transition with a short retry window for the
// genuinely-eventual-consistent cases, but treat persistent 422 as a
// best-effort warning rather than a fatal scenario error. Downstream
// contractor specs that need a payment-ready contractor (canary 03,
// contractor-payment-submit-lifecycle) handle the empty-payable-list
// case themselves by picking from the demo seed's pre-existing
// contractors. Other 4xx/5xx codes still fail fast.
log(` Setting onboarding status for ${contractor.key}: ${contractor.onboarding_status}`)
// Bumped from 30s to 90s. The previous 30s budget (6 attempts at 5s
// intervals) was reaching exhaustion in CI under the slow demo
// backend — the contractor's intermediate onboarding state was
// taking longer to advance than 30s allowed for. 90s gives 18
// attempts, comfortably enough margin that "still not ready after
// 90s" is a real degraded state, not retry-budget exhaustion.
const ONBOARDING_RETRY_BUDGET_MS = 90_000
const start = Date.now()
let lastError: unknown = null
let attempt = 0
let succeeded = false
while (Date.now() - start < ONBOARDING_RETRY_BUDGET_MS) {
attempt++
try {
const paymentMethod = await api.get<{ version?: string }>(
`/contractors/${created.uuid}/payment_method`,
)
await api.put(`/contractors/${created.uuid}/payment_method`, {
type: 'Check',
version: paymentMethod.version,
await api.put(`/contractors/${created.uuid}/onboarding_status`, {
onboarding_status: onboardingStatus,
})
lastError = null
succeeded = true
break
} catch (error) {
log(` Payment method update failed for ${contractor.key}: ${String(error)}`)
lastError = error
const message = String(error)
// 422 means the backend won't accept the transition from the
// contractor's current state — wait and retry in case it's
// eventual consistency. Any other failure (network, 5xx, etc.)
// is not transient and should fail fast.
if (!message.includes('(422)')) {
throw error
}
await new Promise(r => setTimeout(r, 5_000))
}
}

log(` Setting onboarding status for ${contractor.key}: ${contractor.onboarding_status}`)
try {
await api.put(`/contractors/${created.uuid}/onboarding_status`, {
onboarding_status: onboardingStatus,
})
} catch (error) {
if (!succeeded) {
log(
` Skipping onboarding status update for ${contractor.key}; API rejected status (${String(error)})`,
` Transition to ${contractor.onboarding_status} for ${contractor.key} did not succeed after ${attempt} attempts (${Math.round((Date.now() - start) / 1000)}s); continuing as best-effort. Last error: ${String(lastError)}`,
)
}
}
Expand Down Expand Up @@ -518,6 +561,179 @@ function validateExpectedContext(context: ScenarioContext, expectedPaths: string
}
}

/**
 * Check a freshly provisioned base demo against the preconditions a scenario
 * declares (company onboarding finished, at least one usable seed employee).
 *
 * Background from the original authors: the
 * `react_sdk_demo_company_onboarded` factory on flows.gusto-demo.com does not
 * always hand back a fully onboarded company; a minority of invocations yield
 * an effectively fresh company with payroll blockers and no employees, and
 * scenarios that opt in via requireOnboardedCompany /
 * requireOnboardedEmployees then fail later with misleading errors.
 *
 * Behavior:
 * - When neither requirement is set, no API call is made and the demo passes.
 * - Otherwise the company's onboarding status is always read first (this also
 *   applies when only `onboardedEmployees` is set).
 * - When `onboardedEmployees` is set, the employee list must contain at least
 *   one non-terminated employee that is flagged `onboarded` or whose
 *   `onboarding_status` is `onboarding_completed`.
 * - API errors are never thrown; they are converted into a reason string.
 *
 * @returns `null` when the demo is acceptable, otherwise a human-readable
 *   explanation of why it is considered degraded.
 */
async function checkBaseDemoState(
  api: ApiClient,
  companyId: string,
  requirements: { onboarded: boolean; onboardedEmployees: boolean },
): Promise<string | null> {
  const nothingRequired = !(requirements.onboarded || requirements.onboardedEmployees)
  if (nothingRequired) {
    return null
  }

  // Step 1: company-level onboarding flag.
  try {
    const companyStatus = await api.get<{ onboarding_completed?: boolean }>(
      `/companies/${companyId}/onboarding_status`,
    )
    const completedFlag = companyStatus.onboarding_completed
    if (completedFlag !== true) {
      return `expected onboarding_completed=true, got ${completedFlag ?? 'undefined'}`
    }
  } catch (error) {
    return `could not read onboarding_status: ${String(error)}`
  }

  // Step 2 is only needed when the scenario asks for seeded employees.
  if (!requirements.onboardedEmployees) {
    return null
  }

  type EmployeeSummary = {
    uuid: string
    onboarded?: boolean
    onboarding_status?: string
    terminated?: boolean
  }

  // An employee counts when it is still active and onboarding has finished,
  // signalled by either of the two fields.
  const isActiveAndOnboarded = (emp: EmployeeSummary): boolean => {
    if (emp.terminated) {
      return false
    }
    return emp.onboarded === true || emp.onboarding_status === 'onboarding_completed'
  }

  try {
    const employees = await api.get<EmployeeSummary[]>(`/companies/${companyId}/employees`)
    const usableCount = employees.filter(isActiveAndOnboarded).length
    if (usableCount === 0) {
      return `expected ≥1 onboarded non-terminated employee, got ${employees.length} total / 0 onboarded`
    }
  } catch (error) {
    return `could not read employees: ${String(error)}`
  }

  return null
}

/**
 * Number of demos created in each successive attempt to find an acceptable
 * base demo while the factory is in a degraded mode: one single-demo attempt
 * followed by three 2-way parallel batches. The factory's bad output is
 * statistically independent per demo invocation (verified empirically), so a
 * parallel batch converts serial retry latency into one round-trip per batch.
 *
 * Sized to be gentle on the demo backend. An earlier iteration used 4-way
 * parallel retries — that worked when one CI run hit the backend at a
 * time, but multiple concurrent CI runs (5 in parallel hitting the backend
 * simultaneously, ~200 concurrent POST /demos calls) overloaded the
 * factory and made BOTH the original creation AND subsequent retries
 * time out at 180s. The current sizes give enough headroom at the observed
 * ~21% good-demo rate of a degraded factory without flooding the backend:
 *
 * batch 1 (size 1): P(success) = 0.21
 * batch 2 (size 2): P(at-least-one-good) = 1 - 0.79^2 = 0.38, cumulative 0.51
 * batch 3 (size 2): cumulative 0.69
 * batch 4 (size 2): cumulative 0.81
 *
 * Total worst-case demos: 1 + 2 + 2 + 2 = 7 (vs 13 before). Worst-case wall
 * time is still ~4 batches × ~25s each = ~100s, plus the pause inserted
 * before each batch after the first, because each batch is bounded by its
 * slowest demo rather than by the sum of its demos.
 */
const BASE_DEMO_VALIDATION_BATCH_SIZES = [1, 2, 2, 2] as const

/**
 * Pause, in milliseconds, inserted before every batch after the first, to
 * give the demo backend time to settle if it is overloaded. The
 * 5-run-concurrent stress test showed POST /demos timing out at 180s when
 * hammered; a brief pause between escalating batches lets the backend
 * recover (and lets a transient degraded-factory window pass).
 */
const BASE_DEMO_VALIDATION_INTER_BATCH_DELAY_MS = 5_000

/**
 * Create base demos in escalating batches until one of them satisfies
 * `requirements`.
 *
 * Batch sizes come from BASE_DEMO_VALIDATION_BATCH_SIZES. Every batch after
 * the first is preceded by a BASE_DEMO_VALIDATION_INTER_BATCH_DELAY_MS pause.
 * Each batch member is created with createDemoAndProvision and, if creation
 * succeeded, validated with checkBaseDemoState. Creation errors are recorded
 * as failures instead of being thrown.
 *
 * @param gwsFlowsBase - Forwarded to createDemoAndProvision and makeApi.
 * @param baseDemoType - Demo type forwarded to createDemoAndProvision.
 * @param requirements - Preconditions forwarded to checkBaseDemoState.
 * @param log - Logger used for progress messages.
 * @param onProgress - Optional progress callback forwarded to
 *   createDemoAndProvision.
 * @returns `demo` is the first acceptable candidate of the earliest batch that
 *   produced one, or `null` when every batch was exhausted. `failures` lists
 *   one entry per rejected candidate from the batches that produced no
 *   acceptable demo.
 */
async function findAcceptableBaseDemo(
  gwsFlowsBase: string,
  baseDemoType: string,
  requirements: { onboarded: boolean; onboardedEmployees: boolean },
  log: ReturnType<typeof makeLog>,
  onProgress?: ProgressFn,
): Promise<{
  demo: Awaited<ReturnType<typeof createDemoAndProvision>> | null
  failures: string[]
}> {
  type ProvisionedDemo = Awaited<ReturnType<typeof createDemoAndProvision>>
  // `reason === null` together with a non-null candidate means "acceptable".
  type Outcome = { candidate: ProvisionedDemo | null; reason: string | null }

  const failures: string[] = []
  const totalBatches = BASE_DEMO_VALIDATION_BATCH_SIZES.length
  let batchNumber = 0

  for (const batchSize of BASE_DEMO_VALIDATION_BATCH_SIZES) {
    batchNumber++
    if (batchNumber > 1) {
      log(
        ` Previous attempt(s) returned degraded demos; pausing ${BASE_DEMO_VALIDATION_INTER_BATCH_DELAY_MS}ms then escalating to parallel batch of ${batchSize}`,
      )
      await new Promise(r => setTimeout(r, BASE_DEMO_VALIDATION_INTER_BATCH_DELAY_MS))
    }

    // Settle individually so one demo's creation failure (e.g. a timeout)
    // doesn't discard the other members of the batch.
    const settledBatch = await Promise.allSettled(
      Array.from({ length: batchSize }, () =>
        createDemoAndProvision(gwsFlowsBase, baseDemoType, { onProgress }),
      ),
    )

    // Validate every successfully created candidate in parallel; creation
    // failures pass straight through with their reason.
    const outcomes: Outcome[] = await Promise.all(
      settledBatch.map(async (settled): Promise<Outcome> => {
        if (settled.status === 'rejected') {
          return { candidate: null, reason: `creation failed: ${String(settled.reason)}` }
        }
        const candidate = settled.value
        const reason = await checkBaseDemoState(
          makeApi(gwsFlowsBase, candidate.flowToken),
          candidate.companyId,
          requirements,
        )
        return { candidate, reason }
      }),
    )

    // Classify the batch so the log lines report what actually happened
    // rather than labelling every non-selected member as "degraded".
    const acceptable = outcomes.filter(o => o.candidate !== null && o.reason === null)
    const creationFailures = outcomes.filter(o => o.candidate === null).length
    const degraded = outcomes.length - acceptable.length - creationFailures

    const chosen = acceptable[0]?.candidate
    if (chosen) {
      if (outcomes.length > 1) {
        log(
          ` Found acceptable demo on batch ${batchNumber} (discarded ${degraded} degraded, ${creationFailures} failed to create, ${acceptable.length - 1} unused acceptable)`,
        )
      }
      return { demo: chosen, failures }
    }

    for (const outcome of outcomes) {
      const companyHint = outcome.candidate
        ? outcome.candidate.companyId.slice(0, 8)
        : 'no-company'
      failures.push(`batch ${batchNumber} (${companyHint}): ${outcome.reason}`)
    }
    log(
      ` Batch ${batchNumber} of ${totalBatches} produced no acceptable demo (${degraded} degraded, ${creationFailures} failed to create, ${outcomes.length} total)`,
    )
  }

  return { demo: null, failures }
}

export async function provisionScenario(
scenarioPath: string,
options?: { gwsFlowsHost?: string; onProgress?: ProgressFn },
Expand All @@ -531,10 +747,41 @@ export async function provisionScenario(

const scenario = await loadScenario(scenarioPath)

// requireOnboardedEmployees implies requireOnboardedCompany — if you need
// onboarded employees you definitionally need onboarding to be complete.
const requireOnboarded =
scenario.requireOnboardedCompany === true || scenario.requireOnboardedEmployees === true
const requireOnboardedEmployees = scenario.requireOnboardedEmployees === true

log(`Provisioning ${scenario.baseDemo} demo for ${scenarioId}`)
const demoResult = await createDemoAndProvision(gwsFlowsBase, scenario.baseDemo, {
onProgress: options?.onProgress,
})
let demoResult: Awaited<ReturnType<typeof createDemoAndProvision>> | null = null
let validationFailures: string[] = []

if (!requireOnboarded) {
demoResult = await createDemoAndProvision(gwsFlowsBase, scenario.baseDemo, {
onProgress: options?.onProgress,
})
} else {
const result = await findAcceptableBaseDemo(
gwsFlowsBase,
scenario.baseDemo,
{ onboarded: requireOnboarded, onboardedEmployees: requireOnboardedEmployees },
log,
options?.onProgress,
)
demoResult = result.demo
validationFailures = result.failures
}

if (!demoResult) {
throw new Error(
`Base demo "${scenario.baseDemo}" failed scenario preconditions after ${BASE_DEMO_VALIDATION_BATCH_SIZES.reduce((s, n) => s + n, 0)} attempts across ${BASE_DEMO_VALIDATION_BATCH_SIZES.length} batches:\n${validationFailures
.map(f => ` - ${f}`)
.join(
'\n',
)}\nThis indicates a regression in the demo factory on the gws-flows backend, not in the SDK.`,
)
}

const { flowToken, companyId } = demoResult
const api = makeApi(gwsFlowsBase, flowToken)
Expand Down
8 changes: 4 additions & 4 deletions e2e/scenarios/company/company-filing-mailing-split.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@
"locations": [
{
"key": "filing-only",
"street_1": "10 Filing St",
"street_1": "500 3rd Street",
"city": "San Francisco",
"state": "CA",
"zip": "94105",
"zip": "94107",
"filing_address": true
},
{
"key": "mailing-only",
"street_1": "20 Mailing Way",
"street_1": "1 Frank H Ogawa Plaza",
"city": "Oakland",
"state": "CA",
"zip": "94607",
"zip": "94612",
"mailing_address": true
}
]
Expand Down
Loading
Loading