namecheap · kurok · Apr 21, 2026 · Apr 21, 2026
diff --git a/dist/index.js b/dist/index.js
@@ -87855,6 +87855,7 @@ const {
 const core = __nccwpck_require__(7484);
 const config = __nccwpck_require__(1283);
 const log = __nccwpck_require__(7223);
+const { withRetry } = __nccwpck_require__(6759);
 const { sortByCreationDate } = __nccwpck_require__(5804);
 
 // EC2Client reads region + credentials from the environment (set by
@@ -87988,9 +87989,11 @@ async function terminateEc2Instance() {
   const start = Date.now();
   log.info('terminate_instance', { instance_id: config.input.ec2InstanceId });
   try {
-    await client.send(new TerminateInstancesCommand({
-      InstanceIds: [config.input.ec2InstanceId],
-    }));
+    await withRetry('terminate_instance', () =>
+      client.send(new TerminateInstancesCommand({
+        InstanceIds: [config.input.ec2InstanceId],
+      })),
+    );
     log.info('terminate_instance', { instance_id: config.input.ec2InstanceId, elapsed_ms: Date.now() - start });
     core.info(`AWS EC2 instance ${config.input.ec2InstanceId} is terminated`);
   } catch (error) {
@@ -88098,6 +88101,7 @@ const github = __nccwpck_require__(3228);
 const _ = __nccwpck_require__(9975);
 const config = __nccwpck_require__(1283);
 const log = __nccwpck_require__(7223);
+const { withRetry } = __nccwpck_require__(6759);
 
 // use the unique label to find the runner
 // as we don't have the runner's id, it's not possible to get it in any other way
@@ -88145,7 +88149,9 @@ async function removeRunner() {
   const start = Date.now();
   log.info('remove_runner', { runner_id: runner.id, label: config.input.label });
   try {
-    await octokit.request('DELETE /repos/{owner}/{repo}/actions/runners/{runner_id}', _.merge(config.githubContext, { runner_id: runner.id }));
+    await withRetry('remove_runner', () =>
+      octokit.request('DELETE /repos/{owner}/{repo}/actions/runners/{runner_id}', _.merge(config.githubContext, { runner_id: runner.id })),
+    );
     log.info('remove_runner', { runner_id: runner.id, label: config.input.label, elapsed_ms: Date.now() - start });
     core.info(`GitHub self-hosted runner ${runner.name} is removed`);
     return;
@@ -88297,6 +88303,61 @@ module.exports = {
 };
 
 
+/***/ }),
+
+/***/ 6759:
+/***/ ((module, __unused_webpack_exports, __nccwpck_require__) => {
+
+const log = __nccwpck_require__(7223);
+
+// Run `fn()` with exponential backoff. `fn` returns a Promise. Retries
+// on any rejection; does not look at error shape (callers should only
+// pass idempotent operations like DELETE /runners/{id} and
+// TerminateInstances — re-executing on transient errors is safe).
+// Resolves with fn's result; rethrows the last error once exhausted.
+// Defaults: 3 attempts, 2s base delay, doubled each time, capped at
+// 10s. Worst-case wait is 2s + 4s = 6s (no sleep after last attempt).
+async function withRetry(step, fn, opts = {}) {
+  const attempts = opts.attempts || 3; // `||`: any falsy value (incl. 0) falls back to 3
+  const baseMs = opts.baseMs || 2000;
+  const maxMs = opts.maxMs || 10000;
+
+  let lastError;
+  for (let i = 1; i <= attempts; i++) {
+    try {
+      return await fn();
+    } catch (error) {
+      lastError = error;
+      if (i === attempts) {
+        log.error(`${step}_retry`, {
+          attempt: i,
+          attempts,
+          exhausted: true,
+          error: error.name,
+          message: error.message,
+        });
+        throw error;
+      }
+      const delayMs = Math.min(baseMs * 2 ** (i - 1), maxMs); // baseMs, 2x, 4x, ... capped at maxMs
+      log.warn(`${step}_retry`, {
+        attempt: i,
+        attempts,
+        next_delay_ms: delayMs,
+        error: error.name,
+        message: error.message,
+      });
+      await new Promise((resolve) => setTimeout(resolve, delayMs));
+    }
+  }
+  /* istanbul ignore next — only reached if a negative `attempts` skips the loop */
+  throw lastError;
+}
+
+module.exports = {
+  withRetry,
+};
+
+
 /***/ }),
 
 /***/ 5804:
@@ -88814,10 +88875,30 @@ async function start() {
 
 async function stop() {
   core.startGroup('stop-runner');
+  const failures = [];
   try {
     log.debug('stop_inputs', config.input);
-    await aws.terminateEc2Instance();
-    await gh.removeRunner();
+
+    // Attempt both cleanups independently — neither should short-circuit
+    // the other. A GitHub API failure must not prevent EC2 termination
+    // (billing) and vice versa. Both have internal retries via
+    // withRetry(); catch here is the last line of defense.
+    try {
+      await aws.terminateEc2Instance();
+    } catch (error) {
+      failures.push({ step: 'terminate_instance', error: error.name, message: error.message });
+    }
+    try {
+      await gh.removeRunner();
+    } catch (error) {
+      failures.push({ step: 'remove_runner', error: error.name, message: error.message });
+    }
+
+    if (failures.length > 0) {
+      log.error('stop', { outcome: 'partial', failures });
+      const summary = failures.map((f) => `${f.step}: ${f.message}`).join('; ');
+      throw new Error(`stop mode completed with ${failures.length} cleanup failure(s): ${summary}`);
+    }
     log.info('stop', { instance_id: config.input.ec2InstanceId, label: config.input.label, outcome: 'ok' });
   } finally {
     core.endGroup();

diff --git a/src/aws.js b/src/aws.js
@@ -9,6 +9,7 @@ const {
 const core = require('@actions/core');
 const config = require('./config');
 const log = require('./log');
+const { withRetry } = require('./retry');
 const { sortByCreationDate } = require('./utils');
 
 // EC2Client reads region + credentials from the environment (set by
@@ -142,9 +143,11 @@ async function terminateEc2Instance() {
   const start = Date.now();
   log.info('terminate_instance', { instance_id: config.input.ec2InstanceId });
   try {
-    await client.send(new TerminateInstancesCommand({
-      InstanceIds: [config.input.ec2InstanceId],
-    }));
+    await withRetry('terminate_instance', () =>
+      client.send(new TerminateInstancesCommand({
+        InstanceIds: [config.input.ec2InstanceId],
+      })),
+    );
     log.info('terminate_instance', { instance_id: config.input.ec2InstanceId, elapsed_ms: Date.now() - start });
     core.info(`AWS EC2 instance ${config.input.ec2InstanceId} is terminated`);
   } catch (error) {

diff --git a/src/gh.js b/src/gh.js
@@ -3,6 +3,7 @@ const github = require('@actions/github');
 const _ = require('lodash');
 const config = require('./config');
 const log = require('./log');
+const { withRetry } = require('./retry');
 
 // use the unique label to find the runner
 // as we don't have the runner's id, it's not possible to get it in any other way
@@ -50,7 +51,9 @@ async function removeRunner() {
   const start = Date.now();
   log.info('remove_runner', { runner_id: runner.id, label: config.input.label });
   try {
-    await octokit.request('DELETE /repos/{owner}/{repo}/actions/runners/{runner_id}', _.merge(config.githubContext, { runner_id: runner.id }));
+    await withRetry('remove_runner', () =>
+      octokit.request('DELETE /repos/{owner}/{repo}/actions/runners/{runner_id}', _.merge(config.githubContext, { runner_id: runner.id })),
+    );
     log.info('remove_runner', { runner_id: runner.id, label: config.input.label, elapsed_ms: Date.now() - start });
     core.info(`GitHub self-hosted runner ${runner.name} is removed`);
     return;

diff --git a/src/index.js b/src/index.js
@@ -53,10 +53,30 @@ async function start() {
 
 async function stop() {
   core.startGroup('stop-runner');
+  const failures = [];
   try {
     log.debug('stop_inputs', config.input);
-    await aws.terminateEc2Instance();
-    await gh.removeRunner();
+
+    // Attempt both cleanups independently — neither should short-circuit
+    // the other. A GitHub API failure must not prevent EC2 termination
+    // (billing) and vice versa. Both have internal retries via
+    // withRetry(); catch here is the last line of defense.
+    try {
+      await aws.terminateEc2Instance();
+    } catch (error) {
+      failures.push({ step: 'terminate_instance', error: error.name, message: error.message });
+    }
+    try {
+      await gh.removeRunner();
+    } catch (error) {
+      failures.push({ step: 'remove_runner', error: error.name, message: error.message });
+    }
+
+    if (failures.length > 0) {
+      log.error('stop', { outcome: 'partial', failures });
+      const summary = failures.map((f) => `${f.step}: ${f.message}`).join('; ');
+      throw new Error(`stop mode completed with ${failures.length} cleanup failure(s): ${summary}`);
+    }
     log.info('stop', { instance_id: config.input.ec2InstanceId, label: config.input.label, outcome: 'ok' });
   } finally {
     core.endGroup();

diff --git a/src/retry.js b/src/retry.js
@@ -0,0 +1,48 @@
+const log = require('./log');
+
+// Run `fn()` with exponential backoff. `fn` returns a Promise. Retries
+// on any rejection; does not look at error shape (callers should only
+// pass idempotent operations like DELETE /runners/{id} and
+// TerminateInstances — re-executing on transient errors is safe).
+// Resolves with fn's result; rethrows the last error once exhausted.
+// Defaults: 3 attempts, 2s base delay, doubled each time, capped at
+// 10s. Worst-case wait is 2s + 4s = 6s (no sleep after last attempt).
+async function withRetry(step, fn, opts = {}) {
+  const attempts = opts.attempts || 3; // `||`: any falsy value (incl. 0) falls back to 3
+  const baseMs = opts.baseMs || 2000;
+  const maxMs = opts.maxMs || 10000;
+
+  let lastError;
+  for (let i = 1; i <= attempts; i++) {
+    try {
+      return await fn();
+    } catch (error) {
+      lastError = error;
+      if (i === attempts) {
+        log.error(`${step}_retry`, {
+          attempt: i,
+          attempts,
+          exhausted: true,
+          error: error.name,
+          message: error.message,
+        });
+        throw error;
+      }
+      const delayMs = Math.min(baseMs * 2 ** (i - 1), maxMs); // baseMs, 2x, 4x, ... capped at maxMs
+      log.warn(`${step}_retry`, {
+        attempt: i,
+        attempts,
+        next_delay_ms: delayMs,
+        error: error.name,
+        message: error.message,
+      });
+      await new Promise((resolve) => setTimeout(resolve, delayMs));
+    }
+  }
+  /* istanbul ignore next — only reached if a negative `attempts` skips the loop */
+  throw lastError;
+}
+
+module.exports = {
+  withRetry,
+};
diff --git a/tests/retry.test.js b/tests/retry.test.js
@@ -0,0 +1,68 @@
+// retry.js exposes withRetry(step, fn, opts). Tests stub @actions/core
+// (via log.js) so the warn/error hooks don't actually hit the Actions
+// runtime, and use short base delays so the backoff waits don't blow
+// up test time.
+
+const coreMock = { // shared @actions/core stand-in; its mocks are reset by load()
+  info: jest.fn(),
+  warning: jest.fn(),
+  error: jest.fn(),
+};
+
+function load() { // returns a freshly required ../src/retry wired to the mocks
+  jest.resetModules(); // clear the module cache so the doMock calls below apply
+  coreMock.info.mockReset();
+  coreMock.warning.mockReset();
+  coreMock.error.mockReset();
+  jest.doMock('@actions/core', () => coreMock);
+  jest.doMock('../src/config', () => ({ input: { mode: 'stop', debug: 'false' } })); // NOTE(review): presumably read by log.js — confirm
+  return require('../src/retry');
+}
+
+describe('withRetry', () => { // NOTE(review): all cases run on real timers (no fake timers set up here)
+  test('resolves when fn succeeds on first try', async () => {
+    const { withRetry } = load();
+    const fn = jest.fn().mockResolvedValue('ok');
+    await expect(withRetry('test_step', fn, { baseMs: 1 })).resolves.toBe('ok');
+    expect(fn).toHaveBeenCalledTimes(1);
+    expect(coreMock.warning).not.toHaveBeenCalled();
+  });
+
+  test('retries on rejection and resolves when a later attempt succeeds', async () => {
+    const { withRetry } = load();
+    const fn = jest.fn()
+      .mockRejectedValueOnce(new Error('transient'))
+      .mockResolvedValue('ok');
+    await expect(withRetry('test_step', fn, { attempts: 3, baseMs: 1 })).resolves.toBe('ok');
+    expect(fn).toHaveBeenCalledTimes(2);
+    expect(coreMock.warning).toHaveBeenCalledTimes(1);
+    const payload = JSON.parse(coreMock.warning.mock.calls[0][0]); // relies on the first warning arg being a JSON string
+    expect(payload).toMatchObject({ step: 'test_step_retry', attempt: 1, attempts: 3 });
+  });
+
+  test('exhausts attempts and re-throws the last error', async () => {
+    const { withRetry } = load();
+    const fn = jest.fn().mockRejectedValue(new Error('persistent'));
+    await expect(withRetry('test_step', fn, { attempts: 3, baseMs: 1 })).rejects.toThrow('persistent');
+    expect(fn).toHaveBeenCalledTimes(3);
+    // Two warn logs (attempts 1 and 2) + one error log (attempt 3 exhausted).
+    expect(coreMock.warning).toHaveBeenCalledTimes(2);
+    expect(coreMock.error).toHaveBeenCalledTimes(1);
+    const final = JSON.parse(coreMock.error.mock.calls[0][0]);
+    expect(final).toMatchObject({ step: 'test_step_retry', attempt: 3, exhausted: true });
+  });
+
+  test('backoff caps at maxMs', async () => { // sleeps 100 + 150 + 150 ms of real time
+    const { withRetry } = load();
+    const fn = jest.fn()
+      .mockRejectedValueOnce(new Error('e1'))
+      .mockRejectedValueOnce(new Error('e2'))
+      .mockRejectedValueOnce(new Error('e3'))
+      .mockResolvedValue('ok');
+    await expect(withRetry('test_step', fn, { attempts: 5, baseMs: 100, maxMs: 150 })).resolves.toBe('ok');
+    expect(fn).toHaveBeenCalledTimes(4);
+    // Warn-log delays: 100, then 200 and 400 are both capped to maxMs = 150.
+    const delays = coreMock.warning.mock.calls.map((c) => JSON.parse(c[0]).next_delay_ms);
+    expect(delays).toEqual([100, 150, 150]);
+  });
+});