keploy
diff --git a/‎.github/workflows/doccano-django.yml‎
Lines changed: 197 additions & 0 deletions b/‎.github/workflows/doccano-django.yml‎
Lines changed: 197 additions & 0 deletions
diff --git a/‎.github/workflows/scripts/run-and-measure.sh‎
Lines changed: 102 additions & 0 deletions b/‎.github/workflows/scripts/run-and-measure.sh‎
Lines changed: 102 additions & 0 deletions
diff --git a/‎doccano-django/.coveragerc‎
Lines changed: 21 additions & 0 deletions b/‎doccano-django/.coveragerc‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎doccano-django/.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎doccano-django/.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎doccano-django/Dockerfile‎
Lines changed: 21 additions & 0 deletions b/‎doccano-django/Dockerfile‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎doccano-django/Dockerfile.coverage‎
Lines changed: 37 additions & 0 deletions b/‎doccano-django/Dockerfile.coverage‎
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,197 @@
+# doccano-django sample CI — keploy-independent end-to-end smoke +
+# coverage gate.
+#
+# Triggers ONLY on changes under doccano-django/ (or this workflow
+# file). Other samples in this repo have their own orthogonal CI;
+# gating the whole repo on every doccano change would slow them
+# all down for no benefit.
+#
+# What it gates:
+#   * `release-coverage` — checks out the PR's base branch (main)
+#     and runs the sample end-to-end: docker compose up, bootstrap
+#     admin token, drive flow.sh record-traffic with the per-call
+#     audit log enabled, capture the route-coverage percentage from
+#     `flow.sh coverage`. This is the baseline.
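+#   * `build-coverage` — same end-to-end against the ref that
+#     triggered the workflow (for pull_request events the default
+#     actions/checkout target is the PR merge commit).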
+#   * `coverage-gate` — fails the PR if `build`'s coverage drops
+#     more than COVERAGE_THRESHOLD percentage points below
+#     `release`. Default threshold is 1.0pp; override via repo
+#     variable `DOCCANO_COVERAGE_THRESHOLD` for a tighter or
+#     looser bar.
+#
+# On push to main, only `build-coverage` runs (no baseline to
+# compare against — main IS the baseline).
+#
+# Standards-aligned choices:
+#   * `paths:` filter on both push and pull_request triggers — the
+#     canonical GH Actions way to scope a workflow to one
+#     subdirectory.
+#   * Job outputs (steps.<id>.outputs.coverage → needs.<job>.outputs)
+#     to thread the captured percentage between jobs.
+#   * `concurrency:` cancel-in-progress on the same ref so a stale
+#     run doesn't waste runner minutes.
+#   * actions/upload-artifact for the human-readable
+#     coverage_report.txt — reviewers can inspect missing routes
+#     directly from the PR's "checks" tab.
+#   * marocchino/sticky-pull-request-comment for the PR-side diff
+#     comment. Pinned-by-header so successive runs update the same
+#     comment instead of fanning out.
+#   * The compare step is plain bash + python3 (no external
+#     coverage service). For full Python coverage.py XMLs you'd
+#     want diff-cover or codecov, but the sample's coverage is
+#     API-route-based (single percentage), so the gate is a 3-line
+#     subtraction.
+#
+# Sample is genuinely keploy-independent here: the workflow uses
+# flow.sh's $DOCCANO_FIRED_ROUTES_FILE per-call audit log as its
+# numerator source, not a keploy recording. The lane scripts in
+# keploy/integrations and keploy/enterprise consume the same
+# flow.sh, but use the keploy/test-set-*/tests/*.yaml tree as
+# their numerator (authoritative — only calls keploy actually
+# CAPTURED count). Both modes are wired into
+# `flow.sh::doccano_list_recorded_routes`.
+name: doccano-django sample
+
+# Fire only for changes to the sample or to this workflow file, plus
+# manual dispatch from the Actions tab.
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - "doccano-django/**"
+      - ".github/workflows/doccano-django.yml"
+  pull_request:
+    paths:
+      - "doccano-django/**"
+      - ".github/workflows/doccano-django.yml"
+  workflow_dispatch: {}
+
+# At most one live run per ref; a newer run cancels the older one.
+concurrency:
+  cancel-in-progress: true
+  group: doccano-django-${{ github.ref }}
+
+# Largest tolerated coverage drop, in percentage points. The repo
+# variable DOCCANO_COVERAGE_THRESHOLD wins when set; otherwise 1.0.
+env:
+  COVERAGE_THRESHOLD: ${{ vars.DOCCANO_COVERAGE_THRESHOLD || '1.0' }}
+
+jobs:
+  build-coverage:
+    # Measures coverage of the ref that triggered the workflow and
+    # publishes the percentage as the `coverage` job output.
+    name: build (current ref) coverage
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    outputs:
+      coverage: ${{ steps.measure.outputs.coverage }}
+    steps:
+      - uses: actions/checkout@v4
+
+      # The script path is relative to the sample directory set by
+      # working-directory.
+      - name: Run sample end-to-end + measure coverage
+        id: measure
+        env:
+          DOCCANO_PHASE: ci-build
+          DOCCANO_FIRED_ROUTES_FILE: ${{ runner.temp }}/fired-routes-build.log
+        working-directory: doccano-django
+        run: ../.github/workflows/scripts/run-and-measure.sh
+
+      # always(): publish whatever report exists even if measuring failed.
+      - name: Upload coverage report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-build
+          path: doccano-django/coverage_report.txt
+          if-no-files-found: warn
+
+  release-coverage:
+    # Baseline measurement: same procedure as build-coverage, but on
+    # the PR's base branch. Only meaningful for pull_request events.
+    name: release (base ref) coverage
+    if: github.event_name == 'pull_request'
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    outputs:
+      sample-existed: ${{ steps.detect.outputs.sample-existed }}
+      coverage: ${{ steps.measure.outputs.coverage || steps.empty-baseline.outputs.coverage }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.base.ref }}
+
+      # Bootstrap escape hatch for the PR that introduces the sample:
+      # the base ref has no doccano-django/ yet, so there is nothing
+      # to measure. In that case the baseline is reported as 0.0 and
+      # the gate compares the PR's coverage against zero. Once the
+      # introducing PR is merged, later PRs get a real baseline.
+      - name: Detect baseline presence
+        id: detect
+        run: |
+          if [ ! -d doccano-django ] || [ ! -x doccano-django/flow.sh ]; then
+            echo "sample-existed=false" >>"$GITHUB_OUTPUT"
+            echo "No doccano-django/ on base ref — first-PR bootstrap; baseline coverage treated as 0%."
+          else
+            echo "sample-existed=true" >>"$GITHUB_OUTPUT"
+            echo "Sample exists on base ref — running full measurement."
+          fi
+
+      # Runs the base ref's own copy of the measurement script.
+      - name: Run sample end-to-end + measure coverage
+        id: measure
+        if: steps.detect.outputs.sample-existed == 'true'
+        env:
+          DOCCANO_PHASE: ci-release
+          DOCCANO_FIRED_ROUTES_FILE: ${{ runner.temp }}/fired-routes-release.log
+        working-directory: doccano-django
+        run: ../.github/workflows/scripts/run-and-measure.sh
+
+      # Mutually exclusive with `measure`; feeds the same job output.
+      - name: Emit zero baseline (first-PR bootstrap)
+        id: empty-baseline
+        if: steps.detect.outputs.sample-existed != 'true'
+        run: echo "coverage=0.0" >>"$GITHUB_OUTPUT"
+
+      - name: Upload coverage report
+        if: always() && steps.detect.outputs.sample-existed == 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-release
+          path: doccano-django/coverage_report.txt
+          if-no-files-found: warn
+
+  coverage-gate:
+    # Fails the PR when build coverage is more than COVERAGE_THRESHOLD
+    # percentage points below the base-ref coverage, then creates or
+    # updates one sticky summary comment on the PR.
+    if: github.event_name == 'pull_request'
+    name: coverage gate
+    needs: [build-coverage, release-coverage]
+    runs-on: ubuntu-latest
+    # The sticky-comment step writes to the PR. Grant exactly that
+    # scope so the job also works where the default GITHUB_TOKEN is
+    # read-only. This job never checks out code, so it needs no
+    # `contents` access.
+    permissions:
+      pull-requests: write
+    steps:
+      - name: Compare build vs release
+        env:
+          BUILD: ${{ needs.build-coverage.outputs.coverage }}
+          RELEASE: ${{ needs.release-coverage.outputs.coverage }}
+          THRESHOLD: ${{ env.COVERAGE_THRESHOLD }}
+          BASE_REF: ${{ github.event.pull_request.base.ref }}
+        run: |
+          set -Eeuo pipefail
+          if [ -z "${BUILD:-}" ] || [ -z "${RELEASE:-}" ]; then
+            echo "::error::missing coverage outputs — build='${BUILD:-}' release='${RELEASE:-}'"
+            exit 1
+          fi
+          # The numbers reach python as argv and are parsed with float();
+          # they are never spliced into the program text. A malformed
+          # output or threshold therefore raises, and because the result
+          # is captured through an assignment under `set -e`, the step
+          # aborts instead of being mistaken for "within threshold".
+          drop=$(python3 -c 'import sys; print(round(float(sys.argv[1]) - float(sys.argv[2]), 2))' "$RELEASE" "$BUILD")
+          verdict=$(python3 -c 'import sys; rel, cur, limit = map(float, sys.argv[1:4]); print("fail" if (rel - cur) > limit else "pass")' "$RELEASE" "$BUILD" "$THRESHOLD")
+          echo "Release (${BASE_REF}): ${RELEASE}%"
+          echo "Build   (this PR):     ${BUILD}%"
+          echo "Drop:                  ${drop}pp (threshold ${THRESHOLD}pp)"
+          if [ "$verdict" = "fail" ]; then
+            echo "::error::doccano-django coverage dropped from ${RELEASE}% → ${BUILD}% (-${drop}pp), exceeding the ${THRESHOLD}pp threshold."
+            echo "Suggested actions:"
+            echo "  * Add curl(s) to flow.sh::doccano_record_traffic that exercise the new code paths."
+            echo "  * Or extend the .coveragerc 'omit' list if the new module is not part of the runtime backend (migrations, management commands, tests)."
+            exit 1
+          fi
+          echo "OK — coverage delta within ${THRESHOLD}pp threshold."
+
+      # Runs even when the comparison step failed, so the PR still gets
+      # the numbers. Skipped for fork PRs: their GITHUB_TOKEN is always
+      # read-only, so commenting would fail and turn a passing gate red.
+      - name: Sticky PR comment
+        if: ${{ !cancelled() && github.event.pull_request.head.repo.full_name == github.repository }}
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          header: doccano-django-coverage
+          message: |
+            ### doccano-django sample coverage
+
+            | ref | coverage |
+            |---|---|
+            | base (`${{ github.event.pull_request.base.ref }}`) | **${{ needs.release-coverage.outputs.coverage }}%** |
+            | this PR | **${{ needs.build-coverage.outputs.coverage }}%** |
+
+            Threshold: PR may not drop coverage by more than **${{ env.COVERAGE_THRESHOLD }}pp**. Override per-repo via the `DOCCANO_COVERAGE_THRESHOLD` actions variable.
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+#
+# run-and-measure.sh — bring doccano up under the coverage overlay,
+# run flow.sh bootstrap + record-traffic, flush coverage from each
+# gunicorn worker, run flow.sh coverage to combine + report, and
+# emit `coverage=PCT` onto $GITHUB_OUTPUT for the downstream
+# coverage-gate job.
+#
+# Called from .github/workflows/doccano-django.yml's build-coverage
+# and release-coverage jobs (one per ref under comparison). Both
+# jobs source the same script so the measurement is identical
+# across refs — any drift in the numerator definition would
+# otherwise produce a misleading delta.
+#
+# Coverage isolation contract:
+#   * Base `Dockerfile` and `docker-compose.yml` are untouched.
+#   * The overlay `Dockerfile.coverage` + `docker-compose.coverage.yml`
+#     adds coverage.py + the auto-start .pth file. ONLY this script
+#     applies the overlay; the keploy/integrations and
+#     keploy/enterprise CI lanes consume the base compose and pay
+#     zero coverage-instrumentation cost.
+#
+# Inputs (from the workflow env):
+#   DOCCANO_PHASE     — label spliced into the project name so
+#                       build vs release runs don't collide.
+#   GITHUB_OUTPUT     — standard GH Actions sink for step outputs.
+set -Eeuo pipefail
+
+export DOCCANO_BACKEND_CONTAINER="${DOCCANO_BACKEND_CONTAINER:-doccano_backend}"
+export DOCCANO_DB_CONTAINER="${DOCCANO_DB_CONTAINER:-doccano_db}"
+export DOCCANO_APP_PORT="${DOCCANO_APP_PORT:-18080}"
+export DOCCANO_FIXED_TOKEN="${DOCCANO_FIXED_TOKEN:-ac38262065f0ae1476b6a707d9d697a101764a6b}"
+
+mkdir -p coverage
+chmod 777 coverage    # worker UID inside container differs from runner UID
+sudo rm -rf coverage/.coverage* 2>/dev/null || rm -rf coverage/.coverage* 2>/dev/null || true
+
+COMPOSE=(docker compose -f docker-compose.yml -f docker-compose.coverage.yml)
+
+# Stage 1: bring up doccano with bootstrap so the schema migrations
+# and the admin user persist into the named DB volume. The overlay
+# image runs gunicorn with coverage.process_startup() auto-armed in
+# every forked worker.
+DOCCANO_SKIP_BOOTSTRAP=0 "${COMPOSE[@]}" up -d --build
+
+# Wait for the backend to start serving (cold doccano boot runs
+# Django migrations + admin user create — on a GH runner this can
+# hit 90-120s).
+for i in $(seq 1 120); do
+    code=$(curl -sS -o /dev/null -w '%{http_code}' \
+        "http://127.0.0.1:${DOCCANO_APP_PORT}/v1/health/" 2>/dev/null || echo "")
+    if [ -n "$code" ] && [ "$code" != "000" ]; then break; fi
+    sleep 2
+done
+
+bash flow.sh bootstrap 240
+"${COMPOSE[@]}" down --remove-orphans
+
+# Stage 2: re-launch in skip-bootstrap mode against the populated
+# volume; same shape the keploy lanes use. The overlay layer is
+# preserved across compose-down (only `down -v` would wipe the
+# named volume), so coverage tooling is still wired in.
+DOCCANO_SKIP_BOOTSTRAP=1 "${COMPOSE[@]}" up -d
+
+# flow.sh::doccano_record_traffic gates on doccano_wait_for_fixed_token
+# internally, so this won't fire curls at a half-booted backend.
+bash flow.sh record-traffic
+
+# Flush coverage from each gunicorn worker. coverage.py with
+# sigterm = true writes the in-flight per-worker .coverage.<pid>
+# data file to /coverage on SIGTERM; `compose kill -s SIGTERM`
+# delivers it to the container's main process which propagates to
+# its workers via gunicorn's graceful shutdown.
+"${COMPOSE[@]}" kill -s SIGTERM backend
+# coverage.py's sigterm hook is synchronous but the OS-level
+# write+fsync needs a moment.
+sleep 3
+
+# Bring backend back up so `flow.sh coverage` can docker-exec
+# `coverage combine` + `coverage report` inside.
+"${COMPOSE[@]}" up -d backend
+for i in $(seq 1 60); do
+    if docker exec "$DOCCANO_BACKEND_CONTAINER" sh -c 'ls /coverage/.coverage.* >/dev/null 2>&1'; then
+        break
+    fi
+    sleep 1
+done
+
+COVERAGE_REPORT_FILE="$PWD/coverage_report.txt" bash flow.sh coverage
+
+# Parse `Covered N/M (XX.X%)` — anchored on the parenthesised form
+# so a future report-prose change doesn't break the parse.
+pct=$(grep -oE '\([0-9]+\.[0-9]+%\)' coverage_report.txt | head -1 | tr -d '()%')
+if [ -z "$pct" ]; then
+    echo "::error::Could not parse coverage percentage from coverage_report.txt"
+    cat coverage_report.txt || true
+    exit 1
+fi
+echo "coverage=${pct}" >>"$GITHUB_OUTPUT"
+echo "coverage: ${pct}% (Python line coverage via coverage.py)"
+
+"${COMPOSE[@]}" down -v --remove-orphans
@@ -0,0 +1,21 @@
+[run]
+# Line-only measurement (no branch arcs) of the Django backend
+# code rooted at /backend.
+source = /backend
+branch = false
+
+# gunicorn forks WORKERS subprocesses. With parallel mode each one
+# writes its own .coverage.<host>.<pid> file next to the base path
+# below, and `combine` merges them when the report is produced.
+data_file = /coverage/.coverage
+parallel = true
+
+# Flush in-flight data when the process receives SIGTERM, which is
+# how the workflow stops the backend before collecting results.
+sigterm = true
+
+omit =
+    */tests/*
+    */migrations/*
+    */__pycache__/*
+    /backend/manage.py
+    /backend/config/wsgi.py
+    /backend/config/asgi.py
@@ -0,0 +1,2 @@
+# Coverage data directory created by
+# .github/workflows/scripts/run-and-measure.sh (`mkdir -p coverage`).
+coverage/
+# Report file that run-and-measure.sh asks `flow.sh coverage` to write.
+coverage_report.txt
@@ -0,0 +1,21 @@
+# Thin wrapper around doccano's official backend image at the version
+# this sample tracks. Pinning here (rather than in each lane script
+# under keploy/integrations / keploy/enterprise) means a future
+# doccano release that changes the bug-triggering shape is a one-line
+# retag in this repo, not a hunt across the CI tree.
+#
+# Upstream tag: doccano/doccano:backend (the rolling backend tag)
+# Source pin:   doccano/doccano @ v1.8.5
+#               https://github.com/doccano/doccano/releases/tag/v1.8.5
+#
+# v1.8.5 was the version exercised on keploy/enterprise pipeline 3556
+# (PR #1889) and pipeline 3572 (PR #1964 minimal repro) where the
+# bug originally manifested.
+#
+# NOTE(review): the FROM line below references the rolling `backend`
+# tag, not an immutable v1.8.5 tag or digest, so the image content
+# can change upstream without a change here — confirm whether a
+# fixed tag/digest is intended to match the "pinning" described above.
+FROM doccano/doccano:backend
+
+# Installing the wrapper entrypoint and marking it executable is done
+# as root; the image then drops back to the `doccano` user.
+USER root
+COPY doccano-entrypoint.sh /opt/bin/doccano-keploy-entrypoint.sh
+RUN chmod +x /opt/bin/doccano-keploy-entrypoint.sh
+USER doccano
+
+# Replace the upstream entrypoint with the sample's wrapper script.
+ENTRYPOINT ["/opt/bin/doccano-keploy-entrypoint.sh"]
@@ -0,0 +1,37 @@
+# Coverage-instrumented variant of the doccano backend image.
+#
+# Base `Dockerfile` (and `docker-compose.yml`) are deliberately
+# untouched so the keploy enterprise / integrations lanes — which
+# consume them as-is — pay zero coverage-instrumentation cost. This
+# overlay image is built and run ONLY by the standalone GitHub
+# Actions workflow under `.github/workflows/doccano-django.yml`,
+# wired in via `docker-compose.coverage.yml`.
+#
+# What the overlay adds:
+#   * `coverage` (Python coverage.py) installed into the same
+#     site-packages as gunicorn / Django.
+#   * `.coveragerc` placed at /backend/.coveragerc — the working
+#     directory the upstream image starts gunicorn from. With
+#     `COVERAGE_PROCESS_START=/backend/.coveragerc` exported into
+#     the container env (set in the compose overlay), every
+#     gunicorn worker that imports `coverage.process_startup` via
+#     site-packages will pick the rcfile up; combined with `parallel
+#     = true` and `sigterm = true` in the rcfile, this gives us
+#     real per-worker line coverage that flushes on SIGTERM.
+FROM doccano/doccano:backend
+
+USER root
+RUN pip install --no-cache-dir 'coverage[toml]==7.6.1'
+
+# Subprocess auto-start: a .pth file in site-packages is processed
+# at every Python startup, so each gunicorn worker that forks calls
+# coverage.process_startup() before any Django code runs. This is
+# the canonical way coverage.py instruments forked subprocesses
+# (see "Measuring sub-processes" in the coverage.py docs).
+#
+# The site-packages directory is resolved from the image's own
+# interpreter rather than hard-coded: the base tag is rolling, and a
+# fixed `python3.10` path would point at the wrong directory as soon
+# as upstream changes its Python minor version. The `&&` chain makes
+# the build fail if the lookup fails, instead of writing the .pth
+# file to a bogus location.
+RUN site_packages="$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')" \
+    && echo 'import coverage; coverage.process_startup()' \
+        > "${site_packages}/coverage_subprocess.pth"
+
+COPY .coveragerc /backend/.coveragerc
+# /coverage is the data_file directory named in .coveragerc; it must
+# be writable by the unprivileged user the backend runs as.
+RUN mkdir -p /coverage \
+    && chown -R doccano:doccano /coverage /backend/.coveragerc
+USER doccano