Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
env:
PYTEST_DISABLE_PLUGIN_AUTOLOAD: "1"
run: |
pytest tests/unit/ tests/test_security.py \
pytest tests/unit/ tests/test_security.py tests/test_voiceprint_db.py tests/test_job_service.py \
-p pytest_cov \
-v --tb=short --no-header \
--cov=app --cov-report=xml:coverage.xml \
Expand Down
21 changes: 16 additions & 5 deletions .github/workflows/claude-code-review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ on:

permissions:
contents: read
id-token: write
pull-requests: write
issues: read

Expand All @@ -26,20 +27,30 @@ jobs:
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_BASE_URL }}
GH_TOKEN_VALUE: ${{ secrets.GH_TOKEN }}
CLAUDE_MODEL: claude-sonnet-4-6
steps:
- name: Skip when ANTHROPIC_API_KEY is not configured
if: ${{ env.ANTHROPIC_API_KEY == '' }}
run: echo "ANTHROPIC_API_KEY is not configured; skipping Claude Code review."
- name: Skip when Claude secrets are not configured
if: ${{ env.ANTHROPIC_API_KEY == '' || env.ANTHROPIC_BASE_URL == '' || env.GH_TOKEN_VALUE == '' }}
run: echo "Claude Code review secrets are not configured; skipping Claude Code review."
- name: Checkout repository
if: ${{ env.ANTHROPIC_API_KEY != '' && env.ANTHROPIC_BASE_URL != '' && env.GH_TOKEN_VALUE != '' }}
uses: actions/checkout@v4
with:
fetch-depth: 1
persist-credentials: false
- name: Run Claude Code review
if: ${{ env.ANTHROPIC_API_KEY != '' }}
if: ${{ env.ANTHROPIC_API_KEY != '' && env.ANTHROPIC_BASE_URL != '' && env.GH_TOKEN_VALUE != '' }}
uses: anthropics/claude-code-action@v1
with:
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
github_token: ${{ secrets.GH_TOKEN }}
prompt: |
Review this pull request using REVIEW.md as the review-only guide.
Focus on actionable VoScript risks: privacy/security leaks, model lifecycle races,
GPU/CPU fallback behavior, HTTP API compatibility, regression-test coverage, and
synchronized English/Chinese documentation. Avoid formatting-only comments.
claude_args: "--max-turns 5"
claude_args: |
--model ${{ env.CLAUDE_MODEL }}
env:
ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_BASE_URL }}
6 changes: 4 additions & 2 deletions codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ coverage:
status:
project:
default:
target: 70%
target: 90%
threshold: 0%
patch:
default:
target: 60%
target: 80%
threshold: 0%

ignore:
- "tests/**"
Expand Down
298 changes: 298 additions & 0 deletions tests/unit/test_api_route_coverage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
"""Endpoint edge coverage for transcription and voiceprint routers."""

from __future__ import annotations

import json
from pathlib import Path

import numpy as np


def _seed_result(transcriptions_dir: Path, tr_id: str, *, filename: str = "audio.wav"):
tr_dir = transcriptions_dir / tr_id
tr_dir.mkdir(parents=True, exist_ok=True)
payload = {
"id": tr_id,
"filename": filename,
"created_at": "2026-04-25T00:00:00+00:00",
"segments": [
{
"id": 1,
"start": None,
"end": float("nan"),
"speaker_label": "SPEAKER_00",
"speaker_name": "Maple\nInjected",
"speaker_id": "spk_old",
"text": "hello",
},
{
"id": 2,
"start": 61.25,
"end": 62.0,
"speaker_label": "SPEAKER_01",
"speaker_name": "Guest",
"speaker_id": None,
"text": "world",
},
],
"unique_speakers": ["Maple\nInjected", "Guest"],
"speaker_map": {},
}
result_path = tr_dir / "result.json"
result_path.write_text(json.dumps(payload), encoding="utf-8")
return result_path


def test_transcription_job_status_fallback_paths(app_client):
import api.routers.transcriptions as router

router.jobs["tr_memory_done"] = {
"status": "completed",
"filename": "done.wav",
"result": {"id": "tr_memory_done"},
}
router.jobs["tr_memory_failed"] = {
"status": "failed",
"filename": "failed.wav",
"error": "boom",
}

done = app_client.get("/api/jobs/tr_memory_done")
assert done.status_code == 200
assert done.json()["result"] == {"id": "tr_memory_done"}

failed = app_client.get("/api/jobs/tr_memory_failed")
assert failed.status_code == 200
assert failed.json()["error"] == "boom"

completed_dir = router.TRANSCRIPTIONS_DIR / "tr_disk_done"
completed_dir.mkdir(parents=True)
(completed_dir / "status.json").write_text(
json.dumps({"status": "completed", "filename": "disk.wav"}),
encoding="utf-8",
)
(completed_dir / "result.json").write_text("{bad-json", encoding="utf-8")
disk_done = app_client.get("/api/jobs/tr_disk_done")
assert disk_done.status_code == 200
assert disk_done.json()["result"] is None

queued_dir = router.TRANSCRIPTIONS_DIR / "tr_disk_queued"
queued_dir.mkdir(parents=True)
(queued_dir / "status.json").write_text(
json.dumps({"status": "queued", "filename": "queued.wav"}),
encoding="utf-8",
)
disk_queued = app_client.get("/api/jobs/tr_disk_queued")
assert disk_queued.status_code == 200
assert disk_queued.json()["status"] == "failed"

failed_dir = router.TRANSCRIPTIONS_DIR / "tr_disk_failed"
failed_dir.mkdir(parents=True)
(failed_dir / "status.json").write_text(
json.dumps({"status": "failed", "filename": "failed.wav", "error": "bad"}),
encoding="utf-8",
)
disk_failed = app_client.get("/api/jobs/tr_disk_failed")
assert disk_failed.status_code == 200
assert disk_failed.json()["error"] == "bad"


def test_transcription_list_audio_export_and_reassign_paths(app_client):
import api.routers.transcriptions as router

tr_id = "tr_route_edges"
_seed_result(router.TRANSCRIPTIONS_DIR, tr_id, filename="route_audio.wav")
bad_dir = router.TRANSCRIPTIONS_DIR / "tr_bad_listing"
bad_dir.mkdir(parents=True)
(bad_dir / "result.json").write_text("{bad-json", encoding="utf-8")

listing = app_client.get("/api/transcriptions")
assert listing.status_code == 200
assert any(
row["id"] == tr_id and row["segment_count"] == 2 for row in listing.json()
)

missing_audio = app_client.get(f"/api/transcriptions/{tr_id}/audio")
assert missing_audio.status_code == 404

(router.UPLOADS_DIR / "route_audio.wav").write_bytes(b"audio")
audio = app_client.get(f"/api/transcriptions/{tr_id}/audio")
assert audio.status_code == 200
assert audio.content == b"audio"

srt = app_client.get(f"/api/export/{tr_id}?format=srt")
assert srt.status_code == 200
assert "00:00:00,000 --> 00:00:00,000" in srt.text
assert "[Maple Injected] hello" in srt.text

txt = app_client.get(f"/api/export/{tr_id}?format=txt")
assert txt.status_code == 200
assert "[01:01] Guest: world" in txt.text

exported_json = app_client.get(f"/api/export/{tr_id}?format=json")
assert exported_json.status_code == 200
assert exported_json.json()["id"] == tr_id

unsupported = app_client.get(f"/api/export/{tr_id}?format=vtt")
assert unsupported.status_code == 400

invalid_id = app_client.put(
f"/api/transcriptions/{tr_id}/segments/1/speaker",
data={"speaker_name": "Maple", "speaker_id": "not-safe"},
)
assert invalid_id.status_code == 422

class FakeDB:
def __init__(self, found):
self.found = found

def get_speaker(self, speaker_id):
return {"id": speaker_id} if self.found else None

app_client.app.state.db = FakeDB(found=False)
missing_voiceprint = app_client.put(
f"/api/transcriptions/{tr_id}/segments/1/speaker",
data={"speaker_name": "Maple", "speaker_id": "spk_missing"},
)
assert missing_voiceprint.status_code == 404

app_client.app.state.db = FakeDB(found=True)
updated = app_client.put(
f"/api/transcriptions/{tr_id}/segments/1/speaker",
data={"speaker_name": "Maple", "speaker_id": "spk_known"},
)
assert updated.status_code == 200

cleared = app_client.put(
f"/api/transcriptions/{tr_id}/segments/2/speaker",
data={"speaker_name": "Maple"},
)
assert cleared.status_code == 200

result = json.loads((router.TRANSCRIPTIONS_DIR / tr_id / "result.json").read_text())
assert result["segments"][0]["speaker_id"] == "spk_known"
assert result["segments"][1]["speaker_id"] is None
assert result["unique_speakers"] == ["Maple"]

missing_segment = app_client.put(
f"/api/transcriptions/{tr_id}/segments/99/speaker",
data={"speaker_name": "Nobody"},
)
assert missing_segment.status_code == 404


def test_voiceprint_management_routes(app_client):
import api.routers.voiceprints as router

class FakeDB:
def __init__(self):
self.speakers = {}
self.updated = []
self.deleted = []
self.renamed = []
self.cohort_path = None
self.last_cohort_skipped = 3

def add_speaker(self, name, embedding):
assert embedding.shape == (3,)
self.speakers["spk_new"] = {"id": "spk_new", "name": name}
return "spk_new"

def update_speaker(self, speaker_id, embedding, name=None):
self.updated.append((speaker_id, name, tuple(embedding.tolist())))

def list_speakers(self):
return list(self.speakers.values())

def get_speaker(self, speaker_id):
return self.speakers.get(speaker_id)

def delete_speaker(self, speaker_id):
if speaker_id not in self.speakers:
raise ValueError("missing speaker")
self.deleted.append(speaker_id)
self.speakers.pop(speaker_id)

def rename_speaker(self, speaker_id, name):
if speaker_id not in self.speakers:
raise ValueError("missing speaker")
self.renamed.append((speaker_id, name))
self.speakers[speaker_id]["name"] = name

def build_cohort_from_transcriptions(self, transcriptions_dir):
assert transcriptions_dir == str(router.TRANSCRIPTIONS_DIR)
return 7

fake_db = FakeDB()
app_client.app.state.db = fake_db

missing = app_client.post(
"/api/voiceprints/enroll",
data={
"tr_id": "tr_voiceprint",
"speaker_label": "SPEAKER_00",
"speaker_name": "Maple",
},
)
assert missing.status_code == 404

tr_dir = router.safe_tr_dir("tr_voiceprint")
tr_dir.mkdir(parents=True, exist_ok=True)
np.save(tr_dir / "emb_SPEAKER_00.npy", np.array([1.0, 2.0, 3.0], dtype=np.float32))

created = app_client.post(
"/api/voiceprints/enroll",
data={
"tr_id": "tr_voiceprint",
"speaker_label": "SPEAKER_00",
"speaker_name": "Maple",
},
)
assert created.status_code == 200
assert created.json() == {"action": "created", "speaker_id": "spk_new"}

updated = app_client.post(
"/api/voiceprints/enroll",
data={
"tr_id": "tr_voiceprint",
"speaker_label": "SPEAKER_00",
"speaker_name": "Maple Updated",
"speaker_id": "spk_new",
},
)
assert updated.status_code == 200
assert updated.json() == {"action": "updated", "speaker_id": "spk_new"}
assert fake_db.updated == [("spk_new", "Maple Updated", (1.0, 2.0, 3.0))]

listing = app_client.get("/api/voiceprints")
assert listing.status_code == 200
assert listing.json() == [{"id": "spk_new", "name": "Maple"}]

found = app_client.get("/api/voiceprints/spk_new")
assert found.status_code == 200
assert found.json()["name"] == "Maple"

missing_get = app_client.get("/api/voiceprints/spk_missing")
assert missing_get.status_code == 404

renamed = app_client.put("/api/voiceprints/spk_new/name", data={"name": "Renamed"})
assert renamed.status_code == 200
assert fake_db.renamed == [("spk_new", "Renamed")]

missing_rename = app_client.put(
"/api/voiceprints/spk_missing/name", data={"name": "Missing"}
)
assert missing_rename.status_code == 404

cohort = app_client.post("/api/voiceprints/rebuild-cohort")
assert cohort.status_code == 200
assert cohort.json()["cohort_size"] == 7
assert cohort.json()["skipped"] == 3
assert cohort.json()["saved_to"].endswith("asnorm_cohort.npy")

deleted = app_client.delete("/api/voiceprints/spk_new")
assert deleted.status_code == 200
assert fake_db.deleted == ["spk_new"]

missing_delete = app_client.delete("/api/voiceprints/spk_missing")
assert missing_delete.status_code == 404
Loading
Loading