Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Changelog

## [0.9.2] - 2026-05-10

### Fixed
- `Vcon.build_new()` now emits `"vcon": "0.4.0"` per `draft-ietf-vcon-vcon-core-02` §4.1.1. The field is deprecated in the draft but retained for parser compatibility; emitting it by default avoids surprises with strict parsers.
- `Vcon.build_new()` no longer initializes empty `"group": []` and `"redacted": {}`. The speckit reserves `group`, and downstream consumers no longer need to strip these defaults before serializing. Both fields are still populated lazily by their setters.

### Added
- `add_wtf_transcription_analysis()` — sibling helper to `add_wtf_transcription_attachment()` that places the WTF transcription into `analysis[]` as a spec-shaped analysis entry (`type: "transcription"`, `vendor`, `product`, `schema`, `encoding: "json"`, JSON-stringified body). Use this when your pipeline treats transcripts as derived analysis output; use the existing `add_wtf_transcription_attachment()` helper for the canonical `attachments[]` placement shown in the speckit example.

## [0.9.0] - 2025-01-26

### 🎉 Major Release: Extension Framework and Privacy Compliance
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "vcon"
version = "0.9.1"
version = "0.9.2"
description = "The vCon library - Complete vCon 0.4.0 specification implementation"
authors = ["Thomas McCarthy-Howe <ghostofbasho@gmail.com>"]
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="vcon",
version="0.9.0",
version="0.9.2",
author="Thomas McCarthy-Howe",
author_email="ghostofbasho@gmail.com",
description="A package for working with vCon containers",
Expand Down
70 changes: 67 additions & 3 deletions src/vcon/vcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,9 +453,8 @@ def build_new(
logger.debug(f"Generated UUID8: {uuid}")

vcon_dict = {
"vcon": "0.4.0",
"uuid": uuid,
"redacted": {},
"group": [],
"parties": [],
"dialog": [],
"attachments": [],
Expand Down Expand Up @@ -774,7 +773,72 @@ def add_wtf_transcription_attachment(
self.add_extension("wtf_transcription")

logger.info("Added WTF transcription attachment")


except ImportError:
raise RuntimeError("WTF extension not available")

# URL of the WTF extension draft; add_wtf_transcription_analysis() passes it
# as the ``schema`` value of the analysis entry it creates.
WTF_SCHEMA_URL = "https://datatracker.ietf.org/doc/html/draft-howe-vcon-wtf-extension"

def add_wtf_transcription_analysis(
    self,
    transcript: Dict[str, Any],
    segments: List[Dict[str, Any]],
    metadata: Dict[str, Any],
    dialog_index: Optional[int] = None,
    **kwargs
) -> None:
    """
    Add a WTF transcription as an analysis entry (vs. an attachment).

    Use ``add_wtf_transcription_attachment`` for canonical placement
    (matches the speckit example, attachments[]). Use this method if your
    pipeline treats transcripts as derived analysis output and prefers
    analysis[] placement.

    Emits a spec-shaped analysis entry: ``type="transcription"``,
    ``vendor`` and ``product`` taken from ``metadata["provider"]`` /
    ``metadata["model"]``, ``schema`` set to the WTF draft URL,
    ``encoding="json"``, and ``body`` as a JSON-serialized WTF document.
    The ``wtf_transcription`` extension is registered on the vCon if it is
    not already listed.

    Args:
        transcript: Transcript information dictionary
        segments: List of segment dictionaries
        metadata: Metadata dictionary; ``provider`` and ``model`` keys
            map to ``vendor`` and ``product`` on the analysis entry.
            A missing ``provider`` falls back to ``"unknown"``; a missing
            ``model`` is passed through as ``None``.
        dialog_index: Index of the dialog this transcription applies to.
            When ``None``, the entry's ``dialog`` reference is an empty list.
        **kwargs: Additional WTF parameters (words, speakers, quality, etc.)

    Raises:
        RuntimeError: If extensions are not available, or if an
            ``ImportError`` occurs while loading or using the WTF extension.
    """
    if not EXTENSIONS_AVAILABLE:
        raise RuntimeError("Extensions not available")

    try:
        from .extensions.wtf import WTFExtension
        extension = WTFExtension()
        # Reuse the attachment builder so both helpers produce the same
        # WTF document; only its body is placed into analysis[].
        wtf_attachment = extension.create_wtf_attachment(
            transcript=transcript,
            segments=segments,
            metadata=metadata,
            **kwargs
        )

        wtf_body = wtf_attachment["body"]
        dialog_ref = [dialog_index] if dialog_index is not None else []

        self.add_analysis(
            type="transcription",
            dialog=dialog_ref,
            vendor=metadata.get("provider", "unknown"),
            product=metadata.get("model"),
            body=json.dumps(wtf_body),
            encoding="json",
            schema=self.WTF_SCHEMA_URL,
        )

        if "wtf_transcription" not in self.get_extensions():
            self.add_extension("wtf_transcription")

        logger.info("Added WTF transcription analysis entry")

    except ImportError as exc:
        # Chain explicitly so the original import failure stays attached
        # as the cause of the RuntimeError callers see.
        raise RuntimeError("WTF extension not available") from exc

Expand Down
47 changes: 42 additions & 5 deletions tests/test_vcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,10 @@ def test_build_from_json() -> None:
def test_build_new() -> None:
vcon = Vcon.build_new()
assert vcon.uuid is not None
assert vcon.vcon is None # vcon field is now optional and not set by default
assert vcon.vcon == "0.4.0"
assert vcon.created_at is not None
assert "group" not in vcon.vcon_dict
assert "redacted" not in vcon.vcon_dict


def test_tags() -> None:
Expand Down Expand Up @@ -1368,11 +1370,14 @@ def test_version_field_optional_load_from_file(tmp_path) -> None:
assert "vcon" not in vcon.vcon_dict


def test_build_new_no_version_field() -> None:
"""Test that build_new creates vCons without version field."""
def test_build_new_sets_version_field() -> None:
"""build_new() sets vcon: '0.4.0' per draft-ietf-vcon-vcon-core-02 §4.1.1.

The field is deprecated in the draft but retained for parser compat;
the library emits it by default to avoid surprises with strict parsers.
"""
vcon = Vcon.build_new()
# Version field should not be automatically added
assert "vcon" not in vcon.vcon_dict
assert vcon.vcon_dict["vcon"] == "0.4.0"


def test_no_version_field_remains_absent() -> None:
Expand Down Expand Up @@ -1508,3 +1513,35 @@ def test_extensions_property_handling():
assert vcon.get_extensions() == ["video"]
assert vcon.get_critical() == ["encryption"]
assert vcon.vcon_dict.get("meta", {}).get("custom_field") == "value"


def test_build_new_emits_vcon_syntax_param() -> None:
    """The default ``vcon: '0.4.0'`` marker must survive a JSON round trip."""
    as_json = Vcon.build_new().to_json()
    round_tripped = Vcon.build_from_json(as_json)
    assert round_tripped.vcon_dict["vcon"] == "0.4.0"


def test_build_new_omits_empty_group_and_redacted() -> None:
    """A fresh vCon must not carry empty ``group`` / ``redacted`` defaults."""
    fresh = Vcon.build_new()
    for absent_key in ("group", "redacted"):
        assert absent_key not in fresh.vcon_dict


def test_is_valid_with_new_defaults() -> None:
    """One party plus one recording dialog on a build_new() vCon validates."""
    from vcon.party import Party
    from vcon.dialog import Dialog

    subject = Vcon.build_new()

    alice = Party(name="Alice")
    subject.add_party(alice)

    recording = Dialog(
        type="recording",
        start="2026-05-10T12:00:00Z",
        parties=[0],
        mediatype="audio/wav",
    )
    subject.add_dialog(recording)

    ok, problems = subject.is_valid()
    assert ok, f"Expected valid, got errors: {problems}"
66 changes: 65 additions & 1 deletion tests/test_wtf_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,71 @@ def test_export_transcription(self):
srt_content = extension.export_transcription(attachment, "srt")
assert "1" in srt_content
assert "Hello world" in srt_content

vtt_content = extension.export_transcription(attachment, "vtt")
assert "WEBVTT" in vtt_content
assert "Hello world" in vtt_content


class TestWTFTranscriptionAnalysis:
"""Tests for the add_wtf_transcription_analysis() Vcon helper."""

def _payload(self):
return {
"transcript": {
"text": "Hello world",
"language": "en",
"duration": 2.0,
"confidence": 0.95,
},
"segments": [{
"id": 0,
"start": 0.0,
"end": 2.0,
"text": "Hello world",
"confidence": 0.95,
}],
"metadata": {
"created_at": "2026-05-10T00:00:00Z",
"processed_at": "2026-05-10T00:00:01Z",
"provider": "whisper",
"model": "whisper-1",
},
}

def test_add_wtf_transcription_analysis_basic(self):
from vcon import Vcon

vcon = Vcon.build_new()
vcon.add_wtf_transcription_analysis(**self._payload(), dialog_index=0)

assert len(vcon.vcon_dict["analysis"]) == 1
assert vcon.vcon_dict["attachments"] == []

entry = vcon.vcon_dict["analysis"][0]
assert entry["type"] == "transcription"
assert entry["vendor"] == "whisper"
assert entry["product"] == "whisper-1"
assert entry["encoding"] == "json"
assert entry["schema"] == Vcon.WTF_SCHEMA_URL
assert entry["dialog"] == [0]

body = json.loads(entry["body"])
assert body["transcript"]["text"] == "Hello world"

def test_add_wtf_transcription_analysis_registers_extension(self):
from vcon import Vcon

vcon = Vcon.build_new()
vcon.add_wtf_transcription_analysis(**self._payload())
assert "wtf_transcription" in vcon.get_extensions()

def test_attachment_helper_unchanged_regression(self):
"""Sanity: the existing _attachment helper still writes to attachments[]."""
from vcon import Vcon

vcon = Vcon.build_new()
vcon.add_wtf_transcription_attachment(**self._payload(), dialog_index=0)
assert len(vcon.vcon_dict["attachments"]) == 1
assert vcon.vcon_dict["analysis"] == []
assert vcon.vcon_dict["attachments"][0]["purpose"] == "wtf_transcription"
Loading