Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/buildcompiler/inventory/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
"""Package scaffolding for clean architecture."""
"""Inventory package exports for deterministic lookup/indexing contracts."""

from .inventory import Inventory

__all__ = ["Inventory"]
202 changes: 202 additions & 0 deletions src/buildcompiler/inventory/inventory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
"""Normalized inventory facade with eager deterministic indexes."""

from __future__ import annotations

from collections import defaultdict

from buildcompiler.domain import (
BuildStage,
IndexedBackbone,
IndexedPlasmid,
IndexedReagent,
MaterialState,
)


# Rank of each material state, lowest first; used to compare a plasmid's
# state against a requested minimum state.
_MATERIAL_ORDER = {
    state: rank
    for rank, state in enumerate(
        (
            MaterialState.PLANNED,
            MaterialState.GENERATED,
            MaterialState.ASSEMBLED,
            MaterialState.TRANSFORMED,
            MaterialState.PLATED,
        )
    )
}


class Inventory:
    """In-memory lookup facade over plasmids, backbones, and reagents.

    Records are indexed eagerly as they are added. Every secondary index
    keeps its entries sorted by ``identity`` so query results do not depend
    on insertion order. Re-adding a record whose ``identity`` is already
    known replaces the earlier record in every index it appeared in.
    """

    def __init__(
        self,
        *,
        plasmids: list[IndexedPlasmid] | None = None,
        backbones: list[IndexedBackbone] | None = None,
        reagents: list[IndexedReagent] | None = None,
    ) -> None:
        """Create the empty indexes and index any initial records.

        Args:
            plasmids: Plasmids to index; ``None`` means none.
            backbones: Backbones to index; ``None`` means none.
            reagents: Reagents to index; ``None`` means none.
        """
        self.plasmids_by_identity: dict[str, IndexedPlasmid] = {}
        self.plasmids_by_insert_identity: dict[str, list[IndexedPlasmid]] = defaultdict(list)
        self.plasmids_by_fusion_sites: dict[tuple[str, ...], list[IndexedPlasmid]] = defaultdict(list)
        self.plasmids_by_antibiotic: dict[str, list[IndexedPlasmid]] = defaultdict(list)

        self.backbones_by_identity: dict[str, IndexedBackbone] = {}
        self.backbones_by_fusion_sites_and_antibiotic: dict[
            tuple[tuple[str, ...], str], list[IndexedBackbone]
        ] = defaultdict(list)

        self.reagents_by_identity: dict[str, IndexedReagent] = {}
        self.reagents_by_name: dict[str, IndexedReagent] = {}

        self.generated_products_by_identity: dict[str, IndexedPlasmid] = {}

        for plasmid in plasmids or []:
            self._add_plasmid(plasmid)
        for backbone in backbones or []:
            self._add_backbone(backbone)
        for reagent in reagents or []:
            self._add_reagent(reagent)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _metadata_of(item: IndexedPlasmid | IndexedBackbone) -> dict:
        """Return ``item.metadata``, or an empty dict when it is unset or empty."""
        return item.metadata or {}

    @staticmethod
    def _insert_sorted(index: dict, key, item) -> None:
        """Add ``item`` to the list at ``index[key]``, keeping it sorted by identity."""
        index[key] = sorted([*index.get(key, []), item], key=lambda entry: entry.identity)

    @staticmethod
    def _discard_identity(index: dict, key, identity: str) -> None:
        """Drop every entry with ``identity`` from ``index[key]``.

        The key itself is removed once its list becomes empty. ``get`` and
        ``pop`` are used so a missing key is never created as a side effect
        of the ``defaultdict``.
        """
        remaining = [entry for entry in index.get(key, []) if entry.identity != identity]
        if remaining:
            index[key] = remaining
        else:
            index.pop(key, None)

    def _sorted_plasmids(self, items: list[IndexedPlasmid]) -> list[IndexedPlasmid]:
        """Return a new list of ``items`` ordered by ``identity``."""
        return sorted(items, key=lambda p: p.identity)

    def _backbone_stage(self, backbone: IndexedBackbone) -> BuildStage | None:
        """Return the backbone's ``"stage"`` metadata as a ``BuildStage``.

        Returns ``None`` when the metadata is unset, has no stage, or holds a
        value that ``BuildStage`` rejects.
        """
        raw = self._metadata_of(backbone).get("stage")
        if raw is None:
            return None
        if isinstance(raw, BuildStage):
            return raw
        try:
            return BuildStage(raw)
        except ValueError:
            return None

    def _backbone_index_key(
        self, backbone: IndexedBackbone
    ) -> tuple[tuple[str, ...], str] | None:
        """Return the ``(fusion_sites, antibiotic)`` index key for a backbone.

        Returns ``None`` when either part is missing or empty; such
        backbones are only reachable through ``backbones_by_identity``.
        """
        metadata = self._metadata_of(backbone)
        fusion_sites = tuple(metadata.get("fusion_sites") or ())
        antibiotic = metadata.get("antibiotic")
        if fusion_sites and antibiotic:
            return (fusion_sites, antibiotic)
        return None

    def _remove_plasmid_from_secondary_indexes(self, plasmid: IndexedPlasmid) -> None:
        """Remove ``plasmid`` from the insert, fusion-site, and antibiotic indexes.

        The keys to clean are read from the plasmid's current metadata, so
        this relies on that metadata being unchanged since it was indexed.
        """
        metadata = self._metadata_of(plasmid)
        identity = plasmid.identity

        for insert_identity in sorted(set(metadata.get("insert_identities") or ())):
            self._discard_identity(self.plasmids_by_insert_identity, insert_identity, identity)

        fusion_sites = tuple(metadata.get("fusion_sites") or ())
        if fusion_sites:
            self._discard_identity(self.plasmids_by_fusion_sites, fusion_sites, identity)

        antibiotic = metadata.get("antibiotic")
        if antibiotic:
            self._discard_identity(self.plasmids_by_antibiotic, antibiotic, identity)

    def _add_plasmid(self, plasmid: IndexedPlasmid) -> None:
        """Index ``plasmid``, replacing any earlier record with the same identity."""
        previous = self.plasmids_by_identity.get(plasmid.identity)
        if previous is not None:
            # Clear the old record's keys first so a metadata change does not
            # leave stale or duplicate entries behind.
            self._remove_plasmid_from_secondary_indexes(previous)

        self.plasmids_by_identity[plasmid.identity] = plasmid
        metadata = self._metadata_of(plasmid)

        # De-duplicate so a repeated insert identity yields a single entry.
        for insert_identity in sorted(set(metadata.get("insert_identities") or ())):
            self._insert_sorted(self.plasmids_by_insert_identity, insert_identity, plasmid)

        fusion_sites = tuple(metadata.get("fusion_sites") or ())
        if fusion_sites:
            self._insert_sorted(self.plasmids_by_fusion_sites, fusion_sites, plasmid)

        antibiotic = metadata.get("antibiotic")
        if antibiotic:
            self._insert_sorted(self.plasmids_by_antibiotic, antibiotic, plasmid)

    def _add_backbone(self, backbone: IndexedBackbone) -> None:
        """Index ``backbone``, replacing any earlier record with the same identity."""
        previous = self.backbones_by_identity.get(backbone.identity)
        if previous is not None:
            # Mirror the plasmid path: drop the old composite-index entry so a
            # re-added backbone is neither duplicated nor left under an old key.
            previous_key = self._backbone_index_key(previous)
            if previous_key is not None:
                self._discard_identity(
                    self.backbones_by_fusion_sites_and_antibiotic,
                    previous_key,
                    previous.identity,
                )

        self.backbones_by_identity[backbone.identity] = backbone
        key = self._backbone_index_key(backbone)
        if key is not None:
            self._insert_sorted(self.backbones_by_fusion_sites_and_antibiotic, key, backbone)

    def _add_reagent(self, reagent: IndexedReagent) -> None:
        """Index ``reagent`` by identity and, when it has a name, by name."""
        previous = self.reagents_by_identity.get(reagent.identity)
        if (
            previous is not None
            and previous.name
            and previous.name != reagent.name
            # Only drop the old name if it still points at the replaced
            # record; another reagent may have claimed that name since.
            and self.reagents_by_name.get(previous.name) is previous
        ):
            del self.reagents_by_name[previous.name]

        self.reagents_by_identity[reagent.identity] = reagent
        if reagent.name:
            self.reagents_by_name[reagent.name] = reagent

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    def find_single_part_plasmids(
        self, part_identity: str, *, antibiotic: str | None = None
    ) -> list[IndexedPlasmid]:
        """Return plasmids indexed under ``part_identity``, sorted by identity.

        Args:
            part_identity: Insert identity to look up.
            antibiotic: When given, keep only plasmids whose ``"antibiotic"``
                metadata equals this value.
        """
        matches = list(self.plasmids_by_insert_identity.get(part_identity, []))
        if antibiotic is not None:
            matches = [
                p for p in matches if self._metadata_of(p).get("antibiotic") == antibiotic
            ]
        return self._sorted_plasmids(matches)

    def find_lvl1_region_plasmids(
        self,
        region_identity: str,
        *,
        min_material_state: MaterialState = MaterialState.PLANNED,
    ) -> list[IndexedPlasmid]:
        """Return plasmids indexed under ``region_identity`` at or past a state.

        Args:
            region_identity: Insert identity to look up.
            min_material_state: Lowest acceptable state, ranked by
                ``_MATERIAL_ORDER``.

        Raises:
            KeyError: If ``min_material_state`` or a candidate's state is not
                present in ``_MATERIAL_ORDER``.
        """
        matches = self.plasmids_by_insert_identity.get(region_identity, [])
        min_rank = _MATERIAL_ORDER[min_material_state]
        filtered = [p for p in matches if _MATERIAL_ORDER[p.state] >= min_rank]
        return self._sorted_plasmids(filtered)

    def find_backbone(
        self,
        *,
        fusion_sites: tuple[str, ...] | None = None,
        antibiotic: str | None = None,
        stage: BuildStage | None = None,
    ) -> IndexedBackbone | None:
        """Return the first matching backbone in identity order, or ``None``.

        Each argument left as ``None`` is not used as a filter. When both
        ``fusion_sites`` and ``antibiotic`` are given the composite index is
        consulted directly; otherwise all backbones are scanned in identity
        order.
        """
        if fusion_sites is not None and antibiotic is not None:
            candidates = list(
                self.backbones_by_fusion_sites_and_antibiotic.get(
                    (tuple(fusion_sites), antibiotic), []
                )
            )
        else:
            candidates = sorted(self.backbones_by_identity.values(), key=lambda b: b.identity)
            if fusion_sites is not None:
                wanted_sites = tuple(fusion_sites)
                candidates = [
                    b
                    for b in candidates
                    if tuple(self._metadata_of(b).get("fusion_sites") or ()) == wanted_sites
                ]
            if antibiotic is not None:
                candidates = [
                    b for b in candidates if self._metadata_of(b).get("antibiotic") == antibiotic
                ]
        if stage is not None:
            candidates = [b for b in candidates if self._backbone_stage(b) == stage]
        return candidates[0] if candidates else None

    def find_restriction_enzyme(self, name: str) -> IndexedReagent | None:
        """Return the reagent called ``name`` if its type is ``"restriction_enzyme"``."""
        reagent = self.reagents_by_name.get(name)
        if reagent and reagent.reagent_type == "restriction_enzyme":
            return reagent
        return None

    def find_ligase(self, preferred: str | None = None) -> IndexedReagent | None:
        """Return a ligase reagent, favouring the one named ``preferred``.

        Falls back to the ligase with the smallest identity when ``preferred``
        is not given, is unknown, or names a reagent that is not a ligase.
        Returns ``None`` when no ligase is indexed.
        """
        if preferred:
            reagent = self.reagents_by_name.get(preferred)
            if reagent and reagent.reagent_type == "ligase":
                return reagent
        return min(
            (r for r in self.reagents_by_identity.values() if r.reagent_type == "ligase"),
            key=lambda r: r.identity,
            default=None,
        )

    # ------------------------------------------------------------------
    # Mutation
    # ------------------------------------------------------------------

    def add_generated_product(self, product: IndexedPlasmid) -> None:
        """Record ``product`` as a generated product and index it as a plasmid."""
        self.generated_products_by_identity[product.identity] = product
        self._add_plasmid(product)
Comment on lines +200 to +202
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Remove stale index entries before re-adding a product

add_generated_product updates identity maps but then blindly calls _add_plasmid, which appends to all secondary lists without clearing any existing entry for the same product.identity. If the same product is indexed again (e.g., retries or state/metadata updates), lookups like find_single_part_plasmids and find_lvl1_region_plasmids can return duplicate or stale plasmid records, which can cascade into duplicate planning/execution actions.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@codex Good catch — please fix this issue.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Summary

  • Fixed stale/duplicate plasmid secondary-index behavior by adding _remove_plasmid_from_secondary_indexes, which removes old insert/fusion-site/antibiotic mappings for an existing plasmid identity before reindexing. This ensures deterministic replacement instead of append-only duplication when the same generated product is added again. (src/buildcompiler/inventory/inventory.py, L69-L120)
  • Updated _add_plasmid to detect existing identity entries and clear their old secondary-index references before inserting updated data, while keeping identity map behavior unchanged. (src/buildcompiler/inventory/inventory.py, L96-L120)
  • Added a regression test that re-adds the same generated product identity with changed metadata and validates that old index keys are cleared and new keys contain only the updated record. (tests/unit/inventory/test_inventory_indexes.py, L73-L100)

Testing

  • pytest tests/unit/inventory/test_inventory_indexes.py

View task →

6 changes: 5 additions & 1 deletion src/buildcompiler/sbol/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
"""Package scaffolding for clean architecture."""
"""SBOL package exports for clean architecture contracts."""

from .resolver import PullPolicy, SbolResolver

__all__ = ["PullPolicy", "SbolResolver"]
74 changes: 74 additions & 0 deletions src/buildcompiler/sbol/resolver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""SBOL document resolver with deterministic pull policy."""

from __future__ import annotations

from enum import Enum
from typing import Any, Callable

import sbol2


class PullPolicy(str, Enum):
    """Resolver behavior for remote pull attempts."""

    # SbolResolver.maybe_pull returns None without calling the pull client.
    NEVER = "never"
    # SbolResolver._get checks the local document first and pulls only when
    # that lookup does not yield an object of the expected type.
    MISSING_ONLY = "missing_only"
    # SbolResolver._get pulls before checking the local document, then falls
    # back to the local document if the pull yields nothing usable.
    ALWAYS_REFRESH = "always_refresh"


class SbolResolver:
    """Look up SBOL objects by identity in a local document, optionally pulling.

    The configured ``pull_policy`` decides whether and when the optional
    ``pull_client`` callable is consulted.
    """

    def __init__(
        self,
        document: sbol2.Document,
        *,
        pull_policy: PullPolicy = PullPolicy.MISSING_ONLY,
        pull_client: Callable[[str], Any] | None = None,
    ) -> None:
        """Store the document, the pull policy, and the optional pull client."""
        self.document = document
        self.pull_policy = pull_policy
        self.pull_client = pull_client

    def maybe_pull(self, identity: str) -> Any | None:
        """Call the pull client for ``identity`` if pulling is permitted.

        Returns ``None`` without calling anything when the policy is
        ``NEVER`` or no pull client was supplied; otherwise returns whatever
        the pull client returns.
        """
        pulling_disabled = self.pull_policy == PullPolicy.NEVER or self.pull_client is None
        return None if pulling_disabled else self.pull_client(identity)

    def _candidates(self, identity: str):
        """Lazily yield lookup results in the order the pull policy dictates.

        Each lookup runs only when the consumer asks for the next value, so
        later lookups are skipped once an earlier one is accepted.
        """
        if self.pull_policy == PullPolicy.ALWAYS_REFRESH:
            yield self.maybe_pull(identity)

        yield self.document.find(identity)

        if self.pull_policy == PullPolicy.MISSING_ONLY:
            yield self.maybe_pull(identity)
            # Look in the document again after the pull attempt.
            yield self.document.find(identity)

    def _get(self, identity: str, expected_type: type) -> Any:
        """Return the first lookup result that is an ``expected_type`` instance.

        Raises:
            LookupError: If no lookup yields an instance of ``expected_type``.
        """
        for candidate in self._candidates(identity):
            if isinstance(candidate, expected_type):
                return candidate

        raise LookupError(
            f"Could not resolve {expected_type.__name__} with identity '{identity}'"
        )

    def get_component(self, identity: str) -> sbol2.ComponentDefinition:
        """Resolve ``identity`` to an ``sbol2.ComponentDefinition``."""
        return self._get(identity, sbol2.ComponentDefinition)

    def get_module(self, identity: str) -> sbol2.ModuleDefinition:
        """Resolve ``identity`` to an ``sbol2.ModuleDefinition``."""
        return self._get(identity, sbol2.ModuleDefinition)

    def get_combinatorial_derivation(
        self, identity: str
    ) -> sbol2.CombinatorialDerivation:
        """Resolve ``identity`` to an ``sbol2.CombinatorialDerivation``."""
        return self._get(identity, sbol2.CombinatorialDerivation)

    def get_implementation(self, identity: str) -> sbol2.Implementation:
        """Resolve ``identity`` to an ``sbol2.Implementation``."""
        return self._get(identity, sbol2.Implementation)
100 changes: 100 additions & 0 deletions tests/unit/inventory/test_inventory_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from buildcompiler.domain import BuildStage, IndexedBackbone, IndexedPlasmid, IndexedReagent, MaterialState
from buildcompiler.inventory import Inventory


def _plasmid(
    identity: str,
    inserts: list[str],
    fusion_sites=("A", "B"),
    antibiotic="Ampicillin",
    state=MaterialState.PLANNED,
):
    """Build an IndexedPlasmid fixture whose display id is the last URL segment."""
    last_segment = identity.rsplit("/", 1)[-1]
    plasmid_metadata = dict(
        insert_identities=inserts,
        fusion_sites=fusion_sites,
        antibiotic=antibiotic,
    )
    return IndexedPlasmid(
        identity=identity,
        display_id=last_segment,
        state=state,
        metadata=plasmid_metadata,
    )


def test_inventory_indexes_and_queries_are_deterministic():
    """Indexes and queries come back in identity order despite shuffled input."""

    def identities(records):
        return [record.identity for record in records]

    part_a = "https://example.org/partA"
    region_1 = "https://example.org/region1"

    p2 = _plasmid("https://example.org/p2", [part_a], state=MaterialState.GENERATED)
    p1 = _plasmid("https://example.org/p1", [part_a, region_1])

    shared_traits = {"fusion_sites": ("A", "B"), "antibiotic": "Ampicillin"}
    b1 = IndexedBackbone(
        identity="https://example.org/b1",
        metadata={**shared_traits, "stage": BuildStage.ASSEMBLY_LVL1.value},
    )
    b2 = IndexedBackbone(
        identity="https://example.org/b2",
        metadata={**shared_traits, "stage": BuildStage.ASSEMBLY_LVL2.value},
    )

    enzyme = IndexedReagent(
        identity="https://example.org/r1", name="BsaI", reagent_type="restriction_enzyme"
    )
    ligase = IndexedReagent(
        identity="https://example.org/r2", name="T4_DNA_ligase", reagent_type="ligase"
    )

    # Records are deliberately supplied out of identity order.
    inv = Inventory(plasmids=[p2, p1], backbones=[b2, b1], reagents=[enzyme, ligase])
    expected_plasmid_order = [p1.identity, p2.identity]

    assert inv.plasmids_by_identity[p1.identity] == p1
    assert identities(inv.plasmids_by_insert_identity[part_a]) == expected_plasmid_order
    assert identities(inv.plasmids_by_fusion_sites[("A", "B")]) == expected_plasmid_order
    assert identities(inv.plasmids_by_antibiotic["Ampicillin"]) == expected_plasmid_order

    backbone_key = (("A", "B"), "Ampicillin")
    assert identities(inv.backbones_by_fusion_sites_and_antibiotic[backbone_key]) == [
        b1.identity,
        b2.identity,
    ]
    found_backbone = inv.find_backbone(
        fusion_sites=("A", "B"), antibiotic="Ampicillin", stage=BuildStage.ASSEMBLY_LVL1
    )
    assert found_backbone == b1

    assert inv.find_restriction_enzyme("BsaI") == enzyme
    assert inv.find_ligase("T4_DNA_ligase") == ligase
    assert inv.find_ligase().identity == ligase.identity

    assert identities(inv.find_single_part_plasmids(part_a)) == expected_plasmid_order
    assert identities(inv.find_lvl1_region_plasmids(region_1)) == [p1.identity]
    at_least_generated = inv.find_lvl1_region_plasmids(
        part_a, min_material_state=MaterialState.GENERATED
    )
    assert at_least_generated == [p2]


def test_add_generated_product_updates_indexes_immediately():
    """A generated product is visible in every index right after it is added."""
    part_g = "https://example.org/partG"
    generated = _plasmid(
        "https://example.org/generated1",
        [part_g],
        fusion_sites=("C", "D"),
        antibiotic="Kanamycin",
        state=MaterialState.GENERATED,
    )
    inventory = Inventory()

    inventory.add_generated_product(generated)

    only_generated = [generated]
    assert inventory.generated_products_by_identity[generated.identity] == generated
    assert inventory.plasmids_by_identity[generated.identity] == generated
    assert inventory.find_single_part_plasmids(part_g) == only_generated
    assert inventory.plasmids_by_fusion_sites[("C", "D")] == only_generated
    assert inventory.plasmids_by_antibiotic["Kanamycin"] == only_generated


def test_add_generated_product_replaces_existing_secondary_indexes():
    """Re-adding an identity with new metadata clears its old index keys."""
    shared_identity = "https://example.org/generated2"
    old_part = "https://example.org/partOld"
    new_part = "https://example.org/partNew"

    first_version = _plasmid(
        shared_identity,
        [old_part],
        fusion_sites=("A", "B"),
        antibiotic="Ampicillin",
        state=MaterialState.GENERATED,
    )
    second_version = _plasmid(
        shared_identity,
        [new_part],
        fusion_sites=("C", "D"),
        antibiotic="Kanamycin",
        state=MaterialState.ASSEMBLED,
    )

    inventory = Inventory()
    for version in (first_version, second_version):
        inventory.add_generated_product(version)

    # Identity maps hold the latest record.
    assert inventory.plasmids_by_identity[second_version.identity] == second_version
    assert inventory.generated_products_by_identity[second_version.identity] == second_version

    # Old keys are empty; new keys hold only the updated record.
    assert inventory.find_single_part_plasmids(old_part) == []
    assert inventory.find_single_part_plasmids(new_part) == [second_version]
    assert inventory.plasmids_by_fusion_sites.get(("A", "B"), []) == []
    assert inventory.plasmids_by_fusion_sites[("C", "D")] == [second_version]
    assert inventory.plasmids_by_antibiotic.get("Ampicillin", []) == []
    assert inventory.plasmids_by_antibiotic["Kanamycin"] == [second_version]
Loading
Loading