-
Notifications
You must be signed in to change notification settings - Fork 1
[ISSUE-05] Implement SBOL resolver and inventory foundation #72
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Gonza10V
merged 3 commits into
full_build
from
codex/implement-sbol-resolver-and-inventory-foundation-i2plk2
May 5, 2026
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1,5 @@ | ||
| """Package scaffolding for clean architecture.""" | ||
| """Inventory package exports for deterministic lookup/indexing contracts.""" | ||
|
|
||
| from .inventory import Inventory | ||
|
|
||
| __all__ = ["Inventory"] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,202 @@ | ||
| """Normalized inventory facade with eager deterministic indexes.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from collections import defaultdict | ||
|
|
||
| from buildcompiler.domain import ( | ||
| BuildStage, | ||
| IndexedBackbone, | ||
| IndexedPlasmid, | ||
| IndexedReagent, | ||
| MaterialState, | ||
| ) | ||
|
|
||
|
|
||
# Rank of each material state, used to compare a plasmid's state against a
# caller-supplied minimum. Ranks follow the order the states are listed in.
_MATERIAL_ORDER = {
    state: rank
    for rank, state in enumerate(
        (
            MaterialState.PLANNED,
            MaterialState.GENERATED,
            MaterialState.ASSEMBLED,
            MaterialState.TRANSFORMED,
            MaterialState.PLATED,
        )
    )
}
|
|
||
|
|
||
class Inventory:
    """Normalized inventory facade with eagerly maintained, deterministic indexes.

    Plasmids, backbones and reagents are indexed as they are added. Every
    list-valued index is kept sorted by ``identity`` so lookups return results
    in a stable order regardless of insertion order.

    Adding an item whose ``identity`` is already indexed replaces the previous
    record in every index rather than leaving duplicate or stale entries.
    Removal relies on the metadata the previous record carries at that moment,
    so metadata should not be mutated after an item has been indexed.
    """

    def __init__(
        self,
        *,
        plasmids: list[IndexedPlasmid] | None = None,
        backbones: list[IndexedBackbone] | None = None,
        reagents: list[IndexedReagent] | None = None,
    ) -> None:
        """Build all indexes from the optional initial collections."""
        self.plasmids_by_identity: dict[str, IndexedPlasmid] = {}
        self.plasmids_by_insert_identity: dict[str, list[IndexedPlasmid]] = defaultdict(list)
        self.plasmids_by_fusion_sites: dict[tuple[str, ...], list[IndexedPlasmid]] = defaultdict(list)
        self.plasmids_by_antibiotic: dict[str, list[IndexedPlasmid]] = defaultdict(list)

        self.backbones_by_identity: dict[str, IndexedBackbone] = {}
        self.backbones_by_fusion_sites_and_antibiotic: dict[
            tuple[tuple[str, ...], str], list[IndexedBackbone]
        ] = defaultdict(list)

        self.reagents_by_identity: dict[str, IndexedReagent] = {}
        self.reagents_by_name: dict[str, IndexedReagent] = {}

        self.generated_products_by_identity: dict[str, IndexedPlasmid] = {}

        for plasmid in plasmids or []:
            self._add_plasmid(plasmid)
        for backbone in backbones or []:
            self._add_backbone(backbone)
        for reagent in reagents or []:
            self._add_reagent(reagent)

    # ------------------------------------------------------------------ helpers

    def _metadata(self, item) -> dict:
        """Return ``item.metadata``, or an empty dict when it is None/empty."""
        return item.metadata or {}

    def _insert_identities(self, plasmid: IndexedPlasmid) -> list[str]:
        """Return the plasmid's insert identities, de-duplicated and sorted."""
        # De-duplicate so a repeated insert identity cannot index the same
        # plasmid twice under one key.
        return sorted(set(self._metadata(plasmid).get("insert_identities") or ()))

    def _backbone_index_key(
        self, backbone: IndexedBackbone
    ) -> tuple[tuple[str, ...], str] | None:
        """Return the (fusion_sites, antibiotic) key, or None if either is missing."""
        meta = self._metadata(backbone)
        fusion_sites = tuple(meta.get("fusion_sites") or ())
        antibiotic = meta.get("antibiotic")
        if fusion_sites and antibiotic:
            return (fusion_sites, antibiotic)
        return None

    def _insert_sorted(self, index: dict, key: object, item) -> None:
        """Add ``item`` under ``index[key]`` and keep that list sorted by identity."""
        bucket = index[key]  # the indexes are defaultdicts, so this creates the list
        bucket.append(item)
        bucket.sort(key=lambda indexed: indexed.identity)

    def _discard_from_index(self, index: dict, key: object, identity: str) -> None:
        """Drop every entry with ``identity`` from ``index[key]``.

        The key itself is removed once its list becomes empty.
        """
        # ``.get`` avoids creating an empty list in the defaultdict on a miss.
        remaining = [indexed for indexed in index.get(key, []) if indexed.identity != identity]
        if remaining:
            index[key] = remaining
        else:
            index.pop(key, None)

    def _sorted_plasmids(self, items: list[IndexedPlasmid]) -> list[IndexedPlasmid]:
        """Return a new list of ``items`` sorted by identity."""
        return sorted(items, key=lambda p: p.identity)

    def _backbone_stage(self, backbone: IndexedBackbone) -> BuildStage | None:
        """Return the backbone's ``stage`` metadata as a BuildStage.

        Returns None when the stage is absent or is not a valid BuildStage value.
        """
        raw = self._metadata(backbone).get("stage")
        if raw is None:
            return None
        if isinstance(raw, BuildStage):
            return raw
        try:
            return BuildStage(raw)
        except ValueError:
            return None

    # ----------------------------------------------------------------- indexing

    def _remove_plasmid_from_secondary_indexes(self, plasmid: IndexedPlasmid) -> None:
        """Remove ``plasmid`` from the insert, fusion-site and antibiotic indexes."""
        meta = self._metadata(plasmid)
        for insert_identity in self._insert_identities(plasmid):
            self._discard_from_index(
                self.plasmids_by_insert_identity, insert_identity, plasmid.identity
            )

        fusion_sites = tuple(meta.get("fusion_sites") or ())
        if fusion_sites:
            self._discard_from_index(self.plasmids_by_fusion_sites, fusion_sites, plasmid.identity)

        antibiotic = meta.get("antibiotic")
        if antibiotic:
            self._discard_from_index(self.plasmids_by_antibiotic, antibiotic, plasmid.identity)

    def _add_plasmid(self, plasmid: IndexedPlasmid) -> None:
        """Index ``plasmid``, replacing any record already stored for its identity."""
        previous = self.plasmids_by_identity.get(plasmid.identity)
        if previous is not None:
            self._remove_plasmid_from_secondary_indexes(previous)

        self.plasmids_by_identity[plasmid.identity] = plasmid
        meta = self._metadata(plasmid)
        for insert_identity in self._insert_identities(plasmid):
            self._insert_sorted(self.plasmids_by_insert_identity, insert_identity, plasmid)

        fusion_sites = tuple(meta.get("fusion_sites") or ())
        if fusion_sites:
            self._insert_sorted(self.plasmids_by_fusion_sites, fusion_sites, plasmid)

        antibiotic = meta.get("antibiotic")
        if antibiotic:
            self._insert_sorted(self.plasmids_by_antibiotic, antibiotic, plasmid)

    def _add_backbone(self, backbone: IndexedBackbone) -> None:
        """Index ``backbone``, replacing any record already stored for its identity."""
        previous = self.backbones_by_identity.get(backbone.identity)
        if previous is not None:
            # Without this, re-adding an identity would leave a duplicate (or
            # a stale record under the old key) in the secondary index.
            previous_key = self._backbone_index_key(previous)
            if previous_key is not None:
                self._discard_from_index(
                    self.backbones_by_fusion_sites_and_antibiotic,
                    previous_key,
                    previous.identity,
                )

        self.backbones_by_identity[backbone.identity] = backbone
        key = self._backbone_index_key(backbone)
        if key is not None:
            self._insert_sorted(self.backbones_by_fusion_sites_and_antibiotic, key, backbone)

    def _add_reagent(self, reagent: IndexedReagent) -> None:
        """Index ``reagent`` by identity and, when it has a name, by name."""
        previous = self.reagents_by_identity.get(reagent.identity)
        if (
            previous is not None
            and previous.name
            and self.reagents_by_name.get(previous.name) is previous
        ):
            # Drop the old name mapping only if it still points at the record
            # being replaced; another reagent may have claimed that name since.
            del self.reagents_by_name[previous.name]

        self.reagents_by_identity[reagent.identity] = reagent
        if reagent.name:
            self.reagents_by_name[reagent.name] = reagent

    # ------------------------------------------------------------------ queries

    def find_single_part_plasmids(
        self, part_identity: str, *, antibiotic: str | None = None
    ) -> list[IndexedPlasmid]:
        """Return plasmids whose insert identities include ``part_identity``.

        When ``antibiotic`` is given, only plasmids with that antibiotic
        metadata are returned. Results are sorted by identity.

        NOTE(review): nothing here checks that the plasmid carries only this
        one insert — confirm that matches the intended "single part" meaning.
        """
        matches = self.plasmids_by_insert_identity.get(part_identity, [])
        if antibiotic is not None:
            matches = [p for p in matches if self._metadata(p).get("antibiotic") == antibiotic]
        return self._sorted_plasmids(matches)

    def find_lvl1_region_plasmids(
        self,
        region_identity: str,
        *,
        min_material_state: MaterialState = MaterialState.PLANNED,
    ) -> list[IndexedPlasmid]:
        """Return plasmids containing ``region_identity`` at or beyond a material state.

        Results are sorted by identity.

        Raises:
            KeyError: if ``min_material_state`` or a candidate plasmid's state
                has no entry in ``_MATERIAL_ORDER``.
        """
        matches = self.plasmids_by_insert_identity.get(region_identity, [])
        min_rank = _MATERIAL_ORDER[min_material_state]
        filtered = [p for p in matches if _MATERIAL_ORDER[p.state] >= min_rank]
        return self._sorted_plasmids(filtered)

    def find_backbone(
        self,
        *,
        fusion_sites: tuple[str, ...] | None = None,
        antibiotic: str | None = None,
        stage: BuildStage | None = None,
    ) -> IndexedBackbone | None:
        """Return the first backbone (by identity order) matching every given filter.

        Filters left as None are not applied. Returns None when nothing matches.
        """
        if fusion_sites is not None and antibiotic is not None:
            # Both key parts are known: use the combined index directly.
            candidates = list(
                self.backbones_by_fusion_sites_and_antibiotic.get(
                    (tuple(fusion_sites), antibiotic), []
                )
            )
        else:
            candidates = sorted(self.backbones_by_identity.values(), key=lambda b: b.identity)
            if fusion_sites is not None:
                wanted_sites = tuple(fusion_sites)
                candidates = [
                    b
                    for b in candidates
                    if tuple(self._metadata(b).get("fusion_sites") or ()) == wanted_sites
                ]
            if antibiotic is not None:
                candidates = [
                    b for b in candidates if self._metadata(b).get("antibiotic") == antibiotic
                ]
        if stage is not None:
            candidates = [b for b in candidates if self._backbone_stage(b) == stage]
        return candidates[0] if candidates else None

    def find_restriction_enzyme(self, name: str) -> IndexedReagent | None:
        """Return the reagent named ``name`` if it is a restriction enzyme, else None."""
        reagent = self.reagents_by_name.get(name)
        if reagent and reagent.reagent_type == "restriction_enzyme":
            return reagent
        return None

    def find_ligase(self, preferred: str | None = None) -> IndexedReagent | None:
        """Return a ligase reagent, favouring the one named ``preferred``.

        Falls back to the ligase with the lowest identity when ``preferred`` is
        not given or does not name a ligase. Returns None if there is no ligase.
        """
        if preferred:
            reagent = self.reagents_by_name.get(preferred)
            if reagent and reagent.reagent_type == "ligase":
                return reagent
        ligases = sorted(
            (r for r in self.reagents_by_identity.values() if r.reagent_type == "ligase"),
            key=lambda r: r.identity,
        )
        return ligases[0] if ligases else None

    def add_generated_product(self, product: IndexedPlasmid) -> None:
        """Record ``product`` as a generated product and index it as a plasmid."""
        self.generated_products_by_identity[product.identity] = product
        self._add_plasmid(product)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1,5 @@ | ||
| """Package scaffolding for clean architecture.""" | ||
| """SBOL package exports for clean architecture contracts.""" | ||
|
|
||
| from .resolver import PullPolicy, SbolResolver | ||
|
|
||
| __all__ = ["PullPolicy", "SbolResolver"] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| """SBOL document resolver with deterministic pull policy.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from enum import Enum | ||
| from typing import Any, Callable | ||
|
|
||
| import sbol2 | ||
|
|
||
|
|
||
class PullPolicy(str, Enum):
    """Policy controlling when the resolver may call its remote pull client.

    Members are also ``str`` instances, so each compares equal to its value.
    """

    # Never call the pull client.
    NEVER = "never"
    # Consult the local document first; pull only when that lookup misses.
    MISSING_ONLY = "missing_only"
    # Attempt a pull before consulting the local document.
    ALWAYS_REFRESH = "always_refresh"
|
|
||
|
|
||
class SbolResolver:
    """Look up SBOL objects by identity in a local document, optionally pulling remotely.

    The pull policy decides whether the pull client is never used, used only
    after a local miss, or tried before the local document.
    """

    def __init__(
        self,
        document: sbol2.Document,
        *,
        pull_policy: PullPolicy = PullPolicy.MISSING_ONLY,
        pull_client: Callable[[str], Any] | None = None,
    ) -> None:
        """Store the local document, the pull policy and the optional pull client."""
        self.document = document
        self.pull_policy = pull_policy
        self.pull_client = pull_client

    def maybe_pull(self, identity: str) -> Any | None:
        """Call the pull client for ``identity`` unless pulling is disabled.

        Returns None when the policy is NEVER or no pull client is configured;
        otherwise returns whatever the pull client returns.
        """
        pulling_disabled = self.pull_policy == PullPolicy.NEVER or self.pull_client is None
        return None if pulling_disabled else self.pull_client(identity)

    def _get(self, identity: str, expected_type: type) -> Any:
        """Resolve ``identity`` to an instance of ``expected_type``.

        Lookups run lazily, in an order set by the pull policy, and the first
        result of the expected type wins.

        Raises:
            LookupError: if no lookup yields an instance of ``expected_type``.
        """

        def from_document() -> Any:
            return self.document.find(identity)

        def from_pull() -> Any:
            return self.maybe_pull(identity)

        lookups = []
        if self.pull_policy == PullPolicy.ALWAYS_REFRESH:
            lookups.append(from_pull)
        lookups.append(from_document)
        if self.pull_policy == PullPolicy.MISSING_ONLY:
            # After a local miss: try the pull result, then re-check the document.
            lookups.append(from_pull)
            lookups.append(from_document)

        for lookup in lookups:
            candidate = lookup()
            if isinstance(candidate, expected_type):
                return candidate

        raise LookupError(
            f"Could not resolve {expected_type.__name__} with identity '{identity}'"
        )

    def get_component(self, identity: str) -> sbol2.ComponentDefinition:
        """Resolve ``identity`` as an ``sbol2.ComponentDefinition``."""
        return self._get(identity, sbol2.ComponentDefinition)

    def get_module(self, identity: str) -> sbol2.ModuleDefinition:
        """Resolve ``identity`` as an ``sbol2.ModuleDefinition``."""
        return self._get(identity, sbol2.ModuleDefinition)

    def get_combinatorial_derivation(
        self, identity: str
    ) -> sbol2.CombinatorialDerivation:
        """Resolve ``identity`` as an ``sbol2.CombinatorialDerivation``."""
        return self._get(identity, sbol2.CombinatorialDerivation)

    def get_implementation(self, identity: str) -> sbol2.Implementation:
        """Resolve ``identity`` as an ``sbol2.Implementation``."""
        return self._get(identity, sbol2.Implementation)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| from buildcompiler.domain import BuildStage, IndexedBackbone, IndexedPlasmid, IndexedReagent, MaterialState | ||
| from buildcompiler.inventory import Inventory | ||
|
|
||
|
|
||
def _plasmid(identity: str, inserts: list[str], fusion_sites=("A", "B"), antibiotic="Ampicillin", state=MaterialState.PLANNED):
    """Build an IndexedPlasmid fixture whose display id is the last path segment of ``identity``."""
    metadata = {
        "insert_identities": inserts,
        "fusion_sites": fusion_sites,
        "antibiotic": antibiotic,
    }
    display_id = identity.rsplit("/", 1)[-1]
    return IndexedPlasmid(
        identity=identity,
        display_id=display_id,
        state=state,
        metadata=metadata,
    )
|
|
||
|
|
||
def test_inventory_indexes_and_queries_are_deterministic():
    """Indexes and finders return identity-sorted results regardless of insertion order."""
    part_a = "https://example.org/partA"
    region_1 = "https://example.org/region1"
    sites = ("A", "B")

    def identities(items):
        return [item.identity for item in items]

    plasmid_2 = _plasmid("https://example.org/p2", [part_a], state=MaterialState.GENERATED)
    plasmid_1 = _plasmid("https://example.org/p1", [part_a, region_1])

    backbone_1 = IndexedBackbone(
        identity="https://example.org/b1",
        metadata={"fusion_sites": sites, "antibiotic": "Ampicillin", "stage": BuildStage.ASSEMBLY_LVL1.value},
    )
    backbone_2 = IndexedBackbone(
        identity="https://example.org/b2",
        metadata={"fusion_sites": sites, "antibiotic": "Ampicillin", "stage": BuildStage.ASSEMBLY_LVL2.value},
    )

    enzyme = IndexedReagent(identity="https://example.org/r1", name="BsaI", reagent_type="restriction_enzyme")
    ligase = IndexedReagent(identity="https://example.org/r2", name="T4_DNA_ligase", reagent_type="ligase")

    # Everything is supplied out of identity order on purpose.
    inv = Inventory(
        plasmids=[plasmid_2, plasmid_1],
        backbones=[backbone_2, backbone_1],
        reagents=[enzyme, ligase],
    )

    both_plasmids = [plasmid_1.identity, plasmid_2.identity]
    assert inv.plasmids_by_identity[plasmid_1.identity] == plasmid_1
    assert identities(inv.plasmids_by_insert_identity[part_a]) == both_plasmids
    assert identities(inv.plasmids_by_fusion_sites[sites]) == both_plasmids
    assert identities(inv.plasmids_by_antibiotic["Ampicillin"]) == both_plasmids

    key = (sites, "Ampicillin")
    assert identities(inv.backbones_by_fusion_sites_and_antibiotic[key]) == [
        backbone_1.identity,
        backbone_2.identity,
    ]
    found_backbone = inv.find_backbone(
        fusion_sites=sites, antibiotic="Ampicillin", stage=BuildStage.ASSEMBLY_LVL1
    )
    assert found_backbone == backbone_1

    assert inv.find_restriction_enzyme("BsaI") == enzyme
    assert inv.find_ligase("T4_DNA_ligase") == ligase
    assert inv.find_ligase().identity == ligase.identity

    assert identities(inv.find_single_part_plasmids(part_a)) == both_plasmids
    assert identities(inv.find_lvl1_region_plasmids(region_1)) == [plasmid_1.identity]
    generated_only = inv.find_lvl1_region_plasmids(part_a, min_material_state=MaterialState.GENERATED)
    assert generated_only == [plasmid_2]
|
|
||
|
|
||
def test_add_generated_product_updates_indexes_immediately():
    """A generated product is visible through every index right after it is added."""
    part_g = "https://example.org/partG"
    product = _plasmid(
        "https://example.org/generated1",
        [part_g],
        fusion_sites=("C", "D"),
        antibiotic="Kanamycin",
        state=MaterialState.GENERATED,
    )
    inv = Inventory()

    inv.add_generated_product(product)

    only_product = [product]
    assert inv.generated_products_by_identity[product.identity] == product
    assert inv.plasmids_by_identity[product.identity] == product
    assert inv.find_single_part_plasmids(part_g) == only_product
    assert inv.plasmids_by_fusion_sites[("C", "D")] == only_product
    assert inv.plasmids_by_antibiotic["Kanamycin"] == only_product
|
|
||
|
|
||
def test_add_generated_product_replaces_existing_secondary_indexes():
    """Re-adding a product under the same identity drops its old secondary-index entries."""
    shared_identity = "https://example.org/generated2"
    old_part = "https://example.org/partOld"
    new_part = "https://example.org/partNew"

    original = _plasmid(
        shared_identity,
        [old_part],
        fusion_sites=("A", "B"),
        antibiotic="Ampicillin",
        state=MaterialState.GENERATED,
    )
    updated = _plasmid(
        shared_identity,
        [new_part],
        fusion_sites=("C", "D"),
        antibiotic="Kanamycin",
        state=MaterialState.ASSEMBLED,
    )

    inv = Inventory()
    for product in (original, updated):
        inv.add_generated_product(product)

    # Primary maps hold the replacement record.
    assert inv.plasmids_by_identity[updated.identity] == updated
    assert inv.generated_products_by_identity[updated.identity] == updated

    # Old keys are emptied; new keys hold only the replacement.
    assert inv.find_single_part_plasmids(old_part) == []
    assert inv.find_single_part_plasmids(new_part) == [updated]
    assert inv.plasmids_by_fusion_sites.get(("A", "B"), []) == []
    assert inv.plasmids_by_fusion_sites[("C", "D")] == [updated]
    assert inv.plasmids_by_antibiotic.get("Ampicillin", []) == []
    assert inv.plasmids_by_antibiotic["Kanamycin"] == [updated]
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`add_generated_product` updates identity maps but then blindly calls `_add_plasmid`, which appends to all secondary lists without clearing any existing entry for the same `product.identity`. If the same product is indexed again (e.g., retries or state/metadata updates), lookups like `find_single_part_plasmids` and `find_lvl1_region_plasmids` can return duplicate or stale plasmid records, which can cascade into duplicate planning/execution actions.
Useful? React with 👍 / 👎.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@codex good catch solve this issue
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Summary
- Added `_remove_plasmid_from_secondary_indexes`, which removes old insert/fusion-site/antibiotic mappings for an existing plasmid identity before reindexing. This ensures deterministic replacement instead of append-only duplication when the same generated product is added again. (src/buildcompiler/inventory/inventory.py L69-L120)
- Updated `_add_plasmid` to detect existing identity entries and clear their old secondary-index references before inserting updated data, while keeping identity map behavior unchanged. (src/buildcompiler/inventory/inventory.py L96-L120)

Testing
- pytest tests/unit/inventory/test_inventory_indexes.py

View task →