Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 200 additions & 1 deletion src/buildcompiler/buildcompiler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import sbol2
import random
import warnings
from typing import List, Dict
from typing import Any, Dict, List

from buildcompiler.plasmid import Plasmid
from buildcompiler.sbol2build import Assembly, dna_componentdefinition_with_sequence
Expand Down Expand Up @@ -363,6 +363,145 @@ def assembly_lvl2(

return protocol

def transformation(
    self,
    assembly_products: List[Any],
    chassis_name: str = "E_coli_DH5alpha",
    transformation_doc: sbol2.Document = None,
) -> Dict[str, Any]:
    """Generate deterministic transformation artifacts from assembly outputs.

    The method accepts either:
    - ``Plasmid`` objects,
    - ``sbol2.ComponentDefinition`` plasmids, or
    - dictionaries containing at least a ``plasmid`` key with one of the above.

    Every transformation step gets its own activity, transformed-strain
    module and implementation. The first time a chassis/plasmid pair is
    transformed the ids are ``transform_<plasmid>_<step>``,
    ``<chassis>_with_<plasmid>`` and ``<chassis>_with_<plasmid>_impl``; when
    an object with that identity is already present in the document (repeated
    input, same display id, or an earlier call) a numeric suffix is appended
    instead of silently reusing the earlier artifact.

    :param assembly_products: Structured inputs produced by an assembly stage.
    :type assembly_products: list
    :param chassis_name: Display id used for the chassis module and implementation.
    :type chassis_name: str
    :param transformation_doc: Optional SBOL document to write outputs into.
        Defaults to ``self.sbol_doc``.
    :type transformation_doc: sbol2.Document | None
    :returns: Structured transformation outputs including SBOL references,
        robot JSON intermediate, protocol placeholders, and logs.
    :rtype: dict
    :raises ValueError: If no valid plasmid inputs can be extracted.
    """
    if transformation_doc is None:
        transformation_doc = self.sbol_doc

    normalized_products = self._normalize_transformation_inputs(assembly_products)
    if not normalized_products:
        raise ValueError("transformation requires at least one plasmid input.")

    chassis_module, chassis_impl = self._get_or_create_chassis(
        transformation_doc, chassis_name
    )

    sbol_outputs = []
    robot_steps = []
    logs = []

    for index, product in enumerate(normalized_products, start=1):
        plasmid = product["plasmid"]
        plasmid_impl = self._get_or_create_plasmid_implementation(
            transformation_doc, plasmid
        )

        # Pick an activity id that is not taken yet, so a second call on the
        # same document does not point new strains at an older activity.
        transformation_activity = self._unclaimed_top_level(
            transformation_doc,
            sbol2.Activity,
            f"transform_{plasmid.displayId}_{index}",
        )
        transform_id = transformation_activity.displayId
        transformation_activity.name = f"Transform {chassis_name} with {plasmid.displayId}"
        transformation_activity.types = "http://sbols.org/v2#build"

        chassis_usage = sbol2.Usage(
            uri=f"{transform_id}_chassis_usage",
            entity=chassis_impl.identity,
            role="http://sbols.org/v2#build",
        )
        plasmid_usage = sbol2.Usage(
            uri=f"{transform_id}_plasmid_usage",
            entity=plasmid_impl.identity,
            role="http://sbols.org/v2#build",
        )
        transformation_activity.usages = [chassis_usage, plasmid_usage]

        # The base id depends only on chassis and plasmid display ids, so
        # repeated inputs would otherwise collide and later steps would be
        # dropped on registration. Keep one artifact per step.
        transformed_strain = self._unclaimed_top_level(
            transformation_doc,
            sbol2.ModuleDefinition,
            f"{chassis_name}_with_{plasmid.displayId}",
        )
        transformed_strain.roles = [ORGANISM_STRAIN]
        transformed_strain.name = f"{chassis_name} transformed with {plasmid.displayId}"

        chassis_module_ref = sbol2.Module(
            uri=f"{transformed_strain.displayId}_chassis_module"
        )
        chassis_module_ref.definition = chassis_module.identity
        plasmid_fc = sbol2.FunctionalComponent(
            uri=f"{transformed_strain.displayId}_plasmid_fc"
        )
        plasmid_fc.definition = plasmid.identity

        transformed_strain.modules = [chassis_module_ref]
        transformed_strain.functionalComponents = [plasmid_fc]

        transformed_impl = self._unclaimed_top_level(
            transformation_doc,
            sbol2.Implementation,
            f"{transformed_strain.displayId}_impl",
        )
        transformed_impl.built = transformed_strain.identity
        transformed_impl.wasGeneratedBy = transformation_activity.identity

        # Registration order is kept exactly as in the original
        # implementation: each parent precedes the children attached to it.
        for obj in (
            transformation_activity,
            chassis_usage,
            plasmid_usage,
            transformed_strain,
            chassis_module_ref,
            plasmid_fc,
            transformed_impl,
        ):
            self._add_if_absent(transformation_doc, obj)

        sbol_outputs.append(
            {
                "transformation_activity": transformation_activity.identity,
                "transformed_strain_module": transformed_strain.identity,
                "transformed_strain_implementation": transformed_impl.identity,
            }
        )
        robot_steps.append(
            {
                "step": index,
                "plasmid": plasmid.displayId,
                "chassis": chassis_name,
                "mix_ul": {"competent_cells": 50, "assembly_product": 5},
                "heat_shock": {"temperature_c": 42, "duration_seconds": 45},
                "recovery": {"medium": "SOC", "volume_ul": 950, "duration_min": 60},
            }
        )
        logs.append(
            f"Prepared transformation input for plasmid {plasmid.displayId} into chassis {chassis_name}."
        )

    return {
        "stage": "transformation",
        "inputs": [item["source"] for item in normalized_products],
        "chassis": chassis_name,
        "sbol_artifacts": sbol_outputs,
        "json_intermediate": {
            "protocol": "chemical_transformation",
            "version": "0.1",
            "steps": robot_steps,
        },
        "protocol_artifacts": {
            "ot2_script": "TODO: adapter to protocol generator",
            "human_instructions": [
                "Thaw competent cells on ice.",
                "Combine assembly product with competent cells as specified.",
                "Run heat shock and recovery according to generated parameters.",
            ],
            "logs": logs,
        },
    }

def _unclaimed_top_level(
    self, doc: sbol2.Document, factory: Any, base_id: str
) -> Any:
    """Build ``factory(base_id)``, suffixing the id until its identity is free in ``doc``.

    The first candidate uses ``base_id`` unchanged; later candidates use
    ``<base_id>_2``, ``<base_id>_3`` and so on. The returned object is NOT
    added to ``doc``; the caller registers it.

    :param doc: Document that is searched for identity collisions.
    :param factory: Callable taking a display id and returning an SBOL object
        (for example ``sbol2.Activity``).
    :param base_id: Preferred display id.
    :returns: A freshly constructed object whose identity ``doc.find`` does
        not resolve.
    """
    candidate = factory(base_id)
    attempt = 2
    while doc.find(candidate.identity) is not None:
        candidate = factory(f"{base_id}_{attempt}")
        attempt += 1
    return candidate

def _extract_plasmids_from_strain(
self,
strain: sbol2.ModuleDefinition,
Expand Down Expand Up @@ -605,3 +744,63 @@ def _create_RE_implementation(name: str):

def _create_ligase_implementation():
pass

def _normalize_transformation_inputs(
    self, assembly_products: List[Any]
) -> List[Dict[str, Any]]:
    """Convert heterogeneous assembly outputs into ``{"plasmid", "source"}`` records.

    Supported entries are ``Plasmid`` objects, ``sbol2.ComponentDefinition``
    objects, and dictionaries whose ``plasmid`` value is one of those two.
    For dictionaries an optional ``name`` key overrides the default source
    label (``Plasmid.name`` or the definition's ``displayId``).

    Unsupported entries are skipped, as before, but each one now emits a
    warning so a dropped transformation step does not go unnoticed.

    :param assembly_products: Inputs to normalise; ``None`` is treated as empty.
    :returns: One record per supported entry, in input order. ``plasmid`` is
        always the ``sbol2.ComponentDefinition``; ``source`` is the label.
    """
    normalized = []
    for position, item in enumerate(assembly_products or []):
        is_payload = isinstance(item, dict)
        candidate = item.get("plasmid") if is_payload else item

        if isinstance(candidate, Plasmid):
            definition = candidate.plasmid_definition
            default_source = candidate.name
        elif isinstance(candidate, sbol2.ComponentDefinition):
            definition = candidate
            default_source = candidate.displayId
        else:
            warnings.warn(
                f"Skipping transformation input at position {position}: "
                f"expected a Plasmid, an sbol2.ComponentDefinition, or a dict "
                f"with such a 'plasmid' value, got {type(candidate).__name__}.",
                stacklevel=2,
            )
            continue

        source = item.get("name", default_source) if is_payload else default_source
        normalized.append({"plasmid": definition, "source": source})
    return normalized

def _get_or_create_chassis(
    self, doc: sbol2.Document, chassis_name: str
) -> tuple[sbol2.ModuleDefinition, sbol2.Implementation]:
    """Return the chassis module and its implementation, creating them if missing.

    Objects are looked up by the identity a freshly constructed object with
    the same display id would get, which is the same key the rest of this
    class uses when checking a document for an existing object. An object
    that already exists in ``doc`` is returned untouched; only newly created
    objects are initialised and added.

    :param doc: Document to search and, if necessary, extend.
    :param chassis_name: Display id (and name) of the chassis module; the
        implementation uses ``<chassis_name>_impl``.
    :returns: ``(chassis_module, chassis_implementation)``.
    """
    new_module = sbol2.ModuleDefinition(chassis_name)
    chassis_module = doc.find(new_module.identity)
    if chassis_module is None:
        new_module.roles = [ORGANISM_STRAIN]
        new_module.name = chassis_name
        doc.add(new_module)
        chassis_module = new_module

    new_impl = sbol2.Implementation(f"{chassis_name}_impl")
    chassis_impl = doc.find(new_impl.identity)
    if chassis_impl is None:
        new_impl.built = chassis_module.identity
        doc.add(new_impl)
        chassis_impl = new_impl

    return chassis_module, chassis_impl

def _get_or_create_plasmid_implementation(
    self, doc: sbol2.Document, plasmid: sbol2.ComponentDefinition
) -> sbol2.Implementation:
    """Return an implementation whose ``built`` reference is ``plasmid``.

    The preferred display id is ``<plasmid.displayId>_impl``. An existing
    implementation is reused only when it already points at this plasmid's
    identity (or has no ``built`` reference yet). If the preferred id is
    taken by an implementation of a *different* plasmid that happens to share
    the display id, a suffixed id (``..._impl_2``, ``..._impl_3``, ...) is
    used instead of retargeting the existing object, so references held by
    earlier activities and usages stay valid.

    :param doc: Document to search and, if necessary, extend.
    :param plasmid: Plasmid definition the implementation must be built from.
    :returns: The matching or newly created implementation (registered in ``doc``).
    """
    base_id = f"{plasmid.displayId}_impl"
    target = str(plasmid.identity)
    attempt = 1
    while True:
        impl_id = base_id if attempt == 1 else f"{base_id}_{attempt}"
        candidate = sbol2.Implementation(impl_id)
        existing = doc.find(candidate.identity)

        if existing is None:
            candidate.built = plasmid.identity
            doc.add(candidate)
            return candidate

        if isinstance(existing, sbol2.Implementation):
            if not existing.built:
                # Unbound implementation: safe to claim it for this plasmid.
                existing.built = plasmid.identity
                return existing
            if str(existing.built) == target:
                return existing

        # Identity belongs to something else; try the next suffix.
        attempt += 1

def _add_if_absent(self, doc: sbol2.Document, obj: Any):
    """Add ``obj`` to ``doc`` unless ``doc.find`` already resolves its identity."""
    already_registered = doc.find(obj.identity) is not None
    if already_registered:
        return
    doc.add(obj)
64 changes: 64 additions & 0 deletions tests/test_buildcompiler_transformation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import os
import sys
import unittest

import sbol2

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src")))

from buildcompiler.buildcompiler import BuildCompiler
from buildcompiler.constants import ENGINEERED_PLASMID


class TestBuildCompilerTransformation(unittest.TestCase):
    """Tests for ``BuildCompiler.transformation`` using an in-memory SBOL document."""

    def setUp(self):
        # A fresh document per test keeps object identities from leaking
        # between test cases.
        self.doc = sbol2.Document()
        self.compiler = BuildCompiler(
            collections=[],
            sbh_registry="https://synbiohub.org",
            auth_token="",
            sbol_doc=self.doc,
        )

    def _make_plasmid(self, display_id: str) -> sbol2.ComponentDefinition:
        """Create an engineered-plasmid definition and register it in the test document."""
        plasmid = sbol2.ComponentDefinition(display_id)
        plasmid.roles = [ENGINEERED_PLASMID]
        self.doc.add(plasmid)
        return plasmid

    def test_transformation_accepts_component_definitions(self):
        """Bare ComponentDefinitions yield one SBOL artifact and one robot step each."""
        p1 = self._make_plasmid("geneA_plasmid")
        p2 = self._make_plasmid("geneB_plasmid")

        result = self.compiler.transformation([p1, p2], chassis_name="DH5alpha")

        self.assertEqual(result["stage"], "transformation")
        self.assertEqual(result["chassis"], "DH5alpha")
        self.assertEqual(len(result["sbol_artifacts"]), 2)
        self.assertEqual(len(result["json_intermediate"]["steps"]), 2)
        self.assertEqual(
            result["json_intermediate"]["steps"][0]["plasmid"], "geneA_plasmid"
        )
        self.assertIn("logs", result["protocol_artifacts"])

    def test_transformation_accepts_dict_payloads(self):
        """Dict payloads use their ``name`` as the source label."""
        plasmid = self._make_plasmid("geneC_plasmid")
        result = self.compiler.transformation(
            [{"name": "lvl1_geneC_output", "plasmid": plasmid}]
        )

        self.assertEqual(result["inputs"], ["lvl1_geneC_output"])
        strain_identity = result["sbol_artifacts"][0]["transformed_strain_module"]
        self.assertTrue(
            strain_identity.endswith("E_coli_DH5alpha_with_geneC_plasmid/1"),
            msg=f"unexpected transformed strain identity: {strain_identity}",
        )

    def test_transformation_requires_inputs(self):
        """An empty input list is rejected."""
        with self.assertRaises(ValueError):
            self.compiler.transformation([])

    def test_transformation_rejects_unsupported_inputs(self):
        """Inputs that contain no recognisable plasmid are rejected as well."""
        with self.assertRaises(ValueError):
            self.compiler.transformation(["not a plasmid", {"name": "missing"}])


if __name__ == "__main__":
unittest.main()
Loading