Skip to content

Commit c06ac6a

Browse files
authored
Lineage example (#82)
1 parent 3868733 commit c06ac6a

File tree

1 file changed

+284
-0
lines changed

1 file changed

+284
-0
lines changed

docs/lineage.md

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
# LabKey Experiment Lineage API
2+
3+
The LabKey Experiment Lineage API provides a powerful way to track and visualize relationships between
4+
different entities in your experimental data. This API allows you to:
5+
6+
1. **Query lineage relationships** between samples, materials, data, and other experimental entities
7+
1. **Traverse lineage graphs** in both upstream (parent) and downstream (child) directions
8+
9+
The lineage API represents relationships as a directed graph where:
10+
11+
- **Nodes** represent individual entities (samples, data objects, etc.)
12+
- **Edges** represent parent-child relationships between entities
13+
- Each node is uniquely identified by its **LSID** (Life Science Identifier)
14+
15+
### API Parameters
16+
17+
The Lineage API accepts the following parameters to control the scope and content of lineage queries:
18+
19+
#### Core Parameters
20+
21+
| Parameter | Type | Description |
22+
|------------|-------------|------------------------------------------------------------------------------------------------------------------------------------------|
23+
| `lsids` | `List[str]` | List of Life Science Identifiers (LSIDs) for which to retrieve lineage information. These are the "seed" entities for the lineage query. |
24+
| `depth` | `int` | Maximum number of generations to traverse in the lineage graph. Default maximum is 100. |
25+
| `parents` | `bool` | Whether to include parent (upstream) relationships in the lineage query. Default is `True`. |
26+
| `children` | `bool` | Whether to include child (downstream) relationships in the lineage query. Default is `True`. |
27+
28+
#### Filtering Parameters
29+
The following parameters restrict the nodes in the graph to those that match the corresponding filter(s). NOTE: Using
30+
these filters can produce **disconnected graphs**.
31+
32+
| Parameter | Type | Description |
33+
|---------------------|-------|-----------------------------------------------------------------------------------------------------------|
34+
| `exp_type` | `str` | Filter lineage by experiment type. Possible values: `ALL`, `Data`, `Material`, `ExperimentRun`, `Object`. |
35+
| `cpas_type` | `str` | Filter lineage by CPAS type (optional). |
36+
| `run_protocol_lsid` | `str` | Filter lineage to only include entities associated with a specific protocol (optional). |
37+
38+
#### Data Inclusion Parameters
39+
40+
| Parameter | Type | Description |
41+
|------------------------------|--------|-----------------------------------------------------------------------------------------------|
42+
| `include_properties` | `bool` | Whether to include entity properties in the response. Default is `False`. |
43+
| `include_inputs_and_outputs` | `bool` | Whether to include detailed input and output information for each entity. Default is `False`. |
44+
| `include_run_steps` | `bool` | Whether to include experiment run step information. Default is `False`. |
45+
46+
## Response Structure
47+
The Lineage API response includes:
48+
- **seed**: The LSID(s) of the provided seed node(s)
49+
- **nodes**: A dictionary of all nodes in the lineage graph, keyed by LSID
50+
- Each node contains:
51+
- **name**: Display name of the entity
52+
- **parents**: Array of objects representing parent relationships; each object's `lsid` identifies the parent node
53+
- **children**: Array of objects representing child relationships; each object's `lsid` identifies the child node
54+
- Additional properties when requested via inclusion parameters
55+
56+
### Examples
57+
58+
```python
59+
from collections import defaultdict
60+
61+
from labkey.api_wrapper import APIWrapper
62+
from labkey.query import QueryFilter
63+
64+
# Connection details for the LabKey Server instance this example runs against.
labkey_server = "localhost:8080"
container_path = "Tutorials/HIV Study"  # Full project/folder container path
api = APIWrapper(labkey_server, container_path, use_ssl=False)

###################
# Create a data class domain
###################
simple_molecules_domain = api.domain.create(
    {
        "kind": "DataClass",
        "domainDesign": {
            "name": "SimpleMolecules",
            "fields": [
                {"name": "formula", "label": "Chemical Formula", "rangeURI": "string"},
                {"name": "molarMass", "label": "Molar Mass (g/mol)", "rangeURI": "double"},
            ],
        },
    }
)

# Insert the molecules that the later "DataInputs/SimpleMolecules" values
# refer to by name.
api.query.insert_rows(
    "exp.data",
    "SimpleMolecules",
    [
        # The formula uses the letter "O" (oxygen), not the digit zero.
        {"name": "Water", "formula": "H2O", "molarMass": 18.01528},
        {"name": "Salt", "formula": "NaCl", "molarMass": 58.443},
    ],
)
92+
93+
###################
94+
# Create a second data class domain
95+
###################
96+
# Define the "Substances" data class with a string field and a boolean field.
substances_domain = api.domain.create(
    {
        "kind": "DataClass",
        "domainDesign": {
            "name": "Substances",
            "fields": [
                {"name": field_name, "rangeURI": range_uri}
                for field_name, range_uri in (
                    ("type", "string"),
                    ("fromNature", "boolean"),
                )
            ],
        },
    }
)

# Each row names the SimpleMolecules entries it is derived from in the
# "DataInputs/SimpleMolecules" column.
api.query.insert_rows(
    "exp.data",
    "Substances",
    [
        {
            "name": substance,
            "type": "liquid",
            "fromNature": from_nature,
            "DataInputs/SimpleMolecules": molecule_inputs,
        }
        for substance, from_nature, molecule_inputs in (
            ("Ocean Water", True, "Water, Salt"),
            ("Bath Water", False, "Water"),
        )
    ],
)
127+
128+
###################
129+
# Create a sample type domain
130+
###################
131+
# Define the "FieldSamples" sample type: a name, a received date and a volume.
field_samples_domain = api.domain.create(
    {
        "kind": "SampleSet",
        "domainDesign": {
            "name": "FieldSamples",
            "fields": [
                {"name": field_name, "rangeURI": range_uri}
                for field_name, range_uri in (
                    ("name", "string"),
                    ("receivedDate", "dateTime"),
                    ("volume_mL", "int"),
                )
            ],
        },
    }
)

# Each sample names the Substances entries it is derived from in the
# "DataInputs/Substances" column; "Mixed-1" lists two of them.
api.query.insert_rows(
    "samples",
    "FieldSamples",
    [
        {
            "name": sample_name,
            "receivedDate": received_date,
            "volume_mL": volume_ml,
            "DataInputs/Substances": substance_inputs,
        }
        for sample_name, received_date, volume_ml, substance_inputs in (
            ("OC-1", "05/12/2025", 400, "Ocean Water"),
            ("OC-2", "05/13/2025", 600, "Ocean Water"),
            ("OC-3", "05/14/2025", 800, "Ocean Water"),
            ("BW-1", "05/12/2025", 400, "Bath Water"),
            ("BW-2", "05/13/2025", 600, "Bath Water"),
            ("BW-3", "05/14/2025", 800, "Bath Water"),
            ("Mixed-1", "05/18/2025", 50, '"Bath Water", "Ocean Water"'),
        )
    ],
)
193+
194+
###################
195+
# Query the lineage
196+
###################
197+
198+
# Specification for which entity to query
schema_name = "exp.data"
query_name = "Substances"
entity_name = "Ocean Water"

# Fetch the LSID of the "seed" for the lineage request
result = api.query.select_rows(
    schema_name, query_name, columns="Name, LSID", filter_array=[QueryFilter("name", entity_name)]
)
if not result["rows"]:
    # Fail with an actionable message rather than a bare IndexError when the
    # seed entity does not exist (e.g. the insert steps above were skipped).
    raise LookupError(f"No '{entity_name}' row found in {schema_name}.{query_name}")
seed_lsid = result["rows"][0]["LSID"]

# Request the lineage around the seed, limited to 10 generations.
lineage_result = api.experiment.lineage([seed_lsid], depth=10)
210+
211+
###################
212+
# Traverse the lineage
213+
###################
214+
def traverse_lineage(node_lsid, lineage_result, depth=0, visited=None, nodes_by_depth=None):
    """Group the names of all nodes related to ``node_lsid`` by relative depth.

    Parents of a node are recorded one level below it (``depth - 1``) and
    children one level above it (``depth + 1``), so with the default
    ``depth=0`` ancestors end up at negative depths and descendants at
    positive depths.

    Args:
        node_lsid: LSID of the node to expand.
        lineage_result: Lineage response; ``lineage_result["nodes"]`` must map
            each LSID to a dict with a ``"name"`` and optional ``"parents"`` /
            ``"children"`` lists of ``{"lsid": ...}`` edges.
        depth: Depth assigned to ``node_lsid`` itself.
        visited: LSIDs already expanded on the current path. The set passed in
            is updated with ``node_lsid``.
        nodes_by_depth: Accumulator mapping depth to a set of node names. It is
            updated in place and shared by every recursive call.

    Returns:
        The ``nodes_by_depth`` accumulator (a new ``defaultdict(set)`` when
        none was supplied).
    """
    seen = set() if visited is None else visited
    by_depth = defaultdict(set) if nodes_by_depth is None else nodes_by_depth

    # A node already expanded on this path contributes nothing further.
    if node_lsid in seen:
        return by_depth
    seen.add(node_lsid)

    all_nodes = lineage_result["nodes"]
    current = all_nodes[node_lsid]

    # Parents are walked before children; each direction shifts the depth by one.
    for relation, step in (("parents", -1), ("children", 1)):
        neighbor_depth = depth + step
        for edge in current.get(relation, []):
            neighbor_lsid = edge["lsid"]
            # The name is recorded before recursing, so a neighbor is listed
            # at this depth even if it has already been expanded.
            by_depth[neighbor_depth].add(all_nodes[neighbor_lsid]["name"])
            # Each branch receives its own copy of the visited set, so a node
            # reachable along several paths is expanded once per path.
            traverse_lineage(neighbor_lsid, lineage_result, neighbor_depth, seen.copy(), by_depth)

    return by_depth
241+
242+
243+
# Collect the names of every related node, grouped by depth relative to the seed.
nodes_by_depth = traverse_lineage(seed_lsid, lineage_result)

print("\n===== LINEAGE BY DEPTH =====\n")

# Print parents (negative depths) from furthest to closest.
# Iterating the sorted keys also copes with a seed that has no relatives,
# where min()/max() over an empty mapping would raise ValueError.
for depth in sorted(d for d in nodes_by_depth if d < 0):
    print(f"parent (depth = {depth}):")
    for node in sorted(nodes_by_depth[depth]):
        print(f"\t{node}")

seed_node = lineage_result["nodes"][seed_lsid]
# Single quotes inside the f-string: reusing the enclosing double quote is a
# SyntaxError on Python versions before 3.12.
print(f"Seed: {seed_node['name']}")

# Print children (positive depths) from closest to furthest
for depth in sorted(d for d in nodes_by_depth if d > 0):
    print(f"children (depth = {depth}):")
    for node in sorted(nodes_by_depth[depth]):
        print(f"\t{node}")
263+
264+
###################
265+
# Output:
266+
#
267+
# ===== LINEAGE BY DEPTH =====
268+
#
269+
# parent (depth = -2):
#     Salt
#     Water
# parent (depth = -1):
#     Derive data from Salt, Water
# Seed: Ocean Water
# children (depth = 1):
#     Derive 3 samples from Ocean Water
#     Derive sample from Ocean Water, Bath Water
# children (depth = 2):
#     Mixed-1
#     OC-1
#     OC-2
#     OC-3
283+
###################
284+
```

0 commit comments

Comments
 (0)