Skip to content

Commit d91b2cf

Browse files
committed
lineage_example.py
1 parent 3868733 commit d91b2cf

File tree

1 file changed

+167
-0
lines changed

1 file changed

+167
-0
lines changed

samples/lineage_example.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
from collections import defaultdict
2+
3+
from labkey.api_wrapper import APIWrapper
4+
from labkey.query import QueryFilter
5+
6+
# Connection settings for the LabKey Server this sample talks to.
labkey_server = "localhost:8080"
# Full project/folder container path
container_path = "Tutorials/HIV Study"

# Shared API client used by every call below; SSL is switched off explicitly.
api = APIWrapper(
    labkey_server,
    container_path,
    use_ssl=False,
)
9+
10+
###################
11+
# Create a data class domain
12+
###################
13+
# Define the "SimpleMolecules" data class with two custom fields.
simple_molecules_domain = api.domain.create(
    {
        "kind": "DataClass",
        "domainDesign": {
            "name": "SimpleMolecules",
            "fields": [
                {"name": "formula", "label": "Chemical Formula", "rangeURI": "string"},
                {"name": "molarMass", "label": "Molar Mass (g/mol)", "rangeURI": "double"},
            ],
        },
    }
)

# Insert the molecules that later rows reference by name as lineage parents.
api.query.insert_rows(
    "exp.data",
    "SimpleMolecules",
    [
        # Water's formula uses the letter "O" (oxygen), not the digit zero.
        {"name": "Water", "formula": "H2O", "molarMass": 18.01528},
        {"name": "Salt", "formula": "NaCl", "molarMass": 58.443},
    ],
)
28+
29+
###################
30+
# Create a second data class domain
31+
###################
32+
# Define the "Substances" data class.
substances_domain = api.domain.create(
    {
        "kind": "DataClass",
        "domainDesign": {
            "name": "Substances",
            "fields": [
                {"name": "type", "rangeURI": "string"},
                {"name": "fromNature", "rangeURI": "boolean"},
            ],
        },
    }
)

# Each row names its parent molecules through the "DataInputs/SimpleMolecules"
# column; several parents are given as one comma-separated string.
api.query.insert_rows(
    "exp.data",
    "Substances",
    [
        {
            "name": "Ocean Water",
            "type": "liquid",
            "fromNature": True,
            "DataInputs/SimpleMolecules": "Water, Salt",
        },
        {
            "name": "Bath Water",
            "type": "liquid",
            "fromNature": False,
            "DataInputs/SimpleMolecules": "Water",
        },
    ],
)
47+
48+
###################
49+
# Create a sample type domain
50+
###################
51+
# Define the "FieldSamples" sample type.
field_samples_domain = api.domain.create(
    {
        "kind": "SampleSet",
        "domainDesign": {
            "name": "FieldSamples",
            "fields": [
                {"name": "name", "rangeURI": "string"},
                {"name": "receivedDate", "rangeURI": "dateTime"},
                {"name": "volume_mL", "rangeURI": "int"},
            ],
        },
    }
)

# Insert the samples. Every row has the same four keys, so the rows are built
# from a compact (name, received date, volume, parent substances) table.
api.query.insert_rows(
    "samples",
    "FieldSamples",
    [
        {
            "name": sample_name,
            "receivedDate": received_date,
            "volume_mL": volume_ml,
            "DataInputs/Substances": parent_substances,
        }
        for sample_name, received_date, volume_ml, parent_substances in (
            # Samples derived from "Ocean Water"
            ("OC-1", "05/12/2025", 400, "Ocean Water"),
            ("OC-2", "05/13/2025", 600, "Ocean Water"),
            ("OC-3", "05/14/2025", 800, "Ocean Water"),
            # Samples derived from "Bath Water"
            ("BW-1", "05/12/2025", 400, "Bath Water"),
            ("BW-2", "05/13/2025", 600, "Bath Water"),
            ("BW-3", "05/14/2025", 800, "Bath Water"),
            # One sample with two parents; here each parent name is wrapped in
            # double quotes inside the comma-separated value.
            ("Mixed-1", "05/18/2025", 50, "\"Bath Water\", \"Ocean Water\""),
        )
    ],
)
74+
75+
###################
76+
# Query the lineage
77+
###################
78+
79+
# Specification for which entity to query
schema_name = "exp.data"
query_name = "Substances"
entity_name = "Ocean Water"

# Fetch the LSID of the "seed" for the lineage request. In this case, we'll
# query for the "Ocean Water" entity in Substances.
result = api.query.select_rows(
    schema_name,
    query_name,
    columns="Name, LSID",
    filter_array=[QueryFilter("name", entity_name)],
)
if not result["rows"]:
    # Without this check a missing entity surfaces as a bare
    # "list index out of range" that says nothing about what was looked up.
    raise LookupError(f"No row named '{entity_name}' found in {schema_name}.{query_name}")
seed_lsid = result["rows"][0]["LSID"]

# The lineage result includes the following:
# "seed": The LSID of all furnished seed nodes. A string if only a single seed,
#         otherwise an array of strings.
# "nodes": A dictionary of lineage node objects keyed by each node's LSID.
#          Nodes are linked together by their "parents" and "children" edges.
#
# On each node the following properties allow for traversal of the flattened
# graph structure.
# "parents": An array of objects representing edges in the graph from nodes
#            that refer to this node.
# "children": An array of objects representing edges in the graph to nodes to
#             which this node refers.
lineage_result = api.experiment.lineage([seed_lsid], depth=10)
96+
97+
###################
98+
# Traverse the lineage
99+
###################
100+
def traverse_lineage(node_lsid, lineage_result, depth=0, visited=None, nodes_by_depth=None):
    """Group the names of nodes related to ``node_lsid`` by relative depth.

    Starting at ``node_lsid``, every "parents" edge is followed with an offset
    of -1 and every "children" edge with an offset of +1. The name of each node
    reached is recorded under the resulting depth.

    Args:
        node_lsid: LSID of the node to expand; must be a key of
            ``lineage_result["nodes"]``.
        lineage_result: Mapping with a "nodes" entry keyed by LSID. Each node
            has a "name" and optional "parents" / "children" lists of edge
            objects that carry the related node's "lsid".
        depth: Depth of ``node_lsid`` relative to the starting node.
        visited: LSIDs already expanded on the current path. A fresh set is
            created when omitted; a supplied set is updated in place.
        nodes_by_depth: Accumulator mapping depth -> set of node names. A
            ``defaultdict(set)`` is created when omitted.

    Returns:
        The ``nodes_by_depth`` accumulator (the same object that was passed
        in, if any).

    Raises:
        KeyError: If ``node_lsid`` or an LSID referenced by an edge is not
            present in ``lineage_result["nodes"]``.
    """
    if visited is None:
        visited = set()
    if nodes_by_depth is None:
        nodes_by_depth = defaultdict(set)

    # A node already expanded on this path is not expanded again, which stops
    # the walk from bouncing between a node and its own parent or child.
    if node_lsid in visited:
        return nodes_by_depth
    visited.add(node_lsid)

    all_nodes = lineage_result["nodes"]
    node = all_nodes[node_lsid]

    # Parents are handled before children, one level up / down respectively.
    for edge_key, offset in (("parents", -1), ("children", 1)):
        related_depth = depth + offset
        for edge in node.get(edge_key, []):
            related_lsid = edge["lsid"]
            # The name is recorded even when the related node was already
            # expanded, so it appears under every depth it is reached at.
            nodes_by_depth[related_depth].add(all_nodes[related_lsid]["name"])
            # Each branch receives its own copy of the visited set, so a node
            # reachable along several paths is expanded once per path.
            traverse_lineage(
                related_lsid, lineage_result, related_depth, visited.copy(), nodes_by_depth
            )

    return nodes_by_depth
125+
126+
127+
# Walk the lineage graph outward from the seed, grouping node names by depth.
nodes_by_depth = traverse_lineage(seed_lsid, lineage_result)

print("\n===== LINEAGE BY DEPTH =====\n")

# Print parents (negative depths) from furthest to closest.
# Iterating the recorded depths directly avoids min() failing on an empty
# result when the seed has no relatives at all.
for depth in sorted(d for d in nodes_by_depth if d < 0):
    print(f"parent (depth = {depth}):")
    for node_name in sorted(nodes_by_depth[depth]):
        print(f"\t{node_name}")

seed_node = lineage_result["nodes"][seed_lsid]
# Single quotes inside the replacement field: reusing the outer double quote
# in an f-string is a syntax error before Python 3.12.
print(f"Seed: {seed_node['name']}")

# Print children (positive depths) from closest to furthest.
for depth in sorted(d for d in nodes_by_depth if d > 0):
    print(f"children (depth = {depth}):")
    for node_name in sorted(nodes_by_depth[depth]):
        print(f"\t{node_name}")
147+
148+
###################
149+
# Output:
150+
#
151+
# ===== LINEAGE BY DEPTH =====
152+
#
153+
# parent (depth = -2):
154+
# Salt
155+
# Water
156+
# parent (depth = -1):
157+
# Derive data from Salt, Water
158+
# Seed: Ocean Water
159+
# children (depth = 1):
160+
# Derive 3 samples from Ocean Water
161+
# Derive sample from Ocean Water, Bath Water
162+
# children (depth = 2):
163+
# Mixed-1
164+
# OC-1
165+
# OC-2
166+
# OC-3
167+
###################

0 commit comments

Comments
 (0)