Skip to content

Commit c29ef2d

Browse files
authored
Merge pull request #229 from Sahil-u07/feature/enhanced-workflow-validation
Feature/enhanced workflow validation
2 parents 0a1fb79 + 5ffd847 commit c29ef2d

3 files changed

Lines changed: 246 additions & 5 deletions

File tree

concore_cli/README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,19 +72,26 @@ concore run workflow.graphml --source ./src --output ./build --auto-build
7272

7373
Validates a GraphML workflow file before running.
7474

75+
**Options:**
76+
- `-s, --source <dir>` - Source directory used to verify that referenced source files exist (default: src)
77+
7578
Checks:
7679
- Valid XML structure
7780
- GraphML format compliance
7881
- Node and edge definitions
7982
- File references and naming conventions
80-
- ZMQ vs file-based communication
83+
- Source file existence (when --source provided)
84+
- ZMQ port conflicts and reserved ports
85+
- Circular dependencies (warns for control loops)
86+
- Edge connectivity
8187

8288
**Options:**
8389
- `-s, --source <dir>` - Source directory (default: src)
8490

8591
**Example:**
8692
```bash
8793
concore validate workflow.graphml
94+
concore validate workflow.graphml --source ./src
8895
```
8996

9097
### `concore status`

concore_cli/commands/validate.py

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def validate_workflow(workflow_file, source_dir, console):
1919
def finalize():
2020
show_results(console, errors, warnings, info)
2121
return len(errors) == 0
22-
22+
2323
try:
2424
with open(workflow_path, 'r') as f:
2525
content = f.read()
@@ -69,7 +69,7 @@ def finalize():
6969
warnings.append("No edges found in workflow")
7070
else:
7171
info.append(f"Found {len(edges)} edge(s)")
72-
72+
7373
if not source_root.exists():
7474
warnings.append(f"Source directory not found: {source_root}")
7575

@@ -96,7 +96,7 @@ def finalize():
9696
if re.search(r'[;&|`$\'"()\\]', label):
9797
errors.append(f"Node '{label}' contains unsafe shell characters")
9898
continue
99-
99+
100100
if ':' not in label:
101101
warnings.append(f"Node '{label}' missing format 'ID:filename'")
102102
else:
@@ -155,15 +155,90 @@ def finalize():
155155
if file_edges > 0:
156156
info.append(f"File-based edges: {file_edges}")
157157

158+
_check_cycles(soup, errors, warnings)
159+
_check_zmq_ports(soup, errors, warnings)
160+
158161
return finalize()
159-
162+
160163
except FileNotFoundError:
161164
console.print(f"[red]Error:[/red] File not found: {workflow_path}")
162165
return False
163166
except Exception as e:
164167
console.print(f"[red]Validation failed:[/red] {str(e)}")
165168
return False
166169

170+
def _check_cycles(soup, errors, warnings):
171+
nodes = soup.find_all('node')
172+
edges = soup.find_all('edge')
173+
174+
node_ids = [node.get('id') for node in nodes if node.get('id')]
175+
if not node_ids:
176+
return
177+
178+
graph = {nid: [] for nid in node_ids}
179+
for edge in edges:
180+
source = edge.get('source')
181+
target = edge.get('target')
182+
if source and target and source in graph:
183+
graph[source].append(target)
184+
185+
def has_cycle_from(start, visited, rec_stack):
186+
visited.add(start)
187+
rec_stack.add(start)
188+
189+
for neighbor in graph.get(start, []):
190+
if neighbor not in visited:
191+
if has_cycle_from(neighbor, visited, rec_stack):
192+
return True
193+
elif neighbor in rec_stack:
194+
return True
195+
196+
rec_stack.remove(start)
197+
return False
198+
199+
visited = set()
200+
for node_id in node_ids:
201+
if node_id not in visited:
202+
if has_cycle_from(node_id, visited, set()):
203+
warnings.append("Workflow contains cycles (expected for control loops)")
204+
return
205+
206+
def _check_zmq_ports(soup, errors, warnings):
207+
edges = soup.find_all('edge')
208+
port_pattern = re.compile(r"0x([a-fA-F0-9]+)_(\S+)")
209+
210+
ports_used = {}
211+
212+
for edge in edges:
213+
label_tag = edge.find('y:EdgeLabel') or edge.find('EdgeLabel')
214+
if not label_tag or not label_tag.text:
215+
continue
216+
217+
match = port_pattern.match(label_tag.text.strip())
218+
if not match:
219+
continue
220+
221+
port_hex = match.group(1)
222+
port_name = match.group(2)
223+
port_num = int(port_hex, 16)
224+
225+
if port_num < 1:
226+
errors.append(f"Invalid port number: {port_num} (0x{port_hex}) must be at least 1")
227+
continue
228+
elif port_num > 65535:
229+
errors.append(f"Invalid port number: {port_num} (0x{port_hex}) exceeds maximum (65535)")
230+
continue
231+
232+
if port_num in ports_used:
233+
existing_name = ports_used[port_num]
234+
if existing_name != port_name:
235+
errors.append(f"Port conflict: 0x{port_hex} used for both '{existing_name}' and '{port_name}'")
236+
else:
237+
ports_used[port_num] = port_name
238+
239+
if port_num < 1024:
240+
warnings.append(f"Port {port_num} (0x{port_hex}) is in reserved range (< 1024)")
241+
167242
def show_results(console, errors, warnings, info):
168243
if errors:
169244
console.print("[red]✗ Validation failed[/red]\n")

tests/test_graph.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,165 @@ def test_validate_valid_graph(self):
145145

146146
self.assertIn('Validation passed', result.output)
147147
self.assertIn('Workflow is valid', result.output)
148+
149+
def test_validate_missing_source_file(self):
    """Validation reports a missing source file when --source lacks the node's script."""
    graphml = '''
<graphml xmlns:y="http://www.yworks.com/xml/graphml">
<graph id="G" edgedefault="directed">
<node id="n0">
<data key="d0"><y:NodeLabel>n0:missing.py</y:NodeLabel></data>
</node>
</graph>
</graphml>
'''
    workflow = self.create_graph_file('workflow.graphml', graphml)
    # Empty source directory: the referenced script is deliberately absent.
    src = Path(self.temp_dir) / 'src'
    src.mkdir()

    cli_args = ['validate', workflow, '--source', str(src)]
    outcome = self.runner.invoke(cli, cli_args)

    for expected in ('Validation failed', 'Missing source file'):
        self.assertIn(expected, outcome.output)
167+
168+
def test_validate_with_existing_source_file(self):
    """Validation passes when the node's script exists in the --source directory."""
    graphml = '''
<graphml xmlns:y="http://www.yworks.com/xml/graphml">
<graph id="G" edgedefault="directed">
<node id="n0">
<data key="d0"><y:NodeLabel>n0:exists.py</y:NodeLabel></data>
</node>
</graph>
</graphml>
'''
    workflow = self.create_graph_file('workflow.graphml', graphml)
    src = Path(self.temp_dir) / 'src'
    src.mkdir()
    # Create the script the node label refers to.
    script = src / 'exists.py'
    script.write_text('print("hello")')

    cli_args = ['validate', workflow, '--source', str(src)]
    outcome = self.runner.invoke(cli, cli_args)

    self.assertIn('Validation passed', outcome.output)
186+
187+
def test_validate_zmq_port_conflict(self):
    """Two edges using port 0x1234 under different names must fail validation."""
    graphml = '''
<graphml xmlns:y="http://www.yworks.com/xml/graphml">
<graph id="G" edgedefault="directed">
<node id="n0">
<data key="d0"><y:NodeLabel>n0:script1.py</y:NodeLabel></data>
</node>
<node id="n1">
<data key="d0"><y:NodeLabel>n1:script2.py</y:NodeLabel></data>
</node>
<edge source="n0" target="n1">
<data key="d1"><y:EdgeLabel>0x1234_portA</y:EdgeLabel></data>
</edge>
<edge source="n1" target="n0">
<data key="d1"><y:EdgeLabel>0x1234_portB</y:EdgeLabel></data>
</edge>
</graph>
</graphml>
'''
    workflow = self.create_graph_file('conflict.graphml', graphml)

    outcome = self.runner.invoke(cli, ['validate', workflow])

    for expected in ('Validation failed', 'Port conflict'):
        self.assertIn(expected, outcome.output)
212+
213+
def test_validate_reserved_port(self):
    """An edge on port 0x50 (80) must be reported as being in the reserved range."""
    graphml = '''
<graphml xmlns:y="http://www.yworks.com/xml/graphml">
<graph id="G" edgedefault="directed">
<node id="n0">
<data key="d0"><y:NodeLabel>n0:script1.py</y:NodeLabel></data>
</node>
<node id="n1">
<data key="d0"><y:NodeLabel>n1:script2.py</y:NodeLabel></data>
</node>
<edge source="n0" target="n1">
<data key="d1"><y:EdgeLabel>0x50_data</y:EdgeLabel></data>
</edge>
</graph>
</graphml>
'''
    workflow = self.create_graph_file('reserved.graphml', graphml)

    outcome = self.runner.invoke(cli, ['validate', workflow])

    for expected in ('Port 80', 'reserved range'):
        self.assertIn(expected, outcome.output)
235+
236+
def test_validate_cycle_detection(self):
    """A two-node loop (n0 -> n1 -> n0) must produce the cycle message."""
    graphml = '''
<graphml xmlns:y="http://www.yworks.com/xml/graphml">
<graph id="G" edgedefault="directed">
<node id="n0">
<data key="d0"><y:NodeLabel>n0:controller.py</y:NodeLabel></data>
</node>
<node id="n1">
<data key="d0"><y:NodeLabel>n1:plant.py</y:NodeLabel></data>
</node>
<edge source="n0" target="n1">
<data key="d1"><y:EdgeLabel>control_signal</y:EdgeLabel></data>
</edge>
<edge source="n1" target="n0">
<data key="d1"><y:EdgeLabel>sensor_data</y:EdgeLabel></data>
</edge>
</graph>
</graphml>
'''
    workflow = self.create_graph_file('cycle.graphml', graphml)

    outcome = self.runner.invoke(cli, ['validate', workflow])

    for expected in ('cycles', 'control loops'):
        self.assertIn(expected, outcome.output)
261+
262+
def test_validate_port_zero(self):
    """An edge labelled with port 0x0 must fail validation as below the minimum."""
    graphml = '''
<graphml xmlns:y="http://www.yworks.com/xml/graphml">
<graph id="G" edgedefault="directed">
<node id="n0">
<data key="d0"><y:NodeLabel>n0:script1.py</y:NodeLabel></data>
</node>
<node id="n1">
<data key="d0"><y:NodeLabel>n1:script2.py</y:NodeLabel></data>
</node>
<edge source="n0" target="n1">
<data key="d1"><y:EdgeLabel>0x0_invalid</y:EdgeLabel></data>
</edge>
</graph>
</graphml>
'''
    workflow = self.create_graph_file('port_zero.graphml', graphml)

    outcome = self.runner.invoke(cli, ['validate', workflow])

    for expected in ('Validation failed', 'must be at least 1'):
        self.assertIn(expected, outcome.output)
284+
285+
def test_validate_port_exceeds_maximum(self):
    """An edge labelled with port 0x10000 must fail validation as above 65535."""
    graphml = '''
<graphml xmlns:y="http://www.yworks.com/xml/graphml">
<graph id="G" edgedefault="directed">
<node id="n0">
<data key="d0"><y:NodeLabel>n0:script1.py</y:NodeLabel></data>
</node>
<node id="n1">
<data key="d0"><y:NodeLabel>n1:script2.py</y:NodeLabel></data>
</node>
<edge source="n0" target="n1">
<data key="d1"><y:EdgeLabel>0x10000_toobig</y:EdgeLabel></data>
</edge>
</graph>
</graphml>
'''
    workflow = self.create_graph_file('port_max.graphml', graphml)

    outcome = self.runner.invoke(cli, ['validate', workflow])

    for expected in ('Validation failed', 'exceeds maximum (65535)'):
        self.assertIn(expected, outcome.output)
148307

149308
if __name__ == '__main__':
150309
unittest.main()

0 commit comments

Comments
 (0)