-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
174 lines (143 loc) · 5.37 KB
/
main.py
File metadata and controls
174 lines (143 loc) · 5.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
"""Split Miner — command-line entry point.

Usage:
    python main.py <log.xes|log.xes.gz> [options]

Options:
    --eps FLOAT   Concurrency threshold (default 0.1).
    --eta FLOAT   Filtering percentile (default 0.4).
    --no-or-min   Skip the OR-join minimisation phase.
    --v2          Run Split Miner 2.0 (lifecycle-aware DFG, refined
                  concurrency, improper-completion fix, OR-split heuristic).
    --out PATH    Path for the exported BPMN XML (default model.bpmn).
    --png PATH    Optional path for a PNG rendering of the model.
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
from bpmn_export import save_bpmn_png, save_bpmn_xml, to_bpmn
from bpmn_init import build_initial_bpmn
from concurrency import discover_concurrency, discover_concurrency_v2
from dfg_builder import (
build_dfg,
build_refined_dfg,
discover_loops,
discover_loops_refined,
preprocess_refined,
preprocess_traces,
strip_self_loops,
)
from filtering import filter_pdfg
from heuristics import identify_or_splits, remove_improper_completion
from joins import discover_joins
from log_loader import has_lifecycle_info, load_traces, load_traces_v2
from or_minimize import minimize_or_joins
from splits import discover_splits
def run(
    traces: list[list[str]],
    *,
    eps: float = 0.1,
    eta: float = 0.4,
    or_minimise: bool = True,
):
    """Run the Split Miner 1.x pipeline on ``traces`` and return the graph.

    The stages are executed in this fixed order: trace preprocessing,
    DFG construction, loop discovery, self-loop stripping, concurrency
    discovery, filtering, initial BPMN construction, split discovery,
    join discovery and, optionally, OR-join minimisation.

    Args:
        traces: The event log, one list of activity labels per trace.
        eps: Forwarded as ``eps`` to ``discover_concurrency``.
        eta: Forwarded as ``eta`` to ``filter_pdfg``.
        or_minimise: When false, ``minimize_or_joins`` is not called.

    Returns:
        The working graph created by ``build_initial_bpmn`` after the
        split/join (and optional minimisation) passes were applied to it.

    Raises:
        ValueError: If ``traces`` is empty (or otherwise falsy).
    """
    if not traces:
        raise ValueError("Input log is empty")

    # Directly-follows graph and loop information from the cleaned log.
    cleaned = preprocess_traces(traces)
    graph, _ = build_dfg(cleaned)
    loop_info = discover_loops(graph, cleaned)

    # Concurrency discovery works on the DFG with self-loops removed.
    concurrency = discover_concurrency(
        strip_self_loops(graph),
        loop_info,
        eps=eps,
    )

    model = build_initial_bpmn(
        filter_pdfg(concurrency.pdfg, eta=eta),
        concurrency.concurrent_pairs,
        loop_info.self_loops,
    )

    # The gateway passes receive the graph and their return value is
    # not used here.
    discover_splits(model)
    discover_joins(model)
    if not or_minimise:
        return model
    minimize_or_joins(model)
    return model
def run_v2(
    refined_traces,
    *,
    eps: float = 0.1,
    eta: float = 0.4,
    or_minimise: bool = True,
):
    """Run the Split Miner 2.0 pipeline and return the working graph.

    Compared with :func:`run`, this variant uses:

    * a refined, lifecycle-aware directly-follows graph;
    * refined concurrency, driven by overlapping lifecycles;
    * Heuristic 1, which strips improper completion caused by
      AND-split loops;
    * Heuristic 2, which relabels AND-splits as OR-splits when the data
      supports inclusive-choice semantics.

    On logs without lifecycle information the refined DFG and
    concurrency reduce to their SM 1.x equivalents, so the function
    degrades gracefully.

    Args:
        refined_traces: The refined event log (as produced by
            ``load_traces_v2`` in this module's ``main``).
        eps: Forwarded as ``eps`` to ``discover_concurrency_v2``.
        eta: Forwarded as ``eta`` to ``filter_pdfg``.
        or_minimise: When false, ``minimize_or_joins`` is not called.

    Raises:
        ValueError: If ``refined_traces`` is empty (or otherwise falsy).
    """
    if not refined_traces:
        raise ValueError("Input log is empty")

    cleaned = preprocess_refined(refined_traces)
    graph, _ = build_refined_dfg(cleaned)
    loop_info = discover_loops_refined(graph, cleaned)

    # Concurrency discovery works on the DFG with self-loops removed.
    concurrency = discover_concurrency_v2(
        strip_self_loops(graph),
        cleaned,
        loop_info,
        eps=eps,
    )

    model = build_initial_bpmn(
        filter_pdfg(concurrency.pdfg, eta=eta),
        concurrency.concurrent_pairs,
        loop_info.self_loops,
    )
    discover_splits(model)

    # Order matters: Heuristic 1 must precede join discovery so that
    # improper-completion cycles are normalised before the join phase
    # looks at the rewired AND-split. Heuristic 2 then relabels the
    # AND-splits for which the refined log shows inclusive-choice
    # behaviour. Note that it is given the original ``refined_traces``,
    # not the preprocessed log.
    remove_improper_completion(model)
    identify_or_splits(model, refined_traces)

    discover_joins(model)
    if not or_minimise:
        return model
    minimize_or_joins(model)
    return model
def _parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the command-line arguments of the Split Miner CLI.

    Every option carries a help string so that ``--help`` shows the same
    information as the module docstring; option names, types and
    defaults are unchanged.

    Args:
        argv: Argument strings, without the program name.

    Returns:
        A namespace with the attributes ``log``, ``eps``, ``eta``,
        ``no_or_min``, ``v2``, ``out`` and ``png``.

    Raises:
        SystemExit: Raised by argparse on invalid arguments or ``--help``.
    """
    parser = argparse.ArgumentParser(description="Split Miner — Python port")
    parser.add_argument("log", help="Path to an .xes or .xes.gz event log")
    parser.add_argument(
        "--eps",
        type=float,
        default=0.1,
        metavar="FLOAT",
        help="Concurrency threshold (default: %(default)s)",
    )
    parser.add_argument(
        "--eta",
        type=float,
        default=0.4,
        metavar="FLOAT",
        help="Filtering percentile (default: %(default)s)",
    )
    parser.add_argument(
        "--no-or-min",
        action="store_true",
        help="Skip the OR-join minimisation phase",
    )
    parser.add_argument(
        "--v2",
        action="store_true",
        help="Use Split Miner 2.0 (lifecycle-aware DFG + OR-split heuristic)",
    )
    parser.add_argument(
        "--out",
        default="model.bpmn",
        metavar="PATH",
        help="Path for the exported BPMN XML (default: %(default)s)",
    )
    parser.add_argument(
        "--png",
        default=None,
        metavar="PATH",
        help="Optional path for a PNG rendering of the model",
    )
    return parser.parse_args(argv)
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: load a log, mine a model, export it.

    Args:
        argv: Argument strings without the program name. When ``None``,
            ``sys.argv[1:]`` is used.

    Returns:
        ``0`` once the BPMN XML (and the PNG, if requested) was written.
        Exceptions raised while loading, mining or exporting are not
        caught here.
    """
    if argv is None:
        argv = sys.argv[1:]
    args = _parse_args(argv)

    # Both pipelines accept the same tuning keywords.
    pipeline_options = {
        "eps": args.eps,
        "eta": args.eta,
        "or_minimise": not args.no_or_min,
    }

    if not args.v2:
        traces = load_traces(args.log)
        print(f"loaded {len(traces)} traces from {args.log}")
        wg = run(traces, **pipeline_options)
    else:
        refined = load_traces_v2(args.log)
        print(f"loaded {len(refined)} traces from {args.log}")
        lifecycle_present = has_lifecycle_info(refined)
        if not lifecycle_present:
            # The run still proceeds; the user is only warned on stderr.
            print(
                "WARNING: log has no lifecycle:transition='start' events — "
                "SM 2.0 reduces to SM 1.x (no overlaps to detect).",
                file=sys.stderr,
            )
        wg = run_v2(refined, **pipeline_options)

    bpmn = to_bpmn(wg)

    # The XML export always happens; the PNG only when --png was given.
    xml_path = Path(args.out)
    save_bpmn_xml(bpmn, xml_path)
    print(f"wrote BPMN XML to {xml_path}")

    if args.png:
        save_bpmn_png(bpmn, args.png)
        print(f"wrote PNG to {args.png}")

    return 0
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())