-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpaper_example_log.py
More file actions
67 lines (55 loc) · 2.72 KB
/
paper_example_log.py
File metadata and controls
67 lines (55 loc) · 2.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""Materialize the running example of Augusto et al. (2019) as an XES log.
The paper enumerates ten distinct traces, each recorded ten times. We write
them to ``outputs/paper_example.xes`` so both the Java and Python miners can
consume the same file, and we can verify whether either reproduces Fig. 3c.
"""
from __future__ import annotations

from collections.abc import Iterable
from datetime import datetime, timedelta, timezone
from pathlib import Path
from xml.sax.saxutils import escape

import pm4py

from log_loader import PAPER_EXAMPLE_LOG
def write_xes(
    out_path: Path,
    traces: Iterable[Iterable[str]] | None = None,
) -> Path:
    """Write traces as a hand-rolled XES log that the OpenXES parser accepts.

    PM4Py's writer omits the ``<global>`` sections that the Java reference
    implementation (Split Miner) expects, which makes
    ``LogParser.getComplexLog`` NPE, so the document is assembled by hand.

    Each trace becomes one ``<trace>`` named ``case_NNN`` (its index in
    ``traces``); each label becomes one ``complete`` event whose timestamp is
    2026-01-01T00:00:00 UTC plus the event's index in seconds, so events
    within a trace are strictly ordered.

    Args:
        out_path: Destination file. Missing parent directories are created.
        traces: Iterable of traces, each an iterable of activity labels.
            Defaults to ``PAPER_EXAMPLE_LOG`` when omitted.

    Returns:
        ``out_path``, unchanged, for convenient chaining.
    """
    if traces is None:
        traces = PAPER_EXAMPLE_LOG

    out_path.parent.mkdir(parents=True, exist_ok=True)
    parts: list[str] = [
        '<?xml version="1.0" encoding="UTF-8" ?>',
        '<log xes.version="1.0" xes.features="nested-attributes" '
        'openxes.version="1.0RC7">',
        '\t<extension name="Lifecycle" prefix="lifecycle" '
        'uri="http://www.xes-standard.org/lifecycle.xesext"/>',
        '\t<extension name="Concept" prefix="concept" '
        'uri="http://www.xes-standard.org/concept.xesext"/>',
        '\t<extension name="Time" prefix="time" '
        'uri="http://www.xes-standard.org/time.xesext"/>',
        '\t<global scope="trace">',
        '\t\t<string key="concept:name" value="DEFAULT"/>',
        '\t</global>',
        '\t<global scope="event">',
        '\t\t<string key="lifecycle:transition" value="complete"/>',
        '\t\t<string key="concept:name" value="DEFAULT"/>',
        '\t\t<date key="time:timestamp" value="1970-01-01T00:00:00.000+00:00"/>',
        '\t</global>',
        '\t<classifier name="concept:name" keys="concept:name"/>',
    ]

    first_event_time = datetime(2026, 1, 1, tzinfo=timezone.utc)
    # escape() handles &, < and > on its own; the double quote must be added
    # because every value is emitted inside a double-quoted attribute.
    attr_entities = {'"': "&quot;"}

    for i, trace in enumerate(traces):
        parts.append('\t<trace>')
        parts.append(f'\t\t<string key="concept:name" value="case_{i:03d}"/>')
        for j, label in enumerate(trace):
            safe_label = escape(str(label), attr_entities)
            # Real datetime arithmetic keeps the timestamp valid for traces
            # longer than an hour's worth of one-second steps.
            ts = (first_event_time + timedelta(seconds=j)).isoformat(
                timespec="milliseconds"
            )
            parts.append('\t\t<event>')
            parts.append(f'\t\t\t<string key="concept:name" value="{safe_label}"/>')
            parts.append('\t\t\t<string key="lifecycle:transition" value="complete"/>')
            parts.append(f'\t\t\t<date key="time:timestamp" value="{ts}"/>')
            parts.append('\t\t</event>')
        parts.append('\t</trace>')
    parts.append('</log>')

    out_path.write_text("\n".join(parts), encoding="utf-8")
    return out_path
if __name__ == "__main__":
    # The log is written next to this script, under ``outputs/``.
    target = Path(__file__).parent / "outputs" / "paper_example.xes"
    write_xes(target)

    distinct = len(PAPER_EXAMPLE_LOG)
    events = sum(map(len, PAPER_EXAMPLE_LOG)) * 10
    # NOTE(review): write_xes emits each PAPER_EXAMPLE_LOG entry once, while
    # this summary multiplies by 10 -- confirm the two agree with log_loader.
    print(f"wrote {target} ({distinct} distinct traces x 10 = {events} events)")