-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathansi2gfm.py
More file actions
executable file
·301 lines (258 loc) · 10.4 KB
/
ansi2gfm.py
File metadata and controls
executable file
·301 lines (258 loc) · 10.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
#!/usr/bin/env python3
"""
Convert ANSI escape sequences to GitHub Flavored Markdown compatible markup.
GFM supports limited HTML tags within <pre> blocks:
- <b>, <strong> for bold
- <i>, <em> for italics
- <mark> for highlighting
- <del> for strikethrough
- <ins> for underline
Since GFM doesn't support color styling, we use semantic markup instead.
"""
import re
import sys
# ANSI color/style codes we care about
# Lookup table from an SGR ("Select Graphic Rendition") code to the GFM-safe
# HTML fragment that should stand in for it. An empty string means "drop the
# code without emitting markup".
#
# NOTE(review): the keys as written here start at '[' and carry no ESC
# (\x1b) byte, so they would not match a raw escape sequence verbatim --
# confirm whether a prefix is expected to be added by the consumer.
# NOTE(review): convert_ansi_to_gfm() in this file does not consult this
# table; it matches its own hard-coded patterns (and, unlike this table,
# strips the "dim" code instead of mapping it to <i>).
ANSI_PATTERNS = {
    # Reset
    '[0m': '',
    '[00m': '',
    # Text styles (preservable in GFM)
    '[1m': '<b>', # Bold
    '[01m': '<b>',
    '[21m': '</b>', # Bold off
    '[22m': '</b>', # Normal intensity
    '[2m': '<i>', # Dim (use italic as approximation)
    '[02m': '<i>',
    '[3m': '<i>', # Italic
    '[03m': '<i>',
    '[23m': '</i>', # Italic off
    '[4m': '<ins>', # Underline
    '[04m': '<ins>',
    '[24m': '</ins>', # Underline off
    '[9m': '<del>', # Strikethrough
    '[09m': '<del>',
    '[29m': '</del>', # Strikethrough off
    # Foreground colors (strip, but could add emojis for semantic meaning)
    '[30m': '', # Black
    '[31m': '', # Red
    '[32m': '', # Green
    '[33m': '', # Yellow
    '[34m': '', # Blue
    '[35m': '', # Magenta
    '[36m': '', # Cyan
    '[37m': '', # White
    '[90m': '', # Bright Black (Gray)
    '[91m': '', # Bright Red
    '[92m': '', # Bright Green
    '[93m': '', # Bright Yellow
    '[94m': '', # Bright Blue
    '[95m': '', # Bright Magenta
    '[96m': '', # Bright Cyan
    '[97m': '', # Bright White
    # Background colors (use mark for any background)
    '[40m': '<mark>', # Black bg
    '[41m': '<mark>', # Red bg
    '[42m': '<mark>', # Green bg
    '[43m': '<mark>', # Yellow bg
    '[44m': '<mark>', # Blue bg
    '[45m': '<mark>', # Magenta bg
    '[46m': '<mark>', # Cyan bg
    '[47m': '<mark>', # White bg
    '[100m': '<mark>', # Bright Black bg
    '[101m': '<mark>', # Bright Red bg
    '[102m': '<mark>', # Bright Green bg
    '[103m': '<mark>', # Bright Yellow bg
    '[104m': '<mark>', # Bright Blue bg
    '[105m': '<mark>', # Bright Magenta bg
    '[106m': '<mark>', # Bright Cyan bg
    '[107m': '<mark>', # Bright White bg
    # Combined codes (common patterns); the color part is dropped and only
    # the style/background part is kept.
    '[37;44m': '<mark><b>', # White on blue (highlight)
    '[1;31m': '<b>', # Bold red -> just bold
    '[1;32m': '<b>', # Bold green -> just bold
    '[1;33m': '<b>', # Bold yellow -> just bold
}
_RESET = '\x1b[0m'
_YELLOW = '\x1b[93m'

# Ordered (pattern, replacement) pairs applied to every ordinary line.
# Order matters: the "yellow followed by a letter" rule must run before the
# plain yellow rule, otherwise the latter would consume every occurrence.
_SGR_TO_TAG = (
    (re.compile(r'\x1b\[92m'), '<mark><b>'),        # green  = pass
    (re.compile(r'\x1b\[91m'), '<b>'),              # red    = fail
    (re.compile(r'\x1b\[93m(?=[a-zA-Z])'), '<del>'),  # yellow + letter = skip marker
    (re.compile(r'\x1b\[93m'), '<b>'),              # other yellow = bold
    (re.compile(r'\x1b\[94m'), '<h3>'),             # blue   = new test heading
    (re.compile(r'\x1b\[0?2m'), ''),                # dim    = plain text
    (re.compile(r'\x1b\[37;44m'), '<mark><b>'),     # white on blue = highlight
)
# Any SGR sequence, including the reset variants.
_ANY_SGR = re.compile(r'\x1b\[[0-9;]*m')
# A reset code, or one of the tags this module emits.
_RESET_OR_TAG = re.compile(r'\x1b\[0m|</?(?:mark|b|del|h3)>')


def _closing_tags(stack):
    """Return the closing tags for *stack*, innermost tag first."""
    return ''.join('</' + name + '>' for name in reversed(stack))


def _resolve_resets(line, carried):
    """Replace each reset code with closers for the tags open at that point.

    *carried* lists tags left open by the previous line. Returns the rewritten
    line and the list of tags that are still open at its end.
    """
    stack = list(carried)
    pieces = []
    cursor = 0
    for match in _RESET_OR_TAG.finditer(line):
        pieces.append(line[cursor:match.start()])
        cursor = match.end()
        token = match.group()
        if token == _RESET:
            # A reset clears every attribute, so close everything that is open.
            pieces.append(_closing_tags(stack))
            del stack[:]
            continue
        pieces.append(token)
        name = token.strip('</>')
        if not token.startswith('</'):
            stack.append(name)
            continue
        # A literal closing tag in the input: forget its innermost opener.
        for idx in range(len(stack) - 1, -1, -1):
            if stack[idx] == name:
                del stack[idx]
                break
    pieces.append(line[cursor:])
    return ''.join(pieces), stack


def _next_line_resets(lines, index):
    """Tell whether lines[index] opens with a reset that will close carried tags."""
    if index >= len(lines):
        return False
    upcoming = lines[index]
    # A line starting with reset+yellow is handled by the skip/heading branch,
    # which never emits closers, so tags must not be deferred to it.
    return (upcoming.lstrip().startswith(_RESET)
            and not upcoming.startswith(_RESET + _YELLOW))


def convert_ansi_to_gfm(text):
    """Convert ANSI escape sequences to GFM-compatible HTML tags with semantic meaning.

    The input is processed line by line:

    * A line starting with ESC[93m becomes ``<h1>`` (9PM main heading).
    * A line starting with ESC[0m ESC[93m that has content becomes ``<del>``
      (a skipped entry). If it has no content, the following line is consumed
      with it: ``<del>`` when it looks like a one-letter summary marker
      ("s 0076 ..."), ``<h2>`` otherwise. If the following line is missing,
      blank, or starts with an escape code, an empty line is emitted instead.
    * On every other line green maps to ``<mark><b>``, red to ``<b>``, yellow
      to ``<del>`` (before a letter) or ``<b>``, blue to ``<h3>``, white-on-blue
      to ``<mark><b>``, and dim is dropped. Each ESC[0m closes every tag open
      at that position, innermost first. Tags still open at end of line are
      closed there, unless the next line begins with a reset, in which case
      that reset closes them.

    All remaining SGR codes are stripped, so no raw escape bytes reach the
    output.

    Args:
        text: Text possibly containing ANSI SGR escape sequences.

    Returns:
        The converted text, with the same '\\n' line structure except where a
        lone ESC[0m ESC[93m line is merged with its successor.
    """
    lines = text.split('\n')
    clean_lines = []
    # Tags deliberately left open for the next line's leading reset. Only ever
    # non-empty when the next line takes the ordinary path (see
    # _next_line_resets), so the two special branches below can ignore it.
    carried = []
    i = 0
    while i < len(lines):
        line = lines[i]

        # ESC[93m at line start = main heading.
        if line.startswith(_YELLOW):
            title = _ANY_SGR.sub('', line[len(_YELLOW):])
            clean_lines.append('<h1>' + title + '</h1>')
            i += 1
            continue

        # ESC[0m ESC[93m at line start = skipped entry, or a heading whose
        # text sits on the following line.
        if line.startswith(_RESET + _YELLOW):
            content = _ANY_SGR.sub('', line[len(_RESET + _YELLOW):])
            if content.strip():
                clean_lines.append('<del>' + content + '</del>')
                i += 1
                continue
            following = lines[i + 1].strip() if i + 1 < len(lines) else ''
            if following and not following.startswith('\x1b['):
                # Summary markers look like "s 0076 Container...": one letter,
                # then a space.
                is_marker = (following[0].isalpha()
                             and len(following) > 2
                             and following[1] == ' ')
                tag = 'del' if is_marker else 'h2'
                following = _ANY_SGR.sub('', following)
                clean_lines.append('<' + tag + '>' + following + '</' + tag + '>')
                i += 2  # the following line was consumed as well
                continue
            # Nothing usable on the next line: emit an empty line.
            clean_lines.append('')
            i += 1
            continue

        # Ordinary line: map colors to tags, then let resets close them.
        for pattern, replacement in _SGR_TO_TAG:
            line = pattern.sub(replacement, line)
        line, still_open = _resolve_resets(line, carried)
        # Drop whatever codes are left (ESC[00m and anything unhandled).
        line = _ANY_SGR.sub('', line)

        carried = []
        if still_open:
            if _next_line_resets(lines, i + 1):
                carried = still_open
            else:
                line += _closing_tags(still_open)
        clean_lines.append(line)
        i += 1
    return '\n'.join(clean_lines)
def main():
    """Read from stdin or file, convert ANSI to GFM, write to stdout.

    Command line:
        input          optional path of the file to read (default: stdin)
        -o, --output   path of the file to write (default: stdout)

    Files opened on behalf of the user are closed before returning, even when
    reading or converting fails; the process-wide stdin/stdout streams are
    left open.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Convert ANSI escape sequences to GFM-compatible markup'
    )
    parser.add_argument(
        'input',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='Input file (default: stdin)'
    )
    parser.add_argument(
        '-o', '--output',
        type=argparse.FileType('w'),
        default=sys.stdout,
        help='Output file (default: stdout)'
    )
    args = parser.parse_args()
    try:
        content = args.input.read()
        args.output.write(convert_ansi_to_gfm(content))
    finally:
        # argparse.FileType opens the files but never closes them. It hands
        # back sys.stdin / sys.stdout themselves for the defaults (and for
        # "-"), and those must stay open for the rest of the process.
        for stream in (args.input, args.output):
            if stream is not sys.stdin and stream is not sys.stdout:
                stream.close()
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    main()