-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsubtitle_parser.py
More file actions
58 lines (51 loc) · 1.87 KB
/
subtitle_parser.py
File metadata and controls
58 lines (51 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import pysrt
import webvtt
import os
def _parse_vtt_time(time_str):
    """Convert a WebVTT timestamp string to integer milliseconds.

    Accepts ``HH:MM:SS.mmm`` and ``MM:SS.mmm``. A string with any other
    number of colon-separated fields yields 0.

    Args:
        time_str: Timestamp text such as ``"00:00:20.000"``.

    Returns:
        The timestamp expressed in milliseconds.

    Raises:
        ValueError: If a field that should be numeric is not.
    """
    parts = time_str.split(':')
    if len(parts) == 3:  # HH:MM:SS.mmm
        h, m, s = parts
    elif len(parts) == 2:  # MM:SS.mmm
        h = 0
        m, s = parts
    else:
        return 0

    s_parts = s.split('.')
    sec = s_parts[0]
    frac = s_parts[1] if len(s_parts) == 2 else ''
    # The fraction is a decimal part of a second, not a millisecond count:
    # ".5" means 500 ms. Normalise it to exactly three digits (pad short
    # fractions with zeros, truncate anything finer than a millisecond).
    ms = int((frac + '000')[:3])

    return int(h) * 3600000 + int(m) * 60000 + int(sec) * 1000 + ms


def parse_subtitle(filepath):
    """Parse an SRT or WebVTT subtitle file into a list of cue dicts.

    The format is chosen from the file extension (case-insensitive):
    ``.srt`` is read with ``pysrt`` and ``.vtt`` with ``webvtt``.

    Args:
        filepath: Path to the subtitle file.

    Returns:
        A list of dicts, one per cue, with keys ``"start"`` and ``"end"``
        (times in milliseconds) and ``"text"``. The list is empty when the
        path does not exist or the extension is neither ``.srt`` nor
        ``.vtt``. If parsing fails, the error is printed and the cues
        collected so far (possibly none) are returned.
    """
    if not os.path.exists(filepath):
        return []

    ext = os.path.splitext(filepath)[1].lower()
    subs = []

    if ext == '.srt':
        # Best-effort: a parse failure is reported, not raised.
        try:
            for sub in pysrt.open(filepath):
                subs.append({
                    "start": sub.start.ordinal,
                    "end": sub.end.ordinal,
                    "text": sub.text
                })
        except Exception as e:
            print(f"Error parsing SRT: {e}")
    elif ext == '.vtt':
        # Best-effort: a parse failure is reported, not raised.
        try:
            for caption in webvtt.read(filepath):
                # webvtt exposes timestamps as strings like "00:00:20.000",
                # so they must be converted to milliseconds here.
                subs.append({
                    "start": _parse_vtt_time(caption.start),
                    "end": _parse_vtt_time(caption.end),
                    "text": caption.text
                })
        except Exception as e:
            print(f"Error parsing VTT: {e}")

    return subs