-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdomain_generator.py
More file actions
128 lines (102 loc) · 3.6 KB
/
domain_generator.py
File metadata and controls
128 lines (102 loc) · 3.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""
Domain generator from simplified regex patterns.
Supports: [a-z], [0-9], {n}, (a|b|c), the digit/word shorthands (backslash + d / w), backslash-escaped literals, literal characters
"""
import re
import itertools
from typing import Generator, List, Tuple
def parse_char_class(pattern: str) -> List[str]:
    """Expand the inside of a character class into its member characters.

    ``pattern`` is the text between the brackets, e.g. ``a-z`` or ``a-f0-9_``.
    An ``x-y`` triple expands to every code point from ``x`` to ``y``
    inclusive; any other character stands for itself.  A ``-`` that cannot be
    the middle of a triple (e.g. a trailing one) is kept as a literal.  A
    descending range such as ``z-a`` contributes no characters.

    Returns the members in first-seen order with duplicates removed, so
    overlapping members such as ``a-cb`` or ``aa`` do not make callers
    enumerate or count the same character twice.
    """
    chars: List[str] = []
    i = 0
    while i < len(pattern):
        # A range needs three characters: start, '-', end.
        if i + 2 < len(pattern) and pattern[i + 1] == '-':
            start, end = pattern[i], pattern[i + 2]
            chars.extend(chr(code) for code in range(ord(start), ord(end) + 1))
            i += 3
        else:
            chars.append(pattern[i])
            i += 1
    # dict.fromkeys keeps insertion order while dropping repeated members.
    return list(dict.fromkeys(chars))
def parse_alternation(pattern: str) -> List[str]:
    """Split the inside of a group such as ``com|net`` into its branches.

    Every ``|`` is a separator; empty branches are kept as empty strings.
    """
    branches: List[str] = []
    remainder = pattern
    while True:
        head, bar, remainder = remainder.partition('|')
        branches.append(head)
        if not bar:
            # No separator left: ``head`` was the final branch.
            return branches
def _strip_anchors(pattern: str) -> str:
    """Drop leading ``^`` anchors and trailing unescaped ``$`` anchors."""
    pattern = pattern.lstrip('^')
    while pattern.endswith('$'):
        body = pattern[:-1]
        # An odd run of backslashes right before the '$' means the '$' is
        # escaped (a literal dollar sign), so it is not an anchor and stays.
        if (len(body) - len(body.rstrip('\\'))) % 2:
            break
        pattern = body
    return pattern


def tokenize_regex(pattern: str) -> List[Tuple[List[str], int]]:
    r"""Tokenize a simplified regex into ``(choices, repeat_count)`` tuples.

    Supported syntax: character classes ``[...]``, alternation groups
    ``(a|b)``, the shorthands ``\d`` and ``\w`` (lowercase letters, digits
    and ``_``), backslash-escaped literals such as ``\.``, plain literal
    characters, and the fixed quantifier ``{n}`` after any of those.
    Leading ``^`` and trailing unescaped ``$`` anchors are ignored.

    Returns a list such as
    ``[(['a', ..., 'z'], 2), (['0', ..., '9'], 3), (['com', 'net'], 1)]``.
    Elements whose choice list is empty, and quantifiers that follow
    nothing, are dropped.

    Raises:
        ValueError: on an unterminated ``[``, ``(`` or ``{``, a trailing lone
            backslash, or a quantifier that is not a non-negative integer.
    """
    pattern = _strip_anchors(pattern)
    tokens: List[Tuple[List[str], int]] = []
    i = 0
    while i < len(pattern):
        char_set = None
        repeat = 1
        current = pattern[i]

        if current == '[':
            # Character class [...]
            end = pattern.find(']', i)
            if end == -1:
                raise ValueError(
                    f"unterminated '[' at position {i} in {pattern!r}"
                )
            char_set = parse_char_class(pattern[i + 1:end])
            i = end + 1
        elif current == '(':
            # Alternation (...)
            end = pattern.find(')', i)
            if end == -1:
                raise ValueError(
                    f"unterminated '(' at position {i} in {pattern!r}"
                )
            char_set = parse_alternation(pattern[i + 1:end])
            i = end + 1
        elif current == '\\':
            # Shorthand class or escaped literal; needs a following character.
            if i + 1 >= len(pattern):
                raise ValueError(f"dangling backslash at end of {pattern!r}")
            next_char = pattern[i + 1]
            if next_char == 'd':
                char_set = [str(n) for n in range(10)]
            elif next_char == 'w':
                char_set = list('abcdefghijklmnopqrstuvwxyz0123456789_')
            else:
                char_set = [next_char]
            i += 2
        elif current != '{':
            # Plain literal; a bare '{' is left for the quantifier step below.
            char_set = [current]
            i += 1

        # Optional {n} quantifier applying to the element just read.
        if i < len(pattern) and pattern[i] == '{':
            end = pattern.find('}', i)
            if end == -1:
                raise ValueError(
                    f"unterminated '{{' at position {i} in {pattern!r}"
                )
            try:
                repeat = int(pattern[i + 1:end])
            except ValueError as err:
                raise ValueError(
                    f"unsupported quantifier {pattern[i:end + 1]!r}; "
                    "only {n} with an integer n is supported"
                ) from err
            if repeat < 0:
                raise ValueError(
                    f"negative repeat count in quantifier {pattern[i:end + 1]!r}"
                )
            i = end + 1

        if char_set:
            tokens.append((char_set, repeat))
    return tokens
def generate_domains(pattern: str) -> Generator[str, None, None]:
    """
    Lazily yield every string the simplified regex pattern expands to.

    Example pattern: ^[a-z]{2}[0-9]{3}\\.(com|net)$
    Yields: aa000.com, aa000.net, aa001.com, ...
    """
    # One pool of choices per output position: a token repeated n times
    # contributes its choice list n times.
    pools = [
        choices
        for choices, times in tokenize_regex(pattern)
        for _ in range(times)
    ]
    # Cartesian product over the pools, each combination joined into a string.
    yield from map(''.join, itertools.product(*pools))
def count_combinations(pattern: str) -> int:
    """Return how many strings the pattern expands to, without generating them."""
    result = 1
    # Each token contributes len(choices) options at each of its positions.
    for choices, times in tokenize_regex(pattern):
        result *= pow(len(choices), times)
    return result
if __name__ == '__main__':
    # Demo run: report the size of a sample pattern and preview its output.
    test_pattern = r'^[a-z]{2}[0-9]{3}\.(com|net)$'
    print(f"Pattern: {test_pattern}")
    print(f"Total combinations: {count_combinations(test_pattern):,}")

    # Only the first 10 results are pulled from the lazy generator.
    print("\nFirst 10 domains:")
    for domain in itertools.islice(generate_domains(test_pattern), 10):
        print(f" {domain}")