-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest.py
More file actions
147 lines (135 loc) · 4.96 KB
/
test.py
File metadata and controls
147 lines (135 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import re
# Characters accepted inside a URL after the scheme/host prefix.  ``$-_`` is an
# ASCII *range* ('$' through '_'), which is what lets '/', ':', '?', '=', '&',
# '%', '.', ',' and '-' through; '!', '#' (fragments) and '~' are listed
# explicitly because they fall outside that range.
_URL_CHARS = r"[a-zA-Z0-9!#$-_~]"

# Zero-width check: a match may not END on sentence punctuation, so that
# "see http://example.com, then ..." keeps its comma outside the placeholder.
_NO_TRAILING_PUNCT = r"(?<![.,;:!?])"

# The domain tail may contain dots and hyphens but must not end on one, so a
# full stop directly after an address stays in the text.
_EMAIL_RE = re.compile(
    r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+(?<![.-])"
)

# Three accepted shapes, tried in this order: 10 digits with optional
# separators, "(ddd) ddd-dddd", and a 7-digit local number.
_PHONE_RE = re.compile(
    r"(\d{3}[-\.\s]??\d{3}[-\.\s]??\d{4}|\(\d{3}\)\s*\d{3}[-\.\s]??\d{4}|\d{3}[-\.\s]??\d{4})"
)

# Scheme and "www." are optional.  The path is limited to URL characters
# instead of ".+", which used to consume the remainder of the line.
_YOUTUBE_RE = re.compile(
    r"(https?://)?(www\.)?(youtube\.com|youtu\.?be)/"
    + _URL_CHARS
    + "+"
    + _NO_TRAILING_PUNCT
)

# Any other http(s) URL.
_LINK_RE = re.compile(r"https?://" + _URL_CHARS + "+" + _NO_TRAILING_PUNCT)


def clean_custom_patterns(text):
    """Replace emails, phone numbers and URLs in *text* with placeholders.

    The substitutions run in a fixed order -- email, phone, YouTube link,
    generic link -- and each pass works on the output of the previous one:

    * email addresses      -> ``[email]``
    * phone numbers        -> ``[phone]``
    * YouTube URLs         -> ``[youtube]``
    * other http(s) URLs   -> ``[link]``

    YouTube links must be handled before generic links, otherwise an
    ``http(s)://`` YouTube URL would be turned into ``[link]``.

    Args:
        text: The string to sanitise.

    Returns:
        A new string with every recognised pattern replaced.  Text without
        any recognised pattern (including the empty string) is returned
        unchanged.
    """
    clean_text = _EMAIL_RE.sub("[email]", text)
    clean_text = _PHONE_RE.sub("[phone]", clean_text)
    clean_text = _YOUTUBE_RE.sub("[youtube]", clean_text)
    return _LINK_RE.sub("[link]", clean_text)
def test_clean_custom_patterns():
    """Run ``clean_custom_patterns`` over a fixed table of sample strings.

    Nothing is asserted here; the caller decides what to do with failures.

    Returns:
        A list with one dict per sample, in table order, holding the keys
        ``test_case`` (1-based index), ``input``, ``output``, ``expected``
        and ``passed`` (whether ``output == expected``).
    """
    # Each entry is (input text, expected cleaned text).
    samples = (
        # Email tests
        ("Contact me at john.doe@example.com", "Contact me at [email]"),
        (
            "Multiple emails: a@b.com and x.y@z.co.uk",
            "Multiple emails: [email] and [email]",
        ),
        (
            "Complex email: user+tag@subdomain.example.com",
            "Complex email: [email]",
        ),
        # Phone tests
        ("Call me at 123-456-7890", "Call me at [phone]"),
        (
            "Phone formats: (123) 456-7890, 123.456.7890, 123 456 7890",
            "Phone formats: [phone], [phone], [phone]",
        ),
        ("Short number: 123-4567", "Short number: [phone]"),
        # YouTube link tests
        (
            "Watch: https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            "Watch: [youtube]",
        ),
        (
            "Short YouTube: https://youtu.be/dQw4w9WgXcQ",
            "Short YouTube: [youtube]",
        ),
        (
            "YouTube without protocol: www.youtube.com/watch?v=dQw4w9WgXcQ",
            "YouTube without protocol: [youtube]",
        ),
        # Regular link tests
        ("Visit http://example.com", "Visit [link]"),
        (
            "Secure link: https://secure.example.com/path?query=value",
            "Secure link: [link]",
        ),
        (
            "Link with port: http://localhost:8080/test",
            "Link with port: [link]",
        ),
        # Mixed pattern tests
        (
            "Contact: john@example.com, 123-456-7890, https://youtu.be/abc123, http://example.com",
            "Contact: [email], [phone], [youtube], [link]",
        ),
        ("No patterns here!", "No patterns here!"),
        # Edge cases
        (
            "Email in parentheses (john.doe@example.com)",
            "Email in parentheses ([email])",
        ),
        (
            "Phone with extension: 123-456-7890 ext. 123",
            "Phone with extension: [phone] ext. 123",
        ),
        (
            "YouTube channel: https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw",
            "YouTube channel: [youtube]",
        ),
        (
            "Link with fragment: https://example.com/page#section",
            "Link with fragment: [link]",
        ),
        # Repeated patterns
        (
            "Repeat: john@example.com john@example.com",
            "Repeat: [email] [email]",
        ),
        (
            "Repeat: 123-456-7890 123-456-7890",
            "Repeat: [phone] [phone]",
        ),
        (
            "Repeat: https://youtube.com/watch?v=123 https://youtube.com/watch?v=456",
            "Repeat: [youtube] [youtube]",
        ),
        (
            "Repeat: http://example.com http://example.org",
            "Repeat: [link] [link]",
        ),
    )

    def evaluate(number, raw, wanted):
        # Build the report entry for a single sample.
        cleaned = clean_custom_patterns(raw)
        return {
            "test_case": number,
            "input": raw,
            "output": cleaned,
            "expected": wanted,
            "passed": cleaned == wanted,
        }

    return [
        evaluate(number, raw, wanted)
        for number, (raw, wanted) in enumerate(samples, start=1)
    ]
# Execute every sample once and keep the per-case reports.
test_results = test_clean_custom_patterns()

# Detailed report: one six-line stanza per case (five fields + a blank line).
for result in test_results:
    print(
        f"Test Case {result['test_case']}:",
        f"Input: {result['input']}",
        f"Output: {result['output']}",
        f"Expected: {result['expected']}",
        f"Passed: {result['passed']}",
        "",
        sep="\n",
    )

# Totals: "passed" is a bool per case, so summing the flags counts the passes.
total_tests = len(test_results)
passed_tests = sum(entry["passed"] for entry in test_results)
for label, count in (
    ("Total tests", total_tests),
    ("Passed tests", passed_tests),
    ("Failed tests", total_tests - passed_tests),
):
    print(f"{label}: {count}")