 import csv
-import os
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
+from webdriver_manager.chrome import ChromeDriverManager
 from percy import percy_snapshot
 from time import sleep
 from concurrent.futures import ThreadPoolExecutor
+from selenium.webdriver.chrome.options import Options
+from urllib.parse import urlparse
+import re
 
-CSV_FILE = './urls.csv'  # Path to your CSV file
-NUM_THREADS = 5  # Number of parallel threads
-CHROMEDRIVER_PATH = "./chromedriver"
+CSV_FILE = './urls.csv'  # Path to your CSV file
+NUM_THREADS = 2  # Number of parallel threads
 
 # Load URLs from CSV
 def load_urls():
     with open(CSV_FILE, newline='') as file:
         reader = csv.reader(file)
-        return [row[0].strip() for row in reader if row[0].strip().startswith(("http://", "https://"))]
+        return [row[0].strip() for row in reader if row and row[0].strip().startswith(("http://", "https://"))]
+
+# Helper to split a list into n near-even chunks
+def split_list(lst, n):
+    k, m = divmod(len(lst), n)
+    return [lst[i*k + min(i, m):(i+1)*k + min(i+1, m)] for i in range(n)]
 
 # Function for each thread to process its batch of URLs
 def process_urls(urls):
     if not urls:
         print("No URLs provided to process.")
         return
-
-    service = Service(CHROMEDRIVER_PATH)
-    driver = webdriver.Chrome(service=service)
+    # Use webdriver-manager to automatically install Chromedriver
+    options = Options()
+    options.add_argument("--headless=new")  # optional but recommended for Percy
+    options.add_argument("--no-sandbox")
+    options.add_argument("--disable-dev-shm-usage")
+
+    service = Service(ChromeDriverManager(driver_version="139.0.7258.155").install())
+    driver = webdriver.Chrome(service=service, options=options)
+    driver.set_window_size(1200, 800)
     try:
         for url in urls:
             print(f"Loading URL: {url}")
             driver.get(url)
-            sleep(2)
+            sleep(2)
+
+            parsed_url = urlparse(url)
+            hostname = parsed_url.netloc
+            if hostname.startswith("www."):
+                hostname = hostname[4:]
+
+            # Sanitize the path: drop the leading slash, then replace special characters with underscores
+            path = parsed_url.path.lstrip('/')
+            sanitized_path = re.sub(r'[^a-zA-Z0-9_-]', '_', path)  # Replace non-alphanum/underscore/dash chars
+
+            # Construct snapshot name
+            if sanitized_path:
+                snapshot_name = f"Snapshot for {hostname}_{sanitized_path}"
+            else:
+                snapshot_name = f"Snapshot for {hostname}"
 
-            # Capture Percy snapshot
-            snapshot_name = f"Snapshot for {url}"
             print(f"Capturing Percy snapshot: {snapshot_name}")
-            percy_snapshot(driver, snapshot_name)
+            percy_snapshot(driver, snapshot_name, widths=[768, 1200])
+
     finally:
-        driver.quit()  # Ensure the driver closes after the batch is done
+        driver.quit()
 
 def main():
     urls = load_urls()
+    if not urls:
+        print("No URLs found in the CSV file.")
+        return
 
-    # Split URLs into batches based on the number of threads
-    batch_size = len(urls) // NUM_THREADS
-    url_batches = [urls[i:i + batch_size] for i in range(0, len(urls), batch_size)]
+    url_batches = split_list(urls, NUM_THREADS)
 
-    # Process each batch in parallel
     with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
-        futures = [executor.submit(process_urls, batch) for batch in url_batches]
-
+        # Submit only non-empty batches
+        futures = [executor.submit(process_urls, batch) for batch in url_batches if batch]
         for future in futures:
             future.result()
 
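For context on the batching change: split_list uses divmod to spread the remainder over the first chunks, so every batch is within one URL of the others and nothing is dropped, whereas the old len(urls) // NUM_THREADS slicing could produce more batches than threads and raises ValueError when there are fewer URLs than threads (the slice step becomes 0). A minimal sketch of the behaviour, using made-up URLs:

def split_list(lst, n):
    # Same chunking as in the commit: the first m chunks each get one extra item
    k, m = divmod(len(lst), n)
    return [lst[i*k + min(i, m):(i+1)*k + min(i+1, m)] for i in range(n)]

urls = [f"https://example.com/page{i}" for i in range(5)]  # hypothetical URLs
print(split_list(urls, 2))
# -> first batch gets 3 URLs (page0-page2), second gets 2 (page3-page4)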
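One usage note, assuming the standard Percy workflow rather than anything shown in this commit: percy_snapshot only uploads snapshots when the script runs under the Percy CLI, so it would typically be launched with the PERCY_TOKEN environment variable set and wrapped as "npx percy exec -- python snapshot_urls.py", where snapshot_urls.py is a placeholder for whatever this file is actually named.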