|
| 1 | +# Copyright (c) Microsoft Corporation. |
| 2 | +# Licensed under the MIT license. |
| 3 | + |
| 4 | +"""Performance comparison: prefetch_pages=0 (sequential) vs prefetch_pages=1. |
| 5 | +
|
| 6 | +Usage: |
| 7 | + python tests/perf/prefetch_pages.py |
| 8 | +
|
| 9 | +Setting PYTHONPATH=src is optional: the script prepends the repository's src/ directory to sys.path itself. A browser login prompt will appear. |
| 10 | +
|
| 11 | +Environment variables (all optional): |
| 12 | + DATAVERSE_URL Org URL — if omitted, you will be prompted interactively |
| 13 | + PAGE_SIZE Records per page (default: 10) |
| 14 | + RUNS Repetitions per mode for averaging (default: 3) |
| 15 | +""" |
| 16 | + |
| 17 | +import os |
| 18 | +import sys |
| 19 | +import time |
| 20 | +import statistics |
| 21 | + |
| 22 | +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src")) |
| 23 | + |
| 24 | +from azure.identity import InteractiveBrowserCredential |
| 25 | +from PowerPlatform.Dataverse.client import DataverseClient |
| 26 | + |
| 27 | +_TABLE = "new_PrefetchPerfTest" |
| 28 | +_RECORD_COUNT = 50 # 50 records ÷ PAGE_SIZE pages for pagination |
| 29 | + |
| 30 | + |
| 31 | +def _backoff(op, delays=(0, 2, 5, 10, 20)): |
| 32 | + last = None |
| 33 | + for d in delays: |
| 34 | + if d: |
| 35 | + time.sleep(d) |
| 36 | + try: |
| 37 | + return op() |
| 38 | + except Exception as ex: # noqa: BLE001 |
| 39 | + last = ex |
| 40 | + raise last |
| 41 | + |
| 42 | + |
def _setup(client: DataverseClient) -> None:
    """Make sure the benchmark table exists, then insert the sample records.

    The table is looked up first and only created when the lookup returns a
    falsy result. ``_RECORD_COUNT`` labelled records are inserted afterwards
    in either case. Every remote call goes through ``_backoff``.
    """

    def _lookup():
        return client.tables.get(_TABLE)

    def _create_table():
        return client.tables.create(_TABLE, {"new_Label": "string"})

    print(f"\n-> client.tables.get('{_TABLE}')")
    existing = _backoff(_lookup)
    if not existing:
        print(f"-> client.tables.create('{_TABLE}', ...)")
        _backoff(_create_table)
        print(f"[OK] Created table: {_TABLE}")
    else:
        print(f"[OK] Table already exists: {existing['table_schema_name']}")

    print(f"-> client.records.create('{_TABLE}', [{_RECORD_COUNT} records])")
    rows = [{"new_Label": f"record-{n:04d}"} for n in range(_RECORD_COUNT)]

    def _insert():
        return client.records.create(_TABLE, rows)

    created_ids = _backoff(_insert)
    print(f"[OK] Inserted {len(created_ids)} records")
| 58 | + |
| 59 | + |
def _cleanup(client: DataverseClient) -> None:
    """Delete the benchmark table on a best-effort basis.

    A failure (after the retries performed by ``_backoff``) is reported as a
    warning rather than raised, so cleanup never masks an earlier error.
    """

    def _drop_table():
        return client.tables.delete(_TABLE)

    print(f"\n-> client.tables.delete('{_TABLE}')")
    try:
        _backoff(_drop_table)
        print(f"[OK] Deleted table: {_TABLE}")
    except Exception as err:  # noqa: BLE001
        print(f"[WARN] Cleanup failed: {err}")
| 67 | + |
| 68 | + |
def _consume(client: DataverseClient, page_size: int, prefetch: int) -> tuple[int, int, float]:
    """Iterate every page of the benchmark table and time the whole pass.

    Returns:
        A ``(total_records, page_count, elapsed_seconds)`` tuple.
    """
    record_total = 0
    page_total = 0
    started = time.perf_counter()
    pager = client.records.get(_TABLE, page_size=page_size, prefetch_pages=prefetch)
    for page_total, batch in enumerate(pager, start=1):
        record_total += len(batch)
        # Stand-in for per-page work (e.g. db write, transform).
        time.sleep(0.05)
    elapsed = time.perf_counter() - started
    return record_total, page_total, elapsed
| 79 | + |
| 80 | + |
| 81 | +def _run(client: DataverseClient, page_size: int, runs: int) -> None: |
| 82 | + # Dry run to confirm page count |
| 83 | + _, page_count, _ = _consume(client, page_size, prefetch=0) |
| 84 | + print(f"\nPage size : {page_size}") |
| 85 | + print(f"Pages found : {page_count}") |
| 86 | + print(f"Runs : {runs}\n") |
| 87 | + |
| 88 | + results: dict[int, list[float]] = {0: [], 1: []} |
| 89 | + |
| 90 | + for run in range(1, runs + 1): |
| 91 | + for mode in (0, 1): |
| 92 | + _, _, elapsed = _consume(client, page_size, prefetch=mode) |
| 93 | + results[mode].append(elapsed) |
| 94 | + label = "sequential" if mode == 0 else "prefetch=1" |
| 95 | + print(f" Run {run} {label:12s} {elapsed:.3f}s") |
| 96 | + |
| 97 | + print() |
| 98 | + print("=" * 50) |
| 99 | + print(f"{'Mode':<14} {'Mean':>7} {'Median':>7} {'Min':>7} {'Max':>7}") |
| 100 | + print("-" * 50) |
| 101 | + for mode, label in ((0, "sequential"), (1, "prefetch=1")): |
| 102 | + t = results[mode] |
| 103 | + print(f"{label:<14} {statistics.mean(t):>6.3f}s {statistics.median(t):>6.3f}s {min(t):>6.3f}s {max(t):>6.3f}s") |
| 104 | + print("=" * 50) |
| 105 | + |
| 106 | + mean_seq = statistics.mean(results[0]) |
| 107 | + mean_pre = statistics.mean(results[1]) |
| 108 | + if mean_seq > 0: |
| 109 | + delta = (mean_seq - mean_pre) / mean_seq * 100 |
| 110 | + direction = "faster" if delta > 0 else "slower" |
| 111 | + print(f"\nprefetch=1 is {abs(delta):.1f}% {direction} on average.") |
| 112 | + |
| 113 | + |
def main():
    """Entry point: read settings, connect, benchmark, and always clean up.

    The org URL comes from ``DATAVERSE_URL`` or, when that is empty, from an
    interactive prompt; the process exits with status 1 if neither yields a
    value. ``PAGE_SIZE`` and ``RUNS`` are read from the environment with
    defaults of 10 and 3.
    """
    rule = "=" * 60
    print(rule)
    print("prefetch_pages performance comparison")
    print(rule)

    # Fall back to a prompt only when the environment gives no usable URL.
    base_url = (
        os.environ.get("DATAVERSE_URL", "").rstrip("/")
        or input("Enter Dataverse org URL: ").strip().rstrip("/")
    )
    if not base_url:
        print("No URL provided; exiting.")
        sys.exit(1)

    env = os.environ
    page_size = int(env.get("PAGE_SIZE", "10"))
    runs = int(env.get("RUNS", "3"))

    print("\n-> InteractiveBrowserCredential()")
    credential = InteractiveBrowserCredential()

    print(f"-> DataverseClient('{base_url}', ...)")
    with DataverseClient(base_url=base_url, credential=credential) as client:
        print(f"[OK] Connected to: {base_url}")
        try:
            _setup(client)
            _run(client, page_size, runs)
        finally:
            # Remove the benchmark table even when setup or the run failed.
            _cleanup(client)
| 140 | + |
| 141 | + |
| 142 | +if __name__ == "__main__": |
| 143 | + main() |
0 commit comments