Skip to content

Commit ef10981

Browse files
committed
Update docs for data loading
1 parent 0db2e16 commit ef10981

File tree

3 files changed

+57
-50
lines changed

3 files changed

+57
-50
lines changed

README.md

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -54,33 +54,34 @@ With libcachesim installed, you can start cache simulation for some eviction alg
 ```python
 import libcachesim as lcs
 
-# Step 1: Get one trace from S3 bucket
-URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
-dl = lcs.DataLoader()
-dl.load(URI)
-
-# Step 2: Open trace and process efficiently
+# Step 1: Open a trace hosted on S3 (find more via https://github.com/cacheMon/cache_dataset)
+URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
 reader = lcs.TraceReader(
-    trace = dl.get_cache_path(URI),
+    trace = URI,
     trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE,
     reader_init_params = lcs.ReaderInitParam(ignore_obj_size=False)
 )
 
-# Step 3: Initialize cache
-cache = lcs.S3FIFO(cache_size=1024*1024)
+# Step 2: Initialize cache
+cache = lcs.S3FIFO(
+    cache_size=1024*1024,
+    # Cache specific parameters
+    small_size_ratio=0.2,
+    ghost_size_ratio=0.8,
+    move_to_main_threshold=2,
+)
 
-# Step 4: Process entire trace efficiently (C++ backend)
-obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
-print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+# Step 3: Process entire trace efficiently (C++ backend)
+req_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
+print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
 
-# Step 4.1: Process with limited number of requests
-cache = lcs.S3FIFO(cache_size=1024*1024)
-obj_miss_ratio, byte_miss_ratio = cache.process_trace(
+# Step 3.1: Further process the first 1000 requests again
+req_miss_ratio, byte_miss_ratio = cache.process_trace(
     reader,
     start_req=0,
     max_req=1000
 )
-print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
 ```
 
 ## Plugin System

docs/src/en/getting_started/quickstart.md

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -56,33 +56,34 @@ With libcachesim installed, you can start cache simulation for some eviction alg
 ```python
 import libcachesim as lcs
 
-# Step 1: Get one trace from S3 bucket
-URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
-dl = lcs.DataLoader()
-dl.load(URI)
-
-# Step 2: Open trace and process efficiently
+# Step 1: Open a trace hosted on S3 (find more via https://github.com/cacheMon/cache_dataset)
+URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
 reader = lcs.TraceReader(
-    trace = dl.get_cache_path(URI),
+    trace = URI,
     trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE,
     reader_init_params = lcs.ReaderInitParam(ignore_obj_size=False)
 )
 
-# Step 3: Initialize cache
-cache = lcs.S3FIFO(cache_size=1024*1024)
+# Step 2: Initialize cache
+cache = lcs.S3FIFO(
+    cache_size=1024*1024,
+    # Cache specific parameters
+    small_size_ratio=0.2,
+    ghost_size_ratio=0.8,
+    move_to_main_threshold=2,
+)
 
-# Step 4: Process entire trace efficiently (C++ backend)
-obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
-print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+# Step 3: Process entire trace efficiently (C++ backend)
+req_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
+print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
 
-# Step 4.1: Process with limited number of requests
-cache = lcs.S3FIFO(cache_size=1024*1024)
-obj_miss_ratio, byte_miss_ratio = cache.process_trace(
+# Step 3.1: Further process the first 1000 requests again
+req_miss_ratio, byte_miss_ratio = cache.process_trace(
     reader,
     start_req=0,
     max_req=1000
 )
-print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
 ```
 
 The above example demonstrates the basic workflow of using `libcachesim` for cache simulation:

examples/basic_usage.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,30 @@
 import libcachesim as lcs
 
-# Step 1: Get one trace from S3 bucket
-URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
-dl = lcs.DataLoader()
-dl.load(URI)
-
-# Step 2: Open trace and process efficiently
+# Step 1: Open a trace hosted on S3 (find more via https://github.com/cacheMon/cache_dataset)
+URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
 reader = lcs.TraceReader(
-    trace=dl.get_cache_path(URI),
-    trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE,
-    reader_init_params=lcs.ReaderInitParam(ignore_obj_size=False),
+    trace = URI,
+    trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE,
+    reader_init_params = lcs.ReaderInitParam(ignore_obj_size=False)
 )
 
-# Step 3: Initialize cache
-cache = lcs.S3FIFO(cache_size=1024 * 1024)
+# Step 2: Initialize cache
+cache = lcs.S3FIFO(
+    cache_size=1024*1024,
+    # Cache specific parameters
+    small_size_ratio=0.2,
+    ghost_size_ratio=0.8,
+    move_to_main_threshold=2,
+)
 
-# Step 4: Process entire trace efficiently (C++ backend)
-obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
-print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+# Step 3: Process entire trace efficiently (C++ backend)
+req_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
+print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
 
-# Step 4.1: Process with limited number of requests
-cache = lcs.S3FIFO(cache_size=1024 * 1024)
-obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader, start_req=0, max_req=1000)
-print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+# Step 3.1: Further process the first 1000 requests again
+req_miss_ratio, byte_miss_ratio = cache.process_trace(
+    reader,
+    start_req=0,
+    max_req=1000
+)
+print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")

0 commit comments

Comments
 (0)