accel-sim · reoLantern · Dec 29, 2025 · Copilot · Dec 29, 2025 · Copilot
diff --git a/util/tracer_nvbit/tracer_tool/tracer_tool.cu b/util/tracer_nvbit/tracer_tool/tracer_tool.cu
@@ -34,6 +34,50 @@
 
 #define TRACER_VERSION "5"
 
+/* Query an integer attribute of a kernel handle.
+ *
+ * cuFuncGetAttribute is tried first. If it rejects the handle with
+ * CUDA_ERROR_INVALID_HANDLE, the handle is cast to CUkernel and
+ * cuKernelGetAttribute is tried with the device from cuCtxGetDevice.
+ *
+ * Every failure is reported on stderr. Returns the attribute value on
+ * success and 0 on failure, so callers should treat 0 as a possible
+ * "attribute unavailable" result. */
+static int get_attr_with_kernel_fallback(CUfunction func,
+                                         CUfunction_attribute attr) {
+  int value = 0;
+  CUresult res = cuFuncGetAttribute(&value, attr, func);
+  if (res == CUDA_SUCCESS) {
+    return value;
+  }
+
+  if (res != CUDA_ERROR_INVALID_HANDLE) {
+    fprintf(stderr,
+            "tracer_tool: cuFuncGetAttribute failed (attr=%d) with error %d\n",
+            (int)attr, (int)res);
+    return 0;
+  }
+
+  /* The handle was rejected as a CUfunction; retry it as a CUkernel. */
+  CUdevice dev = 0;
+  CUresult dev_res = cuCtxGetDevice(&dev);
+  if (dev_res != CUDA_SUCCESS) {
+    fprintf(stderr,
+            "tracer_tool: cuCtxGetDevice failed with error %d while getting attribute %d\n",
+            (int)dev_res, (int)attr);
+    return 0;
+  }
+
+  CUresult kern_res = cuKernelGetAttribute(&value, attr, (CUkernel)func, dev);
+  if (kern_res != CUDA_SUCCESS) {
+    fprintf(stderr,
+            "tracer_tool: cuKernelGetAttribute failed (attr=%d) with error %d\n",
+            (int)attr, (int)kern_res);
+    return 0;
+  }
+  return value;
+}
+
 /* Channel used to communicate from GPU to CPU receiving thread */
 #define CHANNEL_SIZE (1l << 20)
 static __managed__ ChannelDev channel_dev;
@@ -502,16 +515,12 @@ static void enter_kernel_launch(CUcontext ctx, CUfunction func,
   }
 
   // Get the number of registers and shared memory size for the kernel
-  int nregs;
-  CUDA_SAFECALL(cuFuncGetAttribute(&nregs, CU_FUNC_ATTRIBUTE_NUM_REGS, func));
-
-  int shmem_static_nbytes;
-  CUDA_SAFECALL(cuFuncGetAttribute(&shmem_static_nbytes,
-                                   CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, func));
-
-  int binary_version;
-  CUDA_SAFECALL(cuFuncGetAttribute(&binary_version,
-                                   CU_FUNC_ATTRIBUTE_BINARY_VERSION, func));
+  int nregs =
+      get_attr_with_kernel_fallback(func, CU_FUNC_ATTRIBUTE_NUM_REGS);
+  int shmem_static_nbytes = get_attr_with_kernel_fallback(
+      func, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES);
+  int binary_version =
+      get_attr_with_kernel_fallback(func, CU_FUNC_ATTRIBUTE_BINARY_VERSION);
 
   // Instrument the kernel if needed
   instrument_function_if_needed(ctx, func);