Add smooth_l1_loss operator; expose hypot, index_add, index_copy, take

lengmuzhaxi · lengmuzhaxi · commit 3117d93d3859 · 2025-12-24T16:45:47.000+08:00
diff --git a/include/infiniop.h b/include/infiniop.h
@@ -10,6 +10,9 @@
 #include "infiniop/ops/dequantize_awq.h"
 #include "infiniop/ops/gelu.h"
 #include "infiniop/ops/gemm.h"
+#include "infiniop/ops/hypot.h"
+#include "infiniop/ops/index_add.h"
+#include "infiniop/ops/index_copy.h"
 #include "infiniop/ops/layer_norm.h"
 #include "infiniop/ops/logsoftmax.h"
 #include "infiniop/ops/lp_norm.h"
@@ -22,11 +25,13 @@
 #include "infiniop/ops/rope.h"
 #include "infiniop/ops/sigmoid.h"
 #include "infiniop/ops/silu.h"
+#include "infiniop/ops/smooth_l1_loss.h"
 #include "infiniop/ops/softmax.h"
 #include "infiniop/ops/softplus.h"
 #include "infiniop/ops/sub.h"
 #include "infiniop/ops/swiglu.h"
 #include "infiniop/ops/tanh.h"
+#include "infiniop/ops/take.h"
 #include "infiniop/ops/topkrouter.h"
 #include "infiniop/ops/topksoftmax.h"
 #include "infiniop/ops/zeros.h"
diff --git a/python/infinicore/__init__.py b/python/infinicore/__init__.py
@@ -45,6 +45,10 @@
 from infinicore.ops.mul import mul
 from infinicore.ops.narrow import narrow
 from infinicore.ops.rearrange import rearrange
+from infinicore.ops.hypot import hypot
+from infinicore.ops.index_add import index_add
+from infinicore.ops.index_copy import index_copy
+from infinicore.ops.take import take
 from infinicore.tensor import (
     Tensor,
     empty,
@@ -111,6 +115,10 @@
     "from_list",
     "from_numpy",
     "from_torch",
+    "hypot",
+    "index_copy",
+    "index_add",
+    "take",
     "ones",
     "strided_empty",
     "strided_from_blob",
diff --git a/python/infinicore/nn/functional/__init__.py b/python/infinicore/nn/functional/__init__.py
@@ -6,12 +6,13 @@
 from .rope import RopeAlgo, rope
 from .silu import silu
 from .swiglu import swiglu
-
+from .smooth_l1_loss import smooth_l1_loss
 __all__ = [
     "causal_softmax",
     "random_sample",
     "rms_norm",
     "silu",
+    "smooth_l1_loss",
     "swiglu",
     "linear",
     "embedding",
diff --git a/src/infinicore/pybind11/ops.hpp b/src/infinicore/pybind11/ops.hpp
@@ -6,6 +6,11 @@
 #include "ops/attention.hpp"
 #include "ops/causal_softmax.hpp"
 #include "ops/embedding.hpp"
+#include "ops/hypot.hpp"
+#include "ops/take.hpp"
+#include "ops/index_copy.hpp"
+#include "ops/index_add.hpp"
+#include "ops/smooth_l1_loss.hpp"
 #include "ops/linear.hpp"
 #include "ops/matmul.hpp"
 #include "ops/mul.hpp"
@@ -28,6 +33,11 @@ inline void bind(py::module &m) {
     bind_linear(m);
     bind_matmul(m);
     bind_mul(m);
+     bind_hypot(m);
+    bind_take(m);
+    bind_index_copy(m);
+    bind_index_add(m);
+    bind_smooth_l1_loss(m);
     bind_rearrange(m);
     bind_rms_norm(m);
     bind_silu(m);
diff --git a/src/infiniop/ops/smooth_l1_loss/cuda/kernel.cuh b/src/infiniop/ops/smooth_l1_loss/cuda/kernel.cuh
@@ -1,12 +1,12 @@
 #ifndef __SMOOTH_L1_LOSS_CUDA_CUH__
 #define __SMOOTH_L1_LOSS_CUDA_CUH__
 
-#include <cuda_runtime.h>
 #if defined(__MACA__) || defined(__MACACC__)
     #include <maca_fp16.h>
     #include <maca_bfloat16.h>
     using nv_bfloat162 = __maca_bfloat162;
 #else
+    #include <cuda_runtime.h>
     #include <cuda_fp16.h>
     #include <cuda_bf16.h>
 #endif