Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
050d217
Add `blas_amax` and `blas_amin` operators
xu-zhengzhong Apr 28, 2026
c2dba21
Add `asum` operator
xu-zhengzhong Apr 28, 2026
5df0cc8
Add `axpy` operator
xu-zhengzhong Apr 28, 2026
d6584f7
Add `blas_copy` operator
xu-zhengzhong Apr 28, 2026
ac01531
Add `blas_dot` operator
xu-zhengzhong Apr 28, 2026
b876b79
Add `nrm2` operator
xu-zhengzhong Apr 28, 2026
33fbb95
Add `rot` operator
xu-zhengzhong Apr 28, 2026
07f4722
Add `rotg` operator
xu-zhengzhong Apr 28, 2026
9259321
Add `rotm` operator
xu-zhengzhong Apr 28, 2026
079f5f4
Add `rotmg` operator
xu-zhengzhong Apr 28, 2026
0153263
Add `scal` operator
xu-zhengzhong Apr 28, 2026
e64afb9
Add `swap` operator
xu-zhengzhong Apr 28, 2026
f8e9cc2
Use mcBLAS Ex APIs for `axpy`, `blas_dot`, `nrm2`, `rot` and `scal` o…
xu-zhengzhong Apr 29, 2026
9da0b53
Split BLAS op info into local headers
xu-zhengzhong May 6, 2026
b4f5bfa
Add InfiniCore `asum`, `blas_amax`, `blas_amin`, `blas_dot` and `nrm2…
xu-zhengzhong May 6, 2026
8b57466
Add InfiniCore `axpy`, `blas_copy`, `scal` and `swap` wrappers
xu-zhengzhong May 6, 2026
74564bd
Add InfiniCore `rot`, `rotg`, `rotm` and `rotmg` wrappers
xu-zhengzhong May 6, 2026
f91e4d3
Fix format and type conversion errors
xu-zhengzhong May 7, 2026
20e0146
Fix: set InfiniCore device explicitly before running test cases
xu-zhengzhong May 7, 2026
7317e42
Update `axpy`, `blas_dot`, `nrm2`, `rot` and `scal` to use the graph …
xu-zhengzhong May 7, 2026
446d0a7
Update `asum`, `blas_amax`, `blas_amin`, `blas_copy` and `swap` to us…
xu-zhengzhong May 8, 2026
0a8fa56
Add `InsertNewlineAtEOF: true` to `.clang-format`
xu-zhengzhong May 9, 2026
d1cee8a
Run `scripts/format.py` to fix the format
xu-zhengzhong May 9, 2026
0ce6e8e
Add empty `getMemInfo` and `getDeviceResourceSnapshot` to `infinirt_b…
xu-zhengzhong May 9, 2026
bbb364e
Revert `.clang-format`
xu-zhengzhong May 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions include/infinicore/ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,18 @@
#include "ops/addcmul.hpp"
#include "ops/asin.hpp"
#include "ops/asinh.hpp"
#include "ops/asum.hpp"
#include "ops/atanh.hpp"
#include "ops/attention.hpp"
#include "ops/avg_pool1d.hpp"
#include "ops/axpy.hpp"
#include "ops/baddbmm.hpp"
#include "ops/bilinear.hpp"
#include "ops/binary_cross_entropy_with_logits.hpp"
#include "ops/blas_amax.hpp"
#include "ops/blas_amin.hpp"
#include "ops/blas_copy.hpp"
#include "ops/blas_dot.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cdist.hpp"
#include "ops/conv2d.hpp"
Expand All @@ -28,6 +34,7 @@
#include "ops/layer_norm.hpp"
#include "ops/linear.hpp"
#include "ops/matmul.hpp"
#include "ops/nrm2.hpp"
#include "ops/ones.hpp"
#include "ops/paged_attention.hpp"
#include "ops/paged_attention_prefill.hpp"
Expand All @@ -41,8 +48,14 @@
#include "ops/relu.hpp"
#include "ops/rms_norm.hpp"
#include "ops/rope.hpp"
#include "ops/rot.hpp"
#include "ops/rotg.hpp"
#include "ops/rotm.hpp"
#include "ops/rotmg.hpp"
#include "ops/scal.hpp"
#include "ops/silu.hpp"
#include "ops/silu_and_mul.hpp"
#include "ops/softmax.hpp"
#include "ops/swap.hpp"
#include "ops/swiglu.hpp"
#include "ops/topksoftmax.hpp"
14 changes: 14 additions & 0 deletions include/infinicore/ops/asum.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#pragma once

#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Declares the `Asum` op class via INFINICORE_GRAPH_OP_CLASS; the trailing
/// macro arguments (const Tensor &, Tensor) match the parameter types of `asum_`.
/// NOTE(review): the macro is not defined in this header (presumably it comes
/// from ../graph/graph.hpp) — confirm which members it generates.
INFINICORE_GRAPH_OP_CLASS(Asum, const Tensor &, Tensor);

/// Takes the input tensor `x` and returns the result as a Tensor.
/// Presumably the BLAS level-1 `asum` (sum of absolute values of `x`) —
/// TODO confirm against the implementation.
Tensor asum(const Tensor &x);
/// Same operation, but the output goes to the caller-supplied `result` tensor.
/// NOTE(review): required shape/dtype of `result` is not visible here — confirm.
void asum_(const Tensor &x, Tensor result);

} // namespace infinicore::op
13 changes: 13 additions & 0 deletions include/infinicore/ops/axpy.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Declares the `Axpy` op class via INFINICORE_GRAPH_OP_CLASS; the trailing
/// macro arguments match the parameter types of `axpy_` (alpha, x, y).
/// NOTE(review): the macro is not defined in this header (presumably it comes
/// from ../graph/graph.hpp) — confirm which members it generates.
INFINICORE_GRAPH_OP_CLASS(Axpy, const Tensor &, const Tensor &, Tensor);

/// Only an underscore-suffixed entry point is declared; there is no
/// value-returning overload. `alpha` and `x` are taken by const reference,
/// `y` by value.
/// Presumably the BLAS level-1 `axpy` update (y <- alpha * x + y) with the
/// scalar passed as a tensor — TODO confirm against the implementation.
void axpy_(const Tensor &alpha, const Tensor &x, Tensor y);

} // namespace infinicore::op
14 changes: 14 additions & 0 deletions include/infinicore/ops/blas_amax.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#pragma once

#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Declares the `BlasAmax` op class via INFINICORE_GRAPH_OP_CLASS; the trailing
/// macro arguments (const Tensor &, Tensor) match the parameter types of `blas_amax_`.
/// NOTE(review): the macro is not defined in this header (presumably it comes
/// from ../graph/graph.hpp) — confirm which members it generates.
INFINICORE_GRAPH_OP_CLASS(BlasAmax, const Tensor &, Tensor);

/// Takes the input tensor `x` and returns the result as a Tensor.
/// Presumably the BLAS level-1 `i?amax` (index of the element with the largest
/// absolute value) — TODO confirm, including whether the index is 0- or 1-based.
Tensor blas_amax(const Tensor &x);
/// Same operation, but the output goes to the caller-supplied `result` tensor.
/// NOTE(review): required shape/dtype of `result` is not visible here — confirm.
void blas_amax_(const Tensor &x, Tensor result);

} // namespace infinicore::op
14 changes: 14 additions & 0 deletions include/infinicore/ops/blas_amin.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#pragma once

#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Declares the `BlasAmin` op class via INFINICORE_GRAPH_OP_CLASS; the trailing
/// macro arguments (const Tensor &, Tensor) match the parameter types of `blas_amin_`.
/// NOTE(review): the macro is not defined in this header (presumably it comes
/// from ../graph/graph.hpp) — confirm which members it generates.
INFINICORE_GRAPH_OP_CLASS(BlasAmin, const Tensor &, Tensor);

/// Takes the input tensor `x` and returns the result as a Tensor.
/// Presumably the BLAS level-1 `i?amin` (index of the element with the smallest
/// absolute value) — TODO confirm, including whether the index is 0- or 1-based.
Tensor blas_amin(const Tensor &x);
/// Same operation, but the output goes to the caller-supplied `result` tensor.
/// NOTE(review): required shape/dtype of `result` is not visible here — confirm.
void blas_amin_(const Tensor &x, Tensor result);

} // namespace infinicore::op
13 changes: 13 additions & 0 deletions include/infinicore/ops/blas_copy.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Declares the `BlasCopy` op class via INFINICORE_GRAPH_OP_CLASS; the trailing
/// macro arguments (const Tensor &, Tensor) match the parameter types of `blas_copy_`.
/// NOTE(review): the macro is not defined in this header (presumably it comes
/// from ../graph/graph.hpp) — confirm which members it generates.
INFINICORE_GRAPH_OP_CLASS(BlasCopy, const Tensor &, Tensor);

/// Only an underscore-suffixed entry point is declared; there is no
/// value-returning overload. `x` is taken by const reference, `y` by value.
/// Presumably the BLAS level-1 `copy` (y <- x) — TODO confirm against the
/// implementation.
void blas_copy_(const Tensor &x, Tensor y);

} // namespace infinicore::op
14 changes: 14 additions & 0 deletions include/infinicore/ops/blas_dot.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#pragma once

#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Declares the `BlasDot` op class via INFINICORE_GRAPH_OP_CLASS; the trailing
/// macro arguments match the parameter types of `blas_dot_` (x, y, result).
/// NOTE(review): the macro is not defined in this header (presumably it comes
/// from ../graph/graph.hpp) — confirm which members it generates.
INFINICORE_GRAPH_OP_CLASS(BlasDot, const Tensor &, const Tensor &, Tensor);

/// Takes the two input tensors `x` and `y` and returns the result as a Tensor.
/// Presumably the BLAS level-1 `dot` product of `x` and `y` — TODO confirm
/// against the implementation.
Tensor blas_dot(const Tensor &x, const Tensor &y);
/// Same operation, but the output goes to the caller-supplied `result` tensor.
/// NOTE(review): required shape/dtype of `result` is not visible here — confirm.
void blas_dot_(const Tensor &x, const Tensor &y, Tensor result);

} // namespace infinicore::op
14 changes: 14 additions & 0 deletions include/infinicore/ops/nrm2.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#pragma once

#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Declares the `Nrm2` op class via INFINICORE_GRAPH_OP_CLASS; the trailing
/// macro arguments (const Tensor &, Tensor) match the parameter types of `nrm2_`.
/// NOTE(review): the macro is not defined in this header (presumably it comes
/// from ../graph/graph.hpp) — confirm which members it generates.
INFINICORE_GRAPH_OP_CLASS(Nrm2, const Tensor &, Tensor);

/// Takes the input tensor `x` and returns the result as a Tensor.
/// Presumably the BLAS level-1 `nrm2` (Euclidean norm of `x`) — TODO confirm
/// against the implementation.
Tensor nrm2(const Tensor &x);
/// Same operation, but the output goes to the caller-supplied `result` tensor.
/// NOTE(review): required shape/dtype of `result` is not visible here — confirm.
void nrm2_(const Tensor &x, Tensor result);

} // namespace infinicore::op
13 changes: 13 additions & 0 deletions include/infinicore/ops/rot.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Declares the `Rot` op class via INFINICORE_GRAPH_OP_CLASS; the trailing
/// macro arguments match the parameter types of `rot_` (x, y, c, s).
/// NOTE(review): the macro is not defined in this header (presumably it comes
/// from ../graph/graph.hpp) — confirm which members it generates.
INFINICORE_GRAPH_OP_CLASS(Rot, Tensor, Tensor, const Tensor &, const Tensor &);

/// Only an underscore-suffixed entry point is declared; there is no
/// value-returning overload. `x` and `y` are taken by value, `c` and `s` by
/// const reference.
/// Presumably the BLAS level-1 `rot` (applies the plane rotation given by
/// `c`/`s` to `x` and `y`, updating both) — TODO confirm against the implementation.
void rot_(Tensor x, Tensor y, const Tensor &c, const Tensor &s);

} // namespace infinicore::op
17 changes: 17 additions & 0 deletions include/infinicore/ops/rotg.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Op class for `rotg`, written out by hand with a function-pointer `schema`
/// and a dispatcher accessor. All members are static.
/// Presumably the BLAS level-1 `rotg` (constructs a Givens rotation) — TODO
/// confirm against the implementation.
class Rotg {
public:
// Signature every registered implementation must have: four Tensors by value.
using schema = void (*)(Tensor, Tensor, Tensor, Tensor);
// Entry point taking the same four tensors as `schema`.
// NOTE(review): presumably forwards to the implementation selected through
// dispatcher() — confirm in the .cc file.
static void execute(Tensor x, Tensor y, Tensor c, Tensor s);
// Returns a reference to the OpDispatcher instantiated on `schema`.
static common::OpDispatcher<schema> &dispatcher();
};

/// Free-function entry point with the same parameter list as Rotg::execute.
/// NOTE(review): which of x/y/c/s are inputs vs. outputs is not visible here —
/// in reference BLAS all four rotg arguments are written; confirm.
void rotg_(Tensor x, Tensor y, Tensor c, Tensor s);

} // namespace infinicore::op
17 changes: 17 additions & 0 deletions include/infinicore/ops/rotm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Op class for `rotm`, written out by hand with a function-pointer `schema`
/// and a dispatcher accessor. All members are static.
/// Presumably the BLAS level-1 `rotm` (applies a modified Givens rotation) —
/// TODO confirm against the implementation.
class Rotm {
public:
// Signature every registered implementation must have: three Tensors by value.
using schema = void (*)(Tensor, Tensor, Tensor);
// Entry point taking the same three tensors as `schema`.
// NOTE(review): presumably forwards to the implementation selected through
// dispatcher() — confirm in the .cc file.
static void execute(Tensor x, Tensor y, Tensor param);
// Returns a reference to the OpDispatcher instantiated on `schema`.
static common::OpDispatcher<schema> &dispatcher();
};

/// Free-function entry point with the same parameter list as Rotm::execute.
/// NOTE(review): the layout of `param` is not visible here (reference BLAS uses
/// a 5-element flag + H-matrix array) — confirm.
void rotm_(Tensor x, Tensor y, Tensor param);

} // namespace infinicore::op
17 changes: 17 additions & 0 deletions include/infinicore/ops/rotmg.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Op class for `rotmg`, written out by hand with a function-pointer `schema`
/// and a dispatcher accessor. All members are static.
/// Presumably the BLAS level-1 `rotmg` (constructs the modified Givens
/// rotation parameters) — TODO confirm against the implementation.
class Rotmg {
public:
// Signature every registered implementation must have: five Tensors by value.
using schema = void (*)(Tensor, Tensor, Tensor, Tensor, Tensor);
// Entry point taking the same five tensors as `schema`.
// NOTE(review): presumably forwards to the implementation selected through
// dispatcher() — confirm in the .cc file.
static void execute(Tensor d1, Tensor d2, Tensor x1, Tensor y1, Tensor param);
// Returns a reference to the OpDispatcher instantiated on `schema`.
static common::OpDispatcher<schema> &dispatcher();
};

/// Free-function entry point with the same parameter list as Rotmg::execute.
/// NOTE(review): which arguments are inputs vs. outputs, and the layout of
/// `param`, are not visible here — confirm against the implementation.
void rotmg_(Tensor d1, Tensor d2, Tensor x1, Tensor y1, Tensor param);

} // namespace infinicore::op
13 changes: 13 additions & 0 deletions include/infinicore/ops/scal.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Declares the `Scal` op class via INFINICORE_GRAPH_OP_CLASS; the trailing
/// macro arguments (const Tensor &, Tensor) match the parameter types of `scal_`.
/// NOTE(review): the macro is not defined in this header (presumably it comes
/// from ../graph/graph.hpp) — confirm which members it generates.
INFINICORE_GRAPH_OP_CLASS(Scal, const Tensor &, Tensor);

/// Only an underscore-suffixed entry point is declared; there is no
/// value-returning overload. `alpha` is taken by const reference, `x` by value.
/// Presumably the BLAS level-1 `scal` (x <- alpha * x) with the scalar passed
/// as a tensor — TODO confirm against the implementation.
void scal_(const Tensor &alpha, Tensor x);

} // namespace infinicore::op
13 changes: 13 additions & 0 deletions include/infinicore/ops/swap.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include "../device.hpp"
#include "../graph/graph.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Declares the `Swap` op class via INFINICORE_GRAPH_OP_CLASS; the trailing
/// macro arguments (Tensor, Tensor) match the parameter types of `swap_`.
/// NOTE(review): the macro is not defined in this header (presumably it comes
/// from ../graph/graph.hpp) — confirm which members it generates.
INFINICORE_GRAPH_OP_CLASS(Swap, Tensor, Tensor);

/// Only an underscore-suffixed entry point is declared; both tensors are
/// taken by value.
/// Presumably the BLAS level-1 `swap` (exchanges the contents of `x` and `y`)
/// — TODO confirm against the implementation.
void swap_(Tensor x, Tensor y);

} // namespace infinicore::op
13 changes: 13 additions & 0 deletions include/infiniop.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,17 @@
#include "infiniop/ops/all.h"
#include "infiniop/ops/asin.h"
#include "infiniop/ops/asinh.h"
#include "infiniop/ops/asum.h"
#include "infiniop/ops/atanh.h"
#include "infiniop/ops/attention.h"
#include "infiniop/ops/avg_pool1d.h"
#include "infiniop/ops/avg_pool3d.h"
#include "infiniop/ops/axpy.h"
#include "infiniop/ops/binary_cross_entropy_with_logits.h"
#include "infiniop/ops/blas_amax.h"
#include "infiniop/ops/blas_amin.h"
#include "infiniop/ops/blas_copy.h"
#include "infiniop/ops/blas_dot.h"
#include "infiniop/ops/block_diag.h"
#include "infiniop/ops/broadcast_to.h"
#include "infiniop/ops/causal_softmax.h"
Expand Down Expand Up @@ -78,6 +84,7 @@
#include "infiniop/ops/matrix_power.h"
#include "infiniop/ops/mul.h"
#include "infiniop/ops/multi_margin_loss.h"
#include "infiniop/ops/nrm2.h"
#include "infiniop/ops/ones.h"
#include "infiniop/ops/pad.h"
#include "infiniop/ops/paged_attention.h"
Expand All @@ -93,6 +100,11 @@
#include "infiniop/ops/relu.h"
#include "infiniop/ops/rms_norm.h"
#include "infiniop/ops/rope.h"
#include "infiniop/ops/rot.h"
#include "infiniop/ops/rotg.h"
#include "infiniop/ops/rotm.h"
#include "infiniop/ops/rotmg.h"
#include "infiniop/ops/scal.h"
#include "infiniop/ops/scatter.h"
#include "infiniop/ops/selu.h"
#include "infiniop/ops/sigmoid.h"
Expand All @@ -105,6 +117,7 @@
#include "infiniop/ops/softsign.h"
#include "infiniop/ops/sub.h"
#include "infiniop/ops/sum.h"
#include "infiniop/ops/swap.h"
#include "infiniop/ops/swiglu.h"
#include "infiniop/ops/take.h"
#include "infiniop/ops/tan.h"
Expand Down
24 changes: 24 additions & 0 deletions include/infiniop/ops/asum.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef __INFINIOP_ASUM_API_H__
#define __INFINIOP_ASUM_API_H__

#include "../operator_descriptor.h"

/// Handle type for an Asum operator descriptor; an alias for a pointer to
/// `struct InfiniopDescriptor`.
typedef struct InfiniopDescriptor *infiniopAsumDescriptor_t;

/// Creates an Asum descriptor from a library handle and the tensor descriptors
/// of the input `x` and the output `result`. Returns an infiniStatus_t.
/// NOTE(review): presumably the new descriptor is written to `*desc_ptr` and
/// must later be released with infiniopDestroyAsumDescriptor — confirm.
__INFINI_C __export infiniStatus_t infiniopCreateAsumDescriptor(infiniopHandle_t handle,
infiniopAsumDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t x,
infiniopTensorDescriptor_t result);

/// Queries the workspace size for `desc` through the `size` out-pointer.
/// NOTE(review): presumably reported in bytes — confirm.
__INFINI_C __export infiniStatus_t infiniopGetAsumWorkspaceSize(infiniopAsumDescriptor_t desc, size_t *size);

/// Runs the Asum operator described by `desc`: `x` is the read-only input
/// buffer, `result` the writable output buffer, `stream` an opaque pointer.
/// NOTE(review): presumably `workspace_size` must be at least the value from
/// infiniopGetAsumWorkspaceSize, and `stream` is the backend's execution
/// stream — confirm.
__INFINI_C __export infiniStatus_t infiniopAsum(infiniopAsumDescriptor_t desc,
void *workspace,
size_t workspace_size,
const void *x,
void *result,
void *stream);

/// Destroys a descriptor of type infiniopAsumDescriptor_t.
__INFINI_C __export infiniStatus_t infiniopDestroyAsumDescriptor(infiniopAsumDescriptor_t desc);

#endif // __INFINIOP_ASUM_API_H__
26 changes: 26 additions & 0 deletions include/infiniop/ops/axpy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#ifndef __INFINIOP_AXPY_API_H__
#define __INFINIOP_AXPY_API_H__

#include "../operator_descriptor.h"

/// Handle type for an Axpy operator descriptor; an alias for a pointer to
/// `struct InfiniopDescriptor`.
typedef struct InfiniopDescriptor *infiniopAxpyDescriptor_t;

/// Creates an Axpy descriptor from a library handle and the tensor descriptors
/// of `alpha`, `x` and `y`. Returns an infiniStatus_t.
/// NOTE(review): presumably the new descriptor is written to `*desc_ptr` and
/// must later be released with infiniopDestroyAxpyDescriptor — confirm.
__INFINI_C __export infiniStatus_t infiniopCreateAxpyDescriptor(infiniopHandle_t handle,
infiniopAxpyDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t alpha,
infiniopTensorDescriptor_t x,
infiniopTensorDescriptor_t y);

/// Queries the workspace size for `desc` through the `size` out-pointer.
/// NOTE(review): presumably reported in bytes — confirm.
__INFINI_C __export infiniStatus_t infiniopGetAxpyWorkspaceSize(infiniopAxpyDescriptor_t desc, size_t *size);

/// Runs the Axpy operator described by `desc`: `alpha` and `x` are read-only
/// buffers, `y` is the writable buffer, `stream` an opaque pointer.
/// NOTE(review): presumably computes y <- alpha * x + y, with `alpha` passed
/// as a data pointer rather than by value; whether it must be host or device
/// memory is not visible here — confirm.
__INFINI_C __export infiniStatus_t infiniopAxpy(infiniopAxpyDescriptor_t desc,
void *workspace,
size_t workspace_size,
const void *alpha,
const void *x,
void *y,
void *stream);

/// Destroys a descriptor of type infiniopAxpyDescriptor_t.
__INFINI_C __export infiniStatus_t infiniopDestroyAxpyDescriptor(infiniopAxpyDescriptor_t desc);

#endif // __INFINIOP_AXPY_API_H__
24 changes: 24 additions & 0 deletions include/infiniop/ops/blas_amax.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef __INFINIOP_BLAS_AMAX_API_H__
#define __INFINIOP_BLAS_AMAX_API_H__

#include "../operator_descriptor.h"

/// Handle type for a BlasAmax operator descriptor; an alias for a pointer to
/// `struct InfiniopDescriptor`.
typedef struct InfiniopDescriptor *infiniopBlasAmaxDescriptor_t;

/// Creates a BlasAmax descriptor from a library handle and the tensor
/// descriptors of the input `x` and the output `result`. Returns an infiniStatus_t.
/// NOTE(review): presumably the new descriptor is written to `*desc_ptr` and
/// must later be released with infiniopDestroyBlasAmaxDescriptor — confirm.
__INFINI_C __export infiniStatus_t infiniopCreateBlasAmaxDescriptor(infiniopHandle_t handle,
infiniopBlasAmaxDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t x,
infiniopTensorDescriptor_t result);

/// Queries the workspace size for `desc` through the `size` out-pointer.
/// NOTE(review): presumably reported in bytes — confirm.
__INFINI_C __export infiniStatus_t infiniopGetBlasAmaxWorkspaceSize(infiniopBlasAmaxDescriptor_t desc, size_t *size);

/// Runs the BlasAmax operator described by `desc`: `x` is the read-only input
/// buffer, `result` the writable output buffer, `stream` an opaque pointer.
/// NOTE(review): presumably writes the index of the element with the largest
/// absolute value; the index base and the integer type of `result` are not
/// visible here — confirm.
__INFINI_C __export infiniStatus_t infiniopBlasAmax(infiniopBlasAmaxDescriptor_t desc,
void *workspace,
size_t workspace_size,
const void *x,
void *result,
void *stream);

/// Destroys a descriptor of type infiniopBlasAmaxDescriptor_t.
__INFINI_C __export infiniStatus_t infiniopDestroyBlasAmaxDescriptor(infiniopBlasAmaxDescriptor_t desc);

#endif // __INFINIOP_BLAS_AMAX_API_H__
24 changes: 24 additions & 0 deletions include/infiniop/ops/blas_amin.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef __INFINIOP_BLAS_AMIN_API_H__
#define __INFINIOP_BLAS_AMIN_API_H__

#include "../operator_descriptor.h"

/// Handle type for a BlasAmin operator descriptor; an alias for a pointer to
/// `struct InfiniopDescriptor`.
typedef struct InfiniopDescriptor *infiniopBlasAminDescriptor_t;

/// Creates a BlasAmin descriptor from a library handle and the tensor
/// descriptors of the input `x` and the output `result`. Returns an infiniStatus_t.
/// NOTE(review): presumably the new descriptor is written to `*desc_ptr` and
/// must later be released with infiniopDestroyBlasAminDescriptor — confirm.
__INFINI_C __export infiniStatus_t infiniopCreateBlasAminDescriptor(infiniopHandle_t handle,
infiniopBlasAminDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t x,
infiniopTensorDescriptor_t result);

/// Queries the workspace size for `desc` through the `size` out-pointer.
/// NOTE(review): presumably reported in bytes — confirm.
__INFINI_C __export infiniStatus_t infiniopGetBlasAminWorkspaceSize(infiniopBlasAminDescriptor_t desc, size_t *size);

/// Runs the BlasAmin operator described by `desc`: `x` is the read-only input
/// buffer, `result` the writable output buffer, `stream` an opaque pointer.
/// NOTE(review): presumably writes the index of the element with the smallest
/// absolute value; the index base and the integer type of `result` are not
/// visible here — confirm.
__INFINI_C __export infiniStatus_t infiniopBlasAmin(infiniopBlasAminDescriptor_t desc,
void *workspace,
size_t workspace_size,
const void *x,
void *result,
void *stream);

/// Destroys a descriptor of type infiniopBlasAminDescriptor_t.
__INFINI_C __export infiniStatus_t infiniopDestroyBlasAminDescriptor(infiniopBlasAminDescriptor_t desc);

#endif // __INFINIOP_BLAS_AMIN_API_H__
Loading
Loading