Changes from all commits
36 commits
0b4e6d7
feat: bilinear, but sem out
littleotherut Nov 26, 2025
dfa35c1
fix: test/op_register.py
littleotherut Nov 26, 2025
7349c80
Remove debug statements; add external interfaces
littleotherut Nov 27, 2025
08af554
Out-of-bounds access still to be fixed
littleotherut Nov 27, 2025
cd5768d
Rework the implementation: implement at the Core/ops level instead of creating a standalone InfiniOp
littleotherut Nov 27, 2025
1285bad
fix: rearrange's minimum-unit allocation did not consider whether strided layouts divide evenly
littleotherut Nov 29, 2025
7b7b9e3
Adjust gemm compile options to fix a precision issue (with the current implementation, TF32 only supports rtol around 5e-4); remove useless redundant content
littleotherut Nov 30, 2025
a5c5f7e
Avoid polluting unrelated code: revert the gemm changes
littleotherut Nov 30, 2025
7978405
feat: baddbmm_demo; beta and alpha not handled yet
littleotherut Nov 30, 2025
02e7268
feat: implement type conversion for baddbmm
littleotherut Dec 1, 2025
ca8f885
Refactor on top of a more suitable gemm; type conversion no longer needed, revert those changes
littleotherut Dec 1, 2025
802e217
Optimization: special-case beta=0 and drop unnecessary contiguous handling
littleotherut Dec 1, 2025
e1cf0a1
Optimize the implementation: use BLAS-compatibility checks instead of contiguity checks and allocate fewer new tensors in baddbmm; device score reaches 1.00
littleotherut Dec 2, 2025
a765e7c
feat: fmod cpu and nvidia
littleotherut Dec 3, 2025
eaee08b
moore and metax test
littleotherut Dec 3, 2025
0c31473
Merge branch 'InfiniTensor:main' into 2025-autumn-littleotherut-T1-1-9
littleotherut Dec 3, 2025
6ea950e
fix: adjust bilinear to match the updated matmul
littleotherut Dec 4, 2025
06cfc35
moore_test
Dec 4, 2025
c1d7ded
bilinear
Dec 5, 2025
3b35a33
Revert; the moore lda=1 issue is still unresolved
littleotherut Dec 6, 2025
409a15f
Merge branch 'InfiniTensor:main' into 2025-autumn-littleotherut-T1-1-9
littleotherut Dec 6, 2025
7d6dec6
Switch to a row-vector implementation (better matches mathematical intuition and low-level requirements)
littleotherut Dec 6, 2025
e6b921a
fix: add checks for all tensors
littleotherut Dec 6, 2025
ba9b48c
Add a TF32 toggle interface to gemm; disabled for bilinear
littleotherut Dec 7, 2025
e917db2
asinh_demo_nvidia (unoptimized)
littleotherut Dec 7, 2025
c6abfd5
metax and moore implementations (untested)
littleotherut Dec 8, 2025
1ca4654
adaptive max pool1d cpu demo (tested)
littleotherut Dec 9, 2025
78f8c76
adaptive_max_pool1d nvidia
littleotherut Dec 10, 2025
0bea611
Fix naming; moore and metax platform implementations (untested)
littleotherut Dec 10, 2025
2aa7819
moore platform tests passed
littleotherut Dec 10, 2025
e2d2e3e
Fix errata in the metax implementation (no GPU free, untested for now)
littleotherut Dec 10, 2025
7f3a9c5
Merge branch 'InfiniTensor:main' into 2025-autumn-littleotherut-T1-1-9
littleotherut Dec 12, 2025
3c3c7f1
Update the core execution code to conform to the updated setup
littleotherut Dec 12, 2025
884559e
Merge branch 'InfiniTensor:main' into 2025-autumn-littleotherut-T1-1-9
littleotherut Dec 13, 2025
108603e
Add a gemm beta check so that beta=0 never reads dirty memory
littleotherut Dec 13, 2025
13d2d4a
Clean up unrelated redundant files
littleotherut Dec 13, 2025
2 changes: 1 addition & 1 deletion .gitignore
@@ -31,4 +31,4 @@ cache/
# Compressed
*.gz
*.zip
*.tar
*.tar
5 changes: 5 additions & 0 deletions include/infinicore/ops.hpp
@@ -1,8 +1,13 @@
#pragma once

#include "ops/adaptive_max_pool1d.hpp"
#include "ops/add.hpp"
#include "ops/asinh.hpp"
#include "ops/attention.hpp"
#include "ops/baddbmm.hpp"
#include "ops/bilinear.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/fmod.hpp"
#include "ops/matmul.hpp"
#include "ops/ones.hpp"
#include "ops/rearrange.hpp"
16 changes: 16 additions & 0 deletions include/infinicore/ops/adaptive_max_pool1d.hpp
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class AdaptiveMaxPool1d {
public:
using schema = void (*)(Tensor, Tensor, size_t);
static void execute(Tensor y, Tensor x, size_t output_size);
static common::OpDispatcher<schema> &dispatcher();
};

Tensor adaptive_max_pool1d(Tensor x, size_t output_size);
void adaptive_max_pool1d_(Tensor y, Tensor x, size_t output_size);
} // namespace infinicore::op
16 changes: 16 additions & 0 deletions include/infinicore/ops/asinh.hpp
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class Asinh {
public:
using schema = void (*)(Tensor, Tensor);
static void execute(Tensor y, Tensor x);
static common::OpDispatcher<schema> &dispatcher();
};

Tensor asinh(Tensor x);
void asinh_(Tensor y, Tensor x);
} // namespace infinicore::op
15 changes: 15 additions & 0 deletions include/infinicore/ops/baddbmm.hpp
@@ -0,0 +1,15 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"
#include <optional>

namespace infinicore::op {

Tensor baddbmm(Tensor input, Tensor batch1, Tensor batch2,
float beta = 1.0f,
float alpha = 1.0f);
void baddbmm_(Tensor out, Tensor input, Tensor batch1, Tensor batch2,
float beta = 1.0f,
float alpha = 1.0f);
} // namespace infinicore::op
12 changes: 12 additions & 0 deletions include/infinicore/ops/bilinear.hpp
@@ -0,0 +1,12 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"
#include <optional>

namespace infinicore::op {

Tensor bilinear(Tensor x1, Tensor x2, Tensor weight, std::optional<Tensor> bias);
void bilinear_(Tensor out, Tensor x1, Tensor x2, Tensor weight, std::optional<Tensor> bias);

} // namespace infinicore::op
16 changes: 16 additions & 0 deletions include/infinicore/ops/fmod.hpp
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class Fmod {
public:
using schema = void (*)(Tensor, Tensor, Tensor);
static void execute(Tensor c, Tensor a, Tensor b);
static common::OpDispatcher<schema> &dispatcher();
};

Tensor fmod(Tensor a, Tensor b);
void fmod_(Tensor c, Tensor a, Tensor b);
} // namespace infinicore::op
3 changes: 3 additions & 0 deletions include/infiniop.h
@@ -2,12 +2,15 @@
#define __INFINIOP_API_H__

#include "infiniop/handle.h"
#include "infiniop/ops/adaptive_max_pool1d.h"
#include "infiniop/ops/add.h"
#include "infiniop/ops/asinh.h"
#include "infiniop/ops/attention.h"
#include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/clip.h"
#include "infiniop/ops/conv.h"
#include "infiniop/ops/dequantize_awq.h"
#include "infiniop/ops/fmod.h"
#include "infiniop/ops/gelu.h"
#include "infiniop/ops/gemm.h"
#include "infiniop/ops/layer_norm.h"
22 changes: 22 additions & 0 deletions include/infiniop/ops/adaptive_max_pool1d.h
@@ -0,0 +1,22 @@
#ifndef __INFINIOP_ADAPTIVE_MAX_POOL1D_H__
#define __INFINIOP_ADAPTIVE_MAX_POOL1D_H__

#include "../operator_descriptor.h"

typedef struct InfiniopDescriptor *infiniopAdaptiveMaxPool1dDescriptor_t;

__C __export infiniStatus_t infiniopCreateAdaptiveMaxPool1dDescriptor(
infiniopHandle_t handle,
infiniopAdaptiveMaxPool1dDescriptor_t *desc,
infiniopTensorDescriptor_t y_desc,
infiniopTensorDescriptor_t x_desc,
size_t output_size);

__C __export infiniStatus_t infiniopGetAdaptiveMaxPool1dWorkspaceSize(infiniopAdaptiveMaxPool1dDescriptor_t desc, size_t *size);

__C __export infiniStatus_t infiniopAdaptiveMaxPool1d(infiniopAdaptiveMaxPool1dDescriptor_t desc, void *workspace, size_t workspace_size,
void *y, const void *x, void *stream);

__C __export infiniStatus_t infiniopDestroyAdaptiveMaxPool1dDescriptor(infiniopAdaptiveMaxPool1dDescriptor_t desc);

#endif
24 changes: 24 additions & 0 deletions include/infiniop/ops/asinh.h
@@ -0,0 +1,24 @@
#ifndef __INFINIOP_ASINH_API_H__
#define __INFINIOP_ASINH_API_H__

#include "../operator_descriptor.h"

typedef struct InfiniopDescriptor *infiniopAsinhDescriptor_t;

__C __export infiniStatus_t infiniopCreateAsinhDescriptor(infiniopHandle_t handle,
infiniopAsinhDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x);

__C __export infiniStatus_t infiniopGetAsinhWorkspaceSize(infiniopAsinhDescriptor_t desc, size_t *size);

__C __export infiniStatus_t infiniopAsinh(infiniopAsinhDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);

__C __export infiniStatus_t infiniopDestroyAsinhDescriptor(infiniopAsinhDescriptor_t desc);

#endif
26 changes: 26 additions & 0 deletions include/infiniop/ops/fmod.h
@@ -0,0 +1,26 @@
#ifndef __INFINIOP_FMOD_API_H__
#define __INFINIOP_FMOD_API_H__

#include "../operator_descriptor.h"

typedef struct InfiniopDescriptor *infiniopFmodDescriptor_t;

__C __export infiniStatus_t infiniopCreateFmodDescriptor(infiniopHandle_t handle,
infiniopFmodDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c,
infiniopTensorDescriptor_t a,
infiniopTensorDescriptor_t b);

__C __export infiniStatus_t infiniopGetFmodWorkspaceSize(infiniopFmodDescriptor_t desc, size_t *size);

__C __export infiniStatus_t infiniopFmod(infiniopFmodDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *c,
const void *a,
const void *b,
void *stream);

__C __export infiniStatus_t infiniopDestroyFmodDescriptor(infiniopFmodDescriptor_t desc);

#endif
8 changes: 8 additions & 0 deletions python/infinicore/__init__.py
@@ -41,10 +41,14 @@
)
from infinicore.ops.add import add
from infinicore.ops.attention import attention
from infinicore.ops.asinh import asinh
from infinicore.ops.matmul import matmul
from infinicore.ops.mul import mul
from infinicore.ops.narrow import narrow
from infinicore.ops.rearrange import rearrange
from infinicore.ops.baddbmm import baddbmm
from infinicore.ops.bilinear import bilinear
from infinicore.ops.fmod import fmod
from infinicore.tensor import (
Tensor,
empty,
@@ -101,6 +105,10 @@
# Operations.
"add",
"attention",
"asinh",
"baddbmm",
"bilinear",
"fmod",
"matmul",
"mul",
"narrow",
2 changes: 2 additions & 0 deletions python/infinicore/nn/functional/__init__.py
@@ -1,3 +1,4 @@
from .adaptive_max_pool1d import adaptive_max_pool1d
from .causal_softmax import causal_softmax
from .embedding import embedding
from .linear import linear
@@ -8,6 +9,7 @@
from .swiglu import swiglu

__all__ = [
"adaptive_max_pool1d",
"causal_softmax",
"random_sample",
"rms_norm",
39 changes: 39 additions & 0 deletions python/infinicore/nn/functional/adaptive_max_pool1d.py
@@ -0,0 +1,39 @@
from typing import List

from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def adaptive_max_pool1d(
input: Tensor,
output_size: int,
*,
out=None,
) -> Tensor:
r"""Applies a 1D adaptive max pooling over an input signal composed of
several input planes.

The output size is L_out. The algorithm used is fairly simple:

.. math::
\text{start} = \left\lfloor \frac{i \cdot L_{in}}{L_{out}} \right\rfloor

\text{end} = \left\lceil \frac{(i + 1) \cdot L_{in}}{L_{out}} \right\rceil

where :math:`L_{in}` is the size of the input dimension, and :math:`L_{out}` is the size of the output dimension.

Args:
input (Tensor): Input tensor of shape (N, C, L_in)
output_size (int): The target output size (L_out)
out (Tensor, optional): Output tensor.

Returns:
Tensor: The result of the adaptive max pooling operation.
"""

if out is None:
return Tensor(_infinicore.adaptive_max_pool1d(input._underlying, output_size))

_infinicore.adaptive_max_pool1d_(out._underlying, input._underlying, output_size)

return out
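A minimal sketch, in pure Python and independent of infinicore, that reproduces the window formula from the docstring above; the helper name adaptive_windows is hypothetical:

import math

# start = floor(i * L_in / L_out), end = ceil((i + 1) * L_in / L_out)
def adaptive_windows(l_in, l_out):
    return [(i * l_in // l_out, math.ceil((i + 1) * l_in / l_out))
            for i in range(l_out)]

# For L_in = 5 and L_out = 3 the (possibly overlapping) windows are
# [0, 2), [1, 4), [3, 5); each output element is the max over its window.
assert adaptive_windows(5, 3) == [(0, 2), (1, 4), (3, 5)]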
11 changes: 11 additions & 0 deletions python/infinicore/ops/asinh.py
@@ -0,0 +1,11 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def asinh(input, *, out=None):
if out is None:
return Tensor(_infinicore.asinh(input._underlying))

_infinicore.asinh_(out._underlying, input._underlying)

return out
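For reference, the identity this op computes elementwise, checked with Python's standard math module (a sketch, not infinicore code):

import math

# asinh(x) = ln(x + sqrt(x^2 + 1))
x = 0.5
assert math.isclose(math.asinh(x), math.log(x + math.sqrt(x * x + 1.0)))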
25 changes: 25 additions & 0 deletions python/infinicore/ops/baddbmm.py
@@ -0,0 +1,25 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def baddbmm(input, batch1, batch2, *, beta=1.0, alpha=1.0, out=None):
if out is None:
return Tensor(
_infinicore.baddbmm(
input._underlying,
batch1._underlying,
batch2._underlying,
float(beta),
float(alpha),
)
)
_infinicore.baddbmm_(
out._underlying,
input._underlying,
batch1._underlying,
batch2._underlying,
float(beta),
float(alpha),
)

return out
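A NumPy sketch of the expected semantics, assuming infinicore.baddbmm mirrors torch.baddbmm (an assumption, not confirmed by this diff alone):

import numpy as np

# out = beta * input + alpha * (batch1 @ batch2), batched over dim 0.
B, N, M, P = 2, 3, 4, 5
inp = np.random.rand(B, N, P).astype(np.float32)
b1 = np.random.rand(B, N, M).astype(np.float32)
b2 = np.random.rand(B, M, P).astype(np.float32)
beta, alpha = 0.5, 2.0
ref = beta * inp + alpha * (b1 @ b2)
# When beta == 0, `input` must not be read at all, which is what the
# "gemm beta check" commit guards against (dirty memory in `input`).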
23 changes: 23 additions & 0 deletions python/infinicore/ops/bilinear.py
@@ -0,0 +1,23 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def bilinear(input1, input2, weight, bias=None, *, out=None):
if out is None:
return Tensor(
_infinicore.bilinear(
input1._underlying,
input2._underlying,
weight._underlying,
bias._underlying if bias is not None else None,
)
)
_infinicore.bilinear_(
out._underlying,
input1._underlying,
input2._underlying,
weight._underlying,
bias._underlying if bias is not None else None,
)

return out
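A NumPy sketch of the expected semantics, assuming infinicore.bilinear follows torch.nn.functional.bilinear (an assumption):

import numpy as np

# y[n, o] = x1[n] @ weight[o] @ x2[n] + bias[o]
N, I1, I2, O = 4, 3, 5, 2
x1 = np.random.rand(N, I1).astype(np.float32)
x2 = np.random.rand(N, I2).astype(np.float32)
weight = np.random.rand(O, I1, I2).astype(np.float32)
bias = np.random.rand(O).astype(np.float32)
ref = np.einsum("ni,oij,nj->no", x1, weight, x2) + bias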
11 changes: 11 additions & 0 deletions python/infinicore/ops/fmod.py
@@ -0,0 +1,11 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def fmod(input, other, *, out=None):
if out is None:
return Tensor(_infinicore.fmod(input._underlying, other._underlying))

_infinicore.fmod_(out._underlying, input._underlying, other._underlying)

return out
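For reference, fmod follows C fmod semantics (the result takes the sign of the dividend), unlike Python's % operator; a quick standard-library check, assuming infinicore.fmod matches torch.fmod:

import math

assert math.fmod(-7.0, 3.0) == -1.0  # sign of the dividend
assert -7.0 % 3.0 == 2.0             # Python % follows the divisor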
30 changes: 30 additions & 0 deletions src/infinicore/ops/adaptive_max_pool1d/adaptive_max_pool1d.cc
@@ -0,0 +1,30 @@
#include "infinicore/ops/adaptive_max_pool1d.hpp"

#include "../../utils.hpp"

namespace infinicore::op {

common::OpDispatcher<AdaptiveMaxPool1d::schema> &AdaptiveMaxPool1d::dispatcher() {
static common::OpDispatcher<AdaptiveMaxPool1d::schema> dispatcher_;
return dispatcher_;
}

void AdaptiveMaxPool1d::execute(Tensor y, Tensor x, size_t output_size) {
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(y, x);
infinicore::context::setDevice(y->device());
dispatcher().lookup(y->device().getType())(y, x, output_size);
}

Tensor adaptive_max_pool1d(Tensor x, size_t output_size) {
infinicore::Shape y_shape = x->shape();
y_shape.back() = output_size;
auto y = Tensor::empty(y_shape, x->dtype(), x->device());
adaptive_max_pool1d_(y, x, output_size);
return y;
}

void adaptive_max_pool1d_(Tensor y, Tensor x, size_t output_size) {
AdaptiveMaxPool1d::execute(y, x, output_size);
}

} // namespace infinicore::op