Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions include/infinicore/ops/logsumexp.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
/// Dispatch wrapper for the logsumexp reduction operator.
/// Device backends register an implementation via dispatcher(); execute()
/// forwards the call to the implementation for the active device.
class LogSumExp {
public:
/// Backend kernel signature: (input, dim, keepdim, output).
/// NOTE(review): the output tensor is the LAST parameter here, while the
/// other ops in this change (Lp_Pool*, lp_pool*_) put the output first —
/// confirm this ordering is intentional.
using schema = void (*)(Tensor, int, bool, Tensor);
static void execute(Tensor input, int dim, bool keepdim, Tensor output);
static common::OpDispatcher<schema> &dispatcher();
};

/// Allocates and returns logsumexp of `input` reduced along `dim`;
/// when `keepdim` is true the reduced dimension is retained with size 1.
Tensor logsumexp(Tensor input, int dim, bool keepdim);
/// Out-parameter variant: writes the result into caller-provided `output`.
void logsumexp_(Tensor input, int dim, bool keepdim, Tensor output);
} // namespace infinicore::op
16 changes: 16 additions & 0 deletions include/infinicore/ops/lp_pool1d.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
/// Dispatch wrapper for 1D power-average (Lp) pooling.
/// Device backends register an implementation via dispatcher().
class Lp_Pool1d {
public:
/// Backend kernel signature:
/// (output, input, norm_type, kernel_size, stride, ceil_mode).
using schema = void (*)(Tensor, Tensor, float, size_t, size_t, bool);
static void execute(Tensor output, Tensor input, float norm_type, size_t kernel_size, size_t stride, bool ceil_mode);
static common::OpDispatcher<schema> &dispatcher();
};

/// Allocates and returns the pooled tensor.
Tensor lp_pool1d(Tensor input, float norm_type, size_t kernel_size, size_t stride, bool ceil_mode);
/// Out-parameter variant: writes the pooled result into `output`.
void lp_pool1d_(Tensor output, Tensor input, float norm_type, size_t kernel_size, size_t stride, bool ceil_mode);
} // namespace infinicore::op
17 changes: 17 additions & 0 deletions include/infinicore/ops/lp_pool2d.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"
#include <tuple>

namespace infinicore::op {
/// Dispatch wrapper for 2D power-average (Lp) pooling.
/// Device backends register an implementation via dispatcher().
class Lp_Pool2d {
public:
/// Backend kernel signature:
/// (output, input, norm_type, kernel_size, stride, ceil_mode).
/// NOTE(review): top-level `const` on by-value parameters has no effect in
/// a declaration; consider `const std::tuple<...>&` or plain by-value.
using schema = void (*)(Tensor, Tensor, float, const std::tuple<size_t, size_t>, const std::tuple<size_t, size_t>, bool);
static void execute(Tensor output, Tensor input, float norm_type, const std::tuple<size_t, size_t> kernel_size, const std::tuple<size_t, size_t> stride, bool ceil_mode);
static common::OpDispatcher<schema> &dispatcher();
};

/// Allocates and returns the pooled tensor.
Tensor lp_pool2d(Tensor input, float norm_type, const std::tuple<size_t, size_t> kernel_size, const std::tuple<size_t, size_t> stride, bool ceil_mode);
/// Out-parameter variant: writes the pooled result into `output`.
void lp_pool2d_(Tensor output, Tensor input, float norm_type, const std::tuple<size_t, size_t> kernel_size, const std::tuple<size_t, size_t> stride, bool ceil_mode);
} // namespace infinicore::op
17 changes: 17 additions & 0 deletions include/infinicore/ops/lp_pool3d.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"
#include <tuple>

namespace infinicore::op {
/// Dispatch wrapper for 3D power-average (Lp) pooling.
/// Device backends register an implementation via dispatcher().
class Lp_Pool3d {
public:
/// Backend kernel signature:
/// (output, input, norm_type, kernel_size, stride, ceil_mode).
/// NOTE(review): top-level `const` on by-value parameters has no effect in
/// a declaration; consider `const std::tuple<...>&` or plain by-value.
using schema = void (*)(Tensor, Tensor, float, const std::tuple<size_t, size_t, size_t>, const std::tuple<size_t, size_t, size_t>, bool);
static void execute(Tensor output, Tensor input, float norm_type, const std::tuple<size_t, size_t, size_t> kernel_size, const std::tuple<size_t, size_t, size_t> stride, bool ceil_mode);
static common::OpDispatcher<schema> &dispatcher();
};

/// Allocates and returns the pooled tensor.
Tensor lp_pool3d(Tensor input, float norm_type, const std::tuple<size_t, size_t, size_t> kernel_size, const std::tuple<size_t, size_t, size_t> stride, bool ceil_mode);
/// Out-parameter variant: writes the pooled result into `output`.
void lp_pool3d_(Tensor output, Tensor input, float norm_type, const std::tuple<size_t, size_t, size_t> kernel_size, const std::tuple<size_t, size_t, size_t> stride, bool ceil_mode);
} // namespace infinicore::op
18 changes: 18 additions & 0 deletions include/infinicore/ops/max_global.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch wrapper for the global (all-element) max reduction.
/// Used by the Python `max()` wrapper when no `dim` is given.
class MaxGlobal {
public:
/// Backend kernel signature: (input, output).
using schema = void (*)(Tensor, Tensor);
static void execute(Tensor input, Tensor output);
static common::OpDispatcher<schema> &dispatcher();
};

/// Allocates and returns the maximum over all elements of `input`.
Tensor max_global(Tensor input);
/// Out-parameter variant: writes the global maximum into `output`.
void max_global_(Tensor input, Tensor output);

} // namespace infinicore::op
19 changes: 19 additions & 0 deletions include/infinicore/ops/max_reduce.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"
#include <tuple>

namespace infinicore::op {

/// Dispatch wrapper for the dimension-wise max reduction that also
/// produces the argmax indices (used by the Python `max(input, dim)` path).
class MaxReduce {
public:
/// Backend kernel signature: (input, output, indices, dim, keepdim).
using schema = void (*)(Tensor, Tensor, Tensor, int, bool);
static void execute(Tensor input, Tensor output, Tensor indices, int dim, bool keepdim);
static common::OpDispatcher<schema> &dispatcher();
};

/// Allocates and returns (values, indices) of the max along `dim`.
std::tuple<Tensor, Tensor> max_reduce(Tensor input, int dim, bool keepdim);
/// Out-parameter variant: writes values into `output` and argmax into `indices`.
void max_reduce_(Tensor input, Tensor output, Tensor indices, int dim, bool keepdim);

} // namespace infinicore::op
4 changes: 4 additions & 0 deletions python/infinicore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
from infinicore.ops.mul import mul
from infinicore.ops.narrow import narrow
from infinicore.ops.rearrange import rearrange
from infinicore.ops.logsumexp import logsumexp
from infinicore.ops.max import max
from infinicore.tensor import (
Tensor,
empty,
Expand Down Expand Up @@ -115,6 +117,8 @@
"strided_empty",
"strided_from_blob",
"zeros",
"logsumexp",
"max",
]

use_ntops = False
Expand Down
6 changes: 6 additions & 0 deletions python/infinicore/nn/functional/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
from .rope import RopeAlgo, rope
from .silu import silu
from .swiglu import swiglu
from .lp_pool1d import lp_pool1d
from .lp_pool2d import lp_pool2d
from .lp_pool3d import lp_pool3d

__all__ = [
"causal_softmax",
Expand All @@ -17,4 +20,7 @@
"embedding",
"rope",
"RopeAlgo",
"lp_pool1d",
"lp_pool2d",
"lp_pool3d",
]
27 changes: 27 additions & 0 deletions python/infinicore/nn/functional/lp_pool1d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def lp_pool1d(
    input: Tensor,
    norm_type: float,
    kernel_size: int,
    stride: int | None = None,
    ceil_mode: bool = False,
):
    r"""Applies a 1D power-average pooling over an input signal composed of several input planes."""

    # A missing stride defaults to the kernel size (non-overlapping windows).
    effective_stride = kernel_size if stride is None else stride

    # Prefer the ntops fast path on supported accelerator devices.
    if infinicore.use_ntops and input.device.type in ("cuda", "musa"):
        return infinicore.ntops.torch.lp_pool1d(
            input, norm_type, kernel_size, effective_stride, ceil_mode
        )

    return Tensor(
        _infinicore.lp_pool1d(
            input._underlying, norm_type, kernel_size, effective_stride, ceil_mode
        )
    )
32 changes: 32 additions & 0 deletions python/infinicore/nn/functional/lp_pool2d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def lp_pool2d(
    input: Tensor,
    norm_type: float,
    kernel_size: int | tuple[int, int],
    stride: int | tuple[int, int] | None = None,
    ceil_mode: bool = False,
):
    r"""Applies a 2D power-average pooling over an input signal composed of several input planes."""
    # Canonicalize scalar arguments to 2-tuples.
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size,) * 2

    # A missing stride defaults to the kernel size (non-overlapping windows).
    if stride is None:
        stride = kernel_size
    elif isinstance(stride, int):
        stride = (stride,) * 2

    # Prefer the ntops fast path on supported accelerator devices.
    if infinicore.use_ntops and input.device.type in ("cuda", "musa"):
        return infinicore.ntops.torch.lp_pool2d(
            input, norm_type, kernel_size, stride, ceil_mode
        )

    return Tensor(
        _infinicore.lp_pool2d(
            input._underlying, norm_type, kernel_size, stride, ceil_mode
        )
    )
32 changes: 32 additions & 0 deletions python/infinicore/nn/functional/lp_pool3d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def lp_pool3d(
    input: Tensor,
    norm_type: float,
    kernel_size: int | tuple[int, int, int],
    stride: int | tuple[int, int, int] | None = None,
    ceil_mode: bool = False,
):
    r"""Applies a 3D power-average pooling over an input signal composed of several input planes."""
    # Canonicalize scalar arguments to 3-tuples.
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size,) * 3

    # A missing stride defaults to the kernel size (non-overlapping windows).
    if stride is None:
        stride = kernel_size
    elif isinstance(stride, int):
        stride = (stride,) * 3

    # Prefer the ntops fast path on supported accelerator devices.
    if infinicore.use_ntops and input.device.type in ("cuda", "musa"):
        return infinicore.ntops.torch.lp_pool3d(
            input, norm_type, kernel_size, stride, ceil_mode
        )

    return Tensor(
        _infinicore.lp_pool3d(
            input._underlying, norm_type, kernel_size, stride, ceil_mode
        )
    )
19 changes: 19 additions & 0 deletions python/infinicore/ops/logsumexp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def logsumexp(
    input: Tensor, dim: int | None = None, keepdim: bool = False, *, out=None
) -> Tensor:
    r"""Apply the logsumexp function.

    Args:
        input: the input tensor.
        dim: the dimension to reduce over. Required for the native backend;
            passing ``None`` raises a ``ValueError``.
        keepdim: whether the output retains ``dim`` with size 1.
        out: optional pre-allocated output tensor.
    """

    if infinicore.use_ntops and input.device.type in ("cuda", "musa"):
        return infinicore.ntops.torch.logsumexp(input, dim, keepdim=keepdim, out=out)

    # The native binding takes an int dim; fail with a clear message instead
    # of letting None leak into the C++ layer as an opaque TypeError.
    if dim is None:
        raise ValueError("logsumexp: 'dim' must be an int, got None")

    if out is None:
        return Tensor(_infinicore.logsumexp(input._underlying, dim, keepdim))

    _infinicore.logsumexp_(input._underlying, dim, keepdim, out._underlying)

    return out
33 changes: 33 additions & 0 deletions python/infinicore/ops/max.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def max(
    input: Tensor, dim: int | None = None, keepdim=False, *, out=None
) -> Tensor | tuple[Tensor, Tensor]:
    r"""Apply the max function.

    With ``dim=None`` reduces over all elements and returns a single tensor;
    with an explicit ``dim`` returns a ``(values, indices)`` pair.
    """

    use_ntops = infinicore.use_ntops and input.device.type in ("cuda", "musa")

    if dim is None:
        # Global reduction over every element.
        # NOTE(review): `keepdim` is silently ignored on this path — confirm
        # that matches the intended torch-like contract.
        if use_ntops:
            return infinicore.ntops.torch.max(input, out=out)

        if out is None:
            return Tensor(_infinicore.max_global(input._underlying))

        _infinicore.max_global_(input._underlying, out._underlying)
        return out

    # Dimension-wise reduction producing values and argmax indices.
    if use_ntops:
        return infinicore.ntops.torch.max(input, dim, keepdim=keepdim, out=out)

    if out is None:
        values, indices = _infinicore.max_reduce(input._underlying, dim, keepdim)
        return Tensor(values), Tensor(indices)

    _infinicore.max_reduce_(
        input._underlying, out[0]._underlying, out[1]._underlying, dim, keepdim
    )
    return out
54 changes: 54 additions & 0 deletions src/infinicore/ops/logsumexp/logsumexp.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#include "infinicore/ops/logsumexp.hpp"
#include <iostream>
#include <stdexcept>

namespace infinicore::op {

// Returns the process-wide dispatcher mapping device types to registered
// LogSumExp backend kernels (function-local static singleton).
// Fix: dropped the stray semicolon after the function body.
common::OpDispatcher<LogSumExp::schema> &LogSumExp::dispatcher() {
    static common::OpDispatcher<LogSumExp::schema> dispatcher_;
    return dispatcher_;
}

// Runs the logsumexp kernel registered for the current device.
// Activates `input`'s device first, then dispatches; throws
// std::runtime_error when no backend is registered for that device type.
void LogSumExp::execute(Tensor input, int dim, bool keepdim, Tensor output) {
    infinicore::context::setDevice(input->device(), true);

    const auto device_type = context::getDevice().getType();
    auto *const impl = dispatcher().lookup(device_type);
    if (impl == nullptr) {
        throw std::runtime_error("No LogSumExp implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
    }

    impl(input, dim, keepdim, output);
}

// Computes logsumexp of `input` along `dim` into a freshly allocated tensor.
// `dim` may be negative (counted from the last dimension); when `keepdim`
// is true the reduced dimension is retained with size 1, otherwise dropped.
// Throws std::runtime_error for an out-of-range `dim`.
Tensor logsumexp(Tensor input, int dim, bool keepdim) {
    const int ndim = static_cast<int>(input->ndim());

    // Normalize a negative dim and validate it BEFORE it indexes the shape
    // below (the original code had no bounds check, so an out-of-range dim
    // was undefined behavior).
    int normalized_dim = dim < 0 ? dim + ndim : dim;
    if (normalized_dim < 0 || normalized_dim >= ndim) {
        throw std::runtime_error("logsumexp: dim out of range: " + std::to_string(dim));
    }

    // Derive the output shape from the input shape.
    const auto &input_shape = input->shape();
    Shape output_shape;
    if (keepdim) {
        output_shape = input_shape;
        output_shape[normalized_dim] = 1;
    } else {
        for (int i = 0; i < ndim; ++i) {
            if (i != normalized_dim) {
                output_shape.push_back(input_shape[i]);
            }
        }
    }

    auto output = Tensor::empty(output_shape, input->dtype(), input->device());
    // Forward the canonical (non-negative) dim so backends never see a
    // negative index (previously the raw `dim` was forwarded).
    logsumexp_(input, normalized_dim, keepdim, output);
    return output;
}

// In-place variant: writes logsumexp of `input` along `dim` into the
// caller-provided `output`. Thin wrapper over LogSumExp::execute.
void logsumexp_(Tensor input, int dim, bool keepdim, Tensor output) {
    LogSumExp::execute(input, dim, keepdim, output);
}
} // namespace infinicore::op
Loading