Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 1 addition & 17 deletions fearless_simd/examples/play.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,26 +30,10 @@ fn foo<S: Simd>(simd: S, x: f32) -> f32 {
simd.splat_f32x4(x).sqrt()[0]
}

// currently requires `safe_wrappers` feature
//
// Returns lane 0 of a NEON vector whose four lanes are all set to 42.0 when
// the NEON branch below is compiled in and `_level.as_neon()` yields a token;
// in every other case the function falls through and returns 0.0.
//
// The parameter is presumably underscore-prefixed so that builds where the
// `cfg`-gated branch is compiled out do not warn about an unused argument.
fn do_something_on_neon(_level: Level) -> f32 {
// This `if let` only exists when the `safe_wrappers` feature is enabled and
// the target architecture is aarch64.
#[cfg(all(feature = "safe_wrappers", target_arch = "aarch64"))]
if let Some(neon) = _level.as_neon() {
// The intrinsic calls are wrapped in a closure handed to `neon.vectorize`.
return neon.vectorize(
#[inline(always)]
|| {
// Broadcast 42.0 to every lane, then read lane 0 back out.
let v = neon.neon.vdupq_n_f32(42.0);
neon.neon.vgetq_lane_f32::<0>(v)
},
);
}
// Fallback when the NEON branch is compiled out or no NEON token is available.
0.0
}

// Example entry point: creates a `Level` with `Level::new()`, runs the example
// computations through it, and prints the results.
fn main() {
let level = Level::new();
// `Foo` is defined outside this excerpt.
let x = level.dispatch(Foo);
// Calls the generic `foo` with whatever `simd` value `dispatch!` binds for `level`.
let y = dispatch!(level, simd => foo(simd, 42.0));
let z = do_something_on_neon(level);

// NOTE(review): the two `println!` calls below look like the before/after
// versions of the same line in a diff (with and without `z`) — confirm which
// one is meant to remain.
println!("level = {level:?}, x = {x}, y = {y}, z = {z}");
println!("level = {level:?}, x = {x}, y = {y}");
}
79 changes: 47 additions & 32 deletions fearless_simd/examples/srgb.rs
Original file line number Diff line number Diff line change
@@ -1,53 +1,68 @@
// Copyright 2024 the Fearless_SIMD Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

#![expect(
missing_docs,
reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
)]
//! Converts a single RGBA pixel from linear RGB to sRGB.
//!
//! This example demonstrates the usual Fearless SIMD structure:
//!
//! - write the main computation as an `#[inline(always)]` function generic over
//! [`Simd`];
//! - use [`dispatch!`] at the non-SIMD boundary to run it with the best
//! available target features;
//! - drop down to [`kernel!`](fearless_simd::kernel) when a small part of the
//! computation needs a target-specific intrinsic.
//!
//! The RGB channels are converted with portable SIMD operations. The alpha
//! channel is copied unchanged, using an architecture-specific lane-copy
//! intrinsic if one is available and a scalar fallback otherwise.

use fearless_simd::{Level, dispatch, f32x4, prelude::*};

// This block shows how to use safe wrappers for compile-time enforcement
// of using valid SIMD intrinsics.
#[cfg(feature = "safe_wrappers")]
#[inline(always)]
fn copy_alpha<S: Simd>(a: f32x4<S>, b: f32x4<S>) -> f32x4<S> {
// #[cfg(target_arch = "x86_64")]
// if let Some(avx2) = a.simd.level().as_avx2() {
// return avx2
// .sse4_1
// ._mm_blend_ps::<8>(a.into(), b.into())
// .simd_into(a.simd);
// }
#[cfg(target_arch = "aarch64")]
if let Some(neon) = a.simd.level().as_neon() {
return neon
.neon
.vcopyq_laneq_f32::<3, 3>(a.into(), b.into())
.simd_into(a.simd);
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{float32x4_t, vcopyq_laneq_f32};
#[cfg(target_arch = "x86")]
use core::arch::x86::{__m128, _mm_blend_ps};
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::{__m128, _mm_blend_ps};

fearless_simd::kernel! {
/// Copy the alpha lane on AArch64 using a NEON lane-copy intrinsic.
#[inline]
fn copy_alpha_neon(neon: Neon, a: float32x4_t, b: float32x4_t) -> float32x4_t {
vcopyq_laneq_f32::<3, 3>(a, b)
}
let mut result = a;
result[3] = b[3];
result
}

// This block lets the example compile without safe wrappers.
#[cfg(not(feature = "safe_wrappers"))]
// Declares the x86 alpha-copy helper through the `kernel!` macro. The function
// takes an `Sse4_2` token as its first argument followed by two raw `__m128`
// vectors, and returns a raw `__m128`.
fearless_simd::kernel! {
/// Copy the alpha lane on x86 using the SSE4.2 token to enable SSE4.1 blend instructions.
#[inline]
fn copy_alpha_sse4_2(sse4_2: Sse4_2, a: __m128, b: __m128) -> __m128 {
// Blend mask 8 (0b1000): lane 3 is taken from `b`, lanes 0..=2 from `a`.
_mm_blend_ps::<8>(a, b)
}
}

/// Return `a` with its alpha channel replaced by `b`'s alpha channel.
///
/// This helper shows how portable SIMD code can opportunistically call
/// target-specific kernels while still providing a fallback for every backend.
#[inline(always)]
fn copy_alpha<S: Simd>(a: f32x4<S>, b: f32x4<S>) -> f32x4<S> {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if let Some(sse4_2) = a.simd.level().as_sse4_2() {
return copy_alpha_sse4_2(sse4_2, a.into(), b.into()).simd_into(a.simd);
}

#[cfg(target_arch = "aarch64")]
if let Some(_neon) = a.simd.level().as_neon() {
unsafe {
return core::arch::aarch64::vcopyq_laneq_f32::<3, 3>(a.into(), b.into())
.simd_into(a.simd);
}
if let Some(neon) = a.simd.level().as_neon() {
return copy_alpha_neon(neon, a.into(), b.into()).simd_into(a.simd);
}

let mut result = a;
result[3] = b[3];
result
}

/// Approximate the linear-RGB to sRGB transfer curve for RGB, preserving alpha.
#[inline(always)]
fn to_srgb<S: Simd>(simd: S, rgba: [f32; 4]) -> [f32; 4] {
let v: f32x4<S> = rgba.simd_into(simd);
Expand Down
Loading
Loading