Grok 10.0.5
skeleton-inl.h
Go to the documentation of this file.
1// Copyright 2020 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// Demo of functions that might be called from multiple SIMD modules (either
17// other -inl.h files, or a .cc file between begin/end_target-inl). This is
18// optional - all SIMD code can reside in .cc files. However, this allows
19// splitting code into different files while still inlining instead of requiring
20// calling through function pointers.
21
22// Per-target include guard. This is only required when using dynamic dispatch,
23// i.e. including foreach_target.h. For static dispatch, a normal include
24// guard would be fine because the header is only compiled once.
25#if defined(HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_) == defined(HWY_TARGET_TOGGLE)
26#ifdef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
27#undef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
28#else
29#define HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
30#endif
31
32// It is fine to #include normal or *-inl headers.
33#include <stddef.h>
34
35#include "hwy/highway.h"
36
38namespace skeleton {
39namespace HWY_NAMESPACE {
40
41// Highway ops reside here; ADL does not find templates nor builtins.
42namespace hn = hwy::HWY_NAMESPACE;
43
44// Example of a type-agnostic (caller-specified lane type) and width-agnostic
45// (uses best available instruction set) function in a header.
46//
47// Computes x[i] = mul_array[i] * x_array[i] + add_array[i] for i < size.
48template <class D, typename T>
49HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T* HWY_RESTRICT mul_array,
50 const T* HWY_RESTRICT add_array,
51 const size_t size, T* HWY_RESTRICT x_array) {
52 for (size_t i = 0; i < size; i += hn::Lanes(d)) {
53 const auto mul = hn::Load(d, mul_array + i);
54 const auto add = hn::Load(d, add_array + i);
55 auto x = hn::Load(d, x_array + i);
56 x = hn::MulAdd(mul, x, add);
57 hn::Store(x, d, x_array + i);
58 }
59}
60
61// NOLINTNEXTLINE(google-readability-namespace-comments)
62} // namespace HWY_NAMESPACE
63} // namespace skeleton
65
66#endif // include guard
#define HWY_RESTRICT
Definition base.h:64
#define HWY_MAYBE_UNUSED
Definition base.h:82
Definition copy-inl.h:29
HWY_API Vec128< float, N > MulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition arm_neon-inl.h:1799
HWY_API constexpr size_t Lanes(Simd< T, N, kPow2 >)
Definition arm_sve-inl.h:243
HWY_API Vec128< T, N > Load(Simd< T, N, 0 > d, const T *HWY_RESTRICT p)
Definition arm_neon-inl.h:2753
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition arm_neon-inl.h:2934
HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T *HWY_RESTRICT mul_array, const T *HWY_RESTRICT add_array, const size_t size, T *HWY_RESTRICT x_array)
Definition skeleton-inl.h:49
Definition skeleton-inl.h:38
#define HWY_NAMESPACE
Definition set_macros-inl.h:82
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()