Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
sse.h
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2017 Tobias Ellinghaus.
4 Copyright (C) 2018-2019, 2025 Aurélien PIERRE.
5 Copyright (C) 2019-2020 Pascal Obry.
6 Copyright (C) 2022 Martin Bařinka.
7
8 darktable is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 darktable is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with darktable. If not, see <http://www.gnu.org/licenses/>.
20*/
21#pragma once
22
23#include <xmmintrin.h>
24
25
/*
 * Horner-scheme polynomial evaluation on four packed floats.
 *
 * POLYn(x, c0, ..., cn) expands to c0 + x * (c1 + x * (... + x * cn)), where
 * x is an __m128 expression and each coefficient is a scalar float that gets
 * broadcast to all four lanes with _mm_set1_ps().
 *
 * NOTE: x is expanded once per degree, so pass a plain variable rather than an
 * expression with side effects. POLY0 ignores x entirely.
 */
#define POLY0(x, c0) \
  _mm_set1_ps(c0)
#define POLY1(x, c0, c1) \
  _mm_add_ps(_mm_mul_ps(POLY0(x, c1), (x)), _mm_set1_ps(c0))
#define POLY2(x, c0, c1, c2) \
  _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), (x)), _mm_set1_ps(c0))
#define POLY3(x, c0, c1, c2, c3) \
  _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), (x)), _mm_set1_ps(c0))
#define POLY4(x, c0, c1, c2, c3, c4) \
  _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), (x)), _mm_set1_ps(c0))
#define POLY5(x, c0, c1, c2, c3, c4, c5) \
  _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), (x)), _mm_set1_ps(c0))

/* Polynomial selector for _mm_exp2_ps(); that function handles the values 2 to 5. */
#define EXP_POLY_DEGREE 4
/* Polynomial selector for _mm_log2_ps(); that function handles the values 3 to 6. */
#define LOG_POLY_DEGREE 5
39
43static inline __m128 _mm_exp2_ps(__m128 x)
44{
45 __m128i ipart;
46 __m128 fpart, expipart, expfpart;
47
48 x = _mm_min_ps(x, _mm_set1_ps(129.00000f));
49 x = _mm_max_ps(x, _mm_set1_ps(-126.99999f));
50
51 /* ipart = int(x - 0.5) */
52 ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f)));
53
54 /* fpart = x - ipart */
55 fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart));
56
57 /* expipart = (float) (1 << ipart) */
58 expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23));
59
60/* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */
61#if EXP_POLY_DEGREE == 5
62 expfpart
63 = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f);
64#elif EXP_POLY_DEGREE == 4
65 expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f);
66#elif EXP_POLY_DEGREE == 3
67 expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f);
68#elif EXP_POLY_DEGREE == 2
69 expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f);
70#else
71#error
72#endif
73
74 return _mm_mul_ps(expipart, expfpart);
75}
76
77
81static inline __m128 _mm_log2_ps(__m128 x)
82{
83 __m128i expmask = _mm_set1_epi32(0x7f800000);
84 __m128i mantmask = _mm_set1_epi32(0x007fffff);
85 __m128 one = _mm_set1_ps(1.0f);
86
87 __m128i i = _mm_castps_si128(x);
88
89 /* exp = (float) exponent(x) */
90 __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127)));
91
92 /* mant = (float) mantissa(x) */
93 __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one);
94
95 __m128 logmant;
96
97/* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
98 * These coefficients can be generate with
99 * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
100 */
101#if LOG_POLY_DEGREE == 6
102 logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f,
103 -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f);
104#elif LOG_POLY_DEGREE == 5
105 logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f,
106 -0.465725644288844778798f, 0.0596515482674574969533f);
107#elif LOG_POLY_DEGREE == 4
108 logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f,
109 -0.107254423828329604454f);
110#elif LOG_POLY_DEGREE == 3
111 logmant = POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
112#else
113#error
114#endif
115
116 /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
117 logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one));
118
119 return _mm_add_ps(logmant, exp);
120}
121
122static inline __m128 _mm_pow_ps(__m128 x, __m128 y)
123{
124 return _mm_exp2_ps(_mm_mul_ps(_mm_log2_ps(x), y));
125}
126
127static inline __m128 _mm_pow_ps1(__m128 x, float y)
128{
129 return _mm_exp2_ps(_mm_mul_ps(_mm_log2_ps(x), _mm_set1_ps(y)));
130}
131
132
/*
 * Return lane i of the packed vector V as a scalar float.
 *
 * Lane 0 is the lowest element of the vector. No range check is done:
 * i must be in 0..3, anything else reads past the 4-float buffer.
 */
static inline float _mm_vectorGetByIndex(__m128 V, unsigned int i)
{
  float lanes[4];

  /* unaligned store: the local array carries no 16-byte alignment guarantee */
  _mm_storeu_ps(lanes, V);
  return lanes[i];
}
147
148// clang-format off
149// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
150// vim: shiftwidth=2 expandtab tabstop=2 cindent
151// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
152// clang-format on
const float i
Definition colorspaces_inline_conversions.h:669
const float V
Definition colorspaces_inline_conversions.h:772
const float a
Definition colorspaces_inline_conversions.h:1292
static const float x
Definition iop_profile.h:239
static const float v
Definition iop_profile.h:223
#define POLY5(x, c0, c1, c2, c3, c4, c5)
Definition sse.h:35
static __m128 _mm_log2_ps(__m128 x)
Definition sse.h:81
#define POLY3(x, c0, c1, c2, c3)
Definition sse.h:33
static __m128 _mm_pow_ps1(__m128 x, float y)
Definition sse.h:127
static __m128 _mm_exp2_ps(__m128 x)
Definition sse.h:43
#define POLY2(x, c0, c1, c2)
Definition sse.h:32
#define POLY4(x, c0, c1, c2, c3, c4)
Definition sse.h:34
static float _mm_vectorGetByIndex(__m128 V, unsigned int i)
Definition sse.h:137
static __m128 _mm_pow_ps(__m128 x, __m128 y)
Definition sse.h:122