Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
matrices.h
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2010-2011 Henrik Andersson.
4 Copyright (C) 2010 johannes hanika.
5 Copyright (C) 2010 Pascal de Bruijn.
6 Copyright (C) 2012 Richard Wonka.
7 Copyright (C) 2013-2014 Jérémy Rosen.
8 Copyright (C) 2016 Tobias Ellinghaus.
9 Copyright (C) 2020 Pascal Obry.
10 Copyright (C) 2021 Ralf Brown.
11 Copyright (C) 2022 Martin Bařinka.
12 Copyright (C) 2025 Aurélien PIERRE.
13
14 darktable is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
18
19 darktable is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with darktable. If not, see <http://www.gnu.org/licenses/>.
26*/
27
28#pragma once
29
30#include "common/math.h"
31
32// a 3x3 matrix, padded to permit SSE instructions to be used for multiplication and addition
34
36static inline int mat3SSEinv(dt_colormatrix_t dst, const dt_colormatrix_t src)
37{
38#define A(y, x) src[(y - 1)][(x - 1)]
39#define B(y, x) dst[(y - 1)][(x - 1)]
40
41 const float det = A(1, 1) * (A(3, 3) * A(2, 2) - A(3, 2) * A(2, 3))
42 - A(2, 1) * (A(3, 3) * A(1, 2) - A(3, 2) * A(1, 3))
43 + A(3, 1) * (A(2, 3) * A(1, 2) - A(2, 2) * A(1, 3));
44
45 const float epsilon = 1e-7f;
46 if(fabsf(det) < epsilon) return 1;
47
48 const float invDet = 1.f / det;
49
50 B(1, 1) = invDet * (A(3, 3) * A(2, 2) - A(3, 2) * A(2, 3));
51 B(1, 2) = -invDet * (A(3, 3) * A(1, 2) - A(3, 2) * A(1, 3));
52 B(1, 3) = invDet * (A(2, 3) * A(1, 2) - A(2, 2) * A(1, 3));
53
54 B(2, 1) = -invDet * (A(3, 3) * A(2, 1) - A(3, 1) * A(2, 3));
55 B(2, 2) = invDet * (A(3, 3) * A(1, 1) - A(3, 1) * A(1, 3));
56 B(2, 3) = -invDet * (A(2, 3) * A(1, 1) - A(2, 1) * A(1, 3));
57
58 B(3, 1) = invDet * (A(3, 2) * A(2, 1) - A(3, 1) * A(2, 2));
59 B(3, 2) = -invDet * (A(3, 2) * A(1, 1) - A(3, 1) * A(1, 2));
60 B(3, 3) = invDet * (A(2, 2) * A(1, 1) - A(2, 1) * A(1, 2));
61#undef A
62#undef B
63 return 0;
64}
65
66
67// transpose a padded 3x3 matrix
68static inline void transpose_3xSSE(const dt_colormatrix_t input, dt_colormatrix_t output)
69{
70 output[0][0] = input[0][0];
71 output[0][1] = input[1][0];
72 output[0][2] = input[2][0];
73 output[0][3] = 0.0f;
74
75 output[1][0] = input[0][1];
76 output[1][1] = input[1][1];
77 output[1][2] = input[2][1];
78 output[1][3] = 0.0f;
79
80 output[2][0] = input[0][2];
81 output[2][1] = input[1][2];
82 output[2][2] = input[2][2];
83 output[2][3] = 0.0f;
84
85 for_four_channels(c, aligned(output))
86 output[3][c] = 0.0f;
87}
88
89
90// transpose and pad a 3x3 matrix into the padded format optimized for vectorization
91static inline void transpose_3x3_to_3xSSE(const float input[9], dt_colormatrix_t output)
92{
93 output[0][0] = input[0];
94 output[0][1] = input[3];
95 output[0][2] = input[6];
96 output[0][3] = 0.0f;
97
98 output[1][0] = input[1];
99 output[1][1] = input[4];
100 output[1][2] = input[7];
101 output[1][3] = 0.0f;
102
103 output[2][0] = input[2];
104 output[2][1] = input[5];
105 output[2][2] = input[8];
106 output[2][3] = 0.0f;
107
108 for_four_channels(c, aligned(output))
109 output[3][c] = 0.0f;
110}
111
112// convert a 3x3 matrix into the padded format optimized for vectorization
113static inline void repack_double3x3_to_3xSSE(const double input[9], dt_colormatrix_t output)
114{
115 output[0][0] = input[0];
116 output[0][1] = input[1];
117 output[0][2] = input[2];
118 output[0][3] = 0.0f;
119
120 output[1][0] = input[3];
121 output[1][1] = input[4];
122 output[1][2] = input[5];
123 output[1][3] = 0.0f;
124
125 output[2][0] = input[6];
126 output[2][1] = input[7];
127 output[2][2] = input[8];
128 output[2][3] = 0.0f;
129
130 for(size_t c = 0; c < 4; c++)
131 output[3][c] = 0.0f;
132}
133
134// convert a 3x3 matrix into the padded format optimized for vectorization
135static inline void pack_3xSSE_to_3x3(const dt_colormatrix_t input, float output[9])
136{
137 output[0] = input[0][0];
138 output[1] = input[0][1];
139 output[2] = input[0][2];
140 output[3] = input[1][0];
141 output[4] = input[1][1];
142 output[5] = input[1][2];
143 output[6] = input[2][0];
144 output[7] = input[2][1];
145 output[8] = input[2][2];
146}
147
148// convert a 3x3 matrix into 3 padded float4 rows: [m00 m01 m02 0, ...]
149static inline void pack_3xSSE_to_3x4(const dt_colormatrix_t input, float output[12])
150{
151 output[0] = input[0][0];
152 output[1] = input[0][1];
153 output[2] = input[0][2];
154 output[3] = 0.0f;
155 output[4] = input[1][0];
156 output[5] = input[1][1];
157 output[6] = input[1][2];
158 output[7] = 0.0f;
159 output[8] = input[2][0];
160 output[9] = input[2][1];
161 output[10] = input[2][2];
162 output[11] = 0.0f;
163}
164
165// vectorized multiplication of padded 3x3 matrices
166static inline void dt_colormatrix_mul(dt_colormatrix_t dst, const dt_colormatrix_t m1, const dt_colormatrix_t m2)
167{
168 for(int k = 0; k < 3; ++k)
169 {
170 dt_aligned_pixel_t sum = { 0.0f };
172 {
173 for(int j = 0; j < 3; j++)
174 sum[i] += m1[k][j] * m2[j][i];
175 dst[k][i] = sum[i];
176 }
177 }
178}
179
180// multiply two padded 3x3 matrices
181// dest needs to be different from m1 and m2
182// dest = m1 * m2 in this order
183// TODO: see if that refactors with the previous
184#ifdef _OPENMP
185#pragma omp declare simd
186#endif
187static inline void mat3SSEmul(dt_colormatrix_t dest, const dt_colormatrix_t m1, const dt_colormatrix_t m2)
188{
189 for(int k = 0; k < 3; k++)
190 {
191 for(int i = 0; i < 3; i++)
192 {
193 float x = 0.0f;
194 for(int j = 0; j < 3; j++)
195 x += m1[k][j] * m2[j][i];
196 dest[k][i] = x;
197 }
198 }
199}
200
201#ifdef _OPENMP
202#pragma omp declare simd uniform(M) aligned(M:64) aligned(v_in, v_out:16)
203#endif
204static inline void dot_product(const dt_aligned_pixel_t v_in, const dt_colormatrix_t M, dt_aligned_pixel_t v_out)
205{
206 // specialized 3x4 dot products of 4x1 RGB-alpha pixels
207 #ifdef _OPENMP
208 #pragma omp simd aligned(M:64) aligned(v_in, v_out:16)
209 #endif
210 for(size_t i = 0; i < 3; ++i) v_out[i] = scalar_product(v_in, M[i]);
211}
212
213
214
215// clang-format off
216// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
217// vim: shiftwidth=2 expandtab tabstop=2 cindent
218// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
219// clang-format on
const float i
Definition colorspaces_inline_conversions.h:669
const float c
Definition colorspaces_inline_conversions.h:1365
static const dt_colormatrix_t M
Definition colorspaces_inline_conversions.h:933
#define DT_ALIGNED_ARRAY
Definition darktable.h:312
#define for_each_channel(_var,...)
Definition darktable.h:582
#define for_four_channels(_var,...)
Definition darktable.h:584
static const float x
Definition iop_profile.h:239
static float scalar_product(const dt_aligned_pixel_t v_1, const dt_aligned_pixel_t v_2)
Definition math.h:204
#define B(y, x)
#define A(y, x)
static void transpose_3x3_to_3xSSE(const float input[9], dt_colormatrix_t output)
Definition matrices.h:91
float DT_ALIGNED_ARRAY dt_colormatrix_t[4][4]
Definition matrices.h:33
static void mat3SSEmul(dt_colormatrix_t dest, const dt_colormatrix_t m1, const dt_colormatrix_t m2)
Definition matrices.h:187
static void transpose_3xSSE(const dt_colormatrix_t input, dt_colormatrix_t output)
Definition matrices.h:68
static void pack_3xSSE_to_3x4(const dt_colormatrix_t input, float output[12])
Definition matrices.h:149
static void dt_colormatrix_mul(dt_colormatrix_t dst, const dt_colormatrix_t m1, const dt_colormatrix_t m2)
Definition matrices.h:166
static int mat3SSEinv(dt_colormatrix_t dst, const dt_colormatrix_t src)
Definition matrices.h:36
static void repack_double3x3_to_3xSSE(const double input[9], dt_colormatrix_t output)
Definition matrices.h:113
static void pack_3xSSE_to_3x3(const dt_colormatrix_t input, float output[9])
Definition matrices.h:135
static void dot_product(const dt_aligned_pixel_t v_in, const dt_colormatrix_t M, dt_aligned_pixel_t v_out)
Definition matrices.h:204