Ansel 0.0
A darktable fork - bloat + design vision
Loading...
Searching...
No Matches
matrices.h
Go to the documentation of this file.
1/*
2 This file is part of darktable,
3 Copyright (C) 2021 darktable developers.
4
5 darktable is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 darktable is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with darktable. If not, see <http://www.gnu.org/licenses/>.
17*/
18
19#pragma once
20
21#include "common/math.h"
22
23// a 3x3 matrix, padded to permit SSE instructions to be used for multiplication and addition
25
27static inline int mat3SSEinv(dt_colormatrix_t dst, const dt_colormatrix_t src)
28{
29#define A(y, x) src[(y - 1)][(x - 1)]
30#define B(y, x) dst[(y - 1)][(x - 1)]
31
32 const float det = A(1, 1) * (A(3, 3) * A(2, 2) - A(3, 2) * A(2, 3))
33 - A(2, 1) * (A(3, 3) * A(1, 2) - A(3, 2) * A(1, 3))
34 + A(3, 1) * (A(2, 3) * A(1, 2) - A(2, 2) * A(1, 3));
35
36 const float epsilon = 1e-7f;
37 if(fabsf(det) < epsilon) return 1;
38
39 const float invDet = 1.f / det;
40
41 B(1, 1) = invDet * (A(3, 3) * A(2, 2) - A(3, 2) * A(2, 3));
42 B(1, 2) = -invDet * (A(3, 3) * A(1, 2) - A(3, 2) * A(1, 3));
43 B(1, 3) = invDet * (A(2, 3) * A(1, 2) - A(2, 2) * A(1, 3));
44
45 B(2, 1) = -invDet * (A(3, 3) * A(2, 1) - A(3, 1) * A(2, 3));
46 B(2, 2) = invDet * (A(3, 3) * A(1, 1) - A(3, 1) * A(1, 3));
47 B(2, 3) = -invDet * (A(2, 3) * A(1, 1) - A(2, 1) * A(1, 3));
48
49 B(3, 1) = invDet * (A(3, 2) * A(2, 1) - A(3, 1) * A(2, 2));
50 B(3, 2) = -invDet * (A(3, 2) * A(1, 1) - A(3, 1) * A(1, 2));
51 B(3, 3) = invDet * (A(2, 2) * A(1, 1) - A(2, 1) * A(1, 2));
52#undef A
53#undef B
54 return 0;
55}
56
57
58// transpose a padded 3x3 matrix
59static inline void transpose_3xSSE(const dt_colormatrix_t input, dt_colormatrix_t output)
60{
61 output[0][0] = input[0][0];
62 output[0][1] = input[1][0];
63 output[0][2] = input[2][0];
64 output[0][3] = 0.0f;
65
66 output[1][0] = input[0][1];
67 output[1][1] = input[1][1];
68 output[1][2] = input[2][1];
69 output[1][3] = 0.0f;
70
71 output[2][0] = input[0][2];
72 output[2][1] = input[1][2];
73 output[2][2] = input[2][2];
74 output[2][3] = 0.0f;
75
76 for_four_channels(c, aligned(output))
77 output[3][c] = 0.0f;
78}
79
80
81// transpose and pad a 3x3 matrix into the padded format optimized for vectorization
82static inline void transpose_3x3_to_3xSSE(const float input[9], dt_colormatrix_t output)
83{
84 output[0][0] = input[0];
85 output[0][1] = input[3];
86 output[0][2] = input[6];
87 output[0][3] = 0.0f;
88
89 output[1][0] = input[1];
90 output[1][1] = input[4];
91 output[1][2] = input[7];
92 output[1][3] = 0.0f;
93
94 output[2][0] = input[2];
95 output[2][1] = input[5];
96 output[2][2] = input[8];
97 output[2][3] = 0.0f;
98
99 for_four_channels(c, aligned(output))
100 output[3][c] = 0.0f;
101}
102
103// convert a 3x3 matrix into the padded format optimized for vectorization
104static inline void repack_double3x3_to_3xSSE(const double input[9], dt_colormatrix_t output)
105{
106 output[0][0] = input[0];
107 output[0][1] = input[1];
108 output[0][2] = input[2];
109 output[0][3] = 0.0f;
110
111 output[1][0] = input[3];
112 output[1][1] = input[4];
113 output[1][2] = input[5];
114 output[1][3] = 0.0f;
115
116 output[2][0] = input[6];
117 output[2][1] = input[7];
118 output[2][2] = input[8];
119 output[2][3] = 0.0f;
120
121 for(size_t c = 0; c < 4; c++)
122 output[3][c] = 0.0f;
123}
124
125// convert a 3x3 matrix into the padded format optimized for vectorization
126static inline void pack_3xSSE_to_3x3(const dt_colormatrix_t input, float output[9])
127{
128 output[0] = input[0][0];
129 output[1] = input[0][1];
130 output[2] = input[0][2];
131 output[3] = input[1][0];
132 output[4] = input[1][1];
133 output[5] = input[1][2];
134 output[6] = input[2][0];
135 output[7] = input[2][1];
136 output[8] = input[2][2];
137}
138
139// vectorized multiplication of padded 3x3 matrices
140static inline void dt_colormatrix_mul(dt_colormatrix_t dst, const dt_colormatrix_t m1, const dt_colormatrix_t m2)
141{
142 for(int k = 0; k < 3; ++k)
143 {
144 dt_aligned_pixel_t sum = { 0.0f };
146 {
147 for(int j = 0; j < 3; j++)
148 sum[i] += m1[k][j] * m2[j][i];
149 dst[k][i] = sum[i];
150 }
151 }
152}
153
154// multiply two padded 3x3 matrices
155// dest needs to be different from m1 and m2
156// dest = m1 * m2 in this order
157// TODO: see if that refactors with the previous
158#ifdef _OPENMP
159#pragma omp declare simd
160#endif
161static inline void mat3SSEmul(dt_colormatrix_t dest, const dt_colormatrix_t m1, const dt_colormatrix_t m2)
162{
163 for(int k = 0; k < 3; k++)
164 {
165 for(int i = 0; i < 3; i++)
166 {
167 float x = 0.0f;
168 for(int j = 0; j < 3; j++)
169 x += m1[k][j] * m2[j][i];
170 dest[k][i] = x;
171 }
172 }
173}
174
175#ifdef _OPENMP
176#pragma omp declare simd uniform(M) aligned(M:64) aligned(v_in, v_out:16)
177#endif
178static inline void dot_product(const dt_aligned_pixel_t v_in, const dt_colormatrix_t M, dt_aligned_pixel_t v_out)
179{
180 // specialized 3x4 dot products of 4x1 RGB-alpha pixels
181 #ifdef _OPENMP
182 #pragma omp simd aligned(M:64) aligned(v_in, v_out:16)
183 #endif
184 for(size_t i = 0; i < 3; ++i) v_out[i] = scalar_product(v_in, M[i]);
185}
186
187
188
189// clang-format off
190// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
191// vim: shiftwidth=2 expandtab tabstop=2 cindent
192// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
193// clang-format on
#define DT_ALIGNED_ARRAY
Definition darktable.h:270
#define for_each_channel(_var,...)
Definition darktable.h:411
#define for_four_channels(_var,...)
Definition darktable.h:413
static float scalar_product(const dt_aligned_pixel_t v_1, const dt_aligned_pixel_t v_2)
Definition math.h:196
#define B(y, x)
#define A(y, x)
static void transpose_3x3_to_3xSSE(const float input[9], dt_colormatrix_t output)
Definition matrices.h:82
float DT_ALIGNED_ARRAY dt_colormatrix_t[4][4]
Definition matrices.h:24
static void mat3SSEmul(dt_colormatrix_t dest, const dt_colormatrix_t m1, const dt_colormatrix_t m2)
Definition matrices.h:161
static void transpose_3xSSE(const dt_colormatrix_t input, dt_colormatrix_t output)
Definition matrices.h:59
static void dt_colormatrix_mul(dt_colormatrix_t dst, const dt_colormatrix_t m1, const dt_colormatrix_t m2)
Definition matrices.h:140
static int mat3SSEinv(dt_colormatrix_t dst, const dt_colormatrix_t src)
Definition matrices.h:27
static void repack_double3x3_to_3xSSE(const double input[9], dt_colormatrix_t output)
Definition matrices.h:104
static void pack_3xSSE_to_3x3(const dt_colormatrix_t input, float output[9])
Definition matrices.h:126
static void dot_product(const dt_aligned_pixel_t v_in, const dt_colormatrix_t M, dt_aligned_pixel_t v_out)
Definition matrices.h:178