40 __m128 fpart, expipart, expfpart;
42 x = _mm_min_ps(x, _mm_set1_ps(129.00000f));
43 x = _mm_max_ps(x, _mm_set1_ps(-126.99999f));
46 ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f)));
49 fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart));
52 expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23));
55#if EXP_POLY_DEGREE == 5
57 expfpart =
POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f);
58#elif EXP_POLY_DEGREE == 4
59 expfpart =
POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f);
60#elif EXP_POLY_DEGREE == 3
61 expfpart =
POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f);
62#elif EXP_POLY_DEGREE == 2
63 expfpart =
POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f);
68 return _mm_mul_ps(expipart, expfpart);
77 __m128i expmask = _mm_set1_epi32(0x7f800000);
78 __m128i mantmask = _mm_set1_epi32(0x007fffff);
79 __m128 one = _mm_set1_ps(1.0f);
81 __m128i i = _mm_castps_si128(x);
84 __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127)));
87 __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one);
95#if LOG_POLY_DEGREE == 6
96 logmant =
POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f,
97 -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f);
98#elif LOG_POLY_DEGREE == 5
99 logmant =
POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f,
100 -0.465725644288844778798f, 0.0596515482674574969533f);
101#elif LOG_POLY_DEGREE == 4
102 logmant =
POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f,
103 -0.107254423828329604454f);
104#elif LOG_POLY_DEGREE == 3
105 logmant =
POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
111 logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one));
113 return _mm_add_ps(logmant, exp);
#define POLY5(x, c0, c1, c2, c3, c4, c5)
Definition sse.h:29
#define POLY3(x, c0, c1, c2, c3)
Definition sse.h:27
#define POLY4(x, c0, c1, c2, c3, c4)
Definition sse.h:28