46 __m128 fpart, expipart, expfpart;
48 x = _mm_min_ps(
x, _mm_set1_ps(129.00000f));
49 x = _mm_max_ps(
x, _mm_set1_ps(-126.99999f));
52 ipart = _mm_cvtps_epi32(_mm_sub_ps(
x, _mm_set1_ps(0.5f)));
55 fpart = _mm_sub_ps(
x, _mm_cvtepi32_ps(ipart));
58 expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23));
61#if EXP_POLY_DEGREE == 5
63 expfpart =
POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f);
64#elif EXP_POLY_DEGREE == 4
65 expfpart =
POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f);
66#elif EXP_POLY_DEGREE == 3
67 expfpart =
POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f);
68#elif EXP_POLY_DEGREE == 2
69 expfpart =
POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f);
74 return _mm_mul_ps(expipart, expfpart);
/*
 * Body fragment of a 4-lane SSE logarithm approximation. The enclosing
 * function signature and the declaration of `logmant` are not visible here.
 *
 * Each float lane of x is split by bit masking into its exponent field and
 * its mantissa field, so that x = mant * 2^exp. The value returned is
 * (polynomial(mant) * (mant - 1)) + exp, i.e. a base-2 logarithm estimate.
 *
 * NOTE(review): neither mask covers the sign bit (bit 31), so the sign of x
 * is ignored, and no special handling of zero, denormals, infinities or NaN
 * is visible in this fragment - confirm against the callers' input range.
 */
/* Bits 23..30: the biased exponent field of an IEEE-754 single. */
83 __m128i expmask = _mm_set1_epi32(0x7f800000);
/* Bits 0..22: the fraction (mantissa) field. */
84 __m128i mantmask = _mm_set1_epi32(0x007fffff);
85 __m128 one = _mm_set1_ps(1.0f);
/* Reinterpret the float lanes as integers; this is a bit cast, not a conversion. */
87 __m128i
i = _mm_castps_si128(
x);
/* exp = (float)(((i & expmask) >> 23) - 127): the exponent with the bias of 127 removed. */
90 __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(
i, expmask), 23), _mm_set1_epi32(127)));
/*
 * Keep only the fraction bits and OR in the bit pattern of 1.0f, which
 * supplies a zero unbiased exponent: mant lies in [1, 2).
 */
93 __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(
i, mantmask)), one);
/*
 * Polynomial in mant, selected at compile time. LOG_POLY_DEGREE == N uses
 * the POLY(N-1) macro because the result is multiplied by (mant - 1) further
 * down, which contributes the remaining degree.
 * NOTE(review): the coefficients presumably fit log2(mant) / (mant - 1) on
 * [1, 2) - their derivation is not shown here.
 */
101#if LOG_POLY_DEGREE == 6
102 logmant =
POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f,
103 -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f);
104#elif LOG_POLY_DEGREE == 5
105 logmant =
POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f,
106 -0.465725644288844778798f, 0.0596515482674574969533f);
107#elif LOG_POLY_DEGREE == 4
108 logmant =
POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f,
109 -0.107254423828329604454f);
110#elif LOG_POLY_DEGREE == 3
111 logmant =
POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
/* The (mant - 1) factor forces the mantissa term to exactly 0 when mant == 1. */
117 logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one));
/* Result = mantissa term + unbiased exponent. */
119 return _mm_add_ps(logmant, exp);
#define POLY5(x, c0, c1, c2, c3, c4, c5)
Definition sse.h:35
#define POLY3(x, c0, c1, c2, c3)
Definition sse.h:33
#define POLY4(x, c0, c1, c2, c3, c4)
Definition sse.h:34