16 #ifndef BT_MATRIX3x3_H 17 #define BT_MATRIX3x3_H 26 #define vMPPP (_mm_set_ps (+0.0f, +0.0f, +0.0f, -0.0f)) 29 #if defined(BT_USE_SSE) 30 #define v1000 (_mm_set_ps(0.0f,0.0f,0.0f,1.0f)) 31 #define v0100 (_mm_set_ps(0.0f,0.0f,1.0f,0.0f)) 32 #define v0010 (_mm_set_ps(0.0f,1.0f,0.0f,0.0f)) 33 #elif defined(BT_USE_NEON) 39 #ifdef BT_USE_DOUBLE_PRECISION 40 #define btMatrix3x3Data btMatrix3x3DoubleData 42 #define btMatrix3x3Data btMatrix3x3FloatData 43 #endif //BT_USE_DOUBLE_PRECISION 78 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON) 96 m_el[0].mVec128 = rhs.
m_el[0].mVec128;
97 m_el[1].mVec128 = rhs.
m_el[1].mVec128;
98 m_el[2].mVec128 = rhs.
m_el[2].mVec128;
104 m_el[0].mVec128 = m.
m_el[0].mVec128;
105 m_el[1].mVec128 = m.
m_el[1].mVec128;
106 m_el[2].mVec128 = m.
m_el[2].mVec128;
116 m_el[0] = other.
m_el[0];
117 m_el[1] = other.
m_el[1];
118 m_el[2] = other.
m_el[2];
124 m_el[0] = other.
m_el[0];
125 m_el[1] = other.
m_el[1];
126 m_el[2] = other.
m_el[2];
136 return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
215 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 216 __m128 vs, Q = q.get128();
217 __m128i Qi = btCastfTo128i(Q);
220 __m128 V11, V21, V31;
221 __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
222 __m128i NQi = btCastfTo128i(NQ);
224 V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3)));
225 V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3));
226 V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3)));
227 V1 = _mm_xor_ps(V1, vMPPP);
229 V11 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3)));
230 V21 = _mm_unpackhi_ps(Q, Q);
231 V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3));
237 V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3));
239 V21 = _mm_xor_ps(V21, vMPPP);
240 V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3));
241 V31 = _mm_xor_ps(V31, vMPPP);
242 Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3)));
243 Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3)));
245 vs = _mm_load_ss(&s);
253 vs = bt_splat3_ps(vs, 0);
267 btScalar xs = q.
x() * s, ys = q.
y() * s, zs = q.
z() * s;
268 btScalar wx = q.
w() * xs, wy = q.
w() * ys, wz = q.
w() * zs;
269 btScalar xx = q.
x() * xs, xy = q.
x() * ys, xz = q.
x() * zs;
270 btScalar yy = q.
y() * ys, yz = q.
y() * zs, zz = q.
z() * zs;
272 btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
273 xy + wz,
btScalar(1.0) - (xx + zz), yz - wx,
274 xz - wy, yz + wx,
btScalar(1.0) - (xx + yy));
286 setEulerZYX(roll, pitch, yaw);
311 setValue(cj * ch, sj * sc - cs, sj * cc + ss,
312 cj * sh, sj * ss + cc, sj * cs - sc,
313 -sj, cj * si, cj * ci);
319 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON) 332 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON) 334 identityMatrix(v1000, v0100, v0010);
342 return identityMatrix;
349 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 350 __m128 v0 = m_el[0].mVec128;
351 __m128 v1 = m_el[1].mVec128;
352 __m128 v2 = m_el[2].mVec128;
353 __m128 *vm = (__m128 *)m;
356 v2 = _mm_and_ps(v2, btvFFF0fMask);
358 vT = _mm_unpackhi_ps(v0, v1);
359 v0 = _mm_unpacklo_ps(v0, v1);
361 v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) );
362 v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) );
363 v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));
368 #elif defined(BT_USE_NEON) 370 static const uint32x2_t zMask = (
const uint32x2_t) {
static_cast<uint32_t>(-1), 0 };
371 float32x4_t *vm = (float32x4_t *)m;
372 float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 );
373 float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) );
374 float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
375 float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
376 float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
377 float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q );
402 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON) 403 btScalar trace = m_el[0].
x() + m_el[1].
y() + m_el[2].
z();
415 temp.f[0]=m_el[2].
y() - m_el[1].
z();
416 temp.f[1]=m_el[0].
z() - m_el[2].
x();
417 temp.f[2]=m_el[1].
x() - m_el[0].
y();
424 if(m_el[0].x() < m_el[1].y())
426 if( m_el[1].y() < m_el[2].z() )
427 { i = 2; j = 0; k = 1; }
429 { i = 1; j = 2; k = 0; }
433 if( m_el[0].x() < m_el[2].z())
434 { i = 2; j = 0; k = 1; }
436 { i = 0; j = 1; k = 2; }
439 x = m_el[i][i] - m_el[j][j] - m_el[k][k] +
btScalar(1.0);
441 temp.f[3] = (m_el[k][j] - m_el[j][k]);
442 temp.f[j] = (m_el[j][i] + m_el[i][j]);
443 temp.f[k] = (m_el[k][i] + m_el[i][k]);
454 btScalar trace = m_el[0].
x() + m_el[1].
y() + m_el[2].
z();
464 temp[0]=((m_el[2].
y() - m_el[1].
z()) * s);
465 temp[1]=((m_el[0].
z() - m_el[2].
x()) * s);
466 temp[2]=((m_el[1].
x() - m_el[0].
y()) * s);
470 int i = m_el[0].
x() < m_el[1].
y() ?
471 (m_el[1].
y() < m_el[2].
z() ? 2 : 1) :
472 (m_el[0].x() < m_el[2].
z() ? 2 : 0);
480 temp[3] = (m_el[k][j] - m_el[j][k]) * s;
481 temp[j] = (m_el[j][i] + m_el[i][j]) * s;
482 temp[k] = (m_el[k][i] + m_el[i][k]) * s;
484 q.
setValue(temp[0],temp[1],temp[2],temp[3]);
535 if (
btFabs(m_el[2].x()) >= 1)
546 euler_out.roll = euler_out.pitch + delta;
547 euler_out2.roll = euler_out.pitch + delta;
553 euler_out.roll = -euler_out.pitch + delta;
554 euler_out2.roll = -euler_out.pitch + delta;
559 euler_out.pitch = -
btAsin(m_el[2].x());
560 euler_out2.pitch =
SIMD_PI - euler_out.pitch;
562 euler_out.roll =
btAtan2(m_el[2].y()/
btCos(euler_out.pitch),
563 m_el[2].
z()/
btCos(euler_out.pitch));
564 euler_out2.roll =
btAtan2(m_el[2].y()/
btCos(euler_out2.pitch),
565 m_el[2].
z()/
btCos(euler_out2.pitch));
567 euler_out.yaw =
btAtan2(m_el[1].x()/
btCos(euler_out.pitch),
568 m_el[0].
x()/
btCos(euler_out.pitch));
569 euler_out2.yaw =
btAtan2(m_el[1].x()/
btCos(euler_out2.pitch),
570 m_el[0].
x()/
btCos(euler_out2.pitch));
573 if (solution_number == 1)
576 pitch = euler_out.pitch;
577 roll = euler_out.roll;
581 yaw = euler_out2.yaw;
582 pitch = euler_out2.pitch;
583 roll = euler_out2.roll;
592 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON) 593 return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
596 m_el[0].x() * s.
x(), m_el[0].
y() * s.y(), m_el[0].
z() * s.z(),
597 m_el[1].
x() * s.x(), m_el[1].
y() * s.y(), m_el[1].
z() * s.z(),
598 m_el[2].
x() * s.x(), m_el[2].
y() * s.y(), m_el[2].
z() * s.z());
639 return m_el[0].
x() * v.
x() + m_el[1].
x() * v.
y() + m_el[2].
x() * v.
z();
643 return m_el[0].
y() * v.
x() + m_el[1].
y() * v.
y() + m_el[2].
y() * v.
z();
647 return m_el[0].
z() * v.
x() + m_el[1].
z() * v.
y() + m_el[2].
z() * v.
z();
661 for(iter = 0; iter < maxIter; iter++)
693 for (
int step = maxSteps; step > 0; step--)
728 btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
734 t = (theta >= 0) ? 1 / (theta +
btSqrt(1 + theta2))
735 : 1 / (theta -
btSqrt(1 + theta2));
736 cos = 1 /
btSqrt(1 + t * t);
742 t = 1 / (theta * (2 +
btScalar(0.5) / theta2));
748 m_el[p][q] = m_el[q][p] = 0;
749 m_el[p][p] -= t * mpq;
750 m_el[q][q] += t * mpq;
753 m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
754 m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
757 for (
int i = 0; i < 3; i++)
762 row[p] = cos * mrp - sin * mrq;
763 row[q] = cos * mrq + sin * mrp;
779 return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
798 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 799 __m128 rv00, rv01, rv02;
800 __m128 rv10, rv11, rv12;
801 __m128 rv20, rv21, rv22;
802 __m128 mv0, mv1, mv2;
804 rv02 = m_el[0].mVec128;
805 rv12 = m_el[1].mVec128;
806 rv22 = m_el[2].mVec128;
808 mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
809 mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
810 mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
813 rv00 = bt_splat_ps(rv02, 0);
814 rv01 = bt_splat_ps(rv02, 1);
815 rv02 = bt_splat_ps(rv02, 2);
817 rv00 = _mm_mul_ps(rv00, mv0);
818 rv01 = _mm_mul_ps(rv01, mv1);
819 rv02 = _mm_mul_ps(rv02, mv2);
822 rv10 = bt_splat_ps(rv12, 0);
823 rv11 = bt_splat_ps(rv12, 1);
824 rv12 = bt_splat_ps(rv12, 2);
826 rv10 = _mm_mul_ps(rv10, mv0);
827 rv11 = _mm_mul_ps(rv11, mv1);
828 rv12 = _mm_mul_ps(rv12, mv2);
831 rv20 = bt_splat_ps(rv22, 0);
832 rv21 = bt_splat_ps(rv22, 1);
833 rv22 = bt_splat_ps(rv22, 2);
835 rv20 = _mm_mul_ps(rv20, mv0);
836 rv21 = _mm_mul_ps(rv21, mv1);
837 rv22 = _mm_mul_ps(rv22, mv2);
839 rv00 = _mm_add_ps(rv00, rv01);
840 rv10 = _mm_add_ps(rv10, rv11);
841 rv20 = _mm_add_ps(rv20, rv21);
843 m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
844 m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
845 m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
847 #elif defined(BT_USE_NEON) 849 float32x4_t rv0, rv1, rv2;
850 float32x4_t v0, v1, v2;
851 float32x4_t mv0, mv1, mv2;
853 v0 = m_el[0].mVec128;
854 v1 = m_el[1].mVec128;
855 v2 = m_el[2].mVec128;
857 mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
858 mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
859 mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
861 rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
862 rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
863 rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
865 rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
866 rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
867 rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
869 rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
870 rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
871 rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
873 m_el[0].mVec128 = rv0;
874 m_el[1].mVec128 = rv1;
875 m_el[2].mVec128 = rv2;
888 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON) 889 m_el[0].mVec128 = m_el[0].mVec128 + m.
m_el[0].mVec128;
890 m_el[1].mVec128 = m_el[1].mVec128 + m.
m_el[1].mVec128;
891 m_el[2].mVec128 = m_el[2].mVec128 + m.
m_el[2].mVec128;
894 m_el[0][0]+m.
m_el[0][0],
895 m_el[0][1]+m.
m_el[0][1],
896 m_el[0][2]+m.
m_el[0][2],
897 m_el[1][0]+m.
m_el[1][0],
898 m_el[1][1]+m.
m_el[1][1],
899 m_el[1][2]+m.
m_el[1][2],
900 m_el[2][0]+m.
m_el[2][0],
901 m_el[2][1]+m.
m_el[2][1],
902 m_el[2][2]+m.
m_el[2][2]);
910 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)) 911 __m128 vk = bt_splat_ps(_mm_load_ss((
float *)&k), 0x80);
913 _mm_mul_ps(m[0].mVec128, vk),
914 _mm_mul_ps(m[1].mVec128, vk),
915 _mm_mul_ps(m[2].mVec128, vk));
916 #elif defined(BT_USE_NEON) 918 vmulq_n_f32(m[0].mVec128, k),
919 vmulq_n_f32(m[1].mVec128, k),
920 vmulq_n_f32(m[2].mVec128, k));
923 m[0].x()*k,m[0].y()*k,m[0].z()*k,
924 m[1].x()*k,m[1].y()*k,m[1].z()*k,
925 m[2].x()*k,m[2].y()*k,m[2].z()*k);
932 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON) 934 m1[0].mVec128 + m2[0].mVec128,
935 m1[1].mVec128 + m2[1].mVec128,
936 m1[2].mVec128 + m2[2].mVec128);
956 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON) 958 m1[0].mVec128 - m2[0].mVec128,
959 m1[1].mVec128 - m2[1].mVec128,
960 m1[2].mVec128 - m2[2].mVec128);
981 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON) 982 m_el[0].mVec128 = m_el[0].mVec128 - m.
m_el[0].mVec128;
983 m_el[1].mVec128 = m_el[1].mVec128 - m.
m_el[1].mVec128;
984 m_el[2].mVec128 = m_el[2].mVec128 - m.
m_el[2].mVec128;
987 m_el[0][0]-m.
m_el[0][0],
988 m_el[0][1]-m.
m_el[0][1],
989 m_el[0][2]-m.
m_el[0][2],
990 m_el[1][0]-m.
m_el[1][0],
991 m_el[1][1]-m.
m_el[1][1],
992 m_el[1][2]-m.
m_el[1][2],
993 m_el[2][0]-m.
m_el[2][0],
994 m_el[2][1]-m.
m_el[2][1],
995 m_el[2][2]-m.
m_el[2][2]);
1004 return btTriple((*
this)[0], (*
this)[1], (*
this)[2]);
1011 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)) 1013 _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
1014 _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
1015 _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
1016 #elif defined(BT_USE_NEON) 1018 (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
1019 (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
1020 (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
1032 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)) 1033 __m128 v0 = m_el[0].mVec128;
1034 __m128 v1 = m_el[1].mVec128;
1035 __m128 v2 = m_el[2].mVec128;
1038 v2 = _mm_and_ps(v2, btvFFF0fMask);
1040 vT = _mm_unpackhi_ps(v0, v1);
1041 v0 = _mm_unpacklo_ps(v0, v1);
1043 v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) );
1044 v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) );
1045 v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));
1049 #elif defined(BT_USE_NEON) 1051 static const uint32x2_t zMask = (
const uint32x2_t) {
static_cast<uint32_t>(-1), 0 };
1052 float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 );
1053 float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) );
1054 float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
1055 float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
1056 float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
1057 float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q );
1060 return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
1061 m_el[0].y(), m_el[1].y(), m_el[2].y(),
1062 m_el[0].z(), m_el[1].z(), m_el[2].z());
1069 return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
1070 cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
1071 cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
1077 btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
1082 return btMatrix3x3(co.
x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
1083 co.
y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
1084 co.
z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
1090 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)) 1093 __m128 row = m_el[0].mVec128;
1094 __m128 m0 = _mm_and_ps( m.
getRow(0).mVec128, btvFFF0fMask );
1095 __m128 m1 = _mm_and_ps( m.
getRow(1).mVec128, btvFFF0fMask);
1096 __m128 m2 = _mm_and_ps( m.
getRow(2).mVec128, btvFFF0fMask );
1097 __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
1098 __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
1099 __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
1100 row = m_el[1].mVec128;
1101 r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
1102 r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
1103 r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
1104 row = m_el[2].mVec128;
1105 r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
1106 r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
1107 r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
1110 #elif defined BT_USE_NEON 1112 static const uint32x4_t xyzMask = (
const uint32x4_t){
static_cast<uint32_t>(-1), static_cast<uint32_t>(-1),
static_cast<uint32_t>(-1), 0 };
1113 float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.
getRow(0).mVec128, xyzMask );
1114 float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.
getRow(1).mVec128, xyzMask );
1115 float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.
getRow(2).mVec128, xyzMask );
1116 float32x4_t row = m_el[0].mVec128;
1117 float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
1118 float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
1119 float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
1120 row = m_el[1].mVec128;
1121 r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
1122 r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
1123 r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
1124 row = m_el[2].mVec128;
1125 r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
1126 r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
1127 r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
1131 m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
1132 m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
1133 m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
1134 m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
1135 m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
1136 m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
1137 m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
1138 m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
1139 m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1146 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)) 1147 __m128 a0 = m_el[0].mVec128;
1148 __m128 a1 = m_el[1].mVec128;
1149 __m128 a2 = m_el[2].mVec128;
1152 __m128 mx = mT[0].mVec128;
1153 __m128 my = mT[1].mVec128;
1154 __m128 mz = mT[2].mVec128;
1156 __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
1157 __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
1158 __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
1159 r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
1160 r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
1161 r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
1162 r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
1163 r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
1164 r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
1167 #elif defined BT_USE_NEON 1168 float32x4_t a0 = m_el[0].mVec128;
1169 float32x4_t a1 = m_el[1].mVec128;
1170 float32x4_t a2 = m_el[2].mVec128;
1173 float32x4_t mx = mT[0].mVec128;
1174 float32x4_t my = mT[1].mVec128;
1175 float32x4_t mz = mT[2].mVec128;
1177 float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
1178 float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
1179 float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
1180 r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
1181 r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
1182 r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
1183 r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
1184 r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
1185 r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
1190 m_el[0].
dot(m[0]), m_el[0].
dot(m[1]), m_el[0].
dot(m[2]),
1191 m_el[1].
dot(m[0]), m_el[1].
dot(m[1]), m_el[1].
dot(m[2]),
1192 m_el[2].
dot(m[0]), m_el[2].
dot(m[1]), m_el[2].
dot(m[2]));
1199 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON) 1200 return v.
dot3(m[0], m[1], m[2]);
1210 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)) 1212 const __m128 vv = v.mVec128;
1214 __m128 c0 = bt_splat_ps( vv, 0);
1215 __m128 c1 = bt_splat_ps( vv, 1);
1216 __m128 c2 = bt_splat_ps( vv, 2);
1218 c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
1219 c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
1220 c0 = _mm_add_ps(c0, c1);
1221 c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );
1224 #elif defined(BT_USE_NEON) 1225 const float32x4_t vv = v.mVec128;
1226 const float32x2_t vlo = vget_low_f32(vv);
1227 const float32x2_t vhi = vget_high_f32(vv);
1229 float32x4_t c0, c1, c2;
1231 c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
1232 c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
1233 c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
1235 c0 = vmulq_lane_f32(c0, vlo, 0);
1236 c1 = vmulq_lane_f32(c1, vlo, 1);
1237 c2 = vmulq_lane_f32(c2, vhi, 0);
1238 c0 = vaddq_f32(c0, c1);
1239 c0 = vaddq_f32(c0, c2);
1250 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)) 1252 __m128 m10 = m1[0].mVec128;
1253 __m128 m11 = m1[1].mVec128;
1254 __m128 m12 = m1[2].mVec128;
1256 __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
1258 __m128 c0 = bt_splat_ps( m10, 0);
1259 __m128 c1 = bt_splat_ps( m11, 0);
1260 __m128 c2 = bt_splat_ps( m12, 0);
1262 c0 = _mm_mul_ps(c0, m2v);
1263 c1 = _mm_mul_ps(c1, m2v);
1264 c2 = _mm_mul_ps(c2, m2v);
1266 m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
1268 __m128 c0_1 = bt_splat_ps( m10, 1);
1269 __m128 c1_1 = bt_splat_ps( m11, 1);
1270 __m128 c2_1 = bt_splat_ps( m12, 1);
1272 c0_1 = _mm_mul_ps(c0_1, m2v);
1273 c1_1 = _mm_mul_ps(c1_1, m2v);
1274 c2_1 = _mm_mul_ps(c2_1, m2v);
1276 m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
1278 c0 = _mm_add_ps(c0, c0_1);
1279 c1 = _mm_add_ps(c1, c1_1);
1280 c2 = _mm_add_ps(c2, c2_1);
1282 m10 = bt_splat_ps( m10, 2);
1283 m11 = bt_splat_ps( m11, 2);
1284 m12 = bt_splat_ps( m12, 2);
1286 m10 = _mm_mul_ps(m10, m2v);
1287 m11 = _mm_mul_ps(m11, m2v);
1288 m12 = _mm_mul_ps(m12, m2v);
1290 c0 = _mm_add_ps(c0, m10);
1291 c1 = _mm_add_ps(c1, m11);
1292 c2 = _mm_add_ps(c2, m12);
1296 #elif defined(BT_USE_NEON) 1298 float32x4_t rv0, rv1, rv2;
1299 float32x4_t v0, v1, v2;
1300 float32x4_t mv0, mv1, mv2;
1306 mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
1307 mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
1308 mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);
1310 rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
1311 rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
1312 rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
1314 rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
1315 rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
1316 rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
1318 rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
1319 rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
1320 rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
1351 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)) 1355 c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1356 c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1357 c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1359 c0 = _mm_and_ps(c0, c1);
1360 c0 = _mm_and_ps(c0, c2);
1362 int m = _mm_movemask_ps((__m128)c0);
1363 return (0x7 == (m & 0x7));
1367 ( m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1368 m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1369 m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
1390 for (
int i=0;i<3;i++)
1391 m_el[i].serialize(dataOut.m_el[i]);
1396 for (
int i=0;i<3;i++)
1397 m_el[i].serializeFloat(dataOut.
m_el[i]);
1403 for (
int i=0;i<3;i++)
1404 m_el[i].deSerialize(dataIn.m_el[i]);
1409 for (
int i=0;i<3;i++)
1410 m_el[i].deSerializeFloat(dataIn.
m_el[i]);
1415 for (
int i=0;i<3;i++)
1416 m_el[i].deSerializeDouble(dataIn.
m_el[i]);
1419 #endif //BT_MATRIX3x3_H btMatrix3x3 inverse() const
Return the inverse of the matrix.
void deSerializeFloat(const struct btMatrix3x3FloatData &dataIn)
btVector3DoubleData m_el[3]
btScalar tdoty(const btVector3 &v) const
bool operator==(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Equality operator between two matrices. It will test that all elements are equal.
void serialize(struct btMatrix3x3Data &dataOut) const
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
void setRotation(const btQuaternion &q)
Set the matrix from a quaternion.
btScalar btSin(btScalar x)
const btScalar & z() const
Return the z value.
btScalar btSqrt(btScalar y)
#define SIMD_FORCE_INLINE
btMatrix3x3 transposeTimes(const btMatrix3x3 &m) const
const btScalar & y() const
Return the y value.
btVector3 getColumn(int i) const
Get a column of the matrix as a vector.
const btVector3 & getRow(int i) const
Get a row of the matrix as a vector.
btMatrix3x3 operator+(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
btMatrix3x3 & operator=(const btMatrix3x3 &other)
Assignment Operator.
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
const btScalar & w() const
Return the w value.
btVector3 m_el[3]
Data storage for the matrix, each vector is a row of the matrix.
btScalar dot(const btVector3 &v) const
Return the dot product.
const btScalar & x() const
Return the x value.
btMatrix3x3(const btQuaternion &q)
Constructor from Quaternion.
btVector3 btCross(const btVector3 &v1, const btVector3 &v2)
Return the cross product of two vectors.
btScalar tdotx(const btVector3 &v) const
btScalar tdotz(const btVector3 &v) const
void deSerialize(const struct btMatrix3x3Data &dataIn)
void getRotation(btQuaternion &q) const
Get the matrix represented as a quaternion.
btMatrix3x3 absolute() const
Return the matrix with all values non negative.
void diagonalize(btMatrix3x3 &rot, btScalar threshold, int maxSteps)
Diagonalizes this matrix by the Jacobi method.
btMatrix3x3 scaled(const btVector3 &s) const
Create a scaled copy of the matrix.
btQuaternion & normalize()
Normalize the quaternion such that x^2 + y^2 + z^2 + w^2 = 1.
void deSerializeDouble(const struct btMatrix3x3DoubleData &dataIn)
btMatrix3x3 & operator*=(const btMatrix3x3 &m)
Multiply by the target matrix on the right.
btScalar btAtan2(btScalar x, btScalar y)
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
btVector3 cross(const btVector3 &v) const
Return the cross product between this and another vector.
const btVector3 & operator[](int i) const
Get a const reference to a row of the matrix as a vector.
btMatrix3x3 operator*(const btMatrix3x3 &m, const btScalar &k)
void setValue(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Set the values of the matrix explicitly (row major)
btVector3 solve33(const btVector3 &b) const
Solve A * x = b, where b is a column vector.
btMatrix3x3(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Constructor with row major formatting.
btScalar length2() const
Return the length squared of the quaternion.
btScalar norm() const
Return the norm (length) of the vector.
const btScalar & y() const
Return the y value.
void getOpenGLSubMatrix(btScalar *m) const
Fill the rotational part of an OpenGL matrix and clear the shear/perspective.
btVector3 can be used to represent 3D points and vectors.
#define ATTRIBUTE_ALIGNED16(a)
btMatrix3x3 & operator-=(const btMatrix3x3 &m)
Subtracts the target matrix on the right.
btMatrix3x3 adjoint() const
Return the adjoint of the matrix.
void serializeFloat(struct btMatrix3x3FloatData &dataOut) const
void setEulerYPR(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the matrix from euler angles using YPR around YXZ respectively.
btMatrix3x3 & operator+=(const btMatrix3x3 &m)
Adds by the target matrix on the right.
btMatrix3x3 operator-(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
void getEulerYPR(btScalar &yaw, btScalar &pitch, btScalar &roll) const
Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR.
btMatrix3x3()
No initialization constructor.
btMatrix3x3 transpose() const
Return the transpose of the matrix.
btVector3 dot3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2) const
btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
const btScalar & x() const
Return the x value.
The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with...
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
btMatrix3x3(const btMatrix3x3 &other)
Copy constructor.
btMatrix3x3 timesTranspose(const btMatrix3x3 &m) const
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatr...
void setFromOpenGLSubMatrix(const btScalar *m)
Set from the rotational part of a 4x4 OpenGL matrix.
btScalar btAsin(btScalar x)
btScalar btDot(const btVector3 &v1, const btVector3 &v2)
Return the dot product between two vectors.
btScalar cofac(int r1, int c1, int r2, int c2) const
Calculate the matrix cofactor.
btScalar btTriple(const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
void getEulerZYX(btScalar &yaw, btScalar &pitch, btScalar &roll, unsigned int solution_number=1) const
Get the matrix represented as euler angles around ZYX.
btScalar determinant() const
Return the determinant of the matrix.
void setIdentity()
Set the matrix to the identity.
static const btMatrix3x3 & getIdentity()
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
void extractRotation(btQuaternion &q, btScalar tolerance=1.0e-9, int maxIter=100)
extractRotation is from "A robust method to extract the rotational part of deformations" See http://d...
btScalar btCos(btScalar x)
btVector3FloatData m_el[3]
btScalar btFabs(btScalar x)
const btScalar & z() const
Return the z value.
void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
Set the matrix from euler angles YPR around ZYX axes.