Bullet Collision Detection & Physics Library
btMatrix3x3.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 
16 #ifndef BT_MATRIX3x3_H
17 #define BT_MATRIX3x3_H
18 
19 #include "btVector3.h"
20 #include "btQuaternion.h"
21 #include <stdio.h>
22 
23 #ifdef BT_USE_SSE
24 //const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
25 //const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
26 #define vMPPP (_mm_set_ps (+0.0f, +0.0f, +0.0f, -0.0f))
27 #endif
28 
29 #if defined(BT_USE_SSE)
30 #define v1000 (_mm_set_ps(0.0f,0.0f,0.0f,1.0f))
31 #define v0100 (_mm_set_ps(0.0f,0.0f,1.0f,0.0f))
32 #define v0010 (_mm_set_ps(0.0f,1.0f,0.0f,0.0f))
33 #elif defined(BT_USE_NEON)
34 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
35 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
36 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
37 #endif
38 
39 #ifdef BT_USE_DOUBLE_PRECISION
40 #define btMatrix3x3Data btMatrix3x3DoubleData
41 #else
42 #define btMatrix3x3Data btMatrix3x3FloatData
43 #endif //BT_USE_DOUBLE_PRECISION
44 
45 
49 
51  btVector3 m_el[3];
52 
53 public:
56 
57  // explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
58 
60  explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
61  /*
62  template <typename btScalar>
63  Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
64  {
65  setEulerYPR(yaw, pitch, roll);
66  }
67  */
69  btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
70  const btScalar& yx, const btScalar& yy, const btScalar& yz,
71  const btScalar& zx, const btScalar& zy, const btScalar& zz)
72  {
73  setValue(xx, xy, xz,
74  yx, yy, yz,
75  zx, zy, zz);
76  }
77 
78 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
79  SIMD_FORCE_INLINE btMatrix3x3 (const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2 )
80  {
81  m_el[0].mVec128 = v0;
82  m_el[1].mVec128 = v1;
83  m_el[2].mVec128 = v2;
84  }
85 
86  SIMD_FORCE_INLINE btMatrix3x3 (const btVector3& v0, const btVector3& v1, const btVector3& v2 )
87  {
88  m_el[0] = v0;
89  m_el[1] = v1;
90  m_el[2] = v2;
91  }
92 
93  // Copy constructor
95  {
96  m_el[0].mVec128 = rhs.m_el[0].mVec128;
97  m_el[1].mVec128 = rhs.m_el[1].mVec128;
98  m_el[2].mVec128 = rhs.m_el[2].mVec128;
99  }
100 
101  // Assignment Operator
102  SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
103  {
104  m_el[0].mVec128 = m.m_el[0].mVec128;
105  m_el[1].mVec128 = m.m_el[1].mVec128;
106  m_el[2].mVec128 = m.m_el[2].mVec128;
107 
108  return *this;
109  }
110 
111 #else
112 
115  {
116  m_el[0] = other.m_el[0];
117  m_el[1] = other.m_el[1];
118  m_el[2] = other.m_el[2];
119  }
120 
123  {
124  m_el[0] = other.m_el[0];
125  m_el[1] = other.m_el[1];
126  m_el[2] = other.m_el[2];
127  return *this;
128  }
129 
130 #endif
131 
135  {
136  return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
137  }
138 
139 
142  SIMD_FORCE_INLINE const btVector3& getRow(int i) const
143  {
144  btFullAssert(0 <= i && i < 3);
145  return m_el[i];
146  }
147 
151  {
152  btFullAssert(0 <= i && i < 3);
153  return m_el[i];
154  }
155 
159  {
160  btFullAssert(0 <= i && i < 3);
161  return m_el[i];
162  }
163 
167  btMatrix3x3& operator*=(const btMatrix3x3& m);
168 
172  btMatrix3x3& operator+=(const btMatrix3x3& m);
173 
177  btMatrix3x3& operator-=(const btMatrix3x3& m);
178 
182  {
183  m_el[0].setValue(m[0],m[4],m[8]);
184  m_el[1].setValue(m[1],m[5],m[9]);
185  m_el[2].setValue(m[2],m[6],m[10]);
186 
187  }
198  void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
199  const btScalar& yx, const btScalar& yy, const btScalar& yz,
200  const btScalar& zx, const btScalar& zy, const btScalar& zz)
201  {
202  m_el[0].setValue(xx,xy,xz);
203  m_el[1].setValue(yx,yy,yz);
204  m_el[2].setValue(zx,zy,zz);
205  }
206 
209  void setRotation(const btQuaternion& q)
210  {
211  btScalar d = q.length2();
212  btFullAssert(d != btScalar(0.0));
213  btScalar s = btScalar(2.0) / d;
214 
215  #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
216  __m128 vs, Q = q.get128();
217  __m128i Qi = btCastfTo128i(Q);
218  __m128 Y, Z;
219  __m128 V1, V2, V3;
220  __m128 V11, V21, V31;
221  __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
222  __m128i NQi = btCastfTo128i(NQ);
223 
224  V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3))); // Y X Z W
225  V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3)); // -X -X Y W
226  V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3))); // Z Y X W
227  V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
228 
229  V11 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3))); // Y Y X W
230  V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
231  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3)); // X Z -X -W
232 
233  V2 = V2 * V1; //
234  V1 = V1 * V11; //
235  V3 = V3 * V31; //
236 
237  V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3)); // -Z -W Y W
238  V11 = V11 * V21; //
239  V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
240  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3)); // W W -Y -W
241  V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
242  Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3))); // -W -Z -X -W
243  Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3))); // Y X Y W
244 
245  vs = _mm_load_ss(&s);
246  V21 = V21 * Y;
247  V31 = V31 * Z;
248 
249  V1 = V1 + V11;
250  V2 = V2 + V21;
251  V3 = V3 + V31;
252 
253  vs = bt_splat3_ps(vs, 0);
254  // s ready
255  V1 = V1 * vs;
256  V2 = V2 * vs;
257  V3 = V3 * vs;
258 
259  V1 = V1 + v1000;
260  V2 = V2 + v0100;
261  V3 = V3 + v0010;
262 
263  m_el[0] = V1;
264  m_el[1] = V2;
265  m_el[2] = V3;
266  #else
267  btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
268  btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
269  btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
270  btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
271  setValue(
272  btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
273  xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
274  xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
275  #endif
276  }
277 
278 
284  void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
285  {
286  setEulerZYX(roll, pitch, yaw);
287  }
288 
298  void setEulerZYX(btScalar eulerX,btScalar eulerY,btScalar eulerZ) {
300  btScalar ci ( btCos(eulerX));
301  btScalar cj ( btCos(eulerY));
302  btScalar ch ( btCos(eulerZ));
303  btScalar si ( btSin(eulerX));
304  btScalar sj ( btSin(eulerY));
305  btScalar sh ( btSin(eulerZ));
306  btScalar cc = ci * ch;
307  btScalar cs = ci * sh;
308  btScalar sc = si * ch;
309  btScalar ss = si * sh;
310 
311  setValue(cj * ch, sj * sc - cs, sj * cc + ss,
312  cj * sh, sj * ss + cc, sj * cs - sc,
313  -sj, cj * si, cj * ci);
314  }
315 
317  void setIdentity()
318  {
319 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
320  m_el[0] = v1000;
321  m_el[1] = v0100;
322  m_el[2] = v0010;
323 #else
324  setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
325  btScalar(0.0), btScalar(1.0), btScalar(0.0),
326  btScalar(0.0), btScalar(0.0), btScalar(1.0));
327 #endif
328  }
329 
330  static const btMatrix3x3& getIdentity()
331  {
332 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
333  static const btMatrix3x3
334  identityMatrix(v1000, v0100, v0010);
335 #else
336  static const btMatrix3x3
337  identityMatrix(
338  btScalar(1.0), btScalar(0.0), btScalar(0.0),
339  btScalar(0.0), btScalar(1.0), btScalar(0.0),
340  btScalar(0.0), btScalar(0.0), btScalar(1.0));
341 #endif
342  return identityMatrix;
343  }
344 
347  void getOpenGLSubMatrix(btScalar *m) const
348  {
349 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
350  __m128 v0 = m_el[0].mVec128;
351  __m128 v1 = m_el[1].mVec128;
352  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
353  __m128 *vm = (__m128 *)m;
354  __m128 vT;
355 
356  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
357 
358  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
359  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
360 
361  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
362  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
363  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
364 
365  vm[0] = v0;
366  vm[1] = v1;
367  vm[2] = v2;
368 #elif defined(BT_USE_NEON)
369  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
370  static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
371  float32x4_t *vm = (float32x4_t *)m;
372  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
373  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
374  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
375  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
376  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
377  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
378 
379  vm[0] = v0;
380  vm[1] = v1;
381  vm[2] = v2;
382 #else
383  m[0] = btScalar(m_el[0].x());
384  m[1] = btScalar(m_el[1].x());
385  m[2] = btScalar(m_el[2].x());
386  m[3] = btScalar(0.0);
387  m[4] = btScalar(m_el[0].y());
388  m[5] = btScalar(m_el[1].y());
389  m[6] = btScalar(m_el[2].y());
390  m[7] = btScalar(0.0);
391  m[8] = btScalar(m_el[0].z());
392  m[9] = btScalar(m_el[1].z());
393  m[10] = btScalar(m_el[2].z());
394  m[11] = btScalar(0.0);
395 #endif
396  }
397 
400  void getRotation(btQuaternion& q) const
401  {
402 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
403  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
404  btScalar s, x;
405 
406  union {
407  btSimdFloat4 vec;
408  btScalar f[4];
409  } temp;
410 
411  if (trace > btScalar(0.0))
412  {
413  x = trace + btScalar(1.0);
414 
415  temp.f[0]=m_el[2].y() - m_el[1].z();
416  temp.f[1]=m_el[0].z() - m_el[2].x();
417  temp.f[2]=m_el[1].x() - m_el[0].y();
418  temp.f[3]=x;
419  //temp.f[3]= s * btScalar(0.5);
420  }
421  else
422  {
423  int i, j, k;
424  if(m_el[0].x() < m_el[1].y())
425  {
426  if( m_el[1].y() < m_el[2].z() )
427  { i = 2; j = 0; k = 1; }
428  else
429  { i = 1; j = 2; k = 0; }
430  }
431  else
432  {
433  if( m_el[0].x() < m_el[2].z())
434  { i = 2; j = 0; k = 1; }
435  else
436  { i = 0; j = 1; k = 2; }
437  }
438 
439  x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
440 
441  temp.f[3] = (m_el[k][j] - m_el[j][k]);
442  temp.f[j] = (m_el[j][i] + m_el[i][j]);
443  temp.f[k] = (m_el[k][i] + m_el[i][k]);
444  temp.f[i] = x;
445  //temp.f[i] = s * btScalar(0.5);
446  }
447 
448  s = btSqrt(x);
449  q.set128(temp.vec);
450  s = btScalar(0.5) / s;
451 
452  q *= s;
453 #else
454  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
455 
456  btScalar temp[4];
457 
458  if (trace > btScalar(0.0))
459  {
460  btScalar s = btSqrt(trace + btScalar(1.0));
461  temp[3]=(s * btScalar(0.5));
462  s = btScalar(0.5) / s;
463 
464  temp[0]=((m_el[2].y() - m_el[1].z()) * s);
465  temp[1]=((m_el[0].z() - m_el[2].x()) * s);
466  temp[2]=((m_el[1].x() - m_el[0].y()) * s);
467  }
468  else
469  {
470  int i = m_el[0].x() < m_el[1].y() ?
471  (m_el[1].y() < m_el[2].z() ? 2 : 1) :
472  (m_el[0].x() < m_el[2].z() ? 2 : 0);
473  int j = (i + 1) % 3;
474  int k = (i + 2) % 3;
475 
476  btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
477  temp[i] = s * btScalar(0.5);
478  s = btScalar(0.5) / s;
479 
480  temp[3] = (m_el[k][j] - m_el[j][k]) * s;
481  temp[j] = (m_el[j][i] + m_el[i][j]) * s;
482  temp[k] = (m_el[k][i] + m_el[i][k]) * s;
483  }
484  q.setValue(temp[0],temp[1],temp[2],temp[3]);
485 #endif
486  }
487 
492  void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
493  {
494 
495  // first use the normal calculus
496  yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
497  pitch = btScalar(btAsin(-m_el[2].x()));
498  roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
499 
500  // on pitch = +/-HalfPI
501  if (btFabs(pitch)==SIMD_HALF_PI)
502  {
503  if (yaw>0)
504  yaw-=SIMD_PI;
505  else
506  yaw+=SIMD_PI;
507 
508  if (roll>0)
509  roll-=SIMD_PI;
510  else
511  roll+=SIMD_PI;
512  }
513  };
514 
515 
521  void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
522  {
523  struct Euler
524  {
525  btScalar yaw;
526  btScalar pitch;
527  btScalar roll;
528  };
529 
530  Euler euler_out;
531  Euler euler_out2; //second solution
532  //get the pointer to the raw data
533 
534  // Check that pitch is not at a singularity
535  if (btFabs(m_el[2].x()) >= 1)
536  {
537  euler_out.yaw = 0;
538  euler_out2.yaw = 0;
539 
540  // From difference of angles formula
541  btScalar delta = btAtan2(m_el[0].x(),m_el[0].z());
542  if (m_el[2].x() > 0) //gimbal locked up
543  {
544  euler_out.pitch = SIMD_PI / btScalar(2.0);
545  euler_out2.pitch = SIMD_PI / btScalar(2.0);
546  euler_out.roll = euler_out.pitch + delta;
547  euler_out2.roll = euler_out.pitch + delta;
548  }
549  else // gimbal locked down
550  {
551  euler_out.pitch = -SIMD_PI / btScalar(2.0);
552  euler_out2.pitch = -SIMD_PI / btScalar(2.0);
553  euler_out.roll = -euler_out.pitch + delta;
554  euler_out2.roll = -euler_out.pitch + delta;
555  }
556  }
557  else
558  {
559  euler_out.pitch = - btAsin(m_el[2].x());
560  euler_out2.pitch = SIMD_PI - euler_out.pitch;
561 
562  euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch),
563  m_el[2].z()/btCos(euler_out.pitch));
564  euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch),
565  m_el[2].z()/btCos(euler_out2.pitch));
566 
567  euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch),
568  m_el[0].x()/btCos(euler_out.pitch));
569  euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch),
570  m_el[0].x()/btCos(euler_out2.pitch));
571  }
572 
573  if (solution_number == 1)
574  {
575  yaw = euler_out.yaw;
576  pitch = euler_out.pitch;
577  roll = euler_out.roll;
578  }
579  else
580  {
581  yaw = euler_out2.yaw;
582  pitch = euler_out2.pitch;
583  roll = euler_out2.roll;
584  }
585  }
586 
590  btMatrix3x3 scaled(const btVector3& s) const
591  {
592 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
593  return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
594 #else
595  return btMatrix3x3(
596  m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
597  m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
598  m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
599 #endif
600  }
601 
603  btScalar determinant() const;
605  btMatrix3x3 adjoint() const;
607  btMatrix3x3 absolute() const;
609  btMatrix3x3 transpose() const;
611  btMatrix3x3 inverse() const;
612 
616  btVector3 solve33(const btVector3& b) const
617  {
618  btVector3 col1 = getColumn(0);
619  btVector3 col2 = getColumn(1);
620  btVector3 col3 = getColumn(2);
621 
622  btScalar det = btDot(col1, btCross(col2, col3));
623  if (btFabs(det)>SIMD_EPSILON)
624  {
625  det = 1.0f / det;
626  }
627  btVector3 x;
628  x[0] = det * btDot(b, btCross(col2, col3));
629  x[1] = det * btDot(col1, btCross(b, col3));
630  x[2] = det * btDot(col1, btCross(col2, b));
631  return x;
632  }
633 
634  btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
635  btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
636 
638  {
639  return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
640  }
642  {
643  return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
644  }
646  {
647  return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
648  }
649 
656  SIMD_FORCE_INLINE void extractRotation(btQuaternion &q,btScalar tolerance = 1.0e-9, int maxIter=100)
657  {
658  int iter =0;
659  btScalar w;
660  const btMatrix3x3& A=*this;
661  for(iter = 0; iter < maxIter; iter++)
662  {
663  btMatrix3x3 R(q);
664  btVector3 omega = (R.getColumn(0).cross(A.getColumn(0)) + R.getColumn(1).cross(A.getColumn(1))
665  + R.getColumn(2).cross(A.getColumn(2))
666  ) * (btScalar(1.0) / btFabs(R.getColumn(0).dot(A.getColumn(0)) + R.getColumn
667  (1).dot(A.getColumn(1)) + R.getColumn(2).dot(A.getColumn(2))) +
668  tolerance);
669  w = omega.norm();
670  if(w < tolerance)
671  break;
672  q = btQuaternion(btVector3((btScalar(1.0) / w) * omega),w) *
673  q;
674  q.normalize();
675  }
676  }
677 
678 
679 
680 
690  void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
691  {
692  rot.setIdentity();
693  for (int step = maxSteps; step > 0; step--)
694  {
695  // find off-diagonal element [p][q] with largest magnitude
696  int p = 0;
697  int q = 1;
698  int r = 2;
699  btScalar max = btFabs(m_el[0][1]);
700  btScalar v = btFabs(m_el[0][2]);
701  if (v > max)
702  {
703  q = 2;
704  r = 1;
705  max = v;
706  }
707  v = btFabs(m_el[1][2]);
708  if (v > max)
709  {
710  p = 1;
711  q = 2;
712  r = 0;
713  max = v;
714  }
715 
716  btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
717  if (max <= t)
718  {
719  if (max <= SIMD_EPSILON * t)
720  {
721  return;
722  }
723  step = 1;
724  }
725 
726  // compute Jacobi rotation J which leads to a zero for element [p][q]
727  btScalar mpq = m_el[p][q];
728  btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
729  btScalar theta2 = theta * theta;
730  btScalar cos;
731  btScalar sin;
732  if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
733  {
734  t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
735  : 1 / (theta - btSqrt(1 + theta2));
736  cos = 1 / btSqrt(1 + t * t);
737  sin = cos * t;
738  }
739  else
740  {
741  // approximation for large theta-value, i.e., a nearly diagonal matrix
742  t = 1 / (theta * (2 + btScalar(0.5) / theta2));
743  cos = 1 - btScalar(0.5) * t * t;
744  sin = cos * t;
745  }
746 
747  // apply rotation to matrix (this = J^T * this * J)
748  m_el[p][q] = m_el[q][p] = 0;
749  m_el[p][p] -= t * mpq;
750  m_el[q][q] += t * mpq;
751  btScalar mrp = m_el[r][p];
752  btScalar mrq = m_el[r][q];
753  m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
754  m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
755 
756  // apply rotation to rot (rot = rot * J)
757  for (int i = 0; i < 3; i++)
758  {
759  btVector3& row = rot[i];
760  mrp = row[p];
761  mrq = row[q];
762  row[p] = cos * mrp - sin * mrq;
763  row[q] = cos * mrq + sin * mrp;
764  }
765  }
766  }
767 
768 
769 
777  btScalar cofac(int r1, int c1, int r2, int c2) const
778  {
779  return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
780  }
781 
782  void serialize(struct btMatrix3x3Data& dataOut) const;
783 
784  void serializeFloat(struct btMatrix3x3FloatData& dataOut) const;
785 
786  void deSerialize(const struct btMatrix3x3Data& dataIn);
787 
788  void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);
789 
790  void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);
791 
792 };
793 
794 
797 {
798 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
799  __m128 rv00, rv01, rv02;
800  __m128 rv10, rv11, rv12;
801  __m128 rv20, rv21, rv22;
802  __m128 mv0, mv1, mv2;
803 
804  rv02 = m_el[0].mVec128;
805  rv12 = m_el[1].mVec128;
806  rv22 = m_el[2].mVec128;
807 
808  mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
809  mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
810  mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
811 
812  // rv0
813  rv00 = bt_splat_ps(rv02, 0);
814  rv01 = bt_splat_ps(rv02, 1);
815  rv02 = bt_splat_ps(rv02, 2);
816 
817  rv00 = _mm_mul_ps(rv00, mv0);
818  rv01 = _mm_mul_ps(rv01, mv1);
819  rv02 = _mm_mul_ps(rv02, mv2);
820 
821  // rv1
822  rv10 = bt_splat_ps(rv12, 0);
823  rv11 = bt_splat_ps(rv12, 1);
824  rv12 = bt_splat_ps(rv12, 2);
825 
826  rv10 = _mm_mul_ps(rv10, mv0);
827  rv11 = _mm_mul_ps(rv11, mv1);
828  rv12 = _mm_mul_ps(rv12, mv2);
829 
830  // rv2
831  rv20 = bt_splat_ps(rv22, 0);
832  rv21 = bt_splat_ps(rv22, 1);
833  rv22 = bt_splat_ps(rv22, 2);
834 
835  rv20 = _mm_mul_ps(rv20, mv0);
836  rv21 = _mm_mul_ps(rv21, mv1);
837  rv22 = _mm_mul_ps(rv22, mv2);
838 
839  rv00 = _mm_add_ps(rv00, rv01);
840  rv10 = _mm_add_ps(rv10, rv11);
841  rv20 = _mm_add_ps(rv20, rv21);
842 
843  m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
844  m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
845  m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
846 
847 #elif defined(BT_USE_NEON)
848 
849  float32x4_t rv0, rv1, rv2;
850  float32x4_t v0, v1, v2;
851  float32x4_t mv0, mv1, mv2;
852 
853  v0 = m_el[0].mVec128;
854  v1 = m_el[1].mVec128;
855  v2 = m_el[2].mVec128;
856 
857  mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
858  mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
859  mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
860 
861  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
862  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
863  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
864 
865  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
866  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
867  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
868 
869  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
870  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
871  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
872 
873  m_el[0].mVec128 = rv0;
874  m_el[1].mVec128 = rv1;
875  m_el[2].mVec128 = rv2;
876 #else
877  setValue(
878  m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
879  m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
880  m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
881 #endif
882  return *this;
883 }
884 
887 {
888 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
889  m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
890  m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
891  m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
892 #else
893  setValue(
894  m_el[0][0]+m.m_el[0][0],
895  m_el[0][1]+m.m_el[0][1],
896  m_el[0][2]+m.m_el[0][2],
897  m_el[1][0]+m.m_el[1][0],
898  m_el[1][1]+m.m_el[1][1],
899  m_el[1][2]+m.m_el[1][2],
900  m_el[2][0]+m.m_el[2][0],
901  m_el[2][1]+m.m_el[2][1],
902  m_el[2][2]+m.m_el[2][2]);
903 #endif
904  return *this;
905 }
906 
908 operator*(const btMatrix3x3& m, const btScalar & k)
909 {
910 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
911  __m128 vk = bt_splat_ps(_mm_load_ss((float *)&k), 0x80);
912  return btMatrix3x3(
913  _mm_mul_ps(m[0].mVec128, vk),
914  _mm_mul_ps(m[1].mVec128, vk),
915  _mm_mul_ps(m[2].mVec128, vk));
916 #elif defined(BT_USE_NEON)
917  return btMatrix3x3(
918  vmulq_n_f32(m[0].mVec128, k),
919  vmulq_n_f32(m[1].mVec128, k),
920  vmulq_n_f32(m[2].mVec128, k));
921 #else
922  return btMatrix3x3(
923  m[0].x()*k,m[0].y()*k,m[0].z()*k,
924  m[1].x()*k,m[1].y()*k,m[1].z()*k,
925  m[2].x()*k,m[2].y()*k,m[2].z()*k);
926 #endif
927 }
928 
930 operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
931 {
932 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
933  return btMatrix3x3(
934  m1[0].mVec128 + m2[0].mVec128,
935  m1[1].mVec128 + m2[1].mVec128,
936  m1[2].mVec128 + m2[2].mVec128);
937 #else
938  return btMatrix3x3(
939  m1[0][0]+m2[0][0],
940  m1[0][1]+m2[0][1],
941  m1[0][2]+m2[0][2],
942 
943  m1[1][0]+m2[1][0],
944  m1[1][1]+m2[1][1],
945  m1[1][2]+m2[1][2],
946 
947  m1[2][0]+m2[2][0],
948  m1[2][1]+m2[2][1],
949  m1[2][2]+m2[2][2]);
950 #endif
951 }
952 
954 operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
955 {
956 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
957  return btMatrix3x3(
958  m1[0].mVec128 - m2[0].mVec128,
959  m1[1].mVec128 - m2[1].mVec128,
960  m1[2].mVec128 - m2[2].mVec128);
961 #else
962  return btMatrix3x3(
963  m1[0][0]-m2[0][0],
964  m1[0][1]-m2[0][1],
965  m1[0][2]-m2[0][2],
966 
967  m1[1][0]-m2[1][0],
968  m1[1][1]-m2[1][1],
969  m1[1][2]-m2[1][2],
970 
971  m1[2][0]-m2[2][0],
972  m1[2][1]-m2[2][1],
973  m1[2][2]-m2[2][2]);
974 #endif
975 }
976 
977 
980 {
981 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
982  m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
983  m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
984  m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
985 #else
986  setValue(
987  m_el[0][0]-m.m_el[0][0],
988  m_el[0][1]-m.m_el[0][1],
989  m_el[0][2]-m.m_el[0][2],
990  m_el[1][0]-m.m_el[1][0],
991  m_el[1][1]-m.m_el[1][1],
992  m_el[1][2]-m.m_el[1][2],
993  m_el[2][0]-m.m_el[2][0],
994  m_el[2][1]-m.m_el[2][1],
995  m_el[2][2]-m.m_el[2][2]);
996 #endif
997  return *this;
998 }
999 
1000 
1003 {
1004  return btTriple((*this)[0], (*this)[1], (*this)[2]);
1005 }
1006 
1007 
1010 {
1011 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1012  return btMatrix3x3(
1013  _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
1014  _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
1015  _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
1016 #elif defined(BT_USE_NEON)
1017  return btMatrix3x3(
1018  (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
1019  (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
1020  (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
1021 #else
1022  return btMatrix3x3(
1023  btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
1024  btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
1025  btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
1026 #endif
1027 }
1028 
1031 {
1032 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1033  __m128 v0 = m_el[0].mVec128;
1034  __m128 v1 = m_el[1].mVec128;
1035  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
1036  __m128 vT;
1037 
1038  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
1039 
1040  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
1041  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
1042 
1043  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
1044  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
1045  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
1046 
1047 
1048  return btMatrix3x3( v0, v1, v2 );
1049 #elif defined(BT_USE_NEON)
1050  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
1051  static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
1052  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
1053  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
1054  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
1055  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
1056  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
1057  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
1058  return btMatrix3x3( v0, v1, v2 );
1059 #else
1060  return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
1061  m_el[0].y(), m_el[1].y(), m_el[2].y(),
1062  m_el[0].z(), m_el[1].z(), m_el[2].z());
1063 #endif
1064 }
1065 
1068 {
1069  return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
1070  cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
1071  cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
1072 }
1073 
1076 {
1077  btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
1078  btScalar det = (*this)[0].dot(co);
1079  //btFullAssert(det != btScalar(0.0));
1080  btAssert(det != btScalar(0.0));
1081  btScalar s = btScalar(1.0) / det;
1082  return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
1083  co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
1084  co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
1085 }
1086 
1089 {
1090 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1091  // zeros w
1092 // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
1093  __m128 row = m_el[0].mVec128;
1094  __m128 m0 = _mm_and_ps( m.getRow(0).mVec128, btvFFF0fMask );
1095  __m128 m1 = _mm_and_ps( m.getRow(1).mVec128, btvFFF0fMask);
1096  __m128 m2 = _mm_and_ps( m.getRow(2).mVec128, btvFFF0fMask );
1097  __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
1098  __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
1099  __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
1100  row = m_el[1].mVec128;
1101  r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
1102  r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
1103  r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
1104  row = m_el[2].mVec128;
1105  r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
1106  r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
1107  r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
1108  return btMatrix3x3( r0, r1, r2 );
1109 
1110 #elif defined BT_USE_NEON
1111  // zeros w
1112  static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
1113  float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
1114  float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
1115  float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
1116  float32x4_t row = m_el[0].mVec128;
1117  float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
1118  float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
1119  float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
1120  row = m_el[1].mVec128;
1121  r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
1122  r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
1123  r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
1124  row = m_el[2].mVec128;
1125  r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
1126  r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
1127  r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
1128  return btMatrix3x3( r0, r1, r2 );
1129 #else
1130  return btMatrix3x3(
1131  m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
1132  m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
1133  m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
1134  m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
1135  m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
1136  m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
1137  m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
1138  m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
1139  m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1140 #endif
1141 }
1142 
1145 {
1146 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1147  __m128 a0 = m_el[0].mVec128;
1148  __m128 a1 = m_el[1].mVec128;
1149  __m128 a2 = m_el[2].mVec128;
1150 
1151  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1152  __m128 mx = mT[0].mVec128;
1153  __m128 my = mT[1].mVec128;
1154  __m128 mz = mT[2].mVec128;
1155 
1156  __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
1157  __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
1158  __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
1159  r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
1160  r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
1161  r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
1162  r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
1163  r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
1164  r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
1165  return btMatrix3x3( r0, r1, r2);
1166 
1167 #elif defined BT_USE_NEON
1168  float32x4_t a0 = m_el[0].mVec128;
1169  float32x4_t a1 = m_el[1].mVec128;
1170  float32x4_t a2 = m_el[2].mVec128;
1171 
1172  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1173  float32x4_t mx = mT[0].mVec128;
1174  float32x4_t my = mT[1].mVec128;
1175  float32x4_t mz = mT[2].mVec128;
1176 
1177  float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
1178  float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
1179  float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
1180  r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
1181  r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
1182  r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
1183  r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
1184  r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
1185  r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
1186  return btMatrix3x3( r0, r1, r2 );
1187 
1188 #else
1189  return btMatrix3x3(
1190  m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
1191  m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
1192  m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
1193 #endif
1194 }
1195 
1197 operator*(const btMatrix3x3& m, const btVector3& v)
1198 {
1199 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
1200  return v.dot3(m[0], m[1], m[2]);
1201 #else
1202  return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
1203 #endif
1204 }
1205 
1206 
1208 operator*(const btVector3& v, const btMatrix3x3& m)
1209 {
1210 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1211 
1212  const __m128 vv = v.mVec128;
1213 
1214  __m128 c0 = bt_splat_ps( vv, 0);
1215  __m128 c1 = bt_splat_ps( vv, 1);
1216  __m128 c2 = bt_splat_ps( vv, 2);
1217 
1218  c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
1219  c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
1220  c0 = _mm_add_ps(c0, c1);
1221  c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );
1222 
1223  return btVector3(_mm_add_ps(c0, c2));
1224 #elif defined(BT_USE_NEON)
1225  const float32x4_t vv = v.mVec128;
1226  const float32x2_t vlo = vget_low_f32(vv);
1227  const float32x2_t vhi = vget_high_f32(vv);
1228 
1229  float32x4_t c0, c1, c2;
1230 
1231  c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
1232  c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
1233  c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
1234 
1235  c0 = vmulq_lane_f32(c0, vlo, 0);
1236  c1 = vmulq_lane_f32(c1, vlo, 1);
1237  c2 = vmulq_lane_f32(c2, vhi, 0);
1238  c0 = vaddq_f32(c0, c1);
1239  c0 = vaddq_f32(c0, c2);
1240 
1241  return btVector3(c0);
1242 #else
1243  return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
1244 #endif
1245 }
1246 
1248 operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
1249 {
1250 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1251 
1252  __m128 m10 = m1[0].mVec128;
1253  __m128 m11 = m1[1].mVec128;
1254  __m128 m12 = m1[2].mVec128;
1255 
1256  __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
1257 
1258  __m128 c0 = bt_splat_ps( m10, 0);
1259  __m128 c1 = bt_splat_ps( m11, 0);
1260  __m128 c2 = bt_splat_ps( m12, 0);
1261 
1262  c0 = _mm_mul_ps(c0, m2v);
1263  c1 = _mm_mul_ps(c1, m2v);
1264  c2 = _mm_mul_ps(c2, m2v);
1265 
1266  m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
1267 
1268  __m128 c0_1 = bt_splat_ps( m10, 1);
1269  __m128 c1_1 = bt_splat_ps( m11, 1);
1270  __m128 c2_1 = bt_splat_ps( m12, 1);
1271 
1272  c0_1 = _mm_mul_ps(c0_1, m2v);
1273  c1_1 = _mm_mul_ps(c1_1, m2v);
1274  c2_1 = _mm_mul_ps(c2_1, m2v);
1275 
1276  m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
1277 
1278  c0 = _mm_add_ps(c0, c0_1);
1279  c1 = _mm_add_ps(c1, c1_1);
1280  c2 = _mm_add_ps(c2, c2_1);
1281 
1282  m10 = bt_splat_ps( m10, 2);
1283  m11 = bt_splat_ps( m11, 2);
1284  m12 = bt_splat_ps( m12, 2);
1285 
1286  m10 = _mm_mul_ps(m10, m2v);
1287  m11 = _mm_mul_ps(m11, m2v);
1288  m12 = _mm_mul_ps(m12, m2v);
1289 
1290  c0 = _mm_add_ps(c0, m10);
1291  c1 = _mm_add_ps(c1, m11);
1292  c2 = _mm_add_ps(c2, m12);
1293 
1294  return btMatrix3x3(c0, c1, c2);
1295 
1296 #elif defined(BT_USE_NEON)
1297 
1298  float32x4_t rv0, rv1, rv2;
1299  float32x4_t v0, v1, v2;
1300  float32x4_t mv0, mv1, mv2;
1301 
1302  v0 = m1[0].mVec128;
1303  v1 = m1[1].mVec128;
1304  v2 = m1[2].mVec128;
1305 
1306  mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
1307  mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
1308  mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);
1309 
1310  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
1311  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
1312  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
1313 
1314  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
1315  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
1316  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
1317 
1318  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
1319  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
1320  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
1321 
1322  return btMatrix3x3(rv0, rv1, rv2);
1323 
1324 #else
1325  return btMatrix3x3(
1326  m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
1327  m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
1328  m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
1329 #endif
1330 }
1331 
1332 /*
1333 SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
1334 return btMatrix3x3(
1335 m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
1336 m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
1337 m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
1338 m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
1339 m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
1340 m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
1341 m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
1342 m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
1343 m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
1344 }
1345 */
1346 
1350 {
1351 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1352 
1353  __m128 c0, c1, c2;
1354 
1355  c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1356  c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1357  c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1358 
1359  c0 = _mm_and_ps(c0, c1);
1360  c0 = _mm_and_ps(c0, c2);
1361 
1362  int m = _mm_movemask_ps((__m128)c0);
1363  return (0x7 == (m & 0x7));
1364 
1365 #else
1366  return
1367  ( m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1368  m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1369  m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
1370 #endif
1371 }
1372 
1375 {
1377 };
1378 
1381 {
1383 };
1384 
1385 
1386 
1387 
1389 {
1390  for (int i=0;i<3;i++)
1391  m_el[i].serialize(dataOut.m_el[i]);
1392 }
1393 
1395 {
1396  for (int i=0;i<3;i++)
1397  m_el[i].serializeFloat(dataOut.m_el[i]);
1398 }
1399 
1400 
1402 {
1403  for (int i=0;i<3;i++)
1404  m_el[i].deSerialize(dataIn.m_el[i]);
1405 }
1406 
1408 {
1409  for (int i=0;i<3;i++)
1410  m_el[i].deSerializeFloat(dataIn.m_el[i]);
1411 }
1412 
1414 {
1415  for (int i=0;i<3;i++)
1416  m_el[i].deSerializeDouble(dataIn.m_el[i]);
1417 }
1418 
1419 #endif //BT_MATRIX3x3_H
1420 
btMatrix3x3 inverse() const
Return the inverse of the matrix.
Definition: btMatrix3x3.h:1075
void deSerializeFloat(const struct btMatrix3x3FloatData &dataIn)
Definition: btMatrix3x3.h:1407
#define SIMD_EPSILON
Definition: btScalar.h:521
for serialization
Definition: btMatrix3x3.h:1374
btVector3DoubleData m_el[3]
Definition: btMatrix3x3.h:1382
btScalar tdoty(const btVector3 &v) const
Definition: btMatrix3x3.h:641
bool operator==(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Equality operator between two matrices It will test all elements are equal.
Definition: btMatrix3x3.h:1349
void serialize(struct btMatrix3x3Data &dataOut) const
Definition: btMatrix3x3.h:1388
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Definition: btVector3.h:652
void setRotation(const btQuaternion &q)
Set the matrix from a quaternion.
Definition: btMatrix3x3.h:209
btScalar btSin(btScalar x)
Definition: btScalar.h:477
const btScalar & z() const
Return the z value.
Definition: btQuadWord.h:120
btScalar btSqrt(btScalar y)
Definition: btScalar.h:444
#define btAssert(x)
Definition: btScalar.h:131
unsigned int uint32_t
#define SIMD_FORCE_INLINE
Definition: btScalar.h:81
btMatrix3x3 transposeTimes(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1088
const btScalar & y() const
Return the y value.
Definition: btQuadWord.h:118
btVector3 getColumn(int i) const
Get a column of the matrix as a vector.
Definition: btMatrix3x3.h:134
const btVector3 & getRow(int i) const
Get a row of the matrix as a vector.
Definition: btMatrix3x3.h:142
btMatrix3x3 operator+(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:930
btMatrix3x3 & operator=(const btMatrix3x3 &other)
Assignment Operator.
Definition: btMatrix3x3.h:122
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
Definition: btQuaternion.h:920
#define btFullAssert(x)
Definition: btScalar.h:134
const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:122
#define SIMD_HALF_PI
Definition: btScalar.h:506
btVector3 m_el[3]
Data storage for the matrix, each vector is a row of the matrix.
Definition: btMatrix3x3.h:51
btScalar dot(const btVector3 &v) const
Return the dot product.
Definition: btVector3.h:235
const btScalar & x() const
Return the x value.
Definition: btVector3.h:587
btMatrix3x3(const btQuaternion &q)
Constructor from Quaternion.
Definition: btMatrix3x3.h:60
btVector3 btCross(const btVector3 &v1, const btVector3 &v2)
Return the cross product of two vectors.
Definition: btVector3.h:933
btScalar tdotx(const btVector3 &v) const
Definition: btMatrix3x3.h:637
btScalar tdotz(const btVector3 &v) const
Definition: btMatrix3x3.h:645
void deSerialize(const struct btMatrix3x3Data &dataIn)
Definition: btMatrix3x3.h:1401
#define SIMD_PI
Definition: btScalar.h:504
void getRotation(btQuaternion &q) const
Get the matrix represented as a quaternion.
Definition: btMatrix3x3.h:400
btMatrix3x3 absolute() const
Return the matrix with all values non negative.
Definition: btMatrix3x3.h:1009
void diagonalize(btMatrix3x3 &rot, btScalar threshold, int maxSteps)
diagonalizes this matrix by the Jacobi method.
Definition: btMatrix3x3.h:690
btMatrix3x3 scaled(const btVector3 &s) const
Create a scaled copy of the matrix.
Definition: btMatrix3x3.h:590
#define btMatrix3x3Data
Definition: btMatrix3x3.h:42
btQuaternion & normalize()
Normalize the quaternion Such that x^2 + y^2 + z^2 +w^2 = 1.
Definition: btQuaternion.h:387
void deSerializeDouble(const struct btMatrix3x3DoubleData &dataIn)
Definition: btMatrix3x3.h:1413
btMatrix3x3 & operator*=(const btMatrix3x3 &m)
Multiply by the target matrix on the right.
Definition: btMatrix3x3.h:796
btScalar btAtan2(btScalar x, btScalar y)
Definition: btScalar.h:496
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
Definition: btQuadWord.h:152
btVector3 cross(const btVector3 &v) const
Return the cross product between this and another vector.
Definition: btVector3.h:389
const btVector3 & operator[](int i) const
Get a const reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:158
btMatrix3x3 operator*(const btMatrix3x3 &m, const btScalar &k)
Definition: btMatrix3x3.h:908
void setValue(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Set the values of the matrix explicitly (row major)
Definition: btMatrix3x3.h:198
btVector3 solve33(const btVector3 &b) const
Solve A * x = b, where b is a column vector.
Definition: btMatrix3x3.h:616
btMatrix3x3(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Constructor with row major formatting.
Definition: btMatrix3x3.h:69
btScalar length2() const
Return the length squared of the quaternion.
Definition: btQuaternion.h:366
btScalar norm() const
Return the norm (length) of the vector.
Definition: btVector3.h:269
const btScalar & y() const
Return the y value.
Definition: btVector3.h:589
void getOpenGLSubMatrix(btScalar *m) const
Fill the rotational part of an OpenGL matrix and clear the shear/perspective.
Definition: btMatrix3x3.h:347
btVector3 can be used to represent 3D points and vectors.
Definition: btVector3.h:83
#define ATTRIBUTE_ALIGNED16(a)
Definition: btScalar.h:82
btMatrix3x3 & operator-=(const btMatrix3x3 &m)
Substractss by the target matrix on the right.
Definition: btMatrix3x3.h:979
btMatrix3x3 adjoint() const
Return the adjoint of the matrix.
Definition: btMatrix3x3.h:1067
void serializeFloat(struct btMatrix3x3FloatData &dataOut) const
Definition: btMatrix3x3.h:1394
void setEulerYPR(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the matrix from euler angles using YPR around YXZ respectively.
Definition: btMatrix3x3.h:284
btMatrix3x3 & operator+=(const btMatrix3x3 &m)
Adds by the target matrix on the right.
Definition: btMatrix3x3.h:886
btMatrix3x3 operator-(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:954
void getEulerYPR(btScalar &yaw, btScalar &pitch, btScalar &roll) const
Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR.
Definition: btMatrix3x3.h:492
btMatrix3x3()
No initializaion constructor.
Definition: btMatrix3x3.h:55
btMatrix3x3 transpose() const
Return the transpose of the matrix.
Definition: btMatrix3x3.h:1030
btVector3 dot3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2) const
Definition: btVector3.h:733
btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:150
const btScalar & x() const
Return the x value.
Definition: btQuadWord.h:116
The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with...
Definition: btMatrix3x3.h:48
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
Definition: btQuaternion.h:898
btMatrix3x3(const btMatrix3x3 &other)
Copy constructor.
Definition: btMatrix3x3.h:114
btMatrix3x3 timesTranspose(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1144
for serialization
Definition: btMatrix3x3.h:1380
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatr...
Definition: btQuaternion.h:55
void setFromOpenGLSubMatrix(const btScalar *m)
Set from the rotational part of a 4x4 OpenGL matrix.
Definition: btMatrix3x3.h:181
btScalar btAsin(btScalar x)
Definition: btScalar.h:487
btScalar btDot(const btVector3 &v1, const btVector3 &v2)
Return the dot product between two vectors.
Definition: btVector3.h:903
btScalar cofac(int r1, int c1, int r2, int c2) const
Calculate the matrix cofactor.
Definition: btMatrix3x3.h:777
btScalar btTriple(const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
Definition: btVector3.h:939
void getEulerZYX(btScalar &yaw, btScalar &pitch, btScalar &roll, unsigned int solution_number=1) const
Get the matrix represented as euler angles around ZYX.
Definition: btMatrix3x3.h:521
btScalar determinant() const
Return the determinant of the matrix.
Definition: btMatrix3x3.h:1002
void setIdentity()
Set the matrix to the identity.
Definition: btMatrix3x3.h:317
static const btMatrix3x3 & getIdentity()
Definition: btMatrix3x3.h:330
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
Definition: btScalar.h:292
void extractRotation(btQuaternion &q, btScalar tolerance=1.0e-9, int maxIter=100)
extractRotation is from "A robust method to extract the rotational part of deformations" See http://d...
Definition: btMatrix3x3.h:656
btScalar btCos(btScalar x)
Definition: btScalar.h:476
btVector3FloatData m_el[3]
Definition: btMatrix3x3.h:1376
btScalar btFabs(btScalar x)
Definition: btScalar.h:475
const btScalar & z() const
Return the z value.
Definition: btVector3.h:591
void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
Set the matrix from euler angles YPR around ZYX axes.
Definition: btMatrix3x3.h:298