MathGL++: GLMatrix.cpp Source File

00001 /***************************************************************************
00002  *   Copyright (C) 2004 by Jacques Gasselin                                *
00003  *   jacquesgasselin@hotmail.com                                           *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU Library General Public License as       *
00007  *   published by the Free Software Foundation; either version 2 of the    *
00008  *   License, or (at your option) any later version.                       *
00009  *                                                                         *
00010  *   This program is distributed in the hope that it will be useful,       *
00011  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00012  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
00013  *   GNU General Public License for more details.                          *
00014  *                                                                         *
00015  *   You should have received a copy of the GNU Library General Public     *
00016  *   License along with this program; if not, write to the                 *
00017  *   Free Software Foundation, Inc.,                                       *
00018  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
00019  ***************************************************************************/
00020 
00021 #include "GLMatrix.h"
00022 
#if defined(GCC_USE_SSE)
// Vector of four packed single-precision floats (16 bytes). This is the
// operand type handed to the __builtin_ia32_* SSE builtins in the GLfloat
// vertex routines of this file. It is declared with the vector_size
// attribute on a float element type, so the element type matches the
// single-precision data the builtins operate on.
typedef float v4sf __attribute__ ((vector_size (16)));
#endif
00026 
00027 
00028 template<>
00029 mathglpp::GLMatrix<GLdouble> mathglpp::GLMatrix<GLdouble>::glTranslate(const GLdouble _x, const GLdouble _y, const GLdouble _z)
00030 {
00031     mathglpp::GLMatrix<GLdouble> ret;
00032     ret.m0 = 1; ret.m4 = 0; ret.m8 = 0;  ret.m12 = _x;
00033     ret.m1 = 0; ret.m5 = 1; ret.m9 = 0;  ret.m13 = _y;
00034     ret.m2 = 0; ret.m6 = 0; ret.m10 = 1; ret.m14 = _z;
00035     ret.m3 = 0; ret.m7 = 0; ret.m11 = 0; ret.m15 = 1;
00036     return ret;
00037 }
00038 
00039 template<>
00040 mathglpp::GLMatrix<GLfloat> mathglpp::GLMatrix<GLfloat>::glTranslate(const GLfloat _x, const GLfloat _y, const GLfloat _z)
00041 {
00042     mathglpp::GLMatrix<GLfloat> ret;
00043     ret.m0 = 1; ret.m4 = 0; ret.m8 = 0;  ret.m12 = _x;
00044     ret.m1 = 0; ret.m5 = 1; ret.m9 = 0;  ret.m13 = _y;
00045     ret.m2 = 0; ret.m6 = 0; ret.m10 = 1; ret.m14 = _z;
00046     ret.m3 = 0; ret.m7 = 0; ret.m11 = 0; ret.m15 = 1;
00047     return ret;
00048 }
00049 
00050 template<>
00051 mathglpp::GLMatrix<GLdouble> mathglpp::GLMatrix<GLdouble>::glRotate(const GLdouble angle, GLdouble x, GLdouble y, GLdouble z)
00052 {
00053     mathglpp::GLMatrix<GLdouble> ret;
00054     GLdouble mag = sqrt(x*x + y*y + z*z);
00055     x /= mag;
00056     y /= mag;
00057     z /= mag;
00058     const GLdouble c = cos(angle*M_PI/180);
00059     const GLdouble s = sin(angle*M_PI/180);
00060     ret.m0 = x*x*(1-c)+c;
00061     ret.m1 = y*x*(1-c)+z*s;
00062     ret.m2 = z*x*(1-c)-y*s;
00063     ret.m3 = 0;
00064 
00065     ret.m4 = x*y*(1-c)-z*s;
00066     ret.m5 = y*y*(1-c)+c;
00067     ret.m6 = z*y*(1-c)+x*s;
00068     ret.m7 = 0;
00069 
00070     ret.m8 = x*z*(1-c)+y*s;
00071     ret.m9 = y*z*(1-c)-x*s;
00072     ret.m10 = z*z*(1-c)+c;
00073     ret.m11 = 0;
00074 
00075     ret.m12 = 0;
00076     ret.m13 = 0;
00077     ret.m14 = 0;
00078     ret.m15 = 1;
00079 
00080     return ret;
00081 }
00082 
00083 template<>
00084 mathglpp::GLMatrix<GLfloat> mathglpp::GLMatrix<GLfloat>::glRotate(const GLfloat angle, GLfloat x, GLfloat y, GLfloat z)
00085 {
00086     mathglpp::GLMatrix<GLfloat> ret;
00087     GLfloat mag = sqrtf(x*x + y*y + z*z);
00088     x/=mag;
00089     y/=mag;
00090     z/=mag;
00091     const GLfloat c = cosf(angle*float(M_PI/180));
00092     const GLfloat s = sinf(angle*float(M_PI/180));
00093     ret.m0 = x*x*(1-c)+c;
00094     ret.m1 = y*x*(1-c)+z*s;
00095     ret.m2 = z*x*(1-c)-y*s;
00096     ret.m3 = 0;
00097 
00098     ret.m4 = x*y*(1-c)-z*s;
00099     ret.m5 = y*y*(1-c)+c;
00100     ret.m6 = z*y*(1-c)+x*s;
00101     ret.m7 = 0;
00102 
00103     ret.m8 = x*z*(1-c)+y*s;
00104     ret.m9 = y*z*(1-c)-x*s;
00105     ret.m10 = z*z*(1-c)+c;
00106     ret.m11 = 0;
00107 
00108     ret.m12 = 0;
00109     ret.m13 = 0;
00110     ret.m14 = 0;
00111     ret.m15 = 1;
00112 
00113     return ret;
00114 }
00115 
00116 
00117 template<>
00118 void mathglpp::GLMatrix<GLfloat>::glVertex3v(const int num, const GLfloat* const v_arr)
00119 {
00120     #if defined(GCC_USE_SSE)
00121     __builtin_prefetch(v_arr);
00122 
00123     v4sf r0,r1,r2;
00124     v4sf m_col0,m_col1,m_col2,m_col3;
00125 
00126     m_col0 = __builtin_ia32_loadaps(m);
00127     m_col1 = __builtin_ia32_loadaps(m+4);
00128     m_col2 = __builtin_ia32_loadaps(m+8);
00129     m_col3 = __builtin_ia32_loadaps(m+12);
00130 
00131     for(register int k = 0; k < num; ++k)
00132     {
00133         //load x,y,z
00134         r0 = __builtin_ia32_loadss(v_arr);
00135         r1 = __builtin_ia32_loadss(v_arr+1);
00136         r2 = __builtin_ia32_loadss(v_arr+2);
00137         //extend into all 4 single floats
00138         r0 = __builtin_ia32_shufps(r0,r0,0x00);
00139         r1 = __builtin_ia32_shufps(r1,r1,0x00);
00140         r2 = __builtin_ia32_shufps(r2,r2,0x00);
00141 
00142         //do the mults
00143         r0 = __builtin_ia32_mulps(r0,m_col0);
00144         v_arr+=3;
00145         r1 = __builtin_ia32_mulps(r1,m_col1);
00146         __builtin_prefetch(v_arr);
00147         //add it all up and, voila
00148         r2 = __builtin_ia32_mulps(r2,m_col2);
00149         r0 = __builtin_ia32_addps(r0,r1);
00150         r2 = __builtin_ia32_addps(r2,m_col3);
00151         r0 = __builtin_ia32_addps(r0,r2);
00152 
00153         ::glVertex4fv((GLfloat*)&r0);
00154     }
00155 
00156     #else
00157     register GLfloat ret[3];
00158     register GLfloat recip;
00159     for(register int k = 0; k < num; ++k)
00160     {
00161         ret[0] = v_arr[k*3]*m0 + v_arr[1+k*3]*m4 + v_arr[2+k*3]*m8 + m12;
00162         ret[1] = v_arr[k*3]*m1 + v_arr[1+k*3]*m5 + v_arr[2+k*3]*m9 + m13;
00163         ret[2] = v_arr[k*3]*m2 + v_arr[1+k*3]*m6 + v_arr[2+k*3]*m10 + m14;
00164 
00165         recip = 1/(v_arr[k*3]*m3 + v_arr[1+k*3]*m7 + v_arr[2+k*3]*m11 + m15);
00166 
00167         ret[0] *= recip;
00168         ret[1] *= recip;
00169         ret[2] *= recip;
00170         ::glVertex3fv(ret);
00171     }
00172     #endif
00173 }
00174 
00175 template<>
00176 void mathglpp::GLMatrix<GLdouble>::glVertex3v(const int num, const GLdouble* const v_arr)
00177 {
00178     register GLdouble ret[3];
00179     register GLdouble recip;
00180 
00181     for(register int k = 0; k < num; ++k)
00182     {
00183         ret[0] = v_arr[k*3]*m0 + v_arr[1+k*3]*m4 + v_arr[2+k*3]*m8 + m12;
00184         ret[1] = v_arr[k*3]*m1 + v_arr[1+k*3]*m5 + v_arr[2+k*3]*m9 + m13;
00185         ret[2] = v_arr[k*3]*m2 + v_arr[1+k*3]*m6 + v_arr[2+k*3]*m10 + m14;
00186 
00187         recip = 1/(v_arr[k*3]*m[3] + v_arr[1+k*3]*m[7] + v_arr[2+k*3]*m[11] + m[15]);
00188 
00189         ret[0] *= recip;
00190         ret[1] *= recip;
00191         ret[2] *= recip;
00192         ::glVertex3dv(ret);
00193     };
00194 }
00195 
00196 template<>
00197 void mathglpp::GLMatrix<GLfloat>::glVertex4v(const int num, const GLfloat* const v_arr)
00198 {
00199     #ifdef GCC_USE_SSE
00200     __builtin_prefetch(m);
00201 
00202     v4sf r0,r1,r2,r3;
00203     v4sf m_col0,m_col1,m_col2,m_col3;
00204 
00205     __builtin_prefetch(v_arr);
00206 
00207     m_col0 = __builtin_ia32_loadaps(m);
00208     m_col1 = __builtin_ia32_loadaps(m+4);
00209     m_col2 = __builtin_ia32_loadaps(m+8);
00210     m_col3 = __builtin_ia32_loadaps(m+12);
00211 
00212     for(register int k = 0; k < num; ++k)
00213     {
00214         //load x,y,z,w into v
00215         r0 = __builtin_ia32_loadss(v_arr);
00216         r1 = __builtin_ia32_loadss(v_arr+1);
00217         r2 = __builtin_ia32_loadss(v_arr+2);
00218         r3 = __builtin_ia32_loadss(v_arr+3);
00219         r0 = __builtin_ia32_shufps(r0,r0,0x00);
00220         r1 = __builtin_ia32_shufps(r1,r1,0x00);
00221         r2 = __builtin_ia32_shufps(r2,r2,0x00);
00222         r3 = __builtin_ia32_shufps(r3,r3,0x00);
00223 
00224         r0 = __builtin_ia32_mulps(m_col0,r0);
00225         v_arr+=4;
00226         r1 = __builtin_ia32_mulps(m_col1,r1);
00227         __builtin_prefetch(v_arr);
00228         r2 = __builtin_ia32_mulps(m_col2,r2);
00229         r3 = __builtin_ia32_mulps(m_col3,r3);
00230         r0 = __builtin_ia32_addps(r0,r1);
00231         r2 = __builtin_ia32_addps(r2,r3);
00232         r0 = __builtin_ia32_addps(r0,r2);
00233 
00234         ::glVertex4fv((GLfloat*)&r0);
00235     }
00236 
00237     #else
00238     register GLfloat ret[4];
00239     for(register int k = 0; k < num; ++k)
00240     {
00241         for(register unsigned j = 0; j < 4; ++j)
00242         {
00243             ret[j] = 0;
00244             for(register unsigned i = 0; i < 4; ++i)
00245                 ret[j] += v_arr[i+k*4]*m[j+i*4];
00246         }
00247         ::glVertex4fv(ret);
00248     }
00249     #endif
00250 }
00251 
00252 template<>
00253 void mathglpp::GLMatrix<GLdouble>::glVertex4v(int num, const GLdouble* v_arr)
00254 {
00255     register GLdouble ret[4];
00256     for(register int k = 0; k < num; ++k)
00257     {
00258         for(register unsigned j = 0; j < 4; ++j)
00259         {
00260             ret[j] = 0;
00261             for(register unsigned i = 0; i < 4; ++i)
00262                 ret[j] += v_arr[i+k*4]*m[j+i*4];
00263         };
00264         ::glVertex4dv(ret);
00265     };
00266 }
00267 
00268