head	3.25;
access;
symbols
	merge-1:3.21.2.5
	autoconf:3.21.0.4
	experimental-1:3.21.0.2
	mesa-3-1-with-kw3:3.14
	mesa-3-1-prior-to-kw3:3.13;
locks; strict;
comment	@ * @;


3.25
date	99.07.31.12.28.46;	author miklos;	state Exp;
branches;
next	3.24;

3.24
date	99.07.15.12.35.26;	author tjump;	state Exp;
branches;
next	3.23;

3.23
date	99.07.12.12.05.25;	author keithw;	state Exp;
branches;
next	3.22;

3.22
date	99.06.09.16.30.33;	author holger;	state Exp;
branches;
next	3.21;

3.21
date	99.04.10.03.57.36;	author joshv;	state Exp;
branches
	3.21.2.1;
next	3.20;

3.20
date	99.04.07.00.41.14;	author brianp;	state Exp;
branches;
next	3.19;

3.19
date	99.04.06.00.56.07;	author brianp;	state Exp;
branches;
next	3.18;

3.18
date	99.03.31.20.18.41;	author keithw;	state Exp;
branches;
next	3.17;

3.17
date	99.03.31.04.39.59;	author joshv;	state Exp;
branches;
next	3.16;

3.16
date	99.03.31.02.07.52;	author brianp;	state Exp;
branches;
next	3.15;

3.15
date	99.03.17.12.08.23;	author keithw;	state Exp;
branches;
next	3.14;

3.14
date	99.02.25.14.12.32;	author keithw;	state Exp;
branches;
next	3.13;

3.13
date	99.02.24.22.48.08;	author jens;	state Exp;
branches;
next	3.12;

3.12
date	99.02.14.03.46.34;	author brianp;	state Exp;
branches;
next	3.11;

3.11
date	98.11.08.22.37.53;	author brianp;	state Exp;
branches;
next	3.10;

3.10
date	98.11.07.14.21.55;	author brianp;	state Exp;
branches;
next	3.9;

3.9
date	98.11.07.13.40.07;	author brianp;	state Exp;
branches;
next	3.8;

3.8
date	98.11.01.20.20.29;	author brianp;	state Exp;
branches;
next	3.7;

3.7
date	98.10.29.04.10.21;	author brianp;	state Exp;
branches;
next	3.6;

3.6
date	98.10.29.03.57.11;	author brianp;	state Exp;
branches;
next	3.5;

3.5
date	98.10.29.02.28.13;	author brianp;	state Exp;
branches;
next	3.4;

3.4
date	98.09.25.03.12.27;	author brianp;	state Exp;
branches;
next	3.3;

3.3
date	98.08.20.04.15.48;	author brianp;	state Exp;
branches;
next	3.2;

3.2
date	98.04.18.05.00.56;	author brianp;	state Exp;
branches;
next	3.1;

3.1
date	98.02.01.16.37.19;	author brianp;	state Exp;
branches;
next	3.0;

3.0
date	98.01.31.21.08.31;	author brianp;	state Exp;
branches;
next	;

3.21.2.1
date	99.05.21.21.29.28;	author keithw;	state Exp;
branches;
next	3.21.2.2;

3.21.2.2
date	99.06.02.19.48.27;	author holger;	state Exp;
branches;
next	3.21.2.3;

3.21.2.3
date	99.06.03.01.44.16;	author holger;	state Exp;
branches;
next	3.21.2.4;

3.21.2.4
date	99.06.05.22.04.40;	author holger;	state Exp;
branches;
next	3.21.2.5;

3.21.2.5
date	99.06.06.22.35.55;	author keithw;	state Exp;
branches;
next	;


desc
@point and vector transformation
@


3.25
log
@reaadded init_copy_raw,init_copy_masked.
@
text
@/* $Id: xform.c,v 3.24 1999/07/15 12:35:26 tjump Exp $ */

/*
 * Mesa 3-D graphics library
 * Version:  3.1
 *
 * Copyright (C) 1999  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */


/*
 * Matrix/vertex/vector transformation stuff
 *
 *
 * NOTES:
 * 1. 4x4 transformation matrices are stored in memory in column major order.
 * 2. Points/vertices are to be thought of as column vectors.
 * 3. Transformation of a point p by a matrix M is: p' = M * p
 */


#ifdef PC_HEADER
#include "all.h"
#else
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "vb.h"
#include "context.h"
#include "mmath.h"
#include "types.h"
#include "xform.h"
#ifdef XFree86Server
#include "GL/xf86glx.h"
#endif
#endif

#ifdef DEBUG
#include "debug_xform.h"
#endif

#ifdef USE_X86_ASM
#include "X86/common_x86asm.h"
#endif

clip_func gl_clip_tab[5];
dotprod_func gl_dotprod_tab[2][5];
vec_copy_func gl_copy_tab[2][0x10];
normal_func gl_normal_tab[0xf][0x4];
transform_func **(gl_transform_tab[2]);
static transform_func *cull_transform_tab[5];
static transform_func *raw_transform_tab[5];


/* Raw data format used for:
 *    - Object-to-eye transform prior to culling, although this too
 *      could be culled under some circumstances.
 *    - Eye-to-clip transform (via the function above).
 *    - Cliptesting
 *    - And everything else too, if culling happens to be disabled.
 */
#define TAG(x) x##_raw
#define TAG2(x,y) x##y##_raw
#define IDX 0
#define STRIDE_LOOP for (i=0;i<count;i++, STRIDE_F(from, stride))
#define LOOP for (i=0;i<n;i++)
#define CULL_CHECK
#define CLIP_CHECK
#define ARGS
#include "xform_tmp.h"
#include "clip_tmp.h"
#include "norm_tmp.h"
#include "dotprod_tmp.h"
#include "copy_tmp.h"
#undef TAG
#undef TAG2
#undef LOOP
#undef CULL_CHECK
#undef CLIP_CHECK
#undef ARGS
#undef IDX

/* Culled data used for:
 *    - texture transformations
 *    - viewport map transformation
 *    - normal transformations prior to lighting
 *    - user cliptests
 */
#define TAG(x) x##_masked
#define TAG2(x,y) x##y##_masked
#define IDX CULL_MASK_ACTIVE
#define STRIDE_LOOP for (i=0;i<count;i++, STRIDE_F(from, stride))
#define LOOP for (i=0;i<n;i++)
#define CULL_CHECK if (mask[i])
#define CLIP_CHECK if ((mask[i] & flag) == 0)
#define ARGS , const GLubyte mask[]
#include "xform_tmp.h"
#include "norm_tmp.h"
#include "dotprod_tmp.h"
#include "copy_tmp.h"
#undef TAG
#undef TAG2
#undef LOOP
#undef CULL_CHECK
#undef CLIP_CHECK
#undef ARGS
#undef IDX




#if 0

#define TAG(x) x##_raw_compacted
#define TAG2(x,y) x##y##_raw_compacted
#define IDX COMPACTED_NORMALS
#define STRIDE_LOOP for (i=0;i<count;i++, STRIDE_F(from, stride))
#define LOOP for (i=0;i<n;i++)
#define CHECK if (flag[i] & VERT_NORM)
#define ARGS
#include "norm_tmp.h"
#undef TAG
#undef TAG2
#undef LOOP
#undef CHECK
#undef ARGS
#undef IDX


#define TAG(x) x##_masked
#define TAG2(x,y) x##y##_masked
#define IDX CULL_MASK_ACTIVE|COMPACTED_NORMALS
#define DUPLICATE_FUNCTIONS
#include "norm_tmp.h"
#undef TAG
#undef TAG2
#undef LOOP
#undef CHECK
#undef ARGS
#undef IDX

#endif


GLvector4f *gl_project_points( GLvector4f *proj_vec,
			       const GLvector4f *clip_vec )
{
   const GLuint stride = clip_vec->stride;
   const GLfloat *from = (GLfloat *)clip_vec->start;
   const GLuint count = clip_vec->count;
   GLfloat (*vProj)[4] = (GLfloat (*)[4])proj_vec->start;
   GLuint i;

   for (i = 0 ; i < count ; i++, STRIDE_F(from, stride))
   {
	 GLfloat oow = 1.0 / from[3];
	 vProj[i][3] = oow;
	 vProj[i][0] = from[0] * oow;
	 vProj[i][1] = from[1] * oow;
	 vProj[i][2] = from[2] * oow;
   }

   proj_vec->flags |= VEC_SIZE_4;
   proj_vec->size = 3;
   proj_vec->count = clip_vec->count;
   return proj_vec;
}



/*
 * This is called only once.  It initializes several tables with pointers
 * to optimized transformation functions.  This is where we can test for
 * AMD 3Dnow! capability, Intel Katmai, etc. and hook in the right code.
 */
void gl_init_transformation( void )
{
   gl_transform_tab[0] = raw_transform_tab;
   gl_transform_tab[1] = cull_transform_tab;

   init_c_transformations_raw();
   init_c_transformations_masked();
   init_c_norm_transform_raw();
   init_c_norm_transform_masked();
   init_c_cliptest_raw();
   init_copy0_raw();
   init_copy0_masked();
   init_dotprod_raw();
   init_dotprod_masked();

#ifdef DEBUG
   gl_test_all_transform_functions ("default");
   gl_test_all_normal_transform_functions ("default");
#endif

#ifdef USE_X86_ASM
   gl_init_all_x86_asm ();
#endif
}



/*
 * Transform a 4-element row vector (1x4 matrix) by a 4x4 matrix.  This
 * function is used for transforming clipping plane equations and spotlight
 * directions.
 * Mathematically,  u = v * m.
 * Input:  v - input vector
 *         m - transformation matrix
 * Output:  u - transformed vector
 */
void gl_transform_vector( GLfloat u[4], const GLfloat v[4], const GLfloat m[16] )
{
   GLfloat v0=v[0], v1=v[1], v2=v[2], v3=v[3];
#define M(row,col)  m[row + col*4]
   u[0] = v0 * M(0,0) + v1 * M(1,0) + v2 * M(2,0) + v3 * M(3,0);
   u[1] = v0 * M(0,1) + v1 * M(1,1) + v2 * M(2,1) + v3 * M(3,1);
   u[2] = v0 * M(0,2) + v1 * M(1,2) + v2 * M(2,2) + v3 * M(3,2);
   u[3] = v0 * M(0,3) + v1 * M(1,3) + v2 * M(2,3) + v3 * M(3,3);
#undef M
}


/* Useful for one-off point transformations, as in clipping.
 * Note that because the matrix isn't analyzed we do too many
 * multiplies, and that the result is always 4-clean.
 */
void gl_transform_point_sz( GLfloat Q[4], const GLfloat M[16],
			    const GLfloat P[4], GLuint sz )
{
   if (Q == P)
      return;

   if (sz == 4)
   {
      Q[0] = M[0] * P[0] + M[4] * P[1] + M[8] *  P[2] + M[12] * P[3];
      Q[1] = M[1] * P[0] + M[5] * P[1] + M[9] *  P[2] + M[13] * P[3];
      Q[2] = M[2] * P[0] + M[6] * P[1] + M[10] * P[2] + M[14] * P[3];
      Q[3] = M[3] * P[0] + M[7] * P[1] + M[11] * P[2] + M[15] * P[3];
   }
   else if (sz == 3)
   {
      Q[0] = M[0] * P[0] + M[4] * P[1] + M[8] *  P[2] + M[12];
      Q[1] = M[1] * P[0] + M[5] * P[1] + M[9] *  P[2] + M[13];
      Q[2] = M[2] * P[0] + M[6] * P[1] + M[10] * P[2] + M[14];
      Q[3] = M[3] * P[0] + M[7] * P[1] + M[11] * P[2] + M[15];
   }
   else if (sz == 2)
   {
      Q[0] = M[0] * P[0] + M[4] * P[1] +                M[12];
      Q[1] = M[1] * P[0] + M[5] * P[1] +                M[13];
      Q[2] = M[2] * P[0] + M[6] * P[1] +                M[14];
      Q[3] = M[3] * P[0] + M[7] * P[1] +                M[15];
   }
   else if (sz == 1)
   {
      Q[0] = M[0] * P[0] +                              M[12];
      Q[1] = M[1] * P[0] +                              M[13];
      Q[2] = M[2] * P[0] +                              M[14];
      Q[3] = M[3] * P[0] +                              M[15];
   }
}
@


3.24
log
@more Win32 build cleanups
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.23 1999/07/12 12:05:25 keithw Exp $ */
d203 2
a204 3
   /* deprecated code? -taj */
   /* init_copy_raw(); */
   /* init_copy_masked(); */
@


3.23
log
@merge from experimental branch upto merge-1 tag
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.22 1999/06/09 16:30:33 holger Exp $ */
d6 1
a6 1
 * 
d8 1
a8 1
 * 
d15 1
a15 1
 * 
d18 1
a18 1
 * 
d73 1
a73 1
 *    - Object-to-eye transform prior to culling, although this too 
d89 3
a91 3
#include "norm_tmp.h" 
#include "dotprod_tmp.h" 
#include "copy_tmp.h" 
d117 1
a117 1
#include "copy_tmp.h" 
d138 1
a138 1
#include "norm_tmp.h" 
d162 1
a162 1
GLvector4f *gl_project_points( GLvector4f *proj_vec, 
d171 1
a171 1
   for (i = 0 ; i < count ; i++, STRIDE_F(from, stride)) 
d173 1
a173 1
	 GLfloat oow = 1.0 / from[3]; 
d203 3
a205 2
   init_copy_raw();
   init_copy_masked();
d249 1
a249 1
   if (Q == P) 
d252 1
a252 1
   if (sz == 4) 
d258 1
a258 1
   } 
d265 1
a265 1
   } 
a280 2


@


3.22
log
@x86/3dnow related changes, taken from the experimental-1 branch
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.21 1999/04/10 03:57:36 joshv Exp $ */
a61 1
 
@


3.21
log
@Added Holger's xform debug/profile changes
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.20 1999/04/07 00:41:14 brianp Exp $ */
a44 1
#include "X86/x86.h"
d55 2
a56 2
#ifdef USE_3DNOW_ASM
#include "3Dnow/3dnow.h"
d59 2
a60 2
#ifdef DEBUG
#include "debug_xform.h"
d211 1
d215 1
a215 5
   gl_init_x86_asm_transforms();
#endif

#ifdef USE_3DNOW_ASM
   init_3dnow_transformation();
a216 1

@


3.21.2.1
log
@Quake3 inspired optimizations
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.21 1999/04/10 03:57:36 joshv Exp $ */
a218 1
   printf("hello\n");
@


3.21.2.2
log
@changed x86 asm initialisation
@
text
@d45 1
a55 4
#include "X86/common_x86asm.h"
/*
#include "X86/x86.h"

d57 1
a57 1
#include "X86/3dnow.h"
d59 1
a59 1
*/
d63 1
a213 1
/*
d219 2
a220 1
   gl_init_3dnow_asm_transforms();
a221 1
*/
a222 1
   gl_init_all_x86_asm ();
@


3.21.2.3
log
@cleanup
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.21.2.2 1999/06/02 19:48:27 holger Exp $ */
d56 2
d59 4
d215 10
@


3.21.2.4
log
@added 'gl_test_all_normal_transform_functions (default);' + #ifdef USE_X86_ASM
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.21.2.3 1999/06/03 01:44:16 holger Exp $ */
a54 1
#ifdef USE_X86_ASM
a55 1
#endif
a207 1
   gl_test_all_normal_transform_functions ("default");
@


3.21.2.5
log
@some trial assembly, made newer code active by default
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.21.2.4 1999/06/05 22:04:40 holger Exp $ */
a212 1
#ifdef USE_X86_ASM
a213 1
#endif
@


3.20
log
@Holger's April 6, 1999 updates
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.18 1999/03/31 20:18:41 keithw Exp $ */
d211 1
a211 2
   printf ("checking _c_ funtions... \n");
   test_all_transform_functions ();
@


3.19
log
@added gl_ prefix to init_3dnow_transformation()
@
text
@d60 4
d210 3
a212 2
#ifdef USE_3DNOW_ASM
   gl_init_3dnow_transformation();
d218 5
@


3.18
log
@Compiled vertex arrays
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.17 1999/03/31 04:39:59 joshv Exp $ */
d207 1
a207 1
   init_3dnow_transformation();
@


3.17
log
@added new x86 code
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.16 1999/03/31 02:07:52 brianp Exp $ */
d64 1
a64 1
normal_func gl_normal_tab[0x1f][0x2];
d126 34
a278 273
void gl_reset_vector4f( GLvector4f *vec, GLuint sz, GLfloat *val )
{
   static GLfloat c[4] = { 0, 0, 0, 1 };
   GLuint i;

   if (val) {
      COPY_4V(vec->data[0], val);

      for (i = sz ; i < vec->size ; i++) {
	 if (val[i] != c[i]) sz = i+1;
      }
   } else {
      vec->start = (GLfloat *)vec->data;
   }

   vec->count = 0;
   vec->size = sz;
}


void gl_reset_vector3f( GLvector3f *vec, GLfloat *val )
{
   if (val) {
      COPY_3V(vec->data[0], val);
      vec->start = (GLfloat *)vec->data[1];
   } else {
      vec->start = (GLfloat *)vec->data;
   }

   vec->count = 0;
}



static const GLubyte elem_bits[4] = {
   VEC_DIRTY_0,
   VEC_DIRTY_1, 
   VEC_DIRTY_2, 
   VEC_DIRTY_3
};

void gl_clean_elem( GLuint elt, GLvector4f *vec )
{
   static GLfloat clean[4] = { 0, 0, 0, 1 };
   GLfloat v = clean[elt];
   GLuint i; 

   for (i = 0 ; i < VB_MAX ; i++)
      vec->data[i][elt] = v;

   vec->flags &= ~elem_bits[elt];
}

static const GLubyte size_bits[5] = {
   0,
   VEC_SIZE_1,
   VEC_SIZE_2,
   VEC_SIZE_3,
   VEC_SIZE_4,
};

void gl_init_vector4f( GLvector4f *v, GLuint sz, GLuint flags,
		       GLfloat (*storage)[4] )
{
   v->stride = 4*sizeof(GLfloat);
   v->size = 2;
   if (storage) {
      v->data = storage;
      v->start = (GLfloat *)storage;
   } else {
      v->start = (GLfloat *)malloc(VB_SIZE * 4 * sizeof(GLfloat));
      v->data = (GLfloat (*)[4])v->start;
      flags |= VEC_MALLOC;
   }
   v->count = 0;
   v->flags = size_bits[4] | flags;
}



void gl_init_vector4f_sz( GLvector4f *v, GLuint sz, GLuint count )
{
   v->stride = sz*sizeof(GLfloat);
   v->size = 2;
   v->start = (GLfloat *)malloc(count * sz * sizeof(GLfloat));
   v->data = (GLfloat (*)[4])v->start;
   v->count = 0;
   v->flags = size_bits[sz] | VEC_MALLOC;
}



void gl_init_vector3f( GLvector3f *v, GLuint flags, GLfloat (*storage)[3] )
{
   v->stride = 3*sizeof(GLfloat);
   if (storage) {
      v->data = storage;
      v->start = (GLfloat *)storage;
   } else {
      v->start = (GLfloat *)malloc(VB_SIZE * 3 * sizeof(GLfloat));
      v->data = (GLfloat (*)[3])v->start;
      flags |= VEC_MALLOC;
   }
   v->count = 0;
   v->flags = flags;
}

void gl_init_vector4ub( GLvector4ub *v, GLuint flags, GLubyte (*storage)[4] )
{
   v->stride = 4*sizeof(GLubyte);
   if (storage) {
      v->data = storage;
      v->start = (GLubyte *)storage;
   } else {
      v->start = (GLubyte *)malloc(VB_SIZE * 4 * sizeof(GLubyte));
      v->data = (GLubyte (*)[4])v->start;
      flags |= VEC_MALLOC;
   }
   v->count = 0;
   v->flags = flags;
}

void gl_init_vector1ub( GLvector1ub *v, GLuint flags, GLubyte *storage )
{
   v->stride = 1*sizeof(GLubyte);
   if (storage) {
      v->data = storage;
      v->start = (GLubyte *)storage;
   } else {
      v->start = (GLubyte *)malloc(VB_SIZE * sizeof(GLubyte));
      v->data = v->start;
      flags |= VEC_MALLOC;
   }
   v->count = 0;
   v->flags = flags;
}

void gl_init_vector1ui( GLvector1ui *v, GLuint flags, GLuint *storage )
{
   v->stride = 1*sizeof(GLuint);
   if (storage) {
      v->data = storage;
      v->start = (GLuint *)storage;
   } else {
      v->start = (GLuint *)malloc(VB_SIZE * sizeof(GLuint));
      v->data = v->start;
      flags |= VEC_MALLOC;
   }
   v->count = 0;
   v->flags = flags;
}


void gl_free_vector4f( GLvector4f *v )
{
   if (v->flags & VEC_MALLOC) {
      free( v->data );
      v->data = 0;
      v->data = 0;
      v->flags &= ~VEC_MALLOC;
   }
}

void gl_free_vector3f( GLvector3f *v )
{
   if (v->flags & VEC_MALLOC) {
      free( v->data );
      v->data = 0;
      v->start = 0;
      v->flags &= ~VEC_MALLOC;
   }
}

void gl_free_vector4ub( GLvector4ub *v )
{
   if (v->flags & VEC_MALLOC) {
      free( v->data );
      v->data = 0;
      v->start = 0;
      v->flags &= ~VEC_MALLOC;
   }
}

void gl_free_vector1ub( GLvector1ub *v )
{
   if (v->flags & VEC_MALLOC) {
      free( v->data );
      v->data = 0;
      v->start = 0;
      v->flags &= ~VEC_MALLOC;
   }
}

void gl_free_vector1ui( GLvector1ui *v )
{
   if (v->flags & VEC_MALLOC) {
      free( v->data );
      v->data = 0;
      v->start = 0;
      v->flags &= ~VEC_MALLOC;
   }
}

void gl_print_vector4f( GLvector4f *v, GLubyte *cullmask, GLboolean culling )
{
   GLfloat c[4] = { 0, 0, 0, 1 };
   const char *templates[5] = {
      "%d:\t0, 0, 0, 1\n",
      "%d:\t%f, 0, 0, 1\n",
      "%d:\t%f, %f, 0, 1\n",
      "%d:\t%f, %f, %f, 1\n",
      "%d:\t%f, %f, %f, %f\n"
   };

   const char *t = templates[v->size];
   GLfloat *d = (GLfloat *)v->data;
   GLuint j, i = 0, count;

   printf("data-start\n");
   for ( ; d != v->start ; STRIDE_F(d, v->stride), i++) 
      printf( t, i, d[0], d[1], d[2], d[3]);
   
   printf("start-count(%d)\n", v->count);
   count = i + v->count;

   if (culling) {
      for ( ; i < count ; STRIDE_F(d, v->stride), i++) 
	 if (cullmask[i]) 
	    printf( t, i, d[0], d[1], d[2], d[3]);      
   } else {
      for ( ; i < count ; STRIDE_F(d, v->stride), i++) 
	 printf( t, i, d[0], d[1], d[2], d[3]);      
   }

   for (j = v->size ; j < 4; j++) {
      if ((v->flags & (1<<j)) == 0) {

	 printf("checking col %d is clean as advertised ", j);

	 for (i = 0, d = (GLfloat *) v->data ; 
	      i < count && d[j] == c[j] ; 
	      i++, STRIDE_F(d, v->stride));

	 if (i == count) 
	    printf(" --> ok\n");
	 else 
	    printf(" --> Failed at %d ******\n", i);
      }
   }
}


void gl_print_vector3f( GLvector3f *v, GLubyte *cullmask, GLboolean culling )
{
   GLfloat *d = (GLfloat *)v->data;
   GLuint i = 0, count;

   printf("data-start\n");
   for ( ; d != v->start ; STRIDE_F(d,v->stride), i++) 
      printf( "%d:\t%f, %f, %f\n", i, d[0], d[1], d[2]);
   
   printf("start-count(%d)\n", v->count);
   count = i + v->count;

   if (culling) {
      for ( ; i < count ; STRIDE_F(d,v->stride), i++) 
	 if (cullmask[i])
	    printf( "%d:\t%f, %f, %f\n", i, d[0], d[1], d[2]);      
   } else {
      for ( ; i < count ; STRIDE_F(d,v->stride), i++) 
	 printf( "%d:\t%f, %f, %f\n", i, d[0], d[1], d[2]);      
   }
}
@


3.16
log
@hooked in 3DNow! code
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.15 1999/03/17 12:08:23 keithw Exp $ */
d45 1
a45 1
#include "asm_386.h"
@


3.15
log
@Removed CLIP_4D_BIT, added CLIP_CULLED_BIT.  Clipmask is now used
to drive culling in vertex transformation, allowing us to skip
both clipped and culled vertices with a single test.
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.14 1999/02/25 14:12:32 keithw Exp $ */
a27 3



a35 8
 *
 * The volatile keyword is needed in a bunch of places to prevent
 * numerical problems on x86 CPUs.
 *
 * KW: Have split clip from transform to try and keep the number of
 * functions under control.  The volatile keyword is not required any 
 * longer.
 *
d56 4
d171 4
@


3.14
log
@Merged in kw3 patch
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.13 1999/02/24 22:48:08 jens Exp $ */
d89 2
a90 1
#define CHECK
d100 2
a101 1
#undef CHECK
d116 2
a117 1
#define CHECK if (mask[i])
d126 2
a127 1
#undef CHECK
a156 27


void gl_transform_points_raw( GLvector4f *to,
				const GLmatrix *mat,
				const GLvector4f *from)
{
   (gl_transform_tab[0][from->size][mat->type])(to, mat, from, 0);
}


void gl_transform_points_masked( GLvector4f *to,
				   const GLmatrix *mat,
				   const GLvector4f *from, 
				   const GLubyte mask[])
{
   (gl_transform_tab[1][from->size][mat->type])(to, mat, from, mask);
}


void gl_transform_points( GLvector4f *to, 
			    const GLmatrix *mat,
			    const GLvector4f *from,
			    const GLubyte mask[],
			    GLuint masked)
{
   (gl_transform_tab[masked][from->size][mat->type])(to, mat, from, mask);
}
@


3.13
log
@Added header file to get XMesa to compile standalone and inside XFree86
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.12 1999/02/14 03:46:34 brianp Exp $ */
d28 1
a28 42
/*
 * $Log: xform.c,v $
 * Revision 3.12  1999/02/14 03:46:34  brianp
 * new copyright
 *
 * Revision 3.11  1998/11/08 22:37:53  brianp
 * fixed a typo
 *
 * Revision 3.10  1998/11/07 14:21:55  brianp
 * replaced static array initialization with gl_init_transformation()
 *
 * Revision 3.9  1998/11/07 13:40:07  brianp
 * added a bunch of const and volatile qualifiers
 *
 * Revision 3.8  1998/11/01 20:20:29  brianp
 * updated with Josh's new x86 code
 *
 * Revision 3.7  1998/10/29 04:10:21  brianp
 * removed dead code, added placeholders for 3D-Now! code
 *
 * Revision 3.6  1998/10/29 03:57:11  brianp
 * misc clean-up of new vertex transformation code
 *
 * Revision 3.5  1998/10/29 02:28:13  brianp
 * incorporated Keith Whitwell's transformation optimizations
 *
 * Revision 3.4  1998/09/25 03:12:27  brianp
 * simplified gl_xform_normals_3fv() per Keith Whitwell
 *
 * Revision 3.3  1998/08/20 04:15:48  brianp
 * added prototype 3.1 transformation functions
 *
 * Revision 3.2  1998/04/18 05:00:56  brianp
 * renamed USE_ASM to USE_X86_ASM
 *
 * Revision 3.1  1998/02/01 16:37:19  brianp
 * added GL_EXT_rescale_normal extension
 *
 * Revision 3.0  1998/01/31 21:08:31  brianp
 * initial rev
 *
 */
d42 5
d54 2
d68 140
d221 1
a221 1
#define M(row,col)  m[col*4+row]
a228 1
  
d230 3
a232 8
#ifndef USE_X86_ASM  
/*
 * Apply a transformation matrix to an array of normal vectors:
 *   for i in 0 to n-1 do  v[i] = u[i] * m
 * where u[i] and v[i] are 3-element row vectors and m is a 16-element
 * transformation matrix.
 * If the normalize flag is true the normals will be scaled to length 1.
 * If the rescale flag is true then do normal rescaling.  
d234 26
a259 48
void gl_transform_normals_3fv( GLuint n, 
			       const GLfloat *in,
			       GLuint in_stride,
			       const GLfloat m[16],
			       GLfloat out[][3], 
			       GLboolean normalize,
			       GLboolean rescale)
{
   if (normalize) {
      /* Transform and normalize.
       * KW: rescale is a noop under these conditions...  
       */
      GLuint i;
      GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
      GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
      GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
      for (i = 0 ; i < n ; i++, in += in_stride) {
         GLdouble tx, ty, tz;
         {
            GLfloat ux = in[0],  uy = in[1],  uz = in[2];
            tx = ux * m0 + uy * m1 + uz * m2;
            ty = ux * m4 + uy * m5 + uz * m6;
            tz = ux * m8 + uy * m9 + uz * m10;
         }
         {
            GLdouble len, scale;
            len = GL_SQRT( tx*tx + ty*ty + tz*tz );
            scale = (len>1E-30) ? (1.0 / len) : 1.0;
            out[i][0] = tx * scale;
            out[i][1] = ty * scale;
            out[i][2] = tz * scale;
         }
      }
   }
   else if (rescale) {
      /* Transform and rescale */
      GLuint i;
      GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
      GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
      GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
      GLfloat f = GL_SQRT( m2*m2 + m6*m6 + m10*m10 ); /* precompute */
      f = (f == 0.0F) ? 1.0F : (1.0F / f);
      for (i = 0 ; i < n ; i++, in += in_stride) {
	 GLfloat ux = in[0],  uy = in[1],  uz = in[2];
         out[i][0] = f * (ux * m0 + uy * m1 + uz * m2);
         out[i][1] = f * (ux * m4 + uy * m5 + uz * m6);
         out[i][2] = f * (ux * m8 + uy * m9 + uz * m10);
      }
d261 6
a266 12
   else {
      /* Just transform */
      GLuint i;
      GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
      GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
      GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
      for (i = 0 ; i < n ; i++, in += in_stride) {
	 GLfloat ux = in[0],  uy = in[1],  uz = in[2];
         out[i][0] = ux * m0 + uy * m1 + uz * m2;
         out[i][1] = ux * m4 + uy * m5 + uz * m6;
         out[i][2] = ux * m8 + uy * m9 + uz * m10;
      }
a268 2
#endif

d271 1
a271 42

/*----------------------------------------------------------------------
 * Begin Keith's new code 
 *
 * NOTE:  the units for stride values is a GLfloat.  That is, if
 * stride = 3 then the start of each floating point coordinate is 12 bytes
 * after its predecessor.  Have to be clear about this because OpenGL's
 * vertex array stride values are in bytes!
 *
 *----------------------------------------------------------------------
 */


void gl_normalize_3fv( GLuint n, 
		       const GLfloat *in,
		       GLuint in_stride,
		       GLfloat out[][3] )
{
   GLuint i;
   for ( i = 0 ; i < n ; i++, in += in_stride ) {
      const GLfloat x = in[0], y = in[1], z = in[2];
      GLdouble len = x * x + y * y + z * z;
      if (len > 1e-50) {
         len = 1.0 / GL_SQRT(len);
         out[i][0] = x * len;
         out[i][1] = y * len;
         out[i][2] = z * len;
      }
      else {
         out[i][0] = x;
         out[i][1] = y;
         out[i][2] = z;
      }
   }
}
  
  
void gl_scale_3fv( GLuint n, 
		   const GLfloat *in,
		   GLuint in_stride,
		   GLfloat out[][3],
		   GLfloat scale)
d273 1
a274 4
   for ( i = 0 ; i < n ; i++, in += in_stride ) {
      SCALE_SCALAR_3V( out[i], scale, in );
   }
}
d276 2
d279 5
a283 20
static void transform_points2_general( const GLmatrix *mat, 
				       GLuint n,
				       const GLfloat *from, 
				       GLfloat to[][4],
				       GLuint stride)
{
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0],  m4 = m[4],  m12 = m[12];
   const GLfloat m1 = m[1],  m5 = m[5],  m13 = m[13];
   const GLfloat m2 = m[2],  m6 = m[6],  m14 = m[14];
   const GLfloat m3 = m[3],  m7 = m[7],  m15 = m[15];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_GENERAL);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1];
      to[i][0] = m0 * ox + m4 * oy + m12;
      to[i][1] = m1 * ox + m5 * oy + m13;
      to[i][2] = m2 * ox + m6 * oy + m14;
      to[i][3] = m3 * ox + m7 * oy + m15;
a284 2
   END_FAST_MATH;
}
d286 2
a287 16
static void transform_points2_identity( const GLmatrix *mat, 
					GLuint n,
					const GLfloat *from, 
					GLfloat to[][4],
					GLuint stride )
{
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_IDENTITY);
   for (i=0;i<n;i++, from+=stride) {
      to[i][0] = from[0];
      to[i][1] = from[1];
      to[i][2] = 0.0F;
      to[i][3] = 1.0F;
   }
   END_FAST_MATH;
a289 21
static void transform_points2_2d( const GLmatrix *mat, 
				  GLuint n,
				  const GLfloat *from, 
				  GLfloat to[][4],
				  GLuint stride )
{
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
   const GLfloat m12 = m[12], m13 = m[13];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_2D);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1];
      to[i][0] = m0 * ox + m4 * oy + m12;
      to[i][1] = m1 * ox + m5 * oy + m13;
      to[i][2] = 0.0F;
      to[i][3] = 1.0F;
   }
   END_FAST_MATH;
}
d291 1
a291 5
static void transform_points2_2d_no_rot( const GLmatrix *mat, 
					 GLuint n,
					 const GLfloat *from, 
					 GLfloat to[][4],
					 GLuint stride )
d293 5
a297 11
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_2D_NO_ROT);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1];
      to[i][0] = m0 * ox           + m12;
      to[i][1] =           m5 * oy + m13;
      to[i][2] = 0.0F;
      to[i][3] = 1.0F;
a298 2
   END_FAST_MATH;
}
d300 1
a300 20
static void transform_points2_3d( const GLmatrix *mat, 
				  GLuint n,
				  const GLfloat *from, 
				  GLfloat to[][4],
				  GLuint stride )
{
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4], m5 = m[5];
   const GLfloat m6 = m[6], m12 = m[12], m13 = m[13], m14 = m[14];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_3D);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1];
      to[i][0] = m0 * ox + m4 * oy + m12;
      to[i][1] = m1 * ox + m5 * oy + m13;
      to[i][2] = m2 * ox + m6 * oy + m14;
      to[i][3] = 1.0F;
   }
   END_FAST_MATH;
a302 6
static void transform_points3_general( const GLmatrix *mat, 
				       GLuint n,
				       const GLfloat *from, 
				       GLfloat to[][4],
				       GLuint stride )
{
a303 15
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
   const GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
   const GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
   const GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_GENERAL);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2];
      to[i][0] = m0 * ox + m4 * oy + m8  * oz + m12;
      to[i][1] = m1 * ox + m5 * oy + m9  * oz + m13;
      to[i][2] = m2 * ox + m6 * oy + m10 * oz + m14;
      to[i][3] = m3 * ox + m7 * oy + m11 * oz + m15;
   }
d305 6
a310 2
   END_FAST_MATH;
}
d312 1
a312 5
static void transform_points3_identity( const GLmatrix *mat, 
					GLuint n,
					const GLfloat *from, 
					GLfloat to[][4],
					GLuint stride )
d314 8
a321 10
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_IDENTITY);
   for (i=0;i<n;i++, from+=stride) {
      to[i][0] = from[0];
      to[i][1] = from[1];
      to[i][2] = from[2];
      to[i][3] = 1.0F;
   }
   END_FAST_MATH;
d324 7
a330 21
static void transform_points3_2d( const GLmatrix *mat, 
				  GLuint n,
				  const GLfloat *from, 
				  GLfloat to[][4],
				  GLuint stride )
{
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
   const GLfloat m12 = m[12], m13 = m[13];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_2D);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2];
      to[i][0] = m0 * ox + m4 * oy            + m12       ;
      to[i][1] = m1 * ox + m5 * oy            + m13       ;
      to[i][2] =                   +       oz             ;
      to[i][3] =                                      1.0F;
   }
   END_FAST_MATH;
}
d332 2
a333 5
static void transform_points3_2d_no_rot( const GLmatrix *mat, 
					 GLuint n,
					 const GLfloat *from, 
					 GLfloat to[][4],
					 GLuint stride )
d335 9
a343 11
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_2D_NO_ROT);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2];
      to[i][0] = m0 * ox                      + m12       ;
      to[i][1] =           m5 * oy            + m13       ;
      to[i][2] =                   +       oz             ;
      to[i][3] =                                      1.0F;
d345 2
a346 1
   END_FAST_MATH;
a348 22
static void transform_points3_3d( const GLmatrix *mat, 
				  GLuint n,
				  const GLfloat *from, 
				  GLfloat to[][4],
				  GLuint stride )
{
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4], m5 = m[5];
   const GLfloat m6 = m[6], m8 = m[8], m9 = m[9], m10 = m[10];
   const GLfloat m12 = m[12], m13 = m[13], m14 = m[14];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_3D);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2];
      to[i][0] = m0 * ox + m4 * oy +  m8 * oz + m12       ;
      to[i][1] = m1 * ox + m5 * oy +  m9 * oz + m13       ;
      to[i][2] = m2 * ox + m6 * oy + m10 * oz + m14       ;
      to[i][3] =                                      1.0F;
   }
   END_FAST_MATH;
}
a349 23
/* previously known as ortho...
 */
static void transform_points3_3d_no_rot( const GLmatrix *mat, 
					 GLuint n,
					 const GLfloat *from, 
					 GLfloat to[][4],
					 GLuint stride )
{
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m5 = m[5];
   const GLfloat m10 = m[10], m12 = m[12], m13 = m[13], m14 = m[14];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_3D_NO_ROT);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2];
      to[i][0] = m0 * ox                      + m12       ;
      to[i][1] =           m5 * oy            + m13       ;
      to[i][2] =                     m10 * oz + m14       ;
      to[i][3] =                                      1.0F;
   }
   END_FAST_MATH;
}
d351 8
a358 20
static void transform_points3_perspective( const GLmatrix *mat, 
					   GLuint n,
					   const GLfloat *from, 
					   GLfloat to[][4],
					   GLuint stride )
{
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m5 = m[5], m8 = m[8], m9 = m[9];
   const GLfloat m10 = m[10], m14 = m[14];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_PERSPECTIVE);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2];
      to[i][0] = m0 * ox           + m8  * oz       ;
      to[i][1] =           m5 * oy + m9  * oz       ;
      to[i][2] =                     m10 * oz + m14 ;
      to[i][3] =                          -oz       ;
   }
   END_FAST_MATH;
d363 1
a363 5
static void transform_points4_general( const GLmatrix *mat, 
				       GLuint n,
				       const GLfloat *from, 
				       GLfloat to[][4],
				       GLuint stride )
d365 8
a372 14
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
   const GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
   const GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
   const GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_GENERAL);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
      to[i][0] = m0 * ox + m4 * oy + m8  * oz + m12 * ow;
      to[i][1] = m1 * ox + m5 * oy + m9  * oz + m13 * ow;
      to[i][2] = m2 * ox + m6 * oy + m10 * oz + m14 * ow;
      to[i][3] = m3 * ox + m7 * oy + m11 * oz + m15 * ow;
d374 2
a375 1
   END_FAST_MATH;
d378 1
a378 5
static void transform_points4_identity( const GLmatrix *mat, 
					GLuint n,
					const GLfloat *from, 
					GLfloat to[][4],
					GLuint stride )
d380 8
a387 8
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_IDENTITY);
   for (i=0;i<n;i++, from+=stride) {
      to[i][0] = from[0];
      to[i][1] = from[1];
      to[i][2] = from[2];
      to[i][3] = from[3];
d389 2
a390 1
   END_FAST_MATH;
d393 10
a402 18
static void transform_points4_2d( const GLmatrix *mat, 
				  GLuint n,
				  const GLfloat *from, 
				  GLfloat to[][4],
				  GLuint stride )
{
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
   const GLfloat m12 = m[12], m13 = m[13];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_2D);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
      to[i][0] = m0 * ox + m4 * oy            + m12 * ow;
      to[i][1] = m1 * ox + m5 * oy            + m13 * ow;
      to[i][2] =                   +       oz           ;
      to[i][3] =                                      ow;
d404 2
a405 1
   END_FAST_MATH;
d408 1
a408 5
static void transform_points4_2d_no_rot( const GLmatrix *mat, 
					 GLuint n,
					 const GLfloat *from, 
					 GLfloat to[][4],
					 GLuint stride )
d410 8
a417 11
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_2D_NO_ROT);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
      to[i][0] = m0 * ox                      + m12 * ow;
      to[i][1] =           m5 * oy            + m13 * ow;
      to[i][2] =                   +       oz           ;
      to[i][3] =                                      ow;
d419 2
a420 1
   END_FAST_MATH;
a422 22
static void transform_points4_3d( const GLmatrix *mat, 
				  GLuint n,
				  const GLfloat *from, 
				  GLfloat to[][4],
				  GLuint stride )
{
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4], m5 = m[5];
   const GLfloat m6 = m[6], m8 = m[8], m9 = m[9], m10 = m[10];
   const GLfloat m12 = m[12], m13 = m[13], m14 = m[14];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_3D);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
      to[i][0] = m0 * ox + m4 * oy +  m8 * oz + m12 * ow;
      to[i][1] = m1 * ox + m5 * oy +  m9 * oz + m13 * ow;
      to[i][2] = m2 * ox + m6 * oy + m10 * oz + m14 * ow;
      to[i][3] =                                      ow;
   }
   END_FAST_MATH;
}
d424 7
a430 18
static void transform_points4_3d_no_rot( const GLmatrix *mat, 
					 GLuint n,
					 const GLfloat *from, 
					 GLfloat to[][4],
					 GLuint stride )
{
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m5 = m[5];
   const GLfloat m10 = m[10], m12 = m[12], m13 = m[13], m14 = m[14];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_3D_NO_ROT);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
      to[i][0] = m0 * ox                      + m12 * ow;
      to[i][1] =           m5 * oy            + m13 * ow;
      to[i][2] =                     m10 * oz + m14 * ow;
      to[i][3] =                                      ow;
a431 1
   END_FAST_MATH;
d434 7
a440 63
static void transform_points4_perspective( const GLmatrix *mat, 
					   GLuint n,
					   const GLfloat *from, 
					   GLfloat to[][4],
					   GLuint stride )
{
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0], m5 = m[5], m8 = m[8], m9 = m[9];
   const GLfloat m10 = m[10], m14 = m[14];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_PERSPECTIVE);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
      to[i][0] = m0 * ox           + m8  * oz            ;
      to[i][1] =           m5 * oy + m9  * oz            ;
      to[i][2] =                     m10 * oz + m14 * ow ;
      to[i][3] =                          -oz            ;
   }
   END_FAST_MATH;
}



static void transform_cliptest_points3_3d_no_rot( const GLmatrix *mat, 
                                                  GLuint n,
                                                  const GLfloat *from, 
                                                  GLfloat to[][4],
                                                  GLuint stride,
                                                  GLubyte clipMask[],
                                                  GLubyte *orMask, 
                                                  GLubyte *andMask )
{
   GLubyte tmpOrMask = *orMask;
   GLubyte tmpAndMask = *andMask;
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0],                          m12 = m[12];
   const GLfloat             m5 = m[5],              m13 = m[13];
   const GLfloat                         m10 = m[10],  m14 = m[14];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_3D_NO_ROT);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2];
      volatile GLfloat cx = m0 * ox                      + m12 ;
      volatile GLfloat cy =           m5 * oy            + m13 ;
      volatile GLfloat cz =                     m10 * oz + m14 ;
      GLubyte mask = 0;
      to[i][0] = cx;
      to[i][1] = cy;
      to[i][2] = cz;
      to[i][3] = 1.0F;
      if (cx >  1.0F)       mask |= CLIP_RIGHT_BIT;
      else if (cx < -1.0F)  mask |= CLIP_LEFT_BIT;
      if (cy >  1.0F)       mask |= CLIP_TOP_BIT;
      else if (cy < -1.0F)  mask |= CLIP_BOTTOM_BIT;
      if (cz >  1.0F)       mask |= CLIP_FAR_BIT;
      else if (cz < -1.0F)  mask |= CLIP_NEAR_BIT;
      if (mask) {
	 clipMask[i] |= mask;
	 tmpOrMask |= mask;
      }
      tmpAndMask &= mask;
a441 3
   *orMask = tmpOrMask;
   *andMask = tmpAndMask;
   END_FAST_MATH;
d444 1
a444 241

static void transform_cliptest_points3_perspective( const GLmatrix *mat, 
                                                    GLuint n,
                                                    const GLfloat *from, 
                                                    GLfloat to[][4],
                                                    GLuint stride,
                                                    GLubyte clipMask[],
                                                    GLubyte *orMask, 
                                                    GLubyte *andMask )
{
   GLubyte tmpOrMask = *orMask;
   GLubyte tmpAndMask = *andMask;
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0],              m8 = m[8]              ;
   const GLfloat             m5 = m[5],  m9 = m[9]              ;
   const GLfloat                         m10 = m[10],  m14 = m[14];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_PERSPECTIVE);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2];
      volatile GLfloat cx = m0 * ox           + m8  * oz       ;
      volatile GLfloat cy =           m5 * oy + m9  * oz       ;
      volatile GLfloat cz =                     m10 * oz + m14 ;
      volatile GLfloat cw =                          -oz       ;
      GLubyte mask = 0;
      to[i][0] = cx;
      to[i][1] = cy;
      to[i][2] = cz;
      to[i][3] = cw;
      if (cx >  cw)       mask |= CLIP_RIGHT_BIT;
      else if (cx < -cw)  mask |= CLIP_LEFT_BIT;
      if (cy >  cw)       mask |= CLIP_TOP_BIT;
      else if (cy < -cw)  mask |= CLIP_BOTTOM_BIT;
      if (cz >  cw)       mask |= CLIP_FAR_BIT;
      else if (cz < -cw)  mask |= CLIP_NEAR_BIT;
      if (mask) {
	 clipMask[i] |= mask;
	 tmpOrMask |= mask;
      }
      tmpAndMask &= mask;
   }
   *orMask = tmpOrMask;
   *andMask = tmpAndMask;
   END_FAST_MATH;
}

static void transform_cliptest_points3_general( const GLmatrix *mat, 
                                                GLuint n,
                                                const GLfloat *from, 
                                                GLfloat to[][4],
                                                GLuint stride,
                                                GLubyte clipMask[],
                                                GLubyte *orMask, 
                                                GLubyte *andMask )
{
   GLubyte tmpOrMask = *orMask;
   GLubyte tmpAndMask = *andMask;
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
   const GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
   const GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
   const GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_GENERAL);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2];
      volatile GLfloat cx = m0 * ox + m4 * oy + m8  * oz + m12;
      volatile GLfloat cy = m1 * ox + m5 * oy + m9  * oz + m13;
      volatile GLfloat cz = m2 * ox + m6 * oy + m10 * oz + m14;
      volatile GLfloat cw = m3 * ox + m7 * oy + m11 * oz + m15;
      GLubyte mask = 0;
      to[i][0] = cx;
      to[i][1] = cy;
      to[i][2] = cz;
      to[i][3] = cw;
      if (cx >  cw)       mask |= CLIP_RIGHT_BIT;
      else if (cx < -cw)  mask |= CLIP_LEFT_BIT;
      if (cy >  cw)       mask |= CLIP_TOP_BIT;
      else if (cy < -cw)  mask |= CLIP_BOTTOM_BIT;
      if (cz >  cw)       mask |= CLIP_FAR_BIT;
      else if (cz < -cw)  mask |= CLIP_NEAR_BIT;
      if (mask) {
	 clipMask[i] |= mask;
	 tmpOrMask |= mask;
      }
      tmpAndMask &= mask;
   }
   *orMask = tmpOrMask;
   *andMask = tmpAndMask;
   END_FAST_MATH;
}

static void transform_cliptest_points4_perspective( const GLmatrix *mat, 
                                                    GLuint n,
                                                    const GLfloat *from, 
                                                    GLfloat to[][4],
                                                    GLuint stride,
                                                    GLubyte clipMask[],
                                                    GLubyte *orMask, 
                                                    GLubyte *andMask )
{
   GLubyte tmpOrMask = *orMask;
   GLubyte tmpAndMask = *andMask;
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0],              m8 = m[8]              ;
   const GLfloat             m5 = m[5],  m9 = m[9]              ;
   const GLfloat                         m10 = m[10],  m14 = m[14];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_PERSPECTIVE);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
      volatile GLfloat cx = m0 * ox +           m8  * oz           ;
      volatile GLfloat cy =           m5 * oy + m9  * oz           ;
      volatile GLfloat cz =                     m10 * oz + m14 * ow;
      volatile GLfloat cw =                          -oz           ;
      GLubyte mask = 0;
      to[i][0] = cx;
      to[i][1] = cy;
      to[i][2] = cz;
      to[i][3] = cw;
      if (cx >  cw)       mask |= CLIP_RIGHT_BIT;
      else if (cx < -cw)  mask |= CLIP_LEFT_BIT;
      if (cy >  cw)       mask |= CLIP_TOP_BIT;
      else if (cy < -cw)  mask |= CLIP_BOTTOM_BIT;
      if (cz >  cw)       mask |= CLIP_FAR_BIT;
      else if (cz < -cw)  mask |= CLIP_NEAR_BIT;
      if (mask) {
	 clipMask[i] |= mask;
	 tmpOrMask |= mask;
      }
      tmpAndMask &= mask;
   }
   *orMask = tmpOrMask;
   *andMask = tmpAndMask;
   END_FAST_MATH;
}

static void transform_cliptest_points4_3d_no_rot( const GLmatrix *mat, 
                                                  GLuint n,
                                                  const GLfloat *from, 
                                                  GLfloat to[][4],
                                                  GLuint stride,
                                                  GLubyte clipMask[],
                                                  GLubyte *orMask, 
                                                  GLubyte *andMask )
{
   GLubyte tmpOrMask = *orMask;
   GLubyte tmpAndMask = *andMask;
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0],                           m12 = m[12];
   const GLfloat             m5 = m[5],               m13 = m[13];
   const GLfloat                        m10 = m[10],  m14 = m[14];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_3D_NO_ROT);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
      volatile GLfloat cx = m0 * ox +                      m12 * ow;
      volatile GLfloat cy =           m5 * oy +            m13 * ow;
      volatile GLfloat cz =                     m10 * oz + m14 * ow;
      volatile GLfloat cw =                                      ow;
      GLubyte mask = 0;
      to[i][0] = cx;
      to[i][1] = cy;
      to[i][2] = cz;
      to[i][3] = cw;
      if (cx >  cw)       mask |= CLIP_RIGHT_BIT;
      else if (cx < -cw)  mask |= CLIP_LEFT_BIT;
      if (cy >  cw)       mask |= CLIP_TOP_BIT;
      else if (cy < -cw)  mask |= CLIP_BOTTOM_BIT;
      if (cz >  cw)       mask |= CLIP_FAR_BIT;
      else if (cz < -cw)  mask |= CLIP_NEAR_BIT;
      if (mask) {
	 clipMask[i] |= mask;
	 tmpOrMask |= mask;
      }
      tmpAndMask &= mask;
   }
   *orMask = tmpOrMask;
   *andMask = tmpAndMask;
   END_FAST_MATH;
}


static void transform_cliptest_points4_general( const GLmatrix *mat, 
                                                GLuint n,
                                                const GLfloat *from, 
                                                GLfloat to[][4],
                                                GLuint stride,
                                                GLubyte clipMask[],
                                                GLubyte *orMask, 
                                                GLubyte *andMask )
{
   GLubyte tmpOrMask = *orMask;
   GLubyte tmpAndMask = *andMask;
   const GLfloat *m = mat->m;
   const GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
   const GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
   const GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
   const GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
   GLuint i;
   START_FAST_MATH;
   ASSERT(mat->type == MATRIX_GENERAL);
   for (i=0;i<n;i++, from+=stride) {
      const GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
      volatile GLfloat cx = m0 * ox + m4 * oy + m8  * oz + m12 * ow;
      volatile GLfloat cy = m1 * ox + m5 * oy + m9  * oz + m13 * ow;
      volatile GLfloat cz = m2 * ox + m6 * oy + m10 * oz + m14 * ow;
      volatile GLfloat cw = m3 * ox + m7 * oy + m11 * oz + m15 * ow;
      GLubyte mask = 0;
      to[i][0] = cx;
      to[i][1] = cy;
      to[i][2] = cz;
      to[i][3] = cw;
      if (cx >  cw)       mask |= CLIP_RIGHT_BIT;
      else if (cx < -cw)  mask |= CLIP_LEFT_BIT;
      if (cy >  cw)       mask |= CLIP_TOP_BIT;
      else if (cy < -cw)  mask |= CLIP_BOTTOM_BIT;
      if (cz >  cw)       mask |= CLIP_FAR_BIT;
      else if (cz < -cw)  mask |= CLIP_NEAR_BIT;
      if (mask) {
	 clipMask[i] |= mask;
	 tmpOrMask |= mask;
      }
      tmpAndMask &= mask;
   }
   *orMask = tmpOrMask;
   *andMask = tmpAndMask;
   END_FAST_MATH;
}



static void cliptest_points4( GLuint n, 
			      CONST GLfloat vClip[][4], 
			      GLubyte clipMask[],
			      GLubyte *orMask, 
			      GLubyte *andMask )
d446 5
a450 17
   GLubyte tmpOrMask = *orMask;
   GLubyte tmpAndMask = *andMask;
   GLuint i;
   START_FAST_MATH;
   for (i=0;i<n;i++) {
      const GLfloat cx = vClip[i][0], cy = vClip[i][1];
      const GLfloat cz = vClip[i][2], cw = vClip[i][3];
      GLubyte mask = 0;
      if (cx >  cw)       mask |= CLIP_RIGHT_BIT;
      else if (cx < -cw)  mask |= CLIP_LEFT_BIT;
      if (cy >  cw)       mask |= CLIP_TOP_BIT;
      else if (cy < -cw)  mask |= CLIP_BOTTOM_BIT;
      if (cz >  cw)       mask |= CLIP_FAR_BIT;
      else if (cz < -cw)  mask |= CLIP_NEAR_BIT;
      clipMask[i] |= mask;
      tmpOrMask |= mask;
      tmpAndMask &= mask;
a451 4

   *orMask = tmpOrMask;
   *andMask = tmpAndMask;
   END_FAST_MATH;
d454 1
a454 5
static void cliptest_points3( GLuint n, 
			      CONST GLfloat vClip[][4], 
			      GLubyte clipMask[],
			      GLubyte *orMask, 
			      GLubyte *andMask )
d456 5
a460 16
   GLubyte tmpOrMask = *orMask;
   GLubyte tmpAndMask = *andMask;
   GLuint i;
   START_FAST_MATH;
   for (i=0;i<n;i++) {
      const GLfloat cx = vClip[i][0], cy = vClip[i][1], cz = vClip[i][2];
      GLubyte mask = 0;
      if (cx >  1.0)       mask |= CLIP_RIGHT_BIT;
      else if (cx < -1.0)  mask |= CLIP_LEFT_BIT;
      if (cy >  1.0)       mask |= CLIP_TOP_BIT;
      else if (cy < -1.0)  mask |= CLIP_BOTTOM_BIT;
      if (cz >  1.0)       mask |= CLIP_FAR_BIT;
      else if (cz < -1.0)  mask |= CLIP_NEAR_BIT;
      clipMask[i] |= mask;
      tmpOrMask |= mask;
      tmpAndMask &= mask;
a461 4

   *orMask = tmpOrMask;
   *andMask = tmpAndMask;
   END_FAST_MATH;
d464 1
a464 5
static void cliptest_points2( GLuint n, 
			      CONST GLfloat vClip[][4], 
			      GLubyte clipMask[],
			      GLubyte *orMask, 
			      GLubyte *andMask )
d466 5
a470 14
   GLubyte tmpOrMask = *orMask;
   GLubyte tmpAndMask = *andMask;
   GLuint i;
   START_FAST_MATH;
   for (i=0;i<n;i++) {
      const GLfloat cx = vClip[i][0], cy = vClip[i][1];
      GLubyte mask = 0;
      if (cx >  1.0)       mask |= CLIP_RIGHT_BIT;
      else if (cx < -1.0)  mask |= CLIP_LEFT_BIT;
      if (cy >  1.0)       mask |= CLIP_TOP_BIT;
      else if (cy < -1.0)  mask |= CLIP_BOTTOM_BIT;
      clipMask[i] |= mask;
      tmpOrMask |= mask;
      tmpAndMask &= mask;
a471 4

   *orMask = tmpOrMask;
   *andMask = tmpAndMask;
   END_FAST_MATH;
d474 1
a474 54


typedef void (*transform_func)( const GLmatrix *mat, 
				GLuint n,
				const GLfloat *from, 
				GLfloat to[][4],
				GLuint stride );

typedef void (*clip_func)( GLuint n, 
			   CONST GLfloat vClip[][4], 
			   GLubyte clipMask[],
			   GLubyte *orMask, 
			   GLubyte *andMask );

typedef void (*project_and_clip_func)( const GLmatrix *mat, 
				       GLuint n,
				       const GLfloat *from, 
				       GLfloat to[][4],
				       GLuint stride,
				       GLubyte clipMask[],
				       GLubyte *orMask, 
				       GLubyte *andMask );


/*
 * Table of optimized vertex transformation functions.
 * The first array index is the vertex size (0D, 1D, 2D, 3D, 4D)
 * The second array index is the matrix type (one of MATRIX_* constants)
 */
static transform_func transform_tab[5][7];


/*
 * Table of optimized cliptest functions.
 * The array index is the vertex size (0D, 1D, 2D, 3D, 4D)
 */
static clip_func clip_tab[5];


/*
 * Table of project and clip functions.
 * The first array index is the vertex size (0D, 1D, 2D, 3D, 4D)
 * The second array index is the matrix type (one of MATRIX_* constants)
 */
static project_and_clip_func project_clip_tab[5][7];



/*
 * This is called only once.  It initializes several tables with pointers
 * to optimized transformation functions.  This is where we can test for
 * AMD 3Dnow! capability, Intel Katmai, etc. and hook in the right code.
 */
void gl_init_transformation( void )
d476 8
a483 51
   /*
    * Point transformation (modelview and sometimes projection)
    */

   /* 0-D points (never happens) */
   transform_tab[0][MATRIX_GENERAL]     = 0;
   transform_tab[0][MATRIX_IDENTITY]    = 0;
   transform_tab[0][MATRIX_3D_NO_ROT]   = 0;
   transform_tab[0][MATRIX_PERSPECTIVE] = 0;
   transform_tab[0][MATRIX_2D]          = 0;
   transform_tab[0][MATRIX_2D_NO_ROT]   = 0;
   transform_tab[0][MATRIX_3D]          = 0;

   /* 1-D points (never happens) */
   transform_tab[1][MATRIX_GENERAL]     = 0;
   transform_tab[1][MATRIX_IDENTITY]    = 0;
   transform_tab[1][MATRIX_3D_NO_ROT]   = 0;
   transform_tab[1][MATRIX_PERSPECTIVE] = 0;
   transform_tab[1][MATRIX_2D]          = 0;
   transform_tab[1][MATRIX_2D_NO_ROT]   = 0;
   transform_tab[1][MATRIX_3D]          = 0;

   /* 2-D points */
   transform_tab[2][MATRIX_GENERAL]     = transform_points2_general;
   transform_tab[2][MATRIX_IDENTITY]    = transform_points2_identity;
   transform_tab[2][MATRIX_3D_NO_ROT]   = transform_points2_3d;      /*yes!*/
   transform_tab[2][MATRIX_PERSPECTIVE] = transform_points2_general; /*yes!*/
   transform_tab[2][MATRIX_2D]          = transform_points2_2d;
   transform_tab[2][MATRIX_2D_NO_ROT]   = transform_points2_2d_no_rot;
   transform_tab[2][MATRIX_3D]          = transform_points2_3d;

   /* 3-D points */
#if defined(USE_X86_ASM)
   transform_tab[3][MATRIX_GENERAL]     = asm_transform_points3_general;
   transform_tab[3][MATRIX_IDENTITY]    = asm_transform_points3_identity;
   transform_tab[3][MATRIX_3D_NO_ROT]   = transform_points3_3d_no_rot; /*yes!*/
   transform_tab[3][MATRIX_PERSPECTIVE] = transform_points3_perspective;/*yes*/
   transform_tab[3][MATRIX_2D]          = asm_transform_points3_2d;
   transform_tab[3][MATRIX_2D_NO_ROT]   = asm_transform_points3_2d_no_rot;
   transform_tab[3][MATRIX_3D]          = asm_transform_points3_3d;
#elif defined(USE_3DNOW_ASM)
   /* AMD 3D-Now! functions hook in here */
#else
   transform_tab[3][MATRIX_GENERAL]     = transform_points3_general;
   transform_tab[3][MATRIX_IDENTITY]    = transform_points3_identity;
   transform_tab[3][MATRIX_3D_NO_ROT]   = transform_points3_3d_no_rot;
   transform_tab[3][MATRIX_PERSPECTIVE] = transform_points3_perspective;
   transform_tab[3][MATRIX_2D]          = transform_points3_2d;
   transform_tab[3][MATRIX_2D_NO_ROT]   = transform_points3_2d_no_rot;
   transform_tab[3][MATRIX_3D]          = transform_points3_3d;
#endif
d485 3
a487 20
   /* 4-D points */
#if defined(USE_X86_ASM)
   transform_tab[4][MATRIX_GENERAL]     = asm_transform_points4_general;
   transform_tab[4][MATRIX_IDENTITY]    = asm_transform_points4_identity;
   transform_tab[4][MATRIX_3D_NO_ROT]   = asm_transform_points4_3d_no_rot;
   transform_tab[4][MATRIX_PERSPECTIVE] = asm_transform_points4_perspective;
   transform_tab[4][MATRIX_2D]          = asm_transform_points4_2d;
   transform_tab[4][MATRIX_2D_NO_ROT]   = asm_transform_points4_2d_no_rot;
   transform_tab[4][MATRIX_3D]          = asm_transform_points4_3d;
#elif defined(USE_3DNOW_ASM)
   /* AMD 3D-Now! functions hook in here */
#else
   transform_tab[4][MATRIX_GENERAL]     = transform_points4_general;
   transform_tab[4][MATRIX_IDENTITY]    = transform_points4_identity;
   transform_tab[4][MATRIX_3D_NO_ROT]   = transform_points4_3d_no_rot;
   transform_tab[4][MATRIX_PERSPECTIVE] = transform_points4_perspective;
   transform_tab[4][MATRIX_2D]          = transform_points4_2d;
   transform_tab[4][MATRIX_2D_NO_ROT]   = transform_points4_2d_no_rot;
   transform_tab[4][MATRIX_3D]          = transform_points4_3d;
#endif
d489 6
d496 8
a503 16
   /*
    * Clip testing
    */
#ifdef USE_X86_ASM
   clip_tab[0] = 0;
   clip_tab[1] = 0;
   clip_tab[2] = asm_cliptest_points4,
   clip_tab[3] = asm_cliptest_points4,
   clip_tab[4] = asm_cliptest_points4,
#else
   clip_tab[0] = 0;
   clip_tab[1] = 0;
   clip_tab[2] = cliptest_points2;
   clip_tab[3] = cliptest_points3;
   clip_tab[4] = cliptest_points4;
#endif
d505 2
d508 1
a508 51
   /*
    * Projection and clip testing
    */

   /* 0-D points (never happens) */
   project_clip_tab[0][MATRIX_GENERAL]     = 0;
   project_clip_tab[0][MATRIX_IDENTITY]    = 0;
   project_clip_tab[0][MATRIX_3D_NO_ROT]   = 0;
   project_clip_tab[0][MATRIX_PERSPECTIVE] = 0;
   project_clip_tab[0][MATRIX_2D]          = 0;
   project_clip_tab[0][MATRIX_2D_NO_ROT]   = 0;
   project_clip_tab[0][MATRIX_3D]          = 0;

   /* 1-D points (never happens) */
   project_clip_tab[1][MATRIX_GENERAL]     = 0;
   project_clip_tab[1][MATRIX_IDENTITY]    = 0;
   project_clip_tab[1][MATRIX_3D_NO_ROT]   = 0;
   project_clip_tab[1][MATRIX_PERSPECTIVE] = 0;
   project_clip_tab[1][MATRIX_2D]          = 0;
   project_clip_tab[1][MATRIX_2D_NO_ROT]   = 0;
   project_clip_tab[1][MATRIX_3D]          = 0;

   /* 2-D points (never happens) */
   project_clip_tab[2][MATRIX_GENERAL]     = 0;
   project_clip_tab[2][MATRIX_IDENTITY]    = 0;
   project_clip_tab[2][MATRIX_3D_NO_ROT]   = 0;
   project_clip_tab[2][MATRIX_PERSPECTIVE] = 0;
   project_clip_tab[2][MATRIX_2D]          = 0;
   project_clip_tab[2][MATRIX_2D_NO_ROT]   = 0;
   project_clip_tab[2][MATRIX_3D]          = 0;

   /* 3-D points */
#if defined(USE_X86_ASM)
   project_clip_tab[3][MATRIX_GENERAL]     = 0;
   project_clip_tab[3][MATRIX_IDENTITY]    = 0;
   project_clip_tab[3][MATRIX_3D_NO_ROT]   = 0;
   project_clip_tab[3][MATRIX_PERSPECTIVE] = 0;
   project_clip_tab[3][MATRIX_2D]          = 0;
   project_clip_tab[3][MATRIX_2D_NO_ROT]   = 0;
   project_clip_tab[3][MATRIX_3D]          = 0;
#elif defined(USE_3DNOW_ASM)
   /* AMD 3D-Now! functions hook in here */
#else
   project_clip_tab[3][MATRIX_GENERAL]     = transform_cliptest_points3_general;
   project_clip_tab[3][MATRIX_IDENTITY]    = 0;
   project_clip_tab[3][MATRIX_3D_NO_ROT]   = transform_cliptest_points3_3d_no_rot;
   project_clip_tab[3][MATRIX_PERSPECTIVE] = transform_cliptest_points3_perspective;
   project_clip_tab[3][MATRIX_2D]          = 0;
   project_clip_tab[3][MATRIX_2D_NO_ROT]   = 0;
   project_clip_tab[3][MATRIX_3D]          = 0;
#endif
d510 3
a512 20
   /* 4-D points */
#if defined(USE_X86_ASM)
   project_clip_tab[4][MATRIX_GENERAL]     = 0;
   project_clip_tab[4][MATRIX_IDENTITY]    = 0;
   project_clip_tab[4][MATRIX_3D_NO_ROT]   = 0;
   project_clip_tab[4][MATRIX_PERSPECTIVE] = 0;
   project_clip_tab[4][MATRIX_2D]          = 0;
   project_clip_tab[4][MATRIX_2D_NO_ROT]   = 0;
   project_clip_tab[4][MATRIX_3D]          = 0;
#elif defined(USE_3DNOW_ASM)
   /* AMD 3D-Now! functions hook in here */
#else
   project_clip_tab[4][MATRIX_GENERAL]     = transform_cliptest_points4_general;
   project_clip_tab[4][MATRIX_IDENTITY]    = 0;
   project_clip_tab[4][MATRIX_3D_NO_ROT]   = transform_cliptest_points4_3d_no_rot;
   project_clip_tab[4][MATRIX_PERSPECTIVE] = transform_cliptest_points4_perspective;
   project_clip_tab[4][MATRIX_2D]          = 0;
   project_clip_tab[4][MATRIX_2D_NO_ROT]   = 0;
   project_clip_tab[4][MATRIX_3D]          = 0;
#endif
d514 6
d523 4
d528 6
a533 9
void gl_project_and_cliptest_points( const GLmatrix *mat,
				     GLuint n, 
				     const GLfloat *from,
				     GLfloat to[][4],
				     GLuint stride,
				     GLubyte clipMask[],
				     GLubyte *orMask, 
				     GLubyte *andMask,
				     GLuint vec_size)
d535 7
a541 4
{
   if (project_clip_tab[vec_size][mat->type]) {
      (project_clip_tab[vec_size][mat->type])(mat, n, from, to, stride,
					      clipMask, orMask, andMask);
a542 28
   else {
      (transform_tab[vec_size][mat->type])(mat, n, from, to, stride);

#ifdef USE_X86_ASM
      asm_cliptest_points4(n, to, clipMask, orMask, andMask);
#else
      if (vec_size < 4 && !TEST_MAT_FLAGS(mat, MAT_FLAGS_3D))
	 vec_size = 4;
      else if (vec_size < 3 && (mat->type != MATRIX_2D ||
				mat->type != MATRIX_2D_NO_ROT))
	 vec_size = 3;
      
      (clip_tab[vec_size])(n, to, clipMask, orMask, andMask);
#endif
   }
}


void gl_transform_points( const GLmatrix *mat,
			  GLuint n,
			  const GLfloat *from, 
			  GLfloat to[][4],
			  GLuint stride,
			  GLuint vec_size)
{
/*    printf("gl_t_p: sz: %d type: %d from: %x stride: %d\n", */
/* 	  vec_size, mat->type, from, stride); fflush(stdout); */
   (transform_tab[vec_size][mat->type])(mat, n, from, to, stride);
@


3.12
log
@new copyright
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.11 1998/11/08 22:37:53 brianp Exp brianp $ */
d30 3
d96 3
@


3.11
log
@fixed a typo
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.10 1998/11/07 14:21:55 brianp Exp brianp $ */
d6 19
a24 15
 * Copyright (C) 1995-1998  Brian Paul
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
d30 3
@


3.10
log
@replaced static array initialization with gl_init_transformation()
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.9 1998/11/07 13:40:07 brianp Exp brianp $ */
d26 3
d1167 1
a1167 1
   clip_tab[4] = cliptest_points;
@


3.9
log
@added a bunch of const and volatile qualifiers
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.8 1998/11/01 20:20:29 brianp Exp brianp $ */
d26 3
d1050 25
a1074 1
static transform_func transform_tab[5][7] = 
d1076 4
d1081 7
a1087 1
   { 0, 0, 0, 0, 0, 0, 0 },
d1090 7
a1096 1
   { 0, 0, 0, 0, 0, 0, 0 },
d1099 7
a1105 9
   {
      transform_points2_general,
      transform_points2_identity,
      transform_points2_3d,          /* no ortho speedup */
      transform_points2_general,     /* no perspective speedup */
      transform_points2_2d,
      transform_points2_2d_no_rot,
      transform_points2_3d
   },
a1107 1
   {
d1109 7
a1115 7
      asm_transform_points3_general,
      asm_transform_points3_identity,
      transform_points3_3d_no_rot,       /* no asm implemented */
      transform_points3_perspective,     /* no asm implemented */
      asm_transform_points3_2d,
      asm_transform_points3_2d_no_rot,
      asm_transform_points3_3d
d1117 1
a1117 1
      /* AMD 3D-Now! functions hook in here */
d1119 7
a1125 7
      transform_points3_general,
      transform_points3_identity,
      transform_points3_3d_no_rot,
      transform_points3_perspective,
      transform_points3_2d,
      transform_points3_2d_no_rot,
      transform_points3_3d
a1126 1
   },
a1128 1
   {
d1130 7
a1136 7
      asm_transform_points4_general,
      asm_transform_points4_identity,
      asm_transform_points4_3d_no_rot,
      asm_transform_points4_perspective,
      asm_transform_points4_2d,
      asm_transform_points4_2d_no_rot,
      asm_transform_points4_3d
d1138 1
a1138 1
      /* AMD 3D-Now! functions hook in here */
d1140 7
a1146 7
      transform_points4_general,
      transform_points4_identity,
      transform_points4_3d_no_rot,
      transform_points4_perspective,
      transform_points4_2d,
      transform_points4_2d_no_rot,
      transform_points4_3d
a1147 2
   }
};
d1150 3
a1152 6
/*
 * Table of optimized cliptest functions.
 * The array index is the vertex size (0D, 1D, 2D, 3D, 4D)
 */
static clip_func clip_tab[5] = 
{
d1154 5
a1158 5
   0,
   0,
   asm_cliptest_points4,
   asm_cliptest_points4,
   asm_cliptest_points4
d1160 5
a1164 5
   0,
   0,
   cliptest_points2,
   cliptest_points3,
   cliptest_points4
a1165 1
};
d1168 3
a1171 7
/*
 * Table of project and clip functions.
 * The first array index is the vertex size (0D, 1D, 2D, 3D, 4D)
 * The second array index is the matrix type (one of MATRIX_* constants)
 */
static project_and_clip_func project_clip_tab[5][7] = 
{
d1173 7
a1179 1
   { 0, 0, 0, 0, 0, 0, 0 },
d1182 7
a1188 1
   { 0, 0, 0, 0, 0, 0, 0 },
d1191 7
a1197 1
   { 0, 0, 0, 0, 0, 0, 0 },
a1199 1
   {
d1201 9
a1209 7
      0,
      0,
      0,
      0,
      0,
      0,
      0
d1211 7
a1217 7
      transform_cliptest_points3_general, 
      0, 
      transform_cliptest_points3_3d_no_rot,
      transform_cliptest_points3_perspective,
      0, 
      0, 
      0
a1218 1
   },
a1220 1
   {
d1222 7
a1228 7
      0,
      0,
      0,
      0,
      0,
      0,
      0
d1230 1
a1230 1
      /* AMD 3D-Now! functions hook in here */
d1232 7
a1238 7
      transform_cliptest_points4_general, 
      0, 
      transform_cliptest_points4_3d_no_rot,
      transform_cliptest_points4_perspective,
      0, 
      0, 
      0
d1240 3
a1242 2
   }
};
d1256 1
a1256 2
   if (project_clip_tab[vec_size][mat->type]) 
   {
d1260 1
a1260 2
   else
   {
@


3.8
log
@updated with Josh's new x86 code
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.7 1998/10/29 04:10:21 brianp Exp $ */
d26 3
d65 2
d239 4
a242 4
   GLfloat m0 = m[0],  m4 = m[4],  m12 = m[12];
   GLfloat m1 = m[1],  m5 = m[5],  m13 = m[13];
   GLfloat m2 = m[2],  m6 = m[6],  m14 = m[14];
   GLfloat m3 = m[3],  m7 = m[7],  m15 = m[15];
d247 1
a247 1
      GLfloat ox = from[0], oy = from[1];
d281 2
a282 2
   GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
   GLfloat m12 = m[12], m13 = m[13];
d287 1
a287 1
      GLfloat ox = from[0], oy = from[1];
d303 1
a303 1
   GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
d308 1
a308 1
      GLfloat ox = from[0], oy = from[1];
d324 2
a325 2
   GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4], m5 = m[5];
   GLfloat m6 = m[6], m12 = m[12], m13 = m[13], m14 = m[14];
d330 1
a330 1
      GLfloat ox = from[0], oy = from[1];
d347 4
a350 4
   GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
   GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
   GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
   GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
d355 1
a355 1
      GLfloat ox = from[0], oy = from[1], oz = from[2];
d390 2
a391 2
   GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
   GLfloat m12 = m[12], m13 = m[13];
d396 1
a396 1
      GLfloat ox = from[0], oy = from[1], oz = from[2];
d412 1
a412 1
   GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
d417 1
a417 1
      GLfloat ox = from[0], oy = from[1], oz = from[2];
d433 3
a435 3
   GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4], m5 = m[5];
   GLfloat m6 = m[6], m8 = m[8], m9 = m[9], m10 = m[10];
   GLfloat m12 = m[12], m13 = m[13], m14 = m[14];
d440 1
a440 1
      GLfloat ox = from[0], oy = from[1], oz = from[2];
d458 2
a459 2
   GLfloat m0 = m[0], m5 = m[5];
   GLfloat m10 = m[10], m12 = m[12], m13 = m[13], m14 = m[14];
d464 1
a464 1
      GLfloat ox = from[0], oy = from[1], oz = from[2];
d480 2
a481 2
   GLfloat m0 = m[0], m5 = m[5], m8 = m[8], m9 = m[9];
   GLfloat m10 = m[10], m14 = m[14];
d486 1
a486 1
      GLfloat ox = from[0], oy = from[1], oz = from[2];
d504 4
a507 4
   GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
   GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
   GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
   GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
d512 1
a512 2
      GLfloat ox = from[0], oy = from[1];
      GLfloat oz = from[2], ow = from[3];
d546 2
a547 2
   GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
   GLfloat m12 = m[12], m13 = m[13];
d552 1
a552 2
      GLfloat ox = from[0], oy = from[1];
      GLfloat oz = from[2], ow = from[3];
d568 1
a568 1
   GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
d573 1
a573 2
      GLfloat ox = from[0], oy = from[1];
      GLfloat oz = from[2], ow = from[3];
d589 3
a591 3
   GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4], m5 = m[5];
   GLfloat m6 = m[6], m8 = m[8], m9 = m[9], m10 = m[10];
   GLfloat m12 = m[12], m13 = m[13], m14 = m[14];
d596 1
a596 2
      GLfloat ox = from[0], oy = from[1];
      GLfloat oz = from[2], ow = from[3];
d612 2
a613 2
   GLfloat m0 = m[0], m5 = m[5];
   GLfloat m10 = m[10], m12 = m[12], m13 = m[13], m14 = m[14];
d618 1
a618 2
      GLfloat ox = from[0], oy = from[1], oz = from[2];
      GLfloat ow = from[3];
d634 2
a635 2
   GLfloat m0 = m[0], m5 = m[5], m8 = m[8], m9 = m[9];
   GLfloat m10 = m[10], m14 = m[14];
d640 1
a640 2
      GLfloat ox = from[0], oy = from[1], oz = from[2];
      GLfloat ow = from[3];
d663 3
a665 3
   GLfloat m0 = m[0],                          m12 = m[12];
   GLfloat             m5 = m[5],              m13 = m[13];
   GLfloat                         m10 = m[10],  m14 = m[14];
d670 4
a673 4
      GLfloat ox = from[0], oy = from[1], oz = from[2];
      GLfloat cx = m0 * ox                      + m12 ;
      GLfloat cy =           m5 * oy            + m13 ;
      GLfloat cz =                     m10 * oz + m14 ;
d709 3
a711 3
   GLfloat m0 = m[0],              m8 = m[8]              ;
   GLfloat             m5 = m[5],  m9 = m[9]              ;
   GLfloat                         m10 = m[10],  m14 = m[14];
d716 5
a720 5
      GLfloat ox = from[0], oy = from[1], oz = from[2];
      GLfloat cx = m0 * ox           + m8  * oz       ;
      GLfloat cy =           m5 * oy + m9  * oz       ;
      GLfloat cz =                     m10 * oz + m14 ;
      GLfloat cw =                          -oz       ;
d755 4
a758 4
   GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
   GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
   GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
   GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
d763 5
a767 5
      GLfloat ox = from[0], oy = from[1], oz = from[2];
      GLfloat cx = m0 * ox + m4 * oy + m8  * oz + m12;
      GLfloat cy = m1 * ox + m5 * oy + m9  * oz + m13;
      GLfloat cz = m2 * ox + m6 * oy + m10 * oz + m14;
      GLfloat cw = m3 * ox + m7 * oy + m11 * oz + m15;
d802 3
a804 3
   GLfloat m0 = m[0],              m8 = m[8]              ;
   GLfloat             m5 = m[5],  m9 = m[9]              ;
   GLfloat                         m10 = m[10],  m14 = m[14];
d809 5
a813 5
      GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
      GLfloat cx = m0 * ox +           m8  * oz           ;
      GLfloat cy =           m5 * oy + m9  * oz           ;
      GLfloat cz =                     m10 * oz + m14 * ow;
      GLfloat cw =                          -oz           ;
d848 3
a850 3
   GLfloat m0 = m[0],                          m12 = m[12];
   GLfloat             m5 = m[5],              m13 = m[13];
   GLfloat                         m10 = m[10],  m14 = m[14];
d855 5
a859 5
      GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
      GLfloat cx = m0 * ox +                      m12 * ow;
      GLfloat cy =           m5 * oy +            m13 * ow;
      GLfloat cz =                     m10 * oz + m14 * ow;
      GLfloat cw =                                      ow;
d895 4
a898 4
   GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
   GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
   GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
   GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
d903 5
a907 5
      GLfloat ox = from[0], oy = from[1], oz = from[2], ow = from[3];
      GLfloat cx = m0 * ox + m4 * oy + m8  * oz + m12 * ow;
      GLfloat cy = m1 * ox + m5 * oy + m9  * oz + m13 * ow;
      GLfloat cz = m2 * ox + m6 * oy + m10 * oz + m14 * ow;
      GLfloat cw = m3 * ox + m7 * oy + m11 * oz + m15 * ow;
d943 2
a944 2
      GLfloat cx = vClip[i][0], cy = vClip[i][1];
      GLfloat cz = vClip[i][2], cw = vClip[i][3];
d973 1
a973 2
      GLfloat cx = vClip[i][0], cy = vClip[i][1];
      GLfloat cz = vClip[i][2];
d1002 1
a1002 1
      GLfloat cx = vClip[i][0], cy = vClip[i][1];
@


3.7
log
@removed dead code, added placeholders for 3D-Now! code
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.6 1998/10/29 03:57:11 brianp Exp brianp $ */
d26 3
d69 1
d100 2
a101 1
  
d173 1
d1072 1
a1072 1
      asm_transform_points3_identity,    /* fix proto */
d1095 3
a1097 3
      asm_transform_points4_identity,    /* fix proto */
      transform_points4_3d_no_rot,       /* no asm implemented */
      transform_points4_perspective,     /* no asm implemented */
d1122 7
d1134 1
d1157 9
d1173 1
d1179 4
a1182 4
      asm_project_and_cliptest_general,
      asm_project_and_cliptest_identity,      /* fix proto */
      asm_project_and_cliptest_ortho,
      asm_project_and_cliptest_perspective,
d1221 3
d1231 1
@


3.6
log
@misc clean-up of new vertex transformation code
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.5 1998/10/29 02:28:13 brianp Exp brianp $ */
d26 3
a74 179
#if 00
/*
 * Apply a transformation matrix to an array of [X Y Z W] coordinates:
 *   for i in 0 to n-1 do   q[i] = m * p[i]
 * where p[i] and q[i] are 4-element column vectors and m is a 16-element
 * transformation matrix.
 */
void gl_xform_points_4fv( GLuint n, GLfloat q[][4], const GLfloat m[16],
                          GLfloat p[][4] )
{
   /* This function has been carefully crafted to maximize register usage
    * and use loop unrolling with IRIX 5.3's cc.  Hopefully other compilers
    * will like this code too.
    */
   {
      GLuint i;
      GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
      GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
      if (m12==0.0F && m13==0.0F) {
         /* common case */
         for (i=0;i<n;i++) {
            GLfloat p0 = p[i][0], p1 = p[i][1], p2 = p[i][2];
            q[i][0] = m0 * p0 + m4  * p1 + m8 * p2;
            q[i][1] = m1 * p0 + m5  * p1 + m9 * p2;
         }
      }
      else {
         /* general case */
         for (i=0;i<n;i++) {
            GLfloat p0 = p[i][0], p1 = p[i][1], p2 = p[i][2], p3 = p[i][3];
            q[i][0] = m0 * p0 + m4  * p1 + m8 * p2 + m12 * p3;
            q[i][1] = m1 * p0 + m5  * p1 + m9 * p2 + m13 * p3;
         }
      }
   }
   {
      GLuint i;
      GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
      GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
      if (m3==0.0F && m7==0.0F && m11==0.0F && m15==1.0F) {
         /* common case */
         for (i=0;i<n;i++) {
            GLfloat p0 = p[i][0], p1 = p[i][1], p2 = p[i][2], p3 = p[i][3];
            q[i][2] = m2 * p0 + m6 * p1 + m10 * p2 + m14 * p3;
            q[i][3] = p3;
         }
      }
      else {
         /* general case */
         for (i=0;i<n;i++) {
            GLfloat p0 = p[i][0], p1 = p[i][1], p2 = p[i][2], p3 = p[i][3];
            q[i][2] = m2 * p0 + m6 * p1 + m10 * p2 + m14 * p3;
            q[i][3] = m3 * p0 + m7 * p1 + m11 * p2 + m15 * p3;
         }
      }
   }
}
#endif


#if 00
/*
 * Apply a transformation matrix to an array of [X Y Z] coordinates:
 *   for i in 0 to n-1 do   q[i] = m * p[i]
 */
void gl_xform_points_3fv( GLuint n, GLfloat q[][4], const GLfloat m[16],
                          GLfloat p[][3] )
{
   /* This function has been carefully crafted to maximize register usage
    * and use loop unrolling with IRIX 5.3's cc.  Hopefully other compilers
    * will like this code too.
    */
   {
      GLuint i;
      GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
      GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
      for (i=0;i<n;i++) {
         GLfloat p0 = p[i][0], p1 = p[i][1], p2 = p[i][2];
         q[i][0] = m0 * p0 + m4  * p1 + m8 * p2 + m12;
         q[i][1] = m1 * p0 + m5  * p1 + m9 * p2 + m13;
      }
   }
   {
      GLuint i;
      GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
      GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
      if (m3==0.0F && m7==0.0F && m11==0.0F && m15==1.0F) {
         /* common case */
         for (i=0;i<n;i++) {
            GLfloat p0 = p[i][0], p1 = p[i][1], p2 = p[i][2];
            q[i][2] = m2 * p0 + m6 * p1 + m10 * p2 + m14;
            q[i][3] = 1.0F;
         }
      }
      else {
         /* general case */
         for (i=0;i<n;i++) {
            GLfloat p0 = p[i][0], p1 = p[i][1], p2 = p[i][2];
            q[i][2] = m2 * p0 + m6 * p1 + m10 * p2 + m14;
            q[i][3] = m3 * p0 + m7 * p1 + m11 * p2 + m15;
         }
      }
   }
}
#endif


#if 00
#ifndef USE_X86_ASM
/*
 * Apply a transformation matrix to an array of normal vectors:
 *   for i in 0 to n-1 do  v[i] = u[i] * m
 * where u[i] and v[i] are 3-element row vectors and m is a 16-element
 * transformation matrix.
 * If the normalize flag is true the normals will be scaled to length 1.
 * If the rescale flag is true then do normal rescaling.
 */
void gl_xform_normals_3fv( GLuint n, GLfloat v[][3], const GLfloat m[16],
                           GLfloat u[][3], GLboolean normalize,
                           GLboolean rescale )
{
   if (normalize) {
      /* Transform and normalize */
      /* Doesn't matter if normal rescaling is enabled */
      GLuint i;
      GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
      GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
      GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
      for (i=0;i<n;i++) {
         GLdouble tx, ty, tz;
         {
            GLfloat ux = u[i][0],  uy = u[i][1],  uz = u[i][2];
            tx = ux * m0 + uy * m1 + uz * m2;
            ty = ux * m4 + uy * m5 + uz * m6;
            tz = ux * m8 + uy * m9 + uz * m10;
         }
         {
            GLdouble len, scale;
            len = GL_SQRT( tx*tx + ty*ty + tz*tz );
            scale = (len>1E-30) ? (1.0 / len) : 1.0;
            v[i][0] = tx * scale;
            v[i][1] = ty * scale;
            v[i][2] = tz * scale;
         }
      }
   }
   else if (rescale) {
      /* Transform and rescale */
      GLuint i;
      GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
      GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
      GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
      GLfloat f = GL_SQRT( m2*m2 + m6*m6 + m10*m10 );
      f = (f == 0.0F) ? 1.0F : (1.0F / f);
      for (i=0;i<n;i++) {
         GLfloat ux = u[i][0],  uy = u[i][1],  uz = u[i][2];
         v[i][0] = f * (ux * m0 + uy * m1 + uz * m2);
         v[i][1] = f * (ux * m4 + uy * m5 + uz * m6);
         v[i][2] = f * (ux * m8 + uy * m9 + uz * m10);
         }
   }
   else {
      /* Just transform */
      GLuint i;
      GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
      GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
      GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
      for (i=0;i<n;i++) {
         GLfloat ux = u[i][0],  uy = u[i][1],  uz = u[i][2];
         v[i][0] = ux * m0 + uy * m1 + uz * m2;
         v[i][1] = ux * m4 + uy * m5 + uz * m6;
         v[i][2] = ux * m8 + uy * m9 + uz * m10;
      }
   }
}
#endif
#endif


d174 6
a220 5

/*
 * NOTE:  the units for stride values is a GLfloat
 */

d1064 1
a1064 1
#ifdef USE_X86_ASM
d1072 2
d1087 1
a1087 1
#ifdef USE_X86_ASM
d1095 2
d1154 1
a1154 1
#if USE_X86_ASM
d1162 2
@


3.5
log
@incorporated Keith Whitwell's transformation optimizations
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.4 1998/09/25 03:12:27 brianp Exp brianp $ */
d26 3
d63 1
a270 42






  
/* The functions gl_xform_points_4fv(), gl_xform_points_3fv(),
 * gl_transform_points3() and gl_transform_points4() are superceded by
 * the gl_transform_points() function in matvec.c, but that function
 * (group of functions) has not received the same IRIX optimization
 * attention...  
 */



void gl_normalize_3fv( GLuint n, 
		       const GLfloat *in,
		       GLuint in_stride,
		       GLfloat out[][3] )
{
   GLuint i;
   for ( i = 0 ; i < n ; i++, in += in_stride ) 
   {
      COPY_3V( out[i], in );
      NORMALIZE_3FV( out[i] );
   }
}
  
  
void gl_scale_3fv( GLuint n, 
		   const GLfloat *in,
		   GLuint in_stride,
		   GLfloat out[][3],
		   GLfloat scale)
{
   GLuint i;
   for ( i = 0 ; i < n ; i++, in += in_stride ) 
   {
      SCALE_SCALAR_3V( out[i], scale, in );
   }
}
d285 1
a285 1
			       GLfloat v[][3], 
a296 1

d309 3
a311 3
            v[i][0] = tx * scale;
            v[i][1] = ty * scale;
            v[i][2] = tz * scale;
d325 3
a327 3
         v[i][0] = f * (ux * m0 + uy * m1 + uz * m2);
         v[i][1] = f * (ux * m4 + uy * m5 + uz * m6);
         v[i][2] = f * (ux * m8 + uy * m9 + uz * m10);
d338 754
a1091 3
         v[i][0] = ux * m0 + uy * m1 + uz * m2;
         v[i][1] = ux * m4 + uy * m5 + uz * m6;
         v[i][2] = ux * m8 + uy * m9 + uz * m10;
d1093 32
d1126 261
@


3.4
log
@simplified gl_xform_normals_3fv() per Keith Whitwell
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.3 1998/08/20 04:15:48 brianp Exp brianp $ */
d26 3
d68 1
d125 1
d128 1
a128 1

d172 1
d175 1
a175 1

d244 1
a268 12
/**********************************************************************
 *
 * Mesa 3.1 prototype transformation code
 *
 * Objectives:
 *     Move all functions which may be asm-accelerated out of vbxform.c
 *       into this file.
 *
 *     Add support for arbitrary strides in source coordinates to better
 *       support vertex arrays.
 *
 **********************************************************************/
d272 7
a278 8
/*
 * Use the current modelview matrix to transform XYZ vertices from object
 * to eye coordinates.
 * Input:  ctx - the context
 *         n - number of vertices to transform
 *         stride - stride in bytes between subsequent vObj vertices
 *         vObj - pointer to first vertex (in object coordinates)
 * Output;  vEye - array [n][4] of eye coordinates
d280 22
a301 3
void
gl_transform_points3( const GLcontext *ctx, GLuint n, GLuint stride,
                      const GLfloat *vObj, GLfloat vEye[][4] )
d303 4
a306 84
   ASSERT((stride & 0x3) == 0);  /* multiple of 4 bytes */

   switch (ctx->ModelViewMatrixType) {
      case MATRIX_GENERAL:
         {
            const GLfloat *m = ctx->ModelViewMatrix;
            const GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
            const GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
            const GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
            const GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
            GLuint i;
            for (i=0;i<n;i++) {
               const GLfloat ox = vObj[0], oy = vObj[1], oz = vObj[2];
               vEye[i][0] = m0 * ox + m4 * oy + m8  * oz + m12;
               vEye[i][1] = m1 * ox + m5 * oy + m9  * oz + m13;
               vEye[i][2] = m2 * ox + m6 * oy + m10 * oz + m14;
               vEye[i][3] = m3 * ox + m7 * oy + m11 * oz + m15;
               vObj = (const GLfloat *) ((GLubyte *) vObj + stride);
            }
         }
         break;
      case MATRIX_IDENTITY:
         {
            GLuint i;
            for (i=0;i<n;i++) {
               vEye[i][0] = vObj[0];
               vEye[i][1] = vObj[1];
               vEye[i][2] = vObj[2];
               vEye[i][3] = 1.0F;
               vObj = (const GLfloat *) ((GLubyte *) vObj + stride);
            }
         }
         break;
      case MATRIX_2D:
         {
            const GLfloat *m = ctx->ModelViewMatrix;
            const GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
            const GLfloat m12 = m[12], m13 = m[13];
            GLuint i;
            for (i=0;i<n;i++) {
               const GLfloat ox = vObj[0], oy = vObj[1], oz = vObj[2];
               vEye[i][0] = m0 * ox + m4 * oy            + m12       ;
               vEye[i][1] = m1 * ox + m5 * oy            + m13       ;
               vEye[i][2] =                   +       oz             ;
               vEye[i][3] =                                      1.0F;
               vObj = (const GLfloat *) ((GLubyte *) vObj + stride);
            }
         }
         break;
      case MATRIX_2D_NO_ROT:
         {
            const GLfloat *m = ctx->ModelViewMatrix;
            const GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
            GLuint i;
            for (i=0;i<n;i++) {
               const GLfloat ox = vObj[0], oy = vObj[1], oz = vObj[2];
               vEye[i][0] = m0 * ox                      + m12       ;
               vEye[i][1] =           m5 * oy            + m13       ;
               vEye[i][2] =                   +       oz             ;
               vEye[i][3] =                                      1.0F;
               vObj = (const GLfloat *) ((GLubyte *) vObj + stride);
            }
         }
         break;
      case MATRIX_3D:
         {
            const GLfloat *m = ctx->ModelViewMatrix;
            const GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4];
            const GLfloat m5 = m[5], m6 = m[6], m8 = m[8], m9 = m[9];
            const GLfloat m10 = m[10], m12 = m[12], m13 = m[13], m14 = m[14];
            GLuint i;
            for (i=0;i<n;i++) {
               const GLfloat ox = vObj[0], oy = vObj[1], oz = vObj[2];
               vEye[i][0] = m0 * ox + m4 * oy +  m8 * oz + m12       ;
               vEye[i][1] = m1 * ox + m5 * oy +  m9 * oz + m13       ;
               vEye[i][2] = m2 * ox + m6 * oy + m10 * oz + m14       ;
               vEye[i][3] =                                      1.0F;
               vObj = (const GLfloat *) ((GLubyte *) vObj + stride);
            }
         }
         break;
      default:
         /* should never get here */
         gl_problem( NULL, "invalid matrix type in gl_transform_points3()" );
d309 2
a310 2


d312 6
a317 7
 * Use the current modelview matrix to transform XYZW vertices from object
 * to eye coordinates.
 * Input:  ctx - the context
 *         n - number of vertices to transform
 *         stride - stride in bytes between subsequent vObj vertices
 *         vObj - pointer to first vertex (in object coordinates)
 * Output;  vEye - array [n][4] of eye coordinates
d319 7
a325 3
void
gl_transform_points4( const GLcontext *ctx, GLuint n, GLuint stride,
                      const GLfloat *vObj, GLfloat vEye[][4] )
d327 8
a334 1
   ASSERT((stride & 0x3) == 0);  /* multiple of 4 bytes */
d336 2
a337 2
   switch (ctx->ModelViewMatrixType) {
      case MATRIX_GENERAL:
d339 4
a342 15
            const GLfloat *m = ctx->ModelViewMatrix;
            const GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8],  m12 = m[12];
            const GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9],  m13 = m[13];
            const GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10],  m14 = m[14];
            const GLfloat m3 = m[3],  m7 = m[7],  m11 = m[11],  m15 = m[15];
            GLuint i;
            for (i=0;i<n;i++) {
               const GLfloat ox = vObj[0], oy = vObj[1];
               const GLfloat oz = vObj[2], ow = vObj[3];
               vEye[i][0] = m0 * ox + m4 * oy + m8  * oz + m12 * ow;
               vEye[i][1] = m1 * ox + m5 * oy + m9  * oz + m13 * ow;
               vEye[i][2] = m2 * ox + m6 * oy + m10 * oz + m14 * ow;
               vEye[i][3] = m3 * ox + m7 * oy + m11 * oz + m15 * ow;
               vObj = (const GLfloat *) ((GLubyte *) vObj + stride);
            }
a343 2
         break;
      case MATRIX_IDENTITY:
d345 6
a350 8
            GLuint i;
            for (i=0;i<n;i++) {
               vEye[i][0] = vObj[0];
               vEye[i][1] = vObj[1];
               vEye[i][2] = vObj[2];
               vEye[i][3] = vObj[3];
               vObj = (const GLfloat *) ((GLubyte *) vObj + stride);
            }
d352 29
a380 55
         break;
      case MATRIX_2D:
         {
            const GLfloat *m = ctx->ModelViewMatrix;
            const GLfloat m0 = m[0], m1 = m[1], m4 = m[4], m5 = m[5];
            const GLfloat m12 = m[12], m13 = m[13];
            GLuint i;
            for (i=0;i<n;i++) {
               const GLfloat ox = vObj[0], oy = vObj[1];
               const GLfloat oz = vObj[2], ow = vObj[3];
               vEye[i][0] = m0 * ox + m4 * oy            + m12 * ow;
               vEye[i][1] = m1 * ox + m5 * oy            + m13 * ow;
               vEye[i][2] =                   +       oz           ;
               vEye[i][3] =                                      ow;
               vObj = (const GLfloat *) ((GLubyte *) vObj + stride);
            }
         }
         break;
      case MATRIX_2D_NO_ROT:
         {
            const GLfloat *m = ctx->ModelViewMatrix;
            const GLfloat m0 = m[0], m5 = m[5], m12 = m[12], m13 = m[13];
            GLuint i;
            for (i=0;i<n;i++) {
               const GLfloat ox = vObj[0], oy = vObj[1];
               const GLfloat oz = vObj[2], ow = vObj[3];
               vEye[i][0] = m0 * ox                      + m12 * ow;
               vEye[i][1] =           m5 * oy            + m13 * ow;
               vEye[i][2] =                   +       oz           ;
               vEye[i][3] =                                      ow;
               vObj = (const GLfloat *) ((GLubyte *) vObj + stride);
            }
         }
         break;
      case MATRIX_3D:
         {
            const GLfloat *m = ctx->ModelViewMatrix;
            const GLfloat m0 = m[0], m1 = m[1], m2 = m[2], m4 = m[4];
            const GLfloat m5 = m[5], m6 = m[6], m8 = m[8], m9 = m[9];
            const GLfloat m10 = m[10], m12 = m[12], m13 = m[13], m14 = m[14];
            GLuint i;
            for (i=0;i<n;i++) {
               const GLfloat ox = vObj[0], oy = vObj[1];
               const GLfloat oz = vObj[2], ow = vObj[3];
               vEye[i][0] = m0 * ox + m4 * oy +  m8 * oz + m12 * ow;
               vEye[i][1] = m1 * ox + m5 * oy +  m9 * oz + m13 * ow;
               vEye[i][2] = m2 * ox + m6 * oy + m10 * oz + m14 * ow;
               vEye[i][3] =                                      ow;
               vObj = (const GLfloat *) ((GLubyte *) vObj + stride);
            }
         }
         break;
      default:
         /* should never get here */
         gl_problem( NULL, "invalid matrix type in gl_transform_points4()" );
a382 2


@


3.3
log
@added prototype 3.1 transformation functions
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.2 1998/04/18 05:00:56 brianp Exp brianp $ */
d5 1
a5 1
 * Version:  3.0
d26 3
d184 21
a204 24
      if (rescale) {
         /* Transform normals, rescale and normalize */
         GLuint i;
         GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
         GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
         GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
         GLfloat f = GL_SQRT( m2*m2 + m6*m6 + m10*m10 );
         f = (f == 0.0F) ? 1.0F : (1.0F / f);
         for (i=0;i<n;i++) {
            GLdouble tx, ty, tz;
            {
               GLfloat ux = u[i][0],  uy = u[i][1],  uz = u[i][2];
               tx = f * (ux * m0 + uy * m1 + uz * m2);
               ty = f * (ux * m4 + uy * m5 + uz * m6);
               tz = f * (ux * m8 + uy * m9 + uz * m10);
            }
            {
               GLdouble len, scale;
               len = GL_SQRT( tx*tx + ty*ty + tz*tz );
               scale = (len>1E-30) ? (1.0 / len) : 1.0;
               v[i][0] = tx * scale;
               v[i][1] = ty * scale;
               v[i][2] = tz * scale;
            }
d207 14
a220 22
      else {
         /* Transform and normalize */
         GLuint i;
         GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
         GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
         GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
         for (i=0;i<n;i++) {
            GLdouble tx, ty, tz;
            {
               GLfloat ux = u[i][0],  uy = u[i][1],  uz = u[i][2];
               tx = ux * m0 + uy * m1 + uz * m2;
               ty = ux * m4 + uy * m5 + uz * m6;
               tz = ux * m8 + uy * m9 + uz * m10;
            }
            {
               GLdouble len, scale;
               len = GL_SQRT( tx*tx + ty*ty + tz*tz );
               scale = (len>1E-30) ? (1.0 / len) : 1.0;
               v[i][0] = tx * scale;
               v[i][1] = ty * scale;
               v[i][2] = tz * scale;
            }
a221 1
      }
d224 10
a233 27
      if (rescale) {
         /* Transform and rescale */
         GLuint i;
         GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
         GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
         GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
         GLfloat f = GL_SQRT( m2*m2 + m6*m6 + m10*m10 );
         f = (f == 0.0F) ? 1.0F : (1.0F / f);
         for (i=0;i<n;i++) {
            GLfloat ux = u[i][0],  uy = u[i][1],  uz = u[i][2];
            v[i][0] = f * (ux * m0 + uy * m1 + uz * m2);
            v[i][1] = f * (ux * m4 + uy * m5 + uz * m6);
            v[i][2] = f * (ux * m8 + uy * m9 + uz * m10);
         }
      }
      else {
         /* Just transform */
         GLuint i;
         GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
         GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
         GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
         for (i=0;i<n;i++) {
            GLfloat ux = u[i][0],  uy = u[i][1],  uz = u[i][2];
            v[i][0] = ux * m0 + uy * m1 + uz * m2;
            v[i][1] = ux * m4 + uy * m5 + uz * m6;
            v[i][2] = ux * m8 + uy * m9 + uz * m10;
         }
@


3.2
log
@renamed USE_ASM to USE_X86_ASM
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.1 1998/02/01 16:37:19 brianp Exp brianp $ */
d26 3
d54 1
d285 223
@


3.1
log
@added GL_EXT_rescale_normal extension
@
text
@d1 1
a1 1
/* $Id: xform.c,v 3.0 1998/01/31 21:08:31 brianp Exp brianp $ */
d26 3
d163 1
a163 1
#ifndef USE_ASM
@


3.0
log
@initial rev
@
text
@d1 1
a1 1
/* $Id$ */
d25 4
a28 1
 * $Log$
d167 1
d170 2
a171 1
                           GLfloat u[][3], GLboolean normalize )
d174 48
a221 20
      /* Transform normals and scale to unit length */
      GLuint i;
      GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
      GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
      GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
      for (i=0;i<n;i++) {
         GLdouble tx, ty, tz;
         {
            GLfloat ux = u[i][0],  uy = u[i][1],  uz = u[i][2];
            tx = ux * m0 + uy * m1 + uz * m2;
            ty = ux * m4 + uy * m5 + uz * m6;
            tz = ux * m8 + uy * m9 + uz * m10;
         }
         {
            GLdouble len, scale;
            len = GL_SQRT( tx*tx + ty*ty + tz*tz );
            scale = (len>1E-30) ? (1.0 / len) : 1.0;
            v[i][0] = tx * scale;
            v[i][1] = ty * scale;
            v[i][2] = tz * scale;
d226 27
a252 10
      /* Just transform normals, don't scale */
      GLuint i;
      GLfloat m0 = m[0],  m4 = m[4],  m8 = m[8];
      GLfloat m1 = m[1],  m5 = m[5],  m9 = m[9];
      GLfloat m2 = m[2],  m6 = m[6],  m10 = m[10];
      for (i=0;i<n;i++) {
         GLfloat ux = u[i][0],  uy = u[i][1],  uz = u[i][2];
         v[i][0] = ux * m0 + uy * m1 + uz * m2;
         v[i][1] = ux * m4 + uy * m5 + uz * m6;
         v[i][2] = ux * m8 + uy * m9 + uz * m10;
@
