diff --git a/Makefile b/Makefile
index 2c30074..b53ab5d 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ CXXFLAGS = $(CFLAGS)
 LDLIBS = -lm # /home/relnev/ElectricFence-2.2.2/libefence.a
 
 CFLAGS += `sdl-config --cflags`
-LDLIBS += `sdl-config --libs`
+LDLIBS += -L/usr/X11R6/lib -lX11 -lXext `sdl-config --libs`
 
 AFLAGS = -g -Iinclude/ -w+macro-params -w+orphan-labels -w+number-overflow
diff --git a/src/include/prototyp.h b/src/include/prototyp.h
index d00ac1c..bf965b1 100644
--- a/src/include/prototyp.h
+++ b/src/include/prototyp.h
@@ -2323,8 +2323,6 @@ int DestroyActiveVDB(VIEWDESCRIPTORBLOCK *dblockptr);
 
 void PlatformSpecificVDBInit(VIEWDESCRIPTORBLOCK *vdb);
 
-int SqRoot32(int A);
-int SqRoot64(LONGLONGCH *A); /* CDF 4/2/98 */
 
 int GetOneOverSin(int a); /* CDF 4/2/98 */
 
diff --git a/src/win95/inline.h b/src/win95/inline.h
index f09e79a..6054f29 100644
--- a/src/win95/inline.h
+++ b/src/win95/inline.h
@@ -1215,9 +1215,9 @@ fptmp = (b); \
 FloatToInt(); \
 a = itmp;}
 
-#else /* other compiler ? */
+#else
 
-/* #error "Unknown compiler" */
+#if 0
 void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
 void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a);
 void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
@@ -1240,6 +1240,722 @@
 void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m);
 
 int FloatToInt(float);
 
 #define f2i(a, b) { a = FloatToInt(b); }
+#endif
+
+/* ADD */
+
+static __inline__ void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
+{
+/*
+    _asm
+    {
+        mov esi,a
+        mov edi,b
+        mov ebx,c
+        mov eax,[esi]
+        mov edx,[esi+4]
+        add eax,[edi]
+        adc edx,[edi+4]
+        mov [ebx],eax
+        mov [ebx+4],edx
+    }
+*/
+
+__asm__("movl 0(%%esi), %%eax \n\t"
+        "movl 4(%%esi), %%edx \n\t"
+        "addl 0(%%edi), %%eax \n\t"
+        "adcl 4(%%edi), %%edx \n\t"
+        "movl %%eax, 0(%%ebx) \n\t"
+        "movl %%edx, 4(%%ebx) \n\t"
+        :
+        : "S" (a), "D" (b), "b" (c)
+        : "%eax", "%edx", "memory", "cc"
+        );
+
+/*
+__asm__("movl 0(%%esi), %%eax \n\t"
+        "movl 4(%%esi), %%edx \n\t"
+        "addl 0(%%edi), %%eax \n\t"
+        "adcl 4(%%edi), %%edx \n\t"
+        : "=a" (c->lo32), "=d" (c->hi32)
+        : "S" (a), "D" (b)
+        );
+*/
+}
+
+/* ADD ++ */
+
+static __inline__ void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a)
+{
+/*
+    _asm
+    {
+        mov edi,c
+        mov esi,a
+        mov eax,[esi]
+        mov edx,[esi+4]
+        add [edi],eax
+        adc [edi+4],edx
+    }
+*/
+__asm__("movl 0(%%esi), %%eax \n\t"
+        "movl 4(%%esi), %%edx \n\t"
+        "addl %%eax, 0(%%edi) \n\t"
+        "adcl %%edx, 4(%%edi) \n\t"
+        :
+        : "D" (c), "S" (a)
+        : "%eax", "%edx", "memory", "cc"
+        );
+}
+
+/* SUB */
+
+static __inline__ void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
+{
+/*
+    _asm
+    {
+        mov esi,a
+        mov edi,b
+        mov ebx,c
+        mov eax,[esi]
+        mov edx,[esi+4]
+        sub eax,[edi]
+        sbb edx,[edi+4]
+        mov [ebx],eax
+        mov [ebx+4],edx
+    }
+*/
+__asm__("movl 0(%%esi), %%eax \n\t"
+        "movl 4(%%esi), %%edx \n\t"
+        "subl 0(%%edi), %%eax \n\t"
+        "sbbl 4(%%edi), %%edx \n\t"
+        "movl %%eax, 0(%%ebx) \n\t"
+        "movl %%edx, 4(%%ebx) \n\t"
+        :
+        : "S" (a), "D" (b), "b" (c)
+        : "%eax", "%edx", "memory", "cc"
+        );
+}
+
+/* SUB -- */
+
+static __inline__ void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a)
+{
+/*
+    _asm
+    {
+        mov edi,c
+        mov esi,a
+        mov eax,[esi]
+        mov edx,[esi+4]
+        sub [edi],eax
+        sbb [edi+4],edx
+    }
+*/
+__asm__("movl 0(%%esi), %%eax \n\t"
+        "movl 4(%%esi), %%edx \n\t"
+        "subl %%eax, 0(%%edi) \n\t"
+        "sbbl %%edx, 4(%%edi) \n\t"
+        :
+        : "D" (c), "S" (a)
+        : "%eax", "%edx", "memory", "cc"
+        );
+}
+
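+/*
+    Illustrative only (not part of the original source, not used by the build):
+    a portable C sketch of the 64-bit add above, assuming LONGLONGCH exposes the
+    lo32/hi32 members referenced in the commented-out variant of ADD_LL. The
+    function name is a placeholder; the subtract helpers mirror the same idea.
+*/
+#if 0
+static __inline__ void ADD_LL_C(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
+{
+    unsigned int lo = (unsigned int)a->lo32 + (unsigned int)b->lo32;
+
+    /* carry out of the low word propagates into the high word */
+    c->hi32 = a->hi32 + b->hi32 + (lo < (unsigned int)a->lo32);
+    c->lo32 = lo;
+}
+#endif
+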
+/*
+
+    MUL
+
+    This is the multiply we use, the 32 x 32 = 64 widening version
+
+*/
+
+static __inline__ void MUL_I_WIDE(int a, int b, LONGLONGCH *c)
+{
+/*
+    _asm
+    {
+        mov eax,a
+        mov ebx,c
+        imul b
+        mov [ebx],eax
+        mov [ebx+4],edx
+    }
+*/
+__asm__("imull %2 \n\t"
+        "movl %%eax, 0(%%ebx) \n\t"
+        "movl %%edx, 4(%%ebx) \n\t"
+        :
+        : "a" (a), "b" (c), "q" (b)
+        : "%edx", "memory", "cc"
+        );
+}
+
+/*
+
+    CMP
+
+    This substitutes for ==, >, <, >=, <=
+
+*/
+
+static __inline__ int CMP_LL(LONGLONGCH *a, LONGLONGCH *b)
+{
+    int retval;
+/*
+    _asm
+    {
+        mov ebx,a
+        mov ecx,b
+        mov eax,[ebx]
+        mov edx,[ebx+4]
+        sub eax,[ecx]
+        sbb edx,[ecx+4]
+        and edx,edx
+        jne llnz
+        and eax,eax
+        je llgs
+    llnz:
+        mov retval,1
+        and edx,edx
+        jge llgs
+        neg retval
+    llgs:
+    }
+*/
+/* TODO */
+__asm__("xorl %0, %0 \n\t"
+        "movl 0(%%ebx), %%eax \n\t"
+        "movl 4(%%ebx), %%edx \n\t"
+        "subl 0(%%ecx), %%eax \n\t"
+        "sbbl 4(%%ecx), %%edx \n\t"
+        "andl %%edx, %%edx \n\t"
+        "jne 1f \n\t"
+        "andl %%eax, %%eax \n\t"
+        "je 2f \n"
+"1: \n\t"
+        "movl $1, %0 \n\t"
+        "andl %%edx, %%edx \n\t"
+        "jge 2f \n\t"
+        "negl %0 \n"
+"2: \n\t"
+        : "=&r" (retval)
+        : "b" (a), "c" (b)
+        : "%eax", "%edx", "memory", "cc"
+        );
+
+    return retval;
+}
+
+/* EQUALS */
+
+static __inline__ void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b)
+{
+/*
+    _asm
+    {
+        mov edi,a
+        mov esi,b
+        mov eax,[esi]
+        mov edx,[esi+4]
+        mov [edi],eax
+        mov [edi+4],edx
+    }
+*/
+__asm__("movl 0(%%esi), %%eax \n\t"
+        "movl 4(%%esi), %%edx \n\t"
+        "movl %%eax, 0(%%edi) \n\t"
+        "movl %%edx, 4(%%edi) \n\t"
+        :
+        : "D" (a), "S" (b)
+        : "%eax", "%edx", "memory"
+        );
+}
+
+/* NEGATE */
+
+static __inline__ void NEG_LL(LONGLONGCH *a)
+{
+/*
+    _asm
+    {
+        mov esi,a
+        not dword ptr[esi]
+        not dword ptr[esi+4]
+        add dword ptr[esi],1
+        adc dword ptr[esi+4],0
+    }
+*/
+__asm__("notl 0(%%esi) \n\t"
+        "notl 4(%%esi) \n\t"
+        "addl $1, 0(%%esi) \n\t"
+        "adcl $0, 4(%%esi) \n\t"
+        :
+        : "S" (a)
+        : "memory", "cc"
+        );
+}
+
+/* ASR */
+
+static __inline__ void ASR_LL(LONGLONGCH *a, int shift)
+{
+/*
+    _asm
+    {
+        mov esi,a
+        mov eax,shift
+        and eax,eax
+        jle asrdn
+    asrlp:
+        sar dword ptr[esi+4],1
+        rcr dword ptr[esi],1
+        dec eax
+        jne asrlp
+    asrdn:
+    }
+*/
+__asm__("andl %%eax, %%eax \n\t"
+        "jle 1f \n"
+"0: \n\t"
+        "sarl $1, 4(%%esi) \n\t"
+        "rcrl $1, 0(%%esi) \n\t"
+        "decl %%eax \n\t"
+        "jne 0b \n"
+"1: \n\t"
+        : "+a" (shift)
+        : "S" (a)
+        : "memory", "cc"
+        );
+
+}
+
+/* Convert int to LONGLONGCH */
+
+static __inline__ void IntToLL(LONGLONGCH *a, int *b)
+{
+/*
+    _asm
+    {
+        mov esi,b
+        mov edi,a
+        mov eax,[esi]
+        cdq
+        mov [edi],eax
+        mov [edi+4],edx
+    }
+*/
+__asm__("movl 0(%%esi), %%eax \n\t"
+        "cdq \n\t"
+        "movl %%eax, 0(%%edi) \n\t"
+        "movl %%edx, 4(%%edi) \n\t"
+        :
+        : "S" (b), "D" (a)
+        : "%eax", "%edx", "memory", "cc"
+        );
+
+}
+
+/*
+
+    Fixed Point Multiply.
+
+
+    16.16 * 16.16 -> 16.16
+    or
+    16.16 * 0.32 -> 0.32
+
+    A proper version of this function ought to read
+    16.16 * 16.16 -> 32.16
+    but this would require a long long result
+
+    Algorithm:
+
+    Take the mid 32 bits of the 64 bit result
+
+*/
+
+/*
+    These functions have been checked for suitability for
+    a Pentium and look as if they would work adequately.
+    Might be worth a more detailed look at optimising
+    them though.
+*/
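+/*
+    Illustrative only (not part of the original source, not used by the build):
+    with GCC's 64-bit "long long" the two fixed point helpers below reduce to
+    portable C, which may help when reading the assembler versions. The _C
+    names are placeholders.
+*/
+#if 0
+static __inline__ int MUL_FIXED_C(int a, int b)
+{
+    /* widen to 64 bits, then take the middle 32 bits of the product */
+    return (int)(((long long)a * (long long)b) >> 16);
+}
+
+static __inline__ int DIV_FIXED_C(int a, int b)
+{
+    /* pre-scale the dividend by 2^16 so the 16.16 quotient keeps its fraction */
+    return (int)(((long long)a << 16) / b);
+}
+#endif
+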
+
+static __inline__ int MUL_FIXED(int a, int b)
+{
+    int retval;
+/*
+    _asm
+    {
+        mov eax,a
+        imul b
+        shrd eax,edx,16
+        mov retval,eax
+    }
+*/
+/* TODO */
+__asm__("imull %2 \n\t"
+        "shrdl $16, %%edx, %%eax \n\t"
+        : "=a" (retval)
+        : "a" (a), "q" (b)
+        : "%edx", "cc"
+        );
+    return retval;
+}
+
+/*
+
+    Fixed Point Divide - returns a / b
+
+*/
+
+static __inline__ int DIV_FIXED(int a, int b)
+{
+    int retval;
+/*
+    _asm
+    {
+        mov eax,a
+        cdq
+        rol eax,16
+        mov dx,ax
+        xor ax,ax
+        idiv b
+        mov retval,eax
+    }
+*/
+/* TODO */
+__asm__("cdq \n\t"
+        "roll $16, %%eax \n\t"
+        "mov %%ax, %%dx \n\t"
+        "xor %%ax, %%ax \n\t"
+        "idivl %2 \n\t"
+        : "=a" (retval)
+        : "a" (a), "q" (b)
+        : "%edx", "cc"
+        );
+    return retval;
+}
+
+/*
+
+    Multiply and Divide Functions.
+
+*/
+
+
+/*
+
+    32/32 division
+
+    This macro is a function on some other platforms
+
+*/
+
+#define DIV_INT(a, b) ((a) / (b))
+
+/*
+
+    A Narrowing 64/32 Division
+
+*/
+
+static __inline__ int NarrowDivide(LONGLONGCH *a, int b)
+{
+    int retval;
+/*
+    _asm
+    {
+        mov esi,a
+        mov eax,[esi]
+        mov edx,[esi+4]
+        idiv b
+        mov retval,eax
+    }
+*/
+__asm__("movl 0(%%esi), %%eax \n\t"
+        "movl 4(%%esi), %%edx \n\t"
+        "idivl %2 \n\t"
+        : "=a" (retval)
+        : "S" (a), "q" (b)
+        : "%edx", "cc"
+        );
+    return retval;
+}
+
+/*
+
+    This function performs a Widening Multiply followed by a Narrowing Divide.
+
+    a = (a * b) / c
+
+*/
+
+static __inline__ int WideMulNarrowDiv(int a, int b, int c)
+{
+    int retval;
+/*
+    _asm
+    {
+        mov eax,a
+        imul b
+        idiv c
+        mov retval,eax
+    }
+*/
+/* TODO */
+__asm__("imull %2 \n\t"
+        "idivl %3 \n\t"
+        : "=a" (retval)
+        : "a" (a), "q" (b), "q" (c)
+        : "%edx", "cc"
+        );
+    return retval;
+}
+
+/*
+
+    Function to rotate a VECTORCH using a MATRIXCH
+
+    This is the C function
+
+    x = MUL_FIXED(m->mat11, v->vx);
+    x += MUL_FIXED(m->mat21, v->vy);
+    x += MUL_FIXED(m->mat31, v->vz);
+
+    y = MUL_FIXED(m->mat12, v->vx);
+    y += MUL_FIXED(m->mat22, v->vy);
+    y += MUL_FIXED(m->mat32, v->vz);
+
+    z = MUL_FIXED(m->mat13, v->vx);
+    z += MUL_FIXED(m->mat23, v->vy);
+    z += MUL_FIXED(m->mat33, v->vz);
+
+    v->vx = x;
+    v->vy = y;
+    v->vz = z;
+
+    This is the MUL_FIXED inline assembler function
+
+    imul edx
+    shrd eax,edx,16
+
+
+typedef struct matrixch {
+
+    int mat11;    0
+    int mat12;    4
+    int mat13;    8
+
+    int mat21;    12
+    int mat22;    16
+    int mat23;    20
+
+    int mat31;    24
+    int mat32;    28
+    int mat33;    32
+
+} MATRIXCH;
+
+*/
+
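+/*
+    Illustrative only (not part of the original source, not used by the build):
+    a portable C version of the rotation spelled out in the comment above,
+    built from MUL_FIXED. The _C name is a placeholder; the assembler routines
+    below remain the reference.
+*/
+#if 0
+static void RotateVector_C(VECTORCH *v, MATRIXCH *m)
+{
+    /* combine the vector with the matrix entries exactly as in the comment above */
+    int x = MUL_FIXED(m->mat11, v->vx) + MUL_FIXED(m->mat21, v->vy) + MUL_FIXED(m->mat31, v->vz);
+    int y = MUL_FIXED(m->mat12, v->vx) + MUL_FIXED(m->mat22, v->vy) + MUL_FIXED(m->mat32, v->vz);
+    int z = MUL_FIXED(m->mat13, v->vx) + MUL_FIXED(m->mat23, v->vy) + MUL_FIXED(m->mat33, v->vz);
+
+    v->vx = x;
+    v->vy = y;
+    v->vz = z;
+}
+#endif
+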
+#if 0 /* TODO if these are needed */
+static void RotateVector_ASM(VECTORCH *v, MATRIXCH *m)
+{
+    _asm
+    {
+        mov esi,v
+        mov edi,m
+
+        mov eax,[edi + 0]
+        imul DWORD PTR [esi + 0]
+        shrd eax,edx,16
+        mov ecx,eax
+        mov eax,[edi + 12]
+        imul DWORD PTR [esi + 4]
+        shrd eax,edx,16
+        add ecx,eax
+        mov eax,[edi + 24]
+        imul DWORD PTR [esi + 8]
+        shrd eax,edx,16
+        add ecx,eax
+
+        mov eax,[edi + 4]
+        imul DWORD PTR [esi + 0]
+        shrd eax,edx,16
+        mov ebx,eax
+        mov eax,[edi + 16]
+        imul DWORD PTR [esi + 4]
+        shrd eax,edx,16
+        add ebx,eax
+        mov eax,[edi + 28]
+        imul DWORD PTR [esi + 8]
+        shrd eax,edx,16
+        add ebx,eax
+
+        mov eax,[edi + 8]
+        imul DWORD PTR [esi + 0]
+        shrd eax,edx,16
+        mov ebp,eax
+        mov eax,[edi + 20]
+        imul DWORD PTR [esi + 4]
+        shrd eax,edx,16
+        add ebp,eax
+        mov eax,[edi + 32]
+        imul DWORD PTR [esi + 8]
+        shrd eax,edx,16
+        add ebp,eax
+
+        mov [esi + 0],ecx
+        mov [esi + 4],ebx
+        mov [esi + 8],ebp
+    }
+}
+
+/*
+
+    Here is the same function, this time copying the result to a second vector
+
+*/
+
+static void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m)
+{
+    _asm
+    {
+        mov esi,v1
+        mov edi,m
+
+        mov eax,[edi + 0]
+        imul DWORD PTR [esi + 0]
+        shrd eax,edx,16
+        mov ecx,eax
+        mov eax,[edi + 12]
+        imul DWORD PTR [esi + 4]
+        shrd eax,edx,16
+        add ecx,eax
+        mov eax,[edi + 24]
+        imul DWORD PTR [esi + 8]
+        shrd eax,edx,16
+        add ecx,eax
+
+        mov eax,[edi + 4]
+        imul DWORD PTR [esi + 0]
+        shrd eax,edx,16
+        mov ebx,eax
+        mov eax,[edi + 16]
+        imul DWORD PTR [esi + 4]
+        shrd eax,edx,16
+        add ebx,eax
+        mov eax,[edi + 28]
+        imul DWORD PTR [esi + 8]
+        shrd eax,edx,16
+        add ebx,eax
+
+        mov eax,[edi + 8]
+        imul DWORD PTR [esi + 0]
+        shrd eax,edx,16
+        mov ebp,eax
+        mov eax,[edi + 20]
+        imul DWORD PTR [esi + 4]
+        shrd eax,edx,16
+        add ebp,eax
+        mov eax,[edi + 32]
+        imul DWORD PTR [esi + 8]
+        shrd eax,edx,16
+        add ebp,eax
+
+        mov edx,v2
+        mov [edx + 0],ecx
+        mov [edx + 4],ebx
+        mov [edx + 8],ebp
+    }
+}
+#endif
+
+#if (SupportFPMathsFunctions || SupportFPSquareRoot)
+
+/*
+
+    Square Root
+
+    Returns the Square Root of a 32-bit number
+
+*/
+
+extern int sqrt_temp1;
+extern int sqrt_temp2;
+
+static __inline__ int SqRoot32(int A)
+{
+    sqrt_temp1 = A;
+/*
+    _asm
+    {
+        finit
+        fild A
+        fsqrt
+        fistp temp2
+        fwait
+    }
+*/
+
+__asm__("finit \n\t"
+        "fildl sqrt_temp1 \n\t"
+        "fsqrt \n\t"
+        "fistpl sqrt_temp2 \n\t"
+        "fwait \n\t"
+        :
+        :
+        : "memory", "cc"
+        );
+
+    return sqrt_temp2;
+}
+
+#endif
+
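+/*
+    Illustrative only (not part of the original source, not used by the build):
+    on a target without the x87 routine above, a portable fallback could lean
+    on sqrt() from <math.h>; the Makefile already links -lm. The _C name is a
+    placeholder.
+*/
+#if 0
+#include <math.h>
+
+static __inline__ int SqRoot32_C(int A)
+{
+    /* round to nearest, which is what fistp does in its default mode */
+    return (int)(sqrt((double)A) + 0.5);
+}
+#endif
+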
+/*
+
+    This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than
+    the function call "CHP" used by the WATCOM compiler.
+
+*/
+
+extern float fti_fptmp;
+extern int fti_itmp;
+
+static __inline__ int FloatToInt(float fptmp)
+{
+    fti_fptmp = fptmp;
+/*
+    _asm
+    {
+        fld fptmp
+        fistp itmp
+    }
+*/
+__asm__("flds fti_fptmp \n\t"
+        "fistpl fti_itmp \n\t"
+        :
+        :
+        : "memory", "cc"
+        );
+
+    return fti_itmp;
+}
+
+/*
+
+    This macro makes usage of the above function easier and more elegant
+
+*/
+
+#define f2i(a, b) { \
+a = FloatToInt(b); \
+}
 
 #endif
diff --git a/src/win95/plspecfn.c b/src/win95/plspecfn.c
index 26c9527..0efb61e 100644
--- a/src/win95/plspecfn.c
+++ b/src/win95/plspecfn.c
@@ -18,6 +18,11 @@
 #include "kshape.h"
 #endif
 
+/* globals from inline.h */
+int sqrt_temp1;
+int sqrt_temp2;
+float fti_fptmp;
+int fti_itmp;
 
 
 /*
@@ -513,88 +518,6 @@ int WideMul2NarrowDiv(int a, int b, int c, int d, int e)
 }
 
-
-
-
-/*
-
- Square Root
-
- Returns the Square Root of a 32-bit number
-
-*/
-
-#if (SupportFPMathsFunctions || SupportFPSquareRoot)
-#else
-
-
-int SqRoot32(int A)
-
-{
-
-    unsigned int edx = A;
-    unsigned int ecx;
-
-    unsigned int ax = 0;
-    unsigned int bx = 0;
-    unsigned int di = 0;
-
-
-    for(ecx = 15; ecx!=0; ecx--) {
-
-        bx <<= 1;
-        if(edx & 0x80000000) bx |= 1;
-        edx <<= 1;
-
-        bx <<= 1;
-        if(edx & 0x80000000) bx |= 1;
-        edx <<= 1;
-
-        ax += ax;
-        di = ax;
-        di += di;
-
-        if(bx > di) {
-
-            di++;
-            ax++;
-
-            bx -= di;
-
-        }
-
-    }
-
-    bx <<= 1;
-    if(edx & 0x80000000) bx |= 1;
-    edx <<= 1;
-
-    bx <<= 1;
-    if(edx & 0x80000000) bx |= 1;
-    edx <<= 1;
-
-    ax += ax;
-    di = ax;
-    di += di;
-
-    if(bx > di) {
-
-        ax++;
-
-    }
-
-    return ((int)ax);
-
-}
-
-
-#endif /* SupportFPMathsFunctions */
-
-
-
-
-
-
 /*
 
 Calculate Plane Normal from three POP's
@@ -1115,99 +1038,6 @@ int Magnitude(VECTORCH *v)
 }
 
-
-
-
-
-
-
-
-
-
-/*
-
- 64-bit Square Root returns 32-bit result
-
- All 64-bit operations are now done using the type LONGLONGCH whose format
- varies from platform to platform, although it is always 64-bits in size.
-
- NOTE:
-
- Function currently not available to Watcom C users
- A Floating point version is STRONGLY advised for the PC anyway
-
-*/
-#if 0
-int SqRoot64(LONGLONGCH *A)
-
-{
-
-#if 0
-
-    unsigned long long edx = *A;
-
-    unsigned int eax = 0;
-    unsigned int ebx = 0;
-    unsigned int edi = 0;
-
-    unsigned int ecx;
-
-
-    unsigned long long TopBit = 0x8000000000000000LL;
-
-    for(ecx = 31; ecx != 0; ecx--) {
-
-        ebx <<= 1;
-        if(edx & TopBit) ebx |= 1;
-        edx <<= 1;
-
-        ebx <<= 1;
-        if(edx & TopBit) ebx |= 1;
-        edx <<= 1;
-
-        eax += eax;
-        edi = eax;
-        edi += edi;
-
-        if(ebx > edi) {
-
-            edi++;
-            eax++;
-            ebx -= edi;
-
-        }
-
-    }
-
-    ebx <<= 1;
-    if(edx & TopBit) ebx |= 1;
-    edx <<= 1;
-
-    ebx <<= 1;
-    if(edx & TopBit) ebx |= 1;
-    edx <<= 1;
-
-    eax += eax;
-    edi = eax;
-    edi += edi;
-
-    if(ebx > edi) {
-
-        eax++;
-
-    }
-
-    return eax;
-
-#endif
-
-    return (0);
-
-}
-
-#endif /* for #if 0 */
-
 /*
 
 Shift the 64-bit value until is LTE the limit