#include "3dc.h" void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a); void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c); void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a); void MUL_I_WIDE(int a, int b, LONGLONGCH *c); int CMP_LL(LONGLONGCH *a, LONGLONGCH *b); void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b); void NEG_LL(LONGLONGCH *a); void ASR_LL(LONGLONGCH *a, int shift); void IntToLL(LONGLONGCH *a, int *b); int MUL_FIXED(int a, int b); int DIV_FIXED(int a, int b); #define DIV_INT(a, b) ((a) / (b)) int NarrowDivide(LONGLONGCH *a, int b); int WideMulNarrowDiv(int a, int b, int c); void RotateVector_ASM(VECTORCH *v, MATRIXCH *m); void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m); #if 0 int FloatToInt(float); #define f2i(a, b) { a = FloatToInt(b); } #endif void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) { /* _asm { mov esi,a mov edi,b mov ebx,c mov eax,[esi] mov edx,[esi+4] add eax,[edi] adc edx,[edi+4] mov [ebx],eax mov [ebx+4],edx } */ int dummy1, dummy2; __asm__("movl 0(%%esi), %0 \n\t" "movl 4(%%esi), %1 \n\t" "addl 0(%%edi), %0 \n\t" "adcl 4(%%edi), %1 \n\t" "movl %0, 0(%%ebx) \n\t" "movl %1, 4(%%ebx) \n\t" : "=&r" (dummy1), "=&r" (dummy2) : "S" (a), "D" (b), "b" (c) : "memory", "cc" ); /* __asm__("movl 0(%%esi), %%eax \n\t" "movl 4(%%esi), %%edx \n\t" "addl 0(%%edi), %%eax \n\t" "adcl 4(%%edi), %%edx \n\t" : "=a" (c->lo32), "=d" (c->hi32) : "S" (a), "D" (b) ); */ } /* ADD ++ */ void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a) { /* _asm { mov edi,c mov esi,a mov eax,[esi] mov edx,[esi+4] add [edi],eax adc [edi+4],edx } */ int dummy1, dummy2; __asm__("movl 0(%%esi), %0 \n\t" "movl 4(%%esi), %1 \n\t" "addl %0, 0(%%edi) \n\t" "adcl %1, 4(%%edi) \n\t" : "=&r" (dummy1), "=&r" (dummy2) : "D" (c), "S" (a) : "memory", "cc" ); } /* SUB */ void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c) { /* _asm { mov esi,a mov edi,b mov ebx,c mov eax,[esi] mov edx,[esi+4] sub eax,[edi] sbb edx,[edi+4] mov [ebx],eax mov [ebx+4],edx } */ int dummy1, dummy2; __asm__("movl 0(%%esi), %0 \n\t" "movl 4(%%esi), %1 \n\t" "subl 0(%%edi), %0 \n\t" "sbbl 4(%%edi), %1 \n\t" "movl %0, 0(%%ebx) \n\t" "movl %1, 4(%%ebx) \n\t" : "=&r" (dummy1), "=&r" (dummy2) : "S" (a), "D" (b), "b" (c) : "memory", "cc" ); } /* SUB -- */ void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a) { /* _asm { mov edi,c mov esi,a mov eax,[esi] mov edx,[esi+4] sub [edi],eax sbb [edi+4],edx } */ int dummy1, dummy2; __asm__("movl 0(%%esi), %0 \n\t" "movl 4(%%esi), %1 \n\t" "subl %0, 0(%%edi) \n\t" "sbbl %1, 4(%%edi) \n\t" : "=&r" (dummy1), "=&r" (dummy2) : "D" (c), "S" (a) : "memory", "cc" ); } /* MUL This is the multiply we use, the 32 x 32 = 64 widening version */ void MUL_I_WIDE(int a, int b, LONGLONGCH *c) { /* _asm { mov eax,a mov ebx,c imul b mov [ebx],eax mov [ebx+4],edx } */ unsigned int d1; __asm__("imull %3 \n\t" "movl %%eax, 0(%%ebx) \n\t" "movl %%edx, 4(%%ebx) \n\t" : "=a" (d1) : "0" (a), "b" (c), "m" (b) : "%edx", "memory", "cc" ); } /* CMP This substitutes for ==, >, <, >=, <= */ int CMP_LL(LONGLONGCH *a, LONGLONGCH *b) { /* int retval; _asm { mov ebx,a mov ecx,b mov eax,[ebx] mov edx,[ebx+4] sub eax,[ecx] sbb edx,[ecx+4] and edx,edx jne llnz and eax,eax je llgs llnz: mov retval,1 and edx,edx jge llgs neg retval llgs: } */ #if 1 int retval; __asm__("movl 0(%%ebx), %%eax \n\t" "movl 4(%%ebx), %%edx \n\t" "subl 0(%%ecx), %%eax \n\t" "sbbl 4(%%ecx), %%edx \n\t" "xorl %%ebx, %%ebx \n\t" "andl %%edx, %%edx \n\t" "jne 0f \n\t" /* llnz */ "andl %%eax, %%eax \n\t" "je 1f \n" 
/* llgs */ "0: \n\t" /* llnz */ "movl $1, %%ebx \n\t" "andl %%edx, %%edx \n\t" "jge 1f \n\t" /* llgs */ "negl %%ebx \n" "1: \n\t" /* llgs */ : "=b" (retval) : "b" (a), "c" (b) : "%eax", "%edx", "memory", "cc" ); return retval; #else if (a->hi32 > b->hi32) return 1; else if (a->hi32 < b->hi32) return -1; else if (a->lo32 > b->lo32) return 1; else if (a->lo32 < b->lo32) return -1; else return 0; #endif } /* EQUALS */ void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b) { /* _asm { mov edi,a mov esi,b mov eax,[esi] mov edx,[esi+4] mov [edi],eax mov [edi+4],edx } */ #if 0 __asm__("movl 0(%%esi), %%eax \n\t" "movl 4(%%esi), %%edx \n\t" "movl %%eax, 0(%%edi) \n\t" "movl %%edx, 4(%%edi) \n\t" : : "D" (a), "S" (b) : "%eax", "%edx", "memory" ); #else *a = *b; #endif } /* NEGATE */ void NEG_LL(LONGLONGCH *a) { /* _asm { mov esi,a not dword ptr[esi] not dword ptr[esi+4] add dword ptr[esi],1 adc dword ptr[esi+4],0 } */ __asm__("notl 0(%%esi) \n\t" "notl 4(%%esi) \n\t" "addl $1, 0(%%esi) \n\t" "adcl $0, 4(%%esi) \n\t" : : "S" (a) : "memory", "cc" ); } /* ASR */ void ASR_LL(LONGLONGCH *a, int shift) { /* _asm { mov esi,a mov eax,shift and eax,eax jle asrdn asrlp: sar dword ptr[esi+4],1 rcr dword ptr[esi],1 dec eax jne asrlp asrdn: } */ unsigned int d1; __asm__ volatile ("andl %0, %0 \n\t" "jle 0 \n" /* asrdn */ "1: \n\t" /* asrlp */ "sarl $1, 4(%%esi) \n\t" "rcrl $1, 0(%%esi) \n\t" "decl %0 \n\t" "jne 1 \n" "0: \n\t" : "=&r" (d1) : "S" (a), "a" (shift) : "memory", "cc" ); } /* Convert int to LONGLONGCH */ void IntToLL(LONGLONGCH *a, int *b) { /* _asm { mov esi,b mov edi,a mov eax,[esi] cdq mov [edi],eax mov [edi+4],edx } */ __asm__("movl 0(%%esi), %%eax \n\t" "cdq \n\t" "movl %%eax, 0(%%edi) \n\t" "movl %%edx, 4(%%edi) \n\t" : : "S" (b), "D" (a) : "%eax", "%edx", "memory", "cc" ); } /* Fixed Point Multiply. 16.16 * 16.16 -> 16.16 or 16.16 * 0.32 -> 0.32 A proper version of this function ought to read 16.16 * 16.16 -> 32.16 but this would require a long long result Algorithm: Take the mid 32 bits of the 64 bit result */ /* These functions have been checked for suitability for a Pentium and look as if they would work adequately. Might be worth a more detailed look at optimising them though. */ int MUL_FIXED(int a, int b) { int retval; /* _asm { mov eax,a imul b shrd eax,edx,16 mov retval,eax } */ __asm__("imull %2 \n\t" "shrdl $16, %%edx, %%eax \n\t" : "=a" (retval) : "0" (a), "m" (b) : "%edx", "cc" ); return retval; } /* Fixed Point Divide - returns a / b */ int DIV_FIXED(int a, int b) { int retval; if (b == 0) printf("DEBUG THIS: a = %d, b = %d\n", a, b); if (b == 0) return 0; /* TODO: debug this! (start with alien on ferarco) */ /* _asm { mov eax,a cdq rol eax,16 mov dx,ax xor ax,ax idiv b mov retval,eax } */ __asm__("cdq \n\t" "roll $16, %%eax \n\t" "mov %%ax, %%dx \n\t" "xor %%ax, %%ax \n\t" "idivl %2 \n\t" : "=a" (retval) : "0" (a), "m" (b) : "%edx", "cc" ); return retval; } /* Multiply and Divide Functions. */ /* 32/32 division This macro is a function on some other platforms */ #define DIV_INT(a, b) ((a) / (b)) /* A Narrowing 64/32 Division */ int NarrowDivide(LONGLONGCH *a, int b) { int retval; /* _asm { mov esi,a mov eax,[esi] mov edx,[esi+4] idiv b mov retval,eax } */ __asm__("movl 0(%%esi), %%eax \n\t" "movl 4(%%esi), %%edx \n\t" "idivl %2 \n\t" : "=a" (retval) : "S" (a), "m" (b) : "%edx", "cc" ); return retval; } /* This function performs a Widening Multiply followed by a Narrowing Divide. 
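/*
 Portable reference sketch, kept under "#if 0" for documentation only: the
 same 16.16 fixed-point multiply and divide written with a 64-bit "long long"
 intermediate, mirroring the "take the mid 32 bits of the 64-bit result"
 description above. It assumes an arithmetic right shift of negative values,
 as on gcc/x86; the helper names are illustrative, not from the original.
*/
#if 0
static int MUL_FIXED_portable(int a, int b)
{
    /* 32 x 32 -> 64 product, then bits 16..47 form the 16.16 result */
    return (int)(((long long)a * (long long)b) >> 16);
}

static int DIV_FIXED_portable(int a, int b)
{
    if (b == 0)
        return 0;

    /* scale the dividend by 2^16 so the quotient comes out in 16.16 */
    return (int)(((long long)a * 65536) / b);
}
#endif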
/* Multiply and Divide Functions. */

/*
 32/32 division.

 This macro is a function on some other platforms.
*/

#define DIV_INT(a, b) ((a) / (b))

/* A Narrowing 64/32 Division */
int NarrowDivide(LONGLONGCH *a, int b)
{
    int retval;

    /*
    _asm
    {
        mov esi,a
        mov eax,[esi]
        mov edx,[esi+4]
        idiv b
        mov retval,eax
    }
    */

    __asm__("movl 0(%%esi), %%eax \n\t"
            "movl 4(%%esi), %%edx \n\t"
            "idivl %2 \n\t"
            : "=a" (retval)
            : "S" (a), "m" (b)
            : "%edx", "cc"
            );

    return retval;
}

/*
 This function performs a Widening Multiply followed by a Narrowing Divide.

	a = (a * b) / c
*/
int WideMulNarrowDiv(int a, int b, int c)
{
    int retval;

    /*
    _asm
    {
        mov eax,a
        imul b
        idiv c
        mov retval,eax
    }
    */

    __asm__("imull %2 \n\t"
            "idivl %3 \n\t"
            : "=a" (retval)
            : "0" (a), "m" (b), "m" (c)
            : "%edx", "cc"
            );

    return retval;
}

/*
 Function to rotate a VECTORCH using a MATRIXCH.

 This is the C function:

	x = MUL_FIXED(m->mat11, v->vx);
	x += MUL_FIXED(m->mat21, v->vy);
	x += MUL_FIXED(m->mat31, v->vz);

	y = MUL_FIXED(m->mat12, v->vx);
	y += MUL_FIXED(m->mat22, v->vy);
	y += MUL_FIXED(m->mat32, v->vz);

	z = MUL_FIXED(m->mat13, v->vx);
	z += MUL_FIXED(m->mat23, v->vy);
	z += MUL_FIXED(m->mat33, v->vz);

	v->vx = x;
	v->vy = y;
	v->vz = z;

 This is the MUL_FIXED inline assembler function:

	imul edx
	shrd eax,edx,16

 typedef struct matrixch
 {
	int mat11;	 0
	int mat12;	 4
	int mat13;	 8
	int mat21;	12
	int mat22;	16
	int mat23;	20
	int mat31;	24
	int mat32;	28
	int mat33;	32
 } MATRIXCH;
*/

/*
 Square Root.

 Returns the square root of a 32-bit number.
*/
extern volatile int sqrt_temp;

int SqRoot32(int A)
{
    /*
    _asm
    {
        finit
        fild A
        fsqrt
        fistp temp2
        fwait
    }
    */

    __asm__ volatile ("finit \n\t"
            "fildl %0 \n\t"
            "fsqrt \n\t"
            "fistpl sqrt_temp \n\t"
            "fwait \n\t"
            :
            : "m" (A)
            : "memory", "cc"
            );

    return sqrt_temp;
}

/*
 This may look ugly (it is), but it is a MUCH faster way to convert "float"
 into "int" than the function call "CHP" used by the WATCOM compiler.
*/
extern volatile float fti_fptmp;
extern volatile int fti_itmp;

void FloatToInt()
{
#if 1
    __asm__ volatile ("flds fti_fptmp \n\t"
            "fistpl fti_itmp \n\t"
            :
            :
            : "memory", "cc"
            );
#else
    fti_itmp = (int)fti_fptmp;
#endif
}
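/*
 Portable reference sketch, kept under "#if 0" for documentation only: the
 plain C rotation spelled out in the "Function to rotate a VECTORCH using a
 MATRIXCH" comment above, built on MUL_FIXED and the VECTORCH/MATRIXCH
 members listed there. The function name is illustrative, not from the
 original; RotateVector_ASM remains the version the code actually calls.
*/
#if 0
static void RotateVector_C(VECTORCH *v, MATRIXCH *m)
{
    /* each component is a 16.16 dot product of v with one matrix column */
    int x = MUL_FIXED(m->mat11, v->vx)
          + MUL_FIXED(m->mat21, v->vy)
          + MUL_FIXED(m->mat31, v->vz);
    int y = MUL_FIXED(m->mat12, v->vx)
          + MUL_FIXED(m->mat22, v->vy)
          + MUL_FIXED(m->mat32, v->vz);
    int z = MUL_FIXED(m->mat13, v->vx)
          + MUL_FIXED(m->mat23, v->vy)
          + MUL_FIXED(m->mat33, v->vz);

    v->vx = x;
    v->vy = y;
    v->vz = z;
}
#endif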