/* avp/src/mathline.c */

#include "3dc.h"
void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a);
void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a);
void MUL_I_WIDE(int a, int b, LONGLONGCH *c);
int CMP_LL(LONGLONGCH *a, LONGLONGCH *b);
void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b);
void NEG_LL(LONGLONGCH *a);
void ASR_LL(LONGLONGCH *a, int shift);
void IntToLL(LONGLONGCH *a, int *b);
int MUL_FIXED(int a, int b);
int DIV_FIXED(int a, int b);
#define DIV_INT(a, b) ((a) / (b))
int NarrowDivide(LONGLONGCH *a, int b);
int WideMulNarrowDiv(int a, int b, int c);
void RotateVector_ASM(VECTORCH *v, MATRIXCH *m);
void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m);
#if 0
int FloatToInt(float);
#define f2i(a, b) { a = FloatToInt(b); }
#endif
void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
{
/*
_asm
{
mov esi,a
mov edi,b
mov ebx,c
mov eax,[esi]
mov edx,[esi+4]
add eax,[edi]
adc edx,[edi+4]
mov [ebx],eax
mov [ebx+4],edx
}
*/
int dummy1, dummy2;
__asm__("movl 0(%%esi), %0 \n\t"
"movl 4(%%esi), %1 \n\t"
"addl 0(%%edi), %0 \n\t"
"adcl 4(%%edi), %1 \n\t"
"movl %0, 0(%%ebx) \n\t"
"movl %1, 4(%%ebx) \n\t"
: "=&r" (dummy1), "=&r" (dummy2)
: "S" (a), "D" (b), "b" (c)
: "memory", "cc"
);
/*
__asm__("movl 0(%%esi), %%eax \n\t"
"movl 4(%%esi), %%edx \n\t"
"addl 0(%%edi), %%eax \n\t"
"adcl 4(%%edi), %%edx \n\t"
: "=a" (c->lo32), "=d" (c->hi32)
: "S" (a), "D" (b)
);
*/
}
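/*
	Portable C reference for ADD_LL, kept disabled as a sketch only: it
	assumes the lo32/hi32 members used by the C paths of CMP_LL and
	EQUALS_LL below, with lo32 unsigned so the carry test is well defined;
	the _C name is purely illustrative.
*/
#if 0
void ADD_LL_C(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
{
	unsigned int lo = a->lo32 + b->lo32;

	/* carry out of the low word iff the wrapped sum is smaller than an addend */
	c->hi32 = a->hi32 + b->hi32 + (lo < a->lo32);
	c->lo32 = lo;
}
#endif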
/* ADD ++ */
void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a)
{
/*
_asm
{
mov edi,c
mov esi,a
mov eax,[esi]
mov edx,[esi+4]
add [edi],eax
adc [edi+4],edx
}
*/
int dummy1, dummy2;
__asm__("movl 0(%%esi), %0 \n\t"
"movl 4(%%esi), %1 \n\t"
"addl %0, 0(%%edi) \n\t"
"adcl %1, 4(%%edi) \n\t"
: "=&r" (dummy1), "=&r" (dummy2)
: "D" (c), "S" (a)
: "memory", "cc"
);
}
/* SUB */
void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
{
/*
_asm
{
mov esi,a
mov edi,b
mov ebx,c
mov eax,[esi]
mov edx,[esi+4]
sub eax,[edi]
sbb edx,[edi+4]
mov [ebx],eax
mov [ebx+4],edx
}
*/
int dummy1, dummy2;
__asm__("movl 0(%%esi), %0 \n\t"
"movl 4(%%esi), %1 \n\t"
"subl 0(%%edi), %0 \n\t"
"sbbl 4(%%edi), %1 \n\t"
"movl %0, 0(%%ebx) \n\t"
"movl %1, 4(%%ebx) \n\t"
: "=&r" (dummy1), "=&r" (dummy2)
: "S" (a), "D" (b), "b" (c)
: "memory", "cc"
);
}
/* SUB -- */
void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a)
{
/*
_asm
{
mov edi,c
mov esi,a
mov eax,[esi]
mov edx,[esi+4]
sub [edi],eax
sbb [edi+4],edx
}
*/
int dummy1, dummy2;
__asm__("movl 0(%%esi), %0 \n\t"
"movl 4(%%esi), %1 \n\t"
"subl %0, 0(%%edi) \n\t"
"sbbl %1, 4(%%edi) \n\t"
: "=&r" (dummy1), "=&r" (dummy2)
: "D" (c), "S" (a)
: "memory", "cc"
);
}
/*
MUL
This is the multiply we use, the 32 x 32 = 64 widening version
*/
void MUL_I_WIDE(int a, int b, LONGLONGCH *c)
{
/*
_asm
{
mov eax,a
mov ebx,c
imul b
mov [ebx],eax
mov [ebx+4],edx
}
*/
unsigned int d1;
__asm__("imull %3 \n\t"
"movl %%eax, 0(%%ebx) \n\t"
"movl %%edx, 4(%%ebx) \n\t"
: "=a" (d1)
: "0" (a), "b" (c), "m" (b)
: "%edx", "memory", "cc"
);
}
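/*
	Portable C reference for the widening multiply, kept disabled as a
	sketch only: it assumes a 64-bit long long, the lo32/hi32 members used
	by CMP_LL below, and a compiler (such as GCC) that shifts negative
	signed values arithmetically; the _C name is purely illustrative.
*/
#if 0
void MUL_I_WIDE_C(int a, int b, LONGLONGCH *c)
{
	long long wide = (long long)a * (long long)b;	/* 32 x 32 -> 64 */

	c->lo32 = (unsigned int)(wide & 0xffffffff);
	c->hi32 = (int)(wide >> 32);
}
#endif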
/*
CMP
This substitutes for ==, >, <, >=, <=
*/
int CMP_LL(LONGLONGCH *a, LONGLONGCH *b)
{
/*
int retval;
_asm
{
mov ebx,a
mov ecx,b
mov eax,[ebx]
mov edx,[ebx+4]
sub eax,[ecx]
sbb edx,[ecx+4]
and edx,edx
jne llnz
and eax,eax
je llgs
llnz:
mov retval,1
and edx,edx
jge llgs
neg retval
llgs:
}
*/
#if 0
int retval;
__asm__("movl 0(%%ebx), %%eax \n\t"
"movl 4(%%ebx), %%edx \n\t"
"subl 0(%%ecx), %%eax \n\t"
"sbbl 4(%%ecx), %%edx \n\t"
"xorl %%ebx, %%ebx \n\t"
"andl %%edx, %%edx \n\t"
"jne 0f \n\t" /* llnz */
"andl %%eax, %%eax \n\t"
"je 1f \n" /* llgs */
"0: \n\t" /* llnz */
"movl $1, %%ebx \n\t"
"andl %%edx, %%edx \n\t"
"jge 1f \n\t" /* llgs */
"negl %%ebx \n"
"1: \n\t" /* llgs */
: "=b" (retval)
: "b" (a), "c" (b)
: "%eax", "%edx", "memory", "cc"
);
return retval;
#else
if (a->hi32 > b->hi32)
return 1;
else if (a->hi32 < b->hi32)
return -1;
else if (a->lo32 > b->lo32)
return 1;
else if (a->lo32 < b->lo32)
return -1;
else
return 0;
#endif
}
/* EQUALS */
void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b)
{
/*
_asm
{
mov edi,a
mov esi,b
mov eax,[esi]
mov edx,[esi+4]
mov [edi],eax
mov [edi+4],edx
}
*/
#if 0
__asm__("movl 0(%%esi), %%eax \n\t"
"movl 4(%%esi), %%edx \n\t"
"movl %%eax, 0(%%edi) \n\t"
"movl %%edx, 4(%%edi) \n\t"
:
: "D" (a), "S" (b)
: "%eax", "%edx", "memory"
);
#else
*a = *b;
#endif
}
/* NEGATE */
void NEG_LL(LONGLONGCH *a)
{
/*
_asm
{
mov esi,a
not dword ptr[esi]
not dword ptr[esi+4]
add dword ptr[esi],1
adc dword ptr[esi+4],0
}
*/
__asm__("notl 0(%%esi) \n\t"
"notl 4(%%esi) \n\t"
"addl $1, 0(%%esi) \n\t"
"adcl $0, 4(%%esi) \n\t"
:
: "S" (a)
: "memory", "cc"
);
}
/* ASR */
void ASR_LL(LONGLONGCH *a, int shift)
{
/*
_asm
{
mov esi,a
mov eax,shift
and eax,eax
jle asrdn
asrlp:
sar dword ptr[esi+4],1
rcr dword ptr[esi],1
dec eax
jne asrlp
asrdn:
}
*/
unsigned int d1;
__asm__ volatile
("andl %0, %0 \n\t"
"jle 0 \n" /* asrdn */
"1: \n\t" /* asrlp */
"sarl $1, 4(%%esi) \n\t"
"rcrl $1, 0(%%esi) \n\t"
"decl %0 \n\t"
"jne 1 \n"
"0: \n\t"
: "=&r" (d1)
: "S" (a), "a" (shift)
: "memory", "cc"
);
}
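/*
	Portable C reference for ASR_LL, kept disabled as a sketch only: it
	assumes a 64-bit long long, the lo32/hi32 members used by CMP_LL
	below, and a compiler (such as GCC) that shifts negative signed values
	arithmetically; the _C name is purely illustrative.
*/
#if 0
void ASR_LL_C(LONGLONGCH *a, int shift)
{
	if (shift > 0)
	{
		long long wide = (long long)((((unsigned long long)(unsigned int)a->hi32) << 32)
		                             | (unsigned int)a->lo32);

		wide >>= shift;	/* arithmetic shift right of the full 64-bit value */

		a->lo32 = (unsigned int)(wide & 0xffffffff);
		a->hi32 = (int)(wide >> 32);
	}
}
#endif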
/* Convert int to LONGLONGCH */
void IntToLL(LONGLONGCH *a, int *b)
{
/*
_asm
{
mov esi,b
mov edi,a
mov eax,[esi]
cdq
mov [edi],eax
mov [edi+4],edx
}
*/
__asm__("movl 0(%%esi), %%eax \n\t"
"cdq \n\t"
"movl %%eax, 0(%%edi) \n\t"
"movl %%edx, 4(%%edi) \n\t"
:
: "S" (b), "D" (a)
: "%eax", "%edx", "memory", "cc"
);
}
/*
Fixed Point Multiply.
16.16 * 16.16 -> 16.16
or
16.16 * 0.32 -> 0.32
A proper version of this function ought to read
16.16 * 16.16 -> 32.16
but this would require a long long result
Algorithm:
Take the mid 32 bits of the 64 bit result
*/
/*
These functions have been checked for suitability for
a Pentium and look as if they would work adequately.
Might be worth a more detailed look at optimising
them though.
*/
int MUL_FIXED(int a, int b)
{
int retval;
/*
_asm
{
mov eax,a
imul b
shrd eax,edx,16
mov retval,eax
}
*/
__asm__("imull %2 \n\t"
"shrdl $16, %%edx, %%eax \n\t"
: "=a" (retval)
: "0" (a), "m" (b)
: "%edx", "cc"
);
return retval;
}
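/*
	Portable C reference for MUL_FIXED, kept disabled as a sketch only: it
	assumes a 64-bit long long and implements the algorithm described above
	by taking the middle 32 bits of the 64-bit product; the _C name is
	purely illustrative.
*/
#if 0
int MUL_FIXED_C(int a, int b)
{
	return (int)(((long long)a * (long long)b) >> 16);	/* 16.16 * 16.16 -> 16.16 */
}
#endif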
/*
Fixed Point Divide - returns a / b
*/
int DIV_FIXED(int a, int b)
{
int retval;
if (b == 0) /* TODO: debug this! (start with alien on ferarco) */
{
	printf("DEBUG THIS: a = %d, b = %d\n", a, b);
	return 0;
}
/*
_asm
{
mov eax,a
cdq
rol eax,16
mov dx,ax
xor ax,ax
idiv b
mov retval,eax
}
*/
__asm__("cdq \n\t"
"roll $16, %%eax \n\t"
"mov %%ax, %%dx \n\t"
"xor %%ax, %%ax \n\t"
"idivl %2 \n\t"
: "=a" (retval)
: "0" (a), "m" (b)
: "%edx", "cc"
);
return retval;
}
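/*
	Portable C reference for DIV_FIXED, kept disabled as a sketch only: it
	assumes a 64-bit long long; the dividend is widened and scaled up by
	2^16 before the divide, matching what the cdq/rol/idivl sequence above
	builds in edx:eax; the _C name is purely illustrative.
*/
#if 0
int DIV_FIXED_C(int a, int b)
{
	if (b == 0) return 0;	/* same guard as the live version */

	return (int)(((long long)a * 65536) / b);	/* (a << 16) / b without signed-shift pitfalls */
}
#endif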
/*
Multiply and Divide Functions.
*/
/*
32/32 division
This macro is a function on some other platforms
*/
#define DIV_INT(a, b) ((a) / (b))
/*
A Narrowing 64/32 Division
*/
int NarrowDivide(LONGLONGCH *a, int b)
{
int retval;
/*
_asm
{
mov esi,a
mov eax,[esi]
mov edx,[esi+4]
idiv b
mov retval,eax
}
*/
__asm__("movl 0(%%esi), %%eax \n\t"
"movl 4(%%esi), %%edx \n\t"
"idivl %2 \n\t"
: "=a" (retval)
: "S" (a), "m" (b)
: "%edx", "cc"
);
return retval;
}
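/*
	Portable C reference for the narrowing 64/32 divide, kept disabled as a
	sketch only: it assumes a 64-bit long long and the lo32/hi32 members
	used by CMP_LL above; the _C name is purely illustrative.
*/
#if 0
int NarrowDivide_C(LONGLONGCH *a, int b)
{
	/* reassemble the 64-bit value from its two halves */
	long long wide = (long long)((((unsigned long long)(unsigned int)a->hi32) << 32)
	                             | (unsigned int)a->lo32);

	return (int)(wide / b);
}
#endif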
/*
This function performs a Widening Multiply followed by a Narrowing Divide.
a = (a * b) / c
*/
int WideMulNarrowDiv(int a, int b, int c)
{
int retval;
/*
_asm
{
mov eax,a
imul b
idiv c
mov retval,eax
}
*/
__asm__("imull %2 \n\t"
"idivl %3 \n\t"
: "=a" (retval)
: "0" (a), "m" (b), "m" (c)
: "%edx", "cc"
);
return retval;
}
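/*
	Portable C reference for WideMulNarrowDiv, kept disabled as a sketch
	only: it assumes a 64-bit long long, so the full 64-bit product is kept
	before the divide just as the imull/idivl pairing above keeps it in
	edx:eax; the _C name is purely illustrative.
*/
#if 0
int WideMulNarrowDiv_C(int a, int b, int c)
{
	return (int)(((long long)a * (long long)b) / c);
}
#endif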
/*
Function to rotate a VECTORCH using a MATRIXCH
This is the C function
x = MUL_FIXED(m->mat11, v->vx);
x += MUL_FIXED(m->mat21, v->vy);
x += MUL_FIXED(m->mat31, v->vz);
y = MUL_FIXED(m->mat12, v->vx);
y += MUL_FIXED(m->mat22, v->vy);
y += MUL_FIXED(m->mat32, v->vz);
z = MUL_FIXED(m->mat13, v->vx);
z += MUL_FIXED(m->mat23, v->vy);
z += MUL_FIXED(m->mat33, v->vz);
v->vx = x;
v->vy = y;
v->vz = z;
This is the MUL_FIXED inline assembler function
imul edx
shrd eax,edx,16
typedef struct matrixch {
int mat11; 0
int mat12; 4
int mat13; 8
int mat21; 12
int mat22; 16
int mat23; 20
int mat31; 24
int mat32; 28
int mat33; 32
} MATRIXCH;
*/
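/*
	A C sketch of the rotation just described, kept disabled: the vx/vy/vz
	and mat11..mat33 field names are taken from the comment above, and the
	_C name is purely illustrative (only RotateVector_ASM is declared at
	the top of this file).
*/
#if 0
void RotateVector_C(VECTORCH *v, MATRIXCH *m)
{
	int x, y, z;

	x  = MUL_FIXED(m->mat11, v->vx);
	x += MUL_FIXED(m->mat21, v->vy);
	x += MUL_FIXED(m->mat31, v->vz);

	y  = MUL_FIXED(m->mat12, v->vx);
	y += MUL_FIXED(m->mat22, v->vy);
	y += MUL_FIXED(m->mat32, v->vz);

	z  = MUL_FIXED(m->mat13, v->vx);
	z += MUL_FIXED(m->mat23, v->vy);
	z += MUL_FIXED(m->mat33, v->vz);

	v->vx = x;
	v->vy = y;
	v->vz = z;
}
#endif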
/*
Square Root
Returns the Square Root of a 32-bit number
*/
volatile int sqrt_temp;
int SqRoot32(int A)
{
/*
_asm
{
finit
fild A
fsqrt
fistp temp2
fwait
}
*/
#if 0
__asm__ volatile
("finit \n\t"
"fildl %0 \n\t"
"fsqrt \n\t"
"fistpl sqrt_temp \n\t"
"fwait \n\t"
:
: "m" (A)
: "memory", "cc"
);
return sqrt_temp;
#else
return sqrt( (float)A );
#endif
}
/*
This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than
the function call "CHP" used by the WATCOM compiler.
*/
volatile float fti_fptmp;
volatile int fti_itmp;
void FloatToInt()
{
#if 0
__asm__ volatile
("flds fti_fptmp \n\t"
"fistpl fti_itmp \n\t"
:
:
: "memory", "cc"
);
#else
fti_itmp = (int)fti_fptmp;
#endif
}