Implemented (most of) the inline assembly in inline.h

2001-07-29 18:25:45 +00:00 · 2001-07-29 18:25:45 +00:00 · 5c497c61a6
commit 5c497c61a6
parent 44d4752e83
4 changed files with 724 additions and 180 deletions
--- a/2
+++ b/2
@ -6,7 +6,7 @@ CXXFLAGS = $(CFLAGS)
 LDLIBS = -lm # /home/relnev/ElectricFence-2.2.2/libefence.a
 CFLAGS += `sdl-config --cflags`
-LDLIBS += `sdl-config --libs`
+LDLIBS += -L/usr/X11R6/lib -lX11 -lXext `sdl-config --libs`
 AFLAGS = -g -Iinclude/ -w+macro-params -w+orphan-labels -w+number-overflow
--- a/src/include/prototyp.h
+++ b/src/include/prototyp.h
@ -2323,8 +2323,6 @@ int DestroyActiveVDB(VIEWDESCRIPTORBLOCK *dblockptr);
 void PlatformSpecificVDBInit(VIEWDESCRIPTORBLOCK *vdb);
 int SqRoot32(int A);
 int SqRoot64(LONGLONGCH *A);
 /* CDF 4/2/98 */
 int GetOneOverSin(int a);
 /* CDF 4/2/98 */
--- a/src/win95/inline.h
+++ b/src/win95/inline.h
@ -1215,9 +1215,9 @@ fptmp = (b); \
 FloatToInt(); \
 a = itmp;}
-#else /* other compiler ? */
+#else
-/* #error "Unknown compiler" */
+#if 0
 void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
 void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a);
 void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
@ -1240,6 +1240,722 @@ void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m);
 int FloatToInt(float);
 #define f2i(a, b) { a = FloatToInt(b); }
 #endif
 /* ADD */
 static __inline__ void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
 {
 	/*
 	 * 64-bit add: *c = *a + *b.
 	 * Each value is handled as two 32-bit words, the low word at byte
 	 * offset 0 and the high word at offset 4; the carry out of the low
 	 * add is folded into the high add by adcl.
 	 * Both words of *a are loaded before anything is stored, so c may
 	 * point at the same object as a or b.
 	 */
 	__asm__("movl	0(%0), %%eax		\n\t"
 		"movl	4(%0), %%edx		\n\t"
 		"addl	0(%1), %%eax		\n\t"
 		"adcl	4(%1), %%edx		\n\t"
 		"movl	%%eax, 0(%2)		\n\t"
 		"movl	%%edx, 4(%2)		\n\t"
 		: /* results are written through c */
 		: "r" (a), "r" (b), "r" (c)
 		: "%eax", "%edx", "memory", "cc"
 		);
 }
 /* ADD ++ */
 static __inline__ void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a)
 {
 	/*
 	 * 64-bit accumulate: *c += *a.
 	 * The low words (offset 0) are added first, then the high words
 	 * (offset 4) with the carry from the low add.
 	 */
 	__asm__("movl	0(%1), %%eax		\n\t"
 		"movl	4(%1), %%edx		\n\t"
 		"addl	%%eax, 0(%0)		\n\t"
 		"adcl	%%edx, 4(%0)		\n\t"
 		: /* result is written through c */
 		: "r" (c), "r" (a)
 		: "%eax", "%edx", "memory", "cc"
 		);
 }
 /* SUB */
 static __inline__ void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
 {
 	/*
 	 * 64-bit subtract: *c = *a - *b.
 	 * The low words (offset 0) are subtracted first; sbbl then removes
 	 * the borrow while subtracting the high words (offset 4).
 	 * Both words of *a are loaded before anything is stored, so c may
 	 * point at the same object as a or b.
 	 */
 	__asm__("movl	0(%0), %%eax		\n\t"
 		"movl	4(%0), %%edx		\n\t"
 		"subl	0(%1), %%eax		\n\t"
 		"sbbl	4(%1), %%edx		\n\t"
 		"movl	%%eax, 0(%2)		\n\t"
 		"movl	%%edx, 4(%2)		\n\t"
 		: /* results are written through c */
 		: "r" (a), "r" (b), "r" (c)
 		: "%eax", "%edx", "memory", "cc"
 		);
 }
 /* SUB -- */
 static __inline__ void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a)
 {
 	/*
 	 * 64-bit in-place subtract: *c -= *a.
 	 * Low words (offset 0) first, then the high words (offset 4) with
 	 * the borrow from the low subtract.
 	 */
 	__asm__("movl	0(%1), %%eax		\n\t"
 		"movl	4(%1), %%edx		\n\t"
 		"subl	%%eax, 0(%0)		\n\t"
 		"sbbl	%%edx, 4(%0)		\n\t"
 		: /* result is written through c */
 		: "r" (c), "r" (a)
 		: "%eax", "%edx", "memory", "cc"
 		);
 }
 /*
 MUL
 This is the multiply we use, the 32 x 32 = 64 widening version
 */
 static __inline__ void MUL_I_WIDE(int a, int b, LONGLONGCH *c)
 {
 	/*
 	 * Signed widening multiply: *c = (64-bit) a * b.
 	 * The low 32 bits of the product are stored at offset 0 of *c and
 	 * the high 32 bits at offset 4.
 	 *
 	 * Operand numbering: %0 = scratch output (eax), %1 = a (tied to eax),
 	 * %2 = c (ebx), %3 = b.  The multiply must name %3: the one-operand
 	 * imull multiplies eax by its operand, so naming %0 would multiply
 	 * a by itself.
 	 * eax is declared as an output because imull overwrites it; the
 	 * asm is volatile so it is kept even though that output is unused.
 	 */
 	int lo_scratch;
 __asm__ __volatile__("imull	%3			\n\t"
 	"movl	%%eax, 0(%%ebx)		\n\t"
 	"movl	%%edx, 4(%%ebx)		\n\t"
 	: "=a" (lo_scratch)
 	: "0" (a), "b" (c), "q" (b)
 	: "%edx", "memory", "cc"
 	);
 	(void)lo_scratch;
 }
 /*
 CMP
 This substitutes for ==, >, <, >=, <=
 */
 static __inline__ int CMP_LL(LONGLONGCH *a, LONGLONGCH *b)
 {
 	/*
 	 * Three-way compare of two 64-bit values.
 	 * Forms the 64-bit difference *a - *b (low word at offset 0, high
 	 * word at offset 4) and returns:
 	 *    0  when the difference is zero,
 	 *   +1  when the high word of the difference is non-negative,
 	 *   -1  when the high word of the difference is negative.
 	 * The sign is taken from the difference itself, not from the
 	 * overflow flag.
 	 *
 	 * Numeric local labels (1:/2:) are used so that the function can be
 	 * inlined more than once per translation unit without duplicate
 	 * symbol errors.  The result is early-clobbered ("=&r") because it
 	 * is zeroed before the pointer inputs in ebx/ecx are read and must
 	 * therefore not share a register with them.
 	 */
 	int retval;
 __asm__("xorl	%0, %0			\n\t"
 	"movl	0(%%ebx), %%eax		\n\t"
 	"movl	4(%%ebx), %%edx		\n\t"
 	"subl	0(%%ecx), %%eax		\n\t"
 	"sbbl	4(%%ecx), %%edx		\n\t"
 	"andl	%%edx, %%edx		\n\t"
 	"jne	1f			\n\t"
 	"andl	%%eax, %%eax		\n\t"
 	"je	2f			\n"
 "1:					\n\t"
 	"movl	$1, %0			\n\t"
 	"andl	%%edx, %%edx		\n\t"
 	"jge	2f			\n\t"
 	"negl	%0			\n"
 "2:					\n\t"
 	: "=&r" (retval)
 	: "b" (a), "c" (b)
 	: "%eax", "%edx", "memory", "cc"
 	);
 	return retval;
 }
 /* EQUALS */
 static __inline__ void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b)
 {
 	/*
 	 * 64-bit assignment: *a = *b.
 	 * Copies the two 32-bit words at offsets 0 and 4 through eax/edx.
 	 */
 	__asm__("movl	0(%1), %%eax		\n\t"
 		"movl	4(%1), %%edx		\n\t"
 		"movl	%%eax, 0(%0)		\n\t"
 		"movl	%%edx, 4(%0)		\n\t"
 		: /* result is written through a */
 		: "r" (a), "r" (b)
 		: "%eax", "%edx", "memory"
 		);
 }
 /* NEGATE */
 static __inline__ void NEG_LL(LONGLONGCH *a)
 {
 	/*
 	 * 64-bit two's-complement negate in place: *a = -*a.
 	 * Both words are inverted, then 1 is added to the low word
 	 * (offset 0) and the carry propagated into the high word (offset 4).
 	 */
 	__asm__("notl	0(%0)			\n\t"
 		"notl	4(%0)			\n\t"
 		"addl	$1, 0(%0)		\n\t"
 		"adcl	$0, 4(%0)		\n\t"
 		: /* result is written through a */
 		: "r" (a)
 		: "memory", "cc"
 		);
 }
 /* ASR */
 static __inline__ void ASR_LL(LONGLONGCH *a, int shift)
 {
 	/*
 	 * 64-bit arithmetic shift right in place: *a >>= shift.
 	 * Does nothing when shift <= 0.  Otherwise shifts one bit per loop
 	 * iteration: sarl shifts the high word (offset 4) keeping its sign,
 	 * and rcrl rotates the bit that fell out into the top of the low
 	 * word (offset 0).
 	 *
 	 * The counter lives in eax and is decremented by the loop, so it is
 	 * declared as an output tied to the shift input rather than as a
 	 * pure input.  The asm is volatile so it is not discarded just
 	 * because that output is unused.  Numeric local labels keep repeated
 	 * inlining from defining the same symbol twice.
 	 */
 __asm__ __volatile__("andl	%0, %0			\n\t"
 	"jle	2f			\n"
 "1:					\n\t"
 	"sarl	$1, 4(%%esi)		\n\t"
 	"rcrl	$1, 0(%%esi)		\n\t"
 	"decl	%0			\n\t"
 	"jne	1b			\n"
 "2:					\n\t"
 	: "=a" (shift)
 	: "S" (a), "0" (shift)
 	: "memory", "cc"
 	);
 }
 /* Convert int to LONGLONGCH */
 static __inline__ void IntToLL(LONGLONGCH *a, int *b)
 {
 	/*
 	 * Sign-extend the 32-bit integer *b into the 64-bit value *a.
 	 * The value is placed in eax, cdq replicates its sign bit through
 	 * edx, and the pair is stored as low word (offset 0) and high word
 	 * (offset 4).
 	 */
 	__asm__("cdq				\n\t"
 		"movl	%%eax, 0(%1)		\n\t"
 		"movl	%%edx, 4(%1)		\n\t"
 		: /* result is written through a */
 		: "a" (*b), "r" (a)
 		: "%edx", "memory", "cc"
 		);
 }
 /*
 Fixed Point Multiply.
 16.16 * 16.16 -> 16.16
 or
 16.16 * 0.32 -> 0.32
 A proper version of this function ought to read
 16.16 * 16.16 -> 32.16
 but this would require a long long result
 Algorithm:
 Take the mid 32 bits of the 64 bit result
 */
 /*
 	These functions have been checked for suitability for 
 	a Pentium and look as if they would work adequately.
 	Might be worth a more detailed look at optimising
 	them though.
 */
 static __inline__ int MUL_FIXED(int a, int b)
 {
 	/*
 	 * Fixed-point multiply: returns bits 16..47 of the signed 64-bit
 	 * product a * b (16.16 * 16.16 -> 16.16).
 	 *
 	 * Operand numbering: %0 = retval (eax), %1 = a (tied to eax),
 	 * %2 = b.  The one-operand imull multiplies eax by its operand and
 	 * leaves the product in edx:eax, so it must name %2; naming %0
 	 * would square a.  shrdl then shifts the middle 32 bits of the
 	 * product down into eax.
 	 */
 	int retval;
 __asm__("imull	%2			\n\t"
 	"shrdl	$16, %%edx, %%eax	\n\t"
 	: "=a" (retval)
 	: "0" (a), "q" (b)
 	: "%edx", "cc"
 	);
 	return retval;
 }
 /*
 Fixed Point Divide - returns a / b
 */
 static __inline__ int DIV_FIXED(int a, int b)
 {
 	/*
 	 * Fixed-point divide: returns (a * 65536) / b using a 64-bit
 	 * dividend and a signed 32-bit divisor.
 	 *
 	 * cdq sign-extends a into edx:eax; the rotate / 16-bit move / 16-bit
 	 * clear sequence then leaves edx:eax holding a shifted left by 16.
 	 * Operand numbering: %0 = retval (eax), %1 = a (tied to eax),
 	 * %2 = b.  idivl must name %2; naming %0 would divide the dividend
 	 * by its own low word.
 	 *
 	 * As with any x86 idiv, b == 0 or a quotient that does not fit in
 	 * 32 bits raises a divide error; callers must avoid both.
 	 */
 	int retval;
 __asm__("cdq				\n\t"
 	"roll	$16, %%eax		\n\t"
 	"movw	%%ax, %%dx		\n\t"
 	"xorw	%%ax, %%ax		\n\t"
 	"idivl	%2			\n\t"
 	: "=a" (retval)
 	: "0" (a), "q" (b)
 	: "%edx", "cc"
 	);
 	return retval;
 }
 /*
 Multiply and Divide Functions.
 */
 /*
 32/32 division
 This macro is a function on some other platforms
 */
 /* Expands to the plain C expression (a) / (b); each argument is evaluated exactly once. */
 #define DIV_INT(a, b) ((a) / (b))
 /*
 A Narrowing 64/32 Division
 */
 static __inline__ int NarrowDivide(LONGLONGCH *a, int b)
 {
 	/*
 	 * Narrowing divide: returns the 32-bit quotient of the signed
 	 * 64-bit value *a (low word at offset 0, high word at offset 4)
 	 * divided by b.
 	 *
 	 * Operand numbering: %0 = retval (eax), %1 = a (esi), %2 = b.
 	 * idivl must name %2; naming %0 would divide by the low word that
 	 * was just loaded into eax.  The result is early-clobbered ("=&a")
 	 * because eax is written before b is read, so b must not be placed
 	 * in eax.  The "memory" clobber tells the compiler that *a is read
 	 * here, so earlier stores to it are completed first.
 	 *
 	 * As with any x86 idiv, b == 0 or a quotient that does not fit in
 	 * 32 bits raises a divide error; callers must avoid both.
 	 */
 	int retval;
 __asm__("movl	0(%%esi), %%eax		\n\t"
 	"movl	4(%%esi), %%edx		\n\t"
 	"idivl	%2			\n\t"
 	: "=&a" (retval)
 	: "S" (a), "q" (b)
 	: "%edx", "memory", "cc"
 	);
 	return retval;
 }
 /*
 This function performs a Widening Multiply followed by a Narrowing Divide.
 a = (a * b) / c
 */
 static __inline__ int WideMulNarrowDiv(int a, int b, int c)
 {
 	/*
 	 * Returns (a * b) / c, keeping the intermediate product in 64 bits
 	 * so that it cannot overflow before the divide.
 	 *
 	 * Operand numbering: %0 = retval (eax), %1 = a (tied to eax),
 	 * %2 = b, %3 = c.  imull leaves the signed product in edx:eax and
 	 * idivl divides that pair by c.  edx is listed as clobbered because
 	 * both instructions overwrite it; without that the compiler could
 	 * place b or c there.
 	 *
 	 * As with any x86 idiv, c == 0 or a quotient that does not fit in
 	 * 32 bits raises a divide error; callers must avoid both.
 	 */
 	int retval;
 __asm__("imull	%2			\n\t"
 	"idivl	%3			\n\t"
 	: "=a" (retval)
 	: "0" (a), "q" (b), "q" (c)
 	: "%edx", "cc"
 	);
 	return retval;
 }
 /*
 Function to rotate a VECTORCH using a MATRIXCH
 This is the C function
 	x =  MUL_FIXED(m->mat11, v->vx);
 	x += MUL_FIXED(m->mat21, v->vy);
 	x += MUL_FIXED(m->mat31, v->vz);
 	y  = MUL_FIXED(m->mat12, v->vx);
 	y += MUL_FIXED(m->mat22, v->vy);
 	y += MUL_FIXED(m->mat32, v->vz);
 	z  = MUL_FIXED(m->mat13, v->vx);
 	z += MUL_FIXED(m->mat23, v->vy);
 	z += MUL_FIXED(m->mat33, v->vz);
 	v->vx = x;
 	v->vy = y;
 	v->vz = z;
 This is the MUL_FIXED inline assembler function
 	imul edx
 	shrd eax,edx,16
 typedef struct matrixch {
 	int mat11;	0
 	int mat12;	4
 	int mat13;	8
 	int mat21;	12
 	int mat22;	16
 	int mat23;	20
 	int mat31;	24
 	int mat32;	28
 	int mat33;	32
 } MATRIXCH;
 */
 #if 0 /* TODO if these are needed */
 /*
 Rotates the vector *v in place by the matrix *m (disabled: this is still the
 MSVC-syntax _asm original and sits inside an #if 0 block).
 Each component is the sum of three fixed-point products, every product being
 formed with imul followed by shrd eax,edx,16 (the MUL_FIXED sequence).
 v is read at byte offsets 0/4/8 and m at byte offsets 0..32.
 The three sums are accumulated in ecx, ebx and ebp and only written back to
 *v once all nine products have been computed.
 */
 static void RotateVector_ASM(VECTORCH *v, MATRIXCH *m)
 {
 	_asm
 	{
 		mov esi,v
 		mov edi,m
 		/* ecx = m[0]*v[0] + m[12]*v[4] + m[24]*v[8] */
 		mov	eax,[edi + 0]
 		imul	DWORD PTR [esi + 0]
 		shrd	eax,edx,16
 		mov	ecx,eax
 		mov	eax,[edi + 12]
 		imul	DWORD PTR [esi + 4]
 		shrd	eax,edx,16
 		add	ecx,eax
 		mov	eax,[edi + 24]
 		imul	DWORD PTR [esi + 8]
 		shrd	eax,edx,16
 		add	ecx,eax
 		/* ebx = m[4]*v[0] + m[16]*v[4] + m[28]*v[8] */
 		mov	eax,[edi + 4]
 		imul	DWORD PTR [esi + 0]
 		shrd	eax,edx,16
 		mov	ebx,eax
 		mov	eax,[edi + 16]
 		imul	DWORD PTR [esi + 4]
 		shrd	eax,edx,16
 		add	ebx,eax
 		mov	eax,[edi + 28]
 		imul	DWORD PTR [esi + 8]
 		shrd	eax,edx,16
 		add	ebx,eax
 		/* ebp = m[8]*v[0] + m[20]*v[4] + m[32]*v[8] */
 		mov	eax,[edi + 8]
 		imul	DWORD PTR [esi + 0]
 		shrd	eax,edx,16
 		mov	ebp,eax
 		mov	eax,[edi + 20]
 		imul	DWORD PTR [esi + 4]
 		shrd	eax,edx,16
 		add	ebp,eax
 		mov	eax,[edi + 32]
 		imul	DWORD PTR [esi + 8]
 		shrd	eax,edx,16
 		add	ebp,eax
 		/* write the three results back over the input vector */
 		mov	[esi + 0],ecx
 		mov	[esi + 4],ebx
 		mov	[esi + 8],ebp
 	}
 }
 /*
 Here is the same function, this time copying the result to a second vector
 */
 /*
 Rotates the vector *v1 by the matrix *m and stores the result in *v2; *v1 is
 only read (disabled: MSVC-syntax _asm original inside an #if 0 block).
 Each component is the sum of three fixed-point products, every product being
 formed with imul followed by shrd eax,edx,16 (the MUL_FIXED sequence).
 The three sums are accumulated in ecx, ebx and ebp; v2 is loaded into edx
 only after the last product, because edx is overwritten by every imul.
 */
 static void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m)
 {
 	_asm
 	{
 		mov esi,v1
 		mov edi,m
 		/* ecx = m[0]*v1[0] + m[12]*v1[4] + m[24]*v1[8] */
 		mov	eax,[edi + 0]
 		imul	DWORD PTR [esi + 0]
 		shrd	eax,edx,16
 		mov	ecx,eax
 		mov	eax,[edi + 12]
 		imul	DWORD PTR [esi + 4]
 		shrd	eax,edx,16
 		add	ecx,eax
 		mov	eax,[edi + 24]
 		imul	DWORD PTR [esi + 8]
 		shrd	eax,edx,16
 		add	ecx,eax
 		/* ebx = m[4]*v1[0] + m[16]*v1[4] + m[28]*v1[8] */
 		mov	eax,[edi + 4]
 		imul	DWORD PTR [esi + 0]
 		shrd	eax,edx,16
 		mov	ebx,eax
 		mov	eax,[edi + 16]
 		imul	DWORD PTR [esi + 4]
 		shrd	eax,edx,16
 		add	ebx,eax
 		mov	eax,[edi + 28]
 		imul	DWORD PTR [esi + 8]
 		shrd	eax,edx,16
 		add	ebx,eax
 		/* ebp = m[8]*v1[0] + m[20]*v1[4] + m[32]*v1[8] */
 		mov	eax,[edi + 8]
 		imul	DWORD PTR [esi + 0]
 		shrd	eax,edx,16
 		mov	ebp,eax
 		mov	eax,[edi + 20]
 		imul	DWORD PTR [esi + 4]
 		shrd	eax,edx,16
 		add	ebp,eax
 		mov	eax,[edi + 32]
 		imul	DWORD PTR [esi + 8]
 		shrd	eax,edx,16
 		add	ebp,eax
 		/* store the three results into the destination vector */
 		mov edx,v2
 		mov	[edx + 0],ecx
 		mov	[edx + 4],ebx
 		mov	[edx + 8],ebp
 	}
 }
 #endif
 #if (SupportFPMathsFunctions || SupportFPSquareRoot)
 /*
 Square Root
 Returns the Square Root of a 32-bit number
 */
 extern int sqrt_temp1;
 extern int sqrt_temp2;
 static __inline__ int SqRoot32(int A)
 {
 	/*
 	 * Integer square root computed on the x87 FPU.
 	 * A is staged in the global sqrt_temp1, loaded as a 32-bit integer,
 	 * square-rooted, and stored as a 32-bit integer into the global
 	 * sqrt_temp2, which is then returned.
 	 *
 	 * The load and store carry an explicit 'l' (32-bit) suffix: both
 	 * globals are ints, and an unsuffixed fild/fistp leaves the operand
 	 * size to the assembler's default.  The globals are passed as "m"
 	 * operands instead of being named in the instruction text, so the
 	 * compiler sees the accesses and the symbol spelling is left to it.
 	 *
 	 * NOTE(review): finit reinitialises the whole FPU before the load,
 	 * as the original MSVC version did; any live x87 state belonging to
 	 * the caller is lost.  The float-to-integer rounding of fistpl
 	 * follows the FPU rounding mode in force after finit.
 	 */
 	sqrt_temp1 = A;
 __asm__ __volatile__("finit				\n\t"
 	"fildl	%1			\n\t"
 	"fsqrt				\n\t"
 	"fistpl	%0			\n\t"
 	"fwait				\n\t"
 	: "=m" (sqrt_temp2)
 	: "m" (sqrt_temp1)
 	: "memory", "cc"
 	);
 	return sqrt_temp2;
 }
 #endif
 /*
 This may look ugly (it is) but it is a MUCH faster way to convert "float" into "int" than
 the function call "CHP" used by the WATCOM compiler.
 */
 extern float fti_fptmp;
 extern int fti_itmp;
 static __inline__ int FloatToInt(float fptmp)
 {
 	/*
 	 * Converts a float to an int with a single x87 load/store pair.
 	 * The argument is staged in the global fti_fptmp, loaded as a
 	 * single-precision value, and stored as a 32-bit integer into the
 	 * global fti_itmp, which is then returned.
 	 *
 	 * The instructions carry explicit size suffixes ('s' = 32-bit float,
 	 * 'l' = 32-bit integer) so the operand widths match the C types
 	 * rather than depending on the assembler's default for unsuffixed
 	 * mnemonics.  The globals are passed as "m" operands so the
 	 * compiler sees the accesses.
 	 *
 	 * NOTE(review): fistpl rounds according to the current FPU rounding
 	 * mode, which is not necessarily the truncation a C cast performs.
 	 */
 	fti_fptmp = fptmp;
 __asm__ __volatile__("flds	%1			\n\t"
 	"fistpl	%0			\n\t"
 	: "=m" (fti_itmp)
 	: "m" (fti_fptmp)
 	: "memory", "cc"
 	);
 	return fti_itmp;
 }
 /*
 This macro makes usage of the above function easier and more elegant
 */
 /*
 f2i(a, b): assigns FloatToInt(b) to the lvalue a.
 Expands to a brace-enclosed block, not an expression, so it can only be used
 where a statement is allowed.
 */
 #define f2i(a, b) { \
 a = FloatToInt(b); \
 }
 #endif
--- a/src/win95/plspecfn.c
+++ b/src/win95/plspecfn.c
@ -18,6 +18,11 @@
 #include "kshape.h"
 #endif
 /*
 globals from inline.h
 inline.h declares these extern; this file provides the single definition.
 sqrt_temp1 / sqrt_temp2 are the input and result slots used by the inline
 SqRoot32(), fti_fptmp / fti_itmp the input and result slots used by the
 inline FloatToInt().
 NOTE(review): shared file-scope scratch storage, so those helpers are
 presumably not reentrant - confirm before calling them from several threads.
 */
 int sqrt_temp1;
 int sqrt_temp2;
 float fti_fptmp;
 int fti_itmp;
 /*
@ -513,88 +518,6 @@ int WideMul2NarrowDiv(int a, int b, int c, int d, int e)
 }
 /*
 Square Root
 Returns the Square Root of a 32-bit number
 */
 #if (SupportFPMathsFunctions || SupportFPSquareRoot)
 #else
 int SqRoot32(int A)
 {
 	/*
 	 * Bit-by-bit integer square root of A taken as an unsigned 32-bit
 	 * value.  The input is consumed two bits at a time from the top
 	 * (16 pairs); each pair yields one bit of the root.
 	 */
 	unsigned int pending = A;	/* input bits not yet consumed */
 	unsigned int root = 0;		/* root bits found so far */
 	unsigned int rem = 0;		/* running remainder */
 	int pair;
 	for (pair = 0; pair < 16; pair++) {
 		unsigned int trial;
 		/* bring the next two input bits into the remainder */
 		rem = (rem << 2) | (pending >> 30);
 		pending <<= 2;
 		/* try to append a 1 bit to the root */
 		root <<= 1;
 		trial = (root << 1) + 1;
 		if (rem >= trial) {
 			rem -= trial;
 			root++;
 		}
 	}
 	return ((int)root);
 }
 #endif	/* SupportFPMathsFunctions */
 /*
 Calculate Plane Normal from three POP's
@ -1115,99 +1038,6 @@ int Magnitude(VECTORCH *v)
 }
 /*
 64-bit Square Root returns 32-bit result
 All 64-bit operations are now done using the type LONGLONGCH whose format
 varies from platform to platform, although it is always 64-bits in size.
 NOTE:
 Function currently not available to Watcom C users
 A Floating point version is STRONGLY advised for the PC anyway
 */
 #if 0
 /*
 Stub: the only statement that is compiled is the final return (0).
 The bitwise 64-bit root below it is disabled by the inner #if 0; it consumes
 the input two bits at a time from the top, 32 pairs in total, producing one
 result bit per pair.
 NOTE(review): the disabled body initialises an unsigned long long directly
 from *A, which presumes LONGLONGCH is an integer type - confirm before
 re-enabling it.
 */
 int SqRoot64(LONGLONGCH *A)
 {
 #if 0
 	unsigned long long edx = *A;
 	unsigned int eax = 0;
 	unsigned int ebx = 0;
 	unsigned int edi = 0;
 	unsigned int ecx;
 	unsigned long long TopBit = 0x8000000000000000LL;
 	for(ecx = 31; ecx != 0; ecx--) {
 		ebx <<= 1;
 		if(edx & TopBit) ebx |= 1;
 		edx <<= 1;
 		ebx <<= 1;
 		if(edx & TopBit) ebx |= 1;
 		edx <<= 1;
 		eax += eax;
 		edi  = eax;
 		edi += edi;
 		if(ebx > edi) {
 			edi++;
 			eax++;
 			ebx -= edi;
 		}
 	}
 	ebx <<= 1;
 	if(edx & TopBit) ebx |= 1;
 	edx <<= 1;
 	ebx <<= 1;
 	if(edx & TopBit) ebx |= 1;
 	edx <<= 1;
 	eax += eax;
 	edi  = eax;
 	edi += edi;
 	if(ebx > edi) {
 		eax++;
 	}
 	return eax;
 #endif
 	return (0);
 }
 #endif /* for #if 0 */
 /*
 Shift the 64-bit value until it is less than or equal to the limit