;----------------------------------------------------------------------------
;File name:	URAn_LFE.S			Revision date:	1995.10.07
;Revised by:	Ulf Ronald Andersson		Revision start:	1995.06.15
;----------------------------------------------------------------------------
;URAn Copyright:
;I release my revisions to this line-F emulator as FREEWARE, without any
;restrictions whatsoever, which leaves the original copyrights unaltered.
;----------------------------------------------------------------------------
;URAn Disclaimer:
;I am not to be held responsible in any way whatever for any consequences
;of use, nonuse, misuse, or abuse of this (or indeed any other) software.
;----------------------------------------------------------------------------
;	Assembly parameter definitions
;
program_XB_id	=	'LFED'
fpu_68881	=	$68881
fpu_68882	=	$68882
;
fpu_type	=	fpu_68882	;change this to your FPU type
fpu_offset	=	0		;change this if SFP_004 modified
;
line_F_only	=	1		;FPU cookie that hides SFP
line_F_and_SFP	=	$10001		;normal FPU cookie
;
cook_v		=	line_F_and_SFP	;choose actual FPU cookie to install
;
;----------------------------------------------------------------------------
;URAn revisions:
;
;1:	Changed source to be DevPac compatible.
;
;2:	Changed source to use URAn_xxx lib-files (why reinvent the wheel ?),
;	and included those used into the new package:
;
;	URAn_SYS.S	System vectors, variables, structures, hardware etc.
;	URAn_DOS.S	Macros for bios, xbios and gemdos.
;	URAn_XB.S	Macros for XBRA manipulation of all kinds.
;	URAn_JAR.S	Macros for cookie jar manipulation of all kinds.
;	
;
;3:	Translated diverse texts, comments and labels from the original
;	german to english.
;
;4:	Added hardware test for fpu presence.	(URAn_XB.S)
;	This temporarily links a new buserror handler using XBRA,
;	then accesses fpu which gives bus error if none present.
;
;5:	Added XBRA test for chained vectors.	(URAn_XB.S)
;	(Original only tested first chain link.)
;
;6:	Added creation of missing cookie.	(URAn_JAR.S)
;	This also creates cookie jar, if missing, and in that case also
;	installs a reset routine to remove the jar on warm reset.
;
;7:	Changed cookie storage code to use a symbolic constant, so it is
;	easy to mask the SFP indicator, to force programs to use line_F,
;	NB: This is extremely important in case of a 68882 fpu, since most
;	    programs will mistake it for a 68881.  That usually causes the
;	    computer to 'lock up' in an eternal loop awaiting responses
;	    the 68881 gives that differ from those of the 68882.
;
;8:	Added assembly constant defining offset to fpu address,
;	so modified SFP-004 boards can use 68882 without being recognized
;	by other softwares (which are usually not 68882-compatible).
;
;9:	Added conditional assembly clauses for 68882 compatibility.
;	(Should work, but is not yet fully tested !!!)
;
;10:	Corrected several serious bugs that crippled all except one
;	of the addressing modes of the FMOVEM instruction, which now
;	should be fully, and correctly, implemented.
;	NB: Some of the errors could adjust sp incorrectly !!!
;	The only fully correct mode was: "FMOVEM ea,FPn" ; where ea was
;	one of the 'control' modes.
;
;11:	Added local stack, partly to release unused RAM first in program,
;	but mainly to implement local stack for the line_F emulator.
;	Thus line_F codes can now be used also in supervisor mode.
;	This lost some of the speed increase gained elsewhere, but is worth
;	it, since speed critical programs may prefer supervisor mode for
;	heavy calculations to disable multitasking. Also, privileged opcodes
;	(such as FSAVE and FRESTORE) are always run in supervisor mode.
;	(Older LFED would bomb with such programs).
;
;12:	Added alternative emulator XBRA header, linked only for TOS older
;	than TOS 1.06, which tests for TOS-patch calls like LFED 1.00 did.
;	So the program will now work with both new and old TOS, without
;	sacrificing speed in the new TOS.  Useful for switched-TOS systems.
;	Actually there are two such 'extra' XBRA headers, which allows old
;	TOS to be recognized in RAM as well as in ROM.
;	NB: I didn't combine them, because the RAM test is slower (2 cmp's).
;
;13:	Implemented FSAVE, hopefully completely, except for not generating
;	exceptions for all illegal address modes (instead most are allowed).
;	NB: The same limited error checking was used throughout original.
;
;14:	Implemented FRESTORE, to the same extent as FSAVE.
;
;15:	Tweaked most of the 'specials' by restructuring bit tests, and
;	also by eliminating the 'extra' read of the response register before
;	proceeding with next opcode, since the FPU is always in	the 'idle'
;	state when it gives conditional results anyway.
;	Also abolished erroneous treatment of IA-bit as if it were CA_bit.
;
;16:	Corrected FBcc.L bug (seems to be editing mistake) which caused it
;	to jump 2 bytes short, even when branch not taken.
;	NB: I checked the source of all three preceding versions, and found
;	    that this bug was unchanged since 1.00, where some code had been
;	    inserted _before_, instead of _after_ a label, so it never ran.
;
;17:	Implemented FTRAPcc partially, so that this instruction does not
;	cause any exception unless the condition 'cc' is true.
;	Earlier LFED versions always made exception (=> program aborted)
;	for FTRAPcc instructions, without even evaluating the condition.
;
;18:	Corrected FDBcc bug, that caused the countdown data register to be
;	decremented even when the condition was true.
;
;19:	Changed the FPU reset of the error handler to FRESTORE a NULL frame,
;	instead of simply writing to the control CIR.  Since FRESTORE works
;	identically on 68881 and 68882, and gives more complete reset.
;
;----------------------------------------------------------------------------
;URAn:	Remaining lacks
;
;	FTRAP is only partially implemented, due to the lack of a 68020/68030
;	exception system, which is the basis for 68xxx coprocessor protocols.
;
;	This also means that _no_ exceptions can be correctly implemented
;	as described in the 'Floating-Point Coprocessor User's Manual'.
;
;	The emulator will not run on an ST/STE upgraded to cpu > 68010,
;	but in such case it would be silly to use a 'peripheral' FPU anyway.
;
;	The emulator is not reentrant (and not likely to become so either).
;
;----------------------------------------------------------------------------
;URAn:	Here follows the original German file-head of the old package,
;	unchanged except for a few clearly marked comments (in English).
;----------------------------------------------------------------------------
;Programm-/Routinen-Name:	Line-F Emulation Driver / LFED
;				Version 2.00
;Datum: 27.07.92
;
;Aufgabe/Kurzdoku: Line-F-Emulator fuer die Benutzung eines MC68881
;in einem Atari ST/STE mit MC68000 (also auch der SFP004-Karte).
;Mit LFED ist die Nutzung der FPU in vollem Umfang gewaehrleistet und
;kann von jedem Programm angesprochen werden, welches FPU-Unterstuetzung
;verlangt. MC68030 (TT) optimierte Programme sind hiervon natuerlich
;ausgenommen, da der MC68000 dessen InstructionSet nicht versteht!
;
;Wird ein MC68882 als FPU genutzt, muessen einige kleinere Anpassungen
;vorgenommen werden, da dieser an verschiedenen Stellen Uebertragungen
;des PC's erwartet.
;URAn:	These changes (for 68882) are now included, compatibly to 68881.
;
;Autor:  Roger Butenuth (c't 04/90 Seite 430 ff.)
;	 Michael Hauschild (XBRA-Erweiterungen und alle folgenden
;	 Versionen)  
;
;	(c)	Smart Systems '92
;	Windhornstrae 2 / 3016 Seelze 1
;	Telefon (05137) 9 20 09
;---------------------------------------------------------------------
;Dokumentation: 
;---------------------------------------------------------------------
;Der Treiber installiert sich unter Beachtung des XBRA-Protokolls, 
;d.h. er erkennt sein Vorhandensein und gibt eine Meldung aus, falls
;versucht wird, ihn erneut zu installieren.
;
;Nicht untersttzte FPU-Befehle:
;
;	FTRAP, FSAVE, FRESTORE
;
;Erkannte BUGS in Version 1.00:
;
;	FMOVEM (An),FP0-FPn  ... dieser ist eigentlich kein BUG !
;URAn:	That is not true, the bugs were serious, and affected other modes.
;
;	Es tritt ein BUS-Error beim Terminieren von Programmen auf, die
;	die FPU nutzen und fr das Terminieren die C-Funktion return() 
;	verwenden. Der Grund hierfr ist ein Restore der FPU-Register
;	mit FMOVEM (diese werden am Programmbegin gesichert). Man kann
;	dieses SAUBER umgehen, indem man aus FPU-nutzenden Funktionen
;	mit exit() zurckkehrt (sofern man kein Funktionsergebnis
;	erwartet!) - dann sollte auch kein Bus-Error mehr auftreten 
;	(siehe FPU_TST2.C oder auch BGIDEMO.C)
;
;URAn:	Like I said above: the bugs affected other modes than those known,
;URAn:	so after saving registers with FMOVEM, they could _not_ be restored.
;URAn:	That is why 'exit()' was required, since it restored sp to a saved
;URAn:	value regardless of how the bugs had affected current sp.
;URAn:	As long as push-pull operations are correctly paired before return,
;URAn:	there is no longer any reason why return should not work correctly.
;
;Letzte nderung:
;
;   19.05.92	Freigabe der Version 1.00, da der BUG mit FMOVEM und
;	return(0) "gefixt" ist!
;URAn:	Sorry, but FMOVEM was still quite buggy in that version,
;URAn:	but the statement here that it wasn't is probably the main
;URAn:	reason why these bugs remained 'unfixed' in later versions.
;URAn:	Why didn't _anyone_ bother to test (in any version)  ???
;
;   27.07.92	Version 2.00 fr neuere TOS-Versionen (ab TOS-Version
;	1.06), in denen die	LineF nicht mehr "mibraucht" wird.
;	In dieser Version wird keine Prfung mehr gemacht, ob 
;	der Trap-Aufruf aus	dem ROM oder RAM kam. Es wird 
;	grundstzlich davon ausgegangen, da dieser von einer
;	Applikation gemacht wurde!
;
;   09.02.94	Diverse kleinere nderungen die den Code optimieren
;	und kleiner machen. Zudem wird jetzt auch auf den
;	_FPU-Cookie getestet und bei diesem das untere Wort
;	auf 1 gesetzt. Wenn der _FPU-Cookie oder kein 68881
;	Prozessor vorhanden ist, dann installiert sich diese
;	Version auch _nicht_!
;	Es gibt unter PureC keine Probleme wenn man die
;	Compileroption '-S' setzt, anscheinend optimiert sich 
;	PureC selber raus! :-((
;	Jan Kriesten, Maus F
;	  2:244/4344@Fidonet
;	  90:400/1002@NeST
;
;----------------------------------------------------------------------------
;ACHTUNG: bei den Assembler-Options mu -S Flag gesetzt werden 
;(wegen ReTurn from Exception / RTE) ! 
;----------------------------------------------------------------------------
;URAn:	I assume the above refers to the assembler of the Pure-C package.
;	If (like me) you prefer DevPac, you have full control anyway.
;----------------------------------------------------------------------------
;URAn:	Here the original file-head ends, and the source code starts.
;----------------------------------------------------------------------------
;
	include	URAn_DOS.S
	include	URAn_XB.S
	include	URAn_JAR.S
;
hw_fpu_ad	=	hw_fpu_base+fpu_offset
;
;----------------------------------------------------------------------------
	SECTION	TEXT
;----------------------------------------------------------------------------
;
;start:	Program entry point of the installer.
;	Switches to the local stack, returns unused memory to the system,
;	shows the title, probes for the FPU, links the line_F handler that
;	suits the TOS version, sets the _FPU cookie and terminates resident.
;	Leaves through 'fpu_hardware_missing_exit' when the probe has set
;	'fpu_absent_f', or through 'already_installed_exit' when XB_install
;	leaves d0 >= 0.
start:
	movea.l	4(sp),a0		;a0 -> basepage
	lea	localtop(pc),sp	;init sp -> local stack
	move.l	#$100,d1		;d1 =  basepage size
	add.l	bp_textlen(a0),d1	;d1 += text section size
	add.l	bp_datalen(a0),d1	;d1 += data section size
	add.l	bp_bss_len(a0),d1	;d1 += bss section size
	move.l	d1,TSR_size		;store size for Ptermres
	gemdos	Mshrink,!,(a0),d1	;return unused RAM to system
	gemdos	Cconws,prog_title_s(pc)	;display program title
;
;FPU probe:  'XB_fpu_test' is linked into the bus error vector while one
;FPU register is read.  If that read bus-errors, the handler sets
;'fpu_absent_f' and resumes at 'fpu_test_exit' with the SSP and SR saved
;here, so both outcomes continue through the same code.
;
	XB_install	XB_fpu_test(pc),(ev_buserr).w
	gemdos		Super,!		;enter supervisor mode
	move.l		d0,-(sp)	;save old SSP for exit
	move.l		sp,fpu_test_SSP	;store new SSP for buserror exits
	move		sr,fpu_test_SR	;store SR for buserror exits
	clr		fpu_absent_f	;assume fpu present
	move		(hw_fpu_ad+fpu_stat).w,d0	;fpu absent=>buserror
fpu_test_exit:
	move.l		fpu_test_SSP(pc),sp	;ensures correct exit
	gemdos		Super,()	;exit supervisor mode
	XB_remove	XB_fpu_test(pc),(ev_buserr).w
	tst		fpu_absent_f
	bne		fpu_hardware_missing_exit
;
;TOS version test:  the version word is read from the OS header that a3
;points to.  (a0 must not be used for this: it was last loaded with the
;basepage pointer, and GEMDOS calls have been made since.)
;
	gemdos		Super,!		;enter supervisor mode
	move.l		d0,-(sp)	;save old SSP for exit
	move.l		(_sysbase).w,a3		;a3->os header (may be dummy)
	move.l		os_selfbeg_p(a3),a3	;a3->os header (definite)
	move		os_version(a3),d3	;d3 = os version  (from definite header)
	gemdos		Super,()	;exit supervisor mode
	cmp		#$0106,d3
	blt.s		old_TOS
new_TOS:
	XB_install	XB_line_f_new(pc),(ev_f_line).w
	bra.s		any_TOS
;
old_TOS:
	cmpa.l		#$FC0000,a3	;OS header below $FC0000 ?
	blt.s		old_RAM_TOS	;if so, this TOS runs from RAM
old_ROM_TOS:
	XB_install	XB_line_f_old_ROM(pc),(ev_f_line).w
	bra.s		any_TOS
;
old_RAM_TOS:
	move.l		a3,d0
	add.l		d0,old_RAM_p_1	;relocate high comparison constant
	add.l		d0,old_RAM_p_2	;relocate low comparison constant
	XB_install	XB_line_f_old_RAM(pc),(ev_f_line).w
any_TOS:
	tst.l		d0		;test d0 as left by XB_install
	bpl.s		already_installed_exit
;
	gemdos		Super,!		;enter supervisor mode
	move.l		d0,-(sp)	;save old SSP for exit
	edit_cookie	#'_FPU',#cook_v	;edit cookie, if it exists
	bpl.s		.done_cookie	;if MI, cookie didn't exist
	make_cookie	#'_FPU',#cook_v	;create cookie (jar if needed)
.done_cookie:
	gemdos		Super,()	;exit supervisor mode
;
	gemdos	Ptermres,TSR_size(pc),!	;keep TSR and return 0 to system
;
;URAn:	Original forgot return value, so stack garbage was used (harmless)
;
;
;	
already_installed_exit:
;Entered from 'any_TOS' when d0 is not negative after XB_install.
	pea	already_installed_s(pc)	;push -> message 'already_installed_s'
	bra.s	error_exit
;
fpu_hardware_missing_exit:
;Entered after the FPU probe when 'fpu_absent_f' is non-zero.
	pea	fpu_hardware_missing_s(pc)	;push -> message 'fpu_hardware_missing_s'
error_exit:
;Common tail: the message pointer pushed above is the Cconws argument.
	gemdos	Cconws,()	;display string (ptr is on stack)
	gemdos	Pterm0		;exit program and return 0 to system
;
;----------------------------------------------------------------------------
;	XBRA-linked bus error handler for fpu test
;----------------------------------------------------------------------------
;
	XB_define	XB_fpu_test,program_XB_id
;Temporary bus error handler, linked only while 'start' probes the FPU.
;It does not return to the faulting instruction: it records the failure,
;discards whatever is on the stack by reloading the SSP saved by 'start',
;and builds a new frame (PC long, then SR word) so that rte continues at
;'fpu_test_exit' with the SR that was saved before the probe.
	st		fpu_absent_f			;note fpu absent
	move.l		fpu_test_SSP(pc),sp		;ensures correct exit
	pea		fpu_test_exit(pc)		;frame PC = fpu_test_exit
	move		fpu_test_SR(pc),-(sp)		;frame SR = SR saved by 'start'
	rte
;
;----------------------------------------------------------------------------
;	XBRA-linked line-F emulator routine for old RAM TOS (< 1.06)
;----------------------------------------------------------------------------
;
	XB_define	XB_line_f_old_RAM,program_XB_id
;Line-F handler linked by 'old_RAM_TOS' in the installer.
;2(sp) is the return address of the exception frame (the SR word is at
;0(sp)).  Callers outside the range [old_RAM_p_2 , old_RAM_p_1) go to the
;emulator, callers inside it are passed on to the previously linked
;handler.  The labels old_RAM_p_1/2 address the immediate longwords of
;the two cmpi.l instructions (2 bytes past the opcode word); the installer
;adds the RAM base address of TOS to both before linking this handler.
old_RAM_p_1:	= *+2
	cmpi.l		#$30000,2(sp)		;called from above OS ?
	bhs.s		main_lfe		;if so, go use float emu
old_RAM_p_2:	= *+2
	cmpi.l		#$00000,2(sp)		;called from below OS ?
	blo.s		main_lfe		;if so, go use float emu
;The chain must continue through THIS header: on the RAM TOS path only
;XB_line_f_old_RAM is linked, so only its XBRA link holds the old vector.
	XB_gonext_d	XB_line_f_old_RAM	;use older line_f for TOS
;
;URAn:	The 2 comparison constants above are modified before execution
;	by adding the RAM base address of TOS to each of them.
;
;----------------------------------------------------------------------------
;	XBRA-linked line-F emulator routine for old ROM TOS (< 1.06)
;----------------------------------------------------------------------------
;
	XB_define	XB_line_f_old_ROM,program_XB_id
;Line-F handler linked by 'old_ROM_TOS' in the installer.
;2(sp) is the return address of the exception frame (SR word at 0(sp)).
;The compare is signed: return addresses below $FC0000 go to the emulator,
;all others are passed on to the previously linked handler.
	cmpi.l		#$FC0000,2(sp)		;called from TOS ?
	blt.s		main_lfe		;if not, go use float emu
	XB_gonext_d	XB_line_f_old_ROM	;use older line_f for TOS
;
;----------------------------------------------------------------------------
;	XBRA-linked line-F emulator routine for modern TOS (>= 1.06)
;----------------------------------------------------------------------------
;
	XB_define	XB_line_f_new,program_XB_id
main_lfe:
;Common emulator entry  (also the branch target of the two old-TOS headers).
;Register roles set up here and kept for the whole emulation:
;	a0 -> line_F opcode being emulated	a3 -> 'dregs' save area
;	a4 -> FPU operand register		a5 -> FPU command register
;	a6 -> FPU register block		d6 =  SR of interrupted code
;	d7 =  SSP as it was before the trap
;The exception frame is pulled off the stack, and rebuilt on exit.
	movem.l	d0-a6,dregs		;save d0-d7/a0-a6 of interrupted code
	lea	dregs(pc),a3		;a3 -> that save area
	lea	(hw_fpu_ad),a6		;a6 -> fpu base
	lea	fpu_oper(a6),a4		;a4 -> fpu operand
	lea	fpu_comm(a6),a5		;a5 -> fpu command
	move.w	(sp)+,d6		;d6 = frame SR  (pulled)
	movea.l	(sp)+,a0		;a0 = frame return address  (pulled)
	movea.l	sp,a1			;a1 = SSP with the frame removed
	move.l	a1,d7			;d7 = the same, kept for the exits
	lea	localtop(pc),sp		;emulator runs on its local stack
	btst	#13,d6			;S bit set in the pulled SR ?
	bne.s	.sp_known		;yes: the caller's SP is that SSP
	move.l	usp,a1			;no:  the caller's SP is the USP
.sp_known:				;a1 = caller's SP  (SSP or USP)
	move.l	a1,user_sp-dregs(a3)	;store it in 'user_sp'
	move.w	(a0),d1			;d1 = line_F opcode word
again:	;-------- reentry point for additional FPU commands
	andi.w	#$01C0,d1		;keep opcode bits 6-8  (type field)
	bne	special			;non-zero type => 'special' command
;----------------------------------------------------------------------------
;Start of:	emulation of 'General' line_F instructions
;----------------------------------------------------------------------------
	move.w	2(a0),d1	;d1 = fpu command (not line_F)
	move.w	d1,(a5)		;hw_fpu_comm = fpu command
;
;From here the FPU responses are read and served one by one.  Response
;bits 12,11,10 select the service; a response with bits 12,11,10 = 0,1,0
;and bit 15 clear ends the instruction.
;
;The conditional below is only active for 68882
	ifeq	(fpu_type-fpu_68882)
	move.w	fpu_stat(a6),d0	;d0 = response
	btst	#14,d0		;bit 14 set in the first response ?
	beq.s	no_PC_bit	;if not, decode the response as it is
	move.l	a0,fpu_iadr(a6)	;fpu instruction address = a0
	bra.s	no_PC_bit	;then decode the response already in d0
	endc
;The conditional above is only active for 68882
;
CA_loop:		;--- reentry point for additional FPU responses
	move.w	fpu_stat(a6),d0	;d0 = response
no_PC_bit:		;--- 68882 entry after fixing PC of 1'st response 
	btst	#12,d0		;test 1'st mode bit
	bne	rw_1x		;1 => jump
	btst	#11,d0		;test 2'nd mode bit
	beq.s	rw_00		;0 => jump
rw_01x:		;null primitive / transfer single CPU-register
	btst	#10,d0		;test 3'rd mode bit  (register transfer)
	bne.s	rw_sngl		;1 => jump to transfer single CPU-register
rw_010:		;null primitive identified
	btst	#15,d0		;CA (Come Again) bit set ?
	bne.s	CA_loop		;1 => loop back for new response
	addq.l	#4,a0		;a0 += pure instruction size
;'calc_add' will, as needed, have incremented a0 to cover passed inline code
;URAn:	Original always looped to CA_loop above after each instruction, but
;	most of the 'special' instructions do not need to do this.
;	I prefer to loop to 'next_op_loop' below for these (after fixing a0).
next_op_loop:
;Entered with a0 -> the opcode following the instruction just emulated.
;If that opcode is another line_F code it is emulated at once, without
;leaving the handler.  Otherwise a new 3-word frame (PC = a0, SR = d6) is
;built on the caller's supervisor stack, the saved registers are reloaded
;from 'dregs' and the handler returns with rte.
	move.w	(a0),d1			;d1 = next opcode
	move.w	d1,d0			;d0 = scratch copy
	andi.w	#$f000,d0		;keep the top nibble
	eori.w	#$f000,d0		;zero only if that nibble is $F
	beq.s	again			;line_F again: emulate that one too
	btst	#13,d6			;was the caller in supervisor mode ?
	beq.s	.to_user		;no, go restore the USP as well
.to_super:
	movea.l	user_sp(pc),sp		;SSP = caller's SP  (maybe push/pull adjusted)
	move.l	a0,-(sp)		;frame PC = resume address
	move.w	d6,-(sp)		;frame SR = caller's SR
	movem.l	(a3),d0-a6		;reload d0-d7/a0-a6 from 'dregs'
	rte				;resume the caller
;
.to_user:
	movea.l	user_sp(pc),a1		;a1 = caller's USP  (maybe push/pull adjusted)
	move.l	a1,usp			;write it back to the USP
	movea.l	d7,sp			;SSP = value it had before the trap
	move.l	a0,-(sp)		;frame PC = resume address
	move.w	d6,-(sp)		;frame SR = caller's SR
	movem.l	(a3),d0-a6		;reload d0-d7/a0-a6 from 'dregs'
	rte				;resume the caller
;
;
rw_011:
rw_sngl:
;Serves a response with bits 12,11,10 = 0,1,1:  the saved value of one data
;register is written to the FPU operand register as a longword.
;used for dynamic register list of FMOVEM and for dynamic 'k'-factors
;URAn: here original used fpu command in d1 to find register number,
;as shown in the 3 commented lines below:
;;;	andi.w	#%1110000,d1	;mask data register number * 16
;;;	lsr.w	#2,d1		;d1 = register number * 4  (longword index)
;;;	move.l	(a3,d1.w),(a4)	;hw_fpu_oper = saved register
;This works, but I prefer to take the number from response code in d0.
;since that is how Motorola have specified the protocol.
;This method only gains 1 cycle in speed (still, it's better than nothing).
	andi.w	#7,d0		;d0 = data reg_num
	add.w	d0,d0		;d0 = reg_num * 2
	add.w	d0,d0		;d0 = reg_num * 4  (longword index)
	move.l	(a3,d0.w),(a4)	;hw_fpu_oper = saved register
	bra.s	CA_loop		;always loop back for more here
;
;
rw_00:	;-------- %xxx00, Transfer multiple float registers (FMOVEM)
;URAn:	The direction is taken from the DR bit (13) of the response, and
;	the addressing mode class from d5, as set by 'calc_add':
;	  0 = control modes	(ascending access only)
;	  2 = postincrement	(ascending access, saved areg updated)
;	  4 = predecrement	(descending access, saved areg updated)
;	d5 then indexes a table of word offsets to the mode routines.
;	(The original tested response bit 12 to find predecrement mode, but
;	 that bit is always zero here and has no such meaning.)
;
	bsr 	calc_add	;a1->trans_ad  a2->trans_reg  d5=mode_type
	move.w	fpu_rsel(a6),d4	;d4 = register bit_list
	btst	#13,d0		;DR bit of the response
	beq.s	fmovem_to_FPU	;clear => memory to FPU
fmovem_to_RAM:
	move.w	fmovem_to_RAM_t(pc,d5.w),d5	;d5 = offset to mode routine
	jmp	fmovem_to_RAM_t(pc,d5.w)	;go to that routine
;
fmovem_to_RAM_t:	;routine offsets, relative to the table itself
	dc.w	.ctl_mode-fmovem_to_RAM_t
	dc.w	.inc_mode-fmovem_to_RAM_t
	dc.w	.dec_mode-fmovem_to_RAM_t
;
.ctl_mode:	;control modes: floats stored at ascending addresses
	moveq	#8-1,d0		;8 list bits to examine
.ctl_bit:
	add.w	d4,d4		;next list bit -> carry
	bcc.s	.ctl_next	;clear: no float for this bit
	move.l	(a4),(a1)+	;set: read one float from the
	move.l	(a4),(a1)+	;operand register as 3 longwords
	move.l	(a4),(a1)+	;and store them ascending
.ctl_next:
	dbra	d0,.ctl_bit	;until all 8 bits are done
	bra	CA_loop		;then fetch the next FPU response
;
.inc_mode:	;URAn: (An)+ is ILLEGAL for FMOVEM to memory
	bra	cop_error
;
.dec_mode:	;-(An): each float is stored 12 bytes below the previous one
	moveq	#8-1,d0		;8 list bits to examine
.dec_bit:
	add.w	d4,d4		;next list bit -> carry
	bcc.s	.dec_next	;clear: no float for this bit
	move.l	(a4),(a1)+	;set: store one float ascending,
	move.l	(a4),(a1)+	;which leaves a1 12 bytes above
	move.l	(a4),(a1)+	;the start of that float
	suba.w	#24,a1		;a1 -> slot below the float just stored
.dec_next:
	dbra	d0,.dec_bit	;until all 8 bits are done
	adda.w	#12,a1		;a1 -> last float stored
	move.l	a1,(a2)		;that is the new value of the saved areg
	bra	CA_loop		;then fetch the next FPU response
;
;	
fmovem_to_FPU:
;FMOVEM from memory to the FPU.  On entry a1 -> first float in memory,
;a2 -> saved address register (postinc/predec modes only), d4 = register
;list with its first bit in bit 15, d5 = mode class 0/2/4 from 'calc_add'.
;URAn:	The original decoded the ea mode from the opcode again at this point.
;	That was correct, but indexing the offset table with d5 is faster.
	move.w	fmovem_to_FPU_t(pc,d5.w),d5	;d5 = offset to mode routine
	jmp	fmovem_to_FPU_t(pc,d5.w)	;go to that routine
;
fmovem_to_FPU_t:	;routine offsets, relative to the table itself
	dc.w	.ctl_mode-fmovem_to_FPU_t
	dc.w	.inc_mode-fmovem_to_FPU_t
	dc.w	.dec_mode-fmovem_to_FPU_t
;
.ctl_mode:	;control modes: floats read from ascending addresses
	moveq	#8-1,d0		;8 list bits to examine
.ctl_bit:
	add.w	d4,d4		;next list bit -> carry
	bcc.s	.ctl_next	;clear: no float for this bit
	move.l	(a1)+,(a4)	;set: write one float to the
	move.l	(a1)+,(a4)	;operand register as 3 longwords
	move.l	(a1)+,(a4)	;read from ascending addresses
.ctl_next:
	dbra	d0,.ctl_bit	;until all 8 bits are done
	bra	CA_loop		;then fetch the next FPU response
;
.inc_mode:	;(An)+: as above, plus write-back of the address register
;URAn:	The original first subtracted 12 from a1 here, to undo an increment
;	that 'calc_add' does not make, so every transfer was 12 bytes off.
	moveq	#8-1,d0		;8 list bits to examine
.inc_bit:
	add.w	d4,d4		;next list bit -> carry
	bcc.s	.inc_next	;clear: no float for this bit
	move.l	(a1)+,(a4)	;set: write one float to the
	move.l	(a1)+,(a4)	;operand register as 3 longwords
	move.l	(a1)+,(a4)	;read from ascending addresses
.inc_next:
	dbra	d0,.inc_bit	;until all 8 bits are done
	move.l	a1,(a2)		;a1 is the new value of the saved areg
	bra	CA_loop		;then fetch the next FPU response
;
;
.dec_mode:	;URAn: -(An) is ILLEGAL for FMOVEM to the FPU
	bra	cop_error
;
;
rw_1x:
;Response bit 12 is set.  Bit 11 set as well means an exception request;
;otherwise the effective address is evaluated and data is transferred in
;the direction given by the DR bit (13).
	btst	#11,d0		;test 2'nd mode bit
	bne.s	rw_11		;1 => exception of some kind
	btst	#13,d0		;test DR bit
	beq.s	w_10		;0 => go calc ea & move data to FPU
;--------  %xx110, evaluate effective address and transfer data from FPU
r_10:
	bsr	calc_add	;A1 -> dest_adress  d1 = operand size
	cmpi.w	#2,d1		;word size operand?
	ble.s	r_10_bw		;go fetch byte or word
r_10_loop:
;Sizes above 2 are moved as longwords until the size count is used up.
	move.l	(a4),(a1)+	;transfer one longword from FPU
	subq.l	#4,d1		;reduce remaining size by 4
	bgt.s	r_10_loop	;loop until entire operand moved
	bra	CA_loop		;loop back for new FPU response
;
r_10_bw:
	subq.b	#1,d1		;byte size operand ?
	beq.s	r_10_byte	;if so, go fetch byte
	move.w	(a4),(a1)	;transfer one word from FPU
	bra	CA_loop		;loop back for new FPU response
;
r_10_byte:
	move.b	(a4),(a1)	;transfer one byte from FPU
	bra	CA_loop		;loop back for new FPU response
;
w_10:
;--------  %xx010, evaluate effective address and write data to FPU
;Mirror image of 'r_10': the operand at a1 is written to the FPU operand
;register, as one byte, one word, or a series of longwords.
	bsr	calc_add	;A1 -> dest_adress  d1 = operand size
	cmpi.w	#2,d1		;word size operand ?
	ble.s	w_10_bw		;go write word or byte
w_11:
	move.l	(a1)+,(a4)	;write one longword to FPU
	subq.l	#4,d1		;reduce remaining size by 4
	bgt.s	w_11		;loop until entire operand written
	bra	CA_loop		;loop back for new FPU response
;
w_10_bw:
	subq.b	#1,d1		;byte size operand ?
	beq.s	w_10_byte	;if so, go write byte
	move.w	(a1),(a4)	;write one word to FPU
	bra	CA_loop		;loop back for new FPU response
;
w_10_byte:
	move.b	(a1),(a4)	;write one byte to FPU
	bra	CA_loop		;loop back for new FPU response
;
rw_11:	;-------- %xxx11, take pre-/mid- instruction exception
;No exception is raised for the caller: every such response ends in the
;common error handler.
	bra 	cop_error	;jump to error handler
;A more precise error analysis would be nice 
;(but how...?, without stack frames a'la 68020/68030)
;
;----------------------------------------------------------------------------
;End of:	emulation of 'General' line_F instructions
;----------------------------------------------------------------------------
;Start of:	emulation of 'Special' line_F instructions
;----------------------------------------------------------------------------
;
special:	;FScc/FDBcc/FTRAPcc/FBcc.w/FBcc.l/FSAVE/FRESTORE
;Entered from 'again' with d1 = opcode bits 6-8 (type field), non-zero.
;Types 1-5 are dispatched below; types 6 and 7 fall through to the error
;handler.
	cmpi.w	#%001000000,d1		;Type 1 F_op ?
	beq.s	s_FScc_FDBcc_FTRAPcc	;these are decoded further in branch
	cmpi.w	#%010000000,d1		;Type 2 F_op ?
	beq	s_FBcc_W		;Type 2 = FBcc.W  (16bit offset)
	cmpi.w	#%011000000,d1		;Type 3 F_op ?
	beq	s_FBcc_L		;Type 3 = FBcc.L  (32bit offset)
	cmpi.w	#%100000000,d1		;Type 4 F_op ?
	beq	s_FSAVE			;Type 4 = FSAVE
	cmpi.w	#%101000000,d1		;Type 5 F_op ?
	beq	s_FRESTORE		;Type 5 = FRESTORE
;Here we have decoded an illegal line_F instruction
	bra	cop_error
;
;
s_FScc_FDBcc_FTRAPcc:
;Type 1 opcodes.  Opcode bits 3-5 = %001 select FDBcc and %111 select
;FTRAPcc; every other value is treated as FScc <ea>, which is handled here.
;d1 is left holding the complete opcode word for the two branches.
	move.w	(a0),d1		;d1 = line_F opcode
	move.w	d1,d0		;d0 = copy for decoding
	andi.w	#%111000,d0	;d0 = opcode bits 3-5
	cmpi.w	#%001000,d0	;FDBcc ?
	beq.s	s_FDBcc		;yes, handled separately
	cmpi.w	#%111000,d0	;FTRAPcc ?
	beq	s_FTRAPcc	;yes, handled separately
;FScc: let the FPU evaluate the condition, then store $FF or $00 at <ea>
	move.w	2(a0),fpu_cond(a6)	;second word -> FPU condition register
	moveq	#1,d0		;operand size 1 byte for 'calc_add'
	bsr	calc_add	;a1 -> destination byte
.wait:
	move.w	(a6),d0		;d0 = FPU response
	bmi.s	.wait		;bit 15 (CA) set: read it again
	btst	#12,d0		;exception response ?
	bne	cop_error	;yes, abort
	btst	#0,d0		;bit 0 = condition result
	sne	(a1)		;destination = $FF if set, else $00
	addq.w	#4,a0		;step over opcode and condition word
	bra	next_op_loop	;go on with the next opcode
;
;
s_FDBcc:
;FDBcc Dn,<label>:  6 bytes = opcode, condition word, 16 bit displacement.
;Entry: d1 = opcode word (register number in bits 0-2).
;Condition true  => continue after the instruction, Dn untouched.
;Condition false => low word of the saved Dn is decremented; if it passes
;from 0 to -1 execution continues after the instruction, otherwise at
;(address of the displacement word) + displacement.
	move.w	2(a0),fpu_cond(a6)	;condition word -> FPU condition register
.wait:
	move.w	(a6),d0		;d0 = FPU response
	bmi.s	.wait		;bit 15 (CA) set: read it again
	btst	#12,d0		;exception response ?
	bne	cop_error	;yes, abort
	addq.w	#6,a0		;a0 -> instruction after FDBcc
	btst	#0,d0		;bit 0 = condition result
	bne	next_op_loop	;true: no countdown, no branch
	andi.w	#%111,d1	;d1 = data register number
	add.w	d1,d1
	add.w	d1,d1		;d1 = register number * 4
	lea	(a3,d1.w),a1	;a1 -> saved copy of that register
	subq.w	#1,2(a1)	;count down its low word
	bcs	next_op_loop	;borrow: counter went from 0 to -1, fall out
	adda.w	-(a0),a0	;a0 = address of displacement word + displacement
	bra	next_op_loop	;go on at the branch target
;
;
s_FBcc_W:
;FBcc.W <label>:  4 bytes = opcode, 16 bit displacement.
;The opcode word itself is written to the FPU condition register.
;The branch target is (address of the displacement word) + displacement.
	move.w	(a0),fpu_cond(a6)	;opcode word -> FPU condition register
.wait:
	move.w	(a6),d0		;d0 = FPU response
	bmi.s	.wait		;bit 15 (CA) set: read it again
	btst	#12,d0		;exception response ?
	bne	cop_error	;yes, abort
	addq.w	#4,a0		;a0 -> instruction after FBcc.W
	btst	#0,d0		;bit 0 = condition result
	beq	next_op_loop	;false: branch not taken
	adda.w	-(a0),a0	;a0 = address of displacement word + displacement
	bra	next_op_loop	;go on at the branch target
;
;
s_FBcc_L:
;FBcc.L <label>:  6 bytes = opcode, 32 bit displacement.
;The opcode word itself is written to the FPU condition register.
;The branch target is (address of the displacement long) + displacement.
	move.w	(a0),fpu_cond(a6)	;opcode word -> FPU condition register
.wait:
	move.w	(a6),d0		;d0 = FPU response
	bmi.s	.wait		;bit 15 (CA) set: read it again
	btst	#12,d0		;exception response ?
	bne	cop_error	;yes, abort
	addq.w	#6,a0		;a0 -> instruction after FBcc.L
	btst	#0,d0		;bit 0 = condition result
	beq	next_op_loop	;false: branch not taken
	adda.l	-(a0),a0	;a0 = address of displacement long + displacement
	bra	next_op_loop	;go on at the branch target
;
;
s_FTRAPcc:
;FTRAPcc (partial emulation).
;Entry: a0 -> opcode, d1 = opcode word  (operand mode in bits 0-2).
;The operand mode selects the total instruction size from 'FTRAP_t':
;	mode 2 = word operand  => 6 bytes
;	mode 3 = long operand  => 8 bytes
;	mode 4 = no operand    => 4 bytes
;	any other mode         => -1, rejected through 'cop_error'
;Condition false => execution continues behind the instruction.
;Condition true  => 'cop_error'  (no real trap can be emulated here).
;
;URAn_fix: a0 used to be advanced by 6 AND by the table value, which made
;a false condition resume 6 bytes too far.  The table values already are
;complete instruction sizes, so they are the only advance needed.
	and	#7,d1		;d1 = arg_mode  (from line_F opcode)
	add	d1,d1		;d1 = arg_mode * 2  (word index)
	move	FTRAP_t(pc,d1.w),d1	;d1 = instruction size  (-1 = error)
	bmi	cop_error	;arg_mode must be 2,3,4 for W,L,no_arg
	move.w	2(a0),fpu_cond(a6)	;write condition predicate to FPU
.loop:
	move.w	(a6),d0		;d0 = new FPU response code
	bmi.s	.loop		;loop until CA_bit = 0,  (NULL or exception)
	btst	#12,d0		;exception ?
	bne	cop_error	;if so, go make abort
	btst	#0,d0		;test condition flag ?
	bne	cop_error	;make exception if condition is TRUE
	adda	d1,a0		;a0 += whole instruction size  -> next opcode
	bra	next_op_loop	;loop back for next opcode
;
;
FTRAP_t:	;total FTRAPcc size in bytes per operand mode 0-7  (-1 = illegal)
	dc.w	-1,-1,6,8,4,-1,-1,-1
;
;
s_FSAVE:
;Emulates FSAVE <ea>.  The format word is read from the FPU save register
;until it is no longer 'come again'.  Then the format word, a cleared
;reserved word and (unless the frame is NULL) the state data read from
;the operand register are stored at <ea>.
;d0 counts the bytes to store: 4 for format + reserved word, plus the
;size taken from the low byte of the format word.
;Postincrement mode is rejected; predecrement mode first moves a1 down by
;the complete frame size and writes that address back to the saved areg.
	moveq	#4,d0		;simulate operand size 4  (format + reserved word)
	bsr	calc_add	;a1 -> addr
.save_loop_1:
	move.w	fpu_save(a6),d1	;d1 = format word from the FPU
	move	d1,d2		;d2 = copy that will be stored
	and	#$ff00,d1	;d1 = high byte of format word only
	cmp	#$0100,d1
	beq.s	.save_loop_1	;loop on come-again format
	blt.s	.save_d0_bytes	;jump on NULL frame format
	cmp	#$0200,d1
	beq.s	cop_error	;error on illegal format
;Here we have IDLE, or BUSY frame format word in d2, low byte = size
	move	d2,d1
	and	#$ff,d1		;d1 = internal state size excl format word
	add	d1,d0		;d0 = internal state size incl format word
.save_d0_bytes:
	cmp	#2,d5		;what mode ?  4=predec  2=postinc  0=other
	blo.s	.save_bytes	;other modes need no address fixing
	beq.s	cop_error	;postinc mode is not accepted for FSAVE
;Here we are using predecrement mode, which needs a little extra work
	addq	#4,a1		;remove a1 decrement of 'calc_add'
	sub	d0,a1		;decrement a1 by true state frame size
	move.l	a1,(a2)		;store altered saved areg
.save_bytes:
	move	d2,(a1)+	;store format word
	clr	(a1)+		;clear reserved word
	subq	#4,d0		;decrement size remaining by 1 longword
	ble.s	.save_done	;exit if this was all of frame
.save_loop_2:
	move.l	(a4),(a1)+	;move state frame data from operand to <ea>
	subq	#4,d0		;decrement size remaining by another longword
	bgt.s	.save_loop_2	;loop until entire frame saved
.save_done:
	addq	#2,a0		;this instruction had opcode size 2
	bra	next_op_loop	;loop back for a new instruction
;
;
s_FRESTORE:
;Emulates FRESTORE <ea>.  The format word found at <ea> is written to the
;FPU restore register and read back; any difference ends in 'cop_error'.
;Unless the high byte of the format word is zero (NULL frame), the state
;data that follows the reserved word is then written to the operand
;register, one longword at a time.
;Predecrement mode is rejected; postincrement mode writes the final a1
;back to the saved areg.
	moveq	#4,d0		;simulate operand size 4  (format + reserved word)
	bsr.s	calc_add	;a1 -> addr
	move	(a1)+,d2	;d2 = format word
	addq	#2,a1		;skip reserved word
	move	d2,fpu_rest(a6)	;send restore format word to FPU
	cmp	fpu_rest(a6),d2	;does FPU agree to restore ?
	bne.s	cop_error	;if not, we have a fatal error !
;Here we have a legal format word, with restore in progress
	move.b	d2,d0		;d0 = frame size excl format, unless NULL
	cmp	d2,d0		;NULL format ?  (equal only if high byte of d2 = 0)
	beq.s	.rest_done	;if NULL format we've done it
.rest_loop:
	move.l	(a1)+,(a4)	;send frame data to FPU operand
	subq	#4,d0		;decrement size remaining by one longword
	bgt.s	.rest_loop	;loop until entire frame restored
.rest_done:
	cmp	#2,d5		;what mode ?  4=predec  2=postinc  0=other
	blo.s	.rest_next	;other modes need no address write-back
	bhi.s	cop_error	;predec mode is not accepted for FRESTORE
	move.l	a1,(a2)		;store postinc-altered saved areg
.rest_next:
	addq	#2,a0		;this instruction had opcode size 2
	bra	next_op_loop	;loop back for a new instruction
;
;
;----------------------------------------------------------------------------
;End of:	emulation of 'Special' line_F instructions
;----------------------------------------------------------------------------
;This is a very simple error-handler for the line_F emulator.
;It will reset the FPU, and terminate the current program.
;The return code of that program will be 881.
;
;URAn:	Original simply wrote a 0 to the fpu control CIR, which does not
;	completely reset the FPU, and works differently with 68882.
;	The new routine writes a NULL format word to the restore CIR, which
;	resets the FPU as completely as a hardware reset.  This method will
;	also work exactly the same with 68882 as with 68881.
;	
;
;Fatal error exit:  resets the FPU by restoring a NULL frame, reloads the
;stack pointer from d7, and terminates the current program with code 881.
;NOTE(review): on a plain 68000 'clr.w' reads the location before writing it;
;confirm that this extra read of the restore register is harmless.
cop_error:
	clr.w	fpu_rest(a6)	;FRESTORE a NULL frame  (NULL format word = 0)
	tst.w	fpu_rest(a6)	;shake hands with FPU to complete transaction
;Here FPU has been reset, but we should also restore entry SSP before exit
	movea.l	d7,sp		;precall SSP (kept in d7) is again SP
	gemdos	Pterm,#881	;terminate current program with return code 881
;
;----------------------------------------------------------------------------
;Start of:	subroutine	'calc_add'
;----------------------------------------------------------------------------
;
;Calculates operand address.
;Entry:	a0 -> line_F command, d0 = operand size.
;Exit:	a1 -> transfer area, a0 += size of inline data, d0 = preserved.
;	d5 = multi-move type flag:  predec=>4  postinc=>2  others=>0
;	d1/d2/d3 are scratch and not preserved;  a2 is set only for predec/postinc.
;	For predec/postinc modes a2 -> saved address register (for FMOVEM)
;
calc_add:
	moveq	#0,d1		;d1 = 0 in all 32 bits
	move.b	d0,d1		;d1 = operand size as a clean longword
	clr.w	d5		;d5 = 0  default multi-move flag (not predec/postinc)
	move.w	(a0),d3		;d3 = line_F opcode word
	move.w	d3,d2		;d2 = same opcode word, for the mode handlers
	andi.w	#%111000,d3	;d3 = address_mode * 8
	lsr.w	#1,d3		;d3 = address_mode * 4  (offset into longword table)
	lea	cs_tab(pc),a1	;a1 -> table of handler addresses
	movea.l	(a1,d3.w),a1	;a1 -> handler for this address_mode
	jmp	(a1)		;handler ends with rts, returning to our caller
;
;
c_drd:		;%000	Data Register Direct:		Dn
c_ard:		;%001	Address Register Direct:	An
;The operand is the low end of the saved register itself, so the transfer
;area starts 'operand size' bytes before the end of that register's slot.
	andi.w	#%1111,d2	;d2 = ad_bit*8 + reg_num
	add.w	d2,d2
	add.w	d2,d2		;d2 = offset of the register's longword slot
	addq.w	#4,d2		;d2 = offset just past that slot
	sub.w	d1,d2		;d2 = offset of first operand byte within save area
	lea	(a3,d2.w),a1	;a1 -> transfer area inside saved registers
	rts
;
;
c_ari:		;%010	Address Register Indirect:	(An)
;Transfer area is simply the address held in the saved An.
	andi.w	#%111,d2	;mask register number
	add.w	d2,d2
	add.w	d2,d2		;d2=register_number * 4
	move.l	32(a3,d2.w),a1	;a1 = saved An  (aregs follow the 8 dregs, hence 32)
	rts
;
;
c_arpo:		;%011	ARI with Postincrement:		(An)+
;Returns the current An as transfer address, then advances the saved An by
;the operand size.  An odd size (byte) through a7 advances by 2 instead.
	moveq	#2,d5		;d5 = 2  flags postinc mode to the caller
	andi.w	#%111,d2	;d2 = areg number
	add.w	d2,d2
	add.w	d2,d2		;d2 = areg number * 4
	lea	32(a3,d2.w),a2	;a2 -> saved An
	movea.l	(a2),a1		;a1 = An before increment -> transfer area
	btst	#0,d1		;even operand size ?
	beq.s	.bump_by_size	;yes => ordinary increment
	cmpi.w	#4*7,d2		;odd size:  is the register a7 ?
	beq.s	.bump_sp	;yes => must step by a whole word
.bump_by_size:
	add.l	d1,(a2)		;saved An += operand size
	rts
;
.bump_sp:
	addq.l	#2,(a2)		;saved a7 += 2 for byte operand
	rts			;which keeps users sp even.
;
;
c_arpr:		;%100	ARI with Predecrement:		-(An)
;Lowers the saved An by the operand size and returns the new value as the
;transfer address.  An odd size (byte) through a7 lowers it by 2 instead.
	moveq	#4,d5		;d5 = 4  flags predec mode to the caller
	andi.w	#%111,d2	;d2 = areg number
	add.w	d2,d2
	add.w	d2,d2		;d2 = areg number * 4
	lea	32(a3,d2.w),a2	;a2 -> saved An
	btst	#0,d1		;even operand size ?
	beq.s	.drop_by_size	;yes => ordinary decrement
	cmpi.w	#4*7,d2		;odd size:  is the register a7 ?
	bne.s	.drop_by_size	;no => ordinary decrement
	subq.l	#2,(a2)		;saved a7 -= 2 for byte operand, keeps users sp even
	bra.s	.fetch
;
.drop_by_size:
	sub.l	d1,(a2)		;saved An -= operand size
.fetch:
	movea.l	(a2),a1		;a1 = decremented An -> transfer area
	rts
;
;
c_ar16:		;%101	ARI with Displacement:		d16(An)
;Transfer address = saved An + sign extended 16 bit displacement, which is
;the extension word following the line_F opcode and command words.
	andi.w	#%111,d2	;d2 = areg_num
	add.w	d2,d2
	add.w	d2,d2		;d2 = areg_num*4
	movea.l	32(a3,d2.w),a1	;a1 = saved areg
	move.w	4(a0),d2	;d2 = offset word
	adda.w	d2,a1		;a1 += offset  (adda.w sign extends to long)
	addq.l	#2,a0		;a0 += 2  to pass offset word
	rts
;
;
c_ar08:		;%110	ARI with Index:			d8(An,Xn)
;Transfer address = saved An + sign extended 8 bit displacement + index.
;Bit 11 of the extension word selects a longword index (handled in c_ar81)
;or a sign extended word index (handled here).
;Bits 10..8 of the extension word are not examined.
	andi.w	#%111,d2	;d2 = areg_num
	add.w	d2,d2
	add.w	d2,d2		;d2 = areg_num * 4
	movea.l	32(a3,d2.w),a1	;a1 = saved areg
	move.w	4(a0),d2	;d2 = index_offset word
	move.w	d2,d3		;d3 = index_offset word
;URAn: Here original merely truncated d3 to byte as shown in next comment,
;;;	andi.w	#$ff,d3		;d3 = byte offset
;but this was never extended to word, so didn't work with negative offsets.
;Also, the 'ext.w' eliminates the need for and'ing  (and is faster).
	ext.w	d3		;d3 = low byte sign extended to word
	adda.w	d3,a1		;a1 += displacement  (adda.w sign extends to long)
	btst	#11,d2		;1=long, 0=word
	bne.s	c_ar81		;1 => go fix long indexing
	rol.w	#6,d2		;rotate bits 15..12 to bits 5..2
	andi.w	#%111100,d2	;d2 = (ad_bit*8+reg_num)*4
	adda.w	2(a3,d2.w),a1	;a1 += low word of saved index register
	addq.l	#2,a0		;a0 += 2  to pass index_offset word
	rts
;
;
;Longword-index tail of c_ar08.
;Entry:	d2 = index_offset word, a1 = saved An + displacement.
c_ar81:
	rol.w	#6,d2		;rotate bits 15..12 to bits 5..2
	andi.w	#%111100,d2	;d2 = (ad_bit*8+reg_num)*4
	adda.l	(a3,d2.w),a1	;a1 += saved index register longword
	addq.l	#2,a0		;a0 += 2  to pass index_offset word
	rts
;
;
c_pc:		;%111	imme/absolut short/long, PC-Relativ (w/wo Index)
;Mode 7 is subdivided by the register field (opcode bits 2..0), still in d2:
;bit 2 set => immediate,  bit 1 set => PC relative,
;bit 0 set => absolute long,  none set => absolute short.
	btst	#2,d2		;immediate?
	bne.s	imme		;1 => go fix immediate data
	btst	#1,d2		;pc-relative?
	bne.s	pc_rel		;1 => go fix relative address
	btst	#0,d2		;long absolute?
	bne.s	c_long		;1 => go fix long address
c_short:
	move.w	4(a0),d2	;d2 = short address -> data
	movea.w	d2,a1		;a1 = d2  (movea.w sign extends word to long)
	addq.l	#2,a0		;a0 += 2  to pass short address
	rts
;
;
c_long:
	movea.l	4(a0),a1	;a1 = long address -> data
	addq.l	#4,a0		;a0 += 4  to pass long address
	rts
;
;
;Immediate operand:  the data follows the line_F opcode and command words.
;Entry:	a0 -> line_F opcode, d1 = operand size.
;Exit:	a1 -> first byte of immediate data, a0 advanced past the data.
;A byte operand (odd size) occupies the low byte of a full extension word,
;so a1 must point one byte further on and a0 must still advance by a whole
;word;  otherwise a0 would be left odd for the next opcode fetch.
imme:
	lea	4(a0),a1	;a1 -> immediate data after line_F & command
	btst	#0,d1		;odd operand size  (byte operand) ?
	beq.s	.even_size	;no => data fills whole words
	addq.l	#1,a1		;a1 -> low byte of the extension word
	addq.l	#1,a0		;pad a0 so the total step stays even
.even_size:
	adda.w	d1,a0		;a0 += operand size  to pass immediate data
	rts
;
;
;PC relative operand:  d16(PC), or d8(PC,Xn) when opcode bit 0 is set.
;The base for the displacement is the address of the extension word itself.
pc_rel:
	btst	#0,d2		;indexed form ?
	bne.s	pc_idx		;1 => go offset with index
	lea	4(a0),a1	;a1 -> offset word  (base address for d16)
	adda.w	4(a0),a1	;a1 += sign extended 16 bit offset
	addq.l	#2,a0		;a0 += 2  to pass offset word
	rts
;
;
;PC relative with index:  d8(PC,Xn)
;Transfer address = address of the index_offset word + sign extended 8 bit
;displacement + index register (word index here, longword in pc_i_l).
;The displacement used to be zero extended, which put every negative
;displacement 256 bytes too high;  it is now sign extended as in c_ar08.
pc_idx:
	lea	4(a0),a1	;a1 -> index_offset word  (base address)
	move.b	5(a0),d2	;d2 = 8 bit displacement
	ext.w	d2		;d2 = displacement sign extended to word
	adda.w	d2,a1		;a1 += displacement  (adda.w sign extends to long)
	move.b	4(a0),d2	;d2.b = ad_bit*128+reg_num*16+long_bit*8
				;(upper byte of d2.w is masked off below)
	btst	#3,d2		;test long_bit
	bne.s	pc_i_l		;1 => go index with longword
	andi.w	#%11110000,d2	;d2 = (ad_bit*8+reg_num)*16
	lsr.w	#2,d2		;d2 = (ad_bit*8+reg_num)*4 = offs to reg
	adda.w	2(a3,d2.w),a1	;a1 += low word of saved index register
	addq.l	#2,a0		;a0 += 2  to pass index_offset word
	rts
;
;
;Longword-index tail of pc_idx.
;Entry:	low byte of d2 = high byte of index_offset word,
;	a1 = base address + displacement.
pc_i_l:
	andi.w	#%11110000,d2	;d2 = (ad_bit*8+reg_num)*16  (also clears upper byte)
	lsr.w	#2,d2		;d2 = (ad_bit*8+reg_num)*4 = offs to reg
	adda.l	(a3,d2.w),a1	;a1 += saved index register longword
	addq.l	#2,a0		;a0 += 2  to pass index_offset word
	rts
;
;
;----------------------------------------------------------------------------
;End of:	subroutine	'calc_add'
;----------------------------------------------------------------------------
;	URAn_xxx code expansions
;
	make	JAR_links	;puts cookie jar routines here
;
;NB: Here DevPac usually nags about how some branches 'could be short', but
;    _don't_ change the lib, because that will limit its usefulness.
;    Just ignore those warnings instead.
;----------------------------------------------------------------------------
;
	SECTION	DATA
;
;----------------------------------------------------------------------------
;
;Dispatch table used by calc_add:  one longword handler address for each
;of the 8 addressing modes, indexed by address_mode * 4.
cs_tab:	dc.l	c_drd,c_ard,c_ari,c_arpo	;modes %000..%011
	dc.l	c_arpr,c_ar16,c_ar08,c_pc	;modes %100..%111
;
;Message strings, each terminated by NUL.
;
;Program title banner:  first two lines in inverse video.
prog_title_s:
	dc.b	CR,LF,ESC,'p'				;CRLF + Inverse on
	dc.b	"  Line-F-Emulation-Driver LFED  ",CR,LF 
	dc.b	"         Version 2.00           ",CR,LF
	dc.b	ESC,'q'					;inverse off
	dc.b	"    (c) by Smart Systems '92   ",CR,LF
	dc.b	"   Michael Hauschild / Seelze  ",CR,LF
	dc.b	"     Phone: 05137 / 9 20 09    ",CR,LF
	dc.b	NUL
;
;Message for the case where LFED is found to be installed already.
already_installed_s:
	dc.b	CR,LF	;CR
	dc.b	" LFED already installed! "
	dc.b	CR,LF	;CR
	dc.b	NUL
;
;Message for the case where no FPU hardware is found.
fpu_hardware_missing_s:
	dc.b	CR,LF	;CR
	dc.b	" No coprocessor available! "
	dc.b	CR,LF	;CR
	dc.b	NUL
;
;----------------------------------------------------------------------------
;
	SECTION	BSS
;
;----------------------------------------------------------------------------
;
;Register save area:  15 longwords for D0-D7/A0-A6 directly followed by the
;user's A7, giving 16 consecutive longword slots.
;NOTE(review): presumably this is the area a3 points to in calc_add, which
;indexes saved registers as (ad_bit*8+reg_num)*4 -- confirm at the caller.
dregs:		ds.l	(8+7)	;saves registers (D0-D7/A0-A6)
user_sp:	ds.l	1	;saves users SP  (A7)  (USP or SSP)
;
;Work variables;  judging by their names the fpu_test_xx ones belong to the
;FPU presence test -- their use is not visible in this part of the file.
fpu_test_SR:
	ds.w	1
fpu_test_SSP:
	ds.l	1
fpu_absent_f:
	ds.w	1
TSR_size:
	ds.l	1
;Local stack area;  localtop labels the address just past its last longword.
local_stack:
	ds.l	256	;allow 256 longs on local stack
localtop:
;
;----------------------------------------------------------------------------
;
	END
;
;----------------------------------------------------------------------------
;End of file:	URAn_LFE.S
;----------------------------------------------------------------------------
