	IDNT		lcsum.asm

	CSECT		text
	XDEF		_lcsum
;
;  Amazing, incredible, hyper-optimized Internet checksum subroutine!
;
;  Compute the 1's complement sum of data buffer.  Called from C as:-
;
;	unsigned short lcsum(unsigned short *buf, unsigned short cnt);
;

_lcsum:
;
; Entry (C stack frame, arguments read as 32-bit stack slots):
;	4(A7) = buf, pointer to the 16-bit words to sum
;	8(A7) = cnt, number of 16-bit words
;	NOTE(review): cnt is read with MOVE.L, so this assumes the caller
;	pushes it as a 32-bit value with a zero upper word -- confirm
;	against the compiler's int size.
; Exit:
;	D0 = 16-bit 1's complement sum, upper word zero
; Uses D1, A0, A1 as scratch.  D2 is used but saved and restored in full.
;
		MOVE.L	4(A7),A0		; A0 = pointer to data block
		MOVE.L	8(A7),D1		; D1 = number of 16-bit words to sum
		MOVE.L	D2,A1			; save ALL 32 bits of D2 in a volatile register
							; (a word move to An sign-extends and would
							; lose the caller's upper half of D2)
		MOVE.W	D1,D2			; keep a copy of the count; only bit 0 is used later
		LSR.L		#1,D1			; convert from words to longs
		MOVEQ.L	#0,D0			; D0 accumulates the sum; MOVEQ also clears C
		BRA.S		endl			; enter at the loop test so a zero count adds nothing

;
; Add two words (one long) per iteration.  DBCS drops out of the loop
; either when the add produced a carry or when the count in D1.W runs
; out.  The ADD/DBCS pair is short enough for the 68010 loop mode; on
; the 68020 it stays in the instruction cache.
;

loop:
		ADD.L		(A0)+,D0		; add two words in
endl:
		DBCS		D1,loop		; C set: fall through; else count down and loop
		BCC.S		done			; C clear here means the count expired
		ADDQ.L	#1,D0			; end-around carry; this also clears C
		BRA.S		endl			; back to the test, which now counts the add
done:
		BTST		#0,D2			; was word count odd?
		BEQ.S		done2

		MOVEQ.L	#0,D2			; zero-extend the trailing word to 32 bits
		MOVE.W	(A0),D2		; get the last word
		ADD.L		D2,D0			; add it in
		BCC.S		done2			; did that cause a carry?
		ADDQ.L	#1,D0			; yes, wrap it around
done2:
		MOVE.L	A1,D2			; restore caller's D2

;
; Fold the 32-bit sum to 16 bits: low word + high word + carry.
; Only word-sized operations touch D1 from here on, so its upper
; word never needs masking.
;
		MOVE.L	D0,D1			; get copy of sum			D0=ABCD D1=ABCD
		SWAP.W	D1				; high word into low half	D0=ABCD D1=CDAB
		ADD.W		D0,D1			; add the two halves; carry lands in X
		MOVEQ.L	#0,D0			; clear D0 (MOVEQ leaves X untouched)
		ADDX.W	D0,D1			; add the carry from the fold back in
		MOVE.W	D1,D0			; result in low word, upper word already zero
		RTS

		END
