; PupTestAla.asm - Alto Version
; Copyright Xerox Corporation 1979
; Last modified July 27, 1978  2:31 PM by Boggs

; Entry symbols.  Each name declared here is defined in the .srel
; section below as a single word holding the value of the code label
; of the same name prefixed with "." (e.g. UBlockEq: .UBlockEq).

; Block compare and checksum routines
.ent UBlockEq
.ent BlockEq
.ent UPupChecksum

; Arithmetic helpers
.ent MulDiv
.ent DoubleUsc
.ent MulPlus32x16
.ent Divide32x16
.ent UpdateAverage

; Bcpl runtime statics
; (None of these three symbols is referenced by the code in this file.)
getframe = 370
storeargs = 367
return = 366

; One word per entry point, each containing the address of the code.
; NOTE(review): .srel is presumably the short (page-zero) relocatable
; section through which Bcpl reaches procedures -- confirm against the
; assembler manual.
	.srel

UBlockEq:	.UBlockEq
BlockEq:	.BlockEq
UPupChecksum:	.UPupChecksum

MulDiv:		.MulDiv
DoubleUsc:	.DoubleUsc
MulPlus32x16:	.MulPlus32x16
Divide32x16:	.Divide32x16
UpdateAverage:	.UpdateAverage

; Code and local constants follow.
	.nrel

; UBlockEq(adr1,adr2,count) - Microcode version
; Compare the count words starting at adr1 with the corresponding
; words starting at adr2, and return true iff all the corresponding
; words are equal.
;
; Entry: ac0 and ac1 are passed through untouched to the microcode
;	(presumably adr1 and adr2 -- confirm against the microcode),
;	count is fetched into ac3 from word 3 of the frame addressed
;	by ac2, and ac3 initially holds the return address.
; Exit:  ac0 = -1 (true) or 0 (false).  The return address is reloaded
;	from frame word 1 and control goes to the word 1 past it.

.UBlockEq:
	sta 3 1 2		; save return address in frame word 1
	lda 3 3 2		; get count
	#66000			; call microcode
	mov 3 3 szr		; ac3 = 0 if all is well
	sub 0 0 skp		; ac3 nonzero: ac0 ← 0 (false), skip the adc
	 adc 0 0		; ac3 zero: ac0 ← -1 (true)
	lda 3 1 2		; recover return address
	jmp 1 3			; return to 1 past the saved address

; BlockEq(adr1,adr2,count) - Assembly language version
; Compare the count words starting at adr1 with the corresponding
; words starting at adr2, and return true iff all the corresponding
; words are equal.
;
; Entry: ac0 = adr1, ac1 = adr2, count in word 3 of the frame
;	addressed by ac2, ac3 = return address.
; Exit:  ac0 = result of mkminusone (true) or mkzero (false).
; Frame usage: word 1 holds the return address, word 2 the running
;	adr1, word 3 the remaining count (first in words, later in
;	groups of 8).
; Strategy: words are compared one at a time until the remaining
;	count is a multiple of 8; the rest is compared in unrolled
;	groups of 8, borrowing ac2 as a second index register.

.BlockEq:
	sta 3 1 2		; Save return address in frame word 1
	sta 0 2 2		; Store adr1
	mov 1 3			; ac3 ← adr2

; Compare the first (count mod 8) words one at a time
bleq1:	lda 0 3 2		; See if count mod 8 = 0
	lda 1 c7		; Mask for the low three bits
	and# 0 1 snr		; Test only; the jmp below runs when the result is 0
	 jmp bleq2		; Yes, go to fast loop
	lda 0 @2 2		; No, do slow compare of one word
	lda 1 0 3		; Corresponding word at adr2
	se 0 1			; Skip the jmp if the two words are equal
	 jmp bleq5		; Not equal, return false
	isz 2 2			; Equal, increment addresses
	inc 3 3
	dsz 3 2			; Decrement and test count
	 jmp bleq1		; Count still nonzero, test it again

; Set up for fast loop
; Also reached by falling out of the loop above when the count hits 0.
; The count is a multiple of 8 here, so a zero quotient means that
; nothing is left to compare.
bleq2:	lda 0 3 2		; Compute count/8
	movzr 0 0
	movzr 0 0
	movzr 0 0 snr
	 jmp bleq4		; Less than 8 words in block
	sta 0 3 2		; Store count/8
	lda 0 2 2		; Get current adr1

; Fast loop.  ac0/ adr1, ac3/ adr2, count in frame temp 3.
; The portion inside dir...eir is non-reentrant due to the use
; of save2.
bleq3:	dir			; Interlock non-reentrant portion
	sta 2 save2		; Save frame pointer
	mov 0 2			; ac2 ← adr1

	lda 0 0 2		; Compare 8 words, to bleq6 if fail
	lda 1 0 3
	se 0 1
	 jmp bleq6

	lda 0 1 2		; Second word of the group
	lda 1 1 3
	se 0 1
	 jmp bleq6

	lda 0 2 2		; Third word
	lda 1 2 3
	se 0 1
	 jmp bleq6

	lda 0 3 2		; Fourth word
	lda 1 3 3
	se 0 1
	 jmp bleq6

	lda 0 4 2		; Fifth word
	lda 1 4 3
	se 0 1
	 jmp bleq6

	lda 0 5 2		; Sixth word
	lda 1 5 3
	se 0 1
	 jmp bleq6

	lda 0 6 2		; Seventh word
	lda 1 6 3
	se 0 1
	 jmp bleq6

	lda 0 7 2		; Eighth word
	lda 1 7 3
	se 0 1
	 jmp bleq6

	lda 0 c10		; Group size (octal 10 = 8 words)
	add 0 3			; Increment adr2 by 8
	add 2 0			; Increment adr1 by 8, move to ac0
	lda 2 save2		; Recover frame pointer
	eir			; Now reentrant
	dsz 3 2			; Decrement and test count
	 jmp bleq3		; More to do

; Common exit.  Entering at bleq4 skips over the mkzero at bleq5.
bleq4:	mkminusone 0 0 skp	; Here to return true
bleq5:	 mkzero 0 0		; Here to return false
	lda 3 1 2		; Recover return address
	jmp 1 3			; Return to 1 past the saved address

; Mismatch inside the fast loop: ac2 still holds adr1 and the
; interlock is still set, so undo both before taking the false exit.
bleq6:	lda 2 save2		; Here to return false when
	eir			;  inside fast loop
	jmp bleq5

c7:	7			; Mask used to form count mod 8
c10:	10			; Octal 10 = 8, words per fast-loop pass
save2:	0			; Holds the frame pointer while ac2 is borrowed

; PupChecksum microcode interface
; microcoded replacement for PupChecksum procedure in PupAl1a.asm
;
; Entry: ac0 = address of the block to checksum; its first word is
;	read as the length in bytes.  ac3 = return address, ac2 = frame.
; At the microcode call: ac0 = 0 (initial checksum), ac1 = address,
;	ac3 = (length in bytes - 1)/2.
; Exit:  ac0 is whatever the microcode leaves there (presumably the
;	checksum -- confirm against the microcode).

.UPupChecksum:
	sta 3 1 2		; save return address in frame word 1
	mov 0 1			; address in AC1 for microcode
	mov 0 3			; copy address to ac3 to index from it
	lda 3 0 3		; get pup length in bytes
	neg 3 3			; compute # words exclusive of checksum
	comzr 3 3		;  = (# bytes -1)/2
	mkzero 0 0		; init checksum
	#64000			; call microcode
	lda 3 1 2		; recover return address
	jmp 1 3			; return to 1 past the saved address


; MulDiv(a, b, c)
; Unsigned scaling: form the double-word product a*b, divide it by c
; and return the quotient, i.e. (a*b)/c.
;
; Entry: ac0 = a, ac1 = b, c in word 3 of the frame addressed by ac2,
;	ac3 = return address.
; Exit:  ac0 = quotient.
; ac3 carries the frame pointer while ac2 serves first as the
; multiplier and then as the divisor.

.MulDiv:
	sta 3 1 2		; return address to frame word 1
	mov 2 3			; ac3 ← frame, freeing ac2 for operands
	mov 0 2			; ac2 ← a
	mkzero 0 0		; nothing to add into the product
	mul			; (ac0,ac1) ← ac0 + ac1*ac2 = a*b
	lda 2 3 3		; ac2 ← c
	div			; ac1 ← (ac0,ac1)/ac2
	 #77400			; executed only when div does not skip
	mov 3 2			; ac2 ← frame again
	lda 3 1 2		; ac3 ← return address
	mov 1 0			; the quotient is the result
	jmp 1 3			; return to 1 past the saved address

; MulPlus32x16(addend,multiplier,lvMultiplicand)
; @lvMultiplicand ← @lvMultiplicand*multiplier + addend (all unsigned)
; The multiplicand is 32 bits (high word at lvMultiplicand!0, low word
; at lvMultiplicand!1); multiplier and addend are 16 bits.
; Returns the overflow, i.e. the part of the result above 32 bits.
;
; Entry: ac0 = addend, ac1 = multiplier, lvMultiplicand in word 3 of
;	the frame addressed by ac2, ac3 = return address.
; Method: two 16x16 multiplies, low word first; the high half of the
;	first product stays in ac0 and is added into the second.

.MulPlus32x16:
	sta 3 1 2		; return address to frame word 1
	mov 2 3			; ac3 ← frame, ac2 becomes a work register
	sta 1 2 3		; frame word 2 ← multiplier (mul overwrites ac1)
	lda 2 3 3		; ac2 ← lvMultiplicand
	lda 2 1 2		; ac2 ← low word of the multiplicand
	mul			; (ac0,ac1) ← addend + multiplier*low
	lda 2 3 3		; ac2 ← lvMultiplicand
	sta 1 1 2		; low word of the result goes back
	lda 1 2 3		; ac1 ← multiplier
	lda 2 0 2		; ac2 ← high word of the multiplicand
	mul			; (ac0,ac1) ← carry-in + multiplier*high
	sta 1 @3 3		; high word of the result goes back
	mov 3 2			; ac2 ← frame again
	lda 3 1 2		; ac3 ← return address
	jmp 1 3			; overflow is left in ac0


; DoubleUsc(lvA, lvB) = -1|0|1
; Unsigned compare of two 32 bit numbers:
;	-1 if A < B
;	 0 if A = B
;	 1 if A > B
; lvA and lvB are the addresses of the operands, high word first.
;
; Entry: ac0 = lvA, ac1 = lvB, ac2 = frame, ac3 = return address.
; ac3 is never disturbed, so the return is taken directly through it.
; Frame words 3 and 2 are used to hold (and step) lvA and lvB.
; The first pair of words that differs decides the ordering; if
; neither pair differs the operands are equal.

.DoubleUsc:
	sta 0 3 2		; frame word 3 ← lvA
	sta 1 2 2		; frame word 2 ← lvB
	lda 0 @3 2		; ac0 ← A high part
	lda 1 @2 2		; ac1 ← B high part
	se 0 1
	 jmp dusc2		; high parts differ, they decide
	isz 3 2			; step both pointers to the low parts
	isz 2 2
	lda 0 @3 2		; ac0 ← A low part
	lda 1 @2 2		; ac1 ← B low part
	se 0 1
	 jmp dusc2		; low parts differ, they decide
	mkzero 0 0		; A = B
	jmp 1 3

; ac0 and ac1 are known to differ here; ac0 is from A, ac1 from B.
dusc2:	sleu 1 0		; B, A: skip when B is the smaller
	 mkminusone 0 0 skp	; A < B
	 mkone 0 0		; A > B
	jmp 1 3


; Divide32x16(lvNumber,divisor) =
; Unsigned divide of the 32-bit number at @lvNumber (high word first)
; by the 16-bit divisor.  The quotient replaces the number in place
; and the remainder is returned.
;
; Entry: ac0 = lvNumber, ac1 = divisor, ac2 = frame, ac3 = return.
; Exit:  ac0 = remainder.
; Method: schoolbook long division, one word at a time.  The remainder
;	of the high-word step stays in ac0 and becomes the upper half
;	of the dividend for the low-word step.
; Frame word 2 holds the pointer into the number, word 3 the divisor.

.Divide32x16:
	sta 3 1 2	; return address to frame word 1
	mov 2 3		; ac3 ← frame, ac2 is needed for the divisor
	sta 1 3 3	; frame word 3 ← divisor
	sta 0 2 3	; frame word 2 ← lvNumber
	lda 2 3 3	; ac2 ← divisor
	lda 1 @2 3	; ac1 ← high word of the dividend
	mkzero 0 0	; nothing above the high word
	div		; ac0 ← remainder, ac1 ← quotient
	 nop		; passed over when div skips
	sta 1 @2 3	; high word of the quotient goes back
	isz 2 3		; advance the pointer to the low word
	lda 1 @2 3	; ac1 ← low word of the dividend
	div		; ac0 ← remainder, ac1 ← quotient
	 nop		; passed over when div skips
	sta 1 @2 3	; low word of the quotient goes back
	mov 3 2		; ac2 ← frame again
	lda 3 1 2	; ac3 ← return address
	jmp 1 3		; remainder is left in ac0


; UpdateAverage(new,old,k) = (((k-1)*old)+new)/k
; Fold a new sample into a running value: the old value is weighted
; by k-1, the new sample is added, and the sum is divided by k.
;
; Entry: ac0 = new, ac1 = old, k in word 3 of the frame addressed by
;	ac2, ac3 = return address.
; Exit:  ac0 = quotient.

.UpdateAverage:
	sta 3 1 2	; return address to frame word 1
	mov 2 3		; ac3 ← frame, ac2 is needed for k
	lda 2 3 3	; ac2 ← k
	neg 2 2		; negate, then complement:
	com 2 2		;  ac2 ← k-1
	mul		; (ac0,ac1) ← ac0 + ac1*ac2 = new + old*(k-1)
	inc 2 2		; ac2 ← k again
	div		; ac1 ← (ac0,ac1)/ac2
	 nop		; skips on Alto
	mov 3 2		; ac2 ← frame again
	lda 3 1 2	; ac3 ← return address
	mov 1 0		; the quotient is the result
	jmp 1 3		; return to 1 past the saved address

	.end