; AltoByteBlt.asm -- Routine to move a block of bytes quickly.
;		E. A. Taft / February, 1976
; Copyright Xerox Corporation 1979

; Linkage: ByteBlt is declared as an entry, and the word labelled ByteBlt
; below holds the address of the code at .ByteBlt.
; NOTE(review): presumably .srel places that word among the BCPL statics so
; that BCPL callers reach the routine through it, and .nrel switches back to
; ordinary relocatable code -- confirm against the assembler/loader manual.
.ent ByteBlt

.srel
ByteBlt: .ByteBlt

.nrel

; Frame offsets
; All of these are displacements from ac2, which addresses the frame
; throughout this file. Offset 1 is written by the entry sequence
; (sta 3 1 2) and is not otherwise touched here.
; NOTE(review): the trailing "." presumably marks a decimal constant, values
; written without it (the runtime vectors below) being in the assembler's
; default radix -- confirm.
Temp=2.		; Scratch: MoveByte's saved return, and the doubleword loop count
DstAdr=4.	; Arguments start here
DstByte=5.
SrcAdr=6.
SrcByte=7.
ByteCount=8.

FrameSize=9.	; Passed in line to getframe; covers offsets 0 through ByteCount

; BCPL runtime
; Fixed locations through which the runtime routines are reached with
; an indirect jsr (jsr @getframe, jsr @storeargs, jsr @return).
getframe=370
storeargs=367
return=366


; ByteBlt(DstAdr,DstByte,SrcAdr,SrcByte,ByteCount)
; Transfer a block of bytes according to arguments:
;	DstAdr		Word address of first destination byte
;	DstByte		Offset of first destination byte
;	SrcAdr		Word address of first source byte
;	SrcByte		Offset of first source byte
;	ByteCount	Number of bytes to transfer
;
; The byte offsets need not be 0 or 1: each is first split into a word
; offset, which is added to the corresponding address, and a remainder
; giving the byte position within the word (0 = even/left, 1 = odd/right).
;
; Strategy:
;	4 or fewer bytes	Moved one at a time by MoveByte.
;	Positions in phase	At most one byte by MoveByte to reach an even
;				byte, then whole words by blt.
;	Positions out of phase	At most one byte by MoveByte to put the
;				destination on an even byte, then a loop that
;				builds each destination word from two adjacent
;				source words.
; In every case leftover bytes are finished off by MoveByte at bytbl8.
;
; The argument words in the frame are used as working state and are
; updated as the transfer proceeds. ac2 addresses the frame throughout.
; NOTE(review): nothing here tests for overlapping source and destination
; blocks -- confirm the intended behavior before relying on overlap.

.ByteBlt:
	sta 3 1 2		; Standard BCPL entry sequence
	jsr @getframe
	 FrameSize		; In-line word following the call: frame size
	 jsr @storeargs		; Arguments are then found at DstAdr.. in the frame

; Reduce the byte indices to the range [0,1]
; On completion the frame holds the adjusted addresses and 0/1 positions,
; and ac0 / ac1 still hold the source / destination positions; the tests
; further down use those registers directly.
	lda 0 SrcByte 2		; Get source byte offset
	movzr 0 0		; Compute word offset, remainder
	lda 3 SrcAdr 2		; Update source address
	add 0 3
	sta 3 SrcAdr 2
	subcl 0 0		; Leftover bytes (0 or 1)
	sta 0 SrcByte 2
	lda 1 DstByte 2		; Get destination byte offset
	movzr 1 1		; Compute word offset, remainder
	lda 3 DstAdr 2		; Update destination address
	add 1 3
	sta 3 DstAdr 2
	subcl 1 1		; Leftover bytes (0 or 1)
	sta 1 DstByte 2

; Test for small number of bytes to move
; The two skip instructions together form the test: the jmp is reached
; when the count is 4 or fewer. That includes a count of zero, for which
; bytbl8 returns at once. ac3 is only scratch here; the count is reloaded
; from the frame wherever it is needed afterwards.
	lda 3 ByteCount 2	; Get byte count
	negor 3 3 szr		; 4 or fewer bytes?
	comzr 3 3 snr
	 jmp bytbl8		; Yes, move them the slow way

; See whether source and destination byte positions are in phase
; ac0 = source position, ac1 = destination position (each 0 or 1).
	se 0 1			; In phase?
	 jmp bytbl5		; No

; Bytes are in phase, do move with blt
; If both positions are odd, one byte is moved by hand first so that the
; rest of the transfer starts on a word boundary. MoveByte updates
; ByteCount and the addresses in the frame, so everything is reloaded
; from the frame afterwards.
	sz 0 0			; Starting with even byte?
	 jsr MoveByte		; No, move one byte manually
	lda 3 ByteCount 2	; Get byte count
	movzr 3 3		; Compute full words to move
	subcl 0 0		; Leftover bytes (0 or 1)
	sta 0 ByteCount 2	; Update byte count
; Advance both frame addresses past the words blt is about to move, so
; that the leftover byte (if any) is picked up correctly at bytbl8.
	lda 0 SrcAdr 2		; Get first source address
	add 3 0			; Update it
	sta 0 SrcAdr 2
	lda 1 DstAdr 2		; Get first destination address
	add 3 1			; Update it
	sta 1 DstAdr 2
; Set up the blt operands from the already-advanced addresses:
;	ac0 = first source address - 1
;	ac1 = last destination address (advanced DstAdr - 1)
;	ac3 = negated word count
	adc 3 0			; Compute first source address -1
	neg 1 1			; Compute last dest adr for blt
	com 1 1
	neg 3 3			; Negate word count for blt
	blt			; Blast away
	jmp bytbl8		; Handle leftover bytes

; Bytes are not in phase, do the transfer one word at a time
; The positions differ, so once the destination is on an even byte the
; source is on an odd byte: either it started there, or MoveByte has just
; consumed the even byte ahead of it.
bytbl5:	sz 1 1			; Destination on even byte?
	 jsr MoveByte		; No, move a byte to make it be
	lda 1 ByteCount 2	; Get byte count
	movzr 1 1		; Convert to word count
	subcl 0 0		; Leftover bytes (0 or 1)
	sta 0 ByteCount 2	; Update byte count
; The loop body stores two words per pass, so the word count is halved
; and rounded up. The remainder of that halving is left in carry and is
; tested below to choose the loop entry point.
; NOTE(review): this relies on the instructions between here and the
; "mov 0 1 szc" leaving carry undisturbed -- confirm if this is edited.
	movzr 1 1 szc		; Compute word count/2
	 inc 1 1		; Round doubleword count up
	sta 1 Temp 2		; Store for loop
	lda 3 c377		; Right byte mask for loop
	lda 0 @SrcAdr 2		; Get first source byte (odd)
	and 3 0			; Put zero in lh
; Copy the leftover byte into ac1 as well, so that either loop entry
; point finds it in the register it expects.
	mov 0 1 szc		; Check remainder from count/2
	 jmp bytbl7		; One leftover word

; Main loop -- transfers 4 bytes on each iteration.
; ac3 holds the right-byte mask throughout. The two halves of the loop
; are identical except that the roles of ac0 and ac1 are exchanged, so
; the byte left over from one half is already in place for the other.
; Each half adds the whole new source word to the leftover, subtracts the
; new right byte back out, and swaps halves: the result is the leftover
; byte on the left and the new left byte on the right, while the new
; right byte remains in the other register as the next leftover.
; Here we have a leftover byte in rh of 0, zero in lh.
bytbl6:	isz SrcAdr 2		; Increment source address
	lda 1 @SrcAdr 2		; Get next source word
	add 1 0			; Ac0 ← new lh , new rh+leftover
	and 3 1			; Ac1 ← 0 , new rh
	subs 1 0		; Ac0 ← leftover , new lh
	sta 0 @DstAdr 2		; Store destination word
	isz DstAdr 2		; Increment destination address
; Here we have a leftover byte in rh of 1, zero in lh.
bytbl7:	isz SrcAdr 2		; Increment source address
	lda 0 @SrcAdr 2		; Get next source word
	add 0 1			; Ac1 ← new lh , new rh+leftover
	and 3 0			; Ac0 ← 0 , new rh
	subs 0 1		; Ac1 ← leftover , new lh
	sta 1 @DstAdr 2		; Store destination word
	isz DstAdr 2		; Increment destination address
	dsz Temp 2		; Test doubleword count
	 jmp bytbl6		; More words to move
; Falls through when the doubleword count is exhausted. SrcAdr then
; addresses the word whose right byte is the next one to move.

; Do leftover bytes (if any) the slow way
; The loop below has no exit test of its own after the first check:
; MoveByte performs the BCPL return itself once ByteCount reaches zero.
bytbl8:	lda 0 ByteCount 2	; Check byte count
	snz 0 0
	 jsr @return		; No more, return
	jsr MoveByte		; More, transfer one byte
	jmp .-1			; Repeat til MoveByte returns

; Internal routine to transfer one byte from source to destination.
; Returns to caller+1 if there are still more bytes to transfer.
; Executes BCPL return (returns to caller of ByteBlt) if no more.
; Updates all state in the frame of the caller (ByteBlt).
; Clobbers the frame Temp word
;
; Entry:	ac2 = ByteBlt's frame, ac3 = return address (from jsr).
;		SrcByte and DstByte in the frame must each be 0 or 1, and
;		ByteCount must be nonzero.
; Exit:		SrcAdr/SrcByte and DstAdr/DstByte advanced by one byte,
;		ByteCount decremented. ac0, ac1 and ac3 are overwritten.
;
; The byte positions are tested with dsz, which also changes them:
; a position of 1 becomes 0 and skips (odd byte; the word address is then
; incremented, so 0 is the correct new position), while a position of 0
; becomes nonzero and takes the jmp (even byte; the position is then
; explicitly reset to 1).

MoveByte:
	sta 3 Temp 2		; Save return
	lda 0 @SrcAdr 2		; Get word containing source byte
	dsz SrcByte 2		; Check source byte position
	 jmp getevn		; Even

; Getting odd byte
; The byte is already in the right half of ac0; the left half is garbage
; until it is masked off below.
	isz SrcAdr 2		; Increment source address
	jmp movby1

; Getting even byte
getevn:	movs 0 0		; Swap byte to right half
	mkone 1 1		; Fix up the byte position
	sta 1 SrcByte 2

; Here the source byte is in the right half of ac0, garbage in the left.
movby1:	lda 3 c377		; Get byte mask
	lda 1 @DstAdr 2		; Existing contents of dest word
	dsz DstByte 2		; Check destination byte position
	 jmp putevn		; Go store even byte

; Putting odd byte
; The word is now complete, so the destination address moves on.
	and 3 0			; Mask out garbage, leave in rh
	movs 3 3		; Make left half mask
	and 3 1			; Retain lh of existing word
	add 1 0			; Insert new byte in right half
	sta 0 @DstAdr 2		; Store updated dest word
	isz DstAdr 2		; Increment dest address
	jmp movby2		; Go finish up

; Putting even byte
; The destination address is not advanced: the right half of the same
; word is the next byte to be written.
putevn:	ands 3 0		; Mask out garbage, leave in lh
	and 3 1			; Retain rh of existing word
	add 1 0			; Insert new byte in left half
	sta 0 @DstAdr 2		; Store updated dest word
	mkone 1 1		; Fix up the byte position
	sta 1 DstByte 2

; Count the byte just moved and pick the exit: back to the saved return
; address while bytes remain, otherwise straight out of ByteBlt.
movby2:	dsz ByteCount 2		; Decrement remaining byte count
	 jmp @Temp 2		; More to do, return to ByteBlt
	jsr @return		; Exhausted, return from ByteBlt

; Right-half byte mask, shared by MoveByte and ByteBlt's out-of-phase loop.
c377:	377


	.end