Optimizing DIV

skimask · - 8th September 2008, 01:15

Ok, did some fooling around with the divide optimizations...
This looks like it should work...

Code:

;****************************************************************
;* DIV        : 32 x 32 divide                                  *
;*                                                              *
;* Input      : R0 / R1                                         *
;* Output     : R0 = quotient                                   * 
;*            : R2 = remainder                                  *
;*                                                              *
;* Notes      : R2 = R0 MOD R1                                  *
;****************************************************************

    ifdef DIVS_USED
; DIVS - signed entry point for the 32-bit divide.
; Replaces a negative R0 and/or R1 with its two's complement and records in
; bit 7 of R3 + 3 whether exactly one of them was negative, then branches to
; divdo (the common divide body that follows the DIV label).  That flag is
; tested again after the divide loop to decide whether to negate the results.
; W is cleared on the negate paths.
  LIST
DIVS	clrf	R3 + 3		; Clear sign difference indicator
	btfss	R0 + 3, 7	; Check for R0 negative
	bra	divchkr1	; Not negative
	btg	R3 + 3, 7	; Flip sign indicator
	clrf	WREG		; Clear W for subtracts
	negf	R0		; Flip value to plus
	subfwb	R0 + 1, F	; 0 - byte - borrow: carries the negate upward
	subfwb	R0 + 2, F
	subfwb	R0 + 3, F
divchkr1 btfss	R1 + 3, 7	; Check for R1 negative
	bra	divdo		; Not negative
	btg	R3 + 3, 7	; Flip sign indicator
	clrf	WREG		; Clear W for subtracts
	negf	R1		; Flip value to plus
	subfwb	R1 + 1, F	; 0 - byte - borrow: carries the negate upward
	subfwb	R1 + 2, F
	subfwb	R1 + 3, F
	bra	divdo		; Skip unsigned entry
  NOLIST
; Using DIVS forces the DIV block below (guarded by ifdef DIV_USED) to be
; assembled, since divdo lives there.
DIV_USED = 1
    endif

    ifdef DIV_USED
  LIST
;----------------------------------------------------------------
; DIV - unsigned 32 / 32 restoring (shift-and-subtract) divide.
;
;   In   : R0 = dividend, R1 = divisor (4 bytes each, R0/R1 = low byte)
;   Out  : R0 = quotient, R2 = remainder, W = low byte of quotient
;   Uses : R3 = pass counter, W, STATUS
;          R3 + 3 bit 7 = "negate results" flag (only with DIVS_USED;
;          cleared here, set by the signed entry before it joins at divdo)
;   Exit : goto DUNN
;
; Optional speed-up, selected with SKI_DIV_OPT:
;   1 = skip leading zero BYTES of the dividend (8 passes per byte)
;   2 = skip leading zero BITS  of the dividend (1 pass per bit)
; While the top bits of the dividend are zero, a divide pass only shifts a
; zero into R2 and a zero quotient bit into R0, so those passes can be
; replaced by shifting R0 left up front and shortening the pass count.
; The divisor R1 must NOT be shifted: scaling it changes the quotient
; (e.g. 100 / 10 would come out as 0 remainder 100).
;
; NOTE(review): with a zero divisor the result is meaningless either way,
; but the value left in R0 differs between the optimized and plain paths.
;----------------------------------------------------------------
DIV
      ifdef DIVS_USED
	clrf	R3 + 3		; Unsigned entry: never negate the results
      endif
divdo	clrf	R2		; Remainder starts at zero
	clrf	R2 + 1
	clrf	R2 + 2
	clrf	R2 + 3

	movlw	32		; One pass per dividend bit
	movwf	R3

; --- Option 1: drop whole leading zero bytes of the dividend ---
	ifdef SKI_DIV_OPT
		if ( SKI_DIV_OPT == 1 )
SkiOpt
	movf	R0 + 3, W	; Top byte of dividend zero?
	bnz	divloop		; No - nothing more to skip, go divide

	movff	R0 + 2, R0 + 3	; Yes - move dividend up one byte
	movff	R0 + 1, R0 + 2
	movff	R0 + 0, R0 + 1
	clrf	R0

	movlw	8		; Those 8 passes are now done
	subwf	R3, F
	bnz	SkiOpt		; Passes remain - test the next byte
	bra	divexit		; Dividend was 0: R0 = 0 and R2 = 0 already;
				;  never enter divloop with R3 = 0 (decfsz
				;  would wrap and run 256 passes)
		endif
	endif

; --- Option 2: drop leading zero bits of the dividend ---
	ifdef SKI_DIV_OPT
		if ( SKI_DIV_OPT == 2 )
SkiOpt2
	btfsc	R0 + 3, 7	; Top bit of dividend set?
	bra	divloop		; Yes - nothing more to skip, go divide

	bcf	STATUS, C	; Shift a zero into the low end
	rlcf	R0, F		; Dividend left one bit across all 4 bytes
	rlcf	R0 + 1, F
	rlcf	R0 + 2, F
	rlcf	R0 + 3, F

	decfsz	R3, F		; That pass is now done
	bra	SkiOpt2		; Passes remain - test the next bit
	bra	divexit		; Dividend was 0: R0 = 0 and R2 = 0 already

		endif
	endif

; --- Main loop: one quotient bit per pass ---
divloop	rlcf	R0 + 3, W	; Carry = next dividend bit (W is scratch)
	rlcf	R2, F		; Shift that bit into the remainder
	rlcf	R2 + 1, F
	rlcf	R2 + 2, F
	rlcf	R2 + 3, F
	movf	R1, W		; Trial subtract: R2 = R2 - R1
	subwf	R2, F
	movf	R1 + 1, W
	subwfb	R2 + 1, F
	movf	R1 + 2, W
	subwfb	R2 + 2, F
	movf	R1 + 3, W
	subwfb	R2 + 3, F

	bc	divok		; No borrow: keep it, quotient bit = 1 (C set)
	movf	R1, W		; Borrow: put the divisor back
	addwf	R2, F
	movf	R1 + 1, W
	addwfc	R2 + 1, F
	movf	R1 + 2, W
	addwfc	R2 + 2, F
	movf	R1 + 3, W
	addwfc	R2 + 3, F

	bcf	STATUS, C	; Quotient bit = 0

divok	rlcf	R0, F		; Shift quotient bit in, next dividend bit up
	rlcf	R0 + 1, F
	rlcf	R0 + 2, F
	rlcf	R0 + 3, F

	decfsz	R3, F
	bra	divloop

divexit				; Also reached directly when the dividend is 0
      ifdef DIVS_USED
	btfss	R3 + 3, 7	; Should result be negative?
	bra	divdone		; Not negative
	clrf	WREG		; Clear W for subtracts
	negf	R0		; Flip quotient to minus
	subfwb	R0 + 1, F
	subfwb	R0 + 2, F
	subfwb	R0 + 3, F
	negf	R2		; Flip remainder to minus
	subfwb	R2 + 1, F
	subfwb	R2 + 2, F
	subfwb	R2 + 3, F
divdone
      endif

	movf	R0, W		; Get low byte to W
	goto	DUNN
  NOLIST
DUNN_USED = 1
    endif

See any glaring problems?
I'm still a bit fuzzy on how DEFINEs in PBP relate to symbols on the assembler side, so I'm not sure the SKI_DIV_OPT conditionals will work as written.
And I'm not sure the bit-level optimization will actually save any time anyway.
More testing to come...

Thread: Optimizing DIV

Thread Tools

Search Thread

Display

Threaded View

Similar Threads

Optimizing LCD commands?

Members who have read this thread : 0

Bookmarks

Bookmarks

Posting Permissions