Code:
;****************************************************************
;* DIV : 32 x 32 divide *
;* Input : R0 / R1 *
;* Output : R0 = quotient *
;* : R2 = remainder *
;* Notes : R2 = R0 MOD R1 *
;****************************************************************
ifdef DIVS_USED
        LIST
;----------------------------------------------------------------
; DIVS - signed entry point of the 32-bit divide.
;   In : R0 = dividend, R1 = divisor (two's complement, R0+3/R1+3
;        are the most significant bytes).
;   Out: R0 and R1 replaced by their magnitudes; bit 7 of R3+3 is
;        set when exactly one operand was negative.  Control then
;        passes to divdo, jumping over the unsigned DIV entry so
;        the sign flag is not cleared again.
;   W is zeroed on any path that negates an operand.
;----------------------------------------------------------------
DIVS    clrf    R3 + 3          ; sign flag := "signs are equal"

        btfss   R0 + 3, 7       ; dividend negative?
        bra     divchkr1        ;   no  - leave R0 alone
        clrf    WREG            ; W = 0 so subfwb yields 0 - f - borrow
        btg     R3 + 3, 7       ; record one negative operand
        negf    R0              ; R0 = -R0, low byte first ...
        subfwb  R0 + 1, F       ; ... borrow rippling upward
        subfwb  R0 + 2, F
        subfwb  R0 + 3, F

divchkr1 btfss  R1 + 3, 7       ; divisor negative?
        bra     divdo           ;   no  - go straight to the divide
        clrf    WREG            ; W = 0 so subfwb yields 0 - f - borrow
        btg     R3 + 3, 7       ; second toggle cancels the first
        negf    R1              ; R1 = -R1, low byte first ...
        subfwb  R1 + 1, F       ; ... borrow rippling upward
        subfwb  R1 + 2, F
        subfwb  R1 + 3, F
        bra     divdo           ; bypass the unsigned entry's flag clear
        NOLIST
DIV_USED = 1
endif
ifdef DIV_USED
        LIST
;----------------------------------------------------------------
; DIV - unsigned 32-bit / 32-bit restoring (shift-subtract) divide.
;   In : R0 = dividend, R1 = divisor (R0 / R1 are the low bytes,
;        R0+3 / R1+3 the high bytes).
;   Out: R0 = quotient, R2 = remainder, W = low byte of quotient.
;   Uses: R3   = pass counter,
;         R3+3 = sign flag (bit 7) when DIVS_USED is defined.
;   Exit: jumps to DUNN.
;   Signed callers enter at divdo (from DIVS) with magnitudes in
;   R0/R1; when the sign flag is set both the quotient and the
;   remainder are negated before leaving.
;----------------------------------------------------------------
DIV
    ifdef DIVS_USED
        clrf    R3 + 3          ; unsigned entry: never negate the result
    endif
divdo   clrf    R2              ; remainder := 0
        clrf    R2 + 1
        clrf    R2 + 2
        clrf    R2 + 3
        movlw   32              ; one pass per dividend bit
        movwf   R3
    ifdef SKI_DIV_SPEEDUP
        ; Byte-level speed-up.  While the top byte of the dividend is
        ; zero, the next 8 passes would only shift zeros into R2 and
        ; zeros into the quotient, so shift R0 up one byte and drop 8
        ; passes instead.  Only the dividend may be shifted: moving the
        ; divisor as well rescales it and gives a wrong quotient.
        ; The shortcut relies on R2 (= 0) being smaller than R1, so a
        ; zero divisor is sent through the full 32 passes and behaves
        ; exactly as it does without the speed-up.
        movf    R1, W
        iorwf   R1 + 1, W
        iorwf   R1 + 2, W
        iorwf   R1 + 3, W
        bz      divloop         ; divisor == 0: no shortcut
SkiOpt  movf    R0 + 3, W       ; top byte of dividend still zero?
        bnz     divloop         ;   no - start dividing
        movlw   8
        subwf   R3, F           ; 8 fewer passes
        movff   R0 + 2, R0 + 3  ; R0 <<= 8
        movff   R0 + 1, R0 + 2
        movff   R0, R0 + 1
        clrf    R0
        cpfseq  R3              ; W is still 8: keep at least 8 passes so
        bra     SkiOpt          ; decfsz never starts from a zero count
    endif
divloop rlcf    R0 + 3, W       ; C = next dividend bit (W is discarded)
        rlcf    R2, F           ; shift it into the remainder
        rlcf    R2 + 1, F
        rlcf    R2 + 2, F
        rlcf    R2 + 3, F
        movf    R1, W           ; trial subtract: R2 -= R1
        subwf   R2, F
        movf    R1 + 1, W
        subwfb  R2 + 1, F
        movf    R1 + 2, W
        subwfb  R2 + 2, F
        movf    R1 + 3, W
        subwfb  R2 + 3, F
        bc      divok           ; no borrow: divisor fits, quotient bit = 1
        movf    R1, W           ; borrow: restore R2 += R1
        addwf   R2, F
        movf    R1 + 1, W
        addwfc  R2 + 1, F
        movf    R1 + 2, W
        addwfc  R2 + 2, F
        movf    R1 + 3, W
        addwfc  R2 + 3, F
        bcf     STATUS, C       ; quotient bit = 0
divok   rlcf    R0, F           ; shift quotient bit in, next dividend bit up
        rlcf    R0 + 1, F
        rlcf    R0 + 2, F
        rlcf    R0 + 3, F
        decfsz  R3, F
        bra     divloop
    ifdef DIVS_USED
        btfss   R3 + 3, 7       ; operand signs differed?
        bra     divdone         ;   no - result stays positive
        clrf    WREG            ; W = 0 so subfwb yields 0 - f - borrow
        negf    R0              ; quotient := -quotient
        subfwb  R0 + 1, F
        subfwb  R0 + 2, F
        subfwb  R0 + 3, F
        negf    R2              ; remainder := -remainder
        subfwb  R2 + 1, F
        subfwb  R2 + 2, F
        subfwb  R2 + 3, F
divdone
    endif
        movf    R0, W           ; return low byte of quotient in W
        goto    DUNN
        NOLIST
DUNN_USED = 1
endif
This byte-level optimization seems to work very well on my end, as measured with the MPLAB simulator and its stopwatch.
Bookmarks