Well, looks like it may need something else.

I wrapped it in a program, and changed the labels so it would coexist with PBP, then used a couple FOR loops to cycle through some numbers. It does the optimized divide, then does a normal PBP divide and compares the results. If they aren't the same, is sends a message out the USART.

With the DEFINE SKI_DIV_OPT line commented out, both versions are always the same. Which shows that the test program is working, and the output looks like this ...
Code:
A=93  No ERRORs
A=94  No ERRORs
A=95  No ERRORs
A=96  No ERRORs
A=97  No ERRORs
A=98  No ERRORs
A=99  No ERRORs
A=100  No ERRORs
A=101  No ERRORs
A=102  No ERRORs
A=103  No ERRORs
A=104  No ERRORs
With SKI_DIV_OPT defined as either 1 or 2, the results are the same. Here's some output from SKI_DIV_OPT 1 ...
Code:
 Quotient Error: A=59  B=51  Ski=0  PBP=1
Remainder Error: A=59  B=51  Ski=59  PBP=8
 Quotient Error: A=59  B=52  Ski=0  PBP=1
Remainder Error: A=59  B=52  Ski=59  PBP=7
 Quotient Error: A=59  B=53  Ski=0  PBP=1
Remainder Error: A=59  B=53  Ski=59  PBP=6
 Quotient Error: A=59  B=54  Ski=0  PBP=1
Remainder Error: A=59  B=54  Ski=59  PBP=5
 Quotient Error: A=59  B=55  Ski=0  PBP=1
Remainder Error: A=59  B=55  Ski=59  PBP=4
 Quotient Error: A=59  B=56  Ski=0  PBP=1
Remainder Error: A=59  B=56  Ski=59  PBP=3
 Quotient Error: A=59  B=57  Ski=0  PBP=1
Remainder Error: A=59  B=57  Ski=59  PBP=2
 Quotient Error: A=59  B=58  Ski=0  PBP=1
Remainder Error: A=59  B=58  Ski=59  PBP=1
 Quotient Error: A=59  B=59  Ski=0  PBP=1
Remainder Error: A=59  B=59  Ski=59  PBP=0
 Quotient Error: A=60  B=0  Ski=255  PBP=4294967295
 Quotient Error: A=60  B=1  Ski=0  PBP=60
Remainder Error: A=60  B=1  Ski=60  PBP=0
 Quotient Error: A=60  B=2  Ski=0  PBP=30
Remainder Error: A=60  B=2  Ski=60  PBP=0
 Quotient Error: A=60  B=3  Ski=0  PBP=20
Remainder Error: A=60  B=3  Ski=60  PBP=0
 Quotient Error: A=60  B=4  Ski=0  PBP=15
Remainder Error: A=60  B=4  Ski=60  PBP=0
 Quotient Error: A=60  B=5  Ski=0  PBP=12
Remainder Error: A=60  B=5  Ski=60  PBP=0
 Quotient Error: A=60  B=6  Ski=0  PBP=10
Remainder Error: A=60  B=6  Ski=60  PBP=0
 Quotient Error: A=60  B=7  Ski=0  PBP=8
Remainder Error: A=60  B=7  Ski=60  PBP=4
 Quotient Error: A=60  B=8  Ski=0  PBP=7
Remainder Error: A=60  B=8  Ski=60  PBP=4
 Quotient Error: A=60  B=9  Ski=0  PBP=6
Remainder Error: A=60  B=9  Ski=60  PBP=6
 Quotient Error: A=60  B=10  Ski=0  PBP=6
Remainder Error: A=60  B=10  Ski=60  PBP=0
 Quotient Error: A=60  B=11  Ski=0  PBP=5
Remainder Error: A=60  B=11  Ski=60  PBP=5
 Quotient Error: A=60  B=12  Ski=0  PBP=5
Remainder Error: A=60  B=12  Ski=60  PBP=0
 Quotient Error: A=60  B=13  Ski=0  PBP=4
Here's the program, any 18F will do ...
Code:
'****************************************************************
'*  Name    : Test_SkiDIV.pbp                                   *
'*  Author  : Darrel Taylor                                     *
'*  Date    : 9/9/2008                                          *
'*  Version : 1.0                                               *
'*  Thread  : instruction execution time                        *
'*      http://www.picbasic.co.uk/forum/showthread.php?p=61992  *
'****************************************************************
;****************************************************************
;* DIV        : 32 x 32 divide                                  *
;*                                                              *
;* Input      : R0 / R1                                         *
;* Output     : R0 = quotient                                   * 
;*            : R2 = remainder                                  *
;*                                                              *
;* Notes      : R2 = R0 MOD R1                                  *
;****************************************************************

DEFINE OSC 40

DEFINE HSER_TXSTA 24h 'Hser transmit status init 
DEFINE HSER_RCSTA 90h 'Hser receive status init 
DEFINE HSER_BAUD 38400 'Hser baud rate 
DEFINE HSER_CLROERR 1 'Hser clear overflow automatically 
 
DEFINE SKI_DIV_OPT 1

AL    VAR LONG
BL    VAR LONG
SkiQ  VAR LONG   ; ASM quotient
SkiR  VAR LONG   ; ASM remainder

PBPQ  VAR LONG   ; PBP quotient
PBPR  VAR LONG   ; PBP remainder

AW  VAR WORD
BW  VAR WORD
RW  VAR WORD

ERROR VAR BIT

For AL = 0 to 1000
    ERROR = 0
    For BL = 0 to 1000
        @ MOVE?NN  _AL, R0
        @ MOVE?NN  _BL, R1  ; AL / BL
        @ L?CALL   #DIVS
        @ MOVE?ANN R0, _SkiQ
        @ MOVE?NN  R2, _SkiR
        PBPQ = AL / BL                   ; do same in PBP
        PBPR = AL // BL
        
        if SkiQ != PBPQ then 
            HSEROUT [" Quotient Error: A=",DEC AL,"  B=",DEC BL, _
                     "  Ski=",dec SkiQ,"  PBP=",DEC PBPQ,13,10]
            ERROR = 1
        endif
        if SkiR != PBPR then 
            HSEROUT ["Remainder Error: A=",DEC AL,"  B=",DEC BL, _
                     "  Ski=",dec SkiR,"  PBP=",DEC PBPR,13,10]
            ERROR = 1
        endif
    next BL
    if ERROR = 0 then HSEROUT ["A=",dec AL,"  No ERRORs",13,10]
next AL

stop

; -------------------
ASM
    ifdef DIVS_USED
  LIST
#DIVS  clrf	R3 + 3		; Clear sign difference indicator
	btfss	R0 + 3, 7	; Check for R0 negative
	bra	#divchkr1	; Not negative
	btg	R3 + 3, 7	; Flip sign indicator
	clrf	WREG		; Clear W for subtracts
	negf	R0		; Flip value to plus
	subfwb	R0 + 1, F
	subfwb	R0 + 2, F
	subfwb	R0 + 3, F
#divchkr1  btfss	R1 + 3, 7	; Check for R1 negative
	bra	#divdo		; Not negative
	btg	R3 + 3, 7	; Flip sign indicator
	clrf	WREG		; Clear W for subtracts
	negf	R1		; Flip value to plus
	subfwb	R1 + 1, F
	subfwb	R1 + 2, F
	subfwb	R1 + 3, F
	bra	#divdo		; Skip unsigned entry
  NOLIST
DIV_USED = 1
    endif

    ifdef DIV_USED
  LIST
#DIV
      ifdef DIVS_USED
	clrf	R3 + 3		; Clear sign difference indicator	
      endif
#divdo	clrf	R2		; Do the divide
	clrf	R2 + 1
	clrf	R2 + 2
	clrf	R2 + 3

	movlw	32
	movwf	R3

;added to speed up s-31 bit divide operations by ignoring
;zero'd bytes
        ifdef SKI_DIV_OPT
		if ( SKI_DIV_OPT == 1 )
SkiOpt
	movf    R0 + 3, W      ; IF R0.byte3 = 0 
	bnz     #divloop
	movf    R1 + 3, W      ;   AND R1.byte3 = 0 then 
	bnz     #divloop

	movlw   8              ;      loops - 8  ; movlw 24
	subwf   R3, F

	movff   R0 + 2, R0 + 3 ;      and preshift R0
	movff   R0 + 1, R0 + 2
	movff   R0 + 0, R0 + 1
	clrf    R0

	movff   R1 + 2, R1 + 3 ;      and R1 over 8 bits
	movff   R1 + 1, R1 + 2
	movff   R1 + 0, R1 + 1
	clrf    R1

	movf    R3, W
	btfss   STATUS, Z      ; stop if no loop's left (0/0)
	bra     SkiOpt
		endif
        endif

;above added to speed divide operations

;added to speed up s-31 bit divides by skipping cleared bits in divisor/dividend
	ifdef SKI_DIV_OPT
		if ( SKI_DIV_OPT == 2 )
SkiOpt2
	btfsc	R0 + 3, 7	; if highest bit set, goto divloop
	bra	#divloop
	btfsc	R1 + 3, 7	; if highest bit set, goto divloop
	bra	#divloop

;streamlined code here...old stuff is gone...
	bcf    	STATUS, 0	;clear carry - shift over complete R0
	rlcf	R0, F		;shift R0, .7 into carry
	rlcf	R0 + 1, F	;shift R0+1
	rlcf	R0 + 2, F	;shift R0+2
	rlcf	R0 + 3, F	;shift R0+3

;	bsc	status, 0	;clear carry - shift over complete R1
	bcf	    STATUS, 0	;clear carry - shift over complete R1
	rlcf	R1, F		;shift R1, .7 into carry
	rlcf	R1 + 1, F	;shift R1+1
	rlcf	R1 + 2, F	;shift R1+2
	rlcf	R1 + 3, F	;shift R1+3

	movlw	1		;subtract one from the loop count
	subwf	R3, F

	movf	R3, W
	btfss STATUS, Z	;stop if no more loops
	bra	SkiOpt2

		endif
	endif
;above added to speed up divides at the bit level

#divloop	rlcf	R0 + 3, W
	rlcf	R2, F
	rlcf	R2 + 1, F
	rlcf	R2 + 2, F
	rlcf	R2 + 3, F
	movf	R1, W
	subwf	R2, F
	movf	R1 + 1, W
	subwfb	R2 + 1, F
	movf	R1 + 2, W
	subwfb	R2 + 2, F
	movf	R1 + 3, W
	subwfb	R2 + 3, F

	bc	#divok
	movf	R1, W
	addwf	R2, F
	movf	R1 + 1, W
	addwfc	R2 + 1, F
	movf	R1 + 2, W
	addwfc	R2 + 2, F
	movf	R1 + 3, W
	addwfc	R2 + 3, F

	bcf	STATUS, C

#divok	rlcf	R0, F
	rlcf	R0 + 1, F
	rlcf	R0 + 2, F
	rlcf	R0 + 3, F

	decfsz	R3, F
	bra	#divloop

      ifdef DIVS_USED
	btfss	R3 + 3, 7	; Should result be negative?
	bra	#divdone		; Not negative
	clrf	WREG		; Clear W for subtracts
	negf	R0		; Flip quotient to minus
	subfwb	R0 + 1, F
	subfwb	R0 + 2, F
	subfwb	R0 + 3, F
	negf	R2		; Flip remainder to minus
	subfwb	R2 + 1, F
	subfwb	R2 + 2, F
	subfwb	R2 + 3, F
#divdone
    endif

	movf	R0, W		; Get low byte to W
	goto	DUNN
  NOLIST
DUNN_USED = 1
    endif
ENDASM