Latest and greatest (so far)...
Seems to work great for me...will probably fail miserably for others!
I haven't packaged it up nicely yet; some comments still need to be added, and some removed...
8 bit only divides run about 4.2 times faster
16 bit only divides run about 2.4 times faster
16/24 bit mixed divides run about 1.5 times faster
24/32 bit mixed divides actually run about 2% slower

However, the cycle counts (and therefore the speed-up figures above) aren't 100% accurate, because they include the cycles used by the For/Next loops.

Code:
' Test-harness setup and driver for the divide routine.
' Declares the working variables, forces one PBP long divide so the divide
' library code is pulled in, then calls dodivide seven times over the operand
' range minnum..maxnum before stopping at a breakpoint-friendly nop.
resetplaceholder:	'18f4685 code
DEFINE	OSC		40	'40 Mhz clock for proto work
DEFINE	NO_CLRWDT	1	'no extra clear watchdog timer instructions
DISABLE
DEFINE SKI_DIV_OPT 1	'tested by the "ifdef SKI_DIV_OPT" sections of the ASM divide code below
' AL / BL are the dividend / divisor handed to the divide routine by dodivide.
' SkiQ / SkiR receive the quotient / remainder from the ASM routine;
' PBPQ / PBPR are for the PBP results when the comparison lines are enabled.
' AW, BW, RW and ERROR are declared but not used in the code shown here.
AL VAR LONG : BL VAR LONG : SkiQ VAR LONG : SkiR VAR LONG : PBPQ VAR LONG
PBPR VAR LONG : AW VAR WORD : BW VAR WORD : RW VAR WORD : ERROR VAR BIT
errorcount var long : indicator var word : loopcount var long
' minnum / maxnum / s1 are the start, end and step of both For loops in dodivide.
minnum var long : maxnum var long : s1 var long
pbpq = al / bl	'need to use at least one PBP divide to kick in DIVS for now
'indicator commented out in ASM code during -speed- runs
' With 1000..1255 step 1 each dodivide call performs 256 x 256 divides;
' the routine is called seven times in total (1 + 3 + 3).
testcode:	minnum = 1000 : maxnum = 1255 : s1 = 1 : gosub dodivide
                    gosub dodivide : gosub dodivide : gosub dodivide
                    gosub dodivide : gosub dodivide : gosub dodivide
		@ nop	;stopper - easy to find point to set a breakpoint in MPSIM
END
stop
' dodivide: divide every AL by every BL, both running from minnum to maxnum
' in steps of s1.
' As shipped, only the "OPT run" lines are active: AL is copied to R0, BL to
' R1, #DIVS is called, then R0 is stored in SkiQ and R2 in SkiR.
' The commented-out lines are alternatives that are enabled by hand:
'   - PBPQ = AL / BL          for timing the PBP divide instead
'   - PBPR / errorcount lines for running both and counting mismatches
'   - indicator / loopcount   as progress markers to watch in MPSIM
dodivide:	For AL = minnum to maxnum step s1
                   For BL = minnum to maxnum step s1
'			indicator.lowbyte = $f0		'watch in MPSIM to show what's going on
'			loopcount = loopcount + 1	'watch in MPSIM to show what's going on

			@ MOVE?NN	_AL, R0				;use only during OPT run
			@ MOVE?NN	_BL, R1		; AL / BL	;use only during OPT run
			@ L?CALL	#DIVS				;use only during OPT run
			@ MOVE?NN	R0, _SkiQ			;use only during OPT run
			@ MOVE?NN	R2, _SkiR			;use only during OPT run

'			indicator.lowbyte = $0f		'watch in MPSIM to show what's going on

'			PBPQ = AL / BL					;use only during PBP run

'			PBPR = AL // BL					;use only when doing -both- runs to check for errors
'			indicator.lowbyte = 0		'watch in MPSIM to show what's going on
'			if ( pbpq <> skiq ) or ( pbpr <> skir ) then errorcount = errorcount + 1	'watch in MPSIM to show what's going on
		next BL
          next AL
     return
' This end follows the return above, so the subroutine never falls into it.
end
ASM
	ifdef DIVS_USED
  LIST
;----------------------------------------------------------------------
; #DIVS - signed entry to the 32-bit divide.
; In:   R0 = dividend, R1 = divisor (4 bytes each, low byte at + 0).
; Does: replaces each negative operand by its magnitude and toggles
;       R3 + 3 bit 7 once per negative operand, so the bit ends up set
;       only when exactly one operand was negative. Then jumps to
;       #divdo, bypassing the #DIV entry which would clear that flag.
; Also sets DIV_USED so the shared divide code is assembled.
;----------------------------------------------------------------------
#DIVS
	clrf	R3 + 1		;clear shiftout counter (not referenced again in the code shown here)
	clrf	R3 + 3		; Clear sign difference indicator
	btfss	R0 + 3, 7	; Check for R0 negative
	bra	#divchkr1	; Not negative
	btg	R3 + 3, 7	; Flip sign indicator
	clrf	WREG		; Clear W for subtracts
	negf	R0 + 0		; Flip value to plus (low byte; carry feeds the bytes above)
	subfwb	R0 + 1, F	; 0 - byte - borrow: finishes the two's complement
	subfwb	R0 + 2, F
	subfwb	R0 + 3, F
#divchkr1
	btfss	R1 + 3, 7	; Check for R1 negative
	bra	#divdo		; Not negative
	btg	R3 + 3, 7	; Flip sign indicator
	clrf	WREG		; Clear W for subtracts
	negf	R1 + 0		; Flip value to plus (low byte; carry feeds the bytes above)
	subfwb	R1 + 1, F	; 0 - byte - borrow: finishes the two's complement
	subfwb	R1 + 2, F
	subfwb	R1 + 3, F
	bra	#divdo		; Skip unsigned entry
  NOLIST
DIV_USED = 1
	endif
	ifdef DIV_USED
  LIST
;----------------------------------------------------------------------
; #DIV   - unsigned 32-bit divide entry.
; #divdo - common divide core; #DIVS jumps here after making both
;          operands positive.
;
; In:    R0 = dividend, R1 = divisor (4 bytes each, low byte at + 0)
; Out:   R0 = quotient, R2 = remainder, W = R0 + 0; leaves via goto DUNN
; Uses:  R3 + 0 = loop counter
;        R3 + 3 bit 7 = "negate results" flag (only when DIVS_USED)
;
; Method: restoring shift-and-subtract, one quotient bit per pass.
; With SKI_DIV_OPT defined the routine runs the narrowest loop (32, 24,
; 16 or 8 bits) that is wide enough for BOTH operands, which shortens
; the pass count and the per-pass byte work. A zero dividend or a zero
; divisor is sent straight to the plain 32-bit loop.
;
; Every loop except the last (8-bit) one must jump past the loops that
; follow it. In a build that has DIV_USED but not DIVS_USED the jump
; target is #divdone; without that jump a finished loop would fall into
; the next narrower loop with R3 = 0 and decfsz would run it for another
; 256 passes, destroying the result.
;----------------------------------------------------------------------
#DIV
		ifdef DIVS_USED
	clrf	R3 + 3		; Clear sign difference indicator
		endif
#divdo
	clrf	R2 + 0		; Do the divide: remainder starts at zero
	clrf	R2 + 1
	clrf	R2 + 2
	clrf	R2 + 3
	movlw	32		; default pass count: full 32-bit divide
	movwf	R3 + 0
		ifdef SKI_DIV_OPT
;check for zero case and send directly to divloop if they are zero
	movf	R0 + 0, W
	bnz	#divzero1
	movf	R0 + 1, W
	bnz	#divzero1
	movf	R0 + 2, W
	bnz	#divzero1
	movf	R0 + 3, W
	bnz	#divzero1
	bra	#divloopa	;if R0 is zero, do #divloop
#divzero1
	movf	R1 + 0, W
	bnz	SkiOpt5
	movf	R1 + 1, W
	bnz	SkiOpt5
	movf	R1 + 2, W
	bnz	SkiOpt5
	movf	R1 + 3, W
	bnz	SkiOpt5		;if R1 is not zero, continue OPT
	bra	#divloopa	;if R1 is zero, do divloop
SkiOpt5	;check if can use different divide methods (32, 24, 16, 8)
; Walk down from the top byte; the first non-zero byte found in either
; operand decides the width. R3 + 0 is loaded before each test so it
; already holds the right pass count when a branch is taken.
	movlw	32		;load loop count
	movwf	R3 + 0
	movf	R0 + 3, W
	bnz	#divloopa	;use normal div
	movf	R1 + 3, W
	bnz	#divloopa	;use normal div
	movlw	24		;load loop count
	movwf	R3 + 0
	movf	R0 + 2, W
	bnz	#divloop24a	;jump out to 24 bit
	movf	R1 + 2, W
	bnz	#divloop24a	;jump out to 24 bit
	movlw	16		;load loop count
	movwf	R3 + 0
	movf	R0 + 1, W
	bnz	#divloop16a	;jump out to 16 bit
	movf	R1 + 1, W
	bnz	#divloop16a	;jump out to 16 bit
	movlw	8		;load loop count
	movwf	R3 + 0
	movf	R0 + 0, W
	bnz	#divloop8a	;jump out to 8 bit
; Defensive fallback only: R0 was already proven non-zero above and its
; upper three bytes are zero here, so the branch just above is always taken.
	movf	R1 + 0, W
	bnz	#divloop8a
	movlw	32		;reload loop count with max count
	movwf	R3 + 0
		endif
;above added to speed divide operations
#divloopa	;32 bit divide
;	movlw	50
;	movwf	_indicator + 1
#divloop
	rlcf	R0 + 3, W	; carry = next dividend bit (R0 itself is shifted at #divok)
	rlcf	R2, F		; shift that bit into the remainder
	rlcf	R2 + 1, F
	rlcf	R2 + 2, F
	rlcf	R2 + 3, F
	movf	R1, W		; trial subtract: remainder -= divisor
	subwf	R2, F
	movf	R1 + 1, W
	subwfb	R2 + 1, F
	movf	R1 + 2, W
	subwfb	R2 + 2, F
	movf	R1 + 3, W
	subwfb	R2 + 3, F
	bc	#divok		; no borrow: divisor fit, quotient bit = 1 (carry set)
	movf	R1, W		; borrow: add the divisor back
	addwf	R2, F
	movf	R1 + 1, W
	addwfc	R2 + 1, F
	movf	R1 + 2, W
	addwfc	R2 + 2, F
	movf	R1 + 3, W
	addwfc	R2 + 3, F
	bcf	STATUS, 0	; quotient bit = 0
#divok
	rlcf	R0, F		; shift the quotient bit in, dividend up one place
	rlcf	R0 + 1, F
	rlcf	R0 + 2, F
	rlcf	R0 + 3, F
	decfsz	R3, F
	bra	#divloop
		ifdef SKI_DIV_OPT
; The narrower loops are assembled next, so jump over them. (Without
; SKI_DIV_OPT nothing lies in between and execution simply continues.)
			ifdef DIVS_USED
	bra	#divnegchk	; Check for negative result
			else
	bra	#divdone	; unsigned-only build: skip the narrower loops
			endif
#divloop24a	;24 bit divide
;	movlw	36
;	movwf	_indicator + 1
#divloop24
	rlcf	R0 + 2, W	; carry = next dividend bit
	rlcf	R2, F
	rlcf	R2 + 1, F
	rlcf	R2 + 2, F
	movf	R1, W		; trial subtract on three bytes
	subwf	R2, F
	movf	R1 + 1, W
	subwfb	R2 + 1, F
	movf	R1 + 2, W
	subwfb	R2 + 2, F
	bc	#divok24	; no borrow: quotient bit = 1
	movf	R1, W		; borrow: add the divisor back
	addwf	R2, F
	movf	R1 + 1, W
	addwfc	R2 + 1, F
	movf	R1 + 2, W
	addwfc	R2 + 2, F
	bcf	STATUS, 0	; quotient bit = 0
#divok24
	rlcf	R0, F
	rlcf	R0 + 1, F
	rlcf	R0 + 2, F
	decfsz	R3, F
	bra	#divloop24
		ifdef DIVS_USED
	bra	#divnegchk	; Check for negative result
		else
	bra	#divdone	; unsigned-only build: skip the narrower loops
		endif
#divloop16a	;16 bit divide
;	movlw	22
;	movwf	_indicator + 1
#divloop16
	rlcf	R0 + 1, W	; carry = next dividend bit
	rlcf	R2, F
	rlcf	R2 + 1, F
	movf	R1, W		; trial subtract on two bytes
	subwf	R2, F
	movf	R1 + 1, W
	subwfb	R2 + 1, F
	bc	#divok16	; no borrow: quotient bit = 1
	movf	R1, W		; borrow: add the divisor back
	addwf	R2, F
	movf	R1 + 1, W
	addwfc	R2 + 1, F
	bcf	STATUS, 0	; quotient bit = 0
#divok16
	rlcf	R0, F
	rlcf	R0 + 1, F
	decfsz	R3, F
	bra	#divloop16
		ifdef DIVS_USED
	bra	#divnegchk	; Check for negative result
		else
	bra	#divdone	; unsigned-only build: skip the 8-bit loop
		endif
#divloop8a	;8 bit divide
;	movlw	8
;	movwf	_indicator + 1
#divloop8
	rlcf	R0, W		; carry = next dividend bit
	rlcf	R2, F
	movf	R1, W		; trial subtract on one byte
	subwf	R2, F
	bc	#divok8		; no borrow: quotient bit = 1
	movf	R1, W		; borrow: add the divisor back
	addwf	R2, F
	bcf	STATUS, 0	; quotient bit = 0
#divok8
	rlcf	R0, F
	decfsz	R3, F
	bra	#divloop8
; last loop: falls through to the sign fix-up / exit below
		endif
		ifdef DIVS_USED
#divnegchk
	btfss	R3 + 3, 7	; Should result be negative?
	bra	#divdone	; Not negative
	clrf	WREG		; Clear W for subtracts
	negf	R0		; Flip quotient to minus
	subfwb	R0 + 1, F
	subfwb	R0 + 2, F
	subfwb	R0 + 3, F
	negf	R2		; Flip remainder to minus
	subfwb	R2 + 1, F
	subfwb	R2 + 2, F
	subfwb	R2 + 3, F
		endif
#divdone
	movf	R0 + 0,W	; low byte of the quotient is also returned in W
	goto	DUNN
  NOLIST
DUNN_USED = 1
	endif
ENDASM
END