[BITS 32]

[GLOBAL _DrawOSpan]
[GLOBAL _DrawMaskOSpan]
[GLOBAL _DrawTransOSpan]

;[SECTION .text ALIGN=4]
[SECTION .text]


%include "wtd.inc"

; Unroll parameters for the three span loops in this file
; (O = plain, OM = masked, OT = translucent).
;   *_LEN  = number of pixel steps in one pass of the unrolled loop; it is
;            also used as a mask (LEN-1), so it must be a power of two
;   *_SIZE = encoded size in bytes of one pixel step; the entry-offset
;            tables in .data are spaced by this amount
OLOOP_LEN    equ 32
OLOOP_SIZE   equ 12
OMLOOP_LEN    equ 32
OMLOOP_SIZE   equ 16
OTLOOP_LEN    equ 32
OTLOOP_SIZE   equ 15

;****************************************************************************
; Draws a vertical object span (any Tex width)
;
;	void DrawOSpan(struct WLine *pWLine)
;
; In:	pWLine is the first stack argument (read as [ebp+8]); the routine
;	ends with a plain ret, so the caller removes the argument.
;	Fields read: Count, RawPtr, Coord, Delta, ViewWidth, PixPtr, PalPtr.
;	Their offsets are not defined in this file (presumably wtd.inc).
; Out:	nothing. All general registers are restored by popad; flags are
;	clobbered and the static LoopCount is overwritten.
;
; Register roles while the unrolled loop runs:
;	eax	high word = fractional texture coordinate, low word = 0
;	ecx	high word = fractional Delta, low word = 0
;	edx	integer Delta (zero-extended word)
;	esi	texel pointer (RawPtr + integer Coord)
;	edi	destination pixel pointer
;	ebp	ViewWidth, added to edi after every pixel
;	ebx	PalPtr; bl is overwritten with the texel, so the lookup reads
;		from (PalPtr with its low byte replaced by the texel)
;
; The first pass is entered part-way through the unrolled loop so that it
; draws Count mod OLOOP_LEN pixels; the entry address is pushed and reached
; with ret because every register is in use by then.
;****************************************************************************

; One pixel step. Its encoding must stay OLOOP_SIZE bytes long, because
; OLoopOffset addresses the steps by byte offset.
%macro OSPAN_STEP 0
	mov bl, [esi]			; fetch texel
	mov bl, [ebx]			; translate through the table at ebx
	mov [edi], bl			; plot
	add edi, ebp			; move down one row
	add eax, ecx			; advance fraction; CF set on wrap
	adc esi, edx			; advance texel ptr by integer Delta + CF
%endmacro

; NOTE(review): bare `align` (no operand) is kept exactly as found;
; presumably its meaning comes from wtd.inc or the assembler in use - confirm.
align
_DrawOSpan:
	push ebp			; preserve caller's stack frame
	mov ebp, esp			; point to our stack frame
	pushad				; save all registers
	mov ebx, [ebp+8]		; ebx = pWLine

	; Keep the full count for the loop tail and pick the entry point
	; for the partial first pass.
	movzx eax, word [ebx+Count]
	mov [LoopCount], eax
	and eax, OLOOP_LEN-1
	mov eax, [OLoopOffset+eax*4]
	add eax, .OLoopStart
	push eax			; consumed by the ret below

	; Texel pointer = RawPtr + integer part of Coord.
	movzx edx, word [ebx+Coord+2]
	mov esi, [ebx+RawPtr]
	add esi, edx

	; Fractions go in the high words so that add eax, ecx carries out
	; exactly when the coordinate crosses a whole texel.
	movzx eax, word [ebx+Coord]
	shl eax, 16
	movzx ecx, word [ebx+Delta]
	shl ecx, 16
	movzx edx, word [ebx+Delta+2]	; integer Delta

	mov edi, [ebx+PixPtr]
	movzx ebp, word [ebx+ViewWidth]	; ebp is no longer the frame pointer
	mov ebx, [ebx+PalPtr]		; loaded last: ebx stops being pWLine

	ret				; jump to the entry point pushed above

align
.OLoopStart:				; unrolled loop
%rep 32					; literal count; must stay equal to OLOOP_LEN
	OSPAN_STEP
%endrep

	sub dword [LoopCount], OLOOP_LEN
	jnc NEAR .OLoopStart		; no borrow: another full pass remains

	popad				; restore all registers (ebp = frame again)
	leave				; mov esp, ebp / pop ebp
	ret				; return


;****************************************************************************
; Draws a vertical masked object span (any Tex width)
;
;	void DrawMaskOSpan(struct WLine *pWLine)
;
; Same as the plain span, except that a texel value of 0 is skipped: no
; table lookup and no store happen for it, so the destination pixel is kept.
;
; In:	pWLine is the first stack argument (read as [ebp+8]); the routine
;	ends with a plain ret, so the caller removes the argument.
;	Fields read: Count, RawPtr, Coord, Delta, ViewWidth, PixPtr, PalPtr.
;	Their offsets are not defined in this file (presumably wtd.inc).
; Out:	nothing. All general registers are restored by popad; flags are
;	clobbered and the static LoopCount is overwritten.
;
; Register roles while the unrolled loop runs:
;	eax	high word = fractional texture coordinate, low word = 0
;	ecx	high word = fractional Delta, low word = 0
;	edx	integer Delta (zero-extended word)
;	esi	texel pointer (RawPtr + integer Coord)
;	edi	destination pixel pointer
;	ebp	ViewWidth, added to edi after every pixel
;	ebx	PalPtr; bl is overwritten with the texel, so the lookup reads
;		from (PalPtr with its low byte replaced by the texel)
;
; The first pass is entered part-way through the unrolled loop so that it
; handles Count mod OMLOOP_LEN pixels; the entry address is pushed and
; reached with ret because every register is in use by then.
;****************************************************************************

; One pixel step. Its encoding must stay OMLOOP_SIZE bytes long, because
; OMLoopOffset addresses the steps by byte offset; the branch is therefore
; forced to the 2-byte short form.
%macro OMSPAN_STEP 0
	mov bl, [esi]			; fetch texel
	test bl, bl
	je short %%transparent		; texel 0: leave the destination alone
	mov bl, [ebx]			; translate through the table at ebx
	mov [edi], bl			; plot
%%transparent:
	add edi, ebp			; move down one row
	add eax, ecx			; advance fraction; CF set on wrap
	adc esi, edx			; advance texel ptr by integer Delta + CF
%endmacro

; NOTE(review): bare `align` (no operand) is kept exactly as found;
; presumably its meaning comes from wtd.inc or the assembler in use - confirm.
align
_DrawMaskOSpan:
	push ebp			; preserve caller's stack frame
	mov ebp, esp			; point to our stack frame
	pushad				; save all registers
	mov ebx, [ebp+8]		; ebx = pWLine

	; Keep the full count for the loop tail and pick the entry point
	; for the partial first pass.
	movzx eax, word [ebx+Count]
	mov [LoopCount], eax
	and eax, OMLOOP_LEN-1
	mov eax, [OMLoopOffset+eax*4]
	add eax, .OMLoopStart
	push eax			; consumed by the ret below

	; Texel pointer = RawPtr + integer part of Coord.
	movzx edx, word [ebx+Coord+2]
	mov esi, [ebx+RawPtr]
	add esi, edx

	; Fractions go in the high words so that add eax, ecx carries out
	; exactly when the coordinate crosses a whole texel.
	movzx eax, word [ebx+Coord]
	shl eax, 16
	movzx ecx, word [ebx+Delta]
	shl ecx, 16
	movzx edx, word [ebx+Delta+2]	; integer Delta

	mov edi, [ebx+PixPtr]
	movzx ebp, word [ebx+ViewWidth]	; ebp is no longer the frame pointer
	mov ebx, [ebx+PalPtr]		; loaded last: ebx stops being pWLine

	ret				; jump to the entry point pushed above

align
.OMLoopStart:				; unrolled loop
%rep 32					; literal count; must stay equal to OMLOOP_LEN
	OMSPAN_STEP
%endrep

	sub dword [LoopCount], OMLOOP_LEN
	jnc NEAR .OMLoopStart		; no borrow: another full pass remains

	popad				; restore all registers (ebp = frame again)
	leave				; mov esp, ebp / pop ebp
	ret				; return


;****************************************************************************
; Draws a vertical translucent object span (any Tex width)
;
;	void DrawTransOSpan(struct WLine *pWLine)
;
; Every output pixel is looked up from the table at PalPtr using both the
; existing destination pixel and the texel: the byte read is at
; (PalPtr with its low byte replaced by the destination pixel) + texel*256.
; Presumably PalPtr is a 256x256 blend table indexed [texel][dest] - confirm
; against the caller.
;
; In:	pWLine is the first stack argument (read as [ebp+8]); the routine
;	ends with a plain ret, so the caller removes the argument.
;	Fields read: Count, RawPtr, Coord, Delta, ViewWidth, PixPtr, PalPtr.
;	Their offsets are not defined in this file (presumably wtd.inc).
; Out:	nothing. All general registers are restored by popad; flags are
;	clobbered and the static LoopCount is overwritten.
;
; Register roles while the unrolled loop runs:
;	eax	high word = fractional texture coordinate
;	ecx	high word = fractional Delta; ch = current texel; cl = 0
;	edx	integer Delta (zero-extended word)
;	esi	texel pointer (RawPtr + integer Coord)
;	edi	destination pixel pointer
;	ebp	ViewWidth, added to edi after every pixel
;	ebx	PalPtr minus (fractional Delta << 16), so that [ebx+ecx]
;		cancels the fraction held in the high word of ecx; bl holds
;		the destination pixel, then the lookup result
;
; The first pass is entered part-way through the unrolled loop so that it
; draws Count mod OTLOOP_LEN pixels; the entry address is pushed and reached
; with ret because every register is in use by then.
;****************************************************************************

; One pixel step. Its encoding must stay OTLOOP_SIZE bytes long, because
; OTLoopOffset addresses the steps by byte offset.
%macro OTSPAN_STEP 0
	mov bl, [edi]			; existing destination pixel -> low index
	mov ch, [esi]			; texel -> index bits 8..15
	mov bl, [ebx+ecx]		; table lookup (see header)
	mov [edi], bl			; plot
	add edi, ebp			; move down one row
	; NOTE(review): ch still holds the texel, so texel<<8 is added into
	; ax as well; a carry out of ax can bump the fraction by one LSB.
	; Looks like an accepted approximation - confirm before changing.
	add eax, ecx			; advance fraction; CF set on wrap
	adc esi, edx			; advance texel ptr by integer Delta + CF
%endmacro

; NOTE(review): bare `align` (no operand) is kept exactly as found;
; presumably its meaning comes from wtd.inc or the assembler in use - confirm.
align
_DrawTransOSpan:
	push ebp			; preserve caller's stack frame
	mov ebp, esp			; point to our stack frame
	pushad				; save all registers
	mov ebx, [ebp+8]		; ebx = pWLine

	; Keep the full count for the loop tail and pick the entry point
	; for the partial first pass.
	movzx eax, word [ebx+Count]
	mov [LoopCount], eax
	and eax, OTLOOP_LEN-1
	mov eax, [OTLoopOffset+eax*4]
	add eax, .OTLoopStart
	push eax			; consumed by the ret below

	; Texel pointer = RawPtr + integer part of Coord.
	movzx edx, word [ebx+Coord+2]
	mov esi, [ebx+RawPtr]
	add esi, edx

	; Fractions go in the high words so that add eax, ecx carries out
	; when the coordinate crosses a whole texel.
	movzx eax, word [ebx+Coord]
	shl eax, 16
	movzx ecx, word [ebx+Delta]
	shl ecx, 16
	movzx edx, word [ebx+Delta+2]	; integer Delta

	mov edi, [ebx+PixPtr]
	movzx ebp, word [ebx+ViewWidth]	; ebp is no longer the frame pointer
	mov ebx, [ebx+PalPtr]		; loaded last: ebx stops being pWLine
	sub ebx, ecx			; pre-bias so [ebx+ecx] drops the fraction

	ret				; jump to the entry point pushed above

align
.OTLoopStart:				; unrolled loop
%rep 32					; literal count; must stay equal to OTLOOP_LEN
	OTSPAN_STEP
%endrep

	sub dword [LoopCount], OTLOOP_LEN
	jnc NEAR .OTLoopStart		; no borrow: another full pass remains

	popad				; restore all registers (ebp = frame again)
	leave				; mov esp, ebp / pop ebp
	ret				; return

;[SECTION .data ALIGN=4]
[SECTION .data]

; Working pixel counter shared by the three span routines: each one stores
; the span's Count here on entry, subtracts its unroll length after every
; pass through the unrolled loop and stops when that subtraction borrows.
; It is static storage, so the routines cannot be re-entered while a span
; is being drawn.
LoopCount:	dd 0

; Entry offsets into the unrolled loop of _DrawOSpan, indexed by
; Count & (OLOOP_LEN-1). Entry k is the byte offset from the start of the
; loop that skips the first OLOOP_LEN-k steps, leaving k steps in the first
; pass; entry 0 skips every step and lands on the count update after them.
; Generated from the constants so the table cannot drift from the step size.
OLoopOffset:
%assign OSPAN_ENTRY 0
%rep 32				; literal count; must stay equal to OLOOP_LEN
    dd (OLOOP_LEN - OSPAN_ENTRY) * OLOOP_SIZE
%assign OSPAN_ENTRY OSPAN_ENTRY+1
%endrep

; Entry offsets into the unrolled loop of _DrawMaskOSpan, indexed by
; Count & (OMLOOP_LEN-1). Entry k is the byte offset from the start of the
; loop that skips the first OMLOOP_LEN-k steps, leaving k steps in the first
; pass; entry 0 skips every step and lands on the count update after them.
; Generated from the constants so the table cannot drift from the step size.
OMLoopOffset:
%assign OMSPAN_ENTRY 0
%rep 32				; literal count; must stay equal to OMLOOP_LEN
    dd (OMLOOP_LEN - OMSPAN_ENTRY) * OMLOOP_SIZE
%assign OMSPAN_ENTRY OMSPAN_ENTRY+1
%endrep

; Entry offsets into the unrolled loop of _DrawTransOSpan, indexed by
; Count & (OTLOOP_LEN-1). Entry k is the byte offset from the start of the
; loop that skips the first OTLOOP_LEN-k steps, leaving k steps in the first
; pass; entry 0 skips every step and lands on the count update after them.
; Generated from the constants so the table cannot drift from the step size.
OTLoopOffset:
%assign OTSPAN_ENTRY 0
%rep 32				; literal count; must stay equal to OTLOOP_LEN
    dd (OTLOOP_LEN - OTSPAN_ENTRY) * OTLOOP_SIZE
%assign OTSPAN_ENTRY OTSPAN_ENTRY+1
%endrep
