1354 lines
32 KiB
NASM
Executable file
1354 lines
32 KiB
NASM
Executable file
;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
|
|
;SOFTWARE CORPORATION ("PARALLAX"). PARALLAX, IN DISTRIBUTING THE CODE TO
|
|
;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
|
|
;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
|
|
;IN USING, DISPLAYING, AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
|
|
;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
|
|
;FREE PURPOSES. IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
|
|
;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES. THE END-USER UNDERSTANDS
|
|
;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
|
|
;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION. ALL RIGHTS RESERVED.
|
|
;
|
|
; $Source: /cvsroot/dxx-rebirth/d1x-rebirth/texmap/tmap_per.asm,v $
|
|
; $Revision: 1.1.1.1 $
|
|
; $Author: zicodxx $
|
|
; $Date: 2006/03/17 19:46:03 $
|
|
;
|
|
; Perspective texture mapper inner loop.
|
|
;
|
|
; $Log: tmap_per.asm,v $
|
|
; Revision 1.1.1.1 2006/03/17 19:46:03 zicodxx
|
|
; initial import
|
|
;
|
|
; Revision 1.1.1.1 1999/06/14 22:14:01 donut
|
|
; Import of d1x 1.37 source.
|
|
;
|
|
; Revision 1.26 1995/02/20 18:22:55 john
|
|
; Put all the externs in the assembly modules into tmap_inc.asm.
|
|
; Also, moved all the C versions of the inner loops into a new module,
|
|
; scanline.c.
|
|
;
|
|
; Revision 1.25 1995/02/20 17:09:08 john
|
|
; Added code so that you can build the tmapper with no assembly!
|
|
;
|
|
; Revision 1.24 1995/01/10 09:32:07 mike
|
|
; mostly fix garbage at end of scanline, but slow down by 1-4%.
|
|
;
|
|
; Revision 1.23 1994/12/02 23:29:57 mike
|
|
; optimizations.
|
|
;
|
|
; Revision 1.22 1994/11/30 00:57:00 mike
|
|
; optimization.
|
|
;
|
|
; Revision 1.21 1994/11/21 13:57:42 mike
|
|
; fix right side shear bug
|
|
;
|
|
; Revision 1.20 1994/11/12 16:41:09 mike
|
|
; jae -> ja.
|
|
;
|
|
; Revision 1.19 1994/10/27 19:40:00 john
|
|
; Made lighting table lookup be _gr_fade_table[eax] instead
|
|
; of fs:[eax], which gets rid of a segment override that
|
|
; supposedly costs 1 clock on a 486. Mainly, I wanted to verify
|
|
; that the only reason we need selectors is for the source texture
|
|
; data .
|
|
;
|
|
; Revision 1.18 1994/05/03 11:08:32 mike
|
|
; Trap divide overflows.
|
|
;
|
|
; Revision 1.17 1994/04/21 15:03:41 mike
|
|
; make faster.
|
|
;
|
|
; Revision 1.16 1994/04/08 16:46:57 john
|
|
; Made 32 fade levels. Hacked.
|
|
;
|
|
; Revision 1.15 1994/03/31 08:35:18 mike
|
|
; Fix quantized-by-4 bug in inner loop.
|
|
;
|
|
; Revision 1.14 1994/03/14 17:41:14 mike
|
|
; Fix bug in unlighted version.
|
|
;
|
|
; Revision 1.13 1994/03/14 15:45:14 mike
|
|
; streamline code.
|
|
;
|
|
; Revision 1.12 1994/01/14 14:01:58 mike
|
|
; *** empty log message ***
|
|
;
|
|
; Revision 1.11 1993/12/18 14:43:44 john
|
|
; Messed around with doing 1/z, the u*(1/z) and v*(1/z)
|
|
; (Went from 23 fps to 21 fps... not good! )
|
|
;
|
|
; Revision 1.10 1993/12/17 16:14:17 john
|
|
; Split lighted/nonlighted, so there is no cmp lighting
|
|
; in the inner loop.
|
|
;
|
|
; Revision 1.9 1993/12/17 12:34:29 john
|
|
; Made leftover bytes use linear approx instead of correct...
|
|
; should save about 8 divides per scanline on average.
|
|
; Also, took out anti-aliasing code and rearranged to
|
|
; order of some instructions to help on 486 pipelining.
|
|
; (The anti-aliasing code did *not* look good, so I
|
|
; figure there was no reason to keep it in. )
|
|
;
|
|
; Revision 1.8 1993/12/16 18:37:52 mike
|
|
; Align some stuff on 4 byte boundaries.
|
|
;
|
|
; Revision 1.7 1993/11/30 08:44:18 john
|
|
; Made selector set check for < 64*64 bitmaps.
|
|
;
|
|
; Revision 1.6 1993/11/23 17:25:26 john
|
|
; Added safety "and eax, 0fffh" in lighting lookup.
|
|
;
|
|
; Revision 1.5 1993/11/23 15:08:52 mike
|
|
; Fixed lighting bug.
|
|
;
|
|
; Revision 1.4 1993/11/23 14:38:50 john
|
|
; optimized NORMAL code by switching EBX and ESI, so BH can be used in
|
|
; the lighting process.
|
|
;
|
|
; Revision 1.3 1993/11/23 14:30:53 john
|
|
; Made the perspective tmapper do 1/8 divides; added lighting.
|
|
;
|
|
; Revision 1.2 1993/11/22 10:24:59 mike
|
|
; *** empty log message ***
|
|
;
|
|
; Revision 1.1 1993/09/08 17:29:53 mike
|
|
; Initial revision
|
|
;
|
|
;
|
|
;
|
|
|
|
[BITS 32]
|
|
|
|
global _asm_tmap_scanline_per
|
|
global asm_tmap_scanline_per
|
|
|
|
%include "tmap_inc.asm"
|
|
|
|
[SECTION .data]

        align   4

;extern _per2_flag;:dword

%ifdef __LINUX__
; Cater for linux ELF compilers...
; (the underscored names used throughout this file are mapped onto the
;  un-prefixed spellings before they are exported below)
        global  x
%define _loop_count             loop_count
%define _new_end                new_end
%define _scan_doubling_flag     scan_doubling_flag
%define _linear_if_far_flag     linear_if_far_flag
%endif

; ---------- exported variables
        global  _x
        global  _loop_count
        global  _new_end
        global  _scan_doubling_flag
        global  _linear_if_far_flag

;       global  _max_ecx
;       global  _min_ecx

mem_edx                 dd      0       ; only mentioned in commented-out code in the visible part of this file

x:                                      ; un-prefixed alias for _x (same dword)
_x                      dd      0       ; not referenced by the visible code
_loop_count             dd      0       ; iterations left: per-pixel count from the setup code,
                                        ; later re-used as the number of 2^NBITS-pixel chunks

;       _max_ecx        dd      0
;       _min_ecx        dd      55555555h

_new_end                dd      1       ; if set, use new, but slower, way of finishing off extra pixels on scanline, 01/10/95 --MK

_scan_doubling_flag     dd      0       ; not referenced by the visible code
_linear_if_far_flag     dd      0       ; not referenced by the visible code

;---------- local variables
        align   4

req_base                dd      0       ; not referenced by the visible code
req_size                dd      0       ; not referenced by the visible code
U0                      dd      0       ; u/z at the start of the current chunk (16.16 after the PDIV sequence)
U1                      dd      0       ; not referenced by the visible code (U1 is kept in a register)
V0                      dd      0       ; v/z at the start of the current chunk
V1                      dd      0       ; not referenced by the visible code (V1 is kept in a register)
num_left_over           dd      0       ; pixels remaining after the whole 2^NBITS chunks
DU1                     dd      0       ; _fx_du_dx << NBITS : u step per chunk
DV1                     dd      0       ; _fx_dv_dx << NBITS : v step per chunk
DZ1                     dd      0       ; _fx_dz_dx << NBITS : z step per chunk
|
|
|
|
[SECTION .text]
|
|
|
|
; --------------------------------------------------------------------------------------------------
|
|
; Enter:
|
|
; _xleft fixed point left x coordinate
|
|
; _xright fixed point right x coordinate
|
|
; _y fixed point y coordinate
|
|
; _pixptr address of source pixel map
|
|
; _u fixed point initial u coordinate
|
|
; _v fixed point initial v coordinate
|
|
; _z fixed point initial z coordinate
|
|
; _du_dx fixed point du/dx
|
|
; _dv_dx fixed point dv/dx
|
|
; _dz_dx fixed point dz/dx
|
|
|
|
; for (x = (int) xleft; x <= (int) xright; x++) {
|
|
; _setcolor(read_pixel_from_tmap(srcb,((int) (u/z)) & 63,((int) (v/z)) & 63));
|
|
; _setpixel(x,y);
|
|
;
|
|
; u += du_dx;
|
|
; v += dv_dx;
|
|
; z += dz_dx;
|
|
; }
|
|
|
|
|
|
align 16
|
|
_asm_tmap_scanline_per:
asm_tmap_scanline_per:
; Entry point (exported with and without a leading underscore).
; Takes no register arguments; everything comes from globals:
;   reads   _fx_y, _y_pointers, _write_buffer, _fx_xleft, _fx_xright,
;           _fx_u, _fx_v, _fx_z, _per2_flag, _Lighting_on
;   writes  _loop_count; _fx_l and _fx_dl_dx are shifted right by 8 IN PLACE
; All general registers are saved by pusha here and restored by the popa at
; _none_to_do, which is where the exit paths of the loops below lead.
;       push    es
        pusha

;---------------------------- setup for loop ---------------------------------
; Setup for loop: _loop_count iterations = (int) xright - (int) xleft
;       esi     source pixel pointer = pixptr
;       edi     initial row pointer = y*320+x
; NOTE: fx_xright and fx_xleft changed from fix to int by mk on 12/01/94.

; set esi = pointer to start of texture map data

; set edi = address of first pixel to modify
        mov     edi,[_fx_y]
;       mov     es,[_pixel_data_selector]      ; selector[0*2]

        mov     edi,[_y_pointers+edi*4]         ; row lookup, one dword per y

        mov     ebx,[_fx_xleft]
        test    ebx, ebx
        jns     ebx_ok
        xor     ebx, ebx                        ; clamp a negative left edge to column 0
ebx_ok: add     edi,[_write_buffer]
        add     edi,ebx                         ; edi -> first destination pixel

; set _loop_count = # of iterations
        mov     eax,[_fx_xright]
        sub     eax,ebx
        js      near _none_to_do                ; right edge is left of the (clamped) left edge
        mov     [_loop_count],eax               ; loops run _loop_count+1 times (dec / jns)

; lighting values are passed in fixed point, but need to be in 8 bit integer, 8 bit fraction so we can easily
; get the integer by reading %bh
        sar     dword [_fx_l], 8
        sar     dword [_fx_dl_dx],8
        jns     dl_dx_ok
        inc     dword [_fx_dl_dx]               ; round towards 0 for negative deltas
dl_dx_ok:

; set initial values
        mov     ebx,[_fx_u]
        mov     ebp,[_fx_v]
        mov     ecx,[_fx_z]

; Every loop below divides by ecx before it ever tests it (z is only checked
; after dz has been added), so a scanline that starts with z == 0 would fault
; in the first idiv.  Treat it like the mid-scanline case and abort.
        test    ecx, ecx
        je      near _div_0_abort

; dispatch: per-pixel divide loop, or the chunked "fast" loops
        test    dword [_per2_flag],-1
        je      tmap_loop

        test    dword [_Lighting_on], -1
        je      near _tmap_loop_fast_nolight
        jmp     _tmap_loop_fast
|
|
;tmap_loop_fast_nolight_jumper:
|
|
; jmp tmap_loop_fast_nolight
|
|
|
|
;================ PERSPECTIVE TEXTURE MAP INNER LOOPS ========================
|
|
;
|
|
; Usage in loop: eax division, pixel value
|
|
; ebx u
|
|
; ecx z
|
|
; edx division
|
|
; ebp v
|
|
; esi source pixel pointer
|
|
; edi destination pixel pointer
|
|
|
|
;-------------------- NORMAL PERSPECTIVE TEXTURE MAP LOOP -----------------
|
|
tmap_loop:
; Exact perspective loop: two divides per pixel.
; On entry: ebx = u, ebp = v, ecx = z (non-zero), edi -> destination,
;           _loop_count = pixels - 1.  Also used as the fallback by the
;           fast loops when a scanline has no complete 2^NBITS chunk.
; In the loop: esi = u, ebp = v, ecx = z, eax/ebx/edx scratch.
        mov     esi, ebx                ; esi becomes u coordinate

        align   4
tmap_loop0:

; compute v coordinate
        mov     eax, ebp                ; get v
        cdq                             ; edx:eax = sign-extended v
        idiv    ecx                     ; eax = (v/z)

        and     eax,3fh                 ; mask with height-1
        mov     ebx,eax

; compute u coordinate
        mov     eax, esi                ; get u
        cdq                             ; edx:eax = sign-extended u
        idiv    ecx                     ; eax = (u/z)

        shl     eax,26                  ; keep only the low 6 bits of u, at the top of eax
        shld    ebx,eax,6               ; ebx = v*64+u

; read 1 pixel
        add     ebx, [_pixptr]
        movzx   eax, byte [ebx]         ; get pixel from source bitmap, upper bits cleared
        test    dword [_Lighting_on], -1
        je      NoLight1

; LIGHTING CODE
; The lighting interpolant advances on every pixel, but the transparency
; test is made on the source texel, before the fade-table lookup, exactly as
; in the other loops of this file.  (Testing the faded colour instead would
; drop opaque texels whose faded index happens to be 255.)
        mov     ebx, [_fx_l]            ; get temp copy of lighting value
        mov     ah, bh                  ; get lighting level
        add     ebx, [_fx_dl_dx]        ; update lighting value
        mov     [_fx_l], ebx            ; save temp copy of lighting value
        cmp     al,255
        je      skip1                   ; transparent texel: leave destination alone
        mov     al, [_gr_fade_table+eax] ; xlat pixel thru lighting tables
        jmp     short tmap_loop0_store

; transparency check (unlit path)
NoLight1:       cmp     al,255
        je      skip1

tmap_loop0_store:
        mov     [edi],al
skip1:  inc     edi

; update deltas
        add     ebp,[_fx_dv_dx]
        add     esi,[_fx_du_dx]
        add     ecx,[_fx_dz_dx]
        je      _div_0_abort            ; would be dividing by 0, so abort

        dec     dword [_loop_count]
        jns     tmap_loop0

; common exit for the whole routine: undo the pusha done at entry
_none_to_do:
        popa
;       pop     es
        ret

; We detected a z=0 condition, which seems pretty bogus, don't you think?
; So, we abort, but maybe we want to know about it.
_div_0_abort:
        jmp     _none_to_do
|
|
|
|
;-------------------------- PER/4 TMAPPER ----------------
|
|
;
|
|
; x = x1
|
|
; U0 = u/w; V0 = v/w;
|
|
; while ( 1 )
|
|
; u += du_dx*4; v+= dv_dx*4
|
|
; U1 = u/w; V1 = v/w;
|
|
; DUDX = (U1-U0)/4; DVDX = (V1-V0)/4;
|
|
;
|
|
; ; Pixel 0
|
|
; pixels = texmap[V0*64+U0];
|
|
; U0 += DUDX; V0 += DVDX
|
|
; ; Pixel 1
|
|
; pixels = (pixels<<8)+texmap[V0*64+U0];
|
|
; U0 += DUDX; V0 += DVDX
|
|
; ; Pixel 2
|
|
; pixels = (pixels<<8)+texmap[V0*64+U0];
|
|
; U0 += DUDX; V0 += DVDX
|
|
; ; Pixel 3
|
|
; pixels = (pixels<<8)+texmap[V0*64+U0];
|
|
;
|
|
; screen[x] = pixel
|
|
; x += 4;
|
|
; U0 = U1; V0 = V1
|
|
|
|
; Constants shared by the chunked ("fast") loops.
; NBITS is used as the shift that turns the per-pixel deltas into per-chunk
; deltas (DU1/DV1/DZ1), as the mask/shift that splits the pixel count into
; chunks and left-overs, in the %rep counts (1 << (NBITS-2)), (2 << (NBITS-2))
; and (1 << (NBITS-1)), and in the NBITS+6 / 10-NBITS shifts that build the
; packed per-pixel step -- so NBITS-2 must not be negative.
NBITS equ 4 ; 2^NBITS pixels plotted per divide
|
|
; ZSHIFT: the dividend is shifted left by ZSHIFT before each idiv and the
; quotient left by 16-ZSHIFT afterwards (the commented-out PDIV macro shows the sequence).
ZSHIFT equ 4 ; precision used in PDIV macro
|
|
|
|
|
|
;PDIV MACRO
|
|
; Returns EAX/ECX in 16.16 format in EAX. Trashes EDX
|
|
; sig bits 6.3
|
|
; mov edx,eax
|
|
; shl eax,ZSHIFT
|
|
; sar edx,32-ZSHIFT
|
|
; idiv ecx ; eax = (v/z)
|
|
; shl eax, 16-ZSHIFT
|
|
;ENDM
|
|
|
|
global _tmap_loop_fast
|
|
|
|
; -------------------------------------- Start of Getting Dword Aligned ----------------------------------------------
|
|
; ebx fx_u
|
|
|
|
_tmap_loop_fast:
; Lit fast path, part 1: plot single pixels (one pair of divides each)
; until the destination pointer is a multiple of 4, then continue at
; DwordAligned1.  If the scanline runs out first, leave through _none_to_do.
; On entry: ebx = u, ebp = v, ecx = z, edi -> destination.
        mov     esi, ebx                ; u is kept in esi; ebx is scratch below

        align   4
NotDwordAligned1:
        test    edi, 11b                ; low two address bits clear?
        jz      DwordAligned1

        ; texel column = (u/z) & 63
        mov     eax, esi
        cdq
        idiv    ecx
        mov     ebx, eax
        and     ebx, 3fh

        ; texel row = (v/z) & 63, placed above the column bits
        mov     eax, ebp
        cdq
        idiv    ecx
        and     eax, 3fh                ; mask with height-1
        shl     eax, 6
        or      ebx, eax                ; ebx = v*64+u

        ; fetch the texel
        add     ebx, [_pixptr]
        movzx   eax, byte [ebx]         ; al = texel, rest of eax = 0

        ; step the lighting value on every pixel, transparent or not
        mov     ebx, [_fx_l]
        mov     ah, bh                  ; ah = lighting level -> eax indexes the fade table
        add     ebx, [_fx_dl_dx]
        mov     [_fx_l], ebx

        ; 255 marks a transparent texel: neither lit nor written
        cmp     al, 255
        je      .next_pixel
        mov     al, [_gr_fade_table+eax] ; xlat pixel thru lighting tables
        mov     [edi], al
.next_pixel:
        inc     edi

        ; advance u, v, z by one pixel
        add     esi, [_fx_du_dx]
        add     ebp, [_fx_dv_dx]
        add     ecx, [_fx_dz_dx]
        je      near _div_0_abort       ; would be dividing by 0, so abort

        dec     dword [_loop_count]
        jns     NotDwordAligned1

        jmp     _none_to_do             ; scanline finished before reaching alignment
|
|
|
|
; -------------------------------------- End of Getting Dword Aligned ----------------------------------------------
|
|
|
|
DwordAligned1:
; Lit fast path, part 2: the destination is now dword aligned.
; Split the remaining pixels into whole 2^NBITS chunks plus left-overs,
; compute the starting u/z and v/z, and pre-scale the deltas to one chunk.
; On entry: esi = u, ebp = v, ecx = z, _loop_count = pixels - 1.
        mov     ebx, esi                ; u goes back into ebx
        mov     eax, [_loop_count]
        inc     eax                     ; eax = number of pixels still to draw

        mov     edx, eax
        and     edx, (1 << NBITS) - 1
        mov     [num_left_over], edx    ; pixels beyond the last whole chunk

        sar     eax, NBITS              ; eax = whole chunks; ZF set when there are none
        je      near tmap_loop          ; there are no 2^NBITS chunks, do divide/pixel for whole scanline
        mov     [_loop_count], eax      ; _loop_count = pixels / NPIXS

; initial u coordinate: U0 = ((u << ZSHIFT) / z) << (16-ZSHIFT)
        mov     eax, ebx
        mov     edx, eax
        shl     eax, ZSHIFT             ; low dword of u << ZSHIFT
        sar     edx, 32-ZSHIFT          ; high dword (sign-extended)
        idiv    ecx                     ; eax = (u/z)
        shl     eax, 16-ZSHIFT
        mov     [U0], eax

; initial v coordinate, same sequence
        mov     eax, ebp
        mov     edx, eax
        shl     eax, ZSHIFT
        sar     edx, 32-ZSHIFT
        idiv    ecx                     ; eax = (v/z)
        shl     eax, 16-ZSHIFT
        mov     [V0], eax

; Set deltas to NPIXS pixel increments
        mov     eax, [_fx_dz_dx]
        shl     eax, NBITS
        mov     [DZ1], eax

        mov     eax, [_fx_dv_dx]
        shl     eax, NBITS
        mov     [DV1], eax

        mov     eax, [_fx_du_dx]
        shl     eax, NBITS
        mov     [DU1], eax
|
|
|
|
align 4
|
|
TopOfLoop4:
; Lit fast path, part 3: one iteration draws one chunk of 2^NBITS pixels.
; u, v, z are stepped a whole chunk, u/z and v/z are evaluated at the far end
; (U1, V1), and the texels in between are fetched by stepping linearly from
; (U0, V0) with a packed delta.
; Loop-carried: ebx = u, ebp = v, ecx = z, edi -> destination,
;               [U0]/[V0], [_fx_l], _loop_count = chunks left.
        add     ebx, [DU1]
        add     ebp, [DV1]
        add     ecx, [DZ1]
        je      near _div_0_abort       ; would be dividing by 0, so abort (nothing pushed yet)

; Done with ebx, ebp, ecx until next iteration
        push    ebx
        push    ecx
        push    ebp
        push    edi

; Find fixed U1
        mov     eax, ebx
        mov     edx,ebx
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (u/z)
        shl     eax, 16-ZSHIFT
        mov     ebx, eax                ; ebx = U1 until pop's

; Find fixed V1
        mov     eax, ebp
        mov     edx, ebp
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (v/z)

        mov     ecx, [U0]               ; ecx = U0 until pop's
        mov     edi, [V0]               ; edi = V0 until pop's

        shl     eax, 16-ZSHIFT
        mov     ebp, eax                ; ebp = V1 until pop's

; Make ESI = V0:U0 in 6:10,6:10 format
        mov     eax, ecx
        shr     eax, 6
        mov     esi, edi
        shl     esi, 10
        mov     si, ax

; Make EDX = DV:DU in 6:10,6:10 format
        mov     eax, ebx
        sub     eax, ecx
        sar     eax, NBITS+6
        mov     edx, ebp
        sub     edx, edi
        shl     edx, 10-NBITS           ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac
        mov     dx, ax                  ; put delta u in low word

; Save the U1 and V1 so we don't have to divide on the next iteration
        mov     [U0], ebx
        mov     [V0], ebp

        pop     edi                     ; Restore EDI before using it

; LIGHTING CODE
        mov     ebx, [_fx_l]
        mov     ebp, [_fx_dl_dx]

        test    dword [_Transparency_on],-1
        je      near no_trans1

; repproc1: two pixels with per-pixel transparency test, written a byte at a time.
; In: esi = packed v:u, edx = packed step, ebx = lighting, ebp = lighting step,
;     edi -> destination.   Out: esi, ebx, edi advanced.   Trashes eax.
; The lighting value is stepped for EVERY pixel, before the transparency
; test, so that a transparent texel does not delay the lighting gradient for
; the pixels that follow it (same order as the single-pixel loops and the
; left-over code).
%macro repproc1 0
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        add     eax, [_pixptr]
        movzx   eax, byte [eax]         ; get pixel from source bitmap
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value
        cmp     al,255
        je      %%skipa1
        mov     al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
        mov     [edi],al
%%skipa1:
        inc     edi

; Do odd pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        add     eax,[_pixptr]
        movzx   eax, byte [eax]         ; get pixel from source bitmap
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value
        cmp     al,255
        je      %%skipa2
        mov     al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
        mov     [edi],al
%%skipa2:
        inc     edi
%endmacro


%rep (2 << (NBITS-2))
;       local   skip3,no_trans1
;       local   skipa1,skipa2
        repproc1
%endrep

        jmp     cont1

; -------------------------------------------------------
no_trans1:

; repproc2: two pixels without transparency test, collected in cl/ch and
; rotated into the other half of ecx; two expansions fill ecx with 4 pixels.
; In/out as repproc1, except that edi is not touched here.  Trashes eax, ecx.
%macro repproc2 0
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        add     eax,[_pixptr]
        movzx   eax, byte [eax]         ; get pixel from source bitmap
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value
        mov     cl, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer

; Do odd pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        add     eax,[_pixptr]
        movzx   eax, byte [eax]         ; get pixel from source bitmap
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value
        mov     ch, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer

; ----- This is about 1% faster than the above, and could probably be optimized more.
; ----- Problem is, it gets the u,v coordinates backwards.  What you would need to do
; ----- is switch the packing of the u,v coordinates above (about 95 lines up).
;----------;    mov     eax, esi
;----------;    shr     ax, 10
;----------;    rol     eax, 6
;----------;    mov     dx, ax
;----------;    add     esi, mem_edx
;----------;    mov     dl, es:[edx]
;----------;    mov     dh, bh
;----------;    add     ebx, ebp
;----------;    mov     cl, _gr_fade_table[edx]
;----------;
;----------;    mov     eax, esi
;----------;    shr     ax, 10
;----------;    rol     eax, 6
;----------;    mov     dx, ax
;----------;    add     esi, mem_edx
;----------;    mov     dl, es:[edx]
;----------;    mov     dh, bh
;----------;    add     ebx, ebp
;----------;    mov     ch, _gr_fade_table[edx]

        ror     ecx, 16                 ; move to next double dest pixel position
%endmacro

%rep (1 << (NBITS-2))

        repproc2
        repproc2

        mov     [edi],ecx               ; Draw 4 pixels to display
        add     edi,4
%endrep
;;      pop     edx
cont1:

; -------------------------------------------------------

; LIGHTING CODE
        mov     [_fx_l], ebx            ; keep the stepped lighting value for the next chunk
        pop     ebp                     ; v
        pop     ecx                     ; z
        pop     ebx                     ; u
        dec     dword [_loop_count]
        jnz     near TopOfLoop4
|
|
|
|
EndOfLoop4:
; Lit fast path, part 4: the whole chunks are done.  Draw the remaining
; [num_left_over] pixels (fewer than 2^NBITS), either with the linear
; approximation below or, via NewDoEndPixels, with a divide per pixel.
; On entry: ebx = u, ebp = v, ecx = z, edi -> destination.
        test    dword [num_left_over], -1
        je      near _none_to_do

; ----------------------------------------- Start of LeftOver Pixels ------------------------------------------
DoEndPixels:
; Choose the method: if z + DZ1 is not negative and 3*z < 4*(z + DZ1), the
; linear approximation is used unconditionally; otherwise it is used only
; when _new_end is zero.
        push    ecx

        mov     eax, ecx
        lea     eax, [eax*2+eax]        ; eax = 3*z

        add     ecx, [DZ1]
        js      notokhere
        shl     ecx,2                   ; ecx = 4*(z + DZ1)
        cmp     eax, ecx
        pop     ecx                     ; pop leaves the flags of the cmp intact
        jl      okhere
        jmp     bah_bah
notokhere:
        pop     ecx
bah_bah:
        test    dword [_new_end],-1
        jne     near NewDoEndPixels
okhere:

        add     ebx, [DU1]
        add     ebp, [DV1]
        add     ecx, [DZ1]
        je      near _div_0_abort
        jns     dep_cont

; z went negative.
; this can happen because we added DZ1 to the current z, but dz1 represents dz for perhaps 16 pixels
; though we might only plot one more pixel.
; Back off by DZ1/2, DZ1/4, ... (and likewise for u and v) until z is no
; longer negative.  cl is the shift count, so z must NOT stay in ecx while
; this runs: it is parked in esi, which is free until it is reloaded below.
        mov     esi, ecx                ; esi = z during the back-off
        mov     cl, 1

dep_loop:       mov     eax, [DU1]
        sar     eax, cl
        sub     ebx, eax

        mov     eax, [DV1]
        sar     eax, cl
        sub     ebp, eax

        mov     eax, [DZ1]
        sar     eax, cl
        sub     esi, eax
        je      near _div_0_abort
        jns     dep_restore_z

        inc     cl
        cmp     cl, NBITS
        jne     dep_loop

dep_restore_z:
        mov     ecx, esi                ; z back where the code below expects it

dep_cont:
        push    edi                     ; use edi as a temporary variable

; keep z large enough for the two divides below; this also catches a z that
; is still negative after the back-off (signed compare)
        cmp     ecx,1 << (ZSHIFT+1)
        jg      ecx_ok
        mov     ecx, 1 << (ZSHIFT+1)
ecx_ok:

; Find fixed U1
        mov     eax, ebx
;PDIV
        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (u/z)
        shl     eax, 16-ZSHIFT

        mov     ebx, eax                ; ebx = U1 until pop's

; Find fixed V1
        mov     eax, ebp
;PDIV
        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (v/z)
        shl     eax, 16-ZSHIFT

        mov     ebp, eax                ; ebp = V1 until pop's

        mov     ecx, [U0]               ; ecx = U0 until pop's
        mov     edi, [V0]               ; edi = V0 until pop's

; Make ESI = V0:U0 in 6:10,6:10 format
        mov     eax, ecx
        shr     eax, 6
        mov     esi, edi
        shl     esi, 10
        mov     si, ax

; Make EDX = DV:DU in 6:10,6:10 format
        mov     eax, ebx
        sub     eax, ecx
        sar     eax, NBITS+6
        mov     edx, ebp
        sub     edx, edi
        shl     edx, 10-NBITS           ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac
        mov     dx, ax                  ; put delta u in low word

        pop     edi                     ; Restore EDI before using it

        mov     ecx, [num_left_over]    ; ecx = pixels still to plot (non-zero here)

; LIGHTING CODE
        mov     ebx, [_fx_l]
        mov     ebp, [_fx_dl_dx]

; ITERATION is the destination offset of the even pixel of each expansion.
; It is a preprocessor variable from the start, so the %assign in the %rep
; below never has to look up an assembler label.
%assign ITERATION 0

; repproc3: up to two left-over pixels, written at fixed offsets from edi.
; In: esi = packed v:u, edx = packed step, ebx = lighting, ebp = lighting step,
;     ecx = pixels left.  Leaves through _none_to_do as soon as ecx hits 0.
%macro repproc3 0
; Do even pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     eax,[_pixptr]
        movzx   eax, byte [eax]         ; get pixel from source bitmap
        add     esi, edx                ; inc u,v
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value
        cmp     al,255
        je      %%skip4
        mov     al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
        mov     [edi+ITERATION], al     ; write pixel
%%skip4:        dec     ecx
        jz      near _none_to_do

; Do odd pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     eax,[_pixptr]
        movzx   eax, byte [eax]         ; get pixel from source bitmap
        add     esi, edx                ; inc u,v
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value (ebp = [_fx_dl_dx], loaded above)
        cmp     al,255
        je      %%skip5
        mov     al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
        mov     [edi+ITERATION+1], al   ; write pixel
%%skip5:        dec     ecx
        jz      near _none_to_do
%endmacro

%rep (1 << (NBITS-1))
;local  skip4, skip5
        repproc3
%assign ITERATION ITERATION + 2

%endrep

; Should never get here!!!!
; (num_left_over is below 2^NBITS, so ecx reaches zero inside the expansions)
        int     3
        jmp     _none_to_do
|
|
|
|
; ----------------------------------------- End of LeftOver Pixels ------------------------------------------
|
|
|
|
; --BUGGY NEW--NewDoEndPixels:
|
|
; --BUGGY NEW-- mov eax, num_left_over
|
|
; --BUGGY NEW-- and num_left_over, 3
|
|
; --BUGGY NEW-- shr eax, 2
|
|
; --BUGGY NEW-- je NDEP_1
|
|
; --BUGGY NEW-- mov _loop_count, eax
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; do 4 pixels per hunk, not 16, so div deltas by 4 (16/4=4)
|
|
; --BUGGY NEW-- shr DU1,2
|
|
; --BUGGY NEW-- shr DV1,2
|
|
; --BUGGY NEW-- shr DZ1,2
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--NDEP_TopOfLoop4:
|
|
; --BUGGY NEW-- add ebx, DU1
|
|
; --BUGGY NEW-- add ebp, DV1
|
|
; --BUGGY NEW-- add ecx, DZ1
|
|
; --BUGGY NEW-- je _div_0_abort ; would be dividing by 0, so abort
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; Done with ebx, ebp, ecx until next iteration
|
|
; --BUGGY NEW-- push ebx
|
|
; --BUGGY NEW-- push ecx
|
|
; --BUGGY NEW-- push ebp
|
|
; --BUGGY NEW-- push edi
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; Find fixed U1
|
|
; --BUGGY NEW-- mov eax, ebx
|
|
; --BUGGY NEW-- mov edx,ebx
|
|
; --BUGGY NEW-- shl eax,(ZSHIFT-2)
|
|
; --BUGGY NEW-- sar edx,32-(ZSHIFT-2)
|
|
; --BUGGY NEW-- idiv ecx ; eax = (v/z)
|
|
; --BUGGY NEW-- shl eax, 16-(ZSHIFT-2)
|
|
; --BUGGY NEW-- mov ebx, eax ; ebx = U1 until pop's
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; Find fixed V1
|
|
; --BUGGY NEW-- mov eax, ebp
|
|
; --BUGGY NEW-- mov edx, ebp
|
|
; --BUGGY NEW-- shl eax,(ZSHIFT-2)
|
|
; --BUGGY NEW-- sar edx,32-(ZSHIFT-2)
|
|
; --BUGGY NEW-- idiv ecx ; eax = (v/z)
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- mov ecx, U0 ; ecx = U0 until pop's
|
|
; --BUGGY NEW-- mov edi, V0 ; edi = V0 until pop's
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- shl eax, 16-(ZSHIFT-2)
|
|
; --BUGGY NEW-- mov ebp, eax ; ebp = V1 until pop's
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; Make ESI = V0:U0 in 6:10,6:10 format
|
|
; --BUGGY NEW-- mov eax, ecx
|
|
; --BUGGY NEW-- shr eax, 6
|
|
; --BUGGY NEW-- mov esi, edi
|
|
; --BUGGY NEW-- shl esi, 10
|
|
; --BUGGY NEW-- mov si, ax
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; Make EDX = DV:DU in 6:10,6:10 format
|
|
; --BUGGY NEW-- mov eax, ebx
|
|
; --BUGGY NEW-- sub eax, ecx
|
|
; --BUGGY NEW-- sar eax, (NBITS-2)+6
|
|
; --BUGGY NEW-- mov edx, ebp
|
|
; --BUGGY NEW-- sub edx, edi
|
|
; --BUGGY NEW-- shl edx, 10-(NBITS-2) ; EDX = V1-V0/ 4 in 6:10 int:frac
|
|
; --BUGGY NEW-- mov dx, ax ; put delta u in low word
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; Save the U1 and V1 so we don't have to divide on the next iteration
|
|
; --BUGGY NEW-- mov U0, ebx
|
|
; --BUGGY NEW-- mov V0, ebp
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- pop edi ; Restore EDI before using it
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; LIGHTING CODE
|
|
; --BUGGY NEW-- mov ebx, _fx_l
|
|
; --BUGGY NEW-- mov ebp, _fx_dl_dx
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--;** test _Transparency_on,-1
|
|
; --BUGGY NEW--;** je NDEP_no_trans1
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- REPT 2
|
|
; --BUGGY NEW-- local NDEP_skipa1, NDEP_skipa2
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- mov eax, esi ; get u,v
|
|
; --BUGGY NEW-- shr eax, 26 ; shift out all but int(v)
|
|
; --BUGGY NEW-- shld ax,si,6 ; shift in u, shifting up v
|
|
; --BUGGY NEW-- add esi, edx ; inc u,v
|
|
; --BUGGY NEW-- mov al, es:[eax] ; get pixel from source bitmap
|
|
; --BUGGY NEW-- cmp al,255
|
|
; --BUGGY NEW-- je NDEP_skipa1
|
|
; --BUGGY NEW-- mov ah, bh ; form lighting table lookup value
|
|
; --BUGGY NEW-- add ebx, ebp ; update lighting value
|
|
; --BUGGY NEW-- mov al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
|
|
; --BUGGY NEW-- mov [edi],al
|
|
; --BUGGY NEW--NDEP_skipa1:
|
|
; --BUGGY NEW-- inc edi
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; Do odd pixel
|
|
; --BUGGY NEW-- mov eax, esi ; get u,v
|
|
; --BUGGY NEW-- shr eax, 26 ; shift out all but int(v)
|
|
; --BUGGY NEW-- shld ax,si,6 ; shift in u, shifting up v
|
|
; --BUGGY NEW-- add esi, edx ; inc u,v
|
|
; --BUGGY NEW-- mov al, es:[eax] ; get pixel from source bitmap
|
|
; --BUGGY NEW-- cmp al,255
|
|
; --BUGGY NEW-- je NDEP_skipa2
|
|
; --BUGGY NEW-- mov ah, bh ; form lighting table lookup value
|
|
; --BUGGY NEW-- add ebx, ebp ; update lighting value
|
|
; --BUGGY NEW-- mov al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
|
|
; --BUGGY NEW-- mov [edi],al
|
|
; --BUGGY NEW--NDEP_skipa2:
|
|
; --BUGGY NEW-- inc edi
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- ENDM
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- mov _fx_l, ebx
|
|
; --BUGGY NEW-- pop ebp
|
|
; --BUGGY NEW-- pop ecx
|
|
; --BUGGY NEW-- pop ebx
|
|
; --BUGGY NEW-- dec _loop_count
|
|
; --BUGGY NEW-- jnz NDEP_TopOfLoop4
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- test num_left_over, -1
|
|
; --BUGGY NEW-- je _none_to_do
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--NDEP_1:
|
|
; --BUGGY NEW-- mov esi,ebx
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- align 4
|
|
; --BUGGY NEW--NDEP_loop:
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; compute v coordinate
|
|
; --BUGGY NEW-- mov eax, ebp ; get v
|
|
; --BUGGY NEW-- mov edx, eax
|
|
; --BUGGY NEW-- sar edx, 31
|
|
; --BUGGY NEW-- idiv ecx ; eax = (v/z)
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- and eax,3fh ; mask with height-1
|
|
; --BUGGY NEW-- mov ebx,eax
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; compute u coordinate
|
|
; --BUGGY NEW-- mov eax, esi ; get u
|
|
; --BUGGY NEW-- mov edx, eax
|
|
; --BUGGY NEW-- sar edx, 31
|
|
; --BUGGY NEW-- idiv ecx ; eax = (u/z)
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- shl eax,26
|
|
; --BUGGY NEW-- shld ebx,eax,6 ; esi = v*64+u
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; read 1 pixel
|
|
; --BUGGY NEW-- xor eax, eax
|
|
; --BUGGY NEW-- mov al, es:[ebx] ; get pixel from source bitmap
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; lighting code
|
|
; --BUGGY NEW-- mov ebx, _fx_l ; get temp copy of lighting value
|
|
; --BUGGY NEW-- mov ah, bh ; get lighting level
|
|
; --BUGGY NEW-- add ebx, _fx_dl_dx ; update lighting value
|
|
; --BUGGY NEW-- mov _fx_l, ebx ; save temp copy of lighting value
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; transparency check
|
|
; --BUGGY NEW-- cmp al,255
|
|
; --BUGGY NEW-- je NDEP_skip2 ; this pixel is transparent, so don't write it (or light it)
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- mov al, _gr_fade_table[eax] ; xlat pixel thru lighting tables
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; write 1 pixel
|
|
; --BUGGY NEW-- mov [edi],al
|
|
; --BUGGY NEW--NDEP_skip2: inc edi
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW--; update deltas
|
|
; --BUGGY NEW-- add ebp,_fx_dv_dx
|
|
; --BUGGY NEW-- add esi,_fx_du_dx
|
|
; --BUGGY NEW-- add ecx,_fx_dz_dx
|
|
; --BUGGY NEW-- je _div_0_abort ; would be dividing by 0, so abort
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- dec num_left_over
|
|
; --BUGGY NEW-- jne NDEP_loop
|
|
; --BUGGY NEW--
|
|
; --BUGGY NEW-- jmp _none_to_do
|
|
|
|
NewDoEndPixels:
; Slower but exact way of finishing a lit scanline: plot the remaining
; [num_left_over] pixels with a pair of divides each.
; On entry: ebx = u, ebp = v, ecx = z, edi -> destination,
;           [num_left_over] is non-zero (checked at EndOfLoop4).
        mov     esi, ebx                ; u is kept in esi; ebx is scratch below

        align   4
NDEP_loop:
        ; texel column = (u/z) & 63
        mov     eax, esi
        cdq
        idiv    ecx
        mov     ebx, eax
        and     ebx, 3fh

        ; texel row = (v/z) & 63, placed above the column bits
        mov     eax, ebp
        cdq
        idiv    ecx
        and     eax, 3fh                ; mask with height-1
        shl     eax, 6
        or      ebx, eax                ; ebx = v*64+u

        ; fetch the texel
        add     ebx, [_pixptr]
        movzx   eax, byte [ebx]         ; al = texel, rest of eax = 0

        ; step the lighting value on every pixel, transparent or not
        mov     ebx, [_fx_l]
        mov     ah, bh                  ; ah = lighting level -> eax indexes the fade table
        add     ebx, [_fx_dl_dx]
        mov     [_fx_l], ebx

        ; 255 marks a transparent texel: neither lit nor written
        cmp     al, 255
        je      .next_pixel
        mov     al, [_gr_fade_table+eax] ; xlat pixel thru lighting tables
        mov     [edi], al
.next_pixel:
        inc     edi

        ; advance u, v, z by one pixel
        add     esi, [_fx_du_dx]
        add     ebp, [_fx_dv_dx]
        add     ecx, [_fx_dz_dx]
        je      near _div_0_abort       ; would be dividing by 0, so abort

        dec     dword [num_left_over]
        jne     NDEP_loop

        jmp     _none_to_do
|
|
|
|
; ==================================================== No Lighting Code ======================================================
|
|
global _tmap_loop_fast_nolight
|
|
_tmap_loop_fast_nolight:
|
|
mov esi,ebx
|
|
|
|
align 4
|
|
NotDwordAligned1_nolight:
|
|
test edi, 11b
|
|
jz DwordAligned1_nolight
|
|
|
|
; compute v coordinate
|
|
mov eax,ebp ; get v
|
|
mov edx, eax
|
|
sar edx, 31
|
|
idiv ecx ; eax = (v/z)
|
|
|
|
and eax,3fh ; mask with height-1
|
|
mov ebx,eax
|
|
|
|
; compute u coordinate
|
|
mov eax, esi ; get u
|
|
mov edx, eax
|
|
sar edx, 31
|
|
idiv ecx ; eax = (u/z)
|
|
|
|
shl eax,26
|
|
shld ebx,eax,6 ; esi = v*64+u
|
|
|
|
; read 1 pixel
|
|
add ebx,[_pixptr]
|
|
mov al,[ebx] ; get pixel from source bitmap
|
|
|
|
; write 1 pixel
|
|
cmp al,255
|
|
je skip6
|
|
mov [edi],al
|
|
skip6: inc edi
|
|
|
|
; update deltas
|
|
add ebp,[_fx_dv_dx]
|
|
add esi,[_fx_du_dx]
|
|
add ecx,[_fx_dz_dx]
|
|
je near _div_0_abort ; would be dividing by 0, so abort
|
|
|
|
dec dword [_loop_count]
|
|
jns NotDwordAligned1_nolight
|
|
jmp _none_to_do
|
|
|
|
DwordAligned1_nolight:
|
|
mov ebx,esi
|
|
|
|
mov eax, [_loop_count]
|
|
inc eax
|
|
mov [num_left_over], eax
|
|
shr eax, NBITS
|
|
|
|
test eax, -1
|
|
je near tmap_loop ; no 2^NBITS chunks, do divide/pixel for whole scanline
|
|
|
|
mov [_loop_count], eax ; _loop_count = pixels / NPIXS
|
|
shl eax, NBITS
|
|
sub [num_left_over], eax ; num_left_over = obvious
|
|
|
|
; compute initial v coordinate
|
|
mov eax,ebp ; get v
|
|
;PDIV
|
|
mov edx,eax
|
|
shl eax,ZSHIFT
|
|
sar edx,32-ZSHIFT
|
|
idiv ecx ; eax = (v/z)
|
|
shl eax, 16-ZSHIFT
|
|
|
|
mov [V0], eax
|
|
|
|
; compute initial u coordinate
|
|
mov eax,ebx ; get u
|
|
;PDIV
|
|
mov edx,eax
|
|
shl eax,ZSHIFT
|
|
sar edx,32-ZSHIFT
|
|
idiv ecx ; eax = (v/z)
|
|
shl eax, 16-ZSHIFT
|
|
|
|
mov [U0], eax
|
|
|
|
; Set deltas to NPIXS pixel increments
|
|
mov eax, [_fx_du_dx]
|
|
shl eax, NBITS
|
|
mov [DU1], eax
|
|
mov eax, [_fx_dv_dx]
|
|
shl eax, NBITS
|
|
mov [DV1], eax
|
|
mov eax, [_fx_dz_dx]
|
|
shl eax, NBITS
|
|
mov [DZ1], eax
|
|
|
|
; TopOfLoop4_nolight (loop head)
; One iteration handles one chunk of 2^NBITS pixels:
;   1. step u (ebx), v (ebp), z (ecx) forward by one chunk; abort if z == 0
;   2. perspective-divide to get the chunk-end coordinates U1, V1
;   3. build esi = packed start coordinate (V0:U0) and edx = packed per-pixel
;      delta, both as two 6:10 fields (v in the high word, u in the low word)
;   4. remember U1/V1 as the next chunk's U0/V0
; ebx/ecx/ebp are saved on the stack because they are reused as temporaries;
; the matching pops are after the unrolled pixel code that follows this head.
align 4
|
|
TopOfLoop4_nolight:
|
|
add ebx, [DU1]
|
|
add ebp, [DV1]
|
|
; the ZF result of this add feeds the je below: z == 0 would fault in idiv
add ecx, [DZ1]
|
|
je near _div_0_abort
|
|
|
|
; Done with ebx, ebp, ecx until next iteration
|
|
push ebx
|
|
push ecx
|
|
push ebp
|
|
push edi
|
|
|
|
; Find fixed U1
|
|
mov eax, ebx
|
|
;PDIV: edx:eax = sign-extended value << ZSHIFT, then signed divide by z (ecx)
|
|
mov edx,eax
|
|
shl eax,ZSHIFT
|
|
sar edx,32-ZSHIFT
|
|
idiv ecx ; eax = (u/z)
|
|
shl eax, 16-ZSHIFT
|
|
|
|
mov ebx, eax ; ebx = U1 until pop's
|
|
|
|
; Find fixed V1
|
|
mov eax, ebp
|
|
;PDIV: same sequence as for U1 above
|
|
mov edx,eax
|
|
shl eax,ZSHIFT
|
|
sar edx,32-ZSHIFT
|
|
idiv ecx ; eax = (v/z)
|
|
shl eax, 16-ZSHIFT
|
|
|
|
mov ebp, eax ; ebp = V1 until pop's
|
|
|
|
mov ecx, [U0] ; ecx = U0 until pop's
|
|
mov edi, [V0] ; edi = V0 until pop's
|
|
|
|
; Make ESI = V0:U0 in 6:10,6:10 format
; high word = bits 6..21 of V0, low word = bits 6..21 of U0
|
|
mov eax, ecx
|
|
shr eax, 6
|
|
mov esi, edi
|
|
shl esi, 10
|
|
mov si, ax
|
|
|
|
; Make EDX = DV:DU in 6:10,6:10 format
; both differences are divided by 2^NBITS to get a per-pixel step
|
|
mov eax, ebx
|
|
sub eax, ecx
|
|
; (U1-U0) / 2^NBITS, moved down 6 bits into 6:10 position
sar eax, NBITS+6
|
|
mov edx, ebp
|
|
sub edx, edi
|
|
shl edx, 10-NBITS ; EDX high word = (V1-V0) / 2^NBITS in 6:10 int:frac
|
|
mov dx, ax ; put delta u in low word
|
|
|
|
; Save the U1 and V1 so we don't have to divide on the next iteration
|
|
mov [U0], ebx
|
|
mov [V0], ebp
|
|
|
|
pop edi ; Restore EDI before using it
|
|
|
|
; repproc4 -- fetch 4 texels and write them to 4 consecutive destination bytes.
; Expects: esi = packed v:u position (6:10 in each 16-bit half, v high),
;          edx = packed per-pixel delta in the same layout,
;          edi = destination pointer, [_pixptr] = texture base address.
; Uses:    eax (address scratch), ecx (4-pixel buffer); both are clobbered.
; Leaves:  esi advanced by 4*edx, edi advanced by 4.
; Texel offset = int(v)*64 + int(u): the top 6 bits of esi followed by the
; top 6 bits of si.
; If [_Transparency_on] is non-zero, bytes equal to 255 are not written;
; otherwise all four bytes are stored with a single dword write.
%macro repproc4 0
|
|
; Do 1 pixel
|
|
mov eax, esi ; get u,v
|
|
shr eax, 26 ; shift out all but int(v)
|
|
shld ax,si,6 ; shift in u, shifting up v
|
|
add esi, edx ; inc u,v
|
|
add eax,[_pixptr]
|
|
mov cl, [eax] ; load into buffer register
|
|
|
|
; pixel 2 -> ch
mov eax, esi ; get u,v
|
|
shr eax, 26 ; shift out all but int(v)
|
|
shld ax,si,6 ; shift in u, shifting up v
|
|
add eax,[_pixptr]
|
|
mov ch, [eax] ; load into buffer register
|
|
add esi, edx ; inc u,v
|
|
ror ecx, 16 ; move to next dest pixel
|
|
|
|
; pixel 3 -> cl (pixels 1,2 now sit in the high word of ecx)
mov eax, esi ; get u,v
|
|
shr eax, 26 ; shift out all but int(v)
|
|
shld ax,si,6 ; shift in u, shifting up v
|
|
add eax,[_pixptr]
|
|
mov cl, [eax] ; load into buffer register
|
|
add esi, edx ; inc u,v
|
|
|
|
; pixel 4 -> ch
mov eax, esi ; get u,v
|
|
shr eax, 26 ; shift out all but int(v)
|
|
shld ax,si,6 ; shift in u, shifting up v
|
|
add eax,[_pixptr]
|
|
mov ch, [eax] ; load into buffer register
|
|
add esi, edx ; inc u,v
|
|
; after this rotate ecx holds pixels 1..4 in ascending byte order
ror ecx, 16 ;-- can get rid of this, just write in different order below -- ; move to next dest pixel
|
|
|
|
; transparency disabled -> store all four bytes at once
test dword [_Transparency_on],-1
|
|
je %%no_trans2
|
|
; all four bytes are 255 -> nothing to write
cmp ecx,-1
|
|
je %%skip7
|
|
|
|
; write each byte individually, skipping those equal to 255
cmp cl,255
|
|
je %%skip1q
|
|
mov [edi],cl
|
|
%%skip1q:
|
|
|
|
cmp ch,255
|
|
je %%skip2q
|
|
mov [edi+1],ch
|
|
%%skip2q:
|
|
; bring pixels 3 and 4 into cl/ch
ror ecx,16
|
|
|
|
cmp cl,255
|
|
je %%skip3q
|
|
mov [edi+2],cl
|
|
%%skip3q:
|
|
|
|
|
|
cmp ch,255
|
|
je %%skip4q
|
|
mov [edi+3],ch
|
|
%%skip4q:
|
|
|
|
jmp %%skip7
|
|
%%no_trans2:
|
|
mov [edi],ecx ; Draw 4 pixels to display
|
|
%%skip7: add edi,4
|
|
%endmacro
|
|
|
|
; Unrolled body of the chunk loop: 2^(NBITS-2) expansions of repproc4, i.e.
; 2^NBITS pixels per pass. Afterwards the u/z/v accumulators saved at the top
; of the loop are restored and the loop repeats while chunks remain.
%rep (1 << (NBITS-2))
|
|
;local skip7, no_trans2, skip1q, skip2q, skip3q, skip4q
|
|
repproc4
|
|
|
|
%endrep
|
|
|
|
; restore in reverse order of the pushes (edi was already popped earlier)
pop ebp
|
|
pop ecx
|
|
pop ebx
|
|
; _loop_count holds the number of whole chunks still to draw
dec dword [_loop_count]
|
|
jnz near TopOfLoop4_nolight
|
|
|
|
EndOfLoop4_nolight:
|
|
|
|
; all whole chunks done; finished if there are no remainder pixels
test dword [num_left_over], -1
|
|
je near _none_to_do
|
|
|
|
; DoEndPixels_nolight
; Set-up for the remainder pixels ([num_left_over] of them) after the last
; whole chunk. Coordinates are stepped by one full chunk (DU1/DV1/DZ1) and
; divided exactly as in the chunk loop, producing the packed start position
; in esi and the packed per-pixel delta in edx.
; NOTE: unlike the chunk loop, only edi is saved/restored here; ebx, ebp and
; ecx are overwritten and U0/V0 are not updated.
; On exit: esi = packed V0:U0, edx = packed DV:DU, ecx = [num_left_over].
DoEndPixels_nolight:
|
|
add ebx, [DU1]
|
|
add ebp, [DV1]
|
|
; the ZF result of this add feeds the je below: z == 0 would fault in idiv
add ecx, [DZ1]
|
|
je near _div_0_abort
|
|
push edi ; use edi as a temporary variable
|
|
|
|
; Find fixed U1
; edx:eax = sign-extended value << ZSHIFT, then signed divide by z (ecx)
|
|
mov eax, ebx
|
|
mov edx,eax
|
|
shl eax,ZSHIFT
|
|
sar edx,32-ZSHIFT
|
|
idiv ecx ; eax = (u/z)
|
|
shl eax, 16-ZSHIFT
|
|
mov ebx, eax ; ebx = U1 (not restored afterwards in this block)
|
|
|
|
; Find fixed V1
|
|
mov eax, ebp
|
|
mov edx,eax
|
|
shl eax,ZSHIFT
|
|
sar edx,32-ZSHIFT
|
|
idiv ecx ; eax = (v/z)
|
|
shl eax, 16-ZSHIFT
|
|
mov ebp, eax ; ebp = V1 (not restored afterwards in this block)
|
|
|
|
mov ecx, [U0] ; ecx = U0 until reloaded with num_left_over below
|
|
mov edi, [V0] ; edi = V0 until the pop below
|
|
|
|
; Make ESI = V0:U0 in 6:10,6:10 format
; high word = bits 6..21 of V0, low word = bits 6..21 of U0
|
|
mov eax, ecx
|
|
shr eax, 6
|
|
mov esi, edi
|
|
shl esi, 10
|
|
mov si, ax
|
|
|
|
; Make EDX = DV:DU in 6:10,6:10 format
; both differences are divided by 2^NBITS to get a per-pixel step
|
|
mov eax, ebx
|
|
sub eax, ecx
|
|
sar eax, NBITS+6
|
|
mov edx, ebp
|
|
sub edx, edi
|
|
shl edx, 10-NBITS ; EDX high word = (V1-V0) / 2^NBITS in 6:10 int:frac
|
|
mov dx, ax ; put delta u in low word
|
|
|
|
pop edi ; Restore EDI before using it
|
|
|
|
; ecx = countdown of remainder pixels for the unrolled code that follows
mov ecx, [num_left_over]
|
|
|
|
; ITERATION is the assemble-time byte offset used by repproc5; it starts at 0
; here and must be stepped by whoever expands the macro.
%assign ITERATION 0
|
|
; repproc5 -- fetch 1 texel and write it to [edi+ITERATION].
; Expects: esi = packed v:u position (6:10 in each 16-bit half, v high),
;          edx = packed per-pixel delta, edi = destination base,
;          ecx = number of pixels still to draw, [_pixptr] = texture base.
; Clobbers eax. Advances esi by edx and decrements ecx; edi is not changed.
; A texel value of 255 is never written; this macro does not consult
; [_Transparency_on].
; Jumps to _none_to_do as soon as ecx reaches zero.
%macro repproc5 0
|
|
; Do 1 pixel
|
|
mov eax, esi ; get u,v
|
|
shr eax, 26 ; shift out all but int(v)
|
|
shld ax,si,6 ; shift in u, shifting up v
|
|
; eax = texture base + int(v)*64 + int(u)
add eax,[_pixptr]
|
|
movzx eax, byte [eax] ; load into buffer register
|
|
add esi, edx ; inc u,v
|
|
cmp al,255
|
|
je %%skip8
|
|
mov [edi+ITERATION], al ; write pixel
|
|
%%skip8: dec ecx
|
|
jz near _none_to_do
|
|
%endmacro
|
|
|
|
; Unrolled remainder writer: 2^NBITS expansions of repproc5, each using the
; next destination offset (ITERATION = 0, 1, 2, ...). Every expansion exits to
; _none_to_do when the pixel countdown in ecx reaches zero.
%rep (1 << NBITS)
|
|
;local skip8
|
|
repproc5
|
|
%assign ITERATION ITERATION + 1
|
|
%endrep
|
|
|
|
; Should never get here!!!!!
; (reached only if ecx was still non-zero after all 2^NBITS expansions)
|
|
; breakpoint trap, then leave through the normal exit
int 3
|
|
jmp _none_to_do
|
|
|