# -- tmap_per.1 = 8.88, 6/27/95
# -- tmap_per.3 = 8.99, 6/27/95
# -- tmap_per.4 = 9.10, 6/28/95
# -- Make more use of r0 and other volatile registers.
# -- Use fewer registers and only save/restore what's necessary.
# -- Use stmw instead of all the stws in epilog, prolog.
# -- mtctr loads "magical" counter register, then use bdnz _label
# -- Superopt from Freeware guys, ftp from prep.ai.mit.edu, look in /pub/gnu or something like that.
# -- At DoEndPixels, you write up to 15 slow pixels. Do chunks of 4.

# Macro added by allender 6/21/95 -- found in MPW PPC assembler examples. This macro,
# used at the beginning of a routine, should export all appropriate symbols and set us up
# properly for debug information.
#
# MakeFunction &fnName:
#   - exports the descriptor symbol &fnName[DS] and the code symbol .&fnName[PR],
#   - creates a TOC entry &fnName[TC] that refers to the descriptor,
#   - emits the descriptor itself (two words: address of the code, TOC[tc0]),
#   - opens the FUNCTION and leaves the assembler in the .&fnName[PR] csect,
#     so the instructions that follow the macro call become the routine body.
	MACRO
	MakeFunction &fnName
	EXPORT &fnName[DS]
	EXPORT .&fnName[PR]
	TC &fnName[TC], &fnName[DS]
	CSECT &fnName[DS]
	DC.L .&fnName[PR]
	DC.L TOC[tc0]
	FUNCTION .&fnName[PR]
	CSECT .&fnName[PR]
	ENDM

# -------------------------------------------------------------
# Plot one pixel
# Lighting, No Transparency.
#
# Reads:    U0, V0 (texture coordinates), edx (added to U0), esi (added to V0),
#           es (base for the source-pixel load), r3 (base for the lighting-table load),
#           tr0 (lighting value), tr1 (lighting delta), edi (destination pointer).
# Writes:   one byte at 0(edi); advances edi by 1, U0 by edx, V0 by esi, tr0 by tr1.
# Clobbers: eax, tr2.
	macro
	ppc_pix_l
	mr eax, V0
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	lbzx tr2, eax, es # get source pixel
	add U0, U0, edx # u0 = u0 + du
	rlwimi tr2, tr0, 24, 16, 23 # mask lighting value (%bh) above pixel value (%al)
	add V0, V0, esi # v0 = v0 + dv
	lbzx eax, r3, tr2 # xlate lighting:pixel through lighting tables
	stb eax, 0(edi) # {change this to stbu eax 1(edi) and kill the addi below}
	addi edi, edi, 1
	add tr0, tr0, tr1 # fx_l += fx_dl_dx
	endm

# -------------------------------------------------------------
# Plot one pixel
# Lighting & Transparency.
# ppc_pix_lt: plot one lit pixel, skipping the store when the SOURCE pixel is 0xff.
#
# Reads:    U0, V0, edx (added to U0), esi (added to V0), es (source-pixel base),
#           r3 (lighting-table base), tr0 (lighting value), tr1 (lighting delta), edi.
# Writes:   at most one byte at 0(edi); always advances edi by 1, U0 by edx,
#           V0 by esi and tr0 by tr1, whether or not the pixel was stored.
# Clobbers: eax, tr2, cr0.
	macro
	ppc_pix_lt
	mr eax, V0
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	lbzx tr2, eax, es # get source pixel
	add U0, U0, edx # u0 = u0 + du
	cmpwi cr0, tr2, 0xff # cmpwi cr0, tr2, 0x0
	add V0, V0, esi # v0 = v0 + dv
	beq @skip # transparent source pixel: leave the destination untouched
	rlwimi tr2, tr0, 24, 16, 23 # mask lighting value (%bh) above pixel value (%al)
	lbzx eax, r3, tr2 # xlate lighting:pixel through lighting tables
	stb eax, 0(edi)
@skip:	addi edi, edi, 1
	add tr0, tr0, tr1 # fx_l += fx_dl_dx
	endm

# -------------------------------------------------------------
# Plot one pixel
# No Lighting & Transparency.
#
# Reads:    U0, V0, edx (added to U0), esi (added to V0), es (source-pixel base), edi.
# Writes:   at most one byte at 0(edi); always advances edi by 1, U0 by edx, V0 by esi.
# Clobbers: eax, tr2, cr0.
#
# The source pixel is stored unmodified.  An earlier revision inserted tr0 into
# bits 16..23 of tr2 before the store; stb writes only the low byte, so that insert
# never affected the stored value and it has been removed.
	macro
	ppc_pix_t
	mr eax, V0
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	lbzx tr2, eax, es # get source pixel
	add U0, U0, edx # u0 = u0 + du
	cmpwi cr0, tr2, 0xff
	add V0, V0, esi # v0 = v0 + dv
	beq @skip # transparent source pixel: leave the destination untouched
	stb tr2, 0(edi)
@skip:	addi edi, edi, 1
	endm

# -------------------------------------------------------------
# Plot one pixel
# Lighting & Transparency.
# Decrements r_num_left_over. If goes 0, branches to _none_to_do.
# NOTE(review): no macro follows this header in the visible source; it looks like a
# leftover description -- confirm before relying on it.

# Added by allender 6/21/95 -- need TOC stuff to access global variables. We could
# probably help ourselves immensely if we used parameters where possible.
# TOC entries for every global this file reads.  Each global is reached in two steps:
#   lwz  rX, name[TC](RTOC)   ; rX = address of the global
#   lwzx rY, r0, rX           ; rY = value of the global (r0 in the RA slot means "no base")
	toc
	tc gr_fade_table[TC], gr_fade_table
	tc write_buffer[TC], write_buffer
	tc window_left[TC], window_left
	tc window_right[TC], window_right
	tc window_top[TC], window_top
	tc window_bottom[TC], window_bottom
	tc window_width[TC], window_width
	tc bytes_per_row[TC], bytes_per_row
	tc window_height[TC], window_height
	tc y_pointers[TC], y_pointers
	tc per2_flag[TC], per2_flag
	tc tmap_flat_cthru_table[TC], tmap_flat_cthru_table
	tc tmap_flat_color[TC], tmap_flat_color
	tc tmap_flat_shade_value[TC], tmap_flat_shade_value
	tc dither_intensity_lighting[TC], dither_intensity_lighting
	tc Lighting_on[TC], Lighting_on
	tc pixel_data_selector[TC], pixel_data_selector
	tc gr_fade_table_selector[TC], gr_fade_table_selector
	tc Transparency_on[TC], Transparency_on
	tc fx_u[TC], fx_u
	tc fx_v[TC], fx_v
	tc fx_z[TC], fx_z
	tc fx_l[TC], fx_l
	tc fx_du_dx[TC], fx_du_dx
	tc fx_dv_dx[TC], fx_dv_dx
	tc fx_dz_dx[TC], fx_dz_dx
	tc fx_dl_dx[TC], fx_dl_dx
	tc fx_y[TC], fx_y
	tc fx_xleft[TC], fx_xleft
	tc fx_xright[TC], fx_xright
	tc pixptr[TC], pixptr

# Added by allender 6/21/95 -- use the handy constants to set up the space to save on
# the stack.
# With the values below, spaceToSave = 24 + 32 + 0 + 4*19 + 8*0 + 12 = 144 bytes,
# which is a multiple of 16.
linkageArea: set 24 ; constant comes from the PowerPC Runtime Architecture Document
CalleesParams: set 32 ; always leave space for GPR's 3-10
CalleesLocalVars: set 0 ; none
numGPRs: set 19 ; number of nonvolatile GPR's (GPR's 13-31) saved by the prologue
numFPRs: set 0 ; number of nonvolatile FPR's (FPR's 14-31) saved (none)
stack_pad: set 12 ; padding to maintain quadword alignment
spaceToSave: set linkageArea + CalleesParams + CalleesLocalVars + 4*numGPRs + 8*numFPRs + stack_pad

	MakeFunction asm_tmap_scanline_per
	align 4
# include 'tmap_inc.a'
	import gr_fade_table
	import write_buffer
	import window_left
	import window_right
	import window_top
	import window_bottom
	import window_width
	import bytes_per_row
	import window_height
	import y_pointers
#_lighting_tables equ _gr_fade_table
#write_buffer equ _write_buffer
#max_window_width equ 320
#num_iters = max_window_width
#
# if num_iters and 1
#num_iters = num_iters + 1
# endif
	import per2_flag
	import tmap_flat_cthru_table
	import tmap_flat_color
	import tmap_flat_shade_value
	import dither_intensity_lighting
	import Lighting_on
	import pixel_data_selector
	import gr_fade_table_selector
	import Transparency_on
	import fx_u
	import fx_v
	import fx_z
	import fx_l
	import fx_du_dx
	import fx_dv_dx
	import fx_dz_dx
	import fx_dl_dx
	import fx_y
	import fx_xleft
	import fx_xright
	import pixptr
# include ('stdhdr.s')
#MWA -- not used num_left_over .long 0

# We don't pass any parameters, so we use those registers.
r_num_left_over: equ r4

# PowerPC equates for 80x86 compatibility.
# r13-r31 are all saved by the prologue and restored by the epilogue of this routine.
eax: equ r13
ebx: equ r14
ecx: equ r15
edx: equ r16
ebp: equ r17
esi: equ r18
edi: equ r19
es: equ r20
tr0: equ r21
tr1: equ r22
tr2: equ r23
r_loop_count: equ r24
U0: equ r25
U1: equ r26
V0: equ r27
V1: equ r28
DU1: equ r29
DV1: equ r30
DZ1: equ r31

#MWA csect texmap

# --------------------------------------------------------------------------------------------------
# Enter:
# _xleft fixed point left x coordinate
# _xright fixed point right x coordinate
# _y fixed point y coordinate
# _pixptr address of source pixel map
# _u fixed point initial u coordinate
# _v fixed point initial v coordinate
# _z fixed point initial z coordinate
# _du_dx fixed point du/dx
# _dv_dx fixed point dv/dx
# _dz_dx fixed point dz/dx
# for (x = (int) xleft# x <= (int) xright# x++) {
# _setcolor(read_pixel_from_tmap(srcb,((int) (u/z)) & 63,((int) (v/z)) & 63))#
# _setpixel(x,y)#
#
# u += du_dx;
# v += dv_dx;
# z += dz_dx;
# }
	align 4
#
#asm_tmap_scanline_per_:
# Prolog
# 1. Save the contents of the Link Register if necessary. (I think it is not.)
# 2. Save the nonvolatile contents of the Condition Register to be used.
# 3. Save the contents of the nonvolatile floating-point registers to be used. (None to be used as of 6/20/95.)
# 4. Save the contents of the nonvolatile general-purpose registers to be used.
# 5. Store the current stack pointer (or back chain) and decrement the stack pointer by the size of the stack frame.

; PROLOGUE - called routine's responsibilities
; r13-r31 are stored just below the caller's stack pointer, then stwu writes the
; back chain and moves SP down by spaceToSave in one instruction.
	mflr r0 ; Get link register
	stw r0, 8(SP) ; Store the link register on the stack
	stw r31, -4(sp)
	stw r30, -8(sp)
	stw r29, -12(sp)
	stw r28, -16(sp)
	stw r27, -20(sp)
	stw r26, -24(sp)
	stw r25, -28(sp)
	stw r24, -32(sp)
	stw r23, -36(sp)
	stw r22, -40(sp)
	stw r21, -44(sp)
	stw r20, -48(sp)
	stw r19, -52(sp)
	stw r18, -56(sp)
	stw r17, -60(sp)
	stw r16, -64(sp)
	stw r15, -68(sp)
	stw r14, -72(sp)
	stw r13, -76(sp)
	stwu SP, -spaceToSave(SP); skip over the stack space where the caller
; might have saved stuff

#---------------------------- setup for loop ---------------------------------
# Setup for loop: _loop_count iterations = (int) xright - (int) xleft
# esi source pixel pointer = pixptr
# edi initial row pointer = y*320+x
# NOTE: fx_xright and fx_xleft changed from fix to int by mk on 12/01/94.
# (On the PowerPC the source pixel pointer is kept in es, not esi.)
	lwz es, pixptr[TC](RTOC)
	lwzx es, r0, es # es = value of pixptr

# set edi = address of first pixel to modify
	lwz edi, fx_y[TC](RTOC) # mov edi,_fx_y
	lwzx edi, r0, edi
	slwi edi, edi, 2 # mov edi,_y_pointers[edi*4]
	lwz r3, y_pointers[TC](RTOC)
	lwzx edi, r3, edi # edi = y_pointers[fx_y]
	lwz ebx, fx_xleft[TC](RTOC) # mov ebx,_fx_xleft
	lwzx ebx, r0, ebx
	mr. ebx, ebx # test ebx, ebx
	bgt ebx_ok # jns ebx_ok
	xor ebx, ebx, ebx # xor ebx, ebx (clamp xleft <= 0 to 0)
ebx_ok:
	lwz tr0, write_buffer[TC](RTOC) # add edi,write_buffer
	add edi, edi, ebx # add edi,ebx
	lwzx tr0, r0, tr0
	add edi, edi, tr0

# set _loop_count = # of iterations
# r_loop_count = xright - (clamped) xleft, i.e. one less than the number of pixels
# in the span; a negative result means there is nothing to draw.
	lwz eax, fx_xright[TC](RTOC) # mov eax,_fx_xright
	lwzx eax, r0, eax
	sub. eax, eax, ebx # sub eax,ebx
	mr r_loop_count, eax # mr (no dot) leaves cr0 from sub. intact for the blt
	blt _none_to_do # js _none_to_do

# lighting values are passed in fixed point, but need to be in 8 bit integer, 8 bit fraction so we can easily
# get the integer by reading %bh
# (Not on the PowerPC and we could use the precision!)
### lwz tr0, fx_l[TC](RTOC) # sar _fx_l, 8
### lwzx tr1, r0, tr0
### srawi tr1, tr1, 8
### stwx tr1, r0, tr0
###
### lwz tr0, fx_dl_dx[TC](RTOC) # sar _fx_dl_dx,8
### lwzx tr1, r0, tr0
### srawi. tr1, tr1, 8
### bge dl_dx_ok # jns dl_dx_ok
### addi tr1, tr1, 1 # inc _fx_dl_dx # round towards 0 for negative deltas
###dl_dx_ok:
### stwx tr1, r0, tr0

# set initial values
	lwz ebx, fx_u[TC](RTOC) # mov ebx,_fx_u
	lwzx ebx, r0, ebx
	lwz ebp, fx_v[TC](RTOC) # mov ebp,_fx_v
	lwzx ebp, r0, ebp
	lwz ecx, fx_z[TC](RTOC) # mov ecx,_fx_z
	lwzx ecx, r0, ecx
	lwz tr0, fx_dv_dx[TC](RTOC)
	lwzx DV1, r0, tr0
	lwz tr0, fx_du_dx[TC](RTOC)
	lwzx DU1, r0, tr0
	lwz tr0, fx_dz_dx[TC](RTOC)
	lwzx DZ1, r0, tr0

# Choose the inner loop:
#   per2_flag == 0                  -> tmap_slow (one divide pair per pixel)
#   per2_flag != 0, Lighting_on == 0 -> tmap_loop_fast_nolight
#   per2_flag != 0, Lighting_on != 0 -> tmap_loop_fast
	lwz tr0, per2_flag[TC](RTOC) # test _per2_flag,-1
	lwzx tr0, r0, tr0
	mr. tr0, tr0
	lwz tr0, Lighting_on[TC](RTOC) # test _Lighting_on, -1 (lwz does not disturb cr0)
	beq tmap_slow # je tmap_loop
	lwzx tr0, r0, tr0 # test _Lighting_on, -1
	mr. tr0, tr0
	beq tmap_loop_fast_nolight # je tmap_loop_fast_nolight
	b tmap_loop_fast # jmp tmap_loop_fast

#================ PERSPECTIVE TEXTURE MAP INNER LOOPS ========================
#
# Usage in loop: eax division, pixel value
# ebx u
# ecx z
# edx division
# ebp v
# esi source pixel pointer
# edi destination pixel pointer
# (The list above is the 80x86 usage.  In the PowerPC slow loops below, es holds the
#  source pixel pointer, esi the lighting value and tr2 the lighting delta.)

#-------------------- NORMAL PERSPECTIVE TEXTURE MAP LOOP -----------------
# Entered from DwordAligned1 when fewer than 2^NBITS pixels remain.
# r_num_left_over is then the NUMBER of pixels still to draw (1..15), whereas the
# slow loops below run r_loop_count+1 times (addic. / bge).  Copying the count
# unchanged drew one pixel past the end of the span, so convert it to count-1.
tmap_slow_from_fast:
	subi r_loop_count, r_num_left_over, 1
tmap_slow:
	lwz tr0, Lighting_on[TC](RTOC) # test _Lighting_on, -1
	lwzx tr0, r0, tr0
	mr. tr0, tr0
	beq tmap_slow_NoLight # je NoLight1
	lwz tr0, fx_l[TC](RTOC)
	lwzx esi, r0, tr0 # use esi for lighting value
	lwz tr0, fx_dl_dx[TC](RTOC)
	lwzx tr2, r0, tr0 # tr2 is delta lighting value
	lwz r3, gr_fade_table[TC](RTOC) # r3 is fade table pointer
	lwz tr0, Transparency_on[TC](RTOC)
	lwzx tr0, r0, tr0
	mr. tr0, tr0
	beq tmap_slow_no_transparency

# ---------- Yes transparency. Yes lighting. ----------
# NOTE(review): this loop compares the pixel with 0xff AFTER the lighting-table
# lookup, whereas ppc_pix_lt and leftover_loop compare the raw source pixel.
# Confirm the table maps 0xff to 0xff (and nothing else to 0xff) at every light level.
tmap_loop0:
	divw tr0, ebp, ecx # compute v coordinate
	divw tr1, ebx, ecx # compute u coordinate
	andi. tr1, tr1, 0x3f
	rlwimi tr1, tr0, 6, 20, 25
	lbzx tr1, es, tr1 # mov al, es:[ebx] # get pixel from source bitmap
	rlwimi tr1, esi, 24, 16, 23 # mask lighting value (%bh) above pixel value (%al)
	lbzx tr1, r3, tr1 # xlate lighting:pixel through lighting tables
	add esi, esi, tr2 # update lighting value
	cmpwi cr0, tr1, 0xff # check for transparency
	beq skip1 # je skip1
	stb tr1, 0(edi) # mov [edi],al
skip1:	addi edi, edi, 1 # inc edi
	add. ecx, ecx, DZ1 # add ecx,_fx_dz_dx (sets cr0 for the beq two instructions down)
	add ebp, ebp, DV1 # add ebp,_fx_dv_dx
	add ebx, ebx, DU1 # add esi,_fx_du_dx
	beq _div_0_abort # je _div_0_abort # would be dividing by 0, so abort
	addic. r_loop_count, r_loop_count, -1 # dec _loop_count
	bge tmap_loop0 # jns tmap_loop0

# Common exit for every path in this routine.
_none_to_do:
# added by allender 6/21/95 -- from MPW example and PPC developers book
; EPILOGUE - return sequence
	addic SP,SP,spaceToSave ; Reset the stack pointer
	lwz r31, -4(sp)
	lwz r30, -8(sp)
	lwz r29, -12(sp)
	lwz r28, -16(sp)
	lwz r27, -20(sp)
	lwz r26, -24(sp)
	lwz r25, -28(sp)
	lwz r24, -32(sp)
	lwz r23, -36(sp)
	lwz r22, -40(sp)
	lwz r21, -44(sp)
	lwz r20, -48(sp)
	lwz r19, -52(sp)
	lwz r18, -56(sp)
	lwz r17, -60(sp)
	lwz r16, -64(sp)
	lwz r15, -68(sp)
	lwz r14, -72(sp)
	lwz r13, -76(sp)
	lwz r0, 8(sp)
	mtlr r0 ; Reset the link register
	blr ; return via the link register

# We detected a z=0 condition, which seems pretty bogus, don't you think?
# So, we abort, but maybe we want to know about it.
_div_0_abort:
	b _none_to_do

# ---------- No transparency. Yes lighting. ----------
tmap_slow_no_transparency:
tmap_loop0a:
	divw tr0, ebp, ecx # compute v coordinate
	divw tr1, ebx, ecx # compute u coordinate
	andi. tr1, tr1, 0x3f # get u coordinate in 0..63
	rlwimi tr1, tr0, 6, 20, 25 # pack together v:u in low 12 bits
	lbzx tr1, es, tr1 # Read source pixel.
# (continuation of the tmap_loop0a body: lit, non-transparent, one divide pair per pixel)
	rlwimi tr1, esi, 24, 16, 23 # mask lighting value (%bh) above pixel value (%al)
	lbzx tr1, r3, tr1 # xlate lighting:pixel through lighting tables
	add esi, esi, tr2 # update lighting value
	stb tr1, 0(edi) # mov [edi],al
	addi edi, edi, 1 # inc edi
	add. ecx, ecx, DZ1 # add ecx,_fx_dz_dx (sets cr0 for the beq two instructions down)
	add ebp, ebp, DV1 # add ebp,_fx_dv_dx
	add ebx, ebx, DU1 # add esi,_fx_du_dx
	beq _div_0_abort # je _div_0_abort # would be dividing by 0, so abort
	addic. r_loop_count, r_loop_count, -1 # dec _loop_count
	bge tmap_loop0a # jns tmap_loop0
	b _none_to_do

# ---------- Yes transparency. No lighting. ----------
# (Note: We don't know for sure there is lighting, but, except for debugging, if we aren't lighting, we _do_ have transparency.)
tmap_slow_NoLight:
tmap_loop0_nolight:
	divw tr0, ebp, ecx # compute v coordinate
	divw tr1, ebx, ecx # compute u coordinate
	andi. tr1, tr1, 0x3f
	rlwimi tr1, tr0, 6, 20, 25
	lbzx tr1, es, tr1 # mov al, es:[ebx] # get pixel from source bitmap
	cmpwi cr0, tr1, 0xff # check for transparency
# cmpwi cr0, tr1, 0x0 # check for transparency
	beq skip1a # je skip1
	stb tr1, 0(edi) # mov [edi],al
skip1a:	addi edi, edi, 1 # inc edi
	add. ecx, ecx, DZ1 # add ecx,_fx_dz_dx
	add ebp, ebp, DV1 # add ebp,_fx_dv_dx
	add ebx, ebx, DU1 # add esi,_fx_du_dx
	beq _div_0_abort # je _div_0_abort # would be dividing by 0, so abort
	addic. r_loop_count, r_loop_count, -1 # dec _loop_count
	bge tmap_loop0_nolight # jns tmap_loop0
	b _none_to_do

#-------------------------- PER/4 TMAPPER ----------------
#
# x = x1
# U0 = u/w# V0 = v/w#
# while ( 1 )
# u += du_dx*4# v+= dv_dx*4
# U1 = u/w# V1 = v/w#
# DUDX = (U1-U0)/4# DVDX = (V1-V0)/4#
#
# # Pixel 0
# pixels = texmap[V0*64+U0]#
# U0 += DUDX# V0 += DVDX
# # Pixel 1
# pixels = (pixels<<8)+texmap[V0*64+U0]#
# U0 += DUDX# V0 += DVDX
# # Pixel 2
# pixels = (pixels<<8)+texmap[V0*64+U0]#
# U0 += DUDX# V0 += DVDX
# # Pixel 3
# pixels = (pixels<<8)+texmap[V0*64+U0]#
#
# screen[x] = pixel
# x += 4#
# U0 = U1# V0 = V1
# (The pseudocode above steps 4 pixels per divide; the code below steps 2^NBITS = 16.)

# Note: If you change NBITS, you must change the number of calls to the ppc_pix macros.
NBITS: equ 4 # 2^NBITS pixels plotted per divide
NBITS_mask: equ 15 # 2^NBITS-1
NBITS_shl_minus_2: equ 4 # 2 ^ (NBITS-2)
ZSHIFT: equ 4 # precision used in PDIV macro
DIV_SHIFT: equ 4 # Used to be 8...overflowed, smaller less likely to overflow

	export tmap_loop_fast
# -------------------------------------- Start of Getting Dword Aligned ----------------------------------------------
# ebx fx_u
# ebp fx_v
# esi fx_l
# r3 gr_fade_table
# tr2 fx_dl_dx
tmap_loop_fast:
	lwz esi, fx_l[TC](RTOC)
	lwz r3, gr_fade_table[TC](RTOC)
	lwz tr2, fx_dl_dx[TC](RTOC)
	lwzx esi, r0, esi
	lwzx tr2, r0, tr2

# This is a hack! If we allow zero pixels to be plotted for alignment, the code later hangs.
# (Consequence: at least one pixel is always plotted by the loop below, even when edi
#  is already 4-byte aligned on entry.)
	andi. tr0, edi, 3 # DEBUG HACK!!
	beq skip_test # DEBUG HACK!!
NotDwordAligned1:
	andi. tr0, edi, 3 # test edi, 11b
	beq DwordAligned1 # jz DwordAligned1
skip_test: # DEBUG HACK!!
	divw tr0, ebp, ecx # tr0: v coordinate
	divw tr1, ebx, ecx # tr1: u coordinate
	rlwimi tr1, tr0, 6, 20, 25 # get v:u in low 12 bits, but garbage above
	andi. tr1, tr1, 0xfff # preserve only 12 bit index
	add esi, esi, tr2 # fx_l += fx_dl_dx
	lbzx tr1, es, tr1 # mov al, es:[ebx] # get pixel from source bitmap
	rlwimi tr1, esi, 24, 16, 23 # mask lighting value (%bh) above pixel value (%al)
	lbzx tr0, tr1, r3 # xlate lighting:pixel through lighting tables
	cmpwi cr0, tr0, 0xff # transparent pixel? (tested on the lit pixel, not the source pixel)
	beq skip2 # yes, skip
	stb tr0, 0(edi) # mov [edi],al
skip2:	addi edi, edi, 1 # inc edi

# update deltas
	add. ecx, ecx, DZ1 # add ecx,_fx_dz_dx
	add ebx, ebx, DU1 # add esi,_fx_du_dx
	add ebp, ebp, DV1 # add ebp,_fx_dv_dx
	beq _div_0_abort # je _div_0_abort # would be dividing by 0, so abort
	addic. r_loop_count, r_loop_count, -1 # dec _loop_count
	bge NotDwordAligned1
	b _none_to_do
# -------------------------------------- End of Getting Dword Aligned ----------------------------------------------

# Here r_loop_count + 1 pixels remain.  Split that count into
#   r_loop_count    = number of full 2^NBITS-pixel runs
#   r_num_left_over = remaining pixels (0 .. 2^NBITS-1)
# and fall back to the per-pixel loop when there is no full run.
DwordAligned1:
	addi r_loop_count, r_loop_count, 1
	andi. r_num_left_over, r_loop_count, NBITS_mask
	srwi. r_loop_count, r_loop_count, NBITS
	beq tmap_slow_from_fast

# compute initial u, v coordinates
# (divide with DIV_SHIFT extra fraction bits, then shift up to 16 fraction bits)
	slwi eax, ebp, DIV_SHIFT
	divw V0, eax, ecx
	slwi V0, V0, 16 - DIV_SHIFT
	slwi eax, ebx, DIV_SHIFT
	divw U0, eax, ecx
	slwi U0, U0, 16 - DIV_SHIFT

# Set deltas to NPIXS pixel increments
	lwz tr0, fx_du_dx[TC](RTOC)
	lwzx tr1, r0, tr0
	slwi DU1, tr1, NBITS
	lwz tr0, fx_dv_dx[TC](RTOC)
	lwzx tr1, r0, tr0
	slwi DV1, tr1, NBITS
	lwz tr0, fx_dz_dx[TC](RTOC)
	lwzx tr1, r0, tr0
	slwi DZ1, tr1, NBITS

# LIGHTING CODE
# NOTE(review): fx_l is reloaded from memory here, but the alignment loop above only
# advanced the lighting in esi and never stored it back, so the lighting restarts
# from its initial value.  Confirm whether that is intended.
	lwz tr0, fx_l[TC](RTOC) # mov ebx, _fx_l
	lwzx tr0, r0, tr0 # mov ebx, _fx_l
	lwz tr1, fx_dl_dx[TC](RTOC) # mov ebp, _fx_dl_dx
	lwzx tr1, r0, tr1 # mov ebp, _fx_dl_dx

# Inside this loop, tr0 = fx_l, tr1 = fx_dl_dx
TopOfLoop4:
	add. ecx, ecx, DZ1 # add ecx, DZ1
	add ebx, ebx, DU1 # add ebx, DU1
	add ebp, ebp, DV1 # add ebp, DV1
	beq _div_0_abort # would be dividing by 0, so abort

# Find fixed U1, V1
	slwi eax, ebx, DIV_SHIFT
	divw U1, eax, ecx
	slwi eax, ebp, DIV_SHIFT
	divw V1, eax, ecx
	slwi U1, U1, 16 - DIV_SHIFT
	slwi V1, V1, 16 - DIV_SHIFT

# PPC: Make %esi be dv, %edx be du
	sub esi, V1, V0
	sub edx, U1, U0
	srawi esi, esi, NBITS
	srawi edx, edx, NBITS
	lwz eax, Transparency_on[TC](RTOC) # test _Transparency_on,-1
	lwzx eax, r0, eax # test _Transparency_on,-1
	mr. eax, eax
	bne yes_trans1 # je no_trans1

# Plot 16 pixels. (2^NBITS)
# Four passes through a body that plots four pixels.  The body is software-pipelined:
# the store of pixel k is issued after the address computation for pixel k+1 has begun.
# edi is pre-decremented because stbu stores at edi+1 and then updates edi.
	li r5, 4 # do 4 times...
	mtctr r5
	subi edi, edi, 1
# -----------------------------------------------
pix_loop1:
	mr eax, V0
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	add U0, U0, edx # u0 = u0 + du
	lbzx tr2, eax, es # get source pixel
	add V0, V0, esi # v0 = v0 + dv
	rlwimi tr2, tr0, 24, 16, 23 # mask lighting value (%bh) above pixel value (%al)
	lbzx r6, r3, tr2 # xlate lighting:pixel through lighting tables
# -----------------------------------------------
	mr eax, V0
	add tr0, tr0, tr1 # fx_l += fx_dl_dx
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	stbu r6, 1(edi) # {change this to stbu eax 1(edi) and kill the addi below}
	add U0, U0, edx # u0 = u0 + du
	lbzx tr2, eax, es # get source pixel
	add V0, V0, esi # v0 = v0 + dv
	rlwimi tr2, tr0, 24, 16, 23 # mask lighting value (%bh) above pixel value (%al)
	lbzx r6, r3, tr2 # xlate lighting:pixel through lighting tables
	add tr0, tr0, tr1 # fx_l += fx_dl_dx
# -----------------------------------------------
	mr eax, V0
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	stbu r6, 1(edi) # {change this to stbu eax 1(edi) and kill the addi below}
	add U0, U0, edx # u0 = u0 + du
	lbzx tr2, eax, es # get source pixel
	add V0, V0, esi # v0 = v0 + dv
	rlwimi tr2, tr0, 24, 16, 23 # mask lighting value (%bh) above pixel value (%al)
	lbzx r6, r3, tr2 # xlate lighting:pixel through lighting tables
	add tr0, tr0, tr1 # fx_l += fx_dl_dx
# -----------------------------------------------
	mr eax, V0
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	stbu r6, 1(edi) # {change this to stbu eax 1(edi) and kill the addi below}
	add U0, U0, edx # u0 = u0 + du
	lbzx tr2, eax, es # get source pixel
	add V0, V0, esi # v0 = v0 + dv
	rlwimi tr2, tr0, 24, 16, 23 # mask lighting value (%bh) above pixel value (%al)
	lbzx r6, r3, tr2 # xlate lighting:pixel through lighting tables
	add tr0, tr0, tr1 # fx_l += fx_dl_dx
# -----------------------------------------------
	stbu r6, 1(edi) # {change this to stbu eax 1(edi) and kill the addi below}
	bdnz pix_loop1
	addi edi, edi, 1 # undo the pre-decrement done before pix_loop1
cont1:
	addic. r_loop_count, r_loop_count, -1
	mr U0, U1
	mr V0, V1
	bne TopOfLoop4
EndOfLoop4:
	mr. r_num_left_over, r_num_left_over
	beq _none_to_do
	b DoEndPixels

; -------------------------------------------------------
yes_trans1:
# Plot 16 pixels. (2^NBITS)
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	ppc_pix_lt
	b+ cont1

# ----------------------------------------- Start of LeftOver Pixels ------------------------------------------
DoEndPixels:
# This is the stuff to determine whether to use the slower, but more accurate, leftover pixel stuff.
	add. ecx, ecx, DZ1 # add ecx, DZ1
	add ebx, ebx, DU1 # add ebx, DU1
	add ebp, ebp, DV1 # add ebp, DV1
	beq _div_0_abort # je _div_0_abort
	bgt+ dep_cont # jns dep_cont

# z went negative.
# this can happen because we added DZ1 to the current z, but dz1 represents dz for perhaps 16 pixels
# though we might only plot one more pixel.
# Instead of converting the ugly code below, I'm just going to abort if z went negative.
# It hardly ever does and we shipped shareware that way...
	b _none_to_do
dep_cont:
	slwi eax, ebx, DIV_SHIFT
	divw U1, eax, ecx
	slwi U1, U1, 16 - DIV_SHIFT
	slwi eax, ebp, DIV_SHIFT
	divw V1, eax, ecx
	slwi V1, V1, 16 - DIV_SHIFT
	sub esi, V1, V0
	sub edx, U1, U0
	srawi esi, esi, NBITS
	srawi edx, edx, NBITS

# Plots r_num_left_over pixels.  The 0xff test on the source pixel is applied here
# regardless of Transparency_on.
leftover_loop:
	mr eax, V0
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	lbzx tr2, eax, es # get source pixel
	add U0, U0, edx # u0 = u0 + du
	add V0, V0, esi # v0 = v0 + dv
	cmpwi cr0, tr2, 0xff # transparent pixel?
# cmpwi cr0, tr2, 0x0 # transparent pixel?
	add tr0, tr0, tr1 # fx_l += fx_dl_dx
	beq skipa1
	rlwimi tr2, tr0, 24, 16, 23 # mask lighting value (%bh) above pixel value (%al)
	lbzx eax, r3, tr2 # xlate lighting:pixel through lighting tables
	stb eax, 0(edi)
skipa1:	addi edi, edi, 1
	addic. r_num_left_over, r_num_left_over, -1
	bne leftover_loop
	b _none_to_do # jmp _none_to_do

# Entered from nol_DwordAligned1 when fewer than 2^NBITS pixels remain.
# r_num_left_over is then the NUMBER of pixels still to draw (1..15), whereas the
# loop below runs r_loop_count+1 times (addic. / bge).  Copying the count unchanged
# drew one pixel past the end of the span, so convert it to count-1.
nol_tmap_slow_from_fast:
	subi r_loop_count, r_num_left_over, 1

# ---------- Yes transparency. No lighting. ----------
# (Note: We don't know for sure there is lighting, but, except for debugging, if we aren't lighting, we _do_ have transparency.)
nol_tmap_slow_NoLight:
nol_tmap_loop0_nolight:
	divw tr0, ebp, ecx # compute v coordinate
	divw tr1, ebx, ecx # compute u coordinate
	andi. tr1, tr1, 0x3f
	rlwimi tr1, tr0, 6, 20, 25
	lbzx tr1, es, tr1 # mov al, es:[ebx] # get pixel from source bitmap
	cmpwi cr0, tr1, 0xff # check for transparency
	beq nol_skip1a # je skip1
	stb tr1, 0(edi) # mov [edi],al
nol_skip1a:	addi edi, edi, 1 # inc edi
	add. ecx, ecx, DZ1 # add ecx,_fx_dz_dx
	add ebp, ebp, DV1 # add ebp,_fx_dv_dx
	add ebx, ebx, DU1 # add esi,_fx_du_dx
	beq _div_0_abort # je _div_0_abort # would be dividing by 0, so abort
	addic. r_loop_count, r_loop_count, -1 # dec _loop_count
	bge nol_tmap_loop0_nolight # jns tmap_loop0
	b _none_to_do

	export tmap_loop_fast_nolight
# -------------------------------------- Start of Getting Dword Aligned ----------------------------------------------
# ebx fx_u
# ebp fx_v
# esi fx_l
# r3 gr_fade_table
# (esi and r3 are not loaded or used as lighting state on this no-lighting path.)
tmap_loop_fast_nolight:
# This is a hack! If we allow zero pixels to be plotted for alignment, the code later hangs.
	andi. tr0, edi, 3 # DEBUG HACK!!
	beq nol_skip_test # DEBUG HACK!!
nol_NotDwordAligned1:
	andi. tr0, edi, 3 # test edi, 11b
	beq nol_DwordAligned1 # jz DwordAligned1
nol_skip_test: # DEBUG HACK!!
	divw tr0, ebp, ecx # tr0: v coordinate
	divw tr1, ebx, ecx # tr1: u coordinate
	rlwimi tr1, tr0, 6, 20, 25 # get v:u in low 12 bits, but garbage above
	andi. tr1, tr1, 0xfff # preserve only 12 bit index
	lbzx tr1, es, tr1 # mov al, es:[ebx] # get pixel from source bitmap
	cmpwi cr0, tr1, 0xff # transparent pixel?
	beq nol_skip2 # yes, skip
	stb tr1, 0(edi) # mov [edi],al
nol_skip2:	addi edi, edi, 1 # inc edi

# update deltas
	add. ecx, ecx, DZ1 # add ecx,_fx_dz_dx
	add ebx, ebx, DU1 # add esi,_fx_du_dx
	add ebp, ebp, DV1 # add ebp,_fx_dv_dx
	beq _div_0_abort # je _div_0_abort # would be dividing by 0, so abort
	addic. r_loop_count, r_loop_count, -1 # dec _loop_count
	bge nol_NotDwordAligned1
	b _none_to_do
# -------------------------------------- End of Getting Dword Aligned ----------------------------------------------

# Here r_loop_count + 1 pixels remain; split into full 2^NBITS runs and a remainder.
nol_DwordAligned1:
	addi r_loop_count, r_loop_count, 1
	andi. r_num_left_over, r_loop_count, NBITS_mask
	srwi. r_loop_count, r_loop_count, NBITS
	beq nol_tmap_slow_from_fast

# compute initial u, v coordinates
	slwi eax, ebp, DIV_SHIFT
	divw V0, eax, ecx
	slwi V0, V0, 16 - DIV_SHIFT
	slwi eax, ebx, DIV_SHIFT
	divw U0, eax, ecx
	slwi U0, U0, 16 - DIV_SHIFT

# Set deltas to NPIXS pixel increments
	lwz tr0, fx_du_dx[TC](RTOC)
	lwzx tr1, r0, tr0
	slwi DU1, tr1, NBITS
	lwz tr0, fx_dv_dx[TC](RTOC)
	lwzx tr1, r0, tr0
	slwi DV1, tr1, NBITS
	lwz tr0, fx_dz_dx[TC](RTOC)
	lwzx tr1, r0, tr0
	slwi DZ1, tr1, NBITS

nol_TopOfLoop4:
	add. ecx, ecx, DZ1 # add ecx, DZ1
	add ebx, ebx, DU1 # add ebx, DU1
	add ebp, ebp, DV1 # add ebp, DV1
	beq _div_0_abort # would be dividing by 0, so abort

# Find fixed U1, V1
	slwi eax, ebx, DIV_SHIFT
	divw U1, eax, ecx
	slwi eax, ebp, DIV_SHIFT
	divw V1, eax, ecx
	slwi U1, U1, 16 - DIV_SHIFT
	slwi V1, V1, 16 - DIV_SHIFT

# PPC: Make %esi be dv, %edx be du
	sub esi, V1, V0
	sub edx, U1, U0
	srawi esi, esi, NBITS
	srawi edx, edx, NBITS
	lwz eax, Transparency_on[TC](RTOC) # test _Transparency_on,-1
	lwzx eax, r0, eax # test _Transparency_on,-1
	mr. eax, eax
	bne nol_yes_trans1 # je no_trans1

# Plot 16 pixels. (2^NBITS)
	li r5, 4 # do 4 times...
# Unlit, non-transparent run of 2^NBITS pixels: CTR is loaded from r5 and the body
# below plots four pixels per pass.  The body is software-pipelined: each stbu stores
# the pixel fetched in the previous stage.  edi is pre-decremented because stbu stores
# at edi+1 and then updates edi.
	mtctr r5
	subi edi, edi, 1
# -----------------------------------------------
nol_pix_loop1:
	mr eax, V0
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	add U0, U0, edx # u0 = u0 + du
	lbzx r6, eax, es # get source pixel
	add V0, V0, esi # v0 = v0 + dv
# -----------------------------------------------
	mr eax, V0
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	stbu r6, 1(edi) # {change this to stbu eax 1(edi) and kill the addi below}
	add U0, U0, edx # u0 = u0 + du
	lbzx r6, eax, es # get source pixel
	add V0, V0, esi # v0 = v0 + dv
# -----------------------------------------------
	mr eax, V0
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	stbu r6, 1(edi) # {change this to stbu eax 1(edi) and kill the addi below}
	add U0, U0, edx # u0 = u0 + du
	lbzx r6, eax, es # get source pixel
	add V0, V0, esi # v0 = v0 + dv
# -----------------------------------------------
	mr eax, V0
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	stbu r6, 1(edi) # {change this to stbu eax 1(edi) and kill the addi below}
	add U0, U0, edx # u0 = u0 + du
	lbzx r6, eax, es # get source pixel
	add V0, V0, esi # v0 = v0 + dv
# -----------------------------------------------
	stbu r6, 1(edi) # {change this to stbu eax 1(edi) and kill the addi below}
	bdnz nol_pix_loop1
	addi edi, edi, 1 # undo the pre-decrement done before nol_pix_loop1

# End of one 2^NBITS-pixel run: the end-of-run coordinates become the start of the next run.
nol_cont1:
	addic. r_loop_count, r_loop_count, -1
	mr U0, U1
	mr V0, V1
	bne nol_TopOfLoop4
nol_EndOfLoop4:
	mr. r_num_left_over, r_num_left_over
	beq _none_to_do
	b nol_DoEndPixels

; -------------------------------------------------------
# Unlit run with transparency: 16 expansions of ppc_pix_t.  No edi pre-decrement was
# done on this path, so it rejoins at nol_cont1, after the addi above.
nol_yes_trans1:
# Plot 16 pixels. (2^NBITS)
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	ppc_pix_t
	b+ nol_cont1

# ----------------------------------------- Start of LeftOver Pixels ------------------------------------------
nol_DoEndPixels:
# This is the stuff to determine whether to use the slower, but more accurate, leftover pixel stuff.
# z is advanced by a full 2^NBITS-pixel step; the routine exits if z reaches zero or
# goes negative.
	add. ecx, ecx, DZ1 # add ecx, DZ1
	add ebx, ebx, DU1 # add ebx, DU1
	add ebp, ebp, DV1 # add ebp, DV1
	beq _div_0_abort # je _div_0_abort
	blt _none_to_do
	slwi eax, ebx, DIV_SHIFT
	divw U1, eax, ecx
	slwi U1, U1, 16 - DIV_SHIFT
	slwi eax, ebp, DIV_SHIFT
	divw V1, eax, ecx
	slwi V1, V1, 16 - DIV_SHIFT
	sub esi, V1, V0
	sub edx, U1, U0
	srawi esi, esi, NBITS
	srawi edx, edx, NBITS

# Plots r_num_left_over pixels.  The 0xff test on the source pixel is applied here
# regardless of Transparency_on.
nol_leftover_loop:
	mr eax, V0
	rlwimi eax, U0, 26, 16, 21 # Now, eax has v:u, but it's 10 bits too high and garbage above it
	rlwinm eax, eax, 22, 20, 31 # Shift right 10 bits, mask out high garbage (preserve only low 12 bits)
	lbzx tr2, eax, es # get source pixel
	add U0, U0, edx # u0 = u0 + du
	add V0, V0, esi # v0 = v0 + dv
	cmpwi cr0, tr2, 0xff # transparent pixel?
	beq nol_skipa1
	stb tr2, 0(edi)
nol_skipa1:	addi edi, edi, 1
	addic. r_num_left_over, r_num_left_over, -1
	bne nol_leftover_loop
	b _none_to_do # jmp _none_to_do