;-----------------------------------------------------------------------------
;S404 highly optimized data_decruncher_lib 1.0_turbo for use in stc.library
;23.11.93 by Marcus 'Cozine' Ottosson
;
; This is a highly optimized library decruncher. The code does not fit into
; the instruction cache (286 bytes). Generally, it's NOT possible to use all
; the 256 bytes available in the instruction cache. The caches in the 68020
; and the 68030 consist of 16 rows, each of which contains 16 bytes.
; AllocMem() and AllocPooled(), the memory allocation routines used for
; loading executables, guarantee 8-byte alignment.
; On the 68000 my large testfile decrunches 3.7% faster in this version than
; the standard version. Some files may decrunch much slower on the 68020 and
; the 68030. I don't know the maximum loss, probably about 10-20%. The loss
; will not be very high when we are multitasking, since the interrupts cause a
; partial or complete cache flush anyway. I want some test reports!!!
;
;-----------------------------------------------------------------------------
;S404 data_decruncher v0.2
;(c) 1993 by Jouni 'Mr.Spiv' Korhonen (SWSW)
;-----------------------------------------------------------------------------
;call with registers: a2 = destination address
;                     a1 = crunched data
;-----------------------------------------------------------------------------
;Uses d0-d7/a0-a6
;
; Register roles, as used by the code below:
;   a0  source pointer when copying bytes that were already written
;   a1  read pointer into the crunched data; words are fetched with -(a1),
;       so the stream is consumed backwards
;   a2  write pointer; bytes are stored with -(a2), so output is built from
;       the top of the destination area downwards
;   a3  constant: address of loff6  (continuation used after an offset read)
;   a4  constant: address of llen5a (continuation used after a length read)
;   a5  copy of a2 taken at entry; the main loop exits once a5 >= a2
;   a6  continuation address that lbits jumps to when it is done
;   d0  constant 1 (operand of the addx at llen7)
;   d1  scratch: number of bits wanted on entry to lbits, value read on return
;   d2  first of the three words loaded at start-up; used at loff3 as the
;       bit count of the offset field read there
;   d3  byte count for the copy loops (dbf style); its low word is $ffff
;       whenever lmain is reached (initial moveq #-1, and every dbf d3 loop
;       leaves it at -1)
;   d4  constant $0000ffff, masks off the upper word of d6 in lbits
;   d5  constant 16, added to d7 in lbits after a new word was fetched
;   d6  bit buffer; single bits are shifted out of the top of the low word
;   d7  bit counter belonging to d6 (dbf style)

decrunch:
l0:	addq	#8,a1			; skip the first 8 bytes (presumably an ID/header - confirm with the cruncher)
edDCopyableStart:
	move.l	a2,a5			; a5 = destination start, used as the stop mark
	add.l	(a1)+,a2		; a2 += 1st longword (presumably the decrunched length, making a2 the end of the output)
	add.l	(a1),a1			; a1 += 2nd longword (presumably the crunched length, making a1 the end of the stream)
edDCopyableStart2:
	moveq	#0,d4
	moveq	#16,d5
	movem	(a1),d2/d6/d7		; three words at (a1): d2, initial bit buffer d6, initial bit counter d7
	not	d4			; word-sized not: d4 = $0000ffff
	lea	loff6(pc),a3
	lea	llen5a(pc),a4
	moveq	#1,d0
	moveq	#-1,d3
	bra.s	ltest1			; enter the main loop through its end test

	cnop	0,8			; Use if main loop>=244 bytes

;*** Here's the start of the instruction cache

; Control bit was 0: take the next 8 stream bits as one byte and store it.
lins:	subq	#8,d7
	bpl.s	lins2			; all 8 bits are still in the current word
lins1:	move	d7,d1			; the byte straddles two words: d1 = -(bits missing)
	addq	#8,d7			; d7 = number of bits taken from the old word
	lsl.l	d7,d6			; push those bits into the upper word of d6
	move	-(a1),d6		; fetch the next stream word into the low word
	neg	d1
	lsl.l	d1,d6			; shift in the missing bits
	addq	#8,d7			; bit counter for the freshly loaded word
	swap	d6			; assembled byte is now the low byte
	move.b	d6,-(a2)
	swap	d6			; put the bit buffer back in the low word
	cmp.l	a2,a5
	dbhs	d7,lmain		; not finished and bits left: next control bit
	bra.s	lexma			; finished, or counter expired: sort it out at lexma

lins2:	rol	#8,d6			; rotate the top 8 bits of the word into the low byte
	move.b	d6,-(a2)
ltest1:	cmp.l	a2,a5
	dbhs	d7,lmain		; loop while a5 < a2 and the bit counter has not expired
lexma	bhs.s	lexit			; dbcc leaves the flags alone: this still tests the cmp.l above
lmain1:	move	-(a1),d6		; bit buffer empty: fetch the next word
	moveq	#15,d7

; Main dispatch: one control bit selects "store a byte" (0) or the
; length/offset decoder (1).
lmain:	add	d6,d6			; shift the next bit into carry
	bcc.s	lins
	dbf	d7,llen1
	move	-(a1),d6
	moveq	#15,d7
llen1:	add	d6,d6
	bcs.s	llen6			; 1: length comes from a single further bit
	moveq	#2,d1			; 0: preset a 2-bit field on top of base 2 ...
	moveq	#4-2,d3
	dbf	d7,llen2
	move	-(a1),d6
	moveq	#15,d7
llen2:	add	d6,d6
	bcs.s	llen5			; ... and read it if this bit is 1
	dbf	d7,llen3
	move	-(a1),d6
	moveq	#15,d7
llen3:	add	d6,d6
	bcc.s	llen4
	moveq	#4,d1			; 4-bit field on top of base 6
	moveq	#8-2,d3
	lea	llen3a(pc),a6
	bra.s	lbits
llen3a:	add	d1,d3
	cmp	#15,d1
	blo.s	loff1			; field < 15: d3 is complete, go read the offset
	moveq	#5,d1			; field = 15: read a 5-bit count on top of base 13
	moveq	#14-1,d3		; and handle it at llen3b (run of stored bytes)
	lea	llen3b(pc),a6
	bra.s	lbits
llen4:	moveq	#23-2,d3		; base 21, extended by 8-bit fields
lloop:	moveq	#8,d1
llen5:	move.l	a4,a6			; continue at llen5a
	bra.s	lbits
llen5a:	add	d1,d3			; accumulate the field just read
	not.b	d1			; Z is set only if the low byte of the field was $ff
	dbeq	d7,loff2		; not $ff: go on to the offset if bits remain
	bne.s	loff2a			; not $ff but counter expired: reload first
	bra.s	lloop			; $ff: another 8-bit field follows

; Continuation after the offset field was read: copy d3+2 bytes from the
; already written data, walking both pointers downwards.
loff6:	add	d1,a0			; a0 = base chosen below + offset field
	move.b	(a0),-(a2)
lcopy:	move.b	-(a0),-(a2)
	dbf	d3,lcopy
ltest:	cmp.l	a2,a5
	dbhs	d7,lmain		; more to do and bits left in d6
	blo.s	lmain1			; more to do, bit buffer empty
lexit:	rts

llen6:	dbf	d7,llen7
	move	-(a1),d6
	moveq	#15,d7
llen7:	add	d6,d6			; next bit -> X
	addx	d0,d3			; d3.w = $ffff + 1 + X, i.e. 0 or 1

; Offset decoding. Two selector bits choose the field width and the base
; address: 1 -> d2 bits from 544(a2); 00 -> 9 bits from 32(a2);
; 01 -> 5 bits from (a2).
loff1:	dbf	d7,loff2
loff2a:	move	-(a1),d6
	moveq	#15,d7
loff2:	add	d6,d6
	bcs.s	loff3
	dbf	d7,loff4
	move	-(a1),d6
	moveq	#15,d7
loff4:	moveq	#9,d1
	lea	32(a2),a0
	add	d6,d6
	bcc.s	loff5
	moveq	#5,d1
	move.l	a2,a0
	bra.s	loff5
loff3:	lea	544(a2),a0
	move	d2,d1			; field width taken from the word loaded at start-up
loff5:	move.l	a3,a6			; continue at loff6

; lbits: read d1 bits from the stream.
;   in:  d1 = number of bits, a6 = address to continue at
;   out: low word of d1 = value read; d6/d7/a1 advanced
lbits:	and.l	d4,d6			; clear the upper word, the result is collected there
	sub	d1,d7
	bpl.s	lbits2			; enough bits left in the current word
	add	d7,d1			; d1 = bits obtainable from the current word
	lsl.l	d1,d6			; take them
	move	d7,d1			; d1 = -(bits still missing)
	move	-(a1),d6		; fetch the next word
	neg	d1
	add	d5,d7			; bit counter for the new word (+16)
lbits2:	lsl.l	d1,d6			; shift the (remaining) bits into the upper word
	move.l	d6,d1
	swap	d1			; value read -> low word of d1
	jmp	(a6)

; This part is not executed very often. Some files may decrunch much slower
; on the 68020/68030.

; Run of stored bytes: d3+1 bytes are taken 8 bits at a time from the stream,
; using the same two cases as lins/lins1.
llen3b:	add	d1,d3			; add the 5-bit count to the base
l2ins:	subq	#8,d7
	bmi.s	l2ins1			; byte straddles two stream words
	rol	#8,d6
	move.b	d6,-(a2)
	dbf	d3,l2ins
	bra.s	ltest
l2ins1:	move	d7,d1
	addq	#8,d7
	lsl.l	d7,d6
	move	-(a1),d6
	neg	d1
	lsl.l	d1,d6
	addq	#8,d7
	swap	d6
	move.b	d6,-(a2)
	swap	d6
	dbf	d3,l2ins
	bra	ltest