From 99b33745851b676ee73acf0baae2006f207b6492 Mon Sep 17 00:00:00 2001 From: Joost van der Sluis Date: Dec 21 2005 11:16:51 +0000 Subject: - Updated fpc-2.0.2-G5.patch --- diff --git a/fpc-2.0.2-G5.patch b/fpc-2.0.2-G5.patch new file mode 100644 index 0000000..8fa4aa9 --- /dev/null +++ b/fpc-2.0.2-G5.patch @@ -0,0 +1,379 @@ +Index: fpcsrc/rtl/powerpc/powerpc.inc +=================================================================== +--- fpcsrc/rtl/powerpc/powerpc.inc (revision 830) ++++ fpcsrc/rtl/powerpc/powerpc.inc (working copy) +@@ -203,374 +203,7 @@ + end; + {$endif MACOS} + +-{**************************************************************************** +- Move / Fill +-****************************************************************************} + +-{$ifndef FPC_SYSTEM_HAS_MOVE} +-{$define FPC_SYSTEM_HAS_MOVE} +-procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler; nostackframe; +-asm +- { count <= 0 ? } +- cmpwi cr0,r5,0 +- { check if we have to do the move backwards because of overlap } +- sub r10,r4,r3 +- { carry := boolean(dest-source < count) = boolean(overlap) } +- subc r10,r10,r5 +- +- { count < 15 ? (to decide whether we will move dwords or bytes } +- cmpwi cr1,r5,15 +- +- { if overlap, then r10 := -1 else r10 := 0 } +- subfe r10,r10,r10 +- +- { count < 63 ? (32 + max. alignment (31) } +- cmpwi cr7,r5,63 +- +- { if count <= 0, stop } +- ble cr0,.LMoveDone +- +- { load the begin of the source in the data cache } +- dcbt 0,r3 +- { and the dest as well } +- dcbtst 0,r4 +- +- { if overlap, then r0 := count else r0 := 0 } +- and r0,r5,r10 +- { if overlap, then point source and dest to the end } +- add r3,r3,r0 +- add r4,r4,r0 +- { if overlap, then r6 := 0, else r6 := -1 } +- not r6,r10 +- { if overlap, then r10 := -2, else r10 := 0 } +- slwi r10,r10,1 +- { if overlap, then r10 := -1, else r10 := 1 } +- addi r10,r10,1 +- +- { if count < 15, copy everything byte by byte } +- blt cr1,.LMoveBytes +- +- { if no overlap, then source/dest += -1, otherwise they stay } +- { After the next instruction, r3/r4 + r10 = next position to } +- { load/store from/to } +- add r3,r3,r6 +- add r4,r4,r6 +- +- { otherwise, guarantee 4 byte alignment for dest for starters } +-.LMove4ByteAlignLoop: +- lbzux r0,r3,r10 +- stbux r0,r4,r10 +- { is dest now 4 aligned? } +- andi. r0,r4,3 +- subi r5,r5,1 +- { while not aligned, continue } +- bne cr0,.LMove4ByteAlignLoop +- +-{$ifndef ppc603} +- { check for 32 byte alignment } +- andi. r7,r4,31 +-{$endif non ppc603} +- { we are going to copy one byte again (the one at the newly } +- { aligned address), so increase count byte 1 } +- addi r5,r5,1 +- { count div 4 for number of dwords to copy } +- srwi r0,r5,2 +- { if 11 <= count < 63, copy using dwords } +- blt cr7,.LMoveDWords +- +-{$ifndef ppc603} +- { # of dwords to copy to reach 32 byte alignment (*4) } +- { (depends on forward/backward copy) } +- +- { if forward copy, r6 = -1 -> r8 := 32 } +- { if backward copy, r6 = 0 -> r8 := 0 } +- rlwinm r8,r6,0,31-6+1,31-6+1 +- { if forward copy, we have to copy 32 - unaligned count bytes } +- { if backward copy unaligned count bytes } +- sub r7,r8,r7 +- { if backward copy, the calculated value is now negate -> } +- { make it positive again } +- not r8, r6 +- add r7, r7, r8 +- xor r7, r7, r8 +-{$endif not ppc603} +- +- { multiply the update count with 4 } +- slwi r10,r10,2 +- slwi r6,r6,2 +- { and adapt the source and dest } +- add r3,r3,r6 +- add r4,r4,r6 +- +-{$ifndef ppc603} +- beq cr0,.LMove32BytesAligned +-.L32BytesAlignMoveLoop: +- { count >= 39 -> align to 8 byte boundary and then use the FPU } +- { since we're already at 4 byte alignment, use dword store } +- subic. r7,r7,4 +- lwzux r0,r3,r10 +- subi r5,r5,4 +- stwux r0,r4,r10 +- bne .L32BytesAlignMoveLoop +- +-.LMove32BytesAligned: +- { count div 32 ( >= 1, since count was >=63 } +- srwi r0,r5,5 +- { remainder } +- andi. r5,r5,31 +- { to decide if we will do some dword stores (instead of only } +- { byte stores) afterwards or not } +-{$else not ppc603} +- srwi r0,r5,4 +- andi. r5,r5,15 +-{$endif not ppc603} +- cmpwi cr1,r5,11 +- mtctr r0 +- +- { r0 := count div 4, will be moved to ctr when copying dwords } +- srwi r0,r5,2 +- +-{$ifndef ppc603} +- { adjust the update count: it will now be 8 or -8 depending on overlap } +- slwi r10,r10,1 +- +- { adjust source and dest pointers: because of the above loop, dest is now } +- { aligned to 8 bytes. So if we add r6 we will still have an 8 bytes } +- { aligned address) } +- add r3,r3,r6 +- add r4,r4,r6 +- +- slwi r6,r6,1 +- +- { the dcbz offset must give a 32 byte aligned address when added } +- { to the current dest address and its address must point to the } +- { bytes that will be overwritten in the current iteration. In case } +- { of a forward loop, the dest address has currently an offset of } +- { -8 compared to the bytes that will be overwritten (and r6 = -8). } +- { In case of a backward of a loop, the dest address currently has } +- { an offset of +32 compared to the bytes that will be overwritten } +- { (and r6 = 0). So the forward dcbz offset must become +8 and the } +- { backward -32 -> (-r6 * 5) - 32 gives the correct offset } +- slwi r7,r6,2 +- add r7,r7,r6 +- neg r7,r7 +- subi r7,r7,32 +- +-.LMove32ByteDcbz: +- lfdux f0,r3,r10 +- lfdux f1,r3,r10 +- lfdux f2,r3,r10 +- lfdux f3,r3,r10 +- { must be done only now, in case source and dest are less than } +- { 32 bytes apart! } +- dcbz r4,r7 +- stfdux f0,r4,r10 +- stfdux f1,r4,r10 +- stfdux f2,r4,r10 +- stfdux f3,r4,r10 +- bdnz .LMove32ByteDcbz +-.LMove32ByteLoopDone: +-{$else not ppc603} +-.LMove16ByteLoop: +- lwzux r11,r3,r10 +- lwzux r7,r3,r10 +- lwzux r8,r3,r10 +- lwzux r9,r3,r10 +- stwux r11,r4,r10 +- stwux r7,r4,r10 +- stwux r8,r4,r10 +- stwux r9,r4,r10 +- bdnz .LMove16ByteLoop +-{$endif not ppc603} +- +- { cr0*4+eq is true if "count and 31" = 0 } +- beq cr0,.LMoveDone +- +- { make r10 again -1 or 1, but first adjust source/dest pointers } +- sub r3,r3,r6 +- sub r4,r4,r6 +-{$ifndef ppc603} +- srawi r10,r10,3 +- srawi r6,r6,3 +-{$else not ppc603} +- srawi r10,r10,2 +- srawi r6,r6,2 +-{$endif not ppc603} +- +- { cr1 contains whether count <= 11 } +- ble cr1,.LMoveBytes +- +-.LMoveDWords: +- mtctr r0 +- andi. r5,r5,3 +- { r10 * 4 } +- slwi r10,r10,2 +- slwi r6,r6,2 +- add r3,r3,r6 +- add r4,r4,r6 +- +-.LMoveDWordsLoop: +- lwzux r0,r3,r10 +- stwux r0,r4,r10 +- bdnz .LMoveDWordsLoop +- +- beq cr0,.LMoveDone +- { make r10 again -1 or 1 } +- sub r3,r3,r6 +- sub r4,r4,r6 +- srawi r10,r10,2 +- srawi r6,r6,2 +-.LMoveBytes: +- add r3,r3,r6 +- add r4,r4,r6 +- mtctr r5 +-.LMoveBytesLoop: +- lbzux r0,r3,r10 +- stbux r0,r4,r10 +- bdnz .LMoveBytesLoop +-.LMoveDone: +-end; +-{$endif FPC_SYSTEM_HAS_MOVE} +- +- +-{$ifndef FPC_SYSTEM_HAS_FILLCHAR} +-{$define FPC_SYSTEM_HAS_FILLCHAR} +- +-Procedure FillChar(var x;count:longint;value:byte);assembler; +-{ input: x in r3, count in r4, value in r5 } +- +-{$ifndef FPC_ABI_AIX} +-{ in the AIX ABI, we can use te red zone for temp storage, otherwise we have } +-{ to explicitely allocate room } +-var +- temp : packed record +- case byte of +- 0: (l1,l2: longint); +- 1: (d: double); +- end; +-{$endif FPC_ABI_AIX} +-asm +- { no bytes? } +- cmpwi cr6,r4,0 +- { less than 15 bytes? } +- cmpwi cr7,r4,15 +- { less than 64 bytes? } +- cmpwi cr1,r4,64 +- { fill r5 with ValueValueValueValue } +- rlwimi r5,r5,8,16,23 +- { setup for aligning x to multiple of 4} +- rlwinm r10,r3,0,31-2+1,31 +- rlwimi r5,r5,16,0,15 +- ble cr6,.LFillCharDone +- { get the start of the data in the cache (and mark it as "will be } +- { modified") } +- dcbtst 0,r3 +- subfic r10,r10,4 +- blt cr7,.LFillCharVerySmall +- { just store 4 bytes instead of using a loop to align (there are } +- { plenty of other instructions now to keep the processor busy } +- { while it handles the (possibly unaligned) store) } +- stw r5,0(r3) +- { r3 := align(r3,4) } +- add r3,r3,r10 +- { decrease count with number of bytes already stored } +- sub r4,r4,r10 +- blt cr1,.LFillCharSmall +- { if we have to fill with 0 (which happens a lot), we can simply use } +- { dcbz for the most part, which is very fast, so make a special case } +- { for that } +- cmplwi cr1,r5,0 +- { align to a multiple of 32 (and immediately check whether we aren't } +- { already 32 byte aligned) } +- rlwinm. r10,r3,0,31-5+1,31 +- { setup r3 for using update forms of store instructions } +- subi r3,r3,4 +- { get number of bytes to store } +- subfic r10,r10,32 +- { if already 32byte aligned, skip align loop } +- beq .L32ByteAlignLoopDone +- { substract from the total count } +- sub r4,r4,r10 +-.L32ByteAlignLoop: +- { we were already aligned to 4 byres, so this will count down to } +- { exactly 0 } +- subic. r10,r10,4 +- stwu r5,4(r3) +- bne .L32ByteAlignLoop +-.L32ByteAlignLoopDone: +- { get the amount of 32 byte blocks } +- srwi r10,r4,5 +- { and keep the rest in r4 (recording whether there is any rest) } +- rlwinm. r4,r4,0,31-5+1,31 +- { move to ctr } +- mtctr r10 +- { check how many rest there is (to decide whether we'll use } +- { FillCharSmall or FillCharVerySmall) } +- cmplwi cr7,r4,11 +- { if filling with zero, only use dcbz } +- bne cr1, .LFillCharNoZero +- { make r3 point again to the actual store position } +- addi r3,r3,4 +-.LFillCharDCBZLoop: +- dcbz 0,r3 +- addi r3,r3,32 +- bdnz .LFillCharDCBZLoop +- { if there was no rest, we're finished } +- beq .LFillCharDone +- b .LFillCharVerySmall +-.LFillCharNoZero: +-{$ifdef FPC_ABI_AIX} +- stw r5,-4(r1) +- stw r5,-8(r1) +- lfd f0,-8(r1) +-{$else FPC_ABI_AIX} +- stw r5,temp +- stw r5,temp+4 +- lfd f0,temp +-{$endif FPC_ABI_AIX} +- { make r3 point to address-8, so we're able to use fp double stores } +- { with update (it's already -4 now) } +- subi r3,r3,4 +- { load r10 with 8, so that dcbz uses the correct address } +- li r10, 8 +-.LFillChar32ByteLoop: +- dcbz r3,r10 +- stfdu f0,8(r3) +- stfdu f0,8(r3) +- stfdu f0,8(r3) +- stfdu f0,8(r3) +- bdnz .LFillChar32ByteLoop +- { if there was no rest, we're finished } +- beq .LFillCharDone +- { make r3 point again to the actual next byte that must be written } +- addi r3,r3,8 +- b .LFillCharVerySmall +-.LFillCharSmall: +- { when we arrive here, we're already 4 byte aligned } +- { get count div 4 to store dwords } +- srwi r10,r4,2 +- { get ready for use of update stores } +- subi r3,r3,4 +- mtctr r10 +- rlwinm. r4,r4,0,31-2+1,31 +-.LFillCharSmallLoop: +- stwu r5,4(r3) +- bdnz .LFillCharSmallLoop +- { if nothing left, stop } +- beq .LFillCharDone +- { get ready to store bytes } +- addi r3,r3,4 +-.LFillCharVerySmall: +- mtctr r4 +- subi r3,r3,1 +-.LFillCharVerySmallLoop: +- stbu r5,1(r3) +- bdnz .LFillCharVerySmallLoop +-.LFillCharDone: +-end; +-{$endif FPC_SYSTEM_HAS_FILLCHAR} +- +- + {$ifndef FPC_SYSTEM_HAS_FILLDWORD} + {$define FPC_SYSTEM_HAS_FILLDWORD} + procedure filldword(var x;count : longint;value : dword); diff --git a/fpc.spec b/fpc.spec index 9e189c5..e7b8001 100644 --- a/fpc.spec +++ b/fpc.spec @@ -1,6 +1,6 @@ Name: fpc Version: 2.0.2 -Release: 1%{?dist} +Release: 2%{?dist} Summary: Free Pascal Compiler Group: Development/Languages @@ -156,6 +156,9 @@ rm -rf %{buildroot} %{_datadir}/fpcsrc %changelog +* Tue Dec 20 2005 Joost van der Sluis 2.0.2-2 +- Updated fpc-2.0.2-G5.patch + * Tue Dec 20 2005 Joost van der Sluis 2.0.2-1 - Updated to version 2.0.2