--- ceph-15.2.4/src/yasm-wrapper.orig 2020-06-30 11:40:51.000000000 -0400 +++ ceph-15.2.4/src/yasm-wrapper 2020-07-20 12:21:34.574980869 -0400 @@ -1,10 +1,11 @@ -#!/bin/sh -e +#!/bin/sh # libtool and yasm do not get along. # filter out any crap that libtool feeds us that yasm does not understand. #echo $0: got $* new="" touch="" +object="" while [ -n "$*" ]; do case "$1" in -f ) @@ -29,6 +30,12 @@ touch="$1" shift ;; + -o ) + shift + object="$1" + new="$new -o $1" + shift + ;; * ) new="$new $1" shift @@ -36,8 +43,15 @@ esac done -#echo $0: yasm $new -yasm $new +#echo ${0}: yasm ${new} +yasm ${new} + +echo ${new} | grep -- "crc32c_intel_fast*asm\.s" +if [ $? -ne 0 ]; then + touch /tmp/${object} + ld -r -z ibt -z shstk -z noexecstack -o ${object}.tmp ${object} + mv ${object}.tmp ${object} +fi [ -n "$touch" ] && touch $touch --- ceph-15.2.2/src/common/crc32c_intel_fast_asm.s.orig 2020-05-26 08:34:32.226201974 -0400 +++ ceph-15.2.2/src/common/crc32c_intel_fast_asm.s 2020-05-26 17:19:20.327201974 -0400 @@ -1,5 +1,5 @@ ; -; Copyright 2012-2013 Intel Corporation All Rights Reserved. +; Copyright 2012-2015 Intel Corporation All Rights Reserved. ; All rights reserved. ; ; http://opensource.org/licenses/BSD-3-Clause @@ -59,16 +59,34 @@ xor rbx, rbx ;; rbx = crc1 = 0; xor r10, r10 ;; r10 = crc2 = 0; + cmp len, %%bSize*3*2 + jbe %%non_prefetch + %assign i 0 %rep %%bSize/8 - 1 - crc32 rax, [bufptmp+i + 0*%%bSize] ;; update crc0 - crc32 rbx, [bufptmp+i + 1*%%bSize] ;; update crc1 - crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2 + %if i < %%bSize*3/4 + prefetchnta [bufptmp+ %%bSize*3 + i*4] + %endif + crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0 + crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1 + crc32 r10, qword [bufptmp+i + 2*%%bSize] ;; update crc2 %assign i (i+8) %endrep - crc32 rax, [bufptmp+i + 0*%%bSize] ;; update crc0 - crc32 rbx, [bufptmp+i + 1*%%bSize] ;; update crc1 -; SKIP ;crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2 + jmp %%next %+ %1 + +%%non_prefetch: + %assign i 0 + %rep %%bSize/8 - 1 + crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0 + crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1 + crc32 r10, qword [bufptmp+i + 2*%%bSize] ;; update crc2 + %assign i (i+8) + %endrep + +%%next %+ %1: + crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0 + crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1 +; SKIP ;crc32 r10, qword [bufptmp+i + 2*%%bSize] ;; update crc2 ; merge in crc0 movzx bufp_dw, al @@ -180,12 +198,15 @@ %define crc_init_dw r8d %endif - + endbranch push rdi push rbx mov rax, crc_init ;; rax = crc_init; + cmp len, 8 + jb less_than_8 + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -195,9 +216,6 @@ ;; amount of the address je proc_block ;; Skip if aligned - cmp len, 8 - jb less_than_8 - ;;;; Calculate CRC of unaligned bytes of the buffer (if any) ;;;; mov rbx, [bufptmp] ;; load a quadword from the buffer add bufptmp, bufp ;; align buffer pointer for @@ -233,7 +251,7 @@ jnc bit7 ;; jump to bit-6 if bit-7 == 0 %assign i 0 %rep 16 - crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data + crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data %assign i (i+8) %endrep je do_return ;; return if remaining data is zero @@ -244,7 +262,7 @@ jnc bit6 ;; jump to bit-6 if bit-7 == 0 %assign i 0 %rep 8 - crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data + crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data %assign i (i+8) %endrep je do_return ;; return if remaining data is zero @@ -254,7 +272,7 @@ jnc bit5 ;; jump to bit-5 if bit-6 == 0 %assign i 0 %rep 4 - crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data + crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data %assign i (i+8) %endrep je do_return ;; return if remaining data is zero @@ -264,7 +282,7 @@ jnc bit4 ;; jump to bit-4 if bit-5 == 0 %assign i 0 %rep 2 - crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data + crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data %assign i (i+8) %endrep je do_return ;; return if remaining data is zero @@ -272,11 +290,11 @@ bit4: shl len_b, 1 ;; shift-out MSB (bit-4) jnc bit3 ;; jump to bit-3 if bit-4 == 0 - crc32 rax, [bufptmp] ;; compute crc32 of 8-byte data + crc32 rax, qword [bufptmp] ;; compute crc32 of 8-byte data je do_return ;; return if remaining data is zero add bufptmp, 8 ;; buf +=8; (next 8 bytes) bit3: - mov rbx, [bufptmp] ;; load a 8-bytes from the buffer: + mov rbx, qword [bufptmp] ;; load a 8-bytes from the buffer: shl len_b, 1 ;; shift-out MSB (bit-3) jnc bit2 ;; jump to bit-2 if bit-3 == 0 crc32 eax, ebx ;; compute crc32 of 4-byte data --- ceph-15.2.2/src/common/crc32c_intel_fast_zero_asm.s.orig 2020-05-26 08:34:32.226201974 -0400 +++ ceph-15.2.2/src/common/crc32c_intel_fast_zero_asm.s 2020-05-26 17:19:32.497201974 -0400 @@ -1,5 +1,5 @@ ; -; Copyright 2012-2013 Intel Corporation All Rights Reserved. +; Copyright 2012-2015 Intel Corporation All Rights Reserved. ; All rights reserved. ; ; http://opensource.org/licenses/BSD-3-Clause @@ -59,6 +59,19 @@ xor rbx, rbx ;; rbx = crc1 = 0; xor r10, r10 ;; r10 = crc2 = 0; + cmp len, %%bSize*3*2 + jbe %%non_prefetch + + %assign i 0 + %rep %%bSize/8 - 1 + crc32 rax, bufptmp ;; update crc0 + crc32 rbx, bufptmp ;; update crc1 + crc32 r10, bufptmp ;; update crc2 + %assign i (i+8) + %endrep + jmp %%next %+ %1 + +%%non_prefetch: %assign i 0 %rep %%bSize/8 - 1 crc32 rax, bufptmp ;; update crc0 @@ -66,6 +79,8 @@ crc32 r10, bufptmp ;; update crc2 %assign i (i+8) %endrep + +%%next %+ %1: crc32 rax, bufptmp ;; update crc0 crc32 rbx, bufptmp ;; update crc1 ; SKIP ;crc32 r10, bufptmp ;; update crc2 @@ -180,12 +195,15 @@ %define crc_init_dw r8d %endif - + endbranch push rdi push rbx mov rax, crc_init ;; rax = crc_init; + cmp len, 8 + jb less_than_8 + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;