Kyle McMartin eb16415
commit 3ec7d8a6b30659b34693730a374c0265a191c4ba
Kyle McMartin eb16415
Author: Kyle McMartin <kyle@mcmartin.ca>
Kyle McMartin eb16415
Date:   Wed Dec 3 12:26:23 2014 -0500
Kyle McMartin eb16415
Kyle McMartin eb16415
    Revert "[AArch64] Add optimized strchrnul."
Kyle McMartin eb16415
    
Kyle McMartin eb16415
    This reverts commit be9d4ccc7fe62751db1a5fdcb31958561dbbda9a.
Kyle McMartin eb16415
Kyle McMartin eb16415
diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
Kyle McMartin eb16415
deleted file mode 100644
Kyle McMartin eb16415
index b98c2e9..0000000
Kyle McMartin eb16415
--- a/sysdeps/aarch64/strchrnul.S
Kyle McMartin eb16415
+++ /dev/null
Kyle McMartin eb16415
@@ -1,130 +0,0 @@
Kyle McMartin eb16415
-/* strchrnul - find a character or nul in a string
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-   Copyright (C) 2014 Free Software Foundation, Inc.
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-   This file is part of the GNU C Library.
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-   The GNU C Library is free software; you can redistribute it and/or
Kyle McMartin eb16415
-   modify it under the terms of the GNU Lesser General Public
Kyle McMartin eb16415
-   License as published by the Free Software Foundation; either
Kyle McMartin eb16415
-   version 2.1 of the License, or (at your option) any later version.
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-   The GNU C Library is distributed in the hope that it will be useful,
Kyle McMartin eb16415
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
Kyle McMartin eb16415
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Kyle McMartin eb16415
-   Lesser General Public License for more details.
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-   You should have received a copy of the GNU Lesser General Public
Kyle McMartin eb16415
-   License along with the GNU C Library.  If not, see
Kyle McMartin eb16415
-   <http://www.gnu.org/licenses/>.  */
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-#include <sysdep.h>
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-/* Assumptions:
Kyle McMartin eb16415
- *
Kyle McMartin eb16415
- * ARMv8-a, AArch64
Kyle McMartin eb16415
- * Neon Available.
Kyle McMartin eb16415
- */
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-/* Arguments and results.  */
Kyle McMartin eb16415
-#define srcin		x0
Kyle McMartin eb16415
-#define chrin		w1
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-#define result		x0
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-/* Locals and temporaries.  */
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-#define src		x2
Kyle McMartin eb16415
-#define tmp1		x3
Kyle McMartin eb16415
-#define wtmp2		w4
Kyle McMartin eb16415
-#define tmp3		x5
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-#define vrepchr		v0
Kyle McMartin eb16415
-#define vdata1		v1
Kyle McMartin eb16415
-#define vdata2		v2
Kyle McMartin eb16415
-#define vhas_nul1	v3
Kyle McMartin eb16415
-#define vhas_nul2	v4
Kyle McMartin eb16415
-#define vhas_chr1	v5
Kyle McMartin eb16415
-#define vhas_chr2	v6
Kyle McMartin eb16415
-#define vrepmask	v15
Kyle McMartin eb16415
-#define vend1		v16
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-/* Core algorithm.
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-   For each 32-byte hunk we calculate a 64-bit syndrome value, with
Kyle McMartin eb16415
-   two bits per byte (LSB is always in bits 0 and 1, for both big
Kyle McMartin eb16415
-   and little-endian systems).  For each tuple, bit 0 is set iff
Kyle McMartin eb16415
-   the relevant byte matched the requested character or nul.  Since the
Kyle McMartin eb16415
-   bits in the syndrome reflect exactly the order in which things occur
Kyle McMartin eb16415
-   in the original string a count_trailing_zeros() operation will
Kyle McMartin eb16415
-   identify exactly which byte is causing the termination.  */
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-ENTRY (__strchrnul)
Kyle McMartin eb16415
-	/* Magic constant 0x40100401 to allow us to identify which lane
Kyle McMartin eb16415
-	   matches the termination condition.  */
Kyle McMartin eb16415
-	mov	wtmp2, #0x0401
Kyle McMartin eb16415
-	movk	wtmp2, #0x4010, lsl #16
Kyle McMartin eb16415
-	dup	vrepchr.16b, chrin
Kyle McMartin eb16415
-	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
Kyle McMartin eb16415
-	dup	vrepmask.4s, wtmp2
Kyle McMartin eb16415
-	ands	tmp1, srcin, #31
Kyle McMartin eb16415
-	b.eq	L(loop)
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-	/* Input string is not 32-byte aligned.  Rather than forcing
Kyle McMartin eb16415
-	   the padding bytes to a safe value, we calculate the syndrome
Kyle McMartin eb16415
-	   for all the bytes, but then mask off those bits of the
Kyle McMartin eb16415
-	   syndrome that are related to the padding.  */
Kyle McMartin eb16415
-	ld1	{vdata1.16b, vdata2.16b}, [src], #32
Kyle McMartin eb16415
-	neg	tmp1, tmp1
Kyle McMartin eb16415
-	cmeq	vhas_nul1.16b, vdata1.16b, #0
Kyle McMartin eb16415
-	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
Kyle McMartin eb16415
-	cmeq	vhas_nul2.16b, vdata2.16b, #0
Kyle McMartin eb16415
-	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
Kyle McMartin eb16415
-	orr	vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b
Kyle McMartin eb16415
-	orr	vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
Kyle McMartin eb16415
-	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
Kyle McMartin eb16415
-	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
Kyle McMartin eb16415
-	lsl	tmp1, tmp1, #1
Kyle McMartin eb16415
-	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
Kyle McMartin eb16415
-	mov	tmp3, #~0
Kyle McMartin eb16415
-	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
Kyle McMartin eb16415
-	lsr	tmp1, tmp3, tmp1
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-	mov	tmp3, vend1.2d[0]
Kyle McMartin eb16415
-	bic	tmp1, tmp3, tmp1	// Mask padding bits.
Kyle McMartin eb16415
-	cbnz	tmp1, L(tail)
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-L(loop):
Kyle McMartin eb16415
-	ld1	{vdata1.16b, vdata2.16b}, [src], #32
Kyle McMartin eb16415
-	cmeq	vhas_nul1.16b, vdata1.16b, #0
Kyle McMartin eb16415
-	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
Kyle McMartin eb16415
-	cmeq	vhas_nul2.16b, vdata2.16b, #0
Kyle McMartin eb16415
-	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
Kyle McMartin eb16415
-	/* Use a fast check for the termination condition.  */
Kyle McMartin eb16415
-	orr	vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
Kyle McMartin eb16415
-	orr	vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
Kyle McMartin eb16415
-	orr	vend1.16b, vhas_chr1.16b, vhas_chr2.16b
Kyle McMartin eb16415
-	addp	vend1.2d, vend1.2d, vend1.2d
Kyle McMartin eb16415
-	mov	tmp1, vend1.2d[0]
Kyle McMartin eb16415
-	cbz	tmp1, L(loop)
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-	/* Termination condition found.  Now need to establish exactly why
Kyle McMartin eb16415
-	   we terminated.  */
Kyle McMartin eb16415
-	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
Kyle McMartin eb16415
-	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
Kyle McMartin eb16415
-	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b		// 256->128
Kyle McMartin eb16415
-	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-	mov	tmp1, vend1.2d[0]
Kyle McMartin eb16415
-L(tail):
Kyle McMartin eb16415
-	/* Count the trailing zeros, by bit reversing...  */
Kyle McMartin eb16415
-	rbit	tmp1, tmp1
Kyle McMartin eb16415
-	/* Re-bias source.  */
Kyle McMartin eb16415
-	sub	src, src, #32
Kyle McMartin eb16415
-	clz	tmp1, tmp1	/* ... and counting the leading zeros.  */
Kyle McMartin eb16415
-	/* tmp1 is twice the offset into the fragment.  */
Kyle McMartin eb16415
-	add	result, src, tmp1, lsr #1
Kyle McMartin eb16415
-	ret
Kyle McMartin eb16415
-
Kyle McMartin eb16415
-END(__strchrnul)
Kyle McMartin eb16415
-weak_alias (__strchrnul, strchrnul)