From 17a154db6774a4acf347cfc5189eaf2cd675e696 Mon Sep 17 00:00:00 2001
From: Adenilson Cavalcanti
Date: Mon, 9 Apr 2018 15:14:19 -0700
Subject: [PATCH 2/3] Porting optimized longest_match

This patch was contributed to zlib-ng and features an improved longest_match
function using the most distant hash code to reduce number of checks
(see: http://www.gildor.org/en/projects/zlib).

Original patch by Jun He.

Changes relative to the original patch: get_match_len() loads words with
memcpy() (valid for unaligned addresses, no strict-aliasing violation) and
handles big-endian targets; arm_longest_match() keeps the reduced chain
length at 1 or more so that the pre-decrement in the search loop cannot
wrap around.
---
 CMakeLists.txt                  |   3 +-
 contrib/arm/arm_longest_match.h | 164 ++++++++++++++++++++++++++++++++
 deflate.c                       |  11 ++-
 3 files changed, 174 insertions(+), 4 deletions(-)
 create mode 100644 contrib/arm/arm_longest_match.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e9a74e9..3826eba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -141,7 +141,8 @@ if(CMAKE_COMPILER_IS_GNUCC)
         set(ZLIB_ARM_NEON_HDRS
             contrib/arm/chunkcopy.h
             contrib/arm/inffast_chunk.h
-            contrib/arm/neon_slide_hash.h)
+            contrib/arm/neon_slide_hash.h
+            contrib/arm/arm_longest_match.h)
         set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
         add_definitions(-DARM_NEON)
         set(COMPILER ${CMAKE_C_COMPILER})
diff --git a/contrib/arm/arm_longest_match.h b/contrib/arm/arm_longest_match.h
new file mode 100644
index 0000000..9e7083f
--- /dev/null
+++ b/contrib/arm/arm_longest_match.h
@@ -0,0 +1,164 @@
+/* Copyright (C) 1995-2011, 2016 Mark Adler
+ * Copyright (C) 2017 ARM Holdings Inc.
+ * Authors: Adenilson Cavalcanti
+ *          Jun He
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+#ifndef __ARM_LONGEST__MATCH__
+#define __ARM_LONGEST__MATCH__
+
+#if defined(ARM_NEON)
+#include "deflate.h"
+#include <stdint.h>
+#include <string.h>
+
+/*
+ * Return the number of leading bytes, at most max, that are equal in a and b.
+ * Bytes are compared one unsigned long at a time; the words are loaded with
+ * memcpy() so that the accesses are valid for unaligned addresses and do not
+ * violate the strict-aliasing rule.
+ */
+static inline long get_match_len(const unsigned char *a, const unsigned char *b, long max)
+{
+    long len = 0;
+    const long last_word = max - (long)sizeof(unsigned long);
+
+    while (len <= last_word) {
+        unsigned long wa, wb, diff;
+
+        memcpy(&wa, a + len, sizeof(wa));
+        memcpy(&wb, b + len, sizeof(wb));
+        diff = wa ^ wb;
+        if (diff) {
+            /* locate the lowest-addressed differing byte in the word */
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+            return len + (__builtin_clzl(diff) >> 3);
+#else
+            return len + (__builtin_ctzl(diff) >> 3);
+#endif
+        }
+        len += (long)sizeof(unsigned long);
+    }
+    /* fewer than sizeof(unsigned long) bytes are left: compare bytewise */
+    while (len < max && a[len] == b[len]) len++;
+    return len;
+}
+
+/*
+ * This implementation is based on the algorithm described at:
+ * http://www.gildor.org/en/projects/zlib
+ * It uses the hash chain indexed by the most distant hash code to
+ * reduce the number of checks.
+ * This also eliminates the unnecessary check loops in the legacy
+ * longest_match's do..while loop if the "most distant code" is out
+ * of the search buffer.
+ *
+ * s is the deflate state; cur_match is the window position at which the
+ * hash-chain walk starts. Returns s->prev_length if the most distant hash
+ * position is outside the search buffer; otherwise the best match length
+ * found, capped at s->lookahead. s->match_start is updated whenever a
+ * match longer than the best one so far is found.
+ */
+static inline unsigned arm_longest_match(deflate_state *const s, IPos cur_match) {
+    unsigned chain_length = s->max_chain_length;/* max hash chain length */
+    unsigned char *scan = s->window + s->strstart; /* current string */
+    unsigned char *match;                       /* matched string */
+    unsigned int len;                           /* length of current match */
+    unsigned int best_len = s->prev_length;     /* best match length so far */
+    unsigned int nice_match = s->nice_match;    /* stop if match long enough */
+    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+        s->strstart - (IPos)MAX_DIST(s) : 0;
+    /* Stop when cur_match becomes <= limit. To simplify the code,
+     * we prevent matches with the string of window index 0.
+     */
+    int offset = 0;  /* offset of head[most_distant_hash] from the incoming cur_match */
+    Pos *prev = s->prev;
+    unsigned int wmask = s->w_mask;
+    unsigned char *scan_buf_base = s->window;
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    /* Do not look for matches beyond the end of the input. This is necessary
+     * to make deflate deterministic.
+     */
+    if ((unsigned int)nice_match > s->lookahead) nice_match = s->lookahead;
+
+    Assert((unsigned long)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    /* find most distant hash code for lazy_match */
+    if (best_len > MIN_MATCH) {
+        /* search for most distant hash code */
+        int i;
+        uint16_t hash = 0;
+        IPos pos;
+
+        UPDATE_HASH(s, hash, scan[1]);
+        UPDATE_HASH(s, hash, scan[2]);
+        for (i = 3; i <= best_len; i++) {
+            UPDATE_HASH(s, hash, scan[i]);
+            /* get head IPos of hash calced by scan[i-2..i] */
+            pos = s->head[hash];
+            /* compare it to current "farthest hash" IPos */
+            if (pos <= cur_match) {
+                /* we have a new "farthest hash" now */
+                offset = i - 2;
+                cur_match = pos;
+            }
+        }
+
+        /* shift the limit so that it corresponds to the chosen offset */
+        limit += offset;
+        /*
+         * check if the most distant code's offset is out of search buffer
+         * if it is true, then this means scan[offset..offset+2] are not
+         * present in the search buffer. So we just return best_len
+         * we've found.
+         */
+        if (cur_match < limit) return best_len;
+
+        scan_buf_base -= offset;
+        /* reduce hash search depth based on best_len */
+        chain_length /= best_len - MIN_MATCH;
+        /* Keep at least one step: the loop below pre-decrements chain_length,
+         * so a value of 0 would wrap around and leave the walk unbounded.
+         */
+        if (chain_length == 0) chain_length = 1;
+    }
+
+    do {
+        Assert(cur_match < s->strstart, "no future");
+
+        /* Determine matched length at current pos */
+        match = scan_buf_base + cur_match;
+        len = get_match_len(match, scan, MAX_MATCH);
+
+        if (len > best_len) {
+            /* found longer string */
+            s->match_start = cur_match - offset;
+            best_len = len;
+            /* good enough? */
+            if (len >= nice_match) break;
+        }
+        /* move to prev pos in this hash chain */
+    } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0);
+
+    return (best_len <= s->lookahead)? best_len : s->lookahead;
+}
+
+#endif
+#endif
diff --git a/deflate.c b/deflate.c
index 36f99ac..4c42259 100644
--- a/deflate.c
+++ b/deflate.c
@@ -50,9 +50,6 @@
 /* @(#) $Id$ */
 
 #include "deflate.h"
-#if __ARM_NEON
-#include "contrib/arm/neon_slide_hash.h"
-#endif
 
 const char deflate_copyright[] =
    " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
@@ -196,6 +193,11 @@ local const config configuration_table[10] = {
     s->head[s->hash_size-1] = NIL; \
     zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
 
+#if defined(ARM_NEON)
+#include "contrib/arm/arm_longest_match.h"
+#include "contrib/arm/neon_slide_hash.h"
+#endif
+
 /* ===========================================================================
  * Slide the hash table when sliding the window down (could be avoided with 32
  * bit values at the expense of memory usage). We slide even when level == 0 to
@@ -1244,6 +1246,9 @@ local uInt longest_match(s, cur_match)
     deflate_state *s;
     IPos cur_match;                             /* current match */
 {
+#if defined(ARM_NEON)
+    return arm_longest_match(s, cur_match);
+#endif
     unsigned chain_length = s->max_chain_length;/* max hash chain length */
     register Bytef *scan = s->window + s->strstart; /* current string */
     register Bytef *match;                      /* matched string */
-- 
2.19.0