Blob Blame History Raw
From 17a154db6774a4acf347cfc5189eaf2cd675e696 Mon Sep 17 00:00:00 2001
From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
Date: Mon, 9 Apr 2018 15:14:19 -0700
Subject: [PATCH 2/3] Porting optimized longest_match

This patch was contributed to zlib-ng and features an improved longest_match
function using the most distant hash code to reduce number of checks
(see: http://www.gildor.org/en/projects/zlib).

Original patch by Jun He.
---
 CMakeLists.txt                  |   3 +-
 contrib/arm/arm_longest_match.h | 142 ++++++++++++++++++++++++++++++++
 deflate.c                       |  11 ++-
 3 files changed, 152 insertions(+), 4 deletions(-)
 create mode 100644 contrib/arm/arm_longest_match.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e9a74e9..3826eba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -141,7 +141,8 @@ if(CMAKE_COMPILER_IS_GNUCC)
         set(ZLIB_ARM_NEON_HDRS
             contrib/arm/chunkcopy.h
             contrib/arm/inffast_chunk.h
-            contrib/arm/neon_slide_hash.h)
+            contrib/arm/neon_slide_hash.h
+            contrib/arm/arm_longest_match.h)
         set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c)
         add_definitions(-DARM_NEON)
         set(COMPILER ${CMAKE_C_COMPILER})
diff --git a/contrib/arm/arm_longest_match.h b/contrib/arm/arm_longest_match.h
new file mode 100644
index 0000000..9e7083f
--- /dev/null
+++ b/contrib/arm/arm_longest_match.h
@@ -0,0 +1,142 @@
+/* Copyright (C) 1995-2011, 2016 Mark Adler
+ * Copyright (C) 2017 ARM Holdings Inc.
+ * Authors: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
+ *          Jun He <jun.he@arm.com>
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ * 1. The origin of this software must not be misrepresented; you must not
+ *  claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+#ifndef __ARM_LONGEST__MATCH__
+#define __ARM_LONGEST__MATCH__
+
+#if defined(ARM_NEON)
+#include "deflate.h"
+#include <stdint.h>
+static inline long get_match_len(const unsigned char *a, const unsigned char *b, long max)
+{
+    register int len = 0;
+    register unsigned long xor = 0;
+    register int check_loops = max/sizeof(unsigned long);
+    while(check_loops-- > 0) {
+        xor = (*(unsigned long *)(a+len)) ^ (*(unsigned long *)(b+len));
+        if (xor) break;
+        len += sizeof(unsigned long);
+    }
+    if (0 == xor) {
+        while (len < max) {
+            if (a[len] != b[len]) break;
+            len++;
+        }
+        return len;
+    }
+    xor = __builtin_ctzl(xor)>>3;
+    return len + xor;
+}
+
+/*
+ * This implementation is based on algorithm described at:
+ * http://www.gildor.org/en/projects/zlib
+ * It uses the hash chain indexed by the most distant hash code to
+ * reduce number of checks.
+ * This also eliminates the those unnecessary check loops in legacy
+ * longest_match's do..while loop if the "most distant code" is out
+ * of search buffer
+ *
+ */
+static inline unsigned arm_longest_match(deflate_state *const s, IPos cur_match) {
+    unsigned chain_length = s->max_chain_length;/* max hash chain length */
+    unsigned char *scan = s->window + s->strstart; /* current string */
+    unsigned char *match;                       /* matched string */
+    unsigned int len;                  /* length of current match */
+    unsigned int best_len = s->prev_length;     /* best match length so far */
+    unsigned int nice_match = s->nice_match;    /* stop if match long enough */
+    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+        s->strstart - (IPos)MAX_DIST(s) : 0;
+    /* Stop when cur_match becomes <= limit. To simplify the code,
+     * we prevent matches with the string of window index 0.
+     */
+    int offset = 0;  /* offset of the head[most_distant_hash] from IN cur_match */
+    Pos *prev = s->prev;
+    unsigned int wmask = s->w_mask;
+    unsigned char *scan_buf_base = s->window;
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    /* Do not look for matches beyond the end of the input. This is necessary
+     * to make deflate deterministic.
+     */
+    if ((unsigned int)nice_match > s->lookahead) nice_match = s->lookahead;
+
+    Assert((unsigned long)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    /* find most distant hash code for lazy_match */
+    if (best_len > MIN_MATCH) {
+        /* search for most distant hash code */
+        int i;
+        uint16_t hash = 0;
+        IPos pos;
+
+        UPDATE_HASH(s, hash, scan[1]);
+        UPDATE_HASH(s, hash, scan[2]);
+        for (i = 3; i <= best_len; i++) {
+            UPDATE_HASH(s, hash, scan[i]);
+            /* get head IPos of hash calced by scan[i-2..i] */
+            pos = s->head[hash];
+            /* compare it to current "farthest hash" IPos */
+            if (pos <= cur_match) {
+                /* we have a new "farthest hash" now */
+                offset = i - 2;
+                cur_match = pos;
+            }
+        }
+
+        /* update variables to correspond offset */
+        limit += offset;
+        /*
+         * check if the most distant code's offset is out of search buffer
+         * if it is true, then this means scan[offset..offset+2] are not
+	 * presented in the search buffer. So we just return best_len 
+	 * we've found.
+         */
+        if (cur_match < limit) return best_len;
+
+        scan_buf_base -= offset;
+        /* reduce hash search depth based on best_len */
+        chain_length /= best_len - MIN_MATCH;
+    }
+
+    do {
+        Assert(cur_match < s->strstart, "no future");
+
+        /* Determine matched length at current pos */
+        match = scan_buf_base + cur_match;
+        len = get_match_len(match, scan, MAX_MATCH);
+
+        if (len > best_len) {
+            /* found longer string */
+            s->match_start = cur_match - offset;
+            best_len = len;
+            /* good enough? */
+            if (len >= nice_match) break;
+        }
+        /* move to prev pos in this hash chain */
+    } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0);
+
+    return (best_len <= s->lookahead)? best_len : s->lookahead;
+}
+
+#endif
+#endif
diff --git a/deflate.c b/deflate.c
index 36f99ac..4c42259 100644
--- a/deflate.c
+++ b/deflate.c
@@ -50,9 +50,6 @@
 /* @(#) $Id$ */
 
 #include "deflate.h"
-#if __ARM_NEON
-#include "contrib/arm/neon_slide_hash.h"
-#endif
 
 const char deflate_copyright[] =
    " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
@@ -196,6 +193,11 @@ local const config configuration_table[10] = {
     s->head[s->hash_size-1] = NIL; \
     zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
 
+#if defined(ARM_NEON)
+#include "contrib/arm/arm_longest_match.h"
+#include "contrib/arm/neon_slide_hash.h"
+#endif
+
 /* ===========================================================================
  * Slide the hash table when sliding the window down (could be avoided with 32
  * bit values at the expense of memory usage). We slide even when level == 0 to
@@ -1244,6 +1246,9 @@ local uInt longest_match(s, cur_match)
     deflate_state *s;
     IPos cur_match;                             /* current match */
 {
+#if defined(ARM_NEON)
+    return arm_longest_match(s, cur_match);
+#endif
     unsigned chain_length = s->max_chain_length;/* max hash chain length */
     register Bytef *scan = s->window + s->strstart; /* current string */
     register Bytef *match;                      /* matched string */
-- 
2.19.0