Blob Blame History Raw
From 4e65ca20fc242e4a03471558a357d7809adeb9c4 Mon Sep 17 00:00:00 2001
From: IBM developers
Date: Thu, 1 Aug 2019 09:02:01 +0200
Subject: [PATCH] Add support for IBM Z hardware-accelerated deflate

Future versions of IBM Z mainframes will provide the DFLTCC instruction,
which implements the deflate algorithm in hardware, with estimated
compression and decompression performance orders of magnitude faster
than the current zlib and a compression ratio comparable with that of
level 1.

This patch adds DFLTCC support to zlib. In order to enable it, the
following build commands should be used:

    $ CFLAGS=-DDFLTCC ./configure
    $ make OBJA=dfltcc.o PIC_OBJA=dfltcc.lo

When built like this, zlib would compress in hardware on level 1, and in
software on all other levels. Decompression will always happen in
hardware. In order to enable DFLTCC compression for levels 1-6 (i.e. to
make it used by default) one could either add -DDFLTCC_LEVEL_MASK=0x7e
at compile time, or set the environment variable DFLTCC_LEVEL_MASK to
0x7e at run time.

Two DFLTCC compression calls produce the same results only when they
both are made on machines of the same generation, and when the
respective buffers have the same offset relative to the start of the
page. Therefore care should be taken when using hardware compression
when reproducible results are desired. One such use case - reproducible
software builds - is handled explicitly: when SOURCE_DATE_EPOCH
environment variable is set, the hardware compression is disabled.

DFLTCC does not support every single zlib feature, in particular:

    * inflate(Z_BLOCK) and inflate(Z_TREES)
    * inflateMark()
    * inflatePrime()
    * deflateParams() after the first deflate() call

When used, these functions will either switch to software, or, in case
this is not possible, gracefully fail.

This patch tries to add DFLTCC support in the least intrusive way.
All SystemZ-specific code was placed into a separate file, but
unfortunately there is still a noticeable amount of changes in the
main zlib code. Below is the summary of those changes.

DFLTCC takes as arguments a parameter block, an input buffer, an output
buffer and a window. Since DFLTCC requires parameter block to be
doubleword-aligned, and it's reasonable to allocate it alongside
deflate and inflate states, ZALLOC_STATE, ZFREE_STATE and ZCOPY_STATE
macros were introduced in order to encapsulate the allocation details.
The same is true for window, for which ZALLOC_WINDOW and
TRY_FREE_WINDOW macros were introduced.

While for inflate software and hardware window formats match, this is
not the case for deflate. Therefore, deflateSetDictionary and
deflateGetDictionary need special handling, which is triggered using the
new DEFLATE_SET_DICTIONARY_HOOK and DEFLATE_GET_DICTIONARY_HOOK macros.

deflateResetKeep() and inflateResetKeep() now update the DFLTCC
parameter block, which is allocated alongside zlib state, using
the new DEFLATE_RESET_KEEP_HOOK and INFLATE_RESET_KEEP_HOOK macros.

In order to make unsupported deflateParams(), inflatePrime() and
inflateMark() calls fail gracefully, the new DEFLATE_PARAMS_HOOK,
INFLATE_PRIME_HOOK and INFLATE_MARK_HOOK macros were introduced.

The algorithm implemented in hardware has a different compression ratio
than the one implemented in software. In order for deflateBound() to
return the correct results for the hardware implementation, the new
DEFLATE_BOUND_ADJUST_COMPLEN and DEFLATE_NEED_CONSERVATIVE_BOUND macros
were introduced.

Actual compression and decompression are handled by the new DEFLATE_HOOK
and INFLATE_TYPEDO_HOOK macros. Since inflation with DFLTCC manages the
window on its own, calling updatewindow() is suppressed using the new
INFLATE_NEED_UPDATEWINDOW() macro.

In addition to compression, DFLTCC computes CRC-32 and Adler-32
checksums, therefore, whenever it's used, software checksumming needs to
be suppressed using the new DEFLATE_NEED_CHECKSUM and
INFLATE_NEED_CHECKSUM macros.

DFLTCC will refuse to write an End-of-block Symbol if there is no input
data, thus in some cases it is necessary to do this manually. In order
to achieve this, send_bits, bi_reverse, bi_windup and flush_pending
were promoted from local to ZLIB_INTERNAL. Furthermore, since block and
stream termination must be handled in software as well, block_state enum
was moved to deflate.h.

Since the first call to dfltcc_inflate already needs the window, and it
might not be allocated yet, inflate_ensure_window was factored out of
updatewindow and made ZLIB_INTERNAL.
---
 Makefile.in                   |   8 +
 configure                     |  13 +
 contrib/README.contrib        |   4 +
 contrib/s390/dfltcc.c         | 901 ++++++++++++++++++++++++++++++++++
 contrib/s390/dfltcc.h         |  55 +++
 contrib/s390/dfltcc_deflate.h |  50 ++
 deflate.c                     |  60 ++-
 deflate.h                     |  12 +
 gzguts.h                      |   4 +
 inflate.c                     |  84 +++-
 inflate.h                     |   2 +
 test/infcover.c               |   2 +-
 test/minigzip.c               |   4 +
 trees.c                       |  13 +-
 14 files changed, 1161 insertions(+), 51 deletions(-)
 create mode 100644 contrib/s390/dfltcc.c
 create mode 100644 contrib/s390/dfltcc.h
 create mode 100644 contrib/s390/dfltcc_deflate.h

diff --git a/Makefile.in b/Makefile.in
index 5a77949..e756e2f 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -143,6 +143,14 @@ match.lo: match.S
 	mv _match.o match.lo
 	rm -f _match.s
 
+dfltcc.o: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/dfltcc.c
+
+dfltcc.lo: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/dfltcc.o $(SRCDIR)contrib/s390/dfltcc.c
+	-@mv objs/dfltcc.o $@
+
 example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
 	$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c
 
diff --git a/configure b/configure
index e974d1f..8fab355 100755
--- a/configure
+++ b/configure
@@ -826,6 +826,19 @@ EOF
   fi
 fi
 
+# Check whether sys/sdt.h is available
+cat > $test.c << EOF
+#include <sys/sdt.h>
+int main() { return 0; }
+EOF
+if try ${CC} ${CFLAGS} $test.c; then
+    echo "Checking for sys/sdt.h ... Yes." | tee -a configure.log
+    CFLAGS="$CFLAGS -DHAVE_SYS_SDT_H"
+    SFLAGS="$SFLAGS -DHAVE_SYS_SDT_H"
+else
+    echo "Checking for sys/sdt.h ... No." | tee -a configure.log
+fi
+
 # show the results in the log
 echo >> configure.log
 echo ALL = $ALL >> configure.log
diff --git a/contrib/README.contrib b/contrib/README.contrib
index a411d5c..b4d3b18 100644
--- a/contrib/README.contrib
+++ b/contrib/README.contrib
@@ -67,6 +67,10 @@ puff/       by Mark Adler <madler@alumni.caltech.edu>
         Small, low memory usage inflate.  Also serves to provide an
         unambiguous description of the deflate format.
 
+s390/       by Ilya Leoshkevich <iii@linux.ibm.com>
+        Hardware-accelerated deflate on IBM Z with DEFLATE CONVERSION CALL
+        instruction.
+
 testzlib/   by Gilles Vollant <info@winimage.com>
         Example of the use of zlib
 
diff --git a/contrib/s390/dfltcc.c b/contrib/s390/dfltcc.c
new file mode 100644
index 0000000..d187796
--- /dev/null
+++ b/contrib/s390/dfltcc.c
@@ -0,0 +1,901 @@
+/* dfltcc.c - SystemZ DEFLATE CONVERSION CALL support. */
+
+/*
+   Use the following commands to build zlib with DFLTCC support:
+        $ CFLAGS=-DDFLTCC ./configure
+        $ make OBJA=dfltcc.o PIC_OBJA=dfltcc.lo
+*/
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "../../zutil.h"
+#include "../../deflate.h"
+#include "../../inftrees.h"
+#include "../../inflate.h"
+#include "dfltcc.h"
+#include "dfltcc_deflate.h"
+#ifdef HAVE_SYS_SDT_H
+#include <sys/sdt.h>
+#endif
+
+/*
+   C wrapper for the DEFLATE CONVERSION CALL instruction.
+ */
+typedef enum {
+    DFLTCC_CC_OK = 0,
+    DFLTCC_CC_OP1_TOO_SHORT = 1,
+    DFLTCC_CC_OP2_TOO_SHORT = 2,
+    DFLTCC_CC_OP2_CORRUPT = 2,
+    DFLTCC_CC_AGAIN = 3,
+} dfltcc_cc;
+
+#define DFLTCC_QAF 0
+#define DFLTCC_GDHT 1
+#define DFLTCC_CMPR 2
+#define DFLTCC_XPND 4
+#define HBT_CIRCULAR (1 << 7)
+#define HB_BITS 15
+#define HB_SIZE (1 << HB_BITS)
+#define DFLTCC_FACILITY 151
+
+local inline dfltcc_cc dfltcc OF((int fn, void *param,
+                                  Bytef **op1, size_t *len1,
+                                  z_const Bytef **op2, size_t *len2,
+                                  void *hist));
+local inline dfltcc_cc dfltcc(fn, param, op1, len1, op2, len2, hist)
+    int fn;
+    void *param;
+    Bytef **op1;
+    size_t *len1;
+    z_const Bytef **op2;
+    size_t *len2;
+    void *hist;
+{
+    Bytef *t2 = op1 ? *op1 : NULL;
+    size_t t3 = len1 ? *len1 : 0;
+    z_const Bytef *t4 = op2 ? *op2 : NULL;
+    size_t t5 = len2 ? *len2 : 0;
+    register int r0 __asm__("r0") = fn;
+    register void *r1 __asm__("r1") = param;
+    register Bytef *r2 __asm__("r2") = t2;
+    register size_t r3 __asm__("r3") = t3;
+    register z_const Bytef *r4 __asm__("r4") = t4;
+    register size_t r5 __asm__("r5") = t5;
+    int cc;
+
+    __asm__ volatile(
+#ifdef HAVE_SYS_SDT_H
+                     STAP_PROBE_ASM(zlib, dfltcc_entry,
+                                    STAP_PROBE_ASM_TEMPLATE(5))
+#endif
+                     ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n"
+#ifdef HAVE_SYS_SDT_H
+                     STAP_PROBE_ASM(zlib, dfltcc_exit,
+                                    STAP_PROBE_ASM_TEMPLATE(5))
+#endif
+                     "ipm %[cc]\n"
+                     : [r2] "+r" (r2)
+                     , [r3] "+r" (r3)
+                     , [r4] "+r" (r4)
+                     , [r5] "+r" (r5)
+                     , [cc] "=r" (cc)
+                     : [r0] "r" (r0)
+                     , [r1] "r" (r1)
+                     , [hist] "r" (hist)
+#ifdef HAVE_SYS_SDT_H
+                     , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist)
+#endif
+                     : "cc", "memory");
+    t2 = r2; t3 = r3; t4 = r4; t5 = r5;
+
+    if (op1)
+        *op1 = t2;
+    if (len1)
+        *len1 = t3;
+    if (op2)
+        *op2 = t4;
+    if (len2)
+        *len2 = t5;
+    return (cc >> 28) & 3;
+}
+
+/*
+   Parameter Block for Query Available Functions.
+ */
+#define static_assert(c, msg) \
+        __attribute__((unused)) \
+        static char static_assert_failed_ ## msg[c ? 1 : -1]
+
+struct dfltcc_qaf_param {
+    char fns[16];
+    char reserved1[8];
+    char fmts[2];
+    char reserved2[6];
+};
+
+static_assert(sizeof(struct dfltcc_qaf_param) == 32,
+              sizeof_struct_dfltcc_qaf_param_is_32);
+
+local inline int is_bit_set OF((const char *bits, int n));
+local inline int is_bit_set(bits, n)
+    const char *bits;
+    int n;
+{
+    return bits[n / 8] & (1 << (7 - (n % 8)));
+}
+
+local inline void clear_bit OF((char *bits, int n));
+local inline void clear_bit(bits, n)
+    char *bits;
+    int n;
+{
+    bits[n / 8] &= ~(1 << (7 - (n % 8)));
+}
+
+#define DFLTCC_FMT0 0
+
+/*
+   Parameter Block for Generate Dynamic-Huffman Table, Compress and Expand.
+ */
+#define CVT_CRC32 0
+#define CVT_ADLER32 1
+#define HTT_FIXED 0
+#define HTT_DYNAMIC 1
+
+struct dfltcc_param_v0 {
+    uint16_t pbvn;                     /* Parameter-Block-Version Number */
+    uint8_t mvn;                       /* Model-Version Number */
+    uint8_t ribm;                      /* Reserved for IBM use */
+    unsigned reserved32 : 31;
+    unsigned cf : 1;                   /* Continuation Flag */
+    uint8_t reserved64[8];
+    unsigned nt : 1;                   /* New Task */
+    unsigned reserved129 : 1;
+    unsigned cvt : 1;                  /* Check Value Type */
+    unsigned reserved131 : 1;
+    unsigned htt : 1;                  /* Huffman-Table Type */
+    unsigned bcf : 1;                  /* Block-Continuation Flag */
+    unsigned bcc : 1;                  /* Block Closing Control */
+    unsigned bhf : 1;                  /* Block Header Final */
+    unsigned reserved136 : 1;
+    unsigned reserved137 : 1;
+    unsigned dhtgc : 1;                /* DHT Generation Control */
+    unsigned reserved139 : 5;
+    unsigned reserved144 : 5;
+    unsigned sbb : 3;                  /* Sub-Byte Boundary */
+    uint8_t oesc;                      /* Operation-Ending-Supplemental Code */
+    unsigned reserved160 : 12;
+    unsigned ifs : 4;                  /* Incomplete-Function Status */
+    uint16_t ifl;                      /* Incomplete-Function Length */
+    uint8_t reserved192[8];
+    uint8_t reserved256[8];
+    uint8_t reserved320[4];
+    uint16_t hl;                       /* History Length */
+    unsigned reserved368 : 1;
+    uint16_t ho : 15;                  /* History Offset */
+    uint32_t cv;                       /* Check Value */
+    unsigned eobs : 15;                /* End-of-block Symbol */
+    unsigned reserved431: 1;
+    uint8_t eobl : 4;                  /* End-of-block Length */
+    unsigned reserved436 : 12;
+    unsigned reserved448 : 4;
+    uint16_t cdhtl : 12;               /* Compressed-Dynamic-Huffman Table
+                                          Length */
+    uint8_t reserved464[6];
+    uint8_t cdht[288];
+    uint8_t reserved[32];
+    uint8_t csb[1152];
+};
+
+static_assert(sizeof(struct dfltcc_param_v0) == 1536,
+              sizeof_struct_dfltcc_param_v0_is_1536);
+
+local z_const char *oesc_msg OF((char *buf, int oesc));
+local z_const char *oesc_msg(buf, oesc)
+    char *buf;
+    int oesc;
+{
+    if (oesc == 0x00)
+        return NULL; /* Successful completion */
+    else {
+        sprintf(buf, "Operation-Ending-Supplemental Code is 0x%.2X", oesc);
+        return buf;
+    }
+}
+
+/*
+   Extension of inflate_state and deflate_state. Must be doubleword-aligned.
+*/
+struct dfltcc_state {
+    struct dfltcc_param_v0 param;      /* Parameter block. */
+    struct dfltcc_qaf_param af;        /* Available functions. */
+    uLong level_mask;                  /* Levels on which to use DFLTCC */
+    uLong block_size;                  /* New block each X bytes */
+    uLong block_threshold;             /* New block after total_in > X */
+    uLong dht_threshold;               /* New block only if avail_in >= X */
+    char msg[64];                      /* Buffer for strm->msg */
+};
+
+#define ALIGN_UP(p, size) \
+        (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
+
+#define GET_DFLTCC_STATE(state) ((struct dfltcc_state FAR *)( \
+        (char FAR *)(state) + ALIGN_UP(sizeof(*state), 8)))
+
+/*
+   Compress.
+ */
+local inline int dfltcc_are_params_ok(int level,
+                                      uInt window_bits,
+                                      int strategy,
+                                      uLong level_mask);
+local inline int dfltcc_are_params_ok(level, window_bits, strategy, level_mask)
+    int level;
+    uInt window_bits;
+    int strategy;
+    uLong level_mask;
+{
+    return (level_mask & (1 << level)) != 0 &&
+        (window_bits == HB_BITS) &&
+        (strategy == Z_FIXED || strategy == Z_DEFAULT_STRATEGY);
+}
+
+
+int ZLIB_INTERNAL dfltcc_can_deflate(strm)
+    z_streamp strm;
+{
+    deflate_state FAR *state = (deflate_state FAR *)strm->state;
+    struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state);
+
+    /* Unsupported compression settings */
+    if (!dfltcc_are_params_ok(state->level, state->w_bits, state->strategy,
+                              dfltcc_state->level_mask))
+        return 0;
+
+    /* Unsupported hardware */
+    if (!is_bit_set(dfltcc_state->af.fns, DFLTCC_GDHT) ||
+            !is_bit_set(dfltcc_state->af.fns, DFLTCC_CMPR) ||
+            !is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0))
+        return 0;
+
+    return 1;
+}
+
+/* Run DFLTCC-GDHT (Generate Dynamic-Huffman Table) on the stream's
+ * current input, using the parameter block kept next to the state. */
+local void dfltcc_gdht OF((z_streamp strm));
+local void dfltcc_gdht(strm)
+    z_streamp strm;
+{
+    deflate_state FAR *state = (deflate_state FAR *)strm->state;
+    struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param;
+    size_t avail_in = strm->avail_in;
+
+    dfltcc(DFLTCC_GDHT, param, NULL, NULL, &strm->next_in, &avail_in, NULL);
+}
+
+local dfltcc_cc dfltcc_cmpr OF((z_streamp strm));
+local dfltcc_cc dfltcc_cmpr(strm)
+    z_streamp strm;
+{
+    deflate_state FAR *state = (deflate_state FAR *)strm->state;
+    struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param;
+    size_t avail_in = strm->avail_in;
+    size_t avail_out = strm->avail_out;
+    dfltcc_cc cc;
+
+    cc = dfltcc(DFLTCC_CMPR | HBT_CIRCULAR,
+                param, &strm->next_out, &avail_out,
+                &strm->next_in, &avail_in, state->window);
+    strm->total_in += (strm->avail_in - avail_in);
+    strm->total_out += (strm->avail_out - avail_out);
+    strm->avail_in = avail_in;
+    strm->avail_out = avail_out;
+    return cc;
+}
+
+local void send_eobs OF((z_streamp strm,
+                         z_const struct dfltcc_param_v0 FAR *param));
+local void send_eobs(strm, param)
+    z_streamp strm;
+    z_const struct dfltcc_param_v0 FAR *param;
+{
+    deflate_state FAR *state = (deflate_state FAR *)strm->state;
+
+    _tr_send_bits(
+          state,
+          bi_reverse(param->eobs >> (15 - param->eobl), param->eobl),
+          param->eobl);
+    flush_pending(strm);
+    if (state->pending != 0) {
+        /* The remaining data is located in pending_out[0:pending]. If someone
+         * calls put_byte() - this might happen in deflate() - the byte will be
+         * placed into pending_buf[pending], which is incorrect. Move the
+         * remaining data to the beginning of pending_buf so that put_byte() is
+         * usable again.
+         */
+        memmove(state->pending_buf, state->pending_out, state->pending);
+        state->pending_out = state->pending_buf;
+    }
+#ifdef ZLIB_DEBUG
+    state->compressed_len += param->eobl;
+#endif
+}
+
+int ZLIB_INTERNAL dfltcc_deflate(strm, flush, result)
+    z_streamp strm;
+    int flush;
+    block_state *result;
+{
+    deflate_state FAR *state = (deflate_state FAR *)strm->state;
+    struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state);
+    struct dfltcc_param_v0 FAR *param = &dfltcc_state->param;
+    uInt masked_avail_in;
+    dfltcc_cc cc;
+    int need_empty_block;
+    int soft_bcc;
+    int no_flush;
+
+    if (!dfltcc_can_deflate(strm))
+        return 0;
+
+again:
+    masked_avail_in = 0;
+    soft_bcc = 0;
+    no_flush = flush == Z_NO_FLUSH;
+
+    /* Trailing empty block. Switch to software, except when Continuation Flag
+     * is set, which means that DFLTCC has buffered some output in the
+     * parameter block and needs to be called again in order to flush it.
+     */
+    if (flush == Z_FINISH && strm->avail_in == 0 && !param->cf) {
+        if (param->bcf) {
+            /* A block is still open, and the hardware does not support closing
+             * blocks without adding data. Thus, close it manually.
+             */
+            send_eobs(strm, param);
+            param->bcf = 0;
+        }
+        return 0;
+    }
+
+    if (strm->avail_in == 0 && !param->cf) {
+        *result = need_more;
+        return 1;
+    }
+
+    /* There is an open non-BFINAL block, we are not going to close it just
+     * yet, we have compressed more than DFLTCC_BLOCK_SIZE bytes and we see
+     * more than DFLTCC_DHT_MIN_SAMPLE_SIZE bytes. Open a new block with a new
+     * DHT in order to adapt to a possibly changed input data distribution.
+     */
+    if (param->bcf && no_flush &&
+            strm->total_in > dfltcc_state->block_threshold &&
+            strm->avail_in >= dfltcc_state->dht_threshold) {
+        if (param->cf) {
+            /* We need to flush the DFLTCC buffer before writing the
+             * End-of-block Symbol. Mask the input data and proceed as usual.
+             */
+            masked_avail_in += strm->avail_in;
+            strm->avail_in = 0;
+            no_flush = 0;
+        } else {
+            /* DFLTCC buffer is empty, so we can manually write the
+             * End-of-block Symbol right away.
+             */
+            send_eobs(strm, param);
+            param->bcf = 0;
+            dfltcc_state->block_threshold =
+                strm->total_in + dfltcc_state->block_size;
+            if (strm->avail_out == 0) {
+                *result = need_more;
+                return 1;
+            }
+        }
+    }
+
+    /* The caller gave us too much data. Pass only one block worth of
+     * uncompressed data to DFLTCC and mask the rest, so that on the next
+     * iteration we start a new block.
+     */
+    if (no_flush && strm->avail_in > dfltcc_state->block_size) {
+        masked_avail_in += (strm->avail_in - dfltcc_state->block_size);
+        strm->avail_in = dfltcc_state->block_size;
+    }
+
+    /* When we have an open non-BFINAL deflate block and caller indicates that
+     * the stream is ending, we need to close an open deflate block and open a
+     * BFINAL one.
+     */
+    need_empty_block = flush == Z_FINISH && param->bcf && !param->bhf;
+
+    /* Translate stream to parameter block */
+    param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32;
+    if (!no_flush)
+        /* We need to close a block. Always do this in software - when there is
+         * no input data, the hardware will not honor BCC. */
+        soft_bcc = 1;
+    if (flush == Z_FINISH && !param->bcf)
+        /* We are about to open a BFINAL block, set Block Header Final bit
+         * until the stream ends.
+         */
+        param->bhf = 1;
+    /* DFLTCC-CMPR will write to next_out, so make sure that buffers with
+     * higher precedence are empty.
+     */
+    Assert(state->pending == 0, "There must be no pending bytes");
+    Assert(state->bi_valid < 8, "There must be less than 8 pending bits");
+    param->sbb = (unsigned int)state->bi_valid;
+    if (param->sbb > 0)
+        *strm->next_out = (Bytef)state->bi_buf;
+    if (param->hl)
+        param->nt = 0; /* Honor history */
+    param->cv = state->wrap == 2 ? ZSWAP32(strm->adler) : strm->adler;
+
+    /* When opening a block, choose a Huffman-Table Type */
+    if (!param->bcf) {
+        if (state->strategy == Z_FIXED ||
+                (strm->total_in == 0 && dfltcc_state->block_threshold > 0))
+            param->htt = HTT_FIXED;
+        else {
+            param->htt = HTT_DYNAMIC;
+            dfltcc_gdht(strm);
+        }
+    }
+
+    /* Deflate */
+    do {
+        cc = dfltcc_cmpr(strm);
+        if (strm->avail_in < 4096 && masked_avail_in > 0)
+            /* We are about to call DFLTCC with a small input buffer, which is
+             * inefficient. Since there is masked data, there will be at least
+             * one more DFLTCC call, so skip the current one and make the next
+             * one handle more data.
+             */
+            break;
+    } while (cc == DFLTCC_CC_AGAIN);
+
+    /* Translate parameter block to stream */
+    strm->msg = oesc_msg(dfltcc_state->msg, param->oesc);
+    state->bi_valid = param->sbb;
+    if (state->bi_valid == 0)
+        state->bi_buf = 0; /* Avoid accessing next_out */
+    else
+        state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1);
+    strm->adler = state->wrap == 2 ? ZSWAP32(param->cv) : param->cv;
+
+    /* Unmask the input data */
+    strm->avail_in += masked_avail_in;
+    masked_avail_in = 0;
+
+    /* If we encounter an error, it means there is a bug in DFLTCC call */
+    Assert(cc != DFLTCC_CC_OP2_CORRUPT || param->oesc == 0, "BUG");
+
+    /* Update Block-Continuation Flag. It will be used to check whether to call
+     * GDHT the next time.
+     */
+    if (cc == DFLTCC_CC_OK) {
+        if (soft_bcc) {
+            send_eobs(strm, param);
+            param->bcf = 0;
+            dfltcc_state->block_threshold =
+                strm->total_in + dfltcc_state->block_size;
+        } else
+            param->bcf = 1;
+        if (flush == Z_FINISH) {
+            if (need_empty_block)
+                /* Make the current deflate() call also close the stream */
+                return 0;
+            else {
+                bi_windup(state);
+                *result = finish_done;
+            }
+        } else {
+            if (flush == Z_FULL_FLUSH)
+                param->hl = 0; /* Clear history */
+            *result = flush == Z_NO_FLUSH ? need_more : block_done;
+        }
+    } else {
+        param->bcf = 1;
+        *result = need_more;
+    }
+    if (strm->avail_in != 0 && strm->avail_out != 0)
+        goto again; /* deflate() must use all input or all output */
+    return 1;
+}
+
+/*
+   Expand.
+ */
+int ZLIB_INTERNAL dfltcc_can_inflate(strm)
+    z_streamp strm;
+{
+    struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
+    struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state);
+
+    /* Unsupported compression settings */
+    if (state->wbits != HB_BITS)
+        return 0;
+
+    /* Unsupported hardware */
+    return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) &&
+               is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0);
+}
+
+local dfltcc_cc dfltcc_xpnd OF((z_streamp strm));
+local dfltcc_cc dfltcc_xpnd(strm)
+    z_streamp strm;
+{
+    struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
+    struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param;
+    size_t avail_in = strm->avail_in;
+    size_t avail_out = strm->avail_out;
+    dfltcc_cc cc;
+
+    cc = dfltcc(DFLTCC_XPND | HBT_CIRCULAR,
+                param, &strm->next_out, &avail_out,
+                &strm->next_in, &avail_in, state->window);
+    strm->avail_in = avail_in;
+    strm->avail_out = avail_out;
+    return cc;
+}
+
+dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(strm, flush, ret)
+    z_streamp strm;
+    int flush;
+    int *ret;
+{
+    struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
+    struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state);
+    struct dfltcc_param_v0 FAR *param = &dfltcc_state->param;
+    dfltcc_cc cc;
+
+    if (flush == Z_BLOCK || flush == Z_TREES) {
+        /* DFLTCC does not support stopping on block boundaries */
+        if (dfltcc_inflate_disable(strm)) {
+            *ret = Z_STREAM_ERROR;
+            return DFLTCC_INFLATE_BREAK;
+        } else
+            return DFLTCC_INFLATE_SOFTWARE;
+    }
+
+    if (state->last) {
+        if (state->bits != 0) {
+            strm->next_in++;
+            strm->avail_in--;
+            state->bits = 0;
+        }
+        state->mode = CHECK;
+        return DFLTCC_INFLATE_CONTINUE;
+    }
+
+    if (strm->avail_in == 0 && !param->cf)
+        return DFLTCC_INFLATE_BREAK;
+
+    if (inflate_ensure_window(state)) {
+        state->mode = MEM;
+        return DFLTCC_INFLATE_CONTINUE;
+    }
+
+    /* Translate stream to parameter block */
+    param->cvt = state->flags ? CVT_CRC32 : CVT_ADLER32;
+    param->sbb = state->bits;
+    param->hl = state->whave; /* Software and hardware history formats match */
+    param->ho = (state->wnext - state->whave) & ((1 << HB_BITS) - 1);
+    if (param->hl)
+        param->nt = 0; /* Honor history for the first block */
+    param->cv = state->flags ? ZSWAP32(state->check) : state->check;
+
+    /* Inflate */
+    do {
+        cc = dfltcc_xpnd(strm);
+    } while (cc == DFLTCC_CC_AGAIN);
+
+    /* Translate parameter block to stream */
+    strm->msg = oesc_msg(dfltcc_state->msg, param->oesc);
+    state->last = cc == DFLTCC_CC_OK;
+    state->bits = param->sbb;
+    state->whave = param->hl;
+    state->wnext = (param->ho + param->hl) & ((1 << HB_BITS) - 1);
+    state->check = state->flags ? ZSWAP32(param->cv) : param->cv;
+    if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) {
+        /* Report an error if stream is corrupted */
+        state->mode = BAD;
+        return DFLTCC_INFLATE_CONTINUE;
+    }
+    state->mode = TYPEDO;
+    /* Break if operands are exhausted, otherwise continue looping */
+    return (cc == DFLTCC_CC_OP1_TOO_SHORT || cc == DFLTCC_CC_OP2_TOO_SHORT) ?
+        DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE;
+}
+
+int ZLIB_INTERNAL dfltcc_was_inflate_used(strm)
+    z_streamp strm;
+{
+    struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
+    struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param;
+
+    return !param->nt;
+}
+
+int ZLIB_INTERNAL dfltcc_inflate_disable(strm)
+    z_streamp strm;
+{
+    struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
+    struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state);
+
+    if (!dfltcc_can_inflate(strm))
+        return 0;
+    if (dfltcc_was_inflate_used(strm))
+        /* DFLTCC has already decompressed some data. Since there is not
+         * enough information to resume decompression in software, the call
+         * must fail.
+         */
+        return 1;
+    /* DFLTCC was not used yet - decompress in software */
+    memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af));
+    return 0;
+}
+
+/*
+   Memory management.
+   DFLTCC requires parameter blocks and window to be aligned. zlib allows
+   users to specify their own allocation functions, so using e.g.
+   `posix_memalign' is not an option. Thus, we overallocate and take the
+   aligned portion of the buffer.
+*/
+local inline int is_dfltcc_enabled OF((void));
+local inline int is_dfltcc_enabled(void)
+{
+    const char *env;
+    uint64_t facilities[(DFLTCC_FACILITY / 64) + 1];
+    register char r0 __asm__("r0");
+
+    env = secure_getenv("DFLTCC");
+    if (env && !strcmp(env, "0"))
+      /* User has explicitly disabled DFLTCC. */
+      return 0;
+
+    memset(facilities, 0, sizeof(facilities));
+    r0 = sizeof(facilities) / sizeof(facilities[0]) - 1;
+    /* STFLE is supported since z9-109 and only in z/Architecture mode. When
+     * compiling with -m31, gcc defaults to ESA mode, however, since the kernel
+     * is 64-bit, it's always z/Architecture mode at runtime.
+     */
+    __asm__ volatile(".machinemode push\n"
+                     ".machinemode zarch\n"
+                     "stfle %[facilities]\n"
+                     ".machinemode pop\n"
+                     : [facilities] "=Q" (facilities)
+                     , [r0] "+r" (r0)
+                     :
+                     : "cc");
+    return is_bit_set((const char *)facilities, DFLTCC_FACILITY);
+}
+
+void ZLIB_INTERNAL dfltcc_reset(strm, size)
+    z_streamp strm;   /* stream whose state is followed by dfltcc_state */
+    uInt size;        /* size of the generic state, before 8-byte padding */
+{
+    struct dfltcc_state *dfltcc_state =
+        (struct dfltcc_state *)((char FAR *)strm->state + ALIGN_UP(size, 8));
+    struct dfltcc_qaf_param *param =
+        (struct dfltcc_qaf_param *)&dfltcc_state->param;
+    const char *s;
+
+    /* Query available functions, using the parameter block as scratch */
+    if (is_dfltcc_enabled()) {
+        dfltcc(DFLTCC_QAF, param, NULL, NULL, NULL, NULL, NULL);
+        memmove(&dfltcc_state->af, param, sizeof(dfltcc_state->af));
+    } else
+        memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af));
+
+    if (secure_getenv("SOURCE_DATE_EPOCH"))
+        /* User needs reproducible results, but the output of DFLTCC_CMPR
+         * depends on buffers' page offsets.
+         */
+        clear_bit(dfltcc_state->af.fns, DFLTCC_CMPR);
+
+    /* Initialize parameter block (this also wipes the QAF output above) */
+    memset(&dfltcc_state->param, 0, sizeof(dfltcc_state->param));
+    dfltcc_state->param.nt = 1;
+
+    /* Tuning parameters: non-empty env vars override build-time defaults */
+#ifndef DFLTCC_LEVEL_MASK
+#define DFLTCC_LEVEL_MASK 0x2
+#endif
+    s = secure_getenv("DFLTCC_LEVEL_MASK");
+    dfltcc_state->level_mask = (s && *s) ? strtoul(s, NULL, 0) :
+                                           DFLTCC_LEVEL_MASK;
+#ifndef DFLTCC_BLOCK_SIZE
+#define DFLTCC_BLOCK_SIZE 1048576
+#endif
+    s = secure_getenv("DFLTCC_BLOCK_SIZE");
+    dfltcc_state->block_size = (s && *s) ? strtoul(s, NULL, 0) :
+                                           DFLTCC_BLOCK_SIZE;
+#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE
+#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096
+#endif
+    s = secure_getenv("DFLTCC_FIRST_FHT_BLOCK_SIZE");
+    dfltcc_state->block_threshold = (s && *s) ? strtoul(s, NULL, 0) :
+                                                DFLTCC_FIRST_FHT_BLOCK_SIZE;
+#ifndef DFLTCC_DHT_MIN_SAMPLE_SIZE
+#define DFLTCC_DHT_MIN_SAMPLE_SIZE 4096
+#endif
+    s = secure_getenv("DFLTCC_DHT_MIN_SAMPLE_SIZE");
+    dfltcc_state->dht_threshold = (s && *s) ? strtoul(s, NULL, 0) :
+                                              DFLTCC_DHT_MIN_SAMPLE_SIZE;
+#ifndef DFLTCC_RIBM
+#define DFLTCC_RIBM 0
+#endif
+    s = secure_getenv("DFLTCC_RIBM");
+    dfltcc_state->param.ribm = (s && *s) ? strtoul(s, NULL, 0) :
+                                           DFLTCC_RIBM;
+}
+
+voidpf ZLIB_INTERNAL dfltcc_alloc_state(strm, items, size)
+    z_streamp strm;
+    uInt items;
+    uInt size;
+{
+    /* Generic state, padded to a multiple of 8, then struct dfltcc_state */
+    size_t total = ALIGN_UP(items * size, 8) + sizeof(struct dfltcc_state);
+    return ZALLOC(strm, total, sizeof(unsigned char));
+}
+
+void ZLIB_INTERNAL dfltcc_copy_state(dst, src, size)
+    voidpf dst;         /* destination for the combined state */
+    const voidpf src;   /* generic state followed by struct dfltcc_state */
+    uInt size;          /* size of the generic state alone */
+{
+    zmemcpy(dst, src, ALIGN_UP(size, 8) + sizeof(struct dfltcc_state));
+}
+
+static const int PAGE_ALIGN = 0x1000;
+
+voidpf ZLIB_INTERNAL dfltcc_alloc_window(strm, items, size)
+    z_streamp strm;
+    uInt items;
+    uInt size;
+{
+    voidpf raw;
+    char FAR *window;
+
+    /* Over-allocate: room to align the window, plus a slot right before it
+     * that remembers the pointer to pass to ZFREE later. */
+    raw = ZALLOC(strm, sizeof(voidpf) + items * size + PAGE_ALIGN,
+                 sizeof(unsigned char));
+    if (raw == NULL)
+        return NULL;
+    window = ALIGN_UP((char FAR *)raw + sizeof(voidpf), PAGE_ALIGN);
+    ((voidpf *)window)[-1] = raw;
+    return window;
+}
+
+void ZLIB_INTERNAL dfltcc_free_window(strm, w)
+    z_streamp strm;
+    voidpf w;
+{
+    if (w != NULL) /* the pointer to free is stored right before w */
+        ZFREE(strm, ((voidpf *)w)[-1]);
+}
+
+/*
+   Switching between hardware and software compression.
+   DFLTCC does not support all zlib settings, e.g. generation of non-compressed
+   blocks or alternative window sizes. Applying such settings mid-stream with
+   deflateParams would require converting between hardware and software window
+   formats; this is not implemented, so the switch fails with Z_STREAM_ERROR.
+*/
+int ZLIB_INTERNAL dfltcc_deflate_params(strm, level, strategy)
+    z_streamp strm;
+    int level;
+    int strategy;
+{
+    deflate_state FAR *s = (deflate_state FAR *)strm->state;
+    struct dfltcc_state FAR *hw = GET_DFLTCC_STATE(s);
+    struct dfltcc_param_v0 FAR *pb = &hw->param;
+    int hw_before = dfltcc_can_deflate(strm);
+    int hw_after = dfltcc_are_params_ok(level, s->w_bits, strategy,
+                                        hw->level_mask);
+    int mode_changes = hw_after != hw_before;
+    int untouched = strm->total_in == 0 && pb->nt == 1 && pb->hl == 0;
+
+    /* Nothing to do when we keep working in the same mode, or when DFLTCC
+     * was not used yet.
+     */
+    if (!mode_changes || untouched)
+        return Z_OK;
+
+    /* Switching between hardware and software is not implemented */
+    return Z_STREAM_ERROR;
+}
+
+/*
+   Preloading history: append count bytes of buf to the circular history.
+*/
+local void append_history OF((struct dfltcc_param_v0 FAR *param,
+                              Bytef *history,
+                              const Bytef *buf,
+                              uInt count));
+local void append_history(param, history, buf, count)
+    struct dfltcc_param_v0 FAR *param; /* ho and hl are updated */
+    Bytef *history;                    /* circular buffer, HB_SIZE bytes */
+    const Bytef *buf;                  /* bytes to append */
+    uInt count;                        /* number of bytes in buf */
+{
+    size_t offset; /* write position inside history */
+    size_t n;
+
+    /* Do not use more than 32K - only the last HB_SIZE bytes are kept */
+    if (count > HB_SIZE) {
+        buf += count - HB_SIZE;
+        count = HB_SIZE;
+    }
+    offset = (param->ho + param->hl) % HB_SIZE;
+    if (offset + count <= HB_SIZE)
+        /* Circular history buffer does not wrap - copy one chunk */
+        zmemcpy(history + offset, buf, count);
+    else {
+        /* Circular history buffer wraps - copy two chunks */
+        n = HB_SIZE - offset;
+        zmemcpy(history + offset, buf, n);
+        zmemcpy(history, buf + n, count - n);
+    }
+    n = param->hl + count;
+    if (n <= HB_SIZE)
+        /* All history fits into buffer - no need to discard anything */
+        param->hl = n;
+    else {
+        /* History does not fit into buffer - discard extra bytes */
+        param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE;
+        param->hl = HB_SIZE;
+    }
+}
+
+int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(strm, dictionary, dict_length)
+    z_streamp strm;
+    const Bytef *dictionary;
+    uInt dict_length;
+{
+    deflate_state FAR *s = (deflate_state FAR *)strm->state;
+    struct dfltcc_state FAR *hw = GET_DFLTCC_STATE(s);
+
+    /* The deflate window doubles as the DFLTCC circular history buffer */
+    append_history(&hw->param, s->window, dictionary, dict_length);
+    s->strstart = 1; /* Add FDICT to zlib header */
+    return Z_OK;
+}
+
+int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(strm, dictionary, dict_length)
+    z_streamp strm;
+    Bytef *dictionary;
+    uInt *dict_length;
+{
+    deflate_state FAR *s = (deflate_state FAR *)strm->state;
+    struct dfltcc_state FAR *hw = GET_DFLTCC_STATE(s);
+    struct dfltcc_param_v0 FAR *pb = &hw->param;
+
+    if (dictionary) {
+        size_t start = pb->ho;
+        size_t total = pb->hl;
+        size_t first = HB_SIZE - start; /* bytes from start to buffer end */
+
+        if (start + total <= HB_SIZE) {
+            /* History is contiguous - a single copy suffices */
+            zmemcpy(dictionary, s->window + start, total);
+        } else {
+            /* History wraps around - copy the end part, then the beginning */
+            zmemcpy(dictionary, s->window + start, first);
+            zmemcpy(dictionary + first, s->window, total - first);
+        }
+    }
+    if (dict_length)
+        *dict_length = pb->hl;
+    return Z_OK;
+}
\ No newline at end of file
diff --git a/contrib/s390/dfltcc.h b/contrib/s390/dfltcc.h
new file mode 100644
index 0000000..574e84c
--- /dev/null
+++ b/contrib/s390/dfltcc.h
@@ -0,0 +1,55 @@
+#ifndef DFLTCC_H
+#define DFLTCC_H
+
+#include "../../zlib.h"
+#include "../../zutil.h"
+
+voidpf ZLIB_INTERNAL dfltcc_alloc_state OF((z_streamp strm, uInt items,
+                                            uInt size));
+void ZLIB_INTERNAL dfltcc_copy_state OF((voidpf dst, const voidpf src,
+                                         uInt size));
+void ZLIB_INTERNAL dfltcc_reset OF((z_streamp strm, uInt size));
+voidpf ZLIB_INTERNAL dfltcc_alloc_window OF((z_streamp strm, uInt items,
+                                             uInt size));
+void ZLIB_INTERNAL dfltcc_free_window OF((z_streamp strm, voidpf w));
+int ZLIB_INTERNAL dfltcc_can_inflate OF((z_streamp strm));
+typedef enum {
+    DFLTCC_INFLATE_CONTINUE,  /* INFLATE_TYPEDO_HOOK executes `break` */
+    DFLTCC_INFLATE_BREAK,     /* INFLATE_TYPEDO_HOOK jumps to inf_leave */
+    DFLTCC_INFLATE_SOFTWARE   /* hook falls through to the software code */
+} dfltcc_inflate_action;
+dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate OF((z_streamp strm,
+                                                       int flush, int *ret));
+int ZLIB_INTERNAL dfltcc_was_inflate_used OF((z_streamp strm));
+int ZLIB_INTERNAL dfltcc_inflate_disable OF((z_streamp strm));
+
+#define ZALLOC_STATE dfltcc_alloc_state
+#define ZFREE_STATE ZFREE
+#define ZCOPY_STATE dfltcc_copy_state
+#define ZALLOC_WINDOW dfltcc_alloc_window
+#define ZFREE_WINDOW dfltcc_free_window
+#define TRY_FREE_WINDOW dfltcc_free_window
+#define INFLATE_RESET_KEEP_HOOK(strm) \
+    dfltcc_reset((strm), sizeof(struct inflate_state))
+#define INFLATE_PRIME_HOOK(strm, bits, value) \
+    do { if (dfltcc_inflate_disable((strm))) return Z_STREAM_ERROR; } while (0)
+#define INFLATE_TYPEDO_HOOK(strm, flush) \
+    if (dfltcc_can_inflate((strm))) { \
+        dfltcc_inflate_action action; \
+\
+        RESTORE(); \
+        action = dfltcc_inflate((strm), (flush), &ret); \
+        LOAD(); \
+        if (action == DFLTCC_INFLATE_CONTINUE) \
+            break; \
+        else if (action == DFLTCC_INFLATE_BREAK) \
+            goto inf_leave; \
+    }
+#define INFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_inflate((strm)))
+#define INFLATE_NEED_UPDATEWINDOW(strm) (!dfltcc_can_inflate((strm)))
+#define INFLATE_MARK_HOOK(strm) \
+    do { \
+        if (dfltcc_was_inflate_used((strm))) return -(1L << 16); \
+    } while (0)
+
+#endif
\ No newline at end of file
diff --git a/contrib/s390/dfltcc_deflate.h b/contrib/s390/dfltcc_deflate.h
new file mode 100644
index 0000000..a129a91
--- /dev/null
+++ b/contrib/s390/dfltcc_deflate.h
@@ -0,0 +1,50 @@
+#ifndef DFLTCC_DEFLATE_H
+#define DFLTCC_DEFLATE_H
+
+#include "dfltcc.h"
+
+int ZLIB_INTERNAL dfltcc_can_deflate OF((z_streamp strm));
+int ZLIB_INTERNAL dfltcc_deflate OF((z_streamp strm,
+                                     int flush,
+                                     block_state *result));
+int ZLIB_INTERNAL dfltcc_deflate_params OF((z_streamp strm,
+                                            int level,
+                                            int strategy));
+int ZLIB_INTERNAL dfltcc_deflate_set_dictionary OF((z_streamp strm,
+                                                    const Bytef *dictionary,
+                                                    uInt dict_length));
+int ZLIB_INTERNAL dfltcc_deflate_get_dictionary OF((z_streamp strm,
+                                                    Bytef *dictionary,
+                                                    uInt* dict_length));
+
+#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \
+    do { \
+        if (dfltcc_can_deflate((strm))) \
+            return dfltcc_deflate_set_dictionary((strm), (dict), (dict_len)); \
+    } while (0)
+#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \
+    do { \
+        if (dfltcc_can_deflate((strm))) \
+            return dfltcc_deflate_get_dictionary((strm), (dict), (dict_len)); \
+    } while (0)
+#define DEFLATE_RESET_KEEP_HOOK(strm) \
+    dfltcc_reset((strm), sizeof(deflate_state))
+#define DEFLATE_PARAMS_HOOK(strm, level, strategy) \
+    do { \
+        int err; \
+\
+        err = dfltcc_deflate_params((strm), (level), (strategy)); \
+        if (err == Z_STREAM_ERROR) \
+            return err; \
+    } while (0)
+#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \
+    do { /* runs before deflateBound() validates strm: check it first */ \
+        if (deflateStateCheck((strm)) || dfltcc_can_deflate((strm))) \
+            (complen) = (3 + 5 + 5 + 4 + 19 * 3 + (286 + 30) * 7 + \
+                         (source_len) * 16 + 15 + 7) >> 3; \
+    } while (0)
+#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (dfltcc_can_deflate((strm)))
+#define DEFLATE_HOOK dfltcc_deflate
+#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm)))
+
+#endif
\ No newline at end of file
diff --git a/deflate.c b/deflate.c
index 1ec7614..089285a 100644
--- a/deflate.c
+++ b/deflate.c
@@ -61,15 +61,29 @@ const char deflate_copyright[] =
  */
 
 /* ===========================================================================
- *  Function prototypes.
+ *  Architecture-specific bits.
  */
-typedef enum {
-    need_more,      /* block not completed, need more input or more output */
-    block_done,     /* block flush performed */
-    finish_started, /* finish started, need only more output at next deflate */
-    finish_done     /* finish done, accept no more input or output */
-} block_state;
+#ifdef DFLTCC
+#  include "contrib/s390/dfltcc_deflate.h"
+#else
+#define ZALLOC_STATE ZALLOC
+#define ZFREE_STATE ZFREE
+#define ZCOPY_STATE zmemcpy
+#define ZALLOC_WINDOW ZALLOC
+#define TRY_FREE_WINDOW TRY_FREE
+#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
+#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
+#define DEFLATE_RESET_KEEP_HOOK(strm) do {} while (0)
+#define DEFLATE_PARAMS_HOOK(strm, level, strategy) do {} while (0)
+#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen) do {} while (0)
+#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) 0
+#define DEFLATE_HOOK(strm, flush, bstate) 0
+#define DEFLATE_NEED_CHECKSUM(strm) 1
+#endif
 
+/* ===========================================================================
+ *  Function prototypes.
+ */
 typedef block_state (*compress_func) OF((deflate_state *s, int flush));
 /* Compression function. Returns the block state after the call. */
 
@@ -85,7 +99,6 @@ local block_state deflate_rle    OF((deflate_state *s, int flush));
 local block_state deflate_huff   OF((deflate_state *s, int flush));
 local void lm_init        OF((deflate_state *s));
 local void putShortMSB    OF((deflate_state *s, uInt b));
-local void flush_pending  OF((z_streamp strm));
 local unsigned read_buf   OF((z_streamp strm, Bytef *buf, unsigned size));
 #ifdef ASMV
 #  pragma message("Assembler code may have bugs -- use at your own risk")
@@ -301,7 +314,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
         return Z_STREAM_ERROR;
     }
     if (windowBits == 8) windowBits = 9;  /* until 256-byte window bug fixed */
-    s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
+    s = (deflate_state *) ZALLOC_STATE(strm, 1, sizeof(deflate_state));
     if (s == Z_NULL) return Z_MEM_ERROR;
     strm->state = (struct internal_state FAR *)s;
     s->strm = strm;
@@ -318,7 +331,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
     s->hash_mask = s->hash_size - 1;
     s->hash_shift =  ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
 
-    s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
+    s->window = (Bytef *) ZALLOC_WINDOW(strm, s->w_size, 2*sizeof(Byte));
     s->prev   = (Posf *)  ZALLOC(strm, s->w_size, sizeof(Pos));
     s->head   = (Posf *)  ZALLOC(strm, s->hash_size, sizeof(Pos));
 
@@ -394,6 +407,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
     /* when using zlib wrappers, compute Adler-32 for provided dictionary */
     if (wrap == 1)
         strm->adler = adler32(strm->adler, dictionary, dictLength);
+    DEFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength);
     s->wrap = 0;                    /* avoid computing Adler-32 in read_buf */
 
     /* if dictionary would fill window, just replace the history */
@@ -452,6 +466,7 @@ int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength)
 
     if (deflateStateCheck(strm))
         return Z_STREAM_ERROR;
+    DEFLATE_GET_DICTIONARY_HOOK(strm, dictionary, dictLength);
     s = strm->state;
     len = s->strstart + s->lookahead;
     if (len > s->w_size)
@@ -498,6 +513,8 @@ int ZEXPORT deflateResetKeep (strm)
 
     _tr_init(s);
 
+    DEFLATE_RESET_KEEP_HOOK(strm);
+
     return Z_OK;
 }
 
@@ -584,6 +601,7 @@ int ZEXPORT deflateParams(strm, level, strategy)
     if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) {
         return Z_STREAM_ERROR;
     }
+    DEFLATE_PARAMS_HOOK(strm, level, strategy);
     func = configuration_table[s->level].func;
 
     if ((strategy != s->strategy || func != configuration_table[level].func) &&
@@ -659,6 +677,7 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
     /* conservative upper bound for compressed data */
     complen = sourceLen +
               ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5;
+    DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen);
 
     /* if can't get parameters, return conservative bound plus zlib wrapper */
     if (deflateStateCheck(strm))
@@ -700,7 +719,8 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
     }
 
     /* if not default parameters, return conservative bound */
-    if (s->w_bits != 15 || s->hash_bits != 8 + 7)
+    if (DEFLATE_NEED_CONSERVATIVE_BOUND(strm) ||
+            s->w_bits != 15 || s->hash_bits != 8 + 7)
         return complen + wraplen;
 
     /* default settings: return tight bound for that case */
@@ -727,7 +747,7 @@ local void putShortMSB (s, b)
  * applications may wish to modify it to avoid allocating a large
  * strm->next_out buffer and copying into it. (See also read_buf()).
  */
-local void flush_pending(strm)
+void ZLIB_INTERNAL flush_pending(strm)
     z_streamp strm;
 {
     unsigned len;
@@ -997,7 +1017,8 @@ int ZEXPORT deflate (strm, flush)
         (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
         block_state bstate;
 
-        bstate = s->level == 0 ? deflate_stored(s, flush) :
+        bstate = DEFLATE_HOOK(strm, flush, &bstate) ? bstate :
+                 s->level == 0 ? deflate_stored(s, flush) :
                  s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
                  s->strategy == Z_RLE ? deflate_rle(s, flush) :
                  (*(configuration_table[s->level].func))(s, flush);
@@ -1086,9 +1107,9 @@ int ZEXPORT deflateEnd (strm)
     TRY_FREE(strm, strm->state->pending_buf);
     TRY_FREE(strm, strm->state->head);
     TRY_FREE(strm, strm->state->prev);
-    TRY_FREE(strm, strm->state->window);
+    TRY_FREE_WINDOW(strm, strm->state->window);
 
-    ZFREE(strm, strm->state);
+    ZFREE_STATE(strm, strm->state);
     strm->state = Z_NULL;
 
     return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
@@ -1119,13 +1140,13 @@ int ZEXPORT deflateCopy (dest, source)
 
     zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
 
-    ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
+    ds = (deflate_state *) ZALLOC_STATE(dest, 1, sizeof(deflate_state));
     if (ds == Z_NULL) return Z_MEM_ERROR;
     dest->state = (struct internal_state FAR *) ds;
-    zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state));
+    ZCOPY_STATE((voidpf)ds, (voidpf)ss, sizeof(deflate_state));
     ds->strm = dest;
 
-    ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
+    ds->window = (Bytef *) ZALLOC_WINDOW(dest, ds->w_size, 2*sizeof(Byte));
     ds->prev   = (Posf *)  ZALLOC(dest, ds->w_size, sizeof(Pos));
     ds->head   = (Posf *)  ZALLOC(dest, ds->hash_size, sizeof(Pos));
     overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
@@ -1174,7 +1195,8 @@ local unsigned read_buf(strm, buf, size)
     strm->avail_in  -= len;
 
     zmemcpy(buf, strm->next_in, len);
-    if (strm->state->wrap == 1) {
+    if (!DEFLATE_NEED_CHECKSUM(strm)) {} /* hook: no software checksum */
+    else if (strm->state->wrap == 1) {
         strm->adler = adler32(strm->adler, buf, len);
     }
 #ifdef GZIP
diff --git a/deflate.h b/deflate.h
index 23ecdd3..821a4b9 100644
--- a/deflate.h
+++ b/deflate.h
@@ -304,6 +304,7 @@ void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s));
 void ZLIB_INTERNAL _tr_align OF((deflate_state *s));
 void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
                         ulg stored_len, int last));
+void ZLIB_INTERNAL _tr_send_bits OF((deflate_state *s, int value, int length));
 
 #define d_code(dist) \
    ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
@@ -346,4 +347,15 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
               flush = _tr_tally(s, distance, length)
 #endif
 
+typedef enum {
+    need_more,      /* block not completed, need more input or more output */
+    block_done,     /* block flush performed */
+    finish_started, /* finish started, need only more output at next deflate */
+    finish_done     /* finish done, accept no more input or output */
+} block_state;
+
+unsigned ZLIB_INTERNAL bi_reverse OF((unsigned code, int len));
+void ZLIB_INTERNAL bi_windup OF((deflate_state *s));
+void ZLIB_INTERNAL flush_pending OF((z_streamp strm));
+
 #endif /* DEFLATE_H */
diff --git a/gzguts.h b/gzguts.h
index 990a4d2..3218395 100644
--- a/gzguts.h
+++ b/gzguts.h
@@ -153,7 +153,11 @@
 
 /* default i/o buffer size -- double this for output when reading (this and
    twice this must be able to fit in an unsigned type) */
+#ifdef DFLTCC
+#define GZBUFSIZE 131072
+#else
 #define GZBUFSIZE 8192
+#endif
 
 /* gzip modes, also provide a little integrity check on the passed structure */
 #define GZ_NONE 0
diff --git a/inflate.c b/inflate.c
index ac333e8..f77c2ae 100644
--- a/inflate.c
+++ b/inflate.c
@@ -85,6 +85,23 @@
 #include "inflate.h"
 #include "inffast.h"
 
+/* architecture-specific bits */
+#ifdef DFLTCC
+#  include "contrib/s390/dfltcc.h"
+#else
+#define ZALLOC_STATE ZALLOC
+#define ZFREE_STATE ZFREE
+#define ZCOPY_STATE zmemcpy
+#define ZALLOC_WINDOW ZALLOC
+#define ZFREE_WINDOW ZFREE
+#define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0)
+#define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0)
+#define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0)
+#define INFLATE_NEED_CHECKSUM(strm) 1
+#define INFLATE_NEED_UPDATEWINDOW(strm) 1
+#define INFLATE_MARK_HOOK(strm) do {} while (0)
+#endif
+
 #ifdef MAKEFIXED
 #  ifndef BUILDFIXED
 #    define BUILDFIXED
@@ -137,6 +154,7 @@ z_streamp strm;
     state->lencode = state->distcode = state->next = state->codes;
     state->sane = 1;
     state->back = -1;
+    INFLATE_RESET_KEEP_HOOK(strm);
     Tracev((stderr, "inflate: reset\n"));
     return Z_OK;
 }
@@ -182,7 +200,7 @@ int windowBits;
     if (windowBits && (windowBits < 8 || windowBits > 15))
         return Z_STREAM_ERROR;
     if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) {
-        ZFREE(strm, state->window);
+        ZFREE_WINDOW(strm, state->window);
         state->window = Z_NULL;
     }
 
@@ -221,7 +239,7 @@ int stream_size;
         strm->zfree = zcfree;
 #endif
     state = (struct inflate_state FAR *)
-            ZALLOC(strm, 1, sizeof(struct inflate_state));
+            ZALLOC_STATE(strm, 1, sizeof(struct inflate_state));
     if (state == Z_NULL) return Z_MEM_ERROR;
     Tracev((stderr, "inflate: allocated\n"));
     strm->state = (struct internal_state FAR *)state;
@@ -230,7 +248,7 @@ int stream_size;
     state->mode = HEAD;     /* to pass state test in inflateReset2() */
     ret = inflateReset2(strm, windowBits);
     if (ret != Z_OK) {
-        ZFREE(strm, state);
+        ZFREE_STATE(strm, state);
         strm->state = Z_NULL;
     }
     return ret;
@@ -252,6 +270,7 @@ int value;
     struct inflate_state FAR *state;
 
     if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    INFLATE_PRIME_HOOK(strm, bits, value);
     state = (struct inflate_state FAR *)strm->state;
     if (bits < 0) {
         state->hold = 0;
@@ -379,6 +398,27 @@ void makefixed()
 }
 #endif /* MAKEFIXED */
 
+int ZLIB_INTERNAL inflate_ensure_window(state)
+    struct inflate_state *state; /* state->strm supplies the allocator */
+{
+    /* if it hasn't been done already, allocate space for the window */
+    if (state->window == Z_NULL) {
+        state->window = (unsigned char FAR *)
+                        ZALLOC_WINDOW(state->strm, 1U << state->wbits,
+                                      sizeof(unsigned char));
+        if (state->window == Z_NULL) return 1; /* allocation failed */
+    }
+
+    /* if window not in use yet, initialize */
+    if (state->wsize == 0) {
+        state->wsize = 1U << state->wbits;
+        state->wnext = 0;
+        state->whave = 0;
+    }
+
+    return 0; /* window is allocated and initialized */
+}
+
 /*
    Update the window with the last wsize (normally 32K) bytes written before
    returning.  If window does not exist yet, create it.  This is only called
@@ -403,20 +443,7 @@ unsigned copy;
 
     state = (struct inflate_state FAR *)strm->state;
 
-    /* if it hasn't been done already, allocate space for the window */
-    if (state->window == Z_NULL) {
-        state->window = (unsigned char FAR *)
-                        ZALLOC(strm, 1U << state->wbits,
-                               sizeof(unsigned char));
-        if (state->window == Z_NULL) return 1;
-    }
-
-    /* if window not in use yet, initialize */
-    if (state->wsize == 0) {
-        state->wsize = 1U << state->wbits;
-        state->wnext = 0;
-        state->whave = 0;
-    }
+    if (inflate_ensure_window(state)) return 1;
 
     /* copy state->wsize or less output bytes into the circular window */
     if (copy >= state->wsize) {
@@ -849,6 +876,7 @@ int flush;
         case TYPE:
             if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave;
         case TYPEDO:
+            INFLATE_TYPEDO_HOOK(strm, flush);
             if (state->last) {
                 BYTEBITS();
                 state->mode = CHECK;
@@ -1200,7 +1228,7 @@ int flush;
                 out -= left;
                 strm->total_out += out;
                 state->total += out;
-                if ((state->wrap & 4) && out)
+                if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out)
                     strm->adler = state->check =
                         UPDATE(state->check, put - out, out);
                 out = left;
@@ -1252,8 +1280,9 @@ int flush;
      */
   inf_leave:
     RESTORE();
-    if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
-            (state->mode < CHECK || flush != Z_FINISH)))
+    if (INFLATE_NEED_UPDATEWINDOW(strm) &&
+        (state->wsize || (out != strm->avail_out && state->mode < BAD &&
+                (state->mode < CHECK || flush != Z_FINISH))))
         if (updatewindow(strm, strm->next_out, out - strm->avail_out)) {
             state->mode = MEM;
             return Z_MEM_ERROR;
@@ -1263,7 +1292,7 @@ int flush;
     strm->total_in += in;
     strm->total_out += out;
     state->total += out;
-    if ((state->wrap & 4) && out)
+    if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out)
         strm->adler = state->check =
             UPDATE(state->check, strm->next_out - out, out);
     strm->data_type = (int)state->bits + (state->last ? 64 : 0) +
@@ -1281,8 +1310,8 @@ z_streamp strm;
     if (inflateStateCheck(strm))
         return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
-    if (state->window != Z_NULL) ZFREE(strm, state->window);
-    ZFREE(strm, strm->state);
+    if (state->window != Z_NULL) ZFREE_WINDOW(strm, state->window);
+    ZFREE_STATE(strm, strm->state);
     strm->state = Z_NULL;
     Tracev((stderr, "inflate: end\n"));
     return Z_OK;
@@ -1474,21 +1503,21 @@ z_streamp source;
 
     /* allocate space */
     copy = (struct inflate_state FAR *)
-           ZALLOC(source, 1, sizeof(struct inflate_state));
+           ZALLOC_STATE(source, 1, sizeof(struct inflate_state));
     if (copy == Z_NULL) return Z_MEM_ERROR;
     window = Z_NULL;
     if (state->window != Z_NULL) {
         window = (unsigned char FAR *)
-                 ZALLOC(source, 1U << state->wbits, sizeof(unsigned char));
+                 ZALLOC_WINDOW(source, 1U << state->wbits, sizeof(unsigned char));
         if (window == Z_NULL) {
-            ZFREE(source, copy);
+            ZFREE_STATE(source, copy);
             return Z_MEM_ERROR;
         }
     }
 
     /* copy state */
     zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
-    zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state));
+    ZCOPY_STATE((voidpf)copy, (voidpf)state, sizeof(struct inflate_state));
     copy->strm = dest;
     if (state->lencode >= state->codes &&
         state->lencode <= state->codes + ENOUGH - 1) {
@@ -1545,6 +1574,7 @@ z_streamp strm;
 
     if (inflateStateCheck(strm))
         return -(1L << 16);
+    INFLATE_MARK_HOOK(strm);
     state = (struct inflate_state FAR *)strm->state;
     return (long)(((unsigned long)((long)state->back)) << 16) +
         (state->mode == COPY ? state->length :
diff --git a/inflate.h b/inflate.h
index a46cce6..7b19617 100644
--- a/inflate.h
+++ b/inflate.h
@@ -123,3 +123,5 @@ struct inflate_state {
     int back;                   /* bits back of last unprocessed length/lit */
     unsigned was;               /* initial length of match */
 };
+
+int ZLIB_INTERNAL inflate_ensure_window OF((struct inflate_state *state));
diff --git a/test/infcover.c b/test/infcover.c
index 2be0164..a34cd17 100644
--- a/test/infcover.c
+++ b/test/infcover.c
@@ -444,7 +444,7 @@ local void cover_wrap(void)
 }
 
 /* input and output functions for inflateBack() */
-local unsigned pull(void *desc, unsigned char **buf)
+local unsigned pull(void *desc, z_const unsigned char **buf)
 {
     static unsigned int next = 0;
     static unsigned char dat[] = {0x63, 0, 2, 0};
diff --git a/test/minigzip.c b/test/minigzip.c
index e22fb08..4b5f4ef 100644
--- a/test/minigzip.c
+++ b/test/minigzip.c
@@ -132,7 +132,11 @@ static void pwinerror (s)
 #endif
 #define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1)
 
+#ifdef DFLTCC
+#define BUFLEN      262144
+#else
 #define BUFLEN      16384
+#endif
 #define MAX_NAME_LEN 1024
 
 #ifdef MAXSEG_64K
diff --git a/trees.c b/trees.c
index 50cf4b4..ad51207 100644
--- a/trees.c
+++ b/trees.c
@@ -149,8 +149,6 @@ local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
 local void compress_block OF((deflate_state *s, const ct_data *ltree,
                               const ct_data *dtree));
 local int  detect_data_type OF((deflate_state *s));
-local unsigned bi_reverse OF((unsigned value, int length));
-local void bi_windup      OF((deflate_state *s));
 local void bi_flush       OF((deflate_state *s));
 
 #ifdef GEN_TREES_H
@@ -223,6 +221,13 @@ local void send_bits(s, value, length)
 }
 #endif /* ZLIB_DEBUG */
 
+void ZLIB_INTERNAL _tr_send_bits(s, value, length)
+    deflate_state *s;
+    int value;
+    int length;
+{
+    send_bits(s, value, length);
+}
 
 /* the arguments must not have side effects */
 
@@ -1155,7 +1160,7 @@ local int detect_data_type(s)
  * method would use a table)
  * IN assertion: 1 <= len <= 15
  */
-local unsigned bi_reverse(code, len)
+unsigned ZLIB_INTERNAL bi_reverse(code, len)
     unsigned code; /* the value to invert */
     int len;       /* its bit length */
 {
@@ -1187,7 +1192,7 @@ local void bi_flush(s)
 /* ===========================================================================
  * Flush the bit buffer and align the output on a byte boundary
  */
-local void bi_windup(s)
+void ZLIB_INTERNAL bi_windup(s)
     deflate_state *s;
 {
     if (s->bi_valid > 8) {
-- 
2.19.1