diff --git a/xen.git-35d2960ae65f28106fdc5c2130f5f08fadca0e4c.patch b/xen.git-35d2960ae65f28106fdc5c2130f5f08fadca0e4c.patch new file mode 100644 index 0000000..181a9ba --- /dev/null +++ b/xen.git-35d2960ae65f28106fdc5c2130f5f08fadca0e4c.patch @@ -0,0 +1,865 @@ +From 35d2960ae65f28106fdc5c2130f5f08fadca0e4c Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Mon, 18 Jan 2021 12:10:34 +0100 +Subject: [PATCH] lib: introduce xxhash + +Taken from Linux at commit d89775fc929c ("lib/: replace HTTP links with +HTTPS ones"), but split into separate 32-bit and 64-bit sources, since +the immediate consumer (zstd) will need only the latter. + +Note that the building of this code is restricted to x86 for now because +of the need to sort asm/unaligned.h for Arm. + +Signed-off-by: Jan Beulich +Acked-by: Andrew Cooper +--- + xen/include/xen/xxhash.h | 259 ++++++++++++++++++++++++++++++++++ + xen/lib/Makefile | 2 + + xen/lib/xxhash32.c | 259 ++++++++++++++++++++++++++++++++++ + xen/lib/xxhash64.c | 294 +++++++++++++++++++++++++++++++++++++++ + 4 files changed, 814 insertions(+) + create mode 100644 xen/include/xen/xxhash.h + create mode 100644 xen/lib/xxhash32.c + create mode 100644 xen/lib/xxhash64.c + +diff --git a/xen/include/xen/xxhash.h b/xen/include/xen/xxhash.h +new file mode 100644 +index 0000000000..6f2237cbcf +--- /dev/null ++++ b/xen/include/xen/xxhash.h +@@ -0,0 +1,259 @@ ++/* ++ * xxHash - Extremely Fast Hash algorithm ++ * Copyright (C) 2012-2016, Yann Collet. ++ * ++ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions are ++ * met: ++ * ++ * * Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. 
++ * * Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following disclaimer ++ * in the documentation and/or other materials provided with the ++ * distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). ++ * ++ * You can contact the author at: ++ * - xxHash homepage: https://cyan4973.github.io/xxHash/ ++ * - xxHash source repository: https://github.com/Cyan4973/xxHash ++ */ ++ ++/* ++ * Notice extracted from xxHash homepage: ++ * ++ * xxHash is an extremely fast Hash algorithm, running at RAM speed limits. ++ * It also successfully passes all tests from the SMHasher suite. 
++ * ++ * Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 ++ * Duo @3GHz) ++ * ++ * Name Speed Q.Score Author ++ * xxHash 5.4 GB/s 10 ++ * CrapWow 3.2 GB/s 2 Andrew ++ * MurmurHash 3a 2.7 GB/s 10 Austin Appleby ++ * SpookyHash 2.0 GB/s 10 Bob Jenkins ++ * SBox 1.4 GB/s 9 Bret Mulvey ++ * Lookup3 1.2 GB/s 9 Bob Jenkins ++ * SuperFastHash 1.2 GB/s 1 Paul Hsieh ++ * CityHash64 1.05 GB/s 10 Pike & Alakuijala ++ * FNV 0.55 GB/s 5 Fowler, Noll, Vo ++ * CRC32 0.43 GB/s 9 ++ * MD5-32 0.33 GB/s 10 Ronald L. Rivest ++ * SHA1-32 0.28 GB/s 10 ++ * ++ * Q.Score is a measure of quality of the hash function. ++ * It depends on successfully passing SMHasher test set. ++ * 10 is a perfect score. ++ * ++ * A 64-bits version, named xxh64 offers much better speed, ++ * but for 64-bits applications only. ++ * Name Speed on 64 bits Speed on 32 bits ++ * xxh64 13.8 GB/s 1.9 GB/s ++ * xxh32 6.8 GB/s 6.0 GB/s ++ */ ++ ++#ifndef __XENXXHASH_H__ ++#define __XENXXHASH_H__ ++ ++#include ++ ++/*-**************************** ++ * Simple Hash Functions ++ *****************************/ ++ ++/** ++ * xxh32() - calculate the 32-bit hash of the input with a given seed. ++ * ++ * @input: The data to hash. ++ * @length: The length of the data to hash. ++ * @seed: The seed can be used to alter the result predictably. ++ * ++ * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s ++ * ++ * Return: The 32-bit hash of the data. ++ */ ++uint32_t xxh32(const void *input, size_t length, uint32_t seed); ++ ++/** ++ * xxh64() - calculate the 64-bit hash of the input with a given seed. ++ * ++ * @input: The data to hash. ++ * @length: The length of the data to hash. ++ * @seed: The seed can be used to alter the result predictably. ++ * ++ * This function runs 2x faster on 64-bit systems, but slower on 32-bit systems. ++ * ++ * Return: The 64-bit hash of the data. 
++ */ ++uint64_t xxh64(const void *input, size_t length, uint64_t seed); ++ ++/** ++ * xxhash() - calculate wordsize hash of the input with a given seed ++ * @input: The data to hash. ++ * @length: The length of the data to hash. ++ * @seed: The seed can be used to alter the result predictably. ++ * ++ * If the hash does not need to be comparable between machines with ++ * different word sizes, this function will call whichever of xxh32() ++ * or xxh64() is faster. ++ * ++ * Return: wordsize hash of the data. ++ */ ++ ++static inline unsigned long xxhash(const void *input, size_t length, ++ uint64_t seed) ++{ ++#if BITS_PER_LONG == 64 ++ return xxh64(input, length, seed); ++#else ++ return xxh32(input, length, seed); ++#endif ++} ++ ++/*-**************************** ++ * Streaming Hash Functions ++ *****************************/ ++ ++/* ++ * These definitions are only meant to allow allocation of XXH state ++ * statically, on stack, or in a struct for example. ++ * Do not use members directly. ++ */ ++ ++/** ++ * struct xxh32_state - private xxh32 state, do not use members directly ++ */ ++struct xxh32_state { ++ uint32_t total_len_32; ++ uint32_t large_len; ++ uint32_t v1; ++ uint32_t v2; ++ uint32_t v3; ++ uint32_t v4; ++ uint32_t mem32[4]; ++ uint32_t memsize; ++}; ++ ++/** ++ * struct xxh64_state - private xxh64 state, do not use members directly ++ */ ++struct xxh64_state { ++ uint64_t total_len; ++ uint64_t v1; ++ uint64_t v2; ++ uint64_t v3; ++ uint64_t v4; ++ uint64_t mem64[4]; ++ uint32_t memsize; ++}; ++ ++/** ++ * xxh32_reset() - reset the xxh32 state to start a new hashing operation ++ * ++ * @state: The xxh32 state to reset. ++ * @seed: Initialize the hash state with this seed. ++ * ++ * Call this function on any xxh32_state to prepare for a new hashing operation. ++ */ ++void xxh32_reset(struct xxh32_state *state, uint32_t seed); ++ ++/** ++ * xxh32_update() - hash the data given and update the xxh32 state ++ * ++ * @state: The xxh32 state to update. 
++ * @input: The data to hash. ++ * @length: The length of the data to hash. ++ * ++ * After calling xxh32_reset() call xxh32_update() as many times as necessary. ++ * ++ * Return: Zero on success, otherwise an error code. ++ */ ++int xxh32_update(struct xxh32_state *state, const void *input, size_t length); ++ ++/** ++ * xxh32_digest() - produce the current xxh32 hash ++ * ++ * @state: Produce the current xxh32 hash of this state. ++ * ++ * A hash value can be produced at any time. It is still possible to continue ++ * inserting input into the hash state after a call to xxh32_digest(), and ++ * generate new hashes later on, by calling xxh32_digest() again. ++ * ++ * Return: The xxh32 hash stored in the state. ++ */ ++uint32_t xxh32_digest(const struct xxh32_state *state); ++ ++/** ++ * xxh64_reset() - reset the xxh64 state to start a new hashing operation ++ * ++ * @state: The xxh64 state to reset. ++ * @seed: Initialize the hash state with this seed. ++ */ ++void xxh64_reset(struct xxh64_state *state, uint64_t seed); ++ ++/** ++ * xxh64_update() - hash the data given and update the xxh64 state ++ * @state: The xxh64 state to update. ++ * @input: The data to hash. ++ * @length: The length of the data to hash. ++ * ++ * After calling xxh64_reset() call xxh64_update() as many times as necessary. ++ * ++ * Return: Zero on success, otherwise an error code. ++ */ ++int xxh64_update(struct xxh64_state *state, const void *input, size_t length); ++ ++/** ++ * xxh64_digest() - produce the current xxh64 hash ++ * ++ * @state: Produce the current xxh64 hash of this state. ++ * ++ * A hash value can be produced at any time. It is still possible to continue ++ * inserting input into the hash state after a call to xxh64_digest(), and ++ * generate new hashes later on, by calling xxh64_digest() again. ++ * ++ * Return: The xxh64 hash stored in the state. 
++ */ ++uint64_t xxh64_digest(const struct xxh64_state *state); ++ ++/*-************************** ++ * Utils ++ ***************************/ ++ ++/** ++ * xxh32_copy_state() - copy the source state into the destination state ++ * ++ * @src: The source xxh32 state. ++ * @dst: The destination xxh32 state. ++ */ ++void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src); ++ ++/** ++ * xxh64_copy_state() - copy the source state into the destination state ++ * ++ * @src: The source xxh64 state. ++ * @dst: The destination xxh64 state. ++ */ ++void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src); ++ ++#endif /* __XENXXHASH_H__ */ +diff --git a/xen/lib/Makefile b/xen/lib/Makefile +index 42cf7a1164..1c2227cbfc 100644 +--- a/xen/lib/Makefile ++++ b/xen/lib/Makefile +@@ -1,1 +1,3 @@ lib-y += list-sort.o + obj-$(CONFIG_X86) += x86/ ++obj-$(CONFIG_X86) += xxhash32.o ++obj-$(CONFIG_X86) += xxhash64.o +diff --git a/xen/lib/xxhash32.c b/xen/lib/xxhash32.c +new file mode 100644 +index 0000000000..e8d403e5ce +--- /dev/null ++++ b/xen/lib/xxhash32.c +@@ -0,0 +1,259 @@ ++/* ++ * xxHash - Extremely Fast Hash algorithm ++ * Copyright (C) 2012-2016, Yann Collet. ++ * ++ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions are ++ * met: ++ * ++ * * Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * * Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following disclaimer ++ * in the documentation and/or other materials provided with the ++ * distribution. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). 
++ * ++ * You can contact the author at: ++ * - xxHash homepage: https://cyan4973.github.io/xxHash/ ++ * - xxHash source repository: https://github.com/Cyan4973/xxHash ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/*-************************************* ++ * Macros ++ **************************************/ ++#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r))) ++ ++#ifdef __LITTLE_ENDIAN ++# define XXH_CPU_LITTLE_ENDIAN 1 ++#else ++# define XXH_CPU_LITTLE_ENDIAN 0 ++#endif ++ ++/*-************************************* ++ * Constants ++ **************************************/ ++static const uint32_t PRIME32_1 = 2654435761U; ++static const uint32_t PRIME32_2 = 2246822519U; ++static const uint32_t PRIME32_3 = 3266489917U; ++static const uint32_t PRIME32_4 = 668265263U; ++static const uint32_t PRIME32_5 = 374761393U; ++ ++/*-************************** ++ * Utils ++ ***************************/ ++void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src) ++{ ++ memcpy(dst, src, sizeof(*dst)); ++} ++ ++/*-*************************** ++ * Simple Hash Functions ++ ****************************/ ++static uint32_t xxh32_round(uint32_t seed, const uint32_t input) ++{ ++ seed += input * PRIME32_2; ++ seed = xxh_rotl32(seed, 13); ++ seed *= PRIME32_1; ++ return seed; ++} ++ ++uint32_t xxh32(const void *input, const size_t len, const uint32_t seed) ++{ ++ const uint8_t *p = (const uint8_t *)input; ++ const uint8_t *b_end = p + len; ++ uint32_t h32; ++ ++ if (len >= 16) { ++ const uint8_t *const limit = b_end - 16; ++ uint32_t v1 = seed + PRIME32_1 + PRIME32_2; ++ uint32_t v2 = seed + PRIME32_2; ++ uint32_t v3 = seed + 0; ++ uint32_t v4 = seed - PRIME32_1; ++ ++ do { ++ v1 = xxh32_round(v1, get_unaligned_le32(p)); ++ p += 4; ++ v2 = xxh32_round(v2, get_unaligned_le32(p)); ++ p += 4; ++ v3 = xxh32_round(v3, get_unaligned_le32(p)); ++ p += 4; ++ v4 = xxh32_round(v4, get_unaligned_le32(p)); ++ p += 4; ++ } while (p <= limit); ++ ++ h32 = 
xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) + ++ xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18); ++ } else { ++ h32 = seed + PRIME32_5; ++ } ++ ++ h32 += (uint32_t)len; ++ ++ while (p + 4 <= b_end) { ++ h32 += get_unaligned_le32(p) * PRIME32_3; ++ h32 = xxh_rotl32(h32, 17) * PRIME32_4; ++ p += 4; ++ } ++ ++ while (p < b_end) { ++ h32 += (*p) * PRIME32_5; ++ h32 = xxh_rotl32(h32, 11) * PRIME32_1; ++ p++; ++ } ++ ++ h32 ^= h32 >> 15; ++ h32 *= PRIME32_2; ++ h32 ^= h32 >> 13; ++ h32 *= PRIME32_3; ++ h32 ^= h32 >> 16; ++ ++ return h32; ++} ++ ++/*-************************************************** ++ * Advanced Hash Functions ++ ***************************************************/ ++void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed) ++{ ++ /* use a local state for memcpy() to avoid strict-aliasing warnings */ ++ struct xxh32_state state; ++ ++ memset(&state, 0, sizeof(state)); ++ state.v1 = seed + PRIME32_1 + PRIME32_2; ++ state.v2 = seed + PRIME32_2; ++ state.v3 = seed + 0; ++ state.v4 = seed - PRIME32_1; ++ memcpy(statePtr, &state, sizeof(state)); ++} ++ ++int xxh32_update(struct xxh32_state *state, const void *input, const size_t len) ++{ ++ const uint8_t *p = (const uint8_t *)input; ++ const uint8_t *const b_end = p + len; ++ ++ if (input == NULL) ++ return -EINVAL; ++ ++ state->total_len_32 += (uint32_t)len; ++ state->large_len |= (len >= 16) | (state->total_len_32 >= 16); ++ ++ if (state->memsize + len < 16) { /* fill in tmp buffer */ ++ memcpy((uint8_t *)(state->mem32) + state->memsize, input, len); ++ state->memsize += (uint32_t)len; ++ return 0; ++ } ++ ++ if (state->memsize) { /* some data left from previous update */ ++ const uint32_t *p32 = state->mem32; ++ ++ memcpy((uint8_t *)(state->mem32) + state->memsize, input, ++ 16 - state->memsize); ++ ++ state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32)); ++ p32++; ++ state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32)); ++ p32++; ++ state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32)); 
++ p32++; ++ state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32)); ++ p32++; ++ ++ p += 16-state->memsize; ++ state->memsize = 0; ++ } ++ ++ if (p <= b_end - 16) { ++ const uint8_t *const limit = b_end - 16; ++ uint32_t v1 = state->v1; ++ uint32_t v2 = state->v2; ++ uint32_t v3 = state->v3; ++ uint32_t v4 = state->v4; ++ ++ do { ++ v1 = xxh32_round(v1, get_unaligned_le32(p)); ++ p += 4; ++ v2 = xxh32_round(v2, get_unaligned_le32(p)); ++ p += 4; ++ v3 = xxh32_round(v3, get_unaligned_le32(p)); ++ p += 4; ++ v4 = xxh32_round(v4, get_unaligned_le32(p)); ++ p += 4; ++ } while (p <= limit); ++ ++ state->v1 = v1; ++ state->v2 = v2; ++ state->v3 = v3; ++ state->v4 = v4; ++ } ++ ++ if (p < b_end) { ++ memcpy(state->mem32, p, (size_t)(b_end-p)); ++ state->memsize = (uint32_t)(b_end-p); ++ } ++ ++ return 0; ++} ++ ++uint32_t xxh32_digest(const struct xxh32_state *state) ++{ ++ const uint8_t *p = (const uint8_t *)state->mem32; ++ const uint8_t *const b_end = (const uint8_t *)(state->mem32) + ++ state->memsize; ++ uint32_t h32; ++ ++ if (state->large_len) { ++ h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) + ++ xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18); ++ } else { ++ h32 = state->v3 /* == seed */ + PRIME32_5; ++ } ++ ++ h32 += state->total_len_32; ++ ++ while (p + 4 <= b_end) { ++ h32 += get_unaligned_le32(p) * PRIME32_3; ++ h32 = xxh_rotl32(h32, 17) * PRIME32_4; ++ p += 4; ++ } ++ ++ while (p < b_end) { ++ h32 += (*p) * PRIME32_5; ++ h32 = xxh_rotl32(h32, 11) * PRIME32_1; ++ p++; ++ } ++ ++ h32 ^= h32 >> 15; ++ h32 *= PRIME32_2; ++ h32 ^= h32 >> 13; ++ h32 *= PRIME32_3; ++ h32 ^= h32 >> 16; ++ ++ return h32; ++} ++ +diff --git a/xen/lib/xxhash64.c b/xen/lib/xxhash64.c +new file mode 100644 +index 0000000000..ba6bcf152d +--- /dev/null ++++ b/xen/lib/xxhash64.c +@@ -0,0 +1,294 @@ ++/* ++ * xxHash - Extremely Fast Hash algorithm ++ * Copyright (C) 2012-2016, Yann Collet. 
++ * ++ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions are ++ * met: ++ * ++ * * Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * * Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following disclaimer ++ * in the documentation and/or other materials provided with the ++ * distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). 
++ * ++ * You can contact the author at: ++ * - xxHash homepage: https://cyan4973.github.io/xxHash/ ++ * - xxHash source repository: https://github.com/Cyan4973/xxHash ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/*-************************************* ++ * Macros ++ **************************************/ ++#define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r))) ++ ++#ifdef __LITTLE_ENDIAN ++# define XXH_CPU_LITTLE_ENDIAN 1 ++#else ++# define XXH_CPU_LITTLE_ENDIAN 0 ++#endif ++ ++/*-************************************* ++ * Constants ++ **************************************/ ++static const uint64_t PRIME64_1 = 11400714785074694791ULL; ++static const uint64_t PRIME64_2 = 14029467366897019727ULL; ++static const uint64_t PRIME64_3 = 1609587929392839161ULL; ++static const uint64_t PRIME64_4 = 9650029242287828579ULL; ++static const uint64_t PRIME64_5 = 2870177450012600261ULL; ++ ++/*-************************** ++ * Utils ++ ***************************/ ++void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src) ++{ ++ memcpy(dst, src, sizeof(*dst)); ++} ++ ++/*-*************************** ++ * Simple Hash Functions ++ ****************************/ ++static uint64_t xxh64_round(uint64_t acc, const uint64_t input) ++{ ++ acc += input * PRIME64_2; ++ acc = xxh_rotl64(acc, 31); ++ acc *= PRIME64_1; ++ return acc; ++} ++ ++static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val) ++{ ++ val = xxh64_round(0, val); ++ acc ^= val; ++ acc = acc * PRIME64_1 + PRIME64_4; ++ return acc; ++} ++ ++uint64_t xxh64(const void *input, const size_t len, const uint64_t seed) ++{ ++ const uint8_t *p = (const uint8_t *)input; ++ const uint8_t *const b_end = p + len; ++ uint64_t h64; ++ ++ if (len >= 32) { ++ const uint8_t *const limit = b_end - 32; ++ uint64_t v1 = seed + PRIME64_1 + PRIME64_2; ++ uint64_t v2 = seed + PRIME64_2; ++ uint64_t v3 = seed + 0; ++ uint64_t v4 = seed - PRIME64_1; ++ ++ do { ++ v1 = xxh64_round(v1, 
get_unaligned_le64(p)); ++ p += 8; ++ v2 = xxh64_round(v2, get_unaligned_le64(p)); ++ p += 8; ++ v3 = xxh64_round(v3, get_unaligned_le64(p)); ++ p += 8; ++ v4 = xxh64_round(v4, get_unaligned_le64(p)); ++ p += 8; ++ } while (p <= limit); ++ ++ h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) + ++ xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18); ++ h64 = xxh64_merge_round(h64, v1); ++ h64 = xxh64_merge_round(h64, v2); ++ h64 = xxh64_merge_round(h64, v3); ++ h64 = xxh64_merge_round(h64, v4); ++ ++ } else { ++ h64 = seed + PRIME64_5; ++ } ++ ++ h64 += (uint64_t)len; ++ ++ while (p + 8 <= b_end) { ++ const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p)); ++ ++ h64 ^= k1; ++ h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4; ++ p += 8; ++ } ++ ++ if (p + 4 <= b_end) { ++ h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1; ++ h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; ++ p += 4; ++ } ++ ++ while (p < b_end) { ++ h64 ^= (*p) * PRIME64_5; ++ h64 = xxh_rotl64(h64, 11) * PRIME64_1; ++ p++; ++ } ++ ++ h64 ^= h64 >> 33; ++ h64 *= PRIME64_2; ++ h64 ^= h64 >> 29; ++ h64 *= PRIME64_3; ++ h64 ^= h64 >> 32; ++ ++ return h64; ++} ++ ++/*-************************************************** ++ * Advanced Hash Functions ++ ***************************************************/ ++void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed) ++{ ++ /* use a local state for memcpy() to avoid strict-aliasing warnings */ ++ struct xxh64_state state; ++ ++ memset(&state, 0, sizeof(state)); ++ state.v1 = seed + PRIME64_1 + PRIME64_2; ++ state.v2 = seed + PRIME64_2; ++ state.v3 = seed + 0; ++ state.v4 = seed - PRIME64_1; ++ memcpy(statePtr, &state, sizeof(state)); ++} ++ ++int xxh64_update(struct xxh64_state *state, const void *input, const size_t len) ++{ ++ const uint8_t *p = (const uint8_t *)input; ++ const uint8_t *const b_end = p + len; ++ ++ if (input == NULL) ++ return -EINVAL; ++ ++ state->total_len += len; ++ ++ if (state->memsize + len < 32) { /* fill in tmp buffer */ ++ 
memcpy(((uint8_t *)state->mem64) + state->memsize, input, len); ++ state->memsize += (uint32_t)len; ++ return 0; ++ } ++ ++ if (state->memsize) { /* tmp buffer is full */ ++ uint64_t *p64 = state->mem64; ++ ++ memcpy(((uint8_t *)p64) + state->memsize, input, ++ 32 - state->memsize); ++ ++ state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64)); ++ p64++; ++ state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64)); ++ p64++; ++ state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64)); ++ p64++; ++ state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64)); ++ ++ p += 32 - state->memsize; ++ state->memsize = 0; ++ } ++ ++ if (p + 32 <= b_end) { ++ const uint8_t *const limit = b_end - 32; ++ uint64_t v1 = state->v1; ++ uint64_t v2 = state->v2; ++ uint64_t v3 = state->v3; ++ uint64_t v4 = state->v4; ++ ++ do { ++ v1 = xxh64_round(v1, get_unaligned_le64(p)); ++ p += 8; ++ v2 = xxh64_round(v2, get_unaligned_le64(p)); ++ p += 8; ++ v3 = xxh64_round(v3, get_unaligned_le64(p)); ++ p += 8; ++ v4 = xxh64_round(v4, get_unaligned_le64(p)); ++ p += 8; ++ } while (p <= limit); ++ ++ state->v1 = v1; ++ state->v2 = v2; ++ state->v3 = v3; ++ state->v4 = v4; ++ } ++ ++ if (p < b_end) { ++ memcpy(state->mem64, p, (size_t)(b_end-p)); ++ state->memsize = (uint32_t)(b_end - p); ++ } ++ ++ return 0; ++} ++ ++uint64_t xxh64_digest(const struct xxh64_state *state) ++{ ++ const uint8_t *p = (const uint8_t *)state->mem64; ++ const uint8_t *const b_end = (const uint8_t *)state->mem64 + ++ state->memsize; ++ uint64_t h64; ++ ++ if (state->total_len >= 32) { ++ const uint64_t v1 = state->v1; ++ const uint64_t v2 = state->v2; ++ const uint64_t v3 = state->v3; ++ const uint64_t v4 = state->v4; ++ ++ h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) + ++ xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18); ++ h64 = xxh64_merge_round(h64, v1); ++ h64 = xxh64_merge_round(h64, v2); ++ h64 = xxh64_merge_round(h64, v3); ++ h64 = xxh64_merge_round(h64, v4); ++ } else { ++ h64 = state->v3 + PRIME64_5; ++ } ++ ++ 
h64 += (uint64_t)state->total_len; ++ ++ while (p + 8 <= b_end) { ++ const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p)); ++ ++ h64 ^= k1; ++ h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4; ++ p += 8; ++ } ++ ++ if (p + 4 <= b_end) { ++ h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1; ++ h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; ++ p += 4; ++ } ++ ++ while (p < b_end) { ++ h64 ^= (*p) * PRIME64_5; ++ h64 = xxh_rotl64(h64, 11) * PRIME64_1; ++ p++; ++ } ++ ++ h64 ^= h64 >> 33; ++ h64 *= PRIME64_2; ++ h64 ^= h64 >> 29; ++ h64 *= PRIME64_3; ++ h64 ^= h64 >> 32; ++ ++ return h64; ++} +-- +2.20.1 + diff --git a/xen.git-7c9f81687ad611515474b1c17afc2f79f19faef5.patch b/xen.git-7c9f81687ad611515474b1c17afc2f79f19faef5.patch new file mode 100644 index 0000000..42ee5bd --- /dev/null +++ b/xen.git-7c9f81687ad611515474b1c17afc2f79f19faef5.patch @@ -0,0 +1,280 @@ +From 7c9f81687ad611515474b1c17afc2f79f19faef5 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Mon, 18 Jan 2021 12:09:13 +0100 +Subject: [PATCH] introduce unaligned.h + +Rather than open-coding commonly used constructs in yet more places when +pulling in zstd decompression support (and its xxhash prereq), pull out +the custom bits into a commonly used header (for the hypervisor build; +the tool stack and stubdom builds of libxenguest will still remain in +need of similarly taking care of). For now this is limited to x86, where +custom logic isn't needed (considering this is going to be used in init +code only, even using alternatives patching to use MOVBE doesn't seem +worthwhile). + +For Arm64 with CONFIG_ACPI=y (due to efi-dom0.c's re-use of xz/crc32.c) +drop the not really necessary inclusion of xz's private.h. + +No change in generated code. 
+ +Signed-off-by: Jan Beulich +Acked-by: Andrew Cooper +--- + xen/common/lz4/defs.h | 9 ++-- + xen/common/lzo.c | 7 ++- + xen/common/unlzo.c | 19 ++------ + xen/common/xz/crc32.c | 2 - + xen/common/xz/private.h | 23 +++------- + xen/include/asm-x86/unaligned.h | 6 +++ + xen/include/xen/unaligned.h | 79 +++++++++++++++++++++++++++++++++ + 7 files changed, 104 insertions(+), 41 deletions(-) + create mode 100644 xen/include/asm-x86/unaligned.h + create mode 100644 xen/include/xen/unaligned.h + +diff --git a/xen/common/lz4/defs.h b/xen/common/lz4/defs.h +index d886a4e122..4fbea2ac3d 100644 +--- a/xen/common/lz4/defs.h ++++ b/xen/common/lz4/defs.h +@@ -10,18 +10,21 @@ + + #ifdef __XEN__ + #include +-#endif ++#include ++#else + +-static inline u16 INIT get_unaligned_le16(const void *p) ++static inline u16 get_unaligned_le16(const void *p) + { + return le16_to_cpup(p); + } + +-static inline u32 INIT get_unaligned_le32(const void *p) ++static inline u32 get_unaligned_le32(const void *p) + { + return le32_to_cpup(p); + } + ++#endif ++ + /* + * Detects 64 bits mode + */ +diff --git a/xen/common/lzo.c b/xen/common/lzo.c +index f4c0ad8530..e4841794f4 100644 +--- a/xen/common/lzo.c ++++ b/xen/common/lzo.c +@@ -97,13 +97,12 @@ + #ifdef __XEN__ + #include + #include ++#include ++#else ++#define get_unaligned_le16(_p) (*(u16 *)(_p)) + #endif + + #include +-#define get_unaligned(_p) (*(_p)) +-#define put_unaligned(_val,_p) (*(_p)=_val) +-#define get_unaligned_le16(_p) (*(u16 *)(_p)) +-#define get_unaligned_le32(_p) (*(u32 *)(_p)) + + #include "decompress.h" + +diff --git a/xen/common/unlzo.c b/xen/common/unlzo.c +index 5ae6cf911e..11f64fcf3b 100644 +--- a/xen/common/unlzo.c ++++ b/xen/common/unlzo.c +@@ -34,30 +34,19 @@ + + #ifdef __XEN__ + #include +-#endif ++#include ++#else + +-#if 1 /* ndef CONFIG_??? 
*/ +-static inline u16 INIT get_unaligned_be16(void *p) ++static inline u16 get_unaligned_be16(const void *p) + { + return be16_to_cpup(p); + } + +-static inline u32 INIT get_unaligned_be32(void *p) ++static inline u32 get_unaligned_be32(const void *p) + { + return be32_to_cpup(p); + } +-#else +-#include +- +-static inline u16 INIT get_unaligned_be16(void *p) +-{ +- return be16_to_cpu(__get_unaligned(p, 2)); +-} + +-static inline u32 INIT get_unaligned_be32(void *p) +-{ +- return be32_to_cpu(__get_unaligned(p, 4)); +-} + #endif + + static const unsigned char lzop_magic[] = { +diff --git a/xen/common/xz/crc32.c b/xen/common/xz/crc32.c +index af08ae2cf6..0708b61638 100644 +--- a/xen/common/xz/crc32.c ++++ b/xen/common/xz/crc32.c +@@ -15,8 +15,6 @@ + * but they are bigger and use more memory for the lookup table. + */ + +-#include "private.h" +- + XZ_EXTERN uint32_t INITDATA xz_crc32_table[256]; + + XZ_EXTERN void INIT xz_crc32_init(void) +diff --git a/xen/common/xz/private.h b/xen/common/xz/private.h +index 7ea2489229..511343fcc2 100644 +--- a/xen/common/xz/private.h ++++ b/xen/common/xz/private.h +@@ -13,34 +13,23 @@ + #ifdef __XEN__ + #include + #include +-#endif +- +-#define get_le32(p) le32_to_cpup((const uint32_t *)(p)) ++#include ++#else + +-#if 1 /* ndef CONFIG_??? 
*/ +-static inline u32 INIT get_unaligned_le32(void *p) ++static inline u32 get_unaligned_le32(const void *p) + { + return le32_to_cpup(p); + } + +-static inline void INIT put_unaligned_le32(u32 val, void *p) ++static inline void put_unaligned_le32(u32 val, void *p) + { + *(__force __le32*)p = cpu_to_le32(val); + } +-#else +-#include +- +-static inline u32 INIT get_unaligned_le32(void *p) +-{ +- return le32_to_cpu(__get_unaligned(p, 4)); +-} + +-static inline void INIT put_unaligned_le32(u32 val, void *p) +-{ +- __put_unaligned(cpu_to_le32(val), p, 4); +-} + #endif + ++#define get_le32(p) le32_to_cpup((const uint32_t *)(p)) ++ + #define false 0 + #define true 1 + +diff --git a/xen/include/asm-x86/unaligned.h b/xen/include/asm-x86/unaligned.h +new file mode 100644 +index 0000000000..6070801d4a +--- /dev/null ++++ b/xen/include/asm-x86/unaligned.h +@@ -0,0 +1,6 @@ ++#ifndef __ASM_UNALIGNED_H__ ++#define __ASM_UNALIGNED_H__ ++ ++#include ++ ++#endif /* __ASM_UNALIGNED_H__ */ +diff --git a/xen/include/xen/unaligned.h b/xen/include/xen/unaligned.h +new file mode 100644 +index 0000000000..eef7ec73b6 +--- /dev/null ++++ b/xen/include/xen/unaligned.h +@@ -0,0 +1,79 @@ ++/* ++ * This header can be used by architectures where unaligned accesses work ++ * without faulting, and at least reasonably efficiently. Other architectures ++ * will need to have a custom asm/unaligned.h. 
++ */ ++#ifndef __ASM_UNALIGNED_H__ ++#error "xen/unaligned.h should not be included directly - include asm/unaligned.h instead" ++#endif ++ ++#ifndef __XEN_UNALIGNED_H__ ++#define __XEN_UNALIGNED_H__ ++ ++#include ++#include ++ ++#define get_unaligned(p) (*(p)) ++#define put_unaligned(val, p) (*(p) = (val)) ++ ++static inline uint16_t get_unaligned_be16(const void *p) ++{ ++ return be16_to_cpup(p); ++} ++ ++static inline void put_unaligned_be16(uint16_t val, void *p) ++{ ++ *(__force __be16*)p = cpu_to_be16(val); ++} ++ ++static inline uint32_t get_unaligned_be32(const void *p) ++{ ++ return be32_to_cpup(p); ++} ++ ++static inline void put_unaligned_be32(uint32_t val, void *p) ++{ ++ *(__force __be32*)p = cpu_to_be32(val); ++} ++ ++static inline uint64_t get_unaligned_be64(const void *p) ++{ ++ return be64_to_cpup(p); ++} ++ ++static inline void put_unaligned_be64(uint64_t val, void *p) ++{ ++ *(__force __be64*)p = cpu_to_be64(val); ++} ++ ++static inline uint16_t get_unaligned_le16(const void *p) ++{ ++ return le16_to_cpup(p); ++} ++ ++static inline void put_unaligned_le16(uint16_t val, void *p) ++{ ++ *(__force __le16*)p = cpu_to_le16(val); ++} ++ ++static inline uint32_t get_unaligned_le32(const void *p) ++{ ++ return le32_to_cpup(p); ++} ++ ++static inline void put_unaligned_le32(uint32_t val, void *p) ++{ ++ *(__force __le32*)p = cpu_to_le32(val); ++} ++ ++static inline uint64_t get_unaligned_le64(const void *p) ++{ ++ return le64_to_cpup(p); ++} ++ ++static inline void put_unaligned_le64(uint64_t val, void *p) ++{ ++ *(__force __le64*)p = cpu_to_le64(val); ++} ++ ++#endif /* __XEN_UNALIGNED_H__ */ +-- +2.20.1 + diff --git a/xen.git-8169f82049efb5b2044b33aa482ba3a136b7804d.patch b/xen.git-8169f82049efb5b2044b33aa482ba3a136b7804d.patch new file mode 100644 index 0000000..4d4f5b3 --- /dev/null +++ b/xen.git-8169f82049efb5b2044b33aa482ba3a136b7804d.patch @@ -0,0 +1,821 @@ +From 8169f82049efb5b2044b33aa482ba3a136b7804d Mon Sep 17 00:00:00 2001 +From: Jan Beulich 
+Date: Tue, 26 Jan 2021 14:16:34 +0100 +Subject: [PATCH] libxenguest: support zstd compressed kernels + +This follows the logic used for other decompression methods utilizing an +external library, albeit here we can't ignore the 32-bit size field +appended to the compressed image - its presence causes decompression to +fail. Leverage the field instead to allocate the output buffer in one +go, i.e. without incrementally realloc()ing. + +As far as configure.ac goes, I'm pretty sure there is a better (more +"standard") way of using PKG_CHECK_MODULES(). The construct also gets +put next to the other decompression library checks, albeit I think they +all ought to be x86-specific (e.g. placed in the existing case block a +few lines down). + +Note that, where possible, instead of #ifdef-ing xen/*.h inclusions, +they get removed. + +Signed-off-by: Jan Beulich +Acked-by: Wei Liu +Reviewed-by: Ian Jackson +Release-Acked-by: Ian Jackson +--- + README | 2 + + tools/configure | 79 ++++++++++++++++ + tools/configure.ac | 2 + + tools/libs/guest/Makefile | 1 + + tools/libs/guest/xg_dom_bzimageloader.c | 90 +++++++++++++++++++ + tools/libs/guest/xg_dom_decompress_unsafe.h | 2 + + .../guest/xg_dom_decompress_unsafe_zstd.c | 45 ++++++++++ + xen/common/zstd/decompress.c | 67 +++++++++----- + xen/common/zstd/error_private.h | 5 -- + xen/common/zstd/fse.h | 5 -- + xen/common/zstd/fse_decompress.c | 2 - + xen/common/zstd/huf.h | 3 - + xen/common/zstd/huf_decompress.c | 2 - + xen/common/zstd/mem.h | 2 + + xen/common/zstd/zstd_internal.h | 4 + + xen/include/xen/unaligned.h | 2 + + xen/lib/xxhash64.c | 2 + + 17 files changed, 276 insertions(+), 39 deletions(-) + create mode 100644 tools/libs/guest/xg_dom_decompress_unsafe_zstd.c + +diff --git a/README b/README +index 0e4787c1a6..33cdf6b826 100644 +--- a/README ++++ b/README +@@ -84,6 +84,8 @@ disabled at compile time: + * 16-bit x86 assembler, loader and compiler for qemu-traditional / rombios + (dev86 rpm or bin86 & bcc debs) + * 
Development install of liblzma for rombios ++ * Development install of libbz2, liblzma, liblzo2, and libzstd for DomU ++ kernel decompression. + + Second, you need to acquire a suitable kernel for use in domain 0. If + possible you should use a kernel provided by your OS distributor. If +diff --git a/tools/configure b/tools/configure +index 131112c41e..42c0d05bc1 100755 +--- a/tools/configure ++++ b/tools/configure +@@ -643,6 +643,8 @@ PTHREAD_CFLAGS + EXTFS_LIBS + system_aio + zlib ++libzstd_LIBS ++libzstd_CFLAGS + FETCHER + FTP + FALSE +@@ -857,6 +859,8 @@ glib_CFLAGS + glib_LIBS + pixman_CFLAGS + pixman_LIBS ++libzstd_CFLAGS ++libzstd_LIBS + LIBNL3_CFLAGS + LIBNL3_LIBS + SYSTEMD_CFLAGS +@@ -1605,6 +1609,10 @@ Some influential environment variables: + pixman_CFLAGS + C compiler flags for pixman, overriding pkg-config + pixman_LIBS linker flags for pixman, overriding pkg-config ++ libzstd_CFLAGS ++ C compiler flags for libzstd, overriding pkg-config ++ libzstd_LIBS ++ linker flags for libzstd, overriding pkg-config + LIBNL3_CFLAGS + C compiler flags for LIBNL3, overriding pkg-config + LIBNL3_LIBS linker flags for LIBNL3, overriding pkg-config +@@ -8744,6 +8752,77 @@ fi + + + ++pkg_failed=no ++{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for libzstd" >&5 ++$as_echo_n "checking for libzstd... " >&6; } ++ ++if test -n "$libzstd_CFLAGS"; then ++ pkg_cv_libzstd_CFLAGS="$libzstd_CFLAGS" ++ elif test -n "$PKG_CONFIG"; then ++ if test -n "$PKG_CONFIG" && \ ++ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libzstd\""; } >&5 ++ ($PKG_CONFIG --exists --print-errors "libzstd") 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; then ++ pkg_cv_libzstd_CFLAGS=`$PKG_CONFIG --cflags "libzstd" 2>/dev/null` ++ test "x$?" 
!= "x0" && pkg_failed=yes ++else ++ pkg_failed=yes ++fi ++ else ++ pkg_failed=untried ++fi ++if test -n "$libzstd_LIBS"; then ++ pkg_cv_libzstd_LIBS="$libzstd_LIBS" ++ elif test -n "$PKG_CONFIG"; then ++ if test -n "$PKG_CONFIG" && \ ++ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libzstd\""; } >&5 ++ ($PKG_CONFIG --exists --print-errors "libzstd") 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; then ++ pkg_cv_libzstd_LIBS=`$PKG_CONFIG --libs "libzstd" 2>/dev/null` ++ test "x$?" != "x0" && pkg_failed=yes ++else ++ pkg_failed=yes ++fi ++ else ++ pkg_failed=untried ++fi ++ ++ ++ ++if test $pkg_failed = yes; then ++ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 ++$as_echo "no" >&6; } ++ ++if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then ++ _pkg_short_errors_supported=yes ++else ++ _pkg_short_errors_supported=no ++fi ++ if test $_pkg_short_errors_supported = yes; then ++ libzstd_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libzstd" 2>&1` ++ else ++ libzstd_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libzstd" 2>&1` ++ fi ++ # Put the nasty error message in config.log where it belongs ++ echo "$libzstd_PKG_ERRORS" >&5 ++ ++ true ++elif test $pkg_failed = untried; then ++ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 ++$as_echo "no" >&6; } ++ true ++else ++ libzstd_CFLAGS=$pkg_cv_libzstd_CFLAGS ++ libzstd_LIBS=$pkg_cv_libzstd_LIBS ++ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 ++$as_echo "yes" >&6; } ++ zlib="$zlib -DHAVE_ZSTD $libzstd_CFLAGS $libzstd_LIBS" ++fi ++ + + + ac_fn_c_check_header_mongrel "$LINENO" "ext2fs/ext2fs.h" "ac_cv_header_ext2fs_ext2fs_h" "$ac_includes_default" +diff --git a/tools/configure.ac b/tools/configure.ac +index ee8ba5ff24..5b328700e0 100644 +--- a/tools/configure.ac ++++ b/tools/configure.ac +@@ -414,6 +414,8 @@ AC_CHECK_LIB([lzma], [lzma_stream_decoder], 
[zlib="$zlib -DHAVE_LZMA -llzma"]) + AC_CHECK_HEADER([lzo/lzo1x.h], [ + AC_CHECK_LIB([lzo2], [lzo1x_decompress], [zlib="$zlib -DHAVE_LZO1X -llzo2"]) + ]) ++PKG_CHECK_MODULES([libzstd], [libzstd], ++ [zlib="$zlib -DHAVE_ZSTD $libzstd_CFLAGS $libzstd_LIBS"], [true]) + AC_SUBST(zlib) + AC_SUBST(system_aio) + AX_CHECK_EXTFS +diff --git a/tools/libs/guest/Makefile b/tools/libs/guest/Makefile +index 1c729040b3..58c50250df 100644 +--- a/tools/libxc/Makefile ++++ b/tools/libxc/Makefile +@@ -64,6 +64,7 @@ SRCS-y += xg_dom_decompress_unsafe_bzip2.c + GUEST_SRCS-y += xc_dom_decompress_unsafe_lzma.c + GUEST_SRCS-y += xc_dom_decompress_unsafe_lzo1x.c + GUEST_SRCS-y += xc_dom_decompress_unsafe_xz.c ++GUEST_SRCS-y += xc_dom_decompress_unsafe_zstd.c + endif + + -include $(XEN_TARGET_ARCH)/Makefile +diff --git a/tools/libs/guest/xg_dom_bzimageloader.c b/tools/libs/guest/xg_dom_bzimageloader.c +index f959a77602..668881458c 100644 +--- a/tools/libxc/xc_dom_bzimageloader.c ++++ b/tools/libxc/xc_dom_bzimageloader.c +@@ -589,6 +589,85 @@ static int xc_try_lzo1x_decode( + + #endif + ++#if defined(HAVE_ZSTD) ++ ++#include ++ ++static int xc_try_zstd_decode( ++ struct xc_dom_image *dom, void **blob, size_t *size) ++{ ++ size_t outsize, insize, actual; ++ unsigned char *outbuf; ++ ++ /* Magic, descriptor byte, and trailing size field. 
*/ ++ if ( *size <= 9 ) ++ { ++ DOMPRINTF("ZSTD: insufficient input data"); ++ return -1; ++ } ++ ++ insize = *size - 4; ++ outsize = get_unaligned_le32(*blob + insize); ++ ++ if ( xc_dom_kernel_check_size(dom, outsize) ) ++ { ++ DOMPRINTF("ZSTD: output too large"); ++ return -1; ++ } ++ ++ outbuf = malloc(outsize); ++ if ( !outbuf ) ++ { ++ DOMPRINTF("ZSTD: failed to alloc memory"); ++ return -1; ++ } ++ ++ actual = ZSTD_decompress(outbuf, outsize, *blob, insize); ++ ++ if ( ZSTD_isError(actual) ) ++ { ++ DOMPRINTF("ZSTD: error: %s", ZSTD_getErrorName(actual)); ++ free(outbuf); ++ return -1; ++ } ++ ++ if ( actual != outsize ) ++ { ++ DOMPRINTF("ZSTD: got 0x%zx bytes instead of 0x%zx", ++ actual, outsize); ++ free(outbuf); ++ return -1; ++ } ++ ++ if ( xc_dom_register_external(dom, outbuf, outsize) ) ++ { ++ DOMPRINTF("ZSTD: error registering stream output"); ++ free(outbuf); ++ return -1; ++ } ++ ++ DOMPRINTF("%s: ZSTD decompress OK, 0x%zx -> 0x%zx", ++ __FUNCTION__, insize, outsize); ++ ++ *blob = outbuf; ++ *size = outsize; ++ ++ return 0; ++} ++ ++#else /* !defined(HAVE_ZSTD) */ ++ ++static int xc_try_zstd_decode( ++ struct xc_dom_image *dom, void **blob, size_t *size) ++{ ++ xc_dom_panic(dom->xch, XC_INTERNAL_ERROR, ++ "%s: ZSTD decompress support unavailable\n", ++ __FUNCTION__); ++ return -1; ++} ++ ++#endif ++ + #else /* __MINIOS__ */ + + int xc_try_bzip2_decode(struct xc_dom_image *dom, void **blob, size_t *size); +@@ -736,6 +815,17 @@ static int xc_dom_probe_bzimage_kernel(struct xc_dom_image *dom) + return -EINVAL; + } + } ++ else if ( check_magic(dom, "\x28\xb5\x2f\xfd", 4) ) ++ { ++ ret = xc_try_zstd_decode(dom, &dom->kernel_blob, &dom->kernel_size); ++ if ( ret < 0 ) ++ { ++ xc_dom_panic(dom->xch, XC_INVALID_KERNEL, ++ "%s unable to ZSTD decompress kernel", ++ __FUNCTION__); ++ return -EINVAL; ++ } ++ } + else if ( check_magic(dom, "\135\000", 2) ) + { + ret = xc_try_lzma_decode(dom, &dom->kernel_blob, &dom->kernel_size); +diff --git 
a/tools/libs/guest/xg_dom_decompress_unsafe.h b/tools/libs/guest/xg_dom_decompress_unsafe.h +index 5dc18c896e..969846cb32 100644 +--- a/tools/libxc/xc_dom_decompress_unsafe.h ++++ b/tools/libxc/xc_dom_decompress_unsafe.h +@@ -16,3 +16,5 @@ int xc_try_lzo1x_decode(struct xc_dom_image *dom, void **blob, size_t *size) + __attribute__((visibility("internal"))); + int xc_try_xz_decode(struct xc_dom_image *dom, void **blob, size_t *size) + __attribute__((visibility("internal"))); ++int xc_try_zstd_decode(struct xc_dom_image *dom, void **blob, size_t *size) ++ __attribute__((visibility("internal"))); +diff --git a/tools/libs/guest/xg_dom_decompress_unsafe_zstd.c b/tools/libs/guest/xg_dom_decompress_unsafe_zstd.c +new file mode 100644 +index 0000000000..52558d2ffc +--- /dev/null ++++ b/tools/libxc/xc_dom_decompress_unsafe_zstd.c +@@ -0,0 +1,45 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "xg_private.h" ++#include "xc_dom_decompress_unsafe.h" ++ ++typedef uint8_t u8; ++ ++typedef uint16_t __u16; ++typedef uint32_t __u32; ++typedef uint64_t __u64; ++ ++typedef uint16_t __le16; ++typedef uint32_t __le32; ++typedef uint64_t __le64; ++ ++typedef uint16_t __be16; ++typedef uint32_t __be32; ++typedef uint64_t __be64; ++ ++#define __attribute_const__ ++#define __force ++#define always_inline ++#define noinline ++ ++#undef ERROR ++ ++#define __BYTEORDER_HAS_U64__ ++#define __TYPES_H__ /* xen/types.h guard */ ++#include "../../xen/include/xen/byteorder/little_endian.h" ++#define __ASM_UNALIGNED_H__ /* asm/unaligned.h guard */ ++#include "../../xen/include/xen/unaligned.h" ++#include "../../xen/include/xen/xxhash.h" ++#include "../../xen/lib/xxhash64.c" ++#include "../../xen/common/unzstd.c" ++ ++int xc_try_zstd_decode( ++ struct xc_dom_image *dom, void **blob, size_t *size) ++{ ++ return xc_dom_decompress_unsafe(unzstd, dom, blob, size); ++} +diff --git a/xen/common/zstd/decompress.c b/xen/common/zstd/decompress.c +index 3d3ef136e5..b024910814 
100644 +--- a/xen/common/zstd/decompress.c ++++ b/xen/common/zstd/decompress.c +@@ -33,7 +33,6 @@ + #include "huf.h" + #include "mem.h" /* low level memory routines */ + #include "zstd_internal.h" +-#include /* memcpy, memmove, memset */ + + #define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) + +@@ -99,9 +98,12 @@ struct ZSTD_DCtx_s { + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + +-size_t INIT ZSTD_DCtxWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DCtx)); } ++STATIC size_t INIT ZSTD_DCtxWorkspaceBound(void) ++{ ++ return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DCtx)); ++} + +-size_t INIT ZSTD_decompressBegin(ZSTD_DCtx *dctx) ++STATIC size_t INIT ZSTD_decompressBegin(ZSTD_DCtx *dctx) + { + dctx->expected = ZSTD_frameHeaderSize_prefix; + dctx->stage = ZSTDds_getFrameHeaderSize; +@@ -121,7 +123,7 @@ size_t INIT ZSTD_decompressBegin(ZSTD_DCtx *dctx) + return 0; + } + +-ZSTD_DCtx *INIT ZSTD_createDCtx_advanced(ZSTD_customMem customMem) ++STATIC ZSTD_DCtx *INIT ZSTD_createDCtx_advanced(ZSTD_customMem customMem) + { + ZSTD_DCtx *dctx; + +@@ -136,7 +138,7 @@ ZSTD_DCtx *INIT ZSTD_createDCtx_advanced(ZSTD_customMem customMem) + return dctx; + } + +-ZSTD_DCtx *INIT ZSTD_initDCtx(void *workspace, size_t workspaceSize) ++STATIC ZSTD_DCtx *INIT ZSTD_initDCtx(void *workspace, size_t workspaceSize) + { + ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); + return ZSTD_createDCtx_advanced(stackMem); +@@ -150,11 +152,13 @@ size_t INIT ZSTD_freeDCtx(ZSTD_DCtx *dctx) + return 0; /* reserved as a potential error code in the future */ + } + ++#ifdef BUILD_DEAD_CODE + void INIT ZSTD_copyDCtx(ZSTD_DCtx *dstDCtx, const ZSTD_DCtx *srcDCtx) + { + size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max; + memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */ + } 
++#endif + + STATIC size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize); + STATIC size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, +@@ -166,6 +170,7 @@ static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict); + * Decompression section + ***************************************************************/ + ++#ifdef BUILD_DEAD_CODE + /*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. +@@ -184,6 +189,7 @@ unsigned INIT ZSTD_isFrame(const void *buffer, size_t size) + } + return 0; + } ++#endif + + /** ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. +@@ -206,7 +212,7 @@ static size_t INIT ZSTD_frameHeaderSize(const void *src, size_t srcSize) + * @return : 0, `fparamsPtr` is correctly filled, + * >0, `srcSize` is too small, result is expected `srcSize`, + * or an error code, which can be tested using ZSTD_isError() */ +-size_t INIT ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, size_t srcSize) ++STATIC size_t INIT ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, size_t srcSize) + { + const BYTE *ip = (const BYTE *)src; + +@@ -291,6 +297,7 @@ size_t INIT ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, s + return 0; + } + ++#ifdef BUILD_DEAD_CODE + /** ZSTD_getFrameContentSize() : + * compatible with legacy mode + * @return : decompressed size of the single frame pointed to be `src` if known, otherwise +@@ -367,6 +374,7 @@ unsigned long long INIT ZSTD_findDecompressedSize(const void *src, size_t srcSiz + return totalDstSize; + } + } ++#endif /* BUILD_DEAD_CODE */ + + /** ZSTD_decodeFrameHeader() : + * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). +@@ -393,7 +401,7 @@ typedef struct { + + /*! 
ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +-size_t INIT ZSTD_getcBlockSize(const void *src, size_t srcSize, blockProperties_t *bpPtr) ++STATIC size_t INIT ZSTD_getcBlockSize(const void *src, size_t srcSize, blockProperties_t *bpPtr) + { + if (srcSize < ZSTD_blockHeaderSize) + return ERROR(srcSize_wrong); +@@ -431,7 +439,7 @@ static size_t INIT ZSTD_setRleBlock(void *dst, size_t dstCapacity, const void *s + + /*! ZSTD_decodeLiteralsBlock() : + @return : nb of bytes read from src (< srcSize ) */ +-size_t INIT ZSTD_decodeLiteralsBlock(ZSTD_DCtx *dctx, const void *src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ ++STATIC size_t INIT ZSTD_decodeLiteralsBlock(ZSTD_DCtx *dctx, const void *src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ + { + if (srcSize < MIN_CBLOCK_SIZE) + return ERROR(corruption_detected); +@@ -795,7 +803,7 @@ static size_t INIT ZSTD_buildSeqTable(FSE_DTable *DTableSpace, const FSE_DTable + } + } + +-size_t INIT ZSTD_decodeSeqHeaders(ZSTD_DCtx *dctx, int *nbSeqPtr, const void *src, size_t srcSize) ++STATIC size_t INIT ZSTD_decodeSeqHeaders(ZSTD_DCtx *dctx, int *nbSeqPtr, const void *src, size_t srcSize) + { + const BYTE *const istart = (const BYTE *const)src; + const BYTE *const iend = istart + srcSize; +@@ -1481,6 +1489,7 @@ static void INIT ZSTD_checkContinuity(ZSTD_DCtx *dctx, const void *dst) + } + } + ++#ifdef BUILD_DEAD_CODE + size_t INIT ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) + { + size_t dSize; +@@ -1498,8 +1507,9 @@ size_t INIT ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, size_t blo + dctx->previousDstEnd = (const char *)blockStart + blockSize; + return blockSize; + } ++#endif /* BUILD_DEAD_CODE */ + +-size_t INIT ZSTD_generateNxBytes(void *dst, size_t dstCapacity, BYTE byte, size_t length) ++STATIC size_t INIT ZSTD_generateNxBytes(void *dst, size_t dstCapacity, BYTE byte, size_t length) + { + if (length > 
dstCapacity) + return ERROR(dstSize_tooSmall); +@@ -1512,7 +1522,7 @@ size_t INIT ZSTD_generateNxBytes(void *dst, size_t dstCapacity, BYTE byte, size_ + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the compressed size of the frame starting at `src` */ +-size_t INIT ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) ++STATIC size_t INIT ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) + { + if (srcSize >= ZSTD_skippableHeaderSize && (ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + return ZSTD_skippableHeaderSize + ZSTD_readLE32((const BYTE *)src + 4); +@@ -1709,12 +1719,12 @@ static size_t INIT ZSTD_decompressMultiFrame(ZSTD_DCtx *dctx, void *dst, size_t + return (BYTE *)dst - (BYTE *)dststart; + } + +-size_t INIT ZSTD_decompress_usingDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize) ++STATIC size_t INIT ZSTD_decompress_usingDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize) + { + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); + } + +-size_t INIT ZSTD_decompressDCtx(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) ++STATIC size_t INIT ZSTD_decompressDCtx(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) + { + return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); + } +@@ -1723,9 +1733,12 @@ size_t INIT ZSTD_decompressDCtx(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, + * Advanced Streaming Decompression API + * Bufferless and synchronous + ****************************************/ +-size_t INIT ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx) { return dctx->expected; } ++STATIC size_t INIT ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx) 
++{ ++ return dctx->expected; ++} + +-ZSTD_nextInputType_e INIT ZSTD_nextInputType(ZSTD_DCtx *dctx) ++STATIC ZSTD_nextInputType_e INIT ZSTD_nextInputType(ZSTD_DCtx *dctx) + { + switch (dctx->stage) { + default: /* should not happen */ +@@ -1745,7 +1758,7 @@ int INIT ZSTD_isSkipFrame(ZSTD_DCtx *dctx) { return dctx->stage == ZSTDds_skipFr + /** ZSTD_decompressContinue() : + * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) + * or an error code, which can be tested using ZSTD_isError() */ +-size_t INIT ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) ++STATIC size_t INIT ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) + { + /* Sanity check */ + if (srcSize != dctx->expected) +@@ -1971,7 +1984,7 @@ static size_t INIT ZSTD_decompress_insertDictionary(ZSTD_DCtx *dctx, const void + return ZSTD_refDictContent(dctx, dict, dictSize); + } + +-size_t INIT ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) ++STATIC size_t INIT ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) + { + CHECK_F(ZSTD_decompressBegin(dctx)); + if (dict && dictSize) +@@ -1991,7 +2004,9 @@ struct ZSTD_DDict_s { + ZSTD_customMem cMem; + }; /* typedef'd to ZSTD_DDict within "zstd.h" */ + ++#ifdef BUILD_DEAD_CODE + size_t INIT ZSTD_DDictWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DDict)); } ++#endif + + static const void *INIT ZSTD_DDictDictContent(const ZSTD_DDict *ddict) { return ddict->dictContent; } + +@@ -2023,6 +2038,7 @@ static void INIT ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict) + } + } + ++#ifdef BUILD_DEAD_CODE + static size_t INIT ZSTD_loadEntropy_inDDict(ZSTD_DDict *ddict) + { + ddict->dictID = 0; +@@ -2090,6 +2106,7 @@ ZSTD_DDict *INIT ZSTD_initDDict(const void *dict, size_t dictSize, void *workspa + ZSTD_customMem const stackMem = 
ZSTD_initStack(workspace, workspaceSize); + return ZSTD_createDDict_advanced(dict, dictSize, 1, stackMem); + } ++#endif /* BUILD_DEAD_CODE */ + + size_t INIT ZSTD_freeDDict(ZSTD_DDict *ddict) + { +@@ -2103,6 +2120,7 @@ size_t INIT ZSTD_freeDDict(ZSTD_DDict *ddict) + } + } + ++#ifdef BUILD_DEAD_CODE + /*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. +@@ -2145,11 +2163,12 @@ unsigned INIT ZSTD_getDictID_fromFrame(const void *src, size_t srcSize) + return 0; + return zfp.dictID; + } ++#endif /* BUILD_DEAD_CODE */ + + /*! ZSTD_decompress_usingDDict() : + * Decompression using a pre-digested Dictionary + * Use dictionary without significant overhead. */ +-size_t INIT ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_DDict *ddict) ++STATIC size_t INIT ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_DDict *ddict) + { + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, NULL, 0, ddict); +@@ -2186,7 +2205,7 @@ struct ZSTD_DStream_s { + U32 hostageByte; + }; /* typedef'd to ZSTD_DStream within "zstd.h" */ + +-size_t INIT ZSTD_DStreamWorkspaceBound(size_t maxWindowSize) ++STATIC size_t INIT ZSTD_DStreamWorkspaceBound(size_t maxWindowSize) + { + size_t const blockSize = MIN(maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); + size_t const inBuffSize = blockSize; +@@ -2216,7 +2235,7 @@ static ZSTD_DStream *INIT ZSTD_createDStream_advanced(ZSTD_customMem customMem) + return zds; + } + +-ZSTD_DStream *INIT ZSTD_initDStream(size_t maxWindowSize, void *workspace, size_t workspaceSize) ++STATIC ZSTD_DStream *INIT ZSTD_initDStream(size_t maxWindowSize, void *workspace, size_t workspaceSize) + { + ZSTD_customMem const stackMem = 
ZSTD_initStack(workspace, workspaceSize); + ZSTD_DStream *zds = ZSTD_createDStream_advanced(stackMem); +@@ -2249,6 +2268,7 @@ ZSTD_DStream *INIT ZSTD_initDStream(size_t maxWindowSize, void *workspace, size_ + return zds; + } + ++#ifdef BUILD_DEAD_CODE + ZSTD_DStream *INIT ZSTD_initDStream_usingDDict(size_t maxWindowSize, const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize) + { + ZSTD_DStream *zds = ZSTD_initDStream(maxWindowSize, workspace, workspaceSize); +@@ -2257,6 +2277,7 @@ ZSTD_DStream *INIT ZSTD_initDStream_usingDDict(size_t maxWindowSize, const ZSTD_ + } + return zds; + } ++#endif + + size_t INIT ZSTD_freeDStream(ZSTD_DStream *zds) + { +@@ -2279,10 +2300,12 @@ size_t INIT ZSTD_freeDStream(ZSTD_DStream *zds) + + /* *** Initialization *** */ + ++#ifdef BUILD_DEAD_CODE + size_t INIT ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; } + size_t INIT ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } ++#endif + +-size_t INIT ZSTD_resetDStream(ZSTD_DStream *zds) ++STATIC size_t INIT ZSTD_resetDStream(ZSTD_DStream *zds) + { + zds->stage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; +@@ -2300,7 +2323,7 @@ ZSTD_STATIC size_t INIT ZSTD_limitCopy(void *dst, size_t dstCapacity, const void + return length; + } + +-size_t INIT ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inBuffer *input) ++STATIC size_t INIT ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inBuffer *input) + { + const char *const istart = (const char *)(input->src) + input->pos; + const char *const iend = (const char *)(input->src) + input->size; +diff --git a/xen/common/zstd/error_private.h b/xen/common/zstd/error_private.h +index d07bf3cb9b..906d537e08 100644 +--- a/xen/common/zstd/error_private.h ++++ b/xen/common/zstd/error_private.h +@@ -19,11 +19,6 @@ + #ifndef ERROR_H_MODULE + #define ERROR_H_MODULE + +-/* **************************************** +-* 
Dependencies +-******************************************/ +-#include /* size_t */ +- + /** + * enum ZSTD_ErrorCode - zstd error codes + * +diff --git a/xen/common/zstd/fse.h b/xen/common/zstd/fse.h +index b86717c34d..5761e09f17 100644 +--- a/xen/common/zstd/fse.h ++++ b/xen/common/zstd/fse.h +@@ -40,11 +40,6 @@ + #ifndef FSE_H + #define FSE_H + +-/*-***************************************** +-* Dependencies +-******************************************/ +-#include /* size_t, ptrdiff_t */ +- + /*-***************************************** + * FSE_PUBLIC_API : control library symbols visibility + ******************************************/ +diff --git a/xen/common/zstd/fse_decompress.c b/xen/common/zstd/fse_decompress.c +index cc51206df6..6c61e9002e 100644 +--- a/xen/common/zstd/fse_decompress.c ++++ b/xen/common/zstd/fse_decompress.c +@@ -48,8 +48,6 @@ + #include "bitstream.h" + #include "fse.h" + #include "zstd_internal.h" +-#include +-#include /* memcpy, memset */ + + /* ************************************************************** + * Error Management +diff --git a/xen/common/zstd/huf.h b/xen/common/zstd/huf.h +index a9d522c7bb..a498e0de28 100644 +--- a/xen/common/zstd/huf.h ++++ b/xen/common/zstd/huf.h +@@ -40,9 +40,6 @@ + #ifndef HUF_H_298734234 + #define HUF_H_298734234 + +-/* *** Dependencies *** */ +-#include /* size_t */ +- + /* *** Tool functions *** */ + #define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ + size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ +diff --git a/xen/common/zstd/huf_decompress.c b/xen/common/zstd/huf_decompress.c +index 341619e642..f6aca709a6 100644 +--- a/xen/common/zstd/huf_decompress.c ++++ b/xen/common/zstd/huf_decompress.c +@@ -48,8 +48,6 @@ + #include "bitstream.h" /* BIT_* */ + #include "fse.h" /* header compression */ + #include "huf.h" +-#include +-#include /* memcpy, memset */ + + /* 
************************************************************** + * Error Management +diff --git a/xen/common/zstd/mem.h b/xen/common/zstd/mem.h +index 2883200696..2acae6a8ed 100644 +--- a/xen/common/zstd/mem.h ++++ b/xen/common/zstd/mem.h +@@ -20,9 +20,11 @@ + /*-**************************************** + * Dependencies + ******************************************/ ++#ifdef __XEN__ + #include /* memcpy */ + #include /* size_t, ptrdiff_t */ + #include ++#endif + + /*-**************************************** + * Compiler specifics +diff --git a/xen/common/zstd/zstd_internal.h b/xen/common/zstd/zstd_internal.h +index 7f8e5529eb..caa7aab406 100644 +--- a/xen/common/zstd/zstd_internal.h ++++ b/xen/common/zstd/zstd_internal.h +@@ -28,8 +28,10 @@ + ***************************************/ + #include "error_private.h" + #include "mem.h" ++#ifdef __XEN__ + #include + #include ++#endif + + #define ALIGN(x, a) ((x + (a) - 1) & ~((a) - 1)) + #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +@@ -95,8 +97,10 @@ typedef struct ZSTD_DStream_s ZSTD_DStream; + /*-************************************* + * shared macros + ***************************************/ ++#ifndef MIN + #define MIN(a, b) ((a) < (b) ? (a) : (b)) + #define MAX(a, b) ((a) > (b) ? 
(a) : (b)) ++#endif + #define CHECK_F(f) \ + { \ + size_t const errcod = f; \ +diff --git a/xen/include/xen/unaligned.h b/xen/include/xen/unaligned.h +index eef7ec73b6..0a2b16d05d 100644 +--- a/xen/include/xen/unaligned.h ++++ b/xen/include/xen/unaligned.h +@@ -10,8 +10,10 @@ + #ifndef __XEN_UNALIGNED_H__ + #define __XEN_UNALIGNED_H__ + ++#ifdef __XEN__ + #include + #include ++#endif + + #define get_unaligned(p) (*(p)) + #define put_unaligned(val, p) (*(p) = (val)) +diff --git a/xen/lib/xxhash64.c b/xen/lib/xxhash64.c +index ba6bcf152d..481e76fbcf 100644 +--- a/xen/lib/xxhash64.c ++++ b/xen/lib/xxhash64.c +@@ -38,11 +38,13 @@ + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + ++#ifdef __XEN__ + #include + #include + #include + #include + #include ++#endif + + /*-************************************* + * Macros +-- +2.20.1 + diff --git a/xen.git-d6627cf1b63ce57a6a7e2c1800dbc50eed742c32.patch b/xen.git-d6627cf1b63ce57a6a7e2c1800dbc50eed742c32.patch new file mode 100644 index 0000000..c8643cf --- /dev/null +++ b/xen.git-d6627cf1b63ce57a6a7e2c1800dbc50eed742c32.patch @@ -0,0 +1,6399 @@ +From d6627cf1b63ce57a6a7e2c1800dbc50eed742c32 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Mon, 18 Jan 2021 12:12:23 +0100 +Subject: [PATCH] x86/Dom0: support zstd compressed kernels + +Taken from Linux at commit 1c4dd334df3a ("lib: decompress_unzstd: Limit +output size") for unzstd.c (renamed from decompress_unzstd.c) and +36f9ff9e03de ("lib: Fix fall-through warnings for Clang") for zstd/, +with bits from linux/zstd.h merged into suitable other headers. + +To limit the editing necessary, introduce ptrdiff_t. 
+ +Signed-off-by: Jan Beulich +Acked-by: Andrew Cooper +--- + xen/common/Makefile | 2 +- + xen/common/decompress.c | 3 + + xen/common/unzstd.c | 308 ++++ + xen/common/zstd/bitstream.h | 380 +++++ + xen/common/zstd/decompress.c | 2496 ++++++++++++++++++++++++++++++ + xen/common/zstd/entropy_common.c | 243 +++ + xen/common/zstd/error_private.h | 110 ++ + xen/common/zstd/fse.h | 575 +++++++ + xen/common/zstd/fse_decompress.c | 324 ++++ + xen/common/zstd/huf.h | 212 +++ + xen/common/zstd/huf_decompress.c | 960 ++++++++++++ + xen/common/zstd/mem.h | 151 ++ + xen/common/zstd/zstd_common.c | 74 + + xen/common/zstd/zstd_internal.h | 372 +++++ + xen/include/asm-arm/types.h | 6 + + xen/include/asm-x86/types.h | 6 + + xen/include/xen/decompress.h | 2 +- + 17 files changed, 6222 insertions(+), 2 deletions(-) + create mode 100644 xen/common/unzstd.c + create mode 100644 xen/common/zstd/bitstream.h + create mode 100644 xen/common/zstd/decompress.c + create mode 100644 xen/common/zstd/entropy_common.c + create mode 100644 xen/common/zstd/error_private.h + create mode 100644 xen/common/zstd/fse.h + create mode 100644 xen/common/zstd/fse_decompress.c + create mode 100644 xen/common/zstd/huf.h + create mode 100644 xen/common/zstd/huf_decompress.c + create mode 100644 xen/common/zstd/mem.h + create mode 100644 xen/common/zstd/zstd_common.c + create mode 100644 xen/common/zstd/zstd_internal.h + +diff --git a/xen/common/Makefile b/xen/common/Makefile +index 7a4e652b57..d751315a87 100644 +--- a/xen/common/Makefile ++++ b/xen/common/Makefile +@@ -55,7 +55,7 @@ obj-bin-y += warning.init.o + obj-$(CONFIG_XENOPROF) += xenoprof.o + obj-y += xmalloc_tlsf.o + +-obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma lzo unlzo unlz4 earlycpio,$(n).init.o) ++obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma lzo unlzo unlz4 unzstd earlycpio,$(n).init.o) + + obj-$(CONFIG_COMPAT) += $(addprefix compat/,domain.o kernel.o memory.o multicall.o xlat.o) + +diff --git 
a/xen/common/decompress.c b/xen/common/decompress.c +index 9d6e0c4ab0..79e60f4802 100644 +--- a/xen/common/decompress.c ++++ b/xen/common/decompress.c +@@ -31,5 +31,8 @@ int __init decompress(void *inbuf, unsigned int len, void *outbuf) + if ( len >= 2 && !memcmp(inbuf, "\x02\x21", 2) ) + return unlz4(inbuf, len, NULL, NULL, outbuf, NULL, error); + ++ if ( len >= 4 && !memcmp(inbuf, "\x28\xb5\x2f\xfd", 4) ) ++ return unzstd(inbuf, len, NULL, NULL, outbuf, NULL, error); ++ + return 1; + } +diff --git a/xen/common/unzstd.c b/xen/common/unzstd.c +new file mode 100644 +index 0000000000..a107616427 +--- /dev/null ++++ b/xen/common/unzstd.c +@@ -0,0 +1,308 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++/* ++ * Important notes about in-place decompression ++ * ++ * At least on x86, the kernel is decompressed in place: the compressed data ++ * is placed to the end of the output buffer, and the decompressor overwrites ++ * most of the compressed data. There must be enough safety margin to ++ * guarantee that the write position is always behind the read position. ++ * ++ * The safety margin for ZSTD with a 128 KB block size is calculated below. ++ * Note that the margin with ZSTD is bigger than with GZIP or XZ! ++ * ++ * The worst case for in-place decompression is that the beginning of ++ * the file is compressed extremely well, and the rest of the file is ++ * uncompressible. Thus, we must look for worst-case expansion when the ++ * compressor is encoding uncompressible data. ++ * ++ * The structure of the .zst file in case of a compressed kernel is as follows. ++ * Maximum sizes (as bytes) of the fields are in parentheses. ++ * ++ * Frame Header: (18) ++ * Blocks: (N) ++ * Checksum: (4) ++ * ++ * The frame header and checksum overhead is at most 22 bytes. ++ * ++ * ZSTD stores the data in blocks. Each block has a header whose size is ++ * 3 bytes. After the block header, there is up to 128 KB of payload. ++ * The maximum uncompressed size of the payload is 128 KB.
The minimum ++ * uncompressed size of the payload is never less than the payload size ++ * (excluding the block header). ++ * ++ * The assumption, that the uncompressed size of the payload is never ++ * smaller than the payload itself, is valid only when talking about ++ * the payload as a whole. It is possible that the payload has parts where ++ * the decompressor consumes more input than it produces output. Calculating ++ * the worst case for this would be tricky. Instead of trying to do that, ++ * let's simply make sure that the decompressor never overwrites any bytes ++ * of the payload which it is currently reading. ++ * ++ * Now we have enough information to calculate the safety margin. We need ++ * - 22 bytes for the .zst file format headers; ++ * - 3 bytes per every 128 KiB of uncompressed size (one block header per ++ * block); and ++ * - 128 KiB (biggest possible zstd block size) to make sure that the ++ * decompressor never overwrites anything from the block it is currently ++ * reading. ++ * ++ * We get the following formula: ++ * ++ * safety_margin = 22 + uncompressed_size * 3 / 131072 + 131072 ++ * <= 22 + (uncompressed_size >> 15) + 131072 ++ */ ++ ++#include "decompress.h" ++ ++#include "zstd/entropy_common.c" ++#include "zstd/fse_decompress.c" ++#include "zstd/huf_decompress.c" ++#include "zstd/zstd_common.c" ++#include "zstd/decompress.c" ++ ++/* 128MB is the maximum window size supported by zstd. */ ++#define ZSTD_WINDOWSIZE_MAX (1 << ZSTD_WINDOWLOG_MAX) ++/* ++ * Size of the input and output buffers in multi-call mode. ++ * Pick a larger size because it isn't used during kernel decompression, ++ * since that is single pass, and we have to allocate a large buffer for ++ * zstd's window anyway. The larger size speeds up initramfs decompression. 
++ */ ++#define ZSTD_IOBUF_SIZE (1 << 17) ++ ++static int INIT handle_zstd_error(size_t ret, void (*error)(const char *x)) ++{ ++ const int err = ZSTD_getErrorCode(ret); ++ ++ if (!ZSTD_isError(ret)) ++ return 0; ++ ++ switch (err) { ++ case ZSTD_error_memory_allocation: ++ error("ZSTD decompressor ran out of memory"); ++ break; ++ case ZSTD_error_prefix_unknown: ++ error("Input is not in the ZSTD format (wrong magic bytes)"); ++ break; ++ case ZSTD_error_dstSize_tooSmall: ++ case ZSTD_error_corruption_detected: ++ case ZSTD_error_checksum_wrong: ++ error("ZSTD-compressed data is corrupt"); ++ break; ++ default: ++ error("ZSTD-compressed data is probably corrupt"); ++ break; ++ } ++ return -1; ++} ++ ++/* ++ * Handle the case where we have the entire input and output in one segment. ++ * We can allocate less memory (no circular buffer for the sliding window), ++ * and avoid some memcpy() calls. ++ */ ++static int INIT decompress_single(const u8 *in_buf, long in_len, u8 *out_buf, ++ long out_len, unsigned int *in_pos, ++ void (*error)(const char *x)) ++{ ++ const size_t wksp_size = ZSTD_DCtxWorkspaceBound(); ++ void *wksp = large_malloc(wksp_size); ++ ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wksp_size); ++ int err; ++ size_t ret; ++ ++ if (dctx == NULL) { ++ error("Out of memory while allocating ZSTD_DCtx"); ++ err = -1; ++ goto out; ++ } ++ /* ++ * Find out how large the frame actually is, there may be junk at ++ * the end of the frame that ZSTD_decompressDCtx() can't handle. 
++ */ ++ ret = ZSTD_findFrameCompressedSize(in_buf, in_len); ++ err = handle_zstd_error(ret, error); ++ if (err) ++ goto out; ++ in_len = (long)ret; ++ ++ ret = ZSTD_decompressDCtx(dctx, out_buf, out_len, in_buf, in_len); ++ err = handle_zstd_error(ret, error); ++ if (err) ++ goto out; ++ ++ if (in_pos != NULL) ++ *in_pos = in_len; ++ ++ err = 0; ++out: ++ if (wksp != NULL) ++ large_free(wksp); ++ return err; ++} ++ ++STATIC int INIT unzstd(unsigned char *in_buf, unsigned int in_len, ++ int (*fill)(void*, unsigned int), ++ int (*flush)(void*, unsigned int), ++ unsigned char *out_buf, ++ unsigned int *in_pos, ++ void (*error)(const char *x)) ++{ ++ ZSTD_inBuffer in; ++ ZSTD_outBuffer out; ++ ZSTD_frameParams params; ++ void *in_allocated = NULL; ++ void *out_allocated = NULL; ++ void *wksp = NULL; ++ size_t wksp_size; ++ ZSTD_DStream *dstream; ++ int err; ++ size_t ret; ++ /* ++ * ZSTD decompression code won't be happy if the buffer size is so big ++ * that its end address overflows. When the size is not provided, make ++ * it as big as possible without having the end address overflow. ++ */ ++ unsigned long out_len = ULONG_MAX - (unsigned long)out_buf; ++ ++ if (fill == NULL && flush == NULL) ++ /* ++ * We can decompress faster and with less memory when we have a ++ * single chunk. ++ */ ++ return decompress_single(in_buf, in_len, out_buf, out_len, ++ in_pos, error); ++ ++ /* ++ * If in_buf is not provided, we must be using fill(), so allocate ++ * a large enough buffer. If it is provided, it must be at least ++ * ZSTD_IOBUF_SIZE large. ++ */ ++ if (in_buf == NULL) { ++ in_allocated = large_malloc(ZSTD_IOBUF_SIZE); ++ if (in_allocated == NULL) { ++ error("Out of memory while allocating input buffer"); ++ err = -1; ++ goto out; ++ } ++ in_buf = in_allocated; ++ in_len = 0; ++ } ++ /* Read the first chunk, since we need to decode the frame header. 
*/ ++ if (fill != NULL) ++ in_len = fill(in_buf, ZSTD_IOBUF_SIZE); ++ if ((int)in_len < 0) { ++ error("ZSTD-compressed data is truncated"); ++ err = -1; ++ goto out; ++ } ++ /* Set the first non-empty input buffer. */ ++ in.src = in_buf; ++ in.pos = 0; ++ in.size = in_len; ++ /* Allocate the output buffer if we are using flush(). */ ++ if (flush != NULL) { ++ out_allocated = large_malloc(ZSTD_IOBUF_SIZE); ++ if (out_allocated == NULL) { ++ error("Out of memory while allocating output buffer"); ++ err = -1; ++ goto out; ++ } ++ out_buf = out_allocated; ++ out_len = ZSTD_IOBUF_SIZE; ++ } ++ /* Set the output buffer. */ ++ out.dst = out_buf; ++ out.pos = 0; ++ out.size = out_len; ++ ++ /* ++ * We need to know the window size to allocate the ZSTD_DStream. ++ * Since we are streaming, we need to allocate a buffer for the sliding ++ * window. The window size varies from 1 KB to ZSTD_WINDOWSIZE_MAX ++ * (128 MB), so it is important to use the actual value so as not to ++ * waste memory when it is smaller. ++ */ ++ ret = ZSTD_getFrameParams(&params, in.src, in.size); ++ err = handle_zstd_error(ret, error); ++ if (err) ++ goto out; ++ if (ret != 0) { ++ error("ZSTD-compressed data has an incomplete frame header"); ++ err = -1; ++ goto out; ++ } ++ if (params.windowSize > ZSTD_WINDOWSIZE_MAX) { ++ error("ZSTD-compressed data has too large a window size"); ++ err = -1; ++ goto out; ++ } ++ ++ /* ++ * Allocate the ZSTD_DStream now that we know how much memory is ++ * required. ++ */ ++ wksp_size = ZSTD_DStreamWorkspaceBound(params.windowSize); ++ wksp = large_malloc(wksp_size); ++ dstream = ZSTD_initDStream(params.windowSize, wksp, wksp_size); ++ if (dstream == NULL) { ++ error("Out of memory while allocating ZSTD_DStream"); ++ err = -1; ++ goto out; ++ } ++ ++ /* ++ * Decompression loop: ++ * Read more data if necessary (error if no more data can be read). ++ * Call the decompression function, which returns 0 when finished. ++ * Flush any data produced if using flush().
++ */ ++ if (in_pos != NULL) ++ *in_pos = 0; ++ do { ++ /* ++ * If we need to reload data, either we have fill() and can ++ * try to get more data, or we don't and the input is truncated. ++ */ ++ if (in.pos == in.size) { ++ if (in_pos != NULL) ++ *in_pos += in.pos; ++ in_len = fill ? fill(in_buf, ZSTD_IOBUF_SIZE) : -1; ++ if ((int)in_len < 0) { ++ error("ZSTD-compressed data is truncated"); ++ err = -1; ++ goto out; ++ } ++ in.pos = 0; ++ in.size = in_len; ++ } ++ /* Returns zero when the frame is complete. */ ++ ret = ZSTD_decompressStream(dstream, &out, &in); ++ err = handle_zstd_error(ret, error); ++ if (err) ++ goto out; ++ /* Flush all of the data produced if using flush(). */ ++ if (flush != NULL && out.pos > 0) { ++ if (out.pos != flush(out.dst, out.pos)) { ++ error("Failed to flush()"); ++ err = -1; ++ goto out; ++ } ++ out.pos = 0; ++ } ++ } while (ret != 0); ++ ++ if (in_pos != NULL) ++ *in_pos += in.pos; ++ ++ err = 0; ++out: ++ if (in_allocated != NULL) ++ large_free(in_allocated); ++ if (out_allocated != NULL) ++ large_free(out_allocated); ++ if (wksp != NULL) ++ large_free(wksp); ++ return err; ++} +diff --git a/xen/common/zstd/bitstream.h b/xen/common/zstd/bitstream.h +new file mode 100644 +index 0000000000..2b06d4551f +--- /dev/null ++++ b/xen/common/zstd/bitstream.h +@@ -0,0 +1,380 @@ ++/* ++ * bitstream ++ * Part of FSE library ++ * header file (to include) ++ * Copyright (C) 2013-2016, Yann Collet. ++ * ++ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions are ++ * met: ++ * ++ * * Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. 
++ * * Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following disclaimer ++ * in the documentation and/or other materials provided with the ++ * distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). ++ * ++ * You can contact the author at : ++ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ++ */ ++#ifndef BITSTREAM_H_MODULE ++#define BITSTREAM_H_MODULE ++ ++/* ++* This API consists of small unitary functions, which must be inlined for best performance. ++* Since link-time-optimization is not available for all compilers, ++* these functions are defined into a .h to be included. 
++*/ ++ ++/*-**************************************** ++* Dependencies ++******************************************/ ++#include "error_private.h" /* error codes and messages */ ++#include "mem.h" /* unaligned access routines */ ++ ++/*========================================= ++* Target specific ++=========================================*/ ++#define STREAM_ACCUMULATOR_MIN_32 25 ++#define STREAM_ACCUMULATOR_MIN_64 57 ++#define STREAM_ACCUMULATOR_MIN ((U32)(ZSTD_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) ++ ++/*-****************************************** ++* bitStream encoding API (write forward) ++********************************************/ ++/* bitStream can mix input from multiple sources. ++* A critical property of these streams is that they encode and decode in **reverse** direction. ++* So the first bit sequence you add will be the last to be read, like a LIFO stack. ++*/ ++typedef struct { ++ size_t bitContainer; ++ int bitPos; ++ char *startPtr; ++ char *ptr; ++ char *endPtr; ++} BIT_CStream_t; ++ ++ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *dstBuffer, size_t dstCapacity); ++ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits); ++ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC); ++ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC); ++ ++/* Start with initCStream, providing the size of buffer to write into. ++* bitStream will never write outside of this buffer. ++* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. ++* ++* bits are first added to a local register. ++* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. ++* Writing data into memory is an explicit operation, performed by the flushBits function. ++* Hence keep track how many bits are potentially stored into local register to avoid register overflow. ++* After a flushBits, a maximum of 7 bits might still be stored into local register. 
++* ++* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. ++* ++* Last operation is to close the bitStream. ++* The function returns the final size of CStream in bytes. ++* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) ++*/ ++ ++/*-******************************************** ++* bitStream decoding API (read backward) ++**********************************************/ ++typedef struct { ++ size_t bitContainer; ++ unsigned bitsConsumed; ++ const char *ptr; ++ const char *start; ++} BIT_DStream_t; ++ ++typedef enum { ++ BIT_DStream_unfinished = 0, ++ BIT_DStream_endOfBuffer = 1, ++ BIT_DStream_completed = 2, ++ BIT_DStream_overflow = 3 ++} BIT_DStream_status; /* result of BIT_reloadDStream() */ ++/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ ++ ++ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize); ++ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, unsigned nbBits); ++ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD); ++ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *bitD); ++ ++/* Start by invoking BIT_initDStream(). ++* A chunk of the bitStream is then stored into a local register. ++* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). ++* You can then retrieve bitFields stored into the local register, **in reverse order**. ++* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. ++* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. ++* Otherwise, it can be less than that, so proceed accordingly. ++* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). 
++*/ ++ ++/*-**************************************** ++* unsafe API ++******************************************/ ++ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits); ++/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ ++ ++ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC); ++/* unsafe version; does not check buffer overflow */ ++ ++ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, unsigned nbBits); ++/* faster, but works only if nbBits >= 1 */ ++ ++/*-************************************************************** ++* Internal functions ++****************************************************************/ ++ZSTD_STATIC unsigned BIT_highbit32(register U32 val) { return 31 - __builtin_clz(val); } ++ ++/*===== Local Constants =====*/ ++static const unsigned BIT_mask[] = {0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, ++ 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, ++ 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF}; /* up to 26 bits */ ++ ++/*-************************************************************** ++* bitStream encoding ++****************************************************************/ ++/*! BIT_initCStream() : ++ * `dstCapacity` must be > sizeof(void*) ++ * @return : 0 if success, ++ otherwise an error code (can be tested using ERR_isError() ) */ ++ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *startPtr, size_t dstCapacity) ++{ ++ bitC->bitContainer = 0; ++ bitC->bitPos = 0; ++ bitC->startPtr = (char *)startPtr; ++ bitC->ptr = bitC->startPtr; ++ bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr); ++ if (dstCapacity <= sizeof(bitC->ptr)) ++ return ERROR(dstSize_tooSmall); ++ return 0; ++} ++ ++/*! BIT_addBits() : ++ can add up to 26 bits into `bitC`. ++ Does not check for register overflow ! 
*/ ++ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits) ++{ ++ bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; ++ bitC->bitPos += nbBits; ++} ++ ++/*! BIT_addBitsFast() : ++ * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ ++ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits) ++{ ++ bitC->bitContainer |= value << bitC->bitPos; ++ bitC->bitPos += nbBits; ++} ++ ++/*! BIT_flushBitsFast() : ++ * unsafe version; does not check buffer overflow */ ++ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC) ++{ ++ size_t const nbBytes = bitC->bitPos >> 3; ++ ZSTD_writeLEST(bitC->ptr, bitC->bitContainer); ++ bitC->ptr += nbBytes; ++ bitC->bitPos &= 7; ++ bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ ++} ++ ++/*! BIT_flushBits() : ++ * safe version; check for buffer overflow, and prevents it. ++ * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */ ++ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC) ++{ ++ size_t const nbBytes = bitC->bitPos >> 3; ++ ZSTD_writeLEST(bitC->ptr, bitC->bitContainer); ++ bitC->ptr += nbBytes; ++ if (bitC->ptr > bitC->endPtr) ++ bitC->ptr = bitC->endPtr; ++ bitC->bitPos &= 7; ++ bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ ++} ++ ++/*! 
BIT_closeCStream() : ++ * @return : size of CStream, in bytes, ++ or 0 if it could not fit into dstBuffer */ ++ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC) ++{ ++ BIT_addBitsFast(bitC, 1, 1); /* endMark */ ++ BIT_flushBits(bitC); ++ ++ if (bitC->ptr >= bitC->endPtr) ++ return 0; /* doesn't fit within authorized budget : cancel */ ++ ++ return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); ++} ++ ++/*-******************************************************** ++* bitStream decoding ++**********************************************************/ ++/*! BIT_initDStream() : ++* Initialize a BIT_DStream_t. ++* `bitD` : a pointer to an already allocated BIT_DStream_t structure. ++* `srcSize` must be the *exact* size of the bitStream, in bytes. ++* @return : size of stream (== srcSize) or an errorCode if a problem is detected ++*/ ++ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize) ++{ ++ if (srcSize < 1) { ++ memset(bitD, 0, sizeof(*bitD)); ++ return ERROR(srcSize_wrong); ++ } ++ ++ if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ ++ bitD->start = (const char *)srcBuffer; ++ bitD->ptr = (const char *)srcBuffer + srcSize - sizeof(bitD->bitContainer); ++ bitD->bitContainer = ZSTD_readLEST(bitD->ptr); ++ { ++ BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1]; ++ bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ ++ if (lastByte == 0) ++ return ERROR(GENERIC); /* endMark not present */ ++ } ++ } else { ++ bitD->start = (const char *)srcBuffer; ++ bitD->ptr = bitD->start; ++ bitD->bitContainer = *(const BYTE *)(bitD->start); ++ switch (srcSize) { ++ case 7: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[6]) << (sizeof(bitD->bitContainer) * 8 - 16); ++ /* fallthrough */ ++ case 6: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[5]) << (sizeof(bitD->bitContainer) * 8 - 24); ++ /* fallthrough */ ++ case 5: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[4]) << (sizeof(bitD->bitContainer) * 8 - 32); ++ /* fallthrough */ ++ case 4: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[3]) << 24; ++ /* fallthrough */ ++ case 3: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[2]) << 16; ++ /* fallthrough */ ++ case 2: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[1]) << 8; ++ /* fallthrough */ ++ default:; ++ } ++ { ++ BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1]; ++ bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; ++ if (lastByte == 0) ++ return ERROR(GENERIC); /* endMark not present */ ++ } ++ bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize) * 8; ++ } ++ ++ return srcSize; ++} ++ ++ZSTD_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) { return bitContainer >> start; } ++ ++ZSTD_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { return (bitContainer >> start) & BIT_mask[nbBits]; } ++ ++ZSTD_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { return bitContainer & BIT_mask[nbBits]; } ++ ++/*! BIT_lookBits() : ++ * Provides next n bits from local register. ++ * local register is not modified. ++ * On 32-bits, maxNbBits==24. ++ * On 64-bits, maxNbBits==56. 
++ * @return : value extracted ++ */ ++ZSTD_STATIC size_t BIT_lookBits(const BIT_DStream_t *bitD, U32 nbBits) ++{ ++ U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1; ++ return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask - nbBits) & bitMask); ++} ++ ++/*! BIT_lookBitsFast() : ++* unsafe version; works only if nbBits >= 1 */ ++ZSTD_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t *bitD, U32 nbBits) ++{ ++ U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1; ++ return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask + 1) - nbBits) & bitMask); ++} ++ ++ZSTD_STATIC void BIT_skipBits(BIT_DStream_t *bitD, U32 nbBits) { bitD->bitsConsumed += nbBits; } ++ ++/*! BIT_readBits() : ++ * Read (consume) next n bits from local register and update. ++ * Pay attention to not read more than nbBits contained into local register. ++ * @return : extracted value. ++ */ ++ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, U32 nbBits) ++{ ++ size_t const value = BIT_lookBits(bitD, nbBits); ++ BIT_skipBits(bitD, nbBits); ++ return value; ++} ++ ++/*! BIT_readBitsFast() : ++* unsafe version; works only if nbBits >= 1 */ ++ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, U32 nbBits) ++{ ++ size_t const value = BIT_lookBitsFast(bitD, nbBits); ++ BIT_skipBits(bitD, nbBits); ++ return value; ++} ++ ++/*! BIT_reloadDStream() : ++* Refill `bitD` from buffer previously set in BIT_initDStream() . ++* This function is safe, it guarantees it will not read beyond src buffer. ++* @return : status of `BIT_DStream_t` internal register.
++ if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */ ++ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD) ++{ ++ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer) * 8)) /* should not happen => corruption detected */ ++ return BIT_DStream_overflow; ++ ++ if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) { ++ bitD->ptr -= bitD->bitsConsumed >> 3; ++ bitD->bitsConsumed &= 7; ++ bitD->bitContainer = ZSTD_readLEST(bitD->ptr); ++ return BIT_DStream_unfinished; ++ } ++ if (bitD->ptr == bitD->start) { ++ if (bitD->bitsConsumed < sizeof(bitD->bitContainer) * 8) ++ return BIT_DStream_endOfBuffer; ++ return BIT_DStream_completed; ++ } ++ { ++ U32 nbBytes = bitD->bitsConsumed >> 3; ++ BIT_DStream_status result = BIT_DStream_unfinished; ++ if (bitD->ptr - nbBytes < bitD->start) { ++ nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ ++ result = BIT_DStream_endOfBuffer; ++ } ++ bitD->ptr -= nbBytes; ++ bitD->bitsConsumed -= nbBytes * 8; ++ bitD->bitContainer = ZSTD_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ ++ return result; ++ } ++} ++ ++/*! BIT_endOfDStream() : ++* @return Tells if DStream has exactly reached its end (all bits consumed). ++*/ ++ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *DStream) ++{ ++ return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer) * 8)); ++} ++ ++#endif /* BITSTREAM_H_MODULE */ +diff --git a/xen/common/zstd/decompress.c b/xen/common/zstd/decompress.c +new file mode 100644 +index 0000000000..3d3ef136e5 +--- /dev/null ++++ b/xen/common/zstd/decompress.c +@@ -0,0 +1,2496 @@ ++/** ++ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. ++ * All rights reserved. ++ * ++ * This source code is licensed under the BSD-style license found in the ++ * LICENSE file in the root directory of https://github.com/facebook/zstd. 
++ * An additional grant of patent rights can be found in the PATENTS file in the ++ * same directory. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). ++ */ ++ ++/* *************************************************************** ++* Tuning parameters ++*****************************************************************/ ++/*! ++* MAXWINDOWSIZE_DEFAULT : ++* maximum window size accepted by DStream, by default. ++* Frames requiring more memory will be rejected. ++*/ ++#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT ++#define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1) /* defined within zstd.h */ ++#endif ++ ++/*-******************************************************* ++* Dependencies ++*********************************************************/ ++#include "fse.h" ++#include "huf.h" ++#include "mem.h" /* low level memory routines */ ++#include "zstd_internal.h" ++#include /* memcpy, memmove, memset */ ++ ++#define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) ++ ++/*-************************************* ++* Macros ++***************************************/ ++#define ZSTD_isError ERR_isError /* for inlining */ ++#define FSE_isError ERR_isError ++#define HUF_isError ERR_isError ++ ++/*_******************************************************* ++* Memory operations ++**********************************************************/ ++static void INIT ZSTD_copy4(void *dst, const void *src) { memcpy(dst, src, 4); } ++ ++/*-************************************************************* ++* Context management ++***************************************************************/ ++typedef enum { ++ ZSTDds_getFrameHeaderSize, ++ ZSTDds_decodeFrameHeader, ++ ZSTDds_decodeBlockHeader, ++ 
ZSTDds_decompressBlock, ++ ZSTDds_decompressLastBlock, ++ ZSTDds_checkChecksum, ++ ZSTDds_decodeSkippableHeader, ++ ZSTDds_skipFrame ++} ZSTD_dStage; ++ ++typedef struct { ++ FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; ++ FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; ++ FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; ++ HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ ++ U64 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32 / 2]; ++ U32 rep[ZSTD_REP_NUM]; ++} ZSTD_entropyTables_t; ++ ++struct ZSTD_DCtx_s { ++ const FSE_DTable *LLTptr; ++ const FSE_DTable *MLTptr; ++ const FSE_DTable *OFTptr; ++ const HUF_DTable *HUFptr; ++ ZSTD_entropyTables_t entropy; ++ const void *previousDstEnd; /* detect continuity */ ++ const void *base; /* start of curr segment */ ++ const void *vBase; /* virtual start of previous segment if it was just before curr one */ ++ const void *dictEnd; /* end of previous segment */ ++ size_t expected; ++ ZSTD_frameParams fParams; ++ blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */ ++ ZSTD_dStage stage; ++ U32 litEntropy; ++ U32 fseEntropy; ++ struct xxh64_state xxhState; ++ size_t headerSize; ++ U32 dictID; ++ const BYTE *litPtr; ++ ZSTD_customMem customMem; ++ size_t litSize; ++ size_t rleSize; ++ BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH]; ++ BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; ++}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ ++ ++size_t INIT ZSTD_DCtxWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DCtx)); } ++ ++size_t INIT ZSTD_decompressBegin(ZSTD_DCtx *dctx) ++{ ++ dctx->expected = ZSTD_frameHeaderSize_prefix; ++ dctx->stage = ZSTDds_getFrameHeaderSize; ++ dctx->previousDstEnd = NULL; ++ dctx->base = NULL; ++ dctx->vBase = NULL; ++ dctx->dictEnd = NULL; ++ dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big 
endian */ ++ dctx->litEntropy = dctx->fseEntropy = 0; ++ dctx->dictID = 0; ++ ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); ++ memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ ++ dctx->LLTptr = dctx->entropy.LLTable; ++ dctx->MLTptr = dctx->entropy.MLTable; ++ dctx->OFTptr = dctx->entropy.OFTable; ++ dctx->HUFptr = dctx->entropy.hufTable; ++ return 0; ++} ++ ++ZSTD_DCtx *INIT ZSTD_createDCtx_advanced(ZSTD_customMem customMem) ++{ ++ ZSTD_DCtx *dctx; ++ ++ if (!customMem.customAlloc || !customMem.customFree) ++ return NULL; ++ ++ dctx = (ZSTD_DCtx *)ZSTD_malloc(sizeof(ZSTD_DCtx), customMem); ++ if (!dctx) ++ return NULL; ++ memcpy(&dctx->customMem, &customMem, sizeof(customMem)); ++ ZSTD_decompressBegin(dctx); ++ return dctx; ++} ++ ++ZSTD_DCtx *INIT ZSTD_initDCtx(void *workspace, size_t workspaceSize) ++{ ++ ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); ++ return ZSTD_createDCtx_advanced(stackMem); ++} ++ ++size_t INIT ZSTD_freeDCtx(ZSTD_DCtx *dctx) ++{ ++ if (dctx == NULL) ++ return 0; /* support free on NULL */ ++ ZSTD_free(dctx, dctx->customMem); ++ return 0; /* reserved as a potential error code in the future */ ++} ++ ++void INIT ZSTD_copyDCtx(ZSTD_DCtx *dstDCtx, const ZSTD_DCtx *srcDCtx) ++{ ++ size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max; ++ memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */ ++} ++ ++STATIC size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize); ++STATIC size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, ++ size_t dictSize); ++ ++static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict); ++ ++/*-************************************************************* ++* Decompression section ++***************************************************************/ ++ ++/*! 
ZSTD_isFrame() :
++ * Tells if the content of `buffer` starts with a valid Frame Identifier.
++ * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
++ * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
++ * Note 3 : Skippable Frame Identifiers are considered valid. */
++unsigned INIT ZSTD_isFrame(const void *buffer, size_t size)
++{
++	if (size < 4)
++		return 0;
++	{
++		U32 const magic = ZSTD_readLE32(buffer);
++		if (magic == ZSTD_MAGICNUMBER)
++			return 1;
++		if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) /* low 4 bits are masked off, so 16 consecutive magic values match */
++			return 1;
++	}
++	return 0; /* only the two magic tests above exist here; no legacy identifier is checked in this function */
++}
++
++/** ZSTD_frameHeaderSize() :
++* Derives the full header length from the descriptor byte src[4]; srcSize must be >= ZSTD_frameHeaderSize_prefix.
++* @return : size of the Frame Header, or ERROR(srcSize_wrong) when srcSize is below the prefix size */
++static size_t INIT ZSTD_frameHeaderSize(const void *src, size_t srcSize)
++{
++	if (srcSize < ZSTD_frameHeaderSize_prefix)
++		return ERROR(srcSize_wrong);
++	{
++		BYTE const fhd = ((const BYTE *)src)[4];
++		U32 const dictID = fhd & 3; /* bits 0-1: index into ZSTD_did_fieldSize[] */
++		U32 const singleSegment = (fhd >> 5) & 1; /* bit 5 */
++		U32 const fcsId = fhd >> 6; /* bits 6-7: index into ZSTD_fcs_fieldSize[] */
++		return ZSTD_frameHeaderSize_prefix + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + (singleSegment && !fcsId); /* prefix + window byte (absent when singleSegment) + dictID field + content-size field, plus 1 byte when singleSegment and fcsId == 0 */
++	}
++}
++
++/** ZSTD_getFrameParams() :
++* decode Frame Header, or require larger `srcSize`.
++* @return : 0, `fparamsPtr` is correctly filled, ++* >0, `srcSize` is too small, result is expected `srcSize`, ++* or an error code, which can be tested using ZSTD_isError() */ ++size_t INIT ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, size_t srcSize) ++{ ++ const BYTE *ip = (const BYTE *)src; ++ ++ if (srcSize < ZSTD_frameHeaderSize_prefix) ++ return ZSTD_frameHeaderSize_prefix; ++ if (ZSTD_readLE32(src) != ZSTD_MAGICNUMBER) { ++ if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { ++ if (srcSize < ZSTD_skippableHeaderSize) ++ return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */ ++ memset(fparamsPtr, 0, sizeof(*fparamsPtr)); ++ fparamsPtr->frameContentSize = ZSTD_readLE32((const char *)src + 4); ++ fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */ ++ return 0; ++ } ++ return ERROR(prefix_unknown); ++ } ++ ++ /* ensure there is enough `srcSize` to fully read/decode frame header */ ++ { ++ size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize); ++ if (srcSize < fhsize) ++ return fhsize; ++ } ++ ++ { ++ BYTE const fhdByte = ip[4]; ++ size_t pos = 5; ++ U32 const dictIDSizeCode = fhdByte & 3; ++ U32 const checksumFlag = (fhdByte >> 2) & 1; ++ U32 const singleSegment = (fhdByte >> 5) & 1; ++ U32 const fcsID = fhdByte >> 6; ++ U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; ++ U32 windowSize = 0; ++ U32 dictID = 0; ++ U64 frameContentSize = 0; ++ if ((fhdByte & 0x08) != 0) ++ return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */ ++ if (!singleSegment) { ++ BYTE const wlByte = ip[pos++]; ++ U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; ++ if (windowLog > ZSTD_WINDOWLOG_MAX) ++ return ERROR(frameParameter_windowTooLarge); /* avoids issue with 1 << windowLog */ ++ windowSize = (1U << windowLog); ++ windowSize += (windowSize >> 3) * (wlByte & 7); ++ } ++ ++ switch (dictIDSizeCode) { ++ default: /* impossible */ ++ case 0: break; 
++ case 1: ++ dictID = ip[pos]; ++ pos++; ++ break; ++ case 2: ++ dictID = ZSTD_readLE16(ip + pos); ++ pos += 2; ++ break; ++ case 3: ++ dictID = ZSTD_readLE32(ip + pos); ++ pos += 4; ++ break; ++ } ++ switch (fcsID) { ++ default: /* impossible */ ++ case 0: ++ if (singleSegment) ++ frameContentSize = ip[pos]; ++ break; ++ case 1: frameContentSize = ZSTD_readLE16(ip + pos) + 256; break; ++ case 2: frameContentSize = ZSTD_readLE32(ip + pos); break; ++ case 3: frameContentSize = ZSTD_readLE64(ip + pos); break; ++ } ++ if (!windowSize) ++ windowSize = (U32)frameContentSize; ++ if (windowSize > windowSizeMax) ++ return ERROR(frameParameter_windowTooLarge); ++ fparamsPtr->frameContentSize = frameContentSize; ++ fparamsPtr->windowSize = windowSize; ++ fparamsPtr->dictID = dictID; ++ fparamsPtr->checksumFlag = checksumFlag; ++ } ++ return 0; ++} ++ ++/** ZSTD_getFrameContentSize() : ++* compatible with legacy mode ++* @return : decompressed size of the single frame pointed to be `src` if known, otherwise ++* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined ++* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. 
invalid magic number, srcSize too small) */
++unsigned long long INIT ZSTD_getFrameContentSize(const void *src, size_t srcSize)
++{
++	{
++		ZSTD_frameParams fParams;
++		if (ZSTD_getFrameParams(&fParams, src, srcSize) != 0) /* both "need more input" (>0) and real error codes are reported as an error here */
++			return ZSTD_CONTENTSIZE_ERROR;
++		if (fParams.windowSize == 0) {
++			/* Either skippable or empty frame, size == 0 either way */
++			return 0;
++		} else if (fParams.frameContentSize != 0) {
++			return fParams.frameContentSize;
++		} else {
++			return ZSTD_CONTENTSIZE_UNKNOWN;
++		}
++	}
++}
++
++/** ZSTD_findDecompressedSize() :
++ * compatible with legacy mode
++ * `srcSize` must be the exact length of some number of ZSTD compressed and/or
++ * skippable frames
++ * @return : total decompressed size of the frames contained, ZSTD_CONTENTSIZE_UNKNOWN if a frame does not record its size, or ZSTD_CONTENTSIZE_ERROR on malformed or truncated input */
++unsigned long long INIT ZSTD_findDecompressedSize(const void *src, size_t srcSize)
++{
++	{
++		unsigned long long totalDstSize = 0;
++		while (srcSize >= ZSTD_frameHeaderSize_prefix) {
++			const U32 magicNumber = ZSTD_readLE32(src);
++
++			if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
++				size_t skippableSize;
++				if (srcSize < ZSTD_skippableHeaderSize)
++					return ZSTD_CONTENTSIZE_ERROR; /* was ERROR(srcSize_wrong): that is a size_t error code, not one of the content-size sentinels this function reports failures with */
++				skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize;
++				if (skippableSize < ZSTD_skippableHeaderSize || srcSize < skippableSize) { /* first test catches the sum wrapping around, which would otherwise defeat the bounds check */
++					return ZSTD_CONTENTSIZE_ERROR;
++				}
++
++				src = (const BYTE *)src + skippableSize;
++				srcSize -= skippableSize;
++				continue; /* skippable frames contribute nothing to the total */
++			}
++
++			{
++				unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
++				if (ret >= ZSTD_CONTENTSIZE_ERROR) /* presumably also catches ZSTD_CONTENTSIZE_UNKNOWN, assuming it is the larger sentinel - TODO confirm in zstd.h */
++					return ret;
++
++				/* check for overflow */
++				if (totalDstSize + ret < totalDstSize)
++					return ZSTD_CONTENTSIZE_ERROR;
++				totalDstSize += ret;
++			}
++			{
++				size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
++				if (ZSTD_isError(frameSrcSize)) {
++					return ZSTD_CONTENTSIZE_ERROR;
++				}
++
++				src = (const BYTE *)src + frameSrcSize;
++				srcSize -= frameSrcSize;
++			}
++		}
++
++		if (srcSize) { /* leftover bytes too short to start another frame */
++			return ZSTD_CONTENTSIZE_ERROR;
++		}
++
++		return totalDstSize;
++	}
++}
++
++/** ZSTD_decodeFrameHeader() :
++* Parses the frame header into dctx->fParams, checks the dictionary ID and resets the checksum state when the frame carries one. `headerSize` must be the size provided by ZSTD_frameHeaderSize().
++* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
++static size_t INIT ZSTD_decodeFrameHeader(ZSTD_DCtx *dctx, const void *src, size_t headerSize)
++{
++	size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize);
++	if (ZSTD_isError(result))
++		return result; /* invalid header */
++	if (result > 0)
++		return ERROR(srcSize_wrong); /* headerSize too small */
++	if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) /* a frame dictID of 0 is never treated as a mismatch */
++		return ERROR(dictionary_wrong);
++	if (dctx->fParams.checksumFlag)
++		xxh64_reset(&dctx->xxhState, 0);
++	return 0;
++}
++/* Fields decoded from a block header by ZSTD_getcBlockSize(). */
++typedef struct {
++	blockType_e blockType;
++	U32 lastBlock;
++	U32 origSize;
++} blockProperties_t;
++
++/*! ZSTD_getcBlockSize() :
++* Decodes block header `src` into `bpPtr`. @return : compressed size of the block (1 for an RLE block), or an error code */
++size_t INIT ZSTD_getcBlockSize(const void *src, size_t srcSize, blockProperties_t *bpPtr)
++{
++	if (srcSize < ZSTD_blockHeaderSize)
++		return ERROR(srcSize_wrong);
++	{
++		U32 const cBlockHeader = ZSTD_readLE24(src);
++		U32 const cSize = cBlockHeader >> 3; /* header layout: bit 0 last-block flag, bits 1-2 block type, remaining bits size */
++		bpPtr->lastBlock = cBlockHeader & 1;
++		bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
++		bpPtr->origSize = cSize; /* only useful for RLE */
++		if (bpPtr->blockType == bt_rle)
++			return 1;
++		if (bpPtr->blockType == bt_reserved)
++			return ERROR(corruption_detected);
++		return cSize;
++	}
++}
++/* Copies a raw block of `srcSize` bytes to `dst`; returns srcSize, or dstSize_tooSmall if it does not fit in dstCapacity. */
++static size_t INIT ZSTD_copyRawBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
++{
++	if (srcSize > dstCapacity)
++		return ERROR(dstSize_tooSmall);
++	memcpy(dst, src, srcSize);
++	return srcSize;
++}
++/* Expands an RLE block: fills `regenSize` bytes of `dst` with the single byte at `src`; returns regenSize or an error code. */
++static size_t INIT ZSTD_setRleBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize, size_t regenSize)
++{
++	if (srcSize != 1)
++		return ERROR(srcSize_wrong);
++	if (regenSize > dstCapacity)
++		return ERROR(dstSize_tooSmall);
++	memset(dst, *(const BYTE *)src, regenSize);
++	return regenSize;
++}
++
++/*! ZSTD_decodeLiteralsBlock() : decodes the literals section at `src`, leaving dctx->litPtr / dctx->litSize describing the literals.
++    @return : nb of bytes read from src (< srcSize ), or an error code */
++size_t INIT ZSTD_decodeLiteralsBlock(ZSTD_DCtx *dctx, const void *src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
++{
++	if (srcSize < MIN_CBLOCK_SIZE)
++		return ERROR(corruption_detected);
++
++	{
++		const BYTE *const istart = (const BYTE *)src;
++		symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); /* low 2 bits of the first byte select the encoding */
++
++		switch (litEncType) {
++		case set_repeat:
++			if (dctx->litEntropy == 0) /* no Huffman table from an earlier block (or dictionary) to reuse */
++				return ERROR(dictionary_corrupted);
++		/* fallthrough */
++		case set_compressed:
++			if (srcSize < 5)
++				return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
++			{
++				size_t lhSize, litSize, litCSize;
++				U32 singleStream = 0;
++				U32 const lhlCode = (istart[0] >> 2) & 3;
++				U32 const lhc = ZSTD_readLE32(istart);
++				switch (lhlCode) {
++				case 0:
++				case 1:
++				default: /* note : default is impossible, since lhlCode into [0..3] */
++					/* 2 - 2 - 10 - 10 */
++					singleStream = !lhlCode;
++					lhSize = 3;
++					litSize = (lhc >> 4) & 0x3FF;
++					litCSize = (lhc >> 14) & 0x3FF;
++					break;
++				case 2:
++					/* 2 - 2 - 14 - 14 */
++					lhSize = 4;
++					litSize = (lhc >> 4) & 0x3FFF;
++					litCSize = lhc >> 18;
++					break;
++				case 3:
++					/* 2 - 2 - 18 - 18 */
++					lhSize = 5;
++					litSize = (lhc >> 4) & 0x3FFFF;
++					litCSize = (lhc >> 22) + (istart[4] << 10);
++					break;
++				}
++				if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) /* must fit in dctx->litBuffer */
++					return ERROR(corruption_detected);
++				if (litCSize + lhSize > srcSize)
++					return ERROR(corruption_detected);
++				/* set_repeat decodes with the table dctx->HUFptr already points at; set_compressed reads a new table into dctx->entropy.hufTable. */
++				if (HUF_isError(
++					(litEncType == set_repeat)
++					    ? (singleStream ? HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr)
++							    : HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr))
++					    : (singleStream
++						   ? HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize,
++										 dctx->entropy.workspace, sizeof(dctx->entropy.workspace))
++						   : HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize,
++										   dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
++					return ERROR(corruption_detected);
++
++				dctx->litPtr = dctx->litBuffer;
++				dctx->litSize = litSize;
++				dctx->litEntropy = 1;
++				if (litEncType == set_compressed)
++					dctx->HUFptr = dctx->entropy.hufTable;
++				memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); /* zero the padding after the literals */
++				return litCSize + lhSize;
++			}
++
++		case set_basic: {
++			size_t litSize, lhSize;
++			U32 const lhlCode = ((istart[0]) >> 2) & 3;
++			switch (lhlCode) {
++			case 0:
++			case 2:
++			default: /* note : default is impossible, since lhlCode into [0..3] */
++				lhSize = 1;
++				litSize = istart[0] >> 3;
++				break;
++			case 1:
++				lhSize = 2;
++				litSize = ZSTD_readLE16(istart) >> 4;
++				break;
++			case 3:
++				lhSize = 3;
++				litSize = ZSTD_readLE24(istart) >> 4;
++				break;
++			}
++
++			if (lhSize + litSize + WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
++				if (litSize + lhSize > srcSize)
++					return ERROR(corruption_detected);
++				memcpy(dctx->litBuffer, istart + lhSize, litSize); /* litSize <= srcSize here; relies on the caller keeping srcSize below the block size limit */
++				dctx->litPtr = dctx->litBuffer;
++				dctx->litSize = litSize;
++				memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
++				return lhSize + litSize;
++			}
++			/* direct reference into compressed stream */
++			dctx->litPtr = istart + lhSize;
++			dctx->litSize = litSize;
++			return lhSize + litSize;
++		}
++
++		case set_rle: {
++			U32 const lhlCode = ((istart[0]) >> 2) & 3;
++			size_t litSize, lhSize;
++			switch (lhlCode) {
++			case 0:
++			case 2:
++			default: /* note : default is impossible, since lhlCode into [0..3] */
++				lhSize = 1;
++				litSize = istart[0] >> 3;
++				break;
++			case 1:
++				lhSize = 2;
++				litSize = ZSTD_readLE16(istart) >> 4;
++				break;
++			case 3:
++				lhSize = 3;
++				litSize = ZSTD_readLE24(istart) >> 4;
++				if (srcSize < 4)
++					return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
++				break;
++			}
++			if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX)
++				return ERROR(corruption_detected);
++			memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); /* the repeated byte follows the header; the padding is filled with it too */
++			dctx->litPtr = dctx->litBuffer;
++			dctx->litSize = litSize;
++			return lhSize + 1;
++		}
++		default:
++			return ERROR(corruption_detected); /* impossible */
++		}
++	}
++}
++/* FSE_decode_t in a union with a U32 member (named alignedBy4, presumably to force 4-byte alignment); ZSTD_buildSeqTable() hands the default tables below out as FSE_DTable storage. */
++typedef union {
++	FSE_decode_t realData;
++	U32 alignedBy4;
++} FSE_decode_t4;
++/* Predefined literal-length decoding table; passed to ZSTD_buildSeqTable() as the table to use for set_basic. */
++static const FSE_decode_t4 LL_defaultDTable[(1 << LL_DEFAULTNORMLOG) + 1] = {
++	{{LL_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
++	{{0, 0, 4}},		     /* 0 : base, symbol, bits */
++	{{16, 0, 4}},
++	{{32, 1, 5}},
++	{{0, 3, 5}},
++	{{0, 4, 5}},
++	{{0, 6, 5}},
++	{{0, 7, 5}},
++	{{0, 9, 5}},
++	{{0, 10, 5}},
++	{{0, 12, 5}},
++	{{0, 14, 6}},
++	{{0, 16, 5}},
++	{{0, 18, 5}},
++	{{0, 19, 5}},
++	{{0, 21, 5}},
++	{{0, 22, 5}},
++	{{0, 24, 5}},
++	{{32, 25, 5}},
++	{{0, 26, 5}},
++	{{0, 27, 6}},
++	{{0, 29, 6}},
++	{{0, 31, 6}},
++	{{32, 0, 4}},
++	{{0, 1, 4}},
++	{{0, 2, 5}},
++	{{32, 4, 5}},
++	{{0, 5, 5}},
++	{{32, 7, 5}},
++	{{0, 8, 5}},
++	{{32, 10, 5}},
++	{{0, 11, 5}},
++	{{0, 13, 6}},
++	{{32, 16, 5}},
++	{{0, 17, 5}},
++	{{32, 19, 5}},
++	{{0, 20, 5}},
++	{{32, 22, 5}},
++	{{0, 23, 5}},
++	{{0, 25, 4}},
++	{{16, 25, 4}},
++	{{32, 26, 5}},
++	{{0, 28, 6}},
++	{{0, 30, 6}},
++	{{48, 0, 4}},
++	{{16, 1, 4}},
++	{{32, 2, 5}},
++	{{32, 3, 5}},
++	{{32, 5, 5}},
++	{{32, 6, 5}},
++	{{32, 8, 5}},
++	{{32, 9, 5}},
++	{{32, 11, 5}},
++	{{32, 12, 5}},
++	{{0, 15, 6}},
++	{{32, 17, 5}},
++	{{32, 18, 5}},
++	{{32, 20, 5}},
++	{{32, 21, 5}},
++	{{32, 23, 5}},
++	{{32, 24, 5}},
++	{{0, 35, 6}},
++	{{0, 34, 6}},
++	{{0, 33, 6}},
++	{{0, 32, 6}},
++}; /* LL_defaultDTable */
++/* Predefined match-length decoding table; passed to ZSTD_buildSeqTable() as the table to use for set_basic. */
++static const FSE_decode_t4 ML_defaultDTable[(1 << ML_DEFAULTNORMLOG) + 1] = {
++	{{ML_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
++	{{0, 0, 6}},		     /* 0 : base, symbol, bits */
++	{{0, 1, 4}},
++	{{32, 2, 5}},
++	{{0, 3, 5}},
++	{{0, 5, 5}},
++	{{0, 6, 5}},
++	{{0, 8, 5}},
++	{{0, 10, 6}},
++	{{0, 13, 6}},
++	{{0, 16, 6}},
++	{{0, 19, 6}},
++	{{0, 22, 6}},
++	{{0, 25, 6}},
++	{{0, 28, 6}},
++	{{0, 31, 6}},
++	{{0, 33, 6}},
++	{{0, 35, 6}},
++	{{0, 37, 6}},
++	{{0, 39, 6}},
++	{{0, 41, 6}},
++	{{0, 43, 6}},
++	{{0, 45, 6}},
++	{{16, 1, 4}},
++	{{0, 2, 4}},
++	{{32, 3, 5}},
++	{{0, 4, 5}},
++	{{32, 6, 5}},
++	{{0, 7, 5}},
++	{{0, 9, 6}},
++	{{0, 12, 6}},
++	{{0, 15, 6}},
++	{{0, 18, 6}},
++	{{0, 21, 6}},
++	{{0, 24, 6}},
++	{{0, 27, 6}},
++	{{0, 30, 6}},
++	{{0, 32, 6}},
++	{{0, 34, 6}},
++	{{0, 36, 6}},
++	{{0, 38, 6}},
++	{{0, 40, 6}},
++	{{0, 42, 6}},
++	{{0, 44, 6}},
++	{{32, 1, 4}},
++	{{48, 1, 4}},
++	{{16, 2, 4}},
++	{{32, 4, 5}},
++	{{32, 5, 5}},
++	{{32, 7, 5}},
++	{{32, 8, 5}},
++	{{0, 11, 6}},
++	{{0, 14, 6}},
++	{{0, 17, 6}},
++	{{0, 20, 6}},
++	{{0, 23, 6}},
++	{{0, 26, 6}},
++	{{0, 29, 6}},
++	{{0, 52, 6}},
++	{{0, 51, 6}},
++	{{0, 50, 6}},
++	{{0, 49, 6}},
++	{{0, 48, 6}},
++	{{0, 47, 6}},
++	{{0, 46, 6}},
++}; /* ML_defaultDTable */
++/* Predefined offset decoding table; passed to ZSTD_buildSeqTable() as the table to use for set_basic. */
++static const FSE_decode_t4 OF_defaultDTable[(1 << OF_DEFAULTNORMLOG) + 1] = {
++	{{OF_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
++	{{0, 0, 5}},		     /* 0 : base, symbol, bits */
++	{{0, 6, 4}},
++	{{0, 9, 5}},
++	{{0, 15, 5}},
++	{{0, 21, 5}},
++	{{0, 3, 5}},
++	{{0, 7, 4}},
++	{{0, 12, 5}},
++	{{0, 18, 5}},
++	{{0, 23, 5}},
++	{{0, 5, 5}},
++	{{0, 8, 4}},
++	{{0, 14, 5}},
++	{{0, 20, 5}},
++	{{0, 2, 5}},
++	{{16, 7, 4}},
++	{{0, 11, 5}},
++	{{0, 17, 5}},
++	{{0, 22, 5}},
++	{{0, 4, 5}},
++	{{16, 8, 4}},
++	{{0, 13, 5}},
++	{{0, 19, 5}},
++	{{0, 1, 5}},
++	{{16, 6, 4}},
++	{{0, 10, 5}},
++	{{0, 16, 5}},
++	{{0, 28, 5}},
++	{{0, 27, 5}},
++	{{0, 26, 5}},
++	{{0, 25, 5}},
++	{{0, 24, 5}},
++}; /* OF_defaultDTable */
++
++/*!
ZSTD_buildSeqTable() :
++	@return : nb bytes read from src,
++			  or an error code if it fails, testable with ZSTD_isError()
++*/
++static size_t INIT ZSTD_buildSeqTable(FSE_DTable *DTableSpace, const FSE_DTable **DTablePtr,
++				      symbolEncodingType_e type, U32 max, U32 maxLog, const void *src,
++				      size_t srcSize, const FSE_decode_t4 *defaultTable,
++				      U32 flagRepeatTable, void *workspace, size_t workspaceSize)
++{
++	const void *const tmpPtr = defaultTable; /* bypass strict aliasing */
++	switch (type) {
++	case set_rle: /* one input byte names the only symbol; the table is built in DTableSpace */
++		if (!srcSize)
++			return ERROR(srcSize_wrong);
++		if ((*(const BYTE *)src) > max)
++			return ERROR(corruption_detected);
++		FSE_buildDTable_rle(DTableSpace, *(const BYTE *)src);
++		*DTablePtr = DTableSpace;
++		return 1;
++	case set_basic: *DTablePtr = (const FSE_DTable *)tmpPtr; return 0; /* use the predefined table; consumes no input */
++	case set_repeat: /* leave *DTablePtr as it is; only valid when flagRepeatTable is set */
++		if (!flagRepeatTable)
++			return ERROR(corruption_detected);
++		return 0;
++	default: /* impossible */
++	case set_compressed: {
++		U32 tableLog;
++		S16 *norm = (S16 *)workspace; /* normalized counts live at the start of the workspace */
++		size_t const spaceUsed32 = ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2;
++
++		if ((spaceUsed32 << 2) > workspaceSize)
++			return ERROR(GENERIC);
++		workspace = (U32 *)workspace + spaceUsed32; /* the remainder is passed on to FSE_buildDTable_wksp() */
++		workspaceSize -= (spaceUsed32 << 2);
++		{
++			size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
++			if (FSE_isError(headerSize))
++				return ERROR(corruption_detected);
++			if (tableLog > maxLog)
++				return ERROR(corruption_detected);
++			FSE_buildDTable_wksp(DTableSpace, norm, max, tableLog, workspace, workspaceSize);
++			*DTablePtr = DTableSpace;
++			return headerSize;
++		}
++	}
++	}
++}
++/* Parses the sequences-section header: the sequence count (1 to 3 bytes), the byte holding the three table modes, then the LL, OF and ML table descriptions. Stores the count in *nbSeqPtr; returns the bytes consumed or an error code. */
++size_t INIT ZSTD_decodeSeqHeaders(ZSTD_DCtx *dctx, int *nbSeqPtr, const void *src, size_t srcSize)
++{
++	const BYTE *const istart = (const BYTE *const)src;
++	const BYTE *const iend = istart + srcSize;
++	const BYTE *ip = istart;
++
++	/* check */
++	if (srcSize < MIN_SEQUENCES_SIZE)
++		return ERROR(srcSize_wrong);
++
++	/* SeqHead */
++	{
++		int nbSeq = *ip++;
++		if (!nbSeq) { /* no sequences: nothing follows the count byte */
++			*nbSeqPtr = 0;
++			return 1;
++		}
++		if (nbSeq > 0x7F) {
++			if (nbSeq == 0xFF) { /* 3-byte form: 16-bit value plus LONGNBSEQ */
++				if (ip + 2 > iend)
++					return ERROR(srcSize_wrong);
++				nbSeq = ZSTD_readLE16(ip) + LONGNBSEQ, ip += 2;
++			} else { /* 2-byte form */
++				if (ip >= iend)
++					return ERROR(srcSize_wrong);
++				nbSeq = ((nbSeq - 0x80) << 8) + *ip++;
++			}
++		}
++		*nbSeqPtr = nbSeq;
++	}
++
++	/* FSE table descriptors */
++	if (ip + 4 > iend)
++		return ERROR(srcSize_wrong); /* minimum possible size */
++	{
++		symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
++		symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
++		symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
++		ip++;
++
++		/* Build DTables */
++		{
++			size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, LLtype, MaxLL, LLFSELog, ip, iend - ip,
++								  LL_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
++			if (ZSTD_isError(llhSize))
++				return ERROR(corruption_detected);
++			ip += llhSize;
++		}
++		{
++			size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, OFtype, MaxOff, OffFSELog, ip, iend - ip,
++								  OF_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
++			if (ZSTD_isError(ofhSize))
++				return ERROR(corruption_detected);
++			ip += ofhSize;
++		}
++		{
++			size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, MLtype, MaxML, MLFSELog, ip, iend - ip,
++								  ML_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
++			if (ZSTD_isError(mlhSize))
++				return ERROR(corruption_detected);
++			ip += mlhSize;
++		}
++	}
++
++	return ip - istart;
++}
++/* One decoded sequence: literal count, match length and offset; `match` is only filled in by the "Long" decoder (ZSTD_decodeSequence() sets it to NULL). */
++typedef struct {
++	size_t litLength;
++	size_t matchLength;
++	size_t offset;
++	const BYTE *match;
++} seq_t;
++/* Decoder state while walking the sequences of one block: the bitstream, one FSE state per code type and the repeat-offset history. */
++typedef struct {
++	BIT_DStream_t DStream;
++	FSE_DState_t stateLL;
++	FSE_DState_t stateOffb;
++	FSE_DState_t stateML;
++	size_t prevOffset[ZSTD_REP_NUM];
++	const BYTE *base;
++	size_t pos;
++	uPtrDiff gotoDict;
++} seqState_t;
++/* Slow path for a sequence whose literals end within WILDCOPY_OVERLENGTH bytes of `oend`: wildcopy is used only up to oend - WILDCOPY_OVERLENGTH and the rest is copied byte by byte. Returns litLength + matchLength or an error code. */
++FORCE_NOINLINE
++size_t ZSTD_execSequenceLast7(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
++			      const BYTE *const vBase, const BYTE *const dictEnd)
++{
++	BYTE *const oLitEnd = op + sequence.litLength;
++	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
++	BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
++	BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
++	const BYTE *const iLitEnd = *litPtr + sequence.litLength;
++	const BYTE *match = oLitEnd - sequence.offset;
++
++	/* check */
++	if (oMatchEnd > oend)
++		return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
++	if (iLitEnd > litLimit)
++		return ERROR(corruption_detected); /* over-read beyond lit buffer */
++	if (oLitEnd <= oend_w)
++		return ERROR(GENERIC); /* Precondition */
++
++	/* copy literals */
++	if (op < oend_w) {
++		ZSTD_wildcopy(op, *litPtr, oend_w - op);
++		*litPtr += oend_w - op;
++		op = oend_w;
++	}
++	while (op < oLitEnd)
++		*op++ = *(*litPtr)++;
++
++	/* copy Match */
++	if (sequence.offset > (size_t)(oLitEnd - base)) {
++		/* offset beyond prefix */
++		if (sequence.offset > (size_t)(oLitEnd - vBase))
++			return ERROR(corruption_detected);
++		match = dictEnd - (base - match); /* re-anchor the match start relative to the end of the previous segment */
++		if (match + sequence.matchLength <= dictEnd) {
++			memmove(oLitEnd, match, sequence.matchLength);
++			return sequenceLength;
++		}
++		/* span extDict & currPrefixSegment */
++		{
++			size_t const length1 = dictEnd - match;
++			memmove(oLitEnd, match, length1);
++			op = oLitEnd + length1;
++			sequence.matchLength -= length1;
++			match = base;
++		}
++	}
++	while (op < oMatchEnd)
++		*op++ = *match++;
++	return sequenceLength;
++}
++/* Decodes one sequence from the bitstream: peeks the three FSE symbols, reads their extra bits, resolves repeat-offset codes against seqState->prevOffset[] and advances the FSE states. */
++static seq_t INIT ZSTD_decodeSequence(seqState_t *seqState)
++{
++	seq_t seq;
++
++	U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
++	U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
++	U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
++
++	U32 const llBits = LL_bits[llCode];
++	U32 const mlBits = ML_bits[mlCode];
++	U32 const ofBits = ofCode; /* the offset code is itself the number of extra bits */
++	U32 const totalBits = llBits + mlBits + ofBits;
++
++	static const U32 LL_base[MaxLL + 1] = {0,  1,  2,  3,  4,  5,  6,  7,  8,    9,     10,    11,    12,    13,     14,     15,     16,     18,
++					       20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000};
++
++	static const U32 ML_base[MaxML + 1] = {3,  4,  5,  6,  7,  8,  9,  10,   11,    12,    13,    14,    15,     16,     17,     18,     19,     20,
++					       21, 22, 23, 24, 25, 26, 27, 28,   29,    30,    31,    32,    33,     34,     35,     37,     39,     41,
++					       43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003};
++
++	static const U32 OF_base[MaxOff + 1] = {0,       1,	1,	5,	0xD,      0x1D,      0x3D,      0x7D,      0xFD,     0x1FD,
++						0x3FD,   0x7FD,    0xFFD,    0x1FFD,   0x3FFD,   0x7FFD,    0xFFFD,    0x1FFFD,   0x3FFFD,  0x7FFFD,
++						0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD};
++
++	/* sequence */
++	{
++		size_t offset;
++		if (!ofCode)
++			offset = 0;
++		else {
++			offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
++			if (ZSTD_32bits())
++				BIT_reloadDStream(&seqState->DStream);
++		}
++
++		if (ofCode <= 1) { /* repeat-offset code: `offset` is an index into prevOffset[], not a distance */
++			offset += (llCode == 0); /* with no literals the index is shifted by one */
++			if (offset) {
++				size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
++				temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
++				if (offset != 1)
++					seqState->prevOffset[2] = seqState->prevOffset[1];
++				seqState->prevOffset[1] = seqState->prevOffset[0];
++				seqState->prevOffset[0] = offset = temp;
++			} else {
++				offset = seqState->prevOffset[0];
++			}
++		} else { /* explicit offset: push it onto the history */
++			seqState->prevOffset[2] = seqState->prevOffset[1];
++			seqState->prevOffset[1] = seqState->prevOffset[0];
++			seqState->prevOffset[0] = offset;
++		}
++		seq.offset = offset;
++	}
++
++	seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
++	if (ZSTD_32bits() && (mlBits + llBits > 24))
++		BIT_reloadDStream(&seqState->DStream);
++
++	seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
++	if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
++		BIT_reloadDStream(&seqState->DStream);
++
++	/* ANS state update */
++	FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
++	FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
++	if (ZSTD_32bits())
++		BIT_reloadDStream(&seqState->DStream);		   /* <= 18 bits */
++	FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
++
++	seq.match = NULL;
++
++	return seq;
++}
++/* Executes one sequence: copies its literals from *litPtr to `op`, then its match; returns the number of bytes produced or an error code. */
++FORCE_INLINE
++size_t ZSTD_execSequence(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
++			 const BYTE *const vBase, const BYTE *const dictEnd)
++{
++	BYTE *const oLitEnd = op + sequence.litLength;
++	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
++	BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
++	BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
++	const BYTE *const iLitEnd = *litPtr + sequence.litLength;
++	const BYTE *match = oLitEnd -
sequence.offset;
++
++	/* check */
++	if (oMatchEnd > oend)
++		return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
++	if (iLitEnd > litLimit)
++		return ERROR(corruption_detected); /* over-read beyond lit buffer */
++	if (oLitEnd > oend_w) /* too close to the end of the output for the wide copies below: use the careful variant */
++		return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
++
++	/* copy Literals */
++	ZSTD_copy8(op, *litPtr); /* always copies 8 bytes, even when litLength is smaller */
++	if (sequence.litLength > 8)
++		ZSTD_wildcopy(op + 8, (*litPtr) + 8,
++			      sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
++	op = oLitEnd;
++	*litPtr = iLitEnd; /* update for next sequence */
++
++	/* copy Match */
++	if (sequence.offset > (size_t)(oLitEnd - base)) {
++		/* offset beyond prefix */
++		if (sequence.offset > (size_t)(oLitEnd - vBase))
++			return ERROR(corruption_detected);
++		match = dictEnd + (match - base); /* re-anchor the match start relative to the end of the previous segment */
++		if (match + sequence.matchLength <= dictEnd) {
++			memmove(oLitEnd, match, sequence.matchLength);
++			return sequenceLength;
++		}
++		/* span extDict & currPrefixSegment */
++		{
++			size_t const length1 = dictEnd - match;
++			memmove(oLitEnd, match, length1);
++			op = oLitEnd + length1;
++			sequence.matchLength -= length1;
++			match = base;
++			if (op > oend_w || sequence.matchLength < MINMATCH) { /* remainder not eligible for the wide copies below: finish byte by byte */
++				U32 i;
++				for (i = 0; i < sequence.matchLength; ++i)
++					op[i] = match[i];
++				return sequenceLength;
++			}
++		}
++	}
++	/* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
++
++	/* match within prefix */
++	if (sequence.offset < 8) {
++		/* close range match, overlap */
++		static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
++		static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */
++		int const sub2 = dec64table[sequence.offset];
++		op[0] = match[0];
++		op[1] = match[1];
++		op[2] = match[2];
++		op[3] = match[3];
++		match += dec32table[sequence.offset];
++		ZSTD_copy4(op + 4, match);
++		match -= sub2;
++	} else {
++		ZSTD_copy8(op, match);
++	}
++	op += 8;
++	match += 8;
++
++	if (oMatchEnd > oend - (16 - MINMATCH)) {
++		if (op < oend_w) {
++			ZSTD_wildcopy(op, match, oend_w - op);
++			match += oend_w - op;
++			op = oend_w;
++		}
++		while (op < oMatchEnd)
++			*op++ = *match++;
++	} else {
++		ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */
++	}
++	return sequenceLength;
++}
++/* Decodes the sequences section at `seqStart` and executes each sequence into `dst`, using the literals prepared in dctx->litPtr / dctx->litSize. */
++static size_t INIT ZSTD_decompressSequences(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize)
++{
++	const BYTE *ip = (const BYTE *)seqStart;
++	const BYTE *const iend = ip + seqSize;
++	BYTE *const ostart = (BYTE * const)dst;
++	BYTE *const oend = ostart + maxDstSize;
++	BYTE *op = ostart;
++	const BYTE *litPtr = dctx->litPtr;
++	const BYTE *const litEnd = litPtr + dctx->litSize;
++	const BYTE *const base = (const BYTE *)(dctx->base);
++	const BYTE *const vBase = (const BYTE *)(dctx->vBase);
++	const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd);
++	int nbSeq;
++
++	/* Build Decoding Tables */
++	{
++		size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
++		if (ZSTD_isError(seqHSize))
++			return seqHSize;
++		ip += seqHSize;
++	}
++
++	/* Regen sequences */
++	if (nbSeq) {
++		seqState_t seqState;
++		dctx->fseEntropy = 1; /* tables now exist for a later set_repeat block */
++		{
++			U32 i;
++			for (i = 0; i < ZSTD_REP_NUM; i++)
++				seqState.prevOffset[i] = dctx->entropy.rep[i];
++		}
++		CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected);
++		FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
++		FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
++		FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
++
++		for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq;) {
++			nbSeq--;
++			{
++				seq_t const sequence = ZSTD_decodeSequence(&seqState);
++				size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase,
dictEnd); ++ if (ZSTD_isError(oneSeqSize)) ++ return oneSeqSize; ++ op += oneSeqSize; ++ } ++ } ++ ++ /* check if reached exact end */ ++ if (nbSeq) ++ return ERROR(corruption_detected); ++ /* save reps for next block */ ++ { ++ U32 i; ++ for (i = 0; i < ZSTD_REP_NUM; i++) ++ dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); ++ } ++ } ++ ++ /* last literal segment */ ++ { ++ size_t const lastLLSize = litEnd - litPtr; ++ if (lastLLSize > (size_t)(oend - op)) ++ return ERROR(dstSize_tooSmall); ++ memcpy(op, litPtr, lastLLSize); ++ op += lastLLSize; ++ } ++ ++ return op - ostart; ++} ++ ++FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t *seqState, int const longOffsets) ++{ ++ seq_t seq; ++ ++ U32 const llCode = FSE_peekSymbol(&seqState->stateLL); ++ U32 const mlCode = FSE_peekSymbol(&seqState->stateML); ++ U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ ++ ++ U32 const llBits = LL_bits[llCode]; ++ U32 const mlBits = ML_bits[mlCode]; ++ U32 const ofBits = ofCode; ++ U32 const totalBits = llBits + mlBits + ofBits; ++ ++ static const U32 LL_base[MaxLL + 1] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, ++ 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000}; ++ ++ static const U32 ML_base[MaxML + 1] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ++ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 41, ++ 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003}; ++ ++ static const U32 OF_base[MaxOff + 1] = {0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, ++ 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, ++ 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD}; ++ ++ /* sequence */ ++ { ++ size_t offset; ++ if (!ofCode) ++ offset = 0; ++ else { ++ if (longOffsets) { ++ int const 
extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN); ++ offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); ++ if (ZSTD_32bits() || extraBits) ++ BIT_reloadDStream(&seqState->DStream); ++ if (extraBits) ++ offset += BIT_readBitsFast(&seqState->DStream, extraBits); ++ } else { ++ offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ ++ if (ZSTD_32bits()) ++ BIT_reloadDStream(&seqState->DStream); ++ } ++ } ++ ++ if (ofCode <= 1) { ++ offset += (llCode == 0); ++ if (offset) { ++ size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; ++ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ ++ if (offset != 1) ++ seqState->prevOffset[2] = seqState->prevOffset[1]; ++ seqState->prevOffset[1] = seqState->prevOffset[0]; ++ seqState->prevOffset[0] = offset = temp; ++ } else { ++ offset = seqState->prevOffset[0]; ++ } ++ } else { ++ seqState->prevOffset[2] = seqState->prevOffset[1]; ++ seqState->prevOffset[1] = seqState->prevOffset[0]; ++ seqState->prevOffset[0] = offset; ++ } ++ seq.offset = offset; ++ } ++ ++ seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ ++ if (ZSTD_32bits() && (mlBits + llBits > 24)) ++ BIT_reloadDStream(&seqState->DStream); ++ ++ seq.litLength = LL_base[llCode] + ((llCode > 15) ? 
BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ ++ if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog))) ++ BIT_reloadDStream(&seqState->DStream); ++ /* precompute where the match will be read from, so the caller can prefetch it */ ++ { ++ size_t const pos = seqState->pos + seq.litLength; ++ seq.match = seqState->base + pos - seq.offset; /* single memory segment */ ++ if (seq.offset > pos) ++ seq.match += seqState->gotoDict; /* separate memory segment */ ++ seqState->pos = pos + seq.matchLength; ++ } ++ ++ /* ANS state update */ ++ FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ ++ FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ ++ if (ZSTD_32bits()) ++ BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ ++ FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ ++ ++ return seq; ++} ++ /* Pick the offset-reading variant: the long-offset path is taken when ZSTD_highbit32(windowSize) exceeds STREAM_ACCUMULATOR_MIN. */ ++static seq_t INIT ZSTD_decodeSequenceLong(seqState_t *seqState, unsigned const windowSize) ++{ ++ if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) { ++ return ZSTD_decodeSequenceLong_generic(seqState, 1); ++ } else { ++ return ZSTD_decodeSequenceLong_generic(seqState, 0); ++ } ++} ++ /* Write one decoded sequence at `op`: sequence.litLength literals taken from *litPtr (which is advanced), then sequence.matchLength bytes copied from the precomputed sequence.match. Returns litLength + matchLength, or an error code. */ ++FORCE_INLINE ++size_t INIT ZSTD_execSequenceLong(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, ++ const BYTE *const litLimit, const BYTE *const base, ++ const BYTE *const vBase, const BYTE *const dictEnd) ++{ ++ BYTE *const oLitEnd = op + sequence.litLength; ++ size_t const sequenceLength = sequence.litLength + sequence.matchLength; ++ BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ ++ BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH; ++ const BYTE *const iLitEnd = *litPtr + sequence.litLength; ++ const BYTE *match = sequence.match; ++ ++ /* check */ ++ if (oMatchEnd > oend) ++ return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ ++ if (iLitEnd > litLimit) ++ return ERROR(corruption_detected); /* over-read beyond lit buffer */ ++ if 
(oLitEnd > oend_w) /* literals would end within WILDCOPY_OVERLENGTH of oend: delegate to ZSTD_execSequenceLast7 */ ++ return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); ++ ++ /* copy Literals */ ++ ZSTD_copy8(op, *litPtr); ++ if (sequence.litLength > 8) ++ ZSTD_wildcopy(op + 8, (*litPtr) + 8, ++ sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ ++ op = oLitEnd; ++ *litPtr = iLitEnd; /* update for next sequence */ ++ ++ /* copy Match */ ++ if (sequence.offset > (size_t)(oLitEnd - base)) { ++ /* offset beyond prefix */ ++ if (sequence.offset > (size_t)(oLitEnd - vBase)) ++ return ERROR(corruption_detected); ++ if (match + sequence.matchLength <= dictEnd) { /* match lies entirely before dictEnd */ ++ memmove(oLitEnd, match, sequence.matchLength); ++ return sequenceLength; ++ } ++ /* span extDict & currPrefixSegment */ ++ { ++ size_t const length1 = dictEnd - match; ++ memmove(oLitEnd, match, length1); ++ op = oLitEnd + length1; ++ sequence.matchLength -= length1; ++ match = base; ++ if (op > oend_w || sequence.matchLength < MINMATCH) { /* too close to oend or too short for the 8-byte copies below: finish byte by byte */ ++ U32 i; ++ for (i = 0; i < sequence.matchLength; ++i) ++ op[i] = match[i]; ++ return sequenceLength; ++ } ++ } ++ } ++ /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ ++ ++ /* match within prefix */ ++ if (sequence.offset < 8) { ++ /* close range match, overlap */ ++ static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ ++ static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */ ++ int const sub2 = dec64table[sequence.offset]; ++ op[0] = match[0]; ++ op[1] = match[1]; ++ op[2] = match[2]; ++ op[3] = match[3]; ++ match += dec32table[sequence.offset]; ++ ZSTD_copy4(op + 4, match); ++ match -= sub2; ++ } else { ++ ZSTD_copy8(op, match); ++ } ++ op += 8; ++ match += 8; ++ ++ if (oMatchEnd > oend - (16 - MINMATCH)) { ++ if (op < oend_w) { ++ ZSTD_wildcopy(op, match, oend_w - op); ++ match += oend_w - op; ++ op = oend_w; ++ } ++ while (op < oMatchEnd) /* remaining bytes are copied one at a time up to oMatchEnd */ ++ *op++ = *match++; ++ } else { ++ ZSTD_wildcopy(op, match, 
(ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */ ++ } ++ return sequenceLength; ++} ++ /* Decode and execute every sequence of a block. Decoded sequences are kept in a small ring (STORED_SEQS entries in dctx->entropy.workspace) so that each one is executed ADVANCED_SEQS iterations after it was decoded and its match source prefetched. Returns the number of bytes written to dst, or an error code. */ ++static size_t INIT ZSTD_decompressSequencesLong(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize) ++{ ++ const BYTE *ip = (const BYTE *)seqStart; ++ const BYTE *const iend = ip + seqSize; ++ BYTE *const ostart = (BYTE * const)dst; ++ BYTE *const oend = ostart + maxDstSize; ++ BYTE *op = ostart; ++ const BYTE *litPtr = dctx->litPtr; ++ const BYTE *const litEnd = litPtr + dctx->litSize; ++ const BYTE *const base = (const BYTE *)(dctx->base); ++ const BYTE *const vBase = (const BYTE *)(dctx->vBase); ++ const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd); ++ unsigned const windowSize = dctx->fParams.windowSize; ++ int nbSeq; ++ ++ /* Build Decoding Tables */ ++ { ++ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); ++ if (ZSTD_isError(seqHSize)) ++ return seqHSize; ++ ip += seqHSize; ++ } ++ ++ /* Regen sequences */ ++ if (nbSeq) { ++#define STORED_SEQS 4 ++#define STOSEQ_MASK (STORED_SEQS - 1) ++#define ADVANCED_SEQS 4 ++ seq_t *sequences = (seq_t *)dctx->entropy.workspace; /* the entropy workspace is reused as the sequence ring */ ++ int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); ++ seqState_t seqState; ++ int seqNb; ++ ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.workspace) >= sizeof(seq_t) * STORED_SEQS); ++ dctx->fseEntropy = 1; ++ { ++ U32 i; ++ for (i = 0; i < ZSTD_REP_NUM; i++) ++ seqState.prevOffset[i] = dctx->entropy.rep[i]; ++ } ++ seqState.base = base; ++ seqState.pos = (size_t)(op - base); ++ seqState.gotoDict = (uPtrDiff)dictEnd - (uPtrDiff)base; /* cast to avoid undefined behaviour */ ++ CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected); ++ FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); ++ FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ++ FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); ++ ++ /* prepare in advance */ ++ for (seqNb = 0; 
(BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb < seqAdvance; seqNb++) { ++ sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize); ++ } ++ if (seqNb < seqAdvance) /* bitstream ended before the first seqAdvance sequences were decoded */ ++ return ERROR(corruption_detected); ++ ++ /* decode and decompress: decode sequence seqNb while executing the one decoded ADVANCED_SEQS iterations earlier */ ++ for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb < nbSeq; seqNb++) { ++ seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize); ++ size_t const oneSeqSize = ++ ZSTD_execSequenceLong(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); ++ if (ZSTD_isError(oneSeqSize)) ++ return oneSeqSize; ++ ZSTD_PREFETCH(sequence.match); ++ sequences[seqNb & STOSEQ_MASK] = sequence; ++ op += oneSeqSize; ++ } ++ if (seqNb < nbSeq) ++ return ERROR(corruption_detected); ++ ++ /* finish queue: execute the last seqAdvance sequences still held in the ring */ ++ seqNb -= seqAdvance; ++ for (; seqNb < nbSeq; seqNb++) { ++ size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); ++ if (ZSTD_isError(oneSeqSize)) ++ return oneSeqSize; ++ op += oneSeqSize; ++ } ++ ++ /* save reps for next block */ ++ { ++ U32 i; ++ for (i = 0; i < ZSTD_REP_NUM; i++) ++ dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); ++ } ++ } ++ ++ /* last literal segment */ ++ { ++ size_t const lastLLSize = litEnd - litPtr; ++ if (lastLLSize > (size_t)(oend - op)) ++ return ERROR(dstSize_tooSmall); ++ memcpy(op, litPtr, lastLLSize); ++ op += lastLLSize; ++ } ++ ++ return op - ostart; ++} ++ /* Decompress one compressed block: decode its literals section, then run one of the two sequence decoders on the remainder. Returns the decompressed size or an error code. */ ++static size_t INIT ZSTD_decompressBlock_internal(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) ++{ /* blockType == blockCompressed */ ++ const BYTE *ip = (const BYTE *)src; ++ ++ if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) ++ return ERROR(srcSize_wrong); ++ ++ /* Decode literals section */ ++ { ++ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); ++ if (ZSTD_isError(litCSize)) ++ return litCSize; ++ ip 
+= litCSize; ++ srcSize -= litCSize; ++ } ++ if (sizeof(size_t) > 4) /* do not enable prefetching on 32-bits x86, as it's performance detrimental */ ++ /* likely because of register pressure */ ++ /* if that's the correct cause, then 32-bits ARM should be affected differently */ ++ /* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */ ++ if (dctx->fParams.windowSize > (1 << 23)) /* the prefetching decoder is only used for windows larger than 1 << 23 bytes */ ++ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize); ++ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); ++} ++ /* If `dst` does not continue the previous output, turn the previous output into the segment ending at dictEnd, rebase vBase accordingly, and restart base / previousDstEnd at `dst`. */ ++static void INIT ZSTD_checkContinuity(ZSTD_DCtx *dctx, const void *dst) ++{ ++ if (dst != dctx->previousDstEnd) { /* not contiguous */ ++ dctx->dictEnd = dctx->previousDstEnd; ++ dctx->vBase = (const char *)dst - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base)); ++ dctx->base = dst; ++ dctx->previousDstEnd = dst; ++ } ++} ++ /* Decompress a single compressed block into `dst` and record the end of the output in dctx->previousDstEnd. Returns the decompressed size or an error code. NOTE(review): previousDstEnd is advanced by dSize even when dSize is an error code. */ ++size_t INIT ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) ++{ ++ size_t dSize; ++ ZSTD_checkContinuity(dctx, dst); ++ dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); ++ dctx->previousDstEnd = (char *)dst + dSize; ++ return dSize; ++} ++ ++/** ZSTD_insertBlock() : ++ insert `src` block into `dctx` history. Useful to track uncompressed blocks. 
*/ ++size_t INIT ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, size_t blockSize) ++{ ++ ZSTD_checkContinuity(dctx, blockStart); ++ dctx->previousDstEnd = (const char *)blockStart + blockSize; ++ return blockSize; ++} ++ /* Fill `dst` with `length` copies of `byte`. Returns `length`, or dstSize_tooSmall if `length` exceeds `dstCapacity`. */ ++size_t INIT ZSTD_generateNxBytes(void *dst, size_t dstCapacity, BYTE byte, size_t length) ++{ ++ if (length > dstCapacity) ++ return ERROR(dstSize_tooSmall); ++ memset(dst, byte, length); ++ return length; ++} ++ ++/** ZSTD_findFrameCompressedSize() : ++ * compatible with legacy mode ++ * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame ++ * `srcSize` must be at least as large as the frame contained ++ * @return : the compressed size of the frame starting at `src` */ ++size_t INIT ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) ++{ ++ if (srcSize >= ZSTD_skippableHeaderSize && (ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { ++ return ZSTD_skippableHeaderSize + ZSTD_readLE32((const BYTE *)src + 4); /* NOTE(review): the declared skippable size is returned without being checked against srcSize */ ++ } else { ++ const BYTE *ip = (const BYTE *)src; ++ const BYTE *const ipstart = ip; ++ size_t remainingSize = srcSize; ++ ZSTD_frameParams fParams; ++ ++ size_t const headerSize = ZSTD_frameHeaderSize(ip, remainingSize); ++ if (ZSTD_isError(headerSize)) ++ return headerSize; ++ ++ /* Frame Header */ ++ { ++ size_t const ret = ZSTD_getFrameParams(&fParams, ip, remainingSize); ++ if (ZSTD_isError(ret)) ++ return ret; ++ if (ret > 0) /* a non-zero, non-error result is reported as srcSize_wrong */ ++ return ERROR(srcSize_wrong); ++ } ++ ++ ip += headerSize; ++ remainingSize -= headerSize; ++ ++ /* Loop on each block */ ++ while (1) { ++ blockProperties_t blockProperties; ++ size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); ++ if (ZSTD_isError(cBlockSize)) ++ return cBlockSize; ++ ++ if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) ++ return ERROR(srcSize_wrong); ++ ++ ip += ZSTD_blockHeaderSize + cBlockSize; ++ remainingSize -= ZSTD_blockHeaderSize + cBlockSize; ++ ++ if (blockProperties.lastBlock) 
++ break; ++ } ++ ++ if (fParams.checksumFlag) { /* Frame content checksum */ ++ if (remainingSize < 4) ++ return ERROR(srcSize_wrong); ++ ip += 4; ++ remainingSize -= 4; ++ } ++ ++ return ip - ipstart; ++ } ++} ++ ++/*! ZSTD_decompressFrame() : ++* @dctx must be properly initialized. On success *srcPtr and *srcSizePtr are advanced past the frame and the number of bytes written to dst is returned; otherwise an error code is returned */ ++static size_t INIT ZSTD_decompressFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void **srcPtr, size_t *srcSizePtr) ++{ ++ const BYTE *ip = (const BYTE *)(*srcPtr); ++ BYTE *const ostart = (BYTE * const)dst; ++ BYTE *const oend = ostart + dstCapacity; ++ BYTE *op = ostart; ++ size_t remainingSize = *srcSizePtr; ++ ++ /* check */ ++ if (remainingSize < ZSTD_frameHeaderSize_min + ZSTD_blockHeaderSize) ++ return ERROR(srcSize_wrong); ++ ++ /* Frame Header */ ++ { ++ size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix); ++ if (ZSTD_isError(frameHeaderSize)) ++ return frameHeaderSize; ++ if (remainingSize < frameHeaderSize + ZSTD_blockHeaderSize) ++ return ERROR(srcSize_wrong); ++ CHECK_F(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize)); ++ ip += frameHeaderSize; ++ remainingSize -= frameHeaderSize; ++ } ++ ++ /* Loop on each block */ ++ while (1) { ++ size_t decodedSize; ++ blockProperties_t blockProperties; ++ size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); ++ if (ZSTD_isError(cBlockSize)) ++ return cBlockSize; ++ ++ ip += ZSTD_blockHeaderSize; ++ remainingSize -= ZSTD_blockHeaderSize; ++ if (cBlockSize > remainingSize) ++ return ERROR(srcSize_wrong); ++ /* dispatch on the block type */ ++ switch (blockProperties.blockType) { ++ case bt_compressed: decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend - op, ip, cBlockSize); break; ++ case bt_raw: decodedSize = ZSTD_copyRawBlock(op, oend - op, ip, cBlockSize); break; ++ case bt_rle: decodedSize = ZSTD_generateNxBytes(op, oend - op, *ip, blockProperties.origSize); break; ++ case bt_reserved: ++ default: return ERROR(corruption_detected); ++ } ++ ++ if (ZSTD_isError(decodedSize)) ++ 
return decodedSize; ++ if (dctx->fParams.checksumFlag) ++ xxh64_update(&dctx->xxhState, op, decodedSize); ++ op += decodedSize; ++ ip += cBlockSize; ++ remainingSize -= cBlockSize; ++ if (blockProperties.lastBlock) ++ break; ++ } ++ ++ if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ ++ U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState); /* only the low 32 bits of the digest are compared */ ++ U32 checkRead; ++ if (remainingSize < 4) ++ return ERROR(checksum_wrong); ++ checkRead = ZSTD_readLE32(ip); ++ if (checkRead != checkCalc) ++ return ERROR(checksum_wrong); ++ ip += 4; ++ remainingSize -= 4; ++ } ++ ++ /* Allow caller to get size read */ ++ *srcPtr = ip; ++ *srcSizePtr = remainingSize; ++ return op - ostart; ++} ++ ++static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict); ++static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict); ++ /* Decompress a concatenation of frames from `src` into `dst`, stepping over skippable frames. At most one of `dict` / `ddict` may be given. Returns the total number of bytes written, or an error code. */ ++static size_t INIT ZSTD_decompressMultiFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize, ++ const ZSTD_DDict *ddict) ++{ ++ void *const dststart = dst; ++ ++ if (ddict) { ++ if (dict) { ++ /* programmer error, these two cases should be mutually exclusive */ ++ return ERROR(GENERIC); ++ } ++ ++ dict = ZSTD_DDictDictContent(ddict); ++ dictSize = ZSTD_DDictDictSize(ddict); ++ } ++ ++ while (srcSize >= ZSTD_frameHeaderSize_prefix) { ++ U32 magicNumber; ++ ++ magicNumber = ZSTD_readLE32(src); ++ if (magicNumber != ZSTD_MAGICNUMBER) { ++ if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { ++ size_t skippableSize; ++ if (srcSize < ZSTD_skippableHeaderSize) ++ return ERROR(srcSize_wrong); ++ skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize; ++ if (srcSize < skippableSize) { ++ return ERROR(srcSize_wrong); ++ } ++ ++ src = (const BYTE *)src + skippableSize; ++ srcSize -= skippableSize; ++ continue; ++ } else { ++ return ERROR(prefix_unknown); ++ } ++ } ++ ++ if (ddict) { ++ /* we were called from ZSTD_decompress_usingDDict */ ++ 
ZSTD_refDDict(dctx, ddict); ++ } else { ++ /* this will initialize correctly with no dict if dict == NULL, so ++ * use this in all cases but ddict */ ++ CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); ++ } ++ ZSTD_checkContinuity(dctx, dst); ++ ++ { ++ const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); ++ if (ZSTD_isError(res)) ++ return res; ++ /* don't need to bounds check this, ZSTD_decompressFrame will have ++ * already */ ++ dst = (BYTE *)dst + res; ++ dstCapacity -= res; ++ } ++ } ++ ++ if (srcSize) ++ return ERROR(srcSize_wrong); /* input not entirely consumed */ ++ ++ return (BYTE *)dst - (BYTE *)dststart; ++} ++ /* Decompress all frames in `src` using the raw dictionary `dict` (may be NULL). */ ++size_t INIT ZSTD_decompress_usingDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize) ++{ ++ return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); ++} ++ /* Same as ZSTD_decompress_usingDict() with no dictionary. */ ++size_t INIT ZSTD_decompressDCtx(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) ++{ ++ return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); ++} ++ ++/*-************************************** ++* Advanced Streaming Decompression API ++* Bufferless and synchronous ++****************************************/ ++size_t INIT ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx) { return dctx->expected; } ++ /* Map the current decoding stage to the kind of input expected next; unknown stages are reported as ZSTDnit_frameHeader. */ ++ZSTD_nextInputType_e INIT ZSTD_nextInputType(ZSTD_DCtx *dctx) ++{ ++ switch (dctx->stage) { ++ default: /* should not happen */ ++ case ZSTDds_getFrameHeaderSize: ++ case ZSTDds_decodeFrameHeader: return ZSTDnit_frameHeader; ++ case ZSTDds_decodeBlockHeader: return ZSTDnit_blockHeader; ++ case ZSTDds_decompressBlock: return ZSTDnit_block; ++ case ZSTDds_decompressLastBlock: return ZSTDnit_lastBlock; ++ case ZSTDds_checkChecksum: return ZSTDnit_checksum; ++ case ZSTDds_decodeSkippableHeader: ++ case ZSTDds_skipFrame: return ZSTDnit_skippableFrame; ++ } ++} ++ ++int INIT 
ZSTD_isSkipFrame(ZSTD_DCtx *dctx) { return dctx->stage == ZSTDds_skipFrame; } /* for zbuff */ ++ ++/** ZSTD_decompressContinue() : ++* `srcSize` must equal the value returned by ZSTD_nextSrcSizeToDecompress(). @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity`) ++* or an error code, which can be tested using ZSTD_isError() */ ++size_t INIT ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) ++{ ++ /* Sanity check */ ++ if (srcSize != dctx->expected) ++ return ERROR(srcSize_wrong); ++ if (dstCapacity) ++ ZSTD_checkContinuity(dctx, dst); ++ ++ switch (dctx->stage) { ++ case ZSTDds_getFrameHeaderSize: ++ if (srcSize != ZSTD_frameHeaderSize_prefix) ++ return ERROR(srcSize_wrong); /* impossible */ ++ if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ ++ memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); ++ dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix; /* magic number + skippable frame length */ ++ dctx->stage = ZSTDds_decodeSkippableHeader; ++ return 0; ++ } ++ dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix); ++ if (ZSTD_isError(dctx->headerSize)) ++ return dctx->headerSize; ++ memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); ++ if (dctx->headerSize > ZSTD_frameHeaderSize_prefix) { /* the rest of the header is requested in a second call */ ++ dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_prefix; ++ dctx->stage = ZSTDds_decodeFrameHeader; ++ return 0; ++ } ++ dctx->expected = 0; /* not necessary to copy more */ ++ /* fallthrough */ ++ ++ case ZSTDds_decodeFrameHeader: ++ memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); ++ CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); ++ dctx->expected = ZSTD_blockHeaderSize; ++ dctx->stage = ZSTDds_decodeBlockHeader; ++ return 0; ++ ++ case ZSTDds_decodeBlockHeader: { ++ blockProperties_t bp; ++ size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); ++ if (ZSTD_isError(cBlockSize)) ++ 
return cBlockSize; ++ dctx->expected = cBlockSize; ++ dctx->bType = bp.blockType; ++ dctx->rleSize = bp.origSize; ++ if (cBlockSize) { ++ dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; ++ return 0; ++ } ++ /* empty block */ ++ if (bp.lastBlock) { ++ if (dctx->fParams.checksumFlag) { ++ dctx->expected = 4; ++ dctx->stage = ZSTDds_checkChecksum; ++ } else { ++ dctx->expected = 0; /* end of frame */ ++ dctx->stage = ZSTDds_getFrameHeaderSize; ++ } ++ } else { ++ dctx->expected = 3; /* go directly to next header; NOTE(review): presumably equals ZSTD_blockHeaderSize, which the other transitions to this stage use */ ++ dctx->stage = ZSTDds_decodeBlockHeader; ++ } ++ return 0; ++ } ++ case ZSTDds_decompressLastBlock: ++ case ZSTDds_decompressBlock: { ++ size_t rSize; ++ switch (dctx->bType) { ++ case bt_compressed: rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); break; ++ case bt_raw: rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); break; ++ case bt_rle: rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); break; ++ case bt_reserved: /* should never happen */ ++ default: return ERROR(corruption_detected); ++ } ++ if (ZSTD_isError(rSize)) ++ return rSize; ++ if (dctx->fParams.checksumFlag) ++ xxh64_update(&dctx->xxhState, dst, rSize); ++ ++ if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ ++ if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ ++ dctx->expected = 4; ++ dctx->stage = ZSTDds_checkChecksum; ++ } else { ++ dctx->expected = 0; /* ends here */ ++ dctx->stage = ZSTDds_getFrameHeaderSize; ++ } ++ } else { ++ dctx->stage = ZSTDds_decodeBlockHeader; ++ dctx->expected = ZSTD_blockHeaderSize; ++ dctx->previousDstEnd = (char *)dst + rSize; ++ } ++ return rSize; ++ } ++ case ZSTDds_checkChecksum: { ++ U32 const h32 = (U32)xxh64_digest(&dctx->xxhState); /* low 32 bits of the 64-bit digest */ ++ U32 const check32 = ZSTD_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */ ++ if (check32 != h32) ++ return ERROR(checksum_wrong); ++ dctx->expected = 0; ++ dctx->stage = 
ZSTDds_getFrameHeaderSize; ++ return 0; ++ } ++ case ZSTDds_decodeSkippableHeader: { ++ memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); ++ dctx->expected = ZSTD_readLE32(dctx->headerBuffer + 4); /* length field of the skippable frame */ ++ dctx->stage = ZSTDds_skipFrame; ++ return 0; ++ } ++ case ZSTDds_skipFrame: { ++ dctx->expected = 0; ++ dctx->stage = ZSTDds_getFrameHeaderSize; ++ return 0; ++ } ++ default: ++ return ERROR(GENERIC); /* impossible */ ++ } ++} ++ /* Register `dict` as raw history: the previous output becomes the segment ending at dictEnd, and base / previousDstEnd are set to span the dictionary. Always returns 0. */ ++static size_t INIT ZSTD_refDictContent(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) ++{ ++ dctx->dictEnd = dctx->previousDstEnd; ++ dctx->vBase = (const char *)dict - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base)); ++ dctx->base = dict; ++ dctx->previousDstEnd = (const char *)dict + dictSize; ++ return 0; ++} ++ ++/* ZSTD_loadEntropy() : ++ * dict : must point at beginning of a valid zstd dictionary ++ * @return : size of entropy tables read */ ++static size_t INIT ZSTD_loadEntropy(ZSTD_entropyTables_t *entropy, const void *const dict, size_t const dictSize) ++{ ++ const BYTE *dictPtr = (const BYTE *)dict; ++ const BYTE *const dictEnd = dictPtr + dictSize; ++ ++ if (dictSize <= 8) ++ return ERROR(dictionary_corrupted); ++ dictPtr += 8; /* skip header = magic + dictID */ ++ /* Huffman table for literals */ ++ { ++ size_t const hSize = HUF_readDTableX4_wksp(entropy->hufTable, dictPtr, dictEnd - dictPtr, entropy->workspace, sizeof(entropy->workspace)); ++ if (HUF_isError(hSize)) ++ return ERROR(dictionary_corrupted); ++ dictPtr += hSize; ++ } ++ /* FSE table for offset codes */ ++ { ++ short offcodeNCount[MaxOff + 1]; ++ U32 offcodeMaxValue = MaxOff, offcodeLog; ++ size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr); ++ if (FSE_isError(offcodeHeaderSize)) ++ return ERROR(dictionary_corrupted); ++ if (offcodeLog > OffFSELog) ++ return ERROR(dictionary_corrupted); ++ CHECK_E(FSE_buildDTable_wksp(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog, entropy->workspace, 
sizeof(entropy->workspace)), dictionary_corrupted); ++ dictPtr += offcodeHeaderSize; ++ } ++ /* FSE table for match lengths */ ++ { ++ short matchlengthNCount[MaxML + 1]; ++ unsigned matchlengthMaxValue = MaxML, matchlengthLog; ++ size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr); ++ if (FSE_isError(matchlengthHeaderSize)) ++ return ERROR(dictionary_corrupted); ++ if (matchlengthLog > MLFSELog) ++ return ERROR(dictionary_corrupted); ++ CHECK_E(FSE_buildDTable_wksp(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted); ++ dictPtr += matchlengthHeaderSize; ++ } ++ /* FSE table for literal lengths */ ++ { ++ short litlengthNCount[MaxLL + 1]; ++ unsigned litlengthMaxValue = MaxLL, litlengthLog; ++ size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr); ++ if (FSE_isError(litlengthHeaderSize)) ++ return ERROR(dictionary_corrupted); ++ if (litlengthLog > LLFSELog) ++ return ERROR(dictionary_corrupted); ++ CHECK_E(FSE_buildDTable_wksp(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted); ++ dictPtr += litlengthHeaderSize; ++ } ++ /* three 4-byte little-endian repeat offsets follow; each must be non-zero and smaller than the remaining content size */ ++ if (dictPtr + 12 > dictEnd) ++ return ERROR(dictionary_corrupted); ++ { ++ int i; ++ size_t const dictContentSize = (size_t)(dictEnd - (dictPtr + 12)); ++ for (i = 0; i < 3; i++) { ++ U32 const rep = ZSTD_readLE32(dictPtr); ++ dictPtr += 4; ++ if (rep == 0 || rep >= dictContentSize) ++ return ERROR(dictionary_corrupted); ++ entropy->rep[i] = rep; ++ } ++ } ++ ++ return dictPtr - (const BYTE *)dict; ++} ++ /* Load `dict` into dctx. Dictionaries shorter than 8 bytes or not starting with ZSTD_DICT_MAGIC are referenced as raw content; otherwise the dictID and entropy tables are parsed first and the remainder is referenced as content. */ ++static size_t INIT ZSTD_decompress_insertDictionary(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) ++{ ++ if (dictSize < 8) ++ return ZSTD_refDictContent(dctx, dict, dictSize); ++ { ++ U32 const magic = ZSTD_readLE32(dict); ++ if (magic != ZSTD_DICT_MAGIC) { ++ return 
ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ ++ } ++ } ++ dctx->dictID = ZSTD_readLE32((const char *)dict + 4); ++ ++ /* load entropy tables */ ++ { ++ size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize); ++ if (ZSTD_isError(eSize)) ++ return ERROR(dictionary_corrupted); ++ dict = (const char *)dict + eSize; ++ dictSize -= eSize; ++ } ++ dctx->litEntropy = dctx->fseEntropy = 1; ++ ++ /* reference dictionary content */ ++ return ZSTD_refDictContent(dctx, dict, dictSize); ++} ++ /* Call ZSTD_decompressBegin() on dctx and, when a non-empty `dict` is given, load it; any dictionary failure is reported as dictionary_corrupted. */ ++size_t INIT ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) ++{ ++ CHECK_F(ZSTD_decompressBegin(dctx)); ++ if (dict && dictSize) ++ CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted); ++ return 0; ++} ++ ++/* ====== ZSTD_DDict ====== */ ++ ++struct ZSTD_DDict_s { ++ void *dictBuffer; /* internal copy of the dictionary, or NULL when the caller's buffer is referenced */ ++ const void *dictContent; /* dictionary bytes actually used (dictBuffer or the caller's buffer) */ ++ size_t dictSize; ++ ZSTD_entropyTables_t entropy; ++ U32 dictID; ++ U32 entropyPresent; /* set to 1 once entropy tables were loaded from the dictionary */ ++ ZSTD_customMem cMem; ++}; /* typedef'd to ZSTD_DDict within "zstd.h" */ ++ ++size_t INIT ZSTD_DDictWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DDict)); } ++ ++static const void *INIT ZSTD_DDictDictContent(const ZSTD_DDict *ddict) { return ddict->dictContent; } ++ ++static size_t INIT ZSTD_DDictDictSize(const ZSTD_DDict *ddict) { return ddict->dictSize; } ++ /* Re-initialise dstDCtx with ZSTD_decompressBegin() and point it at ddict's content and, if present, its entropy tables and repeat offsets. */ ++static void INIT ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict) ++{ ++ ZSTD_decompressBegin(dstDCtx); /* init */ ++ if (ddict) { /* support refDDict on NULL */ ++ dstDCtx->dictID = ddict->dictID; ++ dstDCtx->base = ddict->dictContent; ++ dstDCtx->vBase = ddict->dictContent; ++ dstDCtx->dictEnd = (const BYTE *)ddict->dictContent + ddict->dictSize; ++ dstDCtx->previousDstEnd = dstDCtx->dictEnd; ++ if (ddict->entropyPresent) { ++ dstDCtx->litEntropy = 1; ++ dstDCtx->fseEntropy = 1; ++ dstDCtx->LLTptr = ddict->entropy.LLTable; ++ dstDCtx->MLTptr = ddict->entropy.MLTable; ++ 
dstDCtx->OFTptr = ddict->entropy.OFTable; ++ dstDCtx->HUFptr = ddict->entropy.hufTable; ++ dstDCtx->entropy.rep[0] = ddict->entropy.rep[0]; ++ dstDCtx->entropy.rep[1] = ddict->entropy.rep[1]; ++ dstDCtx->entropy.rep[2] = ddict->entropy.rep[2]; ++ } else { ++ dstDCtx->litEntropy = 0; ++ dstDCtx->fseEntropy = 0; ++ } ++ } ++} ++ /* Parse ddict->dictContent: content-only dictionaries (shorter than 8 bytes or without ZSTD_DICT_MAGIC) leave dictID and entropyPresent at 0; otherwise the dictID and entropy tables are loaded. Returns 0 or an error code. */ ++static size_t INIT ZSTD_loadEntropy_inDDict(ZSTD_DDict *ddict) ++{ ++ ddict->dictID = 0; ++ ddict->entropyPresent = 0; ++ if (ddict->dictSize < 8) ++ return 0; ++ { ++ U32 const magic = ZSTD_readLE32(ddict->dictContent); ++ if (magic != ZSTD_DICT_MAGIC) ++ return 0; /* pure content mode */ ++ } ++ ddict->dictID = ZSTD_readLE32((const char *)ddict->dictContent + 4); ++ ++ /* load entropy tables */ ++ CHECK_E(ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted); ++ ddict->entropyPresent = 1; ++ return 0; ++} ++ /* Allocate a ZSTD_DDict with customMem and digest `dict`. With `byReference` set (or an empty dict) the caller's buffer is referenced; otherwise it is copied. Returns NULL on allocation failure, incomplete allocator, or a corrupted dictionary. */ ++static ZSTD_DDict *INIT ZSTD_createDDict_advanced(const void *dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem) ++{ ++ if (!customMem.customAlloc || !customMem.customFree) ++ return NULL; ++ ++ { ++ ZSTD_DDict *const ddict = (ZSTD_DDict *)ZSTD_malloc(sizeof(ZSTD_DDict), customMem); ++ if (!ddict) ++ return NULL; ++ ddict->cMem = customMem; ++ ++ if ((byReference) || (!dict) || (!dictSize)) { ++ ddict->dictBuffer = NULL; ++ ddict->dictContent = dict; ++ } else { ++ void *const internalBuffer = ZSTD_malloc(dictSize, customMem); ++ if (!internalBuffer) { /* NOTE(review): ddict->dictBuffer has not been assigned on this path, yet ZSTD_freeDDict() passes it to ZSTD_free(); safe only if ZSTD_malloc zeroes memory or ZSTD_free ignores its argument - confirm */ ++ ZSTD_freeDDict(ddict); ++ return NULL; ++ } ++ memcpy(internalBuffer, dict, dictSize); ++ ddict->dictBuffer = internalBuffer; ++ ddict->dictContent = internalBuffer; ++ } ++ ddict->dictSize = dictSize; ++ ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ ++ /* parse dictionary content */ ++ { ++ size_t const errorCode = ZSTD_loadEntropy_inDDict(ddict); ++ if (ZSTD_isError(errorCode)) { ++ ZSTD_freeDDict(ddict); ++ return NULL; ++ } ++ } ++ ++ return ddict; ++ } ++} ++ 
++/*! ZSTD_initDDict() : ++* Create a digested dictionary, to start decompression without startup delay. ++* `dict` is referenced, not copied (byReference is passed as 1 below). ++* Consequently, `dict` must remain valid for as long as the `ZSTD_DDict` is used */ ++ZSTD_DDict *INIT ZSTD_initDDict(const void *dict, size_t dictSize, void *workspace, size_t workspaceSize) ++{ ++ ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); ++ return ZSTD_createDDict_advanced(dict, dictSize, 1, stackMem); ++} ++ /* Release the internal dictionary copy (if any) and the DDict itself using the allocator stored in it. Always returns 0. */ ++size_t INIT ZSTD_freeDDict(ZSTD_DDict *ddict) ++{ ++ if (ddict == NULL) ++ return 0; /* support free on NULL */ ++ { ++ ZSTD_customMem const cMem = ddict->cMem; ++ ZSTD_free(ddict->dictBuffer, cMem); ++ ZSTD_free(ddict, cMem); ++ return 0; ++ } ++} ++ ++/*! ZSTD_getDictID_fromDict() : ++ * Provides the dictID stored within dictionary. ++ * if @return == 0, the dictionary is not conformant with Zstandard specification. ++ * It can still be loaded, but as a content-only dictionary. */ ++unsigned INIT ZSTD_getDictID_fromDict(const void *dict, size_t dictSize) ++{ ++ if (dictSize < 8) ++ return 0; ++ if (ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC) ++ return 0; ++ return ZSTD_readLE32((const char *)dict + 4); ++} ++ ++/*! ZSTD_getDictID_fromDDict() : ++ * Provides the dictID of the dictionary loaded into `ddict`. ++ * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. ++ * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ ++unsigned INIT ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict) ++{ ++ if (ddict == NULL) ++ return 0; ++ return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); ++} ++ ++/*! ZSTD_getDictID_fromFrame() : ++ * Provides the dictID required to decompress the frame stored within `src`. ++ * If @return == 0, the dictID could not be decoded. ++ * This could be for one of the following reasons : ++ * - The frame does not require a dictionary to be decoded (most common case). 
++ * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is hidden information. ++ * Note : this use case also happens when using a non-conformant dictionary. ++ * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). ++ * - This is not a Zstandard frame. ++ * When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code. */ ++unsigned INIT ZSTD_getDictID_fromFrame(const void *src, size_t srcSize) ++{ ++ ZSTD_frameParams zfp = {0, 0, 0, 0}; ++ size_t const hError = ZSTD_getFrameParams(&zfp, src, srcSize); ++ if (ZSTD_isError(hError)) ++ return 0; ++ return zfp.dictID; ++} ++ ++/*! ZSTD_decompress_usingDDict() : ++* Decompression using a pre-digested Dictionary ++* Use dictionary without significant overhead. */ ++size_t INIT ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_DDict *ddict) ++{ ++ /* dict content and size are passed as NULL / 0; ZSTD_decompressMultiFrame() takes them from ddict */ ++ return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, NULL, 0, ddict); ++} ++ ++/*===================================== ++* Streaming decompression ++*====================================*/ ++ ++typedef enum { zdss_init, zdss_loadHeader, zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; ++ ++/* *** Resource management *** */ ++struct ZSTD_DStream_s { ++ ZSTD_DCtx *dctx; ++ ZSTD_DDict *ddictLocal; /* freed by ZSTD_freeDStream() */ ++ const ZSTD_DDict *ddict; /* dictionary in use; set by reference in ZSTD_initDStream_usingDDict() */ ++ ZSTD_frameParams fParams; ++ ZSTD_dStreamStage stage; ++ char *inBuff; ++ size_t inBuffSize; ++ size_t inPos; ++ size_t maxWindowSize; ++ char *outBuff; ++ size_t outBuffSize; ++ size_t outStart; ++ size_t outEnd; ++ size_t blockSize; ++ BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; /* tmp buffer to store frame header */ ++ size_t lhSize; ++ ZSTD_customMem customMem; ++ void *legacyContext; ++ U32 
previousLegacyVersion; ++ U32 legacyVersion; ++ U32 hostageByte; ++}; /* typedef'd to ZSTD_DStream within "zstd.h" */ ++ ++size_t INIT ZSTD_DStreamWorkspaceBound(size_t maxWindowSize) ++{ ++ size_t const blockSize = MIN(maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); ++ size_t const inBuffSize = blockSize; ++ size_t const outBuffSize = maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; ++ return ZSTD_DCtxWorkspaceBound() + ZSTD_ALIGN(sizeof(ZSTD_DStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize); ++} ++ ++static ZSTD_DStream *INIT ZSTD_createDStream_advanced(ZSTD_customMem customMem) ++{ ++ ZSTD_DStream *zds; ++ ++ if (!customMem.customAlloc || !customMem.customFree) ++ return NULL; ++ ++ zds = (ZSTD_DStream *)ZSTD_malloc(sizeof(ZSTD_DStream), customMem); ++ if (zds == NULL) ++ return NULL; ++ memset(zds, 0, sizeof(ZSTD_DStream)); ++ memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem)); ++ zds->dctx = ZSTD_createDCtx_advanced(customMem); ++ if (zds->dctx == NULL) { ++ ZSTD_freeDStream(zds); ++ return NULL; ++ } ++ zds->stage = zdss_init; ++ zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; ++ return zds; ++} ++ ++ZSTD_DStream *INIT ZSTD_initDStream(size_t maxWindowSize, void *workspace, size_t workspaceSize) ++{ ++ ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); ++ ZSTD_DStream *zds = ZSTD_createDStream_advanced(stackMem); ++ if (!zds) { ++ return NULL; ++ } ++ ++ zds->maxWindowSize = maxWindowSize; ++ zds->stage = zdss_loadHeader; ++ zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; ++ ZSTD_freeDDict(zds->ddictLocal); ++ zds->ddictLocal = NULL; ++ zds->ddict = zds->ddictLocal; ++ zds->legacyVersion = 0; ++ zds->hostageByte = 0; ++ ++ { ++ size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); ++ size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; ++ ++ zds->inBuff = (char *)ZSTD_malloc(blockSize, zds->customMem); ++ zds->inBuffSize = blockSize; ++ 
zds->outBuff = (char *)ZSTD_malloc(neededOutSize, zds->customMem); ++ zds->outBuffSize = neededOutSize; ++ if (zds->inBuff == NULL || zds->outBuff == NULL) { ++ ZSTD_freeDStream(zds); ++ return NULL; ++ } ++ } ++ return zds; ++} ++ ++ZSTD_DStream *INIT ZSTD_initDStream_usingDDict(size_t maxWindowSize, const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize) ++{ ++ ZSTD_DStream *zds = ZSTD_initDStream(maxWindowSize, workspace, workspaceSize); ++ if (zds) { ++ zds->ddict = ddict; ++ } ++ return zds; ++} ++ ++size_t INIT ZSTD_freeDStream(ZSTD_DStream *zds) ++{ ++ if (zds == NULL) ++ return 0; /* support free on null */ ++ { ++ ZSTD_customMem const cMem = zds->customMem; ++ ZSTD_freeDCtx(zds->dctx); ++ zds->dctx = NULL; ++ ZSTD_freeDDict(zds->ddictLocal); ++ zds->ddictLocal = NULL; ++ ZSTD_free(zds->inBuff, cMem); ++ zds->inBuff = NULL; ++ ZSTD_free(zds->outBuff, cMem); ++ zds->outBuff = NULL; ++ ZSTD_free(zds, cMem); ++ return 0; ++ } ++} ++ ++/* *** Initialization *** */ ++ ++size_t INIT ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; } ++size_t INIT ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } ++ ++size_t INIT ZSTD_resetDStream(ZSTD_DStream *zds) ++{ ++ zds->stage = zdss_loadHeader; ++ zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; ++ zds->legacyVersion = 0; ++ zds->hostageByte = 0; ++ return ZSTD_frameHeaderSize_prefix; ++} ++ ++/* ***** Decompression ***** */ ++ ++ZSTD_STATIC size_t INIT ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize) ++{ ++ size_t const length = MIN(dstCapacity, srcSize); ++ memcpy(dst, src, length); ++ return length; ++} ++ ++size_t INIT ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inBuffer *input) ++{ ++ const char *const istart = (const char *)(input->src) + input->pos; ++ const char *const iend = (const char *)(input->src) + input->size; ++ const char *ip = istart; ++ char *const ostart = (char *)(output->dst) + 
output->pos; ++ char *const oend = (char *)(output->dst) + output->size; ++ char *op = ostart; ++ U32 someMoreWork = 1; ++ ++ while (someMoreWork) { ++ switch (zds->stage) { ++ case zdss_init: ++ ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */ ++ /* fallthrough */ ++ ++ case zdss_loadHeader: { ++ size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize); ++ if (ZSTD_isError(hSize)) ++ return hSize; ++ if (hSize != 0) { /* need more input */ ++ size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ ++ if (toLoad > (size_t)(iend - ip)) { /* not enough input to load full header */ ++ memcpy(zds->headerBuffer + zds->lhSize, ip, iend - ip); ++ zds->lhSize += iend - ip; ++ input->pos = input->size; ++ return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + ++ ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ ++ } ++ memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); ++ zds->lhSize = hSize; ++ ip += toLoad; ++ break; ++ } ++ ++ /* check for single-pass mode opportunity */ ++ if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */ ++ && (U64)(size_t)(oend - op) >= zds->fParams.frameContentSize) { ++ size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend - istart); ++ if (cSize <= (size_t)(iend - istart)) { ++ size_t const decompressedSize = ZSTD_decompress_usingDDict(zds->dctx, op, oend - op, istart, cSize, zds->ddict); ++ if (ZSTD_isError(decompressedSize)) ++ return decompressedSize; ++ ip = istart + cSize; ++ op += decompressedSize; ++ zds->dctx->expected = 0; ++ zds->stage = zdss_init; ++ someMoreWork = 0; ++ break; ++ } ++ } ++ ++ /* Consume header */ ++ ZSTD_refDDict(zds->dctx, zds->ddict); ++ { ++ size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */ ++ CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size)); ++ { ++ size_t const h2Size = 
ZSTD_nextSrcSizeToDecompress(zds->dctx); ++ CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer + h1Size, h2Size)); ++ } ++ } ++ ++ zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); ++ if (zds->fParams.windowSize > zds->maxWindowSize) ++ return ERROR(frameParameter_windowTooLarge); ++ ++ /* Buffers are preallocated, but double check */ ++ { ++ size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); ++ size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; ++ if (zds->inBuffSize < blockSize) { ++ return ERROR(GENERIC); ++ } ++ if (zds->outBuffSize < neededOutSize) { ++ return ERROR(GENERIC); ++ } ++ zds->blockSize = blockSize; ++ } ++ zds->stage = zdss_read; ++ } ++ /* fallthrough */ ++ ++ case zdss_read: { ++ size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); ++ if (neededInSize == 0) { /* end of frame */ ++ zds->stage = zdss_init; ++ someMoreWork = 0; ++ break; ++ } ++ if ((size_t)(iend - ip) >= neededInSize) { /* decode directly from src */ ++ const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx); ++ size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart, ++ (isSkipFrame ? 
0 : zds->outBuffSize - zds->outStart), ip, neededInSize); ++ if (ZSTD_isError(decodedSize)) ++ return decodedSize; ++ ip += neededInSize; ++ if (!decodedSize && !isSkipFrame) ++ break; /* this was just a header */ ++ zds->outEnd = zds->outStart + decodedSize; ++ zds->stage = zdss_flush; ++ break; ++ } ++ if (ip == iend) { ++ someMoreWork = 0; ++ break; ++ } /* no more input */ ++ zds->stage = zdss_load; ++ /* pass-through */ ++ } ++ /* fallthrough */ ++ ++ case zdss_load: { ++ size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); ++ size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */ ++ size_t loadedSize; ++ if (toLoad > zds->inBuffSize - zds->inPos) ++ return ERROR(corruption_detected); /* should never happen */ ++ loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend - ip); ++ ip += loadedSize; ++ zds->inPos += loadedSize; ++ if (loadedSize < toLoad) { ++ someMoreWork = 0; ++ break; ++ } /* not enough input, wait for more */ ++ ++ /* decode loaded input */ ++ { ++ const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx); ++ size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, ++ zds->inBuff, neededInSize); ++ if (ZSTD_isError(decodedSize)) ++ return decodedSize; ++ zds->inPos = 0; /* input is consumed */ ++ if (!decodedSize && !isSkipFrame) { ++ zds->stage = zdss_read; ++ break; ++ } /* this was just a header */ ++ zds->outEnd = zds->outStart + decodedSize; ++ zds->stage = zdss_flush; ++ /* pass-through */ ++ } ++ } ++ /* fallthrough */ ++ ++ case zdss_flush: { ++ size_t const toFlushSize = zds->outEnd - zds->outStart; ++ size_t const flushedSize = ZSTD_limitCopy(op, oend - op, zds->outBuff + zds->outStart, toFlushSize); ++ op += flushedSize; ++ zds->outStart += flushedSize; ++ if (flushedSize == toFlushSize) { /* flush completed */ ++ zds->stage = zdss_read; ++ if (zds->outStart + zds->blockSize > 
zds->outBuffSize) ++ zds->outStart = zds->outEnd = 0; ++ break; ++ } ++ /* cannot complete flush */ ++ someMoreWork = 0; ++ break; ++ } ++ default: ++ return ERROR(GENERIC); /* impossible */ ++ } ++ } ++ ++ /* result */ ++ input->pos += (size_t)(ip - istart); ++ output->pos += (size_t)(op - ostart); ++ { ++ size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->dctx); ++ if (!nextSrcSizeHint) { /* frame fully decoded */ ++ if (zds->outEnd == zds->outStart) { /* output fully flushed */ ++ if (zds->hostageByte) { ++ if (input->pos >= input->size) { ++ zds->stage = zdss_read; ++ return 1; ++ } /* can't release hostage (not present) */ ++ input->pos++; /* release hostage */ ++ } ++ return 0; ++ } ++ if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ ++ input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ ++ zds->hostageByte = 1; ++ } ++ return 1; ++ } ++ nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->dctx) == ZSTDnit_block); /* preload header of next block */ ++ if (zds->inPos > nextSrcSizeHint) ++ return ERROR(GENERIC); /* should never happen */ ++ nextSrcSizeHint -= zds->inPos; /* already loaded*/ ++ return nextSrcSizeHint; ++ } ++} +diff --git a/xen/common/zstd/entropy_common.c b/xen/common/zstd/entropy_common.c +new file mode 100644 +index 0000000000..bcdb57982b +--- /dev/null ++++ b/xen/common/zstd/entropy_common.c +@@ -0,0 +1,243 @@ ++/* ++ * Common functions of New Generation Entropy library ++ * Copyright (C) 2016, Yann Collet. ++ * ++ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions are ++ * met: ++ * ++ * * Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. 
++ * * Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following disclaimer ++ * in the documentation and/or other materials provided with the ++ * distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). 
++ * ++ * You can contact the author at : ++ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ++ */ ++ ++/* ************************************* ++* Dependencies ++***************************************/ ++#include "error_private.h" /* ERR_*, ERROR */ ++#include "fse.h" ++#include "huf.h" ++#include "mem.h" ++ ++/*=== Version ===*/ ++unsigned INIT FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } ++ ++/*=== Error Management ===*/ ++unsigned INIT FSE_isError(size_t code) { return ERR_isError(code); } ++ ++unsigned INIT HUF_isError(size_t code) { return ERR_isError(code); } ++ ++/*-************************************************************** ++* FSE NCount encoding-decoding ++****************************************************************/ ++size_t INIT FSE_readNCount(short *normalizedCounter, unsigned *maxSVPtr, unsigned *tableLogPtr, const void *headerBuffer, size_t hbSize) ++{ ++ const BYTE *const istart = (const BYTE *)headerBuffer; ++ const BYTE *const iend = istart + hbSize; ++ const BYTE *ip = istart; ++ int nbBits; ++ int remaining; ++ int threshold; ++ U32 bitStream; ++ int bitCount; ++ unsigned charnum = 0; ++ int previous0 = 0; ++ ++ if (hbSize < 4) ++ return ERROR(srcSize_wrong); ++ bitStream = ZSTD_readLE32(ip); ++ nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ ++ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) ++ return ERROR(tableLog_tooLarge); ++ bitStream >>= 4; ++ bitCount = 4; ++ *tableLogPtr = nbBits; ++ remaining = (1 << nbBits) + 1; ++ threshold = 1 << nbBits; ++ nbBits++; ++ ++ while ((remaining > 1) & (charnum <= *maxSVPtr)) { ++ if (previous0) { ++ unsigned n0 = charnum; ++ while ((bitStream & 0xFFFF) == 0xFFFF) { ++ n0 += 24; ++ if (ip < iend - 5) { ++ ip += 2; ++ bitStream = ZSTD_readLE32(ip) >> bitCount; ++ } else { ++ bitStream >>= 16; ++ bitCount += 16; ++ } ++ } ++ while ((bitStream & 3) == 3) { ++ n0 += 3; ++ bitStream >>= 2; ++ bitCount += 2; ++ } ++ n0 += bitStream & 3; ++ bitCount += 2; 
++ if (n0 > *maxSVPtr) ++ return ERROR(maxSymbolValue_tooSmall); ++ while (charnum < n0) ++ normalizedCounter[charnum++] = 0; ++ if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) { ++ ip += bitCount >> 3; ++ bitCount &= 7; ++ bitStream = ZSTD_readLE32(ip) >> bitCount; ++ } else { ++ bitStream >>= 2; ++ } ++ } ++ { ++ int const max = (2 * threshold - 1) - remaining; ++ int count; ++ ++ if ((bitStream & (threshold - 1)) < (U32)max) { ++ count = bitStream & (threshold - 1); ++ bitCount += nbBits - 1; ++ } else { ++ count = bitStream & (2 * threshold - 1); ++ if (count >= threshold) ++ count -= max; ++ bitCount += nbBits; ++ } ++ ++ count--; /* extra accuracy */ ++ remaining -= count < 0 ? -count : count; /* -1 means +1 */ ++ normalizedCounter[charnum++] = (short)count; ++ previous0 = !count; ++ while (remaining < threshold) { ++ nbBits--; ++ threshold >>= 1; ++ } ++ ++ if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) { ++ ip += bitCount >> 3; ++ bitCount &= 7; ++ } else { ++ bitCount -= (int)(8 * (iend - 4 - ip)); ++ ip = iend - 4; ++ } ++ bitStream = ZSTD_readLE32(ip) >> (bitCount & 31); ++ } ++ } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ ++ if (remaining != 1) ++ return ERROR(corruption_detected); ++ if (bitCount > 32) ++ return ERROR(corruption_detected); ++ *maxSVPtr = charnum - 1; ++ ++ ip += (bitCount + 7) >> 3; ++ return ip - istart; ++} ++ ++/*! HUF_readStats() : ++ Read compact Huffman tree, saved by HUF_writeCTable(). ++ `huffWeight` is destination buffer. ++ `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. ++ @return : size read from `src` , or an error Code . ++ Note : Needed by HUF_readCTable() and HUF_readDTableX?() . 
++*/ ++size_t INIT HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) ++{ ++ U32 weightTotal; ++ const BYTE *ip = (const BYTE *)src; ++ size_t iSize; ++ size_t oSize; ++ ++ if (!srcSize) ++ return ERROR(srcSize_wrong); ++ iSize = ip[0]; ++ /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */ ++ ++ if (iSize >= 128) { /* special header */ ++ oSize = iSize - 127; ++ iSize = ((oSize + 1) / 2); ++ if (iSize + 1 > srcSize) ++ return ERROR(srcSize_wrong); ++ if (oSize >= hwSize) ++ return ERROR(corruption_detected); ++ ip += 1; ++ { ++ U32 n; ++ for (n = 0; n < oSize; n += 2) { ++ huffWeight[n] = ip[n / 2] >> 4; ++ huffWeight[n + 1] = ip[n / 2] & 15; ++ } ++ } ++ } else { /* header compressed with FSE (normal case) */ ++ if (iSize + 1 > srcSize) ++ return ERROR(srcSize_wrong); ++ oSize = FSE_decompress_wksp(huffWeight, hwSize - 1, ip + 1, iSize, 6, workspace, workspaceSize); /* max (hwSize-1) values decoded, as last one is implied */ ++ if (FSE_isError(oSize)) ++ return oSize; ++ } ++ ++ /* collect weight stats */ ++ memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); ++ weightTotal = 0; ++ { ++ U32 n; ++ for (n = 0; n < oSize; n++) { ++ if (huffWeight[n] >= HUF_TABLELOG_MAX) ++ return ERROR(corruption_detected); ++ rankStats[huffWeight[n]]++; ++ weightTotal += (1 << huffWeight[n]) >> 1; ++ } ++ } ++ if (weightTotal == 0) ++ return ERROR(corruption_detected); ++ ++ /* get last non-null symbol weight (implied, total must be 2^n) */ ++ { ++ U32 const tableLog = BIT_highbit32(weightTotal) + 1; ++ if (tableLog > HUF_TABLELOG_MAX) ++ return ERROR(corruption_detected); ++ *tableLogPtr = tableLog; ++ /* determine last weight */ ++ { ++ U32 const total = 1 << tableLog; ++ U32 const rest = total - weightTotal; ++ U32 const verif = 1 << BIT_highbit32(rest); ++ U32 const lastWeight = BIT_highbit32(rest) 
+ 1; ++ if (verif != rest) ++ return ERROR(corruption_detected); /* last value must be a clean power of 2 */ ++ huffWeight[oSize] = (BYTE)lastWeight; ++ rankStats[lastWeight]++; ++ } ++ } ++ ++ /* check tree construction validity */ ++ if ((rankStats[1] < 2) || (rankStats[1] & 1)) ++ return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ ++ ++ /* results */ ++ *nbSymbolsPtr = (U32)(oSize + 1); ++ return iSize + 1; ++} +diff --git a/xen/common/zstd/error_private.h b/xen/common/zstd/error_private.h +new file mode 100644 +index 0000000000..d07bf3cb9b +--- /dev/null ++++ b/xen/common/zstd/error_private.h +@@ -0,0 +1,110 @@ ++/** ++ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. ++ * All rights reserved. ++ * ++ * This source code is licensed under the BSD-style license found in the ++ * LICENSE file in the root directory of https://github.com/facebook/zstd. ++ * An additional grant of patent rights can be found in the PATENTS file in the ++ * same directory. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). ++ */ ++ ++/* Note : this module is expected to remain private, do not expose it */ ++ ++#ifndef ERROR_H_MODULE ++#define ERROR_H_MODULE ++ ++/* **************************************** ++* Dependencies ++******************************************/ ++#include /* size_t */ ++ ++/** ++ * enum ZSTD_ErrorCode - zstd error codes ++ * ++ * Functions that return size_t can be checked for errors using ZSTD_isError() ++ * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode(). 
++ */ ++typedef enum { ++ ZSTD_error_no_error, ++ ZSTD_error_GENERIC, ++ ZSTD_error_prefix_unknown, ++ ZSTD_error_version_unsupported, ++ ZSTD_error_parameter_unknown, ++ ZSTD_error_frameParameter_unsupported, ++ ZSTD_error_frameParameter_unsupportedBy32bits, ++ ZSTD_error_frameParameter_windowTooLarge, ++ ZSTD_error_compressionParameter_unsupported, ++ ZSTD_error_init_missing, ++ ZSTD_error_memory_allocation, ++ ZSTD_error_stage_wrong, ++ ZSTD_error_dstSize_tooSmall, ++ ZSTD_error_srcSize_wrong, ++ ZSTD_error_corruption_detected, ++ ZSTD_error_checksum_wrong, ++ ZSTD_error_tableLog_tooLarge, ++ ZSTD_error_maxSymbolValue_tooLarge, ++ ZSTD_error_maxSymbolValue_tooSmall, ++ ZSTD_error_dictionary_corrupted, ++ ZSTD_error_dictionary_wrong, ++ ZSTD_error_dictionaryCreation_failed, ++ ZSTD_error_maxCode ++} ZSTD_ErrorCode; ++ ++/* **************************************** ++* Compiler-specific ++******************************************/ ++#define ERR_STATIC static __attribute__((unused)) ++ ++/*-**************************************** ++* Customization (error_public.h) ++******************************************/ ++typedef ZSTD_ErrorCode ERR_enum; ++#define PREFIX(name) ZSTD_error_##name ++ ++/*-**************************************** ++* Error codes handling ++******************************************/ ++#define ERROR(name) ((size_t)-PREFIX(name)) ++ ++ERR_STATIC unsigned INIT ERR_isError(size_t code) { return (code > ERROR(maxCode)); } ++ ++ERR_STATIC ERR_enum INIT ERR_getErrorCode(size_t code) ++{ ++ if (!ERR_isError(code)) ++ return (ERR_enum)0; ++ return (ERR_enum)(0 - code); ++} ++ ++/** ++ * ZSTD_isError() - tells if a size_t function result is an error code ++ * @code: The function result to check for error. ++ * ++ * Return: Non-zero iff the code is an error. 
++ */ ++static __attribute__((unused)) unsigned int INIT ZSTD_isError(size_t code) ++{ ++ return code > (size_t)-ZSTD_error_maxCode; ++} ++ ++/** ++ * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode ++ * @functionResult: The result of a function for which ZSTD_isError() is true. ++ * ++ * Return: The ZSTD_ErrorCode corresponding to the functionResult or 0 ++ * if the functionResult isn't an error. ++ */ ++static __attribute__((unused)) ZSTD_ErrorCode INIT ZSTD_getErrorCode( ++ size_t functionResult) ++{ ++ if (!ZSTD_isError(functionResult)) ++ return (ZSTD_ErrorCode)0; ++ return (ZSTD_ErrorCode)(0 - functionResult); ++} ++ ++#endif /* ERROR_H_MODULE */ +diff --git a/xen/common/zstd/fse.h b/xen/common/zstd/fse.h +new file mode 100644 +index 0000000000..b86717c34d +--- /dev/null ++++ b/xen/common/zstd/fse.h +@@ -0,0 +1,575 @@ ++/* ++ * FSE : Finite State Entropy codec ++ * Public Prototypes declaration ++ * Copyright (C) 2013-2016, Yann Collet. ++ * ++ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions are ++ * met: ++ * ++ * * Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * * Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following disclaimer ++ * in the documentation and/or other materials provided with the ++ * distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). ++ * ++ * You can contact the author at : ++ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ++ */ ++#ifndef FSE_H ++#define FSE_H ++ ++/*-***************************************** ++* Dependencies ++******************************************/ ++#include /* size_t, ptrdiff_t */ ++ ++/*-***************************************** ++* FSE_PUBLIC_API : control library symbols visibility ++******************************************/ ++#define FSE_PUBLIC_API ++ ++/*------ Version ------*/ ++#define FSE_VERSION_MAJOR 0 ++#define FSE_VERSION_MINOR 9 ++#define FSE_VERSION_RELEASE 0 ++ ++#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE ++#define FSE_QUOTE(str) #str ++#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) ++#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) ++ ++#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR * 100 * 100 + FSE_VERSION_MINOR * 100 + FSE_VERSION_RELEASE) ++FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ ++ 
++/*-***************************************** ++* Tool functions ++******************************************/ ++FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ ++ ++/* Error Management */ ++FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ ++ ++/*-***************************************** ++* FSE detailed API ++******************************************/ ++/*! ++FSE_compress() does the following: ++1. count symbol occurrence from source[] into table count[] ++2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) ++3. save normalized counters to memory buffer using writeNCount() ++4. build encoding table 'CTable' from normalized counters ++5. encode the data stream using encoding table 'CTable' ++ ++FSE_decompress() does the following: ++1. read normalized counters with readNCount() ++2. build decoding table 'DTable' from normalized counters ++3. decode the data stream using decoding table 'DTable' ++ ++The following API allows targeting specific sub-functions for advanced tasks. ++For example, it's possible to compress several blocks using the same 'CTable', ++or to save and provide normalized distribution using external method. ++*/ ++ ++/* *** COMPRESSION *** */ ++/*! FSE_optimalTableLog(): ++ dynamically downsize 'tableLog' when conditions are met. ++ It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. ++ @return : recommended tableLog (necessarily <= 'maxTableLog') */ ++FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); ++ ++/*! FSE_normalizeCount(): ++ normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) ++ 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). 
++ @return : tableLog, ++ or an errorCode, which can be tested using FSE_isError() */ ++FSE_PUBLIC_API size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t srcSize, unsigned maxSymbolValue); ++ ++/*! FSE_NCountWriteBound(): ++ Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. ++ Typically useful for allocation purpose. */ ++FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); ++ ++/*! FSE_writeNCount(): ++ Compactly save 'normalizedCounter' into 'buffer'. ++ @return : size of the compressed table, ++ or an errorCode, which can be tested using FSE_isError(). */ ++FSE_PUBLIC_API size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); ++ ++/*! Constructor and Destructor of FSE_CTable. ++ Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ ++typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ ++ ++/*! FSE_compress_usingCTable(): ++ Compress `src` using `ct` into `dst` which must be already allocated. ++ @return : size of compressed data (<= `dstCapacity`), ++ or 0 if compressed data could not fit into `dst`, ++ or an errorCode, which can be tested using FSE_isError() */ ++FSE_PUBLIC_API size_t FSE_compress_usingCTable(void *dst, size_t dstCapacity, const void *src, size_t srcSize, const FSE_CTable *ct); ++ ++/*! ++Tutorial : ++---------- ++The first step is to count all symbols. FSE_count() does this job very fast. ++Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. ++'src' is a table of bytes of size 'srcSize'. 
All values within 'src' MUST be <= maxSymbolValuePtr[0] ++maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) ++FSE_count() will return the number of occurrence of the most frequent symbol. ++This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. ++If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). ++ ++The next step is to normalize the frequencies. ++FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. ++It also guarantees a minimum of 1 to any Symbol with frequency >= 1. ++You can use 'tableLog'==0 to mean "use default tableLog value". ++If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), ++which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). ++ ++The result of FSE_normalizeCount() will be saved into a table, ++called 'normalizedCounter', which is a table of signed short. ++'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. ++The return value is tableLog if everything proceeded as expected. ++It is 0 if there is a single symbol within distribution. ++If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). ++ ++'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). ++'buffer' must be already allocated. ++For guaranteed success, buffer size must be at least FSE_headerBound(). ++The result of the function is the number of bytes written into 'buffer'. ++If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). ++ ++'normalizedCounter' can then be used to create the compression table 'CTable'. 
++The space required by 'CTable' must be already allocated, using FSE_createCTable(). ++You can then use FSE_buildCTable() to fill 'CTable'. ++If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). ++ ++'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). ++Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' ++The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. ++If it returns '0', compressed data could not fit into 'dst'. ++If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). ++*/ ++ ++/* *** DECOMPRESSION *** */ ++ ++/*! FSE_readNCount(): ++ Read compactly saved 'normalizedCounter' from 'rBuffer'. ++ @return : size read from 'rBuffer', ++ or an errorCode, which can be tested using FSE_isError(). ++ maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ ++FSE_PUBLIC_API size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSymbolValuePtr, unsigned *tableLogPtr, const void *rBuffer, size_t rBuffSize); ++ ++/*! Constructor and Destructor of FSE_DTable. ++ Note that its size depends on 'tableLog' */ ++typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ ++ ++/*! FSE_buildDTable(): ++ Builds 'dt', which must be already allocated, using FSE_createDTable(). ++ return : 0, or an errorCode, which can be tested using FSE_isError() */ ++FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize); ++ ++/*! FSE_decompress_usingDTable(): ++ Decompress compressed source `cSrc` of size `cSrcSize` using `dt` ++ into `dst` which must be already allocated. 
++ @return : size of regenerated data (necessarily <= `dstCapacity`), ++ or an errorCode, which can be tested using FSE_isError() */ ++FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt); ++ ++/*! ++Tutorial : ++---------- ++(Note : these functions only decompress FSE-compressed blocks. ++ If block is uncompressed, use memcpy() instead ++ If block is a single repeated byte, use memset() instead ) ++ ++The first step is to obtain the normalized frequencies of symbols. ++This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). ++'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. ++In practice, that means it's necessary to know 'maxSymbolValue' beforehand, ++or size the table to handle worst case situations (typically 256). ++FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. ++The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. ++Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. ++If there is an error, the function will return an error code, which can be tested using FSE_isError(). ++ ++The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. ++This is performed by the function FSE_buildDTable(). ++The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). ++If there is an error, the function will return an error code, which can be tested using FSE_isError(). ++ ++`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). ++`cSrcSize` must be strictly correct, otherwise decompression will fail. ++FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). ++If there is an error, the function will return an error code, which can be tested using FSE_isError(). 
(ex: dst buffer too small) ++*/ ++ ++/* *** Dependency *** */ ++#include "bitstream.h" ++ ++/* ***************************************** ++* Static allocation ++*******************************************/ ++/* FSE buffer bounds */ ++#define FSE_NCOUNTBOUND 512 ++#define FSE_BLOCKBOUND(size) (size + (size >> 7)) ++#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ ++ ++/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ ++#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1 << (maxTableLog - 1)) + ((maxSymbolValue + 1) * 2)) ++#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1 << maxTableLog)) ++ ++/* ***************************************** ++* FSE advanced API ++*******************************************/ ++/* FSE_count_wksp() : ++ * Same as FSE_count(), but using an externally provided scratch buffer. ++ * `workSpace` size must be table of >= `1024` unsigned ++ */ ++size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace); ++ ++/* FSE_countFast_wksp() : ++ * Same as FSE_countFast(), but using an externally provided scratch buffer. ++ * `workSpace` must be a table of minimum `1024` unsigned ++ */ ++size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize, unsigned *workSpace); ++ ++/*! FSE_count_simple ++ * Same as FSE_countFast(), but does not use any additional memory (not even on stack). ++ * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`). 
++*/ ++size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize); ++ ++unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); ++/**< same as FSE_optimalTableLog(), which used `minus==2` */ ++ ++size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits); ++/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ ++ ++size_t FSE_buildCTable_rle(FSE_CTable *ct, unsigned char symbolValue); ++/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ ++ ++/* FSE_buildCTable_wksp() : ++ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). ++ * `wkspSize` must be >= `(1<= BIT_DStream_completed ++ ++When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. ++Checking if DStream has reached its end is performed by : ++ BIT_endOfDStream(&DStream); ++Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
++ FSE_endOfDState(&DState); ++*/ ++ ++/* ***************************************** ++* FSE unsafe API ++*******************************************/ ++static unsigned char FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD); ++/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ ++ ++/* ***************************************** ++* Implementation of inlined functions ++*******************************************/ ++typedef struct { ++ int deltaFindState; ++ U32 deltaNbBits; ++} FSE_symbolCompressionTransform; /* total 8 bytes */ ++ ++ZSTD_STATIC void FSE_initCState(FSE_CState_t *statePtr, const FSE_CTable *ct) ++{ ++ const void *ptr = ct; ++ const U16 *u16ptr = (const U16 *)ptr; ++ const U32 tableLog = ZSTD_read16(ptr); ++ statePtr->value = (ptrdiff_t)1 << tableLog; ++ statePtr->stateTable = u16ptr + 2; ++ statePtr->symbolTT = ((const U32 *)ct + 1 + (tableLog ? (1 << (tableLog - 1)) : 1)); ++ statePtr->stateLog = tableLog; ++} ++ ++/*! 
FSE_initCState2() : ++* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) ++* uses the smallest state value possible, saving the cost of this symbol */ ++ZSTD_STATIC void FSE_initCState2(FSE_CState_t *statePtr, const FSE_CTable *ct, U32 symbol) ++{ ++ FSE_initCState(statePtr, ct); ++ { ++ const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol]; ++ const U16 *stateTable = (const U16 *)(statePtr->stateTable); ++ U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1 << 15)) >> 16); ++ statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; ++ statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; ++ } ++} ++ ++ZSTD_STATIC void FSE_encodeSymbol(BIT_CStream_t *bitC, FSE_CState_t *statePtr, U32 symbol) ++{ ++ const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol]; ++ const U16 *const stateTable = (const U16 *)(statePtr->stateTable); ++ U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); ++ BIT_addBits(bitC, statePtr->value, nbBitsOut); ++ statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; ++} ++ ++ZSTD_STATIC void FSE_flushCState(BIT_CStream_t *bitC, const FSE_CState_t *statePtr) ++{ ++ BIT_addBits(bitC, statePtr->value, statePtr->stateLog); ++ BIT_flushBits(bitC); ++} ++ ++/* ====== Decompression ====== */ ++ ++typedef struct { ++ U16 tableLog; ++ U16 fastMode; ++} FSE_DTableHeader; /* sizeof U32 */ ++ ++typedef struct { ++ unsigned short newState; ++ unsigned char symbol; ++ unsigned char nbBits; ++} FSE_decode_t; /* size == U32 */ ++ ++ZSTD_STATIC void FSE_initDState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD, const FSE_DTable *dt) ++{ ++ const void *ptr = dt; ++ const FSE_DTableHeader *const DTableH = (const FSE_DTableHeader *)ptr; ++ DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); ++ 
BIT_reloadDStream(bitD); ++ DStatePtr->table = dt + 1; ++} ++ ++ZSTD_STATIC BYTE FSE_peekSymbol(const FSE_DState_t *DStatePtr) ++{ ++ FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; ++ return DInfo.symbol; ++} ++ ++ZSTD_STATIC void FSE_updateState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD) ++{ ++ FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; ++ U32 const nbBits = DInfo.nbBits; ++ size_t const lowBits = BIT_readBits(bitD, nbBits); ++ DStatePtr->state = DInfo.newState + lowBits; ++} ++ ++ZSTD_STATIC BYTE FSE_decodeSymbol(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD) ++{ ++ FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; ++ U32 const nbBits = DInfo.nbBits; ++ BYTE const symbol = DInfo.symbol; ++ size_t const lowBits = BIT_readBits(bitD, nbBits); ++ ++ DStatePtr->state = DInfo.newState + lowBits; ++ return symbol; ++} ++ ++/*! FSE_decodeSymbolFast() : ++ unsafe, only works if no symbol has a probability > 50% */ ++ZSTD_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD) ++{ ++ FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; ++ U32 const nbBits = DInfo.nbBits; ++ BYTE const symbol = DInfo.symbol; ++ size_t const lowBits = BIT_readBitsFast(bitD, nbBits); ++ ++ DStatePtr->state = DInfo.newState + lowBits; ++ return symbol; ++} ++ ++ZSTD_STATIC unsigned FSE_endOfDState(const FSE_DState_t *DStatePtr) { return DStatePtr->state == 0; } ++ ++/* ************************************************************** ++* Tuning parameters ++****************************************************************/ ++/*!MEMORY_USAGE : ++* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 
++* Increasing memory usage improves compression ratio ++* Reduced memory usage can improve speed, due to cache effect ++* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ ++#ifndef FSE_MAX_MEMORY_USAGE ++#define FSE_MAX_MEMORY_USAGE 14 ++#endif ++#ifndef FSE_DEFAULT_MEMORY_USAGE ++#define FSE_DEFAULT_MEMORY_USAGE 13 ++#endif ++ ++/*!FSE_MAX_SYMBOL_VALUE : ++* Maximum symbol value authorized. ++* Required for proper stack allocation */ ++#ifndef FSE_MAX_SYMBOL_VALUE ++#define FSE_MAX_SYMBOL_VALUE 255 ++#endif ++ ++/* ************************************************************** ++* template functions type & suffix ++****************************************************************/ ++#define FSE_FUNCTION_TYPE BYTE ++#define FSE_FUNCTION_EXTENSION ++#define FSE_DECODE_TYPE FSE_decode_t ++ ++/* *************************************************************** ++* Constants ++*****************************************************************/ ++#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE - 2) ++#define FSE_MAX_TABLESIZE (1U << FSE_MAX_TABLELOG) ++#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE - 1) ++#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE - 2) ++#define FSE_MIN_TABLELOG 5 ++ ++#define FSE_TABLELOG_ABSOLUTE_MAX 15 ++#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX ++#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" ++#endif ++ ++#define FSE_TABLESTEP(tableSize) ((tableSize >> 1) + (tableSize >> 3) + 3) ++ ++#endif /* FSE_H */ +diff --git a/xen/common/zstd/fse_decompress.c b/xen/common/zstd/fse_decompress.c +new file mode 100644 +index 0000000000..cc51206df6 +--- /dev/null ++++ b/xen/common/zstd/fse_decompress.c +@@ -0,0 +1,324 @@ ++/* ++ * FSE : Finite State Entropy decoder ++ * Copyright (C) 2013-2015, Yann Collet. 
++ * ++ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions are ++ * met: ++ * ++ * * Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * * Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following disclaimer ++ * in the documentation and/or other materials provided with the ++ * distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). 
++ * ++ * You can contact the author at : ++ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ++ */ ++ ++/* ************************************************************** ++* Compiler specifics ++****************************************************************/ ++#define FORCE_INLINE static always_inline ++ ++/* ************************************************************** ++* Includes ++****************************************************************/ ++#include "bitstream.h" ++#include "fse.h" ++#include "zstd_internal.h" ++#include <xen/compiler.h> ++#include <xen/string.h> /* memcpy, memset */ ++ ++/* ************************************************************** ++* Error Management ++****************************************************************/ ++#define FSE_isError ERR_isError ++#define FSE_STATIC_ASSERT(c) \ ++ { \ ++ enum { FSE_static_assert = 1 / (int)(!!(c)) }; \ ++ } /* use only *after* variable declarations */ ++ ++/* ************************************************************** ++* Templates ++****************************************************************/ ++/* ++ designed to be included ++ for type-specific functions (template emulation in C) ++ Objective is to write these functions only once, for improved maintenance ++*/ ++ ++/* safety checks */ ++#ifndef FSE_FUNCTION_EXTENSION ++#error "FSE_FUNCTION_EXTENSION must be defined" ++#endif ++#ifndef FSE_FUNCTION_TYPE ++#error "FSE_FUNCTION_TYPE must be defined" ++#endif ++ ++/* Function names */ ++#define FSE_CAT(X, Y) X##Y ++#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y) ++#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y) ++ ++/* Function templates */ ++ ++size_t INIT FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize) ++{ ++ void *const tdPtr = dt + 1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ ++ FSE_DECODE_TYPE *const tableDecode = (FSE_DECODE_TYPE *)(tdPtr); ++ U16 *symbolNext = (U16 
*)workspace; ++ ++ U32 const maxSV1 = maxSymbolValue + 1; ++ U32 const tableSize = 1 << tableLog; ++ U32 highThreshold = tableSize - 1; ++ ++ /* Sanity Checks */ ++ if (workspaceSize < sizeof(U16) * (FSE_MAX_SYMBOL_VALUE + 1)) ++ return ERROR(tableLog_tooLarge); ++ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) ++ return ERROR(maxSymbolValue_tooLarge); ++ if (tableLog > FSE_MAX_TABLELOG) ++ return ERROR(tableLog_tooLarge); ++ ++ /* Init, lay down lowprob symbols */ ++ { ++ FSE_DTableHeader DTableH; ++ DTableH.tableLog = (U16)tableLog; ++ DTableH.fastMode = 1; ++ { ++ S16 const largeLimit = (S16)(1 << (tableLog - 1)); ++ U32 s; ++ for (s = 0; s < maxSV1; s++) { ++ if (normalizedCounter[s] == -1) { ++ tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; ++ symbolNext[s] = 1; ++ } else { ++ if (normalizedCounter[s] >= largeLimit) ++ DTableH.fastMode = 0; ++ symbolNext[s] = normalizedCounter[s]; ++ } ++ } ++ } ++ memcpy(dt, &DTableH, sizeof(DTableH)); ++ } ++ ++ /* Spread symbols */ ++ { ++ U32 const tableMask = tableSize - 1; ++ U32 const step = FSE_TABLESTEP(tableSize); ++ U32 s, position = 0; ++ for (s = 0; s < maxSV1; s++) { ++ int i; ++ for (i = 0; i < normalizedCounter[s]; i++) { ++ tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s; ++ position = (position + step) & tableMask; ++ while (position > highThreshold) ++ position = (position + step) & tableMask; /* lowprob area */ ++ } ++ } ++ if (position != 0) ++ return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ ++ } ++ ++ /* Build Decoding table */ ++ { ++ U32 u; ++ for (u = 0; u < tableSize; u++) { ++ FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); ++ U16 nextState = symbolNext[symbol]++; ++ tableDecode[u].nbBits = (BYTE)(tableLog - BIT_highbit32((U32)nextState)); ++ tableDecode[u].newState = (U16)((nextState << tableDecode[u].nbBits) - tableSize); ++ } ++ } ++ ++ return 0; ++} ++ 
++/*-******************************************************* ++* Decompression (Byte symbols) ++*********************************************************/ ++size_t INIT FSE_buildDTable_rle(FSE_DTable *dt, BYTE symbolValue) ++{ ++ void *ptr = dt; ++ FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr; ++ void *dPtr = dt + 1; ++ FSE_decode_t *const cell = (FSE_decode_t *)dPtr; ++ ++ DTableH->tableLog = 0; ++ DTableH->fastMode = 0; ++ ++ cell->newState = 0; ++ cell->symbol = symbolValue; ++ cell->nbBits = 0; ++ ++ return 0; ++} ++ ++size_t INIT FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits) ++{ ++ void *ptr = dt; ++ FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr; ++ void *dPtr = dt + 1; ++ FSE_decode_t *const dinfo = (FSE_decode_t *)dPtr; ++ const unsigned tableSize = 1 << nbBits; ++ const unsigned tableMask = tableSize - 1; ++ const unsigned maxSV1 = tableMask + 1; ++ unsigned s; ++ ++ /* Sanity checks */ ++ if (nbBits < 1) ++ return ERROR(GENERIC); /* min size */ ++ ++ /* Build Decoding Table */ ++ DTableH->tableLog = (U16)nbBits; ++ DTableH->fastMode = 1; ++ for (s = 0; s < maxSV1; s++) { ++ dinfo[s].newState = 0; ++ dinfo[s].symbol = (BYTE)s; ++ dinfo[s].nbBits = (BYTE)nbBits; ++ } ++ ++ return 0; ++} ++ ++FORCE_INLINE size_t FSE_decompress_usingDTable_generic(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt, ++ const unsigned fast) ++{ ++ BYTE *const ostart = (BYTE *)dst; ++ BYTE *op = ostart; ++ BYTE *const omax = op + maxDstSize; ++ BYTE *const olimit = omax - 3; ++ ++ BIT_DStream_t bitD; ++ FSE_DState_t state1; ++ FSE_DState_t state2; ++ ++ /* Init */ ++ CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); ++ ++ FSE_initDState(&state1, &bitD, dt); ++ FSE_initDState(&state2, &bitD, dt); ++ ++#define FSE_GETSYMBOL(statePtr) fast ? 
FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) ++ ++ /* 4 symbols per loop */ ++ for (; (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished) & (op < olimit); op += 4) { ++ op[0] = FSE_GETSYMBOL(&state1); ++ ++ if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ ++ BIT_reloadDStream(&bitD); ++ ++ op[1] = FSE_GETSYMBOL(&state2); ++ ++ if (FSE_MAX_TABLELOG * 4 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ ++ { ++ if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { ++ op += 2; ++ break; ++ } ++ } ++ ++ op[2] = FSE_GETSYMBOL(&state1); ++ ++ if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ ++ BIT_reloadDStream(&bitD); ++ ++ op[3] = FSE_GETSYMBOL(&state2); ++ } ++ ++ /* tail */ ++ /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ ++ while (1) { ++ if (op > (omax - 2)) ++ return ERROR(dstSize_tooSmall); ++ *op++ = FSE_GETSYMBOL(&state1); ++ if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) { ++ *op++ = FSE_GETSYMBOL(&state2); ++ break; ++ } ++ ++ if (op > (omax - 2)) ++ return ERROR(dstSize_tooSmall); ++ *op++ = FSE_GETSYMBOL(&state2); ++ if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) { ++ *op++ = FSE_GETSYMBOL(&state1); ++ break; ++ } ++ } ++ ++ return op - ostart; ++} ++ ++size_t INIT FSE_decompress_usingDTable(void *dst, size_t originalSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt) ++{ ++ const void *ptr = dt; ++ const FSE_DTableHeader *DTableH = (const FSE_DTableHeader *)ptr; ++ const U32 fastMode = DTableH->fastMode; ++ ++ /* select fast mode (static) */ ++ if (fastMode) ++ return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); ++ return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); ++} ++ ++size_t INIT FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, 
unsigned maxLog, void *workspace, size_t workspaceSize) ++{ ++ const BYTE *const istart = (const BYTE *)cSrc; ++ const BYTE *ip = istart; ++ unsigned tableLog; ++ unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; ++ size_t NCountLength; ++ ++ FSE_DTable *dt; ++ short *counting; ++ size_t spaceUsed32 = 0; ++ ++ FSE_STATIC_ASSERT(sizeof(FSE_DTable) == sizeof(U32)); ++ ++ dt = (FSE_DTable *)((U32 *)workspace + spaceUsed32); ++ spaceUsed32 += FSE_DTABLE_SIZE_U32(maxLog); ++ counting = (short *)((U32 *)workspace + spaceUsed32); ++ spaceUsed32 += ALIGN(sizeof(short) * (FSE_MAX_SYMBOL_VALUE + 1), sizeof(U32)) >> 2; ++ ++ if ((spaceUsed32 << 2) > workspaceSize) ++ return ERROR(tableLog_tooLarge); ++ workspace = (U32 *)workspace + spaceUsed32; ++ workspaceSize -= (spaceUsed32 << 2); ++ ++ /* normal FSE decoding mode */ ++ NCountLength = FSE_readNCount(counting, &maxSymbolValue, &tableLog, istart, cSrcSize); ++ if (FSE_isError(NCountLength)) ++ return NCountLength; ++ // if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining ++ // case : NCountLength==cSrcSize */ ++ if (tableLog > maxLog) ++ return ERROR(tableLog_tooLarge); ++ ip += NCountLength; ++ cSrcSize -= NCountLength; ++ ++ CHECK_F(FSE_buildDTable_wksp(dt, counting, maxSymbolValue, tableLog, workspace, workspaceSize)); ++ ++ return FSE_decompress_usingDTable(dst, dstCapacity, ip, cSrcSize, dt); /* always return, even if it is an error code */ ++} +diff --git a/xen/common/zstd/huf.h b/xen/common/zstd/huf.h +new file mode 100644 +index 0000000000..a9d522c7bb +--- /dev/null ++++ b/xen/common/zstd/huf.h +@@ -0,0 +1,212 @@ ++/* ++ * Huffman coder, part of New Generation Entropy library ++ * header file ++ * Copyright (C) 2013-2016, Yann Collet. 
++ * ++ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions are ++ * met: ++ * ++ * * Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * * Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following disclaimer ++ * in the documentation and/or other materials provided with the ++ * distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). 
++ * ++ * You can contact the author at : ++ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ++ */ ++#ifndef HUF_H_298734234 ++#define HUF_H_298734234 ++ ++/* *** Dependencies *** */ ++#include <xen/types.h> /* size_t */ ++ ++/* *** Tool functions *** */ ++#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ ++size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ ++ ++/* Error Management */ ++unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ ++ ++/* *** Advanced function *** */ ++ ++/** HUF_compress4X_wksp() : ++* Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */ ++size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, ++ size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ ++ ++/* *** Dependencies *** */ ++#include "mem.h" /* U32 */ ++ ++/* *** Constants *** */ ++#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */ ++#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */ ++#define HUF_SYMBOLVALUE_MAX 255 ++ ++#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_TABLELOG_MAX. Beyond that value, code does not work */ ++#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) ++#error "HUF_TABLELOG_MAX is too large !" 
++#endif ++ ++/* **************************************** ++* Static allocation ++******************************************/ ++/* HUF buffer bounds */ ++#define HUF_CTABLEBOUND 129 ++#define HUF_BLOCKBOUND(size) (size + (size >> 8) + 8) /* only true if incompressible pre-filtered with fast heuristic */ ++#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ ++ ++/* static allocation of HUF's Compression Table */ ++#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ ++ U32 name##hb[maxSymbolValue + 1]; \ ++ void *name##hv = &(name##hb); \ ++ HUF_CElt *name = (HUF_CElt *)(name##hv) /* no final ; */ ++ ++/* static allocation of HUF's DTable */ ++typedef U32 HUF_DTable; ++#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1 << (maxTableLog))) ++#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = {((U32)((maxTableLog)-1) * 0x01000001)} ++#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = {((U32)(maxTableLog)*0x01000001)} ++ ++/* The workspace must have alignment at least 4 and be at least this large */ ++#define HUF_COMPRESS_WORKSPACE_SIZE (6 << 10) ++#define HUF_COMPRESS_WORKSPACE_SIZE_U32 (HUF_COMPRESS_WORKSPACE_SIZE / sizeof(U32)) ++ ++/* The workspace must have alignment at least 4 and be at least this large */ ++#define HUF_DECOMPRESS_WORKSPACE_SIZE (3 << 10) ++#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) ++ ++/* **************************************** ++* Advanced decompression functions ++******************************************/ ++size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize); /**< decodes RLE and uncompressed */ ++size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void 
*workspace, ++ size_t workspaceSize); /**< considers RLE and uncompressed as errors */ ++size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, ++ size_t workspaceSize); /**< single-symbol decoder */ ++size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, ++ size_t workspaceSize); /**< double-symbols decoder */ ++ ++/* **************************************** ++* HUF detailed API ++******************************************/ ++/*! ++HUF_compress() does the following: ++1. count symbol occurrence from source[] into table count[] using FSE_count() ++2. (optional) refine tableLog using HUF_optimalTableLog() ++3. build Huffman table from count using HUF_buildCTable() ++4. save Huffman table to memory buffer using HUF_writeCTable_wksp() ++5. encode the data stream using HUF_compress4X_usingCTable() ++ ++The following API allows targeting specific sub-functions for advanced tasks. ++For example, it's possible to compress several blocks using the same 'CTable', ++or to save and regenerate 'CTable' using external methods. ++*/ ++/* FSE_count() : find it within "fse.h" */ ++unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); ++typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ ++size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, unsigned maxSymbolValue, unsigned huffLog, void *workspace, size_t workspaceSize); ++size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable); ++ ++typedef enum { ++ HUF_repeat_none, /**< Cannot use the previous table */ ++ HUF_repeat_check, /**< Can use the previous table but it must be checked. 
Note : The previous table must have been constructed by HUF_compress{1, ++ 4}X_repeat */ ++ HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ ++} HUF_repeat; ++/** HUF_compress4X_repeat() : ++* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. ++* If it uses hufTable it does not modify hufTable or repeat. ++* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. ++* If preferRepeat then the old table will always be used if valid. */ ++size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, ++ size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, ++ int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ ++ ++/** HUF_buildCTable_wksp() : ++ * Same as HUF_buildCTable(), but using externally allocated scratch buffer. ++ * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. ++ */ ++size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize); ++ ++/*! HUF_readStats() : ++ Read compact Huffman tree, saved by HUF_writeCTable(). ++ `huffWeight` is destination buffer. ++ @return : size read from `src` , or an error Code . ++ Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ ++size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, ++ void *workspace, size_t workspaceSize); ++ ++/** HUF_readCTable() : ++* Loading a CTable saved with HUF_writeCTable() */ ++size_t HUF_readCTable_wksp(HUF_CElt *CTable, unsigned maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize); ++ ++/* ++HUF_decompress() does the following: ++1. 
select the decompression algorithm (X2, X4) based on pre-computed heuristics ++2. build Huffman table from save, using HUF_readDTableXn() ++3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable ++*/ ++ ++/** HUF_selectDecoder() : ++* Tells which decoder is likely to decode faster, ++* based on a set of pre-determined metrics. ++* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . ++* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ ++U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize); ++ ++size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize); ++size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize); ++ ++size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); ++size_t HUF_decompress4X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); ++size_t HUF_decompress4X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); ++ ++/* single stream variants */ ++ ++size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, ++ size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ ++size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable); ++/** HUF_compress1X_repeat() : ++* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. ++* If it uses hufTable it does not modify hufTable or repeat. ++* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. ++* If preferRepeat then the old table will always be used if valid. 
*/ ++size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, ++ size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, ++ int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ ++ ++size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize); ++size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, ++ size_t workspaceSize); /**< single-symbol decoder */ ++size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, ++ size_t workspaceSize); /**< double-symbols decoder */ ++ ++size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, ++ const HUF_DTable *DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ ++size_t HUF_decompress1X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); ++size_t HUF_decompress1X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); ++ ++#endif /* HUF_H_298734234 */ +diff --git a/xen/common/zstd/huf_decompress.c b/xen/common/zstd/huf_decompress.c +new file mode 100644 +index 0000000000..341619e642 +--- /dev/null ++++ b/xen/common/zstd/huf_decompress.c +@@ -0,0 +1,960 @@ ++/* ++ * Huffman decoder, part of New Generation Entropy library ++ * Copyright (C) 2013-2016, Yann Collet. 
++ * ++ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions are ++ * met: ++ * ++ * * Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * * Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following disclaimer ++ * in the documentation and/or other materials provided with the ++ * distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). 
++ * ++ * You can contact the author at : ++ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ++ */ ++ ++/* ************************************************************** ++* Compiler specifics ++****************************************************************/ ++#define FORCE_INLINE static always_inline ++ ++/* ************************************************************** ++* Dependencies ++****************************************************************/ ++#include "bitstream.h" /* BIT_* */ ++#include "fse.h" /* header compression */ ++#include "huf.h" ++#include ++#include /* memcpy, memset */ ++ ++/* ************************************************************** ++* Error Management ++****************************************************************/ ++#define HUF_STATIC_ASSERT(c) \ ++ { \ ++ enum { HUF_static_assert = 1 / (int)(!!(c)) }; \ ++ } /* use only *after* variable declarations */ ++ ++/*-***************************/ ++/* generic DTableDesc */ ++/*-***************************/ ++ ++typedef struct { ++ BYTE maxTableLog; ++ BYTE tableType; ++ BYTE tableLog; ++ BYTE reserved; ++} DTableDesc; ++ ++static DTableDesc INIT HUF_getDTableDesc(const HUF_DTable *table) ++{ ++ DTableDesc dtd; ++ memcpy(&dtd, table, sizeof(dtd)); ++ return dtd; ++} ++ ++/*-***************************/ ++/* single-symbol decoding */ ++/*-***************************/ ++ ++typedef struct { ++ BYTE byte; ++ BYTE nbBits; ++} HUF_DEltX2; /* single-symbol decoding */ ++ ++size_t INIT HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) ++{ ++ U32 tableLog = 0; ++ U32 nbSymbols = 0; ++ size_t iSize; ++ void *const dtPtr = DTable + 1; ++ HUF_DEltX2 *const dt = (HUF_DEltX2 *)dtPtr; ++ ++ U32 *rankVal; ++ BYTE *huffWeight; ++ size_t spaceUsed32 = 0; ++ ++ rankVal = (U32 *)workspace + spaceUsed32; ++ spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; ++ huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32); ++ 
spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; ++ ++ if ((spaceUsed32 << 2) > workspaceSize) ++ return ERROR(tableLog_tooLarge); ++ workspace = (U32 *)workspace + spaceUsed32; ++ workspaceSize -= (spaceUsed32 << 2); ++ ++ HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); ++ /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ ++ ++ iSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize); ++ if (HUF_isError(iSize)) ++ return iSize; ++ ++ /* Table header */ ++ { ++ DTableDesc dtd = HUF_getDTableDesc(DTable); ++ if (tableLog > (U32)(dtd.maxTableLog + 1)) ++ return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ ++ dtd.tableType = 0; ++ dtd.tableLog = (BYTE)tableLog; ++ memcpy(DTable, &dtd, sizeof(dtd)); ++ } ++ ++ /* Calculate starting value for each rank */ ++ { ++ U32 n, nextRankStart = 0; ++ for (n = 1; n < tableLog + 1; n++) { ++ U32 const curr = nextRankStart; ++ nextRankStart += (rankVal[n] << (n - 1)); ++ rankVal[n] = curr; ++ } ++ } ++ ++ /* fill DTable */ ++ { ++ U32 n; ++ for (n = 0; n < nbSymbols; n++) { ++ U32 const w = huffWeight[n]; ++ U32 const length = (1 << w) >> 1; ++ U32 u; ++ HUF_DEltX2 D; ++ D.byte = (BYTE)n; ++ D.nbBits = (BYTE)(tableLog + 1 - w); ++ for (u = rankVal[w]; u < rankVal[w] + length; u++) ++ dt[u] = D; ++ rankVal[w] += length; ++ } ++ } ++ ++ return iSize; ++} ++ ++static BYTE INIT HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog) ++{ ++ size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ ++ BYTE const c = dt[val].byte; ++ BIT_skipBits(Dstream, dt[val].nbBits); ++ return c; ++} ++ ++#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) ++ ++#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ ++ if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \ ++ 
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) ++ ++#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ ++ if (ZSTD_64bits()) \ ++ HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) ++ ++FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog) ++{ ++ BYTE *const pStart = p; ++ ++ /* up to 4 symbols at a time */ ++ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) { ++ HUF_DECODE_SYMBOLX2_2(p, bitDPtr); ++ HUF_DECODE_SYMBOLX2_1(p, bitDPtr); ++ HUF_DECODE_SYMBOLX2_2(p, bitDPtr); ++ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); ++ } ++ ++ /* closer to the end */ ++ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd)) ++ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); ++ ++ /* no more data to retrieve from bitstream, hence no need to reload */ ++ while (p < pEnd) ++ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); ++ ++ return pEnd - pStart; ++} ++ ++static size_t INIT HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) ++{ ++ BYTE *op = (BYTE *)dst; ++ BYTE *const oend = op + dstSize; ++ const void *dtPtr = DTable + 1; ++ const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr; ++ BIT_DStream_t bitD; ++ DTableDesc const dtd = HUF_getDTableDesc(DTable); ++ U32 const dtLog = dtd.tableLog; ++ ++ { ++ size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); ++ if (HUF_isError(errorCode)) ++ return errorCode; ++ } ++ ++ HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog); ++ ++ /* check */ ++ if (!BIT_endOfDStream(&bitD)) ++ return ERROR(corruption_detected); ++ ++ return dstSize; ++} ++ ++size_t INIT HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) ++{ ++ DTableDesc dtd = HUF_getDTableDesc(DTable); ++ if (dtd.tableType != 0) ++ return ERROR(GENERIC); ++ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); ++} ++ 
++size_t INIT HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) ++{ ++ const BYTE *ip = (const BYTE *)cSrc; ++ ++ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize); ++ if (HUF_isError(hSize)) ++ return hSize; ++ if (hSize >= cSrcSize) ++ return ERROR(srcSize_wrong); ++ ip += hSize; ++ cSrcSize -= hSize; ++ ++ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx); ++} ++ ++static size_t INIT HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) ++{ ++ /* Check */ ++ if (cSrcSize < 10) ++ return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ ++ ++ { ++ const BYTE *const istart = (const BYTE *)cSrc; ++ BYTE *const ostart = (BYTE *)dst; ++ BYTE *const oend = ostart + dstSize; ++ const void *const dtPtr = DTable + 1; ++ const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr; ++ ++ /* Init */ ++ BIT_DStream_t bitD1; ++ BIT_DStream_t bitD2; ++ BIT_DStream_t bitD3; ++ BIT_DStream_t bitD4; ++ size_t const length1 = ZSTD_readLE16(istart); ++ size_t const length2 = ZSTD_readLE16(istart + 2); ++ size_t const length3 = ZSTD_readLE16(istart + 4); ++ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); ++ const BYTE *const istart1 = istart + 6; /* jumpTable */ ++ const BYTE *const istart2 = istart1 + length1; ++ const BYTE *const istart3 = istart2 + length2; ++ const BYTE *const istart4 = istart3 + length3; ++ const size_t segmentSize = (dstSize + 3) / 4; ++ BYTE *const opStart2 = ostart + segmentSize; ++ BYTE *const opStart3 = opStart2 + segmentSize; ++ BYTE *const opStart4 = opStart3 + segmentSize; ++ BYTE *op1 = ostart; ++ BYTE *op2 = opStart2; ++ BYTE *op3 = opStart3; ++ BYTE *op4 = opStart4; ++ U32 endSignal; ++ DTableDesc const dtd = HUF_getDTableDesc(DTable); ++ U32 const dtLog = 
dtd.tableLog; ++ ++ if (length4 > cSrcSize) ++ return ERROR(corruption_detected); /* overflow */ ++ { ++ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); ++ if (HUF_isError(errorCode)) ++ return errorCode; ++ } ++ { ++ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); ++ if (HUF_isError(errorCode)) ++ return errorCode; ++ } ++ { ++ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); ++ if (HUF_isError(errorCode)) ++ return errorCode; ++ } ++ { ++ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); ++ if (HUF_isError(errorCode)) ++ return errorCode; ++ } ++ ++ /* 16-32 symbols per loop (4-8 symbols per stream) */ ++ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); ++ for (; (endSignal == BIT_DStream_unfinished) && (op4 < (oend - 7));) { ++ HUF_DECODE_SYMBOLX2_2(op1, &bitD1); ++ HUF_DECODE_SYMBOLX2_2(op2, &bitD2); ++ HUF_DECODE_SYMBOLX2_2(op3, &bitD3); ++ HUF_DECODE_SYMBOLX2_2(op4, &bitD4); ++ HUF_DECODE_SYMBOLX2_1(op1, &bitD1); ++ HUF_DECODE_SYMBOLX2_1(op2, &bitD2); ++ HUF_DECODE_SYMBOLX2_1(op3, &bitD3); ++ HUF_DECODE_SYMBOLX2_1(op4, &bitD4); ++ HUF_DECODE_SYMBOLX2_2(op1, &bitD1); ++ HUF_DECODE_SYMBOLX2_2(op2, &bitD2); ++ HUF_DECODE_SYMBOLX2_2(op3, &bitD3); ++ HUF_DECODE_SYMBOLX2_2(op4, &bitD4); ++ HUF_DECODE_SYMBOLX2_0(op1, &bitD1); ++ HUF_DECODE_SYMBOLX2_0(op2, &bitD2); ++ HUF_DECODE_SYMBOLX2_0(op3, &bitD3); ++ HUF_DECODE_SYMBOLX2_0(op4, &bitD4); ++ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); ++ } ++ ++ /* check corruption */ ++ if (op1 > opStart2) ++ return ERROR(corruption_detected); ++ if (op2 > opStart3) ++ return ERROR(corruption_detected); ++ if (op3 > opStart4) ++ return ERROR(corruption_detected); ++ /* note : op4 supposed already verified within main loop */ ++ ++ /* finish bitStreams one by one */ ++ HUF_decodeStreamX2(op1, &bitD1, 
opStart2, dt, dtLog); ++ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); ++ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); ++ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); ++ ++ /* check */ ++ endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); ++ if (!endSignal) ++ return ERROR(corruption_detected); ++ ++ /* decoded size */ ++ return dstSize; ++ } ++} ++ ++size_t INIT HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) ++{ ++ DTableDesc dtd = HUF_getDTableDesc(DTable); ++ if (dtd.tableType != 0) ++ return ERROR(GENERIC); ++ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); ++} ++ ++size_t INIT HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) ++{ ++ const BYTE *ip = (const BYTE *)cSrc; ++ ++ size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize); ++ if (HUF_isError(hSize)) ++ return hSize; ++ if (hSize >= cSrcSize) ++ return ERROR(srcSize_wrong); ++ ip += hSize; ++ cSrcSize -= hSize; ++ ++ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx); ++} ++ ++/* *************************/ ++/* double-symbols decoding */ ++/* *************************/ ++typedef struct { ++ U16 sequence; ++ BYTE nbBits; ++ BYTE length; ++} HUF_DEltX4; /* double-symbols decoding */ ++ ++typedef struct { ++ BYTE symbol; ++ BYTE weight; ++} sortedSymbol_t; ++ ++/* HUF_fillDTableX4Level2() : ++ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ ++static void INIT HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight, ++ const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq) ++{ ++ HUF_DEltX4 DElt; ++ U32 
rankVal[HUF_TABLELOG_MAX + 1]; ++ ++ /* get pre-calculated rankVal */ ++ memcpy(rankVal, rankValOrigin, sizeof(rankVal)); ++ ++ /* fill skipped values */ ++ if (minWeight > 1) { ++ U32 i, skipSize = rankVal[minWeight]; ++ ZSTD_writeLE16(&(DElt.sequence), baseSeq); ++ DElt.nbBits = (BYTE)(consumed); ++ DElt.length = 1; ++ for (i = 0; i < skipSize; i++) ++ DTable[i] = DElt; ++ } ++ ++ /* fill DTable */ ++ { ++ U32 s; ++ for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */ ++ const U32 symbol = sortedSymbols[s].symbol; ++ const U32 weight = sortedSymbols[s].weight; ++ const U32 nbBits = nbBitsBaseline - weight; ++ const U32 length = 1 << (sizeLog - nbBits); ++ const U32 start = rankVal[weight]; ++ U32 i = start; ++ const U32 end = start + length; ++ ++ ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8))); ++ DElt.nbBits = (BYTE)(nbBits + consumed); ++ DElt.length = 2; ++ do { ++ DTable[i++] = DElt; ++ } while (i < end); /* since length >= 1 */ ++ ++ rankVal[weight] += length; ++ } ++ } ++} ++ ++typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1]; ++typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; ++ ++static void INIT HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, ++ const U32 sortedListSize, const U32 *rankStart, ++ rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline) ++{ ++ U32 rankVal[HUF_TABLELOG_MAX + 1]; ++ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ ++ const U32 minBits = nbBitsBaseline - maxWeight; ++ U32 s; ++ ++ memcpy(rankVal, rankValOrigin, sizeof(rankVal)); ++ ++ /* fill DTable */ ++ for (s = 0; s < sortedListSize; s++) { ++ const U16 symbol = sortedList[s].symbol; ++ const U32 weight = sortedList[s].weight; ++ const U32 nbBits = nbBitsBaseline - weight; ++ const U32 start = rankVal[weight]; ++ const U32 length = 1 << (targetLog - nbBits); ++ ++ if (targetLog - nbBits >= minBits) { /* 
enough room for a second symbol */ ++ U32 sortedRank; ++ int minWeight = nbBits + scaleLog; ++ if (minWeight < 1) ++ minWeight = 1; ++ sortedRank = rankStart[minWeight]; ++ HUF_fillDTableX4Level2(DTable + start, targetLog - nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank, ++ sortedListSize - sortedRank, nbBitsBaseline, symbol); ++ } else { ++ HUF_DEltX4 DElt; ++ ZSTD_writeLE16(&(DElt.sequence), symbol); ++ DElt.nbBits = (BYTE)(nbBits); ++ DElt.length = 1; ++ { ++ U32 const end = start + length; ++ U32 u; ++ for (u = start; u < end; u++) ++ DTable[u] = DElt; ++ } ++ } ++ rankVal[weight] += length; ++ } ++} ++ ++size_t INIT HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) ++{ ++ U32 tableLog, maxW, sizeOfSort, nbSymbols; ++ DTableDesc dtd = HUF_getDTableDesc(DTable); ++ U32 const maxTableLog = dtd.maxTableLog; ++ size_t iSize; ++ void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */ ++ HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr; ++ U32 *rankStart; ++ ++ rankValCol_t *rankVal; ++ U32 *rankStats; ++ U32 *rankStart0; ++ sortedSymbol_t *sortedSymbol; ++ BYTE *weightList; ++ size_t spaceUsed32 = 0; ++ ++ HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0); ++ ++ rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32); ++ spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; ++ rankStats = (U32 *)workspace + spaceUsed32; ++ spaceUsed32 += HUF_TABLELOG_MAX + 1; ++ rankStart0 = (U32 *)workspace + spaceUsed32; ++ spaceUsed32 += HUF_TABLELOG_MAX + 2; ++ sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32); ++ spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; ++ weightList = (BYTE *)((U32 *)workspace + spaceUsed32); ++ spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; ++ ++ if ((spaceUsed32 << 2) > workspaceSize) ++ return ERROR(tableLog_tooLarge); ++ workspace = (U32 *)workspace + 
spaceUsed32; ++ workspaceSize -= (spaceUsed32 << 2); ++ ++ rankStart = rankStart0 + 1; ++ memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); ++ ++ HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ ++ if (maxTableLog > HUF_TABLELOG_MAX) ++ return ERROR(tableLog_tooLarge); ++ /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ ++ ++ iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize); ++ if (HUF_isError(iSize)) ++ return iSize; ++ ++ /* check result */ ++ if (tableLog > maxTableLog) ++ return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ ++ ++ /* find maxWeight */ ++ for (maxW = tableLog; rankStats[maxW] == 0; maxW--) { ++ } /* necessarily finds a solution before 0 */ ++ ++ /* Get start index of each weight */ ++ { ++ U32 w, nextRankStart = 0; ++ for (w = 1; w < maxW + 1; w++) { ++ U32 curr = nextRankStart; ++ nextRankStart += rankStats[w]; ++ rankStart[w] = curr; ++ } ++ rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ ++ sizeOfSort = nextRankStart; ++ } ++ ++ /* sort symbols by weight */ ++ { ++ U32 s; ++ for (s = 0; s < nbSymbols; s++) { ++ U32 const w = weightList[s]; ++ U32 const r = rankStart[w]++; ++ sortedSymbol[r].symbol = (BYTE)s; ++ sortedSymbol[r].weight = (BYTE)w; ++ } ++ rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ ++ } ++ ++ /* Build rankVal */ ++ { ++ U32 *const rankVal0 = rankVal[0]; ++ { ++ int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */ ++ U32 nextRankVal = 0; ++ U32 w; ++ for (w = 1; w < maxW + 1; w++) { ++ U32 curr = nextRankVal; ++ nextRankVal += rankStats[w] << (w + rescale); ++ rankVal0[w] = curr; ++ } ++ } ++ { ++ U32 const minBits = tableLog + 1 - maxW; ++ U32 consumed; ++ for (consumed = minBits; consumed < 
maxTableLog - minBits + 1; consumed++) { ++ U32 *const rankValPtr = rankVal[consumed]; ++ U32 w; ++ for (w = 1; w < maxW + 1; w++) { ++ rankValPtr[w] = rankVal0[w] >> consumed; ++ } ++ } ++ } ++ } ++ ++ HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1); ++ ++ dtd.tableLog = (BYTE)maxTableLog; ++ dtd.tableType = 1; ++ memcpy(DTable, &dtd, sizeof(dtd)); ++ return iSize; ++} ++ ++static U32 INIT HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog) ++{ ++ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ ++ memcpy(op, dt + val, 2); ++ BIT_skipBits(DStream, dt[val].nbBits); ++ return dt[val].length; ++} ++ ++static U32 INIT HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog) ++{ ++ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ ++ memcpy(op, dt + val, 1); ++ if (dt[val].length == 1) ++ BIT_skipBits(DStream, dt[val].nbBits); ++ else { ++ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) { ++ BIT_skipBits(DStream, dt[val].nbBits); ++ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8)) ++ /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ ++ DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8); ++ } ++ } ++ return 1; ++} ++ ++#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) ++ ++#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ ++ if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \ ++ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) ++ ++#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ ++ if (ZSTD_64bits()) \ ++ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) ++ ++FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog) ++{ ++ BYTE *const pStart = p; ++ ++ /* up to 8 symbols at a time */ ++ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) { ++ HUF_DECODE_SYMBOLX4_2(p, bitDPtr); ++ HUF_DECODE_SYMBOLX4_1(p, bitDPtr); ++ HUF_DECODE_SYMBOLX4_2(p, bitDPtr); ++ HUF_DECODE_SYMBOLX4_0(p, bitDPtr); ++ } ++ ++ /* closer to end : up to 2 symbols at a time */ ++ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2)) ++ HUF_DECODE_SYMBOLX4_0(p, bitDPtr); ++ ++ while (p <= pEnd - 2) ++ HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ ++ ++ if (p < pEnd) ++ p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); ++ ++ return p - pStart; ++} ++ ++static size_t INIT HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) ++{ ++ BIT_DStream_t bitD; ++ ++ /* Init */ ++ { ++ size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); ++ if (HUF_isError(errorCode)) ++ return errorCode; ++ } ++ ++ /* decode */ ++ { ++ BYTE *const ostart = (BYTE *)dst; ++ BYTE *const oend = ostart + dstSize; ++ const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */ ++ const HUF_DEltX4 *const dt = (const 
HUF_DEltX4 *)dtPtr; ++ DTableDesc const dtd = HUF_getDTableDesc(DTable); ++ HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog); ++ } ++ ++ /* check */ ++ if (!BIT_endOfDStream(&bitD)) ++ return ERROR(corruption_detected); ++ ++ /* decoded size */ ++ return dstSize; ++} ++ ++size_t INIT HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) ++{ ++ DTableDesc dtd = HUF_getDTableDesc(DTable); ++ if (dtd.tableType != 1) ++ return ERROR(GENERIC); ++ return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); ++} ++ ++size_t INIT HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) ++{ ++ const BYTE *ip = (const BYTE *)cSrc; ++ ++ size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize); ++ if (HUF_isError(hSize)) ++ return hSize; ++ if (hSize >= cSrcSize) ++ return ERROR(srcSize_wrong); ++ ip += hSize; ++ cSrcSize -= hSize; ++ ++ return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx); ++} ++ ++static size_t INIT HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) ++{ ++ if (cSrcSize < 10) ++ return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ ++ ++ { ++ const BYTE *const istart = (const BYTE *)cSrc; ++ BYTE *const ostart = (BYTE *)dst; ++ BYTE *const oend = ostart + dstSize; ++ const void *const dtPtr = DTable + 1; ++ const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr; ++ ++ /* Init */ ++ BIT_DStream_t bitD1; ++ BIT_DStream_t bitD2; ++ BIT_DStream_t bitD3; ++ BIT_DStream_t bitD4; ++ size_t const length1 = ZSTD_readLE16(istart); ++ size_t const length2 = ZSTD_readLE16(istart + 2); ++ size_t const length3 = ZSTD_readLE16(istart + 4); ++ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); ++ const 
BYTE *const istart1 = istart + 6; /* jumpTable */ ++ const BYTE *const istart2 = istart1 + length1; ++ const BYTE *const istart3 = istart2 + length2; ++ const BYTE *const istart4 = istart3 + length3; ++ size_t const segmentSize = (dstSize + 3) / 4; ++ BYTE *const opStart2 = ostart + segmentSize; ++ BYTE *const opStart3 = opStart2 + segmentSize; ++ BYTE *const opStart4 = opStart3 + segmentSize; ++ BYTE *op1 = ostart; ++ BYTE *op2 = opStart2; ++ BYTE *op3 = opStart3; ++ BYTE *op4 = opStart4; ++ U32 endSignal; ++ DTableDesc const dtd = HUF_getDTableDesc(DTable); ++ U32 const dtLog = dtd.tableLog; ++ ++ if (length4 > cSrcSize) ++ return ERROR(corruption_detected); /* overflow */ ++ { ++ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); ++ if (HUF_isError(errorCode)) ++ return errorCode; ++ } ++ { ++ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); ++ if (HUF_isError(errorCode)) ++ return errorCode; ++ } ++ { ++ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); ++ if (HUF_isError(errorCode)) ++ return errorCode; ++ } ++ { ++ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); ++ if (HUF_isError(errorCode)) ++ return errorCode; ++ } ++ ++ /* 16-32 symbols per loop (4-8 symbols per stream) */ ++ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); ++ for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) { ++ HUF_DECODE_SYMBOLX4_2(op1, &bitD1); ++ HUF_DECODE_SYMBOLX4_2(op2, &bitD2); ++ HUF_DECODE_SYMBOLX4_2(op3, &bitD3); ++ HUF_DECODE_SYMBOLX4_2(op4, &bitD4); ++ HUF_DECODE_SYMBOLX4_1(op1, &bitD1); ++ HUF_DECODE_SYMBOLX4_1(op2, &bitD2); ++ HUF_DECODE_SYMBOLX4_1(op3, &bitD3); ++ HUF_DECODE_SYMBOLX4_1(op4, &bitD4); ++ HUF_DECODE_SYMBOLX4_2(op1, &bitD1); ++ HUF_DECODE_SYMBOLX4_2(op2, &bitD2); ++ HUF_DECODE_SYMBOLX4_2(op3, &bitD3); ++ HUF_DECODE_SYMBOLX4_2(op4, &bitD4); ++ 
HUF_DECODE_SYMBOLX4_0(op1, &bitD1); ++ HUF_DECODE_SYMBOLX4_0(op2, &bitD2); ++ HUF_DECODE_SYMBOLX4_0(op3, &bitD3); ++ HUF_DECODE_SYMBOLX4_0(op4, &bitD4); ++ ++ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); ++ } ++ ++ /* check corruption */ ++ if (op1 > opStart2) ++ return ERROR(corruption_detected); ++ if (op2 > opStart3) ++ return ERROR(corruption_detected); ++ if (op3 > opStart4) ++ return ERROR(corruption_detected); ++ /* note : op4 already verified within main loop */ ++ ++ /* finish bitStreams one by one */ ++ HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); ++ HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); ++ HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); ++ HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); ++ ++ /* check */ ++ { ++ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); ++ if (!endCheck) ++ return ERROR(corruption_detected); ++ } ++ ++ /* decoded size */ ++ return dstSize; ++ } ++} ++ ++size_t INIT HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) ++{ ++ DTableDesc dtd = HUF_getDTableDesc(DTable); ++ if (dtd.tableType != 1) ++ return ERROR(GENERIC); ++ return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); ++} ++ ++size_t INIT HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) ++{ ++ const BYTE *ip = (const BYTE *)cSrc; ++ ++ size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize); ++ if (HUF_isError(hSize)) ++ return hSize; ++ if (hSize >= cSrcSize) ++ return ERROR(srcSize_wrong); ++ ip += hSize; ++ cSrcSize -= hSize; ++ ++ return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx); ++} ++ ++/* ********************************/ ++/* 
Generic decompression selector */ ++/* ********************************/ ++ ++size_t INIT HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) ++{ ++ DTableDesc const dtd = HUF_getDTableDesc(DTable); ++ return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) ++ : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable); ++} ++ ++size_t INIT HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) ++{ ++ DTableDesc const dtd = HUF_getDTableDesc(DTable); ++ return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) ++ : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable); ++} ++ ++typedef struct { ++ U32 tableTime; ++ U32 decode256Time; ++} algo_time_t; ++static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = { ++ /* single, double, quad */ ++ {{0, 0}, {1, 1}, {2, 2}}, /* Q==0 : impossible */ ++ {{0, 0}, {1, 1}, {2, 2}}, /* Q==1 : impossible */ ++ {{38, 130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ ++ {{448, 128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ ++ {{556, 128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ ++ {{714, 128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ ++ {{883, 128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ ++ {{897, 128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ ++ {{926, 128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ ++ {{947, 128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ ++ {{1107, 128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ ++ {{1177, 128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ ++ {{1242, 128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ ++ {{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */ ++ {{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */ ++ {{722, 
128}, {1891, 145}, {1936, 146}}, /* Q ==15 : 93-99% */ ++}; ++ ++/** HUF_selectDecoder() : ++* Tells which decoder is likely to decode faster, ++* based on a set of pre-determined metrics. ++* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . ++* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ ++U32 INIT HUF_selectDecoder(size_t dstSize, size_t cSrcSize) ++{ ++ /* decoder timing evaluation */ ++ U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */ ++ U32 const D256 = (U32)(dstSize >> 8); ++ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); ++ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); ++ DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */ ++ ++ return DTime1 < DTime0; ++} ++ ++typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize); ++ ++size_t INIT HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) ++{ ++ /* validation checks */ ++ if (dstSize == 0) ++ return ERROR(dstSize_tooSmall); ++ if (cSrcSize > dstSize) ++ return ERROR(corruption_detected); /* invalid */ ++ if (cSrcSize == dstSize) { ++ memcpy(dst, cSrc, dstSize); ++ return dstSize; ++ } /* not compressed */ ++ if (cSrcSize == 1) { ++ memset(dst, *(const BYTE *)cSrc, dstSize); ++ return dstSize; ++ } /* RLE */ ++ ++ { ++ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); ++ return algoNb ? 
HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize) ++ : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize); ++ } ++} ++ ++size_t INIT HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) ++{ ++ /* validation checks */ ++ if (dstSize == 0) ++ return ERROR(dstSize_tooSmall); ++ if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) ++ return ERROR(corruption_detected); /* invalid */ ++ ++ { ++ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); ++ return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize) ++ : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize); ++ } ++} ++ ++size_t INIT HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) ++{ ++ /* validation checks */ ++ if (dstSize == 0) ++ return ERROR(dstSize_tooSmall); ++ if (cSrcSize > dstSize) ++ return ERROR(corruption_detected); /* invalid */ ++ if (cSrcSize == dstSize) { ++ memcpy(dst, cSrc, dstSize); ++ return dstSize; ++ } /* not compressed */ ++ if (cSrcSize == 1) { ++ memset(dst, *(const BYTE *)cSrc, dstSize); ++ return dstSize; ++ } /* RLE */ ++ ++ { ++ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); ++ return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize) ++ : HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize); ++ } ++} +diff --git a/xen/common/zstd/mem.h b/xen/common/zstd/mem.h +new file mode 100644 +index 0000000000..2883200696 +--- /dev/null ++++ b/xen/common/zstd/mem.h +@@ -0,0 +1,151 @@ ++/** ++ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. ++ * All rights reserved. 
++ * ++ * This source code is licensed under the BSD-style license found in the ++ * LICENSE file in the root directory of https://github.com/facebook/zstd. ++ * An additional grant of patent rights can be found in the PATENTS file in the ++ * same directory. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). ++ */ ++ ++#ifndef MEM_H_MODULE ++#define MEM_H_MODULE ++ ++/*-**************************************** ++* Dependencies ++******************************************/ ++#include /* memcpy */ ++#include /* size_t, ptrdiff_t */ ++#include ++ ++/*-**************************************** ++* Compiler specifics ++******************************************/ ++#define ZSTD_STATIC static inline ++ ++/*-************************************************************** ++* Basic Types ++*****************************************************************/ ++typedef uint8_t BYTE; ++typedef uint16_t U16; ++typedef int16_t S16; ++typedef uint32_t U32; ++typedef int32_t S32; ++typedef uint64_t U64; ++typedef int64_t S64; ++typedef ptrdiff_t iPtrDiff; ++typedef uintptr_t uPtrDiff; ++ ++/*-************************************************************** ++* Memory I/O ++*****************************************************************/ ++ZSTD_STATIC unsigned ZSTD_32bits(void) { return sizeof(size_t) == 4; } ++ZSTD_STATIC unsigned ZSTD_64bits(void) { return sizeof(size_t) == 8; } ++ ++#if defined(__LITTLE_ENDIAN) ++#define ZSTD_LITTLE_ENDIAN 1 ++#else ++#define ZSTD_LITTLE_ENDIAN 0 ++#endif ++ ++ZSTD_STATIC unsigned ZSTD_isLittleEndian(void) { return ZSTD_LITTLE_ENDIAN; } ++ ++ZSTD_STATIC U16 ZSTD_read16(const void *memPtr) { return get_unaligned((const U16 *)memPtr); } ++ ++ZSTD_STATIC U32 
ZSTD_read32(const void *memPtr) { return get_unaligned((const U32 *)memPtr); } ++ ++ZSTD_STATIC U64 ZSTD_read64(const void *memPtr) { return get_unaligned((const U64 *)memPtr); } ++ ++ZSTD_STATIC size_t ZSTD_readST(const void *memPtr) { return get_unaligned((const size_t *)memPtr); } ++ ++ZSTD_STATIC void ZSTD_write16(void *memPtr, U16 value) { put_unaligned(value, (U16 *)memPtr); } ++ ++ZSTD_STATIC void ZSTD_write32(void *memPtr, U32 value) { put_unaligned(value, (U32 *)memPtr); } ++ ++ZSTD_STATIC void ZSTD_write64(void *memPtr, U64 value) { put_unaligned(value, (U64 *)memPtr); } ++ ++/*=== Little endian r/w ===*/ ++ ++ZSTD_STATIC U16 ZSTD_readLE16(const void *memPtr) { return get_unaligned_le16(memPtr); } ++ ++ZSTD_STATIC void ZSTD_writeLE16(void *memPtr, U16 val) { put_unaligned_le16(val, memPtr); } ++ ++ZSTD_STATIC U32 ZSTD_readLE24(const void *memPtr) { return ZSTD_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16); } ++ ++ZSTD_STATIC void ZSTD_writeLE24(void *memPtr, U32 val) ++{ ++ ZSTD_writeLE16(memPtr, (U16)val); ++ ((BYTE *)memPtr)[2] = (BYTE)(val >> 16); ++} ++ ++ZSTD_STATIC U32 ZSTD_readLE32(const void *memPtr) { return get_unaligned_le32(memPtr); } ++ ++ZSTD_STATIC void ZSTD_writeLE32(void *memPtr, U32 val32) { put_unaligned_le32(val32, memPtr); } ++ ++ZSTD_STATIC U64 ZSTD_readLE64(const void *memPtr) { return get_unaligned_le64(memPtr); } ++ ++ZSTD_STATIC void ZSTD_writeLE64(void *memPtr, U64 val64) { put_unaligned_le64(val64, memPtr); } ++ ++ZSTD_STATIC size_t ZSTD_readLEST(const void *memPtr) ++{ ++ if (ZSTD_32bits()) ++ return (size_t)ZSTD_readLE32(memPtr); ++ else ++ return (size_t)ZSTD_readLE64(memPtr); ++} ++ ++ZSTD_STATIC void ZSTD_writeLEST(void *memPtr, size_t val) ++{ ++ if (ZSTD_32bits()) ++ ZSTD_writeLE32(memPtr, (U32)val); ++ else ++ ZSTD_writeLE64(memPtr, (U64)val); ++} ++ ++/*=== Big endian r/w ===*/ ++ ++ZSTD_STATIC U32 ZSTD_readBE32(const void *memPtr) { return get_unaligned_be32(memPtr); } ++ ++ZSTD_STATIC void ZSTD_writeBE32(void 
*memPtr, U32 val32) { put_unaligned_be32(val32, memPtr); } ++ ++ZSTD_STATIC U64 ZSTD_readBE64(const void *memPtr) { return get_unaligned_be64(memPtr); } ++ ++ZSTD_STATIC void ZSTD_writeBE64(void *memPtr, U64 val64) { put_unaligned_be64(val64, memPtr); } ++ ++ZSTD_STATIC size_t ZSTD_readBEST(const void *memPtr) ++{ ++ if (ZSTD_32bits()) ++ return (size_t)ZSTD_readBE32(memPtr); ++ else ++ return (size_t)ZSTD_readBE64(memPtr); ++} ++ ++ZSTD_STATIC void ZSTD_writeBEST(void *memPtr, size_t val) ++{ ++ if (ZSTD_32bits()) ++ ZSTD_writeBE32(memPtr, (U32)val); ++ else ++ ZSTD_writeBE64(memPtr, (U64)val); ++} ++ ++/* function safe only for comparisons */ ++ZSTD_STATIC U32 ZSTD_readMINMATCH(const void *memPtr, U32 length) ++{ ++ switch (length) { ++ default: ++ case 4: return ZSTD_read32(memPtr); ++ case 3: ++ if (ZSTD_isLittleEndian()) ++ return ZSTD_read32(memPtr) << 8; ++ else ++ return ZSTD_read32(memPtr) >> 8; ++ } ++} ++ ++#endif /* MEM_H_MODULE */ +diff --git a/xen/common/zstd/zstd_common.c b/xen/common/zstd/zstd_common.c +new file mode 100644 +index 0000000000..a35c4a5f14 +--- /dev/null ++++ b/xen/common/zstd/zstd_common.c +@@ -0,0 +1,74 @@ ++/** ++ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. ++ * All rights reserved. ++ * ++ * This source code is licensed under the BSD-style license found in the ++ * LICENSE file in the root directory of https://github.com/facebook/zstd. ++ * An additional grant of patent rights can be found in the PATENTS file in the ++ * same directory. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). 
++ */ ++ ++/*-************************************* ++* Dependencies ++***************************************/ ++#include "error_private.h" ++#include "zstd_internal.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */ ++ ++/*=************************************************************** ++* Custom allocator ++****************************************************************/ ++ ++#define stack_push(stack, size) \ ++ ({ \ ++ void *const ptr = ZSTD_PTR_ALIGN((stack)->ptr); \ ++ (stack)->ptr = (char *)ptr + (size); \ ++ (stack)->ptr <= (stack)->end ? ptr : NULL; \ ++ }) ++ ++ZSTD_customMem INIT ZSTD_initStack(void *workspace, size_t workspaceSize) ++{ ++ ZSTD_customMem stackMem = {ZSTD_stackAlloc, ZSTD_stackFree, workspace}; ++ ZSTD_stack *stack = (ZSTD_stack *)workspace; ++ /* Verify preconditions */ ++ if (!workspace || workspaceSize < sizeof(ZSTD_stack) || workspace != ZSTD_PTR_ALIGN(workspace)) { ++ ZSTD_customMem error = {NULL, NULL, NULL}; ++ return error; ++ } ++ /* Initialize the stack */ ++ stack->ptr = workspace; ++ stack->end = (char *)workspace + workspaceSize; ++ stack_push(stack, sizeof(ZSTD_stack)); ++ return stackMem; ++} ++ ++void *INIT ZSTD_stackAllocAll(void *opaque, size_t *size) ++{ ++ ZSTD_stack *stack = (ZSTD_stack *)opaque; ++ *size = (BYTE const *)stack->end - (BYTE *)ZSTD_PTR_ALIGN(stack->ptr); ++ return stack_push(stack, *size); ++} ++ ++void *INIT ZSTD_stackAlloc(void *opaque, size_t size) ++{ ++ ZSTD_stack *stack = (ZSTD_stack *)opaque; ++ return stack_push(stack, size); ++} ++void INIT ZSTD_stackFree(void *opaque, void *address) ++{ ++ (void)opaque; ++ (void)address; ++} ++ ++void *INIT ZSTD_malloc(size_t size, ZSTD_customMem customMem) { return customMem.customAlloc(customMem.opaque, size); } ++ ++void INIT ZSTD_free(void *ptr, ZSTD_customMem customMem) ++{ ++ if (ptr != NULL) ++ customMem.customFree(customMem.opaque, ptr); ++} +diff --git a/xen/common/zstd/zstd_internal.h 
b/xen/common/zstd/zstd_internal.h +new file mode 100644 +index 0000000000..7f8e5529eb +--- /dev/null ++++ b/xen/common/zstd/zstd_internal.h +@@ -0,0 +1,372 @@ ++/** ++ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. ++ * All rights reserved. ++ * ++ * This source code is licensed under the BSD-style license found in the ++ * LICENSE file in the root directory of https://github.com/facebook/zstd. ++ * An additional grant of patent rights can be found in the PATENTS file in the ++ * same directory. ++ * ++ * This program is free software; you can redistribute it and/or modify it under ++ * the terms of the GNU General Public License version 2 as published by the ++ * Free Software Foundation. This program is dual-licensed; you may select ++ * either version 2 of the GNU General Public License ("GPL") or BSD license ++ * ("BSD"). ++ */ ++ ++#ifndef ZSTD_CCOMMON_H_MODULE ++#define ZSTD_CCOMMON_H_MODULE ++ ++/*-******************************************************* ++* Compiler specifics ++*********************************************************/ ++#define FORCE_INLINE static always_inline ++#define FORCE_NOINLINE static noinline INIT ++ ++/*-************************************* ++* Dependencies ++***************************************/ ++#include "error_private.h" ++#include "mem.h" ++#include ++#include ++ ++#define ALIGN(x, a) ((x + (a) - 1) & ~((a) - 1)) ++#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) ++ ++typedef enum { ++ ZSTDnit_frameHeader, ++ ZSTDnit_blockHeader, ++ ZSTDnit_block, ++ ZSTDnit_lastBlock, ++ ZSTDnit_checksum, ++ ZSTDnit_skippableFrame ++} ZSTD_nextInputType_e; ++ ++/** ++ * struct ZSTD_frameParams - zstd frame parameters stored in the frame header ++ * @frameContentSize: The frame content size, or 0 if not present. ++ * @windowSize: The window size, or 0 if the frame is a skippable frame. ++ * @dictID: The dictionary id, or 0 if not present. ++ * @checksumFlag: Whether a checksum was used. 
++ */ ++typedef struct { ++ unsigned long long frameContentSize; ++ unsigned int windowSize; ++ unsigned int dictID; ++ unsigned int checksumFlag; ++} ZSTD_frameParams; ++ ++/** ++ * struct ZSTD_inBuffer - input buffer for streaming ++ * @src: Start of the input buffer. ++ * @size: Size of the input buffer. ++ * @pos: Position where reading stopped. Will be updated. ++ * Necessarily 0 <= pos <= size. ++ */ ++typedef struct ZSTD_inBuffer_s { ++ const void *src; ++ size_t size; ++ size_t pos; ++} ZSTD_inBuffer; ++ ++/** ++ * struct ZSTD_outBuffer - output buffer for streaming ++ * @dst: Start of the output buffer. ++ * @size: Size of the output buffer. ++ * @pos: Position where writing stopped. Will be updated. ++ * Necessarily 0 <= pos <= size. ++ */ ++typedef struct ZSTD_outBuffer_s { ++ void *dst; ++ size_t size; ++ size_t pos; ++} ZSTD_outBuffer; ++ ++typedef struct ZSTD_CCtx_s ZSTD_CCtx; ++typedef struct ZSTD_DCtx_s ZSTD_DCtx; ++ ++typedef struct ZSTD_CDict_s ZSTD_CDict; ++typedef struct ZSTD_DDict_s ZSTD_DDict; ++ ++typedef struct ZSTD_CStream_s ZSTD_CStream; ++typedef struct ZSTD_DStream_s ZSTD_DStream; ++ ++/*-************************************* ++* shared macros ++***************************************/ ++#define MIN(a, b) ((a) < (b) ? (a) : (b)) ++#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) ++#define CHECK_F(f) \ ++ { \ ++ size_t const errcod = f; \ ++ if (ERR_isError(errcod)) \ ++ return errcod; \ ++ } /* check and Forward error code */ ++#define CHECK_E(f, e) \ ++ { \ ++ size_t const errcod = f; \ ++ if (ERR_isError(errcod)) \ ++ return ERROR(e); \ ++ } /* check and send Error code */ ++#define ZSTD_STATIC_ASSERT(c) \ ++ { \ ++ enum { ZSTD_static_assert = 1 / (int)(!!(c)) }; \ ++ } ++ ++/*-************************************* ++* Common constants ++***************************************/ ++#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */ ++#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U ++ ++#define ZSTD_OPT_NUM (1 << 12) ++#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */ ++ ++#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) ++#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) ++ ++#define ZSTD_WINDOWLOG_MAX_32 27 ++#define ZSTD_WINDOWLOG_MAX_64 27 ++#define ZSTD_WINDOWLOG_MAX \ ++ ((unsigned int)(sizeof(size_t) == 4 \ ++ ? ZSTD_WINDOWLOG_MAX_32 \ ++ : ZSTD_WINDOWLOG_MAX_64)) ++#define ZSTD_WINDOWLOG_MIN 10 ++#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX ++#define ZSTD_HASHLOG_MIN 6 ++#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) ++#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN ++#define ZSTD_HASHLOG3_MAX 17 ++#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) ++#define ZSTD_SEARCHLOG_MIN 1 ++/* only for ZSTD_fast, other strategies are limited to 6 */ ++#define ZSTD_SEARCHLENGTH_MAX 7 ++/* only for ZSTD_btopt, other strategies are limited to 4 */ ++#define ZSTD_SEARCHLENGTH_MIN 3 ++#define ZSTD_TARGETLENGTH_MIN 4 ++#define ZSTD_TARGETLENGTH_MAX 999 ++ ++#define ZSTD_REP_NUM 3 /* number of repcodes */ ++#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */ ++#define ZSTD_REP_MOVE (ZSTD_REP_NUM - 1) ++#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM) ++static const U32 repStartValue[ZSTD_REP_NUM] = {1, 4, 8}; ++ ++/* for static allocation */ ++#define ZSTD_FRAMEHEADERSIZE_MAX 18 ++#define ZSTD_FRAMEHEADERSIZE_MIN 6 ++static 
const size_t ZSTD_frameHeaderSize_prefix = 5; ++static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN; ++static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; ++/* magic number + skippable frame length */ ++static const size_t ZSTD_skippableHeaderSize = 8; ++ ++#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) ++ ++#if 0 /* These don't seem to be usable - not sure what their purpose is. */ ++#define KB *(1 << 10) ++#define MB *(1 << 20) ++#define GB *(1U << 30) ++#endif ++ ++#define BIT7 128 ++#define BIT6 64 ++#define BIT5 32 ++#define BIT4 16 ++#define BIT1 2 ++#define BIT0 1 ++ ++#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 ++static const size_t ZSTD_fcs_fieldSize[4] = {0, 2, 4, 8}; ++static const size_t ZSTD_did_fieldSize[4] = {0, 1, 2, 4}; ++ ++#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ ++static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; ++typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; ++ ++#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ ++#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ ++ ++#define HufLog 12 ++typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; ++ ++#define LONGNBSEQ 0x7F00 ++ ++#define MINMATCH 3 ++#define EQUAL_READ32 4 ++ ++#define Litbits 8 ++#define MaxLit ((1 << Litbits) - 1) ++#define MaxML 52 ++#define MaxLL 35 ++#define MaxOff 28 ++#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */ ++#define MLFSELog 9 ++#define LLFSELog 9 ++#define OffFSELog 8 ++ ++static const U32 LL_bits[MaxLL + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++static const S16 LL_defaultNorm[MaxLL + 1] = {4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, 
-1, -1, -1}; ++#define LL_DEFAULTNORMLOG 6 /* for static allocation */ ++static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; ++ ++static const U32 ML_bits[MaxML + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++static const S16 ML_defaultNorm[MaxML + 1] = {1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1}; ++#define ML_DEFAULTNORMLOG 6 /* for static allocation */ ++static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; ++ ++static const S16 OF_defaultNorm[MaxOff + 1] = {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1}; ++#define OF_DEFAULTNORMLOG 5 /* for static allocation */ ++static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; ++ ++/*-******************************************* ++* Shared functions to include for inlining ++*********************************************/ ++ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) { ++ /* ++ * zstd relies heavily on gcc being able to analyze and inline this ++ * memcpy() call, since it is called in a tight loop. Preboot mode ++ * is compiled in freestanding mode, which stops gcc from analyzing ++ * memcpy(). Use __builtin_memcpy() to tell gcc to analyze this as a ++ * regular memcpy(). ++ */ ++ __builtin_memcpy(dst, src, 8); ++} ++/*! ZSTD_wildcopy() : ++* custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */ ++#define WILDCOPY_OVERLENGTH 8 ++ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length) ++{ ++ const BYTE* ip = (const BYTE*)src; ++ BYTE* op = (BYTE*)dst; ++ BYTE* const oend = op + length; ++#if defined(GCC_VERSION) && GCC_VERSION >= 70000 && GCC_VERSION < 70200 ++ /* ++ * Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388. 
++ * Avoid the bad case where the loop only runs once by handling the ++ * special case separately. This doesn't trigger the bug because it ++ * doesn't involve pointer/integer overflow. ++ */ ++ if (length <= 8) ++ return ZSTD_copy8(dst, src); ++#endif ++ do { ++ ZSTD_copy8(op, ip); ++ op += 8; ++ ip += 8; ++ } while (op < oend); ++} ++ ++/*-******************************************* ++* Private interfaces ++*********************************************/ ++typedef struct ZSTD_stats_s ZSTD_stats_t; ++ ++typedef struct { ++ U32 off; ++ U32 len; ++} ZSTD_match_t; ++ ++typedef struct { ++ U32 price; ++ U32 off; ++ U32 mlen; ++ U32 litlen; ++ U32 rep[ZSTD_REP_NUM]; ++} ZSTD_optimal_t; ++ ++typedef struct seqDef_s { ++ U32 offset; ++ U16 litLength; ++ U16 matchLength; ++} seqDef; ++ ++typedef struct { ++ seqDef *sequencesStart; ++ seqDef *sequences; ++ BYTE *litStart; ++ BYTE *lit; ++ BYTE *llCode; ++ BYTE *mlCode; ++ BYTE *ofCode; ++ U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ ++ U32 longLengthPos; ++ /* opt */ ++ ZSTD_optimal_t *priceTable; ++ ZSTD_match_t *matchTable; ++ U32 *matchLengthFreq; ++ U32 *litLengthFreq; ++ U32 *litFreq; ++ U32 *offCodeFreq; ++ U32 matchLengthSum; ++ U32 matchSum; ++ U32 litLengthSum; ++ U32 litSum; ++ U32 offCodeSum; ++ U32 log2matchLengthSum; ++ U32 log2matchSum; ++ U32 log2litLengthSum; ++ U32 log2litSum; ++ U32 log2offCodeSum; ++ U32 factor; ++ U32 staticPrices; ++ U32 cachedPrice; ++ U32 cachedLitLength; ++ const BYTE *cachedLiterals; ++} seqStore_t; ++ ++const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx); ++void ZSTD_seqToCodes(const seqStore_t *seqStorePtr); ++int ZSTD_isSkipFrame(ZSTD_DCtx *dctx); ++ ++/*= Custom memory allocation functions */ ++typedef void *(*ZSTD_allocFunction)(void *opaque, size_t size); ++typedef void (*ZSTD_freeFunction)(void *opaque, void *address); ++typedef struct { ++ ZSTD_allocFunction customAlloc; ++ ZSTD_freeFunction customFree; ++ void *opaque; ++} 
ZSTD_customMem; ++ ++void *ZSTD_malloc(size_t size, ZSTD_customMem customMem); ++void ZSTD_free(void *ptr, ZSTD_customMem customMem); ++ ++/*====== stack allocation ======*/ ++ ++typedef struct { ++ void *ptr; ++ const void *end; ++} ZSTD_stack; ++ ++#define ZSTD_ALIGN(x) ALIGN(x, sizeof(size_t)) ++#define ZSTD_PTR_ALIGN(p) PTR_ALIGN(p, sizeof(size_t)) ++ ++ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize); ++ ++void *ZSTD_stackAllocAll(void *opaque, size_t *size); ++void *ZSTD_stackAlloc(void *opaque, size_t size); ++void ZSTD_stackFree(void *opaque, void *address); ++ ++/*====== common function ======*/ ++ ++ZSTD_STATIC U32 ZSTD_highbit32(U32 val) { return 31 - __builtin_clz(val); } ++ ++/* hidden functions */ ++ ++/* ZSTD_invalidateRepCodes() : ++ * ensures next compression will not use repcodes from previous block. ++ * Note : only works with regular variant; ++ * do not use with extDict variant ! */ ++void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx); ++ ++size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx); ++size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx); ++size_t ZSTD_freeCDict(ZSTD_CDict *cdict); ++size_t ZSTD_freeDDict(ZSTD_DDict *cdict); ++size_t ZSTD_freeCStream(ZSTD_CStream *zcs); ++size_t ZSTD_freeDStream(ZSTD_DStream *zds); ++ ++#endif /* ZSTD_CCOMMON_H_MODULE */ +diff --git a/xen/include/asm-arm/types.h b/xen/include/asm-arm/types.h +index 89aae25ffe..083acbd151 100644 +--- a/xen/include/asm-arm/types.h ++++ b/xen/include/asm-arm/types.h +@@ -61,6 +61,12 @@ typedef unsigned long size_t; + #endif + typedef signed long ssize_t; + ++#if defined(__PTRDIFF_TYPE__) ++typedef __PTRDIFF_TYPE__ ptrdiff_t; ++#else ++typedef signed long ptrdiff_t; ++#endif ++ + #endif /* __ASSEMBLY__ */ + + #endif /* __ARM_TYPES_H__ */ +diff --git a/xen/include/asm-x86/types.h b/xen/include/asm-x86/types.h +index fdf4f7dcc0..7817132048 100644 +--- a/xen/include/asm-x86/types.h ++++ b/xen/include/asm-x86/types.h +@@ -39,6 +39,12 @@ typedef unsigned long size_t; + #endif + typedef 
signed long ssize_t; + ++#if defined(__PTRDIFF_TYPE__) ++typedef __PTRDIFF_TYPE__ ptrdiff_t; ++#else ++typedef signed long ptrdiff_t; ++#endif ++ + #endif /* __ASSEMBLY__ */ + + #endif /* __X86_TYPES_H__ */ +diff --git a/xen/include/xen/decompress.h b/xen/include/xen/decompress.h +index b2955faa4b..f5bc17f2b6 100644 +--- a/xen/include/xen/decompress.h ++++ b/xen/include/xen/decompress.h +@@ -31,7 +31,7 @@ typedef int decompress_fn(unsigned char *inbuf, unsigned int len, + * dependent). + */ + +-decompress_fn bunzip2, unxz, unlzma, unlzo, unlz4; ++decompress_fn bunzip2, unxz, unlzma, unlzo, unlz4, unzstd; + + int decompress(void *inbuf, unsigned int len, void *outbuf); + +-- +2.20.1 + diff --git a/xen.git-d8099d94dfaa3573bd86ebfc457cbc8f70a3ecda.patch b/xen.git-d8099d94dfaa3573bd86ebfc457cbc8f70a3ecda.patch new file mode 100644 index 0000000..8437382 --- /dev/null +++ b/xen.git-d8099d94dfaa3573bd86ebfc457cbc8f70a3ecda.patch @@ -0,0 +1,105 @@ +From d8099d94dfaa3573bd86ebfc457cbc8f70a3ecda Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Tue, 26 Jan 2021 14:14:39 +0100 +Subject: [PATCH] libxenguest: add get_unaligned_le32() + +Abstract xc_dom_check_gzip()'s reading of the uncompressed size into a +helper re-usable, in particular, by other decompressor code. + +Sadly in the mini-os case this conflicts with other functions of the +same name (and purpose), which can't be easily replaced individually. +Yet it was requested that no full set of helpers be introduced at this +point in the release cycle. Hence the awkward XG_NEED_UNALIGNED. 
+ +Requested-by: Ian Jackson +Signed-off-by: Jan Beulich +Reviewed-by: Ian Jackson +Release-Acked-by: Ian Jackson +--- + tools/libs/guest/xg_dom_core.c | 5 ++--- + tools/libs/guest/xg_dom_decompress_lz4.c | 1 + + tools/libs/guest/xg_private.h | 9 +++++++++ + xen/common/lz4/defs.h | 5 ----- + 4 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/tools/libs/guest/xg_dom_core.c b/tools/libs/guest/xg_dom_core.c +index f846d8e1ed..98ef8e8fc9 100644 +--- a/tools/libxc/xc_dom_core.c ++++ b/tools/libxc/xc_dom_core.c +@@ -31,6 +31,7 @@ + #include + #include + ++#define XG_NEED_UNALIGNED + #include "xg_private.h" + #include "xc_dom.h" + #include "_paths.h" +@@ -325,7 +326,6 @@ int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz) + + size_t xc_dom_check_gzip(xc_interface *xch, void *blob, size_t ziplen) + { +- unsigned char *gzlen; + size_t unziplen; + + if ( ziplen < 6 ) +@@ -337,8 +337,7 @@ size_t xc_dom_check_gzip(xc_interface *xch, void *blob, size_t ziplen) + /* not gzipped */ + return 0; + +- gzlen = blob + ziplen - 4; +- unziplen = (size_t)gzlen[3] << 24 | gzlen[2] << 16 | gzlen[1] << 8 | gzlen[0]; ++ unziplen = get_unaligned_le32(blob + ziplen - 4); + if ( unziplen > XC_DOM_DECOMPRESS_MAX ) + { + xc_dom_printf +diff --git a/tools/libs/guest/xg_dom_decompress_lz4.c b/tools/libs/guest/xg_dom_decompress_lz4.c +index 97ba620d86..34a1a13d84 100644 +--- a/tools/libxc/xc_dom_decompress_lz4.c ++++ b/tools/libxc/xc_dom_decompress_lz4.c +@@ -3,6 +3,7 @@ + #include + #include + ++#define XG_NEED_UNALIGNED + #include "xg_private.h" + #include "xc_dom_decompress.h" + +diff --git a/tools/libs/guest/xg_private.h b/tools/libs/guest/xg_private.h +index c3ed8c1257..8f9b257a2f 100644 +--- a/tools/libxc/xg_private.h ++++ b/tools/libxc/xg_private.h +@@ -62,6 +62,15 @@ char *xc_inflate_buffer(xc_interface *xch, + unsigned long in_size, + unsigned long *out_size); + ++#if !defined(__MINIOS__) || defined(XG_NEED_UNALIGNED) ++ ++static inline unsigned int 
get_unaligned_le32(const uint8_t *buf) ++{ ++ return ((unsigned int)buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; ++} ++ ++#endif /* !__MINIOS__ || XG_NEED_UNALIGNED */ ++ + unsigned long csum_page (void * page); + + #define _PAGE_PRESENT 0x001 +diff --git a/xen/common/lz4/defs.h b/xen/common/lz4/defs.h +index 4fbea2ac3d..10609f5a53 100644 +--- a/xen/common/lz4/defs.h ++++ b/xen/common/lz4/defs.h +@@ -18,11 +18,6 @@ static inline u16 get_unaligned_le16(const void *p) + return le16_to_cpup(p); + } + +-static inline u32 get_unaligned_le32(const void *p) +-{ +- return le32_to_cpup(p); +-} +- + #endif + + /* +-- +2.20.1 + diff --git a/xen.spec b/xen.spec index dc23fb6..b4907ec 100644 --- a/xen.spec +++ b/xen.spec @@ -58,7 +58,7 @@ Summary: Xen is a virtual machine monitor Name: xen Version: 4.14.1 -Release: 3%{?dist} +Release: 4%{?dist} License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz @@ -112,8 +112,12 @@ Patch40: xen.drop.brctl.patch Patch41: xen.gcc9.fixes.patch Patch42: xen.gcc10.fixes.patch Patch43: xen.gcc11.fixes.patch -Patch44: zstd-dom0.patch -Patch45: xsa360-4.14.patch +Patch44: xsa360-4.14.patch +Patch45: xen.git-7c9f81687ad611515474b1c17afc2f79f19faef5.patch +Patch46: xen.git-35d2960ae65f28106fdc5c2130f5f08fadca0e4c.patch +Patch47: xen.git-d6627cf1b63ce57a6a7e2c1800dbc50eed742c32.patch +Patch48: xen.git-d8099d94dfaa3573bd86ebfc457cbc8f70a3ecda.patch +Patch49: xen.git-8169f82049efb5b2044b33aa482ba3a136b7804d.patch %if %build_qemutrad @@ -142,7 +146,7 @@ BuildRequires: libuuid-devel # iasl needed to build hvmloader BuildRequires: acpica-tools # modern compressed kernels -BuildRequires: bzip2-devel xz-devel +BuildRequires: bzip2-devel xz-devel libzstd-devel # libfsimage BuildRequires: e2fsprogs-devel # tools now require yajl and wget @@ -185,6 +189,12 @@ BuildRequires: edk2-ovmf %endif %if %build_hyp BuildRequires: bison flex +%ifarch %{ix86} +Suggests: 
grub2-pc-modules +%endif +%ifarch x86_64 +Suggests: grub2-pc-modules grub2-efi-x64-modules +%endif %endif %description @@ -318,6 +328,10 @@ manage Xen virtual machines. %patch43 -p1 %patch44 -p1 %patch45 -p1 +%patch46 -p1 +%patch47 -p1 +%patch48 -p1 +%patch49 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -910,6 +924,11 @@ fi %endif %changelog +* Mon Feb 01 2021 Michael Young - 4.14.1-4 +- backport upstream zstd dom0 and guest patches +- add libzstd-devel BuildRequires +- add weak dependency on grub modules to improve initial boot setup + * Wed Jan 27 2021 Fedora Release Engineering - 4.14.1-3 - Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild diff --git a/zstd-dom0.patch b/zstd-dom0.patch deleted file mode 100644 index 57b7f76..0000000 --- a/zstd-dom0.patch +++ /dev/null @@ -1,9214 +0,0 @@ -diff --git a/xen/common/Makefile b/xen/common/Makefile -index d109f279a4..5ba09f04ac 100644 ---- a/xen/common/Makefile -+++ b/xen/common/Makefile -@@ -59,7 +59,7 @@ obj-bin-y += warning.init.o - obj-$(CONFIG_XENOPROF) += xenoprof.o - obj-y += xmalloc_tlsf.o - --obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma lzo unlzo unlz4 earlycpio,$(n).init.o) -+obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma lzo unlzo unlz4 unzstd earlycpio,$(n).init.o) - - obj-$(CONFIG_COMPAT) += $(addprefix compat/,domain.o kernel.o memory.o multicall.o xlat.o) - -diff --git a/xen/common/decompress.c b/xen/common/decompress.c -index 9d6e0c4ab0..0da27b0ab6 100644 ---- a/xen/common/decompress.c -+++ b/xen/common/decompress.c -@@ -31,5 +31,8 @@ int __init decompress(void *inbuf, unsigned int len, void *outbuf) - if ( len >= 2 && !memcmp(inbuf, "\x02\x21", 2) ) - return unlz4(inbuf, len, NULL, NULL, outbuf, NULL, error); - -+ if ( len >= 4 && !memcmp(inbuf, "\050\265\057\375", 4) ) -+ return unzstd(inbuf, len, NULL, NULL, outbuf, NULL, error); -+ - return 1; - } -diff --git a/xen/common/unzstd.c b/xen/common/unzstd.c -new file 
mode 100644 -index 0000000000..a2c382fddc ---- /dev/null -+++ b/xen/common/unzstd.c -@@ -0,0 +1,332 @@ -+/* -+ * Important notes about in-place decompression -+ * -+ * At least on x86, the kernel is decompressed in place: the compressed data -+ * is placed to the end of the output buffer, and the decompressor overwrites -+ * most of the compressed data. There must be enough safety margin to -+ * guarantee that the write position is always behind the read position. -+ * -+ * The safety margin for ZSTD with a 128 KB block size is calculated below. -+ * Note that the margin with ZSTD is bigger than with GZIP or XZ! -+ * -+ * The worst case for in-place decompression is that the beginning of -+ * the file is compressed extremely well, and the rest of the file is -+ * uncompressible. Thus, we must look for worst-case expansion when the -+ * compressor is encoding uncompressible data. -+ * -+ * The structure of the .zst file in case of a compresed kernel is as follows. -+ * Maximum sizes (as bytes) of the fields are in parenthesis. -+ * -+ * Frame Header: (18) -+ * Blocks: (N) -+ * Checksum: (4) -+ * -+ * The frame header and checksum overhead is at most 22 bytes. -+ * -+ * ZSTD stores the data in blocks. Each block has a header whose size is -+ * a 3 bytes. After the block header, there is up to 128 KB of payload. -+ * The maximum uncompressed size of the payload is 128 KB. The minimum -+ * uncompressed size of the payload is never less than the payload size -+ * (excluding the block header). -+ * -+ * The assumption, that the uncompressed size of the payload is never -+ * smaller than the payload itself, is valid only when talking about -+ * the payload as a whole. It is possible that the payload has parts where -+ * the decompressor consumes more input than it produces output. Calculating -+ * the worst case for this would be tricky. 
Instead of trying to do that, -+ * let's simply make sure that the decompressor never overwrites any bytes -+ * of the payload which it is currently reading. -+ * -+ * Now we have enough information to calculate the safety margin. We need -+ * - 22 bytes for the .zst file format headers; -+ * - 3 bytes per every 128 KiB of uncompressed size (one block header per -+ * block); and -+ * - 128 KiB (biggest possible zstd block size) to make sure that the -+ * decompressor never overwrites anything from the block it is currently -+ * reading. -+ * -+ * We get the following formula: -+ * -+ * safety_margin = 22 + uncompressed_size * 3 / 131072 + 131072 -+ * <= 22 + (uncompressed_size >> 15) + 131072 -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ */ -+ -+/* -+ * Preboot environments #include "path/to/decompress_unzstd.c". -+ * All of the source files we depend on must be #included. -+ * zstd's only source dependeny is xxhash, which has no source -+ * dependencies. -+ * -+ * When UNZSTD_PREBOOT is defined we declare __decompress(), which is -+ * used for kernel decompression, instead of unzstd(). -+ * -+ * Define __DISABLE_EXPORTS in preboot environments to prevent symbols -+ * from xxhash and zstd from being exported by the EXPORT_SYMBOL macro. -+ */ -+ -+#include "decompress.h" -+#include "xxhash.c" -+#include "zstd/entropy_common.c" -+#include "zstd/fse_decompress.c" -+#include "zstd/huf_decompress.c" -+#include "zstd/zstd_common.c" -+#include "zstd/decompress.c" -+ -+#include -+ -+/* 128MB is the maximum window size supported by zstd. */ -+#define ZSTD_WINDOWSIZE_MAX (1 << ZSTD_WINDOWLOG_MAX) -+/* -+ * Size of the input and output buffers in multi-call mode. 
-+ * Pick a larger size because it isn't used during kernel decompression, -+ * since that is single pass, and we have to allocate a large buffer for -+ * zstd's window anyway. The larger size speeds up initramfs decompression. -+ */ -+#define ZSTD_IOBUF_SIZE (1 << 17) -+ -+static int INIT handle_zstd_error(size_t ret, void (*error)(const char *x)) -+{ -+ const int err = ZSTD_getErrorCode(ret); -+ -+ if (!ZSTD_isError(ret)) -+ return 0; -+ -+ switch (err) { -+ case ZSTD_error_memory_allocation: -+ error("ZSTD decompressor ran out of memory"); -+ break; -+ case ZSTD_error_prefix_unknown: -+ error("Input is not in the ZSTD format (wrong magic bytes)"); -+ break; -+ case ZSTD_error_dstSize_tooSmall: -+ case ZSTD_error_corruption_detected: -+ case ZSTD_error_checksum_wrong: -+ error("ZSTD-compressed data is corrupt"); -+ break; -+ default: -+ error("ZSTD-compressed data is probably corrupt"); -+ break; -+ } -+ return -1; -+} -+ -+/* -+ * Handle the case where we have the entire input and output in one segment. -+ * We can allocate less memory (no circular buffer for the sliding window), -+ * and avoid some memcpy() calls. -+ */ -+static int INIT decompress_single(const u8 *in_buf, unsigned int in_len, u8 *out_buf, -+ long out_len, unsigned int *in_pos, -+ void (*error)(const char *x)) -+{ -+ const size_t wksp_size = ZSTD_DCtxWorkspaceBound(); -+ void *wksp = large_malloc(wksp_size); -+ ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wksp_size); -+ int err; -+ size_t ret; -+ -+ if (dctx == NULL) { -+ error("Out of memory while allocating ZSTD_DCtx"); -+ err = -1; -+ goto out; -+ } -+ /* -+ * Find out how large the frame actually is, there may be junk at -+ * the end of the frame that ZSTD_decompressDCtx() can't handle. 
-+ */ -+ ret = ZSTD_findFrameCompressedSize(in_buf, in_len); -+ err = handle_zstd_error(ret, error); -+ if (err) -+ goto out; -+ in_len = (long)ret; -+ -+ ret = ZSTD_decompressDCtx(dctx, out_buf, out_len, in_buf, in_len); -+ err = handle_zstd_error(ret, error); -+ if (err) -+ goto out; -+ -+ if (in_pos != NULL) -+ *in_pos = in_len; -+ -+ err = 0; -+out: -+ if (wksp != NULL) -+ large_free(wksp); -+ return err; -+} -+ -+static int INIT __unzstd(unsigned char *in_buf, unsigned int in_len, -+ int (*fill)(void*, unsigned int), -+ int (*flush)(void*, unsigned int), -+ unsigned char *out_buf, long out_len, -+ unsigned int *in_pos, -+ void (*error)(const char *x)) -+{ -+ ZSTD_inBuffer in; -+ ZSTD_outBuffer out; -+ ZSTD_frameParams params; -+ void *in_allocated = NULL; -+ void *out_allocated = NULL; -+ void *wksp = NULL; -+ size_t wksp_size; -+ ZSTD_DStream *dstream; -+ int err; -+ size_t ret; -+ -+ if (out_len == 0) -+ out_len = INT_MAX; /* no limit */ -+ -+ if (fill == NULL && flush == NULL) -+ /* -+ * We can decompress faster and with less memory when we have a -+ * single chunk. -+ */ -+ return decompress_single(in_buf, in_len, out_buf, out_len, -+ in_pos, error); -+ -+ /* -+ * If in_buf is not provided, we must be using fill(), so allocate -+ * a large enough buffer. If it is provided, it must be at least -+ * ZSTD_IOBUF_SIZE large. -+ */ -+ if (in_buf == NULL) { -+ in_allocated = large_malloc(ZSTD_IOBUF_SIZE); -+ if (in_allocated == NULL) { -+ error("Out of memory while allocating input buffer"); -+ err = -1; -+ goto out; -+ } -+ in_buf = in_allocated; -+ in_len = 0; -+ } -+ /* Read the first chunk, since we need to decode the frame header. */ -+ if (fill != NULL) -+ in_len = fill(in_buf, ZSTD_IOBUF_SIZE); -+ if (in_len < 0) { -+ error("ZSTD-compressed data is truncated"); -+ err = -1; -+ goto out; -+ } -+ /* Set the first non-empty input buffer. */ -+ in.src = in_buf; -+ in.pos = 0; -+ in.size = in_len; -+ /* Allocate the output buffer if we are using flush(). 
*/ -+ if (flush != NULL) { -+ out_allocated = large_malloc(ZSTD_IOBUF_SIZE); -+ if (out_allocated == NULL) { -+ error("Out of memory while allocating output buffer"); -+ err = -1; -+ goto out; -+ } -+ out_buf = out_allocated; -+ out_len = ZSTD_IOBUF_SIZE; -+ } -+ /* Set the output buffer. */ -+ out.dst = out_buf; -+ out.pos = 0; -+ out.size = out_len; -+ -+ /* -+ * We need to know the window size to allocate the ZSTD_DStream. -+ * Since we are streaming, we need to allocate a buffer for the sliding -+ * window. The window size varies from 1 KB to ZSTD_WINDOWSIZE_MAX -+ * (8 MB), so it is important to use the actual value so as not to -+ * waste memory when it is smaller. -+ */ -+ ret = ZSTD_getFrameParams(¶ms, in.src, in.size); -+ err = handle_zstd_error(ret, error); -+ if (err) -+ goto out; -+ if (ret != 0) { -+ error("ZSTD-compressed data has an incomplete frame header"); -+ err = -1; -+ goto out; -+ } -+ if (params.windowSize > ZSTD_WINDOWSIZE_MAX) { -+ error("ZSTD-compressed data has too large a window size"); -+ err = -1; -+ goto out; -+ } -+ -+ /* -+ * Allocate the ZSTD_DStream now that we know how much memory is -+ * required. -+ */ -+ wksp_size = ZSTD_DStreamWorkspaceBound(params.windowSize); -+ wksp = large_malloc(wksp_size); -+ dstream = ZSTD_initDStream(params.windowSize, wksp, wksp_size); -+ if (dstream == NULL) { -+ error("Out of memory while allocating ZSTD_DStream"); -+ err = -1; -+ goto out; -+ } -+ -+ /* -+ * Decompression loop: -+ * Read more data if necessary (error if no more data can be read). -+ * Call the decompression function, which returns 0 when finished. -+ * Flush any data produced if using flush(). -+ */ -+ if (in_pos != NULL) -+ *in_pos = 0; -+ do { -+ /* -+ * If we need to reload data, either we have fill() and can -+ * try to get more data, or we don't and the input is truncated. -+ */ -+ if (in.pos == in.size) { -+ if (in_pos != NULL) -+ *in_pos += in.pos; -+ in_len = fill ? 
fill(in_buf, ZSTD_IOBUF_SIZE) : -1; -+ if (in_len < 0) { -+ error("ZSTD-compressed data is truncated"); -+ err = -1; -+ goto out; -+ } -+ in.pos = 0; -+ in.size = in_len; -+ } -+ /* Returns zero when the frame is complete. */ -+ ret = ZSTD_decompressStream(dstream, &out, &in); -+ err = handle_zstd_error(ret, error); -+ if (err) -+ goto out; -+ /* Flush all of the data produced if using flush(). */ -+ if (flush != NULL && out.pos > 0) { -+ if (out.pos != flush(out.dst, out.pos)) { -+ error("Failed to flush()"); -+ err = -1; -+ goto out; -+ } -+ out.pos = 0; -+ } -+ } while (ret != 0); -+ -+ if (in_pos != NULL) -+ *in_pos += in.pos; -+ -+ err = 0; -+out: -+ if (in_allocated != NULL) -+ large_free(in_allocated); -+ if (out_allocated != NULL) -+ large_free(out_allocated); -+ if (wksp != NULL) -+ large_free(wksp); -+ return err; -+} -+ -+STATIC int INIT unzstd(unsigned char *buf, unsigned int len, -+ int (*fill)(void*, unsigned int), -+ int (*flush)(void*, unsigned int), -+ unsigned char *out_buf, -+ unsigned int *pos, -+ void (*error)(const char *x)) -+{ -+ return __unzstd(buf, len, fill, flush, out_buf, 0, pos, error); -+} -diff --git a/xen/common/xxhash.c b/xen/common/xxhash.c -new file mode 100644 -index 0000000000..3ab3e01859 ---- /dev/null -+++ b/xen/common/xxhash.c -@@ -0,0 +1,484 @@ -+/* -+ * xxHash - Extremely Fast Hash algorithm -+ * Copyright (C) 2012-2016, Yann Collet. -+ * -+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are -+ * met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. 
-+ * * Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following disclaimer -+ * in the documentation and/or other materials provided with the -+ * distribution. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). 
-+ * -+ * You can contact the author at: -+ * - xxHash homepage: https://cyan4973.github.io/xxHash/ -+ * - xxHash source repository: https://github.com/Cyan4973/xxHash -+ */ -+ -+#include -+#include -+#include -+#include "zstd/private.h" -+ -+/*-************************************* -+ * Macros -+ **************************************/ -+#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r))) -+#define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r))) -+ -+#ifdef __LITTLE_ENDIAN -+# define XXH_CPU_LITTLE_ENDIAN 1 -+#else -+# define XXH_CPU_LITTLE_ENDIAN 0 -+#endif -+ -+/*-************************************* -+ * Constants -+ **************************************/ -+static const uint32_t PRIME32_1 = 2654435761U; -+static const uint32_t PRIME32_2 = 2246822519U; -+static const uint32_t PRIME32_3 = 3266489917U; -+static const uint32_t PRIME32_4 = 668265263U; -+static const uint32_t PRIME32_5 = 374761393U; -+ -+static const uint64_t PRIME64_1 = 11400714785074694791ULL; -+static const uint64_t PRIME64_2 = 14029467366897019727ULL; -+static const uint64_t PRIME64_3 = 1609587929392839161ULL; -+static const uint64_t PRIME64_4 = 9650029242287828579ULL; -+static const uint64_t PRIME64_5 = 2870177450012600261ULL; -+ -+/*-************************** -+ * Utils -+ ***************************/ -+void INIT xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src) -+{ -+ memcpy(dst, src, sizeof(*dst)); -+} -+ -+void INIT xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src) -+{ -+ memcpy(dst, src, sizeof(*dst)); -+} -+ -+/*-*************************** -+ * Simple Hash Functions -+ ****************************/ -+static uint32_t INIT xxh32_round(uint32_t seed, const uint32_t input) -+{ -+ seed += input * PRIME32_2; -+ seed = xxh_rotl32(seed, 13); -+ seed *= PRIME32_1; -+ return seed; -+} -+ -+uint32_t INIT xxh32(const void *input, const size_t len, const uint32_t seed) -+{ -+ const uint8_t *p = (const uint8_t *)input; -+ const uint8_t *b_end = p + len; 
-+ uint32_t h32; -+ -+ if (len >= 16) { -+ const uint8_t *const limit = b_end - 16; -+ uint32_t v1 = seed + PRIME32_1 + PRIME32_2; -+ uint32_t v2 = seed + PRIME32_2; -+ uint32_t v3 = seed + 0; -+ uint32_t v4 = seed - PRIME32_1; -+ -+ do { -+ v1 = xxh32_round(v1, get_unaligned_le32(p)); -+ p += 4; -+ v2 = xxh32_round(v2, get_unaligned_le32(p)); -+ p += 4; -+ v3 = xxh32_round(v3, get_unaligned_le32(p)); -+ p += 4; -+ v4 = xxh32_round(v4, get_unaligned_le32(p)); -+ p += 4; -+ } while (p <= limit); -+ -+ h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) + -+ xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18); -+ } else { -+ h32 = seed + PRIME32_5; -+ } -+ -+ h32 += (uint32_t)len; -+ -+ while (p + 4 <= b_end) { -+ h32 += get_unaligned_le32(p) * PRIME32_3; -+ h32 = xxh_rotl32(h32, 17) * PRIME32_4; -+ p += 4; -+ } -+ -+ while (p < b_end) { -+ h32 += (*p) * PRIME32_5; -+ h32 = xxh_rotl32(h32, 11) * PRIME32_1; -+ p++; -+ } -+ -+ h32 ^= h32 >> 15; -+ h32 *= PRIME32_2; -+ h32 ^= h32 >> 13; -+ h32 *= PRIME32_3; -+ h32 ^= h32 >> 16; -+ -+ return h32; -+} -+ -+static uint64_t INIT xxh64_round(uint64_t acc, const uint64_t input) -+{ -+ acc += input * PRIME64_2; -+ acc = xxh_rotl64(acc, 31); -+ acc *= PRIME64_1; -+ return acc; -+} -+ -+static uint64_t INIT xxh64_merge_round(uint64_t acc, uint64_t val) -+{ -+ val = xxh64_round(0, val); -+ acc ^= val; -+ acc = acc * PRIME64_1 + PRIME64_4; -+ return acc; -+} -+ -+uint64_t INIT xxh64(const void *input, const size_t len, const uint64_t seed) -+{ -+ const uint8_t *p = (const uint8_t *)input; -+ const uint8_t *const b_end = p + len; -+ uint64_t h64; -+ -+ if (len >= 32) { -+ const uint8_t *const limit = b_end - 32; -+ uint64_t v1 = seed + PRIME64_1 + PRIME64_2; -+ uint64_t v2 = seed + PRIME64_2; -+ uint64_t v3 = seed + 0; -+ uint64_t v4 = seed - PRIME64_1; -+ -+ do { -+ v1 = xxh64_round(v1, get_unaligned_le64(p)); -+ p += 8; -+ v2 = xxh64_round(v2, get_unaligned_le64(p)); -+ p += 8; -+ v3 = xxh64_round(v3, get_unaligned_le64(p)); -+ p += 8; -+ v4 = 
xxh64_round(v4, get_unaligned_le64(p)); -+ p += 8; -+ } while (p <= limit); -+ -+ h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) + -+ xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18); -+ h64 = xxh64_merge_round(h64, v1); -+ h64 = xxh64_merge_round(h64, v2); -+ h64 = xxh64_merge_round(h64, v3); -+ h64 = xxh64_merge_round(h64, v4); -+ -+ } else { -+ h64 = seed + PRIME64_5; -+ } -+ -+ h64 += (uint64_t)len; -+ -+ while (p + 8 <= b_end) { -+ const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p)); -+ -+ h64 ^= k1; -+ h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4; -+ p += 8; -+ } -+ -+ if (p + 4 <= b_end) { -+ h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1; -+ h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; -+ p += 4; -+ } -+ -+ while (p < b_end) { -+ h64 ^= (*p) * PRIME64_5; -+ h64 = xxh_rotl64(h64, 11) * PRIME64_1; -+ p++; -+ } -+ -+ h64 ^= h64 >> 33; -+ h64 *= PRIME64_2; -+ h64 ^= h64 >> 29; -+ h64 *= PRIME64_3; -+ h64 ^= h64 >> 32; -+ -+ return h64; -+} -+ -+/*-************************************************** -+ * Advanced Hash Functions -+ ***************************************************/ -+void INIT xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed) -+{ -+ /* use a local state for memcpy() to avoid strict-aliasing warnings */ -+ struct xxh32_state state; -+ -+ memset(&state, 0, sizeof(state)); -+ state.v1 = seed + PRIME32_1 + PRIME32_2; -+ state.v2 = seed + PRIME32_2; -+ state.v3 = seed + 0; -+ state.v4 = seed - PRIME32_1; -+ memcpy(statePtr, &state, sizeof(state)); -+} -+ -+void INIT xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed) -+{ -+ /* use a local state for memcpy() to avoid strict-aliasing warnings */ -+ struct xxh64_state state; -+ -+ memset(&state, 0, sizeof(state)); -+ state.v1 = seed + PRIME64_1 + PRIME64_2; -+ state.v2 = seed + PRIME64_2; -+ state.v3 = seed + 0; -+ state.v4 = seed - PRIME64_1; -+ memcpy(statePtr, &state, sizeof(state)); -+} -+ -+int INIT xxh32_update(struct xxh32_state *state, const void *input, const 
size_t len) -+{ -+ const uint8_t *p = (const uint8_t *)input; -+ const uint8_t *const b_end = p + len; -+ -+ if (input == NULL) -+ return -EINVAL; -+ -+ state->total_len_32 += (uint32_t)len; -+ state->large_len |= (len >= 16) | (state->total_len_32 >= 16); -+ -+ if (state->memsize + len < 16) { /* fill in tmp buffer */ -+ memcpy((uint8_t *)(state->mem32) + state->memsize, input, len); -+ state->memsize += (uint32_t)len; -+ return 0; -+ } -+ -+ if (state->memsize) { /* some data left from previous update */ -+ const uint32_t *p32 = state->mem32; -+ -+ memcpy((uint8_t *)(state->mem32) + state->memsize, input, -+ 16 - state->memsize); -+ -+ state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32)); -+ p32++; -+ state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32)); -+ p32++; -+ state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32)); -+ p32++; -+ state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32)); -+ p32++; -+ -+ p += 16-state->memsize; -+ state->memsize = 0; -+ } -+ -+ if (p <= b_end - 16) { -+ const uint8_t *const limit = b_end - 16; -+ uint32_t v1 = state->v1; -+ uint32_t v2 = state->v2; -+ uint32_t v3 = state->v3; -+ uint32_t v4 = state->v4; -+ -+ do { -+ v1 = xxh32_round(v1, get_unaligned_le32(p)); -+ p += 4; -+ v2 = xxh32_round(v2, get_unaligned_le32(p)); -+ p += 4; -+ v3 = xxh32_round(v3, get_unaligned_le32(p)); -+ p += 4; -+ v4 = xxh32_round(v4, get_unaligned_le32(p)); -+ p += 4; -+ } while (p <= limit); -+ -+ state->v1 = v1; -+ state->v2 = v2; -+ state->v3 = v3; -+ state->v4 = v4; -+ } -+ -+ if (p < b_end) { -+ memcpy(state->mem32, p, (size_t)(b_end-p)); -+ state->memsize = (uint32_t)(b_end-p); -+ } -+ -+ return 0; -+} -+ -+uint32_t INIT xxh32_digest(const struct xxh32_state *state) -+{ -+ const uint8_t *p = (const uint8_t *)state->mem32; -+ const uint8_t *const b_end = (const uint8_t *)(state->mem32) + -+ state->memsize; -+ uint32_t h32; -+ -+ if (state->large_len) { -+ h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) + -+ 
xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18); -+ } else { -+ h32 = state->v3 /* == seed */ + PRIME32_5; -+ } -+ -+ h32 += state->total_len_32; -+ -+ while (p + 4 <= b_end) { -+ h32 += get_unaligned_le32(p) * PRIME32_3; -+ h32 = xxh_rotl32(h32, 17) * PRIME32_4; -+ p += 4; -+ } -+ -+ while (p < b_end) { -+ h32 += (*p) * PRIME32_5; -+ h32 = xxh_rotl32(h32, 11) * PRIME32_1; -+ p++; -+ } -+ -+ h32 ^= h32 >> 15; -+ h32 *= PRIME32_2; -+ h32 ^= h32 >> 13; -+ h32 *= PRIME32_3; -+ h32 ^= h32 >> 16; -+ -+ return h32; -+} -+ -+int INIT xxh64_update(struct xxh64_state *state, const void *input, const size_t len) -+{ -+ const uint8_t *p = (const uint8_t *)input; -+ const uint8_t *const b_end = p + len; -+ -+ if (input == NULL) -+ return -EINVAL; -+ -+ state->total_len += len; -+ -+ if (state->memsize + len < 32) { /* fill in tmp buffer */ -+ memcpy(((uint8_t *)state->mem64) + state->memsize, input, len); -+ state->memsize += (uint32_t)len; -+ return 0; -+ } -+ -+ if (state->memsize) { /* tmp buffer is full */ -+ uint64_t *p64 = state->mem64; -+ -+ memcpy(((uint8_t *)p64) + state->memsize, input, -+ 32 - state->memsize); -+ -+ state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64)); -+ p64++; -+ state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64)); -+ p64++; -+ state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64)); -+ p64++; -+ state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64)); -+ -+ p += 32 - state->memsize; -+ state->memsize = 0; -+ } -+ -+ if (p + 32 <= b_end) { -+ const uint8_t *const limit = b_end - 32; -+ uint64_t v1 = state->v1; -+ uint64_t v2 = state->v2; -+ uint64_t v3 = state->v3; -+ uint64_t v4 = state->v4; -+ -+ do { -+ v1 = xxh64_round(v1, get_unaligned_le64(p)); -+ p += 8; -+ v2 = xxh64_round(v2, get_unaligned_le64(p)); -+ p += 8; -+ v3 = xxh64_round(v3, get_unaligned_le64(p)); -+ p += 8; -+ v4 = xxh64_round(v4, get_unaligned_le64(p)); -+ p += 8; -+ } while (p <= limit); -+ -+ state->v1 = v1; -+ state->v2 = v2; -+ state->v3 = v3; 
-+ state->v4 = v4; -+ } -+ -+ if (p < b_end) { -+ memcpy(state->mem64, p, (size_t)(b_end-p)); -+ state->memsize = (uint32_t)(b_end - p); -+ } -+ -+ return 0; -+} -+ -+uint64_t INIT xxh64_digest(const struct xxh64_state *state) -+{ -+ const uint8_t *p = (const uint8_t *)state->mem64; -+ const uint8_t *const b_end = (const uint8_t *)state->mem64 + -+ state->memsize; -+ uint64_t h64; -+ -+ if (state->total_len >= 32) { -+ const uint64_t v1 = state->v1; -+ const uint64_t v2 = state->v2; -+ const uint64_t v3 = state->v3; -+ const uint64_t v4 = state->v4; -+ -+ h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) + -+ xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18); -+ h64 = xxh64_merge_round(h64, v1); -+ h64 = xxh64_merge_round(h64, v2); -+ h64 = xxh64_merge_round(h64, v3); -+ h64 = xxh64_merge_round(h64, v4); -+ } else { -+ h64 = state->v3 + PRIME64_5; -+ } -+ -+ h64 += (uint64_t)state->total_len; -+ -+ while (p + 8 <= b_end) { -+ const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p)); -+ -+ h64 ^= k1; -+ h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4; -+ p += 8; -+ } -+ -+ if (p + 4 <= b_end) { -+ h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1; -+ h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; -+ p += 4; -+ } -+ -+ while (p < b_end) { -+ h64 ^= (*p) * PRIME64_5; -+ h64 = xxh_rotl64(h64, 11) * PRIME64_1; -+ p++; -+ } -+ -+ h64 ^= h64 >> 33; -+ h64 *= PRIME64_2; -+ h64 ^= h64 >> 29; -+ h64 *= PRIME64_3; -+ h64 ^= h64 >> 32; -+ -+ return h64; -+} -diff --git a/xen/common/zstd/bitstream.h b/xen/common/zstd/bitstream.h -new file mode 100644 -index 0000000000..3a49784d5c ---- /dev/null -+++ b/xen/common/zstd/bitstream.h -@@ -0,0 +1,379 @@ -+/* -+ * bitstream -+ * Part of FSE library -+ * header file (to include) -+ * Copyright (C) 2013-2016, Yann Collet. 
-+ * -+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are -+ * met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following disclaimer -+ * in the documentation and/or other materials provided with the -+ * distribution. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). 
-+ * -+ * You can contact the author at : -+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -+ */ -+#ifndef BITSTREAM_H_MODULE -+#define BITSTREAM_H_MODULE -+ -+/* -+* This API consists of small unitary functions, which must be inlined for best performance. -+* Since link-time-optimization is not available for all compilers, -+* these functions are defined into a .h to be included. -+*/ -+ -+/*-**************************************** -+* Dependencies -+******************************************/ -+#include "error_private.h" /* error codes and messages */ -+#include "mem.h" /* unaligned access routines */ -+ -+/*========================================= -+* Target specific -+=========================================*/ -+#define STREAM_ACCUMULATOR_MIN_32 25 -+#define STREAM_ACCUMULATOR_MIN_64 57 -+#define STREAM_ACCUMULATOR_MIN ((U32)(ZSTD_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) -+ -+/*-****************************************** -+* bitStream encoding API (write forward) -+********************************************/ -+/* bitStream can mix input from multiple sources. -+* A critical property of these streams is that they encode and decode in **reverse** direction. -+* So the first bit sequence you add will be the last to be read, like a LIFO stack. -+*/ -+typedef struct { -+ size_t bitContainer; -+ int bitPos; -+ char *startPtr; -+ char *ptr; -+ char *endPtr; -+} BIT_CStream_t; -+ -+ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *dstBuffer, size_t dstCapacity); -+ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits); -+ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC); -+ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC); -+ -+/* Start with initCStream, providing the size of buffer to write into. -+* bitStream will never write outside of this buffer. -+* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. 
-+* -+* bits are first added to a local register. -+* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. -+* Writing data into memory is an explicit operation, performed by the flushBits function. -+* Hence keep track how many bits are potentially stored into local register to avoid register overflow. -+* After a flushBits, a maximum of 7 bits might still be stored into local register. -+* -+* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. -+* -+* Last operation is to close the bitStream. -+* The function returns the final size of CStream in bytes. -+* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) -+*/ -+ -+/*-******************************************** -+* bitStream decoding API (read backward) -+**********************************************/ -+typedef struct { -+ size_t bitContainer; -+ unsigned bitsConsumed; -+ const char *ptr; -+ const char *start; -+} BIT_DStream_t; -+ -+typedef enum { -+ BIT_DStream_unfinished = 0, -+ BIT_DStream_endOfBuffer = 1, -+ BIT_DStream_completed = 2, -+ BIT_DStream_overflow = 3 -+} BIT_DStream_status; /* result of BIT_reloadDStream() */ -+/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ -+ -+ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize); -+ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, unsigned nbBits); -+ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD); -+ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *bitD); -+ -+/* Start by invoking BIT_initDStream(). -+* A chunk of the bitStream is then stored into a local register. -+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). -+* You can then retrieve bitFields stored into the local register, **in reverse order**. 
-+* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. -+* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. -+* Otherwise, it can be less than that, so proceed accordingly. -+* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). -+*/ -+ -+/*-**************************************** -+* unsafe API -+******************************************/ -+ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits); -+/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ -+ -+ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC); -+/* unsafe version; does not check buffer overflow */ -+ -+ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, unsigned nbBits); -+/* faster, but works only if nbBits >= 1 */ -+ -+/*-************************************************************** -+* Internal functions -+****************************************************************/ -+ZSTD_STATIC unsigned BIT_highbit32(register U32 val) { return 31 - __builtin_clz(val); } -+ -+/*===== Local Constants =====*/ -+static const unsigned BIT_mask[] = {0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -+ 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, -+ 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF}; /* up to 26 bits */ -+ -+/*-************************************************************** -+* bitStream encoding -+****************************************************************/ -+/*! 
BIT_initCStream() : -+ * `dstCapacity` must be > sizeof(void*) -+ * @return : 0 if success, -+ otherwise an error code (can be tested using ERR_isError() ) */ -+ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *startPtr, size_t dstCapacity) -+{ -+ bitC->bitContainer = 0; -+ bitC->bitPos = 0; -+ bitC->startPtr = (char *)startPtr; -+ bitC->ptr = bitC->startPtr; -+ bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr); -+ if (dstCapacity <= sizeof(bitC->ptr)) -+ return ERROR(dstSize_tooSmall); -+ return 0; -+} -+ -+/*! BIT_addBits() : -+ can add up to 26 bits into `bitC`. -+ Does not check for register overflow ! */ -+ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits) -+{ -+ bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; -+ bitC->bitPos += nbBits; -+} -+ -+/*! BIT_addBitsFast() : -+ * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ -+ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits) -+{ -+ bitC->bitContainer |= value << bitC->bitPos; -+ bitC->bitPos += nbBits; -+} -+ -+/*! BIT_flushBitsFast() : -+ * unsafe version; does not check buffer overflow */ -+ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC) -+{ -+ size_t const nbBytes = bitC->bitPos >> 3; -+ ZSTD_writeLEST(bitC->ptr, bitC->bitContainer); -+ bitC->ptr += nbBytes; -+ bitC->bitPos &= 7; -+ bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ -+} -+ -+/*! BIT_flushBits() : -+ * safe version; check for buffer overflow, and prevents it. -+ * note : does not signal buffer overflow. 
This will be revealed later on using BIT_closeCStream() */ -+ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC) -+{ -+ size_t const nbBytes = bitC->bitPos >> 3; -+ ZSTD_writeLEST(bitC->ptr, bitC->bitContainer); -+ bitC->ptr += nbBytes; -+ if (bitC->ptr > bitC->endPtr) -+ bitC->ptr = bitC->endPtr; -+ bitC->bitPos &= 7; -+ bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ -+} -+ -+/*! BIT_closeCStream() : -+ * @return : size of CStream, in bytes, -+ or 0 if it could not fit into dstBuffer */ -+ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC) -+{ -+ BIT_addBitsFast(bitC, 1, 1); /* endMark */ -+ BIT_flushBits(bitC); -+ -+ if (bitC->ptr >= bitC->endPtr) -+ return 0; /* doesn't fit within authorized budget : cancel */ -+ -+ return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); -+} -+ -+/*-******************************************************** -+* bitStream decoding -+**********************************************************/ -+/*! BIT_initDStream() : -+* Initialize a BIT_DStream_t. -+* `bitD` : a pointer to an already allocated BIT_DStream_t structure. -+* `srcSize` must be the *exact* size of the bitStream, in bytes. -+* @return : size of stream (== srcSize) or an errorCode if a problem is detected -+*/ -+ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize) -+{ -+ if (srcSize < 1) { -+ memset(bitD, 0, sizeof(*bitD)); -+ return ERROR(srcSize_wrong); -+ } -+ -+ if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ -+ bitD->start = (const char *)srcBuffer; -+ bitD->ptr = (const char *)srcBuffer + srcSize - sizeof(bitD->bitContainer); -+ bitD->bitContainer = ZSTD_readLEST(bitD->ptr); -+ { -+ BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1]; -+ bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ -+ if (lastByte == 0) -+ return ERROR(GENERIC); /* endMark not present */ -+ } -+ } else { -+ bitD->start = (const char *)srcBuffer; -+ bitD->ptr = bitD->start; -+ bitD->bitContainer = *(const BYTE *)(bitD->start); -+ switch (srcSize) { -+ case 7: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[6]) << (sizeof(bitD->bitContainer) * 8 - 16); -+ /* fall through */ -+ case 6: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[5]) << (sizeof(bitD->bitContainer) * 8 - 24); -+ /* fall through */ -+ case 5: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[4]) << (sizeof(bitD->bitContainer) * 8 - 32); -+ /* fall through */ -+ case 4: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[3]) << 24; -+ /* fall through */ -+ case 3: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[2]) << 16; -+ /* fall through */ -+ case 2: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[1]) << 8; -+ default:; -+ } -+ { -+ BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1]; -+ bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; -+ if (lastByte == 0) -+ return ERROR(GENERIC); /* endMark not present */ -+ } -+ bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize) * 8; -+ } -+ -+ return srcSize; -+} -+ -+ZSTD_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) { return bitContainer >> start; } -+ -+ZSTD_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { return (bitContainer >> start) & BIT_mask[nbBits]; } -+ -+ZSTD_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { return bitContainer & BIT_mask[nbBits]; } -+ -+/*! BIT_lookBits() : -+ * Provides next n bits from local register. -+ * local register is not modified. -+ * On 32-bits, maxNbBits==24. -+ * On 64-bits, maxNbBits==56. 
-+ * @return : value extracted -+ */ -+ZSTD_STATIC size_t BIT_lookBits(const BIT_DStream_t *bitD, U32 nbBits) -+{ -+ U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1; -+ return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask - nbBits) & bitMask); -+} -+ -+/*! BIT_lookBitsFast() : -+* unsafe version; only works only if nbBits >= 1 */ -+ZSTD_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t *bitD, U32 nbBits) -+{ -+ U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1; -+ return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask + 1) - nbBits) & bitMask); -+} -+ -+ZSTD_STATIC void BIT_skipBits(BIT_DStream_t *bitD, U32 nbBits) { bitD->bitsConsumed += nbBits; } -+ -+/*! BIT_readBits() : -+ * Read (consume) next n bits from local register and update. -+ * Pay attention to not read more than nbBits contained into local register. -+ * @return : extracted value. -+ */ -+ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, U32 nbBits) -+{ -+ size_t const value = BIT_lookBits(bitD, nbBits); -+ BIT_skipBits(bitD, nbBits); -+ return value; -+} -+ -+/*! BIT_readBitsFast() : -+* unsafe version; only works only if nbBits >= 1 */ -+ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, U32 nbBits) -+{ -+ size_t const value = BIT_lookBitsFast(bitD, nbBits); -+ BIT_skipBits(bitD, nbBits); -+ return value; -+} -+ -+/*! BIT_reloadDStream() : -+* Refill `bitD` from buffer previously set in BIT_initDStream() . -+* This function is safe, it guarantees it will not read beyond src buffer. -+* @return : status of `BIT_DStream_t` internal register. 
-+ if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */ -+ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD) -+{ -+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer) * 8)) /* should not happen => corruption detected */ -+ return BIT_DStream_overflow; -+ -+ if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) { -+ bitD->ptr -= bitD->bitsConsumed >> 3; -+ bitD->bitsConsumed &= 7; -+ bitD->bitContainer = ZSTD_readLEST(bitD->ptr); -+ return BIT_DStream_unfinished; -+ } -+ if (bitD->ptr == bitD->start) { -+ if (bitD->bitsConsumed < sizeof(bitD->bitContainer) * 8) -+ return BIT_DStream_endOfBuffer; -+ return BIT_DStream_completed; -+ } -+ { -+ U32 nbBytes = bitD->bitsConsumed >> 3; -+ BIT_DStream_status result = BIT_DStream_unfinished; -+ if (bitD->ptr - nbBytes < bitD->start) { -+ nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ -+ result = BIT_DStream_endOfBuffer; -+ } -+ bitD->ptr -= nbBytes; -+ bitD->bitsConsumed -= nbBytes * 8; -+ bitD->bitContainer = ZSTD_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ -+ return result; -+ } -+} -+ -+/*! BIT_endOfDStream() : -+* @return Tells if DStream has exactly reached its end (all bits consumed). -+*/ -+ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *DStream) -+{ -+ return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer) * 8)); -+} -+ -+#endif /* BITSTREAM_H_MODULE */ -diff --git a/xen/common/zstd/decompress.c b/xen/common/zstd/decompress.c -new file mode 100644 -index 0000000000..8e627d881a ---- /dev/null -+++ b/xen/common/zstd/decompress.c -@@ -0,0 +1,2489 @@ -+/** -+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. -+ * All rights reserved. -+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of https://github.com/facebook/zstd. 
-+ * An additional grant of patent rights can be found in the PATENTS file in the -+ * same directory. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). -+ */ -+ -+/* *************************************************************** -+* Tuning parameters -+*****************************************************************/ -+/*! -+* MAXWINDOWSIZE_DEFAULT : -+* maximum window size accepted by DStream, by default. -+* Frames requiring more memory will be rejected. -+*/ -+#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT -+#define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1) /* defined within zstd.h */ -+#endif -+ -+/*-******************************************************* -+* Dependencies -+*********************************************************/ -+#include "fse.h" -+#include "huf.h" -+#include "mem.h" /* low level memory routines */ -+#include "zstd_internal.h" -+#include /* memcpy, memmove, memset */ -+ -+#define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) -+ -+/*-************************************* -+* Macros -+***************************************/ -+#define ZSTD_isError ERR_isError /* for inlining */ -+#define FSE_isError ERR_isError -+#define HUF_isError ERR_isError -+ -+/*_******************************************************* -+* Memory operations -+**********************************************************/ -+static void INIT ZSTD_copy4(void *dst, const void *src) { memcpy(dst, src, 4); } -+ -+/*-************************************************************* -+* Context management -+***************************************************************/ -+typedef enum { -+ ZSTDds_getFrameHeaderSize, -+ ZSTDds_decodeFrameHeader, -+ ZSTDds_decodeBlockHeader, -+ 
ZSTDds_decompressBlock, -+ ZSTDds_decompressLastBlock, -+ ZSTDds_checkChecksum, -+ ZSTDds_decodeSkippableHeader, -+ ZSTDds_skipFrame -+} ZSTD_dStage; -+ -+typedef struct { -+ FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; -+ FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; -+ FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; -+ HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ -+ U64 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32 / 2]; -+ U32 rep[ZSTD_REP_NUM]; -+} ZSTD_entropyTables_t; -+ -+struct ZSTD_DCtx_s { -+ const FSE_DTable *LLTptr; -+ const FSE_DTable *MLTptr; -+ const FSE_DTable *OFTptr; -+ const HUF_DTable *HUFptr; -+ ZSTD_entropyTables_t entropy; -+ const void *previousDstEnd; /* detect continuity */ -+ const void *base; /* start of curr segment */ -+ const void *vBase; /* virtual start of previous segment if it was just before curr one */ -+ const void *dictEnd; /* end of previous segment */ -+ size_t expected; -+ ZSTD_frameParams fParams; -+ blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */ -+ ZSTD_dStage stage; -+ U32 litEntropy; -+ U32 fseEntropy; -+ struct xxh64_state xxhState; -+ size_t headerSize; -+ U32 dictID; -+ const BYTE *litPtr; -+ ZSTD_customMem customMem; -+ size_t litSize; -+ size_t rleSize; -+ BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH]; -+ BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; -+}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ -+ -+size_t INIT ZSTD_DCtxWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DCtx)); } -+ -+size_t INIT ZSTD_decompressBegin(ZSTD_DCtx *dctx) -+{ -+ dctx->expected = ZSTD_frameHeaderSize_prefix; -+ dctx->stage = ZSTDds_getFrameHeaderSize; -+ dctx->previousDstEnd = NULL; -+ dctx->base = NULL; -+ dctx->vBase = NULL; -+ dctx->dictEnd = NULL; -+ dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big 
endian */ -+ dctx->litEntropy = dctx->fseEntropy = 0; -+ dctx->dictID = 0; -+ ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); -+ memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ -+ dctx->LLTptr = dctx->entropy.LLTable; -+ dctx->MLTptr = dctx->entropy.MLTable; -+ dctx->OFTptr = dctx->entropy.OFTable; -+ dctx->HUFptr = dctx->entropy.hufTable; -+ return 0; -+} -+ -+ZSTD_DCtx INIT *ZSTD_createDCtx_advanced(ZSTD_customMem customMem) -+{ -+ ZSTD_DCtx *dctx; -+ -+ if (!customMem.customAlloc || !customMem.customFree) -+ return NULL; -+ -+ dctx = (ZSTD_DCtx *)ZSTD_malloc(sizeof(ZSTD_DCtx), customMem); -+ if (!dctx) -+ return NULL; -+ memcpy(&dctx->customMem, &customMem, sizeof(customMem)); -+ ZSTD_decompressBegin(dctx); -+ return dctx; -+} -+ -+ZSTD_DCtx INIT *ZSTD_initDCtx(void *workspace, size_t workspaceSize) -+{ -+ ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); -+ return ZSTD_createDCtx_advanced(stackMem); -+} -+ -+size_t INIT ZSTD_freeDCtx(ZSTD_DCtx *dctx) -+{ -+ if (dctx == NULL) -+ return 0; /* support free on NULL */ -+ ZSTD_free(dctx, dctx->customMem); -+ return 0; /* reserved as a potential error code in the future */ -+} -+ -+void INIT ZSTD_copyDCtx(ZSTD_DCtx *dstDCtx, const ZSTD_DCtx *srcDCtx) -+{ -+ size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max; -+ memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */ -+} -+ -+static void INIT ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict); -+ -+/*-************************************************************* -+* Decompression section -+***************************************************************/ -+ -+/*! ZSTD_isFrame() : -+ * Tells if the content of `buffer` starts with a valid Frame Identifier. -+ * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. 
-+ * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. -+ * Note 3 : Skippable Frame Identifiers are considered valid. */ -+unsigned INIT ZSTD_isFrame(const void *buffer, size_t size) -+{ -+ if (size < 4) -+ return 0; -+ { -+ U32 const magic = ZSTD_readLE32(buffer); -+ if (magic == ZSTD_MAGICNUMBER) -+ return 1; -+ if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) -+ return 1; -+ } -+ return 0; -+} -+ -+/** ZSTD_frameHeaderSize() : -+* srcSize must be >= ZSTD_frameHeaderSize_prefix. -+* @return : size of the Frame Header */ -+static size_t INIT ZSTD_frameHeaderSize(const void *src, size_t srcSize) -+{ -+ if (srcSize < ZSTD_frameHeaderSize_prefix) -+ return ERROR(srcSize_wrong); -+ { -+ BYTE const fhd = ((const BYTE *)src)[4]; -+ U32 const dictID = fhd & 3; -+ U32 const singleSegment = (fhd >> 5) & 1; -+ U32 const fcsId = fhd >> 6; -+ return ZSTD_frameHeaderSize_prefix + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + (singleSegment && !fcsId); -+ } -+} -+ -+/** ZSTD_getFrameParams() : -+* decode Frame Header, or require larger `srcSize`. 
-+* @return : 0, `fparamsPtr` is correctly filled, -+* >0, `srcSize` is too small, result is expected `srcSize`, -+* or an error code, which can be tested using ZSTD_isError() */ -+size_t INIT ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, size_t srcSize) -+{ -+ const BYTE *ip = (const BYTE *)src; -+ -+ if (srcSize < ZSTD_frameHeaderSize_prefix) -+ return ZSTD_frameHeaderSize_prefix; -+ if (ZSTD_readLE32(src) != ZSTD_MAGICNUMBER) { -+ if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { -+ if (srcSize < ZSTD_skippableHeaderSize) -+ return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */ -+ memset(fparamsPtr, 0, sizeof(*fparamsPtr)); -+ fparamsPtr->frameContentSize = ZSTD_readLE32((const char *)src + 4); -+ fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */ -+ return 0; -+ } -+ return ERROR(prefix_unknown); -+ } -+ -+ /* ensure there is enough `srcSize` to fully read/decode frame header */ -+ { -+ size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize); -+ if (srcSize < fhsize) -+ return fhsize; -+ } -+ -+ { -+ BYTE const fhdByte = ip[4]; -+ size_t pos = 5; -+ U32 const dictIDSizeCode = fhdByte & 3; -+ U32 const checksumFlag = (fhdByte >> 2) & 1; -+ U32 const singleSegment = (fhdByte >> 5) & 1; -+ U32 const fcsID = fhdByte >> 6; -+ U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; -+ U32 windowSize = 0; -+ U32 dictID = 0; -+ U64 frameContentSize = 0; -+ if ((fhdByte & 0x08) != 0) -+ return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */ -+ if (!singleSegment) { -+ BYTE const wlByte = ip[pos++]; -+ U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; -+ if (windowLog > ZSTD_WINDOWLOG_MAX) -+ return ERROR(frameParameter_windowTooLarge); /* avoids issue with 1 << windowLog */ -+ windowSize = (1U << windowLog); -+ windowSize += (windowSize >> 3) * (wlByte & 7); -+ } -+ -+ switch (dictIDSizeCode) { -+ default: /* impossible */ -+ case 0: break; 
-+ case 1: -+ dictID = ip[pos]; -+ pos++; -+ break; -+ case 2: -+ dictID = ZSTD_readLE16(ip + pos); -+ pos += 2; -+ break; -+ case 3: -+ dictID = ZSTD_readLE32(ip + pos); -+ pos += 4; -+ break; -+ } -+ switch (fcsID) { -+ default: /* impossible */ -+ case 0: -+ if (singleSegment) -+ frameContentSize = ip[pos]; -+ break; -+ case 1: frameContentSize = ZSTD_readLE16(ip + pos) + 256; break; -+ case 2: frameContentSize = ZSTD_readLE32(ip + pos); break; -+ case 3: frameContentSize = ZSTD_readLE64(ip + pos); break; -+ } -+ if (!windowSize) -+ windowSize = (U32)frameContentSize; -+ if (windowSize > windowSizeMax) -+ return ERROR(frameParameter_windowTooLarge); -+ fparamsPtr->frameContentSize = frameContentSize; -+ fparamsPtr->windowSize = windowSize; -+ fparamsPtr->dictID = dictID; -+ fparamsPtr->checksumFlag = checksumFlag; -+ } -+ return 0; -+} -+ -+/** ZSTD_getFrameContentSize() : -+* compatible with legacy mode -+* @return : decompressed size of the single frame pointed to be `src` if known, otherwise -+* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined -+* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. 
invalid magic number, srcSize too small) */ -+unsigned long long INIT ZSTD_getFrameContentSize(const void *src, size_t srcSize) -+{ -+ { -+ ZSTD_frameParams fParams; -+ if (ZSTD_getFrameParams(&fParams, src, srcSize) != 0) -+ return ZSTD_CONTENTSIZE_ERROR; -+ if (fParams.windowSize == 0) { -+ /* Either skippable or empty frame, size == 0 either way */ -+ return 0; -+ } else if (fParams.frameContentSize != 0) { -+ return fParams.frameContentSize; -+ } else { -+ return ZSTD_CONTENTSIZE_UNKNOWN; -+ } -+ } -+} -+ -+/** ZSTD_findDecompressedSize() : -+ * compatible with legacy mode -+ * `srcSize` must be the exact length of some number of ZSTD compressed and/or -+ * skippable frames -+ * @return : decompressed size of the frames contained */ -+unsigned long long INIT ZSTD_findDecompressedSize(const void *src, size_t srcSize) -+{ -+ { -+ unsigned long long totalDstSize = 0; -+ while (srcSize >= ZSTD_frameHeaderSize_prefix) { -+ const U32 magicNumber = ZSTD_readLE32(src); -+ -+ if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { -+ size_t skippableSize; -+ if (srcSize < ZSTD_skippableHeaderSize) -+ return ERROR(srcSize_wrong); -+ skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize; -+ if (srcSize < skippableSize) { -+ return ZSTD_CONTENTSIZE_ERROR; -+ } -+ -+ src = (const BYTE *)src + skippableSize; -+ srcSize -= skippableSize; -+ continue; -+ } -+ -+ { -+ unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); -+ if (ret >= ZSTD_CONTENTSIZE_ERROR) -+ return ret; -+ -+ /* check for overflow */ -+ if (totalDstSize + ret < totalDstSize) -+ return ZSTD_CONTENTSIZE_ERROR; -+ totalDstSize += ret; -+ } -+ { -+ size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); -+ if (ZSTD_isError(frameSrcSize)) { -+ return ZSTD_CONTENTSIZE_ERROR; -+ } -+ -+ src = (const BYTE *)src + frameSrcSize; -+ srcSize -= frameSrcSize; -+ } -+ } -+ -+ if (srcSize) { -+ return ZSTD_CONTENTSIZE_ERROR; -+ } -+ -+ return totalDstSize; 
-+ } -+} -+ -+/** ZSTD_decodeFrameHeader() : -+* `headerSize` must be the size provided by ZSTD_frameHeaderSize(). -+* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ -+static size_t INIT ZSTD_decodeFrameHeader(ZSTD_DCtx *dctx, const void *src, size_t headerSize) -+{ -+ size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize); -+ if (ZSTD_isError(result)) -+ return result; /* invalid header */ -+ if (result > 0) -+ return ERROR(srcSize_wrong); /* headerSize too small */ -+ if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) -+ return ERROR(dictionary_wrong); -+ if (dctx->fParams.checksumFlag) -+ xxh64_reset(&dctx->xxhState, 0); -+ return 0; -+} -+ -+typedef struct { -+ blockType_e blockType; -+ U32 lastBlock; -+ U32 origSize; -+} blockProperties_t; -+ -+/*! ZSTD_getcBlockSize() : -+* Provides the size of compressed block from block header `src` */ -+size_t INIT ZSTD_getcBlockSize(const void *src, size_t srcSize, blockProperties_t *bpPtr) -+{ -+ if (srcSize < ZSTD_blockHeaderSize) -+ return ERROR(srcSize_wrong); -+ { -+ U32 const cBlockHeader = ZSTD_readLE24(src); -+ U32 const cSize = cBlockHeader >> 3; -+ bpPtr->lastBlock = cBlockHeader & 1; -+ bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); -+ bpPtr->origSize = cSize; /* only useful for RLE */ -+ if (bpPtr->blockType == bt_rle) -+ return 1; -+ if (bpPtr->blockType == bt_reserved) -+ return ERROR(corruption_detected); -+ return cSize; -+ } -+} -+ -+static size_t INIT ZSTD_copyRawBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -+{ -+ if (srcSize > dstCapacity) -+ return ERROR(dstSize_tooSmall); -+ memcpy(dst, src, srcSize); -+ return srcSize; -+} -+ -+static size_t INIT ZSTD_setRleBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize, size_t regenSize) -+{ -+ if (srcSize != 1) -+ return ERROR(srcSize_wrong); -+ if (regenSize > dstCapacity) -+ return ERROR(dstSize_tooSmall); -+ memset(dst, *(const 
BYTE *)src, regenSize); -+ return regenSize; -+} -+ -+/*! ZSTD_decodeLiteralsBlock() : -+ @return : nb of bytes read from src (< srcSize ) */ -+size_t INIT ZSTD_decodeLiteralsBlock(ZSTD_DCtx *dctx, const void *src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ -+{ -+ if (srcSize < MIN_CBLOCK_SIZE) -+ return ERROR(corruption_detected); -+ -+ { -+ const BYTE *const istart = (const BYTE *)src; -+ symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); -+ -+ switch (litEncType) { -+ case set_repeat: -+ if (dctx->litEntropy == 0) -+ return ERROR(dictionary_corrupted); -+ /* fall through */ -+ case set_compressed: -+ if (srcSize < 5) -+ return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ -+ { -+ size_t lhSize, litSize, litCSize; -+ U32 singleStream = 0; -+ U32 const lhlCode = (istart[0] >> 2) & 3; -+ U32 const lhc = ZSTD_readLE32(istart); -+ switch (lhlCode) { -+ case 0: -+ case 1: -+ default: /* note : default is impossible, since lhlCode into [0..3] */ -+ /* 2 - 2 - 10 - 10 */ -+ singleStream = !lhlCode; -+ lhSize = 3; -+ litSize = (lhc >> 4) & 0x3FF; -+ litCSize = (lhc >> 14) & 0x3FF; -+ break; -+ case 2: -+ /* 2 - 2 - 14 - 14 */ -+ lhSize = 4; -+ litSize = (lhc >> 4) & 0x3FFF; -+ litCSize = lhc >> 18; -+ break; -+ case 3: -+ /* 2 - 2 - 18 - 18 */ -+ lhSize = 5; -+ litSize = (lhc >> 4) & 0x3FFFF; -+ litCSize = (lhc >> 22) + (istart[4] << 10); -+ break; -+ } -+ if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) -+ return ERROR(corruption_detected); -+ if (litCSize + lhSize > srcSize) -+ return ERROR(corruption_detected); -+ -+ if (HUF_isError( -+ (litEncType == set_repeat) -+ ? (singleStream ? HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr) -+ : HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr)) -+ : (singleStream -+ ? 
HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize, -+ dctx->entropy.workspace, sizeof(dctx->entropy.workspace)) -+ : HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize, -+ dctx->entropy.workspace, sizeof(dctx->entropy.workspace))))) -+ return ERROR(corruption_detected); -+ -+ dctx->litPtr = dctx->litBuffer; -+ dctx->litSize = litSize; -+ dctx->litEntropy = 1; -+ if (litEncType == set_compressed) -+ dctx->HUFptr = dctx->entropy.hufTable; -+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); -+ return litCSize + lhSize; -+ } -+ -+ case set_basic: { -+ size_t litSize, lhSize; -+ U32 const lhlCode = ((istart[0]) >> 2) & 3; -+ switch (lhlCode) { -+ case 0: -+ case 2: -+ default: /* note : default is impossible, since lhlCode into [0..3] */ -+ lhSize = 1; -+ litSize = istart[0] >> 3; -+ break; -+ case 1: -+ lhSize = 2; -+ litSize = ZSTD_readLE16(istart) >> 4; -+ break; -+ case 3: -+ lhSize = 3; -+ litSize = ZSTD_readLE24(istart) >> 4; -+ break; -+ } -+ -+ if (lhSize + litSize + WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ -+ if (litSize + lhSize > srcSize) -+ return ERROR(corruption_detected); -+ memcpy(dctx->litBuffer, istart + lhSize, litSize); -+ dctx->litPtr = dctx->litBuffer; -+ dctx->litSize = litSize; -+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); -+ return lhSize + litSize; -+ } -+ /* direct reference into compressed stream */ -+ dctx->litPtr = istart + lhSize; -+ dctx->litSize = litSize; -+ return lhSize + litSize; -+ } -+ -+ case set_rle: { -+ U32 const lhlCode = ((istart[0]) >> 2) & 3; -+ size_t litSize, lhSize; -+ switch (lhlCode) { -+ case 0: -+ case 2: -+ default: /* note : default is impossible, since lhlCode into [0..3] */ -+ lhSize = 1; -+ litSize = istart[0] >> 3; -+ break; -+ case 1: -+ lhSize = 2; -+ litSize = ZSTD_readLE16(istart) >> 4; -+ break; -+ case 3: -+ lhSize = 
3; -+ litSize = ZSTD_readLE24(istart) >> 4; -+ if (srcSize < 4) -+ return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */ -+ break; -+ } -+ if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) -+ return ERROR(corruption_detected); -+ memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); -+ dctx->litPtr = dctx->litBuffer; -+ dctx->litSize = litSize; -+ return lhSize + 1; -+ } -+ default: -+ return ERROR(corruption_detected); /* impossible */ -+ } -+ } -+} -+ -+typedef union { -+ FSE_decode_t realData; -+ U32 alignedBy4; -+} FSE_decode_t4; -+ -+static const FSE_decode_t4 LL_defaultDTable[(1 << LL_DEFAULTNORMLOG) + 1] = { -+ {{LL_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */ -+ {{0, 0, 4}}, /* 0 : base, symbol, bits */ -+ {{16, 0, 4}}, -+ {{32, 1, 5}}, -+ {{0, 3, 5}}, -+ {{0, 4, 5}}, -+ {{0, 6, 5}}, -+ {{0, 7, 5}}, -+ {{0, 9, 5}}, -+ {{0, 10, 5}}, -+ {{0, 12, 5}}, -+ {{0, 14, 6}}, -+ {{0, 16, 5}}, -+ {{0, 18, 5}}, -+ {{0, 19, 5}}, -+ {{0, 21, 5}}, -+ {{0, 22, 5}}, -+ {{0, 24, 5}}, -+ {{32, 25, 5}}, -+ {{0, 26, 5}}, -+ {{0, 27, 6}}, -+ {{0, 29, 6}}, -+ {{0, 31, 6}}, -+ {{32, 0, 4}}, -+ {{0, 1, 4}}, -+ {{0, 2, 5}}, -+ {{32, 4, 5}}, -+ {{0, 5, 5}}, -+ {{32, 7, 5}}, -+ {{0, 8, 5}}, -+ {{32, 10, 5}}, -+ {{0, 11, 5}}, -+ {{0, 13, 6}}, -+ {{32, 16, 5}}, -+ {{0, 17, 5}}, -+ {{32, 19, 5}}, -+ {{0, 20, 5}}, -+ {{32, 22, 5}}, -+ {{0, 23, 5}}, -+ {{0, 25, 4}}, -+ {{16, 25, 4}}, -+ {{32, 26, 5}}, -+ {{0, 28, 6}}, -+ {{0, 30, 6}}, -+ {{48, 0, 4}}, -+ {{16, 1, 4}}, -+ {{32, 2, 5}}, -+ {{32, 3, 5}}, -+ {{32, 5, 5}}, -+ {{32, 6, 5}}, -+ {{32, 8, 5}}, -+ {{32, 9, 5}}, -+ {{32, 11, 5}}, -+ {{32, 12, 5}}, -+ {{0, 15, 6}}, -+ {{32, 17, 5}}, -+ {{32, 18, 5}}, -+ {{32, 20, 5}}, -+ {{32, 21, 5}}, -+ {{32, 23, 5}}, -+ {{32, 24, 5}}, -+ {{0, 35, 6}}, -+ {{0, 34, 6}}, -+ {{0, 33, 6}}, -+ {{0, 32, 6}}, -+}; /* LL_defaultDTable */ -+ -+static const FSE_decode_t4 ML_defaultDTable[(1 << ML_DEFAULTNORMLOG) + 1] = { -+ 
{{ML_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */ -+ {{0, 0, 6}}, /* 0 : base, symbol, bits */ -+ {{0, 1, 4}}, -+ {{32, 2, 5}}, -+ {{0, 3, 5}}, -+ {{0, 5, 5}}, -+ {{0, 6, 5}}, -+ {{0, 8, 5}}, -+ {{0, 10, 6}}, -+ {{0, 13, 6}}, -+ {{0, 16, 6}}, -+ {{0, 19, 6}}, -+ {{0, 22, 6}}, -+ {{0, 25, 6}}, -+ {{0, 28, 6}}, -+ {{0, 31, 6}}, -+ {{0, 33, 6}}, -+ {{0, 35, 6}}, -+ {{0, 37, 6}}, -+ {{0, 39, 6}}, -+ {{0, 41, 6}}, -+ {{0, 43, 6}}, -+ {{0, 45, 6}}, -+ {{16, 1, 4}}, -+ {{0, 2, 4}}, -+ {{32, 3, 5}}, -+ {{0, 4, 5}}, -+ {{32, 6, 5}}, -+ {{0, 7, 5}}, -+ {{0, 9, 6}}, -+ {{0, 12, 6}}, -+ {{0, 15, 6}}, -+ {{0, 18, 6}}, -+ {{0, 21, 6}}, -+ {{0, 24, 6}}, -+ {{0, 27, 6}}, -+ {{0, 30, 6}}, -+ {{0, 32, 6}}, -+ {{0, 34, 6}}, -+ {{0, 36, 6}}, -+ {{0, 38, 6}}, -+ {{0, 40, 6}}, -+ {{0, 42, 6}}, -+ {{0, 44, 6}}, -+ {{32, 1, 4}}, -+ {{48, 1, 4}}, -+ {{16, 2, 4}}, -+ {{32, 4, 5}}, -+ {{32, 5, 5}}, -+ {{32, 7, 5}}, -+ {{32, 8, 5}}, -+ {{0, 11, 6}}, -+ {{0, 14, 6}}, -+ {{0, 17, 6}}, -+ {{0, 20, 6}}, -+ {{0, 23, 6}}, -+ {{0, 26, 6}}, -+ {{0, 29, 6}}, -+ {{0, 52, 6}}, -+ {{0, 51, 6}}, -+ {{0, 50, 6}}, -+ {{0, 49, 6}}, -+ {{0, 48, 6}}, -+ {{0, 47, 6}}, -+ {{0, 46, 6}}, -+}; /* ML_defaultDTable */ -+ -+static const FSE_decode_t4 OF_defaultDTable[(1 << OF_DEFAULTNORMLOG) + 1] = { -+ {{OF_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */ -+ {{0, 0, 5}}, /* 0 : base, symbol, bits */ -+ {{0, 6, 4}}, -+ {{0, 9, 5}}, -+ {{0, 15, 5}}, -+ {{0, 21, 5}}, -+ {{0, 3, 5}}, -+ {{0, 7, 4}}, -+ {{0, 12, 5}}, -+ {{0, 18, 5}}, -+ {{0, 23, 5}}, -+ {{0, 5, 5}}, -+ {{0, 8, 4}}, -+ {{0, 14, 5}}, -+ {{0, 20, 5}}, -+ {{0, 2, 5}}, -+ {{16, 7, 4}}, -+ {{0, 11, 5}}, -+ {{0, 17, 5}}, -+ {{0, 22, 5}}, -+ {{0, 4, 5}}, -+ {{16, 8, 4}}, -+ {{0, 13, 5}}, -+ {{0, 19, 5}}, -+ {{0, 1, 5}}, -+ {{16, 6, 4}}, -+ {{0, 10, 5}}, -+ {{0, 16, 5}}, -+ {{0, 28, 5}}, -+ {{0, 27, 5}}, -+ {{0, 26, 5}}, -+ {{0, 25, 5}}, -+ {{0, 24, 5}}, -+}; /* OF_defaultDTable */ -+ -+/*! 
ZSTD_buildSeqTable() : -+ @return : nb bytes read from src, -+ or an error code if it fails, testable with ZSTD_isError() -+*/ -+static size_t INIT ZSTD_buildSeqTable(FSE_DTable *DTableSpace, const FSE_DTable **DTablePtr, symbolEncodingType_e type, U32 max, U32 maxLog, const void *src, -+ size_t srcSize, const FSE_decode_t4 *defaultTable, U32 flagRepeatTable, void *workspace, size_t workspaceSize) -+{ -+ const void *const tmpPtr = defaultTable; /* bypass strict aliasing */ -+ switch (type) { -+ case set_rle: -+ if (!srcSize) -+ return ERROR(srcSize_wrong); -+ if ((*(const BYTE *)src) > max) -+ return ERROR(corruption_detected); -+ FSE_buildDTable_rle(DTableSpace, *(const BYTE *)src); -+ *DTablePtr = DTableSpace; -+ return 1; -+ case set_basic: *DTablePtr = (const FSE_DTable *)tmpPtr; return 0; -+ case set_repeat: -+ if (!flagRepeatTable) -+ return ERROR(corruption_detected); -+ return 0; -+ default: /* impossible */ -+ case set_compressed: { -+ U32 tableLog; -+ S16 *norm = (S16 *)workspace; -+ size_t const spaceUsed32 = ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2; -+ -+ if ((spaceUsed32 << 2) > workspaceSize) -+ return ERROR(GENERIC); -+ workspace = (U32 *)workspace + spaceUsed32; -+ workspaceSize -= (spaceUsed32 << 2); -+ { -+ size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); -+ if (FSE_isError(headerSize)) -+ return ERROR(corruption_detected); -+ if (tableLog > maxLog) -+ return ERROR(corruption_detected); -+ FSE_buildDTable_wksp(DTableSpace, norm, max, tableLog, workspace, workspaceSize); -+ *DTablePtr = DTableSpace; -+ return headerSize; -+ } -+ } -+ } -+} -+ -+size_t INIT ZSTD_decodeSeqHeaders(ZSTD_DCtx *dctx, int *nbSeqPtr, const void *src, size_t srcSize) -+{ -+ const BYTE *const istart = (const BYTE *const)src; -+ const BYTE *const iend = istart + srcSize; -+ const BYTE *ip = istart; -+ -+ /* check */ -+ if (srcSize < MIN_SEQUENCES_SIZE) -+ return ERROR(srcSize_wrong); -+ -+ /* SeqHead */ -+ { -+ int nbSeq = *ip++; -+ if 
(!nbSeq) { -+ *nbSeqPtr = 0; -+ return 1; -+ } -+ if (nbSeq > 0x7F) { -+ if (nbSeq == 0xFF) { -+ if (ip + 2 > iend) -+ return ERROR(srcSize_wrong); -+ nbSeq = ZSTD_readLE16(ip) + LONGNBSEQ, ip += 2; -+ } else { -+ if (ip >= iend) -+ return ERROR(srcSize_wrong); -+ nbSeq = ((nbSeq - 0x80) << 8) + *ip++; -+ } -+ } -+ *nbSeqPtr = nbSeq; -+ } -+ -+ /* FSE table descriptors */ -+ if (ip + 4 > iend) -+ return ERROR(srcSize_wrong); /* minimum possible size */ -+ { -+ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); -+ symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); -+ symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); -+ ip++; -+ -+ /* Build DTables */ -+ { -+ size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, LLtype, MaxLL, LLFSELog, ip, iend - ip, -+ LL_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace)); -+ if (ZSTD_isError(llhSize)) -+ return ERROR(corruption_detected); -+ ip += llhSize; -+ } -+ { -+ size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, OFtype, MaxOff, OffFSELog, ip, iend - ip, -+ OF_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace)); -+ if (ZSTD_isError(ofhSize)) -+ return ERROR(corruption_detected); -+ ip += ofhSize; -+ } -+ { -+ size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, MLtype, MaxML, MLFSELog, ip, iend - ip, -+ ML_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace)); -+ if (ZSTD_isError(mlhSize)) -+ return ERROR(corruption_detected); -+ ip += mlhSize; -+ } -+ } -+ -+ return ip - istart; -+} -+ -+typedef struct { -+ size_t litLength; -+ size_t matchLength; -+ size_t offset; -+ const BYTE *match; -+} seq_t; -+ -+typedef struct { -+ BIT_DStream_t DStream; -+ FSE_DState_t stateLL; -+ FSE_DState_t stateOffb; -+ FSE_DState_t stateML; -+ size_t prevOffset[ZSTD_REP_NUM]; -+ 
const BYTE *base; -+ size_t pos; -+ uPtrDiff gotoDict; -+} seqState_t; -+ -+FORCE_NOINLINE -+size_t INIT ZSTD_execSequenceLast7(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base, -+ const BYTE *const vBase, const BYTE *const dictEnd) -+{ -+ BYTE *const oLitEnd = op + sequence.litLength; -+ size_t const sequenceLength = sequence.litLength + sequence.matchLength; -+ BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ -+ BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH; -+ const BYTE *const iLitEnd = *litPtr + sequence.litLength; -+ const BYTE *match = oLitEnd - sequence.offset; -+ -+ /* check */ -+ if (oMatchEnd > oend) -+ return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ -+ if (iLitEnd > litLimit) -+ return ERROR(corruption_detected); /* over-read beyond lit buffer */ -+ if (oLitEnd <= oend_w) -+ return ERROR(GENERIC); /* Precondition */ -+ -+ /* copy literals */ -+ if (op < oend_w) { -+ ZSTD_wildcopy(op, *litPtr, oend_w - op); -+ *litPtr += oend_w - op; -+ op = oend_w; -+ } -+ while (op < oLitEnd) -+ *op++ = *(*litPtr)++; -+ -+ /* copy Match */ -+ if (sequence.offset > (size_t)(oLitEnd - base)) { -+ /* offset beyond prefix */ -+ if (sequence.offset > (size_t)(oLitEnd - vBase)) -+ return ERROR(corruption_detected); -+ match = dictEnd - (base - match); -+ if (match + sequence.matchLength <= dictEnd) { -+ memmove(oLitEnd, match, sequence.matchLength); -+ return sequenceLength; -+ } -+ /* span extDict & currPrefixSegment */ -+ { -+ size_t const length1 = dictEnd - match; -+ memmove(oLitEnd, match, length1); -+ op = oLitEnd + length1; -+ sequence.matchLength -= length1; -+ match = base; -+ } -+ } -+ while (op < oMatchEnd) -+ *op++ = *match++; -+ return sequenceLength; -+} -+ -+static seq_t INIT ZSTD_decodeSequence(seqState_t *seqState) -+{ -+ seq_t seq; -+ -+ U32 const llCode = 
FSE_peekSymbol(&seqState->stateLL); -+ U32 const mlCode = FSE_peekSymbol(&seqState->stateML); -+ U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ -+ -+ U32 const llBits = LL_bits[llCode]; -+ U32 const mlBits = ML_bits[mlCode]; -+ U32 const ofBits = ofCode; -+ U32 const totalBits = llBits + mlBits + ofBits; -+ -+ static const U32 LL_base[MaxLL + 1] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, -+ 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000}; -+ -+ static const U32 ML_base[MaxML + 1] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, -+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 41, -+ 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003}; -+ -+ static const U32 OF_base[MaxOff + 1] = {0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, -+ 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, -+ 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD}; -+ -+ /* sequence */ -+ { -+ size_t offset; -+ if (!ofCode) -+ offset = 0; -+ else { -+ offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ -+ if (ZSTD_32bits()) -+ BIT_reloadDStream(&seqState->DStream); -+ } -+ -+ if (ofCode <= 1) { -+ offset += (llCode == 0); -+ if (offset) { -+ size_t temp = (offset == 3) ? 
seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; -+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ -+ if (offset != 1) -+ seqState->prevOffset[2] = seqState->prevOffset[1]; -+ seqState->prevOffset[1] = seqState->prevOffset[0]; -+ seqState->prevOffset[0] = offset = temp; -+ } else { -+ offset = seqState->prevOffset[0]; -+ } -+ } else { -+ seqState->prevOffset[2] = seqState->prevOffset[1]; -+ seqState->prevOffset[1] = seqState->prevOffset[0]; -+ seqState->prevOffset[0] = offset; -+ } -+ seq.offset = offset; -+ } -+ -+ seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ -+ if (ZSTD_32bits() && (mlBits + llBits > 24)) -+ BIT_reloadDStream(&seqState->DStream); -+ -+ seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ -+ if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog))) -+ BIT_reloadDStream(&seqState->DStream); -+ -+ /* ANS state update */ -+ FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ -+ FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ -+ if (ZSTD_32bits()) -+ BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ -+ FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ -+ -+ seq.match = NULL; -+ -+ return seq; -+} -+ -+FORCE_INLINE -+size_t ZSTD_execSequence(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base, -+ const BYTE *const vBase, const BYTE *const dictEnd) -+{ -+ BYTE *const oLitEnd = op + sequence.litLength; -+ size_t const sequenceLength = sequence.litLength + sequence.matchLength; -+ BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ -+ BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH; -+ const BYTE *const iLitEnd = *litPtr + sequence.litLength; -+ const BYTE *match = oLitEnd - 
sequence.offset; -+ -+ /* check */ -+ if (oMatchEnd > oend) -+ return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ -+ if (iLitEnd > litLimit) -+ return ERROR(corruption_detected); /* over-read beyond lit buffer */ -+ if (oLitEnd > oend_w) -+ return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); -+ -+ /* copy Literals */ -+ ZSTD_copy8(op, *litPtr); -+ if (sequence.litLength > 8) -+ ZSTD_wildcopy(op + 8, (*litPtr) + 8, -+ sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ -+ op = oLitEnd; -+ *litPtr = iLitEnd; /* update for next sequence */ -+ -+ /* copy Match */ -+ if (sequence.offset > (size_t)(oLitEnd - base)) { -+ /* offset beyond prefix */ -+ if (sequence.offset > (size_t)(oLitEnd - vBase)) -+ return ERROR(corruption_detected); -+ match = dictEnd + (match - base); -+ if (match + sequence.matchLength <= dictEnd) { -+ memmove(oLitEnd, match, sequence.matchLength); -+ return sequenceLength; -+ } -+ /* span extDict & currPrefixSegment */ -+ { -+ size_t const length1 = dictEnd - match; -+ memmove(oLitEnd, match, length1); -+ op = oLitEnd + length1; -+ sequence.matchLength -= length1; -+ match = base; -+ if (op > oend_w || sequence.matchLength < MINMATCH) { -+ U32 i; -+ for (i = 0; i < sequence.matchLength; ++i) -+ op[i] = match[i]; -+ return sequenceLength; -+ } -+ } -+ } -+ /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ -+ -+ /* match within prefix */ -+ if (sequence.offset < 8) { -+ /* close range match, overlap */ -+ static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ -+ static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */ -+ int const sub2 = dec64table[sequence.offset]; -+ op[0] = match[0]; -+ op[1] = match[1]; -+ op[2] = match[2]; -+ op[3] = match[3]; -+ match += dec32table[sequence.offset]; -+ ZSTD_copy4(op + 4, match); -+ match -= sub2; 
-+ } else { -+ ZSTD_copy8(op, match); -+ } -+ op += 8; -+ match += 8; -+ -+ if (oMatchEnd > oend - (16 - MINMATCH)) { -+ if (op < oend_w) { -+ ZSTD_wildcopy(op, match, oend_w - op); -+ match += oend_w - op; -+ op = oend_w; -+ } -+ while (op < oMatchEnd) -+ *op++ = *match++; -+ } else { -+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */ -+ } -+ return sequenceLength; -+} -+ -+static size_t INIT ZSTD_decompressSequences(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize) -+{ -+ const BYTE *ip = (const BYTE *)seqStart; -+ const BYTE *const iend = ip + seqSize; -+ BYTE *const ostart = (BYTE * const)dst; -+ BYTE *const oend = ostart + maxDstSize; -+ BYTE *op = ostart; -+ const BYTE *litPtr = dctx->litPtr; -+ const BYTE *const litEnd = litPtr + dctx->litSize; -+ const BYTE *const base = (const BYTE *)(dctx->base); -+ const BYTE *const vBase = (const BYTE *)(dctx->vBase); -+ const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd); -+ int nbSeq; -+ -+ /* Build Decoding Tables */ -+ { -+ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); -+ if (ZSTD_isError(seqHSize)) -+ return seqHSize; -+ ip += seqHSize; -+ } -+ -+ /* Regen sequences */ -+ if (nbSeq) { -+ seqState_t seqState; -+ dctx->fseEntropy = 1; -+ { -+ U32 i; -+ for (i = 0; i < ZSTD_REP_NUM; i++) -+ seqState.prevOffset[i] = dctx->entropy.rep[i]; -+ } -+ CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected); -+ FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); -+ FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); -+ FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); -+ -+ for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq;) { -+ nbSeq--; -+ { -+ seq_t const sequence = ZSTD_decodeSequence(&seqState); -+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, 
dictEnd); -+ if (ZSTD_isError(oneSeqSize)) -+ return oneSeqSize; -+ op += oneSeqSize; -+ } -+ } -+ -+ /* check if reached exact end */ -+ if (nbSeq) -+ return ERROR(corruption_detected); -+ /* save reps for next block */ -+ { -+ U32 i; -+ for (i = 0; i < ZSTD_REP_NUM; i++) -+ dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); -+ } -+ } -+ -+ /* last literal segment */ -+ { -+ size_t const lastLLSize = litEnd - litPtr; -+ if (lastLLSize > (size_t)(oend - op)) -+ return ERROR(dstSize_tooSmall); -+ memcpy(op, litPtr, lastLLSize); -+ op += lastLLSize; -+ } -+ -+ return op - ostart; -+} -+ -+FORCE_INLINE seq_t INIT ZSTD_decodeSequenceLong_generic(seqState_t *seqState, int const longOffsets) -+{ -+ seq_t seq; -+ -+ U32 const llCode = FSE_peekSymbol(&seqState->stateLL); -+ U32 const mlCode = FSE_peekSymbol(&seqState->stateML); -+ U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ -+ -+ U32 const llBits = LL_bits[llCode]; -+ U32 const mlBits = ML_bits[mlCode]; -+ U32 const ofBits = ofCode; -+ U32 const totalBits = llBits + mlBits + ofBits; -+ -+ static const U32 LL_base[MaxLL + 1] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, -+ 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000}; -+ -+ static const U32 ML_base[MaxML + 1] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, -+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 41, -+ 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003}; -+ -+ static const U32 OF_base[MaxOff + 1] = {0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, -+ 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, -+ 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD}; -+ -+ /* sequence */ -+ { -+ size_t offset; -+ if (!ofCode) -+ offset = 0; -+ else { -+ if (longOffsets) { -+ int const 
extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN); -+ offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); -+ if (ZSTD_32bits() || extraBits) -+ BIT_reloadDStream(&seqState->DStream); -+ if (extraBits) -+ offset += BIT_readBitsFast(&seqState->DStream, extraBits); -+ } else { -+ offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ -+ if (ZSTD_32bits()) -+ BIT_reloadDStream(&seqState->DStream); -+ } -+ } -+ -+ if (ofCode <= 1) { -+ offset += (llCode == 0); -+ if (offset) { -+ size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; -+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ -+ if (offset != 1) -+ seqState->prevOffset[2] = seqState->prevOffset[1]; -+ seqState->prevOffset[1] = seqState->prevOffset[0]; -+ seqState->prevOffset[0] = offset = temp; -+ } else { -+ offset = seqState->prevOffset[0]; -+ } -+ } else { -+ seqState->prevOffset[2] = seqState->prevOffset[1]; -+ seqState->prevOffset[1] = seqState->prevOffset[0]; -+ seqState->prevOffset[0] = offset; -+ } -+ seq.offset = offset; -+ } -+ -+ seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ -+ if (ZSTD_32bits() && (mlBits + llBits > 24)) -+ BIT_reloadDStream(&seqState->DStream); -+ -+ seq.litLength = LL_base[llCode] + ((llCode > 15) ? 
BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ -+ if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog))) -+ BIT_reloadDStream(&seqState->DStream); -+ -+ { -+ size_t const pos = seqState->pos + seq.litLength; -+ seq.match = seqState->base + pos - seq.offset; /* single memory segment */ -+ if (seq.offset > pos) -+ seq.match += seqState->gotoDict; /* separate memory segment */ -+ seqState->pos = pos + seq.matchLength; -+ } -+ -+ /* ANS state update */ -+ FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ -+ FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ -+ if (ZSTD_32bits()) -+ BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ -+ FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ -+ -+ return seq; -+} -+ -+static seq_t INIT ZSTD_decodeSequenceLong(seqState_t *seqState, unsigned const windowSize) -+{ -+ if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) { -+ return ZSTD_decodeSequenceLong_generic(seqState, 1); -+ } else { -+ return ZSTD_decodeSequenceLong_generic(seqState, 0); -+ } -+} -+ -+FORCE_INLINE -+size_t ZSTD_execSequenceLong(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base, -+ const BYTE *const vBase, const BYTE *const dictEnd) -+{ -+ BYTE *const oLitEnd = op + sequence.litLength; -+ size_t const sequenceLength = sequence.litLength + sequence.matchLength; -+ BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ -+ BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH; -+ const BYTE *const iLitEnd = *litPtr + sequence.litLength; -+ const BYTE *match = sequence.match; -+ -+ /* check */ -+ if (oMatchEnd > oend) -+ return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ -+ if (iLitEnd > litLimit) -+ return ERROR(corruption_detected); /* over-read beyond lit buffer */ -+ if (oLitEnd > 
oend_w) -+ return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); -+ -+ /* copy Literals */ -+ ZSTD_copy8(op, *litPtr); -+ if (sequence.litLength > 8) -+ ZSTD_wildcopy(op + 8, (*litPtr) + 8, -+ sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ -+ op = oLitEnd; -+ *litPtr = iLitEnd; /* update for next sequence */ -+ -+ /* copy Match */ -+ if (sequence.offset > (size_t)(oLitEnd - base)) { -+ /* offset beyond prefix */ -+ if (sequence.offset > (size_t)(oLitEnd - vBase)) -+ return ERROR(corruption_detected); -+ if (match + sequence.matchLength <= dictEnd) { -+ memmove(oLitEnd, match, sequence.matchLength); -+ return sequenceLength; -+ } -+ /* span extDict & currPrefixSegment */ -+ { -+ size_t const length1 = dictEnd - match; -+ memmove(oLitEnd, match, length1); -+ op = oLitEnd + length1; -+ sequence.matchLength -= length1; -+ match = base; -+ if (op > oend_w || sequence.matchLength < MINMATCH) { -+ U32 i; -+ for (i = 0; i < sequence.matchLength; ++i) -+ op[i] = match[i]; -+ return sequenceLength; -+ } -+ } -+ } -+ /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ -+ -+ /* match within prefix */ -+ if (sequence.offset < 8) { -+ /* close range match, overlap */ -+ static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ -+ static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */ -+ int const sub2 = dec64table[sequence.offset]; -+ op[0] = match[0]; -+ op[1] = match[1]; -+ op[2] = match[2]; -+ op[3] = match[3]; -+ match += dec32table[sequence.offset]; -+ ZSTD_copy4(op + 4, match); -+ match -= sub2; -+ } else { -+ ZSTD_copy8(op, match); -+ } -+ op += 8; -+ match += 8; -+ -+ if (oMatchEnd > oend - (16 - MINMATCH)) { -+ if (op < oend_w) { -+ ZSTD_wildcopy(op, match, oend_w - op); -+ match += oend_w - op; -+ op = oend_w; -+ } -+ while (op < oMatchEnd) -+ *op++ = *match++; -+ } else { -+ ZSTD_wildcopy(op, match, 
(ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */ -+ } -+ return sequenceLength; -+} -+ -+static size_t INIT ZSTD_decompressSequencesLong(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize) -+{ -+ const BYTE *ip = (const BYTE *)seqStart; -+ const BYTE *const iend = ip + seqSize; -+ BYTE *const ostart = (BYTE * const)dst; -+ BYTE *const oend = ostart + maxDstSize; -+ BYTE *op = ostart; -+ const BYTE *litPtr = dctx->litPtr; -+ const BYTE *const litEnd = litPtr + dctx->litSize; -+ const BYTE *const base = (const BYTE *)(dctx->base); -+ const BYTE *const vBase = (const BYTE *)(dctx->vBase); -+ const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd); -+ unsigned const windowSize = dctx->fParams.windowSize; -+ int nbSeq; -+ -+ /* Build Decoding Tables */ -+ { -+ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); -+ if (ZSTD_isError(seqHSize)) -+ return seqHSize; -+ ip += seqHSize; -+ } -+ -+ /* Regen sequences */ -+ if (nbSeq) { -+#define STORED_SEQS 4 -+#define STOSEQ_MASK (STORED_SEQS - 1) -+#define ADVANCED_SEQS 4 -+ seq_t *sequences = (seq_t *)dctx->entropy.workspace; -+ int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); -+ seqState_t seqState; -+ int seqNb; -+ ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.workspace) >= sizeof(seq_t) * STORED_SEQS); -+ dctx->fseEntropy = 1; -+ { -+ U32 i; -+ for (i = 0; i < ZSTD_REP_NUM; i++) -+ seqState.prevOffset[i] = dctx->entropy.rep[i]; -+ } -+ seqState.base = base; -+ seqState.pos = (size_t)(op - base); -+ seqState.gotoDict = (uPtrDiff)dictEnd - (uPtrDiff)base; /* cast to avoid undefined behaviour */ -+ CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected); -+ FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); -+ FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); -+ FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); -+ -+ /* prepare in advance */ -+ for (seqNb = 0; 
(BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb < seqAdvance; seqNb++) { -+ sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize); -+ } -+ if (seqNb < seqAdvance) -+ return ERROR(corruption_detected); -+ -+ /* decode and decompress */ -+ for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb < nbSeq; seqNb++) { -+ seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize); -+ size_t const oneSeqSize = -+ ZSTD_execSequenceLong(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); -+ if (ZSTD_isError(oneSeqSize)) -+ return oneSeqSize; -+ ZSTD_PREFETCH(sequence.match); -+ sequences[seqNb & STOSEQ_MASK] = sequence; -+ op += oneSeqSize; -+ } -+ if (seqNb < nbSeq) -+ return ERROR(corruption_detected); -+ -+ /* finish queue */ -+ seqNb -= seqAdvance; -+ for (; seqNb < nbSeq; seqNb++) { -+ size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); -+ if (ZSTD_isError(oneSeqSize)) -+ return oneSeqSize; -+ op += oneSeqSize; -+ } -+ -+ /* save reps for next block */ -+ { -+ U32 i; -+ for (i = 0; i < ZSTD_REP_NUM; i++) -+ dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); -+ } -+ } -+ -+ /* last literal segment */ -+ { -+ size_t const lastLLSize = litEnd - litPtr; -+ if (lastLLSize > (size_t)(oend - op)) -+ return ERROR(dstSize_tooSmall); -+ memcpy(op, litPtr, lastLLSize); -+ op += lastLLSize; -+ } -+ -+ return op - ostart; -+} -+ -+static size_t INIT ZSTD_decompressBlock_internal(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -+{ /* blockType == blockCompressed */ -+ const BYTE *ip = (const BYTE *)src; -+ -+ if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) -+ return ERROR(srcSize_wrong); -+ -+ /* Decode literals section */ -+ { -+ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); -+ if (ZSTD_isError(litCSize)) -+ return litCSize; -+ ip 
+= litCSize; -+ srcSize -= litCSize; -+ } -+ if (sizeof(size_t) > 4) /* do not enable prefetching on 32-bits x86, as it's performance detrimental */ -+ /* likely because of register pressure */ -+ /* if that's the correct cause, then 32-bits ARM should be affected differently */ -+ /* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */ -+ if (dctx->fParams.windowSize > (1 << 23)) -+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize); -+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); -+} -+ -+static void INIT ZSTD_checkContinuity(ZSTD_DCtx *dctx, const void *dst) -+{ -+ if (dst != dctx->previousDstEnd) { /* not contiguous */ -+ dctx->dictEnd = dctx->previousDstEnd; -+ dctx->vBase = (const char *)dst - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base)); -+ dctx->base = dst; -+ dctx->previousDstEnd = dst; -+ } -+} -+ -+size_t INIT ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -+{ -+ size_t dSize; -+ ZSTD_checkContinuity(dctx, dst); -+ dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); -+ dctx->previousDstEnd = (char *)dst + dSize; -+ return dSize; -+} -+ -+/** ZSTD_insertBlock() : -+ insert `src` block into `dctx` history. Useful to track uncompressed blocks. 
*/ -+size_t INIT ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, size_t blockSize) -+{ -+ ZSTD_checkContinuity(dctx, blockStart); -+ dctx->previousDstEnd = (const char *)blockStart + blockSize; -+ return blockSize; -+} -+ -+size_t INIT ZSTD_generateNxBytes(void *dst, size_t dstCapacity, BYTE byte, size_t length) -+{ -+ if (length > dstCapacity) -+ return ERROR(dstSize_tooSmall); -+ memset(dst, byte, length); -+ return length; -+} -+ -+/** ZSTD_findFrameCompressedSize() : -+ * compatible with legacy mode -+ * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame -+ * `srcSize` must be at least as large as the frame contained -+ * @return : the compressed size of the frame starting at `src` */ -+size_t INIT ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) -+{ -+ if (srcSize >= ZSTD_skippableHeaderSize && (ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { -+ return ZSTD_skippableHeaderSize + ZSTD_readLE32((const BYTE *)src + 4); -+ } else { -+ const BYTE *ip = (const BYTE *)src; -+ const BYTE *const ipstart = ip; -+ size_t remainingSize = srcSize; -+ ZSTD_frameParams fParams; -+ -+ size_t const headerSize = ZSTD_frameHeaderSize(ip, remainingSize); -+ if (ZSTD_isError(headerSize)) -+ return headerSize; -+ -+ /* Frame Header */ -+ { -+ size_t const ret = ZSTD_getFrameParams(&fParams, ip, remainingSize); -+ if (ZSTD_isError(ret)) -+ return ret; -+ if (ret > 0) -+ return ERROR(srcSize_wrong); -+ } -+ -+ ip += headerSize; -+ remainingSize -= headerSize; -+ -+ /* Loop on each block */ -+ while (1) { -+ blockProperties_t blockProperties; -+ size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); -+ if (ZSTD_isError(cBlockSize)) -+ return cBlockSize; -+ -+ if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) -+ return ERROR(srcSize_wrong); -+ -+ ip += ZSTD_blockHeaderSize + cBlockSize; -+ remainingSize -= ZSTD_blockHeaderSize + cBlockSize; -+ -+ if (blockProperties.lastBlock) 
-+ break; -+ } -+ -+ if (fParams.checksumFlag) { /* Frame content checksum */ -+ if (remainingSize < 4) -+ return ERROR(srcSize_wrong); -+ ip += 4; -+ remainingSize -= 4; -+ } -+ -+ return ip - ipstart; -+ } -+} -+ -+/*! ZSTD_decompressFrame() : -+* @dctx must be properly initialized */ -+static size_t INIT ZSTD_decompressFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void **srcPtr, size_t *srcSizePtr) -+{ -+ const BYTE *ip = (const BYTE *)(*srcPtr); -+ BYTE *const ostart = (BYTE * const)dst; -+ BYTE *const oend = ostart + dstCapacity; -+ BYTE *op = ostart; -+ size_t remainingSize = *srcSizePtr; -+ -+ /* check */ -+ if (remainingSize < ZSTD_frameHeaderSize_min + ZSTD_blockHeaderSize) -+ return ERROR(srcSize_wrong); -+ -+ /* Frame Header */ -+ { -+ size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix); -+ if (ZSTD_isError(frameHeaderSize)) -+ return frameHeaderSize; -+ if (remainingSize < frameHeaderSize + ZSTD_blockHeaderSize) -+ return ERROR(srcSize_wrong); -+ CHECK_F(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize)); -+ ip += frameHeaderSize; -+ remainingSize -= frameHeaderSize; -+ } -+ -+ /* Loop on each block */ -+ while (1) { -+ size_t decodedSize; -+ blockProperties_t blockProperties; -+ size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); -+ if (ZSTD_isError(cBlockSize)) -+ return cBlockSize; -+ -+ ip += ZSTD_blockHeaderSize; -+ remainingSize -= ZSTD_blockHeaderSize; -+ if (cBlockSize > remainingSize) -+ return ERROR(srcSize_wrong); -+ -+ switch (blockProperties.blockType) { -+ case bt_compressed: decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend - op, ip, cBlockSize); break; -+ case bt_raw: decodedSize = ZSTD_copyRawBlock(op, oend - op, ip, cBlockSize); break; -+ case bt_rle: decodedSize = ZSTD_generateNxBytes(op, oend - op, *ip, blockProperties.origSize); break; -+ case bt_reserved: -+ default: return ERROR(corruption_detected); -+ } -+ -+ if (ZSTD_isError(decodedSize)) -+ 
return decodedSize; -+ if (dctx->fParams.checksumFlag) -+ xxh64_update(&dctx->xxhState, op, decodedSize); -+ op += decodedSize; -+ ip += cBlockSize; -+ remainingSize -= cBlockSize; -+ if (blockProperties.lastBlock) -+ break; -+ } -+ -+ if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ -+ U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState); -+ U32 checkRead; -+ if (remainingSize < 4) -+ return ERROR(checksum_wrong); -+ checkRead = ZSTD_readLE32(ip); -+ if (checkRead != checkCalc) -+ return ERROR(checksum_wrong); -+ ip += 4; -+ remainingSize -= 4; -+ } -+ -+ /* Allow caller to get size read */ -+ *srcPtr = ip; -+ *srcSizePtr = remainingSize; -+ return op - ostart; -+} -+ -+static const void INIT *ZSTD_DDictDictContent(const ZSTD_DDict *ddict); -+static size_t INIT ZSTD_DDictDictSize(const ZSTD_DDict *ddict); -+ -+static size_t INIT ZSTD_decompressMultiFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize, -+ const ZSTD_DDict *ddict) -+{ -+ void *const dststart = dst; -+ -+ if (ddict) { -+ if (dict) { -+ /* programmer error, these two cases should be mutually exclusive */ -+ return ERROR(GENERIC); -+ } -+ -+ dict = ZSTD_DDictDictContent(ddict); -+ dictSize = ZSTD_DDictDictSize(ddict); -+ } -+ -+ while (srcSize >= ZSTD_frameHeaderSize_prefix) { -+ U32 magicNumber; -+ -+ magicNumber = ZSTD_readLE32(src); -+ if (magicNumber != ZSTD_MAGICNUMBER) { -+ if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { -+ size_t skippableSize; -+ if (srcSize < ZSTD_skippableHeaderSize) -+ return ERROR(srcSize_wrong); -+ skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize; -+ if (srcSize < skippableSize) { -+ return ERROR(srcSize_wrong); -+ } -+ -+ src = (const BYTE *)src + skippableSize; -+ srcSize -= skippableSize; -+ continue; -+ } else { -+ return ERROR(prefix_unknown); -+ } -+ } -+ -+ if (ddict) { -+ /* we were called from ZSTD_decompress_usingDDict 
*/ -+ ZSTD_refDDict(dctx, ddict); -+ } else { -+ /* this will initialize correctly with no dict if dict == NULL, so -+ * use this in all cases but ddict */ -+ CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); -+ } -+ ZSTD_checkContinuity(dctx, dst); -+ -+ { -+ const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); -+ if (ZSTD_isError(res)) -+ return res; -+ /* don't need to bounds check this, ZSTD_decompressFrame will have -+ * already */ -+ dst = (BYTE *)dst + res; -+ dstCapacity -= res; -+ } -+ } -+ -+ if (srcSize) -+ return ERROR(srcSize_wrong); /* input not entirely consumed */ -+ -+ return (BYTE *)dst - (BYTE *)dststart; -+} -+ -+size_t INIT ZSTD_decompress_usingDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize) -+{ -+ return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); -+} -+ -+size_t INIT ZSTD_decompressDCtx(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -+{ -+ return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); -+} -+ -+/*-************************************** -+* Advanced Streaming Decompression API -+* Bufferless and synchronous -+****************************************/ -+size_t INIT ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx) { return dctx->expected; } -+ -+ZSTD_nextInputType_e INIT ZSTD_nextInputType(ZSTD_DCtx *dctx) -+{ -+ switch (dctx->stage) { -+ default: /* should not happen */ -+ case ZSTDds_getFrameHeaderSize: -+ case ZSTDds_decodeFrameHeader: return ZSTDnit_frameHeader; -+ case ZSTDds_decodeBlockHeader: return ZSTDnit_blockHeader; -+ case ZSTDds_decompressBlock: return ZSTDnit_block; -+ case ZSTDds_decompressLastBlock: return ZSTDnit_lastBlock; -+ case ZSTDds_checkChecksum: return ZSTDnit_checksum; -+ case ZSTDds_decodeSkippableHeader: -+ case ZSTDds_skipFrame: return ZSTDnit_skippableFrame; -+ } -+} -+ -+int INIT 
ZSTD_isSkipFrame(ZSTD_DCtx *dctx) { return dctx->stage == ZSTDds_skipFrame; } /* for zbuff */ -+ -+/** ZSTD_decompressContinue() : -+* @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) -+* or an error code, which can be tested using ZSTD_isError() */ -+size_t INIT ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -+{ -+ /* Sanity check */ -+ if (srcSize != dctx->expected) -+ return ERROR(srcSize_wrong); -+ if (dstCapacity) -+ ZSTD_checkContinuity(dctx, dst); -+ -+ switch (dctx->stage) { -+ case ZSTDds_getFrameHeaderSize: -+ if (srcSize != ZSTD_frameHeaderSize_prefix) -+ return ERROR(srcSize_wrong); /* impossible */ -+ if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ -+ memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); -+ dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix; /* magic number + skippable frame length */ -+ dctx->stage = ZSTDds_decodeSkippableHeader; -+ return 0; -+ } -+ dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix); -+ if (ZSTD_isError(dctx->headerSize)) -+ return dctx->headerSize; -+ memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); -+ if (dctx->headerSize > ZSTD_frameHeaderSize_prefix) { -+ dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_prefix; -+ dctx->stage = ZSTDds_decodeFrameHeader; -+ return 0; -+ } -+ dctx->expected = 0; /* not necessary to copy more */ -+ /* fall through */ -+ -+ case ZSTDds_decodeFrameHeader: -+ memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); -+ CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); -+ dctx->expected = ZSTD_blockHeaderSize; -+ dctx->stage = ZSTDds_decodeBlockHeader; -+ return 0; -+ -+ case ZSTDds_decodeBlockHeader: { -+ blockProperties_t bp; -+ size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); -+ if (ZSTD_isError(cBlockSize)) -+ 
return cBlockSize; -+ dctx->expected = cBlockSize; -+ dctx->bType = bp.blockType; -+ dctx->rleSize = bp.origSize; -+ if (cBlockSize) { -+ dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; -+ return 0; -+ } -+ /* empty block */ -+ if (bp.lastBlock) { -+ if (dctx->fParams.checksumFlag) { -+ dctx->expected = 4; -+ dctx->stage = ZSTDds_checkChecksum; -+ } else { -+ dctx->expected = 0; /* end of frame */ -+ dctx->stage = ZSTDds_getFrameHeaderSize; -+ } -+ } else { -+ dctx->expected = 3; /* go directly to next header */ -+ dctx->stage = ZSTDds_decodeBlockHeader; -+ } -+ return 0; -+ } -+ case ZSTDds_decompressLastBlock: -+ case ZSTDds_decompressBlock: { -+ size_t rSize; -+ switch (dctx->bType) { -+ case bt_compressed: rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); break; -+ case bt_raw: rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); break; -+ case bt_rle: rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); break; -+ case bt_reserved: /* should never happen */ -+ default: return ERROR(corruption_detected); -+ } -+ if (ZSTD_isError(rSize)) -+ return rSize; -+ if (dctx->fParams.checksumFlag) -+ xxh64_update(&dctx->xxhState, dst, rSize); -+ -+ if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ -+ if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ -+ dctx->expected = 4; -+ dctx->stage = ZSTDds_checkChecksum; -+ } else { -+ dctx->expected = 0; /* ends here */ -+ dctx->stage = ZSTDds_getFrameHeaderSize; -+ } -+ } else { -+ dctx->stage = ZSTDds_decodeBlockHeader; -+ dctx->expected = ZSTD_blockHeaderSize; -+ dctx->previousDstEnd = (char *)dst + rSize; -+ } -+ return rSize; -+ } -+ case ZSTDds_checkChecksum: { -+ U32 const h32 = (U32)xxh64_digest(&dctx->xxhState); -+ U32 const check32 = ZSTD_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */ -+ if (check32 != h32) -+ return ERROR(checksum_wrong); -+ dctx->expected = 0; -+ dctx->stage = 
ZSTDds_getFrameHeaderSize; -+ return 0; -+ } -+ case ZSTDds_decodeSkippableHeader: { -+ memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); -+ dctx->expected = ZSTD_readLE32(dctx->headerBuffer + 4); -+ dctx->stage = ZSTDds_skipFrame; -+ return 0; -+ } -+ case ZSTDds_skipFrame: { -+ dctx->expected = 0; -+ dctx->stage = ZSTDds_getFrameHeaderSize; -+ return 0; -+ } -+ default: -+ return ERROR(GENERIC); /* impossible */ -+ } -+} -+ -+static size_t INIT ZSTD_refDictContent(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) -+{ -+ dctx->dictEnd = dctx->previousDstEnd; -+ dctx->vBase = (const char *)dict - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base)); -+ dctx->base = dict; -+ dctx->previousDstEnd = (const char *)dict + dictSize; -+ return 0; -+} -+ -+/* ZSTD_loadEntropy() : -+ * dict : must point at beginning of a valid zstd dictionary -+ * @return : size of entropy tables read */ -+static size_t INIT ZSTD_loadEntropy(ZSTD_entropyTables_t *entropy, const void *const dict, size_t const dictSize) -+{ -+ const BYTE *dictPtr = (const BYTE *)dict; -+ const BYTE *const dictEnd = dictPtr + dictSize; -+ -+ if (dictSize <= 8) -+ return ERROR(dictionary_corrupted); -+ dictPtr += 8; /* skip header = magic + dictID */ -+ -+ { -+ size_t const hSize = HUF_readDTableX4_wksp(entropy->hufTable, dictPtr, dictEnd - dictPtr, entropy->workspace, sizeof(entropy->workspace)); -+ if (HUF_isError(hSize)) -+ return ERROR(dictionary_corrupted); -+ dictPtr += hSize; -+ } -+ -+ { -+ short offcodeNCount[MaxOff + 1]; -+ U32 offcodeMaxValue = MaxOff, offcodeLog; -+ size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr); -+ if (FSE_isError(offcodeHeaderSize)) -+ return ERROR(dictionary_corrupted); -+ if (offcodeLog > OffFSELog) -+ return ERROR(dictionary_corrupted); -+ CHECK_E(FSE_buildDTable_wksp(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog, entropy->workspace, 
sizeof(entropy->workspace)), dictionary_corrupted); -+ dictPtr += offcodeHeaderSize; -+ } -+ -+ { -+ short matchlengthNCount[MaxML + 1]; -+ unsigned matchlengthMaxValue = MaxML, matchlengthLog; -+ size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr); -+ if (FSE_isError(matchlengthHeaderSize)) -+ return ERROR(dictionary_corrupted); -+ if (matchlengthLog > MLFSELog) -+ return ERROR(dictionary_corrupted); -+ CHECK_E(FSE_buildDTable_wksp(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted); -+ dictPtr += matchlengthHeaderSize; -+ } -+ -+ { -+ short litlengthNCount[MaxLL + 1]; -+ unsigned litlengthMaxValue = MaxLL, litlengthLog; -+ size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr); -+ if (FSE_isError(litlengthHeaderSize)) -+ return ERROR(dictionary_corrupted); -+ if (litlengthLog > LLFSELog) -+ return ERROR(dictionary_corrupted); -+ CHECK_E(FSE_buildDTable_wksp(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted); -+ dictPtr += litlengthHeaderSize; -+ } -+ -+ if (dictPtr + 12 > dictEnd) -+ return ERROR(dictionary_corrupted); -+ { -+ int i; -+ size_t const dictContentSize = (size_t)(dictEnd - (dictPtr + 12)); -+ for (i = 0; i < 3; i++) { -+ U32 const rep = ZSTD_readLE32(dictPtr); -+ dictPtr += 4; -+ if (rep == 0 || rep >= dictContentSize) -+ return ERROR(dictionary_corrupted); -+ entropy->rep[i] = rep; -+ } -+ } -+ -+ return dictPtr - (const BYTE *)dict; -+} -+ -+static size_t INIT ZSTD_decompress_insertDictionary(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) -+{ -+ if (dictSize < 8) -+ return ZSTD_refDictContent(dctx, dict, dictSize); -+ { -+ U32 const magic = ZSTD_readLE32(dict); -+ if (magic != ZSTD_DICT_MAGIC) { -+ return 
ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ -+ } -+ } -+ dctx->dictID = ZSTD_readLE32((const char *)dict + 4); -+ -+ /* load entropy tables */ -+ { -+ size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize); -+ if (ZSTD_isError(eSize)) -+ return ERROR(dictionary_corrupted); -+ dict = (const char *)dict + eSize; -+ dictSize -= eSize; -+ } -+ dctx->litEntropy = dctx->fseEntropy = 1; -+ -+ /* reference dictionary content */ -+ return ZSTD_refDictContent(dctx, dict, dictSize); -+} -+ -+size_t INIT ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) -+{ -+ CHECK_F(ZSTD_decompressBegin(dctx)); -+ if (dict && dictSize) -+ CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted); -+ return 0; -+} -+ -+/* ====== ZSTD_DDict ====== */ -+ -+struct ZSTD_DDict_s { -+ void *dictBuffer; -+ const void *dictContent; -+ size_t dictSize; -+ ZSTD_entropyTables_t entropy; -+ U32 dictID; -+ U32 entropyPresent; -+ ZSTD_customMem cMem; -+}; /* typedef'd to ZSTD_DDict within "zstd.h" */ -+ -+size_t INIT ZSTD_DDictWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DDict)); } -+ -+static const void INIT *ZSTD_DDictDictContent(const ZSTD_DDict *ddict) { return ddict->dictContent; } -+ -+static size_t INIT ZSTD_DDictDictSize(const ZSTD_DDict *ddict) { return ddict->dictSize; } -+ -+static void INIT ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict) -+{ -+ ZSTD_decompressBegin(dstDCtx); /* init */ -+ if (ddict) { /* support refDDict on NULL */ -+ dstDCtx->dictID = ddict->dictID; -+ dstDCtx->base = ddict->dictContent; -+ dstDCtx->vBase = ddict->dictContent; -+ dstDCtx->dictEnd = (const BYTE *)ddict->dictContent + ddict->dictSize; -+ dstDCtx->previousDstEnd = dstDCtx->dictEnd; -+ if (ddict->entropyPresent) { -+ dstDCtx->litEntropy = 1; -+ dstDCtx->fseEntropy = 1; -+ dstDCtx->LLTptr = ddict->entropy.LLTable; -+ dstDCtx->MLTptr = ddict->entropy.MLTable; -+ 
dstDCtx->OFTptr = ddict->entropy.OFTable; -+ dstDCtx->HUFptr = ddict->entropy.hufTable; -+ dstDCtx->entropy.rep[0] = ddict->entropy.rep[0]; -+ dstDCtx->entropy.rep[1] = ddict->entropy.rep[1]; -+ dstDCtx->entropy.rep[2] = ddict->entropy.rep[2]; -+ } else { -+ dstDCtx->litEntropy = 0; -+ dstDCtx->fseEntropy = 0; -+ } -+ } -+} -+ -+static size_t INIT ZSTD_loadEntropy_inDDict(ZSTD_DDict *ddict) -+{ -+ ddict->dictID = 0; -+ ddict->entropyPresent = 0; -+ if (ddict->dictSize < 8) -+ return 0; -+ { -+ U32 const magic = ZSTD_readLE32(ddict->dictContent); -+ if (magic != ZSTD_DICT_MAGIC) -+ return 0; /* pure content mode */ -+ } -+ ddict->dictID = ZSTD_readLE32((const char *)ddict->dictContent + 4); -+ -+ /* load entropy tables */ -+ CHECK_E(ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted); -+ ddict->entropyPresent = 1; -+ return 0; -+} -+ -+static ZSTD_DDict INIT *ZSTD_createDDict_advanced(const void *dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem) -+{ -+ if (!customMem.customAlloc || !customMem.customFree) -+ return NULL; -+ -+ { -+ ZSTD_DDict *const ddict = (ZSTD_DDict *)ZSTD_malloc(sizeof(ZSTD_DDict), customMem); -+ if (!ddict) -+ return NULL; -+ ddict->cMem = customMem; -+ -+ if ((byReference) || (!dict) || (!dictSize)) { -+ ddict->dictBuffer = NULL; -+ ddict->dictContent = dict; -+ } else { -+ void *const internalBuffer = ZSTD_malloc(dictSize, customMem); -+ if (!internalBuffer) { -+ ZSTD_freeDDict(ddict); -+ return NULL; -+ } -+ memcpy(internalBuffer, dict, dictSize); -+ ddict->dictBuffer = internalBuffer; -+ ddict->dictContent = internalBuffer; -+ } -+ ddict->dictSize = dictSize; -+ ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ -+ /* parse dictionary content */ -+ { -+ size_t const errorCode = ZSTD_loadEntropy_inDDict(ddict); -+ if (ZSTD_isError(errorCode)) { -+ ZSTD_freeDDict(ddict); -+ return NULL; -+ } -+ } -+ -+ return ddict; -+ } -+} -+ 
-+/*! ZSTD_initDDict() : -+* Create a digested dictionary, to start decompression without startup delay. -+* `dict` content is copied inside DDict. -+* Consequently, `dict` can be released after `ZSTD_DDict` creation */ -+ZSTD_DDict INIT *ZSTD_initDDict(const void *dict, size_t dictSize, void *workspace, size_t workspaceSize) -+{ -+ ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); -+ return ZSTD_createDDict_advanced(dict, dictSize, 1, stackMem); -+} -+ -+size_t INIT ZSTD_freeDDict(ZSTD_DDict *ddict) -+{ -+ if (ddict == NULL) -+ return 0; /* support free on NULL */ -+ { -+ ZSTD_customMem const cMem = ddict->cMem; -+ ZSTD_free(ddict->dictBuffer, cMem); -+ ZSTD_free(ddict, cMem); -+ return 0; -+ } -+} -+ -+/*! ZSTD_getDictID_fromDict() : -+ * Provides the dictID stored within dictionary. -+ * if @return == 0, the dictionary is not conformant with Zstandard specification. -+ * It can still be loaded, but as a content-only dictionary. */ -+unsigned INIT ZSTD_getDictID_fromDict(const void *dict, size_t dictSize) -+{ -+ if (dictSize < 8) -+ return 0; -+ if (ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC) -+ return 0; -+ return ZSTD_readLE32((const char *)dict + 4); -+} -+ -+/*! ZSTD_getDictID_fromDDict() : -+ * Provides the dictID of the dictionary loaded into `ddict`. -+ * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. -+ * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -+unsigned INIT ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict) -+{ -+ if (ddict == NULL) -+ return 0; -+ return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); -+} -+ -+/*! ZSTD_getDictID_fromFrame() : -+ * Provides the dictID required to decompressed the frame stored within `src`. -+ * If @return == 0, the dictID could not be decoded. -+ * This could for one of the following reasons : -+ * - The frame does not require a dictionary to be decoded (most common case). 
-+ * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. -+ * Note : this use case also happens when using a non-conformant dictionary. -+ * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). -+ * - This is not a Zstandard frame. -+ * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */ -+unsigned INIT ZSTD_getDictID_fromFrame(const void *src, size_t srcSize) -+{ -+ ZSTD_frameParams zfp = {0, 0, 0, 0}; -+ size_t const hError = ZSTD_getFrameParams(&zfp, src, srcSize); -+ if (ZSTD_isError(hError)) -+ return 0; -+ return zfp.dictID; -+} -+ -+/*! ZSTD_decompress_usingDDict() : -+* Decompression using a pre-digested Dictionary -+* Use dictionary without significant overhead. */ -+size_t INIT ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_DDict *ddict) -+{ -+ /* pass content and size in case legacy frames are encountered */ -+ return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, NULL, 0, ddict); -+} -+ -+/*===================================== -+* Streaming decompression -+*====================================*/ -+ -+typedef enum { zdss_init, zdss_loadHeader, zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; -+ -+/* *** Resource management *** */ -+struct ZSTD_DStream_s { -+ ZSTD_DCtx *dctx; -+ ZSTD_DDict *ddictLocal; -+ const ZSTD_DDict *ddict; -+ ZSTD_frameParams fParams; -+ ZSTD_dStreamStage stage; -+ char *inBuff; -+ size_t inBuffSize; -+ size_t inPos; -+ size_t maxWindowSize; -+ char *outBuff; -+ size_t outBuffSize; -+ size_t outStart; -+ size_t outEnd; -+ size_t blockSize; -+ BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; /* tmp buffer to store frame header */ -+ size_t lhSize; -+ ZSTD_customMem customMem; -+ void *legacyContext; -+ U32 
previousLegacyVersion; -+ U32 legacyVersion; -+ U32 hostageByte; -+}; /* typedef'd to ZSTD_DStream within "zstd.h" */ -+ -+size_t INIT ZSTD_DStreamWorkspaceBound(size_t maxWindowSize) -+{ -+ size_t const blockSize = MIN(maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); -+ size_t const inBuffSize = blockSize; -+ size_t const outBuffSize = maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; -+ return ZSTD_DCtxWorkspaceBound() + ZSTD_ALIGN(sizeof(ZSTD_DStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize); -+} -+ -+static ZSTD_DStream INIT *ZSTD_createDStream_advanced(ZSTD_customMem customMem) -+{ -+ ZSTD_DStream *zds; -+ -+ if (!customMem.customAlloc || !customMem.customFree) -+ return NULL; -+ -+ zds = (ZSTD_DStream *)ZSTD_malloc(sizeof(ZSTD_DStream), customMem); -+ if (zds == NULL) -+ return NULL; -+ memset(zds, 0, sizeof(ZSTD_DStream)); -+ memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem)); -+ zds->dctx = ZSTD_createDCtx_advanced(customMem); -+ if (zds->dctx == NULL) { -+ ZSTD_freeDStream(zds); -+ return NULL; -+ } -+ zds->stage = zdss_init; -+ zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; -+ return zds; -+} -+ -+ZSTD_DStream INIT *ZSTD_initDStream(size_t maxWindowSize, void *workspace, size_t workspaceSize) -+{ -+ ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); -+ ZSTD_DStream *zds = ZSTD_createDStream_advanced(stackMem); -+ if (!zds) { -+ return NULL; -+ } -+ -+ zds->maxWindowSize = maxWindowSize; -+ zds->stage = zdss_loadHeader; -+ zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; -+ ZSTD_freeDDict(zds->ddictLocal); -+ zds->ddictLocal = NULL; -+ zds->ddict = zds->ddictLocal; -+ zds->legacyVersion = 0; -+ zds->hostageByte = 0; -+ -+ { -+ size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); -+ size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; -+ -+ zds->inBuff = (char *)ZSTD_malloc(blockSize, zds->customMem); -+ zds->inBuffSize = blockSize; -+ 
zds->outBuff = (char *)ZSTD_malloc(neededOutSize, zds->customMem); -+ zds->outBuffSize = neededOutSize; -+ if (zds->inBuff == NULL || zds->outBuff == NULL) { -+ ZSTD_freeDStream(zds); -+ return NULL; -+ } -+ } -+ return zds; -+} -+ -+ZSTD_DStream INIT *ZSTD_initDStream_usingDDict(size_t maxWindowSize, const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize) -+{ -+ ZSTD_DStream *zds = ZSTD_initDStream(maxWindowSize, workspace, workspaceSize); -+ if (zds) { -+ zds->ddict = ddict; -+ } -+ return zds; -+} -+ -+size_t INIT ZSTD_freeDStream(ZSTD_DStream *zds) -+{ -+ if (zds == NULL) -+ return 0; /* support free on null */ -+ { -+ ZSTD_customMem const cMem = zds->customMem; -+ ZSTD_freeDCtx(zds->dctx); -+ zds->dctx = NULL; -+ ZSTD_freeDDict(zds->ddictLocal); -+ zds->ddictLocal = NULL; -+ ZSTD_free(zds->inBuff, cMem); -+ zds->inBuff = NULL; -+ ZSTD_free(zds->outBuff, cMem); -+ zds->outBuff = NULL; -+ ZSTD_free(zds, cMem); -+ return 0; -+ } -+} -+ -+/* *** Initialization *** */ -+ -+size_t INIT ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; } -+size_t INIT ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } -+ -+size_t INIT ZSTD_resetDStream(ZSTD_DStream *zds) -+{ -+ zds->stage = zdss_loadHeader; -+ zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; -+ zds->legacyVersion = 0; -+ zds->hostageByte = 0; -+ return ZSTD_frameHeaderSize_prefix; -+} -+ -+/* ***** Decompression ***** */ -+ -+ZSTD_STATIC size_t INIT ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -+{ -+ size_t const length = MIN(dstCapacity, srcSize); -+ memcpy(dst, src, length); -+ return length; -+} -+ -+size_t INIT ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inBuffer *input) -+{ -+ const char *const istart = (const char *)(input->src) + input->pos; -+ const char *const iend = (const char *)(input->src) + input->size; -+ const char *ip = istart; -+ char *const ostart = (char *)(output->dst) + 
output->pos; -+ char *const oend = (char *)(output->dst) + output->size; -+ char *op = ostart; -+ U32 someMoreWork = 1; -+ -+ while (someMoreWork) { -+ switch (zds->stage) { -+ case zdss_init: -+ ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */ -+ /* fall through */ -+ -+ case zdss_loadHeader: { -+ size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize); -+ if (ZSTD_isError(hSize)) -+ return hSize; -+ if (hSize != 0) { /* need more input */ -+ size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ -+ if (toLoad > (size_t)(iend - ip)) { /* not enough input to load full header */ -+ memcpy(zds->headerBuffer + zds->lhSize, ip, iend - ip); -+ zds->lhSize += iend - ip; -+ input->pos = input->size; -+ return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + -+ ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ -+ } -+ memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); -+ zds->lhSize = hSize; -+ ip += toLoad; -+ break; -+ } -+ -+ /* check for single-pass mode opportunity */ -+ if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */ -+ && (U64)(size_t)(oend - op) >= zds->fParams.frameContentSize) { -+ size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend - istart); -+ if (cSize <= (size_t)(iend - istart)) { -+ size_t const decompressedSize = ZSTD_decompress_usingDDict(zds->dctx, op, oend - op, istart, cSize, zds->ddict); -+ if (ZSTD_isError(decompressedSize)) -+ return decompressedSize; -+ ip = istart + cSize; -+ op += decompressedSize; -+ zds->dctx->expected = 0; -+ zds->stage = zdss_init; -+ someMoreWork = 0; -+ break; -+ } -+ } -+ -+ /* Consume header */ -+ ZSTD_refDDict(zds->dctx, zds->ddict); -+ { -+ size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */ -+ CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size)); -+ { -+ size_t const h2Size 
= ZSTD_nextSrcSizeToDecompress(zds->dctx); -+ CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer + h1Size, h2Size)); -+ } -+ } -+ -+ zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); -+ if (zds->fParams.windowSize > zds->maxWindowSize) -+ return ERROR(frameParameter_windowTooLarge); -+ -+ /* Buffers are preallocated, but double check */ -+ { -+ size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); -+ size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; -+ if (zds->inBuffSize < blockSize) { -+ return ERROR(GENERIC); -+ } -+ if (zds->outBuffSize < neededOutSize) { -+ return ERROR(GENERIC); -+ } -+ zds->blockSize = blockSize; -+ } -+ zds->stage = zdss_read; -+ } -+ /* fall through */ -+ -+ case zdss_read: { -+ size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); -+ if (neededInSize == 0) { /* end of frame */ -+ zds->stage = zdss_init; -+ someMoreWork = 0; -+ break; -+ } -+ if ((size_t)(iend - ip) >= neededInSize) { /* decode directly from src */ -+ const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx); -+ size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart, -+ (isSkipFrame ? 
0 : zds->outBuffSize - zds->outStart), ip, neededInSize); -+ if (ZSTD_isError(decodedSize)) -+ return decodedSize; -+ ip += neededInSize; -+ if (!decodedSize && !isSkipFrame) -+ break; /* this was just a header */ -+ zds->outEnd = zds->outStart + decodedSize; -+ zds->stage = zdss_flush; -+ break; -+ } -+ if (ip == iend) { -+ someMoreWork = 0; -+ break; -+ } /* no more input */ -+ zds->stage = zdss_load; -+ /* pass-through */ -+ } -+ /* fall through */ -+ -+ case zdss_load: { -+ size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); -+ size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */ -+ size_t loadedSize; -+ if (toLoad > zds->inBuffSize - zds->inPos) -+ return ERROR(corruption_detected); /* should never happen */ -+ loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend - ip); -+ ip += loadedSize; -+ zds->inPos += loadedSize; -+ if (loadedSize < toLoad) { -+ someMoreWork = 0; -+ break; -+ } /* not enough input, wait for more */ -+ -+ /* decode loaded input */ -+ { -+ const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx); -+ size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, -+ zds->inBuff, neededInSize); -+ if (ZSTD_isError(decodedSize)) -+ return decodedSize; -+ zds->inPos = 0; /* input is consumed */ -+ if (!decodedSize && !isSkipFrame) { -+ zds->stage = zdss_read; -+ break; -+ } /* this was just a header */ -+ zds->outEnd = zds->outStart + decodedSize; -+ zds->stage = zdss_flush; -+ /* pass-through */ -+ } -+ } -+ /* fall through */ -+ -+ case zdss_flush: { -+ size_t const toFlushSize = zds->outEnd - zds->outStart; -+ size_t const flushedSize = ZSTD_limitCopy(op, oend - op, zds->outBuff + zds->outStart, toFlushSize); -+ op += flushedSize; -+ zds->outStart += flushedSize; -+ if (flushedSize == toFlushSize) { /* flush completed */ -+ zds->stage = zdss_read; -+ if (zds->outStart + zds->blockSize > 
zds->outBuffSize) -+ zds->outStart = zds->outEnd = 0; -+ break; -+ } -+ /* cannot complete flush */ -+ someMoreWork = 0; -+ break; -+ } -+ default: -+ return ERROR(GENERIC); /* impossible */ -+ } -+ } -+ -+ /* result */ -+ input->pos += (size_t)(ip - istart); -+ output->pos += (size_t)(op - ostart); -+ { -+ size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->dctx); -+ if (!nextSrcSizeHint) { /* frame fully decoded */ -+ if (zds->outEnd == zds->outStart) { /* output fully flushed */ -+ if (zds->hostageByte) { -+ if (input->pos >= input->size) { -+ zds->stage = zdss_read; -+ return 1; -+ } /* can't release hostage (not present) */ -+ input->pos++; /* release hostage */ -+ } -+ return 0; -+ } -+ if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ -+ input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ -+ zds->hostageByte = 1; -+ } -+ return 1; -+ } -+ nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->dctx) == ZSTDnit_block); /* preload header of next block */ -+ if (zds->inPos > nextSrcSizeHint) -+ return ERROR(GENERIC); /* should never happen */ -+ nextSrcSizeHint -= zds->inPos; /* already loaded*/ -+ return nextSrcSizeHint; -+ } -+} -diff --git a/xen/common/zstd/entropy_common.c b/xen/common/zstd/entropy_common.c -new file mode 100644 -index 0000000000..bcdb57982b ---- /dev/null -+++ b/xen/common/zstd/entropy_common.c -@@ -0,0 +1,243 @@ -+/* -+ * Common functions of New Generation Entropy library -+ * Copyright (C) 2016, Yann Collet. -+ * -+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are -+ * met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. 
-+ * * Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following disclaimer -+ * in the documentation and/or other materials provided with the -+ * distribution. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). 
-+ * -+ * You can contact the author at : -+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -+ */ -+ -+/* ************************************* -+* Dependencies -+***************************************/ -+#include "error_private.h" /* ERR_*, ERROR */ -+#include "fse.h" -+#include "huf.h" -+#include "mem.h" -+ -+/*=== Version ===*/ -+unsigned INIT FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } -+ -+/*=== Error Management ===*/ -+unsigned INIT FSE_isError(size_t code) { return ERR_isError(code); } -+ -+unsigned INIT HUF_isError(size_t code) { return ERR_isError(code); } -+ -+/*-************************************************************** -+* FSE NCount encoding-decoding -+****************************************************************/ -+size_t INIT FSE_readNCount(short *normalizedCounter, unsigned *maxSVPtr, unsigned *tableLogPtr, const void *headerBuffer, size_t hbSize) -+{ -+ const BYTE *const istart = (const BYTE *)headerBuffer; -+ const BYTE *const iend = istart + hbSize; -+ const BYTE *ip = istart; -+ int nbBits; -+ int remaining; -+ int threshold; -+ U32 bitStream; -+ int bitCount; -+ unsigned charnum = 0; -+ int previous0 = 0; -+ -+ if (hbSize < 4) -+ return ERROR(srcSize_wrong); -+ bitStream = ZSTD_readLE32(ip); -+ nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ -+ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) -+ return ERROR(tableLog_tooLarge); -+ bitStream >>= 4; -+ bitCount = 4; -+ *tableLogPtr = nbBits; -+ remaining = (1 << nbBits) + 1; -+ threshold = 1 << nbBits; -+ nbBits++; -+ -+ while ((remaining > 1) & (charnum <= *maxSVPtr)) { -+ if (previous0) { -+ unsigned n0 = charnum; -+ while ((bitStream & 0xFFFF) == 0xFFFF) { -+ n0 += 24; -+ if (ip < iend - 5) { -+ ip += 2; -+ bitStream = ZSTD_readLE32(ip) >> bitCount; -+ } else { -+ bitStream >>= 16; -+ bitCount += 16; -+ } -+ } -+ while ((bitStream & 3) == 3) { -+ n0 += 3; -+ bitStream >>= 2; -+ bitCount += 2; -+ } -+ n0 += bitStream & 3; -+ bitCount += 2; 
-+ if (n0 > *maxSVPtr) -+ return ERROR(maxSymbolValue_tooSmall); -+ while (charnum < n0) -+ normalizedCounter[charnum++] = 0; -+ if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) { -+ ip += bitCount >> 3; -+ bitCount &= 7; -+ bitStream = ZSTD_readLE32(ip) >> bitCount; -+ } else { -+ bitStream >>= 2; -+ } -+ } -+ { -+ int const max = (2 * threshold - 1) - remaining; -+ int count; -+ -+ if ((bitStream & (threshold - 1)) < (U32)max) { -+ count = bitStream & (threshold - 1); -+ bitCount += nbBits - 1; -+ } else { -+ count = bitStream & (2 * threshold - 1); -+ if (count >= threshold) -+ count -= max; -+ bitCount += nbBits; -+ } -+ -+ count--; /* extra accuracy */ -+ remaining -= count < 0 ? -count : count; /* -1 means +1 */ -+ normalizedCounter[charnum++] = (short)count; -+ previous0 = !count; -+ while (remaining < threshold) { -+ nbBits--; -+ threshold >>= 1; -+ } -+ -+ if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) { -+ ip += bitCount >> 3; -+ bitCount &= 7; -+ } else { -+ bitCount -= (int)(8 * (iend - 4 - ip)); -+ ip = iend - 4; -+ } -+ bitStream = ZSTD_readLE32(ip) >> (bitCount & 31); -+ } -+ } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ -+ if (remaining != 1) -+ return ERROR(corruption_detected); -+ if (bitCount > 32) -+ return ERROR(corruption_detected); -+ *maxSVPtr = charnum - 1; -+ -+ ip += (bitCount + 7) >> 3; -+ return ip - istart; -+} -+ -+/*! HUF_readStats() : -+ Read compact Huffman tree, saved by HUF_writeCTable(). -+ `huffWeight` is destination buffer. -+ `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. -+ @return : size read from `src` , or an error Code . -+ Note : Needed by HUF_readCTable() and HUF_readDTableX?() . 
-+*/ -+size_t INIT HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) -+{ -+ U32 weightTotal; -+ const BYTE *ip = (const BYTE *)src; -+ size_t iSize; -+ size_t oSize; -+ -+ if (!srcSize) -+ return ERROR(srcSize_wrong); -+ iSize = ip[0]; -+ /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */ -+ -+ if (iSize >= 128) { /* special header */ -+ oSize = iSize - 127; -+ iSize = ((oSize + 1) / 2); -+ if (iSize + 1 > srcSize) -+ return ERROR(srcSize_wrong); -+ if (oSize >= hwSize) -+ return ERROR(corruption_detected); -+ ip += 1; -+ { -+ U32 n; -+ for (n = 0; n < oSize; n += 2) { -+ huffWeight[n] = ip[n / 2] >> 4; -+ huffWeight[n + 1] = ip[n / 2] & 15; -+ } -+ } -+ } else { /* header compressed with FSE (normal case) */ -+ if (iSize + 1 > srcSize) -+ return ERROR(srcSize_wrong); -+ oSize = FSE_decompress_wksp(huffWeight, hwSize - 1, ip + 1, iSize, 6, workspace, workspaceSize); /* max (hwSize-1) values decoded, as last one is implied */ -+ if (FSE_isError(oSize)) -+ return oSize; -+ } -+ -+ /* collect weight stats */ -+ memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); -+ weightTotal = 0; -+ { -+ U32 n; -+ for (n = 0; n < oSize; n++) { -+ if (huffWeight[n] >= HUF_TABLELOG_MAX) -+ return ERROR(corruption_detected); -+ rankStats[huffWeight[n]]++; -+ weightTotal += (1 << huffWeight[n]) >> 1; -+ } -+ } -+ if (weightTotal == 0) -+ return ERROR(corruption_detected); -+ -+ /* get last non-null symbol weight (implied, total must be 2^n) */ -+ { -+ U32 const tableLog = BIT_highbit32(weightTotal) + 1; -+ if (tableLog > HUF_TABLELOG_MAX) -+ return ERROR(corruption_detected); -+ *tableLogPtr = tableLog; -+ /* determine last weight */ -+ { -+ U32 const total = 1 << tableLog; -+ U32 const rest = total - weightTotal; -+ U32 const verif = 1 << BIT_highbit32(rest); -+ U32 const lastWeight = BIT_highbit32(rest) 
+ 1; -+ if (verif != rest) -+ return ERROR(corruption_detected); /* last value must be a clean power of 2 */ -+ huffWeight[oSize] = (BYTE)lastWeight; -+ rankStats[lastWeight]++; -+ } -+ } -+ -+ /* check tree construction validity */ -+ if ((rankStats[1] < 2) || (rankStats[1] & 1)) -+ return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ -+ -+ /* results */ -+ *nbSymbolsPtr = (U32)(oSize + 1); -+ return iSize + 1; -+} -diff --git a/xen/common/zstd/error_private.h b/xen/common/zstd/error_private.h -new file mode 100644 -index 0000000000..ecbfe51dfb ---- /dev/null -+++ b/xen/common/zstd/error_private.h -@@ -0,0 +1,53 @@ -+/** -+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. -+ * All rights reserved. -+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of https://github.com/facebook/zstd. -+ * An additional grant of patent rights can be found in the PATENTS file in the -+ * same directory. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). 
-+ */ -+ -+/* Note : this module is expected to remain private, do not expose it */ -+ -+#ifndef ERROR_H_MODULE -+#define ERROR_H_MODULE -+ -+/* **************************************** -+* Dependencies -+******************************************/ -+#include /* size_t */ -+#include /* enum list */ -+ -+/* **************************************** -+* Compiler-specific -+******************************************/ -+#define ERR_STATIC static __attribute__((unused)) -+ -+/*-**************************************** -+* Customization (error_public.h) -+******************************************/ -+typedef ZSTD_ErrorCode ERR_enum; -+#define PREFIX(name) ZSTD_error_##name -+ -+/*-**************************************** -+* Error codes handling -+******************************************/ -+#define ERROR(name) ((size_t)-PREFIX(name)) -+ -+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } -+ -+ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) -+{ -+ if (!ERR_isError(code)) -+ return (ERR_enum)0; -+ return (ERR_enum)(0 - code); -+} -+ -+#endif /* ERROR_H_MODULE */ -diff --git a/xen/common/zstd/fse.h b/xen/common/zstd/fse.h -new file mode 100644 -index 0000000000..b86717c34d ---- /dev/null -+++ b/xen/common/zstd/fse.h -@@ -0,0 +1,575 @@ -+/* -+ * FSE : Finite State Entropy codec -+ * Public Prototypes declaration -+ * Copyright (C) 2013-2016, Yann Collet. -+ * -+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are -+ * met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. 
-+ * * Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following disclaimer -+ * in the documentation and/or other materials provided with the -+ * distribution. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). 
-+ * -+ * You can contact the author at : -+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -+ */ -+#ifndef FSE_H -+#define FSE_H -+ -+/*-***************************************** -+* Dependencies -+******************************************/ -+#include /* size_t, ptrdiff_t */ -+ -+/*-***************************************** -+* FSE_PUBLIC_API : control library symbols visibility -+******************************************/ -+#define FSE_PUBLIC_API -+ -+/*------ Version ------*/ -+#define FSE_VERSION_MAJOR 0 -+#define FSE_VERSION_MINOR 9 -+#define FSE_VERSION_RELEASE 0 -+ -+#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE -+#define FSE_QUOTE(str) #str -+#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) -+#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) -+ -+#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR * 100 * 100 + FSE_VERSION_MINOR * 100 + FSE_VERSION_RELEASE) -+FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ -+ -+/*-***************************************** -+* Tool functions -+******************************************/ -+FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ -+ -+/* Error Management */ -+FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ -+ -+/*-***************************************** -+* FSE detailed API -+******************************************/ -+/*! -+FSE_compress() does the following: -+1. count symbol occurrence from source[] into table count[] -+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) -+3. save normalized counters to memory buffer using writeNCount() -+4. build encoding table 'CTable' from normalized counters -+5. encode the data stream using encoding table 'CTable' -+ -+FSE_decompress() does the following: -+1. read normalized counters with readNCount() -+2. 
build decoding table 'DTable' from normalized counters -+3. decode the data stream using decoding table 'DTable' -+ -+The following API allows targeting specific sub-functions for advanced tasks. -+For example, it's possible to compress several blocks using the same 'CTable', -+or to save and provide normalized distribution using external method. -+*/ -+ -+/* *** COMPRESSION *** */ -+/*! FSE_optimalTableLog(): -+ dynamically downsize 'tableLog' when conditions are met. -+ It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. -+ @return : recommended tableLog (necessarily <= 'maxTableLog') */ -+FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); -+ -+/*! FSE_normalizeCount(): -+ normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) -+ 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). -+ @return : tableLog, -+ or an errorCode, which can be tested using FSE_isError() */ -+FSE_PUBLIC_API size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t srcSize, unsigned maxSymbolValue); -+ -+/*! FSE_NCountWriteBound(): -+ Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. -+ Typically useful for allocation purpose. */ -+FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); -+ -+/*! FSE_writeNCount(): -+ Compactly save 'normalizedCounter' into 'buffer'. -+ @return : size of the compressed table, -+ or an errorCode, which can be tested using FSE_isError(). */ -+FSE_PUBLIC_API size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); -+ -+/*! Constructor and Destructor of FSE_CTable. -+ Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ -+typedef unsigned FSE_CTable; /* don't allocate that. 
It's only meant to be more restrictive than void* */ -+ -+/*! FSE_compress_usingCTable(): -+ Compress `src` using `ct` into `dst` which must be already allocated. -+ @return : size of compressed data (<= `dstCapacity`), -+ or 0 if compressed data could not fit into `dst`, -+ or an errorCode, which can be tested using FSE_isError() */ -+FSE_PUBLIC_API size_t FSE_compress_usingCTable(void *dst, size_t dstCapacity, const void *src, size_t srcSize, const FSE_CTable *ct); -+ -+/*! -+Tutorial : -+---------- -+The first step is to count all symbols. FSE_count() does this job very fast. -+Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. -+'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] -+maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) -+FSE_count() will return the number of occurrence of the most frequent symbol. -+This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. -+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). -+ -+The next step is to normalize the frequencies. -+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. -+It also guarantees a minimum of 1 to any Symbol with frequency >= 1. -+You can use 'tableLog'==0 to mean "use default tableLog value". -+If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), -+which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). -+ -+The result of FSE_normalizeCount() will be saved into a table, -+called 'normalizedCounter', which is a table of signed short. -+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. -+The return value is tableLog if everything proceeded as expected. 
-+It is 0 if there is a single symbol within distribution. -+If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). -+ -+'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). -+'buffer' must be already allocated. -+For guaranteed success, buffer size must be at least FSE_headerBound(). -+The result of the function is the number of bytes written into 'buffer'. -+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). -+ -+'normalizedCounter' can then be used to create the compression table 'CTable'. -+The space required by 'CTable' must be already allocated, using FSE_createCTable(). -+You can then use FSE_buildCTable() to fill 'CTable'. -+If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). -+ -+'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). -+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' -+The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. -+If it returns '0', compressed data could not fit into 'dst'. -+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). -+*/ -+ -+/* *** DECOMPRESSION *** */ -+ -+/*! FSE_readNCount(): -+ Read compactly saved 'normalizedCounter' from 'rBuffer'. -+ @return : size read from 'rBuffer', -+ or an errorCode, which can be tested using FSE_isError(). -+ maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ -+FSE_PUBLIC_API size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSymbolValuePtr, unsigned *tableLogPtr, const void *rBuffer, size_t rBuffSize); -+ -+/*! Constructor and Destructor of FSE_DTable. 
-+ Note that its size depends on 'tableLog' */ -+typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ -+ -+/*! FSE_buildDTable(): -+ Builds 'dt', which must be already allocated, using FSE_createDTable(). -+ return : 0, or an errorCode, which can be tested using FSE_isError() */ -+FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize); -+ -+/*! FSE_decompress_usingDTable(): -+ Decompress compressed source `cSrc` of size `cSrcSize` using `dt` -+ into `dst` which must be already allocated. -+ @return : size of regenerated data (necessarily <= `dstCapacity`), -+ or an errorCode, which can be tested using FSE_isError() */ -+FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt); -+ -+/*! -+Tutorial : -+---------- -+(Note : these functions only decompress FSE-compressed blocks. -+ If block is uncompressed, use memcpy() instead -+ If block is a single repeated byte, use memset() instead ) -+ -+The first step is to obtain the normalized frequencies of symbols. -+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). -+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. -+In practice, that means it's necessary to know 'maxSymbolValue' beforehand, -+or size the table to handle worst case situations (typically 256). -+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. -+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. -+Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. -+If there is an error, the function will return an error code, which can be tested using FSE_isError(). 
-+ -+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. -+This is performed by the function FSE_buildDTable(). -+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). -+If there is an error, the function will return an error code, which can be tested using FSE_isError(). -+ -+`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). -+`cSrcSize` must be strictly correct, otherwise decompression will fail. -+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). -+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) -+*/ -+ -+/* *** Dependency *** */ -+#include "bitstream.h" -+ -+/* ***************************************** -+* Static allocation -+*******************************************/ -+/* FSE buffer bounds */ -+#define FSE_NCOUNTBOUND 512 -+#define FSE_BLOCKBOUND(size) (size + (size >> 7)) -+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ -+ -+/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ -+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1 << (maxTableLog - 1)) + ((maxSymbolValue + 1) * 2)) -+#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1 << maxTableLog)) -+ -+/* ***************************************** -+* FSE advanced API -+*******************************************/ -+/* FSE_count_wksp() : -+ * Same as FSE_count(), but using an externally provided scratch buffer. -+ * `workSpace` size must be table of >= `1024` unsigned -+ */ -+size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace); -+ -+/* FSE_countFast_wksp() : -+ * Same as FSE_countFast(), but using an externally provided scratch buffer. 
-+ * `workSpace` must be a table of minimum `1024` unsigned -+ */ -+size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize, unsigned *workSpace); -+ -+/*! FSE_count_simple -+ * Same as FSE_countFast(), but does not use any additional memory (not even on stack). -+ * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`). -+*/ -+size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize); -+ -+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); -+/**< same as FSE_optimalTableLog(), which used `minus==2` */ -+ -+size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits); -+/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ -+ -+size_t FSE_buildCTable_rle(FSE_CTable *ct, unsigned char symbolValue); -+/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ -+ -+/* FSE_buildCTable_wksp() : -+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). -+ * `wkspSize` must be >= `(1<= BIT_DStream_completed -+ -+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. -+Checking if DStream has reached its end is performed by : -+ BIT_endOfDStream(&DStream); -+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
-+ FSE_endOfDState(&DState); -+*/ -+ -+/* ***************************************** -+* FSE unsafe API -+*******************************************/ -+static unsigned char FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD); -+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ -+ -+/* ***************************************** -+* Implementation of inlined functions -+*******************************************/ -+typedef struct { -+ int deltaFindState; -+ U32 deltaNbBits; -+} FSE_symbolCompressionTransform; /* total 8 bytes */ -+ -+ZSTD_STATIC void FSE_initCState(FSE_CState_t *statePtr, const FSE_CTable *ct) -+{ -+ const void *ptr = ct; -+ const U16 *u16ptr = (const U16 *)ptr; -+ const U32 tableLog = ZSTD_read16(ptr); -+ statePtr->value = (ptrdiff_t)1 << tableLog; -+ statePtr->stateTable = u16ptr + 2; -+ statePtr->symbolTT = ((const U32 *)ct + 1 + (tableLog ? (1 << (tableLog - 1)) : 1)); -+ statePtr->stateLog = tableLog; -+} -+ -+/*! 
FSE_initCState2() : -+* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) -+* uses the smallest state value possible, saving the cost of this symbol */ -+ZSTD_STATIC void FSE_initCState2(FSE_CState_t *statePtr, const FSE_CTable *ct, U32 symbol) -+{ -+ FSE_initCState(statePtr, ct); -+ { -+ const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol]; -+ const U16 *stateTable = (const U16 *)(statePtr->stateTable); -+ U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1 << 15)) >> 16); -+ statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; -+ statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; -+ } -+} -+ -+ZSTD_STATIC void FSE_encodeSymbol(BIT_CStream_t *bitC, FSE_CState_t *statePtr, U32 symbol) -+{ -+ const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol]; -+ const U16 *const stateTable = (const U16 *)(statePtr->stateTable); -+ U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); -+ BIT_addBits(bitC, statePtr->value, nbBitsOut); -+ statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; -+} -+ -+ZSTD_STATIC void FSE_flushCState(BIT_CStream_t *bitC, const FSE_CState_t *statePtr) -+{ -+ BIT_addBits(bitC, statePtr->value, statePtr->stateLog); -+ BIT_flushBits(bitC); -+} -+ -+/* ====== Decompression ====== */ -+ -+typedef struct { -+ U16 tableLog; -+ U16 fastMode; -+} FSE_DTableHeader; /* sizeof U32 */ -+ -+typedef struct { -+ unsigned short newState; -+ unsigned char symbol; -+ unsigned char nbBits; -+} FSE_decode_t; /* size == U32 */ -+ -+ZSTD_STATIC void FSE_initDState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD, const FSE_DTable *dt) -+{ -+ const void *ptr = dt; -+ const FSE_DTableHeader *const DTableH = (const FSE_DTableHeader *)ptr; -+ DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); -+ 
BIT_reloadDStream(bitD); -+ DStatePtr->table = dt + 1; -+} -+ -+ZSTD_STATIC BYTE FSE_peekSymbol(const FSE_DState_t *DStatePtr) -+{ -+ FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; -+ return DInfo.symbol; -+} -+ -+ZSTD_STATIC void FSE_updateState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD) -+{ -+ FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; -+ U32 const nbBits = DInfo.nbBits; -+ size_t const lowBits = BIT_readBits(bitD, nbBits); -+ DStatePtr->state = DInfo.newState + lowBits; -+} -+ -+ZSTD_STATIC BYTE FSE_decodeSymbol(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD) -+{ -+ FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; -+ U32 const nbBits = DInfo.nbBits; -+ BYTE const symbol = DInfo.symbol; -+ size_t const lowBits = BIT_readBits(bitD, nbBits); -+ -+ DStatePtr->state = DInfo.newState + lowBits; -+ return symbol; -+} -+ -+/*! FSE_decodeSymbolFast() : -+ unsafe, only works if no symbol has a probability > 50% */ -+ZSTD_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD) -+{ -+ FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; -+ U32 const nbBits = DInfo.nbBits; -+ BYTE const symbol = DInfo.symbol; -+ size_t const lowBits = BIT_readBitsFast(bitD, nbBits); -+ -+ DStatePtr->state = DInfo.newState + lowBits; -+ return symbol; -+} -+ -+ZSTD_STATIC unsigned FSE_endOfDState(const FSE_DState_t *DStatePtr) { return DStatePtr->state == 0; } -+ -+/* ************************************************************** -+* Tuning parameters -+****************************************************************/ -+/*!MEMORY_USAGE : -+* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 
-+* Increasing memory usage improves compression ratio -+* Reduced memory usage can improve speed, due to cache effect -+* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ -+#ifndef FSE_MAX_MEMORY_USAGE -+#define FSE_MAX_MEMORY_USAGE 14 -+#endif -+#ifndef FSE_DEFAULT_MEMORY_USAGE -+#define FSE_DEFAULT_MEMORY_USAGE 13 -+#endif -+ -+/*!FSE_MAX_SYMBOL_VALUE : -+* Maximum symbol value authorized. -+* Required for proper stack allocation */ -+#ifndef FSE_MAX_SYMBOL_VALUE -+#define FSE_MAX_SYMBOL_VALUE 255 -+#endif -+ -+/* ************************************************************** -+* template functions type & suffix -+****************************************************************/ -+#define FSE_FUNCTION_TYPE BYTE -+#define FSE_FUNCTION_EXTENSION -+#define FSE_DECODE_TYPE FSE_decode_t -+ -+/* *************************************************************** -+* Constants -+*****************************************************************/ -+#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE - 2) -+#define FSE_MAX_TABLESIZE (1U << FSE_MAX_TABLELOG) -+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE - 1) -+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE - 2) -+#define FSE_MIN_TABLELOG 5 -+ -+#define FSE_TABLELOG_ABSOLUTE_MAX 15 -+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX -+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" -+#endif -+ -+#define FSE_TABLESTEP(tableSize) ((tableSize >> 1) + (tableSize >> 3) + 3) -+ -+#endif /* FSE_H */ -diff --git a/xen/common/zstd/fse_decompress.c b/xen/common/zstd/fse_decompress.c -new file mode 100644 -index 0000000000..041a5a1f0a ---- /dev/null -+++ b/xen/common/zstd/fse_decompress.c -@@ -0,0 +1,323 @@ -+/* -+ * FSE : Finite State Entropy decoder -+ * Copyright (C) 2013-2015, Yann Collet. 
-+ * -+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are -+ * met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following disclaimer -+ * in the documentation and/or other materials provided with the -+ * distribution. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). 
-+ * -+ * You can contact the author at : -+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -+ */ -+ -+/* ************************************************************** -+* Compiler specifics -+****************************************************************/ -+#define FORCE_INLINE static always_inline -+ -+/* ************************************************************** -+* Includes -+****************************************************************/ -+#include "bitstream.h" -+#include "fse.h" -+#include "zstd_internal.h" -+#include /* memcpy, memset */ -+ -+/* ************************************************************** -+* Error Management -+****************************************************************/ -+#define FSE_isError ERR_isError -+#define FSE_STATIC_ASSERT(c) \ -+ { \ -+ enum { FSE_static_assert = 1 / (int)(!!(c)) }; \ -+ } /* use only *after* variable declarations */ -+ -+/* ************************************************************** -+* Templates -+****************************************************************/ -+/* -+ designed to be included -+ for type-specific functions (template emulation in C) -+ Objective is to write these functions only once, for improved maintenance -+*/ -+ -+/* safety checks */ -+#ifndef FSE_FUNCTION_EXTENSION -+#error "FSE_FUNCTION_EXTENSION must be defined" -+#endif -+#ifndef FSE_FUNCTION_TYPE -+#error "FSE_FUNCTION_TYPE must be defined" -+#endif -+ -+/* Function names */ -+#define FSE_CAT(X, Y) X##Y -+#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y) -+#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y) -+ -+/* Function templates */ -+ -+size_t INIT FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize) -+{ -+ void *const tdPtr = dt + 1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ -+ FSE_DECODE_TYPE *const tableDecode = (FSE_DECODE_TYPE *)(tdPtr); -+ U16 *symbolNext = (U16 *)workspace; -+ -+ U32 
const maxSV1 = maxSymbolValue + 1; -+ U32 const tableSize = 1 << tableLog; -+ U32 highThreshold = tableSize - 1; -+ -+ /* Sanity Checks */ -+ if (workspaceSize < sizeof(U16) * (FSE_MAX_SYMBOL_VALUE + 1)) -+ return ERROR(tableLog_tooLarge); -+ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) -+ return ERROR(maxSymbolValue_tooLarge); -+ if (tableLog > FSE_MAX_TABLELOG) -+ return ERROR(tableLog_tooLarge); -+ -+ /* Init, lay down lowprob symbols */ -+ { -+ FSE_DTableHeader DTableH; -+ DTableH.tableLog = (U16)tableLog; -+ DTableH.fastMode = 1; -+ { -+ S16 const largeLimit = (S16)(1 << (tableLog - 1)); -+ U32 s; -+ for (s = 0; s < maxSV1; s++) { -+ if (normalizedCounter[s] == -1) { -+ tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; -+ symbolNext[s] = 1; -+ } else { -+ if (normalizedCounter[s] >= largeLimit) -+ DTableH.fastMode = 0; -+ symbolNext[s] = normalizedCounter[s]; -+ } -+ } -+ } -+ memcpy(dt, &DTableH, sizeof(DTableH)); -+ } -+ -+ /* Spread symbols */ -+ { -+ U32 const tableMask = tableSize - 1; -+ U32 const step = FSE_TABLESTEP(tableSize); -+ U32 s, position = 0; -+ for (s = 0; s < maxSV1; s++) { -+ int i; -+ for (i = 0; i < normalizedCounter[s]; i++) { -+ tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s; -+ position = (position + step) & tableMask; -+ while (position > highThreshold) -+ position = (position + step) & tableMask; /* lowprob area */ -+ } -+ } -+ if (position != 0) -+ return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ -+ } -+ -+ /* Build Decoding table */ -+ { -+ U32 u; -+ for (u = 0; u < tableSize; u++) { -+ FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); -+ U16 nextState = symbolNext[symbol]++; -+ tableDecode[u].nbBits = (BYTE)(tableLog - BIT_highbit32((U32)nextState)); -+ tableDecode[u].newState = (U16)((nextState << tableDecode[u].nbBits) - tableSize); -+ } -+ } -+ -+ return 0; -+} -+ -+/*-******************************************************* -+* 
Decompression (Byte symbols) -+*********************************************************/ -+size_t INIT FSE_buildDTable_rle(FSE_DTable *dt, BYTE symbolValue) -+{ -+ void *ptr = dt; -+ FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr; -+ void *dPtr = dt + 1; -+ FSE_decode_t *const cell = (FSE_decode_t *)dPtr; -+ -+ DTableH->tableLog = 0; -+ DTableH->fastMode = 0; -+ -+ cell->newState = 0; -+ cell->symbol = symbolValue; -+ cell->nbBits = 0; -+ -+ return 0; -+} -+ -+size_t INIT FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits) -+{ -+ void *ptr = dt; -+ FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr; -+ void *dPtr = dt + 1; -+ FSE_decode_t *const dinfo = (FSE_decode_t *)dPtr; -+ const unsigned tableSize = 1 << nbBits; -+ const unsigned tableMask = tableSize - 1; -+ const unsigned maxSV1 = tableMask + 1; -+ unsigned s; -+ -+ /* Sanity checks */ -+ if (nbBits < 1) -+ return ERROR(GENERIC); /* min size */ -+ -+ /* Build Decoding Table */ -+ DTableH->tableLog = (U16)nbBits; -+ DTableH->fastMode = 1; -+ for (s = 0; s < maxSV1; s++) { -+ dinfo[s].newState = 0; -+ dinfo[s].symbol = (BYTE)s; -+ dinfo[s].nbBits = (BYTE)nbBits; -+ } -+ -+ return 0; -+} -+ -+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt, -+ const unsigned fast) -+{ -+ BYTE *const ostart = (BYTE *)dst; -+ BYTE *op = ostart; -+ BYTE *const omax = op + maxDstSize; -+ BYTE *const olimit = omax - 3; -+ -+ BIT_DStream_t bitD; -+ FSE_DState_t state1; -+ FSE_DState_t state2; -+ -+ /* Init */ -+ CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); -+ -+ FSE_initDState(&state1, &bitD, dt); -+ FSE_initDState(&state2, &bitD, dt); -+ -+#define FSE_GETSYMBOL(statePtr) fast ? 
FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) -+ -+ /* 4 symbols per loop */ -+ for (; (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished) & (op < olimit); op += 4) { -+ op[0] = FSE_GETSYMBOL(&state1); -+ -+ if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ -+ BIT_reloadDStream(&bitD); -+ -+ op[1] = FSE_GETSYMBOL(&state2); -+ -+ if (FSE_MAX_TABLELOG * 4 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ -+ { -+ if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { -+ op += 2; -+ break; -+ } -+ } -+ -+ op[2] = FSE_GETSYMBOL(&state1); -+ -+ if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ -+ BIT_reloadDStream(&bitD); -+ -+ op[3] = FSE_GETSYMBOL(&state2); -+ } -+ -+ /* tail */ -+ /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ -+ while (1) { -+ if (op > (omax - 2)) -+ return ERROR(dstSize_tooSmall); -+ *op++ = FSE_GETSYMBOL(&state1); -+ if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) { -+ *op++ = FSE_GETSYMBOL(&state2); -+ break; -+ } -+ -+ if (op > (omax - 2)) -+ return ERROR(dstSize_tooSmall); -+ *op++ = FSE_GETSYMBOL(&state2); -+ if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) { -+ *op++ = FSE_GETSYMBOL(&state1); -+ break; -+ } -+ } -+ -+ return op - ostart; -+} -+ -+size_t INIT FSE_decompress_usingDTable(void *dst, size_t originalSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt) -+{ -+ const void *ptr = dt; -+ const FSE_DTableHeader *DTableH = (const FSE_DTableHeader *)ptr; -+ const U32 fastMode = DTableH->fastMode; -+ -+ /* select fast mode (static) */ -+ if (fastMode) -+ return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); -+ return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); -+} -+ -+size_t INIT FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, 
unsigned maxLog, void *workspace, size_t workspaceSize) -+{ -+ const BYTE *const istart = (const BYTE *)cSrc; -+ const BYTE *ip = istart; -+ unsigned tableLog; -+ unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; -+ size_t NCountLength; -+ -+ FSE_DTable *dt; -+ short *counting; -+ size_t spaceUsed32 = 0; -+ -+ FSE_STATIC_ASSERT(sizeof(FSE_DTable) == sizeof(U32)); -+ -+ dt = (FSE_DTable *)((U32 *)workspace + spaceUsed32); -+ spaceUsed32 += FSE_DTABLE_SIZE_U32(maxLog); -+ counting = (short *)((U32 *)workspace + spaceUsed32); -+ spaceUsed32 += ALIGN(sizeof(short) * (FSE_MAX_SYMBOL_VALUE + 1), sizeof(U32)) >> 2; -+ -+ if ((spaceUsed32 << 2) > workspaceSize) -+ return ERROR(tableLog_tooLarge); -+ workspace = (U32 *)workspace + spaceUsed32; -+ workspaceSize -= (spaceUsed32 << 2); -+ -+ /* normal FSE decoding mode */ -+ NCountLength = FSE_readNCount(counting, &maxSymbolValue, &tableLog, istart, cSrcSize); -+ if (FSE_isError(NCountLength)) -+ return NCountLength; -+ // if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining -+ // case : NCountLength==cSrcSize */ -+ if (tableLog > maxLog) -+ return ERROR(tableLog_tooLarge); -+ ip += NCountLength; -+ cSrcSize -= NCountLength; -+ -+ CHECK_F(FSE_buildDTable_wksp(dt, counting, maxSymbolValue, tableLog, workspace, workspaceSize)); -+ -+ return FSE_decompress_usingDTable(dst, dstCapacity, ip, cSrcSize, dt); /* always return, even if it is an error code */ -+} -diff --git a/xen/common/zstd/huf.h b/xen/common/zstd/huf.h -new file mode 100644 -index 0000000000..a9d522c7bb ---- /dev/null -+++ b/xen/common/zstd/huf.h -@@ -0,0 +1,212 @@ -+/* -+ * Huffman coder, part of New Generation Entropy library -+ * header file -+ * Copyright (C) 2013-2016, Yann Collet. 
-+ * -+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are -+ * met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following disclaimer -+ * in the documentation and/or other materials provided with the -+ * distribution. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). 
-+ * -+ * You can contact the author at : -+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -+ */ -+#ifndef HUF_H_298734234 -+#define HUF_H_298734234 -+ -+/* *** Dependencies *** */ -+#include /* size_t */ -+ -+/* *** Tool functions *** */ -+#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ -+size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ -+ -+/* Error Management */ -+unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ -+ -+/* *** Advanced function *** */ -+ -+/** HUF_compress4X_wksp() : -+* Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */ -+size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, -+ size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ -+ -+/* *** Dependencies *** */ -+#include "mem.h" /* U32 */ -+ -+/* *** Constants *** */ -+#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ -+#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */ -+#define HUF_SYMBOLVALUE_MAX 255 -+ -+#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ -+#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) -+#error "HUF_TABLELOG_MAX is too large !" 
-+#endif -+ -+/* **************************************** -+* Static allocation -+******************************************/ -+/* HUF buffer bounds */ -+#define HUF_CTABLEBOUND 129 -+#define HUF_BLOCKBOUND(size) (size + (size >> 8) + 8) /* only true if incompressible pre-filtered with fast heuristic */ -+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ -+ -+/* static allocation of HUF's Compression Table */ -+#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ -+ U32 name##hb[maxSymbolValue + 1]; \ -+ void *name##hv = &(name##hb); \ -+ HUF_CElt *name = (HUF_CElt *)(name##hv) /* no final ; */ -+ -+/* static allocation of HUF's DTable */ -+typedef U32 HUF_DTable; -+#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1 << (maxTableLog))) -+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = {((U32)((maxTableLog)-1) * 0x01000001)} -+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = {((U32)(maxTableLog)*0x01000001)} -+ -+/* The workspace must have alignment at least 4 and be at least this large */ -+#define HUF_COMPRESS_WORKSPACE_SIZE (6 << 10) -+#define HUF_COMPRESS_WORKSPACE_SIZE_U32 (HUF_COMPRESS_WORKSPACE_SIZE / sizeof(U32)) -+ -+/* The workspace must have alignment at least 4 and be at least this large */ -+#define HUF_DECOMPRESS_WORKSPACE_SIZE (3 << 10) -+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) -+ -+/* **************************************** -+* Advanced decompression functions -+******************************************/ -+size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize); /**< decodes RLE and uncompressed */ -+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void 
*workspace, -+ size_t workspaceSize); /**< considers RLE and uncompressed as errors */ -+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, -+ size_t workspaceSize); /**< single-symbol decoder */ -+size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, -+ size_t workspaceSize); /**< double-symbols decoder */ -+ -+/* **************************************** -+* HUF detailed API -+******************************************/ -+/*! -+HUF_compress() does the following: -+1. count symbol occurrence from source[] into table count[] using FSE_count() -+2. (optional) refine tableLog using HUF_optimalTableLog() -+3. build Huffman table from count using HUF_buildCTable() -+4. save Huffman table to memory buffer using HUF_writeCTable_wksp() -+5. encode the data stream using HUF_compress4X_usingCTable() -+ -+The following API allows targeting specific sub-functions for advanced tasks. -+For example, it's possible to compress several blocks using the same 'CTable', -+or to save and regenerate 'CTable' using external methods. -+*/ -+/* FSE_count() : find it within "fse.h" */ -+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); -+typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ -+size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, unsigned maxSymbolValue, unsigned huffLog, void *workspace, size_t workspaceSize); -+size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable); -+ -+typedef enum { -+ HUF_repeat_none, /**< Cannot use the previous table */ -+ HUF_repeat_check, /**< Can use the previous table but it must be checked. 
Note : The previous table must have been constructed by HUF_compress{1, -+ 4}X_repeat */ -+ HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */ -+} HUF_repeat; -+/** HUF_compress4X_repeat() : -+* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. -+* If it uses hufTable it does not modify hufTable or repeat. -+* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. -+* If preferRepeat then the old table will always be used if valid. */ -+size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, -+ size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, -+ int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ -+ -+/** HUF_buildCTable_wksp() : -+ * Same as HUF_buildCTable(), but using externally allocated scratch buffer. -+ * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. -+ */ -+size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize); -+ -+/*! HUF_readStats() : -+ Read compact Huffman tree, saved by HUF_writeCTable(). -+ `huffWeight` is destination buffer. -+ @return : size read from `src` , or an error Code . -+ Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ -+size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, -+ void *workspace, size_t workspaceSize); -+ -+/** HUF_readCTable() : -+* Loading a CTable saved with HUF_writeCTable() */ -+size_t HUF_readCTable_wksp(HUF_CElt *CTable, unsigned maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize); -+ -+/* -+HUF_decompress() does the following: -+1. 
select the decompression algorithm (X2, X4) based on pre-computed heuristics -+2. build Huffman table from save, using HUF_readDTableXn() -+3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable -+*/ -+ -+/** HUF_selectDecoder() : -+* Tells which decoder is likely to decode faster, -+* based on a set of pre-determined metrics. -+* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . -+* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ -+U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize); -+ -+size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize); -+size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize); -+ -+size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); -+size_t HUF_decompress4X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); -+size_t HUF_decompress4X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); -+ -+/* single stream variants */ -+ -+size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, -+ size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ -+size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable); -+/** HUF_compress1X_repeat() : -+* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. -+* If it uses hufTable it does not modify hufTable or repeat. -+* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. -+* If preferRepeat then the old table will always be used if valid. 
*/ -+size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, -+ size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, -+ int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ -+ -+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize); -+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, -+ size_t workspaceSize); /**< single-symbol decoder */ -+size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, -+ size_t workspaceSize); /**< double-symbols decoder */ -+ -+size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, -+ const HUF_DTable *DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ -+size_t HUF_decompress1X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); -+size_t HUF_decompress1X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); -+ -+#endif /* HUF_H_298734234 */ -diff --git a/xen/common/zstd/huf_decompress.c b/xen/common/zstd/huf_decompress.c -new file mode 100644 -index 0000000000..f79603a12f ---- /dev/null -+++ b/xen/common/zstd/huf_decompress.c -@@ -0,0 +1,958 @@ -+/* -+ * Huffman decoder, part of New Generation Entropy library -+ * Copyright (C) 2013-2016, Yann Collet. 
-+ * -+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are -+ * met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following disclaimer -+ * in the documentation and/or other materials provided with the -+ * distribution. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). 
-+ * -+ * You can contact the author at : -+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy -+ */ -+ -+/* ************************************************************** -+* Compiler specifics -+****************************************************************/ -+#define FORCE_INLINE static always_inline -+ -+/* ************************************************************** -+* Dependencies -+****************************************************************/ -+#include "bitstream.h" /* BIT_* */ -+#include "fse.h" /* header compression */ -+#include "huf.h" -+#include /* memcpy, memset */ -+ -+/* ************************************************************** -+* Error Management -+****************************************************************/ -+#define HUF_STATIC_ASSERT(c) \ -+ { \ -+ enum { HUF_static_assert = 1 / (int)(!!(c)) }; \ -+ } /* use only *after* variable declarations */ -+ -+/*-***************************/ -+/* generic DTableDesc */ -+/*-***************************/ -+ -+typedef struct { -+ BYTE maxTableLog; -+ BYTE tableType; -+ BYTE tableLog; -+ BYTE reserved; -+} DTableDesc; -+ -+static DTableDesc INIT HUF_getDTableDesc(const HUF_DTable *table) -+{ -+ DTableDesc dtd; -+ memcpy(&dtd, table, sizeof(dtd)); -+ return dtd; -+} -+ -+/*-***************************/ -+/* single-symbol decoding */ -+/*-***************************/ -+ -+typedef struct { -+ BYTE byte; -+ BYTE nbBits; -+} HUF_DEltX2; /* single-symbol decoding */ -+ -+size_t INIT HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) -+{ -+ U32 tableLog = 0; -+ U32 nbSymbols = 0; -+ size_t iSize; -+ void *const dtPtr = DTable + 1; -+ HUF_DEltX2 *const dt = (HUF_DEltX2 *)dtPtr; -+ -+ U32 *rankVal; -+ BYTE *huffWeight; -+ size_t spaceUsed32 = 0; -+ -+ rankVal = (U32 *)workspace + spaceUsed32; -+ spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; -+ huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32); -+ spaceUsed32 += 
ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; -+ -+ if ((spaceUsed32 << 2) > workspaceSize) -+ return ERROR(tableLog_tooLarge); -+ workspace = (U32 *)workspace + spaceUsed32; -+ workspaceSize -= (spaceUsed32 << 2); -+ -+ HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); -+ /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ -+ -+ iSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize); -+ if (HUF_isError(iSize)) -+ return iSize; -+ -+ /* Table header */ -+ { -+ DTableDesc dtd = HUF_getDTableDesc(DTable); -+ if (tableLog > (U32)(dtd.maxTableLog + 1)) -+ return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ -+ dtd.tableType = 0; -+ dtd.tableLog = (BYTE)tableLog; -+ memcpy(DTable, &dtd, sizeof(dtd)); -+ } -+ -+ /* Calculate starting value for each rank */ -+ { -+ U32 n, nextRankStart = 0; -+ for (n = 1; n < tableLog + 1; n++) { -+ U32 const curr = nextRankStart; -+ nextRankStart += (rankVal[n] << (n - 1)); -+ rankVal[n] = curr; -+ } -+ } -+ -+ /* fill DTable */ -+ { -+ U32 n; -+ for (n = 0; n < nbSymbols; n++) { -+ U32 const w = huffWeight[n]; -+ U32 const length = (1 << w) >> 1; -+ U32 u; -+ HUF_DEltX2 D; -+ D.byte = (BYTE)n; -+ D.nbBits = (BYTE)(tableLog + 1 - w); -+ for (u = rankVal[w]; u < rankVal[w] + length; u++) -+ dt[u] = D; -+ rankVal[w] += length; -+ } -+ } -+ -+ return iSize; -+} -+ -+static BYTE INIT HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog) -+{ -+ size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ -+ BYTE const c = dt[val].byte; -+ BIT_skipBits(Dstream, dt[val].nbBits); -+ return c; -+} -+ -+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) -+ -+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ -+ if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \ -+ 
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) -+ -+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ -+ if (ZSTD_64bits()) \ -+ HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) -+ -+FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog) -+{ -+ BYTE *const pStart = p; -+ -+ /* up to 4 symbols at a time */ -+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) { -+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr); -+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr); -+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr); -+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); -+ } -+ -+ /* closer to the end */ -+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd)) -+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); -+ -+ /* no more data to retrieve from bitstream, hence no need to reload */ -+ while (p < pEnd) -+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); -+ -+ return pEnd - pStart; -+} -+ -+static size_t INIT HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -+{ -+ BYTE *op = (BYTE *)dst; -+ BYTE *const oend = op + dstSize; -+ const void *dtPtr = DTable + 1; -+ const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr; -+ BIT_DStream_t bitD; -+ DTableDesc const dtd = HUF_getDTableDesc(DTable); -+ U32 const dtLog = dtd.tableLog; -+ -+ { -+ size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); -+ if (HUF_isError(errorCode)) -+ return errorCode; -+ } -+ -+ HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog); -+ -+ /* check */ -+ if (!BIT_endOfDStream(&bitD)) -+ return ERROR(corruption_detected); -+ -+ return dstSize; -+} -+ -+size_t INIT HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -+{ -+ DTableDesc dtd = HUF_getDTableDesc(DTable); -+ if (dtd.tableType != 0) -+ return ERROR(GENERIC); -+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -+} -+ 
-+size_t INIT HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -+{ -+ const BYTE *ip = (const BYTE *)cSrc; -+ -+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize); -+ if (HUF_isError(hSize)) -+ return hSize; -+ if (hSize >= cSrcSize) -+ return ERROR(srcSize_wrong); -+ ip += hSize; -+ cSrcSize -= hSize; -+ -+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx); -+} -+ -+static size_t INIT HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -+{ -+ /* Check */ -+ if (cSrcSize < 10) -+ return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ -+ -+ { -+ const BYTE *const istart = (const BYTE *)cSrc; -+ BYTE *const ostart = (BYTE *)dst; -+ BYTE *const oend = ostart + dstSize; -+ const void *const dtPtr = DTable + 1; -+ const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr; -+ -+ /* Init */ -+ BIT_DStream_t bitD1; -+ BIT_DStream_t bitD2; -+ BIT_DStream_t bitD3; -+ BIT_DStream_t bitD4; -+ size_t const length1 = ZSTD_readLE16(istart); -+ size_t const length2 = ZSTD_readLE16(istart + 2); -+ size_t const length3 = ZSTD_readLE16(istart + 4); -+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); -+ const BYTE *const istart1 = istart + 6; /* jumpTable */ -+ const BYTE *const istart2 = istart1 + length1; -+ const BYTE *const istart3 = istart2 + length2; -+ const BYTE *const istart4 = istart3 + length3; -+ const size_t segmentSize = (dstSize + 3) / 4; -+ BYTE *const opStart2 = ostart + segmentSize; -+ BYTE *const opStart3 = opStart2 + segmentSize; -+ BYTE *const opStart4 = opStart3 + segmentSize; -+ BYTE *op1 = ostart; -+ BYTE *op2 = opStart2; -+ BYTE *op3 = opStart3; -+ BYTE *op4 = opStart4; -+ U32 endSignal; -+ DTableDesc const dtd = HUF_getDTableDesc(DTable); -+ U32 const dtLog = 
dtd.tableLog; -+ -+ if (length4 > cSrcSize) -+ return ERROR(corruption_detected); /* overflow */ -+ { -+ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); -+ if (HUF_isError(errorCode)) -+ return errorCode; -+ } -+ { -+ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); -+ if (HUF_isError(errorCode)) -+ return errorCode; -+ } -+ { -+ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); -+ if (HUF_isError(errorCode)) -+ return errorCode; -+ } -+ { -+ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); -+ if (HUF_isError(errorCode)) -+ return errorCode; -+ } -+ -+ /* 16-32 symbols per loop (4-8 symbols per stream) */ -+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); -+ for (; (endSignal == BIT_DStream_unfinished) && (op4 < (oend - 7));) { -+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1); -+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2); -+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3); -+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4); -+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1); -+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2); -+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3); -+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4); -+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1); -+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2); -+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3); -+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4); -+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1); -+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2); -+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3); -+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4); -+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); -+ } -+ -+ /* check corruption */ -+ if (op1 > opStart2) -+ return ERROR(corruption_detected); -+ if (op2 > opStart3) -+ return ERROR(corruption_detected); -+ if (op3 > opStart4) -+ return ERROR(corruption_detected); -+ /* note : op4 supposed already verified within main loop */ -+ -+ /* finish bitStreams one by one */ -+ HUF_decodeStreamX2(op1, &bitD1, 
opStart2, dt, dtLog); -+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); -+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); -+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); -+ -+ /* check */ -+ endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); -+ if (!endSignal) -+ return ERROR(corruption_detected); -+ -+ /* decoded size */ -+ return dstSize; -+ } -+} -+ -+size_t INIT HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -+{ -+ DTableDesc dtd = HUF_getDTableDesc(DTable); -+ if (dtd.tableType != 0) -+ return ERROR(GENERIC); -+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -+} -+ -+size_t INIT HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -+{ -+ const BYTE *ip = (const BYTE *)cSrc; -+ -+ size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize); -+ if (HUF_isError(hSize)) -+ return hSize; -+ if (hSize >= cSrcSize) -+ return ERROR(srcSize_wrong); -+ ip += hSize; -+ cSrcSize -= hSize; -+ -+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx); -+} -+ -+/* *************************/ -+/* double-symbols decoding */ -+/* *************************/ -+typedef struct { -+ U16 sequence; -+ BYTE nbBits; -+ BYTE length; -+} HUF_DEltX4; /* double-symbols decoding */ -+ -+typedef struct { -+ BYTE symbol; -+ BYTE weight; -+} sortedSymbol_t; -+ -+/* HUF_fillDTableX4Level2() : -+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ -+static void INIT HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight, -+ const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq) -+{ -+ HUF_DEltX4 DElt; -+ U32 
rankVal[HUF_TABLELOG_MAX + 1]; -+ -+ /* get pre-calculated rankVal */ -+ memcpy(rankVal, rankValOrigin, sizeof(rankVal)); -+ -+ /* fill skipped values */ -+ if (minWeight > 1) { -+ U32 i, skipSize = rankVal[minWeight]; -+ ZSTD_writeLE16(&(DElt.sequence), baseSeq); -+ DElt.nbBits = (BYTE)(consumed); -+ DElt.length = 1; -+ for (i = 0; i < skipSize; i++) -+ DTable[i] = DElt; -+ } -+ -+ /* fill DTable */ -+ { -+ U32 s; -+ for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */ -+ const U32 symbol = sortedSymbols[s].symbol; -+ const U32 weight = sortedSymbols[s].weight; -+ const U32 nbBits = nbBitsBaseline - weight; -+ const U32 length = 1 << (sizeLog - nbBits); -+ const U32 start = rankVal[weight]; -+ U32 i = start; -+ const U32 end = start + length; -+ -+ ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8))); -+ DElt.nbBits = (BYTE)(nbBits + consumed); -+ DElt.length = 2; -+ do { -+ DTable[i++] = DElt; -+ } while (i < end); /* since length >= 1 */ -+ -+ rankVal[weight] += length; -+ } -+ } -+} -+ -+typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1]; -+typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; -+ -+static void INIT HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, const U32 sortedListSize, const U32 *rankStart, -+ rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline) -+{ -+ U32 rankVal[HUF_TABLELOG_MAX + 1]; -+ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ -+ const U32 minBits = nbBitsBaseline - maxWeight; -+ U32 s; -+ -+ memcpy(rankVal, rankValOrigin, sizeof(rankVal)); -+ -+ /* fill DTable */ -+ for (s = 0; s < sortedListSize; s++) { -+ const U16 symbol = sortedList[s].symbol; -+ const U32 weight = sortedList[s].weight; -+ const U32 nbBits = nbBitsBaseline - weight; -+ const U32 start = rankVal[weight]; -+ const U32 length = 1 << (targetLog - nbBits); -+ -+ if (targetLog - nbBits >= minBits) { /* 
enough room for a second symbol */ -+ U32 sortedRank; -+ int minWeight = nbBits + scaleLog; -+ if (minWeight < 1) -+ minWeight = 1; -+ sortedRank = rankStart[minWeight]; -+ HUF_fillDTableX4Level2(DTable + start, targetLog - nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank, -+ sortedListSize - sortedRank, nbBitsBaseline, symbol); -+ } else { -+ HUF_DEltX4 DElt; -+ ZSTD_writeLE16(&(DElt.sequence), symbol); -+ DElt.nbBits = (BYTE)(nbBits); -+ DElt.length = 1; -+ { -+ U32 const end = start + length; -+ U32 u; -+ for (u = start; u < end; u++) -+ DTable[u] = DElt; -+ } -+ } -+ rankVal[weight] += length; -+ } -+} -+ -+size_t INIT HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) -+{ -+ U32 tableLog, maxW, sizeOfSort, nbSymbols; -+ DTableDesc dtd = HUF_getDTableDesc(DTable); -+ U32 const maxTableLog = dtd.maxTableLog; -+ size_t iSize; -+ void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */ -+ HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr; -+ U32 *rankStart; -+ -+ rankValCol_t *rankVal; -+ U32 *rankStats; -+ U32 *rankStart0; -+ sortedSymbol_t *sortedSymbol; -+ BYTE *weightList; -+ size_t spaceUsed32 = 0; -+ -+ HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0); -+ -+ rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32); -+ spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; -+ rankStats = (U32 *)workspace + spaceUsed32; -+ spaceUsed32 += HUF_TABLELOG_MAX + 1; -+ rankStart0 = (U32 *)workspace + spaceUsed32; -+ spaceUsed32 += HUF_TABLELOG_MAX + 2; -+ sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32); -+ spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; -+ weightList = (BYTE *)((U32 *)workspace + spaceUsed32); -+ spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; -+ -+ if ((spaceUsed32 << 2) > workspaceSize) -+ return ERROR(tableLog_tooLarge); -+ workspace = (U32 *)workspace + 
spaceUsed32; -+ workspaceSize -= (spaceUsed32 << 2); -+ -+ rankStart = rankStart0 + 1; -+ memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); -+ -+ HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ -+ if (maxTableLog > HUF_TABLELOG_MAX) -+ return ERROR(tableLog_tooLarge); -+ /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ -+ -+ iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize); -+ if (HUF_isError(iSize)) -+ return iSize; -+ -+ /* check result */ -+ if (tableLog > maxTableLog) -+ return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ -+ -+ /* find maxWeight */ -+ for (maxW = tableLog; rankStats[maxW] == 0; maxW--) { -+ } /* necessarily finds a solution before 0 */ -+ -+ /* Get start index of each weight */ -+ { -+ U32 w, nextRankStart = 0; -+ for (w = 1; w < maxW + 1; w++) { -+ U32 curr = nextRankStart; -+ nextRankStart += rankStats[w]; -+ rankStart[w] = curr; -+ } -+ rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ -+ sizeOfSort = nextRankStart; -+ } -+ -+ /* sort symbols by weight */ -+ { -+ U32 s; -+ for (s = 0; s < nbSymbols; s++) { -+ U32 const w = weightList[s]; -+ U32 const r = rankStart[w]++; -+ sortedSymbol[r].symbol = (BYTE)s; -+ sortedSymbol[r].weight = (BYTE)w; -+ } -+ rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ -+ } -+ -+ /* Build rankVal */ -+ { -+ U32 *const rankVal0 = rankVal[0]; -+ { -+ int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */ -+ U32 nextRankVal = 0; -+ U32 w; -+ for (w = 1; w < maxW + 1; w++) { -+ U32 curr = nextRankVal; -+ nextRankVal += rankStats[w] << (w + rescale); -+ rankVal0[w] = curr; -+ } -+ } -+ { -+ U32 const minBits = tableLog + 1 - maxW; -+ U32 consumed; -+ for (consumed = minBits; consumed < 
maxTableLog - minBits + 1; consumed++) { -+ U32 *const rankValPtr = rankVal[consumed]; -+ U32 w; -+ for (w = 1; w < maxW + 1; w++) { -+ rankValPtr[w] = rankVal0[w] >> consumed; -+ } -+ } -+ } -+ } -+ -+ HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1); -+ -+ dtd.tableLog = (BYTE)maxTableLog; -+ dtd.tableType = 1; -+ memcpy(DTable, &dtd, sizeof(dtd)); -+ return iSize; -+} -+ -+static U32 INIT HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog) -+{ -+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ -+ memcpy(op, dt + val, 2); -+ BIT_skipBits(DStream, dt[val].nbBits); -+ return dt[val].length; -+} -+ -+static U32 INIT HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog) -+{ -+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ -+ memcpy(op, dt + val, 1); -+ if (dt[val].length == 1) -+ BIT_skipBits(DStream, dt[val].nbBits); -+ else { -+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) { -+ BIT_skipBits(DStream, dt[val].nbBits); -+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8)) -+ /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ -+ DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8); -+ } -+ } -+ return 1; -+} -+ -+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) -+ -+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ -+ if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \ -+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) -+ -+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ -+ if (ZSTD_64bits()) \ -+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) -+ -+FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog) -+{ -+ BYTE *const pStart = p; -+ -+ /* up to 8 symbols at a time */ -+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) { -+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr); -+ HUF_DECODE_SYMBOLX4_1(p, bitDPtr); -+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr); -+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr); -+ } -+ -+ /* closer to end : up to 2 symbols at a time */ -+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2)) -+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr); -+ -+ while (p <= pEnd - 2) -+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ -+ -+ if (p < pEnd) -+ p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); -+ -+ return p - pStart; -+} -+ -+static size_t INIT HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -+{ -+ BIT_DStream_t bitD; -+ -+ /* Init */ -+ { -+ size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); -+ if (HUF_isError(errorCode)) -+ return errorCode; -+ } -+ -+ /* decode */ -+ { -+ BYTE *const ostart = (BYTE *)dst; -+ BYTE *const oend = ostart + dstSize; -+ const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */ -+ const HUF_DEltX4 *const dt = (const 
HUF_DEltX4 *)dtPtr; -+ DTableDesc const dtd = HUF_getDTableDesc(DTable); -+ HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog); -+ } -+ -+ /* check */ -+ if (!BIT_endOfDStream(&bitD)) -+ return ERROR(corruption_detected); -+ -+ /* decoded size */ -+ return dstSize; -+} -+ -+size_t INIT HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -+{ -+ DTableDesc dtd = HUF_getDTableDesc(DTable); -+ if (dtd.tableType != 1) -+ return ERROR(GENERIC); -+ return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -+} -+ -+size_t INIT HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -+{ -+ const BYTE *ip = (const BYTE *)cSrc; -+ -+ size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize); -+ if (HUF_isError(hSize)) -+ return hSize; -+ if (hSize >= cSrcSize) -+ return ERROR(srcSize_wrong); -+ ip += hSize; -+ cSrcSize -= hSize; -+ -+ return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx); -+} -+ -+static size_t INIT HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -+{ -+ if (cSrcSize < 10) -+ return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ -+ -+ { -+ const BYTE *const istart = (const BYTE *)cSrc; -+ BYTE *const ostart = (BYTE *)dst; -+ BYTE *const oend = ostart + dstSize; -+ const void *const dtPtr = DTable + 1; -+ const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr; -+ -+ /* Init */ -+ BIT_DStream_t bitD1; -+ BIT_DStream_t bitD2; -+ BIT_DStream_t bitD3; -+ BIT_DStream_t bitD4; -+ size_t const length1 = ZSTD_readLE16(istart); -+ size_t const length2 = ZSTD_readLE16(istart + 2); -+ size_t const length3 = ZSTD_readLE16(istart + 4); -+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); -+ const 
BYTE *const istart1 = istart + 6; /* jumpTable */ -+ const BYTE *const istart2 = istart1 + length1; -+ const BYTE *const istart3 = istart2 + length2; -+ const BYTE *const istart4 = istart3 + length3; -+ size_t const segmentSize = (dstSize + 3) / 4; -+ BYTE *const opStart2 = ostart + segmentSize; -+ BYTE *const opStart3 = opStart2 + segmentSize; -+ BYTE *const opStart4 = opStart3 + segmentSize; -+ BYTE *op1 = ostart; -+ BYTE *op2 = opStart2; -+ BYTE *op3 = opStart3; -+ BYTE *op4 = opStart4; -+ U32 endSignal; -+ DTableDesc const dtd = HUF_getDTableDesc(DTable); -+ U32 const dtLog = dtd.tableLog; -+ -+ if (length4 > cSrcSize) -+ return ERROR(corruption_detected); /* overflow */ -+ { -+ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); -+ if (HUF_isError(errorCode)) -+ return errorCode; -+ } -+ { -+ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); -+ if (HUF_isError(errorCode)) -+ return errorCode; -+ } -+ { -+ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); -+ if (HUF_isError(errorCode)) -+ return errorCode; -+ } -+ { -+ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); -+ if (HUF_isError(errorCode)) -+ return errorCode; -+ } -+ -+ /* 16-32 symbols per loop (4-8 symbols per stream) */ -+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); -+ for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) { -+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1); -+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2); -+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3); -+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4); -+ HUF_DECODE_SYMBOLX4_1(op1, &bitD1); -+ HUF_DECODE_SYMBOLX4_1(op2, &bitD2); -+ HUF_DECODE_SYMBOLX4_1(op3, &bitD3); -+ HUF_DECODE_SYMBOLX4_1(op4, &bitD4); -+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1); -+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2); -+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3); -+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4); -+ 
HUF_DECODE_SYMBOLX4_0(op1, &bitD1); -+ HUF_DECODE_SYMBOLX4_0(op2, &bitD2); -+ HUF_DECODE_SYMBOLX4_0(op3, &bitD3); -+ HUF_DECODE_SYMBOLX4_0(op4, &bitD4); -+ -+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); -+ } -+ -+ /* check corruption */ -+ if (op1 > opStart2) -+ return ERROR(corruption_detected); -+ if (op2 > opStart3) -+ return ERROR(corruption_detected); -+ if (op3 > opStart4) -+ return ERROR(corruption_detected); -+ /* note : op4 already verified within main loop */ -+ -+ /* finish bitStreams one by one */ -+ HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); -+ HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); -+ HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); -+ HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); -+ -+ /* check */ -+ { -+ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); -+ if (!endCheck) -+ return ERROR(corruption_detected); -+ } -+ -+ /* decoded size */ -+ return dstSize; -+ } -+} -+ -+size_t INIT HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -+{ -+ DTableDesc dtd = HUF_getDTableDesc(DTable); -+ if (dtd.tableType != 1) -+ return ERROR(GENERIC); -+ return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -+} -+ -+size_t INIT HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -+{ -+ const BYTE *ip = (const BYTE *)cSrc; -+ -+ size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize); -+ if (HUF_isError(hSize)) -+ return hSize; -+ if (hSize >= cSrcSize) -+ return ERROR(srcSize_wrong); -+ ip += hSize; -+ cSrcSize -= hSize; -+ -+ return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx); -+} -+ -+/* ********************************/ -+/* 
Generic decompression selector */ -+/* ********************************/ -+ -+size_t INIT HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -+{ -+ DTableDesc const dtd = HUF_getDTableDesc(DTable); -+ return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) -+ : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable); -+} -+ -+size_t INIT HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -+{ -+ DTableDesc const dtd = HUF_getDTableDesc(DTable); -+ return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) -+ : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable); -+} -+ -+typedef struct { -+ U32 tableTime; -+ U32 decode256Time; -+} algo_time_t; -+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = { -+ /* single, double, quad */ -+ {{0, 0}, {1, 1}, {2, 2}}, /* Q==0 : impossible */ -+ {{0, 0}, {1, 1}, {2, 2}}, /* Q==1 : impossible */ -+ {{38, 130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ -+ {{448, 128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ -+ {{556, 128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ -+ {{714, 128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ -+ {{883, 128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ -+ {{897, 128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ -+ {{926, 128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ -+ {{947, 128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ -+ {{1107, 128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ -+ {{1177, 128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ -+ {{1242, 128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ -+ {{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */ -+ {{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */ -+ {{722, 
128}, {1891, 145}, {1936, 146}}, /* Q ==15 : 93-99% */ -+}; -+ -+/** HUF_selectDecoder() : -+* Tells which decoder is likely to decode faster, -+* based on a set of pre-determined metrics. -+* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . -+* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ -+U32 INIT HUF_selectDecoder(size_t dstSize, size_t cSrcSize) -+{ -+ /* decoder timing evaluation */ -+ U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */ -+ U32 const D256 = (U32)(dstSize >> 8); -+ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); -+ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); -+ DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */ -+ -+ return DTime1 < DTime0; -+} -+ -+typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize); -+ -+size_t INIT HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -+{ -+ /* validation checks */ -+ if (dstSize == 0) -+ return ERROR(dstSize_tooSmall); -+ if (cSrcSize > dstSize) -+ return ERROR(corruption_detected); /* invalid */ -+ if (cSrcSize == dstSize) { -+ memcpy(dst, cSrc, dstSize); -+ return dstSize; -+ } /* not compressed */ -+ if (cSrcSize == 1) { -+ memset(dst, *(const BYTE *)cSrc, dstSize); -+ return dstSize; -+ } /* RLE */ -+ -+ { -+ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -+ return algoNb ? 
HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize) -+ : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize); -+ } -+} -+ -+size_t INIT HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -+{ -+ /* validation checks */ -+ if (dstSize == 0) -+ return ERROR(dstSize_tooSmall); -+ if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) -+ return ERROR(corruption_detected); /* invalid */ -+ -+ { -+ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -+ return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize) -+ : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize); -+ } -+} -+ -+size_t INIT HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -+{ -+ /* validation checks */ -+ if (dstSize == 0) -+ return ERROR(dstSize_tooSmall); -+ if (cSrcSize > dstSize) -+ return ERROR(corruption_detected); /* invalid */ -+ if (cSrcSize == dstSize) { -+ memcpy(dst, cSrc, dstSize); -+ return dstSize; -+ } /* not compressed */ -+ if (cSrcSize == 1) { -+ memset(dst, *(const BYTE *)cSrc, dstSize); -+ return dstSize; -+ } /* RLE */ -+ -+ { -+ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -+ return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize) -+ : HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize); -+ } -+} -diff --git a/xen/common/zstd/mem.h b/xen/common/zstd/mem.h -new file mode 100644 -index 0000000000..d2fa444687 ---- /dev/null -+++ b/xen/common/zstd/mem.h -@@ -0,0 +1,151 @@ -+/** -+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. -+ * All rights reserved. 
-+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of https://github.com/facebook/zstd. -+ * An additional grant of patent rights can be found in the PATENTS file in the -+ * same directory. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). -+ */ -+ -+#ifndef MEM_H_MODULE -+#define MEM_H_MODULE -+ -+/*-**************************************** -+* Dependencies -+******************************************/ -+#include /* memcpy */ -+#include /* size_t, ptrdiff_t */ -+#include "private.h" -+ -+/*-**************************************** -+* Compiler specifics -+******************************************/ -+#define ZSTD_STATIC static inline -+ -+/*-************************************************************** -+* Basic Types -+*****************************************************************/ -+typedef uint8_t BYTE; -+typedef uint16_t U16; -+typedef int16_t S16; -+typedef uint32_t U32; -+typedef int32_t S32; -+typedef uint64_t U64; -+typedef int64_t S64; -+typedef ptrdiff_t iPtrDiff; -+typedef uintptr_t uPtrDiff; -+ -+/*-************************************************************** -+* Memory I/O -+*****************************************************************/ -+ZSTD_STATIC unsigned ZSTD_32bits(void) { return sizeof(size_t) == 4; } -+ZSTD_STATIC unsigned ZSTD_64bits(void) { return sizeof(size_t) == 8; } -+ -+#if defined(__LITTLE_ENDIAN) -+#define ZSTD_LITTLE_ENDIAN 1 -+#else -+#define ZSTD_LITTLE_ENDIAN 0 -+#endif -+ -+ZSTD_STATIC unsigned ZSTD_isLittleEndian(void) { return ZSTD_LITTLE_ENDIAN; } -+ -+ZSTD_STATIC U16 ZSTD_read16(const void *memPtr) { return get_unaligned((const U16 *)memPtr); } -+ -+ZSTD_STATIC U32 
ZSTD_read32(const void *memPtr) { return get_unaligned((const U32 *)memPtr); } -+ -+ZSTD_STATIC U64 ZSTD_read64(const void *memPtr) { return get_unaligned((const U64 *)memPtr); } -+ -+ZSTD_STATIC size_t ZSTD_readST(const void *memPtr) { return get_unaligned((const size_t *)memPtr); } -+ -+ZSTD_STATIC void ZSTD_write16(void *memPtr, U16 value) { put_unaligned(value, (U16 *)memPtr); } -+ -+ZSTD_STATIC void ZSTD_write32(void *memPtr, U32 value) { put_unaligned(value, (U32 *)memPtr); } -+ -+ZSTD_STATIC void ZSTD_write64(void *memPtr, U64 value) { put_unaligned(value, (U64 *)memPtr); } -+ -+/*=== Little endian r/w ===*/ -+ -+ZSTD_STATIC U16 ZSTD_readLE16(const void *memPtr) { return get_unaligned_le16(memPtr); } -+ -+ZSTD_STATIC void ZSTD_writeLE16(void *memPtr, U16 val) { put_unaligned_le16(val, memPtr); } -+ -+ZSTD_STATIC U32 ZSTD_readLE24(const void *memPtr) { return ZSTD_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16); } -+ -+ZSTD_STATIC void ZSTD_writeLE24(void *memPtr, U32 val) -+{ -+ ZSTD_writeLE16(memPtr, (U16)val); -+ ((BYTE *)memPtr)[2] = (BYTE)(val >> 16); -+} -+ -+ZSTD_STATIC U32 ZSTD_readLE32(const void *memPtr) { return get_unaligned_le32(memPtr); } -+ -+ZSTD_STATIC void ZSTD_writeLE32(void *memPtr, U32 val32) { put_unaligned_le32(val32, memPtr); } -+ -+ZSTD_STATIC U64 ZSTD_readLE64(const void *memPtr) { return get_unaligned_le64(memPtr); } -+ -+ZSTD_STATIC void ZSTD_writeLE64(void *memPtr, U64 val64) { put_unaligned_le64(val64, memPtr); } -+ -+ZSTD_STATIC size_t ZSTD_readLEST(const void *memPtr) -+{ -+ if (ZSTD_32bits()) -+ return (size_t)ZSTD_readLE32(memPtr); -+ else -+ return (size_t)ZSTD_readLE64(memPtr); -+} -+ -+ZSTD_STATIC void ZSTD_writeLEST(void *memPtr, size_t val) -+{ -+ if (ZSTD_32bits()) -+ ZSTD_writeLE32(memPtr, (U32)val); -+ else -+ ZSTD_writeLE64(memPtr, (U64)val); -+} -+ -+/*=== Big endian r/w ===*/ -+ -+ZSTD_STATIC U32 ZSTD_readBE32(const void *memPtr) { return get_unaligned_be32(memPtr); } -+ -+ZSTD_STATIC void ZSTD_writeBE32(void 
*memPtr, U32 val32) { put_unaligned_be32(val32, memPtr); } -+ -+ZSTD_STATIC U64 ZSTD_readBE64(const void *memPtr) { return get_unaligned_be64(memPtr); } -+ -+ZSTD_STATIC void ZSTD_writeBE64(void *memPtr, U64 val64) { put_unaligned_be64(val64, memPtr); } -+ -+ZSTD_STATIC size_t ZSTD_readBEST(const void *memPtr) -+{ -+ if (ZSTD_32bits()) -+ return (size_t)ZSTD_readBE32(memPtr); -+ else -+ return (size_t)ZSTD_readBE64(memPtr); -+} -+ -+ZSTD_STATIC void ZSTD_writeBEST(void *memPtr, size_t val) -+{ -+ if (ZSTD_32bits()) -+ ZSTD_writeBE32(memPtr, (U32)val); -+ else -+ ZSTD_writeBE64(memPtr, (U64)val); -+} -+ -+/* function safe only for comparisons */ -+ZSTD_STATIC U32 ZSTD_readMINMATCH(const void *memPtr, U32 length) -+{ -+ switch (length) { -+ default: -+ case 4: return ZSTD_read32(memPtr); -+ case 3: -+ if (ZSTD_isLittleEndian()) -+ return ZSTD_read32(memPtr) << 8; -+ else -+ return ZSTD_read32(memPtr) >> 8; -+ } -+} -+ -+#endif /* MEM_H_MODULE */ -diff --git a/xen/common/zstd/private.h b/xen/common/zstd/private.h -new file mode 100644 -index 0000000000..fac4d3c095 ---- /dev/null -+++ b/xen/common/zstd/private.h -@@ -0,0 +1,105 @@ -+#ifndef ZSTD_PRIVATE_H -+#define ZSTD_PRIVATE_H -+ -+#include -+#include -+#include -+ -+typedef ssize_t __attribute__((__mode__(__pointer__))) ptrdiff_t; -+ -+/* from kernel include/linux/unaligned/access_ok.h */ -+ -+static always_inline u16 get_unaligned_le16(const void *p) -+{ -+ return le16_to_cpup((__le16 *)p); -+} -+ -+static always_inline u32 get_unaligned_le32(const void *p) -+{ -+ return le32_to_cpup((__le32 *)p); -+} -+ -+static always_inline u64 get_unaligned_le64(const void *p) -+{ -+ return le64_to_cpup((__le64 *)p); -+} -+ -+static always_inline u32 get_unaligned_be32(const void *p) -+{ -+ return be32_to_cpup((__be32 *)p); -+} -+ -+static always_inline u64 get_unaligned_be64(const void *p) -+{ -+ return be64_to_cpup((__be64 *)p); -+} -+ -+static always_inline void put_unaligned_le16(u16 val, void *p) -+{ -+ *((__le16 *)p) = 
cpu_to_le16(val); -+} -+ -+static always_inline void put_unaligned_le32(u32 val, void *p) -+{ -+ *((__le32 *)p) = cpu_to_le32(val); -+} -+ -+static always_inline void put_unaligned_le64(u64 val, void *p) -+{ -+ *((__le64 *)p) = cpu_to_le64(val); -+} -+ -+static always_inline void put_unaligned_be32(u32 val, void *p) -+{ -+ *((__be32 *)p) = cpu_to_be32(val); -+} -+ -+static always_inline void put_unaligned_be64(u64 val, void *p) -+{ -+ *((__be64 *)p) = cpu_to_be64(val); -+} -+ -+ -+/* from kernel include/asm-generic/unaligned.h with linux/unaligned/generic.h -+ assuming little endian */ -+ -+extern void __bad_unaligned_access_size(void); -+ -+#define get_unaligned(ptr) ((__force typeof(*(ptr)))({ \ -+ __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \ -+ __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)), \ -+ __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)), \ -+ __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)), \ -+ __bad_unaligned_access_size())))); \ -+ })) -+ -+#define put_unaligned(val, ptr) ({ \ -+ void *__gu_p = (ptr); \ -+ switch (sizeof(*(ptr))) { \ -+ case 1: \ -+ *(u8 *)__gu_p = (__force u8)(val); \ -+ break; \ -+ case 2: \ -+ put_unaligned_le16((__force u16)(val), __gu_p); \ -+ break; \ -+ case 4: \ -+ put_unaligned_le32((__force u32)(val), __gu_p); \ -+ break; \ -+ case 8: \ -+ put_unaligned_le64((__force u64)(val), __gu_p); \ -+ break; \ -+ default: \ -+ __bad_unaligned_access_size(); \ -+ break; \ -+ } \ -+ (void)0; }) -+ -+ -+/* from kernel linux/kernel.h and uapi/linux/kernel.h */ -+ -+#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) -+#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) -+#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) -+#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) -+ -+#endif /* ZSTD_PRIVATE_H */ -diff --git a/xen/common/zstd/zstd_common.c b/xen/common/zstd/zstd_common.c -new file mode 100644 -index 
0000000000..1b13903538 ---- /dev/null -+++ b/xen/common/zstd/zstd_common.c -@@ -0,0 +1,74 @@ -+/** -+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. -+ * All rights reserved. -+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of https://github.com/facebook/zstd. -+ * An additional grant of patent rights can be found in the PATENTS file in the -+ * same directory. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). -+ */ -+ -+/*-************************************* -+* Dependencies -+***************************************/ -+#include "error_private.h" -+#include "zstd_internal.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */ -+ -+/*=************************************************************** -+* Custom allocator -+****************************************************************/ -+ -+#define stack_push(stack, size) \ -+ ({ \ -+ void *const ptr = ZSTD_PTR_ALIGN((stack)->ptr); \ -+ (stack)->ptr = (char *)ptr + (size); \ -+ (stack)->ptr <= (stack)->end ? 
ptr : NULL; \ -+ }) -+ -+ZSTD_customMem INIT ZSTD_initStack(void *workspace, size_t workspaceSize) -+{ -+ ZSTD_customMem stackMem = {ZSTD_stackAlloc, ZSTD_stackFree, workspace}; -+ ZSTD_stack *stack = (ZSTD_stack *)workspace; -+ /* Verify preconditions */ -+ if (!workspace || workspaceSize < sizeof(ZSTD_stack) || workspace != ZSTD_PTR_ALIGN(workspace)) { -+ ZSTD_customMem error = {NULL, NULL, NULL}; -+ return error; -+ } -+ /* Initialize the stack */ -+ stack->ptr = workspace; -+ stack->end = (char *)workspace + workspaceSize; -+ stack_push(stack, sizeof(ZSTD_stack)); -+ return stackMem; -+} -+ -+void INIT *ZSTD_stackAllocAll(void *opaque, size_t *size) -+{ -+ ZSTD_stack *stack = (ZSTD_stack *)opaque; -+ *size = (BYTE const *)stack->end - (BYTE *)ZSTD_PTR_ALIGN(stack->ptr); -+ return stack_push(stack, *size); -+} -+ -+void INIT *ZSTD_stackAlloc(void *opaque, size_t size) -+{ -+ ZSTD_stack *stack = (ZSTD_stack *)opaque; -+ return stack_push(stack, size); -+} -+void INIT ZSTD_stackFree(void *opaque, void *address) -+{ -+ (void)opaque; -+ (void)address; -+} -+ -+void INIT *ZSTD_malloc(size_t size, ZSTD_customMem customMem) { return customMem.customAlloc(customMem.opaque, size); } -+ -+void INIT ZSTD_free(void *ptr, ZSTD_customMem customMem) -+{ -+ if (ptr != NULL) -+ customMem.customFree(customMem.opaque, ptr); -+} -diff --git a/xen/common/zstd/zstd_internal.h b/xen/common/zstd/zstd_internal.h -new file mode 100644 -index 0000000000..1b13840c44 ---- /dev/null -+++ b/xen/common/zstd/zstd_internal.h -@@ -0,0 +1,265 @@ -+/** -+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. -+ * All rights reserved. -+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of https://github.com/facebook/zstd. -+ * An additional grant of patent rights can be found in the PATENTS file in the -+ * same directory. 
-+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). -+ */ -+ -+#ifndef ZSTD_CCOMMON_H_MODULE -+#define ZSTD_CCOMMON_H_MODULE -+ -+/*-******************************************************* -+* Compiler specifics -+*********************************************************/ -+#define FORCE_INLINE static always_inline -+#define FORCE_NOINLINE static noinline -+ -+/*-************************************* -+* Dependencies -+***************************************/ -+#include "error_private.h" -+#include "mem.h" -+#include -+#include -+ -+/*-************************************* -+* shared macros -+***************************************/ -+#define CHECK_F(f) \ -+ { \ -+ size_t const errcod = f; \ -+ if (ERR_isError(errcod)) \ -+ return errcod; \ -+ } /* check and Forward error code */ -+#define CHECK_E(f, e) \ -+ { \ -+ size_t const errcod = f; \ -+ if (ERR_isError(errcod)) \ -+ return ERROR(e); \ -+ } /* check and send Error code */ -+#define ZSTD_STATIC_ASSERT(c) \ -+ { \ -+ enum { ZSTD_static_assert = 1 / (int)(!!(c)) }; \ -+ } -+ -+/*-************************************* -+* Common constants -+***************************************/ -+#define ZSTD_OPT_NUM (1 << 12) -+#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */ -+ -+#define ZSTD_REP_NUM 3 /* number of repcodes */ -+#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */ -+#define ZSTD_REP_MOVE (ZSTD_REP_NUM - 1) -+#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM) -+static const U32 repStartValue[ZSTD_REP_NUM] = {1, 4, 8}; -+ -+#define BIT7 128 -+#define BIT6 64 -+#define BIT5 32 -+#define BIT4 16 -+#define BIT1 2 -+#define BIT0 1 -+ -+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 -+static const size_t 
ZSTD_fcs_fieldSize[4] = {0, 2, 4, 8}; -+static const size_t ZSTD_did_fieldSize[4] = {0, 1, 2, 4}; -+ -+#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ -+static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; -+typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; -+ -+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ -+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ -+ -+#define HufLog 12 -+typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; -+ -+#define LONGNBSEQ 0x7F00 -+ -+#define MINMATCH 3 -+#define EQUAL_READ32 4 -+ -+#define Litbits 8 -+#define MaxLit ((1 << Litbits) - 1) -+#define MaxML 52 -+#define MaxLL 35 -+#define MaxOff 28 -+#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */ -+#define MLFSELog 9 -+#define LLFSELog 9 -+#define OffFSELog 8 -+ -+static const U32 LL_bits[MaxLL + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+static const S16 LL_defaultNorm[MaxLL + 1] = {4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1}; -+#define LL_DEFAULTNORMLOG 6 /* for static allocation */ -+static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; -+ -+static const U32 ML_bits[MaxML + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+static const S16 ML_defaultNorm[MaxML + 1] = {1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1}; -+#define ML_DEFAULTNORMLOG 6 /* for static allocation */ -+static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; -+ 
-+static const S16 OF_defaultNorm[MaxOff + 1] = {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1}; -+#define OF_DEFAULTNORMLOG 5 /* for static allocation */ -+static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; -+ -+/*-******************************************* -+* Shared functions to include for inlining -+*********************************************/ -+ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) { -+ /* -+ * zstd relies heavily on gcc being able to analyze and inline this -+ * memcpy() call, since it is called in a tight loop. Preboot mode -+ * is compiled in freestanding mode, which stops gcc from analyzing -+ * memcpy(). Use __builtin_memcpy() to tell gcc to analyze this as a -+ * regular memcpy(). -+ */ -+ __builtin_memcpy(dst, src, 8); -+} -+/*! ZSTD_wildcopy() : -+* custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */ -+#define WILDCOPY_OVERLENGTH 8 -+ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length) -+{ -+ const BYTE* ip = (const BYTE*)src; -+ BYTE* op = (BYTE*)dst; -+ BYTE* const oend = op + length; -+#if defined(GCC_VERSION) && GCC_VERSION >= 70000 && GCC_VERSION < 70200 -+ /* -+ * Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388. -+ * Avoid the bad case where the loop only runs once by handling the -+ * special case separately. This doesn't trigger the bug because it -+ * doesn't involve pointer/integer overflow. 
-+ */ -+ if (length <= 8) -+ return ZSTD_copy8(dst, src); -+#endif -+ do { -+ ZSTD_copy8(op, ip); -+ op += 8; -+ ip += 8; -+ } while (op < oend); -+} -+ -+/*-******************************************* -+* Private interfaces -+*********************************************/ -+typedef struct ZSTD_stats_s ZSTD_stats_t; -+ -+typedef struct { -+ U32 off; -+ U32 len; -+} ZSTD_match_t; -+ -+typedef struct { -+ U32 price; -+ U32 off; -+ U32 mlen; -+ U32 litlen; -+ U32 rep[ZSTD_REP_NUM]; -+} ZSTD_optimal_t; -+ -+typedef struct seqDef_s { -+ U32 offset; -+ U16 litLength; -+ U16 matchLength; -+} seqDef; -+ -+typedef struct { -+ seqDef *sequencesStart; -+ seqDef *sequences; -+ BYTE *litStart; -+ BYTE *lit; -+ BYTE *llCode; -+ BYTE *mlCode; -+ BYTE *ofCode; -+ U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ -+ U32 longLengthPos; -+ /* opt */ -+ ZSTD_optimal_t *priceTable; -+ ZSTD_match_t *matchTable; -+ U32 *matchLengthFreq; -+ U32 *litLengthFreq; -+ U32 *litFreq; -+ U32 *offCodeFreq; -+ U32 matchLengthSum; -+ U32 matchSum; -+ U32 litLengthSum; -+ U32 litSum; -+ U32 offCodeSum; -+ U32 log2matchLengthSum; -+ U32 log2matchSum; -+ U32 log2litLengthSum; -+ U32 log2litSum; -+ U32 log2offCodeSum; -+ U32 factor; -+ U32 staticPrices; -+ U32 cachedPrice; -+ U32 cachedLitLength; -+ const BYTE *cachedLiterals; -+} seqStore_t; -+ -+const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx); -+void ZSTD_seqToCodes(const seqStore_t *seqStorePtr); -+int ZSTD_isSkipFrame(ZSTD_DCtx *dctx); -+ -+/*= Custom memory allocation functions */ -+typedef void *(*ZSTD_allocFunction)(void *opaque, size_t size); -+typedef void (*ZSTD_freeFunction)(void *opaque, void *address); -+typedef struct { -+ ZSTD_allocFunction customAlloc; -+ ZSTD_freeFunction customFree; -+ void *opaque; -+} ZSTD_customMem; -+ -+void *ZSTD_malloc(size_t size, ZSTD_customMem customMem); -+void ZSTD_free(void *ptr, ZSTD_customMem customMem); -+ -+/*====== stack allocation ======*/ -+ -+typedef 
struct { -+ void *ptr; -+ const void *end; -+} ZSTD_stack; -+ -+#define ZSTD_ALIGN(x) ALIGN(x, sizeof(size_t)) -+#define ZSTD_PTR_ALIGN(p) PTR_ALIGN(p, sizeof(size_t)) -+ -+ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize); -+ -+void *ZSTD_stackAllocAll(void *opaque, size_t *size); -+void *ZSTD_stackAlloc(void *opaque, size_t size); -+void ZSTD_stackFree(void *opaque, void *address); -+ -+/*====== common function ======*/ -+ -+ZSTD_STATIC U32 ZSTD_highbit32(U32 val) { return 31 - __builtin_clz(val); } -+ -+/* hidden functions */ -+ -+/* ZSTD_invalidateRepCodes() : -+ * ensures next compression will not use repcodes from previous block. -+ * Note : only works with regular variant; -+ * do not use with extDict variant ! */ -+void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx); -+ -+size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx); -+size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx); -+size_t ZSTD_freeCDict(ZSTD_CDict *cdict); -+size_t ZSTD_freeDDict(ZSTD_DDict *cdict); -+size_t ZSTD_freeCStream(ZSTD_CStream *zcs); -+size_t ZSTD_freeDStream(ZSTD_DStream *zds); -+ -+#endif /* ZSTD_CCOMMON_H_MODULE */ -diff --git a/xen/common/zstd/zstd_opt.h b/xen/common/zstd/zstd_opt.h -new file mode 100644 -index 0000000000..55e1b4cba8 ---- /dev/null -+++ b/xen/common/zstd/zstd_opt.h -@@ -0,0 +1,1014 @@ -+/** -+ * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. -+ * All rights reserved. -+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of https://github.com/facebook/zstd. -+ * An additional grant of patent rights can be found in the PATENTS file in the -+ * same directory. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. 
This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). -+ */ -+ -+/* Note : this file is intended to be included within zstd_compress.c */ -+ -+#ifndef ZSTD_OPT_H_91842398743 -+#define ZSTD_OPT_H_91842398743 -+ -+#define ZSTD_LITFREQ_ADD 2 -+#define ZSTD_FREQ_DIV 4 -+#define ZSTD_MAX_PRICE (1 << 30) -+ -+/*-************************************* -+* Price functions for optimal parser -+***************************************/ -+FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t *ssPtr) -+{ -+ ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum + 1); -+ ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum + 1); -+ ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum + 1); -+ ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum + 1); -+ ssPtr->factor = 1 + ((ssPtr->litSum >> 5) / ssPtr->litLengthSum) + ((ssPtr->litSum << 1) / (ssPtr->litSum + ssPtr->matchSum)); -+} -+ -+ZSTD_STATIC void ZSTD_rescaleFreqs(seqStore_t *ssPtr, const BYTE *src, size_t srcSize) -+{ -+ unsigned u; -+ -+ ssPtr->cachedLiterals = NULL; -+ ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; -+ ssPtr->staticPrices = 0; -+ -+ if (ssPtr->litLengthSum == 0) { -+ if (srcSize <= 1024) -+ ssPtr->staticPrices = 1; -+ -+ for (u = 0; u <= MaxLit; u++) -+ ssPtr->litFreq[u] = 0; -+ for (u = 0; u < srcSize; u++) -+ ssPtr->litFreq[src[u]]++; -+ -+ ssPtr->litSum = 0; -+ ssPtr->litLengthSum = MaxLL + 1; -+ ssPtr->matchLengthSum = MaxML + 1; -+ ssPtr->offCodeSum = (MaxOff + 1); -+ ssPtr->matchSum = (ZSTD_LITFREQ_ADD << Litbits); -+ -+ for (u = 0; u <= MaxLit; u++) { -+ ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> ZSTD_FREQ_DIV); -+ ssPtr->litSum += ssPtr->litFreq[u]; -+ } -+ for (u = 0; u <= MaxLL; u++) -+ ssPtr->litLengthFreq[u] = 1; -+ for (u = 0; u <= MaxML; u++) -+ ssPtr->matchLengthFreq[u] = 1; -+ for (u = 0; u <= MaxOff; u++) -+ ssPtr->offCodeFreq[u] = 1; -+ } else { -+ ssPtr->matchLengthSum = 0; -+ 
ssPtr->litLengthSum = 0; -+ ssPtr->offCodeSum = 0; -+ ssPtr->matchSum = 0; -+ ssPtr->litSum = 0; -+ -+ for (u = 0; u <= MaxLit; u++) { -+ ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> (ZSTD_FREQ_DIV + 1)); -+ ssPtr->litSum += ssPtr->litFreq[u]; -+ } -+ for (u = 0; u <= MaxLL; u++) { -+ ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u] >> (ZSTD_FREQ_DIV + 1)); -+ ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; -+ } -+ for (u = 0; u <= MaxML; u++) { -+ ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u] >> ZSTD_FREQ_DIV); -+ ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; -+ ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); -+ } -+ ssPtr->matchSum *= ZSTD_LITFREQ_ADD; -+ for (u = 0; u <= MaxOff; u++) { -+ ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u] >> ZSTD_FREQ_DIV); -+ ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; -+ } -+ } -+ -+ ZSTD_setLog2Prices(ssPtr); -+} -+ -+FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t *ssPtr, U32 litLength, const BYTE *literals) -+{ -+ U32 price, u; -+ -+ if (ssPtr->staticPrices) -+ return ZSTD_highbit32((U32)litLength + 1) + (litLength * 6); -+ -+ if (litLength == 0) -+ return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0] + 1); -+ -+ /* literals */ -+ if (ssPtr->cachedLiterals == literals) { -+ U32 const additional = litLength - ssPtr->cachedLitLength; -+ const BYTE *literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength; -+ price = ssPtr->cachedPrice + additional * ssPtr->log2litSum; -+ for (u = 0; u < additional; u++) -+ price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]] + 1); -+ ssPtr->cachedPrice = price; -+ ssPtr->cachedLitLength = litLength; -+ } else { -+ price = litLength * ssPtr->log2litSum; -+ for (u = 0; u < litLength; u++) -+ price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]] + 1); -+ -+ if (litLength >= 12) { -+ ssPtr->cachedLiterals = literals; -+ ssPtr->cachedPrice = price; -+ ssPtr->cachedLitLength = litLength; -+ } -+ } -+ -+ /* literal Length */ -+ { -+ const BYTE 
LL_deltaCode = 19; -+ const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; -+ price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode] + 1); -+ } -+ -+ return price; -+} -+ -+FORCE_INLINE U32 ZSTD_getPrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength, const int ultra) -+{ -+ /* offset */ -+ U32 price; -+ BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1); -+ -+ if (seqStorePtr->staticPrices) -+ return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength + 1) + 16 + offCode; -+ -+ price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode] + 1); -+ if (!ultra && offCode >= 20) -+ price += (offCode - 19) * 2; -+ -+ /* match Length */ -+ { -+ const BYTE ML_deltaCode = 36; -+ const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; -+ price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode] + 1); -+ } -+ -+ return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor; -+} -+ -+ZSTD_STATIC void ZSTD_updatePrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength) -+{ -+ U32 u; -+ -+ /* literals */ -+ seqStorePtr->litSum += litLength * ZSTD_LITFREQ_ADD; -+ for (u = 0; u < litLength; u++) -+ seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; -+ -+ /* literal Length */ -+ { -+ const BYTE LL_deltaCode = 19; -+ const BYTE llCode = (litLength > 63) ? 
(BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; -+ seqStorePtr->litLengthFreq[llCode]++; -+ seqStorePtr->litLengthSum++; -+ } -+ -+ /* match offset */ -+ { -+ BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1); -+ seqStorePtr->offCodeSum++; -+ seqStorePtr->offCodeFreq[offCode]++; -+ } -+ -+ /* match Length */ -+ { -+ const BYTE ML_deltaCode = 36; -+ const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; -+ seqStorePtr->matchLengthFreq[mlCode]++; -+ seqStorePtr->matchLengthSum++; -+ } -+ -+ ZSTD_setLog2Prices(seqStorePtr); -+} -+ -+#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ -+ { \ -+ while (last_pos < pos) { \ -+ opt[last_pos + 1].price = ZSTD_MAX_PRICE; \ -+ last_pos++; \ -+ } \ -+ opt[pos].mlen = mlen_; \ -+ opt[pos].off = offset_; \ -+ opt[pos].litlen = litlen_; \ -+ opt[pos].price = price_; \ -+ } -+ -+/* Update hashTable3 up to ip (excluded) -+ Assumption : always within prefix (i.e. not within extDict) */ -+FORCE_INLINE -+U32 ZSTD_insertAndFindFirstIndexHash3(ZSTD_CCtx *zc, const BYTE *ip) -+{ -+ U32 *const hashTable3 = zc->hashTable3; -+ U32 const hashLog3 = zc->hashLog3; -+ const BYTE *const base = zc->base; -+ U32 idx = zc->nextToUpdate3; -+ const U32 target = zc->nextToUpdate3 = (U32)(ip - base); -+ const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3); -+ -+ while (idx < target) { -+ hashTable3[ZSTD_hash3Ptr(base + idx, hashLog3)] = idx; -+ idx++; -+ } -+ -+ return hashTable3[hash3]; -+} -+ -+/*-************************************* -+* Binary Tree search -+***************************************/ -+static U32 ZSTD_insertBtAndGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, U32 nbCompares, const U32 mls, U32 extDict, -+ ZSTD_match_t *matches, const U32 minMatchLen) -+{ -+ const BYTE *const base = zc->base; -+ const U32 curr = (U32)(ip - base); -+ const U32 hashLog = zc->params.cParams.hashLog; -+ const size_t h = ZSTD_hashPtr(ip, 
hashLog, mls); -+ U32 *const hashTable = zc->hashTable; -+ U32 matchIndex = hashTable[h]; -+ U32 *const bt = zc->chainTable; -+ const U32 btLog = zc->params.cParams.chainLog - 1; -+ const U32 btMask = (1U << btLog) - 1; -+ size_t commonLengthSmaller = 0, commonLengthLarger = 0; -+ const BYTE *const dictBase = zc->dictBase; -+ const U32 dictLimit = zc->dictLimit; -+ const BYTE *const dictEnd = dictBase + dictLimit; -+ const BYTE *const prefixStart = base + dictLimit; -+ const U32 btLow = btMask >= curr ? 0 : curr - btMask; -+ const U32 windowLow = zc->lowLimit; -+ U32 *smallerPtr = bt + 2 * (curr & btMask); -+ U32 *largerPtr = bt + 2 * (curr & btMask) + 1; -+ U32 matchEndIdx = curr + 8; -+ U32 dummy32; /* to be nullified at the end */ -+ U32 mnum = 0; -+ -+ const U32 minMatch = (mls == 3) ? 3 : 4; -+ size_t bestLength = minMatchLen - 1; -+ -+ if (minMatch == 3) { /* HC3 match finder */ -+ U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(zc, ip); -+ if (matchIndex3 > windowLow && (curr - matchIndex3 < (1 << 18))) { -+ const BYTE *match; -+ size_t currMl = 0; -+ if ((!extDict) || matchIndex3 >= dictLimit) { -+ match = base + matchIndex3; -+ if (match[bestLength] == ip[bestLength]) -+ currMl = ZSTD_count(ip, match, iLimit); -+ } else { -+ match = dictBase + matchIndex3; -+ if (ZSTD_readMINMATCH(match, MINMATCH) == -+ ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ -+ currMl = ZSTD_count_2segments(ip + MINMATCH, match + MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; -+ } -+ -+ /* save best solution */ -+ if (currMl > bestLength) { -+ bestLength = currMl; -+ matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex3; -+ matches[mnum].len = (U32)currMl; -+ mnum++; -+ if (currMl > ZSTD_OPT_NUM) -+ goto update; -+ if (ip + currMl == iLimit) -+ goto update; /* best possible, and avoid read overflow*/ -+ } -+ } -+ } -+ -+ hashTable[h] = curr; /* Update Hash Table */ -+ -+ while (nbCompares-- && (matchIndex > 
windowLow)) { -+ U32 *nextPtr = bt + 2 * (matchIndex & btMask); -+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ -+ const BYTE *match; -+ -+ if ((!extDict) || (matchIndex + matchLength >= dictLimit)) { -+ match = base + matchIndex; -+ if (match[matchLength] == ip[matchLength]) { -+ matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iLimit) + 1; -+ } -+ } else { -+ match = dictBase + matchIndex; -+ matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iLimit, dictEnd, prefixStart); -+ if (matchIndex + matchLength >= dictLimit) -+ match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ -+ } -+ -+ if (matchLength > bestLength) { -+ if (matchLength > matchEndIdx - matchIndex) -+ matchEndIdx = matchIndex + (U32)matchLength; -+ bestLength = matchLength; -+ matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex; -+ matches[mnum].len = (U32)matchLength; -+ mnum++; -+ if (matchLength > ZSTD_OPT_NUM) -+ break; -+ if (ip + matchLength == iLimit) /* equal : no way to know if inf or sup */ -+ break; /* drop, to guarantee consistency (miss a little bit of compression) */ -+ } -+ -+ if (match[matchLength] < ip[matchLength]) { -+ /* match is smaller than curr */ -+ *smallerPtr = matchIndex; /* update smaller idx */ -+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ -+ if (matchIndex <= btLow) { -+ smallerPtr = &dummy32; -+ break; -+ } /* beyond tree size, stop the search */ -+ smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */ -+ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to curr) */ -+ } else { -+ /* match is larger than curr */ -+ *largerPtr = matchIndex; -+ commonLengthLarger = matchLength; -+ if (matchIndex <= btLow) { -+ largerPtr = &dummy32; -+ break; -+ } /* beyond tree size, stop the search */ -+ largerPtr = nextPtr; -+ matchIndex = 
nextPtr[0]; -+ } -+ } -+ -+ *smallerPtr = *largerPtr = 0; -+ -+update: -+ zc->nextToUpdate = (matchEndIdx > curr + 8) ? matchEndIdx - 8 : curr + 1; -+ return mnum; -+} -+ -+/** Tree updater, providing best match */ -+static U32 ZSTD_BtGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls, ZSTD_match_t *matches, -+ const U32 minMatchLen) -+{ -+ if (ip < zc->base + zc->nextToUpdate) -+ return 0; /* skipped area */ -+ ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); -+ return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen); -+} -+ -+static U32 ZSTD_BtGetAllMatches_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */ -+ const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch, -+ ZSTD_match_t *matches, const U32 minMatchLen) -+{ -+ switch (matchLengthSearch) { -+ case 3: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); -+ default: -+ case 4: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); -+ case 5: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); -+ case 7: -+ case 6: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); -+ } -+} -+ -+/** Tree updater, providing best match */ -+static U32 ZSTD_BtGetAllMatches_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls, -+ ZSTD_match_t *matches, const U32 minMatchLen) -+{ -+ if (ip < zc->base + zc->nextToUpdate) -+ return 0; /* skipped area */ -+ ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); -+ return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen); -+} -+ -+static U32 ZSTD_BtGetAllMatches_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */ -+ const BYTE *ip, const BYTE *const iHighLimit, 
const U32 maxNbAttempts, const U32 matchLengthSearch, -+ ZSTD_match_t *matches, const U32 minMatchLen) -+{ -+ switch (matchLengthSearch) { -+ case 3: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); -+ default: -+ case 4: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); -+ case 5: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); -+ case 7: -+ case 6: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); -+ } -+} -+ -+/*-******************************* -+* Optimal parser -+*********************************/ -+FORCE_INLINE -+void ZSTD_compressBlock_opt_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra) -+{ -+ seqStore_t *seqStorePtr = &(ctx->seqStore); -+ const BYTE *const istart = (const BYTE *)src; -+ const BYTE *ip = istart; -+ const BYTE *anchor = istart; -+ const BYTE *const iend = istart + srcSize; -+ const BYTE *const ilimit = iend - 8; -+ const BYTE *const base = ctx->base; -+ const BYTE *const prefixStart = base + ctx->dictLimit; -+ -+ const U32 maxSearches = 1U << ctx->params.cParams.searchLog; -+ const U32 sufficient_len = ctx->params.cParams.targetLength; -+ const U32 mls = ctx->params.cParams.searchLength; -+ const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 
3 : 4; -+ -+ ZSTD_optimal_t *opt = seqStorePtr->priceTable; -+ ZSTD_match_t *matches = seqStorePtr->matchTable; -+ const BYTE *inr; -+ U32 offset, rep[ZSTD_REP_NUM]; -+ -+ /* init */ -+ ctx->nextToUpdate3 = ctx->nextToUpdate; -+ ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize); -+ ip += (ip == prefixStart); -+ { -+ U32 i; -+ for (i = 0; i < ZSTD_REP_NUM; i++) -+ rep[i] = ctx->rep[i]; -+ } -+ -+ /* Match Loop */ -+ while (ip < ilimit) { -+ U32 cur, match_num, last_pos, litlen, price; -+ U32 u, mlen, best_mlen, best_off, litLength; -+ memset(opt, 0, sizeof(ZSTD_optimal_t)); -+ last_pos = 0; -+ litlen = (U32)(ip - anchor); -+ -+ /* check repCode */ -+ { -+ U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor); -+ for (i = (ip == anchor); i < last_i; i++) { -+ const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; -+ if ((repCur > 0) && (repCur < (S32)(ip - prefixStart)) && -+ (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) { -+ mlen = (U32)ZSTD_count(ip + minMatch, ip + minMatch - repCur, iend) + minMatch; -+ if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { -+ best_mlen = mlen; -+ best_off = i; -+ cur = 0; -+ last_pos = 1; -+ goto _storeSequence; -+ } -+ best_off = i - (ip == anchor); -+ do { -+ price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); -+ if (mlen > last_pos || price < opt[mlen].price) -+ SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ -+ mlen--; -+ } while (mlen >= minMatch); -+ } -+ } -+ } -+ -+ match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); -+ -+ if (!last_pos && !match_num) { -+ ip++; -+ continue; -+ } -+ -+ if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) { -+ best_mlen = matches[match_num - 1].len; -+ best_off = matches[match_num - 1].off; -+ cur = 0; -+ last_pos = 1; -+ goto _storeSequence; -+ } -+ -+ /* set prices using 
matches at position = 0 */ -+ best_mlen = (last_pos) ? last_pos : minMatch; -+ for (u = 0; u < match_num; u++) { -+ mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen; -+ best_mlen = matches[u].len; -+ while (mlen <= best_mlen) { -+ price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra); -+ if (mlen > last_pos || price < opt[mlen].price) -+ SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ -+ mlen++; -+ } -+ } -+ -+ if (last_pos < minMatch) { -+ ip++; -+ continue; -+ } -+ -+ /* initialize opt[0] */ -+ { -+ U32 i; -+ for (i = 0; i < ZSTD_REP_NUM; i++) -+ opt[0].rep[i] = rep[i]; -+ } -+ opt[0].mlen = 1; -+ opt[0].litlen = litlen; -+ -+ /* check further positions */ -+ for (cur = 1; cur <= last_pos; cur++) { -+ inr = ip + cur; -+ -+ if (opt[cur - 1].mlen == 1) { -+ litlen = opt[cur - 1].litlen + 1; -+ if (cur > litlen) { -+ price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen); -+ } else -+ price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); -+ } else { -+ litlen = 1; -+ price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1); -+ } -+ -+ if (cur > last_pos || price <= opt[cur].price) -+ SET_PRICE(cur, 1, 0, litlen, price); -+ -+ if (cur == last_pos) -+ break; -+ -+ if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ -+ continue; -+ -+ mlen = opt[cur].mlen; -+ if (opt[cur].off > ZSTD_REP_MOVE_OPT) { -+ opt[cur].rep[2] = opt[cur - mlen].rep[1]; -+ opt[cur].rep[1] = opt[cur - mlen].rep[0]; -+ opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; -+ } else { -+ opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2]; -+ opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1]; -+ opt[cur].rep[0] = -+ ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? 
(opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]); -+ } -+ -+ best_mlen = minMatch; -+ { -+ U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); -+ for (i = (opt[cur].mlen != 1); i < last_i; i++) { /* check rep */ -+ const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; -+ if ((repCur > 0) && (repCur < (S32)(inr - prefixStart)) && -+ (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) { -+ mlen = (U32)ZSTD_count(inr + minMatch, inr + minMatch - repCur, iend) + minMatch; -+ -+ if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { -+ best_mlen = mlen; -+ best_off = i; -+ last_pos = cur + 1; -+ goto _storeSequence; -+ } -+ -+ best_off = i - (opt[cur].mlen != 1); -+ if (mlen > best_mlen) -+ best_mlen = mlen; -+ -+ do { -+ if (opt[cur].mlen == 1) { -+ litlen = opt[cur].litlen; -+ if (cur > litlen) { -+ price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen, -+ best_off, mlen - MINMATCH, ultra); -+ } else -+ price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); -+ } else { -+ litlen = 0; -+ price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); -+ } -+ -+ if (cur + mlen > last_pos || price <= opt[cur + mlen].price) -+ SET_PRICE(cur + mlen, mlen, i, litlen, price); -+ mlen--; -+ } while (mlen >= minMatch); -+ } -+ } -+ } -+ -+ match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); -+ -+ if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) { -+ best_mlen = matches[match_num - 1].len; -+ best_off = matches[match_num - 1].off; -+ last_pos = cur + 1; -+ goto _storeSequence; -+ } -+ -+ /* set prices using matches at position = cur */ -+ for (u = 0; u < match_num; u++) { -+ mlen = (u > 0) ? 
matches[u - 1].len + 1 : best_mlen; -+ best_mlen = matches[u].len; -+ -+ while (mlen <= best_mlen) { -+ if (opt[cur].mlen == 1) { -+ litlen = opt[cur].litlen; -+ if (cur > litlen) -+ price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen, -+ matches[u].off - 1, mlen - MINMATCH, ultra); -+ else -+ price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra); -+ } else { -+ litlen = 0; -+ price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra); -+ } -+ -+ if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) -+ SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); -+ -+ mlen++; -+ } -+ } -+ } -+ -+ best_mlen = opt[last_pos].mlen; -+ best_off = opt[last_pos].off; -+ cur = last_pos - best_mlen; -+ -+ /* store sequence */ -+_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ -+ opt[0].mlen = 1; -+ -+ while (1) { -+ mlen = opt[cur].mlen; -+ offset = opt[cur].off; -+ opt[cur].mlen = best_mlen; -+ opt[cur].off = best_off; -+ best_mlen = mlen; -+ best_off = offset; -+ if (mlen > cur) -+ break; -+ cur -= mlen; -+ } -+ -+ for (u = 0; u <= last_pos;) { -+ u += opt[u].mlen; -+ } -+ -+ for (cur = 0; cur < last_pos;) { -+ mlen = opt[cur].mlen; -+ if (mlen == 1) { -+ ip++; -+ cur++; -+ continue; -+ } -+ offset = opt[cur].off; -+ cur += mlen; -+ litLength = (U32)(ip - anchor); -+ -+ if (offset > ZSTD_REP_MOVE_OPT) { -+ rep[2] = rep[1]; -+ rep[1] = rep[0]; -+ rep[0] = offset - ZSTD_REP_MOVE_OPT; -+ offset--; -+ } else { -+ if (offset != 0) { -+ best_off = (offset == ZSTD_REP_MOVE_OPT) ? 
(rep[0] - 1) : (rep[offset]); -+ if (offset != 1) -+ rep[2] = rep[1]; -+ rep[1] = rep[0]; -+ rep[0] = best_off; -+ } -+ if (litLength == 0) -+ offset--; -+ } -+ -+ ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH); -+ ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH); -+ anchor = ip = ip + mlen; -+ } -+ } /* for (cur=0; cur < last_pos; ) */ -+ -+ /* Save reps for next block */ -+ { -+ int i; -+ for (i = 0; i < ZSTD_REP_NUM; i++) -+ ctx->repToConfirm[i] = rep[i]; -+ } -+ -+ /* Last Literals */ -+ { -+ size_t const lastLLSize = iend - anchor; -+ memcpy(seqStorePtr->lit, anchor, lastLLSize); -+ seqStorePtr->lit += lastLLSize; -+ } -+} -+ -+FORCE_INLINE -+void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra) -+{ -+ seqStore_t *seqStorePtr = &(ctx->seqStore); -+ const BYTE *const istart = (const BYTE *)src; -+ const BYTE *ip = istart; -+ const BYTE *anchor = istart; -+ const BYTE *const iend = istart + srcSize; -+ const BYTE *const ilimit = iend - 8; -+ const BYTE *const base = ctx->base; -+ const U32 lowestIndex = ctx->lowLimit; -+ const U32 dictLimit = ctx->dictLimit; -+ const BYTE *const prefixStart = base + dictLimit; -+ const BYTE *const dictBase = ctx->dictBase; -+ const BYTE *const dictEnd = dictBase + dictLimit; -+ -+ const U32 maxSearches = 1U << ctx->params.cParams.searchLog; -+ const U32 sufficient_len = ctx->params.cParams.targetLength; -+ const U32 mls = ctx->params.cParams.searchLength; -+ const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 
3 : 4; -+ -+ ZSTD_optimal_t *opt = seqStorePtr->priceTable; -+ ZSTD_match_t *matches = seqStorePtr->matchTable; -+ const BYTE *inr; -+ -+ /* init */ -+ U32 offset, rep[ZSTD_REP_NUM]; -+ { -+ U32 i; -+ for (i = 0; i < ZSTD_REP_NUM; i++) -+ rep[i] = ctx->rep[i]; -+ } -+ -+ ctx->nextToUpdate3 = ctx->nextToUpdate; -+ ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize); -+ ip += (ip == prefixStart); -+ -+ /* Match Loop */ -+ while (ip < ilimit) { -+ U32 cur, match_num, last_pos, litlen, price; -+ U32 u, mlen, best_mlen, best_off, litLength; -+ U32 curr = (U32)(ip - base); -+ memset(opt, 0, sizeof(ZSTD_optimal_t)); -+ last_pos = 0; -+ opt[0].litlen = (U32)(ip - anchor); -+ -+ /* check repCode */ -+ { -+ U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor); -+ for (i = (ip == anchor); i < last_i; i++) { -+ const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; -+ const U32 repIndex = (U32)(curr - repCur); -+ const BYTE *const repBase = repIndex < dictLimit ? dictBase : base; -+ const BYTE *const repMatch = repBase + repIndex; -+ if ((repCur > 0 && repCur <= (S32)curr) && -+ (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ -+ && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) { -+ /* repcode detected we should take it */ -+ const BYTE *const repEnd = repIndex < dictLimit ? 
dictEnd : iend; -+ mlen = (U32)ZSTD_count_2segments(ip + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch; -+ -+ if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { -+ best_mlen = mlen; -+ best_off = i; -+ cur = 0; -+ last_pos = 1; -+ goto _storeSequence; -+ } -+ -+ best_off = i - (ip == anchor); -+ litlen = opt[0].litlen; -+ do { -+ price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); -+ if (mlen > last_pos || price < opt[mlen].price) -+ SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ -+ mlen--; -+ } while (mlen >= minMatch); -+ } -+ } -+ } -+ -+ match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ -+ -+ if (!last_pos && !match_num) { -+ ip++; -+ continue; -+ } -+ -+ { -+ U32 i; -+ for (i = 0; i < ZSTD_REP_NUM; i++) -+ opt[0].rep[i] = rep[i]; -+ } -+ opt[0].mlen = 1; -+ -+ if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) { -+ best_mlen = matches[match_num - 1].len; -+ best_off = matches[match_num - 1].off; -+ cur = 0; -+ last_pos = 1; -+ goto _storeSequence; -+ } -+ -+ best_mlen = (last_pos) ? last_pos : minMatch; -+ -+ /* set prices using matches at position = 0 */ -+ for (u = 0; u < match_num; u++) { -+ mlen = (u > 0) ? 
matches[u - 1].len + 1 : best_mlen; -+ best_mlen = matches[u].len; -+ litlen = opt[0].litlen; -+ while (mlen <= best_mlen) { -+ price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra); -+ if (mlen > last_pos || price < opt[mlen].price) -+ SET_PRICE(mlen, mlen, matches[u].off, litlen, price); -+ mlen++; -+ } -+ } -+ -+ if (last_pos < minMatch) { -+ ip++; -+ continue; -+ } -+ -+ /* check further positions */ -+ for (cur = 1; cur <= last_pos; cur++) { -+ inr = ip + cur; -+ -+ if (opt[cur - 1].mlen == 1) { -+ litlen = opt[cur - 1].litlen + 1; -+ if (cur > litlen) { -+ price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen); -+ } else -+ price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); -+ } else { -+ litlen = 1; -+ price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1); -+ } -+ -+ if (cur > last_pos || price <= opt[cur].price) -+ SET_PRICE(cur, 1, 0, litlen, price); -+ -+ if (cur == last_pos) -+ break; -+ -+ if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ -+ continue; -+ -+ mlen = opt[cur].mlen; -+ if (opt[cur].off > ZSTD_REP_MOVE_OPT) { -+ opt[cur].rep[2] = opt[cur - mlen].rep[1]; -+ opt[cur].rep[1] = opt[cur - mlen].rep[0]; -+ opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; -+ } else { -+ opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2]; -+ opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1]; -+ opt[cur].rep[0] = -+ ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]); -+ } -+ -+ best_mlen = minMatch; -+ { -+ U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); -+ for (i = (mlen != 1); i < last_i; i++) { -+ const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? 
(opt[cur].rep[0] - 1) : opt[cur].rep[i]; -+ const U32 repIndex = (U32)(curr + cur - repCur); -+ const BYTE *const repBase = repIndex < dictLimit ? dictBase : base; -+ const BYTE *const repMatch = repBase + repIndex; -+ if ((repCur > 0 && repCur <= (S32)(curr + cur)) && -+ (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ -+ && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) { -+ /* repcode detected */ -+ const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend; -+ mlen = (U32)ZSTD_count_2segments(inr + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch; -+ -+ if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { -+ best_mlen = mlen; -+ best_off = i; -+ last_pos = cur + 1; -+ goto _storeSequence; -+ } -+ -+ best_off = i - (opt[cur].mlen != 1); -+ if (mlen > best_mlen) -+ best_mlen = mlen; -+ -+ do { -+ if (opt[cur].mlen == 1) { -+ litlen = opt[cur].litlen; -+ if (cur > litlen) { -+ price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen, -+ best_off, mlen - MINMATCH, ultra); -+ } else -+ price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); -+ } else { -+ litlen = 0; -+ price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); -+ } -+ -+ if (cur + mlen > last_pos || price <= opt[cur + mlen].price) -+ SET_PRICE(cur + mlen, mlen, i, litlen, price); -+ mlen--; -+ } while (mlen >= minMatch); -+ } -+ } -+ } -+ -+ match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); -+ -+ if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) { -+ best_mlen = matches[match_num - 1].len; -+ best_off = matches[match_num - 1].off; -+ last_pos = cur + 1; -+ goto _storeSequence; -+ } -+ -+ /* set prices using matches at position = cur */ -+ for (u = 0; u < match_num; u++) { -+ 
mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen; -+ best_mlen = matches[u].len; -+ -+ while (mlen <= best_mlen) { -+ if (opt[cur].mlen == 1) { -+ litlen = opt[cur].litlen; -+ if (cur > litlen) -+ price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen, -+ matches[u].off - 1, mlen - MINMATCH, ultra); -+ else -+ price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra); -+ } else { -+ litlen = 0; -+ price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra); -+ } -+ -+ if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) -+ SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); -+ -+ mlen++; -+ } -+ } -+ } /* for (cur = 1; cur <= last_pos; cur++) */ -+ -+ best_mlen = opt[last_pos].mlen; -+ best_off = opt[last_pos].off; -+ cur = last_pos - best_mlen; -+ -+ /* store sequence */ -+_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ -+ opt[0].mlen = 1; -+ -+ while (1) { -+ mlen = opt[cur].mlen; -+ offset = opt[cur].off; -+ opt[cur].mlen = best_mlen; -+ opt[cur].off = best_off; -+ best_mlen = mlen; -+ best_off = offset; -+ if (mlen > cur) -+ break; -+ cur -= mlen; -+ } -+ -+ for (u = 0; u <= last_pos;) { -+ u += opt[u].mlen; -+ } -+ -+ for (cur = 0; cur < last_pos;) { -+ mlen = opt[cur].mlen; -+ if (mlen == 1) { -+ ip++; -+ cur++; -+ continue; -+ } -+ offset = opt[cur].off; -+ cur += mlen; -+ litLength = (U32)(ip - anchor); -+ -+ if (offset > ZSTD_REP_MOVE_OPT) { -+ rep[2] = rep[1]; -+ rep[1] = rep[0]; -+ rep[0] = offset - ZSTD_REP_MOVE_OPT; -+ offset--; -+ } else { -+ if (offset != 0) { -+ best_off = (offset == ZSTD_REP_MOVE_OPT) ? 
(rep[0] - 1) : (rep[offset]); -+ if (offset != 1) -+ rep[2] = rep[1]; -+ rep[1] = rep[0]; -+ rep[0] = best_off; -+ } -+ -+ if (litLength == 0) -+ offset--; -+ } -+ -+ ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH); -+ ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH); -+ anchor = ip = ip + mlen; -+ } -+ } /* for (cur=0; cur < last_pos; ) */ -+ -+ /* Save reps for next block */ -+ { -+ int i; -+ for (i = 0; i < ZSTD_REP_NUM; i++) -+ ctx->repToConfirm[i] = rep[i]; -+ } -+ -+ /* Last Literals */ -+ { -+ size_t lastLLSize = iend - anchor; -+ memcpy(seqStorePtr->lit, anchor, lastLLSize); -+ seqStorePtr->lit += lastLLSize; -+ } -+} -+ -+#endif /* ZSTD_OPT_H_91842398743 */ -diff --git a/xen/include/xen/decompress.h b/xen/include/xen/decompress.h -index b2955faa4b..f5bc17f2b6 100644 ---- a/xen/include/xen/decompress.h -+++ b/xen/include/xen/decompress.h -@@ -31,7 +31,7 @@ typedef int decompress_fn(unsigned char *inbuf, unsigned int len, - * dependent). - */ - --decompress_fn bunzip2, unxz, unlzma, unlzo, unlz4; -+decompress_fn bunzip2, unxz, unlzma, unlzo, unlz4, unzstd; - - int decompress(void *inbuf, unsigned int len, void *outbuf); - -diff --git a/xen/include/xen/xxhash.h b/xen/include/xen/xxhash.h -new file mode 100644 -index 0000000000..13ddc616d1 ---- /dev/null -+++ b/xen/include/xen/xxhash.h -@@ -0,0 +1,259 @@ -+/* -+ * xxHash - Extremely Fast Hash algorithm -+ * Copyright (C) 2012-2016, Yann Collet. -+ * -+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are -+ * met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. 
-+ * * Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following disclaimer -+ * in the documentation and/or other materials provided with the -+ * distribution. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). -+ * -+ * You can contact the author at: -+ * - xxHash homepage: https://cyan4973.github.io/xxHash/ -+ * - xxHash source repository: https://github.com/Cyan4973/xxHash -+ */ -+ -+/* -+ * Notice extracted from xxHash homepage: -+ * -+ * xxHash is an extremely fast Hash algorithm, running at RAM speed limits. -+ * It also successfully passes all tests from the SMHasher suite. 
-+ * -+ * Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 -+ * Duo @3GHz) -+ * -+ * Name Speed Q.Score Author -+ * xxHash 5.4 GB/s 10 -+ * CrapWow 3.2 GB/s 2 Andrew -+ * MurmurHash 3a 2.7 GB/s 10 Austin Appleby -+ * SpookyHash 2.0 GB/s 10 Bob Jenkins -+ * SBox 1.4 GB/s 9 Bret Mulvey -+ * Lookup3 1.2 GB/s 9 Bob Jenkins -+ * SuperFastHash 1.2 GB/s 1 Paul Hsieh -+ * CityHash64 1.05 GB/s 10 Pike & Alakuijala -+ * FNV 0.55 GB/s 5 Fowler, Noll, Vo -+ * CRC32 0.43 GB/s 9 -+ * MD5-32 0.33 GB/s 10 Ronald L. Rivest -+ * SHA1-32 0.28 GB/s 10 -+ * -+ * Q.Score is a measure of quality of the hash function. -+ * It depends on successfully passing the SMHasher test set. -+ * 10 is a perfect score. -+ * -+ * A 64-bits version, named xxh64, offers much better speed, -+ * but for 64-bits applications only. -+ * Name Speed on 64 bits Speed on 32 bits -+ * xxh64 13.8 GB/s 1.9 GB/s -+ * xxh32 6.8 GB/s 6.0 GB/s -+ */ -+ -+#ifndef XXHASH_H -+#define XXHASH_H -+ -+#include -+ -+/*-**************************** -+ * Simple Hash Functions -+ *****************************/ -+ -+/** -+ * xxh32() - calculate the 32-bit hash of the input with a given seed. -+ * -+ * @input: The data to hash. -+ * @length: The length of the data to hash. -+ * @seed: The seed can be used to alter the result predictably. -+ * -+ * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s -+ * -+ * Return: The 32-bit hash of the data. -+ */ -+uint32_t xxh32(const void *input, size_t length, uint32_t seed); -+ -+/** -+ * xxh64() - calculate the 64-bit hash of the input with a given seed. -+ * -+ * @input: The data to hash. -+ * @length: The length of the data to hash. -+ * @seed: The seed can be used to alter the result predictably. -+ * -+ * This function runs 2x faster on 64-bit systems, but slower on 32-bit systems. -+ * -+ * Return: The 64-bit hash of the data.
-+ */ -+uint64_t xxh64(const void *input, size_t length, uint64_t seed); -+ -+/** -+ * xxhash() - calculate wordsize hash of the input with a given seed -+ * @input: The data to hash. -+ * @length: The length of the data to hash. -+ * @seed: The seed can be used to alter the result predictably. -+ * -+ * If the hash does not need to be comparable between machines with -+ * different word sizes, this function will call whichever of xxh32() -+ * or xxh64() is faster. -+ * -+ * The choice is made at compile time: xxh64() when BITS_PER_LONG == 64, -+ * xxh32() otherwise. In the xxh32() case @seed is implicitly narrowed to that -+ * function's uint32_t seed parameter. -+ * -+ * Return: wordsize hash of the data. -+ */ -+ -+static inline unsigned long xxhash(const void *input, size_t length, -+ uint64_t seed) -+{ -+#if BITS_PER_LONG == 64 -+ return xxh64(input, length, seed); -+#else -+ return xxh32(input, length, seed); -+#endif -+} -+ -+/*-**************************** -+ * Streaming Hash Functions -+ *****************************/ -+ -+/* -+ * These definitions are only meant to allow allocation of XXH state -+ * statically, on stack, or in a struct for example. -+ * Do not use members directly. -+ */ -+ -+/** -+ * struct xxh32_state - private xxh32 state, do not use members directly -+ */ -+struct xxh32_state { -+ uint32_t total_len_32; -+ uint32_t large_len; -+ uint32_t v1; -+ uint32_t v2; -+ uint32_t v3; -+ uint32_t v4; -+ uint32_t mem32[4]; -+ uint32_t memsize; -+}; -+ -+/** -+ * struct xxh64_state - private xxh64 state, do not use members directly -+ */ -+struct xxh64_state { -+ uint64_t total_len; -+ uint64_t v1; -+ uint64_t v2; -+ uint64_t v3; -+ uint64_t v4; -+ uint64_t mem64[4]; -+ uint32_t memsize; -+}; -+ -+/** -+ * xxh32_reset() - reset the xxh32 state to start a new hashing operation -+ * -+ * @state: The xxh32 state to reset. -+ * @seed: Initialize the hash state with this seed. -+ * -+ * Call this function on any xxh32_state to prepare for a new hashing operation. -+ */ -+void xxh32_reset(struct xxh32_state *state, uint32_t seed); -+ -+/** -+ * xxh32_update() - hash the data given and update the xxh32 state -+ * -+ * @state: The xxh32 state to update.
-+ * @input: The data to hash. -+ * @length: The length of the data to hash. -+ * -+ * After calling xxh32_reset() call xxh32_update() as many times as necessary. -+ * -+ * Return: Zero on success, otherwise an error code. -+ */ -+int xxh32_update(struct xxh32_state *state, const void *input, size_t length); -+ -+/** -+ * xxh32_digest() - produce the current xxh32 hash -+ * -+ * @state: Produce the current xxh32 hash of this state. -+ * -+ * A hash value can be produced at any time. It is still possible to continue -+ * inserting input into the hash state after a call to xxh32_digest(), and -+ * generate new hashes later on, by calling xxh32_digest() again. -+ * -+ * Return: The xxh32 hash stored in the state. -+ */ -+uint32_t xxh32_digest(const struct xxh32_state *state); -+ -+/** -+ * xxh64_reset() - reset the xxh64 state to start a new hashing operation -+ * -+ * @state: The xxh64 state to reset. -+ * @seed: Initialize the hash state with this seed. -+ */ -+void xxh64_reset(struct xxh64_state *state, uint64_t seed); -+ -+/** -+ * xxh64_update() - hash the data given and update the xxh64 state -+ * @state: The xxh64 state to update. -+ * @input: The data to hash. -+ * @length: The length of the data to hash. -+ * -+ * After calling xxh64_reset() call xxh64_update() as many times as necessary. -+ * -+ * Return: Zero on success, otherwise an error code. -+ */ -+int xxh64_update(struct xxh64_state *state, const void *input, size_t length); -+ -+/** -+ * xxh64_digest() - produce the current xxh64 hash -+ * -+ * @state: Produce the current xxh64 hash of this state. -+ * -+ * A hash value can be produced at any time. It is still possible to continue -+ * inserting input into the hash state after a call to xxh64_digest(), and -+ * generate new hashes later on, by calling xxh64_digest() again. -+ * -+ * Return: The xxh64 hash stored in the state. 
-+ */ -+uint64_t xxh64_digest(const struct xxh64_state *state); -+ -+/*-************************** -+ * Utils -+ ***************************/ -+ -+/** -+ * xxh32_copy_state() - copy the source state into the destination state -+ * -+ * @src: The source xxh32 state. -+ * @dst: The destination xxh32 state. -+ */ -+void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src); -+ -+/** -+ * xxh64_copy_state() - copy the source state into the destination state -+ * -+ * @src: The source xxh64 state. -+ * @dst: The destination xxh64 state. -+ */ -+void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src); -+ -+#endif /* XXHASH_H */ -diff --git a/xen/include/xen/zstd.h b/xen/include/xen/zstd.h -new file mode 100644 -index 0000000000..eb33582a18 ---- /dev/null -+++ b/xen/include/xen/zstd.h -@@ -0,0 +1,1157 @@ -+/* -+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. -+ * All rights reserved. -+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of https://github.com/facebook/zstd. -+ * An additional grant of patent rights can be found in the PATENTS file in the -+ * same directory. -+ * -+ * This program is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License version 2 as published by the -+ * Free Software Foundation. This program is dual-licensed; you may select -+ * either version 2 of the GNU General Public License ("GPL") or BSD license -+ * ("BSD"). -+ */ -+ -+#ifndef ZSTD_H -+#define ZSTD_H -+ -+/* ====== Dependency ======*/ -+#include /* size_t */ -+ -+ -+/*-***************************************************************************** -+ * Introduction -+ * -+ * zstd, short for Zstandard, is a fast lossless compression algorithm, -+ * targeting real-time compression scenarios at zlib-level and better -+ * compression ratios. 
The zstd compression library provides in-memory -+ * compression and decompression functions. The library supports compression -+ * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled -+ * ultra, should be used with caution, as they require more memory. -+ * Compression can be done in: -+ * - a single step, reusing a context (described as Explicit memory management) -+ * - unbounded multiple steps (described as Streaming compression) -+ * The compression ratio achievable on small data can be highly improved using -+ * compression with a dictionary in: -+ * - a single step (described as Simple dictionary API) -+ * - a single step, reusing a dictionary (described as Fast dictionary API) -+ ******************************************************************************/ -+ -+/*====== Helper functions ======*/ -+ -+/** -+ * enum ZSTD_ErrorCode - zstd error codes -+ * -+ * Functions that return size_t can be checked for errors using ZSTD_isError() -+ * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode(). 
-+ */ -+typedef enum { -+ ZSTD_error_no_error, -+ ZSTD_error_GENERIC, -+ ZSTD_error_prefix_unknown, -+ ZSTD_error_version_unsupported, -+ ZSTD_error_parameter_unknown, -+ ZSTD_error_frameParameter_unsupported, -+ ZSTD_error_frameParameter_unsupportedBy32bits, -+ ZSTD_error_frameParameter_windowTooLarge, -+ ZSTD_error_compressionParameter_unsupported, -+ ZSTD_error_init_missing, -+ ZSTD_error_memory_allocation, -+ ZSTD_error_stage_wrong, -+ ZSTD_error_dstSize_tooSmall, -+ ZSTD_error_srcSize_wrong, -+ ZSTD_error_corruption_detected, -+ ZSTD_error_checksum_wrong, -+ ZSTD_error_tableLog_tooLarge, -+ ZSTD_error_maxSymbolValue_tooLarge, -+ ZSTD_error_maxSymbolValue_tooSmall, -+ ZSTD_error_dictionary_corrupted, -+ ZSTD_error_dictionary_wrong, -+ ZSTD_error_dictionaryCreation_failed, -+ ZSTD_error_maxCode -+} ZSTD_ErrorCode; -+ -+/** -+ * ZSTD_maxCLevel() - maximum compression level available -+ * -+ * Return: Maximum compression level available. -+ */ -+int ZSTD_maxCLevel(void); -+/** -+ * ZSTD_compressBound() - maximum compressed size in worst case scenario -+ * @srcSize: The size of the data to compress. -+ * -+ * Return: The maximum compressed size in the worst case scenario. -+ */ -+size_t ZSTD_compressBound(size_t srcSize); -+/** -+ * ZSTD_isError() - tells if a size_t function result is an error code -+ * @code: The function result to check for error. -+ * -+ * Return: Non-zero iff the code is an error. -+ */ -+static __attribute__((unused)) unsigned int ZSTD_isError(size_t code) -+{ -+ return code > (size_t)-ZSTD_error_maxCode; -+} -+/** -+ * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode -+ * @functionResult: The result of a function for which ZSTD_isError() is true. -+ * -+ * Return: The ZSTD_ErrorCode corresponding to the functionResult or 0 -+ * if the functionResult isn't an error. 
-+ */ -+static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode( -+ size_t functionResult) -+{ -+ if (!ZSTD_isError(functionResult)) -+ return (ZSTD_ErrorCode)0; -+ return (ZSTD_ErrorCode)(0 - functionResult); -+} -+ -+/** -+ * enum ZSTD_strategy - zstd compression search strategy -+ * -+ * From faster to stronger. -+ */ -+typedef enum { -+ ZSTD_fast, -+ ZSTD_dfast, -+ ZSTD_greedy, -+ ZSTD_lazy, -+ ZSTD_lazy2, -+ ZSTD_btlazy2, -+ ZSTD_btopt, -+ ZSTD_btopt2 -+} ZSTD_strategy; -+ -+/** -+ * struct ZSTD_compressionParameters - zstd compression parameters -+ * @windowLog: Log of the largest match distance. Larger means more -+ * compression, and more memory needed during decompression. -+ * @chainLog: Fully searched segment. Larger means more compression, slower, -+ * and more memory (useless for fast). -+ * @hashLog: Dispatch table. Larger means more compression, -+ * slower, and more memory. -+ * @searchLog: Number of searches. Larger means more compression and slower. -+ * @searchLength: Match length searched. Larger means faster decompression, -+ * sometimes less compression. -+ * @targetLength: Acceptable match size for optimal parser (only). Larger means -+ * more compression, and slower. -+ * @strategy: The zstd compression strategy. -+ */ -+typedef struct { -+ unsigned int windowLog; -+ unsigned int chainLog; -+ unsigned int hashLog; -+ unsigned int searchLog; -+ unsigned int searchLength; -+ unsigned int targetLength; -+ ZSTD_strategy strategy; -+} ZSTD_compressionParameters; -+ -+/** -+ * struct ZSTD_frameParameters - zstd frame parameters -+ * @contentSizeFlag: Controls whether content size will be present in the frame -+ * header (when known). -+ * @checksumFlag: Controls whether a 32-bit checksum is generated at the end -+ * of the frame for error detection. -+ * @noDictIDFlag: Controls whether dictID will be saved into the frame header -+ * when using dictionary compression. -+ * -+ * The default value is all fields set to 0. 
-+ */ -+typedef struct { -+ unsigned int contentSizeFlag; -+ unsigned int checksumFlag; -+ unsigned int noDictIDFlag; -+} ZSTD_frameParameters; -+ -+/** -+ * struct ZSTD_parameters - zstd parameters -+ * @cParams: The compression parameters. -+ * @fParams: The frame parameters. -+ */ -+typedef struct { -+ ZSTD_compressionParameters cParams; -+ ZSTD_frameParameters fParams; -+} ZSTD_parameters; -+ -+/** -+ * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level -+ * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). -+ * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. -+ * @dictSize: The dictionary size or 0 if a dictionary isn't being used. -+ * -+ * Return: The selected ZSTD_compressionParameters. -+ */ -+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, -+ unsigned long long estimatedSrcSize, size_t dictSize); -+ -+/** -+ * ZSTD_getParams() - returns ZSTD_parameters for selected level -+ * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). -+ * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. -+ * @dictSize: The dictionary size or 0 if a dictionary isn't being used. -+ * -+ * The same as ZSTD_getCParams() except also selects the default frame -+ * parameters (all zero). -+ * -+ * Return: The selected ZSTD_parameters. -+ */ -+ZSTD_parameters ZSTD_getParams(int compressionLevel, -+ unsigned long long estimatedSrcSize, size_t dictSize); -+ -+/*-************************************* -+ * Explicit memory management -+ **************************************/ -+ -+/** -+ * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx -+ * @cParams: The compression parameters to be used for compression. -+ * -+ * If multiple compression parameters might be used, the caller must call -+ * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum -+ * size. 
-+ * -+ * Return: A lower bound on the size of the workspace that is passed to -+ * ZSTD_initCCtx(). -+ */ -+size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams); -+ -+/** -+ * struct ZSTD_CCtx - the zstd compression context -+ * -+ * When compressing many times it is recommended to allocate a context just once -+ * and reuse it for each successive compression operation. -+ */ -+typedef struct ZSTD_CCtx_s ZSTD_CCtx; -+/** -+ * ZSTD_initCCtx() - initialize a zstd compression context -+ * @workspace: The workspace to emplace the context into. It must outlive -+ * the returned context. -+ * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to -+ * determine how large the workspace must be. -+ * -+ * Return: A compression context emplaced into workspace. -+ */ -+ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize); -+ -+/** -+ * ZSTD_compressCCtx() - compress src into dst -+ * @ctx: The context. Must have been initialized with a workspace at -+ * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). -+ * @dst: The buffer to compress src into. -+ * @dstCapacity: The size of the destination buffer. May be any size, but -+ * ZSTD_compressBound(srcSize) is guaranteed to be large enough. -+ * @src: The data to compress. -+ * @srcSize: The size of the data to compress. -+ * @params: The parameters to use for compression. See ZSTD_getParams(). -+ * -+ * Return: The compressed size or an error, which can be checked using -+ * ZSTD_isError(). -+ */ -+size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, -+ const void *src, size_t srcSize, ZSTD_parameters params); -+ -+/** -+ * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx -+ * -+ * Return: A lower bound on the size of the workspace that is passed to -+ * ZSTD_initDCtx(). 
-+ */ -+size_t ZSTD_DCtxWorkspaceBound(void); -+ -+/** -+ * struct ZSTD_DCtx - the zstd decompression context -+ * -+ * When decompressing many times it is recommended to allocate a context just -+ * once and reuse it for each successive decompression operation. -+ */ -+typedef struct ZSTD_DCtx_s ZSTD_DCtx; -+/** -+ * ZSTD_initDCtx() - initialize a zstd decompression context -+ * @workspace: The workspace to emplace the context into. It must outlive -+ * the returned context. -+ * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to -+ * determine how large the workspace must be. -+ * -+ * Return: A decompression context emplaced into workspace. -+ */ -+ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize); -+ -+/** -+ * ZSTD_decompressDCtx() - decompress zstd compressed src into dst -+ * @ctx: The decompression context. -+ * @dst: The buffer to decompress src into. -+ * @dstCapacity: The size of the destination buffer. Must be at least as large -+ * as the decompressed size. If the caller cannot upper bound the -+ * decompressed size, then it's better to use the streaming API. -+ * @src: The zstd compressed data to decompress. Multiple concatenated -+ * frames and skippable frames are allowed. -+ * @srcSize: The exact size of the data to decompress. -+ * -+ * Return: The decompressed size or an error, which can be checked using -+ * ZSTD_isError(). -+ */ -+size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, -+ const void *src, size_t srcSize); -+ -+/*-************************ -+ * Simple dictionary API -+ **************************/ -+ -+/** -+ * ZSTD_compress_usingDict() - compress src into dst using a dictionary -+ * @ctx: The context. Must have been initialized with a workspace at -+ * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). -+ * @dst: The buffer to compress src into. -+ * @dstCapacity: The size of the destination buffer. 
May be any size, but -+ * ZSTD_compressBound(srcSize) is guaranteed to be large enough. -+ * @src: The data to compress. -+ * @srcSize: The size of the data to compress. -+ * @dict: The dictionary to use for compression. -+ * @dictSize: The size of the dictionary. -+ * @params: The parameters to use for compression. See ZSTD_getParams(). -+ * -+ * Compression using a predefined dictionary. The same dictionary must be used -+ * during decompression. -+ * -+ * Return: The compressed size or an error, which can be checked using -+ * ZSTD_isError(). -+ */ -+size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, -+ const void *src, size_t srcSize, const void *dict, size_t dictSize, -+ ZSTD_parameters params); -+ -+/** -+ * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary -+ * @ctx: The decompression context. -+ * @dst: The buffer to decompress src into. -+ * @dstCapacity: The size of the destination buffer. Must be at least as large -+ * as the decompressed size. If the caller cannot upper bound the -+ * decompressed size, then it's better to use the streaming API. -+ * @src: The zstd compressed data to decompress. Multiple concatenated -+ * frames and skippable frames are allowed. -+ * @srcSize: The exact size of the data to decompress. -+ * @dict: The dictionary to use for decompression. The same dictionary -+ * must've been used to compress the data. -+ * @dictSize: The size of the dictionary. -+ * -+ * Return: The decompressed size or an error, which can be checked using -+ * ZSTD_isError(). -+ */ -+size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, -+ const void *src, size_t srcSize, const void *dict, size_t dictSize); -+ -+/*-************************** -+ * Fast dictionary API -+ ***************************/ -+ -+/** -+ * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict -+ * @cParams: The compression parameters to be used for compression. 
-+ * -+ * Return: A lower bound on the size of the workspace that is passed to -+ * ZSTD_initCDict(). -+ */ -+size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams); -+ -+/** -+ * struct ZSTD_CDict - a digested dictionary to be used for compression -+ */ -+typedef struct ZSTD_CDict_s ZSTD_CDict; -+ -+/** -+ * ZSTD_initCDict() - initialize a digested dictionary for compression -+ * @dictBuffer: The dictionary to digest. The buffer is referenced by the -+ * ZSTD_CDict so it must outlive the returned ZSTD_CDict. -+ * @dictSize: The size of the dictionary. -+ * @params: The parameters to use for compression. See ZSTD_getParams(). -+ * @workspace: The workspace. It must outlive the returned ZSTD_CDict. -+ * @workspaceSize: The workspace size. Must be at least -+ * ZSTD_CDictWorkspaceBound(params.cParams). -+ * -+ * When compressing multiple messages / blocks with the same dictionary it is -+ * recommended to load it just once. The ZSTD_CDict merely references the -+ * dictBuffer, so it must outlive the returned ZSTD_CDict. -+ * -+ * Return: The digested dictionary emplaced into workspace. -+ */ -+ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize, -+ ZSTD_parameters params, void *workspace, size_t workspaceSize); -+ -+/** -+ * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict -+ * @cctx: The context. Must have been initialized with a workspace at -+ * least as large as ZSTD_CCtxWorkspaceBound(cParams) where -+ * cParams are the compression parameters used to initialize the -+ * cdict. -+ * @dst: The buffer to compress src into. -+ * @dstCapacity: The size of the destination buffer. May be any size, but -+ * ZSTD_compressBound(srcSize) is guaranteed to be large enough. -+ * @src: The data to compress. -+ * @srcSize: The size of the data to compress. -+ * @cdict: The digested dictionary to use for compression. The -+ * compression parameters are supplied to ZSTD_initCDict() -+ * rather than to this function.
-+ * -+ * Compression using a digested dictionary. The same dictionary must be used -+ * during decompression. -+ * -+ * Return: The compressed size or an error, which can be checked using -+ * ZSTD_isError(). -+ */ -+size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, -+ const void *src, size_t srcSize, const ZSTD_CDict *cdict); -+ -+ -+/** -+ * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict -+ * -+ * Return: A lower bound on the size of the workspace that is passed to -+ * ZSTD_initDDict(). -+ */ -+size_t ZSTD_DDictWorkspaceBound(void); -+ -+/** -+ * struct ZSTD_DDict - a digested dictionary to be used for decompression -+ */ -+typedef struct ZSTD_DDict_s ZSTD_DDict; -+ -+/** -+ * ZSTD_initDDict() - initialize a digested dictionary for decompression -+ * @dictBuffer: The dictionary to digest. The buffer is referenced by the -+ * ZSTD_DDict so it must outlive the returned ZSTD_DDict. -+ * @dictSize: The size of the dictionary. -+ * @workspace: The workspace. It must outlive the returned ZSTD_DDict. -+ * @workspaceSize: The workspace size. Must be at least -+ * ZSTD_DDictWorkspaceBound(). -+ * -+ * When decompressing multiple messages / blocks with the same dictionary it is -+ * recommended to load it just once. The ZSTD_DDict merely references the -+ * dictBuffer, so it must outlive the returned ZSTD_DDict. -+ * -+ * Return: The digested dictionary emplaced into workspace. -+ */ -+ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize, -+ void *workspace, size_t workspaceSize); -+ -+/** -+ * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict -+ * @dctx: The decompression context. -+ * @dst: The buffer to decompress src into. -+ * @dstCapacity: The size of the destination buffer. Must be at least as large -+ * as the decompressed size. If the caller cannot upper bound the -+ * decompressed size, then it's better to use the streaming API.
-+ * @src: The zstd compressed data to decompress. Multiple concatenated -+ * frames and skippable frames are allowed. -+ * @srcSize: The exact size of the data to decompress. -+ * @ddict: The digested dictionary to use for decompression. The same -+ * dictionary must've been used to compress the data. -+ * -+ * Return: The decompressed size or an error, which can be checked using -+ * ZSTD_isError(). -+ */ -+size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, -+ size_t dstCapacity, const void *src, size_t srcSize, -+ const ZSTD_DDict *ddict); -+ -+ -+/*-************************** -+ * Streaming -+ ***************************/ -+ -+/** -+ * struct ZSTD_inBuffer - input buffer for streaming -+ * @src: Start of the input buffer. -+ * @size: Size of the input buffer. -+ * @pos: Position where reading stopped. Will be updated. -+ * Necessarily 0 <= pos <= size. -+ */ -+typedef struct ZSTD_inBuffer_s { -+ const void *src; -+ size_t size; -+ size_t pos; -+} ZSTD_inBuffer; -+ -+/** -+ * struct ZSTD_outBuffer - output buffer for streaming -+ * @dst: Start of the output buffer. -+ * @size: Size of the output buffer. -+ * @pos: Position where writing stopped. Will be updated. -+ * Necessarily 0 <= pos <= size. -+ */ -+typedef struct ZSTD_outBuffer_s { -+ void *dst; -+ size_t size; -+ size_t pos; -+} ZSTD_outBuffer; -+ -+ -+ -+/*-***************************************************************************** -+ * Streaming compression - HowTo -+ * -+ * A ZSTD_CStream object is required to track streaming operation. -+ * Use ZSTD_initCStream() to initialize a ZSTD_CStream object. -+ * ZSTD_CStream objects can be reused multiple times on consecutive compression -+ * operations. It is recommended to re-use ZSTD_CStream in situations where many -+ * streaming operations will be achieved consecutively. Use one separate -+ * ZSTD_CStream per thread for parallel execution. -+ * -+ * Use ZSTD_compressStream() repetitively to consume input stream. 
-+ * The function will automatically update both `pos` fields. -+ * Note that it may not consume the entire input, in which case `pos < size`, -+ * and it's up to the caller to present again remaining data. -+ * It returns a hint for the preferred number of bytes to use as an input for -+ * the next function call. -+ * -+ * At any moment, it's possible to flush whatever data remains within internal -+ * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might -+ * still be some content left within the internal buffer if `output->size` is -+ * too small. It returns the number of bytes left in the internal buffer and -+ * must be called until it returns 0. -+ * -+ * ZSTD_endStream() instructs to finish a frame. It will perform a flush and -+ * write frame epilogue. The epilogue is required for decoders to consider a -+ * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush -+ * the full content if `output->size` is too small. In which case, call again -+ * ZSTD_endStream() to complete the flush. It returns the number of bytes left -+ * in the internal buffer and must be called until it returns 0. -+ ******************************************************************************/ -+ -+/** -+ * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream -+ * @cParams: The compression parameters to be used for compression. -+ * -+ * Return: A lower bound on the size of the workspace that is passed to -+ * ZSTD_initCStream() and ZSTD_initCStream_usingCDict(). -+ */ -+size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams); -+ -+/** -+ * struct ZSTD_CStream - the zstd streaming compression context -+ */ -+typedef struct ZSTD_CStream_s ZSTD_CStream; -+ -+/*===== ZSTD_CStream management functions =====*/ -+/** -+ * ZSTD_initCStream() - initialize a zstd streaming compression context -+ * @params: The zstd compression parameters. 
-+ * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must -+ * pass the source size (zero means empty source). Otherwise, -+ * the caller may optionally pass the source size, or zero if -+ * unknown. -+ * @workspace: The workspace to emplace the context into. It must outlive -+ * the returned context. -+ * @workspaceSize: The size of workspace. -+ * Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine -+ * how large the workspace must be. -+ * -+ * Return: The zstd streaming compression context. -+ */ -+ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, -+ unsigned long long pledgedSrcSize, void *workspace, -+ size_t workspaceSize); -+ -+/** -+ * ZSTD_initCStream_usingCDict() - initialize a streaming compression context -+ * @cdict: The digested dictionary to use for compression. -+ * @pledgedSrcSize: Optionally the source size, or zero if unknown. -+ * @workspace: The workspace to emplace the context into. It must outlive -+ * the returned context. -+ * @workspaceSize: The size of workspace. Call ZSTD_CStreamWorkspaceBound() -+ * with the cParams used to initialize the cdict to determine -+ * how large the workspace must be. -+ * -+ * Return: The zstd streaming compression context. -+ */ -+ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, -+ unsigned long long pledgedSrcSize, void *workspace, -+ size_t workspaceSize); -+ -+/*===== Streaming compression functions =====*/ -+/** -+ * ZSTD_resetCStream() - reset the context using parameters from creation -+ * @zcs: The zstd streaming compression context to reset. -+ * @pledgedSrcSize: Optionally the source size, or zero if unknown. -+ * -+ * Resets the context using the parameters from creation. Skips dictionary -+ * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame -+ * content size is always written into the frame header. -+ * -+ * Return: Zero or an error, which can be checked using ZSTD_isError(). 
-+ */ -+size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize); -+/** -+ * ZSTD_compressStream() - streaming compress some of input into output -+ * @zcs: The zstd streaming compression context. -+ * @output: Destination buffer. `output->pos` is updated to indicate how much -+ * compressed data was written. -+ * @input: Source buffer. `input->pos` is updated to indicate how much data was -+ * read. Note that it may not consume the entire input, in which case -+ * `input->pos < input->size`, and it's up to the caller to present -+ * remaining data again. -+ * -+ * The `input` and `output` buffers may be any size. Guaranteed to make some -+ * forward progress if `input` and `output` are not empty. -+ * -+ * Return: A hint for the number of bytes to use as the input for the next -+ * function call or an error, which can be checked using -+ * ZSTD_isError(). -+ */ -+size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, -+ ZSTD_inBuffer *input); -+/** -+ * ZSTD_flushStream() - flush internal buffers into output -+ * @zcs: The zstd streaming compression context. -+ * @output: Destination buffer. `output->pos` is updated to indicate how much -+ * compressed data was written. -+ * -+ * ZSTD_flushStream() must be called until it returns 0, meaning all the data -+ * has been flushed. Since ZSTD_flushStream() causes a block to be ended, -+ * calling it too often will degrade the compression ratio. -+ * -+ * Return: The number of bytes still present within internal buffers or an -+ * error, which can be checked using ZSTD_isError(). -+ */ -+size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); -+/** -+ * ZSTD_endStream() - flush internal buffers into output and end the frame -+ * @zcs: The zstd streaming compression context. -+ * @output: Destination buffer. `output->pos` is updated to indicate how much -+ * compressed data was written. 
-+ * -+ * ZSTD_endStream() must be called until it returns 0, meaning all the data has -+ * been flushed and the frame epilogue has been written. -+ * -+ * Return: The number of bytes still present within internal buffers or an -+ * error, which can be checked using ZSTD_isError(). -+ */ -+size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); -+ -+/** -+ * ZSTD_CStreamInSize() - recommended size for the input buffer -+ * -+ * Return: The recommended size for the input buffer. -+ */ -+size_t ZSTD_CStreamInSize(void); -+/** -+ * ZSTD_CStreamOutSize() - recommended size for the output buffer -+ * -+ * When the output buffer is at least this large, it is guaranteed to be large -+ * enough to flush at least one complete compressed block. -+ * -+ * Return: The recommended size for the output buffer. -+ */ -+size_t ZSTD_CStreamOutSize(void); -+ -+ -+ -+/*-***************************************************************************** -+ * Streaming decompression - HowTo -+ * -+ * A ZSTD_DStream object is required to track streaming operations. -+ * Use ZSTD_initDStream() to initialize a ZSTD_DStream object. -+ * ZSTD_DStream objects can be re-used multiple times. -+ * -+ * Use ZSTD_decompressStream() repetitively to consume your input. -+ * The function will update both `pos` fields. -+ * If `input->pos < input->size`, some input has not been consumed. -+ * It's up to the caller to present again remaining data. -+ * If `output->pos < output->size`, decoder has flushed everything it could. -+ * Returns 0 iff a frame is completely decoded and fully flushed. -+ * Otherwise it returns a suggested next input size that will never load more -+ * than the current frame. -+ ******************************************************************************/ -+ -+/** -+ * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream -+ * @maxWindowSize: The maximum window size allowed for compressed frames. 
-+ * -+ * Return: A lower bound on the size of the workspace that is passed to -+ * ZSTD_initDStream() and ZSTD_initDStream_usingDDict(). -+ */ -+size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize); -+ -+/** -+ * struct ZSTD_DStream - the zstd streaming decompression context -+ */ -+typedef struct ZSTD_DStream_s ZSTD_DStream; -+/*===== ZSTD_DStream management functions =====*/ -+/** -+ * ZSTD_initDStream() - initialize a zstd streaming decompression context -+ * @maxWindowSize: The maximum window size allowed for compressed frames. -+ * @workspace: The workspace to emplace the context into. It must outlive -+ * the returned context. -+ * @workspaceSize: The size of workspace. -+ * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine -+ * how large the workspace must be. -+ * -+ * Return: The zstd streaming decompression context. -+ */ -+ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, -+ size_t workspaceSize); -+/** -+ * ZSTD_initDStream_usingDDict() - initialize streaming decompression context -+ * @maxWindowSize: The maximum window size allowed for compressed frames. -+ * @ddict: The digested dictionary to use for decompression. -+ * @workspace: The workspace to emplace the context into. It must outlive -+ * the returned context. -+ * @workspaceSize: The size of workspace. -+ * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine -+ * how large the workspace must be. -+ * -+ * Return: The zstd streaming decompression context. -+ */ -+ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, -+ const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize); -+ -+/*===== Streaming decompression functions =====*/ -+/** -+ * ZSTD_resetDStream() - reset the context using parameters from creation -+ * @zds: The zstd streaming decompression context to reset. -+ * -+ * Resets the context using the parameters from creation. Skips dictionary -+ * loading, since it can be reused. 
-+ * -+ * Return: Zero or an error, which can be checked using ZSTD_isError(). -+ */ -+size_t ZSTD_resetDStream(ZSTD_DStream *zds); -+/** -+ * ZSTD_decompressStream() - streaming decompress some of input into output -+ * @zds: The zstd streaming decompression context. -+ * @output: Destination buffer. `output.pos` is updated to indicate how much -+ * decompressed data was written. -+ * @input: Source buffer. `input.pos` is updated to indicate how much data was -+ * read. Note that it may not consume the entire input, in which case -+ * `input.pos < input.size`, and it's up to the caller to present -+ * remaining data again. -+ * -+ * The `input` and `output` buffers may be any size. Guaranteed to make some -+ * forward progress if `input` and `output` are not empty. -+ * ZSTD_decompressStream() will not consume the last byte of the frame until -+ * the entire frame is flushed. -+ * -+ * Return: Returns 0 iff a frame is completely decoded and fully flushed. -+ * Otherwise returns a hint for the number of bytes to use as the input -+ * for the next function call or an error, which can be checked using -+ * ZSTD_isError(). The size hint will never load more than the frame. -+ */ -+size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, -+ ZSTD_inBuffer *input); -+ -+/** -+ * ZSTD_DStreamInSize() - recommended size for the input buffer -+ * -+ * Return: The recommended size for the input buffer. -+ */ -+size_t ZSTD_DStreamInSize(void); -+/** -+ * ZSTD_DStreamOutSize() - recommended size for the output buffer -+ * -+ * When the output buffer is at least this large, it is guaranteed to be large -+ * enough to flush at least one complete decompressed block. -+ * -+ * Return: The recommended size for the output buffer. 
-+ */ -+size_t ZSTD_DStreamOutSize(void); -+ -+ -+/* --- Constants ---*/ -+#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */ -+#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U -+ -+#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) -+#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) -+ -+#define ZSTD_WINDOWLOG_MAX_32 27 -+#define ZSTD_WINDOWLOG_MAX_64 27 -+#define ZSTD_WINDOWLOG_MAX \ -+ ((unsigned int)(sizeof(size_t) == 4 \ -+ ? ZSTD_WINDOWLOG_MAX_32 \ -+ : ZSTD_WINDOWLOG_MAX_64)) -+#define ZSTD_WINDOWLOG_MIN 10 -+#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX -+#define ZSTD_HASHLOG_MIN 6 -+#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) -+#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN -+#define ZSTD_HASHLOG3_MAX 17 -+#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) -+#define ZSTD_SEARCHLOG_MIN 1 -+/* only for ZSTD_fast, other strategies are limited to 6 */ -+#define ZSTD_SEARCHLENGTH_MAX 7 -+/* only for ZSTD_btopt, other strategies are limited to 4 */ -+#define ZSTD_SEARCHLENGTH_MIN 3 -+#define ZSTD_TARGETLENGTH_MIN 4 -+#define ZSTD_TARGETLENGTH_MAX 999 -+ -+/* for static allocation */ -+#define ZSTD_FRAMEHEADERSIZE_MAX 18 -+#define ZSTD_FRAMEHEADERSIZE_MIN 6 -+static const size_t ZSTD_frameHeaderSize_prefix = 5; -+static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN; -+static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; -+/* magic number + skippable frame length */ -+static const size_t ZSTD_skippableHeaderSize = 8; -+ -+ -+/*-************************************* -+ * Compressed size functions -+ **************************************/ -+ -+/** -+ * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame -+ * @src: Source buffer. It should point to the start of a zstd encoded frame -+ * or a skippable frame. -+ * @srcSize: The size of the source buffer. It must be at least as large as the -+ * size of the frame. 
-+ * -+ * Return: The compressed size of the frame pointed to by `src` or an error, -+ * which can be check with ZSTD_isError(). -+ * Suitable to pass to ZSTD_decompress() or similar functions. -+ */ -+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize); -+ -+/*-************************************* -+ * Decompressed size functions -+ **************************************/ -+/** -+ * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header -+ * @src: It should point to the start of a zstd encoded frame. -+ * @srcSize: The size of the source buffer. It must be at least as large as the -+ * frame header. `ZSTD_frameHeaderSize_max` is always large enough. -+ * -+ * Return: The frame content size stored in the frame header if known. -+ * `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the -+ * frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input. -+ */ -+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); -+ -+/** -+ * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames -+ * @src: It should point to the start of a series of zstd encoded and/or -+ * skippable frames. -+ * @srcSize: The exact size of the series of frames. -+ * -+ * If any zstd encoded frame in the series doesn't have the frame content size -+ * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always -+ * set when using ZSTD_compress(). The decompressed size can be very large. -+ * If the source is untrusted, the decompressed size could be wrong or -+ * intentionally modified. Always ensure the result fits within the -+ * application's authorized limits. ZSTD_findDecompressedSize() handles multiple -+ * frames, and so it must traverse the input to read each frame header. This is -+ * efficient as most of the data is skipped, however it does mean that all frame -+ * data must be present and valid. 
-+ * -+ * Return: Decompressed size of all the data contained in the frames if known. -+ * `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown. -+ * `ZSTD_CONTENTSIZE_ERROR` if an error occurred. -+ */ -+unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize); -+ -+/*-************************************* -+ * Advanced compression functions -+ **************************************/ -+/** -+ * ZSTD_checkCParams() - ensure parameter values remain within authorized range -+ * @cParams: The zstd compression parameters. -+ * -+ * Return: Zero or an error, which can be checked using ZSTD_isError(). -+ */ -+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams); -+ -+/** -+ * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize -+ * @srcSize: Optionally the estimated source size, or zero if unknown. -+ * @dictSize: Optionally the estimated dictionary size, or zero if unknown. -+ * -+ * Return: The optimized parameters. -+ */ -+ZSTD_compressionParameters ZSTD_adjustCParams( -+ ZSTD_compressionParameters cParams, unsigned long long srcSize, -+ size_t dictSize); -+ -+/*--- Advanced decompression functions ---*/ -+ -+/** -+ * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame -+ * @buffer: The source buffer to check. -+ * @size: The size of the source buffer, must be at least 4 bytes. -+ * -+ * Return: True iff the buffer starts with a zstd or skippable frame identifier. -+ */ -+unsigned int ZSTD_isFrame(const void *buffer, size_t size); -+ -+/** -+ * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary -+ * @dict: The dictionary buffer. -+ * @dictSize: The size of the dictionary buffer. -+ * -+ * Return: The dictionary id stored within the dictionary or 0 if the -+ * dictionary is not a zstd dictionary. If it returns 0 the -+ * dictionary can still be loaded as a content-only dictionary. 
-+ */ -+unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize); -+ -+/** -+ * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict -+ * @ddict: The ddict to find the id of. -+ * -+ * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not -+ * a zstd dictionary. If it returns 0 `ddict` will be loaded as a -+ * content-only dictionary. -+ */ -+unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict); -+ -+/** -+ * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame -+ * @src: Source buffer. It must be a zstd encoded frame. -+ * @srcSize: The size of the source buffer. It must be at least as large as the -+ * frame header. `ZSTD_frameHeaderSize_max` is always large enough. -+ * -+ * Return: The dictionary id required to decompress the frame stored within -+ * `src` or 0 if the dictionary id could not be decoded. It can return -+ * 0 if the frame does not require a dictionary, the dictionary id -+ * wasn't stored in the frame, `src` is not a zstd frame, or `srcSize` -+ * is too small. -+ */ -+unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize); -+ -+/** -+ * struct ZSTD_frameParams - zstd frame parameters stored in the frame header -+ * @frameContentSize: The frame content size, or 0 if not present. -+ * @windowSize: The window size, or 0 if the frame is a skippable frame. -+ * @dictID: The dictionary id, or 0 if not present. -+ * @checksumFlag: Whether a checksum was used. -+ */ -+typedef struct { -+ unsigned long long frameContentSize; -+ unsigned int windowSize; -+ unsigned int dictID; -+ unsigned int checksumFlag; -+} ZSTD_frameParams; -+ -+/** -+ * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame -+ * @fparamsPtr: On success the frame parameters are written here. -+ * @src: The source buffer. It must point to a zstd or skippable frame. -+ * @srcSize: The size of the source buffer. 
`ZSTD_frameHeaderSize_max` is -+ * always large enough to succeed. -+ * -+ * Return: 0 on success. If more data is required it returns how many bytes -+ * must be provided to make forward progress. Otherwise it returns -+ * an error, which can be checked using ZSTD_isError(). -+ */ -+size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, -+ size_t srcSize); -+ -+/*-***************************************************************************** -+ * Buffer-less and synchronous inner streaming functions -+ * -+ * This is an advanced API, giving full control over buffer management, for -+ * users which need direct control over memory. -+ * But it's also a complex one, with many restrictions (documented below). -+ * Prefer using normal streaming API for an easier experience -+ ******************************************************************************/ -+ -+/*-***************************************************************************** -+ * Buffer-less streaming compression (synchronous mode) -+ * -+ * A ZSTD_CCtx object is required to track streaming operations. -+ * Use ZSTD_initCCtx() to initialize a context. -+ * ZSTD_CCtx object can be re-used multiple times within successive compression -+ * operations. -+ * -+ * Start by initializing a context. -+ * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary -+ * compression, -+ * or ZSTD_compressBegin_advanced(), for finer parameter control. -+ * It's also possible to duplicate a reference context which has already been -+ * initialized, using ZSTD_copyCCtx() -+ * -+ * Then, consume your input using ZSTD_compressContinue(). -+ * There are some important considerations to keep in mind when using this -+ * advanced function : -+ * - ZSTD_compressContinue() has no internal buffer. It uses externally provided -+ * buffer only. -+ * - Interface is synchronous : input is consumed entirely and produce 1+ -+ * (or more) compressed blocks. 
-+ * - Caller must ensure there is enough space in `dst` to store compressed data -+ * under worst case scenario. Worst case evaluation is provided by -+ * ZSTD_compressBound(). -+ * ZSTD_compressContinue() doesn't guarantee recover after a failed -+ * compression. -+ * - ZSTD_compressContinue() presumes prior input ***is still accessible and -+ * unmodified*** (up to maximum distance size, see WindowLog). -+ * It remembers all previous contiguous blocks, plus one separated memory -+ * segment (which can itself consists of multiple contiguous blocks) -+ * - ZSTD_compressContinue() detects that prior input has been overwritten when -+ * `src` buffer overlaps. In which case, it will "discard" the relevant memory -+ * section from its history. -+ * -+ * Finish a frame with ZSTD_compressEnd(), which will write the last block(s) -+ * and optional checksum. It's possible to use srcSize==0, in which case, it -+ * will write a final empty block to end the frame. Without last block mark, -+ * frames will be considered unfinished (corrupted) by decoders. -+ * -+ * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new -+ * frame. 
-+ ******************************************************************************/ -+ -+/*===== Buffer-less streaming compression functions =====*/ -+size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel); -+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, -+ size_t dictSize, int compressionLevel); -+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, -+ size_t dictSize, ZSTD_parameters params, -+ unsigned long long pledgedSrcSize); -+size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx, -+ unsigned long long pledgedSrcSize); -+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, -+ unsigned long long pledgedSrcSize); -+size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, -+ const void *src, size_t srcSize); -+size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, -+ const void *src, size_t srcSize); -+ -+ -+ -+/*-***************************************************************************** -+ * Buffer-less streaming decompression (synchronous mode) -+ * -+ * A ZSTD_DCtx object is required to track streaming operations. -+ * Use ZSTD_initDCtx() to initialize a context. -+ * A ZSTD_DCtx object can be re-used multiple times. -+ * -+ * First typical operation is to retrieve frame parameters, using -+ * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide -+ * important information to correctly decode the frame, such as the minimum -+ * rolling buffer size to allocate to decompress data (`windowSize`), and the -+ * dictionary ID used. -+ * Note: content size is optional, it may not be present. 0 means unknown. -+ * Note that these values could be wrong, either because of data malformation, -+ * or because an attacker is spoofing deliberate false information. As a -+ * consequence, check that values remain within valid application range, -+ * especially `windowSize`, before allocation. 
Each application can set its own -+ * limit, depending on local restrictions. For extended interoperability, it is -+ * recommended to support at least 8 MB. -+ * Frame parameters are extracted from the beginning of the compressed frame. -+ * Data fragment must be large enough to ensure successful decoding, typically -+ * `ZSTD_frameHeaderSize_max` bytes. -+ * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled. -+ * >0: `srcSize` is too small, provide at least this many bytes. -+ * errorCode, which can be tested using ZSTD_isError(). -+ * -+ * Start decompression, with ZSTD_decompressBegin() or -+ * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared -+ * context, using ZSTD_copyDCtx(). -+ * -+ * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() -+ * alternatively. -+ * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' -+ * to ZSTD_decompressContinue(). -+ * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will -+ * fail. -+ * -+ * The result of ZSTD_decompressContinue() is the number of bytes regenerated -+ * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an -+ * error; it just means ZSTD_decompressContinue() has decoded some metadata -+ * item. It can also be an error code, which can be tested with ZSTD_isError(). -+ * -+ * ZSTD_decompressContinue() needs previous data blocks during decompression, up -+ * to `windowSize`. They should preferably be located contiguously, prior to -+ * current block. Alternatively, a round buffer of sufficient size is also -+ * possible. Sufficient size is determined by frame parameters. -+ * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't -+ * follow each other, make sure that either the compressor breaks contiguity at -+ * the same place, or that previous contiguous segment is large enough to -+ * properly handle maximum back-reference. 
-+ * -+ * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. -+ * Context can then be reset to start a new decompression. -+ * -+ * Note: it's possible to know if next input to present is a header or a block, -+ * using ZSTD_nextInputType(). This information is not required to properly -+ * decode a frame. -+ * -+ * == Special case: skippable frames == -+ * -+ * Skippable frames allow integration of user-defined data into a flow of -+ * concatenated frames. Skippable frames will be ignored (skipped) by a -+ * decompressor. The format of skippable frames is as follows: -+ * a) Skippable frame ID - 4 Bytes, Little endian format, any value from -+ * 0x184D2A50 to 0x184D2A5F -+ * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits -+ * c) Frame Content - any content (User Data) of length equal to Frame Size -+ * For skippable frames ZSTD_decompressContinue() always returns 0. -+ * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 -+ * what means that a frame is skippable. -+ * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might -+ * actually be a zstd encoded frame with no content. For purposes of -+ * decompression, it is valid in both cases to skip the frame using -+ * ZSTD_findFrameCompressedSize() to find its size in bytes. -+ * It also returns frame size as fparamsPtr->frameContentSize. 
-+ ******************************************************************************/ -+ -+/*===== Buffer-less streaming decompression functions =====*/ -+size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx); -+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, -+ size_t dictSize); -+void ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx); -+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx); -+size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, -+ const void *src, size_t srcSize); -+typedef enum { -+ ZSTDnit_frameHeader, -+ ZSTDnit_blockHeader, -+ ZSTDnit_block, -+ ZSTDnit_lastBlock, -+ ZSTDnit_checksum, -+ ZSTDnit_skippableFrame -+} ZSTD_nextInputType_e; -+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx); -+ -+/*-***************************************************************************** -+ * Block functions -+ * -+ * Block functions produce and decode raw zstd blocks, without frame metadata. -+ * Frame metadata cost is typically ~18 bytes, which can be non-negligible for -+ * very small blocks (< 100 bytes). User will have to take in charge required -+ * information to regenerate data, such as compressed and content sizes. -+ * -+ * A few rules to respect: -+ * - Compressing and decompressing require a context structure -+ * + Use ZSTD_initCCtx() and ZSTD_initDCtx() -+ * - It is necessary to init context before starting -+ * + compression : ZSTD_compressBegin() -+ * + decompression : ZSTD_decompressBegin() -+ * + variants _usingDict() are also allowed -+ * + copyCCtx() and copyDCtx() work too -+ * - Block size is limited, it must be <= ZSTD_getBlockSizeMax() -+ * + If you need to compress more, cut data into multiple blocks -+ * + Consider using the regular ZSTD_compress() instead, as frame metadata -+ * costs become negligible when source size is large. -+ * - When a block is considered not compressible enough, ZSTD_compressBlock() -+ * result will be zero. 
In which case, nothing is produced into `dst`. -+ * + User must test for such outcome and deal directly with uncompressed data -+ * + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!! -+ * + In case of multiple successive blocks, decoder must be informed of -+ * uncompressed block existence to follow proper history. Use -+ * ZSTD_insertBlock() in such a case. -+ ******************************************************************************/ -+ -+/* Define for static allocation */ -+#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) -+/*===== Raw zstd block functions =====*/ -+size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx); -+size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, -+ const void *src, size_t srcSize); -+size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, -+ const void *src, size_t srcSize); -+size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, -+ size_t blockSize); -+ -+#endif /* ZSTD_H */