Whamcloud - gitweb
EX-6275 lustre: add lz4 and lz4hc kernel modules
authorSebastien Buisson <sbuisson@ddn.com>
Thu, 17 Nov 2022 07:31:45 +0000 (08:31 +0100)
committerAndreas Dilger <adilger@whamcloud.com>
Fri, 26 May 2023 10:24:52 +0000 (10:24 +0000)
lz4 and lz4hc kernel modules implement compression according to the
lz4 and lz4hc algorithms respectively, through the kernel Crypto API.
lz4 module provides 2 cipher drivers under the generic name 'lz4':
* lz4-lustre-generic of type compression
* lz4-lustre-scomp of type scomp
lz4hc module provides 2 cipher drivers under the generic name 'lz4hc':
* lz4hc-lustre-generic of type compression
* lz4hc-lustre-scomp of type scomp

lz4 and lz4hc kernel module sources are copied from linux v6.1-rc5,
and renamed to llz4.c and llz4hc.c respectively to avoid name
collisions. Use of vmalloc has been changed to kvmalloc since it is
faster in most cases. They implement the Crypto API interface, and
rely on the lz4/lz4hc kernel library for compression implementation.
They have been modified to grok a compression acceleration/level, as
read from the top 4 bits of the crypto_tfm flags, and pass it to the
underlying library.
The lz4/lz4hc library sources are also copied from linux v6.1-rc5 and
built statically, so lz4_compress, lz4_decompress and lz4hc_compress
sources have been "de-modulified", and EXPORT_SYMBOLs removed. Headers
have also been copied from linux v6.1-rc5 for consistency, and source
files modified to include the copied headers instead of the system
headers.
All aforementioned sources are located in the lustre/lz4 directory.

The lz4/lz4hc modules are built only if the kernel does not provide
them. This is verified by checking if the CONFIG_CRYPTO_LZ4 and
CONFIG_CRYPTO_LZ4HC kernel config options are defined.

This patch provides unit testing of several compression modules, such
as lz4, lz4hc, lzo and gzip. This is done via a new test kernel module
kcompr.ko, and new sanity test_84. File lustre/tests/kernel/kcompr.c
contains examples of how to call the compression/decompression
routines.

Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Change-Id: I74ce95ff18194c6052d291588d7e8c79596a7f23
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/49208
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
26 files changed:
config/lustre-build.m4
debian/dkms.conf.in
debian/rules
lustre.spec.in
lustre/Makefile.in
lustre/autoMakefile.am
lustre/autoconf/lustre-core.m4
lustre/include/lustre_crypto.h
lustre/lz4/Makefile.in [new file with mode: 0644]
lustre/lz4/autoMakefile.am [new file with mode: 0644]
lustre/lz4/llz4.c [new file with mode: 0644]
lustre/lz4/llz4hc.c [new file with mode: 0644]
lustre/lz4/lz4.h [new file with mode: 0644]
lustre/lz4/lz4_compress.c [new file with mode: 0644]
lustre/lz4/lz4_decompress.c [new file with mode: 0644]
lustre/lz4/lz4defs.h [new file with mode: 0644]
lustre/lz4/lz4hc_compress.c [new file with mode: 0644]
lustre/lz4/scompress.h [new file with mode: 0644]
lustre/scripts/dkms.mkconf
lustre/scripts/lustre_rmmod
lustre/tests/AMSR_E_L3_DailyOcean_V05_20111003.hdf.bz2 [new file with mode: 0644]
lustre/tests/Makefile.am
lustre/tests/kernel/Makefile.in
lustre/tests/kernel/autoMakefile.am
lustre/tests/kernel/kcompr.c [new file with mode: 0644]
lustre/tests/sanity.sh

index 640c979..2f7488a 100644 (file)
@@ -749,6 +749,8 @@ LC_CONFIG_CLIENT
 LB_CONFIG_MPITESTS
 LB_CONFIG_SERVERS
 LC_CONFIG_CRYPTO
+LC_CONFIG_LZ4
+LC_CONFIG_LZ4HC
 LC_GLIBC_SUPPORT_COPY_FILE_RANGE
 LC_OPENSSL_SSK
 
index 95fbc67..cf3a7c5 100644 (file)
@@ -20,19 +20,20 @@ BUILT_MODULE_NAME[1]="fld"
 BUILT_MODULE_NAME[2]="lmv"
 BUILT_MODULE_NAME[3]="lov"
 BUILT_MODULE_NAME[4]="lustre"
-BUILT_MODULE_NAME[5]="mdc"
-BUILT_MODULE_NAME[6]="mgc"
-BUILT_MODULE_NAME[7]="obdclass"
-BUILT_MODULE_NAME[8]="obdecho"
-BUILT_MODULE_NAME[9]="osc"
-BUILT_MODULE_NAME[10]="ptlrpc"
-BUILT_MODULE_NAME[11]="ko2iblnd"
-BUILT_MODULE_NAME[12]="ksocklnd"
-BUILT_MODULE_NAME[13]="libcfs"
-BUILT_MODULE_NAME[14]="lnet"
-BUILT_MODULE_NAME[15]="lnet_selftest"
-BUILT_MODULE_NAME[16]="ptlrpc_gss"
-
+BUILT_MODULE_NAME[5]="lz4"
+BUILT_MODULE_NAME[6]="lz4hc"
+BUILT_MODULE_NAME[7]="mdc"
+BUILT_MODULE_NAME[8]="mgc"
+BUILT_MODULE_NAME[9]="obdclass"
+BUILT_MODULE_NAME[10]="obdecho"
+BUILT_MODULE_NAME[11]="osc"
+BUILT_MODULE_NAME[12]="ptlrpc"
+BUILT_MODULE_NAME[13]="ko2iblnd"
+BUILT_MODULE_NAME[14]="ksocklnd"
+BUILT_MODULE_NAME[15]="libcfs"
+BUILT_MODULE_NAME[16]="lnet"
+BUILT_MODULE_NAME[17]="lnet_selftest"
+BUILT_MODULE_NAME[18]="ptlrpc_gss"
 
 # Location of the modules in the source tree after build
 BUILT_MODULE_LOCATION[0]="lustre/fid"
@@ -40,19 +41,20 @@ BUILT_MODULE_LOCATION[1]="lustre/fld"
 BUILT_MODULE_LOCATION[2]="lustre/lmv"
 BUILT_MODULE_LOCATION[3]="lustre/lov"
 BUILT_MODULE_LOCATION[4]="lustre/llite"
-BUILT_MODULE_LOCATION[5]="lustre/mdc"
-BUILT_MODULE_LOCATION[6]="lustre/mgc"
-BUILT_MODULE_LOCATION[7]="lustre/obdclass"
-BUILT_MODULE_LOCATION[8]="lustre/obdecho"
-BUILT_MODULE_LOCATION[9]="lustre/osc"
-BUILT_MODULE_LOCATION[10]="lustre/ptlrpc"
-BUILT_MODULE_LOCATION[11]="lnet/klnds/o2iblnd"
-BUILT_MODULE_LOCATION[12]="lnet/klnds/socklnd"
-BUILT_MODULE_LOCATION[13]="libcfs/libcfs"
-BUILT_MODULE_LOCATION[14]="lnet/lnet"
-BUILT_MODULE_LOCATION[15]="lnet/selftest"
-BUILT_MODULE_LOCATION[16]="lustre/ptlrpc/gss"
-
+BUILT_MODULE_LOCATION[5]="lustre/lz4"
+BUILT_MODULE_LOCATION[6]="lustre/lz4"
+BUILT_MODULE_LOCATION[7]="lustre/mdc"
+BUILT_MODULE_LOCATION[8]="lustre/mgc"
+BUILT_MODULE_LOCATION[9]="lustre/obdclass"
+BUILT_MODULE_LOCATION[10]="lustre/obdecho"
+BUILT_MODULE_LOCATION[11]="lustre/osc"
+BUILT_MODULE_LOCATION[12]="lustre/ptlrpc"
+BUILT_MODULE_LOCATION[13]="lnet/klnds/o2iblnd"
+BUILT_MODULE_LOCATION[14]="lnet/klnds/socklnd"
+BUILT_MODULE_LOCATION[15]="libcfs/libcfs"
+BUILT_MODULE_LOCATION[16]="lnet/lnet"
+BUILT_MODULE_LOCATION[17]="lnet/selftest"
+BUILT_MODULE_LOCATION[18]="lustre/ptlrpc/gss"
 
 # Final destination of the modules under /lib/modules/<kernel>/
 DEST_MODULE_LOCATION[0]="/updates/kernel/fs/lustre"
@@ -66,9 +68,11 @@ DEST_MODULE_LOCATION[7]="/updates/kernel/fs/lustre"
 DEST_MODULE_LOCATION[8]="/updates/kernel/fs/lustre"
 DEST_MODULE_LOCATION[9]="/updates/kernel/fs/lustre"
 DEST_MODULE_LOCATION[10]="/updates/kernel/fs/lustre"
-DEST_MODULE_LOCATION[11]="/updates/kernel/net/lustre"
-DEST_MODULE_LOCATION[12]="/updates/kernel/net/lustre"
+DEST_MODULE_LOCATION[11]="/updates/kernel/fs/lustre"
+DEST_MODULE_LOCATION[12]="/updates/kernel/fs/lustre"
 DEST_MODULE_LOCATION[13]="/updates/kernel/net/lustre"
 DEST_MODULE_LOCATION[14]="/updates/kernel/net/lustre"
 DEST_MODULE_LOCATION[15]="/updates/kernel/net/lustre"
-DEST_MODULE_LOCATION[16]="/updates/kernel/fs/lustre"
+DEST_MODULE_LOCATION[16]="/updates/kernel/net/lustre"
+DEST_MODULE_LOCATION[17]="/updates/kernel/net/lustre"
+DEST_MODULE_LOCATION[18]="/updates/kernel/fs/lustre"
index 074b59a..688e1cb 100755 (executable)
@@ -325,6 +325,11 @@ binary-$(TESTS_PKG): build-stamp
        dh_installchangelogs -p $(TESTS_PKG) lustre/ChangeLog
        dh_compress -p $(TESTS_PKG)
        dh_strip -p $(TESTS_PKG)
+       # Ship the kcompr test module only if it was built; mkdir -p tolerates
+       # an existing directory and && stops cp from running if mkdir fails.
+       if [ -e $(PWD)/lustre/tests/kernel/kcompr.ko ]; then \
+               mkdir -p debian/$(TESTS_PKG)/usr/lib/lustre/tests/kernel && \
+               cp $(PWD)/lustre/tests/kernel/kcompr.ko \
+                       debian/$(TESTS_PKG)/usr/lib/lustre/tests/kernel/; \
+       fi
        dh_installdeb -p $(TESTS_PKG)
        dh_fixperms -p $(TESTS_PKG)
        dh_gencontrol -p $(TESTS_PKG)
index 17a93e8..516dc5e 100644 (file)
@@ -810,6 +810,7 @@ mkdir -p $basemodpath-tests/fs
 mv $basemodpath/fs/llog_test.ko $basemodpath-tests/fs/llog_test.ko
 mkdir -p $RPM_BUILD_ROOT%{_libdir}/lustre/tests/kernel/
 mv $basemodpath/fs/kinode.ko $RPM_BUILD_ROOT%{_libdir}/lustre/tests/kernel/
+mv $basemodpath/fs/kcompr.ko $RPM_BUILD_ROOT%{_libdir}/lustre/tests/kernel/
 %endif
 %endif
 
index 4f046a9..be8b3d8 100644 (file)
@@ -6,7 +6,7 @@ obj-m += mgc/
 obj-m += tests/kernel/
 
 @SERVER_TRUE@obj-m += ost/ mgs/ mdt/ mdd/ ofd/ quota/ osp/ lod/ lfsck/
-@CLIENT_TRUE@obj-m += lov/ osc/ mdc/ lmv/ llite/ fld/
+@CLIENT_TRUE@obj-m += lov/ osc/ mdc/ lmv/ llite/ fld/ lz4/
 @LDISKFS_ENABLED_TRUE@obj-m += osd-ldiskfs/
 @ZFS_ENABLED_TRUE@obj-m += osd-zfs/
 @OSDADDON@
index e94ce3d..a4374da 100644 (file)
@@ -41,7 +41,7 @@ ALWAYS_SUBDIRS = include obdclass ldlm ptlrpc obdecho \
 SERVER_SUBDIRS = ost mgs mdt mdd ofd osd-zfs osd-ldiskfs \
        quota osp lod target lfsck
 
-CLIENT_SUBDIRS = mdc lmv llite lov osc
+CLIENT_SUBDIRS = mdc lmv llite lov osc lz4
 
 SUBDIRS := $(ALWAYS_SUBDIRS)
 
index d7c3711..6202554 100644 (file)
@@ -1251,6 +1251,24 @@ crypto_alloc_skcipher, [
 ]) # LC_HAVE_CRYPTO_ALLOC_SKCIPHER
 
 #
+# LC_HAVE_CRYPTO_INIT_WAIT
+#
+# Kernel version 4.15 commit ada69a1639ec
+# introduced crypto_init_wait().
+#
+AC_DEFUN([LC_HAVE_CRYPTO_INIT_WAIT], [
+LB_CHECK_COMPILE([if crypto_init_wait is defined],
+crypto_init_wait, [
+       #include <linux/crypto.h>
+],[
+       crypto_init_wait(NULL);
+],[
+       AC_DEFINE(HAVE_CRYPTO_INIT_WAIT, 1,
+               [crypto_init_wait is defined])
+])
+]) # LC_HAVE_CRYPTO_INIT_WAIT
+
+#
 # LC_HAVE_INTERVAL_EXP_BLK_INTEGRITY
 #
 # 4.3 replace interval with interval_exp in 'struct blk_integrity'
@@ -2940,6 +2958,9 @@ AC_DEFUN([LC_PROG_LINUX], [
        LC_PAGEVEC_LOOKUP_THREE_PARAM
        LC_BI_BDEV
 
+       # 4.15
+       LC_HAVE_CRYPTO_INIT_WAIT
+
        # 4.17
        LC_VM_FAULT_T
        LC_VM_FAULT_RETRY
@@ -3176,6 +3197,38 @@ AC_MSG_RESULT([$enable_crypto])
 ]) # LC_CONFIG_CRYPTO
 
 #
+# LC_CONFIG_LZ4
+#
+# Check whether to build our own LZ4 kernel module.
+# Always embed on SLES, as kernel config might not be reliable.
+#
+AC_DEFUN([LC_CONFIG_LZ4], [
+AS_IF([test x$SUSE_KERNEL = xyes], [enable_lz4=yes], [
+LB_CHECK_CONFIG_IM([CRYPTO_LZ4],[
+       enable_lz4=no],[
+       enable_lz4=yes
+])
+])
+AC_MSG_RESULT([Embed lz4 $enable_lz4])
+]) # LC_CONFIG_LZ4
+
+#
+# LC_CONFIG_LZ4HC
+#
+# Check whether to build our own LZ4HC kernel module.
+# Always embed on SLES, as kernel config might not be reliable.
+#
+AC_DEFUN([LC_CONFIG_LZ4HC], [
+AS_IF([test x$SUSE_KERNEL = xyes], [enable_lz4hc=yes], [
+LB_CHECK_CONFIG_IM([CRYPTO_LZ4HC],[
+       enable_lz4hc=no],[
+       enable_lz4hc=yes
+])
+])
+AC_MSG_RESULT([Embed lz4hc $enable_lz4hc])
+]) # LC_CONFIG_LZ4HC
+
+#
 # LC_CONFIGURE
 #
 # other configure checks
@@ -3351,6 +3404,8 @@ AM_CONDITIONAL(SELINUX, test "$SELINUX" = "-lselinux")
 AM_CONDITIONAL(GETSEPOL, test x$enable_getsepol = xyes)
 AM_CONDITIONAL(LLCRYPT, test x$enable_llcrypt = xyes)
 AM_CONDITIONAL(LIBAIO, test x$enable_libaio = xyes)
+AM_CONDITIONAL(LZ4, test x$enable_lz4 = xyes)
+AM_CONDITIONAL(LZ4HC, test x$enable_lz4hc = xyes)
 ]) # LC_CONDITIONALS
 
 #
@@ -3401,6 +3456,8 @@ lustre/llite/Makefile
 lustre/llite/autoMakefile
 lustre/lov/Makefile
 lustre/lov/autoMakefile
+lustre/lz4/Makefile
+lustre/lz4/autoMakefile
 lustre/mdc/Makefile
 lustre/mdc/autoMakefile
 lustre/lmv/Makefile
index 4a47dbc..f737c18 100644 (file)
@@ -41,6 +41,37 @@ void ll_sbi_set_name_encrypt(struct ll_sb_info *sbi, bool set);
 /* sizeof(struct fscrypt_context_v2) = 40 */
 #define LLCRYPT_ENC_CTX_SIZE 40
 
+/* Only the lower bits of the crt_flags field (u32) of struct crypto_tfm are
+ * currently used. So use the top 4 bits to store the compression level.
+ */
+#define LZ4COMPR_LEVEL_SHIFT   28
+#define LZ4COMPR_LEVEL_MASK    (~((1 << LZ4COMPR_LEVEL_SHIFT) - 1))
+
+#include <linux/crypto.h>
+/* This is how 'level' is passed to the compression algorithm through
+ * the Crypto API when using the generic (crypto_comp) variant: it is
+ * stored in the bits of the tfm flags at and above LZ4COMPR_LEVEL_SHIFT.
+ * This is taken into account only by the Lustre-modified lz4 and lz4hc
+ * modules.
+ */
+static inline void ll_crypto_comp_set_level(struct crypto_comp *cc, int level)
+{
+       struct crypto_tfm *tfm = crypto_comp_tfm(cc);
+
+       /* crypto_tfm_set_flags() ORs into the current flags, so drop any
+        * level stored earlier, otherwise successive levels would merge.
+        */
+       crypto_tfm_clear_flags(tfm, LZ4COMPR_LEVEL_MASK);
+       /* shift as unsigned: a level >= 8 would overflow a signed int */
+       crypto_tfm_set_flags(tfm, (u32)level << LZ4COMPR_LEVEL_SHIFT);
+}
+
+#ifdef HAVE_CRYPTO_INIT_WAIT
+#include <crypto/acompress.h>
+/* This is how 'level' is passed to the compression algorithm through
+ * the Crypto API when using the scomp variant: it is stored in the bits
+ * of the underlying tfm flags at and above LZ4COMPR_LEVEL_SHIFT.
+ * This is taken into account only by the Lustre-modified lz4 and lz4hc
+ * modules.
+ */
+static inline void ll_crypto_acomp_set_level(struct crypto_acomp *ca, int level)
+{
+       /* the acomp tfm context is read as a pointer to the backing scomp,
+        * whose tfm is the one carrying the level
+        */
+       struct crypto_scomp **tfm_ctx = crypto_tfm_ctx(crypto_acomp_tfm(ca));
+       struct crypto_tfm *tfm = (struct crypto_tfm *)*tfm_ctx;
+
+       /* crypto_tfm_set_flags() ORs into the current flags, so drop any
+        * level stored earlier, otherwise successive levels would merge.
+        */
+       crypto_tfm_clear_flags(tfm, LZ4COMPR_LEVEL_MASK);
+       /* shift as unsigned: a level >= 8 would overflow a signed int */
+       crypto_tfm_set_flags(tfm, (u32)level << LZ4COMPR_LEVEL_SHIFT);
+}
+#endif
 
 /* Encoding/decoding routines inspired from yEnc principles.
  * We just take care of a few critical characters:
diff --git a/lustre/lz4/Makefile.in b/lustre/lz4/Makefile.in
new file mode 100644 (file)
index 0000000..74c6975
--- /dev/null
@@ -0,0 +1,12 @@
+MODULES :=
+
+# Distribute every source and header regardless of which modules this
+# configure run enables, so the set of shipped files does not depend on
+# the LZ4/LZ4HC conditionals. Each file is listed exactly once.
+# NOTE(review): assumes EXTRA_DIST from this file feeds 'make dist' - confirm.
+EXTRA_DIST := llz4.c llz4hc.c lz4_compress.c lz4_decompress.c lz4hc_compress.c
+EXTRA_DIST += lz4defs.h lz4.h scompress.h
+
+@LZ4_TRUE@MODULES += lz4
+@LZ4_TRUE@lz4-objs := lz4_decompress.o lz4_compress.o llz4.o
+
+@LZ4HC_TRUE@MODULES += lz4hc
+@LZ4HC_TRUE@lz4hc-objs := lz4_decompress.o lz4hc_compress.o llz4hc.o
+
+@INCLUDE_RULES@
diff --git a/lustre/lz4/autoMakefile.am b/lustre/lz4/autoMakefile.am
new file mode 100644 (file)
index 0000000..cfd9253
--- /dev/null
@@ -0,0 +1,37 @@
+#
+# GPL HEADER START
+#
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 only,
+# as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License version 2 for more details (a copy is included
+# in the LICENSE file that accompanied this code).
+#
+# You should have received a copy of the GNU General Public License
+# version 2 along with this program; If not, see
+# http://www.gnu.org/licenses/gpl-2.0.html
+#
+# GPL HEADER END
+#
+
+#
+# This file is part of Lustre, http://www.lustre.org/
+#
+
+# Add lz4 and lz4hc to modulefs_DATA only under the MODULES conditional,
+# and each one only under its own LZ4 / LZ4HC conditional.
+if MODULES
+modulefs_DATA := 
+if LZ4
+modulefs_DATA += lz4$(KMODEXT)
+endif
+if LZ4HC
+modulefs_DATA += lz4hc$(KMODEXT)
+endif
+endif
+
+MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
diff --git a/lustre/lz4/llz4.c b/lustre/lz4/llz4.c
new file mode 100644 (file)
index 0000000..cfcab72
--- /dev/null
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Cryptographic API.
+ *
+ * Copyright (c) 2013 Chanho Min <chanho.min@lge.com>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/mm.h>
+#include <lustre_crypto.h>
+#include "lz4.h"
+#include "scompress.h"
+
+struct lz4_ctx {
+       void *lz4_comp_mem;
+};
+
+/* Allocate LZ4_MEM_COMPRESS bytes of working memory with kvmalloc().
+ * Returns the buffer, or ERR_PTR(-ENOMEM) if the allocation fails.
+ * 'tfm' is not used.
+ */
+static void *lz4_alloc_ctx(struct crypto_scomp *tfm)
+{
+       void *wrkmem = kvmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
+
+       return wrkmem ? wrkmem : ERR_PTR(-ENOMEM);
+}
+
+static int lz4_init(struct crypto_tfm *tfm)
+{
+       struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       ctx->lz4_comp_mem = lz4_alloc_ctx(NULL);
+       if (IS_ERR(ctx->lz4_comp_mem))
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void lz4_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+       kvfree(ctx);
+}
+
+static void lz4_exit(struct crypto_tfm *tfm)
+{
+       struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       lz4_free_ctx(NULL, ctx->lz4_comp_mem);
+}
+
+/* Compress 'slen' bytes from 'src' into 'dst', using 'ctx' as working memory.
+ * On entry *dlen is passed to the library as the output size limit; on
+ * success it is updated to the compressed length and 0 is returned.
+ * Returns -EINVAL when the library returns 0.
+ * The level is read from the bits of the tfm flags at and above
+ * LZ4COMPR_LEVEL_SHIFT; a level of 0 selects LZ4_compress_default().
+ */
+static int __lz4_compress_crypto(struct crypto_tfm *tfm,
+                                const u8 *src, unsigned int slen,
+                                u8 *dst, unsigned int *dlen, void *ctx)
+{
+       u32 flags = crypto_tfm_get_flags(tfm);
+       u8 level = flags >> LZ4COMPR_LEVEL_SHIFT;
+       int out_len;
+
+       /* Remove compression level from flags. This has to be done with
+        * crypto_tfm_clear_flags(): crypto_tfm_set_flags() only ORs bits
+        * in and so can never clear them.
+        */
+       crypto_tfm_clear_flags(tfm, LZ4COMPR_LEVEL_MASK);
+
+       if (level)
+               out_len = LZ4_compress_fast(src, dst, slen, *dlen,
+                                           to_lz4_level(level), ctx);
+       else
+               out_len = LZ4_compress_default(src, dst, slen, *dlen, ctx);
+       if (!out_len)
+               return -EINVAL;
+
+       *dlen = out_len;
+       return 0;
+}
+
+static int lz4_scompress(struct crypto_scomp *tfm, const u8 *src,
+                        unsigned int slen, u8 *dst, unsigned int *dlen,
+                        void *ctx)
+{
+       return __lz4_compress_crypto(crypto_scomp_tfm(tfm), src, slen,
+                                    dst, dlen, ctx);
+}
+
+static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
+                              unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+       struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       return __lz4_compress_crypto(tfm, src, slen, dst, dlen,
+                                    ctx->lz4_comp_mem);
+}
+
+/* Decompress 'slen' bytes from 'src' into 'dst' with LZ4_decompress_safe().
+ * On entry *dlen is passed as the output size limit; on success it is
+ * updated to the decompressed length and 0 is returned. A negative library
+ * result is reported as -EINVAL. 'ctx' is not used.
+ */
+static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
+                                  u8 *dst, unsigned int *dlen, void *ctx)
+{
+       int nbytes;
+
+       nbytes = LZ4_decompress_safe(src, dst, slen, *dlen);
+       if (nbytes >= 0) {
+               *dlen = nbytes;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+                          unsigned int slen, u8 *dst, unsigned int *dlen,
+                          void *ctx)
+{
+       return __lz4_decompress_crypto(src, slen, dst, dlen, NULL);
+}
+
+static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
+                                unsigned int slen, u8 *dst,
+                                unsigned int *dlen)
+{
+       return __lz4_decompress_crypto(src, slen, dst, dlen, NULL);
+}
+
+static struct crypto_alg alg_lz4 = {
+       .cra_name               = "lz4",
+       .cra_driver_name        = "lz4-lustre-generic",
+       .cra_priority           = 1,
+       .cra_flags              = CRYPTO_ALG_TYPE_COMPRESS,
+       .cra_ctxsize            = sizeof(struct lz4_ctx),
+       .cra_module             = THIS_MODULE,
+       .cra_init               = lz4_init,
+       .cra_exit               = lz4_exit,
+       .cra_u                  = { .compress = {
+       .coa_compress           = lz4_compress_crypto,
+       .coa_decompress         = lz4_decompress_crypto } }
+};
+
+static struct scomp_alg scomp = {
+       .alloc_ctx              = lz4_alloc_ctx,
+       .free_ctx               = lz4_free_ctx,
+       .compress               = lz4_scompress,
+       .decompress             = lz4_sdecompress,
+       .base                   = {
+               .cra_name        = "lz4",
+               .cra_driver_name = "lz4-lustre-scomp",
+               .cra_priority    = 1,
+               .cra_module      = THIS_MODULE,
+       }
+};
+
+/* Register the generic compression algorithm, then the scomp one.
+ * If the scomp registration fails, the generic one is unregistered again.
+ * Returns 0 on success or the error from the failing registration.
+ */
+static int __init lz4_mod_init(void)
+{
+       int rc = crypto_register_alg(&alg_lz4);
+
+       if (rc != 0)
+               return rc;
+
+       rc = crypto_register_scomp(&scomp);
+       if (rc != 0)
+               crypto_unregister_alg(&alg_lz4);
+
+       return rc;
+}
+
+static void __exit lz4_mod_fini(void)
+{
+       crypto_unregister_alg(&alg_lz4);
+       crypto_unregister_scomp(&scomp);
+}
+
+subsys_initcall(lz4_mod_init);
+module_exit(lz4_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZ4 Compression Algorithm");
+MODULE_ALIAS_CRYPTO("lz4");
diff --git a/lustre/lz4/llz4hc.c b/lustre/lz4/llz4hc.c
new file mode 100644 (file)
index 0000000..7ff8461
--- /dev/null
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Cryptographic API.
+ *
+ * Copyright (c) 2013 Chanho Min <chanho.min@lge.com>
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/mm.h>
+#include <lustre_crypto.h>
+#include "lz4.h"
+#include "scompress.h"
+
+struct lz4hc_ctx {
+       void *lz4hc_comp_mem;
+};
+
+/* Allocate LZ4HC_MEM_COMPRESS bytes of working memory with kvmalloc().
+ * Returns the buffer, or ERR_PTR(-ENOMEM) if the allocation fails.
+ * 'tfm' is not used.
+ */
+static void *lz4hc_alloc_ctx(struct crypto_scomp *tfm)
+{
+       void *wrkmem = kvmalloc(LZ4HC_MEM_COMPRESS, GFP_KERNEL);
+
+       return wrkmem ? wrkmem : ERR_PTR(-ENOMEM);
+}
+
+static int lz4hc_init(struct crypto_tfm *tfm)
+{
+       struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       ctx->lz4hc_comp_mem = lz4hc_alloc_ctx(NULL);
+       if (IS_ERR(ctx->lz4hc_comp_mem))
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void lz4hc_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+       kvfree(ctx);
+}
+
+static void lz4hc_exit(struct crypto_tfm *tfm)
+{
+       struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       lz4hc_free_ctx(NULL, ctx->lz4hc_comp_mem);
+}
+
+/* Compress 'slen' bytes from 'src' into 'dst' with LZ4_compress_HC(), using
+ * 'ctx' as working memory. On entry *dlen is passed to the library as the
+ * output size limit; on success it is updated to the compressed length and
+ * 0 is returned. Returns -EINVAL when the library returns 0.
+ * The level is read from the bits of the tfm flags at and above
+ * LZ4COMPR_LEVEL_SHIFT and mapped through to_lz4hc_level().
+ */
+static int __lz4hc_compress_crypto(struct crypto_tfm *tfm,
+                                  const u8 *src, unsigned int slen,
+                                  u8 *dst, unsigned int *dlen, void *ctx)
+{
+       u32 flags = crypto_tfm_get_flags(tfm);
+       u8 level = flags >> LZ4COMPR_LEVEL_SHIFT;
+       int out_len;
+
+       /* Remove compression level from flags. This has to be done with
+        * crypto_tfm_clear_flags(): crypto_tfm_set_flags() only ORs bits
+        * in and so can never clear them.
+        */
+       crypto_tfm_clear_flags(tfm, LZ4COMPR_LEVEL_MASK);
+
+       out_len = LZ4_compress_HC(src, dst, slen, *dlen,
+                                 to_lz4hc_level(level), ctx);
+       if (!out_len)
+               return -EINVAL;
+
+       *dlen = out_len;
+       return 0;
+}
+
+static int lz4hc_scompress(struct crypto_scomp *tfm, const u8 *src,
+                          unsigned int slen, u8 *dst, unsigned int *dlen,
+                          void *ctx)
+{
+       return __lz4hc_compress_crypto(crypto_scomp_tfm(tfm), src, slen,
+                                      dst, dlen, ctx);
+}
+
+static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
+                                unsigned int slen, u8 *dst,
+                                unsigned int *dlen)
+{
+       struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       return __lz4hc_compress_crypto(tfm, src, slen, dst, dlen,
+                                      ctx->lz4hc_comp_mem);
+}
+
+/* Decompress 'slen' bytes from 'src' into 'dst' with LZ4_decompress_safe().
+ * On entry *dlen is passed as the output size limit; on success it is
+ * updated to the decompressed length and 0 is returned. A negative library
+ * result is reported as -EINVAL. 'ctx' is not used.
+ */
+static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
+                                    u8 *dst, unsigned int *dlen, void *ctx)
+{
+       int nbytes;
+
+       nbytes = LZ4_decompress_safe(src, dst, slen, *dlen);
+       if (nbytes >= 0) {
+               *dlen = nbytes;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+                            unsigned int slen, u8 *dst, unsigned int *dlen,
+                            void *ctx)
+{
+       return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL);
+}
+
+static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
+                                  unsigned int slen, u8 *dst,
+                                  unsigned int *dlen)
+{
+       return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL);
+}
+
+static struct crypto_alg alg_lz4hc = {
+       .cra_name               = "lz4hc",
+       .cra_driver_name        = "lz4hc-lustre-generic",
+       .cra_priority           = 1,
+       .cra_flags              = CRYPTO_ALG_TYPE_COMPRESS,
+       .cra_ctxsize            = sizeof(struct lz4hc_ctx),
+       .cra_module             = THIS_MODULE,
+       .cra_init               = lz4hc_init,
+       .cra_exit               = lz4hc_exit,
+       .cra_u                  = { .compress = {
+       .coa_compress           = lz4hc_compress_crypto,
+       .coa_decompress         = lz4hc_decompress_crypto } }
+};
+
+static struct scomp_alg scomp = {
+       .alloc_ctx              = lz4hc_alloc_ctx,
+       .free_ctx               = lz4hc_free_ctx,
+       .compress               = lz4hc_scompress,
+       .decompress             = lz4hc_sdecompress,
+       .base                   = {
+               .cra_name        = "lz4hc",
+               .cra_driver_name = "lz4hc-lustre-scomp",
+               .cra_priority    = 1,
+               .cra_module      = THIS_MODULE,
+       }
+};
+
+/* Register the generic compression algorithm, then the scomp one.
+ * If the scomp registration fails, the generic one is unregistered again.
+ * Returns 0 on success or the error from the failing registration.
+ */
+static int __init lz4hc_mod_init(void)
+{
+       int rc = crypto_register_alg(&alg_lz4hc);
+
+       if (rc != 0)
+               return rc;
+
+       rc = crypto_register_scomp(&scomp);
+       if (rc != 0)
+               crypto_unregister_alg(&alg_lz4hc);
+
+       return rc;
+}
+
+static void __exit lz4hc_mod_fini(void)
+{
+       crypto_unregister_alg(&alg_lz4hc);
+       crypto_unregister_scomp(&scomp);
+}
+
+subsys_initcall(lz4hc_mod_init);
+module_exit(lz4hc_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZ4HC Compression Algorithm");
+MODULE_ALIAS_CRYPTO("lz4hc");
diff --git a/lustre/lz4/lz4.h b/lustre/lz4/lz4.h
new file mode 100644 (file)
index 0000000..5562349
--- /dev/null
@@ -0,0 +1,677 @@
+/* LZ4 Kernel Interface
+ *
+ * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This file is based on the original header file
+ * for LZ4 - Fast LZ compression algorithm.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *     - LZ4 homepage : http://www.lz4.org
+ *     - LZ4 source repository : https://github.com/lz4/lz4
+ */
+
+#ifndef __LZ4_H__
+#define __LZ4_H__
+
+#include <linux/types.h>
+#include <linux/string.h>       /* memset, memcpy */
+
+/*-************************************************************************
+ *     CONSTANTS
+ **************************************************************************/
+/*
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes
+ * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 14
+
+#define LZ4_MAX_INPUT_SIZE     0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)       (\
+       (unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
+       ? 0 \
+       : (isize) + ((isize)/255) + 16)
+
+#define LZ4_ACCELERATION_DEFAULT 1
+#define LZ4_HASHLOG     (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+/* The level stored in the Lustre file layout is limited to 4 bits, i.e.
+ * 0 to 15, whereas the lz4 acceleration factor can be fine tuned, each
+ * successive value providing roughly +~3% to speed.
+ * The provided level is mapped to the lz4 acceleration factor as:
+ *   0      -> LZ4_ACCELERATION_DEFAULT
+ *   1..9   -> kept as-is
+ *   10..15 -> steps of 3, i.e. 12 15 18 21 24 27
+ */
+static inline u8 to_lz4_level(u8 level)
+{
+       if (level == 0)
+               return LZ4_ACCELERATION_DEFAULT;
+
+       return level < 10 ? level : 9 + 3 * (level - 9);
+}
+
+#define LZ4HC_MIN_CLEVEL                       3
+#define LZ4HC_DEFAULT_CLEVEL                   9
+#define LZ4HC_MAX_CLEVEL                       16
+
+/* The level stored in the Lustre file layout is limited to 4 bits, i.e.
+ * 0 to 15. A level of 0 selects LZ4HC_DEFAULT_CLEVEL; any other level is
+ * mapped to the lz4hc compression level simply by adding 1.
+ */
+static inline u8 to_lz4hc_level(u8 level)
+{
+       return level ? level + 1 : LZ4HC_DEFAULT_CLEVEL;
+}
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE - 1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+/*-************************************************************************
+ *     STREAMING CONSTANTS AND STRUCTURES
+ **************************************************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE - 3)) + 4)
+#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
+
+#define LZ4_STREAMDECODESIZE_U64       4
+#define LZ4_STREAMDECODESIZE            (LZ4_STREAMDECODESIZE_U64 * \
+       sizeof(unsigned long long))
+
+/*
+ * LZ4_stream_t - information structure to track an LZ4 stream.
+ */
+typedef struct {
+       uint32_t hashTable[LZ4_HASH_SIZE_U32];
+       uint32_t currentOffset;
+       uint32_t initCheck;
+       const uint8_t *dictionary;
+       uint8_t *bufferStart;
+       uint32_t dictSize;
+} LZ4_stream_t_internal;
+typedef union {
+       unsigned long long table[LZ4_STREAMSIZE_U64];
+       LZ4_stream_t_internal internal_donotuse;
+} LZ4_stream_t;
+
+/*
+ * LZ4_streamHC_t - information structure to track an LZ4HC stream.
+ */
+typedef struct {
+       unsigned int     hashTable[LZ4HC_HASHTABLESIZE];
+       unsigned short   chainTable[LZ4HC_MAXD];
+       /* next block to continue on current prefix */
+       const unsigned char *end;
+       /* All index relative to this position */
+       const unsigned char *base;
+       /* alternate base for extDict */
+       const unsigned char *dictBase;
+       /* below that point, need extDict */
+       unsigned int     dictLimit;
+       /* below that point, no more dict */
+       unsigned int     lowLimit;
+       /* index from which to continue dict update */
+       unsigned int     nextToUpdate;
+       unsigned int     compressionLevel;
+} LZ4HC_CCtx_internal;
+typedef union {
+       size_t table[LZ4_STREAMHCSIZE_SIZET];
+       LZ4HC_CCtx_internal internal_donotuse;
+} LZ4_streamHC_t;
+
+/*
+ * LZ4_streamDecode_t - information structure to track an
+ *     LZ4 stream during decompression.
+ *
+ * init this structure using LZ4_setStreamDecode (or memset()) before first use
+ */
+typedef struct {
+       const uint8_t *externalDict;
+       size_t extDictSize;
+       const uint8_t *prefixEnd;
+       size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+typedef union {
+       unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+       LZ4_streamDecode_t_internal internal_donotuse;
+} LZ4_streamDecode_t;
+
+/*-************************************************************************
+ *     SIZE OF STATE
+ *
+ *     Size of the 'wrkmem' working memory that callers must provide to
+ *     the LZ4 compression functions (LZ4_MEM_COMPRESS) and to
+ *     LZ4_compress_HC() (LZ4HC_MEM_COMPRESS).
+ **************************************************************************/
+#define LZ4_MEM_COMPRESS       LZ4_STREAMSIZE
+#define LZ4HC_MEM_COMPRESS     LZ4_STREAMHCSIZE
+
+/*-************************************************************************
+ *     Compression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_compressBound() - Max. output size in worst case scenarios
+ * @isize: Size of the input data
+ *
+ * Thin function wrapper around the LZ4_COMPRESSBOUND() macro.
+ *
+ * Return: Max. size LZ4 may output in a "worst case" scenario
+ * (data not compressible)
+ */
+static inline int LZ4_compressBound(size_t isize)
+{
+       const int worstCase = LZ4_COMPRESSBOUND(isize);
+
+       return worstCase;
+}
+
+/**
+ * LZ4_compress_default() - Compress data from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ *     which must be already allocated
+ * @wrkmem: address of the working memory.
+ *     This requires 'wrkmem' of size LZ4_MEM_COMPRESS.
+ *
+ * Compresses 'inputSize' bytes from buffer 'source'
+ * into already allocated 'dest' buffer of size 'maxOutputSize'.
+ * Compression is guaranteed to succeed if
+ * 'maxOutputSize' >= LZ4_compressBound(inputSize).
+ * It also runs faster, so it's a recommended setting.
+ * If the function cannot compress 'source' into a more limited 'dest' budget,
+ * compression stops *immediately*, and the function result is zero.
+ * As a consequence, 'dest' content is not valid.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ *     (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+       int maxOutputSize, void *wrkmem);
+
+/**
+ * LZ4_compress_fast() - As LZ4_compress_default providing an acceleration param
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ *     which must be already allocated
+ * @acceleration: acceleration factor
+ * @wrkmem: address of the working memory.
+ *     This requires 'wrkmem' of size LZ4_MEM_COMPRESS.
+ *
+ * Same as LZ4_compress_default(), but allows to select an "acceleration"
+ * factor. The larger the acceleration value, the faster the algorithm,
+ * but also the lesser the compression. It's a trade-off. It can be fine tuned,
+ * with each successive value providing roughly +~3% to speed.
+ * An acceleration value of "1" is the same as regular LZ4_compress_default()
+ * Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT, which is 1.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ *     (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+       int maxOutputSize, int acceleration, void *wrkmem);
+
+/**
+ * LZ4_compress_destSize() - Compress as much data as possible
+ *     from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @sourceSizePtr: will be modified to indicate how many bytes were read
+ *     from 'source' to fill 'dest'. New value is necessarily <= old value.
+ * @targetDestSize: Size of buffer 'dest' which must be already allocated
+ * @wrkmem: address of the working memory.
+ *     This requires 'wrkmem' of size LZ4_MEM_COMPRESS.
+ *
+ * Reverse the logic, by compressing as much data as possible
+ * from 'source' buffer into already allocated buffer 'dest'
+ * of size 'targetDestSize'.
+ * This function either compresses the entire 'source' content into 'dest'
+ * if it's large enough, or fills the 'dest' buffer completely with as much
+ * data as possible from 'source'.
+ *
+ * Return: Number of bytes written into 'dest' (necessarily <= targetDestSize)
+ *     or 0 if compression fails
+ */
+int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
+       int targetDestSize, void *wrkmem);
+
+/*-************************************************************************
+ *     Decompression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_decompress_fast() - Decompresses data from 'source' into 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *     which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * Decompresses data from 'source' into 'dest'.
+ * This function fully respects memory boundaries for properly formed
+ * compressed data.
+ * It is a bit faster than LZ4_decompress_safe().
+ * However, it does not provide any protection against intentionally
+ * modified data stream (malicious input).
+ * Use this function in trusted environment only
+ * (data to decode comes from a trusted source).
+ *
+ * Return: number of bytes read from the source buffer
+ *     or a negative result if decompression fails.
+ */
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe() - Decompression protected against buffer overflow
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *     which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * Decompresses data from 'source' into 'dest'.
+ * If the source stream is detected malformed, the function will
+ * stop decoding and return a negative result.
+ * This function is protected against buffer overflow exploits,
+ * including malicious data packets. It never writes outside output buffer,
+ * nor reads outside input buffer.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *     (necessarily <= maxDecompressedSize)
+ *     or a negative result in case of error
+ */
+int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
+       int maxDecompressedSize);
+
+/**
+ * LZ4_decompress_safe_partial() - Decompress a block of size 'compressedSize'
+ *     at position 'source' into buffer 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the decompressed data which must be
+ *     already allocated
+ * @compressedSize: is the precise full size of the compressed block.
+ * @targetOutputSize: the decompression operation will try
+ *     to stop as soon as 'targetOutputSize' has been reached
+ * @maxDecompressedSize: is the size of destination buffer
+ *
+ * This function decompresses a compressed block of size 'compressedSize'
+ * at position 'source' into destination buffer 'dest'
+ * of size 'maxDecompressedSize'.
+ * The function tries to stop decompressing operation as soon as
+ * 'targetOutputSize' has been reached, reducing decompression time.
+ * This function never writes outside of output buffer,
+ * and never reads outside of input buffer.
+ * It is therefore protected against malicious data packets.
+ *
+ * Return: the number of bytes decoded in the destination buffer
+ *     (necessarily <= maxDecompressedSize)
+ *     or a negative result in case of error
+ *
+ */
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+       int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+/*-************************************************************************
+ *     LZ4 HC Compression
+ **************************************************************************/
+
+/**
+ * LZ4_compress_HC() - Compress data from `src` into `dst`, using HC algorithm
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @dstCapacity: full or partial size of buffer 'dst',
+ *     which must be already allocated
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ *     value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *     Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ * @wrkmem: address of the working memory.
+ *     This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+ *
+ * Compress data from 'src' into 'dst', using the more powerful
+ * but slower "HC" algorithm. Compression is guaranteed to succeed if
+ * `dstCapacity >= LZ4_compressBound(srcSize)`.
+ *
+ * Return : the number of bytes written into 'dst' or 0 if compression fails.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
+       int compressionLevel, void *wrkmem);
+
+/**
+ * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure
+ * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ *     value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *     Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ *
+ * An LZ4_streamHC_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_streamHC_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
+
+/**
+ * LZ4_loadDictHC() - Load a static dictionary into LZ4_streamHC
+ * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4_streamHC.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int    LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
+       int dictSize);
+
+/**
+ * LZ4_compress_HC_continue() - Compress 'src' using data from previously
+ *     compressed blocks as a dictionary using the HC algorithm
+ * @streamHCPtr: Pointer to the previous 'LZ4_streamHC_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ *     which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dest'
+ *     which must be already allocated
+ *
+ * These functions compress data in successive blocks of any size, using
+ * previous blocks as dictionary. One key assumption is that previous
+ * blocks (up to 64 KB) remain read-accessible while
+ * compressing next blocks. There is an exception for ring buffers,
+ * which can be smaller than 64 KB.
+ * Ring buffers scenario is automatically detected and handled by
+ * LZ4_compress_HC_continue().
+ * Before starting compression, state must be properly initialized,
+ * using LZ4_resetStreamHC().
+ * A first "fictional block" can then be designated as
+ * initial dictionary, using LZ4_loadDictHC() (Optional).
+ * Then, use LZ4_compress_HC_continue()
+ * to compress each successive block. Previous memory blocks
+ * (including initial dictionary when present) must remain accessible
+ * and unmodified during compression.
+ * 'dst' buffer should be sized to handle worst case scenarios, using
+ *  LZ4_compressBound(), to ensure operation success.
+ *  If, for any reason, previous data blocks can't be preserved unmodified
+ *  in memory during next compression block,
+ *  you must save it to a safer memory space, using LZ4_saveDictHC().
+ * Return value of LZ4_saveDictHC() is the size of dictionary
+ * effectively saved into 'safeBuffer'.
+ *
+ * Return: Number of bytes written into buffer 'dst'  or 0 if compression fails
+ */
+int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
+       char *dst, int srcSize, int maxDstSize);
+
+/**
+ * LZ4_saveDictHC() - Save static dictionary from LZ4_streamHC
+ * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @maxDictSize: size of 'safeBuffer'
+ *
+ * If previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note : you don't need to call LZ4_loadDictHC() afterwards,
+ * dictionary is immediately usable, you can therefore call
+ * LZ4_compress_HC_continue().
+ *
+ * Return : saved dictionary size in bytes (necessarily <= maxDictSize),
+ *     or 0 if error.
+ */
+int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
+       int maxDictSize);
+
+/*-*********************************************
+ *     Streaming Compression Functions
+ ***********************************************/
+
+/**
+ * LZ4_resetStream() - Init an allocated 'LZ4_stream_t' structure
+ * @LZ4_stream: pointer to the 'LZ4_stream_t' structure
+ *
+ * An LZ4_stream_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_stream_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
+
+/**
+ * LZ4_loadDict() - Load a static dictionary into LZ4_stream
+ * @streamPtr: pointer to the LZ4_stream_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
+       int dictSize);
+
+/**
+ * LZ4_saveDict() - Save static dictionary from LZ4_stream
+ * @streamPtr: pointer to the 'LZ4_stream_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @dictSize: size of 'safeBuffer'
+ *
+ * If previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ * dictionary is immediately usable, you can therefore call
+ * LZ4_compress_fast_continue().
+ *
+ * Return : saved dictionary size in bytes (necessarily <= dictSize),
+ *     or 0 if error.
+ */
+int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
+
+/**
+ * LZ4_compress_fast_continue() - Compress 'src' using data from previously
+ *     compressed blocks as a dictionary
+ * @streamPtr: Pointer to the previous 'LZ4_stream_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ *     which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dest'
+ *     which must be already allocated
+ * @acceleration: acceleration factor
+ *
+ * Compress buffer content 'src', using data from previously compressed blocks
+ * as dictionary to improve compression ratio.
+ * Important : Previous data blocks are assumed to still
+ * be present and unmodified !
+ * If maxDstSize >= LZ4_compressBound(srcSize),
+ * compression is guaranteed to succeed, and runs faster.
+ *
+ * Return: Number of bytes written into buffer 'dst'  or 0 if compression fails
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
+       char *dst, int srcSize, int maxDstSize, int acceleration);
+
+/**
+ * LZ4_setStreamDecode() - Instruct where to find dictionary
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @dictionary: dictionary to use
+ * @dictSize: size of dictionary
+ *
+ * Use this function to instruct where to find the dictionary.
+ *     Setting a size of 0 is allowed (same effect as reset).
+ *
+ * Return: 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+       const char *dictionary, int dictSize);
+
+/**
+ * LZ4_decompress_safe_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *     which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * This decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB)
+ * In the case of ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer.
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *     (necessarily <= maxDecompressedSize)
+ *     or a negative result in case of error
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+       const char *source, char *dest, int compressedSize,
+       int maxDecompressedSize);
+
+/**
+ * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *     which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * This decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB)
+ * In the case of ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer.
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *     (necessarily <= maxDecompressedSize)
+ *     or a negative result in case of error
+ */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+       const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe_usingDict() - Same as LZ4_setStreamDecode()
+ *     followed by LZ4_decompress_safe_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *     which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * This decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_safe_continue()
+ * It is stand-alone, and doesn't need an LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *     (necessarily <= maxDecompressedSize)
+ *     or a negative result in case of error
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+       int compressedSize, int maxDecompressedSize, const char *dictStart,
+       int dictSize);
+
+/**
+ * LZ4_decompress_fast_usingDict() - Same as LZ4_setStreamDecode()
+ *     followed by LZ4_decompress_fast_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *     which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * This decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_fast_continue()
+ * It is stand-alone, and doesn't need an LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *     (necessarily <= maxDecompressedSize)
+ *     or a negative result in case of error
+ */
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+       int originalSize, const char *dictStart, int dictSize);
+
+#endif
diff --git a/lustre/lz4/lz4_compress.c b/lustre/lz4/lz4_compress.c
new file mode 100644 (file)
index 0000000..a4eef36
--- /dev/null
@@ -0,0 +1,930 @@
+/*
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *     - LZ4 homepage : http://www.lz4.org
+ *     - LZ4 source repository : https://github.com/lz4/lz4
+ *
+ *     Changed for kernel usage by:
+ *     Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+ */
+
+/*-************************************
+ *     Dependencies
+ **************************************/
+#include "lz4.h"
+#include "lz4defs.h"
+#include <linux/kernel.h>
+#include <asm/unaligned.h>
+
+/* Inputs shorter than this are not searched and are emitted as literals only */
+static const int LZ4_minLength = (MFLIMIT + 1);
+/* With a byU16 hash table, inputs of this size or larger are rejected */
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT - 1));
+
+/*-******************************
+ *     Compression functions
+ ********************************/
+/*
+ * LZ4_hash4() - hash a U32 sequence read from the input.
+ * @sequence: value to hash
+ * @tableType: hash table flavour; byU16 tables get one extra hash bit
+ *
+ * The sequence is multiplied by 2654435761U and the product is shifted
+ * right by (MINMATCH * 8) minus the hash width, the hash width being
+ * LZ4_HASHLOG + 1 for byU16 tables and LZ4_HASHLOG otherwise.
+ */
+static FORCE_INLINE U32 LZ4_hash4(
+       U32 sequence,
+       tableType_t const tableType)
+{
+       const U32 hashLog = (tableType == byU16)
+               ? LZ4_HASHLOG + 1
+               : LZ4_HASHLOG;
+
+       return (sequence * 2654435761U) >> ((MINMATCH * 8) - hashLog);
+}
+
+/*
+ * LZ4_hash5() - hash a U64 sequence read from the input.
+ * @sequence: value to hash
+ * @tableType: hash table flavour; byU16 tables get one extra hash bit
+ *
+ * The sequence is shifted by 24 bits (left when LZ4_LITTLE_ENDIAN is set,
+ * right otherwise), multiplied by an endianness-specific constant, and
+ * the product is shifted right by 64 minus the hash width. The hash width
+ * is LZ4_HASHLOG + 1 for byU16 tables and LZ4_HASHLOG otherwise.
+ */
+static FORCE_INLINE U32 LZ4_hash5(
+       U64 sequence,
+       tableType_t const tableType)
+{
+#if LZ4_LITTLE_ENDIAN
+       const U64 multiplier = 889523592379ULL;
+       const U64 shifted = sequence << 24;
+#else
+       const U64 multiplier = 11400714785074694791ULL;
+       const U64 shifted = sequence >> 24;
+#endif
+       U32 downShift = 64 - LZ4_HASHLOG;
+
+       /* byU16 tables keep one more bit of the product */
+       if (tableType == byU16)
+               downShift--;
+
+       return (U32)((shifted * multiplier) >> downShift);
+}
+
+/*
+ * LZ4_hashPosition() - hash the input bytes found at @p.
+ * @p: position in the input to hash
+ * @tableType: hash table flavour
+ *
+ * When LZ4_ARCH64 is set and the table is byU32, the value returned by
+ * LZ4_read_ARCH() is hashed with LZ4_hash5(). In every other case the
+ * value returned by LZ4_read32() is hashed with LZ4_hash4().
+ */
+static FORCE_INLINE U32 LZ4_hashPosition(
+       const void *p,
+       tableType_t const tableType)
+{
+       U32 hash;
+
+#if LZ4_ARCH64
+       if (tableType == byU32) {
+               hash = LZ4_hash5(LZ4_read_ARCH(p), tableType);
+               return hash;
+       }
+#endif
+
+       hash = LZ4_hash4(LZ4_read32(p), tableType);
+       return hash;
+}
+
+/*
+ * LZ4_putPositionOnHash() - record position @p in slot @h of the hash table.
+ * @p: position to record
+ * @h: slot of the hash table to write
+ * @tableBase: start of the hash table
+ * @tableType: how entries are stored: pointers (byPtr), or U32 (byU32) /
+ *     U16 (byU16) offsets from @srcBase
+ * @srcBase: address the stored offsets are relative to
+ *
+ * Any other @tableType value leaves the table untouched.
+ */
+static void LZ4_putPositionOnHash(
+       const BYTE *p,
+       U32 h,
+       void *tableBase,
+       tableType_t const tableType,
+       const BYTE *srcBase)
+{
+       if (tableType == byPtr)
+               ((const BYTE **)tableBase)[h] = p;
+       else if (tableType == byU32)
+               ((U32 *)tableBase)[h] = (U32)(p - srcBase);
+       else if (tableType == byU16)
+               ((U16 *)tableBase)[h] = (U16)(p - srcBase);
+}
+
+/*
+ * LZ4_putPosition() - hash position @p and record it in the hash table.
+ * @p: position to hash and record
+ * @tableBase: start of the hash table
+ * @tableType: hash table flavour
+ * @srcBase: address the stored offsets are relative to
+ *
+ * The slot is computed with LZ4_hashPosition() and written with
+ * LZ4_putPositionOnHash().
+ */
+static FORCE_INLINE void LZ4_putPosition(
+       const BYTE *p,
+       void *tableBase,
+       tableType_t tableType,
+       const BYTE *srcBase)
+{
+       LZ4_putPositionOnHash(p, LZ4_hashPosition(p, tableType),
+               tableBase, tableType, srcBase);
+}
+
+/*
+ * LZ4_getPositionOnHash() - read the position stored in slot @h.
+ * @h: slot of the hash table to read
+ * @tableBase: start of the hash table
+ * @tableType: how entries are stored: pointers (byPtr), U32 offsets
+ *     (byU32), or U16 offsets (any other value)
+ * @srcBase: address the stored offsets are relative to
+ *
+ * Return: the stored pointer for byPtr tables, otherwise @srcBase plus
+ * the stored offset.
+ */
+static const BYTE *LZ4_getPositionOnHash(
+       U32 h,
+       void *tableBase,
+       tableType_t tableType,
+       const BYTE *srcBase)
+{
+       switch (tableType) {
+       case byPtr:
+               return ((const BYTE **)tableBase)[h];
+       case byU32:
+               return srcBase + ((const U32 *)tableBase)[h];
+       default:
+               /* U16 table; also the fallback, to ensure a return */
+               return srcBase + ((const U16 *)tableBase)[h];
+       }
+}
+
+/*
+ * LZ4_getPosition() - look up the position recorded for the bytes at @p.
+ * @p: position in the input whose hash slot is read
+ * @tableBase: start of the hash table
+ * @tableType: hash table flavour
+ * @srcBase: address the stored offsets are relative to
+ *
+ * Return: the result of LZ4_getPositionOnHash() for the slot computed by
+ * LZ4_hashPosition().
+ */
+static FORCE_INLINE const BYTE *LZ4_getPosition(
+       const BYTE *p,
+       void *tableBase,
+       tableType_t tableType,
+       const BYTE *srcBase)
+{
+       return LZ4_getPositionOnHash(LZ4_hashPosition(p, tableType),
+               tableBase, tableType, srcBase);
+}
+
+
+/*
+ * LZ4_compress_generic() :
+ * inlined, to ensure branches are decided at compilation time
+ */
+static FORCE_INLINE int LZ4_compress_generic(
+       LZ4_stream_t_internal * const dictPtr,
+       const char * const source,
+       char * const dest,
+       const int inputSize,
+       const int maxOutputSize,
+       const limitedOutput_directive outputLimited,
+       const tableType_t tableType,
+       const dict_directive dict,
+       const dictIssue_directive dictIssue,
+       const U32 acceleration)
+{
+       const BYTE *ip = (const BYTE *) source;
+       const BYTE *base;
+       const BYTE *lowLimit;
+       const BYTE * const lowRefLimit = ip - dictPtr->dictSize;
+       const BYTE * const dictionary = dictPtr->dictionary;
+       const BYTE * const dictEnd = dictionary + dictPtr->dictSize;
+       const size_t dictDelta = dictEnd - (const BYTE *)source;
+       const BYTE *anchor = (const BYTE *) source;
+       const BYTE * const iend = ip + inputSize;
+       const BYTE * const mflimit = iend - MFLIMIT;
+       const BYTE * const matchlimit = iend - LASTLITERALS;
+
+       BYTE *op = (BYTE *) dest;
+       BYTE * const olimit = op + maxOutputSize;
+
+       U32 forwardH;
+       size_t refDelta = 0;
+
+       /* Init conditions */
+       if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) {
+               /* Unsupported inputSize, too large (or negative) */
+               return 0;
+       }
+
+       switch (dict) {
+       case noDict:
+       default:
+               base = (const BYTE *)source;
+               lowLimit = (const BYTE *)source;
+               break;
+       case withPrefix64k:
+               base = (const BYTE *)source - dictPtr->currentOffset;
+               lowLimit = (const BYTE *)source - dictPtr->dictSize;
+               break;
+       case usingExtDict:
+               base = (const BYTE *)source - dictPtr->currentOffset;
+               lowLimit = (const BYTE *)source;
+               break;
+       }
+
+       if ((tableType == byU16)
+               && (inputSize >= LZ4_64Klimit)) {
+               /* Size too large (not within 64K limit) */
+               return 0;
+       }
+
+       if (inputSize < LZ4_minLength) {
+               /* Input too small, no compression (all literals) */
+               goto _last_literals;
+       }
+
+       /* First Byte */
+       LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+       ip++;
+       forwardH = LZ4_hashPosition(ip, tableType);
+
+       /* Main Loop */
+       for ( ; ; ) {
+               const BYTE *match;
+               BYTE *token;
+
+               /* Find a match */
+               {
+                       const BYTE *forwardIp = ip;
+                       unsigned int step = 1;
+                       unsigned int searchMatchNb = acceleration << LZ4_SKIPTRIGGER;
+
+                       do {
+                               U32 const h = forwardH;
+
+                               ip = forwardIp;
+                               forwardIp += step;
+                               step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);
+
+                               if (unlikely(forwardIp > mflimit))
+                                       goto _last_literals;
+
+                               match = LZ4_getPositionOnHash(h,
+                                       dictPtr->hashTable,
+                                       tableType, base);
+
+                               if (dict == usingExtDict) {
+                                       if (match < (const BYTE *)source) {
+                                               refDelta = dictDelta;
+                                               lowLimit = dictionary;
+                                       } else {
+                                               refDelta = 0;
+                                               lowLimit = (const BYTE *)source;
+                               }        }
+
+                               forwardH = LZ4_hashPosition(forwardIp,
+                                       tableType);
+
+                               LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
+                                       tableType, base);
+                       } while (((dictIssue == dictSmall)
+                                       ? (match < lowRefLimit)
+                                       : 0)
+                               || ((tableType == byU16)
+                                       ? 0
+                                       : (match + MAX_DISTANCE < ip))
+                               || (LZ4_read32(match + refDelta)
+                                       != LZ4_read32(ip)));
+               }
+
+               /* Catch up */
+               while (((ip > anchor) & (match + refDelta > lowLimit))
+                               && (unlikely(ip[-1] == match[refDelta - 1]))) {
+                       ip--;
+                       match--;
+               }
+
+               /* Encode Literals */
+               {
+                       unsigned const int litLength = (unsigned int)(ip - anchor);
+
+                       token = op++;
+
+                       if ((outputLimited) &&
+                               /* Check output buffer overflow */
+                               (unlikely(op + litLength +
+                                       (2 + 1 + LASTLITERALS) +
+                                       (litLength / 255) > olimit)))
+                               return 0;
+
+                       if (litLength >= RUN_MASK) {
+                               int len = (int)litLength - RUN_MASK;
+
+                               *token = (RUN_MASK << ML_BITS);
+
+                               for (; len >= 255; len -= 255)
+                                       *op++ = 255;
+                               *op++ = (BYTE)len;
+                       } else
+                               *token = (BYTE)(litLength << ML_BITS);
+
+                       /* Copy Literals */
+                       LZ4_wildCopy(op, anchor, op + litLength);
+                       op += litLength;
+               }
+
+_next_match:
+               /* Encode Offset */
+               LZ4_writeLE16(op, (U16)(ip - match));
+               op += 2;
+
+               /* Encode MatchLength */
+               {
+                       unsigned int matchCode;
+
+                       if ((dict == usingExtDict)
+                               && (lowLimit == dictionary)) {
+                               const BYTE *limit;
+
+                               match += refDelta;
+                               limit = ip + (dictEnd - match);
+
+                               if (limit > matchlimit)
+                                       limit = matchlimit;
+
+                               matchCode = LZ4_count(ip + MINMATCH,
+                                       match + MINMATCH, limit);
+
+                               ip += MINMATCH + matchCode;
+
+                               if (ip == limit) {
+                                       unsigned const int more = LZ4_count(ip,
+                                               (const BYTE *)source,
+                                               matchlimit);
+
+                                       matchCode += more;
+                                       ip += more;
+                               }
+                       } else {
+                               matchCode = LZ4_count(ip + MINMATCH,
+                                       match + MINMATCH, matchlimit);
+                               ip += MINMATCH + matchCode;
+                       }
+
+                       if (outputLimited &&
+                               /* Check output buffer overflow */
+                               (unlikely(op +
+                                       (1 + LASTLITERALS) +
+                                       (matchCode >> 8) > olimit)))
+                               return 0;
+
+                       if (matchCode >= ML_MASK) {
+                               *token += ML_MASK;
+                               matchCode -= ML_MASK;
+                               LZ4_write32(op, 0xFFFFFFFF);
+
+                               while (matchCode >= 4 * 255) {
+                                       op += 4;
+                                       LZ4_write32(op, 0xFFFFFFFF);
+                                       matchCode -= 4 * 255;
+                               }
+
+                               op += matchCode / 255;
+                               *op++ = (BYTE)(matchCode % 255);
+                       } else
+                               *token += (BYTE)(matchCode);
+               }
+
+               anchor = ip;
+
+               /* Test end of chunk */
+               if (ip > mflimit)
+                       break;
+
+               /* Fill table */
+               LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base);
+
+               /* Test next position */
+               match = LZ4_getPosition(ip, dictPtr->hashTable,
+                       tableType, base);
+
+               if (dict == usingExtDict) {
+                       if (match < (const BYTE *)source) {
+                               refDelta = dictDelta;
+                               lowLimit = dictionary;
+                       } else {
+                               refDelta = 0;
+                               lowLimit = (const BYTE *)source;
+                       }
+               }
+
+               LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+
+               if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
+                       && (match + MAX_DISTANCE >= ip)
+                       && (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
+                       token = op++;
+                       *token = 0;
+                       goto _next_match;
+               }
+
+               /* Prepare next loop */
+               forwardH = LZ4_hashPosition(++ip, tableType);
+       }
+
+_last_literals:
+       /* Encode Last Literals */
+       {
+               size_t const lastRun = (size_t)(iend - anchor);
+
+               if ((outputLimited) &&
+                       /* Check output buffer overflow */
+                       ((op - (BYTE *)dest) + lastRun + 1 +
+                       ((lastRun + 255 - RUN_MASK) / 255) > (U32)maxOutputSize))
+                       return 0;
+
+               if (lastRun >= RUN_MASK) {
+                       size_t accumulator = lastRun - RUN_MASK;
+                       *op++ = RUN_MASK << ML_BITS;
+                       for (; accumulator >= 255; accumulator -= 255)
+                               *op++ = 255;
+                       *op++ = (BYTE) accumulator;
+               } else {
+                       *op++ = (BYTE)(lastRun << ML_BITS);
+               }
+
+               LZ4_memcpy(op, anchor, lastRun);
+
+               op += lastRun;
+       }
+
+       /* End */
+       return (int) (((char *)op) - dest);
+}
+
+/*
+ * LZ4_compress_fast_extState() - one-shot compression of 'source' into
+ * 'dest' using caller-provided working memory.
+ *
+ * 'state' is treated as an LZ4_stream_t and is zeroed through
+ * LZ4_resetStream() before use. An 'acceleration' below 1 is replaced by
+ * LZ4_ACCELERATION_DEFAULT.
+ *
+ * When 'maxOutputSize' is at least LZ4_COMPRESSBOUND(inputSize) the
+ * noLimit variant is selected, otherwise the limitedOutput one. Inputs
+ * shorter than LZ4_64Klimit use the byU16 hash table. Every call site
+ * hands constant directives to LZ4_compress_generic().
+ *
+ * Returns the value produced by LZ4_compress_generic().
+ */
+static int LZ4_compress_fast_extState(
+       void *state,
+       const char *source,
+       char *dest,
+       int inputSize,
+       int maxOutputSize,
+       int acceleration)
+{
+       LZ4_stream_t *stream = (LZ4_stream_t *)state;
+       LZ4_stream_t_internal *internal = &stream->internal_donotuse;
+#if LZ4_ARCH64
+       const tableType_t largeTable = byU32;
+#else
+       const tableType_t largeTable = byPtr;
+#endif
+
+       if (acceleration < 1)
+               acceleration = LZ4_ACCELERATION_DEFAULT;
+
+       LZ4_resetStream(stream);
+
+       /* Output may be too small for the worst case: bounded variants */
+       if (maxOutputSize < LZ4_COMPRESSBOUND(inputSize)) {
+               if (inputSize >= LZ4_64Klimit)
+                       return LZ4_compress_generic(internal, source,
+                               dest, inputSize,
+                               maxOutputSize, limitedOutput, largeTable,
+                               noDict, noDictIssue, acceleration);
+
+               return LZ4_compress_generic(internal, source,
+                       dest, inputSize,
+                       maxOutputSize, limitedOutput, byU16,
+                       noDict, noDictIssue, acceleration);
+       }
+
+       /* Output is large enough for any input: unbounded variants */
+       if (inputSize >= LZ4_64Klimit)
+               return LZ4_compress_generic(internal, source,
+                       dest, inputSize, 0,
+                       noLimit, largeTable,
+                       noDict, noDictIssue, acceleration);
+
+       return LZ4_compress_generic(internal, source,
+               dest, inputSize, 0,
+               noLimit, byU16,
+               noDict, noDictIssue, acceleration);
+}
+
+/*
+ * LZ4_compress_fast() - compress 'source' into 'dest' with the given
+ * 'acceleration', using 'wrkmem' as the compression state.
+ *
+ * Thin wrapper: all arguments are forwarded unchanged to
+ * LZ4_compress_fast_extState(), whose return value is returned as is.
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+       int maxOutputSize, int acceleration, void *wrkmem)
+{
+       const int written = LZ4_compress_fast_extState(wrkmem, source, dest,
+               inputSize, maxOutputSize, acceleration);
+
+       return written;
+}
+
+/*
+ * LZ4_compress_default() - compress 'source' into 'dest' with
+ * LZ4_ACCELERATION_DEFAULT as the acceleration.
+ *
+ * Equivalent to calling LZ4_compress_fast() with that acceleration; its
+ * return value is passed through unchanged.
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+       int maxOutputSize, void *wrkmem)
+{
+       const int accel = LZ4_ACCELERATION_DEFAULT;
+
+       return LZ4_compress_fast(source, dest, inputSize, maxOutputSize,
+               accel, wrkmem);
+}
+
+/*-******************************
+ *     *_destSize() variant
+ *
+ *     LZ4_compress_destSize_generic() compresses as much of 'src' as
+ *     fits into the 'targetDstSize' bytes available at 'dst'.
+ *     On entry *srcSizePtr is the number of input bytes available; on
+ *     a normal return it holds the number of input bytes consumed.
+ *     Returns the number of bytes written to 'dst'. Returns 0, leaving
+ *     *srcSizePtr untouched, when targetDstSize < 1, when the input
+ *     size is negative or above LZ4_MAX_INPUT_SIZE, or when tableType
+ *     is byU16 and the input size is LZ4_64Klimit or more.
+ ********************************/
+static int LZ4_compress_destSize_generic(
+       LZ4_stream_t_internal * const ctx,
+       const char * const src,
+       char * const dst,
+       int * const srcSizePtr, /* in: input size, out: bytes consumed */
+       const int targetDstSize, /* capacity of dst, in bytes */
+       const tableType_t tableType)
+{
+       const BYTE *ip = (const BYTE *) src;
+       const BYTE *base = (const BYTE *) src;
+       const BYTE *lowLimit = (const BYTE *) src;
+       const BYTE *anchor = ip; /* start of the pending literal run */
+       const BYTE * const iend = ip + *srcSizePtr; /* end of input */
+       const BYTE * const mflimit = iend - MFLIMIT;
+       const BYTE * const matchlimit = iend - LASTLITERALS;
+
+       BYTE *op = (BYTE *) dst;
+       BYTE * const oend = op + targetDstSize; /* end of output */
+       BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */
+               - 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */;
+       BYTE * const oMaxMatch = op + targetDstSize
+               - (LASTLITERALS + 1 /* token */);
+       BYTE * const oMaxSeq = oMaxLit - 1 /* token */;
+
+       U32 forwardH;
+
+       /* Init conditions */
+       /* Impossible to store anything */
+       if (targetDstSize < 1)
+               return 0;
+       /* Unsupported input size, too large (or negative) */
+       if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE)
+               return 0;
+       /* Size too large (not within 64K limit) */
+       if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit))
+               return 0;
+       /* Input too small, no compression (all literals) */
+       if (*srcSizePtr < LZ4_minLength)
+               goto _last_literals;
+
+       /*
+        * First Byte: iend was captured above, so *srcSizePtr can be
+        * cleared here; it is rewritten with the consumed byte count at
+        * the end of the function.
+        */
+       *srcSizePtr = 0;
+       LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+       ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+       /* Main Loop */
+       for ( ; ; ) {
+               const BYTE *match;
+               BYTE *token;
+
+               /*
+                * Find a match: look up the hash of the current position,
+                * record the position in the table and move on until a
+                * candidate is accepted. The stride between probes is
+                * searchMatchNb >> LZ4_SKIPTRIGGER, so it grows as probes
+                * keep failing. A candidate is rejected when it is more
+                * than MAX_DISTANCE behind ip (not checked for byU16) or
+                * when its first 4 bytes differ from those at ip. Running
+                * past mflimit ends the search and emits the rest of the
+                * input as literals.
+                */
+               {
+                       const BYTE *forwardIp = ip;
+                       unsigned int step = 1;
+                       unsigned int searchMatchNb = 1 << LZ4_SKIPTRIGGER;
+
+                       do {
+                               U32 h = forwardH;
+
+                               ip = forwardIp;
+                               forwardIp += step;
+                               step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);
+
+                               if (unlikely(forwardIp > mflimit))
+                                       goto _last_literals;
+
+                               match = LZ4_getPositionOnHash(h, ctx->hashTable,
+                                       tableType, base);
+                               forwardH = LZ4_hashPosition(forwardIp,
+                                       tableType);
+                               LZ4_putPositionOnHash(ip, h,
+                                       ctx->hashTable, tableType,
+                                       base);
+
+                       } while (((tableType == byU16)
+                               ? 0
+                               : (match + MAX_DISTANCE < ip))
+                               || (LZ4_read32(match) != LZ4_read32(ip)));
+               }
+
+               /*
+                * Catch up: extend the match backwards while the preceding
+                * bytes are equal, without going below anchor or lowLimit.
+                */
+               while ((ip > anchor)
+                       && (match > lowLimit)
+                       && (unlikely(ip[-1] == match[-1]))) {
+                       ip--;
+                       match--;
+               }
+
+               /* Encode Literal length */
+               {
+                       unsigned int litLength = (unsigned int)(ip - anchor);
+
+                       token = op++;
+                       if (op + ((litLength + 240) / 255)
+                               + litLength > oMaxLit) {
+                               /* Not enough space for a last match */
+                               op--; /* give back the token byte */
+                               goto _last_literals;
+                       }
+                       if (litLength >= RUN_MASK) {
+                               unsigned int len = litLength - RUN_MASK;
+                               *token = (RUN_MASK<<ML_BITS);
+                               for (; len >= 255; len -= 255)
+                                       *op++ = 255;
+                               *op++ = (BYTE)len;
+                       } else
+                               *token = (BYTE)(litLength << ML_BITS);
+
+                       /* Copy Literals */
+                       LZ4_wildCopy(op, anchor, op + litLength);
+                       op += litLength;
+               }
+
+_next_match:
+               /* Encode Offset */
+               LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
+
+               /* Encode MatchLength */
+               {
+                       size_t matchLength = LZ4_count(ip + MINMATCH,
+                       match + MINMATCH, matchlimit);
+
+                       if (op + ((matchLength + 240)/255) > oMaxMatch) {
+                               /* Match description too long : reduce it */
+                               matchLength = (15 - 1) + (oMaxMatch - op) * 255;
+                       }
+                       ip += MINMATCH + matchLength;
+
+                       if (matchLength >= ML_MASK) {
+                               *token += ML_MASK;
+                               matchLength -= ML_MASK;
+                               while (matchLength >= 255) {
+                                       matchLength -= 255;
+                                       *op++ = 255;
+                               }
+                               *op++ = (BYTE)matchLength;
+                       } else
+                               *token += (BYTE)(matchLength);
+               }
+
+               anchor = ip; /* next literal run starts after the match */
+
+               /*
+                * Test end of block: stop once ip is past mflimit or once
+                * op is past oMaxSeq.
+                */
+               if (ip > mflimit)
+                       break;
+               if (op > oMaxSeq)
+                       break;
+
+               /* Fill table */
+               LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base);
+
+               /*
+                * Test next position: a match found right at ip starts a
+                * new sequence with a zeroed token, i.e. no literals.
+                */
+               match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
+               LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+
+               if ((match + MAX_DISTANCE >= ip)
+                       && (LZ4_read32(match) == LZ4_read32(ip))) {
+                       token = op++; *token = 0;
+                       goto _next_match;
+               }
+
+               /* Prepare next loop */
+               forwardH = LZ4_hashPosition(++ip, tableType);
+       }
+
+_last_literals:
+       /*
+        * Encode Last Literals: everything from anchor to iend is emitted
+        * as one literal run. When that run does not fit before oend it is
+        * shortened, and ip is set to the end of the shortened run.
+        */
+       {
+               size_t lastRunSize = (size_t)(iend - anchor);
+
+               if (op + 1 /* token */
+                       + ((lastRunSize + 240) / 255) /* litLength */
+                       + lastRunSize /* literals */ > oend) {
+                       /* adapt lastRunSize to fill 'dst' */
+                       lastRunSize     = (oend - op) - 1;
+                       lastRunSize -= (lastRunSize + 240) / 255;
+               }
+               ip = anchor + lastRunSize;
+
+               if (lastRunSize >= RUN_MASK) {
+                       size_t accumulator = lastRunSize - RUN_MASK;
+
+                       *op++ = RUN_MASK << ML_BITS;
+                       for (; accumulator >= 255; accumulator -= 255)
+                               *op++ = 255;
+                       *op++ = (BYTE) accumulator;
+               } else {
+                       *op++ = (BYTE)(lastRunSize<<ML_BITS);
+               }
+               LZ4_memcpy(op, anchor, lastRunSize);
+               op += lastRunSize;
+       }
+
+       /*
+        * End: report the consumed input through srcSizePtr and return
+        * the number of bytes written to dst.
+        */
+       *srcSizePtr = (int) (((const char *)ip) - src);
+       return (int) (((char *)op) - dst);
+}
+
+/*
+ * LZ4_compress_destSize_extState() - compress into a destination of
+ * 'targetDstSize' bytes, using 'state' as working memory.
+ *
+ * 'state' is zeroed through LZ4_resetStream() first. When the
+ * destination is at least LZ4_COMPRESSBOUND(*srcSizePtr) bytes the
+ * regular LZ4_compress_fast_extState() path is taken with acceleration
+ * 1 and *srcSizePtr is not modified here. Otherwise
+ * LZ4_compress_destSize_generic() is called, with the byU16 table for
+ * inputs shorter than LZ4_64Klimit.
+ *
+ * Returns the value produced by the selected compression function.
+ */
+static int LZ4_compress_destSize_extState(
+       LZ4_stream_t *state,
+       const char *src,
+       char *dst,
+       int *srcSizePtr,
+       int targetDstSize)
+{
+#if LZ4_ARCH64
+       const tableType_t largeTable = byU32;
+#else
+       const tableType_t largeTable = byPtr;
+#endif
+       LZ4_stream_t_internal *internal = &state->internal_donotuse;
+
+       LZ4_resetStream(state);
+
+       /* dst holds the worst case: compression success is guaranteed */
+       if (targetDstSize >= LZ4_COMPRESSBOUND(*srcSizePtr))
+               return LZ4_compress_fast_extState(state, src, dst,
+                       *srcSizePtr, targetDstSize, 1);
+
+       if (*srcSizePtr >= LZ4_64Klimit)
+               return LZ4_compress_destSize_generic(internal, src, dst,
+                       srcSizePtr, targetDstSize, largeTable);
+
+       return LZ4_compress_destSize_generic(internal, src, dst,
+               srcSizePtr, targetDstSize, byU16);
+}
+
+
+/*
+ * LZ4_compress_destSize() - compress from 'src' into the
+ * 'targetDstSize' bytes at 'dst', using 'wrkmem' as the stream state.
+ *
+ * Thin wrapper around LZ4_compress_destSize_extState(); 'srcSizePtr' and
+ * the return value are passed through unchanged.
+ */
+int LZ4_compress_destSize(
+       const char *src,
+       char *dst,
+       int *srcSizePtr,
+       int targetDstSize,
+       void *wrkmem)
+{
+       LZ4_stream_t *stream = wrkmem;
+
+       return LZ4_compress_destSize_extState(stream, src, dst, srcSizePtr,
+               targetDstSize);
+}
+
+/*-******************************
+ *     Streaming functions
+ ********************************/
+/* LZ4_resetStream() - clear the whole stream state to zero bytes. */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream)
+{
+       memset(LZ4_stream, 0, sizeof(*LZ4_stream));
+}
+
+/*
+ * LZ4_loadDict() - make 'dictionary' the dictionary of 'LZ4_dict'.
+ *
+ * The stream is reset first when its initCheck field is set or its
+ * currentOffset exceeds 1 GB. A dictionary shorter than HASH_UNIT bytes
+ * is not loaded: the stream is left without dictionary and 0 is
+ * returned. Otherwise at most the last 64 KB are referenced (not
+ * copied) and hashed into the byU32 table.
+ *
+ * Returns the dictionary size recorded in the stream.
+ */
+int LZ4_loadDict(LZ4_stream_t *LZ4_dict,
+       const char *dictionary, int dictSize)
+{
+       LZ4_stream_t_internal * const internal = &LZ4_dict->internal_donotuse;
+       const BYTE * const dictEnd = (const BYTE *)dictionary + dictSize;
+       const BYTE *cursor = (const BYTE *)dictionary;
+       const BYTE *base;
+
+       /* Uninitialized structure, or reuse overflow */
+       if (internal->initCheck || (internal->currentOffset > 1 * GB))
+               LZ4_resetStream(LZ4_dict);
+
+       /* Too short to be hashed: no dictionary */
+       if (dictSize < (int)HASH_UNIT) {
+               internal->dictionary = NULL;
+               internal->dictSize = 0;
+               return 0;
+       }
+
+       /* Only the last 64 KB are kept */
+       if ((dictEnd - cursor) > 64 * KB)
+               cursor = dictEnd - 64 * KB;
+
+       internal->currentOffset += 64 * KB;
+       base = cursor - internal->currentOffset;
+       internal->dictionary = cursor;
+       internal->dictSize = (U32)(dictEnd - cursor);
+       internal->currentOffset += internal->dictSize;
+
+       /* Hash every third position that has HASH_UNIT bytes available */
+       for (; cursor <= dictEnd - HASH_UNIT; cursor += 3)
+               LZ4_putPosition(cursor, internal->hashTable, byU32, base);
+
+       return internal->dictSize;
+}
+
+/*
+ * LZ4_renormDictT() - rescale the stream offsets when they get too big.
+ *
+ * Nothing happens unless currentOffset is above 0x80000000 or above the
+ * address 'src'. Otherwise every hash table entry is lowered by
+ * (currentOffset - 64 KB), entries below that amount being cleared,
+ * currentOffset restarts at 64 KB and the dictionary is trimmed to its
+ * last 64 KB at most, keeping the same end address.
+ */
+static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict,
+       const BYTE *src)
+{
+       const BYTE *oldDictEnd;
+       U32 shift;
+       int idx;
+
+       /* No address space overflow in sight */
+       if ((LZ4_dict->currentOffset <= 0x80000000) &&
+               ((uptrval)LZ4_dict->currentOffset <= (uptrval)src))
+               return;
+
+       shift = LZ4_dict->currentOffset - 64 * KB;
+       /* Must be taken before dictSize is clamped below */
+       oldDictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+
+       /* Rescale hash table */
+       for (idx = 0; idx < LZ4_HASH_SIZE_U32; idx++) {
+               if (LZ4_dict->hashTable[idx] >= shift)
+                       LZ4_dict->hashTable[idx] -= shift;
+               else
+                       LZ4_dict->hashTable[idx] = 0;
+       }
+
+       LZ4_dict->currentOffset = 64 * KB;
+
+       if (LZ4_dict->dictSize > 64 * KB)
+               LZ4_dict->dictSize = 64 * KB;
+       LZ4_dict->dictionary = oldDictEnd - LZ4_dict->dictSize;
+}
+
+/*
+ * LZ4_saveDict() - move the tail of the current dictionary into
+ * 'safeBuffer' and make the stream reference that copy.
+ *
+ * 'dictSize' is the number of bytes requested. It is capped to 64 KB
+ * (a negative value is treated as "too large" by the unsigned compare)
+ * and then to the size of the dictionary currently held by the stream.
+ * memmove() is used because 'safeBuffer' may overlap the old dictionary.
+ *
+ * Returns the number of bytes saved, which may be 0.
+ */
+int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
+{
+       LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse;
+
+       if ((U32)dictSize > 64 * KB) {
+               /* useless to define a dictionary > 64 * KB */
+               dictSize = 64 * KB;
+       }
+       if ((U32)dictSize > dict->dictSize)
+               dictSize = dict->dictSize;
+
+       /*
+        * A stream without dictionary has dictionary == NULL and
+        * dictSize == 0. Skip the pointer arithmetic and the copy in
+        * that case: both are undefined on a null pointer, even for a
+        * length of zero.
+        */
+       if (dictSize > 0) {
+               const BYTE * const previousDictEnd =
+                       dict->dictionary + dict->dictSize;
+
+               memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+       }
+
+       dict->dictionary = (const BYTE *)safeBuffer;
+       dict->dictSize = (U32)dictSize;
+
+       return dictSize;
+}
+/*
+ * LZ4_compress_fast_continue() - compress 'source' into 'dest' using the
+ * dictionary state carried by 'LZ4_stream'.
+ *
+ * Returns 0 without compressing when the stream's initCheck field is
+ * set. An 'acceleration' below 1 is replaced by
+ * LZ4_ACCELERATION_DEFAULT. Compression always goes through
+ * LZ4_compress_generic() with limitedOutput and the byU32 table, and
+ * that call's result is returned.
+ *
+ * dictEnd is computed once on entry; the later adjustments to the
+ * dictionary only move its start, as they set dictionary to
+ * dictEnd - dictSize.
+ *
+ * The stream bookkeeping after the call is updated unconditionally,
+ * whatever value LZ4_compress_generic() returned.
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
+       char *dest, int inputSize, int maxOutputSize, int acceleration)
+{
+       LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse;
+       const BYTE * const dictEnd = streamPtr->dictionary
+               + streamPtr->dictSize;
+
+       const BYTE *smallest = (const BYTE *) source;
+
+       if (streamPtr->initCheck) {
+               /* Uninitialized structure detected */
+               return 0;
+       }
+
+       if ((streamPtr->dictSize > 0) && (smallest > dictEnd))
+               smallest = dictEnd; /* lower of source and dictEnd */
+
+       LZ4_renormDictT(streamPtr, smallest);
+
+       if (acceleration < 1)
+               acceleration = LZ4_ACCELERATION_DEFAULT;
+
+       /*
+        * Check overlapping input/dictionary space: when the end of the
+        * input lies inside the dictionary, only the dictionary bytes
+        * after that point are kept, capped to 64 KB, and dropped
+        * entirely when fewer than 4 remain.
+        */
+       {
+               const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+
+               if ((sourceEnd > streamPtr->dictionary)
+                       && (sourceEnd < dictEnd)) {
+                       streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+                       if (streamPtr->dictSize > 64 * KB)
+                               streamPtr->dictSize = 64 * KB;
+                       if (streamPtr->dictSize < 4)
+                               streamPtr->dictSize = 0;
+                       streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+               }
+       }
+
+       /*
+        * prefix mode : source data follows dictionary
+        * dictSmall is selected when the dictionary is shorter than both
+        * 64 KB and currentOffset. Afterwards the input is counted as
+        * part of the dictionary: dictSize and currentOffset both grow
+        * by inputSize.
+        */
+       if (dictEnd == (const BYTE *)source) {
+               int result;
+
+               if ((streamPtr->dictSize < 64 * KB) &&
+                       (streamPtr->dictSize < streamPtr->currentOffset)) {
+                       result = LZ4_compress_generic(
+                               streamPtr, source, dest, inputSize,
+                               maxOutputSize, limitedOutput, byU32,
+                               withPrefix64k, dictSmall, acceleration);
+               } else {
+                       result = LZ4_compress_generic(
+                               streamPtr, source, dest, inputSize,
+                               maxOutputSize, limitedOutput, byU32,
+                               withPrefix64k, noDictIssue, acceleration);
+               }
+               streamPtr->dictSize += (U32)inputSize;
+               streamPtr->currentOffset += (U32)inputSize;
+               return result;
+       }
+
+       /*
+        * external dictionary mode
+        * Same dictSmall selection as above. Afterwards the input that
+        * was just compressed replaces the dictionary: dictionary points
+        * at source, dictSize is inputSize, and currentOffset grows by
+        * inputSize.
+        */
+       {
+               int result;
+
+               if ((streamPtr->dictSize < 64 * KB) &&
+                       (streamPtr->dictSize < streamPtr->currentOffset)) {
+                       result = LZ4_compress_generic(
+                               streamPtr, source, dest, inputSize,
+                               maxOutputSize, limitedOutput, byU32,
+                               usingExtDict, dictSmall, acceleration);
+               } else {
+                       result = LZ4_compress_generic(
+                               streamPtr, source, dest, inputSize,
+                               maxOutputSize, limitedOutput, byU32,
+                               usingExtDict, noDictIssue, acceleration);
+               }
+               streamPtr->dictionary = (const BYTE *)source;
+               streamPtr->dictSize = (U32)inputSize;
+               streamPtr->currentOffset += (U32)inputSize;
+               return result;
+       }
+}
diff --git a/lustre/lz4/lz4_decompress.c b/lustre/lz4/lz4_decompress.c
new file mode 100644 (file)
index 0000000..251b9a7
--- /dev/null
@@ -0,0 +1,704 @@
+/*
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *     - LZ4 homepage : http://www.lz4.org
+ *     - LZ4 source repository : https://github.com/lz4/lz4
+ *
+ *     Changed for kernel usage by:
+ *     Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+ */
+
+/*-************************************
+ *     Dependencies
+ **************************************/
+#include "lz4.h"
+#include "lz4defs.h"
+#include <linux/kernel.h>
+#include <asm/unaligned.h>
+
+/*-*****************************
+ *     Decompression functions
+ *******************************/
+
+#define DEBUGLOG(l, ...) {}    /* disabled: expands to an empty block */
+
+#ifndef assert /* only provide a stub when assert is not already a macro */
+#define assert(condition) ((void)0) /* condition is not evaluated */
+#endif /* assert */
+
+/*
+ * LZ4_decompress_generic() :
+ * This generic decompression function covers all use cases.
+ * It shall be instantiated several times, using different sets of directives.
+ * Note that it is important for performance that this function really get inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+static FORCE_INLINE int LZ4_decompress_generic(
+        const char * const src,
+        char * const dst,
+        int srcSize,
+               /*
+                * If endOnInput == endOnInputSize,
+                * this value is `dstCapacity`
+                */
+        int outputSize,
+        /* endOnOutputSize, endOnInputSize */
+        endCondition_directive endOnInput,
+        /* full, partial */
+        earlyEnd_directive partialDecoding,
+        /* noDict, withPrefix64k, usingExtDict */
+        dict_directive dict,
+        /* always <= dst, == dst when no prefix */
+        const BYTE * const lowPrefix,
+        /* only if dict == usingExtDict */
+        const BYTE * const dictStart,
+        /* note : = 0 if noDict */
+        const size_t dictSize
+        )
+{
+       const BYTE *ip = (const BYTE *) src;
+       const BYTE * const iend = ip + srcSize;
+
+       BYTE *op = (BYTE *) dst;
+       BYTE * const oend = op + outputSize;
+       BYTE *cpy;
+
+       const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
+       static const unsigned int inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
+       static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
+
+       const int safeDecode = (endOnInput == endOnInputSize);
+       const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
+
+       /* Set up the "end" pointers for the shortcut. */
+       const BYTE *const shortiend = iend -
+               (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
+       const BYTE *const shortoend = oend -
+               (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
+
+       DEBUGLOG(5, "%s (srcSize:%i, dstSize:%i)", __func__,
+                srcSize, outputSize);
+
+       /* Special cases */
+       assert(lowPrefix <= op);
+       assert(src != NULL);
+
+       /* Empty output buffer */
+       if ((endOnInput) && (unlikely(outputSize == 0)))
+               return ((srcSize == 1) && (*ip == 0)) ? 0 : -1;
+
+       if ((!endOnInput) && (unlikely(outputSize == 0)))
+               return (*ip == 0 ? 1 : -1);
+
+       if ((endOnInput) && unlikely(srcSize == 0))
+               return -1;
+
+       /* Main Loop : decode sequences */
+       while (1) {
+               size_t length;
+               const BYTE *match;
+               size_t offset;
+
+               /* get literal length */
+               unsigned int const token = *ip++;
+               length = token>>ML_BITS;
+
+               /* ip < iend before the increment */
+               assert(!endOnInput || ip <= iend);
+
+               /*
+                * A two-stage shortcut for the most common case:
+                * 1) If the literal length is 0..14, and there is enough
+                * space, enter the shortcut and copy 16 bytes on behalf
+                * of the literals (in the fast mode, only 8 bytes can be
+                * safely copied this way).
+                * 2) Further if the match length is 4..18, copy 18 bytes
+                * in a similar manner; but we ensure that there's enough
+                * space in the output for those 18 bytes earlier, upon
+                * entering the shortcut (in other words, there is a
+                * combined check for both stages).
+                *
+                * The & in the likely() below is intentionally not && so that
+                * some compilers can produce better parallelized runtime code
+                */
+               if ((endOnInput ? length != RUN_MASK : length <= 8)
+                  /*
+                   * strictly "less than" on input, to re-enter
+                   * the loop with at least one byte
+                   */
+                  && likely((endOnInput ? ip < shortiend : 1) &
+                            (op <= shortoend))) {
+                       /* Copy the literals */
+                       LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
+                       op += length; ip += length;
+
+                       /*
+                        * The second stage:
+                        * prepare for match copying, decode full info.
+                        * If it doesn't work out, the info won't be wasted.
+                        */
+                       length = token & ML_MASK; /* match length */
+                       offset = LZ4_readLE16(ip);
+                       ip += 2;
+                       match = op - offset;
+                       assert(match <= op); /* check overflow */
+
+                       /* Do not deal with overlapping matches. */
+                       if ((length != ML_MASK) &&
+                           (offset >= 8) &&
+                           (dict == withPrefix64k || match >= lowPrefix)) {
+                               /* Copy the match. */
+                               LZ4_memcpy(op + 0, match + 0, 8);
+                               LZ4_memcpy(op + 8, match + 8, 8);
+                               LZ4_memcpy(op + 16, match + 16, 2);
+                               op += length + MINMATCH;
+                               /* Both stages worked, load the next token. */
+                               continue;
+                       }
+
+                       /*
+                        * The second stage didn't work out, but the info
+                        * is ready. Propel it right to the point of match
+                        * copying.
+                        */
+                       goto _copy_match;
+               }
+
+               /* decode literal length */
+               if (length == RUN_MASK) {
+                       unsigned int s;
+
+                       if (unlikely(endOnInput ? ip >= iend - RUN_MASK : 0)) {
+                               /* overflow detection */
+                               goto _output_error;
+                       }
+                       do {
+                               s = *ip++;
+                               length += s;
+                       } while (likely(endOnInput
+                               ? ip < iend - RUN_MASK
+                               : 1) & (s == 255));
+
+                       if ((safeDecode)
+                           && unlikely((uptrval)(op) +
+                                       length < (uptrval)(op))) {
+                               /* overflow detection */
+                               goto _output_error;
+                       }
+                       if ((safeDecode)
+                           && unlikely((uptrval)(ip) +
+                                       length < (uptrval)(ip))) {
+                               /* overflow detection */
+                               goto _output_error;
+                       }
+               }
+
+               /* copy literals */
+               cpy = op + length;
+               LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+
+               if (((endOnInput) && ((cpy > oend - MFLIMIT)
+                       || (ip + length > iend - (2 + 1 + LASTLITERALS))))
+                       || ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
+                       if (partialDecoding) {
+                               if (cpy > oend) {
+                                       /*
+                                        * Partial decoding :
+                                        * stop in the middle of literal segment
+                                        */
+                                       cpy = oend;
+                                       length = oend - op;
+                               }
+                               if ((endOnInput)
+                                       && (ip + length > iend)) {
+                                       /*
+                                        * Error :
+                                        * read attempt beyond
+                                        * end of input buffer
+                                        */
+                                       goto _output_error;
+                               }
+                       } else {
+                               if ((!endOnInput)
+                                       && (cpy != oend)) {
+                                       /*
+                                        * Error :
+                                        * block decoding must
+                                        * stop exactly there
+                                        */
+                                       goto _output_error;
+                               }
+                               if ((endOnInput)
+                                       && ((ip + length != iend)
+                                       || (cpy > oend))) {
+                                       /*
+                                        * Error :
+                                        * input must be consumed
+                                        */
+                                       goto _output_error;
+                               }
+                       }
+
+                       /*
+                        * supports overlapping memory regions; only matters
+                        * for in-place decompression scenarios
+                        */
+                       LZ4_memmove(op, ip, length);
+                       ip += length;
+                       op += length;
+
+                       /* Necessarily EOF when !partialDecoding.
+                        * When partialDecoding, it is EOF if we've either
+                        * filled the output buffer or
+                        * can't proceed with reading an offset for following match.
+                        */
+                       if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2)))
+                               break;
+               } else {
+                       /* may overwrite up to WILDCOPYLENGTH beyond cpy */
+                       LZ4_wildCopy(op, ip, cpy);
+                       ip += length;
+                       op = cpy;
+               }
+
+               /* get offset */
+               offset = LZ4_readLE16(ip);
+               ip += 2;
+               match = op - offset;
+
+               /* get matchlength */
+               length = token & ML_MASK;
+
+_copy_match:
+               if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) {
+                       /* Error : offset outside buffers */
+                       goto _output_error;
+               }
+
+               /* costs ~1%; silence an msan warning when offset == 0 */
+               /*
+                * note : when partialDecoding, there is no guarantee that
+                * at least 4 bytes remain available in output buffer
+                */
+               if (!partialDecoding) {
+                       assert(oend > op);
+                       assert(oend - op >= 4);
+
+                       LZ4_write32(op, (U32)offset);
+               }
+
+               if (length == ML_MASK) {
+                       unsigned int s;
+
+                       do {
+                               s = *ip++;
+
+                               if ((endOnInput) && (ip > iend - LASTLITERALS))
+                                       goto _output_error;
+
+                               length += s;
+                       } while (s == 255);
+
+                       if ((safeDecode)
+                               && unlikely(
+                                       (uptrval)(op) + length < (uptrval)op)) {
+                               /* overflow detection */
+                               goto _output_error;
+                       }
+               }
+
+               length += MINMATCH;
+
+               /* match starting within external dictionary */
+               if ((dict == usingExtDict) && (match < lowPrefix)) {
+                       if (unlikely(op + length > oend - LASTLITERALS)) {
+                               /* doesn't respect parsing restriction */
+                               if (!partialDecoding)
+                                       goto _output_error;
+                               length = min(length, (size_t)(oend - op));
+                       }
+
+                       if (length <= (size_t)(lowPrefix - match)) {
+                               /*
+                                * match fits entirely within external
+                                * dictionary : just copy
+                                */
+                               memmove(op, dictEnd - (lowPrefix - match),
+                                       length);
+                               op += length;
+                       } else {
+                               /*
+                                * match stretches into both external
+                                * dictionary and current block
+                                */
+                               size_t const copySize = (size_t)(lowPrefix - match);
+                               size_t const restSize = length - copySize;
+
+                               LZ4_memcpy(op, dictEnd - copySize, copySize);
+                               op += copySize;
+                               if (restSize > (size_t)(op - lowPrefix)) {
+                                       /* overlap copy */
+                                       BYTE * const endOfMatch = op + restSize;
+                                       const BYTE *copyFrom = lowPrefix;
+
+                                       while (op < endOfMatch)
+                                               *op++ = *copyFrom++;
+                               } else {
+                                       LZ4_memcpy(op, lowPrefix, restSize);
+                                       op += restSize;
+                               }
+                       }
+                       continue;
+               }
+
+               /* copy match within block */
+               cpy = op + length;
+
+               /*
+                * partialDecoding :
+                * may not respect endBlock parsing restrictions
+                */
+               assert(op <= oend);
+               if (partialDecoding &&
+                   (cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
+                       size_t const mlen = min(length, (size_t)(oend - op));
+                       const BYTE * const matchEnd = match + mlen;
+                       BYTE * const copyEnd = op + mlen;
+
+                       if (matchEnd > op) {
+                               /* overlap copy */
+                               while (op < copyEnd)
+                                       *op++ = *match++;
+                       } else {
+                               LZ4_memcpy(op, match, mlen);
+                       }
+                       op = copyEnd;
+                       if (op == oend)
+                               break;
+                       continue;
+               }
+
+               if (unlikely(offset < 8)) {
+                       op[0] = match[0];
+                       op[1] = match[1];
+                       op[2] = match[2];
+                       op[3] = match[3];
+                       match += inc32table[offset];
+                       LZ4_memcpy(op + 4, match, 4);
+                       match -= dec64table[offset];
+               } else {
+                       LZ4_copy8(op, match);
+                       match += 8;
+               }
+
+               op += 8;
+
+               if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
+                       BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
+
+                       if (cpy > oend - LASTLITERALS) {
+                               /*
+                                * Error : last LASTLITERALS bytes
+                                * must be literals (uncompressed)
+                                */
+                               goto _output_error;
+                       }
+
+                       if (op < oCopyLimit) {
+                               LZ4_wildCopy(op, match, oCopyLimit);
+                               match += oCopyLimit - op;
+                               op = oCopyLimit;
+                       }
+                       while (op < cpy)
+                               *op++ = *match++;
+               } else {
+                       LZ4_copy8(op, match);
+                       if (length > 16)
+                               LZ4_wildCopy(op + 8, match + 8, cpy);
+               }
+               op = cpy; /* wildcopy correction */
+       }
+
+       /* end of decoding */
+       if (endOnInput) {
+               /* Nb of output bytes decoded */
+               return (int) (((char *)op) - dst);
+       } else {
+               /* Nb of input bytes read */
+               return (int) (((const char *)ip) - src);
+       }
+
+       /* Overflow error detected */
+_output_error:
+       return (int) (-(((const char *)ip) - src)) - 1;
+}
+
+/*
+ * LZ4_decompress_safe() - decode one complete block without a dictionary.
+ *
+ * Instantiates LZ4_decompress_generic() with the endOnInputSize,
+ * decode_full_block and noDict directives. 'dest' itself is handed down as
+ * the prefix-start argument and no external dictionary is supplied.
+ * The generic decoder's return value is passed back unchanged.
+ */
+int LZ4_decompress_safe(const char *source, char *dest,
+       int compressedSize, int maxDecompressedSize)
+{
+       BYTE * const prefixStart = (BYTE *)dest;
+
+       return LZ4_decompress_generic(source, dest, compressedSize,
+                                     maxDecompressedSize, endOnInputSize,
+                                     decode_full_block, noDict,
+                                     prefixStart, NULL, 0);
+}
+
+/*
+ * LZ4_decompress_safe_partial() - decode at most a requested amount of output.
+ *
+ * The output limit given to the generic decoder is the smaller of
+ * 'targetOutputSize' and 'dstCapacity'. Decoding runs with the
+ * endOnInputSize, partial_decode and noDict directives.
+ * The generic decoder's return value is passed back unchanged.
+ */
+int LZ4_decompress_safe_partial(const char *src, char *dst,
+       int compressedSize, int targetOutputSize, int dstCapacity)
+{
+       int const outputLimit = min(targetOutputSize, dstCapacity);
+       BYTE * const prefixStart = (BYTE *)dst;
+
+       return LZ4_decompress_generic(src, dst, compressedSize, outputLimit,
+                                     endOnInputSize, partial_decode,
+                                     noDict, prefixStart, NULL, 0);
+}
+
+/*
+ * LZ4_decompress_fast() - decode a block whose decompressed size is known.
+ *
+ * No compressed size is available (0 is passed), so the generic decoder runs
+ * with the endOnOutputSize directive and stops on 'originalSize'. It is
+ * instantiated with withPrefix64k and a prefix start 64 KB below 'dest'.
+ * The generic decoder's return value is passed back unchanged.
+ */
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
+{
+       BYTE * const prefixStart = (BYTE *)dest - 64 * KB;
+
+       return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                     endOnOutputSize, decode_full_block,
+                                     withPrefix64k, prefixStart, NULL, 0);
+}
+
+/* ===== Instantiate a few more decoding cases, used more than once. ===== */
+
+/*
+ * Bounded full-block decode using the withPrefix64k directive: the prefix
+ * start passed to the generic decoder is 64 KB below 'dest'.
+ */
+static int LZ4_decompress_safe_withPrefix64k(const char *source, char *dest,
+                                     int compressedSize, int maxOutputSize)
+{
+       BYTE * const prefixStart = (BYTE *)dest - 64 * KB;
+
+       return LZ4_decompress_generic(source, dest, compressedSize,
+                                     maxOutputSize, endOnInputSize,
+                                     decode_full_block, withPrefix64k,
+                                     prefixStart, NULL, 0);
+}
+
+/*
+ * Bounded full-block decode with a prefix of 'prefixSize' bytes located
+ * immediately before 'dest'. Uses the noDict directive; only the prefix
+ * start handed to the generic decoder is moved back by 'prefixSize'.
+ */
+static int LZ4_decompress_safe_withSmallPrefix(const char *source, char *dest,
+                                              int compressedSize,
+                                              int maxOutputSize,
+                                              size_t prefixSize)
+{
+       BYTE * const prefixStart = (BYTE *)dest - prefixSize;
+
+       return LZ4_decompress_generic(source, dest, compressedSize,
+                                     maxOutputSize, endOnInputSize,
+                                     decode_full_block, noDict,
+                                     prefixStart, NULL, 0);
+}
+
+/*
+ * Bounded full-block decode with an external dictionary only: the
+ * usingExtDict directive is selected, 'dest' is the prefix start, and
+ * 'dictStart' / 'dictSize' describe the dictionary.
+ */
+static int LZ4_decompress_safe_forceExtDict(const char *source, char *dest,
+                                           int compressedSize, int maxOutputSize,
+                                           const void *dictStart, size_t dictSize)
+{
+       BYTE * const prefixStart = (BYTE *)dest;
+       const BYTE * const extDict = (const BYTE *)dictStart;
+
+       return LZ4_decompress_generic(source, dest, compressedSize,
+                                     maxOutputSize, endOnInputSize,
+                                     decode_full_block, usingExtDict,
+                                     prefixStart, extDict, dictSize);
+}
+
+/*
+ * Output-size-driven full-block decode with an external dictionary only:
+ * no compressed size is passed (0), the endOnOutputSize and usingExtDict
+ * directives are selected, and 'dest' is the prefix start.
+ */
+static int LZ4_decompress_fast_extDict(const char *source, char *dest,
+                                      int originalSize,
+                                      const void *dictStart, size_t dictSize)
+{
+       BYTE * const prefixStart = (BYTE *)dest;
+       const BYTE * const extDict = (const BYTE *)dictStart;
+
+       return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                     endOnOutputSize, decode_full_block,
+                                     usingExtDict, prefixStart,
+                                     extDict, dictSize);
+}
+
+/*
+ * The "double dictionary" mode, for use with e.g. ring buffers: the first part
+ * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
+ * These routines are used only once, in LZ4_decompress_*_continue().
+ */
+/*
+ * Bounded full-block decode with both a prefix of 'prefixSize' bytes ending
+ * at 'dest' and an external dictionary ('dictStart', 'dictSize').
+ */
+static FORCE_INLINE
+int LZ4_decompress_safe_doubleDict(const char *source, char *dest,
+                                  int compressedSize, int maxOutputSize,
+                                  size_t prefixSize,
+                                  const void *dictStart, size_t dictSize)
+{
+       BYTE * const prefixStart = (BYTE *)dest - prefixSize;
+       const BYTE * const extDict = (const BYTE *)dictStart;
+
+       return LZ4_decompress_generic(source, dest, compressedSize,
+                                     maxOutputSize, endOnInputSize,
+                                     decode_full_block, usingExtDict,
+                                     prefixStart, extDict, dictSize);
+}
+
+/*
+ * Output-size-driven full-block decode with both a prefix of 'prefixSize'
+ * bytes ending at 'dest' and an external dictionary ('dictStart',
+ * 'dictSize'). No compressed size is passed (0).
+ */
+static FORCE_INLINE
+int LZ4_decompress_fast_doubleDict(const char *source, char *dest,
+                                  int originalSize, size_t prefixSize,
+                                  const void *dictStart, size_t dictSize)
+{
+       BYTE * const prefixStart = (BYTE *)dest - prefixSize;
+       const BYTE * const extDict = (const BYTE *)dictStart;
+
+       return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                     endOnOutputSize, decode_full_block,
+                                     usingExtDict, prefixStart,
+                                     extDict, dictSize);
+}
+
+/* ===== streaming decompression functions ===== */
+
+/*
+ * LZ4_setStreamDecode() - (re)initialise streaming-decode tracking state.
+ *
+ * Records 'dictionary' (of 'dictSize' bytes) as the current prefix: the
+ * prefix end is set to dictionary + dictSize and the prefix size to
+ * dictSize. Any external dictionary is cleared.
+ *
+ * Always returns 1.
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+       const char *dictionary, int dictSize)
+{
+       LZ4_streamDecode_t_internal *state =
+               &LZ4_streamDecode->internal_donotuse;
+
+       /* Start out with no external dictionary. */
+       state->externalDict = NULL;
+       state->extDictSize = 0;
+
+       /* The caller's dictionary is the whole prefix. */
+       state->prefixEnd = (const BYTE *)dictionary + dictSize;
+       state->prefixSize = (size_t)dictSize;
+
+       return 1;
+}
+
+/*
+ * LZ4_decompress_safe_continue() - decode the next block of a stream.
+ *
+ * The *_continue() functions allow decompression of multiple blocks in
+ * "streaming" mode. Previously decoded blocks must still be available at
+ * the memory position where they were decoded. If that is not possible,
+ * save the relevant part of the decoded data into a safe buffer and
+ * indicate where it stands using LZ4_setStreamDecode().
+ *
+ * One of three paths is taken, based on the tracked state:
+ *  - no prefix recorded yet: plain LZ4_decompress_safe();
+ *  - 'dest' starts exactly at the recorded prefix end: decode against the
+ *    prefix (64k prefix, small prefix, or prefix + external dictionary);
+ *  - anything else: the old prefix becomes the external dictionary.
+ *
+ * A result <= 0 is returned as is, without touching prefixSize/prefixEnd.
+ * On success the prefix is extended (second path) or restarted at 'dest'
+ * (first and third paths) and the positive result is returned.
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+       const char *source, char *dest, int compressedSize, int maxOutputSize)
+{
+       LZ4_streamDecode_t_internal *state =
+               &LZ4_streamDecode->internal_donotuse;
+       int decoded;
+
+       if (state->prefixSize == 0) {
+               /* The first call, no dictionary yet. */
+               assert(state->extDictSize == 0);
+               decoded = LZ4_decompress_safe(source, dest,
+                                             compressedSize, maxOutputSize);
+               if (decoded > 0) {
+                       state->prefixSize = decoded;
+                       state->prefixEnd = (BYTE *)dest + decoded;
+               }
+               return decoded;
+       }
+
+       if (state->prefixEnd != (BYTE *)dest) {
+               /*
+                * The buffer wraps around, or they're
+                * switching to another buffer.
+                */
+               state->extDictSize = state->prefixSize;
+               state->externalDict = state->prefixEnd - state->extDictSize;
+               decoded = LZ4_decompress_safe_forceExtDict(source, dest,
+                               compressedSize, maxOutputSize,
+                               state->externalDict, state->extDictSize);
+               if (decoded > 0) {
+                       state->prefixSize = decoded;
+                       state->prefixEnd = (BYTE *)dest + decoded;
+               }
+               return decoded;
+       }
+
+       /* They're rolling the current segment. */
+       if (state->prefixSize >= 64 * KB - 1) {
+               decoded = LZ4_decompress_safe_withPrefix64k(source, dest,
+                               compressedSize, maxOutputSize);
+       } else if (state->extDictSize == 0) {
+               decoded = LZ4_decompress_safe_withSmallPrefix(source, dest,
+                               compressedSize, maxOutputSize,
+                               state->prefixSize);
+       } else {
+               decoded = LZ4_decompress_safe_doubleDict(source, dest,
+                               compressedSize, maxOutputSize,
+                               state->prefixSize,
+                               state->externalDict, state->extDictSize);
+       }
+       if (decoded > 0) {
+               state->prefixSize += decoded;
+               state->prefixEnd += decoded;
+       }
+
+       return decoded;
+}
+
+/*
+ * LZ4_decompress_fast_continue() - streaming variant driven by output size.
+ *
+ * Same three-way dispatch as the "safe" streaming decoder, but every path
+ * decodes exactly 'originalSize' output bytes, so the prefix bookkeeping is
+ * advanced by 'originalSize' rather than by the decoder's return value.
+ *
+ * A result <= 0 is returned as is, without touching prefixSize/prefixEnd;
+ * otherwise the (positive) decoder result is returned.
+ */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+       const char *source, char *dest, int originalSize)
+{
+       LZ4_streamDecode_t_internal *state =
+               &LZ4_streamDecode->internal_donotuse;
+       int status;
+
+       if (state->prefixSize == 0) {
+               /* First block: nothing to reference yet. */
+               assert(state->extDictSize == 0);
+               status = LZ4_decompress_fast(source, dest, originalSize);
+               if (status > 0) {
+                       state->prefixSize = originalSize;
+                       state->prefixEnd = (BYTE *)dest + originalSize;
+               }
+               return status;
+       }
+
+       if (state->prefixEnd != (BYTE *)dest) {
+               /* Output moved: the old prefix becomes the external dict. */
+               state->extDictSize = state->prefixSize;
+               state->externalDict = state->prefixEnd - state->extDictSize;
+               status = LZ4_decompress_fast_extDict(source, dest,
+                               originalSize,
+                               state->externalDict, state->extDictSize);
+               if (status > 0) {
+                       state->prefixSize = originalSize;
+                       state->prefixEnd = (BYTE *)dest + originalSize;
+               }
+               return status;
+       }
+
+       /* Output continues right after the current prefix. */
+       if (state->prefixSize < 64 * KB - 1 && state->extDictSize != 0)
+               status = LZ4_decompress_fast_doubleDict(source, dest,
+                               originalSize, state->prefixSize,
+                               state->externalDict, state->extDictSize);
+       else
+               status = LZ4_decompress_fast(source, dest, originalSize);
+
+       if (status > 0) {
+               state->prefixSize += originalSize;
+               state->prefixEnd += originalSize;
+       }
+       return status;
+}
+
+/*
+ * LZ4_decompress_safe_usingDict() - bounded decode with a caller dictionary.
+ *
+ * Picks the decoder variant from where the dictionary sits:
+ *  - empty dictionary: plain LZ4_decompress_safe();
+ *  - dictionary not ending at 'dest': external-dictionary decode;
+ *  - dictionary ending exactly at 'dest': prefix decode, using the 64k
+ *    variant when dictSize >= 64 KB - 1 and the small-prefix one otherwise.
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+                                 int compressedSize, int maxOutputSize,
+                                 const char *dictStart, int dictSize)
+{
+       if (dictSize == 0)
+               return LZ4_decompress_safe(source, dest,
+                                          compressedSize, maxOutputSize);
+
+       if (dictStart + dictSize != dest)
+               return LZ4_decompress_safe_forceExtDict(source, dest,
+                       compressedSize, maxOutputSize, dictStart, dictSize);
+
+       /* From here on the dictionary is contiguous with the output. */
+       if (dictSize < 64 * KB - 1)
+               return LZ4_decompress_safe_withSmallPrefix(source, dest,
+                       compressedSize, maxOutputSize, dictSize);
+
+       return LZ4_decompress_safe_withPrefix64k(source, dest,
+               compressedSize, maxOutputSize);
+}
+
+/*
+ * LZ4_decompress_fast_usingDict() - output-size-driven decode with a caller
+ * dictionary.
+ *
+ * A non-empty dictionary that does not end at 'dest' is treated as an
+ * external dictionary; in every other case LZ4_decompress_fast() is used.
+ */
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+                                 int originalSize,
+                                 const char *dictStart, int dictSize)
+{
+       if (dictSize != 0 && dictStart + dictSize != dest)
+               return LZ4_decompress_fast_extDict(source, dest, originalSize,
+                       dictStart, dictSize);
+
+       return LZ4_decompress_fast(source, dest, originalSize);
+}
diff --git a/lustre/lz4/lz4defs.h b/lustre/lz4/lz4defs.h
new file mode 100644 (file)
index 0000000..fe0cd70
--- /dev/null
@@ -0,0 +1,248 @@
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
+/*
+ * lz4defs.h -- common and architecture specific defines for the kernel usage
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *     - LZ4 homepage : http://www.lz4.org
+ *     - LZ4 source repository : https://github.com/lz4/lz4
+ *
+ *     Changed for kernel usage by:
+ *     Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+ */
+
+#include <asm/unaligned.h>
+
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/string.h>       /* memset, memcpy */
+
+#define FORCE_INLINE __always_inline
+
+/*-************************************
+ *     Basic Types
+ **************************************/
+#include <linux/types.h>
+
+/*
+ * Short fixed-width aliases used by the LZ4 sources.
+ * uptrval is an integer wide enough to hold a pointer; the decoder uses it
+ * to detect pointer wrap-around when adding a length to a pointer.
+ */
+typedef        uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef        int32_t S32;
+typedef uint64_t U64;
+typedef uintptr_t uptrval;
+
+/*-************************************
+ *     Architecture specifics
+ **************************************/
+/*
+ * LZ4_ARCH64: 1 when the kernel is configured with CONFIG_64BIT, else 0.
+ * Selects the single 64-bit load/store in LZ4_copy8() and the extra 4-byte
+ * comparison step in LZ4_count().
+ */
+#if defined(CONFIG_64BIT)
+#define LZ4_ARCH64 1
+#else
+#define LZ4_ARCH64 0
+#endif
+
+/*
+ * LZ4_LITTLE_ENDIAN: 1 when __LITTLE_ENDIAN is defined, else 0.
+ * Selects whether LZ4_NbCommonBytes() scans from the lowest or the highest
+ * set bit.
+ */
+#if defined(__LITTLE_ENDIAN)
+#define LZ4_LITTLE_ENDIAN 1
+#else
+#define LZ4_LITTLE_ENDIAN 0
+#endif
+
+/*-************************************
+ *     Constants
+ **************************************/
+/*
+ * Match lengths are stored minus MINMATCH; the decoder adds it back
+ * before copying a match.
+ */
+#define MINMATCH 4
+
+/* LZ4_wildCopy() moves data in steps of this many bytes. */
+#define WILDCOPYLENGTH 8
+/* The last LASTLITERALS bytes of a block must be literals (uncompressed). */
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH + MINMATCH)
+/*
+ * ensure it's possible to write 2 x wildcopyLength
+ * without overflowing output buffer
+ */
+#define MATCH_SAFEGUARD_DISTANCE  ((2 * WILDCOPYLENGTH) - MINMATCH)
+
+/* Increasing this value makes compression slower on incompressible data */
+#define LZ4_SKIPTRIGGER 6
+
+#define HASH_UNIT sizeof(size_t)
+
+/* Size units. */
+#define KB (1 << 10)
+#define MB (1 << 20)
+#define GB (1U << 30)
+
+/* Largest value that fits in MAXD_LOG bits; match offsets are read as 16-bit. */
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+/* Number of bytes LZ4_count() compares per iteration (one machine word). */
+#define STEPSIZE sizeof(size_t)
+
+/*
+ * Token layout: the low ML_BITS bits carry the match length field
+ * (token & ML_MASK); RUN_BITS is the remaining width of the 8-bit token.
+ * A literal length equal to RUN_MASK, or a match length field equal to
+ * ML_MASK, means additional length bytes follow in the stream.
+ */
+#define ML_BITS        4
+#define ML_MASK        ((1U << ML_BITS) - 1)
+#define RUN_BITS (8 - ML_BITS)
+#define RUN_MASK ((1U << RUN_BITS) - 1)
+
+/*-************************************
+ *     Reading and writing into memory
+ **************************************/
+/* Load a native-endian 16-bit value from a possibly unaligned address. */
+static FORCE_INLINE U16 LZ4_read16(const void *ptr)
+{
+       const U16 *from = (const U16 *)ptr;
+
+       return get_unaligned(from);
+}
+
+/* Load a native-endian 32-bit value from a possibly unaligned address. */
+static FORCE_INLINE U32 LZ4_read32(const void *ptr)
+{
+       const U32 *from = (const U32 *)ptr;
+
+       return get_unaligned(from);
+}
+
+/* Load one size_t-sized, native-endian word from a possibly unaligned address. */
+static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr)
+{
+       const size_t *from = (const size_t *)ptr;
+
+       return get_unaligned(from);
+}
+
+/* Store a native-endian 16-bit value at a possibly unaligned address. */
+static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value)
+{
+       U16 *to = (U16 *)memPtr;
+
+       put_unaligned(value, to);
+}
+
+/* Store a native-endian 32-bit value at a possibly unaligned address. */
+static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value)
+{
+       U32 *to = (U32 *)memPtr;
+
+       put_unaligned(value, to);
+}
+
+/*
+ * Load a 16-bit little-endian value from a possibly unaligned address.
+ * The decoder uses this to read match offsets.
+ */
+static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr)
+{
+       U16 const value = get_unaligned_le16(memPtr);
+
+       return value;
+}
+
+/*
+ * Store a 16-bit value in little-endian byte order at a possibly unaligned
+ * address.
+ */
+static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value)
+{
+       /*
+        * Plain call, no 'return': ISO C does not allow a return statement
+        * with an expression in a function whose return type is void.
+        */
+       put_unaligned_le16(value, memPtr);
+}
+
+/*
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
+ * environments, the compiler can't assume the implementation of memcpy() is
+ * standard compliant, so apply its specialized memcpy() inlining logic. When
+ * possible, use __builtin_memcpy() to tell the compiler to analyze memcpy()
+ * as-if it were standard compliant, so it can inline it in freestanding
+ * environments. This is needed when decompressing the Linux Kernel, for example.
+ */
+#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
+#define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size)
+
+/*
+ * Copy exactly 8 bytes from 'src' to 'dst'; neither address needs to be
+ * aligned. All source bytes are read before any destination byte is
+ * written. Done as one 64-bit access when LZ4_ARCH64 is set, otherwise as
+ * two 32-bit accesses.
+ */
+static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
+{
+#if LZ4_ARCH64
+       const U64 *from = (const U64 *)src;
+       U64 *to = (U64 *)dst;
+       U64 word = get_unaligned(from);
+
+       put_unaligned(word, to);
+#else
+       const U32 *from = (const U32 *)src;
+       U32 *to = (U32 *)dst;
+       U32 first = get_unaligned(from);
+       U32 second = get_unaligned(from + 1);
+
+       put_unaligned(first, to);
+       put_unaligned(second, to + 1);
+#endif
+}
+
+/*
+ * Customized variant of memcpy that copies in 8-byte steps until the write
+ * position reaches 'dstEnd'. Because whole 8-byte chunks are always
+ * written, it can overwrite up to 7 bytes beyond dstEnd. At least one
+ * chunk is copied even when dstPtr is already at or past dstEnd.
+ */
+static FORCE_INLINE void LZ4_wildCopy(void *dstPtr,
+       const void *srcPtr, void *dstEnd)
+{
+       BYTE *out = (BYTE *)dstPtr;
+       const BYTE *in = (const BYTE *)srcPtr;
+       BYTE *const limit = (BYTE *)dstEnd;
+
+       for (;;) {
+               LZ4_copy8(out, in);
+               out += 8;
+               in += 8;
+               if (out >= limit)
+                       break;
+       }
+}
+
+/*
+ * Convert the XOR of two machine words into a byte count: the index of the
+ * relevant set bit (lowest on little-endian, counted from the top on
+ * big-endian) divided by 8. LZ4_count() only calls this with a non-zero
+ * 'val'.
+ */
+static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
+{
+       unsigned long bitIndex;
+
+#if LZ4_LITTLE_ENDIAN
+       bitIndex = __ffs(val);
+#else
+       bitIndex = BITS_PER_LONG - 1 - __fls(val);
+#endif
+       return bitIndex >> 3;
+}
+
+/*
+ * Count how many consecutive bytes starting at 'pIn' equal the bytes
+ * starting at 'pMatch', never examining 'pIn' at or beyond 'pInLimit'.
+ *
+ * Whole machine words are compared while at least STEPSIZE bytes remain;
+ * the first differing word is resolved to a byte count with
+ * LZ4_NbCommonBytes(). The tail is then checked in 4-byte (64-bit builds
+ * only), 2-byte and 1-byte steps.
+ *
+ * Returns the number of matching bytes.
+ */
+static FORCE_INLINE unsigned int LZ4_count(
+       const BYTE *pIn,
+       const BYTE *pMatch,
+       const BYTE *pInLimit)
+{
+       const BYTE *const origin = pIn;
+
+       while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
+               size_t const mismatch =
+                       LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+
+               if (mismatch) {
+                       /* First differing word: finish inside it. */
+                       pIn += LZ4_NbCommonBytes(mismatch);
+                       return (unsigned int)(pIn - origin);
+               }
+
+               pIn += STEPSIZE;
+               pMatch += STEPSIZE;
+       }
+
+#if LZ4_ARCH64
+       if ((pIn < (pInLimit - 3))
+               && (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+               pIn += 4;
+               pMatch += 4;
+       }
+#endif
+
+       if ((pIn < (pInLimit - 1))
+               && (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+               pIn += 2;
+               pMatch += 2;
+       }
+
+       if ((pIn < pInLimit) && (*pMatch == *pIn))
+               ++pIn;
+
+       return (unsigned int)(pIn - origin);
+}
+
+/*
+ * Directive enums: constant selectors passed to the generic, force-inlined
+ * routines so that one body can be instantiated for several modes.
+ */
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
+
+/* Dictionary mode handed to the generic decoder by its wrappers. */
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+/*
+ * endOnInputSize is used by the "safe" wrappers, which know the compressed
+ * size; endOnOutputSize by the "fast" wrappers, which know only the
+ * decompressed size. partial_decode is used by
+ * LZ4_decompress_safe_partial(); every other wrapper passes
+ * decode_full_block.
+ */
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
+
+/* Compile-time assertion: the build fails when (c) is false. */
+#define LZ4_STATIC_ASSERT(c)   BUILD_BUG_ON(!(c))
+
+#endif
diff --git a/lustre/lz4/lz4hc_compress.c b/lustre/lz4/lz4hc_compress.c
new file mode 100644 (file)
index 0000000..474812d
--- /dev/null
@@ -0,0 +1,760 @@
+/*
+ * LZ4 HC - High Compression Mode of LZ4
+ * Copyright (C) 2011-2015, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *     - LZ4 homepage : http://www.lz4.org
+ *     - LZ4 source repository : https://github.com/lz4/lz4
+ *
+ *     Changed for kernel usage by:
+ *     Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+ */
+
+/*-************************************
+ *     Dependencies
+ **************************************/
+#include "lz4.h"
+#include "lz4defs.h"
+#include <linux/kernel.h>
+#include <linux/string.h> /* memset */
+
+/* *************************************
+ *     Local Constants and types
+ ***************************************/
+
+/*
+ * Match length cap applied by LZ4HC_compress_generic() when it shortens a
+ * match that overlaps the following one.
+ */
+#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH)
+
+/*
+ * Multiplicative hash of the value i: multiply by 2654435761U and keep the
+ * bits that remain after shifting right by (MINMATCH*8 - LZ4HC_HASH_LOG).
+ */
+#define HASH_FUNCTION(i)       (((i) * 2654435761U) \
+       >> ((MINMATCH*8) - LZ4HC_HASH_LOG))
+/*
+ * Chain table slot of position p, selected by the low 16 bits of p.
+ * Expands to an lvalue and requires a local variable named chainTable
+ * to be in scope at the point of use.
+ */
+#define DELTANEXTU16(p)        chainTable[(U16)(p)] /* faster */
+
+/* Hash the 32-bit value read at ptr into a hash table index. */
+static U32 LZ4HC_hashPtr(const void *ptr)
+{
+       U32 const sequence = LZ4_read32(ptr);
+
+       return HASH_FUNCTION(sequence);
+}
+
+/**************************************
+ *     HC Compression
+ **************************************/
+/*
+ * Reset the lookup tables and the position bookkeeping of hc4 for input
+ * that begins at start. The first input byte is given index 64 KB.
+ */
+static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
+{
+       U32 const firstIndex = 64 * KB;
+       const BYTE * const virtualBase = start - firstIndex;
+
+       /* hash table cleared to 0, chain table filled with 0xFF bytes */
+       memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+       memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable));
+
+       /* pointers */
+       hc4->base = virtualBase;
+       hc4->dictBase = virtualBase;
+       hc4->end = start;
+
+       /* indices */
+       hc4->nextToUpdate = firstIndex;
+       hc4->dictLimit = firstIndex;
+       hc4->lowLimit = firstIndex;
+}
+
+/*
+ * Register every position from hc4->nextToUpdate up to, but not including,
+ * ip in the hash table and in the chain table.
+ */
+static FORCE_INLINE void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+       const BYTE *ip)
+{
+       /* DELTANEXTU16() expects this exact local name */
+       U16 * const chainTable = hc4->chainTable;
+       U32 * const hashTable   = hc4->hashTable;
+       const BYTE * const base = hc4->base;
+       U32 const target = (U32)(ip - base);
+       U32 pos;
+
+       for (pos = hc4->nextToUpdate; pos < target; pos++) {
+               U32 const slot = LZ4HC_hashPtr(base + pos);
+               /* distance to the previous position with the same hash */
+               size_t const dist = pos - hashTable[slot];
+
+               /* the chain stores 16-bit distances: saturate */
+               DELTANEXTU16(pos) = (U16)((dist > MAX_DISTANCE)
+                       ? MAX_DISTANCE : dist);
+               hashTable[slot] = pos;
+       }
+
+       hc4->nextToUpdate = target;
+}
+
+/*
+ * Index all positions up to ip, then walk the hash chain of the 4 bytes at
+ * ip and keep the longest match found.
+ *
+ * hc4:           compression context, its tables are updated
+ * ip:            current input position
+ * iLimit:        match lengths are counted no further than this pointer
+ * matchpos:      receives the start of the best match when one is found
+ * maxNbAttempts: maximum number of chain entries that are examined
+ *
+ * Returns the length of the best match, or 0 when none was found.
+ */
+static FORCE_INLINE int LZ4HC_InsertAndFindBestMatch(
+       LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
+       const BYTE *ip,
+       const BYTE * const iLimit,
+       const BYTE **matchpos,
+       const int maxNbAttempts)
+{
+       U16 * const chainTable = hc4->chainTable;
+       U32 * const HashTable = hc4->hashTable;
+       const BYTE * const base = hc4->base;
+       const BYTE * const dictBase = hc4->dictBase;
+       const U32 dictLimit = hc4->dictLimit;
+       /* candidates are not considered further back than 64 KB - 1 */
+       const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+               ? hc4->lowLimit
+               : (U32)(ip - base) - (64 * KB - 1);
+       U32 matchIndex;
+       int nbAttempts = maxNbAttempts;
+       size_t ml = 0;
+
+       /* HC4 match finder */
+       LZ4HC_Insert(hc4, ip);
+       matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+       while ((matchIndex >= lowLimit)
+               && (nbAttempts)) {
+               nbAttempts--;
+               if (matchIndex >= dictLimit) {
+                       /* candidate is addressed through base */
+                       const BYTE * const match = base + matchIndex;
+
+                       /*
+                        * Quick reject: to beat the current best, the
+                        * candidate must also agree at offset ml
+                        */
+                       if (*(match + ml) == *(ip + ml)
+                               && (LZ4_read32(match) == LZ4_read32(ip))) {
+                               size_t const mlt = LZ4_count(ip + MINMATCH,
+                                       match + MINMATCH, iLimit) + MINMATCH;
+
+                               if (mlt > ml) {
+                                       ml = mlt;
+                                       *matchpos = match;
+                               }
+                       }
+               } else {
+                       /* index below dictLimit: addressed through dictBase */
+                       const BYTE * const match = dictBase + matchIndex;
+
+                       if (LZ4_read32(match) == LZ4_read32(ip)) {
+                               size_t mlt;
+                               /* do not count past index dictLimit */
+                               const BYTE *vLimit = ip
+                                       + (dictLimit - matchIndex);
+
+                               if (vLimit > iLimit)
+                                       vLimit = iLimit;
+                               mlt = LZ4_count(ip + MINMATCH,
+                                       match + MINMATCH, vLimit) + MINMATCH;
+                               /*
+                                * Counting stopped at vLimit: continue
+                                * comparing from base + dictLimit
+                                */
+                               if ((ip + mlt == vLimit)
+                                       && (vLimit < iLimit))
+                                       mlt += LZ4_count(ip + mlt,
+                                               base + dictLimit,
+                                               iLimit);
+                               if (mlt > ml) {
+                                       /* virtual matchpos */
+                                       ml = mlt;
+                                       *matchpos = base + matchIndex;
+                               }
+                       }
+               }
+               /* step back by the distance stored in the chain table */
+               matchIndex -= DELTANEXTU16(matchIndex);
+       }
+
+       return (int)ml;
+}
+
+/*
+ * Index all positions up to ip, then search the hash chain of the 4 bytes
+ * at ip for a match longer than the one already known. Candidates are also
+ * extended backwards, so the match may start before ip.
+ *
+ * hc4:           compression context, its tables are updated
+ * ip:            input position whose hash chain is searched
+ * iLowLimit:     backward extension never goes below this input pointer
+ * iHighLimit:    match lengths are counted no further than this pointer
+ * longest:       length that has to be exceeded for a match to be kept
+ * matchpos:      receives the start of the match when a longer one is found
+ * startpos:      receives the matching input start (at or before ip)
+ * maxNbAttempts: maximum number of chain entries that are examined
+ *
+ * Returns the longest length found, or the incoming value of longest when
+ * nothing longer was found (matchpos/startpos are then left untouched).
+ */
+static FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch(
+       LZ4HC_CCtx_internal *hc4,
+       const BYTE * const ip,
+       const BYTE * const iLowLimit,
+       const BYTE * const iHighLimit,
+       int longest,
+       const BYTE **matchpos,
+       const BYTE **startpos,
+       const int maxNbAttempts)
+{
+       U16 * const chainTable = hc4->chainTable;
+       U32 * const HashTable = hc4->hashTable;
+       const BYTE * const base = hc4->base;
+       const U32 dictLimit = hc4->dictLimit;
+       const BYTE * const lowPrefixPtr = base + dictLimit;
+       /* candidates are not considered further back than 64 KB - 1 */
+       const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+               ? hc4->lowLimit
+               : (U32)(ip - base) - (64 * KB - 1);
+       const BYTE * const dictBase = hc4->dictBase;
+       U32 matchIndex;
+       int nbAttempts = maxNbAttempts;
+       /* distance between ip and the lowest allowed start */
+       int delta = (int)(ip - iLowLimit);
+
+       /* First Match */
+       LZ4HC_Insert(hc4, ip);
+       matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+       while ((matchIndex >= lowLimit)
+               && (nbAttempts)) {
+               nbAttempts--;
+               if (matchIndex >= dictLimit) {
+                       /* candidate is addressed through base */
+                       const BYTE *matchPtr = base + matchIndex;
+
+                       /* quick reject on the byte at offset longest */
+                       if (*(iLowLimit + longest)
+                               == *(matchPtr - delta + longest)) {
+                               if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+                                       int mlt = MINMATCH + LZ4_count(
+                                               ip + MINMATCH,
+                                               matchPtr + MINMATCH,
+                                               iHighLimit);
+                                       int back = 0;
+
+                                       /* extend backwards; back is <= 0 */
+                                       while ((ip + back > iLowLimit)
+                                               && (matchPtr + back > lowPrefixPtr)
+                                               && (ip[back - 1] == matchPtr[back - 1]))
+                                               back--;
+
+                                       mlt -= back;
+
+                                       if (mlt > longest) {
+                                               longest = (int)mlt;
+                                               *matchpos = matchPtr + back;
+                                               *startpos = ip + back;
+                                       }
+                               }
+                       }
+               } else {
+                       /* index below dictLimit: addressed through dictBase */
+                       const BYTE * const matchPtr = dictBase + matchIndex;
+
+                       if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+                               size_t mlt;
+                               int back = 0;
+                               /* do not count past index dictLimit */
+                               const BYTE *vLimit = ip + (dictLimit - matchIndex);
+
+                               if (vLimit > iHighLimit)
+                                       vLimit = iHighLimit;
+
+                               mlt = LZ4_count(ip + MINMATCH,
+                                       matchPtr + MINMATCH, vLimit) + MINMATCH;
+
+                               /*
+                                * Counting stopped at vLimit: continue
+                                * comparing from base + dictLimit
+                                */
+                               if ((ip + mlt == vLimit) && (vLimit < iHighLimit))
+                                       mlt += LZ4_count(ip + mlt, base + dictLimit,
+                                               iHighLimit);
+                               /* extend backwards; back is <= 0 */
+                               while ((ip + back > iLowLimit)
+                                       && (matchIndex + back > lowLimit)
+                                       && (ip[back - 1] == matchPtr[back - 1]))
+                                       back--;
+
+                               mlt -= back;
+
+                               if ((int)mlt > longest) {
+                                       longest = (int)mlt;
+                                       *matchpos = base + matchIndex + back;
+                                       *startpos = ip + back;
+                               }
+                       }
+               }
+
+               /* step back by the distance stored in the chain table */
+               matchIndex -= DELTANEXTU16(matchIndex);
+       }
+
+       return longest;
+}
+
+/*
+ * Write one sequence to the output: a token byte, the literals between
+ * *anchor and *ip, the 2-byte little-endian match offset and the match
+ * length.
+ *
+ * ip:                  in/out, current input position; advanced past the match
+ * op:                  in/out, output write position
+ * anchor:              in/out, start of pending literals; set to the new *ip
+ * matchLength:         length of the match
+ * match:               start of the match; the offset written is *ip - match
+ * limitedOutputBuffer: when non-zero, check the output against oend
+ * oend:                end of the output buffer
+ *
+ * Returns 0 on success, 1 when limitedOutputBuffer is set and the sequence
+ * does not fit before oend.
+ */
+static FORCE_INLINE int LZ4HC_encodeSequence(
+       const BYTE **ip,
+       BYTE **op,
+       const BYTE **anchor,
+       int matchLength,
+       const BYTE * const match,
+       limitedOutput_directive limitedOutputBuffer,
+       BYTE *oend)
+{
+       int length;
+       BYTE *token;
+
+       /* Encode Literal length */
+       length = (int)(*ip - *anchor);
+       /* reserve the token byte; both of its fields are filled in below */
+       token = (*op)++;
+
+       if ((limitedOutputBuffer)
+               && ((*op + (length>>8)
+                       + length + (2 + 1 + LASTLITERALS)) > oend)) {
+               /* Check output limit */
+               return 1;
+       }
+       if (length >= (int)RUN_MASK) {
+               int len;
+
+               /* long run: RUN_MASK in the token, then 255-valued bytes */
+               *token = (RUN_MASK<<ML_BITS);
+               len = length - RUN_MASK;
+               for (; len > 254 ; len -= 255)
+                       *(*op)++ = 255;
+               *(*op)++ = (BYTE)len;
+       } else
+               *token = (BYTE)(length<<ML_BITS);
+
+       /* Copy Literals */
+       LZ4_wildCopy(*op, *anchor, (*op) + length);
+       *op += length;
+
+       /* Encode Offset */
+       LZ4_writeLE16(*op, (U16)(*ip - match));
+       *op += 2;
+
+       /* Encode MatchLength */
+       length = (int)(matchLength - MINMATCH);
+
+       if ((limitedOutputBuffer)
+               && (*op + (length>>8)
+                       + (1 + LASTLITERALS) > oend)) {
+               /* Check output limit */
+               return 1;
+       }
+
+       if (length >= (int)ML_MASK) {
+               *token += ML_MASK;
+               length -= ML_MASK;
+
+               /* emit the 255-valued extension bytes two at a time */
+               for (; length > 509 ; length -= 510) {
+                       *(*op)++ = 255;
+                       *(*op)++ = 255;
+               }
+
+               if (length > 254) {
+                       length -= 255;
+                       *(*op)++ = 255;
+               }
+
+               *(*op)++ = (BYTE)length;
+       } else
+               *token += (BYTE)(length);
+
+       /* Prepare next loop */
+       *ip += matchLength;
+       *anchor = *ip;
+
+       return 0;
+}
+
+/*
+ * Compress inputSize bytes from source into dest using the hash-chain
+ * match finder of ctx.
+ *
+ * ctx:              compression context; ctx->end is advanced by inputSize
+ * source:           input buffer
+ * dest:             output buffer
+ * inputSize:        number of input bytes
+ * maxOutputSize:    capacity of dest
+ * compressionLevel: values above LZ4HC_MAX_CLEVEL are clamped, values below
+ *                   1 select LZ4HC_DEFAULT_CLEVEL; each search examines at
+ *                   most 1 << (level - 1) chain entries
+ * limit:            when non-zero, the output is checked against the
+ *                   capacity of dest
+ *
+ * Up to three overlapping candidate matches (ip/ref/ml, start2/ref2/ml2,
+ * start3/ref3/ml3) are tracked before the first one is written out.
+ *
+ * Returns the number of bytes written to dest, or 0 when limit is set and
+ * the output does not fit.
+ */
+static int LZ4HC_compress_generic(
+       LZ4HC_CCtx_internal *const ctx,
+       const char * const source,
+       char * const dest,
+       int const inputSize,
+       int const maxOutputSize,
+       int compressionLevel,
+       limitedOutput_directive limit
+       )
+{
+       const BYTE *ip = (const BYTE *) source;
+       const BYTE *anchor = ip;
+       const BYTE * const iend = ip + inputSize;
+       /* no match search starts at or beyond mflimit */
+       const BYTE * const mflimit = iend - MFLIMIT;
+       /* matches are not extended beyond matchlimit */
+       const BYTE * const matchlimit = (iend - LASTLITERALS);
+
+       BYTE *op = (BYTE *) dest;
+       BYTE * const oend = op + maxOutputSize;
+
+       unsigned int maxNbAttempts;
+       int ml, ml2, ml3, ml0;
+       const BYTE *ref = NULL;
+       const BYTE *start2 = NULL;
+       const BYTE *ref2 = NULL;
+       const BYTE *start3 = NULL;
+       const BYTE *ref3 = NULL;
+       const BYTE *start0;
+       const BYTE *ref0;
+
+       /* init */
+       if (compressionLevel > LZ4HC_MAX_CLEVEL)
+               compressionLevel = LZ4HC_MAX_CLEVEL;
+       if (compressionLevel < 1)
+               compressionLevel = LZ4HC_DEFAULT_CLEVEL;
+       maxNbAttempts = 1 << (compressionLevel - 1);
+       ctx->end += inputSize;
+
+       ip++;
+
+       /* Main Loop */
+       while (ip < mflimit) {
+               ml = LZ4HC_InsertAndFindBestMatch(ctx, ip,
+                       matchlimit, (&ref), maxNbAttempts);
+               if (!ml) {
+                       /* no match here: try the next input byte */
+                       ip++;
+                       continue;
+               }
+
+               /* saved, in case we would skip too much */
+               start0 = ip;
+               ref0 = ref;
+               ml0 = ml;
+
+_Search2:
+               /* look for a longer match overlapping the end of the first */
+               if (ip + ml < mflimit)
+                       ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                               ip + ml - 2, ip + 0,
+                               matchlimit, ml, &ref2,
+                               &start2, maxNbAttempts);
+               else
+                       ml2 = ml;
+
+               if (ml2 == ml) {
+                       /* No better match */
+                       if (LZ4HC_encodeSequence(&ip, &op,
+                               &anchor, ml, ref, limit, oend))
+                               return 0;
+                       continue;
+               }
+
+               if (start0 < ip) {
+                       if (start2 < ip + ml0) {
+                               /* empirical */
+                               ip = start0;
+                               ref = ref0;
+                               ml = ml0;
+                       }
+               }
+
+               /* Here, start0 == ip */
+               if ((start2 - ip) < 3) {
+                       /* First Match too small : removed */
+                       ml = ml2;
+                       ip = start2;
+                       ref = ref2;
+                       goto _Search2;
+               }
+
+_Search3:
+               /*
+               * Currently we have :
+               * ml2 > ml1, and
+               * ip1 + 3 <= ip2 (usually < ip1 + ml1)
+               */
+               if ((start2 - ip) < OPTIMAL_ML) {
+                       int correction;
+                       int new_ml = ml;
+
+                       if (new_ml > OPTIMAL_ML)
+                               new_ml = OPTIMAL_ML;
+                       if (ip + new_ml > start2 + ml2 - MINMATCH)
+                               new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+
+                       correction = new_ml - (int)(start2 - ip);
+
+                       if (correction > 0) {
+                               /* move the start of match 2 forward */
+                               start2 += correction;
+                               ref2 += correction;
+                               ml2 -= correction;
+                       }
+               }
+               /*
+                * Now, we have start2 = ip + new_ml,
+                * with new_ml = min(ml, OPTIMAL_ML = 18)
+                */
+
+               /* look for a third match overlapping the end of match 2 */
+               if (start2 + ml2 < mflimit)
+                       ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                               start2 + ml2 - 3, start2,
+                               matchlimit, ml2, &ref3, &start3,
+                               maxNbAttempts);
+               else
+                       ml3 = ml2;
+
+               if (ml3 == ml2) {
+                       /* No better match : 2 sequences to encode */
+                       /* ip & ref are known; Now for ml */
+                       if (start2 < ip + ml)
+                               ml = (int)(start2 - ip);
+                       /* Now, encode 2 sequences */
+                       if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+                               ml, ref, limit, oend))
+                               return 0;
+                       ip = start2;
+                       if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+                               ml2, ref2, limit, oend))
+                               return 0;
+                       continue;
+               }
+
+               if (start3 < ip + ml + 3) {
+                       /* Not enough space for match 2 : remove it */
+                       if (start3 >= (ip + ml)) {
+                               /* can write Seq1 immediately
+                                * ==> Seq2 is removed,
+                                * so Seq3 becomes Seq1
+                                */
+                               if (start2 < ip + ml) {
+                                       int correction = (int)(ip + ml - start2);
+
+                                       start2 += correction;
+                                       ref2 += correction;
+                                       ml2 -= correction;
+                                       if (ml2 < MINMATCH) {
+                                               start2 = start3;
+                                               ref2 = ref3;
+                                               ml2 = ml3;
+                                       }
+                               }
+
+                               if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+                                       ml, ref, limit, oend))
+                                       return 0;
+                               ip = start3;
+                               ref = ref3;
+                               ml = ml3;
+
+                               start0 = start2;
+                               ref0 = ref2;
+                               ml0 = ml2;
+                               goto _Search2;
+                       }
+
+                       start2 = start3;
+                       ref2 = ref3;
+                       ml2 = ml3;
+                       goto _Search3;
+               }
+
+               /*
+               * OK, now we have 3 ascending matches;
+               * let's write at least the first one
+               * ip & ref are known; Now for ml
+               */
+               if (start2 < ip + ml) {
+                       if ((start2 - ip) < (int)ML_MASK) {
+                               int correction;
+
+                               if (ml > OPTIMAL_ML)
+                                       ml = OPTIMAL_ML;
+                               if (ip + ml > start2 + ml2 - MINMATCH)
+                                       ml = (int)(start2 - ip) + ml2 - MINMATCH;
+                               correction = ml - (int)(start2 - ip);
+                               if (correction > 0) {
+                                       start2 += correction;
+                                       ref2 += correction;
+                                       ml2 -= correction;
+                               }
+                       } else
+                               ml = (int)(start2 - ip);
+               }
+               if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml,
+                       ref, limit, oend))
+                       return 0;
+
+               /* shift the window: match 2 becomes 1, match 3 becomes 2 */
+               ip = start2;
+               ref = ref2;
+               ml = ml2;
+
+               start2 = start3;
+               ref2 = ref3;
+               ml2 = ml3;
+
+               goto _Search3;
+       }
+
+       /* Encode Last Literals */
+       {
+               /* everything from anchor to the end of input is literal */
+               int lastRun = (int)(iend - anchor);
+
+               if ((limit)
+                       && (((char *)op - dest) + lastRun + 1
+                               + ((lastRun + 255 - RUN_MASK)/255)
+                                       > (U32)maxOutputSize)) {
+                       /* Check output limit */
+                       return 0;
+               }
+               if (lastRun >= (int)RUN_MASK) {
+                       *op++ = (RUN_MASK<<ML_BITS);
+                       lastRun -= RUN_MASK;
+                       for (; lastRun > 254 ; lastRun -= 255)
+                               *op++ = 255;
+                       *op++ = (BYTE) lastRun;
+               } else
+                       *op++ = (BYTE)(lastRun<<ML_BITS);
+               /* lastRun was modified above, so use iend - anchor again */
+               LZ4_memcpy(op, anchor, iend - anchor);
+               op += iend - anchor;
+       }
+
+       /* End */
+       return (int) (((char *)op) - dest);
+}
+
+/*
+ * One-shot HC compression of src into dst using caller supplied state
+ * memory. Returns the result of LZ4HC_compress_generic(), or 0 when state
+ * is not aligned for pointers.
+ */
+static int LZ4_compress_HC_extStateHC(
+       void *state,
+       const char *src,
+       char *dst,
+       int srcSize,
+       int maxDstSize,
+       int compressionLevel)
+{
+       LZ4HC_CCtx_internal *ctx;
+
+       /* Error : state is not aligned for pointers (32 or 64 bits) */
+       if (((size_t)(state) & (sizeof(void *) - 1)) != 0)
+               return 0;
+
+       ctx = &((LZ4_streamHC_t *)state)->internal_donotuse;
+       LZ4HC_init(ctx, (const BYTE *)src);
+
+       /* dst holds the worst case: no output bound checks required */
+       if (maxDstSize >= LZ4_compressBound(srcSize))
+               return LZ4HC_compress_generic(ctx, src, dst,
+                       srcSize, maxDstSize, compressionLevel, noLimit);
+
+       return LZ4HC_compress_generic(ctx, src, dst,
+               srcSize, maxDstSize, compressionLevel, limitedOutput);
+}
+
+/*
+ * Public one-shot entry point: compress src into dst with wrkmem used as
+ * the compressor state. Returns what LZ4_compress_HC_extStateHC() returns.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize,
+       int maxDstSize, int compressionLevel, void *wrkmem)
+{
+       int const written = LZ4_compress_HC_extStateHC(wrkmem, src, dst,
+               srcSize, maxDstSize, compressionLevel);
+
+       return written;
+}
+
+/**************************************
+ *     Streaming Functions
+ **************************************/
+/* Mark a stream as uninitialised and record its compression level. */
+void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel)
+{
+       LZ4HC_CCtx_internal * const ctx = &LZ4_streamHCPtr->internal_donotuse;
+
+       ctx->compressionLevel = (unsigned int)compressionLevel;
+       /* a NULL base makes the next continue call run LZ4HC_init() */
+       ctx->base = NULL;
+}
+
+/*
+ * Re-initialise the stream and index the given dictionary. Only the last
+ * 64 KB of the dictionary are used. Returns the dictionary size actually
+ * used.
+ */
+int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr,
+       const char *dictionary,
+       int dictSize)
+{
+       LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+       const BYTE *dictStart = (const BYTE *)dictionary;
+
+       /* keep the tail only */
+       if (dictSize > 64 * KB) {
+               dictStart += dictSize - 64 * KB;
+               dictSize = 64 * KB;
+       }
+
+       LZ4HC_init(ctxPtr, dictStart);
+
+       /* hashing reads 4 bytes, so stop 3 bytes before the end */
+       if (dictSize >= 4)
+               LZ4HC_Insert(ctxPtr, dictStart + (dictSize - 3));
+
+       ctxPtr->end = dictStart + dictSize;
+
+       return dictSize;
+}
+
+/* compression */
+
+/*
+ * Turn the data compressed so far into the external dictionary and start
+ * a new segment at newBlock, continuing the same index space.
+ */
+static void LZ4HC_setExternalDict(
+       LZ4HC_CCtx_internal *ctxPtr,
+       const BYTE *newBlock)
+{
+       const BYTE *prevBase;
+       U32 prevEndIndex;
+
+       /* Referencing remaining dictionary content */
+       if (ctxPtr->end >= ctxPtr->base + 4)
+               LZ4HC_Insert(ctxPtr, ctxPtr->end - 3);
+
+       prevBase = ctxPtr->base;
+       prevEndIndex = (U32)(ctxPtr->end - prevBase);
+
+       /*
+        * Only one memory segment for extDict,
+        * so any previous extDict is lost at this stage
+        */
+       ctxPtr->lowLimit = ctxPtr->dictLimit;
+       ctxPtr->dictLimit = prevEndIndex;
+       ctxPtr->dictBase = prevBase;
+
+       /* index prevEndIndex now designates the first byte of newBlock */
+       ctxPtr->base = newBlock - prevEndIndex;
+       ctxPtr->end = newBlock;
+
+       /* match referencing will resume from there */
+       ctxPtr->nextToUpdate = prevEndIndex;
+}
+
+/*
+ * Compress one more block of a stream, reusing the history kept in the
+ * stream context.
+ *
+ * LZ4_streamHCPtr: stream state
+ * source:          input block
+ * dest:            output buffer
+ * inputSize:       number of input bytes
+ * maxOutputSize:   capacity of dest
+ * limit:           forwarded to LZ4HC_compress_generic()
+ *
+ * Returns the value returned by LZ4HC_compress_generic().
+ */
+static int LZ4_compressHC_continue_generic(
+       LZ4_streamHC_t *LZ4_streamHCPtr,
+       const char *source,
+       char *dest,
+       int inputSize,
+       int maxOutputSize,
+       limitedOutput_directive limit)
+{
+       LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+       /* auto-init if forgotten */
+       if (ctxPtr->base == NULL)
+               LZ4HC_init(ctxPtr, (const BYTE *) source);
+
+       /* Check overflow */
+       if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) {
+               /*
+                * Indices grew beyond 2 GB: restart the context, reloading
+                * at most the last 64 KB as dictionary
+                */
+               size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base)
+                       - ctxPtr->dictLimit;
+               if (dictSize > 64 * KB)
+                       dictSize = 64 * KB;
+               LZ4_loadDictHC(LZ4_streamHCPtr,
+                       (const char *)(ctxPtr->end) - dictSize, (int)dictSize);
+       }
+
+       /* Check if blocks follow each other */
+       if ((const BYTE *)source != ctxPtr->end)
+               LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source);
+
+       /* Check overlapping input/dictionary space */
+       {
+               const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+               const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+               const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
+
+               if ((sourceEnd > dictBegin)
+                       && ((const BYTE *)source < dictEnd)) {
+                       /* raise lowLimit past the overlapped dictionary part */
+                       if (sourceEnd > dictEnd)
+                               sourceEnd = dictEnd;
+                       ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+
+                       /* fewer than 4 bytes left: drop the dictionary */
+                       if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4)
+                               ctxPtr->lowLimit = ctxPtr->dictLimit;
+               }
+       }
+
+       return LZ4HC_compress_generic(ctxPtr, source, dest,
+               inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+/*
+ * Streaming entry point: compress the next block of a stream. Output bound
+ * checking is enabled only when maxOutputSize is below the worst case
+ * reported by LZ4_compressBound().
+ */
+int LZ4_compress_HC_continue(
+       LZ4_streamHC_t *LZ4_streamHCPtr,
+       const char *source,
+       char *dest,
+       int inputSize,
+       int maxOutputSize)
+{
+       /* dest holds the worst case: no output bound checks required */
+       if (maxOutputSize >= LZ4_compressBound(inputSize))
+               return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+                       source, dest, inputSize, maxOutputSize, noLimit);
+
+       return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+               source, dest, inputSize, maxOutputSize, limitedOutput);
+}
+
+/* dictionary saving */
+
+/*
+ * Copy the last dictSize bytes of the current prefix into safeBuffer and
+ * re-point the stream at that copy. Returns the number of bytes saved,
+ * which may be smaller than requested.
+ */
+int LZ4_saveDictHC(
+       LZ4_streamHC_t *LZ4_streamHCPtr,
+       char *safeBuffer,
+       int dictSize)
+{
+       LZ4HC_CCtx_internal *const ctx = &LZ4_streamHCPtr->internal_donotuse;
+       int const prefixSize = (int)(ctx->end - (ctx->base + ctx->dictLimit));
+       U32 const endIndex = (U32)(ctx->end - ctx->base);
+       U32 newLimit;
+
+       /* clamp the request: at most 64 KB, 0 if below 4, at most the prefix */
+       if (dictSize > 64 * KB)
+               dictSize = 64 * KB;
+       if (dictSize < 4)
+               dictSize = 0;
+       if (dictSize > prefixSize)
+               dictSize = prefixSize;
+
+       /* memmove: safeBuffer is allowed to overlap the source data */
+       memmove(safeBuffer, ctx->end - dictSize, dictSize);
+
+       /* keep the index of the end unchanged, only move the pointers */
+       ctx->end = (const BYTE *)safeBuffer + dictSize;
+       ctx->base = ctx->end - endIndex;
+
+       newLimit = endIndex - dictSize;
+       ctx->dictLimit = newLimit;
+       ctx->lowLimit = newLimit;
+       if (ctx->nextToUpdate < newLimit)
+               ctx->nextToUpdate = newLimit;
+
+       return dictSize;
+}
diff --git a/lustre/lz4/scompress.h b/lustre/lz4/scompress.h
new file mode 100644 (file)
index 0000000..f834274
--- /dev/null
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Synchronous Compression operations
+ *
+ * Copyright 2015 LG Electronics Inc.
+ * Copyright (c) 2016, Intel Corporation
+ * Author: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ */
+#ifndef _CRYPTO_SCOMP_INT_H
+#define _CRYPTO_SCOMP_INT_H
+#include <linux/crypto.h>
+
+#define SCOMP_SCRATCH_SIZE     131072
+
+/**
+ * struct crypto_scomp - synchronous compression transform handle
+ *
+ * @base:      Embedded generic transform; __crypto_scomp_tfm() and
+ *             crypto_scomp_tfm() convert between the two representations
+ */
+struct crypto_scomp {
+       struct crypto_tfm base;
+};
+
+/**
+ * struct scomp_alg - synchronous compression algorithm
+ *
+ * @alloc_ctx: Function allocates algorithm specific context
+ * @free_ctx:  Function frees context allocated with alloc_ctx
+ * @compress:  Function performs a compress operation
+ * @decompress:        Function performs a de-compress operation
+ * @base:      Common crypto API algorithm data structure
+ *
+ * @compress and @decompress are given the source buffer and its length,
+ * the destination buffer, a pointer to the destination length and a
+ * context pointer.
+ * NOTE(review): *dlen presumably carries the capacity of dst on entry and
+ * the produced size on return, and ctx presumably comes from @alloc_ctx --
+ * confirm against the implementations.
+ */
+struct scomp_alg {
+       void *(*alloc_ctx)(struct crypto_scomp *tfm);
+       void (*free_ctx)(struct crypto_scomp *tfm, void *ctx);
+       int (*compress)(struct crypto_scomp *tfm, const u8 *src,
+                       unsigned int slen, u8 *dst, unsigned int *dlen,
+                       void *ctx);
+       int (*decompress)(struct crypto_scomp *tfm, const u8 *src,
+                         unsigned int slen, u8 *dst, unsigned int *dlen,
+                         void *ctx);
+       struct crypto_alg base;
+};
+
+/* Map a generic crypto_alg back to the scomp_alg that embeds it. */
+static inline struct scomp_alg *__crypto_scomp_alg(struct crypto_alg *alg)
+{
+       struct scomp_alg *salg = container_of(alg, struct scomp_alg, base);
+
+       return salg;
+}
+
+/* Map a generic crypto_tfm back to the crypto_scomp that embeds it. */
+static inline struct crypto_scomp *__crypto_scomp_tfm(struct crypto_tfm *tfm)
+{
+       struct crypto_scomp *scomp = container_of(tfm, struct crypto_scomp,
+                                                 base);
+
+       return scomp;
+}
+
+/* Return the generic transform embedded in a scomp handle. */
+static inline struct crypto_tfm *crypto_scomp_tfm(struct crypto_scomp *tfm)
+{
+       struct crypto_tfm *generic = &tfm->base;
+
+       return generic;
+}
+
+/* Destroy a scomp handle through crypto_destroy_tfm(). */
+static inline void crypto_free_scomp(struct crypto_scomp *tfm)
+{
+       struct crypto_tfm *generic = crypto_scomp_tfm(tfm);
+
+       crypto_destroy_tfm(tfm, generic);
+}
+
+/* Return the scomp algorithm definition a handle was instantiated from. */
+static inline struct scomp_alg *crypto_scomp_alg(struct crypto_scomp *tfm)
+{
+       struct crypto_alg *alg = crypto_scomp_tfm(tfm)->__crt_alg;
+
+       return __crypto_scomp_alg(alg);
+}
+
+/* Call the alloc_ctx method of the algorithm behind tfm. */
+static inline void *crypto_scomp_alloc_ctx(struct crypto_scomp *tfm)
+{
+       struct scomp_alg *alg = crypto_scomp_alg(tfm);
+
+       return alg->alloc_ctx(tfm);
+}
+
+/**
+ * crypto_scomp_free_ctx() -- Call the free_ctx method of an algorithm
+ *
+ * @tfm:       scomp handle whose algorithm provides free_ctx
+ * @ctx:       context pointer handed to free_ctx
+ */
+static inline void crypto_scomp_free_ctx(struct crypto_scomp *tfm,
+                                        void *ctx)
+{
+       /*
+        * free_ctx returns void, so call it as a plain statement: ISO C
+        * does not allow "return <expression>;" in a void function.
+        */
+       crypto_scomp_alg(tfm)->free_ctx(tfm, ctx);
+}
+
+/*
+ * Call the compress method of the algorithm behind tfm with the arguments
+ * forwarded unchanged, and return its result.
+ */
+static inline int crypto_scomp_compress(struct crypto_scomp *tfm,
+                                       const u8 *src, unsigned int slen,
+                                       u8 *dst, unsigned int *dlen, void *ctx)
+{
+       struct scomp_alg *alg = crypto_scomp_alg(tfm);
+
+       return alg->compress(tfm, src, slen, dst, dlen, ctx);
+}
+
+/*
+ * Call the decompress method of the algorithm behind tfm with the arguments
+ * forwarded unchanged, and return its result.
+ */
+static inline int crypto_scomp_decompress(struct crypto_scomp *tfm,
+                                         const u8 *src, unsigned int slen,
+                                         u8 *dst, unsigned int *dlen,
+                                         void *ctx)
+{
+       struct scomp_alg *alg = crypto_scomp_alg(tfm);
+
+       return alg->decompress(tfm, src, slen, dst, dlen, ctx);
+}
+
+int crypto_init_scomp_ops_async(struct crypto_tfm *tfm);
+struct acomp_req *crypto_acomp_scomp_alloc_ctx(struct acomp_req *req);
+void crypto_acomp_scomp_free_ctx(struct acomp_req *req);
+
+/**
+ * crypto_register_scomp() -- Register synchronous compression algorithm
+ *
+ * Function registers an implementation of a synchronous
+ * compression algorithm
+ *
+ * @alg:       algorithm definition
+ *
+ * Return: zero on success; error code in case of error
+ */
+int crypto_register_scomp(struct scomp_alg *alg);
+
+/**
+ * crypto_unregister_scomp() -- Unregister synchronous compression algorithm
+ *
+ * Function unregisters an implementation of a synchronous
+ * compression algorithm
+ *
+ * @alg:       algorithm definition
+ */
+void crypto_unregister_scomp(struct scomp_alg *alg);
+
+int crypto_register_scomps(struct scomp_alg *algs, int count);
+void crypto_unregister_scomps(struct scomp_alg *algs, int count);
+
+#endif
index 1472679..8889d90 100755 (executable)
@@ -97,6 +97,12 @@ DEST_MODULE_LOCATION[\${#DEST_MODULE_LOCATION[@]}]="/${kmoddir}/lustre/"
 BUILT_MODULE_NAME[\${#BUILT_MODULE_NAME[@]}]="lustre"
 BUILT_MODULE_LOCATION[\${#BUILT_MODULE_LOCATION[@]}]="lustre/llite/"
 DEST_MODULE_LOCATION[\${#DEST_MODULE_LOCATION[@]}]="/${kmoddir}/lustre/"
+BUILT_MODULE_NAME[\${#BUILT_MODULE_NAME[@]}]="lz4"
+BUILT_MODULE_LOCATION[\${#BUILT_MODULE_LOCATION[@]}]="lustre/lz4/"
+DEST_MODULE_LOCATION[\${#DEST_MODULE_LOCATION[@]}]="/${kmoddir}/lustre/"
+BUILT_MODULE_NAME[\${#BUILT_MODULE_NAME[@]}]="lz4hc"
+BUILT_MODULE_LOCATION[\${#BUILT_MODULE_LOCATION[@]}]="lustre/lz4/"
+DEST_MODULE_LOCATION[\${#DEST_MODULE_LOCATION[@]}]="/${kmoddir}/lustre/"
 if \$( rpm -qa | grep -q krb5-devel ) ; then
 BUILT_MODULE_NAME[\${#BUILT_MODULE_NAME[@]}]="ptlrpc_gss"
 BUILT_MODULE_LOCATION[\${#BUILT_MODULE_LOCATION[@]}]="lustre/ptlrpc/gss/"
index b31c34e..2299127 100755 (executable)
@@ -89,7 +89,7 @@ done
 # is given. It's ugly, but is needed to emulate the prior functionality
 if [ "${#modules[@]}" -eq 0 ] || [ "${modules[*]}" = "ldiskfs" ]; then
        unload_all=true
-       modules=('lnet_selftest' 'ldiskfs' 'libcfs')
+       modules=('lnet_selftest' 'ldiskfs' 'libcfs' 'lz4' 'lz4hc')
 else
        unload_all=false
 fi
diff --git a/lustre/tests/AMSR_E_L3_DailyOcean_V05_20111003.hdf.bz2 b/lustre/tests/AMSR_E_L3_DailyOcean_V05_20111003.hdf.bz2
new file mode 100644 (file)
index 0000000..97ddd6d
Binary files /dev/null and b/lustre/tests/AMSR_E_L3_DailyOcean_V05_20111003.hdf.bz2 differ
index d5fc90f..245c09b 100644 (file)
@@ -11,6 +11,7 @@ noinst_DATA += ldiskfs_mdt1_2_11.tar.bz2 ldiskfs_mdt2_2_11.tar.bz2
 noinst_DATA += ldiskfs_ost1_2_11.tar.bz2 ldiskfs_ost2_2_11.tar.bz2
 noinst_DATA += zfs_mdt1_2_11.tar.bz2 zfs_mdt2_2_11.tar.bz2
 noinst_DATA += zfs_ost1_2_11.tar.bz2 zfs_ost2_2_11.tar.bz2
+noinst_DATA += AMSR_E_L3_DailyOcean_V05_20111003.hdf.bz2
 noinst_SCRIPTS = leak_finder.pl llmount.sh llmountcleanup.sh functions.sh
 noinst_SCRIPTS += test-framework.sh runvmstat runiozone runtests sanity.sh
 noinst_SCRIPTS += rundbench acceptance-small.sh compile.sh conf-sanity.sh
index 9cba127..f6cc33a 100644 (file)
@@ -1,5 +1,5 @@
-MODULES := kinode
+MODULES := kinode kcompr
 
-EXTRA_DIST = kinode.c
+EXTRA_DIST = kinode.c kcompr.c
 
 @INCLUDE_RULES@
index 9309582..036405b 100644 (file)
@@ -27,7 +27,7 @@
 
 if MODULES
 if TESTS
-modulefs_DATA = kinode$(KMODEXT)
+modulefs_DATA = kinode$(KMODEXT) kcompr$(KMODEXT)
 endif
 endif
 
diff --git a/lustre/tests/kernel/kcompr.c b/lustre/tests/kernel/kcompr.c
new file mode 100644 (file)
index 0000000..a0a8e0e
--- /dev/null
@@ -0,0 +1,576 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2022 DataDirect Networks
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ */
+
+/* Test various compression/decompression routines as exported via the
+ * kernel Crypto API.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/crypto.h>
+#ifdef HAVE_CRYPTO_INIT_WAIT
+#include <linux/scatterlist.h>
+#include <crypto/acompress.h>
+#endif
+#include <linux/random.h>
+#include <lustre_crypto.h>
+#include <obd_support.h>
+
+/* Random ID passed by userspace, and printed in messages, used to
+ * separate different runs of that module.
+ */
+static int run_id;
+module_param(run_id, int, 0644);
+MODULE_PARM_DESC(run_id, "run ID");
+
+/* Path to file to compress passed by userspace. */
+static char *input_file;
+module_param(input_file, charp, 0644);
+MODULE_PARM_DESC(input_file, "path to file to compress");
+
+/* Prefix of every message printed by this module; run_id is always passed
+ * as the argument for its %u.
+ */
+#define PREFIX "lustre_kcompr_%u: "
+/* Smallest chunk size tried by the chunked compression runs */
+#define COMPR_CHUNK_SIZE (1 << 16) /* 64 KB */
+
+/* One test case: an algorithm name and the level to request for it.
+ * A cd_level of -1 means no level is set on the transform.
+ */
+struct compr_desc {
+       char *cd_name;  /* compression alg name */
+       int   cd_level; /* compression level */
+};
+
+/* Compression algorithms to test.
+ *
+ * For LZ4, cd_level is interpreted as an "acceleration" factor. The larger the
+ * acceleration value, the faster the algorithm, but the lesser the compression.
+ * An acceleration value of "1" is the default.
+ * This acceleration factor is taken into account only by the Lustre-modified
+ * lz4 module.
+ *
+ * For LZ4HC, cd_level is interpreted as a compression level. The higher the
+ * level value, the better the compression, but the slower the algorithm.
+ * A level value of "9" is the default.
+ * This compression level is taken into account only by the Lustre-modified
+ * lz4hc module.
+ *
+ * To conform to values that can be stored in the Lustre file layout (limited to
+ * 4 bits) we use levels between 0 and 15, 0 meaning 'use default'. Then this
+ * level is mapped to actual acceleration factor or compression level in the
+ * corresponding compression module. See lustre/lz4/lz4.h for mapping in place.
+ *
+ * Entries with a cd_level of -1 are run without setting any level.
+ */
+static struct compr_desc compr_algs[] = {
+       { .cd_name = "lz4",     .cd_level = 1 },
+       { .cd_name = "lz4",     .cd_level = 4 },
+       { .cd_name = "lz4",     .cd_level = 9 },
+       { .cd_name = "lz4",     .cd_level = 15 },
+       { .cd_name = "lz4hc",   .cd_level = 1 },
+       { .cd_name = "lz4hc",   .cd_level = 3 },
+       { .cd_name = "lz4hc",   .cd_level = 8 },
+       { .cd_name = "lz4hc",   .cd_level = 15 },
+       { .cd_name = "lzo",     .cd_level = -1 },
+       /* deflate is the crypto module implementing zlib compression (gzip) */
+       { .cd_name = "deflate", .cd_level = -1 },
+};
+
+/* Load the beginning of a file into the test input buffer.
+ *
+ * @filepath:  path of the file to read
+ * @page:      first page of the input buffer
+ * @count:     maximum number of bytes to read
+ *
+ * Return: the value returned by cfs_kernel_read() (the caller uses a
+ * non-negative value as the number of bytes available, which may be less
+ * than @count), or a negative error code if the file cannot be opened.
+ */
+int fill_input_from_file(char *filepath, struct page *page, size_t count)
+{
+       struct file *filp;
+       loff_t pos = 0;
+       void *pageaddr;
+       int err;
+
+       /* filp_open() reports failure with an ERR_PTR. Testing with
+        * IS_ERR_OR_NULL() and then PTR_ERR() would turn a NULL into a
+        * return value of 0, i.e. an apparent success.
+        */
+       filp = filp_open(filepath, O_RDONLY, 0);
+       if (IS_ERR(filp))
+               return PTR_ERR(filp);
+
+       /* only map the buffer once there is something to read into it */
+       pageaddr = kmap(page);
+       err = cfs_kernel_read(filp, pageaddr, count, &pos);
+       kunmap(page);
+
+       filp_close(filp, NULL);
+       return err;
+}
+
+/* Fill the test input buffer with synthetic binary data.
+ *
+ * A 67-byte random pattern is repeated over the buffer, then 14-byte random
+ * sequences are written over it at random distances (less than 1 << 13
+ * bytes apart).
+ *
+ * @page:      first page of the input buffer
+ * @count:     size of the buffer in bytes
+ *
+ * Return: @count
+ */
+int fill_input_with_rand(struct page *page, size_t count)
+{
+       char pattern[67], noise[14];
+       char *base;
+       size_t off;
+
+       base = kmap(page);
+
+       /* first pass: lay the same random pattern down back to back */
+       get_random_bytes(pattern, sizeof(pattern));
+       for (off = 0; off + sizeof(pattern) < count; off += sizeof(pattern))
+               memcpy(base + off, pattern, sizeof(pattern));
+
+       /* second pass: sprinkle fresh random bytes at random intervals */
+       off = 0;
+       while (off + sizeof(noise) < count) {
+               get_random_bytes(noise, sizeof(noise));
+               memcpy(base + off, noise, sizeof(noise));
+               off += sizeof(noise) + prandom_u32_max(1 << 13);
+       }
+
+       kunmap(page);
+       return count;
+}
+
+/* Compress and decompress, generic (crypto_comp) variant.
+ *
+ * The 1 << @order pages starting at @in_page are first compressed chunk by
+ * chunk, with a chunk size starting at COMPR_CHUNK_SIZE and doubling on each
+ * pass, for timing purposes only. The whole buffer is then compressed in one
+ * call, decompressed again and compared with the input. Timings and
+ * compression ratios are logged with pr_err().
+ *
+ * @in_page:    first page of the input buffer
+ * @order:      the input buffer is 1 << @order contiguous pages
+ * @compr_name: algorithm name given to crypto_alloc_comp()
+ * @level:      level given to ll_crypto_comp_set_level(), or -1 to set none
+ *
+ * Return: 0 on success, -1 or the error returned by decompression otherwise.
+ */
+int test_comp_compress_decompress(struct page *in_page, unsigned int order,
+                                 char *compr_name, int level)
+{
+       void *inpageaddr = NULL, *comppageaddr = NULL, *decomppageaddr = NULL;
+       struct page *comp_out_page = NULL, *decomp_out_page = NULL;
+       unsigned int in_len = 1 << (PAGE_SHIFT + order);
+       unsigned int comp_len = in_len, decomp_len = in_len;
+       unsigned int curr_len, curr_comp_len;
+       void *incurrpageaddr, *outcurrpageaddr;
+       unsigned int chunk_size = COMPR_CHUNK_SIZE;
+       struct crypto_comp *cc = NULL;
+       ktime_t started, finished;
+       u64 usdelta, currusdelta;
+       int err = -1;
+
+       /* allocate 1 << order contiguous pages for compression output */
+       comp_out_page = alloc_pages(GFP_KERNEL, order);
+       if (!comp_out_page) {
+               pr_err(PREFIX "ENOMEM cannot allocate comp_out_page\n", run_id);
+               goto free;
+       }
+       /* allocate 1 << order contiguous pages for decompression output */
+       decomp_out_page = alloc_pages(GFP_KERNEL, order);
+       if (!decomp_out_page) {
+               pr_err(PREFIX "ENOMEM cannot allocate decomp_out_page\n",
+                      run_id);
+               goto free;
+       }
+
+       cc = crypto_alloc_comp(compr_name, 0, 0);
+       if (IS_ERR(cc)) {
+               pr_err(PREFIX
+                      "ERROR cannot initialize compressor %s, error %ld\n",
+                      run_id, compr_name, PTR_ERR(cc));
+               cc = NULL;
+               goto free;
+       }
+
+       if (level != -1)
+               ll_crypto_comp_set_level(cc, level);
+
+       inpageaddr = kmap(in_page);
+       comppageaddr = kmap(comp_out_page);
+
+       /* Try compression by COMPR_CHUNK_SIZE chunks only if multiple */
+       if (!(in_len & (COMPR_CHUNK_SIZE - 1))) {
+               /* loop on chunk_size, from COMPR_CHUNK_SIZE
+                * and double each time
+                */
+               while (chunk_size < in_len) {
+                       incurrpageaddr = inpageaddr;
+                       outcurrpageaddr = comppageaddr;
+                       curr_len = in_len;
+                       currusdelta = 0;
+                       curr_comp_len = 0;
+                       /* compress input data, one chunk at a time */
+                       while (curr_len > 0) {
+                               comp_len = chunk_size;
+                               started = ktime_get();
+                               err = crypto_comp_compress(cc, incurrpageaddr,
+                                                          chunk_size,
+                                                          outcurrpageaddr,
+                                                          &comp_len);
+                               finished = ktime_get();
+                               if (err || comp_len >= chunk_size) {
+                                       /* If compress returns error, or if
+                                        * output len is greater than input len,
+                                        * it means it is not worth compressing.
+                                        * So just copy input into output.
+                                        */
+                                       pr_err(PREFIX "cannot compress %d bytes, compressor %s (out len %d), leave uncompressed\n",
+                                              run_id, chunk_size, compr_name,
+                                              comp_len);
+                                       memcpy(outcurrpageaddr, incurrpageaddr,
+                                              chunk_size);
+                                       comp_len = chunk_size;
+                                       err = 0;
+                               }
+                               curr_comp_len += comp_len;
+                               currusdelta +=
+                                       ktime_us_delta(finished, started);
+                               curr_len -= chunk_size;
+                               incurrpageaddr += chunk_size;
+                               outcurrpageaddr += chunk_size;
+                       }
+                       /* a pass shorter than 1 us would otherwise make the
+                        * throughput computation divide by zero
+                        */
+                       currusdelta = max_t(u64, currusdelta, 1);
+                       pr_err(PREFIX "compr %s(%d) in %ukB chunks took %lld us (%llu MB/s), compress ratio: %u.%02u\n",
+                              run_id, compr_name, level, chunk_size >> 10,
+                              currusdelta,
+                              (u64)in_len * 1000000 / currusdelta / 1048576,
+                              in_len / curr_comp_len,
+                              (in_len % curr_comp_len) * 100 / curr_comp_len);
+                       chunk_size = chunk_size << 1;
+                       cond_resched();
+               }
+       }
+
+       /* now compress the whole input in a single call */
+       comp_len = in_len;
+       started = ktime_get();
+       err = crypto_comp_compress(cc, inpageaddr, in_len,
+                                  comppageaddr, &comp_len);
+       finished = ktime_get();
+       if (err || comp_len >= in_len) {
+               /* If compress returns error, or if output len is greater than
+                * input len, it means it is not worth compressing.
+                * So just copy input into output.
+                */
+               /* NOTE(review): the raw copy is still handed to the
+                * decompressor below, which presumably rejects it - confirm
+                * this is the intended way to fail the test.
+                */
+               pr_err(PREFIX "cannot compress %d bytes, compressor %s,%d (out len %d), leave uncompressed\n",
+                      run_id, in_len, compr_name, level, comp_len);
+               memcpy(comppageaddr, inpageaddr, in_len);
+               comp_len = in_len;
+               err = 0;
+       }
+       /* never divide by a zero duration */
+       usdelta = max_t(u64, ktime_us_delta(finished, started), 1);
+       pr_err(PREFIX "compr %s(%d) in %ukB chunks took %lld us (%llu MB/s), compress ratio: %u.%02u\n",
+              run_id, compr_name, level, in_len >> 10,
+              usdelta, (u64)in_len * 1000000 / usdelta / 1048576,
+              in_len / comp_len, (in_len % comp_len) * 100 / comp_len);
+
+       decomppageaddr = kmap(decomp_out_page);
+       started = ktime_get();
+       err = crypto_comp_decompress(cc, comppageaddr, comp_len,
+                                    decomppageaddr, &decomp_len);
+       finished = ktime_get();
+       if (err) {
+               pr_err(PREFIX "ERROR cannot decompress %d bytes, compressor %s, error %d\n",
+                      run_id, comp_len, compr_name, err);
+               goto free;
+       }
+       if (decomp_len != in_len) {
+               pr_err(PREFIX "ERROR decompressed len %d != initial len %d\n",
+                      run_id, decomp_len, in_len);
+               err = -1;
+               goto free;
+       }
+       if (memcmp(inpageaddr, decomppageaddr, in_len)) {
+               pr_err(PREFIX "ERROR decompressed different from initial\n",
+                      run_id);
+               err = -1;
+       }
+       usdelta = max_t(u64, ktime_us_delta(finished, started), 1);
+       pr_err(PREFIX "decompr %s(%d) took %lld us (%llu MB/s)\n",
+              run_id, compr_name, level, usdelta,
+              (u64)comp_len * 1000000 / usdelta / 1048576);
+
+free:
+       if (cc)
+               crypto_free_comp(cc);
+       /* kunmap() takes the struct page that was given to kmap(), not the
+        * address kmap() returned; the addresses only tell what was mapped.
+        */
+       if (inpageaddr)
+               kunmap(in_page);
+       if (comppageaddr)
+               kunmap(comp_out_page);
+       if (decomppageaddr)
+               kunmap(decomp_out_page);
+       if (comp_out_page)
+               __free_pages(comp_out_page, order);
+       if (decomp_out_page)
+               __free_pages(decomp_out_page, order);
+       return err;
+}
+
+/* Compress and decompress, acomp (asynchronous compression API) variant.
+ *
+ * The 1 << @order pages starting at @in_page are described by a scatterlist,
+ * compressed with one acomp request, decompressed with a second one and
+ * compared with the input. Timings and compression ratio are logged with
+ * pr_err(). Without HAVE_CRYPTO_INIT_WAIT the test is reported as skipped.
+ *
+ * @in_page:    first page of the input buffer
+ * @order:      the input buffer is 1 << @order contiguous pages
+ * @compr_name: algorithm name given to crypto_alloc_acomp()
+ * @level:      level given to ll_crypto_acomp_set_level(), or -1 to set none
+ *
+ * Return: 0 on success or when skipped, -1 or the error returned by
+ * sg_alloc_table() or by decompression otherwise.
+ */
+int test_acomp_compress_decompress(struct page *in_page, unsigned int order,
+                                  char *compr_name, int level)
+{
+#ifdef HAVE_CRYPTO_INIT_WAIT
+       struct page *comp_out_page = NULL, *decomp_out_page = NULL;
+       unsigned int in_len = 1 << (PAGE_SHIFT + order);
+       void *inpageaddr = NULL, *decomppageaddr = NULL;
+       struct sg_table src_sgt, dst_sgt;
+       unsigned int comp_len, decomp_len;
+       struct crypto_acomp *ca = NULL;
+       struct acomp_req *req = NULL;
+       ktime_t started, finished;
+       struct crypto_wait wait;
+       struct scatterlist *s;
+       unsigned long size;
+       struct page *p;
+       u64 usdelta;
+       int i, err = -1;
+
+       /* allocate 1 << order contiguous pages for compression output */
+       comp_out_page = alloc_pages(GFP_KERNEL, order);
+       if (!comp_out_page) {
+               pr_err(PREFIX "ENOMEM cannot allocate comp_out_page\n", run_id);
+               goto free;
+       }
+       /* allocate 1 << order contiguous pages for decompression output */
+       decomp_out_page = alloc_pages(GFP_KERNEL, order);
+       if (!decomp_out_page) {
+               pr_err(PREFIX "ENOMEM cannot allocate decomp_out_page\n",
+                      run_id);
+               goto free;
+       }
+
+       ca = crypto_alloc_acomp(compr_name, 0, 0);
+       if (IS_ERR(ca)) {
+               pr_err(PREFIX
+                      "ERROR cannot initialize compressor %s, error %ld\n",
+                      run_id, compr_name, PTR_ERR(ca));
+               ca = NULL;
+               goto free;
+       }
+
+       if (level != -1)
+               ll_crypto_acomp_set_level(ca, level);
+
+       req = acomp_request_alloc(ca);
+       if (!req) {
+               pr_err(PREFIX
+                      "ERROR request alloc failed for %s\n",
+                      run_id, compr_name);
+               goto free;
+       }
+
+       /* compression: one scatterlist entry per input and output page */
+       crypto_init_wait(&wait);
+       err = sg_alloc_table(&src_sgt, 1 << order, GFP_KERNEL);
+       if (err)
+               goto free;
+       p = in_page;
+       for_each_sg(src_sgt.sgl, s, src_sgt.orig_nents, i)
+               sg_set_page(s, p + i, PAGE_SIZE, 0);
+       err = sg_alloc_table(&dst_sgt, 1 << order, GFP_KERNEL);
+       if (err) {
+               sg_free_table(&src_sgt);
+               goto free;
+       }
+       p = comp_out_page;
+       for_each_sg(dst_sgt.sgl, s, dst_sgt.orig_nents, i)
+               sg_set_page(s, p + i, PAGE_SIZE, 0);
+
+       acomp_request_set_params(req, src_sgt.sgl, dst_sgt.sgl, in_len, in_len);
+       acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                  crypto_req_done, &wait);
+
+       started = ktime_get();
+       err = crypto_wait_req(crypto_acomp_compress(req), &wait);
+       finished = ktime_get();
+       sg_free_table(&src_sgt);
+       sg_free_table(&dst_sgt);
+       comp_len = req->dlen;
+       if (err || comp_len >= in_len) {
+               /* If compress returns error, or if output len is greater than
+                * input len, it means it is not worth compressing.
+                * So just copy input into output.
+                */
+               /* NOTE(review): the raw copy is still handed to the
+                * decompressor below, which presumably rejects it - confirm
+                * this is the intended way to fail the test.
+                */
+               pr_err(PREFIX "cannot compress %d bytes, compressor %s,%d (out len %d), leave uncompressed\n",
+                      run_id, in_len, compr_name, level, comp_len);
+               inpageaddr = kmap(in_page);
+               /* decomppageaddr is borrowed to map the compression output */
+               decomppageaddr = kmap(comp_out_page);
+               memcpy(decomppageaddr, inpageaddr, in_len);
+               /* kunmap() takes the page given to kmap(), not the address */
+               kunmap(in_page);
+               kunmap(comp_out_page);
+               comp_len = in_len;
+               err = 0;
+       }
+       /* never divide by a zero duration */
+       usdelta = max_t(u64, ktime_us_delta(finished, started), 1);
+       pr_err(PREFIX "acompr %s(%d) of %ukB chunks took %lld us (%llu MB/s), compress ratio: %u.%02u\n",
+              run_id, compr_name, level, in_len >> 10,
+              usdelta, (u64)in_len * 1000000 / usdelta / 1048576,
+              in_len / comp_len, (in_len % comp_len) * 100 / comp_len);
+
+       /* decompression: the source only covers the comp_len bytes produced */
+       crypto_init_wait(&wait);
+       err = sg_alloc_table(&src_sgt, 1 << order, GFP_KERNEL);
+       if (err)
+               goto free;
+       p = comp_out_page;
+       size = comp_len;
+       for_each_sg(src_sgt.sgl, s, src_sgt.orig_nents, i) {
+               sg_set_page(s, p + i, min_t(unsigned long, size, PAGE_SIZE), 0);
+               if (size < PAGE_SIZE)
+                       break;
+               size -= PAGE_SIZE;
+       }
+       err = sg_alloc_table(&dst_sgt, 1 << order, GFP_KERNEL);
+       if (err) {
+               sg_free_table(&src_sgt);
+               goto free;
+       }
+       p = decomp_out_page;
+       for_each_sg(dst_sgt.sgl, s, dst_sgt.orig_nents, i)
+               sg_set_page(s, p + i, PAGE_SIZE, 0);
+
+       acomp_request_set_params(req, src_sgt.sgl, dst_sgt.sgl,
+                                comp_len, in_len);
+
+       started = ktime_get();
+       err = crypto_wait_req(crypto_acomp_decompress(req), &wait);
+       finished = ktime_get();
+       sg_free_table(&src_sgt);
+       sg_free_table(&dst_sgt);
+       decomp_len = req->dlen;
+       if (err) {
+               pr_err(PREFIX "ERROR cannot decompress %d bytes, compressor %s, error %d\n",
+                      run_id, comp_len, compr_name, err);
+               goto free;
+       }
+       if (decomp_len != in_len) {
+               pr_err(PREFIX "ERROR decompressed len %d != initial len %d\n",
+                      run_id, decomp_len, in_len);
+               err = -1;
+               goto free;
+       }
+       inpageaddr = kmap(in_page);
+       decomppageaddr = kmap(decomp_out_page);
+       if (memcmp(inpageaddr, decomppageaddr, in_len)) {
+               pr_err(PREFIX "ERROR decompressed different from initial\n",
+                      run_id);
+               err = -1;
+       }
+       /* kunmap() takes the page given to kmap(), not the address */
+       kunmap(in_page);
+       kunmap(decomp_out_page);
+       usdelta = max_t(u64, ktime_us_delta(finished, started), 1);
+       pr_err(PREFIX "adecompr %s(%d) took %lld us (%llu MB/s)\n",
+              run_id, compr_name, level, usdelta,
+              (u64)comp_len * 1000000 / usdelta / 1048576);
+
+free:
+       if (req)
+               acomp_request_free(req);
+       if (ca)
+               crypto_free_acomp(ca);
+       return err;
+#else /* !HAVE_CRYPTO_INIT_WAIT */
+       pr_err(PREFIX "SKIP test_acomp_compress_decompress(%s), not supported\n",
+              run_id, compr_name);
+       return 0;
+#endif /* HAVE_CRYPTO_INIT_WAIT */
+}
+
+/* Convert a byte length into a page allocation order.
+ *
+ * @len:       number of bytes
+ *
+ * Return: the largest order such that 1 << (PAGE_SHIFT + order) does not
+ * exceed @len, or 0 when @len is smaller than one page.
+ */
+static int order_from_len(unsigned int len)
+{
+       int order = 0;
+
+       /* order = floor(log2(len)) */
+       while (len > 1) {
+               order++;
+               len >>= 1;
+       }
+
+       /* Less than one page of data (e.g. a short or empty input file) must
+        * not yield a negative order: the caller stores the result in an
+        * unsigned order and shifts by it.
+        */
+       return order > PAGE_SHIFT ? order - PAGE_SHIFT : 0;
+}
+
+/* Module entry point: build the input buffer, run every test case of
+ * compr_algs through both test variants, and log the outcome.
+ *
+ * Return: always -EINVAL, so that the module is never left loaded; the
+ * result of the run is only reported through the logged messages.
+ */
+static int __init kcompr_init(void)
+{
+       /* input buffer is 1 << 22 bytes = 4MB */
+       const unsigned int buf_order = 22 - PAGE_SHIFT;
+       const bool from_file = input_file != NULL && input_file[0] != '\0';
+       unsigned int test_order;
+       struct compr_desc *alg;
+       struct page *buf;
+       int i, rc;
+
+       /* allocate 1 << buf_order contiguous pages */
+       buf = alloc_pages(GFP_KERNEL, buf_order);
+       if (!buf) {
+               pr_err(PREFIX "ENOMEM cannot allocate pages\n", run_id);
+               return -EINVAL;
+       }
+
+       /* fill the buffer from the provided file, or with binary test data */
+       if (from_file)
+               rc = fill_input_from_file(input_file, buf,
+                                         1 << (PAGE_SHIFT + buf_order));
+       else
+               rc = fill_input_with_rand(buf, 1 << (PAGE_SHIFT + buf_order));
+
+       pr_err(PREFIX "*****\n", run_id);
+       if (from_file)
+               pr_err(PREFIX "(de)compression test on provided file %s\n",
+                      run_id, input_file);
+       else
+               pr_err(PREFIX "(de)compression test on random binary data\n",
+                      run_id);
+       pr_err(PREFIX "*****\n", run_id);
+
+       if (rc < 0) {
+               pr_err(PREFIX "cannot fill in input buffer, ret %d\n",
+                      run_id, rc);
+               goto out;
+       }
+
+       /* only test the part of the buffer that was actually filled */
+       test_order = order_from_len(rc);
+
+       /* run each algorithm of the list through both variants, stopping at
+        * the first failure
+        */
+       for (i = 0; i < ARRAY_SIZE(compr_algs); i++) {
+               alg = &compr_algs[i];
+
+               rc = test_comp_compress_decompress(buf, test_order,
+                                                  alg->cd_name,
+                                                  alg->cd_level);
+               pr_err(PREFIX "test_comp_compress_decompress(%s,%d) ret %d\n",
+                      run_id, alg->cd_name, alg->cd_level, rc);
+               if (rc)
+                       goto out;
+
+               /* only compress first 1 << 17 = 128 kB,
+                * max len accepted by scomp variants
+                */
+               rc = test_acomp_compress_decompress(buf,
+                                                   min((int)test_order,
+                                                       17 - PAGE_SHIFT),
+                                                   alg->cd_name,
+                                                   alg->cd_level);
+               pr_err(PREFIX "test_acomp_compress_decompress(%s,%d) ret %d\n",
+                      run_id, alg->cd_name, alg->cd_level, rc);
+               if (rc)
+                       goto out;
+       }
+
+       pr_err(PREFIX "SUCCESS\n", run_id);
+
+out:
+       __free_pages(buf, buf_order);
+       /* Don't load. */
+       return -EINVAL;
+}
+
+/* Nothing to undo: kcompr_init() always returns an error, so the module
+ * never stays loaded.
+ */
+static void __exit kcompr_exit(void)
+{
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("compression/decompression test module");
+MODULE_VERSION(LUSTRE_VERSION_STRING);
+
+module_init(kcompr_init);
+module_exit(kcompr_exit);
index d70d0a3..4f615e3 100755 (executable)
@@ -10085,6 +10085,90 @@ test_81b() { # LU-456
 }
 run_test 81b "OST should return -ENOSPC when retry still fails ======="
 
+# Run the kcompr.ko compression test module on three inputs (synthetic
+# data, a large text file built locally, an HDF data file) and check the
+# messages it leaves in the kernel log.
+test_84()
+{
+       (( $CLIENT_VERSION >= $(version_code 2.14.0.66) )) ||
+               skip "Need at least version 2.14.0.66"
+       [ -f $LUSTRE/tests/kernel/kcompr.ko ] || skip "Need MODULES build"
+
+       local run_id=$RANDOM
+       local tmpfile=$TMP/$tfile
+       local tmpout=$TMP/$tfile.$run_id.out
+       local hdf=$LUSTRE/tests/AMSR_E_L3_DailyOcean_V05_20111003.hdf
+
+       stack_trap "rm -f $tmpout"
+       stack_trap "rm -f $tmpfile"
+
+       # Try to insert the module. This will always fail as the
+       # module is designed to not be inserted.
+
+       # First launch without input_file: use auto generated random input
+       echo "Compress/decompress synthetic data"
+       insmod $LUSTRE/tests/kernel/kcompr.ko run_id=$run_id &> /dev/null
+
+       dmesg | grep "lustre_kcompr_$run_id:" > $tmpout
+       cat $tmpout
+       if grep -q "lustre_kcompr_$run_id: ENOMEM" $tmpout; then
+               error_noexit "lz4 compression/decompression failed"
+       elif ! grep -q "lustre_kcompr_$run_id: SUCCESS" $tmpout; then
+               error "compression/decompression for pseudo random data failed"
+       fi
+
+       run_id=$RANDOM
+       # Second launch with large text file we build
+       # start from an empty file, the loops below only append to it
+       rm -f $tmpfile
+       find "$LUSTRE" -name "*.sh" -o -name "*.[ch]" -o -name "*.py" |
+               while read -r F; do
+                       echo "$F"
+                       cat "$F"
+                       (( $(stat -c %s $tmpfile) < (8 << 20) )) || break
+               done >> $tmpfile
+       find /etc -type f |
+               while read -r F; do
+                       echo "$F"
+                       cat "$F"
+                       (( $(stat -c %s $tmpfile) < (8 << 20) )) || break
+               done >> $tmpfile
+       echo "Compress/decompress text file, size $(stat -c %s $tmpfile)"
+       insmod $LUSTRE/tests/kernel/kcompr.ko run_id=$run_id \
+               input_file=$tmpfile &> /dev/null
+
+       dmesg | grep "lustre_kcompr_$run_id:" > $tmpout
+       cat $tmpout
+       if grep -q "lustre_kcompr_$run_id: ENOMEM" $tmpout; then
+               error_noexit "lz4 compression/decompression failed"
+       elif ! grep -q "lustre_kcompr_$run_id: SUCCESS" $tmpout; then
+               error "compression/decompression for large text file failed"
+       fi
+
+       run_id=$RANDOM
+       # Third launch with downloaded HDF file
+       # HDF file AMSR_E_L3_DailyOcean_V05_20111003.hdf taken from
+       # https://nsidc.org/data/ae_dyocn/versions/2
+       if ! [ -e $hdf.bz2 ]; then
+               echo "HDF file not present"
+               # leave the test function normally instead of terminating
+               # the shell that runs it
+               return 0
+       fi
+       if which bzcat &> /dev/null; then
+               bzcat $hdf.bz2 > $tmpfile
+       else
+               cp $hdf.bz2 $tmpfile.bz2
+               # $tmpfile still exists from the second launch, and bunzip2
+               # does not overwrite an existing output file without -f
+               bunzip2 -f $tmpfile.bz2
+       fi
+       echo "Compress/decompress $hdf"
+       insmod $LUSTRE/tests/kernel/kcompr.ko run_id=$run_id \
+               input_file=$tmpfile &> /dev/null
+
+       dmesg | grep "lustre_kcompr_$run_id:" > $tmpout
+       cat $tmpout
+       if grep -q "lustre_kcompr_$run_id: ENOMEM" $tmpout; then
+               error_noexit "lz4 compression/decompression failed"
+       elif ! grep -q "lustre_kcompr_$run_id: SUCCESS" $tmpout; then
+               error "compression/decompression for HDF file failed"
+       fi
+}
+run_test 84 "lz4/lz4hc compression/decompression kernel module"
+
 test_99() {
        [ -z "$(which cvs 2>/dev/null)" ] && skip_env "could not find cvs"