45 files changed, 7795 insertions, 113 deletions
diff --git a/Makefile.inc b/Makefile.inc
index b8cba07bb0..1889128be3 100644
--- a/Makefile.inc
+++ b/Makefile.inc
@@ -308,6 +308,11 @@ ifeq ($(CONFIG_COMPRESSED_PAYLOAD_LZMA),y)
 CBFS_PAYLOAD_COMPRESS_FLAG:=LZMA
 endif
 
+CBFS_PRERAM_COMPRESS_FLAG:=none
+ifeq ($(CONFIG_COMPRESS_PRERAM_STAGES),y)
+CBFS_PRERAM_COMPRESS_FLAG:=LZ4
+endif
+
 ifneq ($(CONFIG_LOCALVERSION),"")
 export COREBOOT_EXTRA_VERSION := -$(call strip_quotes,$(CONFIG_LOCALVERSION))
 endif
@@ -773,7 +778,7 @@ endif
 cbfs-files-y += $(CONFIG_CBFS_PREFIX)/romstage
 $(CONFIG_CBFS_PREFIX)/romstage-file := $(objcbfs)/romstage.elf
 $(CONFIG_CBFS_PREFIX)/romstage-type := stage
-$(CONFIG_CBFS_PREFIX)/romstage-compression := none
+$(CONFIG_CBFS_PREFIX)/romstage-compression := $(CBFS_PRERAM_COMPRESS_FLAG)
 ifeq ($(CONFIG_ARCH_ROMSTAGE_ARM),y)
 $(CONFIG_CBFS_PREFIX)/romstage-options := -b 0
 endif
diff --git a/payloads/libpayload/include/cbfs_core.h b/payloads/libpayload/include/cbfs_core.h
index 4c59f4131a..4cbc4c0628 100644
--- a/payloads/libpayload/include/cbfs_core.h
+++ b/payloads/libpayload/include/cbfs_core.h
@@ -58,6 +58,7 @@
 
 #define CBFS_COMPRESS_NONE  0
 #define CBFS_COMPRESS_LZMA  1
+#define CBFS_COMPRESS_LZ4   2
 
 /** These are standard component types for well known
     components (i.e - those that coreboot needs to consume.
diff --git a/payloads/libpayload/include/lz4.h b/payloads/libpayload/include/lz4.h
index 1f2830db46..d2120a48fc 100644
--- a/payloads/libpayload/include/lz4.h
+++ b/payloads/libpayload/include/lz4.h
@@ -36,7 +36,10 @@
 
 /* Decompresses an LZ4F image (multiple LZ4 blocks with frame header) from src
  * to dst, ensuring that it doesn't read more than srcn bytes and doesn't write
- * more than dstn. Buffer sizes must stay below 2GB.
+ * more than dstn. Buffer sizes must stay below 2GB. Can decompress files loaded
+ * to the end of a buffer in-place, as long as buffer is larger than the final
+ * output size. (Usually just a few bytes, but may be up to (8 + dstn/255) in
+ * worst case. Will reliably return an error if buffer was too small.)
  * Returns amount of decompressed bytes, or 0 on error.
  */
 size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn);
@@ -44,4 +47,4 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn);
 /* Same as ulz4fn() but does not perform any bounds checks. */
 size_t ulz4f(const void *src, void *dst);
 
-#endif /* __LZO_H_ */
+#endif /* __LZ4_H_ */
diff --git a/payloads/libpayload/libcbfs/cbfs.c b/payloads/libpayload/libcbfs/cbfs.c
index 49e4941181..a1cc7e443c 100644
--- a/payloads/libpayload/libcbfs/cbfs.c
+++ b/payloads/libpayload/libcbfs/cbfs.c
@@ -35,11 +35,16 @@
 #  include <lzma.h>
 #  define CBFS_CORE_WITH_LZMA
 # endif
+# if IS_ENABLED(CONFIG_LP_LZ4)
+#  include <lz4.h>
+#  define CBFS_CORE_WITH_LZ4
+# endif
 # define CBFS_MINI_BUILD
 #elif defined(__SMM__)
 # define CBFS_MINI_BUILD
 #else
 # define CBFS_CORE_WITH_LZMA
+# define CBFS_CORE_WITH_LZ4
 # include <lib.h>
 #endif
 
diff --git a/payloads/libpayload/libcbfs/cbfs_core.c b/payloads/libpayload/libcbfs/cbfs_core.c
index ddf0da5f42..c32d262b33 100644
--- a/payloads/libpayload/libcbfs/cbfs_core.c
+++ b/payloads/libpayload/libcbfs/cbfs_core.c
@@ -34,6 +34,9 @@
  * CBFS_CORE_WITH_LZMA (must be #define)
  *      if defined, ulzma() must exist for decompression of data streams
  *
+ * CBFS_CORE_WITH_LZ4 (must be #define)
+ *      if defined, ulz4f() must exist for decompression of data streams
+ *
  * ERROR(x...)
  *      print an error message x (in printf format)
  *
@@ -330,6 +333,10 @@ int cbfs_decompress(int algo, void *src, void *dst, int len)
 		case CBFS_COMPRESS_LZMA:
 			return ulzma(src, dst);
 #endif
+#ifdef CBFS_CORE_WITH_LZ4
+		case CBFS_COMPRESS_LZ4:
+			return ulz4f(src, dst);
+#endif
 		default:
 			ERROR("tried to decompress %d bytes with algorithm #%x,"
 			      "but that algorithm id is unsupported.\n", len,
diff --git a/payloads/libpayload/liblz4/lz4.c b/payloads/libpayload/liblz4/lz4.c.inc
index fb89090ee2..b3be4e5b44 100644
--- a/payloads/libpayload/liblz4/lz4.c
+++ b/payloads/libpayload/liblz4/lz4.c.inc
@@ -37,12 +37,19 @@
 *  Reading and writing into memory
 **************************************/
 
-/* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */
+/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */
 static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
 {
     BYTE* d = (BYTE*)dstPtr;
     const BYTE* s = (const BYTE*)srcPtr;
-    BYTE* e = (BYTE*)dstEnd;
+    BYTE* const e = (BYTE*)dstEnd;
+
+#if 0
+    const size_t l2 = 8 - (((size_t)d) & (sizeof(void*)-1));
+    LZ4_copy8(d,s); if (d>e-9) return;
+    d+=l2; s+=l2;
+#endif /* join to align */
+
     do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
 }
 
@@ -52,9 +59,9 @@ static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
 **************************************/
 #define MINMATCH 4
 
-#define COPYLENGTH 8
+#define WILDCOPYLENGTH 8
 #define LASTLITERALS 5
-#define MFLIMIT (COPYLENGTH+MINMATCH)
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
 static const int LZ4_minLength = (MFLIMIT+1);
 
 #define KB *(1 <<10)
@@ -114,11 +121,12 @@ FORCE_INLINE int LZ4_decompress_generic(
     const BYTE* const lowLimit = lowPrefix - dictSize;
 
     const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
-    const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
-    const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
+    const unsigned dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
+    const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
 
     const int safeDecode = (endOnInput==endOnInputSize);
     const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+    const int inPlaceDecode = ((ip >= op) && (ip < oend));
 
 
     /* Special cases */
@@ -133,6 +141,9 @@ FORCE_INLINE int LZ4_decompress_generic(
         unsigned token;
         size_t length;
         const BYTE* match;
+        size_t offset;
+
+        if (unlikely((inPlaceDecode) && (op + WILDCOPYLENGTH > ip))) goto _output_error;   /* output stream ran over input stream */
 
         /* get literal length */
         token = *ip++;
@@ -144,7 +155,7 @@ FORCE_INLINE int LZ4_decompress_generic(
                 s = *ip++;
                 length += s;
             }
-            while (likely((endOnInput)?ip<iend-RUN_MASK:1) && (s==255));
+            while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) && (s==255) );
             if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error;   /* overflow detection */
             if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error;   /* overflow detection */
         }
@@ -152,7 +163,7 @@ FORCE_INLINE int LZ4_decompress_generic(
         /* copy literals */
         cpy = op+length;
         if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
-            || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
+            || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)))
         {
             if (partialDecoding)
             {
@@ -164,7 +175,7 @@ FORCE_INLINE int LZ4_decompress_generic(
                 if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
                 if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
             }
-            memcpy(op, ip, length);
+            memmove(op, ip, length);
             ip += length;
             op += length;
             break;     /* Necessarily EOF, due to parsing restrictions */
@@ -173,8 +184,9 @@ FORCE_INLINE int LZ4_decompress_generic(
         ip += length; op = cpy;
 
         /* get offset */
-        match = cpy - LZ4_readLE16(ip); ip+=2;
-        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside destination buffer */
+        offset = LZ4_readLE16(ip); ip+=2;
+        match = op - offset;
+        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside buffers */
 
         /* get matchlength */
         length = token & ML_MASK;
@@ -204,12 +216,12 @@ FORCE_INLINE int LZ4_decompress_generic(
             }
             else
             {
-                /* match encompass external dictionary and current segment */
+                /* match encompass external dictionary and current block */
                 size_t copySize = (size_t)(lowPrefix-match);
                 memcpy(op, dictEnd - copySize, copySize);
                 op += copySize;
                 copySize = length - copySize;
-                if (copySize > (size_t)(op-lowPrefix))   /* overlap within current segment */
+                if (copySize > (size_t)(op-lowPrefix))   /* overlap copy */
                 {
                     BYTE* const endOfMatch = op + copySize;
                     const BYTE* copyFrom = lowPrefix;
@@ -224,28 +236,30 @@ FORCE_INLINE int LZ4_decompress_generic(
             continue;
         }
 
-        /* copy repeated sequence */
+        /* copy match within block */
         cpy = op + length;
-        if (unlikely((op-match)<8))
+        if (unlikely(offset<8))
         {
-            const size_t dec64 = dec64table[op-match];
+            const int dec64 = dec64table[offset];
             op[0] = match[0];
             op[1] = match[1];
             op[2] = match[2];
             op[3] = match[3];
-            match += dec32table[op-match];
-            LZ4_copy4(op+4, match);
-            op += 8; match -= dec64;
-        } else { LZ4_copy8(op, match); op+=8; match+=8; }
+            match += dec32table[offset];
+            memcpy(op+4, match, 4);
+            match -= dec64;
+        } else { LZ4_copy8(op, match); match+=8; }
+        op += 8;
 
         if (unlikely(cpy>oend-12))
         {
-            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals */
-            if (op < oend-8)
+            BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
+            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+            if (op < oCopyLimit)
             {
-                LZ4_wildCopy(op, match, oend-8);
-                match += (oend-8) - op;
-                op = oend-8;
+                LZ4_wildCopy(op, match, oCopyLimit);
+                match += oCopyLimit - op;
+                op = oCopyLimit;
             }
             while (op<cpy) *op++ = *match++;
         }
diff --git a/payloads/libpayload/liblz4/lz4_wrapper.c b/payloads/libpayload/liblz4/lz4_wrapper.c
index 431fb55cc0..6de140e403 100644
--- a/payloads/libpayload/liblz4/lz4_wrapper.c
+++ b/payloads/libpayload/liblz4/lz4_wrapper.c
@@ -29,7 +29,6 @@
  * SUCH DAMAGE.
  */
 
-#include <assert.h>
 #include <endian.h>
 #include <libpayload.h>
 #include <lz4.h>
@@ -38,9 +37,28 @@
  * seem to be very inefficient in practice (at least on ARM64). Since libpayload
  * knows about endinaness and allows some basic assumptions (such as unaligned
  * access support), we can easily write the ones we need ourselves. */
-static u16 LZ4_readLE16(const void *src) { return le16toh(*(u16 *)src); }
-static void LZ4_copy4(void *dst, const void *src) { *(u32 *)dst = *(u32 *)src; }
-static void LZ4_copy8(void *dst, const void *src) { *(u64 *)dst = *(u64 *)src; }
+static uint16_t LZ4_readLE16(const void *src)
+{
+	return le16toh(*(uint16_t *)src);
+}
+static void LZ4_copy8(void *dst, const void *src)
+{
+/* ARM32 needs to be a special snowflake to prevent GCC from coalescing the
+ * access into LDRD/STRD (which don't support unaligned accesses). */
+#ifdef __arm__
+	uint32_t x0, x1;
+	asm volatile (
+		"ldr %[x0], [%[src]]\n\t"
+		"ldr %[x1], [%[src], #4]\n\t"
+		"str %[x0], [%[dst]]\n\t"
+		"str %[x1], [%[dst], #4]\n\t"
+		: [x0]"=r"(x0), [x1]"=r"(x1)
+		: [src]"r"(src), [dst]"r"(dst)
+		: "memory" );
+#else
+	*(uint64_t *)dst = *(const uint64_t *)src;
+#endif
+}
 
 typedef  uint8_t BYTE;
 typedef uint16_t U16;
@@ -52,58 +70,59 @@ typedef uint64_t U64;
 #define likely(expr) __builtin_expect((expr) != 0, 1)
 #define unlikely(expr) __builtin_expect((expr) != 0, 0)
 
-/* Unaltered (except removing unrelated code) from github.com/Cyan4973/lz4. */
-#include "lz4.c"	/* #include for inlining, do not link! */
+/* Unaltered (just removed unrelated code) from github.com/Cyan4973/lz4/dev. */
+#include "lz4.c.inc"	/* #include for inlining, do not link! */
 
 #define LZ4F_MAGICNUMBER 0x184D2204
 
 struct lz4_frame_header {
-	u32 magic;
+	uint32_t magic;
 	union {
-		u8 flags;
+		uint8_t flags;
 		struct {
-			u8 reserved0		: 2;
-			u8 has_content_checksum	: 1;
-			u8 has_content_size	: 1;
-			u8 has_block_checksum	: 1;
-			u8 independent_blocks	: 1;
-			u8 version		: 2;
+			uint8_t reserved0		: 2;
+			uint8_t has_content_checksum	: 1;
+			uint8_t has_content_size	: 1;
+			uint8_t has_block_checksum	: 1;
+			uint8_t independent_blocks	: 1;
+			uint8_t version			: 2;
 		};
 	};
 	union {
-		u8 block_descriptor;
+		uint8_t block_descriptor;
 		struct {
-			u8 reserved1		: 4;
-			u8 max_block_size	: 3;
-			u8 reserved2		: 1;
+			uint8_t reserved1		: 4;
+			uint8_t max_block_size		: 3;
+			uint8_t reserved2		: 1;
 		};
 	};
-	/* + u64 content_size iff has_content_size is set */
-	/* + u8 header_checksum */
+	/* + uint64_t content_size iff has_content_size is set */
+	/* + uint8_t header_checksum */
 } __attribute__((packed));
 
 struct lz4_block_header {
 	union {
-		u32 raw;
+		uint32_t raw;
 		struct {
-			u32 size		: 31;
-			u32 not_compressed	: 1;
+			uint32_t size		: 31;
+			uint32_t not_compressed	: 1;
 		};
 	};
 	/* + size bytes of data */
-	/* + u32 block_checksum iff has_block_checksum is set */
+	/* + uint32_t block_checksum iff has_block_checksum is set */
 } __attribute__((packed));
 
 size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
 {
 	const void *in = src;
 	void *out = dst;
+	size_t out_size = 0;
 	int has_block_checksum;
 
 	{ /* With in-place decompression the header may become invalid later. */
 		const struct lz4_frame_header *h = in;
 
-		if (srcn < sizeof(*h) + sizeof(u64) + sizeof(u8))
+		if (srcn < sizeof(*h) + sizeof(uint64_t) + sizeof(uint8_t))
 			return 0;	/* input overrun */
 
 		/* We assume there's always only a single, standard frame. */
@@ -117,25 +136,27 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
 
 		in += sizeof(*h);
 		if (h->has_content_size)
-			in += sizeof(u64);
-		in += sizeof(u8);
+			in += sizeof(uint64_t);
+		in += sizeof(uint8_t);
 	}
 
 	while (1) {
-		struct lz4_block_header b = { .raw = le32toh(*(u32 *)in) };
+		struct lz4_block_header b = { .raw = le32toh(*(uint32_t *)in) };
 		in += sizeof(struct lz4_block_header);
 
-		if (in - src + b.size > srcn)
-			return 0;		/* input overrun */
+		if ((size_t)(in - src) + b.size > srcn)
+			break;			/* input overrun */
 
-		if (!b.size)
-			return out - dst;	/* decompression successful */
+		if (!b.size) {
+			out_size = out - dst;
+			break;			/* decompression successful */
+		}
 
 		if (b.not_compressed) {
-			size_t size = MIN((u32)b.size, dst + dstn - out);
+			size_t size = MIN((uint32_t)b.size, dst + dstn - out);
 			memcpy(out, in, size);
 			if (size < b.size)
-				return 0;	/* output overrun */
+				break;		/* output overrun */
 			else
 				out += size;
 		} else {
@@ -144,15 +165,17 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
 					dst + dstn - out, endOnInputSize,
 					full, 0, noDict, out, NULL, 0);
 			if (ret < 0)
-				return 0;	/* decompression error */
+				break;		/* decompression error */
 			else
 				out += ret;
 		}
 
 		in += b.size;
 		if (has_block_checksum)
-			in += sizeof(u32);
+			in += sizeof(uint32_t);
 	}
+
+	return out_size;
 }
 
 size_t ulz4f(const void *src, void *dst)
diff --git a/src/Kconfig b/src/Kconfig
index 4cceb141c7..3def65b758 100644
--- a/src/Kconfig
+++ b/src/Kconfig
@@ -156,6 +156,17 @@ config COMPRESS_RAMSTAGE
 	  that decompression might slow down booting if the boot flash
 	  is connected through a slow link (i.e. SPI).
 
+config COMPRESS_PRERAM_STAGES
+	bool "Compress romstage and verstage with LZ4"
+	default y if !ARCH_X86
+	default n
+	help
+	  Compress romstage and (if it exists) verstage with LZ4 to save flash
+	  space and speed up boot, since the time for reading the image from SPI
+	  (and in the vboot case verifying it) is usually much greater than the
+	  time spent decompressing. Doesn't work for XIP stages (assume all
+	  ARCH_X86 for now) for obvious reasons.
+
 config INCLUDE_CONFIG_FILE
 	bool "Include the coreboot .config file into the ROM image"
 	default y
diff --git a/src/commonlib/Makefile.inc b/src/commonlib/Makefile.inc
index 19f9ba3b8d..2752922067 100644
--- a/src/commonlib/Makefile.inc
+++ b/src/commonlib/Makefile.inc
@@ -16,3 +16,8 @@ verstage-y += cbfs.c
 romstage-y += cbfs.c
 ramstage-y += cbfs.c
 smm-y += cbfs.c
+
+bootblock-y += lz4_wrapper.c
+verstage-y += lz4_wrapper.c
+romstage-y += lz4_wrapper.c
+ramstage-y += lz4_wrapper.c
diff --git a/src/commonlib/include/commonlib/cbfs_serialized.h b/src/commonlib/include/commonlib/cbfs_serialized.h
index bea5d6bb9d..c1a1c3b542 100644
--- a/src/commonlib/include/commonlib/cbfs_serialized.h
+++ b/src/commonlib/include/commonlib/cbfs_serialized.h
@@ -56,6 +56,7 @@
 
 #define CBFS_COMPRESS_NONE  0
 #define CBFS_COMPRESS_LZMA  1
+#define CBFS_COMPRESS_LZ4   2
 
 /** These are standard component types for well known
     components (i.e - those that coreboot needs to consume.
diff --git a/src/commonlib/include/commonlib/compression.h b/src/commonlib/include/commonlib/compression.h
new file mode 100644
index 0000000000..428ee42e65
--- /dev/null
+++ b/src/commonlib/include/commonlib/compression.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of the coreboot project.
+ *
+ * Copyright 2016 Google Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _COMMONLIB_COMPRESSION_H_
+#define _COMMONLIB_COMPRESSION_H_
+
+#include <stddef.h>
+
+/* Decompresses an LZ4F image (multiple LZ4 blocks with frame header) from src
+ * to dst, ensuring that it doesn't read more than srcn bytes and doesn't write
+ * more than dstn. Buffer sizes must stay below 2GB. Can decompress files loaded
+ * to the end of a buffer in-place, as long as buffer is larger than the final
+ * output size. (Usually just a few bytes, but may be up to (8 + dstn/255) in
+ * worst case. Will reliably return an error if buffer was too small.)
+ * Returns amount of decompressed bytes, or 0 on error.
+ */
+size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn);
+
+/* Same as ulz4fn() but does not perform any bounds checks. */
+size_t ulz4f(const void *src, void *dst);
+
+#endif	/* _COMMONLIB_COMPRESSION_H_ */
diff --git a/src/commonlib/include/commonlib/timestamp_serialized.h b/src/commonlib/include/commonlib/timestamp_serialized.h
index 6ce087924d..c665964a0d 100644
--- a/src/commonlib/include/commonlib/timestamp_serialized.h
+++ b/src/commonlib/include/commonlib/timestamp_serialized.h
@@ -47,6 +47,8 @@ enum timestamp_id {
 	TS_END_COPYROM = 14,
 	TS_START_ULZMA = 15,
 	TS_END_ULZMA = 16,
+	TS_START_ULZ4F = 17,
+	TS_END_ULZ4F = 18,
 	TS_DEVICE_ENUMERATE = 30,
 	TS_DEVICE_CONFIGURE = 40,
 	TS_DEVICE_ENABLE = 50,
diff --git a/src/commonlib/lz4.c.inc b/src/commonlib/lz4.c.inc
new file mode 100644
index 0000000000..b3be4e5b44
--- /dev/null
+++ b/src/commonlib/lz4.c.inc
@@ -0,0 +1,280 @@
+/*
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+
+/**************************************
+*  Reading and writing into memory
+**************************************/
+
+/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */
+static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
+
+#if 0
+    const size_t l2 = 8 - (((size_t)d) & (sizeof(void*)-1));
+    LZ4_copy8(d,s); if (d>e-9) return;
+    d+=l2; s+=l2;
+#endif /* join to align */
+
+    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
+}
+
+
+/**************************************
+*  Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+
+#define ML_BITS  4
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/**************************************
+*  Local Structures and types
+**************************************/
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+
+
+/*******************************
+*  Decompression functions
+*******************************/
+/*
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is essential this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+FORCE_INLINE int LZ4_decompress_generic(
+                 const char* const source,
+                 char* const dest,
+                 int inputSize,
+                 int outputSize,         /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
+
+                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
+                 int partialDecoding,    /* full, partial */
+                 int targetOutputSize,   /* only used if partialDecoding==partial */
+                 int dict,               /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* == dest if dict == noDict */
+                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
+                 const size_t dictSize         /* note : = 0 if noDict */
+                 )
+{
+    /* Local Variables */
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + outputSize;
+    BYTE* cpy;
+    BYTE* oexit = op + targetOutputSize;
+    const BYTE* const lowLimit = lowPrefix - dictSize;
+
+    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
+    const unsigned dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
+    const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+    const int safeDecode = (endOnInput==endOnInputSize);
+    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+    const int inPlaceDecode = ((ip >= op) && (ip < oend));
+
+
+    /* Special cases */
+    if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;                         /* targetOutputSize too high => decode everything */
+    if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
+    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+
+
+    /* Main Loop */
+    while (1)
+    {
+        unsigned token;
+        size_t length;
+        const BYTE* match;
+        size_t offset;
+
+        if (unlikely((inPlaceDecode) && (op + WILDCOPYLENGTH > ip))) goto _output_error;   /* output stream ran over input stream */
+
+        /* get literal length */
+        token = *ip++;
+        if ((length=(token>>ML_BITS)) == RUN_MASK)
+        {
+            unsigned s;
+            do
+            {
+                s = *ip++;
+                length += s;
+            }
+            while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) && (s==255) );
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error;   /* overflow detection */
+            if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error;   /* overflow detection */
+        }
+
+        /* copy literals */
+        cpy = op+length;
+        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
+            || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)))
+        {
+            if (partialDecoding)
+            {
+                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
+                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
+            }
+            else
+            {
+                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
+                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
+            }
+            memmove(op, ip, length);
+            ip += length;
+            op += length;
+            break;     /* Necessarily EOF, due to parsing restrictions */
+        }
+        LZ4_wildCopy(op, ip, cpy);
+        ip += length; op = cpy;
+
+        /* get offset */
+        offset = LZ4_readLE16(ip); ip+=2;
+        match = op - offset;
+        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside buffers */
+
+        /* get matchlength */
+        length = token & ML_MASK;
+        if (length == ML_MASK)
+        {
+            unsigned s;
+            do
+            {
+                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
+                s = *ip++;
+                length += s;
+            } while (s==255);
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error;   /* overflow detection */
+        }
+        length += MINMATCH;
+
+        /* check external dictionary */
+        if ((dict==usingExtDict) && (match < lowPrefix))
+        {
+            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;   /* doesn't respect parsing restriction */
+
+            if (length <= (size_t)(lowPrefix-match))
+            {
+                /* match can be copied as a single segment from external dictionary */
+                match = dictEnd - (lowPrefix-match);
+                memmove(op, match, length); op += length;
+            }
+            else
+            {
+                /* match encompass external dictionary and current block */
+                size_t copySize = (size_t)(lowPrefix-match);
+                memcpy(op, dictEnd - copySize, copySize);
+                op += copySize;
+                copySize = length - copySize;
+                if (copySize > (size_t)(op-lowPrefix))   /* overlap copy */
+                {
+                    BYTE* const endOfMatch = op + copySize;
+                    const BYTE* copyFrom = lowPrefix;
+                    while (op < endOfMatch) *op++ = *copyFrom++;
+                }
+                else
+                {
+                    memcpy(op, lowPrefix, copySize);
+                    op += copySize;
+                }
+            }
+            continue;
+        }
+
+        /* copy match within block */
+        cpy = op + length;
+        if (unlikely(offset<8))
+        {
+            const int dec64 = dec64table[offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[offset];
+            memcpy(op+4, match, 4);
+            match -= dec64;
+        } else { LZ4_copy8(op, match); match+=8; }
+        op += 8;
+
+        if (unlikely(cpy>oend-12))
+        {
+            BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
+            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+            if (op < oCopyLimit)
+            {
+                LZ4_wildCopy(op, match, oCopyLimit);
+                match += oCopyLimit - op;
+                op = oCopyLimit;
+            }
+            while (op<cpy) *op++ = *match++;
+        }
+        else
+            LZ4_wildCopy(op, match, cpy);
+        op=cpy;   /* correction */
+    }
+
+    /* end of decoding */
+    if (endOnInput)
+       return (int) (((char*)op)-dest);     /* Nb of output bytes decoded */
+    else
+       return (int) (((const char*)ip)-source);   /* Nb of input bytes read */
+
+    /* Overflow error detected */
+_output_error:
+    return (int) (-(((const char*)ip)-source))-1;
+}
diff --git a/src/commonlib/lz4_wrapper.c b/src/commonlib/lz4_wrapper.c
new file mode 100644
index 0000000000..ea7b90d8fd
--- /dev/null
+++ b/src/commonlib/lz4_wrapper.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <commonlib/compression.h>
+#include <commonlib/endian.h>
+#include <commonlib/helpers.h>
+#include <stdint.h>
+#include <string.h>
+
+/* LZ4 comes with its own supposedly portable memory access functions, but they
+ * seem to be very inefficient in practice (at least on ARM64). Since coreboot
+ * knows about endinaness and allows some basic assumptions (such as unaligned
+ * access support), we can easily write the ones we need ourselves. */
+static uint16_t LZ4_readLE16(const void *src)
+{
+	return read_le16(src);
+}
+static void LZ4_copy8(void *dst, const void *src)
+{
+/* ARM32 needs to be a special snowflake to prevent GCC from coalescing the
+ * access into LDRD/STRD (which don't support unaligned accesses). */
+#ifdef __arm__	/* ARMv < 6 doesn't support unaligned accesses at all. */
+	#if defined(__COREBOOT_ARM_ARCH__) && __COREBOOT_ARM_ARCH__ < 6
+		int i;
+		for (i = 0; i < 8; i++)
+			((uint8_t *)dst)[i] = ((uint8_t *)src)[i];
+	#else
+		uint32_t x0, x1;
+		asm volatile (
+			"ldr %[x0], [%[src]]\n\t"
+			"ldr %[x1], [%[src], #4]\n\t"
+			"str %[x0], [%[dst]]\n\t"
+			"str %[x1], [%[dst], #4]\n\t"
+			: [x0]"=r"(x0), [x1]"=r"(x1)
+			: [src]"r"(src), [dst]"r"(dst)
+			: "memory" );
+	#endif
+#else
+	*(uint64_t *)dst = *(const uint64_t *)src;
+#endif
+}
+
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+
+#define FORCE_INLINE static inline __attribute__((always_inline))
+#define likely(expr) __builtin_expect((expr) != 0, 1)
+#define unlikely(expr) __builtin_expect((expr) != 0, 0)
+
+/* Unaltered (just removed unrelated code) from github.com/Cyan4973/lz4/dev. */
+#include "lz4.c.inc"	/* #include for inlining, do not link! */
+
+#define LZ4F_MAGICNUMBER 0x184D2204
+
+struct lz4_frame_header {
+	uint32_t magic;
+	union {
+		uint8_t flags;
+		struct {
+			uint8_t reserved0		: 2;
+			uint8_t has_content_checksum	: 1;
+			uint8_t has_content_size	: 1;
+			uint8_t has_block_checksum	: 1;
+			uint8_t independent_blocks	: 1;
+			uint8_t version			: 2;
+		};
+	};
+	union {
+		uint8_t block_descriptor;
+		struct {
+			uint8_t reserved1		: 4;
+			uint8_t max_block_size		: 3;
+			uint8_t reserved2		: 1;
+		};
+	};
+	/* + uint64_t content_size iff has_content_size is set */
+	/* + uint8_t header_checksum */
+} __attribute__((packed));
+
+struct lz4_block_header {
+	union {
+		uint32_t raw;
+		struct {
+			uint32_t size		: 31;
+			uint32_t not_compressed	: 1;
+		};
+	};
+	/* + size bytes of data */
+	/* + uint32_t block_checksum iff has_block_checksum is set */
+} __attribute__((packed));
+
+size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
+{
+	const void *in = src;
+	void *out = dst;
+	size_t out_size = 0;
+	int has_block_checksum;
+
+	{ /* With in-place decompression the header may become invalid later. */
+		const struct lz4_frame_header *h = in;
+
+		if (srcn < sizeof(*h) + sizeof(uint64_t) + sizeof(uint8_t))
+			return 0;	/* input overrun */
+
+		/* We assume there's always only a single, standard frame. */
+		if (read_le32(&h->magic) != LZ4F_MAGICNUMBER || h->version != 1)
+			return 0;	/* unknown format */
+		if (h->reserved0 || h->reserved1 || h->reserved2)
+			return 0;	/* reserved must be zero */
+		if (!h->independent_blocks)
+			return 0;	/* we don't support block dependency */
+		has_block_checksum = h->has_block_checksum;
+
+		in += sizeof(*h);
+		if (h->has_content_size)
+			in += sizeof(uint64_t);
+		in += sizeof(uint8_t);
+	}
+
+	while (1) {
+		struct lz4_block_header b = { .raw = read_le32(in) };
+		in += sizeof(struct lz4_block_header);
+
+		if ((size_t)(in - src) + b.size > srcn)
+			break;			/* input overrun */
+
+		if (!b.size) {
+			out_size = out - dst;
+			break;			/* decompression successful */
+		}
+
+		if (b.not_compressed) {
+			size_t size = MIN((uint32_t)b.size, dst + dstn - out);
+			memcpy(out, in, size);
+			if (size < b.size)
+				break;		/* output overrun */
+			else
+				out += size;
+		} else {
+			/* constant folding essential, do not touch params! */
+			int ret = LZ4_decompress_generic(in, out, b.size,
+					dst + dstn - out, endOnInputSize,
+					full, 0, noDict, out, NULL, 0);
+			if (ret < 0)
+				break;		/* decompression error */
+			else
+				out += ret;
+		}
+
+		in += b.size;
+		if (has_block_checksum)
+			in += sizeof(uint32_t);
+	}
+
+	return out_size;
+}
+
+size_t ulz4f(const void *src, void *dst)
+{
+	/* LZ4 uses signed size parameters, so can't just use ((u32)-1) here. */
+	return ulz4fn(src, 1*GiB, dst, 1*GiB);
+}
diff --git a/src/include/cbfs.h b/src/include/cbfs.h
index 0d95387940..2d1921898a 100644
--- a/src/include/cbfs.h
+++ b/src/include/cbfs.h
@@ -35,6 +35,14 @@ int cbfs_boot_locate(struct cbfsf *fh, const char *name, uint32_t *type);
  * the mapping and sets the size of the file. */
 void *cbfs_boot_map_with_leak(const char *name, uint32_t type, size_t *size);
 
+/* Load |in_size| bytes from |rdev| at |offset| to the |buffer_size| bytes
+ * large |buffer|, decompressing it according to |compression| in the process.
+ * Returns the decompressed file size, or 0 on error.
+ * LZMA files will be mapped for decompression. LZ4 files will be decompressed
+ * in-place with the buffer size requirements outlined in compression.h. */
+size_t cbfs_load_and_decompress(const struct region_device *rdev, size_t offset,
+	size_t in_size, void *buffer, size_t buffer_size, uint32_t compression);
+
 /* Load stage into memory filling in prog. Return 0 on success. < 0 on error. */
 int cbfs_prog_stage_load(struct prog *prog);
 
diff --git a/src/lib/cbfs.c b/src/lib/cbfs.c
index 55a8536a36..82bfa2d8ad 100644
--- a/src/lib/cbfs.c
+++ b/src/lib/cbfs.c
@@ -19,9 +19,11 @@
 #include <stdlib.h>
 #include <boot_device.h>
 #include <cbfs.h>
+#include <commonlib/compression.h>
 #include <endian.h>
 #include <lib.h>
 #include <symbols.h>
+#include <timestamp.h>
 
 #define ERROR(x...) printk(BIOS_ERR, "CBFS: " x)
 #define LOG(x...) printk(BIOS_INFO, "CBFS: " x)
@@ -68,13 +70,57 @@ void *cbfs_boot_map_with_leak(const char *name, uint32_t type, size_t *size)
 	return rdev_mmap(&fh.data, 0, fsize);
 }
 
-static size_t inflate(void *src, void *dst)
+size_t cbfs_load_and_decompress(const struct region_device *rdev, size_t offset,
+	size_t in_size, void *buffer, size_t buffer_size, uint32_t compression)
 {
-	if (ENV_BOOTBLOCK || ENV_VERSTAGE)
-		return 0;
-	if (ENV_ROMSTAGE && !IS_ENABLED(CONFIG_COMPRESS_RAMSTAGE))
+	size_t out_size;
+
+	switch (compression) {
+	case CBFS_COMPRESS_NONE:
+		if (rdev_readat(rdev, buffer, offset, in_size) != in_size)
+			return 0;
+		return in_size;
+
+	case CBFS_COMPRESS_LZ4:
+		if ((ENV_BOOTBLOCK || ENV_VERSTAGE) &&
+		    !IS_ENABLED(CONFIG_COMPRESS_PRERAM_STAGES))
+			return 0;
+
+		/* Load the compressed image to the end of the available memory
+		 * area for in-place decompression. It is the responsibility of
+		 * the caller to ensure that buffer_size is large enough
+		 * (see compression.h, guaranteed by cbfstool for stages). */
+		void *compr_start = buffer + buffer_size - in_size;
+		if (rdev_readat(rdev, compr_start, offset, in_size) != in_size)
+			return 0;
+
+		timestamp_add_now(TS_START_ULZ4F);
+		out_size = ulz4fn(compr_start, in_size, buffer, buffer_size);
+		timestamp_add_now(TS_END_ULZ4F);
+		return out_size;
+
+	case CBFS_COMPRESS_LZMA:
+		if (ENV_BOOTBLOCK || ENV_VERSTAGE)
+			return 0;
+		if (ENV_ROMSTAGE && !IS_ENABLED(CONFIG_COMPRESS_RAMSTAGE))
+			return 0;
+
+		void *map = rdev_mmap(rdev, offset, in_size);
+		if (map == NULL)
+			return 0;
+
+		/* Note: timestamp not useful for memory-mapped media (x86) */
+		timestamp_add_now(TS_START_ULZMA);
+		out_size = ulzman(map, in_size, buffer, buffer_size);
+		timestamp_add_now(TS_END_ULZMA);
+
+		rdev_munmap(rdev, map);
+
+		return out_size;
+
+	default:
 		return 0;
-	return ulzma(src, dst);
+	}
 }
 
 static inline int tohex4(unsigned int c)
@@ -152,22 +198,9 @@ int cbfs_prog_stage_load(struct prog *pstage)
 			goto out;
 	}
 
-	if (stage.compression == CBFS_COMPRESS_NONE) {
-		if (rdev_readat(fh, load, foffset, fsize) != fsize)
-			return -1;
-	} else if (stage.compression == CBFS_COMPRESS_LZMA) {
-		void *map = rdev_mmap(fh, foffset, fsize);
-
-		if (map == NULL)
-			return -1;
-
-		fsize = inflate(map, load);
-
-		rdev_munmap(fh, map);
-
-		if (!fsize)
-			return -1;
-	} else
+	fsize = cbfs_load_and_decompress(fh, foffset, fsize, load,
+					 stage.memlen, stage.compression);
+	if (!fsize)
 		return -1;
 
 	/* Clear area not covered by file. */
diff --git a/src/lib/lzma.c b/src/lib/lzma.c
index 5566cd5dfc..d0b4c355b1 100644
--- a/src/lib/lzma.c
+++ b/src/lib/lzma.c
@@ -29,8 +29,6 @@ size_t ulzman(const void *src, size_t srcn, void *dst, size_t dstn)
 	MAYBE_STATIC unsigned char scratchpad[15980];
 	const unsigned char *cp;
 
-	/* Note: these timestamps aren't useful for memory-mapped media (x86) */
-	timestamp_add_now(TS_START_ULZMA);
 	memcpy(properties, src, LZMA_PROPERTIES_SIZE);
 	/* The outSize in LZMA stream is a 64bit integer stored in little-endian
 	 * (ref: lzma.cc@LZMACompress: put_64). To prevent accessing by
@@ -55,7 +53,6 @@ size_t ulzman(const void *src, size_t srcn, void *dst, size_t dstn)
 		printk(BIOS_WARNING, "lzma: Decoding error = %d\n", res);
 		return 0;
 	}
-	timestamp_add_now(TS_END_ULZMA);
 	return outProcessed;
 }
 
diff --git a/src/lib/rmodule.c b/src/lib/rmodule.c
index 628195cafa..585fb5f2ef 100644
--- a/src/lib/rmodule.c
+++ b/src/lib/rmodule.c
@@ -259,8 +259,6 @@ int rmodule_stage_load(struct rmod_stage_load *rsl)
 	struct cbfs_stage stage;
 	void *rmod_loc;
 	struct region_device *fh;
-	const int use_lzma = ENV_RAMSTAGE
-		|| (ENV_ROMSTAGE && IS_ENABLED(CONFIG_COMPRESS_RAMSTAGE));
 
 	if (rsl->prog == NULL || prog_name(rsl->prog) == NULL)
 		return -1;
@@ -284,24 +282,8 @@ int rmodule_stage_load(struct rmod_stage_load *rsl)
 	printk(BIOS_INFO, "Decompressing stage %s @ 0x%p (%d bytes)\n",
 	       prog_name(rsl->prog), rmod_loc, stage.memlen);
 
-	if (stage.compression == CBFS_COMPRESS_NONE) {
-		if (rdev_readat(fh, rmod_loc, sizeof(stage), stage.len) !=
-		    stage.len)
-			return -1;
-	} else if (use_lzma && (stage.compression == CBFS_COMPRESS_LZMA)) {
-		size_t fsize;
-		void *map = rdev_mmap(fh, sizeof(stage), stage.len);
-
-		if (map == NULL)
-			return -1;
-
-		fsize = ulzman(map, stage.len, rmod_loc, stage.memlen);
-
-		rdev_munmap(fh, map);
-
-		if (!fsize)
-			return -1;
-	} else
+	if (!cbfs_load_and_decompress(fh, sizeof(stage), stage.len, rmod_loc,
+				      stage.memlen, stage.compression))
 		return -1;
 
 	if (rmodule_parse(rmod_loc, &rmod_stage))
diff --git a/src/lib/selfboot.c b/src/lib/selfboot.c
index f3a1e52728..7d3e2dd5f6 100644
--- a/src/lib/selfboot.c
+++ b/src/lib/selfboot.c
@@ -14,6 +14,7 @@
  * GNU General Public License for more details.
  */
 
+#include <commonlib/compression.h>
 #include <console/console.h>
 #include <cpu/cpu.h>
 #include <endian.h>
@@ -25,6 +26,7 @@
 #include <lib.h>
 #include <bootmem.h>
 #include <program_loading.h>
+#include <timestamp.h>
 
 static const unsigned long lb_start = (unsigned long)&_program;
 static const unsigned long lb_end = (unsigned long)&_eprogram;
@@ -386,7 +388,18 @@ static int load_self_segments(
 			switch(ptr->compression) {
 				case CBFS_COMPRESS_LZMA: {
 					printk(BIOS_DEBUG, "using LZMA\n");
+					timestamp_add_now(TS_START_ULZMA);
 					len = ulzman(src, len, dest, memsz);
+					timestamp_add_now(TS_END_ULZMA);
+					if (!len) /* Decompression Error. */
+						return 0;
+					break;
+				}
+				case CBFS_COMPRESS_LZ4: {
+					printk(BIOS_DEBUG, "using LZ4\n");
+					timestamp_add_now(TS_START_ULZ4F);
+					len = ulz4fn(src, len, dest, memsz);
+					timestamp_add_now(TS_END_ULZ4F);
 					if (!len) /* Decompression Error. */
 						return 0;
 					break;
diff --git a/src/vendorcode/google/chromeos/vboot2/Makefile.inc b/src/vendorcode/google/chromeos/vboot2/Makefile.inc
index 61cf71df5a..2ca8a1adc3 100644
--- a/src/vendorcode/google/chromeos/vboot2/Makefile.inc
+++ b/src/vendorcode/google/chromeos/vboot2/Makefile.inc
@@ -72,6 +72,7 @@ ifeq ($(CONFIG_SEPARATE_VERSTAGE),y)
 cbfs-files-$(CONFIG_SEPARATE_VERSTAGE) += $(CONFIG_CBFS_PREFIX)/verstage
 $(CONFIG_CBFS_PREFIX)/verstage-file := $(objcbfs)/verstage.elf
 $(CONFIG_CBFS_PREFIX)/verstage-type := stage
+$(CONFIG_CBFS_PREFIX)/verstage-compression := $(CBFS_PRERAM_COMPRESS_FLAG)
 # Verstage on x86 expected to be xip.
 ifeq ($(CONFIG_ARCH_VERSTAGE_X86_32)$(CONFIG_ARCH_VERSTAGE_X86_64),y)
 $(CONFIG_CBFS_PREFIX)/verstage-options := -a 64 --xip -S ".car.data"
diff --git a/util/cbfstool/Makefile.inc b/util/cbfstool/Makefile.inc
index 8e04411206..f4806168af 100644
--- a/util/cbfstool/Makefile.inc
+++ b/util/cbfstool/Makefile.inc
@@ -2,19 +2,21 @@ cbfsobj :=
 cbfsobj += cbfstool.o
 cbfsobj += common.o
 cbfsobj += compress.o
-cbfsobj += cbfs.o
 cbfsobj += cbfs_hash.o
 cbfsobj += cbfs_image.o
 cbfsobj += cbfs-mkstage.o
 cbfsobj += cbfs-mkpayload.o
 cbfsobj += elfheaders.o
-cbfsobj += mem_pool.o
-cbfsobj += region.o
 cbfsobj += rmodule.o
 cbfsobj += xdr.o
 cbfsobj += fit.o
 cbfsobj += partitioned_file.o
+# COMMONLIB
+cbfsobj += cbfs.o
 cbfsobj += fsp1_1_relocate.o
+cbfsobj += lz4_wrapper.o
+cbfsobj += mem_pool.o
+cbfsobj += region.o
 # LZMA
 cbfsobj += lzma.o
 cbfsobj += LzFind.o
@@ -25,6 +27,11 @@ cbfsobj += 2sha_utility.o
 cbfsobj += 2sha1.o
 cbfsobj += 2sha256.o
 cbfsobj += 2sha512.o
+# LZ4
+cbfsobj += lz4.o
+cbfsobj += lz4hc.o
+cbfsobj += lz4frame.o
+cbfsobj += xxhash.o
 # FMAP
 cbfsobj += fmap.o
 cbfsobj += kv_pair.o
@@ -110,6 +117,10 @@ $(objutil)/cbfstool/%.o: $(top)/src/commonlib/%.c
 	printf "    HOSTCC     $(subst $(objutil)/,,$(@))\n"
 	$(HOSTCC) $(TOOLCPPFLAGS) $(TOOLCFLAGS) $(HOSTCFLAGS) -c -o $@ $<
 
+$(objutil)/cbfstool/%.o: $(top)/util/cbfstool/lz4/lib/%.c
+	printf "    HOSTCC     $(subst $(objutil)/,,$(@))\n"
+	$(HOSTCC) $(TOOLCPPFLAGS) $(TOOLCFLAGS) $(HOSTCFLAGS) -c -o $@ $<
+
 $(objutil)/cbfstool/cbfstool: $(addprefix $(objutil)/cbfstool/,$(cbfsobj))
 	printf "    HOSTCC     $(subst $(objutil)/,,$(@)) (link)\n"
 	$(HOSTCC) $(TOOLLDFLAGS) -o $@ $(addprefix $(objutil)/cbfstool/,$(cbfsobj))
@@ -137,6 +148,8 @@ $(objutil)/cbfstool/2sha1.o: TOOLCFLAGS += -Wno-cast-qual
 $(objutil)/cbfstool/region.o: TOOLCFLAGS += -Wno-sign-compare -Wno-cast-qual
 $(objutil)/cbfstool/cbfs.o: TOOLCFLAGS += -Wno-sign-compare -Wno-cast-qual
 $(objutil)/cbfstool/mem_pool.o: TOOLCFLAGS += -Wno-sign-compare -Wno-cast-qual
+# Tolerate lz4 warnings
+$(objutil)/cbfstool/lz4.o: TOOLCFLAGS += -Wno-missing-prototypes
 
 $(objutil)/cbfstool/fmd.o: $(objutil)/cbfstool/fmd_parser.h
 $(objutil)/cbfstool/fmd.o: $(objutil)/cbfstool/fmd_scanner.h
diff --git a/util/cbfstool/cbfs-mkstage.c b/util/cbfstool/cbfs-mkstage.c
index 9d8865828e..53d3edbd3e 100644
--- a/util/cbfstool/cbfs-mkstage.c
+++ b/util/cbfstool/cbfs-mkstage.c
@@ -26,6 +26,8 @@
 #include "cbfs.h"
 #include "rmodule.h"
 
+#include <commonlib/compression.h>
+
 /* Checks if program segment contains the ignored section */
 static int is_phdr_ignored(Elf64_Phdr *phdr, Elf64_Shdr *shdr)
 {
@@ -267,8 +269,44 @@ int parse_elf_to_stage(const struct buffer *input, struct buffer *output,
 		     "- disabled.\n");
 		memcpy(output->data + sizeof(struct cbfs_stage),
 		       buffer, data_end - data_start);
+		outlen = data_end - data_start;
 		algo = CBFS_COMPRESS_NONE;
 	}
+
+	/* Check for enough BSS scratch space to decompress LZ4 in-place. */
+	if (algo == CBFS_COMPRESS_LZ4) {
+		size_t result;
+		size_t memlen = mem_end - data_start;
+		size_t compressed_size = outlen;
+		char *compare_buffer = malloc(memlen);
+		char *start = compare_buffer + memlen - compressed_size;
+
+		if (compare_buffer == NULL) {
+			ERROR("Can't allocate memory!\n");
+			free(buffer);
+			goto err;
+		}
+
+		memcpy(start, output->data + sizeof(struct cbfs_stage),
+		       compressed_size);
+		result = ulz4fn(start, compressed_size, compare_buffer, memlen);
+
+		if (result == 0) {
+			ERROR("Not enough scratch space to decompress LZ4 in-place -- increase BSS size or disable compression!\n");
+			free(compare_buffer);
+			free(buffer);
+			goto err;
+		}
+		if (result != data_end - data_start ||
+		    memcmp(compare_buffer, buffer, data_end - data_start)) {
+			ERROR("LZ4 compression BUG! Report to mailing list.\n");
+			free(compare_buffer);
+			free(buffer);
+			goto err;
+		}
+		free(compare_buffer);
+	}
+
 	free(buffer);
 
 	/* Set up for output marshaling. */
diff --git a/util/cbfstool/cbfs_image.c b/util/cbfstool/cbfs_image.c
index 95e6f42b17..6ed26743cd 100644
--- a/util/cbfstool/cbfs_image.c
+++ b/util/cbfstool/cbfs_image.c
@@ -54,7 +54,8 @@
 static const struct typedesc_t types_cbfs_compression[] = {
 	{CBFS_COMPRESS_NONE, "none"},
 	{CBFS_COMPRESS_LZMA, "LZMA"},
-	{0, NULL}
+	{CBFS_COMPRESS_LZ4, "LZ4"},
+	{0, NULL},
 };
 
 static const char *lookup_name_by_type(const struct typedesc_t *desc, uint32_t type,
diff --git a/util/cbfstool/common.h b/util/cbfstool/common.h
index 42ae1e2b3d..ea6f3cf06c 100644
--- a/util/cbfstool/common.h
+++ b/util/cbfstool/common.h
@@ -165,7 +165,11 @@ typedef int (*comp_func_ptr) (char *in, int in_len, char *out, int *out_len);
 typedef int (*decomp_func_ptr) (char *in, int in_len, char *out, int out_len,
 				size_t *actual_size);
 
-enum comp_algo { CBFS_COMPRESS_NONE = 0, CBFS_COMPRESS_LZMA = 1 };
+enum comp_algo {
+	CBFS_COMPRESS_NONE = 0,
+	CBFS_COMPRESS_LZMA = 1,
+	CBFS_COMPRESS_LZ4 = 2,
+};
 
 comp_func_ptr compression_function(enum comp_algo algo);
 decomp_func_ptr decompression_function(enum comp_algo algo);
diff --git a/util/cbfstool/compress.c b/util/cbfstool/compress.c
index 51353317f4..a6a0df4dd1 100644
--- a/util/cbfstool/compress.c
+++ b/util/cbfstool/compress.c
@@ -20,7 +20,42 @@
 
 #include <string.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include "common.h"
+#include "lz4/lib/lz4frame.h"
+#include <commonlib/compression.h>
+
+static int lz4_compress(char *in, int in_len, char *out, int *out_len)
+{
+	LZ4F_preferences_t prefs = {
+		.compressionLevel = 20,
+		.frameInfo = {
+			.blockSizeID = max4MB,
+			.blockMode = blockIndependent,
+			.contentChecksumFlag = noContentChecksum,
+		},
+	};
+	size_t worst_size = LZ4F_compressFrameBound(in_len, &prefs);
+	void *bounce = malloc(worst_size);
+	if (!bounce)
+		return -1;
+	*out_len = LZ4F_compressFrame(bounce, worst_size, in, in_len, &prefs);
+	if (LZ4F_isError(*out_len) || *out_len >= in_len)
+		return -1;
+	memcpy(out, bounce, *out_len);
+	return 0;
+}
+
+static int lz4_decompress(char *in, int in_len, char *out, int out_len,
+			  size_t *actual_size)
+{
+	size_t result = ulz4fn(in, in_len, out, out_len);
+	if (result == 0)
+		return -1;
+	if (actual_size != NULL)
+		*actual_size = result;
+	return 0;
+}
 
 static int lzma_compress(char *in, int in_len, char *out, int *out_len)
 {
@@ -58,6 +93,9 @@ comp_func_ptr compression_function(enum comp_algo algo)
 	case CBFS_COMPRESS_LZMA:
 		compress = lzma_compress;
 		break;
+	case CBFS_COMPRESS_LZ4:
+		compress = lz4_compress;
+		break;
 	default:
 		ERROR("Unknown compression algorithm %d!\n", algo);
 		return NULL;
@@ -75,6 +113,9 @@ decomp_func_ptr decompression_function(enum comp_algo algo)
 	case CBFS_COMPRESS_LZMA:
 		decompress = lzma_decompress;
 		break;
+	case CBFS_COMPRESS_LZ4:
+		decompress = lz4_decompress;
+		break;
 	default:
 		ERROR("Unknown compression algorithm %d!\n", algo);
 		return NULL;
diff --git a/util/cbfstool/lz4/Makefile b/util/cbfstool/lz4/Makefile
new file mode 100644
index 0000000000..931da35d95
--- /dev/null
+++ b/util/cbfstool/lz4/Makefile
@@ -0,0 +1,119 @@
+# ################################################################
+# LZ4 - Makefile
+# Copyright (C) Yann Collet 2011-2015
+# All rights reserved.
+#
+# BSD license
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice, this
+#   list of conditions and the following disclaimer in the documentation and/or
+#   other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# You can contact the author at :
+#  - LZ4 source repository : https://github.com/Cyan4973/lz4
+#  - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c
+# ################################################################
+
+# Version number
+export VERSION=132
+export RELEASE=r$(VERSION)
+
+DESTDIR?=
+PREFIX ?= /usr/local
+
+LIBDIR ?= $(PREFIX)/lib
+INCLUDEDIR=$(PREFIX)/include
+PRGDIR  = programs
+LZ4DIR  = lib
+
+
+# Define nul output
+ifneq (,$(filter Windows%,$(OS)))
+VOID = nul
+else
+VOID = /dev/null
+endif
+
+
+.PHONY: default all lib lz4programs clean test versionsTest examples
+
+default: lz4programs
+
+all: lib lz4programs
+
+lib:
+	@$(MAKE) -C $(LZ4DIR) all
+
+lz4programs:
+	@$(MAKE) -C $(PRGDIR)
+
+clean:
+	@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
+	@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
+	@$(MAKE) -C examples $@ > $(VOID)
+	@$(MAKE) -C versionsTest $@ > $(VOID)
+	@echo Cleaning completed
+
+
+#------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+
+install:
+	@$(MAKE) -C $(LZ4DIR) $@
+	@$(MAKE) -C $(PRGDIR) $@
+
+uninstall:
+	@$(MAKE) -C $(LZ4DIR) $@
+	@$(MAKE) -C $(PRGDIR) $@
+
+travis-install:
+	sudo $(MAKE) install
+
+test:
+	$(MAKE) -C $(PRGDIR) test
+
+cmake:
+	@cd cmake_unofficial; cmake CMakeLists.txt; $(MAKE)
+
+gpptest: clean
+	$(MAKE) all CC=g++ CFLAGS="-O3 -I../lib -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
+
+clangtest: clean
+	CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) all CC=clang
+
+sanitize: clean
+	CFLAGS="-O3 -g -fsanitize=undefined" $(MAKE) test CC=clang FUZZER_TIME="-T1mn" NB_LOOPS=-i1
+
+staticAnalyze: clean
+	CFLAGS=-g scan-build --status-bugs -v $(MAKE) all
+
+armtest: clean
+	CFLAGS="-O3 -Werror" $(MAKE) -C $(LZ4DIR) all CC=arm-linux-gnueabi-gcc
+	CFLAGS="-O3 -Werror" $(MAKE) -C $(PRGDIR) bins CC=arm-linux-gnueabi-gcc
+
+versionsTest: clean
+	$(MAKE) -C versionsTest
+
+examples:
+	$(MAKE) -C $(LZ4DIR)
+	$(MAKE) -C $(PRGDIR) lz4
+	$(MAKE) -C examples test
+
+endif
diff --git a/util/cbfstool/lz4/NEWS b/util/cbfstool/lz4/NEWS
new file mode 100644
index 0000000000..a1baead891
--- /dev/null
+++ b/util/cbfstool/lz4/NEWS
@@ -0,0 +1,185 @@
+r132
+Improved: Small decompression speed boost (+4%)
+Improved: Performance on ARMv6 and ARMv7
+Added : Debianization, by Evgeniy Polyakov
+Makefile: Safely protects lz4 version when selecting custom CFLAGS
+Makefile: Generates intermediate object files (*.o) for faster compilation on low power systems
+
+r131
+New    : Dos/DJGPP target, thanks to Louis Santillan (#114)
+Added  : Example using lz4frame library, by Zbigniew Jędrzejewski-Szmek (#118)
+Changed: xxhash symbols are modified (namespace emulation) within liblz4
+
+r130:
+Fixed  : incompatibility sparse mode vs console, reported by Yongwoon Cho (#105)
+Fixed  : LZ4IO exits too early when frame crc not present, reported by Yongwoon Cho (#106)
+Fixed  : incompatibility sparse mode vs append mode, reported by Takayuki Matsuoka (#110)
+Performance fix : big compression speed boost for clang (+30%)
+New    : cross-version test, by Takayuki Matsuoka
+
+r129:
+Added  : LZ4_compress_fast(), LZ4_compress_fast_continue()
+Added  : LZ4_compress_destSize()
+Changed: New lz4 and lz4hc compression API. Previous function prototypes still supported.
+Changed: Sparse file support enabled by default
+New    : LZ4 CLI improved performance compressing/decompressing multiple files (#86, kind contribution from Kyle J. Harper & Takayuki Matsuoka)
+Fixed  : GCC 4.9+ optimization bug - Reported by Markus Trippelsdorf, Greg Slazinski & Evan Nemerson
+Changed: Enums converted to LZ4F_ namespace convention - by Takayuki Matsuoka
+Added  : AppVeyor CI environment, for Visual tests - Suggested by Takayuki Matsuoka
+Modified:Obsolete functions generate warnings - Suggested by Evan Nemerson, contributed by Takayuki Matsuoka
+Fixed  : Bug #75 (unfinished stream), reported by Yongwoon Cho
+Updated: Documentation converted to MarkDown format
+
+r128:
+New    : lz4cli sparse file support (Requested by Neil Wilson, and contributed by Takayuki Matsuoka)
+New    : command -m, to compress multiple files in a single command (suggested by Kyle J. Harper)
+Fixed  : Restored lz4hc compression ratio (slightly lower since r124)
+New    : lz4 cli supports long commands (suggested by Takayuki Matsuoka)
+New    : lz4frame & lz4cli frame content size support
+New    : lz4frame supports skippable frames, as requested by Sergey Cherepanov
+Changed: Default "make install" directory is /usr/local, as notified by Ron Johnson
+New    : lz4 cli supports "pass-through" mode, requested by Neil Wilson
+New    : datagen can generate sparse files
+New    : scan-build tests, thanks to kind help by Takayuki Matsuoka
+New    : g++ compatibility tests
+New    : arm cross-compilation test, thanks to kind help by Takayuki Matsuoka
+Fixed  : Fuzzer + frametest compatibility with NetBSD (issue #48, reported by Thomas Klausner)
+Added  : Visual project directory
+Updated: Man page & Specification
+
+r127:
+N/A   : added a file on SVN
+
+r126:
+New   : lz4frame API is now integrated into liblz4
+Fixed : GCC 4.9 bug on highest performance settings, reported by Greg Slazinski
+Fixed : bug within LZ4 HC streaming mode, reported by James Boyle
+Fixed : older compiler don't like nameless unions, reported by Cheyi Lin
+Changed : lz4 is C90 compatible
+Changed : added -pedantic option, fixed a few mminor warnings
+
+r125:
+Changed : endian and alignment code
+Changed : directory structure : new "lib" directory
+Updated : lz4io, now uses lz4frame
+Improved: slightly improved decoding speed
+Fixed : LZ4_compress_limitedOutput(); Special thanks to Christopher Speller !
+Fixed : some alignment warnings under clang
+Fixed : deprecated function LZ4_slideInputBufferHC()
+
+r124:
+New : LZ4 HC streaming mode
+Fixed : LZ4F_compressBound() using null preferencesPtr
+Updated : xxHash to r38
+Updated library number, to 1.4.0
+
+r123:
+Added : experimental lz4frame API, thanks to Takayuki Matsuoka and Christopher Jackson for testings
+Fix : s390x support, thanks to Nobuhiro Iwamatsu
+Fix : test mode (-t) no longer requires confirmation, thanks to Thary Nguyen
+
+r122:
+Fix : AIX & AIX64 support (SamG)
+Fix : mips 64-bits support (lew van)
+Added : Examples directory, using code examples from Takayuki Matsuoka
+Updated : Framing specification, to v1.4.1
+Updated : xxHash, to r36
+
+r121:
+Added : Makefile : install for kFreeBSD and Hurd (Nobuhiro Iwamatsu)
+Fix : Makefile : install for OS-X and BSD, thanks to Takayuki Matsuoka
+
+r120:
+Modified : Streaming API, using strong types
+Added : LZ4_versionNumber(), thanks to Takayuki Matsuoka
+Fix : OS-X : library install name, thanks to Clemens Lang
+Updated : Makefile : synchronize library version number with lz4.h, thanks to Takayuki Matsuoka
+Updated : Makefile : stricter compilation flags
+Added : pkg-config, thanks to Zbigniew Jędrzejewski-Szmek (issue 135)
+Makefile : lz4-test only test native binaries, as suggested by Michał Górny (issue 136)
+Updated : xxHash to r35
+
+r119:
+Fix : Issue 134 : extended malicious address space overflow in 32-bits mode for some specific configurations
+
+r118:
+New : LZ4 Streaming API (Fast version), special thanks to Takayuki Matsuoka
+New : datagen : parametrable synthetic data generator for tests
+Improved : fuzzer, support more test cases, more parameters, ability to jump to specific test
+fix : support ppc64le platform (issue 131)
+fix : Issue 52 (malicious address space overflow in 32-bits mode when using large custom format)
+fix : Makefile : minor issue 130 : header files permissions
+
+r117:
+Added : man pages for lz4c and lz4cat
+Added : automated tests on Travis, thanks to Takayuki Matsuoka !
+fix : block-dependency command line (issue 127)
+fix : lz4fullbench (issue 128)
+
+r116:
+hotfix (issue 124 & 125)
+
+r115:
+Added : lz4cat utility, installed on POSX systems (issue 118)
+OS-X compatible compilation of dynamic library (issue 115)
+
+r114:
+Makefile : library correctly compiled with -O3 switch (issue 114)
+Makefile : library compilation compatible with clang
+Makefile : library is versioned and linked (issue 119)
+lz4.h : no more static inline prototypes (issue 116)
+man : improved header/footer (issue 111)
+Makefile : Use system default $(CC) & $(MAKE) variables (issue 112)
+xxhash : updated to r34
+
+r113:
+Large decompression speed improvement for GCC 32-bits. Thanks to Valery Croizier !
+LZ4HC : Compression Level is now a programmable parameter (CLI from 4 to 9)
+Separated IO routines from command line (lz4io.c)
+Version number into lz4.h (suggested by Francesc Alted)
+
+r112:
+quickfix
+
+r111 :
+Makefile : added capability to install libraries
+Modified Directory tree, to better separate libraries from programs.
+
+r110 :
+lz4 & lz4hc : added capability to allocate state & stream state with custom allocator (issue 99)
+fuzzer & fullbench : updated to test new functions
+man : documented -l command (Legacy format, for Linux kernel compression) (issue 102)
+cmake : improved version by Mika Attila, building programs and libraries (issue 100)
+xxHash : updated to r33
+Makefile : clean also delete local package .tar.gz
+
+r109 :
+lz4.c : corrected issue 98 (LZ4_compress_limitedOutput())
+Makefile : can specify version number from makefile
+
+r108 :
+lz4.c : corrected compression efficiency issue 97 in 64-bits chained mode (-BD) for streams > 4 GB (thanks Roman Strashkin for reporting)
+
+r107 :
+Makefile : support DESTDIR for staged installs. Thanks Jorge Aparicio.
+Makefile : make install installs both lz4 and lz4c (Jorge Aparicio)
+Makefile : removed -Wno-implicit-declaration compilation switch
+lz4cli.c : include <stduni.h> for isatty() (Luca Barbato)
+lz4.h : introduced LZ4_MAX_INPUT_SIZE constant (Shay Green)
+lz4.h : LZ4_compressBound() : unified macro and inline definitions (Shay Green)
+lz4.h : LZ4_decompressSafe_partial() : clarify comments (Shay Green)
+lz4.c : LZ4_compress() verify input size condition (Shay Green)
+bench.c : corrected a bug in free memory size evaluation
+cmake : install into bin/ directory (Richard Yao)
+cmake : check for just C compiler (Elan Ruusamae)
+
+r106 :
+Makefile : make dist modify text files in the package to respect Unix EoL convention
+lz4cli.c : corrected small display bug in HC mode
+
+r105 :
+Makefile : New install script and man page, contributed by Prasad Pandit
+lz4cli.c : Minor modifications, for easier extensibility
+COPYING  : added license file
+LZ4_Streaming_Format.odt : modified file name to remove white space characters
+Makefile : .exe suffix now properly added only for Windows target
diff --git a/util/cbfstool/lz4/README.md b/util/cbfstool/lz4/README.md
new file mode 100644
index 0000000000..d992f2819d
--- /dev/null
+++ b/util/cbfstool/lz4/README.md
@@ -0,0 +1,90 @@
+LZ4 - Extremely fast compression
+================================
+
+LZ4 is lossless compression algorithm,
+providing compression speed at 400 MB/s per core,
+scalable with multi-cores CPU.
+It features an extremely fast decoder,
+with speed in multiple GB/s per core,
+typically reaching RAM speed limits on multi-core systems.
+
+Speed can be tuned dynamically, selecting an "acceleration" factor
+which trades compression ratio for more speed up.
+On the other end, a high compression derivative, LZ4_HC, is also provided,
+trading CPU time for improved compression ratio.
+All versions feature the same decompression speed.
+
+LZ4 library is provided as open-source software using BSD license.s
+
+
+|Branch      |Status   |
+|------------|---------|
+|master      | [![Build Status][travisMasterBadge]][travisLink] [![Build status][AppveyorMasterBadge]][AppveyorLink] [![coverity][coverBadge]][coverlink] |
+|dev         | [![Build Status][travisDevBadge]][travisLink]    [![Build status][AppveyorDevBadge]][AppveyorLink]                                         |
+
+[travisMasterBadge]: https://travis-ci.org/Cyan4973/lz4.svg?branch=master "Continuous Integration test suite"
+[travisDevBadge]: https://travis-ci.org/Cyan4973/lz4.svg?branch=dev "Continuous Integration test suite"
+[travisLink]: https://ci.appveyor.com/project/YannCollet/lz4
+[AppveyorMasterBadge]: https://ci.appveyor.com/api/projects/status/v6kxv9si529477cq/branch/master?svg=true "Visual test suite"
+[AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/v6kxv9si529477cq/branch/dev?svg=true "Visual test suite"
+[AppveyorLink]: https://ci.appveyor.com/project/YannCollet/lz4
+[coverBadge]: https://scan.coverity.com/projects/4735/badge.svg "Static code analysis of Master branch"
+[coverlink]: https://scan.coverity.com/projects/4735
+
+> **Branch Policy:**
+
+> - The "master" branch is considered stable, at all times.
+> - The "dev" branch is the one where all contributions must be merged
+    before being promoted to master.
+>   + If you plan to propose a patch, please commit into the "dev" branch,
+      or its own feature branch.
+      Direct commit to "master" are not permitted.
+
+Benchmarks
+-------------------------
+
+The benchmark uses the [Open-Source Benchmark program by m^2 (v0.14.3)]
+compiled with GCC v4.8.2 on Linux Mint 64-bits v17.
+The reference system uses a Core i5-4300U @1.9GHz.
+Benchmark evaluates the compression of reference [Silesia Corpus]
+in single-thread mode.
+
+|  Compressor          | Ratio   | Compression | Decompression |
+|  ----------          | -----   | ----------- | ------------- |
+|  memcpy              |  1.000  | 4200 MB/s   |   4200 MB/s   |
+|**LZ4 fast 17 (r129)**|  1.607  |**690 MB/s** | **2220 MB/s** |
+|**LZ4 default (r129)**|**2.101**|**385 MB/s** | **1850 MB/s** |
+|  LZO 2.06            |  2.108  |  350 MB/s   |    510 MB/s   |
+|  QuickLZ 1.5.1.b6    |  2.238  |  320 MB/s   |    380 MB/s   |
+|  Snappy 1.1.0        |  2.091  |  250 MB/s   |    960 MB/s   |
+|  LZF v3.6            |  2.073  |  175 MB/s   |    500 MB/s   |
+|  zlib 1.2.8 -1       |  2.730  |   59 MB/s   |    250 MB/s   |
+|**LZ4 HC (r129)**     |**2.720**|   22 MB/s   | **1830 MB/s** |
+|  zlib 1.2.8 -6       |  3.099  |   18 MB/s   |    270 MB/s   |
+
+
+Documentation
+-------------------------
+
+The raw LZ4 block compression format is detailed within [lz4_Block_format].
+
+To compress an arbitrarily long file or data stream, multiple blocks are required.
+Organizing these blocks and providing a common header format to handle their content
+is the purpose of the Frame format, defined into [lz4_Frame_format].
+Interoperable versions of LZ4 must respect this frame format.
+
+
+Other source versions
+-------------------------
+
+Beyond the C reference source,
+many contributors have created versions of lz4 in multiple languages
+(Java, C#, Python, Perl, Ruby, etc.).
+A list of known source ports is maintained on the [LZ4 Homepage].
+
+
+[Open-Source Benchmark program by m^2 (v0.14.3)]: http://encode.ru/threads/1371-Filesystem-benchmark?p=34029&viewfull=1#post34029
+[Silesia Corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
+[lz4_Block_format]: lz4_Block_format.md
+[lz4_Frame_format]: lz4_Frame_format.md
+[LZ4 Homepage]: http://www.lz4.org
diff --git a/util/cbfstool/lz4/lib/LICENSE b/util/cbfstool/lz4/lib/LICENSE
new file mode 100644
index 0000000000..bd3c80db7c
--- /dev/null
+++ b/util/cbfstool/lz4/lib/LICENSE
@@ -0,0 +1,24 @@
+LZ4 Library
+Copyright (c) 2011-2015, Yann Collet
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/util/cbfstool/lz4/lib/Makefile b/util/cbfstool/lz4/lib/Makefile
new file mode 100644
index 0000000000..12a741d147
--- /dev/null
+++ b/util/cbfstool/lz4/lib/Makefile
@@ -0,0 +1,118 @@
+# ################################################################
+# LZ4 library - Makefile
+# Copyright (C) Yann Collet 2011-2015
+# All rights reserved.
+#
+# BSD license
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice, this
+#   list of conditions and the following disclaimer in the documentation and/or
+#   other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# You can contact the author at :
+#  - LZ4 source repository : https://github.com/Cyan4973/lz4
+#  - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c
+# ################################################################
+
+# Version numbers
+VERSION?= 132
+LIBVER_MAJOR:=`sed -n '/define LZ4_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h`
+LIBVER_MINOR:=`sed -n '/define LZ4_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h`
+LIBVER_PATCH:=`sed -n '/define LZ4_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h`
+LIBVER := $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
+
+DESTDIR?=
+PREFIX ?= /usr/local
+CPPFLAGS= -DXXH_NAMESPACE=LZ4_
+CFLAGS ?= -O3
+CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wcast-qual -Wstrict-prototypes -pedantic
+FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+
+LIBDIR?= $(PREFIX)/lib
+INCLUDEDIR=$(PREFIX)/include
+
+
+# OS X linker doesn't support -soname, and use different extension
+# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
+ifeq ($(shell uname), Darwin)
+	SHARED_EXT = dylib
+	SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
+	SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
+	SONAME_FLAGS = -install_name $(PREFIX)/lib/liblz4.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
+else
+	SONAME_FLAGS = -Wl,-soname=liblz4.$(SHARED_EXT).$(LIBVER_MAJOR)
+	SHARED_EXT = so
+	SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
+	SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
+endif
+
+default: liblz4
+
+all: liblz4
+
+liblz4: lz4.c lz4hc.c lz4frame.c xxhash.c   # need to compile once with -fPIC, and once without -fPIC
+	@echo compiling static library
+	@$(AR) rcs $@.a $^
+	@echo compiling dynamic library $(LIBVER)
+	@$(CC) $(FLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER)
+	@echo creating versioned links
+	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT_MAJOR)
+	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT)
+
+clean:
+	@rm -f core *.o *.a *.$(SHARED_EXT) *.$(SHARED_EXT).* liblz4.pc
+	@echo Cleaning library completed
+
+
+#------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+
+liblz4.pc: liblz4.pc.in Makefile
+	@echo creating pkgconfig
+	@sed -e 's|@PREFIX@|$(PREFIX)|' \
+            -e 's|@LIBDIR@|$(LIBDIR)|' \
+            -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
+            -e 's|@VERSION@|$(VERSION)|' \
+             $< >$@
+
+install: liblz4 liblz4.pc
+	@install -d -m 755 $(DESTDIR)$(LIBDIR)/pkgconfig/ $(DESTDIR)$(INCLUDEDIR)/
+	@install -m 755 liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)
+	@ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_MAJOR)
+	@ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT)
+	@install -m 644 liblz4.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
+	@install -m 644 liblz4.a $(DESTDIR)$(LIBDIR)/liblz4.a
+	@install -m 644 lz4.h $(DESTDIR)$(INCLUDEDIR)/lz4.h
+	@install -m 644 lz4hc.h $(DESTDIR)$(INCLUDEDIR)/lz4hc.h
+	@install -m 644 lz4frame.h $(DESTDIR)$(INCLUDEDIR)/lz4frame.h
+	@echo lz4 static and shared libraries installed
+
+uninstall:
+	@rm -f $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT)
+	@rm -f $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_MAJOR)
+	@rm -f $(DESTDIR)$(LIBDIR)/pkgconfig/liblz4.pc
+	@[ -x $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_VER) ] && rm -f $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_VER)
+	@[ -f $(DESTDIR)$(LIBDIR)/liblz4.a ] && rm -f $(DESTDIR)$(LIBDIR)/liblz4.a
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/lz4.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/lz4.h
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/lz4hc.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/lz4hc.h
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/lz4frame.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/lz4frame.h
+	@echo lz4 libraries successfully uninstalled
+
+endif
diff --git a/util/cbfstool/lz4/lib/README.md b/util/cbfstool/lz4/lib/README.md
new file mode 100644
index 0000000000..f6ebf5e10b
--- /dev/null
+++ b/util/cbfstool/lz4/lib/README.md
@@ -0,0 +1,21 @@
+LZ4 - Library Files
+================================
+
+The __lib__ directory contains several files, but you don't necessarily need them all.
+
+To integrate fast LZ4 compression/decompression into your program, you basically just need "**lz4.c**" and "**lz4.h**".
+
+For more compression at the cost of compression speed (while preserving decompression speed), use **lz4hc** on top of regular lz4. `lz4hc` only provides compression functions. It also needs `lz4` to compile properly.
+
+If you want to produce files or data streams compatible with `lz4` command line utility, use **lz4frame**. This library encapsulates lz4-compressed blocks into the [official interoperable frame format]. In order to work properly, lz4frame needs lz4 and lz4hc, and also **xxhash**, which provides error detection algorithm.
+(_Advanced stuff_ : It's possible to hide xxhash symbols into a local namespace. This is what `liblz4` does, to avoid symbol duplication in case a user program would link to several libraries containing xxhash symbols.)
+
+A more complex "lz4frame_static.h" is also provided, although its usage is not recommended. It contains definitions which are not guaranteed to remain stable within future versions. Use for static linking ***only***.
+
+The other files are not source code. There are :
+
+ - LICENSE : contains the BSD license text
+ - Makefile : script to compile or install lz4 library (static or dynamic)
+ - liblz4.pc.in : for pkg-config (make install)
+
+[official interoperable frame format]: ../lz4_Frame_format.md
diff --git a/util/cbfstool/lz4/lib/liblz4.pc.in b/util/cbfstool/lz4/lib/liblz4.pc.in
new file mode 100644
index 0000000000..d9393c7d4c
--- /dev/null
+++ b/util/cbfstool/lz4/lib/liblz4.pc.in
@@ -0,0 +1,14 @@
+#   LZ4 - Fast LZ compression algorithm
+#   Copyright (C) 2011-2014, Yann Collet.
+#   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=@INCLUDEDIR@
+
+Name: lz4
+Description: fast lossless compression algorithm library
+URL: http://lz4.org/
+Version: @VERSION@
+Libs: -L@LIBDIR@ -llz4
+Cflags: -I@INCLUDEDIR@
diff --git a/util/cbfstool/lz4/lib/lz4.c b/util/cbfstool/lz4/lib/lz4.c
new file mode 100644
index 0000000000..8512452b3d
--- /dev/null
+++ b/util/cbfstool/lz4/lib/lz4.c
@@ -0,0 +1,1567 @@
+/*
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+
+/**************************************
+*  Tuning parameters
+**************************************/
+/*
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#define HEAPMODE 0
+
+/*
+ * ACCELERATION_DEFAULT :
+ * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
+ */
+#define ACCELERATION_DEFAULT 1
+
+
+/**************************************
+*  CPU Feature Detection
+**************************************/
+/* LZ4_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets which generate assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define LZ4_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define LZ4_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system or compiler does not support hardware bit count
+ */
+#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for Windows CE does not support Hardware bit count */
+#  define LZ4_FORCE_SW_BITCOUNT
+#endif
+
+
+/**************************************
+*  Includes
+**************************************/
+#include "lz4.h"
+
+
+/**************************************
+*  Compiler Options
+**************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4293)        /* disable: C4293: too large shift (32-bits) */
+#else
+#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+#    if defined(__GNUC__) || defined(__clang__)
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif   /* __STDC_VERSION__ */
+#endif  /* _MSC_VER */
+
+/* LZ4_GCC_VERSION is defined into lz4.h */
+#if (LZ4_GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
+#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
+#else
+#  define expect(expr,value)    (expr)
+#endif
+
+#define likely(expr)     expect((expr) != 0, 1)
+#define unlikely(expr)   expect((expr) != 0, 0)
+
+
+/**************************************
+*  Memory routines
+**************************************/
+#include <stdlib.h>   /* malloc, calloc, free */
+#define ALLOCATOR(n,s) calloc(n,s)
+#define FREEMEM        free
+#include <string.h>   /* memset, memcpy */
+#define MEM_INIT       memset
+
+
+/**************************************
+*  Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+*  Reading and writing into memory
+**************************************/
+#define STEPSIZE sizeof(size_t)
+
+static unsigned LZ4_64bits(void) { return sizeof(void*)==8; }
+
+static unsigned LZ4_isLittleEndian(void)
+{
+    const union { U32 i; BYTE c[4]; } one = { 1 };   // don't use static : performance detrimental
+    return one.c[0];
+}
+
+
+#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
+
+static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static size_t LZ4_read_ARCH(const void* memPtr) { return *(const size_t*) memPtr; }
+
+static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+
+#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; size_t uArch; } __attribute__((packed)) unalign;
+
+static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static size_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; }
+
+static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+
+#else
+
+static U16 LZ4_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U32 LZ4_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static size_t LZ4_read_ARCH(const void* memPtr)
+{
+    size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static void LZ4_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif // LZ4_FORCE_MEMORY_ACCESS
+
+
+static U16 LZ4_readLE16(const void* memPtr)
+{
+    if (LZ4_isLittleEndian())
+    {
+        return LZ4_read16(memPtr);
+    }
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)((U16)p[0] + (p[1]<<8));
+    }
+}
+
+static void LZ4_writeLE16(void* memPtr, U16 value)
+{
+    if (LZ4_isLittleEndian())
+    {
+        LZ4_write16(memPtr, value);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE) value;
+        p[1] = (BYTE)(value>>8);
+    }
+}
+
+static void LZ4_copy8(void* dst, const void* src)
+{
+    memcpy(dst,src,8);
+}
+
+/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */
+static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
+
+#if 0
+    const size_t l2 = 8 - (((size_t)d) & (sizeof(void*)-1));
+    LZ4_copy8(d,s); if (d>e-9) return;
+    d+=l2; s+=l2;
+#endif /* join to align */
+
+    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
+}
+
+
+/**************************************
+*  Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+
+#define ML_BITS  4
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/**************************************
+*  Common Utils
+**************************************/
+#define LZ4_STATIC_ASSERT(c)    { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/**************************************
+*  Common functions
+**************************************/
+static unsigned LZ4_NbCommonBytes (register size_t val)
+{
+    if (LZ4_isLittleEndian())
+    {
+        if (LZ4_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanForward64( &r, (U64)val );
+            return (int)(r>>3);
+#       elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r;
+            _BitScanForward( &r, (U32)val );
+            return (int)(r>>3);
+#       elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    }
+    else   /* Big Endian CPU */
+    {
+        if (LZ4_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse64( &r, val );
+            return (unsigned)(r>>3);
+#       elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clzll((U64)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse( &r, (unsigned long)val );
+            return (unsigned)(r>>3);
+#       elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clz((U32)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+    }
+}
+
+static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const pStart = pIn;
+
+    while (likely(pIn<pInLimit-(STEPSIZE-1)))
+    {
+        size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
+        pIn += LZ4_NbCommonBytes(diff);
+        return (unsigned)(pIn - pStart);
+    }
+
+    if (LZ4_64bits()) if ((pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (unsigned)(pIn - pStart);
+}
+
+
+#ifndef LZ4_COMMONDEFS_ONLY
+/**************************************
+*  Local Constants
+**************************************/
+#define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
+#define HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static allocation */
+
+static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */
+
+
+/**************************************
+*  Local Structures and types
+**************************************/
+typedef struct {
+    U32 hashTable[HASH_SIZE_U32];
+    U32 currentOffset;
+    U32 initCheck;
+    const BYTE* dictionary;
+    BYTE* bufferStart;   /* obsolete, used for slideInputBuffer */
+    U32 dictSize;
+} LZ4_stream_t_internal;
+
+typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
+
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+
+/**************************************
+*  Local Utils
+**************************************/
+int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
+int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
+int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
+
+
+
+/********************************
+*  Compression functions
+********************************/
+
+static U32 LZ4_hashSequence(U32 sequence, tableType_t const tableType)
+{
+    if (tableType == byU16)
+        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
+    else
+        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
+}
+
+static const U64 prime5bytes = 889523592379ULL;
+static U32 LZ4_hashSequence64(size_t sequence, tableType_t const tableType)
+{
+    const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
+    const U32 hashMask = (1<<hashLog) - 1;
+    return ((sequence * prime5bytes) >> (40 - hashLog)) & hashMask;
+}
+
+static U32 LZ4_hashSequenceT(size_t sequence, tableType_t const tableType)
+{
+    if (LZ4_64bits())
+        return LZ4_hashSequence64(sequence, tableType);
+    return LZ4_hashSequence((U32)sequence, tableType);
+}
+
+static U32 LZ4_hashPosition(const void* p, tableType_t tableType) { return LZ4_hashSequenceT(LZ4_read_ARCH(p), tableType); }
+
+static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase)
+{
+    switch (tableType)
+    {
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
+    }
+}
+
+static void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    U32 h = LZ4_hashPosition(p, tableType);
+    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
+    if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
+    { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
+}
+
+static const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    U32 h = LZ4_hashPosition(p, tableType);
+    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+FORCE_INLINE int LZ4_compress_generic(
+                 void* const ctx,
+                 const char* const source,
+                 char* const dest,
+                 const int inputSize,
+                 const int maxOutputSize,
+                 const limitedOutput_directive outputLimited,
+                 const tableType_t tableType,
+                 const dict_directive dict,
+                 const dictIssue_directive dictIssue,
+                 const U32 acceleration)
+{
+    LZ4_stream_t_internal* const dictPtr = (LZ4_stream_t_internal*)ctx;
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* base;
+    const BYTE* lowLimit;
+    const BYTE* const lowRefLimit = ip - dictPtr->dictSize;
+    const BYTE* const dictionary = dictPtr->dictionary;
+    const BYTE* const dictEnd = dictionary + dictPtr->dictSize;
+    const size_t dictDelta = dictEnd - (const BYTE*)source;
+    const BYTE* anchor = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const olimit = op + maxOutputSize;
+
+    U32 forwardH;
+    size_t refDelta=0;
+
+    /* Init conditions */
+    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;   /* Unsupported input size, too large (or negative) */
+    switch(dict)
+    {
+    case noDict:
+    default:
+        base = (const BYTE*)source;
+        lowLimit = (const BYTE*)source;
+        break;
+    case withPrefix64k:
+        base = (const BYTE*)source - dictPtr->currentOffset;
+        lowLimit = (const BYTE*)source - dictPtr->dictSize;
+        break;
+    case usingExtDict:
+        base = (const BYTE*)source - dictPtr->currentOffset;
+        lowLimit = (const BYTE*)source;
+        break;
+    }
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
+    if (inputSize<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+
+    /* First Byte */
+    LZ4_putPosition(ip, ctx, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+    /* Main Loop */
+    for ( ; ; )
+    {
+        const BYTE* match;
+        BYTE* token;
+        {
+            const BYTE* forwardIp = ip;
+            unsigned step = 1;
+            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
+
+            /* Find a match */
+            do {
+                U32 h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+
+                match = LZ4_getPositionOnHash(h, ctx, tableType, base);
+                if (dict==usingExtDict)
+                {
+                    if (match<(const BYTE*)source)
+                    {
+                        refDelta = dictDelta;
+                        lowLimit = dictionary;
+                    }
+                    else
+                    {
+                        refDelta = 0;
+                        lowLimit = (const BYTE*)source;
+                    }
+                }
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
+
+            } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0)
+                || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
+                || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) );
+        }
+
+        /* Catch up */
+        while ((ip>anchor) && (match+refDelta > lowLimit) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; }
+
+        {
+            /* Encode Literal length */
+            unsigned litLength = (unsigned)(ip - anchor);
+            token = op++;
+            if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
+                return 0;   /* Check output limit */
+            if (litLength>=RUN_MASK)
+            {
+                int len = (int)litLength-RUN_MASK;
+                *token=(RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
+
+            /* Copy Literals */
+            LZ4_wildCopy(op, anchor, op+litLength);
+            op+=litLength;
+        }
+
+_next_match:
+        /* Encode Offset */
+        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+
+        /* Encode MatchLength */
+        {
+            unsigned matchLength;
+
+            if ((dict==usingExtDict) && (lowLimit==dictionary))
+            {
+                const BYTE* limit;
+                match += refDelta;
+                limit = ip + (dictEnd-match);
+                if (limit > matchlimit) limit = matchlimit;
+                matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
+                ip += MINMATCH + matchLength;
+                if (ip==limit)
+                {
+                    unsigned more = LZ4_count(ip, (const BYTE*)source, matchlimit);
+                    matchLength += more;
+                    ip += more;
+                }
+            }
+            else
+            {
+                matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+                ip += MINMATCH + matchLength;
+            }
+
+            if ((outputLimited) && (unlikely(op + (1 + LASTLITERALS) + (matchLength>>8) > olimit)))
+                return 0;    /* Check output limit */
+            if (matchLength>=ML_MASK)
+            {
+                *token += ML_MASK;
+                matchLength -= ML_MASK;
+                for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; }
+                if (matchLength >= 255) { matchLength-=255; *op++ = 255; }
+                *op++ = (BYTE)matchLength;
+            }
+            else *token += (BYTE)(matchLength);
+        }
+
+        anchor = ip;
+
+        /* Test end of chunk */
+        if (ip > mflimit) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, ctx, tableType, base);
+
+        /* Test next position */
+        match = LZ4_getPosition(ip, ctx, tableType, base);
+        if (dict==usingExtDict)
+        {
+            if (match<(const BYTE*)source)
+            {
+                refDelta = dictDelta;
+                lowLimit = dictionary;
+            }
+            else
+            {
+                refDelta = 0;
+                lowLimit = (const BYTE*)source;
+            }
+        }
+        LZ4_putPosition(ip, ctx, tableType, base);
+        if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1)
+            && (match+MAX_DISTANCE>=ip)
+            && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) )
+        { token=op++; *token=0; goto _next_match; }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+    }
+
+_last_literals:
+    /* Encode Last Literals */
+    {
+        const size_t lastRun = (size_t)(iend - anchor);
+        if ((outputLimited) && ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize))
+            return 0;   /* Check output limit */
+        if (lastRun >= RUN_MASK)
+        {
+            size_t accumulator = lastRun - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        }
+        else
+        {
+            *op++ = (BYTE)(lastRun<<ML_BITS);
+        }
+        memcpy(op, anchor, lastRun);
+        op += lastRun;
+    }
+
+    /* End */
+    return (int) (((char*)op)-dest);
+}
+
+
+int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_resetStream((LZ4_stream_t*)state);
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+
+    if (maxOutputSize >= LZ4_compressBound(inputSize))
+    {
+        if (inputSize < LZ4_64Klimit)
+            return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16,                        noDict, noDictIssue, acceleration);
+        else
+            return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    }
+    else
+    {
+        if (inputSize < LZ4_64Klimit)
+            return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
+        else
+            return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    }
+}
+
+
+int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+#if (HEAPMODE)
+    void* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+#else
+    LZ4_stream_t ctx;
+    void* ctxPtr = &ctx;
+#endif
+
+    int result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
+
+#if (HEAPMODE)
+    FREEMEM(ctxPtr);
+#endif
+    return result;
+}
+
+
+int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1);
+}
+
+
+/* hidden debug function */
+/* strangely enough, gcc generates faster code when this function is uncommented, even if unused */
+int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t ctx;
+
+    LZ4_resetStream(&ctx);
+
+    if (inputSize < LZ4_64Klimit)
+        return LZ4_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
+    else
+        return LZ4_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+}
+
+
+/********************************
+*  destSize variant
+********************************/
+
+static int LZ4_compress_destSize_generic(
+                       void* const ctx,
+                 const char* const src,
+                       char* const dst,
+                       int*  const srcSizePtr,
+                 const int targetDstSize,
+                 const tableType_t tableType)
+{
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* base = (const BYTE*) src;
+    const BYTE* lowLimit = (const BYTE*) src;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + *srcSizePtr;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+
+    BYTE* op = (BYTE*) dst;
+    BYTE* const oend = op + targetDstSize;
+    BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */;
+    BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */);
+    BYTE* const oMaxSeq = oMaxLit - 1 /* token */;
+
+    U32 forwardH;
+
+
+    /* Init conditions */
+    if (targetDstSize < 1) return 0;                                     /* Impossible to store anything */
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;            /* Unsupported input size, too large (or negative) */
+    if ((tableType == byU16) && (*srcSizePtr>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
+    if (*srcSizePtr<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+
+    /* First Byte */
+    *srcSizePtr = 0;
+    LZ4_putPosition(ip, ctx, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+    /* Main Loop */
+    for ( ; ; )
+    {
+        const BYTE* match;
+        BYTE* token;
+        {
+            const BYTE* forwardIp = ip;
+            unsigned step = 1;
+            unsigned searchMatchNb = 1 << LZ4_skipTrigger;
+
+            /* Find a match */
+            do {
+                U32 h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimit))
+                    goto _last_literals;
+
+                match = LZ4_getPositionOnHash(h, ctx, tableType, base);
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
+
+            } while ( ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
+                || (LZ4_read32(match) != LZ4_read32(ip)) );
+        }
+
+        /* Catch up */
+        while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
+
+        {
+            /* Encode Literal length */
+            unsigned litLength = (unsigned)(ip - anchor);
+            token = op++;
+            if (op + ((litLength+240)/255) + litLength > oMaxLit)
+            {
+                /* Not enough space for a last match */
+                op--;
+                goto _last_literals;
+            }
+            if (litLength>=RUN_MASK)
+            {
+                unsigned len = litLength - RUN_MASK;
+                *token=(RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
+
+            /* Copy Literals */
+            LZ4_wildCopy(op, anchor, op+litLength);
+            op += litLength;
+        }
+
+_next_match:
+        /* Encode Offset */
+        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+
+        /* Encode MatchLength */
+        {
+            size_t matchLength;
+
+            matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+
+            if (op + ((matchLength+240)/255) > oMaxMatch)
+            {
+                /* Match description too long : reduce it */
+                matchLength = (15-1) + (oMaxMatch-op) * 255;
+            }
+            ip += MINMATCH + matchLength;
+
+            if (matchLength>=ML_MASK)
+            {
+                *token += ML_MASK;
+                matchLength -= ML_MASK;
+                while (matchLength >= 255) { matchLength-=255; *op++ = 255; }
+                *op++ = (BYTE)matchLength;
+            }
+            else *token += (BYTE)(matchLength);
+        }
+
+        anchor = ip;
+
+        /* Test end of block */
+        if (ip > mflimit) break;
+        if (op > oMaxSeq) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, ctx, tableType, base);
+
+        /* Test next position */
+        match = LZ4_getPosition(ip, ctx, tableType, base);
+        LZ4_putPosition(ip, ctx, tableType, base);
+        if ( (match+MAX_DISTANCE>=ip)
+            && (LZ4_read32(match)==LZ4_read32(ip)) )
+        { token=op++; *token=0; goto _next_match; }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+    }
+
+_last_literals:
+    /* Encode Last Literals */
+    {
+        size_t lastRunSize = (size_t)(iend - anchor);
+        if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend)
+        {
+            /* adapt lastRunSize to fill 'dst' */
+            lastRunSize  = (oend-op) - 1;
+            lastRunSize -= (lastRunSize+240)/255;
+        }
+        ip = anchor + lastRunSize;
+
+        if (lastRunSize >= RUN_MASK)
+        {
+            size_t accumulator = lastRunSize - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        }
+        else
+        {
+            *op++ = (BYTE)(lastRunSize<<ML_BITS);
+        }
+        memcpy(op, anchor, lastRunSize);
+        op += lastRunSize;
+    }
+
+    /* End */
+    *srcSizePtr = (int) (((const char*)ip)-src);
+    return (int) (((char*)op)-dst);
+}
+
+
+static int LZ4_compress_destSize_extState (void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+    LZ4_resetStream((LZ4_stream_t*)state);
+
+    if (targetDstSize >= LZ4_compressBound(*srcSizePtr))   /* compression success is guaranteed */
+    {
+        return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
+    }
+    else
+    {
+        if (*srcSizePtr < LZ4_64Klimit)
+            return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, byU16);
+        else
+            return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, LZ4_64bits() ? byU32 : byPtr);
+    }
+}
+
+
+int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+#if (HEAPMODE)
+    void* ctx = ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+#else
+    LZ4_stream_t ctxBody;
+    void* ctx = &ctxBody;
+#endif
+
+    int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);
+
+#if (HEAPMODE)
+    FREEMEM(ctx);
+#endif
+    return result;
+}
+
+
+
+/********************************
+*  Streaming functions
+********************************/
+
+LZ4_stream_t* LZ4_createStream(void)
+{
+    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64);
+    LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
+    LZ4_resetStream(lz4s);
+    return lz4s;
+}
+
+void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
+{
+    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
+{
+    FREEMEM(LZ4_stream);
+    return (0);
+}
+
+
+#define HASH_UNIT sizeof(size_t)
+int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
+{
+    LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict;
+    const BYTE* p = (const BYTE*)dictionary;
+    const BYTE* const dictEnd = p + dictSize;
+    const BYTE* base;
+
+    if ((dict->initCheck) || (dict->currentOffset > 1 GB))  /* Uninitialized structure, or reuse overflow */
+        LZ4_resetStream(LZ4_dict);
+
+    if (dictSize < (int)HASH_UNIT)
+    {
+        dict->dictionary = NULL;
+        dict->dictSize = 0;
+        return 0;
+    }
+
+    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
+    dict->currentOffset += 64 KB;
+    base = p - dict->currentOffset;
+    dict->dictionary = p;
+    dict->dictSize = (U32)(dictEnd - p);
+    dict->currentOffset += dict->dictSize;
+
+    while (p <= dictEnd-HASH_UNIT)
+    {
+        LZ4_putPosition(p, dict->hashTable, byU32, base);
+        p+=3;
+    }
+
+    return dict->dictSize;
+}
+
+
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
+{
+    if ((LZ4_dict->currentOffset > 0x80000000) ||
+        ((size_t)LZ4_dict->currentOffset > (size_t)src))   /* address space overflow */
+    {
+        /* rescale hash table */
+        U32 delta = LZ4_dict->currentOffset - 64 KB;
+        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+        int i;
+        for (i=0; i<HASH_SIZE_U32; i++)
+        {
+            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
+            else LZ4_dict->hashTable[i] -= delta;
+        }
+        LZ4_dict->currentOffset = 64 KB;
+        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
+        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+    }
+}
+
+
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = (const BYTE*) source;
+    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
+    if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;
+    LZ4_renormDictT(streamPtr, smallest);
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+
+    /* Check overlapping input/dictionary space */
+    {
+        const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd))
+        {
+            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
+            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
+            streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+        }
+    }
+
+    /* prefix mode : source data follows dictionary */
+    if (dictEnd == (const BYTE*)source)
+    {
+        int result;
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration);
+        else
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration);
+        streamPtr->dictSize += (U32)inputSize;
+        streamPtr->currentOffset += (U32)inputSize;
+        return result;
+    }
+
+    /* external dictionary mode */
+    {
+        int result;
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration);
+        else
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration);
+        streamPtr->dictionary = (const BYTE*)source;
+        streamPtr->dictSize = (U32)inputSize;
+        streamPtr->currentOffset += (U32)inputSize;
+        return result;
+    }
+}
+
+
+/* Hidden debug function, to force external dictionary mode */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize)
+{
+    LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_dict;
+    int result;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = dictEnd;
+    if (smallest > (const BYTE*) source) smallest = (const BYTE*) source;
+    LZ4_renormDictT((LZ4_stream_t_internal*)LZ4_dict, smallest);
+
+    result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+
+    streamPtr->dictionary = (const BYTE*)source;
+    streamPtr->dictSize = (U32)inputSize;
+    streamPtr->currentOffset += (U32)inputSize;
+
+    return result;
+}
+
+
+int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
+{
+    LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict;
+    const BYTE* previousDictEnd = dict->dictionary + dict->dictSize;
+
+    if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB */
+    if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;
+
+    memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+    dict->dictionary = (const BYTE*)safeBuffer;
+    dict->dictSize = (U32)dictSize;
+
+    return dictSize;
+}
+
+
+
+/*******************************
+*  Decompression functions
+*******************************/
+/*
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is essential this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+FORCE_INLINE int LZ4_decompress_generic(
+                 const char* const source,
+                 char* const dest,
+                 int inputSize,
+                 int outputSize,         /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
+
+                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
+                 int partialDecoding,    /* full, partial */
+                 int targetOutputSize,   /* only used if partialDecoding==partial */
+                 int dict,               /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* == dest if dict == noDict */
+                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
+                 const size_t dictSize         /* note : = 0 if noDict */
+                 )
+{
+    /* Local Variables */
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + outputSize;
+    BYTE* cpy;
+    BYTE* oexit = op + targetOutputSize;
+    const BYTE* const lowLimit = lowPrefix - dictSize;
+
+    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
+    const unsigned dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
+    const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+    const int safeDecode = (endOnInput==endOnInputSize);
+    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+    const int inPlaceDecode = ((ip >= op) && (ip < oend));
+
+
+    /* Special cases */
+    if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;                         /* targetOutputSize too high => decode everything */
+    if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
+    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+
+
+    /* Main Loop */
+    while (1)
+    {
+        unsigned token;
+        size_t length;
+        const BYTE* match;
+        size_t offset;
+
+        if (unlikely((inPlaceDecode) && (op + WILDCOPYLENGTH > ip))) goto _output_error;   /* output stream ran over input stream */
+
+        /* get literal length */
+        token = *ip++;
+        if ((length=(token>>ML_BITS)) == RUN_MASK)
+        {
+            unsigned s;
+            do
+            {
+                s = *ip++;
+                length += s;
+            }
+            while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) && (s==255) );
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error;   /* overflow detection */
+            if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error;   /* overflow detection */
+        }
+
+        /* copy literals */
+        cpy = op+length;
+        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
+            || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)))
+        {
+            if (partialDecoding)
+            {
+                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
+                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
+            }
+            else
+            {
+                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
+                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
+            }
+            memmove(op, ip, length);
+            ip += length;
+            op += length;
+            break;     /* Necessarily EOF, due to parsing restrictions */
+        }
+        LZ4_wildCopy(op, ip, cpy);
+        ip += length; op = cpy;
+
+        /* get offset */
+        offset = LZ4_readLE16(ip); ip+=2;
+        match = op - offset;
+        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside buffers */
+
+        /* get matchlength */
+        length = token & ML_MASK;
+        if (length == ML_MASK)
+        {
+            unsigned s;
+            do
+            {
+                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
+                s = *ip++;
+                length += s;
+            } while (s==255);
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error;   /* overflow detection */
+        }
+        length += MINMATCH;
+
+        /* check external dictionary */
+        if ((dict==usingExtDict) && (match < lowPrefix))
+        {
+            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;   /* doesn't respect parsing restriction */
+
+            if (length <= (size_t)(lowPrefix-match))
+            {
+                /* match can be copied as a single segment from external dictionary */
+                match = dictEnd - (lowPrefix-match);
+                memmove(op, match, length); op += length;
+            }
+            else
+            {
+                /* match encompass external dictionary and current block */
+                size_t copySize = (size_t)(lowPrefix-match);
+                memcpy(op, dictEnd - copySize, copySize);
+                op += copySize;
+                copySize = length - copySize;
+                if (copySize > (size_t)(op-lowPrefix))   /* overlap copy */
+                {
+                    BYTE* const endOfMatch = op + copySize;
+                    const BYTE* copyFrom = lowPrefix;
+                    while (op < endOfMatch) *op++ = *copyFrom++;
+                }
+                else
+                {
+                    memcpy(op, lowPrefix, copySize);
+                    op += copySize;
+                }
+            }
+            continue;
+        }
+
+        /* copy match within block */
+        cpy = op + length;
+        if (unlikely(offset<8))
+        {
+            const int dec64 = dec64table[offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[offset];
+            memcpy(op+4, match, 4);
+            match -= dec64;
+        } else { LZ4_copy8(op, match); match+=8; }
+        op += 8;
+
+        if (unlikely(cpy>oend-12))
+        {
+            BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
+            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+            if (op < oCopyLimit)
+            {
+                LZ4_wildCopy(op, match, oCopyLimit);
+                match += oCopyLimit - op;
+                op = oCopyLimit;
+            }
+            while (op<cpy) *op++ = *match++;
+        }
+        else
+            LZ4_wildCopy(op, match, cpy);
+        op=cpy;   /* correction */
+    }
+
+    /* end of decoding */
+    if (endOnInput)
+       return (int) (((char*)op)-dest);     /* Nb of output bytes decoded */
+    else
+       return (int) (((const char*)ip)-source);   /* Nb of input bytes read */
+
+    /* Overflow error detected */
+_output_error:
+    return (int) (-(((const char*)ip)-source))-1;
+}
+
+
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
+}
+
+int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
+}
+
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
+}
+
+
+/* streaming decompression functions */
+
+typedef struct
+{
+    const BYTE* externalDict;
+    size_t extDictSize;
+    const BYTE* prefixEnd;
+    size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+/*
+ * If you prefer dynamic allocation methods,
+ * LZ4_createStreamDecode()
+ * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure.
+ */
+LZ4_streamDecode_t* LZ4_createStreamDecode(void)
+{
+    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(1, sizeof(LZ4_streamDecode_t));
+    return lz4s;
+}
+
+int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
+{
+    FREEMEM(LZ4_stream);
+    return 0;
+}
+
+/*
+ * LZ4_setStreamDecode
+ * Use this function to instruct where to find the dictionary
+ * This function is not necessary if previous data is still available where it was decoded.
+ * Loading a size of 0 is allowed (same effect as no dictionary).
+ * Return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    lz4sd->prefixSize = (size_t) dictSize;
+    lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
+    lz4sd->externalDict = NULL;
+    lz4sd->extDictSize  = 0;
+    return 1;
+}
+
+/*
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in "streaming" mode.
+    Previously decoded blocks must still be available at the memory position where they were decoded.
+    If it's not possible, save the relevant part of decoded data into a safe buffer,
+    and indicate where it stands using LZ4_setStreamDecode()
+*/
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    int result;
+
+    if (lz4sd->prefixEnd == (BYTE*)dest)
+    {
+        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                        endOnInputSize, full, 0,
+                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += result;
+        lz4sd->prefixEnd  += result;
+    }
+    else
+    {
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                        endOnInputSize, full, 0,
+                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = result;
+        lz4sd->prefixEnd  = (BYTE*)dest + result;
+    }
+
+    return result;
+}
+
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    int result;
+
+    if (lz4sd->prefixEnd == (BYTE*)dest)
+    {
+        result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                                        endOnOutputSize, full, 0,
+                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += originalSize;
+        lz4sd->prefixEnd  += originalSize;
+    }
+    else
+    {
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = (BYTE*)dest - lz4sd->extDictSize;
+        result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                                        endOnOutputSize, full, 0,
+                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = originalSize;
+        lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
+    }
+
+    return result;
+}
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+    These decoding functions work the same as "_continue" ones,
+    the dictionary must be explicitly provided within parameters
+*/
+
+FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
+{
+    if (dictSize==0)
+        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0);
+    if (dictStart+dictSize == dest)
+    {
+        if (dictSize >= (int)(64 KB - 1))
+            return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0);
+        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0);
+    }
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
+}
+
+int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
+}
+
+/* debug function */
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+
+/***************************************************
+*  Obsolete Functions
+***************************************************/
+/* obsolete compression functions */
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_default(source, dest, inputSize, maxOutputSize); }
+int LZ4_compress(const char* source, char* dest, int inputSize) { return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize)); }
+int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); }
+int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); }
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, maxDstSize, 1); }
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); }
+
+/*
+These function names are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
+*/
+int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); }
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); }
+
+
+/* Obsolete Streaming functions */
+
+int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }
+
+static void LZ4_init(LZ4_stream_t_internal* lz4ds, BYTE* base)
+{
+    MEM_INIT(lz4ds, 0, LZ4_STREAMSIZE);
+    lz4ds->bufferStart = base;
+}
+
+int LZ4_resetStreamState(void* state, char* inputBuffer)
+{
+    if ((((size_t)state) & 3) != 0) return 1;   /* Error : pointer is not aligned on 4-bytes boundary */
+    LZ4_init((LZ4_stream_t_internal*)state, (BYTE*)inputBuffer);
+    return 0;
+}
+
+void* LZ4_create (char* inputBuffer)
+{
+    void* lz4ds = ALLOCATOR(8, LZ4_STREAMSIZE_U64);
+    LZ4_init ((LZ4_stream_t_internal*)lz4ds, (BYTE*)inputBuffer);
+    return lz4ds;
+}
+
+char* LZ4_slideInputBuffer (void* LZ4_Data)
+{
+    LZ4_stream_t_internal* ctx = (LZ4_stream_t_internal*)LZ4_Data;
+    int dictSize = LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)ctx->bufferStart, 64 KB);
+    return (char*)(ctx->bufferStart + dictSize);
+}
+
+/* Obsolete streaming decompression functions */
+
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
+}
+
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
+}
+
+#endif   /* LZ4_COMMONDEFS_ONLY */
+
diff --git a/util/cbfstool/lz4/lib/lz4.h b/util/cbfstool/lz4/lib/lz4.h
new file mode 100644
index 0000000000..96e25a6615
--- /dev/null
+++ b/util/cbfstool/lz4/lib/lz4.h
@@ -0,0 +1,360 @@
+/*
+   LZ4 - Fast LZ compression algorithm
+   Header File
+   Copyright (C) 2011-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * lz4.h provides block compression functions, and gives full buffer control to programmer.
+ * If you need to generate inter-operable compressed data (respecting LZ4 frame specification),
+ * and can let the library handle its own memory, please use lz4frame.h instead.
+*/
+
+/**************************************
+*  Version
+**************************************/
+#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
+#define LZ4_VERSION_MINOR    7    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
+int LZ4_versionNumber (void);
+
+/**************************************
+*  Tuning parameter
+**************************************/
+/*
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 14
+
+
+/**************************************
+*  Simple Functions
+**************************************/
+
+int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
+int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+
+/*
+LZ4_compress_default() :
+    Compresses 'sourceSize' bytes from buffer 'source'
+    into already allocated 'dest' buffer of size 'maxDestSize'.
+    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
+    It also runs faster, so it's a recommended setting.
+    If the function cannot compress 'source' into a more limited 'dest' budget,
+    compression stops *immediately*, and the function result is zero.
+    As a consequence, 'dest' content is not valid.
+    This function never writes outside 'dest' buffer, nor read outside 'source' buffer.
+        sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
+        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
+        return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize)
+              or 0 if compression fails
+
+LZ4_decompress_safe() :
+    compressedSize : is the precise full size of the compressed block.
+    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
+    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
+             If destination buffer is not large enough, decoding will stop and output an error code (<0).
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function is protected against buffer overflow exploits, including malicious data packets.
+             It never writes outside output buffer, nor reads outside input buffer.
+*/
+
+
+/**************************************
+*  Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+/*
+LZ4_compressBound() :
+    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+    This function is primarily useful for memory allocation purposes (destination buffer size).
+    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+    Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize)
+        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
+        return : maximum output size in a "worst case" scenario
+              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+*/
+int LZ4_compressBound(int inputSize);
+
+/*
+LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
+    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+    An acceleration value of "1" is the same as regular LZ4_compress_default()
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
+*/
+int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
+
+
+/*
+LZ4_compress_fast_extState() :
+    Same compression function, just using an externally allocated memory space to store compression state.
+    Use LZ4_sizeofState() to know how much memory must be allocated,
+    and allocate it on 8-bytes boundaries (using malloc() typically).
+    Then, provide it as 'void* state' to compression function.
+*/
+int LZ4_sizeofState(void);
+int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
+
+
+/*
+LZ4_compress_destSize() :
+    Reverse the logic, by compressing as much data as possible from 'source' buffer
+    into already allocated buffer 'dest' of size 'targetDestSize'.
+    This function either compresses the entire 'source' content into 'dest' if it's large enough,
+    or fill 'dest' buffer completely with as much data as possible from 'source'.
+        *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'.
+                         New value is necessarily <= old value.
+        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
+              or 0 if compression fails
+*/
+int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
+
+
+/*
+LZ4_decompress_fast() :
+    originalSize : is the original and therefore uncompressed size
+    return : the number of bytes read from the source buffer (in other words, the compressed size)
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
+    note : This function fully respect memory boundaries for properly formed compressed data.
+           It is a bit faster than LZ4_decompress_safe().
+           However, it does not provide any protection against intentionally modified data stream (malicious input).
+           Use this function in trusted environment only (data to decode comes from a trusted source).
+*/
+int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
+
+/*
+LZ4_decompress_safe_partial() :
+    This function decompress a compressed block of size 'compressedSize' at position 'source'
+    into destination buffer 'dest' of size 'maxDecompressedSize'.
+    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
+    reducing decompression time.
+    return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize)
+       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
+             Always control how many bytes were decoded.
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
+*/
+int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+
+/***********************************************
+*  Streaming Compression Functions
+***********************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(long long))
+/*
+ * LZ4_stream_t
+ * information structure to track an LZ4 stream.
+ * important : init this structure content before first use !
+ * note : only allocated directly the structure if you are statically linking LZ4
+ *        If you are using liblz4 as a DLL, please use below construction methods instead.
+ */
+typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;
+
+/*
+ * LZ4_resetStream
+ * Use this function to init an allocated LZ4_stream_t structure
+ */
+void LZ4_resetStream (LZ4_stream_t* streamPtr);
+
+/*
+ * LZ4_createStream will allocate and initialize an LZ4_stream_t structure
+ * LZ4_freeStream releases its memory.
+ * In the context of a DLL (liblz4), please use these methods rather than the static struct.
+ * They are more future proof, in case of a change of LZ4_stream_t size.
+ */
+LZ4_stream_t* LZ4_createStream(void);
+int           LZ4_freeStream (LZ4_stream_t* streamPtr);
+
+/*
+ * LZ4_loadDict
+ * Use this function to load a static dictionary into LZ4_stream.
+ * Any previous data will be forgotten, only 'dictionary' will remain in memory.
+ * Loading a size of 0 is allowed.
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+
+/*
+ * LZ4_compress_fast_continue
+ * Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
+ * Important : Previous data blocks are assumed to still be present and unmodified !
+ * 'dst' buffer must be already allocated.
+ * If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ * If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero.
+ */
+int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);
+
+/*
+ * LZ4_saveDict
+ * If previously compressed data block is not guaranteed to remain available at its memory location
+ * save it into a safer place (char* safeBuffer)
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ *        dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue()
+ * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error
+ */
+int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
+
+
+/************************************************
+*  Streaming Decompression Functions
+************************************************/
+
+#define LZ4_STREAMDECODESIZE_U64  4
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
+/*
+ * LZ4_streamDecode_t
+ * information structure to track an LZ4 stream.
+ * init this structure content using LZ4_setStreamDecode or memset() before first use !
+ *
+ * In the context of a DLL (liblz4) please prefer usage of construction methods below.
+ * They are more future proof, in case of a change of LZ4_streamDecode_t size in the future.
+ * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
+ * LZ4_freeStreamDecode releases its memory.
+ */
+LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+
+/*
+ * LZ4_setStreamDecode
+ * Use this function to instruct where to find the dictionary.
+ * Setting a size of 0 is allowed (same effect as reset).
+ * Return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+/*
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in "streaming" mode.
+    Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
+    In the case of a ring buffers, decoding buffer must be either :
+    - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
+      In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
+    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+      maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block.
+      In which case, encoding and decoding buffers do not need to be synchronized,
+      and encoding ring buffer can have any size, including small ones ( < 64 KB).
+    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+      In which case, encoding and decoding buffers do not need to be synchronized,
+      and encoding ring buffer can have any size, including larger than decoding buffer.
+    Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
+    and indicate where it is saved using LZ4_setStreamDecode()
+*/
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+    These decoding functions work the same as
+    a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue()
+    They are stand-alone. They don't need nor update an LZ4_streamDecode_t structure.
+*/
+int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
+int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
+
+
+/**************************************
+*  Obsolete Functions
+**************************************/
+/* Deprecate Warnings */
+/* Should these warnings messages be a problem,
+   it is generally possible to disable them,
+   with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual for example.
+   Otherwise, you can also define LZ4_DISABLE_DEPRECATE_WARNINGS */
+#define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
+#  define LZ4_DEPRECATED()   /* disable deprecation warnings */
+#else
+#  if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif (LZ4_GCC_VERSION >= 301)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
+#    define LZ4_DEPRECATED(message)
+#  endif
+#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
+
+/* Obsolete compression functions */
+/* These functions will generate warnings in a future release */
+int LZ4_compress               (const char* source, char* dest, int sourceSize);
+int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Obsolete decompression functions */
+/* These function names are completely deprecated and must no longer be used.
+   They are only provided in lz4.c for compatibility with older programs.
+    - LZ4_uncompress is the same as LZ4_decompress_fast
+    - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
+   These function prototypes are now disabled; uncomment them only if you really need them.
+   It is highly recommended to stop using these prototypes and migrate to maintained ones */
+/* int LZ4_uncompress (const char* source, char* dest, int outputSize); */
+/* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */
+
+/* Obsolete streaming functions; use new streaming interface whenever possible */
+LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_createStream() instead") int   LZ4_sizeofStreamState(void);
+LZ4_DEPRECATED("use LZ4_resetStream() instead")  int   LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_saveDict() instead")     char* LZ4_slideInputBuffer (void* state);
+
+/* Obsolete streaming decoding functions */
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/util/cbfstool/lz4/lib/lz4frame.c b/util/cbfstool/lz4/lib/lz4frame.c
new file mode 100644
index 0000000000..e5458bb9e4
--- /dev/null
+++ b/util/cbfstool/lz4/lib/lz4frame.c
@@ -0,0 +1,1479 @@
+/*
+LZ4 auto-framing library
+Copyright (C) 2011-2015, Yann Collet.
+
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- LZ4 source repository : https://github.com/Cyan4973/lz4
+- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* LZ4F is a stand-alone API to create LZ4-compressed Frames
+*  in full conformance with specification v1.5.0
+*  All related operations, including memory management, are handled by the library.
+* */
+
+
+/**************************************
+*  Compiler Options
+**************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/**************************************
+*  Memory routines
+**************************************/
+#include <stdlib.h>   /* malloc, calloc, free */
+#define ALLOCATOR(s)   calloc(1,s)
+#define FREEMEM        free
+#include <string.h>   /* memset, memcpy, memmove */
+#define MEM_INIT       memset
+
+
+/**************************************
+*  Includes
+**************************************/
+#include "lz4frame_static.h"
+#include "lz4.h"
+#include "lz4hc.h"
+#include "xxhash.h"
+
+
+/**************************************
+*  Basic Types
+**************************************/
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+*  Constants
+**************************************/
+#define KB *(1<<10)
+#define MB *(1<<20)
+#define GB *(1<<30)
+
+#define _1BIT  0x01
+#define _2BITS 0x03
+#define _3BITS 0x07
+#define _4BITS 0x0F
+#define _8BITS 0xFF
+
+#define LZ4F_MAGIC_SKIPPABLE_START 0x184D2A50U
+#define LZ4F_MAGICNUMBER 0x184D2204U
+#define LZ4F_BLOCKUNCOMPRESSED_FLAG 0x80000000U
+#define LZ4F_BLOCKSIZEID_DEFAULT LZ4F_max64KB
+
+static const size_t minFHSize = 7;
+static const size_t maxFHSize = 15;
+static const size_t BHSize = 4;
+static const int    minHClevel = 3;
+
+
+/**************************************
+*  Structures and local types
+**************************************/
+typedef struct LZ4F_cctx_s
+{
+    LZ4F_preferences_t prefs;
+    U32    version;
+    U32    cStage;
+    size_t maxBlockSize;
+    size_t maxBufferSize;
+    BYTE*  tmpBuff;
+    BYTE*  tmpIn;
+    size_t tmpInSize;
+    U64    totalInSize;
+    XXH32_state_t xxh;
+    void*  lz4CtxPtr;
+    U32    lz4CtxLevel;     /* 0: unallocated;  1: LZ4_stream_t;  3: LZ4_streamHC_t */
+} LZ4F_cctx_t;
+
+typedef struct LZ4F_dctx_s
+{
+    LZ4F_frameInfo_t frameInfo;
+    U32    version;
+    U32    dStage;
+    U64    frameRemainingSize;
+    size_t maxBlockSize;
+    size_t maxBufferSize;
+    const BYTE* srcExpect;
+    BYTE*  tmpIn;
+    size_t tmpInSize;
+    size_t tmpInTarget;
+    BYTE*  tmpOutBuffer;
+    const BYTE*  dict;
+    size_t dictSize;
+    BYTE*  tmpOut;
+    size_t tmpOutSize;
+    size_t tmpOutStart;
+    XXH32_state_t xxh;
+    BYTE   header[16];
+} LZ4F_dctx_t;
+
+
+/**************************************
+*  Error management
+**************************************/
+#define LZ4F_GENERATE_STRING(STRING) #STRING,
+static const char* LZ4F_errorStrings[] = { LZ4F_LIST_ERRORS(LZ4F_GENERATE_STRING) };
+
+
+unsigned LZ4F_isError(LZ4F_errorCode_t code)
+{
+    return (code > (LZ4F_errorCode_t)(-LZ4F_ERROR_maxCode));
+}
+
+const char* LZ4F_getErrorName(LZ4F_errorCode_t code)
+{
+    static const char* codeError = "Unspecified error code";
+    if (LZ4F_isError(code)) return LZ4F_errorStrings[-(int)(code)];
+    return codeError;
+}
+
+
+/**************************************
+*  Private functions
+**************************************/
+static size_t LZ4F_getBlockSize(unsigned blockSizeID)
+{
+    static const size_t blockSizes[4] = { 64 KB, 256 KB, 1 MB, 4 MB };
+
+    if (blockSizeID == 0) blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
+    blockSizeID -= 4;
+    if (blockSizeID > 3) return (size_t)-LZ4F_ERROR_maxBlockSize_invalid;
+    return blockSizes[blockSizeID];
+}
+
+
+/* unoptimized version; solves endianess & alignment issues */
+static U32 LZ4F_readLE32 (const BYTE* srcPtr)
+{
+    U32 value32 = srcPtr[0];
+    value32 += (srcPtr[1]<<8);
+    value32 += (srcPtr[2]<<16);
+    value32 += ((U32)srcPtr[3])<<24;
+    return value32;
+}
+
+static void LZ4F_writeLE32 (BYTE* dstPtr, U32 value32)
+{
+    dstPtr[0] = (BYTE)value32;
+    dstPtr[1] = (BYTE)(value32 >> 8);
+    dstPtr[2] = (BYTE)(value32 >> 16);
+    dstPtr[3] = (BYTE)(value32 >> 24);
+}
+
+static U64 LZ4F_readLE64 (const BYTE* srcPtr)
+{
+    U64 value64 = srcPtr[0];
+    value64 += ((U64)srcPtr[1]<<8);
+    value64 += ((U64)srcPtr[2]<<16);
+    value64 += ((U64)srcPtr[3]<<24);
+    value64 += ((U64)srcPtr[4]<<32);
+    value64 += ((U64)srcPtr[5]<<40);
+    value64 += ((U64)srcPtr[6]<<48);
+    value64 += ((U64)srcPtr[7]<<56);
+    return value64;
+}
+
+static void LZ4F_writeLE64 (BYTE* dstPtr, U64 value64)
+{
+    dstPtr[0] = (BYTE)value64;
+    dstPtr[1] = (BYTE)(value64 >> 8);
+    dstPtr[2] = (BYTE)(value64 >> 16);
+    dstPtr[3] = (BYTE)(value64 >> 24);
+    dstPtr[4] = (BYTE)(value64 >> 32);
+    dstPtr[5] = (BYTE)(value64 >> 40);
+    dstPtr[6] = (BYTE)(value64 >> 48);
+    dstPtr[7] = (BYTE)(value64 >> 56);
+}
+
+
+static BYTE LZ4F_headerChecksum (const void* header, size_t length)
+{
+    U32 xxh = XXH32(header, length, 0);
+    return (BYTE)(xxh >> 8);
+}
+
+
+/**************************************
+*  Simple compression functions
+**************************************/
+static LZ4F_blockSizeID_t LZ4F_optimalBSID(const LZ4F_blockSizeID_t requestedBSID, const size_t srcSize)
+{
+    LZ4F_blockSizeID_t proposedBSID = LZ4F_max64KB;
+    size_t maxBlockSize = 64 KB;
+    while (requestedBSID > proposedBSID)
+    {
+        if (srcSize <= maxBlockSize)
+            return proposedBSID;
+        proposedBSID = (LZ4F_blockSizeID_t)((int)proposedBSID + 1);
+        maxBlockSize <<= 2;
+    }
+    return requestedBSID;
+}
+
+
+size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_preferences_t prefs;
+    size_t headerSize;
+    size_t streamSize;
+
+    if (preferencesPtr!=NULL) prefs = *preferencesPtr;
+    else memset(&prefs, 0, sizeof(prefs));
+
+    prefs.frameInfo.blockSizeID = LZ4F_optimalBSID(prefs.frameInfo.blockSizeID, srcSize);
+    prefs.autoFlush = 1;
+
+    headerSize = maxFHSize;      /* header size, including magic number and frame content size*/
+    streamSize = LZ4F_compressBound(srcSize, &prefs);
+
+    return headerSize + streamSize;
+}
+
+
+/* LZ4F_compressFrame()
+* Compress an entire srcBuffer into a valid LZ4 frame, as defined by specification v1.5.0, in a single step.
+* The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case.
+* You can get the minimum value of dstMaxSize by using LZ4F_compressFrameBound()
+* If this condition is not respected, LZ4F_compressFrame() will fail (result is an errorCode)
+* The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will then be set to default.
+* The result of the function is the number of bytes written into dstBuffer.
+* The function outputs an error code if it fails (can be tested using LZ4F_isError())
+*/
+size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuffer, size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_cctx_t cctxI;
+    LZ4_stream_t lz4ctx;
+    LZ4F_preferences_t prefs;
+    LZ4F_compressOptions_t options;
+    LZ4F_errorCode_t errorCode;
+    BYTE* const dstStart = (BYTE*) dstBuffer;
+    BYTE* dstPtr = dstStart;
+    BYTE* const dstEnd = dstStart + dstMaxSize;
+
+    memset(&cctxI, 0, sizeof(cctxI));   /* works because no allocation */
+    memset(&options, 0, sizeof(options));
+
+    cctxI.version = LZ4F_VERSION;
+    cctxI.maxBufferSize = 5 MB;   /* mess with real buffer size to prevent allocation; works because autoflush==1 & stableSrc==1 */
+
+    if (preferencesPtr!=NULL)
+        prefs = *preferencesPtr;
+    else
+        memset(&prefs, 0, sizeof(prefs));
+    if (prefs.frameInfo.contentSize != 0)
+        prefs.frameInfo.contentSize = (U64)srcSize;   /* auto-correct content size if selected (!=0) */
+
+    if (prefs.compressionLevel < (int)minHClevel)
+    {
+        cctxI.lz4CtxPtr = &lz4ctx;
+        cctxI.lz4CtxLevel = 1;
+    }
+
+    prefs.frameInfo.blockSizeID = LZ4F_optimalBSID(prefs.frameInfo.blockSizeID, srcSize);
+    prefs.autoFlush = 1;
+    if (srcSize <= LZ4F_getBlockSize(prefs.frameInfo.blockSizeID))
+        prefs.frameInfo.blockMode = LZ4F_blockIndependent;   /* no need for linked blocks */
+
+    options.stableSrc = 1;
+
+    if (dstMaxSize < LZ4F_compressFrameBound(srcSize, &prefs))
+        return (size_t)-LZ4F_ERROR_dstMaxSize_tooSmall;
+
+    errorCode = LZ4F_compressBegin(&cctxI, dstBuffer, dstMaxSize, &prefs);  /* write header */
+    if (LZ4F_isError(errorCode)) return errorCode;
+    dstPtr += errorCode;   /* header size */
+
+    errorCode = LZ4F_compressUpdate(&cctxI, dstPtr, dstEnd-dstPtr, srcBuffer, srcSize, &options);
+    if (LZ4F_isError(errorCode)) return errorCode;
+    dstPtr += errorCode;
+
+    errorCode = LZ4F_compressEnd(&cctxI, dstPtr, dstEnd-dstPtr, &options);   /* flush last block, and generate suffix */
+    if (LZ4F_isError(errorCode)) return errorCode;
+    dstPtr += errorCode;
+
+    if (prefs.compressionLevel >= (int)minHClevel)   /* no allocation necessary with lz4 fast */
+        FREEMEM(cctxI.lz4CtxPtr);
+
+    return (dstPtr - dstStart);
+}
+
+
+/***********************************
+*  Advanced compression functions
+***********************************/
+
+/* LZ4F_createCompressionContext() :
+* The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+* This is achieved using LZ4F_createCompressionContext(), which takes as argument a version and an LZ4F_preferences_t structure.
+* The version provided MUST be LZ4F_VERSION. It is intended to track potential version differences between different binaries.
+* The function will provide a pointer to an allocated LZ4F_compressionContext_t object.
+* If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation.
+* Object can release its memory using LZ4F_freeCompressionContext();
+*/
+LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* LZ4F_compressionContextPtr, unsigned version)
+{
+    LZ4F_cctx_t* cctxPtr;
+
+    cctxPtr = (LZ4F_cctx_t*)ALLOCATOR(sizeof(LZ4F_cctx_t));
+    if (cctxPtr==NULL) return (LZ4F_errorCode_t)(-LZ4F_ERROR_allocation_failed);
+
+    cctxPtr->version = version;
+    cctxPtr->cStage = 0;   /* Next stage : write header */
+
+    *LZ4F_compressionContextPtr = (LZ4F_compressionContext_t)cctxPtr;
+
+    return LZ4F_OK_NoError;
+}
+
+
+LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t LZ4F_compressionContext)
+{
+    LZ4F_cctx_t* cctxPtr = (LZ4F_cctx_t*)LZ4F_compressionContext;
+
+    if (cctxPtr != NULL)   /* null pointers can be safely provided to this function, like free() */
+    {
+       FREEMEM(cctxPtr->lz4CtxPtr);
+       FREEMEM(cctxPtr->tmpBuff);
+       FREEMEM(LZ4F_compressionContext);
+    }
+
+    return LZ4F_OK_NoError;
+}
+
+
+/* LZ4F_compressBegin() :
+* will write the frame header into dstBuffer.
+* dstBuffer must be large enough to accommodate a header (dstMaxSize). Maximum header size is LZ4F_MAXHEADERFRAME_SIZE bytes.
+* The result of the function is the number of bytes written into dstBuffer for the header
+* or an error code (can be tested using LZ4F_isError())
+*/
+size_t LZ4F_compressBegin(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_preferences_t prefNull;
+    LZ4F_cctx_t* cctxPtr = (LZ4F_cctx_t*)compressionContext;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    BYTE* headerStart;
+    size_t requiredBuffSize;
+
+    if (dstMaxSize < maxFHSize) return (size_t)-LZ4F_ERROR_dstMaxSize_tooSmall;
+    if (cctxPtr->cStage != 0) return (size_t)-LZ4F_ERROR_GENERIC;
+    memset(&prefNull, 0, sizeof(prefNull));
+    if (preferencesPtr == NULL) preferencesPtr = &prefNull;
+    cctxPtr->prefs = *preferencesPtr;
+
+    /* ctx Management */
+    {
+        U32 tableID = (cctxPtr->prefs.compressionLevel < minHClevel) ? 1 : 2;  /* 0:nothing ; 1:LZ4 table ; 2:HC tables */
+        if (cctxPtr->lz4CtxLevel < tableID)
+        {
+            FREEMEM(cctxPtr->lz4CtxPtr);
+            if (cctxPtr->prefs.compressionLevel < minHClevel)
+                cctxPtr->lz4CtxPtr = (void*)LZ4_createStream();
+            else
+                cctxPtr->lz4CtxPtr = (void*)LZ4_createStreamHC();
+            cctxPtr->lz4CtxLevel = tableID;
+        }
+    }
+
+    /* Buffer Management */
+    if (cctxPtr->prefs.frameInfo.blockSizeID == 0) cctxPtr->prefs.frameInfo.blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
+    cctxPtr->maxBlockSize = LZ4F_getBlockSize(cctxPtr->prefs.frameInfo.blockSizeID);
+
+    requiredBuffSize = cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 128 KB);
+    if (preferencesPtr->autoFlush)
+        requiredBuffSize = (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 64 KB;   /* just needs dict */
+
+    if (cctxPtr->maxBufferSize < requiredBuffSize)
+    {
+        cctxPtr->maxBufferSize = requiredBuffSize;
+        FREEMEM(cctxPtr->tmpBuff);
+        cctxPtr->tmpBuff = (BYTE*)ALLOCATOR(requiredBuffSize);
+        if (cctxPtr->tmpBuff == NULL) return (size_t)-LZ4F_ERROR_allocation_failed;
+    }
+    cctxPtr->tmpIn = cctxPtr->tmpBuff;
+    cctxPtr->tmpInSize = 0;
+    XXH32_reset(&(cctxPtr->xxh), 0);
+    if (cctxPtr->prefs.compressionLevel < minHClevel)
+        LZ4_resetStream((LZ4_stream_t*)(cctxPtr->lz4CtxPtr));
+    else
+        LZ4_resetStreamHC((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), cctxPtr->prefs.compressionLevel);
+
+    /* Magic Number */
+    LZ4F_writeLE32(dstPtr, LZ4F_MAGICNUMBER);
+    dstPtr += 4;
+    headerStart = dstPtr;
+
+    /* FLG Byte */
+    *dstPtr++ = (BYTE)(((1 & _2BITS) << 6)    /* Version('01') */
+        + ((cctxPtr->prefs.frameInfo.blockMode & _1BIT ) << 5)    /* Block mode */
+        + ((cctxPtr->prefs.frameInfo.contentChecksumFlag & _1BIT ) << 2)   /* Frame checksum */
+        + ((cctxPtr->prefs.frameInfo.contentSize > 0) << 3));   /* Frame content size */
+    /* BD Byte */
+    *dstPtr++ = (BYTE)((cctxPtr->prefs.frameInfo.blockSizeID & _3BITS) << 4);
+    /* Optional Frame content size field */
+    if (cctxPtr->prefs.frameInfo.contentSize)
+    {
+        LZ4F_writeLE64(dstPtr, cctxPtr->prefs.frameInfo.contentSize);
+        dstPtr += 8;
+        cctxPtr->totalInSize = 0;
+    }
+    /* CRC Byte */
+    *dstPtr = LZ4F_headerChecksum(headerStart, dstPtr - headerStart);
+    dstPtr++;
+
+    cctxPtr->cStage = 1;   /* header written, now request input data block */
+
+    return (dstPtr - dstStart);
+}
+
+
+/* LZ4F_compressBound() : gives the size of Dst buffer given a srcSize to handle worst case situations.
+*                        The LZ4F_frameInfo_t structure is optional :
+*                        you can provide NULL as argument, preferences will then be set to cover worst case situations.
+* */
+size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_preferences_t prefsNull;
+    memset(&prefsNull, 0, sizeof(prefsNull));
+    prefsNull.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;   /* worst case */
+    {
+        const LZ4F_preferences_t* prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr;
+        LZ4F_blockSizeID_t bid = prefsPtr->frameInfo.blockSizeID;
+        size_t blockSize = LZ4F_getBlockSize(bid);
+        unsigned nbBlocks = (unsigned)(srcSize / blockSize) + 1;
+        size_t lastBlockSize = prefsPtr->autoFlush ? srcSize % blockSize : blockSize;
+        size_t blockInfo = 4;   /* default, without block CRC option */
+        size_t frameEnd = 4 + (prefsPtr->frameInfo.contentChecksumFlag*4);
+
+        return (blockInfo * nbBlocks) + (blockSize * (nbBlocks-1)) + lastBlockSize + frameEnd;;
+    }
+}
+
+
+typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level);
+
+static size_t LZ4F_compressBlock(void* dst, const void* src, size_t srcSize, compressFunc_t compress, void* lz4ctx, int level)
+{
+    /* compress one block */
+    BYTE* cSizePtr = (BYTE*)dst;
+    U32 cSize;
+    cSize = (U32)compress(lz4ctx, (const char*)src, (char*)(cSizePtr+4), (int)(srcSize), (int)(srcSize-1), level);
+    LZ4F_writeLE32(cSizePtr, cSize);
+    if (cSize == 0)   /* compression failed */
+    {
+        cSize = (U32)srcSize;
+        LZ4F_writeLE32(cSizePtr, cSize + LZ4F_BLOCKUNCOMPRESSED_FLAG);
+        memcpy(cSizePtr+4, src, srcSize);
+    }
+    return cSize + 4;
+}
+
+
+static int LZ4F_localLZ4_compress_limitedOutput_withState(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level)
+{
+    (void) level;
+    return LZ4_compress_limitedOutput_withState(ctx, src, dst, srcSize, dstSize);
+}
+
+static int LZ4F_localLZ4_compress_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level)
+{
+    (void) level;
+    return LZ4_compress_limitedOutput_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstSize);
+}
+
+static int LZ4F_localLZ4_compressHC_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level)
+{
+    (void) level;
+    return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstSize);
+}
+
+static compressFunc_t LZ4F_selectCompression(LZ4F_blockMode_t blockMode, int level)
+{
+    if (level < minHClevel)
+    {
+        if (blockMode == LZ4F_blockIndependent) return LZ4F_localLZ4_compress_limitedOutput_withState;
+        return LZ4F_localLZ4_compress_limitedOutput_continue;
+    }
+    if (blockMode == LZ4F_blockIndependent) return LZ4_compress_HC_extStateHC;
+    return LZ4F_localLZ4_compressHC_limitedOutput_continue;
+}
+
+static int LZ4F_localSaveDict(LZ4F_cctx_t* cctxPtr)
+{
+    if (cctxPtr->prefs.compressionLevel < minHClevel)
+        return LZ4_saveDict ((LZ4_stream_t*)(cctxPtr->lz4CtxPtr), (char*)(cctxPtr->tmpBuff), 64 KB);
+    return LZ4_saveDictHC ((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), (char*)(cctxPtr->tmpBuff), 64 KB);
+}
+
+typedef enum { notDone, fromTmpBuffer, fromSrcBuffer } LZ4F_lastBlockStatus;
+
+/* LZ4F_compressUpdate()
+* LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+* The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case.
+* If this condition is not respected, LZ4F_compress() will fail (result is an errorCode)
+* You can get the minimum value of dstMaxSize by using LZ4F_compressBound()
+* The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+* The result of the function is the number of bytes written into dstBuffer : it can be zero, meaning input data was just buffered.
+* The function outputs an error code if it fails (can be tested using LZ4F_isError())
+*/
+size_t LZ4F_compressUpdate(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    LZ4F_compressOptions_t cOptionsNull;
+    LZ4F_cctx_t* cctxPtr = (LZ4F_cctx_t*)compressionContext;
+    size_t blockSize = cctxPtr->maxBlockSize;
+    const BYTE* srcPtr = (const BYTE*)srcBuffer;
+    const BYTE* const srcEnd = srcPtr + srcSize;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    LZ4F_lastBlockStatus lastBlockCompressed = notDone;
+    compressFunc_t compress;
+
+
+    if (cctxPtr->cStage != 1) return (size_t)-LZ4F_ERROR_GENERIC;
+    if (dstMaxSize < LZ4F_compressBound(srcSize, &(cctxPtr->prefs))) return (size_t)-LZ4F_ERROR_dstMaxSize_tooSmall;
+    memset(&cOptionsNull, 0, sizeof(cOptionsNull));
+    if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull;
+
+    /* select compression function */
+    compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
+
+    /* complete tmp buffer */
+    if (cctxPtr->tmpInSize > 0)   /* some data already within tmp buffer */
+    {
+        size_t sizeToCopy = blockSize - cctxPtr->tmpInSize;
+        if (sizeToCopy > srcSize)
+        {
+            /* add src to tmpIn buffer */
+            memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, srcSize);
+            srcPtr = srcEnd;
+            cctxPtr->tmpInSize += srcSize;
+            /* still needs some CRC */
+        }
+        else
+        {
+            /* complete tmpIn block and then compress it */
+            lastBlockCompressed = fromTmpBuffer;
+            memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, sizeToCopy);
+            srcPtr += sizeToCopy;
+
+            dstPtr += LZ4F_compressBlock(dstPtr, cctxPtr->tmpIn, blockSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+
+            if (cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) cctxPtr->tmpIn += blockSize;
+            cctxPtr->tmpInSize = 0;
+        }
+    }
+
+    while ((size_t)(srcEnd - srcPtr) >= blockSize)
+    {
+        /* compress full block */
+        lastBlockCompressed = fromSrcBuffer;
+        dstPtr += LZ4F_compressBlock(dstPtr, srcPtr, blockSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+        srcPtr += blockSize;
+    }
+
+    if ((cctxPtr->prefs.autoFlush) && (srcPtr < srcEnd))
+    {
+        /* compress remaining input < blockSize */
+        lastBlockCompressed = fromSrcBuffer;
+        dstPtr += LZ4F_compressBlock(dstPtr, srcPtr, srcEnd - srcPtr, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+        srcPtr  = srcEnd;
+    }
+
+    /* preserve dictionary if necessary */
+    if ((cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) && (lastBlockCompressed==fromSrcBuffer))
+    {
+        if (compressOptionsPtr->stableSrc)
+        {
+            cctxPtr->tmpIn = cctxPtr->tmpBuff;
+        }
+        else
+        {
+            int realDictSize = LZ4F_localSaveDict(cctxPtr);
+            if (realDictSize==0) return (size_t)-LZ4F_ERROR_GENERIC;
+            cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+        }
+    }
+
+    /* keep tmpIn within limits */
+    if ((cctxPtr->tmpIn + blockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)   /* necessarily LZ4F_blockLinked && lastBlockCompressed==fromTmpBuffer */
+        && !(cctxPtr->prefs.autoFlush))
+    {
+        int realDictSize = LZ4F_localSaveDict(cctxPtr);
+        cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+    }
+
+    /* some input data left, necessarily < blockSize */
+    if (srcPtr < srcEnd)
+    {
+        /* fill tmp buffer */
+        size_t sizeToCopy = srcEnd - srcPtr;
+        memcpy(cctxPtr->tmpIn, srcPtr, sizeToCopy);
+        cctxPtr->tmpInSize = sizeToCopy;
+    }
+
+    if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled)
+        XXH32_update(&(cctxPtr->xxh), srcBuffer, srcSize);
+
+    cctxPtr->totalInSize += srcSize;
+    return dstPtr - dstStart;
+}
+
+
+/* LZ4F_flush()
+* Should you need to create compressed data immediately, without waiting for a block to be filled,
+* you can call LZ4_flush(), which will immediately compress any remaining data stored within compressionContext.
+* The result of the function is the number of bytes written into dstBuffer
+* (it can be zero, this means there was no data left within compressionContext)
+* The function outputs an error code if it fails (can be tested using LZ4F_isError())
+* The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+*/
+size_t LZ4F_flush(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    LZ4F_cctx_t* cctxPtr = (LZ4F_cctx_t*)compressionContext;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    compressFunc_t compress;
+
+
+    if (cctxPtr->tmpInSize == 0) return 0;   /* nothing to flush */
+    if (cctxPtr->cStage != 1) return (size_t)-LZ4F_ERROR_GENERIC;
+    if (dstMaxSize < (cctxPtr->tmpInSize + 8)) return (size_t)-LZ4F_ERROR_dstMaxSize_tooSmall;   /* +8 : block header(4) + block checksum(4) */
+    (void)compressOptionsPtr;   /* not yet useful */
+
+    /* select compression function */
+    compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
+
+    /* compress tmp buffer */
+    dstPtr += LZ4F_compressBlock(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+    if (cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) cctxPtr->tmpIn += cctxPtr->tmpInSize;
+    cctxPtr->tmpInSize = 0;
+
+    /* keep tmpIn within limits */
+    if ((cctxPtr->tmpIn + cctxPtr->maxBlockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize))   /* necessarily LZ4F_blockLinked */
+    {
+        int realDictSize = LZ4F_localSaveDict(cctxPtr);
+        cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+    }
+
+    return dstPtr - dstStart;
+}
+
+
+/* LZ4F_compressEnd()
+* When you want to properly finish the compressed frame, just call LZ4F_compressEnd().
+* It will flush whatever data remained within compressionContext (like LZ4_flush())
+* but also properly finalize the frame, with an endMark and a checksum.
+* The result of the function is the number of bytes written into dstBuffer (necessarily >= 4 (endMark size))
+* The function outputs an error code if it fails (can be tested using LZ4F_isError())
+* The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+* compressionContext can then be used again, starting with LZ4F_compressBegin(). The preferences will remain the same.
+*/
+size_t LZ4F_compressEnd(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    LZ4F_cctx_t* cctxPtr = (LZ4F_cctx_t*)compressionContext;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    size_t errorCode;
+
+    errorCode = LZ4F_flush(compressionContext, dstBuffer, dstMaxSize, compressOptionsPtr);
+    if (LZ4F_isError(errorCode)) return errorCode;
+    dstPtr += errorCode;
+
+    LZ4F_writeLE32(dstPtr, 0);
+    dstPtr+=4;   /* endMark */
+
+    if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled)
+    {
+        U32 xxh = XXH32_digest(&(cctxPtr->xxh));
+        LZ4F_writeLE32(dstPtr, xxh);
+        dstPtr+=4;   /* content Checksum */
+    }
+
+    cctxPtr->cStage = 0;   /* state is now re-usable (with identical preferences) */
+
+    if (cctxPtr->prefs.frameInfo.contentSize)
+    {
+        if (cctxPtr->prefs.frameInfo.contentSize != cctxPtr->totalInSize)
+            return (size_t)-LZ4F_ERROR_frameSize_wrong;
+    }
+
+    return dstPtr - dstStart;
+}
+
+
+/**********************************
+*  Decompression functions
+**********************************/
+
+/* Resource management */
+
+/* LZ4F_createDecompressionContext() :
+* The first thing to do is to create a decompressionContext object, which will be used in all decompression operations.
+* This is achieved using LZ4F_createDecompressionContext().
+* The function will provide a pointer to a fully allocated and initialized LZ4F_decompressionContext object.
+* If the result LZ4F_errorCode_t is not zero, there was an error during context creation.
+* Object can release its memory using LZ4F_freeDecompressionContext();
+*/
+LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_decompressionContext_t* LZ4F_decompressionContextPtr, unsigned versionNumber)
+{
+    LZ4F_dctx_t* dctxPtr;
+
+    dctxPtr = (LZ4F_dctx_t*)ALLOCATOR(sizeof(LZ4F_dctx_t));
+    if (dctxPtr==NULL) return (LZ4F_errorCode_t)-LZ4F_ERROR_GENERIC;
+
+    dctxPtr->version = versionNumber;
+    *LZ4F_decompressionContextPtr = (LZ4F_decompressionContext_t)dctxPtr;
+    return LZ4F_OK_NoError;
+}
+
+LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_decompressionContext_t LZ4F_decompressionContext)
+{
+    LZ4F_errorCode_t result = LZ4F_OK_NoError;
+    LZ4F_dctx_t* dctxPtr = (LZ4F_dctx_t*)LZ4F_decompressionContext;
+    if (dctxPtr != NULL)   /* can accept NULL input, like free() */
+    {
+      result = (LZ4F_errorCode_t)dctxPtr->dStage;
+      FREEMEM(dctxPtr->tmpIn);
+      FREEMEM(dctxPtr->tmpOutBuffer);
+      FREEMEM(dctxPtr);
+    }
+    return result;
+}
+
+
+/* ******************************************************************** */
+/* ********************* Decompression ******************************** */
+/* ******************************************************************** */
+
+typedef enum { dstage_getHeader=0, dstage_storeHeader,
+    dstage_getCBlockSize, dstage_storeCBlockSize,
+    dstage_copyDirect,
+    dstage_getCBlock, dstage_storeCBlock,
+    dstage_decodeCBlock, dstage_decodeCBlock_intoDst,
+    dstage_decodeCBlock_intoTmp, dstage_flushOut,
+    dstage_getSuffix, dstage_storeSuffix,
+    dstage_getSFrameSize, dstage_storeSFrameSize,
+    dstage_skipSkippable
+} dStage_t;
+
+
+/* LZ4F_decodeHeader
+   return : nb Bytes read from srcVoidPtr (necessarily <= srcSize)
+            or an error code (testable with LZ4F_isError())
+   output : set internal values of dctx, such as
+            dctxPtr->frameInfo and dctxPtr->dStage.
+   input  : srcVoidPtr points at the **beginning of the frame**
+*/
+static size_t LZ4F_decodeHeader(LZ4F_dctx_t* dctxPtr, const void* srcVoidPtr, size_t srcSize)
+{
+    BYTE FLG, BD, HC;
+    unsigned version, blockMode, blockChecksumFlag, contentSizeFlag, contentChecksumFlag, blockSizeID;
+    size_t bufferNeeded;
+    size_t frameHeaderSize;
+    const BYTE* srcPtr = (const BYTE*)srcVoidPtr;
+
+    /* need to decode header to get frameInfo */
+    if (srcSize < minFHSize) return (size_t)-LZ4F_ERROR_frameHeader_incomplete;   /* minimal frame header size */
+    memset(&(dctxPtr->frameInfo), 0, sizeof(dctxPtr->frameInfo));
+
+    /* special case : skippable frames */
+    if ((LZ4F_readLE32(srcPtr) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START)
+    {
+        dctxPtr->frameInfo.frameType = LZ4F_skippableFrame;
+        if (srcVoidPtr == (void*)(dctxPtr->header))
+        {
+            dctxPtr->tmpInSize = srcSize;
+            dctxPtr->tmpInTarget = 8;
+            dctxPtr->dStage = dstage_storeSFrameSize;
+            return srcSize;
+        }
+        else
+        {
+            dctxPtr->dStage = dstage_getSFrameSize;
+            return 4;
+        }
+    }
+
+    /* control magic number */
+    if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER) return (size_t)-LZ4F_ERROR_frameType_unknown;
+    dctxPtr->frameInfo.frameType = LZ4F_frame;
+
+    /* Flags */
+    FLG = srcPtr[4];
+    version = (FLG>>6) & _2BITS;
+    blockMode = (FLG>>5) & _1BIT;
+    blockChecksumFlag = (FLG>>4) & _1BIT;
+    contentSizeFlag = (FLG>>3) & _1BIT;
+    contentChecksumFlag = (FLG>>2) & _1BIT;
+
+    /* Frame Header Size */
+    frameHeaderSize = contentSizeFlag ? maxFHSize : minFHSize;
+
+    if (srcSize < frameHeaderSize)
+    {
+        /* not enough input to fully decode frame header */
+        if (srcPtr != dctxPtr->header)
+            memcpy(dctxPtr->header, srcPtr, srcSize);
+        dctxPtr->tmpInSize = srcSize;
+        dctxPtr->tmpInTarget = frameHeaderSize;
+        dctxPtr->dStage = dstage_storeHeader;
+        return srcSize;
+    }
+
+    BD = srcPtr[5];
+    blockSizeID = (BD>>4) & _3BITS;
+
+    /* validate */
+    if (version != 1) return (size_t)-LZ4F_ERROR_headerVersion_wrong;        /* Version Number, only supported value */
+    if (blockChecksumFlag != 0) return (size_t)-LZ4F_ERROR_blockChecksum_unsupported; /* Not supported for the time being */
+    if (((FLG>>0)&_2BITS) != 0) return (size_t)-LZ4F_ERROR_reservedFlag_set; /* Reserved bits */
+    if (((BD>>7)&_1BIT) != 0) return (size_t)-LZ4F_ERROR_reservedFlag_set;   /* Reserved bit */
+    if (blockSizeID < 4) return (size_t)-LZ4F_ERROR_maxBlockSize_invalid;    /* 4-7 only supported values for the time being */
+    if (((BD>>0)&_4BITS) != 0) return (size_t)-LZ4F_ERROR_reservedFlag_set;  /* Reserved bits */
+
+    /* check */
+    HC = LZ4F_headerChecksum(srcPtr+4, frameHeaderSize-5);
+    if (HC != srcPtr[frameHeaderSize-1]) return (size_t)-LZ4F_ERROR_headerChecksum_invalid;   /* Bad header checksum error */
+
+    /* save */
+    dctxPtr->frameInfo.blockMode = (LZ4F_blockMode_t)blockMode;
+    dctxPtr->frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)contentChecksumFlag;
+    dctxPtr->frameInfo.blockSizeID = (LZ4F_blockSizeID_t)blockSizeID;
+    dctxPtr->maxBlockSize = LZ4F_getBlockSize(blockSizeID);
+    if (contentSizeFlag)
+        dctxPtr->frameRemainingSize = dctxPtr->frameInfo.contentSize = LZ4F_readLE64(srcPtr+6);
+
+    /* init */
+    if (contentChecksumFlag) XXH32_reset(&(dctxPtr->xxh), 0);
+
+    /* alloc */
+    bufferNeeded = dctxPtr->maxBlockSize + ((dctxPtr->frameInfo.blockMode==LZ4F_blockLinked) * 128 KB);
+    if (bufferNeeded > dctxPtr->maxBufferSize)   /* tmp buffers too small */
+    {
+        FREEMEM(dctxPtr->tmpIn);
+        FREEMEM(dctxPtr->tmpOutBuffer);
+        dctxPtr->maxBufferSize = bufferNeeded;
+        dctxPtr->tmpIn = (BYTE*)ALLOCATOR(dctxPtr->maxBlockSize);
+        if (dctxPtr->tmpIn == NULL) return (size_t)-LZ4F_ERROR_GENERIC;
+        dctxPtr->tmpOutBuffer= (BYTE*)ALLOCATOR(dctxPtr->maxBufferSize);
+        if (dctxPtr->tmpOutBuffer== NULL) return (size_t)-LZ4F_ERROR_GENERIC;
+    }
+    dctxPtr->tmpInSize = 0;
+    dctxPtr->tmpInTarget = 0;
+    dctxPtr->dict = dctxPtr->tmpOutBuffer;
+    dctxPtr->dictSize = 0;
+    dctxPtr->tmpOut = dctxPtr->tmpOutBuffer;
+    dctxPtr->tmpOutStart = 0;
+    dctxPtr->tmpOutSize = 0;
+
+    dctxPtr->dStage = dstage_getCBlockSize;
+
+    return frameHeaderSize;
+}
+
+
+/* LZ4F_getFrameInfo()
+* This function decodes frame header information, such as blockSize.
+* It is optional : you could start by calling directly LZ4F_decompress() instead.
+* The objective is to extract header information without starting decompression, typically for allocation purposes.
+* LZ4F_getFrameInfo() can also be used *after* starting decompression, on a valid LZ4F_decompressionContext_t.
+* The number of bytes read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+* You are expected to resume decompression from where it stopped (srcBuffer + *srcSizePtr)
+* The function result is an hint of the better srcSize to use for next call to LZ4F_decompress,
+* or an error code which can be tested using LZ4F_isError().
+*/
+LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_decompressionContext_t dCtx, LZ4F_frameInfo_t* frameInfoPtr,
+                                   const void* srcBuffer, size_t* srcSizePtr)
+{
+    LZ4F_dctx_t* dctxPtr = (LZ4F_dctx_t*)dCtx;
+
+    if (dctxPtr->dStage > dstage_storeHeader)   /* note : requires dstage_* header related to be at beginning of enum */
+    {
+        size_t o=0, i=0;
+        /* frameInfo already decoded */
+        *srcSizePtr = 0;
+        *frameInfoPtr = dctxPtr->frameInfo;
+        return LZ4F_decompress(dCtx, NULL, &o, NULL, &i, NULL);
+    }
+    else
+    {
+        size_t o=0;
+        size_t nextSrcSize = LZ4F_decompress(dCtx, NULL, &o, srcBuffer, srcSizePtr, NULL);
+        if (dctxPtr->dStage <= dstage_storeHeader)   /* note : requires dstage_* header related to be at beginning of enum */
+            return (size_t)-LZ4F_ERROR_frameHeader_incomplete;
+        *frameInfoPtr = dctxPtr->frameInfo;
+        return nextSrcSize;
+    }
+}
+
+
+/* trivial redirector, for common prototype */
+static int LZ4F_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize)
+{
+    (void)dictStart; (void)dictSize;
+    return LZ4_decompress_safe (source, dest, compressedSize, maxDecompressedSize);
+}
+
+
+static void LZ4F_updateDict(LZ4F_dctx_t* dctxPtr, const BYTE* dstPtr, size_t dstSize, const BYTE* dstPtr0, unsigned withinTmp)
+{
+    if (dctxPtr->dictSize==0)
+        dctxPtr->dict = (const BYTE*)dstPtr;   /* priority to dictionary continuity */
+
+    if (dctxPtr->dict + dctxPtr->dictSize == dstPtr)   /* dictionary continuity */
+    {
+        dctxPtr->dictSize += dstSize;
+        return;
+    }
+
+    if (dstPtr - dstPtr0 + dstSize >= 64 KB)   /* dstBuffer large enough to become dictionary */
+    {
+        dctxPtr->dict = (const BYTE*)dstPtr0;
+        dctxPtr->dictSize = dstPtr - dstPtr0 + dstSize;
+        return;
+    }
+
+    if ((withinTmp) && (dctxPtr->dict == dctxPtr->tmpOutBuffer))
+    {
+        /* assumption : dctxPtr->dict + dctxPtr->dictSize == dctxPtr->tmpOut + dctxPtr->tmpOutStart */
+        dctxPtr->dictSize += dstSize;
+        return;
+    }
+
+    if (withinTmp) /* copy relevant dict portion in front of tmpOut within tmpOutBuffer */
+    {
+        size_t preserveSize = dctxPtr->tmpOut - dctxPtr->tmpOutBuffer;
+        size_t copySize = 64 KB - dctxPtr->tmpOutSize;
+        const BYTE* oldDictEnd = dctxPtr->dict + dctxPtr->dictSize - dctxPtr->tmpOutStart;
+        if (dctxPtr->tmpOutSize > 64 KB) copySize = 0;
+        if (copySize > preserveSize) copySize = preserveSize;
+
+        memcpy(dctxPtr->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+
+        dctxPtr->dict = dctxPtr->tmpOutBuffer;
+        dctxPtr->dictSize = preserveSize + dctxPtr->tmpOutStart + dstSize;
+        return;
+    }
+
+    if (dctxPtr->dict == dctxPtr->tmpOutBuffer)     /* copy dst into tmp to complete dict */
+    {
+        if (dctxPtr->dictSize + dstSize > dctxPtr->maxBufferSize)   /* tmp buffer not large enough */
+        {
+            size_t preserveSize = 64 KB - dstSize;   /* note : dstSize < 64 KB */
+            memcpy(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - preserveSize, preserveSize);
+            dctxPtr->dictSize = preserveSize;
+        }
+        memcpy(dctxPtr->tmpOutBuffer + dctxPtr->dictSize, dstPtr, dstSize);
+        dctxPtr->dictSize += dstSize;
+        return;
+    }
+
+    /* join dict & dest into tmp */
+    {
+        size_t preserveSize = 64 KB - dstSize;   /* note : dstSize < 64 KB */
+        if (preserveSize > dctxPtr->dictSize) preserveSize = dctxPtr->dictSize;
+        memcpy(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - preserveSize, preserveSize);
+        memcpy(dctxPtr->tmpOutBuffer + preserveSize, dstPtr, dstSize);
+        dctxPtr->dict = dctxPtr->tmpOutBuffer;
+        dctxPtr->dictSize = preserveSize + dstSize;
+    }
+}
+
+
+
+/* LZ4F_decompress()
+* Call this function repetitively to regenerate data compressed within srcBuffer.
+* The function will attempt to decode *srcSizePtr from srcBuffer, into dstBuffer of maximum size *dstSizePtr.
+*
+* The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
+*
+* The number of bytes effectively read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+* If the number of bytes read is < number of bytes provided, then the decompression operation is not complete.
+* You will have to call it again, continuing from where it stopped.
+*
+* The function result is an hint of the better srcSize to use for next call to LZ4F_decompress.
+* Basically, it's the size of the current (or remaining) compressed block + header of next block.
+* Respecting the hint provides some boost to performance, since it allows less buffer shuffling.
+* Note that this is just a hint, you can always provide any srcSize you want.
+* When a frame is fully decoded, the function result will be 0.
+* If decompression failed, function result is an error code which can be tested using LZ4F_isError().
+*/
+size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const LZ4F_decompressOptions_t* decompressOptionsPtr)
+{
+    LZ4F_dctx_t* dctxPtr = (LZ4F_dctx_t*)decompressionContext;
+    LZ4F_decompressOptions_t optionsNull;
+    const BYTE* const srcStart = (const BYTE*)srcBuffer;
+    const BYTE* const srcEnd = srcStart + *srcSizePtr;
+    const BYTE* srcPtr = srcStart;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* const dstEnd = dstStart + *dstSizePtr;
+    BYTE* dstPtr = dstStart;
+    const BYTE* selectedIn = NULL;
+    unsigned doAnotherStage = 1;
+    size_t nextSrcSizeHint = 1;
+
+
+    memset(&optionsNull, 0, sizeof(optionsNull));
+    if (decompressOptionsPtr==NULL) decompressOptionsPtr = &optionsNull;
+    *srcSizePtr = 0;
+    *dstSizePtr = 0;
+
+    /* expect to continue decoding src buffer where it left previously */
+    if (dctxPtr->srcExpect != NULL)
+    {
+        if (srcStart != dctxPtr->srcExpect) return (size_t)-LZ4F_ERROR_srcPtr_wrong;
+    }
+
+    /* programmed as a state machine */
+
+    while (doAnotherStage)
+    {
+
+        switch(dctxPtr->dStage)
+        {
+
+        case dstage_getHeader:
+            {
+                if ((size_t)(srcEnd-srcPtr) >= maxFHSize)   /* enough to decode - shortcut */
+                {
+                    LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(dctxPtr, srcPtr, srcEnd-srcPtr);
+                    if (LZ4F_isError(errorCode)) return errorCode;
+                    srcPtr += errorCode;
+                    break;
+                }
+                dctxPtr->tmpInSize = 0;
+                dctxPtr->tmpInTarget = minFHSize;   /* minimum to attempt decode */
+                dctxPtr->dStage = dstage_storeHeader;
+            }
+
+        case dstage_storeHeader:
+            {
+                size_t sizeToCopy = dctxPtr->tmpInTarget - dctxPtr->tmpInSize;
+                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy =  srcEnd - srcPtr;
+                memcpy(dctxPtr->header + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
+                dctxPtr->tmpInSize += sizeToCopy;
+                srcPtr += sizeToCopy;
+                if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget)
+                {
+                    nextSrcSizeHint = (dctxPtr->tmpInTarget - dctxPtr->tmpInSize) + BHSize;   /* rest of header + nextBlockHeader */
+                    doAnotherStage = 0;   /* not enough src data, ask for some more */
+                    break;
+                }
+                {
+                    LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(dctxPtr, dctxPtr->header, dctxPtr->tmpInTarget);
+                    if (LZ4F_isError(errorCode)) return errorCode;
+                }
+                break;
+            }
+
+        case dstage_getCBlockSize:
+            {
+                if ((size_t)(srcEnd - srcPtr) >= BHSize)
+                {
+                    selectedIn = srcPtr;
+                    srcPtr += BHSize;
+                }
+                else
+                {
+                /* not enough input to read cBlockSize field */
+                    dctxPtr->tmpInSize = 0;
+                    dctxPtr->dStage = dstage_storeCBlockSize;
+                }
+            }
+
+            if (dctxPtr->dStage == dstage_storeCBlockSize)
+        case dstage_storeCBlockSize:
+            {
+                size_t sizeToCopy = BHSize - dctxPtr->tmpInSize;
+                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr;
+                memcpy(dctxPtr->tmpIn + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctxPtr->tmpInSize += sizeToCopy;
+                if (dctxPtr->tmpInSize < BHSize) /* not enough input to get full cBlockSize; wait for more */
+                {
+                    nextSrcSizeHint = BHSize - dctxPtr->tmpInSize;
+                    doAnotherStage  = 0;
+                    break;
+                }
+                selectedIn = dctxPtr->tmpIn;
+            }
+
+        /* case dstage_decodeCBlockSize: */   /* no more direct access, to prevent scan-build warning */
+            {
+                size_t nextCBlockSize = LZ4F_readLE32(selectedIn) & 0x7FFFFFFFU;
+                if (nextCBlockSize==0)   /* frameEnd signal, no more CBlock */
+                {
+                    dctxPtr->dStage = dstage_getSuffix;
+                    break;
+                }
+                if (nextCBlockSize > dctxPtr->maxBlockSize) return (size_t)-LZ4F_ERROR_GENERIC;   /* invalid cBlockSize */
+                dctxPtr->tmpInTarget = nextCBlockSize;
+                if (LZ4F_readLE32(selectedIn) & LZ4F_BLOCKUNCOMPRESSED_FLAG)
+                {
+                    dctxPtr->dStage = dstage_copyDirect;
+                    break;
+                }
+                dctxPtr->dStage = dstage_getCBlock;
+                if (dstPtr==dstEnd)
+                {
+                    nextSrcSizeHint = nextCBlockSize + BHSize;
+                    doAnotherStage = 0;
+                }
+                break;
+            }
+
+        case dstage_copyDirect:   /* uncompressed block */
+            {
+                size_t sizeToCopy = dctxPtr->tmpInTarget;
+                if ((size_t)(srcEnd-srcPtr) < sizeToCopy) sizeToCopy = srcEnd - srcPtr;  /* not enough input to read full block */
+                if ((size_t)(dstEnd-dstPtr) < sizeToCopy) sizeToCopy = dstEnd - dstPtr;
+                memcpy(dstPtr, srcPtr, sizeToCopy);
+                if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), srcPtr, sizeToCopy);
+                if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= sizeToCopy;
+
+                /* dictionary management */
+                if (dctxPtr->frameInfo.blockMode==LZ4F_blockLinked)
+                    LZ4F_updateDict(dctxPtr, dstPtr, sizeToCopy, dstStart, 0);
+
+                srcPtr += sizeToCopy;
+                dstPtr += sizeToCopy;
+                if (sizeToCopy == dctxPtr->tmpInTarget)   /* all copied */
+                {
+                    dctxPtr->dStage = dstage_getCBlockSize;
+                    break;
+                }
+                dctxPtr->tmpInTarget -= sizeToCopy;   /* still need to copy more */
+                nextSrcSizeHint = dctxPtr->tmpInTarget + BHSize;
+                doAnotherStage = 0;
+                break;
+            }
+
+        case dstage_getCBlock:   /* entry from dstage_decodeCBlockSize */
+            {
+                if ((size_t)(srcEnd-srcPtr) < dctxPtr->tmpInTarget)
+                {
+                    dctxPtr->tmpInSize = 0;
+                    dctxPtr->dStage = dstage_storeCBlock;
+                    break;
+                }
+                selectedIn = srcPtr;
+                srcPtr += dctxPtr->tmpInTarget;
+                dctxPtr->dStage = dstage_decodeCBlock;
+                break;
+            }
+
+        case dstage_storeCBlock:
+            {
+                size_t sizeToCopy = dctxPtr->tmpInTarget - dctxPtr->tmpInSize;
+                if (sizeToCopy > (size_t)(srcEnd-srcPtr)) sizeToCopy = srcEnd-srcPtr;
+                memcpy(dctxPtr->tmpIn + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
+                dctxPtr->tmpInSize += sizeToCopy;
+                srcPtr += sizeToCopy;
+                if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget)  /* need more input */
+                {
+                    nextSrcSizeHint = (dctxPtr->tmpInTarget - dctxPtr->tmpInSize) + BHSize;
+                    doAnotherStage=0;
+                    break;
+                }
+                selectedIn = dctxPtr->tmpIn;
+                dctxPtr->dStage = dstage_decodeCBlock;
+                break;
+            }
+
+        case dstage_decodeCBlock:
+            {
+                if ((size_t)(dstEnd-dstPtr) < dctxPtr->maxBlockSize)   /* not enough place into dst : decode into tmpOut */
+                    dctxPtr->dStage = dstage_decodeCBlock_intoTmp;
+                else
+                    dctxPtr->dStage = dstage_decodeCBlock_intoDst;
+                break;
+            }
+
+        case dstage_decodeCBlock_intoDst:
+            {
+                int (*decoder)(const char*, char*, int, int, const char*, int);
+                int decodedSize;
+
+                if (dctxPtr->frameInfo.blockMode == LZ4F_blockLinked)
+                    decoder = LZ4_decompress_safe_usingDict;
+                else
+                    decoder = LZ4F_decompress_safe;
+
+                decodedSize = decoder((const char*)selectedIn, (char*)dstPtr, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize);
+                if (decodedSize < 0) return (size_t)-LZ4F_ERROR_GENERIC;   /* decompression failed */
+                if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dstPtr, decodedSize);
+                if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= decodedSize;
+
+                /* dictionary management */
+                if (dctxPtr->frameInfo.blockMode==LZ4F_blockLinked)
+                    LZ4F_updateDict(dctxPtr, dstPtr, decodedSize, dstStart, 0);
+
+                dstPtr += decodedSize;
+                dctxPtr->dStage = dstage_getCBlockSize;
+                break;
+            }
+
+        case dstage_decodeCBlock_intoTmp:
+            {
+                /* not enough place into dst : decode into tmpOut */
+                int (*decoder)(const char*, char*, int, int, const char*, int);
+                int decodedSize;
+
+                if (dctxPtr->frameInfo.blockMode == LZ4F_blockLinked)
+                    decoder = LZ4_decompress_safe_usingDict;
+                else
+                    decoder = LZ4F_decompress_safe;
+
+                /* ensure enough place for tmpOut */
+                if (dctxPtr->frameInfo.blockMode == LZ4F_blockLinked)
+                {
+                    if (dctxPtr->dict == dctxPtr->tmpOutBuffer)
+                    {
+                        if (dctxPtr->dictSize > 128 KB)
+                        {
+                            memcpy(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - 64 KB, 64 KB);
+                            dctxPtr->dictSize = 64 KB;
+                        }
+                        dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + dctxPtr->dictSize;
+                    }
+                    else   /* dict not within tmp */
+                    {
+                        size_t reservedDictSpace = dctxPtr->dictSize;
+                        if (reservedDictSpace > 64 KB) reservedDictSpace = 64 KB;
+                        dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + reservedDictSpace;
+                    }
+                }
+
+                /* Decode */
+                decodedSize = decoder((const char*)selectedIn, (char*)dctxPtr->tmpOut, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize);
+                if (decodedSize < 0) return (size_t)-LZ4F_ERROR_decompressionFailed;   /* decompression failed */
+                if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dctxPtr->tmpOut, decodedSize);
+                if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= decodedSize;
+                dctxPtr->tmpOutSize = decodedSize;
+                dctxPtr->tmpOutStart = 0;
+                dctxPtr->dStage = dstage_flushOut;
+                break;
+            }
+
+        case dstage_flushOut:  /* flush decoded data from tmpOut to dstBuffer */
+            {
+                size_t sizeToCopy = dctxPtr->tmpOutSize - dctxPtr->tmpOutStart;
+                if (sizeToCopy > (size_t)(dstEnd-dstPtr)) sizeToCopy = dstEnd-dstPtr;
+                memcpy(dstPtr, dctxPtr->tmpOut + dctxPtr->tmpOutStart, sizeToCopy);
+
+                /* dictionary management */
+                if (dctxPtr->frameInfo.blockMode==LZ4F_blockLinked)
+                    LZ4F_updateDict(dctxPtr, dstPtr, sizeToCopy, dstStart, 1);
+
+                dctxPtr->tmpOutStart += sizeToCopy;
+                dstPtr += sizeToCopy;
+
+                /* end of flush ? */
+                if (dctxPtr->tmpOutStart == dctxPtr->tmpOutSize)
+                {
+                    dctxPtr->dStage = dstage_getCBlockSize;
+                    break;
+                }
+                nextSrcSizeHint = BHSize;
+                doAnotherStage = 0;   /* still some data to flush */
+                break;
+            }
+
+        case dstage_getSuffix:
+            {
+                size_t suffixSize = dctxPtr->frameInfo.contentChecksumFlag * 4;
+                if (dctxPtr->frameRemainingSize) return (size_t)-LZ4F_ERROR_frameSize_wrong;   /* incorrect frame size decoded */
+                if (suffixSize == 0)   /* frame completed */
+                {
+                    nextSrcSizeHint = 0;
+                    dctxPtr->dStage = dstage_getHeader;
+                    doAnotherStage = 0;
+                    break;
+                }
+                if ((srcEnd - srcPtr) < 4)   /* not enough size for entire CRC */
+                {
+                    dctxPtr->tmpInSize = 0;
+                    dctxPtr->dStage = dstage_storeSuffix;
+                }
+                else
+                {
+                    selectedIn = srcPtr;
+                    srcPtr += 4;
+                }
+            }
+
+            if (dctxPtr->dStage == dstage_storeSuffix)
+        case dstage_storeSuffix:
+            {
+                size_t sizeToCopy = 4 - dctxPtr->tmpInSize;
+                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr;
+                memcpy(dctxPtr->tmpIn + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctxPtr->tmpInSize += sizeToCopy;
+                if (dctxPtr->tmpInSize < 4)  /* not enough input to read complete suffix */
+                {
+                    nextSrcSizeHint = 4 - dctxPtr->tmpInSize;
+                    doAnotherStage=0;
+                    break;
+                }
+                selectedIn = dctxPtr->tmpIn;
+            }
+
+        /* case dstage_checkSuffix: */   /* no direct call, to avoid scan-build warning */
+            {
+                U32 readCRC = LZ4F_readLE32(selectedIn);
+                U32 resultCRC = XXH32_digest(&(dctxPtr->xxh));
+                if (readCRC != resultCRC) return (size_t)-LZ4F_ERROR_contentChecksum_invalid;
+                nextSrcSizeHint = 0;
+                dctxPtr->dStage = dstage_getHeader;
+                doAnotherStage = 0;
+                break;
+            }
+
+        case dstage_getSFrameSize:
+            {
+                if ((srcEnd - srcPtr) >= 4)
+                {
+                    selectedIn = srcPtr;
+                    srcPtr += 4;
+                }
+                else
+                {
+                /* not enough input to read cBlockSize field */
+                    dctxPtr->tmpInSize = 4;
+                    dctxPtr->tmpInTarget = 8;
+                    dctxPtr->dStage = dstage_storeSFrameSize;
+                }
+            }
+
+            if (dctxPtr->dStage == dstage_storeSFrameSize)
+        case dstage_storeSFrameSize:
+            {
+                size_t sizeToCopy = dctxPtr->tmpInTarget - dctxPtr->tmpInSize;
+                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr;
+                memcpy(dctxPtr->header + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctxPtr->tmpInSize += sizeToCopy;
+                if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget) /* not enough input to get full sBlockSize; wait for more */
+                {
+                    nextSrcSizeHint = dctxPtr->tmpInTarget - dctxPtr->tmpInSize;
+                    doAnotherStage = 0;
+                    break;
+                }
+                selectedIn = dctxPtr->header + 4;
+            }
+
+        /* case dstage_decodeSFrameSize: */   /* no direct access */
+            {
+                size_t SFrameSize = LZ4F_readLE32(selectedIn);
+                dctxPtr->frameInfo.contentSize = SFrameSize;
+                dctxPtr->tmpInTarget = SFrameSize;
+                dctxPtr->dStage = dstage_skipSkippable;
+                break;
+            }
+
+        case dstage_skipSkippable:
+            {
+                size_t skipSize = dctxPtr->tmpInTarget;
+                if (skipSize > (size_t)(srcEnd-srcPtr)) skipSize = srcEnd-srcPtr;
+                srcPtr += skipSize;
+                dctxPtr->tmpInTarget -= skipSize;
+                doAnotherStage = 0;
+                nextSrcSizeHint = dctxPtr->tmpInTarget;
+                if (nextSrcSizeHint) break;
+                dctxPtr->dStage = dstage_getHeader;
+                break;
+            }
+        }
+    }
+
+    /* preserve dictionary within tmp if necessary */
+    if ( (dctxPtr->frameInfo.blockMode==LZ4F_blockLinked)
+        &&(dctxPtr->dict != dctxPtr->tmpOutBuffer)
+        &&(!decompressOptionsPtr->stableDst)
+        &&((unsigned)(dctxPtr->dStage-1) < (unsigned)(dstage_getSuffix-1))
+        )
+    {
+        if (dctxPtr->dStage == dstage_flushOut)
+        {
+            size_t preserveSize = dctxPtr->tmpOut - dctxPtr->tmpOutBuffer;
+            size_t copySize = 64 KB - dctxPtr->tmpOutSize;
+            const BYTE* oldDictEnd = dctxPtr->dict + dctxPtr->dictSize - dctxPtr->tmpOutStart;
+            if (dctxPtr->tmpOutSize > 64 KB) copySize = 0;
+            if (copySize > preserveSize) copySize = preserveSize;
+
+            memcpy(dctxPtr->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+
+            dctxPtr->dict = dctxPtr->tmpOutBuffer;
+            dctxPtr->dictSize = preserveSize + dctxPtr->tmpOutStart;
+        }
+        else
+        {
+            size_t newDictSize = dctxPtr->dictSize;
+            const BYTE* oldDictEnd = dctxPtr->dict + dctxPtr->dictSize;
+            if ((newDictSize) > 64 KB) newDictSize = 64 KB;
+
+            memcpy(dctxPtr->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize);
+
+            dctxPtr->dict = dctxPtr->tmpOutBuffer;
+            dctxPtr->dictSize = newDictSize;
+            dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + newDictSize;
+        }
+    }
+
+    /* require function to be called again from position where it stopped */
+    if (srcPtr<srcEnd)
+        dctxPtr->srcExpect = srcPtr;
+    else
+        dctxPtr->srcExpect = NULL;
+
+    *srcSizePtr = (srcPtr - srcStart);
+    *dstSizePtr = (dstPtr - dstStart);
+    return nextSrcSizeHint;
+}
diff --git a/util/cbfstool/lz4/lib/lz4frame.h b/util/cbfstool/lz4/lib/lz4frame.h
new file mode 100644
index 0000000000..c039e5d559
--- /dev/null
+++ b/util/cbfstool/lz4/lib/lz4frame.h
@@ -0,0 +1,303 @@
+/*
+   LZ4 auto-framing library
+   Header File
+   Copyright (C) 2011-2015, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* LZ4F is a stand-alone API to create LZ4-compressed frames
+ * conformant with specification v1.5.1.
+ * All related operations, including memory management, are handled internally by the library.
+ * You don't need lz4.h when using lz4frame.h.
+ * */
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/**************************************
+*  Includes
+**************************************/
+#include <stddef.h>   /* size_t */
+
+
+/**************************************
+*  Error management
+**************************************/
+typedef size_t LZ4F_errorCode_t;
+
+unsigned    LZ4F_isError(LZ4F_errorCode_t code);
+const char* LZ4F_getErrorName(LZ4F_errorCode_t code);   /* return error code string; useful for debugging */
+
+
+/**************************************
+*  Frame compression types
+**************************************/
+//#define LZ4F_DISABLE_OBSOLETE_ENUMS
+#ifndef LZ4F_DISABLE_OBSOLETE_ENUMS
+#  define LZ4F_OBSOLETE_ENUM(x) ,x
+#else
+#  define LZ4F_OBSOLETE_ENUM(x)
+#endif
+
+typedef enum {
+    LZ4F_default=0,
+    LZ4F_max64KB=4,
+    LZ4F_max256KB=5,
+    LZ4F_max1MB=6,
+    LZ4F_max4MB=7
+    LZ4F_OBSOLETE_ENUM(max64KB = LZ4F_max64KB)
+    LZ4F_OBSOLETE_ENUM(max256KB = LZ4F_max256KB)
+    LZ4F_OBSOLETE_ENUM(max1MB = LZ4F_max1MB)
+    LZ4F_OBSOLETE_ENUM(max4MB = LZ4F_max4MB)
+} LZ4F_blockSizeID_t;
+
+typedef enum {
+    LZ4F_blockLinked=0,
+    LZ4F_blockIndependent
+    LZ4F_OBSOLETE_ENUM(blockLinked = LZ4F_blockLinked)
+    LZ4F_OBSOLETE_ENUM(blockIndependent = LZ4F_blockIndependent)
+} LZ4F_blockMode_t;
+
+typedef enum {
+    LZ4F_noContentChecksum=0,
+    LZ4F_contentChecksumEnabled
+    LZ4F_OBSOLETE_ENUM(noContentChecksum = LZ4F_noContentChecksum)
+    LZ4F_OBSOLETE_ENUM(contentChecksumEnabled = LZ4F_contentChecksumEnabled)
+} LZ4F_contentChecksum_t;
+
+typedef enum {
+    LZ4F_frame=0,
+    LZ4F_skippableFrame
+    LZ4F_OBSOLETE_ENUM(skippableFrame = LZ4F_skippableFrame)
+} LZ4F_frameType_t;
+
+#ifndef LZ4F_DISABLE_OBSOLETE_ENUMS
+typedef LZ4F_blockSizeID_t blockSizeID_t;
+typedef LZ4F_blockMode_t blockMode_t;
+typedef LZ4F_frameType_t frameType_t;
+typedef LZ4F_contentChecksum_t contentChecksum_t;
+#endif
+
+typedef struct {
+  LZ4F_blockSizeID_t     blockSizeID;           /* max64KB, max256KB, max1MB, max4MB ; 0 == default */
+  LZ4F_blockMode_t       blockMode;             /* blockLinked, blockIndependent ; 0 == default */
+  LZ4F_contentChecksum_t contentChecksumFlag;   /* noContentChecksum, contentChecksumEnabled ; 0 == default  */
+  LZ4F_frameType_t       frameType;             /* LZ4F_frame, skippableFrame ; 0 == default */
+  unsigned long long     contentSize;           /* Size of uncompressed (original) content ; 0 == unknown */
+  unsigned               reserved[2];           /* must be zero for forward compatibility */
+} LZ4F_frameInfo_t;
+
+typedef struct {
+  LZ4F_frameInfo_t frameInfo;
+  int      compressionLevel;       /* 0 == default (fast mode); values above 16 count as 16; values below 0 count as 0 */
+  unsigned autoFlush;              /* 1 == always flush (reduce need for tmp buffer) */
+  unsigned reserved[4];            /* must be zero for forward compatibility */
+} LZ4F_preferences_t;
+
+
+/***********************************
+*  Simple compression function
+***********************************/
+size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
+
+size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuffer, size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
+/* LZ4F_compressFrame()
+ * Compress an entire srcBuffer into a valid LZ4 frame, as defined by specification v1.5.1
+ * The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case.
+ * You can get the minimum value of dstMaxSize by using LZ4F_compressFrameBound()
+ * If this condition is not respected, LZ4F_compressFrame() will fail (result is an errorCode)
+ * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ * The result of the function is the number of bytes written into dstBuffer.
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ */
+
+
+
+/**********************************
+*  Advanced compression functions
+**********************************/
+typedef struct LZ4F_cctx_s* LZ4F_compressionContext_t;   /* must be aligned on 8-bytes */
+
+typedef struct {
+  unsigned stableSrc;    /* 1 == src content will remain available on future calls to LZ4F_compress(); avoid saving src content within tmp buffer as future dictionary */
+  unsigned reserved[3];
+} LZ4F_compressOptions_t;
+
+/* Resource Management */
+
+#define LZ4F_VERSION 100
+LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* cctxPtr, unsigned version);
+LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t cctx);
+/* LZ4F_createCompressionContext() :
+ * The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+ * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version and an LZ4F_preferences_t structure.
+ * The version provided MUST be LZ4F_VERSION. It is intended to track potential version differences between different binaries.
+ * The function will provide a pointer to a fully allocated LZ4F_compressionContext_t object.
+ * If the result LZ4F_errorCode_t is not zero, there was an error during context creation.
+ * Object can release its memory using LZ4F_freeCompressionContext();
+ */
+
+
+/* Compression */
+
+size_t LZ4F_compressBegin(LZ4F_compressionContext_t cctx, void* dstBuffer, size_t dstMaxSize, const LZ4F_preferences_t* prefsPtr);
+/* LZ4F_compressBegin() :
+ * will write the frame header into dstBuffer.
+ * dstBuffer must be large enough to accommodate a header (dstMaxSize). Maximum header size is 15 bytes.
+ * The LZ4F_preferences_t structure is optional : you can provide NULL as argument, all preferences will then be set to default.
+ * The result of the function is the number of bytes written into dstBuffer for the header
+ * or an error code (can be tested using LZ4F_isError())
+ */
+
+size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr);
+/* LZ4F_compressBound() :
+ * Provides the minimum size of Dst buffer given srcSize to handle worst case situations.
+ * Different preferences can produce different results.
+ * prefsPtr is optional : you can provide NULL as argument, all preferences will then be set to cover worst case.
+ * This function includes frame termination cost (4 bytes, or 8 if frame checksum is enabled)
+ */
+
+size_t LZ4F_compressUpdate(LZ4F_compressionContext_t cctx, void* dstBuffer, size_t dstMaxSize, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* cOptPtr);
+/* LZ4F_compressUpdate()
+ * LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+ * The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case.
+ * You can get the minimum value of dstMaxSize by using LZ4F_compressBound().
+ * If this condition is not respected, LZ4F_compress() will fail (result is an errorCode).
+ * LZ4F_compressUpdate() doesn't guarantee error recovery, so you have to reset compression context when an error occurs.
+ * The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * The result of the function is the number of bytes written into dstBuffer : it can be zero, meaning input data was just buffered.
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ */
+
+size_t LZ4F_flush(LZ4F_compressionContext_t cctx, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* cOptPtr);
+/* LZ4F_flush()
+ * Should you need to generate compressed data immediately, without waiting for the current block to be filled,
+ * you can call LZ4_flush(), which will immediately compress any remaining data buffered within cctx.
+ * Note that dstMaxSize must be large enough to ensure the operation will be successful.
+ * LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * The result of the function is the number of bytes written into dstBuffer
+ * (it can be zero, this means there was no data left within cctx)
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ */
+
+size_t LZ4F_compressEnd(LZ4F_compressionContext_t cctx, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* cOptPtr);
+/* LZ4F_compressEnd()
+ * When you want to properly finish the compressed frame, just call LZ4F_compressEnd().
+ * It will flush whatever data remained within compressionContext (like LZ4_flush())
+ * but also properly finalize the frame, with an endMark and a checksum.
+ * The result of the function is the number of bytes written into dstBuffer (necessarily >= 4 (endMark), or 8 if optional frame checksum is enabled)
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ * The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * A successful call to LZ4F_compressEnd() makes cctx available again for next compression task.
+ */
+
+
+/***********************************
+*  Decompression functions
+***********************************/
+
+typedef struct LZ4F_dctx_s* LZ4F_decompressionContext_t;   /* must be aligned on 8-bytes */
+
+typedef struct {
+  unsigned stableDst;       /* guarantee that decompressed data will still be there on next function calls (avoid storage into tmp buffers) */
+  unsigned reserved[3];
+} LZ4F_decompressOptions_t;
+
+
+/* Resource management */
+
+LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_decompressionContext_t* dctxPtr, unsigned version);
+LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_decompressionContext_t dctx);
+/* LZ4F_createDecompressionContext() :
+ * The first thing to do is to create an LZ4F_decompressionContext_t object, which will be used in all decompression operations.
+ * This is achieved using LZ4F_createDecompressionContext().
+ * The version provided MUST be LZ4F_VERSION. It is intended to track potential breaking differences between different versions.
+ * The function will provide a pointer to a fully allocated and initialized LZ4F_decompressionContext_t object.
+ * The result is an errorCode, which can be tested using LZ4F_isError().
+ * dctx memory can be released using LZ4F_freeDecompressionContext();
+ * The result of LZ4F_freeDecompressionContext() is indicative of the current state of decompressionContext when being released.
+ * That is, it should be == 0 if decompression has been completed fully and correctly.
+ */
+
+
+/* Decompression */
+
+size_t LZ4F_getFrameInfo(LZ4F_decompressionContext_t dctx,
+                         LZ4F_frameInfo_t* frameInfoPtr,
+                         const void* srcBuffer, size_t* srcSizePtr);
+/* LZ4F_getFrameInfo()
+ * This function decodes frame header information (such as max blockSize, frame checksum, etc.).
+ * Its usage is optional. The objective is to extract frame header information, typically for allocation purposes.
+ * A header size is variable and can be from 7 to 15 bytes. It's also possible to input more bytes than that.
+ * The number of bytes read from srcBuffer will be updated within *srcSizePtr (necessarily <= original value).
+ * (note that LZ4F_getFrameInfo() can also be used anytime *after* starting decompression, in this case 0 input byte is enough)
+ * Frame header info is *copied into* an already allocated LZ4F_frameInfo_t structure.
+ * The function result is an hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+ *                        or an error code which can be tested using LZ4F_isError()
+ *                        (typically, when there is not enough src bytes to fully decode the frame header)
+ * Decompression is expected to resume from where it stopped (srcBuffer + *srcSizePtr)
+ */
+
+size_t LZ4F_decompress(LZ4F_decompressionContext_t dctx,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const LZ4F_decompressOptions_t* dOptPtr);
+/* LZ4F_decompress()
+ * Call this function repetitively to regenerate data compressed within srcBuffer.
+ * The function will attempt to decode *srcSizePtr bytes from srcBuffer, into dstBuffer of maximum size *dstSizePtr.
+ *
+ * The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
+ *
+ * The number of bytes read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+ * If number of bytes read is < number of bytes provided, then decompression operation is not completed.
+ * It typically happens when dstBuffer is not large enough to contain all decoded data.
+ * LZ4F_decompress() must be called again, starting from where it stopped (srcBuffer + *srcSizePtr)
+ * The function will check this condition, and refuse to continue if it is not respected.
+ *
+ * dstBuffer is supposed to be flushed between each call to the function, since its content will be overwritten.
+ * dst arguments can be changed at will with each consecutive call to the function.
+ *
+ * The function result is an hint of how many srcSize bytes LZ4F_decompress() expects for next call.
+ * Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ * Respecting the hint provides some boost to performance, since it does skip intermediate buffers.
+ * This is just a hint, you can always provide any srcSize you want.
+ * When a frame is fully decoded, the function result will be 0 (no more data expected).
+ * If decompression failed, function result is an error code, which can be tested using LZ4F_isError().
+ *
+ * After a frame is fully decoded, dctx can be used again to decompress another frame.
+ */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/util/cbfstool/lz4/lib/lz4frame_static.h b/util/cbfstool/lz4/lib/lz4frame_static.h
new file mode 100644
index 0000000000..0d909753b9
--- /dev/null
+++ b/util/cbfstool/lz4/lib/lz4frame_static.h
@@ -0,0 +1,81 @@
+/*
+   LZ4 auto-framing library
+   Header File for static linking only
+   Copyright (C) 2011-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* lz4frame_static.h should be used solely in the context of static linking.
+ * It contains definitions which may still change overtime.
+ * Never use it in the context of DLL linking.
+ * */
+
+
+/**************************************
+*  Includes
+**************************************/
+#include "lz4frame.h"
+
+
+/**************************************
+ * Error management
+ * ************************************/
+#define LZ4F_LIST_ERRORS(ITEM) \
+        ITEM(OK_NoError) ITEM(ERROR_GENERIC) \
+        ITEM(ERROR_maxBlockSize_invalid) ITEM(ERROR_blockMode_invalid) ITEM(ERROR_contentChecksumFlag_invalid) \
+        ITEM(ERROR_compressionLevel_invalid) \
+        ITEM(ERROR_headerVersion_wrong) ITEM(ERROR_blockChecksum_unsupported) ITEM(ERROR_reservedFlag_set) \
+        ITEM(ERROR_allocation_failed) \
+        ITEM(ERROR_srcSize_tooLarge) ITEM(ERROR_dstMaxSize_tooSmall) \
+        ITEM(ERROR_frameHeader_incomplete) ITEM(ERROR_frameType_unknown) ITEM(ERROR_frameSize_wrong) \
+        ITEM(ERROR_srcPtr_wrong) \
+        ITEM(ERROR_decompressionFailed) \
+        ITEM(ERROR_headerChecksum_invalid) ITEM(ERROR_contentChecksum_invalid) \
+        ITEM(ERROR_maxCode)
+
+//#define LZ4F_DISABLE_OLD_ENUMS
+#ifndef LZ4F_DISABLE_OLD_ENUMS
+#define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM, ENUM = LZ4F_##ENUM,
+#else
+#define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM,
+#endif
+typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;  /* enum is exposed, to handle specific errors; compare function result to -enum value */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/util/cbfstool/lz4/lib/lz4hc.c b/util/cbfstool/lz4/lib/lz4hc.c
new file mode 100644
index 0000000000..80bfa39672
--- /dev/null
+++ b/util/cbfstool/lz4/lib/lz4hc.c
@@ -0,0 +1,748 @@
+/*
+    LZ4 HC - High Compression Mode of LZ4
+    Copyright (C) 2011-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+       - LZ4 source repository : https://github.com/Cyan4973/lz4
+       - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+
+
+/* *************************************
+*  Tuning Parameter
+***************************************/
+static const int LZ4HC_compressionLevel_default = 9;
+
+/*!
+ * HEAPMODE :
+ * Select how default compression function will allocate workplace memory,
+ * in stack (0:fastest), or in heap (1:requires malloc()).
+ * Since workplace is rather large, heap mode is recommended.
+ */
+#define LZ4HC_HEAPMODE 0
+
+
+/* *************************************
+*  Includes
+***************************************/
+#include "lz4hc.h"
+
+
+/* *************************************
+*  Local Compiler Options
+***************************************/
+#if defined(__GNUC__)
+#  pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#if defined (__clang__)
+#  pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+
+/* *************************************
+*  Common LZ4 definition
+***************************************/
+#define LZ4_COMMONDEFS_ONLY
+#include "lz4.c"
+
+
+/* *************************************
+*  Local Constants
+***************************************/
+#define DICTIONARY_LOGSIZE 16
+#define MAXD (1<<DICTIONARY_LOGSIZE)
+#define MAXD_MASK (MAXD - 1)
+
+#define HASH_LOG (DICTIONARY_LOGSIZE-1)
+#define HASHTABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASHTABLESIZE - 1)
+
+#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+
+static const int g_maxCompressionLevel = 16;
+
+
+/**************************************
+*  Local Types
+**************************************/
+typedef struct
+{
+    U32   hashTable[HASHTABLESIZE];
+    U16   chainTable[MAXD];
+    const BYTE* end;        /* next block here to continue on current prefix */
+    const BYTE* base;       /* All index relative to this position */
+    const BYTE* dictBase;   /* alternate base for extDict */
+    BYTE* inputBuffer;      /* deprecated */
+    U32   dictLimit;        /* below that point, need extDict */
+    U32   lowLimit;         /* below that point, no more dict */
+    U32   nextToUpdate;     /* index from which to continue dictionary update */
+    U32   compressionLevel;
+} LZ4HC_Data_Structure;
+
+
+/**************************************
+*  Local Macros
+**************************************/
+#define HASH_FUNCTION(i)       (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
+//#define DELTANEXTU16(p)        chainTable[(p) & MAXD_MASK]   /* flexible, MAXD dependent */
+#define DELTANEXTU16(p)        chainTable[(U16)(p)]   /* faster */
+
+static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
+
+
+
+/**************************************
+*  HC Compression
+**************************************/
+static void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* start)
+{
+    MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
+    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+    hc4->nextToUpdate = 64 KB;
+    hc4->base = start - 64 KB;
+    hc4->end = start;
+    hc4->dictBase = start - 64 KB;
+    hc4->dictLimit = 64 KB;
+    hc4->lowLimit = 64 KB;
+}
+
+
+/* Update chains up to ip (excluded) */
+FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
+{
+    U16* chainTable = hc4->chainTable;
+    U32* HashTable  = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = hc4->nextToUpdate;
+
+    while(idx < target)
+    {
+        U32 h = LZ4HC_hashPtr(base+idx);
+        size_t delta = idx - HashTable[h];
+        if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
+        DELTANEXTU16(idx) = (U16)delta;
+        HashTable[h] = idx;
+        idx++;
+    }
+
+    hc4->nextToUpdate = target;
+}
+
+
+FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4,   /* Index table will be updated */
+                                               const BYTE* ip, const BYTE* const iLimit,
+                                               const BYTE** matchpos,
+                                               const int maxNbAttempts)
+{
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const BYTE* const dictBase = hc4->dictBase;
+    const U32 dictLimit = hc4->dictLimit;
+    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
+    U32 matchIndex;
+    const BYTE* match;
+    int nbAttempts=maxNbAttempts;
+    size_t ml=0;
+
+    /* HC4 match finder */
+    LZ4HC_Insert(hc4, ip);
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+    while ((matchIndex>=lowLimit) && (nbAttempts))
+    {
+        nbAttempts--;
+        if (matchIndex >= dictLimit)
+        {
+            match = base + matchIndex;
+            if (*(match+ml) == *(ip+ml)
+                && (LZ4_read32(match) == LZ4_read32(ip)))
+            {
+                size_t mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
+                if (mlt > ml) { ml = mlt; *matchpos = match; }
+            }
+        }
+        else
+        {
+            match = dictBase + matchIndex;
+            if (LZ4_read32(match) == LZ4_read32(ip))
+            {
+                size_t mlt;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);
+                if (vLimit > iLimit) vLimit = iLimit;
+                mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iLimit))
+                    mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit);
+                if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; }   /* virtual matchpos */
+            }
+        }
+        matchIndex -= DELTANEXTU16(matchIndex);
+    }
+
+    return (int)ml;
+}
+
+
+FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
+    LZ4HC_Data_Structure* hc4,
+    const BYTE* const ip,
+    const BYTE* const iLowLimit,
+    const BYTE* const iHighLimit,
+    int longest,
+    const BYTE** matchpos,
+    const BYTE** startpos,
+    const int maxNbAttempts)
+{
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const U32 dictLimit = hc4->dictLimit;
+    const BYTE* const lowPrefixPtr = base + dictLimit;
+    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
+    const BYTE* const dictBase = hc4->dictBase;
+    U32   matchIndex;
+    int nbAttempts = maxNbAttempts;
+    int delta = (int)(ip-iLowLimit);
+
+
+    /* First Match */
+    LZ4HC_Insert(hc4, ip);
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+    while ((matchIndex>=lowLimit) && (nbAttempts))
+    {
+        nbAttempts--;
+        if (matchIndex >= dictLimit)
+        {
+            const BYTE* matchPtr = base + matchIndex;
+            if (*(iLowLimit + longest) == *(matchPtr - delta + longest))
+                if (LZ4_read32(matchPtr) == LZ4_read32(ip))
+                {
+                    int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+                    int back = 0;
+
+                    while ((ip+back>iLowLimit)
+                           && (matchPtr+back > lowPrefixPtr)
+                           && (ip[back-1] == matchPtr[back-1]))
+                            back--;
+
+                    mlt -= back;
+
+                    if (mlt > longest)
+                    {
+                        longest = (int)mlt;
+                        *matchpos = matchPtr+back;
+                        *startpos = ip+back;
+                    }
+                }
+        }
+        else
+        {
+            const BYTE* matchPtr = dictBase + matchIndex;
+            if (LZ4_read32(matchPtr) == LZ4_read32(ip))
+            {
+                size_t mlt;
+                int back=0;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);
+                if (vLimit > iHighLimit) vLimit = iHighLimit;
+                mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
+                    mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
+                while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--;
+                mlt -= back;
+                if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
+            }
+        }
+        matchIndex -= DELTANEXTU16(matchIndex);
+    }
+
+    return longest;
+}
+
+
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+
+#define LZ4HC_DEBUG 0
+#if LZ4HC_DEBUG
+static unsigned debug = 0;
+#endif
+
+FORCE_INLINE int LZ4HC_encodeSequence (
+    const BYTE** ip,
+    BYTE** op,
+    const BYTE** anchor,
+    int matchLength,
+    const BYTE* const match,
+    limitedOutput_directive limitedOutputBuffer,
+    BYTE* oend)
+{
+    int length;
+    BYTE* token;
+
+#if LZ4HC_DEBUG
+    if (debug) printf("literal : %u  --  match : %u  --  offset : %u\n", (U32)(*ip - *anchor), (U32)matchLength, (U32)(*ip-match));
+#endif
+
+    /* Encode Literal length */
+    length = (int)(*ip - *anchor);
+    token = (*op)++;
+    if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
+    if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; }
+    else *token = (BYTE)(length<<ML_BITS);
+
+    /* Copy Literals */
+    LZ4_wildCopy(*op, *anchor, (*op) + length);
+    *op += length;
+
+    /* Encode Offset */
+    LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
+
+    /* Encode MatchLength */
+    length = (int)(matchLength-MINMATCH);
+    if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
+    if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
+    else *token += (BYTE)(length);
+
+    /* Prepare next loop */
+    *ip += matchLength;
+    *anchor = *ip;
+
+    return 0;
+}
+
+
+static int LZ4HC_compress_generic (
+    void* ctxvoid,
+    const char* source,
+    char* dest,
+    int inputSize,
+    int maxOutputSize,
+    int compressionLevel,
+    limitedOutput_directive limit
+    )
+{
+    LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + maxOutputSize;
+
+    unsigned maxNbAttempts;
+    int   ml, ml2, ml3, ml0;
+    const BYTE* ref=NULL;
+    const BYTE* start2=NULL;
+    const BYTE* ref2=NULL;
+    const BYTE* start3=NULL;
+    const BYTE* ref3=NULL;
+    const BYTE* start0;
+    const BYTE* ref0;
+
+
+    /* init */
+    if (compressionLevel > g_maxCompressionLevel) compressionLevel = g_maxCompressionLevel;
+    if (compressionLevel < 1) compressionLevel = LZ4HC_compressionLevel_default;
+    maxNbAttempts = 1 << (compressionLevel-1);
+    ctx->end += inputSize;
+
+    ip++;
+
+    /* Main Loop */
+    while (ip < mflimit)
+    {
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
+        if (!ml) { ip++; continue; }
+
+        /* saved, in case we would skip too much */
+        start0 = ip;
+        ref0 = ref;
+        ml0 = ml;
+
+_Search2:
+        if (ip+ml < mflimit)
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts);
+        else ml2 = ml;
+
+        if (ml2 == ml)  /* No better match */
+        {
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start0 < ip)
+        {
+            if (start2 < ip + ml0)   /* empirical */
+            {
+                ip = start0;
+                ref = ref0;
+                ml = ml0;
+            }
+        }
+
+        /* Here, start0==ip */
+        if ((start2 - ip) < 3)   /* First Match too small : removed */
+        {
+            ml = ml2;
+            ip = start2;
+            ref =ref2;
+            goto _Search2;
+        }
+
+_Search3:
+        /*
+        * Currently we have :
+        * ml2 > ml1, and
+        * ip1+3 <= ip2 (usually < ip1+ml1)
+        */
+        if ((start2 - ip) < OPTIMAL_ML)
+        {
+            int correction;
+            int new_ml = ml;
+            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+            correction = new_ml - (int)(start2 - ip);
+            if (correction > 0)
+            {
+                start2 += correction;
+                ref2 += correction;
+                ml2 -= correction;
+            }
+        }
+        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
+
+        if (start2 + ml2 < mflimit)
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
+        else ml3 = ml2;
+
+        if (ml3 == ml2) /* No better match : 2 sequences to encode */
+        {
+            /* ip & ref are known; Now for ml */
+            if (start2 < ip+ml)  ml = (int)(start2 - ip);
+            /* Now, encode 2 sequences */
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            ip = start2;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */
+        {
+            if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+            {
+                if (start2 < ip+ml)
+                {
+                    int correction = (int)(ip+ml - start2);
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                    if (ml2 < MINMATCH)
+                    {
+                        start2 = start3;
+                        ref2 = ref3;
+                        ml2 = ml3;
+                    }
+                }
+
+                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+                ip  = start3;
+                ref = ref3;
+                ml  = ml3;
+
+                start0 = start2;
+                ref0 = ref2;
+                ml0 = ml2;
+                goto _Search2;
+            }
+
+            start2 = start3;
+            ref2 = ref3;
+            ml2 = ml3;
+            goto _Search3;
+        }
+
+        /*
+        * OK, now we have 3 ascending matches; let's write at least the first one
+        * ip & ref are known; Now for ml
+        */
+        if (start2 < ip+ml)
+        {
+            if ((start2 - ip) < (int)ML_MASK)
+            {
+                int correction;
+                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+                correction = ml - (int)(start2 - ip);
+                if (correction > 0)
+                {
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                }
+            }
+            else
+            {
+                ml = (int)(start2 - ip);
+            }
+        }
+        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+
+        ip = start2;
+        ref = ref2;
+        ml = ml2;
+
+        start2 = start3;
+        ref2 = ref3;
+        ml2 = ml3;
+
+        goto _Search3;
+    }
+
+    /* Encode Last Literals */
+    {
+        int lastRun = (int)(iend - anchor);
+        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  /* Check output limit */
+        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+        else *op++ = (BYTE)(lastRun<<ML_BITS);
+        memcpy(op, anchor, iend - anchor);
+        op += iend-anchor;
+    }
+
+    /* End */
+    return (int) (((char*)op)-dest);
+}
+
+
+int LZ4_sizeofStateHC(void) { return sizeof(LZ4HC_Data_Structure); }
+
+int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
+{
+    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
+    LZ4HC_init ((LZ4HC_Data_Structure*)state, (const BYTE*)src);
+    if (maxDstSize < LZ4_compressBound(srcSize))
+        return LZ4HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput);
+    else
+        return LZ4HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, noLimit);
+}
+
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
+{
+#if LZ4HC_HEAPMODE==1
+    LZ4HC_Data_Structure* statePtr = malloc(sizeof(LZ4HC_Data_Structure));
+#else
+    LZ4HC_Data_Structure state;
+    LZ4HC_Data_Structure* const statePtr = &state;
+#endif
+    int cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, maxDstSize, compressionLevel);
+#if LZ4HC_HEAPMODE==1
+    free(statePtr);
+#endif
+    return cSize;
+}
+
+
+
+/**************************************
+*  Streaming Functions
+**************************************/
+/* allocation */
+LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); }
+int             LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { free(LZ4_streamHCPtr); return 0; }
+
+
+/* initialization */
+void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+    LZ4_STATIC_ASSERT(sizeof(LZ4HC_Data_Structure) <= sizeof(LZ4_streamHC_t));   /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
+    ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->base = NULL;
+    ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->compressionLevel = (unsigned)compressionLevel;
+}
+
+int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
+{
+    LZ4HC_Data_Structure* ctxPtr = (LZ4HC_Data_Structure*) LZ4_streamHCPtr;
+    if (dictSize > 64 KB)
+    {
+        dictionary += dictSize - 64 KB;
+        dictSize = 64 KB;
+    }
+    LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
+    if (dictSize >= 4) LZ4HC_Insert (ctxPtr, (const BYTE*)dictionary +(dictSize-3));
+    ctxPtr->end = (const BYTE*)dictionary + dictSize;
+    return dictSize;
+}
+
+
+/* compression */
+
+static void LZ4HC_setExternalDict(LZ4HC_Data_Structure* ctxPtr, const BYTE* newBlock)
+{
+    if (ctxPtr->end >= ctxPtr->base + 4)
+        LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* Referencing remaining dictionary content */
+    /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
+    ctxPtr->lowLimit  = ctxPtr->dictLimit;
+    ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+    ctxPtr->dictBase  = ctxPtr->base;
+    ctxPtr->base = newBlock - ctxPtr->dictLimit;
+    ctxPtr->end  = newBlock;
+    ctxPtr->nextToUpdate = ctxPtr->dictLimit;   /* match referencing will resume from there */
+}
+
+static int LZ4_compressHC_continue_generic (LZ4HC_Data_Structure* ctxPtr,
+                                            const char* source, char* dest,
+                                            int inputSize, int maxOutputSize, limitedOutput_directive limit)
+{
+    /* auto-init if forgotten */
+    if (ctxPtr->base == NULL)
+        LZ4HC_init (ctxPtr, (const BYTE*) source);
+
+    /* Check overflow */
+    if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB)
+    {
+        size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
+        if (dictSize > 64 KB) dictSize = 64 KB;
+
+        LZ4_loadDictHC((LZ4_streamHC_t*)ctxPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
+    }
+
+    /* Check if blocks follow each other */
+    if ((const BYTE*)source != ctxPtr->end)
+        LZ4HC_setExternalDict(ctxPtr, (const BYTE*)source);
+
+    /* Check overlapping input/dictionary space */
+    {
+        const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        const BYTE* dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+        const BYTE* dictEnd   = ctxPtr->dictBase + ctxPtr->dictLimit;
+        if ((sourceEnd > dictBegin) && ((const BYTE*)source < dictEnd))
+        {
+            if (sourceEnd > dictEnd) sourceEnd = dictEnd;
+            ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+            if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
+        }
+    }
+
+    return LZ4HC_compress_generic (ctxPtr, source, dest, inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    if (maxOutputSize < LZ4_compressBound(inputSize))
+        return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput);
+    else
+        return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, noLimit);
+}
+
+
+/* dictionary saving */
+
+int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
+{
+    LZ4HC_Data_Structure* streamPtr = (LZ4HC_Data_Structure*)LZ4_streamHCPtr;
+    int prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
+    if (dictSize > 64 KB) dictSize = 64 KB;
+    if (dictSize < 4) dictSize = 0;
+    if (dictSize > prefixSize) dictSize = prefixSize;
+    memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+    {
+        U32 endIndex = (U32)(streamPtr->end - streamPtr->base);
+        streamPtr->end = (const BYTE*)safeBuffer + dictSize;
+        streamPtr->base = streamPtr->end - endIndex;
+        streamPtr->dictLimit = endIndex - dictSize;
+        streamPtr->lowLimit = endIndex - dictSize;
+        if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit;
+    }
+    return dictSize;
+}
+
+
+/***********************************
+*  Deprecated Functions
+***********************************/
+/* Deprecated compression functions */
+/* These functions are planned to start generate warnings by r131 approximately */
+int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); }
+
+
+/* Deprecated streaming functions */
+/* These functions currently generate deprecation warnings */
+int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }
+
+int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
+{
+    if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1;   /* Error : pointer is not aligned for pointer (32 or 64 bits) */
+    LZ4HC_init((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer);
+    ((LZ4HC_Data_Structure*)state)->inputBuffer = (BYTE*)inputBuffer;
+    return 0;
+}
+
+void* LZ4_createHC (char* inputBuffer)
+{
+    void* hc4 = ALLOCATOR(1, sizeof(LZ4HC_Data_Structure));
+    if (hc4 == NULL) return NULL;   /* not enough memory */
+    LZ4HC_init ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer);
+    ((LZ4HC_Data_Structure*)hc4)->inputBuffer = (BYTE*)inputBuffer;
+    return hc4;
+}
+
+int LZ4_freeHC (void* LZ4HC_Data)
+{
+    FREEMEM(LZ4HC_Data);
+    return (0);
+}
+
+int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel)
+{
+    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, compressionLevel, noLimit);
+}
+
+int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
+{
+    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
+}
+
+char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
+{
+    LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
+    int dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);
+    return (char*)(hc4->inputBuffer + dictSize);
+}
diff --git a/util/cbfstool/lz4/lib/lz4hc.h b/util/cbfstool/lz4/lib/lz4hc.h
new file mode 100644
index 0000000000..431f7c87c8
--- /dev/null
+++ b/util/cbfstool/lz4/lib/lz4hc.h
@@ -0,0 +1,189 @@
+/*
+   LZ4 HC - High Compression Mode of LZ4
+   Header File
+   Copyright (C) 2011-2015, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*****************************
+*  Includes
+*****************************/
+#include <stddef.h>   /* size_t */
+
+
+/**************************************
+*  Block Compression
+**************************************/
+int LZ4_compress_HC (const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+/*
+LZ4_compress_HC :
+    Destination buffer 'dst' must be already allocated.
+    Compression completion is guaranteed if 'dst' buffer is sized to handle worst circumstances (data not compressible)
+    Worst size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
+      srcSize  : Max supported value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
+      compressionLevel : Recommended values are between 4 and 9, although any value between 0 and 16 will work.
+                         0 means "use default value" (see lz4hc.c).
+                         Values >16 behave the same as 16.
+      return : the number of bytes written into buffer 'dst'
+            or 0 if compression fails.
+*/
+
+
+/* Note :
+   Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license)
+*/
+
+
+int LZ4_sizeofStateHC(void);
+int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+/*
+LZ4_compress_HC_extStateHC() :
+   Use this function if you prefer to manually allocate memory for compression tables.
+   To know how much memory must be allocated for the compression tables, use :
+      int LZ4_sizeofStateHC();
+
+   Allocated memory must be aligned on 8-bytes boundaries (which a normal malloc() will do properly).
+
+   The allocated memory can then be provided to the compression functions using 'void* state' parameter.
+   LZ4_compress_HC_extStateHC() is equivalent to previously described function.
+   It just uses externally allocated memory for stateHC.
+*/
+
+
+/**************************************
+*  Streaming Compression
+**************************************/
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
+typedef struct { size_t table[LZ4_STREAMHCSIZE_SIZET]; } LZ4_streamHC_t;
+/*
+  LZ4_streamHC_t
+  This structure allows static allocation of LZ4 HC streaming state.
+  State must then be initialized using LZ4_resetStreamHC() before first use.
+
+  Static allocation should only be used in combination with static linking.
+  If you want to use LZ4 as a DLL, please use construction functions below, which are future-proof.
+*/
+
+
+LZ4_streamHC_t* LZ4_createStreamHC(void);
+int             LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
+/*
+  These functions create and release memory for LZ4 HC streaming state.
+  Newly created states are already initialized.
+  Existing state space can be re-used anytime using LZ4_resetStreamHC().
+  If you use LZ4 as a DLL, use these functions instead of static structure allocation,
+  to avoid size mismatch between different versions.
+*/
+
+void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
+int  LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
+
+int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
+
+int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
+
+/*
+  These functions compress data in successive blocks of any size, using previous blocks as dictionary.
+  One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
+  There is an exception for ring buffers, which can be smaller 64 KB.
+  Such case is automatically detected and correctly handled by LZ4_compress_HC_continue().
+
+  Before starting compression, state must be properly initialized, using LZ4_resetStreamHC().
+  A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional).
+
+  Then, use LZ4_compress_HC_continue() to compress each successive block.
+  It works like LZ4_compress_HC(), but use previous memory blocks as dictionary to improve compression.
+  Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
+  As a reminder, size 'dst' buffer to handle worst cases, using LZ4_compressBound(), to ensure success of compression operation.
+
+  If, for any reason, previous data blocks can't be preserved unmodified in memory during next compression block,
+  you must save it to a safer memory space, using LZ4_saveDictHC().
+  Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer'.
+*/
+
+
+
+/**************************************
+*  Deprecated Functions
+**************************************/
+/* Deprecate Warnings */
+/* Should these warnings messages be a problem,
+   it is generally possible to disable them,
+   with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual for example.
+   You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
+#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
+#  define LZ4_DEPRECATE_WARNING_DEFBLOCK
+#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif (LZ4_GCC_VERSION >= 301)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
+#    define LZ4_DEPRECATED(message)
+#  endif
+#endif // LZ4_DEPRECATE_WARNING_DEFBLOCK
+
+/* compression functions */
+/* these functions are planned to trigger warning messages by r131 approximately */
+int LZ4_compressHC                (const char* source, char* dest, int inputSize);
+int LZ4_compressHC_limitedOutput  (const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compressHC2               (const char* source, char* dest, int inputSize, int compressionLevel);
+int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compressHC2_withStateHC              (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+int LZ4_compressHC_continue               (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Streaming functions following the older model; should no longer be used */
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") void* LZ4_createHC (char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_saveDictHC() instead")     char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_freeStreamHC() instead")   int   LZ4_freeHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int   LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int   LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") int   LZ4_sizeofStreamStateHC(void);
+LZ4_DEPRECATED("use LZ4_resetStreamHC() instead")  int   LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/util/cbfstool/lz4/lib/xxhash.c b/util/cbfstool/lz4/lib/xxhash.c
new file mode 100644
index 0000000000..511d9941a2
--- /dev/null
+++ b/util/cbfstool/lz4/lib/xxhash.c
@@ -0,0 +1,962 @@
+/*
+xxHash - Fast Hash algorithm
+Copyright (C) 2012-2015, Yann Collet
+
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+
+/**************************************
+*  Tuning parameters
+**************************************/
+/* XXH_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets which generate assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define XXH_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define XXH_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+/* XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+ * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
+ * By default, this option is disabled. To enable it, uncomment below define :
+ */
+/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
+
+/* XXH_FORCE_NATIVE_FORMAT :
+ * By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPU.
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+ * Should endian-independance be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for Big-endian CPU.
+ * This option has no impact on Little_Endian CPU.
+ */
+#define XXH_FORCE_NATIVE_FORMAT 0
+
+/* XXH_USELESS_ALIGN_BRANCH :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : don't make a test between aligned/unaligned, because performance will be the same.
+ * It saves one initial branch per hash.
+ */
+#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#  define XXH_USELESS_ALIGN_BRANCH 1
+#endif
+
+
+/**************************************
+*  Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
+#  define FORCE_INLINE static __forceinline
+#else
+#  if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/**************************************
+*  Includes & Memory related functions
+***************************************/
+#include "xxhash.h"
+/* Modify the local functions below should you wish to use some other memory routines */
+/* for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void  XXH_free  (void* p)  { free(p); }
+/* for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+
+/**************************************
+*  Basic Types
+***************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+  typedef uint8_t  BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char      BYTE;
+  typedef unsigned short     U16;
+  typedef unsigned int       U32;
+  typedef   signed int       S32;
+  typedef unsigned long long U64;
+#endif
+
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static U32 XXH_read32(const void* memPtr)
+{
+    U32 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+static U64 XXH_read64(const void* memPtr)
+{
+    U64 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif // XXH_FORCE_DIRECT_MEMORY_ACCESS
+
+
+/******************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+#endif
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap32 _byteswap_ulong
+#  define XXH_swap64 _byteswap_uint64
+#elif GCC_VERSION >= 403
+#  define XXH_swap32 __builtin_bswap32
+#  define XXH_swap64 __builtin_bswap64
+#else
+static U32 XXH_swap32 (U32 x)
+{
+    return  ((x << 24) & 0xff000000 ) |
+            ((x <<  8) & 0x00ff0000 ) |
+            ((x >>  8) & 0x0000ff00 ) |
+            ((x >> 24) & 0x000000ff );
+}
+static U64 XXH_swap64 (U64 x)
+{
+    return  ((x << 56) & 0xff00000000000000ULL) |
+            ((x << 40) & 0x00ff000000000000ULL) |
+            ((x << 24) & 0x0000ff0000000000ULL) |
+            ((x << 8)  & 0x000000ff00000000ULL) |
+            ((x >> 8)  & 0x00000000ff000000ULL) |
+            ((x >> 24) & 0x0000000000ff0000ULL) |
+            ((x >> 40) & 0x000000000000ff00ULL) |
+            ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+
+/***************************************
+*  Architecture Macros
+***************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example one the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+    static const int one = 1;
+#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&one))
+#endif
+
+
+/*****************************
+*  Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+    else
+        return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+    else
+        return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+
+/***************************************
+*  Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    /* use only *after* variable declarations */
+
+
+/***************************************
+*  Constants
+***************************************/
+#define PRIME32_1   2654435761U
+#define PRIME32_2   2246822519U
+#define PRIME32_3   3266489917U
+#define PRIME32_4    668265263U
+#define PRIME32_5    374761393U
+
+#define PRIME64_1 11400714785074694791ULL
+#define PRIME64_2 14029467366897019727ULL
+#define PRIME64_3  1609587929392839161ULL
+#define PRIME64_4  9650029242287828579ULL
+#define PRIME64_5  2870177450012600261ULL
+
+
+/*****************************
+*  Simple Hash Functions
+*****************************/
+FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U32 h32;
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)16;
+    }
+#endif
+
+    if (len>=16)
+    {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = seed + PRIME32_1 + PRIME32_2;
+        U32 v2 = seed + PRIME32_2;
+        U32 v3 = seed + 0;
+        U32 v4 = seed - PRIME32_1;
+
+        do
+        {
+            v1 += XXH_get32bits(p) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_get32bits(p) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_get32bits(p) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_get32bits(p) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+
+        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+    }
+    else
+    {
+        h32  = seed + PRIME32_5;
+    }
+
+    h32 += (U32) len;
+
+    while (p+4<=bEnd)
+    {
+        h32 += XXH_get32bits(p) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH32_state_t state;
+    XXH32_reset(&state, seed);
+    XXH32_update(&state, input, len);
+    return XXH32_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USELESS_ALIGN_BRANCH)
+    if ((((size_t)input) & 3) == 0)   /* Input is 4-bytes aligned, leverage the speed benefit */
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U64 h64;
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)32;
+    }
+#endif
+
+    if (len>=32)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = seed + PRIME64_1 + PRIME64_2;
+        U64 v2 = seed + PRIME64_2;
+        U64 v3 = seed + 0;
+        U64 v4 = seed - PRIME64_1;
+
+        do
+        {
+            v1 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            v2 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            v3 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            v4 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+        }
+        while (p<=limit);
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64  = seed + PRIME64_5;
+    }
+
+    h64 += (U64) len;
+
+    while (p+8<=bEnd)
+    {
+        U64 k1 = XXH_get64bits(p);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)
+    {
+        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, input, len);
+    return XXH64_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USELESS_ALIGN_BRANCH)
+    if ((((size_t)input) & 7)==0)   /* Input is aligned, let's leverage the speed advantage */
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+/****************************************************
+*  Advanced Hash Functions
+****************************************************/
+
+/*** Allocation ***/
+typedef struct
+{
+    U64 total_len;
+    U32 seed;
+    U32 v1;
+    U32 v2;
+    U32 v3;
+    U32 v4;
+    U32 mem32[4];   /* defined as U32 for alignment */
+    U32 memsize;
+} XXH_istate32_t;
+
+typedef struct
+{
+    U64 total_len;
+    U64 seed;
+    U64 v1;
+    U64 v2;
+    U64 v3;
+    U64 v4;
+    U64 mem64[4];   /* defined as U64 for alignment */
+    U32 memsize;
+} XXH_istate64_t;
+
+
+XXH32_state_t* XXH32_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   /* A compilation error here means XXH32_state_t is not large enough */
+    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+XXH64_state_t* XXH64_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   /* A compilation error here means XXH64_state_t is not large enough */
+    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+
+/*** Hash feed ***/
+
+XXH_errorcode XXH32_reset(XXH32_state_t* state_in, unsigned int seed)
+{
+    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME32_1 + PRIME32_2;
+    state->v2 = seed + PRIME32_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME32_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
+{
+    XXH_istate64_t* state = (XXH_istate64_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME64_1 + PRIME64_2;
+    state->v2 = seed + PRIME64_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME64_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+
+FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate32_t* state = (XXH_istate32_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 16)   /* fill in tmp buffer */
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+        {
+            const U32* p32 = state->mem32;
+            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v1 = XXH_rotl32(state->v1, 13);
+            state->v1 *= PRIME32_1;
+            p32++;
+            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v2 = XXH_rotl32(state->v2, 13);
+            state->v2 *= PRIME32_1;
+            p32++;
+            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v3 = XXH_rotl32(state->v3, 13);
+            state->v3 *= PRIME32_1;
+            p32++;
+            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v4 = XXH_rotl32(state->v4, 13);
+            state->v4 *= PRIME32_1;
+            p32++;
+        }
+        p += 16-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p <= bEnd-16)
+    {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = state->v1;
+        U32 v2 = state->v2;
+        U32 v3 = state->v3;
+        U32 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE32(p, endian) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_readLE32(p, endian) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_readLE32(p, endian) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_readLE32(p, endian) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->mem32, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
+{
+    const XXH_istate32_t* state = (const XXH_istate32_t*) state_in;
+    const BYTE * p = (const BYTE*)state->mem32;
+    const BYTE* bEnd = (const BYTE*)(state->mem32) + state->memsize;
+    U32 h32;
+
+    if (state->total_len >= 16)
+    {
+        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+    }
+    else
+    {
+        h32  = state->seed + PRIME32_5;
+    }
+
+    h32 += (U32) state->total_len;
+
+    while (p+4<=bEnd)
+    {
+        h32 += XXH_readLE32(p, endian) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+unsigned int XXH32_digest (const XXH32_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 32)   /* fill in tmp buffer */
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+        {
+            const U64* p64 = state->mem64;
+            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v1 = XXH_rotl64(state->v1, 31);
+            state->v1 *= PRIME64_1;
+            p64++;
+            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v2 = XXH_rotl64(state->v2, 31);
+            state->v2 *= PRIME64_1;
+            p64++;
+            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v3 = XXH_rotl64(state->v3, 31);
+            state->v3 *= PRIME64_1;
+            p64++;
+            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v4 = XXH_rotl64(state->v4, 31);
+            state->v4 *= PRIME64_1;
+            p64++;
+        }
+        p += 32-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p+32 <= bEnd)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE64(p, endian) * PRIME64_2;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            p+=8;
+            v2 += XXH_readLE64(p, endian) * PRIME64_2;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            p+=8;
+            v3 += XXH_readLE64(p, endian) * PRIME64_2;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            p+=8;
+            v4 += XXH_readLE64(p, endian) * PRIME64_2;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+            p+=8;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->mem64, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
+{
+    const XXH_istate64_t * state = (const XXH_istate64_t *) state_in;
+    const BYTE * p = (const BYTE*)state->mem64;
+    const BYTE* bEnd = (const BYTE*)state->mem64 + state->memsize;
+    U64 h64;
+
+    if (state->total_len >= 32)
+    {
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64  = state->seed + PRIME64_5;
+    }
+
+    h64 += (U64) state->total_len;
+
+    while (p+8<=bEnd)
+    {
+        U64 k1 = XXH_readLE64(p, endian);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)
+    {
+        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
diff --git a/util/cbfstool/lz4/lib/xxhash.h b/util/cbfstool/lz4/lib/xxhash.h
new file mode 100644
index 0000000000..c60aa61571
--- /dev/null
+++ b/util/cbfstool/lz4/lib/xxhash.h
@@ -0,0 +1,192 @@
+/*
+   xxHash - Extremely Fast Hash algorithm
+   Header File
+   Copyright (C) 2012-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MumurHash 3a    2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bits version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bits applications only.
+Name     Speed on 64 bits    Speed on 32 bits
+XXH64       13.8 GB/s            1.9 GB/s
+XXH32        6.8 GB/s            6.0 GB/s
+*/
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*****************************
+*  Definitions
+*****************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/*****************************
+*  Namespace Emulation
+*****************************/
+/* Motivations :
+
+If you need to include xxHash into your library,
+but wish to avoid xxHash symbols to be present on your library interface
+in an effort to avoid potential name collision if another library also includes xxHash,
+
+you can use XXH_NAMESPACE, which will automatically prefix any symbol from xxHash
+with the value of XXH_NAMESPACE (so avoid to keep it NULL, and avoid numeric values).
+
+Note that no change is required within the calling program :
+it can still call xxHash functions using their regular name.
+They will be automatically translated by this header.
+*/
+#ifdef XXH_NAMESPACE
+#  define XXH_CAT(A,B) A##B
+#  define XXH_NAME2(A,B) XXH_CAT(A,B)
+#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#endif
+
+
+/*****************************
+*  Simple Hash Functions
+*****************************/
+
+unsigned int       XXH32 (const void* input, size_t length, unsigned seed);
+unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*
+XXH32() :
+    Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    This function successfully passes all SMHasher tests.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+XXH64() :
+    Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
+    Faster on 64-bits systems. Slower on 32-bits systems.
+*/
+
+
+
+/*****************************
+*  Advanced Hash Functions
+*****************************/
+typedef struct { long long ll[ 6]; } XXH32_state_t;
+typedef struct { long long ll[11]; } XXH64_state_t;
+
+/*
+These structures allow static allocation of XXH states.
+States must then be initialized using XXHnn_reset() before first use.
+
+If you prefer dynamic allocation, please refer to functions below.
+*/
+
+XXH32_state_t* XXH32_createState(void);
+XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+
+XXH64_state_t* XXH64_createState(void);
+XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
+
+/*
+These functions create and release memory for XXH state.
+States must then be initialized using XXHnn_reset() before first use.
+*/
+
+
+XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned seed);
+XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+unsigned int  XXH32_digest (const XXH32_state_t* statePtr);
+
+XXH_errorcode      XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_errorcode      XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+unsigned long long XXH64_digest (const XXH64_state_t* statePtr);
+
+/*
+These functions calculate the xxHash of an input provided in multiple smaller packets,
+as opposed to an input provided as a single block.
+
+XXH state space must first be allocated, using either static or dynamic method provided above.
+
+Start a new hash by initializing state with a seed, using XXHnn_reset().
+
+Then, feed the hash state by calling XXHnn_update() as many times as necessary.
+Obviously, input must be valid, meaning allocated and read accessible.
+The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+
+Finally, you can produce a hash anytime, by using XXHnn_digest().
+This function returns the final nn-bits hash.
+You can nonetheless continue feeding the hash state with more input,
+and therefore get some new hashes, by calling again XXHnn_digest().
+
+When you are done, don't forget to free XXH state space, using typically XXHnn_freeState().
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/util/cbfstool/lz4/lz4_Block_format.md b/util/cbfstool/lz4/lz4_Block_format.md
new file mode 100644
index 0000000000..0f6a5ba9d3
--- /dev/null
+++ b/util/cbfstool/lz4/lz4_Block_format.md
@@ -0,0 +1,127 @@
+LZ4 Block Format Description
+============================
+Last revised: 2015-05-07.
+Author : Yann Collet
+
+
+This specification is intended for developers
+willing to produce LZ4-compatible compressed data blocks
+using any programming language.
+
+LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding.
+There is no entropy encoder back-end nor framing layer.
+The latter is assumed to be handled by other parts of the system (see [LZ4 Frame format]).
+This design is assumed to favor simplicity and speed.
+It helps later on for optimizations, compactness, and features.
+
+This document describes only the block format,
+not how the compressor nor decompressor actually work.
+The correctness of the decompressor should not depend
+on implementation details of the compressor, and vice versa.
+
+[LZ4 Frame format]: lz4_Frame_format.md
+
+
+
+Compressed block format
+-----------------------
+An LZ4 compressed block is composed of sequences.
+A sequence is a suite of literals (not-compressed bytes),
+followed by a match copy.
+
+Each sequence starts with a token.
+The token is a one byte value, separated into two 4-bits fields.
+Therefore each field ranges from 0 to 15.
+
+
+The first field uses the 4 high-bits of the token.
+It provides the length of literals to follow.
+
+If the field value is 0, then there is no literal.
+If it is 15, then we need to add some more bytes to indicate the full length.
+Each additional byte then represent a value from 0 to 255,
+which is added to the previous value to produce a total length.
+When the byte value is 255, another byte is output.
+There can be any number of bytes following the token. There is no "size limit".
+(Side note : this is why a not-compressible input block is expanded by 0.4%).
+
+Example 1 : A length of 48 will be represented as :
+
+  - 15 : value for the 4-bits High field
+  - 33 : (=48-15) remaining length to reach 48
+
+Example 2 : A length of 280 will be represented as :
+
+  - 15  : value for the 4-bits High field
+  - 255 : following byte is maxed, since 280-15 >= 255
+  - 10  : (=280 - 15 - 255) ) remaining length to reach 280
+
+Example 3 : A length of 15 will be represented as :
+
+  - 15 : value for the 4-bits High field
+  - 0  : (=15-15) yes, the zero must be output
+
+Following the token and optional length bytes, are the literals themselves.
+They are exactly as numerous as previously decoded (length of literals).
+It's possible that there are zero literal.
+
+
+Following the literals is the match copy operation.
+
+It starts by the offset.
+This is a 2 bytes value, in little endian format
+(the 1st byte is the "low" byte, the 2nd one is the "high" byte).
+
+The offset represents the position of the match to be copied from.
+1 means "current position - 1 byte".
+The maximum offset value is 65535, 65536 cannot be coded.
+Note that 0 is an invalid value, not used.
+
+Then we need to extract the match length.
+For this, we use the second token field, the low 4-bits.
+Value, obviously, ranges from 0 to 15.
+However here, 0 means that the copy operation will be minimal.
+The minimum length of a match, called minmatch, is 4.
+As a consequence, a 0 value means 4 bytes, and a value of 15 means 19+ bytes.
+Similar to literal length, on reaching the highest possible value (15),
+we output additional bytes, one at a time, with values ranging from 0 to 255.
+They are added to total to provide the final match length.
+A 255 value means there is another byte to read and add.
+There is no limit to the number of optional bytes that can be output this way.
+(This points towards a maximum achievable compression ratio of about 250).
+
+With the offset and the matchlength,
+the decoder can now proceed to copy the data from the already decoded buffer.
+On decoding the matchlength, we reach the end of the compressed sequence,
+and therefore start another one.
+
+
+Parsing restrictions
+-----------------------
+There are specific parsing rules to respect in order to remain compatible
+with assumptions made by the decoder :
+
+1. The last 5 bytes are always literals
+2. The last match must start at least 12 bytes before end of block.
+   Consequently, a block with less than 13 bytes cannot be compressed.
+
+These rules are in place to ensure that the decoder
+will never read beyond the input buffer, nor write beyond the output buffer.
+
+Note that the last sequence is also incomplete,
+and stops right after literals.
+
+
+Additional notes
+-----------------------
+There is no assumption nor limits to the way the compressor
+searches and selects matches within the source data block.
+It could be a fast scan, a multi-probe, a full search using BST,
+standard hash chains or MMC, well whatever.
+
+Advanced parsing strategies can also be implemented, such as lazy match,
+or full optimal parsing.
+
+All these trade-off offer distinctive speed/memory/compression advantages.
+Whatever the method used by the compressor, its result will be decodable
+by any LZ4 decoder if it follows the format specification described above.
diff --git a/util/cbfstool/lz4/lz4_Frame_format.md b/util/cbfstool/lz4/lz4_Frame_format.md
new file mode 100644
index 0000000000..2ea1a86802
--- /dev/null
+++ b/util/cbfstool/lz4/lz4_Frame_format.md
@@ -0,0 +1,385 @@
+LZ4 Frame Format Description
+============================
+
+###Notices
+
+Copyright (c) 2013-2015 Yann Collet
+
+Permission is granted to copy and distribute this document
+for any  purpose and without charge,
+including translations into other  languages
+and incorporation into compilations,
+provided that the copyright notice and this notice are preserved,
+and that any substantive changes or deletions from the original
+are clearly marked.
+Distribution of this document is unlimited.
+
+###Version
+
+1.5.1 (31/03/2015)
+
+
+Introduction
+------------
+
+The purpose of this document is to define a lossless compressed data format,
+that is independent of CPU type, operating system,
+file system and character set, suitable for
+File compression, Pipe and streaming compression
+using the [LZ4 algorithm](http://www.lz4.org).
+
+The data can be produced or consumed,
+even for an arbitrarily long sequentially presented input data stream,
+using only an a priori bounded amount of intermediate storage,
+and hence can be used in data communications.
+The format uses the LZ4 compression method,
+and optional [xxHash-32 checksum method](https://github.com/Cyan4973/xxHash),
+for detection of data corruption.
+
+The data format defined by this specification
+does not attempt to allow random access to compressed data.
+
+This specification is intended for use by implementers of software
+to compress data into LZ4 format and/or decompress data from LZ4 format.
+The text of the specification assumes a basic background in programming
+at the level of bits and other primitive data representations.
+
+Unless otherwise indicated below,
+a compliant compressor must produce data sets
+that conform to the specifications presented here.
+It doesn’t need to support all options though.
+
+A compliant decompressor must be able to decompress
+at least one working set of parameters
+that conforms to the specifications presented here.
+It may also ignore checksums.
+Whenever it does not support a specific parameter within the compressed stream,
+it must produce a non-ambiguous error code
+and associated error message explaining which parameter is unsupported.
+
+
+General Structure of LZ4 Frame format
+-------------------------------------
+
+| MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum |
+|:-------:|:-------------:| ----- | ----- | ------- | ----------- |
+| 4 bytes |  3-11 bytes   |       |       | 4 bytes | 0-4 bytes   |
+
+__Magic Number__
+
+4 Bytes, Little endian format.
+Value : 0x184D2204
+
+__Frame Descriptor__
+
+3 to 11 Bytes, to be detailed in the next part.
+Most important part of the spec.
+
+__Data Blocks__
+
+To be detailed later on.
+That’s where compressed data is stored.
+
+__EndMark__
+
+The flow of blocks ends when the last data block has a size of “0”.
+The size is expressed as a 32-bits value.
+
+__Content Checksum__
+
+Content Checksum verify that the full content has been decoded correctly.
+The content checksum is the result
+of [xxh32() hash function](https://github.com/Cyan4973/xxHash)
+digesting the original (decoded) data as input, and a seed of zero.
+Content checksum is only present when its associated flag
+is set in the frame descriptor.
+Content Checksum validates the result,
+that all blocks were fully transmitted in the correct order and without error,
+and also that the encoding/decoding process itself generated no distortion.
+Its usage is recommended.
+
+__Frame Concatenation__
+
+In some circumstances, it may be preferable to append multiple frames,
+for example in order to add new data to an existing compressed file
+without re-framing it.
+
+In such case, each frame has its own set of descriptor flags.
+Each frame is considered independent.
+The only relation between frames is their sequential order.
+
+The ability to decode multiple concatenated frames
+within a single stream or file
+is left outside of this specification.
+As an example, the reference lz4 command line utility behavior is
+to decode all concatenated frames in their sequential order.
+
+
+Frame Descriptor
+----------------
+
+| FLG     | BD      | (Content Size) | HC      |
+| ------- | ------- |:--------------:| ------- |
+| 1 byte  | 1 byte  |  0 - 8 bytes   | 1 byte  |
+
+The descriptor uses a minimum of 3 bytes,
+and up to 11 bytes depending on optional parameters.
+
+__FLG byte__
+
+|  BitNb  |   7-6   |    5    |     4     |   3     |     2     |    1-0   |
+| ------- | ------- | ------- | --------- | ------- | --------- | -------- |
+|FieldName| Version | B.Indep | B.Checksum| C.Size  | C.Checksum|*Reserved*|
+
+
+__BD byte__
+
+|  BitNb  |     7    |     6-5-4    |  3-2-1-0 |
+| ------- | -------- | ------------ | -------- |
+|FieldName|*Reserved*| Block MaxSize|*Reserved*|
+
+In the tables, bit 7 is highest bit, while bit 0 is lowest.
+
+__Version Number__
+
+2-bits field, must be set to “01”.
+Any other value cannot be decoded by this version of the specification.
+Other version numbers will use different flag layouts.
+
+__Block Independence flag__
+
+If this flag is set to “1”, blocks are independent.
+If this flag is set to “0”, each block depends on previous ones
+(up to LZ4 window size, which is 64 KB).
+In such case, it’s necessary to decode all blocks in sequence.
+
+Block dependency improves compression ratio, especially for small blocks.
+On the other hand, it makes direct jumps or multi-threaded decoding impossible.
+
+__Block checksum flag__
+
+If this flag is set, each data block will be followed by a 4-bytes checksum,
+calculated by using the xxHash-32 algorithm on the raw (compressed) data block.
+The intention is to detect data corruption (storage or transmission errors)
+immediately, before decoding.
+Block checksum usage is optional.
+
+__Content Size flag__
+
+If this flag is set, the uncompressed size of data included within the frame
+will be present as an 8 bytes unsigned little endian value, after the flags.
+Content Size usage is optional.
+
+__Content checksum flag__
+
+If this flag is set, a content checksum will be appended after the EndMark.
+
+Recommended value : “1” (content checksum is present)
+
+__Block Maximum Size__
+
+This information is intended to help the decoder allocate memory.
+Size here refers to the original (uncompressed) data size.
+Block Maximum Size is one value among the following table :
+
+|  0  |  1  |  2  |  3  |   4   |   5    |  6   |  7   |
+| --- | --- | --- | --- | ----- | ------ | ---- | ---- |
+| N/A | N/A | N/A | N/A | 64 KB | 256 KB | 1 MB | 4 MB |
+
+The decoder may refuse to allocate block sizes above a (system-specific) size.
+Unused values may be used in a future revision of the spec.
+A decoder conformant to the current version of the spec
+is only able to decode blocksizes defined in this spec.
+
+__Reserved bits__
+
+Value of reserved bits **must** be 0 (zero).
+Reserved bit might be used in a future version of the specification,
+typically enabling new optional features.
+If this happens, a decoder respecting the current version of the specification
+shall not be able to decode such a frame.
+
+__Content Size__
+
+This is the original (uncompressed) size.
+This information is optional, and only present if the associated flag is set.
+Content size is provided using unsigned 8 Bytes, for a maximum of 16 HexaBytes.
+Format is Little endian.
+This value is informational, typically for display or memory allocation.
+It can be skipped by a decoder, or used to validate content correctness.
+
+__Header Checksum__
+
+One-byte checksum of combined descriptor fields, including optional ones.
+The value is the second byte of xxh32() : ` (xxh32()>>8) & 0xFF `
+using zero as a seed,
+and the full Frame Descriptor as an input
+(including optional fields when they are present).
+A wrong checksum indicates an error in the descriptor.
+Header checksum is informational and can be skipped.
+
+
+Data Blocks
+-----------
+
+| Block Size |  data  | (Block Checksum) |
+|:----------:| ------ |:----------------:|
+|  4 bytes   |        |   0 - 4 bytes    |
+
+
+__Block Size__
+
+This field uses 4-bytes, format is little-endian.
+
+The highest bit is “1” if data in the block is uncompressed.
+
+The highest bit is “0” if data in the block is compressed by LZ4.
+
+All other bits give the size, in bytes, of the following data block
+(the size does not include the block checksum if present).
+
+Block Size shall never be larger than Block Maximum Size.
+Such a thing could happen for incompressible source data.
+In such case, such a data block shall be passed in uncompressed format.
+
+__Data__
+
+Where the actual data to decode stands.
+It might be compressed or not, depending on previous field indications.
+Uncompressed size of Data can be any size, up to “block maximum size”.
+Note that data block is not necessarily full :
+an arbitrary “flush” may happen anytime. Any block can be “partially filled”.
+
+__Block checksum__
+
+Only present if the associated flag is set.
+This is a 4-bytes checksum value, in little endian format,
+calculated by using the xxHash-32 algorithm on the raw (undecoded) data block,
+and a seed of zero.
+The intention is to detect data corruption (storage or transmission errors)
+before decoding.
+
+Block checksum is cumulative with Content checksum.
+
+
+Skippable Frames
+----------------
+
+| Magic Number | Frame Size | User Data |
+|:------------:|:----------:| --------- |
+|   4 bytes    |  4 bytes   |           |
+
+Skippable frames allow the integration of user-defined data
+into a flow of concatenated frames.
+Its design is pretty straightforward,
+with the sole objective to allow the decoder to quickly skip
+over user-defined data and continue decoding.
+
+For the purpose of facilitating identification,
+it is discouraged to start a flow of concatenated frames with a skippable frame.
+If there is a need to start such a flow with some user data
+encapsulated into a skippable frame,
+it’s recommended to start with a zero-byte LZ4 frame
+followed by a skippable frame.
+This will make it easier for file type identifiers.
+
+
+__Magic Number__
+
+4 Bytes, Little endian format.
+Value : 0x184D2A5X, which means any value from 0x184D2A50 to 0x184D2A5F.
+All 16 values are valid to identify a skippable frame.
+
+__Frame Size__
+
+This is the size, in bytes, of the following User Data
+(without including the magic number nor the size field itself).
+4 Bytes, Little endian format, unsigned 32-bits.
+This means User Data can’t be bigger than (2^32-1) Bytes.
+
+__User Data__
+
+User Data can be anything. Data will just be skipped by the decoder.
+
+
+Legacy frame
+------------
+
+The Legacy frame format was defined into the initial versions of “LZ4Demo”.
+Newer compressors should not use this format anymore, as it is too restrictive.
+
+Main characteristics of the legacy format :
+
+- Fixed block size : 8 MB.
+- All blocks must be completely filled, except the last one.
+- All blocks are always compressed, even when compression is detrimental.
+- The last block is detected either because
+  it is followed by the “EOF” (End of File) mark,
+  or because it is followed by a known Frame Magic Number.
+- No checksum
+- Convention is Little endian
+
+| MagicNb | B.CSize | CData | B.CSize | CData |  (...)  | EndMark |
+| ------- | ------- | ----- | ------- | ----- | ------- | ------- |
+| 4 bytes | 4 bytes | CSize | 4 bytes | CSize | x times |   EOF   |
+
+
+__Magic Number__
+
+4 Bytes, Little endian format.
+Value : 0x184C2102
+
+__Block Compressed Size__
+
+This is the size, in bytes, of the following compressed data block.
+4 Bytes, Little endian format.
+
+__Data__
+
+Where the actual compressed data stands.
+Data is always compressed, even when compression is detrimental.
+
+__EndMark__
+
+End of legacy frame is implicit only.
+It must be followed by a standard EOF (End Of File) signal,
+wether it is a file or a stream.
+
+Alternatively, if the frame is followed by a valid Frame Magic Number,
+it is considered completed.
+It makes legacy frames compatible with frame concatenation.
+
+Any other value will be interpreted as a block size,
+and trigger an error if it does not fit within acceptable range.
+
+
+Version changes
+---------------
+
+1.5.1 : changed format to MarkDown compatible
+
+1.5 : removed Dictionary ID from specification
+
+1.4.1 : changed wording from “stream” to “frame”
+
+1.4 : added skippable streams, re-added stream checksum
+
+1.3 : modified header checksum
+
+1.2 : reduced choice of “block size”, to postpone decision on “dynamic size of BlockSize Field”.
+
+1.1 : optional fields are now part of the descriptor
+
+1.0 : changed “block size” specification, adding a compressed/uncompressed flag
+
+0.9 : reduced scale of “block maximum size” table
+
+0.8 : removed : high compression flag
+
+0.7 : removed : stream checksum
+
+0.6 : settled : stream size uses 8 bytes, endian convention is little endian
+
+0.5: added copyright notice
+
+0.4 : changed format to Google Doc compatible OpenDocument
diff --git a/util/cbmem/cbmem.c b/util/cbmem/cbmem.c
index 465ab4bb1e..4c563c8d89 100644
--- a/util/cbmem/cbmem.c
+++ b/util/cbmem/cbmem.c
@@ -500,6 +500,8 @@ static const struct timestamp_id_to_name {
 	{ TS_END_COPYROM,	"finished loading romstage" },
 	{ TS_START_ULZMA,	"starting LZMA decompress (ignore for x86)" },
 	{ TS_END_ULZMA,		"finished LZMA decompress (ignore for x86)" },
+	{ TS_START_ULZ4F,	"starting LZ4 decompress (ignore for x86)" },
+	{ TS_END_ULZ4F,		"finished LZ4 decompress (ignore for x86)" },
 	{ TS_DEVICE_ENUMERATE,	"device enumeration" },
 	{ TS_DEVICE_CONFIGURE,	"device configuration" },
 	{ TS_DEVICE_ENABLE,	"device enable" },
diff --git a/util/nvramtool/cbfs.h b/util/nvramtool/cbfs.h
index 58ef126b92..3a5bddbbda 100644
--- a/util/nvramtool/cbfs.h
+++ b/util/nvramtool/cbfs.h
@@ -59,6 +59,7 @@ typedef uint8_t u8;
 
 #define CBFS_COMPRESS_NONE  0
 #define CBFS_COMPRESS_LZMA  1
+#define CBFS_COMPRESS_LZ4   2
 
 /** These are standard component types for well known
     components (i.e - those that coreboot needs to consume.