diff options
Diffstat (limited to 'payloads')
-rw-r--r-- | payloads/libpayload/include/cbfs_core.h | 1 | ||||
-rw-r--r-- | payloads/libpayload/include/lz4.h | 7 | ||||
-rw-r--r-- | payloads/libpayload/libcbfs/cbfs.c | 5 | ||||
-rw-r--r-- | payloads/libpayload/libcbfs/cbfs_core.c | 7 | ||||
-rw-r--r-- | payloads/libpayload/liblz4/lz4.c.inc (renamed from payloads/libpayload/liblz4/lz4.c) | 64 | ||||
-rw-r--r-- | payloads/libpayload/liblz4/lz4_wrapper.c | 95 |
6 files changed, 116 insertions, 63 deletions
diff --git a/payloads/libpayload/include/cbfs_core.h b/payloads/libpayload/include/cbfs_core.h index 4c59f4131a..4cbc4c0628 100644 --- a/payloads/libpayload/include/cbfs_core.h +++ b/payloads/libpayload/include/cbfs_core.h @@ -58,6 +58,7 @@ #define CBFS_COMPRESS_NONE 0 #define CBFS_COMPRESS_LZMA 1 +#define CBFS_COMPRESS_LZ4 2 /** These are standard component types for well known components (i.e - those that coreboot needs to consume. diff --git a/payloads/libpayload/include/lz4.h b/payloads/libpayload/include/lz4.h index 1f2830db46..d2120a48fc 100644 --- a/payloads/libpayload/include/lz4.h +++ b/payloads/libpayload/include/lz4.h @@ -36,7 +36,10 @@ /* Decompresses an LZ4F image (multiple LZ4 blocks with frame header) from src * to dst, ensuring that it doesn't read more than srcn bytes and doesn't write - * more than dstn. Buffer sizes must stay below 2GB. + * more than dstn. Buffer sizes must stay below 2GB. Can decompress files loaded + * to the end of a buffer in-place, as long as buffer is larger than the final + * output size. (Usually just a few bytes, but may be up to (8 + dstn/255) in + * worst case. Will reliably return an error if buffer was too small.) * Returns amount of decompressed bytes, or 0 on error. */ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn); @@ -44,4 +47,4 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn); /* Same as ulz4fn() but does not perform any bounds checks. */ size_t ulz4f(const void *src, void *dst); -#endif /* __LZO_H_ */ +#endif /* __LZ4_H_ */ diff --git a/payloads/libpayload/libcbfs/cbfs.c b/payloads/libpayload/libcbfs/cbfs.c index 49e4941181..a1cc7e443c 100644 --- a/payloads/libpayload/libcbfs/cbfs.c +++ b/payloads/libpayload/libcbfs/cbfs.c @@ -35,11 +35,16 @@ # include <lzma.h> # define CBFS_CORE_WITH_LZMA # endif +# if IS_ENABLED(CONFIG_LP_LZ4) +# include <lz4.h> +# define CBFS_CORE_WITH_LZ4 +# endif # define CBFS_MINI_BUILD #elif defined(__SMM__) # define CBFS_MINI_BUILD #else # define CBFS_CORE_WITH_LZMA +# define CBFS_CORE_WITH_LZ4 # include <lib.h> #endif diff --git a/payloads/libpayload/libcbfs/cbfs_core.c b/payloads/libpayload/libcbfs/cbfs_core.c index ddf0da5f42..c32d262b33 100644 --- a/payloads/libpayload/libcbfs/cbfs_core.c +++ b/payloads/libpayload/libcbfs/cbfs_core.c @@ -34,6 +34,9 @@ * CBFS_CORE_WITH_LZMA (must be #define) * if defined, ulzma() must exist for decompression of data streams * + * CBFS_CORE_WITH_LZ4 (must be #define) + * if defined, ulz4f() must exist for decompression of data streams + * * ERROR(x...) * print an error message x (in printf format) * @@ -330,6 +333,10 @@ int cbfs_decompress(int algo, void *src, void *dst, int len) case CBFS_COMPRESS_LZMA: return ulzma(src, dst); #endif +#ifdef CBFS_CORE_WITH_LZ4 + case CBFS_COMPRESS_LZ4: + return ulz4f(src, dst); +#endif default: ERROR("tried to decompress %d bytes with algorithm #%x," "but that algorithm id is unsupported.\n", len, diff --git a/payloads/libpayload/liblz4/lz4.c b/payloads/libpayload/liblz4/lz4.c.inc index fb89090ee2..b3be4e5b44 100644 --- a/payloads/libpayload/liblz4/lz4.c +++ b/payloads/libpayload/liblz4/lz4.c.inc @@ -37,12 +37,19 @@ * Reading and writing into memory **************************************/ -/* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */ +/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */ static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) { BYTE* d = (BYTE*)dstPtr; const BYTE* s = (const BYTE*)srcPtr; - BYTE* e = (BYTE*)dstEnd; + BYTE* const e = (BYTE*)dstEnd; + +#if 0 + const size_t l2 = 8 - (((size_t)d) & (sizeof(void*)-1)); + LZ4_copy8(d,s); if (d>e-9) return; + d+=l2; s+=l2; +#endif /* join to align */ + do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e); } @@ -52,9 +59,9 @@ static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) **************************************/ #define MINMATCH 4 -#define COPYLENGTH 8 +#define WILDCOPYLENGTH 8 #define LASTLITERALS 5 -#define MFLIMIT (COPYLENGTH+MINMATCH) +#define MFLIMIT (WILDCOPYLENGTH+MINMATCH) static const int LZ4_minLength = (MFLIMIT+1); #define KB *(1 <<10) @@ -114,11 +121,12 @@ FORCE_INLINE int LZ4_decompress_generic( const BYTE* const lowLimit = lowPrefix - dictSize; const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize; - const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4}; - const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; + const unsigned dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4}; + const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; const int safeDecode = (endOnInput==endOnInputSize); const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); + const int inPlaceDecode = ((ip >= op) && (ip < oend)); /* Special cases */ @@ -133,6 +141,9 @@ FORCE_INLINE int LZ4_decompress_generic( unsigned token; size_t length; const BYTE* match; + size_t offset; + + if (unlikely((inPlaceDecode) && (op + WILDCOPYLENGTH > ip))) goto _output_error; /* output stream ran over input stream */ /* get literal length */ token = *ip++; @@ -144,7 +155,7 @@ FORCE_INLINE int LZ4_decompress_generic( s = *ip++; length += s; } - while (likely((endOnInput)?ip<iend-RUN_MASK:1) && (s==255)); + while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) && (s==255) ); if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error; /* overflow detection */ if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error; /* overflow detection */ } @@ -152,7 +163,7 @@ FORCE_INLINE int LZ4_decompress_generic( /* copy literals */ cpy = op+length; if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) - || ((!endOnInput) && (cpy>oend-COPYLENGTH))) + || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH))) { if (partialDecoding) { @@ -164,7 +175,7 @@ FORCE_INLINE int LZ4_decompress_generic( if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */ } - memcpy(op, ip, length); + memmove(op, ip, length); ip += length; op += length; break; /* Necessarily EOF, due to parsing restrictions */ @@ -173,8 +184,9 @@ FORCE_INLINE int LZ4_decompress_generic( ip += length; op = cpy; /* get offset */ - match = cpy - LZ4_readLE16(ip); ip+=2; - if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside destination buffer */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside buffers */ /* get matchlength */ length = token & ML_MASK; @@ -204,12 +216,12 @@ FORCE_INLINE int LZ4_decompress_generic( } else { - /* match encompass external dictionary and current segment */ + /* match encompass external dictionary and current block */ size_t copySize = (size_t)(lowPrefix-match); memcpy(op, dictEnd - copySize, copySize); op += copySize; copySize = length - copySize; - if (copySize > (size_t)(op-lowPrefix)) /* overlap within current segment */ + if (copySize > (size_t)(op-lowPrefix)) /* overlap copy */ { BYTE* const endOfMatch = op + copySize; const BYTE* copyFrom = lowPrefix; @@ -224,28 +236,30 @@ FORCE_INLINE int LZ4_decompress_generic( continue; } - /* copy repeated sequence */ + /* copy match within block */ cpy = op + length; - if (unlikely((op-match)<8)) + if (unlikely(offset<8)) { - const size_t dec64 = dec64table[op-match]; + const int dec64 = dec64table[offset]; op[0] = match[0]; op[1] = match[1]; op[2] = match[2]; op[3] = match[3]; - match += dec32table[op-match]; - LZ4_copy4(op+4, match); - op += 8; match -= dec64; - } else { LZ4_copy8(op, match); op+=8; match+=8; } + match += dec32table[offset]; + memcpy(op+4, match, 4); + match -= dec64; + } else { LZ4_copy8(op, match); match+=8; } + op += 8; if (unlikely(cpy>oend-12)) { - if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals */ - if (op < oend-8) + BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1); + if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ + if (op < oCopyLimit) { - LZ4_wildCopy(op, match, oend-8); - match += (oend-8) - op; - op = oend-8; + LZ4_wildCopy(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; } while (op<cpy) *op++ = *match++; } diff --git a/payloads/libpayload/liblz4/lz4_wrapper.c b/payloads/libpayload/liblz4/lz4_wrapper.c index 431fb55cc0..6de140e403 100644 --- a/payloads/libpayload/liblz4/lz4_wrapper.c +++ b/payloads/libpayload/liblz4/lz4_wrapper.c @@ -29,7 +29,6 @@ * SUCH DAMAGE. */ -#include <assert.h> #include <endian.h> #include <libpayload.h> #include <lz4.h> @@ -38,9 +37,28 @@ * seem to be very inefficient in practice (at least on ARM64). Since libpayload * knows about endinaness and allows some basic assumptions (such as unaligned * access support), we can easily write the ones we need ourselves. */ -static u16 LZ4_readLE16(const void *src) { return le16toh(*(u16 *)src); } -static void LZ4_copy4(void *dst, const void *src) { *(u32 *)dst = *(u32 *)src; } -static void LZ4_copy8(void *dst, const void *src) { *(u64 *)dst = *(u64 *)src; } +static uint16_t LZ4_readLE16(const void *src) +{ + return le16toh(*(uint16_t *)src); +} +static void LZ4_copy8(void *dst, const void *src) +{ +/* ARM32 needs to be a special snowflake to prevent GCC from coalescing the + * access into LDRD/STRD (which don't support unaligned accesses). */ +#ifdef __arm__ + uint32_t x0, x1; + asm volatile ( + "ldr %[x0], [%[src]]\n\t" + "ldr %[x1], [%[src], #4]\n\t" + "str %[x0], [%[dst]]\n\t" + "str %[x1], [%[dst], #4]\n\t" + : [x0]"=r"(x0), [x1]"=r"(x1) + : [src]"r"(src), [dst]"r"(dst) + : "memory" ); +#else + *(uint64_t *)dst = *(const uint64_t *)src; +#endif +} typedef uint8_t BYTE; typedef uint16_t U16; @@ -52,58 +70,59 @@ typedef uint64_t U64; #define likely(expr) __builtin_expect((expr) != 0, 1) #define unlikely(expr) __builtin_expect((expr) != 0, 0) -/* Unaltered (except removing unrelated code) from github.com/Cyan4973/lz4. */ -#include "lz4.c" /* #include for inlining, do not link! */ +/* Unaltered (just removed unrelated code) from github.com/Cyan4973/lz4/dev. */ +#include "lz4.c.inc" /* #include for inlining, do not link! */ #define LZ4F_MAGICNUMBER 0x184D2204 struct lz4_frame_header { - u32 magic; + uint32_t magic; union { - u8 flags; + uint8_t flags; struct { - u8 reserved0 : 2; - u8 has_content_checksum : 1; - u8 has_content_size : 1; - u8 has_block_checksum : 1; - u8 independent_blocks : 1; - u8 version : 2; + uint8_t reserved0 : 2; + uint8_t has_content_checksum : 1; + uint8_t has_content_size : 1; + uint8_t has_block_checksum : 1; + uint8_t independent_blocks : 1; + uint8_t version : 2; }; }; union { - u8 block_descriptor; + uint8_t block_descriptor; struct { - u8 reserved1 : 4; - u8 max_block_size : 3; - u8 reserved2 : 1; + uint8_t reserved1 : 4; + uint8_t max_block_size : 3; + uint8_t reserved2 : 1; }; }; - /* + u64 content_size iff has_content_size is set */ - /* + u8 header_checksum */ + /* + uint64_t content_size iff has_content_size is set */ + /* + uint8_t header_checksum */ } __attribute__((packed)); struct lz4_block_header { union { - u32 raw; + uint32_t raw; struct { - u32 size : 31; - u32 not_compressed : 1; + uint32_t size : 31; + uint32_t not_compressed : 1; }; }; /* + size bytes of data */ - /* + u32 block_checksum iff has_block_checksum is set */ + /* + uint32_t block_checksum iff has_block_checksum is set */ } __attribute__((packed)); size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn) { const void *in = src; void *out = dst; + size_t out_size = 0; int has_block_checksum; { /* With in-place decompression the header may become invalid later. */ const struct lz4_frame_header *h = in; - if (srcn < sizeof(*h) + sizeof(u64) + sizeof(u8)) + if (srcn < sizeof(*h) + sizeof(uint64_t) + sizeof(uint8_t)) return 0; /* input overrun */ /* We assume there's always only a single, standard frame. */ @@ -117,25 +136,27 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn) in += sizeof(*h); if (h->has_content_size) - in += sizeof(u64); - in += sizeof(u8); + in += sizeof(uint64_t); + in += sizeof(uint8_t); } while (1) { - struct lz4_block_header b = { .raw = le32toh(*(u32 *)in) }; + struct lz4_block_header b = { .raw = le32toh(*(uint32_t *)in) }; in += sizeof(struct lz4_block_header); - if (in - src + b.size > srcn) - return 0; /* input overrun */ + if ((size_t)(in - src) + b.size > srcn) + break; /* input overrun */ - if (!b.size) - return out - dst; /* decompression successful */ + if (!b.size) { + out_size = out - dst; + break; /* decompression successful */ + } if (b.not_compressed) { - size_t size = MIN((u32)b.size, dst + dstn - out); + size_t size = MIN((uint32_t)b.size, dst + dstn - out); memcpy(out, in, size); if (size < b.size) - return 0; /* output overrun */ + break; /* output overrun */ else out += size; } else { @@ -144,15 +165,17 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn) dst + dstn - out, endOnInputSize, full, 0, noDict, out, NULL, 0); if (ret < 0) - return 0; /* decompression error */ + break; /* decompression error */ else out += ret; } in += b.size; if (has_block_checksum) - in += sizeof(u32); + in += sizeof(uint32_t); } + + return out_size; } size_t ulz4f(const void *src, void *dst) |