1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
|
/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only */
#include <commonlib/bsd/compression.h>
#include <commonlib/bsd/helpers.h>
#include <endian.h>
#include <stdint.h>
#include <string.h>
/* LZ4 comes with its own supposedly portable memory access functions, but they
 * seem to be very inefficient in practice (at least on ARM64). Since coreboot
 * knows about endianness and allows some basic assumptions (such as unaligned
 * access support), we can easily write the ones we need ourselves. */
/* Load the 16-bit little-endian value stored at src and return it in host
 * byte order. The pointer is dereferenced directly as a uint16_t. */
static uint16_t LZ4_readLE16(const void *src)
{
	const uint16_t *word = src;
	const uint16_t stored = *word;

	return le16toh(stored);
}
/* Copy exactly eight bytes from src to dst. Which implementation is used is
 * selected at compile time based on the target architecture. */
static void LZ4_copy8(void *dst, const void *src)
{
#if defined(__arm__)
	/* 32-bit ARM gets special treatment so that GCC cannot merge the two
	 * word accesses into LDRD/STRD (which don't support unaligned
	 * accesses). */
#if defined(__COREBOOT_ARM_ARCH__) && __COREBOOT_ARM_ARCH__ < 6
	/* ARMv < 6 doesn't support unaligned accesses at all: go bytewise. */
	uint8_t *to = dst;
	const uint8_t *from = src;
	int n;

	for (n = 0; n < 8; n++)
		to[n] = from[n];
#else
	/* Two explicit word loads followed by two explicit word stores. The
	 * "m" operands tell the compiler which memory each statement touches. */
	uint32_t lo, hi;

	__asm__ ("ldr %[lo], [%[src]]"
		: [lo]"=r"(lo)
		: [src]"r"(src), "m"(*(const uint32_t *)src));
	__asm__ ("ldr %[hi], [%[src], #4]"
		: [hi]"=r"(hi)
		: [src]"r"(src), "m"(*(const uint32_t *)(src + 4)));
	__asm__ ("str %[lo], [%[dst]]"
		: "=m"(*(uint32_t *)dst)
		: [lo]"r"(lo), [dst]"r"(dst));
	__asm__ ("str %[hi], [%[dst], #4]"
		: "=m"(*(uint32_t *)(dst + 4))
		: [hi]"r"(hi), [dst]"r"(dst));
#endif
#elif defined(__riscv)
	/* RISC-V implementations may trap on any unaligned access. */
	uint8_t *to = dst;
	const uint8_t *from = src;
	int n;

	for (n = 0; n < 8; n++)
		to[n] = from[n];
#else
	/* Everything else: one 64-bit load and one 64-bit store. */
	const uint64_t chunk = *(const uint64_t *)src;

	*(uint64_t *)dst = chunk;
#endif
}
/* Short aliases for the fixed-width integer types. They are declared ahead of
 * the #include of lz4.c.inc below, presumably because that code refers to
 * them by these names -- confirm against lz4.c.inc. */
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
/* Inlining helper: a static function that is always inlined. */
#define FORCE_INLINE static __always_inline
/* Branch hints: the expression is normalised to 0/1 and the compiler is told
 * which outcome to expect. */
#define likely(expr) __builtin_expect((expr) != 0, 1)
#define unlikely(expr) __builtin_expect((expr) != 0, 0)
/* Unaltered (just removed unrelated code) from github.com/Cyan4973/lz4/dev. */
#include "lz4.c.inc" /* #include for inlining, do not link! */
/* Value the frame header's magic field must have (after conversion from
 * little-endian) for ulz4fn() to accept the input. */
#define LZ4F_MAGICNUMBER 0x184D2204
/* Fixed leading part of the frame header as parsed by ulz4fn(). The struct is
 * packed and is overlaid directly on the input bytes.
 * NOTE(review): the bit positions of the bitfields below depend on the
 * compiler's bitfield allocation order -- confirm for every supported target. */
struct lz4_frame_header {
/* Stored little-endian; must equal LZ4F_MAGICNUMBER. */
uint32_t magic;
/* Flags byte, accessible either whole or through the individual bitfields. */
union {
uint8_t flags;
struct {
/* ulz4fn() rejects the frame unless this is zero. */
uint8_t reserved0 : 2;
/* Not examined by ulz4fn(). */
uint8_t has_content_checksum : 1;
/* When set, ulz4fn() skips an extra uint64_t after this struct. */
uint8_t has_content_size : 1;
/* When set, ulz4fn() skips a uint32_t after every block's data. */
uint8_t has_block_checksum : 1;
/* ulz4fn() rejects the frame unless this is set. */
uint8_t independent_blocks : 1;
/* ulz4fn() rejects the frame unless this equals 1. */
uint8_t version : 2;
};
};
/* Block descriptor byte, accessible either whole or through bitfields. */
union {
uint8_t block_descriptor;
struct {
/* ulz4fn() rejects the frame unless this is zero. */
uint8_t reserved1 : 4;
/* Not examined by ulz4fn(). */
uint8_t max_block_size : 3;
/* ulz4fn() rejects the frame unless this is zero. */
uint8_t reserved2 : 1;
};
};
/* Variable trailing part of the header, not represented as members: */
/* + uint64_t content_size iff has_content_size is set */
/* + uint8_t header_checksum */
} __packed;
/* Header word that precedes every data block. ulz4fn() fills in 'raw' with the
 * little-endian word read from the input and then reads the bitfields.
 * NOTE(review): which bits of 'raw' map to 'size' and 'not_compressed' depends
 * on the compiler's bitfield allocation order -- confirm for every target. */
struct lz4_block_header {
union {
uint32_t raw;
struct {
/* Number of data bytes that follow; zero ends decompression successfully. */
uint32_t size : 31;
/* When set, the data is copied to the output with memcpy() as-is. */
uint32_t not_compressed : 1;
};
};
/* Following the header in the input, not represented as members: */
/* + size bytes of data */
/* + uint32_t block_checksum iff has_block_checksum is set */
} __packed;
/*
 * Decompress a single LZ4 frame.
 *
 * @param src   Start of the frame (magic number first).
 * @param srcn  Number of bytes that may be read from src.
 * @param dst   Start of the output buffer.
 * @param dstn  Number of bytes that may be written to dst.
 *
 * @return Number of bytes written to dst once the terminating zero-size block
 *         has been reached, or 0 on any failure (unsupported or malformed
 *         header, input overrun, output overrun, block decompression error).
 *
 * Checksums (header, block, content) are skipped, not verified.
 */
size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
{
	const void *in = src;
	void *out = dst;
	size_t out_size = 0;
	int has_block_checksum;

	{ /* With in-place decompression the header may become invalid later. */
		const struct lz4_frame_header *h = in;

		/* Conservative bound: assumes the optional content size is present. */
		if (srcn < sizeof(*h) + sizeof(uint64_t) + sizeof(uint8_t))
			return 0;	/* input overrun */

		/* We assume there's always only a single, standard frame. */
		if (le32toh(h->magic) != LZ4F_MAGICNUMBER || h->version != 1)
			return 0;	/* unknown format */
		if (h->reserved0 || h->reserved1 || h->reserved2)
			return 0;	/* reserved must be zero */
		if (!h->independent_blocks)
			return 0;	/* we don't support block dependency */
		has_block_checksum = h->has_block_checksum;

		/* Step over the fixed header, the optional content size and
		 * the header checksum byte. */
		in += sizeof(*h);
		if (h->has_content_size)
			in += sizeof(uint64_t);
		in += sizeof(uint8_t);
	}

	while (1) {
		size_t consumed = (size_t)(in - src);

		/* The block header must lie completely inside the input before
		 * it is dereferenced; otherwise a truncated stream would make
		 * us read past the end of src. */
		if (consumed > srcn ||
		    srcn - consumed < sizeof(struct lz4_block_header))
			break;		/* input overrun */

		struct lz4_block_header b = {
			{ .raw = le32toh(*(const uint32_t *)in) }
		};
		in += sizeof(struct lz4_block_header);
		consumed += sizeof(struct lz4_block_header);

		/* consumed <= srcn is guaranteed here, so the subtraction
		 * cannot wrap. */
		if (b.size > srcn - consumed)
			break;		/* input overrun */

		if (!b.size) {
			out_size = out - dst;
			break;		/* decompression successful */
		}

		if (b.not_compressed) {
			/* Copy as much as fits, then fail if it was truncated. */
			size_t size = MIN((uintptr_t)b.size, (uintptr_t)dst
				+ dstn - (uintptr_t)out);
			memcpy(out, in, size);
			if (size < b.size)
				break;		/* output overrun */
			out += size;
		} else {
			/* constant folding essential, do not touch params! */
			int ret = LZ4_decompress_generic(in, out, b.size,
					dst + dstn - out, endOnInputSize,
					full, 0, noDict, out, NULL, 0);
			if (ret < 0)
				break;		/* decompression error */
			out += ret;
		}

		in += b.size;
		if (has_block_checksum)
			in += sizeof(uint32_t);
	}

	return out_size;
}
/* Decompress an LZ4 frame when the caller knows neither buffer size: both the
 * input and the output are bounded by the same fixed 1 GiB limit. */
size_t ulz4f(const void *src, void *dst)
{
	/* LZ4 uses signed size parameters, so can't just use ((u32)-1) here. */
	const size_t limit = 1*GiB;

	return ulz4fn(src, limit, dst, limit);
}
|