From 99bed46c5d5470a7052a477c7b7edae24942165e Mon Sep 17 00:00:00 2001
From: Maximilian Brune <maximilian.brune@9elements.com>
Date: Mon, 15 Apr 2024 18:17:54 +0200
Subject: commonlib/bsd/lz4_wrapper.c: Fix misaligned access

Currently the HiFive Unleashed produces the following exception:
[DEBUG]  Exception:          Load address misaligned
[DEBUG]  Hart ID:            0
[DEBUG]  Previous mode:      machine
[DEBUG]  Bad instruction pc: 0x080010d0
[DEBUG]  Bad address:        0x08026ab3
[DEBUG]  Stored ra:          0x080010c8
[DEBUG]  Stored sp:          0x08010cc8

The coreboot LZ4 decompression code does some misaligned access during
decompression which the FU540 apparently does not support in SRAM.

Make the compiler generate code that adheres to natural alignment by
fixing the LZ4_readLE16() function and creating LZ4_readLE32().

Signed-off-by: Maximilian Brune <maximilian.brune@9elements.com>
Change-Id: Id165829bfd35be2bce2bbb019c208a304f627add
Reviewed-on: https://review.coreboot.org/c/coreboot/+/81910
Reviewed-by: Nico Huber <nico.h@gmx.de>
Reviewed-by: Angel Pons <th3fanbus@gmail.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
---
 src/commonlib/bsd/lz4_wrapper.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/commonlib/bsd/lz4_wrapper.c b/src/commonlib/bsd/lz4_wrapper.c
index efe246bd02..566c2bf727 100644
--- a/src/commonlib/bsd/lz4_wrapper.c
+++ b/src/commonlib/bsd/lz4_wrapper.c
@@ -5,15 +5,29 @@
 #include <commonlib/bsd/sysincludes.h>
 #include <stdint.h>
 #include <string.h>
-
-/* LZ4 comes with its own supposedly portable memory access functions, but they
- * seem to be very inefficient in practice (at least on ARM64). Since coreboot
- * knows about endianness and allows some basic assumptions (such as unaligned
- * access support), we can easily write the ones we need ourselves. */
+#include <endian.h>
+
+/*
+ * RISC-V and older ARM architectures do not mandate support for misaligned access.
+ * Our le16toh and friends functions assume misaligned access support. Writing the access
+ * like this causes the compiler to generate instructions using misaligned access (or not)
+ * depending on the architecture. So there is no performance penalty for platforms supporting
+ * misaligned access.
+ */
 static uint16_t LZ4_readLE16(const void *src)
 {
-	return le16toh(*(const uint16_t *)src);
+	return *((const uint8_t *)src + 1) << 8
+	      | *(const uint8_t *)src;
 }
+
+static uint32_t LZ4_readLE32(const void *src)
+{
+	return *((const uint8_t *)src + 3) << 24
+	     | *((const uint8_t *)src + 2) << 16
+	     | *((const uint8_t *)src + 1) << 8
+	     |  *(const uint8_t *)src;
+}
+
 static void LZ4_copy8(void *dst, const void *src)
 {
 /* ARM32 needs to be a special snowflake to prevent GCC from coalescing the
@@ -107,7 +121,7 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
 			return 0;	/* input overrun */
 
 		/* We assume there's always only a single, standard frame. */
-		if (le32toh(h->magic) != LZ4F_MAGICNUMBER
+		if (LZ4_readLE32(&h->magic) != LZ4F_MAGICNUMBER
 		    || (h->flags & VERSION) != (1 << VERSION_SHIFT))
 			return 0;	/* unknown format */
 		if ((h->flags & RESERVED0) || (h->block_descriptor & RESERVED1_2))
@@ -127,7 +141,7 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
 			break;          /* input overrun */
 
 		struct lz4_block_header b = {
-			.raw = le32toh(*(const uint32_t *)in)
+			.raw = LZ4_readLE32((const uint32_t *)in)
 		};
 		in += sizeof(struct lz4_block_header);
 
-- 
cgit v1.2.3