diff options
author | Arthur Heymans <arthur@aheymans.xyz> | 2022-11-30 23:24:31 +0100 |
---|---|---|
committer | Lean Sheng Tan <sheng.tan@9elements.com> | 2024-02-21 16:19:05 +0000 |
commit | 7552eb210c1681747bfac6030af9f74d25330595 (patch) | |
tree | 0c2d40efda2474f4edaedb7d77a8a42ce30ecf6a /src/lib | |
parent | f317068fc3138162a59b802773795edfe3e8a8ef (diff) |
lib/lzmadecode: Allow for 8 byte reads on 64bit
This adds an optimization to the LZMA decoder: when the general purpose
register length is 8 bytes, it reads from the boot medium in chunks of
8 bytes instead of always using 4-byte reads. Whether this is faster
depends on the cache / memory / SPI controller, but it is likely to be
either the same or faster.
TESTED
- google/vilboz: cached boot medium
64bit before - 32bit - 64bit after
load FSP-M: 35,674 - 35,595 - 34,690
load ramstage: 42,134 - 43,378 - 40,882
load FSP-S: 24,954 - 25,496 - 24,368
- foxconn/g41m: uncached boot medium for testing
64bit before - 32bit - 64bit after
load ramstage: 51,164 - 51,872 - 51,894
Change-Id: I890c075307c0aec877618d9902ea352ae42a3bfa
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/70175
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Lean Sheng Tan <sheng.tan@9elements.com>
Reviewed-by: Julius Werner <jwerner@chromium.org>
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/lzmadecode.c | 18 | ||||
-rw-r--r-- | src/lib/lzmadecode.h | 4 |
2 files changed, 12 insertions, 10 deletions
diff --git a/src/lib/lzmadecode.c b/src/lib/lzmadecode.c
index cb868290aa..5c6baa4160 100644
--- a/src/lib/lzmadecode.c
+++ b/src/lib/lzmadecode.c
@@ -35,15 +35,15 @@
 #define kBitModelTotal (1 << kNumBitModelTotalBits)
 #define kNumMoveBits 5
 
-/* Use 32-bit reads whenever possible to avoid bad flash performance. Fall back
- * to byte reads for last 4 bytes since RC_TEST returns an error when BufferLim
+/* Use sizeof(SizeT) sized reads whenever possible to avoid bad flash performance. Fall back
+ * to byte reads for last sizeof(SizeT) bytes since RC_TEST returns an error when BufferLim
  * is *reached* (not surpassed!), meaning we can't allow that to happen while
  * there are still bytes to decode from the algorithm's point of view. */
 #define RC_READ_BYTE \
-	(look_ahead_ptr < 4 ? look_ahead.raw[look_ahead_ptr++] \
-	 : ((((uintptr_t) Buffer & 3) \
-	 || ((SizeT) (BufferLim - Buffer) <= 4)) ? (*Buffer++) \
-	 : ((look_ahead.dw = *(UInt32 *)Buffer), (Buffer += 4), \
+	(look_ahead_ptr < sizeof(SizeT) ? look_ahead.raw[look_ahead_ptr++] \
+	 : ((((uintptr_t) Buffer & (sizeof(SizeT) - 1)) \
+	 || ((SizeT) (BufferLim - Buffer) <= sizeof(SizeT))) ? (*Buffer++) \
+	 : ((look_ahead.dw = *(SizeT *)Buffer), (Buffer += sizeof(SizeT)), \
 	 (look_ahead_ptr = 1), look_ahead.raw[0])))
 
 #define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \
@@ -207,10 +207,10 @@ int LzmaDecode(CLzmaDecoderState *vs,
 	int len = 0;
 	const Byte *Buffer;
 	const Byte *BufferLim;
-	int look_ahead_ptr = 4;
+	int look_ahead_ptr = sizeof(SizeT);
 	union {
-		Byte raw[4];
-		UInt32 dw;
+		Byte raw[sizeof(SizeT)];
+		SizeT dw;
 	} look_ahead;
 	UInt32 Range;
 	UInt32 Code;
diff --git a/src/lib/lzmadecode.h b/src/lib/lzmadecode.h
index 9ed352a564..5498061762 100644
--- a/src/lib/lzmadecode.h
+++ b/src/lib/lzmadecode.h
@@ -22,10 +22,12 @@
 #ifndef __LZMADECODE_H
 #define __LZMADECODE_H
 
+#include <types.h>
+
 typedef unsigned char Byte;
 typedef unsigned short UInt16;
 typedef unsigned int UInt32;
-typedef UInt32 SizeT;
+typedef size_t SizeT;
 
 #define CProb UInt16
 