From 89fae18bf4e41e299a021a4c52688e42a022f2c6 Mon Sep 17 00:00:00 2001
From: Julius Werner
Date: Tue, 30 Jan 2024 17:26:52 -0800
Subject: commonlib: Add assembly optimization for ipchksum() on arm64

This patch adds a bit of optimized assembly code to the ipchksum()
algorithm for arm64 targets in order to take advantage of larger load
sizes and the add-with-carry instruction. This improves execution speed
on a Cortex-A75 by more than 20x.

Change-Id: I9c7bbc9d7a1cd083ced62fe9222592243a796077
Signed-off-by: Julius Werner
Reviewed-on: https://review.coreboot.org/c/coreboot/+/80254
Tested-by: build bot (Jenkins)
Reviewed-by: Arthur Heymans
Reviewed-by: Yidi Lin
---
 src/commonlib/bsd/ipchksum.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'src/commonlib/bsd/ipchksum.c')

diff --git a/src/commonlib/bsd/ipchksum.c b/src/commonlib/bsd/ipchksum.c
index a40b86cbb4..89d261f4cc 100644
--- a/src/commonlib/bsd/ipchksum.c
+++ b/src/commonlib/bsd/ipchksum.c
@@ -11,6 +11,31 @@ uint16_t ipchksum(const void *data, size_t size)
 	uint32_t sum = 0;
 	size_t i = 0;
 
+#if defined(__aarch64__)
+	size_t size16 = size / 16;
+	const uint64_t *p8 = data;
+	if (size16) {
+		unsigned long tmp1, tmp2;
+		i = size16 * 16;
+		asm (
+			"adds xzr, xzr, xzr\n\t" /* init carry flag for addition */
+		"1:\n\t"
+			"ldp %[v1], %[v2], [%[p8]], #16\n\t"
+			"adcs %[wsum], %[wsum], %[v1]\n\t"
+			"adcs %[wsum], %[wsum], %[v2]\n\t"
+			"sub %[size16], %[size16], #1\n\t"
+			"cbnz %[size16], 1b\n\t"
+			"adcs %[wsum], %[wsum], xzr\n\t" /* use up last carry */
+		: [v1] "=r" (tmp1),
+		  [v2] "=r" (tmp2),
+		  [wsum] "+r" (wide_sum),
+		  [p8] "+r" (p8),
+		  [size16] "+r" (size16)
+		:: "cc"
+		);
+	}
+#endif
+
 	while (wide_sum) {
 		sum += wide_sum & 0xFFFF;
 		wide_sum >>= 16;
-- 
cgit v1.2.3