From f6e358483bd4d70d552ead732709b3f828a0522a Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Tue, 30 Jan 2024 17:34:05 -0800 Subject: commonlib: Add assembly optimization for ipchksum() on x86 This patch adds a bit of optimized assembly code to the ipchksum() algorithm for x86 targets in order to take advantage of larger load sizes and the add-with-carry instruction. The same assembly (with one minor manual tweak) works for both 32-bit and 64-bit mode (with most of the work being done by GCC, which automatically inserts `rax` or `eax` in the inline assembly depending on the build target). Change-Id: I484620dc14679ff5ca02b2ced2f84650730a6efc Signed-off-by: Julius Werner Reviewed-on: https://review.coreboot.org/c/coreboot/+/80255 Reviewed-by: Arthur Heymans Tested-by: build bot (Jenkins) --- src/commonlib/bsd/ipchksum.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'src/commonlib/bsd/ipchksum.c') diff --git a/src/commonlib/bsd/ipchksum.c b/src/commonlib/bsd/ipchksum.c index 89d261f4cc..b7434e5fd7 100644 --- a/src/commonlib/bsd/ipchksum.c +++ b/src/commonlib/bsd/ipchksum.c @@ -34,7 +34,30 @@ uint16_t ipchksum(const void *data, size_t size) :: "cc" ); } -#endif +#elif defined(__i386__) || defined(__x86_64__) + size_t size8 = size / 8; + const uint64_t *p8 = data; + i = size8 * 8; + asm ( + "clc\n\t" + "1:\n\t" + "jecxz 2f\n\t" /* technically RCX on 64, but not gonna be that big */ + "adc (%[p8]), %[wsum]\n\t" +#if defined(__i386__) + "adc 4(%[p8]), %[wsum]\n\t" +#endif /* __i386__ */ + "lea -1(%[size8]), %[size8]\n\t" /* Use LEA as a makeshift ADD that */ + "lea 8(%[p8]), %[p8]\n\t" /* doesn't modify the carry flag. 
*/ + "jmp 1b\n\t" + "2:\n\t" + "setc %b[size8]\n\t" /* reuse size register to save last carry */ + "add %[size8], %[wsum]\n\t" + : [wsum] "+r" (wide_sum), + [p8] "+r" (p8), + [size8] "+c" (size8) /* put size in ECX so we can JECXZ */ + :: "cc" + ); +#endif /* __i386__ || __x86_64__ */ while (wide_sum) { sum += wide_sum & 0xFFFF; -- cgit v1.2.3