diff options
author | Julius Werner <jwerner@chromium.org> | 2018-05-16 14:14:04 -0700 |
---|---|---|
committer | Julius Werner <jwerner@chromium.org> | 2018-05-22 02:44:14 +0000 |
commit | 99f4683adf3203d11c164b15a5455e778709a3e0 (patch) | |
tree | 077268ffcfa2df08671633ff0e0501e9626fda22 /src/arch | |
parent | 9123449734f43922fe39cdb08c3d60f02f0eb3ed (diff) |
Introduce bootblock self-decompression
Masked ROMs are the silent killers of boot speed on devices without
memory-mapped SPI flash. They often contain awfully slow SPI drivers
(presumably bit-banged) that take hundreds of milliseconds to load our
bootblock, and every extra kilobyte of bootblock size has a hugely
disproportionate impact on boot speed. The coreboot timestamps can never
show that load time (they only start once our own code is running), but
it impacts our users all the same.
This patch tries to alleviate that issue a bit by allowing us to
compress the bootblock with LZ4, which can cut its size down to nearly
half. Of course, masked ROMs usually don't come with decompression
algorithms built in, so we need to introduce a little decompression stub
that can decompress the rest of the bootblock. This is done by creating
a new "decompressor" stage which runs before the bootblock, but includes
the compressed bootblock code in its data section. It needs to be as
small as possible to get a real benefit from this approach, which means
no device drivers, no console output, no exception handling, etc.
Besides the decompression algorithm itself we only include the timer
driver so that we can measure the boot speed impact of decompression. On
ARM and ARM64 systems, we also need to give SoC code a chance to
initialize the MMU, since running decompression without the MMU (and
therefore without data caching) is prohibitively slow on these
architectures.
This feature is implemented for ARM and ARM64 architectures for now,
although most of it is architecture-independent and it should be
relatively simple to port to other platforms where a masked ROM loads
the bootblock into SRAM. It is also supposed to be a clean starting
point from which later optimizations can hopefully cut down the
decompression stub size (currently ~4K on RK3399) a bit more.
NOTE: Bootblock compression is not for everyone. Possible side effects
include trying to run LZ4 on CPUs that come out of reset extremely
underclocked or enabling this too early in SoC bring-up and getting
frustrated trying to find issues in an undebuggable environment. Ask
your SoC vendor if bootblock compression is right for you.
Change-Id: I0dc1cad9ae7508892e477739e743cd1afb5945e8
Signed-off-by: Julius Werner <jwerner@chromium.org>
Reviewed-on: https://review.coreboot.org/26340
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Aaron Durbin <adurbin@chromium.org>
Diffstat (limited to 'src/arch')
-rw-r--r-- | src/arch/arm/Makefile.inc | 17 | ||||
-rw-r--r-- | src/arch/arm/armv7/Makefile.inc | 11 | ||||
-rw-r--r-- | src/arch/arm/include/arch/header.ld | 4 | ||||
-rw-r--r-- | src/arch/arm/libgcc/Makefile.inc | 1 | ||||
-rw-r--r-- | src/arch/arm64/Makefile.inc | 16 | ||||
-rw-r--r-- | src/arch/arm64/armv8/Makefile.inc | 10 | ||||
-rw-r--r-- | src/arch/arm64/armv8/lib/Makefile.inc | 1 | ||||
-rw-r--r-- | src/arch/arm64/include/arch/header.ld | 2 |
8 files changed, 54 insertions, 8 deletions
diff --git a/src/arch/arm/Makefile.inc b/src/arch/arm/Makefile.inc index 013a4dda75..06adfe454a 100644 --- a/src/arch/arm/Makefile.inc +++ b/src/arch/arm/Makefile.inc @@ -44,22 +44,35 @@ endif # CONFIG_ARCH_ARM ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARM),y) +decompressor-y += id.S bootblock-y += id.S +$(call src-to-obj,decompressor,$(dir)/id.S): $(obj)/build.h $(call src-to-obj,bootblock,$(dir)/id.S): $(obj)/build.h +decompressor-y += boot.c bootblock-y += boot.c -bootblock-y += stages.c +decompressor-y += div0.c +bootblock-y += div0.c +decompressor-y += eabi_compat.c bootblock-y += eabi_compat.c +decompressor-y += memset.S bootblock-y += memset.S +decompressor-y += memcpy.S bootblock-y += memcpy.S +decompressor-y += memmove.S bootblock-y += memmove.S -bootblock-y += div0.c + bootblock-y += clock.c +bootblock-y += stages.c $(objcbfs)/bootblock.debug: $$(bootblock-objs) @printf " LINK $(subst $(obj)/,,$(@))\n" $(LD_bootblock) $(LDFLAGS_bootblock) -o $@ -L$(obj) -T $(call src-to-obj,bootblock,src/mainboard/$(MAINBOARDDIR)/memlayout.ld) --whole-archive --start-group $(filter-out %.ld,$(bootblock-objs)) --end-group +$(objcbfs)/decompressor.debug: $$(decompressor-objs) + @printf " LINK $(subst $(obj)/,,$(@))\n" + $(LD_bootblock) $(LDFLAGS_bootblock) -o $@ -L$(obj) -T $(call src-to-obj,decompressor,src/mainboard/$(MAINBOARDDIR)/memlayout.ld) --whole-archive --start-group $(filter-out %.ld,$(decompressor-objs)) --end-group + endif # CONFIG_ARCH_BOOTBLOCK_ARM ############################################################################### diff --git a/src/arch/arm/armv7/Makefile.inc b/src/arch/arm/armv7/Makefile.inc index fe0b446623..1d3ae52f54 100644 --- a/src/arch/arm/armv7/Makefile.inc +++ b/src/arch/arm/armv7/Makefile.inc @@ -28,18 +28,27 @@ armv7-r_asm_flags = $(armv7-r_flags) $(armv7_asm_flags) ############################################################################### ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARMV7),y) +decompressor-generic-ccopts += $(armv7-a_flags) 
+decompressor-S-ccopts += $(armv7_asm_flags) bootblock-generic-ccopts += $(armv7-a_flags) bootblock-S-ccopts += $(armv7_asm_flags) ifneq ($(CONFIG_BOOTBLOCK_CUSTOM),y) +decompressor-y += bootblock.S +ifneq ($(CONFIG_COMPRESS_BOOTBLOCK),y) bootblock-y += bootblock.S endif +endif +decompressor-y += cache.c bootblock-y += cache.c +decompressor-y += cpu.S bootblock-y += cpu.S +decompressor-y += mmu.c +bootblock-y += mmu.c + bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += exception.c bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += exception_asm.S -bootblock-y += mmu.c else ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARMV7_M),y) bootblock-generic-ccopts += $(armv7-m_flags) diff --git a/src/arch/arm/include/arch/header.ld b/src/arch/arm/include/arch/header.ld index 8a107783d4..89473109f0 100644 --- a/src/arch/arm/include/arch/header.ld +++ b/src/arch/arm/include/arch/header.ld @@ -13,6 +13,8 @@ * GNU General Public License for more details. */ +#include <rules.h> + /* We use ELF as output format. So that we can debug the code in some form. 
*/ OUTPUT_FORMAT("elf32-littlearm", "elf32-littlearm", "elf32-littlearm") OUTPUT_ARCH(arm) @@ -22,7 +24,7 @@ PHDRS to_load PT_LOAD; } -#ifdef __BOOTBLOCK__ +#if ENV_DECOMPRESSOR || ENV_BOOTBLOCK || ENV_RMODULE ENTRY(_start) #else ENTRY(stage_entry) diff --git a/src/arch/arm/libgcc/Makefile.inc b/src/arch/arm/libgcc/Makefile.inc index cb91107e8e..9a8d4fca53 100644 --- a/src/arch/arm/libgcc/Makefile.inc +++ b/src/arch/arm/libgcc/Makefile.inc @@ -19,6 +19,7 @@ libgcc_files = ashldi3.S lib1funcs.S lshrdi3.S muldi3.S ucmpdi2.S uldivmod.S libgcc_files += udivmoddi4.c umoddi3.c ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARM),y) +decompressor-y += $(libgcc_files) bootblock-y += $(libgcc_files) endif diff --git a/src/arch/arm64/Makefile.inc b/src/arch/arm64/Makefile.inc index 997c2da70f..f57ef720b9 100644 --- a/src/arch/arm64/Makefile.inc +++ b/src/arch/arm64/Makefile.inc @@ -39,17 +39,25 @@ endif ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARM64),y) +decompressor-y += boot.c +bootblock-y += boot.c +decompressor-y += div0.c bootblock-y += div0.c +decompressor-y += eabi_compat.c +bootblock-y += eabi_compat.c +decompressor-y += id.S bootblock-y += id.S +$(call src-to-obj,decompressor,$(dir)/id.S): $(obj)/build.h $(call src-to-obj,bootblock,$(dir)/id.S): $(obj)/build.h -bootblock-y += boot.c -bootblock-y += eabi_compat.c bootblock-$(CONFIG_ARM64_USE_ARCH_TIMER) += arch_timer.c bootblock-y += transition.c transition_asm.S +decompressor-y += memset.S bootblock-y += memset.S +decompressor-y += memcpy.S bootblock-y += memcpy.S +decompressor-y += memmove.S bootblock-y += memmove.S # Build the bootblock @@ -58,6 +66,10 @@ $(objcbfs)/bootblock.debug: $$(bootblock-objs) $(obj)/config.h @printf " LINK $(subst $(obj)/,,$(@))\n" $(LD_bootblock) $(LDFLAGS_bootblock) -o $@ -L$(obj) --whole-archive --start-group $(filter-out %.ld,$(bootblock-objs)) --end-group -T $(call src-to-obj,bootblock,src/mainboard/$(MAINBOARDDIR)/memlayout.ld) +$(objcbfs)/decompressor.debug: $$(decompressor-objs) $(obj)/config.h + @printf " 
LINK $(subst $(obj)/,,$(@))\n" + $(LD_bootblock) $(LDFLAGS_bootblock) -o $@ -L$(obj) --whole-archive --start-group $(filter-out %.ld,$(decompressor-objs)) --end-group -T $(call src-to-obj,decompressor,src/mainboard/$(MAINBOARDDIR)/memlayout.ld) + endif # CONFIG_ARCH_BOOTBLOCK_ARM64 ############################################################################### diff --git a/src/arch/arm64/armv8/Makefile.inc b/src/arch/arm64/armv8/Makefile.inc index 14a784bb92..db7bd33793 100644 --- a/src/arch/arm64/armv8/Makefile.inc +++ b/src/arch/arm64/armv8/Makefile.inc @@ -31,19 +31,27 @@ armv8_flags = -march=$(march) -I$(src)/arch/arm64/include/armv8/ -D__COREBOOT_AR ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARMV8_64),y) ifneq ($(CONFIG_BOOTBLOCK_CUSTOM),y) +decompressor-y += bootblock.S +ifneq ($(CONFIG_COMPRESS_BOOTBLOCK),y) bootblock-y += bootblock.S endif -bootblock-y += cache.c +endif +decompressor-y += cpu.S bootblock-y += cpu.S +decompressor-y += cache.c +bootblock-y += cache.c +decompressor-y += mmu.c bootblock-y += mmu.c bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += exception.c +decompressor-generic-ccopts += $(armv8_flags) bootblock-generic-ccopts += $(armv8_flags) # Required to access unaligned timestamp struct members before MMU is active # (TODO: Maybe use explicit unaligned accesses in timestamp code instead, or # evaluate redesigning timestamp data structures to avoid misaligned members.) 
+decompressor-c-ccopts += -mstrict-align bootblock-c-ccopts += -mstrict-align endif diff --git a/src/arch/arm64/armv8/lib/Makefile.inc b/src/arch/arm64/armv8/lib/Makefile.inc index 2bf1a37c7a..bfc87c38d7 100644 --- a/src/arch/arm64/armv8/lib/Makefile.inc +++ b/src/arch/arm64/armv8/lib/Makefile.inc @@ -18,6 +18,7 @@ lib_access = pstate.c sysctrl.c cache.c tlb.c clock.c ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARMV8_64),y) +decompressor-y += $(lib_access) bootblock-y += $(lib_access) endif diff --git a/src/arch/arm64/include/arch/header.ld b/src/arch/arm64/include/arch/header.ld index c82cb3f8c7..9d8764ea58 100644 --- a/src/arch/arm64/include/arch/header.ld +++ b/src/arch/arm64/include/arch/header.ld @@ -24,7 +24,7 @@ PHDRS to_load PT_LOAD; } -#if ENV_BOOTBLOCK || ENV_RMODULE +#if ENV_DECOMPRESSOR || ENV_BOOTBLOCK || ENV_RMODULE ENTRY(_start) #else ENTRY(stage_entry) |