From 3f4aece4e07b15a5a2d191873da04b88c8e87049 Mon Sep 17 00:00:00 2001 From: Andrey Petrov Date: Mon, 27 Jun 2016 13:39:34 -0700 Subject: soc/intel/apollolake: Add CQOS CAR implementation Add new option to set up Cache-As-RAM by using CQOS, Cache Quality of Service. CQOS allows setting ways of cache in no-fill mode, while keeping other ways in regular evicting mode. This effectively allows using CAR and cache simultaneously. BUG=chrome-os-partner:51959 TEST=switch from NEM to CQOS and back, boot Change-Id: Ic7f9899918f94a5788b02a4fbd2f5d5ba9aaf91d Signed-off-by: Andrey Petrov Reviewed-on: https://review.coreboot.org/15455 Tested-by: build bot (Jenkins) Reviewed-by: Aaron Durbin --- src/soc/intel/apollolake/Kconfig | 21 ++++++++ src/soc/intel/apollolake/bootblock/cache_as_ram.S | 64 +++++++++++++++++++++++ src/soc/intel/apollolake/exit_car.S | 25 ++++++++- src/soc/intel/apollolake/include/soc/cpu.h | 20 +++++++ 4 files changed, 128 insertions(+), 2 deletions(-) (limited to 'src/soc/intel') diff --git a/src/soc/intel/apollolake/Kconfig b/src/soc/intel/apollolake/Kconfig index d8e33caf7b..9fa37a1ff3 100644 --- a/src/soc/intel/apollolake/Kconfig +++ b/src/soc/intel/apollolake/Kconfig @@ -227,6 +227,27 @@ config NHLT_DA7219 default n help Include DSP firmware settings for headset codec. +choice + prompt "Cache-as-ram implementation" + default CAR_CQOS + help + This option allows you to select how cache-as-ram (CAR) is set up. + +config CAR_NEM + bool "Non-evict mode" + help + Traditionally, CAR is set up by using Non-Evict mode. This method + does not allow CAR and cache to co-exist, because cache fills are + blocked in NEM mode. + +config CAR_CQOS + bool "Cache Quality of Service" + help + Cache Quality of Service allows more fine-grained control of cache + usage. As a result, it is possible to set up a portion of L2 cache for + CAR and use the remainder for actual caching.
+ +endchoice config SPI_FLASH_INCLUDE_ALL_DRIVERS bool diff --git a/src/soc/intel/apollolake/bootblock/cache_as_ram.S b/src/soc/intel/apollolake/bootblock/cache_as_ram.S index e8fae28b5b..65dd4c83f9 100644 --- a/src/soc/intel/apollolake/bootblock/cache_as_ram.S +++ b/src/soc/intel/apollolake/bootblock/cache_as_ram.S @@ -124,12 +124,61 @@ clear_var_mtrr: invd mov %eax, %cr0 +#if IS_ENABLED(CONFIG_CAR_NEM) /* Disable cache eviction (setup stage) */ mov $MSR_EVICT_CTL, %ecx rdmsr or $0x1, %eax wrmsr +#else + /* + * Disable both L1 and L2 prefetchers. For a not-yet-understood reason, + * prefetchers slow down filling cache with rep stos in CQOS mode. + */ + mov $MSR_PREFETCH_CTL, %ecx + rdmsr + or $(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax + wrmsr +#endif + +#if IS_ENABLED(CONFIG_CAR_CQOS) +#if (CONFIG_DCACHE_RAM_SIZE == L2_CACHE_SIZE) +/* + * If CAR size is set to full L2 size, the inverted mask (mask 1) is calculated as all-zeros. + * This is not supported by the CPU/uCode. + */ +#error "CQOS CAR may not use whole L2 cache area" +#endif + /* Calculate how many bits are to be used for CAR */ + xor %edx, %edx + mov $CONFIG_DCACHE_RAM_SIZE, %eax /* dividend */ + mov $CACHE_QOS_SIZE_PER_BIT, %ecx /* divisor */ + div %ecx /* result is in eax */ + mov %eax, %ecx /* save to ecx */ + mov $1, %ebx + shl %cl, %ebx + sub $1, %ebx /* resulting mask is in ebx */ + + /* Set this mask for initial cache fill */ + mov $MSR_L2_QOS_MASK(0), %ecx + rdmsr + mov %bl, %al + wrmsr + + /* Set CLOS selector to 0 */ + mov $MSR_IA32_PQR_ASSOC, %ecx + rdmsr + and $~IA32_PQR_ASSOC_MASK, %edx /* select mask 0 */ + wrmsr + /* We will need to block CAR region from evicts */ + mov $MSR_L2_QOS_MASK(1), %ecx + rdmsr + /* Invert bits that are to be used for cache */ + mov %bl, %al + xor $~0, %al /* invert 8 bits */ + wrmsr +#endif post_code(0x26) /* Clear the cache memory region.
This will also fill up the cache */ @@ -140,11 +189,26 @@ clear_var_mtrr: post_code(0x27) +#if IS_ENABLED(CONFIG_CAR_NEM) /* Disable cache eviction (run stage) */ mov $MSR_EVICT_CTL, %ecx rdmsr or $0x2, %eax wrmsr +#else + /* Cache is populated. Use mask 1 that will block evicts */ + mov $MSR_IA32_PQR_ASSOC, %ecx + rdmsr + and $~IA32_PQR_ASSOC_MASK, %edx /* clear index bits first */ + or $1, %edx /* select mask 1 */ + wrmsr + + /* Enable prefetchers */ + mov $MSR_PREFETCH_CTL, %ecx + rdmsr + and $~(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax + wrmsr +#endif post_code(0x28) diff --git a/src/soc/intel/apollolake/exit_car.S b/src/soc/intel/apollolake/exit_car.S index 339242b319..e21aa294ce 100644 --- a/src/soc/intel/apollolake/exit_car.S +++ b/src/soc/intel/apollolake/exit_car.S @@ -27,14 +27,34 @@ chipset_teardown_car: */ pop %ebx - /* invalidate cache contents. */ - invd /* Disable MTRRs. */ mov $(MTRR_DEF_TYPE_MSR), %ecx rdmsr and $(~(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN)), %eax wrmsr +#if IS_ENABLED(CONFIG_CAR_CQOS) + /* Go back to all-evicting mode, set both masks to all-1s */ + mov $MSR_L2_QOS_MASK(0), %ecx + rdmsr + mov $~0, %al + wrmsr + + mov $MSR_L2_QOS_MASK(1), %ecx + rdmsr + mov $~0, %al + wrmsr + + /* Reset CLOS selector to 0 */ + mov $MSR_IA32_PQR_ASSOC, %ecx + rdmsr + and $~IA32_PQR_ASSOC_MASK, %edx + wrmsr +#endif + /* invalidate cache contents. */ + invd + +#if IS_ENABLED(CONFIG_CAR_NEM) /* Knock down bit 1 then bit 0 of NEM control not combining steps. */ mov $(MSR_EVICT_CTL), %ecx rdmsr @@ -42,6 +62,7 @@ chipset_teardown_car: wrmsr and $(~(1 << 0)), %eax wrmsr +#endif /* Return to caller. 
*/ jmp *%ebx diff --git a/src/soc/intel/apollolake/include/soc/cpu.h b/src/soc/intel/apollolake/include/soc/cpu.h index 78fc0b0df6..8887c17e81 100644 --- a/src/soc/intel/apollolake/include/soc/cpu.h +++ b/src/soc/intel/apollolake/include/soc/cpu.h @@ -34,6 +34,26 @@ void apollolake_init_cpus(struct device *dev); #define MSR_EVICT_CTL 0x2e0 #define MSR_EMULATE_PM_TMR 0x121 #define EMULATE_PM_TMR_EN (1 << 16) +#define MSR_PREFETCH_CTL 0x1a4 +#define PREFETCH_L1_DISABLE (1 << 0) +#define PREFETCH_L2_DISABLE (1 << 2) + + +#define MSR_L2_QOS_MASK(reg) (0xd10 + reg) +#define MSR_IA32_PQR_ASSOC 0xc8f +/* MSR bits 33:32 encode slot number 0-3 */ +#define IA32_PQR_ASSOC_MASK (1 << 0 | 1 << 1) +/* 16 way cache, 8 bits per QOS, 64 byte cache line, 1024 sets */ +#define CACHE_WAYS 16 +#define CACHE_BITS_PER_MASK 8 +#define CACHE_LINE_SIZE 64 +#define CACHE_SETS 1024 +/* + * Each bit in QOS mask controls this many bytes. This is calculated as: + * (CACHE_WAYS / CACHE_BITS_PER_MASK) * CACHE_LINE_SIZE * CACHE_SETS + */ +#define CACHE_QOS_SIZE_PER_BIT (128 * KiB) +#define L2_CACHE_SIZE 0x100000 #define BASE_CLOCK_MHZ 100 -- cgit v1.2.3