cpu/x86/smm: Introduce SMM module loader version 2

Xeon-SP Skylake Scalable Processor can have 36 CPU threads (18 cores). Current coreboot SMM is unable to handle more than ~32 CPU threads. This patch introduces a version 2 of the SMM module loader which addresses this problem. Having two versions of the SMM module loader prevents any issues to current projects. Future Xeon-SP products will be using this version of the SMM loader. Subsequent patches will enable board specific functionality for Xeon-SP. The reason for moving to version 2 is the state save area begins to encroach upon the SMI handling code when more than 32 CPU threads are in the system. This can cause system hangs, reboots, etc. The second change is related to staggered entry points with simple near jumps. In the current loader, near jumps will not work because the CPU is jumping within the same code segment. In version 2, "far" address jumps are necessary therefore protected mode must be enabled first. The SMM layout and how the CPUs are staggered are documented in the code. By making the modifications above, this allows the smm module loader to expand easily as more CPU threads are added. TEST=build for Tiogapass platform under OCP mainboard. Enable the following in Kconfig. select CPU_INTEL_COMMON_SMM select SOC_INTEL_COMMON_BLOCK_SMM select SMM_TSEG select HAVE_SMI_HANDLER select ACPI_INTEL_HARDWARE_SLEEP_VALUES Debug console will show all 36 cores relocated. Further tested by generating SMI's to port 0xb2 using XDP/ITP HW debugger and ensured all cores entering and exiting SMM properly. In addition, booted to Linux 5.4 kernel and observed no issues during mp init. Change-Id: I00a23a5f2a46110536c344254868390dbb71854c Signed-off-by: Rocky Phagura <rphagura@fb.com> Reviewed-on: https://review.coreboot.org/c/coreboot/+/43684 Tested-by: build bot (Jenkins) <no-reply@coreboot.org> Reviewed-by: Angel Pons <th3fanbus@gmail.com>
author: Rocky Phagura <rphagura@fb.com> 2020-07-21 14:48:48 -0700
committer: David Hendricks <david.hendricks@gmail.com> 2020-08-15 02:16:32 +0000
commit: afb7a814783cda12f5b72167163b9109ee1d15a7 (patch)
tree: 191e6a067c12da5fda30bed060fbe5cfe89891e0
parent: 5b52592773fce8ba33a18380074b7dcdba7721b4 (diff)
6 files changed, 726 insertions, 8 deletions
diff --git a/Documentation/releases/coreboot-4.13-relnotes.md b/Documentation/releases/coreboot-4.13-relnotes.md
index 2910867f78..dcc8bf44af 100644
--- a/Documentation/releases/coreboot-4.13-relnotes.md
+++ b/Documentation/releases/coreboot-4.13-relnotes.md
@@ -39,4 +39,14 @@ attributes as per their datasheet and convert those attributes into SPD files fo
 the platforms. More details about the tools are added in
 [README.md](https://review.coreboot.org/plugins/gitiles/coreboot/+/refs/heads/master/util/spd_tools/intel/lp4x/README.md).
 
+### New version of SMM loader
+
+A new version of the SMM loader accommodates platforms with more than 32
+CPU threads. The existing version of the SMM loader uses a 64K code/data
+segment and only a limited number of CPU threads can fit into one segment
+(because of save state, STM, other features, etc). This loader extends beyond
+the 64K segment to accommodate additional CPUs and in theory allows as many
+CPU threads as possible, limited only by SMRAM space and not by 64K. By default
+this loader version is disabled. Please see cpu/x86/Kconfig for more info.
+
 ### Add significant changes here
diff --git a/src/cpu/x86/Kconfig b/src/cpu/x86/Kconfig
index 5394cd023d..b3a16bcf63 100644
--- a/src/cpu/x86/Kconfig
+++ b/src/cpu/x86/Kconfig
@@ -121,6 +121,14 @@ config SMM_STUB_STACK_SIZE
 
 endif
 
+config X86_SMM_LOADER_VERSION2
+	bool
+	default n
+	depends on HAVE_SMI_HANDLER
+	help
+	  This option enables SMM module loader that works with server
+	  platforms which may contain more than 32 CPU threads.
+
 config SMM_LAPIC_REMAP_MITIGATION
 	bool
 	default y if NORTHBRIDGE_INTEL_I945
diff --git a/src/cpu/x86/mp_init.c b/src/cpu/x86/mp_init.c
index caed8f4005..5807831c98 100644
--- a/src/cpu/x86/mp_init.c
+++ b/src/cpu/x86/mp_init.c
@@ -726,12 +726,21 @@ static void asmlinkage smm_do_relocation(void *arg)
 	 * the location of the new SMBASE. If using SMM modules then this
 	 * calculation needs to match that of the module loader.
 	 */
+#if CONFIG(X86_SMM_LOADER_VERSION2)
+	perm_smbase = smm_get_cpu_smbase(cpu);
+	mp_state.perm_smbase = perm_smbase;
+	if (!perm_smbase) {
+		printk(BIOS_ERR, "%s: bad SMBASE for CPU %d\n", __func__, cpu);
+		return;
+	}
+#else
 	perm_smbase = mp_state.perm_smbase;
 	perm_smbase -= cpu * runtime->save_state_size;
-
-	printk(BIOS_DEBUG, "New SMBASE 0x%08lx\n", perm_smbase);
+#endif
 
 	/* Setup code checks this callback for validity. */
+	printk(BIOS_INFO, "%s : curr_smbase 0x%x perm_smbase 0x%x, cpu = %d\n",
+		__func__, (int)curr_smbase, (int)perm_smbase, cpu);
 	mp_state.ops.relocation_handler(cpu, curr_smbase, perm_smbase);
 
 	if (CONFIG(STM)) {
@@ -758,9 +767,17 @@ static void adjust_smm_apic_id_map(struct smm_loader_params *smm_params)
 
 static int install_relocation_handler(int num_cpus, size_t save_state_size)
 {
+	int cpus = num_cpus;
+#if CONFIG(X86_SMM_LOADER_VERSION2)
+	/* Default SMRAM size is not big enough to concurrently
+	 * handle relocation for more than ~32 CPU threads
+	 * therefore, relocate 1 by 1. */
+	cpus = 1;
+#endif
+
 	struct smm_loader_params smm_params = {
 		.per_cpu_stack_size = CONFIG_SMM_STUB_STACK_SIZE,
-		.num_concurrent_stacks = num_cpus,
+		.num_concurrent_stacks = cpus,
 		.per_cpu_save_state_size = save_state_size,
 		.num_concurrent_save_states = 1,
 		.handler = smm_do_relocation,
@@ -770,9 +787,10 @@ static int install_relocation_handler(int num_cpus, size_t save_state_size)
 	if (mp_state.ops.adjust_smm_params != NULL)
 		mp_state.ops.adjust_smm_params(&smm_params, 0);
 
-	if (smm_setup_relocation_handler(&smm_params))
+	if (smm_setup_relocation_handler(&smm_params)) {
+		printk(BIOS_ERR, "%s: smm setup failed\n", __func__);
 		return -1;
-
+	}
 	adjust_smm_apic_id_map(&smm_params);
 
 	return 0;
@@ -781,8 +799,13 @@ static int install_relocation_handler(int num_cpus, size_t save_state_size)
 static int install_permanent_handler(int num_cpus, uintptr_t smbase,
 					size_t smsize, size_t save_state_size)
 {
-	/* There are num_cpus concurrent stacks and num_cpus concurrent save
-	 * state areas. Lastly, set the stack size to 1KiB. */
+	/*
+	 * All the CPUs will relocate to the permanent handler now. Set parameters
+	 * needed for all CPUs. The placement of each CPU's entry point is
+	 * determined by the loader. This code simply provides the beginning of
+	 * the SMRAM region, the number of CPUs that will use the handler, the
+	 * stack size and save state size for each CPU.
+	 */
 	struct smm_loader_params smm_params = {
 		.per_cpu_stack_size = CONFIG_SMM_MODULE_STACK_SIZE,
 		.num_concurrent_stacks = num_cpus,
@@ -794,7 +817,7 @@ static int install_permanent_handler(int num_cpus, uintptr_t smbase,
 	if (mp_state.ops.adjust_smm_params != NULL)
 		mp_state.ops.adjust_smm_params(&smm_params, 1);
 
-	printk(BIOS_DEBUG, "Installing SMM handler to 0x%08lx\n", smbase);
+	printk(BIOS_DEBUG, "Installing permanent SMM handler to 0x%08lx\n", smbase);
 
 	if (smm_load_module((void *)smbase, smsize, &smm_params))
 		return -1;
diff --git a/src/cpu/x86/smm/Makefile.inc b/src/cpu/x86/smm/Makefile.inc
index dbe567a8a2..1273a6cf27 100644
--- a/src/cpu/x86/smm/Makefile.inc
+++ b/src/cpu/x86/smm/Makefile.inc
@@ -1,6 +1,10 @@
 ## SPDX-License-Identifier: GPL-2.0-only
 
+ifeq ($(CONFIG_X86_SMM_LOADER_VERSION2),y)
+ramstage-y += smm_module_loaderv2.c
+else
 ramstage-y += smm_module_loader.c
+endif
 ramstage-y += smi_trigger.c
 
 ifeq ($(CONFIG_ARCH_RAMSTAGE_X86_32),y)
diff --git a/src/cpu/x86/smm/smm_module_loaderv2.c b/src/cpu/x86/smm/smm_module_loaderv2.c
new file mode 100644
index 0000000000..10cc6281f7
--- /dev/null
+++ b/src/cpu/x86/smm/smm_module_loaderv2.c
@@ -0,0 +1,655 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <stdint.h>
+#include <string.h>
+#include <rmodule.h>
+#include <cpu/x86/smm.h>
+#include <commonlib/helpers.h>
+#include <console/console.h>
+#include <security/intel/stm/SmmStm.h>
+
+#define FXSAVE_SIZE 512
+#define SMM_CODE_SEGMENT_SIZE 0x10000
+/* FXSAVE area during relocation. While it may not be strictly needed the
+   SMM stub code relies on the FXSAVE area being non-zero to enable SSE
+   instructions within SMM mode. */
+static uint8_t fxsave_area_relocation[CONFIG_MAX_CPUS][FXSAVE_SIZE]
+__attribute__((aligned(16)));
+
+/*
+ * Components that make up the SMRAM:
+ * 1. Save state - the total save state memory used
+ * 2. Stack - stacks for the CPUs in the SMM handler
+ * 3. Stub - SMM stub code for calling into handler
+ * 4. Handler - C-based SMM handler.
+ *
+ * The components are assumed to consist of one consecutive region.
+ */
+
+/* These parameters are used by the SMM stub code. A pointer to the params
+ * is also passed to the C-base handler. */
+struct smm_stub_params {
+	u32 stack_size;		/* per-CPU stack size (params->per_cpu_stack_size) */
+	u32 stack_top;		/* top of the stack region returned by smm_stub_place_stacks() */
+	u32 c_handler;		/* address of the C handler the stub calls (params->handler) */
+	u32 c_handler_arg;	/* argument handed to the C handler (params->handler_arg) */
+	u32 fxsave_area;	/* address of the FXSAVE area, as passed to the setup code */
+	u32 fxsave_area_size;	/* set to FXSAVE_SIZE by the setup code */
+	struct smm_runtime runtime;	/* runtime data; exposed to the caller via params->runtime */
+} __packed;
+
+/*
+ * The stub is the entry point that sets up protected mode and stacks for each
+ * CPU. It then calls into the SMM handler module. It is encoded as an rmodule.
+ */
+extern unsigned char _binary_smmstub_start[];
+
+/* Per CPU minimum stack size. */
+#define SMM_MINIMUM_STACK_SIZE 32
+
+/*
+ * Per-CPU placement record. Entries are filled in by smm_create_map() and
+ * read back by smm_place_entry_code() and smm_get_cpu_smbase().
+ */
+struct cpu_smm_info {
+	uint8_t active;		/* set to 1 once smm_create_map() populated the entry */
+	uintptr_t smbase;	/* SMBASE assigned to this CPU */
+	uintptr_t entry;	/* smbase + SMM entry offset */
+	uintptr_t ss_start;	/* entry + (SMM entry offset - save state size) */
+	uintptr_t code_start;	/* start of this CPU's stub copy (same as entry) */
+	uintptr_t code_end;	/* code_start + stub size */
+};
+/*
+ * Kept file-local so the generic name does not leak into the global
+ * namespace; smm_get_cpu_smbase() is the accessor exported through smm.h.
+ */
+static struct cpu_smm_info cpus[CONFIG_MAX_CPUS] = { 0 };
+
+/*
+ * This method creates a map of all the CPU entry points, save state locations
+ * and the beginning and end of code segments for each CPU. This map is used
+ * during relocation to properly align as many CPUs that can fit into the SMRAM
+ * region. For more information on how SMRAM works, refer to the latest Intel
+ * developer's manuals (volume 3, chapter 34). SMRAM is divided up into the
+ * following regions:
+ * +-----------------+ Top of SMRAM
+ * |                 | <- MSEG, FXSAVE
+ * +-----------------+
+ * |    common       |
+ * |  smi handler    | 64K
+ * |                 |
+ * +-----------------+
+ * | CPU 0 code  seg |
+ * +-----------------+
+ * | CPU 1 code seg  |
+ * +-----------------+
+ * | CPU x code seg  |
+ * +-----------------+
+ * |                 |
+ * |                 |
+ * +-----------------+
+ * |    stacks       |
+ * +-----------------+ <- START of SMRAM
+ *
+ * The code below checks when a code segment is full and begins placing the remainder
+ * CPUs in the lower segments. The entry point for each CPU is smbase + 0x8000
+ * and save state is smbase + 0x8000 + (0x8000 - state save size). Save state
+ * area grows downward into the CPUs entry point.  Therefore staggering too many
+ * CPUs in one 32K block will corrupt CPU0's entry code as the save states move
+ * downward.
+ * input : smbase of first CPU (all other CPUs
+ *         will go below this address)
+ * input : num_cpus in the system. The map will
+ *         be created from 0 to num_cpus.
+ * input : params - loader parameters; per_cpu_save_state_size and
+ *         smm_main_entry_offset are read from it.
+ * output: 1 when the map was created, 0 on failure (the stub could not be
+ *         parsed, the save state and stub do not leave room for at least
+ *         one staggered CPU in a segment, or num_cpus exceeds the map size).
+ */
+static int smm_create_map(uintptr_t smbase, unsigned int num_cpus,
+			const struct smm_loader_params *params)
+{
+	unsigned int i;
+	struct rmodule smm_stub;
+	unsigned int ss_size = params->per_cpu_save_state_size, stub_size;
+	unsigned int smm_entry_offset = params->smm_main_entry_offset;
+	unsigned int seg_count = 0, segments = 0;
+	unsigned int cpus_in_segment = 0;
+	uint64_t reserved;
+	/* Keep the full pointer width; an unsigned int would truncate smbase. */
+	uintptr_t base = smbase;
+
+	if (rmodule_parse(&_binary_smmstub_start, &smm_stub)) {
+		printk(BIOS_ERR, "%s: unable to get SMM module size\n", __func__);
+		return 0;
+	}
+
+	stub_size = rmodule_memory_size(&smm_stub);
+	/*
+	 * How many CPUs can fit into one 64K segment? Sum in 64 bits and
+	 * compare before subtracting: all operands are unsigned, so an
+	 * oversized save state or stub would otherwise wrap around to a huge
+	 * "available" value instead of being rejected.
+	 */
+	reserved = (uint64_t)smm_entry_offset + ss_size + stub_size;
+	if (ss_size != 0 && reserved < 0xFFFF) {
+		unsigned int available = 0xFFFF - (unsigned int)reserved;
+
+		cpus_in_segment = available / ss_size;
+	}
+
+	/* Zero here also covers ss_size == 0 and guards the division below. */
+	if (cpus_in_segment == 0) {
+		printk(BIOS_ERR, "%s: not enough space in SMM to setup all CPUs\n", __func__);
+		printk(BIOS_ERR, "    save state & stub size need to be reduced\n");
+		printk(BIOS_ERR, "    or increase SMRAM size\n");
+		return 0;
+	}
+
+	/* minimum segments needed will always be 1 */
+	segments = num_cpus / cpus_in_segment + 1;
+	printk(BIOS_DEBUG,
+		"%s: cpus allowed in one segment %d\n", __func__, cpus_in_segment);
+	printk(BIOS_DEBUG,
+		"%s: min # of segments needed %d\n", __func__, segments);
+
+	if (sizeof(cpus) / sizeof(cpus[0]) < num_cpus) {
+		printk(BIOS_ERR,
+			"%s: increase MAX_CPUS in Kconfig\n", __func__);
+		return 0;
+	}
+
+	for (i = 0; i < num_cpus; i++) {
+		cpus[i].smbase = base;
+		cpus[i].entry = base + smm_entry_offset;
+		cpus[i].ss_start = cpus[i].entry + (smm_entry_offset - ss_size);
+		cpus[i].code_start = cpus[i].entry;
+		cpus[i].code_end = cpus[i].entry + stub_size;
+		cpus[i].active = 1;
+		/* The next CPU is staggered one save state below this one. */
+		base -= ss_size;
+		seg_count++;
+		/* Segment is full: skip down by the entry offset before continuing. */
+		if (seg_count >= cpus_in_segment) {
+			base -= smm_entry_offset;
+			seg_count = 0;
+		}
+	}
+
+	if (CONFIG_DEFAULT_CONSOLE_LOGLEVEL >= BIOS_DEBUG) {
+		seg_count = 0;
+		for (i = 0; i < num_cpus; i++) {
+			printk(BIOS_DEBUG, "CPU 0x%x\n", i);
+			printk(BIOS_DEBUG,
+				"    smbase %zx  entry %zx\n",
+				cpus[i].smbase, cpus[i].entry);
+			printk(BIOS_DEBUG,
+				"           ss_start %zx  code_end %zx\n",
+				cpus[i].ss_start, cpus[i].code_end);
+			seg_count++;
+			if (seg_count >= cpus_in_segment) {
+				printk(BIOS_DEBUG,
+					"-------------NEW CODE SEGMENT --------------\n");
+				seg_count = 0;
+			}
+		}
+	}
+	return 1;
+}
+
+/*
+ * Look up the SMBASE recorded for a CPU in the SMM relocation map.
+ * No HW registers are read; the value comes from the map that was
+ * created during SMM setup, so that map is expected to be complete.
+ * input: cpu_num - cpu number, used as the index into the map
+ * output: the smbase stored for that CPU, or 0 when cpu_num is not below
+ *         CONFIG_MAX_CPUS or the map entry is not marked active
+ */
+u32 smm_get_cpu_smbase(unsigned int cpu_num)
+{
+	if (cpu_num >= CONFIG_MAX_CPUS || !cpus[cpu_num].active)
+		return 0;
+
+	return cpus[cpu_num].smbase;
+}
+
+/*
+ * This method assumes that at least 1 CPU has been set up from
+ * which it will place other CPUs below its smbase ensuring that
+ * save state does not clobber the first CPUs init code segment. The init
+ * code which is the smm stub code is the same for all CPUs. They enter
+ * smm, setup stacks (based on their apic id), enter protected mode
+ * and then jump to the common smi handler.  The stack is allocated
+ * at the beginning of smram (aka tseg base, not smbase). The stack
+ * pointer for each CPU is calculated by using its apic id
+ * (code is in smm_stub.s)
+ * Each entry point will now have the same stub code which, sets up the CPU
+ * stack, enters protected mode and then jumps to the smi handler. It is
+ * important to enter protected mode before the jump because the "jump to
+ * address" might be larger than the 20bit address supported by real mode.
+ * SMI entry right now is in real mode.
+ * input: smbase - this is the smbase of the first cpu not the smbase
+ *        where tseg starts (aka smram_start). All CPUs code segment
+ *        and stack will be below this point except for the common
+ *        SMI handler which is one segment above
+ * input: num_cpus - number of cpus that need relocation including
+ *        the first CPU (though its code is already loaded)
+ * input: top of stack (stacks work downward by default in Intel HW)
+ * input: params - loader parameters, forwarded to smm_create_map();
+ *        smm_main_entry_offset is also used for the encroachment check
+ * output: return 1 on success, 0 if the runtime smi code could not be
+ *         installed. In the failure case SMM will not work and any SMI's
+ *         generated will cause a CPU shutdown or general protection fault
+ *         because the appropriate smi handling code was not installed
+ */
+
+static int smm_place_entry_code(uintptr_t smbase, unsigned int num_cpus,
+				unsigned int stack_top, const struct smm_loader_params *params)
+{
+	unsigned int i;
+	unsigned int size;
+	const struct cpu_smm_info *last;
+
+	/* Guards the cpus[num_cpus - 1] accesses below against wrap-around. */
+	if (num_cpus == 0) {
+		printk(BIOS_ERR, "%s: no CPUs to place\n", __func__);
+		return 0;
+	}
+
+	if (!smm_create_map(smbase, num_cpus, params)) {
+		printk(BIOS_ERR, "%s: unable to place smm entry code\n", __func__);
+		return 0;
+	}
+
+	/*
+	 * Ensure there was enough space and the last CPUs smbase
+	 * did not encroach upon the stack. Stack top is smram start
+	 * + size of stack. The last mapped CPU is at index num_cpus - 1;
+	 * index num_cpus is past the entries written by smm_create_map()
+	 * and out of bounds when num_cpus == CONFIG_MAX_CPUS.
+	 */
+	last = &cpus[num_cpus - 1];
+	if (last->active &&
+	    last->smbase + params->smm_main_entry_offset < stack_top) {
+		printk(BIOS_ERR, "%s: stack encroachment\n", __func__);
+		printk(BIOS_ERR, "%s: smbase %zx, stack_top %x\n",
+			__func__, last->smbase, stack_top);
+		return 0;
+	}
+
+	printk(BIOS_INFO, "%s: smbase %zx, stack_top %x\n",
+		__func__, last->smbase, stack_top);
+
+	/* start at 1, the first CPU stub code is already there */
+	size = cpus[0].code_end - cpus[0].code_start;
+	for (i = 1; i < num_cpus; i++) {
+		memcpy((void *)cpus[i].code_start, (void *)cpus[0].code_start, size);
+		printk(BIOS_DEBUG,
+			"SMM Module: placing smm entry code at %zx,  cpu # 0x%x\n",
+			cpus[i].code_start, i);
+		printk(BIOS_DEBUG, "%s: copying from %zx to %zx 0x%x bytes\n",
+			__func__, cpus[0].code_start, cpus[i].code_start, size);
+	}
+	return 1;
+}
+
+/*
+ * Compute the top of the stack region that starts at base.
+ * input: base - start of the region the stacks are placed in
+ * input: size - number of bytes available for stacks at base
+ * input: params - per_cpu_stack_size and num_concurrent_stacks are read
+ * output: base + total stack size, or NULL when no stack user was
+ *         requested or the total stack size is larger than size
+ */
+static void *smm_stub_place_stacks(char *base, size_t size,
+				   struct smm_loader_params *params)
+{
+	const size_t stack_bytes = params->per_cpu_stack_size *
+				   params->num_concurrent_stacks;
+	char *top;
+
+	printk(BIOS_DEBUG, "%s: cpus: %zx : stack space: needed -> %zx\n",
+		__func__, params->num_concurrent_stacks,
+		stack_bytes);
+	printk(BIOS_DEBUG, "  available -> %zx : per_cpu_stack_size : %zx\n",
+		size, params->per_cpu_stack_size);
+
+	/* Reject a request without any stack user or one that cannot fit. */
+	if (params->num_concurrent_stacks < 1 || stack_bytes > size)
+		return NULL;
+
+	/* The stacks occupy [base, top). */
+	top = base + stack_bytes;
+	printk(BIOS_DEBUG, "%s: exit, stack_top %p\n", __func__, top);
+
+	return top;
+}
+
+/*
+ * Place the per-CPU copies of the stub. Every CPU has its own stub code,
+ * which enters protected mode, sets up the stack, and then jumps to the
+ * common SMI handler; the actual placement is done by
+ * smm_place_entry_code().
+ * output: 1 when nothing had to be placed, otherwise the result of
+ *         smm_place_entry_code()
+ */
+static int smm_stub_place_staggered_entry_points(char *base,
+	const struct smm_loader_params *params, const struct rmodule *smm_stub)
+{
+	const size_t entry_off = rmodule_entry_offset(smm_stub);
+
+	/* One save state and a stub entry at offset 0: no copies required. */
+	if (params->num_concurrent_save_states <= 1 && entry_off == 0)
+		return 1;
+
+	return smm_place_entry_code((unsigned int)base,
+		params->num_concurrent_save_states,
+		(unsigned int)params->stack_top, params);
+}
+
+/*
+ * The stub setup code assumes it is completely contained within the
+ * default SMRAM size (0x10000) for the default SMI handler (entry at
+ * 0x30000), but no assumption should be made for the permanent SMI handler.
+ * The placement of CPU entry points for permanent handler are determined
+ * by the number of CPUs in the system and the amount of SMRAM.
+ * There are potentially 3 regions to place
+ * within the default SMRAM size:
+ * 1. Save state areas
+ * 2. Stub code
+ * 3. Stack areas
+ *
+ * The save state and smm stack are treated as contiguous for the number of
+ * concurrent areas requested. The save state always lives at the top of
+ * the CPUS smbase (and the entry point is at offset 0x8000). This allows only a certain
+ * number of CPUs with staggered entry points until the save state area comes
+ * down far enough to overwrite/corrupt the entry code (stub code). Therefore,
+ * an SMM map is created to avoid this corruption, see smm_create_map() above.
+ * This module setup code works for the default (0x30000) SMM handler setup and the
+ * permanent SMM handler.
+ *
+ * input: smbase - base the stub is placed relative to; the stub is loaded
+ *        at smbase + params->smm_main_entry_offset
+ * input: smm_size - size used for the save state and stub fit checks
+ * input: params - loader parameters; stack_top and runtime are written
+ *        back into it
+ * input: fxsave_area - stored in the stub parameters together with
+ *        FXSAVE_SIZE
+ * output: 0 on success, -1 on failure (too many stacks, stub cannot be
+ *         parsed or loaded, save state or stub does not fit, stack size
+ *         not above SMM_MINIMUM_STACK_SIZE or not a multiple of 4, stacks
+ *         or staggered entry points cannot be placed)
+ *
+ * NOTE(review): stub_entry_offset is assigned but never read here, and
+ * smm_stub_loc is set to NULL right before being assigned again.
+ * NOTE(review): the size handed to smm_stub_place_stacks() is the total
+ * stack size itself, so the "does it fit" check in that helper compares
+ * the value with itself - confirm the intended available size.
+ * NOTE(review): the "stack_end" debug line prints runtime.smbase, the
+ * "need" value of the stack error message prints base, and
+ * total_size_all adds the stack size of a single CPU only - confirm.
+ */
+static int smm_module_setup_stub(void *smbase, size_t smm_size,
+				 struct smm_loader_params *params,
+				 void *fxsave_area)
+{
+	size_t total_save_state_size;
+	size_t smm_stub_size;
+	size_t stub_entry_offset;
+	char *smm_stub_loc;
+	void *stacks_top;
+	size_t size;
+	char *base;
+	size_t i;
+	struct smm_stub_params *stub_params;
+	struct rmodule smm_stub;
+	unsigned int total_size_all;
+	base = smbase;
+	size = smm_size;
+
+	/* The number of concurrent stacks cannot exceed CONFIG_MAX_CPUS. */
+	if (params->num_concurrent_stacks > CONFIG_MAX_CPUS) {
+		printk(BIOS_ERR, "%s: not enough stacks\n", __func__);
+		return -1;
+	}
+
+	/* Fail if can't parse the smm stub rmodule. */
+	if (rmodule_parse(&_binary_smmstub_start, &smm_stub)) {
+		printk(BIOS_ERR, "%s: unable to parse smm stub\n", __func__);
+		return -1;
+	}
+
+	/* Adjust remaining size to account for save state. */
+	total_save_state_size = params->per_cpu_save_state_size *
+				params->num_concurrent_save_states;
+	if (total_save_state_size > size) {
+		printk(BIOS_ERR,
+			"%s: more state save space needed:need -> %zx:available->%zx\n",
+			__func__, total_save_state_size, size);
+		return -1;
+	}
+
+	size -= total_save_state_size;
+
+	/* The save state size encroached over the first SMM entry point. */
+	if (size <= params->smm_main_entry_offset) {
+		printk(BIOS_ERR, "%s: encroachment over SMM entry point\n", __func__);
+		printk(BIOS_ERR, "%s: state save size: %zx : smm_entry_offset -> %x\n",
+			__func__, size, params->smm_main_entry_offset);
+		return -1;
+	}
+
+	/* Need a minimum stack size and alignment. */
+	if (params->per_cpu_stack_size <= SMM_MINIMUM_STACK_SIZE ||
+	    (params->per_cpu_stack_size & 3) != 0) {
+		printk(BIOS_ERR, "%s: need minimum stack size\n", __func__);
+		return -1;
+	}
+
+	smm_stub_loc = NULL;
+	smm_stub_size = rmodule_memory_size(&smm_stub);
+	stub_entry_offset = rmodule_entry_offset(&smm_stub);
+
+	/* Put the stub at the main entry point */
+	smm_stub_loc = &base[params->smm_main_entry_offset];
+
+	/* Stub is too big to fit. */
+	if (smm_stub_size > (size - params->smm_main_entry_offset)) {
+		printk(BIOS_ERR, "%s: stub is too big to fit\n", __func__);
+		return -1;
+	}
+
+	/* The stacks, if requested, live in the lower half of SMRAM space
+	 * for default handler, but for relocated handler it lives at the beginning
+	 * of SMRAM which is TSEG base
+	 */
+	size = params->num_concurrent_stacks * params->per_cpu_stack_size;
+	stacks_top = smm_stub_place_stacks((char *)params->smram_start, size, params);
+	if (stacks_top == NULL) {
+		printk(BIOS_ERR, "%s: not enough space for stacks\n", __func__);
+		printk(BIOS_ERR, "%s: ....need -> %p : available -> %zx\n", __func__,
+			base, size);
+		return -1;
+	}
+	params->stack_top = stacks_top;
+	/* Load the stub. */
+	if (rmodule_load(smm_stub_loc, &smm_stub)) {
+		printk(BIOS_ERR, "%s: load module failed\n", __func__);
+		return -1;
+	}
+
+	if (!smm_stub_place_staggered_entry_points(base, params, &smm_stub)) {
+		printk(BIOS_ERR, "%s: staggered entry points failed\n", __func__);
+		return -1;
+	}
+
+	/* Setup the parameters for the stub code. */
+	stub_params = rmodule_parameters(&smm_stub);
+	stub_params->stack_top = (uintptr_t)stacks_top;
+	stub_params->stack_size = params->per_cpu_stack_size;
+	stub_params->c_handler = (uintptr_t)params->handler;
+	stub_params->c_handler_arg = (uintptr_t)params->handler_arg;
+	stub_params->fxsave_area = (uintptr_t)fxsave_area;
+	stub_params->fxsave_area_size = FXSAVE_SIZE;
+	stub_params->runtime.smbase = (uintptr_t)smbase;
+	stub_params->runtime.smm_size = smm_size;
+	stub_params->runtime.save_state_size = params->per_cpu_save_state_size;
+	stub_params->runtime.num_cpus = params->num_concurrent_stacks;
+
+	printk(BIOS_DEBUG, "%s: stack_end = 0x%x\n",
+		__func__, stub_params->runtime.smbase);
+	printk(BIOS_DEBUG,
+		"%s: stack_top = 0x%x\n", __func__, stub_params->stack_top);
+	printk(BIOS_DEBUG, "%s: stack_size = 0x%x\n",
+		__func__, stub_params->stack_size);
+	printk(BIOS_DEBUG, "%s: runtime.smbase = 0x%x\n",
+		__func__, stub_params->runtime.smbase);
+	printk(BIOS_DEBUG, "%s: runtime.start32_offset = 0x%x\n", __func__,
+		stub_params->runtime.start32_offset);
+	printk(BIOS_DEBUG, "%s: runtime.smm_size = 0x%zx\n",
+		__func__, smm_size);
+	printk(BIOS_DEBUG, "%s: per_cpu_save_state_size = 0x%x\n",
+		__func__, stub_params->runtime.save_state_size);
+	printk(BIOS_DEBUG, "%s: num_cpus = 0x%x\n", __func__,
+		stub_params->runtime.num_cpus);
+	printk(BIOS_DEBUG, "%s: total_save_state_size = 0x%x\n",
+		__func__, (stub_params->runtime.save_state_size *
+		stub_params->runtime.num_cpus));
+	total_size_all = stub_params->stack_size +
+		(stub_params->runtime.save_state_size *
+		stub_params->runtime.num_cpus);
+	printk(BIOS_DEBUG, "%s: total_size_all = 0x%x\n", __func__,
+		total_size_all);
+
+	/* Initialize the APIC id to CPU number table to be 1:1 */
+	for (i = 0; i < params->num_concurrent_stacks; i++)
+		stub_params->runtime.apic_id_to_cpu[i] = i;
+
+	/* Allow the initiator to manipulate SMM stub parameters. */
+	params->runtime = &stub_params->runtime;
+
+	printk(BIOS_DEBUG, "SMM Module: stub loaded at %p. Will call %p(%p)\n",
+	       smm_stub_loc, params->handler, params->handler_arg);
+	return 0;
+}
+
+/*
+ * smm_setup_relocation_handler assumes the callback is already loaded in
+ * memory. i.e. Another SMM module isn't chained to the stub. The other
+ * assumption is that the stub will be entered from the default SMRAM
+ * location: 0x30000 -> 0x40000.
+ * input: params - loader parameters. handler must be set and
+ *        num_concurrent_save_states must not exceed 1. A
+ *        num_concurrent_stacks of 0 is replaced by CONFIG_MAX_CPUS.
+ *        smm_main_entry_offset, smram_start and smram_end are overwritten
+ *        with the default SMRAM values.
+ * output: -1 when the parameters are rejected, otherwise the result of
+ *         smm_module_setup_stub()
+ */
+int smm_setup_relocation_handler(struct smm_loader_params *params)
+{
+	void *smram = (void *)(SMM_DEFAULT_BASE);
+	int rc;
+
+	printk(BIOS_SPEW, "%s: enter\n", __func__);
+	/* There can't be more than 1 concurrent save state for the relocation
+	 * handler because all CPUs default to 0x30000 as SMBASE. */
+	if (params->num_concurrent_save_states > 1)
+		return -1;
+
+	/* A handler has to be defined to call for relocation. */
+	if (params->handler == NULL)
+		return -1;
+
+	/* Since the relocation handler always uses stack, adjust the number
+	 * of concurrent stack users to be CONFIG_MAX_CPUS. */
+	if (params->num_concurrent_stacks == 0)
+		params->num_concurrent_stacks = CONFIG_MAX_CPUS;
+
+	params->smm_main_entry_offset = SMM_ENTRY_OFFSET;
+	params->smram_start = SMM_DEFAULT_BASE;
+	params->smram_end = SMM_DEFAULT_BASE + SMM_DEFAULT_SIZE;
+
+	/* Keep the result so the exit trace below is reached before returning. */
+	rc = smm_module_setup_stub(smram, SMM_DEFAULT_SIZE,
+				params, fxsave_area_relocation);
+	printk(BIOS_SPEW, "%s: exit\n", __func__);
+	return rc;
+}
+
+/*
+ * The SMM module is placed within the provided region in the following
+ * manner:
+ * +-----------------+ <- smram + size
+ * | BIOS resource   |
+ * | list (STM)      |
+ * +-----------------+
+ * |  fxsave area    |
+ * +-----------------+
+ * |  smi handler    |
+ * |      ...        |
+ * +-----------------+ <- cpu0
+ * |    stub code    | <- cpu1
+ * |    stub code    | <- cpu2
+ * |    stub code    | <- cpu3, etc
+ * |                 |
+ * |                 |
+ * |                 |
+ * |    stacks       |
+ * +-----------------+ <- smram start
+ *
+ * It should be noted that this algorithm will not work for
+ * SMM_DEFAULT_SIZE SMRAM regions such as the A segment. This algorithm
+ * expects a region large enough to encompass the handler and stacks
+ * as well as the SMM_DEFAULT_SIZE.
+ *
+ * input: smram - start of the SMRAM region
+ * input: size - size of the SMRAM region; must be larger than
+ *        SMM_DEFAULT_SIZE
+ * input: params - loader parameters; smram_start, smram_end,
+ *        smm_main_entry_offset, stack_top, handler, handler_arg and
+ *        smm_entry are written by this function
+ * output: -1 when the region is too small, the SMM rmodule cannot be
+ *         parsed or loaded, or the handler is larger than
+ *         SMM_CODE_SEGMENT_SIZE; otherwise the result of
+ *         smm_module_setup_stub()
+ *
+ * NOTE(review): total_size only sums stacks, MSEG + BIOS resource list,
+ * the FXSAVE area and the handler before alignment padding is added; the
+ * per-CPU stub/save state area below the handler is not included in the
+ * "need more SMRAM" check - confirm this is intended.
+ * NOTE(review): smram is a void pointer, so "smram + ..." relies on the
+ * compiler treating void pointer arithmetic as byte-sized.
+ */
+int smm_load_module(void *smram, size_t size, struct smm_loader_params *params)
+{
+	struct rmodule smm_mod;
+	size_t total_stack_size;
+	size_t handler_size;
+	size_t module_alignment;
+	size_t alignment_size;
+	size_t fxsave_size;
+	void *fxsave_area;
+	size_t total_size = 0;
+	char *base;
+
+	if (size <= SMM_DEFAULT_SIZE)
+		return -1;
+
+	/* Load main SMI handler at the top of SMRAM
+	 * everything else will go below
+	 */
+	base = smram;
+	base += size;
+	params->smram_start = (uintptr_t)smram;
+	params->smram_end = params->smram_start + size;
+	params->smm_main_entry_offset = SMM_ENTRY_OFFSET;
+
+	/* Fail if can't parse the smm rmodule. */
+	if (rmodule_parse(&_binary_smm_start, &smm_mod))
+		return -1;
+
+	/* Clear SMM region */
+	if (CONFIG(DEBUG_SMI))
+		memset(smram, 0xcd, size);
+
+	total_stack_size = params->per_cpu_stack_size *
+			   params->num_concurrent_stacks;
+	total_size += total_stack_size;
+	/* Stacks are the base of SMRAM */
+	params->stack_top = smram + total_stack_size;
+
+	/* MSEG starts at the top of SMRAM and works down */
+	if (CONFIG(STM)) {
+		base -= CONFIG_MSEG_SIZE + CONFIG_BIOS_RESOURCE_LIST_SIZE;
+		total_size += CONFIG_MSEG_SIZE + CONFIG_BIOS_RESOURCE_LIST_SIZE;
+	}
+
+	/* FXSAVE goes below MSEG */
+	if (CONFIG(SSE)) {
+		fxsave_size = FXSAVE_SIZE * params->num_concurrent_stacks;
+		fxsave_area = base - fxsave_size;
+		base -= fxsave_size;
+		total_size += fxsave_size;
+	} else {
+		fxsave_size = 0;
+		fxsave_area = NULL;
+	}
+
+
+	handler_size = rmodule_memory_size(&smm_mod);
+	base -= handler_size;
+	total_size += handler_size;
+	module_alignment = rmodule_load_alignment(&smm_mod);
+	alignment_size = module_alignment -
+				((uintptr_t)base % module_alignment);
+	if (alignment_size != module_alignment) {
+		handler_size += alignment_size;
+		base += alignment_size;
+	}
+
+	printk(BIOS_DEBUG,
+		"%s: total_smm_space_needed %zx, available -> %zx\n",
+		 __func__, total_size, size);
+
+	/* Does the required amount of memory exceed the SMRAM region size? */
+	if (total_size > size) {
+		printk(BIOS_ERR, "%s: need more SMRAM\n", __func__);
+		return -1;
+	}
+	if (handler_size > SMM_CODE_SEGMENT_SIZE) {
+		printk(BIOS_ERR, "%s: increase SMM_CODE_SEGMENT_SIZE: handler_size = %zx\n",
+			__func__, handler_size);
+		return -1;
+	}
+
+	if (rmodule_load(base, &smm_mod))
+		return -1;
+
+	params->handler = rmodule_entry(&smm_mod);
+	params->handler_arg = rmodule_parameters(&smm_mod);
+
+	printk(BIOS_DEBUG, "%s: smram_start: 0x%p\n",
+		 __func__, smram);
+	printk(BIOS_DEBUG, "%s: smram_end: %p\n",
+		 __func__, smram + size);
+	printk(BIOS_DEBUG, "%s: stack_top: %p\n",
+		 __func__, params->stack_top);
+	printk(BIOS_DEBUG, "%s: handler start %p\n",
+		 __func__, params->handler);
+	printk(BIOS_DEBUG, "%s: handler_size %zx\n",
+		 __func__, handler_size);
+	printk(BIOS_DEBUG, "%s: handler_arg %p\n",
+		 __func__, params->handler_arg);
+	printk(BIOS_DEBUG, "%s: fxsave_area %p\n",
+		 __func__, fxsave_area);
+	printk(BIOS_DEBUG, "%s: fxsave_size %zx\n",
+		 __func__, fxsave_size);
+	printk(BIOS_DEBUG, "%s: CONFIG_MSEG_SIZE 0x%x\n",
+		 __func__, CONFIG_MSEG_SIZE);
+	printk(BIOS_DEBUG, "%s: CONFIG_BIOS_RESOURCE_LIST_SIZE 0x%x\n",
+		 __func__, CONFIG_BIOS_RESOURCE_LIST_SIZE);
+
+	/* CPU 0 smbase goes first, all other CPUs
+	 * will be staggered below
+	 */
+	base -= SMM_CODE_SEGMENT_SIZE;
+	printk(BIOS_DEBUG, "%s: cpu0 entry: %p\n",
+		 __func__, base);
+	params->smm_entry = (uintptr_t)base + params->smm_main_entry_offset;
+	return smm_module_setup_stub(base, size, params, fxsave_area);
+}
diff --git a/src/include/cpu/x86/smm.h b/src/include/cpu/x86/smm.h
index a3101e5155..db63e8be25 100644
--- a/src/include/cpu/x86/smm.h
+++ b/src/include/cpu/x86/smm.h
@@ -128,6 +128,12 @@ static inline bool smm_points_to_smram(const void *ptr, const size_t len)
  *             into this field so the code doing the loading can manipulate the
  *             runtime's assumptions. e.g. updating the APIC id to CPU map to
  *             handle sparse APIC id space.
+ * The following parameters are only used when X86_SMM_LOADER_VERSION2 is enabled.
+ * - smm_entry - entry address of first CPU thread, all others will be tiled
+ *               below this address.
+ * - smm_main_entry_offset - default entry offset (e.g. 0x8000)
+ * - smram_start - SMRAM starting address
+ * - smram_end - SMRAM ending address
  */
 struct smm_loader_params {
 	void *stack_top;
@@ -141,12 +147,24 @@ struct smm_loader_params {
 	void *handler_arg;
 
 	struct smm_runtime *runtime;
+
+	/* The following are only used by X86_SMM_LOADER_VERSION2 */
+#if CONFIG(X86_SMM_LOADER_VERSION2)
+	unsigned int smm_entry;
+	unsigned int smm_main_entry_offset;
+	unsigned int smram_start;
+	unsigned int smram_end;
+#endif
 };
 
 /* Both of these return 0 on success, < 0 on failure. */
 int smm_setup_relocation_handler(struct smm_loader_params *params);
 int smm_load_module(void *smram, size_t size, struct smm_loader_params *params);
 
+#if CONFIG(X86_SMM_LOADER_VERSION2)
+u32 smm_get_cpu_smbase(unsigned int cpu_num);
+#endif
+
 /* Backup and restore default SMM region. */
 void *backup_default_smm_area(void);
 void restore_default_smm_area(void *smm_save_area);
author	Rocky Phagura <rphagura@fb.com>	2020-07-21 14:48:48 -0700
committer	David Hendricks <david.hendricks@gmail.com>	2020-08-15 02:16:32 +0000
commit	afb7a814783cda12f5b72167163b9109ee1d15a7 (patch)
tree	191e6a067c12da5fda30bed060fbe5cfe89891e0
parent	5b52592773fce8ba33a18380074b7dcdba7721b4 (diff)