5 files changed, 85 insertions, 46 deletions
diff --git a/src/arch/riscv/Kconfig b/src/arch/riscv/Kconfig
index 148d966059..b7fc0cab01 100644
--- a/src/arch/riscv/Kconfig
+++ b/src/arch/riscv/Kconfig
@@ -134,4 +134,14 @@ config RISCV_SOC_HAS_MENVCFG
 	bool
 	default y
 
+config RISCV_GET_HART_COUNT_AT_RUNTIME
+	bool
+	default n
+	help
+	  Usually RISC-V SOCs have a specific number of harts (CONFIG_MAX_CPUS).
+	  It is however possible that the number of harts can only be known at runtime.
+	  This is for example the case for socketed and for emulation systems.
+	  SOC/Mainboards select this option in case the number of harts is not known at
+	  build time. In this case the SOC must have a scheme in place to discover all harts.
+
 endif # if ARCH_RISCV
diff --git a/src/arch/riscv/include/arch/smp/smp.h b/src/arch/riscv/include/arch/smp/smp.h
index 9d3ae5f92b..758c4de18f 100644
--- a/src/arch/riscv/include/arch/smp/smp.h
+++ b/src/arch/riscv/include/arch/smp/smp.h
@@ -3,6 +3,8 @@
 #ifndef _RISCV_SMP_H
 #define _RISCV_SMP_H
 
+unsigned int smp_get_hart_count(void);
+
 /*
  * This function is used to pause smp. Only the hart with hartid equal
  * to working_hartid can be returned from smp_pause, other harts will
diff --git a/src/arch/riscv/include/mcall.h b/src/arch/riscv/include/mcall.h
index c6ed7d804e..69eb5741ef 100644
--- a/src/arch/riscv/include/mcall.h
+++ b/src/arch/riscv/include/mcall.h
@@ -6,11 +6,11 @@
 // NOTE: this is the size of struct hls below. A static_assert would be
 // nice to have.
 #if __riscv_xlen == 64
-#define HLS_SIZE 88
+#define HLS_SIZE 96
 #endif
 
 #if __riscv_xlen == 32
-#define HLS_SIZE 52
+#define HLS_SIZE 56
 #endif
 
 /* We save 37 registers, currently. */
@@ -42,6 +42,7 @@ struct hls {
 	struct sbi_device_message *device_response_queue_head;
 	struct sbi_device_message *device_response_queue_tail;
 
+	int enabled;
 	int hart_id;
 	int ipi_pending;
 	uint64_t *timecmp;
diff --git a/src/arch/riscv/mcall.c b/src/arch/riscv/mcall.c
index 7f846bd83e..a775c91e29 100644
--- a/src/arch/riscv/mcall.c
+++ b/src/arch/riscv/mcall.c
@@ -9,6 +9,7 @@ void hls_init(uint32_t hart_id, void *fdt)
 	memset(HLS(), 0, sizeof(*HLS()));
 	HLS()->fdt = fdt;
 	HLS()->hart_id = hart_id;
+	HLS()->enabled = 1;
 
 	mtime_init();
 }
diff --git a/src/arch/riscv/smp.c b/src/arch/riscv/smp.c
index 0a93763cb0..67dc13b8fc 100644
--- a/src/arch/riscv/smp.c
+++ b/src/arch/riscv/smp.c
@@ -7,68 +7,93 @@
 #include <console/console.h>
 #include <mcall.h>
 
+// made-up values used to sync hart state
+#define HART_SLEEPING 0x1
+#define HART_AWAKE    0x2
+
 void smp_pause(int working_hartid)
 {
-#define SYNCA (OTHER_HLS(working_hartid)->entry.sync_a)
-#define SYNCB (OTHER_HLS(working_hartid)->entry.sync_b)
-
 	int hartid = read_csr(mhartid);
 
+	// pause all harts which are not the working hart
 	if (hartid != working_hartid) {
-		/* waiting for work hart */
-		do {
-			barrier();
-		} while (atomic_read(&SYNCA) != 0x01234567);
-
-		clear_csr(mstatus, MSTATUS_MIE);
-		write_csr(mie, MIP_MSIP);
-
-		/* count how many cores enter the halt */
-		atomic_add(&SYNCB, 1);
+		clear_csr(mstatus, MSTATUS_MIE); // disable all interrupts
+		set_msip(hartid, 0); // clear pending interrupts
+		write_csr(mie, MIP_MSIP); // enable only IPI (for smp_resume)
+		barrier();
+		atomic_set(&HLS()->entry.sync_a, HART_SLEEPING); // mark the hart as sleeping.
 
+		// pause hart
 		do {
-			barrier();
-			__asm__ volatile ("wfi");
+			__asm__ volatile ("wfi"); // wait for interrupt
 		} while ((read_csr(mip) & MIP_MSIP) == 0);
-		set_msip(hartid, 0);
-		HLS()->entry.fn(HLS()->entry.arg);
-	} else {
-		/* Initialize the counter and
-		 * mark the work hart into smp_pause */
-		atomic_set(&SYNCB, 0);
-		atomic_set(&SYNCA, 0x01234567);
-
-		/* waiting for other Hart to enter the halt */
-		do {
-			barrier();
-		} while (atomic_read(&SYNCB) + 1 < CONFIG_MAX_CPUS);
 
-		/* initialize for the next call */
-		atomic_set(&SYNCA, 0);
-		atomic_set(&SYNCB, 0);
+		atomic_set(&HLS()->entry.sync_a, HART_AWAKE); // mark the hart as awake
+		HLS()->entry.fn(HLS()->entry.arg);
 	}
-#undef SYNCA
-#undef SYNCB
 }
 
+// must only be called by the working hart
 void smp_resume(void (*fn)(void *), void *arg)
 {
-	int hartid = read_csr(mhartid);
+	if (fn == NULL) {
+		printk(BIOS_ERR, "must pass a non-null function pointer\n");
+		return; // we can still boot with one hart
+	}
+
+	int working_hartid = read_csr(mhartid);
+
+	int hart_count = CONFIG_MAX_CPUS;
+	if (CONFIG(RISCV_GET_HART_COUNT_AT_RUNTIME))
+		hart_count = smp_get_hart_count();
 
-	if (fn == NULL)
-		die("must pass a non-null function pointer\n");
+	// check that all harts are present
 
-	for (int i = 0; i < CONFIG_MAX_CPUS; i++) {
-		OTHER_HLS(i)->entry.fn = fn;
-		OTHER_HLS(i)->entry.arg = arg;
+	u32 count_awake_harts = 0;
+	for (int i = 0; i < hart_count; i++) {
+		// The working hart never sleeps. It is a hard working hart.
+		if (i == working_hartid)
+			continue;
+
+		if (atomic_read(&OTHER_HLS(i)->entry.sync_a) != HART_SLEEPING) {
+			/*
+			 * we assume here that the time between smp_pause and smp_resume
+			 * is enough for all harts to reach the smp_pause state.
+			 * But for some reason that was not the case for this hart ...
+			 */
+			printk(BIOS_ERR, "hart %d did not enter smp_pause\n", i);
+			OTHER_HLS(i)->enabled = 0; // disable hart
+		} else {
+			// hart is in wfi (wait for interrupt) state like it should be.
+
+			OTHER_HLS(i)->entry.fn = fn;
+			OTHER_HLS(i)->entry.arg = arg;
+			barrier();
+			set_msip(i, 1); // wake up hart
+		}
 	}
 
-	for (int i = 0; i < CONFIG_MAX_CPUS; i++)
-		if (i != hartid)
-			set_msip(i, 1);
+	printk(BIOS_DEBUG, "waiting for all harts to wake up...\n");
+	// confirm that all harts are awake
+	for (int i = 0; i < hart_count; i++) {
+		// The working hart never sleeps. It is a hard working hart.
+		if (i == working_hartid || !OTHER_HLS(i)->enabled)
+			continue;
 
-	if (HLS()->entry.fn == NULL)
-		die("entry fn not set\n");
+		// wait for hart to publish its waking state
+		while (atomic_read(&OTHER_HLS(i)->entry.sync_a) != HART_AWAKE)
+			;
+		count_awake_harts++;
+	}
+	printk(BIOS_DEBUG, "all harts up and running...\n");
 
-	HLS()->entry.fn(HLS()->entry.arg);
+	if ((hart_count - 1) != count_awake_harts) { // exclude working hart
+		/*
+		 * Apparently one or more harts did not reach smp_pause before smp_resume has
+		 * been called by the working hart. That should not happen and may indicate we
+		 * need a timeout of sorts to make sure we get all harts resumed.
+		 */
+		printk(BIOS_ERR, "some harts were too slow and could not resume\n");
+	}
+	fn(arg); // jump to fn with working hart
 }