summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jonathan Zhang <jonzhang@fb.com>, 2020-10-28 11:35:40 -0700
committer: Marc Jones <marc@marcjonesconsulting.com>, 2020-10-30 18:53:20 +0000
commit: cbbce66baf6fccb7d0fb4f2bf9cb6e456c432c49 (patch)
tree: eb6533a86e0efce9c5889de6dda73dd766bf0d05
parent: 2e9315c4c666f7c49a90298723ec043f79371602 (diff)
cpu/x86: increase timeout for CPUs to check in after 2nd SIPI
Increase the timeout for CPUs to check in after completion of the 2nd SIPI from 10 ms to 100 ms. Update the logging level for MP init failure cases from BIOS_DEBUG to BIOS_ERR.

Without this patch, "mp initialization failure" happens on some reboots of the DeltaLake server. As a consequence, not all 52 CPUs come up in Linux:

[root@localhost ~]# lscpu
...
CPU(s): 40

The following hardware errors are also seen:

[ 4.365762] mce: [Hardware Error]: Machine check events logged
[ 4.366565] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 9: ee2000000003110a
[ 4.367561] mce: [Hardware Error]: TSC 0 ADDR fe9e0000 MISC 228aa040101086
[ 4.368563] mce: [Hardware Error]: PROCESSOR 0:5065b TIME 948438164 SOCKET 0 APIC 0 microcode 700001d

With this patch, no such failure was observed over 370 reboots.

Signed-off-by: Tim Chu <Tim.Chu@quantatw.com>
Signed-off-by: Jonathan Zhang <jonzhang@fb.com>
Change-Id: Iab10f116dd4af152c24d5d8f999928c038a5b208
Reviewed-on: https://review.coreboot.org/c/coreboot/+/46898
Reviewed-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-by: Angel Pons <th3fanbus@gmail.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
-rw-r--r-- src/cpu/x86/mp_init.c 14
1 file changed, 7 insertions, 7 deletions
diff --git a/src/cpu/x86/mp_init.c b/src/cpu/x86/mp_init.c
index 4870529cfe..db212624d0 100644
--- a/src/cpu/x86/mp_init.c
+++ b/src/cpu/x86/mp_init.c
@@ -434,7 +434,7 @@ static int start_aps(struct bus *cpu_bus, int ap_count, atomic_t *num_aps)
if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
- printk(BIOS_DEBUG, "timed out. Aborting.\n");
+ printk(BIOS_ERR, "timed out. Aborting.\n");
return -1;
}
printk(BIOS_DEBUG, "done.\n");
@@ -451,7 +451,7 @@ static int start_aps(struct bus *cpu_bus, int ap_count, atomic_t *num_aps)
if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
- printk(BIOS_DEBUG, "timed out. Aborting.\n");
+ printk(BIOS_ERR, "timed out. Aborting.\n");
return -1;
}
printk(BIOS_DEBUG, "done.\n");
@@ -462,7 +462,7 @@ static int start_aps(struct bus *cpu_bus, int ap_count, atomic_t *num_aps)
LAPIC_DM_STARTUP | sipi_vector);
printk(BIOS_DEBUG, "Waiting for 1st SIPI to complete...");
if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */)) {
- printk(BIOS_DEBUG, "timed out.\n");
+ printk(BIOS_ERR, "timed out.\n");
return -1;
}
printk(BIOS_DEBUG, "done.\n");
@@ -477,7 +477,7 @@ static int start_aps(struct bus *cpu_bus, int ap_count, atomic_t *num_aps)
if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
- printk(BIOS_DEBUG, "timed out. Aborting.\n");
+ printk(BIOS_ERR, "timed out. Aborting.\n");
return -1;
}
printk(BIOS_DEBUG, "done.\n");
@@ -488,14 +488,14 @@ static int start_aps(struct bus *cpu_bus, int ap_count, atomic_t *num_aps)
LAPIC_DM_STARTUP | sipi_vector);
printk(BIOS_DEBUG, "Waiting for 2nd SIPI to complete...");
if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */)) {
- printk(BIOS_DEBUG, "timed out.\n");
+ printk(BIOS_ERR, "timed out.\n");
return -1;
}
printk(BIOS_DEBUG, "done.\n");
/* Wait for CPUs to check in. */
- if (wait_for_aps(num_aps, ap_count, 10000 /* 10 ms */, 50 /* us */)) {
- printk(BIOS_DEBUG, "Not all APs checked in: %d/%d.\n",
+ if (wait_for_aps(num_aps, ap_count, 100000 /* 100 ms */, 50 /* us */)) {
+ printk(BIOS_ERR, "Not all APs checked in: %d/%d.\n",
atomic_read(num_aps), ap_count);
return -1;
}