From f4e3f15b44f0d0e117781d194f700cc19b3a88c7 Mon Sep 17 00:00:00 2001
From: Hsuan Ting Chen <roccochen@chromium.org>
Date: Thu, 6 Jul 2023 15:58:42 +0800
Subject: lib: Introduce new parsing rules for ux_locales.c

Introduce new parsing rules for ux_locales.c:ux_locales_get_text():
* Add a version byte, PRERAM_LOCALES_VERSION_BYTE, at the beginning of
  the file. This provides more flexibility if we want to change the
  format of the preram_locales region later.
* Add a new delimiter 0x01 between consecutive string_name entries. This
  removes the ambiguity that arises when a 'string_name' and a
  'localized_string' are identical.

Also fix two bugs:
1. The search for the language ID could run past the end of the current
   string_name's entries and continue into the following string_names.
2. 'move_next()' could return an offset greater than 'size' because it
   incremented the offset unconditionally.

Finally, make some minor improvements to some existing comments.

BUG=b:264666392, b:289995591
BRANCH=brya
TEST=emerge-brya coreboot chromeos-bootimage

Signed-off-by: Hsuan Ting Chen <roccochen@chromium.org>
Change-Id: Ic0916a0badd7071fa2c43ee9cfc76ca5e79dbf8f
Reviewed-on: https://review.coreboot.org/c/coreboot/+/76320
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Yu-Ping Wu <yupingso@google.com>
---
 src/lib/ux_locales.c | 91 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 57 insertions(+), 34 deletions(-)

(limited to 'src')

diff --git a/src/lib/ux_locales.c b/src/lib/ux_locales.c
index 86126195b4..a2e38fc1dc 100644
--- a/src/lib/ux_locales.c
+++ b/src/lib/ux_locales.c
@@ -10,8 +10,15 @@
 
 #define LANG_ID_MAX 100
 #define LANG_ID_LEN 3
+
+#define PRERAM_LOCALES_VERSION_BYTE 0x01
 #define PRERAM_LOCALES_NAME "preram_locales"
 
+/* We need different delimiters to deal with the case where 'string_name' is the same as
+   'localized_string'. */
+#define DELIM_STR 0x00
+#define DELIM_NAME 0x01
+
 /*
  * Devices which support early vga have the capability to show localized text in
  * Code Page 437 encoding. (see src/drivers/pc80/vga/vga_font_8x16.c)
@@ -19,9 +26,11 @@
  * preram_locales located in CBFS is an uncompressed file located in either RO
  * or RW CBFS. It contains the localization information in the following format:
  *
+ * [PRERAM_LOCALES_VERSION_BYTE]
  * [string_name_1] [\x00]
  * [language_id_1] [\x00] [localized_string_1] [\x00]
  * [language_id_2] [\x00] [localized_string_2] [\x00] ...
+ * [\x01]
  * [string_name_2] [\x00] ...
  *
  * This file contains tools to locate the file and search for localized strings
@@ -62,31 +71,36 @@ static void *locales_get_map(size_t *size_out, bool unmap)
 	return cached_state.data;
 }
 
-/* Move to the next string in the data. Strings are separated by 0x00. */
-static size_t move_next(const char *data, size_t offset, size_t size)
+/* Move to the next string in the data. Strings are separated by delim. */
+static size_t move_next(const char *data, size_t offset, size_t size, char delim)
 {
-	if (offset >= size)
-		return size;
-	while (offset < size && data[offset] != '\0')
+	while (offset < size && data[offset] != delim)
+		offset++;
+	/* If we found delim, move to the start of the next string. */
+	if (offset < size)
 		offset++;
-	offset++;
 	return offset;
 }
 
-/* Find the next occurrence of the specific string. */
+/* Find the next occurrence of the specific string. Strings are separated by delim. */
 static size_t search_for(const char *data, size_t offset, size_t size,
-			 const char *str)
+			 const char *str, char delim)
 {
-	if (offset >= size)
-		return size;
 	while (offset < size) {
 		if (!strncmp(data + offset, str, size - offset))
 			return offset;
-		offset = move_next(data, offset, size);
+		offset = move_next(data, offset, size, delim);
 	}
 	return size;
 }
 
+/* Find the next occurrence of the string_name, which should always follow a DELIM_NAME. */
+static inline size_t search_for_name(const char *data, size_t offset, size_t size,
+				     const char *name)
+{
+	return search_for(data, offset, size, name, DELIM_NAME);
+}
+
 /* Find the next occurrence of the integer ID, where ID is less than 100. */
 static size_t search_for_id(const char *data, size_t offset, size_t size,
 			    int id)
@@ -95,17 +109,18 @@ static size_t search_for_id(const char *data, size_t offset, size_t size,
 		return offset;
 	char int_to_str[LANG_ID_LEN] = {};
 	snprintf(int_to_str, LANG_ID_LEN, "%d", id);
-	return search_for(data, offset, size, int_to_str);
+	return search_for(data, offset, size, int_to_str, DELIM_STR);
 }
 
 const char *ux_locales_get_text(const char *name)
 {
 	const char *data;
-	size_t size, offset, name_offset, next;
+	size_t size, offset, name_offset, next_name_offset, next;
 	uint32_t lang_id;
+	unsigned char version;
 
 	data = locales_get_map(&size, false);
-	if (!data) {
+	if (!data || size == 0) {
 		printk(BIOS_ERR, "%s: %s not found.\n", __func__,
 		       PRERAM_LOCALES_NAME);
 		return NULL;
@@ -123,40 +138,48 @@ const char *ux_locales_get_text(const char *name)
 	printk(BIOS_INFO, "%s: Search for %s with language ID: %u\n",
 	       __func__, name, lang_id);
 
-	/* Search for name. */
-	offset = search_for(data, 0, size, name);
+	/* Check if the version byte is the expected version. */
+	version = (unsigned char)data[0];
+	if (version != PRERAM_LOCALES_VERSION_BYTE) {
+		printk(BIOS_ERR, "%s: The version %u is not the expected one %u\n",
+		       __func__, version, PRERAM_LOCALES_VERSION_BYTE);
+		return NULL;
+	}
+
+	/* Search for name. Skip the version byte. */
+	offset = search_for_name(data, 1, size, name);
 	if (offset >= size) {
 		printk(BIOS_ERR, "%s: Name %s not found.\n", __func__, name);
 		return NULL;
 	}
 	name_offset = offset;
 
-	/* Search for language ID. */
-	offset = search_for_id(data,  name_offset, size, lang_id);
-	/* Language ID not supported; fallback to English if the current
-	 * language is not English (0). */
-	if (offset >= size) {
-		/*
-		 * Since we only support a limited charset, it is very normal
-		 * that a language is not supported and we fallback here
-		 * silently.
-		 */
+	/* Search for language ID. We should not search beyond the range of the current
+	   string_name. */
+	next_name_offset = move_next(data, offset, size, DELIM_NAME);
+	assert(next_name_offset <= size);
+	offset = search_for_id(data,  name_offset, next_name_offset, lang_id);
+	/* Language ID not supported; fallback to English if the current language is not
+	   English (0). */
+	if (offset >= next_name_offset) {
+		/* Since we only support a limited charset, it is very normal that a language
+		   is not supported and we fallback here silently. */
 		if (lang_id != 0)
-			offset = search_for_id(data, name_offset, size, 0);
-		if (offset >= size) {
-			printk(BIOS_ERR, "%s: Neither %d nor 0 found.\n",
-			       __func__, lang_id);
+			offset = search_for_id(data, name_offset, next_name_offset, 0);
+		if (offset >= next_name_offset) {
+			printk(BIOS_ERR, "%s: Neither %d nor 0 found.\n", __func__, lang_id);
 			return NULL;
 		}
 	}
 
-	offset = move_next(data, offset, size);
-	if (offset >= size)
+	/* Move to the corresponding localized_string. */
+	offset = move_next(data, offset, next_name_offset, DELIM_STR);
+	if (offset >= next_name_offset)
 		return NULL;
 
 	/* Validity check that the returned string must be NULL terminated. */
-	next = move_next(data, offset, size) - 1;
-	if (next >= size || data[next] != '\0') {
+	next = move_next(data, offset, next_name_offset, DELIM_STR) - 1;
+	if (next >= next_name_offset || data[next] != '\0') {
 		printk(BIOS_ERR, "%s: %s is not NULL terminated.\n",
 		       __func__, PRERAM_LOCALES_NAME);
 		return NULL;
-- 
cgit v1.2.3