aboutsummaryrefslogtreecommitdiff
path: root/util/cbfstool/cbfs-mkstage.c
diff options
context:
space:
mode:
Diffstat (limited to 'util/cbfstool/cbfs-mkstage.c')
-rw-r--r--util/cbfstool/cbfs-mkstage.c347
1 files changed, 287 insertions, 60 deletions
diff --git a/util/cbfstool/cbfs-mkstage.c b/util/cbfstool/cbfs-mkstage.c
index dfc93c20e9..5c8014f072 100644
--- a/util/cbfstool/cbfs-mkstage.c
+++ b/util/cbfstool/cbfs-mkstage.c
@@ -28,46 +28,204 @@
#include "cbfs.h"
#include "elf.h"
-static unsigned int idemp(unsigned int x)
-{
- return x;
-}
+/*
+ * Short form: this is complicated, but we've tried making it simple
+ * and we keep hitting problems with our ELF parsing.
+ *
+ * The ELF parsing situation has always been a bit tricky. In fact,
+ * we (and most others) have been getting it wrong in small ways for
+ * years. Recently this has caused real trouble for the ARM V8 build.
+ * In this file we attempt to finally get it right for all variations
+ * of endian-ness and word size and target architectures and
+ * architectures we might get run on. Phew!. To do this we borrow a
+ * page from the FreeBSD NFS xdr model (see elf_ehdr and elf_phdr),
+ * the Plan 9 endianness functions (see xdr.c), and Go interfaces (see
+ * how we use buffer structs in this file). This ends up being a bit
+ * wordy at the lowest level, but greatly simplifies the elf parsing
+ * code and removes a common source of bugs, namely, forgetting to
+ * flip type endianness when referencing a struct member.
+ *
+ * ELF files can have four combinations of data layout: 32/64, and
+ * big/little endian. Further, to add to the fun, depending on the
+ * word size, the size of the ELF structs varies. The coreboot SELF
+ * format is simpler in theory: it's supposed to be always BE, and the
+ * various struct members allow room for growth: the entry point is
+ * always 64 bits, for example, so the size of a SELF struct is
+ * constant, regardless of target architecture word size. Hence, we
+ * need to do some transformation of the ELF files.
+ *
+ * A given architecture, realistically, only supports one of the four
+ * combinations at a time as the 'native' format. Hence, our code has
+ * been sprinkled with every variation of [nh]to[hn][sll] over the
+ * years. We've never quite gotten it all right, however, and a quick
+ * pass over this code revealed another bug. It's all worked because,
+ * until now, all the working platforms that had CBFS were 32 LE. Even then,
+ * however, bugs crept in: we recently realized that we're not
+ * transforming the entry point to big format when we store into the
+ * SELF image.
+ *
+ * The problem is essentially an XDR operation:
+ * we have something in a foreign format and need to transform it.
+ * It's most like XDR because:
+ * 1) the byte order can be wrong
+ * 2) the word size can be wrong
+ * 3) the size of elements in the stream depends on the value
+ * of other elements in the stream
+ * it's not like XDR because:
+ * 1) the byte order can be right
+ * 2) the word size can be right
+ * 3) the struct members are all on a natural alignment
+ *
+ * Hence, this new approach. To cover word size issues, we *always*
+ * transform the two structs we care about, the file header and
+ * program header, into a native struct in the 64 bit format:
+ *
+ * [32,little] -> [Elf64_Ehdr, Elf64_Phdr]
+ * [64,little] -> [Elf64_Ehdr, Elf64_Phdr]
+ * [32,big] -> [Elf64_Ehdr, Elf64_Phdr]
+ * [64,big] -> [Elf64_Ehdr, Elf64_Phdr]
+ * Then we just use those structs, and all the need for inline ntoh* goes away,
+ * as well as all the chances for error.
+ * This works because all the SELF structs have fields large enough for
+ * the largest ELF 64 struct members, and all the Elf64 struct members
+ * are at least large enough for all ELF 32 struct members.
+ * We end up with one function to do all our ELF parsing, and two functions
+ * to transform the headers. For the put case, we also have
+ * XDR functions, and hopefully we'll never again spend 5 years with the
+ * wrong endian-ness on an output value :-)
+ * This should work for all word sizes and endianness we hope to target.
+ * I *really* don't want to be here for 128 bit addresses.
+ *
+ * The parse functions are called with a pointer to an input buffer
+ * struct. One might ask: are there enough bytes in the input buffer?
+ * We know there need to be at *least* sizeof(Elf32_Ehdr) +
+ * sizeof(Elf32_Phdr) bytes. Realistically, there has to be some data
+ * too. If we start to worry, though we have not in the past, we
+ * might apply the simple test: the input buffer needs to be at least
+ * sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) bytes because, even if it's
+ * ELF 32, there's got to be *some* data! This is not theoretically
+ * accurate but it is actually good enough in practice. It allows the
+ * header transformation code to ignore the possibility of underrun.
+ *
+ * We also must accomodate different ELF files, and hence formats,
+ * in the same cbfs invocation. We might load a 64-bit payload
+ * on a 32-bit machine; we might even have a mixed armv7/armv8
+ * SOC or even a system with an x86/ARM!
+ *
+ * A possibly problematic (though unlikely to be so) assumption
+ * is that we expect the BIOS to remain in the lowest 32 bits
+ * of the physical address space. Since ARMV8 has standardized
+ * on that, and x86_64 also has, this seems a safe assumption.
+ *
+ * To repeat, ELF structs are different sizes because ELF struct
+ * members are different sizes, depending on values in the ELF file
+ * header. For this we use the functions defined in xdr.c, which
+ * consume bytes, convert the endianness, and advance the data pointer
+ * in the buffer struct.
+ */
-/* This is a wrapper around the swab32() macro to make it
- * usable for the current implementation of parse_elf_to_stage()
+/* Get the ident array, so we can figure out
+ * endian-ness, word size, and in future other useful
+ * parameters
*/
-static unsigned int swap32(unsigned int x)
+static void
+elf_eident(struct buffer *input, Elf64_Ehdr *ehdr)
{
- return swab32(x);
+ memmove(ehdr->e_ident, input->data, sizeof(ehdr->e_ident));
+ input->data += sizeof(ehdr->e_ident);
+ input->size -= sizeof(ehdr->e_ident);
}
-unsigned int (*elf32_to_native) (unsigned int) = idemp;
-/* returns size of result, or -1 if error */
-int parse_elf_to_stage(const struct buffer *input, struct buffer *output,
- comp_algo algo, uint32_t *location)
+static void
+elf_ehdr(struct buffer *input, Elf64_Ehdr *ehdr, struct xdr *xdr, int bit64)
{
- Elf32_Phdr *phdr;
- Elf32_Ehdr *ehdr = (Elf32_Ehdr *)input->data;
- char *header, *buffer;
-
- int headers;
- int i;
- struct cbfs_stage *stage;
- unsigned int data_start, data_end, mem_end;
+ ehdr->e_type = xdr->get16(input);
+ ehdr->e_machine = xdr->get16(input);
+ ehdr->e_version = xdr->get32(input);
+ if (bit64){
+ ehdr->e_entry = xdr->get64(input);
+ ehdr->e_phoff = xdr->get64(input);
+ ehdr->e_shoff = xdr->get64(input);
+ } else {
+ ehdr->e_entry = xdr->get32(input);
+ ehdr->e_phoff = xdr->get32(input);
+ ehdr->e_shoff = xdr->get32(input);
+ }
+ ehdr->e_flags = xdr->get32(input);
+ ehdr->e_ehsize = xdr->get16(input);
+ ehdr->e_phentsize = xdr->get16(input);
+ ehdr->e_phnum = xdr->get16(input);
+ ehdr->e_shentsize = xdr->get16(input);
+ ehdr->e_shnum = xdr->get16(input);
+ ehdr->e_shstrndx = xdr->get16(input);
+}
- int elf_bigendian = 0;
+static void
+elf_phdr(struct buffer *pinput, Elf64_Phdr *phdr,
+ int entsize, struct xdr *xdr, int bit64)
+{
+ /*
+ * The entsize need not be sizeof(*phdr).
+ * Hence, it is easier to keep a copy of the input,
+ * as the xdr functions may not advance the input
+ * pointer the full entsize; rather than get tricky
+ * we just advance it below.
+ */
+ struct buffer input = *pinput;
+ if (bit64){
+ phdr->p_type = xdr->get32(&input);
+ phdr->p_flags = xdr->get32(&input);
+ phdr->p_offset = xdr->get64(&input);
+ phdr->p_vaddr = xdr->get64(&input);
+ phdr->p_paddr = xdr->get64(&input);
+ phdr->p_filesz = xdr->get64(&input);
+ phdr->p_memsz = xdr->get64(&input);
+ phdr->p_align = xdr->get64(&input);
+ } else {
+ phdr->p_type = xdr->get32(&input);
+ phdr->p_offset = xdr->get32(&input);
+ phdr->p_vaddr = xdr->get32(&input);
+ phdr->p_paddr = xdr->get32(&input);
+ phdr->p_filesz = xdr->get32(&input);
+ phdr->p_memsz = xdr->get32(&input);
+ phdr->p_flags = xdr->get32(&input);
+ phdr->p_align = xdr->get32(&input);
+ }
+ pinput->size -= entsize;
+ pinput->data += entsize;
+}
- comp_func_ptr compress = compression_function(algo);
- if (!compress)
- return -1;
+/* Get the headers from the buffer.
+ * Return -1 in the event of an error.
+ */
+static int
+elf_headers(const struct buffer *pinput, Elf64_Ehdr *ehdr, Elf64_Phdr **pphdr)
+{
+ int i;
+ struct xdr *xdr = &xdr_le;
+ int bit64 = 0;
+ struct buffer input = *(struct buffer *)pinput;
+ struct buffer phdr_buf;
+ Elf64_Phdr *phdr;
- DEBUG("start: parse_elf_to_stage(location=0x%x)\n", *location);
- if (!iself((unsigned char *)input->data)) {
+ if (!iself((unsigned char *)pinput->data)) {
ERROR("The stage file is not in ELF format!\n");
return -1;
}
+ elf_eident(&input, ehdr);
+ bit64 = ehdr->e_ident[EI_CLASS] == ELFCLASS64;
+ /* Assume LE unless we are sure otherwise.
+ * We're not going to take on the task of
+ * fully validating the ELF file. That way
+ * lies madness.
+ */
+ if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+ xdr = &xdr_be;
+
+ elf_ehdr(&input, ehdr, xdr, bit64);
+
// The tool may work in architecture-independent way.
if (arch != CBFS_ARCHITECTURE_UNKNOWN &&
!((ehdr->e_machine == EM_ARM) && (arch == CBFS_ARCHITECTURE_ARMV7)) &&
@@ -76,42 +234,84 @@ int parse_elf_to_stage(const struct buffer *input, struct buffer *output,
return -1;
}
- if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) {
- elf_bigendian = 1;
+ if (pinput->size < ehdr->e_phoff){
+ ERROR("The program header offset is greater than "
+ "the remaining file size."
+ "%ld bytes left, program header offset is %ld \n",
+ pinput->size, ehdr->e_phoff);
+ return -1;
}
- if (elf_bigendian != is_big_endian()) {
- elf32_to_native = swap32;
+ /* cons up an input buffer for the headers.
+ * Note that the program headers can be anywhere,
+ * per the ELF spec, You'd be surprised how many ELF
+ * readers miss this little detail.
+ */
+ phdr_buf.data = &pinput->data[ehdr->e_phoff];
+ phdr_buf.size = ehdr->e_phentsize * ehdr->e_phnum;
+ if (phdr_buf.size > (pinput->size - ehdr->e_phoff)){
+ ERROR("The file is not large enough for the program headers."
+ "%ld bytes left, %ld bytes of headers\n",
+ pinput->size - ehdr->e_phoff, phdr_buf.size);
+ return -1;
}
+ /* gather up all the phdrs.
+ * We do them all at once because there is more
+ * than one loop over all the phdrs.
+ */
+ phdr = calloc(sizeof(*phdr), ehdr->e_phnum);
+ for (i = 0; i < ehdr->e_phnum; i++)
+ elf_phdr(&phdr_buf, &phdr[i], ehdr->e_phentsize, xdr, bit64);
+ *pphdr = phdr;
+ return 0;
+}
- headers = ehdr->e_phnum;
- header = (char *)ehdr;
+/* returns size of result, or -1 if error.
+ * Note that, with the new code, this function
+ * works for all elf files, not just the restricted set.
+ */
+int parse_elf_to_stage(const struct buffer *input, struct buffer *output,
+ comp_algo algo, uint32_t *location)
+{
+ Elf64_Phdr *phdr;
+ Elf64_Ehdr ehdr;
+ char *buffer;
+ struct buffer outheader;
- phdr = (Elf32_Phdr *) & header[elf32_to_native(ehdr->e_phoff)];
+ int headers;
+ int i, outlen;
+ uint32_t data_start, data_end, mem_end;
- /* Now, regular headers - we only care about PT_LOAD headers,
- * because thats what we're actually going to load
- */
+ comp_func_ptr compress = compression_function(algo);
+ if (!compress)
+ return -1;
+
+ DEBUG("start: parse_elf_to_stage(location=0x%x)\n", *location);
+
+ if (elf_headers(input, &ehdr, &phdr) < 0)
+ return -1;
+
+ headers = ehdr.e_phnum;
- data_start = 0xFFFFFFFF;
+ data_start = ~0;
data_end = 0;
mem_end = 0;
for (i = 0; i < headers; i++) {
unsigned int start, mend, rend;
- if (elf32_to_native(phdr[i].p_type) != PT_LOAD)
+ if (phdr[i].p_type != PT_LOAD)
continue;
/* Empty segments are never interesting */
- if (elf32_to_native(phdr[i].p_memsz) == 0)
+ if (phdr[i].p_memsz == 0)
continue;
/* BSS */
- start = elf32_to_native(phdr[i].p_paddr);
+ start = phdr[i].p_paddr;
- mend = start + elf32_to_native(phdr[i].p_memsz);
- rend = start + elf32_to_native(phdr[i].p_filesz);
+ mend = start + phdr[i].p_memsz;
+ rend = start + phdr[i].p_filesz;
if (start < data_start)
data_start = start;
@@ -128,8 +328,9 @@ int parse_elf_to_stage(const struct buffer *input, struct buffer *output,
}
if (data_end <= data_start) {
- ERROR("data ends before it starts. Make sure the "
- "ELF file is correct and resides in ROM space.\n");
+ ERROR("data ends (%08lx) before it starts (%08lx). Make sure "
+ "the ELF file is correct and resides in ROM space.\n",
+ (unsigned long)data_end, (unsigned long)data_start);
exit(1);
}
@@ -146,25 +347,39 @@ int parse_elf_to_stage(const struct buffer *input, struct buffer *output,
for (i = 0; i < headers; i++) {
unsigned int l_start, l_offset = 0;
- if (elf32_to_native(phdr[i].p_type) != PT_LOAD)
+ if (phdr[i].p_type != PT_LOAD)
continue;
- if (elf32_to_native(phdr[i].p_memsz) == 0)
+ if (phdr[i].p_memsz == 0)
continue;
- l_start = elf32_to_native(phdr[i].p_paddr);
+ l_start = phdr[i].p_paddr;
if (l_start < *location) {
l_offset = *location - l_start;
l_start = *location;
}
+ /* A legal ELF file can have a program header with
+ * non-zero length but zero-length file size and a
+ * non-zero offset which, added together, are > than
+ * input->size (i.e. the total file size). So we need
+ * to not even test in the case that p_filesz is zero.
+ */
+ if (! phdr[i].p_filesz)
+ continue;
+ if (input->size < (phdr[i].p_offset + phdr[i].p_filesz)){
+ ERROR("Underflow copying out the segment."
+ "File has %ld bytes left, segment end is %ld\n",
+ input->size, phdr[i].p_offset + phdr[i].p_filesz);
+ return -1;
+ }
memcpy(buffer + (l_start - data_start),
- &header[elf32_to_native(phdr[i].p_offset)+l_offset],
- elf32_to_native(phdr[i].p_filesz)-l_offset);
+ &input->data[phdr[i].p_offset + l_offset],
+ phdr[i].p_filesz - l_offset);
}
/* Now make the output buffer */
- if (buffer_create(output, sizeof(*stage) + data_end - data_start,
+ if (buffer_create(output, sizeof(struct cbfs_stage) + data_end - data_start,
input->name) != 0) {
ERROR("Unable to allocate memory: %m\n");
free(buffer);
@@ -172,19 +387,31 @@ int parse_elf_to_stage(const struct buffer *input, struct buffer *output,
}
memset(output->data, 0, output->size);
- stage = (struct cbfs_stage *)output->data;
-
- stage->load = data_start; /* FIXME: htonll */
- stage->memlen = mem_end - data_start;
- stage->compression = algo;
- stage->entry = ehdr->e_entry; /* FIXME: htonll */
-
- compress(buffer, data_end - data_start, (output->data + sizeof(*stage)),
- (int *)&stage->len);
+ /* Compress the data, at which point we'll know information
+ * to fill out the header. This seems backward but it works because
+ * - the output header is a known size (not always true in many xdr's)
+ * - we do need to know the compressed output size first
+ */
+ compress(buffer, data_end - data_start,
+ (output->data + sizeof(struct cbfs_stage)),
+ &outlen);
free(buffer);
+ /* Set up for output marshaling. */
+ outheader.data = output->data;
+ outheader.size = 0;
+ /* N.B. The original plan was that SELF data was B.E.
+ * but: this is all L.E.
+ * Maybe we should just change the spec.
+ */
+ xdr_le.put32(&outheader, algo);
+ xdr_le.put64(&outheader, ehdr.e_entry);
+ xdr_le.put64(&outheader, data_start);
+ xdr_le.put32(&outheader, outlen);
+ xdr_le.put32(&outheader, mem_end - data_start);
+
if (*location)
*location -= sizeof(struct cbfs_stage);
- output->size = sizeof(*stage) + stage->len;
+ output->size = sizeof(struct cbfs_stage) + outlen;
return 0;
}