diff options
Diffstat (limited to 'util/cbfstool/cbfs-mkstage.c')
-rw-r--r-- | util/cbfstool/cbfs-mkstage.c | 347 |
1 files changed, 287 insertions, 60 deletions
diff --git a/util/cbfstool/cbfs-mkstage.c b/util/cbfstool/cbfs-mkstage.c index dfc93c20e9..5c8014f072 100644 --- a/util/cbfstool/cbfs-mkstage.c +++ b/util/cbfstool/cbfs-mkstage.c @@ -28,46 +28,204 @@ #include "cbfs.h" #include "elf.h" -static unsigned int idemp(unsigned int x) -{ - return x; -} +/* + * Short form: this is complicated, but we've tried making it simple + * and we keep hitting problems with our ELF parsing. + * + * The ELF parsing situation has always been a bit tricky. In fact, + * we (and most others) have been getting it wrong in small ways for + * years. Recently this has caused real trouble for the ARM V8 build. + * In this file we attempt to finally get it right for all variations + * of endian-ness and word size and target architectures and + * architectures we might get run on. Phew!. To do this we borrow a + * page from the FreeBSD NFS xdr model (see elf_ehdr and elf_phdr), + * the Plan 9 endianness functions (see xdr.c), and Go interfaces (see + * how we use buffer structs in this file). This ends up being a bit + * wordy at the lowest level, but greatly simplifies the elf parsing + * code and removes a common source of bugs, namely, forgetting to + * flip type endianness when referencing a struct member. + * + * ELF files can have four combinations of data layout: 32/64, and + * big/little endian. Further, to add to the fun, depending on the + * word size, the size of the ELF structs varies. The coreboot SELF + * format is simpler in theory: it's supposed to be always BE, and the + * various struct members allow room for growth: the entry point is + * always 64 bits, for example, so the size of a SELF struct is + * constant, regardless of target architecture word size. Hence, we + * need to do some transformation of the ELF files. + * + * A given architecture, realistically, only supports one of the four + * combinations at a time as the 'native' format. Hence, our code has + * been sprinkled with every variation of [nh]to[hn][sll] over the + * years. We've never quite gotten it all right, however, and a quick + * pass over this code revealed another bug. It's all worked because, + * until now, all the working platforms that had CBFS were 32 LE. Even then, + * however, bugs crept in: we recently realized that we're not + * transforming the entry point to big format when we store into the + * SELF image. + * + * The problem is essentially an XDR operation: + * we have something in a foreign format and need to transform it. + * It's most like XDR because: + * 1) the byte order can be wrong + * 2) the word size can be wrong + * 3) the size of elements in the stream depends on the value + * of other elements in the stream + * it's not like XDR because: + * 1) the byte order can be right + * 2) the word size can be right + * 3) the struct members are all on a natural alignment + * + * Hence, this new approach. To cover word size issues, we *always* + * transform the two structs we care about, the file header and + * program header, into a native struct in the 64 bit format: + * + * [32,little] -> [Elf64_Ehdr, Elf64_Phdr] + * [64,little] -> [Elf64_Ehdr, Elf64_Phdr] + * [32,big] -> [Elf64_Ehdr, Elf64_Phdr] + * [64,big] -> [Elf64_Ehdr, Elf64_Phdr] + * Then we just use those structs, and all the need for inline ntoh* goes away, + * as well as all the chances for error. + * This works because all the SELF structs have fields large enough for + * the largest ELF 64 struct members, and all the Elf64 struct members + * are at least large enough for all ELF 32 struct members. + * We end up with one function to do all our ELF parsing, and two functions + * to transform the headers. For the put case, we also have + * XDR functions, and hopefully we'll never again spend 5 years with the + * wrong endian-ness on an output value :-) + * This should work for all word sizes and endianness we hope to target. + * I *really* don't want to be here for 128 bit addresses. + * + * The parse functions are called with a pointer to an input buffer + * struct. One might ask: are there enough bytes in the input buffer? + * We know there need to be at *least* sizeof(Elf32_Ehdr) + + * sizeof(Elf32_Phdr) bytes. Realistically, there has to be some data + * too. If we start to worry, though we have not in the past, we + * might apply the simple test: the input buffer needs to be at least + * sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) bytes because, even if it's + * ELF 32, there's got to be *some* data! This is not theoretically + * accurate but it is actually good enough in practice. It allows the + * header transformation code to ignore the possibility of underrun. + * + * We also must accomodate different ELF files, and hence formats, + * in the same cbfs invocation. We might load a 64-bit payload + * on a 32-bit machine; we might even have a mixed armv7/armv8 + * SOC or even a system with an x86/ARM! + * + * A possibly problematic (though unlikely to be so) assumption + * is that we expect the BIOS to remain in the lowest 32 bits + * of the physical address space. Since ARMV8 has standardized + * on that, and x86_64 also has, this seems a safe assumption. + * + * To repeat, ELF structs are different sizes because ELF struct + * members are different sizes, depending on values in the ELF file + * header. For this we use the functions defined in xdr.c, which + * consume bytes, convert the endianness, and advance the data pointer + * in the buffer struct. + */ -/* This is a wrapper around the swab32() macro to make it - * usable for the current implementation of parse_elf_to_stage() +/* Get the ident array, so we can figure out + * endian-ness, word size, and in future other useful + * parameters */ -static unsigned int swap32(unsigned int x) +static void +elf_eident(struct buffer *input, Elf64_Ehdr *ehdr) { - return swab32(x); + memmove(ehdr->e_ident, input->data, sizeof(ehdr->e_ident)); + input->data += sizeof(ehdr->e_ident); + input->size -= sizeof(ehdr->e_ident); } -unsigned int (*elf32_to_native) (unsigned int) = idemp; -/* returns size of result, or -1 if error */ -int parse_elf_to_stage(const struct buffer *input, struct buffer *output, - comp_algo algo, uint32_t *location) +static void +elf_ehdr(struct buffer *input, Elf64_Ehdr *ehdr, struct xdr *xdr, int bit64) { - Elf32_Phdr *phdr; - Elf32_Ehdr *ehdr = (Elf32_Ehdr *)input->data; - char *header, *buffer; - - int headers; - int i; - struct cbfs_stage *stage; - unsigned int data_start, data_end, mem_end; + ehdr->e_type = xdr->get16(input); + ehdr->e_machine = xdr->get16(input); + ehdr->e_version = xdr->get32(input); + if (bit64){ + ehdr->e_entry = xdr->get64(input); + ehdr->e_phoff = xdr->get64(input); + ehdr->e_shoff = xdr->get64(input); + } else { + ehdr->e_entry = xdr->get32(input); + ehdr->e_phoff = xdr->get32(input); + ehdr->e_shoff = xdr->get32(input); + } + ehdr->e_flags = xdr->get32(input); + ehdr->e_ehsize = xdr->get16(input); + ehdr->e_phentsize = xdr->get16(input); + ehdr->e_phnum = xdr->get16(input); + ehdr->e_shentsize = xdr->get16(input); + ehdr->e_shnum = xdr->get16(input); + ehdr->e_shstrndx = xdr->get16(input); +} - int elf_bigendian = 0; +static void +elf_phdr(struct buffer *pinput, Elf64_Phdr *phdr, + int entsize, struct xdr *xdr, int bit64) +{ + /* + * The entsize need not be sizeof(*phdr). + * Hence, it is easier to keep a copy of the input, + * as the xdr functions may not advance the input + * pointer the full entsize; rather than get tricky + * we just advance it below. + */ + struct buffer input = *pinput; + if (bit64){ + phdr->p_type = xdr->get32(&input); + phdr->p_flags = xdr->get32(&input); + phdr->p_offset = xdr->get64(&input); + phdr->p_vaddr = xdr->get64(&input); + phdr->p_paddr = xdr->get64(&input); + phdr->p_filesz = xdr->get64(&input); + phdr->p_memsz = xdr->get64(&input); + phdr->p_align = xdr->get64(&input); + } else { + phdr->p_type = xdr->get32(&input); + phdr->p_offset = xdr->get32(&input); + phdr->p_vaddr = xdr->get32(&input); + phdr->p_paddr = xdr->get32(&input); + phdr->p_filesz = xdr->get32(&input); + phdr->p_memsz = xdr->get32(&input); + phdr->p_flags = xdr->get32(&input); + phdr->p_align = xdr->get32(&input); + } + pinput->size -= entsize; + pinput->data += entsize; +} - comp_func_ptr compress = compression_function(algo); - if (!compress) - return -1; +/* Get the headers from the buffer. + * Return -1 in the event of an error. + */ +static int +elf_headers(const struct buffer *pinput, Elf64_Ehdr *ehdr, Elf64_Phdr **pphdr) +{ + int i; + struct xdr *xdr = &xdr_le; + int bit64 = 0; + struct buffer input = *(struct buffer *)pinput; + struct buffer phdr_buf; + Elf64_Phdr *phdr; - DEBUG("start: parse_elf_to_stage(location=0x%x)\n", *location); - if (!iself((unsigned char *)input->data)) { + if (!iself((unsigned char *)pinput->data)) { ERROR("The stage file is not in ELF format!\n"); return -1; } + elf_eident(&input, ehdr); + bit64 = ehdr->e_ident[EI_CLASS] == ELFCLASS64; + /* Assume LE unless we are sure otherwise. + * We're not going to take on the task of + * fully validating the ELF file. That way + * lies madness. + */ + if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + xdr = &xdr_be; + + elf_ehdr(&input, ehdr, xdr, bit64); + // The tool may work in architecture-independent way. if (arch != CBFS_ARCHITECTURE_UNKNOWN && !((ehdr->e_machine == EM_ARM) && (arch == CBFS_ARCHITECTURE_ARMV7)) && @@ -76,42 +234,84 @@ int parse_elf_to_stage(const struct buffer *input, struct buffer *output, return -1; } - if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) { - elf_bigendian = 1; + if (pinput->size < ehdr->e_phoff){ + ERROR("The program header offset is greater than " + "the remaining file size." + "%ld bytes left, program header offset is %ld \n", + pinput->size, ehdr->e_phoff); + return -1; } - if (elf_bigendian != is_big_endian()) { - elf32_to_native = swap32; + /* cons up an input buffer for the headers. + * Note that the program headers can be anywhere, + * per the ELF spec, You'd be surprised how many ELF + * readers miss this little detail. + */ + phdr_buf.data = &pinput->data[ehdr->e_phoff]; + phdr_buf.size = ehdr->e_phentsize * ehdr->e_phnum; + if (phdr_buf.size > (pinput->size - ehdr->e_phoff)){ + ERROR("The file is not large enough for the program headers." + "%ld bytes left, %ld bytes of headers\n", + pinput->size - ehdr->e_phoff, phdr_buf.size); + return -1; } + /* gather up all the phdrs. + * We do them all at once because there is more + * than one loop over all the phdrs. + */ + phdr = calloc(sizeof(*phdr), ehdr->e_phnum); + for (i = 0; i < ehdr->e_phnum; i++) + elf_phdr(&phdr_buf, &phdr[i], ehdr->e_phentsize, xdr, bit64); + *pphdr = phdr; + return 0; +} - headers = ehdr->e_phnum; - header = (char *)ehdr; +/* returns size of result, or -1 if error. + * Note that, with the new code, this function + * works for all elf files, not just the restricted set. + */ +int parse_elf_to_stage(const struct buffer *input, struct buffer *output, + comp_algo algo, uint32_t *location) +{ + Elf64_Phdr *phdr; + Elf64_Ehdr ehdr; + char *buffer; + struct buffer outheader; - phdr = (Elf32_Phdr *) & header[elf32_to_native(ehdr->e_phoff)]; + int headers; + int i, outlen; + uint32_t data_start, data_end, mem_end; - /* Now, regular headers - we only care about PT_LOAD headers, - * because thats what we're actually going to load - */ + comp_func_ptr compress = compression_function(algo); + if (!compress) + return -1; + + DEBUG("start: parse_elf_to_stage(location=0x%x)\n", *location); + + if (elf_headers(input, &ehdr, &phdr) < 0) + return -1; + + headers = ehdr.e_phnum; - data_start = 0xFFFFFFFF; + data_start = ~0; data_end = 0; mem_end = 0; for (i = 0; i < headers; i++) { unsigned int start, mend, rend; - if (elf32_to_native(phdr[i].p_type) != PT_LOAD) + if (phdr[i].p_type != PT_LOAD) continue; /* Empty segments are never interesting */ - if (elf32_to_native(phdr[i].p_memsz) == 0) + if (phdr[i].p_memsz == 0) continue; /* BSS */ - start = elf32_to_native(phdr[i].p_paddr); + start = phdr[i].p_paddr; - mend = start + elf32_to_native(phdr[i].p_memsz); - rend = start + elf32_to_native(phdr[i].p_filesz); + mend = start + phdr[i].p_memsz; + rend = start + phdr[i].p_filesz; if (start < data_start) data_start = start; @@ -128,8 +328,9 @@ int parse_elf_to_stage(const struct buffer *input, struct buffer *output, } if (data_end <= data_start) { - ERROR("data ends before it starts. Make sure the " - "ELF file is correct and resides in ROM space.\n"); + ERROR("data ends (%08lx) before it starts (%08lx). Make sure " + "the ELF file is correct and resides in ROM space.\n", + (unsigned long)data_end, (unsigned long)data_start); exit(1); } @@ -146,25 +347,39 @@ int parse_elf_to_stage(const struct buffer *input, struct buffer *output, for (i = 0; i < headers; i++) { unsigned int l_start, l_offset = 0; - if (elf32_to_native(phdr[i].p_type) != PT_LOAD) + if (phdr[i].p_type != PT_LOAD) continue; - if (elf32_to_native(phdr[i].p_memsz) == 0) + if (phdr[i].p_memsz == 0) continue; - l_start = elf32_to_native(phdr[i].p_paddr); + l_start = phdr[i].p_paddr; if (l_start < *location) { l_offset = *location - l_start; l_start = *location; } + /* A legal ELF file can have a program header with + * non-zero length but zero-length file size and a + * non-zero offset which, added together, are > than + * input->size (i.e. the total file size). So we need + * to not even test in the case that p_filesz is zero. + */ + if (! phdr[i].p_filesz) + continue; + if (input->size < (phdr[i].p_offset + phdr[i].p_filesz)){ + ERROR("Underflow copying out the segment." + "File has %ld bytes left, segment end is %ld\n", + input->size, phdr[i].p_offset + phdr[i].p_filesz); + return -1; + } memcpy(buffer + (l_start - data_start), - &header[elf32_to_native(phdr[i].p_offset)+l_offset], - elf32_to_native(phdr[i].p_filesz)-l_offset); + &input->data[phdr[i].p_offset + l_offset], + phdr[i].p_filesz - l_offset); } /* Now make the output buffer */ - if (buffer_create(output, sizeof(*stage) + data_end - data_start, + if (buffer_create(output, sizeof(struct cbfs_stage) + data_end - data_start, input->name) != 0) { ERROR("Unable to allocate memory: %m\n"); free(buffer); @@ -172,19 +387,31 @@ int parse_elf_to_stage(const struct buffer *input, struct buffer *output, } memset(output->data, 0, output->size); - stage = (struct cbfs_stage *)output->data; - - stage->load = data_start; /* FIXME: htonll */ - stage->memlen = mem_end - data_start; - stage->compression = algo; - stage->entry = ehdr->e_entry; /* FIXME: htonll */ - - compress(buffer, data_end - data_start, (output->data + sizeof(*stage)), - (int *)&stage->len); + /* Compress the data, at which point we'll know information + * to fill out the header. This seems backward but it works because + * - the output header is a known size (not always true in many xdr's) + * - we do need to know the compressed output size first + */ + compress(buffer, data_end - data_start, + (output->data + sizeof(struct cbfs_stage)), + &outlen); free(buffer); + /* Set up for output marshaling. */ + outheader.data = output->data; + outheader.size = 0; + /* N.B. The original plan was that SELF data was B.E. + * but: this is all L.E. + * Maybe we should just change the spec. + */ + xdr_le.put32(&outheader, algo); + xdr_le.put64(&outheader, ehdr.e_entry); + xdr_le.put64(&outheader, data_start); + xdr_le.put32(&outheader, outlen); + xdr_le.put32(&outheader, mem_end - data_start); + if (*location) *location -= sizeof(struct cbfs_stage); - output->size = sizeof(*stage) + stage->len; + output->size = sizeof(struct cbfs_stage) + outlen; return 0; } |