/*
 * Early initialization code for riscv virtual memory
 *
 * Copyright 2015 Google Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; version 2 of
 * the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <arch/barrier.h>
#include <arch/encoding.h>
#include <console/console.h>
#include <stdint.h>
#include <string.h>
#include <symbols.h>
#include <vm.h>

/* Delegate controls which traps are delegated to the payload. If you
 * wish to temporarily disable some or all delegation you can, in a
 * debugger, set it to a different value (e.g. 0 to have all traps go
 * to M-mode). In practice, this variable has been a lifesaver.
 * It is still not quite determined which delegations might be
 * disallowed by the spec, so for now we enumerate and set them all. */
static int delegate = 0
	| (1 << CAUSE_MISALIGNED_FETCH)
	| (1 << CAUSE_FAULT_FETCH)
	| (1 << CAUSE_ILLEGAL_INSTRUCTION)
	| (1 << CAUSE_BREAKPOINT)
	| (1 << CAUSE_FAULT_LOAD)
	| (1 << CAUSE_FAULT_STORE)
	| (1 << CAUSE_USER_ECALL)
	;

pte_t *root_page_table;

/* Indent the following text by 2*level spaces */
static void indent(int level)
{
	int i;

	for (i = 0; i < level; i++)
		printk(BIOS_DEBUG, "  ");
}

/*
 * Convert a page table index at a given page table level to a virtual
 * address offset
 */
static uintptr_t index_to_virt_addr(int index, int level)
{
	/*
	 * index is at most RISCV_PGLEVEL_BITS bits wide (not counting
	 * leading zeroes). If level == 0, the expression below shifts
	 * index into the highest bits of a 64-bit number and then
	 * shifts it back down with sign extension.
	 *
	 * If level > 0, the expression works as expected, without any
	 * magic.
	 */
	return ((intptr_t)index)
		<< (64 - RISCV_PGLEVEL_BITS - level * RISCV_PGLEVEL_BITS)
		>> (64 - VA_BITS);
}
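/*
 * Worked example (assuming the Sv39 parameters used elsewhere in this
 * file, i.e. VA_BITS == 39 and RISCV_PGLEVEL_BITS == 9): at level 0,
 * index 0x100 has bit 8 set; the left shift by 64 - 9 = 55 moves that
 * bit into bit 63, and the arithmetic right shift by 64 - 39 = 25 then
 * sign-extends it, yielding the offset 0xffffffc000000000. That is
 * exactly the base of the negative address space map that init_vm()
 * installs at root index 0x100 below.
 */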
/* Dump the page table structures to the console -- helper function */
static void print_page_table_at(pte_t *pt, intptr_t virt_addr, int level)
{
	int i;

	indent(level);
	printk(BIOS_DEBUG, "Level %d page table at 0x%p\n", level, pt);

	for (i = 0; i < RISCV_PGSIZE / sizeof(pte_t); i++) {
		char urwx[8];
		uintptr_t pointer;
		intptr_t next_virt_addr;

		if (!(pt[i] & PTE_V))
			continue;

		urwx[0] = (pt[i] & PTE_U) ? 'u' : '-';
		urwx[1] = (pt[i] & PTE_R) ? 'r' : '-';
		urwx[2] = (pt[i] & PTE_W) ? 'w' : '-';
		urwx[3] = (pt[i] & PTE_X) ? 'x' : '-';
		urwx[4] = '\0';

		next_virt_addr = virt_addr + index_to_virt_addr(i, level);

		pointer = ((uintptr_t)pt[i] >> 10) << RISCV_PGSHIFT;

		indent(level + 1);
		printk(BIOS_DEBUG, "Valid PTE at index %d (0x%016zx -> 0x%zx), ",
		       i, (size_t)next_virt_addr, (size_t)pointer);

		if (PTE_TABLE(pt[i]))
			printk(BIOS_DEBUG, "page table\n");
		else
			printk(BIOS_DEBUG, "protections %s\n", urwx);

		if (PTE_TABLE(pt[i])) {
			print_page_table_at((pte_t *)pointer,
					    next_virt_addr, level + 1);
		}
	}
}

/* Print the page table structures to the console */
void print_page_table(void)
{
	print_page_table_at((void *)(read_csr(sptbr) << RISCV_PGSHIFT), 0, 0);
}

void flush_tlb(void)
{
	asm volatile("sfence.vm");
}

size_t pte_ppn(pte_t pte)
{
	return pte >> PTE_PPN_SHIFT;
}

pte_t ptd_create(uintptr_t ppn)
{
	return (ppn << PTE_PPN_SHIFT) | PTE_V;
}

pte_t pte_create(uintptr_t ppn, int prot, int user)
{
	pte_t pte = (ppn << PTE_PPN_SHIFT) | PTE_R | PTE_V;

	if (prot & PTE_W)
		pte |= PTE_W;
	if (prot & PTE_X)
		pte |= PTE_X;
	if (user)
		pte |= PTE_U;
	return pte;
}

// The current RISCV *physical* address space is this:
// * 0 - 2 GiB: miscellaneous IO devices
// * 2 GiB - 4 GiB: DRAM
// We have determined, also, that if code references a physical address
// not backed by a device, we'll take a fault. In other words, we don't
// need to finely map the memory-mapped devices as we would on an x86.
// We can use GiB mappings for the IO space and we will take a trap
// if we reference hardware that does not exist.
//
// The intent of the RISCV designers is that pages be set up in M-mode
// for lower-privilege software. They have also told me that they
// expect, unlike other platforms, that next-level software use these
// page tables. Some kernels (Linux) prefer the old-fashioned model,
// where the kernel starts with an identity (ID) map and sets up page
// tables as it sees fit. Other kernels (Harvey) are fine with using
// whatever the firmware sets up. We need to accommodate both. So we set
// up the identity map for Linux, but also set up the map for kernels
// that are more willing to conform to the RISCV model. The map is as
// follows:
//
// ID map: map IO space and all of DRAM 1:1 using 1 GiB PTEs,
// i.e. we use 1 GiB PTEs for 4 GiB.
// Linux/BSD uses this mapping just long enough to replace it.
//
// Top 2 GiB map: map the 2 GiB - 4 GiB of physical address space to
// 0xffffffff_80000000. This will be needed until the GNU toolchain can
// compile code to run at 0xffffffc000000000, i.e. the start of Sv39.
//
// Only Harvey/Plan 9 uses this mapping, and temporarily.
//
// Standard RISCV map, long term: map IO space, and all of DRAM, to the
// *lowest* possible negative address for this implementation,
// e.g. 0xffffffc000000000 for Sv39 CPUs. For now we can use GiB PTEs.
//
// RISCV map for now: map IO space, and all of DRAM, starting at
// 0xffff_ffc0_0000_0000, i.e. just as for Sv39.
//
// It is our intent on Harvey (and eventually Akaros) that we use
// this map, once the toolchain can correctly support it.
// We have tested this arrangement and it lets us boot Harvey to user mode.
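// Summary of the Sv39 root page table that init_vm() below builds
// (assuming a 512-entry root table with 1 GiB per entry; the indices
// follow from the code):
//
//   root_pt[0x000..0x003]  1 GiB leaf PTEs, identity map of 0 - 4 GiB
//                          (IO space RW, DRAM RWX)
//   root_pt[0x100..0x103]  copies of the four entries above, so the same
//                          4 GiB also appears at 0xffffffc000000000
//   root_pt[0x1fe..0x1ff]  pointers to the two middle page tables, which
//                          map DRAM at 0xffffffff80000000 with superpages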
void init_vm(uintptr_t virtMemStart, uintptr_t physMemStart, pte_t *pt)
{
	// 0xFFF... - 0xFFFFFFFF81000000 - RISCV_PGSIZE
	intptr_t memorySize = 0x7F000000;

	// middle page table
	pte_t *middle_pt = (void *)pt;
	size_t num_middle_pts = 2; // 3-level page table, 39-bit virtual address space for now

	// root page table
	pte_t *root_pt = (void *)middle_pt + num_middle_pts * RISCV_PGSIZE;

	// zero out middle_pt and root_pt
	memset(middle_pt, 0, (num_middle_pts + 1) * RISCV_PGSIZE);

	// point the last num_middle_pts root entries at the middle page tables
	for (size_t i = 0; i < num_middle_pts; i++)
		root_pt[(1 << RISCV_PGLEVEL_BITS) - num_middle_pts + i] =
			ptd_create(((uintptr_t)middle_pt >> RISCV_PGSHIFT) + i);

	// fill the middle page table
	for (uintptr_t vaddr = virtMemStart, paddr = physMemStart;
	     paddr < physMemStart + memorySize;
	     vaddr += SUPERPAGE_SIZE, paddr += SUPERPAGE_SIZE) {
		int l2_shift = RISCV_PGLEVEL_BITS + RISCV_PGSHIFT;
		size_t l2_idx = (virtMemStart >> l2_shift) &
				((1 << RISCV_PGLEVEL_BITS) - 1);
		l2_idx += ((vaddr - virtMemStart) >> l2_shift);
		middle_pt[l2_idx] = pte_create(paddr >> RISCV_PGSHIFT,
					       PTE_U | PTE_R | PTE_W | PTE_X, 0);
	}

	// IO space. Identity mapped.
	root_pt[0x000] = pte_create(0x00000000 >> RISCV_PGSHIFT,
				    PTE_R | PTE_W, 0);
	root_pt[0x001] = pte_create(0x40000000 >> RISCV_PGSHIFT,
				    PTE_R | PTE_W, 0);
	root_pt[0x002] = pte_create(0x80000000 >> RISCV_PGSHIFT,
				    PTE_R | PTE_W | PTE_X, 0);
	root_pt[0x003] = pte_create(0xc0000000 >> RISCV_PGSHIFT,
				    PTE_R | PTE_W | PTE_X, 0);

	// Negative address space map at 0xffffffc000000000
	root_pt[0x100] = root_pt[0];
	root_pt[0x101] = root_pt[1];
	root_pt[0x102] = root_pt[2];
	root_pt[0x103] = root_pt[3];

	mb();
	root_page_table = root_pt;
	uintptr_t ptbr = ((uintptr_t)root_pt) >> RISCV_PGSHIFT;
	write_csr(sptbr, ptbr);
}

void initVirtualMemory(void)
{
	uintptr_t ms;

	ms = read_csr(mstatus);
	ms = INSERT_FIELD(ms, MSTATUS_VM, VM_CHOICE);
	write_csr(mstatus, ms);
	ms = read_csr(mstatus);

	if (EXTRACT_FIELD(ms, MSTATUS_VM) != VM_CHOICE) {
		printk(BIOS_DEBUG, "We don't have virtual memory...\n");
		return;
	} else {
		printk(BIOS_DEBUG, "-----------------------------\n");
		printk(BIOS_DEBUG, "Virtual memory status enabled\n");
		printk(BIOS_DEBUG, "-----------------------------\n");
	}

	// TODO: Figure out how to grab this from cbfs
	// N.B. We used to map physical memory from 0x81000000, but since
	// kernels need to be able to see the page tables created by
	// firmware, we're going to map from the start of RAM.
	// All this is subject to change as we learn more. Much
	// about RISCV is still in flux.
	printk(BIOS_DEBUG, "Initializing virtual memory...\n");
	uintptr_t physicalStart = 0x80000000;
	uintptr_t virtualStart = 0xffffffff80000000;
	init_vm(virtualStart, physicalStart, (pte_t *)_pagetables);
	mb();
	flush_tlb();

#if IS_ENABLED(CONFIG_DEBUG_PRINT_PAGE_TABLES)
	printk(BIOS_DEBUG, "Finished initializing virtual memory, starting walk...\n");
	print_page_table();
#else
	printk(BIOS_DEBUG, "Finished initializing virtual memory\n");
#endif
}

void mstatus_init(void)
{
	uintptr_t ms = 0;

	ms = INSERT_FIELD(ms, MSTATUS_FS, 3);
	ms = INSERT_FIELD(ms, MSTATUS_XS, 3);
	write_csr(mstatus, ms);

	// clear any pending timer interrupts.
	clear_csr(mip, MIP_STIP | MIP_SSIP);

	// enable machine and supervisor timer and
	// all other supervisor interrupts.
	set_csr(mie, MIP_MTIP | MIP_STIP | MIP_SSIP);

	// Delegate supervisor timer and other interrupts
	// to supervisor mode.
	set_csr(mideleg, MIP_STIP | MIP_SSIP);
	set_csr(medeleg, delegate);

	// Enable all user/supervisor-mode counters using
	// v1.10 register addresses. They moved from the earlier spec.
	// Until we trust our toolchain, use the hardcoded constants.
	// These were in flux, and people who get the older toolchain
	// will have difficult-to-debug failures.
	write_csr(/*mcounteren*/ 0x306, 7);
}
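/*
 * Illustrative sketch only: with the maps installed by init_vm() above,
 * a DRAM physical address in [0x80000000, 0xff000000) is visible at
 * three virtual addresses. These helpers are hypothetical (not part of
 * coreboot's API) and merely restate the constants used in init_vm()
 * and initVirtualMemory().
 */
static inline uintptr_t dram_virt_identity(uintptr_t paddr)
{
	/* ID map: root_pt[0x002..0x003], virt == phys. */
	return paddr;
}

static inline uintptr_t dram_virt_negative(uintptr_t paddr)
{
	/* Negative map: root_pt[0x100..0x103], phys 0 at 0xffffffc000000000. */
	return 0xffffffc000000000ULL + paddr;
}

static inline uintptr_t dram_virt_top2g(uintptr_t paddr)
{
	/* Top-2 GiB map via the middle page tables:
	 * phys 0x80000000 appears at 0xffffffff80000000. */
	return 0xffffffff00000000ULL + paddr;
}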