diff --git a/kernel/arch/aarch64/trap/trap.cpp b/kernel/arch/aarch64/trap/trap.cpp index a6796acd..7f09e07d 100644 --- a/kernel/arch/aarch64/trap/trap.cpp +++ b/kernel/arch/aarch64/trap/trap.cpp @@ -13,6 +13,7 @@ #include "timer/timer.h" #include "sched/sched.h" #include "sched/task.h" +#include "mm/mm.h" // Forward declaration of syscall dispatch extern "C" void stlx_aarch64_syscall_dispatch(aarch64::trap_frame* tf); @@ -79,6 +80,35 @@ void stlx_aarch64_el0_sync_handler(aarch64::trap_frame* tf) { return; } + // If it's a page fault, attempt to handle it for on-demand paging + if (in_user_code && ( + ec == aarch64::EC_DATA_ABORT_LOWER || + ec == aarch64::EC_INST_ABORT_LOWER) + ) { + uintptr_t fault_addr = aarch64::get_far(tf); + uint32_t pf_flags = 0; + + // DFSC[5:0] is in ESR.ISS bits [5:0] for data/instruction aborts. + uint32_t dfsc = esr & 0x3F; + uint32_t fault_class = dfsc >> 2; // top 4 bits identify the class + if (fault_class == 0b0011) pf_flags |= mm::PF_FLAG_PRESENT; // permission fault + + // ESR.ISS bit 6 = WnR (Write not Read) for data aborts. 
+ if (ec == aarch64::EC_DATA_ABORT_LOWER && (esr & (1u << 6))) { + pf_flags |= mm::PF_FLAG_WRITE; + } + + if (ec == aarch64::EC_INST_ABORT_LOWER) { + pf_flags |= mm::PF_FLAG_INSTRUCTION; + } + + if (mm::handle_user_pf(guard.task_core->mm_ctx, fault_addr, pf_flags)) { + // Fault has been handled successfully, restart instruction + restore_post_trap_elevation_state(); + return; + } + } + if (in_user_code && ( ec == aarch64::EC_DATA_ABORT_LOWER || ec == aarch64::EC_INST_ABORT_LOWER || diff --git a/kernel/arch/x86_64/trap/trap.cpp b/kernel/arch/x86_64/trap/trap.cpp index 4af06169..a0d6554c 100644 --- a/kernel/arch/x86_64/trap/trap.cpp +++ b/kernel/arch/x86_64/trap/trap.cpp @@ -1,4 +1,4 @@ -#include "trap_frame.h" +#include "trap/trap_frame.h" #include "defs/vectors.h" #include "irq/irq.h" #include "io/serial.h" @@ -10,6 +10,7 @@ #include "msi/msi.h" #include "sched/sched.h" #include "sched/task.h" +#include "mm/mm.h" namespace sched { __PRIVILEGED_CODE void on_yield(x86::trap_frame* tf); @@ -90,6 +91,23 @@ extern "C" __PRIVILEGED_CODE void stlx_x86_64_trap_handler(x86::trap_frame* tf) return; } + // If it's a page fault, attempt to handle it for on-demand paging + if (in_user_code && tf->vector == x86::EXC_PAGE_FAULT) { + uintptr_t fault_addr = x86::read_cr2(); + uint64_t ec = tf->error_code; + uint32_t pf_flags = 0; + if (ec & 0x1) pf_flags |= mm::PF_FLAG_PRESENT; + if (ec & 0x2) pf_flags |= mm::PF_FLAG_WRITE; + if (ec & 0x10) pf_flags |= mm::PF_FLAG_INSTRUCTION; + + if (mm::handle_user_pf(irq_task_core->mm_ctx, fault_addr, pf_flags)) { + // Fault has been handled successfully, restart instruction + irq_task_core->flags &= ~sched::TASK_FLAG_IN_IRQ; + restore_post_trap_elevation_state(); + return; + } + } + if (in_user_code && ( tf->vector == x86::EXC_PAGE_FAULT || tf->vector == x86::EXC_GENERAL_PROTECTION || diff --git a/kernel/drivers/graphics/gfxfb.cpp b/kernel/drivers/graphics/gfxfb.cpp index d88b04dd..08377143 100644 --- a/kernel/drivers/graphics/gfxfb.cpp +++ 
b/kernel/drivers/graphics/gfxfb.cpp
@@ -6,6 +6,7 @@
 #include "fs/fs.h"
 #include "fs/node.h"
 #include "mm/heap.h"
+#include "mm/mm.h"
 #include "mm/paging_types.h"
 #include "mm/pmm_types.h"
 #include "mm/uaccess.h"
diff --git a/kernel/exec/elf.cpp b/kernel/exec/elf.cpp
index 6ae88ba3..227bfb97 100644
--- a/kernel/exec/elf.cpp
+++ b/kernel/exec/elf.cpp
@@ -5,6 +5,7 @@
 #include "mm/heap.h"
 #include "mm/paging.h"
 #include "mm/pmm.h"
+#include "mm/mm.h"
 #include "mm/vma.h"
 #include "dynpriv/dynpriv.h"
 #include "common/string.h"
diff --git a/kernel/mm/mm.cpp b/kernel/mm/mm.cpp
index bce6053e..f2f01428 100644
--- a/kernel/mm/mm.cpp
+++ b/kernel/mm/mm.cpp
@@ -4,10 +4,44 @@
 #include "mm/kva.h"
 #include "mm/vmm.h"
 #include "mm/heap.h"
+#include "mm/paging.h"
+#include "mm/shmem.h"
+#include "common/string.h"
 #include "common/logging.h"
 
 namespace mm {
 
+/**
+ * @brief Tear down an address space: unmap every VMA, release the pages the
+ *        context owns, destroy the user page-table root and free the context.
+ * @param self Context to destroy; nullptr is ignored.
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE
+void mm_context::ref_destroy(mm_context* self) {
+    if (!self) {
+        return;
+    }
+
+    sync::mutex_lock(self->lock);
+    while (vma* node = self->vmas.min()) {
+        // Shared and device pages are only unmapped here; pages of every other
+        // VMA are unmapped and handed back to the PMM.
+        if (node->flags & (VMA_FLAG_SHARED | VMA_FLAG_DEVICE)) {
+            unmap_pages_only(self, node->start, node->end);
+        } else {
+            unmap_and_free_pages(self, node->start, node->end);
+        }
+        self->vmas.remove(*node);
+        free_vma(node);
+    }
+    sync::mutex_unlock(self->lock);
+
+    paging::destroy_user_pt_root(self->pt_root);
+    self->pt_root = 0;
+    heap::kfree_delete(self);
+}
+
 /**
  * @note Privilege: **required**
  */
@@ -40,4 +74,654 @@ __PRIVILEGED_CODE int32_t init() {
     return OK;
 }
 
+/**
+ * @brief Resolve a user page fault by demand-populating the faulting page.
+ *        Only not-present faults inside VMA_FLAG_STACK areas are serviced; the
+ *        new page is zero-filled and mapped with the VMA's protection.
+ * @return true when the faulting access may be retried, false otherwise.
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE
+bool handle_user_pf(
+    mm_context* mm_ctx,
+    uintptr_t fault_address,
+    uint64_t pf_flags
+) {
+    if (!mm_ctx) {
+        return false;
+    }
+
+    /**
+     * For now, on-demand paging is not yet fully complete, so
+     * the only operation that's supported is lazy stack growing.
+     */
+    if (pf_flags & PF_FLAG_PRESENT) {
+        return false;
+    }
+
+    uintptr_t page_addr = fault_address & ~(pmm::PAGE_SIZE - 1);
+
+    sync::mutex_lock(mm_ctx->lock);
+
+    // Get the virtual memory area for this page
+    vma* vm = vma_find_locked(mm_ctx, page_addr);
+
+    // Ensure that on-demand paging is supported for this page type
+    if (!vm || !(vm->flags & VMA_FLAG_STACK)) {
+        sync::mutex_unlock(mm_ctx->lock);
+        return false;
+    }
+
+    // Refuse accesses the VMA's protection forbids before spending a page on
+    // them: the retried instruction would only fault again, this time as a
+    // permission violation that is rejected above.
+    const bool write_denied = (pf_flags & PF_FLAG_WRITE) && !(vm->prot & MM_PROT_WRITE);
+    const bool exec_denied = (pf_flags & PF_FLAG_INSTRUCTION) && !(vm->prot & MM_PROT_EXEC);
+    if (vm->prot == 0 || write_denied || exec_denied) {
+        sync::mutex_unlock(mm_ctx->lock);
+        return false;
+    }
+
+    // Concurrent page fault on same page won by another CPU, retry
+    if (paging::get_physical(page_addr, mm_ctx->pt_root) != 0) {
+        sync::mutex_unlock(mm_ctx->lock);
+        return true;
+    }
+
+    // Allocate a new physical page to back the memory
+    pmm::phys_addr_t phys = pmm::alloc_page();
+    if (phys == 0) {
+        sync::mutex_unlock(mm_ctx->lock);
+        return false; // OOM - my favorite thing
+    }
+
+    // Zero out the memory
+    string::memset(paging::phys_to_virt(phys), 0, pmm::PAGE_SIZE);
+
+    // Setup the PTE with appropriate protection bits
+    paging::page_flags_t pagefl = prot_to_page_flags(vm->prot);
+    if (paging::map_page(page_addr, phys, pagefl, mm_ctx->pt_root) != paging::OK) {
+        pmm::free_page(phys);
+        sync::mutex_unlock(mm_ctx->lock);
+        return false;
+    }
+
+    sync::mutex_unlock(mm_ctx->lock);
+    log::info("Lazily loading stack page: %p\n", page_addr);
+    return true;
+}
+
+/**
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE mm_context* mm_context_create() {
+    mm_context* mm_ctx = heap::kalloc_new<mm_context>();
+    if (!mm_ctx) {
+        return nullptr;
+    }
+
+    mm_ctx->pt_root = paging::create_user_pt_root();
+    if (mm_ctx->pt_root == 0) {
+        heap::kfree_delete(mm_ctx);
+        return nullptr;
+    }
+
+    // The mmap window stops below the area reserved for the largest possible
+    // user stack plus its guard pages.
+    mm_ctx->mmap_base = MMAP_BASE_DEFAULT;
+    mm_ctx->mmap_end = USER_STACK_TOP -
+        (USER_STACK_MAX_PAGES + USER_STACK_GUARD_PAGES) * pmm::PAGE_SIZE;
+    if (mm_ctx->mmap_end <= mm_ctx->mmap_base) {
+        paging::destroy_user_pt_root(mm_ctx->pt_root);
+        heap::kfree_delete(mm_ctx);
+        return nullptr;
+    }
+
+    mm_ctx->lock.init();
+    return mm_ctx;
+}
+
+/**
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE void mm_context_add_ref(mm_context* mm_ctx) {
+    if (mm_ctx) {
+        mm_ctx->add_ref();
+    }
+}
+
+/**
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE void mm_context_release(mm_context* mm_ctx) {
+    if (!mm_ctx) {
+        return;
+    }
+
+    if (mm_ctx->release()) {
+        mm_context::ref_destroy(mm_ctx);
+    }
+}
+
+/**
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE int32_t mm_context_add_vma(
+    mm_context* mm_ctx,
+    uintptr_t start,
+    size_t length,
+    uint32_t prot,
+    uint32_t vma_flags
+) {
+    if (!mm_ctx || (prot & ~MM_PROT_MASK) != 0) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+    if (!is_page_aligned(start)) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    size_t aligned_len = pmm::page_align_up(length);
+    uintptr_t end = 0;
+    if (!range_from_len(start, aligned_len, end)) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    sync::mutex_lock(mm_ctx->lock);
+
+    vma* node = alloc_vma(start, end, prot, vma_flags);
+    if (!node) {
+        sync::mutex_unlock(mm_ctx->lock);
+        return MM_CTX_ERR_NO_MEM;
+    }
+
+    if (!vma_insert_locked(mm_ctx, node)) {
+        free_vma(node);
+        sync::mutex_unlock(mm_ctx->lock);
+        return MM_CTX_ERR_EXISTS;
+    }
+
+    coalesce_all_locked(mm_ctx);
+    sync::mutex_unlock(mm_ctx->lock);
+    return MM_CTX_OK;
+}
+
+/**
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE int32_t mm_context_map_anonymous(
+    mm_context* mm_ctx,
+    uintptr_t addr,
+    size_t length,
+    uint32_t prot,
+    uint32_t map_flags,
+    uintptr_t* out_addr
+) {
+    if (!mm_ctx || !out_addr) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+    if ((prot & ~MM_PROT_MASK) != 0) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+    if ((map_flags & ~MM_MAP_ALLOWED_FLAGS) != 0) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+    if (!(map_flags & MM_MAP_PRIVATE) || !(map_flags & MM_MAP_ANONYMOUS)) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    size_t aligned_len = pmm::page_align_up(length);
+    if (aligned_len == 0) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    const bool fixed = (map_flags & (MM_MAP_FIXED | MM_MAP_FIXED_NOREPLACE)) != 0;
+    const bool no_replace = (map_flags & MM_MAP_FIXED_NOREPLACE) != 0;
+    const bool stack_map = (map_flags & MM_MAP_STACK) != 0;
+
+    uintptr_t start = 0;
+    uintptr_t end = 0;
+
+    if (fixed) {
+        if (!is_page_aligned(addr)) {
+            return MM_CTX_ERR_INVALID_ARG;
+        }
+        start = addr;
+        if (!range_from_len(start, aligned_len, end)) {
+            return MM_CTX_ERR_INVALID_ARG;
+        }
+
+        if (!stack_map && (start < mm_ctx->mmap_base || end > mm_ctx->mmap_end)) {
+            return MM_CTX_ERR_NO_VIRT;
+        }
+    }
+
+    sync::mutex_lock(mm_ctx->lock);
+
+    if (fixed) {
+        if (no_replace && vma_find_overlap_locked(mm_ctx, start, end)) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_EXISTS;
+        }
+
+        if (!no_replace) {
+            int32_t rc = unmap_range_locked(mm_ctx, start, end);
+            if (rc != MM_CTX_OK) {
+                sync::mutex_unlock(mm_ctx->lock);
+                return rc;
+            }
+        }
+    } else {
+        start = vma_find_gap_topdown_locked(mm_ctx, aligned_len);
+        if (start == 0) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_NO_VIRT;
+        }
+        end = start + aligned_len;
+    }
+
+    // Lazy mappings are populated by handle_user_pf() on first touch. That handler
+    // only services VMA_FLAG_STACK areas, so the hint is honoured for stack
+    // mappings only; anything else is backed eagerly to stay accessible.
+    const bool lazy = stack_map && (map_flags & MM_MAP_LAZY) != 0;
+    if (!lazy) {
+        paging::page_flags_t page_flags = prot_to_page_flags(prot);
+        uintptr_t mapped_end = start;
+        for (uintptr_t vaddr = start; vaddr < end; vaddr += pmm::PAGE_SIZE) {
+            pmm::phys_addr_t phys = pmm::alloc_page();
+            if (phys == 0) {
+                rollback_new_pages(mm_ctx, start, mapped_end);
+                sync::mutex_unlock(mm_ctx->lock);
+                return MM_CTX_ERR_NO_MEM;
+            }
+
+            string::memset(paging::phys_to_virt(phys), 0, pmm::PAGE_SIZE);
+            if (paging::map_page(vaddr, phys, page_flags, mm_ctx->pt_root) != paging::OK) {
+                pmm::free_page(phys);
+                rollback_new_pages(mm_ctx, start, mapped_end);
+                sync::mutex_unlock(mm_ctx->lock);
+                return MM_CTX_ERR_MAP_FAILED;
+            }
+            mapped_end = vaddr + pmm::PAGE_SIZE;
+        }
+    }
+
+    uint32_t vma_flags = VMA_FLAG_PRIVATE | VMA_FLAG_ANONYMOUS;
+    if (stack_map) {
+        vma_flags |= VMA_FLAG_STACK;
+    }
+
+    vma* node = alloc_vma(start, end, prot, vma_flags);
+    if (!node) {
+        rollback_new_pages(mm_ctx, start, end);
+        sync::mutex_unlock(mm_ctx->lock);
+        return MM_CTX_ERR_NO_MEM;
+    }
+
+    if (!vma_insert_locked(mm_ctx, node)) {
+        free_vma(node);
+        rollback_new_pages(mm_ctx, start, end);
+        sync::mutex_unlock(mm_ctx->lock);
+        return MM_CTX_ERR_EXISTS;
+    }
+
+    coalesce_all_locked(mm_ctx);
+    sync::mutex_unlock(mm_ctx->lock);
+
+    *out_addr = start;
+    return MM_CTX_OK;
+}
+
+/**
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE int32_t mm_context_unmap(
+    mm_context* mm_ctx,
+    uintptr_t addr,
+    size_t length
+) {
+    if (!mm_ctx || !is_page_aligned(addr) || length == 0) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    size_t aligned_len = pmm::page_align_up(length);
+    uintptr_t end = 0;
+    if (!range_from_len(addr, aligned_len, end)) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    sync::mutex_lock(mm_ctx->lock);
+    int32_t rc = unmap_range_locked(mm_ctx, addr, end);
+    sync::mutex_unlock(mm_ctx->lock);
+    return rc;
+}
+
+/**
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE int32_t mm_context_mprotect(
+    mm_context* mm_ctx,
+    uintptr_t addr,
+    size_t length,
+    uint32_t prot
+) {
+    if (!mm_ctx || !is_page_aligned(addr) || length == 0) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+    if ((prot & ~MM_PROT_MASK) != 0) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    size_t aligned_len = pmm::page_align_up(length);
+    uintptr_t end = 0;
+    if (!range_from_len(addr, aligned_len, end)) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    sync::mutex_lock(mm_ctx->lock);
+
+    if (!range_fully_mapped_locked(mm_ctx, addr, end)) {
+        sync::mutex_unlock(mm_ctx->lock);
+        return MM_CTX_ERR_NOT_MAPPED;
+    }
+
+    vma* at_start = vma_find_locked(mm_ctx, addr);
+    if (at_start && at_start->start < addr && addr < at_start->end) {
+        if (!split_vma_locked(mm_ctx, at_start, addr)) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_NO_MEM;
+        }
+    }
+
+    vma* at_end = vma_find_locked(mm_ctx, end - 1);
+    if (at_end && at_end->start < end && end < at_end->end) {
+        if (!split_vma_locked(mm_ctx, at_end, end)) {
+            coalesce_all_locked(mm_ctx); // re-merge the split made at addr, if any
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_NO_MEM;
+        }
+    }
+
+    vma probe{};
+    probe.start = addr;
+    probe.end = addr;
+    probe.prot = 0;
+    probe.flags = 0;
+
+    vma* cur = mm_ctx->vmas.lower_bound(probe);
+    vma* pred = cur ? mm_ctx->vmas.prev(*cur) : mm_ctx->vmas.max();
+    if (pred && pred->end > addr) {
+        cur = pred;
+    }
+
+    while (cur && cur->start < end) {
+        vma* next = mm_ctx->vmas.next(*cur);
+        uintptr_t range_start = (cur->start > addr) ? cur->start : addr;
+        uintptr_t range_end = (cur->end < end) ? cur->end : end;
+
+        int32_t rc = apply_page_protection(mm_ctx, range_start, range_end, prot);
+        if (rc != MM_CTX_OK) {
+            // Re-merge whatever the boundary splits left mergeable
+            coalesce_all_locked(mm_ctx);
+            sync::mutex_unlock(mm_ctx->lock);
+            return rc;
+        }
+
+        cur->prot = prot;
+        cur = next;
+    }
+
+    coalesce_all_locked(mm_ctx);
+    sync::mutex_unlock(mm_ctx->lock);
+    return MM_CTX_OK;
+}
+
+/**
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE int32_t mm_context_map_shared(
+    mm_context* mm_ctx,
+    shmem* backing,
+    uint64_t offset,
+    size_t length,
+    uint32_t prot,
+    uint32_t map_flags,
+    uintptr_t addr,
+    uintptr_t* out_addr
+) {
+    if (!mm_ctx || !backing || !out_addr) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+    if ((prot & ~MM_PROT_MASK) != 0) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+    if (!(map_flags & MM_MAP_SHARED)) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    size_t aligned_len = pmm::page_align_up(length);
+    if (aligned_len == 0) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+    if (offset % pmm::PAGE_SIZE != 0) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    const bool fixed = (map_flags & (MM_MAP_FIXED | MM_MAP_FIXED_NOREPLACE)) != 0;
+    const bool no_replace = (map_flags & MM_MAP_FIXED_NOREPLACE) != 0;
+
+    uintptr_t start = 0;
+    uintptr_t end = 0;
+
+    sync::mutex_lock(mm_ctx->lock);
+
+    if (fixed) {
+        if (!is_page_aligned(addr)) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_INVALID_ARG;
+        }
+        start = addr;
+        if (!range_from_len(start, aligned_len, end)) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_INVALID_ARG;
+        }
+        if (start < mm_ctx->mmap_base || end > mm_ctx->mmap_end) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_NO_VIRT;
+        }
+
+        if (no_replace && vma_find_overlap_locked(mm_ctx, start, end)) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_EXISTS;
+        }
+        if (!no_replace) {
+            int32_t rc = unmap_range_locked(mm_ctx, start, end);
+            if (rc != MM_CTX_OK) {
+                sync::mutex_unlock(mm_ctx->lock);
+                return rc;
+            }
+        }
+    } else {
+        start = vma_find_gap_topdown_locked(mm_ctx, aligned_len);
+        if (start == 0) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_NO_VIRT;
+        }
+        end = start + aligned_len;
+    }
+
+    sync::mutex_lock(backing->lock);
+
+    size_t backed_size = backing->m_page_count * pmm::PAGE_SIZE;
+    if (aligned_len > backed_size || offset > backed_size - aligned_len) {
+        sync::mutex_unlock(backing->lock);
+        sync::mutex_unlock(mm_ctx->lock);
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    paging::page_flags_t page_flags = prot_to_page_flags(prot);
+    size_t pages = aligned_len / pmm::PAGE_SIZE;
+    size_t page_offset = static_cast<size_t>(offset / pmm::PAGE_SIZE);
+
+    for (size_t i = 0; i < pages; i++) {
+        pmm::phys_addr_t phys = shmem_get_page_locked(backing, page_offset + i);
+        if (phys == 0) {
+            unmap_pages_only(mm_ctx, start, start + i * pmm::PAGE_SIZE);
+            sync::mutex_unlock(backing->lock);
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_NO_MEM;
+        }
+
+        uintptr_t vaddr = start + i * pmm::PAGE_SIZE;
+        if (paging::map_page(vaddr, phys, page_flags, mm_ctx->pt_root) != paging::OK) {
+            unmap_pages_only(mm_ctx, start, vaddr);
+            sync::mutex_unlock(backing->lock);
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_MAP_FAILED;
+        }
+    }
+
+    sync::mutex_unlock(backing->lock);
+
+    vma* node = alloc_vma(start, end, prot, VMA_FLAG_SHARED);
+    if (!node) {
+        unmap_pages_only(mm_ctx, start, end);
+        sync::mutex_unlock(mm_ctx->lock);
+        return MM_CTX_ERR_NO_MEM;
+    }
+
+    backing->add_ref();
+    node->shmem_backing = rc::strong_ref<shmem>::adopt(backing);
+    node->backing_offset = offset;
+
+    if (!vma_insert_locked(mm_ctx, node)) {
+        unmap_pages_only(mm_ctx, start, end);
+        free_vma(node);
+        sync::mutex_unlock(mm_ctx->lock);
+        return MM_CTX_ERR_EXISTS;
+    }
+
+    coalesce_all_locked(mm_ctx);
+    sync::mutex_unlock(mm_ctx->lock);
+
+    *out_addr = start;
+    return MM_CTX_OK;
+}
+
+/**
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE int32_t mm_context_map_device(
+    mm_context* mm_ctx,
+    pmm::phys_addr_t phys_base,
+    size_t length,
+    uint32_t prot,
+    uint32_t cache_type,
+    uint32_t map_flags,
+    uintptr_t addr,
+    uintptr_t* out_addr
+) {
+    if (!mm_ctx || !out_addr) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+    if ((prot & ~MM_PROT_MASK) != 0) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+    if (!is_page_aligned(phys_base)) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    size_t aligned_len = pmm::page_align_up(length);
+    if (aligned_len == 0) {
+        return MM_CTX_ERR_INVALID_ARG;
+    }
+
+    const bool fixed = (map_flags & (MM_MAP_FIXED | MM_MAP_FIXED_NOREPLACE)) != 0;
+    const bool no_replace = (map_flags & MM_MAP_FIXED_NOREPLACE) != 0;
+
+    uintptr_t start = 0;
+    uintptr_t end = 0;
+
+    sync::mutex_lock(mm_ctx->lock);
+
+    if (fixed) {
+        if (!is_page_aligned(addr)) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_INVALID_ARG;
+        }
+        start = addr;
+        if (!range_from_len(start, aligned_len, end)) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_INVALID_ARG;
+        }
+        if (start < mm_ctx->mmap_base || end > mm_ctx->mmap_end) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_NO_VIRT;
+        }
+
+        if (no_replace && vma_find_overlap_locked(mm_ctx, start, end)) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_EXISTS;
+        }
+        if (!no_replace) {
+            int32_t rc = unmap_range_locked(mm_ctx, start, end);
+            if (rc != MM_CTX_OK) {
+                sync::mutex_unlock(mm_ctx->lock);
+                return rc;
+            }
+        }
+    } else {
+        start = vma_find_gap_topdown_locked(mm_ctx, aligned_len);
+        if (start == 0) {
+            sync::mutex_unlock(mm_ctx->lock);
+            return MM_CTX_ERR_NO_VIRT;
+        }
+        end = start + aligned_len;
+    }
+
+    paging::page_flags_t page_flags = prot_to_page_flags(prot) | cache_type;
+    size_t pages = aligned_len / pmm::PAGE_SIZE;
+
+    if (paging::map_pages(start, phys_base, page_flags, pages, mm_ctx->pt_root) != paging::OK) {
+        // map_pages may have installed part of the range before failing; the
+        // range was VMA-free on entry, so drop whatever got mapped.
+        unmap_pages_only(mm_ctx, start, end);
+        sync::mutex_unlock(mm_ctx->lock);
+        return MM_CTX_ERR_MAP_FAILED;
+    }
+
+    vma* node = alloc_vma(start, end, prot, VMA_FLAG_DEVICE);
+    if (!node) {
+        unmap_pages_only(mm_ctx, start, end);
+        sync::mutex_unlock(mm_ctx->lock);
+        return MM_CTX_ERR_NO_MEM;
+    }
+
+    if (!vma_insert_locked(mm_ctx, node)) {
+        unmap_pages_only(mm_ctx, start, end);
+        free_vma(node);
+        sync::mutex_unlock(mm_ctx->lock);
+        return MM_CTX_ERR_EXISTS;
+    }
+
+    coalesce_all_locked(mm_ctx);
+    sync::mutex_unlock(mm_ctx->lock);
+
+    *out_addr = start;
+    return MM_CTX_OK;
+}
+
+/**
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE size_t mm_context_vma_count(mm_context* mm_ctx) {
+    if (!mm_ctx) {
+        return 0;
+    }
+
+    sync::mutex_lock(mm_ctx->lock);
+    size_t count = mm_ctx->vmas.size();
+    sync::mutex_unlock(mm_ctx->lock);
+    return count;
+}
+
 } // namespace mm
diff --git a/kernel/mm/mm.h b/kernel/mm/mm.h
index 039d4ebe..2f64a222 100644
--- a/kernel/mm/mm.h
+++ b/kernel/mm/mm.h
@@ -1,13 +1,32 @@
 #ifndef STELLUX_MM_MM_H
 #define STELLUX_MM_MM_H
 
-#include "common/types.h"
+#include "mm/vma.h"
 
 namespace mm {
 
 constexpr int32_t OK = 0;
 constexpr int32_t ERR = -1;
 
+// Page-fault classification, arch-translated by the trap handlers
+constexpr uint64_t PF_FLAG_PRESENT = (1u << 0); // page was present, so must be a protection violation
+constexpr uint64_t PF_FLAG_WRITE = (1u << 1); // faulting access was a write
+constexpr uint64_t PF_FLAG_INSTRUCTION = (1u << 2); // faulting access was an instruction fetch
+
+struct mm_context final : rc::ref_counted<mm_context> {
+    pmm::phys_addr_t pt_root;
+    uintptr_t mmap_base;
+    uintptr_t mmap_end;
+    sync::mutex lock;
+    vma_tree vmas;
+
+    /**
+     * @brief Destroy mm_context and 
reclaim all mapped resources.
+     * @note Privilege: **required**
+     */
+    __PRIVILEGED_CODE static void ref_destroy(mm_context* self);
+};
+
 /**
  * @brief Initialize the memory management subsystem.
  * Calls PMM, VA layout, KVA, and VMM init in order.
@@ -17,6 +36,146 @@ constexpr int32_t ERR = -1;
  */
 __PRIVILEGED_CODE int32_t init();
 
+/**
+ * @brief Attempt to resolve a userland page fault via on-demand paging.
+ * @param mm_ctx Address-space context of the faulting task.
+ * @param fault_address Linear address that triggered the fault
+ *                      (x86 CR2, aarch64 FAR_EL1).
+ * @param pf_flags Bitwise OR of PF_FLAG_* values describing the fault.
+ * @return true if the fault was resolved and the instruction may safely retry,
+ *         false if it cannot be serviced (invalid access, unsupported VMA, OOM).
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE bool handle_user_pf(
+    mm_context* mm_ctx,
+    uintptr_t fault_address,
+    uint64_t pf_flags
+);
+
+/**
+ * @brief Create a user address-space context with a new user page-table root.
+ * @return New mm_context on success, nullptr on failure.
+ * @note Privilege: **required**
+ */
+[[nodiscard]] __PRIVILEGED_CODE mm_context* mm_context_create();
+
+/**
+ * @brief Increment mm_context reference count.
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE void mm_context_add_ref(mm_context* mm_ctx);
+
+/**
+ * @brief Decrement mm_context reference count and destroy on last reference.
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE void mm_context_release(mm_context* mm_ctx);
+
+/**
+ * @brief Track an already-mapped user range as a VMA.
+ * Does not map physical pages.
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE int32_t mm_context_add_vma(
+    mm_context* mm_ctx,
+    uintptr_t start,
+    size_t length,
+    uint32_t prot,
+    uint32_t vma_flags
+);
+
+/**
+ * @brief Map anonymous pages into a user mm_context and track as VMA.
+ * Supports fixed and non-fixed modes; MM_MAP_LAZY requests on-fault population.
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE int32_t mm_context_map_anonymous(
+    mm_context* mm_ctx,
+    uintptr_t addr,
+    size_t length,
+    uint32_t prot,
+    uint32_t map_flags,
+    uintptr_t* out_addr
+);
+
+/**
+ * @brief Unmap [addr, addr+length) from a user mm_context.
+ * Idempotent when the range is already unmapped.
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE int32_t mm_context_unmap(
+    mm_context* mm_ctx,
+    uintptr_t addr,
+    size_t length
+);
+
+/**
+ * @brief Change protection of an existing mapped range.
+ * Returns MM_CTX_ERR_NOT_MAPPED when any part of the range is unmapped.
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE int32_t mm_context_mprotect(
+    mm_context* mm_ctx,
+    uintptr_t addr,
+    size_t length,
+    uint32_t prot
+);
+
+/**
+ * @brief Map a shmem backing into a user mm_context with MAP_SHARED semantics.
+ * Pages come from the backing; they are not allocated per-mapping.
+ * @param backing Shmem backing. Must have sufficient size for offset+length.
+ * @param offset Byte offset into backing (must be page-aligned).
+ * @param length Number of bytes to map (rounded up to page boundary).
+ * @param prot MM_PROT_READ / MM_PROT_WRITE / MM_PROT_EXEC.
+ * @param map_flags MM_MAP_SHARED, optionally MM_MAP_FIXED / MM_MAP_FIXED_NOREPLACE.
+ * @param addr Fixed address; ignored unless MM_MAP_FIXED / MM_MAP_FIXED_NOREPLACE is set.
+ * @param out_addr Receives the mapped virtual address.
+ * @return MM_CTX_OK on success, error code on failure.
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE int32_t mm_context_map_shared(
+    mm_context* mm_ctx,
+    shmem* backing,
+    uint64_t offset,
+    size_t length,
+    uint32_t prot,
+    uint32_t map_flags,
+    uintptr_t addr,
+    uintptr_t* out_addr
+);
+
+/**
+ * @brief Map a contiguous physical address range into a user mm_context.
+ * Pages are not owned by the kernel - they are not freed on unmap.
+ * Useful for framebuffers, MMIO regions, and other device memory.
+ * @param phys_base Physical base address (must be page-aligned).
+ * @param length Number of bytes to map (rounded up to page boundary).
+ * @param prot MM_PROT_READ / MM_PROT_WRITE / MM_PROT_EXEC.
+ * @param cache_type Paging memory type (e.g. paging::PAGE_WC, paging::PAGE_DEVICE).
+ * @param map_flags Only MM_MAP_FIXED / MM_MAP_FIXED_NOREPLACE are examined.
+ * @param addr Fixed address; ignored unless MM_MAP_FIXED / MM_MAP_FIXED_NOREPLACE is set.
+ * @param out_addr Receives the mapped virtual address.
+ * @return MM_CTX_OK on success, error code on failure.
+ * @note Privilege: **required**
+ */
+__PRIVILEGED_CODE int32_t mm_context_map_device(
+    mm_context* mm_ctx,
+    pmm::phys_addr_t phys_base,
+    size_t length,
+    uint32_t prot,
+    uint32_t cache_type,
+    uint32_t map_flags,
+    uintptr_t addr,
+    uintptr_t* out_addr
+);
+
+/**
+ * @brief Return current VMA count.
+ * @note Privilege: **required**
+ */
+[[nodiscard]] __PRIVILEGED_CODE size_t mm_context_vma_count(mm_context* mm_ctx);
+
 } // namespace mm
 
 #endif // STELLUX_MM_MM_H
diff --git a/kernel/mm/uaccess.cpp b/kernel/mm/uaccess.cpp
index 5565f009..6e5723fd 100644
--- a/kernel/mm/uaccess.cpp
+++ b/kernel/mm/uaccess.cpp
@@ -1,6 +1,7 @@
 #include "mm/uaccess.h"
 #include "mm/pmm.h"
 #include "mm/vma.h"
+#include "mm/mm.h"
 #include "sched/sched.h"
 #include "sched/task.h"
 #include "sync/mutex.h"
@@ -64,6 +65,22 @@ __PRIVILEGED_CODE int32_t validate_user_range(
     }
 
     sync::mutex_unlock(mm_ctx->lock);
+
+    // Pre-fault any lazy pages in the validated range so that the kernel-mode
+    // memcpy in copy_from_user/copy_to_user doesn't fault on a not-present PTE.
+ uintptr_t end_page = end & ~(pmm::PAGE_SIZE - 1); + for (uintptr_t page = start & ~(pmm::PAGE_SIZE - 1); + page <= end_page; + page += pmm::PAGE_SIZE) { + if (paging::get_physical(page, mm_ctx->pt_root) != 0) { + continue; + } + + if (!handle_user_pf(mm_ctx, page, 0)) { + return ERR_FAULT; + } + } + return OK; } diff --git a/kernel/mm/vma.cpp b/kernel/mm/vma.cpp index bbe54430..99d8501d 100644 --- a/kernel/mm/vma.cpp +++ b/kernel/mm/vma.cpp @@ -1,6 +1,7 @@ #include "mm/vma.h" #include "common/string.h" +#include "mm/mm.h" #include "mm/heap.h" #include "mm/paging.h" #include "mm/pmm.h" @@ -8,36 +9,28 @@ namespace mm { -namespace { - -constexpr uint32_t MM_MAP_ALLOWED_FLAGS = - MM_MAP_SHARED | MM_MAP_PRIVATE | MM_MAP_ANONYMOUS | MM_MAP_FIXED | - MM_MAP_FIXED_NOREPLACE | MM_MAP_STACK; - -inline bool is_page_aligned(uintptr_t value) { - return (value & (pmm::PAGE_SIZE - 1)) == 0; +inline bool ranges_overlap(uintptr_t a_start, uintptr_t a_end, + uintptr_t b_start, uintptr_t b_end) { + return a_start < b_end && b_start < a_end; } -inline bool range_from_len(uintptr_t start, size_t length, uintptr_t& end_out) { - if (length == 0) { +inline bool vma_can_merge(const vma& left, const vma& right) { + if (left.end != right.start || left.prot != right.prot || + left.flags != right.flags) { return false; } - - uintptr_t end = start + length; - if (end < start) { - return false; + if ((left.flags & VMA_FLAG_SHARED) || (right.flags & VMA_FLAG_SHARED)) { + if (left.shmem_backing.ptr() != right.shmem_backing.ptr()) { + return false; + } + if (left.backing_offset + (left.end - left.start) != right.backing_offset) { + return false; + } } - - end_out = end; return true; } -inline bool ranges_overlap(uintptr_t a_start, uintptr_t a_end, - uintptr_t b_start, uintptr_t b_end) { - return a_start < b_end && b_start < a_end; -} - -inline paging::page_flags_t prot_to_page_flags(uint32_t prot) { +paging::page_flags_t prot_to_page_flags(uint32_t prot) { paging::page_flags_t flags = 0; if (prot 
!= 0) { flags |= paging::PAGE_USER; @@ -54,23 +47,7 @@ inline paging::page_flags_t prot_to_page_flags(uint32_t prot) { return flags; } -inline bool vma_can_merge(const vma& left, const vma& right) { - if (left.end != right.start || left.prot != right.prot || - left.flags != right.flags) { - return false; - } - if ((left.flags & VMA_FLAG_SHARED) || (right.flags & VMA_FLAG_SHARED)) { - if (left.shmem_backing.ptr() != right.shmem_backing.ptr()) { - return false; - } - if (left.backing_offset + (left.end - left.start) != right.backing_offset) { - return false; - } - } - return true; -} - -vma* alloc_vma(uintptr_t start, uintptr_t end, uint32_t prot, uint32_t flags) { +__PRIVILEGED_CODE vma* alloc_vma(uintptr_t start, uintptr_t end, uint32_t prot, uint32_t flags) { vma* node = heap::kalloc_new(); if (!node) { return nullptr; @@ -85,214 +62,12 @@ vma* alloc_vma(uintptr_t start, uintptr_t end, uint32_t prot, uint32_t flags) { return node; } -void free_vma(vma* node) { +__PRIVILEGED_CODE void free_vma(vma* node) { if (node) { heap::kfree_delete(node); } } -__PRIVILEGED_CODE void unmap_and_free_pages(mm_context* mm_ctx, uintptr_t start, uintptr_t end) { - for (uintptr_t vaddr = start; vaddr < end; vaddr += pmm::PAGE_SIZE) { - if (!paging::is_mapped(vaddr, mm_ctx->pt_root)) { - continue; - } - - pmm::phys_addr_t phys = paging::get_physical(vaddr, mm_ctx->pt_root); - paging::unmap_page(vaddr, mm_ctx->pt_root); - if (phys != 0) { - pmm::free_page(phys); - } - } -} - -__PRIVILEGED_CODE void unmap_pages_only(mm_context* mm_ctx, uintptr_t start, uintptr_t end) { - for (uintptr_t vaddr = start; vaddr < end; vaddr += pmm::PAGE_SIZE) { - if (!paging::is_mapped(vaddr, mm_ctx->pt_root)) { - continue; - } - paging::unmap_page(vaddr, mm_ctx->pt_root); - } -} - -__PRIVILEGED_CODE void rollback_new_pages(mm_context* mm_ctx, uintptr_t start, uintptr_t mapped_end) { - unmap_and_free_pages(mm_ctx, start, mapped_end); -} - -void coalesce_all_locked(mm_context* mm_ctx) { - vma* cur = 
mm_ctx->vmas.min(); - while (cur) { - vma* next = mm_ctx->vmas.next(*cur); - if (next && vma_can_merge(*cur, *next)) { - cur->end = next->end; - mm_ctx->vmas.remove(*next); - free_vma(next); - continue; - } - cur = next; - } -} - -vma* split_vma_locked(mm_context* mm_ctx, vma* node, uintptr_t split_addr) { - if (!node) { - return nullptr; - } - if (split_addr <= node->start || split_addr >= node->end) { - return nullptr; - } - - vma* right = alloc_vma(split_addr, node->end, node->prot, node->flags); - if (!right) { - return nullptr; - } - - right->shmem_backing = node->shmem_backing; - right->backing_offset = node->backing_offset + (split_addr - node->start); - - uintptr_t old_end = node->end; - node->end = split_addr; - if (!vma_insert_locked(mm_ctx, right)) { - node->end = old_end; - free_vma(right); - return nullptr; - } - - return right; -} - -__PRIVILEGED_CODE int32_t unmap_range_locked(mm_context* mm_ctx, uintptr_t start, uintptr_t end) { - for (;;) { - vma* overlap = vma_find_overlap_locked(mm_ctx, start, end); - if (!overlap || overlap->start >= end) { - break; - } - - if (start > overlap->start) { - overlap = split_vma_locked(mm_ctx, overlap, start); - if (!overlap) { - return MM_CTX_ERR_NO_MEM; - } - } - - if (end < overlap->end) { - if (!split_vma_locked(mm_ctx, overlap, end)) { - return MM_CTX_ERR_NO_MEM; - } - } - - if (overlap->flags & (VMA_FLAG_SHARED | VMA_FLAG_DEVICE)) { - unmap_pages_only(mm_ctx, overlap->start, overlap->end); - } else { - unmap_and_free_pages(mm_ctx, overlap->start, overlap->end); - } - mm_ctx->vmas.remove(*overlap); - free_vma(overlap); - } - - coalesce_all_locked(mm_ctx); - return MM_CTX_OK; -} - -bool range_fully_mapped_locked(mm_context* mm_ctx, uintptr_t start, uintptr_t end) { - uintptr_t cur = start; - while (cur < end) { - vma* node = vma_find_locked(mm_ctx, cur); - if (!node || node->start > cur) { - return false; - } - cur = (node->end < end) ? 
node->end : end; - } - return true; -} - -__PRIVILEGED_CODE int32_t apply_page_protection( - mm_context* mm_ctx, uintptr_t start, uintptr_t end, uint32_t prot -) { - paging::page_flags_t page_flags = prot_to_page_flags(prot); - for (uintptr_t vaddr = start; vaddr < end; vaddr += pmm::PAGE_SIZE) { - if (!paging::is_mapped(vaddr, mm_ctx->pt_root)) { - return MM_CTX_ERR_NOT_MAPPED; - } - if (paging::set_page_flags(vaddr, page_flags, mm_ctx->pt_root) != paging::OK) { - return MM_CTX_ERR_MAP_FAILED; - } - } - return MM_CTX_OK; -} - -} // namespace - -__PRIVILEGED_CODE void mm_context::ref_destroy(mm_context* self) { - if (!self) { - return; - } - - sync::mutex_lock(self->lock); - while (vma* node = self->vmas.min()) { - if (node->flags & (VMA_FLAG_SHARED | VMA_FLAG_DEVICE)) { - unmap_pages_only(self, node->start, node->end); - } else { - unmap_and_free_pages(self, node->start, node->end); - } - self->vmas.remove(*node); - free_vma(node); - } - sync::mutex_unlock(self->lock); - - paging::destroy_user_pt_root(self->pt_root); - self->pt_root = 0; - heap::kfree_delete(self); -} - -/** - * @note Privilege: **required** - */ -__PRIVILEGED_CODE mm_context* mm_context_create() { - mm_context* mm_ctx = heap::kalloc_new(); - if (!mm_ctx) { - return nullptr; - } - - mm_ctx->pt_root = paging::create_user_pt_root(); - if (mm_ctx->pt_root == 0) { - heap::kfree_delete(mm_ctx); - return nullptr; - } - - mm_ctx->mmap_base = MMAP_BASE_DEFAULT; - mm_ctx->mmap_end = USER_STACK_TOP - - (USER_STACK_PAGES + USER_STACK_GUARD_PAGES) * pmm::PAGE_SIZE; - if (mm_ctx->mmap_end <= mm_ctx->mmap_base) { - paging::destroy_user_pt_root(mm_ctx->pt_root); - heap::kfree_delete(mm_ctx); - return nullptr; - } - - mm_ctx->lock.init(); - return mm_ctx; -} - -/** - * @note Privilege: **required** - */ -__PRIVILEGED_CODE void mm_context_add_ref(mm_context* mm_ctx) { - if (mm_ctx) { - mm_ctx->add_ref(); - } -} - -/** - * @note Privilege: **required** - */ -__PRIVILEGED_CODE void mm_context_release(mm_context* 
mm_ctx) { - if (!mm_ctx) { - return; - } - - if (mm_ctx->release()) { - mm_context::ref_destroy(mm_ctx); - } -} - /** * @note Privilege: **required** */ @@ -425,513 +200,132 @@ __PRIVILEGED_CODE uintptr_t vma_find_gap_topdown_locked(mm_context* mm_ctx, size return 0; } -/** - * @note Privilege: **required** - */ -__PRIVILEGED_CODE int32_t mm_context_add_vma( - mm_context* mm_ctx, - uintptr_t start, - size_t length, - uint32_t prot, - uint32_t vma_flags -) { - if (!mm_ctx || (prot & ~MM_PROT_MASK) != 0) { - return MM_CTX_ERR_INVALID_ARG; - } - if (!is_page_aligned(start)) { - return MM_CTX_ERR_INVALID_ARG; - } - - size_t aligned_len = pmm::page_align_up(length); - uintptr_t end = 0; - if (!range_from_len(start, aligned_len, end)) { - return MM_CTX_ERR_INVALID_ARG; - } - - sync::mutex_lock(mm_ctx->lock); - - vma* node = alloc_vma(start, end, prot, vma_flags); - if (!node) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_MEM; - } - - if (!vma_insert_locked(mm_ctx, node)) { - free_vma(node); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_EXISTS; - } - - coalesce_all_locked(mm_ctx); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_OK; -} - -/** - * @note Privilege: **required** - */ -__PRIVILEGED_CODE int32_t mm_context_map_anonymous( - mm_context* mm_ctx, - uintptr_t addr, - size_t length, - uint32_t prot, - uint32_t map_flags, - uintptr_t* out_addr -) { - if (!mm_ctx || !out_addr) { - return MM_CTX_ERR_INVALID_ARG; - } - if ((prot & ~MM_PROT_MASK) != 0) { - return MM_CTX_ERR_INVALID_ARG; - } - if ((map_flags & ~MM_MAP_ALLOWED_FLAGS) != 0) { - return MM_CTX_ERR_INVALID_ARG; - } - if (!(map_flags & MM_MAP_PRIVATE) || !(map_flags & MM_MAP_ANONYMOUS)) { - return MM_CTX_ERR_INVALID_ARG; - } - - size_t aligned_len = pmm::page_align_up(length); - if (aligned_len == 0) { - return MM_CTX_ERR_INVALID_ARG; - } - - const bool fixed = (map_flags & (MM_MAP_FIXED | MM_MAP_FIXED_NOREPLACE)) != 0; - const bool no_replace = (map_flags & MM_MAP_FIXED_NOREPLACE) 
!= 0; - const bool stack_map = (map_flags & MM_MAP_STACK) != 0; - - uintptr_t start = 0; - uintptr_t end = 0; - - if (fixed) { - if (!is_page_aligned(addr)) { - return MM_CTX_ERR_INVALID_ARG; - } - start = addr; - if (!range_from_len(start, aligned_len, end)) { - return MM_CTX_ERR_INVALID_ARG; - } - - if (!stack_map && (start < mm_ctx->mmap_base || end > mm_ctx->mmap_end)) { - return MM_CTX_ERR_NO_VIRT; - } - } - - sync::mutex_lock(mm_ctx->lock); - - if (fixed) { - if (no_replace && vma_find_overlap_locked(mm_ctx, start, end)) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_EXISTS; - } - - if (!no_replace) { - int32_t rc = unmap_range_locked(mm_ctx, start, end); - if (rc != MM_CTX_OK) { - sync::mutex_unlock(mm_ctx->lock); - return rc; - } - } - } else { - start = vma_find_gap_topdown_locked(mm_ctx, aligned_len); - if (start == 0) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_VIRT; - } - end = start + aligned_len; - } - - paging::page_flags_t page_flags = prot_to_page_flags(prot); - uintptr_t mapped_end = start; +__PRIVILEGED_CODE void unmap_and_free_pages(mm_context* mm_ctx, uintptr_t start, uintptr_t end) { for (uintptr_t vaddr = start; vaddr < end; vaddr += pmm::PAGE_SIZE) { - pmm::phys_addr_t phys = pmm::alloc_page(); - if (phys == 0) { - rollback_new_pages(mm_ctx, start, mapped_end); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_MEM; + if (!paging::is_mapped(vaddr, mm_ctx->pt_root)) { + continue; } - string::memset(paging::phys_to_virt(phys), 0, pmm::PAGE_SIZE); - if (paging::map_page(vaddr, phys, page_flags, mm_ctx->pt_root) != paging::OK) { + pmm::phys_addr_t phys = paging::get_physical(vaddr, mm_ctx->pt_root); + paging::unmap_page(vaddr, mm_ctx->pt_root); + if (phys != 0) { pmm::free_page(phys); - rollback_new_pages(mm_ctx, start, mapped_end); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_MAP_FAILED; } - mapped_end = vaddr + pmm::PAGE_SIZE; } - - uint32_t vma_flags = VMA_FLAG_PRIVATE | VMA_FLAG_ANONYMOUS; - if 
(stack_map) { - vma_flags |= VMA_FLAG_STACK; - } - - vma* node = alloc_vma(start, end, prot, vma_flags); - if (!node) { - rollback_new_pages(mm_ctx, start, end); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_MEM; - } - - if (!vma_insert_locked(mm_ctx, node)) { - free_vma(node); - rollback_new_pages(mm_ctx, start, end); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_EXISTS; - } - - coalesce_all_locked(mm_ctx); - sync::mutex_unlock(mm_ctx->lock); - - *out_addr = start; - return MM_CTX_OK; } -/** - * @note Privilege: **required** - */ -__PRIVILEGED_CODE int32_t mm_context_unmap( - mm_context* mm_ctx, - uintptr_t addr, - size_t length -) { - if (!mm_ctx || !is_page_aligned(addr) || length == 0) { - return MM_CTX_ERR_INVALID_ARG; - } - - size_t aligned_len = pmm::page_align_up(length); - uintptr_t end = 0; - if (!range_from_len(addr, aligned_len, end)) { - return MM_CTX_ERR_INVALID_ARG; - } - - sync::mutex_lock(mm_ctx->lock); - int32_t rc = unmap_range_locked(mm_ctx, addr, end); - sync::mutex_unlock(mm_ctx->lock); - return rc; -} - -/** - * @note Privilege: **required** - */ -__PRIVILEGED_CODE int32_t mm_context_mprotect( - mm_context* mm_ctx, - uintptr_t addr, - size_t length, - uint32_t prot -) { - if (!mm_ctx || !is_page_aligned(addr) || length == 0) { - return MM_CTX_ERR_INVALID_ARG; - } - if ((prot & ~MM_PROT_MASK) != 0) { - return MM_CTX_ERR_INVALID_ARG; - } - - size_t aligned_len = pmm::page_align_up(length); - uintptr_t end = 0; - if (!range_from_len(addr, aligned_len, end)) { - return MM_CTX_ERR_INVALID_ARG; - } - - sync::mutex_lock(mm_ctx->lock); - - if (!range_fully_mapped_locked(mm_ctx, addr, end)) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NOT_MAPPED; - } - - vma* at_start = vma_find_locked(mm_ctx, addr); - if (at_start && at_start->start < addr && addr < at_start->end) { - if (!split_vma_locked(mm_ctx, at_start, addr)) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_MEM; - } - } - - vma* at_end = 
vma_find_locked(mm_ctx, end - 1); - if (at_end && at_end->start < end && end < at_end->end) { - if (!split_vma_locked(mm_ctx, at_end, end)) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_MEM; +__PRIVILEGED_CODE void unmap_pages_only(mm_context* mm_ctx, uintptr_t start, uintptr_t end) { + for (uintptr_t vaddr = start; vaddr < end; vaddr += pmm::PAGE_SIZE) { + if (!paging::is_mapped(vaddr, mm_ctx->pt_root)) { + continue; } + paging::unmap_page(vaddr, mm_ctx->pt_root); } +} - vma probe{}; - probe.start = addr; - probe.end = addr; - probe.prot = 0; - probe.flags = 0; - - vma* cur = mm_ctx->vmas.lower_bound(probe); - vma* pred = cur ? mm_ctx->vmas.prev(*cur) : mm_ctx->vmas.max(); - if (pred && pred->end > addr) { - cur = pred; - } +__PRIVILEGED_CODE void rollback_new_pages(mm_context* mm_ctx, uintptr_t start, uintptr_t mapped_end) { + unmap_and_free_pages(mm_ctx, start, mapped_end); +} - while (cur && cur->start < end) { +__PRIVILEGED_CODE void coalesce_all_locked(mm_context* mm_ctx) { + vma* cur = mm_ctx->vmas.min(); + while (cur) { vma* next = mm_ctx->vmas.next(*cur); - uintptr_t range_start = (cur->start > addr) ? cur->start : addr; - uintptr_t range_end = (cur->end < end) ? 
cur->end : end; - - int32_t rc = apply_page_protection(mm_ctx, range_start, range_end, prot); - if (rc != MM_CTX_OK) { - sync::mutex_unlock(mm_ctx->lock); - return rc; + if (next && vma_can_merge(*cur, *next)) { + cur->end = next->end; + mm_ctx->vmas.remove(*next); + free_vma(next); + continue; } - - cur->prot = prot; cur = next; } - - coalesce_all_locked(mm_ctx); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_OK; } -/** - * @note Privilege: **required** - */ -__PRIVILEGED_CODE int32_t mm_context_map_shared( - mm_context* mm_ctx, - shmem* backing, - uint64_t offset, - size_t length, - uint32_t prot, - uint32_t map_flags, - uintptr_t addr, - uintptr_t* out_addr -) { - if (!mm_ctx || !backing || !out_addr) { - return MM_CTX_ERR_INVALID_ARG; - } - if ((prot & ~MM_PROT_MASK) != 0) { - return MM_CTX_ERR_INVALID_ARG; +__PRIVILEGED_CODE vma* split_vma_locked(mm_context* mm_ctx, vma* node, uintptr_t split_addr) { + if (!node) { + return nullptr; } - if (!(map_flags & MM_MAP_SHARED)) { - return MM_CTX_ERR_INVALID_ARG; + if (split_addr <= node->start || split_addr >= node->end) { + return nullptr; } - size_t aligned_len = pmm::page_align_up(length); - if (aligned_len == 0) { - return MM_CTX_ERR_INVALID_ARG; - } - if (offset % pmm::PAGE_SIZE != 0) { - return MM_CTX_ERR_INVALID_ARG; + vma* right = alloc_vma(split_addr, node->end, node->prot, node->flags); + if (!right) { + return nullptr; } - const bool fixed = (map_flags & (MM_MAP_FIXED | MM_MAP_FIXED_NOREPLACE)) != 0; - const bool no_replace = (map_flags & MM_MAP_FIXED_NOREPLACE) != 0; + right->shmem_backing = node->shmem_backing; + right->backing_offset = node->backing_offset + (split_addr - node->start); - uintptr_t start = 0; - uintptr_t end = 0; + uintptr_t old_end = node->end; + node->end = split_addr; + if (!vma_insert_locked(mm_ctx, right)) { + node->end = old_end; + free_vma(right); + return nullptr; + } - sync::mutex_lock(mm_ctx->lock); + return right; +} - if (fixed) { - if (!is_page_aligned(addr)) { - 
sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_INVALID_ARG; - } - start = addr; - if (!range_from_len(start, aligned_len, end)) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_INVALID_ARG; - } - if (start < mm_ctx->mmap_base || end > mm_ctx->mmap_end) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_VIRT; +__PRIVILEGED_CODE int32_t unmap_range_locked(mm_context* mm_ctx, uintptr_t start, uintptr_t end) { + for (;;) { + vma* overlap = vma_find_overlap_locked(mm_ctx, start, end); + if (!overlap || overlap->start >= end) { + break; } - if (no_replace && vma_find_overlap_locked(mm_ctx, start, end)) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_EXISTS; - } - if (!no_replace) { - int32_t rc = unmap_range_locked(mm_ctx, start, end); - if (rc != MM_CTX_OK) { - sync::mutex_unlock(mm_ctx->lock); - return rc; + if (start > overlap->start) { + overlap = split_vma_locked(mm_ctx, overlap, start); + if (!overlap) { + return MM_CTX_ERR_NO_MEM; } } - } else { - start = vma_find_gap_topdown_locked(mm_ctx, aligned_len); - if (start == 0) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_VIRT; - } - end = start + aligned_len; - } - - sync::mutex_lock(backing->lock); - size_t backed_size = backing->m_page_count * pmm::PAGE_SIZE; - if (aligned_len > backed_size || offset > backed_size - aligned_len) { - sync::mutex_unlock(backing->lock); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_INVALID_ARG; - } - - paging::page_flags_t page_flags = prot_to_page_flags(prot); - size_t pages = aligned_len / pmm::PAGE_SIZE; - size_t page_offset = static_cast(offset / pmm::PAGE_SIZE); - - for (size_t i = 0; i < pages; i++) { - pmm::phys_addr_t phys = shmem_get_page_locked(backing, page_offset + i); - if (phys == 0) { - unmap_pages_only(mm_ctx, start, start + i * pmm::PAGE_SIZE); - sync::mutex_unlock(backing->lock); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_MEM; + if (end < overlap->end) { + if (!split_vma_locked(mm_ctx, 
overlap, end)) { + return MM_CTX_ERR_NO_MEM; + } } - uintptr_t vaddr = start + i * pmm::PAGE_SIZE; - if (paging::map_page(vaddr, phys, page_flags, mm_ctx->pt_root) != paging::OK) { - unmap_pages_only(mm_ctx, start, vaddr); - sync::mutex_unlock(backing->lock); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_MAP_FAILED; + if (overlap->flags & (VMA_FLAG_SHARED | VMA_FLAG_DEVICE)) { + unmap_pages_only(mm_ctx, overlap->start, overlap->end); + } else { + unmap_and_free_pages(mm_ctx, overlap->start, overlap->end); } - } - - sync::mutex_unlock(backing->lock); - - vma* node = alloc_vma(start, end, prot, VMA_FLAG_SHARED); - if (!node) { - unmap_pages_only(mm_ctx, start, end); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_MEM; - } - - backing->add_ref(); - node->shmem_backing = rc::strong_ref::adopt(backing); - node->backing_offset = offset; - - if (!vma_insert_locked(mm_ctx, node)) { - unmap_pages_only(mm_ctx, start, end); - free_vma(node); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_EXISTS; + mm_ctx->vmas.remove(*overlap); + free_vma(overlap); } coalesce_all_locked(mm_ctx); - sync::mutex_unlock(mm_ctx->lock); - - *out_addr = start; return MM_CTX_OK; } -/** - * @note Privilege: **required** - */ -__PRIVILEGED_CODE int32_t mm_context_map_device( - mm_context* mm_ctx, - pmm::phys_addr_t phys_base, - size_t length, - uint32_t prot, - uint32_t cache_type, - uint32_t map_flags, - uintptr_t addr, - uintptr_t* out_addr -) { - if (!mm_ctx || !out_addr) { - return MM_CTX_ERR_INVALID_ARG; - } - if ((prot & ~MM_PROT_MASK) != 0) { - return MM_CTX_ERR_INVALID_ARG; - } - if (!is_page_aligned(phys_base)) { - return MM_CTX_ERR_INVALID_ARG; - } - - size_t aligned_len = pmm::page_align_up(length); - if (aligned_len == 0) { - return MM_CTX_ERR_INVALID_ARG; - } - - const bool fixed = (map_flags & (MM_MAP_FIXED | MM_MAP_FIXED_NOREPLACE)) != 0; - const bool no_replace = (map_flags & MM_MAP_FIXED_NOREPLACE) != 0; - - uintptr_t start = 0; - uintptr_t end = 0; - - 
sync::mutex_lock(mm_ctx->lock); - - if (fixed) { - if (!is_page_aligned(addr)) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_INVALID_ARG; - } - start = addr; - if (!range_from_len(start, aligned_len, end)) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_INVALID_ARG; - } - if (start < mm_ctx->mmap_base || end > mm_ctx->mmap_end) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_VIRT; +bool range_fully_mapped_locked(mm_context* mm_ctx, uintptr_t start, uintptr_t end) { + uintptr_t cur = start; + while (cur < end) { + vma* node = vma_find_locked(mm_ctx, cur); + if (!node || node->start > cur) { + return false; } + cur = (node->end < end) ? node->end : end; + } + return true; +} - if (no_replace && vma_find_overlap_locked(mm_ctx, start, end)) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_EXISTS; - } - if (!no_replace) { - int32_t rc = unmap_range_locked(mm_ctx, start, end); - if (rc != MM_CTX_OK) { - sync::mutex_unlock(mm_ctx->lock); - return rc; - } +__PRIVILEGED_CODE int32_t apply_page_protection( + mm_context* mm_ctx, uintptr_t start, uintptr_t end, uint32_t prot +) { + paging::page_flags_t page_flags = prot_to_page_flags(prot); + for (uintptr_t vaddr = start; vaddr < end; vaddr += pmm::PAGE_SIZE) { + if (!paging::is_mapped(vaddr, mm_ctx->pt_root)) { + return MM_CTX_ERR_NOT_MAPPED; } - } else { - start = vma_find_gap_topdown_locked(mm_ctx, aligned_len); - if (start == 0) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_VIRT; + if (paging::set_page_flags(vaddr, page_flags, mm_ctx->pt_root) != paging::OK) { + return MM_CTX_ERR_MAP_FAILED; } - end = start + aligned_len; - } - - paging::page_flags_t page_flags = prot_to_page_flags(prot) | cache_type; - size_t pages = aligned_len / pmm::PAGE_SIZE; - - if (paging::map_pages(start, phys_base, page_flags, pages, mm_ctx->pt_root) != paging::OK) { - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_MAP_FAILED; - } - - vma* node = alloc_vma(start, end, prot, 
VMA_FLAG_DEVICE); - if (!node) { - unmap_pages_only(mm_ctx, start, end); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_NO_MEM; } - - if (!vma_insert_locked(mm_ctx, node)) { - unmap_pages_only(mm_ctx, start, end); - free_vma(node); - sync::mutex_unlock(mm_ctx->lock); - return MM_CTX_ERR_EXISTS; - } - - coalesce_all_locked(mm_ctx); - sync::mutex_unlock(mm_ctx->lock); - - *out_addr = start; return MM_CTX_OK; } -/** - * @note Privilege: **required** - */ -__PRIVILEGED_CODE size_t mm_context_vma_count(mm_context* mm_ctx) { - if (!mm_ctx) { - return 0; - } - - sync::mutex_lock(mm_ctx->lock); - size_t count = mm_ctx->vmas.size(); - sync::mutex_unlock(mm_ctx->lock); - return count; -} - } // namespace mm diff --git a/kernel/mm/vma.h b/kernel/mm/vma.h index 262b7702..52ecc725 100644 --- a/kernel/mm/vma.h +++ b/kernel/mm/vma.h @@ -3,6 +3,7 @@ #include "common/types.h" #include "common/rb_tree.h" +#include "mm/paging.h" #include "mm/pmm_types.h" #include "mm/shmem.h" #include "sync/mutex.h" @@ -30,6 +31,11 @@ constexpr uint32_t MM_MAP_FIXED = 0x00000010u; constexpr uint32_t MM_MAP_ANONYMOUS = 0x00000020u; constexpr uint32_t MM_MAP_STACK = 0x00020000u; constexpr uint32_t MM_MAP_FIXED_NOREPLACE = 0x00100000u; +constexpr uint32_t MM_MAP_LAZY = 0x00200000u; + +constexpr uint32_t MM_MAP_ALLOWED_FLAGS = + MM_MAP_SHARED | MM_MAP_PRIVATE | MM_MAP_ANONYMOUS | MM_MAP_FIXED | + MM_MAP_FIXED_NOREPLACE | MM_MAP_STACK | MM_MAP_LAZY; constexpr uint32_t VMA_FLAG_PRIVATE = (1u << 0); constexpr uint32_t VMA_FLAG_ANONYMOUS = (1u << 1); @@ -40,9 +46,12 @@ constexpr uint32_t VMA_FLAG_DEVICE = (1u << 5); constexpr uintptr_t MMAP_BASE_DEFAULT = 0x00000080000000ULL; constexpr uintptr_t USER_STACK_TOP = 0x00007FFFFFF00000ULL; -constexpr size_t USER_STACK_PAGES = 8; // 32 KiB +constexpr size_t USER_STACK_PAGES = 8; // 32 KiB +constexpr size_t USER_STACK_MAX_PAGES = 2048; // 8 MiB max stack space via lazy on-demand paging constexpr size_t USER_STACK_GUARD_PAGES = 1; +struct mm_context; + 
struct vma { uintptr_t start; uintptr_t end; @@ -64,38 +73,50 @@ struct vma_addr_cmp { using vma_tree = rbt::tree; -struct mm_context final : rc::ref_counted { - pmm::phys_addr_t pt_root; - uintptr_t mmap_base; - uintptr_t mmap_end; - sync::mutex lock; - vma_tree vmas; - - /** - * @brief Destroy mm_context and reclaim all mapped resources. - * @note Privilege: **required** - */ - __PRIVILEGED_CODE static void ref_destroy(mm_context* self); -}; +/** + * @brief Check whether an address is aligned to the system page size. + */ +[[nodiscard]] inline bool is_page_aligned(uintptr_t value) { + return (value & (pmm::PAGE_SIZE - 1)) == 0; +} /** - * @brief Create a user address-space context with a new user page-table root. - * @return New mm_context on success, nullptr on failure. - * @note Privilege: **required** + * @brief Compute the exclusive end address of a [start, start+length) range. + * Rejects zero length and any overflow. + * @return true and writes end into end_out on success, false otherwise. */ -[[nodiscard]] __PRIVILEGED_CODE mm_context* mm_context_create(); +[[nodiscard]] inline bool range_from_len(uintptr_t start, size_t length, uintptr_t& end_out) { + if (length == 0) { + return false; + } + uintptr_t end = start + length; + if (end < start) { + return false; + } + end_out = end; + return true; +} + +/** + * @brief Convert MM_PROT_* protection bits into architecture-specific page-table flags. + */ +[[nodiscard]] paging::page_flags_t prot_to_page_flags(uint32_t prot); /** - * @brief Increment mm_context reference count. + * @brief Allocate a VMA node and initialize its fields. + * Does not insert into any tree. + * @return New VMA on success, nullptr on allocation failure. 
* @note Privilege: **required** */ -__PRIVILEGED_CODE void mm_context_add_ref(mm_context* mm_ctx); +[[nodiscard]] __PRIVILEGED_CODE vma* alloc_vma( + uintptr_t start, uintptr_t end, uint32_t prot, uint32_t flags); /** - * @brief Decrement mm_context reference count and destroy on last reference. + * @brief Free a VMA node previously returned by alloc_vma. + * No-op on nullptr. Does not detach from any tree. * @note Privilege: **required** */ -__PRIVILEGED_CODE void mm_context_release(mm_context* mm_ctx); +__PRIVILEGED_CODE void free_vma(vma* node); /** * @brief Find VMA containing address. @@ -137,109 +158,75 @@ __PRIVILEGED_CODE void vma_remove_locked(mm_context* mm_ctx, vma& node); mm_context* mm_ctx, size_t length); /** - * @brief Track an already-mapped user range as a VMA. - * Does not map physical pages. + * @brief Unmap [start, end) from a user mm_context, freeing physical pages. + * Iterates page-by-page; pages that aren't mapped are skipped. + * Used for anonymous/stack mappings the kernel owns. * @note Privilege: **required** */ -__PRIVILEGED_CODE int32_t mm_context_add_vma( - mm_context* mm_ctx, - uintptr_t start, - size_t length, - uint32_t prot, - uint32_t vma_flags -); +__PRIVILEGED_CODE void unmap_and_free_pages( + mm_context* mm_ctx, uintptr_t start, uintptr_t end); /** - * @brief Map anonymous pages into a user mm_context and track as VMA. - * Supports fixed and non-fixed allocation modes. + * @brief Unmap [start, end) from a user mm_context without freeing pages. + * For shared and device mappings whose physical pages are owned elsewhere. * @note Privilege: **required** */ -__PRIVILEGED_CODE int32_t mm_context_map_anonymous( - mm_context* mm_ctx, - uintptr_t addr, - size_t length, - uint32_t prot, - uint32_t map_flags, - uintptr_t* out_addr -); +__PRIVILEGED_CODE void unmap_pages_only( + mm_context* mm_ctx, uintptr_t start, uintptr_t end); /** - * @brief Unmap [addr, addr+length) from a user mm_context. 
- * Idempotent when the range is already unmapped. + * @brief Roll back a partially-completed eager allocation. + * Equivalent to unmap_and_free_pages over the [start, mapped_end) prefix. + * Used when an mm_context_map_* call needs to undo work after an error. * @note Privilege: **required** */ -__PRIVILEGED_CODE int32_t mm_context_unmap( - mm_context* mm_ctx, - uintptr_t addr, - size_t length -); +__PRIVILEGED_CODE void rollback_new_pages( + mm_context* mm_ctx, uintptr_t start, uintptr_t mapped_end); /** - * @brief Change protection of an existing mapped range. - * Returns ERR_NOT_MAPPED when any part of the range is unmapped. + * @brief Merge adjacent VMAs with identical prot/flags/backing. + * Idempotent. Caller must hold mm_ctx->lock. * @note Privilege: **required** */ -__PRIVILEGED_CODE int32_t mm_context_mprotect( - mm_context* mm_ctx, - uintptr_t addr, - size_t length, - uint32_t prot -); - -/** - * @brief Map a shmem backing into a user mm_context with MAP_SHARED semantics. - * Pages come from the backing; they are not allocated per-mapping. - * @param backing Shmem backing. Must have sufficient size for offset+length. - * @param offset Byte offset into backing (must be page-aligned). - * @param length Number of bytes to map (rounded up to page boundary). - * @param prot MM_PROT_READ / MM_PROT_WRITE / MM_PROT_EXEC. - * @param map_flags MM_MAP_SHARED, optionally MM_MAP_FIXED / MM_MAP_FIXED_NOREPLACE. - * @param addr Hint or fixed address. - * @param out_addr Receives the mapped virtual address. - * @return MM_CTX_OK on success, error code on failure. +__PRIVILEGED_CODE void coalesce_all_locked(mm_context* mm_ctx); + +/** + * @brief Split a VMA at split_addr into two adjacent VMAs. + * The left part keeps the original node; the right part is freshly allocated + * and inserted into the tree. Caller must hold mm_ctx->lock. + * @return The newly-allocated right-hand VMA, or nullptr on allocation failure + * or out-of-range split_addr. 
* @note Privilege: **required** */ -__PRIVILEGED_CODE int32_t mm_context_map_shared( - mm_context* mm_ctx, - shmem* backing, - uint64_t offset, - size_t length, - uint32_t prot, - uint32_t map_flags, - uintptr_t addr, - uintptr_t* out_addr -); - -/** - * @brief Map a contiguous physical address range into a user mm_context. - * Pages are not owned by the kernel — they are not freed on unmap. - * Useful for framebuffers, MMIO regions, and other device memory. - * @param phys_base Physical base address (must be page-aligned). - * @param length Number of bytes to map (rounded up to page boundary). - * @param prot MM_PROT_READ / MM_PROT_WRITE / MM_PROT_EXEC. - * @param cache_type Paging memory type (e.g. paging::PAGE_WC, paging::PAGE_DEVICE). - * @param map_flags MM_MAP_SHARED, optionally MM_MAP_FIXED / MM_MAP_FIXED_NOREPLACE. - * @param addr Hint or fixed address. - * @param out_addr Receives the mapped virtual address. - * @return MM_CTX_OK on success, error code on failure. +[[nodiscard]] __PRIVILEGED_CODE vma* split_vma_locked( + mm_context* mm_ctx, vma* node, uintptr_t split_addr); + +/** + * @brief Unmap every VMA overlapping [start, end), splitting at the edges. + * Frees pages for owned VMAs and releases backing refs for shared/device VMAs. + * Idempotent over already-unmapped regions. Caller must hold mm_ctx->lock. + * @return MM_CTX_OK on success, MM_CTX_ERR_NO_MEM if an edge split fails. * @note Privilege: **required** */ -__PRIVILEGED_CODE int32_t mm_context_map_device( - mm_context* mm_ctx, - pmm::phys_addr_t phys_base, - size_t length, - uint32_t prot, - uint32_t cache_type, - uint32_t map_flags, - uintptr_t addr, - uintptr_t* out_addr -); +__PRIVILEGED_CODE int32_t unmap_range_locked( + mm_context* mm_ctx, uintptr_t start, uintptr_t end); + +/** + * @brief Check whether every page in [start, end) is covered by a VMA. + * Caller must hold mm_ctx->lock. 
+ */ +[[nodiscard]] bool range_fully_mapped_locked( + mm_context* mm_ctx, uintptr_t start, uintptr_t end); /** - * @brief Return current VMA count. + * @brief Apply new protection bits to the existing PTEs for [start, end). + * Does not change VMA records; caller is responsible for VMA updates. + * @return MM_CTX_OK on success, MM_CTX_ERR_NOT_MAPPED if any page is unmapped, + * MM_CTX_ERR_MAP_FAILED on PTE-update failure. * @note Privilege: **required** */ -[[nodiscard]] __PRIVILEGED_CODE size_t mm_context_vma_count(mm_context* mm_ctx); +__PRIVILEGED_CODE int32_t apply_page_protection( + mm_context* mm_ctx, uintptr_t start, uintptr_t end, uint32_t prot); } // namespace mm diff --git a/kernel/resource/providers/proc_provider.cpp b/kernel/resource/providers/proc_provider.cpp index 1e25795b..5d3040b5 100644 --- a/kernel/resource/providers/proc_provider.cpp +++ b/kernel/resource/providers/proc_provider.cpp @@ -2,6 +2,7 @@ #include "sched/sched.h" #include "sched/task.h" #include "sched/task_registry.h" +#include "mm/mm.h" #include "mm/vma.h" #include "mm/vmm.h" #include "mm/heap.h" diff --git a/kernel/sched/sched.cpp b/kernel/sched/sched.cpp index 31949340..d4abf793 100644 --- a/kernel/sched/sched.cpp +++ b/kernel/sched/sched.cpp @@ -10,6 +10,7 @@ #include "mm/heap.h" #include "mm/vmm.h" #include "mm/kva.h" +#include "mm/mm.h" #include "mm/paging.h" #include "common/logging.h" #include "sync/spinlock.h" @@ -666,20 +667,52 @@ __PRIVILEGED_CODE task* create_user_task( return nullptr; } - uintptr_t user_stack_base = mm::USER_STACK_TOP - mm::USER_STACK_PAGES * pmm::PAGE_SIZE; - uintptr_t mapped_stack_addr = 0; - uint32_t stack_map_flags = mm::MM_MAP_PRIVATE | mm::MM_MAP_ANONYMOUS | + // Stack region layout: a single coalesced MM_MAP_STACK vma spanning + // USER_STACK_MAX_PAGES at the top of user VA. The bottom portion is + // reserved lazily (no eager pages) so userland faults grow it on demand. 
+ // The top USER_STACK_PAGES window is eagerly mapped so the kernel can + // write argv/envp into it (+ performance) before the user task ever runs. + uintptr_t stack_max_base = mm::USER_STACK_TOP - mm::USER_STACK_MAX_PAGES * pmm::PAGE_SIZE; + uintptr_t eager_base = mm::USER_STACK_TOP - mm::USER_STACK_PAGES * pmm::PAGE_SIZE; + size_t lazy_bytes = (mm::USER_STACK_MAX_PAGES - mm::USER_STACK_PAGES) * pmm::PAGE_SIZE; + size_t eager_bytes = mm::USER_STACK_PAGES * pmm::PAGE_SIZE; + size_t total_bytes = mm::USER_STACK_MAX_PAGES * pmm::PAGE_SIZE; + + uint32_t base_stack_flags = mm::MM_MAP_PRIVATE | mm::MM_MAP_ANONYMOUS | mm::MM_MAP_FIXED | mm::MM_MAP_STACK; - int32_t map_rc = mm::mm_context_map_anonymous( + + // Reserve the lower (lazy) portion of the stack VMA - no eager pages. + uintptr_t reserved_addr = 0; + int32_t lazy_rc = mm::mm_context_map_anonymous( + mm_ctx, + stack_max_base, + lazy_bytes, + mm::MM_PROT_READ | mm::MM_PROT_WRITE, + base_stack_flags | mm::MM_MAP_LAZY, + &reserved_addr + ); + if (lazy_rc != mm::MM_CTX_OK) { + log::error("sched: failed to reserve user stack VMA (rc=%d)", lazy_rc); + vmm::free(sys_stack_base); + heap::kfree_delete(t); + return nullptr; + } + + // Eagerly map the top window so argv/envp setup can write into it. + // Coalesce in mm_context_map_anonymous will merge this with the lazy + // reservation into one STACK vma covering [stack_max_base, USER_STACK_TOP). 
+ uintptr_t mapped_stack_addr = 0; + int32_t eager_rc = mm::mm_context_map_anonymous( mm_ctx, - user_stack_base, - mm::USER_STACK_PAGES * pmm::PAGE_SIZE, + eager_base, + eager_bytes, mm::MM_PROT_READ | mm::MM_PROT_WRITE, - stack_map_flags, + base_stack_flags, &mapped_stack_addr ); - if (map_rc != mm::MM_CTX_OK) { - log::error("sched: failed to map user stack VMA (rc=%d)", map_rc); + if (eager_rc != mm::MM_CTX_OK) { + log::error("sched: failed to map user stack window (rc=%d)", eager_rc); + mm::mm_context_unmap(mm_ctx, stack_max_base, lazy_bytes); vmm::free(sys_stack_base); heap::kfree_delete(t); return nullptr; @@ -689,7 +722,7 @@ __PRIVILEGED_CODE task* create_user_task( paging::get_physical(mm::USER_STACK_TOP - pmm::PAGE_SIZE, mm_ctx->pt_root); if (last_stack_page_phys == 0) { log::error("sched: failed to resolve user stack top page"); - mm::mm_context_unmap(mm_ctx, mapped_stack_addr, mm::USER_STACK_PAGES * pmm::PAGE_SIZE); + mm::mm_context_unmap(mm_ctx, stack_max_base, total_bytes); vmm::free(sys_stack_base); heap::kfree_delete(t); return nullptr; @@ -699,7 +732,7 @@ __PRIVILEGED_CODE task* create_user_task( last_stack_page_phys, mm::USER_STACK_TOP, *image, name, argc, argv); if (user_sp == 0) { log::error("sched: user stack setup failed (argv too large?)"); - mm::mm_context_unmap(mm_ctx, mapped_stack_addr, mm::USER_STACK_PAGES * pmm::PAGE_SIZE); + mm::mm_context_unmap(mm_ctx, stack_max_base, total_bytes); vmm::free(sys_stack_base); heap::kfree_delete(t); return nullptr; @@ -836,7 +869,7 @@ __PRIVILEGED_CODE task* create_user_thread( t->exit_code = 0; t->cleanup_stage = TASK_CLEANUP_STAGE_ACTIVE; t->kill_pending = 0; - string::memcpy(t->name, name, string::strnlen(name, TASK_NAME_MAX - 1)); + string::memcpy(t->name, name, string::strnlen(name, TASK_NAME_MAX - 1)); t->name[string::strnlen(name, TASK_NAME_MAX - 1)] = '\0'; t->task_registry_link = {}; diff --git a/kernel/syscall/handlers/sys_mmap.cpp b/kernel/syscall/handlers/sys_mmap.cpp index 6934c3b1..1494c342 
100644 --- a/kernel/syscall/handlers/sys_mmap.cpp +++ b/kernel/syscall/handlers/sys_mmap.cpp @@ -1,5 +1,6 @@ #include "syscall/handlers/sys_mmap.h" +#include "mm/mm.h" #include "mm/vma.h" #include "mm/shmem.h" #include "resource/resource.h" diff --git a/kernel/tests/memory/shmem.test.cpp b/kernel/tests/memory/shmem.test.cpp index 2811f9c6..0b335aa4 100644 --- a/kernel/tests/memory/shmem.test.cpp +++ b/kernel/tests/memory/shmem.test.cpp @@ -1,6 +1,7 @@ #define STLX_TEST_TIER TIER_MM_CORE #include "stlx_unit_test.h" +#include "mm/mm.h" #include "mm/shmem.h" #include "mm/vma.h" #include "mm/paging.h" diff --git a/kernel/tests/memory/vma.test.cpp b/kernel/tests/memory/vma.test.cpp index 2e9b4343..17ec9992 100644 --- a/kernel/tests/memory/vma.test.cpp +++ b/kernel/tests/memory/vma.test.cpp @@ -1,6 +1,7 @@ #define STLX_TEST_TIER TIER_MM_CORE #include "stlx_unit_test.h" +#include "mm/mm.h" #include "mm/vma.h" #include "mm/paging.h" #include "mm/pmm.h"