From 0de451beeb601a4fefe6a9931c83d22d80ffa363 Mon Sep 17 00:00:00 2001 From: 0x5459 <0x5459@protonmail.com> Date: Tue, 17 Sep 2024 18:55:52 +0800 Subject: [PATCH 1/5] increase the size of uvm memory allocation From `1<<16` to `1<<32` --- kernel-open/nvidia-uvm/uvm_kvmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel-open/nvidia-uvm/uvm_kvmalloc.c b/kernel-open/nvidia-uvm/uvm_kvmalloc.c index c531bff64c..46fb122276 100644 --- a/kernel-open/nvidia-uvm/uvm_kvmalloc.c +++ b/kernel-open/nvidia-uvm/uvm_kvmalloc.c @@ -257,7 +257,7 @@ static void *alloc_internal(size_t size, bool zero_memory) // Make sure that (sizeof(hdr) + size) is what it should be BUILD_BUG_ON(sizeof(uvm_vmalloc_hdr_t) != offsetof(uvm_vmalloc_hdr_t *, ptr)); - assert(size <= (1 << 16)); + assert(size <= (1 << 32)); if (size <= UVM_KMALLOC_THRESHOLD) { if (zero_memory) return kzalloc(size, NV_UVM_GFP_FLAGS); From 7632b7c0a01d90643edc21f958da68742ce62ab2 Mon Sep 17 00:00:00 2001 From: 0x5459 <0x5459@protonmail.com> Date: Mon, 4 Nov 2024 11:25:38 +0800 Subject: [PATCH 2/5] Correctly set the value of UVM_KMALLOC_THRESHOLD --- kernel-open/common/inc/nv-nanos.h | 4 ++-- kernel-open/nvidia-uvm/uvm_kvmalloc.h | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel-open/common/inc/nv-nanos.h b/kernel-open/common/inc/nv-nanos.h index 3cc14cac57..6739400571 100644 --- a/kernel-open/common/inc/nv-nanos.h +++ b/kernel-open/common/inc/nv-nanos.h @@ -507,8 +507,8 @@ typedef struct nvidia_event #define vmalloc(size) kmalloc(size, 0) #define vzalloc(size) kzalloc(size, 0) #define ksize(p) objcache_from_object(u64_from_pointer(p), PAGESIZE_2M)->pagesize -#define is_vmalloc_addr(p) false -#define vfree kfree +#define is_vmalloc_addr(p) (objcache_from_object(u64_from_pointer(p), PAGESIZE_2M) == INVALID_ADDRESS) +#define vfree NV_KFREE static inline void *kmalloc(unsigned long size, int flags) { diff --git a/kernel-open/nvidia-uvm/uvm_kvmalloc.h 
b/kernel-open/nvidia-uvm/uvm_kvmalloc.h index 92f3fd55b8..73901869c9 100644 --- a/kernel-open/nvidia-uvm/uvm_kvmalloc.h +++ b/kernel-open/nvidia-uvm/uvm_kvmalloc.h @@ -41,7 +41,9 @@ // // This is in the header so callers can use it to inform their allocation sizes // if they wish. -#define UVM_KMALLOC_THRESHOLD infinity +// +// Default value is 16. See: https://github.com/nanovms/nanos/blob/103518003874e7c8c3cb6e5a93d81fd0f197fde6/src/config.h#L84-L85 +#define UVM_KMALLOC_THRESHOLD 16 NV_STATUS uvm_kvmalloc_init(void); void uvm_kvmalloc_exit(void); From 32b874ea0c0101ca07960b74b621749cee06c338 Mon Sep 17 00:00:00 2001 From: 0x5459 <0x5459@protonmail.com> Date: Wed, 13 Nov 2024 14:37:05 +0800 Subject: [PATCH 3/5] use `MAX_MCACHE_ORDER` instead of hard-coded --- kernel-open/nvidia-uvm/uvm_kvmalloc.c | 1 - kernel-open/nvidia-uvm/uvm_kvmalloc.h | 9 ++++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel-open/nvidia-uvm/uvm_kvmalloc.c b/kernel-open/nvidia-uvm/uvm_kvmalloc.c index 46fb122276..10ba343071 100644 --- a/kernel-open/nvidia-uvm/uvm_kvmalloc.c +++ b/kernel-open/nvidia-uvm/uvm_kvmalloc.c @@ -257,7 +257,6 @@ static void *alloc_internal(size_t size, bool zero_memory) // Make sure that (sizeof(hdr) + size) is what it should be BUILD_BUG_ON(sizeof(uvm_vmalloc_hdr_t) != offsetof(uvm_vmalloc_hdr_t *, ptr)); - assert(size <= (1 << 32)); if (size <= UVM_KMALLOC_THRESHOLD) { if (zero_memory) return kzalloc(size, NV_UVM_GFP_FLAGS); diff --git a/kernel-open/nvidia-uvm/uvm_kvmalloc.h b/kernel-open/nvidia-uvm/uvm_kvmalloc.h index 73901869c9..95c906c41a 100644 --- a/kernel-open/nvidia-uvm/uvm_kvmalloc.h +++ b/kernel-open/nvidia-uvm/uvm_kvmalloc.h @@ -27,6 +27,11 @@ #include "uvm_nanos.h" #include "uvm_test_ioctl.h" +#ifndef _CONFIG_H_ +#include <config.h> +#define _CONFIG_H_ +#endif + // kmalloc is faster than vmalloc because it doesn't have to remap kernel // virtual memory, but for that same reason it requires physically-contiguous // memory. 
It also supports a native krealloc function which is missing in @@ -41,9 +46,7 @@ // // This is in the header so callers can use it to inform their allocation sizes // if they wish. -// -// Default value is 16. See: https://github.com/nanovms/nanos/blob/103518003874e7c8c3cb6e5a93d81fd0f197fde6/src/config.h#L84-L85 -#define UVM_KMALLOC_THRESHOLD 16 +#define UVM_KMALLOC_THRESHOLD (1 << MAX_MCACHE_ORDER) NV_STATUS uvm_kvmalloc_init(void); void uvm_kvmalloc_exit(void); From d340014ec80460646f678cefa0b9415c868706ec Mon Sep 17 00:00:00 2001 From: 0x5459 <0x5459@protonmail.com> Date: Wed, 13 Nov 2024 15:21:09 +0800 Subject: [PATCH 4/5] Correctly call vfree --- kernel-open/common/inc/nv-linux.h | 2 +- kernel-open/nvidia-modeset/nvidia-modeset-linux.c | 2 +- kernel-open/nvidia-uvm/nv-kthread-q-selftest.c | 2 +- kernel-open/nvidia-uvm/uvm_kvmalloc.c | 14 +++++++++----- kernel-open/nvidia/linux_nvswitch.c | 2 +- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/kernel-open/common/inc/nv-linux.h b/kernel-open/common/inc/nv-linux.h index fd6f99d161..d00a3ae67c 100644 --- a/kernel-open/common/inc/nv-linux.h +++ b/kernel-open/common/inc/nv-linux.h @@ -506,7 +506,7 @@ static inline void *nv_vmalloc(unsigned long size) static inline void nv_vfree(void *ptr, NvU64 size) { NV_MEMDBG_REMOVE(ptr, size); - vfree(ptr); + vfree(ptr, size); } static inline void *nv_ioremap(NvU64 phys, NvU64 size) diff --git a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c index d4d1d26837..ebb5e49acb 100644 --- a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c +++ b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c @@ -260,7 +260,7 @@ void nvkms_free(void *ptr, size_t size) if (size <= KMALLOC_LIMIT) { kfree(ptr); } else { - vfree(ptr); + vfree(ptr, size); } } diff --git a/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c b/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c index 88b70a4e85..439d612cea 100644 --- 
a/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c +++ b/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c @@ -241,7 +241,7 @@ static int _multithreaded_q_kthread_function(void *args) done: if (q_items) - vfree(q_items); + vfree(q_items, alloc_size); while (!kthread_should_stop()) schedule(); diff --git a/kernel-open/nvidia-uvm/uvm_kvmalloc.c b/kernel-open/nvidia-uvm/uvm_kvmalloc.c index 10ba343071..bf1c2c314b 100644 --- a/kernel-open/nvidia-uvm/uvm_kvmalloc.c +++ b/kernel-open/nvidia-uvm/uvm_kvmalloc.c @@ -297,16 +297,20 @@ void *__uvm_kvmalloc_zero(size_t size, const char *file, int line, const char *f void uvm_kvfree(void *p) { + uvm_vmalloc_hdr_t *hdr = NULL; + if (!p) return; if (uvm_leak_checker) alloc_tracking_remove(p); - if (is_vmalloc_addr(p)) - vfree(get_hdr(p)); - else + if (is_vmalloc_addr(p)){ + hdr = get_hdr(p); + vfree(hdr, hdr->alloc_size); + } else { kfree(p); + } } // Handle reallocs of kmalloc-based allocations @@ -334,7 +338,7 @@ static void *realloc_from_vmalloc(void *p, size_t new_size) void *new_p; if (new_size == 0) { - vfree(old_hdr); + vfree(old_hdr, old_hdr->alloc_size); return ZERO_SIZE_PTR; // What krealloc returns for this case } @@ -348,7 +352,7 @@ static void *realloc_from_vmalloc(void *p, size_t new_size) return NULL; memcpy(new_p, p, min(new_size, old_hdr->alloc_size)); - vfree(old_hdr); + vfree(old_hdr, old_hdr->alloc_size); return new_p; } diff --git a/kernel-open/nvidia/linux_nvswitch.c b/kernel-open/nvidia/linux_nvswitch.c index 07c16f300a..373e4e2226 100644 --- a/kernel-open/nvidia/linux_nvswitch.c +++ b/kernel-open/nvidia/linux_nvswitch.c @@ -711,7 +711,7 @@ _nvswitch_os_free if (is_vmalloc_addr(ptr)) { - vfree(ptr); + vfree(ptr, -1ull); } else { From 4bdb13220758057bdaa4bc3e43f11a30e90d8f42 Mon Sep 17 00:00:00 2001 From: 0x5459 <0x5459@protonmail.com> Date: Wed, 13 Nov 2024 17:32:28 +0800 Subject: [PATCH 5/5] Export the uvm_vmalloc_hdr_t structure to nv-nanos.h. 
And modify vfree, get alloc_size from ptr --- kernel-open/common/inc/nv-linux.h | 2 +- kernel-open/common/inc/nv-nanos.h | 19 +++++++++++++- .../nvidia-modeset/nvidia-modeset-linux.c | 2 +- .../nvidia-uvm/nv-kthread-q-selftest.c | 2 +- kernel-open/nvidia-uvm/uvm_kvmalloc.c | 26 ++++--------------- kernel-open/nvidia/linux_nvswitch.c | 2 +- 6 files changed, 27 insertions(+), 26 deletions(-) diff --git a/kernel-open/common/inc/nv-linux.h b/kernel-open/common/inc/nv-linux.h index d00a3ae67c..fd6f99d161 100644 --- a/kernel-open/common/inc/nv-linux.h +++ b/kernel-open/common/inc/nv-linux.h @@ -506,7 +506,7 @@ static inline void *nv_vmalloc(unsigned long size) static inline void nv_vfree(void *ptr, NvU64 size) { NV_MEMDBG_REMOVE(ptr, size); - vfree(ptr, size); + vfree(ptr); } static inline void *nv_ioremap(NvU64 phys, NvU64 size) diff --git a/kernel-open/common/inc/nv-nanos.h b/kernel-open/common/inc/nv-nanos.h index 6739400571..35e46877c3 100644 --- a/kernel-open/common/inc/nv-nanos.h +++ b/kernel-open/common/inc/nv-nanos.h @@ -481,6 +481,18 @@ typedef struct nvidia_event #define BUILD_BUG_ON(expr) build_assert(!(expr)) #define BUILD_BUG_ON_NOT_POWER_OF_2(expr) build_assert(((expr) & ((expr) - 1)) == 0) +// To implement realloc for vmalloc-based allocations we need to track the size +// of the original allocation. We can do that by allocating a header along with +// the allocation itself. Since vmalloc is only used for relatively large +// allocations, this overhead is very small. +// +// We don't need this for kmalloc since we can use ksize(). 
+typedef struct +{ + size_t alloc_size; + uint8_t ptr[0]; +} uvm_vmalloc_hdr_t; + #define ZERO_SIZE_PTR pointer_from_u64(16) #define ZERO_OR_NULL_PTR(p) (u64_from_pointer(p) <= u64_from_pointer(ZERO_SIZE_PTR)) @@ -508,7 +520,12 @@ typedef struct nvidia_event #define vzalloc(size) kzalloc(size, 0) #define ksize(p) objcache_from_object(u64_from_pointer(p), PAGESIZE_2M)->pagesize #define is_vmalloc_addr(p) (objcache_from_object(u64_from_pointer(p), PAGESIZE_2M) == INVALID_ADDRESS) -#define vfree NV_KFREE +#define vfree(p) do { \ + uvm_vmalloc_hdr_t *hdr; \ + hdr = container_of(p, uvm_vmalloc_hdr_t, ptr); \ + NV_KFREE(p, hdr->alloc_size); \ +} while (0) + static inline void *kmalloc(unsigned long size, int flags) { diff --git a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c index ebb5e49acb..d4d1d26837 100644 --- a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c +++ b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c @@ -260,7 +260,7 @@ void nvkms_free(void *ptr, size_t size) if (size <= KMALLOC_LIMIT) { kfree(ptr); } else { - vfree(ptr, size); + vfree(ptr); } } diff --git a/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c b/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c index 439d612cea..88b70a4e85 100644 --- a/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c +++ b/kernel-open/nvidia-uvm/nv-kthread-q-selftest.c @@ -241,7 +241,7 @@ static int _multithreaded_q_kthread_function(void *args) done: if (q_items) - vfree(q_items, alloc_size); + vfree(q_items); while (!kthread_should_stop()) schedule(); diff --git a/kernel-open/nvidia-uvm/uvm_kvmalloc.c b/kernel-open/nvidia-uvm/uvm_kvmalloc.c index bf1c2c314b..5b51772786 100644 --- a/kernel-open/nvidia-uvm/uvm_kvmalloc.c +++ b/kernel-open/nvidia-uvm/uvm_kvmalloc.c @@ -27,18 +27,6 @@ #include "uvm_kvmalloc.h" #include "uvm_rb_tree.h" -// To implement realloc for vmalloc-based allocations we need to track the size -// of the original allocation. 
We can do that by allocating a header along with -// the allocation itself. Since vmalloc is only used for relatively large -// allocations, this overhead is very small. -// -// We don't need this for kmalloc since we can use ksize(). -typedef struct -{ - size_t alloc_size; - uint8_t ptr[0]; -} uvm_vmalloc_hdr_t; - typedef struct { const char *file; @@ -297,20 +285,16 @@ void *__uvm_kvmalloc_zero(size_t size, const char *file, int line, const char *f void uvm_kvfree(void *p) { - uvm_vmalloc_hdr_t *hdr = NULL; - if (!p) return; if (uvm_leak_checker) alloc_tracking_remove(p); - if (is_vmalloc_addr(p)){ - hdr = get_hdr(p); - vfree(hdr, hdr->alloc_size); - } else { + if (is_vmalloc_addr(p)) + vfree(get_hdr(p)); + else kfree(p); - } } // Handle reallocs of kmalloc-based allocations @@ -338,7 +322,7 @@ static void *realloc_from_vmalloc(void *p, size_t new_size) void *new_p; if (new_size == 0) { - vfree(old_hdr, old_hdr->alloc_size); + vfree(old_hdr); return ZERO_SIZE_PTR; // What krealloc returns for this case } @@ -352,7 +336,7 @@ static void *realloc_from_vmalloc(void *p, size_t new_size) return NULL; memcpy(new_p, p, min(new_size, old_hdr->alloc_size)); - vfree(old_hdr, old_hdr->alloc_size); + vfree(old_hdr); return new_p; } diff --git a/kernel-open/nvidia/linux_nvswitch.c b/kernel-open/nvidia/linux_nvswitch.c index 373e4e2226..07c16f300a 100644 --- a/kernel-open/nvidia/linux_nvswitch.c +++ b/kernel-open/nvidia/linux_nvswitch.c @@ -711,7 +711,7 @@ _nvswitch_os_free if (is_vmalloc_addr(ptr)) { - vfree(ptr, -1ull); + vfree(ptr); } else {