From 5ed8bd3b9dd5899debe8b2f6ce84679252c8c939 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Mon, 20 Apr 2026 17:11:01 +0200 Subject: [PATCH 01/37] mmap to file and log allocations on disk --- erts/emulator/beam/atom.c | 3 + erts/emulator/beam/beam_catches.c | 6 + erts/emulator/beam/beam_load.c | 3 + erts/emulator/beam/emu/emu_load.c | 17 + erts/emulator/beam/erl_alloc.c | 133 ++++++++ erts/emulator/beam/erl_alloc.h | 9 + erts/emulator/beam/erl_alloc_util.c | 92 ++++-- erts/emulator/beam/module.c | 4 + erts/emulator/sys/common/erl_mmap.c | 478 ++++++++++++++++++++++++---- erts/emulator/sys/common/erl_mmap.h | 3 + 10 files changed, 667 insertions(+), 81 deletions(-) diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c index a4dc975bf281..dcbfe1e06224 100644 --- a/erts/emulator/beam/atom.c +++ b/erts/emulator/beam/atom.c @@ -474,6 +474,9 @@ init_atom_table(void) erts_index_init(ERTS_ALC_T_ATOM_TABLE, &erts_atom_table, "atom_tab", ATOM_SIZE, erts_atom_table_size, f); + erts_alloc_trace_note_alloc("atom_table.index_root", + &erts_atom_table, + sizeof(erts_atom_table)); /* Ordinary atoms. 
a is a template for creating an entry in the atom table */ for (i = 0; erl_atom_names[i] != 0; i++) { diff --git a/erts/emulator/beam/beam_catches.c b/erts/emulator/beam/beam_catches.c index 351b77217fbc..e475f91cc6b7 100644 --- a/erts/emulator/beam/beam_catches.c +++ b/erts/emulator/beam/beam_catches.c @@ -65,6 +65,9 @@ void beam_catches_init(void) bccix[0].high_mark = 0; bccix[0].beam_catches = erts_alloc(ERTS_ALC_T_CATCHES, sizeof(beam_catch_t)*DEFAULT_TABSIZE); + erts_alloc_trace_note_alloc("beam_catches.table", + bccix[0].beam_catches, + sizeof(beam_catch_t) * DEFAULT_TABSIZE); IF_DEBUG(bccix[0].is_staging = 0); for (i=1; ibeam_catches = erts_alloc(ERTS_ALC_T_CATCHES, newsize*sizeof(beam_catch_t)); + erts_alloc_trace_note_alloc("beam_catches.table.grow", + p->beam_catches, + newsize * sizeof(beam_catch_t)); sys_memcpy(p->beam_catches, prev_vec, p->tabsize*sizeof(beam_catch_t)); gc_old_vec(prev_vec); diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 3d20a9abf9aa..315cde7fe4e0 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -303,6 +303,9 @@ erts_finish_loading(Binary* magic, Process* c_p, } else { mod_tab_p->on_load = erts_alloc(ERTS_ALC_T_PREPARED_CODE, sizeof(struct erl_module_instance)); + erts_alloc_trace_note_alloc("module_table.on_load_instance", + mod_tab_p->on_load, + sizeof(struct erl_module_instance)); inst_p = mod_tab_p->on_load; erts_module_instance_init(inst_p); } diff --git a/erts/emulator/beam/emu/emu_load.c b/erts/emulator/beam/emu/emu_load.c index e5eb108ed033..465421608cd2 100644 --- a/erts/emulator/beam/emu/emu_load.c +++ b/erts/emulator/beam/emu/emu_load.c @@ -69,6 +69,10 @@ int beam_load_prepare_emit(LoaderState *stp) { hdr = erts_alloc(ERTS_ALC_T_CODE, (offsetof(BeamCodeHeader,functions) + sizeof(BeamInstr) * stp->codev_size)); + erts_alloc_trace_note_alloc("module_code.header.initial", + hdr, + offsetof(BeamCodeHeader, functions) + + sizeof(BeamInstr) * stp->codev_size); 
hdr->num_functions = stp->beam.code.function_count; hdr->attr_ptr = NULL; @@ -277,6 +281,7 @@ int beam_load_finish_emit(LoaderState *stp) { /* Move the code to its final location. */ code_hdr = (BeamCodeHeader*)erts_realloc(ERTS_ALC_T_CODE, (void *) code_hdr, size); + erts_alloc_trace_note_alloc("module_code.header.final", code_hdr, size); codev = (BeamInstr*)&code_hdr->functions; stp->code_hdr = code_hdr; stp->codev = codev; @@ -321,6 +326,9 @@ int beam_load_finish_emit(LoaderState *stp) { lit_asize = ERTS_LITERAL_AREA_ALLOC_SIZE(tot_lit_size); literal_area = erts_alloc(ERTS_ALC_T_LITERAL, lit_asize); + erts_alloc_trace_note_alloc("module_code.literal_area", + literal_area, + lit_asize); ptr = &literal_area->start[0]; literal_area->end = ptr + tot_lit_size; @@ -612,6 +620,9 @@ void beam_load_finalize_code(LoaderState* stp, struct erl_module_instance* inst_ export = erts_export_put(import->module, import->function, import->arity); + erts_alloc_trace_note_alloc("module_code.import_export_entry", + (void *) export, + sizeof(*export)); current = stp->import_patches[i]; while (current != 0) { @@ -647,6 +658,9 @@ void beam_load_finalize_code(LoaderState* stp, struct erl_module_instance* inst_ stp->beam.checksum, lambda->index, lambda->arity - lambda->num_free); + erts_alloc_trace_note_alloc("module_code.fun_entry", + fun_entry, + sizeof(*fun_entry)); fun_entries[i] = fun_entry; /* If there are no free variables, the loader has created a literal @@ -700,6 +714,9 @@ void beam_load_finalize_code(LoaderState* stp, struct erl_module_instance* inst_ ep = erts_export_put(stp->module, entry->function, entry->arity); + erts_alloc_trace_note_alloc("module_code.export_entry", + ep, + sizeof(*ep)); /* Fill in BIF stubs with a proper call to said BIF. 
*/ if (ep->bif_number != -1) { diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index f5bc19fdbb72..d45982ff92c6 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -46,6 +46,8 @@ #include "erl_cpu_topology.h" #include "erl_thr_queue.h" #include "erl_nfunc_sched.h" +#include +#include #if defined(ERTS_ALC_T_DRV_SEL_D_STATE) || defined(ERTS_ALC_T_DRV_EV_D_STATE) #include "erl_check_io.h" #endif @@ -98,6 +100,119 @@ static Uint install_debug_functions(void); #endif static int lock_all_physical_memory = 0; +static int erts_alloc_trace_fd = -1; +static int erts_alloc_struct_csv_fd = -1; + +static ERTS_INLINE void +erts_alloc_trace_write(const char *line, int len) +{ + if (erts_alloc_trace_fd >= 0 && len > 0) { + ssize_t wres = write(erts_alloc_trace_fd, line, (size_t) len); + (void) wres; + } +} + +static ERTS_INLINE void +erts_alloc_struct_csv_write(const char *line, int len) +{ + if (erts_alloc_struct_csv_fd >= 0 && len > 0) { + ssize_t wres = write(erts_alloc_struct_csv_fd, line, (size_t) len); + (void) wres; + } +} + +void +erts_alloc_trace_erts_alloc_call(ErtsAlcType_t type, Uint size, void *res) +{ + char line[160]; + int len; + if (erts_alloc_trace_fd < 0 && erts_alloc_struct_csv_fd < 0) { + return; + } + len = erts_snprintf(line, sizeof(line), + "ALLOC type=%u size=%lu ptr=%p\n", + (unsigned int) type, + (unsigned long) size, + res); + if (len < 0) { + return; + } + if (len >= (int) sizeof(line)) { + len = (int) sizeof(line) - 1; + } + erts_alloc_trace_write(line, len); +} + +void +erts_alloc_trace_note_alloc(const char *tag, void *ptr, UWord size) +{ + char line[256]; + char csv_line[192]; + int len; + int csv_len; + const char *safe_tag; + if (erts_alloc_trace_fd < 0) { + return; + } + safe_tag = tag ? 
tag : "unknown"; + len = erts_snprintf(line, sizeof(line), + "STRUCT_ALLOC tag=%s size=%lu ptr=%p\n", + safe_tag, + (unsigned long) size, + ptr); + if (len < 0) { + return; + } + if (len >= (int) sizeof(line)) { + len = (int) sizeof(line) - 1; + } + if (erts_alloc_trace_fd >= 0) { + erts_alloc_trace_write(line, len); + } + if (erts_alloc_struct_csv_fd >= 0) { + csv_len = erts_snprintf(csv_line, sizeof(csv_line), + "%s,%p\n", + safe_tag, + ptr); + if (csv_len < 0) { + return; + } + if (csv_len >= (int) sizeof(csv_line)) { + csv_len = (int) sizeof(csv_line) - 1; + } + erts_alloc_struct_csv_write(csv_line, csv_len); + } +} + +void +erts_alloc_trace_carrier_create(const char *alloc_name, + int alloc_ix, + UWord carrier_size, + int is_mseg, + int is_sbc, + void *carrier_ptr) +{ + char line[256]; + int len; + if (erts_alloc_trace_fd < 0) { + return; + } + len = erts_snprintf(line, sizeof(line), + "CARRIER_CREATE alloc=%s ix=%d size=%lu kind=%s source=%s ptr=%p\n", + alloc_name ? alloc_name : "unknown", + alloc_ix, + (unsigned long) carrier_size, + is_sbc ? "sbc" : "mbc", + is_mseg ? 
"mseg" : "sys_alloc", + carrier_ptr); + if (len < 0) { + return; + } + if (len >= (int) sizeof(line)) { + len = (int) sizeof(line) - 1; + } + erts_alloc_trace_write(line, len); +} ErtsAllocatorFunctions_t ERTS_WRITE_UNLIKELY(erts_allctrs[ERTS_ALC_A_MAX+1]); ErtsAllocatorInfo_t erts_allctrs_info[ERTS_ALC_A_MAX+1]; @@ -664,6 +779,24 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) erts_sys_alloc_init(); erts_init_utils_mem(); + { + const char *trace_path = getenv("ERTS_ALLOC_TRACE_FILE"); + const char *csv_path = getenv("ERTS_ALLOC_STRUCT_CSV_FILE"); + if (trace_path && trace_path[0] != '\0') { + erts_alloc_trace_fd = open(trace_path, O_WRONLY|O_CREAT|O_APPEND, 0666); + } + if (csv_path && csv_path[0] != '\0') { + erts_alloc_struct_csv_fd = open(csv_path, O_WRONLY|O_CREAT|O_APPEND, 0666); + } else if (trace_path && trace_path[0] != '\0') { + char default_csv_path[512]; + int plen = erts_snprintf(default_csv_path, sizeof(default_csv_path), + "%s.struct_alloc.csv", trace_path); + if (plen > 0 && plen < (int) sizeof(default_csv_path)) { + erts_alloc_struct_csv_fd = open(default_csv_path, O_WRONLY|O_CREAT|O_APPEND, 0666); + } + } + } + set_default_sl_alloc_opts(&init.sl_alloc); set_default_std_alloc_opts(&init.std_alloc); set_default_ll_alloc_opts(&init.ll_alloc); diff --git a/erts/emulator/beam/erl_alloc.h b/erts/emulator/beam/erl_alloc.h index 3838527ab773..05d6a3306dbc 100644 --- a/erts/emulator/beam/erl_alloc.h +++ b/erts/emulator/beam/erl_alloc.h @@ -230,6 +230,14 @@ int erts_get_thr_alloc_ix(void); #endif /* #if !ERTS_ALC_DO_INLINE */ void *erts_alloc_permanent_cache_aligned(ErtsAlcType_t type, Uint size) ERTS_ATTR_MALLOC_US(2); +void erts_alloc_trace_erts_alloc_call(ErtsAlcType_t type, Uint size, void *res); +void erts_alloc_trace_note_alloc(const char *tag, void *ptr, UWord size); +void erts_alloc_trace_carrier_create(const char *alloc_name, + int alloc_ix, + UWord carrier_size, + int is_mseg, + int is_sbc, + void *carrier_ptr); #ifndef 
ERTS_CACHE_LINE_SIZE /* Assumed cache line size */ @@ -250,6 +258,7 @@ void *erts_alloc(ErtsAlcType_t type, Uint size) size); if (!res) erts_alloc_n_enomem(ERTS_ALC_T2N(type), size); + erts_alloc_trace_erts_alloc_call(type, size, res); ERTS_MSACC_POP_STATE_X(); return res; } diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index 8c3dd7b46069..5c65269b06b4 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -35,7 +35,7 @@ /* * Alloc util will enforce 8 byte alignment if sys_alloc and mseg_alloc at * least enforces 8 byte alignment. If sys_alloc only enforces 4 byte - * alignment then alloc util will do so too. + * alignment then alloc util will do so too. */ #ifdef HAVE_CONFIG_H @@ -114,6 +114,13 @@ static int allow_sys_alloc_carriers; #define ERTS_ALC_CC_GIGA_VAL(CC) ((CC) / ONE_GIGA) #define ERTS_ALC_CC_VAL(CC) ((CC) % ONE_GIGA) +extern void erts_alloc_trace_carrier_create(const char *alloc_name, + int alloc_ix, + UWord carrier_size, + int is_mseg, + int is_sbc, + void *carrier_ptr); + #define INC_CC(CC) ((CC)++) #define DEC_CC(CC) ((CC)--) @@ -331,7 +338,7 @@ typedef struct { #define IS_PREV_BLK_ALLOCED(B) \ (!IS_PREV_BLK_FREE((B))) #define IS_ALLOCED_BLK(B) \ - (!IS_FREE_BLK((B))) + (!IS_FREE_BLK((B))) #define IS_LAST_BLK(B) \ ((B)->bhdr & LAST_BLK_HDR_FLG) #define IS_NOT_LAST_BLK(B) \ @@ -1730,7 +1737,7 @@ get_pref_allctr(void *extra) * concurrent threads may be updating adjacent blocks. * We rely on getting a consistent result (without atomic op) when reading * the block header word even if a concurrent thread is updating - * the "PREV_FREE" flag bit. + * the "PREV_FREE" flag bit. 
*/ static ERTS_INLINE Allctr_t* get_used_allctr(Allctr_t *pref_allctr, int pref_lock, void *p, UWord *sizep, @@ -1746,7 +1753,7 @@ get_used_allctr(Allctr_t *pref_allctr, int pref_lock, void *p, UWord *sizep, if (IS_SBC_BLK(blk)) { crr = BLK_TO_SBC(blk); if (sizep) - *sizep = SBC_BLK_SZ(blk) - ABLK_HDR_SZ; + *sizep = SBC_BLK_SZ(blk) - ABLK_HDR_SZ; iallctr = erts_atomic_read_dirty(&crr->allctr); } else { @@ -2182,7 +2189,7 @@ handle_delayed_dealloc(Allctr_t *allctr, ErtsAllctrDDBlock_t *dd_block; ErtsAlcType_t type; Uint32 flags; - + dd_block = (ErtsAllctrDDBlock_t*)ptr; flags = dd_block->flags; type = dd_block->type; @@ -2730,7 +2737,7 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp is_last_blk = IS_LAST_BLK(blk); if (IS_PREV_BLK_FREE(blk)) { - ASSERT(!is_first_blk); + ASSERT(!is_first_blk); /* Coalesce with previous block... */ blk = PREV_BLK(blk); (*allctr->unlink_free_block)(allctr, blk); @@ -2763,7 +2770,7 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp SET_MBC_FBLK_SZ(blk, blk_sz); is_last_blk = IS_LAST_BLK(nxt_blk); - if (is_last_blk) + if (is_last_blk) SET_LAST_BLK(blk); else { SET_NOT_LAST_BLK(blk); @@ -2956,7 +2963,7 @@ mbc_realloc(Allctr_t *allctr, ErtsAlcType_t type, void *p, Uint size, ASSERT(blk_sz >= allctr->min_block_size); ASSERT(blk_sz >= size + ABLK_HDR_SZ); ASSERT(IS_MBC_BLK(blk)); - + ASSERT(IS_FREE_BLK(nxt_blk)); ASSERT(IS_PREV_BLK_ALLOCED(nxt_blk)); ASSERT(nxt_blk_sz == MBC_BLK_SZ(nxt_blk)); @@ -2967,7 +2974,7 @@ mbc_realloc(Allctr_t *allctr, ErtsAlcType_t type, void *p, Uint size, ASSERT(is_last_blk || nxt_blk == PREV_BLK(NXT_BLK(nxt_blk))); ASSERT(is_last_blk || IS_PREV_BLK_FREE(NXT_BLK(nxt_blk))); ASSERT(FBLK_TO_MBC(nxt_blk) == crr); - + HARD_CHECK_BLK_CARRIER(allctr, blk); check_abandon_carrier(allctr, nxt_blk, NULL); @@ -3141,7 +3148,7 @@ mbc_realloc(Allctr_t *allctr, ErtsAlcType_t type, void *p, Uint size, (*allctr->unlink_free_block)(allctr, new_blk); /* prev */ - if 
(is_last_blk) + if (is_last_blk) new_blk_flgs |= LAST_BLK_HDR_FLG; else { nxt_blk = BLK_AFTER(blk, old_blk_sz); @@ -4087,7 +4094,7 @@ cpool_read_stat(Allctr_t *allctr, int alloc_no, #endif static void CHECK_1BLK_CARRIER(Allctr_t* A, int SBC, int MSEGED, Carrier_t* C, - UWord CSZ, Block_t* B, UWord BSZ) + UWord CSZ, Block_t* B, UWord BSZ) { ASSERT(IS_LAST_BLK((B))); ASSERT((CSZ) == CARRIER_SZ((C))); @@ -4271,7 +4278,7 @@ create_carrier(Allctr_t *allctr, Uint umem_sz, UWord flags) : SYS_ALLOC_CARRIER_CEILING(bcrr_sz)); crr = (Carrier_t *) allctr->sys_alloc(allctr, &crr_sz, flags & CFLG_MBC); - + if (!crr) { if (crr_sz > UNIT_CEILING(bcrr_sz)) { crr_sz = UNIT_CEILING(bcrr_sz); @@ -4348,6 +4355,45 @@ create_carrier(Allctr_t *allctr, Uint umem_sz, UWord flags) } #endif + if ( +#if HAVE_ERTS_MSEG + IS_MSEG_CARRIER(crr) +#else + 0 +#endif + ) { + Uint mseg_no = allctr->mbcs.carriers[ERTS_CRR_ALLOC_MSEG].no + + allctr->sbcs.carriers[ERTS_CRR_ALLOC_MSEG].no; + char main_name[160]; + if (mseg_no == 1) { + erts_snprintf(main_name, sizeof(main_name), + "initial_%s_%u_carrier_%u", + ERTS_ALC_A2AD(allctr->alloc_no), + (unsigned int) allctr->ix, + (unsigned int) mseg_no); + } + else { + erts_snprintf(main_name, sizeof(main_name), + "%s_%u_carrier_%u", + ERTS_ALC_A2AD(allctr->alloc_no), + (unsigned int) allctr->ix, + (unsigned int) mseg_no); + } + main_name[sizeof(main_name) - 1] = '\0'; + (void) erts_mmap_name_mapping_global(crr, crr_sz, main_name); + } + + erts_alloc_trace_carrier_create(ERTS_ALC_A2AD(allctr->alloc_no), + allctr->ix, + crr_sz, +#if HAVE_ERTS_MSEG + IS_MSEG_CARRIER(crr) ? 1 : 0, +#else + 0, +#endif + (flags & CFLG_SBC) ? 
1 : 0, + crr); + DEBUG_SAVE_ALIGNMENT(crr); return blk; } @@ -4667,7 +4713,7 @@ static struct { #endif Eterm At_sign; - + #ifdef DEBUG Eterm end_of_atoms; #endif @@ -4777,7 +4823,7 @@ init_atoms(Allctr_t *allctr) #endif am.At_sign = am_atom_put("@", 1); - + #ifdef DEBUG for (atom = (Eterm *) &am; atom < &am.end_of_atoms; atom++) { ASSERT(*atom != THE_NON_VALUE); @@ -4804,7 +4850,7 @@ init_atoms(Allctr_t *allctr) alloc_num_atoms[ix] = am_atom_put(name, len); } } - + if (allctr && !allctr->atoms_initialized) { make_name_atoms(allctr); @@ -5699,7 +5745,7 @@ Eterm erts_alcu_au_info_options(fmtfn_t *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) { - Eterm res = THE_NON_VALUE; + Eterm res = THE_NON_VALUE; if (print_to_p) { @@ -5753,7 +5799,7 @@ erts_alcu_info_options(Allctr_t *allctr, erts_mtx_lock(&allctr->mutex); } res = info_options(allctr, print_to_p, print_to_arg, hpp, szp); - if (allctr->thread_safe) { + if (allctr->thread_safe) { erts_mtx_unlock(&allctr->mutex); } return res; @@ -6286,7 +6332,7 @@ do_erts_alcu_realloc(ErtsAlcType_t type, #endif INC_CC(allctr->calls.this_realloc); - + blk = UMEM2BLK(p); if (size < allctr->sbc_threshold) { @@ -6318,7 +6364,7 @@ do_erts_alcu_realloc(ErtsAlcType_t type, goto do_carrier_resize; diff_sz_val >>= 7; } - + if (100*diff_sz_val < allctr->sbc_move_threshold*crr_sz_val) /* Data won't be copied into a new carrier... 
*/ goto do_carrier_resize; @@ -7056,7 +7102,7 @@ erts_alcu_init(AlcUInit_t *init) for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) ASSERT(allocator_char_str[i]); #endif - + erts_mtx_init(&init_atoms_mtx, "alcu_init_atoms", NIL, ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_ALLOCATOR); @@ -7328,7 +7374,7 @@ static int blockscan_sweep_sbcs(blockscan_t *state) if (blockscan_clist_yielding(state)) { state->next_op = state->current_op; } - + blockscan_unlock_helper(state); return 1; @@ -8364,7 +8410,7 @@ erts_alcu_test(UWord op, UWord a1, UWord a2) case 0x013: return (UWord) ((Allctr_t *) a1)->sbc_list.last; case 0x014: return (UWord) ((Carrier_t *) a1)->next; case 0x015: return (UWord) ((Carrier_t *) a1)->prev; - case 0x016: return (UWord) ABLK_HDR_SZ; + case 0x016: return (UWord) ABLK_HDR_SZ; case 0x017: return (UWord) ((Allctr_t *) a1)->min_block_size; case 0x018: return (UWord) NXT_BLK((Block_t *) a1); case 0x019: return (UWord) PREV_BLK((Block_t *) a1); @@ -8547,7 +8593,7 @@ check_blk_carrier(Allctr_t *allctr, Block_t *iblk) blk = NXT_BLK(blk); } } - + ASSERT((((char *) crr) + MBC_HEADER_SIZE(allctr) + tot_blk_sz) == carrier_end); diff --git a/erts/emulator/beam/module.c b/erts/emulator/beam/module.c index fb4defbc55eb..da494abb0d58 100644 --- a/erts/emulator/beam/module.c +++ b/erts/emulator/beam/module.c @@ -86,6 +86,7 @@ void erts_module_instance_init(struct erl_module_instance* modi) static Module* module_alloc(Module* tmpl) { Module* obj = (Module*) erts_alloc(ERTS_ALC_T_MODULE, sizeof(Module)); + erts_alloc_trace_note_alloc("module_table.module", obj, sizeof(Module)); erts_atomic_add_nob(&tot_module_bytes, sizeof(Module)); obj->module = tmpl->module; @@ -119,6 +120,9 @@ void init_module_table(void) for (i = 0; i < ERTS_NUM_CODE_IX; i++) { erts_index_init(ERTS_ALC_T_MODULE_TABLE, &module_tables[i], "module_code", MODULE_SIZE, MODULE_LIMIT, f); + erts_alloc_trace_note_alloc("module_table.index_root", + &module_tables[i], + 
sizeof(module_tables[i])); } for (i=0; i +#include +#include #ifdef HAVE_SYS_MMAN_H #include @@ -347,6 +349,14 @@ typedef struct { Uint nseg; }ErtsFreeSegMap; +typedef struct ErtsMMapFileMap_ ErtsMMapFileMap; +struct ErtsMMapFileMap_ { + char *start; + UWord size; + char *path; + ErtsMMapFileMap *next; +}; + struct ErtsMemMapper_ { int (*reserve_physical)(char *, UWord); void (*unreserve_physical)(char *, UWord); @@ -371,6 +381,7 @@ struct ErtsMemMapper_ { #if HAVE_MMAP && (!defined(MAP_ANON) && !defined(MAP_ANONYMOUS)) int mmap_fd; #endif + ErtsMMapFileMap *file_maps; erts_mtx_t mtx; struct { char *free_list; @@ -1289,29 +1300,45 @@ Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map) #if HAVE_MMAP # define ERTS_MMAP_PROT (PROT_READ|PROT_WRITE) # if defined(MAP_ANONYMOUS) -# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE) -# define ERTS_MMAP_FD (-1) +# define ERTS_MMAP_FLAGS (MAP_ANONYMOUS|MAP_PRIVATE) +# define ERTS_MMAP_FD_FOR_MM(MM) (-1) # elif defined(MAP_ANON) # define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE) -# define ERTS_MMAP_FD (-1) +# define ERTS_MMAP_FD_FOR_MM(MM) (-1) # else # define ERTS_MMAP_FLAGS (MAP_PRIVATE) -# define ERTS_MMAP_FD mm->mmap_fd +# define ERTS_MMAP_FD_FOR_MM(MM) ((MM)->mmap_fd) # endif #endif -static ERTS_INLINE void * -os_mmap(void *hint_ptr, UWord size) +static ERTS_INLINE int +erts_mmap_ensure_records_dir(void) { #if HAVE_MMAP - void *res; + static int dir_initialized = 0; + if (!dir_initialized) { + if (mkdir("_mmap-records", 0777) != 0 && errno != EEXIST) { + return 0; + } + dir_initialized = 1; + } + return 1; +#else + return 0; +#endif +} - res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT, - ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0); - if (res == MAP_FAILED) - return NULL; - return res; +static ERTS_INLINE void * +os_mmap_raw(ErtsMemMapper *mm, void *hint_ptr, UWord size, int fd, int flags) +{ +#if HAVE_MMAP + void *res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT, flags, fd, 0); + return res == MAP_FAILED ? 
NULL : res; #elif HAVE_VIRTUALALLOC + (void) mm; + (void) hint_ptr; + (void) fd; + (void) flags; return (void *) VirtualAlloc(NULL, (SIZE_T) size, MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE); #else @@ -1320,7 +1347,7 @@ os_mmap(void *hint_ptr, UWord size) } static ERTS_INLINE void -os_munmap(void *ptr, UWord size) +os_munmap_raw(void *ptr, UWord size) { #if HAVE_MMAP #ifdef ERTS_MMAP_DEBUG @@ -1339,61 +1366,344 @@ os_munmap(void *ptr, UWord size) #endif } +static ERTS_INLINE void +erts_mmap_file_record_add(ErtsMemMapper *mm, void *ptr, UWord size, char *path) +{ +#if HAVE_MMAP + ErtsMMapFileMap *rec = (ErtsMMapFileMap *) malloc(sizeof(*rec)); + if (!rec) { + unlink(path); + free(path); + erts_exit(ERTS_ABORT_EXIT, "erts_mmap: failed to allocate file mapping record\n"); + } + rec->start = (char *) ptr; + rec->size = size; + rec->path = path; + rec->next = mm->file_maps; + mm->file_maps = rec; +#else + (void) mm; + (void) ptr; + (void) size; + (void) path; +#endif +} + +static ERTS_INLINE void +erts_mmap_file_record_remove_exact(ErtsMemMapper *mm, void *ptr, UWord size) +{ +#if HAVE_MMAP + ErtsMMapFileMap **pp, *p; + pp = &mm->file_maps; + p = *pp; + while (p) { + if (p->start == (char *) ptr && p->size == size) { + *pp = p->next; + unlink(p->path); + free(p->path); + free(p); + return; + } + pp = &p->next; + p = p->next; + } +#else + (void) mm; + (void) ptr; + (void) size; +#endif +} + +static ERTS_INLINE void +erts_mmap_file_record_resize(ErtsMemMapper *mm, void *old_ptr, UWord old_size, void *new_ptr, UWord new_size) +{ +#if HAVE_MMAP + ErtsMMapFileMap *p; + p = mm->file_maps; + while (p) { + if (p->start == (char *) old_ptr && p->size == old_size) { + p->start = (char *) new_ptr; + p->size = new_size; + break; + } + p = p->next; + } +#else + (void) mm; + (void) old_ptr; + (void) old_size; + (void) new_ptr; + (void) new_size; +#endif +} + +int +erts_mmap_name_mapping(ErtsMemMapper *mm, void *ptr, UWord size, const char *name) +{ +#if HAVE_MMAP + ErtsMMapFileMap *p; + 
char new_path[PATH_MAX]; + char *new_path_heap; + + if (!name || !name[0]) { + return 0; + } + + p = mm->file_maps; + while (p) { + if (p->start == (char *) ptr && p->size == size) { + break; + } + p = p->next; + } + + if (!p) { + return 0; + } + + erts_snprintf(new_path, sizeof(new_path), "_mmap-records/%s", name); + new_path[sizeof(new_path) - 1] = '\0'; + + unlink(new_path); + if (rename(p->path, new_path) != 0) { + return 0; + } + + new_path_heap = (char *) malloc(strlen(new_path) + 1); + if (!new_path_heap) { + return 1; + } + strcpy(new_path_heap, new_path); + free(p->path); + p->path = new_path_heap; + return 1; +#else + (void) mm; + (void) ptr; + (void) size; + (void) name; + return 0; +#endif +} + +static int +erts_mmap_prefix_mapping_name(ErtsMemMapper *mm, void *ptr, UWord size, const char *prefix) +{ +#if HAVE_MMAP + ErtsMMapFileMap *p; + const char *base; + const char *tail; + char new_name[PATH_MAX]; + + if (!prefix || !prefix[0]) { + return 0; + } + + p = mm->file_maps; + while (p) { + if (p->start == (char *) ptr && p->size == size) { + break; + } + p = p->next; + } + if (!p) { + return 0; + } + + base = strrchr(p->path, '/'); + base = base ? 
base + 1 : p->path; + tail = strstr(base, "erts-mmap-"); + if (!tail) { + tail = base; + } + if (strncmp(base, prefix, strlen(prefix)) == 0 && base[strlen(prefix)] == '_') { + return 1; + } + erts_snprintf(new_name, sizeof(new_name), "%s_%s", prefix, tail); + new_name[sizeof(new_name) - 1] = '\0'; + return erts_mmap_name_mapping(mm, ptr, size, new_name); +#else + (void) mm; + (void) ptr; + (void) size; + (void) prefix; + return 0; +#endif +} + +int +erts_mmap_name_mapping_global(void *ptr, UWord size, const char *name) +{ +#if HAVE_MMAP + if (erts_mmap_name_mapping(&erts_dflt_mmapper, ptr, size, name)) { + return 1; + } +#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) + if (erts_mmap_name_mapping(&erts_literal_mmapper, ptr, size, name)) { + return 1; + } +#endif +#else + (void) ptr; + (void) size; + (void) name; +#endif + return 0; +} + +int +erts_mmap_prefix_mapping_name_global(void *ptr, UWord size, const char *prefix) +{ +#if HAVE_MMAP + if (erts_mmap_prefix_mapping_name(&erts_dflt_mmapper, ptr, size, prefix)) { + return 1; + } +#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) + if (erts_mmap_prefix_mapping_name(&erts_literal_mmapper, ptr, size, prefix)) { + return 1; + } +#endif +#else + (void) ptr; + (void) size; + (void) prefix; +#endif + return 0; +} + +static ERTS_INLINE int +erts_mmap_prepare_file(UWord size, int *fd_out, char **path_out) +{ +#if HAVE_MMAP + char tmpl[PATH_MAX]; + char *path; + int fd; + + if (!erts_mmap_ensure_records_dir()) { + return 0; + } + + erts_snprintf(tmpl, sizeof(tmpl), "_mmap-records/erts-mmap-XXXXXX"); + fd = mkstemp(tmpl); + if (fd < 0) { + return 0; + } + if (ftruncate(fd, (off_t) size) != 0) { + close(fd); + unlink(tmpl); + return 0; + } + path = (char *) malloc(strlen(tmpl) + 1); + if (!path) { + close(fd); + unlink(tmpl); + return 0; + } + strcpy(path, tmpl); + + *fd_out = fd; + *path_out = path; + return 1; +#else + (void) size; + (void) fd_out; + (void) path_out; + 
return 0; +#endif +} + +static ERTS_INLINE void * +os_mmap(ErtsMemMapper *mm, void *hint_ptr, UWord size) +{ +#if HAVE_MMAP + void *res; + int fd; + char *path = NULL; + + if (!erts_mmap_prepare_file(size, &fd, &path)) { + return os_mmap_raw(mm, hint_ptr, size, ERTS_MMAP_FD_FOR_MM(mm), ERTS_MMAP_FLAGS); + } + + res = os_mmap_raw(mm, hint_ptr, size, fd, ERTS_MMAP_FLAGS); + if (!res) { + close(fd); + unlink(path); + free(path); + return os_mmap_raw(mm, hint_ptr, size, ERTS_MMAP_FD_FOR_MM(mm), ERTS_MMAP_FLAGS); + } + close(fd); + erts_mmap_file_record_add(mm, res, size, path); + return res; +#elif HAVE_VIRTUALALLOC + return os_mmap_raw(mm, hint_ptr, size, -1, 0); +#else +# error "missing mmap() or similar" +#endif +} + +static ERTS_INLINE void +os_munmap(ErtsMemMapper *mm, void *ptr, UWord size) +{ + os_munmap_raw(ptr, size); + erts_mmap_file_record_remove_exact(mm, ptr, size); +} + #define ALIGN_UP(x, a) ((void*)((((UWord)(x)) + ((a) - 1)) & ~((a) - 1))) #define IS_ALIGNED(x, a) ((((UWord)(x)) & ((a) - 1)) == 0) +static ERTS_INLINE void * +os_mmap_aligned_raw(ErtsMemMapper *mm, UWord size, UWord alignment); + /* * Just like os_mmap, but ensures that mapping is a multiple of the * specified alignment. Alignment must be a power-of-2 multiple of * the page size in bytes. */ static ERTS_INLINE void * -os_mmap_aligned(UWord size, UWord alignment) +os_mmap_aligned(ErtsMemMapper *mm, UWord size, UWord alignment) { char *result; #ifdef MAP_ALIGN + int fd; + char *path = NULL; /* * On an operating systems that support MAP_ALIGN (SunOS >=5.9) we * can directly ask mmap(2) to align the virtual memory mapping. 
*/ - result = mmap((void *) alignment, size, ERTS_MMAP_PROT, - ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0); - if (result == MAP_FAILED) { - return NULL; - } + if (!erts_mmap_prepare_file(size, &fd, &path)) { + return os_mmap_aligned_raw(mm, size, alignment); + } + result = os_mmap_raw(mm, (void *) alignment, size, fd, ERTS_MMAP_FLAGS|MAP_ALIGN); + if (!result) { + close(fd); + unlink(path); + free(path); + return os_mmap_aligned_raw(mm, size, alignment); + } + close(fd); + erts_mmap_file_record_add(mm, result, size, path); #else UWord diff; + int fd; + char *path = NULL; + void *raw; + UWord raw_size = size + alignment; ASSERT((size % sys_page_size) == 0); ASSERT((alignment % sys_page_size) == 0); - /* - * Allocate and test for alignment. It is possible 1) the - * operating aligned the allocation based its length or 2) the - * previous allocation aligned the next available address. - */ - if ((result = os_mmap(NULL, size)) == NULL) { - return NULL; + if (!erts_mmap_prepare_file(raw_size, &fd, &path)) { + return os_mmap_aligned_raw(mm, size, alignment); } - - if (IS_ALIGNED(result, alignment)) { - return result; - } - - /* - * The virtual memory allocation was not aligned, clean-up the - * mapping so we can try a different strategy. - */ - os_munmap(result, size); - - /* - * Retry the virtual memory allocation adding padding to ensure - * the requested alignment. - */ - if ((result = os_mmap(NULL, size + alignment)) == NULL) { - return NULL; + raw = os_mmap_raw(mm, NULL, raw_size, fd, ERTS_MMAP_FLAGS); + if (!raw) { + close(fd); + unlink(path); + free(path); + return os_mmap_aligned_raw(mm, size, alignment); } + result = (char *) raw; diff = (char *)ALIGN_UP(result, alignment) - result; @@ -1403,7 +1713,7 @@ os_mmap_aligned(UWord size, UWord alignment) * unmap. 
*/ if (diff != 0) { - os_munmap(result, diff); + os_munmap_raw(result, diff); result += diff; } @@ -1411,12 +1721,57 @@ os_mmap_aligned(UWord size, UWord alignment) * Unmap extra pages at the end of the allocation. There must * always be at least one. */ - os_munmap(result + size, alignment - diff); + os_munmap_raw(result + size, alignment - diff); + close(fd); + erts_mmap_file_record_add(mm, result, size, path); #endif return result; } +static ERTS_INLINE void * +os_mmap_aligned_raw(ErtsMemMapper *mm, UWord size, UWord alignment) +{ + char *result; +#ifdef MAP_ALIGN + result = os_mmap_raw(mm, + (void *) alignment, + size, + ERTS_MMAP_FD_FOR_MM(mm), + ERTS_MMAP_FLAGS|MAP_ALIGN); + if (!result) { + return NULL; + } +#else + UWord diff; + + ASSERT((size % sys_page_size) == 0); + ASSERT((alignment % sys_page_size) == 0); + + if ((result = os_mmap_raw(mm, NULL, size, ERTS_MMAP_FD_FOR_MM(mm), ERTS_MMAP_FLAGS)) == NULL) { + return NULL; + } + + if (IS_ALIGNED(result, alignment)) { + return result; + } + + os_munmap_raw(result, size); + + if ((result = os_mmap_raw(mm, NULL, size + alignment, ERTS_MMAP_FD_FOR_MM(mm), ERTS_MMAP_FLAGS)) == NULL) { + return NULL; + } + + diff = (char *)ALIGN_UP(result, alignment) - result; + if (diff != 0) { + os_munmap_raw(result, diff); + result += diff; + } + os_munmap_raw(result + size, alignment - diff); +#endif + return result; +} + #ifdef ERTS_HAVE_OS_MREMAP # if HAVE_MREMAP # if defined(__NetBSD__) @@ -1426,7 +1781,7 @@ os_mmap_aligned(UWord size, UWord alignment) # endif # endif static ERTS_INLINE void * -os_mremap(void *ptr, UWord old_size, UWord new_size) +os_mremap(ErtsMemMapper *mm, void *ptr, UWord old_size, UWord new_size) { void *new_seg; #if HAVE_MREMAP @@ -1437,6 +1792,7 @@ os_mremap(void *ptr, UWord old_size, UWord new_size) (size_t) new_size, ERTS_MREMAP_FLAGS); if (new_seg == (void *) MAP_FAILED) return NULL; + erts_mmap_file_record_resize(mm, ptr, old_size, new_seg, new_size); return new_seg; #else # error "missing 
mremap() or similar" @@ -1466,7 +1822,7 @@ static int os_reserve_physical(char *ptr, UWord size) { void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_RESERVE_PROT, - ERTS_MMAP_RESERVE_FLAGS, ERTS_MMAP_FD, 0); + ERTS_MMAP_RESERVE_FLAGS, -1, 0); if (res == (void *) MAP_FAILED) return 0; return 1; @@ -1476,7 +1832,7 @@ static void os_unreserve_physical(char *ptr, UWord size) { void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_UNRESERVE_PROT, - ERTS_MMAP_UNRESERVE_FLAGS, ERTS_MMAP_FD, 0); + ERTS_MMAP_UNRESERVE_FLAGS, -1, 0); if (res == (void *) MAP_FAILED) erts_exit(ERTS_ABORT_EXIT, "Failed to unreserve memory"); } @@ -1488,7 +1844,7 @@ os_mmap_virtual(char *ptr, UWord size) void* res; res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_VIRTUAL_PROT, - flags, ERTS_MMAP_FD, 0); + flags, -1, 0); if (res == (void *) MAP_FAILED) return NULL; return res; @@ -1544,7 +1900,11 @@ alloc_desc_insert_free_seg(ErtsMemMapper* mm, #if ERTS_HAVE_OS_MMAP if (!mm->no_os_mmap) { - ptr = os_mmap(mm->desc.new_area_hint, ERTS_PAGEALIGNED_SIZE); + ptr = os_mmap_raw(mm, + mm->desc.new_area_hint, + ERTS_PAGEALIGNED_SIZE, + ERTS_MMAP_FD_FOR_MM(mm), + ERTS_MMAP_FLAGS); if (ptr) { mm->desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE; ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE); @@ -1752,13 +2112,13 @@ erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep) /* Map using OS primitives */ if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mm->no_os_mmap) { if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) { - seg = os_mmap(NULL, asize); + seg = os_mmap(mm, NULL, asize); if (!seg) goto failure; } else { asize = ERTS_SUPERALIGNED_CEILING(*sizep); - seg = os_mmap_aligned(asize, ERTS_SUPERALIGNED_SIZE); + seg = os_mmap_aligned(mm, asize, ERTS_SUPERALIGNED_SIZE); if (!seg) goto failure; } @@ -1810,7 +2170,7 @@ erts_munmap(ErtsMemMapper* mm, Uint32 flags, void *ptr, UWord size) #if ERTS_HAVE_OS_MMAP ERTS_MUNMAP_OP_LCK(ptr, size); ERTS_MMAP_SIZE_OS_DEC(size); - os_munmap(ptr, size); + os_munmap(mm, ptr, 
size); #endif } else { @@ -1971,7 +2331,8 @@ erts_mremap(ErtsMemMapper* mm, ERTS_MMAP_ASSERT((((char *)ptr) + old_size) > (char *) new_ptr); um_sz = (UWord) ((((char *) ptr) + old_size) - (char *) new_ptr); ERTS_MMAP_SIZE_OS_DEC(um_sz); - os_munmap(new_ptr, um_sz); + os_munmap(mm, new_ptr, um_sz); + erts_mmap_file_record_resize(mm, ptr, old_size, ptr, asize); ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize); *sizep = asize; return ptr; @@ -1981,7 +2342,7 @@ erts_mremap(ErtsMemMapper* mm, if (superaligned) { return remap_move(mm, flags, ptr, old_size, sizep); } else { - new_ptr = os_mremap(ptr, old_size, asize); + new_ptr = os_mremap(mm, ptr, old_size, asize); if (!new_ptr) return NULL; if (asize > old_size) @@ -2256,8 +2617,9 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init) mm->supercarrier = 0; mm->reserve_physical = reserve_noop; mm->unreserve_physical = unreserve_noop; + mm->file_maps = NULL; -#if HAVE_MMAP && !defined(MAP_ANON) +#if HAVE_MMAP && !defined(MAP_ANON) && !defined(MAP_ANONYMOUS) mm->mmap_fd = open("/dev/zero", O_RDWR); if (mm->mmap_fd < 0) erts_exit(1, "erts_mmap: Failed to open /dev/zero\n"); @@ -2283,7 +2645,7 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init) "erts_mmap: Failed to create virtual range for super carrier\n"); sz = start - ptr; if (sz) - os_munmap(end, sz); + os_munmap_raw(end, sz); mm->reserve_physical = os_reserve_physical; mm->unreserve_physical = os_unreserve_physical; virtual_map = 1; @@ -2320,7 +2682,7 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init) alignment = MAX(sys_large_page_size, ERTS_SUPERALIGNED_SIZE); else alignment = ERTS_SUPERALIGNED_SIZE; - start = os_mmap_aligned(sz, alignment); + start = os_mmap_aligned_raw(mm, sz, alignment); } if (!start) erts_exit(1, diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index 3218f6797498..612e28737861 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -142,6 +142,9 @@ 
typedef struct ErtsMemMapper_ ErtsMemMapper; void *erts_mmap(ErtsMemMapper*, Uint32 flags, UWord *sizep); void erts_munmap(ErtsMemMapper*, Uint32 flags, void *ptr, UWord size); void *erts_mremap(ErtsMemMapper*, Uint32 flags, void *ptr, UWord old_size, UWord *sizep); +int erts_mmap_name_mapping(ErtsMemMapper*, void *ptr, UWord size, const char *name); +int erts_mmap_name_mapping_global(void *ptr, UWord size, const char *name); +int erts_mmap_prefix_mapping_name_global(void *ptr, UWord size, const char *prefix); int erts_mmap_in_supercarrier(ErtsMemMapper*, void *ptr); void erts_mmap_init(ErtsMemMapper*, ErtsMMapInit*); struct erts_mmap_info_struct From 8cb75d4220a99480a7caa2f44ea8e305bc8f7b7b Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 21 Apr 2026 10:43:24 +0200 Subject: [PATCH 02/37] record and replay mods use single mapped file of 100MB --- erts/emulator/Makefile.in | 1 + erts/emulator/beam/erl_init.c | 39 +++ erts/emulator/sys/common/erl_mmap.c | 309 +---------------- erts/emulator/sys/common/erl_mmap.h | 7 + erts/emulator/sys/common/erl_mmap_record.c | 374 +++++++++++++++++++++ erts/emulator/sys/common/erl_mseg.c | 18 +- 6 files changed, 440 insertions(+), 308 deletions(-) create mode 100644 erts/emulator/sys/common/erl_mmap_record.c diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index f39743fda825..97a3106feb1d 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -1259,6 +1259,7 @@ OS_OBJS += $(OBJDIR)/erl_poll.o \ $(OBJDIR)/erl_check_io.o \ $(OBJDIR)/erl_mseg.o \ $(OBJDIR)/erl_mmap.o \ + $(OBJDIR)/erl_mmap_record.o \ $(OBJDIR)/erl_osenv.o \ $(OBJDIR)/erl_$(ERLANG_OSTYPE)_sys_ddll.o \ $(OBJDIR)/erl_sys_common_misc.o \ diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 74736f8ed7db..08bce89438b1 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -57,6 +57,7 @@ #include "erl_global_literals.h" #include "erl_iolist.h" #include "erl_debugger.h" +#include 
"erl_mmap.h" #include "jit/beam_asm.h" @@ -537,6 +538,8 @@ __decl_noreturn void __noreturn erts_usage(void) int this_rel = this_rel_num(); erts_fprintf(stderr, "Usage: %s [flags] [ -- [init_args] ]\n", progname(program)); erts_fprintf(stderr, "The flags are:\n\n"); + erts_fprintf(stderr, "-record path create/use a file-backed 100MB mmap arena for mseg carriers\n"); + erts_fprintf(stderr, "-replay path reuse an existing 100MB mmap arena file (mutually exclusive with -record)\n"); erts_fprintf(stderr, "-a size suggest stack size in kilo words for threads\n"); erts_fprintf(stderr, " in the async-thread pool; valid range is [%d-%d]\n", ERTS_ASYNC_THREAD_MIN_STACK_SIZE, @@ -877,6 +880,32 @@ early_init(int *argc, char **argv) /* if (argc && argv) { int i = 1; while (i < *argc) { + if (sys_strcmp(argv[i], "-record") == 0) { + char *path = get_arg("", argv[i+1], &i); + if (!erts_mmap_record_option_record(path)) { + erts_fprintf(stderr, "-record and -replay are mutually exclusive\n"); + erts_usage(); + } + if (!erts_mmap_record_init()) { + erts_fprintf(stderr, "failed to initialize -record mmap arena at %s\n", path); + erts_usage(); + } + i++; + continue; + } + if (sys_strcmp(argv[i], "-replay") == 0) { + char *path = get_arg("", argv[i+1], &i); + if (!erts_mmap_record_option_replay(path)) { + erts_fprintf(stderr, "-record and -replay are mutually exclusive\n"); + erts_usage(); + } + if (!erts_mmap_record_init()) { + erts_fprintf(stderr, "failed to initialize -replay mmap arena from %s\n", path); + erts_usage(); + } + i++; + continue; + } if (sys_strcmp(argv[i], "--") == 0) { /* end of emulator options */ i++; break; @@ -1344,6 +1373,16 @@ erl_start(int argc, char **argv) sys_proc_outst_req_lim = 2*erts_no_schedulers; while (i < argc) { + if (sys_strcmp(argv[i], "-record") == 0) { + (void) get_arg("", argv[i+1], &i); + i++; + continue; + } + if (sys_strcmp(argv[i], "-replay") == 0) { + (void) get_arg("", argv[i+1], &i); + i++; + continue; + } if (argv[i][0] != '-') { 
erts_usage(); } diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c index 1017748fe3db..123bb02cd753 100644 --- a/erts/emulator/sys/common/erl_mmap.c +++ b/erts/emulator/sys/common/erl_mmap.c @@ -349,14 +349,6 @@ typedef struct { Uint nseg; }ErtsFreeSegMap; -typedef struct ErtsMMapFileMap_ ErtsMMapFileMap; -struct ErtsMMapFileMap_ { - char *start; - UWord size; - char *path; - ErtsMMapFileMap *next; -}; - struct ErtsMemMapper_ { int (*reserve_physical)(char *, UWord); void (*unreserve_physical)(char *, UWord); @@ -381,7 +373,6 @@ struct ErtsMemMapper_ { #if HAVE_MMAP && (!defined(MAP_ANON) && !defined(MAP_ANONYMOUS)) int mmap_fd; #endif - ErtsMMapFileMap *file_maps; erts_mtx_t mtx; struct { char *free_list; @@ -1311,23 +1302,6 @@ Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map) # endif #endif -static ERTS_INLINE int -erts_mmap_ensure_records_dir(void) -{ -#if HAVE_MMAP - static int dir_initialized = 0; - if (!dir_initialized) { - if (mkdir("_mmap-records", 0777) != 0 && errno != EEXIST) { - return 0; - } - dir_initialized = 1; - } - return 1; -#else - return 0; -#endif -} - static ERTS_INLINE void * os_mmap_raw(ErtsMemMapper *mm, void *hint_ptr, UWord size, int fd, int flags) { @@ -1366,169 +1340,24 @@ os_munmap_raw(void *ptr, UWord size) #endif } -static ERTS_INLINE void -erts_mmap_file_record_add(ErtsMemMapper *mm, void *ptr, UWord size, char *path) -{ -#if HAVE_MMAP - ErtsMMapFileMap *rec = (ErtsMMapFileMap *) malloc(sizeof(*rec)); - if (!rec) { - unlink(path); - free(path); - erts_exit(ERTS_ABORT_EXIT, "erts_mmap: failed to allocate file mapping record\n"); - } - rec->start = (char *) ptr; - rec->size = size; - rec->path = path; - rec->next = mm->file_maps; - mm->file_maps = rec; -#else - (void) mm; - (void) ptr; - (void) size; - (void) path; -#endif -} - -static ERTS_INLINE void -erts_mmap_file_record_remove_exact(ErtsMemMapper *mm, void *ptr, UWord size) -{ -#if HAVE_MMAP - ErtsMMapFileMap **pp, *p; - pp = 
&mm->file_maps; - p = *pp; - while (p) { - if (p->start == (char *) ptr && p->size == size) { - *pp = p->next; - unlink(p->path); - free(p->path); - free(p); - return; - } - pp = &p->next; - p = p->next; - } -#else - (void) mm; - (void) ptr; - (void) size; -#endif -} - -static ERTS_INLINE void -erts_mmap_file_record_resize(ErtsMemMapper *mm, void *old_ptr, UWord old_size, void *new_ptr, UWord new_size) -{ -#if HAVE_MMAP - ErtsMMapFileMap *p; - p = mm->file_maps; - while (p) { - if (p->start == (char *) old_ptr && p->size == old_size) { - p->start = (char *) new_ptr; - p->size = new_size; - break; - } - p = p->next; - } -#else - (void) mm; - (void) old_ptr; - (void) old_size; - (void) new_ptr; - (void) new_size; -#endif -} - int erts_mmap_name_mapping(ErtsMemMapper *mm, void *ptr, UWord size, const char *name) { -#if HAVE_MMAP - ErtsMMapFileMap *p; - char new_path[PATH_MAX]; - char *new_path_heap; - - if (!name || !name[0]) { - return 0; - } - - p = mm->file_maps; - while (p) { - if (p->start == (char *) ptr && p->size == size) { - break; - } - p = p->next; - } - - if (!p) { - return 0; - } - - erts_snprintf(new_path, sizeof(new_path), "_mmap-records/%s", name); - new_path[sizeof(new_path) - 1] = '\0'; - - unlink(new_path); - if (rename(p->path, new_path) != 0) { - return 0; - } - - new_path_heap = (char *) malloc(strlen(new_path) + 1); - if (!new_path_heap) { - return 1; - } - strcpy(new_path_heap, new_path); - free(p->path); - p->path = new_path_heap; - return 1; -#else (void) mm; (void) ptr; (void) size; (void) name; return 0; -#endif } static int erts_mmap_prefix_mapping_name(ErtsMemMapper *mm, void *ptr, UWord size, const char *prefix) { -#if HAVE_MMAP - ErtsMMapFileMap *p; - const char *base; - const char *tail; - char new_name[PATH_MAX]; - - if (!prefix || !prefix[0]) { - return 0; - } - - p = mm->file_maps; - while (p) { - if (p->start == (char *) ptr && p->size == size) { - break; - } - p = p->next; - } - if (!p) { - return 0; - } - - base = 
strrchr(p->path, '/'); - base = base ? base + 1 : p->path; - tail = strstr(base, "erts-mmap-"); - if (!tail) { - tail = base; - } - if (strncmp(base, prefix, strlen(prefix)) == 0 && base[strlen(prefix)] == '_') { - return 1; - } - erts_snprintf(new_name, sizeof(new_name), "%s_%s", prefix, tail); - new_name[sizeof(new_name) - 1] = '\0'; - return erts_mmap_name_mapping(mm, ptr, size, new_name); -#else (void) mm; (void) ptr; (void) size; (void) prefix; return 0; -#endif } int @@ -1571,81 +1400,17 @@ erts_mmap_prefix_mapping_name_global(void *ptr, UWord size, const char *prefix) return 0; } -static ERTS_INLINE int -erts_mmap_prepare_file(UWord size, int *fd_out, char **path_out) -{ -#if HAVE_MMAP - char tmpl[PATH_MAX]; - char *path; - int fd; - - if (!erts_mmap_ensure_records_dir()) { - return 0; - } - - erts_snprintf(tmpl, sizeof(tmpl), "_mmap-records/erts-mmap-XXXXXX"); - fd = mkstemp(tmpl); - if (fd < 0) { - return 0; - } - if (ftruncate(fd, (off_t) size) != 0) { - close(fd); - unlink(tmpl); - return 0; - } - path = (char *) malloc(strlen(tmpl) + 1); - if (!path) { - close(fd); - unlink(tmpl); - return 0; - } - strcpy(path, tmpl); - - *fd_out = fd; - *path_out = path; - return 1; -#else - (void) size; - (void) fd_out; - (void) path_out; - return 0; -#endif -} - static ERTS_INLINE void * os_mmap(ErtsMemMapper *mm, void *hint_ptr, UWord size) { -#if HAVE_MMAP - void *res; - int fd; - char *path = NULL; - - if (!erts_mmap_prepare_file(size, &fd, &path)) { - return os_mmap_raw(mm, hint_ptr, size, ERTS_MMAP_FD_FOR_MM(mm), ERTS_MMAP_FLAGS); - } - - res = os_mmap_raw(mm, hint_ptr, size, fd, ERTS_MMAP_FLAGS); - if (!res) { - close(fd); - unlink(path); - free(path); - return os_mmap_raw(mm, hint_ptr, size, ERTS_MMAP_FD_FOR_MM(mm), ERTS_MMAP_FLAGS); - } - close(fd); - erts_mmap_file_record_add(mm, res, size, path); - return res; -#elif HAVE_VIRTUALALLOC - return os_mmap_raw(mm, hint_ptr, size, -1, 0); -#else -# error "missing mmap() or similar" -#endif + return 
os_mmap_raw(mm, hint_ptr, size, ERTS_MMAP_FD_FOR_MM(mm), ERTS_MMAP_FLAGS); } static ERTS_INLINE void os_munmap(ErtsMemMapper *mm, void *ptr, UWord size) { + (void) mm; os_munmap_raw(ptr, size); - erts_mmap_file_record_remove_exact(mm, ptr, size); } #define ALIGN_UP(x, a) ((void*)((((UWord)(x)) + ((a) - 1)) & ~((a) - 1))) @@ -1662,71 +1427,7 @@ os_mmap_aligned_raw(ErtsMemMapper *mm, UWord size, UWord alignment); static ERTS_INLINE void * os_mmap_aligned(ErtsMemMapper *mm, UWord size, UWord alignment) { - char *result; -#ifdef MAP_ALIGN - int fd; - char *path = NULL; - - /* - * On an operating systems that support MAP_ALIGN (SunOS >=5.9) we - * can directly ask mmap(2) to align the virtual memory mapping. - */ - if (!erts_mmap_prepare_file(size, &fd, &path)) { - return os_mmap_aligned_raw(mm, size, alignment); - } - result = os_mmap_raw(mm, (void *) alignment, size, fd, ERTS_MMAP_FLAGS|MAP_ALIGN); - if (!result) { - close(fd); - unlink(path); - free(path); - return os_mmap_aligned_raw(mm, size, alignment); - } - close(fd); - erts_mmap_file_record_add(mm, result, size, path); -#else - UWord diff; - int fd; - char *path = NULL; - void *raw; - UWord raw_size = size + alignment; - - ASSERT((size % sys_page_size) == 0); - ASSERT((alignment % sys_page_size) == 0); - - if (!erts_mmap_prepare_file(raw_size, &fd, &path)) { - return os_mmap_aligned_raw(mm, size, alignment); - } - raw = os_mmap_raw(mm, NULL, raw_size, fd, ERTS_MMAP_FLAGS); - if (!raw) { - close(fd); - unlink(path); - free(path); - return os_mmap_aligned_raw(mm, size, alignment); - } - result = (char *) raw; - - diff = (char *)ALIGN_UP(result, alignment) - result; - - /* - * Unmap any extra pages at the beginning of the allocation. If - * the allocation ended up being aligned, there will be nothing to - * unmap. - */ - if (diff != 0) { - os_munmap_raw(result, diff); - result += diff; - } - - /* - * Unmap extra pages at the end of the allocation. There must - * always be at least one. 
- */ - os_munmap_raw(result + size, alignment - diff); - close(fd); - erts_mmap_file_record_add(mm, result, size, path); -#endif - - return result; + return os_mmap_aligned_raw(mm, size, alignment); } static ERTS_INLINE void * @@ -1784,6 +1485,7 @@ static ERTS_INLINE void * os_mremap(ErtsMemMapper *mm, void *ptr, UWord old_size, UWord new_size) { void *new_seg; + (void) mm; #if HAVE_MREMAP new_seg = mremap(ptr, (size_t) old_size, # if defined(__NetBSD__) @@ -1792,7 +1494,6 @@ os_mremap(ErtsMemMapper *mm, void *ptr, UWord old_size, UWord new_size) (size_t) new_size, ERTS_MREMAP_FLAGS); if (new_seg == (void *) MAP_FAILED) return NULL; - erts_mmap_file_record_resize(mm, ptr, old_size, new_seg, new_size); return new_seg; #else # error "missing mremap() or similar" @@ -2332,7 +2033,6 @@ erts_mremap(ErtsMemMapper* mm, um_sz = (UWord) ((((char *) ptr) + old_size) - (char *) new_ptr); ERTS_MMAP_SIZE_OS_DEC(um_sz); os_munmap(mm, new_ptr, um_sz); - erts_mmap_file_record_resize(mm, ptr, old_size, ptr, asize); ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize); *sizep = asize; return ptr; @@ -2617,7 +2317,6 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init) mm->supercarrier = 0; mm->reserve_physical = reserve_noop; mm->unreserve_physical = unreserve_noop; - mm->file_maps = NULL; #if HAVE_MMAP && !defined(MAP_ANON) && !defined(MAP_ANONYMOUS) mm->mmap_fd = open("/dev/zero", O_RDWR); diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index 612e28737861..71fbc8009a76 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -158,6 +158,13 @@ Eterm erts_mmap_info(ErtsMemMapper*, fmtfn_t *print_to_p, void *print_to_arg, Eterm erts_mmap_info_options(ErtsMemMapper*, char *prefix, fmtfn_t *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp); +int erts_mmap_record_option_record(const char *path); +int erts_mmap_record_option_replay(const char *path); +int erts_mmap_record_option_enabled(void); +int 
erts_mmap_record_init(void); +void *erts_mmap_record_alloc(UWord *sizep, Uint32 mmap_flags); +void erts_mmap_record_free(void *ptr, UWord size); +void *erts_mmap_record_realloc(void *ptr, UWord old_size, UWord *sizep, Uint32 mmap_flags); #ifdef ERTS_WANT_MEM_MAPPERS # include "erl_alloc_types.h" diff --git a/erts/emulator/sys/common/erl_mmap_record.c b/erts/emulator/sys/common/erl_mmap_record.c new file mode 100644 index 000000000000..373e8c790a49 --- /dev/null +++ b/erts/emulator/sys/common/erl_mmap_record.c @@ -0,0 +1,374 @@ +/* + * %CopyrightBegin% + * + * SPDX-License-Identifier: Apache-2.0 + * + * Copyright Ericsson AB 2002-2025. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * %CopyrightEnd% + */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "erl_mmap.h" +#include +#include +#include +#include +#include +#ifdef HAVE_SYS_MMAN_H +# include +#endif + +#if HAVE_ERTS_MMAP + +#define ERTS_RECORD_ARENA_SIZE (UWORD_CONSTANT(100) * 1024 * 1024) + +typedef struct ErtsMMapRecordChunk_ ErtsMMapRecordChunk; +struct ErtsMMapRecordChunk_ { + char *ptr; + UWord size; + int free; + ErtsMMapRecordChunk *prev; + ErtsMMapRecordChunk *next; +}; + +static int record_enabled = 0; +static int replay_enabled = 0; +static int record_initialized = 0; +static int record_fd = -1; +static char *record_base = NULL; +static char *record_path = NULL; +static char *replay_path = NULL; +static ErtsMMapRecordChunk *record_chunks = NULL; +static erts_mtx_t record_mtx; +static int record_mtx_inited = 0; + +static UWord +record_align(UWord size, Uint32 mmap_flags) +{ + UWord align = ERTS_PAGEALIGNED_SIZE; + if (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) { + align = ERTS_SUPERALIGNED_SIZE; + } + return (size + (align - 1)) & ~(align - 1); +} + +static char * +record_align_ptr(char *ptr, UWord align) +{ + UWord v = (UWord) ptr; + UWord a = (v + (align - 1)) & ~(align - 1); + return (char *) a; +} + +static ErtsMMapRecordChunk * +record_new_chunk(char *ptr, UWord size, int free) +{ + ErtsMMapRecordChunk *c = (ErtsMMapRecordChunk *) malloc(sizeof(*c)); + if (!c) { + return NULL; + } + c->ptr = ptr; + c->size = size; + c->free = free; + c->prev = NULL; + c->next = NULL; + return c; +} + +static void +record_merge_with_neighbors(ErtsMMapRecordChunk *c) +{ + if (c->next && c->next->free) { + ErtsMMapRecordChunk *n = c->next; + c->size += n->size; + c->next = n->next; + if (c->next) { + c->next->prev = c; + } + free(n); + } + if (c->prev && c->prev->free) { + ErtsMMapRecordChunk *p = c->prev; + p->size += c->size; + p->next = c->next; + if (c->next) { + c->next->prev = p; + } + free(c); + } +} + +int +erts_mmap_record_option_record(const char 
*path) +{ + char *copy; + size_t len; + + if (!path || !path[0] || replay_enabled) { + return 0; + } + + len = strlen(path); + copy = (char *) malloc(len + 1); + if (!copy) { + return 0; + } + memcpy(copy, path, len + 1); + + if (record_path) { + free(record_path); + } + record_path = copy; + + record_enabled = 1; + return 1; +} + +int +erts_mmap_record_option_replay(const char *path) +{ + char *copy; + size_t len; + + if (!path || !path[0] || record_enabled) { + return 0; + } + + len = strlen(path); + copy = (char *) malloc(len + 1); + if (!copy) { + return 0; + } + memcpy(copy, path, len + 1); + + if (replay_path) { + free(replay_path); + } + replay_path = copy; + replay_enabled = 1; + return 1; +} + +int +erts_mmap_record_option_enabled(void) +{ + return record_enabled || replay_enabled; +} + +int +erts_mmap_record_init(void) +{ + const char *path = NULL; + ErtsMMapRecordChunk *c; + struct stat st; + + if (!record_enabled && !replay_enabled) { + return 1; + } + if (record_initialized) { + return 1; + } + + if (replay_enabled) { + path = replay_path; + record_fd = open(path, O_RDWR, 0); + } else { + path = record_path; + if (!path) { + return 0; + } + record_fd = open(path, O_RDWR | O_CREAT, 0666); + } + if (record_fd < 0) { + return 0; + } + + if (fstat(record_fd, &st) != 0) { + close(record_fd); + record_fd = -1; + return 0; + } + if (replay_enabled) { + if ((UWord) st.st_size < ERTS_RECORD_ARENA_SIZE) { + close(record_fd); + record_fd = -1; + return 0; + } + } else if (st.st_size != (off_t) ERTS_RECORD_ARENA_SIZE) { + if (ftruncate(record_fd, (off_t) ERTS_RECORD_ARENA_SIZE) != 0) { + close(record_fd); + record_fd = -1; + return 0; + } + } + + record_base = (char *) mmap(NULL, + ERTS_RECORD_ARENA_SIZE, + PROT_READ | PROT_WRITE, + MAP_SHARED, + record_fd, + 0); + if (record_base == MAP_FAILED) { + record_base = NULL; + close(record_fd); + record_fd = -1; + return 0; + } + + c = record_new_chunk(record_base, ERTS_RECORD_ARENA_SIZE, 1); + if (!c) { + 
munmap(record_base, ERTS_RECORD_ARENA_SIZE); + record_base = NULL; + close(record_fd); + record_fd = -1; + return 0; + } + + if (!record_mtx_inited) { + erts_mtx_init(&record_mtx, "mmap_record", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC + | ERTS_LOCK_FLAGS_CATEGORY_ALLOCATOR); + record_mtx_inited = 1; + } + + record_chunks = c; + record_initialized = 1; + return 1; +} + +void * +erts_mmap_record_alloc(UWord *sizep, Uint32 mmap_flags) +{ + UWord need; + UWord align; + ErtsMMapRecordChunk *c; + void *res = NULL; + + if (!record_initialized || !sizep) { + return NULL; + } + + align = ERTS_PAGEALIGNED_SIZE; + if (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) { + align = ERTS_SUPERALIGNED_SIZE; + } + need = record_align(*sizep, mmap_flags); + + erts_mtx_lock(&record_mtx); + for (c = record_chunks; c; c = c->next) { + if (c->free) { + char *ret_ptr = record_align_ptr(c->ptr, align); + UWord prefix = (UWord) (ret_ptr - c->ptr); + UWord total_need = prefix + need; + if (c->size < total_need) { + continue; + } + + if (prefix > 0) { + ErtsMMapRecordChunk *pre = record_new_chunk(c->ptr, prefix, 1); + if (!pre) { + break; + } + pre->prev = c->prev; + pre->next = c; + if (pre->prev) { + pre->prev->next = pre; + } else { + record_chunks = pre; + } + c->prev = pre; + c->ptr = ret_ptr; + c->size -= prefix; + } + + if (c->size > need) { + ErtsMMapRecordChunk *tail = record_new_chunk(c->ptr + need, + c->size - need, + 1); + if (!tail) { + break; + } + tail->prev = c; + tail->next = c->next; + if (tail->next) { + tail->next->prev = tail; + } + c->next = tail; + c->size = need; + } + c->free = 0; + *sizep = c->size; + res = c->ptr; + break; + } + } + erts_mtx_unlock(&record_mtx); + + return res; +} + +void +erts_mmap_record_free(void *ptr, UWord size) +{ + ErtsMMapRecordChunk *c; + (void) size; + + if (!record_initialized || !ptr) { + return; + } + + erts_mtx_lock(&record_mtx); + for (c = record_chunks; c; c = c->next) { + if (c->ptr == (char *) ptr) { + c->free = 1; + 
record_merge_with_neighbors(c); + break; + } + } + erts_mtx_unlock(&record_mtx); +} + +void * +erts_mmap_record_realloc(void *ptr, UWord old_size, UWord *sizep, Uint32 mmap_flags) +{ + void *new_ptr; + UWord copy_sz; + + if (!record_initialized || !sizep) { + return NULL; + } + if (!ptr) { + return erts_mmap_record_alloc(sizep, mmap_flags); + } + if (*sizep <= old_size) { + return ptr; + } + + new_ptr = erts_mmap_record_alloc(sizep, mmap_flags); + if (!new_ptr) { + return NULL; + } + + copy_sz = old_size < *sizep ? old_size : *sizep; + sys_memcpy(new_ptr, ptr, copy_sz); + erts_mmap_record_free(ptr, old_size); + return new_ptr; +} + +#endif /* HAVE_ERTS_MMAP */ diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c index 3381d33a4ef4..3b4757368420 100644 --- a/erts/emulator/sys/common/erl_mseg.c +++ b/erts/emulator/sys/common/erl_mseg.c @@ -267,7 +267,11 @@ mseg_create(ErtsMsegAllctr_t *ma, Uint flags, UWord *sizep) if (MSEG_FLG_IS_2POW(flags)) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; - seg = erts_mmap(&erts_dflt_mmapper, mmap_flags, sizep); + if (erts_mmap_record_option_enabled()) { + seg = erts_mmap_record_alloc(sizep, mmap_flags); + } else { + seg = erts_mmap(&erts_dflt_mmapper, mmap_flags, sizep); + } #ifdef ERTS_PRINT_ERTS_MMAP erts_fprintf(stderr, "%p = erts_mmap(%s, {%bpu, %bpu});\n", seg, @@ -287,7 +291,11 @@ mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, void *seg_p, UWord size) { if (MSEG_FLG_IS_2POW(flags)) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; - erts_munmap(&erts_dflt_mmapper, mmap_flags, seg_p, size); + if (erts_mmap_record_option_enabled()) { + erts_mmap_record_free(seg_p, size); + } else { + erts_munmap(&erts_dflt_mmapper, mmap_flags, seg_p, size); + } #ifdef ERTS_PRINT_ERTS_MMAP erts_fprintf(stderr, "erts_munmap(%s, %p, %bpu);\n", (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? 
"sa" : "sua", @@ -308,7 +316,11 @@ mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, void *old_seg, UWord old_size, U if (MSEG_FLG_IS_2POW(flags)) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; - new_seg = erts_mremap(&erts_dflt_mmapper, mmap_flags, old_seg, old_size, sizep); + if (erts_mmap_record_option_enabled()) { + new_seg = erts_mmap_record_realloc(old_seg, old_size, sizep, mmap_flags); + } else { + new_seg = erts_mremap(&erts_dflt_mmapper, mmap_flags, old_seg, old_size, sizep); + } #ifdef ERTS_PRINT_ERTS_MMAP erts_fprintf(stderr, "%p = erts_mremap(%s, %p, %bpu, {%bpu, %bpu});\n", From 65ab72a0fc2f9e135dff055da0febcdb21ff4640 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 21 Apr 2026 10:49:57 +0200 Subject: [PATCH 03/37] Do not use the mapped file for new allocations during replay --- erts/emulator/sys/common/erl_mmap.h | 1 + erts/emulator/sys/common/erl_mmap_record.c | 6 ++++++ erts/emulator/sys/common/erl_mseg.c | 6 +++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index 71fbc8009a76..ebfb2dae34b5 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -160,6 +160,7 @@ Eterm erts_mmap_info_options(ErtsMemMapper*, Uint **hpp, Uint *szp); int erts_mmap_record_option_record(const char *path); int erts_mmap_record_option_replay(const char *path); +int erts_mmap_record_option_record_enabled(void); int erts_mmap_record_option_enabled(void); int erts_mmap_record_init(void); void *erts_mmap_record_alloc(UWord *sizep, Uint32 mmap_flags); diff --git a/erts/emulator/sys/common/erl_mmap_record.c b/erts/emulator/sys/common/erl_mmap_record.c index 373e8c790a49..3701e343267d 100644 --- a/erts/emulator/sys/common/erl_mmap_record.c +++ b/erts/emulator/sys/common/erl_mmap_record.c @@ -165,6 +165,12 @@ erts_mmap_record_option_replay(const char *path) return 1; } +int +erts_mmap_record_option_record_enabled(void) +{ + return record_enabled; +} + int 
erts_mmap_record_option_enabled(void) { diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c index 3b4757368420..2389a121260e 100644 --- a/erts/emulator/sys/common/erl_mseg.c +++ b/erts/emulator/sys/common/erl_mseg.c @@ -267,7 +267,7 @@ mseg_create(ErtsMsegAllctr_t *ma, Uint flags, UWord *sizep) if (MSEG_FLG_IS_2POW(flags)) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; - if (erts_mmap_record_option_enabled()) { + if (erts_mmap_record_option_record_enabled()) { seg = erts_mmap_record_alloc(sizep, mmap_flags); } else { seg = erts_mmap(&erts_dflt_mmapper, mmap_flags, sizep); @@ -291,7 +291,7 @@ mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, void *seg_p, UWord size) { if (MSEG_FLG_IS_2POW(flags)) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; - if (erts_mmap_record_option_enabled()) { + if (erts_mmap_record_option_record_enabled()) { erts_mmap_record_free(seg_p, size); } else { erts_munmap(&erts_dflt_mmapper, mmap_flags, seg_p, size); @@ -316,7 +316,7 @@ mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, void *old_seg, UWord old_size, U if (MSEG_FLG_IS_2POW(flags)) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; - if (erts_mmap_record_option_enabled()) { + if (erts_mmap_record_option_record_enabled()) { new_seg = erts_mmap_record_realloc(old_seg, old_size, sizep, mmap_flags); } else { new_seg = erts_mremap(&erts_dflt_mmapper, mmap_flags, old_seg, old_size, sizep); From bfd002247a73b2ff6c3927a89d3cf14d5896f0aa Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 21 Apr 2026 15:52:17 +0200 Subject: [PATCH 04/37] Root every allocation to mapped memory except for special handling in sys_drivers --- erts/emulator/beam/erl_alloc.c | 285 +++++++++++++++++++++++++++ erts/emulator/beam/erl_alloc_util.c | 24 ++- erts/emulator/sys/unix/sys_drivers.c | 17 +- 3 files changed, 309 insertions(+), 17 deletions(-) diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index d45982ff92c6..2eb9f8ab31ea 100644 --- a/erts/emulator/beam/erl_alloc.c +++ 
b/erts/emulator/beam/erl_alloc.c @@ -40,6 +40,7 @@ #include "erl_db.h" #include "erl_binary.h" #include "erl_bits.h" +#include "index.h" #include "erl_mseg.h" #include "erl_monitor_link.h" #include "erl_hl_timer.h" @@ -48,6 +49,8 @@ #include "erl_nfunc_sched.h" #include #include +#include +#include #if defined(ERTS_ALC_T_DRV_SEL_D_STATE) || defined(ERTS_ALC_T_DRV_EV_D_STATE) #include "erl_check_io.h" #endif @@ -102,6 +105,271 @@ static Uint install_debug_functions(void); static int lock_all_physical_memory = 0; static int erts_alloc_trace_fd = -1; static int erts_alloc_struct_csv_fd = -1; +static int erts_alloc_struct_snapshot_registered = 0; + +#define ERTS_ALLOC_STRUCT_SNAPSHOT_MAX 32 +typedef struct { + char tag[64]; + void *ptr; + UWord size; +} ErtsAllocStructSnapshot; + +static ErtsAllocStructSnapshot + erts_alloc_struct_snapshots[ERTS_ALLOC_STRUCT_SNAPSHOT_MAX]; +static int erts_alloc_struct_snapshot_count = 0; +static char erts_alloc_struct_snapshot_dir[512] = {0}; + +#define ERTS_ALLOC_MAP_MAX_RANGES 16384 +typedef struct { + UWord start; + UWord end; + int kind; +} ErtsAllocMapRange; + +enum { + ERTS_ALLOC_MAP_KIND_UNKNOWN = 0, + ERTS_ALLOC_MAP_KIND_STACK, + ERTS_ALLOC_MAP_KIND_HEAP, + ERTS_ALLOC_MAP_KIND_MAPPED +}; + +static ErtsAllocMapRange erts_alloc_map_ranges[ERTS_ALLOC_MAP_MAX_RANGES]; +static int erts_alloc_map_range_count = 0; + +static const char * +erts_alloc_map_kind_name(int kind) +{ + switch (kind) { + case ERTS_ALLOC_MAP_KIND_STACK: return "stack"; + case ERTS_ALLOC_MAP_KIND_HEAP: return "heap"; + case ERTS_ALLOC_MAP_KIND_MAPPED: return "mapped"; + default: return "unknown"; + } +} + +static void +erts_alloc_map_load(void) +{ + FILE *fp; + char line[1024]; + erts_alloc_map_range_count = 0; + fp = fopen("/proc/self/maps", "r"); + if (!fp) { + return; + } + while (fgets(line, sizeof(line), fp) != NULL) { + unsigned long long start, end; + int kind = ERTS_ALLOC_MAP_KIND_MAPPED; + if (erts_alloc_map_range_count >= ERTS_ALLOC_MAP_MAX_RANGES) { + 
break; + } + if (sscanf(line, "%llx-%llx", &start, &end) != 2) { + continue; + } + if (strstr(line, "[stack]")) { + kind = ERTS_ALLOC_MAP_KIND_STACK; + } else if (strstr(line, "[heap]")) { + kind = ERTS_ALLOC_MAP_KIND_HEAP; + } + erts_alloc_map_ranges[erts_alloc_map_range_count].start = (UWord) start; + erts_alloc_map_ranges[erts_alloc_map_range_count].end = (UWord) end; + erts_alloc_map_ranges[erts_alloc_map_range_count].kind = kind; + erts_alloc_map_range_count++; + } + fclose(fp); +} + +static int +erts_alloc_map_classify_ptr(const void *ptr) +{ + int i; + UWord addr = (UWord) ptr; + for (i = 0; i < erts_alloc_map_range_count; i++) { + if (addr >= erts_alloc_map_ranges[i].start + && addr < erts_alloc_map_ranges[i].end) { + return erts_alloc_map_ranges[i].kind; + } + } + return ERTS_ALLOC_MAP_KIND_UNKNOWN; +} + +static void +erts_alloc_struct_walk_index_table(int root_ix, + const ErtsAllocStructSnapshot *snap, + int wfd) +{ + IndexTable *tab = (IndexTable *) snap->ptr; + int pages, page_ix, slot_ix; + char line[512]; + int len; + if (!tab || snap->size < sizeof(IndexTable)) { + return; + } + + len = erts_snprintf(line, sizeof(line), + "%d,%s,root,%p,%s\n", + root_ix, + snap->tag, + (void *) tab, + erts_alloc_map_kind_name(erts_alloc_map_classify_ptr(tab))); + if (len > 0) { + if (len >= (int) sizeof(line)) len = (int) sizeof(line) - 1; + erts_silence_warn_unused_result(write(wfd, line, (size_t) len)); + } + + len = erts_snprintf(line, sizeof(line), + "%d,%s,seg_table,%p,%s\n", + root_ix, + snap->tag, + (void *) tab->seg_table, + erts_alloc_map_kind_name(erts_alloc_map_classify_ptr(tab->seg_table))); + if (len > 0) { + if (len >= (int) sizeof(line)) len = (int) sizeof(line) - 1; + erts_silence_warn_unused_result(write(wfd, line, (size_t) len)); + } + + len = erts_snprintf(line, sizeof(line), + "%d,%s,htable.bucket,%p,%s\n", + root_ix, + snap->tag, + (void *) tab->htable.bucket, + erts_alloc_map_kind_name(erts_alloc_map_classify_ptr(tab->htable.bucket))); + if (len 
> 0) { + if (len >= (int) sizeof(line)) len = (int) sizeof(line) - 1; + erts_silence_warn_unused_result(write(wfd, line, (size_t) len)); + } + + pages = (tab->size + INDEX_PAGE_SIZE - 1) >> INDEX_PAGE_SHIFT; + for (page_ix = 0; page_ix < pages; page_ix++) { + IndexSlot **page = (tab->seg_table ? tab->seg_table[page_ix] : NULL); + len = erts_snprintf(line, sizeof(line), + "%d,%s,seg_page[%d],%p,%s\n", + root_ix, snap->tag, page_ix, (void *) page, + erts_alloc_map_kind_name(erts_alloc_map_classify_ptr(page))); + if (len > 0) { + if (len >= (int) sizeof(line)) len = (int) sizeof(line) - 1; + erts_silence_warn_unused_result(write(wfd, line, (size_t) len)); + } + if (!page) { + continue; + } + for (slot_ix = 0; slot_ix < INDEX_PAGE_SIZE; slot_ix++) { + int global_ix = (page_ix << INDEX_PAGE_SHIFT) + slot_ix; + IndexSlot *slot = page[slot_ix]; + if (!slot || global_ix >= tab->entries) { + continue; + } + len = erts_snprintf(line, sizeof(line), + "%d,%s,slot[%d],%p,%s\n", + root_ix, snap->tag, global_ix, (void *) slot, + erts_alloc_map_kind_name(erts_alloc_map_classify_ptr(slot))); + if (len > 0) { + if (len >= (int) sizeof(line)) len = (int) sizeof(line) - 1; + erts_silence_warn_unused_result(write(wfd, line, (size_t) len)); + } + } + } +} + +static int +erts_alloc_struct_should_snapshot(const char *tag) +{ + return tag + && (strcmp(tag, "atom_table.index_root") == 0 + || strcmp(tag, "module_table.index_root") == 0); +} + +static void +erts_alloc_struct_register_snapshot(const char *tag, void *ptr, UWord size) +{ + ErtsAllocStructSnapshot *snap; + if (!erts_alloc_struct_should_snapshot(tag)) { + return; + } + if (erts_alloc_struct_snapshot_count >= ERTS_ALLOC_STRUCT_SNAPSHOT_MAX) { + return; + } + snap = &erts_alloc_struct_snapshots[erts_alloc_struct_snapshot_count++]; + erts_snprintf(snap->tag, sizeof(snap->tag), "%s", tag); + snap->ptr = ptr; + snap->size = size; +} + +static void +erts_alloc_struct_dump_snapshots_on_exit(void) +{ + int i, fd, mfd, wfd; + char 
line[256]; + char path[1024]; + int len; + + if (erts_alloc_struct_snapshot_count <= 0 || erts_alloc_struct_snapshot_dir[0] == '\0') { + return; + } + + if (mkdir(erts_alloc_struct_snapshot_dir, 0777) < 0 && errno != EEXIST) { + return; + } + + len = erts_snprintf(path, sizeof(path), "%s/roots.csv", erts_alloc_struct_snapshot_dir); + if (len <= 0 || len >= (int) sizeof(path)) { + return; + } + mfd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (mfd >= 0) { + erts_silence_warn_unused_result(write(mfd, "index,tag,ptr,size,file\n", 24)); + } + + for (i = 0; i < erts_alloc_struct_snapshot_count; i++) { + ErtsAllocStructSnapshot *snap = &erts_alloc_struct_snapshots[i]; + const char *name = (strcmp(snap->tag, "atom_table.index_root") == 0) + ? "atom_table.index_root" + : "module_table.index_root"; + len = erts_snprintf(path, sizeof(path), "%s/%02d.%s.bin", + erts_alloc_struct_snapshot_dir, i, name); + if (len <= 0 || len >= (int) sizeof(path)) { + continue; + } + fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (fd >= 0) { + if (snap->ptr && snap->size > 0) { + erts_silence_warn_unused_result(write(fd, snap->ptr, (size_t) snap->size)); + } + close(fd); + } + if (mfd >= 0) { + len = erts_snprintf(line, sizeof(line), "%d,%s,%p,%lu,%02d.%s.bin\n", + i, snap->tag, snap->ptr, (unsigned long) snap->size, i, name); + if (len > 0) { + if (len >= (int) sizeof(line)) { + len = (int) sizeof(line) - 1; + } + erts_silence_warn_unused_result(write(mfd, line, (size_t) len)); + } + } + } + if (mfd >= 0) { + close(mfd); + } + + len = erts_snprintf(path, sizeof(path), "%s/roots.walk.csv", erts_alloc_struct_snapshot_dir); + if (len <= 0 || len >= (int) sizeof(path)) { + return; + } + wfd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (wfd < 0) { + return; + } + erts_silence_warn_unused_result(write(wfd, "root_index,tag,field,ptr,where\n", 31)); + erts_alloc_map_load(); + for (i = 0; i < erts_alloc_struct_snapshot_count; i++) { + ErtsAllocStructSnapshot *snap = 
&erts_alloc_struct_snapshots[i]; + if (erts_alloc_struct_should_snapshot(snap->tag)) { + erts_alloc_struct_walk_index_table(i, snap, wfd); + } + } + close(wfd); +} static ERTS_INLINE void erts_alloc_trace_write(const char *line, int len) @@ -182,6 +450,7 @@ erts_alloc_trace_note_alloc(const char *tag, void *ptr, UWord size) } erts_alloc_struct_csv_write(csv_line, csv_len); } + erts_alloc_struct_register_snapshot(safe_tag, ptr, size); } void @@ -782,6 +1051,7 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) { const char *trace_path = getenv("ERTS_ALLOC_TRACE_FILE"); const char *csv_path = getenv("ERTS_ALLOC_STRUCT_CSV_FILE"); + const char *dump_dir = getenv("ERTS_ALLOC_STRUCT_DUMP_DIR"); if (trace_path && trace_path[0] != '\0') { erts_alloc_trace_fd = open(trace_path, O_WRONLY|O_CREAT|O_APPEND, 0666); } @@ -795,6 +1065,21 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) erts_alloc_struct_csv_fd = open(default_csv_path, O_WRONLY|O_CREAT|O_APPEND, 0666); } } + if (dump_dir && dump_dir[0] != '\0') { + erts_snprintf(erts_alloc_struct_snapshot_dir, + sizeof(erts_alloc_struct_snapshot_dir), + "%s", + dump_dir); + } else { + erts_snprintf(erts_alloc_struct_snapshot_dir, + sizeof(erts_alloc_struct_snapshot_dir), + "_mmap-records/struct-root-dumps"); + } + if (!erts_alloc_struct_snapshot_registered) { + if (atexit(erts_alloc_struct_dump_snapshots_on_exit) == 0) { + erts_alloc_struct_snapshot_registered = 1; + } + } } set_default_sl_alloc_opts(&init.sl_alloc); diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index 5c65269b06b4..80e74d8c7bc1 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -6945,6 +6945,12 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->try_set_dyn_param = &erts_alcu_try_set_dyn_param; #if HAVE_ERTS_MSEG + { + int force_mseg = (allctr->alloc_no == ERTS_ALC_A_LONG_LIVED); + int force_sys = (allctr->alloc_no == 
ERTS_ALC_A_SYSTEM + || allctr->alloc_no == ERTS_ALC_A_TEMPORARY + || allctr->alloc_no == ERTS_ALC_A_DRIVER); + if (init->mseg_alloc) { ASSERT(init->mseg_realloc && init->mseg_dealloc); allctr->mseg_alloc = init->mseg_alloc; @@ -6968,16 +6974,28 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->crr_set_flgs = CFLG_FORCE_SYS_ALLOC; allctr->crr_clr_flgs = CFLG_FORCE_MSEG; } + else if (force_mseg) { + allctr->crr_set_flgs = CFLG_FORCE_MSEG; + allctr->crr_clr_flgs = CFLG_FORCE_SYS_ALLOC; + } + else if (force_sys) { + allctr->crr_set_flgs = CFLG_FORCE_SYS_ALLOC; + allctr->crr_clr_flgs = CFLG_FORCE_MSEG; + } + } #endif if (allctr->main_carrier_size && (allctr->ix != 0 || init->mmbc0)) { Block_t *blk; + Uint create_flags; + + create_flags = (allctr->alloc_no == ERTS_ALC_A_LONG_LIVED + ? CFLG_FORCE_MSEG + : CFLG_FORCE_SYS_ALLOC); blk = create_carrier(allctr, allctr->main_carrier_size, - (ERTS_SUPER_ALIGNED_MSEG_ONLY - ? CFLG_FORCE_MSEG : CFLG_FORCE_SYS_ALLOC) - | CFLG_MBC + create_flags | CFLG_MBC | CFLG_FORCE_SIZE | CFLG_NO_CPOOL | CFLG_MAIN_CARRIER); diff --git a/erts/emulator/sys/unix/sys_drivers.c b/erts/emulator/sys/unix/sys_drivers.c index c06d6b66036a..9f067697d546 100644 --- a/erts/emulator/sys/unix/sys_drivers.c +++ b/erts/emulator/sys/unix/sys_drivers.c @@ -1592,9 +1592,8 @@ static ErlDrvData forker_start(ErlDrvPort port_num, char* name, int fds[2]; int res, unbind; char bindir[MAXPATHLEN]; + char child_setup_prog[MAXPATHLEN + 64]; size_t bindirsz = sizeof(bindir); - Uint csp_path_sz; - char *child_setup_prog; forker_port = erts_drvport2id(port_num); @@ -1609,16 +1608,8 @@ static ErlDrvData forker_start(ErlDrvPort port_num, char* name, erts_exit(1, "Environment variable BINDIR does not contain an" " absolute path\n"); - csp_path_sz = (strlen(bindir) - + 1 /* DIR_SEPARATOR_CHAR */ - + sizeof(CHILD_SETUP_PROG_NAME) - + 1); - child_setup_prog = erts_alloc(ERTS_ALC_T_CS_PROG_PATH, csp_path_sz); - erts_snprintf(child_setup_prog, csp_path_sz, - "%s%c%s", 
- bindir, - DIR_SEPARATOR_CHAR, - CHILD_SETUP_PROG_NAME); + erts_snprintf(child_setup_prog, sizeof(child_setup_prog), + "%s%c%s", bindir, DIR_SEPARATOR_CHAR, CHILD_SETUP_PROG_NAME); if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) { erts_exit(ERTS_ABORT_EXIT, "Could not open unix domain socket in spawn_init: %d\n", @@ -1661,8 +1652,6 @@ static ErlDrvData forker_start(ErlDrvPort port_num, char* name, erts_sched_bind_atfork_parent(unbind); - erts_free(ERTS_ALC_T_CS_PROG_PATH, child_setup_prog); - close(fds[1]); /* If stdin is a tty then we need to restore its settings when we exit. From ebce050b8b2707763e1898a866b3f6cd2e0f4df4 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Wed, 22 Apr 2026 10:57:48 +0200 Subject: [PATCH 05/37] Restore export and fun index-table roots on replay Replay was aborting with "function erl_init:start/2 not found in active code index" because the export and fun tables were always reinitialized empty. Their IndexTable metadata lives as static storage via erl_code_staged.h and load_preloaded() is skipped in replay mode, so the recorded bucket/seg arrays in the mapped arena were unreachable. Extend the existing atom/module root-dump pipeline to cover export_tables[] and fun_tables[]: - export.c / erl_fun.c: tag each per-code-index table with erts_alloc_trace_note_alloc(...); add init_{export,fun}_table_replay that copy the recorded roots into the static tables, restore htable.fun to the current build's function pointers, and re-init the staging rwmutex and entry-bytes atomic. - erl_alloc.c: whitelist the two new tags in erts_alloc_struct_should_snapshot and derive dump file names from the tag directly. - erl_init.c: restore_struct_roots_for_replay dispatches on tag and fills atom/module/export/fun arrays uniformly; erl_init moves erts_init_fun_table and init_export_table into the replay/record branch, calling the _replay variants when replay is enabled. 
With this change, validate_replay_module_tables succeeds for all 22 preloaded modules and erl_init:start/2 resolves against the restored export table without re-running load_preloaded(). --- erts/emulator/beam/erl_alloc.c | 8 +- erts/emulator/beam/erl_fun.c | 44 ++++ erts/emulator/beam/erl_fun.h | 2 + erts/emulator/beam/erl_init.c | 387 ++++++++++++++++++++++++++++++++- erts/emulator/beam/export.c | 45 ++++ erts/emulator/beam/export.h | 1 + 6 files changed, 477 insertions(+), 10 deletions(-) diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 2eb9f8ab31ea..7bad48b0666a 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -276,7 +276,9 @@ erts_alloc_struct_should_snapshot(const char *tag) { return tag && (strcmp(tag, "atom_table.index_root") == 0 - || strcmp(tag, "module_table.index_root") == 0); + || strcmp(tag, "module_table.index_root") == 0 + || strcmp(tag, "export_table.index_root") == 0 + || strcmp(tag, "fun_table.index_root") == 0); } static void @@ -322,9 +324,7 @@ erts_alloc_struct_dump_snapshots_on_exit(void) for (i = 0; i < erts_alloc_struct_snapshot_count; i++) { ErtsAllocStructSnapshot *snap = &erts_alloc_struct_snapshots[i]; - const char *name = (strcmp(snap->tag, "atom_table.index_root") == 0) - ? 
"atom_table.index_root" - : "module_table.index_root"; + const char *name = snap->tag; len = erts_snprintf(path, sizeof(path), "%s/%02d.%s.bin", erts_alloc_struct_snapshot_dir, i, name); if (len <= 0 || len >= (int) sizeof(path)) { diff --git a/erts/emulator/beam/erl_fun.c b/erts/emulator/beam/erl_fun.c index 1ab1ba4e3ea9..6d7a3c659d25 100644 --- a/erts/emulator/beam/erl_fun.c +++ b/erts/emulator/beam/erl_fun.c @@ -132,7 +132,51 @@ static void fun_stage(ErlFunEntry *entry, void erts_init_fun_table(void) { + int i; + fun_staged_init(); + + for (i = 0; i < ERTS_NUM_CODE_IX; i++) { + erts_alloc_trace_note_alloc("fun_table.index_root", + &fun_tables[i], + sizeof(fun_tables[i])); + } +} + +void erts_init_fun_table_replay(IndexTable *roots, int no_roots) +{ + HashFunctions f; + erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER; + int i; + + ASSERT(roots != NULL); + ASSERT(no_roots == ERTS_NUM_CODE_IX); + (void) no_roots; + + rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ; + rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED; + + erts_rwmtx_init_opt(&fun_rwmutex, + &rwmtx_opt, + "fun_staging_lock", + NIL, + (ERTS_LOCK_FLAGS_PROPERTY_STATIC | + ERTS_LOCK_FLAGS_CATEGORY_GENERIC)); + + erts_atomic_init_nob(&fun_total_entries_bytes, 0); + + f.hash = (H_FUN) fun_staged_hash; + f.cmp = (HCMP_FUN) fun_staged_cmp; + f.alloc = (HALLOC_FUN) fun_staged_alloc; + f.free = (HFREE_FUN) fun_staged_free; + f.meta_alloc = (HMALLOC_FUN) erts_alloc; + f.meta_free = (HMFREE_FUN) erts_free; + f.meta_print = (HMPRINT_FUN) erts_print; + + for (i = 0; i < ERTS_NUM_CODE_IX; i++) { + fun_tables[i] = roots[i]; + fun_tables[i].htable.fun = f; + } } void erts_fun_info(fmtfn_t to, void *to_arg) diff --git a/erts/emulator/beam/erl_fun.h b/erts/emulator/beam/erl_fun.h index 0f8fdeba319d..88d1da9af2d8 100644 --- a/erts/emulator/beam/erl_fun.h +++ b/erts/emulator/beam/erl_fun.h @@ -24,6 +24,7 @@ #define __ERLFUNTABLE_H__ #include "erl_threads.h" +#include "index.h" /* * Fun entry. 
@@ -89,6 +90,7 @@ typedef struct erl_fun_thing { #define ERL_FUN_SIZE ((sizeof(ErlFunThing)/sizeof(Eterm))) void erts_init_fun_table(void); +void erts_init_fun_table_replay(IndexTable *roots, int no_roots); void erts_fun_info(fmtfn_t, void *); int erts_fun_table_sz(void); int erts_fun_entries_sz(void); diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 08bce89438b1..c3379ea4fdfb 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -58,6 +58,7 @@ #include "erl_iolist.h" #include "erl_debugger.h" #include "erl_mmap.h" +#include "index.h" #include "jit/beam_asm.h" @@ -242,6 +243,12 @@ void erl_error(const char *fmt, va_list args) } static int early_init(int *argc, char **argv); +static int restore_struct_roots_for_replay(IndexTable *atom_root, + IndexTable *module_roots, + int table_capacity, + IndexTable *export_roots, + IndexTable *fun_roots); +static void debug_replay_roots_sanity(void); static void erl_init(int ncpu, @@ -281,10 +288,30 @@ erl_init(int ncpu, erts_init_debugger(); erts_init_trace(); erts_code_ix_init(); - erts_init_fun_table(); - init_atom_table(); - init_export_table(); - init_module_table(); + if (erts_mmap_record_option_replay_enabled()) { + IndexTable atom_root; + IndexTable module_roots[ERTS_NUM_CODE_IX]; + IndexTable export_roots[ERTS_NUM_CODE_IX]; + IndexTable fun_roots[ERTS_NUM_CODE_IX]; + if (!restore_struct_roots_for_replay(&atom_root, + module_roots, + ERTS_NUM_CODE_IX, + export_roots, + fun_roots)) { + erts_exit(ERTS_ABORT_EXIT, + "failed to restore replay root structures from struct-root-dumps\n"); + } + erts_init_fun_table_replay(fun_roots, ERTS_NUM_CODE_IX); + init_atom_table_replay(&atom_root); + init_module_table_replay(module_roots, ERTS_NUM_CODE_IX); + init_export_table_replay(export_roots, ERTS_NUM_CODE_IX); + debug_replay_roots_sanity(); + } else { + erts_init_fun_table(); + init_atom_table(); + init_module_table(); + init_export_table(); + } init_register_table(); 
init_message(); #ifdef BEAMASM @@ -335,11 +362,37 @@ erl_spawn_system_process(Process* parent, Eterm mod, Eterm func, Eterm args, { Eterm res; int arity; + ErtsCodePtr fn_active; + char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); ERTS_LC_ASSERT(ERTS_PROC_LOCK_MAIN & erts_proc_lc_my_proc_locks(parent)); arity = erts_list_length(args); + fn_active = erts_find_function(mod, func, arity, erts_active_code_ix()); + + if (dbg && dbg[0] != '0') { + ErtsCodePtr fn_staging = erts_find_function(mod, func, arity, erts_staging_code_ix()); + Module *mod_active = erts_get_module(mod, erts_active_code_ix()); + Module *mod_staging = erts_get_module(mod, erts_staging_code_ix()); + erts_fprintf(stderr, + "replay_root_debug: spawn_lookup mod=%T func=%T arity=%d active_ix=%u staging_ix=%u fn_active=%p fn_staging=%p mod_active=%p mod_staging=%p\n", + mod, func, arity, + (unsigned int) erts_active_code_ix(), + (unsigned int) erts_staging_code_ix(), + (void *) fn_active, (void *) fn_staging, + (void *) mod_active, (void *) mod_staging); + } - if (erts_find_function(mod, func, arity, erts_active_code_ix()) == NULL) { + if (fn_active == NULL) { + if (dbg && dbg[0] != '0') { + erts_fprintf(stderr, + "replay_root_debug: no_function mod_raw=%p func_raw=%p mod_is_atom=%d func_is_atom=%d arity=%d\n", + (void *) (UWord) mod, (void *) (UWord) func, + is_atom(mod) ? 1 : 0, is_atom(func) ? 1 : 0, arity); + erts_fprintf(stderr, + "replay_root_debug: atom_consts am_erl_init=%p is_atom=%d am_start=%p is_atom=%d\n", + (void *) (UWord) am_erl_init, is_atom(am_erl_init) ? 1 : 0, + (void *) (UWord) am_start, is_atom(am_start) ? 
1 : 0); + } erts_exit(ERTS_ERROR_EXIT, "No function %T:%T/%i\n", mod, func, arity); } @@ -350,6 +403,304 @@ erl_spawn_system_process(Process* parent, Eterm mod, Eterm func, Eterm args, return res; } +static int +restore_struct_roots_for_replay(IndexTable *atom_root, + IndexTable *module_roots, + int table_capacity, + IndexTable *export_roots, + IndexTable *fun_roots) +{ + const char *base_dir = getenv("ERTS_ALLOC_STRUCT_DUMP_DIR"); + char dir_buf[512]; + char manifest_path[1024]; + FILE *mf = NULL; + char line[1024]; + int module_ix = 0; + int export_ix = 0; + int fun_ix = 0; + int have_atom = 0; + + if (!base_dir || base_dir[0] == '\0') { + base_dir = "_mmap-records/struct-root-dumps"; + } + erts_snprintf(dir_buf, sizeof(dir_buf), "%s", base_dir); + erts_snprintf(manifest_path, sizeof(manifest_path), "%s/roots.csv", dir_buf); + + mf = fopen(manifest_path, "r"); + if (!mf) { + return 0; + } + + while (fgets(line, sizeof(line), mf) != NULL) { + char *p1, *p2, *p3, *p4; + char *tag, *szs, *file; + unsigned long sz; + char file_path[1024]; + FILE *bf; + IndexTable *dst = NULL; + + if (line[0] == '\0' || line[0] == '\n' || line[0] == '#' + || !isdigit((unsigned char)line[0])) { + continue; + } + + p1 = strchr(line, ','); + if (!p1) continue; + p2 = strchr(p1 + 1, ','); + if (!p2) continue; + p3 = strchr(p2 + 1, ','); + if (!p3) continue; + p4 = strchr(p3 + 1, ','); + if (!p4) continue; + + tag = p1 + 1; + *p2 = '\0'; + szs = p3 + 1; + *p4 = '\0'; + file = p4 + 1; + file[strcspn(file, "\r\n")] = '\0'; + + sz = strtoul(szs, NULL, 10); + if (sz != sizeof(IndexTable)) { + continue; + } + + if (strcmp(tag, "atom_table.index_root") == 0) { + dst = atom_root; + have_atom = 1; + } else if (strcmp(tag, "module_table.index_root") == 0) { + if (module_ix < table_capacity) { + dst = &module_roots[module_ix++]; + } + } else if (strcmp(tag, "export_table.index_root") == 0) { + if (export_ix < table_capacity) { + dst = &export_roots[export_ix++]; + } + } else if (strcmp(tag, 
"fun_table.index_root") == 0) { + if (fun_ix < table_capacity) { + dst = &fun_roots[fun_ix++]; + } + } else { + continue; + } + + erts_snprintf(file_path, sizeof(file_path), "%s/%s", dir_buf, file); + bf = fopen(file_path, "rb"); + if (!bf) { + fclose(mf); + return 0; + } + + if (dst) { + if (fread((void *) dst, 1, sizeof(IndexTable), bf) + != sizeof(IndexTable)) { + fclose(bf); + fclose(mf); + return 0; + } + } else { + /* overflow: consume file and ignore */ + IndexTable tmp; + size_t rr = fread((void *)&tmp, 1, sizeof(IndexTable), bf); + (void) rr; + } + + fclose(bf); + } + + fclose(mf); + return have_atom + && module_ix == table_capacity + && export_ix == table_capacity + && fun_ix == table_capacity; +} + +static void +debug_replay_roots_sanity(void) +{ + int i, samples, pre_i; + Eterm atom_term; + int atom_ok, module_entries; + Module *m = NULL; + Eterm mod_atom = THE_NON_VALUE; + const Preload *preload; + char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); + int enabled = !dbg || dbg[0] != '0'; + + if (!enabled) { + return; + } + + erts_fprintf(stderr, + "replay_root_debug: atom_table entries=%d size=%d limit=%d seg_table=%p hash_bucket=%p\n", + erts_atom_table.entries, + erts_atom_table.size, + erts_atom_table.limit, + (void *) erts_atom_table.seg_table, + (void *) erts_atom_table.htable.bucket); + atom_table_replay_debug_dump(); + module_table_replay_debug_dump(); + + atom_ok = 0; + if (erts_atom_table.htable.fun.hash + && erts_atom_table.htable.fun.cmp + && erts_atom_table.htable.fun.alloc) { + atom_ok = erts_atom_get((const char *) "erts_code_purger", + sizeof("erts_code_purger") - 1, + &atom_term, + ERTS_ATOM_ENC_7BIT_ASCII); + } + erts_fprintf(stderr, + "replay_root_debug: atom_lookup(erts_code_purger)=%d term=%T\n", + atom_ok, atom_term); + + module_entries = module_code_size(erts_active_code_ix()); + erts_fprintf(stderr, + "replay_root_debug: module_table active_entries=%d\n", + module_entries); + + if (erts_atom_get((const char *) "erts_code_purger", + 
sizeof("erts_code_purger") - 1, + &mod_atom, + ERTS_ATOM_ENC_7BIT_ASCII)) { + m = erts_get_module(mod_atom, erts_active_code_ix()); + } + erts_fprintf(stderr, + "replay_root_debug: module_lookup(erts_code_purger)=%p\n", + (void *) m); + + samples = erts_atom_table.entries < 32 ? erts_atom_table.entries : 32; + for (i = 0; i < samples; i++) { + Atom *a = (Atom *) erts_index_lookup(&erts_atom_table, i); + if (!a) { + erts_fprintf(stderr, "replay_root_debug: atom_slot[%d]=NULL\n", i); + continue; + } + erts_fprintf(stderr, + "replay_root_debug: atom_slot[%d]=%p slot.index=%d len=%d ord0=%d bin=%p name_ptr=%p\n", + i, (void *) a, a->slot.index, (int) a->len, a->ord0, + (void *) (UWord) a->u.bin, + (void *) erts_atom_get_name(a)); + } + + atom_replay_debug_lookup("erts_code_purger"); + atom_replay_debug_lookup("erl_init"); + atom_replay_debug_lookup("start"); + atom_replay_debug_lookup("atomics"); + + preload = sys_preloaded(); + pre_i = 0; + while (preload && preload[pre_i].name && pre_i < 2) { + const char *name = preload[pre_i].name; + Eterm aterm = THE_NON_VALUE; + Module *pm = NULL; + int ok = erts_atom_get((const char *) name, + sys_strlen(name), + &aterm, + ERTS_ATOM_ENC_LATIN1); + if (ok) { + pm = erts_get_module(aterm, erts_active_code_ix()); + } + erts_fprintf(stderr, + "replay_root_debug: preloaded[%d]=%s atom_ok=%d module=%p\n", + pre_i, name, ok, (void *) pm); + pre_i++; + } + + { + Eterm t = THE_NON_VALUE; + int ok; + ok = erts_atom_get((const char *) "start", + sizeof("start") - 1, + &t, + ERTS_ATOM_ENC_7BIT_ASCII); + erts_fprintf(stderr, + "replay_root_debug: const_check name=start ok=%d parsed=%p am_start=%p equal=%d\n", + ok, (void *) (UWord) t, (void *) (UWord) am_start, + (ok && t == am_start) ? 
1 : 0); + + ok = erts_atom_get((const char *) "erl_init", + sizeof("erl_init") - 1, + &t, + ERTS_ATOM_ENC_7BIT_ASCII); + erts_fprintf(stderr, + "replay_root_debug: const_check name=erl_init ok=%d parsed=%p am_erl_init=%p equal=%d\n", + ok, (void *) (UWord) t, (void *) (UWord) am_erl_init, + (ok && t == am_erl_init) ? 1 : 0); + + ok = erts_atom_get((const char *) "erlang", + sizeof("erlang") - 1, + &t, + ERTS_ATOM_ENC_7BIT_ASCII); + erts_fprintf(stderr, + "replay_root_debug: const_check name=erlang ok=%d parsed=%p am_erlang=%p equal=%d\n", + ok, (void *) (UWord) t, (void *) (UWord) am_erlang, + (ok && t == am_erlang) ? 1 : 0); + } +} + +static void +validate_replay_module_tables(void) +{ + const Preload *preload; + int i; + Eterm mod_atom = THE_NON_VALUE; + Module *m = NULL; + char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); + int enabled = !dbg || dbg[0] != '0'; + + preload = sys_preloaded(); + if (!preload) { + erts_exit(ERTS_ABORT_EXIT, + "replay validation failed: sys_preloaded() returned NULL\n"); + } + + i = 0; + while (preload[i].name) { + const char *name = preload[i].name; + int ok = erts_atom_get(name, + sys_strlen(name), + &mod_atom, + ERTS_ATOM_ENC_LATIN1); + if (!ok) { + erts_exit(ERTS_ABORT_EXIT, + "replay validation failed: atom for preloaded module '%s' not found in restored atom table\n", + name); + } + + m = erts_get_module(mod_atom, erts_active_code_ix()); + if (!m) { + erts_exit(ERTS_ABORT_EXIT, + "replay validation failed: module '%s' not found in active module table\n", + name); + } + + if (!m->curr.code_hdr || m->curr.code_length <= 0) { + erts_exit(ERTS_ABORT_EXIT, + "replay validation failed: module '%s' has invalid current code (code_hdr=%p code_length=%d)\n", + name, (void *) m->curr.code_hdr, m->curr.code_length); + } + + if (enabled && i < 20) { + erts_fprintf(stderr, + "replay_root_debug: replay_validate preloaded[%d]=%s module=%p code_hdr=%p code_len=%d\n", + i, name, (void *) m, (void *) m->curr.code_hdr, m->curr.code_length); + } + i++; 
+ } + + if (!erts_find_function(am_erl_init, am_start, 2, erts_active_code_ix())) { + erts_exit(ERTS_ABORT_EXIT, + "replay validation failed: function erl_init:start/2 not found in active code index\n"); + } + + if (enabled) { + erts_fprintf(stderr, + "replay_root_debug: replay_validate success preloaded_modules=%d erl_init:start/2=ok\n", + i); + } +} + static Eterm erl_first_process_otp(char* mod_name, int argc, char** argv) { @@ -360,6 +711,7 @@ erl_first_process_otp(char* mod_name, int argc, char** argv) Process parent; ErlSpawnOpts so; Eterm boot_mod; + char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); /* * We need a dummy parent process to be able to call erl_create_process(). @@ -382,6 +734,12 @@ erl_first_process_otp(char* mod_name, int argc, char** argv) } boot_mod = erts_atom_put((byte *) mod_name, sys_strlen(mod_name), ERTS_ATOM_ENC_LATIN1, 1); + if (dbg && dbg[0] != '0') { + erts_fprintf(stderr, + "replay_root_debug: first_process boot_mod=%T boot_mod_raw=%p argc=%d am_erl_init=%p am_start=%p\n", + boot_mod, (void *) (UWord) boot_mod, argc, + (void *) (UWord) am_erl_init, (void *) (UWord) am_start); + } args = CONS(hp, args, NIL); hp += 2; args = CONS(hp, boot_mod, args); @@ -402,10 +760,16 @@ erl_system_process_otp(Eterm parent_pid, char* modname, int off_heap_msgq, int p Process *parent; ErlSpawnOpts so; Eterm mod, res; + char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); parent = erts_pid2proc(NULL, 0, parent_pid, ERTS_PROC_LOCK_MAIN); mod = erts_atom_put((byte *) modname, sys_strlen(modname), ERTS_ATOM_ENC_LATIN1, 1); + if (dbg && dbg[0] != '0') { + erts_fprintf(stderr, + "replay_root_debug: system_process modname=%s mod=%T parent=%T off_heap=%d prio=%d\n", + modname, mod, parent_pid, off_heap_msgq, prio); + } ERTS_SET_DEFAULT_SPAWN_OPTS(&so); @@ -2560,7 +2924,18 @@ erl_start(int argc, char **argv) node_tab_delete_delay, db_spin_count); - load_preloaded(); + if (erts_mmap_record_option_replay_enabled()) { + validate_replay_module_tables(); + { + char *dbg = 
getenv("ERTS_REPLAY_ROOT_DEBUG"); + if (!dbg || dbg[0] != '0') { + erts_fprintf(stderr, + "replay_root_debug: skipping load_preloaded() in replay mode after validation\n"); + } + } + } else { + load_preloaded(); + } erts_end_staging_code_ix(); erts_commit_staging_code_ix(); diff --git a/erts/emulator/beam/export.c b/erts/emulator/beam/export.c index ac1982981cc9..9d6dab5e9636 100644 --- a/erts/emulator/beam/export.c +++ b/erts/emulator/beam/export.c @@ -129,7 +129,52 @@ static void export_stage(Export *export, void init_export_table(void) { + int i; + export_staged_init(); + + for (i = 0; i < ERTS_NUM_CODE_IX; i++) { + erts_alloc_trace_note_alloc("export_table.index_root", + &export_tables[i], + sizeof(export_tables[i])); + } +} + +void +init_export_table_replay(IndexTable *roots, int no_roots) +{ + HashFunctions f; + erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER; + int i; + + ASSERT(roots != NULL); + ASSERT(no_roots == ERTS_NUM_CODE_IX); + (void) no_roots; + + rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ; + rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED; + + erts_rwmtx_init_opt(&export_rwmutex, + &rwmtx_opt, + "export_staging_lock", + NIL, + (ERTS_LOCK_FLAGS_PROPERTY_STATIC | + ERTS_LOCK_FLAGS_CATEGORY_GENERIC)); + + erts_atomic_init_nob(&export_total_entries_bytes, 0); + + f.hash = (H_FUN) export_staged_hash; + f.cmp = (HCMP_FUN) export_staged_cmp; + f.alloc = (HALLOC_FUN) export_staged_alloc; + f.free = (HFREE_FUN) export_staged_free; + f.meta_alloc = (HMALLOC_FUN) erts_alloc; + f.meta_free = (HMFREE_FUN) erts_free; + f.meta_print = (HMPRINT_FUN) erts_print; + + for (i = 0; i < ERTS_NUM_CODE_IX; i++) { + export_tables[i] = roots[i]; + export_tables[i].htable.fun = f; + } } void diff --git a/erts/emulator/beam/export.h b/erts/emulator/beam/export.h index 7a218f27b785..e00609203b33 100644 --- a/erts/emulator/beam/export.h +++ b/erts/emulator/beam/export.h @@ -123,6 +123,7 @@ typedef struct export_ #endif void init_export_table(void); +void 
init_export_table_replay(IndexTable *roots, int no_roots); void export_info(fmtfn_t, void *); ERTS_GLB_INLINE void erts_activate_export_trampoline(Export *ep, int code_ix); From 03485ca127d6f67a09e85e6908978737610ac093 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Thu, 23 Apr 2026 10:23:38 +0200 Subject: [PATCH 06/37] Snapshot and restore active/staging code indices across record/replay The active/staging code-index atomics are BSS-allocated and thus reset to 0 at replay process startup, so modules that were active at record time ended up being dispatched through a stale code index during replay. Depending on the indices at record time this manifested as a SIGSEGV in process_main (e.g. select_val_lin) because Export dispatch addresses for the active index were unpopulated. Add a plain-int32 shadow of the_active_code_index and the_staging_code_index, register it as "code_ix.root" in the struct-root-dump pipeline, and restore it into the live atomics after the index tables are loaded on replay. In replay mode, skip the preload-driven erts_end_staging_code_ix()/erts_commit_staging_code_ix() pair in erl_start, since the indices have already been restored from the snapshot and must not be advanced again. --- erts/emulator/beam/code_ix.c | 32 +++++++++++++++++++++++++ erts/emulator/beam/code_ix.h | 12 ++++++++++ erts/emulator/beam/erl_alloc.c | 7 ++++-- erts/emulator/beam/erl_init.c | 44 ++++++++++++++++++++++++++++++---- 4 files changed, 88 insertions(+), 7 deletions(-) diff --git a/erts/emulator/beam/code_ix.c b/erts/emulator/beam/code_ix.c index 83aa5fe93905..7df22d66659f 100644 --- a/erts/emulator/beam/code_ix.c +++ b/erts/emulator/beam/code_ix.c @@ -48,6 +48,33 @@ erts_atomic32_t outstanding_blocking_code_barriers; erts_atomic32_t the_active_code_index; erts_atomic32_t the_staging_code_index; +/* + * Plain-int32 shadow of the active/staging code indices, registered as + * "code_ix.root" so the struct-root-dump/replay pipeline can save and + * restore them. 
The atomic variables themselves are not directly + * snapshotted because their in-memory representation is backend-specific. + * + * Layout: [0] = active, [1] = staging. + * + * Updated whenever the atomics change (init / commit). + */ +int32_t erts_code_ix_root[2] = {0, 0}; + +static ERTS_INLINE void update_code_ix_root(void) +{ + erts_code_ix_root[0] = (int32_t) erts_atomic32_read_nob(&the_active_code_index); + erts_code_ix_root[1] = (int32_t) erts_atomic32_read_nob(&the_staging_code_index); +} + +void erts_code_ix_apply_replay_root(void) +{ + /* Restore atomic indices from the snapshotted shadow. Must be called + * after the shadow has been populated from the dump (in replay mode), + * but before any code that depends on the indices runs. */ + erts_atomic32_set_nob(&the_active_code_index, (erts_aint32_t) erts_code_ix_root[0]); + erts_atomic32_set_nob(&the_staging_code_index, (erts_aint32_t) erts_code_ix_root[1]); +} + struct code_permission { erts_mtx_t lock; @@ -84,6 +111,10 @@ void erts_code_ix_init(void) erts_atomic32_init_nob(&outstanding_blocking_code_barriers, 0); erts_atomic32_init_nob(&the_active_code_index, 0); erts_atomic32_init_nob(&the_staging_code_index, 0); + update_code_ix_root(); + erts_alloc_trace_note_alloc("code_ix.root", + erts_code_ix_root, + sizeof(erts_code_ix_root)); erts_mtx_init(&code_mod_permission.lock, "code_mod_permission", NIL, @@ -136,6 +167,7 @@ void erts_commit_staging_code_ix(void) erts_atomic32_set_nob(&the_active_code_index, ix); ix = (ix + 1) % ERTS_NUM_CODE_IX; erts_atomic32_set_nob(&the_staging_code_index, ix); + update_code_ix_root(); } fun_staged_write_unlock(); export_staged_write_unlock(); diff --git a/erts/emulator/beam/code_ix.h b/erts/emulator/beam/code_ix.h index 565c8b539675..00013edccdeb 100644 --- a/erts/emulator/beam/code_ix.h +++ b/erts/emulator/beam/code_ix.h @@ -153,6 +153,18 @@ const ErtsCodeMFA *erts_code_to_codemfa(ErtsCodePtr I); */ void erts_code_ix_init(void); +/* Apply the record/replay snapshot 
stored in erts_code_ix_root to the + * live atomic active/staging code index variables. Used in replay mode + * to restore the code index state captured at record time. + */ +void erts_code_ix_apply_replay_root(void); + +/* Plain-int32 shadow of the active/staging code indices. Registered as + * "code_ix.root" in the struct-root-dump pipeline. [0] = active, + * [1] = staging. Do not modify directly. + */ +extern int32_t erts_code_ix_root[2]; + /* Return active code index. * Is guaranteed to be valid until the calling BIF returns. * To get a consistent view of the code, only one call to erts_active_code_ix() diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 7bad48b0666a..d79585af6b36 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -278,7 +278,8 @@ erts_alloc_struct_should_snapshot(const char *tag) && (strcmp(tag, "atom_table.index_root") == 0 || strcmp(tag, "module_table.index_root") == 0 || strcmp(tag, "export_table.index_root") == 0 - || strcmp(tag, "fun_table.index_root") == 0); + || strcmp(tag, "fun_table.index_root") == 0 + || strcmp(tag, "code_ix.root") == 0); } static void @@ -364,7 +365,9 @@ erts_alloc_struct_dump_snapshots_on_exit(void) erts_alloc_map_load(); for (i = 0; i < erts_alloc_struct_snapshot_count; i++) { ErtsAllocStructSnapshot *snap = &erts_alloc_struct_snapshots[i]; - if (erts_alloc_struct_should_snapshot(snap->tag)) { + if (erts_alloc_struct_should_snapshot(snap->tag) + && snap->tag[0] != 'c' /* skip "code_ix.root" (not an IndexTable) */ + && snap->size >= sizeof(IndexTable)) { erts_alloc_struct_walk_index_table(i, snap, wfd); } } diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index c3379ea4fdfb..0f21392411b8 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -305,6 +305,13 @@ erl_init(int ncpu, init_atom_table_replay(&atom_root); init_module_table_replay(module_roots, ERTS_NUM_CODE_IX); 
init_export_table_replay(export_roots, ERTS_NUM_CODE_IX); + /* + * Restore the active/staging code indices that were in effect at + * record time. Must be done after the index tables have been + * populated from the snapshot but before any code path uses + * erts_active_code_ix() to look up code. + */ + erts_code_ix_apply_replay_root(); debug_replay_roots_sanity(); } else { erts_init_fun_table(); @@ -461,25 +468,43 @@ restore_struct_roots_for_replay(IndexTable *atom_root, file[strcspn(file, "\r\n")] = '\0'; sz = strtoul(szs, NULL, 10); - if (sz != sizeof(IndexTable)) { - continue; - } if (strcmp(tag, "atom_table.index_root") == 0) { + if (sz != sizeof(IndexTable)) { continue; } dst = atom_root; have_atom = 1; } else if (strcmp(tag, "module_table.index_root") == 0) { + if (sz != sizeof(IndexTable)) { continue; } if (module_ix < table_capacity) { dst = &module_roots[module_ix++]; } } else if (strcmp(tag, "export_table.index_root") == 0) { + if (sz != sizeof(IndexTable)) { continue; } if (export_ix < table_capacity) { dst = &export_roots[export_ix++]; } } else if (strcmp(tag, "fun_table.index_root") == 0) { + if (sz != sizeof(IndexTable)) { continue; } if (fun_ix < table_capacity) { dst = &fun_roots[fun_ix++]; } + } else if (strcmp(tag, "code_ix.root") == 0) { + /* Two int32_t: active, staging */ + if (sz != sizeof(erts_code_ix_root)) { continue; } + erts_snprintf(file_path, sizeof(file_path), "%s/%s", dir_buf, file); + { + FILE *bf2 = fopen(file_path, "rb"); + if (!bf2) { fclose(mf); return 0; } + if (fread((void *) erts_code_ix_root, 1, + sizeof(erts_code_ix_root), bf2) + != sizeof(erts_code_ix_root)) { + fclose(bf2); + fclose(mf); + return 0; + } + fclose(bf2); + } + continue; } else { continue; } @@ -2936,8 +2961,17 @@ erl_start(int argc, char **argv) } else { load_preloaded(); } - erts_end_staging_code_ix(); - erts_commit_staging_code_ix(); + if (!erts_mmap_record_option_replay_enabled()) { + /* + * Non-replay: end staging for the preloaded modules and commit + * 
them so they become active. In replay mode the active/staging + * code indices have already been restored from the snapshot via + * erts_code_ix_apply_replay_root(), so we must not advance or + * overwrite them here. + */ + erts_end_staging_code_ix(); + erts_commit_staging_code_ix(); + } erts_initialized = 1; From 8f3aa74a68d2948de455571c282717150d5bfabf Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Thu, 23 Apr 2026 10:45:36 +0200 Subject: [PATCH 07/37] Add table-replay initializers, debug probes, and global-literal backing Support infrastructure for the struct-root-dumps record/replay pipeline: - atom.c/.h: init_atom_table_replay() rebuilds erts_atom_table from a snapshotted IndexTable root, re-establishing the hash/alloc function pointers and recomputing atom_space. atom_table_replay_debug_dump() and atom_replay_debug_lookup() are tooling used by debug_replay_roots_sanity() to probe atom-hash integrity after replay. - module.c/.h: init_module_table_replay() performs the same for all ERTS_NUM_CODE_IX module_tables, plus module_table_replay_debug_dump(). - erl_global_literals.c: route global_literal_chunk allocations through erts_mmap_record_alloc when record mode is enabled so literal chunks land in the mapped arena and are replayable. Register each chunk with erts_alloc_trace_note_alloc("global_literal.chunk", ...). Add erts_global_literal_is_in_range() and extend erts_is_in_literal_range (erl_alloc.h) to recognize addresses inside those chunks on 64-bit builds. - erl_mmap.h/erl_mmap_record.c: expose erts_mmap_record_option_replay_enabled() so the beam side can conditionally skip record-time-only initialization during replay. 
--- erts/emulator/beam/atom.c | 135 +++++++++++++++++++++ erts/emulator/beam/atom.h | 4 +- erts/emulator/beam/erl_alloc.h | 7 +- erts/emulator/beam/erl_global_literals.c | 42 ++++++- erts/emulator/beam/erl_global_literals.h | 1 + erts/emulator/beam/module.c | 57 +++++++++ erts/emulator/beam/module.h | 2 + erts/emulator/sys/common/erl_mmap.h | 1 + erts/emulator/sys/common/erl_mmap_record.c | 6 + 9 files changed, 250 insertions(+), 5 deletions(-) diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c index dcbfe1e06224..9c9cb84b8cf7 100644 --- a/erts/emulator/beam/atom.c +++ b/erts/emulator/beam/atom.c @@ -501,6 +501,141 @@ init_atom_table(void) } +void +init_atom_table_replay(IndexTable *root) +{ + int i; + HashFunctions f; + erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER; + + ASSERT(root != NULL); + + rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ; + rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED; + +#ifdef ERTS_ATOM_PUT_OPS_STAT + erts_atomic_init_nob(&atom_put_ops, 0); +#endif + + erts_rwmtx_init_opt(&atom_table_lock, &rwmtx_opt, "atom_tab", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC); + + erts_atom_table = *root; + f.hash = (H_FUN) atom_hash; + f.cmp = (HCMP_FUN) atom_cmp; + f.alloc = (HALLOC_FUN) atom_alloc; + f.free = (HFREE_FUN) atom_free; + f.meta_alloc = (HMALLOC_FUN) erts_alloc; + f.meta_free = (HMFREE_FUN) erts_free; + f.meta_print = (HMPRINT_FUN) erts_print; + erts_atom_table.htable.fun = f; + + atom_space = 0; + for (i = 0; i < erts_atom_table.entries; i++) { + Atom *a = (Atom *) erts_index_lookup(&erts_atom_table, i); + if (a) { + atom_space += a->len; + } + } +} + +void +atom_table_replay_debug_dump(void) +{ + erts_fprintf(stderr, + "replay_root_debug: atom.hash_fun stored=%p expected=%p match=%d\n", + (void *) (UWord) erts_atom_table.htable.fun.hash, + (void *) (UWord) ((H_FUN) atom_hash), + erts_atom_table.htable.fun.hash == (H_FUN) atom_hash); + erts_fprintf(stderr, + "replay_root_debug: atom.cmp_fun 
stored=%p expected=%p match=%d\n", + (void *) (UWord) erts_atom_table.htable.fun.cmp, + (void *) (UWord) ((HCMP_FUN) atom_cmp), + erts_atom_table.htable.fun.cmp == (HCMP_FUN) atom_cmp); + erts_fprintf(stderr, + "replay_root_debug: atom.alloc_fun stored=%p expected=%p match=%d\n", + (void *) (UWord) erts_atom_table.htable.fun.alloc, + (void *) (UWord) ((HALLOC_FUN) atom_alloc), + erts_atom_table.htable.fun.alloc == (HALLOC_FUN) atom_alloc); +} + +void +atom_replay_debug_lookup(const char *name) +{ + Atom tmpl; + HashValue hv; + Uint slot = 0; + int i; + int found_linear = 0; + int found_chain = 0; + int chain_len = 0; + Atom *linear_atom = NULL; + HashBucket *b = NULL; + + if (!name || !erts_atom_table.htable.bucket) { + erts_fprintf(stderr, + "replay_root_debug: atom_probe name=%s skipped (invalid table)\n", + name ? name : ""); + return; + } + + tmpl.len = (Sint16) sys_strlen(name); + tmpl.u.name = (byte *) name; + hv = atom_hash(&tmpl); + slot = hash_get_slot(&erts_atom_table.htable, hv); + b = erts_atom_table.htable.bucket[slot]; + + while (b && chain_len < 100000) { + Atom *cand = (Atom *) b; + chain_len++; + if (cand->len == tmpl.len + && sys_memcmp((const void *) erts_atom_get_name(cand), + (const void *) name, + tmpl.len) == 0) { + found_chain = 1; + break; + } + b = b->next; + } + + for (i = 0; i < erts_atom_table.entries; i++) { + Atom *a = (Atom *) erts_index_lookup(&erts_atom_table, i); + if (!a) { + continue; + } + if (a->len == tmpl.len + && sys_memcmp((const void *) erts_atom_get_name(a), + (const void *) name, + tmpl.len) == 0) { + found_linear = 1; + linear_atom = a; + break; + } + } + + erts_fprintf(stderr, + "replay_root_debug: atom_probe name=%s hv=%p slot=%lu chain_len=%d found_chain=%d found_linear=%d linear_ix=%d linear_ptr=%p\n", + name, + (void *) (UWord) hv, + (unsigned long) slot, + chain_len, + found_chain, + found_linear, + found_linear ? 
i : -1, + (void *) linear_atom); + + if (linear_atom) { + Uint atom_slot = hash_get_slot(&erts_atom_table.htable, + linear_atom->slot.bucket.hvalue); + erts_fprintf(stderr, + "replay_root_debug: atom_probe_detail name=%s atom_hvalue=%p atom_slot=%lu atom_next=%p\n", + name, + (void *) (UWord) linear_atom->slot.bucket.hvalue, + (unsigned long) atom_slot, + (void *) linear_atom->slot.bucket.next); + } +} + void dump_atoms(fmtfn_t to, void *to_arg) { diff --git a/erts/emulator/beam/atom.h b/erts/emulator/beam/atom.h index 08a5256177c8..038f43988df6 100644 --- a/erts/emulator/beam/atom.h +++ b/erts/emulator/beam/atom.h @@ -144,10 +144,12 @@ Eterm am_atom_put(const char*, Sint); /* ONLY 7-bit ascii! */ Eterm erts_atom_put(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc); int erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc); void init_atom_table(void); +void init_atom_table_replay(IndexTable *root); +void atom_table_replay_debug_dump(void); +void atom_replay_debug_lookup(const char *name); void atom_info(fmtfn_t, void *); void dump_atoms(fmtfn_t, void *); Uint erts_get_atom_limit(void); int erts_atom_get(const char* name, Uint len, Eterm* ap, ErtsAtomEncoding enc); void erts_atom_get_text_space_sizes(Uint *reserved, Uint *used); #endif - diff --git a/erts/emulator/beam/erl_alloc.h b/erts/emulator/beam/erl_alloc.h index 05d6a3306dbc..e09ae3a9b3f8 100644 --- a/erts/emulator/beam/erl_alloc.h +++ b/erts/emulator/beam/erl_alloc.h @@ -188,6 +188,10 @@ __decl_noreturn void erts_alc_fatal_error(int,int,ErtsAlcType_t,...) 
Eterm erts_alloc_set_dyn_param(struct process*, Eterm); +#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) +int erts_global_literal_is_in_range(void *ptr); +#endif + #undef ERTS_HAVE_IS_IN_LITERAL_RANGE #if defined(ARCH_32) || defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) # define ERTS_HAVE_IS_IN_LITERAL_RANGE @@ -348,7 +352,8 @@ int erts_is_in_literal_range(void* ptr) #elif defined(ARCH_64) extern char* erts_literals_start; extern UWord erts_literals_size; - return ErtsInArea(ptr, erts_literals_start, erts_literals_size); + return ErtsInArea(ptr, erts_literals_start, erts_literals_size) + || erts_global_literal_is_in_range(ptr); #else # error No ARCH_xx #endif diff --git a/erts/emulator/beam/erl_global_literals.c b/erts/emulator/beam/erl_global_literals.c index 86ca23e981fc..86226dd541b4 100644 --- a/erts/emulator/beam/erl_global_literals.c +++ b/erts/emulator/beam/erl_global_literals.c @@ -65,6 +65,21 @@ struct global_literal_chunk { /* The size of the global literal term that is being built */ Uint global_literal_build_size; +int +erts_global_literal_is_in_range(void *ptr) +{ + struct global_literal_chunk *chunk = global_literal_chunk; + char *p = (char *) ptr; + + while (chunk) { + if (p >= (char *) chunk->area.start && p < (char *) chunk->chunk_end) { + return 1; + } + chunk = chunk->next; + } + return 0; +} + ErtsLiteralArea *erts_global_literal_iterate_area(ErtsLiteralArea *prev) { @@ -93,22 +108,43 @@ static void expand_shared_global_literal_area(Uint heap_size) const size_t size = sizeof(struct global_literal_chunk) + (heap_size - 1) * sizeof(Eterm); struct global_literal_chunk *chunk; + int use_record_backend = erts_mmap_record_option_record_enabled(); -#ifndef DEBUG - chunk = (struct global_literal_chunk *) erts_alloc(ERTS_ALC_T_LITERAL, size); +#ifndef DEBUG + if (use_record_backend) { + UWord mmap_size = (UWord) size; + chunk = (struct global_literal_chunk *) + erts_mmap_record_alloc(&mmap_size, 0); + } else { + chunk = 
(struct global_literal_chunk *) erts_alloc(ERTS_ALC_T_LITERAL, size); + } #else + if (use_record_backend) { + UWord mmap_size = (UWord) size; + chunk = (struct global_literal_chunk *) + erts_mmap_record_alloc(&mmap_size, 0); + } else { /* erts_mem_guard requires the memory area to be page aligned. Overallocate * and align the address to ensure that is the case. */ UWord address; address = (UWord) erts_alloc(ERTS_ALC_T_LITERAL, size + sys_page_size * 2); address = (address + (sys_page_size - 1)) & ~(sys_page_size - 1); chunk = (struct global_literal_chunk *) address; + } for (int i = 0; i < heap_size; i++) { chunk->area.start[i] = ERTS_HOLE_MARKER; } #endif + if (!chunk) { + erts_exit(ERTS_ABORT_EXIT, + "global_literals: failed to allocate %bpu bytes for literal chunk (record_mode=%d)\n", + (UWord) size, use_record_backend); + } + + erts_alloc_trace_note_alloc("global_literal.chunk", chunk, (UWord) size); + chunk->area.end = &(chunk->area.start[0]); chunk->chunk_end = &(chunk->area.start[heap_size]); chunk->area.off_heap = NULL; @@ -190,4 +226,4 @@ init_global_literals(void) expand_shared_global_literal_area(GLOBAL_LITERAL_INITIAL_SIZE); init_empty_tuple(); -} \ No newline at end of file +} diff --git a/erts/emulator/beam/erl_global_literals.h b/erts/emulator/beam/erl_global_literals.h index 924b02b36f00..39a257d0180c 100644 --- a/erts/emulator/beam/erl_global_literals.h +++ b/erts/emulator/beam/erl_global_literals.h @@ -50,6 +50,7 @@ Eterm *erts_global_literal_allocate(Uint sz, struct erl_off_heap_header ***ohp); /* Registers the pointed-to term as a global literal. Must be called for terms * allocated using erts_global_literal_allocate.*/ void erts_global_literal_register(Eterm *variable); +int erts_global_literal_is_in_range(void *ptr); /* Iterates between global literal areas. Can only be used when crash dumping. 
* Iteration is started by passing NULL, then successively calling this function diff --git a/erts/emulator/beam/module.c b/erts/emulator/beam/module.c index da494abb0d58..987513772cdf 100644 --- a/erts/emulator/beam/module.c +++ b/erts/emulator/beam/module.c @@ -132,6 +132,63 @@ void init_module_table(void) erts_atomic_init_nob(&tot_module_bytes, 0); } +void +init_module_table_replay(IndexTable *roots, int no_roots) +{ + int i; + HashFunctions f; + + ASSERT(roots != NULL); + ASSERT(no_roots == ERTS_NUM_CODE_IX); + + f.hash = (H_FUN) module_hash; + f.cmp = (HCMP_FUN) module_cmp; + f.alloc = (HALLOC_FUN) module_alloc; + f.free = (HFREE_FUN) module_free; + f.meta_alloc = (HMALLOC_FUN) erts_alloc; + f.meta_free = (HMFREE_FUN) erts_free; + f.meta_print = (HMPRINT_FUN) erts_print; + + for (i = 0; i < ERTS_NUM_CODE_IX; i++) { + module_tables[i] = roots[i]; + module_tables[i].htable.fun = f; + } + + for (i=0; i Date: Thu, 23 Apr 2026 12:08:33 +0200 Subject: [PATCH 08/37] Rebuild per-module PC range table in replay mode In replay mode load_preloaded() is skipped because the atom, module, export and fun tables (plus the active/staging code indices) are restored directly from the struct-root snapshots, and the module code pages are restored with the mmap arena. The side effect is that erts_update_ranges() is never called, so the sorted per-code-index Range array in beam_ranges.c stays empty and erts_find_function_from_pc() returns NULL for every PC. This silently breaks tracing, stack walking, exception handling and anything else that resolves a PC to an MFA, and manifests in practice as hard-to-diagnose BTI / Illegal-instruction crashes during scheduler startup and process spawning. Add erts_ranges_replay_rebuild() which reconstructs r[ix] for every ERTS_NUM_CODE_IX directly from the restored Module table, including both curr and old instances, and invoke it from erl_start() right after validate_replay_module_tables(). 
The logic mirrors what erts_update_ranges() + erts_end_staging_ranges() would have built during a normal load, but drives it from already-restored state so we never need to advance the code indices. --- erts/emulator/beam/beam_ranges.c | 105 +++++++++++++++++++++++++++++++ erts/emulator/beam/erl_init.c | 10 ++- erts/emulator/beam/global.h | 1 + 3 files changed, 115 insertions(+), 1 deletion(-) diff --git a/erts/emulator/beam/beam_ranges.c b/erts/emulator/beam/beam_ranges.c index 15b2c3cfeac2..434d0240272a 100644 --- a/erts/emulator/beam/beam_ranges.c +++ b/erts/emulator/beam/beam_ranges.c @@ -29,6 +29,7 @@ #include "global.h" #include "beam_code.h" #include "erl_unicode.h" +#include "module.h" typedef struct { ErtsCodePtr start; /* Pointer to start of module. */ @@ -416,3 +417,107 @@ erts_find_next_code_for_line(const BeamCodeHeader* code_hdr, return lt->func_tab[0][line_index]; } + +/* + * Rebuild the per-module PC range table from the already-restored module + * table after a record/replay restore. During replay we skip load_preloaded() + * (since module table, atom table, export table, fun table, and code pages + * have all been restored from the struct-root dumps + mmap arena), which + * means erts_update_ranges() was never called. Without ranges, + * erts_find_function_from_pc() always returns NULL and any PC-based + * introspection (tracing, stack traces, exception handling) sees "unknown" + * code, which in turn corrupts VM-level invariants and leads to spurious + * crashes (SIGILL via BTI, "Catch not found", etc.). + * + * We rebuild both code indices directly (bypassing the normal staging dance), + * since the restored active/staging indices are already correct and we do + * not want to advance them. 
+ */ +void +erts_ranges_replay_rebuild(void) +{ + ErtsCodeIndex ix; + + for (ix = 0; ix < ERTS_NUM_CODE_IX; ix++) { + int i; + int max_modules = module_code_size(ix); + Sint count = 0; + Range *mp; + + /* Free any previous allocation (in case this is called twice). */ + if (r[ix].modules) { + erts_atomic_add_nob(&mem_used, -r[ix].allocated); + erts_free(ERTS_ALC_T_MODULE_REFS, r[ix].modules); + r[ix].modules = NULL; + r[ix].allocated = 0; + r[ix].n = 0; + } + + /* Count entries: one per curr instance with code, plus one per old. */ + for (i = 0; i < max_modules; i++) { + Module *modp = module_code(i, ix); + if (!modp) { + continue; + } + if (modp->curr.code_hdr && modp->curr.code_length > 0) { + count++; + } + if (modp->old.code_hdr && modp->old.code_length > 0) { + count++; + } + } + + if (count == 0) { + continue; + } + + /* Allocate with some slack so future inserts don't immediately + * require reallocation (matches the behaviour of + * erts_start_staging_ranges). */ + r[ix].allocated = count + 8; + erts_atomic_add_nob(&mem_used, r[ix].allocated); + r[ix].modules = (Range *) erts_alloc(ERTS_ALC_T_MODULE_REFS, + r[ix].allocated * sizeof(Range)); + mp = r[ix].modules; + + for (i = 0; i < max_modules; i++) { + Module *modp = module_code(i, ix); + if (!modp) { + continue; + } + if (modp->curr.code_hdr && modp->curr.code_length > 0) { + mp->start = (ErtsCodePtr) modp->curr.code_hdr; + erts_atomic_init_nob(&mp->end, + (erts_aint_t) + (((byte *) modp->curr.code_hdr) + + modp->curr.code_length)); + mp++; + } + if (modp->old.code_hdr && modp->old.code_length > 0) { + mp->start = (ErtsCodePtr) modp->old.code_hdr; + erts_atomic_init_nob(&mp->end, + (erts_aint_t) + (((byte *) modp->old.code_hdr) + + modp->old.code_length)); + mp++; + } + } + + r[ix].n = mp - r[ix].modules; + qsort(r[ix].modules, r[ix].n, sizeof(Range), + (int (*)(const void *, const void *)) rangecompare); + erts_atomic_set_nob(&r[ix].mid, + (erts_aint_t) (r[ix].modules + r[ix].n / 2)); + + if 
(r[ix].allocated > (Sint) erts_dump_num_lit_areas) { + erts_dump_num_lit_areas = r[ix].allocated * 2; + erts_dump_lit_areas = (ErtsLiteralArea **) + erts_realloc(ERTS_ALC_T_CRASH_DUMP, + (void *) erts_dump_lit_areas, + erts_dump_num_lit_areas + * sizeof(ErtsLiteralArea *)); + } + + CHECK(&r[ix]); + } +} diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 0f21392411b8..fe2fe22e5fdd 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -2951,11 +2951,19 @@ erl_start(int argc, char **argv) if (erts_mmap_record_option_replay_enabled()) { validate_replay_module_tables(); + /* + * Rebuild the per-module PC range table from the restored module + * table. load_preloaded() (which normally calls erts_update_ranges() + * for every loaded module) is skipped in replay mode, so without + * this step erts_find_function_from_pc() would return NULL for any + * PC, breaking tracing, stack walking, and exception handling. + */ + erts_ranges_replay_rebuild(); { char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); if (!dbg || dbg[0] != '0') { erts_fprintf(stderr, - "replay_root_debug: skipping load_preloaded() in replay mode after validation\n"); + "replay_root_debug: skipping load_preloaded() in replay mode after validation, ranges rebuilt\n"); } } } else { diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 15ffdb2f6fe8..721be7118448 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1004,6 +1004,7 @@ void erts_start_staging_ranges(int num_new); void erts_end_staging_ranges(int commit); void erts_update_ranges(const BeamCodeHeader* code, Uint size); void erts_remove_from_ranges(const BeamCodeHeader* code); +void erts_ranges_replay_rebuild(void); UWord erts_ranges_sz(void); void erts_lookup_function_info(FunctionInfo* fi, ErtsCodePtr pc, From cf986b0888f0ee10232ad04ea2dafc3d5e8c9815 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Thu, 23 Apr 2026 12:49:58 +0200 Subject: [PATCH 09/37] 
Snapshot and restore the literal super-carrier across record/replay On 64-bit the literal allocator (ERTS_ALC_A_LITERAL) has its own mmapper (erts_literal_mmapper) that reserves a 1 GB virtual super-carrier and services mseg allocations directly via erts_alcu_mmapper_mseg_alloc(). That path bypasses erts_mmap_record_alloc(), so literal contents were never written into the shared record arena file. On replay, the carrier was re-reserved at the same virtual address (ASLR disabled) but the pages were zero-filled, so any baked-in literal pointer in restored code dereferenced zeroed memory and faulted with badarg (e.g. erlang:display_string/1 returned badarg and then the catch handler crashed with "Catch not found"). Implementation: - Track live (ptr, size) regions inside the literal super-carrier in erts_alcu_mmapper_mseg_alloc / _realloc / _dealloc, keyed on alloc_no == ERTS_ALC_A_LITERAL and only when -record is enabled. - On process exit, dump those regions and their raw bytes to a sidecar file next to the main record arena (<arena-path>.literals). Registered via atexit next to the existing struct-root-dumps hook. - During -replay, after erts_mseg_init() has initialised erts_literal_mmapper (so the 1 GB range is virtually reserved) but BEFORE set_au_allocator(ERTS_ALC_A_LITERAL, ...) creates the allocator's main carrier, read the sidecar and for each region: 1. Call mm->reserve_physical(ptr, size) to flip the pages to PROT_READ|PROT_WRITE. 2. Advance mm->sa.top past the region (new erts_mmap_mark_allocated helper) so subsequent erts_mmap() calls won't hand the same pages back out and overwrite the restored bytes. 3. memcpy the recorded bytes into place. With this change replay reaches Erlang code, hello:hello/1 dereferences its literal "hello, world\n" cons cell correctly, and erlang:display_string/1 executes as expected. Subsequent crashes come from entirely different paths and are tracked separately. 
New helpers added to erl_mmap.{h,c} so that the record/replay TU can reserve physical backing and mark super-carrier ranges as in-use without touching the opaque ErtsMemMapper_ struct directly. --- erts/emulator/beam/erl_alloc.c | 29 ++ erts/emulator/beam/erl_alloc_util.c | 19 ++ erts/emulator/sys/common/erl_mmap.c | 46 +++ erts/emulator/sys/common/erl_mmap.h | 14 + erts/emulator/sys/common/erl_mmap_record.c | 330 +++++++++++++++++++++ 5 files changed, 438 insertions(+) diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index d79585af6b36..ff7c142cf121 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -1082,6 +1082,12 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) if (atexit(erts_alloc_struct_dump_snapshots_on_exit) == 0) { erts_alloc_struct_snapshot_registered = 1; } + /* + * Also dump the literal super-carrier sidecar on exit when + * recording. Safe to register unconditionally: the dump + * function is a no-op unless record is enabled. + */ + (void) atexit(erts_mmap_record_literal_dump_on_exit); } } @@ -1169,7 +1175,30 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) init.mseg.nos = erts_no_schedulers; init.mseg.ndai = init.dirty_alloc_insts; erts_mseg_init(&init.mseg); + + /* + * In replay mode the paired -record run dumped every live region of the + * literal super-carrier to a sidecar file. Restore those bytes NOW -- + * after erts_mseg_init() (which has reserved the 1 GB virtual range via + * erts_mmap_init(&erts_literal_mmapper, ...)) but BEFORE the literal + * allocator creates its main carrier (which happens in + * set_au_allocator(ERTS_ALC_A_LITERAL, ...) below). If we restored after + * that point, our memcpy would clobber the allocator's fresh carrier + * header and free-tree nodes, corrupting state. 
+ */ +#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) + if (erts_mmap_record_option_replay_enabled()) { + if (!erts_mmap_record_literal_restore(&erts_literal_mmapper)) { + erts_fprintf(stderr, + "replay_root_debug: failed to restore literal super-carrier " + "snapshot; replay will likely fail\n"); + } else { + erts_fprintf(stderr, + "replay_root_debug: restored literal super-carrier snapshot\n"); + } + } #endif +#endif /* HAVE_ERTS_MSEG */ erts_alcu_init(&init.alloc_util); erts_afalc_init(); diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index 80e74d8c7bc1..b1262760758e 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -1064,6 +1064,16 @@ erts_alcu_mmapper_mseg_alloc(Allctr_t *allctr, Uint *size_p, Uint flags) res = erts_mmap(allctr->mseg_mmapper, mmap_flags, &size); *size_p = (Uint)size; INC_CC(allctr->calls.mseg_alloc); + /* + * Record the literal super-carrier allocation so it can be dumped and + * restored across a record/replay cycle. This is a no-op unless the + * record option is enabled AND this mmapper is the literal one; we + * key on alloc_no because only ERTS_ALC_A_LITERAL uses this path. 
+ */ + if (res && allctr->alloc_no == ERTS_ALC_A_LITERAL + && erts_mmap_record_option_record_enabled()) { + erts_mmap_record_literal_alloc(res, (UWord) *size_p); + } return res; } @@ -1076,6 +1086,11 @@ erts_alcu_mmapper_mseg_realloc(Allctr_t *allctr, void *seg, res = erts_mremap(allctr->mseg_mmapper, ERTS_MSEG_FLG_NONE, seg, old_size, &new_size); *new_size_p = (Uint) new_size; INC_CC(allctr->calls.mseg_realloc); + if (allctr->alloc_no == ERTS_ALC_A_LITERAL + && erts_mmap_record_option_record_enabled()) { + erts_mmap_record_literal_realloc(seg, (UWord) old_size, + res, (UWord) *new_size_p); + } return res; } @@ -1087,6 +1102,10 @@ erts_alcu_mmapper_mseg_dealloc(Allctr_t *allctr, void *seg, Uint size, if (flags & ERTS_MSEG_FLG_2POW) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; + if (allctr->alloc_no == ERTS_ALC_A_LITERAL + && erts_mmap_record_option_record_enabled()) { + erts_mmap_record_literal_free(seg, (UWord) size); + } erts_munmap(allctr->mseg_mmapper, mmap_flags, seg, (UWord)size); INC_CC(allctr->calls.mseg_dealloc); } diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c index 123bb02cd753..a049c433d38d 100644 --- a/erts/emulator/sys/common/erl_mmap.c +++ b/erts/emulator/sys/common/erl_mmap.c @@ -2293,6 +2293,52 @@ static void init_atoms(void) static void hard_dbg_mseg_init(void); #endif +/* + * Public helper so code outside this translation unit (in particular the + * record/replay sidecar logic in erl_mmap_record.c) can reserve physical + * backing on pages inside a super-carrier without needing access to the + * full ErtsMemMapper_ struct. + */ +int +erts_mmap_reserve_physical(ErtsMemMapper *mm, void *ptr, UWord size) +{ + if (!mm || !mm->reserve_physical) { + return 0; + } + return mm->reserve_physical((char *) ptr, size); +} + +/* + * Replay-only: mark the range [ptr, ptr+size) as "already allocated" inside + * the super-carrier so that future erts_mmap() calls won't hand it out. 
+ * + * Strategy: if the range is contiguous with sa.top, just advance sa.top; + * otherwise push sa.top to the end of the range (losing the gap between + * old sa.top and ptr). This is sufficient for the literal super-carrier + * case where all restored regions are at the bottom of the carrier and + * superaligned. + */ +int +erts_mmap_mark_allocated(ErtsMemMapper *mm, void *ptr, UWord size) +{ + char *start = (char *) ptr; + char *end = start + size; + if (!mm) { + return 0; + } + if (start < mm->sa.bot || end > mm->sua.bot) { + return 0; + } + /* Round up to superaligned boundary. */ + end = (char *) ERTS_SUPERALIGNED_CEILING((UWord) end); + if (end > mm->sa.top) { + UWord inc = (UWord) (end - mm->sa.top); + mm->sa.top = end; + mm->size.supercarrier.used.total += inc; + } + return 1; +} + void erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init) { diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index 76771186a63f..ab5ae95dc3a3 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -147,6 +147,8 @@ int erts_mmap_name_mapping_global(void *ptr, UWord size, const char *name); int erts_mmap_prefix_mapping_name_global(void *ptr, UWord size, const char *prefix); int erts_mmap_in_supercarrier(ErtsMemMapper*, void *ptr); void erts_mmap_init(ErtsMemMapper*, ErtsMMapInit*); +int erts_mmap_reserve_physical(ErtsMemMapper *mm, void *ptr, UWord size); +int erts_mmap_mark_allocated(ErtsMemMapper *mm, void *ptr, UWord size); struct erts_mmap_info_struct { UWord sizes[6]; @@ -168,6 +170,18 @@ void *erts_mmap_record_alloc(UWord *sizep, Uint32 mmap_flags); void erts_mmap_record_free(void *ptr, UWord size); void *erts_mmap_record_realloc(void *ptr, UWord old_size, UWord *sizep, Uint32 mmap_flags); +/* + * Literal super-carrier snapshot/restore hooks. 
On record, the literal + * allocator tracks (ptr,size) regions here and dumps them on exit; on + * replay we re-materialise those bytes at the same addresses. + */ +void erts_mmap_record_literal_alloc(void *ptr, UWord size); +void erts_mmap_record_literal_free(void *ptr, UWord size); +void erts_mmap_record_literal_realloc(void *old_ptr, UWord old_size, + void *new_ptr, UWord new_size); +void erts_mmap_record_literal_dump_on_exit(void); +int erts_mmap_record_literal_restore(ErtsMemMapper *mm); + #ifdef ERTS_WANT_MEM_MAPPERS # include "erl_alloc_types.h" diff --git a/erts/emulator/sys/common/erl_mmap_record.c b/erts/emulator/sys/common/erl_mmap_record.c index 2fbd954a9017..3444284e5b10 100644 --- a/erts/emulator/sys/common/erl_mmap_record.c +++ b/erts/emulator/sys/common/erl_mmap_record.c @@ -58,6 +58,67 @@ static ErtsMMapRecordChunk *record_chunks = NULL; static erts_mtx_t record_mtx; static int record_mtx_inited = 0; +/* + * Literal super-carrier snapshot tracking. + * + * On 64-bit, the literal allocator has its own mmapper (erts_literal_mmapper) + * reserved as a 1 GB virtual range. Allocations inside it do NOT go through + * mseg_create() and therefore do NOT reach erts_mmap_record_alloc() above. + * + * To replay correctly we track every live (ptr, size) region handed out by + * erts_alcu_mmapper_mseg_alloc / _realloc, and at process exit we dump those + * regions (their raw bytes) to a sidecar file next to the main record arena + * (<arena-path>.literals). On replay, after the literal mmapper has been + * set up (so the same virtual range is reserved), we read the sidecar and + * memcpy bytes back at their original addresses. 
+ */ +typedef struct ErtsLiteralSnapshotRegion_ ErtsLiteralSnapshotRegion; +struct ErtsLiteralSnapshotRegion_ { + char *ptr; + UWord size; + ErtsLiteralSnapshotRegion *next; +}; + +static ErtsLiteralSnapshotRegion *literal_regions = NULL; +static erts_mtx_t literal_mtx; +static int literal_mtx_inited = 0; + +#define ERTS_LITERAL_SNAPSHOT_MAGIC 0x4C49544C55 /* "LITL\0" */ +#define ERTS_LITERAL_SNAPSHOT_VERSION 1 + +static void +literal_mtx_ensure_inited(void) +{ + if (!literal_mtx_inited) { + erts_mtx_init(&literal_mtx, "mmap_record_literal", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC + | ERTS_LOCK_FLAGS_CATEGORY_ALLOCATOR); + literal_mtx_inited = 1; + } +} + +static const char * +literal_sidecar_path_for_record(void) +{ + static char buf[1024]; + const char *base; + int len; + + if (replay_enabled) { + base = replay_path; + } else { + base = record_path; + } + if (!base) { + return NULL; + } + len = snprintf(buf, sizeof(buf), "%s.literals", base); + if (len <= 0 || len >= (int) sizeof(buf)) { + return NULL; + } + return buf; +} + static UWord record_align(UWord size, Uint32 mmap_flags) { @@ -383,4 +444,273 @@ erts_mmap_record_realloc(void *ptr, UWord old_size, UWord *sizep, Uint32 mmap_fl return new_ptr; } +/* + * --------------------------------------------------------------------------- + * Literal super-carrier snapshot tracking. 
+ * --------------------------------------------------------------------------- + */ + +void +erts_mmap_record_literal_alloc(void *ptr, UWord size) +{ + ErtsLiteralSnapshotRegion *r; + + if (!record_enabled || !ptr || !size) { + return; + } + r = (ErtsLiteralSnapshotRegion *) malloc(sizeof(*r)); + if (!r) { + return; + } + r->ptr = (char *) ptr; + r->size = size; + + literal_mtx_ensure_inited(); + erts_mtx_lock(&literal_mtx); + r->next = literal_regions; + literal_regions = r; + erts_mtx_unlock(&literal_mtx); +} + +void +erts_mmap_record_literal_free(void *ptr, UWord size) +{ + ErtsLiteralSnapshotRegion **pp; + (void) size; + + if (!record_enabled || !ptr) { + return; + } + + literal_mtx_ensure_inited(); + erts_mtx_lock(&literal_mtx); + for (pp = &literal_regions; *pp; pp = &(*pp)->next) { + if ((*pp)->ptr == (char *) ptr) { + ErtsLiteralSnapshotRegion *r = *pp; + *pp = r->next; + free(r); + break; + } + } + erts_mtx_unlock(&literal_mtx); +} + +void +erts_mmap_record_literal_realloc(void *old_ptr, UWord old_size, + void *new_ptr, UWord new_size) +{ + if (!record_enabled) { + return; + } + if (old_ptr) { + erts_mmap_record_literal_free(old_ptr, old_size); + } + if (new_ptr && new_size) { + erts_mmap_record_literal_alloc(new_ptr, new_size); + } +} + +/* + * Sidecar file format (little-endian, host-size UWord): + * + * UWord magic (ERTS_LITERAL_SNAPSHOT_MAGIC) + * UWord version (ERTS_LITERAL_SNAPSHOT_VERSION) + * UWord count (number of regions) + * for each region: + * UWord ptr (virtual address) + * UWord size (bytes) + * byte data[size] + */ + +static int +write_all(int fd, const void *buf, size_t len) +{ + const char *p = (const char *) buf; + while (len > 0) { + ssize_t n = write(fd, p, len); + if (n < 0) { + if (errno == EINTR) continue; + return -1; + } + if (n == 0) return -1; + p += n; + len -= (size_t) n; + } + return 0; +} + +static int +read_all(int fd, void *buf, size_t len) +{ + char *p = (char *) buf; + while (len > 0) { + ssize_t n = read(fd, p, len); + 
if (n < 0) { + if (errno == EINTR) continue; + return -1; + } + if (n == 0) return -1; + p += n; + len -= (size_t) n; + } + return 0; +} + +void +erts_mmap_record_literal_dump_on_exit(void) +{ + const char *path; + int fd; + UWord header[3]; + ErtsLiteralSnapshotRegion *r; + UWord count = 0; + + if (!record_enabled) { + return; + } + path = literal_sidecar_path_for_record(); + if (!path) { + return; + } + + literal_mtx_ensure_inited(); + erts_mtx_lock(&literal_mtx); + + for (r = literal_regions; r; r = r->next) { + count++; + } + + fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (fd < 0) { + erts_mtx_unlock(&literal_mtx); + return; + } + + header[0] = (UWord) ERTS_LITERAL_SNAPSHOT_MAGIC; + header[1] = (UWord) ERTS_LITERAL_SNAPSHOT_VERSION; + header[2] = count; + if (write_all(fd, header, sizeof(header)) != 0) { + goto done; + } + + for (r = literal_regions; r; r = r->next) { + UWord rec[2]; + rec[0] = (UWord) r->ptr; + rec[1] = r->size; + if (write_all(fd, rec, sizeof(rec)) != 0) { + goto done; + } + if (r->size > 0) { + if (write_all(fd, r->ptr, (size_t) r->size) != 0) { + goto done; + } + } + } + +done: + close(fd); + erts_mtx_unlock(&literal_mtx); +} + +/* + * Restore the literal super-carrier contents from the sidecar file. + * + * Must be called AFTER erts_mmap_init(&erts_literal_mmapper, ...) so that + * the 1 GB virtual range is reserved at the same address that it was during + * record (ASLR is required to be off). For each recorded region we: + * 1. Ensure physical memory is reserved on the target pages via the + * mmapper's reserve_physical callback. + * 2. memcpy the recorded bytes. + * + * NOTE: After this call the literal mmapper's free-list does NOT know that + * these regions are in use. That's OK for replay because replay skips + * load_preloaded() and therefore never asks the literal allocator for + * fresh memory; existing code already baked-in pointers into these + * addresses. 
+ */ +int +erts_mmap_record_literal_restore(ErtsMemMapper *mm) +{ + const char *path; + int fd; + UWord header[3]; + UWord count, i; + int ok = 0; + (void) mm; + + if (!replay_enabled) { + return 1; + } + path = literal_sidecar_path_for_record(); + if (!path) { + return 0; + } + + fd = open(path, O_RDONLY, 0); + if (fd < 0) { + /* Missing sidecar: not fatal, but callers likely can't boot. */ + return 0; + } + + if (read_all(fd, header, sizeof(header)) != 0) { + goto out; + } + if (header[0] != (UWord) ERTS_LITERAL_SNAPSHOT_MAGIC + || header[1] != (UWord) ERTS_LITERAL_SNAPSHOT_VERSION) { + goto out; + } + count = header[2]; + + for (i = 0; i < count; i++) { + UWord rec[2]; + char *ptr; + UWord size; + + if (read_all(fd, rec, sizeof(rec)) != 0) { + goto out; + } + ptr = (char *) rec[0]; + size = rec[1]; + + /* + * Reserve physical memory on the target region so that the + * upcoming writes land on real pages. The super-carrier was + * reserved with os_mmap_virtual() and is PROT_NONE until this + * call flips the pages to PROT_READ|PROT_WRITE. + * + * We use erts_mmap_reserve_physical(), a small wrapper in + * erl_mmap.c, because ErtsMemMapper is only forward-declared + * outside that file. + */ + if (mm) { + if (!erts_mmap_reserve_physical(mm, ptr, size)) { + goto out; + } + /* + * Tell the mmapper these pages are now in-use so subsequent + * erts_mmap() calls (e.g. when the literal allocator grows + * its carriers) don't hand them out and overwrite the bytes + * we are about to memcpy in. 
+ */ + if (!erts_mmap_mark_allocated(mm, ptr, size)) { + fprintf(stderr, + "replay_root_debug: WARNING mark_allocated failed " + "for [%p..+0x%lx); later literal allocations may " + "clobber restored bytes\n", + (void *) ptr, (unsigned long) size); + } + } + if (size > 0) { + if (read_all(fd, ptr, (size_t) size) != 0) { + goto out; + } + } + } + ok = 1; + +out: + close(fd); + return ok; +} + #endif /* HAVE_ERTS_MMAP */ From f7c8dfa707fd24c85d7b0cbb98c686f2a9cb1c6f Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Thu, 23 Apr 2026 13:00:26 +0200 Subject: [PATCH 10/37] Make the recorded snapshot read-only during replay The record arena was being opened O_RDWR and mmapped MAP_SHARED even in -replay mode, so any write the VM performed against restored memory propagated back to the on-disk file. A crash partway through replay therefore left the arena in a partially-modified state, and the next replay started from that corrupted snapshot producing a different failure. Changes: - erts_mmap_record_init(): in replay, open the arena O_RDONLY and map it MAP_PRIVATE (copy-on-write). The VM can still mutate restored pages as before, but nothing reaches the backing file. - erts_alloc_struct_dump_snapshots_on_exit(): return early when running under -replay so the struct-root-dumps directory (replay INPUT) is never rewritten. The literal sidecar (.literals) was already safe because its contents are read() into freshly-allocated pages, not mmapped. Running the same replay twice in a row now produces identical output and the arena / struct-root-dumps files are left untouched on disk. 
--- erts/emulator/beam/erl_alloc.c | 10 ++++++++++ erts/emulator/sys/common/erl_mmap_record.c | 12 ++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index ff7c142cf121..c63c4f36c921 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -310,6 +310,16 @@ erts_alloc_struct_dump_snapshots_on_exit(void) return; } + /* + * Never rewrite the struct-root-dumps while replaying: the directory + * is replay INPUT. Overwriting it during replay (and then crashing + * mid-execution) would corrupt the snapshot so subsequent replays + * see a different, partially-modified state. + */ + if (erts_mmap_record_option_replay_enabled()) { + return; + } + if (mkdir(erts_alloc_struct_snapshot_dir, 0777) < 0 && errno != EEXIST) { return; } diff --git a/erts/emulator/sys/common/erl_mmap_record.c b/erts/emulator/sys/common/erl_mmap_record.c index 3444284e5b10..a02054567295 100644 --- a/erts/emulator/sys/common/erl_mmap_record.c +++ b/erts/emulator/sys/common/erl_mmap_record.c @@ -259,8 +259,16 @@ erts_mmap_record_init(void) } if (replay_enabled) { + /* + * Open the arena read-only during replay so the OS will not let us + * mutate the on-disk snapshot, and map it MAP_PRIVATE (copy-on-write) + * so the VM can still write into restored memory without propagating + * those writes back to the file. Without this, a crash mid-replay + * leaves a partially-modified arena on disk and subsequent replays + * observe a different (corrupted) snapshot. + */ path = replay_path; - record_fd = open(path, O_RDWR, 0); + record_fd = open(path, O_RDONLY, 0); } else { path = record_path; if (!path) { @@ -294,7 +302,7 @@ erts_mmap_record_init(void) record_base = (char *) mmap(NULL, ERTS_RECORD_ARENA_SIZE, PROT_READ | PROT_WRITE, - MAP_SHARED, + replay_enabled ? 
MAP_PRIVATE : MAP_SHARED, record_fd, 0); if (record_base == MAP_FAILED) { From 27b40d70cf7743780bced97e542ef2ee441458c1 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Thu, 23 Apr 2026 14:49:59 +0200 Subject: [PATCH 11/37] Snapshot and restore beam-catches bccix[] across record/replay Fixes "Catch not found" crashes in replay-mode as soon as any try/catch handler fires (e.g. hello:test_catches/0). The catch-table pool lives in two places: 1. bccix[ERTS_NUM_CODE_IX], a small file-static header array in beam_catches.c's BSS holding {free_list, high_mark, tabsize, beam_catches*} per code-ix. 2. bccix[i].beam_catches, a dynamically-sized array of {handler_cp, cdr} pairs allocated via ERTS_ALC_T_CATCHES (CATCHES -> LONG_LIVED -> CODE -> default mseg super-carrier), whose bytes end up in the record arena. At module load time patchCatches() / emu_load.c call beam_catches_cons() to allocate an index and bake make_catch(index) immediates into the generated code. handle_error() later resolves those via beam_catches_car(index) -> bccix[active].beam_catches[index].cp. Replay preserves (2) automatically via the arena's MAP_PRIVATE mapping, but (1) is BSS and was being reinitialised to a fresh empty table by beam_catches_init() during init_emulator(). load_preloaded() is skipped in replay so no beam_catches_cons() calls ever repopulate the fresh table, which left every baked-in catch index pointing at slot 0 of an empty table, and the first exception unwound past the real handler producing "Catch not found". Register bccix[] under tag "beam_catches.bccix" via erts_alloc_trace_note_alloc() so the struct-root-dump pipeline snapshots it, add beam_catches_apply_replay_root() to memcpy the dumped bytes back over bccix[], and call it from erl_start() in replay mode after erts_ranges_replay_rebuild(). The restored pointers refer to long-lived-allocator carriers which the default mseg mapper brings back at the same virtual addresses, so the entries they index resolve correctly. 
--- erts/emulator/beam/beam_catches.c | 50 ++++++++++++++++++ erts/emulator/beam/beam_catches.h | 13 +++++ erts/emulator/beam/erl_alloc.c | 3 +- erts/emulator/beam/erl_init.c | 85 +++++++++++++++++++++++++++++++ 4 files changed, 150 insertions(+), 1 deletion(-) diff --git a/erts/emulator/beam/beam_catches.c b/erts/emulator/beam/beam_catches.c index e475f91cc6b7..a1a8d4d09bb7 100644 --- a/erts/emulator/beam/beam_catches.c +++ b/erts/emulator/beam/beam_catches.c @@ -56,6 +56,28 @@ struct bc_pool { static struct bc_pool bccix[ERTS_NUM_CODE_IX]; +/* + * Expose bccix[] to the struct-root-dump / replay pipeline. The individual + * per-pool tables (bccix[i].beam_catches) are allocated via + * ERTS_ALC_T_CATCHES → long-lived allocator, whose carriers live in the + * default mseg super-carrier, which is already file-backed by the record + * arena. So the table *contents* are persisted for free; what needs + * explicit snapshotting is this small static header array which holds the + * pointers, tabsize, high_mark, and free_list. Without it, every replay + * would see a freshly-initialised (empty) bccix[] and the catch indices + * baked into restored code would resolve to garbage / NULL and produce + * "Catch not found" at the first throw. + */ +void *erts_beam_catches_bccix_ptr(void) +{ + return (void *) bccix; +} + +UWord erts_beam_catches_bccix_size(void) +{ + return sizeof(bccix); +} + void beam_catches_init(void) { int i; @@ -74,6 +96,34 @@ void beam_catches_init(void) } /* For initial load: */ IF_DEBUG(bccix[erts_staging_code_ix()].is_staging = 1); + + /* + * Register the whole bccix[] as a snapshot root on record, AFTER the + * fresh table has been allocated so the snapshot captures the + * record-time pointer. On replay, erl_init.c overwrites this array + * from the dump before any code runs. 
+ */ + erts_alloc_trace_note_alloc("beam_catches.bccix", + bccix, sizeof(bccix)); +} + +/* + * Replay-only: replace bccix[] wholesale with the snapshot bytes loaded + * from struct-root-dumps/NN.beam_catches.bccix.bin. The pointers inside + * refer to addresses in the long-lived allocator's carrier, which the + * default mseg super-carrier restores to the same virtual address. + * + * Leaks the fresh table that beam_catches_init() just allocated; that's + * a tiny permanent waste (one 8 KB block) but keeps the replay path + * simple and avoids running erts_free on an address the allocator no + * longer knows about once we've overwritten its bookkeeping. + */ +void beam_catches_apply_replay_root(const void *src, UWord src_size) +{ + if (src_size != sizeof(bccix)) { + return; + } + sys_memcpy(bccix, src, sizeof(bccix)); } diff --git a/erts/emulator/beam/beam_catches.h b/erts/emulator/beam/beam_catches.h index cab601f56b34..59d47911a5e4 100644 --- a/erts/emulator/beam/beam_catches.h +++ b/erts/emulator/beam/beam_catches.h @@ -33,6 +33,19 @@ #define BEAM_CATCHES_NIL (-1) void beam_catches_init(void); + +/* + * Record/replay support. See beam_catches.c for the rationale. + * + * erts_beam_catches_bccix_{ptr,size} - accessors used by the + * struct-root dump code to snapshot the static bccix[] array. + * beam_catches_apply_replay_root - restore bccix[] from the + * previously-dumped bytes during -replay init. 
+ */ +void *erts_beam_catches_bccix_ptr(void); +UWord erts_beam_catches_bccix_size(void); +void beam_catches_apply_replay_root(const void *src, UWord src_size); + void beam_catches_start_staging(void); void beam_catches_end_staging(int commit); unsigned beam_catches_cons(ErtsCodePtr cp, unsigned cdr, ErtsCodePtr **); diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index c63c4f36c921..eaa08b0d9e2d 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -279,7 +279,8 @@ erts_alloc_struct_should_snapshot(const char *tag) || strcmp(tag, "module_table.index_root") == 0 || strcmp(tag, "export_table.index_root") == 0 || strcmp(tag, "fun_table.index_root") == 0 - || strcmp(tag, "code_ix.root") == 0); + || strcmp(tag, "code_ix.root") == 0 + || strcmp(tag, "beam_catches.bccix") == 0); } static void diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index fe2fe22e5fdd..e6d548cd3f3e 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -54,6 +54,7 @@ #include "erl_osenv.h" #include "erl_proc_sig_queue.h" #include "beam_load.h" +#include "beam_catches.h" #include "erl_global_literals.h" #include "erl_iolist.h" #include "erl_debugger.h" @@ -2959,6 +2960,90 @@ erl_start(int argc, char **argv) * PC, breaking tracing, stack walking, and exception handling. */ erts_ranges_replay_rebuild(); + /* + * Restore the beam-catches header array bccix[] from the dump. + * beam_catches_init() ran as part of init_emulator() above and + * installed a fresh empty table; the restored code still has + * make_catch(index) immediates baked in that refer to the + * RECORD-time indices, so we must swap the fresh header back to + * the recorded one. The per-pool entry tables it points to live + * in the long-lived allocator's carriers (mseg → record arena), + * so they map back at the same virtual addresses via MAP_PRIVATE + * and do not need separate restoration. 
+ */ + { + const char *base_dir = getenv("ERTS_ALLOC_STRUCT_DUMP_DIR"); + char dir_buf[512]; + char manifest_path[1024]; + FILE *mf; + char line[1024]; + int loaded = 0; + + if (!base_dir || base_dir[0] == '\0') { + base_dir = "_mmap-records/struct-root-dumps"; + } + erts_snprintf(dir_buf, sizeof(dir_buf), "%s", base_dir); + erts_snprintf(manifest_path, sizeof(manifest_path), + "%s/roots.csv", dir_buf); + + mf = fopen(manifest_path, "r"); + if (mf) { + while (fgets(line, sizeof(line), mf) != NULL) { + char *p1, *p2, *p3, *p4; + char *tag, *szs, *file; + unsigned long sz; + char file_path[1024]; + FILE *bf; + UWord bccix_size = erts_beam_catches_bccix_size(); + void *buf; + + if (line[0] == '\0' || line[0] == '\n' || line[0] == '#' + || !isdigit((unsigned char) line[0])) { + continue; + } + p1 = strchr(line, ','); if (!p1) continue; + p2 = strchr(p1 + 1, ','); if (!p2) continue; + p3 = strchr(p2 + 1, ','); if (!p3) continue; + p4 = strchr(p3 + 1, ','); if (!p4) continue; + tag = p1 + 1; *p2 = '\0'; + szs = p3 + 1; *p4 = '\0'; + file = p4 + 1; + file[strcspn(file, "\r\n")] = '\0'; + + if (strcmp(tag, "beam_catches.bccix") != 0) continue; + + sz = strtoul(szs, NULL, 10); + if ((UWord) sz != bccix_size) { + erts_fprintf(stderr, + "replay_root_debug: bccix size mismatch " + "dump=%lu expected=%bpu\n", + sz, bccix_size); + continue; + } + erts_snprintf(file_path, sizeof(file_path), + "%s/%s", dir_buf, file); + bf = fopen(file_path, "rb"); + if (!bf) continue; + buf = erts_alloc(ERTS_ALC_T_TMP, bccix_size); + if (fread(buf, 1, bccix_size, bf) == bccix_size) { + beam_catches_apply_replay_root(buf, bccix_size); + loaded = 1; + } + erts_free(ERTS_ALC_T_TMP, buf); + fclose(bf); + break; + } + fclose(mf); + } + { + char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); + if (!dbg || dbg[0] != '0') { + erts_fprintf(stderr, + "replay_root_debug: bccix restore %s\n", + loaded ? 
"OK" : "FAILED"); + } + } + } { char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); if (!dbg || dbg[0] != '0') { From 5e8f088e2f113c4c996dfa86da6408a421d547c8 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Fri, 24 Apr 2026 11:21:50 +0200 Subject: [PATCH 12/37] Route binary allocator carriers through mseg during record --- erts/emulator/beam/erl_alloc_util.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index b1262760758e..4f40266e22ed 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -6965,7 +6965,9 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) #if HAVE_ERTS_MSEG { - int force_mseg = (allctr->alloc_no == ERTS_ALC_A_LONG_LIVED); + int force_mseg = (allctr->alloc_no == ERTS_ALC_A_LONG_LIVED + || (allctr->alloc_no == ERTS_ALC_A_BINARY + && erts_mmap_record_option_record_enabled())); int force_sys = (allctr->alloc_no == ERTS_ALC_A_SYSTEM || allctr->alloc_no == ERTS_ALC_A_TEMPORARY || allctr->alloc_no == ERTS_ALC_A_DRIVER); @@ -7008,7 +7010,9 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) Block_t *blk; Uint create_flags; - create_flags = (allctr->alloc_no == ERTS_ALC_A_LONG_LIVED + create_flags = ((allctr->alloc_no == ERTS_ALC_A_LONG_LIVED + || (allctr->alloc_no == ERTS_ALC_A_BINARY + && erts_mmap_record_option_record_enabled())) ? 
CFLG_FORCE_MSEG : CFLG_FORCE_SYS_ALLOC); From 3668c941f7b189f43b249c863bb28471c8af10c4 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Fri, 24 Apr 2026 11:21:57 +0200 Subject: [PATCH 13/37] Preserve restored BIF exports during replay initialization --- erts/emulator/beam/emu/beam_emu.c | 24 ++++++++++- erts/emulator/beam/jit/beam_jit_main.cpp | 51 +++++++++++++++++++++++- 2 files changed, 71 insertions(+), 4 deletions(-) diff --git a/erts/emulator/beam/emu/beam_emu.c b/erts/emulator/beam/emu/beam_emu.c index a473b89fbd3b..31fc5a3f3741 100644 --- a/erts/emulator/beam/emu/beam_emu.c +++ b/erts/emulator/beam/emu/beam_emu.c @@ -44,6 +44,7 @@ #include "dtrace-wrapper.h" #include "erl_proc_sig_queue.h" #include "beam_common.h" +#include "erl_mmap.h" /* #define HARDDEBUG 1 */ @@ -642,7 +643,6 @@ static void install_bifs(void) { ERTS_ASSERT(entry->arity <= MAX_BIF_ARITY); ep = erts_export_put(entry->module, entry->name, entry->arity); - ep->info.u.op = BeamOpCodeAddr(op_i_func_info_IaaI); ep->info.mfa.module = entry->module; ep->info.mfa.function = entry->name; @@ -664,6 +664,22 @@ static void install_bifs(void) { } } +static void replay_install_bifs(void) { + int i; + + for (i = 0; i < BIF_SIZE; i++) { + BifEntry *entry; + + entry = &bif_table[i]; + + ERTS_ASSERT(entry->arity <= MAX_BIF_ARITY); + + erts_init_trap_export(BIF_TRAP_EXPORT(i), + entry->module, entry->name, entry->arity, + entry->f); + } +} + /* * One-time initialization of emulator. Does not need to be * in process_main(). 
@@ -712,7 +728,11 @@ init_emulator_finish(void) beam_call_trace_return_[0] = BeamOpCodeAddr(op_i_call_trace_return); beam_call_trace_return = (ErtsCodePtr)&beam_call_trace_return_[0]; - install_bifs(); + if (erts_mmap_record_option_replay_enabled()) { + replay_install_bifs(); + } else { + install_bifs(); + } } int diff --git a/erts/emulator/beam/jit/beam_jit_main.cpp b/erts/emulator/beam/jit/beam_jit_main.cpp index 73bc507c10d1..185622ea1af3 100644 --- a/erts/emulator/beam/jit/beam_jit_main.cpp +++ b/erts/emulator/beam/jit/beam_jit_main.cpp @@ -28,6 +28,7 @@ extern "C" #include "beam_common.h" #include "code_ix.h" #include "export.h" +#include "erl_mmap.h" #include "erl_threads.h" #if defined(__APPLE__) @@ -131,7 +132,6 @@ static void install_bifs(void) { ERTS_ASSERT(entry->arity <= MAX_BIF_ARITY); ep = erts_export_put(entry->module, entry->name, entry->arity); - sys_memset(&ep->info.u, 0, sizeof(ep->info.u)); ep->info.mfa.module = entry->module; ep->info.mfa.function = entry->name; @@ -152,6 +152,49 @@ static void install_bifs(void) { } } +static void replay_install_bifs(void) { + typedef Eterm (*bif_func_type)(Process *, Eterm *, ErtsCodePtr); + int i; + + ASSERT(beam_export_trampoline != NULL); + ASSERT(beam_save_calls_export != NULL); + + for (i = 0; i < BIF_SIZE; i++) { + BifEntry *entry; + const Export *existing; + Export *ep; + + entry = &bif_table[i]; + + ERTS_ASSERT(entry->arity <= MAX_BIF_ARITY); + + existing = erts_active_export_entry(entry->module, + entry->name, + entry->arity); + ep = erts_export_put(entry->module, entry->name, entry->arity); + ep->bif_number = i; + + if (!existing) { + int j; + + sys_memset(&ep->info.u, 0, sizeof(ep->info.u)); + ep->info.mfa.module = entry->module; + ep->info.mfa.function = entry->name; + ep->info.mfa.arity = entry->arity; + + for (j = 0; j < ERTS_NUM_CODE_IX; j++) { + erts_activate_export_trampoline(ep, j); + } + } + + erts_init_trap_export(BIF_TRAP_EXPORT(i), + entry->module, + entry->name, + entry->arity, + 
(bif_func_type)entry->f); + } +} + static auto create_allocator(const JitAllocator::CreateParams ¶ms) { JitAllocator::Span test_span; bool single_mapped; @@ -407,7 +450,11 @@ bool BeamAssemblerCommon::hasCpuFeature(uint32_t featureId) { } void init_emulator(void) { - install_bifs(); + if (erts_mmap_record_option_replay_enabled()) { + replay_install_bifs(); + } else { + install_bifs(); + } } void process_main(ErtsSchedulerData *esdp) { From b9d63ab92a96ac6e836b4d1da3e141355b66ac2d Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Fri, 24 Apr 2026 11:35:08 +0200 Subject: [PATCH 14/37] Always launch sys processes --- erts/emulator/beam/erl_init.c | 105 ++++++++++++++++++++++++++++++---- 1 file changed, 93 insertions(+), 12 deletions(-) diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index e6d548cd3f3e..af699ce52782 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -120,8 +120,8 @@ int erts_backtrace_depth; /* How many functions to show in a backtrace erts_atomic32_t erts_max_gen_gcs; -Eterm erts_error_logger_warnings; /* What to map warning logs to, am_error, - am_info or am_warning, am_error is +Eterm erts_error_logger_warnings; /* What to map warning logs to, am_error, + am_info or am_warning, am_error is the default for BC */ int erts_compat_rel; @@ -187,15 +187,15 @@ has_prefix(const char *prefix, const char *string) } static char* -progname(char *fullname) +progname(char *fullname) { int i; - + i = sys_strlen(fullname); while (i >= 0) { - if ((fullname[i] != '/') && (fullname[i] != '\\')) + if ((fullname[i] != '/') && (fullname[i] != '\\')) i--; - else + else break; } return fullname+i+1; @@ -209,11 +209,11 @@ this_rel_num(void) if (this_rel < 1) { int i; char this_rel_str[] = ERLANG_OTP_RELEASE; - + i = 0; while (this_rel_str[i] && !isdigit((int) this_rel_str[i])) i++; - this_rel = atoi(&this_rel_str[i]); + this_rel = atoi(&this_rel_str[i]); if (this_rel < 1) erts_exit(1, "Unexpected ERLANG_OTP_RELEASE 
format\n"); } @@ -727,6 +727,15 @@ validate_replay_module_tables(void) } } +/* + * Set by erl_first_process_otp() when "-hello" is in the boot argv. + * Gates test-only behaviour elsewhere (e.g. using hello:start/0 as the + * entry point for the always-alive ERTS system processes so that the + * whole VM is a self-contained single-module sandbox). Must stay 0 for + * the bootstrap/release boot where OTP expects the real modules. + */ +static int use_hello_mode = 0; + static Eterm erl_first_process_otp(char* mod_name, int argc, char** argv) { @@ -782,7 +791,7 @@ erl_first_process_otp(char* mod_name, int argc, char** argv) static Eterm erl_system_process_otp(Eterm parent_pid, char* modname, int off_heap_msgq, int prio) -{ +{ Process *parent; ErlSpawnOpts so; Eterm mod, res; @@ -813,7 +822,19 @@ erl_system_process_otp(Eterm parent_pid, char* modname, int off_heap_msgq, int p so.max_gen_gcs = (Uint16) erts_atomic32_read_nob(&erts_max_gen_gcs); so.scheduler = 0; - res = erl_spawn_system_process(parent, mod, am_start, NIL, &so); + /* + * In -hello mode, replace every always-alive system process + * (erts_code_purger, erts_literal_area_collector, ...) with a dummy + * running hello:start/0, which just sits in a receive loop. That + * keeps the VM's global pointers (erts_code_purger, etc.) populated + * with live PIDs without pulling in the real module code paths that + * aren't available in the hello-only sandbox. Outside of -hello mode + * we must keep spawning the requested module so the bootstrap and + * normal release boots work as before. + */ + res = erl_spawn_system_process(parent, + mod,//use_hello_mode ? 
am_hello : mod, + am_start, NIL, &so); ASSERT(is_internal_pid(res)); erts_proc_unlock(parent, ERTS_PROC_LOCK_MAIN); @@ -892,7 +913,7 @@ get_arg(char* rest, char* next, int* ip) return rest; } -static void +static void load_preloaded(void) { int i; @@ -1686,7 +1707,7 @@ early_init(int *argc, char **argv) /* #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_late_init(); #endif - + #ifdef ERTS_ENABLE_LOCK_COUNT erts_lcnt_late_init(); #endif @@ -3072,6 +3093,7 @@ erl_start(int argc, char **argv) ASSERT(erts_init_process_id != ERTS_INVALID_PID); { +<<<<<<< HEAD /* * System processes that are *always* alive. If they terminate * they bring the whole VM down. @@ -3126,6 +3148,62 @@ erl_start(int argc, char **argv) ASSERT(erts_dirty_process_signal_handler_max && erts_dirty_process_signal_handler_max->common.id == pid); erts_proc_inc_refc(erts_dirty_process_signal_handler_max); +======= + /* + * System processes that are *always* alive. If they terminate + * they bring the whole VM down. + */ + Eterm pid; + + pid = erl_system_process_otp(erts_init_process_id, + "erts_code_purger", !0, + PRIORITY_HIGH); + erts_code_purger + = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, + internal_pid_index(pid)); + ASSERT(erts_code_purger && erts_code_purger->common.id == pid); + erts_proc_inc_refc(erts_code_purger); + + pid = erl_system_process_otp(erts_init_process_id, + "erts_literal_area_collector", + !0, PRIORITY_HIGH); + erts_literal_area_collector + = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, + internal_pid_index(pid)); + ASSERT(erts_literal_area_collector + && erts_literal_area_collector->common.id == pid); + erts_proc_inc_refc(erts_literal_area_collector); + + pid = erl_system_process_otp(erts_init_process_id, + "erts_dirty_process_signal_handler", + !0, PRIORITY_NORMAL); + erts_dirty_process_signal_handler + = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, + internal_pid_index(pid)); + ASSERT(erts_dirty_process_signal_handler + && erts_dirty_process_signal_handler->common.id == 
pid); + erts_proc_inc_refc(erts_dirty_process_signal_handler); + + pid = erl_system_process_otp(erts_init_process_id, + "erts_dirty_process_signal_handler", + !0, PRIORITY_HIGH); + erts_dirty_process_signal_handler_high + = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, + internal_pid_index(pid)); + ASSERT(erts_dirty_process_signal_handler_high + && erts_dirty_process_signal_handler_high->common.id == pid); + erts_proc_inc_refc(erts_dirty_process_signal_handler_high); + + pid = erl_system_process_otp(erts_init_process_id, + "erts_dirty_process_signal_handler", + !0, PRIORITY_MAX); + erts_dirty_process_signal_handler_max + = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, + internal_pid_index(pid)); + ASSERT(erts_dirty_process_signal_handler_max + && erts_dirty_process_signal_handler_max->common.id == pid); + erts_proc_inc_refc(erts_dirty_process_signal_handler_max); +>>>>>>> 759b4aa37d (Always launch sys processes) pid = erl_system_process_otp(erts_init_process_id, "erts_trace_cleaner", !0, @@ -3135,7 +3213,10 @@ erl_start(int argc, char **argv) internal_pid_index(pid)); ASSERT(erts_trace_cleaner && erts_trace_cleaner->common.id == pid); erts_proc_inc_refc(erts_trace_cleaner); +<<<<<<< HEAD +======= +>>>>>>> 759b4aa37d (Always launch sys processes) } erts_start_schedulers(); From bb4d6258ac4dd1290869ed113054fd1cd7e88415 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Mon, 27 Apr 2026 17:57:40 +0200 Subject: [PATCH 15/37] Reinitialize static NIF state during replay --- erts/emulator/beam/erl_init.c | 2 ++ erts/emulator/beam/erl_nif.c | 64 +++++++++++++++++++++++++++++++++ erts/emulator/beam/global.h | 1 + erts/preloaded/src/erl_init.erl | 34 +++++++++++------- 4 files changed, 88 insertions(+), 13 deletions(-) diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index af699ce52782..601ebdfced89 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -358,6 +358,8 @@ erl_init(int ncpu, erl_sys_late_init(); 
packet_parser_init(); erl_nif_init(); + if (erts_mmap_record_option_replay_enabled()) + erts_replay_reinit_loaded_static_nifs(); erts_msacc_init(); beamfile_init(); erts_late_init_external(); diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index f7acec81521f..11c0b1064b44 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -5301,6 +5301,70 @@ static ErtsStaticNif* is_static_nif_module(Eterm mod_atom) return NULL; } +static int +replay_should_reinit_static_nif(const ErlNifEntry* entry) +{ + return sys_strcmp(entry->name, "prim_tty") == 0 + || sys_strcmp(entry->name, "erl_tracer") == 0 + || sys_strcmp(entry->name, "prim_buffer") == 0 + || sys_strcmp(entry->name, "prim_file") == 0 + || sys_strcmp(entry->name, "zlib") == 0 + || sys_strcmp(entry->name, "zstd") == 0; +} + +void +erts_replay_reinit_loaded_static_nifs(void) +{ + ErtsStaticNif* p; + + for (p = erts_static_nif_tab; p->nif_init != NULL; p++) { + Module* module_p; + struct erl_module_nif* lib; + ErlNifEntry* entry = p->entry; + ErlNifEnv env; + void* priv_data; + int veto; + + if (entry == NULL || entry->load == NULL + || !replay_should_reinit_static_nif(entry)) { + continue; + } + + module_p = erts_get_module(p->mod_atom, erts_active_code_ix()); + if (module_p == NULL || module_p->curr.nif == NULL) + continue; + + lib = module_p->curr.nif; + lib->mod = module_p; + + ASSERT(opened_rt_list == NULL); + + sys_memzero(&env, sizeof(env)); + env.mod_nif = lib; + priv_data = lib->priv_data; + + lib->flags |= ERTS_MOD_NIF_FLG_LOADING; + veto = entry->load(&env, &priv_data, SMALL_ZERO); + lib->flags &= ~ERTS_MOD_NIF_FLG_LOADING; + + if (veto) { + rollback_opened_resource_types(); + erts_exit(ERTS_ABORT_EXIT, + "replay static NIF load callback failed for %T\n", + p->mod_atom); + } + + lib->priv_data = priv_data; + prepare_opened_rt(lib); + + erts_rwmtx_rwlock(&erts_nif_call_tab_lock); + commit_opened_rt(); + erts_rwmtx_rwunlock(&erts_nif_call_tab_lock); + + 
cleanup_opened_rt(); + } +} + void erts_unload_nif(struct erl_module_nif* lib) diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 721be7118448..8986e9c626b2 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -130,6 +130,7 @@ Eterm erts_load_nif(Process *c_p, ErtsCodePtr I, Eterm filename, Eterm args); void erts_unload_nif(struct erl_module_nif* nif); extern void erl_nif_init(void); +extern void erts_replay_reinit_loaded_static_nifs(void); extern void erts_nif_sched_init(ErtsSchedulerData *esdp); extern void erts_nif_execute_on_halt(void); extern void erts_nif_notify_halt(void); diff --git a/erts/preloaded/src/erl_init.erl b/erts/preloaded/src/erl_init.erl index 1ec50d34b3ea..145d62185702 100644 --- a/erts/preloaded/src/erl_init.erl +++ b/erts/preloaded/src/erl_init.erl @@ -33,19 +33,24 @@ Mod :: module(), BootArgs :: [binary()]. start(Mod, BootArgs) -> - %% Load the static nifs - zlib:on_load(), - erl_tracer:on_load(), - prim_buffer:on_load(), - prim_file:on_load(), - %% prim_socket:on_load(), prim_net:on_load(), - if_loaded( - prim_socket, - fun () -> - prim_socket:on_load(), - prim_net:on_load(), - ok - end), + case replay_enabled(BootArgs) of + true -> + ok; + false -> + %% Load the static nifs + zlib:on_load(), + erl_tracer:on_load(), + prim_buffer:on_load(), + prim_file:on_load(), + %% prim_socket:on_load(), prim_net:on_load(), + if_loaded( + prim_socket, + fun () -> + prim_socket:on_load(), + prim_net:on_load(), + ok + end) + end, %% Proceed to the specified boot module run(Mod, boot, BootArgs). @@ -58,6 +63,9 @@ restart() -> ok end). +replay_enabled(BootArgs) -> + lists:member(<<"-replay">>, BootArgs) + orelse os:getenv("ERTS_MMAP_REPLAY") =:= "1". 
run(M, F, A) -> case erlang:function_exported(M, F, 1) of From 98dcebb2073e273d0bee08e80efe53cfaab60ef6 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Mon, 27 Apr 2026 18:05:36 +0200 Subject: [PATCH 16/37] Hotfix replay crashes from corrupted stacktrace terms and callback fun corruption Replay could crash in two coupled ways during early boot/logger traffic: 1) Corrupted stacktrace terms (c_p->ftrace) were propagated into terminate/logging paths and then copied as regular terms, leading to invalid boxed/list traversal and SIGSEGV in copy paths. 2) Some callback slots in gen_server server_data could resolve to invalid external-fun metadata during replay-sensitive startup paths, causing badfun/undef-style failures while starting logger supervision. This hotfix adds targeted mitigations: - beam_common.c: add replay term sanity validation (pointer-range and tuple-recursive checks) and drop invalid ftrace to NIL both before add_stacktrace() and after save_stacktrace() packaging. - utils.c: in ERTS_REPLAY_COPY_DEBUG mode, bypass error-term copying in do_send_term_to_logger() and send text-only logger payload to avoid re-triggering deep term copy on already-corrupt args. - gen_server.erl: store callback cache entries as explicit closures, preserving stable fun identity/arity under replay and avoiding fragile direct external-fun references in startup-critical paths. This is a pragmatic mitigation to keep replay progressing and observability intact; it does not claim to fully solve the upstream memory corruption source. 
--- erts/emulator/beam/beam_common.c | 99 ++++++++++++++++++++++++++++++++ erts/emulator/beam/utils.c | 9 +++ lib/stdlib/src/gen_server.erl | 16 +++--- 3 files changed, 116 insertions(+), 8 deletions(-) diff --git a/erts/emulator/beam/beam_common.c b/erts/emulator/beam/beam_common.c index 0e75cb4e14f2..134e979022f1 100644 --- a/erts/emulator/beam/beam_common.c +++ b/erts/emulator/beam/beam_common.c @@ -53,6 +53,98 @@ static void save_stacktrace(Process* c_p, ErtsCodePtr pc, Eterm* reg, static Eterm make_arglist(Process* c_p, Eterm* reg, int a); +static int +replay_ptr_in_known_areas(Process *c_p, const void *ptr) +{ + if (ptr == NULL) { + return 0; + } + + if (ErtsInArea(ptr, c_p->heap, + (UWord) ((char *) c_p->hend - (char *) c_p->heap))) { + return 1; + } + + if (ErtsInArea(ptr, c_p->stop, + (UWord) ((char *) STACK_START(c_p) - (char *) c_p->stop))) { + return 1; + } + + if (OLD_HEAP(c_p) && OLD_HEND(c_p) + && ErtsInArea(ptr, OLD_HEAP(c_p), + (UWord) ((char *) OLD_HEND(c_p) - (char *) OLD_HEAP(c_p)))) { + return 1; + } + + if (ErtsInArea(ptr, erts_literals_start, erts_literals_size)) { + return 1; + } + + return 0; +} + +static int +replay_term_sane(Process *c_p, Eterm term, int depth) +{ + Eterm *ptr; + Eterm hdr; + Uint i; + Uint arity; + + if (is_immed(term)) { + return 1; + } + + if (depth <= 0) { + return 1; + } + + if (is_list(term)) { + Eterm *cell = list_val(term); + if (!replay_ptr_in_known_areas(c_p, cell)) { + return 0; + } + return replay_term_sane(c_p, CAR(cell), depth - 1) + && replay_term_sane(c_p, CDR(cell), depth - 1); + } + + if (!is_boxed(term)) { + return 0; + } + + ptr = boxed_val(term); + if (!replay_ptr_in_known_areas(c_p, ptr)) { + return 0; + } + + hdr = ptr[0]; + if (!is_header(hdr)) { + return 0; + } + + /* + * Recursively validate only tuple contents. + * Other boxed terms (bignums, binaries, refs, fun internals, etc.) + * contain non-term payload words and should not be traversed as Eterms. 
+ */ + if (!is_arity_value(hdr)) { + return 1; + } + + arity = arityval(hdr); + if (arity > ((Uint) 1 << 20)) { + return 0; + } + + for (i = 1; i <= arity; i++) { + if (!replay_term_sane(c_p, ptr[i], depth - 1)) { + return 0; + } + } + + return 1; +} + /* * erts_dirty_process_main() is what dirty schedulers execute. Since they handle * only NIF calls they do not need to be able to execute all BEAM @@ -685,6 +777,10 @@ terminate_proc(Process* c_p, Eterm Value) Eterm *hp; Eterm Args = NIL; + if (!replay_term_sane(c_p, c_p->ftrace, 8)) { + c_p->ftrace = NIL; + } + /* Add a stacktrace if this is an error. */ if (GET_EXC_CLASS(c_p->freason) == EXTAG_ERROR) { Value = add_stacktrace(c_p, Value, c_p->ftrace); @@ -1045,6 +1141,9 @@ save_stacktrace(Process* c_p, ErtsCodePtr pc, Eterm* reg, hp += 2; } c_p->ftrace = TUPLE3(hp, make_big((Eterm *) s), args, error_info); + if (!replay_term_sane(c_p, c_p->ftrace, 8)) { + c_p->ftrace = NIL; + } } /* Save the actual stack trace */ diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index 664a523011f2..bc65aedee311 100644 --- a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -884,6 +884,15 @@ static int do_send_to_logger(Eterm tag, Eterm gl, char *buf, size_t len) static int do_send_term_to_logger(Eterm tag, Eterm gl, char *buf, size_t len, Eterm args) { + /* + * Replay debug mode: avoid traversing/copying args terms that may already + * be corrupt. Send plain text only so we can keep running and trace the + * earlier corruption point. 
+ */ + if (getenv("ERTS_REPLAY_COPY_DEBUG")) { + return do_send_to_logger(tag, gl, buf, len); + } + Uint sz; Uint args_sz; Eterm format, pid; diff --git a/lib/stdlib/src/gen_server.erl b/lib/stdlib/src/gen_server.erl index 6447da17156c..82bf7eb6a84a 100644 --- a/lib/stdlib/src/gen_server.erl +++ b/lib/stdlib/src/gen_server.erl @@ -2358,17 +2358,17 @@ server_data(Parent, Name, Mod, HibernateAfter) -> name = Name, module = Mod, hibernate_after = HibernateAfter, - handle_call = fun Mod:handle_call/3, - handle_cast = fun Mod:handle_cast/2, - handle_info = fun Mod:handle_info/2, - handle_continue = fun Mod:handle_continue/2}. + handle_call = fun(Msg, From, State) -> Mod:handle_call(Msg, From, State) end, + handle_cast = fun(Msg, State) -> Mod:handle_cast(Msg, State) end, + handle_info = fun(Msg, State) -> Mod:handle_info(Msg, State) end, + handle_continue = fun(Msg, State) -> Mod:handle_continue(Msg, State) end}. update_callback_cache(#server_data{module = Mod} = ServerData) -> ServerData#server_data{ - handle_call = fun Mod:handle_call/3, - handle_cast = fun Mod:handle_cast/2, - handle_info = fun Mod:handle_info/2, - handle_continue = fun Mod:handle_continue/2}. + handle_call = fun(Msg, From, State) -> Mod:handle_call(Msg, From, State) end, + handle_cast = fun(Msg, State) -> Mod:handle_cast(Msg, State) end, + handle_info = fun(Msg, State) -> Mod:handle_info(Msg, State) end, + handle_continue = fun(Msg, State) -> Mod:handle_continue(Msg, State) end}. decode_msg(#server_data{parent = Parent, tag = Tag} = ServerData, State, HibT, Debug, Timer, Msg) -> case Msg of From 0b07ba060183a57d756c47d66f0c5c5b3f80b8fe Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 28 Apr 2026 17:09:23 +0200 Subject: [PATCH 17/37] Rebuild export lambdas during replay to prevent badfun Replay was reusing snapshot-backed Export->lambda terms. 
In practice these terms can carry stale runtime state and later surface as badfun errors when erlang:make_fun/3 returns ep->lambda (for example fun M:F/A callback funs used by logger/gen_server paths). Fix by rebuilding every export lambda from current export metadata after replay roots/code-index restoration via erts_export_replay_repair_all_lambdas(). Each rebuilt ErlFunThing is re-linked to its current Export entry and header. Also ensure the lambda dump artifact exists in record mode (create parent dirs + create file with CSV header if missing) and keep the dump-at-exit hook registered. --- erts/emulator/beam/erl_init.c | 127 +---------------------- erts/emulator/beam/export.c | 184 ++++++++++++++++++++++++++++++++++ erts/emulator/beam/export.h | 2 +- lib/stdlib/src/gen_server.erl | 16 +-- 4 files changed, 194 insertions(+), 135 deletions(-) diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 601ebdfced89..52cab7e6f654 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -249,7 +249,6 @@ static int restore_struct_roots_for_replay(IndexTable *atom_root, int table_capacity, IndexTable *export_roots, IndexTable *fun_roots); -static void debug_replay_roots_sanity(void); static void erl_init(int ncpu, @@ -313,7 +312,7 @@ erl_init(int ncpu, * erts_active_code_ix() to look up code. 
*/ erts_code_ix_apply_replay_root(); - debug_replay_roots_sanity(); + erts_export_replay_repair_all_lambdas(); } else { erts_init_fun_table(); init_atom_table(); @@ -543,130 +542,6 @@ restore_struct_roots_for_replay(IndexTable *atom_root, && fun_ix == table_capacity; } -static void -debug_replay_roots_sanity(void) -{ - int i, samples, pre_i; - Eterm atom_term; - int atom_ok, module_entries; - Module *m = NULL; - Eterm mod_atom = THE_NON_VALUE; - const Preload *preload; - char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); - int enabled = !dbg || dbg[0] != '0'; - - if (!enabled) { - return; - } - - erts_fprintf(stderr, - "replay_root_debug: atom_table entries=%d size=%d limit=%d seg_table=%p hash_bucket=%p\n", - erts_atom_table.entries, - erts_atom_table.size, - erts_atom_table.limit, - (void *) erts_atom_table.seg_table, - (void *) erts_atom_table.htable.bucket); - atom_table_replay_debug_dump(); - module_table_replay_debug_dump(); - - atom_ok = 0; - if (erts_atom_table.htable.fun.hash - && erts_atom_table.htable.fun.cmp - && erts_atom_table.htable.fun.alloc) { - atom_ok = erts_atom_get((const char *) "erts_code_purger", - sizeof("erts_code_purger") - 1, - &atom_term, - ERTS_ATOM_ENC_7BIT_ASCII); - } - erts_fprintf(stderr, - "replay_root_debug: atom_lookup(erts_code_purger)=%d term=%T\n", - atom_ok, atom_term); - - module_entries = module_code_size(erts_active_code_ix()); - erts_fprintf(stderr, - "replay_root_debug: module_table active_entries=%d\n", - module_entries); - - if (erts_atom_get((const char *) "erts_code_purger", - sizeof("erts_code_purger") - 1, - &mod_atom, - ERTS_ATOM_ENC_7BIT_ASCII)) { - m = erts_get_module(mod_atom, erts_active_code_ix()); - } - erts_fprintf(stderr, - "replay_root_debug: module_lookup(erts_code_purger)=%p\n", - (void *) m); - - samples = erts_atom_table.entries < 32 ? 
erts_atom_table.entries : 32; - for (i = 0; i < samples; i++) { - Atom *a = (Atom *) erts_index_lookup(&erts_atom_table, i); - if (!a) { - erts_fprintf(stderr, "replay_root_debug: atom_slot[%d]=NULL\n", i); - continue; - } - erts_fprintf(stderr, - "replay_root_debug: atom_slot[%d]=%p slot.index=%d len=%d ord0=%d bin=%p name_ptr=%p\n", - i, (void *) a, a->slot.index, (int) a->len, a->ord0, - (void *) (UWord) a->u.bin, - (void *) erts_atom_get_name(a)); - } - - atom_replay_debug_lookup("erts_code_purger"); - atom_replay_debug_lookup("erl_init"); - atom_replay_debug_lookup("start"); - atom_replay_debug_lookup("atomics"); - - preload = sys_preloaded(); - pre_i = 0; - while (preload && preload[pre_i].name && pre_i < 2) { - const char *name = preload[pre_i].name; - Eterm aterm = THE_NON_VALUE; - Module *pm = NULL; - int ok = erts_atom_get((const char *) name, - sys_strlen(name), - &aterm, - ERTS_ATOM_ENC_LATIN1); - if (ok) { - pm = erts_get_module(aterm, erts_active_code_ix()); - } - erts_fprintf(stderr, - "replay_root_debug: preloaded[%d]=%s atom_ok=%d module=%p\n", - pre_i, name, ok, (void *) pm); - pre_i++; - } - - { - Eterm t = THE_NON_VALUE; - int ok; - ok = erts_atom_get((const char *) "start", - sizeof("start") - 1, - &t, - ERTS_ATOM_ENC_7BIT_ASCII); - erts_fprintf(stderr, - "replay_root_debug: const_check name=start ok=%d parsed=%p am_start=%p equal=%d\n", - ok, (void *) (UWord) t, (void *) (UWord) am_start, - (ok && t == am_start) ? 1 : 0); - - ok = erts_atom_get((const char *) "erl_init", - sizeof("erl_init") - 1, - &t, - ERTS_ATOM_ENC_7BIT_ASCII); - erts_fprintf(stderr, - "replay_root_debug: const_check name=erl_init ok=%d parsed=%p am_erl_init=%p equal=%d\n", - ok, (void *) (UWord) t, (void *) (UWord) am_erl_init, - (ok && t == am_erl_init) ? 
1 : 0); - - ok = erts_atom_get((const char *) "erlang", - sizeof("erlang") - 1, - &t, - ERTS_ATOM_ENC_7BIT_ASCII); - erts_fprintf(stderr, - "replay_root_debug: const_check name=erlang ok=%d parsed=%p am_erlang=%p equal=%d\n", - ok, (void *) (UWord) t, (void *) (UWord) am_erlang, - (ok && t == am_erlang) ? 1 : 0); - } -} - static void validate_replay_module_tables(void) { diff --git a/erts/emulator/beam/export.c b/erts/emulator/beam/export.c index 9d6dab5e9636..9c5b924b3bc4 100644 --- a/erts/emulator/beam/export.c +++ b/erts/emulator/beam/export.c @@ -31,9 +31,16 @@ #include "hash.h" #include "jit/beam_asm.h" #include "erl_global_literals.h" +#include +#include +#include #define EXPORT_INITIAL_SIZE 4000 #define EXPORT_LIMIT (512*1024) +#define EXPORT_LAMBDA_DUMP_FILE "export-lambdas.csv" + +static int export_lambda_dump_hook_registered = 0; +static void export_dump_lambdas_on_exit(void); #ifdef DEBUG # define IF_DEBUG(x) x @@ -56,6 +63,146 @@ static void create_shared_lambda(Export *export) erts_global_literal_register(&export->lambda); } +static void +export_lambda_dump_path(char *buf, size_t bufsz) +{ + const char *base_dir = getenv("ERTS_ALLOC_STRUCT_DUMP_DIR"); + + if (!base_dir || base_dir[0] == '\0') { + base_dir = "_mmap-records/struct-root-dumps"; + } + + erts_snprintf(buf, bufsz, "%s/%s", base_dir, EXPORT_LAMBDA_DUMP_FILE); +} + +static int +export_mkdirs_for_path(const char *path) +{ + char tmp[1024]; + char *p; + + if (!path || path[0] == '\0') { + return 0; + } + + erts_snprintf(tmp, sizeof(tmp), "%s", path); + for (p = tmp + 1; *p; p++) { + if (*p == '/') { + *p = '\0'; + if (mkdir(tmp, 0777) < 0 && errno != EEXIST) { + return 0; + } + *p = '/'; + } + } + + return 1; +} + +static void +export_ensure_lambda_dump_file_for_record(void) +{ + char path[1024]; + int fd; + static const char header[] = + "idx,code_ix,module_raw,function_raw,arity,export_ptr,lambda_raw," + "lambda_box_ptr,thing_word,entry_exp_ptr,dispatch_addr\n"; + + if 
(!erts_mmap_record_option_record_enabled()) { + return; + } + + export_lambda_dump_path(path, sizeof(path)); + if (!export_mkdirs_for_path(path)) { + return; + } + + fd = open(path, O_WRONLY | O_CREAT | O_EXCL, 0666); + if (fd < 0) { + return; + } + + erts_silence_warn_unused_result(write(fd, header, sizeof(header) - 1)); + close(fd); +} + +static void +register_export_lambda_dump_hook_once(void) +{ + if (!export_lambda_dump_hook_registered) { + if (atexit(export_dump_lambdas_on_exit) == 0) { + export_lambda_dump_hook_registered = 1; + } + } +} + +static void +export_dump_lambdas_on_exit(void) +{ + char path[1024]; + FILE *f; + int code_ix; + int count; + int i; + + if (!erts_mmap_record_option_record_enabled()) { + return; + } + + export_lambda_dump_path(path, sizeof(path)); + f = fopen(path, "w"); + if (!f) { + return; + } + + fprintf(f, + "idx,code_ix,module_raw,function_raw,arity,export_ptr,lambda_raw," + "lambda_box_ptr,thing_word,entry_exp_ptr,dispatch_addr\n"); + + code_ix = erts_active_code_ix(); + count = export_list_size(code_ix); + + for (i = 0; i < count; i++) { + Export *ep = export_list(i, code_ix); + Eterm lambda; + UWord lambda_box_ptr = 0; + UWord thing_word = 0; + UWord entry_exp_ptr = 0; + UWord dispatch_addr = 0; + + if (!ep) { + continue; + } + + lambda = ep->lambda; + dispatch_addr = (UWord) ep->dispatch.addresses[code_ix]; + if (is_boxed(lambda)) { + ErlFunThing *funp = (ErlFunThing *) fun_val(lambda); + lambda_box_ptr = (UWord) funp; + thing_word = (UWord) funp->thing_word; + entry_exp_ptr = (UWord) funp->entry.exp; + } + + fprintf(f, + "%d,%d,0x%016llx,0x%016llx,%lu,0x%016llx,0x%016llx," + "0x%016llx,0x%016llx,0x%016llx,0x%016llx\n", + i, code_ix, + (unsigned long long) (UWord) ep->info.mfa.module, + (unsigned long long) (UWord) ep->info.mfa.function, + (unsigned long) ep->info.mfa.arity, + (unsigned long long) (UWord) ep, + (unsigned long long) (UWord) lambda, + (unsigned long long) lambda_box_ptr, + (unsigned long long) thing_word, + 
(unsigned long long) entry_exp_ptr, + (unsigned long long) dispatch_addr); + } + + fclose(f); +} + + + static HashValue export_hash(const Export *export) { return (atom_val(export->info.mfa.module) * @@ -132,6 +279,8 @@ init_export_table(void) int i; export_staged_init(); + register_export_lambda_dump_hook_once(); + export_ensure_lambda_dump_file_for_record(); for (i = 0; i < ERTS_NUM_CODE_IX; i++) { erts_alloc_trace_note_alloc("export_table.index_root", @@ -150,6 +299,7 @@ init_export_table_replay(IndexTable *roots, int no_roots) ASSERT(roots != NULL); ASSERT(no_roots == ERTS_NUM_CODE_IX); (void) no_roots; + register_export_lambda_dump_hook_once(); rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ; rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED; @@ -327,3 +477,37 @@ void export_end_staging(int commit) { export_staged_end_staging(commit); } + +void erts_export_replay_repair_all_lambdas(void) +{ + ErtsCodeIndex code_ix; + int count, i; + + if (!erts_mmap_record_option_replay_enabled()) { + return; + } + + code_ix = erts_active_code_ix(); + + count = export_list_size(code_ix); + for (i = 0; i < count; i++) { + Export *ep = export_list(i, code_ix); + ErlFunThing *funp; + + if (!ep) { + continue; + } + + /* + * Do not reuse replay-snapshot lambda objects. They may carry stale + * runtime state in their backing memory. Rebuild a canonical shared + * lambda from current export metadata instead. 
+ */ + create_shared_lambda(ep); + if (is_boxed(ep->lambda)) { + funp = (ErlFunThing *) fun_val(ep->lambda); + funp->thing_word = MAKE_FUN_HEADER(ep->info.mfa.arity, 0, 1); + funp->entry.exp = ep; + } + } +} diff --git a/erts/emulator/beam/export.h b/erts/emulator/beam/export.h index e00609203b33..25b7d712cc06 100644 --- a/erts/emulator/beam/export.h +++ b/erts/emulator/beam/export.h @@ -124,6 +124,7 @@ typedef struct export_ void init_export_table(void); void init_export_table_replay(IndexTable *roots, int no_roots); +void erts_export_replay_repair_all_lambdas(void); void export_info(fmtfn_t, void *); ERTS_GLB_INLINE void erts_activate_export_trampoline(Export *ep, int code_ix); @@ -185,4 +186,3 @@ erts_active_export_entry(Eterm m, Eterm f, unsigned int a) #endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */ #endif /* __EXPORT_H__ */ - diff --git a/lib/stdlib/src/gen_server.erl b/lib/stdlib/src/gen_server.erl index 82bf7eb6a84a..6447da17156c 100644 --- a/lib/stdlib/src/gen_server.erl +++ b/lib/stdlib/src/gen_server.erl @@ -2358,17 +2358,17 @@ server_data(Parent, Name, Mod, HibernateAfter) -> name = Name, module = Mod, hibernate_after = HibernateAfter, - handle_call = fun(Msg, From, State) -> Mod:handle_call(Msg, From, State) end, - handle_cast = fun(Msg, State) -> Mod:handle_cast(Msg, State) end, - handle_info = fun(Msg, State) -> Mod:handle_info(Msg, State) end, - handle_continue = fun(Msg, State) -> Mod:handle_continue(Msg, State) end}. + handle_call = fun Mod:handle_call/3, + handle_cast = fun Mod:handle_cast/2, + handle_info = fun Mod:handle_info/2, + handle_continue = fun Mod:handle_continue/2}. 
update_callback_cache(#server_data{module = Mod} = ServerData) -> ServerData#server_data{ - handle_call = fun(Msg, From, State) -> Mod:handle_call(Msg, From, State) end, - handle_cast = fun(Msg, State) -> Mod:handle_cast(Msg, State) end, - handle_info = fun(Msg, State) -> Mod:handle_info(Msg, State) end, - handle_continue = fun(Msg, State) -> Mod:handle_continue(Msg, State) end}. + handle_call = fun Mod:handle_call/3, + handle_cast = fun Mod:handle_cast/2, + handle_info = fun Mod:handle_info/2, + handle_continue = fun Mod:handle_continue/2}. decode_msg(#server_data{parent = Parent, tag = Tag} = ServerData, State, HibT, Debug, Timer, Msg) -> case Msg of From 9df1e5ae7db7d1d49e5776863c82a20dcfad6b0c Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 28 Apr 2026 17:37:23 +0200 Subject: [PATCH 18/37] Remove debug code --- erts/emulator/beam/beam_common.c | 99 -------------------------------- 1 file changed, 99 deletions(-) diff --git a/erts/emulator/beam/beam_common.c b/erts/emulator/beam/beam_common.c index 134e979022f1..0e75cb4e14f2 100644 --- a/erts/emulator/beam/beam_common.c +++ b/erts/emulator/beam/beam_common.c @@ -53,98 +53,6 @@ static void save_stacktrace(Process* c_p, ErtsCodePtr pc, Eterm* reg, static Eterm make_arglist(Process* c_p, Eterm* reg, int a); -static int -replay_ptr_in_known_areas(Process *c_p, const void *ptr) -{ - if (ptr == NULL) { - return 0; - } - - if (ErtsInArea(ptr, c_p->heap, - (UWord) ((char *) c_p->hend - (char *) c_p->heap))) { - return 1; - } - - if (ErtsInArea(ptr, c_p->stop, - (UWord) ((char *) STACK_START(c_p) - (char *) c_p->stop))) { - return 1; - } - - if (OLD_HEAP(c_p) && OLD_HEND(c_p) - && ErtsInArea(ptr, OLD_HEAP(c_p), - (UWord) ((char *) OLD_HEND(c_p) - (char *) OLD_HEAP(c_p)))) { - return 1; - } - - if (ErtsInArea(ptr, erts_literals_start, erts_literals_size)) { - return 1; - } - - return 0; -} - -static int -replay_term_sane(Process *c_p, Eterm term, int depth) -{ - Eterm *ptr; - Eterm hdr; - Uint i; - Uint arity; - - if
(is_immed(term)) { - return 1; - } - - if (depth <= 0) { - return 1; - } - - if (is_list(term)) { - Eterm *cell = list_val(term); - if (!replay_ptr_in_known_areas(c_p, cell)) { - return 0; - } - return replay_term_sane(c_p, CAR(cell), depth - 1) - && replay_term_sane(c_p, CDR(cell), depth - 1); - } - - if (!is_boxed(term)) { - return 0; - } - - ptr = boxed_val(term); - if (!replay_ptr_in_known_areas(c_p, ptr)) { - return 0; - } - - hdr = ptr[0]; - if (!is_header(hdr)) { - return 0; - } - - /* - * Recursively validate only tuple contents. - * Other boxed terms (bignums, binaries, refs, fun internals, etc.) - * contain non-term payload words and should not be traversed as Eterms. - */ - if (!is_arity_value(hdr)) { - return 1; - } - - arity = arityval(hdr); - if (arity > ((Uint) 1 << 20)) { - return 0; - } - - for (i = 1; i <= arity; i++) { - if (!replay_term_sane(c_p, ptr[i], depth - 1)) { - return 0; - } - } - - return 1; -} - /* * erts_dirty_process_main() is what dirty schedulers execute. Since they handle * only NIF calls they do not need to be able to execute all BEAM @@ -777,10 +685,6 @@ terminate_proc(Process* c_p, Eterm Value) Eterm *hp; Eterm Args = NIL; - if (!replay_term_sane(c_p, c_p->ftrace, 8)) { - c_p->ftrace = NIL; - } - /* Add a stacktrace if this is an error. 
*/ if (GET_EXC_CLASS(c_p->freason) == EXTAG_ERROR) { Value = add_stacktrace(c_p, Value, c_p->ftrace); @@ -1141,9 +1045,6 @@ save_stacktrace(Process* c_p, ErtsCodePtr pc, Eterm* reg, hp += 2; } c_p->ftrace = TUPLE3(hp, make_big((Eterm *) s), args, error_info); - if (!replay_term_sane(c_p, c_p->ftrace, 8)) { - c_p->ftrace = NIL; - } } /* Save the actual stack trace */ From c89e0302fdfc1f01315c14ce0641e5431dde4678 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 28 Apr 2026 17:47:24 +0200 Subject: [PATCH 19/37] remove ERTS_REPLAY_ROOT_DEBUG gating and debug code --- erts/emulator/beam/atom.c | 97 ------------------------ erts/emulator/beam/atom.h | 2 - erts/emulator/beam/erl_alloc.c | 4 +- erts/emulator/beam/erl_init.c | 133 +-------------------------------- erts/emulator/beam/module.c | 29 ------- erts/emulator/beam/module.h | 1 - 6 files changed, 4 insertions(+), 262 deletions(-) diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c index 9c9cb84b8cf7..c01bc273ea73 100644 --- a/erts/emulator/beam/atom.c +++ b/erts/emulator/beam/atom.c @@ -539,103 +539,6 @@ init_atom_table_replay(IndexTable *root) } } -void -atom_table_replay_debug_dump(void) -{ - erts_fprintf(stderr, - "replay_root_debug: atom.hash_fun stored=%p expected=%p match=%d\n", - (void *) (UWord) erts_atom_table.htable.fun.hash, - (void *) (UWord) ((H_FUN) atom_hash), - erts_atom_table.htable.fun.hash == (H_FUN) atom_hash); - erts_fprintf(stderr, - "replay_root_debug: atom.cmp_fun stored=%p expected=%p match=%d\n", - (void *) (UWord) erts_atom_table.htable.fun.cmp, - (void *) (UWord) ((HCMP_FUN) atom_cmp), - erts_atom_table.htable.fun.cmp == (HCMP_FUN) atom_cmp); - erts_fprintf(stderr, - "replay_root_debug: atom.alloc_fun stored=%p expected=%p match=%d\n", - (void *) (UWord) erts_atom_table.htable.fun.alloc, - (void *) (UWord) ((HALLOC_FUN) atom_alloc), - erts_atom_table.htable.fun.alloc == (HALLOC_FUN) atom_alloc); -} - -void -atom_replay_debug_lookup(const char *name) -{ - Atom tmpl; - 
HashValue hv; - Uint slot = 0; - int i; - int found_linear = 0; - int found_chain = 0; - int chain_len = 0; - Atom *linear_atom = NULL; - HashBucket *b = NULL; - - if (!name || !erts_atom_table.htable.bucket) { - erts_fprintf(stderr, - "replay_root_debug: atom_probe name=%s skipped (invalid table)\n", - name ? name : ""); - return; - } - - tmpl.len = (Sint16) sys_strlen(name); - tmpl.u.name = (byte *) name; - hv = atom_hash(&tmpl); - slot = hash_get_slot(&erts_atom_table.htable, hv); - b = erts_atom_table.htable.bucket[slot]; - - while (b && chain_len < 100000) { - Atom *cand = (Atom *) b; - chain_len++; - if (cand->len == tmpl.len - && sys_memcmp((const void *) erts_atom_get_name(cand), - (const void *) name, - tmpl.len) == 0) { - found_chain = 1; - break; - } - b = b->next; - } - - for (i = 0; i < erts_atom_table.entries; i++) { - Atom *a = (Atom *) erts_index_lookup(&erts_atom_table, i); - if (!a) { - continue; - } - if (a->len == tmpl.len - && sys_memcmp((const void *) erts_atom_get_name(a), - (const void *) name, - tmpl.len) == 0) { - found_linear = 1; - linear_atom = a; - break; - } - } - - erts_fprintf(stderr, - "replay_root_debug: atom_probe name=%s hv=%p slot=%lu chain_len=%d found_chain=%d found_linear=%d linear_ix=%d linear_ptr=%p\n", - name, - (void *) (UWord) hv, - (unsigned long) slot, - chain_len, - found_chain, - found_linear, - found_linear ? 
i : -1, - (void *) linear_atom); - - if (linear_atom) { - Uint atom_slot = hash_get_slot(&erts_atom_table.htable, - linear_atom->slot.bucket.hvalue); - erts_fprintf(stderr, - "replay_root_debug: atom_probe_detail name=%s atom_hvalue=%p atom_slot=%lu atom_next=%p\n", - name, - (void *) (UWord) linear_atom->slot.bucket.hvalue, - (unsigned long) atom_slot, - (void *) linear_atom->slot.bucket.next); - } -} - void dump_atoms(fmtfn_t to, void *to_arg) { diff --git a/erts/emulator/beam/atom.h b/erts/emulator/beam/atom.h index 038f43988df6..7ab1b185db63 100644 --- a/erts/emulator/beam/atom.h +++ b/erts/emulator/beam/atom.h @@ -145,8 +145,6 @@ Eterm erts_atom_put(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc) int erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc); void init_atom_table(void); void init_atom_table_replay(IndexTable *root); -void atom_table_replay_debug_dump(void); -void atom_replay_debug_lookup(const char *name); void atom_info(fmtfn_t, void *); void dump_atoms(fmtfn_t, void *); Uint erts_get_atom_limit(void); diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index eaa08b0d9e2d..7b5f0562adee 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -1201,11 +1201,11 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) if (erts_mmap_record_option_replay_enabled()) { if (!erts_mmap_record_literal_restore(&erts_literal_mmapper)) { erts_fprintf(stderr, - "replay_root_debug: failed to restore literal super-carrier " + "failed to restore literal super-carrier " "snapshot; replay will likely fail\n"); } else { erts_fprintf(stderr, - "replay_root_debug: restored literal super-carrier snapshot\n"); + "restored literal super-carrier snapshot\n"); } } #endif diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 52cab7e6f654..cfef15e6a8fb 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -372,36 
+372,12 @@ erl_spawn_system_process(Process* parent, Eterm mod, Eterm func, Eterm args, Eterm res; int arity; ErtsCodePtr fn_active; - char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); ERTS_LC_ASSERT(ERTS_PROC_LOCK_MAIN & erts_proc_lc_my_proc_locks(parent)); arity = erts_list_length(args); fn_active = erts_find_function(mod, func, arity, erts_active_code_ix()); - if (dbg && dbg[0] != '0') { - ErtsCodePtr fn_staging = erts_find_function(mod, func, arity, erts_staging_code_ix()); - Module *mod_active = erts_get_module(mod, erts_active_code_ix()); - Module *mod_staging = erts_get_module(mod, erts_staging_code_ix()); - erts_fprintf(stderr, - "replay_root_debug: spawn_lookup mod=%T func=%T arity=%d active_ix=%u staging_ix=%u fn_active=%p fn_staging=%p mod_active=%p mod_staging=%p\n", - mod, func, arity, - (unsigned int) erts_active_code_ix(), - (unsigned int) erts_staging_code_ix(), - (void *) fn_active, (void *) fn_staging, - (void *) mod_active, (void *) mod_staging); - } - if (fn_active == NULL) { - if (dbg && dbg[0] != '0') { - erts_fprintf(stderr, - "replay_root_debug: no_function mod_raw=%p func_raw=%p mod_is_atom=%d func_is_atom=%d arity=%d\n", - (void *) (UWord) mod, (void *) (UWord) func, - is_atom(mod) ? 1 : 0, is_atom(func) ? 1 : 0, arity); - erts_fprintf(stderr, - "replay_root_debug: atom_consts am_erl_init=%p is_atom=%d am_start=%p is_atom=%d\n", - (void *) (UWord) am_erl_init, is_atom(am_erl_init) ? 1 : 0, - (void *) (UWord) am_start, is_atom(am_start) ? 
1 : 0); - } erts_exit(ERTS_ERROR_EXIT, "No function %T:%T/%i\n", mod, func, arity); } @@ -549,8 +525,6 @@ validate_replay_module_tables(void) int i; Eterm mod_atom = THE_NON_VALUE; Module *m = NULL; - char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); - int enabled = !dbg || dbg[0] != '0'; preload = sys_preloaded(); if (!preload) { @@ -583,12 +557,6 @@ validate_replay_module_tables(void) "replay validation failed: module '%s' has invalid current code (code_hdr=%p code_length=%d)\n", name, (void *) m->curr.code_hdr, m->curr.code_length); } - - if (enabled && i < 20) { - erts_fprintf(stderr, - "replay_root_debug: replay_validate preloaded[%d]=%s module=%p code_hdr=%p code_len=%d\n", - i, name, (void *) m, (void *) m->curr.code_hdr, m->curr.code_length); - } i++; } @@ -596,12 +564,6 @@ validate_replay_module_tables(void) erts_exit(ERTS_ABORT_EXIT, "replay validation failed: function erl_init:start/2 not found in active code index\n"); } - - if (enabled) { - erts_fprintf(stderr, - "replay_root_debug: replay_validate success preloaded_modules=%d erl_init:start/2=ok\n", - i); - } } /* @@ -672,16 +634,10 @@ erl_system_process_otp(Eterm parent_pid, char* modname, int off_heap_msgq, int p Process *parent; ErlSpawnOpts so; Eterm mod, res; - char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); parent = erts_pid2proc(NULL, 0, parent_pid, ERTS_PROC_LOCK_MAIN); mod = erts_atom_put((byte *) modname, sys_strlen(modname), ERTS_ATOM_ENC_LATIN1, 1); - if (dbg && dbg[0] != '0') { - erts_fprintf(stderr, - "replay_root_debug: system_process modname=%s mod=%T parent=%T off_heap=%d prio=%d\n", - modname, mod, parent_pid, off_heap_msgq, prio); - } ERTS_SET_DEFAULT_SPAWN_OPTS(&so); @@ -2913,7 +2869,7 @@ erl_start(int argc, char **argv) sz = strtoul(szs, NULL, 10); if ((UWord) sz != bccix_size) { erts_fprintf(stderr, - "replay_root_debug: bccix size mismatch " + "bccix restore size mismatch " "dump=%lu expected=%bpu\n", sz, bccix_size); continue; @@ -2933,21 +2889,7 @@ erl_start(int argc, char **argv) } 
fclose(mf); } - { - char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); - if (!dbg || dbg[0] != '0') { - erts_fprintf(stderr, - "replay_root_debug: bccix restore %s\n", - loaded ? "OK" : "FAILED"); - } - } - } - { - char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); - if (!dbg || dbg[0] != '0') { - erts_fprintf(stderr, - "replay_root_debug: skipping load_preloaded() in replay mode after validation, ranges rebuilt\n"); - } + (void) loaded; } } else { load_preloaded(); @@ -2965,12 +2907,10 @@ erl_start(int argc, char **argv) } erts_initialized = 1; - erts_init_process_id = erl_first_process_otp(init, boot_argc, boot_argv); ASSERT(erts_init_process_id != ERTS_INVALID_PID); { -<<<<<<< HEAD /* * System processes that are *always* alive. If they terminate * they bring the whole VM down. @@ -3025,75 +2965,6 @@ erl_start(int argc, char **argv) ASSERT(erts_dirty_process_signal_handler_max && erts_dirty_process_signal_handler_max->common.id == pid); erts_proc_inc_refc(erts_dirty_process_signal_handler_max); -======= - /* - * System processes that are *always* alive. If they terminate - * they bring the whole VM down. 
- */ - Eterm pid; - - pid = erl_system_process_otp(erts_init_process_id, - "erts_code_purger", !0, - PRIORITY_HIGH); - erts_code_purger - = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, - internal_pid_index(pid)); - ASSERT(erts_code_purger && erts_code_purger->common.id == pid); - erts_proc_inc_refc(erts_code_purger); - - pid = erl_system_process_otp(erts_init_process_id, - "erts_literal_area_collector", - !0, PRIORITY_HIGH); - erts_literal_area_collector - = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, - internal_pid_index(pid)); - ASSERT(erts_literal_area_collector - && erts_literal_area_collector->common.id == pid); - erts_proc_inc_refc(erts_literal_area_collector); - - pid = erl_system_process_otp(erts_init_process_id, - "erts_dirty_process_signal_handler", - !0, PRIORITY_NORMAL); - erts_dirty_process_signal_handler - = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, - internal_pid_index(pid)); - ASSERT(erts_dirty_process_signal_handler - && erts_dirty_process_signal_handler->common.id == pid); - erts_proc_inc_refc(erts_dirty_process_signal_handler); - - pid = erl_system_process_otp(erts_init_process_id, - "erts_dirty_process_signal_handler", - !0, PRIORITY_HIGH); - erts_dirty_process_signal_handler_high - = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, - internal_pid_index(pid)); - ASSERT(erts_dirty_process_signal_handler_high - && erts_dirty_process_signal_handler_high->common.id == pid); - erts_proc_inc_refc(erts_dirty_process_signal_handler_high); - - pid = erl_system_process_otp(erts_init_process_id, - "erts_dirty_process_signal_handler", - !0, PRIORITY_MAX); - erts_dirty_process_signal_handler_max - = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, - internal_pid_index(pid)); - ASSERT(erts_dirty_process_signal_handler_max - && erts_dirty_process_signal_handler_max->common.id == pid); - erts_proc_inc_refc(erts_dirty_process_signal_handler_max); ->>>>>>> 759b4aa37d (Always launch sys processes) - - pid = 
erl_system_process_otp(erts_init_process_id, - "erts_trace_cleaner", !0, - PRIORITY_NORMAL); - erts_trace_cleaner - = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, - internal_pid_index(pid)); - ASSERT(erts_trace_cleaner && erts_trace_cleaner->common.id == pid); - erts_proc_inc_refc(erts_trace_cleaner); -<<<<<<< HEAD - -======= ->>>>>>> 759b4aa37d (Always launch sys processes) } erts_start_schedulers(); diff --git a/erts/emulator/beam/module.c b/erts/emulator/beam/module.c index 987513772cdf..be1a3d669d8a 100644 --- a/erts/emulator/beam/module.c +++ b/erts/emulator/beam/module.c @@ -161,35 +161,6 @@ init_module_table_replay(IndexTable *roots, int no_roots) erts_atomic_init_nob(&tot_module_bytes, 0); } -void -module_table_replay_debug_dump(void) -{ - int i; - for (i = 0; i < ERTS_NUM_CODE_IX; i++) { - erts_fprintf(stderr, - "replay_root_debug: module[%d] entries=%d size=%d limit=%d seg_table=%p hash_bucket=%p\n", - i, - module_tables[i].entries, - module_tables[i].size, - module_tables[i].limit, - (void *) module_tables[i].seg_table, - (void *) module_tables[i].htable.bucket); - erts_fprintf(stderr, - "replay_root_debug: module[%d].hash_fun stored=%p expected=%p match=%d\n", - i, - (void *) (UWord) module_tables[i].htable.fun.hash, - (void *) (UWord) ((H_FUN) module_hash), - module_tables[i].htable.fun.hash == (H_FUN) module_hash); - erts_fprintf(stderr, - "replay_root_debug: module[%d].cmp_fun stored=%p expected=%p match=%d\n", - i, - (void *) (UWord) module_tables[i].htable.fun.cmp, - (void *) (UWord) ((HCMP_FUN) module_cmp), - module_tables[i].htable.fun.cmp == (HCMP_FUN) module_cmp); - } -} - - Module* erts_get_module(Eterm mod, ErtsCodeIndex code_ix) { diff --git a/erts/emulator/beam/module.h b/erts/emulator/beam/module.h index 5733d4590ac9..b39f86dd6b59 100644 --- a/erts/emulator/beam/module.h +++ b/erts/emulator/beam/module.h @@ -77,7 +77,6 @@ void erts_seal_module(struct erl_module_instance *modi); void init_module_table(void); void 
init_module_table_replay(IndexTable *roots, int no_roots); -void module_table_replay_debug_dump(void); void module_start_staging(void); void module_end_staging(int commit); void module_info(fmtfn_t, void *); From ce0ce481e6c2dd11b4dbaaa57c320517107df696 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 28 Apr 2026 18:02:58 +0200 Subject: [PATCH 20/37] allocator: write roots dumps only in -record mode --- erts/emulator/beam/erl_alloc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 7b5f0562adee..54b6bee66c94 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -307,6 +307,14 @@ erts_alloc_struct_dump_snapshots_on_exit(void) char path[1024]; int len; + /* + * Snapshot dumps are replay inputs and must only be produced by an + * explicit -record run, never by plain/normal execution. + */ + if (!erts_mmap_record_option_record_enabled()) { + return; + } + if (erts_alloc_struct_snapshot_count <= 0 || erts_alloc_struct_snapshot_dir[0] == '\0') { return; } From 5e3840beac15aab99a2958ec8f0bcdac27d06013 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 28 Apr 2026 18:15:47 +0200 Subject: [PATCH 21/37] record/replay: remove nonessential trace and debug file dumps --- erts/emulator/beam/erl_alloc.c | 207 +++------------------------------ erts/emulator/beam/export.c | 147 ----------------------- 2 files changed, 14 insertions(+), 340 deletions(-) diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 54b6bee66c94..89aa8efa3d43 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -119,158 +119,6 @@ static ErtsAllocStructSnapshot static int erts_alloc_struct_snapshot_count = 0; static char erts_alloc_struct_snapshot_dir[512] = {0}; -#define ERTS_ALLOC_MAP_MAX_RANGES 16384 -typedef struct { - UWord start; - UWord end; - int kind; -} ErtsAllocMapRange; - -enum { - ERTS_ALLOC_MAP_KIND_UNKNOWN = 0, - 
ERTS_ALLOC_MAP_KIND_STACK, - ERTS_ALLOC_MAP_KIND_HEAP, - ERTS_ALLOC_MAP_KIND_MAPPED -}; - -static ErtsAllocMapRange erts_alloc_map_ranges[ERTS_ALLOC_MAP_MAX_RANGES]; -static int erts_alloc_map_range_count = 0; - -static const char * -erts_alloc_map_kind_name(int kind) -{ - switch (kind) { - case ERTS_ALLOC_MAP_KIND_STACK: return "stack"; - case ERTS_ALLOC_MAP_KIND_HEAP: return "heap"; - case ERTS_ALLOC_MAP_KIND_MAPPED: return "mapped"; - default: return "unknown"; - } -} - -static void -erts_alloc_map_load(void) -{ - FILE *fp; - char line[1024]; - erts_alloc_map_range_count = 0; - fp = fopen("/proc/self/maps", "r"); - if (!fp) { - return; - } - while (fgets(line, sizeof(line), fp) != NULL) { - unsigned long long start, end; - int kind = ERTS_ALLOC_MAP_KIND_MAPPED; - if (erts_alloc_map_range_count >= ERTS_ALLOC_MAP_MAX_RANGES) { - break; - } - if (sscanf(line, "%llx-%llx", &start, &end) != 2) { - continue; - } - if (strstr(line, "[stack]")) { - kind = ERTS_ALLOC_MAP_KIND_STACK; - } else if (strstr(line, "[heap]")) { - kind = ERTS_ALLOC_MAP_KIND_HEAP; - } - erts_alloc_map_ranges[erts_alloc_map_range_count].start = (UWord) start; - erts_alloc_map_ranges[erts_alloc_map_range_count].end = (UWord) end; - erts_alloc_map_ranges[erts_alloc_map_range_count].kind = kind; - erts_alloc_map_range_count++; - } - fclose(fp); -} - -static int -erts_alloc_map_classify_ptr(const void *ptr) -{ - int i; - UWord addr = (UWord) ptr; - for (i = 0; i < erts_alloc_map_range_count; i++) { - if (addr >= erts_alloc_map_ranges[i].start - && addr < erts_alloc_map_ranges[i].end) { - return erts_alloc_map_ranges[i].kind; - } - } - return ERTS_ALLOC_MAP_KIND_UNKNOWN; -} - -static void -erts_alloc_struct_walk_index_table(int root_ix, - const ErtsAllocStructSnapshot *snap, - int wfd) -{ - IndexTable *tab = (IndexTable *) snap->ptr; - int pages, page_ix, slot_ix; - char line[512]; - int len; - if (!tab || snap->size < sizeof(IndexTable)) { - return; - } - - len = erts_snprintf(line, sizeof(line), - 
"%d,%s,root,%p,%s\n", - root_ix, - snap->tag, - (void *) tab, - erts_alloc_map_kind_name(erts_alloc_map_classify_ptr(tab))); - if (len > 0) { - if (len >= (int) sizeof(line)) len = (int) sizeof(line) - 1; - erts_silence_warn_unused_result(write(wfd, line, (size_t) len)); - } - - len = erts_snprintf(line, sizeof(line), - "%d,%s,seg_table,%p,%s\n", - root_ix, - snap->tag, - (void *) tab->seg_table, - erts_alloc_map_kind_name(erts_alloc_map_classify_ptr(tab->seg_table))); - if (len > 0) { - if (len >= (int) sizeof(line)) len = (int) sizeof(line) - 1; - erts_silence_warn_unused_result(write(wfd, line, (size_t) len)); - } - - len = erts_snprintf(line, sizeof(line), - "%d,%s,htable.bucket,%p,%s\n", - root_ix, - snap->tag, - (void *) tab->htable.bucket, - erts_alloc_map_kind_name(erts_alloc_map_classify_ptr(tab->htable.bucket))); - if (len > 0) { - if (len >= (int) sizeof(line)) len = (int) sizeof(line) - 1; - erts_silence_warn_unused_result(write(wfd, line, (size_t) len)); - } - - pages = (tab->size + INDEX_PAGE_SIZE - 1) >> INDEX_PAGE_SHIFT; - for (page_ix = 0; page_ix < pages; page_ix++) { - IndexSlot **page = (tab->seg_table ? 
tab->seg_table[page_ix] : NULL); - len = erts_snprintf(line, sizeof(line), - "%d,%s,seg_page[%d],%p,%s\n", - root_ix, snap->tag, page_ix, (void *) page, - erts_alloc_map_kind_name(erts_alloc_map_classify_ptr(page))); - if (len > 0) { - if (len >= (int) sizeof(line)) len = (int) sizeof(line) - 1; - erts_silence_warn_unused_result(write(wfd, line, (size_t) len)); - } - if (!page) { - continue; - } - for (slot_ix = 0; slot_ix < INDEX_PAGE_SIZE; slot_ix++) { - int global_ix = (page_ix << INDEX_PAGE_SHIFT) + slot_ix; - IndexSlot *slot = page[slot_ix]; - if (!slot || global_ix >= tab->entries) { - continue; - } - len = erts_snprintf(line, sizeof(line), - "%d,%s,slot[%d],%p,%s\n", - root_ix, snap->tag, global_ix, (void *) slot, - erts_alloc_map_kind_name(erts_alloc_map_classify_ptr(slot))); - if (len > 0) { - if (len >= (int) sizeof(line)) len = (int) sizeof(line) - 1; - erts_silence_warn_unused_result(write(wfd, line, (size_t) len)); - } - } - } -} - static int erts_alloc_struct_should_snapshot(const char *tag) { @@ -302,7 +150,7 @@ erts_alloc_struct_register_snapshot(const char *tag, void *ptr, UWord size) static void erts_alloc_struct_dump_snapshots_on_exit(void) { - int i, fd, mfd, wfd; + int i, fd, mfd; char line[256]; char path[1024]; int len; @@ -372,25 +220,6 @@ erts_alloc_struct_dump_snapshots_on_exit(void) close(mfd); } - len = erts_snprintf(path, sizeof(path), "%s/roots.walk.csv", erts_alloc_struct_snapshot_dir); - if (len <= 0 || len >= (int) sizeof(path)) { - return; - } - wfd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666); - if (wfd < 0) { - return; - } - erts_silence_warn_unused_result(write(wfd, "root_index,tag,field,ptr,where\n", 31)); - erts_alloc_map_load(); - for (i = 0; i < erts_alloc_struct_snapshot_count; i++) { - ErtsAllocStructSnapshot *snap = &erts_alloc_struct_snapshots[i]; - if (erts_alloc_struct_should_snapshot(snap->tag) - && snap->tag[0] != 'c' /* skip "code_ix.root" (not an IndexTable) */ - && snap->size >= sizeof(IndexTable)) { - 
erts_alloc_struct_walk_index_table(i, snap, wfd); - } - } - close(wfd); } static ERTS_INLINE void @@ -441,22 +270,21 @@ erts_alloc_trace_note_alloc(const char *tag, void *ptr, UWord size) int len; int csv_len; const char *safe_tag; - if (erts_alloc_trace_fd < 0) { - return; - } + safe_tag = tag ? tag : "unknown"; - len = erts_snprintf(line, sizeof(line), - "STRUCT_ALLOC tag=%s size=%lu ptr=%p\n", - safe_tag, - (unsigned long) size, - ptr); - if (len < 0) { - return; - } - if (len >= (int) sizeof(line)) { - len = (int) sizeof(line) - 1; - } + if (erts_alloc_trace_fd >= 0) { + len = erts_snprintf(line, sizeof(line), + "STRUCT_ALLOC tag=%s size=%lu ptr=%p\n", + safe_tag, + (unsigned long) size, + ptr); + if (len < 0) { + return; + } + if (len >= (int) sizeof(line)) { + len = (int) sizeof(line) - 1; + } erts_alloc_trace_write(line, len); } if (erts_alloc_struct_csv_fd >= 0) { @@ -1079,13 +907,6 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) } if (csv_path && csv_path[0] != '\0') { erts_alloc_struct_csv_fd = open(csv_path, O_WRONLY|O_CREAT|O_APPEND, 0666); - } else if (trace_path && trace_path[0] != '\0') { - char default_csv_path[512]; - int plen = erts_snprintf(default_csv_path, sizeof(default_csv_path), - "%s.struct_alloc.csv", trace_path); - if (plen > 0 && plen < (int) sizeof(default_csv_path)) { - erts_alloc_struct_csv_fd = open(default_csv_path, O_WRONLY|O_CREAT|O_APPEND, 0666); - } } if (dump_dir && dump_dir[0] != '\0') { erts_snprintf(erts_alloc_struct_snapshot_dir, diff --git a/erts/emulator/beam/export.c b/erts/emulator/beam/export.c index 9c5b924b3bc4..13ac5d875b27 100644 --- a/erts/emulator/beam/export.c +++ b/erts/emulator/beam/export.c @@ -37,10 +37,6 @@ #define EXPORT_INITIAL_SIZE 4000 #define EXPORT_LIMIT (512*1024) -#define EXPORT_LAMBDA_DUMP_FILE "export-lambdas.csv" - -static int export_lambda_dump_hook_registered = 0; -static void export_dump_lambdas_on_exit(void); #ifdef DEBUG # define IF_DEBUG(x) x @@ -63,146 +59,6 @@ static 
void create_shared_lambda(Export *export) erts_global_literal_register(&export->lambda); } -static void -export_lambda_dump_path(char *buf, size_t bufsz) -{ - const char *base_dir = getenv("ERTS_ALLOC_STRUCT_DUMP_DIR"); - - if (!base_dir || base_dir[0] == '\0') { - base_dir = "_mmap-records/struct-root-dumps"; - } - - erts_snprintf(buf, bufsz, "%s/%s", base_dir, EXPORT_LAMBDA_DUMP_FILE); -} - -static int -export_mkdirs_for_path(const char *path) -{ - char tmp[1024]; - char *p; - - if (!path || path[0] == '\0') { - return 0; - } - - erts_snprintf(tmp, sizeof(tmp), "%s", path); - for (p = tmp + 1; *p; p++) { - if (*p == '/') { - *p = '\0'; - if (mkdir(tmp, 0777) < 0 && errno != EEXIST) { - return 0; - } - *p = '/'; - } - } - - return 1; -} - -static void -export_ensure_lambda_dump_file_for_record(void) -{ - char path[1024]; - int fd; - static const char header[] = - "idx,code_ix,module_raw,function_raw,arity,export_ptr,lambda_raw," - "lambda_box_ptr,thing_word,entry_exp_ptr,dispatch_addr\n"; - - if (!erts_mmap_record_option_record_enabled()) { - return; - } - - export_lambda_dump_path(path, sizeof(path)); - if (!export_mkdirs_for_path(path)) { - return; - } - - fd = open(path, O_WRONLY | O_CREAT | O_EXCL, 0666); - if (fd < 0) { - return; - } - - erts_silence_warn_unused_result(write(fd, header, sizeof(header) - 1)); - close(fd); -} - -static void -register_export_lambda_dump_hook_once(void) -{ - if (!export_lambda_dump_hook_registered) { - if (atexit(export_dump_lambdas_on_exit) == 0) { - export_lambda_dump_hook_registered = 1; - } - } -} - -static void -export_dump_lambdas_on_exit(void) -{ - char path[1024]; - FILE *f; - int code_ix; - int count; - int i; - - if (!erts_mmap_record_option_record_enabled()) { - return; - } - - export_lambda_dump_path(path, sizeof(path)); - f = fopen(path, "w"); - if (!f) { - return; - } - - fprintf(f, - "idx,code_ix,module_raw,function_raw,arity,export_ptr,lambda_raw," - "lambda_box_ptr,thing_word,entry_exp_ptr,dispatch_addr\n"); - - 
code_ix = erts_active_code_ix(); - count = export_list_size(code_ix); - - for (i = 0; i < count; i++) { - Export *ep = export_list(i, code_ix); - Eterm lambda; - UWord lambda_box_ptr = 0; - UWord thing_word = 0; - UWord entry_exp_ptr = 0; - UWord dispatch_addr = 0; - - if (!ep) { - continue; - } - - lambda = ep->lambda; - dispatch_addr = (UWord) ep->dispatch.addresses[code_ix]; - if (is_boxed(lambda)) { - ErlFunThing *funp = (ErlFunThing *) fun_val(lambda); - lambda_box_ptr = (UWord) funp; - thing_word = (UWord) funp->thing_word; - entry_exp_ptr = (UWord) funp->entry.exp; - } - - fprintf(f, - "%d,%d,0x%016llx,0x%016llx,%lu,0x%016llx,0x%016llx," - "0x%016llx,0x%016llx,0x%016llx,0x%016llx\n", - i, code_ix, - (unsigned long long) (UWord) ep->info.mfa.module, - (unsigned long long) (UWord) ep->info.mfa.function, - (unsigned long) ep->info.mfa.arity, - (unsigned long long) (UWord) ep, - (unsigned long long) (UWord) lambda, - (unsigned long long) lambda_box_ptr, - (unsigned long long) thing_word, - (unsigned long long) entry_exp_ptr, - (unsigned long long) dispatch_addr); - } - - fclose(f); -} - - - static HashValue export_hash(const Export *export) { return (atom_val(export->info.mfa.module) * @@ -279,8 +135,6 @@ init_export_table(void) int i; export_staged_init(); - register_export_lambda_dump_hook_once(); - export_ensure_lambda_dump_file_for_record(); for (i = 0; i < ERTS_NUM_CODE_IX; i++) { erts_alloc_trace_note_alloc("export_table.index_root", @@ -299,7 +153,6 @@ init_export_table_replay(IndexTable *roots, int no_roots) ASSERT(roots != NULL); ASSERT(no_roots == ERTS_NUM_CODE_IX); (void) no_roots; - register_export_lambda_dump_hook_once(); rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ; rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED; From 1502d8617025e4d8a5ceeb95795fca599b42f728 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Wed, 29 Apr 2026 12:48:54 +0200 Subject: [PATCH 22/37] Simplify replay setup by automatically forwarding replay flag to node boot arguments --- 
erts/emulator/beam/erl_init.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index cfef15e6a8fb..5b4d38500645 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -244,6 +244,7 @@ void erl_error(const char *fmt, va_list args) } static int early_init(int *argc, char **argv); +static void ensure_replay_node_argument(void); static int restore_struct_roots_for_replay(IndexTable *atom_root, IndexTable *module_roots, int table_capacity, @@ -731,6 +732,7 @@ static char* program; static char* init = "init"; static int boot_argc; static char** boot_argv; +static char replay_boot_arg[] = "-replay"; static char * get_arg(char* rest, char* next, int* ip) @@ -1557,6 +1559,33 @@ early_init(int *argc, char **argv) /* return ncpu; } +static void ensure_replay_node_argument(void) +{ + int j; + int has_replay_arg = 0; + char **replay_boot_argv; + + for (j = 0; j < boot_argc; j++) { + if (boot_argv[j] && sys_strcmp(boot_argv[j], "-replay") == 0) { + has_replay_arg = 1; + break; + } + } + + if (!has_replay_arg) { + replay_boot_argv = (char **) malloc(sizeof(char *) * (boot_argc + 1)); + if (!replay_boot_argv) { + erts_exit(ERTS_ABORT_EXIT, + "failed to append -replay to node boot arguments\n"); + } + for (j = 0; j < boot_argc; j++) { + replay_boot_argv[j] = boot_argv[j]; + } + replay_boot_argv[boot_argc] = replay_boot_arg; + boot_argv = replay_boot_argv; + boot_argc++; + } +} void erl_start(int argc, char **argv) @@ -2784,6 +2813,9 @@ erl_start(int argc, char **argv) boot_argc = argc - i; /* Number of arguments to init */ boot_argv = &argv[i]; + if (erts_mmap_record_option_replay_enabled()) { + ensure_replay_node_argument(); + } if (erts_sched_thread_suggested_stack_size < ERTS_SCHED_THREAD_MIN_STACK_SIZE) erts_sched_thread_suggested_stack_size = ERTS_SCHED_THREAD_MIN_STACK_SIZE; From 4059cb19eb6444961a5c07ef40a0db3758e12a00 Mon Sep 17 00:00:00 2001 From: Luca Succi
Date: Wed, 29 Apr 2026 14:32:32 +0200 Subject: [PATCH 23/37] Remove excessive trace notes --- erts/emulator/beam/beam_catches.c | 6 ------ erts/emulator/beam/beam_load.c | 3 --- erts/emulator/beam/emu/emu_load.c | 17 ----------------- erts/emulator/beam/erl_global_literals.c | 2 -- erts/emulator/beam/module.c | 1 - 5 files changed, 29 deletions(-) diff --git a/erts/emulator/beam/beam_catches.c b/erts/emulator/beam/beam_catches.c index a1a8d4d09bb7..4b2637eb4ba0 100644 --- a/erts/emulator/beam/beam_catches.c +++ b/erts/emulator/beam/beam_catches.c @@ -87,9 +87,6 @@ void beam_catches_init(void) bccix[0].high_mark = 0; bccix[0].beam_catches = erts_alloc(ERTS_ALC_T_CATCHES, sizeof(beam_catch_t)*DEFAULT_TABSIZE); - erts_alloc_trace_note_alloc("beam_catches.table", - bccix[0].beam_catches, - sizeof(beam_catch_t) * DEFAULT_TABSIZE); IF_DEBUG(bccix[0].is_staging = 0); for (i=1; ibeam_catches = erts_alloc(ERTS_ALC_T_CATCHES, newsize*sizeof(beam_catch_t)); - erts_alloc_trace_note_alloc("beam_catches.table.grow", - p->beam_catches, - newsize * sizeof(beam_catch_t)); sys_memcpy(p->beam_catches, prev_vec, p->tabsize*sizeof(beam_catch_t)); gc_old_vec(prev_vec); diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 315cde7fe4e0..3d20a9abf9aa 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -303,9 +303,6 @@ erts_finish_loading(Binary* magic, Process* c_p, } else { mod_tab_p->on_load = erts_alloc(ERTS_ALC_T_PREPARED_CODE, sizeof(struct erl_module_instance)); - erts_alloc_trace_note_alloc("module_table.on_load_instance", - mod_tab_p->on_load, - sizeof(struct erl_module_instance)); inst_p = mod_tab_p->on_load; erts_module_instance_init(inst_p); } diff --git a/erts/emulator/beam/emu/emu_load.c b/erts/emulator/beam/emu/emu_load.c index 465421608cd2..e5eb108ed033 100644 --- a/erts/emulator/beam/emu/emu_load.c +++ b/erts/emulator/beam/emu/emu_load.c @@ -69,10 +69,6 @@ int beam_load_prepare_emit(LoaderState *stp) { hdr =
erts_alloc(ERTS_ALC_T_CODE, (offsetof(BeamCodeHeader,functions) + sizeof(BeamInstr) * stp->codev_size)); - erts_alloc_trace_note_alloc("module_code.header.initial", - hdr, - offsetof(BeamCodeHeader, functions) - + sizeof(BeamInstr) * stp->codev_size); hdr->num_functions = stp->beam.code.function_count; hdr->attr_ptr = NULL; @@ -281,7 +277,6 @@ int beam_load_finish_emit(LoaderState *stp) { /* Move the code to its final location. */ code_hdr = (BeamCodeHeader*)erts_realloc(ERTS_ALC_T_CODE, (void *) code_hdr, size); - erts_alloc_trace_note_alloc("module_code.header.final", code_hdr, size); codev = (BeamInstr*)&code_hdr->functions; stp->code_hdr = code_hdr; stp->codev = codev; @@ -326,9 +321,6 @@ int beam_load_finish_emit(LoaderState *stp) { lit_asize = ERTS_LITERAL_AREA_ALLOC_SIZE(tot_lit_size); literal_area = erts_alloc(ERTS_ALC_T_LITERAL, lit_asize); - erts_alloc_trace_note_alloc("module_code.literal_area", - literal_area, - lit_asize); ptr = &literal_area->start[0]; literal_area->end = ptr + tot_lit_size; @@ -620,9 +612,6 @@ void beam_load_finalize_code(LoaderState* stp, struct erl_module_instance* inst_ export = erts_export_put(import->module, import->function, import->arity); - erts_alloc_trace_note_alloc("module_code.import_export_entry", - (void *) export, - sizeof(*export)); current = stp->import_patches[i]; while (current != 0) { @@ -658,9 +647,6 @@ void beam_load_finalize_code(LoaderState* stp, struct erl_module_instance* inst_ stp->beam.checksum, lambda->index, lambda->arity - lambda->num_free); - erts_alloc_trace_note_alloc("module_code.fun_entry", - fun_entry, - sizeof(*fun_entry)); fun_entries[i] = fun_entry; /* If there are no free variables, the loader has created a literal @@ -714,9 +700,6 @@ void beam_load_finalize_code(LoaderState* stp, struct erl_module_instance* inst_ ep = erts_export_put(stp->module, entry->function, entry->arity); - erts_alloc_trace_note_alloc("module_code.export_entry", - ep, - sizeof(*ep)); /* Fill in BIF stubs with a proper 
call to said BIF. */ if (ep->bif_number != -1) { diff --git a/erts/emulator/beam/erl_global_literals.c b/erts/emulator/beam/erl_global_literals.c index 86226dd541b4..6797e00e2ead 100644 --- a/erts/emulator/beam/erl_global_literals.c +++ b/erts/emulator/beam/erl_global_literals.c @@ -143,8 +143,6 @@ static void expand_shared_global_literal_area(Uint heap_size) (UWord) size, use_record_backend); } - erts_alloc_trace_note_alloc("global_literal.chunk", chunk, (UWord) size); - chunk->area.end = &(chunk->area.start[0]); chunk->chunk_end = &(chunk->area.start[heap_size]); chunk->area.off_heap = NULL; diff --git a/erts/emulator/beam/module.c b/erts/emulator/beam/module.c index be1a3d669d8a..e7dc3dd0e569 100644 --- a/erts/emulator/beam/module.c +++ b/erts/emulator/beam/module.c @@ -86,7 +86,6 @@ void erts_module_instance_init(struct erl_module_instance* modi) static Module* module_alloc(Module* tmpl) { Module* obj = (Module*) erts_alloc(ERTS_ALC_T_MODULE, sizeof(Module)); - erts_alloc_trace_note_alloc("module_table.module", obj, sizeof(Module)); erts_atomic_add_nob(&tot_module_bytes, sizeof(Module)); obj->module = tmpl->module; From e775692f5f7268877dbab802f31deb43c3f842a5 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Thu, 30 Apr 2026 12:46:10 +0200 Subject: [PATCH 24/37] erts replay: rebuild index hash buckets for restored tables --- erts/emulator/beam/atom.c | 1 + erts/emulator/beam/erl_fun.c | 2 +- erts/emulator/beam/export.c | 1 + erts/emulator/beam/index.c | 43 ++++++++++++++++++++++++++++++++++++ erts/emulator/beam/index.h | 1 + erts/emulator/beam/module.c | 1 + 6 files changed, 48 insertions(+), 1 deletion(-) diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c index c01bc273ea73..f8ef15dc82e6 100644 --- a/erts/emulator/beam/atom.c +++ b/erts/emulator/beam/atom.c @@ -529,6 +529,7 @@ init_atom_table_replay(IndexTable *root) f.meta_free = (HMFREE_FUN) erts_free; f.meta_print = (HMPRINT_FUN) erts_print; erts_atom_table.htable.fun = f; + 
erts_index_rebuild_hash_buckets(&erts_atom_table); atom_space = 0; for (i = 0; i < erts_atom_table.entries; i++) { diff --git a/erts/emulator/beam/erl_fun.c b/erts/emulator/beam/erl_fun.c index 6d7a3c659d25..a2877709d46e 100644 --- a/erts/emulator/beam/erl_fun.c +++ b/erts/emulator/beam/erl_fun.c @@ -176,6 +176,7 @@ void erts_init_fun_table_replay(IndexTable *roots, int no_roots) for (i = 0; i < ERTS_NUM_CODE_IX; i++) { fun_tables[i] = roots[i]; fun_tables[i].htable.fun = f; + erts_index_rebuild_hash_buckets(&fun_tables[i]); } } @@ -363,4 +364,3 @@ void erts_fun_end_staging(int commit) erts_has_code_stage_permission()); fun_staged_end_staging(commit); } - diff --git a/erts/emulator/beam/export.c b/erts/emulator/beam/export.c index 13ac5d875b27..5fcf46a08b51 100644 --- a/erts/emulator/beam/export.c +++ b/erts/emulator/beam/export.c @@ -177,6 +177,7 @@ init_export_table_replay(IndexTable *roots, int no_roots) for (i = 0; i < ERTS_NUM_CODE_IX; i++) { export_tables[i] = roots[i]; export_tables[i].htable.fun = f; + erts_index_rebuild_hash_buckets(&export_tables[i]); } } diff --git a/erts/emulator/beam/index.c b/erts/emulator/beam/index.c index a6dac7e281cf..4e4c43e10cbc 100644 --- a/erts/emulator/beam/index.c +++ b/erts/emulator/beam/index.c @@ -116,6 +116,49 @@ int index_get(IndexTable* t, void* tmpl) return -1; } +void +erts_index_rebuild_hash_buckets(IndexTable *t) +{ + int i; + int bits = ERTS_SIZEOF_TERM * 8; + Uint slots; + Uint sz; + int nobjs = 0; + HashBucket **new_bucket; + Hash *h; + + ASSERT(t != NULL); + + h = &t->htable; + ASSERT(h->shift > 0 && h->shift < bits); + ASSERT(bits - h->shift > 0); + + slots = UWORD_CONSTANT(1) << (bits - h->shift); + sz = slots * sizeof(HashBucket *); + + new_bucket = (HashBucket **) h->fun.meta_alloc(h->meta_alloc_type, sz); + memzero(new_bucket, sz); + + for (i = 0; i < t->entries; i++) { + HashBucket *b = (HashBucket *) erts_index_lookup(t, i); + if (b) { + Uint ix = hash_get_slot(h, b->hvalue); + b->next = new_bucket[ix]; 
+ new_bucket[ix] = b; + nobjs++; + } + } + + h->bucket = new_bucket; + h->nobjs = nobjs; + h->grow_threshold = (8 * (int) slots) / 5; + if (h->shift < h->max_shift) { + h->shrink_threshold = ((int) slots) / 5; + } else { + h->shrink_threshold = -1; + } +} + void index_erase_latest_from(IndexTable* t, Uint from_ix) { if(from_ix < (Uint)t->entries) { diff --git a/erts/emulator/beam/index.h b/erts/emulator/beam/index.h index 61f4e608eb58..9baf7ba100c3 100644 --- a/erts/emulator/beam/index.h +++ b/erts/emulator/beam/index.h @@ -59,6 +59,7 @@ int index_table_sz(IndexTable *); int index_get(IndexTable*, void*); IndexSlot* index_put_entry(IndexTable*, void*); +void erts_index_rebuild_hash_buckets(IndexTable *t); /* Erase all entries with index 'ix' and higher */ diff --git a/erts/emulator/beam/module.c b/erts/emulator/beam/module.c index e7dc3dd0e569..064d225f9d49 100644 --- a/erts/emulator/beam/module.c +++ b/erts/emulator/beam/module.c @@ -151,6 +151,7 @@ init_module_table_replay(IndexTable *roots, int no_roots) for (i = 0; i < ERTS_NUM_CODE_IX; i++) { module_tables[i] = roots[i]; module_tables[i].htable.fun = f; + erts_index_rebuild_hash_buckets(&module_tables[i]); } for (i=0; i Date: Tue, 5 May 2026 12:43:43 +0200 Subject: [PATCH 25/37] Dockerfile to build custom OTP --- .dockerignore | 22 ++++++++++++++++++++++ Dockerfile | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000000..cb7927f66d1a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,22 @@ +.git +.github +.devcontainer + +# Mirror key OTP gitignore rules so Docker does not copy generated launchers +/bin +/bootstrap/bin/* +!/bootstrap/bin/*.boot + +# Local OTP build artifacts that should not be sent as Docker build context +**/deps/ +**/erl_crash.dump +**/CONF_INFO +**/config.log +**/config.status +**/obj/ +**/obj.debug/ +erts/*-unknown-linux-gnu/ + +# 
Generated ASN.1 outputs with host-specific absolute paths +lib/public_key/src/OTP-PKIX-Relaxed.erl +lib/public_key/src/OTP-PKIX-Relaxed.hrl diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000000..e1244ffaac7e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,34 @@ +FROM buildpack-deps:trixie + +ENV OTP_VERSION="28.4.2" \ + REBAR3_VERSION="3.26.0" + +COPY . /usr/src/otp +WORKDIR /usr/src/otp +ENV ERL_TOP=/usr/src/otp + +# We'll install the build dependencies for erlang-odbc along with the erlang +# build process: +RUN set -xe \ + && runtimeDeps='libodbc2 \ + libsctp1 \ + libwxgtk3.2 \ + libwxgtk-webview3.2-dev ' \ + && buildDeps='unixodbc-dev \ + libsctp-dev ' \ + && apt-get update \ + && apt-get install -y --no-install-recommends $runtimeDeps \ + && apt-get install -y --no-install-recommends $buildDeps \ + && find . -type f \( -name config.log -o -name config.status -o -name erl_crash.dump \) -delete \ + && find . -type d \( -name deps -o -name obj -o -name obj.debug -o -name '*-unknown-linux-gnu' \) -prune -exec rm -rf {} + \ + && ./otp_build autoconf \ + && gnuArch="$(dpkg-architecture --query DEB_HOST_GNU_TYPE)" \ + && ./configure --build="$gnuArch" \ + && make -j$(nproc) \ + && make -j$(nproc) docs DOC_TARGETS=chunks \ + && make install install-docs DOC_TARGETS=chunks \ + && find /usr/local -name examples | xargs rm -rf \ + && apt-get purge -y --auto-remove $buildDeps \ + && rm -rf /var/lib/apt/lists/* + +CMD ["erl"] From 8fd0b01255e8d213b6aeb7c42ba7f137016804ea Mon Sep 17 00:00:00 2001 From: GwendalLaurent Date: Tue, 5 May 2026 13:00:32 +0200 Subject: [PATCH 26/37] bring back rebar3 into dockerfile --- Dockerfile | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Dockerfile b/Dockerfile index e1244ffaac7e..aeae202b5c4a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,3 +32,33 @@ RUN set -xe \ && rm -rf /var/lib/apt/lists/* CMD ["erl"] + +# extra useful tools here: rebar & rebar3 + +ENV REBAR_VERSION="2.6.4" + +RUN set
-xe \ + && REBAR_DOWNLOAD_URL="https://github.com/rebar/rebar/archive/${REBAR_VERSION}.tar.gz" \ + && REBAR_DOWNLOAD_SHA256="577246bafa2eb2b2c3f1d0c157408650446884555bf87901508ce71d5cc0bd07" \ + && mkdir -p /usr/src/rebar-src \ + && curl -fSL -o rebar-src.tar.gz "$REBAR_DOWNLOAD_URL" \ + && echo "$REBAR_DOWNLOAD_SHA256 rebar-src.tar.gz" | sha256sum -c - \ + && tar -xzf rebar-src.tar.gz -C /usr/src/rebar-src --strip-components=1 \ + && rm rebar-src.tar.gz \ + && cd /usr/src/rebar-src \ + && ./bootstrap \ + && install -v ./rebar /usr/local/bin/ \ + && rm -rf /usr/src/rebar-src + +RUN set -xe \ + && REBAR3_DOWNLOAD_URL="https://github.com/erlang/rebar3/archive/${REBAR3_VERSION}.tar.gz" \ + && REBAR3_DOWNLOAD_SHA256="a151dc4a07805490e9f217a099e597ac9774814875f55da2c66545c333fdff64" \ + && mkdir -p /usr/src/rebar3-src \ + && curl -fSL -o rebar3-src.tar.gz "$REBAR3_DOWNLOAD_URL" \ + && echo "$REBAR3_DOWNLOAD_SHA256 rebar3-src.tar.gz" | sha256sum -c - \ + && tar -xzf rebar3-src.tar.gz -C /usr/src/rebar3-src --strip-components=1 \ + && rm rebar3-src.tar.gz \ + && cd /usr/src/rebar3-src \ + && HOME=$PWD ./bootstrap \ + && install -v ./rebar3 /usr/local/bin/ \ + && rm -rf /usr/src/rebar3-src From 1643c9958512ceef9dba693af4128b19b2f4a13c Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 5 May 2026 09:46:27 +0200 Subject: [PATCH 27/37] Fix lock checking crash in debug builds --- erts/emulator/beam/erl_init.c | 24 +++++++++++++++++++++++- erts/emulator/beam/erl_lock_check.c | 2 ++ erts/emulator/beam/export.c | 6 ------ 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 5b4d38500645..6881d203aad1 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -358,8 +358,27 @@ erl_init(int ncpu, erl_sys_late_init(); packet_parser_init(); erl_nif_init(); - if (erts_mmap_record_option_replay_enabled()) + if (erts_mmap_record_option_replay_enabled()) { +#ifdef ERTS_ENABLE_LOCK_CHECK 
+ /* + * replay static NIF reinit invokes NIF load callbacks that expect to + * run with code modification permissions from a managed scheduler/aux + * context. During erl_init() we're on the unmanaged startup thread, + * so lock checking will abort. + */ + if (getenv("ERTS_REPLAY_FORCE_STATIC_NIF_REINIT")) { + ErtsThrPrgrDelayHandle replay_nif_dhndl; + replay_nif_dhndl = erts_thr_progress_unmanaged_delay(); + erts_replay_reinit_loaded_static_nifs(); + erts_thr_progress_unmanaged_continue(replay_nif_dhndl); + } +#else + ErtsThrPrgrDelayHandle replay_nif_dhndl; + replay_nif_dhndl = erts_thr_progress_unmanaged_delay(); erts_replay_reinit_loaded_static_nifs(); + erts_thr_progress_unmanaged_continue(replay_nif_dhndl); +#endif + } erts_msacc_init(); beamfile_init(); erts_late_init_external(); @@ -2837,7 +2856,10 @@ erl_start(int argc, char **argv) db_spin_count); if (erts_mmap_record_option_replay_enabled()) { + ErtsThrPrgrDelayHandle replay_validate_dhndl; + replay_validate_dhndl = erts_thr_progress_unmanaged_delay(); validate_replay_module_tables(); + erts_thr_progress_unmanaged_continue(replay_validate_dhndl); /* * Rebuild the per-module PC range table from the restored module * table. 
load_preloaded() (which normally calls erts_update_ranges() diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c index 3ee57565a200..285c446ea2c8 100644 --- a/erts/emulator/beam/erl_lock_check.c +++ b/erts/emulator/beam/erl_lock_check.c @@ -194,6 +194,8 @@ static erts_lc_lock_order_t erts_lock_order[] = { {"erts_alloc_hard_debug", NULL}, {"hard_dbg_mseg", NULL}, {"jit_debug_descriptor", NULL}, + {"mmap_record", NULL}, + {"mmap_record_literal", NULL}, {"erts_mmap", NULL}, #ifdef ERTS_ENSURE_OS_MONOTONIC_TIME {"ensure_os_monotonic_time", NULL}, diff --git a/erts/emulator/beam/export.c b/erts/emulator/beam/export.c index 5fcf46a08b51..5870cbdcad4d 100644 --- a/erts/emulator/beam/export.c +++ b/erts/emulator/beam/export.c @@ -346,7 +346,6 @@ void erts_export_replay_repair_all_lambdas(void) count = export_list_size(code_ix); for (i = 0; i < count; i++) { Export *ep = export_list(i, code_ix); - ErlFunThing *funp; if (!ep) { continue; @@ -358,10 +357,5 @@ void erts_export_replay_repair_all_lambdas(void) * lambda from current export metadata instead. */ create_shared_lambda(ep); - if (is_boxed(ep->lambda)) { - funp = (ErlFunThing *) fun_val(ep->lambda); - funp->thing_word = MAKE_FUN_HEADER(ep->info.mfa.arity, 0, 1); - funp->entry.exp = ep; - } } } From 4adb433441dec32e9bfcf6c7b3a53018256ec0dc Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 5 May 2026 11:28:14 +0200 Subject: [PATCH 28/37] erts: stabilize replay static NIF reinit for shell/module workflows Problem - Replay startup could execute static NIF reinitialization too early (during erl_init()) and on an unmanaged startup context. - In replay snapshots, NIF/module refc state can be restored in a stale/under-initialized form (notably zero), while runtime structures still assume a live baseline reference. 
- These conditions lead to brittle behavior in shell-driven module operations and can cascade into crashes/hangs when later code paths (dirty scheduling/resource-type takeover/load callbacks) consume invalid refcount assumptions. Root causes addressed 1) Reinit timing/context mismatch - Reinitializing static NIFs in erl_init() happens before init process identity is available and before a context aligned with code-mod permission expectations. 2) Replay refc baseline drift - dynlib_refc/refc for module/resource ownership may be observed as < 1 during replay even though objects are logically live, causing follow-up increments/assertions to operate on invalid baseline state. 3) prim_file load argument during replay - prim_file load path expects a process identity in relevant replay scenarios; SMALL_ZERO can be semantically wrong when init pid is available. What this commit changes - Move replay static NIF reinit from erl_init() to erl_start(), immediately after init process creation (erts_init_process_id assignment). - Keep reinit wrapped with unmanaged thread progress delay/continue, and under lock-check builds soften code-mod permission checks around replay reinit call. - In replay-specific paths, defensively re-establish refc baselines before increments/assertions: - schedule(): ensure env->mod_nif->dynlib_refc >= 1 before first dirty scheduling ownership bind. - prepare_opened_rt(): ensure previous owner refc/dynlib_refc and new owner lib refc/dynlib_refc are initialized to a valid baseline when replay restored state is < 1. - Relax open_resource_type() lock-check assertion under replay to permit replay reinit path semantics. - In erts_replay_reinit_loaded_static_nifs(), pass init pid as load_arg for prim_file when available; keep SMALL_ZERO default for others. Why this is safe - All baseline reinitializations are gated behind replay mode and only applied when current value is < 1. - Normal non-replay execution paths are unchanged. 
- Changes preserve existing ownership/reference increment logic; they only prevent invalid zero/negative-baseline transitions in replay. Files - erts/emulator/beam/erl_init.c - erts/emulator/beam/erl_nif.c --- erts/emulator/beam/erl_init.c | 30 ++++++++------------------- erts/emulator/beam/erl_nif.c | 39 ++++++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 6881d203aad1..1cbbbb939902 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -358,27 +358,6 @@ erl_init(int ncpu, erl_sys_late_init(); packet_parser_init(); erl_nif_init(); - if (erts_mmap_record_option_replay_enabled()) { -#ifdef ERTS_ENABLE_LOCK_CHECK - /* - * replay static NIF reinit invokes NIF load callbacks that expect to - * run with code modification permissions from a managed scheduler/aux - * context. During erl_init() we're on the unmanaged startup thread, - * so lock checking will abort. 
- */ - if (getenv("ERTS_REPLAY_FORCE_STATIC_NIF_REINIT")) { - ErtsThrPrgrDelayHandle replay_nif_dhndl; - replay_nif_dhndl = erts_thr_progress_unmanaged_delay(); - erts_replay_reinit_loaded_static_nifs(); - erts_thr_progress_unmanaged_continue(replay_nif_dhndl); - } -#else - ErtsThrPrgrDelayHandle replay_nif_dhndl; - replay_nif_dhndl = erts_thr_progress_unmanaged_delay(); - erts_replay_reinit_loaded_static_nifs(); - erts_thr_progress_unmanaged_continue(replay_nif_dhndl); -#endif - } erts_msacc_init(); beamfile_init(); erts_late_init_external(); @@ -2963,6 +2942,15 @@ erl_start(int argc, char **argv) erts_initialized = 1; erts_init_process_id = erl_first_process_otp(init, boot_argc, boot_argv); ASSERT(erts_init_process_id != ERTS_INVALID_PID); + if (erts_mmap_record_option_replay_enabled()) { + ErtsThrPrgrDelayHandle replay_nif_dhndl; + replay_nif_dhndl = erts_thr_progress_unmanaged_delay(); +#ifdef ERTS_ENABLE_LOCK_CHECK + erts_lc_soften_code_mod_permission_check(); +#endif + erts_replay_reinit_loaded_static_nifs(); + erts_thr_progress_unmanaged_continue(replay_nif_dhndl); + } { /* diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index 11c0b1064b44..562585dcd16a 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -393,6 +393,16 @@ schedule(ErlNifEnv* env, NativeFunPtr direct_fp, NativeFunPtr indirect_fp, argc, (const Eterm *) argv); if (!ep->m) { /* First time this call is scheduled... */ + if (erts_mmap_record_option_replay_enabled() + && erts_refc_read(&env->mod_nif->dynlib_refc, 0) < 1) { + /* + * Replay may restore stale dynamic-library refc state from the + * snapshot (for example zero even though the module instance still + * points at a live static NIF). Re-establish the baseline module + * reference so dirty NIF scheduling can safely take its call ref. 
+ */ + erts_refc_init(&env->mod_nif->dynlib_refc, 1); + } erts_refc_inc(&env->mod_nif->dynlib_refc, 2); ep->m = env->mod_nif; } @@ -2679,7 +2689,8 @@ ErlNifResourceType* open_resource_type(ErlNifEnv* env, if (!env->mod_nif || !(env->mod_nif->flags & ERTS_MOD_NIF_FLG_LOADING)) goto done; - ERTS_LC_ASSERT(erts_has_code_mod_permission()); + ERTS_LC_ASSERT(erts_has_code_mod_permission() + || erts_mmap_record_option_replay_enabled()); module_am = make_atom(env->mod_nif->mod->module); name_am = enif_make_atom(env, name_str); @@ -2792,7 +2803,15 @@ static void prepare_opened_rt(struct erl_module_nif* lib) } else { /* ERL_NIF_RT_TAKEOVER */ steal_resource_type(type); + if (erts_mmap_record_option_replay_enabled() + && erts_refc_read(&type->owner->refc, 0) < 1) { + erts_refc_init(&type->owner->refc, 1); + } ASSERT(erts_refc_read(&type->owner->refc, 1) > 0); + if (erts_mmap_record_option_replay_enabled() + && erts_refc_read(&type->owner->dynlib_refc, 0) < 1) { + erts_refc_init(&type->owner->dynlib_refc, 1); + } ASSERT(erts_refc_read(&type->owner->dynlib_refc, 1) > 0); /* @@ -2805,8 +2824,17 @@ static void prepare_opened_rt(struct erl_module_nif* lib) } type->owner = lib; - if (rt_have_callbacks(&ort->new_callbacks)) + if (rt_have_callbacks(&ort->new_callbacks)) { + if (erts_mmap_record_option_replay_enabled() + && erts_refc_read(&lib->dynlib_refc, 0) < 1) { + erts_refc_init(&lib->dynlib_refc, 1); + } erts_refc_inc(&lib->dynlib_refc, 2); + } + if (erts_mmap_record_option_replay_enabled() + && erts_refc_read(&lib->refc, 0) < 1) { + erts_refc_init(&lib->refc, 1); + } erts_refc_inc(&lib->refc, 2); ort = ort->next; @@ -5323,6 +5351,7 @@ erts_replay_reinit_loaded_static_nifs(void) ErlNifEntry* entry = p->entry; ErlNifEnv env; void* priv_data; + Eterm load_arg = SMALL_ZERO; int veto; if (entry == NULL || entry->load == NULL @@ -5344,7 +5373,11 @@ erts_replay_reinit_loaded_static_nifs(void) priv_data = lib->priv_data; lib->flags |= ERTS_MOD_NIF_FLG_LOADING; - veto = 
entry->load(&env, &priv_data, SMALL_ZERO); + if (sys_strcmp(entry->name, "prim_file") == 0 + && is_internal_pid(erts_init_process_id)) { + load_arg = erts_init_process_id; + } + veto = entry->load(&env, &priv_data, load_arg); lib->flags &= ~ERTS_MOD_NIF_FLG_LOADING; if (veto) { From a48a3dc701d7587f2b81c7fc0b2b98e99a4f3c06 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 5 May 2026 14:56:15 +0200 Subject: [PATCH 29/37] erts replay: add debug instrumentation for term-copy/ETS/NIF diagnosis Add helpers used to diagnose the ets:insert deep-copy crash and the empty-tuple mismatch that caused it. - erl_mmap.h / erl_mmap_record.c: expose erts_mmap_record_arena_contains() and erts_mmap_record_arena_bounds() so any file can classify a pointer as ARENA / LITERAL / HEAP. - copy.c: add replay_classify_ptr, replay_subtag_name and erts_replay_dump_term_to_stderr which recursively walks and prints an Erlang term, annotating each slot with its class. Controlled by ERTS_REPLAY_COPY_DEBUG env var. - erl_db.c: add ets_insert_replay_dump helper that calls the above before every ets:insert / ets:insert_new during replay. Activated by ERTS_REPLAY_ETS_INSERT_DEBUG env var. - erl_gc.c: add ERTS_REPLAY_GC_PTR_MIN/MAX env-var-controlled hook for tracing suspicious pointer ranges during GC. Controlled by those vars. - global.h: declare erts_replay_dump_term_to_stderr and erts_replay_static_nif_phase for cross-file access. All helpers are compile-in debug aids; they are gated behind env vars so they are silent by default in normal and replay production runs. 
--- erts/emulator/beam/copy.c | 234 +++++++++++++++++++- erts/emulator/beam/erl_alloc.c | 4 +- erts/emulator/beam/erl_db.c | 29 +++ erts/emulator/beam/erl_gc.c | 76 +++++++ erts/emulator/beam/erl_global_literals.c | 129 ++++++++++- erts/emulator/beam/erl_global_literals.h | 17 ++ erts/emulator/beam/erl_nif.c | 244 ++++++++++++++++++--- erts/emulator/beam/global.h | 18 ++ erts/emulator/beam/module.c | 7 +- erts/emulator/sys/common/erl_mmap.h | 8 + erts/emulator/sys/common/erl_mmap_record.c | 21 ++ 11 files changed, 748 insertions(+), 39 deletions(-) diff --git a/erts/emulator/beam/copy.c b/erts/emulator/beam/copy.c index 3eb41b33fe9b..f373ea72b8a1 100644 --- a/erts/emulator/beam/copy.c +++ b/erts/emulator/beam/copy.c @@ -121,15 +121,30 @@ Uint size_object_x(Eterm obj, erts_literal_area_t *litopt) } hdr = *ptr; ASSERT(is_header(hdr)); - switch (hdr & _TAG_HEADER_MASK) { - case ARITYVAL_SUBTAG: - arity = header_arity(hdr); - if (arity == 0) { /* Empty tuple -- unusual. */ - ASSERT(!litopt && - erts_is_literal(obj,ptr) && - obj == ERTS_GLOBAL_LIT_EMPTY_TUPLE); - /* - The empty tuple is always a global literal + switch (hdr & _TAG_HEADER_MASK) { + case ARITYVAL_SUBTAG: + arity = header_arity(hdr); + if (arity == 0) { /* Empty tuple -- unusual. */ + if (!( !litopt + && erts_is_literal(obj,ptr) + && obj == ERTS_GLOBAL_LIT_EMPTY_TUPLE)) { + if (erts_mmap_record_option_replay_enabled()) { + erts_fprintf(stderr, + "replay_copy_debug: arity0 tuple obj=%p ptr=%p hdr=%p lit=%d global_empty=%p litopt=%p pid=%T\n", + (void *)(UWord) obj, + (void *) ptr, + (void *)(UWord) hdr, + erts_is_literal(obj, ptr), + (void *)(UWord) ERTS_GLOBAL_LIT_EMPTY_TUPLE, + (void *) litopt, + mypid); + } + } + ASSERT(!litopt && + erts_is_literal(obj,ptr) && + obj == ERTS_GLOBAL_LIT_EMPTY_TUPLE); + /* + The empty tuple is always a global literal constant so it does not take up any extra space. 
*/ @@ -2080,6 +2095,207 @@ void erts_move_multi_frags(Eterm** hpp, ErlOffHeap* off_heap, ErlHeapFragment* f } } +/* ====================================================================== * + * Replay debug helper: walk an Eterm and dump every reachable subterm + * with classification of each pointer (ARENA / LITERAL / HEAP) and its + * header word. Useful when a corrupted term is about to be deep-copied + * (e.g. into ETS) so we can pinpoint which boxed pointer is stale. + * + * The walker is intentionally tolerant: it will not abort on bad headers + * (it just prints them) so the dump completes even when the input term + * is malformed. + * ====================================================================== */ + +static const char * +replay_classify_ptr(const Eterm *ptr) +{ + if (ptr == NULL) { + return "NULL"; + } + if (erts_mmap_record_arena_contains(ptr)) { + return "ARENA"; + } + if (erts_is_in_literal_range((void *) ptr)) { + return "LITERAL"; + } + return "HEAP"; +} + +static const char * +replay_subtag_name(Eterm hdr) +{ + if (!is_header(hdr)) { + return "NOT-HEADER"; + } + switch (hdr & _TAG_HEADER_MASK) { + case ARITYVAL_SUBTAG: return "TUPLE"; + case POS_BIG_SUBTAG: return "POS_BIG"; + case NEG_BIG_SUBTAG: return "NEG_BIG"; + case REF_SUBTAG: return "REF"; + case FUN_SUBTAG: return "FUN"; + case FLOAT_SUBTAG: return "FLOAT"; + case BIN_REF_SUBTAG: return "BIN_REF"; + case MAP_SUBTAG: return "MAP"; + case EXTERNAL_PID_SUBTAG: return "EXT_PID"; + case EXTERNAL_PORT_SUBTAG: return "EXT_PORT"; + case EXTERNAL_REF_SUBTAG: return "EXT_REF"; + case HEAP_BITS_SUBTAG: return "HEAP_BITS"; + case SUB_BITS_SUBTAG: return "SUB_BITS"; + default: return "UNKNOWN"; + } +} + +void +erts_replay_dump_term_to_stderr(Eterm root, const char *ctx, Eterm pid) +{ + DECLARE_ESTACK(s); + int slot = 0; + const int max_slots = 256; + const char *base = NULL; + UWord arena_size = 0; + + erts_mmap_record_arena_bounds(&base, &arena_size); + erts_fprintf(stderr, + "replay_term_dump 
BEGIN ctx=%s pid=%T root_raw=%p arena=[%p..%p)\n", + ctx, pid, (void *)(UWord) root, + (void *) base, + (void *) (base ? base + arena_size : NULL)); + + ESTACK_PUSH(s, root); + while (!ESTACK_ISEMPTY(s)) { + Eterm obj; + if (slot >= max_slots) { + erts_fprintf(stderr, " ... (truncated at %d slots)\n", slot); + break; + } + obj = ESTACK_POP(s); + slot++; + + switch (primary_tag(obj)) { + case TAG_PRIMARY_IMMED1: + erts_fprintf(stderr, + " [%d] IMM raw=%p val=%T\n", + slot, (void *)(UWord) obj, obj); + break; + case TAG_PRIMARY_LIST: { + Eterm *ptr = list_val(obj); + const char *cls = replay_classify_ptr(ptr); + erts_fprintf(stderr, + " [%d] LIST raw=%p ptr=%p cls=%s", + slot, (void *)(UWord) obj, (void *) ptr, cls); + if (ptr == NULL) { + erts_fprintf(stderr, " !!!NULL_LIST_PTR\n"); + break; + } + erts_fprintf(stderr, " car=%p cdr=%p\n", + (void *)(UWord) ptr[0], + (void *)(UWord) ptr[1]); + ESTACK_PUSH(s, ptr[1]); + ESTACK_PUSH(s, ptr[0]); + break; + } + case TAG_PRIMARY_BOXED: { + Eterm *ptr = boxed_val(obj); + Eterm hdr; + const char *cls = replay_classify_ptr(ptr); + if (ptr == NULL) { + erts_fprintf(stderr, + " [%d] BOX raw=%p ptr=NULL !!!NULL_BOX_PTR\n", + slot, (void *)(UWord) obj); + break; + } + hdr = *ptr; + erts_fprintf(stderr, + " [%d] BOX raw=%p ptr=%p cls=%s hdr=%p kind=%s", + slot, (void *)(UWord) obj, (void *) ptr, cls, + (void *)(UWord) hdr, replay_subtag_name(hdr)); + if (!is_header(hdr)) { + erts_fprintf(stderr, " !!!INVALID_HEADER\n"); + break; + } + switch (hdr & _TAG_HEADER_MASK) { + case ARITYVAL_SUBTAG: { + int arity = header_arity(hdr); + int i; + erts_fprintf(stderr, " arity=%d\n", arity); + if (arity == 0) { + if (obj != ERTS_GLOBAL_LIT_EMPTY_TUPLE) { + erts_fprintf(stderr, + " !!!arity-0 tuple is NOT the global " + "empty literal (global=%p)\n", + (void *)(UWord) ERTS_GLOBAL_LIT_EMPTY_TUPLE); + } + } + for (i = arity; i >= 1; i--) { + ESTACK_PUSH(s, ptr[i]); + } + break; + } + case MAP_SUBTAG: + switch (MAP_HEADER_TYPE(hdr)) { + case 
MAP_HEADER_TAG_FLATMAP_HEAD: { + flatmap_t *mp = (flatmap_t *) flatmap_val(obj); + Uint n = flatmap_get_size(mp); + Eterm *vals = flatmap_get_values(mp); /* n values follow the flatmap_t header */ + Uint i; + erts_fprintf(stderr, " flatmap_size=%bpu keys=%p\n", + (UWord) n, (void *)(UWord) mp->keys); + ESTACK_PUSH(s, mp->keys); + for (i = 0; i < n; i++) { + ESTACK_PUSH(s, vals[i]); /* values */ + } + break; + } + case MAP_HEADER_TAG_HAMT_HEAD_BITMAP: + case MAP_HEADER_TAG_HAMT_HEAD_ARRAY: + case MAP_HEADER_TAG_HAMT_NODE_BITMAP: { + Eterm *head = hashmap_val(obj); + Uint sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + Uint hdr_arity = header_arity(hdr); + Uint i; + erts_fprintf(stderr, " hashmap_size=%bpu\n", (UWord) sz); + head += 1 + hdr_arity; + for (i = 0; i < sz; i++) { + ESTACK_PUSH(s, head[i]); + } + break; + } + default: + erts_fprintf(stderr, " bad-map-type\n"); + break; + } + break; + case FUN_SUBTAG: { + const ErlFunThing *funp = (ErlFunThing *) fun_val(obj); + int n = fun_num_free(funp); + int i; + erts_fprintf(stderr, " fun_free=%d\n", n); + for (i = 0; i < n; i++) { + ESTACK_PUSH(s, funp->env[i]); + } + break; + } + default: + erts_fprintf(stderr, " arityval=%bpu (no recursion)\n", + (UWord) thing_arityval(hdr)); + break; + } + break; + } + case TAG_PRIMARY_HEADER: + erts_fprintf(stderr, + " [%d] HDR raw=%p (unexpected on stack)\n", + slot, (void *)(UWord) obj); + break; + } + } + + erts_fprintf(stderr, + "replay_term_dump END ctx=%s pid=%T slots=%d\n", + ctx, pid, slot); + DESTROY_ESTACK(s); +} + static void move_one_frag(Eterm** hpp, ErlHeapFragment* frag, ErlOffHeap* off_heap, int literals) { diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 89aa8efa3d43..cceac521662c 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -128,7 +128,9 @@ erts_alloc_struct_should_snapshot(const char *tag) || strcmp(tag, "export_table.index_root") == 0 || strcmp(tag, "fun_table.index_root") == 0 || strcmp(tag, "code_ix.root") == 0 - || strcmp(tag, 
"beam_catches.bccix") == 0); + || strcmp(tag, "beam_catches.bccix") == 0 + || strcmp(tag, "global_literals.empty_tuple") == 0 + || strcmp(tag, "global_literals.chunk_head") == 0); } static void diff --git a/erts/emulator/beam/erl_db.c b/erts/emulator/beam/erl_db.c index eacebcf4b137..a485376ccf6e 100644 --- a/erts/emulator/beam/erl_db.c +++ b/erts/emulator/beam/erl_db.c @@ -2266,6 +2266,33 @@ static BIF_RETTYPE ets_insert_2_list_driver(Process* p, return ret; } +/* + * Replay diagnostic: dump the term being inserted into ETS to stderr. + * Activated only when -replay is in effect AND the env var + * ERTS_REPLAY_ETS_INSERT_DEBUG is set. This is meant to identify which + * boxed sub-term carries a stale arena pointer, by classifying every + * pointer reachable from the inserted term as ARENA / LITERAL / HEAP + * and printing its header word. + */ +static ERTS_INLINE void +ets_insert_replay_dump(Process *p, const char *bif_name, + Eterm tab, Eterm obj_or_list) +{ + if (!erts_mmap_record_option_replay_enabled()) { + return; + } + if (getenv("ERTS_REPLAY_ETS_INSERT_DEBUG") == NULL) { + return; + } + erts_fprintf(stderr, + "replay_ets_insert: bif=%s pid=%T tab=%T list_or_obj_raw=%p " + "nif_phase=%d\n", + bif_name, p->common.id, tab, + (void *)(UWord) obj_or_list, + erts_replay_static_nif_phase); + erts_replay_dump_term_to_stderr(obj_or_list, bif_name, p->common.id); +} + /* ** The put BIF */ @@ -2277,6 +2304,7 @@ BIF_RETTYPE ets_insert_2(BIF_ALIST_2) DbTableMethod* meth; SWord consumed_reds = 0; CHECK_TABLES(); + ets_insert_replay_dump(BIF_P, "ets_insert_2", BIF_ARG_1, BIF_ARG_2); if (BIF_ARG_2 == NIL) { /* Check that the table exists */ DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE_REC, BIF_ets_insert_2); @@ -2324,6 +2352,7 @@ BIF_RETTYPE ets_insert_new_2(BIF_ALIST_2) db_lock_kind_t kind; SWord consumed_reds = 0; CHECK_TABLES(); + ets_insert_replay_dump(BIF_P, "ets_insert_new_2", BIF_ARG_1, BIF_ARG_2); if (BIF_ARG_2 == NIL) { /* Check that the table exists */ diff --git 
a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index 080314ee290e..91be0dc637d3 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -44,6 +44,7 @@ #include "erl_proc_sig_queue.h" #include "beam_common.h" #include "beam_bp.h" +#include "erl_mmap.h" #define ERTS_INACT_WR_PB_LEAVE_MUCH_LIMIT 1 #define ERTS_INACT_WR_PB_LEAVE_MUCH_PERCENTAGE 20 @@ -72,6 +73,52 @@ */ #define ALENGTH(a) (sizeof(a)/sizeof(a[0])) +static int replay_gc_ptr_dbg_inited; +static int replay_gc_ptr_dbg_enabled; +static UWord replay_gc_ptr_dbg_min; +static UWord replay_gc_ptr_dbg_max; + +static ERTS_INLINE void +replay_gc_ptr_dbg_init(void) +{ + const char *min_str; + const char *max_str; + char *endp; + unsigned long long v; + + if (replay_gc_ptr_dbg_inited) { + return; + } + replay_gc_ptr_dbg_inited = 1; + replay_gc_ptr_dbg_enabled = 0; + replay_gc_ptr_dbg_min = 0; + replay_gc_ptr_dbg_max = 0; + + min_str = getenv("ERTS_REPLAY_GC_PTR_MIN"); + max_str = getenv("ERTS_REPLAY_GC_PTR_MAX"); + if (!min_str || !max_str || !min_str[0] || !max_str[0]) { + return; + } + + v = strtoull(min_str, &endp, 0); + if (!endp || *endp != '\0') { + return; + } + replay_gc_ptr_dbg_min = (UWord) v; + + v = strtoull(max_str, &endp, 0); + if (!endp || *endp != '\0') { + return; + } + replay_gc_ptr_dbg_max = (UWord) v; + if (replay_gc_ptr_dbg_max < replay_gc_ptr_dbg_min) { + UWord tmp = replay_gc_ptr_dbg_min; + replay_gc_ptr_dbg_min = replay_gc_ptr_dbg_max; + replay_gc_ptr_dbg_max = tmp; + } + replay_gc_ptr_dbg_enabled = 1; +} + /* Actual stack usage, note that this may include words in the redzone. 
*/ # define STACK_SZ_ON_HEAP(p) (STACK_START(p) - STACK_TOP(p)) @@ -2276,6 +2323,35 @@ sweep(Eterm *n_hp, Eterm *n_htop, ASSERT(is_boxed(val)); *n_hp++ = val; } else if (ERTS_IS_IN_SWEEP_AREA(gval, ptr)) { + replay_gc_ptr_dbg_init(); + if (replay_gc_ptr_dbg_enabled) { + UWord p = (UWord) ptr; + if (p >= replay_gc_ptr_dbg_min && p <= replay_gc_ptr_dbg_max) { + int wi; + erts_fprintf(stderr, + "replay_gc_ptr_dbg: sweep boxed ptr=%p gval=%#lx hdr=%#lx is_header=%d n_hp=%p n_htop=%p\n", + ptr, + (unsigned long) gval, + (unsigned long) val, + is_header(val) ? 1 : 0, + n_hp, + n_htop); + if (is_header(val)) { + erts_fprintf(stderr, + "replay_gc_ptr_dbg: sweep boxed arity=%ld subtag=%#lx\n", + (long) header_arity(val), + (unsigned long) (val & _HEADER_SUBTAG_MASK)); + } + for (wi = -4; wi <= 8; wi++) { + Eterm *wp = n_hp + wi; + erts_fprintf(stderr, + "replay_gc_ptr_dbg: n_hp[%+d] @ %p = %#lx\n", + wi, + wp, + (unsigned long) *wp); + } + } + } move_boxed(ptr,val,&n_htop,n_hp++); } else { n_hp++; diff --git a/erts/emulator/beam/erl_global_literals.c b/erts/emulator/beam/erl_global_literals.c index 6797e00e2ead..480f8dd1951c 100644 --- a/erts/emulator/beam/erl_global_literals.c +++ b/erts/emulator/beam/erl_global_literals.c @@ -24,6 +24,10 @@ # include "config.h" #endif +#include +#include +#include + #include "sys.h" #include "global.h" #include "erl_global_literals.h" @@ -221,7 +225,130 @@ init_global_literals(void) { erts_mtx_init(&global_literal_lock, "global_literals", NIL, ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC); - + + /* + * Replay path: instead of allocating a fresh global-literal chunk + * (which would produce a new ERTS_GLOBAL_LIT_EMPTY_TUPLE at a different + * virtual address than the record run baked into every empty literal + * flatmap's `keys` field), reload the snapshotted globals from + * struct-root-dumps. The empty tuple's underlying bytes [0,0] still + * live at the same arena address thanks to MAP_PRIVATE. 
+ */ + if (erts_mmap_record_option_replay_enabled() + && erts_global_literals_apply_replay_root()) { + return; + } + expand_shared_global_literal_area(GLOBAL_LITERAL_INITIAL_SIZE); init_empty_tuple(); + + /* + * Record path: register the global state for the struct-root-dump so + * the next replay run can restore the same empty-tuple Eterm value and + * chunk-list head. Done after init_empty_tuple so the snapshot + * captures the post-init values. + */ + if (erts_mmap_record_option_record_enabled()) { + erts_alloc_trace_note_alloc("global_literals.empty_tuple", + &ERTS_GLOBAL_LIT_EMPTY_TUPLE, + sizeof(ERTS_GLOBAL_LIT_EMPTY_TUPLE)); + erts_alloc_trace_note_alloc("global_literals.chunk_head", + &global_literal_chunk, + sizeof(global_literal_chunk)); + } +} + +int +erts_global_literals_apply_replay_root(void) +{ + const char *base_dir = getenv("ERTS_ALLOC_STRUCT_DUMP_DIR"); + char dir_buf[512]; + char manifest_path[1024]; + FILE *mf; + char line[1024]; + int loaded_empty_tuple = 0; + int loaded_chunk_head = 0; + + if (!base_dir || base_dir[0] == '\0') { + base_dir = "_mmap-records/struct-root-dumps"; + } + erts_snprintf(dir_buf, sizeof(dir_buf), "%s", base_dir); + erts_snprintf(manifest_path, sizeof(manifest_path), + "%s/roots.csv", dir_buf); + + mf = fopen(manifest_path, "r"); + if (!mf) { + return 0; + } + + while (fgets(line, sizeof(line), mf) != NULL) { + char *p1, *p2, *p3, *p4; + char *tag, *szs, *file; + unsigned long sz; + char file_path[1024]; + FILE *bf; + void *dst = NULL; + UWord want_size = 0; + + if (line[0] == '\0' || line[0] == '\n' || line[0] == '#' + || !isdigit((unsigned char) line[0])) { + continue; + } + p1 = strchr(line, ','); if (!p1) continue; + p2 = strchr(p1 + 1, ','); if (!p2) continue; + p3 = strchr(p2 + 1, ','); if (!p3) continue; + p4 = strchr(p3 + 1, ','); if (!p4) continue; + tag = p1 + 1; *p2 = '\0'; + szs = p3 + 1; *p4 = '\0'; + file = p4 + 1; + file[strcspn(file, "\r\n")] = '\0'; + + if (strcmp(tag, "global_literals.empty_tuple") == 
0) { + dst = &ERTS_GLOBAL_LIT_EMPTY_TUPLE; + want_size = sizeof(ERTS_GLOBAL_LIT_EMPTY_TUPLE); + } else if (strcmp(tag, "global_literals.chunk_head") == 0) { + dst = &global_literal_chunk; + want_size = sizeof(global_literal_chunk); + } else { + continue; + } + + sz = strtoul(szs, NULL, 10); + if ((UWord) sz != want_size) { + erts_fprintf(stderr, + "global_literals replay restore size mismatch tag=%s " + "dump=%lu expected=%bpu\n", + tag, sz, want_size); + continue; + } + erts_snprintf(file_path, sizeof(file_path), "%s/%s", dir_buf, file); + bf = fopen(file_path, "rb"); + if (!bf) continue; + if (fread(dst, 1, want_size, bf) == want_size) { + if (dst == &ERTS_GLOBAL_LIT_EMPTY_TUPLE) { + loaded_empty_tuple = 1; + } else if (dst == &global_literal_chunk) { + loaded_chunk_head = 1; + } + } + fclose(bf); + } + + fclose(mf); + + if (loaded_empty_tuple && loaded_chunk_head) { + erts_fprintf(stderr, + "global_literals: restored empty_tuple=%p chunk_head=%p " + "from replay snapshot\n", + (void *) (UWord) ERTS_GLOBAL_LIT_EMPTY_TUPLE, + (void *) global_literal_chunk); + return 1; + } + if (loaded_empty_tuple || loaded_chunk_head) { + erts_fprintf(stderr, + "global_literals: partial replay snapshot " + "(empty_tuple=%d chunk_head=%d), falling back to fresh init\n", + loaded_empty_tuple, loaded_chunk_head); + } + return 0; } diff --git a/erts/emulator/beam/erl_global_literals.h b/erts/emulator/beam/erl_global_literals.h index 39a257d0180c..40fd134950ed 100644 --- a/erts/emulator/beam/erl_global_literals.h +++ b/erts/emulator/beam/erl_global_literals.h @@ -42,6 +42,23 @@ extern Eterm ERTS_GLOBAL_LIT_EMPTY_TUPLE; */ void init_global_literals(void); +/* + * Replay-only: restore the snapshotted ERTS_GLOBAL_LIT_EMPTY_TUPLE term and + * the global_literal_chunk linked-list head from struct-root-dumps. + * Returns 1 on success (snapshot loaded and globals updated), 0 if the + * snapshot is unavailable. 
+ * + * When the empty tuple snapshot is restored its boxed pointer references an + * address in the record-time arena; the arena is mapped MAP_PRIVATE at + * replay so the bytes [0,0] of the empty tuple header survive at the same + * virtual address. Without this restore, init_empty_tuple() would create a + * fresh empty tuple at a new literal-mmapper address, but every literal map + * loaded from beam files still has its `keys` field pointing at the + * record-time empty tuple address, which would cause ets:insert deep-copy + * to assert (obj == ERTS_GLOBAL_LIT_EMPTY_TUPLE) and crash. + */ +int erts_global_literals_apply_replay_root(void); + /* Allocates space for global literals. Users must call erts_global_literal_register * when done creating the literal. */ diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index 562585dcd16a..f1dc00f6e55f 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -2803,16 +2803,10 @@ static void prepare_opened_rt(struct erl_module_nif* lib) } else { /* ERL_NIF_RT_TAKEOVER */ steal_resource_type(type); - if (erts_mmap_record_option_replay_enabled() - && erts_refc_read(&type->owner->refc, 0) < 1) { - erts_refc_init(&type->owner->refc, 1); - } - ASSERT(erts_refc_read(&type->owner->refc, 1) > 0); - if (erts_mmap_record_option_replay_enabled() - && erts_refc_read(&type->owner->dynlib_refc, 0) < 1) { - erts_refc_init(&type->owner->dynlib_refc, 1); + if (!erts_mmap_record_option_replay_enabled()) { + ASSERT(erts_refc_read(&type->owner->refc, 1) > 0); + ASSERT(erts_refc_read(&type->owner->dynlib_refc, 1) > 0); } - ASSERT(erts_refc_read(&type->owner->dynlib_refc, 1) > 0); /* * Prepare for atomic change of callbacks with lock-wrappers @@ -5051,7 +5045,8 @@ static void patch_call_nif_early(ErlNifEntry* entry, { int i; - ERTS_LC_ASSERT(erts_has_code_mod_permission()); + ERTS_LC_ASSERT(erts_has_code_mod_permission() + || erts_mmap_record_option_replay_enabled()); 
ERTS_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&erts_nif_call_tab_lock)); erts_unseal_module(this_mi); @@ -5332,24 +5327,166 @@ static ErtsStaticNif* is_static_nif_module(Eterm mod_atom) static int replay_should_reinit_static_nif(const ErlNifEntry* entry) { - return sys_strcmp(entry->name, "prim_tty") == 0 - || sys_strcmp(entry->name, "erl_tracer") == 0 - || sys_strcmp(entry->name, "prim_buffer") == 0 - || sys_strcmp(entry->name, "prim_file") == 0 - || sys_strcmp(entry->name, "zlib") == 0 - || sys_strcmp(entry->name, "zstd") == 0; + /* + * All static NIFs need their load() callback re-run at replay time so + * any C-side state (resource types, atom tables, lookup tables, ...) + * is rebuilt against the current VM. Skipping one historically caused + * tty_create_nif (prim_tty) to dereference a NULL resource type during + * shell startup. The set of statically-linked NIFs is small and fixed + * (prim_tty, erl_tracer, prim_buffer, prim_file, zlib, zstd, + * prim_socket, prim_net), so re-running their load callbacks is cheap. 
+ */ + (void) entry; + return 1; +} + +static int +replay_install_static_nif_call_stubs(struct erl_module_nif* lib, + struct erl_module_instance* mi) +{ + ErlNifEntry *entry = &lib->entry; + ErtsNifFinish *fin; + Eterm f_atom; + int i; + Uint miss_hash = 0; + const int replay_dbg = !!getenv("ERTS_REPLAY_NIF_DEBUG"); + + fin = erts_alloc(ERTS_ALC_T_NIF, sizeof_ErtsNifFinish(entry->num_of_funcs)); + fin->nstubs_hashed = 0; + + erts_rwmtx_rwlock(&erts_nif_call_tab_lock); + for (i = 0; i < entry->num_of_funcs; i++) { + int func_ix; + const ErtsCodeInfo *ci; + ErtsNifBeamStub tmpl; + ErtsNifBeamStub *stub = &fin->beam_stubv[i]; + ErlNifFunc *f = &entry->funcs[i]; + + if (!erts_atom_get(f->name, sys_strlen(f->name), &f_atom, ERTS_ATOM_ENC_LATIN1) + || (func_ix = get_func_ix(mi->code_hdr, f_atom, f->arity)) < 0) { + continue; + } + + ci = mi->code_hdr->functions[func_ix]; + stub->code_info_ptr = ci; + stub->info = *ci; + + tmpl.code_info_ptr = ci; + if (hash_get(&erts_nif_call_tab, &tmpl) != NULL) { + hash_erase(&erts_nif_call_tab, &tmpl); + } + if (hash_put(&erts_nif_call_tab, stub) != stub) { + miss_hash++; + continue; + } + fin->nstubs_hashed++; + +#ifdef BEAMASM + { + void* normal_fptr; + void* dirty_fptr; + + if (f->flags) { + if (f->flags == ERL_NIF_DIRTY_JOB_IO_BOUND) { + normal_fptr = static_schedule_dirty_io_nif; + } else { + normal_fptr = static_schedule_dirty_cpu_nif; + } + dirty_fptr = f->fptr; + } else { + dirty_fptr = NULL; + normal_fptr = f->fptr; + } + + beamasm_emit_call_nif(ci, + normal_fptr, + lib, + dirty_fptr, + (char *)&stub->info, + sizeof(stub->info) + sizeof(stub->code)); + } +#else + stub->code.call_nif[0] = BeamOpCodeAddr(op_call_nif_WWW); + stub->code.call_nif[2] = (BeamInstr) lib; + + if (f->flags) { + stub->code.call_nif[3] = (BeamInstr) f->fptr; + stub->code.call_nif[1] = + (f->flags == ERL_NIF_DIRTY_JOB_IO_BOUND) + ? 
(BeamInstr) static_schedule_dirty_io_nif + : (BeamInstr) static_schedule_dirty_cpu_nif; + } else { + stub->code.call_nif[1] = (BeamInstr) f->fptr; + } +#endif + } + if (fin->nstubs_hashed == 0) { + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: stubs_failed module=%s looked_up=0 hash_fail=%bpu\n", + entry->name, miss_hash); + } + erts_rwmtx_rwunlock(&erts_nif_call_tab_lock); + erts_free(ERTS_ALC_T_NIF, fin); + return 0; + } + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: stubs_ok module=%s count=%d\n", + entry->name, fin->nstubs_hashed); + } + patch_call_nif_early(entry, mi); + erts_rwmtx_rwunlock(&erts_nif_call_tab_lock); + lib->finish = fin; + return 1; +} + +static struct erl_module_nif * +replay_create_static_nif_lib(Module *module_p, ErlNifEntry *entry) +{ + struct erl_module_nif *lib; + + lib = create_lib(entry); + lib->handle = NULL; + erts_refc_init(&lib->refc, 2); + erts_refc_init(&lib->dynlib_refc, 1); + lib->flags = 0; + lib->on_halt.callback = NULL; + lib->unload_thr_callback = NULL; + erts_atomic_init_nob(&lib->unload_thr_counter, -1); + lib->mod = module_p; + lib->mi_copy = module_p->curr; + lib->priv_data = NULL; + lib->finish = NULL; + + if (!replay_install_static_nif_call_stubs(lib, &module_p->curr)) { + erts_free(ERTS_ALC_T_NIF, lib); + return NULL; + } + return lib; } +/* + * Replay-time correlation flag used by ETS / copy diagnostics: + * 0 = static NIF replay reinit has not run / not active for current entry + * 1 = currently inside a static NIF load callback (e.g. 
prim_file:load/3) + * 2 = a static NIF load callback has already returned (sticky once any + * reinit finishes, so later corruption can be tied back to it) + */ +int erts_replay_static_nif_phase = 0; + void erts_replay_reinit_loaded_static_nifs(void) { ErtsStaticNif* p; + const int replay_dbg = !!getenv("ERTS_REPLAY_NIF_DEBUG"); for (p = erts_static_nif_tab; p->nif_init != NULL; p++) { Module* module_p; struct erl_module_nif* lib; ErlNifEntry* entry = p->entry; - ErlNifEnv env; + struct enif_msg_environment_t msg_env; + ErlNifEnv *env; void* priv_data; Eterm load_arg = SMALL_ZERO; int veto; @@ -5358,18 +5495,44 @@ erts_replay_reinit_loaded_static_nifs(void) || !replay_should_reinit_static_nif(entry)) { continue; } + if (replay_dbg) { + erts_fprintf(stderr, "replay_nif: candidate=%s\n", entry->name); + } module_p = erts_get_module(p->mod_atom, erts_active_code_ix()); - if (module_p == NULL || module_p->curr.nif == NULL) + if (module_p == NULL || module_p->curr.code_hdr == NULL) { + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: skip=%s reason=no_module_or_code\n", + entry->name); + } continue; + } - lib = module_p->curr.nif; - lib->mod = module_p; + lib = replay_create_static_nif_lib(module_p, entry); + if (lib == NULL) { + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: skip=%s reason=create_lib_failed\n", + entry->name); + } + continue; + } + /* + * Mirror normal load_nif flow: from this point on, use the + * normalized entry copy embedded in `lib`. 
+ */ + entry = &lib->entry; + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: install=%s lib=%p\n", + entry->name, lib); + } ASSERT(opened_rt_list == NULL); - sys_memzero(&env, sizeof(env)); - env.mod_nif = lib; + env = &msg_env.env; + pre_nif_noproc(&msg_env, lib, NULL); priv_data = lib->priv_data; lib->flags |= ERTS_MOD_NIF_FLG_LOADING; @@ -5377,16 +5540,45 @@ erts_replay_reinit_loaded_static_nifs(void) && is_internal_pid(erts_init_process_id)) { load_arg = erts_init_process_id; } - veto = entry->load(&env, &priv_data, load_arg); + erts_replay_static_nif_phase = 1; + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: load_callback_enter module=%s arg=%T\n", + entry->name, load_arg); + } + veto = entry->load(env, &priv_data, load_arg); + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: load_callback_exit module=%s veto=%d\n", + entry->name, veto); + } + erts_replay_static_nif_phase = 2; + post_nif_noproc(&msg_env); lib->flags &= ~ERTS_MOD_NIF_FLG_LOADING; if (veto) { + /* + * NIF load() reported failure during replay. Common reasons: + * - an I/O subsystem (e.g. prim_socket esock_io) refusing + * a second init, + * - a one-shot enif_set_option() rejecting a duplicate call. + * Don't abort: the existing call stubs from the restored + * module table still resolve to functioning code, so leaving + * this NIF without a fresh re-load is much better than killing + * the whole VM. We just rollback any partially-opened resource + * types (which would otherwise leak in a half-installed + * state) and keep going. 
+ */ rollback_opened_resource_types(); - erts_exit(ERTS_ABORT_EXIT, - "replay static NIF load callback failed for %T\n", - p->mod_atom); + cleanup_opened_rt(); + erts_fprintf(stderr, + "replay static NIF load callback returned veto=%d " + "for %T; continuing without re-load\n", + veto, p->mod_atom); + continue; } + module_p->curr.nif = lib; lib->priv_data = priv_data; prepare_opened_rt(lib); diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 8986e9c626b2..901c0586dccd 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1128,6 +1128,24 @@ Uint size_object_x(Eterm, erts_literal_area_t*); #define size_object(Term) size_object_x(Term,NULL) #define size_object_litopt(Term,LitArea) size_object_x(Term,LitArea) +/* + * Replay diagnostic: walk an Eterm and dump every reachable subterm to + * stderr, classifying each pointer (ARENA / LITERAL / HEAP) and printing + * its header word. Tolerant of malformed terms. Intended to be called + * just before a deep-copy that is suspected to crash on a stale arena + * pointer (e.g. `ets:insert/2`). + */ +void erts_replay_dump_term_to_stderr(Eterm root, const char *ctx, Eterm pid); + +/* + * Replay correlation flag: + * 0 = static NIF reinit phase has not started for current candidate + * 1 = currently inside a static NIF load callback (e.g. prim_file) + * 2 = a static NIF load callback has returned (sticky after first one) + * Defined in erl_nif.c. 
+ */ +extern int erts_replay_static_nif_phase; + Uint copy_shared_calculate(Eterm, erts_shcopy_t*); Uint size_shared(Eterm); diff --git a/erts/emulator/beam/module.c b/erts/emulator/beam/module.c index 064d225f9d49..507f5bf184ab 100644 --- a/erts/emulator/beam/module.c +++ b/erts/emulator/beam/module.c @@ -29,6 +29,7 @@ #include "global.h" #include "module.h" #include "beam_catches.h" +#include "erl_mmap.h" #ifdef BEAMASM # include "beam_asm.h" @@ -243,7 +244,8 @@ static struct erl_module_instance *unsealed_module = NULL; void erts_unseal_module(struct erl_module_instance *modi) { ERTS_LC_ASSERT(erts_initialized == 0 || erts_thr_progress_is_blocking() || - erts_has_code_mod_permission()); + erts_has_code_mod_permission() || + erts_mmap_record_option_replay_enabled()); ASSERT(unsealed_module == NULL && !modi->unsealed); #ifdef BEAMASM @@ -262,7 +264,8 @@ void erts_seal_module(struct erl_module_instance *modi) { ERTS_LC_ASSERT(erts_initialized == 0 || erts_thr_progress_is_blocking() || - erts_has_code_mod_permission()); + erts_has_code_mod_permission() || + erts_mmap_record_option_replay_enabled()); ASSERT(unsealed_module == modi && modi->unsealed == 1); #ifdef BEAMASM diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index ab5ae95dc3a3..e884e4683caa 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -169,6 +169,14 @@ int erts_mmap_record_init(void); void *erts_mmap_record_alloc(UWord *sizep, Uint32 mmap_flags); void erts_mmap_record_free(void *ptr, UWord size); void *erts_mmap_record_realloc(void *ptr, UWord old_size, UWord *sizep, Uint32 mmap_flags); +/* + * Diagnostic helpers: identify whether a pointer falls inside the recorded + * mseg arena (the file-backed [record_base, record_base+ERTS_RECORD_ARENA_SIZE) + * range). Useful in replay-time instrumentation when a corrupted term carries + * a pointer that may or may not originate from the restored arena. 
+ */ +int erts_mmap_record_arena_contains(const void *ptr); +void erts_mmap_record_arena_bounds(const char **base_out, UWord *size_out); /* * Literal super-carrier snapshot/restore hooks. On record, the literal diff --git a/erts/emulator/sys/common/erl_mmap_record.c b/erts/emulator/sys/common/erl_mmap_record.c index a02054567295..834becae5b3c 100644 --- a/erts/emulator/sys/common/erl_mmap_record.c +++ b/erts/emulator/sys/common/erl_mmap_record.c @@ -244,6 +244,27 @@ erts_mmap_record_option_enabled(void) return record_enabled || replay_enabled; } +int +erts_mmap_record_arena_contains(const void *ptr) +{ + if (!record_base) { + return 0; + } + return (const char *) ptr >= record_base + && (const char *) ptr < record_base + ERTS_RECORD_ARENA_SIZE; +} + +void +erts_mmap_record_arena_bounds(const char **base_out, UWord *size_out) +{ + if (base_out) { + *base_out = record_base; + } + if (size_out) { + *size_out = ERTS_RECORD_ARENA_SIZE; + } +} + int erts_mmap_record_init(void) { From 959dcb31a7d1ce2ab42d568b2db572a260cfdbfc Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 5 May 2026 15:11:55 +0200 Subject: [PATCH 30/37] erts replay: reset staged-table debug sentinels in replay init paths In DEBUG builds the staged-table template (erl_code_staged.h) and module.c track whether staging is currently in progress using sentinel variables (fun/export _debug_stage_ix == ~0, dbg_load_code_ix == -1 means idle). BSS zero-initialises all three to 0, which is not the idle value. In a normal boot they are set to their idle sentinel when the preloaded-module staging cycle runs end_staging; replay skips that cycle entirely, leaving all three at 0. 
When compile:file later calls erts_start_staging_code_ix the very first assertion in fun_staged_start_staging fires: beam/erl_code_staged.h:385: Assertion failed: fun_debug_stage_ix == ~0 Fix: reset each sentinel to the idle value at the end of the replay- specific table init functions: - fun_debug_stage_ix = ~0 in erts_init_fun_table_replay (erl_fun.c) - export_debug_stage_ix = ~0 in init_export_table_replay (export.c) - dbg_load_code_ix = -1 in init_module_table_replay (module.c) Also move the dbg_load_code_ix static declaration earlier in module.c so it is visible at the call site inside init_module_table_replay. All changes are #ifdef DEBUG / IF_DEBUG guarded, so release builds are unaffected. Verified: compile:file succeeds across 5 deterministic replay runs. --- erts/emulator/beam/copy.c | 2 -- erts/emulator/beam/erl_fun.c | 11 +++++++++++ erts/emulator/beam/export.c | 11 +++++++++++ erts/emulator/beam/module.c | 15 +++++++++++---- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/erts/emulator/beam/copy.c b/erts/emulator/beam/copy.c index f373ea72b8a1..46ba59b74174 100644 --- a/erts/emulator/beam/copy.c +++ b/erts/emulator/beam/copy.c @@ -92,9 +92,7 @@ Uint size_object_x(Eterm obj, erts_literal_area_t *litopt) int arity; Eterm *lit_purge_ptr = litopt ? litopt->lit_purge_ptr : NULL; Uint lit_purge_sz = litopt ? 
litopt->lit_purge_sz : 0; -#ifdef DEBUG Eterm mypid = erts_get_current_pid(); -#endif DECLARE_ESTACK(s); VERBOSE(DEBUG_SHCOPY, ("[pid=%T] size_object %p\n", mypid, obj)); diff --git a/erts/emulator/beam/erl_fun.c b/erts/emulator/beam/erl_fun.c index a2877709d46e..2192c05dd369 100644 --- a/erts/emulator/beam/erl_fun.c +++ b/erts/emulator/beam/erl_fun.c @@ -178,6 +178,17 @@ void erts_init_fun_table_replay(IndexTable *roots, int no_roots) fun_tables[i].htable.fun = f; erts_index_rebuild_hash_buckets(&fun_tables[i]); } + + /* + * In debug builds the template tracks whether staging is active via + * fun_debug_stage_ix: ~0 means idle, anything else means in-progress. + * The BSS zero-initialised value (0) is not the idle sentinel, so reset + * it here – replay skips the normal start_staging/end_staging cycle that + * would otherwise leave it at ~0. + */ +#ifdef DEBUG + fun_debug_stage_ix = ~0; +#endif } void erts_fun_info(fmtfn_t to, void *to_arg) diff --git a/erts/emulator/beam/export.c b/erts/emulator/beam/export.c index 5870cbdcad4d..6ef658ce036e 100644 --- a/erts/emulator/beam/export.c +++ b/erts/emulator/beam/export.c @@ -179,6 +179,17 @@ init_export_table_replay(IndexTable *roots, int no_roots) export_tables[i].htable.fun = f; erts_index_rebuild_hash_buckets(&export_tables[i]); } + + /* + * In debug builds the staged-table template tracks whether staging is + * active via export_debug_stage_ix (~0 = idle). BSS leaves it at 0, + * which is not the idle sentinel; replay skips the normal staging cycle + * so we must reset it here to prevent spurious assertion failures when + * compile:file later triggers erts_start_staging_code_ix. 
+ */ +#ifdef DEBUG + export_debug_stage_ix = ~0; +#endif } void diff --git a/erts/emulator/beam/module.c b/erts/emulator/beam/module.c index 507f5bf184ab..0ce5690773d4 100644 --- a/erts/emulator/beam/module.c +++ b/erts/emulator/beam/module.c @@ -50,6 +50,10 @@ erts_rwmtx_t the_old_code_rwlocks[ERTS_NUM_CODE_IX]; static erts_atomic_t tot_module_bytes; +#ifdef DEBUG +static ErtsCodeIndex dbg_load_code_ix = 0; +#endif + /* SMP note: Active module table lookup and current module instance can be * read without any locks. Old module instances are protected by * "the_old_code_rwlocks" as purging is done on active module table. @@ -160,6 +164,13 @@ init_module_table_replay(IndexTable *roots, int no_roots) ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC); } erts_atomic_init_nob(&tot_module_bytes, 0); + + /* + * module_start_staging() asserts dbg_load_code_ix == -1 (the idle + * sentinel). BSS leaves it at 0; replay skips the normal staging cycle + * that resets it, so do so explicitly here. 
+ */ + IF_DEBUG(dbg_load_code_ix = -1); } Module* @@ -296,10 +307,6 @@ int module_table_sz(void) return erts_atomic_read_nob(&tot_module_bytes); } -#ifdef DEBUG -static ErtsCodeIndex dbg_load_code_ix = 0; -#endif - static int entries_at_start_staging = 0; static ERTS_INLINE void copy_module(Module* dst_mod, Module* src_mod) From d63da2f2639b6ec921160f490f2b5fd039b1eee5 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Mon, 11 May 2026 14:33:18 +0200 Subject: [PATCH 31/37] Add support for record and replay flags in erlexec --- erts/etc/common/erlexec.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c index fdad5b0d5ec9..f7bcd88362b5 100644 --- a/erts/etc/common/erlexec.c +++ b/erts/etc/common/erlexec.c @@ -809,6 +809,18 @@ int main(int argc, char **argv) } break; + case 'r': + if (strcmp(argv[i], "-record") == 0 + || strcmp(argv[i], "-replay") == 0) { + NEXT_ARG_CHECK(); + add_Eargs(argv[i]); + add_Eargs(argv[i+1]); + i++; + } else { + add_arg(argv[i]); + } + break; + case 's': /* -sname NAME */ if (strcmp(argv[i], "-sname") == 0) { NEXT_ARG_CHECK(); @@ -1036,8 +1048,16 @@ int main(int argc, char **argv) } break; case 'r': - if (!is_one_of_strings(&argv[i][2], - plusr_val_switches)) + if (strcmp(argv[i], "+record") == 0 + || strcmp(argv[i], "+replay") == 0) { + NEXT_ARG_CHECK(); + argv[i][0] = '-'; + add_Eargs(argv[i]); + add_Eargs(argv[i+1]); + i++; + } + else if (!is_one_of_strings(&argv[i][2], + plusr_val_switches)) goto the_default; else { NEXT_ARG_CHECK(); From 858530e889e92a59d0ccafd7fe9abfa99fca50c1 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Mon, 11 May 2026 15:31:19 +0200 Subject: [PATCH 32/37] Use record and replay path as directory, always dump struct when recording --- erts/emulator/beam/erl_alloc.c | 47 +++++- erts/emulator/beam/erl_global_literals.c | 12 +- erts/emulator/beam/erl_init.c | 32 ++-- erts/emulator/sys/common/erl_mmap.h | 1 + 
erts/emulator/sys/common/erl_mmap_record.c | 173 +++++++++++++++++++-- 5 files changed, 232 insertions(+), 33 deletions(-) diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index cceac521662c..9647f4397bac 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -119,6 +119,43 @@ static ErtsAllocStructSnapshot static int erts_alloc_struct_snapshot_count = 0; static char erts_alloc_struct_snapshot_dir[512] = {0}; +static int +erts_alloc_struct_ensure_dir_path(const char *dir) +{ + char path[1024]; + size_t i, len; + + if (!dir || dir[0] == '\0') { + return -1; + } + + len = strlen(dir); + if (len >= sizeof(path)) { + return -1; + } + + memcpy(path, dir, len + 1); + + for (i = 1; i < len; i++) { + if (path[i] == '/' || path[i] == '\\') { + char saved = path[i]; + path[i] = '\0'; + if (path[i - 1] != ':' + && mkdir(path, 0777) < 0 + && errno != EEXIST) { + return -1; + } + path[i] = saved; + } + } + + if (mkdir(path, 0777) < 0 && errno != EEXIST) { + return -1; + } + + return 0; +} + static int erts_alloc_struct_should_snapshot(const char *tag) { @@ -179,7 +216,7 @@ erts_alloc_struct_dump_snapshots_on_exit(void) return; } - if (mkdir(erts_alloc_struct_snapshot_dir, 0777) < 0 && errno != EEXIST) { + if (erts_alloc_struct_ensure_dir_path(erts_alloc_struct_snapshot_dir) != 0) { return; } @@ -903,18 +940,18 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) { const char *trace_path = getenv("ERTS_ALLOC_TRACE_FILE"); const char *csv_path = getenv("ERTS_ALLOC_STRUCT_CSV_FILE"); - const char *dump_dir = getenv("ERTS_ALLOC_STRUCT_DUMP_DIR"); + const char *rr_dir = erts_mmap_record_option_dir(); if (trace_path && trace_path[0] != '\0') { erts_alloc_trace_fd = open(trace_path, O_WRONLY|O_CREAT|O_APPEND, 0666); } if (csv_path && csv_path[0] != '\0') { erts_alloc_struct_csv_fd = open(csv_path, O_WRONLY|O_CREAT|O_APPEND, 0666); } - if (dump_dir && dump_dir[0] != '\0') { + if (rr_dir && rr_dir[0] != '\0') { 
erts_snprintf(erts_alloc_struct_snapshot_dir, sizeof(erts_alloc_struct_snapshot_dir), - "%s", - dump_dir); + "%s/struct-root-dumps", + rr_dir); } else { erts_snprintf(erts_alloc_struct_snapshot_dir, sizeof(erts_alloc_struct_snapshot_dir), diff --git a/erts/emulator/beam/erl_global_literals.c b/erts/emulator/beam/erl_global_literals.c index 480f8dd1951c..65f335ff1546 100644 --- a/erts/emulator/beam/erl_global_literals.c +++ b/erts/emulator/beam/erl_global_literals.c @@ -261,7 +261,7 @@ init_global_literals(void) int erts_global_literals_apply_replay_root(void) { - const char *base_dir = getenv("ERTS_ALLOC_STRUCT_DUMP_DIR"); + const char *base_dir = NULL; char dir_buf[512]; char manifest_path[1024]; FILE *mf; @@ -269,8 +269,14 @@ erts_global_literals_apply_replay_root(void) int loaded_empty_tuple = 0; int loaded_chunk_head = 0; - if (!base_dir || base_dir[0] == '\0') { - base_dir = "_mmap-records/struct-root-dumps"; + { + const char *rr_dir = erts_mmap_record_option_dir(); + if (rr_dir && rr_dir[0] != '\0') { + erts_snprintf(dir_buf, sizeof(dir_buf), "%s/struct-root-dumps", rr_dir); + base_dir = dir_buf; + } else { + base_dir = "_mmap-records/struct-root-dumps"; + } } erts_snprintf(dir_buf, sizeof(dir_buf), "%s", base_dir); erts_snprintf(manifest_path, sizeof(manifest_path), diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 1cbbbb939902..f46dd3f78d41 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -394,7 +394,7 @@ restore_struct_roots_for_replay(IndexTable *atom_root, IndexTable *export_roots, IndexTable *fun_roots) { - const char *base_dir = getenv("ERTS_ALLOC_STRUCT_DUMP_DIR"); + const char *base_dir = NULL; char dir_buf[512]; char manifest_path[1024]; FILE *mf = NULL; @@ -404,8 +404,14 @@ restore_struct_roots_for_replay(IndexTable *atom_root, int fun_ix = 0; int have_atom = 0; - if (!base_dir || base_dir[0] == '\0') { - base_dir = "_mmap-records/struct-root-dumps"; + { + const char *rr_dir = 
erts_mmap_record_option_dir(); + if (rr_dir && rr_dir[0] != '\0') { + erts_snprintf(dir_buf, sizeof(dir_buf), "%s/struct-root-dumps", rr_dir); + base_dir = dir_buf; + } else { + base_dir = "_mmap-records/struct-root-dumps"; + } } erts_snprintf(dir_buf, sizeof(dir_buf), "%s", base_dir); erts_snprintf(manifest_path, sizeof(manifest_path), "%s/roots.csv", dir_buf); @@ -782,8 +788,8 @@ __decl_noreturn void __noreturn erts_usage(void) int this_rel = this_rel_num(); erts_fprintf(stderr, "Usage: %s [flags] [ -- [init_args] ]\n", progname(program)); erts_fprintf(stderr, "The flags are:\n\n"); - erts_fprintf(stderr, "-record path create/use a file-backed 100MB mmap arena for mseg carriers\n"); - erts_fprintf(stderr, "-replay path reuse an existing 100MB mmap arena file (mutually exclusive with -record)\n"); + erts_fprintf(stderr, "-record dir record into /mseg-arena.bin (creates missing subdirectories)\n"); + erts_fprintf(stderr, "-replay dir replay from /mseg-arena.bin (mutually exclusive with -record)\n"); erts_fprintf(stderr, "-a size suggest stack size in kilo words for threads\n"); erts_fprintf(stderr, " in the async-thread pool; valid range is [%d-%d]\n", ERTS_ASYNC_THREAD_MIN_STACK_SIZE, @@ -1131,7 +1137,7 @@ early_init(int *argc, char **argv) /* erts_usage(); } if (!erts_mmap_record_init()) { - erts_fprintf(stderr, "failed to initialize -record mmap arena at %s\n", path); + erts_fprintf(stderr, "failed to initialize -record mmap arena directory %s\n", path); erts_usage(); } i++; @@ -1144,7 +1150,7 @@ early_init(int *argc, char **argv) /* erts_usage(); } if (!erts_mmap_record_init()) { - erts_fprintf(stderr, "failed to initialize -replay mmap arena from %s\n", path); + erts_fprintf(stderr, "failed to initialize -replay mmap arena directory %s\n", path); erts_usage(); } i++; @@ -2859,15 +2865,21 @@ erl_start(int argc, char **argv) * and do not need separate restoration. 
*/ { - const char *base_dir = getenv("ERTS_ALLOC_STRUCT_DUMP_DIR"); + const char *base_dir = NULL; char dir_buf[512]; char manifest_path[1024]; FILE *mf; char line[1024]; int loaded = 0; - if (!base_dir || base_dir[0] == '\0') { - base_dir = "_mmap-records/struct-root-dumps"; + { + const char *rr_dir = erts_mmap_record_option_dir(); + if (rr_dir && rr_dir[0] != '\0') { + erts_snprintf(dir_buf, sizeof(dir_buf), "%s/struct-root-dumps", rr_dir); + base_dir = dir_buf; + } else { + base_dir = "_mmap-records/struct-root-dumps"; + } } erts_snprintf(dir_buf, sizeof(dir_buf), "%s", base_dir); erts_snprintf(manifest_path, sizeof(manifest_path), diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index e884e4683caa..d0bbc4cd387d 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -165,6 +165,7 @@ int erts_mmap_record_option_replay(const char *path); int erts_mmap_record_option_record_enabled(void); int erts_mmap_record_option_replay_enabled(void); int erts_mmap_record_option_enabled(void); +const char *erts_mmap_record_option_dir(void); int erts_mmap_record_init(void); void *erts_mmap_record_alloc(UWord *sizep, Uint32 mmap_flags); void erts_mmap_record_free(void *ptr, UWord size); diff --git a/erts/emulator/sys/common/erl_mmap_record.c b/erts/emulator/sys/common/erl_mmap_record.c index 834becae5b3c..6fb60de3be45 100644 --- a/erts/emulator/sys/common/erl_mmap_record.c +++ b/erts/emulator/sys/common/erl_mmap_record.c @@ -30,6 +30,7 @@ #include #include #include +#include #ifdef HAVE_SYS_MMAN_H # include #endif @@ -37,6 +38,7 @@ #if HAVE_ERTS_MMAP #define ERTS_RECORD_ARENA_SIZE (UWORD_CONSTANT(100) * 1024 * 1024) +#define ERTS_RECORD_ARENA_FILE "mseg-arena.bin" typedef struct ErtsMMapRecordChunk_ ErtsMMapRecordChunk; struct ErtsMMapRecordChunk_ { @@ -52,12 +54,104 @@ static int replay_enabled = 0; static int record_initialized = 0; static int record_fd = -1; static char *record_base = NULL; +static char 
*record_dir = NULL; +static char *replay_dir = NULL; static char *record_path = NULL; static char *replay_path = NULL; static ErtsMMapRecordChunk *record_chunks = NULL; static erts_mtx_t record_mtx; static int record_mtx_inited = 0; +static char * +copy_trimmed_dir(const char *path) +{ + size_t len; + char *copy; + + if (!path || !path[0]) { + return NULL; + } + + len = strlen(path); + while (len > 1 && (path[len - 1] == '/' || path[len - 1] == '\\')) { + if (len == 3 && path[1] == ':') { + break; + } + len--; + } + + copy = (char *) malloc(len + 1); + if (!copy) { + return NULL; + } + memcpy(copy, path, len); + copy[len] = '\0'; + return copy; +} + +static int +ensure_dir_path(const char *dir) +{ + char path[1024]; + size_t i, len; + + if (!dir || dir[0] == '\0') { + return -1; + } + + len = strlen(dir); + if (len >= sizeof(path)) { + return -1; + } + + memcpy(path, dir, len + 1); + + for (i = 1; i < len; i++) { + if (path[i] == '/' || path[i] == '\\') { + char saved = path[i]; + path[i] = '\0'; + if (path[i - 1] != ':' + && mkdir(path, 0777) < 0 + && errno != EEXIST) { + return -1; + } + path[i] = saved; + } + } + + if (mkdir(path, 0777) < 0 && errno != EEXIST) { + return -1; + } + + return 0; +} + +static char * +join_dir_file(const char *dir, const char *name) +{ + size_t dlen, nlen, need_sep, sz; + char *res; + + if (!dir || !name) { + return NULL; + } + + dlen = strlen(dir); + nlen = strlen(name); + need_sep = dlen > 0 && dir[dlen - 1] != '/' && dir[dlen - 1] != '\\'; + sz = dlen + need_sep + nlen + 1; + res = (char *) malloc(sz); + if (!res) { + return NULL; + } + if (need_sep) { + erts_snprintf(res, sz, "%s/%s", dir, name); + } else { + erts_snprintf(res, sz, "%s%s", dir, name); + } + return res; +} + /* * Literal super-carrier snapshot tracking. 
* @@ -68,7 +162,8 @@ static int record_mtx_inited = 0; * To replay correctly we track every live (ptr, size) region handed out by * erts_alcu_mmapper_mseg_alloc / _realloc, and at process exit we dump those * regions (their raw bytes) to a sidecar file next to the main record arena - * (.literals). On replay, after the literal mmapper has been + * (/mseg-arena.bin.literals). On replay, after the literal + * mmapper has been * set up (so the same virtual range is reserved), we read the sidecar and * memcpy bytes back at their original addresses. */ @@ -178,24 +273,51 @@ record_merge_with_neighbors(ErtsMMapRecordChunk *c) int erts_mmap_record_option_record(const char *path) { - char *copy; - size_t len; + char *dir; + char *arena_path; + char *dump_dir; if (!path || !path[0] || replay_enabled) { return 0; } - len = strlen(path); - copy = (char *) malloc(len + 1); - if (!copy) { + dir = copy_trimmed_dir(path); + if (!dir) { + return 0; + } + + if (ensure_dir_path(dir) != 0) { + free(dir); + return 0; + } + + dump_dir = join_dir_file(dir, "struct-root-dumps"); + if (!dump_dir) { + free(dir); return 0; } - memcpy(copy, path, len + 1); + if (ensure_dir_path(dump_dir) != 0) { + free(dump_dir); + free(dir); + return 0; + } + free(dump_dir); + + arena_path = join_dir_file(dir, ERTS_RECORD_ARENA_FILE); + if (!arena_path) { + free(dir); + return 0; + } + + if (record_dir) { + free(record_dir); + } + record_dir = dir; if (record_path) { free(record_path); } - record_path = copy; + record_path = arena_path; record_enabled = 1; return 1; @@ -204,24 +326,33 @@ erts_mmap_record_option_record(const char *path) int erts_mmap_record_option_replay(const char *path) { - char *copy; - size_t len; + char *dir; + char *arena_path; if (!path || !path[0] || record_enabled) { return 0; } - len = strlen(path); - copy = (char *) malloc(len + 1); - if (!copy) { + dir = copy_trimmed_dir(path); + if (!dir) { + return 0; + } + + arena_path = join_dir_file(dir, ERTS_RECORD_ARENA_FILE); + if 
(!arena_path) { + free(dir); return 0; } - memcpy(copy, path, len + 1); + + if (replay_dir) { + free(replay_dir); + } + replay_dir = dir; if (replay_path) { free(replay_path); } - replay_path = copy; + replay_path = arena_path; replay_enabled = 1; return 1; } @@ -244,6 +375,18 @@ erts_mmap_record_option_enabled(void) return record_enabled || replay_enabled; } +const char * +erts_mmap_record_option_dir(void) +{ + if (record_enabled) { + return record_dir; + } + if (replay_enabled) { + return replay_dir; + } + return NULL; +} + int erts_mmap_record_arena_contains(const void *ptr) { From fc80ff67e8d368b862f87586e71db46a3942acb0 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Mon, 11 May 2026 16:26:20 +0200 Subject: [PATCH 33/37] Update preloaded --- erts/preloaded/ebin/erl_init.beam | Bin 1276 -> 1492 bytes erts/preloaded/ebin/erlang.beam | Bin 40416 -> 40416 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/erts/preloaded/ebin/erl_init.beam b/erts/preloaded/ebin/erl_init.beam index bf205fab15d50df1cea57d2a3a092486d7a46432..4c917779b12a48083e19aa46b0aedfeeb76f1b94 100644 GIT binary patch literal 1492 zcmYjRL2nyH6rSTPaO1Sc+0B<{U%D&F@oA=(l_jYI2zT8|S zB=hHk)s>C)t|tj0p998n4agiStio=x_t^Vvp5102u#ee&_JBQPpRqMoW9#e@d(7(W zE4IbHVax1OX0t`Mw7nN#z8pXUq$bY^Ux~lIM+kj#et!PviLL6gy!o;;=8p zk8ZaW_+G>Ho%R7b#G$hz!=oVV_8P5zrz68*!aH48URWc4XII9e<@<3E$|To`4C4rL z$1?OB*O-=}6Ujy(!>Aj@(u*6g(2Gt(8Mw|~LwZijmF>CoKC3M17 zT_=`m>cr+B*_g=UPWZh^FL`zMPkx{sBulb)_LNxuj`Bvjtw^5vcC~BB@^SmOmmIkJhA`pL_ zP-99IwJ-Qow24nyz6M!*?um3+cE^$1=c>u~J0RJoezc5!3 zKmjJ81VB6TX#k#kAN&u;{B7VU=yL-wwj;+p3~&>Gf_z-(yJNokGU@9s=-AVB z0Olxw1 z^ZPw`>Tq81$OA3y?-IVcxxLlc*jTAIHdpKGD^JiF>=bTYVgi|L`!cdTKejGz6EIJJ uIkm7zHe~F8THSZyNuQ-&@Xk^bbt-`X literal 1276 zcmYjRO^n+_6rPE*YmbT8iJje0O6drTf5IBNh($;+TWCw6EfExPOq|W6^{!)^@kH%% zidrFYj8@_>w+L}SfEE2H7I-k&np7Pft&e$3iBZIE_`iP?1#p`y}q=S=vdms6RC}C8J#- zXSs}ro!)3L5OPxTL7a%Ww#at(h2p&|Q@Ipnnj;}q0cn+xX_Pn{LPmw?Fvm9Hm{T5v6yW`e2mqsu(JZ`*|jn=(h(^QV6c(NB~t)(iT!?vOCQBqeSHWxX6>} 
zfZvT0HHimh!=m3ceTzOXqJd!9{%r0HoeZ**J3@8N?45dR(d}l~g6gfX#mzySw8{}S z-(4uj<+!O(=##7uXNmuSbEE(xj{wgB2&S4=yq6eeHO9DKHDDxW)%H~{3hZLEZwcL$NbRe)qppwl~6Do>b&Cpj4+3{rpGqoXQJplYK1*UxUDA@ z@2?PdYb@k}Daq@Bp4wsiL+hs|cj*sqeS~w~@cCiDtCpU9M;RW|G5W=7=C0@ZZliGx z9bTh9GNhl_w9D(3K5}S|@I+5PxA0USS3I}L@T^A7RQkk5V_x;S6+r7N;ZusAXa@#& zn;|~h{x{eOE(LG?ed$3BfxD%h`Xl!o`qrWz^6~L%_!OUP{TXogdCSKR)d&L5Bka_< zXWgO`?09n(#SWD4jh0QZ)6k9wo)^G#-5iPI+h}Hzn5@o*b{b&42-aq4UAGZ5wPB;9 zg^&60zgF@#Ea>_$H|y_!X931k1sGWYEcxmg4sY>){WHL!NGT?M4PbH@z7D_(Ztm%l zoA1Mj@p%ET)Eiy_IDi)cMhsu}z?S^V{j%q+#r{03CWj9&+(;eZmTVqx@|L(dO3l0` z*6c3=j5Oy9LaPURzXHFo*e_=&^*3T=Vqag_OuYYaAFcH>QLr>qY<^Wiy$b5eCj7k6 rO`#%KS`QO=;=7f5RHRWa5&h0UL}~;#t~A_phtWY#n9DBr_Q3T&7*7J0 diff --git a/erts/preloaded/ebin/erlang.beam b/erts/preloaded/ebin/erlang.beam index 7507a883b625b534b73559bd96fee895bde53ba6..a7b94c6911a6263cbc6649e26b4cb167f8fd34d4 100644 GIT binary patch delta 2642 zcmWNRTXaY71+}7%Tw)7~mWyDEAu3{F0b072&BK2A)?VMke%L2E zN=|f?EHm;;YM0i&TKih<`?Y&&kJKKm{a5WNs$4l|&Xv11*FRU8%jABO8=1Q&cWY;nw=B0Ww?EgFJ8k!}OYFW>dF^sLVOQAI_5k|^`$l_^U1JZnhuA~yQTAwitbLC? 
z(Vk*YvLCh|u^+Xk+l}^2d!D_>UTHVktMm4|_Imph`|tL*_ICwGK`q=_7*V*tFtN~B zm{G6`&4qP^mkO^IUMsW}wieD6&KK39vluB-yr$T{*idYwDo|WdTv&X*_(Jio#ht|i z#m?^L?kBrn>|WcwrMs>BNcT~c5K0)O6r~@^RFr8b^HAoaJVjnvhO!c670Mcv=TO$8 zY(Qy2X+_zB(uT4fr5$A#%5IcBC?BBgL)njV5akd`7s_cISK#P{qaTi|aNLPw1dc`= zGjOcK@e+=19N(aZP{XLDsQplHLLG=Y5j8&vbsFk}s12yoQU8Lv1oc_e6{zi~J5YC_ z?nb?UdJ(4^rw3;R&NR+yoY&!e0;i3070%T-U&mR%*@kl)&NDdA;yj1*N1T^&skr>O z!nkx?XFML8a#vX+=XWpo=5T2 z<5`a9S@NC&o)$b`f?$@QN$_rhV+cM-@F9XT2tGz|0l|d??L5I|f(Hp6BG^UnG}@JD z3R(mWv@}{J+ON+x z4i0&u)Dbde}b`d>I^c>Oi#Jt3O#I7NB zEwLMj-AHT%v0oGW9kB<9%_26N*i*!o5!*tnjo638J|cFKeC#V?mx%pHTqW)#el_tj z;#I_}i4P}!JMjs`?-%(ae*>K7xQ8t;f7b#mySqo*Yl;z){>|OdB^iR>>q`yV^Fv@Ewe}MAe zlTb)FNEjqiBu0|Bi$pz%1`ld<9U*2Fs{Jpg%QLEVI(k87^5-n#;C)n$C!>W8)F{E z(-w zydU#H%z2peF;`%&!rXwl5py%<7R>iC_h25z{2a3jvqyUx^DO2$%u9L9^O!$lm0m0=~Z3@jZhf;9jugO$aq!5WNp8&)mWaI8D9Mq-V^x*KaO);O#> ztnpYAuqI+n#+ril09Fob3050IAS9xiytofB0WlHr2gFoFJ)!~e7-Ak`0iqeP46z)s z2Jr&oWyE^KTEsfUD~K(K9@kbx0nvhJMQlQBN3R5I-O;Abv#rgt(0CjqHQ?8F@9*iwq&dNEPWsCXfa) zjjTcrK;DELi2NmT5V8h&8*&(OIC3O%6!IQq9r9k}c;sZ{ROGZg@*!kBawc*Xat?AX zaz1haauKo#X(OAFOOQ*EPa~f}u0XCtu0cMJT#Hc`vIW_Sd=t4DxfQt$ zxgFV#+==W!?n3THzK8r9@&n|D43-}w_aYA?O%0zeT&UtV68CNFupFR7j=lBH?#-uD$15jPeM%h5p{P+%zFj0TiZ+qxnd z*786_MT&}2ltm&S1ytxM0`6ugP!|*t5OG9`h&nQW9OumN_nv$1&wH?~>%q3JPo;{F z=by+g&#%b8mw!LMH~)41Q2t-kcnWI4UAU-laUoNvF5FnSsW7~7TVZBlR^f@lyu$WE zd*N{52d9g3j?40CRFMmWQrT4%g7*15-- z^^S7}ITXlYETwp3qgD9tW8rNyP@ z((=-ZQcG!F>DSV~%dWCoHp-E5ynJ!_{&GDv!SdAdwDJq(MdeN9*7A<>&Q7OuPUnlA zFLkc%Z0X$F`8A4;5<-cgRH776CZH5i{(qyKB%>*#j&UpP$#0^k6MR17xi(}`KZsLzJxb3V0^qc^J=RJWKH` z!_$UmGoE%lJMiqovmeh-c#h*e7jJjG09zJibBr#^D=}&%rkbUo*ZX_*(I`;oFMu9eh9I`vu<#d?)dP-^4HR z*We$Fe+d4O_($Qt3x5Is6#PZ}3-CALe--~q{Qtzi{Ve<){~0R0RQRYcsfbc>2^Br5 z$WdWY(T9pFsW?Q%cLW*;JV0OpfhGd$32Y#+jlequb`j_xaEQQJ=M;g{1cL;1f(e32 zf)+uW;BbN?2;M_*62S)vK16Uj!5IYS6ba5H_&LGd1dkBxL|4#V=%AbE)#zFD>(Kk5 zk3b)VJ`(+I^oi(?qEAMD41EIn?dZ+uOVHP$uSfq9eGmG#=!emdqyLP40{sm7DMIHH 
zQV4|!8H6q)bUDS)Btm~DG=tDgLeCP~OlT{i1BAXI+@0`wgfA!Dn{YqE{Ruxz_z}X5 zgl7|8L-=*VTL^C@yr1v^!haC{FNO!hivfm-QH_zs=!J19#+4X-G5TW+z!-ut6ysKm z;-4`_VK^9bFqUJiz}SHC21W-Zd}>_q4IG4%ML2~Criq!xtirq+^BT;7nAc-Y z!7O4vkNE=T8qC)*cVO#2SgMCiXh9^~8Q6cAQF$N)MG0DxtE9$_$nLsT{xsGq_+T7arrnpNJ14KA8A; z;&%{VOuU)+D&nh&7vCZNEqhw=aA}3N+%U2l_XU~>Q++2NsT5|N2-C;WKvT}JwvLA)FM(Xq~0LaN~(j@ zZc+zG9VI=A^k~u#kbZ>pG}2F#o=tic={2N3BfXpSKB@v#=~P9jx}K^$RX0-1bR}~x z8I{b{WUe7|E13~w#*rCMW;&S}WZKE>Aajt+w^Uz7bziElqIxpbQ>b1{bu-niRJW1s zO12x>zGSZ=`vBPo$-Y8%DcN?iJIL-O`!zWaIWM^@$n_z2FS+~36^rElL2eegMslmj zttR&&xsS+ok~@lZ9@hC-S*#q^4On@so3U=e8i92?)@ZCTShZMVvF^c|iZu@L{du)AX`*frR_uw&Sj*m3NO zuv6G+>XYqWBA<7BLP{Kuko`A?gtiA*LdxBjzHWMm&pHgm?+@3Sudu8L|a$SaY3k=G*o zA^Rf-BJ;?>$YID^khdX6BS#{~An!m;M7 Od77GT8*Wn@=l>t Date: Mon, 11 May 2026 16:41:39 +0200 Subject: [PATCH 34/37] add dockerfile --- Dockerfile | 99 ++++++++++++++++++++++++++---------------------------- 1 file changed, 48 insertions(+), 51 deletions(-) diff --git a/Dockerfile b/Dockerfile index aeae202b5c4a..72302efc66fc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,64 +1,61 @@ -FROM buildpack-deps:trixie +# SPDX-FileCopyrightText: 2026 Dipl.Phys. Peer Stritzinger GmbH +# SPDX-License-Identifier: Apache-2.0 + +FROM alpine:3.20 ENV OTP_VERSION="28.4.2" \ REBAR3_VERSION="3.26.0" +# Build-time and run-time deps. We deliberately skip wx/odbc/jit/megaco/etc. +# to keep the build self-contained on musl. +RUN apk add --no-cache \ + bash \ + autoconf automake libtool make perl \ + gcc g++ \ + musl-dev linux-headers \ + ncurses-dev ncurses-static \ + openssl-dev openssl-libs-static \ + zlib-dev zlib-static \ + curl wget file ca-certificates git + COPY . 
/usr/src/otp WORKDIR /usr/src/otp ENV ERL_TOP=/usr/src/otp -# We'll install the build dependencies for erlang-odbc along with the erlang -# build process: RUN set -xe \ - runtimeDeps='libodbc2 \ - libsctp1 \ - libwxgtk3.2 \ - libwxgtk-webview3.2-dev ' \ - && buildDeps='unixodbc-dev \ - libsctp-dev ' \ - && apt-get update \ - && apt-get install -y --no-install-recommends $runtimeDeps \ - && apt-get install -y --no-install-recommends $buildDeps \ - && find . -type f \( -name config.log -o -name config.status -o -name erl_crash.dump \) -delete \ - && find . -type d \( -name deps -o -name obj -o -name obj.debug -o -name '*-unknown-linux-gnu' \) -prune -exec rm -rf {} + \ - && ./otp_build autoconf \ - && gnuArch="$(dpkg-architecture --query DEB_HOST_GNU_TYPE)" \ - && ./configure --build="$gnuArch" \ - && make -j$(nproc) \ - && make -j$(nproc) docs DOC_TARGETS=chunks \ - && make install install-docs DOC_TARGETS=chunks \ - && find /usr/local -name examples | xargs rm -rf \ - && apt-get purge -y --auto-remove $buildDeps \ - && rm -rf /var/lib/apt/lists/* + && find . -type f \( -name config.log -o -name config.status -o -name erl_crash.dump \) -delete \ + && find . 
-type d \( -name deps -o -name obj -o -name obj.debug \ + -o -name '*-unknown-linux-gnu' \ + -o -name '*-unknown-linux-musl' \) -prune -exec rm -rf {} + \ + && ./otp_build autoconf \ + && ./configure \ + --without-javac \ + --without-jinterface \ + --without-wx \ + --without-megaco \ + --without-odbc \ + --without-debugger \ + --without-observer \ + --without-et \ + --disable-jit \ + --disable-dynamic-ssl-lib \ + && make -j"$(nproc)" \ + && make -j"$(nproc)" docs DOC_TARGETS=chunks \ + && make install install-docs DOC_TARGETS=chunks \ + && find /usr/local -name examples | xargs rm -rf CMD ["erl"] -# extra useful tools here: rebar & rebar3 - -ENV REBAR_VERSION="2.6.4" - -RUN set -xe \ - && REBAR_DOWNLOAD_URL="https://github.com/rebar/rebar/archive/${REBAR_VERSION}.tar.gz" \ - && REBAR_DOWNLOAD_SHA256="577246bafa2eb2b2c3f1d0c157408650446884555bf87901508ce71d5cc0bd07" \ - && mkdir -p /usr/src/rebar-src \ - && curl -fSL -o rebar-src.tar.gz "$REBAR_DOWNLOAD_URL" \ - && echo "$REBAR_DOWNLOAD_SHA256 rebar-src.tar.gz" | sha256sum -c - \ - && tar -xzf rebar-src.tar.gz -C /usr/src/rebar-src --strip-components=1 \ - && rm rebar-src.tar.gz \ - && cd /usr/src/rebar-src \ - && ./bootstrap \ - && install -v ./rebar /usr/local/bin/ \ - && rm -rf /usr/src/rebar-src - +# rebar3 (kept for in-image use; calzone-sandbox installs its own copy too) RUN set -xe \ - && REBAR3_DOWNLOAD_URL="https://github.com/erlang/rebar3/archive/${REBAR3_VERSION}.tar.gz" \ - && REBAR3_DOWNLOAD_SHA256="a151dc4a07805490e9f217a099e597ac9774814875f55da2c66545c333fdff64" \ - && mkdir -p /usr/src/rebar3-src \ - && curl -fSL -o rebar3-src.tar.gz "$REBAR3_DOWNLOAD_URL" \ - && echo "$REBAR3_DOWNLOAD_SHA256 rebar3-src.tar.gz" | sha256sum -c - \ - && tar -xzf rebar3-src.tar.gz -C /usr/src/rebar3-src --strip-components=1 \ - && rm rebar3-src.tar.gz \ - && cd /usr/src/rebar3-src \ - && HOME=$PWD ./bootstrap \ - && install -v ./rebar3 /usr/local/bin/ \ - && rm -rf /usr/src/rebar3-src + && 
REBAR3_DOWNLOAD_URL="https://github.com/erlang/rebar3/archive/${REBAR3_VERSION}.tar.gz" \ + && REBAR3_DOWNLOAD_SHA256="a151dc4a07805490e9f217a099e597ac9774814875f55da2c66545c333fdff64" \ + && mkdir -p /usr/src/rebar3-src \ + && curl -fSL -o rebar3-src.tar.gz "$REBAR3_DOWNLOAD_URL" \ + && echo "$REBAR3_DOWNLOAD_SHA256 rebar3-src.tar.gz" | sha256sum -c - \ + && tar -xzf rebar3-src.tar.gz -C /usr/src/rebar3-src --strip-components=1 \ + && rm rebar3-src.tar.gz \ + && cd /usr/src/rebar3-src \ + && HOME=$PWD ./bootstrap \ + && install -v ./rebar3 /usr/local/bin/ \ + && rm -rf /usr/src/rebar3-src From f8f5f5ea2880200b0e3665637b1b33bd1deb7f54 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Tue, 12 May 2026 17:10:36 +0200 Subject: [PATCH 35/37] cleanup replay argument in erl_init --- erts/preloaded/ebin/erl_init.beam | Bin 1492 -> 1596 bytes erts/preloaded/src/erl_init.erl | 9 ++++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/erts/preloaded/ebin/erl_init.beam b/erts/preloaded/ebin/erl_init.beam index 4c917779b12a48083e19aa46b0aedfeeb76f1b94..1c98bd3db60e57b75ac414ff4b5e9c52a13259c2 100644 GIT binary patch delta 877 zcmXw2O=uHA6n>N0bZ53%nC#@IY5KEvYl}w9whgo@%Cy>simlNK6;axPK?JKY6;Tk< zi_l}+;9lDtCc6~wGr|q zS;Q{Fw8Cg0d_-rhN#?4qB%!kCrVx6=68maTDjJ{=4gUzD^j5@c04c@*^sWs`OoYbB z7w}ogG^iaK^<4>MMb{(VFG0#N5Q<7w0ICmcznFVZ$2^c^Llp$ohqhShjFxG^r$*e; zN0F-MBn8=>{7$E+q`9xM03BkV)MLU|($Y;llJLchMX4twImmyYk{oI#BA$~9$x+M? 
zRbVNaQ1L*sxE*d>p>8~_oGw?4{Uc44kZRA2qnY|~s7Cz~i#!@Yvq z^g%Oei!Bw$!~_$&Kv*38Zc@TL&63!y4~ymLsaVQ}y0Tg~g*OnoI08#(mNt1IVaZ81 z9ZUt|LC#H$@5BH{LEo;18e6?e^Hv)U4MACQg&+;;YI% zd;&06`%5d;G%|{ih#mwv<>qm=c|UR!CMQ6&b;BbFjyQ`jVR*E~0EXLq=1tkO%p)ws r0K#}=0i4zXIb>?Tt>KG9hGMi^f>mF~L13_@p8PYWw58W3qaXhRv{{93 delta 763 zcmXw1F>ljA6n^K6@jcfO7dxq&lu{SBASfU=WhfxfwLsHW1quxy2Ba<>7=Tc9VM-PR zW2mQ-7@`VBL{y25p$i*B23VLP#1F{QIlN2TCw=nwzW3gD_C0_1eht+Z??0RYuzxPx znZLXG{N*M9ED?@lj7Z5O7~>?K#|t=(7x6M)#cQ~Lx9~PDVg*-m8CP%(AK*GZ#v6DO z6P&?v^-azAc?3?dPWD00Z$Fv?U{Cw~{-4I?)`xdr(-romU{PHXOzaU(5!}?Xmd6iR zqILt7iB+E4$>)eE-LU*UU~A;7n`y%e>j%I#bj$MpN>=kgDjui`aNTyIwduFK>_fmB zsv^1GcjU&9UE`IA8?mW7mhTo4*7Bl6#Tmu(Di`anlQgdL-=T8DALVbXt@44|ja7#0 z-fJ1Lec6CdJgm7RZqY5}D@lE~qTWZKg>+<7<+v6(AzQ#Yy1T+zR|}s`8H-rYlaUHC zS_BTXRVGk8Ry*`j*_>oTYLPc>2@|=wY!J`Kv+;D+pWX3*hRCz9=Z?b@F3pPz{{&4j z5nRd2Sk>K#la|*&1{1(ULHYci+-G9=P$K)#TnedChlvp_hN&~A&Z4 end) end, %% Proceed to the specified boot module - run(Mod, boot, BootArgs). + run(Mod, boot, remove_replay_args(BootArgs)). restart() -> erts_internal:erase_persistent_terms(), @@ -67,6 +67,13 @@ replay_enabled(BootArgs) -> lists:member(<<"-replay">>, BootArgs) orelse os:getenv("ERTS_MMAP_REPLAY") =:= "1". +remove_replay_args([<<"-replay">> | Args]) -> + remove_replay_args(Args); +remove_replay_args([Arg | Args]) -> + [Arg | remove_replay_args(Args)]; +remove_replay_args([]) -> + []. 
+ run(M, F, A) -> case erlang:function_exported(M, F, 1) of false -> From 51b9acc583b7f78cef589977cd754ff46cdcd99b Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Wed, 13 May 2026 12:15:45 +0200 Subject: [PATCH 36/37] Move diagnostic prints to debug knobs --- erts/emulator/beam/erl_alloc.c | 4 +++- erts/emulator/beam/erl_global_literals.c | 16 +++++++++++----- erts/emulator/beam/erl_nif.c | 10 ++++++---- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 9647f4397bac..f97645775fa2 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -1067,11 +1067,13 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) */ #if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) if (erts_mmap_record_option_replay_enabled()) { + const char *replay_root_dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); if (!erts_mmap_record_literal_restore(&erts_literal_mmapper)) { erts_fprintf(stderr, "failed to restore literal super-carrier " "snapshot; replay will likely fail\n"); - } else { + } else if (replay_root_dbg && replay_root_dbg[0] != '\0' + && replay_root_dbg[0] != '0') { erts_fprintf(stderr, "restored literal super-carrier snapshot\n"); } diff --git a/erts/emulator/beam/erl_global_literals.c b/erts/emulator/beam/erl_global_literals.c index 65f335ff1546..ac53bf34e969 100644 --- a/erts/emulator/beam/erl_global_literals.c +++ b/erts/emulator/beam/erl_global_literals.c @@ -268,6 +268,10 @@ erts_global_literals_apply_replay_root(void) char line[1024]; int loaded_empty_tuple = 0; int loaded_chunk_head = 0; + const char *replay_root_dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); + int replay_dbg_enabled = replay_root_dbg + && replay_root_dbg[0] != '\0' + && replay_root_dbg[0] != '0'; { const char *rr_dir = erts_mmap_record_option_dir(); @@ -343,11 +347,13 @@ erts_global_literals_apply_replay_root(void) fclose(mf); if (loaded_empty_tuple && loaded_chunk_head) { - 
erts_fprintf(stderr, - "global_literals: restored empty_tuple=%p chunk_head=%p " - "from replay snapshot\n", - (void *) (UWord) ERTS_GLOBAL_LIT_EMPTY_TUPLE, - (void *) global_literal_chunk); + if (replay_dbg_enabled) { + erts_fprintf(stderr, + "global_literals: restored empty_tuple=%p chunk_head=%p " + "from replay snapshot\n", + (void *) (UWord) ERTS_GLOBAL_LIT_EMPTY_TUPLE, + (void *) global_literal_chunk); + } return 1; } if (loaded_empty_tuple || loaded_chunk_head) { diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index f1dc00f6e55f..206e087caecd 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -5571,10 +5571,12 @@ erts_replay_reinit_loaded_static_nifs(void) */ rollback_opened_resource_types(); cleanup_opened_rt(); - erts_fprintf(stderr, - "replay static NIF load callback returned veto=%d " - "for %T; continuing without re-load\n", - veto, p->mod_atom); + if (replay_dbg) { + erts_fprintf(stderr, + "replay static NIF load callback returned veto=%d " + "for %T; continuing without re-load\n", + veto, p->mod_atom); + } continue; } From 20e007ffe1470e129cc3e0847933fd2158e53bb8 Mon Sep 17 00:00:00 2001 From: Luca Succi Date: Fri, 15 May 2026 14:50:43 +0200 Subject: [PATCH 37/37] Increase arena record size to 256 MB --- erts/emulator/sys/common/erl_mmap_record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/erts/emulator/sys/common/erl_mmap_record.c b/erts/emulator/sys/common/erl_mmap_record.c index 6fb60de3be45..341a6e8910a3 100644 --- a/erts/emulator/sys/common/erl_mmap_record.c +++ b/erts/emulator/sys/common/erl_mmap_record.c @@ -37,7 +37,7 @@ #if HAVE_ERTS_MMAP -#define ERTS_RECORD_ARENA_SIZE (UWORD_CONSTANT(100) * 1024 * 1024) +#define ERTS_RECORD_ARENA_SIZE (UWORD_CONSTANT(256) * 1024 * 1024) #define ERTS_RECORD_ARENA_FILE "mseg-arena.bin" typedef struct ErtsMMapRecordChunk_ ErtsMMapRecordChunk;