diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000000..cb7927f66d1a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,22 @@ +.git +.github +.devcontainer + +# Mirror key OTP gitignore rules so Docker does not copy generated launchers +/bin +/bootstrap/bin/* +!/bootstrap/bin/*.boot + +# Local OTP build artifacts that should not be sent as Docker build context +**/deps/ +**/erl_crash.dump +**/CONF_INFO +**/config.log +**/config.status +**/obj/ +**/obj.debug/ +erts/*-unknown-linux-gnu/ + +# Generated ASN.1 outputs with host-specific absolute paths +lib/public_key/src/OTP-PKIX-Relaxed.erl +lib/public_key/src/OTP-PKIX-Relaxed.hrl diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000000..72302efc66fc --- /dev/null +++ b/Dockerfile @@ -0,0 +1,61 @@ +# SPDX-FileCopyrightText: 2026 Dipl.Phys. Peer Stritzinger GmbH +# SPDX-License-Identifier: Apache-2.0 + +FROM alpine:3.20 + +ENV OTP_VERSION="28.4.2" \ + REBAR3_VERSION="3.26.0" + +# Build-time and run-time deps. We deliberately skip wx/odbc/jit/megaco/etc. +# to keep the build self-contained on musl. +RUN apk add --no-cache \ + bash \ + autoconf automake libtool make perl \ + gcc g++ \ + musl-dev linux-headers \ + ncurses-dev ncurses-static \ + openssl-dev openssl-libs-static \ + zlib-dev zlib-static \ + curl wget file ca-certificates git + +COPY . /usr/src/otp +WORKDIR /usr/src/otp +ENV ERL_TOP=/usr/src/otp + +RUN set -xe \ + && find . -type f \( -name config.log -o -name config.status -o -name erl_crash.dump \) -delete \ + && find . 
-type d \( -name deps -o -name obj -o -name obj.debug \ + -o -name '*-unknown-linux-gnu' \ + -o -name '*-unknown-linux-musl' \) -prune -exec rm -rf {} + \ + && ./otp_build autoconf \ + && ./configure \ + --without-javac \ + --without-jinterface \ + --without-wx \ + --without-megaco \ + --without-odbc \ + --without-debugger \ + --without-observer \ + --without-et \ + --disable-jit \ + --disable-dynamic-ssl-lib \ + && make -j"$(nproc)" \ + && make -j"$(nproc)" docs DOC_TARGETS=chunks \ + && make install install-docs DOC_TARGETS=chunks \ + && find /usr/local -name examples | xargs rm -rf + +CMD ["erl"] + +# rebar3 (kept for in-image use; calzone-sandbox installs its own copy too) +RUN set -xe \ + && REBAR3_DOWNLOAD_URL="https://github.com/erlang/rebar3/archive/${REBAR3_VERSION}.tar.gz" \ + && REBAR3_DOWNLOAD_SHA256="a151dc4a07805490e9f217a099e597ac9774814875f55da2c66545c333fdff64" \ + && mkdir -p /usr/src/rebar3-src \ + && curl -fSL -o rebar3-src.tar.gz "$REBAR3_DOWNLOAD_URL" \ + && echo "$REBAR3_DOWNLOAD_SHA256 rebar3-src.tar.gz" | sha256sum -c - \ + && tar -xzf rebar3-src.tar.gz -C /usr/src/rebar3-src --strip-components=1 \ + && rm rebar3-src.tar.gz \ + && cd /usr/src/rebar3-src \ + && HOME=$PWD ./bootstrap \ + && install -v ./rebar3 /usr/local/bin/ \ + && rm -rf /usr/src/rebar3-src diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index f39743fda825..97a3106feb1d 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -1259,6 +1259,7 @@ OS_OBJS += $(OBJDIR)/erl_poll.o \ $(OBJDIR)/erl_check_io.o \ $(OBJDIR)/erl_mseg.o \ $(OBJDIR)/erl_mmap.o \ + $(OBJDIR)/erl_mmap_record.o \ $(OBJDIR)/erl_osenv.o \ $(OBJDIR)/erl_$(ERLANG_OSTYPE)_sys_ddll.o \ $(OBJDIR)/erl_sys_common_misc.o \ diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c index a4dc975bf281..f8ef15dc82e6 100644 --- a/erts/emulator/beam/atom.c +++ b/erts/emulator/beam/atom.c @@ -474,6 +474,9 @@ init_atom_table(void) erts_index_init(ERTS_ALC_T_ATOM_TABLE, 
&erts_atom_table, "atom_tab", ATOM_SIZE, erts_atom_table_size, f); + erts_alloc_trace_note_alloc("atom_table.index_root", + &erts_atom_table, + sizeof(erts_atom_table)); /* Ordinary atoms. a is a template for creating an entry in the atom table */ for (i = 0; erl_atom_names[i] != 0; i++) { @@ -498,6 +501,45 @@ init_atom_table(void) } +void +init_atom_table_replay(IndexTable *root) +{ + int i; + HashFunctions f; + erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER; + + ASSERT(root != NULL); + + rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ; + rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED; + +#ifdef ERTS_ATOM_PUT_OPS_STAT + erts_atomic_init_nob(&atom_put_ops, 0); +#endif + + erts_rwmtx_init_opt(&atom_table_lock, &rwmtx_opt, "atom_tab", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC); + + erts_atom_table = *root; + f.hash = (H_FUN) atom_hash; + f.cmp = (HCMP_FUN) atom_cmp; + f.alloc = (HALLOC_FUN) atom_alloc; + f.free = (HFREE_FUN) atom_free; + f.meta_alloc = (HMALLOC_FUN) erts_alloc; + f.meta_free = (HMFREE_FUN) erts_free; + f.meta_print = (HMPRINT_FUN) erts_print; + erts_atom_table.htable.fun = f; + erts_index_rebuild_hash_buckets(&erts_atom_table); + + atom_space = 0; + for (i = 0; i < erts_atom_table.entries; i++) { + Atom *a = (Atom *) erts_index_lookup(&erts_atom_table, i); + if (a) { + atom_space += a->len; + } + } +} + void dump_atoms(fmtfn_t to, void *to_arg) { diff --git a/erts/emulator/beam/atom.h b/erts/emulator/beam/atom.h index 08a5256177c8..7ab1b185db63 100644 --- a/erts/emulator/beam/atom.h +++ b/erts/emulator/beam/atom.h @@ -144,10 +144,10 @@ Eterm am_atom_put(const char*, Sint); /* ONLY 7-bit ascii! 
*/ Eterm erts_atom_put(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc); int erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc); void init_atom_table(void); +void init_atom_table_replay(IndexTable *root); void atom_info(fmtfn_t, void *); void dump_atoms(fmtfn_t, void *); Uint erts_get_atom_limit(void); int erts_atom_get(const char* name, Uint len, Eterm* ap, ErtsAtomEncoding enc); void erts_atom_get_text_space_sizes(Uint *reserved, Uint *used); #endif - diff --git a/erts/emulator/beam/beam_catches.c b/erts/emulator/beam/beam_catches.c index 351b77217fbc..4b2637eb4ba0 100644 --- a/erts/emulator/beam/beam_catches.c +++ b/erts/emulator/beam/beam_catches.c @@ -56,6 +56,28 @@ struct bc_pool { static struct bc_pool bccix[ERTS_NUM_CODE_IX]; +/* + * Expose bccix[] to the struct-root-dump / replay pipeline. The individual + * per-pool tables (bccix[i].beam_catches) are allocated via + * ERTS_ALC_T_CATCHES → long-lived allocator, whose carriers live in the + * default mseg super-carrier, which is already file-backed by the record + * arena. So the table *contents* are persisted for free; what needs + * explicit snapshotting is this small static header array which holds the + * pointers, tabsize, high_mark, and free_list. Without it, every replay + * would see a freshly-initialised (empty) bccix[] and the catch indices + * baked into restored code would resolve to garbage / NULL and produce + * "Catch not found" at the first throw. + */ +void *erts_beam_catches_bccix_ptr(void) +{ + return (void *) bccix; +} + +UWord erts_beam_catches_bccix_size(void) +{ + return sizeof(bccix); +} + void beam_catches_init(void) { int i; @@ -71,6 +93,34 @@ void beam_catches_init(void) } /* For initial load: */ IF_DEBUG(bccix[erts_staging_code_ix()].is_staging = 1); + + /* + * Register the whole bccix[] as a snapshot root on record, AFTER the + * fresh table has been allocated so the snapshot captures the + * record-time pointer. 
On replay, erl_init.c overwrites this array + * from the dump before any code runs. + */ + erts_alloc_trace_note_alloc("beam_catches.bccix", + bccix, sizeof(bccix)); +} + +/* + * Replay-only: replace bccix[] wholesale with the snapshot bytes loaded + * from struct-root-dumps/NN.beam_catches.bccix.bin. The pointers inside + * refer to addresses in the long-lived allocator's carrier, which the + * default mseg super-carrier restores to the same virtual address. + * + * Leaks the fresh table that beam_catches_init() just allocated; that's + * a tiny permanent waste (one 8 KB block) but keeps the replay path + * simple and avoids running erts_free on an address the allocator no + * longer knows about once we've overwritten its bookkeeping. + */ +void beam_catches_apply_replay_root(const void *src, UWord src_size) +{ + if (src_size != sizeof(bccix)) { + return; + } + sys_memcpy(bccix, src, sizeof(bccix)); } diff --git a/erts/emulator/beam/beam_catches.h b/erts/emulator/beam/beam_catches.h index cab601f56b34..59d47911a5e4 100644 --- a/erts/emulator/beam/beam_catches.h +++ b/erts/emulator/beam/beam_catches.h @@ -33,6 +33,19 @@ #define BEAM_CATCHES_NIL (-1) void beam_catches_init(void); + +/* + * Record/replay support. See beam_catches.c for the rationale. + * + * erts_beam_catches_bccix_{ptr,size} - accessors used by the + * struct-root dump code to snapshot the static bccix[] array. + * beam_catches_apply_replay_root - restore bccix[] from the + * previously-dumped bytes during -replay init. 
+ */ +void *erts_beam_catches_bccix_ptr(void); +UWord erts_beam_catches_bccix_size(void); +void beam_catches_apply_replay_root(const void *src, UWord src_size); + void beam_catches_start_staging(void); void beam_catches_end_staging(int commit); unsigned beam_catches_cons(ErtsCodePtr cp, unsigned cdr, ErtsCodePtr **); diff --git a/erts/emulator/beam/beam_ranges.c b/erts/emulator/beam/beam_ranges.c index 15b2c3cfeac2..434d0240272a 100644 --- a/erts/emulator/beam/beam_ranges.c +++ b/erts/emulator/beam/beam_ranges.c @@ -29,6 +29,7 @@ #include "global.h" #include "beam_code.h" #include "erl_unicode.h" +#include "module.h" typedef struct { ErtsCodePtr start; /* Pointer to start of module. */ @@ -416,3 +417,107 @@ erts_find_next_code_for_line(const BeamCodeHeader* code_hdr, return lt->func_tab[0][line_index]; } + +/* + * Rebuild the per-module PC range table from the already-restored module + * table after a record/replay restore. During replay we skip load_preloaded() + * (since module table, atom table, export table, fun table, and code pages + * have all been restored from the struct-root dumps + mmap arena), which + * means erts_update_ranges() was never called. Without ranges, + * erts_find_function_from_pc() always returns NULL and any PC-based + * introspection (tracing, stack traces, exception handling) sees "unknown" + * code, which in turn corrupts VM-level invariants and leads to spurious + * crashes (SIGILL via BTI, "Catch not found", etc.). + * + * We rebuild both code indices directly (bypassing the normal staging dance), + * since the restored active/staging indices are already correct and we do + * not want to advance them. + */ +void +erts_ranges_replay_rebuild(void) +{ + ErtsCodeIndex ix; + + for (ix = 0; ix < ERTS_NUM_CODE_IX; ix++) { + int i; + int max_modules = module_code_size(ix); + Sint count = 0; + Range *mp; + + /* Free any previous allocation (in case this is called twice). 
*/ + if (r[ix].modules) { + erts_atomic_add_nob(&mem_used, -r[ix].allocated); + erts_free(ERTS_ALC_T_MODULE_REFS, r[ix].modules); + r[ix].modules = NULL; + r[ix].allocated = 0; + r[ix].n = 0; + } + + /* Count entries: one per curr instance with code, plus one per old. */ + for (i = 0; i < max_modules; i++) { + Module *modp = module_code(i, ix); + if (!modp) { + continue; + } + if (modp->curr.code_hdr && modp->curr.code_length > 0) { + count++; + } + if (modp->old.code_hdr && modp->old.code_length > 0) { + count++; + } + } + + if (count == 0) { + continue; + } + + /* Allocate with some slack so future inserts don't immediately + * require reallocation (matches the behaviour of + * erts_start_staging_ranges). */ + r[ix].allocated = count + 8; + erts_atomic_add_nob(&mem_used, r[ix].allocated); + r[ix].modules = (Range *) erts_alloc(ERTS_ALC_T_MODULE_REFS, + r[ix].allocated * sizeof(Range)); + mp = r[ix].modules; + + for (i = 0; i < max_modules; i++) { + Module *modp = module_code(i, ix); + if (!modp) { + continue; + } + if (modp->curr.code_hdr && modp->curr.code_length > 0) { + mp->start = (ErtsCodePtr) modp->curr.code_hdr; + erts_atomic_init_nob(&mp->end, + (erts_aint_t) + (((byte *) modp->curr.code_hdr) + + modp->curr.code_length)); + mp++; + } + if (modp->old.code_hdr && modp->old.code_length > 0) { + mp->start = (ErtsCodePtr) modp->old.code_hdr; + erts_atomic_init_nob(&mp->end, + (erts_aint_t) + (((byte *) modp->old.code_hdr) + + modp->old.code_length)); + mp++; + } + } + + r[ix].n = mp - r[ix].modules; + qsort(r[ix].modules, r[ix].n, sizeof(Range), + (int (*)(const void *, const void *)) rangecompare); + erts_atomic_set_nob(&r[ix].mid, + (erts_aint_t) (r[ix].modules + r[ix].n / 2)); + + if (r[ix].allocated > (Sint) erts_dump_num_lit_areas) { + erts_dump_num_lit_areas = r[ix].allocated * 2; + erts_dump_lit_areas = (ErtsLiteralArea **) + erts_realloc(ERTS_ALC_T_CRASH_DUMP, + (void *) erts_dump_lit_areas, + erts_dump_num_lit_areas + * sizeof(ErtsLiteralArea *)); + } 
+ + CHECK(&r[ix]); + } +} diff --git a/erts/emulator/beam/code_ix.c b/erts/emulator/beam/code_ix.c index 83aa5fe93905..7df22d66659f 100644 --- a/erts/emulator/beam/code_ix.c +++ b/erts/emulator/beam/code_ix.c @@ -48,6 +48,33 @@ erts_atomic32_t outstanding_blocking_code_barriers; erts_atomic32_t the_active_code_index; erts_atomic32_t the_staging_code_index; +/* + * Plain-int32 shadow of the active/staging code indices, registered as + * "code_ix.root" so the struct-root-dump/replay pipeline can save and + * restore them. The atomic variables themselves are not directly + * snapshotted because their in-memory representation is backend-specific. + * + * Layout: [0] = active, [1] = staging. + * + * Updated whenever the atomics change (init / commit). + */ +int32_t erts_code_ix_root[2] = {0, 0}; + +static ERTS_INLINE void update_code_ix_root(void) +{ + erts_code_ix_root[0] = (int32_t) erts_atomic32_read_nob(&the_active_code_index); + erts_code_ix_root[1] = (int32_t) erts_atomic32_read_nob(&the_staging_code_index); +} + +void erts_code_ix_apply_replay_root(void) +{ + /* Restore atomic indices from the snapshotted shadow. Must be called + * after the shadow has been populated from the dump (in replay mode), + * but before any code that depends on the indices runs. 
*/ + erts_atomic32_set_nob(&the_active_code_index, (erts_aint32_t) erts_code_ix_root[0]); + erts_atomic32_set_nob(&the_staging_code_index, (erts_aint32_t) erts_code_ix_root[1]); +} + struct code_permission { erts_mtx_t lock; @@ -84,6 +111,10 @@ void erts_code_ix_init(void) erts_atomic32_init_nob(&outstanding_blocking_code_barriers, 0); erts_atomic32_init_nob(&the_active_code_index, 0); erts_atomic32_init_nob(&the_staging_code_index, 0); + update_code_ix_root(); + erts_alloc_trace_note_alloc("code_ix.root", + erts_code_ix_root, + sizeof(erts_code_ix_root)); erts_mtx_init(&code_mod_permission.lock, "code_mod_permission", NIL, @@ -136,6 +167,7 @@ void erts_commit_staging_code_ix(void) erts_atomic32_set_nob(&the_active_code_index, ix); ix = (ix + 1) % ERTS_NUM_CODE_IX; erts_atomic32_set_nob(&the_staging_code_index, ix); + update_code_ix_root(); } fun_staged_write_unlock(); export_staged_write_unlock(); diff --git a/erts/emulator/beam/code_ix.h b/erts/emulator/beam/code_ix.h index 565c8b539675..00013edccdeb 100644 --- a/erts/emulator/beam/code_ix.h +++ b/erts/emulator/beam/code_ix.h @@ -153,6 +153,18 @@ const ErtsCodeMFA *erts_code_to_codemfa(ErtsCodePtr I); */ void erts_code_ix_init(void); +/* Apply the record/replay snapshot stored in erts_code_ix_root to the + * live atomic active/staging code index variables. Used in replay mode + * to restore the code index state captured at record time. + */ +void erts_code_ix_apply_replay_root(void); + +/* Plain-int32 shadow of the active/staging code indices. Registered as + * "code_ix.root" in the struct-root-dump pipeline. [0] = active, + * [1] = staging. Do not modify directly. + */ +extern int32_t erts_code_ix_root[2]; + /* Return active code index. * Is guaranteed to be valid until the calling BIF returns. 
* To get a consistent view of the code, only one call to erts_active_code_ix() diff --git a/erts/emulator/beam/copy.c b/erts/emulator/beam/copy.c index 3eb41b33fe9b..46ba59b74174 100644 --- a/erts/emulator/beam/copy.c +++ b/erts/emulator/beam/copy.c @@ -92,9 +92,7 @@ Uint size_object_x(Eterm obj, erts_literal_area_t *litopt) int arity; Eterm *lit_purge_ptr = litopt ? litopt->lit_purge_ptr : NULL; Uint lit_purge_sz = litopt ? litopt->lit_purge_sz : 0; -#ifdef DEBUG Eterm mypid = erts_get_current_pid(); -#endif DECLARE_ESTACK(s); VERBOSE(DEBUG_SHCOPY, ("[pid=%T] size_object %p\n", mypid, obj)); @@ -121,15 +119,30 @@ Uint size_object_x(Eterm obj, erts_literal_area_t *litopt) } hdr = *ptr; ASSERT(is_header(hdr)); - switch (hdr & _TAG_HEADER_MASK) { - case ARITYVAL_SUBTAG: - arity = header_arity(hdr); - if (arity == 0) { /* Empty tuple -- unusual. */ - ASSERT(!litopt && - erts_is_literal(obj,ptr) && - obj == ERTS_GLOBAL_LIT_EMPTY_TUPLE); - /* - The empty tuple is always a global literal + switch (hdr & _TAG_HEADER_MASK) { + case ARITYVAL_SUBTAG: + arity = header_arity(hdr); + if (arity == 0) { /* Empty tuple -- unusual. */ + if (!( !litopt + && erts_is_literal(obj,ptr) + && obj == ERTS_GLOBAL_LIT_EMPTY_TUPLE)) { + if (erts_mmap_record_option_replay_enabled()) { + erts_fprintf(stderr, + "replay_copy_debug: arity0 tuple obj=%p ptr=%p hdr=%p lit=%d global_empty=%p litopt=%p pid=%T\n", + (void *)(UWord) obj, + (void *) ptr, + (void *)(UWord) hdr, + erts_is_literal(obj, ptr), + (void *)(UWord) ERTS_GLOBAL_LIT_EMPTY_TUPLE, + (void *) litopt, + mypid); + } + } + ASSERT(!litopt && + erts_is_literal(obj,ptr) && + obj == ERTS_GLOBAL_LIT_EMPTY_TUPLE); + /* + The empty tuple is always a global literal constant so it does not take up any extra space. 
*/ @@ -2080,6 +2093,207 @@ void erts_move_multi_frags(Eterm** hpp, ErlOffHeap* off_heap, ErlHeapFragment* f } } +/* ====================================================================== * + * Replay debug helper: walk an Eterm and dump every reachable subterm + * with classification of each pointer (ARENA / LITERAL / HEAP) and its + * header word. Useful when a corrupted term is about to be deep-copied + * (e.g. into ETS) so we can pinpoint which boxed pointer is stale. + * + * The walker is intentionally tolerant: it will not abort on bad headers + * (it just prints them) so the dump completes even when the input term + * is malformed. + * ====================================================================== */ + +static const char * +replay_classify_ptr(const Eterm *ptr) +{ + if (ptr == NULL) { + return "NULL"; + } + if (erts_mmap_record_arena_contains(ptr)) { + return "ARENA"; + } + if (erts_is_in_literal_range((void *) ptr)) { + return "LITERAL"; + } + return "HEAP"; +} + +static const char * +replay_subtag_name(Eterm hdr) +{ + if (!is_header(hdr)) { + return "NOT-HEADER"; + } + switch (hdr & _TAG_HEADER_MASK) { + case ARITYVAL_SUBTAG: return "TUPLE"; + case POS_BIG_SUBTAG: return "POS_BIG"; + case NEG_BIG_SUBTAG: return "NEG_BIG"; + case REF_SUBTAG: return "REF"; + case FUN_SUBTAG: return "FUN"; + case FLOAT_SUBTAG: return "FLOAT"; + case BIN_REF_SUBTAG: return "BIN_REF"; + case MAP_SUBTAG: return "MAP"; + case EXTERNAL_PID_SUBTAG: return "EXT_PID"; + case EXTERNAL_PORT_SUBTAG: return "EXT_PORT"; + case EXTERNAL_REF_SUBTAG: return "EXT_REF"; + case HEAP_BITS_SUBTAG: return "HEAP_BITS"; + case SUB_BITS_SUBTAG: return "SUB_BITS"; + default: return "UNKNOWN"; + } +} + +void +erts_replay_dump_term_to_stderr(Eterm root, const char *ctx, Eterm pid) +{ + DECLARE_ESTACK(s); + int slot = 0; + const int max_slots = 256; + const char *base = NULL; + UWord arena_size = 0; + + erts_mmap_record_arena_bounds(&base, &arena_size); + erts_fprintf(stderr, + "replay_term_dump 
BEGIN ctx=%s pid=%T root_raw=%p arena=[%p..%p)\n", + ctx, pid, (void *)(UWord) root, + (void *) base, + (void *) (base ? base + arena_size : NULL)); + + ESTACK_PUSH(s, root); + while (!ESTACK_ISEMPTY(s)) { + Eterm obj; + if (slot >= max_slots) { + erts_fprintf(stderr, " ... (truncated at %d slots)\n", slot); + break; + } + obj = ESTACK_POP(s); + slot++; + + switch (primary_tag(obj)) { + case TAG_PRIMARY_IMMED1: + erts_fprintf(stderr, + " [%d] IMM raw=%p val=%T\n", + slot, (void *)(UWord) obj, obj); + break; + case TAG_PRIMARY_LIST: { + Eterm *ptr = list_val(obj); + const char *cls = replay_classify_ptr(ptr); + erts_fprintf(stderr, + " [%d] LIST raw=%p ptr=%p cls=%s", + slot, (void *)(UWord) obj, (void *) ptr, cls); + if (ptr == NULL) { + erts_fprintf(stderr, " !!!NULL_LIST_PTR\n"); + break; + } + erts_fprintf(stderr, " car=%p cdr=%p\n", + (void *)(UWord) ptr[0], + (void *)(UWord) ptr[1]); + ESTACK_PUSH(s, ptr[1]); + ESTACK_PUSH(s, ptr[0]); + break; + } + case TAG_PRIMARY_BOXED: { + Eterm *ptr = boxed_val(obj); + Eterm hdr; + const char *cls = replay_classify_ptr(ptr); + if (ptr == NULL) { + erts_fprintf(stderr, + " [%d] BOX raw=%p ptr=NULL !!!NULL_BOX_PTR\n", + slot, (void *)(UWord) obj); + break; + } + hdr = *ptr; + erts_fprintf(stderr, + " [%d] BOX raw=%p ptr=%p cls=%s hdr=%p kind=%s", + slot, (void *)(UWord) obj, (void *) ptr, cls, + (void *)(UWord) hdr, replay_subtag_name(hdr)); + if (!is_header(hdr)) { + erts_fprintf(stderr, " !!!INVALID_HEADER\n"); + break; + } + switch (hdr & _TAG_HEADER_MASK) { + case ARITYVAL_SUBTAG: { + int arity = header_arity(hdr); + int i; + erts_fprintf(stderr, " arity=%d\n", arity); + if (arity == 0) { + if (obj != ERTS_GLOBAL_LIT_EMPTY_TUPLE) { + erts_fprintf(stderr, + " !!!arity-0 tuple is NOT the global " + "empty literal (global=%p)\n", + (void *)(UWord) ERTS_GLOBAL_LIT_EMPTY_TUPLE); + } + } + for (i = arity; i >= 1; i--) { + ESTACK_PUSH(s, ptr[i]); + } + break; + } + case MAP_SUBTAG: + switch (MAP_HEADER_TYPE(hdr)) { + case 
MAP_HEADER_TAG_FLATMAP_HEAD: { + flatmap_t *mp = (flatmap_t *) flatmap_val(obj); + Uint n = flatmap_get_size(mp); + Eterm *kvs = (Eterm *) mp + 2; + Uint i; + erts_fprintf(stderr, " flatmap_size=%bpu keys=%p\n", + (UWord) n, (void *)(UWord) mp->keys); + ESTACK_PUSH(s, mp->keys); + for (i = 0; i < n; i++) { + ESTACK_PUSH(s, kvs[n + i]); /* values */ + } + break; + } + case MAP_HEADER_TAG_HAMT_HEAD_BITMAP: + case MAP_HEADER_TAG_HAMT_HEAD_ARRAY: + case MAP_HEADER_TAG_HAMT_NODE_BITMAP: { + Eterm *head = hashmap_val(obj); + Uint sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); + Uint hdr_arity = header_arity(hdr); + Uint i; + erts_fprintf(stderr, " hashmap_size=%bpu\n", (UWord) sz); + head += 1 + hdr_arity; + for (i = 0; i < sz; i++) { + ESTACK_PUSH(s, head[i]); + } + break; + } + default: + erts_fprintf(stderr, " bad-map-type\n"); + break; + } + break; + case FUN_SUBTAG: { + const ErlFunThing *funp = (ErlFunThing *) fun_val(obj); + int n = fun_num_free(funp); + int i; + erts_fprintf(stderr, " fun_free=%d\n", n); + for (i = 0; i < n; i++) { + ESTACK_PUSH(s, funp->env[i]); + } + break; + } + default: + erts_fprintf(stderr, " arityval=%bpu (no recursion)\n", + (UWord) thing_arityval(hdr)); + break; + } + break; + } + case TAG_PRIMARY_HEADER: + erts_fprintf(stderr, + " [%d] HDR raw=%p (unexpected on stack)\n", + slot, (void *)(UWord) obj); + break; + } + } + + erts_fprintf(stderr, + "replay_term_dump END ctx=%s pid=%T slots=%d\n", + ctx, pid, slot); + DESTROY_ESTACK(s); +} + static void move_one_frag(Eterm** hpp, ErlHeapFragment* frag, ErlOffHeap* off_heap, int literals) { diff --git a/erts/emulator/beam/emu/beam_emu.c b/erts/emulator/beam/emu/beam_emu.c index a473b89fbd3b..31fc5a3f3741 100644 --- a/erts/emulator/beam/emu/beam_emu.c +++ b/erts/emulator/beam/emu/beam_emu.c @@ -44,6 +44,7 @@ #include "dtrace-wrapper.h" #include "erl_proc_sig_queue.h" #include "beam_common.h" +#include "erl_mmap.h" /* #define HARDDEBUG 1 */ @@ -642,7 +643,6 @@ static void install_bifs(void) { 
ERTS_ASSERT(entry->arity <= MAX_BIF_ARITY); ep = erts_export_put(entry->module, entry->name, entry->arity); - ep->info.u.op = BeamOpCodeAddr(op_i_func_info_IaaI); ep->info.mfa.module = entry->module; ep->info.mfa.function = entry->name; @@ -664,6 +664,22 @@ static void install_bifs(void) { } } +static void replay_install_bifs(void) { + int i; + + for (i = 0; i < BIF_SIZE; i++) { + BifEntry *entry; + + entry = &bif_table[i]; + + ERTS_ASSERT(entry->arity <= MAX_BIF_ARITY); + + erts_init_trap_export(BIF_TRAP_EXPORT(i), + entry->module, entry->name, entry->arity, + entry->f); + } +} + /* * One-time initialization of emulator. Does not need to be * in process_main(). @@ -712,7 +728,11 @@ init_emulator_finish(void) beam_call_trace_return_[0] = BeamOpCodeAddr(op_i_call_trace_return); beam_call_trace_return = (ErtsCodePtr)&beam_call_trace_return_[0]; - install_bifs(); + if (erts_mmap_record_option_replay_enabled()) { + replay_install_bifs(); + } else { + install_bifs(); + } } int diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index f5bc19fdbb72..f97645775fa2 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -40,12 +40,17 @@ #include "erl_db.h" #include "erl_binary.h" #include "erl_bits.h" +#include "index.h" #include "erl_mseg.h" #include "erl_monitor_link.h" #include "erl_hl_timer.h" #include "erl_cpu_topology.h" #include "erl_thr_queue.h" #include "erl_nfunc_sched.h" +#include +#include +#include +#include #if defined(ERTS_ALC_T_DRV_SEL_D_STATE) || defined(ERTS_ALC_T_DRV_EV_D_STATE) #include "erl_check_io.h" #endif @@ -98,6 +103,274 @@ static Uint install_debug_functions(void); #endif static int lock_all_physical_memory = 0; +static int erts_alloc_trace_fd = -1; +static int erts_alloc_struct_csv_fd = -1; +static int erts_alloc_struct_snapshot_registered = 0; + +#define ERTS_ALLOC_STRUCT_SNAPSHOT_MAX 32 +typedef struct { + char tag[64]; + void *ptr; + UWord size; +} ErtsAllocStructSnapshot; + +static 
ErtsAllocStructSnapshot + erts_alloc_struct_snapshots[ERTS_ALLOC_STRUCT_SNAPSHOT_MAX]; +static int erts_alloc_struct_snapshot_count = 0; +static char erts_alloc_struct_snapshot_dir[512] = {0}; + +static int +erts_alloc_struct_ensure_dir_path(const char *dir) +{ + char path[1024]; + size_t i, len; + + if (!dir || dir[0] == '\0') { + return -1; + } + + len = strlen(dir); + if (len >= sizeof(path)) { + return -1; + } + + memcpy(path, dir, len + 1); + + for (i = 1; i < len; i++) { + if (path[i] == '/' || path[i] == '\\') { + char saved = path[i]; + path[i] = '\0'; + if (path[i - 1] != ':' + && mkdir(path, 0777) < 0 + && errno != EEXIST) { + return -1; + } + path[i] = saved; + } + } + + if (mkdir(path, 0777) < 0 && errno != EEXIST) { + return -1; + } + + return 0; +} + +static int +erts_alloc_struct_should_snapshot(const char *tag) +{ + return tag + && (strcmp(tag, "atom_table.index_root") == 0 + || strcmp(tag, "module_table.index_root") == 0 + || strcmp(tag, "export_table.index_root") == 0 + || strcmp(tag, "fun_table.index_root") == 0 + || strcmp(tag, "code_ix.root") == 0 + || strcmp(tag, "beam_catches.bccix") == 0 + || strcmp(tag, "global_literals.empty_tuple") == 0 + || strcmp(tag, "global_literals.chunk_head") == 0); +} + +static void +erts_alloc_struct_register_snapshot(const char *tag, void *ptr, UWord size) +{ + ErtsAllocStructSnapshot *snap; + if (!erts_alloc_struct_should_snapshot(tag)) { + return; + } + if (erts_alloc_struct_snapshot_count >= ERTS_ALLOC_STRUCT_SNAPSHOT_MAX) { + return; + } + snap = &erts_alloc_struct_snapshots[erts_alloc_struct_snapshot_count++]; + erts_snprintf(snap->tag, sizeof(snap->tag), "%s", tag); + snap->ptr = ptr; + snap->size = size; +} + +static void +erts_alloc_struct_dump_snapshots_on_exit(void) +{ + int i, fd, mfd; + char line[256]; + char path[1024]; + int len; + + /* + * Snapshot dumps are replay inputs and must only be produced by an + * explicit -record run, never by plain/normal execution. 
+ */ + if (!erts_mmap_record_option_record_enabled()) { + return; + } + + if (erts_alloc_struct_snapshot_count <= 0 || erts_alloc_struct_snapshot_dir[0] == '\0') { + return; + } + + /* + * Never rewrite the struct-root-dumps while replaying: the directory + * is replay INPUT. Overwriting it during replay (and then crashing + * mid-execution) would corrupt the snapshot so subsequent replays + * see a different, partially-modified state. + */ + if (erts_mmap_record_option_replay_enabled()) { + return; + } + + if (erts_alloc_struct_ensure_dir_path(erts_alloc_struct_snapshot_dir) != 0) { + return; + } + + len = erts_snprintf(path, sizeof(path), "%s/roots.csv", erts_alloc_struct_snapshot_dir); + if (len <= 0 || len >= (int) sizeof(path)) { + return; + } + mfd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (mfd >= 0) { + erts_silence_warn_unused_result(write(mfd, "index,tag,ptr,size,file\n", 24)); + } + + for (i = 0; i < erts_alloc_struct_snapshot_count; i++) { + ErtsAllocStructSnapshot *snap = &erts_alloc_struct_snapshots[i]; + const char *name = snap->tag; + len = erts_snprintf(path, sizeof(path), "%s/%02d.%s.bin", + erts_alloc_struct_snapshot_dir, i, name); + if (len <= 0 || len >= (int) sizeof(path)) { + continue; + } + fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (fd >= 0) { + if (snap->ptr && snap->size > 0) { + erts_silence_warn_unused_result(write(fd, snap->ptr, (size_t) snap->size)); + } + close(fd); + } + if (mfd >= 0) { + len = erts_snprintf(line, sizeof(line), "%d,%s,%p,%lu,%02d.%s.bin\n", + i, snap->tag, snap->ptr, (unsigned long) snap->size, i, name); + if (len > 0) { + if (len >= (int) sizeof(line)) { + len = (int) sizeof(line) - 1; + } + erts_silence_warn_unused_result(write(mfd, line, (size_t) len)); + } + } + } + if (mfd >= 0) { + close(mfd); + } + +} + +static ERTS_INLINE void +erts_alloc_trace_write(const char *line, int len) +{ + if (erts_alloc_trace_fd >= 0 && len > 0) { + ssize_t wres = write(erts_alloc_trace_fd, line, (size_t) 
len);
+        (void) wres;
+    }
+}
+
+static ERTS_INLINE void
+erts_alloc_struct_csv_write(const char *line, int len)
+{
+    if (erts_alloc_struct_csv_fd >= 0 && len > 0) {
+        ssize_t wres = write(erts_alloc_struct_csv_fd, line, (size_t) len);
+        (void) wres; /* best effort: short or failed writes are ignored */
+    }
+}
+
+void
+erts_alloc_trace_erts_alloc_call(ErtsAlcType_t type, Uint size, void *res)
+{
+    char line[160];
+    int len;
+    /* Called for every erts_alloc(). The ALLOC record only ever goes to the
+     * trace file, so bail out before formatting unless that sink is open;
+     * an open CSV sink alone must not pay for an unused erts_snprintf(). */
+    if (erts_alloc_trace_fd < 0) {
+        return;
+    }
+    len = erts_snprintf(line, sizeof(line),
+                        "ALLOC type=%u size=%lu ptr=%p\n",
+                        (unsigned int) type,
+                        (unsigned long) size,
+                        res);
+    if (len >= (int) sizeof(line)) {
+        len = (int) sizeof(line) - 1;
+    }
+    erts_alloc_trace_write(line, len); /* drops the record when len <= 0 */
+}
+
+void
+erts_alloc_trace_note_alloc(const char *tag, void *ptr, UWord size)
+{
+    char line[256];
+    char csv_line[192];
+    int len;
+    int csv_len;
+    const char *safe_tag;
+
+    safe_tag = tag ? tag : "unknown";
+
+    /* Both text sinks are optional and best effort: a formatting failure
+     * (len < 0) just drops that record -- the write helpers ignore
+     * len <= 0 -- and must never skip the snapshot registration below. */
+    if (erts_alloc_trace_fd >= 0) {
+        len = erts_snprintf(line, sizeof(line),
+                            "STRUCT_ALLOC tag=%s size=%lu ptr=%p\n",
+                            safe_tag,
+                            (unsigned long) size,
+                            ptr);
+        if (len >= (int) sizeof(line)) {
+            len = (int) sizeof(line) - 1;
+        }
+        erts_alloc_trace_write(line, len);
+    }
+    if (erts_alloc_struct_csv_fd >= 0) {
+        csv_len = erts_snprintf(csv_line, sizeof(csv_line),
+                                "%s,%p\n",
+                                safe_tag,
+                                ptr);
+        if (csv_len >= (int) sizeof(csv_line)) {
+            csv_len = (int) sizeof(csv_line) - 1;
+        }
+        erts_alloc_struct_csv_write(csv_line, csv_len);
+    }
+    /* Always reached: the root is registered for the exit-time snapshot
+     * dump whether or not a trace/CSV sink is open (the callee filters
+     * on the tag). */
+    erts_alloc_struct_register_snapshot(safe_tag, ptr, size);
+}
+
+void
+erts_alloc_trace_carrier_create(const char *alloc_name,
+                                int alloc_ix,
+                                UWord carrier_size,
+                                int is_mseg,
+                                int is_sbc,
+                                void *carrier_ptr)
+{
+    char line[256];
+    int len;
+    if (erts_alloc_trace_fd < 0) {
+        return;
+    }
+    len = erts_snprintf(line, sizeof(line),
+                        "CARRIER_CREATE alloc=%s ix=%d size=%lu kind=%s source=%s ptr=%p\n",
+                        alloc_name ?
alloc_name : "unknown", + alloc_ix, + (unsigned long) carrier_size, + is_sbc ? "sbc" : "mbc", + is_mseg ? "mseg" : "sys_alloc", + carrier_ptr); + if (len < 0) { + return; + } + if (len >= (int) sizeof(line)) { + len = (int) sizeof(line) - 1; + } + erts_alloc_trace_write(line, len); +} ErtsAllocatorFunctions_t ERTS_WRITE_UNLIKELY(erts_allctrs[ERTS_ALC_A_MAX+1]); ErtsAllocatorInfo_t erts_allctrs_info[ERTS_ALC_A_MAX+1]; @@ -664,6 +937,39 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) erts_sys_alloc_init(); erts_init_utils_mem(); + { + const char *trace_path = getenv("ERTS_ALLOC_TRACE_FILE"); + const char *csv_path = getenv("ERTS_ALLOC_STRUCT_CSV_FILE"); + const char *rr_dir = erts_mmap_record_option_dir(); + if (trace_path && trace_path[0] != '\0') { + erts_alloc_trace_fd = open(trace_path, O_WRONLY|O_CREAT|O_APPEND, 0666); + } + if (csv_path && csv_path[0] != '\0') { + erts_alloc_struct_csv_fd = open(csv_path, O_WRONLY|O_CREAT|O_APPEND, 0666); + } + if (rr_dir && rr_dir[0] != '\0') { + erts_snprintf(erts_alloc_struct_snapshot_dir, + sizeof(erts_alloc_struct_snapshot_dir), + "%s/struct-root-dumps", + rr_dir); + } else { + erts_snprintf(erts_alloc_struct_snapshot_dir, + sizeof(erts_alloc_struct_snapshot_dir), + "_mmap-records/struct-root-dumps"); + } + if (!erts_alloc_struct_snapshot_registered) { + if (atexit(erts_alloc_struct_dump_snapshots_on_exit) == 0) { + erts_alloc_struct_snapshot_registered = 1; + } + /* + * Also dump the literal super-carrier sidecar on exit when + * recording. Safe to register unconditionally: the dump + * function is a no-op unless record is enabled. 
+ */ + (void) atexit(erts_mmap_record_literal_dump_on_exit); + } + } + set_default_sl_alloc_opts(&init.sl_alloc); set_default_std_alloc_opts(&init.std_alloc); set_default_ll_alloc_opts(&init.ll_alloc); @@ -748,7 +1054,32 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop) init.mseg.nos = erts_no_schedulers; init.mseg.ndai = init.dirty_alloc_insts; erts_mseg_init(&init.mseg); + + /* + * In replay mode the paired -record run dumped every live region of the + * literal super-carrier to a sidecar file. Restore those bytes NOW -- + * after erts_mseg_init() (which has reserved the 1 GB virtual range via + * erts_mmap_init(&erts_literal_mmapper, ...)) but BEFORE the literal + * allocator creates its main carrier (which happens in + * set_au_allocator(ERTS_ALC_A_LITERAL, ...) below). If we restored after + * that point, our memcpy would clobber the allocator's fresh carrier + * header and free-tree nodes, corrupting state. + */ +#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) + if (erts_mmap_record_option_replay_enabled()) { + const char *replay_root_dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); + if (!erts_mmap_record_literal_restore(&erts_literal_mmapper)) { + erts_fprintf(stderr, + "failed to restore literal super-carrier " + "snapshot; replay will likely fail\n"); + } else if (replay_root_dbg && replay_root_dbg[0] != '\0' + && replay_root_dbg[0] != '0') { + erts_fprintf(stderr, + "restored literal super-carrier snapshot\n"); + } + } #endif +#endif /* HAVE_ERTS_MSEG */ erts_alcu_init(&init.alloc_util); erts_afalc_init(); diff --git a/erts/emulator/beam/erl_alloc.h b/erts/emulator/beam/erl_alloc.h index 3838527ab773..e09ae3a9b3f8 100644 --- a/erts/emulator/beam/erl_alloc.h +++ b/erts/emulator/beam/erl_alloc.h @@ -188,6 +188,10 @@ __decl_noreturn void erts_alc_fatal_error(int,int,ErtsAlcType_t,...) 
Eterm erts_alloc_set_dyn_param(struct process*, Eterm); +#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) +int erts_global_literal_is_in_range(void *ptr); +#endif + #undef ERTS_HAVE_IS_IN_LITERAL_RANGE #if defined(ARCH_32) || defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) # define ERTS_HAVE_IS_IN_LITERAL_RANGE @@ -230,6 +234,14 @@ int erts_get_thr_alloc_ix(void); #endif /* #if !ERTS_ALC_DO_INLINE */ void *erts_alloc_permanent_cache_aligned(ErtsAlcType_t type, Uint size) ERTS_ATTR_MALLOC_US(2); +void erts_alloc_trace_erts_alloc_call(ErtsAlcType_t type, Uint size, void *res); +void erts_alloc_trace_note_alloc(const char *tag, void *ptr, UWord size); +void erts_alloc_trace_carrier_create(const char *alloc_name, + int alloc_ix, + UWord carrier_size, + int is_mseg, + int is_sbc, + void *carrier_ptr); #ifndef ERTS_CACHE_LINE_SIZE /* Assumed cache line size */ @@ -250,6 +262,7 @@ void *erts_alloc(ErtsAlcType_t type, Uint size) size); if (!res) erts_alloc_n_enomem(ERTS_ALC_T2N(type), size); + erts_alloc_trace_erts_alloc_call(type, size, res); ERTS_MSACC_POP_STATE_X(); return res; } @@ -339,7 +352,8 @@ int erts_is_in_literal_range(void* ptr) #elif defined(ARCH_64) extern char* erts_literals_start; extern UWord erts_literals_size; - return ErtsInArea(ptr, erts_literals_start, erts_literals_size); + return ErtsInArea(ptr, erts_literals_start, erts_literals_size) + || erts_global_literal_is_in_range(ptr); #else # error No ARCH_xx #endif diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index 8c3dd7b46069..4f40266e22ed 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -35,7 +35,7 @@ /* * Alloc util will enforce 8 byte alignment if sys_alloc and mseg_alloc at * least enforces 8 byte alignment. If sys_alloc only enforces 4 byte - * alignment then alloc util will do so too. + * alignment then alloc util will do so too. 
*/ #ifdef HAVE_CONFIG_H @@ -114,6 +114,13 @@ static int allow_sys_alloc_carriers; #define ERTS_ALC_CC_GIGA_VAL(CC) ((CC) / ONE_GIGA) #define ERTS_ALC_CC_VAL(CC) ((CC) % ONE_GIGA) +extern void erts_alloc_trace_carrier_create(const char *alloc_name, + int alloc_ix, + UWord carrier_size, + int is_mseg, + int is_sbc, + void *carrier_ptr); + #define INC_CC(CC) ((CC)++) #define DEC_CC(CC) ((CC)--) @@ -331,7 +338,7 @@ typedef struct { #define IS_PREV_BLK_ALLOCED(B) \ (!IS_PREV_BLK_FREE((B))) #define IS_ALLOCED_BLK(B) \ - (!IS_FREE_BLK((B))) + (!IS_FREE_BLK((B))) #define IS_LAST_BLK(B) \ ((B)->bhdr & LAST_BLK_HDR_FLG) #define IS_NOT_LAST_BLK(B) \ @@ -1057,6 +1064,16 @@ erts_alcu_mmapper_mseg_alloc(Allctr_t *allctr, Uint *size_p, Uint flags) res = erts_mmap(allctr->mseg_mmapper, mmap_flags, &size); *size_p = (Uint)size; INC_CC(allctr->calls.mseg_alloc); + /* + * Record the literal super-carrier allocation so it can be dumped and + * restored across a record/replay cycle. This is a no-op unless the + * record option is enabled AND this mmapper is the literal one; we + * key on alloc_no because only ERTS_ALC_A_LITERAL uses this path. 
+     */
+    if (res && allctr->alloc_no == ERTS_ALC_A_LITERAL
+        && erts_mmap_record_option_record_enabled()) {
+        erts_mmap_record_literal_alloc(res, (UWord) *size_p);
+    }
     return res;
 }
 
@@ -1069,6 +1086,17 @@ erts_alcu_mmapper_mseg_realloc(Allctr_t *allctr, void *seg,
     res = erts_mremap(allctr->mseg_mmapper, ERTS_MSEG_FLG_NONE, seg, old_size, &new_size);
     *new_size_p = (Uint) new_size;
     INC_CC(allctr->calls.mseg_realloc);
+    /*
+     * Mirror the resize in the literal record bookkeeping, but only when
+     * erts_mremap() returned a segment: a NULL result must not be recorded
+     * as the new location. The allocation path applies the same `res`
+     * check.
+     */
+    if (res && allctr->alloc_no == ERTS_ALC_A_LITERAL
+        && erts_mmap_record_option_record_enabled()) {
+        erts_mmap_record_literal_realloc(seg, (UWord) old_size,
+                                         res, (UWord) *new_size_p);
+    }
     return res;
 }
 
@@ -1080,6 +1102,11 @@ erts_alcu_mmapper_mseg_dealloc(Allctr_t *allctr, void *seg, Uint size,
     if (flags & ERTS_MSEG_FLG_2POW)
         mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED;
 
+    /* Notify the literal record bookkeeping before the segment is unmapped. */
+    if (allctr->alloc_no == ERTS_ALC_A_LITERAL
+        && erts_mmap_record_option_record_enabled()) {
+        erts_mmap_record_literal_free(seg, (UWord) size);
+    }
     erts_munmap(allctr->mseg_mmapper, mmap_flags, seg, (UWord)size);
     INC_CC(allctr->calls.mseg_dealloc);
 }
@@ -1730,7 +1756,7 @@ get_pref_allctr(void *extra)
  * concurrent threads may be updating adjacent blocks.
  * We rely on getting a consistent result (without atomic op) when reading
  * the block header word even if a concurrent thread is updating
- * the "PREV_FREE" flag bit. 
+ * the "PREV_FREE" flag bit.
*/ static ERTS_INLINE Allctr_t* get_used_allctr(Allctr_t *pref_allctr, int pref_lock, void *p, UWord *sizep, @@ -1746,7 +1772,7 @@ get_used_allctr(Allctr_t *pref_allctr, int pref_lock, void *p, UWord *sizep, if (IS_SBC_BLK(blk)) { crr = BLK_TO_SBC(blk); if (sizep) - *sizep = SBC_BLK_SZ(blk) - ABLK_HDR_SZ; + *sizep = SBC_BLK_SZ(blk) - ABLK_HDR_SZ; iallctr = erts_atomic_read_dirty(&crr->allctr); } else { @@ -2182,7 +2208,7 @@ handle_delayed_dealloc(Allctr_t *allctr, ErtsAllctrDDBlock_t *dd_block; ErtsAlcType_t type; Uint32 flags; - + dd_block = (ErtsAllctrDDBlock_t*)ptr; flags = dd_block->flags; type = dd_block->type; @@ -2730,7 +2756,7 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp is_last_blk = IS_LAST_BLK(blk); if (IS_PREV_BLK_FREE(blk)) { - ASSERT(!is_first_blk); + ASSERT(!is_first_blk); /* Coalesce with previous block... */ blk = PREV_BLK(blk); (*allctr->unlink_free_block)(allctr, blk); @@ -2763,7 +2789,7 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp SET_MBC_FBLK_SZ(blk, blk_sz); is_last_blk = IS_LAST_BLK(nxt_blk); - if (is_last_blk) + if (is_last_blk) SET_LAST_BLK(blk); else { SET_NOT_LAST_BLK(blk); @@ -2956,7 +2982,7 @@ mbc_realloc(Allctr_t *allctr, ErtsAlcType_t type, void *p, Uint size, ASSERT(blk_sz >= allctr->min_block_size); ASSERT(blk_sz >= size + ABLK_HDR_SZ); ASSERT(IS_MBC_BLK(blk)); - + ASSERT(IS_FREE_BLK(nxt_blk)); ASSERT(IS_PREV_BLK_ALLOCED(nxt_blk)); ASSERT(nxt_blk_sz == MBC_BLK_SZ(nxt_blk)); @@ -2967,7 +2993,7 @@ mbc_realloc(Allctr_t *allctr, ErtsAlcType_t type, void *p, Uint size, ASSERT(is_last_blk || nxt_blk == PREV_BLK(NXT_BLK(nxt_blk))); ASSERT(is_last_blk || IS_PREV_BLK_FREE(NXT_BLK(nxt_blk))); ASSERT(FBLK_TO_MBC(nxt_blk) == crr); - + HARD_CHECK_BLK_CARRIER(allctr, blk); check_abandon_carrier(allctr, nxt_blk, NULL); @@ -3141,7 +3167,7 @@ mbc_realloc(Allctr_t *allctr, ErtsAlcType_t type, void *p, Uint size, (*allctr->unlink_free_block)(allctr, new_blk); /* prev */ - if 
(is_last_blk) + if (is_last_blk) new_blk_flgs |= LAST_BLK_HDR_FLG; else { nxt_blk = BLK_AFTER(blk, old_blk_sz); @@ -4087,7 +4113,7 @@ cpool_read_stat(Allctr_t *allctr, int alloc_no, #endif static void CHECK_1BLK_CARRIER(Allctr_t* A, int SBC, int MSEGED, Carrier_t* C, - UWord CSZ, Block_t* B, UWord BSZ) + UWord CSZ, Block_t* B, UWord BSZ) { ASSERT(IS_LAST_BLK((B))); ASSERT((CSZ) == CARRIER_SZ((C))); @@ -4271,7 +4297,7 @@ create_carrier(Allctr_t *allctr, Uint umem_sz, UWord flags) : SYS_ALLOC_CARRIER_CEILING(bcrr_sz)); crr = (Carrier_t *) allctr->sys_alloc(allctr, &crr_sz, flags & CFLG_MBC); - + if (!crr) { if (crr_sz > UNIT_CEILING(bcrr_sz)) { crr_sz = UNIT_CEILING(bcrr_sz); @@ -4348,6 +4374,45 @@ create_carrier(Allctr_t *allctr, Uint umem_sz, UWord flags) } #endif + if ( +#if HAVE_ERTS_MSEG + IS_MSEG_CARRIER(crr) +#else + 0 +#endif + ) { + Uint mseg_no = allctr->mbcs.carriers[ERTS_CRR_ALLOC_MSEG].no + + allctr->sbcs.carriers[ERTS_CRR_ALLOC_MSEG].no; + char main_name[160]; + if (mseg_no == 1) { + erts_snprintf(main_name, sizeof(main_name), + "initial_%s_%u_carrier_%u", + ERTS_ALC_A2AD(allctr->alloc_no), + (unsigned int) allctr->ix, + (unsigned int) mseg_no); + } + else { + erts_snprintf(main_name, sizeof(main_name), + "%s_%u_carrier_%u", + ERTS_ALC_A2AD(allctr->alloc_no), + (unsigned int) allctr->ix, + (unsigned int) mseg_no); + } + main_name[sizeof(main_name) - 1] = '\0'; + (void) erts_mmap_name_mapping_global(crr, crr_sz, main_name); + } + + erts_alloc_trace_carrier_create(ERTS_ALC_A2AD(allctr->alloc_no), + allctr->ix, + crr_sz, +#if HAVE_ERTS_MSEG + IS_MSEG_CARRIER(crr) ? 1 : 0, +#else + 0, +#endif + (flags & CFLG_SBC) ? 
1 : 0, + crr); + DEBUG_SAVE_ALIGNMENT(crr); return blk; } @@ -4667,7 +4732,7 @@ static struct { #endif Eterm At_sign; - + #ifdef DEBUG Eterm end_of_atoms; #endif @@ -4777,7 +4842,7 @@ init_atoms(Allctr_t *allctr) #endif am.At_sign = am_atom_put("@", 1); - + #ifdef DEBUG for (atom = (Eterm *) &am; atom < &am.end_of_atoms; atom++) { ASSERT(*atom != THE_NON_VALUE); @@ -4804,7 +4869,7 @@ init_atoms(Allctr_t *allctr) alloc_num_atoms[ix] = am_atom_put(name, len); } } - + if (allctr && !allctr->atoms_initialized) { make_name_atoms(allctr); @@ -5699,7 +5764,7 @@ Eterm erts_alcu_au_info_options(fmtfn_t *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) { - Eterm res = THE_NON_VALUE; + Eterm res = THE_NON_VALUE; if (print_to_p) { @@ -5753,7 +5818,7 @@ erts_alcu_info_options(Allctr_t *allctr, erts_mtx_lock(&allctr->mutex); } res = info_options(allctr, print_to_p, print_to_arg, hpp, szp); - if (allctr->thread_safe) { + if (allctr->thread_safe) { erts_mtx_unlock(&allctr->mutex); } return res; @@ -6286,7 +6351,7 @@ do_erts_alcu_realloc(ErtsAlcType_t type, #endif INC_CC(allctr->calls.this_realloc); - + blk = UMEM2BLK(p); if (size < allctr->sbc_threshold) { @@ -6318,7 +6383,7 @@ do_erts_alcu_realloc(ErtsAlcType_t type, goto do_carrier_resize; diff_sz_val >>= 7; } - + if (100*diff_sz_val < allctr->sbc_move_threshold*crr_sz_val) /* Data won't be copied into a new carrier... 
*/
             goto do_carrier_resize;
@@ -6899,6 +6964,14 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
     allctr->try_set_dyn_param = &erts_alcu_try_set_dyn_param;
 
 #if HAVE_ERTS_MSEG
+    {
+        int force_mseg = (allctr->alloc_no == ERTS_ALC_A_LONG_LIVED
+                          || (allctr->alloc_no == ERTS_ALC_A_BINARY
+                              && erts_mmap_record_option_record_enabled()));
+        int force_sys = (allctr->alloc_no == ERTS_ALC_A_SYSTEM
+                         || allctr->alloc_no == ERTS_ALC_A_TEMPORARY
+                         || allctr->alloc_no == ERTS_ALC_A_DRIVER);
+
     if (init->mseg_alloc) {
         ASSERT(init->mseg_realloc && init->mseg_dealloc);
         allctr->mseg_alloc = init->mseg_alloc;
@@ -6922,16 +6995,38 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
         allctr->crr_set_flgs = CFLG_FORCE_SYS_ALLOC;
         allctr->crr_clr_flgs = CFLG_FORCE_MSEG;
     }
+    else if (force_mseg) {
+        allctr->crr_set_flgs = CFLG_FORCE_MSEG;
+        allctr->crr_clr_flgs = CFLG_FORCE_SYS_ALLOC;
+    }
+    else if (force_sys) {
+        allctr->crr_set_flgs = CFLG_FORCE_SYS_ALLOC;
+        allctr->crr_clr_flgs = CFLG_FORCE_MSEG;
+    }
+    }
 #endif
 
     if (allctr->main_carrier_size && (allctr->ix != 0 || init->mmbc0)) {
         Block_t *blk;
+        Uint create_flags;
+
+        /*
+         * Carrier source for the main carrier. The code this replaces
+         * forced mseg for every allocator when
+         * ERTS_SUPER_ALIGNED_MSEG_ONLY is set; keep doing so. Otherwise
+         * force mseg only for the allocators selected below and use
+         * sys_alloc for the rest.
+         */
+        create_flags = ((ERTS_SUPER_ALIGNED_MSEG_ONLY
+                         || allctr->alloc_no == ERTS_ALC_A_LONG_LIVED
+                         || (allctr->alloc_no == ERTS_ALC_A_BINARY
+                             && erts_mmap_record_option_record_enabled()))
+                        ? CFLG_FORCE_MSEG
+                        : CFLG_FORCE_SYS_ALLOC);
 
         blk = create_carrier(allctr,
                              allctr->main_carrier_size,
-                             (ERTS_SUPER_ALIGNED_MSEG_ONLY
-                              ? CFLG_FORCE_MSEG : CFLG_FORCE_SYS_ALLOC)
-                             | CFLG_MBC
+                             create_flags | CFLG_MBC
                              | CFLG_FORCE_SIZE
                              | CFLG_NO_CPOOL
                              | CFLG_MAIN_CARRIER);
@@ -7056,7 +7143,7 @@ erts_alcu_init(AlcUInit_t *init)
     for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++)
         ASSERT(allocator_char_str[i]);
 #endif
-    
+
     erts_mtx_init(&init_atoms_mtx, "alcu_init_atoms", NIL,
         ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_ALLOCATOR);
 
@@ -7328,7 +7415,7 @@ static int blockscan_sweep_sbcs(blockscan_t *state)
     if (blockscan_clist_yielding(state)) {
         state->next_op = state->current_op;
     }
-    
+
     blockscan_unlock_helper(state);
 
     return 1;
@@ -8364,7 +8451,7 @@ erts_alcu_test(UWord op, UWord a1, UWord a2)
     case 0x013: return (UWord) ((Allctr_t *) a1)->sbc_list.last;
     case 0x014: return (UWord) ((Carrier_t *) a1)->next;
     case 0x015: return (UWord) ((Carrier_t *) a1)->prev;
-    case 0x016: return (UWord) ABLK_HDR_SZ; 
+    case 0x016: return (UWord) ABLK_HDR_SZ;
     case 0x017: return (UWord) ((Allctr_t *) a1)->min_block_size;
     case 0x018: return (UWord) NXT_BLK((Block_t *) a1);
     case 0x019: return (UWord) PREV_BLK((Block_t *) a1);
@@ -8547,7 +8634,7 @@ check_blk_carrier(Allctr_t *allctr, Block_t *iblk)
                 blk = NXT_BLK(blk);
             }
         }
-        
+
         ASSERT((((char *) crr)
                 + MBC_HEADER_SIZE(allctr)
                 + tot_blk_sz) == carrier_end);
diff --git a/erts/emulator/beam/erl_db.c b/erts/emulator/beam/erl_db.c
index eacebcf4b137..a485376ccf6e 100644
--- a/erts/emulator/beam/erl_db.c
+++ b/erts/emulator/beam/erl_db.c
@@ -2266,6 +2266,33 @@ static BIF_RETTYPE ets_insert_2_list_driver(Process* p,
     return ret;
 }
 
+/*
+ * Replay diagnostic: dump the term being inserted into ETS to stderr.
+ * Activated only when -replay is in effect AND the env var
+ * ERTS_REPLAY_ETS_INSERT_DEBUG is set. This is meant to identify which
+ * boxed sub-term carries a stale arena pointer, by classifying every
+ * pointer reachable from the inserted term as ARENA / LITERAL / HEAP
+ * and printing its header word.
+ */ +static ERTS_INLINE void +ets_insert_replay_dump(Process *p, const char *bif_name, + Eterm tab, Eterm obj_or_list) +{ + if (!erts_mmap_record_option_replay_enabled()) { + return; + } + if (getenv("ERTS_REPLAY_ETS_INSERT_DEBUG") == NULL) { + return; + } + erts_fprintf(stderr, + "replay_ets_insert: bif=%s pid=%T tab=%T list_or_obj_raw=%p " + "nif_phase=%d\n", + bif_name, p->common.id, tab, + (void *)(UWord) obj_or_list, + erts_replay_static_nif_phase); + erts_replay_dump_term_to_stderr(obj_or_list, bif_name, p->common.id); +} + /* ** The put BIF */ @@ -2277,6 +2304,7 @@ BIF_RETTYPE ets_insert_2(BIF_ALIST_2) DbTableMethod* meth; SWord consumed_reds = 0; CHECK_TABLES(); + ets_insert_replay_dump(BIF_P, "ets_insert_2", BIF_ARG_1, BIF_ARG_2); if (BIF_ARG_2 == NIL) { /* Check that the table exists */ DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE_REC, BIF_ets_insert_2); @@ -2324,6 +2352,7 @@ BIF_RETTYPE ets_insert_new_2(BIF_ALIST_2) db_lock_kind_t kind; SWord consumed_reds = 0; CHECK_TABLES(); + ets_insert_replay_dump(BIF_P, "ets_insert_new_2", BIF_ARG_1, BIF_ARG_2); if (BIF_ARG_2 == NIL) { /* Check that the table exists */ diff --git a/erts/emulator/beam/erl_fun.c b/erts/emulator/beam/erl_fun.c index 1ab1ba4e3ea9..2192c05dd369 100644 --- a/erts/emulator/beam/erl_fun.c +++ b/erts/emulator/beam/erl_fun.c @@ -132,7 +132,63 @@ static void fun_stage(ErlFunEntry *entry, void erts_init_fun_table(void) { + int i; + fun_staged_init(); + + for (i = 0; i < ERTS_NUM_CODE_IX; i++) { + erts_alloc_trace_note_alloc("fun_table.index_root", + &fun_tables[i], + sizeof(fun_tables[i])); + } +} + +void erts_init_fun_table_replay(IndexTable *roots, int no_roots) +{ + HashFunctions f; + erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER; + int i; + + ASSERT(roots != NULL); + ASSERT(no_roots == ERTS_NUM_CODE_IX); + (void) no_roots; + + rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ; + rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED; + + erts_rwmtx_init_opt(&fun_rwmutex, + &rwmtx_opt, + 
"fun_staging_lock", + NIL, + (ERTS_LOCK_FLAGS_PROPERTY_STATIC | + ERTS_LOCK_FLAGS_CATEGORY_GENERIC)); + + erts_atomic_init_nob(&fun_total_entries_bytes, 0); + + f.hash = (H_FUN) fun_staged_hash; + f.cmp = (HCMP_FUN) fun_staged_cmp; + f.alloc = (HALLOC_FUN) fun_staged_alloc; + f.free = (HFREE_FUN) fun_staged_free; + f.meta_alloc = (HMALLOC_FUN) erts_alloc; + f.meta_free = (HMFREE_FUN) erts_free; + f.meta_print = (HMPRINT_FUN) erts_print; + + for (i = 0; i < ERTS_NUM_CODE_IX; i++) { + fun_tables[i] = roots[i]; + fun_tables[i].htable.fun = f; + erts_index_rebuild_hash_buckets(&fun_tables[i]); + } + + /* + * In debug builds the template tracks whether staging is active via + * fun_debug_stage_ix: ~0 means idle, anything else means in-progress. + * The BSS zero-initialised value (0) is not the idle sentinel, so reset + * it here – replay skips the normal start_staging/end_staging cycle that + * would otherwise leave it at ~0. + */ +#ifdef DEBUG + fun_debug_stage_ix = ~0; +#endif } void erts_fun_info(fmtfn_t to, void *to_arg) @@ -319,4 +375,3 @@ void erts_fun_end_staging(int commit) erts_has_code_stage_permission()); fun_staged_end_staging(commit); } - diff --git a/erts/emulator/beam/erl_fun.h b/erts/emulator/beam/erl_fun.h index 0f8fdeba319d..88d1da9af2d8 100644 --- a/erts/emulator/beam/erl_fun.h +++ b/erts/emulator/beam/erl_fun.h @@ -24,6 +24,7 @@ #define __ERLFUNTABLE_H__ #include "erl_threads.h" +#include "index.h" /* * Fun entry. 
@@ -89,6 +90,7 @@ typedef struct erl_fun_thing { #define ERL_FUN_SIZE ((sizeof(ErlFunThing)/sizeof(Eterm))) void erts_init_fun_table(void); +void erts_init_fun_table_replay(IndexTable *roots, int no_roots); void erts_fun_info(fmtfn_t, void *); int erts_fun_table_sz(void); int erts_fun_entries_sz(void); diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index 080314ee290e..91be0dc637d3 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -44,6 +44,7 @@ #include "erl_proc_sig_queue.h" #include "beam_common.h" #include "beam_bp.h" +#include "erl_mmap.h" #define ERTS_INACT_WR_PB_LEAVE_MUCH_LIMIT 1 #define ERTS_INACT_WR_PB_LEAVE_MUCH_PERCENTAGE 20 @@ -72,6 +73,52 @@ */ #define ALENGTH(a) (sizeof(a)/sizeof(a[0])) +static int replay_gc_ptr_dbg_inited; +static int replay_gc_ptr_dbg_enabled; +static UWord replay_gc_ptr_dbg_min; +static UWord replay_gc_ptr_dbg_max; + +static ERTS_INLINE void +replay_gc_ptr_dbg_init(void) +{ + const char *min_str; + const char *max_str; + char *endp; + unsigned long long v; + + if (replay_gc_ptr_dbg_inited) { + return; + } + replay_gc_ptr_dbg_inited = 1; + replay_gc_ptr_dbg_enabled = 0; + replay_gc_ptr_dbg_min = 0; + replay_gc_ptr_dbg_max = 0; + + min_str = getenv("ERTS_REPLAY_GC_PTR_MIN"); + max_str = getenv("ERTS_REPLAY_GC_PTR_MAX"); + if (!min_str || !max_str || !min_str[0] || !max_str[0]) { + return; + } + + v = strtoull(min_str, &endp, 0); + if (!endp || *endp != '\0') { + return; + } + replay_gc_ptr_dbg_min = (UWord) v; + + v = strtoull(max_str, &endp, 0); + if (!endp || *endp != '\0') { + return; + } + replay_gc_ptr_dbg_max = (UWord) v; + if (replay_gc_ptr_dbg_max < replay_gc_ptr_dbg_min) { + UWord tmp = replay_gc_ptr_dbg_min; + replay_gc_ptr_dbg_min = replay_gc_ptr_dbg_max; + replay_gc_ptr_dbg_max = tmp; + } + replay_gc_ptr_dbg_enabled = 1; +} + /* Actual stack usage, note that this may include words in the redzone. 
*/
 # define STACK_SZ_ON_HEAP(p) (STACK_START(p) - STACK_TOP(p))
 
@@ -2276,6 +2323,35 @@ sweep(Eterm *n_hp, Eterm *n_htop,
                 ASSERT(is_boxed(val));
                 *n_hp++ = val;
             } else if (ERTS_IS_IN_SWEEP_AREA(gval, ptr)) {
+                replay_gc_ptr_dbg_init();
+                if (replay_gc_ptr_dbg_enabled) {
+                    UWord p = (UWord) ptr;
+                    if (p >= replay_gc_ptr_dbg_min && p <= replay_gc_ptr_dbg_max) {
+                        int wi;
+                        erts_fprintf(stderr,
+                                     "replay_gc_ptr_dbg: sweep boxed ptr=%p gval=%#lx hdr=%#lx is_header=%d n_hp=%p n_htop=%p\n",
+                                     ptr,
+                                     (unsigned long) gval,
+                                     (unsigned long) val,
+                                     is_header(val) ? 1 : 0,
+                                     n_hp,
+                                     n_htop);
+                        if (is_header(val)) {
+                            erts_fprintf(stderr,
+                                         "replay_gc_ptr_dbg: sweep boxed arity=%ld subtag=%#lx\n",
+                                         (long) header_arity(val),
+                                         (unsigned long) (val & _HEADER_SUBTAG_MASK));
+                        }
+                        for (wi = -4; wi <= 8; wi++) {
+                            Eterm *wp = n_hp + wi;
+                            erts_fprintf(stderr,
+                                         "replay_gc_ptr_dbg: n_hp[%+d] @ %p = %#lx\n",
+                                         wi,
+                                         wp,
+                                         (unsigned long) *wp);
+                        }
+                    }
+                }
                 move_boxed(ptr,val,&n_htop,n_hp++);
             } else {
                 n_hp++;
diff --git a/erts/emulator/beam/erl_global_literals.c b/erts/emulator/beam/erl_global_literals.c
index 86ca23e981fc..ac53bf34e969 100644
--- a/erts/emulator/beam/erl_global_literals.c
+++ b/erts/emulator/beam/erl_global_literals.c
@@ -24,6 +24,10 @@
 #  include "config.h"
 #endif
 
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
 #include "sys.h"
 #include "global.h"
 #include "erl_global_literals.h"
@@ -65,6 +69,29 @@ struct global_literal_chunk {
 /* The size of the global literal term that is being built */
 Uint global_literal_build_size;
 
+/*
+ * Returns 1 when `ptr` points into the term storage of any chunk on the
+ * global literal chunk list (from area.start up to, but not including,
+ * chunk_end), otherwise 0.
+ *
+ * NOTE(review): the list is walked without taking global_literal_lock --
+ * confirm that this cannot run concurrently with a chunk being added.
+ */
+int
+erts_global_literal_is_in_range(void *ptr)
+{
+    struct global_literal_chunk *chunk = global_literal_chunk;
+    char *p = (char *) ptr;
+
+    while (chunk) {
+        if (p >= (char *) chunk->area.start && p < (char *) chunk->chunk_end) {
+            return 1;
+        }
+        chunk = chunk->next;
+    }
+    return 0;
+}
+
 ErtsLiteralArea *erts_global_literal_iterate_area(ErtsLiteralArea *prev)
 {
 
@@ -93,22 +112,42 @@ static void expand_shared_global_literal_area(Uint heap_size)
     const size_t size = sizeof(struct global_literal_chunk) +
                         (heap_size - 1) * sizeof(Eterm);
     struct global_literal_chunk *chunk;
+    int use_record_backend = erts_mmap_record_option_record_enabled();
 
-#ifndef DEBUG 
+    if (use_record_backend) {
+        /* Record mode: take the chunk from the mmap-record backend. */
+        UWord mmap_size = (UWord) size;
+        chunk = (struct global_literal_chunk *)
+            erts_mmap_record_alloc(&mmap_size, 0);
+    } else {
+#ifndef DEBUG
     chunk = (struct global_literal_chunk *) erts_alloc(ERTS_ALC_T_LITERAL, size);
 #else
     /* erts_mem_guard requires the memory area to be page aligned. Overallocate
      * and align the address to ensure that is the case. */
     UWord address;
     address = (UWord) erts_alloc(ERTS_ALC_T_LITERAL, size + sys_page_size * 2);
     address = (address + (sys_page_size - 1)) & ~(sys_page_size - 1);
     chunk = (struct global_literal_chunk *) address;
+#endif
+    }
+
+    /*
+     * Check the result before the chunk is touched. This must precede the
+     * DEBUG-only hole-marker fill below, which writes through `chunk`.
+     */
+    if (!chunk) {
+        erts_exit(ERTS_ABORT_EXIT,
+                  "global_literals: failed to allocate %bpu bytes for literal chunk (record_mode=%d)\n",
+                  (UWord) size, use_record_backend);
+    }
 
+#ifdef DEBUG
     for (int i = 0; i < heap_size; i++) {
         chunk->area.start[i] = ERTS_HOLE_MARKER;
     }
 #endif
 
     chunk->area.end = &(chunk->area.start[0]);
     chunk->chunk_end = &(chunk->area.start[heap_size]);
     chunk->area.off_heap = NULL;
@@ -187,7 +225,197 @@ init_global_literals(void)
 {
     erts_mtx_init(&global_literal_lock, "global_literals", NIL,
         ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
-    
+
+    /*
+     * Replay path: instead of allocating a fresh global-literal chunk
+     * (which would produce a new ERTS_GLOBAL_LIT_EMPTY_TUPLE at a different
+     * virtual address than the record run baked into every empty literal
+     * flatmap's `keys` field), reload the snapshotted globals from
+     * struct-root-dumps. The empty tuple's underlying bytes [0,0] still
+     * live at the same arena address thanks to MAP_PRIVATE.
+     */
+    if (erts_mmap_record_option_replay_enabled()
+        && erts_global_literals_apply_replay_root()) {
+        return;
+    }
+
     expand_shared_global_literal_area(GLOBAL_LITERAL_INITIAL_SIZE);
     init_empty_tuple();
-}
\ No newline at end of file
+
+    /*
+     * Record path: register the global state for the struct-root-dump so
+     * the next replay run can restore the same empty-tuple Eterm value and
+     * chunk-list head. Done after init_empty_tuple so the snapshot
+     * captures the post-init values.
+     */
+    if (erts_mmap_record_option_record_enabled()) {
+        erts_alloc_trace_note_alloc("global_literals.empty_tuple",
+                                    &ERTS_GLOBAL_LIT_EMPTY_TUPLE,
+                                    sizeof(ERTS_GLOBAL_LIT_EMPTY_TUPLE));
+        erts_alloc_trace_note_alloc("global_literals.chunk_head",
+                                    &global_literal_chunk,
+                                    sizeof(global_literal_chunk));
+    }
+}
+
+/*
+ * Read exactly `size` bytes from the file `path` into `dst`.
+ *
+ * The bytes are staged in a local buffer so that `dst` is only modified
+ * when the complete read succeeded. Returns 1 on success and 0 if the
+ * file cannot be opened, is too short, or `size` exceeds the staging
+ * buffer.
+ */
+static int
+global_literals_read_root_file(const char *path, void *dst, size_t size)
+{
+    union {
+        Eterm term;
+        struct global_literal_chunk *chunk;
+    } staging;
+    FILE *bf;
+    int ok;
+
+    if (size > sizeof(staging)) {
+        return 0;
+    }
+    bf = fopen(path, "rb");
+    if (!bf) {
+        return 0;
+    }
+    ok = (fread(&staging, 1, size, bf) == size);
+    fclose(bf);
+    if (ok) {
+        sys_memcpy(dst, &staging, size);
+    }
+    return ok;
+}
+
+/*
+ * Restore ERTS_GLOBAL_LIT_EMPTY_TUPLE and global_literal_chunk from the
+ * struct-root-dumps written by a record run.
+ *
+ * The manifest <dir>/roots.csv is scanned for the entries tagged
+ * "global_literals.empty_tuple" and "global_literals.chunk_head". Lines
+ * that do not start with a digit, lines with fewer than five
+ * comma-separated fields and entries whose recorded size differs from the
+ * size of the corresponding global are skipped.
+ *
+ * Both values are staged in locals and the globals are only assigned once
+ * both entries have been read completely, so a missing or partial
+ * snapshot leaves the globals untouched for the fresh-init fallback.
+ *
+ * Returns 1 if both globals were restored, otherwise 0.
+ */
+int
+erts_global_literals_apply_replay_root(void)
+{
+    char dir_buf[512];
+    char manifest_path[1024];
+    FILE *mf;
+    char line[1024];
+    Eterm empty_tuple_snapshot = THE_NON_VALUE;
+    struct global_literal_chunk *chunk_head_snapshot = NULL;
+    int loaded_empty_tuple = 0;
+    int loaded_chunk_head = 0;
+    const char *rr_dir = erts_mmap_record_option_dir();
+    const char *replay_root_dbg = getenv("ERTS_REPLAY_ROOT_DEBUG");
+    int replay_dbg_enabled = replay_root_dbg
+        && replay_root_dbg[0] != '\0'
+        && replay_root_dbg[0] != '0';
+
+    /*
+     * Format the dump directory straight into dir_buf. It must not be
+     * re-formatted from a pointer into dir_buf itself: formatting with
+     * overlapping source and destination is undefined.
+     */
+    if (rr_dir && rr_dir[0] != '\0') {
+        erts_snprintf(dir_buf, sizeof(dir_buf), "%s/struct-root-dumps", rr_dir);
+    } else {
+        erts_snprintf(dir_buf, sizeof(dir_buf),
+                      "_mmap-records/struct-root-dumps");
+    }
+    erts_snprintf(manifest_path, sizeof(manifest_path),
+                  "%s/roots.csv", dir_buf);
+
+    mf = fopen(manifest_path, "r");
+    if (!mf) {
+        return 0;
+    }
+
+    while (fgets(line, sizeof(line), mf) != NULL) {
+        char *p1, *p2, *p3, *p4;
+        char *tag, *szs, *file;
+        unsigned long sz;
+        char file_path[1024];
+        void *dst;
+        UWord want_size;
+        int *loaded;
+        int path_len;
+
+        if (line[0] == '\0' || line[0] == '\n' || line[0] == '#'
+            || !isdigit((unsigned char) line[0])) {
+            continue;
+        }
+        p1 = strchr(line, ','); if (!p1) continue;
+        p2 = strchr(p1 + 1, ','); if (!p2) continue;
+        p3 = strchr(p2 + 1, ','); if (!p3) continue;
+        p4 = strchr(p3 + 1, ','); if (!p4) continue;
+        tag = p1 + 1; *p2 = '\0';
+        szs = p3 + 1; *p4 = '\0';
+        file = p4 + 1;
+        file[strcspn(file, "\r\n")] = '\0';
+
+        if (strcmp(tag, "global_literals.empty_tuple") == 0) {
+            dst = &empty_tuple_snapshot;
+            want_size = sizeof(ERTS_GLOBAL_LIT_EMPTY_TUPLE);
+            loaded = &loaded_empty_tuple;
+        } else if (strcmp(tag, "global_literals.chunk_head") == 0) {
+            dst = &chunk_head_snapshot;
+            want_size = sizeof(global_literal_chunk);
+            loaded = &loaded_chunk_head;
+        } else {
+            continue;
+        }
+
+        sz = strtoul(szs, NULL, 10);
+        if ((UWord) sz != want_size) {
+            erts_fprintf(stderr,
+                         "global_literals replay restore size mismatch tag=%s "
+                         "dump=%lu expected=%bpu\n",
+                         tag, sz, want_size);
+            continue;
+        }
+        path_len = erts_snprintf(file_path, sizeof(file_path),
+                                 "%s/%s", dir_buf, file);
+        if (path_len < 0 || path_len >= (int) sizeof(file_path)) {
+            /* Truncated path: never open a file other than the named one. */
+            continue;
+        }
+        if (global_literals_read_root_file(file_path, dst, want_size)) {
+            *loaded = 1;
+        }
+    }
+
+    fclose(mf);
+
+    if (loaded_empty_tuple && loaded_chunk_head) {
+        ERTS_GLOBAL_LIT_EMPTY_TUPLE = empty_tuple_snapshot;
+        global_literal_chunk = chunk_head_snapshot;
+        if (replay_dbg_enabled) {
+            erts_fprintf(stderr,
+                         "global_literals: restored empty_tuple=%p chunk_head=%p "
+                         "from replay snapshot\n",
+                         (void *) (UWord) ERTS_GLOBAL_LIT_EMPTY_TUPLE,
+                         (void *) global_literal_chunk);
+        }
+        return 1;
+    }
+    if (loaded_empty_tuple || loaded_chunk_head) {
+        erts_fprintf(stderr,
+                     "global_literals: partial replay snapshot "
+                     "(empty_tuple=%d chunk_head=%d), falling back to fresh init\n",
+                     loaded_empty_tuple, loaded_chunk_head);
+    }
+    return 0;
+}
diff --git a/erts/emulator/beam/erl_global_literals.h b/erts/emulator/beam/erl_global_literals.h
index 924b02b36f00..40fd134950ed 100644
--- a/erts/emulator/beam/erl_global_literals.h
+++ 
b/erts/emulator/beam/erl_global_literals.h @@ -42,6 +42,23 @@ extern Eterm ERTS_GLOBAL_LIT_EMPTY_TUPLE; */ void init_global_literals(void); +/* + * Replay-only: restore the snapshotted ERTS_GLOBAL_LIT_EMPTY_TUPLE term and + * the global_literal_chunk linked-list head from struct-root-dumps. + * Returns 1 on success (snapshot loaded and globals updated), 0 if the + * snapshot is unavailable. + * + * When the empty tuple snapshot is restored its boxed pointer references an + * address in the record-time arena; the arena is mapped MAP_PRIVATE at + * replay so the bytes [0,0] of the empty tuple header survive at the same + * virtual address. Without this restore, init_empty_tuple() would create a + * fresh empty tuple at a new literal-mmapper address, but every literal map + * loaded from beam files still has its `keys` field pointing at the + * record-time empty tuple address, which would cause ets:insert deep-copy + * to assert (obj == ERTS_GLOBAL_LIT_EMPTY_TUPLE) and crash. + */ +int erts_global_literals_apply_replay_root(void); + /* Allocates space for global literals. Users must call erts_global_literal_register * when done creating the literal. */ @@ -50,6 +67,7 @@ Eterm *erts_global_literal_allocate(Uint sz, struct erl_off_heap_header ***ohp); /* Registers the pointed-to term as a global literal. Must be called for terms * allocated using erts_global_literal_allocate.*/ void erts_global_literal_register(Eterm *variable); +int erts_global_literal_is_in_range(void *ptr); /* Iterates between global literal areas. Can only be used when crash dumping. 
* Iteration is started by passing NULL, then successively calling this function diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 74736f8ed7db..f46dd3f78d41 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -54,9 +54,12 @@ #include "erl_osenv.h" #include "erl_proc_sig_queue.h" #include "beam_load.h" +#include "beam_catches.h" #include "erl_global_literals.h" #include "erl_iolist.h" #include "erl_debugger.h" +#include "erl_mmap.h" +#include "index.h" #include "jit/beam_asm.h" @@ -117,8 +120,8 @@ int erts_backtrace_depth; /* How many functions to show in a backtrace erts_atomic32_t erts_max_gen_gcs; -Eterm erts_error_logger_warnings; /* What to map warning logs to, am_error, - am_info or am_warning, am_error is +Eterm erts_error_logger_warnings; /* What to map warning logs to, am_error, + am_info or am_warning, am_error is the default for BC */ int erts_compat_rel; @@ -184,15 +187,15 @@ has_prefix(const char *prefix, const char *string) } static char* -progname(char *fullname) +progname(char *fullname) { int i; - + i = sys_strlen(fullname); while (i >= 0) { - if ((fullname[i] != '/') && (fullname[i] != '\\')) + if ((fullname[i] != '/') && (fullname[i] != '\\')) i--; - else + else break; } return fullname+i+1; @@ -206,11 +209,11 @@ this_rel_num(void) if (this_rel < 1) { int i; char this_rel_str[] = ERLANG_OTP_RELEASE; - + i = 0; while (this_rel_str[i] && !isdigit((int) this_rel_str[i])) i++; - this_rel = atoi(&this_rel_str[i]); + this_rel = atoi(&this_rel_str[i]); if (this_rel < 1) erts_exit(1, "Unexpected ERLANG_OTP_RELEASE format\n"); } @@ -241,6 +244,12 @@ void erl_error(const char *fmt, va_list args) } static int early_init(int *argc, char **argv); +static void ensure_replay_node_argument(); +static int restore_struct_roots_for_replay(IndexTable *atom_root, + IndexTable *module_roots, + int table_capacity, + IndexTable *export_roots, + IndexTable *fun_roots); static void erl_init(int ncpu, @@ -280,10 
+289,37 @@ erl_init(int ncpu, erts_init_debugger(); erts_init_trace(); erts_code_ix_init(); - erts_init_fun_table(); - init_atom_table(); - init_export_table(); - init_module_table(); + if (erts_mmap_record_option_replay_enabled()) { + IndexTable atom_root; + IndexTable module_roots[ERTS_NUM_CODE_IX]; + IndexTable export_roots[ERTS_NUM_CODE_IX]; + IndexTable fun_roots[ERTS_NUM_CODE_IX]; + if (!restore_struct_roots_for_replay(&atom_root, + module_roots, + ERTS_NUM_CODE_IX, + export_roots, + fun_roots)) { + erts_exit(ERTS_ABORT_EXIT, + "failed to restore replay root structures from struct-root-dumps\n"); + } + erts_init_fun_table_replay(fun_roots, ERTS_NUM_CODE_IX); + init_atom_table_replay(&atom_root); + init_module_table_replay(module_roots, ERTS_NUM_CODE_IX); + init_export_table_replay(export_roots, ERTS_NUM_CODE_IX); + /* + * Restore the active/staging code indices that were in effect at + * record time. Must be done after the index tables have been + * populated from the snapshot but before any code path uses + * erts_active_code_ix() to look up code. 
+ */ + erts_code_ix_apply_replay_root(); + erts_export_replay_repair_all_lambdas(); + } else { + erts_init_fun_table(); + init_atom_table(); + init_module_table(); + init_export_table(); + } init_register_table(); init_message(); #ifdef BEAMASM
@@ -334,11 +370,13 @@ erl_spawn_system_process(Process* parent, Eterm mod, Eterm func, Eterm args, { Eterm res; int arity; + ErtsCodePtr fn_active; ERTS_LC_ASSERT(ERTS_PROC_LOCK_MAIN & erts_proc_lc_my_proc_locks(parent)); arity = erts_list_length(args); + fn_active = erts_find_function(mod, func, arity, erts_active_code_ix()); - if (erts_find_function(mod, func, arity, erts_active_code_ix()) == NULL) { + if (fn_active == NULL) { erts_exit(ERTS_ERROR_EXIT, "No function %T:%T/%i\n", mod, func, arity); }
@@ -349,6 +387,224 @@ erl_spawn_system_process(Process* parent, Eterm mod, Eterm func, Eterm args, return res; }
+/*
+ * Read the index-table roots listed in <dir>/struct-root-dumps/roots.csv,
+ * where <dir> is erts_mmap_record_option_dir(), or "_mmap-records" when
+ * that is NULL or empty.
+ *
+ * Only manifest rows that start with a digit and contain at least four
+ * commas are used. The second field is the tag, the fourth field the dump
+ * size in bytes, and everything after the fourth comma is the dump file
+ * name relative to the manifest directory. Rows with an unknown tag or an
+ * unexpected size are skipped.
+ *
+ * The atom root is stored in *atom_root. Module, export and fun roots fill
+ * their arrays in manifest order, at most table_capacity entries each.
+ * A "code_ix.root" row is read straight into erts_code_ix_root.
+ *
+ * Returns non-zero when an atom root and exactly table_capacity roots for
+ * each of the three arrays were read. Returns 0 when the manifest or a
+ * listed dump file cannot be opened, or when a dump that is stored is
+ * shorter than its expected size.
+ */
+static int
+restore_struct_roots_for_replay(IndexTable *atom_root,
+                                IndexTable *module_roots,
+                                int table_capacity,
+                                IndexTable *export_roots,
+                                IndexTable *fun_roots)
+{
+    char dir_buf[512];
+    char manifest_path[1024];
+    FILE *mf = NULL;
+    char line[1024];
+    int module_ix = 0;
+    int export_ix = 0;
+    int fun_ix = 0;
+    int have_atom = 0;
+
+    {
+        const char *rr_dir = erts_mmap_record_option_dir();
+        /* Write the directory once; dir_buf must never be its own source. */
+        if (rr_dir && rr_dir[0] != '\0') {
+            erts_snprintf(dir_buf, sizeof(dir_buf), "%s/struct-root-dumps", rr_dir);
+        } else {
+            erts_snprintf(dir_buf, sizeof(dir_buf), "%s",
+                          "_mmap-records/struct-root-dumps");
+        }
+    }
+    erts_snprintf(manifest_path, sizeof(manifest_path), "%s/roots.csv", dir_buf);
+
+    mf = fopen(manifest_path, "r");
+    if (!mf) {
+        return 0;
+    }
+
+    while (fgets(line, sizeof(line), mf) != NULL) {
+        char *p1, *p2, *p3, *p4;
+        char *tag, *szs, *file;
+        unsigned long sz;
+        char file_path[1024];
+        FILE *bf;
+        IndexTable *dst = NULL;
+
+        if (line[0] == '\0' || line[0] == '\n' || line[0] == '#'
+            || !isdigit((unsigned char)line[0])) {
+            continue; /* blank line, comment, or anything not starting with a digit */
+        }
+
+        p1 = strchr(line, ',');
+        if (!p1) continue;
+        p2 = strchr(p1 + 1, ',');
+        if (!p2) continue;
+        p3 = strchr(p2 + 1, ',');
+        if (!p3) continue;
+        p4 = strchr(p3 + 1, ',');
+        if (!p4) continue;
+
+        tag = p1 + 1;
+        *p2 = '\0'; /* terminate the tag field */
+        szs = p3 + 1;
+        *p4 = '\0'; /* terminate the size field */
+        file = p4 + 1;
+        file[strcspn(file, "\r\n")] = '\0'; /* strip the line ending */
+
+        sz = strtoul(szs, NULL, 10);
+
+        if (strcmp(tag, "atom_table.index_root") == 0) {
+            if (sz != sizeof(IndexTable)) { continue; }
+            dst = atom_root;
+            have_atom = 1;
+        } else if (strcmp(tag, "module_table.index_root") == 0) {
+            if (sz != sizeof(IndexTable)) { continue; }
+            if (module_ix < table_capacity) {
+                dst = &module_roots[module_ix++];
+            }
+        } else if (strcmp(tag, "export_table.index_root") == 0) {
+            if (sz != sizeof(IndexTable)) { continue; }
+            if (export_ix < table_capacity) {
+                dst = &export_roots[export_ix++];
+            }
+        } else if (strcmp(tag, "fun_table.index_root") == 0) {
+            if (sz != sizeof(IndexTable)) { continue; }
+            if (fun_ix < table_capacity) {
+                dst = &fun_roots[fun_ix++];
+            }
+        } else if (strcmp(tag, "code_ix.root") == 0) {
+            /* Two int32_t: active, staging */
+            if (sz != sizeof(erts_code_ix_root)) { continue; }
+            erts_snprintf(file_path, sizeof(file_path), "%s/%s", dir_buf, file);
+            {
+                FILE *bf2 = fopen(file_path, "rb");
+                if (!bf2) { fclose(mf); return 0; }
+                if (fread((void *) erts_code_ix_root, 1,
+                          sizeof(erts_code_ix_root), bf2)
+                    != sizeof(erts_code_ix_root)) {
+                    fclose(bf2);
+                    fclose(mf);
+                    return 0;
+                }
+                fclose(bf2);
+            }
+            continue;
+        } else {
+            continue;
+        }
+
+        erts_snprintf(file_path, sizeof(file_path), "%s/%s", dir_buf, file);
+        bf = fopen(file_path, "rb");
+        if (!bf) {
+            fclose(mf);
+            return 0;
+        }
+
+        if (dst) {
+            if (fread((void *) dst, 1, sizeof(IndexTable), bf)
+                != sizeof(IndexTable)) {
+                fclose(bf);
+                fclose(mf);
+                return 0;
+            }
+        } else {
+            /* overflow: consume file and ignore */
+            IndexTable tmp;
+            size_t rr = fread((void *)&tmp, 1, sizeof(IndexTable), bf);
+            (void) rr;
+        }
+
+        fclose(bf);
+    }
+
+    fclose(mf);
+    return have_atom
+        && module_ix == table_capacity
+        && export_ix == table_capacity
+        && fun_ix == table_capacity;
+}
+
+/*
+ * Sanity-check the restored tables: every module listed by sys_preloaded()
+ * must have an atom, an entry in the active module table and non-empty
+ * current code, and erl_init:start/2 must be found in the active code
+ * index. Any violation terminates the emulator through erts_exit().
+ */
+static void
+validate_replay_module_tables(void)
+{
+    const Preload *preload;
+    int i;
+    Eterm mod_atom = THE_NON_VALUE;
+    Module *m = NULL;
+
+    preload = sys_preloaded();
+    if (!preload) {
+        erts_exit(ERTS_ABORT_EXIT,
+                  "replay validation failed: sys_preloaded() returned NULL\n");
+    }
+
+    i = 0;
+    while (preload[i].name) {
+        const char *name = preload[i].name;
+        int ok = erts_atom_get(name,
+                               sys_strlen(name),
+                               &mod_atom,
+                               ERTS_ATOM_ENC_LATIN1);
+        if (!ok) {
+            erts_exit(ERTS_ABORT_EXIT,
+                      "replay validation failed: atom for preloaded module '%s' not found in restored atom table\n",
+                      name);
+        }
+
+        m = erts_get_module(mod_atom, erts_active_code_ix());
+        if (!m) {
+            erts_exit(ERTS_ABORT_EXIT,
+                      "replay validation failed: module '%s' not found in active module table\n",
+                      name);
+        }
+
+        if (!m->curr.code_hdr || m->curr.code_length <= 0) {
+            erts_exit(ERTS_ABORT_EXIT,
+                      "replay validation failed: module '%s' has invalid current code (code_hdr=%p code_length=%d)\n",
+                      name, (void *) m->curr.code_hdr, m->curr.code_length);
+        }
+        i++;
+    }
+
+    if (!erts_find_function(am_erl_init, am_start, 2, erts_active_code_ix())) {
+        erts_exit(ERTS_ABORT_EXIT,
+                  "replay validation failed: function erl_init:start/2 not found in active code index\n");
+    }
+}
+
+/*
+ * Set by erl_first_process_otp() when "-hello" is in the boot argv.
+ * Gates test-only behaviour elsewhere (e.g. using hello:start/0 as the
+ * entry point for the always-alive ERTS system processes so that the
+ * whole VM is a self-contained single-module sandbox). Must stay 0 for
+ * the bootstrap/release boot where OTP expects the real modules.
+ */ +static int use_hello_mode = 0; + static Eterm erl_first_process_otp(char* mod_name, int argc, char** argv) { @@ -359,6 +590,7 @@ erl_first_process_otp(char* mod_name, int argc, char** argv) Process parent; ErlSpawnOpts so; Eterm boot_mod; + char *dbg = getenv("ERTS_REPLAY_ROOT_DEBUG"); /* * We need a dummy parent process to be able to call erl_create_process(). @@ -381,6 +613,12 @@ erl_first_process_otp(char* mod_name, int argc, char** argv) } boot_mod = erts_atom_put((byte *) mod_name, sys_strlen(mod_name), ERTS_ATOM_ENC_LATIN1, 1); + if (dbg && dbg[0] != '0') { + erts_fprintf(stderr, + "replay_root_debug: first_process boot_mod=%T boot_mod_raw=%p argc=%d am_erl_init=%p am_start=%p\n", + boot_mod, (void *) (UWord) boot_mod, argc, + (void *) (UWord) am_erl_init, (void *) (UWord) am_start); + } args = CONS(hp, args, NIL); hp += 2; args = CONS(hp, boot_mod, args); @@ -397,7 +635,7 @@ erl_first_process_otp(char* mod_name, int argc, char** argv) static Eterm erl_system_process_otp(Eterm parent_pid, char* modname, int off_heap_msgq, int prio) -{ +{ Process *parent; ErlSpawnOpts so; Eterm mod, res; @@ -422,7 +660,19 @@ erl_system_process_otp(Eterm parent_pid, char* modname, int off_heap_msgq, int p so.max_gen_gcs = (Uint16) erts_atomic32_read_nob(&erts_max_gen_gcs); so.scheduler = 0; - res = erl_spawn_system_process(parent, mod, am_start, NIL, &so); + /* + * In -hello mode, replace every always-alive system process + * (erts_code_purger, erts_literal_area_collector, ...) with a dummy + * running hello:start/0, which just sits in a receive loop. That + * keeps the VM's global pointers (erts_code_purger, etc.) populated + * with live PIDs without pulling in the real module code paths that + * aren't available in the hello-only sandbox. Outside of -hello mode + * we must keep spawning the requested module so the bootstrap and + * normal release boots work as before. + */ + res = erl_spawn_system_process(parent, + mod,//use_hello_mode ? 
am_hello : mod, + am_start, NIL, &so); ASSERT(is_internal_pid(res)); erts_proc_unlock(parent, ERTS_PROC_LOCK_MAIN); @@ -486,6 +736,7 @@ static char* program; static char* init = "init"; static int boot_argc; static char** boot_argv; +static char replay_boot_arg[] = "-replay"; static char * get_arg(char* rest, char* next, int* ip) @@ -501,7 +752,7 @@ get_arg(char* rest, char* next, int* ip) return rest; } -static void +static void load_preloaded(void) { int i; @@ -537,6 +788,8 @@ __decl_noreturn void __noreturn erts_usage(void) int this_rel = this_rel_num(); erts_fprintf(stderr, "Usage: %s [flags] [ -- [init_args] ]\n", progname(program)); erts_fprintf(stderr, "The flags are:\n\n"); + erts_fprintf(stderr, "-record dir record into /mseg-arena.bin (creates missing subdirectories)\n"); + erts_fprintf(stderr, "-replay dir replay from /mseg-arena.bin (mutually exclusive with -record)\n"); erts_fprintf(stderr, "-a size suggest stack size in kilo words for threads\n"); erts_fprintf(stderr, " in the async-thread pool; valid range is [%d-%d]\n", ERTS_ASYNC_THREAD_MIN_STACK_SIZE, @@ -877,6 +1130,32 @@ early_init(int *argc, char **argv) /* if (argc && argv) { int i = 1; while (i < *argc) { + if (sys_strcmp(argv[i], "-record") == 0) { + char *path = get_arg("", argv[i+1], &i); + if (!erts_mmap_record_option_record(path)) { + erts_fprintf(stderr, "-record and -replay are mutually exclusive\n"); + erts_usage(); + } + if (!erts_mmap_record_init()) { + erts_fprintf(stderr, "failed to initialize -record mmap arena directory %s\n", path); + erts_usage(); + } + i++; + continue; + } + if (sys_strcmp(argv[i], "-replay") == 0) { + char *path = get_arg("", argv[i+1], &i); + if (!erts_mmap_record_option_replay(path)) { + erts_fprintf(stderr, "-record and -replay are mutually exclusive\n"); + erts_usage(); + } + if (!erts_mmap_record_init()) { + erts_fprintf(stderr, "failed to initialize -replay mmap arena directory %s\n", path); + erts_usage(); + } + i++; + continue; + } if 
(sys_strcmp(argv[i], "--") == 0) { /* end of emulator options */ i++; break;
@@ -1267,7 +1546,7 @@ early_init(int *argc, char **argv) /* #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_late_init(); #endif - + #ifdef ERTS_ENABLE_LOCK_COUNT erts_lcnt_late_init(); #endif
@@ -1284,6 +1563,37 @@ early_init(int *argc, char **argv) /* return ncpu; }
+/*
+ * Ensure that the argument vector passed on to init contains "-replay".
+ * If no element of boot_argv equals "-replay", boot_argv is replaced by a
+ * malloc()ed copy with replay_boot_arg appended and boot_argc is
+ * incremented; the copy is not freed here. Exits the emulator if the
+ * copy cannot be allocated.
+ */
+static void ensure_replay_node_argument(void)
+{
+    int j;
+    char **replay_boot_argv;
+
+    for (j = 0; j < boot_argc; j++) {
+        if (boot_argv[j] && sys_strcmp(boot_argv[j], "-replay") == 0) {
+            return; /* already present, nothing to append */
+        }
+    }
+
+    /* One slot more than boot_argc for the appended flag. */
+    replay_boot_argv = (char **) malloc(sizeof(char *) * (boot_argc + 1));
+    if (!replay_boot_argv) {
+        erts_exit(ERTS_ABORT_EXIT,
+                  "failed to append -replay to node boot arguments\n");
+    }
+    for (j = 0; j < boot_argc; j++) {
+        replay_boot_argv[j] = boot_argv[j];
+    }
+    replay_boot_argv[boot_argc] = replay_boot_arg;
+    boot_argv = replay_boot_argv;
+    boot_argc++;
+}
 void erl_start(int argc, char **argv)
@@ -1344,6 +1650,16 @@ erl_start(int argc, char **argv) sys_proc_outst_req_lim = 2*erts_no_schedulers; while (i < argc) { + if (sys_strcmp(argv[i], "-record") == 0) { + (void) get_arg("", argv[i+1], &i); + i++; + continue; + } + if (sys_strcmp(argv[i], "-replay") == 0) { + (void) get_arg("", argv[i+1], &i); + i++; + continue; + } if (argv[i][0] != '-') { erts_usage(); }
@@ -2501,6 +2817,9 @@ erl_start(int argc, char **argv) boot_argc = argc - i; /* Number of arguments to init */ boot_argv = &argv[i]; + if (erts_mmap_record_option_replay_enabled()) { + ensure_replay_node_argument(); + } if (erts_sched_thread_suggested_stack_size < ERTS_SCHED_THREAD_MIN_STACK_SIZE) erts_sched_thread_suggested_stack_size = ERTS_SCHED_THREAD_MIN_STACK_SIZE;
@@ -2521,14 +2840,129 @@ erl_start(int argc, char **argv) node_tab_delete_delay, db_spin_count); - load_preloaded(); - erts_end_staging_code_ix(); - erts_commit_staging_code_ix(); + if (erts_mmap_record_option_replay_enabled()) { +
ErtsThrPrgrDelayHandle replay_validate_dhndl; + replay_validate_dhndl = erts_thr_progress_unmanaged_delay(); + validate_replay_module_tables(); + erts_thr_progress_unmanaged_continue(replay_validate_dhndl); + /* + * Rebuild the per-module PC range table from the restored module + * table. load_preloaded() (which normally calls erts_update_ranges() + * for every loaded module) is skipped in replay mode, so without + * this step erts_find_function_from_pc() would return NULL for any + * PC, breaking tracing, stack walking, and exception handling. + */ + erts_ranges_replay_rebuild(); + /* + * Restore the beam-catches header array bccix[] from the dump. + * beam_catches_init() ran as part of init_emulator() above and + * installed a fresh empty table; the restored code still has + * make_catch(index) immediates baked in that refer to the + * RECORD-time indices, so we must swap the fresh header back to + * the recorded one. The per-pool entry tables it points to live + * in the long-lived allocator's carriers (mseg → record arena), + * so they map back at the same virtual addresses via MAP_PRIVATE + * and do not need separate restoration. 
+ */ + { + const char *base_dir = NULL; + char dir_buf[512]; + char manifest_path[1024]; + FILE *mf; + char line[1024]; + int loaded = 0; - erts_initialized = 1; + { + const char *rr_dir = erts_mmap_record_option_dir(); + if (rr_dir && rr_dir[0] != '\0') { + erts_snprintf(dir_buf, sizeof(dir_buf), "%s/struct-root-dumps", rr_dir); + base_dir = dir_buf; + } else { + base_dir = "_mmap-records/struct-root-dumps"; + } + } + erts_snprintf(dir_buf, sizeof(dir_buf), "%s", base_dir); + erts_snprintf(manifest_path, sizeof(manifest_path), + "%s/roots.csv", dir_buf); + + mf = fopen(manifest_path, "r"); + if (mf) { + while (fgets(line, sizeof(line), mf) != NULL) { + char *p1, *p2, *p3, *p4; + char *tag, *szs, *file; + unsigned long sz; + char file_path[1024]; + FILE *bf; + UWord bccix_size = erts_beam_catches_bccix_size(); + void *buf; + + if (line[0] == '\0' || line[0] == '\n' || line[0] == '#' + || !isdigit((unsigned char) line[0])) { + continue; + } + p1 = strchr(line, ','); if (!p1) continue; + p2 = strchr(p1 + 1, ','); if (!p2) continue; + p3 = strchr(p2 + 1, ','); if (!p3) continue; + p4 = strchr(p3 + 1, ','); if (!p4) continue; + tag = p1 + 1; *p2 = '\0'; + szs = p3 + 1; *p4 = '\0'; + file = p4 + 1; + file[strcspn(file, "\r\n")] = '\0'; + + if (strcmp(tag, "beam_catches.bccix") != 0) continue; + + sz = strtoul(szs, NULL, 10); + if ((UWord) sz != bccix_size) { + erts_fprintf(stderr, + "bccix restore size mismatch " + "dump=%lu expected=%bpu\n", + sz, bccix_size); + continue; + } + erts_snprintf(file_path, sizeof(file_path), + "%s/%s", dir_buf, file); + bf = fopen(file_path, "rb"); + if (!bf) continue; + buf = erts_alloc(ERTS_ALC_T_TMP, bccix_size); + if (fread(buf, 1, bccix_size, bf) == bccix_size) { + beam_catches_apply_replay_root(buf, bccix_size); + loaded = 1; + } + erts_free(ERTS_ALC_T_TMP, buf); + fclose(bf); + break; + } + fclose(mf); + } + (void) loaded; + } + } else { + load_preloaded(); + } + if (!erts_mmap_record_option_replay_enabled()) { + /* + * Non-replay: 
end staging for the preloaded modules and commit + * them so they become active. In replay mode the active/staging + * code indices have already been restored from the snapshot via + * erts_code_ix_apply_replay_root(), so we must not advance or + * overwrite them here. + */ + erts_end_staging_code_ix(); + erts_commit_staging_code_ix(); + } + erts_initialized = 1; erts_init_process_id = erl_first_process_otp(init, boot_argc, boot_argv); ASSERT(erts_init_process_id != ERTS_INVALID_PID); + if (erts_mmap_record_option_replay_enabled()) { + ErtsThrPrgrDelayHandle replay_nif_dhndl; + replay_nif_dhndl = erts_thr_progress_unmanaged_delay(); +#ifdef ERTS_ENABLE_LOCK_CHECK + erts_lc_soften_code_mod_permission_check(); +#endif + erts_replay_reinit_loaded_static_nifs(); + erts_thr_progress_unmanaged_continue(replay_nif_dhndl); + } { /* @@ -2585,16 +3019,6 @@ erl_start(int argc, char **argv) ASSERT(erts_dirty_process_signal_handler_max && erts_dirty_process_signal_handler_max->common.id == pid); erts_proc_inc_refc(erts_dirty_process_signal_handler_max); - - pid = erl_system_process_otp(erts_init_process_id, - "erts_trace_cleaner", !0, - PRIORITY_NORMAL); - erts_trace_cleaner - = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, - internal_pid_index(pid)); - ASSERT(erts_trace_cleaner && erts_trace_cleaner->common.id == pid); - erts_proc_inc_refc(erts_trace_cleaner); - } erts_start_schedulers(); diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c index 3ee57565a200..285c446ea2c8 100644 --- a/erts/emulator/beam/erl_lock_check.c +++ b/erts/emulator/beam/erl_lock_check.c @@ -194,6 +194,8 @@ static erts_lc_lock_order_t erts_lock_order[] = { {"erts_alloc_hard_debug", NULL}, {"hard_dbg_mseg", NULL}, {"jit_debug_descriptor", NULL}, + {"mmap_record", NULL}, + {"mmap_record_literal", NULL}, {"erts_mmap", NULL}, #ifdef ERTS_ENSURE_OS_MONOTONIC_TIME {"ensure_os_monotonic_time", NULL}, diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c 
index f7acec81521f..206e087caecd 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -393,6 +393,16 @@ schedule(ErlNifEnv* env, NativeFunPtr direct_fp, NativeFunPtr indirect_fp, argc, (const Eterm *) argv); if (!ep->m) { /* First time this call is scheduled... */ + if (erts_mmap_record_option_replay_enabled() + && erts_refc_read(&env->mod_nif->dynlib_refc, 0) < 1) { + /* + * Replay may restore stale dynamic-library refc state from the + * snapshot (for example zero even though the module instance still + * points at a live static NIF). Re-establish the baseline module + * reference so dirty NIF scheduling can safely take its call ref. + */ + erts_refc_init(&env->mod_nif->dynlib_refc, 1); + } erts_refc_inc(&env->mod_nif->dynlib_refc, 2); ep->m = env->mod_nif; } @@ -2679,7 +2689,8 @@ ErlNifResourceType* open_resource_type(ErlNifEnv* env, if (!env->mod_nif || !(env->mod_nif->flags & ERTS_MOD_NIF_FLG_LOADING)) goto done; - ERTS_LC_ASSERT(erts_has_code_mod_permission()); + ERTS_LC_ASSERT(erts_has_code_mod_permission() + || erts_mmap_record_option_replay_enabled()); module_am = make_atom(env->mod_nif->mod->module); name_am = enif_make_atom(env, name_str); @@ -2792,8 +2803,10 @@ static void prepare_opened_rt(struct erl_module_nif* lib) } else { /* ERL_NIF_RT_TAKEOVER */ steal_resource_type(type); - ASSERT(erts_refc_read(&type->owner->refc, 1) > 0); - ASSERT(erts_refc_read(&type->owner->dynlib_refc, 1) > 0); + if (!erts_mmap_record_option_replay_enabled()) { + ASSERT(erts_refc_read(&type->owner->refc, 1) > 0); + ASSERT(erts_refc_read(&type->owner->dynlib_refc, 1) > 0); + } /* * Prepare for atomic change of callbacks with lock-wrappers @@ -2805,8 +2818,17 @@ static void prepare_opened_rt(struct erl_module_nif* lib) } type->owner = lib; - if (rt_have_callbacks(&ort->new_callbacks)) + if (rt_have_callbacks(&ort->new_callbacks)) { + if (erts_mmap_record_option_replay_enabled() + && erts_refc_read(&lib->dynlib_refc, 0) < 1) { + 
erts_refc_init(&lib->dynlib_refc, 1); + } erts_refc_inc(&lib->dynlib_refc, 2); + } + if (erts_mmap_record_option_replay_enabled() + && erts_refc_read(&lib->refc, 0) < 1) { + erts_refc_init(&lib->refc, 1); + } erts_refc_inc(&lib->refc, 2); ort = ort->next; @@ -5023,7 +5045,8 @@ static void patch_call_nif_early(ErlNifEntry* entry, { int i; - ERTS_LC_ASSERT(erts_has_code_mod_permission()); + ERTS_LC_ASSERT(erts_has_code_mod_permission() + || erts_mmap_record_option_replay_enabled()); ERTS_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&erts_nif_call_tab_lock)); erts_unseal_module(this_mi); @@ -5301,6 +5324,274 @@ static ErtsStaticNif* is_static_nif_module(Eterm mod_atom) return NULL; } +static int +replay_should_reinit_static_nif(const ErlNifEntry* entry) +{ + /* + * All static NIFs need their load() callback re-run at replay time so + * any C-side state (resource types, atom tables, lookup tables, ...) + * is rebuilt against the current VM. Skipping one historically caused + * tty_create_nif (prim_tty) to dereference a NULL resource type during + * shell startup. The set of statically-linked NIFs is small and fixed + * (prim_tty, erl_tracer, prim_buffer, prim_file, zlib, zstd, + * prim_socket, prim_net), so re-running their load callbacks is cheap. 
+ */ + (void) entry; + return 1; +} +/* Install call stubs for each function of lib->entry that is found in mi. Returns 1 and sets lib->finish when at least one stub was hashed; otherwise frees the stub array and returns 0. Takes erts_nif_call_tab_lock for writing while it works. */ +static int +replay_install_static_nif_call_stubs(struct erl_module_nif* lib, + struct erl_module_instance* mi) +{ + ErlNifEntry *entry = &lib->entry; + ErtsNifFinish *fin; + Eterm f_atom; + int i; + Uint miss_hash = 0; + const int replay_dbg = !!getenv("ERTS_REPLAY_NIF_DEBUG"); + + fin = erts_alloc(ERTS_ALC_T_NIF, sizeof_ErtsNifFinish(entry->num_of_funcs)); + fin->nstubs_hashed = 0; + + erts_rwmtx_rwlock(&erts_nif_call_tab_lock); + for (i = 0; i < entry->num_of_funcs; i++) { + int func_ix; + const ErtsCodeInfo *ci; + ErtsNifBeamStub tmpl; + ErtsNifBeamStub *stub = &fin->beam_stubv[i]; + ErlNifFunc *f = &entry->funcs[i]; + + if (!erts_atom_get(f->name, sys_strlen(f->name), &f_atom, ERTS_ATOM_ENC_LATIN1) + || (func_ix = get_func_ix(mi->code_hdr, f_atom, f->arity)) < 0) { + continue; /* no atom or no such function in this module instance */ + } + + ci = mi->code_hdr->functions[func_ix]; + stub->code_info_ptr = ci; + stub->info = *ci; + + tmpl.code_info_ptr = ci; + if (hash_get(&erts_nif_call_tab, &tmpl) != NULL) { + hash_erase(&erts_nif_call_tab, &tmpl); /* drop the entry that currently matches this code info */ + } + if (hash_put(&erts_nif_call_tab, stub) != stub) { + miss_hash++; + continue; + } + fin->nstubs_hashed++; + +#ifdef BEAMASM + { + void* normal_fptr; + void* dirty_fptr; + + if (f->flags) { + if (f->flags == ERL_NIF_DIRTY_JOB_IO_BOUND) { + normal_fptr = static_schedule_dirty_io_nif; + } else { + normal_fptr = static_schedule_dirty_cpu_nif; + } + dirty_fptr = f->fptr; + } else { + dirty_fptr = NULL; + normal_fptr = f->fptr; + } + + beamasm_emit_call_nif(ci, + normal_fptr, + lib, + dirty_fptr, + (char *)&stub->info, + sizeof(stub->info) + sizeof(stub->code)); + } +#else + stub->code.call_nif[0] = BeamOpCodeAddr(op_call_nif_WWW); + stub->code.call_nif[2] = (BeamInstr) lib; + + if (f->flags) { + stub->code.call_nif[3] = (BeamInstr) f->fptr; + stub->code.call_nif[1] = + (f->flags == ERL_NIF_DIRTY_JOB_IO_BOUND) + ? 
(BeamInstr) static_schedule_dirty_io_nif + : (BeamInstr) static_schedule_dirty_cpu_nif; + } else { + stub->code.call_nif[1] = (BeamInstr) f->fptr; + } +#endif + } + if (fin->nstubs_hashed == 0) { /* nothing was installed: release the stub array and report failure */ + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: stubs_failed module=%s looked_up=0 hash_fail=%bpu\n", + entry->name, miss_hash); + } + erts_rwmtx_rwunlock(&erts_nif_call_tab_lock); + erts_free(ERTS_ALC_T_NIF, fin); + return 0; + } + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: stubs_ok module=%s count=%d\n", + entry->name, fin->nstubs_hashed); + } + patch_call_nif_early(entry, mi); + erts_rwmtx_rwunlock(&erts_nif_call_tab_lock); + lib->finish = fin; + return 1; +} +/* Create a lib for a static NIF entry, bind it to module_p and a copy of its current instance, and install its call stubs. Returns the lib, or NULL (after freeing it) when no stub could be installed. */ +static struct erl_module_nif * +replay_create_static_nif_lib(Module *module_p, ErlNifEntry *entry) +{ + struct erl_module_nif *lib; + + lib = create_lib(entry); + lib->handle = NULL; /* no dynamic library handle is attached */ + erts_refc_init(&lib->refc, 2); + erts_refc_init(&lib->dynlib_refc, 1); + lib->flags = 0; + lib->on_halt.callback = NULL; + lib->unload_thr_callback = NULL; + erts_atomic_init_nob(&lib->unload_thr_counter, -1); + lib->mod = module_p; + lib->mi_copy = module_p->curr; + lib->priv_data = NULL; + lib->finish = NULL; + + if (!replay_install_static_nif_call_stubs(lib, &module_p->curr)) { + erts_free(ERTS_ALC_T_NIF, lib); + return NULL; + } + return lib; +} + +/* + * Replay-time correlation flag used by ETS / copy diagnostics: + * 0 = static NIF replay reinit has not run / not active for current entry + * 1 = currently inside a static NIF load callback (e.g. 
prim_file:load/3) + * 2 = a static NIF load callback has already returned (sticky once any + * reinit finishes, so later corruption can be tied back to it) + */ +int erts_replay_static_nif_phase = 0; + +void +erts_replay_reinit_loaded_static_nifs(void) +{ + ErtsStaticNif* p; + const int replay_dbg = !!getenv("ERTS_REPLAY_NIF_DEBUG"); + + for (p = erts_static_nif_tab; p->nif_init != NULL; p++) { + Module* module_p; + struct erl_module_nif* lib; + ErlNifEntry* entry = p->entry; + struct enif_msg_environment_t msg_env; + ErlNifEnv *env; + void* priv_data; + Eterm load_arg = SMALL_ZERO; + int veto; + + if (entry == NULL || entry->load == NULL + || !replay_should_reinit_static_nif(entry)) { + continue; + } + if (replay_dbg) { + erts_fprintf(stderr, "replay_nif: candidate=%s\n", entry->name); + } + + module_p = erts_get_module(p->mod_atom, erts_active_code_ix()); + if (module_p == NULL || module_p->curr.code_hdr == NULL) { + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: skip=%s reason=no_module_or_code\n", + entry->name); + } + continue; + } + + lib = replay_create_static_nif_lib(module_p, entry); + if (lib == NULL) { + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: skip=%s reason=create_lib_failed\n", + entry->name); + } + continue; + } + /* + * Mirror normal load_nif flow: from this point on, use the + * normalized entry copy embedded in `lib`. 
+ */ + entry = &lib->entry; + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: install=%s lib=%p\n", + entry->name, lib); + } + + ASSERT(opened_rt_list == NULL); + + env = &msg_env.env; + pre_nif_noproc(&msg_env, lib, NULL); + priv_data = lib->priv_data; + + lib->flags |= ERTS_MOD_NIF_FLG_LOADING; + if (sys_strcmp(entry->name, "prim_file") == 0 + && is_internal_pid(erts_init_process_id)) { + load_arg = erts_init_process_id; + } + erts_replay_static_nif_phase = 1; + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: load_callback_enter module=%s arg=%T\n", + entry->name, load_arg); + } + veto = entry->load(env, &priv_data, load_arg); + if (replay_dbg) { + erts_fprintf(stderr, + "replay_nif: load_callback_exit module=%s veto=%d\n", + entry->name, veto); + } + erts_replay_static_nif_phase = 2; + post_nif_noproc(&msg_env); + lib->flags &= ~ERTS_MOD_NIF_FLG_LOADING; + + if (veto) { + /* + * NIF load() reported failure during replay. Common reasons: + * - an I/O subsystem (e.g. prim_socket esock_io) refusing + * a second init, + * - a one-shot enif_set_option() rejecting a duplicate call. + * Don't abort: the existing call stubs from the restored + * module table still resolve to functioning code, so leaving + * this NIF without a fresh re-load is much better than killing + * the whole VM. We just rollback any partially-opened resource + * types (which would otherwise leak in a half-installed + * state) and keep going. 
+ */ + rollback_opened_resource_types(); + cleanup_opened_rt(); + if (replay_dbg) { + erts_fprintf(stderr, + "replay static NIF load callback returned veto=%d " + "for %T; continuing without re-load\n", + veto, p->mod_atom); + } + continue; + } + + module_p->curr.nif = lib; + lib->priv_data = priv_data; + prepare_opened_rt(lib); + + erts_rwmtx_rwlock(&erts_nif_call_tab_lock); + commit_opened_rt(); + erts_rwmtx_rwunlock(&erts_nif_call_tab_lock); + + cleanup_opened_rt(); + } +} + void erts_unload_nif(struct erl_module_nif* lib) diff --git a/erts/emulator/beam/export.c b/erts/emulator/beam/export.c index ac1982981cc9..6ef658ce036e 100644 --- a/erts/emulator/beam/export.c +++ b/erts/emulator/beam/export.c @@ -31,6 +31,9 @@ #include "hash.h" #include "jit/beam_asm.h" #include "erl_global_literals.h" +#include +#include +#include #define EXPORT_INITIAL_SIZE 4000 #define EXPORT_LIMIT (512*1024) @@ -129,7 +132,64 @@ static void export_stage(Export *export, void init_export_table(void) { + int i; + export_staged_init(); + + for (i = 0; i < ERTS_NUM_CODE_IX; i++) { + erts_alloc_trace_note_alloc("export_table.index_root", + &export_tables[i], + sizeof(export_tables[i])); + } +} + +void +init_export_table_replay(IndexTable *roots, int no_roots) +{ + HashFunctions f; + erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER; + int i; + + ASSERT(roots != NULL); + ASSERT(no_roots == ERTS_NUM_CODE_IX); + (void) no_roots; + + rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ; + rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED; + + erts_rwmtx_init_opt(&export_rwmutex, + &rwmtx_opt, + "export_staging_lock", + NIL, + (ERTS_LOCK_FLAGS_PROPERTY_STATIC | + ERTS_LOCK_FLAGS_CATEGORY_GENERIC)); + + erts_atomic_init_nob(&export_total_entries_bytes, 0); + + f.hash = (H_FUN) export_staged_hash; + f.cmp = (HCMP_FUN) export_staged_cmp; + f.alloc = (HALLOC_FUN) export_staged_alloc; + f.free = (HFREE_FUN) export_staged_free; + f.meta_alloc = (HMALLOC_FUN) erts_alloc; + f.meta_free = (HMFREE_FUN) erts_free; 
+ f.meta_print = (HMPRINT_FUN) erts_print; + + for (i = 0; i < ERTS_NUM_CODE_IX; i++) { + export_tables[i] = roots[i]; + export_tables[i].htable.fun = f; + erts_index_rebuild_hash_buckets(&export_tables[i]); + } + + /* + * In debug builds the staged-table template tracks whether staging is + * active via export_debug_stage_ix (~0 = idle). BSS leaves it at 0, + * which is not the idle sentinel; replay skips the normal staging cycle + * so we must reset it here to prevent spurious assertion failures when + * compile:file later triggers erts_start_staging_code_ix. + */ +#ifdef DEBUG + export_debug_stage_ix = ~0; +#endif } void @@ -282,3 +342,31 @@ void export_end_staging(int commit) { export_staged_end_staging(commit); } + +void erts_export_replay_repair_all_lambdas(void) +{ + ErtsCodeIndex code_ix; + int count, i; + + if (!erts_mmap_record_option_replay_enabled()) { + return; + } + + code_ix = erts_active_code_ix(); + + count = export_list_size(code_ix); + for (i = 0; i < count; i++) { + Export *ep = export_list(i, code_ix); + + if (!ep) { + continue; + } + + /* + * Do not reuse replay-snapshot lambda objects. They may carry stale + * runtime state in their backing memory. Rebuild a canonical shared + * lambda from current export metadata instead. 
+ */ + create_shared_lambda(ep); + } +} diff --git a/erts/emulator/beam/export.h b/erts/emulator/beam/export.h index 7a218f27b785..25b7d712cc06 100644 --- a/erts/emulator/beam/export.h +++ b/erts/emulator/beam/export.h @@ -123,6 +123,8 @@ typedef struct export_ #endif void init_export_table(void); +void init_export_table_replay(IndexTable *roots, int no_roots); +void erts_export_replay_repair_all_lambdas(void); void export_info(fmtfn_t, void *); ERTS_GLB_INLINE void erts_activate_export_trampoline(Export *ep, int code_ix); @@ -184,4 +186,3 @@ erts_active_export_entry(Eterm m, Eterm f, unsigned int a) #endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */ #endif /* __EXPORT_H__ */ - diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 15ffdb2f6fe8..901c0586dccd 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -130,6 +130,7 @@ Eterm erts_load_nif(Process *c_p, ErtsCodePtr I, Eterm filename, Eterm args); void erts_unload_nif(struct erl_module_nif* nif); extern void erl_nif_init(void); +extern void erts_replay_reinit_loaded_static_nifs(void); extern void erts_nif_sched_init(ErtsSchedulerData *esdp); extern void erts_nif_execute_on_halt(void); extern void erts_nif_notify_halt(void); @@ -1004,6 +1005,7 @@ void erts_start_staging_ranges(int num_new); void erts_end_staging_ranges(int commit); void erts_update_ranges(const BeamCodeHeader* code, Uint size); void erts_remove_from_ranges(const BeamCodeHeader* code); +void erts_ranges_replay_rebuild(void); UWord erts_ranges_sz(void); void erts_lookup_function_info(FunctionInfo* fi, ErtsCodePtr pc, @@ -1126,6 +1128,24 @@ Uint size_object_x(Eterm, erts_literal_area_t*); #define size_object(Term) size_object_x(Term,NULL) #define size_object_litopt(Term,LitArea) size_object_x(Term,LitArea) +/* + * Replay diagnostic: walk an Eterm and dump every reachable subterm to + * stderr, classifying each pointer (ARENA / LITERAL / HEAP) and printing + * its header word. Tolerant of malformed terms. 
Intended to be called + * just before a deep-copy that is suspected to crash on a stale arena + * pointer (e.g. `ets:insert/2`). + */ +void erts_replay_dump_term_to_stderr(Eterm root, const char *ctx, Eterm pid); + +/* + * Replay correlation flag: + * 0 = static NIF reinit phase has not started for current candidate + * 1 = currently inside a static NIF load callback (e.g. prim_file) + * 2 = a static NIF load callback has returned (sticky after first one) + * Defined in erl_nif.c. + */ +extern int erts_replay_static_nif_phase; + Uint copy_shared_calculate(Eterm, erts_shcopy_t*); Uint size_shared(Eterm);
diff --git a/erts/emulator/beam/index.c b/erts/emulator/beam/index.c
index a6dac7e281cf..4e4c43e10cbc 100644
--- a/erts/emulator/beam/index.c
+++ b/erts/emulator/beam/index.c
@@ -116,6 +116,59 @@ int index_get(IndexTable* t, void* tmpl) return -1; }
+/*
+ * Rebuild the hash bucket array of an index table from its entries.
+ *
+ * A zeroed bucket array sized from htable.shift is allocated with the
+ * table's meta allocator, every entry that erts_index_lookup() returns
+ * is linked into the chain selected by its stored hash value, and nobjs
+ * and the grow/shrink thresholds are recomputed. The bucket array that
+ * was installed before the call is not freed.
+ */
+void
+erts_index_rebuild_hash_buckets(IndexTable *t)
+{
+    const int term_bits = ERTS_SIZEOF_TERM * 8;
+    Hash *table;
+    HashBucket **chains;
+    Uint n_slots;
+    Uint n_bytes;
+    int linked = 0;
+    int entry;
+
+    ASSERT(t != NULL);
+
+    table = &t->htable;
+    ASSERT(table->shift > 0 && table->shift < term_bits);
+    ASSERT(term_bits - table->shift > 0);
+
+    /* One chain head per slot, all initially empty. */
+    n_slots = UWORD_CONSTANT(1) << (term_bits - table->shift);
+    n_bytes = n_slots * sizeof(HashBucket *);
+    chains = (HashBucket **) table->fun.meta_alloc(table->meta_alloc_type, n_bytes);
+    memzero(chains, n_bytes);
+
+    for (entry = 0; entry < t->entries; entry++) {
+        HashBucket *bucket = (HashBucket *) erts_index_lookup(t, entry);
+        Uint slot;
+
+        if (bucket == NULL) {
+            continue;
+        }
+        /* Push onto the front of the chain chosen by the stored hash. */
+        slot = hash_get_slot(table, bucket->hvalue);
+        bucket->next = chains[slot];
+        chains[slot] = bucket;
+        linked++;
+    }
+
+    table->bucket = chains;
+    table->nobjs = linked;
+    table->grow_threshold = (8 * (int) n_slots) / 5;
+    table->shrink_threshold =
+        (table->shift < table->max_shift) ? ((int) n_slots) / 5 : -1;
+}
+
 void index_erase_latest_from(IndexTable* t, Uint from_ix) { if(from_ix < (Uint)t->entries) {
diff --git a/erts/emulator/beam/index.h b/erts/emulator/beam/index.h
index 61f4e608eb58..9baf7ba100c3 100644
---
a/erts/emulator/beam/index.h +++ b/erts/emulator/beam/index.h @@ -59,6 +59,7 @@ int index_table_sz(IndexTable *); int index_get(IndexTable*, void*); IndexSlot* index_put_entry(IndexTable*, void*); +void erts_index_rebuild_hash_buckets(IndexTable *t); /* Erase all entries with index 'ix' and higher */ diff --git a/erts/emulator/beam/jit/beam_jit_main.cpp b/erts/emulator/beam/jit/beam_jit_main.cpp index 73bc507c10d1..185622ea1af3 100644 --- a/erts/emulator/beam/jit/beam_jit_main.cpp +++ b/erts/emulator/beam/jit/beam_jit_main.cpp @@ -28,6 +28,7 @@ extern "C" #include "beam_common.h" #include "code_ix.h" #include "export.h" +#include "erl_mmap.h" #include "erl_threads.h" #if defined(__APPLE__) @@ -131,7 +132,6 @@ static void install_bifs(void) { ERTS_ASSERT(entry->arity <= MAX_BIF_ARITY); ep = erts_export_put(entry->module, entry->name, entry->arity); - sys_memset(&ep->info.u, 0, sizeof(ep->info.u)); ep->info.mfa.module = entry->module; ep->info.mfa.function = entry->name; @@ -152,6 +152,49 @@ static void install_bifs(void) { } } +static void replay_install_bifs(void) { + typedef Eterm (*bif_func_type)(Process *, Eterm *, ErtsCodePtr); + int i; + + ASSERT(beam_export_trampoline != NULL); + ASSERT(beam_save_calls_export != NULL); + + for (i = 0; i < BIF_SIZE; i++) { + BifEntry *entry; + const Export *existing; + Export *ep; + + entry = &bif_table[i]; + + ERTS_ASSERT(entry->arity <= MAX_BIF_ARITY); + + existing = erts_active_export_entry(entry->module, + entry->name, + entry->arity); + ep = erts_export_put(entry->module, entry->name, entry->arity); + ep->bif_number = i; + + if (!existing) { + int j; + + sys_memset(&ep->info.u, 0, sizeof(ep->info.u)); + ep->info.mfa.module = entry->module; + ep->info.mfa.function = entry->name; + ep->info.mfa.arity = entry->arity; + + for (j = 0; j < ERTS_NUM_CODE_IX; j++) { + erts_activate_export_trampoline(ep, j); + } + } + + erts_init_trap_export(BIF_TRAP_EXPORT(i), + entry->module, + entry->name, + entry->arity, + 
(bif_func_type)entry->f); + } +} + static auto create_allocator(const JitAllocator::CreateParams ¶ms) { JitAllocator::Span test_span; bool single_mapped; @@ -407,7 +450,11 @@ bool BeamAssemblerCommon::hasCpuFeature(uint32_t featureId) { } void init_emulator(void) { - install_bifs(); + if (erts_mmap_record_option_replay_enabled()) { + replay_install_bifs(); + } else { + install_bifs(); + } } void process_main(ErtsSchedulerData *esdp) { diff --git a/erts/emulator/beam/module.c b/erts/emulator/beam/module.c index fb4defbc55eb..0ce5690773d4 100644 --- a/erts/emulator/beam/module.c +++ b/erts/emulator/beam/module.c @@ -29,6 +29,7 @@ #include "global.h" #include "module.h" #include "beam_catches.h" +#include "erl_mmap.h" #ifdef BEAMASM # include "beam_asm.h" @@ -49,6 +50,10 @@ erts_rwmtx_t the_old_code_rwlocks[ERTS_NUM_CODE_IX]; static erts_atomic_t tot_module_bytes; +#ifdef DEBUG +static ErtsCodeIndex dbg_load_code_ix = 0; +#endif + /* SMP note: Active module table lookup and current module instance can be * read without any locks. Old module instances are protected by * "the_old_code_rwlocks" as purging is done on active module table. 
@@ -119,6 +124,9 @@ void init_module_table(void) for (i = 0; i < ERTS_NUM_CODE_IX; i++) { erts_index_init(ERTS_ALC_T_MODULE_TABLE, &module_tables[i], "module_code", MODULE_SIZE, MODULE_LIMIT, f); + erts_alloc_trace_note_alloc("module_table.index_root", + &module_tables[i], + sizeof(module_tables[i])); } for (i=0; iunsealed); #ifdef BEAMASM @@ -230,7 +275,8 @@ void erts_seal_module(struct erl_module_instance *modi) { ERTS_LC_ASSERT(erts_initialized == 0 || erts_thr_progress_is_blocking() || - erts_has_code_mod_permission()); + erts_has_code_mod_permission() || + erts_mmap_record_option_replay_enabled()); ASSERT(unsealed_module == modi && modi->unsealed == 1); #ifdef BEAMASM @@ -261,10 +307,6 @@ int module_table_sz(void) return erts_atomic_read_nob(&tot_module_bytes); } -#ifdef DEBUG -static ErtsCodeIndex dbg_load_code_ix = 0; -#endif - static int entries_at_start_staging = 0; static ERTS_INLINE void copy_module(Module* dst_mod, Module* src_mod) diff --git a/erts/emulator/beam/module.h b/erts/emulator/beam/module.h index 3449837e5d93..b39f86dd6b59 100644 --- a/erts/emulator/beam/module.h +++ b/erts/emulator/beam/module.h @@ -76,6 +76,7 @@ void erts_unseal_module(struct erl_module_instance *modi); void erts_seal_module(struct erl_module_instance *modi); void init_module_table(void); +void init_module_table_replay(IndexTable *roots, int no_roots); void module_start_staging(void); void module_end_staging(int commit); void module_info(fmtfn_t, void *); diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index 664a523011f2..bc65aedee311 100644 --- a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -884,6 +884,15 @@ static int do_send_to_logger(Eterm tag, Eterm gl, char *buf, size_t len) static int do_send_term_to_logger(Eterm tag, Eterm gl, char *buf, size_t len, Eterm args) { + /* + * Replay debug mode: avoid traversing/copying args terms that may already + * be corrupt. 
Send plain text only so we can keep running and trace the + * earlier corruption point. + */ + if (getenv("ERTS_REPLAY_COPY_DEBUG")) { + return do_send_to_logger(tag, gl, buf, len); + } + Uint sz; Uint args_sz; Eterm format, pid; diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c index e07a1622b35b..a049c433d38d 100644 --- a/erts/emulator/sys/common/erl_mmap.c +++ b/erts/emulator/sys/common/erl_mmap.c @@ -29,6 +29,8 @@ #include "atom.h" #include "erl_mmap.h" #include +#include +#include #ifdef HAVE_SYS_MMAN_H #include @@ -1289,29 +1291,28 @@ Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map) #if HAVE_MMAP # define ERTS_MMAP_PROT (PROT_READ|PROT_WRITE) # if defined(MAP_ANONYMOUS) -# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE) -# define ERTS_MMAP_FD (-1) +# define ERTS_MMAP_FLAGS (MAP_ANONYMOUS|MAP_PRIVATE) +# define ERTS_MMAP_FD_FOR_MM(MM) (-1) # elif defined(MAP_ANON) # define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE) -# define ERTS_MMAP_FD (-1) +# define ERTS_MMAP_FD_FOR_MM(MM) (-1) # else # define ERTS_MMAP_FLAGS (MAP_PRIVATE) -# define ERTS_MMAP_FD mm->mmap_fd +# define ERTS_MMAP_FD_FOR_MM(MM) ((MM)->mmap_fd) # endif #endif static ERTS_INLINE void * -os_mmap(void *hint_ptr, UWord size) +os_mmap_raw(ErtsMemMapper *mm, void *hint_ptr, UWord size, int fd, int flags) { #if HAVE_MMAP - void *res; - - res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT, - ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0); - if (res == MAP_FAILED) - return NULL; - return res; + void *res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT, flags, fd, 0); + return res == MAP_FAILED ? 
NULL : res; #elif HAVE_VIRTUALALLOC + (void) mm; + (void) hint_ptr; + (void) fd; + (void) flags; return (void *) VirtualAlloc(NULL, (SIZE_T) size, MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE); #else @@ -1320,7 +1321,7 @@ os_mmap(void *hint_ptr, UWord size) } static ERTS_INLINE void -os_munmap(void *ptr, UWord size) +os_munmap_raw(void *ptr, UWord size) { #if HAVE_MMAP #ifdef ERTS_MMAP_DEBUG @@ -1339,27 +1340,107 @@ os_munmap(void *ptr, UWord size) #endif } +int +erts_mmap_name_mapping(ErtsMemMapper *mm, void *ptr, UWord size, const char *name) +{ + (void) mm; + (void) ptr; + (void) size; + (void) name; + return 0; +} + +static int +erts_mmap_prefix_mapping_name(ErtsMemMapper *mm, void *ptr, UWord size, const char *prefix) +{ + (void) mm; + (void) ptr; + (void) size; + (void) prefix; + return 0; +} + +int +erts_mmap_name_mapping_global(void *ptr, UWord size, const char *name) +{ +#if HAVE_MMAP + if (erts_mmap_name_mapping(&erts_dflt_mmapper, ptr, size, name)) { + return 1; + } +#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) + if (erts_mmap_name_mapping(&erts_literal_mmapper, ptr, size, name)) { + return 1; + } +#endif +#else + (void) ptr; + (void) size; + (void) name; +#endif + return 0; +} + +int +erts_mmap_prefix_mapping_name_global(void *ptr, UWord size, const char *prefix) +{ +#if HAVE_MMAP + if (erts_mmap_prefix_mapping_name(&erts_dflt_mmapper, ptr, size, prefix)) { + return 1; + } +#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) + if (erts_mmap_prefix_mapping_name(&erts_literal_mmapper, ptr, size, prefix)) { + return 1; + } +#endif +#else + (void) ptr; + (void) size; + (void) prefix; +#endif + return 0; +} + +static ERTS_INLINE void * +os_mmap(ErtsMemMapper *mm, void *hint_ptr, UWord size) +{ + return os_mmap_raw(mm, hint_ptr, size, ERTS_MMAP_FD_FOR_MM(mm), ERTS_MMAP_FLAGS); +} + +static ERTS_INLINE void +os_munmap(ErtsMemMapper *mm, void *ptr, UWord size) +{ + (void) mm; + os_munmap_raw(ptr, size); +} + #define 
ALIGN_UP(x, a) ((void*)((((UWord)(x)) + ((a) - 1)) & ~((a) - 1))) #define IS_ALIGNED(x, a) ((((UWord)(x)) & ((a) - 1)) == 0) +static ERTS_INLINE void * +os_mmap_aligned_raw(ErtsMemMapper *mm, UWord size, UWord alignment); + /* * Just like os_mmap, but ensures that mapping is a multiple of the * specified alignment. Alignment must be a power-of-2 multiple of * the page size in bytes. */ static ERTS_INLINE void * -os_mmap_aligned(UWord size, UWord alignment) +os_mmap_aligned(ErtsMemMapper *mm, UWord size, UWord alignment) +{ + return os_mmap_aligned_raw(mm, size, alignment); +} + +static ERTS_INLINE void * +os_mmap_aligned_raw(ErtsMemMapper *mm, UWord size, UWord alignment) { char *result; #ifdef MAP_ALIGN - - /* - * On an operating systems that support MAP_ALIGN (SunOS >=5.9) we - * can directly ask mmap(2) to align the virtual memory mapping. - */ - result = mmap((void *) alignment, size, ERTS_MMAP_PROT, - ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0); - if (result == MAP_FAILED) { + result = os_mmap_raw(mm, + (void *) alignment, + size, + ERTS_MMAP_FD_FOR_MM(mm), + ERTS_MMAP_FLAGS|MAP_ALIGN); + if (!result) { return NULL; } #else @@ -1368,12 +1449,7 @@ os_mmap_aligned(UWord size, UWord alignment) ASSERT((size % sys_page_size) == 0); ASSERT((alignment % sys_page_size) == 0); - /* - * Allocate and test for alignment. It is possible 1) the - * operating aligned the allocation based its length or 2) the - * previous allocation aligned the next available address. - */ - if ((result = os_mmap(NULL, size)) == NULL) { + if ((result = os_mmap_raw(mm, NULL, size, ERTS_MMAP_FD_FOR_MM(mm), ERTS_MMAP_FLAGS)) == NULL) { return NULL; } @@ -1381,39 +1457,19 @@ os_mmap_aligned(UWord size, UWord alignment) return result; } - /* - * The virtual memory allocation was not aligned, clean-up the - * mapping so we can try a different strategy. 
- */ - os_munmap(result, size); + os_munmap_raw(result, size); - /* - * Retry the virtual memory allocation adding padding to ensure - * the requested alignment. - */ - if ((result = os_mmap(NULL, size + alignment)) == NULL) { + if ((result = os_mmap_raw(mm, NULL, size + alignment, ERTS_MMAP_FD_FOR_MM(mm), ERTS_MMAP_FLAGS)) == NULL) { return NULL; } diff = (char *)ALIGN_UP(result, alignment) - result; - - /* - * Unmap any extra pages at the beginning of the allocation. If - * the allocation ended up being aligned, there will be nothing to - * unmap. - */ if (diff != 0) { - os_munmap(result, diff); + os_munmap_raw(result, diff); result += diff; } - - /* - * Unmap extra pages at the end of the allocation. There must - * always be at least one. - */ - os_munmap(result + size, alignment - diff); + os_munmap_raw(result + size, alignment - diff); #endif - return result; } @@ -1426,9 +1482,10 @@ os_mmap_aligned(UWord size, UWord alignment) # endif # endif static ERTS_INLINE void * -os_mremap(void *ptr, UWord old_size, UWord new_size) +os_mremap(ErtsMemMapper *mm, void *ptr, UWord old_size, UWord new_size) { void *new_seg; + (void) mm; #if HAVE_MREMAP new_seg = mremap(ptr, (size_t) old_size, # if defined(__NetBSD__) @@ -1466,7 +1523,7 @@ static int os_reserve_physical(char *ptr, UWord size) { void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_RESERVE_PROT, - ERTS_MMAP_RESERVE_FLAGS, ERTS_MMAP_FD, 0); + ERTS_MMAP_RESERVE_FLAGS, -1, 0); if (res == (void *) MAP_FAILED) return 0; return 1; @@ -1476,7 +1533,7 @@ static void os_unreserve_physical(char *ptr, UWord size) { void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_UNRESERVE_PROT, - ERTS_MMAP_UNRESERVE_FLAGS, ERTS_MMAP_FD, 0); + ERTS_MMAP_UNRESERVE_FLAGS, -1, 0); if (res == (void *) MAP_FAILED) erts_exit(ERTS_ABORT_EXIT, "Failed to unreserve memory"); } @@ -1488,7 +1545,7 @@ os_mmap_virtual(char *ptr, UWord size) void* res; res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_VIRTUAL_PROT, - flags, ERTS_MMAP_FD, 0); 
+ flags, -1, 0); if (res == (void *) MAP_FAILED) return NULL; return res; @@ -1544,7 +1601,11 @@ alloc_desc_insert_free_seg(ErtsMemMapper* mm, #if ERTS_HAVE_OS_MMAP if (!mm->no_os_mmap) { - ptr = os_mmap(mm->desc.new_area_hint, ERTS_PAGEALIGNED_SIZE); + ptr = os_mmap_raw(mm, + mm->desc.new_area_hint, + ERTS_PAGEALIGNED_SIZE, + ERTS_MMAP_FD_FOR_MM(mm), + ERTS_MMAP_FLAGS); if (ptr) { mm->desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE; ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE); @@ -1752,13 +1813,13 @@ erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep) /* Map using OS primitives */ if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mm->no_os_mmap) { if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) { - seg = os_mmap(NULL, asize); + seg = os_mmap(mm, NULL, asize); if (!seg) goto failure; } else { asize = ERTS_SUPERALIGNED_CEILING(*sizep); - seg = os_mmap_aligned(asize, ERTS_SUPERALIGNED_SIZE); + seg = os_mmap_aligned(mm, asize, ERTS_SUPERALIGNED_SIZE); if (!seg) goto failure; } @@ -1810,7 +1871,7 @@ erts_munmap(ErtsMemMapper* mm, Uint32 flags, void *ptr, UWord size) #if ERTS_HAVE_OS_MMAP ERTS_MUNMAP_OP_LCK(ptr, size); ERTS_MMAP_SIZE_OS_DEC(size); - os_munmap(ptr, size); + os_munmap(mm, ptr, size); #endif } else { @@ -1971,7 +2032,7 @@ erts_mremap(ErtsMemMapper* mm, ERTS_MMAP_ASSERT((((char *)ptr) + old_size) > (char *) new_ptr); um_sz = (UWord) ((((char *) ptr) + old_size) - (char *) new_ptr); ERTS_MMAP_SIZE_OS_DEC(um_sz); - os_munmap(new_ptr, um_sz); + os_munmap(mm, new_ptr, um_sz); ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize); *sizep = asize; return ptr; @@ -1981,7 +2042,7 @@ erts_mremap(ErtsMemMapper* mm, if (superaligned) { return remap_move(mm, flags, ptr, old_size, sizep); } else { - new_ptr = os_mremap(ptr, old_size, asize); + new_ptr = os_mremap(mm, ptr, old_size, asize); if (!new_ptr) return NULL; if (asize > old_size) @@ -2232,6 +2293,52 @@ static void init_atoms(void) static void hard_dbg_mseg_init(void); #endif +/* + * Public helper so code outside 
this translation unit (in particular the + * record/replay sidecar logic in erl_mmap_record.c) can reserve physical + * backing on pages inside a super-carrier without needing access to the + * full ErtsMemMapper_ struct. + */ +int +erts_mmap_reserve_physical(ErtsMemMapper *mm, void *ptr, UWord size) +{ + if (!mm || !mm->reserve_physical) { + return 0; + } + return mm->reserve_physical((char *) ptr, size); +} + +/* + * Replay-only: mark the range [ptr, ptr+size) as "already allocated" inside + * the super-carrier so that future erts_mmap() calls won't hand it out. + * + * Strategy: if the range is contiguous with sa.top, just advance sa.top; + * otherwise push sa.top to the end of the range (losing the gap between + * old sa.top and ptr). This is sufficient for the literal super-carrier + * case where all restored regions are at the bottom of the carrier and + * superaligned. + * + * Returns 1 on success (including when sa.top already covers the range) and + * 0 when mm is NULL, the range wraps around, or the range -- after its end + * has been rounded up to a superaligned boundary -- does not lie within + * [sa.bot, sua.bot]. + */ +int +erts_mmap_mark_allocated(ErtsMemMapper *mm, void *ptr, UWord size) +{ + char *start = (char *) ptr; + char *end = start + size; + if (!mm) { + return 0; + } + /* + * Round up to superaligned boundary before validating: sa.top is moved + * to the rounded end, so checking only the unrounded end could push + * sa.top past sua.bot. end < start catches address wrap-around. 
+ */ + end = (char *) ERTS_SUPERALIGNED_CEILING((UWord) end); + if (end < start || start < mm->sa.bot || end > mm->sua.bot) { + return 0; + } + if (end > mm->sa.top) { + UWord inc = (UWord) (end - mm->sa.top); + mm->sa.top = end; + mm->size.supercarrier.used.total += inc; + } + return 1; +} + void erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init) { @@ -2257,7 +2364,7 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init) mm->reserve_physical = reserve_noop; mm->unreserve_physical = unreserve_noop; -#if HAVE_MMAP && !defined(MAP_ANON) +#if HAVE_MMAP && !defined(MAP_ANON) && !defined(MAP_ANONYMOUS) mm->mmap_fd = open("/dev/zero", O_RDWR); if (mm->mmap_fd < 0) erts_exit(1, "erts_mmap: Failed to open /dev/zero\n"); @@ -2283,7 +2390,7 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init) "erts_mmap: Failed to create virtual range for super carrier\n"); sz = start - ptr; if (sz) - os_munmap(end, sz); + os_munmap_raw(end, sz); mm->reserve_physical = os_reserve_physical; mm->unreserve_physical = os_unreserve_physical; virtual_map = 1; @@ -2320,7 +2427,7 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init) alignment = MAX(sys_large_page_size, ERTS_SUPERALIGNED_SIZE); else alignment = ERTS_SUPERALIGNED_SIZE; - start = os_mmap_aligned(sz, alignment); + start = os_mmap_aligned_raw(mm, sz, alignment); } if (!start) erts_exit(1, diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index 3218f6797498..d0bbc4cd387d 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -142,8 +142,13 @@ typedef struct ErtsMemMapper_ ErtsMemMapper; void *erts_mmap(ErtsMemMapper*, Uint32 flags, UWord *sizep); void erts_munmap(ErtsMemMapper*, Uint32 flags, void *ptr, UWord size); void *erts_mremap(ErtsMemMapper*, Uint32 flags, void *ptr, UWord old_size, UWord *sizep); +int erts_mmap_name_mapping(ErtsMemMapper*, void *ptr, UWord size, const char *name); +int erts_mmap_name_mapping_global(void *ptr, UWord size, const char *name); +int erts_mmap_prefix_mapping_name_global(void *ptr, UWord size, 
const char *prefix); int erts_mmap_in_supercarrier(ErtsMemMapper*, void *ptr); void erts_mmap_init(ErtsMemMapper*, ErtsMMapInit*); +int erts_mmap_reserve_physical(ErtsMemMapper *mm, void *ptr, UWord size); +int erts_mmap_mark_allocated(ErtsMemMapper *mm, void *ptr, UWord size); struct erts_mmap_info_struct { UWord sizes[6]; @@ -155,6 +160,36 @@ Eterm erts_mmap_info(ErtsMemMapper*, fmtfn_t *print_to_p, void *print_to_arg, Eterm erts_mmap_info_options(ErtsMemMapper*, char *prefix, fmtfn_t *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp); +int erts_mmap_record_option_record(const char *path); +int erts_mmap_record_option_replay(const char *path); +int erts_mmap_record_option_record_enabled(void); +int erts_mmap_record_option_replay_enabled(void); +int erts_mmap_record_option_enabled(void); +const char *erts_mmap_record_option_dir(void); +int erts_mmap_record_init(void); +void *erts_mmap_record_alloc(UWord *sizep, Uint32 mmap_flags); +void erts_mmap_record_free(void *ptr, UWord size); +void *erts_mmap_record_realloc(void *ptr, UWord old_size, UWord *sizep, Uint32 mmap_flags); +/* + * Diagnostic helpers: identify whether a pointer falls inside the recorded + * mseg arena (the file-backed [record_base, record_base+ERTS_RECORD_ARENA_SIZE) + * range). Useful in replay-time instrumentation when a corrupted term carries + * a pointer that may or may not originate from the restored arena. + */ +int erts_mmap_record_arena_contains(const void *ptr); +void erts_mmap_record_arena_bounds(const char **base_out, UWord *size_out); + +/* + * Literal super-carrier snapshot/restore hooks. On record, the literal + * allocator tracks (ptr,size) regions here and dumps them on exit; on + * replay we re-materialise those bytes at the same addresses. 
+ */ +void erts_mmap_record_literal_alloc(void *ptr, UWord size); +void erts_mmap_record_literal_free(void *ptr, UWord size); +void erts_mmap_record_literal_realloc(void *old_ptr, UWord old_size, + void *new_ptr, UWord new_size); +void erts_mmap_record_literal_dump_on_exit(void); +int erts_mmap_record_literal_restore(ErtsMemMapper *mm); #ifdef ERTS_WANT_MEM_MAPPERS # include "erl_alloc_types.h" diff --git a/erts/emulator/sys/common/erl_mmap_record.c b/erts/emulator/sys/common/erl_mmap_record.c new file mode 100644 index 000000000000..341a6e8910a3 --- /dev/null +++ b/erts/emulator/sys/common/erl_mmap_record.c @@ -0,0 +1,888 @@ +/* + * %CopyrightBegin% + * + * SPDX-License-Identifier: Apache-2.0 + * + * Copyright Ericsson AB 2002-2025. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * %CopyrightEnd% + */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "erl_mmap.h" +#include +#include +#include +#include +#include +#include +#ifdef HAVE_SYS_MMAN_H +# include +#endif + +#if HAVE_ERTS_MMAP + +#define ERTS_RECORD_ARENA_SIZE (UWORD_CONSTANT(256) * 1024 * 1024) +#define ERTS_RECORD_ARENA_FILE "mseg-arena.bin" + +typedef struct ErtsMMapRecordChunk_ ErtsMMapRecordChunk; +struct ErtsMMapRecordChunk_ { + char *ptr; + UWord size; + int free; + ErtsMMapRecordChunk *prev; + ErtsMMapRecordChunk *next; +}; + +static int record_enabled = 0; +static int replay_enabled = 0; +static int record_initialized = 0; +static int record_fd = -1; +static char *record_base = NULL; +static char *record_dir = NULL; +static char *replay_dir = NULL; +static char *record_path = NULL; +static char *replay_path = NULL; +static ErtsMMapRecordChunk *record_chunks = NULL; +static erts_mtx_t record_mtx; +static int record_mtx_inited = 0; + +static char * +copy_trimmed_dir(const char *path) +{ + size_t len; + char *copy; + + if (!path || !path[0]) { + return NULL; + } + + len = strlen(path); + while (len > 1 && (path[len - 1] == '/' || path[len - 1] == '\\')) { + if (len == 3 && path[1] == ':') { + break; + } + len--; + } + + copy = (char *) malloc(len + 1); + if (!copy) { + return NULL; + } + memcpy(copy, path, len); + copy[len] = '\0'; + return copy; +} + +static int +ensure_dir_path(const char *dir) +{ + char path[1024]; + size_t i, len; + + if (!dir || dir[0] == '\0') { + return -1; + } + + len = strlen(dir); + if (len >= sizeof(path)) { + return -1; + } + + memcpy(path, dir, len + 1); + + for (i = 1; i < len; i++) { + if (path[i] == '/' || path[i] == '\\') { + char saved = path[i]; + path[i] = '\0'; + if (path[i - 1] != ':' + && mkdir(path, 0777) < 0 + && errno != EEXIST) { + return -1; + } + path[i] = saved; + } + } + + if (mkdir(path, 0777) < 0 && errno != EEXIST) { + return -1; + } + + return 0; +} + +static char * +join_dir_file(const 
char *dir, const char *name) +{ + size_t dlen, nlen, need_sep, sz; + char *res; + + if (!dir || !name) { + return NULL; + } + + dlen = strlen(dir); + nlen = strlen(name); + need_sep = dlen > 0 && dir[dlen - 1] != '/' && dir[dlen - 1] != '\\'; + sz = dlen + need_sep + nlen + 1; + res = (char *) malloc(sz); + if (!res) { + return NULL; + } + if (need_sep) { + erts_snprintf(res, sz, "%s/%s", dir, name); + } else { + erts_snprintf(res, sz, "%s%s", dir, name); + } + return res; +} + +/* + * Literal super-carrier snapshot tracking. + * + * On 64-bit, the literal allocator has its own mmapper (erts_literal_mmapper) + * reserved as a 1 GB virtual range. Allocations inside it do NOT go through + * mseg_create() and therefore do NOT reach erts_mmap_record_alloc(). + * + * To replay correctly we track every live (ptr, size) region handed out by + * erts_alcu_mmapper_mseg_alloc / _realloc, and at process exit we dump those + * regions (their raw bytes) to a sidecar file next to the main record arena + * (DIR/mseg-arena.bin.literals, DIR being the record/replay directory). + * On replay, after the literal mmapper has been set up (so the same + * virtual range is reserved), we read the sidecar and memcpy bytes back + * at their original addresses. 
+ */ +typedef struct ErtsLiteralSnapshotRegion_ ErtsLiteralSnapshotRegion; +struct ErtsLiteralSnapshotRegion_ { + char *ptr; + UWord size; + ErtsLiteralSnapshotRegion *next; +}; + +static ErtsLiteralSnapshotRegion *literal_regions = NULL; +static erts_mtx_t literal_mtx; +static int literal_mtx_inited = 0; + +#define ERTS_LITERAL_SNAPSHOT_MAGIC 0x4C49544C55 /* ASCII "LITLU" */ +#define ERTS_LITERAL_SNAPSHOT_VERSION 1 + +/* + * Initialise literal_mtx on first use. The only guard is the plain + * literal_mtx_inited flag. + */ +static void +literal_mtx_ensure_inited(void) +{ + if (!literal_mtx_inited) { + erts_mtx_init(&literal_mtx, "mmap_record_literal", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC + | ERTS_LOCK_FLAGS_CATEGORY_ALLOCATOR); + literal_mtx_inited = 1; + } +} + +/* + * Format "BASE.literals" into a static buffer, where BASE is replay_path + * when replay_enabled is set and record_path otherwise. Returns NULL when + * BASE is unset or the result does not fit in the buffer. + */ +static const char * +literal_sidecar_path_for_record(void) +{ + static char buf[1024]; + const char *base; + int len; + + if (replay_enabled) { + base = replay_path; + } else { + base = record_path; + } + if (!base) { + return NULL; + } + len = snprintf(buf, sizeof(buf), "%s.literals", base); + if (len <= 0 || len >= (int) sizeof(buf)) { + return NULL; + } + return buf; +} + +/* + * Round size up to a multiple of ERTS_PAGEALIGNED_SIZE, or of + * ERTS_SUPERALIGNED_SIZE when ERTS_MMAPFLG_SUPERALIGNED is set. + */ +static UWord +record_align(UWord size, Uint32 mmap_flags) +{ + UWord align = ERTS_PAGEALIGNED_SIZE; + if (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) { + align = ERTS_SUPERALIGNED_SIZE; + } + return (size + (align - 1)) & ~(align - 1); +} + +/* Round ptr up to a multiple of align; the mask requires a power of two. */ +static char * +record_align_ptr(char *ptr, UWord align) +{ + UWord v = (UWord) ptr; + UWord a = (v + (align - 1)) & ~(align - 1); + return (char *) a; +} + +/* malloc an unlinked chunk descriptor; returns NULL if malloc fails. */ +static ErtsMMapRecordChunk * +record_new_chunk(char *ptr, UWord size, int free) +{ + ErtsMMapRecordChunk *c = (ErtsMMapRecordChunk *) malloc(sizeof(*c)); + if (!c) { + return NULL; + } + c->ptr = ptr; + c->size = size; + c->free = free; + c->prev = NULL; + c->next = NULL; + return c; +} + +static void +record_merge_with_neighbors(ErtsMMapRecordChunk *c) +{ + if (c->next && c->next->free) { + ErtsMMapRecordChunk *n = c->next; + c->size += n->size; + c->next = n->next; + if (c->next) { + c->next->prev = c; + } + free(n); + } + if (c->prev && c->prev->free) 
{ + ErtsMMapRecordChunk *p = c->prev; + p->size += c->size; + p->next = c->next; + if (c->next) { + c->next->prev = p; + } + free(c); + } +} + +int +erts_mmap_record_option_record(const char *path) +{ + char *dir; + char *arena_path; + char *dump_dir; + + if (!path || !path[0] || replay_enabled) { + return 0; + } + + dir = copy_trimmed_dir(path); + if (!dir) { + return 0; + } + + if (ensure_dir_path(dir) != 0) { + free(dir); + return 0; + } + + dump_dir = join_dir_file(dir, "struct-root-dumps"); + if (!dump_dir) { + free(dir); + return 0; + } + if (ensure_dir_path(dump_dir) != 0) { + free(dump_dir); + free(dir); + return 0; + } + free(dump_dir); + + arena_path = join_dir_file(dir, ERTS_RECORD_ARENA_FILE); + if (!arena_path) { + free(dir); + return 0; + } + + if (record_dir) { + free(record_dir); + } + record_dir = dir; + + if (record_path) { + free(record_path); + } + record_path = arena_path; + + record_enabled = 1; + return 1; +} + +int +erts_mmap_record_option_replay(const char *path) +{ + char *dir; + char *arena_path; + + if (!path || !path[0] || record_enabled) { + return 0; + } + + dir = copy_trimmed_dir(path); + if (!dir) { + return 0; + } + + arena_path = join_dir_file(dir, ERTS_RECORD_ARENA_FILE); + if (!arena_path) { + free(dir); + return 0; + } + + if (replay_dir) { + free(replay_dir); + } + replay_dir = dir; + + if (replay_path) { + free(replay_path); + } + replay_path = arena_path; + replay_enabled = 1; + return 1; +} + +int +erts_mmap_record_option_record_enabled(void) +{ + return record_enabled; +} + +int +erts_mmap_record_option_replay_enabled(void) +{ + return replay_enabled; +} + +int +erts_mmap_record_option_enabled(void) +{ + return record_enabled || replay_enabled; +} + +const char * +erts_mmap_record_option_dir(void) +{ + if (record_enabled) { + return record_dir; + } + if (replay_enabled) { + return replay_dir; + } + return NULL; +} + +int +erts_mmap_record_arena_contains(const void *ptr) +{ + if (!record_base) { + return 0; + } + return 
(const char *) ptr >= record_base + && (const char *) ptr < record_base + ERTS_RECORD_ARENA_SIZE; +} + +void +erts_mmap_record_arena_bounds(const char **base_out, UWord *size_out) +{ + if (base_out) { + *base_out = record_base; + } + if (size_out) { + *size_out = ERTS_RECORD_ARENA_SIZE; + } +} + +int +erts_mmap_record_init(void) +{ + const char *path = NULL; + ErtsMMapRecordChunk *c; + struct stat st; + + if (!record_enabled && !replay_enabled) { + return 1; + } + if (record_initialized) { + return 1; + } + + if (replay_enabled) { + /* + * Open the arena read-only during replay so the OS will not let us + * mutate the on-disk snapshot, and map it MAP_PRIVATE (copy-on-write) + * so the VM can still write into restored memory without propagating + * those writes back to the file. Without this, a crash mid-replay + * leaves a partially-modified arena on disk and subsequent replays + * observe a different (corrupted) snapshot. + */ + path = replay_path; + record_fd = open(path, O_RDONLY, 0); + } else { + path = record_path; + if (!path) { + return 0; + } + record_fd = open(path, O_RDWR | O_CREAT, 0666); + } + if (record_fd < 0) { + return 0; + } + + if (fstat(record_fd, &st) != 0) { + close(record_fd); + record_fd = -1; + return 0; + } + if (replay_enabled) { + if ((UWord) st.st_size < ERTS_RECORD_ARENA_SIZE) { + close(record_fd); + record_fd = -1; + return 0; + } + } else if (st.st_size != (off_t) ERTS_RECORD_ARENA_SIZE) { + if (ftruncate(record_fd, (off_t) ERTS_RECORD_ARENA_SIZE) != 0) { + close(record_fd); + record_fd = -1; + return 0; + } + } + + record_base = (char *) mmap(NULL, + ERTS_RECORD_ARENA_SIZE, + PROT_READ | PROT_WRITE, + replay_enabled ? 
MAP_PRIVATE : MAP_SHARED, + record_fd, + 0); + if (record_base == MAP_FAILED) { + record_base = NULL; + close(record_fd); + record_fd = -1; + return 0; + } + + c = record_new_chunk(record_base, ERTS_RECORD_ARENA_SIZE, 1); + if (!c) { + munmap(record_base, ERTS_RECORD_ARENA_SIZE); + record_base = NULL; + close(record_fd); + record_fd = -1; + return 0; + } + + if (!record_mtx_inited) { + erts_mtx_init(&record_mtx, "mmap_record", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC + | ERTS_LOCK_FLAGS_CATEGORY_ALLOCATOR); + record_mtx_inited = 1; + } + + record_chunks = c; + record_initialized = 1; + return 1; +} + +void * +erts_mmap_record_alloc(UWord *sizep, Uint32 mmap_flags) +{ + UWord need; + UWord align; + ErtsMMapRecordChunk *c; + void *res = NULL; + + if (!record_initialized || !sizep) { + return NULL; + } + + align = ERTS_PAGEALIGNED_SIZE; + if (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) { + align = ERTS_SUPERALIGNED_SIZE; + } + need = record_align(*sizep, mmap_flags); + + erts_mtx_lock(&record_mtx); + for (c = record_chunks; c; c = c->next) { + if (c->free) { + char *ret_ptr = record_align_ptr(c->ptr, align); + UWord prefix = (UWord) (ret_ptr - c->ptr); + UWord total_need = prefix + need; + if (c->size < total_need) { + continue; + } + + if (prefix > 0) { + ErtsMMapRecordChunk *pre = record_new_chunk(c->ptr, prefix, 1); + if (!pre) { + break; + } + pre->prev = c->prev; + pre->next = c; + if (pre->prev) { + pre->prev->next = pre; + } else { + record_chunks = pre; + } + c->prev = pre; + c->ptr = ret_ptr; + c->size -= prefix; + } + + if (c->size > need) { + ErtsMMapRecordChunk *tail = record_new_chunk(c->ptr + need, + c->size - need, + 1); + if (!tail) { + break; + } + tail->prev = c; + tail->next = c->next; + if (tail->next) { + tail->next->prev = tail; + } + c->next = tail; + c->size = need; + } + c->free = 0; + *sizep = c->size; + res = c->ptr; + break; + } + } + erts_mtx_unlock(&record_mtx); + + return res; +} + +void +erts_mmap_record_free(void *ptr, UWord size) +{ + 
ErtsMMapRecordChunk *c; + (void) size; + + if (!record_initialized || !ptr) { + return; + } + + erts_mtx_lock(&record_mtx); + for (c = record_chunks; c; c = c->next) { + if (c->ptr == (char *) ptr) { + c->free = 1; + record_merge_with_neighbors(c); + break; + } + } + erts_mtx_unlock(&record_mtx); +} + +/* + * Resize an arena allocation. A NULL ptr behaves like + * erts_mmap_record_alloc(). A request that does not exceed old_size returns + * ptr unchanged. Otherwise a new chunk is allocated, old_size bytes are + * copied over and the old chunk is released. Returns NULL if the arena is + * not initialised, sizep is NULL, or no chunk could be allocated. + * + * NOTE(review): on the no-grow path *sizep keeps the requested value while + * the chunk list still holds the old, larger size -- confirm that callers + * tolerate this mismatch. + */ +void * +erts_mmap_record_realloc(void *ptr, UWord old_size, UWord *sizep, Uint32 mmap_flags) +{ + void *new_ptr; + UWord copy_sz; + + if (!record_initialized || !sizep) { + return NULL; + } + if (!ptr) { + return erts_mmap_record_alloc(sizep, mmap_flags); + } + if (*sizep <= old_size) { + return ptr; + } + + new_ptr = erts_mmap_record_alloc(sizep, mmap_flags); + if (!new_ptr) { + return NULL; + } + + copy_sz = old_size < *sizep ? old_size : *sizep; + sys_memcpy(new_ptr, ptr, copy_sz); + erts_mmap_record_free(ptr, old_size); + return new_ptr; +} + +/* + * --------------------------------------------------------------------------- + * Literal super-carrier snapshot tracking. + * --------------------------------------------------------------------------- + */ + +/* + * Record mode only: prepend a (ptr, size) descriptor to literal_regions + * under literal_mtx. A failed malloc is ignored, which leaves the region + * untracked. + */ +void +erts_mmap_record_literal_alloc(void *ptr, UWord size) +{ + ErtsLiteralSnapshotRegion *r; + + if (!record_enabled || !ptr || !size) { + return; + } + r = (ErtsLiteralSnapshotRegion *) malloc(sizeof(*r)); + if (!r) { + return; + } + r->ptr = (char *) ptr; + r->size = size; + + literal_mtx_ensure_inited(); + erts_mtx_lock(&literal_mtx); + r->next = literal_regions; + literal_regions = r; + erts_mtx_unlock(&literal_mtx); +} + +/* + * Record mode only: unlink and free the first tracked region whose ptr + * matches. The size argument is unused. + */ +void +erts_mmap_record_literal_free(void *ptr, UWord size) +{ + ErtsLiteralSnapshotRegion **pp; + (void) size; + + if (!record_enabled || !ptr) { + return; + } + + literal_mtx_ensure_inited(); + erts_mtx_lock(&literal_mtx); + for (pp = &literal_regions; *pp; pp = &(*pp)->next) { + if ((*pp)->ptr == (char *) ptr) { + ErtsLiteralSnapshotRegion *r = *pp; + *pp = r->next; + free(r); + break; + } + } + erts_mtx_unlock(&literal_mtx); +} + +void +erts_mmap_record_literal_realloc(void *old_ptr, 
UWord old_size, + void *new_ptr, UWord new_size) +{ + if (!record_enabled) { + return; + } + if (old_ptr) { + erts_mmap_record_literal_free(old_ptr, old_size); + } + if (new_ptr && new_size) { + erts_mmap_record_literal_alloc(new_ptr, new_size); + } +} + +/* + * Sidecar file format (little-endian, host-size UWord): + * + * UWord magic (ERTS_LITERAL_SNAPSHOT_MAGIC) + * UWord version (ERTS_LITERAL_SNAPSHOT_VERSION) + * UWord count (number of regions) + * for each region: + * UWord ptr (virtual address) + * UWord size (bytes) + * byte data[size] + */ + +static int +write_all(int fd, const void *buf, size_t len) +{ + const char *p = (const char *) buf; + while (len > 0) { + ssize_t n = write(fd, p, len); + if (n < 0) { + if (errno == EINTR) continue; + return -1; + } + if (n == 0) return -1; + p += n; + len -= (size_t) n; + } + return 0; +} + +static int +read_all(int fd, void *buf, size_t len) +{ + char *p = (char *) buf; + while (len > 0) { + ssize_t n = read(fd, p, len); + if (n < 0) { + if (errno == EINTR) continue; + return -1; + } + if (n == 0) return -1; + p += n; + len -= (size_t) n; + } + return 0; +} + +void +erts_mmap_record_literal_dump_on_exit(void) +{ + const char *path; + int fd; + UWord header[3]; + ErtsLiteralSnapshotRegion *r; + UWord count = 0; + + if (!record_enabled) { + return; + } + path = literal_sidecar_path_for_record(); + if (!path) { + return; + } + + literal_mtx_ensure_inited(); + erts_mtx_lock(&literal_mtx); + + for (r = literal_regions; r; r = r->next) { + count++; + } + + fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (fd < 0) { + erts_mtx_unlock(&literal_mtx); + return; + } + + header[0] = (UWord) ERTS_LITERAL_SNAPSHOT_MAGIC; + header[1] = (UWord) ERTS_LITERAL_SNAPSHOT_VERSION; + header[2] = count; + if (write_all(fd, header, sizeof(header)) != 0) { + goto done; + } + + for (r = literal_regions; r; r = r->next) { + UWord rec[2]; + rec[0] = (UWord) r->ptr; + rec[1] = r->size; + if (write_all(fd, rec, sizeof(rec)) != 0) { + 
goto done; + } + if (r->size > 0) { + if (write_all(fd, r->ptr, (size_t) r->size) != 0) { + goto done; + } + } + } + +done: + close(fd); + erts_mtx_unlock(&literal_mtx); +} + +/* + * Restore the literal super-carrier contents from the sidecar file. + * + * Must be called AFTER erts_mmap_init(&erts_literal_mmapper, ...) so that + * the 1 GB virtual range is reserved at the same address that it was during + * record (ASLR is required to be off). For each recorded region we: + * 1. Ensure physical memory is reserved on the target pages via the + * mmapper's reserve_physical callback. + * 2. memcpy the recorded bytes. + * + * NOTE: After this call the literal mmapper's free-list does NOT know that + * these regions are in use. That's OK for replay because replay skips + * load_preloaded() and therefore never asks the literal allocator for + * fresh memory; existing code already baked-in pointers into these + * addresses. + */ +int +erts_mmap_record_literal_restore(ErtsMemMapper *mm) +{ + const char *path; + int fd; + UWord header[3]; + UWord count, i; + int ok = 0; + (void) mm; + + if (!replay_enabled) { + return 1; + } + path = literal_sidecar_path_for_record(); + if (!path) { + return 0; + } + + fd = open(path, O_RDONLY, 0); + if (fd < 0) { + /* Missing sidecar: not fatal, but callers likely can't boot. */ + return 0; + } + + if (read_all(fd, header, sizeof(header)) != 0) { + goto out; + } + if (header[0] != (UWord) ERTS_LITERAL_SNAPSHOT_MAGIC + || header[1] != (UWord) ERTS_LITERAL_SNAPSHOT_VERSION) { + goto out; + } + count = header[2]; + + for (i = 0; i < count; i++) { + UWord rec[2]; + char *ptr; + UWord size; + + if (read_all(fd, rec, sizeof(rec)) != 0) { + goto out; + } + ptr = (char *) rec[0]; + size = rec[1]; + + /* + * Reserve physical memory on the target region so that the + * upcoming writes land on real pages. The super-carrier was + * reserved with os_mmap_virtual() and is PROT_NONE until this + * call flips the pages to PROT_READ|PROT_WRITE. 
+ * + * We use erts_mmap_reserve_physical(), a small wrapper in + * erl_mmap.c, because ErtsMemMapper is only forward-declared + * outside that file. + */ + if (mm) { + if (!erts_mmap_reserve_physical(mm, ptr, size)) { + goto out; + } + /* + * Tell the mmapper these pages are now in-use so subsequent + * erts_mmap() calls (e.g. when the literal allocator grows + * its carriers) don't hand them out and overwrite the bytes + * we are about to memcpy in. + */ + if (!erts_mmap_mark_allocated(mm, ptr, size)) { + fprintf(stderr, + "replay_root_debug: WARNING mark_allocated failed " + "for [%p..+0x%lx); later literal allocations may " + "clobber restored bytes\n", + (void *) ptr, (unsigned long) size); + } + } + if (size > 0) { + if (read_all(fd, ptr, (size_t) size) != 0) { + goto out; + } + } + } + ok = 1; + +out: + close(fd); + return ok; +} + +#endif /* HAVE_ERTS_MMAP */ diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c index 3381d33a4ef4..2389a121260e 100644 --- a/erts/emulator/sys/common/erl_mseg.c +++ b/erts/emulator/sys/common/erl_mseg.c @@ -267,7 +267,11 @@ mseg_create(ErtsMsegAllctr_t *ma, Uint flags, UWord *sizep) if (MSEG_FLG_IS_2POW(flags)) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; - seg = erts_mmap(&erts_dflt_mmapper, mmap_flags, sizep); + if (erts_mmap_record_option_record_enabled()) { + seg = erts_mmap_record_alloc(sizep, mmap_flags); + } else { + seg = erts_mmap(&erts_dflt_mmapper, mmap_flags, sizep); + } #ifdef ERTS_PRINT_ERTS_MMAP erts_fprintf(stderr, "%p = erts_mmap(%s, {%bpu, %bpu});\n", seg, @@ -287,7 +291,11 @@ mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, void *seg_p, UWord size) { if (MSEG_FLG_IS_2POW(flags)) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; - erts_munmap(&erts_dflt_mmapper, mmap_flags, seg_p, size); + if (erts_mmap_record_option_record_enabled()) { + erts_mmap_record_free(seg_p, size); + } else { + erts_munmap(&erts_dflt_mmapper, mmap_flags, seg_p, size); + } #ifdef ERTS_PRINT_ERTS_MMAP 
erts_fprintf(stderr, "erts_munmap(%s, %p, %bpu);\n", (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua", @@ -308,7 +316,11 @@ mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, void *old_seg, UWord old_size, U if (MSEG_FLG_IS_2POW(flags)) mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; - new_seg = erts_mremap(&erts_dflt_mmapper, mmap_flags, old_seg, old_size, sizep); + if (erts_mmap_record_option_record_enabled()) { + new_seg = erts_mmap_record_realloc(old_seg, old_size, sizep, mmap_flags); + } else { + new_seg = erts_mremap(&erts_dflt_mmapper, mmap_flags, old_seg, old_size, sizep); + } #ifdef ERTS_PRINT_ERTS_MMAP erts_fprintf(stderr, "%p = erts_mremap(%s, %p, %bpu, {%bpu, %bpu});\n", diff --git a/erts/emulator/sys/unix/sys_drivers.c b/erts/emulator/sys/unix/sys_drivers.c index c06d6b66036a..9f067697d546 100644 --- a/erts/emulator/sys/unix/sys_drivers.c +++ b/erts/emulator/sys/unix/sys_drivers.c @@ -1592,9 +1592,8 @@ static ErlDrvData forker_start(ErlDrvPort port_num, char* name, int fds[2]; int res, unbind; char bindir[MAXPATHLEN]; + char child_setup_prog[MAXPATHLEN + 64]; size_t bindirsz = sizeof(bindir); - Uint csp_path_sz; - char *child_setup_prog; forker_port = erts_drvport2id(port_num); @@ -1609,16 +1608,8 @@ static ErlDrvData forker_start(ErlDrvPort port_num, char* name, erts_exit(1, "Environment variable BINDIR does not contain an" " absolute path\n"); - csp_path_sz = (strlen(bindir) - + 1 /* DIR_SEPARATOR_CHAR */ - + sizeof(CHILD_SETUP_PROG_NAME) - + 1); - child_setup_prog = erts_alloc(ERTS_ALC_T_CS_PROG_PATH, csp_path_sz); - erts_snprintf(child_setup_prog, csp_path_sz, - "%s%c%s", - bindir, - DIR_SEPARATOR_CHAR, - CHILD_SETUP_PROG_NAME); + erts_snprintf(child_setup_prog, sizeof(child_setup_prog), + "%s%c%s", bindir, DIR_SEPARATOR_CHAR, CHILD_SETUP_PROG_NAME); if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) { erts_exit(ERTS_ABORT_EXIT, "Could not open unix domain socket in spawn_init: %d\n", @@ -1661,8 +1652,6 @@ static ErlDrvData forker_start(ErlDrvPort 
port_num, char* name, erts_sched_bind_atfork_parent(unbind); - erts_free(ERTS_ALC_T_CS_PROG_PATH, child_setup_prog); - close(fds[1]); /* If stdin is a tty then we need to restore its settings when we exit. diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c index fdad5b0d5ec9..f7bcd88362b5 100644 --- a/erts/etc/common/erlexec.c +++ b/erts/etc/common/erlexec.c @@ -809,6 +809,18 @@ int main(int argc, char **argv) } break; + case 'r': + if (strcmp(argv[i], "-record") == 0 + || strcmp(argv[i], "-replay") == 0) { + NEXT_ARG_CHECK(); + add_Eargs(argv[i]); + add_Eargs(argv[i+1]); + i++; + } else { + add_arg(argv[i]); + } + break; + case 's': /* -sname NAME */ if (strcmp(argv[i], "-sname") == 0) { NEXT_ARG_CHECK(); @@ -1036,8 +1048,16 @@ int main(int argc, char **argv) } break; case 'r': - if (!is_one_of_strings(&argv[i][2], - plusr_val_switches)) + if (strcmp(argv[i], "+record") == 0 + || strcmp(argv[i], "+replay") == 0) { + NEXT_ARG_CHECK(); + argv[i][0] = '-'; + add_Eargs(argv[i]); + add_Eargs(argv[i+1]); + i++; + } + else if (!is_one_of_strings(&argv[i][2], + plusr_val_switches)) goto the_default; else { NEXT_ARG_CHECK(); diff --git a/erts/preloaded/ebin/erl_init.beam b/erts/preloaded/ebin/erl_init.beam index bf205fab15d5..1c98bd3db60e 100644 Binary files a/erts/preloaded/ebin/erl_init.beam and b/erts/preloaded/ebin/erl_init.beam differ diff --git a/erts/preloaded/ebin/erlang.beam b/erts/preloaded/ebin/erlang.beam index 7507a883b625..a7b94c6911a6 100644 Binary files a/erts/preloaded/ebin/erlang.beam and b/erts/preloaded/ebin/erlang.beam differ diff --git a/erts/preloaded/src/erl_init.erl b/erts/preloaded/src/erl_init.erl index 1ec50d34b3ea..be2eda81b0d1 100644 --- a/erts/preloaded/src/erl_init.erl +++ b/erts/preloaded/src/erl_init.erl @@ -33,21 +33,26 @@ Mod :: module(), BootArgs :: [binary()]. 
start(Mod, BootArgs) -> - %% Load the static nifs - zlib:on_load(), - erl_tracer:on_load(), - prim_buffer:on_load(), - prim_file:on_load(), - %% prim_socket:on_load(), prim_net:on_load(), - if_loaded( - prim_socket, - fun () -> - prim_socket:on_load(), - prim_net:on_load(), - ok - end), + case replay_enabled(BootArgs) of + true -> + ok; + false -> + %% Load the static nifs + zlib:on_load(), + erl_tracer:on_load(), + prim_buffer:on_load(), + prim_file:on_load(), + %% prim_socket:on_load(), prim_net:on_load(), + if_loaded( + prim_socket, + fun () -> + prim_socket:on_load(), + prim_net:on_load(), + ok + end) + end, %% Proceed to the specified boot module - run(Mod, boot, BootArgs). + run(Mod, boot, remove_replay_args(BootArgs)). restart() -> erts_internal:erase_persistent_terms(), @@ -58,6 +63,16 @@ restart() -> ok end). +replay_enabled(BootArgs) -> + lists:member(<<"-replay">>, BootArgs) + orelse os:getenv("ERTS_MMAP_REPLAY") =:= "1". + +remove_replay_args([<<"-replay">> | Args]) -> + remove_replay_args(Args); +remove_replay_args([Arg | Args]) -> + [Arg | remove_replay_args(Args)]; +remove_replay_args([]) -> + []. run(M, F, A) -> case erlang:function_exported(M, F, 1) of