Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
5ed8bd3
mmap to file and log allocations on disk
ziopio Apr 20, 2026
8cb75d4
record and replay mods use single mapped file of 100MB
ziopio Apr 21, 2026
65ab72a
Do not use the mapped file for new allocations during replay
ziopio Apr 21, 2026
bfd0022
Root every allocation to mapped memory except for special handling in…
ziopio Apr 21, 2026
ebce050
Restore export and fun index-table roots on replay
ziopio Apr 22, 2026
03485ca
Snapshot and restore active/staging code indices across record/replay
ziopio Apr 23, 2026
8f3aa74
Add table-replay initializers, debug probes, and global-literal backing
ziopio Apr 23, 2026
550303f
Rebuild per-module PC range table in replay mode
ziopio Apr 23, 2026
cf986b0
Snapshot and restore the literal super-carrier across record/replay
ziopio Apr 23, 2026
f7c8dfa
Make the recorded snapshot read-only during replay
ziopio Apr 23, 2026
27b40d7
Snapshot and restore beam-catches bccix[] across record/replay
ziopio Apr 23, 2026
5e8f088
Route binary allocator carriers through mseg during record
ziopio Apr 24, 2026
3668c94
Preserve restored BIF exports during replay initialization
ziopio Apr 24, 2026
b9d63ab
Always launch sys processes
ziopio Apr 24, 2026
bb4d625
Reinitialize static NIF state during replay
ziopio Apr 27, 2026
98dcebb
Hotfix replay crashes from corrupted stacktrace terms and callback fu…
ziopio Apr 27, 2026
0b07ba0
Rebuild export lambdas during replay to prevent badfun
ziopio Apr 28, 2026
9df1e5a
REmove debug code
ziopio Apr 28, 2026
c89e030
remove ERTS_REPLAY_ROOT_DEBUG gating and debug code
ziopio Apr 28, 2026
ce0ce48
allocator: write roots dumps only in -record mode
ziopio Apr 28, 2026
5e3840b
record/replay: remove nonessential trace and debug file dumps
ziopio Apr 28, 2026
1502d86
Simplify replay setup by automatically forwarding replay flag to node…
ziopio Apr 29, 2026
4059cb1
Remove eccessive trace notes
ziopio Apr 29, 2026
e775692
erts replay: rebuild index hash buckets for restored tables
ziopio Apr 30, 2026
0ba9835
Dockerfile to build custom OTP
GwendalLaurent May 5, 2026
8fd0b01
bring back rebar3 into dockerfile
GwendalLaurent May 5, 2026
1643c99
Fix lock checking crash in debug builds
ziopio May 5, 2026
4adb433
erts: stabilize replay static NIF reinit for shell/module workflows
ziopio May 5, 2026
a48a3dc
erts replay: add debug instrumentation for term-copy/ETS/NIF diagnosis
ziopio May 5, 2026
959dcb3
erts replay: reset staged-table debug sentinels in replay init paths
ziopio May 5, 2026
d63da2f
Add support for record and replay flags in erlexec
ziopio May 11, 2026
858530e
Use record and replay path as directory, always dump struct when reco…
ziopio May 11, 2026
fc80ff6
Update preloaded
ziopio May 11, 2026
b03b833
add dockerfile
GwendalLaurent May 11, 2026
f8f5f5e
cleanup replay argument in erl_init
ziopio May 12, 2026
51b9acc
Move diagnostic prints to debug knobs
ziopio May 13, 2026
20e007f
Increase arena record size to 256 MB
ziopio May 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Paths excluded from the Docker build context (the Dockerfile copies the
# whole context into the image with `COPY . /usr/src/otp`).

# Version-control, CI and dev-container metadata
.git
.github
.devcontainer

# Mirror key OTP gitignore rules so Docker does not copy generated launchers
/bin
/bootstrap/bin/*
# ...but keep the bootstrap *.boot files (negated pattern re-includes them)
!/bootstrap/bin/*.boot

# Local OTP build artifacts that should not be sent as Docker build context
**/deps/
**/erl_crash.dump
**/CONF_INFO
**/config.log
**/config.status
**/obj/
**/obj.debug/
erts/*-unknown-linux-gnu/

# Generated ASN.1 outputs with host-specific absolute paths
lib/public_key/src/OTP-PKIX-Relaxed.erl
lib/public_key/src/OTP-PKIX-Relaxed.hrl
61 changes: 61 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# SPDX-FileCopyrightText: 2026 Dipl.Phys. Peer Stritzinger GmbH
# SPDX-License-Identifier: Apache-2.0

# Builds the OTP tree in the build context from source on Alpine (musl) and
# installs it under /usr/local, followed by a rebar3 built with that OTP.

FROM alpine:3.20

ENV OTP_VERSION="28.4.2" \
    REBAR3_VERSION="3.26.0"

# Build-time and run-time deps. We deliberately skip wx/odbc/jit/megaco/etc.
# to keep the build self-contained on musl.
RUN apk add --no-cache \
        bash \
        autoconf automake libtool make perl \
        gcc g++ \
        musl-dev linux-headers \
        ncurses-dev ncurses-static \
        openssl-dev openssl-libs-static \
        zlib-dev zlib-static \
        curl wget file ca-certificates git

COPY . /usr/src/otp
WORKDIR /usr/src/otp
ENV ERL_TOP=/usr/src/otp

# Remove any host build leftovers that slipped into the build context before
# configuring, then build and install OTP plus the documentation chunks.
RUN set -xe \
    && find . -type f \( -name config.log -o -name config.status -o -name erl_crash.dump \) -delete \
    && find . -type d \( -name deps -o -name obj -o -name obj.debug \
        -o -name '*-unknown-linux-gnu' \
        -o -name '*-unknown-linux-musl' \) -prune -exec rm -rf {} + \
    && ./otp_build autoconf \
    && ./configure \
        --without-javac \
        --without-jinterface \
        --without-wx \
        --without-megaco \
        --without-odbc \
        --without-debugger \
        --without-observer \
        --without-et \
        --disable-jit \
        --disable-dynamic-ssl-lib \
    && make -j"$(nproc)" \
    && make -j"$(nproc)" docs DOC_TARGETS=chunks \
    && make install install-docs DOC_TARGETS=chunks \
    && find /usr/local -name examples | xargs rm -rf

# rebar3 (kept for in-image use; calzone-sandbox installs its own copy too)
# NOTE: the checksum line uses the two-space "<digest>  <file>" separator;
# BusyBox sha256sum (the one shipped with Alpine) rejects a single space.
RUN set -xe \
    && REBAR3_DOWNLOAD_URL="https://github.com/erlang/rebar3/archive/${REBAR3_VERSION}.tar.gz" \
    && REBAR3_DOWNLOAD_SHA256="a151dc4a07805490e9f217a099e597ac9774814875f55da2c66545c333fdff64" \
    && mkdir -p /usr/src/rebar3-src \
    && curl -fSL -o rebar3-src.tar.gz "$REBAR3_DOWNLOAD_URL" \
    && echo "$REBAR3_DOWNLOAD_SHA256  rebar3-src.tar.gz" | sha256sum -c - \
    && tar -xzf rebar3-src.tar.gz -C /usr/src/rebar3-src --strip-components=1 \
    && rm rebar3-src.tar.gz \
    && cd /usr/src/rebar3-src \
    && HOME=$PWD ./bootstrap \
    && install -v ./rebar3 /usr/local/bin/ \
    && rm -rf /usr/src/rebar3-src

# Default command; placed last so the image's runtime entry is easy to find.
CMD ["erl"]
1 change: 1 addition & 0 deletions erts/emulator/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -1259,6 +1259,7 @@ OS_OBJS += $(OBJDIR)/erl_poll.o \
$(OBJDIR)/erl_check_io.o \
$(OBJDIR)/erl_mseg.o \
$(OBJDIR)/erl_mmap.o \
$(OBJDIR)/erl_mmap_record.o \
$(OBJDIR)/erl_osenv.o \
$(OBJDIR)/erl_$(ERLANG_OSTYPE)_sys_ddll.o \
$(OBJDIR)/erl_sys_common_misc.o \
Expand Down
42 changes: 42 additions & 0 deletions erts/emulator/beam/atom.c
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,9 @@ init_atom_table(void)

erts_index_init(ERTS_ALC_T_ATOM_TABLE, &erts_atom_table,
"atom_tab", ATOM_SIZE, erts_atom_table_size, f);
erts_alloc_trace_note_alloc("atom_table.index_root",
&erts_atom_table,
sizeof(erts_atom_table));

/* Ordinary atoms. a is a template for creating an entry in the atom table */
for (i = 0; erl_atom_names[i] != 0; i++) {
Expand All @@ -498,6 +501,45 @@ init_atom_table(void)

}

/*
 * Replay-mode initialisation of the atom table.
 *
 * Instead of building a fresh index table, the global erts_atom_table is
 * overwritten with the contents of *root. The hash callbacks are then
 * reinstalled, the hash buckets rebuilt, and atom_space recomputed as the
 * sum of the lengths of all entries that can be looked up.
 *
 * root must not be NULL (checked with ASSERT only).
 */
void
init_atom_table_replay(IndexTable *root)
{
    erts_rwmtx_opt_t lock_opts = ERTS_RWMTX_OPT_DEFAULT_INITER;
    HashFunctions callbacks;
    int slot;

    ASSERT(root != NULL);

    lock_opts.type = ERTS_RWMTX_TYPE_FREQUENT_READ;
    lock_opts.lived = ERTS_RWMTX_LONG_LIVED;

#ifdef ERTS_ATOM_PUT_OPS_STAT
    erts_atomic_init_nob(&atom_put_ops, 0);
#endif

    erts_rwmtx_init_opt(&atom_table_lock, &lock_opts, "atom_tab", NIL,
                        ERTS_LOCK_FLAGS_PROPERTY_STATIC
                        | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);

    /* Callbacks are filled in before the copy so they can be installed
     * onto the copied table in one assignment. */
    callbacks.hash = (H_FUN) atom_hash;
    callbacks.cmp = (HCMP_FUN) atom_cmp;
    callbacks.alloc = (HALLOC_FUN) atom_alloc;
    callbacks.free = (HFREE_FUN) atom_free;
    callbacks.meta_alloc = (HMALLOC_FUN) erts_alloc;
    callbacks.meta_free = (HMFREE_FUN) erts_free;
    callbacks.meta_print = (HMPRINT_FUN) erts_print;

    erts_atom_table = *root;
    erts_atom_table.htable.fun = callbacks;
    erts_index_rebuild_hash_buckets(&erts_atom_table);

    /* Recompute the text-space total from the entries in the table. */
    atom_space = 0;
    for (slot = 0; slot < erts_atom_table.entries; slot++) {
        Atom *entry = (Atom *) erts_index_lookup(&erts_atom_table, slot);

        if (entry == NULL) {
            continue;
        }
        atom_space += entry->len;
    }
}

void
dump_atoms(fmtfn_t to, void *to_arg)
{
Expand Down
2 changes: 1 addition & 1 deletion erts/emulator/beam/atom.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,10 @@ Eterm am_atom_put(const char*, Sint); /* ONLY 7-bit ascii! */
Eterm erts_atom_put(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc);
int erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc);
void init_atom_table(void);
/*
 * Replay-mode counterpart of init_atom_table(): copies *root into the global
 * atom table, reinstalls the hash callbacks, rebuilds the hash buckets and
 * recomputes the atom text-space total. root must not be NULL.
 */
void init_atom_table_replay(IndexTable *root);
void atom_info(fmtfn_t, void *);
void dump_atoms(fmtfn_t, void *);
Uint erts_get_atom_limit(void);
int erts_atom_get(const char* name, Uint len, Eterm* ap, ErtsAtomEncoding enc);
void erts_atom_get_text_space_sizes(Uint *reserved, Uint *used);
#endif

50 changes: 50 additions & 0 deletions erts/emulator/beam/beam_catches.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,28 @@ struct bc_pool {

static struct bc_pool bccix[ERTS_NUM_CODE_IX];

/*
 * Accessor that hands the static bccix[] array to the struct-root-dump /
 * replay pipeline.
 *
 * Rationale (as recorded by the author): the per-pool tables
 * (bccix[i].beam_catches) are allocated with ERTS_ALC_T_CATCHES, i.e. by
 * the long-lived allocator, whose carriers live in the default mseg
 * super-carrier that the record arena already backs with a file. The table
 * *contents* are therefore persisted without extra work. What has to be
 * snapshotted explicitly is this small static header array, which holds the
 * pointers, tabsize, high_mark and free_list. Without it a replay would
 * start from a freshly initialised (empty) bccix[], the catch indices baked
 * into restored code would resolve to garbage / NULL, and the first throw
 * would fail with "Catch not found".
 *
 * Returns the address of the first element of bccix[].
 */
void *erts_beam_catches_bccix_ptr(void)
{
    void *base = (void *) &bccix[0];

    return base;
}

/* Size in bytes of the whole static bccix[] array (all code indices). */
UWord erts_beam_catches_bccix_size(void)
{
    const UWord total_bytes = sizeof(bccix);

    return total_bytes;
}

void beam_catches_init(void)
{
int i;
Expand All @@ -71,6 +93,34 @@ void beam_catches_init(void)
}
/* For initial load: */
IF_DEBUG(bccix[erts_staging_code_ix()].is_staging = 1);

/*
* Register the whole bccix[] as a snapshot root on record, AFTER the
* fresh table has been allocated so the snapshot captures the
* record-time pointer. On replay, erl_init.c overwrites this array
* from the dump before any code runs.
*/
erts_alloc_trace_note_alloc("beam_catches.bccix",
bccix, sizeof(bccix));
}

/*
 * Replay-only: overwrite bccix[] in one go with the snapshot bytes loaded
 * from struct-root-dumps/NN.beam_catches.bccix.bin. The pointers stored in
 * the snapshot refer to addresses inside the long-lived allocator's carrier,
 * which the default mseg super-carrier restores at the same virtual address.
 *
 * The fresh table that beam_catches_init() allocated just before is leaked.
 * That is a small permanent waste (one 8 KB block), accepted to keep the
 * replay path simple and to avoid calling erts_free on an address the
 * allocator no longer knows about once its bookkeeping has been overwritten.
 *
 * src      - snapshot bytes to copy from.
 * src_size - number of bytes available at src; when it differs from
 *            sizeof(bccix) nothing is copied and the call is a no-op.
 */
void beam_catches_apply_replay_root(const void *src, UWord src_size)
{
    const UWord expected = sizeof(bccix);

    if (src_size == expected) {
        sys_memcpy(bccix, src, expected);
    }
}


Expand Down
13 changes: 13 additions & 0 deletions erts/emulator/beam/beam_catches.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,19 @@
#define BEAM_CATCHES_NIL (-1)

void beam_catches_init(void);

/*
 * Record/replay support. See beam_catches.c for the rationale.
 *
 * erts_beam_catches_bccix_{ptr,size} - accessors used by the
 * struct-root dump code to snapshot the static bccix[] array.
 * erts_beam_catches_bccix_ptr() returns the address of the array and
 * erts_beam_catches_bccix_size() its total size in bytes.
 * beam_catches_apply_replay_root - restore bccix[] from the
 * previously-dumped bytes during -replay init. The copy is only
 * performed when src_size equals the size of bccix[]; on any other
 * size the call silently does nothing.
 */
void *erts_beam_catches_bccix_ptr(void);
UWord erts_beam_catches_bccix_size(void);
void beam_catches_apply_replay_root(const void *src, UWord src_size);

void beam_catches_start_staging(void);
void beam_catches_end_staging(int commit);
unsigned beam_catches_cons(ErtsCodePtr cp, unsigned cdr, ErtsCodePtr **);
Expand Down
105 changes: 105 additions & 0 deletions erts/emulator/beam/beam_ranges.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "global.h"
#include "beam_code.h"
#include "erl_unicode.h"
#include "module.h"

typedef struct {
ErtsCodePtr start; /* Pointer to start of module. */
Expand Down Expand Up @@ -416,3 +417,107 @@ erts_find_next_code_for_line(const BeamCodeHeader* code_hdr,

return lt->func_tab[0][line_index];
}

/*
* Rebuild the per-module PC range table from the already-restored module
* table after a record/replay restore. During replay we skip load_preloaded()
* (since module table, atom table, export table, fun table, and code pages
* have all been restored from the struct-root dumps + mmap arena), which
* means erts_update_ranges() was never called. Without ranges,
* erts_find_function_from_pc() always returns NULL and any PC-based
* introspection (tracing, stack traces, exception handling) sees "unknown"
* code, which in turn corrupts VM-level invariants and leads to spurious
* crashes (SIGILL via BTI, "Catch not found", etc.).
*
* We rebuild both code indices directly (bypassing the normal staging dance),
* since the restored active/staging indices are already correct and we do
* not want to advance them.
*/
void
erts_ranges_replay_rebuild(void)
{
ErtsCodeIndex ix;

for (ix = 0; ix < ERTS_NUM_CODE_IX; ix++) {
int i;
int max_modules = module_code_size(ix);
Sint count = 0;
Range *mp;

/* Free any previous allocation (in case this is called twice). */
if (r[ix].modules) {
erts_atomic_add_nob(&mem_used, -r[ix].allocated);
erts_free(ERTS_ALC_T_MODULE_REFS, r[ix].modules);
r[ix].modules = NULL;
r[ix].allocated = 0;
r[ix].n = 0;
}

/* Count entries: one per curr instance with code, plus one per old. */
for (i = 0; i < max_modules; i++) {
Module *modp = module_code(i, ix);
if (!modp) {
continue;
}
if (modp->curr.code_hdr && modp->curr.code_length > 0) {
count++;
}
if (modp->old.code_hdr && modp->old.code_length > 0) {
count++;
}
}

if (count == 0) {
continue;
}

/* Allocate with some slack so future inserts don't immediately
* require reallocation (matches the behaviour of
* erts_start_staging_ranges). */
r[ix].allocated = count + 8;
erts_atomic_add_nob(&mem_used, r[ix].allocated);
r[ix].modules = (Range *) erts_alloc(ERTS_ALC_T_MODULE_REFS,
r[ix].allocated * sizeof(Range));
mp = r[ix].modules;

for (i = 0; i < max_modules; i++) {
Module *modp = module_code(i, ix);
if (!modp) {
continue;
}
if (modp->curr.code_hdr && modp->curr.code_length > 0) {
mp->start = (ErtsCodePtr) modp->curr.code_hdr;
erts_atomic_init_nob(&mp->end,
(erts_aint_t)
(((byte *) modp->curr.code_hdr)
+ modp->curr.code_length));
mp++;
}
if (modp->old.code_hdr && modp->old.code_length > 0) {
mp->start = (ErtsCodePtr) modp->old.code_hdr;
erts_atomic_init_nob(&mp->end,
(erts_aint_t)
(((byte *) modp->old.code_hdr)
+ modp->old.code_length));
mp++;
}
}

r[ix].n = mp - r[ix].modules;
qsort(r[ix].modules, r[ix].n, sizeof(Range),
(int (*)(const void *, const void *)) rangecompare);
erts_atomic_set_nob(&r[ix].mid,
(erts_aint_t) (r[ix].modules + r[ix].n / 2));

if (r[ix].allocated > (Sint) erts_dump_num_lit_areas) {
erts_dump_num_lit_areas = r[ix].allocated * 2;
erts_dump_lit_areas = (ErtsLiteralArea **)
erts_realloc(ERTS_ALC_T_CRASH_DUMP,
(void *) erts_dump_lit_areas,
erts_dump_num_lit_areas
* sizeof(ErtsLiteralArea *));
}

CHECK(&r[ix]);
}
}
Loading