From 16cf8e43a6f30664367dc3343361e9f3149a3a49 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Fri, 27 Mar 2026 19:49:30 -0500 Subject: [PATCH 01/31] wip: blob 1 --- pkg/noun/allocate.c | 23 ++- pkg/noun/allocate.h | 34 ++++ pkg/noun/imprison.c | 23 +++ pkg/noun/imprison.h | 8 + pkg/noun/options.h | 1 + pkg/noun/retrieve.c | 197 ++++++++++++++++++++- pkg/noun/retrieve.h | 9 + pkg/noun/serial.c | 417 ++++++++++++++++++++++++++++++++++++++++++++ pkg/noun/serial.h | 31 ++++ pkg/noun/version.h | 3 +- pkg/vere/blob.c | 378 +++++++++++++++++++++++++++++++++++++++ pkg/vere/blob.h | 81 +++++++++ pkg/vere/build.zig | 2 + pkg/vere/disk.c | 114 ++++++++++-- pkg/vere/io/mesa.c | 22 ++- pkg/vere/io/unix.c | 266 +++++++++++++++++++++------- pkg/vere/lord.c | 26 ++- pkg/vere/mars.c | 31 +++- pkg/vere/newt.c | 23 ++- pkg/vere/vere.h | 6 + 20 files changed, 1592 insertions(+), 103 deletions(-) create mode 100644 pkg/vere/blob.c create mode 100644 pkg/vere/blob.h diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index 2319b60c78..8b7d63edaf 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -502,7 +502,10 @@ _me_gain_use(u3_noun dog) static inline u3_atom _ca_take_atom(u3a_atom* old_u) { - c3_w* new_w = u3a_walloc(old_u->len_w + c3_wiseof(u3a_atom)); + // use masked length; bob atoms carry u3a_blob_flag in len_w + // + c3_w dat_w = old_u->len_w & u3a_blob_mask; + c3_w* new_w = u3a_walloc(dat_w + c3_wiseof(u3a_atom)); u3a_atom* new_u = (u3a_atom*)(void *)new_w; u3_noun new = u3a_to_pug(u3a_outa(new_u)); @@ -526,7 +529,7 @@ _ca_take_atom(u3a_atom* old_u) { c3_w i_w; - for ( i_w=0; i_w < old_u->len_w; i_w++ ) { + for ( i_w=0; i_w < dat_w; i_w++ ) { new_u->buf_w[i_w] = old_u->buf_w[i_w]; } } @@ -902,6 +905,14 @@ _me_lose_north(u3_noun dog) } } else { + // notify blob store when a bob atom is freed + // + if ( (c3y == u3a_is_bob(dog)) + && (u3C.bob_free_f) ) + { + u3a_atom* atm_u = (u3a_atom*)box_u; + u3C.bob_free_f(atm_u->mug_h, atm_u->buf_w[0]); + } u3a_wfree(box_u); 
} } @@ -941,6 +952,14 @@ _me_lose_south(u3_noun dog) } } else { + // notify blob store when a bob atom is freed + // + if ( (c3y == u3a_is_bob(dog)) + && (u3C.bob_free_f) ) + { + u3a_atom* atm_u = (u3a_atom*)box_u; + u3C.bob_free_f(atm_u->mug_h, atm_u->buf_w[0]); + } u3a_wfree(box_u); } } diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index 2b92799eaa..4d8abd1121 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -333,6 +333,12 @@ STATIC_ASSERT( u3a_vits <= u3a_min_log, /* u3a_is_cell: yes if noun [som] is cell. */ # define u3a_is_cell(som) u3a_is_pom(som) + + /* u3a_blob_flag: MSB of u3a_atom.len_w marks an indirect atom as a bob + ** (blob reference). The remaining 31 bits hold the actual data word count. + */ +# define u3a_blob_flag ((c3_w)0x80000000u) +# define u3a_blob_mask ((c3_w)0x7FFFFFFFu) # define u3du(som) u3a_is_cell(som) /* u3a_h(): get head of cell [som]. Bail if [som] is not cell. @@ -615,6 +621,34 @@ typedef struct { return (pil_u->top_p == u3R->cap_p) ? c3y : c3n; } + /* u3a_is_bob(): yes if [som] is an indirect atom flagged as a bob (blob ref). + ** Follows naming convention: u3a_is_cat, u3a_is_pug, u3a_is_pom, u3a_is_bob. + */ + static inline c3_o + u3a_is_bob(u3_atom som) { + if ( c3n == u3a_is_pug(som) ) return c3n; + u3a_atom* atm_u = u3a_to_ptr(som); + return (atm_u->len_w & u3a_blob_flag) ? c3y : c3n; + } + + /* u3a_bob_mug(): 31-bit mug of a bob atom's content (= blob directory name). + ** [som] must be a bob atom. + */ + static inline c3_h + u3a_bob_mug(u3_atom som) { + u3a_atom* atm_u = u3a_to_ptr(som); + return atm_u->mug_h; + } + + /* u3a_bob_seq(): sequence number of a bob atom within its mug bucket. + ** [som] must be a bob atom. + */ + static inline c3_w + u3a_bob_seq(u3_atom som) { + u3a_atom* atm_u = u3a_to_ptr(som); + return atm_u->buf_w[0]; + } + /** Functions. 
**/ diff --git a/pkg/noun/imprison.c b/pkg/noun/imprison.c index 8a7fb5e55f..63990f4909 100644 --- a/pkg/noun/imprison.c +++ b/pkg/noun/imprison.c @@ -885,3 +885,26 @@ u3i_molt(u3_noun som, ...) u3z(som); return pro; } + +/* u3i_blob(): construct a bob atom (blob reference). +** +** A bob atom is an indirect atom with the MSB of len_w set. +** [mug_h] is the 31-bit mug of the content (stored in mug_h and used +** as the blob directory name). +** [seq_w] is the sequence number within $pier/.urb/bob//. +*/ +u3_atom +u3i_blob(c3_h mug_h, c3_w seq_w) +{ + // allocate: u3a_atom header + 1 word for seq_w + // + c3_w* nov_w = u3a_walloc(1 + c3_wiseof(u3a_atom)); + u3a_atom* vat_u = (void *)nov_w; + + vat_u->use_w = 1; + vat_u->mug_h = mug_h; + vat_u->len_w = 1 | u3a_blob_flag; // 1 word of payload + bob flag + vat_u->buf_w[0] = seq_w; + + return u3a_to_pug(u3a_outa(nov_w)); +} diff --git a/pkg/noun/imprison.h b/pkg/noun/imprison.h index ed075e0c8b..fc2d15fd3d 100644 --- a/pkg/noun/imprison.h +++ b/pkg/noun/imprison.h @@ -95,6 +95,14 @@ u3i_bytes(c3_w a_w, const c3_y* b_y); + /* u3i_blob(): construct a bob atom (blob reference). + ** + ** [mug_h] is the 31-bit mug of the blob content (= blob directory name). + ** [seq_w] is the sequence number within $pier/.urb/bob//. + */ + u3_atom + u3i_blob(c3_h mug_h, c3_w seq_w); + /* u3i_words(): Copy [a] words from [b] into an atom. */ u3_atom diff --git a/pkg/noun/options.h b/pkg/noun/options.h index 298a02d674..aff1f72765 100644 --- a/pkg/noun/options.h +++ b/pkg/noun/options.h @@ -23,6 +23,7 @@ void (*slog_f)(u3_noun); // function pointer for slog void (*sign_hold_f)(void); // suspend system signal regime void (*sign_move_f)(void); // restore system signal regime + void (*bob_free_f)(c3_h, c3_w); // blob freed: mug_h, seq_w } u3o_config; /* u3o_flag: process/system flags. 
diff --git a/pkg/noun/retrieve.c b/pkg/noun/retrieve.c index 5658cb7faf..d5566c4439 100644 --- a/pkg/noun/retrieve.c +++ b/pkg/noun/retrieve.c @@ -6,9 +6,18 @@ #include "hashtable.h" #include "imprison.h" #include "murmur3.h" +#include "options.h" #include "trace.h" #include "xtract.h" +#include +#include +#include +#include +#include +#include +#include + // declarations of inline functions // @@ -321,6 +330,31 @@ _cr_sing_atom(u3_atom a, u3_noun b) return c3n; } else { + c3_o a_bob = u3a_is_bob(a); + c3_o b_bob = u3a_is_bob(b); + + // bob vs bob: equal iff same mug bucket and sequence number + // + if ( (c3y == a_bob) && (c3y == b_bob) ) { + u3a_atom* a_u = u3a_to_ptr(a); + u3a_atom* b_u = u3a_to_ptr(b); + return ( (a_u->mug_h == b_u->mug_h) + && (a_u->buf_w[0] == b_u->buf_w[0]) ) ? c3y : c3n; + } + // bob vs normal (or normal vs bob): materialize the bob + // + if ( (c3y == a_bob) || (c3y == b_bob) ) { + u3_atom bob = (c3y == a_bob) ? a : (u3_atom)b; + u3_atom nrm = (c3y == a_bob) ? (u3_atom)b : a; + u3_atom mat = u3r_blob_load(bob, u3C.dir_c); + if ( u3_none == mat ) { + return c3n; + } + c3_o ret_o = _cr_sing_atom(mat, nrm); + u3z(mat); + return ret_o; + } + u3a_atom* a_u = u3a_to_ptr(a); u3a_atom* b_u = u3a_to_ptr(b); @@ -988,6 +1022,18 @@ u3r_met(c3_y a_y, daz_w = b; } else { + // materialize bob atoms before measuring + // + if ( c3y == u3a_is_bob(b) ) { + u3_atom mat = u3r_blob_load(b, u3C.dir_c); + if ( u3_none == mat ) { + return (c3_w)u3m_bail(c3__fail); + } + c3_w ret_w = u3r_met(a_y, mat); + u3z(mat); + return ret_w; + } + u3a_atom* b_u = u3a_to_ptr(b); gal_w = (b_u->len_w) - 1; @@ -1031,6 +1077,18 @@ u3r_bit(c3_w a_w, else return (1 & (b >> a_w)); } else { + // materialize bob atoms before extracting bit + // + if ( c3y == u3a_is_bob(b) ) { + u3_atom mat = u3r_blob_load(b, u3C.dir_c); + if ( u3_none == mat ) { + return 0; + } + c3_b ret_b = u3r_bit(a_w, mat); + u3z(mat); + return ret_b; + } + u3a_atom* b_u = u3a_to_ptr(b); c3_y vut_y = (a_w & 
(u3a_word_bits - 1)); c3_w pix_w = (a_w >> u3a_word_bits_log); @@ -1101,6 +1159,19 @@ u3r_bytes(c3_w a_w, } } else { + // materialize bob atoms before extracting bytes + // + if ( c3y == u3a_is_bob(d) ) { + u3_atom mat = u3r_blob_load(d, u3C.dir_c); + if ( u3_none == mat ) { + memset(c_y, 0, b_w); + return; + } + u3r_bytes(a_w, b_w, c_y, mat); + u3z(mat); + return; + } + u3a_atom* d_u = u3a_to_ptr(d); c3_w n_w = d_u->len_w << u3a_word_bytes_shift; c3_y* x_y = (c3_y*)d_u->buf_w + a_w; @@ -1273,6 +1344,18 @@ u3r_half(c3_w a_w, #endif } else { + // materialize bob atoms before extracting half-word + // + if ( c3y == u3a_is_bob(b) ) { + u3_atom mat = u3r_blob_load(b, u3C.dir_c); + if ( u3_none == mat ) { + return 0; + } + c3_h ret_h = u3r_half(a_w, mat); + u3z(mat); + return ret_h; + } + u3a_atom* b_u = u3a_to_ptr(b); #ifdef VERE64 if ( a_w >= (b_u->len_w * 2) ) { @@ -1408,6 +1491,19 @@ u3r_halfs(c3_w a_w, } } else { + // materialize bob atoms before extracting half-words + // + if ( c3y == u3a_is_bob(d) ) { + u3_atom mat = u3r_blob_load(d, u3C.dir_c); + if ( u3_none == mat ) { + memset((c3_y*)c_h, 0, b_w << u3a_half_bytes_shift); + return; + } + u3r_halfs(a_w, b_w, c_h, mat); + u3z(mat); + return; + } + c3_w len_w; c3_h* buf_h; // XX: 64 little endian. very ugly! 
@@ -1464,6 +1560,19 @@ u3r_chubs(c3_w a_w, } } else { + // materialize bob atoms before extracting chubs + // + if ( c3y == u3a_is_bob(d) ) { + u3_atom mat = u3r_blob_load(d, u3C.dir_c); + if ( u3_none == mat ) { + memset((c3_y*)c_d, 0, b_w << u3a_chub_bytes_shift); + return; + } + u3r_chubs(a_w, b_w, c_d, mat); + u3z(mat); + return; + } + u3a_atom* d_u = u3a_to_ptr(d); #ifndef VERE64 c3_w len_w = d_u->len_w * 2; @@ -1985,7 +2094,24 @@ _cr_mug_next(u3a_pile* pil_u, u3_noun veb) // else if ( c3y == u3a_is_atom(veb) ) { u3a_atom* vat_u = (u3a_atom*)veb_u; - c3_h mug_h = u3r_mug_words(vat_u->buf_w, vat_u->len_w); + c3_h mug_h; + // bob atoms: mug was set from blob content hash on creation; + // materialize only if somehow missing (should not occur) + // + if ( c3y == u3a_is_bob(veb) ) { + if ( vat_u->mug_h ) { + return (c3_h)vat_u->mug_h; + } + u3_atom mat = u3r_blob_load(veb, u3C.dir_c); + if ( u3_none == mat ) { + return (c3_h)u3m_bail(c3__fail); + } + mug_h = u3r_mug(mat); + u3z(mat); + } + else { + mug_h = u3r_mug_words(vat_u->buf_w, vat_u->len_w); + } vat_u->mug_h = mug_h; return mug_h; } @@ -2168,6 +2294,12 @@ u3r_word_buffer(u3_atom* a, c3_w* len_w) *len_w = 1; return a; } + // bob atoms have no in-loom word buffer; caller must materialize first + // + if ( c3y == u3a_is_bob(*a) ) { + u3m_bail(c3__fail); + return 0; // unreachable + } u3a_atom* pug_u = u3a_to_ptr(*a); *len_w = pug_u->len_w; return pug_u->buf_w; @@ -2209,3 +2341,66 @@ u3r_comp(u3_atom a, u3_atom b) return 0; } + +/* u3r_blob_load(): materialize a bob atom by loading from the blob store. +** +** Opens $pier/.urb/bob// and constructs a normal atom. +** Returns u3_none on any error. 
+*/ +u3_weak +u3r_blob_load(u3_atom a, const c3_c* pax_c) +{ + u3_assert( c3y == u3a_is_bob(a) ); + + c3_h mug_h = u3a_bob_mug(a); + c3_w seq_w = u3a_bob_seq(a); + + // build path: $pier/.urb/bob// + // + c3_c fil_c[8192]; + snprintf(fil_c, sizeof(fil_c), "%s/.urb/bob/%" PRIc3_h "/%" PRIc3_w, + pax_c, mug_h, seq_w); + + struct stat st_u; + if ( -1 == stat(fil_c, &st_u) ) { + fprintf(stderr, "retrieve: blob missing %s: %s\r\n", + fil_c, strerror(errno)); + return u3_none; + } + + c3_d len_d = (c3_d)st_u.st_size; + c3_i fid_i = open(fil_c, O_RDONLY); + if ( -1 == fid_i ) { + fprintf(stderr, "retrieve: blob open failed %s: %s\r\n", + fil_c, strerror(errno)); + return u3_none; + } + + // allocate a temporary C-heap buffer, read, then copy into loom + // + c3_y* dat_y = c3_malloc(len_d); + c3_d rem_d = len_d; + c3_y* ptr_y = dat_y; + while ( rem_d > 0 ) { + // cap each read() to 1 GiB: macOS returns EINVAL for count > INT_MAX + // + size_t ask_i = ( rem_d > 0x40000000UL ) ? 0x40000000UL : (size_t)rem_d; + ssize_t got_i = read(fid_i, ptr_y, ask_i); + if ( got_i <= 0 ) { + fprintf(stderr, "retrieve: blob read failed %s: %s\r\n", + fil_c, strerror(errno)); + close(fid_i); + c3_free(dat_y); + return u3_none; + } + ptr_y += got_i; + rem_d -= got_i; + } + close(fid_i); + + // u3i_bytes takes c3_w (32-bit) length; safe for <4GiB blobs + // + u3_noun res = u3i_bytes((c3_w)len_d, dat_y); + c3_free(dat_y); + return res; +} diff --git a/pkg/noun/retrieve.h b/pkg/noun/retrieve.h index 77e14f4232..e8a7abb6da 100644 --- a/pkg/noun/retrieve.h +++ b/pkg/noun/retrieve.h @@ -606,4 +606,13 @@ c3_ys u3r_comp(u3_atom a, u3_atom b); + /* u3r_blob_load(): materialize a bob atom by loading from the blob store. + ** + ** Returns a normal indirect atom with the blob's bytes, or u3_none on + ** failure. [pax_c] is the pier path ($pier/). + ** Does NOT consume [a]; caller must manage refcounts as usual. 
+ */ + u3_weak + u3r_blob_load(u3_atom a, const c3_c* pax_c); + #endif /* ifndef U3_RETRIEVE_H */ diff --git a/pkg/noun/serial.c b/pkg/noun/serial.c index a4b3f293cb..594ebb49f3 100644 --- a/pkg/noun/serial.c +++ b/pkg/noun/serial.c @@ -7,8 +7,10 @@ #include "allocate.h" #include "hashtable.h" +#include "imprison.h" #include "jets/k.h" #include "jets/q.h" +#include "options.h" #include "retrieve.h" #include "serial.h" #include "ur/ur.h" @@ -1469,3 +1471,418 @@ u3s_sift_ud(u3_atom a) return u3s_sift_ud_bytes(len_w, byt_y); } + +/* +** Ram/Tap — reference-aware serialization +** +** Ram extends jam's bit-encoding with a 2-bit fixed tag: +** 00 = normal atom (mat-encoded value follows) +** 01 = blob ref (mat-encoded mug, then mat-encoded seq) +** 10 = cell +** 11 = backref (mat-encoded bit-position follows) +** +** All tags are exactly 2 bits (LSB first). +** Wire format: ["RAM\0" 4B][0x01 1B][ram_bits...] +** +** Ram does NOT blobify atoms. Caller must ensure large atoms are +** already bob atoms before calling u3s_ram_xeno(). +*/ + +#define U3S_RAM_MAGIC "\x52\x41\x4d\x00" /* "RAM\0" */ +#define U3S_RAM_VERSION 0x01 + +/* Ram 2-bit tag values (LSB first) +*/ +#define U3S_RAM_TAG_ATOM 0 /* 00 */ +#define U3S_RAM_TAG_BOB 1 /* 01 */ +#define U3S_RAM_TAG_CELL 2 /* 10 */ +#define U3S_RAM_TAG_BACK 3 /* 11 */ + +/* _ram_xeno_t: state for the ram encoding walk. +*/ +typedef struct _ram_xeno_s { + u3p(u3h_root) har_p; + ur_bsw_t rit_u; +} _ram_xeno_t; + +/* _cs_ram_bsw_2tag(): write a 2-bit ram tag to the bitstream. +*/ +static inline void +_cs_ram_bsw_2tag(ur_bsw_t* rit_u, c3_y tag_y) +{ + ur_bsw8(rit_u, 2, tag_y & 3); +} + +/* _cs_ram_bsw_normal_atom(): encode a normal (non-bob) atom as tag 00 + mat. 
+*/ +static inline void +_cs_ram_bsw_normal_atom(ur_bsw_t* rit_u, c3_w met_w, u3_atom a) +{ + // write 2-bit tag 00 + // + _cs_ram_bsw_2tag(rit_u, U3S_RAM_TAG_ATOM); + + if ( c3y == u3a_is_cat(a) ) { + ur_bsw_mat64(rit_u, (c3_y)met_w, (c3_d)a); + } + else { + u3a_atom* vat_u = u3a_to_ptr(a); + c3_y* byt_y = (c3_y*)vat_u->buf_w; + ur_bsw_mat_bytes(rit_u, (c3_d)met_w, byt_y); + } +} + +/* _cs_ram_bsw_bob(): encode a bob atom as tag 01 + mat(mug) + mat(seq). +*/ +static inline void +_cs_ram_bsw_bob(ur_bsw_t* rit_u, u3_atom a) +{ + c3_h mug_h = u3a_bob_mug(a); + c3_w seq_w = u3a_bob_seq(a); + + // write 2-bit tag 01 + // + _cs_ram_bsw_2tag(rit_u, U3S_RAM_TAG_BOB); + + // write mat(mug) and mat(seq) + // + ur_bsw_mat64(rit_u, u3r_met(0, (u3_atom)mug_h), (c3_d)mug_h); + ur_bsw_mat64(rit_u, u3r_met(0, (u3_atom)seq_w), (c3_d)seq_w); +} + +/* _cs_ram_bsw_back(): encode a backref as tag 11 + mat(bit-position). +*/ +static inline void +_cs_ram_bsw_back(ur_bsw_t* rit_u, c3_w met_w, u3_atom a) +{ + c3_d bak_d = ( c3y == u3a_is_cat(a) ) + ? (c3_d)a + : u3r_chub(0, a); + + // write 2-bit tag 11 + // + _cs_ram_bsw_2tag(rit_u, U3S_RAM_TAG_BACK); + ur_bsw_mat64(rit_u, (c3_y)met_w, bak_d); +} + +/* _cs_ram_xeno_atom(): encode atom (or backref) in ram bitstream. +*/ +static void +_cs_ram_xeno_atom(u3_atom a, void* ptr_v) +{ + _ram_xeno_t* ram_u = ptr_v; + ur_bsw_t* rit_u = &(ram_u->rit_u); + u3_weak bak = u3h_git(ram_u->har_p, a); + c3_o bob_o = u3a_is_bob(a); + // only compute met for non-bob atoms; bobs use mug+seq encoding + // + c3_w met_w = (c3n == bob_o) ? 
u3r_met(0, a) : 0; + + if ( u3_none == bak ) { + u3h_put(ram_u->har_p, a, _cs_coin_chub(rit_u->bits)); + if ( c3y == bob_o ) { + _cs_ram_bsw_bob(rit_u, a); + } + else { + _cs_ram_bsw_normal_atom(rit_u, met_w, a); + } + } + else { + c3_w bak_w = u3r_met(0, bak); + + if ( met_w <= bak_w ) { + if ( c3y == bob_o ) { + _cs_ram_bsw_bob(rit_u, a); + } + else { + _cs_ram_bsw_normal_atom(rit_u, met_w, a); + } + } + else { + _cs_ram_bsw_back(rit_u, bak_w, bak); + } + } +} + +/* _cs_ram_xeno_cell(): encode cell (or backref) in ram bitstream. +*/ +static c3_o +_cs_ram_xeno_cell(u3_noun a, void* ptr_v) +{ + _ram_xeno_t* ram_u = ptr_v; + ur_bsw_t* rit_u = &(ram_u->rit_u); + u3_weak bak = u3h_git(ram_u->har_p, a); + + if ( u3_none == bak ) { + u3h_put(ram_u->har_p, a, _cs_coin_chub(rit_u->bits)); + // write 2-bit tag 10 (cell) + // + _cs_ram_bsw_2tag(rit_u, U3S_RAM_TAG_CELL); + return c3y; + } + else { + _cs_ram_bsw_back(rit_u, u3r_met(0, bak), bak); + return c3n; + } +} + +/* u3s_ram_xeno(): ram with off-loom buffer (re-)allocation. +*/ +c3_d +u3s_ram_xeno(u3_noun a, c3_d* len_d, c3_y** byt_y) +{ + _ram_xeno_t ram_u = {0}; + ur_bsw_init(&ram_u.rit_u, ur_fib11, ur_fib12); + ram_u.har_p = u3h_new(); + + u3a_walk_fore(a, &ram_u, _cs_ram_xeno_atom, _cs_ram_xeno_cell); + + u3h_free(ram_u.har_p); + + { + c3_d raw_bytes_d; // byte count of raw ram bits + c3_y* raw_y; + c3_d out_d; + c3_y* out_y; + + // ur_bsw_done() returns bit count, sets raw_bytes_d to byte count + // + ur_bsw_done(&ram_u.rit_u, &raw_bytes_d, &raw_y); + + // prepend 5-byte header: "RAM\0" + version + // + out_d = 5 + raw_bytes_d; + out_y = malloc(out_d); + if ( !out_y ) { + free(raw_y); + *len_d = 0; + return 0; + } + + memcpy(out_y, U3S_RAM_MAGIC, 4); + out_y[4] = U3S_RAM_VERSION; + memcpy(out_y + 5, raw_y, raw_bytes_d); + free(raw_y); + + *len_d = out_d; + *byt_y = out_y; + return out_d; + } +} + +/* +** Tap — ram deserialization +*/ + +/* _tap_frame_t: stack frame for cell reconstruction. 
+*/
+typedef struct _tap_frame_s {
+  u3_weak ref;    //  taken head, or u3_none if still on head
+  c3_d    bit_d;  //  bit position of this noun
+} _tap_frame_t;
+
+/* _cs_tap_xeno_next(): read next value from ram bitstream.
+**
+**   pushes a head-frame per cell tag; sets [*out] only on success.
+*/
+static inline ur_cue_res_e
+_cs_tap_xeno_next(u3a_pile*    pil_u,
+                  ur_bsr_t*    red_u,
+                  ur_dictn_t*  dic_u,
+                  u3_noun*     out)
+{
+  ur_root_t* rot_u = 0;
+
+  while ( 1 ) {
+    c3_d len_d;
+    c3_d bit_d = red_u->bits;
+    c3_y tag_y;
+    ur_cue_res_e res_e;
+
+    // read 2-bit ram tag (LSB first)
+    //
+    tag_y = ur_bsr8_any(red_u, 2);
+
+    switch ( tag_y ) {
+      default:
+        return ur_cue_gone;
+
+      case U3S_RAM_TAG_CELL: {
+        // push a head-frame and continue reading head
+        //
+        _tap_frame_t* fam_u = u3a_push(pil_u);
+        fam_u->ref   = u3_none;
+        fam_u->bit_d = bit_d;
+        continue;
+      }
+
+      case U3S_RAM_TAG_BACK: {
+        if ( ur_cue_good != (res_e = ur_bsr_rub_len(red_u, &len_d)) ) {
+          return res_e;
+        }
+        else if ( 62 < len_d ) {
+          return ur_cue_meme;
+        }
+        else {
+          c3_d bak_d = ur_bsr64_any(red_u, len_d);
+          c3_w bak_w;
+
+          if ( !ur_dictn_get(rot_u, dic_u, bak_d, &bak_w) ) {
+            return ur_cue_back;
+          }
+
+          *out = u3k((u3_noun)bak_w);
+          return ur_cue_good;
+        }
+      }
+
+      case U3S_RAM_TAG_ATOM: {
+        // read mat-encoded value (same as cue atom path)
+        //
+        if ( ur_cue_good != (res_e = ur_bsr_rub_len(red_u, &len_d)) ) {
+          return res_e;
+        }
+
+        if ( (u3a_word_bits - 1) >= len_d ) {
+          *out = (u3_noun)ur_bsrn_any(red_u, len_d);
+        }
+        else {
+          c3_d byt_d = (len_d + 0x7) >> 3;
+          u3i_slab sab_u;
+
+          if ( c3_w_max < byt_d ) {
+            return ur_cue_meme;
+          }
+          u3i_slab_init(&sab_u, 3, byt_d);
+          ur_bsr_bytes_any(red_u, len_d, sab_u.buf_y);
+          *out = u3i_slab_mint_bytes(&sab_u);
+        }
+
+        ur_dictn_put(rot_u, dic_u, bit_d, *out);
+        return ur_cue_good;
+      }
+
+      case U3S_RAM_TAG_BOB: {
+        // read mat(mug) + mat(seq) and produce bob atom; a field wider
+        // than its destination (31-bit mug, c3_w seq) is rejected instead
+        // of being truncated into a reference to some other blob
+        //
+        if ( ur_cue_good != (res_e = ur_bsr_rub_len(red_u, &len_d)) ) {
+          return res_e;
+        }
+        else if ( 31 < len_d ) {
+          return ur_cue_meme;
+        }
+        c3_d mug_d = ur_bsr64_any(red_u, len_d);
+
+        if ( ur_cue_good != (res_e = ur_bsr_rub_len(red_u, &len_d)) ) {
+          return res_e;
+        }
+        else if ( (62 < len_d) || (u3a_word_bits < len_d) ) {
+          return ur_cue_meme;
+        }
+        c3_d seq_d = ur_bsr64_any(red_u, len_d);
+
+        *out = u3i_blob((c3_h)mug_d, (c3_w)seq_d);
+
+        ur_dictn_put(rot_u, dic_u, bit_d, *out);
+        return ur_cue_good;
+      }
+    }
+  }
+}
+
+/* _cs_tap_xeno(): tap on-loom, with off-loom dictionary.
+**   returns u3_none on malformed input, releasing any partial result.
+*/
+static u3_weak
+_cs_tap_xeno(u3_cue_xeno* sil_u,
+             c3_d         len_d,
+             const c3_y*  byt_y)
+{
+  ur_bsr_t      red_u = {0};
+  ur_dictn_t*   dic_u = &sil_u->dic_u;
+  u3a_pile      pil_u;
+  _tap_frame_t* fam_u;
+  ur_cue_res_e  res_e;
+  u3_noun       ref;
+
+  u3a_pile_prep(&pil_u, sizeof(*fam_u));
+
+  if (  (ur_cue_good != ur_bsr_init(&red_u, len_d, byt_y))
+     || (0x7ffffffffffffffULL < len_d) )
+  {
+    return u3_none;
+  }
+
+  res_e = _cs_tap_xeno_next(&pil_u, &red_u, dic_u, &ref);
+
+  if (  (c3n == u3a_pile_done(&pil_u))
+     && (ur_cue_good == res_e) )
+  {
+    fam_u = u3a_peek(&pil_u);
+
+    do {
+      // head-frame: stash result and read the tail
+      //
+      if ( u3_none == fam_u->ref ) {
+        fam_u->ref = ref;
+        res_e = _cs_tap_xeno_next(&pil_u, &red_u, dic_u, &ref);
+        fam_u = u3a_peek(&pil_u);
+      }
+      // tail-frame: build cell and pop stack
+      //
+      else {
+        ur_root_t* rot_u = 0;
+        ref   = u3nc(fam_u->ref, ref);
+        ur_dictn_put(rot_u, dic_u, fam_u->bit_d, ref);
+        fam_u = u3a_pop(&pil_u);
+      }
+    }
+    while (  (c3n == u3a_pile_done(&pil_u))
+          && (ur_cue_good == res_e) );
+  }
+
+  if ( ur_cue_good == res_e ) {
+    return ref;
+  }
+
+  // on failure, unwind and discard intermediates; each frame is read from
+  // the pile, as [fam_u] is unset if the first read failed after pushing
+  while ( c3n == u3a_pile_done(&pil_u) ) {
+    fam_u = u3a_peek(&pil_u);
+    if ( u3_none != fam_u->ref ) {
+      u3z(fam_u->ref);
+    }
+    (void)u3a_pop(&pil_u);
+  }
+
+  return u3_none;
+}
+
+/* u3s_tap_xeno(): tap on-loom, with off-loom dictionary.
+*/ +u3_weak +u3s_tap_xeno(c3_d len_d, const c3_y* byt_y) +{ + // validate header + // + if ( (len_d < 5) + || (0 != memcmp(byt_y, U3S_RAM_MAGIC, 4)) + || (U3S_RAM_VERSION != byt_y[4]) ) + { + return u3_none; + } + + { + u3_cue_xeno* sil_u = u3s_cue_xeno_init(); + u3_weak som; + + // decode after the 5-byte header + // + som = _cs_tap_xeno(sil_u, len_d - 5, byt_y + 5); + ur_dictn_wipe(&sil_u->dic_u); + u3s_cue_xeno_done(sil_u); + return som; + } +} diff --git a/pkg/noun/serial.h b/pkg/noun/serial.h index d7dd5a8eaa..99838e1352 100644 --- a/pkg/noun/serial.h +++ b/pkg/noun/serial.h @@ -134,4 +134,35 @@ u3_weak u3s_sift_ud(u3_atom a); + /* Ram/Tap — reference-aware serialization that encodes bob atoms. + ** + ** Ram extends jam with a 2-bit tag scheme: + ** 00 = normal atom (costs 1 extra bit vs jam) + ** 01 = blob ref (mat(mug) + mat(seq)) + ** 10 = cell + ** 11 = backref + ** + ** Wire format: [magic "RAM\0" 4B][version 0x01 1B][ram_bits...] + */ + + /* u3s_ram_xeno(): ram with off-loom buffer (re-)allocation. + ** + ** Encodes bob atoms as 01-tagged blob refs. + ** Normal atoms (including large non-bob atoms) are 00-tagged. + ** Does NOT blobify atoms — caller must do that first. + ** + ** Returns number of bytes written (including 5-byte header). + ** On error returns 0 and *byt_y is unchanged. + */ + c3_d + u3s_ram_xeno(u3_noun a, c3_d* len_d, c3_y** byt_y); + + /* u3s_tap_xeno(): tap on-loom, with off-loom dictionary. + ** + ** Decodes ram bytes. 01-tagged blob refs become bob atoms. + ** Returns u3_none on any error. 
+ */ + u3_weak + u3s_tap_xeno(c3_d len_d, const c3_y* byt_y); + #endif /* ifndef U3_SERIAL_H */ diff --git a/pkg/noun/version.h b/pkg/noun/version.h index 92b754f83d..924ce78275 100644 --- a/pkg/noun/version.h +++ b/pkg/noun/version.h @@ -43,6 +43,7 @@ typedef c3_h u3e_version; */ #define U3E_VER1 1 // north+south.bin #define U3E_VER2 2 // image.bin -#define U3E_VERLAT U3E_VER2 +#define U3E_VER3 3 // image.bin + blobs.txt + ram events +#define U3E_VERLAT U3E_VER3 #endif /* ifndef U3_VERSION_H */ diff --git a/pkg/vere/blob.c b/pkg/vere/blob.c new file mode 100644 index 0000000000..99d9896352 --- /dev/null +++ b/pkg/vere/blob.c @@ -0,0 +1,378 @@ +/// @file + +#include "blob.h" +#include "vere.h" + +#include +#include +#include +#include +#include +#include + +// maximum bytes per single read()/write() call. +// POSIX allows read()/write() to return EINVAL if count > SSIZE_MAX; +// macOS returns EINVAL if count > INT_MAX. cap conservatively at 1 GiB. +// +#define BLOB_IO_MAX ((size_t)0x40000000UL) + +/* _blob_bob_dir(): write path to $pier/.urb/bob/ into [out_c]. +*/ +static void +_blob_bob_dir(c3_c* out_c, const c3_c* pax_c) +{ + snprintf(out_c, 8192, "%s/.urb/bob", pax_c); +} + +/* _blob_mug_dir(): write path to $pier/.urb/bob// into [out_c]. +*/ +static void +_blob_mug_dir(c3_c* out_c, const c3_c* pax_c, c3_h mug_h) +{ + snprintf(out_c, 8192, "%s/.urb/bob/%" PRIc3_h, pax_c, mug_h); +} + +/* _blob_lock_path(): write path to $pier/.urb/bob//lock into [out_c]. +*/ +static void +_blob_lock_path(c3_c* out_c, const c3_c* pax_c, c3_h mug_h) +{ + snprintf(out_c, 8192, "%s/.urb/bob/%" PRIc3_h "/lock", pax_c, mug_h); +} + +/* u3_blob_path(): write filesystem path for a blob into [out_c]. +*/ +void +u3_blob_path(c3_c* out_c, const c3_c* pax_c, c3_h mug_h, c3_w seq_w) +{ + snprintf(out_c, 8192, "%s/.urb/bob/%" PRIc3_h "/%" PRIc3_w, + pax_c, mug_h, seq_w); +} + +/* u3_blob_init(): initialize blob store; create .urb/bob/ if needed. 
+*/ +void +u3_blob_init(const c3_c* pax_c) +{ + c3_c bob_c[8192]; + _blob_bob_dir(bob_c, pax_c); + + if ( 0 != c3_mkdir(bob_c, 0700) && EEXIST != errno ) { + fprintf(stderr, "blob: failed to create %s: %s\r\n", + bob_c, strerror(errno)); + } +} + +/* _blob_lock_acquire(): acquire mug bucket lock, return next seq number. +** +** Creates the mug directory and lockfile if needed. +** Returns 0 on failure. +*/ +static c3_w +_blob_lock_acquire(const c3_c* pax_c, c3_h mug_h) +{ + c3_c dir_c[8192]; + c3_c lck_c[8192]; + _blob_mug_dir(dir_c, pax_c, mug_h); + _blob_lock_path(lck_c, pax_c, mug_h); + + // create mug bucket directory if needed + if ( 0 != c3_mkdir(dir_c, 0700) && EEXIST != errno ) { + fprintf(stderr, "blob: failed to create bucket %s: %s\r\n", + dir_c, strerror(errno)); + return 0; + } + + // open lockfile, creating if needed + c3_i lok_i = c3_open(lck_c, O_RDWR | O_CREAT, 0600); + if ( -1 == lok_i ) { + fprintf(stderr, "blob: failed to open lock %s: %s\r\n", + lck_c, strerror(errno)); + return 0; + } + + // exclusive advisory lock + struct flock flk_u = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0, + }; + if ( -1 == fcntl(lok_i, F_SETLKW, &flk_u) ) { + fprintf(stderr, "blob: failed to lock %s: %s\r\n", + lck_c, strerror(errno)); + close(lok_i); + return 0; + } + + // read current next-seq (0 means empty/new file) + c3_c buf_c[32] = {0}; + ssize_t red_i = read(lok_i, buf_c, sizeof(buf_c) - 1); + c3_w nex_w = ( red_i > 0 ) ? 
(c3_w)strtoul(buf_c, 0, 10) : 1; + if ( 0 == nex_w ) { + nex_w = 1; + } + + // write incremented value back + if ( -1 == lseek(lok_i, 0, SEEK_SET) ) { + fprintf(stderr, "blob: lseek failed on %s: %s\r\n", + lck_c, strerror(errno)); + close(lok_i); + return 0; + } + if ( -1 == ftruncate(lok_i, 0) ) { + fprintf(stderr, "blob: ftruncate failed on %s: %s\r\n", + lck_c, strerror(errno)); + close(lok_i); + return 0; + } + + c3_c wri_c[32]; + snprintf(wri_c, sizeof(wri_c), "%" PRIc3_w, nex_w + 1); + if ( -1 == write(lok_i, wri_c, strlen(wri_c)) ) { + fprintf(stderr, "blob: failed to write lock %s: %s\r\n", + lck_c, strerror(errno)); + close(lok_i); + return 0; + } + + // fsync and close (releases lock) + fsync(lok_i); + close(lok_i); + + return nex_w; +} + +/* _blob_dedup(): scan bucket for byte-equal content. +** +** Returns the sequence number of an existing equal blob, or 0 if none. +*/ +static c3_w +_blob_dedup(const c3_c* pax_c, c3_h mug_h, c3_w max_w, + const c3_y* dat_y, c3_d len_d) +{ + for ( c3_w seq_w = 1; seq_w < max_w; seq_w++ ) { + c3_c fil_c[8192]; + u3_blob_path(fil_c, pax_c, mug_h, seq_w); + + struct stat st_u; + if ( -1 == stat(fil_c, &st_u) ) { + continue; + } + if ( (c3_d)st_u.st_size != len_d ) { + continue; + } + + c3_i fid_i = open(fil_c, O_RDONLY); + if ( -1 == fid_i ) { + continue; + } + + c3_o eql_o = c3y; + c3_d rem_d = len_d; + const c3_y* ptr_y = dat_y; + c3_y buf_y[4096]; + + while ( rem_d > 0 ) { + c3_d ask_d = ( rem_d < sizeof(buf_y) ) ? rem_d : sizeof(buf_y); + ssize_t got_i = read(fid_i, buf_y, ask_d); + if ( got_i <= 0 || (c3_d)got_i != ask_d || + 0 != memcmp(ptr_y, buf_y, ask_d) ) + { + eql_o = c3n; + break; + } + ptr_y += ask_d; + rem_d -= ask_d; + } + + close(fid_i); + if ( c3y == eql_o ) { + return seq_w; + } + } + return 0; +} + +/* u3_blob_save(): write bytes to blob store. 
+*/
+c3_o
+u3_blob_save(const c3_c* pax_c,
+             const c3_y* dat_y,
+             c3_d        len_d,
+             c3_h*       mug_h,
+             c3_w*       seq_w)
+{
+  if ( (c3_d)(c3_h)len_d != len_d ) {    // u3r_mug_bytes takes a c3_h length
+    fprintf(stderr, "blob: too large to mug (%" PRIc3_d " bytes)\r\n", len_d);
+    return c3n;
+  }
+  *mug_h = u3r_mug_bytes(dat_y, (c3_h)len_d);
+
+  // acquire lock and get next sequence number (0 on failure)
+  c3_w nex_w = _blob_lock_acquire(pax_c, *mug_h);
+  if ( 0 == nex_w ) {
+    return c3n;
+  }
+
+  // reuse a byte-equal blob if one exists (the reserved number is skipped)
+  c3_w dup_w = _blob_dedup(pax_c, *mug_h, nex_w, dat_y, len_d);
+  if ( 0 != dup_w ) {
+    *seq_w = dup_w;
+    return c3y;
+  }
+
+  // write blob file
+  c3_c fil_c[8192];
+  u3_blob_path(fil_c, pax_c, *mug_h, nex_w);
+
+  c3_i fid_i = open(fil_c, O_WRONLY | O_CREAT | O_EXCL, 0400);
+  if ( -1 == fid_i ) {
+    fprintf(stderr, "blob: failed to create %s: %s\r\n",
+            fil_c, strerror(errno));
+    return c3n;
+  }
+
+  c3_d        rem_d = len_d;
+  const c3_y* ptr_y = dat_y;
+  while ( rem_d > 0 ) {
+    size_t  ask_i = ( rem_d < BLOB_IO_MAX ) ? (size_t)rem_d : BLOB_IO_MAX;
+    ssize_t wrt_i = write(fid_i, ptr_y, ask_i);
+    if ( wrt_i <= 0 ) {
+      break;
+    }
+    ptr_y += wrt_i;
+    rem_d -= wrt_i;
+  }
+
+  // a short or non-durable blob is removed, never reported as saved
+  if ( (rem_d > 0) || (-1 == fsync(fid_i)) ) {
+    fprintf(stderr, "blob: write failed on %s: %s\r\n",
+            fil_c, strerror(errno));
+    close(fid_i);
+    unlink(fil_c);
+    return c3n;
+  }
+  close(fid_i);
+  *seq_w = nex_w;
+  return c3y;
+}
+
+/* u3_blob_save_fd(): streaming write from open file descriptor.
+*/
+c3_o
+u3_blob_save_fd(const c3_c* pax_c,
+                c3_i        fid_i,
+                c3_d        len_d,
+                c3_h*       mug_h,
+                c3_w*       seq_w)
+{
+  // mugging (MurmurHash3 is not incremental) and dedup need the content
+  // in memory; sizes the mug's c3_h length cannot cover are refused first
+  //
+  if ( (len_d > (c3_d)SIZE_MAX) || ((c3_d)(c3_h)len_d != len_d) ) {
+    fprintf(stderr, "blob: file too large to map (%" PRIc3_d " bytes)\r\n",
+            len_d);
+    return c3n;
+  }
+
+  c3_y* buf_y = c3_malloc(len_d);
+  if ( !buf_y ) {
+    fprintf(stderr, "blob: failed to allocate %" PRIc3_d " bytes\r\n", len_d);
+    return c3n;
+  }
+
+  c3_d  rem_d = len_d;
+  c3_y* ptr_y = buf_y;
+  while ( rem_d > 0 ) {
+    size_t  ask_i = ( rem_d < BLOB_IO_MAX ) ? (size_t)rem_d : BLOB_IO_MAX;
+    ssize_t got_i = read(fid_i, ptr_y, ask_i);
+    if ( got_i <= 0 ) {
+      fprintf(stderr, "blob: read failed: %s\r\n",
+              ( 0 == got_i ) ? "unexpected end of file" : strerror(errno));
+      c3_free(buf_y);
+      return c3n;
+    }
+    ptr_y += got_i;
+    rem_d -= got_i;
+  }
+
+  c3_o ret_o = u3_blob_save(pax_c, buf_y, len_d, mug_h, seq_w);
+  c3_free(buf_y);
+  return ret_o;
+}
+
+/* u3_blob_load(): read blob into a loom atom.
+*/
+u3_weak
+u3_blob_load(const c3_c* pax_c, c3_h mug_h, c3_w seq_w)
+{
+  c3_c fil_c[8192];
+  u3_blob_path(fil_c, pax_c, mug_h, seq_w);
+
+  struct stat st_u;
+  if ( -1 == stat(fil_c, &st_u) ) {
+    fprintf(stderr, "blob: missing blob %" PRIc3_h "/%" PRIc3_w ": %s\r\n",
+            mug_h, seq_w, strerror(errno));
+    return u3_none;
+  }
+
+  c3_d len_d = (c3_d)st_u.st_size;
+  if ( (c3_d)(c3_w)len_d != len_d ) {    // u3i_bytes() takes a c3_w length
+    fprintf(stderr, "blob: %s is too large to load\r\n", fil_c);
+    return u3_none;
+  }
+
+  c3_i fid_i = open(fil_c, O_RDONLY);
+  if ( -1 == fid_i ) {
+    fprintf(stderr, "blob: failed to open %s: %s\r\n", fil_c, strerror(errno));
+    return u3_none;
+  }
+
+  c3_y* dat_y = c3_malloc(len_d);
+  c3_d  rem_d = len_d;
+  c3_y* ptr_y = dat_y;
+  while ( rem_d > 0 ) {
+    size_t  ask_i = ( rem_d < BLOB_IO_MAX ) ? (size_t)rem_d : BLOB_IO_MAX;
+    ssize_t got_i = read(fid_i, ptr_y, ask_i);
+    if ( got_i <= 0 ) {
+      fprintf(stderr, "blob: read failed on %s: %s\r\n", fil_c,
+              ( 0 == got_i ) ? "unexpected end of file" : strerror(errno));
+      close(fid_i);
+      c3_free(dat_y);
+      return u3_none;
+    }
+    ptr_y += got_i;
+    rem_d -= got_i;
+  }
+  close(fid_i);
+  u3_noun res = u3i_bytes((c3_w)len_d, dat_y);
+  c3_free(dat_y);
+  return res;
+}
+
+/* u3_blob_exists(): check whether a blob file exists.
+*/ +c3_o +u3_blob_exists(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) +{ + c3_c fil_c[8192]; + u3_blob_path(fil_c, pax_c, mug_h, seq_w); + + struct stat st_u; + return ( 0 == stat(fil_c, &st_u) ) ? c3y : c3n; +} + +/* u3_blob_delete(): delete a blob file. +*/ +void +u3_blob_delete(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) +{ + c3_c fil_c[8192]; + u3_blob_path(fil_c, pax_c, mug_h, seq_w); + + if ( 0 != unlink(fil_c) && ENOENT != errno ) { + fprintf(stderr, "blob: failed to delete %s: %s\r\n", + fil_c, strerror(errno)); + } +} diff --git a/pkg/vere/blob.h b/pkg/vere/blob.h new file mode 100644 index 0000000000..c5601bdc59 --- /dev/null +++ b/pkg/vere/blob.h @@ -0,0 +1,81 @@ +/// @file + +#ifndef U3_VERE_BLOB_H +#define U3_VERE_BLOB_H + +#include "c3/c3.h" +#include "noun.h" + + /* Blob store: content-addressed storage for large atoms. + ** + ** Files live in $pier/.urb/bob//. + ** Each mug bucket has a lockfile ($pier/.urb/bob//lock) holding + ** the next available sequence number (ASCII decimal). + ** + ** Earth is the sole writer; Mars is read-only. + */ + + /* U3_BLOB_THRESH: atoms larger than this (in bytes) are blobified. + */ +# define U3_BLOB_THRESH (32ULL * 1024ULL * 1024ULL) + + /* u3_blob_init(): initialize blob store; create .urb/bob/ if needed. + */ + void + u3_blob_init(const c3_c* pax_c); + + /* u3_blob_save(): write bytes to blob store. + ** + ** Deduplicates within the mug bucket (byte-for-byte comparison). + ** On success, returns c3y and sets *mug_h and *seq_w. + */ + c3_o + u3_blob_save(const c3_c* pax_c, + const c3_y* dat_y, + c3_d len_d, + c3_h* mug_h, + c3_w* seq_w); + + /* u3_blob_save_fd(): streaming write from open file descriptor. + ** + ** Reads [len_d] bytes from [fid_i], writes to blob store. + ** Avoids double-buffering for large file ingestion. + ** On success, returns c3y and sets *mug_h and *seq_w. 
+ */ + c3_o + u3_blob_save_fd(const c3_c* pax_c, + c3_i fid_i, + c3_d len_d, + c3_h* mug_h, + c3_w* seq_w); + + /* u3_blob_load(): read blob into a loom atom. + ** + ** Returns u3_none on failure. + */ + u3_weak + u3_blob_load(const c3_c* pax_c, c3_h mug_h, c3_w seq_w); + + /* u3_blob_exists(): check whether a blob file exists. + */ + c3_o + u3_blob_exists(const c3_c* pax_c, c3_h mug_h, c3_w seq_w); + + /* u3_blob_delete(): delete a blob file. + ** + ** Called when a bob atom's total refcount reaches zero. + */ + void + u3_blob_delete(const c3_c* pax_c, c3_h mug_h, c3_w seq_w); + + /* u3_blob_path(): write filesystem path for a blob into [out_c]. + ** + ** [out_c] must be at least 8192 bytes. + */ + void + u3_blob_path(c3_c* out_c, + const c3_c* pax_c, + c3_h mug_h, + c3_w seq_w); + +#endif /* ifndef U3_VERE_BLOB_H */ diff --git a/pkg/vere/build.zig b/pkg/vere/build.zig index c26d21ff1b..d2924d4533 100644 --- a/pkg/vere/build.zig +++ b/pkg/vere/build.zig @@ -215,6 +215,7 @@ pub fn build(b: *std.Build) !void { const c_source_files = [_][]const u8{ "auto.c", + "blob.c", "ca_bundle/ca_bundle.c", "dawn.c", "db/lmdb.c", @@ -247,6 +248,7 @@ const c_source_files = [_][]const u8{ }; const install_headers = [_][]const u8{ + "blob.h", "db/lmdb.h", "dns_sd.h", "io/ames/stun.h", diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index cb6cb4af4d..75fabef369 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -5,6 +5,7 @@ #include "vere.h" #include "version.h" #include "db/lmdb.h" +#include "blob.h" #include #include "migrate.h" @@ -118,6 +119,19 @@ _disk_commit_start(u3_disk* log_u) _disk_commit_after_cb); } +/* _disk_bob_free_cb(): called by noun allocator when a bob atom is freed. +** +** Deletes the blob file from the store. The pier path is read from +** u3C.dir_c, which is set before any nouns are allocated. 
+*/ +static void +_disk_bob_free_cb(c3_h mug_h, c3_w seq_w) +{ + if ( u3C.dir_c ) { + u3_blob_delete(u3C.dir_c, mug_h, seq_w); + } +} + /* u3_disk_etch(): serialize an event for persistence. RETAIN [eve] */ size_t @@ -132,23 +146,25 @@ u3_disk_etch(u3_disk* log_u, u3t_event_trace("disk etch", 'B'); #endif - // XX check version number in log_u - // XX needs api redesign to limit allocations + // serialize event with ram (reference-aware encoding for bob atoms) + // output: [4B mug][ram_bytes...] // { - u3_atom mat = u3qe_jam(eve); - c3_w len_w = u3r_met(3, mat); + c3_d ram_d; + c3_y* ram_y; - len_i = 4 + len_w; + u3s_ram_xeno(eve, &ram_d, &ram_y); + + len_i = 4 + ram_d; dat_y = c3_malloc(len_i); - dat_y[0] = mug_h & 0xff; - dat_y[1] = (mug_h >> 8) & 0xff; + dat_y[0] = mug_h & 0xff; + dat_y[1] = (mug_h >> 8) & 0xff; dat_y[2] = (mug_h >> 16) & 0xff; dat_y[3] = (mug_h >> 24) & 0xff; - u3r_bytes(0, len_w, dat_y + 4, mat); + memcpy(dat_y + 4, ram_y, ram_d); - u3z(mat); + c3_free(ram_y); } #ifdef DISK_TRACE_JAM @@ -324,16 +340,27 @@ u3_disk_sift(u3_disk* log_u, u3t_event_trace("disk sift", 'B'); #endif - // XX check version in log_u - // *mug_h = dat_y[0] ^ (dat_y[1] << 8) ^ (dat_y[2] << 16) ^ (dat_y[3] << 24); - // XX u3m_soft? + // try ram (VER3 events) first, fall back to jam (VER1/VER2 events) // - *job = u3ke_cue(u3i_bytes(len_i - 4, dat_y + 4)); + { + c3_d pay_d = len_i - 4; + c3_y* pay_y = dat_y + 4; + u3_weak tap = u3s_tap_xeno(pay_d, pay_y); + + if ( u3_none != tap ) { + *job = tap; + } + else { + // XX u3m_soft? + // + *job = u3ke_cue(u3i_bytes(pay_d, pay_y)); + } + } #ifdef DISK_TRACE_CUE u3t_event_trace("disk sift", 'E'); @@ -975,6 +1002,26 @@ _disk_epoc_meta(u3_disk* log_u, return c3y; } +/* _disk_epoc_blobs_init(): create empty blobs.txt in epoch directory. 
+*/ +static c3_o +_disk_epoc_blobs_init(const c3_c* epo_c) +{ + c3_c blb_c[8193]; + snprintf(blb_c, sizeof(blb_c), "%s/blobs.txt", epo_c); + + // create empty blobs.txt (open for append, creating if needed) + c3_i fid_i = open(blb_c, O_WRONLY | O_CREAT | O_APPEND, 0600); + if ( -1 == fid_i ) { + fprintf(stderr, "disk: failed to create blobs.txt in %s: %s\r\n", + epo_c, strerror(errno)); + return c3n; + } + fsync(fid_i); + close(fid_i); + return c3y; +} + /* _disk_epoc_zero: make epoch zero. */ static c3_o @@ -1047,6 +1094,12 @@ _disk_epoc_zero(c3_c* pax_c) close(epo_i); #endif + // create empty blobs.txt for GC tracking + // + if ( c3n == _disk_epoc_blobs_init(epo_c) ) { + goto fail3; + } + // success return c3y; @@ -1185,6 +1238,12 @@ _disk_epoc_roll(u3_disk* log_u, c3_d epo_d) close(epo_i); #endif + // create empty blobs.txt for GC tracking + // + if ( c3n == _disk_epoc_blobs_init(epo_c) ) { + goto fail3; + } + fprintf(stderr, "disk: created epoch %" PRIc3_d "\r\n", epo_d); // load new epoch directory and set it in log_u @@ -1971,6 +2030,13 @@ _disk_epoc_load(u3_disk* log_u, c3_d lat_d, u3_disk_load_e lod_e) return _epoc_good; } break; + case U3E_VER3: { + // VER3 is the current epoch format (image.bin + blobs.txt + ram events). + // Load path is identical to VER2; no migration needed. + // Fall through to VER2 handling. 
+ // + } // fallthrough + case U3E_VER2: { if ( u3_dlod_epoc == lod_e ) { c3_c chk_c[8193]; @@ -2056,6 +2122,16 @@ _disk_epoc_load(u3_disk* log_u, c3_d lat_d, u3_disk_load_e lod_e) || (!log_u->epo_d && log_u->dun_d && !u3A->eve_d) || (c3n == _disk_vere_diff(log_u)) ) { + // VER2 epoch: always roll to VER3 if snapshot is up-to-date + // + if ( (U3E_VER2 == ver_h) + && (log_u->dun_d == u3A->eve_d) ) + { + if ( c3n == _disk_epoc_roll(log_u, log_u->dun_d) ) { + fprintf(stderr, "disk: failed to roll VER2 epoch to VER3\r\n"); + exit(1); + } + } return _epoc_good; } else if ( log_u->dun_d != u3A->eve_d ) { @@ -2144,6 +2220,10 @@ u3_disk_make(c3_c* pax_c) } } + // make $pier/.urb/bob (blob store) + // + u3_blob_init(pax_c); + return c3y; } @@ -2212,6 +2292,14 @@ u3_disk_load(c3_c* pax_c, u3_disk_load_e lod_e) return 0; } + // initialize blob store (creates .urb/bob/ if needed) + // + u3_blob_init(pax_c); + + // register blob-free callback so noun GC can delete orphaned blobs + // + u3C.bob_free_f = _disk_bob_free_cb; + // XX move this into u3_disk_make // if ( u3_dlod_boot == lod_e ) { diff --git a/pkg/vere/io/mesa.c b/pkg/vere/io/mesa.c index 44479a579a..c7ca9c5797 100644 --- a/pkg/vere/io/mesa.c +++ b/pkg/vere/io/mesa.c @@ -9,6 +9,7 @@ #include "io/ames/stun.h" #include "mesa/mesa.h" #include "mesa/bitset.h" +#include "../blob.h" #include #include #include @@ -2648,7 +2649,26 @@ _mesa_hear_page(u3_mesa_pict* pic_u, sockaddr_in lan_u) c3_y* buf_y = c3_calloc(mesa_size_pact(pac_u)); c3_h res_h = mesa_etch_pact_to_buf(buf_y, mesa_size_pact(pac_u), pac_u); - pac = u3i_bytes(res_h, buf_y); + + // large reassembled packets: store as blob, return bob atom + // + if ( (c3_d)res_h > U3_BLOB_THRESH ) { + c3_h bob_mug_h; + c3_w bob_seq_w; + + if ( c3y == u3_blob_save(sam_u->pir_u->pax_c, buf_y, + (c3_d)res_h, &bob_mug_h, &bob_seq_w) ) + { + pac = u3i_blob(bob_mug_h, bob_seq_w); + } + else { + pac = u3i_bytes(res_h, buf_y); + } + } + else { + pac = u3i_bytes(res_h, buf_y); + } + 
c3_free(buf_y); } cad = u3nt(c3__heer, lan, pac); diff --git a/pkg/vere/io/unix.c b/pkg/vere/io/unix.c index 686b541c73..dc72ed3554 100644 --- a/pkg/vere/io/unix.c +++ b/pkg/vere/io/unix.c @@ -39,6 +39,7 @@ #include #include "noun.h" +#include "../blob.h" struct _u3_umon; struct _u3_udir; @@ -431,9 +432,7 @@ static c3_m _unix_write_file_hard(c3_c* pax_c, u3_noun mim) { c3_i fid_i = c3_open(pax_c, O_WRONLY | O_CREAT | O_TRUNC, 0666); - c3_w len_w, rit_w, siz_w; c3_h mug_h = 0; - c3_y* dat_y; u3_noun dat = u3t(u3t(mim)); @@ -444,27 +443,76 @@ _unix_write_file_hard(c3_c* pax_c, u3_noun mim) return 0; } - siz_w = u3h(u3t(mim)); - len_w = u3r_met(3, dat); - dat_y = c3_calloc(siz_w); - - u3r_bytes(0, len_w, dat_y, dat); - u3z(mim); - - rit_w = write(fid_i, dat_y, siz_w); + // bob atom: stream from blob store to file + // + if ( c3y == u3a_is_bob(dat) ) { + c3_h bob_mug_h = u3a_bob_mug(dat); + c3_w bob_seq_w = u3a_bob_seq(dat); + c3_c src_c[8192]; + u3_blob_path(src_c, u3C.dir_c, bob_mug_h, bob_seq_w); + + c3_i src_i = open(src_c, O_RDONLY); + if ( src_i < 0 ) { + u3l_log("error opening blob %s for reading: %s", + src_c, strerror(errno)); + close(fid_i); + u3z(mim); + return 0; + } - if ( rit_w != siz_w ) { - u3l_log("error writing %s: %s", - pax_c, strerror(errno)); - mug_h = 0; + c3_y buf_y[65536]; + ssize_t got_i; + while ( (got_i = read(src_i, buf_y, sizeof(buf_y))) > 0 ) { + c3_y* ptr_y = buf_y; + ssize_t rem_i = got_i; + while ( rem_i > 0 ) { + ssize_t wrt_i = write(fid_i, ptr_y, rem_i); + if ( wrt_i <= 0 ) { + u3l_log("error writing %s: %s", pax_c, strerror(errno)); + close(src_i); + close(fid_i); + u3z(mim); + return 0; + } + ptr_y += wrt_i; + rem_i -= wrt_i; + } + } + close(src_i); + mug_h = bob_mug_h; } else { - mug_h = u3r_mug_bytes(dat_y, len_w); + // normal atom: materialize and write in chunks + // + c3_w siz_w = u3h(u3t(mim)); + c3_w len_w = u3r_met(3, dat); + c3_y* dat_y = c3_calloc(siz_w); + + u3r_bytes(0, len_w, dat_y, dat); + + c3_d rem_d = (c3_d)siz_w; 
+ c3_y* ptr_y = dat_y; + c3_o ok_o = c3y; + while ( rem_d > 0 ) { + size_t ask_i = ( rem_d > 0x40000000UL ) ? 0x40000000UL : (size_t)rem_d; + ssize_t wrt_i = write(fid_i, ptr_y, ask_i); + if ( wrt_i <= 0 ) { + u3l_log("error writing %s: %s", pax_c, strerror(errno)); + ok_o = c3n; + break; + } + ptr_y += wrt_i; + rem_d -= wrt_i; + } + + if ( c3y == ok_o ) { + mug_h = u3r_mug_bytes(dat_y, len_w); + } + c3_free(dat_y); } + u3z(mim); close(fid_i); - c3_free(dat_y); - return mug_h; } @@ -473,6 +521,22 @@ _unix_write_file_hard(c3_c* pax_c, u3_noun mim) static void _unix_write_file_soft(u3_ufil* fil_u, u3_noun mim) { + u3_noun dat = u3t(u3t(mim)); + + // for bob atoms, compare mugs directly — no disk read needed + // + if ( c3y == u3a_is_bob(dat) ) { + c3_h bob_mug_h = u3a_bob_mug(dat); + if ( bob_mug_h == fil_u->gum_w ) { + u3z(mim); + return; + } + fil_u->gum_w = _unix_write_file_hard(fil_u->pax_c, mim); + return; + } + + // small files: read existing content to detect no-op writes + // struct stat buf_u; c3_i fid_i = c3_open(fil_u->pax_c, O_RDONLY, 0644); c3_ws len_ws, red_ws; @@ -888,6 +952,9 @@ static u3_noun _unix_update_node(u3_unix* unx_u, u3_unod* nod_u); ** otherwise, read the file and get a mug checksum. if same as ** gum_w, move on. otherwise, overwrite add path plus data to ** %into event. +** +** Files larger than U3_BLOB_THRESH are stored in the blob store +** and referenced by a bob atom instead of copying bytes into loom. 
*/ static u3_noun _unix_update_file(u3_unix* unx_u, u3_ufil* fil_u) @@ -903,7 +970,6 @@ _unix_update_file(u3_unix* unx_u, u3_ufil* fil_u) struct stat buf_u; c3_i fid_i = c3_open(fil_u->pax_c, O_RDONLY, 0644); c3_ws len_ws, red_ws; - c3_y* dat_y; if ( fid_i < 0 || fstat(fid_i, &buf_u) < 0 ) { if ( ENOENT == errno ) { @@ -917,42 +983,82 @@ _unix_update_file(u3_unix* unx_u, u3_ufil* fil_u) } len_ws = buf_u.st_size; - dat_y = c3_malloc(len_ws); - red_ws = read(fid_i, dat_y, len_ws); + // large files: stream into blob store, return bob atom + // + if ( (c3_d)len_ws > U3_BLOB_THRESH ) { + c3_h bob_mug_h; + c3_w bob_seq_w; - if ( close(fid_i) < 0 ) { - u3l_log("error closing file %s: %s", - fil_u->pax_c, strerror(errno)); - } + c3_o ok_o = u3_blob_save_fd(unx_u->pax_c, fid_i, + (c3_d)len_ws, &bob_mug_h, &bob_seq_w); - if ( len_ws != red_ws ) { - if ( red_ws < 0 ) { - u3l_log("error reading file %s: %s", + if ( close(fid_i) < 0 ) { + u3l_log("error closing file %s: %s", fil_u->pax_c, strerror(errno)); } - else { - u3l_log("wrong # of bytes read in file %s: %" PRIc3_ws " %" PRIc3_ws, - fil_u->pax_c, len_ws, red_ws); + + if ( c3n == ok_o ) { + u3l_log("blob: failed to save large file %s", fil_u->pax_c); + return u3_nul; } - c3_free(dat_y); - return u3_nul; - } - else { - c3_h mug_h = u3r_mug_bytes(dat_y, len_ws); - if ( mug_h == fil_u->gum_w ) { - c3_free(dat_y); + + // skip if content unchanged (bob_mug_h is content mug) + // + if ( bob_mug_h == fil_u->gum_w ) { return u3_nul; } - else { + + { u3_noun pax = _unix_string_to_path(unx_u, fil_u->pax_c); u3_noun mim = u3nt(c3__text, u3i_string("plain"), u3_nul); - u3_noun dat = u3nt(mim, len_ws, u3i_bytes(len_ws, dat_y)); + u3_atom atm = u3i_blob(bob_mug_h, bob_seq_w); + u3_noun dat = u3nt(mim, (u3_atom)len_ws, atm); - c3_free(dat_y); return u3nc(u3nt(pax, u3_nul, dat), u3_nul); } } + + // small files: existing path — read into buffer + // + { + c3_y* dat_y = c3_malloc(len_ws); + + red_ws = read(fid_i, dat_y, len_ws); + + if ( 
close(fid_i) < 0 ) { + u3l_log("error closing file %s: %s", + fil_u->pax_c, strerror(errno)); + } + + if ( len_ws != red_ws ) { + if ( red_ws < 0 ) { + u3l_log("error reading file %s: %s", + fil_u->pax_c, strerror(errno)); + } + else { + u3l_log("wrong # of bytes read in file %s: %" PRIc3_ws " %" PRIc3_ws, + fil_u->pax_c, len_ws, red_ws); + } + c3_free(dat_y); + return u3_nul; + } + else { + c3_h mug_h = u3r_mug_bytes(dat_y, len_ws); + if ( mug_h == fil_u->gum_w ) { + c3_free(dat_y); + return u3_nul; + } + else { + u3_noun pax = _unix_string_to_path(unx_u, fil_u->pax_c); + u3_noun mim = u3nt(c3__text, u3i_string("plain"), u3_nul); + u3_noun dat = u3nt(mim, len_ws, u3i_bytes(len_ws, dat_y)); + + c3_free(dat_y); + return u3nc(u3nt(pax, u3_nul, dat), u3_nul); + } + } + } } /* _unix_update_dir(): update directory, producing list of changes @@ -1162,6 +1268,8 @@ _unix_update_mount(u3_unix* unx_u, u3_umon* mon_u, u3_noun all) /* _unix_initial_update_file(): read file, but don't watch ** XX deduplicate with _unix_update_file() +** +** Files larger than U3_BLOB_THRESH are stored in the blob store. 
*/ static u3_noun _unix_initial_update_file(c3_c* pax_c, c3_c* bas_c) @@ -1169,7 +1277,6 @@ _unix_initial_update_file(c3_c* pax_c, c3_c* bas_c) struct stat buf_u; c3_i fid_i = c3_open(pax_c, O_RDONLY, 0644); c3_ws len_ws, red_ws; - c3_y* dat_y; if ( fid_i < 0 || fstat(fid_i, &buf_u) < 0 ) { if ( ENOENT == errno ) { @@ -1183,36 +1290,69 @@ _unix_initial_update_file(c3_c* pax_c, c3_c* bas_c) } len_ws = buf_u.st_size; - dat_y = c3_malloc(len_ws); - red_ws = read(fid_i, dat_y, len_ws); + // large files: stream into blob store + // + if ( (c3_d)len_ws > U3_BLOB_THRESH ) { + c3_h bob_mug_h; + c3_w bob_seq_w; - if ( close(fid_i) < 0 ) { - u3l_log("error closing initial file %s: %s", - pax_c, strerror(errno)); + c3_o ok_o = u3_blob_save_fd(u3C.dir_c, fid_i, + (c3_d)len_ws, &bob_mug_h, &bob_seq_w); + + if ( close(fid_i) < 0 ) { + u3l_log("error closing initial file %s: %s", pax_c, strerror(errno)); + } + + if ( c3n == ok_o ) { + u3l_log("blob: failed to save large initial file %s", pax_c); + return u3_nul; + } + + { + u3_noun rel_pax = _unix_string_to_path_helper(pax_c + strlen(bas_c) + 1); + u3_noun mim = u3nt(c3__text, u3i_string("plain"), u3_nul); + u3_atom atm = u3i_blob(bob_mug_h, bob_seq_w); + u3_noun dat = u3nt(mim, (u3_atom)len_ws, atm); + + return u3nc(u3nt(rel_pax, u3_nul, dat), u3_nul); + } } - if ( len_ws != red_ws ) { - if ( red_ws < 0 ) { - u3l_log("error reading initial file %s: %s", + // small files: existing path — read into buffer + // + { + c3_y* dat_y = c3_malloc(len_ws); + + red_ws = read(fid_i, dat_y, len_ws); + + if ( close(fid_i) < 0 ) { + u3l_log("error closing initial file %s: %s", pax_c, strerror(errno)); } - else { - u3l_log("wrong # of bytes read in initial file %s: %" PRIc3_ws " %" PRIc3_ws, - pax_c, len_ws, red_ws); + + if ( len_ws != red_ws ) { + if ( red_ws < 0 ) { + u3l_log("error reading initial file %s: %s", + pax_c, strerror(errno)); + } + else { + u3l_log("wrong # of bytes read in initial file %s: %" PRIc3_ws " %" PRIc3_ws, + pax_c, 
len_ws, red_ws); + } + c3_free(dat_y); + return u3_nul; } - c3_free(dat_y); - return u3_nul; - } - else { - u3_noun pax = _unix_string_to_path_helper(pax_c - + strlen(bas_c) - + 1); /* XX slightly less VERY BAD than before*/ - u3_noun mim = u3nt(c3__text, u3i_string("plain"), u3_nul); - u3_noun dat = u3nt(mim, len_ws, u3i_bytes(len_ws, dat_y)); + else { + u3_noun rel_pax = _unix_string_to_path_helper(pax_c + + strlen(bas_c) + + 1); /* XX slightly less VERY BAD than before*/ + u3_noun mim = u3nt(c3__text, u3i_string("plain"), u3_nul); + u3_noun dat = u3nt(mim, len_ws, u3i_bytes(len_ws, dat_y)); - c3_free(dat_y); - return u3nc(u3nt(pax, u3_nul, dat), u3_nul); + c3_free(dat_y); + return u3nc(u3nt(rel_pax, u3_nul, dat), u3_nul); + } } } diff --git a/pkg/vere/lord.c b/pkg/vere/lord.c index 60cb4d3e2b..a9a72326d8 100644 --- a/pkg/vere/lord.c +++ b/pkg/vere/lord.c @@ -518,7 +518,12 @@ _lord_on_plea(void* ptr_v, c3_d len_d, c3_y* byt_y) u3t_event_trace("king ipc cue", 'B'); #endif - jar = u3s_cue_xeno_with(god_u->sil_u, len_d, byt_y); + // decode incoming message: try ram first, fall back to jam + // + jar = u3s_tap_xeno(len_d, byt_y); + if ( u3_none == jar ) { + jar = u3s_cue_xeno_with(god_u->sil_u, len_d, byt_y); + } #ifdef LORD_TRACE_CUE u3t_event_trace("king ipc cue", 'E'); @@ -663,13 +668,13 @@ _lord_writ_send(u3_lord* god_u, u3_writ* wit_u) u3t_event_trace("king ipc jam", 'B'); #endif - u3s_jam_xeno(jar, &len_d, &byt_y); + u3s_ram_xeno(jar, &len_d, &byt_y); #ifdef LORD_TRACE_JAM u3t_event_trace("king ipc jam", 'E'); #endif - u3_newt_send(&god_u->inn_u, len_d, byt_y); + u3_newt_send_vers(&god_u->inn_u, 0x01, len_d, byt_y); u3z(jar); } } @@ -686,13 +691,13 @@ _lord_send(u3_lord* god_u, u3_noun jar) u3t_event_trace("king ipc jam", 'B'); #endif - u3s_jam_xeno(jar, &len_d, &byt_y); + u3s_ram_xeno(jar, &len_d, &byt_y); #ifdef LORD_TRACE_JAM u3t_event_trace("king ipc jam", 'E'); #endif - u3_newt_send(&god_u->inn_u, len_d, byt_y); + u3_newt_send_vers(&god_u->inn_u, 0x01, 
len_d, byt_y); u3z(jar); } @@ -1198,7 +1203,12 @@ _lord_on_plea_boot(void* ptr_v, c3_d len_d, c3_y* byt_y) { _lord_boot* bot_u = ptr_v; - u3_weak jar = u3s_cue_xeno_with(bot_u->sil_u, len_d, byt_y); + // decode incoming message: try ram first, fall back to jam + // + u3_weak jar = u3s_tap_xeno(len_d, byt_y); + if ( u3_none == jar ) { + jar = u3s_cue_xeno_with(bot_u->sil_u, len_d, byt_y); + } u3_noun tag, dat; if ( u3_none == jar ) { @@ -1369,8 +1379,8 @@ u3_lord_boot(c3_c* pax_c, { c3_d len_d; c3_y* byt_y; - u3s_jam_xeno(msg, &len_d, &byt_y); - u3_newt_send(&bot_u->inn_u, len_d, byt_y); + u3s_ram_xeno(msg, &len_d, &byt_y); + u3_newt_send_vers(&bot_u->inn_u, 0x01, len_d, byt_y); u3z(msg); } } diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index dcb380101a..c476b557c1 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -256,7 +256,7 @@ _mars_fact(u3_mars* mar_u, gif_u->sat_e = u3_gift_fact_e; gif_u->eve_d = mar_u->dun_d; - u3s_jam_xeno(pro, &gif_u->len_d, &gif_u->hun_y); + u3s_ram_xeno(pro, &gif_u->len_d, &gif_u->hun_y); u3z(pro); if ( !mar_u->gif_u.ent_u ) { @@ -280,7 +280,7 @@ _mars_gift(u3_mars* mar_u, u3_noun pro) gif_u->sat_e = u3_gift_rest_e; gif_u->ptr_v = 0; - u3s_jam_xeno(pro, &gif_u->len_d, &gif_u->hun_y); + u3s_ram_xeno(pro, &gif_u->len_d, &gif_u->hun_y); u3z(pro); if ( !mar_u->gif_u.ent_u ) { @@ -834,7 +834,7 @@ _mars_flush(u3_mars* mar_u) && ( (u3_gift_rest_e == gif_u->sat_e) || (gif_u->eve_d <= mar_u->log_u->dun_d)) ) { - u3_newt_send(mar_u->out_u, gif_u->len_d, gif_u->hun_y); + u3_newt_send_vers(mar_u->out_u, 0x01, gif_u->len_d, gif_u->hun_y); mar_u->gif_u.ext_u = gif_u->nex_u; c3_free(gif_u); @@ -902,7 +902,12 @@ u3_mars_kick(void* ram_u, c3_d len_d, c3_y* hun_y) // XX optimize for stateless tasks w/ peek-next // if ( u3_mars_work_e == mar_u->sat_e ) { - u3_weak jar = u3s_cue_xeno_with(mar_u->sil_u, len_d, hun_y); + // decode incoming message: try ram first, fall back to jam + // + u3_weak jar = u3s_tap_xeno(len_d, hun_y); + if ( u3_none == jar ) { 
+ jar = u3s_cue_xeno_with(mar_u->sil_u, len_d, hun_y); + } // parse errors are fatal // @@ -1501,8 +1506,8 @@ u3_mars_work(u3_mars* mar_u) u3nc(u3i_chub(mar_u->dun_d), mar_u->mug_h)); - u3s_jam_xeno(msg, &len_d, &hun_y); - u3_newt_send(mar_u->out_u, len_d, hun_y); + u3s_ram_xeno(msg, &len_d, &hun_y); + u3_newt_send_vers(mar_u->out_u, 0x01, len_d, hun_y); u3z(msg); } @@ -1926,7 +1931,19 @@ u3_mars_boot(u3_mars* mar_u, c3_d len_d, c3_y* hun_y) } { - u3_weak jar = u3s_cue_xeno(len_d, hun_y); + // decode boot message: try ram first, fall back to jam + // + u3_weak jar = u3s_tap_xeno(len_d, hun_y); + if ( u3_none == jar ) { + fprintf(stderr, "boot: tap failed (len=%" PRIu64 " hdr=%02x%02x%02x%02x%02x), trying cue\r\n", + len_d, + (len_d > 0) ? hun_y[0] : 0, + (len_d > 1) ? hun_y[1] : 0, + (len_d > 2) ? hun_y[2] : 0, + (len_d > 3) ? hun_y[3] : 0, + (len_d > 4) ? hun_y[4] : 0); + jar = u3s_cue_xeno(len_d, hun_y); + } if ( (u3_none == jar) || (c3n == u3r_p(jar, c3__boot, &com)) ) { diff --git a/pkg/vere/newt.c b/pkg/vere/newt.c index a183777c88..1569ff86b9 100644 --- a/pkg/vere/newt.c +++ b/pkg/vere/newt.c @@ -124,15 +124,16 @@ u3_newt_decode(u3_moat* mot_u, c3_y* buf_y, c3_d len_d) | (((c3_d)hed_y[3]) << 16) | (((c3_d)hed_y[4]) << 24); - // check for version tag and nonzero length + // check for version tag (0x00=jam, 0x01=ram) and nonzero length // - if ( 0x0 != hed_y[0] || !met_d ) { + if ( (0x0 != hed_y[0] && 0x1 != hed_y[0]) || !met_d ) { return c3n; } - // await body + // await body, stash version // _newt_mess_tail(mes_u, met_d); + mes_u->tal_u.met_u->ver_y = hed_y[0]; } } break; @@ -378,18 +379,18 @@ u3_newt_mojo_stop(u3_mojo* moj_u, u3_moor_bail bal_f) uv_close((uv_handle_t*)&moj_u->pyp_u, _mojo_stop_cb); } -/* u3_newt_send(): write buffer to stream. +/* u3_newt_send_vers(): write buffer with explicit version byte to stream. 
*/ void -u3_newt_send(u3_mojo* moj_u, c3_d len_d, c3_y* byt_y) +u3_newt_send_vers(u3_mojo* moj_u, c3_y ver_y, c3_d len_d, c3_y* byt_y) { n_req* req_u = c3_malloc(sizeof(*req_u)); req_u->moj_u = moj_u; req_u->buf_y = byt_y; - // write header + // write header: [ver_y][len LE 4B] // - req_u->hed_y[0] = 0x0; + req_u->hed_y[0] = ver_y; req_u->hed_y[1] = ( len_d & 0xff); req_u->hed_y[2] = ((len_d >> 8) & 0xff); req_u->hed_y[3] = ((len_d >> 16) & 0xff); @@ -414,3 +415,11 @@ u3_newt_send(u3_mojo* moj_u, c3_d len_d, c3_y* byt_y) } } } + +/* u3_newt_send(): write buffer to stream (legacy v0x00 / jam). +*/ +void +u3_newt_send(u3_mojo* moj_u, c3_d len_d, c3_y* byt_y) +{ + u3_newt_send_vers(moj_u, 0x00, len_d, byt_y); +} diff --git a/pkg/vere/vere.h b/pkg/vere/vere.h index cbe33eb566..e8389ab58f 100644 --- a/pkg/vere/vere.h +++ b/pkg/vere/vere.h @@ -63,6 +63,7 @@ typedef struct _u3_meat { struct _u3_meat* nex_u; c3_d len_d; + c3_y ver_y; // protocol version: 0x00=jam, 0x01=ram c3_y hun_y[0]; } u3_meat; @@ -1217,6 +1218,11 @@ void u3_newt_send(u3_mojo* moj_u, c3_d len_d, c3_y* byt_y); + /* u3_newt_send_vers(): write versioned buffer to stream. + */ + void + u3_newt_send_vers(u3_mojo* moj_u, c3_y ver_y, c3_d len_d, c3_y* byt_y); + /* u3_newt_read(): activate reading on input stream. */ void From 965acedd84363ab069cd2c16d5038487a6164c59 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Mon, 30 Mar 2026 13:31:27 -0500 Subject: [PATCH 02/31] wip: blob 2 --- pkg/noun/allocate.h | 14 ++++-- pkg/noun/retrieve.c | 40 +++++++-------- pkg/vere/blob.c | 120 ++++++++++++++++++++++++-------------------- 3 files changed, 93 insertions(+), 81 deletions(-) diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index 4d8abd1121..c0e88fae1c 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -335,10 +335,16 @@ STATIC_ASSERT( u3a_vits <= u3a_min_log, # define u3a_is_cell(som) u3a_is_pom(som) /* u3a_blob_flag: MSB of u3a_atom.len_w marks an indirect atom as a bob - ** (blob reference). 
The remaining 31 bits hold the actual data word count. - */ -# define u3a_blob_flag ((c3_w)0x80000000u) -# define u3a_blob_mask ((c3_w)0x7FFFFFFFu) + ** (blob reference). The remaining bits hold the actual data word count. + ** In VERE64, len_w is uint64_t so we use bit 63; in 32-bit we use bit 31. + */ +# ifdef VERE64 +# define u3a_blob_flag ((c3_w)0x8000000000000000ULL) +# define u3a_blob_mask ((c3_w)0x7FFFFFFFFFFFFFFFULL) +# else +# define u3a_blob_flag ((c3_w)0x80000000U) +# define u3a_blob_mask ((c3_w)0x7FFFFFFFU) +# endif # define u3du(som) u3a_is_cell(som) /* u3a_h(): get head of cell [som]. Bail if [som] is not cell. diff --git a/pkg/noun/retrieve.c b/pkg/noun/retrieve.c index d5566c4439..b6c57e9865 100644 --- a/pkg/noun/retrieve.c +++ b/pkg/noun/retrieve.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -2376,31 +2377,24 @@ u3r_blob_load(u3_atom a, const c3_c* pax_c) return u3_none; } - // allocate a temporary C-heap buffer, read, then copy into loom + // mmap the file and copy into the loom via u3i_slab (handles >4 GiB). // - c3_y* dat_y = c3_malloc(len_d); - c3_d rem_d = len_d; - c3_y* ptr_y = dat_y; - while ( rem_d > 0 ) { - // cap each read() to 1 GiB: macOS returns EINVAL for count > INT_MAX - // - size_t ask_i = ( rem_d > 0x40000000UL ) ? 
0x40000000UL : (size_t)rem_d; - ssize_t got_i = read(fid_i, ptr_y, ask_i); - if ( got_i <= 0 ) { - fprintf(stderr, "retrieve: blob read failed %s: %s\r\n", - fil_c, strerror(errno)); - close(fid_i); - c3_free(dat_y); - return u3_none; - } - ptr_y += got_i; - rem_d -= got_i; - } + void* map_v = mmap(0, (size_t)len_d, PROT_READ, MAP_PRIVATE, fid_i, 0); close(fid_i); - // u3i_bytes takes c3_w (32-bit) length; safe for <4GiB blobs + if ( MAP_FAILED == map_v ) { + fprintf(stderr, "retrieve: blob mmap failed %s: %s\r\n", + fil_c, strerror(errno)); + return u3_none; + } + madvise(map_v, (size_t)len_d, MADV_SEQUENTIAL); + + // bloq 3 = bytes; len_d = byte count // - u3_noun res = u3i_bytes((c3_w)len_d, dat_y); - c3_free(dat_y); - return res; + u3i_slab sab_u; + u3i_slab_bare(&sab_u, 3, len_d); + memcpy(sab_u.buf_y, map_v, (size_t)len_d); + munmap(map_v, (size_t)len_d); + + return u3i_slab_mint_bytes(&sab_u); } diff --git a/pkg/vere/blob.c b/pkg/vere/blob.c index 99d9896352..697dcadb7a 100644 --- a/pkg/vere/blob.c +++ b/pkg/vere/blob.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -194,6 +195,36 @@ _blob_dedup(const c3_c* pax_c, c3_h mug_h, c3_w max_w, return 0; } +/* _blob_mug(): compute a 31-bit mug suitable for blob bucketing. +** +** u3r_mug_bytes takes c3_h (uint32_t) for the length, so we cap each +** window at 0xFFFFFFFF bytes. For files > 4 GiB we hash the first +** window, mix in the high bits of length, and mix in the last window +** so that files of identical size but different tail content get +** distinct buckets. +*/ +static c3_h +_blob_mug(const c3_y* dat_y, c3_d len_d) +{ + // cap first window to UINT32_MAX to avoid hashing 0 bytes + // + c3_h win_h = ( len_d > 0xFFFFFFFFULL ) ? 
0xFFFFFFFFu : (c3_h)len_d; + c3_h mug_h = u3r_mug_bytes(dat_y, win_h); + + // for files larger than 4 GiB, also fold in the high length bits + // and hash the last 4 GiB window for tail sensitivity + // + c3_h hi_h = (c3_h)(len_d >> 32); + if ( hi_h ) { + mug_h = u3r_mug_both(mug_h, hi_h); + // hash the final window (last min(len_d, 0xFFFFFFFF) bytes) + c3_d off_d = len_d > 0xFFFFFFFFULL ? len_d - 0xFFFFFFFFULL : 0; + c3_h tail_h = u3r_mug_bytes(dat_y + off_d, (c3_h)(len_d - off_d)); + mug_h = u3r_mug_both(mug_h, tail_h); + } + return mug_h; +} + /* u3_blob_save(): write bytes to blob store. */ c3_o @@ -203,10 +234,7 @@ u3_blob_save(const c3_c* pax_c, c3_h* mug_h, c3_w* seq_w) { - // compute mug of atom bytes - // XX: u3r_mug_bytes takes c3_h len — safe for <=4GiB - c3_h len_h = (c3_h)len_d; - *mug_h = u3r_mug_bytes(dat_y, len_h); + *mug_h = _blob_mug(dat_y, len_d); // acquire lock and get next sequence number c3_w nex_w = _blob_lock_acquire(pax_c, *mug_h); @@ -257,7 +285,11 @@ u3_blob_save(const c3_c* pax_c, return c3y; } -/* u3_blob_save_fd(): streaming write from open file descriptor. +/* u3_blob_save_fd(): write from open file descriptor into the blob store. +** +** Uses mmap() to avoid a large malloc: the OS pages in only what +** _blob_mug and the dedup scan actually touch, and can evict cold pages +** immediately. Works for files of any size that fit in the address space. */ c3_o u3_blob_save_fd(const c3_c* pax_c, @@ -266,46 +298,28 @@ u3_blob_save_fd(const c3_c* pax_c, c3_h* mug_h, c3_w* seq_w) { - // We need the full content in memory to compute the mug (MurmurHash3 is - // not incremental) and to run the dedup check. Read into a heap buffer - // in BLOB_IO_MAX-sized chunks to avoid the EINVAL that macOS returns when - // a single read() count exceeds INT_MAX. - // - // XX: u3r_mug_bytes len is c3_h (uint32_t) — mug is unreliable for - // files larger than 4 GiB. Tracked as a known limitation. 
- // - if ( len_d > (c3_d)SIZE_MAX ) { - fprintf(stderr, "blob: file too large to map (%" PRIc3_d " bytes)\r\n", - len_d); + if ( 0 == len_d ) { + fprintf(stderr, "blob: refusing to save empty file\r\n"); return c3n; } - c3_y* buf_y = c3_malloc(len_d); - if ( !buf_y ) { - fprintf(stderr, "blob: failed to allocate %" PRIc3_d " bytes\r\n", len_d); + void* map_v = mmap(0, (size_t)len_d, PROT_READ, MAP_PRIVATE, fid_i, 0); + if ( MAP_FAILED == map_v ) { + fprintf(stderr, "blob: mmap failed (%" PRIc3_d " bytes): %s\r\n", + len_d, strerror(errno)); return c3n; } + madvise(map_v, (size_t)len_d, MADV_SEQUENTIAL); - c3_d rem_d = len_d; - c3_y* ptr_y = buf_y; - while ( rem_d > 0 ) { - size_t ask_i = ( rem_d < BLOB_IO_MAX ) ? (size_t)rem_d : BLOB_IO_MAX; - ssize_t got_i = read(fid_i, ptr_y, ask_i); - if ( got_i <= 0 ) { - fprintf(stderr, "blob: read failed: %s\r\n", strerror(errno)); - c3_free(buf_y); - return c3n; - } - ptr_y += got_i; - rem_d -= got_i; - } - - c3_o ret_o = u3_blob_save(pax_c, buf_y, len_d, mug_h, seq_w); - c3_free(buf_y); + c3_o ret_o = u3_blob_save(pax_c, (const c3_y*)map_v, len_d, mug_h, seq_w); + munmap(map_v, (size_t)len_d); return ret_o; } /* u3_blob_load(): read blob into a loom atom. +** +** Uses mmap() and u3i_slab to handle blobs of any size, including >4 GiB. +** The mapping is released immediately after the loom copy. */ u3_weak u3_blob_load(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) @@ -328,27 +342,25 @@ u3_blob_load(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) return u3_none; } - c3_y* dat_y = c3_malloc(len_d); - c3_d rem_d = len_d; - c3_y* ptr_y = dat_y; - while ( rem_d > 0 ) { - size_t ask_i = ( rem_d < BLOB_IO_MAX ) ? 
(size_t)rem_d : BLOB_IO_MAX; - ssize_t got_i = read(fid_i, ptr_y, ask_i); - if ( got_i <= 0 ) { - fprintf(stderr, "blob: read failed on %s: %s\r\n", - fil_c, strerror(errno)); - close(fid_i); - c3_free(dat_y); - return u3_none; - } - ptr_y += got_i; - rem_d -= got_i; - } + void* map_v = mmap(0, (size_t)len_d, PROT_READ, MAP_PRIVATE, fid_i, 0); close(fid_i); - u3_noun res = u3i_bytes((c3_w)len_d, dat_y); - c3_free(dat_y); - return res; + if ( MAP_FAILED == map_v ) { + fprintf(stderr, "blob: mmap failed on %s: %s\r\n", + fil_c, strerror(errno)); + return u3_none; + } + madvise(map_v, (size_t)len_d, MADV_SEQUENTIAL); + + // use u3i_slab (c3_d length) to correctly handle blobs >4 GiB. + // bloq 3 = bytes; len_d = byte count. + // + u3i_slab sab_u; + u3i_slab_bare(&sab_u, 3, len_d); + memcpy(sab_u.buf_y, map_v, (size_t)len_d); + munmap(map_v, (size_t)len_d); + + return u3i_slab_mint_bytes(&sab_u); } /* u3_blob_exists(): check whether a blob file exists. From 9657a775db39ebbad049ed6be90bbe88bc1af30e Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Tue, 31 Mar 2026 16:44:22 -0500 Subject: [PATCH 03/31] wip: blob 3 --- pkg/noun/allocate.c | 16 ----- pkg/noun/allocate.h | 14 ++++ pkg/noun/manage.c | 15 ++++ pkg/noun/options.h | 1 - pkg/noun/vortex.h | 29 ++++++++ pkg/vere/blob.c | 166 ++++++++++++++++++++++++++++++++++++++++++++ pkg/vere/blob.h | 22 ++++++ pkg/vere/disk.c | 80 ++++++++++++++++----- pkg/vere/lord.c | 83 ++++++++++++++++++++++ pkg/vere/mars.c | 51 ++++++++++++++ pkg/vere/vere.h | 20 +++++- 11 files changed, 461 insertions(+), 36 deletions(-) diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index 8b7d63edaf..bde4814c14 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -905,14 +905,6 @@ _me_lose_north(u3_noun dog) } } else { - // notify blob store when a bob atom is freed - // - if ( (c3y == u3a_is_bob(dog)) - && (u3C.bob_free_f) ) - { - u3a_atom* atm_u = (u3a_atom*)box_u; - u3C.bob_free_f(atm_u->mug_h, atm_u->buf_w[0]); - } 
u3a_wfree(box_u); } } @@ -952,14 +944,6 @@ _me_lose_south(u3_noun dog) } } else { - // notify blob store when a bob atom is freed - // - if ( (c3y == u3a_is_bob(dog)) - && (u3C.bob_free_f) ) - { - u3a_atom* atm_u = (u3a_atom*)box_u; - u3C.bob_free_f(atm_u->mug_h, atm_u->buf_w[0]); - } u3a_wfree(box_u); } } diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index c0e88fae1c..35c8e1a0d5 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -162,6 +162,20 @@ u3_noun tel; } u3a_cell; + /* u3a_blob: loom-resident metadata for a committed blob. + ** + ** use_w: refcount from event-log refs + active leases. + ** Independent from u3a_atom.use_w (noun refcount). + ** A blob file is deleted when both use_w == 0 AND + ** no live bob atoms in the loom point to it. + */ + typedef struct __attribute__((aligned(4))) { + c3_w use_w; // refcount: event-log refs + active leases + c3_h mug_h; // 31-bit content mug (= bucket dir name) + c3_w seq_w; // sequence number within bucket + c3_d siz_d; // byte size of blob file + } u3a_blob; + STATIC_ASSERT( (((c3_w)1) << u3a_min_log) == u3a_minimum, "log2 minimum allocation" ); STATIC_ASSERT( u3a_vits <= u3a_min_log, diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index 8dec6574e2..8d52dde9e5 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -529,6 +529,12 @@ _pave_parts(void) u3R->lop_p = u3h_new(); u3R->tim = u3_nul; u3R->how.fag_w = 0; + + // initialize blob bank HAMTs + // + u3H->ban_u.blb_p = u3h_new(); + u3H->ban_u.res_p = u3h_new(); + u3H->ban_u.nxt_d = 0; } static c3_d @@ -672,6 +678,15 @@ _find_home(void) if (!u3R->lop_p) { u3R->lop_p = u3h_new(); } + + // lazy-init blob bank HAMTs (zero if snapshot predates blob store) + // + if ( !u3H->ban_u.blb_p ) { + u3H->ban_u.blb_p = u3h_new(); + } + if ( !u3H->ban_u.res_p ) { + u3H->ban_u.res_p = u3h_new(); + } } /* u3m_pave(): instantiate or activate image. 
diff --git a/pkg/noun/options.h b/pkg/noun/options.h index aff1f72765..298a02d674 100644 --- a/pkg/noun/options.h +++ b/pkg/noun/options.h @@ -23,7 +23,6 @@ void (*slog_f)(u3_noun); // function pointer for slog void (*sign_hold_f)(void); // suspend system signal regime void (*sign_move_f)(void); // restore system signal regime - void (*bob_free_f)(c3_h, c3_w); // blob freed: mug_h, seq_w } u3o_config; /* u3o_flag: process/system flags. diff --git a/pkg/noun/vortex.h b/pkg/noun/vortex.h index d5bdae2b74..cef0c17ffe 100644 --- a/pkg/noun/vortex.h +++ b/pkg/noun/vortex.h @@ -18,6 +18,34 @@ u3_noun yot; // cached gates } u3v_arvo; + /* u3v_lease: active staging reservation in the blob store. + ** + ** Created when Mars installs a blob (receives %blob-install). + ** Holds one u3a_blob.use_w ref until the owning event is committed + ** to the event log (at which point the ref becomes an event-log ref), + ** or until the lease expires (TTL). + */ + typedef struct _u3v_lease { + c3_d res_d; // reservation id (monotonic counter) + c3_d exp_d; // expiry time (Unix ms); 0 = no expiry + c3_h mug_h; // blob mug + c3_w seq_w; // blob seq within mug bucket + c3_c stg_c[4096]; // staging path that was installed (for logging) + } u3v_lease; + + /* u3v_bank: loom-resident blob bank. + ** + ** Lives in u3v_home, checkpointed in image.bin. + ** blb_p: HAMT mapping blob_id (u64 = mug<<32|seq) -> u3a_blob loom offset + ** res_p: HAMT mapping res_id (u64) -> u3v_lease loom offset + ** nxt_d: monotonic reservation counter + */ + typedef struct _u3v_bank { + u3p(u3h_root) blb_p; // blob_id -> u3a_blob* + u3p(u3h_root) res_p; // res_id -> u3v_lease* + c3_d nxt_d; // next reservation id + } u3v_bank; + /* u3v_home: all internal (within image) state. ** NB: version must first for ease of migration. 
*/ @@ -25,6 +53,7 @@ u3v_version ver_d; // version number c3_d pam_d; // parameters u3v_arvo arv_u; // arvo state + u3v_bank ban_u; // blob bank u3a_road rod_u; // storage state } u3v_home; diff --git a/pkg/vere/blob.c b/pkg/vere/blob.c index 697dcadb7a..1278200aaa 100644 --- a/pkg/vere/blob.c +++ b/pkg/vere/blob.c @@ -3,6 +3,7 @@ #include "blob.h" #include "vere.h" +#include #include #include #include @@ -64,6 +65,57 @@ u3_blob_init(const c3_c* pax_c) } } +/* _blob_stg_dir(): write path to $pier/.urb/bob/stg/ into [out_c]. +*/ +static void +_blob_stg_dir(c3_c* out_c, const c3_c* pax_c) +{ + snprintf(out_c, 8192, "%s/.urb/bob/stg", pax_c); +} + +/* _blob_stg_rm_rf(): recursively delete all files inside staging dir. +** +** Only removes regular files, not subdirectories. Staging should +** only ever contain flat temp files so this is sufficient. +*/ +static void +_blob_stg_clean(const c3_c* stg_c) +{ + DIR* dir_u = opendir(stg_c); + if ( !dir_u ) { + return; + } + struct dirent* ent_u; + while ( (ent_u = readdir(dir_u)) ) { + if ( '.' == ent_u->d_name[0] ) { + continue; + } + c3_c fil_c[8192]; + snprintf(fil_c, sizeof(fil_c), "%s/%s", stg_c, ent_u->d_name); + c3_unlink(fil_c); + } + closedir(dir_u); +} + +/* u3_blob_stg_init(): initialize staging area; create/clean .urb/bob/stg/. +*/ +void +u3_blob_stg_init(const c3_c* pax_c) +{ + c3_c stg_c[8192]; + _blob_stg_dir(stg_c, pax_c); + + if ( 0 != c3_mkdir(stg_c, 0700) && EEXIST != errno ) { + fprintf(stderr, "blob: failed to create staging dir %s: %s\r\n", + stg_c, strerror(errno)); + return; + } + + // clean any leftover temp files from a prior crash + // + _blob_stg_clean(stg_c); +} + /* _blob_lock_acquire(): acquire mug bucket lock, return next seq number. ** ** Creates the mug directory and lockfile if needed. @@ -388,3 +440,117 @@ u3_blob_delete(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) fil_c, strerror(errno)); } } + +/* u3_blob_install_stg(): install a staging file into the blob store. 
+** +** [stg_c] is the path to a temp file under $pier/.urb/bob/stg/. +** Computes the mug of its content, checks for duplicates, then either +** renames the staging file into bob// (no dup) or unlinks it +** (dup found). On success sets *mug_h and *seq_w. +** +** The staging file is always consumed (renamed or unlinked) on success. +** On failure the staging file is left in place. +*/ +c3_o +u3_blob_install_stg(const c3_c* pax_c, + const c3_c* stg_c, + c3_h* mug_h, + c3_w* seq_w) +{ + struct stat st_u; + if ( -1 == stat(stg_c, &st_u) ) { + fprintf(stderr, "blob: install_stg: stat failed on %s: %s\r\n", + stg_c, strerror(errno)); + return c3n; + } + + c3_d len_d = (c3_d)st_u.st_size; + + if ( 0 == len_d ) { + fprintf(stderr, "blob: install_stg: refusing empty staging file %s\r\n", + stg_c); + return c3n; + } + + c3_i fid_i = open(stg_c, O_RDONLY); + if ( -1 == fid_i ) { + fprintf(stderr, "blob: install_stg: open failed on %s: %s\r\n", + stg_c, strerror(errno)); + return c3n; + } + + void* map_v = mmap(0, (size_t)len_d, PROT_READ, MAP_PRIVATE, fid_i, 0); + close(fid_i); + + if ( MAP_FAILED == map_v ) { + fprintf(stderr, "blob: install_stg: mmap failed on %s: %s\r\n", + stg_c, strerror(errno)); + return c3n; + } + madvise(map_v, (size_t)len_d, MADV_SEQUENTIAL); + + *mug_h = _blob_mug((const c3_y*)map_v, len_d); + + // acquire mug-bucket lock and get next sequence number + // + c3_w nex_w = _blob_lock_acquire(pax_c, *mug_h); + if ( 0 == nex_w ) { + munmap(map_v, (size_t)len_d); + return c3n; + } + + // check for duplicate content + // + c3_w dup_w = _blob_dedup(pax_c, *mug_h, nex_w, + (const c3_y*)map_v, len_d); + munmap(map_v, (size_t)len_d); + + if ( 0 != dup_w ) { + // duplicate found — consume staging file and return existing seq + // + c3_unlink(stg_c); + *seq_w = dup_w; + return c3y; + } + + // rename staging file into final location + // + c3_c dst_c[8192]; + u3_blob_path(dst_c, pax_c, *mug_h, nex_w); + + if ( 0 != rename(stg_c, dst_c) ) { + // rename can fail 
cross-device; fall back to copy-and-unlink + // + c3_i src_i = open(stg_c, O_RDONLY); + c3_i dst_i = open(dst_c, O_WRONLY | O_CREAT | O_EXCL, 0400); + if ( -1 == src_i || -1 == dst_i ) { + fprintf(stderr, "blob: install_stg: rename+fallback failed on %s: %s\r\n", + stg_c, strerror(errno)); + if ( -1 != src_i ) close(src_i); + if ( -1 != dst_i ) { close(dst_i); unlink(dst_c); } + return c3n; + } + + // copy in chunks + // + c3_y buf_y[65536]; + ssize_t red_i; + while ( (red_i = read(src_i, buf_y, sizeof(buf_y))) > 0 ) { + if ( write(dst_i, buf_y, (size_t)red_i) != red_i ) { + fprintf(stderr, "blob: install_stg: copy write failed: %s\r\n", + strerror(errno)); + close(src_i); + close(dst_i); + unlink(dst_c); + return c3n; + } + } + fsync(dst_i); + close(src_i); + close(dst_i); + c3_unlink(stg_c); + } + + *seq_w = nex_w; + return c3y; +} diff --git a/pkg/vere/blob.h b/pkg/vere/blob.h index c5601bdc59..6dd3783955 100644 --- a/pkg/vere/blob.h +++ b/pkg/vere/blob.h @@ -24,6 +24,15 @@ void u3_blob_init(const c3_c* pax_c); + /* u3_blob_stg_init(): initialize staging area; create .urb/bob/stg/ if needed. + ** + ** The staging dir holds mkstemp(3) temp files written by Earth before + ** they are handed to Mars for rename(2) into the final bob// + ** location. Cleaned (emptied) on every boot. + */ + void + u3_blob_stg_init(const c3_c* pax_c); + /* u3_blob_save(): write bytes to blob store. ** ** Deduplicates within the mug bucket (byte-for-byte comparison). @@ -68,6 +77,19 @@ void u3_blob_delete(const c3_c* pax_c, c3_h mug_h, c3_w seq_w); + /* u3_blob_install_stg(): install a staging file into the blob store. + ** + ** [stg_c] is the path of a temp file in $pier/.urb/bob/stg/. + ** Computes mug, deduplicates, then rename(2)s into bob//. + ** The staging file is always consumed on success. + ** On success, returns c3y and sets *mug_h and *seq_w. 
+ */ + c3_o + u3_blob_install_stg(const c3_c* pax_c, + const c3_c* stg_c, + c3_h* mug_h, + c3_w* seq_w); + /* u3_blob_path(): write filesystem path for a blob into [out_c]. ** ** [out_c] must be at least 8192 bytes. diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index 75fabef369..ac9dd7506d 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -6,6 +6,7 @@ #include "version.h" #include "db/lmdb.h" #include "blob.h" +#include #include #include "migrate.h" @@ -119,19 +120,6 @@ _disk_commit_start(u3_disk* log_u) _disk_commit_after_cb); } -/* _disk_bob_free_cb(): called by noun allocator when a bob atom is freed. -** -** Deletes the blob file from the store. The pier path is read from -** u3C.dir_c, which is set before any nouns are allocated. -*/ -static void -_disk_bob_free_cb(c3_h mug_h, c3_w seq_w) -{ - if ( u3C.dir_c ) { - u3_blob_delete(u3C.dir_c, mug_h, seq_w); - } -} - /* u3_disk_etch(): serialize an event for persistence. RETAIN [eve] */ size_t @@ -1581,6 +1569,63 @@ u3_disk_chop(u3_disk* log_u, c3_d eve_d) // cleanup c3_free(sot_d); + // GC: sweep blob store for orphaned blobs (not in ban_u.blb_p) + // + // Any blob not registered in the bank is an orphan from a crashed + // or incomplete install. Safe to delete. + // + { + c3_c bob_c[8192]; + snprintf(bob_c, sizeof(bob_c), "%s/.urb/bob", log_u->dir_u->pax_c); + + DIR* top_u = opendir(bob_c); + if ( top_u ) { + struct dirent* mug_e; + while ( (mug_e = readdir(top_u)) ) { + if ( '.' == mug_e->d_name[0] || 0 == strcmp(mug_e->d_name, "stg") ) { + continue; + } + c3_h mug_h = (c3_h)strtoul(mug_e->d_name, 0, 10); + if ( 0 == mug_h ) { + continue; + } + + c3_c mug_c[8192]; + snprintf(mug_c, sizeof(mug_c), "%s/%s", bob_c, mug_e->d_name); + + DIR* bkt_u = opendir(mug_c); + if ( !bkt_u ) { + continue; + } + struct dirent* seq_e; + while ( (seq_e = readdir(bkt_u)) ) { + if ( '.' 
== seq_e->d_name[0] || 0 == strcmp(seq_e->d_name, "lock") ) { + continue; + } + c3_w seq_w = (c3_w)strtoul(seq_e->d_name, 0, 10); + if ( 0 == seq_w ) { + continue; + } + + c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; + u3_noun key = u3i_chub(bid_d); + u3_weak ref = u3h_get(u3H->ban_u.blb_p, key); + u3z(key); + + if ( u3_none == ref ) { + // orphan — delete + u3_blob_delete(log_u->dir_u->pax_c, mug_h, seq_w); + fprintf(stderr, "chop: gc: deleted orphan blob %" PRIc3_h + "/%" PRIc3_w "\r\n", mug_h, seq_w); + } + // else: ref > 0, keep it + } + closedir(bkt_u); + } + closedir(top_u); + } + } + // success fprintf(stderr, "chop: event log truncation complete\r\n"); } @@ -2220,9 +2265,10 @@ u3_disk_make(c3_c* pax_c) } } - // make $pier/.urb/bob (blob store) + // make $pier/.urb/bob (blob store) and .urb/bob/stg/ (staging area) // u3_blob_init(pax_c); + u3_blob_stg_init(pax_c); return c3y; } @@ -2292,13 +2338,11 @@ u3_disk_load(c3_c* pax_c, u3_disk_load_e lod_e) return 0; } - // initialize blob store (creates .urb/bob/ if needed) + // initialize blob store (creates .urb/bob/ if needed) and staging area // u3_blob_init(pax_c); + u3_blob_stg_init(pax_c); - // register blob-free callback so noun GC can delete orphaned blobs - // - u3C.bob_free_f = _disk_bob_free_cb; // XX move this into u3_disk_make // diff --git a/pkg/vere/lord.c b/pkg/vere/lord.c index a9a72326d8..be400edf92 100644 --- a/pkg/vere/lord.c +++ b/pkg/vere/lord.c @@ -85,6 +85,10 @@ _lord_writ_free(u3_writ* wit_u) case u3_writ_peek: { u3z(wit_u->pek_u->sam); } break; + + case u3_writ_blob: { + c3_free(wit_u->blb_u.pax_c); + } break; } c3_free(wit_u); @@ -156,6 +160,7 @@ _lord_writ_str(u3_writ_type typ_e) case u3_writ_live: return "live"; case u3_writ_exit: return "exit"; case u3_writ_quiz: return "quiz"; + case u3_writ_blob: return "blob"; } } @@ -403,6 +408,55 @@ _lord_plea_quiz(u3_lord* god_u, u3_noun dat) wit_u->qiz_u.qiz_f(wit_u->qiz_u.qiz_m, wit_u->qiz_u.ptr_v, dat); } +/* _lord_plea_blob(): handle 
blob-ack / blob-nack from serf. +** +** Expected dat: [c3y mug seq] on success, [c3n reason] on failure. +*/ +static void +_lord_plea_blob(u3_lord* god_u, u3_noun dat) +{ + u3_writ* wit_u = _lord_writ_need(god_u, u3_writ_blob); + + if ( !wit_u ) { + u3z(dat); + return; + } + + void* ptr_v = wit_u->blb_u.ptr_v; + void (*fun_f)(void*, c3_h, c3_w, c3_o) = wit_u->blb_u.fun_f; + c3_free(wit_u->blb_u.pax_c); + c3_free(wit_u); + + if ( c3n == u3a_is_cell(dat) ) { + // malformed response — treat as nack + // + if ( fun_f ) fun_f(ptr_v, 0, 0, c3n); + u3z(dat); + return; + } + + if ( c3y == u3h(dat) ) { + // [c3y mug seq] + // + u3_noun mug_a, seq_a; + c3_h mug_h = 0; + c3_w seq_w = 0; + + if ( (c3y == u3r_cell(u3t(dat), &mug_a, &seq_a)) ) { + u3r_safe_half(mug_a, &mug_h); + u3r_safe_word(seq_a, &seq_w); + } + if ( fun_f ) fun_f(ptr_v, mug_h, seq_w, c3y); + } + else { + // [c3n reason] + // + if ( fun_f ) fun_f(ptr_v, 0, 0, c3n); + } + + u3z(dat); +} + /* _lord_work_spin(): update spinner if more work is in progress. */ static void @@ -577,6 +631,10 @@ _lord_on_plea(void* ptr_v, c3_d len_d, c3_y* byt_y) case c3__quiz: { _lord_plea_quiz(god_u, u3k(dat)); } + + case c3__blob: { + _lord_plea_blob(god_u, u3k(dat)); + } break; } u3z(jar); @@ -642,6 +700,14 @@ _lord_writ_make(u3_lord* god_u, u3_writ* wit_u) case u3_writ_exit: { msg = u3nc(c3__exit, u3_nul); } break; + + case u3_writ_blob: { + // [%blob path-atom] — path is a null-terminated C string + // + msg = u3nc(c3__blob, + u3i_bytes(strlen(wit_u->blb_u.pax_c), + (const c3_y*)wit_u->blb_u.pax_c)); + } break; } return msg; @@ -762,6 +828,23 @@ u3_lord_work(u3_lord* god_u, u3_ovum* egg_u, u3_noun job) _lord_send(god_u, _lord_writ_make(god_u, wit_u)); } +/* u3_lord_blob_install(): request Mars install a staged blob file. 
+*/ +void +u3_lord_blob_install(u3_lord* god_u, + c3_c* pax_c, + void* ptr_v, + void (*fun_f)(void*, c3_h, c3_w, c3_o)) +{ + u3_writ* wit_u = _lord_writ_new(god_u); + wit_u->typ_e = u3_writ_blob; + wit_u->blb_u.pax_c = pax_c; + wit_u->blb_u.ptr_v = ptr_v; + wit_u->blb_u.fun_f = fun_f; + + _lord_writ_send(god_u, wit_u); +} + /* u3_lord_save(): save a snapshot. */ c3_o diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index c476b557c1..f36707a178 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -11,6 +11,7 @@ #include "ivory.h" #include "ur/ur.h" #include "db/lmdb.h" +#include "blob.h" #include #include @@ -724,6 +725,56 @@ _mars_work(u3_mars* mar_u, u3_noun jar) u3z(jar); mar_u->sat_e = u3_mars_exit_e; } break; + + case c3__blob: { + // [%blob path-atom] — install staging file from king + // + c3_h mug_h = 0; + c3_w seq_w = 0; + c3_o ok_o = c3n; + + // extract path string from atom + // + c3_d len_d = u3r_met(3, dat); + if ( len_d > 0 && len_d < 8192 ) { + c3_c stg_c[8192] = {0}; + u3r_bytes(0, (c3_w)len_d, (c3_y*)stg_c, dat); + + ok_o = u3_blob_install_stg(u3C.dir_c, stg_c, &mug_h, &seq_w); + + if ( c3y == ok_o ) { + // record in blob bank with initial refcount=1 + // + c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; + u3_noun key = u3i_chub(bid_d); + u3_weak old = u3h_get(u3H->ban_u.blb_p, key); + if ( u3_none == old ) { + // new blob — insert with refcount 1 + u3h_put(u3H->ban_u.blb_p, key, u3i_word(1)); + } + else { + // duplicate install — bump refcount + c3_w ref_w = 0; + u3r_safe_word(old, &ref_w); + u3h_put(u3H->ban_u.blb_p, key, u3i_word(ref_w + 1)); + } + u3z(key); + } + } + else { + fprintf(stderr, "mars: blob: bad path atom len %" PRIu64 "\r\n", len_d); + } + + u3z(jar); + + if ( c3y == ok_o ) { + _mars_gift(mar_u, u3nt(c3__blob, c3y, + u3nc(u3i_word(mug_h), u3i_word(seq_w)))); + } + else { + _mars_gift(mar_u, u3nc(c3__blob, c3n)); + } + } break; } return c3y; diff --git a/pkg/vere/vere.h b/pkg/vere/vere.h index e8389ab58f..564d4b532b 100644 --- 
a/pkg/vere/vere.h +++ b/pkg/vere/vere.h @@ -451,7 +451,8 @@ u3_writ_peek = 1, u3_writ_live = 2, u3_writ_exit = 3, - u3_writ_quiz = 4 + u3_writ_quiz = 4, + u3_writ_blob = 5 // blob-install request } u3_writ_type; /* u3_writ: ipc message from urth to mars @@ -470,6 +471,11 @@ void* ptr_v; // driver void (*qiz_f)(c3_m, void*, u3_noun); // callback } qiz_u; // + struct { // blob-install: + c3_c* pax_c; // staging path (heap-alloc'd) + void* ptr_v; // callback context + void (*fun_f)(void*, c3_h, c3_w, c3_o); // ack cb(ctx, mug, seq, ok) + } blb_u; // }; } u3_writ; @@ -1042,6 +1048,18 @@ void u3_lord_peek(u3_lord* god_u, u3_pico* pic_u); + /* u3_lord_blob_install(): request Mars install a staged blob file. + ** + ** [pax_c] is the path to a temp file in $pier/.urb/bob/stg/. + ** When Mars finishes (mug+dedup+rename), it calls fun_f(ptr_v, mug, seq, ok). + ** Ownership of [pax_c] passes to the writ; do not free it. + */ + void + u3_lord_blob_install(u3_lord* god_u, + c3_c* pax_c, + void* ptr_v, + void (*fun_f)(void*, c3_h, c3_w, c3_o)); + /** Filesystem (async). **/ /* u3_foil_folder(): load directory, blockingly. create if nonexistent. 
From 89f6d15a398577429b5dd209da841a0a79fd143c Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Wed, 1 Apr 2026 19:52:31 -0500 Subject: [PATCH 04/31] wip: blob 4 --- pkg/noun/allocate.c | 139 ++++++++++++++++++++++++++++++++ pkg/noun/allocate.h | 13 +++ pkg/noun/imprison.c | 13 +++ pkg/noun/manage.c | 61 ++++++++++++++ pkg/noun/retrieve.c | 183 ++++++++++++++++++++++++++++++++++++++++-- pkg/noun/retrieve.h | 24 ++++++ pkg/noun/serial.c | 91 ++++++++++++++++++--- pkg/noun/version.h | 2 +- pkg/vere/blob.c | 102 +++++++++++++++++++++++ pkg/vere/blob.h | 23 ++++++ pkg/vere/disk.c | 103 ++++++++++++++---------- pkg/vere/io/unix.c | 173 ++++++++++++++++++++++++++++++++++----- pkg/vere/newt_tests.c | 22 ++++- 13 files changed, 867 insertions(+), 82 deletions(-) diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index bde4814c14..9399887b74 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -2056,3 +2056,142 @@ void u3a_loom_sane(void) { } + +/* _u3a_bid_cmp(): qsort comparator for c3_d blob IDs. +*/ +static int +_u3a_bid_cmp(const void* a_v, const void* b_v) +{ + c3_d a_d = *(const c3_d*)a_v; + c3_d b_d = *(const c3_d*)b_v; + return (a_d > b_d) - (a_d < b_d); +} + +/* u3a_find_bobs(): scan the home road heap for bob atoms. +** +** Walks the page directory of u3R (must be the home road) scanning all +** chunk-allocated hunk pages. For each allocated chunk large enough to +** hold a u3a_atom, checks whether the len_w field has u3a_blob_flag set. +** If so, extracts mug_h and seq_w and packs them into a c3_d blob ID +** (mug_h in high 32 bits, seq_w in low 32 bits). +** +** Returns a malloc'd sorted array of blob IDs; sets [*out_z] to the count. +** Caller must c3_free() the array. +** +** This is an O(heap_pages) scan, much faster than u3a_walk_fore on the +** full noun tree. Used by the epoch-chop GC to rebuild the live blob set +** without touching the loom allocator (safe when loom may be undersized). 
+*/ +c3_d* +u3a_find_bobs(c3_z* out_z) +{ + // word offset of len_w within u3a_atom (= c3_wiseof(u3a_noun)) + // + c3_w len_off_w = c3_wiseof(u3a_noun); + + // exact chunk size (in words) of a bob atom: header + 1 data word. + // bob atoms always occupy this exact size class and no other. + // + c3_w bob_wor_w = c3_wiseof(u3a_atom) + 1; + + u3p(u3a_crag)* dir_u = u3to(u3p(u3a_crag), HEAP.pag_p); + + // cap scan to pages actually mapped in the current loom allocation + // (HEAP.len_w reflects the loom size at snapshot time, which may be + // larger than the loom mapped by the current process, e.g. when + // `urbit chop` runs with the default --loom 32 on a pier created with + // a larger loom) + // + c3_w map_w = (c3_w)(u3C.wor_i >> u3a_page); + c3_w lim_w = ( HEAP.len_w < map_w ) ? HEAP.len_w : map_w; + + if ( HEAP.len_w > map_w ) { + fprintf(stderr, "warn: loom smaller than snapshot (%" PRIc3_w " > %" PRIc3_w " pages); " + "blob GC may miss blobs — run with matching --loom\r\n", + HEAP.len_w, map_w); + } + + // dynamic C-heap array; grows as needed + // + c3_z cap_z = 64; + c3_z len_z = 0; + c3_d* ids_d = c3_malloc(cap_z * sizeof(c3_d)); + + for ( c3_w pag_w = 0; pag_w < lim_w; pag_w++ ) { + u3_post dir_p = dir_u[pag_w]; + + // skip free and large (multi-page) allocations — bob atoms are small + // + if ( dir_p <= u3a_rest_pg ) { + continue; + } + + // hunk page: dir_p is the crag post + // + u3a_crag* pag_u = u3to(u3a_crag, dir_p); + c3_s log_s = pag_u->log_s; + + // bob atoms are always exactly bob_wor_w = (c3_wiseof(u3a_atom) + 1) words. + // that falls into the unique size class whose chunk size equals bob_wor_w. + // skip all other size classes to eliminate false positives from cells, + // HAMT nodes, or any other allocation whose word-2 happens to equal + // (u3a_blob_flag | 1) by coincidence. 
+ // + if ( (c3_w)1 << log_s != bob_wor_w ) { + continue; + } + + c3_s tot_s = u3a_Hunk[log_s - u3a_min_log].tot_s; + c3_s hun_s = u3a_Hunk[log_s - u3a_min_log].hun_s; + + u3_post pag_p = page_to_post(pag_w); + + for ( c3_s pos_s = hun_s; pos_s < tot_s; pos_s++ ) { + // bitmap: bit SET means FREE, bit CLEAR means ALLOCATED + // + c3_w blk_w = pos_s >> u3a_word_bits_log; + c3_w bit_w = (c3_w)1 << (pos_s & (u3a_word_bits - 1)); + + if ( pag_u->map_w[blk_w] & bit_w ) { + continue; // free chunk + } + + // allocated chunk — probe len_w at the expected offset + // + u3_post hun_p = pag_p + ((c3_w)pos_s << log_s); + c3_w* raw_w = u3to(c3_w, hun_p); + + // check len_w for bob atom: must be exactly (u3a_blob_flag | 1) + // (1 data word + flag). This exact-match avoids false positives + // from cells whose hed/tel fields happen to have the high bit set. + // + if ( raw_w[len_off_w] != (u3a_blob_flag | (c3_w)1) ) { + continue; + } + + // it's a bob atom: extract mug and seq + // + u3a_atom* atm_u = (u3a_atom*)raw_w; + c3_h mug_h = atm_u->mug_h; + c3_w seq_w = (c3_w)atm_u->buf_w[0]; + c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; + + // grow array if needed + // + if ( len_z == cap_z ) { + cap_z *= 2; + ids_d = c3_realloc(ids_d, cap_z * sizeof(c3_d)); + } + ids_d[len_z++] = bid_d; + } + } + + // sort for bsearch + // + if ( len_z > 1 ) { + qsort(ids_d, len_z, sizeof(c3_d), _u3a_bid_cmp); + } + + *out_z = len_z; + return ids_d; +} diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index 35c8e1a0d5..e6f2816fda 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -972,6 +972,19 @@ u3a_dash(void); void (*pat_f)(u3_atom, void*), c3_o (*cel_f)(u3_noun, void*)); + /* u3a_find_bobs(): scan the home road heap for bob atoms. + ** + ** O(heap_pages) scan via the page directory — much faster than + ** u3a_walk_fore for the purpose of rebuilding the blob GC live-set. + ** Must be called with u3R == home road. 
+ ** + ** Returns a malloc'd, sorted array of blob IDs (each a c3_d packing + ** mug_h in the high 32 bits and seq_w in the low 32 bits). + ** Sets [*out_z] to the count. Caller must c3_free() the array. + */ + c3_d* + u3a_find_bobs(c3_z* out_z); + /* u3a_string(): `a` as an on-loom c-string. */ c3_c* diff --git a/pkg/noun/imprison.c b/pkg/noun/imprison.c index 63990f4909..5ad6344821 100644 --- a/pkg/noun/imprison.c +++ b/pkg/noun/imprison.c @@ -572,6 +572,19 @@ u3i_vint(u3_noun a) return u3m_bail(c3__exit); } else { + // bob atoms must be materialized before incrementing: + // pug_u->len_w carries u3a_blob_flag and buf_w[0] is a seq number, + // not atom data. + // + if ( c3y == u3a_is_bob(a) ) { + u3_atom mat = u3r_blob_load(a, u3C.dir_c); + if ( u3_none == mat ) { + return u3m_bail(c3__fail); + } + u3z(a); + return u3i_vint(mat); + } + u3i_slab sab_u; u3i_slab_init(&sab_u, 0, u3r_met(0, a) + 1); diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index 8d52dde9e5..d811999dcd 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -6,6 +6,7 @@ #ifndef U3_OS_windows #include #endif +#include #include #include #if defined(U3_OS_osx) @@ -687,6 +688,66 @@ _find_home(void) if ( !u3H->ban_u.res_p ) { u3H->ban_u.res_p = u3h_new(); } + + // scan .urb/bob/ and register any blobs not already in ban_u.blb_p. + // + // Blobs written by Earth (e.g. boot-time unix.c large-file ingestion) + // before Mars existed are not tracked in the bank yet. Walk the + // two-level / tree and insert them with refcount 1 so that + // epoch-chop GC does not delete them as orphans. + // + if ( u3C.dir_c ) { + c3_c bob_c[8192]; + snprintf(bob_c, sizeof(bob_c), "%s/.urb/bob", u3C.dir_c); + + DIR* top_u = opendir(bob_c); + if ( top_u ) { + struct dirent* mug_e; + while ( (mug_e = readdir(top_u)) ) { + if ( '.' 
== mug_e->d_name[0] || 0 == strcmp(mug_e->d_name, "stg") ) { + continue; + } + + c3_h mug_h = (c3_h)strtoul(mug_e->d_name, 0, 10); + if ( 0 == mug_h ) { + continue; + } + + c3_c mug_c[8192]; + snprintf(mug_c, sizeof(mug_c), "%s/%s", bob_c, mug_e->d_name); + + DIR* bkt_u = opendir(mug_c); + if ( !bkt_u ) { + continue; + } + + struct dirent* seq_e; + while ( (seq_e = readdir(bkt_u)) ) { + if ( '.' == seq_e->d_name[0] || 0 == strcmp(seq_e->d_name, "lock") ) { + continue; + } + + c3_w seq_w = (c3_w)strtoul(seq_e->d_name, 0, 10); + if ( 0 == seq_w ) { + continue; + } + + c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; + u3_noun key = u3i_chub(bid_d); + u3_weak old = u3h_get(u3H->ban_u.blb_p, key); + + if ( u3_none == old ) { + // blob exists on disk but is not tracked — register with rc=1 + u3h_put(u3H->ban_u.blb_p, key, u3i_word(1)); + } + + u3z(key); + } + closedir(bkt_u); + } + closedir(top_u); + } + } } /* u3m_pave(): instantiate or activate image. diff --git a/pkg/noun/retrieve.c b/pkg/noun/retrieve.c index b6c57e9865..ba0e28d1eb 100644 --- a/pkg/noun/retrieve.c +++ b/pkg/noun/retrieve.c @@ -121,6 +121,10 @@ u3r_at(u3_atom a, u3_noun b) u3t_off(far_o); return u3_none; } + else if ( c3y == u3a_is_bob(a) ) { + u3t_off(far_o); + return u3_none; + } else { u3a_atom* a_u = u3a_to_ptr(a); c3_w len_w = a_u->len_w; @@ -793,6 +797,27 @@ u3r_nord(u3_noun a, return 2; } else { + // materialize bob atoms before comparing word buffers + // + if ( c3y == u3a_is_bob(a) ) { + u3_atom mat = u3r_blob_load(a, u3C.dir_c); + if ( u3_none == mat ) { + return 0; + } + u3_atom ret = u3r_nord(mat, b); + u3z(mat); + return ret; + } + if ( c3y == u3a_is_bob(b) ) { + u3_atom mat = u3r_blob_load(b, u3C.dir_c); + if ( u3_none == mat ) { + return 2; + } + u3_atom ret = u3r_nord(a, mat); + u3z(mat); + return ret; + } + u3a_atom* a_u = u3a_to_ptr(a); u3a_atom* b_u = u3a_to_ptr(b); @@ -1023,16 +1048,20 @@ u3r_met(c3_y a_y, daz_w = b; } else { - // materialize bob atoms before measuring + // bob 
atoms: use blob met (only reads last byte of file, no loom allocation) + // then convert the bit-count to the requested bloq unit [a_y] // if ( c3y == u3a_is_bob(b) ) { - u3_atom mat = u3r_blob_load(b, u3C.dir_c); - if ( u3_none == mat ) { + c3_d bit_d = u3r_blob_met(b); + if ( 0 == bit_d ) { + // failed to read or empty blob: bail return (c3_w)u3m_bail(c3__fail); } - c3_w ret_w = u3r_met(a_y, mat); - u3z(mat); - return ret_w; + // convert bit count to a_y-bloq count (rounding up), same as the + // formula below: (bit_d + ((1<> a_y + // + c3_d rnd_d = (c3_d)((1 << a_y) - 1); + return (c3_w)((bit_d + rnd_d) >> a_y); } u3a_atom* b_u = u3a_to_ptr(b); @@ -1265,6 +1294,19 @@ u3r_mp(mpz_t a_mp, _mpz_init_set_word(a_mp, b); } else { + // bob atoms must be materialized before import + // + if ( c3y == u3a_is_bob(b) ) { + u3_atom mat = u3r_blob_load(b, u3C.dir_c); + if ( u3_none == mat ) { + mpz_init(a_mp); + return; + } + u3r_mp(a_mp, mat); + u3z(mat); + return; + } + u3a_atom* b_u = u3a_to_ptr(b); c3_w len_w = b_u->len_w; c3_d bit_d = (c3_d)len_w << u3a_word_bits_log; @@ -1294,6 +1336,18 @@ u3r_short(c3_w a_w, if ( c3y == u3a_is_cat(b) ) wor_w = b; else { + // materialize bob atoms before extracting short + // + if ( c3y == u3a_is_bob(b) ) { + u3_atom mat = u3r_blob_load(b, u3C.dir_c); + if ( u3_none == mat ) { + return 0; + } + c3_s ret_s = u3r_short(a_w, mat); + u3z(mat); + return ret_s; + } + u3a_atom* b_u = u3a_to_ptr(b); c3_w nix_w = a_w >> u3a_word_words; @@ -1394,6 +1448,18 @@ u3r_chub(c3_w a_w, else return b; } else { + // materialize bob atoms before extracting chub + // + if ( c3y == u3a_is_bob(b) ) { + u3_atom mat = u3r_blob_load(b, u3C.dir_c); + if ( u3_none == mat ) { + return 0; + } + c3_d ret_d = u3r_chub(a_w, mat); + u3z(mat); + return ret_d; + } + u3a_atom* b_u = u3a_to_ptr(b); if ( a_w >= b_u->len_w ) { @@ -1849,6 +1915,18 @@ u3r_chop(c3_g met_g, src_w = &src; } else { + // bob atoms must be materialized before chopping + // + if ( c3y == u3a_is_bob(src) 
) { + u3_atom mat = u3r_blob_load(src, u3C.dir_c); + if ( u3_none == mat ) { + return; + } + u3r_chop(met_g, fum_w, wid_w, tou_w, dst_w, mat); + u3z(mat); + return; + } + u3a_atom* src_u = u3a_to_ptr(src); u3_assert(u3_none != src); @@ -2398,3 +2476,96 @@ u3r_blob_load(u3_atom a, const c3_c* pax_c) return u3i_slab_mint_bytes(&sab_u); } + +/* u3r_blob_map(): mmap a bob atom's blob file for direct byte access. +** +** Returns a read-only pointer to [*len_d] bytes, or NULL on failure. +** Release with u3r_blob_unmap(ptr, *len_d) when done. +** Uses u3C.dir_c as the pier path. +** No loom allocation is performed. +*/ +const c3_y* +u3r_blob_map(u3_atom a, c3_d* len_d) +{ + u3_assert( c3y == u3a_is_bob(a) ); + + c3_h mug_h = u3a_bob_mug(a); + c3_w seq_w = u3a_bob_seq(a); + + c3_c fil_c[8192]; + snprintf(fil_c, sizeof(fil_c), "%s/.urb/bob/%" PRIc3_h "/%" PRIc3_w, + u3C.dir_c, mug_h, seq_w); + + struct stat st_u; + if ( -1 == stat(fil_c, &st_u) ) { + fprintf(stderr, "retrieve: blob_map: stat failed %s: %s\r\n", + fil_c, strerror(errno)); + return 0; + } + + *len_d = (c3_d)st_u.st_size; + if ( 0 == *len_d ) { + return 0; + } + + c3_i fid_i = open(fil_c, O_RDONLY); + if ( -1 == fid_i ) { + fprintf(stderr, "retrieve: blob_map: open failed %s: %s\r\n", + fil_c, strerror(errno)); + return 0; + } + + void* map_v = mmap(0, (size_t)*len_d, PROT_READ, MAP_PRIVATE, fid_i, 0); + close(fid_i); + + if ( MAP_FAILED == map_v ) { + fprintf(stderr, "retrieve: blob_map: mmap failed %s: %s\r\n", + fil_c, strerror(errno)); + return 0; + } + + return (const c3_y*)map_v; +} + +/* u3r_blob_unmap(): release a mapping from u3r_blob_map(). +*/ +void +u3r_blob_unmap(const c3_y* ptr_y, c3_d len_d) +{ + if ( ptr_y && len_d ) { + munmap((void*)ptr_y, (size_t)len_d); + } +} + +/* u3r_blob_met(): compute bit-length of a bob atom without materialization. +** +** Equivalent to u3r_met(0, materialized) but avoids loom allocation. +** Scans the last byte to strip trailing zeroes. +** Returns 0 on error. 
+*/ +c3_d +u3r_blob_met(u3_atom a) +{ + u3_assert( c3y == u3a_is_bob(a) ); + + c3_d len_d; + const c3_y* byt_y = u3r_blob_map(a, &len_d); + if ( !byt_y ) { + return 0; + } + + c3_d pos_d = len_d; + while ( pos_d > 0 && 0 == byt_y[pos_d - 1] ) { + pos_d--; + } + + c3_d met_d = 0; + if ( pos_d > 0 ) { + c3_y top_y = byt_y[pos_d - 1]; + c3_y clz_y = (c3_y)(__builtin_clz((unsigned int)top_y) - 24); + met_d = (pos_d - 1) * 8 + (c3_d)(8 - clz_y); + } + + u3r_blob_unmap(byt_y, len_d); + return met_d; +} diff --git a/pkg/noun/retrieve.h b/pkg/noun/retrieve.h index e8a7abb6da..01babe0a15 100644 --- a/pkg/noun/retrieve.h +++ b/pkg/noun/retrieve.h @@ -615,4 +615,28 @@ u3_weak u3r_blob_load(u3_atom a, const c3_c* pax_c); + /* u3r_blob_map(): mmap a bob atom's blob file for direct byte access. + ** + ** Returns a read-only pointer to [*len_d] bytes, or NULL on failure. + ** Release with u3r_blob_unmap(ptr, *len_d) when done. + ** Uses u3C.dir_c as the pier path. + ** No loom allocation is performed. + */ + const c3_y* + u3r_blob_map(u3_atom a, c3_d* len_d); + + /* u3r_blob_unmap(): release a mapping from u3r_blob_map(). + */ + void + u3r_blob_unmap(const c3_y* ptr_y, c3_d len_d); + + /* u3r_blob_met(): compute bit-length of a bob atom without materialization. + ** + ** Equivalent to u3r_met(0, materialized) but avoids loom allocation. + ** Scans the last byte to strip trailing zeroes. + ** Returns 0 on error. + */ + c3_d + u3r_blob_met(u3_atom a); + #endif /* ifndef U3_RETRIEVE_H */ diff --git a/pkg/noun/serial.c b/pkg/noun/serial.c index 594ebb49f3..5f68335d1f 100644 --- a/pkg/noun/serial.c +++ b/pkg/noun/serial.c @@ -89,7 +89,43 @@ _cs_jam_fib_mat(struct _cs_jam_fib* fib_u, u3_noun a) _cs_jam_fib_chop(fib_u, 1, 1); } else { - c3_w a_w = u3r_met(0, a); + // for bob atoms, mmap the blob file directly to avoid loom allocation. + // for normal atoms, use u3r_met as before. 
+ // + c3_o bob_o = u3a_is_bob(a); + c3_d byt_d = 0; + const c3_y* byt_y = 0; + + c3_w a_w; + if ( c3y == bob_o ) { + byt_y = u3r_blob_map(a, &byt_d); + if ( !byt_y ) { + u3m_bail(c3__fail); + return; + } + // compute bit-length (met) from mmap'd bytes; strip trailing zero bytes + // and find the MSB position of the last non-zero byte. + // + { + c3_d pos_d = byt_d; + while ( pos_d > 0 && 0 == byt_y[pos_d - 1] ) { + pos_d--; + } + if ( 0 == pos_d ) { + // blob is all zeros → atom value is 0; treat as zero atom + u3r_blob_unmap(byt_y, byt_d); + _cs_jam_fib_chop(fib_u, 1, 1); + return; + } + c3_y top_y = byt_y[pos_d - 1]; + c3_y clz_y = (c3_y)(__builtin_clz((unsigned int)top_y) - 24); + a_w = (c3_w)((pos_d - 1) * 8 + (c3_d)(8 - clz_y)); + } + } + else { + a_w = u3r_met(0, a); + } + c3_w b_w = c3_bits_word(a_w); c3_w bit_w = fib_u->bit_w; @@ -99,6 +135,7 @@ _cs_jam_fib_mat(struct _cs_jam_fib* fib_u, u3_noun a) c3_w met_w = a_w + (2 * b_w); if ( a_w > (c3_w_max - 64) ) { + if ( byt_y ) u3r_blob_unmap(byt_y, byt_d); u3m_bail(c3__fail); return; } @@ -142,7 +179,16 @@ _cs_jam_fib_mat(struct _cs_jam_fib* fib_u, u3_noun a) // _cs_jam_fib_chop(fib_u, a_w, a); // - u3r_chop(0, 0, a_w, bit_w, buf_w, a); + if ( byt_y ) { + // write bob atom bytes directly from mmap, no loom allocation + // + c3_w len_w = (c3_w)((byt_d + sizeof(c3_w) - 1) / sizeof(c3_w)); + u3r_chop_words(0, 0, a_w, bit_w, buf_w, len_w, (const c3_w*)byt_y); + u3r_blob_unmap(byt_y, byt_d); + } + else { + u3r_chop(0, 0, a_w, bit_w, buf_w, a); + } } } } @@ -255,6 +301,16 @@ _cs_jam_bsw_atom(ur_bsw_t* rit_u, c3_w met_w, u3_atom a) // ur_bsw_atom64(rit_u, (c3_y)met_w, (c3_d)a); } + else if ( c3y == u3a_is_bob(a) ) { + // bob atom: mmap the blob file and write bytes directly into the bitstream + // + c3_d len_d; + const c3_y* byt_y = u3r_blob_map(a, &len_d); + if ( byt_y ) { + ur_bsw_atom_bytes(rit_u, (c3_d)met_w, (c3_y*)byt_y); + u3r_blob_unmap(byt_y, len_d); + } + } else { u3a_atom* vat_u = u3a_to_ptr(a); // XX 
assumes little-endian @@ -287,7 +343,12 @@ _cs_jam_xeno_atom(u3_atom a, void* ptr_v) _jam_xeno_t* jam_u = ptr_v; ur_bsw_t* rit_u = &(jam_u->rit_u); u3_weak bak = u3h_git(jam_u->har_p, a); - c3_w met_w = u3r_met(0, a); + // for bob atoms, use the blob met to avoid materializing the large atom; + // met_w must fit in 32 bits here (jam uses c3_w for bit lengths) + // + c3_w met_w = ( c3y == u3a_is_bob(a) ) + ? (c3_w)u3r_blob_met(a) + : u3r_met(0, a); if ( u3_none == bak ) { u3h_put(jam_u->har_p, a, _cs_coin_chub(rit_u->bits)); @@ -902,11 +963,12 @@ u3s_cue_atom(u3_atom a) byt_y = (c3_y*)&a; } else { - u3a_atom* vat_u = u3a_to_ptr(a); - byt_y = (c3_y*)vat_u->buf_w; - } + u3_assert(c3n == u3a_is_bob(a)); + u3a_atom* vat_u = u3a_to_ptr(a); + byt_y = (c3_y*)vat_u->buf_w; + } - return u3s_cue_bytes((c3_d)len_w, byt_y); + return u3s_cue_bytes((c3_d)len_w, byt_y); } /* _cs_etch_ud_size(): output length in @ud for given mpz_t. @@ -1465,11 +1527,12 @@ u3s_sift_ud(u3_atom a) byt_y = (c3_y*)&a; } else { - u3a_atom* vat_u = u3a_to_ptr(a); - byt_y = (c3_y*)vat_u->buf_w; - } + u3_assert(c3n == u3a_is_bob(a)); + u3a_atom* vat_u = u3a_to_ptr(a); + byt_y = (c3_y*)vat_u->buf_w; + } - return u3s_sift_ud_bytes(len_w, byt_y); + return u3s_sift_ud_bytes(len_w, byt_y); } /* @@ -1574,9 +1637,11 @@ _cs_ram_xeno_atom(u3_atom a, void* ptr_v) ur_bsw_t* rit_u = &(ram_u->rit_u); u3_weak bak = u3h_git(ram_u->har_p, a); c3_o bob_o = u3a_is_bob(a); - // only compute met for non-bob atoms; bobs use mug+seq encoding + // for bob atoms, use the blob's true bit-length for backref comparison. + // for normal atoms, use u3r_met as before. // - c3_w met_w = (c3n == bob_o) ? u3r_met(0, a) : 0; + c3_w met_w = (c3n == bob_o) ? 
u3r_met(0, a) + : (c3_w)u3r_blob_met(a); if ( u3_none == bak ) { u3h_put(ram_u->har_p, a, _cs_coin_chub(rit_u->bits)); diff --git a/pkg/noun/version.h b/pkg/noun/version.h index 924ce78275..8753eee2bd 100644 --- a/pkg/noun/version.h +++ b/pkg/noun/version.h @@ -43,7 +43,7 @@ typedef c3_h u3e_version; */ #define U3E_VER1 1 // north+south.bin #define U3E_VER2 2 // image.bin -#define U3E_VER3 3 // image.bin + blobs.txt + ram events +#define U3E_VER3 3 // image.bin + ram events #define U3E_VERLAT U3E_VER3 #endif /* ifndef U3_VERSION_H */ diff --git a/pkg/vere/blob.c b/pkg/vere/blob.c index 1278200aaa..0271722a8e 100644 --- a/pkg/vere/blob.c +++ b/pkg/vere/blob.c @@ -554,3 +554,105 @@ u3_blob_install_stg(const c3_c* pax_c, *seq_w = nex_w; return c3y; } + +/* u3_blob_map(): mmap blob file for direct byte access. +*/ +const c3_y* +u3_blob_map(const c3_c* pax_c, c3_h mug_h, c3_w seq_w, c3_d* len_d) +{ + c3_c fil_c[8192]; + u3_blob_path(fil_c, pax_c, mug_h, seq_w); + + struct stat st_u; + if ( -1 == stat(fil_c, &st_u) ) { + fprintf(stderr, "blob: map: stat failed %s: %s\r\n", + fil_c, strerror(errno)); + return 0; + } + + *len_d = (c3_d)st_u.st_size; + if ( 0 == *len_d ) { + return 0; + } + + c3_i fid_i = open(fil_c, O_RDONLY); + if ( -1 == fid_i ) { + fprintf(stderr, "blob: map: open failed %s: %s\r\n", + fil_c, strerror(errno)); + return 0; + } + + void* map_v = mmap(0, (size_t)*len_d, PROT_READ, MAP_PRIVATE, fid_i, 0); + close(fid_i); + + if ( MAP_FAILED == map_v ) { + fprintf(stderr, "blob: map: mmap failed %s: %s\r\n", + fil_c, strerror(errno)); + return 0; + } + + return (const c3_y*)map_v; +} + +/* u3_blob_unmap(): release mapping returned by u3_blob_map(). +*/ +void +u3_blob_unmap(const c3_y* ptr_y, c3_d len_d) +{ + if ( ptr_y && len_d ) { + munmap((void*)ptr_y, (size_t)len_d); + } +} + +/* u3_blob_met(): compute bit-length of blob content without full materialization. 
+** +** Scans backward from end of file to find last non-zero byte, then +** returns (pos * 8 + 8 - clz(byte)). This matches u3r_met(0, atom). +** Returns 0 if blob is missing, empty, or all-zero bytes. +*/ +c3_d +u3_blob_met(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) +{ + c3_c fil_c[8192]; + u3_blob_path(fil_c, pax_c, mug_h, seq_w); + + struct stat st_u; + if ( -1 == stat(fil_c, &st_u) || 0 == st_u.st_size ) { + return 0; + } + + c3_d len_d = (c3_d)st_u.st_size; + c3_i fid_i = open(fil_c, O_RDONLY); + if ( -1 == fid_i ) { + return 0; + } + + // mmap and scan backward for last non-zero byte (strips trailing zeroes) + // + void* map_v = mmap(0, (size_t)len_d, PROT_READ, MAP_PRIVATE, fid_i, 0); + close(fid_i); + if ( MAP_FAILED == map_v ) { + return 0; + } + + const c3_y* byt_y = (const c3_y*)map_v; + c3_d pos_d = len_d; + + while ( pos_d > 0 && 0 == byt_y[pos_d - 1] ) { + pos_d--; + } + + c3_d met_d = 0; + if ( pos_d > 0 ) { + c3_y top_y = byt_y[pos_d - 1]; + // bit count = (pos_d - 1) * 8 + (8 - count_of_leading_zeros_in_top_y) + // __builtin_clz operates on unsigned int (32 bits); subtract 24 to get + // the leading-zero count within just the low byte. + // + c3_y clz_y = (c3_y)(__builtin_clz((unsigned int)top_y) - 24); + met_d = (pos_d - 1) * 8 + (c3_d)(8 - clz_y); + } + + munmap(map_v, (size_t)len_d); + return met_d; +} \ No newline at end of file diff --git a/pkg/vere/blob.h b/pkg/vere/blob.h index 6dd3783955..db234f89e8 100644 --- a/pkg/vere/blob.h +++ b/pkg/vere/blob.h @@ -100,4 +100,27 @@ c3_h mug_h, c3_w seq_w); + /* u3_blob_map(): mmap a blob file for direct byte access. + ** + ** Returns a read-only pointer to the blob's bytes (length in *len_d), + ** or NULL on failure. The mapping must be released via u3_blob_unmap(). + ** No loom allocation is performed. + */ + const c3_y* + u3_blob_map(const c3_c* pax_c, c3_h mug_h, c3_w seq_w, c3_d* len_d); + + /* u3_blob_unmap(): release a mapping returned by u3_blob_map(). 
+ */ + void + u3_blob_unmap(const c3_y* ptr_y, c3_d len_d); + + /* u3_blob_met(): compute the bit-length of a blob without full materialization. + ** + ** Equivalent to u3r_met(0, materialized_atom) but avoids loading the whole + ** blob into the loom. Reads only the file size and last byte. + ** Returns 0 on error (blob missing or empty). + */ + c3_d + u3_blob_met(const c3_c* pax_c, c3_h mug_h, c3_w seq_w); + #endif /* ifndef U3_VERE_BLOB_H */ diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index ac9dd7506d..fffe5a44a6 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -8,6 +8,7 @@ #include "blob.h" #include #include +#include #include "migrate.h" #ifdef VERE64 @@ -990,26 +991,6 @@ _disk_epoc_meta(u3_disk* log_u, return c3y; } -/* _disk_epoc_blobs_init(): create empty blobs.txt in epoch directory. -*/ -static c3_o -_disk_epoc_blobs_init(const c3_c* epo_c) -{ - c3_c blb_c[8193]; - snprintf(blb_c, sizeof(blb_c), "%s/blobs.txt", epo_c); - - // create empty blobs.txt (open for append, creating if needed) - c3_i fid_i = open(blb_c, O_WRONLY | O_CREAT | O_APPEND, 0600); - if ( -1 == fid_i ) { - fprintf(stderr, "disk: failed to create blobs.txt in %s: %s\r\n", - epo_c, strerror(errno)); - return c3n; - } - fsync(fid_i); - close(fid_i); - return c3y; -} - /* _disk_epoc_zero: make epoch zero. */ static c3_o @@ -1082,12 +1063,6 @@ _disk_epoc_zero(c3_c* pax_c) close(epo_i); #endif - // create empty blobs.txt for GC tracking - // - if ( c3n == _disk_epoc_blobs_init(epo_c) ) { - goto fail3; - } - // success return c3y; @@ -1226,12 +1201,6 @@ _disk_epoc_roll(u3_disk* log_u, c3_d epo_d) close(epo_i); #endif - // create empty blobs.txt for GC tracking - // - if ( c3n == _disk_epoc_blobs_init(epo_c) ) { - goto fail3; - } - fprintf(stderr, "disk: created epoch %" PRIc3_d "\r\n", epo_d); // load new epoch directory and set it in log_u @@ -1537,6 +1506,52 @@ _disk_vere_diff(u3_disk* log_u) return c3n; } +/* _disk_bid_cmp(): bsearch/qsort comparator for c3_d blob IDs. 
+*/ +static int +_disk_bid_cmp(const void* a_v, const void* b_v) +{ + c3_d a_d = *(const c3_d*)a_v; + c3_d b_d = *(const c3_d*)b_v; + return (a_d > b_d) - (a_d < b_d); +} + +/* _disk_blb_rebuild(): build a C-heap sorted array of live blob IDs. +** +** Scans the home road heap page directory for bob atoms — O(heap_pages), +** much faster than u3a_walk_fore on the full noun tree, and does NOT +** allocate in the loom (safe when the loom may be undersized at chop time). +** +** Filters the raw heap scan results by verifying each candidate blob file +** actually exists on disk, eliminating false positives from allocated chunks +** (cells, HAMT nodes, etc.) that happen to match the bob atom pattern. +** +** Returns a malloc'd sorted array of c3_d blob IDs (mug<<32|seq). +** Sets [*out_z] to the count. Caller must c3_free() the result. +*/ +static c3_d* +_disk_blb_rebuild(u3_disk* log_u, c3_z* out_z) +{ + c3_z raw_z = 0; + c3_d* raw_d = u3a_find_bobs(&raw_z); + + // filter: keep only candidates whose blob file actually exists + // + c3_z liv_z = 0; + for ( c3_z i_z = 0; i_z < raw_z; i_z++ ) { + c3_h mug_h = (c3_h)(raw_d[i_z] >> 32); + c3_w seq_w = (c3_w)(raw_d[i_z] & 0xFFFFFFFF); + if ( c3y == u3_blob_exists(log_u->dir_u->pax_c, mug_h, seq_w) ) { + raw_d[liv_z++] = raw_d[i_z]; + } + } + *out_z = liv_z; + + fprintf(stderr, "chop: gc: found %" PRIc3_z " live blob(s) in heap " + "(%" PRIc3_z " candidates)\r\n", liv_z, raw_z); + return raw_d; +} + /* u3_disk_chop(): delete all but the latest 2 epocs. */ void @@ -1569,10 +1584,15 @@ u3_disk_chop(u3_disk* log_u, c3_d eve_d) // cleanup c3_free(sot_d); - // GC: sweep blob store for orphaned blobs (not in ban_u.blb_p) + // build live blob set from heap scan (no loom allocation) + // + c3_z liv_z = 0; + c3_d* liv_d = _disk_blb_rebuild(log_u, &liv_z); + + // GC: sweep blob store for orphaned blobs (not in live set) // - // Any blob not registered in the bank is an orphan from a crashed - // or incomplete install. Safe to delete. 
+ // Any blob not referenced by a live bob atom is an orphan. + // Safe to delete. // { c3_c bob_c[8192]; @@ -1608,17 +1628,16 @@ u3_disk_chop(u3_disk* log_u, c3_d eve_d) } c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - u3_noun key = u3i_chub(bid_d); - u3_weak ref = u3h_get(u3H->ban_u.blb_p, key); - u3z(key); + c3_d* hit_d = bsearch(&bid_d, liv_d, liv_z, sizeof(c3_d), + _disk_bid_cmp); - if ( u3_none == ref ) { + if ( !hit_d ) { // orphan — delete u3_blob_delete(log_u->dir_u->pax_c, mug_h, seq_w); fprintf(stderr, "chop: gc: deleted orphan blob %" PRIc3_h "/%" PRIc3_w "\r\n", mug_h, seq_w); } - // else: ref > 0, keep it + // else: live blob, keep it } closedir(bkt_u); } @@ -1626,6 +1645,8 @@ u3_disk_chop(u3_disk* log_u, c3_d eve_d) } } + c3_free(liv_d); + // success fprintf(stderr, "chop: event log truncation complete\r\n"); } @@ -2076,7 +2097,7 @@ _disk_epoc_load(u3_disk* log_u, c3_d lat_d, u3_disk_load_e lod_e) } break; case U3E_VER3: { - // VER3 is the current epoch format (image.bin + blobs.txt + ram events). + // VER3 is the current epoch format (image.bin + ram events). // Load path is identical to VER2; no migration needed. // Fall through to VER2 handling. // diff --git a/pkg/vere/io/unix.c b/pkg/vere/io/unix.c index dc72ed3554..dbc99c11a3 100644 --- a/pkg/vere/io/unix.c +++ b/pkg/vere/io/unix.c @@ -106,6 +106,18 @@ struct _u3_ufil; #endif } u3_unix; +/* u3_unix_bob_ctx: callback context for async large-file blob install. 
+*/ + typedef struct _u3_unix_bob_ctx { + u3_unix* unx_u; // driver (for u3_auto_plan and sev_h) + u3_ufil* fil_u; // file node (for gum_w update) + c3_h old_w; // old gum_w (for change detection) + c3_c* mnt_c; // mount point name (strdup'd, freed on callback) + u3_noun pax; // file path noun (owned) + u3_noun mim; // mime type noun (owned) + c3_ws len_ws; // file byte length + } u3_unix_bob_ctx; + void u3_unix_ef_look(u3_unix* unx_u, u3_noun mon, u3_noun all); @@ -944,6 +956,57 @@ _unix_create_dir(u3_udir* dir_u, u3_udir* par_u, u3_noun nam) static u3_noun _unix_update_node(u3_unix* unx_u, u3_unod* nod_u); +/* _unix_blob_install_cb(): callback fired when Mars installs a large file blob. +** +** Injects a separate %into event for the single large file. +** Skips injection if the installed mug matches the prior gum_w (no change). +*/ +static void +_unix_blob_install_cb(void* ptr_v, + c3_h mug_h, + c3_w seq_w, + c3_o ok_o) +{ + u3_unix_bob_ctx* ctx = ptr_v; + + if ( c3y == ok_o ) { + // skip if content unchanged since last %into + // + if ( mug_h == ctx->old_w ) { + u3z(ctx->pax); + u3z(ctx->mim); + } + else { + // update the file node's checksum + // + if ( ctx->fil_u ) { + ctx->fil_u->gum_w = mug_h; + } + + u3_atom atm = u3i_blob(mug_h, seq_w); + u3_noun dat = u3nt(ctx->mim, (u3_atom)ctx->len_ws, atm); + u3_noun can = u3nc(u3nt(ctx->pax, u3_nul, dat), u3_nul); + u3_noun wir = u3nt(c3__sync, + u3dc("scot", c3__uv, ctx->unx_u->sev_h), + u3_nul); + u3_noun cad = u3nq(c3__into, + u3i_string(ctx->mnt_c), + c3n, + can); + + u3_auto_plan(&ctx->unx_u->car_u, u3_ovum_init(0, c3__c, wir, cad)); + } + } + else { + u3l_log("unix: blob install failed for large file in %s", ctx->mnt_c); + u3z(ctx->pax); + u3z(ctx->mim); + } + + c3_free(ctx->mnt_c); + c3_free(ctx); +} + /* _unix_update_file(): update file, producing list of changes ** ** when scanning through files, if dry, do nothing. 
otherwise, @@ -953,8 +1016,10 @@ static u3_noun _unix_update_node(u3_unix* unx_u, u3_unod* nod_u); ** gum_w, move on. otherwise, overwrite add path plus data to ** %into event. ** -** Files larger than U3_BLOB_THRESH are stored in the blob store -** and referenced by a bob atom instead of copying bytes into loom. +** Files larger than U3_BLOB_THRESH are staged to .urb/bob/stg/ and +** installed into the blob store via the Mars IPC path (%blob-install). +** The %into event for the large file is injected asynchronously from +** the blob-install callback; this function returns u3_nul for large files. */ static u3_noun _unix_update_file(u3_unix* unx_u, u3_ufil* fil_u) @@ -984,39 +1049,109 @@ _unix_update_file(u3_unix* unx_u, u3_ufil* fil_u) len_ws = buf_u.st_size; - // large files: stream into blob store, return bob atom + // large files: stage in .urb/bob/stg/, install via %blob-install IPC // if ( (c3_d)len_ws > U3_BLOB_THRESH ) { - c3_h bob_mug_h; - c3_w bob_seq_w; + // build staging path + // + c3_c stg_c[8192]; + snprintf(stg_c, sizeof(stg_c), "%s/.urb/bob/stg/unix-XXXXXX", + unx_u->pax_c); - c3_o ok_o = u3_blob_save_fd(unx_u->pax_c, fid_i, - (c3_d)len_ws, &bob_mug_h, &bob_seq_w); + c3_i stg_i = mkstemp(stg_c); + if ( stg_i < 0 ) { + u3l_log("unix: mkstemp failed for %s: %s", + fil_u->pax_c, strerror(errno)); + close(fid_i); + return u3_nul; + } - if ( close(fid_i) < 0 ) { - u3l_log("error closing file %s: %s", + // stream file to staging area in 64K chunks + // + c3_y buf_y[65536]; + c3_o ok_o = c3y; + ssize_t got_i; + while ( (got_i = read(fid_i, buf_y, sizeof(buf_y))) > 0 ) { + c3_y* ptr_y = buf_y; + ssize_t rem_i = got_i; + while ( rem_i > 0 ) { + ssize_t wrt_i = write(stg_i, ptr_y, (size_t)rem_i); + if ( wrt_i <= 0 ) { + u3l_log("unix: write to staging file failed: %s", strerror(errno)); + ok_o = c3n; + break; + } + ptr_y += wrt_i; + rem_i -= wrt_i; + } + if ( c3n == ok_o ) break; + } + + close(fid_i); + + if ( got_i < 0 ) { + u3l_log("unix: read from %s 
failed: %s", fil_u->pax_c, strerror(errno)); + ok_o = c3n; } if ( c3n == ok_o ) { - u3l_log("blob: failed to save large file %s", fil_u->pax_c); + close(stg_i); + c3_unlink(stg_c); return u3_nul; } - // skip if content unchanged (bob_mug_h is content mug) + fsync(stg_i); + close(stg_i); + + // find the mount name for this file (walk parent dirs up to mon_u) // - if ( bob_mug_h == fil_u->gum_w ) { + c3_c* mnt_c = 0; + { + u3_umon* mon_u; + for ( mon_u = unx_u->mon_u; mon_u; mon_u = mon_u->nex_u ) { + if ( 0 == strncmp(fil_u->pax_c, mon_u->dir_u.pax_c, + strlen(mon_u->dir_u.pax_c)) ) { + mnt_c = mon_u->nam_c; + break; + } + } + } + if ( !mnt_c ) { + u3l_log("unix: no mount found for large file %s", fil_u->pax_c); + c3_unlink(stg_c); return u3_nul; } - { - u3_noun pax = _unix_string_to_path(unx_u, fil_u->pax_c); - u3_noun mim = u3nt(c3__text, u3i_string("plain"), u3_nul); - u3_atom atm = u3i_blob(bob_mug_h, bob_seq_w); - u3_noun dat = u3nt(mim, (u3_atom)len_ws, atm); - - return u3nc(u3nt(pax, u3_nul, dat), u3_nul); + // allocate callback context + // + u3_unix_bob_ctx* ctx = c3_malloc(sizeof(*ctx)); + ctx->unx_u = unx_u; + ctx->fil_u = fil_u; + ctx->old_w = fil_u->gum_w; + ctx->mnt_c = strdup(mnt_c); + ctx->pax = _unix_string_to_path(unx_u, fil_u->pax_c); + ctx->mim = u3nt(c3__text, u3i_string("plain"), u3_nul); + ctx->len_ws = len_ws; + + // send to Mars for installation; stg_c ownership passes to lord + // + u3_lord* god_u = unx_u->car_u.pir_u->god_u; + if ( !god_u ) { + u3l_log("unix: no lord for blob install of %s", fil_u->pax_c); + c3_unlink(stg_c); + u3z(ctx->pax); + u3z(ctx->mim); + c3_free(ctx->mnt_c); + c3_free(ctx); + return u3_nul; } + + u3_lord_blob_install(god_u, strdup(stg_c), ctx, _unix_blob_install_cb); + + // return u3_nul here; the %into event is fired from the callback + // + return u3_nul; } // small files: existing path — read into buffer diff --git a/pkg/vere/newt_tests.c b/pkg/vere/newt_tests.c index 5236910b49..1b36e0a011 100644 --- 
a/pkg/vere/newt_tests.c +++ b/pkg/vere/newt_tests.c @@ -430,9 +430,9 @@ _test_newt_sick_vers(void) memset(&mot_u, 0, sizeof(u3_moat)); - // construct message with invalid version + // construct message with truly invalid version (0x02+) // - buf_y[0] = 0x1; // invalid version (should be 0x0) + buf_y[0] = 0x2; // invalid version (valid: 0x00=jam, 0x01=ram) buf_y[1] = 0x1; // length = 1 buf_y[2] = 0x0; buf_y[3] = 0x0; @@ -444,6 +444,24 @@ _test_newt_sick_vers(void) fprintf(stderr, "newt invalid version fail: should have rejected\n"); exit(1); } + + // version 0x01 (ram) should be accepted + // + memset(&mot_u, 0, sizeof(u3_moat)); + buf_y[0] = 0x1; // valid: ram version + buf_y[1] = 0x1; // length = 1 + buf_y[2] = 0x0; + buf_y[3] = 0x0; + buf_y[4] = 0x0; + if ( c3y != u3_newt_decode(&mot_u, buf_y, 5) ) { + fprintf(stderr, "newt ram version fail: should have accepted 0x01\n"); + exit(1); + } + // clean up allocated meat if any + // + if ( u3_mess_tail == mot_u.mes_u.sat_e ) { + c3_free(mot_u.mes_u.tal_u.met_u); + } } /* _test_newt_vast_size(): test handling of large 32-bit message sizes From 87155ca55f05686b88fb88f25ea6617eb168715e Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Wed, 1 Apr 2026 20:38:07 -0500 Subject: [PATCH 05/31] wip: blob 5 --- pkg/noun/allocate.c | 139 ---------------------------- pkg/noun/allocate.h | 13 +-- pkg/noun/manage.c | 61 ------------- pkg/vere/disk.c | 215 +++++++++++++++++++++----------------------- pkg/vere/mars.c | 184 ++++++++++++++++++++++++++++++++++--- 5 files changed, 272 insertions(+), 340 deletions(-) diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index 9399887b74..bde4814c14 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -2056,142 +2056,3 @@ void u3a_loom_sane(void) { } - -/* _u3a_bid_cmp(): qsort comparator for c3_d blob IDs. 
-*/ -static int -_u3a_bid_cmp(const void* a_v, const void* b_v) -{ - c3_d a_d = *(const c3_d*)a_v; - c3_d b_d = *(const c3_d*)b_v; - return (a_d > b_d) - (a_d < b_d); -} - -/* u3a_find_bobs(): scan the home road heap for bob atoms. -** -** Walks the page directory of u3R (must be the home road) scanning all -** chunk-allocated hunk pages. For each allocated chunk large enough to -** hold a u3a_atom, checks whether the len_w field has u3a_blob_flag set. -** If so, extracts mug_h and seq_w and packs them into a c3_d blob ID -** (mug_h in high 32 bits, seq_w in low 32 bits). -** -** Returns a malloc'd sorted array of blob IDs; sets [*out_z] to the count. -** Caller must c3_free() the array. -** -** This is an O(heap_pages) scan, much faster than u3a_walk_fore on the -** full noun tree. Used by the epoch-chop GC to rebuild the live blob set -** without touching the loom allocator (safe when loom may be undersized). -*/ -c3_d* -u3a_find_bobs(c3_z* out_z) -{ - // word offset of len_w within u3a_atom (= c3_wiseof(u3a_noun)) - // - c3_w len_off_w = c3_wiseof(u3a_noun); - - // exact chunk size (in words) of a bob atom: header + 1 data word. - // bob atoms always occupy this exact size class and no other. - // - c3_w bob_wor_w = c3_wiseof(u3a_atom) + 1; - - u3p(u3a_crag)* dir_u = u3to(u3p(u3a_crag), HEAP.pag_p); - - // cap scan to pages actually mapped in the current loom allocation - // (HEAP.len_w reflects the loom size at snapshot time, which may be - // larger than the loom mapped by the current process, e.g. when - // `urbit chop` runs with the default --loom 32 on a pier created with - // a larger loom) - // - c3_w map_w = (c3_w)(u3C.wor_i >> u3a_page); - c3_w lim_w = ( HEAP.len_w < map_w ) ? 
HEAP.len_w : map_w; - - if ( HEAP.len_w > map_w ) { - fprintf(stderr, "warn: loom smaller than snapshot (%" PRIc3_w " > %" PRIc3_w " pages); " - "blob GC may miss blobs — run with matching --loom\r\n", - HEAP.len_w, map_w); - } - - // dynamic C-heap array; grows as needed - // - c3_z cap_z = 64; - c3_z len_z = 0; - c3_d* ids_d = c3_malloc(cap_z * sizeof(c3_d)); - - for ( c3_w pag_w = 0; pag_w < lim_w; pag_w++ ) { - u3_post dir_p = dir_u[pag_w]; - - // skip free and large (multi-page) allocations — bob atoms are small - // - if ( dir_p <= u3a_rest_pg ) { - continue; - } - - // hunk page: dir_p is the crag post - // - u3a_crag* pag_u = u3to(u3a_crag, dir_p); - c3_s log_s = pag_u->log_s; - - // bob atoms are always exactly bob_wor_w = (c3_wiseof(u3a_atom) + 1) words. - // that falls into the unique size class whose chunk size equals bob_wor_w. - // skip all other size classes to eliminate false positives from cells, - // HAMT nodes, or any other allocation whose word-2 happens to equal - // (u3a_blob_flag | 1) by coincidence. - // - if ( (c3_w)1 << log_s != bob_wor_w ) { - continue; - } - - c3_s tot_s = u3a_Hunk[log_s - u3a_min_log].tot_s; - c3_s hun_s = u3a_Hunk[log_s - u3a_min_log].hun_s; - - u3_post pag_p = page_to_post(pag_w); - - for ( c3_s pos_s = hun_s; pos_s < tot_s; pos_s++ ) { - // bitmap: bit SET means FREE, bit CLEAR means ALLOCATED - // - c3_w blk_w = pos_s >> u3a_word_bits_log; - c3_w bit_w = (c3_w)1 << (pos_s & (u3a_word_bits - 1)); - - if ( pag_u->map_w[blk_w] & bit_w ) { - continue; // free chunk - } - - // allocated chunk — probe len_w at the expected offset - // - u3_post hun_p = pag_p + ((c3_w)pos_s << log_s); - c3_w* raw_w = u3to(c3_w, hun_p); - - // check len_w for bob atom: must be exactly (u3a_blob_flag | 1) - // (1 data word + flag). This exact-match avoids false positives - // from cells whose hed/tel fields happen to have the high bit set. 
- // - if ( raw_w[len_off_w] != (u3a_blob_flag | (c3_w)1) ) { - continue; - } - - // it's a bob atom: extract mug and seq - // - u3a_atom* atm_u = (u3a_atom*)raw_w; - c3_h mug_h = atm_u->mug_h; - c3_w seq_w = (c3_w)atm_u->buf_w[0]; - c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - - // grow array if needed - // - if ( len_z == cap_z ) { - cap_z *= 2; - ids_d = c3_realloc(ids_d, cap_z * sizeof(c3_d)); - } - ids_d[len_z++] = bid_d; - } - } - - // sort for bsearch - // - if ( len_z > 1 ) { - qsort(ids_d, len_z, sizeof(c3_d), _u3a_bid_cmp); - } - - *out_z = len_z; - return ids_d; -} diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index e6f2816fda..514a45a692 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -972,18 +972,7 @@ u3a_dash(void); void (*pat_f)(u3_atom, void*), c3_o (*cel_f)(u3_noun, void*)); - /* u3a_find_bobs(): scan the home road heap for bob atoms. - ** - ** O(heap_pages) scan via the page directory — much faster than - ** u3a_walk_fore for the purpose of rebuilding the blob GC live-set. - ** Must be called with u3R == home road. - ** - ** Returns a malloc'd, sorted array of blob IDs (each a c3_d packing - ** mug_h in the high 32 bits and seq_w in the low 32 bits). - ** Sets [*out_z] to the count. Caller must c3_free() the array. - */ - c3_d* - u3a_find_bobs(c3_z* out_z); + /* u3a_string(): `a` as an on-loom c-string. */ diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index d811999dcd..8d52dde9e5 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -6,7 +6,6 @@ #ifndef U3_OS_windows #include #endif -#include #include #include #if defined(U3_OS_osx) @@ -688,66 +687,6 @@ _find_home(void) if ( !u3H->ban_u.res_p ) { u3H->ban_u.res_p = u3h_new(); } - - // scan .urb/bob/ and register any blobs not already in ban_u.blb_p. - // - // Blobs written by Earth (e.g. boot-time unix.c large-file ingestion) - // before Mars existed are not tracked in the bank yet. 
Walk the - // two-level / tree and insert them with refcount 1 so that - // epoch-chop GC does not delete them as orphans. - // - if ( u3C.dir_c ) { - c3_c bob_c[8192]; - snprintf(bob_c, sizeof(bob_c), "%s/.urb/bob", u3C.dir_c); - - DIR* top_u = opendir(bob_c); - if ( top_u ) { - struct dirent* mug_e; - while ( (mug_e = readdir(top_u)) ) { - if ( '.' == mug_e->d_name[0] || 0 == strcmp(mug_e->d_name, "stg") ) { - continue; - } - - c3_h mug_h = (c3_h)strtoul(mug_e->d_name, 0, 10); - if ( 0 == mug_h ) { - continue; - } - - c3_c mug_c[8192]; - snprintf(mug_c, sizeof(mug_c), "%s/%s", bob_c, mug_e->d_name); - - DIR* bkt_u = opendir(mug_c); - if ( !bkt_u ) { - continue; - } - - struct dirent* seq_e; - while ( (seq_e = readdir(bkt_u)) ) { - if ( '.' == seq_e->d_name[0] || 0 == strcmp(seq_e->d_name, "lock") ) { - continue; - } - - c3_w seq_w = (c3_w)strtoul(seq_e->d_name, 0, 10); - if ( 0 == seq_w ) { - continue; - } - - c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - u3_noun key = u3i_chub(bid_d); - u3_weak old = u3h_get(u3H->ban_u.blb_p, key); - - if ( u3_none == old ) { - // blob exists on disk but is not tracked — register with rc=1 - u3h_put(u3H->ban_u.blb_p, key, u3i_word(1)); - } - - u3z(key); - } - closedir(bkt_u); - } - closedir(top_u); - } - } } /* u3m_pave(): instantiate or activate image. 
diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index fffe5a44a6..bd44eea0ac 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1231,6 +1231,44 @@ _disk_epoc_kill(u3_disk* log_u, c3_d epo_d) c3_c epo_c[8193]; snprintf(epo_c, sizeof(epo_c), "%s/0i%" PRIc3_d, log_u->com_u->pax_c, epo_d); + // process blobs.txt: decrement event-log refcounts; delete files at zero + // + { + c3_c blt_c[8193]; + snprintf(blt_c, sizeof(blt_c), "%s/blobs.txt", epo_c); + FILE* blt_f = fopen(blt_c, "r"); + if ( blt_f ) { + while ( 1 ) { + uint32_t mug_i = 0, seq_i = 0; + if ( 2 != fscanf(blt_f, "%" SCNu32 " %" SCNu32, &mug_i, &seq_i) ) { + break; + } + c3_h mug_h = (c3_h)mug_i; + c3_w seq_w = (c3_w)seq_i; + c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; + u3_noun bk = u3i_chub(bid_d); + u3_weak bv = u3h_get(u3H->ban_u.blb_p, bk); + c3_w ref_w = 0; + if ( u3_none != bv ) { + u3r_safe_word(bv, &ref_w); + } + if ( ref_w > 1 ) { + u3h_put(u3H->ban_u.blb_p, bk, u3i_word(ref_w - 1)); + } + else { + // last ref — delete blob file + u3_blob_delete(log_u->dir_u->pax_c, mug_h, seq_w); + u3h_del(u3H->ban_u.blb_p, bk); + fprintf(stderr, "disk: gc: deleted blob %" PRIc3_h + "/%" PRIc3_w " (epoch 0i%" PRIc3_d ")\r\n", + mug_h, seq_w, epo_d); + } + u3z(bk); + } + fclose(blt_f); + } + } + // delete files in epoch directory u3_dire* dir_u = u3_foil_folder(epo_c); u3_dent* den_u = dir_u->all_u; @@ -1506,52 +1544,6 @@ _disk_vere_diff(u3_disk* log_u) return c3n; } -/* _disk_bid_cmp(): bsearch/qsort comparator for c3_d blob IDs. -*/ -static int -_disk_bid_cmp(const void* a_v, const void* b_v) -{ - c3_d a_d = *(const c3_d*)a_v; - c3_d b_d = *(const c3_d*)b_v; - return (a_d > b_d) - (a_d < b_d); -} - -/* _disk_blb_rebuild(): build a C-heap sorted array of live blob IDs. -** -** Scans the home road heap page directory for bob atoms — O(heap_pages), -** much faster than u3a_walk_fore on the full noun tree, and does NOT -** allocate in the loom (safe when the loom may be undersized at chop time). 
-** -** Filters the raw heap scan results by verifying each candidate blob file -** actually exists on disk, eliminating false positives from allocated chunks -** (cells, HAMT nodes, etc.) that happen to match the bob atom pattern. -** -** Returns a malloc'd sorted array of c3_d blob IDs (mug<<32|seq). -** Sets [*out_z] to the count. Caller must c3_free() the result. -*/ -static c3_d* -_disk_blb_rebuild(u3_disk* log_u, c3_z* out_z) -{ - c3_z raw_z = 0; - c3_d* raw_d = u3a_find_bobs(&raw_z); - - // filter: keep only candidates whose blob file actually exists - // - c3_z liv_z = 0; - for ( c3_z i_z = 0; i_z < raw_z; i_z++ ) { - c3_h mug_h = (c3_h)(raw_d[i_z] >> 32); - c3_w seq_w = (c3_w)(raw_d[i_z] & 0xFFFFFFFF); - if ( c3y == u3_blob_exists(log_u->dir_u->pax_c, mug_h, seq_w) ) { - raw_d[liv_z++] = raw_d[i_z]; - } - } - *out_z = liv_z; - - fprintf(stderr, "chop: gc: found %" PRIc3_z " live blob(s) in heap " - "(%" PRIc3_z " candidates)\r\n", liv_z, raw_z); - return raw_d; -} - /* u3_disk_chop(): delete all but the latest 2 epocs. */ void @@ -1568,86 +1560,22 @@ u3_disk_chop(u3_disk* log_u, c3_d eve_d) exit(0); // enjoy } - // delete all but the last two epochs + // delete all but the last two epochs. + // _disk_epoc_kill reads each epoch's blobs.txt and decrements + // blb_p refcounts, deleting blob files when they reach zero. 
// // XX parameterize the number of epochs to chop // for ( c3_z i_z = 2; i_z < len_z; i_z++ ) { - fprintf(stderr, "chop: deleting epoch 0i%" PRIu64 "\r\n", - sot_d[i_z]); + fprintf(stderr, "chop: deleting epoch 0i%" PRIu64 "\r\n", sot_d[i_z]); if ( c3y != _disk_epoc_kill(log_u, sot_d[i_z]) ) { fprintf(stderr, "chop: failed to delete epoch 0i%" PRIu64 "\r\n", sot_d[i_z]); exit(1); } } - // cleanup c3_free(sot_d); - // build live blob set from heap scan (no loom allocation) - // - c3_z liv_z = 0; - c3_d* liv_d = _disk_blb_rebuild(log_u, &liv_z); - - // GC: sweep blob store for orphaned blobs (not in live set) - // - // Any blob not referenced by a live bob atom is an orphan. - // Safe to delete. - // - { - c3_c bob_c[8192]; - snprintf(bob_c, sizeof(bob_c), "%s/.urb/bob", log_u->dir_u->pax_c); - - DIR* top_u = opendir(bob_c); - if ( top_u ) { - struct dirent* mug_e; - while ( (mug_e = readdir(top_u)) ) { - if ( '.' == mug_e->d_name[0] || 0 == strcmp(mug_e->d_name, "stg") ) { - continue; - } - c3_h mug_h = (c3_h)strtoul(mug_e->d_name, 0, 10); - if ( 0 == mug_h ) { - continue; - } - - c3_c mug_c[8192]; - snprintf(mug_c, sizeof(mug_c), "%s/%s", bob_c, mug_e->d_name); - - DIR* bkt_u = opendir(mug_c); - if ( !bkt_u ) { - continue; - } - struct dirent* seq_e; - while ( (seq_e = readdir(bkt_u)) ) { - if ( '.' 
== seq_e->d_name[0] || 0 == strcmp(seq_e->d_name, "lock") ) { - continue; - } - c3_w seq_w = (c3_w)strtoul(seq_e->d_name, 0, 10); - if ( 0 == seq_w ) { - continue; - } - - c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - c3_d* hit_d = bsearch(&bid_d, liv_d, liv_z, sizeof(c3_d), - _disk_bid_cmp); - - if ( !hit_d ) { - // orphan — delete - u3_blob_delete(log_u->dir_u->pax_c, mug_h, seq_w); - fprintf(stderr, "chop: gc: deleted orphan blob %" PRIc3_h - "/%" PRIc3_w "\r\n", mug_h, seq_w); - } - // else: live blob, keep it - } - closedir(bkt_u); - } - closedir(top_u); - } - } - - c3_free(liv_d); - - // success fprintf(stderr, "chop: event log truncation complete\r\n"); } @@ -1997,6 +1925,61 @@ typedef enum { _epoc_late = 4 // format from the future } _epoc_kind; +/* _disk_blb_rebuild_from_epochs(): rebuild ban_u.blb_p from all epoch blobs.txt files. +** +** Called after u3m_boot() so the loom (u3H) is live. +** Walks all epoch directories under .urb/log/, reads each blobs.txt, +** and increments the blb_p refcount for each referenced blob. +** Replaces any stale blb_p from the snapshot with a freshly-computed map. +** +** Note: we do NOT u3h_free the old blb_p here. The snapshot may have +** been saved with a larger loom, in which case the stale HAMT nodes may +** live on pages beyond the current loom's HEAP.len_w, and u3h_free would +** crash with "palloc: page out of heap". The old nodes become dead loom +** memory and will be reclaimed at the next epoch roll / snapshot compaction. 
+*/ +static void +_disk_blb_rebuild_from_epochs(u3_disk* log_u) +{ + // discard stale snapshot blb_p; allocate a fresh, empty HAMT + // + u3H->ban_u.blb_p = u3h_new(); + + c3_z epo_z = u3_disk_epoc_list(log_u, 0); + c3_d* epo_d = c3_malloc(epo_z * sizeof(c3_d)); + u3_disk_epoc_list(log_u, epo_d); + + for ( c3_z i_z = 0; i_z < epo_z; i_z++ ) { + c3_c blt_c[8193]; + snprintf(blt_c, sizeof(blt_c), "%s/0i%" PRIc3_d "/blobs.txt", + log_u->com_u->pax_c, epo_d[i_z]); + + FILE* blt_f = fopen(blt_c, "r"); + if ( !blt_f ) { + continue; // no blobs.txt in this epoch (pre-VER3 or no blobs) + } + + while ( 1 ) { + uint32_t mug_i = 0, seq_i = 0; + if ( 2 != fscanf(blt_f, "%" SCNu32 " %" SCNu32, &mug_i, &seq_i) ) { + break; + } + c3_d bid_d = ((c3_d)(c3_h)mug_i << 32) | (c3_d)(c3_w)seq_i; + u3_noun bk = u3i_chub(bid_d); + u3_weak bv = u3h_get(u3H->ban_u.blb_p, bk); + c3_w ref_w = 0; + if ( u3_none != bv ) { + u3r_safe_word(bv, &ref_w); + } + u3h_put(u3H->ban_u.blb_p, bk, u3i_word(ref_w + 1)); + u3z(bk); + } + fclose(blt_f); + } + + c3_free(epo_d); +} + /* _disk_epoc_load(): load existing epoch, enumerating failures */ static _epoc_kind @@ -2169,6 +2152,10 @@ _disk_epoc_load(u3_disk* log_u, c3_d lat_d, u3_disk_load_e lod_e) u3m_boot(log_u->dir_u->pax_c, (size_t)1 << u3_Host.ops_u.lom_y); // XX confirm + // rebuild blob refcount map from surviving epoch blobs.txt files + // + _disk_blb_rebuild_from_epochs(log_u); + if ( log_u->dun_d < u3A->eve_d ) { // XX bad, add to enum fprintf(stderr, "mars: corrupt pier, snapshot (%" PRIu64 diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index f36707a178..ffca85fe2f 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -14,6 +14,7 @@ #include "blob.h" #include #include +#include c3_c tac_c[256]; // tracing label @@ -233,6 +234,35 @@ _mars_grab(u3_noun sac, c3_o pri_o) } } +/* _mars_blob_bobs_atom(): u3a_walk_fore atom callback — collect bob atoms. 
+*/ +static void +_mars_blob_bobs_atom(u3_atom a, void* ptr_v) +{ + if ( c3y != u3a_is_bob(a) ) { + return; + } + // grow the C-heap array + // + struct { c3_d* ids; c3_z len; c3_z cap; } *acc = ptr_v; + if ( acc->len == acc->cap ) { + acc->cap = acc->cap ? acc->cap * 2 : 8; + acc->ids = c3_realloc(acc->ids, acc->cap * sizeof(c3_d)); + } + c3_h mug_h = u3a_bob_mug(a); + c3_w seq_w = u3a_bob_seq(a); + acc->ids[acc->len++] = ((c3_d)mug_h << 32) | (c3_d)seq_w; +} + +/* _mars_blob_bobs_cell(): u3a_walk_fore cell callback — always descend. +*/ +static c3_o +_mars_blob_bobs_cell(u3_noun n, void* ptr_v) +{ + (void)n; (void)ptr_v; + return c3y; +} + /* _mars_fact(): commit a fact and enqueue its effects. */ static void @@ -240,6 +270,84 @@ _mars_fact(u3_mars* mar_u, u3_noun job, u3_noun pro) { + // find all bob atoms in the committed job noun + // and promote them from lease-refs to event-log refs. + // + { + struct { c3_d* ids; c3_z len; c3_z cap; } acc = {0, 0, 0}; + u3a_walk_fore(job, &acc, _mars_blob_bobs_atom, _mars_blob_bobs_cell); + + if ( acc.len ) { + // open (or create) the current epoch's blobs.txt for appending + // + c3_c blt_c[8192]; + snprintf(blt_c, sizeof(blt_c), "%s/0i%" PRIc3_d "/blobs.txt", + mar_u->log_u->com_u->pax_c, mar_u->log_u->epo_d); + FILE* blt_f = fopen(blt_c, "a"); + + for ( c3_z i_z = 0; i_z < acc.len; i_z++ ) { + c3_h mug_h = (c3_h)(acc.ids[i_z] >> 32); + c3_w seq_w = (c3_w)(acc.ids[i_z] & 0xFFFFFFFFULL); + + // bump event-log refcount in blb_p + // + u3_noun bkey = u3i_chub(acc.ids[i_z]); + u3_weak old = u3h_get(u3H->ban_u.blb_p, bkey); + c3_w ref_w = 0; + if ( u3_none != old ) { + u3r_safe_word(old, &ref_w); + } + u3h_put(u3H->ban_u.blb_p, bkey, u3i_word(ref_w + 1)); + u3z(bkey); + + // write to blobs.txt + // + if ( blt_f ) { + fprintf(blt_f, "%" PRIc3_h " %" PRIc3_w "\n", mug_h, seq_w); + } + + // free the lease (if any) for this blob + // walk res_p looking for a matching mug+seq + // + // XX: linear scan over res_p; replace with 
reverse index if needed + // + u3_noun rkey = 0; + { + u3p(u3h_root) res_p = u3H->ban_u.res_p; + // u3h_walk is not available; iterate nxt_d ids + // (leases are identified by res_d; we need to find the one for this blob) + // Use a simple approach: remember lease ptr stored as c3_d value + // We don't have a reverse index, so scan all res_d from 0..nxt_d-1. + // + for ( c3_d rid_d = 0; rid_d < u3H->ban_u.nxt_d; rid_d++ ) { + u3_noun k = u3i_chub(rid_d); + u3_weak v = u3h_get(res_p, k); + if ( u3_none != v ) { + c3_d ptr_d = 0; + u3r_safe_chub(v, &ptr_d); + u3v_lease* lea_u = (u3v_lease*)(uintptr_t)ptr_d; + if ( lea_u && lea_u->mug_h == mug_h && lea_u->seq_w == seq_w ) { + rkey = k; + c3_free(lea_u); + u3h_del(res_p, k); + u3z(k); + break; + } + } + u3z(k); + } + (void)rkey; + } + } + + if ( blt_f ) { + fflush(blt_f); + fclose(blt_f); + } + c3_free(acc.ids); + } + } + { u3_fact tac_u = { .job = job, @@ -567,6 +675,48 @@ _mars_work(u3_mars* mar_u, u3_noun jar) { u3_noun tag, dat, pro; + // lease expiry sweeper: free leases for blobs that were never committed. + // If the blob has no event-log refs (blb_p == 0), delete the file. 
+ // + { + struct timeval tv_u; + gettimeofday(&tv_u, 0); + c3_d now_d = (c3_d)tv_u.tv_sec * 1000ULL + (c3_d)tv_u.tv_usec / 1000ULL; + + for ( c3_d rid_d = 0; rid_d < u3H->ban_u.nxt_d; rid_d++ ) { + u3_noun k = u3i_chub(rid_d); + u3_weak v = u3h_get(u3H->ban_u.res_p, k); + if ( u3_none != v ) { + c3_d ptr_d = 0; + u3r_safe_chub(v, &ptr_d); + u3v_lease* lea_u = (u3v_lease*)(uintptr_t)ptr_d; + if ( lea_u && lea_u->exp_d && now_d > lea_u->exp_d ) { + // lease expired — check if blob has any event-log refs + // + c3_d bid_d = ((c3_d)lea_u->mug_h << 32) | (c3_d)lea_u->seq_w; + u3_noun bk = u3i_chub(bid_d); + u3_weak bv = u3h_get(u3H->ban_u.blb_p, bk); + c3_w ref_w = 0; + if ( u3_none != bv ) { + u3r_safe_word(bv, &ref_w); + } + u3z(bk); + + if ( 0 == ref_w ) { + // no event-log refs — blob was never committed; delete it + // + u3_blob_delete(u3C.dir_c, lea_u->mug_h, lea_u->seq_w); + fprintf(stderr, "mars: blob: expired lease, deleted %" PRIc3_h + "/%" PRIc3_w "\r\n", lea_u->mug_h, lea_u->seq_w); + } + c3_free(lea_u); + u3h_del(u3H->ban_u.res_p, k); + } + } + u3z(k); + } + } + if ( c3n == u3r_cell(jar, &tag, &dat) ) { fprintf(stderr, "mars: fail a\r\n"); u3z(jar); @@ -743,22 +893,28 @@ _mars_work(u3_mars* mar_u, u3_noun jar) ok_o = u3_blob_install_stg(u3C.dir_c, stg_c, &mug_h, &seq_w); if ( c3y == ok_o ) { - // record in blob bank with initial refcount=1 + // create lease: holds a pending ref until the blob is committed + // to the event log (in _mars_fact) or the lease expires. + // blb_p tracks committed event-log refs only — not set here. 
// - c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - u3_noun key = u3i_chub(bid_d); - u3_weak old = u3h_get(u3H->ban_u.blb_p, key); - if ( u3_none == old ) { - // new blob — insert with refcount 1 - u3h_put(u3H->ban_u.blb_p, key, u3i_word(1)); - } - else { - // duplicate install — bump refcount - c3_w ref_w = 0; - u3r_safe_word(old, &ref_w); - u3h_put(u3H->ban_u.blb_p, key, u3i_word(ref_w + 1)); + c3_d res_d = u3H->ban_u.nxt_d++; + u3v_lease* lea_u = c3_malloc(sizeof(*lea_u)); + lea_u->res_d = res_d; + lea_u->mug_h = mug_h; + lea_u->seq_w = seq_w; + // TTL: 5 minutes from now (Unix ms) + { + struct timeval tv_u; + gettimeofday(&tv_u, 0); + lea_u->exp_d = (c3_d)tv_u.tv_sec * 1000ULL + + (c3_d)tv_u.tv_usec / 1000ULL + + 300000ULL; } - u3z(key); + snprintf(lea_u->stg_c, sizeof(lea_u->stg_c), "%s", stg_c); + + u3_noun rkey = u3i_chub(res_d); + u3h_put(u3H->ban_u.res_p, rkey, u3i_chub((c3_d)(uintptr_t)lea_u)); + u3z(rkey); } } else { From 7d6cbf01fb9b4d7ea7f837c2d8c30bac3d254e19 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Wed, 1 Apr 2026 20:46:08 -0500 Subject: [PATCH 06/31] wip: blob 6 --- pkg/vere/blob.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pkg/vere/blob.c b/pkg/vere/blob.c index 0271722a8e..f931380ffa 100644 --- a/pkg/vere/blob.c +++ b/pkg/vere/blob.c @@ -439,6 +439,40 @@ u3_blob_delete(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) fprintf(stderr, "blob: failed to delete %s: %s\r\n", fil_c, strerror(errno)); } + + // attempt to clean up the mug bucket directory if it is now empty. + // + // the lockfile is the only non-blob resident; we must remove it before + // rmdir can succeed. the two-step unlink+rmdir is safe because vere is + // single-threaded and blob installs never interleave with GC: + // + // - if another blob exists in the bucket, rmdir fails ENOTEMPTY — fine. + // - if a concurrent install races the window between unlink(lock) and + // rmdir, it recreates the lockfile, rmdir fails ENOTEMPTY — fine. 
+ // + c3_c dir_c[8192]; + c3_c lck_c[8192]; + _blob_mug_dir(dir_c, pax_c, mug_h); + _blob_lock_path(lck_c, pax_c, mug_h); + + // first attempt: rmdir without touching lock (fast path for dirs that are already empty or gone) + // + if ( 0 == rmdir(dir_c) || ENOENT == errno ) { + return; + } + + // dir is non-empty: remove lock file and retry rmdir + // + if ( 0 != unlink(lck_c) && ENOENT != errno ) { + fprintf(stderr, "blob: failed to remove lock %s: %s\r\n", + lck_c, strerror(errno)); + return; + } + + if ( 0 != rmdir(dir_c) && ENOTEMPTY != errno && ENOENT != errno ) { + fprintf(stderr, "blob: failed to remove bucket %s: %s\r\n", + dir_c, strerror(errno)); + } } /* u3_blob_install_stg(): install a staging file into the blob store. From 53b30f5dd0599a9ca6aa6cad292d34f943638913 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Tue, 14 Apr 2026 09:35:50 -0500 Subject: [PATCH 07/31] wip: blob 7 --- pkg/noun/manage.c | 4 + pkg/noun/serial_tests.c | 356 +++++++++++++++++++++++++++++++++++++++- pkg/noun/vortex.h | 7 + pkg/vere/lord.c | 4 +- pkg/vere/mars.c | 226 ++++++++++++++++++------- 5 files changed, 535 insertions(+), 62 deletions(-) diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index f4b08f74db..fb979dbf5b 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -528,6 +528,7 @@ _pave_parts(void) // u3H->ban_u.blb_p = u3h_new(); u3H->ban_u.res_p = u3h_new(); + u3H->ban_u.rev_p = u3h_new(); u3H->ban_u.nxt_d = 0; } @@ -679,6 +680,9 @@ _find_home(void) if ( !u3H->ban_u.res_p ) { u3H->ban_u.res_p = u3h_new(); } + if ( !u3H->ban_u.rev_p ) { + u3H->ban_u.rev_p = u3h_new(); + } if ( !u3R->lop_p ) u3R->lop_p = u3h_new(); if ( !u3R->cax.for_p ) u3R->cax.for_p = u3h_new_cache(u3C.per_w); } diff --git a/pkg/noun/serial_tests.c b/pkg/noun/serial_tests.c index e34d1db5e9..cdbf9d0033 100644 --- a/pkg/noun/serial_tests.c +++ b/pkg/noun/serial_tests.c @@ -2,6 +2,11 @@ #include "noun.h" +#include +#include +#include +#include + /* _setup(): prepare for tests. 
*/ static void @@ -243,6 +248,329 @@ _test_jam_roundtrip(void) return ret_i; } +/* _test_ram_spec(): encode [ref] as ram, decode, and compare to [ref]. +** +** Verifies the wire format's 5-byte header and that the decoded +** noun equals the input. For bob atoms, equality is mug+seq only +** (no materialization). +*/ +static c3_i +_test_ram_spec(const c3_c* cap_c, u3_noun ref) +{ + c3_i ret_i = 1; + c3_d len_d = 0; + c3_y* byt_y = 0; + + u3s_ram_xeno(ref, &len_d, &byt_y); + + // validate wire header: "RAM\0" + version 0x01 + // + if ( (len_d < 5) + || (byt_y[0] != 'R') + || (byt_y[1] != 'A') + || (byt_y[2] != 'M') + || (byt_y[3] != 0x00) + || (byt_y[4] != 0x01) ) + { + fprintf(stderr, "\033[31mram header %s fail\033[0m\r\n", cap_c); + free(byt_y); + return 0; + } + + // round-trip via tap + // + u3_weak out = u3s_tap_xeno(len_d, byt_y); + free(byt_y); + + if ( u3_none == out ) { + fprintf(stderr, "\033[31mtap %s fail: u3_none\033[0m\r\n", cap_c); + return 0; + } + if ( c3n == u3r_sing(ref, out) ) { + fprintf(stderr, "\033[31mtap %s fail: mismatch\033[0m\r\n", cap_c); + u3m_p("ref", ref); + u3m_p("out", out); + ret_i = 0; + } + u3z(out); + return ret_i; +} + +static c3_i +_test_ram_roundtrip(void) +{ + c3_i ret_i = 1; + + // atoms (cat + indirect) + // + { u3_noun ref = 0; ret_i &= _test_ram_spec("0", ref); u3z(ref); } + { u3_noun ref = 1; ret_i &= _test_ram_spec("1", ref); u3z(ref); } + { u3_noun ref = 42; ret_i &= _test_ram_spec("42", ref); u3z(ref); } + { u3_noun ref = c3__fast; ret_i &= _test_ram_spec("%fast", ref); u3z(ref); } + + // wide atom (forces indirect path) + // + { + c3_y inp_y[33] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + u3_noun ref = u3i_bytes(sizeof(inp_y), inp_y); + ret_i &= _test_ram_spec("wide", ref); + u3z(ref); + } + + // cells + // + { u3_noun ref = u3nc(0, 0); ret_i &= _test_ram_spec("[0 0]", ref); u3z(ref); } + { u3_noun ref = u3nc(1, 2); ret_i &= _test_ram_spec("[1 2]", 
ref); u3z(ref); } + { u3_noun ref = u3nt(1, 2, 3); ret_i &= _test_ram_spec("[1 2 3]", ref); u3z(ref); } + { + u3_noun ref = u3nc(u3nc(1, 2), 3); + ret_i &= _test_ram_spec("[[1 2] 3]", ref); + u3z(ref); + } + + // deep nesting + // + { + u3_noun ref = u3nc( + u3nc(u3nc(u3nc(1, 2), 3), 4), + u3nc(5, u3nc(6, 7))); + ret_i &= _test_ram_spec("deep", ref); + u3z(ref); + } + + // backref: repeated cell — second occurrence should be encoded as backref + // + { + u3_noun sub = u3nt(c3__fast, c3__full, c3__fast); + u3_noun ref = u3nc(u3k(sub), u3nc(u3k(sub), sub)); + ret_i &= _test_ram_spec("backref-cell", ref); + u3z(ref); + } + + // backref: repeated indirect atom + // + { + u3_noun a = u3i_string("abcdefghijklmnopqrstuvwxyz"); + u3_noun ref = u3nc(u3k(a), a); + ret_i &= _test_ram_spec("backref-atom", ref); + u3z(ref); + } + + return ret_i; +} + +/* _ram_tmp_dir / _ram_setup_tmp() / _ram_cleanup_tmp() / _ram_make_blob(): +** +** Helpers for bob-atom round-trip tests. The ram encoder calls +** u3r_blob_met() which reads the blob file at +** $u3C.dir_c/.urb/bob/{mug}/{seq}, so actual files must exist. 
+*/ + +static c3_c _ram_tmp_dir[1024]; + +static c3_o +_ram_setup_tmp(void) +{ + snprintf(_ram_tmp_dir, sizeof(_ram_tmp_dir), "/tmp/vere-serial-test-XXXXXX"); + if ( !mkdtemp(_ram_tmp_dir) ) { + fprintf(stderr, "serial_tests: mkdtemp failed\r\n"); + return c3n; + } + u3C.dir_c = _ram_tmp_dir; + + c3_c pax_c[2048]; + snprintf(pax_c, sizeof(pax_c), "%s/.urb", _ram_tmp_dir); + mkdir(pax_c, 0755); + snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob", _ram_tmp_dir); + mkdir(pax_c, 0755); + return c3y; +} + +static void +_ram_cleanup_tmp(void) +{ + c3_c cmd_c[2048]; + snprintf(cmd_c, sizeof(cmd_c), "rm -rf %s", _ram_tmp_dir); + (void)system(cmd_c); +} + +static c3_o +_ram_make_blob(c3_h mug_h, c3_w seq_w, const c3_y* dat_y, c3_d len_d) +{ + c3_c pax_c[2048]; + snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob/%" PRIc3_h, _ram_tmp_dir, mug_h); + mkdir(pax_c, 0755); + snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob/%" PRIc3_h "/%" PRIc3_w, + _ram_tmp_dir, mug_h, seq_w); + FILE* fil_f = fopen(pax_c, "wb"); + if ( !fil_f ) { + fprintf(stderr, "serial_tests: fopen %s: %s\r\n", pax_c, strerror(errno)); + return c3n; + } + if ( len_d && (len_d != fwrite(dat_y, 1, (size_t)len_d, fil_f)) ) { + fclose(fil_f); + return c3n; + } + fclose(fil_f); + return c3y; +} + +/* _test_ram_bob_spec(): round-trip a bob-containing noun via ram/tap. +** +** Cannot use u3r_sing for bob-containing refs unless the blob file +** exists and is decodable — and u3r_sing materializes bob vs normal. +** Since our reference IS the bob atom, u3r_sing_atom's bob-vs-bob +** fast path handles it by mug+seq. 
+*/ +static c3_i +_test_ram_bob_spec(const c3_c* cap_c, u3_noun ref) +{ + c3_i ret_i = 1; + c3_d len_d = 0; + c3_y* byt_y = 0; + + u3s_ram_xeno(ref, &len_d, &byt_y); + u3_weak out = u3s_tap_xeno(len_d, byt_y); + free(byt_y); + + if ( u3_none == out ) { + fprintf(stderr, "\033[31mram/tap bob %s fail: u3_none\033[0m\r\n", cap_c); + return 0; + } + if ( c3n == u3r_sing(ref, out) ) { + fprintf(stderr, "\033[31mram/tap bob %s fail: mismatch\033[0m\r\n", cap_c); + ret_i = 0; + } + u3z(out); + return ret_i; +} + +static c3_i +_test_ram_bob_roundtrip(void) +{ + c3_i ret_i = 1; + + if ( c3n == _ram_setup_tmp() ) { + return 0; + } + + // create two distinct blob files for testing + // + const c3_y dat1_y[] = "blob contents one"; + const c3_y dat2_y[] = "a different blob payload"; + if ( (c3n == _ram_make_blob(0x12345678, 1, dat1_y, sizeof(dat1_y))) + || (c3n == _ram_make_blob(0x12345678, 2, dat2_y, sizeof(dat2_y))) + || (c3n == _ram_make_blob(0x7a0b0000, 7, dat1_y, sizeof(dat1_y))) ) + { + _ram_cleanup_tmp(); + return 0; + } + + // single bob atom + // + { + u3_noun ref = u3i_blob(0x12345678, 1); + + c3_d len_d; + c3_y* byt_y; + u3s_ram_xeno(ref, &len_d, &byt_y); + u3_weak out = u3s_tap_xeno(len_d, byt_y); + free(byt_y); + + if ( u3_none == out ) { + fprintf(stderr, "\033[31mram bob solo fail: u3_none\033[0m\r\n"); + ret_i = 0; + } + else if ( c3n == u3a_is_bob(out) ) { + fprintf(stderr, "\033[31mram bob solo fail: decoded as non-bob\033[0m\r\n"); + ret_i = 0; + } + else if ( (u3a_bob_mug(out) != 0x12345678) + || (u3a_bob_seq(out) != 1) ) + { + fprintf(stderr, "\033[31mram bob solo fail: mug/seq mismatch " + "(got %" PRIc3_h "/%" PRIc3_w ")\033[0m\r\n", + u3a_bob_mug(out), u3a_bob_seq(out)); + ret_i = 0; + } + if ( u3_none != out ) u3z(out); + u3z(ref); + } + + // cell containing bob atom + // + { + u3_noun ref = u3nt(42, u3i_blob(0x12345678, 1), 99); + ret_i &= _test_ram_bob_spec("cell-with-bob", ref); + u3z(ref); + } + + // repeated bob: should trigger backref path after 
first occurrence + // + { + u3_noun bob = u3i_blob(0x12345678, 2); + u3_noun ref = u3nc(u3k(bob), u3nc(u3k(bob), bob)); + ret_i &= _test_ram_bob_spec("bob-repeat", ref); + u3z(ref); + } + + // mixed: two distinct bobs + normal atoms + nesting + // + { + u3_noun ref = u3nq( + u3i_blob(0x12345678, 1), + u3nc(c3__fast, u3i_blob(0x7a0b0000, 7)), + u3i_blob(0x12345678, 2), + 0x1234567890abcdefULL); + ret_i &= _test_ram_bob_spec("mixed", ref); + u3z(ref); + } + + _ram_cleanup_tmp(); + return ret_i; +} + +/* _test_ram_invalid(): u3s_tap_xeno rejects malformed input. +*/ +static c3_i +_test_ram_invalid(void) +{ + c3_i ret_i = 1; + + // too short (< 5 byte header) + // + { + c3_y byt_y[3] = { 'R', 'A', 'M' }; + if ( u3_none != u3s_tap_xeno(sizeof(byt_y), byt_y) ) { + fprintf(stderr, "\033[31mram invalid short fail\033[0m\r\n"); + ret_i = 0; + } + } + + // bad magic + // + { + c3_y byt_y[6] = { 'J', 'A', 'M', 0x00, 0x01, 0x00 }; + if ( u3_none != u3s_tap_xeno(sizeof(byt_y), byt_y) ) { + fprintf(stderr, "\033[31mram invalid magic fail\033[0m\r\n"); + ret_i = 0; + } + } + + // unsupported version (0x02) + // + { + c3_y byt_y[6] = { 'R', 'A', 'M', 0x00, 0x02, 0x00 }; + if ( u3_none != u3s_tap_xeno(sizeof(byt_y), byt_y) ) { + fprintf(stderr, "\033[31mram invalid version fail\033[0m\r\n"); + ret_i = 0; + } + } + + return ret_i; +} + /* main(): run all test cases. 
*/ int @@ -255,10 +583,32 @@ main(int argc, char* argv[]) exit(1); } - // GC - // u3m_grab(); - fprintf(stderr, "test jam: ok\r\n"); + + if ( !_test_ram_roundtrip() ) { + fprintf(stderr, "test ram: failed\r\n"); + exit(1); + } + + u3m_grab(); + fprintf(stderr, "test ram: ok\r\n"); + + if ( !_test_ram_bob_roundtrip() ) { + fprintf(stderr, "test ram bob: failed\r\n"); + exit(1); + } + + u3m_grab(); + fprintf(stderr, "test ram bob: ok\r\n"); + + if ( !_test_ram_invalid() ) { + fprintf(stderr, "test ram invalid: failed\r\n"); + exit(1); + } + + u3m_grab(); + fprintf(stderr, "test ram invalid: ok\r\n"); + return 0; } diff --git a/pkg/noun/vortex.h b/pkg/noun/vortex.h index cef0c17ffe..8fc65847dd 100644 --- a/pkg/noun/vortex.h +++ b/pkg/noun/vortex.h @@ -24,12 +24,17 @@ ** Holds one u3a_blob.use_w ref until the owning event is committed ** to the event log (at which point the ref becomes an event-log ref), ** or until the lease expires (TTL). + ** + ** dead_o: c3y after the lease has been committed (or otherwise invalidated). + ** Set by _mars_fact via the reverse index; the lease pointer may remain in + ** the expiry priority queue until it bubbles to the top and is freed. */ typedef struct _u3v_lease { c3_d res_d; // reservation id (monotonic counter) c3_d exp_d; // expiry time (Unix ms); 0 = no expiry c3_h mug_h; // blob mug c3_w seq_w; // blob seq within mug bucket + c3_o dead_o; // c3y if lease has been committed/invalidated c3_c stg_c[4096]; // staging path that was installed (for logging) } u3v_lease; @@ -38,11 +43,13 @@ ** Lives in u3v_home, checkpointed in image.bin. 
** blb_p: HAMT mapping blob_id (u64 = mug<<32|seq) -> u3a_blob loom offset ** res_p: HAMT mapping res_id (u64) -> u3v_lease loom offset + ** rev_p: HAMT mapping blob_id (u64 = mug<<32|seq) -> res_d ** nxt_d: monotonic reservation counter */ typedef struct _u3v_bank { u3p(u3h_root) blb_p; // blob_id -> u3a_blob* u3p(u3h_root) res_p; // res_id -> u3v_lease* + u3p(u3h_root) rev_p; // blob_id -> res_d c3_d nxt_d; // next reservation id } u3v_bank; diff --git a/pkg/vere/lord.c b/pkg/vere/lord.c index 36ee7e81b7..0a29a3271b 100644 --- a/pkg/vere/lord.c +++ b/pkg/vere/lord.c @@ -630,7 +630,7 @@ _lord_on_plea(void* ptr_v, c3_d len_d, c3_y* byt_y) case c3__quiz: { _lord_plea_quiz(god_u, u3k(dat)); - } + } break; case c3__blob: { _lord_plea_blob(god_u, u3k(dat)); @@ -695,7 +695,7 @@ _lord_writ_make(u3_lord* god_u, u3_writ* wit_u) case u3_writ_quiz: { msg = u3nt(c3__quiz, wit_u->qiz_u.qiz_m, u3_nul); - } + } break; case u3_writ_exit: { msg = u3nc(c3__exit, u3_nul); diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index 7847d6cadb..be60d95bf6 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -18,6 +18,83 @@ c3_c tac_c[256]; // tracing label +/* _mars_lease_pq: min-heap of u3v_lease*, keyed by lea_u->exp_d. +** +** C-heap structure (not in loom). Leases are owned by the PQ — +** it is the sole place that c3_free()s them. Committed leases are +** marked dead_o=c3y via the rev_p reverse index; the sweeper pops +** and frees them when they bubble to the top. 
+*/ +typedef struct _mars_lease_pq { + u3v_lease** arr_u; + c3_z len_z; + c3_z cap_z; +} _mars_lease_pq; + +static _mars_lease_pq _mars_pq = { 0, 0, 0 }; + +static inline void +_mars_pq_swap(_mars_lease_pq* pq_u, c3_z i_z, c3_z j_z) +{ + u3v_lease* t_u = pq_u->arr_u[i_z]; + pq_u->arr_u[i_z] = pq_u->arr_u[j_z]; + pq_u->arr_u[j_z] = t_u; +} + +static void +_mars_pq_up(_mars_lease_pq* pq_u, c3_z i_z) +{ + while ( i_z > 0 ) { + c3_z p_z = (i_z - 1) >> 1; + if ( pq_u->arr_u[p_z]->exp_d <= pq_u->arr_u[i_z]->exp_d ) break; + _mars_pq_swap(pq_u, p_z, i_z); + i_z = p_z; + } +} + +static void +_mars_pq_down(_mars_lease_pq* pq_u, c3_z i_z) +{ + c3_z n_z = pq_u->len_z; + while ( 1 ) { + c3_z l_z = (i_z << 1) + 1; + c3_z r_z = l_z + 1; + c3_z s_z = i_z; + if ( l_z < n_z && pq_u->arr_u[l_z]->exp_d < pq_u->arr_u[s_z]->exp_d ) s_z = l_z; + if ( r_z < n_z && pq_u->arr_u[r_z]->exp_d < pq_u->arr_u[s_z]->exp_d ) s_z = r_z; + if ( s_z == i_z ) break; + _mars_pq_swap(pq_u, i_z, s_z); + i_z = s_z; + } +} + +static void +_mars_pq_push(_mars_lease_pq* pq_u, u3v_lease* lea_u) +{ + if ( pq_u->len_z == pq_u->cap_z ) { + pq_u->cap_z = pq_u->cap_z ? (pq_u->cap_z << 1) : 16; + pq_u->arr_u = c3_realloc(pq_u->arr_u, pq_u->cap_z * sizeof(*pq_u->arr_u)); + } + pq_u->arr_u[pq_u->len_z++] = lea_u; + _mars_pq_up(pq_u, pq_u->len_z - 1); +} + +static u3v_lease* +_mars_pq_peek(_mars_lease_pq* pq_u) +{ + return pq_u->len_z ? 
pq_u->arr_u[0] : 0; +} + +static u3v_lease* +_mars_pq_pop(_mars_lease_pq* pq_u) +{ + if ( !pq_u->len_z ) return 0; + u3v_lease* r_u = pq_u->arr_u[0]; + pq_u->arr_u[0] = pq_u->arr_u[--pq_u->len_z]; + if ( pq_u->len_z ) _mars_pq_down(pq_u, 0); + return r_u; +} + /* :: peek=[gang (each path $%([%once @tas @tas path] [%beam @tas beam]))] :: ovum=ovum @@ -306,37 +383,34 @@ _mars_fact(u3_mars* mar_u, fprintf(blt_f, "%" PRIc3_h " %" PRIc3_w "\n", mug_h, seq_w); } - // free the lease (if any) for this blob - // walk res_p looking for a matching mug+seq - // - // XX: linear scan over res_p; replace with reverse index if needed + // mark the lease (if any) for this blob as dead via reverse index. + // The lease struct is freed later by the expiry sweeper when it + // bubbles to the top of the PQ. res_p and rev_p entries are + // removed here. // - u3_noun rkey = 0; { - u3p(u3h_root) res_p = u3H->ban_u.res_p; - // u3h_walk is not available; iterate nxt_d ids - // (leases are identified by res_d; we need to find the one for this blob) - // Use a simple approach: remember lease ptr stored as c3_d value - // We don't have a reverse index, so scan all res_d from 0..nxt_d-1. 
- // - for ( c3_d rid_d = 0; rid_d < u3H->ban_u.nxt_d; rid_d++ ) { - u3_noun k = u3i_chub(rid_d); - u3_weak v = u3h_get(res_p, k); - if ( u3_none != v ) { + u3_noun revkey = u3i_chub(acc.ids[i_z]); + u3_weak rv = u3h_get(u3H->ban_u.rev_p, revkey); + if ( u3_none != rv ) { + c3_d res_d = 0; + u3r_safe_chub(rv, &res_d); + + u3_noun rkey = u3i_chub(res_d); + u3_weak lv = u3h_get(u3H->ban_u.res_p, rkey); + if ( u3_none != lv ) { c3_d ptr_d = 0; - u3r_safe_chub(v, &ptr_d); + u3r_safe_chub(lv, &ptr_d); u3v_lease* lea_u = (u3v_lease*)(uintptr_t)ptr_d; - if ( lea_u && lea_u->mug_h == mug_h && lea_u->seq_w == seq_w ) { - rkey = k; - c3_free(lea_u); - u3h_del(res_p, k); - u3z(k); - break; + if ( lea_u ) { + lea_u->dead_o = c3y; } + u3h_del(u3H->ban_u.res_p, rkey); } - u3z(k); + u3z(rkey); + + u3h_del(u3H->ban_u.rev_p, revkey); } - (void)rkey; + u3z(revkey); } } @@ -676,44 +750,69 @@ _mars_work(u3_mars* mar_u, u3_noun jar) u3_noun tag, dat, pro; // lease expiry sweeper: free leases for blobs that were never committed. - // If the blob has no event-log refs (blb_p == 0), delete the file. + // Uses a min-heap PQ keyed by exp_d: peek at the root, stop once the + // earliest-expiring lease is still in the future. + // + // Committed leases are marked dead_o=c3y by _mars_fact (via rev_p) and + // left in the PQ; they are freed here when they bubble to the top. 
// { struct timeval tv_u; gettimeofday(&tv_u, 0); c3_d now_d = (c3_d)tv_u.tv_sec * 1000ULL + (c3_d)tv_u.tv_usec / 1000ULL; - for ( c3_d rid_d = 0; rid_d < u3H->ban_u.nxt_d; rid_d++ ) { - u3_noun k = u3i_chub(rid_d); - u3_weak v = u3h_get(u3H->ban_u.res_p, k); - if ( u3_none != v ) { - c3_d ptr_d = 0; - u3r_safe_chub(v, &ptr_d); - u3v_lease* lea_u = (u3v_lease*)(uintptr_t)ptr_d; - if ( lea_u && lea_u->exp_d && now_d > lea_u->exp_d ) { - // lease expired — check if blob has any event-log refs - // - c3_d bid_d = ((c3_d)lea_u->mug_h << 32) | (c3_d)lea_u->seq_w; - u3_noun bk = u3i_chub(bid_d); - u3_weak bv = u3h_get(u3H->ban_u.blb_p, bk); - c3_w ref_w = 0; - if ( u3_none != bv ) { - u3r_safe_word(bv, &ref_w); - } - u3z(bk); + while ( 1 ) { + u3v_lease* top_u = _mars_pq_peek(&_mars_pq); + if ( !top_u ) break; - if ( 0 == ref_w ) { - // no event-log refs — blob was never committed; delete it - // - u3_blob_delete(u3C.dir_c, lea_u->mug_h, lea_u->seq_w); - fprintf(stderr, "mars: blob: expired lease, deleted %" PRIc3_h - "/%" PRIc3_w "\r\n", lea_u->mug_h, lea_u->seq_w); - } - c3_free(lea_u); - u3h_del(u3H->ban_u.res_p, k); - } + // dead lease (already committed) — free and continue scanning + // + if ( c3y == top_u->dead_o ) { + _mars_pq_pop(&_mars_pq); + c3_free(top_u); + continue; + } + + // earliest expiry is still in the future — stop + // + if ( !top_u->exp_d || now_d <= top_u->exp_d ) { + break; + } + + // expired lease — check if blob has any event-log refs + // + _mars_pq_pop(&_mars_pq); + + c3_d bid_d = ((c3_d)top_u->mug_h << 32) | (c3_d)top_u->seq_w; + u3_noun bk = u3i_chub(bid_d); + u3_weak bv = u3h_get(u3H->ban_u.blb_p, bk); + c3_w ref_w = 0; + if ( u3_none != bv ) { + u3r_safe_word(bv, &ref_w); } - u3z(k); + u3z(bk); + + if ( 0 == ref_w ) { + // no event-log refs — blob was never committed; delete it + // + u3_blob_delete(u3C.dir_c, top_u->mug_h, top_u->seq_w); + fprintf(stderr, "mars: blob: expired lease, deleted %" PRIc3_h + "/%" PRIc3_w "\r\n", top_u->mug_h, 
top_u->seq_w); + } + + // remove from res_p and rev_p + // + { + u3_noun rkey = u3i_chub(top_u->res_d); + u3h_del(u3H->ban_u.res_p, rkey); + u3z(rkey); + + u3_noun revkey = u3i_chub(bid_d); + u3h_del(u3H->ban_u.rev_p, revkey); + u3z(revkey); + } + + c3_free(top_u); } } @@ -900,9 +999,10 @@ _mars_work(u3_mars* mar_u, u3_noun jar) // c3_d res_d = u3H->ban_u.nxt_d++; u3v_lease* lea_u = c3_malloc(sizeof(*lea_u)); - lea_u->res_d = res_d; - lea_u->mug_h = mug_h; - lea_u->seq_w = seq_w; + lea_u->res_d = res_d; + lea_u->mug_h = mug_h; + lea_u->seq_w = seq_w; + lea_u->dead_o = c3n; // TTL: 5 minutes from now (Unix ms) { struct timeval tv_u; @@ -913,9 +1013,21 @@ _mars_work(u3_mars* mar_u, u3_noun jar) } snprintf(lea_u->stg_c, sizeof(lea_u->stg_c), "%s", stg_c); + // record: res_d -> lease ptr (for commit-time dead-mark and sweep) + // u3_noun rkey = u3i_chub(res_d); u3h_put(u3H->ban_u.res_p, rkey, u3i_chub((c3_d)(uintptr_t)lea_u)); u3z(rkey); + + // record: blob_id -> res_d (reverse index for O(1) commit lookup) + // + u3_noun revkey = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_w); + u3h_put(u3H->ban_u.rev_p, revkey, u3i_chub(res_d)); + u3z(revkey); + + // push onto expiry PQ (ownership: PQ frees on pop) + // + _mars_pq_push(&_mars_pq, lea_u); } } else { From 2137f0d6ba575be21156a0c0c6eab55be2464cb2 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Tue, 14 Apr 2026 09:42:09 -0500 Subject: [PATCH 08/31] wip: blob 8 --- build.zig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/build.zig b/build.zig index 33ce5afc71..b87fed0fdc 100644 --- a/build.zig +++ b/build.zig @@ -742,6 +742,11 @@ fn buildBinary( .file = "pkg/vere/newt_tests.c", .deps = vere_test_deps, }, + .{ + .name = "blob-test", + .file = "pkg/vere/blob_tests.c", + .deps = vere_test_deps, + }, .{ .name = "vere-noun-test", .file = "pkg/vere/noun_tests.c", From 77c91e05afd0d55a32a1849d54308010eb166bea Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Tue, 14 Apr 2026 10:20:30 -0500 Subject: [PATCH 09/31] wip: 
blob 9 --- pkg/vere/blob.c | 6 +++++- pkg/vere/io/conn.c | 6 +++++- pkg/vere/io/lick.c | 6 +++++- pkg/vere/lord.c | 22 ++++++++++------------ pkg/vere/mars.c | 29 ++++++++++------------------- pkg/vere/mars.h | 8 ++++++-- pkg/vere/newt.c | 3 ++- pkg/vere/vere.h | 6 +++++- 8 files changed, 48 insertions(+), 38 deletions(-) diff --git a/pkg/vere/blob.c b/pkg/vere/blob.c index f931380ffa..943197d017 100644 --- a/pkg/vere/blob.c +++ b/pkg/vere/blob.c @@ -407,8 +407,12 @@ u3_blob_load(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) // use u3i_slab (c3_d length) to correctly handle blobs >4 GiB. // bloq 3 = bytes; len_d = byte count. // + // NB: use u3i_slab_init (not u3i_slab_bare) so the trailing bytes of + // the last loom word are zeroed when len_d isn't word-aligned. + // Otherwise u3r_met/u3r_word/etc. would read garbage from those bytes. + // u3i_slab sab_u; - u3i_slab_bare(&sab_u, 3, len_d); + u3i_slab_init(&sab_u, 3, len_d); memcpy(sab_u.buf_y, map_v, (size_t)len_d); munmap(map_v, (size_t)len_d); diff --git a/pkg/vere/io/conn.c b/pkg/vere/io/conn.c index c032d90004..6f2b9abc95 100644 --- a/pkg/vere/io/conn.c +++ b/pkg/vere/io/conn.c @@ -587,10 +587,14 @@ _conn_read_peel(u3_conn* con_u, u3_noun dat) } /* _conn_moor_poke(): called on message read from u3_moor. +** +** conn only speaks the legacy jam (0x00) wire format; the version byte +** is ignored here. */ static c3_o -_conn_moor_poke(void* ptr_v, c3_d len_d, c3_y* byt_y) +_conn_moor_poke(void* ptr_v, c3_y ver_y, c3_d len_d, c3_y* byt_y) { + (void)ver_y; u3_weak jar; u3_noun can, rid, tag, dat, rud = u3_nul, tar, wir, cad; u3_chan* can_u = (u3_chan*)ptr_v; diff --git a/pkg/vere/io/lick.c b/pkg/vere/io/lick.c index 87e0228294..b4d02aa0e5 100644 --- a/pkg/vere/io/lick.c +++ b/pkg/vere/io/lick.c @@ -156,10 +156,14 @@ _lick_close_cb(uv_handle_t* had_u) } /* _lick_moor_poke(): called on message read from u3_moor. +** +** lick only speaks the legacy jam (0x00) wire format; the version byte +** is ignored here. 
*/ static c3_o -_lick_moor_poke(void* ptr_v, c3_d len_d, c3_y* byt_y) +_lick_moor_poke(void* ptr_v, c3_y ver_y, c3_d len_d, c3_y* byt_y) { + (void)ver_y; u3_weak put; u3_noun dev, nam, dat, wir, cad; diff --git a/pkg/vere/lord.c b/pkg/vere/lord.c index 0a29a3271b..be81f4ad98 100644 --- a/pkg/vere/lord.c +++ b/pkg/vere/lord.c @@ -557,7 +557,7 @@ _lord_plea_work(u3_lord* god_u, u3_noun dat) /* _lord_on_plea(): handle plea from serf. */ static c3_o -_lord_on_plea(void* ptr_v, c3_d len_d, c3_y* byt_y) +_lord_on_plea(void* ptr_v, c3_y ver_y, c3_d len_d, c3_y* byt_y) { u3_lord* god_u = ptr_v; u3_noun tag, dat; @@ -572,12 +572,11 @@ _lord_on_plea(void* ptr_v, c3_d len_d, c3_y* byt_y) u3t_event_trace("king ipc cue", 'B'); #endif - // decode incoming message: try ram first, fall back to jam + // pick decoder by protocol version (0x01 = ram, 0x00 = jam) // - jar = u3s_tap_xeno(len_d, byt_y); - if ( u3_none == jar ) { - jar = u3s_cue_xeno_with(god_u->sil_u, len_d, byt_y); - } + jar = ( 0x01 == ver_y ) + ? u3s_tap_xeno(len_d, byt_y) + : u3s_cue_xeno_with(god_u->sil_u, len_d, byt_y); #ifdef LORD_TRACE_CUE u3t_event_trace("king ipc cue", 'E'); @@ -1281,16 +1280,15 @@ _lord_on_serf_boot_bail(void* ptr_v, /* _lord_on_plea_boot(): handle plea from serf. */ static c3_o -_lord_on_plea_boot(void* ptr_v, c3_d len_d, c3_y* byt_y) +_lord_on_plea_boot(void* ptr_v, c3_y ver_y, c3_d len_d, c3_y* byt_y) { _lord_boot* bot_u = ptr_v; - // decode incoming message: try ram first, fall back to jam + // pick decoder by protocol version (0x01 = ram, 0x00 = jam) // - u3_weak jar = u3s_tap_xeno(len_d, byt_y); - if ( u3_none == jar ) { - jar = u3s_cue_xeno_with(bot_u->sil_u, len_d, byt_y); - } + u3_weak jar = ( 0x01 == ver_y ) + ? 
u3s_tap_xeno(len_d, byt_y) + : u3s_cue_xeno_with(bot_u->sil_u, len_d, byt_y); u3_noun tag, dat; if ( u3_none == jar ) { diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index be60d95bf6..abe79d99f1 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -1218,7 +1218,7 @@ _mars_step_trace(const c3_c* dir_c) /* u3_mars_kick(): maybe perform a task. */ c3_o -u3_mars_kick(void* ram_u, c3_d len_d, c3_y* hun_y) +u3_mars_kick(void* ram_u, c3_y ver_y, c3_d len_d, c3_y* hun_y) { u3_mars* mar_u = ram_u; c3_o ret_o = c3n; @@ -1228,12 +1228,11 @@ u3_mars_kick(void* ram_u, c3_d len_d, c3_y* hun_y) // XX optimize for stateless tasks w/ peek-next // if ( u3_mars_work_e == mar_u->sat_e ) { - // decode incoming message: try ram first, fall back to jam + // pick decoder by protocol version (0x01 = ram, 0x00 = jam) // - u3_weak jar = u3s_tap_xeno(len_d, hun_y); - if ( u3_none == jar ) { - jar = u3s_cue_xeno_with(mar_u->sil_u, len_d, hun_y); - } + u3_weak jar = ( 0x01 == ver_y ) + ? u3s_tap_xeno(len_d, hun_y) + : u3s_cue_xeno_with(mar_u->sil_u, len_d, hun_y); // parse errors are fatal // @@ -2231,7 +2230,7 @@ u3_mars_make(u3_mars* mar_u) * */ c3_o -u3_mars_boot(u3_mars* mar_u, c3_d len_d, c3_y* hun_y) +u3_mars_boot(u3_mars* mar_u, c3_y ver_y, c3_d len_d, c3_y* hun_y) { u3_disk* log_u = mar_u->log_u; u3_boot_opts inp_u; @@ -2256,19 +2255,11 @@ u3_mars_boot(u3_mars* mar_u, c3_d len_d, c3_y* hun_y) } { - // decode boot message: try ram first, fall back to jam + // pick decoder by protocol version (0x01 = ram, 0x00 = jam) // - u3_weak jar = u3s_tap_xeno(len_d, hun_y); - if ( u3_none == jar ) { - fprintf(stderr, "boot: tap failed (len=%" PRIu64 " hdr=%02x%02x%02x%02x%02x), trying cue\r\n", - len_d, - (len_d > 0) ? hun_y[0] : 0, - (len_d > 1) ? hun_y[1] : 0, - (len_d > 2) ? hun_y[2] : 0, - (len_d > 3) ? hun_y[3] : 0, - (len_d > 4) ? hun_y[4] : 0); - jar = u3s_cue_xeno(len_d, hun_y); - } + u3_weak jar = ( 0x01 == ver_y ) + ? 
u3s_tap_xeno(len_d, hun_y) + : u3s_cue_xeno(len_d, hun_y); if ( (u3_none == jar) || (c3n == u3r_p(jar, c3__boot, &com)) ) { diff --git a/pkg/vere/mars.h b/pkg/vere/mars.h index 91ff6a4ec0..eb3e664042 100644 --- a/pkg/vere/mars.h +++ b/pkg/vere/mars.h @@ -62,9 +62,11 @@ u3_mars_make(u3_mars* mar_u); /* u3_mars_boot(): boot a new ship. + ** + ** ver_y is the newt protocol version byte (0x00=jam, 0x01=ram). */ c3_o - u3_mars_boot(u3_mars* mar_u, c3_d len_d, c3_y* hun_y); + u3_mars_boot(u3_mars* mar_u, c3_y ver_y, c3_d len_d, c3_y* hun_y); /* u3_mars_load(): load pier. */ @@ -77,9 +79,11 @@ u3_mars_work(u3_mars* mar_u); /* u3_mars_kick(): try to send a task into mars. + ** + ** ver_y is the newt protocol version byte (0x00=jam, 0x01=ram). */ c3_o - u3_mars_kick(void* ram_u, c3_d len_d, c3_y* hun_y); + u3_mars_kick(void* ram_u, c3_y ver_y, c3_d len_d, c3_y* hun_y); /* u3_mars_grab(): garbage collect. */ diff --git a/pkg/vere/newt.c b/pkg/vere/newt.c index 4a2ad57eb6..fda0c701e3 100644 --- a/pkg/vere/newt.c +++ b/pkg/vere/newt.c @@ -67,7 +67,8 @@ _newt_meat_poke(u3_moat* mot_u) if ( met_u ) { uv_timer_start(&mot_u->tim_u, _newt_meat_next_cb, 0, 0); - if ( c3y == mot_u->pok_f(mot_u->ptr_v, met_u->len_d, met_u->hun_y) ) { + if ( c3y == mot_u->pok_f(mot_u->ptr_v, met_u->ver_y, + met_u->len_d, met_u->hun_y) ) { mot_u->ext_u = met_u->nex_u; if ( !mot_u->ext_u ) { diff --git a/pkg/vere/vere.h b/pkg/vere/vere.h index 5f4dbfd693..e151424464 100644 --- a/pkg/vere/vere.h +++ b/pkg/vere/vere.h @@ -51,8 +51,12 @@ } u3_lane; /* u3_moor_poke: poke callback function. + ** + ** ver_y is the newt protocol version byte from the message header + ** (0x00 = jam, 0x01 = ram). Handlers that decode the payload use it + ** to pick the correct deserializer; raw-bytes handlers may ignore it. */ - typedef c3_o (*u3_moor_poke)(void*, c3_d, c3_y*); + typedef c3_o (*u3_moor_poke)(void*, c3_y ver_y, c3_d, c3_y*); /* u3_moor_bail: bailout callback function. 
*/ From f6f1cef9daa8378bc03b8535c239e36261954d84 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Tue, 14 Apr 2026 11:45:23 -0500 Subject: [PATCH 10/31] wip: blob 10 --- pkg/vere/newt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pkg/vere/newt.c b/pkg/vere/newt.c index fda0c701e3..5ad6def101 100644 --- a/pkg/vere/newt.c +++ b/pkg/vere/newt.c @@ -131,10 +131,14 @@ u3_newt_decode(u3_moat* mot_u, c3_y* buf_y, c3_d len_d) return c3n; } - // await body, stash version + // NB: hed_y points into mes_u->hed_u, which shares storage with + // mes_u->tal_u via a union. _newt_mess_tail writes tal_u.met_u, + // clobbering hed_y[0..4]. Capture the version byte BEFORE that + // write, then stash it into the allocated meat. // + c3_y ver_y = hed_y[0]; _newt_mess_tail(mes_u, met_d); - mes_u->tal_u.met_u->ver_y = hed_y[0]; + mes_u->tal_u.met_u->ver_y = ver_y; } } break; From b8c26693d4fd98f3edf9c07712944feb22c917c8 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Wed, 15 Apr 2026 08:42:38 -0500 Subject: [PATCH 11/31] wip: blob 11 --- pkg/noun/vortex.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pkg/noun/vortex.h b/pkg/noun/vortex.h index 8fc65847dd..5c928166b9 100644 --- a/pkg/noun/vortex.h +++ b/pkg/noun/vortex.h @@ -55,13 +55,19 @@ /* u3v_home: all internal (within image) state. ** NB: version must first for ease of migration. + ** + ** ban_u sits at the end so pre-blob-storage V5 snapshots still + ** load cleanly: old binaries never wrote past their (smaller) + ** sizeof(u3v_home), so the bytes at ban_u's new position are + ** reliably zero (MAP_ANON origin, persisted in saved pages). + ** _find_home's lazy-init turns those zeros into empty HAMTs. 
*/ typedef struct _u3v_home { u3v_version ver_d; // version number c3_d pam_d; // parameters u3v_arvo arv_u; // arvo state - u3v_bank ban_u; // blob bank u3a_road rod_u; // storage state + u3v_bank ban_u; // blob bank (NB: must stay last) } u3v_home; From 816d0584c3fdb2cfb188c26710663f274bc01768 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Wed, 15 Apr 2026 12:13:14 -0500 Subject: [PATCH 12/31] wip: blob 12 --- pkg/noun/jets/c/cut.c | 53 +++++++++- pkg/noun/retrieve.c | 7 ++ pkg/vere/io/cttp.c | 9 ++ pkg/vere/io/http.c | 224 +++++++++++++++++++++++++++++++++++++----- pkg/vere/vere.h | 17 ++++ 5 files changed, 286 insertions(+), 24 deletions(-) diff --git a/pkg/noun/jets/c/cut.c b/pkg/noun/jets/c/cut.c index dc118a5b17..a189f55d5e 100644 --- a/pkg/noun/jets/c/cut.c +++ b/pkg/noun/jets/c/cut.c @@ -5,6 +5,7 @@ #include "noun.h" +#include u3_noun u3qc_cut(u3_atom a, @@ -36,7 +37,57 @@ if ( (b_w == 0) && (c_w == len_w) ) { return u3k(d); } - else { + + // bob-aware fast path for byte-aligned cuts: mmap the blob and + // memcpy the requested byte range directly into the slab. No + // full-blob materialization (u3r_chop on a bob atom would call + // u3r_blob_load, which allocates and copies the entire file into + // the loom). + // + // We require bloq >= 3 so the cut range is a whole number of + // bytes; bit-level cuts (a_g < 3) are rare and fall through to + // the generic path below. + // + if ( (a_g >= 3) && (c3y == u3a_is_bob(d)) ) { + c3_d map_d = 0; + const c3_y* map_y = u3r_blob_map(d, &map_d); + + if ( map_y ) { + c3_g shf_g = a_g - 3; // bloq -> byte shift + c3_d off_d = (c3_d)b_w << shf_g; // byte offset in blob + c3_d byt_d = (c3_d)c_w << shf_g; // bytes to copy + + // clamp against actual file size. len_w (from u3r_met) + // reflects the atom's significant-bit length with trailing + // zeros stripped; the on-disk file may be a bit shorter + // than implied by the bloq count if the tail-word is + // partially significant. 
read only what's in the file; + // u3i_slab_init already zero-initialized the slab so any + // bytes past EOF remain as implicit zeros. + // + c3_d cpy_d = byt_d; + if ( off_d >= map_d ) { + cpy_d = 0; + } + else if ( off_d + cpy_d > map_d ) { + cpy_d = map_d - off_d; + } + + u3i_slab sab_u; + u3i_slab_init(&sab_u, a_g, c_w); + + if ( cpy_d ) { + memcpy(sab_u.buf_y, map_y + off_d, (size_t)cpy_d); + } + + u3r_blob_unmap(map_y, map_d); + return u3i_slab_mint(&sab_u); + } + // mmap failed (missing blob file) — fall through to u3r_chop, + // which will silently return zero via u3r_blob_load → u3_none. + } + + { u3i_slab sab_u; u3i_slab_init(&sab_u, a_g, c_w); diff --git a/pkg/noun/retrieve.c b/pkg/noun/retrieve.c index 8a48212ae4..c028cad3ff 100644 --- a/pkg/noun/retrieve.c +++ b/pkg/noun/retrieve.c @@ -2467,6 +2467,13 @@ u3r_blob_map(u3_atom a, c3_d* len_d) return 0; } + // mirror u3_blob_load / u3_blob_save_fd: we read these forward and + // rarely twice. MADV_SEQUENTIAL lets the kernel page ahead and drop + // pages we've already passed, keeping the HTTP-streaming page cache + // bounded regardless of file size. 
+ // + madvise(map_v, (size_t)*len_d, MADV_SEQUENTIAL); + return (const c3_y*)map_v; } diff --git a/pkg/vere/io/cttp.c b/pkg/vere/io/cttp.c index 9d219d929b..f3945a8b4a 100644 --- a/pkg/vere/io/cttp.c +++ b/pkg/vere/io/cttp.c @@ -93,6 +93,9 @@ _cttp_bod_new(c3_w len_w, c3_c* hun_c) u3_hbod* bod_u = c3_malloc(1 + len_w + sizeof(*bod_u)); bod_u->hun_y[len_w] = 0; bod_u->len_w = len_w; + bod_u->map_y = 0; + bod_u->own_y = 0; + bod_u->map_d = 0; memcpy(bod_u->hun_y, (const c3_y*)hun_c, len_w); bod_u->nex_u = 0; @@ -114,6 +117,9 @@ _cttp_bod_from_hed(u3_hhed* hed_u) memcpy(bod_u->hun_y + hed_u->nam_w + 2 + hed_u->val_w, "\r\n", 2); bod_u->len_w = len_w; + bod_u->map_y = 0; + bod_u->own_y = 0; + bod_u->map_d = 0; bod_u->nex_u = 0; return bod_u; @@ -170,6 +176,9 @@ _cttp_bod_from_octs(u3_noun oct) u3_hbod* bod_u = c3_malloc(1 + len_w + sizeof(*bod_u)); bod_u->hun_y[len_w] = 0; bod_u->len_w = len_w; + bod_u->map_y = 0; + bod_u->own_y = 0; + bod_u->map_d = 0; u3r_bytes(0, len_w, bod_u->hun_y, u3t(oct)); bod_u->nex_u = 0; diff --git a/pkg/vere/io/http.c b/pkg/vere/io/http.c index b7f56697e8..37a4567c7d 100644 --- a/pkg/vere/io/http.c +++ b/pkg/vere/io/http.c @@ -2,6 +2,7 @@ #include "vere.h" +#include "blob.h" #include "h2o.h" #include "noun.h" #include "openssl/err.h" @@ -168,6 +169,15 @@ _http_vec_to_atom(h2o_iovec_t vec_u) } /* _http_vec_to_octs(): convert h2o_iovec_t to (unit octs) +** +** Bodies >= U3_BLOB_THRESH are persisted to the blob store and +** handed to arvo as a bob-atom reference instead of being copied +** into the loom. h2o has already buffered the whole entity in +** memory (capped by fig_u.max_request_entity_size), so this +** inline save is fine — same model as mesa.c for large packets. +** +** If the blob save fails (disk full, permissions, etc.) fall +** back to a regular indirect atom. 
*/ static u3_noun _http_vec_to_octs(h2o_iovec_t vec_u) @@ -176,12 +186,32 @@ _http_vec_to_octs(h2o_iovec_t vec_u) return u3_nul; } + u3_atom bod = u3_none; + + if ( (c3_d)vec_u.len >= U3_BLOB_THRESH ) { + c3_h mug_h; + c3_w seq_w; + if ( c3y == u3_blob_save(u3C.dir_c, (const c3_y*)vec_u.base, + (c3_d)vec_u.len, &mug_h, &seq_w) ) + { + bod = u3i_blob(mug_h, seq_w); + } + } + if ( u3_none == bod ) { + bod = _http_vec_to_atom(vec_u); + } + // XX correct size_t -> atom? - return u3nt(u3_nul, u3i_chubs(1, (const c3_d*)&vec_u.len), - _http_vec_to_atom(vec_u)); + return u3nt(u3_nul, u3i_chubs(1, (const c3_d*)&vec_u.len), bod); } /* _cttp_bods_free(): free body structure. +** +** Ownership rule for mmap-backed chains (see u3_hbod in vere.h): +** the owner chunk (tail in bob chains) carries own_y != 0 and +** map_d != 0; earlier view chunks carry map_y != 0 but own_y == 0. +** Free walks head→tail, so the owner is released only after every +** view that referenced its mapping. */ static void _cttp_bods_free(u3_hbod* bod_u) @@ -189,12 +219,94 @@ _cttp_bods_free(u3_hbod* bod_u) while ( bod_u ) { u3_hbod* nex_u = bod_u->nex_u; + if ( bod_u->own_y ) { + // owner: release the whole mapping + u3_blob_unmap(bod_u->own_y, bod_u->map_d); + } + else if ( bod_u->map_y ) { + // view: hint kernel to drop page-cache pages we've already sent + // + madvise(bod_u->map_y, bod_u->len_w, MADV_DONTNEED); + } c3_free(bod_u); bod_u = nex_u; } } +// chunk size for mmap-backed response streaming. Each h2o_send call +// covers up to this many bytes so TLS output pools / HTTP/2 frame +// buffers / libuv write queues stay bounded and flow control drives +// the next fetch from the mmap. +// +#define U3_HTTP_BOB_CHUNK (1U << 20) + +/* _cttp_bod_from_bob(): wrap a bob atom's blob file in a chain of +** mmap-backed hbods — no copy, no loom allocation. +** +** The blob is mmap'd once; the chain slices the mapping into +** U3_HTTP_BOB_CHUNK-sized views. 
Ownership of the mapping is +** carried by the tail chunk (own_y = mmap base, map_d = map size); +** earlier chunks are pure views (own_y = 0). +** +** Returns 0 if the blob can't be mapped (missing file, empty, etc.); +** caller should fall back to the inline path. +*/ +static u3_hbod* +_cttp_bod_from_bob(u3_atom a, c3_w len_w) +{ + c3_h mug_h = u3a_bob_mug(a); + c3_w seq_w = u3a_bob_seq(a); + c3_d map_d = 0; + + const c3_y* map_y = u3_blob_map(u3C.dir_c, mug_h, seq_w, &map_d); + if ( !map_y || (c3_d)len_w > map_d ) { + if ( map_y ) { + u3_blob_unmap(map_y, map_d); + } + return 0; + } + + u3_hbod* hed_u = 0; + u3_hbod* tal_u = 0; + c3_w off_w = 0; + + while ( off_w < len_w ) { + c3_w rem_w = len_w - off_w; + c3_w cnk_w = (rem_w > U3_HTTP_BOB_CHUNK) ? U3_HTTP_BOB_CHUNK : rem_w; + + u3_hbod* bod_u = c3_malloc(sizeof(*bod_u)); + bod_u->nex_u = 0; + bod_u->len_w = cnk_w; + bod_u->map_y = (c3_y*)map_y + off_w; // iovec base (this slice) + bod_u->own_y = 0; // view, not owner + bod_u->map_d = 0; + + if ( !hed_u ) hed_u = bod_u; + else tal_u->nex_u = bod_u; + tal_u = bod_u; + + off_w += cnk_w; + } + + // last chunk is promoted to owner. map_y still points at its own + // slice (for its iovec), while own_y + map_d cover the full mmap + // region so _cttp_bods_free can munmap the whole thing at once. + // + tal_u->own_y = (c3_y*)map_y; + tal_u->map_d = map_d; + return hed_u; +} + /* _cttp_bod_from_octs(): translate octet-stream noun into body. +** +** Bob atoms take the zero-copy path: we mmap the blob file on the +** king's side and hand h2o a pointer into the mapping. Without +** this, u3r_bytes would call u3r_blob_load, which allocates a +** full-blob-sized atom in king's loom — that's where you'd see +** the RSS of the king process spike to match file size. +** +** Smaller bodies (or bob atoms whose blob file is missing) fall +** through to the original materialize-into-heap path. 
*/ static u3_hbod* _cttp_bod_from_octs(u3_noun oct) @@ -206,10 +318,26 @@ _cttp_bod_from_octs(u3_noun oct) } len_w = u3h(oct); + // zero-copy path for bob atoms + // + if ( c3y == u3a_is_bob(u3t(oct)) ) { + u3_hbod* bod_u = _cttp_bod_from_bob(u3t(oct), len_w); + if ( bod_u ) { + u3z(oct); + return bod_u; + } + // fall through: blob file missing; u3r_bytes below will bail + // through the u3r_blob_load → u3m_bail path. not ideal but + // matches pre-blob semantics. + } + { u3_hbod* bod_u = c3_malloc(1 + len_w + sizeof(*bod_u)); bod_u->hun_y[len_w] = 0; bod_u->len_w = len_w; + bod_u->map_y = 0; + bod_u->own_y = 0; + bod_u->map_d = 0; u3r_bytes(0, len_w, bod_u->hun_y, u3t(oct)); bod_u->nex_u = 0; @@ -241,7 +369,8 @@ _cttp_bods_to_vec(u3_hbod* bod_u, c3_w* tot_w) len_w = 0; while( bod_u ) { - vec_u[len_w] = h2o_iovec_init(bod_u->hun_y, bod_u->len_w); + c3_y* base_y = bod_u->map_y ? bod_u->map_y : bod_u->hun_y; + vec_u[len_w] = h2o_iovec_init(base_y, bod_u->len_w); len_w++; bod_u = bod_u->nex_u; } @@ -1268,49 +1397,98 @@ _http_hgen_dispose(void* ptr_v) } /* _http_hgen_send(): send (some/more of a) response. +** +** Pops up to U3_HTTP_BOB_CHUNK bytes of pending body from gen_u->bod_u +** and hands them to h2o in one h2o_send call. If any body remains +** after this pop, or if we're still waiting on a %continue from arvo +** (u3_hgen_wait), the send is marked IN_PROGRESS and h2o will call +** back via _http_hgen_proceed when it's ready for the next batch. +** FINAL fires only when there's nothing left to emit. +** +** Inline (small) bodies fit in a single chunk so this is a no-op +** change for them. Large bob-atom bodies chunk naturally because +** _cttp_bod_from_bob produced U3_HTTP_BOB_CHUNK-sized views — this +** is what keeps king's memory and h2o's internal buffers bounded +** while streaming multi-GiB files. 
*/ static void _http_hgen_send(u3_hgen* gen_u) { - u3_hreq* req_u = gen_u->req_u; - h2o_req_t* rec_u = req_u->rec_u; - c3_w len_w; - h2o_iovec_t* vec_u = _cttp_bods_to_vec(gen_u->bod_u, &len_w); + u3_hreq* req_u = gen_u->req_u; + h2o_req_t* rec_u = req_u->rec_u; - // not ready again until _proceed - // u3_assert( c3y == gen_u->red ); gen_u->red = c3n; - // stash [bod_u] to free later + // detach up to U3_HTTP_BOB_CHUNK bytes of body from the head of + // gen_u->bod_u into a private sub-chain to send this round. + // + u3_hbod* send_u = 0; + u3_hbod* tal_u = 0; + c3_d acc_d = 0; + + while ( gen_u->bod_u && acc_d < U3_HTTP_BOB_CHUNK ) { + u3_hbod* cur_u = gen_u->bod_u; + gen_u->bod_u = cur_u->nex_u; + cur_u->nex_u = 0; + + if ( !send_u ) send_u = cur_u; + else tal_u->nex_u = cur_u; + tal_u = cur_u; + + acc_d += cur_u->len_w; + } + + // free the previous round's sent chunks; the current batch becomes + // the new "just sent" set. its memory must stay live until h2o is + // done with the iovecs we're about to hand it. 
// _cttp_bods_free(gen_u->nud_u); - gen_u->nud_u = gen_u->bod_u; - gen_u->bod_u = 0; + gen_u->nud_u = send_u; + + // build iovec for this batch + // + c3_w len_w = 0; + h2o_iovec_t* vec_u = _cttp_bods_to_vec(send_u, &len_w); + // pick h2o send state based on whether any body still pending + // + h2o_send_state_t ste_e; switch ( gen_u->sat_e ) { case u3_hgen_wait: { - h2o_send(rec_u, vec_u, len_w, H2O_SEND_STATE_IN_PROGRESS); - uv_timer_start(req_u->tim_u, _http_req_timer_cb, 45 * 1000, 0); + ste_e = H2O_SEND_STATE_IN_PROGRESS; } break; case u3_hgen_done: { - // close connection if shutdown pending - // - u3_h2o_serv* h2o_u = req_u->hon_u->htp_u->h2o_u; - - if ( 0 != h2o_u->ctx_u.shutdown_requested ) { - rec_u->http1_is_persistent = 0; + if ( gen_u->bod_u ) { + ste_e = H2O_SEND_STATE_IN_PROGRESS; + } + else { + // last batch — close connection if shutdown pending + // + u3_h2o_serv* h2o_u = req_u->hon_u->htp_u->h2o_u; + if ( 0 != h2o_u->ctx_u.shutdown_requested ) { + rec_u->http1_is_persistent = 0; + } + ste_e = H2O_SEND_STATE_FINAL; } - - h2o_send(rec_u, vec_u, len_w, H2O_SEND_STATE_FINAL); } break; case u3_hgen_fail: { - h2o_send(rec_u, vec_u, len_w, H2O_SEND_STATE_ERROR); + ste_e = H2O_SEND_STATE_ERROR; + } break; + + default: { + ste_e = H2O_SEND_STATE_ERROR; // unreachable } break; } + h2o_send(rec_u, vec_u, len_w, ste_e); + + if ( H2O_SEND_STATE_IN_PROGRESS == ste_e ) { + uv_timer_start(req_u->tim_u, _http_req_timer_cb, 45 * 1000, 0); + } + c3_free(vec_u); } diff --git a/pkg/vere/vere.h b/pkg/vere/vere.h index e151424464..060babfc2c 100644 --- a/pkg/vere/vere.h +++ b/pkg/vere/vere.h @@ -36,10 +36,27 @@ } u3_hhed; /* u3_hbod: http body block. Also used for responses. + ** + ** Three shapes: + ** (a) inline — payload in hun_y[0..len_w]; map_y == own_y == 0. + ** (b) mmap view — map_y points into a shared mmap at the chunk's + ** offset; own_y == 0 (this chunk does not own the + ** mapping). hun_y is unused. 
+ ** (c) mmap owner — same as (b) for the iovec (map_y = base + off), + ** plus own_y = mmap base and map_d = mmap size + ** so _cttp_bods_free can munmap. + ** + ** Bob-streaming chains (see _cttp_bod_from_bob) are built as a + ** head→tail list of views with the owner as the tail chunk. + ** Head-first free then MADV_DONTNEED's every view before the + ** owner finally munmaps. */ typedef struct _u3_hbod { struct _u3_hbod* nex_u; c3_w len_w; + c3_y* map_y; // iovec base (mmap view) or NULL (inline) + c3_y* own_y; // mmap base to munmap (NULL if not owner) + c3_d map_d; // mmap size for munmap (0 if not owner) c3_y hun_y[0]; } u3_hbod; From cf3d58dc01a2bade31c70e43556d986cc0458463 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Wed, 15 Apr 2026 12:32:43 -0500 Subject: [PATCH 13/31] wip: blob 13 --- pkg/noun/jets/c/muk.c | 54 +-- pkg/noun/jets/e/adler.c | 63 +-- pkg/noun/jets/e/blake.c | 83 ++-- pkg/noun/jets/e/crc.c | 21 +- pkg/noun/jets/e/keccak.c | 16 +- pkg/noun/jets/e/ripe.c | 19 +- pkg/noun/jets/e/sha1.c | 13 +- pkg/noun/jets/e/shax.c | 57 +-- pkg/noun/retrieve.c | 86 ++++ pkg/noun/retrieve.h | 43 ++ pkg/noun/retrieve_tests.c | 143 ++++++ pkg/vere/blob_tests.c | 924 ++++++++++++++++++++++++++++++++++++++ 12 files changed, 1345 insertions(+), 177 deletions(-) create mode 100644 pkg/vere/blob_tests.c diff --git a/pkg/noun/jets/c/muk.c b/pkg/noun/jets/c/muk.c index f30f743485..0b5a3f7dec 100644 --- a/pkg/noun/jets/c/muk.c +++ b/pkg/noun/jets/c/muk.c @@ -16,46 +16,30 @@ u3qc_muk(u3_atom sed, if ( len > u3a_32_direct_max ) { return u3m_bail(c3__fail); } - else { - c3_h len_h = (c3_h)len; - c3_h key_h = u3r_met(3, key); - // NB: this condition is implicit in the pad subtraction - // - if ( key_h > len_h ) { - return u3m_bail(c3__exit); - } - else { - c3_h sed_h = u3r_half(0, sed); - c3_o loc_o = c3n; - c3_y* key_y = 0; - c3_h out_h; + c3_h len_h = (c3_h)len; + c3_h key_h = u3r_met(3, key); - // if we're hashing more bytes than we have, allocate and copy - // to 
ensure trailing null bytes - // - if ( len_h > key_h ) { - loc_o = c3y; - key_y = u3a_calloc(sizeof(c3_y), len_h); - u3r_bytes(0, len_h, key_y, key); - } - else if ( len_h > 0 ) { - // XX assumes little-endian - // - key_y = ( c3y == u3a_is_cat(key) ) - ? (c3_y*)&key - : (c3_y*)((u3a_atom*)u3a_to_ptr(key))->buf_w; - } + // NB: this condition is implicit in the pad subtraction + // + if ( key_h > len_h ) { + return u3m_bail(c3__exit); + } - MurmurHash3_x86_32(key_y, len_h, sed_h, &out_h); + c3_h sed_h = u3r_half(0, sed); + c3_h out_h; - if ( c3y == loc_o ) { - u3a_free(key_y); - } + // u3r_view_padded gives us len_h bytes — mmap-backed for bobs + // (previously would have returned seq_w bytes via the direct + // buf_w pointer and produced wrong hashes), heap-backed with + // zero-padding for atoms shorter than len_h. + // + u3r_view vu_u; + u3r_view_padded(&vu_u, key, len_h); + MurmurHash3_x86_32((c3_y*)vu_u.byt_y, len_h, sed_h, &out_h); + u3r_view_done(&vu_u); - return u3i_halfs(1, &out_h); - } - } + return u3i_halfs(1, &out_h); } u3_noun diff --git a/pkg/noun/jets/e/adler.c b/pkg/noun/jets/e/adler.c index b91eb38270..541ae150ff 100644 --- a/pkg/noun/jets/e/adler.c +++ b/pkg/noun/jets/e/adler.c @@ -20,36 +20,6 @@ static void _x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { } } -static c3_o _x_octs_buffer(u3_atom* p_octs, u3_atom *q_octs, - c3_w* p_octs_w, c3_y** buf_y, - c3_w* len_w, c3_w* lead_w) -{ - if (c3n == u3r_safe_word(*p_octs, p_octs_w)) { - return c3n; - } - - *len_w = u3r_met(3, *q_octs); - - if (c3y == u3a_is_cat(*q_octs)) { - *buf_y = (c3_y*)q_octs; - } - else { - u3a_atom* ptr_a = u3a_to_ptr(*q_octs); - *buf_y = (c3_y*)ptr_a->buf_w; - } - - *lead_w = 0; - - if (*p_octs_w > *len_w) { - *lead_w = *p_octs_w - *len_w; - } - else { - *len_w = *p_octs_w; - } - - return c3y; -} - #define BASE 65521 #define NMAX 5552 @@ -59,21 +29,32 @@ u3_noun _qe_adler32(u3_noun octs) _x_octs(octs, &p_octs, &q_octs); - c3_w p_octs_w, len_w, lead_w; - c3_y *buf_y; - 
- if (c3n == _x_octs_buffer(&p_octs, &q_octs, - &p_octs_w, &buf_y, - &len_w, &lead_w)) { + c3_w p_octs_w; + if (c3n == u3r_safe_word(p_octs, &p_octs_w)) { return u3_none; } - c3_w adler_w, sum2_w; + // zero-copy view of the atom's significant bytes (mmap for bob). + // NB: the legacy direct-pointer path through ptr_a->buf_w read + // seq_w for bob atoms, which silently produced wrong checksums; + // using u3r_view fixes that bug in addition to avoiding the + // full-blob materialization. + // + u3r_view vu_u; + u3r_view_init(&vu_u, q_octs); + const c3_y* buf_y = vu_u.byt_y; + c3_w len_w = vu_u.len_w; - adler_w = 0x1; - sum2_w = 0x0; + // clamp the bytes we'll actually scan to the declared width; the + // remainder is "leading zeros" and handled below. + // + if (p_octs_w < len_w) { + len_w = p_octs_w; + } - c3_w pos_w = 0; + c3_w adler_w = 0x1; + c3_w sum2_w = 0x0; + c3_w pos_w = 0; // Process all non-zero bytes // @@ -94,6 +75,8 @@ u3_noun _qe_adler32(u3_noun octs) sum2_w %= BASE; } + u3r_view_done(&vu_u); + // Process leading zeros // while (pos_w < p_octs_w) { diff --git a/pkg/noun/jets/e/blake.c b/pkg/noun/jets/e/blake.c index 42093307c0..63c8f3e4f1 100644 --- a/pkg/noun/jets/e/blake.c +++ b/pkg/noun/jets/e/blake.c @@ -16,24 +16,25 @@ // impossible to represent an atom this large return u3m_bail(c3__fail); } + + // the hoon adjusts these widths to its liking + int err; + c3_y out_y[64], dak_y[64]; + c3_w wik_w = c3_min(wik, 64), + out_w = c3_max(1, c3_min(out, 64)); + + u3r_view vu_u; + u3r_view_padded(&vu_u, dat, wid_w); + + u3r_bytes(0, wik_w, dak_y, dak); + err = urcrypt_blake2(wid_w, (c3_y*)vu_u.byt_y, wik_w, dak_y, out_w, out_y); + u3r_view_done(&vu_u); + + if ( 0 == err ) { + return u3i_bytes(out_w, out_y); + } else { - // the hoon adjusts these widths to its liking - int err; - c3_y out_y[64], dak_y[64]; - c3_w wik_w = c3_min(wik, 64), - out_w = c3_max(1, c3_min(out, 64)); - c3_y *dat_y = u3r_bytes_alloc(0, wid_w, dat); - - u3r_bytes(0, wik_w, dak_y, 
dak); - err = urcrypt_blake2(wid_w, dat_y, wik_w, dak_y, out_w, out_y); - u3a_free(dat_y); - - if ( 0 == err ) { - return u3i_bytes(out_w, out_y); - } - else { - return u3_none; - } + return u3_none; } } @@ -65,18 +66,20 @@ if ( !u3r_word_fit(&wid_w, wid) || !u3r_word_fit(&out_w, out) ) { return u3m_bail(c3__fail); } - else { - c3_y key_y[32]; - u3r_bytes(0, 32, key_y, key); - c3_y flags_y = u3r_byte(0, flags); - c3_y *dat_y = u3r_bytes_alloc(0, wid_w, dat); - u3i_slab sab_u; - u3i_slab_bare(&sab_u, 3, out_w); - c3_y* out_y = sab_u.buf_y; - urcrypt_blake3_hash(wid_w, dat_y, key_y, flags_y, out, out_y); - u3a_free(dat_y); - return u3i_slab_mint(&sab_u); - } + + c3_y key_y[32]; + u3r_bytes(0, 32, key_y, key); + c3_y flags_y = u3r_byte(0, flags); + + u3r_view vu_u; + u3r_view_padded(&vu_u, dat, wid_w); + + u3i_slab sab_u; + u3i_slab_bare(&sab_u, 3, out_w); + c3_y* out_y = sab_u.buf_y; + urcrypt_blake3_hash(wid_w, (c3_y*)vu_u.byt_y, key_y, flags_y, out, out_y); + u3r_view_done(&vu_u); + return u3i_slab_mint(&sab_u); } u3_noun @@ -105,16 +108,20 @@ c3_w wid_w; if ( !u3r_word_fit(&wid_w, wid) ) { return u3m_bail(c3__fail); - } else { - c3_y cv_y[32], block_y[64], block_len; - c3_y *dat_y = u3r_bytes_alloc(0, wid_w, dat); - c3_d counter_d = u3r_chub(0, counter); - c3_y flags_y = u3r_byte(0, flags); - u3r_bytes(0, 32, cv_y, cv); - urcrypt_blake3_chunk_output(wid_w, dat_y, cv_y, block_y, &block_len, &counter_d, &flags_y); - u3a_free(dat_y); - return u3i_cell(u3i_bytes(32, cv_y), u3i_qual(u3k(counter), u3i_bytes(64, block_y), block_len, flags_y)); } + + c3_y cv_y[32], block_y[64], block_len; + c3_d counter_d = u3r_chub(0, counter); + c3_y flags_y = u3r_byte(0, flags); + u3r_bytes(0, 32, cv_y, cv); + + u3r_view vu_u; + u3r_view_padded(&vu_u, dat, wid_w); + + urcrypt_blake3_chunk_output(wid_w, (c3_y*)vu_u.byt_y, cv_y, + block_y, &block_len, &counter_d, &flags_y); + u3r_view_done(&vu_u); + return u3i_cell(u3i_bytes(32, cv_y), u3i_qual(u3k(counter), u3i_bytes(64, block_y), 
block_len, flags_y)); } u3_noun diff --git a/pkg/noun/jets/e/crc.c b/pkg/noun/jets/e/crc.c index a405781afa..8e599af072 100644 --- a/pkg/noun/jets/e/crc.c +++ b/pkg/noun/jets/e/crc.c @@ -18,25 +18,22 @@ u3qe_crc32(u3_noun input_octs) if ( c3n == u3r_safe_word(head, &hed_w) ) { return u3m_bail(c3__fail); } - c3_y* input; - - if (c3y == u3a_is_cat(tail)) { - input = (c3_y*)&tail; - } - else { - u3a_atom* vat_u = u3a_to_ptr(tail); - // XX: little endian - input = (c3_y*)vat_u->buf_w; - } if ( tel_w > hed_w ) { return u3m_error("subtract-underflow"); } + // zero-copy view of the atom's significant bytes (mmap for bob). + // NB: the legacy vat_u->buf_w path returned seq_w for bob atoms; + // u3r_view gives the actual file bytes. + // + u3r_view vu_u; + u3r_view_init(&vu_u, tail); + c3_w led_w = hed_w - tel_w; - c3_w crc_w = 0; + c3_w crc_w = crc32(0, vu_u.byt_y, tel_w); - crc_w = crc32(crc_w, input, tel_w); + u3r_view_done(&vu_u); while ( led_w > 0 ) { c3_y byt_y = 0; diff --git a/pkg/noun/jets/e/keccak.c b/pkg/noun/jets/e/keccak.c index 3630c7291b..2f6e359570 100644 --- a/pkg/noun/jets/e/keccak.c +++ b/pkg/noun/jets/e/keccak.c @@ -11,16 +11,16 @@ _kecc_##bits(c3_w len_w, u3_atom a) \ { \ c3_y out[byts]; \ - c3_y* buf_y = u3r_bytes_alloc(0, len_w, a); \ - if ( 0 != urcrypt_keccak_##bits(buf_y, len_w, out) ) { \ - /* urcrypt_keccac_##bits always succeeds when called correctly */ \ + u3r_view vu_u; \ + u3r_view_padded(&vu_u, a, len_w); \ + if ( 0 != urcrypt_keccak_##bits((c3_y*)vu_u.byt_y, len_w, out) ) { \ + /* urcrypt_keccak_##bits always succeeds when called correctly */ \ + u3r_view_done(&vu_u); \ return u3m_bail(c3__oops); \ } \ - else { \ - u3_atom pro = u3i_bytes(byts, out); \ - u3a_free(buf_y); \ - return pro; \ - } \ + u3_atom pro = u3i_bytes(byts, out); \ + u3r_view_done(&vu_u); \ + return pro; \ } \ \ u3_weak \ diff --git a/pkg/noun/jets/e/ripe.c b/pkg/noun/jets/e/ripe.c index c57a18776d..66a75bcd22 100644 --- a/pkg/noun/jets/e/ripe.c +++ 
b/pkg/noun/jets/e/ripe.c @@ -13,18 +13,17 @@ if ( !u3r_word_fit(&len_w, wid) ) { return u3m_bail(c3__fail); } - else { - u3_atom ret; - c3_y out_y[20]; - c3_y *dat_y = u3r_bytes_alloc(0, len_w, dat); - ret = ( 0 == urcrypt_ripemd160(dat_y, len_w, out_y) ) - ? u3i_bytes(20, out_y) - : u3_none; + c3_y out_y[20]; + u3r_view vu_u; + u3r_view_padded(&vu_u, dat, len_w); - u3a_free(dat_y); - return ret; - } + u3_atom ret = ( 0 == urcrypt_ripemd160((c3_y*)vu_u.byt_y, len_w, out_y) ) + ? u3i_bytes(20, out_y) + : u3_none; + + u3r_view_done(&vu_u); + return ret; } u3_noun diff --git a/pkg/noun/jets/e/sha1.c b/pkg/noun/jets/e/sha1.c index 6eb7d5248b..4a3ffe21c9 100644 --- a/pkg/noun/jets/e/sha1.c +++ b/pkg/noun/jets/e/sha1.c @@ -13,14 +13,13 @@ if ( !u3r_word_fit(&len_w, wid) ) { return u3m_bail(c3__fail); } - else { - c3_y out_y[20]; - c3_y *dat_y = u3r_bytes_alloc(0, len_w, dat); - urcrypt_sha1(dat_y, len_w, out_y); - u3a_free(dat_y); - return u3i_bytes(20, out_y); - } + c3_y out_y[20]; + u3r_view vu_u; + u3r_view_padded(&vu_u, dat, len_w); + urcrypt_sha1((c3_y*)vu_u.byt_y, len_w, out_y); + u3r_view_done(&vu_u); + return u3i_bytes(20, out_y); } u3_noun diff --git a/pkg/noun/jets/e/shax.c b/pkg/noun/jets/e/shax.c index 6d7029b4ef..87b4f5fc7f 100644 --- a/pkg/noun/jets/e/shax.c +++ b/pkg/noun/jets/e/shax.c @@ -16,23 +16,23 @@ if ( !u3r_word_fit(&len_w, wid) ) { return u3m_bail(c3__fail); } - else { - c3_y out_y[32]; - c3_y* dat_y = u3r_bytes_alloc(0, len_w, dat); - urcrypt_shay(dat_y, len_w, out_y); - u3a_free(dat_y); - return u3i_bytes(32, out_y); - } + + c3_y out_y[32]; + u3r_view vu_u; + u3r_view_padded(&vu_u, dat, len_w); + urcrypt_shay((c3_y*)vu_u.byt_y, len_w, out_y); + u3r_view_done(&vu_u); + return u3i_bytes(32, out_y); } static u3_atom _cqe_shax(u3_atom a) { - c3_w len_w; - c3_y out_y[32]; - c3_y* dat_y = u3r_bytes_all(&len_w, a); - urcrypt_shay(dat_y, len_w, out_y); - u3a_free(dat_y); + c3_y out_y[32]; + u3r_view vu_u; + u3r_view_init(&vu_u, a); + 
urcrypt_shay((c3_y*)vu_u.byt_y, vu_u.len_w, out_y); + u3r_view_done(&vu_u); return u3i_bytes(32, out_y); } @@ -44,27 +44,30 @@ if ( !u3r_word_fit(&len_w, wid) ) { return u3m_bail(c3__fail); } - else { - c3_y out_y[64]; - c3_y* dat_y = u3r_bytes_alloc(0, len_w, dat); - urcrypt_shal(dat_y, len_w, out_y); - u3a_free(dat_y); - return u3i_bytes(64, out_y); - } + + c3_y out_y[64]; + u3r_view vu_u; + u3r_view_padded(&vu_u, dat, len_w); + urcrypt_shal((c3_y*)vu_u.byt_y, len_w, out_y); + u3r_view_done(&vu_u); + return u3i_bytes(64, out_y); } static u3_atom _cqe_shas(u3_atom sal, u3_atom ruz) { - c3_w sal_w, ruz_w; - c3_y *sal_y, *ruz_y, out_y[32]; - - sal_y = u3r_bytes_all(&sal_w, sal); - ruz_y = u3r_bytes_all(&ruz_w, ruz); - urcrypt_shas(sal_y, sal_w, ruz_y, ruz_w, out_y); - u3a_free(sal_y); - u3a_free(ruz_y); + c3_y out_y[32]; + + u3r_view sa_u, ru_u; + u3r_view_init(&sa_u, sal); + u3r_view_init(&ru_u, ruz); + + urcrypt_shas((c3_y*)sa_u.byt_y, sa_u.len_w, + (c3_y*)ru_u.byt_y, ru_u.len_w, out_y); + + u3r_view_done(&sa_u); + u3r_view_done(&ru_u); return u3i_bytes(32, out_y); } diff --git a/pkg/noun/retrieve.c b/pkg/noun/retrieve.c index c028cad3ff..403a3ce5df 100644 --- a/pkg/noun/retrieve.c +++ b/pkg/noun/retrieve.c @@ -1205,6 +1205,92 @@ u3r_bytes_all(c3_w* len_w, u3_atom a) return u3r_bytes_alloc(0, met_w, a); } +/* u3r_view_init(): open a read-only byte view of (a). +** +** For bob atoms, u3r_blob_map gives us direct access to the mmap'd +** blob file — no loom allocation. For everything else, fall back +** to u3r_bytes_alloc (heap buffer + copy), preserving the existing +** u3r_bytes_all semantics. 
+*/ +void +u3r_view_init(u3r_view* vu_u, u3_atom a) +{ + c3_w met_w = u3r_met(3, a); + vu_u->len_w = met_w; + vu_u->map_d = 0; + vu_u->ali_y = 0; + vu_u->byt_y = 0; + + if ( 0 == met_w ) { + return; + } + + if ( c3y == u3a_is_bob(a) ) { + c3_d map_d = 0; + const c3_y* map_y = u3r_blob_map(a, &map_d); + if ( map_y ) { + vu_u->byt_y = map_y; + vu_u->map_d = map_d; + return; + } + // fall through to alloc-and-copy; note that u3r_bytes_alloc below + // will itself bail via u3r_blob_load if the blob really is missing + } + + c3_y* buf_y = u3r_bytes_alloc(0, met_w, a); + vu_u->byt_y = buf_y; + vu_u->ali_y = buf_y; +} + +/* u3r_view_padded(): open a view of exactly [wid_w] bytes (zero-padded). +*/ +void +u3r_view_padded(u3r_view* vu_u, u3_atom a, c3_w wid_w) +{ + u3r_view_init(vu_u, a); + + // atom has enough bytes — keep the zero-copy view; just cap len_w + // + if ( vu_u->len_w >= wid_w ) { + vu_u->len_w = wid_w; + return; + } + + // atom is shorter — allocate wid_w bytes, copy what's there, zero + // the tail. release the original backing (mmap or heap) and re- + // point the view at the padded buffer. + // + c3_y* pad_y = u3a_malloc(wid_w); + if ( vu_u->len_w && vu_u->byt_y ) { + memcpy(pad_y, vu_u->byt_y, vu_u->len_w); + } + memset(pad_y + vu_u->len_w, 0, wid_w - vu_u->len_w); + + u3r_view_done(vu_u); + + vu_u->byt_y = pad_y; + vu_u->len_w = wid_w; + vu_u->map_d = 0; + vu_u->ali_y = pad_y; +} + +/* u3r_view_done(): release the view's backing memory. +*/ +void +u3r_view_done(u3r_view* vu_u) +{ + if ( vu_u->map_d ) { + u3r_blob_unmap(vu_u->byt_y, vu_u->map_d); + } + else if ( vu_u->ali_y ) { + u3a_free(vu_u->ali_y); + } + vu_u->byt_y = 0; + vu_u->len_w = 0; + vu_u->map_d = 0; + vu_u->ali_y = 0; +} + /* _mpz_init_set_word(): ** ** Initialize (a_mp) from a single word (b_w). 
diff --git a/pkg/noun/retrieve.h b/pkg/noun/retrieve.h index 311fdf9965..94a774772f 100644 --- a/pkg/noun/retrieve.h +++ b/pkg/noun/retrieve.h @@ -410,6 +410,49 @@ u3r_bytes_all(c3_w* len_w, u3_atom a); + /* u3r_view: read-only byte view over an atom's significant bytes. + ** + ** For bob atoms, mmaps the underlying blob file — the caller sees + ** [byt_y, len_w) without any loom allocation. For normal atoms, + ** falls back to malloc + u3r_bytes (same cost as u3r_bytes_all). + ** + ** Lifecycle: u3r_view_init / ...use byt_y[0..len_w]... / + ** u3r_view_done. byt_y is invalid after _done. + ** + ** len_w matches u3r_met(3, a): significant-byte length (trailing + ** zero bytes stripped). For bob atoms this may be less than the + ** on-disk file size; callers only see the logical bytes. + */ + typedef struct { + const c3_y* byt_y; // bytes (mmap or heap) + c3_w len_w; // significant byte length + c3_d map_d; // mmap size for unmap (0 if heap-backed) + c3_y* ali_y; // heap allocation to free (0 if mmap-backed) + } u3r_view; + + /* u3r_view_init(): open a read-only byte view of [a]. + */ + void + u3r_view_init(u3r_view* vu_u, u3_atom a); + + /* u3r_view_padded(): open a view of at least [wid_w] bytes. + ** + ** After the call, byt_y[0..wid_w] is valid; len_w == wid_w. + ** If the atom already has >= wid_w significant bytes we keep + ** the zero-copy path (mmap for bobs, still cheap for cats + ** that fit in a single word). Otherwise we allocate a + ** wid_w-byte heap buffer, copy what's there, and zero-pad + ** the rest — exactly the semantics that callers previously + ** got from u3r_bytes_alloc(0, wid_w, a). + */ + void + u3r_view_padded(u3r_view* vu_u, u3_atom a, c3_w wid_w); + + /* u3r_view_done(): release the view's backing memory. 
+ */ + void + u3r_view_done(u3r_view* vu_u); + /* u3r_chop_bits(): ** ** XOR `wid_d` bits from`src_w` at `bif_g` to `dst_w` at `bif_g` diff --git a/pkg/noun/retrieve_tests.c b/pkg/noun/retrieve_tests.c index a3d929dc20..f3cec6620d 100644 --- a/pkg/noun/retrieve_tests.c +++ b/pkg/noun/retrieve_tests.c @@ -2,6 +2,12 @@ #include "noun.h" +#include +#include +#include +#include +#include + /* _setup(): prepare for tests. */ static void @@ -1135,6 +1141,142 @@ _test_cell_trel_qual(void) exit(1); } +/* _test_view(): u3r_view_init on both regular and bob atoms. +*/ +static void +_test_view(void) +{ + // regular indirect atom: view falls back to heap alloc+copy + // + { + const c3_y src_y[] = "hello, u3r_view on a normal atom"; + const c3_w src_w = sizeof(src_y) - 1; + u3_atom a = u3i_bytes(src_w, src_y); + + u3r_view vu_u; + u3r_view_init(&vu_u, a); + + if ( vu_u.len_w != src_w ) { + fprintf(stderr, "_test_view(): normal len mismatch %" PRIc3_w + " vs %" PRIc3_w "\r\n", + vu_u.len_w, src_w); + exit(1); + } + if ( vu_u.map_d != 0 ) { + fprintf(stderr, "_test_view(): normal atom should not be mmap-backed\r\n"); + exit(1); + } + if ( vu_u.ali_y == 0 ) { + fprintf(stderr, "_test_view(): normal atom should have heap allocation\r\n"); + exit(1); + } + if ( 0 != memcmp(vu_u.byt_y, src_y, src_w) ) { + fprintf(stderr, "_test_view(): normal bytes mismatch\r\n"); + exit(1); + } + + u3r_view_done(&vu_u); + if ( vu_u.byt_y != 0 || vu_u.len_w != 0 ) { + fprintf(stderr, "_test_view(): normal view not reset on done\r\n"); + exit(1); + } + u3z(a); + } + + // zero atom: view should be empty, no allocation, no mmap + // + { + u3r_view vu_u; + u3r_view_init(&vu_u, 0); + if ( vu_u.len_w != 0 || vu_u.map_d != 0 || vu_u.ali_y != 0 ) { + fprintf(stderr, "_test_view(): zero atom view should be empty\r\n"); + exit(1); + } + u3r_view_done(&vu_u); + } + + // bob atom: view mmaps the underlying blob file + // + { + // set up a temp pier dir with a blob at .urb/bob// + // + c3_c dir_c[1024]; + 
snprintf(dir_c, sizeof(dir_c), "/tmp/vere-view-test-XXXXXX"); + if ( !mkdtemp(dir_c) ) { + fprintf(stderr, "_test_view(): mkdtemp failed: %s\r\n", strerror(errno)); + exit(1); + } + + c3_c pax_c[2048]; + snprintf(pax_c, sizeof(pax_c), "%s/.urb", dir_c); mkdir(pax_c, 0755); + snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob", dir_c); mkdir(pax_c, 0755); + + const c3_h mug_h = 0xabcd1234; + const c3_w seq_w = 7; + + snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob/%u", dir_c, (unsigned)mug_h); + mkdir(pax_c, 0755); + + const c3_y bob_y[] = "bob atom backed by a real file"; + const c3_d bob_d = sizeof(bob_y) - 1; + + snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob/%u/%u", + dir_c, (unsigned)mug_h, (unsigned)seq_w); + FILE* fil_f = fopen(pax_c, "wb"); + if ( !fil_f ) { + fprintf(stderr, "_test_view(): fopen %s: %s\r\n", pax_c, strerror(errno)); + exit(1); + } + fwrite(bob_y, 1, bob_d, fil_f); + fclose(fil_f); + + // set u3C.dir_c so u3r_blob_map finds the blob + // + u3C.dir_c = dir_c; + + u3_atom a = u3i_blob(mug_h, seq_w); + + u3r_view vu_u; + u3r_view_init(&vu_u, a); + + // expect mmap-backed view: map_d > 0, ali_y == 0 + // + if ( vu_u.map_d == 0 ) { + fprintf(stderr, "_test_view(): bob atom should be mmap-backed " + "(len=%" PRIc3_w " map_d=%" PRIc3_d " ali=%p)\r\n", + vu_u.len_w, vu_u.map_d, (void*)vu_u.ali_y); + exit(1); + } + if ( vu_u.ali_y != 0 ) { + fprintf(stderr, "_test_view(): bob atom should not have heap alloc\r\n"); + exit(1); + } + if ( vu_u.len_w == 0 || (c3_d)vu_u.len_w > bob_d ) { + fprintf(stderr, "_test_view(): bob len_w=%" PRIc3_w + " out of range [1..%" PRIc3_d "]\r\n", + vu_u.len_w, bob_d); + exit(1); + } + // first bytes must match the source; trailing bytes past len_w are + // still in the mapping (full file size) but out of logical scope + // + if ( 0 != memcmp(vu_u.byt_y, bob_y, vu_u.len_w) ) { + fprintf(stderr, "_test_view(): bob bytes mismatch\r\n"); + exit(1); + } + + u3r_view_done(&vu_u); + u3z(a); + + // clean up temp pier + // + c3_c cmd_c[2048]; 
+ snprintf(cmd_c, sizeof(cmd_c), "rm -rf %s", dir_c); + (void)system(cmd_c); + u3C.dir_c = 0; + } +} + /* main(): run all test cases. */ int @@ -1149,6 +1291,7 @@ main(int argc, char* argv[]) _test_words(); _test_safe(); _test_cell_trel_qual(); + _test_view(); // GC // diff --git a/pkg/vere/blob_tests.c b/pkg/vere/blob_tests.c new file mode 100644 index 0000000000..6312a57cd6 --- /dev/null +++ b/pkg/vere/blob_tests.c @@ -0,0 +1,924 @@ +/// @file + +#include "blob.h" +#include "noun.h" +#include "vere.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Tests for pkg/vere/blob.c — the content-addressed blob store. +** +** Each test creates a fresh temp pier under /tmp/vere-blob-test-XXXXXX +** and tears it down on exit. Tests run sequentially and exit nonzero +** on first failure. +*/ + +static c3_c _tmp_pier[1024]; + +/* _setup(): init loom (for u3_blob_load), make fresh temp pier. +*/ +static void +_setup(void) +{ + u3m_init(1 << 20); + u3m_pave(c3y); +} + +static void +_tmp_make(void) +{ + snprintf(_tmp_pier, sizeof(_tmp_pier), "/tmp/vere-blob-test-XXXXXX"); + if ( !mkdtemp(_tmp_pier) ) { + fprintf(stderr, "blob_tests: mkdtemp failed: %s\r\n", strerror(errno)); + exit(1); + } + + // blob.c assumes .urb exists (created earlier by disk code in production). + // Create it here so u3_blob_init can mkdir .urb/bob. + // + c3_c urb_c[2048]; + snprintf(urb_c, sizeof(urb_c), "%s/.urb", _tmp_pier); + if ( 0 != mkdir(urb_c, 0700) ) { + fprintf(stderr, "blob_tests: mkdir %s failed: %s\r\n", urb_c, strerror(errno)); + exit(1); + } +} + +static void +_tmp_clean(void) +{ + c3_c cmd_c[2048]; + snprintf(cmd_c, sizeof(cmd_c), "rm -rf %s", _tmp_pier); + (void)system(cmd_c); +} + +/* _path_exists(): true if [pax_c] exists on the filesystem. +*/ +static c3_o +_path_exists(const c3_c* pax_c) +{ + struct stat st_u; + return ( 0 == stat(pax_c, &st_u) ) ? 
c3y : c3n; +} + +/* _write_tmp_file(): create a temp file under $pier/.urb/bob/stg/ with +** [dat_y] bytes; returns path (malloc'd, caller frees). +*/ +static c3_c* +_write_tmp_file(const c3_y* dat_y, c3_d len_d) +{ + c3_c tmpl_c[2048]; + snprintf(tmpl_c, sizeof(tmpl_c), + "%s/.urb/bob/stg/blob-test-XXXXXX", _tmp_pier); + c3_i fid_i = mkstemp(tmpl_c); + if ( -1 == fid_i ) { + fprintf(stderr, "blob_tests: mkstemp(%s) failed: %s\r\n", + tmpl_c, strerror(errno)); + return 0; + } + if ( len_d && (ssize_t)len_d != write(fid_i, dat_y, (size_t)len_d) ) { + close(fid_i); + return 0; + } + close(fid_i); + return strdup(tmpl_c); +} + +/* _test_init(): u3_blob_init + u3_blob_stg_init create expected dirs. +*/ +static void +_test_init(void) +{ + _tmp_make(); + + u3_blob_init(_tmp_pier); + u3_blob_stg_init(_tmp_pier); + + c3_c pax_c[2048]; + snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob", _tmp_pier); + if ( c3y != _path_exists(pax_c) ) { + fprintf(stderr, "\033[31mblob init fail: %s missing\033[0m\r\n", pax_c); + exit(1); + } + snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob/stg", _tmp_pier); + if ( c3y != _path_exists(pax_c) ) { + fprintf(stderr, "\033[31mblob init fail: %s missing\033[0m\r\n", pax_c); + exit(1); + } + + // idempotent: second call should not error + // + u3_blob_init(_tmp_pier); + u3_blob_stg_init(_tmp_pier); + + _tmp_clean(); + fprintf(stderr, "test blob init: ok\r\n"); +} + +/* _test_stg_clean(): u3_blob_stg_init clears leftover staging files. 
+*/ +static void +_test_stg_clean(void) +{ + _tmp_make(); + u3_blob_init(_tmp_pier); + u3_blob_stg_init(_tmp_pier); + + // drop a dummy file in staging + // + c3_c stg_c[2048]; + snprintf(stg_c, sizeof(stg_c), "%s/.urb/bob/stg/leftover", _tmp_pier); + FILE* f = fopen(stg_c, "wb"); + if ( !f ) { + fprintf(stderr, "\033[31mblob stg_clean setup fail\033[0m\r\n"); + exit(1); + } + fputs("junk", f); + fclose(f); + + if ( c3y != _path_exists(stg_c) ) { + fprintf(stderr, "\033[31mblob stg_clean: file not created\033[0m\r\n"); + exit(1); + } + + // re-init should clean it + // + u3_blob_stg_init(_tmp_pier); + + if ( c3y == _path_exists(stg_c) ) { + fprintf(stderr, "\033[31mblob stg_clean fail: %s still exists\033[0m\r\n", + stg_c); + exit(1); + } + + _tmp_clean(); + fprintf(stderr, "test blob stg_clean: ok\r\n"); +} + +/* _test_path(): u3_blob_path produces expected string. +*/ +static void +_test_path(void) +{ + c3_c pax_c[8192]; + u3_blob_path(pax_c, "/pier", 0x12345678, 42); + + const c3_c* exp_c = "/pier/.urb/bob/305419896/42"; + if ( 0 != strcmp(pax_c, exp_c) ) { + fprintf(stderr, "\033[31mblob path fail: got %s, expected %s\033[0m\r\n", + pax_c, exp_c); + exit(1); + } + fprintf(stderr, "test blob path: ok\r\n"); +} + +/* _test_save_load(): save bytes, load, verify content. 
+*/ +static void +_test_save_load(void) +{ + _tmp_make(); + u3_blob_init(_tmp_pier); + u3_blob_stg_init(_tmp_pier); + + const c3_y dat_y[] = "the quick brown fox jumps over the lazy dog"; + const c3_d dat_d = sizeof(dat_y) - 1; // drop trailing NUL + c3_h mug_h = 0; + c3_w seq_w = 0; + + if ( c3y != u3_blob_save(_tmp_pier, dat_y, dat_d, &mug_h, &seq_w) ) { + fprintf(stderr, "\033[31mblob save fail\033[0m\r\n"); + exit(1); + } + if ( 1 != seq_w ) { + fprintf(stderr, "\033[31mblob save: expected seq=1, got %" PRIc3_w "\033[0m\r\n", + seq_w); + exit(1); + } + + // file should exist at computed path + // + c3_c fil_c[8192]; + u3_blob_path(fil_c, _tmp_pier, mug_h, seq_w); + if ( c3y != _path_exists(fil_c) ) { + fprintf(stderr, "\033[31mblob save: %s missing\033[0m\r\n", fil_c); + exit(1); + } + + if ( c3y != u3_blob_exists(_tmp_pier, mug_h, seq_w) ) { + fprintf(stderr, "\033[31mblob exists fail\033[0m\r\n"); + exit(1); + } + + // load and verify bytes + // + u3_weak atm = u3_blob_load(_tmp_pier, mug_h, seq_w); + if ( u3_none == atm ) { + fprintf(stderr, "\033[31mblob load: u3_none\033[0m\r\n"); + exit(1); + } + if ( dat_d != u3r_met(3, atm) ) { + fprintf(stderr, "\033[31mblob load: met mismatch\033[0m\r\n"); + exit(1); + } + c3_y* buf_y = c3_malloc(dat_d); + u3r_bytes(0, (c3_w)dat_d, buf_y, atm); + if ( 0 != memcmp(buf_y, dat_y, dat_d) ) { + fprintf(stderr, "\033[31mblob load: byte mismatch\033[0m\r\n"); + exit(1); + } + c3_free(buf_y); + u3z(atm); + + _tmp_clean(); + fprintf(stderr, "test blob save+load: ok\r\n"); +} + +/* _test_dedup(): saving identical content twice reuses the first seq. 
+*/ +static void +_test_dedup(void) +{ + _tmp_make(); + u3_blob_init(_tmp_pier); + u3_blob_stg_init(_tmp_pier); + + const c3_y dat_y[] = "dedup me, please and thank you"; + const c3_d dat_d = sizeof(dat_y) - 1; + + c3_h mug1_h, mug2_h; + c3_w seq1_w, seq2_w; + + if ( c3y != u3_blob_save(_tmp_pier, dat_y, dat_d, &mug1_h, &seq1_w) ) { + fprintf(stderr, "\033[31mblob dedup: first save failed\033[0m\r\n"); + exit(1); + } + if ( c3y != u3_blob_save(_tmp_pier, dat_y, dat_d, &mug2_h, &seq2_w) ) { + fprintf(stderr, "\033[31mblob dedup: second save failed\033[0m\r\n"); + exit(1); + } + + if ( mug1_h != mug2_h ) { + fprintf(stderr, "\033[31mblob dedup: mug changed (%" PRIc3_h + " vs %" PRIc3_h ")\033[0m\r\n", mug1_h, mug2_h); + exit(1); + } + if ( seq1_w != seq2_w ) { + fprintf(stderr, "\033[31mblob dedup: expected seq reuse, " + "got %" PRIc3_w "+%" PRIc3_w "\033[0m\r\n", seq1_w, seq2_w); + exit(1); + } + + // distinct content → distinct blob slot (may reuse bucket only if mug + // collides; overwhelmingly unlikely for ASCII content) + // + const c3_y alt_y[] = "a completely different payload"; + const c3_d alt_d = sizeof(alt_y) - 1; + c3_h mug3_h = 0; + c3_w seq3_w = 0; + if ( c3y != u3_blob_save(_tmp_pier, alt_y, alt_d, &mug3_h, &seq3_w) ) { + fprintf(stderr, "\033[31mblob dedup: alt save failed\033[0m\r\n"); + exit(1); + } + if ( mug1_h == mug3_h + && seq1_w == seq3_w ) + { + fprintf(stderr, "\033[31mblob dedup: distinct content got same blob\033[0m\r\n"); + exit(1); + } + + _tmp_clean(); + fprintf(stderr, "test blob dedup: ok\r\n"); +} + +/* _test_save_fd(): u3_blob_save_fd round-trips via mmap path. 
+*/ +static void +_test_save_fd(void) +{ + _tmp_make(); + u3_blob_init(_tmp_pier); + u3_blob_stg_init(_tmp_pier); + + const c3_y dat_y[] = "content delivered via file descriptor"; + const c3_d dat_d = sizeof(dat_y) - 1; + + // write a real file and re-open for reading + // + c3_c src_c[2048]; + snprintf(src_c, sizeof(src_c), "%s/src", _tmp_pier); + FILE* f = fopen(src_c, "wb"); + fwrite(dat_y, 1, (size_t)dat_d, f); + fclose(f); + + c3_i fid_i = open(src_c, O_RDONLY); + if ( -1 == fid_i ) { + fprintf(stderr, "\033[31mblob save_fd: open failed\033[0m\r\n"); + exit(1); + } + + c3_h mug_h = 0; + c3_w seq_w = 0; + c3_o ret_o = u3_blob_save_fd(_tmp_pier, fid_i, dat_d, &mug_h, &seq_w); + close(fid_i); + + if ( c3y != ret_o ) { + fprintf(stderr, "\033[31mblob save_fd failed\033[0m\r\n"); + exit(1); + } + + // u3_blob_save_fd rejects empty files + // + FILE* ef = fopen(src_c, "wb"); + fclose(ef); // truncate to zero + c3_i efid_i = open(src_c, O_RDONLY); + c3_h emh = 0; + c3_w esw = 0; + if ( c3n != u3_blob_save_fd(_tmp_pier, efid_i, 0, &emh, &esw) ) { + fprintf(stderr, "\033[31mblob save_fd: should reject empty\033[0m\r\n"); + exit(1); + } + close(efid_i); + + // verify loaded content matches + // + u3_weak atm = u3_blob_load(_tmp_pier, mug_h, seq_w); + if ( u3_none == atm ) { + fprintf(stderr, "\033[31mblob save_fd: load u3_none\033[0m\r\n"); + exit(1); + } + c3_y* buf_y = c3_malloc(dat_d); + u3r_bytes(0, (c3_w)dat_d, buf_y, atm); + if ( 0 != memcmp(buf_y, dat_y, dat_d) ) { + fprintf(stderr, "\033[31mblob save_fd: byte mismatch\033[0m\r\n"); + exit(1); + } + c3_free(buf_y); + u3z(atm); + + _tmp_clean(); + fprintf(stderr, "test blob save_fd: ok\r\n"); +} + +/* _test_delete_empty_bucket(): delete removes file AND empty bucket. 
+*/ +static void +_test_delete_empty_bucket(void) +{ + _tmp_make(); + u3_blob_init(_tmp_pier); + u3_blob_stg_init(_tmp_pier); + + const c3_y dat_y[] = "ephemeral blob"; + c3_h mug_h = 0; + c3_w seq_w = 0; + u3_blob_save(_tmp_pier, dat_y, sizeof(dat_y) - 1, &mug_h, &seq_w); + + c3_c fil_c[8192], dir_c[8192]; + u3_blob_path(fil_c, _tmp_pier, mug_h, seq_w); + snprintf(dir_c, sizeof(dir_c), "%s/.urb/bob/%" PRIc3_h, _tmp_pier, mug_h); + + if ( c3y != _path_exists(dir_c) ) { + fprintf(stderr, "\033[31mblob delete setup: bucket missing\033[0m\r\n"); + exit(1); + } + + u3_blob_delete(_tmp_pier, mug_h, seq_w); + + if ( c3y == _path_exists(fil_c) ) { + fprintf(stderr, "\033[31mblob delete: file %s still exists\033[0m\r\n", + fil_c); + exit(1); + } + if ( c3y == u3_blob_exists(_tmp_pier, mug_h, seq_w) ) { + fprintf(stderr, "\033[31mblob delete: exists() still true\033[0m\r\n"); + exit(1); + } + if ( c3y == _path_exists(dir_c) ) { + fprintf(stderr, "\033[31mblob delete: bucket %s not cleaned\033[0m\r\n", + dir_c); + exit(1); + } + + // deleting a nonexistent blob is a no-op (no error) + // + u3_blob_delete(_tmp_pier, 0xdeadbeef, 999); + + _tmp_clean(); + fprintf(stderr, "test blob delete (empty bucket): ok\r\n"); +} + +/* _test_install_stg(): stage a file → install → rename, blob exists. 
+*/ +static void +_test_install_stg(void) +{ + _tmp_make(); + u3_blob_init(_tmp_pier); + u3_blob_stg_init(_tmp_pier); + + const c3_y dat_y[] = "payload installed from staging"; + const c3_d dat_d = sizeof(dat_y) - 1; + c3_c* stg_c = _write_tmp_file(dat_y, dat_d); + if ( !stg_c ) { + fprintf(stderr, "\033[31mblob install_stg: _write_tmp_file failed\033[0m\r\n"); + exit(1); + } + + // staging file is present before install + // + if ( c3y != _path_exists(stg_c) ) { + fprintf(stderr, "\033[31mblob install_stg: staging file missing\033[0m\r\n"); + exit(1); + } + + c3_h mug_h = 0; + c3_w seq_w = 0; + if ( c3y != u3_blob_install_stg(_tmp_pier, stg_c, &mug_h, &seq_w) ) { + fprintf(stderr, "\033[31mblob install_stg failed\033[0m\r\n"); + exit(1); + } + if ( 1 != seq_w ) { + fprintf(stderr, "\033[31mblob install_stg: expected seq=1, got %" PRIc3_w + "\033[0m\r\n", seq_w); + exit(1); + } + + // staging file is consumed after install + // + if ( c3y == _path_exists(stg_c) ) { + fprintf(stderr, "\033[31mblob install_stg: staging file not consumed\033[0m\r\n"); + exit(1); + } + + if ( c3y != u3_blob_exists(_tmp_pier, mug_h, seq_w) ) { + fprintf(stderr, "\033[31mblob install_stg: blob not present after install\033[0m\r\n"); + exit(1); + } + + // content preserved + // + u3_weak atm = u3_blob_load(_tmp_pier, mug_h, seq_w); + if ( u3_none == atm ) { + fprintf(stderr, "\033[31mblob install_stg: load u3_none\033[0m\r\n"); + exit(1); + } + c3_y* buf_y = c3_malloc(dat_d); + u3r_bytes(0, (c3_w)dat_d, buf_y, atm); + if ( 0 != memcmp(buf_y, dat_y, dat_d) ) { + fprintf(stderr, "\033[31mblob install_stg: byte mismatch\033[0m\r\n"); + exit(1); + } + c3_free(buf_y); + u3z(atm); + + free(stg_c); + _tmp_clean(); + fprintf(stderr, "test blob install_stg: ok\r\n"); +} + +/* _test_install_stg_dedup(): installing a staging file with existing +** content returns the existing seq and consumes the staging file. 
+*/ +static void +_test_install_stg_dedup(void) +{ + _tmp_make(); + u3_blob_init(_tmp_pier); + u3_blob_stg_init(_tmp_pier); + + const c3_y dat_y[] = "shared bytes across save + install_stg"; + const c3_d dat_d = sizeof(dat_y) - 1; + + // first save via u3_blob_save + // + c3_h mug1_h = 0; + c3_w seq1_w = 0; + u3_blob_save(_tmp_pier, dat_y, dat_d, &mug1_h, &seq1_w); + + // then stage same content and install + // + c3_c* stg_c = _write_tmp_file(dat_y, dat_d); + + c3_h mug2_h = 0; + c3_w seq2_w = 0; + if ( c3y != u3_blob_install_stg(_tmp_pier, stg_c, &mug2_h, &seq2_w) ) { + fprintf(stderr, "\033[31mblob install_stg dedup: install failed\033[0m\r\n"); + exit(1); + } + + if ( mug1_h != mug2_h || seq1_w != seq2_w ) { + fprintf(stderr, "\033[31mblob install_stg dedup: expected %" + PRIc3_h "/%" PRIc3_w ", got %" PRIc3_h "/%" PRIc3_w + "\033[0m\r\n", mug1_h, seq1_w, mug2_h, seq2_w); + exit(1); + } + + // staging file consumed even on dedup + // + if ( c3y == _path_exists(stg_c) ) { + fprintf(stderr, "\033[31mblob install_stg dedup: staging file not consumed\033[0m\r\n"); + exit(1); + } + + // reject missing and empty staging files + // + c3_h m = 0; c3_w s = 0; + if ( c3n != u3_blob_install_stg(_tmp_pier, "/no/such/path", &m, &s) ) { + fprintf(stderr, "\033[31mblob install_stg: should reject missing file\033[0m\r\n"); + exit(1); + } + + c3_c* empty_c = _write_tmp_file((const c3_y*)"", 0); + if ( c3n != u3_blob_install_stg(_tmp_pier, empty_c, &m, &s) ) { + fprintf(stderr, "\033[31mblob install_stg: should reject empty\033[0m\r\n"); + exit(1); + } + unlink(empty_c); + free(empty_c); + + free(stg_c); + _tmp_clean(); + fprintf(stderr, "test blob install_stg dedup: ok\r\n"); +} + +/* _test_met(): u3_blob_met matches u3r_met on the materialized atom. 
+*/ +static void +_test_met(void) +{ + _tmp_make(); + u3_blob_init(_tmp_pier); + u3_blob_stg_init(_tmp_pier); + + // case 1: no trailing zeros, not word-aligned (4 bytes, < 1 VERE64 word) + // + // bytes: { 0xab, 0xcd, 0xef, 0x01 } + // last nonzero byte at pos 4, top byte = 0x01 (1 significant bit) + // expected met = (4-1)*8 + 1 = 25 + // + // Also verifies u3_blob_load zero-initializes the loom atom's trailing + // word bytes: u3r_met on the loaded atom must agree with u3_blob_met. + // + { + const c3_y dat_y[] = { 0xab, 0xcd, 0xef, 0x01 }; + const c3_d dat_d = sizeof(dat_y); + c3_h mug_h = 0; c3_w seq_w = 0; + u3_blob_save(_tmp_pier, dat_y, dat_d, &mug_h, &seq_w); + + c3_d bit_d = u3_blob_met(_tmp_pier, mug_h, seq_w); + if ( 25 != bit_d ) { + fprintf(stderr, "\033[31mblob met: dense got %" PRIc3_d ", expected 25" + "\033[0m\r\n", bit_d); + exit(1); + } + + u3_weak atm = u3_blob_load(_tmp_pier, mug_h, seq_w); + if ( u3_none == atm ) { + fprintf(stderr, "\033[31mblob met: load u3_none\033[0m\r\n"); + exit(1); + } + c3_w ref_w = u3r_met(0, atm); + u3z(atm); + if ( bit_d != (c3_d)ref_w ) { + fprintf(stderr, "\033[31mblob met: blob_met=%" PRIc3_d + " != u3r_met=%" PRIc3_w " (u3_blob_load " + "not zero-initializing trailing bytes?)\033[0m\r\n", + bit_d, ref_w); + exit(1); + } + } + + // case 2: trailing zeros — met should strip them + // + { + const c3_y dat_y[] = { 0xff, 0xff, 0x00, 0x00, 0x00 }; + const c3_d dat_d = sizeof(dat_y); + c3_h mug_h = 0; c3_w seq_w = 0; + u3_blob_save(_tmp_pier, dat_y, dat_d, &mug_h, &seq_w); + + c3_d bit_d = u3_blob_met(_tmp_pier, mug_h, seq_w); + // 16 significant bits; high byte 0xff → 8 bits + // total = 1*8 + 8 = 16 + // + if ( 16 != bit_d ) { + fprintf(stderr, "\033[31mblob met: trailing-zero got %" PRIc3_d + ", expected 16\033[0m\r\n", bit_d); + exit(1); + } + } + + // case 3: nonexistent blob → 0 + // + { + if ( 0 != u3_blob_met(_tmp_pier, 0xdeadbeef, 999) ) { + fprintf(stderr, "\033[31mblob met: missing blob should return 
0\033[0m\r\n"); + exit(1); + } + } + + _tmp_clean(); + fprintf(stderr, "test blob met: ok\r\n"); +} + +/* _test_map(): u3_blob_map returns byte-accurate pointer. +*/ +static void +_test_map(void) +{ + _tmp_make(); + u3_blob_init(_tmp_pier); + u3_blob_stg_init(_tmp_pier); + + const c3_y dat_y[] = "mapped bytes should round-trip exactly"; + const c3_d dat_d = sizeof(dat_y) - 1; + c3_h mug_h = 0; c3_w seq_w = 0; + u3_blob_save(_tmp_pier, dat_y, dat_d, &mug_h, &seq_w); + + c3_d mlen_d = 0; + const c3_y* map_y = u3_blob_map(_tmp_pier, mug_h, seq_w, &mlen_d); + if ( !map_y ) { + fprintf(stderr, "\033[31mblob map: returned NULL\033[0m\r\n"); + exit(1); + } + if ( mlen_d != dat_d ) { + fprintf(stderr, "\033[31mblob map: len %" PRIc3_d " != %" PRIc3_d + "\033[0m\r\n", mlen_d, dat_d); + exit(1); + } + if ( 0 != memcmp(map_y, dat_y, dat_d) ) { + fprintf(stderr, "\033[31mblob map: byte mismatch\033[0m\r\n"); + exit(1); + } + u3_blob_unmap(map_y, mlen_d); + + // mapping nonexistent returns NULL + // + c3_d dlen_d = 0; + const c3_y* miss_y = u3_blob_map(_tmp_pier, 0xdeadbeef, 999, &dlen_d); + if ( miss_y ) { + fprintf(stderr, "\033[31mblob map: missing should be NULL\033[0m\r\n"); + u3_blob_unmap(miss_y, dlen_d); + exit(1); + } + + _tmp_clean(); + fprintf(stderr, "test blob map: ok\r\n"); +} + +/* _test_lifecycle(): end-to-end exercise of the blob-as-atom pipeline. +** +** Simulates the production flow: +** 1. Earth writes bytes to the blob store (save or install_stg). +** 2. Mars constructs bob atoms referencing those blobs. +** 3. The event containing the bob atoms is serialized with u3s_ram_xeno +** (as happens when writing to the event log or sending over newt IPC). +** 4. On replay (or IPC receive), u3s_tap_xeno reconstructs the bob atoms. +** 5. Arvo reads the atoms' bytes via u3r_bytes, which re-materializes +** each bob atom through u3r_blob_load → u3_blob_load → mmap. 
+** +** Also exercises the ram encoder's backref path for bob atoms (same bob +** appearing multiple times in a noun) and the dedup path through +** install_stg (two bob atoms resolving to the same on-disk blob). +*/ +static void +_test_lifecycle(void) +{ + _tmp_make(); + u3_blob_init(_tmp_pier); + u3_blob_stg_init(_tmp_pier); + + // u3s_ram_xeno calls u3r_blob_met on bob atoms during encoding, which + // reads the blob file at $u3C.dir_c/.urb/bob/<mug>/<seq>. Set it now + // so encode, decode, and u3r_bytes all find the same store. + // + u3C.dir_c = _tmp_pier; + + // write two distinct blobs directly, plus a third via install_stg + // that should dedup against the first. + // + const c3_y dat1_y[] = "lifecycle: first blob contents"; + const c3_d dat1_d = sizeof(dat1_y) - 1; + const c3_y dat2_y[] = "lifecycle: entirely different payload here"; + const c3_d dat2_d = sizeof(dat2_y) - 1; + + c3_h mug1_h = 0, mug2_h = 0, mug3_h = 0; + c3_w seq1_w = 0, seq2_w = 0, seq3_w = 0; + + if ( c3y != u3_blob_save(_tmp_pier, dat1_y, dat1_d, &mug1_h, &seq1_w) ) { + fprintf(stderr, "\033[31mlifecycle: save1 failed\033[0m\r\n"); + exit(1); + } + if ( c3y != u3_blob_save(_tmp_pier, dat2_y, dat2_d, &mug2_h, &seq2_w) ) { + fprintf(stderr, "\033[31mlifecycle: save2 failed\033[0m\r\n"); + exit(1); + } + + // install a staging file with same content as blob 1 → dedup + // + { + c3_c* stg_c = _write_tmp_file(dat1_y, dat1_d); + if ( c3y != u3_blob_install_stg(_tmp_pier, stg_c, &mug3_h, &seq3_w) ) { + fprintf(stderr, "\033[31mlifecycle: install_stg failed\033[0m\r\n"); + exit(1); + } + free(stg_c); + } + if ( mug1_h != mug3_h || seq1_w != seq3_w ) { + fprintf(stderr, "\033[31mlifecycle: install_stg should dedup; " + "got %" PRIc3_h "/%" PRIc3_w " vs %" PRIc3_h "/%" + PRIc3_w "\033[0m\r\n", mug3_h, seq3_w, mug1_h, seq1_w); + exit(1); + } + + // build a noun with both blobs, bob1 appearing twice (to exercise ram's + // backref path for bob atoms). 
this mirrors a realistic Arvo event + // that references the same large binary in multiple places. + // + // shape: [%blob-evt [bob1 bob2] bob1 42] + // + u3_noun bob1 = u3i_blob(mug1_h, seq1_w); + u3_noun bob2 = u3i_blob(mug2_h, seq2_w); + u3_noun ref = u3nq(c3__blob, + u3nc(u3k(bob1), u3k(bob2)), + u3k(bob1), + 42); + u3z(bob1); + u3z(bob2); + + // encode via ram (what mars would write to the event log / newt) + // + c3_d len_d = 0; + c3_y* byt_y = 0; + u3s_ram_xeno(ref, &len_d, &byt_y); + + // validate header: "RAM\0" + 0x01 (the disk/newt framing) + // + if ( (len_d < 5) + || (byt_y[0] != 'R') + || (byt_y[1] != 'A') + || (byt_y[2] != 'M') + || (byt_y[3] != 0x00) + || (byt_y[4] != 0x01) ) + { + fprintf(stderr, "\033[31mlifecycle: ram header invalid\033[0m\r\n"); + exit(1); + } + + // decode via tap (what mars would do on replay / newt receive) + // + u3_weak out = u3s_tap_xeno(len_d, byt_y); + free(byt_y); + if ( u3_none == out ) { + fprintf(stderr, "\033[31mlifecycle: tap returned u3_none\033[0m\r\n"); + exit(1); + } + + // structural equality: mug+seq preserved for bob atoms; cat/indirect + // atoms compared by value + // + if ( c3n == u3r_sing(ref, out) ) { + fprintf(stderr, "\033[31mlifecycle: decoded noun differs from ref\033[0m\r\n"); + exit(1); + } + + // walk the decoded noun and pull bytes out of each bob atom. this + // exercises u3r_bytes → u3r_blob_load → u3_blob_load (mmap from disk). + // + u3_noun tag, cel, b2, rst; + if ( c3n == u3r_cell(out, &tag, &cel) ) { + fprintf(stderr, "\033[31mlifecycle: decoded root not cell\033[0m\r\n"); + exit(1); + } + if ( tag != c3__blob ) { + fprintf(stderr, "\033[31mlifecycle: wrong tag\033[0m\r\n"); + exit(1); + } + + u3_noun bobs, bob1_d, bob2_d; + u3x_trel(cel, &bobs, &b2, &rst); + u3x_cell(bobs, &bob1_d, &bob2_d); + + // the occurrences of bob1 should all be bob atoms pointing at the + // same (mug, seq). 
whether tap reuses the same loom slot for + // backrefs is an implementation detail — we only assert semantic + // equality here. + // + if ( c3n == u3a_is_bob(bob1_d) + || c3n == u3a_is_bob(bob2_d) + || c3n == u3a_is_bob(b2) ) + { + fprintf(stderr, "\033[31mlifecycle: decoded non-bob where bob expected\033[0m\r\n"); + exit(1); + } + if ( u3a_bob_mug(bob1_d) != mug1_h + || u3a_bob_seq(bob1_d) != seq1_w ) + { + fprintf(stderr, "\033[31mlifecycle: bob1 mug/seq mismatch\033[0m\r\n"); + exit(1); + } + if ( u3a_bob_mug(b2) != mug1_h + || u3a_bob_seq(b2) != seq1_w ) + { + fprintf(stderr, "\033[31mlifecycle: backref bob1 mug/seq mismatch\033[0m\r\n"); + exit(1); + } + if ( u3a_bob_mug(bob2_d) != mug2_h + || u3a_bob_seq(bob2_d) != seq2_w ) + { + fprintf(stderr, "\033[31mlifecycle: bob2 mug/seq mismatch\033[0m\r\n"); + exit(1); + } + + // materialize each bob to bytes and compare to original input. + // u3r_bytes → u3r_blob_load → u3_blob_load → mmap. + // + { + c3_y* buf_y = c3_malloc(dat1_d); + u3r_bytes(0, (c3_w)dat1_d, buf_y, bob1_d); + if ( 0 != memcmp(buf_y, dat1_y, dat1_d) ) { + fprintf(stderr, "\033[31mlifecycle: bob1 bytes mismatch\033[0m\r\n"); + exit(1); + } + c3_free(buf_y); + } + { + c3_y* buf_y = c3_malloc(dat2_d); + u3r_bytes(0, (c3_w)dat2_d, buf_y, bob2_d); + if ( 0 != memcmp(buf_y, dat2_y, dat2_d) ) { + fprintf(stderr, "\033[31mlifecycle: bob2 bytes mismatch\033[0m\r\n"); + exit(1); + } + c3_free(buf_y); + } + + // u3r_met (which materializes) should agree with u3r_blob_met + // (which reads the file directly) — exercises the cross-module + // invariant between retrieve.c and blob.c. 
+ // + { + c3_d bit_d = u3r_blob_met(bob1_d); + u3_weak mat = u3_blob_load(_tmp_pier, mug1_h, seq1_w); + if ( u3_none == mat ) { + fprintf(stderr, "\033[31mlifecycle: u3_blob_load failed\033[0m\r\n"); + exit(1); + } + c3_w ref_w = u3r_met(0, mat); + u3z(mat); + if ( bit_d != (c3_d)ref_w ) { + fprintf(stderr, "\033[31mlifecycle: blob_met=%" PRIc3_d + " vs materialized met=%" PRIc3_w "\033[0m\r\n", + bit_d, ref_w); + exit(1); + } + } + + u3z(ref); + u3z(out); + + // tear down and confirm blob files are deleted cleanly + // + u3_blob_delete(_tmp_pier, mug1_h, seq1_w); + u3_blob_delete(_tmp_pier, mug2_h, seq2_w); + if ( c3y == u3_blob_exists(_tmp_pier, mug1_h, seq1_w) + || c3y == u3_blob_exists(_tmp_pier, mug2_h, seq2_w) ) + { + fprintf(stderr, "\033[31mlifecycle: blobs still present after delete\033[0m\r\n"); + exit(1); + } + + _tmp_clean(); + fprintf(stderr, "test blob lifecycle: ok\r\n"); +} + +/* main(): run all blob tests. +*/ +int +main(int argc, char* argv[]) +{ + (void)argc; (void)argv; + _setup(); + + _test_path(); // no filesystem + _test_init(); + _test_stg_clean(); + _test_save_load(); + _test_dedup(); + _test_save_fd(); + _test_delete_empty_bucket(); + _test_install_stg(); + _test_install_stg_dedup(); + _test_met(); + _test_map(); + _test_lifecycle(); + + fprintf(stderr, "test blob: ok\r\n"); + return 0; +} From e00de89ff9c5c3561cd1ca7bd8eb053dbd45d60d Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Wed, 15 Apr 2026 12:44:13 -0500 Subject: [PATCH 14/31] wip: blob 14 --- pkg/noun/jets/c/can.c | 25 +++++++++++++++-- pkg/noun/jets/c/cat.c | 59 +++++++++++++++++++++++++++++---------- pkg/noun/jets/c/end.c | 48 +++++++++++++++++++++++-------- pkg/noun/jets/c/rap.c | 24 ++++++++++++++-- pkg/noun/jets/c/rip.c | 55 +++++++++++++++++++++++------------- pkg/noun/jets/c/rsh.c | 54 ++++++++++++++++++++++++++++------- pkg/noun/jets/c/swp.c | 40 ++++++++++++++++++++++++-- pkg/noun/jets/e/aes_cbc.c | 17 +++++++++-- pkg/noun/jets/e/aes_siv.c | 36 
++++++++++++++++-------- pkg/noun/jets/e/chacha.c | 33 ++++++++++++++++------ pkg/noun/jets/e/ed_sign.c | 58 +++++++++++++++++--------------------- pkg/noun/jets/e/ed_veri.c | 25 ++++++++--------- pkg/noun/jets/e/leer.c | 53 ++++++++++++++++++++++------------- 13 files changed, 375 insertions(+), 152 deletions(-) diff --git a/pkg/noun/jets/c/can.c b/pkg/noun/jets/c/can.c index cc7e4d3545..72e20b9ca8 100644 --- a/pkg/noun/jets/c/can.c +++ b/pkg/noun/jets/c/can.c @@ -5,6 +5,7 @@ #include "noun.h" +#include u3_noun u3qc_can(u3_atom a, @@ -49,18 +50,36 @@ u3i_slab_init(&sab_u, a_g, tot_w); } - /* Chop the list atoms in. + /* Chop the list atoms in. For byte-aligned bloqs and bob + atoms, mmap the blob and memcpy directly — avoids a full + u3r_blob_load per item. */ { u3_noun cab = b; c3_w pos_w = 0; + c3_g shf_g = (a_g >= 3) ? (a_g - 3) : 0; while ( 0 != cab ) { - u3_noun i_cab = u3h(cab); + u3_noun i_cab = u3h(cab); u3_atom pi_cab = u3h(i_cab); u3_atom qi_cab = u3t(i_cab); - u3r_chop(a_g, 0, pi_cab, pos_w, sab_u.buf_w, qi_cab); + if ( a_g >= 3 ) { + c3_w pos_b = pos_w << shf_g; + c3_w len_b = ((c3_w)pi_cab) << shf_g; + + u3r_view vu_u; + u3r_view_init(&vu_u, qi_cab); + c3_w cpy_w = (vu_u.len_w < len_b) ? 
vu_u.len_w : len_b; + if ( cpy_w ) { + memcpy(sab_u.buf_y + pos_b, vu_u.byt_y, cpy_w); + } + u3r_view_done(&vu_u); + } + else { + u3r_chop(a_g, 0, pi_cab, pos_w, sab_u.buf_w, qi_cab); + } + pos_w += pi_cab; cab = u3t(cab); } diff --git a/pkg/noun/jets/c/cat.c b/pkg/noun/jets/c/cat.c index 6eefe54db4..f92e541a70 100644 --- a/pkg/noun/jets/c/cat.c +++ b/pkg/noun/jets/c/cat.c @@ -5,6 +5,7 @@ #include "noun.h" +#include u3_noun u3qc_cat(u3_atom a, @@ -14,24 +15,54 @@ if ( !_(u3a_is_cat(a)) || (a >= u3a_word_bits) ) { return u3m_bail(c3__fail); } - else { - c3_g a_g = a; - c3_w lew_w = u3r_met(a_g, b); - c3_w ler_w = u3r_met(a_g, c); - c3_w all_w = (lew_w + ler_w); - - if ( 0 == all_w ) { - return 0; - } - else { - u3i_slab sab_u; - u3i_slab_from(&sab_u, b, a_g, all_w); - u3r_chop(a_g, 0, ler_w, lew_w, sab_u.buf_w, c); + c3_g a_g = a; + c3_w lew_w = u3r_met(a_g, b); + c3_w ler_w = u3r_met(a_g, c); + c3_w all_w = (lew_w + ler_w); + + if ( 0 == all_w ) { + return 0; + } + + // byte-aligned fast path: mmap each bob input directly. the + // legacy u3i_slab_from + u3r_chop pair would otherwise materialize + // either operand if it's a bob (via u3r_words → u3r_blob_load and + // u3r_chop → u3r_blob_load respectively). + // + if ( a_g >= 3 ) { + c3_g shf_g = a_g - 3; + c3_w lew_b = lew_w << shf_g; + c3_w ler_b = ler_w << shf_g; + + u3i_slab sab_u; + u3i_slab_init(&sab_u, a_g, all_w); - return u3i_slab_mint(&sab_u); + u3r_view vb_u, vc_u; + u3r_view_init(&vb_u, b); + u3r_view_init(&vc_u, c); + + c3_w cpy_w; + cpy_w = (vb_u.len_w < lew_b) ? vb_u.len_w : lew_b; + if ( cpy_w ) { + memcpy(sab_u.buf_y, vb_u.byt_y, cpy_w); + } + cpy_w = (vc_u.len_w < ler_b) ? 
vc_u.len_w : ler_b; + if ( cpy_w ) { + memcpy(sab_u.buf_y + lew_b, vc_u.byt_y, cpy_w); } + + u3r_view_done(&vb_u); + u3r_view_done(&vc_u); + return u3i_slab_mint(&sab_u); } + + // bit-level fallback — existing path materializes for bobs + // + u3i_slab sab_u; + u3i_slab_from(&sab_u, b, a_g, all_w); + u3r_chop(a_g, 0, ler_w, lew_w, sab_u.buf_w, c); + return u3i_slab_mint(&sab_u); } u3_noun diff --git a/pkg/noun/jets/c/end.c b/pkg/noun/jets/c/end.c index 17c8600467..3f28666256 100644 --- a/pkg/noun/jets/c/end.c +++ b/pkg/noun/jets/c/end.c @@ -5,6 +5,8 @@ #include "noun.h" +#include + u3_noun u3qc_end(u3_atom a, u3_atom b, @@ -16,26 +18,48 @@ u3qc_end(u3_atom a, else if ( !_(u3a_is_cat(b)) ) { return u3k(c); } - else { - c3_g a_g = a; - c3_w b_w = b; - c3_w len_w = u3r_met(a_g, c); - if ( 0 == b_w ) { - return 0; - } - else if ( b_w >= len_w ) { - return u3k(c); - } - else { + c3_g a_g = a; + c3_w b_w = b; + c3_w len_w = u3r_met(a_g, c); + + if ( 0 == b_w ) { + return 0; + } + if ( b_w >= len_w ) { + return u3k(c); + } + + // bob-aware fast path for byte-aligned prefixes: mmap the blob + // and memcpy only the requested leading bytes. equivalent to + // cut(a_g, 0, b_w, c) — same mmap logic. + // + if ( (a_g >= 3) && (c3y == u3a_is_bob(c)) ) { + c3_d map_d = 0; + const c3_y* map_y = u3r_blob_map(c, &map_d); + + if ( map_y ) { + c3_g shf_g = a_g - 3; + c3_d byt_d = (c3_d)b_w << shf_g; // bytes to copy + c3_d cpy_d = (byt_d > map_d) ? 
map_d : byt_d; + u3i_slab sab_u; u3i_slab_init(&sab_u, a_g, b_w); - u3r_chop(a_g, 0, b_w, 0, sab_u.buf_w, c); + if ( cpy_d ) { + memcpy(sab_u.buf_y, map_y, (size_t)cpy_d); + } + u3r_blob_unmap(map_y, map_d); return u3i_slab_mint(&sab_u); } + // mmap failed — fall through } + + u3i_slab sab_u; + u3i_slab_init(&sab_u, a_g, b_w); + u3r_chop(a_g, 0, b_w, 0, sab_u.buf_w, c); + return u3i_slab_mint(&sab_u); } u3_noun diff --git a/pkg/noun/jets/c/rap.c b/pkg/noun/jets/c/rap.c index fffe36280e..4e4e4cdf8d 100644 --- a/pkg/noun/jets/c/rap.c +++ b/pkg/noun/jets/c/rap.c @@ -5,6 +5,7 @@ #include "noun.h" +#include u3_noun u3qc_rap(u3_atom a, @@ -50,17 +51,34 @@ u3i_slab_init(&sab_u, a_g, tot_w); } - /* Chop the list atoms in. + /* Chop the list atoms in. Byte-aligned bloqs and bob atoms + take the mmap+memcpy fast path. */ { u3_noun cab = b; - c3_w pos_w = 0; + c3_w pos_w = 0; + c3_g shf_g = (a_g >= 3) ? (a_g - 3) : 0; while ( 0 != cab ) { u3_noun h_cab = u3h(cab); c3_w len_w = u3r_met(a_g, h_cab); - u3r_chop(a_g, 0, len_w, pos_w, sab_u.buf_w, h_cab); + if ( a_g >= 3 ) { + c3_w pos_b = pos_w << shf_g; + c3_w len_b = len_w << shf_g; + + u3r_view vu_u; + u3r_view_init(&vu_u, h_cab); + c3_w cpy_w = (vu_u.len_w < len_b) ? 
vu_u.len_w : len_b; + if ( cpy_w ) { + memcpy(sab_u.buf_y + pos_b, vu_u.byt_y, cpy_w); + } + u3r_view_done(&vu_u); + } + else { + u3r_chop(a_g, 0, len_w, pos_w, sab_u.buf_w, h_cab); + } + pos_w += len_w; cab = u3t(cab); } diff --git a/pkg/noun/jets/c/rip.c b/pkg/noun/jets/c/rip.c index 8a9ed76e84..07e5127077 100644 --- a/pkg/noun/jets/c/rip.c +++ b/pkg/noun/jets/c/rip.c @@ -167,32 +167,47 @@ u3qc_rip(u3_atom a, return u3m_bail(c3__fail); } - if ( 1 == b ) { - return _block_rip(a, c); + // correctness: the _bit_rip / _block_rip / u3r_chop paths all read + // [c]'s buf_w directly (via u3r_word) or call u3r_blob_load per + // chunk — the former returns seq_w (wrong) for bob atoms, the + // latter allocates a full-size atom per iteration (O(n*blob) + // memory churn). Materialize the bob once up front: single full + // loom allocation, correct reads thereafter. A fully zero-copy + // rip would mmap once and build chunks directly, but that needs a + // deeper rewrite of _bit_rip and _block_rip. 
+ // + u3_atom mat = u3_none; + if ( c3y == u3a_is_bob(c) ) { + mat = u3r_blob_load(c, u3C.dir_c); + if ( u3_none == mat ) { + return u3m_bail(c3__fail); + } + c = mat; } - if ( 0 == a ) { - return _bit_rip(b, c); + u3_noun pro; + + if ( 1 == b ) { + pro = _block_rip(a, c); } + else if ( 0 == a ) { + pro = _bit_rip(b, c); + } + else { + u3i_slab sab_u; + pro = u3_nul; + c3_w len_w = DIVCEIL(u3r_met(a, c), b); - u3i_slab sab_u; - u3_noun pro = u3_nul; - //u3_noun *lit = &pro; - //u3_noun *hed; - //u3_noun *tal; - c3_w len_w = DIVCEIL(u3r_met(a, c), b); - - //for (c3_w i_w = 0; i_w < len_w; i_w++) { - for (c3_w i_w = len_w; 0 < i_w; i_w--) { - u3i_slab_init(&sab_u, a, b); - u3r_chop(a, (i_w - 1) * b, b, 0, sab_u.buf_w, c); - //*lit = u3i_defcons(&hed, &tal); - //*hed = u3i_slab_mint(&sab_u); - //lit = tal; - pro = u3nc(u3i_slab_mint(&sab_u), pro); + for (c3_w i_w = len_w; 0 < i_w; i_w--) { + u3i_slab_init(&sab_u, a, b); + u3r_chop(a, (i_w - 1) * b, b, 0, sab_u.buf_w, c); + pro = u3nc(u3i_slab_mint(&sab_u), pro); + } } - //*lit = u3_nul; + if ( u3_none != mat ) { + u3z(mat); + } return pro; } diff --git a/pkg/noun/jets/c/rsh.c b/pkg/noun/jets/c/rsh.c index 82a8671b8d..b8f168466c 100644 --- a/pkg/noun/jets/c/rsh.c +++ b/pkg/noun/jets/c/rsh.c @@ -6,6 +6,8 @@ #include "noun.h" +#include + u3_noun u3qc_rsh(u3_atom a, u3_atom b, @@ -17,23 +19,55 @@ u3qc_rsh(u3_atom a, else if ( !_(u3a_is_cat(b)) ) { return 0; } - else { - c3_g a_g = a; - c3_w b_w = b; - c3_w len_w = u3r_met(a_g, c); - if ( b_w >= len_w ) { - return 0; - } - else { + c3_g a_g = a; + c3_w b_w = b; + c3_w len_w = u3r_met(a_g, c); + + if ( b_w >= len_w ) { + return 0; + } + + c3_w wid_w = len_w - b_w; + + // bob-aware fast path for byte-aligned suffixes: mmap the blob + // and memcpy only the requested bytes starting at offset b_w. + // equivalent to cut(a_g, b_w, wid_w, c). 
+ // + if ( (a_g >= 3) && (c3y == u3a_is_bob(c)) ) { + c3_d map_d = 0; + const c3_y* map_y = u3r_blob_map(c, &map_d); + + if ( map_y ) { + c3_g shf_g = a_g - 3; + c3_d off_d = (c3_d)b_w << shf_g; + c3_d byt_d = (c3_d)wid_w << shf_g; + + c3_d cpy_d = byt_d; + if ( off_d >= map_d ) { + cpy_d = 0; + } + else if ( off_d + cpy_d > map_d ) { + cpy_d = map_d - off_d; + } + u3i_slab sab_u; - u3i_slab_init(&sab_u, a_g, (len_w - b_w)); + u3i_slab_init(&sab_u, a_g, wid_w); - u3r_chop(a_g, b_w, (len_w - b_w), 0, sab_u.buf_w, c); + if ( cpy_d ) { + memcpy(sab_u.buf_y, map_y + off_d, (size_t)cpy_d); + } + u3r_blob_unmap(map_y, map_d); return u3i_slab_mint(&sab_u); } + // mmap failed — fall through } + + u3i_slab sab_u; + u3i_slab_init(&sab_u, a_g, wid_w); + u3r_chop(a_g, b_w, wid_w, 0, sab_u.buf_w, c); + return u3i_slab_mint(&sab_u); } u3_noun diff --git a/pkg/noun/jets/c/swp.c b/pkg/noun/jets/c/swp.c index a77ea4eebd..37d301f5b2 100644 --- a/pkg/noun/jets/c/swp.c +++ b/pkg/noun/jets/c/swp.c @@ -6,6 +6,8 @@ #include "noun.h" +#include + u3_noun u3qc_swp(u3_atom a, u3_atom b) @@ -13,15 +15,49 @@ u3qc_swp(u3_atom a, if ( !_(u3a_is_cat(a)) || (a >= u3a_word_bits) ) { return u3m_bail(c3__fail); } - c3_g a_g = a; + c3_g a_g = a; c3_w len_w = u3r_met(a_g, b); u3i_slab sab_u; u3i_slab_init(&sab_u, a_g, len_w); + // byte-aligned fast path: mmap once, reverse bloqs via memcpy. + // the generic path below would invoke u3r_chop per bloq, each of + // which calls u3r_blob_load — an O(len_w * bob_size) disaster on + // large bob atoms. 
+ // + if ( a_g >= 3 ) { + c3_g shf_g = a_g - 3; + c3_w blq_b = (c3_w)1 << shf_g; // bytes per bloq + + u3r_view vu_u; + u3r_view_init(&vu_u, b); + + for ( c3_w i = 0; i < len_w; i++ ) { + c3_w src_b = i * blq_b; + c3_w dst_b = (len_w - i - 1) * blq_b; + c3_w cpy_b = blq_b; + + if ( src_b >= vu_u.len_w ) { + // past the view — dest bytes stay zero (slab_init) + continue; + } + if ( src_b + cpy_b > vu_u.len_w ) { + cpy_b = vu_u.len_w - src_b; + } + memcpy(sab_u.buf_y + dst_b, vu_u.byt_y + src_b, cpy_b); + } + + u3r_view_done(&vu_u); + return u3i_slab_mint(&sab_u); + } + + // bit-level fallback — slow for bobs (u3r_chop materializes per + // iteration) but correctness is preserved. + // for (c3_w i = 0; i < len_w; i++) { u3r_chop(a_g, i, 1, len_w - i - 1, sab_u.buf_w, b); } - + return u3i_slab_mint(&sab_u); } diff --git a/pkg/noun/jets/e/aes_cbc.c b/pkg/noun/jets/e/aes_cbc.c index c751f2240c..2b619e568c 100644 --- a/pkg/noun/jets/e/aes_cbc.c +++ b/pkg/noun/jets/e/aes_cbc.c @@ -20,10 +20,21 @@ typedef int (*urcrypt_cbc)(c3_y**, _cqea_cbc_help(c3_y* key_y, u3_atom iv, u3_atom msg, urcrypt_cbc low_f) { u3_atom ret; - c3_w met_w; c3_y iv_y[16]; - c3_y* msg_y = u3r_bytes_all(&met_w, msg); - size_t len = met_w; + + // urcrypt_cbc_* modifies the buffer in place and may realloc it + // via u3a_realloc, so we need a heap-owned writable buffer. + // view the input (mmap for bobs, no full-blob loom alloc) then + // copy into a fresh heap allocation. + // + u3r_view vu_u; + u3r_view_init(&vu_u, msg); + size_t len = vu_u.len_w; + c3_y* msg_y = u3a_malloc(len ? 
len : 1); + if ( len ) { + memcpy(msg_y, vu_u.byt_y, len); + } + u3r_view_done(&vu_u); u3r_bytes(0, 16, iv_y, iv); if ( 0 != (*low_f)(&msg_y, &len, key_y, iv_y, &u3a_realloc) ) { diff --git a/pkg/noun/jets/e/aes_siv.c b/pkg/noun/jets/e/aes_siv.c index 896a21b78d..527ba35dfb 100644 --- a/pkg/noun/jets/e/aes_siv.c +++ b/pkg/noun/jets/e/aes_siv.c @@ -106,22 +106,28 @@ _cqea_siv_en(c3_y* key_y, urcrypt_siv low_f) { u3_noun ret; - c3_w txt_w; c3_w soc_w; - c3_y *txt_y, *out_y, iv_y[16]; + c3_y *out_y, iv_y[16]; urcrypt_aes_siv_data *dat_u; + // zero-copy view on the plaintext. urcrypt_siv's signature + // takes non-const c3_y* but treats the buffer as input-only; + // the encrypted output goes to out_y. cast-away is safe. + // + u3r_view vu_u; + u3r_view_init(&vu_u, txt); + c3_w txt_w = vu_u.len_w; + dat_u = _cqea_ads_alloc(ads, &soc_w); - txt_y = u3r_bytes_all(&txt_w, txt); - out_y = u3a_malloc(txt_w); + out_y = u3a_malloc(txt_w ? txt_w : 1); - ret = ( 0 != (*low_f)(txt_y, txt_w, dat_u, soc_w, key_y, iv_y, out_y) ) + ret = ( 0 != (*low_f)((c3_y*)vu_u.byt_y, txt_w, dat_u, soc_w, key_y, iv_y, out_y) ) ? u3_none : u3nt(u3i_bytes(16, iv_y), u3i_word(txt_w), u3i_bytes(txt_w, out_y)); - u3a_free(txt_y); + u3r_view_done(&vu_u); u3a_free(out_y); _cqea_ads_free(dat_u); return ret; @@ -143,21 +149,29 @@ _cqea_siv_de(c3_y* key_y, else { u3_noun ret; c3_w soc_w; - c3_y *txt_y, *out_y, iv_y[16]; + c3_y *out_y, iv_y[16]; urcrypt_aes_siv_data *dat_u; u3r_bytes(0, 16, iv_y, iv); dat_u = _cqea_ads_alloc(ads, &soc_w); - txt_y = u3r_bytes_alloc(0, txt_w, txt); - out_y = u3a_malloc(txt_w); - if ( 0 != (*low_f)(txt_y, txt_w, dat_u, soc_w, key_y, iv_y, out_y) ) { + // zero-copy view on the ciphertext; zero-pad if atom is shorter + // than the declared length. + // + u3r_view vu_u; + u3r_view_padded(&vu_u, txt, txt_w); + out_y = u3a_malloc(txt_w ? 
txt_w : 1); + + if ( 0 != (*low_f)((c3_y*)vu_u.byt_y, txt_w, dat_u, soc_w, key_y, iv_y, out_y) ) { + u3r_view_done(&vu_u); + u3a_free(out_y); + _cqea_ads_free(dat_u); return u3m_bail(c3__evil); } ret = u3nc(0, u3i_bytes(txt_w, out_y)); - u3a_free(txt_y); + u3r_view_done(&vu_u); u3a_free(out_y); _cqea_ads_free(dat_u); diff --git a/pkg/noun/jets/e/chacha.c b/pkg/noun/jets/e/chacha.c index e42f8e221c..82be7b3ecd 100644 --- a/pkg/noun/jets/e/chacha.c +++ b/pkg/noun/jets/e/chacha.c @@ -6,6 +6,8 @@ #include "noun.h" #include "urcrypt.h" +#include + static u3_atom _cqe_chacha_crypt(u3_atom rounds, u3_atom key, u3_atom nonce, u3_atom counter, u3_atom wid, u3_atom dat) { @@ -14,16 +16,29 @@ if ( !u3r_word_fit(&rounds_w, rounds) || !u3r_word_fit(&wid_w, wid) || c3n == u3r_safe_chub(counter, &counter_d) ) { return u3m_bail(c3__fail); } - else { - c3_y key_y[32], nonce_y[8]; - u3r_bytes(0, 32, key_y, key); - u3r_bytes(0, 8, nonce_y, nonce); - c3_y *dat_y = u3r_bytes_alloc(0, wid_w, dat); - urcrypt_chacha_crypt(rounds_w, key_y, nonce_y, counter_d, wid_w, dat_y); - u3_noun cry = u3i_bytes(wid_w, dat_y); - u3a_free(dat_y); - return u3i_cell(wid, cry); + + c3_y key_y[32], nonce_y[8]; + u3r_bytes(0, 32, key_y, key); + u3r_bytes(0, 8, nonce_y, nonce); + + // allocate output slab directly; copy plaintext from view (mmap + // for bobs) into the slab; chacha mutates in place. saves both + // a full-blob loom allocation (from u3r_bytes_alloc → u3r_blob_load) + // and the intermediate heap buffer. 
+ // + u3i_slab sab_u; + u3i_slab_init(&sab_u, 3, wid_w); + + if ( wid_w ) { + u3r_view vu_u; + u3r_view_padded(&vu_u, dat, wid_w); + memcpy(sab_u.buf_y, vu_u.byt_y, wid_w); + u3r_view_done(&vu_u); } + + urcrypt_chacha_crypt(rounds_w, key_y, nonce_y, counter_d, wid_w, sab_u.buf_y); + + return u3i_cell(wid, u3i_slab_mint_bytes(&sab_u)); } u3_noun diff --git a/pkg/noun/jets/e/ed_sign.c b/pkg/noun/jets/e/ed_sign.c index ab68052466..0bc0002d5d 100644 --- a/pkg/noun/jets/e/ed_sign.c +++ b/pkg/noun/jets/e/ed_sign.c @@ -20,13 +20,13 @@ else if ( !u3r_word_fit(&len_w, len) ) { return u3m_bail(c3__fail); } - else { - c3_y sig_y[64]; - c3_y* dat_y = u3r_bytes_alloc(0, len_w, dat); - urcrypt_ed_sign(dat_y, len_w, sed_y, sig_y); - u3a_free(dat_y); - return u3i_bytes(64, sig_y); - } + + c3_y sig_y[64]; + u3r_view vu_u; + u3r_view_padded(&vu_u, dat, len_w); + urcrypt_ed_sign((c3_y*)vu_u.byt_y, len_w, sed_y, sig_y); + u3r_view_done(&vu_u); + return u3i_bytes(64, sig_y); } u3_noun @@ -61,13 +61,13 @@ else if ( !u3r_word_fit(&len_w, len) ) { return u3m_bail(c3__fail); } - else { - c3_y sig_y[64]; - c3_y* dat_y = u3r_bytes_alloc(0, len_w, dat); - urcrypt_ed_sign_raw(dat_y, len_w, pub_y, sek_y, sig_y); - u3a_free(dat_y); - return u3i_bytes(64, sig_y); - } + + c3_y sig_y[64]; + u3r_view vu_u; + u3r_view_padded(&vu_u, dat, len_w); + urcrypt_ed_sign_raw((c3_y*)vu_u.byt_y, len_w, pub_y, sek_y, sig_y); + u3r_view_done(&vu_u); + return u3i_bytes(64, sig_y); } u3_noun @@ -97,16 +97,13 @@ // hoon calls luck, which crashes return u3m_bail(c3__exit); } - else { - c3_y sig_y[64]; - c3_w met_w; - c3_y* msg_y = u3r_bytes_all(&met_w, msg); - urcrypt_ed_sign(msg_y, met_w, sed_y, sig_y); - u3a_free(msg_y); - - return u3i_bytes(64, sig_y); - } + c3_y sig_y[64]; + u3r_view vu_u; + u3r_view_init(&vu_u, msg); + urcrypt_ed_sign((c3_y*)vu_u.byt_y, vu_u.len_w, sed_y, sig_y); + u3r_view_done(&vu_u); + return u3i_bytes(64, sig_y); } u3_noun @@ -138,16 +135,13 @@ // hoon asserts size return 
u3m_bail(c3__exit); } - else { - c3_y sig_y[64]; - c3_w met_w; - c3_y* msg_y = u3r_bytes_all(&met_w, msg); - urcrypt_ed_sign_raw(msg_y, met_w, pub_y, sek_y, sig_y); - u3a_free(msg_y); - - return u3i_bytes(64, sig_y); - } + c3_y sig_y[64]; + u3r_view vu_u; + u3r_view_init(&vu_u, msg); + urcrypt_ed_sign_raw((c3_y*)vu_u.byt_y, vu_u.len_w, pub_y, sek_y, sig_y); + u3r_view_done(&vu_u); + return u3i_bytes(64, sig_y); } u3_noun diff --git a/pkg/noun/jets/e/ed_veri.c b/pkg/noun/jets/e/ed_veri.c index 0f09e1cec9..565497e10e 100644 --- a/pkg/noun/jets/e/ed_veri.c +++ b/pkg/noun/jets/e/ed_veri.c @@ -19,13 +19,13 @@ !u3r_word_fit(&len_w, len) ) { return c3n; } - else { - c3_y* dat_y = u3r_bytes_alloc(0, len_w, dat); - c3_t val_t = urcrypt_ed_veri(dat_y, len_w, pub_y, sig_y); - u3a_free(dat_y); - return val_t ? c3y : c3n; - } + u3r_view vu_u; + u3r_view_padded(&vu_u, dat, len_w); + c3_t val_t = urcrypt_ed_veri((c3_y*)vu_u.byt_y, len_w, pub_y, sig_y); + u3r_view_done(&vu_u); + + return val_t ? c3y : c3n; } u3_noun @@ -58,14 +58,13 @@ (0 != u3r_bytes_fit(32, pub_y, pk)) ) { return c3n; } - else { - c3_w met_w; - c3_y* mes_y = u3r_bytes_all(&met_w, m); - c3_t val_t = urcrypt_ed_veri(mes_y, met_w, pub_y, sig_y); - u3a_free(mes_y); - return val_t ? c3y : c3n; - } + u3r_view vu_u; + u3r_view_init(&vu_u, m); + c3_t val_t = urcrypt_ed_veri((c3_y*)vu_u.byt_y, vu_u.len_w, pub_y, sig_y); + u3r_view_done(&vu_u); + + return val_t ? c3y : c3n; } u3_noun diff --git a/pkg/noun/jets/e/leer.c b/pkg/noun/jets/e/leer.c index 67cbb1a4a0..5fa4c190ce 100644 --- a/pkg/noun/jets/e/leer.c +++ b/pkg/noun/jets/e/leer.c @@ -6,22 +6,22 @@ #include "noun.h" +#include + +// _leer_cut_view(): slice bytes [pos_w, pos_w+len_w) out of [src_y] +// into a fresh byte-atom. used on raw bytes from a u3r_view so we +// never go through u3r_bytes (which would materialize a bob). 
+// static u3_atom -_leer_cut(c3_w pos_w, c3_w len_w, u3_atom src) +_leer_cut_view(const c3_y* src_y, c3_w pos_w, c3_w len_w) { if ( 0 == len_w ) { return 0; } - else { - u3i_slab sab_u; - u3i_slab_bare(&sab_u, 3, len_w); - // XX: 64 what? - sab_u.buf_w[sab_u.len_w - 1] = 0; - - u3r_bytes(pos_w, len_w, sab_u.buf_y, src); - - return u3i_slab_mint_bytes(&sab_u); - } + u3i_slab sab_u; + u3i_slab_init(&sab_u, 3, len_w); + memcpy(sab_u.buf_y, src_y + pos_w, len_w); + return u3i_slab_mint_bytes(&sab_u); } // Leaving the lore jet in place for backwards compatibility. @@ -30,22 +30,26 @@ _leer_cut(c3_w pos_w, c3_w len_w, u3_atom src) u3_noun u3qe_lore(u3_atom lub) { - c3_w len_w = u3r_met(3, lub); - c3_w pos_w = 0; - u3_noun tez = u3_nul; + u3r_view vu_u; + u3r_view_init(&vu_u, lub); + + const c3_y* src_y = vu_u.byt_y; + c3_w len_w = vu_u.len_w; + c3_w pos_w = 0; + u3_noun tez = u3_nul; while ( 1 ) { c3_w meg_w = 0; c3_y end_y; - c3_y byt_y; + while ( 1 ) { if ( pos_w >= len_w ) { byt_y = 0; end_y = c3y; break; } - byt_y = u3r_byte(pos_w + meg_w, lub); + byt_y = src_y[pos_w + meg_w]; if ( (10 == byt_y) || (0 == byt_y) ) { end_y = __(byt_y == 0); @@ -54,15 +58,18 @@ u3qe_lore(u3_atom lub) } if ((byt_y == 0) && ((pos_w + meg_w + 1) < len_w)) { + u3r_view_done(&vu_u); return u3m_bail(c3__exit); } if ( !_(end_y) && pos_w >= len_w ) { + u3r_view_done(&vu_u); return u3kb_flop(tez); } else { - tez = u3nc(_leer_cut(pos_w, meg_w, lub), tez); + tez = u3nc(_leer_cut_view(src_y, pos_w, meg_w), tez); if ( _(end_y) ) { + u3r_view_done(&vu_u); return u3kb_flop(tez); } pos_w += (meg_w + 1); @@ -90,8 +97,13 @@ u3qe_leer(u3_atom txt) u3_noun pro; u3_noun* lit = &pro; + u3r_view vu_u; + u3r_view_init(&vu_u, txt); + const c3_y* src_y = vu_u.byt_y; + c3_w len_w = vu_u.len_w; + { - c3_w pos_w, i_w = 0, len_w = u3r_met(3, txt); + c3_w pos_w, i_w = 0; u3_noun* hed; u3_noun* tel; @@ -99,7 +111,7 @@ u3qe_leer(u3_atom txt) // scan till end or newline // for ( pos_w = i_w; i_w < len_w; ++i_w ) { - if 
( 10 == u3r_byte(i_w, txt) ) { + if ( 10 == src_y[i_w] ) { break; } } @@ -107,7 +119,7 @@ u3qe_leer(u3_atom txt) // append to list // *lit = u3i_defcons(&hed, &tel); - *hed = _leer_cut(pos_w, i_w - pos_w, txt); + *hed = _leer_cut_view(src_y, pos_w, i_w - pos_w); lit = tel; i_w++; @@ -115,6 +127,7 @@ u3qe_leer(u3_atom txt) } *lit = u3_nul; + u3r_view_done(&vu_u); return pro; } From 8a2cb3e1aeb99620d3e5244ec96780c403bb602c Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Wed, 15 Apr 2026 21:06:07 -0500 Subject: [PATCH 15/31] wip: fixed blob gc --- pkg/noun/allocate.c | 39 +++++++ pkg/noun/imprison.c | 31 +++++- pkg/noun/jets/c/sew.c | 19 ++++ pkg/noun/jets/e/bytestream.c | 199 +++++++++++++++++++++-------------- pkg/noun/jets/e/zlib.c | 27 +++-- pkg/noun/manage.c | 7 +- pkg/noun/options.h | 1 + pkg/noun/retrieve.c | 67 +++++++++++- pkg/noun/serial.c | 31 ++++-- pkg/noun/vortex.h | 31 +++--- pkg/vere/disk.c | 17 +-- pkg/vere/io/mesa.c | 28 ++++- pkg/vere/mars.c | 112 ++++++++++---------- 13 files changed, 417 insertions(+), 192 deletions(-) diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index 99c0c8f11d..7748fc349e 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -873,6 +873,37 @@ _me_gain_south(u3_noun dog) } } +/* _me_bob_dead(): handle a bob atom whose loom refcount just hit zero. +** +** Removes the atom from the bob_p interning index. If the log +** refcount (blb_p) and lease map (rev_p) are also empty, deletes +** the backing blob file via the registered callback. 
+*/ +static void +_me_bob_dead(u3a_atom* atm_u) +{ + c3_h mug_h = atm_u->mug_h; + c3_w seq_w = atm_u->buf_w[0]; + c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; + u3_noun bid = u3i_chub(bid_d); + + u3h_del(u3H->ban_u.bob_p, bid); + + c3_w log_w = 0; + u3_weak lv = u3h_get(u3H->ban_u.blb_p, bid); + if ( u3_none != lv ) { + u3r_safe_word(lv, &log_w); + } + + c3_o has_lea = __(u3_none != u3h_get(u3H->ban_u.rev_p, bid)); + + u3z(bid); + + if ( 0 == log_w && c3n == has_lea && u3C.blob_delete_f ) { + u3C.blob_delete_f(mug_h, seq_w); + } +} + /* _me_lose_north(): lose on a north road. */ static void @@ -905,6 +936,10 @@ _me_lose_north(u3_noun dog) } } else { + u3a_atom* atm_u = (u3a_atom*)box_u; + if ( atm_u->len_w & u3a_blob_flag ) { + _me_bob_dead(atm_u); + } u3a_wfree(box_u); } } @@ -944,6 +979,10 @@ _me_lose_south(u3_noun dog) } } else { + u3a_atom* atm_u = (u3a_atom*)box_u; + if ( atm_u->len_w & u3a_blob_flag ) { + _me_bob_dead(atm_u); + } u3a_wfree(box_u); } } diff --git a/pkg/noun/imprison.c b/pkg/noun/imprison.c index 4969d5bace..9484504941 100644 --- a/pkg/noun/imprison.c +++ b/pkg/noun/imprison.c @@ -2,11 +2,13 @@ #include "imprison.h" +#include "hashtable.h" #include "jets/k.h" #include "jets/q.h" #include "manage.h" #include "retrieve.h" #include "trace.h" +#include "vortex.h" #include "xtract.h" #if defined(__x86_64__) @@ -832,16 +834,33 @@ u3i_vmolt(u3_noun som, u3i_molt_pair pairs[], c3_z len_z) return pro; } -/* u3i_blob(): construct a bob atom (blob reference). +/* u3i_blob(): construct or intern a bob atom (blob reference). ** ** A bob atom is an indirect atom with the MSB of len_w set. ** [mug_h] is the 31-bit mug of the content (stored in mug_h and used ** as the blob directory name). ** [seq_w] is the sequence number within $pier/.urb/bob//. +** +** Interned: at most one bob atom exists per (mug, seq) pair. +** bob_p maps bid -> loom offset of the canonical atom. 
*/ u3_atom u3i_blob(c3_h mug_h, c3_w seq_w) { + c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; + u3_noun bid = u3i_chub(bid_d); + + // check for an existing interned bob atom + // + u3_weak got = u3h_get(u3H->ban_u.bob_p, bid); + if ( u3_none != got ) { + c3_w off_w = 0; + u3r_safe_word(got, &off_w); + u3z(bid); + u3_atom bob = u3a_to_pug(off_w); + return u3k(bob); + } + // allocate: u3a_atom header + 1 word for seq_w // c3_w* nov_w = u3a_walloc(1 + c3_wiseof(u3a_atom)); @@ -849,8 +868,14 @@ u3i_blob(c3_h mug_h, c3_w seq_w) vat_u->use_w = 1; vat_u->mug_h = mug_h; - vat_u->len_w = 1 | u3a_blob_flag; // 1 word of payload + bob flag + vat_u->len_w = 1 | u3a_blob_flag; vat_u->buf_w[0] = seq_w; - return u3a_to_pug(u3a_outa(nov_w)); + // store loom offset (not a noun ref) in bob_p + // + c3_w off_w = u3a_outa(nov_w); + u3h_put(u3H->ban_u.bob_p, bid, u3i_word(off_w)); + u3z(bid); + + return u3a_to_pug(off_w); } diff --git a/pkg/noun/jets/c/sew.c b/pkg/noun/jets/c/sew.c index d37b68e79d..ea0615ded5 100644 --- a/pkg/noun/jets/c/sew.c +++ b/pkg/noun/jets/c/sew.c @@ -28,15 +28,33 @@ u3qc_sew(u3_atom a, u3i_slab sab_u; c3_w* src_w; c3_w len_src_w; + + // view is only used for the bob path; done() is a no-op on a + // zero-init struct, so we can unconditionally call it at the end. + // + u3r_view vu_u = {0}; + if ( _(u3a_is_cat(e)) ) { len_src_w = e ? 1 : 0; src_w = &e; } + else if ( c3y == u3a_is_bob(e) ) { + // zero-copy view on the blob file. the legacy src_u->buf_w + // cast would have returned seq_w for bobs; mmap is page-aligned + // (therefore word-aligned) and POSIX guarantees bytes past EOF + // within the mapped last page read as zero, so the word-at-a-time + // read by u3r_chop_words is safe. 
+ // + u3r_view_init(&vu_u, e); + len_src_w = (vu_u.len_w + u3a_word_bytes - 1) >> u3a_word_bytes_shift; + src_w = (c3_w*)vu_u.byt_y; + } else { u3a_atom* src_u = u3a_to_ptr(e); len_src_w = src_u->len_w; src_w = src_u->buf_w; } + u3i_slab_init(&sab_u, a_g, c3_max(len_e_w, b_w + c_w)); u3r_chop_words(a_g, 0, b_w, 0, sab_u.buf_w, len_src_w, src_w); u3r_chop(a_g, 0, c_w, b_w, sab_u.buf_w, d); @@ -49,6 +67,7 @@ u3qc_sew(u3_atom a, len_src_w, src_w); } + u3r_view_done(&vu_u); return u3i_slab_mint(&sab_u); } diff --git a/pkg/noun/jets/e/bytestream.c b/pkg/noun/jets/e/bytestream.c index 47bc279e18..70eae0a62d 100644 --- a/pkg/noun/jets/e/bytestream.c +++ b/pkg/noun/jets/e/bytestream.c @@ -23,24 +23,31 @@ _x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { u3m_bail(c3__exit); } } +// _x_octs_buffer(): open a byte view of [*q_octs], clamped to [*p_octs]. +// +// [vu_u] must be zero-initialized by the caller; it is filled in on +// success and the caller MUST call u3r_view_done(vu_u) before the +// function exits — including error paths — otherwise the mmap or +// heap buffer leaks. vu_u->byt_y is the bytes, *len_w is the clamped +// significant byte length, *lead_w is the count of implicit leading +// zero bytes (p_octs - met). +// +// The legacy version read (c3_y*)ptr_a->buf_w directly, which for a +// bob atom returned seq_w rather than the blob's content. Going +// through u3r_view gets the real bytes (mmap for bobs). 
+// static c3_o -_x_octs_buffer(u3_atom* p_octs, u3_atom *q_octs, - c3_w* p_octs_w, c3_y** buf_y, - c3_w* len_w, c3_w* lead_w) +_x_octs_buffer(u3r_view* vu_u, + u3_atom* p_octs, u3_atom *q_octs, + c3_w* p_octs_w, + c3_w* len_w, c3_w* lead_w) { if (c3n == u3r_safe_word(*p_octs, p_octs_w)) { return c3n; } - *len_w = u3r_met(3, *q_octs); - - if (c3y == u3a_is_cat(*q_octs)) { - *buf_y = (c3_y*)q_octs; - } - else { - u3a_atom* ptr_a = u3a_to_ptr(*q_octs); - *buf_y = (c3_y*)ptr_a->buf_w; - } + u3r_view_init(vu_u, *q_octs); + *len_w = vu_u->len_w; *lead_w = 0; @@ -58,15 +65,16 @@ u3_noun _qe_bytestream_rip_octs(u3_atom p_octs, u3_atom q_octs) { c3_w p_octs_w, len_w, lead_w; - c3_y* buf_y; + u3r_view vu_u = {0}; - if (c3n == _x_octs_buffer(&p_octs, &q_octs, - &p_octs_w, &buf_y, - &len_w, &lead_w)){ + if (c3n == _x_octs_buffer(&vu_u, &p_octs, &q_octs, + &p_octs_w, &len_w, &lead_w)){ + u3r_view_done(&vu_u); return u3_none; } if (p_octs_w == 0) { + u3r_view_done(&vu_u); return u3_nul; } @@ -76,12 +84,13 @@ _qe_bytestream_rip_octs(u3_atom p_octs, u3_atom q_octs) { rip = u3nc(0x0, rip); } - buf_y += len_w - 1; + const c3_y* buf_y = vu_u.byt_y + len_w - 1; while (len_w--) { rip = u3nc(*(buf_y--), rip); } + u3r_view_done(&vu_u); return rip; } @@ -110,26 +119,30 @@ _qe_bytestream_cat_octs(u3_noun octs_a, u3_noun octs_b) { c3_w len_w, lem_w; c3_w lead_w, leaf_w; - c3_y* sea_y; - c3_y* seb_y; + u3r_view va_u = {0}, vb_u = {0}; - if (c3n == _x_octs_buffer(&p_octs_a, &q_octs_a, - &p_octs_a_w, &sea_y, - &len_w, &lead_w)) { + if (c3n == _x_octs_buffer(&va_u, &p_octs_a, &q_octs_a, + &p_octs_a_w, &len_w, &lead_w)) { + u3r_view_done(&va_u); return u3_none; } - if (c3n == _x_octs_buffer(&p_octs_b, &q_octs_b, - &p_octs_b_w, &seb_y, - &lem_w, &leaf_w)) { + if (c3n == _x_octs_buffer(&vb_u, &p_octs_b, &q_octs_b, + &p_octs_b_w, &lem_w, &leaf_w)) { + u3r_view_done(&va_u); + u3r_view_done(&vb_u); return u3_none; } if (p_octs_a_w == 0) { + u3r_view_done(&va_u); + u3r_view_done(&vb_u); return 
u3k(octs_b); } if (p_octs_b_w == 0) { + u3r_view_done(&va_u); + u3r_view_done(&vb_u); return u3k(octs_a); } @@ -148,13 +161,16 @@ _qe_bytestream_cat_octs(u3_noun octs_a, u3_noun octs_b) { u3i_slab_bare(&sab_u, 3, (c3_d)p_octs_a_w + lem_w); sab_u.buf_w[sab_u.len_w - 1] = 0; - memcpy(sab_u.buf_y, sea_y, len_w); + memcpy(sab_u.buf_y, va_u.byt_y, len_w); memset(sab_u.buf_y + len_w, 0, lead_w); - memcpy(sab_u.buf_y + p_octs_a_w, seb_y, lem_w); + memcpy(sab_u.buf_y + p_octs_a_w, vb_u.byt_y, lem_w); u3_noun q_octs = u3i_slab_moot(&sab_u); ret = u3nc(u3i_chub(p_octs_d), q_octs); } + + u3r_view_done(&va_u); + u3r_view_done(&vb_u); return ret; } @@ -245,7 +261,6 @@ _qe_bytestream_can_octs(u3_noun octs_list) { sab_u.buf_w[sab_u.len_w - 1] = 0; - c3_y* sea_y; u3_atom p_octs, q_octs; c3_w p_octs_w, q_octs_w; c3_w len_w, lead_w; @@ -261,18 +276,21 @@ _qe_bytestream_can_octs(u3_noun octs_list) { octs = u3h(octs_list); _x_octs(octs, &p_octs, &q_octs); - if (c3n == _x_octs_buffer(&p_octs, &q_octs, - &p_octs_w, &sea_y, - &len_w, &lead_w)){ + + u3r_view vu_u = {0}; + if (c3n == _x_octs_buffer(&vu_u, &p_octs, &q_octs, + &p_octs_w, &len_w, &lead_w)){ + u3r_view_done(&vu_u); return u3_none; } if (p_octs_w == 0) { + u3r_view_done(&vu_u); octs_list = u3t(octs_list); continue; } - memcpy(buf_y, sea_y, len_w); + memcpy(buf_y, vu_u.byt_y, len_w); buf_y += len_w; wit_d += len_w; @@ -284,6 +302,7 @@ _qe_bytestream_can_octs(u3_noun octs_list) { wit_d += lead_w; } + u3r_view_done(&vu_u); octs_list = u3t(octs_list); } @@ -317,28 +336,29 @@ _qe_bytestream_skip_line(u3_atom pos, u3_noun octs) c3_w p_octs_w; c3_w len_w, lead_w; - c3_y* sea_y; + u3r_view vu_u = {0}; - if (c3n == _x_octs_buffer(&p_octs, &q_octs, - &p_octs_w, &sea_y, - &len_w, &lead_w)) { + if (c3n == _x_octs_buffer(&vu_u, &p_octs, &q_octs, + &p_octs_w, &len_w, &lead_w)) { + u3r_view_done(&vu_u); return u3_none; } while (pos_w < len_w) { - if (*(sea_y + pos_w) == '\n') { + if (*(vu_u.byt_y + pos_w) == '\n') { break; } pos_w++; } // 
Newline not found, position at the end - if (*(sea_y + pos_w) != '\n') { + if (*(vu_u.byt_y + pos_w) != '\n') { pos_w = p_octs; } else { pos_w++; } + u3r_view_done(&vu_u); return u3nc(u3i_word(pos_w), u3k(octs)); } u3_noun @@ -372,17 +392,18 @@ _qe_bytestream_find_byte(u3_atom bat, u3_atom pos, u3_noun octs) c3_w p_octs_w; c3_w len_w, lead_w; - c3_y* sea_y; + u3r_view vu_u = {0}; - if (c3n == _x_octs_buffer(&p_octs, &q_octs, - &p_octs_w, &sea_y, - &len_w, &lead_w)) { + if (c3n == _x_octs_buffer(&vu_u, &p_octs, &q_octs, + &p_octs_w, &len_w, &lead_w)) { + u3r_view_done(&vu_u); return u3_none; } while (pos_w < len_w) { - if (*(sea_y + pos_w) == bat_w) { + if (*(vu_u.byt_y + pos_w) == bat_w) { + u3r_view_done(&vu_u); return u3nc(u3_nul, u3i_word(pos_w)); } @@ -397,9 +418,11 @@ _qe_bytestream_find_byte(u3_atom bat, u3_atom pos, u3_noun octs) // the first leading zero. // if (pos_w < p_octs && bat_w == 0) { + u3r_view_done(&vu_u); return u3nc(u3_nul, u3i_word(pos_w)); } + u3r_view_done(&vu_u); return u3_nul; } u3_noun @@ -434,19 +457,20 @@ _qe_bytestream_seek_byte(u3_atom bat, u3_atom pos, u3_noun octs) c3_w p_octs_w; c3_w len_w, lead_w; - c3_y* sea_y; + u3r_view vu_u = {0}; - if (c3n == _x_octs_buffer(&p_octs, &q_octs, - &p_octs_w, &sea_y, - &len_w, &lead_w)) { + if (c3n == _x_octs_buffer(&vu_u, &p_octs, &q_octs, + &p_octs_w, &len_w, &lead_w)) { + u3r_view_done(&vu_u); return u3_none; } while (pos_w < len_w) { - if (*(sea_y + pos_w) == bat_w) { + if (*(vu_u.byt_y + pos_w) == bat_w) { u3_noun idx = u3nc(u3_nul, u3i_word(pos_w)); u3_noun new_bays = u3nc(u3i_word(pos_w), u3k(octs)); + u3r_view_done(&vu_u); return u3nc(idx, new_bays); } @@ -458,9 +482,11 @@ _qe_bytestream_seek_byte(u3_atom bat, u3_atom pos, u3_noun octs) if (pos_w < p_octs && bat_w == 0) { u3_noun idx = u3nc(u3_nul, u3i_word(pos_w)); u3_noun new_bays = u3nc(u3i_word(pos_w), u3k(octs)); + u3r_view_done(&vu_u); return u3nc(idx, new_bays); } + u3r_view_done(&vu_u); return u3nc(u3_nul, u3nc(u3k(pos), 
u3k(octs))); } @@ -494,22 +520,23 @@ _qe_bytestream_read_byte(u3_atom pos, u3_noun octs) c3_w p_octs_w; c3_w len_w, lead_w; - c3_y* sea_y; + u3r_view vu_u = {0}; - if (c3n == _x_octs_buffer(&p_octs, &q_octs, - &p_octs_w, &sea_y, - &len_w, &lead_w)) { + if (c3n == _x_octs_buffer(&vu_u, &p_octs, &q_octs, + &p_octs_w, &len_w, &lead_w)) { + u3r_view_done(&vu_u); return u3_none; } if (pos_w + 1 > p_octs_w) { + u3r_view_done(&vu_u); u3m_bail(c3__exit); } c3_y bat_y; if (pos_w < len_w) { - bat_y = *(sea_y + pos_w); + bat_y = *(vu_u.byt_y + pos_w); } else { bat_y = 0; @@ -517,6 +544,7 @@ _qe_bytestream_read_byte(u3_atom pos, u3_noun octs) u3_noun new_bays = u3nc(u3i_word(pos_w + 1), u3k(octs)); + u3r_view_done(&vu_u); return u3nc(bat_y, new_bays); } @@ -556,15 +584,16 @@ _qe_bytestream_read_octs(u3_atom n, u3_atom pos, u3_noun octs) c3_w p_octs_w; c3_w len_w, lead_w; - c3_y* sea_y; + u3r_view vu_u = {0}; - if (c3n == _x_octs_buffer(&p_octs, &q_octs, - &p_octs_w, &sea_y, - &len_w, &lead_w)) { + if (c3n == _x_octs_buffer(&vu_u, &p_octs, &q_octs, + &p_octs_w, &len_w, &lead_w)) { + u3r_view_done(&vu_u); return u3_none; } if (pos_w + n_w > p_octs_w) { + u3r_view_done(&vu_u); u3m_bail(c3__exit); } @@ -593,7 +622,7 @@ _qe_bytestream_read_octs(u3_atom n, u3_atom pos, u3_noun octs) u3i_slab_bare(&sab_u, 3, n_w); sab_u.buf_w[sab_u.len_w - 1] = 0; - memcpy(sab_u.buf_y, sea_y + pos_w, red_w); + memcpy(sab_u.buf_y, vu_u.byt_y + pos_w, red_w); if (red_w < n_w) { memset(sab_u.buf_y + red_w, 0, (n_w - red_w)); @@ -604,6 +633,7 @@ _qe_bytestream_read_octs(u3_atom n, u3_atom pos, u3_noun octs) u3_noun new_bays = u3nc(u3i_word(pos_w + n_w), u3k(octs)); + u3r_view_done(&vu_u); return u3nc(read_octs, new_bays); } @@ -680,11 +710,11 @@ u3_noun _qe_bytestream_chunk(u3_atom size, u3_noun pos, u3_noun octs) c3_w p_octs_w; c3_w len_w, lead_w; - c3_y* sea_y; + u3r_view vu_u = {0}; - if (c3n == _x_octs_buffer(&p_octs, &q_octs, - &p_octs_w, &sea_y, - &len_w, &lead_w)) { + if (c3n == 
_x_octs_buffer(&vu_u, &p_octs, &q_octs, + &p_octs_w, &len_w, &lead_w)) { + u3r_view_done(&vu_u); return u3_none; } @@ -696,19 +726,20 @@ u3_noun _qe_bytestream_chunk(u3_atom size, u3_noun pos, u3_noun octs) c3_w rem = (p_octs - pos_w); if (rem < size) { - u3_noun octs = _qe_peek_octs(rem, pos_w, p_octs_w, sea_y, + u3_noun octs = _qe_peek_octs(rem, pos_w, p_octs_w, (c3_y*)vu_u.byt_y, len_w); hun = u3nc(octs, hun); pos_w += rem; } else { - u3_noun octs = _qe_peek_octs(size, pos_w, p_octs_w, sea_y, + u3_noun octs = _qe_peek_octs(size, pos_w, p_octs_w, (c3_y*)vu_u.byt_y, len_w); hun = u3nc(octs, hun); pos_w += size; } } + u3r_view_done(&vu_u); return u3kb_flop(hun); } @@ -747,11 +778,11 @@ _qe_bytestream_extract(u3_noun sea, u3_noun rac) c3_w p_octs_w; c3_w len_w, lead_w; - c3_y* sea_y; + u3r_view vu_u = {0}; - if (c3n == _x_octs_buffer(&p_octs, &q_octs, - &p_octs_w, &sea_y, - &len_w, &lead_w)) { + if (c3n == _x_octs_buffer(&vu_u, &p_octs, &q_octs, + &p_octs_w, &len_w, &lead_w)) { + u3r_view_done(&vu_u); return u3_none; } @@ -775,6 +806,7 @@ _qe_bytestream_extract(u3_noun sea, u3_noun rac) u3l_log("bytestream: sip fail"); u3z(dal); u3z(ext); + u3r_view_done(&vu_u); return u3_none; } @@ -782,6 +814,7 @@ _qe_bytestream_extract(u3_noun sea, u3_noun rac) u3l_log("bytestream: ken fail"); u3z(dal); u3z(ext); + u3r_view_done(&vu_u); return u3_none; } @@ -793,6 +826,7 @@ _qe_bytestream_extract(u3_noun sea, u3_noun rac) if (pos_w + sip_w > p_octs_w) { u3z(dal); + u3r_view_done(&vu_u); return u3_none; } @@ -802,13 +836,14 @@ _qe_bytestream_extract(u3_noun sea, u3_noun rac) continue; } - u3_noun octs = _qe_peek_octs(ken_w, pos_w, p_octs_w, sea_y, len_w); + u3_noun octs = _qe_peek_octs(ken_w, pos_w, p_octs_w, (c3_y*)vu_u.byt_y, len_w); pos_w += ken_w; dal = u3nc(octs, dal); } new_sea = u3nc(u3i_word(pos_w), u3k(octs)); + u3r_view_done(&vu_u); return u3nc(u3kb_flop(dal), new_sea); } u3_noun @@ -844,11 +879,11 @@ _qe_bytestream_fuse_extract(u3_noun sea, u3_noun rac) c3_w p_octs_w; 
c3_w len_w, lead_w; - c3_y* sea_y; + u3r_view vu_u = {0}; - if (c3n == _x_octs_buffer(&p_octs, &q_octs, - &p_octs_w, &sea_y, - &len_w, &lead_w)) { + if (c3n == _x_octs_buffer(&vu_u, &p_octs, &q_octs, + &p_octs_w, &len_w, &lead_w)) { + u3r_view_done(&vu_u); return u3_none; } @@ -872,12 +907,14 @@ _qe_bytestream_fuse_extract(u3_noun sea, u3_noun rac) u3l_log("bytestream: sip fail"); u3z(dal); u3z(ext); + u3r_view_done(&vu_u); return u3_none; } if (c3n == u3r_safe_word(ken, &ken_w)) { u3l_log("bytestream: ken fail"); u3z(dal); u3z(ext); + u3r_view_done(&vu_u); return u3_none; } @@ -889,6 +926,7 @@ _qe_bytestream_fuse_extract(u3_noun sea, u3_noun rac) if (pos_w + sip_w > p_octs_w) { u3z(dal); + u3r_view_done(&vu_u); return u3_none; } @@ -898,7 +936,7 @@ _qe_bytestream_fuse_extract(u3_noun sea, u3_noun rac) continue; } - u3_noun octs = _qe_peek_octs(ken_w, pos_w, p_octs_w, sea_y, len_w); + u3_noun octs = _qe_peek_octs(ken_w, pos_w, p_octs_w, (c3_y*)vu_u.byt_y, len_w); pos_w += ken_w; dal = u3nc(octs, dal); } @@ -909,6 +947,7 @@ _qe_bytestream_fuse_extract(u3_noun sea, u3_noun rac) new_sea = u3nc(u3i_word(pos_w), u3k(octs)); + u3r_view_done(&vu_u); return u3nc(data, new_sea); } @@ -988,22 +1027,23 @@ _qe_bytestream_need_bits(u3_atom n, u3_noun bits) c3_w p_octs_w; c3_w len_w, lead_w; - c3_y* sea_y; + u3r_view vu_u = {0}; - if (c3n == _x_octs_buffer(&p_octs, &q_octs, - &p_octs_w, &sea_y, - &len_w, &lead_w)) { + if (c3n == _x_octs_buffer(&vu_u, &p_octs, &q_octs, + &p_octs_w, &len_w, &lead_w)) { + u3r_view_done(&vu_u); return u3_none; } if (pos_w + need_bytes_w > p_octs_w) { + u3r_view_done(&vu_u); u3m_bail(c3__exit); } while (need_bytes_w--) { if (pos_w < len_w) { - bit_d += *(sea_y + pos_w) << num_w; + bit_d += *(vu_u.byt_y + pos_w) << num_w; } num_w += 8; pos_w++; @@ -1013,6 +1053,7 @@ _qe_bytestream_need_bits(u3_atom n, u3_noun bits) u3_noun new_bays = u3nc(u3i_word(pos_w), u3k(octs)); + u3r_view_done(&vu_u); return u3nt(u3i_word(num_w), u3i_chub(bit_d), new_bays); } // 
+$ bits $+ bits diff --git a/pkg/noun/jets/e/zlib.c b/pkg/noun/jets/e/zlib.c index 89937df390..5189dcbd70 100644 --- a/pkg/noun/jets/e/zlib.c +++ b/pkg/noun/jets/e/zlib.c @@ -38,7 +38,15 @@ _decompress(u3_atom pos, u3_noun octs, int window_bits) return u3_none; } - c3_w len_w = u3r_met(3, q_octs); + // zero-copy view on the compressed input (mmap for bobs; the + // legacy `vat_u->buf_w + pos_w` cast returned seq_w for bobs). + // the view stays live for the whole inflate loop so the stream + // reader can scan it freely; every return path must call + // u3r_view_done(&vu_u). + // + u3r_view vu_u; + u3r_view_init(&vu_u, q_octs); + c3_w len_w = vu_u.len_w; int leading_zeros = 0; @@ -52,18 +60,11 @@ _decompress(u3_atom pos, u3_noun octs, int window_bits) // Bytestream exhausted // if (pos_w >= len_w) { + u3r_view_done(&vu_u); return u3_none; } - c3_y* input; - - if (c3y == u3a_is_cat(q_octs)) { - input = (c3_y*)&q_octs + pos_w; - } - else { - u3a_atom* vat_u = u3a_to_ptr(q_octs); - input = (c3_y*)vat_u->buf_w + pos_w; - } + c3_y* input = (c3_y*)vu_u.byt_y + pos_w; int ret; z_stream strm; @@ -85,6 +86,7 @@ _decompress(u3_atom pos, u3_noun octs, int window_bits) if (ret != Z_OK) { u3l_log("%i", ret); u3l_log("%s", strm.msg); + u3r_view_done(&vu_u); return u3m_bail(c3__exit); } @@ -142,6 +144,7 @@ _decompress(u3_atom pos, u3_noun octs, int window_bits) u3l_log("%s", strm.msg); inflateEnd(&strm); u3i_slab_free(&sab_u); + u3r_view_done(&vu_u); return u3m_bail(c3__exit); } } @@ -150,6 +153,7 @@ _decompress(u3_atom pos, u3_noun octs, int window_bits) u3l_log("%s", strm.msg); inflateEnd(&strm); u3i_slab_free(&sab_u); + u3r_view_done(&vu_u); return u3m_bail(c3__exit); } } @@ -159,6 +163,7 @@ _decompress(u3_atom pos, u3_noun octs, int window_bits) u3l_log("%s", strm.msg); inflateEnd(&strm); u3i_slab_free(&sab_u); + u3r_view_done(&vu_u); return u3m_bail(c3__exit); } ret = inflateEnd(&strm); @@ -167,6 +172,7 @@ _decompress(u3_atom pos, u3_noun octs, int window_bits) u3l_log("%i", 
ret); u3l_log("%s", strm.msg); u3i_slab_free(&sab_u); + u3r_view_done(&vu_u); return u3m_bail(c3__exit); } @@ -174,6 +180,7 @@ _decompress(u3_atom pos, u3_noun octs, int window_bits) u3_noun new_pos = pos_w + strm.total_in; u3_noun new_stream = u3nc(u3i_word(new_pos), u3k(octs)); + u3r_view_done(&vu_u); return u3nc(decompressed_octs, new_stream); } diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index fb979dbf5b..232ee9f25d 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -527,9 +527,8 @@ _pave_parts(void) // initialize blob bank HAMTs // u3H->ban_u.blb_p = u3h_new(); - u3H->ban_u.res_p = u3h_new(); + u3H->ban_u.bob_p = u3h_new(); u3H->ban_u.rev_p = u3h_new(); - u3H->ban_u.nxt_d = 0; } static c3_d @@ -677,8 +676,8 @@ _find_home(void) if ( !u3H->ban_u.blb_p ) { u3H->ban_u.blb_p = u3h_new(); } - if ( !u3H->ban_u.res_p ) { - u3H->ban_u.res_p = u3h_new(); + if ( !u3H->ban_u.bob_p ) { + u3H->ban_u.bob_p = u3h_new(); } if ( !u3H->ban_u.rev_p ) { u3H->ban_u.rev_p = u3h_new(); diff --git a/pkg/noun/options.h b/pkg/noun/options.h index 298a02d674..5305a52f2a 100644 --- a/pkg/noun/options.h +++ b/pkg/noun/options.h @@ -23,6 +23,7 @@ void (*slog_f)(u3_noun); // function pointer for slog void (*sign_hold_f)(void); // suspend system signal regime void (*sign_move_f)(void); // restore system signal regime + void (*blob_delete_f)(c3_h, c3_w); // delete blob file by (mug, seq) } u3o_config; /* u3o_flag: process/system flags. diff --git a/pkg/noun/retrieve.c b/pkg/noun/retrieve.c index 403a3ce5df..c0d9fc8c6e 100644 --- a/pkg/noun/retrieve.c +++ b/pkg/noun/retrieve.c @@ -1094,7 +1094,24 @@ u3r_byte(c3_w a_w, } else return (255 & (b >> (a_w << 3))); } - else { + + // bob atom: mmap the backing file and read one byte. mmap/munmap + // is per-call overhead — callers that scan many bytes should open + // a u3r_view themselves and index into vu.byt_y directly, rather + // than calling u3r_byte in a loop. 
+ // + if ( c3y == u3a_is_bob(b) ) { + c3_d map_d = 0; + const c3_y* map_y = u3r_blob_map(b, &map_d); + if ( !map_y ) { + return 0; + } + c3_y res = ((c3_d)a_w < map_d) ? map_y[a_w] : 0; + u3r_blob_unmap(map_y, map_d); + return res; + } + + { u3a_atom* b_u = u3a_to_ptr(b); c3_y vut_y = (a_w & (u3a_word_bytes - 1)); c3_w pix_w = (a_w >> u3a_word_bytes_shift); @@ -2425,11 +2442,55 @@ _comp_words(c3_w a_w, c3_w b_w) c3_ys u3r_comp(u3_atom a, u3_atom b) { + if ( a == b ) return 0; + + c3_o a_bob = ( c3y == u3a_is_cat(a) ) ? c3n : u3a_is_bob(a); + c3_o b_bob = ( c3y == u3a_is_cat(b) ) ? c3n : u3a_is_bob(b); + + // bob vs bob with matching (mug, seq): blob store is content-addressed + // and deduplicates within a bucket, so same id means byte-equal content. + // + if ( (c3y == a_bob) && (c3y == b_bob) ) { + if ( (u3a_bob_mug(a) == u3a_bob_mug(b)) && + (u3a_bob_seq(a) == u3a_bob_seq(b)) ) + { + return 0; + } + } + + // any comparison touching a bob goes through u3r_view (mmap for + // bobs, heap-alloc fallback for normal atoms) and compares by + // significant-byte length then MSB-first byte sequence. The + // non-bob paths below use the faster word-at-a-time compare. 
+ // + if ( (c3y == a_bob) || (c3y == b_bob) ) { + u3r_view va_u, vb_u; + u3r_view_init(&va_u, a); + u3r_view_init(&vb_u, b); + + c3_ys res; + if ( va_u.len_w != vb_u.len_w ) { + res = _comp_words(va_u.len_w, vb_u.len_w); + } + else { + res = 0; + for ( c3_w i_w = va_u.len_w; i_w--; ) { + if ( va_u.byt_y[i_w] != vb_u.byt_y[i_w] ) { + res = (c3_ys)(va_u.byt_y[i_w] > vb_u.byt_y[i_w]) + - (c3_ys)(va_u.byt_y[i_w] < vb_u.byt_y[i_w]); + break; + } + } + } + + u3r_view_done(&va_u); + u3r_view_done(&vb_u); + return res; + } + if (c3y == u3a_is_cat(a) || c3y == u3a_is_cat(b)) { return _comp_words(a, b); } - - if ( a == b ) return 0; u3a_atom* a_u = u3a_to_ptr(a); u3a_atom* b_u = u3a_to_ptr(b); diff --git a/pkg/noun/serial.c b/pkg/noun/serial.c index 5f68335d1f..761c5ab49e 100644 --- a/pkg/noun/serial.c +++ b/pkg/noun/serial.c @@ -954,21 +954,30 @@ u3s_cue_bytes(c3_d len_d, const c3_y* byt_y) u3_noun u3s_cue_atom(u3_atom a) { - c3_w len_w = u3r_met(3, a); - c3_y* byt_y; - // XX assumes little-endian // if ( c3y == u3a_is_cat(a) ) { - byt_y = (c3_y*)&a; - } - else { - u3_assert(c3n == u3a_is_bob(a)); - u3a_atom* vat_u = u3a_to_ptr(a); - byt_y = (c3_y*)vat_u->buf_w; - } + c3_w len_w = u3r_met(3, a); + return u3s_cue_bytes((c3_d)len_w, (c3_y*)&a); + } + + // bob atom: mmap the backing file instead of dereferencing buf_w + // (which for a bob would yield seq_w). The view stays live for + // the whole cue so the bitstream reader can scan freely. + // + if ( c3y == u3a_is_bob(a) ) { + u3r_view vu_u; + u3r_view_init(&vu_u, a); + u3_noun res = u3s_cue_bytes((c3_d)vu_u.len_w, (c3_y*)vu_u.byt_y); + u3r_view_done(&vu_u); + return res; + } - return u3s_cue_bytes((c3_d)len_w, byt_y); + { + c3_w len_w = u3r_met(3, a); + u3a_atom* vat_u = u3a_to_ptr(a); + return u3s_cue_bytes((c3_d)len_w, (c3_y*)vat_u->buf_w); + } } /* _cs_etch_ud_size(): output length in @ud for given mpz_t. 
diff --git a/pkg/noun/vortex.h b/pkg/noun/vortex.h index 5c928166b9..095595fd3d 100644 --- a/pkg/noun/vortex.h +++ b/pkg/noun/vortex.h @@ -21,16 +21,15 @@ /* u3v_lease: active staging reservation in the blob store. ** ** Created when Mars installs a blob (receives %blob-install). - ** Holds one u3a_blob.use_w ref until the owning event is committed - ** to the event log (at which point the ref becomes an event-log ref), - ** or until the lease expires (TTL). + ** Holds a pending ref until the owning event is committed to the + ** event log (at which point the ref becomes an event-log ref in + ** blb_p), or until the lease expires (TTL). ** - ** dead_o: c3y after the lease has been committed (or otherwise invalidated). - ** Set by _mars_fact via the reverse index; the lease pointer may remain in - ** the expiry priority queue until it bubbles to the top and is freed. + ** dead_o: c3y after the lease has been committed (or otherwise + ** invalidated). Set by _mars_fact via rev_p; the struct may + ** remain in the expiry PQ until it bubbles to the top. */ typedef struct _u3v_lease { - c3_d res_d; // reservation id (monotonic counter) c3_d exp_d; // expiry time (Unix ms); 0 = no expiry c3_h mug_h; // blob mug c3_w seq_w; // blob seq within mug bucket @@ -41,16 +40,18 @@ /* u3v_bank: loom-resident blob bank. ** ** Lives in u3v_home, checkpointed in image.bin. - ** blb_p: HAMT mapping blob_id (u64 = mug<<32|seq) -> u3a_blob loom offset - ** res_p: HAMT mapping res_id (u64) -> u3v_lease loom offset - ** rev_p: HAMT mapping blob_id (u64 = mug<<32|seq) -> res_d - ** nxt_d: monotonic reservation counter + ** + ** blb_p: HAMT bid -> count (event-log refcount per blob) + ** bob_p: HAMT bid -> offset (interned bob atom loom offsets) + ** rev_p: HAMT bid -> ptr (active u3v_lease pointers) + ** + ** A blob file is deleted when all three are empty for that bid: + ** blb_p count == 0, bob_p absent, rev_p absent. 
*/ typedef struct _u3v_bank { - u3p(u3h_root) blb_p; // blob_id -> u3a_blob* - u3p(u3h_root) res_p; // res_id -> u3v_lease* - u3p(u3h_root) rev_p; // blob_id -> res_d - c3_d nxt_d; // next reservation id + u3p(u3h_root) blb_p; // blob_id -> log refcount + u3p(u3h_root) bob_p; // blob_id -> bob atom loom offset + u3p(u3h_root) rev_p; // blob_id -> u3v_lease* (active leases) } u3v_bank; /* u3v_home: all internal (within image) state. diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index a9ba9cbba8..72706531b4 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1229,7 +1229,8 @@ _disk_epoc_kill(u3_disk* log_u, c3_d epo_d) c3_c epo_c[8193]; snprintf(epo_c, sizeof(epo_c), "%s/0i%" PRIc3_d, log_u->com_u->pax_c, epo_d); - // process blobs.txt: decrement event-log refcounts; delete files at zero + // process blobs.txt: decrement event-log refcounts. + // after decrementing, check the full delete condition (log + noun + lease). // { c3_c blt_c[8193]; @@ -1254,12 +1255,16 @@ _disk_epoc_kill(u3_disk* log_u, c3_d epo_d) u3h_put(u3H->ban_u.blb_p, bk, u3i_word(ref_w - 1)); } else { - // last ref — delete blob file - u3_blob_delete(log_u->dir_u->pax_c, mug_h, seq_w); u3h_del(u3H->ban_u.blb_p, bk); - fprintf(stderr, "disk: gc: deleted blob %" PRIc3_h - "/%" PRIc3_w " (epoch 0i%" PRIc3_d ")\r\n", - mug_h, seq_w, epo_d); + + // check all three ref sources before deleting + // + c3_o has_bob = __(u3_none != u3h_get(u3H->ban_u.bob_p, bk)); + c3_o has_lea = __(u3_none != u3h_get(u3H->ban_u.rev_p, bk)); + + if ( c3n == has_bob && c3n == has_lea ) { + u3_blob_delete(log_u->dir_u->pax_c, mug_h, seq_w); + } } u3z(bk); } diff --git a/pkg/vere/io/mesa.c b/pkg/vere/io/mesa.c index 54c2763b84..3a9efea348 100644 --- a/pkg/vere/io/mesa.c +++ b/pkg/vere/io/mesa.c @@ -1385,10 +1385,21 @@ _mesa_hear(u3_mesa* sam_u, static void _mesa_ef_send(u3_mesa* sam_u, u3_noun las, u3_noun pac) { - c3_w len_w = u3r_met(3, pac); + // zero-copy read from [pac] (mmap if it's a bob) into the arena + // buffer. 
the arena is the long-lived owner — we still copy bytes + // into it because it gets stashed in u3_mesa_resend_data->buf_y for + // the resend timer. using u3r_view skips the full-blob loom alloc + // that u3r_bytes → u3r_blob_load would have caused. + // + u3r_view vu_u; + u3r_view_init(&vu_u, pac); + c3_w len_w = vu_u.len_w; arena are_u = arena_create(len_w + 16384); c3_y* buf_y = new(&are_u, c3_y, len_w); - u3r_bytes(0, len_w, buf_y, pac); + if ( len_w ) { + memcpy(buf_y, vu_u.byt_y, len_w); + } + u3r_view_done(&vu_u); u3_mesa_pact pac_u; memset(&pac_u, 0x11, sizeof(pac_u)); @@ -1799,9 +1810,18 @@ _mesa_page_scry_jumbo_cb(void* vod_p, u3_noun res) u3_mesa_line* lin_u; { - c3_w jumbo_w = u3r_met(3, pac); + // zero-copy read of the jumbo frame bytes (mmap if [pac] is a bob). + // we still copy into a c3_calloc'd buffer because mesa_sift_pact_from_buf + // expects a stable, mutable buffer that outlives the view. + // + u3r_view vu_u; + u3r_view_init(&vu_u, pac); + c3_w jumbo_w = vu_u.len_w; c3_y* jumbo_y = c3_calloc(jumbo_w); - u3r_bytes(0, jumbo_w, jumbo_y, pac); + if ( jumbo_w ) { + memcpy(jumbo_y, vu_u.byt_y, jumbo_w); + } + u3r_view_done(&vu_u); u3_mesa_pact jum_u; c3_c* err_c = mesa_sift_pact_from_buf(&jum_u, jumbo_y, jumbo_w); diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index abe79d99f1..8f4bf4fb24 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -95,6 +95,41 @@ _mars_pq_pop(_mars_lease_pq* pq_u) return r_u; } +/* _mars_blob_delete(): callback for pkg/noun to delete blob files. +*/ +static void +_mars_blob_delete(c3_h mug_h, c3_w seq_w) +{ + u3_blob_delete(u3C.dir_c, mug_h, seq_w); +} + +/* _blob_maybe_delete(): delete blob file iff fully unreferenced. +** +** Checks all three ref sources: bob_p (live noun), blb_p (event log), +** rev_p (active lease). Only unlinks the file when all are absent/zero. 
+*/ +static void +_blob_maybe_delete(c3_h mug_h, c3_w seq_w) +{ + c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; + u3_noun bid = u3i_chub(bid_d); + + c3_w log_w = 0; + u3_weak lv = u3h_get(u3H->ban_u.blb_p, bid); + if ( u3_none != lv ) { + u3r_safe_word(lv, &log_w); + } + + c3_o has_bob = __(u3_none != u3h_get(u3H->ban_u.bob_p, bid)); + c3_o has_lea = __(u3_none != u3h_get(u3H->ban_u.rev_p, bid)); + + u3z(bid); + + if ( 0 == log_w && c3n == has_bob && c3n == has_lea ) { + u3_blob_delete(u3C.dir_c, mug_h, seq_w); + } +} + /* :: peek=[gang (each path $%([%once @tas @tas path] [%beam @tas beam]))] :: ovum=ovum @@ -383,31 +418,20 @@ _mars_fact(u3_mars* mar_u, fprintf(blt_f, "%" PRIc3_h " %" PRIc3_w "\n", mug_h, seq_w); } - // mark the lease (if any) for this blob as dead via reverse index. - // The lease struct is freed later by the expiry sweeper when it - // bubbles to the top of the PQ. res_p and rev_p entries are - // removed here. + // mark the lease (if any) as dead via rev_p. the lease struct + // is freed later by the expiry sweeper when it bubbles to the + // top of the PQ. rev_p entry removed now. 
// { u3_noun revkey = u3i_chub(acc.ids[i_z]); u3_weak rv = u3h_get(u3H->ban_u.rev_p, revkey); if ( u3_none != rv ) { - c3_d res_d = 0; - u3r_safe_chub(rv, &res_d); - - u3_noun rkey = u3i_chub(res_d); - u3_weak lv = u3h_get(u3H->ban_u.res_p, rkey); - if ( u3_none != lv ) { - c3_d ptr_d = 0; - u3r_safe_chub(lv, &ptr_d); - u3v_lease* lea_u = (u3v_lease*)(uintptr_t)ptr_d; - if ( lea_u ) { - lea_u->dead_o = c3y; - } - u3h_del(u3H->ban_u.res_p, rkey); + c3_d ptr_d = 0; + u3r_safe_chub(rv, &ptr_d); + u3v_lease* lea_u = (u3v_lease*)(uintptr_t)ptr_d; + if ( lea_u ) { + lea_u->dead_o = c3y; } - u3z(rkey); - u3h_del(u3H->ban_u.rev_p, revkey); } u3z(revkey); @@ -779,39 +803,19 @@ _mars_work(u3_mars* mar_u, u3_noun jar) break; } - // expired lease — check if blob has any event-log refs + // expired lease — remove from rev_p, then check full delete condition // _mars_pq_pop(&_mars_pq); - c3_d bid_d = ((c3_d)top_u->mug_h << 32) | (c3_d)top_u->seq_w; - u3_noun bk = u3i_chub(bid_d); - u3_weak bv = u3h_get(u3H->ban_u.blb_p, bk); - c3_w ref_w = 0; - if ( u3_none != bv ) { - u3r_safe_word(bv, &ref_w); - } - u3z(bk); - - if ( 0 == ref_w ) { - // no event-log refs — blob was never committed; delete it - // - u3_blob_delete(u3C.dir_c, top_u->mug_h, top_u->seq_w); - fprintf(stderr, "mars: blob: expired lease, deleted %" PRIc3_h - "/%" PRIc3_w "\r\n", top_u->mug_h, top_u->seq_w); - } - - // remove from res_p and rev_p - // { - u3_noun rkey = u3i_chub(top_u->res_d); - u3h_del(u3H->ban_u.res_p, rkey); - u3z(rkey); - + c3_d bid_d = ((c3_d)top_u->mug_h << 32) | (c3_d)top_u->seq_w; u3_noun revkey = u3i_chub(bid_d); u3h_del(u3H->ban_u.rev_p, revkey); u3z(revkey); } + _blob_maybe_delete(top_u->mug_h, top_u->seq_w); + c3_free(top_u); } } @@ -993,17 +997,13 @@ _mars_work(u3_mars* mar_u, u3_noun jar) ok_o = u3_blob_install_stg(u3C.dir_c, stg_c, &mug_h, &seq_w); if ( c3y == ok_o ) { - // create lease: holds a pending ref until the blob is committed - // to the event log (in _mars_fact) or the lease 
expires. - // blb_p tracks committed event-log refs only — not set here. + // create lease: pending ref until blob is committed to the + // event log (in _mars_fact) or the lease expires. // - c3_d res_d = u3H->ban_u.nxt_d++; u3v_lease* lea_u = c3_malloc(sizeof(*lea_u)); - lea_u->res_d = res_d; lea_u->mug_h = mug_h; lea_u->seq_w = seq_w; lea_u->dead_o = c3n; - // TTL: 5 minutes from now (Unix ms) { struct timeval tv_u; gettimeofday(&tv_u, 0); @@ -1013,16 +1013,10 @@ _mars_work(u3_mars* mar_u, u3_noun jar) } snprintf(lea_u->stg_c, sizeof(lea_u->stg_c), "%s", stg_c); - // record: res_d -> lease ptr (for commit-time dead-mark and sweep) - // - u3_noun rkey = u3i_chub(res_d); - u3h_put(u3H->ban_u.res_p, rkey, u3i_chub((c3_d)(uintptr_t)lea_u)); - u3z(rkey); - - // record: blob_id -> res_d (reverse index for O(1) commit lookup) + // record: bid -> lease ptr (for commit-time dead-mark) // u3_noun revkey = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_w); - u3h_put(u3H->ban_u.rev_p, revkey, u3i_chub(res_d)); + u3h_put(u3H->ban_u.rev_p, revkey, u3i_chub((c3_d)(uintptr_t)lea_u)); u3z(revkey); // push onto expiry PQ (ownership: PQ frees on pop) @@ -1809,6 +1803,10 @@ u3_mars_work(u3_mars* mar_u) u3C.sign_hold_f = _mars_sign_hold; u3C.sign_move_f = _mars_sign_move; + // wire up blob delete callback (pkg/noun can't link pkg/vere) + // + u3C.blob_delete_f = _mars_blob_delete; + // XX do something better // if ( mar_u->log_u->dun_d > mar_u->dun_d ) { From 9a7b40966ef956241bf47c65250954014cba26df Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Thu, 23 Apr 2026 10:34:12 -0500 Subject: [PATCH 16/31] noun: adds `jam_shax` jet --- pkg/noun/jets/135/tree.c | 3 + pkg/noun/jets/e/jam_shax.c | 295 +++++++++++++++++++++++++++++++++++++ 2 files changed, 298 insertions(+) create mode 100644 pkg/noun/jets/e/jam_shax.c diff --git a/pkg/noun/jets/135/tree.c b/pkg/noun/jets/135/tree.c index 951a7fd9b2..8d70674713 100644 --- a/pkg/noun/jets/135/tree.c +++ b/pkg/noun/jets/135/tree.c @@ -634,6 +634,8 
@@ static u3j_core _135_tri__sha_d[] = {} }; +static u3j_harm _135_tri_jam_shax_a[] = {{".2", u3we_jam_shax}, {}}; + static u3j_harm _135_tri_shax_a[] = {{".2", u3we_shax}, {}}; static u3j_harm _135_tri_shay_a[] = {{".2", u3we_shay}, {}}; static u3j_harm _135_tri_shas_a[] = {{".2", u3we_shas}, {}}; @@ -1107,6 +1109,7 @@ static u3j_core _135_tri_d[] = { "shay", 7, _135_tri_shay_a, 0, no_hashes }, { "shas", 7, _135_tri_shas_a, 0, no_hashes }, { "shal", 7, _135_tri_shal_a, 0, no_hashes }, + { "jam-shax", 7, _135_tri_jam_shax_a, 0, no_hashes }, { "ob", 3, 0, _135_ob_d, no_hashes, _135_ob_ho }, {} diff --git a/pkg/noun/jets/e/jam_shax.c b/pkg/noun/jets/e/jam_shax.c new file mode 100644 index 0000000000..40007a5a33 --- /dev/null +++ b/pkg/noun/jets/e/jam_shax.c @@ -0,0 +1,295 @@ +/// @file +/// Streaming jam-then-SHA-256: computes shax(jam(a)) without +/// materializing the intermediate jam output. Feeds jam bits +/// directly into an incremental SHA-256 context. + +#include "jets/q.h" +#include "jets/w.h" + +#include "noun.h" + +#include +#include + +typedef struct libscrypt_SHA256Context { + uint32_t state[8]; + uint32_t count[2]; + unsigned char buf[64]; +} _js_SHA256_CTX; + +extern void libscrypt_SHA256_Init(_js_SHA256_CTX *); +extern void libscrypt_SHA256_Update(_js_SHA256_CTX *, const void *, size_t); +extern void libscrypt_SHA256_Final(unsigned char [], _js_SHA256_CTX *); + +// ---- streaming bit-writer → SHA-256 --------------------------------- + +#define _JS_BUFSZ 8192 + +typedef struct { + _js_SHA256_CTX sha_u; + c3_y buf_y[_JS_BUFSZ]; + c3_w buf_w; + c3_y par_y; // partial byte accumulator + c3_g off_g; // bits used in par_y (0–7) + c3_d bit_d; // total bits written + u3p(u3h_root) har_p; // dedup HAMT: noun → first-occurrence bit pos +} _jam_shax; + +static void +_js_init(_jam_shax* ctx) +{ + libscrypt_SHA256_Init(&ctx->sha_u); + ctx->buf_w = 0; + ctx->par_y = 0; + ctx->off_g = 0; + ctx->bit_d = 0; + ctx->har_p = u3h_new(); +} + +static inline void 
+_js_push_byte(_jam_shax* ctx, c3_y byt_y) +{ + ctx->buf_y[ctx->buf_w++] = byt_y; + if ( _JS_BUFSZ == ctx->buf_w ) { + libscrypt_SHA256_Update(&ctx->sha_u, ctx->buf_y, _JS_BUFSZ); + ctx->buf_w = 0; + } +} + +// write [wid_g] bits of [val_d] (LSB-first) into the stream. +// wid_g must be <= 64. +// +static void +_js_bits(_jam_shax* ctx, c3_d val_d, c3_g wid_g) +{ + for ( c3_g i_g = 0; i_g < wid_g; i_g++ ) { + if ( val_d & ((c3_d)1 << i_g) ) { + ctx->par_y |= ((c3_y)1 << ctx->off_g); + } + ctx->off_g++; + ctx->bit_d++; + if ( 8 == ctx->off_g ) { + _js_push_byte(ctx, ctx->par_y); + ctx->par_y = 0; + ctx->off_g = 0; + } + } +} + +// write [len_d] source bytes, bit-shifted by ctx->off_g, into the stream. +// +static void +_js_bytes(_jam_shax* ctx, const c3_y* src_y, c3_d len_d) +{ + c3_g off_g = ctx->off_g; + + if ( 0 == off_g ) { + // fast path: byte-aligned + // + for ( c3_d i_d = 0; i_d < len_d; i_d++ ) { + _js_push_byte(ctx, src_y[i_d]); + } + ctx->bit_d += len_d * 8; + } + else { + // slow path: bit-shifted + // + c3_g rsh_g = 8 - off_g; + for ( c3_d i_d = 0; i_d < len_d; i_d++ ) { + c3_y src = src_y[i_d]; + _js_push_byte(ctx, ctx->par_y | (src << off_g)); + ctx->par_y = src >> rsh_g; + ctx->bit_d += 8; + } + } +} + +static void +_js_done(_jam_shax* ctx, c3_y out_y[32]) +{ + // flush partial byte (zero-padded high bits) + // + if ( ctx->off_g > 0 ) { + _js_push_byte(ctx, ctx->par_y); + } + // flush remaining buffer to SHA-256 + // + if ( ctx->buf_w > 0 ) { + libscrypt_SHA256_Update(&ctx->sha_u, ctx->buf_y, ctx->buf_w); + } + libscrypt_SHA256_Final(out_y, &ctx->sha_u); + u3h_free(ctx->har_p); +} + +// ---- mat encoding --------------------------------------------------- + +// write mat-encoded value [val_w] (the bit-length of an atom). 
+//
+static void
+_js_mat_w(_jam_shax* ctx, c3_w val_w)
+{
+  if ( 0 == val_w ) {
+    _js_bits(ctx, 1, 1);
+    return;
+  }
+  c3_g b_g = (c3_g)c3_bits_word(val_w);
+  // b_g zero bits + one 1 bit
+  //
+  _js_bits(ctx, (c3_d)1 << b_g, b_g + 1);
+  // low (b_g - 1) bits of val_w
+  //
+  if ( b_g > 1 ) {
+    _js_bits(ctx, (c3_d)val_w, b_g - 1);
+  }
+}
+
+// ---- atom encoding --------------------------------------------------
+
+// compute bit-length of an atom from mmap'd bytes (for bob atoms).
+//
+static c3_w
+_js_bob_met(const c3_y* byt_y, c3_d len_d)
+{
+  c3_d pos_d = len_d;
+  while ( pos_d > 0 && 0 == byt_y[pos_d - 1] ) {
+    pos_d--;
+  }
+  if ( 0 == pos_d ) return 0;
+  c3_y top_y = byt_y[pos_d - 1];
+  c3_y clz_y = (c3_y)(__builtin_clz((unsigned int)top_y) - 24);
+  return (c3_w)((pos_d - 1) * 8 + (8 - clz_y));
+}
+
+// encode a single atom: tag 0 + mat(bit_len) + data bits.
+//
+static void
+_js_encode_atom(_jam_shax* ctx, u3_atom a)
+{
+  // atom tag
+  //
+  _js_bits(ctx, 0, 1);
+
+  if ( 0 == a ) {
+    _js_mat_w(ctx, 0);
+    return;
+  }
+
+  u3r_view vu_u = {0};
+  c3_w bit_w;
+
+  if ( _(u3a_is_cat(a)) ) {
+    bit_w = (c3_g)c3_bits_word(a);
+  }
+  else if ( c3y == u3a_is_bob(a) ) {
+    u3r_view_init(&vu_u, a);
+    bit_w = _js_bob_met(vu_u.byt_y, vu_u.len_w);
+    if ( 0 == bit_w ) {
+      u3r_view_done(&vu_u);
+      _js_mat_w(ctx, 0);
+      return;
+    }
+  }
+  else {
+    bit_w = u3r_met(0, a);
+    u3r_view_init(&vu_u, a);
+  }
+
+  // mat header: encodes the bit-length
+  //
+  _js_mat_w(ctx, bit_w);
+
+  // atom data bits
+  //
+  if ( _(u3a_is_cat(a)) ) {
+    _js_bits(ctx, (c3_d)a, (c3_g)bit_w);
+  }
+  else {
+    c3_w full_w = bit_w >> 3;
+    c3_g rem_g = bit_w & 7;
+    _js_bytes(ctx, vu_u.byt_y, full_w);
+    if ( rem_g > 0 ) {
+      _js_bits(ctx, vu_u.byt_y[full_w], rem_g);
+    }
+    u3r_view_done(&vu_u);
+  }
+}
+
+// ---- noun encoding (with dedup) ------------------------------------
+
+// write a backref: tag 11 + mat(pos_d). _js_mat_w() only emits the
+// length header, so the [wid_g] bits of the position itself follow.
+//
+static void
+_js_backref(_jam_shax* ctx, c3_d pos_d)
+{
+  c3_g wid_g = (0 == pos_d) ? 0 : (c3_g)(64 - __builtin_clzll(pos_d));
+
+  _js_bits(ctx, 3, 2);
+  _js_mat_w(ctx, wid_g);
+  _js_bits(ctx, pos_d, wid_g);
+}
+
+// encode [a], matching +jam: the first occurrence of a noun records its
+// bit position; later occurrences become backrefs, except atoms that are
+// no wider than the position, which are re-encoded in place.
+//
+static void
+_js_encode(_jam_shax* ctx, u3_noun a)
+{
+  u3_weak got = u3h_git(ctx->har_p, a);
+
+  if ( u3_none == got ) {
+    // first occurrence: record position before the tag is written
+    //
+    u3h_put(ctx->har_p, a, u3i_chub(ctx->bit_d));
+
+    if ( _(u3a_is_atom(a)) ) {
+      _js_encode_atom(ctx, a);
+    }
+    else {
+      _js_bits(ctx, 1, 2);  // cell tag 01
+      _js_encode(ctx, u3h(a));
+      _js_encode(ctx, u3t(a));
+    }
+    return;
+  }
+
+  c3_d pos_d = 0;
+  u3r_safe_chub(got, &pos_d);
+
+  if ( _(u3a_is_atom(a)) ) {
+    c3_w a_w = _(u3a_is_cat(a)) ? c3_bits_word(a) : u3r_met(0, a);
+    c3_w p_w = (0 == pos_d) ? 0 : (c3_w)(64 - __builtin_clzll(pos_d));
+
+    if ( a_w <= p_w ) {
+      _js_encode_atom(ctx, a);
+      return;
+    }
+  }
+
+  _js_backref(ctx, pos_d);
+}
+
+// ---- public API ----------------------------------------------------
+
+u3_atom
+u3qe_jam_shax(u3_noun a)
+{
+  fprintf(stderr, "jam-shax jet: firing (bob=%c)\r\n",
+          (!_(u3a_is_cat(a)) && _(u3a_is_cell(a)) && !_(u3a_is_cat(u3t(a))) && c3y == u3a_is_bob(u3t(a))) ? 'y' : 'n');
+
+  _jam_shax ctx;
+  _js_init(&ctx);
+  _js_encode(&ctx, a);
+
+  c3_y hash_y[32];
+  _js_done(&ctx, hash_y);
+  return u3i_bytes(32, hash_y);
+}
+
+u3_noun
+u3we_jam_shax(u3_noun cor)
+{
+  u3_noun a = u3x_at(u3x_sam, cor);
+  return u3qe_jam_shax(a);
+}
From 4b3b6d2c311aef92164972e5c3e4c50e26b50160 Mon Sep 17 00:00:00 2001
From: Matthew LeVan
Date: Thu, 23 Apr 2026 10:43:40 -0500
Subject: [PATCH 17/31] wip: blob refcounting redesign 1

---
 pkg/noun/allocate.c        |  42 ++++--
 pkg/noun/allocate.h        |  18 ++-
 pkg/noun/build.zig         |   1 +
 pkg/noun/imprison.c        |  32 ++++-
 pkg/noun/jets/e/jam_shax.c |   3 -
 pkg/noun/jets/w.h          |   3 +
 pkg/noun/manage.c          |   4 -
 pkg/noun/options.h         |   2 +-
 pkg/noun/vortex.h          |  35 ++---
 pkg/vere/disk.c            | 108 ++------------
 pkg/vere/io/http.c         |  37 ++---
 pkg/vere/king.c            |  14 ++
 pkg/vere/lord.c            |  20 +++
 pkg/vere/mars.c            | 283 ++++++++++++++++++++++++-------------
 pkg/vere/vere.h            |  10 ++
 15 files changed, 345 insertions(+), 267 deletions(-)

diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c
index 7748fc349e..89eeb5fb74 100644
--- a/pkg/noun/allocate.c
+++ b/pkg/noun/allocate.c
@@ -875,9 +875,10 @@ _me_gain_south(u3_noun dog)
 
 /* _me_bob_dead(): handle a bob atom whose loom refcount just hit zero.
 **
-**  Removes the atom from the bob_p interning index. If the log
-**  refcount (blb_p) and lease map (rev_p) are also empty, deletes
-**  the backing blob file via the registered callback.
+**  Removes the atom from the bob_p interning index. Then checks
+**  the deletion condition: log_w == 0 && les_w == 0 && bob_p absent.
+**  Calls u3C.blob_del_f to either release the lease (king) or
+**  delete the blob file (mars).
*/ static void _me_bob_dead(u3a_atom* atm_u) @@ -887,20 +888,35 @@ _me_bob_dead(u3a_atom* atm_u) c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; u3_noun bid = u3i_chub(bid_d); + // remove from interning index (noun is about to be freed) + // u3h_del(u3H->ban_u.bob_p, bid); - c3_w log_w = 0; - u3_weak lv = u3h_get(u3H->ban_u.blb_p, bid); - if ( u3_none != lv ) { - u3r_safe_word(lv, &log_w); - } - - c3_o has_lea = __(u3_none != u3h_get(u3H->ban_u.rev_p, bid)); - + // check u3a_blob refcounts + // + u3_weak bv = u3h_get(u3H->ban_u.blb_p, bid); u3z(bid); - if ( 0 == log_w && c3n == has_lea && u3C.blob_delete_f ) { - u3C.blob_delete_f(mug_h, seq_w); + if ( u3C.blob_del_f ) { + if ( u3_none == bv ) { + // no u3a_blob entry → blob was never registered or already deleted. + // call del_f anyway (king needs to release the lease). + // + u3C.blob_del_f(mug_h, seq_w); + } + else { + // u3a_blob exists — extract and check refcounts. + // bob_p[bid] is already absent (just removed above). + // if log_w and les_w are both 0, delete the blob. + // + c3_w off_w = 0; + u3r_safe_word(bv, &off_w); + u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + + if ( 0 == blb_u->log_w && 0 == blb_u->les_w ) { + u3C.blob_del_f(mug_h, seq_w); + } + } } } diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index c4e9308d11..1eb2e6ba0a 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -164,15 +164,21 @@ u3_noun tel; } u3a_cell; - /* u3a_blob: loom-resident metadata for a committed blob. + /* u3a_blob: loom-resident metadata for a blob file. ** - ** use_w: refcount from event-log refs + active leases. - ** Independent from u3a_atom.use_w (noun refcount). - ** A blob file is deleted when both use_w == 0 AND - ** no live bob atoms in the loom point to it. + ** Stored in ban_u.blb_p HAMT keyed by bid = (mug_h << 32) | seq_w. 
+ ** Three independent ref-sources protect the backing file: + ** + ** log_w — event-log refs (inc on commit, dec on chop) + ** les_w — lease refs (inc on king acquire, dec on release/expiry) + ** bob_p — noun liveness (bob_p[bid] present ↔ live u3a_atom) + ** + ** The blob file is deleted when ALL are zero: + ** log_w == 0 && les_w == 0 && bob_p[bid] absent */ typedef struct __attribute__((aligned(4))) { - c3_w use_w; // refcount: event-log refs + active leases + c3_w log_w; // event-log refcount + c3_w les_w; // lease refcount c3_h mug_h; // 31-bit content mug (= bucket dir name) c3_w seq_w; // sequence number within bucket c3_d siz_d; // byte size of blob file diff --git a/pkg/noun/build.zig b/pkg/noun/build.zig index 9d76e0c458..c17fabaa4e 100644 --- a/pkg/noun/build.zig +++ b/pkg/noun/build.zig @@ -336,6 +336,7 @@ const c_source_files = [_][]const u8{ "jets/e/fynd_ob.c", "jets/e/hmac.c", "jets/e/jam.c", + "jets/e/jam_shax.c", "jets/e/json_de.c", "jets/e/json_en.c", "jets/e/keccak.c", diff --git a/pkg/noun/imprison.c b/pkg/noun/imprison.c index 9484504941..5d8d786b49 100644 --- a/pkg/noun/imprison.c +++ b/pkg/noun/imprison.c @@ -843,6 +843,8 @@ u3i_vmolt(u3_noun som, u3i_molt_pair pairs[], c3_z len_z) ** ** Interned: at most one bob atom exists per (mug, seq) pair. ** bob_p maps bid -> loom offset of the canonical atom. +** blb_p maps bid -> loom offset of the u3a_blob refcount struct. +** If no u3a_blob exists yet, one is allocated with {log_w=0, les_w=0}. 
*/ u3_atom u3i_blob(c3_h mug_h, c3_w seq_w) @@ -861,7 +863,7 @@ u3i_blob(c3_h mug_h, c3_w seq_w) return u3k(bob); } - // allocate: u3a_atom header + 1 word for seq_w + // allocate bob atom: u3a_atom header + 1 word for seq_w // c3_w* nov_w = u3a_walloc(1 + c3_wiseof(u3a_atom)); u3a_atom* vat_u = (void *)nov_w; @@ -871,11 +873,29 @@ u3i_blob(c3_h mug_h, c3_w seq_w) vat_u->len_w = 1 | u3a_blob_flag; vat_u->buf_w[0] = seq_w; - // store loom offset (not a noun ref) in bob_p + // store atom loom offset in bob_p (interning index) // - c3_w off_w = u3a_outa(nov_w); - u3h_put(u3H->ban_u.bob_p, bid, u3i_word(off_w)); - u3z(bid); + c3_w atm_off_w = u3a_outa(nov_w); + u3h_put(u3H->ban_u.bob_p, bid, u3i_word(atm_off_w)); + + // ensure u3a_blob exists in blb_p + // + u3_weak bv = u3h_get(u3H->ban_u.blb_p, bid); + if ( u3_none == bv ) { + // allocate fresh u3a_blob with zero refcounts + // + c3_w* blb_w = u3a_walloc(c3_wiseof(u3a_blob)); + u3a_blob* blb_u = (u3a_blob*)blb_w; + blb_u->log_w = 0; + blb_u->les_w = 0; + blb_u->mug_h = mug_h; + blb_u->seq_w = seq_w; + blb_u->siz_d = 0; // filled later by blob_save / blob_install - return u3a_to_pug(off_w); + c3_w blb_off_w = u3a_outa(blb_w); + u3h_put(u3H->ban_u.blb_p, bid, u3i_word(blb_off_w)); + } + + u3z(bid); + return u3a_to_pug(atm_off_w); } diff --git a/pkg/noun/jets/e/jam_shax.c b/pkg/noun/jets/e/jam_shax.c index 40007a5a33..2cf0b56649 100644 --- a/pkg/noun/jets/e/jam_shax.c +++ b/pkg/noun/jets/e/jam_shax.c @@ -275,9 +275,6 @@ _js_encode(_jam_shax* ctx, u3_noun a) u3_atom u3qe_jam_shax(u3_noun a) { - fprintf(stderr, "jam-shax jet: firing (bob=%c)\r\n", - (!_(u3a_is_cat(a)) && _(u3a_is_cell(a)) && !_(u3a_is_cat(u3t(a))) && c3y == u3a_is_bob(u3t(a))) ? 
'y' : 'n'); - _jam_shax ctx; _js_init(&ctx); _js_encode(&ctx, a); diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 57d9677e7d..4103f9e2a1 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -190,6 +190,9 @@ u3_noun u3we_shal(u3_noun); u3_noun u3we_sha1(u3_noun); + u3_noun u3we_jam_shax(u3_noun); + u3_noun u3qe_jam_shax(u3_noun); + u3_noun u3we_fein_ob(u3_noun); u3_noun u3we_fynd_ob(u3_noun); diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index 232ee9f25d..1637cd2c72 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -528,7 +528,6 @@ _pave_parts(void) // u3H->ban_u.blb_p = u3h_new(); u3H->ban_u.bob_p = u3h_new(); - u3H->ban_u.rev_p = u3h_new(); } static c3_d @@ -679,9 +678,6 @@ _find_home(void) if ( !u3H->ban_u.bob_p ) { u3H->ban_u.bob_p = u3h_new(); } - if ( !u3H->ban_u.rev_p ) { - u3H->ban_u.rev_p = u3h_new(); - } if ( !u3R->lop_p ) u3R->lop_p = u3h_new(); if ( !u3R->cax.for_p ) u3R->cax.for_p = u3h_new_cache(u3C.per_w); } diff --git a/pkg/noun/options.h b/pkg/noun/options.h index 5305a52f2a..ea34d901e0 100644 --- a/pkg/noun/options.h +++ b/pkg/noun/options.h @@ -23,7 +23,7 @@ void (*slog_f)(u3_noun); // function pointer for slog void (*sign_hold_f)(void); // suspend system signal regime void (*sign_move_f)(void); // restore system signal regime - void (*blob_delete_f)(c3_h, c3_w); // delete blob file by (mug, seq) + void (*blob_del_f)(c3_h, c3_w); // blob uninstall: king=release lease, mars=delete file } u3o_config; /* u3o_flag: process/system flags. diff --git a/pkg/noun/vortex.h b/pkg/noun/vortex.h index 095595fd3d..cde55fd058 100644 --- a/pkg/noun/vortex.h +++ b/pkg/noun/vortex.h @@ -18,40 +18,35 @@ u3_noun yot; // cached gates } u3v_arvo; - /* u3v_lease: active staging reservation in the blob store. + /* u3v_lease: PQ entry for lease TTL expiry. ** - ** Created when Mars installs a blob (receives %blob-install). 
- ** Holds a pending ref until the owning event is committed to the - ** event log (at which point the ref becomes an event-log ref in - ** blb_p), or until the lease expires (TTL). - ** - ** dead_o: c3y after the lease has been committed (or otherwise - ** invalidated). Set by _mars_fact via rev_p; the struct may - ** remain in the expiry PQ until it bubbles to the top. + ** Tracks a single les_w increment for a blob. If the king + ** releases the lease (via %blob-release IPC) before expiry, + ** dead_o is set to c3y and the PQ sweeper skips the decrement. + ** If the king crashes, the TTL fires and les_w is decremented. */ typedef struct _u3v_lease { - c3_d exp_d; // expiry time (Unix ms); 0 = no expiry + c3_d exp_d; // expiry time (Unix ms) c3_h mug_h; // blob mug c3_w seq_w; // blob seq within mug bucket - c3_o dead_o; // c3y if lease has been committed/invalidated - c3_c stg_c[4096]; // staging path that was installed (for logging) + c3_o dead_o; // c3y if lease already released } u3v_lease; /* u3v_bank: loom-resident blob bank. ** ** Lives in u3v_home, checkpointed in image.bin. ** - ** blb_p: HAMT bid -> count (event-log refcount per blob) - ** bob_p: HAMT bid -> offset (interned bob atom loom offsets) - ** rev_p: HAMT bid -> ptr (active u3v_lease pointers) + ** blb_p: HAMT bid -> u3a_blob* (loom offset of refcount struct) + ** bob_p: HAMT bid -> u3a_atom* (loom offset of interned bob atom) ** - ** A blob file is deleted when all three are empty for that bid: - ** blb_p count == 0, bob_p absent, rev_p absent. 
+ ** A blob file is deleted when ALL of: + ** u3a_blob.log_w == 0 (no event-log refs) + ** u3a_blob.les_w == 0 (no active leases) + ** bob_p[bid] absent (no live bob atom in the loom) */ typedef struct _u3v_bank { - u3p(u3h_root) blb_p; // blob_id -> log refcount - u3p(u3h_root) bob_p; // blob_id -> bob atom loom offset - u3p(u3h_root) rev_p; // blob_id -> u3v_lease* (active leases) + u3p(u3h_root) blb_p; // bid -> u3a_blob* (loom offset) + u3p(u3h_root) bob_p; // bid -> u3a_atom* (loom offset, interning) } u3v_bank; /* u3v_home: all internal (within image) state. diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index 72706531b4..ac4a3745b4 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1229,48 +1229,11 @@ _disk_epoc_kill(u3_disk* log_u, c3_d epo_d) c3_c epo_c[8193]; snprintf(epo_c, sizeof(epo_c), "%s/0i%" PRIc3_d, log_u->com_u->pax_c, epo_d); - // process blobs.txt: decrement event-log refcounts. - // after decrementing, check the full delete condition (log + noun + lease). + // TODO: scan LMDB range for blob-ref events (tag 0x02, op 0x03) + // in the chopped epoch and decrement u3a_blob.log_w for each. + // Then call _blob_maybe_delete for each affected bid. + // For now, log_w is only incremented (never decremented on chop). 
// - { - c3_c blt_c[8193]; - snprintf(blt_c, sizeof(blt_c), "%s/blobs.txt", epo_c); - FILE* blt_f = fopen(blt_c, "r"); - if ( blt_f ) { - while ( 1 ) { - uint32_t mug_i = 0, seq_i = 0; - if ( 2 != fscanf(blt_f, "%" SCNu32 " %" SCNu32, &mug_i, &seq_i) ) { - break; - } - c3_h mug_h = (c3_h)mug_i; - c3_w seq_w = (c3_w)seq_i; - c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - u3_noun bk = u3i_chub(bid_d); - u3_weak bv = u3h_get(u3H->ban_u.blb_p, bk); - c3_w ref_w = 0; - if ( u3_none != bv ) { - u3r_safe_word(bv, &ref_w); - } - if ( ref_w > 1 ) { - u3h_put(u3H->ban_u.blb_p, bk, u3i_word(ref_w - 1)); - } - else { - u3h_del(u3H->ban_u.blb_p, bk); - - // check all three ref sources before deleting - // - c3_o has_bob = __(u3_none != u3h_get(u3H->ban_u.bob_p, bk)); - c3_o has_lea = __(u3_none != u3h_get(u3H->ban_u.rev_p, bk)); - - if ( c3n == has_bob && c3n == has_lea ) { - u3_blob_delete(log_u->dir_u->pax_c, mug_h, seq_w); - } - } - u3z(bk); - } - fclose(blt_f); - } - } // delete files in epoch directory u3_dire* dir_u = u3_foil_folder(epo_c); @@ -1564,8 +1527,6 @@ u3_disk_chop(u3_disk* log_u, c3_d eve_d) } // delete all but the last two epochs. - // _disk_epoc_kill reads each epoch's blobs.txt and decrements - // blb_p refcounts, deleting blob files when they reach zero. // // XX parameterize the number of epochs to chop // @@ -1930,60 +1891,11 @@ typedef enum { _epoc_late = 4 // format from the future } _epoc_kind; -/* _disk_blb_rebuild_from_epochs(): rebuild ban_u.blb_p from all epoch blobs.txt files. -** -** Called after u3m_boot() so the loom (u3H) is live. -** Walks all epoch directories under .urb/log/, reads each blobs.txt, -** and increments the blb_p refcount for each referenced blob. -** Replaces any stale blb_p from the snapshot with a freshly-computed map. -** -** Note: we do NOT u3h_free the old blb_p here. 
The snapshot may have -** been saved with a larger loom, in which case the stale HAMT nodes may -** live on pages beyond the current loom's HEAP.len_w, and u3h_free would -** crash with "palloc: page out of heap". The old nodes become dead loom -** memory and will be reclaimed at the next epoch roll / snapshot compaction. +/* NOTE: _disk_blb_rebuild_from_epochs removed. +** Blob log-refs are now tracked via LMDB blob-ref events (tag 0x02), +** not via blobs.txt files. u3a_blob structs in blb_p persist in the +** loom snapshot; on replay, blob-ref events reconstruct the counters. */ -static void -_disk_blb_rebuild_from_epochs(u3_disk* log_u) -{ - // discard stale snapshot blb_p; allocate a fresh, empty HAMT - // - u3H->ban_u.blb_p = u3h_new(); - - c3_z epo_z = u3_disk_epoc_list(log_u, 0); - c3_d* epo_d = c3_malloc(epo_z * sizeof(c3_d)); - u3_disk_epoc_list(log_u, epo_d); - - for ( c3_z i_z = 0; i_z < epo_z; i_z++ ) { - c3_c blt_c[8193]; - snprintf(blt_c, sizeof(blt_c), "%s/0i%" PRIc3_d "/blobs.txt", - log_u->com_u->pax_c, epo_d[i_z]); - - FILE* blt_f = fopen(blt_c, "r"); - if ( !blt_f ) { - continue; // no blobs.txt in this epoch (pre-VER3 or no blobs) - } - - while ( 1 ) { - uint32_t mug_i = 0, seq_i = 0; - if ( 2 != fscanf(blt_f, "%" SCNu32 " %" SCNu32, &mug_i, &seq_i) ) { - break; - } - c3_d bid_d = ((c3_d)(c3_h)mug_i << 32) | (c3_d)(c3_w)seq_i; - u3_noun bk = u3i_chub(bid_d); - u3_weak bv = u3h_get(u3H->ban_u.blb_p, bk); - c3_w ref_w = 0; - if ( u3_none != bv ) { - u3r_safe_word(bv, &ref_w); - } - u3h_put(u3H->ban_u.blb_p, bk, u3i_word(ref_w + 1)); - u3z(bk); - } - fclose(blt_f); - } - - c3_free(epo_d); -} /* _disk_epoc_load(): load existing epoch, enumerating failures */ @@ -2157,9 +2069,9 @@ _disk_epoc_load(u3_disk* log_u, c3_d lat_d, u3_disk_load_e lod_e) u3m_boot(log_u->dir_u->pax_c, (size_t)1 << u3_Host.ops_u.lom_y); // XX confirm - // rebuild blob refcount map from surviving epoch blobs.txt files + // blob refcounts (u3a_blob in blb_p) persist in the loom 
snapshot. + // on replay, LMDB blob-ref events will reconstruct log_w/les_w. // - _disk_blb_rebuild_from_epochs(log_u); if ( log_u->dun_d < u3A->eve_d ) { // XX bad, add to enum diff --git a/pkg/vere/io/http.c b/pkg/vere/io/http.c index 37a4567c7d..927e7c82dc 100644 --- a/pkg/vere/io/http.c +++ b/pkg/vere/io/http.c @@ -1196,23 +1196,25 @@ _http_req_dispatch(u3_hreq* req_u, u3_noun req) byte_range rng_u; c3_o rng_o = _get_range(req_headers, &rng_u); - // prepare spur for eyre range scry + // if no Range header, synthesize bytes=0- so the response is + // always a chunked 206 with Content-Range (not a 200 with the + // entire body). this lets the browser discover accept-ranges + // and switch to Range-based seeking — critical for large video + // files with moov-at-end. // - u3_noun spur; if ( c3n == rng_o ) { - // full range: '/range/0//foo' - spur = u3nq(u3i_string("range"), c3_s1('0'), u3_blip, u3k(bem.pur)); + rng_u.beg_z = 0; + rng_u.end_z = SIZE_MAX; } - else { - _chunk_align(&rng_u); - u3_atom beg = ( SIZE_MAX == rng_u.beg_z) ? - u3_blip : u3dc("scot", c3__ud, u3i_chub(rng_u.beg_z)); - u3_atom end = ( SIZE_MAX == rng_u.end_z) ? - u3_blip : u3dc("scot", c3__ud, u3i_chub(rng_u.end_z)); + _chunk_align(&rng_u); - spur = u3nq(u3i_string("range"), beg, end, u3k(bem.pur)); - } + u3_atom beg = ( SIZE_MAX == rng_u.beg_z) ? + u3_blip : u3dc("scot", c3__ud, u3i_chub(rng_u.beg_z)); + u3_atom end = ( SIZE_MAX == rng_u.end_z) ? 
+ u3_blip : u3dc("scot", c3__ud, u3i_chub(rng_u.end_z)); + + u3_noun spur = u3nq(u3i_string("range"), beg, end, u3k(bem.pur)); if ( c3n == _http_peek_dispatch(req_u, &bem, gang, spur) ) { u3z(req_u->peq_u->pax); @@ -1563,6 +1565,7 @@ _http_start_respond(u3_hreq* req_u, u3_hhed* deh_u = hed_u; c3_i has_len_i = 0; + size_t con_len = 0; while ( 0 != hed_u ) { if ( 0x200 <= rec_u->version ) { @@ -1575,6 +1578,7 @@ _http_start_respond(u3_hreq* req_u, } if ( 0 == strncmp(hed_u->nam_c, "content-length", 14) ) { has_len_i = 1; + con_len = strtoull(hed_u->val_c, 0, 10); } else { h2o_add_header_by_str(&rec_u->pool, &rec_u->res.headers, @@ -1599,12 +1603,13 @@ _http_start_respond(u3_hreq* req_u, gen_u->hed_u = deh_u; gen_u->req_u = req_u; - // if we don't explicitly set this field, h2o will send with - // transfer-encoding: chunked + // tell h2o the true content-length from eyre's response headers. + // without this, h2o defaults to transfer-encoding: chunked. + // the old code used gen_u->bod_u->len_w which is only the first + // chunk (1MB for bob-streamed bodies), not the total — wrong. // if ( 1 == has_len_i ) { - rec_u->res.content_length = ( 0 == gen_u->bod_u ) ? - 0 : gen_u->bod_u->len_w; + rec_u->res.content_length = con_len; } req_u->gen_u = gen_u; diff --git a/pkg/vere/king.c b/pkg/vere/king.c index b3a5d8057c..c506e69ad6 100644 --- a/pkg/vere/king.c +++ b/pkg/vere/king.c @@ -19,6 +19,16 @@ u3_king u3_King; static const c3_c* ver_hos_c = "https://bootstrap.urbit.org/vere"; +/* _king_blob_del(): king-side del_f — release a blob lease via IPC. 
+*/ +static void +_king_blob_del(c3_h mug_h, c3_w seq_w) +{ + if ( u3K.pir_u && u3K.pir_u->god_u ) { + u3_lord_blob_release(u3K.pir_u->god_u, mug_h, seq_w); + } +} + // stash config flags for worker // static c3_h sag_h; @@ -186,6 +196,8 @@ _king_boot_done(void* ptr_v, c3_o ret_o) } u3K.pir_u = u3_pier_stay(sag_h, u3i_string(u3_Host.dir_c), rift); + + u3C.blob_del_f = _king_blob_del; } /* _king_prop(): events from prop arguments @@ -339,6 +351,8 @@ _king_pier(u3_noun pier) u3K.pir_u = u3_pier_stay(sag_h, u3k(u3t(pier)), u3_none); u3z(pier); + + u3C.blob_del_f = _king_blob_del; } /* king_curl_alloc(): allocate a response buffer for curl diff --git a/pkg/vere/lord.c b/pkg/vere/lord.c index be81f4ad98..6b650dc659 100644 --- a/pkg/vere/lord.c +++ b/pkg/vere/lord.c @@ -844,6 +844,26 @@ u3_lord_blob_install(u3_lord* god_u, _lord_writ_send(god_u, wit_u); } +/* u3_lord_blob_lease(): tell Mars king is acquiring a blob lease. +*/ +void +u3_lord_blob_lease(u3_lord* god_u, c3_h mug_h, c3_w seq_w) +{ + _lord_send(god_u, u3nt(c3_s4('b','l','a','s'), + u3i_word(mug_h), + u3i_word(seq_w))); +} + +/* u3_lord_blob_release(): tell Mars king is releasing a blob lease. +*/ +void +u3_lord_blob_release(u3_lord* god_u, c3_h mug_h, c3_w seq_w) +{ + _lord_send(god_u, u3nt(c3_s4('b','l','r','l'), + u3i_word(mug_h), + u3i_word(seq_w))); +} + /* u3_lord_save(): save a snapshot. */ c3_o diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index 8f4bf4fb24..1d5675abcd 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -22,7 +22,7 @@ c3_c tac_c[256]; // tracing label ** ** C-heap structure (not in loom). Leases are owned by the PQ — ** it is the sole place that c3_free()s them. Committed leases are -** marked dead_o=c3y via the rev_p reverse index; the sweeper pops +** marked dead_o=c3y when their lease is released; the sweeper pops ** and frees them when they bubble to the top. 
*/ typedef struct _mars_lease_pq { @@ -95,39 +95,64 @@ _mars_pq_pop(_mars_lease_pq* pq_u) return r_u; } -/* _mars_blob_delete(): callback for pkg/noun to delete blob files. +/* _mars_blob_del(): mars-side del_f — delete blob file + u3a_blob struct. */ static void -_mars_blob_delete(c3_h mug_h, c3_w seq_w) +_mars_blob_del(c3_h mug_h, c3_w seq_w) { u3_blob_delete(u3C.dir_c, mug_h, seq_w); + + // free the u3a_blob struct and remove from blb_p + // + c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; + u3_noun bid = u3i_chub(bid_d); + u3_weak bv = u3h_get(u3H->ban_u.blb_p, bid); + if ( u3_none != bv ) { + c3_w off_w = 0; + u3r_safe_word(bv, &off_w); + u3a_wfree((u3a_blob*)u3a_into(off_w)); + u3h_del(u3H->ban_u.blb_p, bid); + } + u3z(bid); } -/* _blob_maybe_delete(): delete blob file iff fully unreferenced. -** -** Checks all three ref sources: bob_p (live noun), blb_p (event log), -** rev_p (active lease). Only unlinks the file when all are absent/zero. +/* _blob_lookup(): get u3a_blob* for a bid, or NULL. */ -static void -_blob_maybe_delete(c3_h mug_h, c3_w seq_w) +static u3a_blob* +_blob_lookup(c3_h mug_h, c3_w seq_w) { c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; u3_noun bid = u3i_chub(bid_d); + u3_weak bv = u3h_get(u3H->ban_u.blb_p, bid); + u3z(bid); - c3_w log_w = 0; - u3_weak lv = u3h_get(u3H->ban_u.blb_p, bid); - if ( u3_none != lv ) { - u3r_safe_word(lv, &log_w); - } + if ( u3_none == bv ) return 0; - c3_o has_bob = __(u3_none != u3h_get(u3H->ban_u.bob_p, bid)); - c3_o has_lea = __(u3_none != u3h_get(u3H->ban_u.rev_p, bid)); + c3_w off_w = 0; + u3r_safe_word(bv, &off_w); + return (u3a_blob*)u3a_into(off_w); +} +/* _blob_maybe_delete(): delete blob iff ALL refcounts are zero. +** +** Checks: u3a_blob.log_w, u3a_blob.les_w, bob_p[bid] presence. 
+*/ +static void +_blob_maybe_delete(c3_h mug_h, c3_w seq_w) +{ + u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); + if ( !blb_u ) return; + + if ( 0 != blb_u->log_w || 0 != blb_u->les_w ) return; + + c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; + u3_noun bid = u3i_chub(bid_d); + c3_o has_bob = __(u3_none != u3h_get(u3H->ban_u.bob_p, bid)); u3z(bid); - if ( 0 == log_w && c3n == has_bob && c3n == has_lea ) { - u3_blob_delete(u3C.dir_c, mug_h, seq_w); - } + if ( c3y == has_bob ) return; + + _mars_blob_del(mug_h, seq_w); } /* @@ -382,68 +407,26 @@ _mars_fact(u3_mars* mar_u, u3_noun job, u3_noun pro) { - // find all bob atoms in the committed job noun - // and promote them from lease-refs to event-log refs. + // find all bob atoms in the committed event and + // increment their event-log refcount (u3a_blob.log_w). // { struct { c3_d* ids; c3_z len; c3_z cap; } acc = {0, 0, 0}; u3a_walk_fore(job, &acc, _mars_blob_bobs_atom, _mars_blob_bobs_cell); - if ( acc.len ) { - // open (or create) the current epoch's blobs.txt for appending - // - c3_c blt_c[8192]; - snprintf(blt_c, sizeof(blt_c), "%s/0i%" PRIc3_d "/blobs.txt", - mar_u->log_u->com_u->pax_c, mar_u->log_u->epo_d); - FILE* blt_f = fopen(blt_c, "a"); - - for ( c3_z i_z = 0; i_z < acc.len; i_z++ ) { - c3_h mug_h = (c3_h)(acc.ids[i_z] >> 32); - c3_w seq_w = (c3_w)(acc.ids[i_z] & 0xFFFFFFFFULL); - - // bump event-log refcount in blb_p - // - u3_noun bkey = u3i_chub(acc.ids[i_z]); - u3_weak old = u3h_get(u3H->ban_u.blb_p, bkey); - c3_w ref_w = 0; - if ( u3_none != old ) { - u3r_safe_word(old, &ref_w); - } - u3h_put(u3H->ban_u.blb_p, bkey, u3i_word(ref_w + 1)); - u3z(bkey); - - // write to blobs.txt - // - if ( blt_f ) { - fprintf(blt_f, "%" PRIc3_h " %" PRIc3_w "\n", mug_h, seq_w); - } + for ( c3_z i_z = 0; i_z < acc.len; i_z++ ) { + c3_h mug_h = (c3_h)(acc.ids[i_z] >> 32); + c3_w seq_w = (c3_w)(acc.ids[i_z] & 0xFFFFFFFFULL); - // mark the lease (if any) as dead via rev_p. 
the lease struct - // is freed later by the expiry sweeper when it bubbles to the - // top of the PQ. rev_p entry removed now. - // - { - u3_noun revkey = u3i_chub(acc.ids[i_z]); - u3_weak rv = u3h_get(u3H->ban_u.rev_p, revkey); - if ( u3_none != rv ) { - c3_d ptr_d = 0; - u3r_safe_chub(rv, &ptr_d); - u3v_lease* lea_u = (u3v_lease*)(uintptr_t)ptr_d; - if ( lea_u ) { - lea_u->dead_o = c3y; - } - u3h_del(u3H->ban_u.rev_p, revkey); - } - u3z(revkey); - } + u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); + if ( blb_u ) { + blb_u->log_w++; } - if ( blt_f ) { - fflush(blt_f); - fclose(blt_f); - } - c3_free(acc.ids); + // TODO: write blob-ref log-inc event to LMDB (tag 0x02, op 0x03) } + + c3_free(acc.ids); } { @@ -777,7 +760,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) // Uses a min-heap PQ keyed by exp_d: peek at the root, stop once the // earliest-expiring lease is still in the future. // - // Committed leases are marked dead_o=c3y by _mars_fact (via rev_p) and + // Released leases are marked dead_o=c3y by %blob-release and // left in the PQ; they are freed here when they bubble to the top. 
// { @@ -789,7 +772,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) u3v_lease* top_u = _mars_pq_peek(&_mars_pq); if ( !top_u ) break; - // dead lease (already committed) — free and continue scanning + // dead lease (already released) — free and continue scanning // if ( c3y == top_u->dead_o ) { _mars_pq_pop(&_mars_pq); @@ -803,17 +786,18 @@ _mars_work(u3_mars* mar_u, u3_noun jar) break; } - // expired lease — remove from rev_p, then check full delete condition + // expired lease — decrement les_w, check deletion condition // _mars_pq_pop(&_mars_pq); { - c3_d bid_d = ((c3_d)top_u->mug_h << 32) | (c3_d)top_u->seq_w; - u3_noun revkey = u3i_chub(bid_d); - u3h_del(u3H->ban_u.rev_p, revkey); - u3z(revkey); + u3a_blob* blb_u = _blob_lookup(top_u->mug_h, top_u->seq_w); + if ( blb_u && blb_u->les_w > 0 ) { + blb_u->les_w--; + } } + // TODO: write blob-ref lease-release event to LMDB (tag 0x02, op 0x02) _blob_maybe_delete(top_u->mug_h, top_u->seq_w); c3_free(top_u); @@ -997,31 +981,43 @@ _mars_work(u3_mars* mar_u, u3_noun jar) ok_o = u3_blob_install_stg(u3C.dir_c, stg_c, &mug_h, &seq_w); if ( c3y == ok_o ) { - // create lease: pending ref until blob is committed to the - // event log (in _mars_fact) or the lease expires. + // create u3a_blob (if not present) with les_w = 1 (implicit + // first lease for king). push PQ entry for TTL expiry. 
// - u3v_lease* lea_u = c3_malloc(sizeof(*lea_u)); - lea_u->mug_h = mug_h; - lea_u->seq_w = seq_w; - lea_u->dead_o = c3n; { - struct timeval tv_u; - gettimeofday(&tv_u, 0); - lea_u->exp_d = (c3_d)tv_u.tv_sec * 1000ULL - + (c3_d)tv_u.tv_usec / 1000ULL - + 300000ULL; + u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); + if ( !blb_u ) { + c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; + u3_noun bid = u3i_chub(bid_d); + c3_w* blb_w = u3a_walloc(c3_wiseof(u3a_blob)); + blb_u = (u3a_blob*)blb_w; + blb_u->log_w = 0; + blb_u->les_w = 0; + blb_u->mug_h = mug_h; + blb_u->seq_w = seq_w; + blb_u->siz_d = 0; + u3h_put(u3H->ban_u.blb_p, bid, u3i_word(u3a_outa(blb_w))); + u3z(bid); + } + blb_u->les_w++; } - snprintf(lea_u->stg_c, sizeof(lea_u->stg_c), "%s", stg_c); - // record: bid -> lease ptr (for commit-time dead-mark) - // - u3_noun revkey = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_w); - u3h_put(u3H->ban_u.rev_p, revkey, u3i_chub((c3_d)(uintptr_t)lea_u)); - u3z(revkey); + { + u3v_lease* lea_u = c3_malloc(sizeof(*lea_u)); + lea_u->mug_h = mug_h; + lea_u->seq_w = seq_w; + lea_u->dead_o = c3n; + { + struct timeval tv_u; + gettimeofday(&tv_u, 0); + lea_u->exp_d = (c3_d)tv_u.tv_sec * 1000ULL + + (c3_d)tv_u.tv_usec / 1000ULL + + 900000ULL; // 15 min TTL + } + _mars_pq_push(&_mars_pq, lea_u); + } - // push onto expiry PQ (ownership: PQ frees on pop) - // - _mars_pq_push(&_mars_pq, lea_u); + // TODO: write blob-ref lease-issue event to LMDB (tag 0x02, op 0x01) } } else { @@ -1038,6 +1034,74 @@ _mars_work(u3_mars* mar_u, u3_noun jar) _mars_gift(mar_u, u3nc(c3__blob, c3n)); } } break; + + // %blas: king acquires a lease on a blob + // + case c3_s4('b','l','a','s'): { + u3_noun mug_n, seq_n; + if ( c3n == u3r_cell(dat, &mug_n, &seq_n) ) { + u3z(jar); + return c3n; + } + c3_h mug_h = 0; + c3_w seq_w = 0; + u3r_safe_half(mug_n, &mug_h); + u3r_safe_word(seq_n, &seq_w); + + u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); + if ( blb_u ) { + blb_u->les_w++; + + // push PQ entry for TTL failsafe 
(15 min) + // + u3v_lease* lea_u = c3_malloc(sizeof(*lea_u)); + lea_u->mug_h = mug_h; + lea_u->seq_w = seq_w; + lea_u->dead_o = c3n; + { + struct timeval tv_u; + gettimeofday(&tv_u, 0); + lea_u->exp_d = (c3_d)tv_u.tv_sec * 1000ULL + + (c3_d)tv_u.tv_usec / 1000ULL + + 900000ULL; + } + _mars_pq_push(&_mars_pq, lea_u); + } + + u3z(jar); + } break; + + // %blrl: king releases a lease on a blob + // + case c3_s4('b','l','r','l'): { + u3_noun mug_n, seq_n; + if ( c3n == u3r_cell(dat, &mug_n, &seq_n) ) { + u3z(jar); + return c3n; + } + c3_h mug_h = 0; + c3_w seq_w = 0; + u3r_safe_half(mug_n, &mug_h); + u3r_safe_word(seq_n, &seq_w); + + u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); + if ( blb_u && blb_u->les_w > 0 ) { + blb_u->les_w--; + } + + // mark the corresponding PQ entry dead (if findable) + // expiry sweeper will skip it + // + // TODO: for now we rely on les_w accounting. PQ entries + // with dead_o=c3n that outlive their lease's release will + // try to decrement les_w again on expiry, but les_w is + // already 0 → clamped by the >0 check → safe. + // + + _blob_maybe_delete(mug_h, seq_w); + + u3z(jar); + } break; } return c3y; @@ -1315,6 +1379,25 @@ _mars_poke_play(u3_mars* mar_u, const u3_fact* tac_u) u3z(u3A->roc); u3A->roc = u3k(cor); u3A->eve_d++; + + // increment log_w for any bob atoms in the replayed event. + // snapshot has log_w correct up to snapshot time; replay covers + // the gap from snapshot to head. 
+ // + { + struct { c3_d* ids; c3_z len; c3_z cap; } acc = {0, 0, 0}; + u3a_walk_fore(tac_u->job, &acc, _mars_blob_bobs_atom, _mars_blob_bobs_cell); + + for ( c3_z i_z = 0; i_z < acc.len; i_z++ ) { + c3_h mug_h = (c3_h)(acc.ids[i_z] >> 32); + c3_w seq_w = (c3_w)(acc.ids[i_z] & 0xFFFFFFFFULL); + u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); + if ( blb_u ) { + blb_u->log_w++; + } + } + c3_free(acc.ids); + } } u3z(gon); @@ -1805,7 +1888,7 @@ u3_mars_work(u3_mars* mar_u) // wire up blob delete callback (pkg/noun can't link pkg/vere) // - u3C.blob_delete_f = _mars_blob_delete; + u3C.blob_del_f = _mars_blob_del; // XX do something better // diff --git a/pkg/vere/vere.h b/pkg/vere/vere.h index 060babfc2c..c4c9c63c6f 100644 --- a/pkg/vere/vere.h +++ b/pkg/vere/vere.h @@ -1085,6 +1085,16 @@ void* ptr_v, void (*fun_f)(void*, c3_h, c3_w, c3_o)); + /* u3_lord_blob_lease(): tell Mars king is acquiring a blob lease. + */ + void + u3_lord_blob_lease(u3_lord* god_u, c3_h mug_h, c3_w seq_w); + + /* u3_lord_blob_release(): tell Mars king is releasing a blob lease. + */ + void + u3_lord_blob_release(u3_lord* god_u, c3_h mug_h, c3_w seq_w); + /** Filesystem (async). **/ /* u3_foil_folder(): load directory, blockingly. create if nonexistent. 
From 41b2c8b49869b93ce7233662454087e7b1fea271 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Mon, 27 Apr 2026 10:24:28 -0500 Subject: [PATCH 18/31] wip: blob refcounting redesign 2 --- pkg/noun/allocate.c | 4 +- pkg/noun/imprison.c | 34 ++++----- pkg/noun/manage.c | 90 ++++++++++++++++++++-- pkg/noun/vortex.c | 22 +++++- pkg/noun/vortex.h | 13 ++-- pkg/vere/disk.c | 179 +++++++++++++++++++++++++++++++++++++++++++- pkg/vere/main.c | 1 + pkg/vere/mars.c | 93 +++++++++++++++++------ 8 files changed, 376 insertions(+), 60 deletions(-) diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index 89eeb5fb74..b7037fdf70 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -886,6 +886,7 @@ _me_bob_dead(u3a_atom* atm_u) c3_h mug_h = atm_u->mug_h; c3_w seq_w = atm_u->buf_w[0]; c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; + u3_noun bid = u3i_chub(bid_d); // remove from interning index (noun is about to be freed) @@ -899,7 +900,7 @@ _me_bob_dead(u3a_atom* atm_u) if ( u3C.blob_del_f ) { if ( u3_none == bv ) { - // no u3a_blob entry → blob was never registered or already deleted. + // no u3a_blob entry — blob was never registered or already deleted. // call del_f anyway (king needs to release the lease). // u3C.blob_del_f(mug_h, seq_w); @@ -915,6 +916,7 @@ _me_bob_dead(u3a_atom* atm_u) if ( 0 == blb_u->log_w && 0 == blb_u->les_w ) { u3C.blob_del_f(mug_h, seq_w); + return; } } } diff --git a/pkg/noun/imprison.c b/pkg/noun/imprison.c index 5d8d786b49..29128bb216 100644 --- a/pkg/noun/imprison.c +++ b/pkg/noun/imprison.c @@ -849,6 +849,16 @@ u3i_vmolt(u3_noun som, u3i_molt_pair pairs[], c3_z len_z) u3_atom u3i_blob(c3_h mug_h, c3_w seq_w) { + // ban_u HAMTs live on the home road — must not be called on inner roads + // (inner-road nodes would be freed when the road pops). 
+ // + u3_assert( &(u3H->rod_u) == u3R ); + + fprintf(stderr, "u3i_blob: [%x/%u] blb_p=%u wyt=%u\r\n", + (unsigned)mug_h, (unsigned)seq_w, + (unsigned)u3H->ban_u.blb_p, + (unsigned)u3h_wyt(u3H->ban_u.blb_p)); + c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; u3_noun bid = u3i_chub(bid_d); @@ -873,28 +883,14 @@ u3i_blob(c3_h mug_h, c3_w seq_w) vat_u->len_w = 1 | u3a_blob_flag; vat_u->buf_w[0] = seq_w; - // store atom loom offset in bob_p (interning index) - // c3_w atm_off_w = u3a_outa(nov_w); - u3h_put(u3H->ban_u.bob_p, bid, u3i_word(atm_off_w)); - // ensure u3a_blob exists in blb_p + // store atom loom offset in bob_p (interning index) // - u3_weak bv = u3h_get(u3H->ban_u.blb_p, bid); - if ( u3_none == bv ) { - // allocate fresh u3a_blob with zero refcounts - // - c3_w* blb_w = u3a_walloc(c3_wiseof(u3a_blob)); - u3a_blob* blb_u = (u3a_blob*)blb_w; - blb_u->log_w = 0; - blb_u->les_w = 0; - blb_u->mug_h = mug_h; - blb_u->seq_w = seq_w; - blb_u->siz_d = 0; // filled later by blob_save / blob_install - - c3_w blb_off_w = u3a_outa(blb_w); - u3h_put(u3H->ban_u.blb_p, bid, u3i_word(blb_off_w)); - } + // blb_p entries (refcount structs) are created by the %blob IPC handler + // in mars.c, NOT here. u3i_blob only handles atom interning. 
+ // + u3h_put(u3H->ban_u.bob_p, bid, u3i_word(atm_off_w)); u3z(bid); return u3a_to_pug(atm_off_w); diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index 1637cd2c72..eedfc4a80c 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -524,10 +524,6 @@ _pave_parts(void) u3R->tim = u3_nul; u3R->how.fag_w = 0; - // initialize blob bank HAMTs - // - u3H->ban_u.blb_p = u3h_new(); - u3H->ban_u.bob_p = u3h_new(); } static c3_d @@ -566,6 +562,11 @@ _pave_home(void) u3R->mat_p = u3R->cap_p = top_p; _pave_parts(); + + // initialize blob bank HAMTs (home road only) + // + u3H->ban_u.blb_p = u3h_new(); + u3H->ban_u.bob_p = u3h_new(); } STATIC_ASSERT( (c3_wiseof(u3v_home) <= (((c3_w)1) << u3a_page)), @@ -575,6 +576,18 @@ STATIC_ASSERT( ((c3_wiseof(u3v_home) * sizeof(c3_w)) == sizeof(u3v_home)), STATIC_ASSERT( U3N_VERLAT < (1U << 5), "5-bit bytecode version" ); +/* _find_home_zero_les(): u3h_walk_with callback — zero les_w on boot. +*/ +static void +_find_home_zero_les(u3_noun kev, void* ptr_v) +{ + (void)ptr_v; + c3_w off_w = 0; + u3r_safe_word(u3t(kev), &off_w); + u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + blb_u->les_w = 0; +} + /* _find_home(): in restored image, point to home road. */ static void @@ -678,6 +691,12 @@ _find_home(void) if ( !u3H->ban_u.bob_p ) { u3H->ban_u.bob_p = u3h_new(); } + + // reset all les_w to 0: leases are transient IPC state backed by a + // C-heap PQ that is not persisted. after restart the PQ is empty, + // so the entries that would decrement les_w are gone. + // + u3h_walk_with(u3H->ban_u.blb_p, _find_home_zero_les, 0); if ( !u3R->lop_p ) u3R->lop_p = u3h_new(); if ( !u3R->cax.for_p ) u3R->cax.for_p = u3h_new_cache(u3C.per_w); } @@ -1044,8 +1063,17 @@ void u3m_leap(c3_w pad_w) { u3_road* rod_u; + u3p(u3h_root) _lc = u3H->ban_u.blb_p; +#define _LEAP_CHK(tag) do { \ + if ( _lc != u3H->ban_u.blb_p ) { \ + fprintf(stderr, "!!! 
LEAP CLOBBER at %s: was %lu now %lu\r\n", \ + (tag), (unsigned long)_lc, (unsigned long)u3H->ban_u.blb_p); \ + _lc = u3H->ban_u.blb_p; \ + } \ +} while(0) _rod_vaal(u3R); + _LEAP_CHK("post-vaal"); // push a new road struct onto the stack // @@ -1053,7 +1081,9 @@ u3m_leap(c3_w pad_w) u3a_pile pil_u; c3_p ptr_p; u3a_pile_prep(&pil_u, sizeof(u3a_road) + 15); // XX refactor to wiseof + _LEAP_CHK("post-pile-prep"); ptr_p = (c3_p)u3a_push(&pil_u); + _LEAP_CHK("post-push"); // XX add push_once, push_once_aligned // @@ -1066,6 +1096,7 @@ u3m_leap(c3_w pad_w) rod_u = (void*)ptr_p; memset(rod_u, 0, sizeof(u3a_road)); + _LEAP_CHK("post-memset"); } /* Allocate a region on the cap. @@ -1087,8 +1118,10 @@ u3m_leap(c3_w pad_w) } u3e_ward(bot_p - 1, top_p); + _LEAP_CHK("post-ward-N"); rod_u->mat_p = rod_u->cap_p = bot_p; rod_u->rut_p = rod_u->hat_p = top_p; + _LEAP_CHK("post-rod-init-N"); // in a south road, the heap is high and the stack is low // @@ -1121,8 +1154,10 @@ u3m_leap(c3_w pad_w) } u3e_ward(bot_p - 1, top_p); + _LEAP_CHK("post-ward-S"); rod_u->rut_p = rod_u->hat_p = bot_p; rod_u->mat_p = rod_u->cap_p = top_p; + _LEAP_CHK("post-rod-init-S"); // in a north road, the heap is low and the stack is high // @@ -1153,18 +1188,22 @@ u3m_leap(c3_w pad_w) rod_u->par_p = u3of(u3_road, u3R); u3R->kid_p = u3of(u3_road, rod_u); } + _LEAP_CHK("post-attach"); // Stash slow stack pointer if ( NULL != u3t_Spin ) { u3R->off_w = u3t_Spin->off_w; u3R->fow_w = u3t_Spin->fow_w; - } + } + _LEAP_CHK("post-spin"); /* Set up the new road. */ { u3R = rod_u; + _LEAP_CHK("post-switch"); _pave_parts(); + _LEAP_CHK("post-pave"); } #ifdef U3_MEMORY_DEBUG rod_u->all.fre_w = 0; @@ -1363,6 +1402,15 @@ u3m_timer_pop(void) u3_noun u3m_love(u3_noun pro) { + u3p(u3h_root) _chk = u3H->ban_u.blb_p; +#define _LOVE_CHK(tag) do { \ + if ( _chk != u3H->ban_u.blb_p ) { \ + fprintf(stderr, "!!! 
LOVE CLOBBER at %s: was %lu now %lu\r\n", \ + (tag), (unsigned long)_chk, (unsigned long)u3H->ban_u.blb_p); \ + _chk = u3H->ban_u.blb_p; \ + } \ +} while(0) + // save cache pointers from current road // u3p(u3h_root) byc_p = u3R->byc.har_p; @@ -1376,7 +1424,9 @@ u3m_love(u3_noun pro) // fallback to parent road (child heap on parent's stack) // + _LOVE_CHK("pre-fall"); u3m_fall(); + _LOVE_CHK("post-fall"); if ( _(tim_o) ) _m_renew_now(); @@ -1388,25 +1438,37 @@ u3m_love(u3_noun pro) // copy product and caches off our stack // + _LOVE_CHK("pre-take"); pro = u3a_take(pro); + _LOVE_CHK("post-take-pro"); jed_u = u3j_take(jed_u); + _LOVE_CHK("post-take-jed"); byc_p = u3n_take(byc_p); + _LOVE_CHK("post-take-byc"); per_p = u3h_take(per_p); + _LOVE_CHK("post-take-per"); for_p = u3h_take(for_p); + _LOVE_CHK("post-take-for"); // pop the stack // u3a_drop_heap(u3R->cap_p, u3R->ear_p); + _LOVE_CHK("post-drop"); u3R->cap_p = u3R->ear_p; u3R->ear_p = 0; // integrate junior caches // u3j_reap(jed_u); + _LOVE_CHK("post-reap-jed"); u3n_reap(byc_p); + _LOVE_CHK("post-reap-byc"); u3z_reap(u3z_memo_keep, per_p); + _LOVE_CHK("post-reap-per"); u3z_reap(u3z_memo_ford, for_p); + _LOVE_CHK("post-reap-for"); +#undef _LOVE_CHK return pro; } @@ -1510,7 +1572,14 @@ u3m_soft_top(c3_w mil_w, // timer ms /* Record the cap, and leap. */ - u3m_hate(pad_w); + { + u3p(u3h_root) _s = u3H->ban_u.blb_p; + u3m_hate(pad_w); + if ( _s != u3H->ban_u.blb_p ) { + fprintf(stderr, "!!! HATE CLOBBERED blb_p: was %lu now %lu\r\n", + (unsigned long)_s, (unsigned long)u3H->ban_u.blb_p); + } + } if ( mil_w ) { u3m_timer_set(u3m_time_gap_in_mil(mil_w)); @@ -1523,7 +1592,14 @@ u3m_soft_top(c3_w mil_w, // timer ms #else if ( 0 == _setjmp(u3R->esc.buf) ) { #endif - pro = fun_f(arg); + { + u3p(u3h_root) _s = u3H->ban_u.blb_p; + pro = fun_f(arg); + if ( _s != u3H->ban_u.blb_p ) { + fprintf(stderr, "!!! 
NOCK CLOBBERED blb_p: was %lu now %lu\r\n", + (unsigned long)_s, (unsigned long)u3H->ban_u.blb_p); + } + } /* Make sure the inner routine did not create garbage. */ diff --git a/pkg/noun/vortex.c b/pkg/noun/vortex.c index 9bf26a9029..335a74a774 100644 --- a/pkg/noun/vortex.c +++ b/pkg/noun/vortex.c @@ -3,6 +3,7 @@ #include "vortex.h" #include "allocate.h" +#include "hashtable.h" #include "imprison.h" #include "jets/k.h" #include "jets/q.h" @@ -370,10 +371,20 @@ u3v_mark() qua_u[0]->nam_c = strdup("kernel"); qua_u[0]->siz_w = u3a_mark_noun(arv_u->roc) * sizeof(c3_w); - qua_u[1] = c3_calloc(sizeof(*qua_u[2])); + qua_u[1] = c3_calloc(sizeof(*qua_u[1])); qua_u[1]->nam_c = strdup("wish cache"); qua_u[1]->siz_w = u3a_mark_noun(arv_u->yot) * sizeof(c3_w); + // mark blob bank HAMTs as live GC roots so their nodes + // aren't swept during u3m_pack / u3a_sweep. + // + if ( u3H->ban_u.blb_p ) { + u3h_mark(u3H->ban_u.blb_p); + } + if ( u3H->ban_u.bob_p ) { + u3h_mark(u3H->ban_u.bob_p); + } + qua_u[2] = NULL; u3m_quac* tot_u = c3_malloc(sizeof(*tot_u)); @@ -408,4 +419,13 @@ u3v_rewrite_compact(void) // u3a_relocate_noun(&(u3A->roc)); u3a_relocate_noun(&(u3A->yot)); + + // relocate blob bank HAMT roots for compaction. + // + if ( u3H->ban_u.blb_p ) { + u3h_relocate(&(u3H->ban_u.blb_p)); + } + if ( u3H->ban_u.bob_p ) { + u3h_relocate(&(u3H->ban_u.bob_p)); + } } diff --git a/pkg/noun/vortex.h b/pkg/noun/vortex.h index cde55fd058..2345616e02 100644 --- a/pkg/noun/vortex.h +++ b/pkg/noun/vortex.h @@ -52,18 +52,19 @@ /* u3v_home: all internal (within image) state. ** NB: version must first for ease of migration. ** - ** ban_u sits at the end so pre-blob-storage V5 snapshots still - ** load cleanly: old binaries never wrote past their (smaller) - ** sizeof(u3v_home), so the bytes at ban_u's new position are - ** reliably zero (MAP_ANON origin, persisted in saved pages). - ** _find_home's lazy-init turns those zeros into empty HAMTs. 
+ ** ban_u is placed BEFORE rod_u to avoid a memory clobber: + ** rod_u.cax (last field of u3a_road) was immediately adjacent, + ** and inner-road initialization was overwriting ban_u. Placing + ** ban_u before rod_u puts it at a stable offset (after pam_d, + ** before the large road struct). lazy-init in _find_home + ** handles zero values from pre-blob snapshots. */ typedef struct _u3v_home { u3v_version ver_d; // version number c3_d pam_d; // parameters u3v_arvo arv_u; // arvo state + u3v_bank ban_u; // blob bank u3a_road rod_u; // storage state - u3v_bank ban_u; // blob bank (NB: must stay last) } u3v_home; diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index ac4a3745b4..be8c4db461 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1220,6 +1220,30 @@ _disk_epoc_roll(u3_disk* log_u, c3_d epo_d) return c3n; } +/* _disk_chop_bob_atom(): u3a_walk_fore atom callback — collect bob bids. +*/ +static void +_disk_chop_bob_atom(u3_atom a, void* ptr_v) +{ + if ( c3y != u3a_is_bob(a) ) return; + struct { c3_d* ids; c3_z len; c3_z cap; } *acc = ptr_v; + if ( acc->len == acc->cap ) { + acc->cap = acc->cap ? acc->cap * 2 : 8; + acc->ids = c3_realloc(acc->ids, acc->cap * sizeof(c3_d)); + } + acc->ids[acc->len++] = + ((c3_d)u3a_bob_mug(a) << 32) | (c3_d)u3a_bob_seq(a); +} + +/* _disk_chop_bob_cell(): u3a_walk_fore cell callback — always descend. +*/ +static c3_o +_disk_chop_bob_cell(u3_noun n, void* ptr_v) +{ + (void)n; (void)ptr_v; + return c3y; +} + /* _disk_epoc_kill: delete an epoch. */ static c3_o @@ -1229,10 +1253,7 @@ _disk_epoc_kill(u3_disk* log_u, c3_d epo_d) c3_c epo_c[8193]; snprintf(epo_c, sizeof(epo_c), "%s/0i%" PRIc3_d, log_u->com_u->pax_c, epo_d); - // TODO: scan LMDB range for blob-ref events (tag 0x02, op 0x03) - // in the chopped epoch and decrement u3a_blob.log_w for each. - // Then call _blob_maybe_delete for each affected bid. - // For now, log_w is only incremented (never decremented on chop). 
+ // blob log_w is rebuilt post-chop by u3_disk_chop, not per-epoch. // // delete files in epoch directory @@ -1510,6 +1531,146 @@ _disk_vere_diff(u3_disk* log_u) return c3n; } +/* _disk_chop_zero_cb(): u3h_walk_with callback — zero log_w and les_w. +** +** les_w is zeroed because leases are transient IPC state: the lease PQ +** lives in C heap (not persisted), so after a restart/chop the lease +** entries that would decrement les_w are gone. +*/ +static void +_disk_chop_zero_cb(u3_noun kev, void* ptr_v) +{ + (void)ptr_v; + u3_noun val = u3t(kev); + c3_w off_w = 0; + u3r_safe_word(val, &off_w); + u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + blb_u->log_w = 0; + blb_u->les_w = 0; +} + +/* _disk_chop_del: accumulator for collecting blb_p keys to delete. +*/ +typedef struct { + const c3_c* pax_c; + c3_d* bid_d; // array of bid keys to delete + c3_z len_z; + c3_z cap_z; +} _disk_chop_del; + +/* _disk_chop_delete_cb(): u3h_walk_with callback — collect dead blobs. +*/ +static void +_disk_chop_delete_cb(u3_noun kev, void* ptr_v) +{ + _disk_chop_del* del_u = ptr_v; + u3_noun key = u3h(kev); + u3_noun val = u3t(kev); + c3_w off_w = 0; + u3r_safe_word(val, &off_w); + u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + + fprintf(stderr, "chop: blob mug=%u seq=%u log_w=%u les_w=%u\r\n", + (unsigned)blb_u->mug_h, (unsigned)blb_u->seq_w, + (unsigned)blb_u->log_w, (unsigned)blb_u->les_w); + + // delete when no event-log or lease refs remain. + // bob_p (live noun) is NOT checked: the file is the expensive part, + // and any surviving bob atom will gracefully fail on read (u3r_blob_map + // returns NULL for missing files). + // + if ( 0 == blb_u->log_w && 0 == blb_u->les_w ) { + fprintf(stderr, "chop: DELETING blob mug=%u seq=%u\r\n", + (unsigned)blb_u->mug_h, (unsigned)blb_u->seq_w); + u3_blob_delete(del_u->pax_c, blb_u->mug_h, blb_u->seq_w); + + // collect bid for post-walk blb_p cleanup + // + if ( del_u->len_z == del_u->cap_z ) { + del_u->cap_z = del_u->cap_z ? 
del_u->cap_z * 2 : 8; + del_u->bid_d = c3_realloc(del_u->bid_d, del_u->cap_z * sizeof(c3_d)); + } + del_u->bid_d[del_u->len_z++] = + ((c3_d)blb_u->mug_h << 32) | (c3_d)blb_u->seq_w; + } +} + +/* _disk_chop_rebuild_log_w(): rebuild blob log_w after epoch deletion. +** +** 1. zero all log_w via HAMT walk +** 2. scan remaining LMDB events, increment log_w for each bob atom +** 3. delete blob files whose total refcount is now zero +*/ +static void +_disk_chop_rebuild_log_w(u3_disk* log_u) +{ + // step 1: zero all log_w + // + u3h_walk_with(u3H->ban_u.blb_p, _disk_chop_zero_cb, 0); + + // step 2: scan remaining events for bob atoms + // + c3_d lo_d = 0, hi_d = 0; + u3_lmdb_gulf(log_u->mdb_u, &lo_d, &hi_d); + + if ( lo_d && hi_d >= lo_d ) { + u3_lmdb_walk itr_u; + if ( c3y == u3_lmdb_walk_init(log_u->mdb_u, &itr_u, lo_d, hi_d) ) { + while ( itr_u.nex_d <= itr_u.las_d ) { + size_t len_i; + void* buf_v; + + if ( c3n == u3_lmdb_walk_next(&itr_u, &len_i, &buf_v) ) break; + if ( len_i <= 4 ) continue; + + c3_y* pay_y = (c3_y*)buf_v + 4; + c3_d pay_d = len_i - 4; + u3_weak job = u3s_tap_xeno(pay_d, pay_y); + if ( u3_none == job ) continue; // jam event — no bob atoms + + struct { c3_d* ids; c3_z len; c3_z cap; } acc = {0, 0, 0}; + u3a_walk_fore(job, &acc, _disk_chop_bob_atom, _disk_chop_bob_cell); + + for ( c3_z i = 0; i < acc.len; i++ ) { + u3_noun bid = u3i_chub(acc.ids[i]); + u3_weak bv = u3h_get(u3H->ban_u.blb_p, bid); + if ( u3_none != bv ) { + c3_w off_w = 0; + u3r_safe_word(bv, &off_w); + u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + blb_u->log_w++; + } + u3z(bid); + } + + c3_free(acc.ids); + u3z(job); + } + u3_lmdb_walk_done(&itr_u); + } + } + + // step 3: delete unreferenced blobs and clean up blb_p + // + { + _disk_chop_del del_u = { .pax_c = log_u->dir_u->pax_c }; + u3h_walk_with(u3H->ban_u.blb_p, _disk_chop_delete_cb, &del_u); + + for ( c3_z i_z = 0; i_z < del_u.len_z; i_z++ ) { + u3_noun bid = u3i_chub(del_u.bid_d[i_z]); + u3_weak bv = u3h_get(u3H->ban_u.blb_p, 
bid); + if ( u3_none != bv ) { + c3_w off_w = 0; + u3r_safe_word(bv, &off_w); + u3a_wfree((void*)u3a_into(off_w)); + u3h_del(u3H->ban_u.blb_p, bid); + } + u3z(bid); + } + c3_free(del_u.bid_d); + } +} + /* u3_disk_chop(): delete all but the latest 2 epocs. */ void @@ -1540,6 +1701,16 @@ u3_disk_chop(u3_disk* log_u, c3_d eve_d) c3_free(sot_d); + // rebuild blob log_w after chop. + // + // step 1: zero all log_w via u3h_walk_with on blb_p + // step 2: scan remaining LMDB events for bob atoms, rebuild log_w + // step 3: delete blobs with all-zero refcounts + // + fprintf(stderr, "chop: rebuilding blob log refs (blb_p entries: %u)...\r\n", + (unsigned)u3h_wyt(u3H->ban_u.blb_p)); + _disk_chop_rebuild_log_w(log_u); + fprintf(stderr, "chop: event log truncation complete\r\n"); } diff --git a/pkg/vere/main.c b/pkg/vere/main.c index 5be5246742..dba87f42f6 100644 --- a/pkg/vere/main.c +++ b/pkg/vere/main.c @@ -2557,6 +2557,7 @@ _cw_chop(c3_i argc, c3_c* argv[]) u3_disk_chop(log_u, u3_Host.eve_d); + u3m_save(); u3_disk_exit(log_u); u3m_stop(); } diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index 1d5675abcd..0f358f373f 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -100,6 +100,10 @@ _mars_pq_pop(_mars_lease_pq* pq_u) static void _mars_blob_del(c3_h mug_h, c3_w seq_w) { + fprintf(stderr, "_mars_blob_del: mug=%u seq=%u (blb_p was %u entries)\r\n", + (unsigned)mug_h, (unsigned)seq_w, + (unsigned)u3h_wyt(u3H->ban_u.blb_p)); + u3_blob_delete(u3C.dir_c, mug_h, seq_w); // free the u3a_blob struct and remove from blb_p @@ -110,7 +114,10 @@ _mars_blob_del(c3_h mug_h, c3_w seq_w) if ( u3_none != bv ) { c3_w off_w = 0; u3r_safe_word(bv, &off_w); - u3a_wfree((u3a_blob*)u3a_into(off_w)); + u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + fprintf(stderr, "_mars_blob_del: removing blb_p entry (log_w=%u les_w=%u)\r\n", + (unsigned)blb_u->log_w, (unsigned)blb_u->les_w); + u3a_wfree(blb_u); u3h_del(u3H->ban_u.blb_p, bid); } u3z(bid); @@ -133,9 +140,7 @@ _blob_lookup(c3_h mug_h, c3_w 
seq_w) return (u3a_blob*)u3a_into(off_w); } -/* _blob_maybe_delete(): delete blob iff ALL refcounts are zero. -** -** Checks: u3a_blob.log_w, u3a_blob.les_w, bob_p[bid] presence. +/* _blob_maybe_delete(): delete blob iff log and lease refs are zero. */ static void _blob_maybe_delete(c3_h mug_h, c3_w seq_w) @@ -145,13 +150,6 @@ _blob_maybe_delete(c3_h mug_h, c3_w seq_w) if ( 0 != blb_u->log_w || 0 != blb_u->les_w ) return; - c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - u3_noun bid = u3i_chub(bid_d); - c3_o has_bob = __(u3_none != u3h_get(u3H->ban_u.bob_p, bid)); - u3z(bid); - - if ( c3y == has_bob ) return; - _mars_blob_del(mug_h, seq_w); } @@ -421,6 +419,15 @@ _mars_fact(u3_mars* mar_u, u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); if ( blb_u ) { blb_u->log_w++; + fprintf(stderr, "fact: log_w++ [%x/%u] log=%u les=%u (wyt=%u)\r\n", + (unsigned)mug_h, (unsigned)seq_w, + (unsigned)blb_u->log_w, (unsigned)blb_u->les_w, + (unsigned)u3h_wyt(u3H->ban_u.blb_p)); + } + else { + fprintf(stderr, "fact: blob NOT FOUND [%x/%u] (wyt=%u)\r\n", + (unsigned)mug_h, (unsigned)seq_w, + (unsigned)u3h_wyt(u3H->ban_u.blb_p)); } // TODO: write blob-ref log-inc event to LMDB (tag 0x02, op 0x03) @@ -845,19 +852,28 @@ _mars_work(u3_mars* mar_u, u3_noun jar) pre_w = u3a_open(u3R); mar_u->sen_d++; - if ( c3y == _mars_poke(mil_h, &job, &pro) ) { - mar_u->dun_d = mar_u->sen_d; - mar_u->mug_h = u3r_mug(u3A->roc); - mar_u->fag_w |= _mars_fag_mute; + { + u3p(u3h_root) _pre = u3H->ban_u.blb_p; - pro = _mars_sure_feck(mar_u, pre_w, pro); + if ( c3y == _mars_poke(mil_h, &job, &pro) ) { + if ( _pre != u3H->ban_u.blb_p ) { + fprintf(stderr, "!!! 
POKE CLOBBERED blb_p: was %lu now %lu\r\n", + (unsigned long)_pre, (unsigned long)u3H->ban_u.blb_p); + } - _mars_fact(mar_u, job, u3nt(c3__poke, c3y, pro)); - } - else { - mar_u->sen_d = mar_u->dun_d; - u3z(job); - _mars_gift(mar_u, u3nt(c3__poke, c3n, pro)); + mar_u->dun_d = mar_u->sen_d; + mar_u->mug_h = u3r_mug(u3A->roc); + mar_u->fag_w |= _mars_fag_mute; + + pro = _mars_sure_feck(mar_u, pre_w, pro); + + _mars_fact(mar_u, job, u3nt(c3__poke, c3y, pro)); + } + else { + mar_u->sen_d = mar_u->dun_d; + u3z(job); + _mars_gift(mar_u, u3nt(c3__poke, c3n, pro)); + } } u3_assert( mar_u->dun_d == u3A->eve_d ); @@ -998,6 +1014,9 @@ _mars_work(u3_mars* mar_u, u3_noun jar) blb_u->siz_d = 0; u3h_put(u3H->ban_u.blb_p, bid, u3i_word(u3a_outa(blb_w))); u3z(bid); + fprintf(stderr, "blob: install blb_p[%x/%u] new (wyt=%u)\r\n", + (unsigned)mug_h, (unsigned)seq_w, + (unsigned)u3h_wyt(u3H->ban_u.blb_p)); } blb_u->les_w++; } @@ -1018,6 +1037,10 @@ _mars_work(u3_mars* mar_u, u3_noun jar) } // TODO: write blob-ref lease-issue event to LMDB (tag 0x02, op 0x01) + + // save blob bank to snapshot so entries survive crash + // + mar_u->fag_w |= _mars_fag_mute; } } else { @@ -1242,6 +1265,9 @@ _mars_flush(u3_mars* mar_u) goto top; } else if ( u3_mars_exit_e == mar_u->sat_e ) { + fprintf(stderr, "mars: saving (blb_p entries: %u, bob_p entries: %u)\r\n", + (unsigned)u3h_wyt(u3H->ban_u.blb_p), + (unsigned)u3h_wyt(u3H->ban_u.bob_p)); u3m_save(); u3_disk_exit(mar_u->log_u); u3s_cue_xeno_done(mar_u->sil_u); @@ -1281,6 +1307,18 @@ u3_mars_kick(void* ram_u, c3_y ver_y, c3_d len_d, c3_y* hun_y) u3_mars* mar_u = ram_u; c3_o ret_o = c3n; + // watchdog: detect blb_p clobber + // + { + static u3p(u3h_root) _blb_watch = 0; + if ( _blb_watch && _blb_watch != u3H->ban_u.blb_p ) { + fprintf(stderr, "!!! 
BLB_P CLOBBER: was %lu now %lu (wyt=%u)\r\n", + (unsigned long)_blb_watch, (unsigned long)u3H->ban_u.blb_p, + (unsigned)u3h_wyt(u3H->ban_u.blb_p)); + } + _blb_watch = u3H->ban_u.blb_p; + } + _mars_step_trace(mar_u->dir_c); // XX optimize for stateless tasks w/ peek-next @@ -1305,6 +1343,17 @@ u3_mars_kick(void* ram_u, c3_y ver_y, c3_d len_d, c3_y* hun_y) _mars_post(mar_u); + // update watchdog after post + // + { + static u3p(u3h_root) _blb_post = 0; + if ( _blb_post && _blb_post != u3H->ban_u.blb_p ) { + fprintf(stderr, "!!! BLB_P CHANGED IN POST: was %lu now %lu\r\n", + (unsigned long)_blb_post, (unsigned long)u3H->ban_u.blb_p); + } + _blb_post = u3H->ban_u.blb_p; + } + ret_o = c3y; } From afe73202bdd750c5c4f3af11e7f97a1119d2ec63 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Mon, 27 Apr 2026 10:57:47 -0500 Subject: [PATCH 19/31] wip: blob refcounting redesign 3 --- pkg/noun/allocate.c | 52 ++++++++++++++++--------------------- pkg/noun/allocate.h | 9 ++++--- pkg/noun/imprison.c | 57 ++++++++++++++++++++--------------------- pkg/noun/manage.c | 41 ++++++++++++++---------------- pkg/noun/vortex.c | 18 +++++-------- pkg/noun/vortex.h | 52 +++++++------------------------------ pkg/vere/disk.c | 17 +++++-------- pkg/vere/mars.c | 62 ++++++++++++++++++++++++++++----------------- 8 files changed, 134 insertions(+), 174 deletions(-) diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index b7037fdf70..8e78bf83b4 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -875,10 +875,8 @@ _me_gain_south(u3_noun dog) /* _me_bob_dead(): handle a bob atom whose loom refcount just hit zero. ** -** Removes the atom from the bob_p interning index. Then checks -** the deletion condition: log_w == 0 && les_w == 0 && bob_p absent. -** Calls u3C.blob_del_f to either release the lease (king) or -** delete the blob file (mars). +** Clears blb_u->atm_w and checks deletion condition: +** log_w == 0 && les_w == 0 && atm_w == 0. 
*/ static void _me_bob_dead(u3a_atom* atm_u) @@ -886,40 +884,34 @@ _me_bob_dead(u3a_atom* atm_u) c3_h mug_h = atm_u->mug_h; c3_w seq_w = atm_u->buf_w[0]; c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - u3_noun bid = u3i_chub(bid_d); - // remove from interning index (noun is about to be freed) - // - u3h_del(u3H->ban_u.bob_p, bid); - - // check u3a_blob refcounts + // clear the interned atom pointer in blb_p // - u3_weak bv = u3h_get(u3H->ban_u.blb_p, bid); + u3_weak bv = u3h_get(u3H->blb_p, bid); u3z(bid); - if ( u3C.blob_del_f ) { - if ( u3_none == bv ) { - // no u3a_blob entry — blob was never registered or already deleted. - // call del_f anyway (king needs to release the lease). - // - u3C.blob_del_f(mug_h, seq_w); - } - else { - // u3a_blob exists — extract and check refcounts. - // bob_p[bid] is already absent (just removed above). - // if log_w and les_w are both 0, delete the blob. - // - c3_w off_w = 0; - u3r_safe_word(bv, &off_w); - u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + if ( u3_none != bv ) { + c3_w off_w = 0; + u3r_safe_word(bv, &off_w); + u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + blb_u->atm_w = 0; - if ( 0 == blb_u->log_w && 0 == blb_u->les_w ) { - u3C.blob_del_f(mug_h, seq_w); - return; - } + // if all refs are zero, delete the blob + // + if ( u3C.blob_del_f + && 0 == blb_u->log_w + && 0 == blb_u->les_w ) + { + u3C.blob_del_f(mug_h, seq_w); } } + else if ( u3C.blob_del_f ) { + // no blb_p entry — blob was never registered or already deleted. + // notify king so it can release its lease. + // + u3C.blob_del_f(mug_h, seq_w); + } } /* _me_lose_north(): lose on a north road. diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index 1eb2e6ba0a..ae904c1d7f 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -166,15 +166,15 @@ /* u3a_blob: loom-resident metadata for a blob file. ** - ** Stored in ban_u.blb_p HAMT keyed by bid = (mug_h << 32) | seq_w. + ** Stored in u3H->blb_p HAMT keyed by bid = (mug_h << 32) | seq_w. 
** Three independent ref-sources protect the backing file: ** ** log_w — event-log refs (inc on commit, dec on chop) ** les_w — lease refs (inc on king acquire, dec on release/expiry) - ** bob_p — noun liveness (bob_p[bid] present ↔ live u3a_atom) + ** atm_w — interned bob atom loom offset (0 = no live atom) ** - ** The blob file is deleted when ALL are zero: - ** log_w == 0 && les_w == 0 && bob_p[bid] absent + ** The blob file is deleted when: + ** log_w == 0 && les_w == 0 && atm_w == 0 */ typedef struct __attribute__((aligned(4))) { c3_w log_w; // event-log refcount @@ -182,6 +182,7 @@ c3_h mug_h; // 31-bit content mug (= bucket dir name) c3_w seq_w; // sequence number within bucket c3_d siz_d; // byte size of blob file + c3_w atm_w; // loom offset of interned bob atom (0 = none) } u3a_blob; STATIC_ASSERT( (((c3_w)1) << u3a_min_log) == u3a_minimum, diff --git a/pkg/noun/imprison.c b/pkg/noun/imprison.c index 29128bb216..5bbf7da787 100644 --- a/pkg/noun/imprison.c +++ b/pkg/noun/imprison.c @@ -837,60 +837,57 @@ u3i_vmolt(u3_noun som, u3i_molt_pair pairs[], c3_z len_z) /* u3i_blob(): construct or intern a bob atom (blob reference). ** ** A bob atom is an indirect atom with the MSB of len_w set. -** [mug_h] is the 31-bit mug of the content (stored in mug_h and used -** as the blob directory name). +** [mug_h] is the 31-bit mug of the content (= blob directory name). ** [seq_w] is the sequence number within $pier/.urb/bob//. ** -** Interned: at most one bob atom exists per (mug, seq) pair. -** bob_p maps bid -> loom offset of the canonical atom. -** blb_p maps bid -> loom offset of the u3a_blob refcount struct. -** If no u3a_blob exists yet, one is allocated with {log_w=0, les_w=0}. +** If a u3a_blob exists in blb_p with a live interned atom (atm_w != 0), +** returns the existing atom. Otherwise allocates a fresh bob atom and +** stores its offset in blb_u->atm_w (if a blb_p entry exists). 
*/ u3_atom u3i_blob(c3_h mug_h, c3_w seq_w) { - // ban_u HAMTs live on the home road — must not be called on inner roads - // (inner-road nodes would be freed when the road pops). + // blb_p lives on the home road // u3_assert( &(u3H->rod_u) == u3R ); - fprintf(stderr, "u3i_blob: [%x/%u] blb_p=%u wyt=%u\r\n", - (unsigned)mug_h, (unsigned)seq_w, - (unsigned)u3H->ban_u.blb_p, - (unsigned)u3h_wyt(u3H->ban_u.blb_p)); - + // check blb_p for an existing interned atom + // c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; u3_noun bid = u3i_chub(bid_d); + u3_weak bv = u3h_get(u3H->blb_p, bid); - // check for an existing interned bob atom - // - u3_weak got = u3h_get(u3H->ban_u.bob_p, bid); - if ( u3_none != got ) { + if ( u3_none != bv ) { c3_w off_w = 0; - u3r_safe_word(got, &off_w); - u3z(bid); - u3_atom bob = u3a_to_pug(off_w); - return u3k(bob); + u3r_safe_word(bv, &off_w); + u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + + if ( blb_u->atm_w ) { + u3z(bid); + return u3k(u3a_to_pug(blb_u->atm_w)); + } } - // allocate bob atom: u3a_atom header + 1 word for seq_w + // allocate fresh bob atom // c3_w* nov_w = u3a_walloc(1 + c3_wiseof(u3a_atom)); u3a_atom* vat_u = (void *)nov_w; - vat_u->use_w = 1; - vat_u->mug_h = mug_h; - vat_u->len_w = 1 | u3a_blob_flag; + vat_u->use_w = 1; + vat_u->mug_h = mug_h; + vat_u->len_w = 1 | u3a_blob_flag; vat_u->buf_w[0] = seq_w; c3_w atm_off_w = u3a_outa(nov_w); - // store atom loom offset in bob_p (interning index) - // - // blb_p entries (refcount structs) are created by the %blob IPC handler - // in mars.c, NOT here. u3i_blob only handles atom interning. 
+ // store in blb_p entry if one exists (created by %blob IPC handler) // - u3h_put(u3H->ban_u.bob_p, bid, u3i_word(atm_off_w)); + if ( u3_none != bv ) { + c3_w off_w = 0; + u3r_safe_word(bv, &off_w); + u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + blb_u->atm_w = atm_off_w; + } u3z(bid); return u3a_to_pug(atm_off_w); diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index eedfc4a80c..9481a9db29 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -565,8 +565,7 @@ _pave_home(void) // initialize blob bank HAMTs (home road only) // - u3H->ban_u.blb_p = u3h_new(); - u3H->ban_u.bob_p = u3h_new(); + u3H->blb_p = u3h_new(); } STATIC_ASSERT( (c3_wiseof(u3v_home) <= (((c3_w)1) << u3a_page)), @@ -685,18 +684,16 @@ _find_home(void) // // lazy-init blob bank HAMTs (zero if snapshot predates blob store) // - if ( !u3H->ban_u.blb_p ) { - u3H->ban_u.blb_p = u3h_new(); - } - if ( !u3H->ban_u.bob_p ) { - u3H->ban_u.bob_p = u3h_new(); + if ( !u3H->blb_p ) { + u3H->blb_p = u3h_new(); } + // reset all les_w to 0: leases are transient IPC state backed by a // C-heap PQ that is not persisted. after restart the PQ is empty, // so the entries that would decrement les_w are gone. // - u3h_walk_with(u3H->ban_u.blb_p, _find_home_zero_les, 0); + u3h_walk_with(u3H->blb_p, _find_home_zero_les, 0); if ( !u3R->lop_p ) u3R->lop_p = u3h_new(); if ( !u3R->cax.for_p ) u3R->cax.for_p = u3h_new_cache(u3C.per_w); } @@ -1063,12 +1060,12 @@ void u3m_leap(c3_w pad_w) { u3_road* rod_u; - u3p(u3h_root) _lc = u3H->ban_u.blb_p; + u3p(u3h_root) _lc = u3H->blb_p; #define _LEAP_CHK(tag) do { \ - if ( _lc != u3H->ban_u.blb_p ) { \ + if ( _lc != u3H->blb_p ) { \ fprintf(stderr, "!!! 
LEAP CLOBBER at %s: was %lu now %lu\r\n", \ - (tag), (unsigned long)_lc, (unsigned long)u3H->ban_u.blb_p); \ - _lc = u3H->ban_u.blb_p; \ + (tag), (unsigned long)_lc, (unsigned long)u3H->blb_p); \ + _lc = u3H->blb_p; \ } \ } while(0) @@ -1402,12 +1399,12 @@ u3m_timer_pop(void) u3_noun u3m_love(u3_noun pro) { - u3p(u3h_root) _chk = u3H->ban_u.blb_p; + u3p(u3h_root) _chk = u3H->blb_p; #define _LOVE_CHK(tag) do { \ - if ( _chk != u3H->ban_u.blb_p ) { \ + if ( _chk != u3H->blb_p ) { \ fprintf(stderr, "!!! LOVE CLOBBER at %s: was %lu now %lu\r\n", \ - (tag), (unsigned long)_chk, (unsigned long)u3H->ban_u.blb_p); \ - _chk = u3H->ban_u.blb_p; \ + (tag), (unsigned long)_chk, (unsigned long)u3H->blb_p); \ + _chk = u3H->blb_p; \ } \ } while(0) @@ -1573,11 +1570,11 @@ u3m_soft_top(c3_w mil_w, // timer ms /* Record the cap, and leap. */ { - u3p(u3h_root) _s = u3H->ban_u.blb_p; + u3p(u3h_root) _s = u3H->blb_p; u3m_hate(pad_w); - if ( _s != u3H->ban_u.blb_p ) { + if ( _s != u3H->blb_p ) { fprintf(stderr, "!!! HATE CLOBBERED blb_p: was %lu now %lu\r\n", - (unsigned long)_s, (unsigned long)u3H->ban_u.blb_p); + (unsigned long)_s, (unsigned long)u3H->blb_p); } } @@ -1593,11 +1590,11 @@ u3m_soft_top(c3_w mil_w, // timer ms if ( 0 == _setjmp(u3R->esc.buf) ) { #endif { - u3p(u3h_root) _s = u3H->ban_u.blb_p; + u3p(u3h_root) _s = u3H->blb_p; pro = fun_f(arg); - if ( _s != u3H->ban_u.blb_p ) { + if ( _s != u3H->blb_p ) { fprintf(stderr, "!!! 
NOCK CLOBBERED blb_p: was %lu now %lu\r\n", - (unsigned long)_s, (unsigned long)u3H->ban_u.blb_p); + (unsigned long)_s, (unsigned long)u3H->blb_p); } } diff --git a/pkg/noun/vortex.c b/pkg/noun/vortex.c index 335a74a774..86868be1d2 100644 --- a/pkg/noun/vortex.c +++ b/pkg/noun/vortex.c @@ -375,14 +375,11 @@ u3v_mark() qua_u[1]->nam_c = strdup("wish cache"); qua_u[1]->siz_w = u3a_mark_noun(arv_u->yot) * sizeof(c3_w); - // mark blob bank HAMTs as live GC roots so their nodes + // mark blob bank HAMT as live GC root so its nodes // aren't swept during u3m_pack / u3a_sweep. // - if ( u3H->ban_u.blb_p ) { - u3h_mark(u3H->ban_u.blb_p); - } - if ( u3H->ban_u.bob_p ) { - u3h_mark(u3H->ban_u.bob_p); + if ( u3H->blb_p ) { + u3h_mark(u3H->blb_p); } qua_u[2] = NULL; @@ -420,12 +417,9 @@ u3v_rewrite_compact(void) u3a_relocate_noun(&(u3A->roc)); u3a_relocate_noun(&(u3A->yot)); - // relocate blob bank HAMT roots for compaction. + // relocate blob bank HAMT root for compaction. // - if ( u3H->ban_u.blb_p ) { - u3h_relocate(&(u3H->ban_u.blb_p)); - } - if ( u3H->ban_u.bob_p ) { - u3h_relocate(&(u3H->ban_u.bob_p)); + if ( u3H->blb_p ) { + u3h_relocate(&(u3H->blb_p)); } } diff --git a/pkg/noun/vortex.h b/pkg/noun/vortex.h index 2345616e02..0dfff098cf 100644 --- a/pkg/noun/vortex.h +++ b/pkg/noun/vortex.h @@ -18,53 +18,19 @@ u3_noun yot; // cached gates } u3v_arvo; - /* u3v_lease: PQ entry for lease TTL expiry. - ** - ** Tracks a single les_w increment for a blob. If the king - ** releases the lease (via %blob-release IPC) before expiry, - ** dead_o is set to c3y and the PQ sweeper skips the decrement. - ** If the king crashes, the TTL fires and les_w is decremented. - */ - typedef struct _u3v_lease { - c3_d exp_d; // expiry time (Unix ms) - c3_h mug_h; // blob mug - c3_w seq_w; // blob seq within mug bucket - c3_o dead_o; // c3y if lease already released - } u3v_lease; - - /* u3v_bank: loom-resident blob bank. - ** - ** Lives in u3v_home, checkpointed in image.bin. 
- ** - ** blb_p: HAMT bid -> u3a_blob* (loom offset of refcount struct) - ** bob_p: HAMT bid -> u3a_atom* (loom offset of interned bob atom) - ** - ** A blob file is deleted when ALL of: - ** u3a_blob.log_w == 0 (no event-log refs) - ** u3a_blob.les_w == 0 (no active leases) - ** bob_p[bid] absent (no live bob atom in the loom) - */ - typedef struct _u3v_bank { - u3p(u3h_root) blb_p; // bid -> u3a_blob* (loom offset) - u3p(u3h_root) bob_p; // bid -> u3a_atom* (loom offset, interning) - } u3v_bank; - /* u3v_home: all internal (within image) state. - ** NB: version must first for ease of migration. + ** NB: version must be first for ease of migration. ** - ** ban_u is placed BEFORE rod_u to avoid a memory clobber: - ** rod_u.cax (last field of u3a_road) was immediately adjacent, - ** and inner-road initialization was overwriting ban_u. Placing - ** ban_u before rod_u puts it at a stable offset (after pam_d, - ** before the large road struct). lazy-init in _find_home - ** handles zero values from pre-blob snapshots. + ** blb_p is the blob bank HAMT (bid -> u3a_blob*), checkpointed + ** in image.bin. A blob file is deleted when: + ** log_w == 0 && les_w == 0 && atm_w == 0 */ typedef struct _u3v_home { - u3v_version ver_d; // version number - c3_d pam_d; // parameters - u3v_arvo arv_u; // arvo state - u3v_bank ban_u; // blob bank - u3a_road rod_u; // storage state + u3v_version ver_d; // version number + c3_d pam_d; // parameters + u3v_arvo arv_u; // arvo state + u3p(u3h_root) blb_p; // blob bank: bid -> u3a_blob* + u3a_road rod_u; // storage state } u3v_home; diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index be8c4db461..dd082f12e5 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1575,11 +1575,8 @@ _disk_chop_delete_cb(u3_noun kev, void* ptr_v) (unsigned)blb_u->log_w, (unsigned)blb_u->les_w); // delete when no event-log or lease refs remain. 
- // bob_p (live noun) is NOT checked: the file is the expensive part, - // and any surviving bob atom will gracefully fail on read (u3r_blob_map - // returns NULL for missing files). // - if ( 0 == blb_u->log_w && 0 == blb_u->les_w ) { + if ( 0 == blb_u->log_w && 0 == blb_u->les_w && 0 == blb_u->atm_w ) { fprintf(stderr, "chop: DELETING blob mug=%u seq=%u\r\n", (unsigned)blb_u->mug_h, (unsigned)blb_u->seq_w); u3_blob_delete(del_u->pax_c, blb_u->mug_h, blb_u->seq_w); @@ -1606,7 +1603,7 @@ _disk_chop_rebuild_log_w(u3_disk* log_u) { // step 1: zero all log_w // - u3h_walk_with(u3H->ban_u.blb_p, _disk_chop_zero_cb, 0); + u3h_walk_with(u3H->blb_p, _disk_chop_zero_cb, 0); // step 2: scan remaining events for bob atoms // @@ -1633,7 +1630,7 @@ _disk_chop_rebuild_log_w(u3_disk* log_u) for ( c3_z i = 0; i < acc.len; i++ ) { u3_noun bid = u3i_chub(acc.ids[i]); - u3_weak bv = u3h_get(u3H->ban_u.blb_p, bid); + u3_weak bv = u3h_get(u3H->blb_p, bid); if ( u3_none != bv ) { c3_w off_w = 0; u3r_safe_word(bv, &off_w); @@ -1654,16 +1651,16 @@ _disk_chop_rebuild_log_w(u3_disk* log_u) // { _disk_chop_del del_u = { .pax_c = log_u->dir_u->pax_c }; - u3h_walk_with(u3H->ban_u.blb_p, _disk_chop_delete_cb, &del_u); + u3h_walk_with(u3H->blb_p, _disk_chop_delete_cb, &del_u); for ( c3_z i_z = 0; i_z < del_u.len_z; i_z++ ) { u3_noun bid = u3i_chub(del_u.bid_d[i_z]); - u3_weak bv = u3h_get(u3H->ban_u.blb_p, bid); + u3_weak bv = u3h_get(u3H->blb_p, bid); if ( u3_none != bv ) { c3_w off_w = 0; u3r_safe_word(bv, &off_w); u3a_wfree((void*)u3a_into(off_w)); - u3h_del(u3H->ban_u.blb_p, bid); + u3h_del(u3H->blb_p, bid); } u3z(bid); } @@ -1708,7 +1705,7 @@ u3_disk_chop(u3_disk* log_u, c3_d eve_d) // step 3: delete blobs with all-zero refcounts // fprintf(stderr, "chop: rebuilding blob log refs (blb_p entries: %u)...\r\n", - (unsigned)u3h_wyt(u3H->ban_u.blb_p)); + (unsigned)u3h_wyt(u3H->blb_p)); _disk_chop_rebuild_log_w(log_u); fprintf(stderr, "chop: event log truncation complete\r\n"); diff --git 
a/pkg/vere/mars.c b/pkg/vere/mars.c index 0f358f373f..ee66370947 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -16,6 +16,20 @@ #include #include +/* u3v_lease: PQ entry for lease TTL expiry. +** +** Tracks a single les_w increment for a blob. If the king +** releases the lease (via %blob-release IPC) before expiry, +** dead_o is set to c3y and the PQ sweeper skips the decrement. +** If the king crashes, the TTL fires and les_w is decremented. +*/ +typedef struct _u3v_lease { + c3_d exp_d; // expiry time (Unix ms) + c3_h mug_h; // blob mug + c3_w seq_w; // blob seq within mug bucket + c3_o dead_o; // c3y if lease already released +} u3v_lease; + c3_c tac_c[256]; // tracing label /* _mars_lease_pq: min-heap of u3v_lease*, keyed by lea_u->exp_d. @@ -102,7 +116,7 @@ _mars_blob_del(c3_h mug_h, c3_w seq_w) { fprintf(stderr, "_mars_blob_del: mug=%u seq=%u (blb_p was %u entries)\r\n", (unsigned)mug_h, (unsigned)seq_w, - (unsigned)u3h_wyt(u3H->ban_u.blb_p)); + (unsigned)u3h_wyt(u3H->blb_p)); u3_blob_delete(u3C.dir_c, mug_h, seq_w); @@ -110,7 +124,7 @@ _mars_blob_del(c3_h mug_h, c3_w seq_w) // c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; u3_noun bid = u3i_chub(bid_d); - u3_weak bv = u3h_get(u3H->ban_u.blb_p, bid); + u3_weak bv = u3h_get(u3H->blb_p, bid); if ( u3_none != bv ) { c3_w off_w = 0; u3r_safe_word(bv, &off_w); @@ -118,7 +132,7 @@ _mars_blob_del(c3_h mug_h, c3_w seq_w) fprintf(stderr, "_mars_blob_del: removing blb_p entry (log_w=%u les_w=%u)\r\n", (unsigned)blb_u->log_w, (unsigned)blb_u->les_w); u3a_wfree(blb_u); - u3h_del(u3H->ban_u.blb_p, bid); + u3h_del(u3H->blb_p, bid); } u3z(bid); } @@ -130,7 +144,7 @@ _blob_lookup(c3_h mug_h, c3_w seq_w) { c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; u3_noun bid = u3i_chub(bid_d); - u3_weak bv = u3h_get(u3H->ban_u.blb_p, bid); + u3_weak bv = u3h_get(u3H->blb_p, bid); u3z(bid); if ( u3_none == bv ) return 0; @@ -140,7 +154,9 @@ _blob_lookup(c3_h mug_h, c3_w seq_w) return (u3a_blob*)u3a_into(off_w); } -/* 
_blob_maybe_delete(): delete blob iff log and lease refs are zero. +/* _blob_maybe_delete(): delete blob iff all refs are zero. +** +** Deletion condition: log_w == 0 && les_w == 0 && atm_w == 0 */ static void _blob_maybe_delete(c3_h mug_h, c3_w seq_w) @@ -148,7 +164,7 @@ _blob_maybe_delete(c3_h mug_h, c3_w seq_w) u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); if ( !blb_u ) return; - if ( 0 != blb_u->log_w || 0 != blb_u->les_w ) return; + if ( 0 != blb_u->log_w || 0 != blb_u->les_w || 0 != blb_u->atm_w ) return; _mars_blob_del(mug_h, seq_w); } @@ -422,12 +438,12 @@ _mars_fact(u3_mars* mar_u, fprintf(stderr, "fact: log_w++ [%x/%u] log=%u les=%u (wyt=%u)\r\n", (unsigned)mug_h, (unsigned)seq_w, (unsigned)blb_u->log_w, (unsigned)blb_u->les_w, - (unsigned)u3h_wyt(u3H->ban_u.blb_p)); + (unsigned)u3h_wyt(u3H->blb_p)); } else { fprintf(stderr, "fact: blob NOT FOUND [%x/%u] (wyt=%u)\r\n", (unsigned)mug_h, (unsigned)seq_w, - (unsigned)u3h_wyt(u3H->ban_u.blb_p)); + (unsigned)u3h_wyt(u3H->blb_p)); } // TODO: write blob-ref log-inc event to LMDB (tag 0x02, op 0x03) @@ -853,12 +869,12 @@ _mars_work(u3_mars* mar_u, u3_noun jar) mar_u->sen_d++; { - u3p(u3h_root) _pre = u3H->ban_u.blb_p; + u3p(u3h_root) _pre = u3H->blb_p; if ( c3y == _mars_poke(mil_h, &job, &pro) ) { - if ( _pre != u3H->ban_u.blb_p ) { + if ( _pre != u3H->blb_p ) { fprintf(stderr, "!!! 
POKE CLOBBERED blb_p: was %lu now %lu\r\n", - (unsigned long)_pre, (unsigned long)u3H->ban_u.blb_p); + (unsigned long)_pre, (unsigned long)u3H->blb_p); } mar_u->dun_d = mar_u->sen_d; @@ -1012,11 +1028,12 @@ _mars_work(u3_mars* mar_u, u3_noun jar) blb_u->mug_h = mug_h; blb_u->seq_w = seq_w; blb_u->siz_d = 0; - u3h_put(u3H->ban_u.blb_p, bid, u3i_word(u3a_outa(blb_w))); + blb_u->atm_w = 0; + u3h_put(u3H->blb_p, bid, u3i_word(u3a_outa(blb_w))); u3z(bid); fprintf(stderr, "blob: install blb_p[%x/%u] new (wyt=%u)\r\n", (unsigned)mug_h, (unsigned)seq_w, - (unsigned)u3h_wyt(u3H->ban_u.blb_p)); + (unsigned)u3h_wyt(u3H->blb_p)); } blb_u->les_w++; } @@ -1265,9 +1282,8 @@ _mars_flush(u3_mars* mar_u) goto top; } else if ( u3_mars_exit_e == mar_u->sat_e ) { - fprintf(stderr, "mars: saving (blb_p entries: %u, bob_p entries: %u)\r\n", - (unsigned)u3h_wyt(u3H->ban_u.blb_p), - (unsigned)u3h_wyt(u3H->ban_u.bob_p)); + fprintf(stderr, "mars: saving (blb_p entries: %u)\r\n", + (unsigned)u3h_wyt(u3H->blb_p)); u3m_save(); u3_disk_exit(mar_u->log_u); u3s_cue_xeno_done(mar_u->sil_u); @@ -1311,12 +1327,12 @@ u3_mars_kick(void* ram_u, c3_y ver_y, c3_d len_d, c3_y* hun_y) // { static u3p(u3h_root) _blb_watch = 0; - if ( _blb_watch && _blb_watch != u3H->ban_u.blb_p ) { + if ( _blb_watch && _blb_watch != u3H->blb_p ) { fprintf(stderr, "!!! BLB_P CLOBBER: was %lu now %lu (wyt=%u)\r\n", - (unsigned long)_blb_watch, (unsigned long)u3H->ban_u.blb_p, - (unsigned)u3h_wyt(u3H->ban_u.blb_p)); + (unsigned long)_blb_watch, (unsigned long)u3H->blb_p, + (unsigned)u3h_wyt(u3H->blb_p)); } - _blb_watch = u3H->ban_u.blb_p; + _blb_watch = u3H->blb_p; } _mars_step_trace(mar_u->dir_c); @@ -1347,11 +1363,11 @@ u3_mars_kick(void* ram_u, c3_y ver_y, c3_d len_d, c3_y* hun_y) // { static u3p(u3h_root) _blb_post = 0; - if ( _blb_post && _blb_post != u3H->ban_u.blb_p ) { + if ( _blb_post && _blb_post != u3H->blb_p ) { fprintf(stderr, "!!! 
BLB_P CHANGED IN POST: was %lu now %lu\r\n", - (unsigned long)_blb_post, (unsigned long)u3H->ban_u.blb_p); + (unsigned long)_blb_post, (unsigned long)u3H->blb_p); } - _blb_post = u3H->ban_u.blb_p; + _blb_post = u3H->blb_p; } ret_o = c3y; From bfe12ee43f6946d4414a2c0b324818051c4aebc0 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Mon, 27 Apr 2026 12:47:12 -0500 Subject: [PATCH 20/31] wip: blob refcounting redesign 4 --- pkg/noun/manage.c | 61 ++------------------------------ pkg/vere/disk.c | 21 ++++------- pkg/vere/mars.c | 90 ++++++++--------------------------------------- 3 files changed, 23 insertions(+), 149 deletions(-) diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index 9481a9db29..498843bed6 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -1060,17 +1060,8 @@ void u3m_leap(c3_w pad_w) { u3_road* rod_u; - u3p(u3h_root) _lc = u3H->blb_p; -#define _LEAP_CHK(tag) do { \ - if ( _lc != u3H->blb_p ) { \ - fprintf(stderr, "!!! LEAP CLOBBER at %s: was %lu now %lu\r\n", \ - (tag), (unsigned long)_lc, (unsigned long)u3H->blb_p); \ - _lc = u3H->blb_p; \ - } \ -} while(0) _rod_vaal(u3R); - _LEAP_CHK("post-vaal"); // push a new road struct onto the stack // @@ -1078,9 +1069,7 @@ u3m_leap(c3_w pad_w) u3a_pile pil_u; c3_p ptr_p; u3a_pile_prep(&pil_u, sizeof(u3a_road) + 15); // XX refactor to wiseof - _LEAP_CHK("post-pile-prep"); ptr_p = (c3_p)u3a_push(&pil_u); - _LEAP_CHK("post-push"); // XX add push_once, push_once_aligned // @@ -1093,7 +1082,6 @@ u3m_leap(c3_w pad_w) rod_u = (void*)ptr_p; memset(rod_u, 0, sizeof(u3a_road)); - _LEAP_CHK("post-memset"); } /* Allocate a region on the cap. 
@@ -1115,10 +1103,8 @@ u3m_leap(c3_w pad_w) } u3e_ward(bot_p - 1, top_p); - _LEAP_CHK("post-ward-N"); rod_u->mat_p = rod_u->cap_p = bot_p; rod_u->rut_p = rod_u->hat_p = top_p; - _LEAP_CHK("post-rod-init-N"); // in a south road, the heap is high and the stack is low // @@ -1151,10 +1137,8 @@ u3m_leap(c3_w pad_w) } u3e_ward(bot_p - 1, top_p); - _LEAP_CHK("post-ward-S"); rod_u->rut_p = rod_u->hat_p = bot_p; rod_u->mat_p = rod_u->cap_p = top_p; - _LEAP_CHK("post-rod-init-S"); // in a north road, the heap is low and the stack is high // @@ -1185,22 +1169,18 @@ u3m_leap(c3_w pad_w) rod_u->par_p = u3of(u3_road, u3R); u3R->kid_p = u3of(u3_road, rod_u); } - _LEAP_CHK("post-attach"); // Stash slow stack pointer if ( NULL != u3t_Spin ) { u3R->off_w = u3t_Spin->off_w; u3R->fow_w = u3t_Spin->fow_w; } - _LEAP_CHK("post-spin"); /* Set up the new road. */ { u3R = rod_u; - _LEAP_CHK("post-switch"); _pave_parts(); - _LEAP_CHK("post-pave"); } #ifdef U3_MEMORY_DEBUG rod_u->all.fre_w = 0; @@ -1399,15 +1379,6 @@ u3m_timer_pop(void) u3_noun u3m_love(u3_noun pro) { - u3p(u3h_root) _chk = u3H->blb_p; -#define _LOVE_CHK(tag) do { \ - if ( _chk != u3H->blb_p ) { \ - fprintf(stderr, "!!! 
LOVE CLOBBER at %s: was %lu now %lu\r\n", \ - (tag), (unsigned long)_chk, (unsigned long)u3H->blb_p); \ - _chk = u3H->blb_p; \ - } \ -} while(0) - // save cache pointers from current road // u3p(u3h_root) byc_p = u3R->byc.har_p; @@ -1421,9 +1392,7 @@ u3m_love(u3_noun pro) // fallback to parent road (child heap on parent's stack) // - _LOVE_CHK("pre-fall"); u3m_fall(); - _LOVE_CHK("post-fall"); if ( _(tim_o) ) _m_renew_now(); @@ -1435,37 +1404,25 @@ u3m_love(u3_noun pro) // copy product and caches off our stack // - _LOVE_CHK("pre-take"); pro = u3a_take(pro); - _LOVE_CHK("post-take-pro"); jed_u = u3j_take(jed_u); - _LOVE_CHK("post-take-jed"); byc_p = u3n_take(byc_p); - _LOVE_CHK("post-take-byc"); per_p = u3h_take(per_p); - _LOVE_CHK("post-take-per"); for_p = u3h_take(for_p); - _LOVE_CHK("post-take-for"); // pop the stack // u3a_drop_heap(u3R->cap_p, u3R->ear_p); - _LOVE_CHK("post-drop"); u3R->cap_p = u3R->ear_p; u3R->ear_p = 0; // integrate junior caches // u3j_reap(jed_u); - _LOVE_CHK("post-reap-jed"); u3n_reap(byc_p); - _LOVE_CHK("post-reap-byc"); u3z_reap(u3z_memo_keep, per_p); - _LOVE_CHK("post-reap-per"); u3z_reap(u3z_memo_ford, for_p); - _LOVE_CHK("post-reap-for"); -#undef _LOVE_CHK return pro; } @@ -1569,14 +1526,7 @@ u3m_soft_top(c3_w mil_w, // timer ms /* Record the cap, and leap. */ - { - u3p(u3h_root) _s = u3H->blb_p; - u3m_hate(pad_w); - if ( _s != u3H->blb_p ) { - fprintf(stderr, "!!! HATE CLOBBERED blb_p: was %lu now %lu\r\n", - (unsigned long)_s, (unsigned long)u3H->blb_p); - } - } + u3m_hate(pad_w); if ( mil_w ) { u3m_timer_set(u3m_time_gap_in_mil(mil_w)); @@ -1589,14 +1539,7 @@ u3m_soft_top(c3_w mil_w, // timer ms #else if ( 0 == _setjmp(u3R->esc.buf) ) { #endif - { - u3p(u3h_root) _s = u3H->blb_p; - pro = fun_f(arg); - if ( _s != u3H->blb_p ) { - fprintf(stderr, "!!! NOCK CLOBBERED blb_p: was %lu now %lu\r\n", - (unsigned long)_s, (unsigned long)u3H->blb_p); - } - } + pro = fun_f(arg); /* Make sure the inner routine did not create garbage. 
*/ diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index dd082f12e5..5443ee0fe4 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1533,17 +1533,18 @@ _disk_vere_diff(u3_disk* log_u) /* _disk_chop_zero_cb(): u3h_walk_with callback — zero log_w and les_w. ** -** les_w is zeroed because leases are transient IPC state: the lease PQ -** lives in C heap (not persisted), so after a restart/chop the lease -** entries that would decrement les_w are gone. +** log_w: will be rebuilt by rescanning remaining events. +** les_w: transient IPC state (lease PQ is C-heap, lost on restart). +** atm_w: NOT zeroed — if a live atom exists, the blob file is kept. +** it will be deleted during live operation when the atom dies +** (_me_bob_dead sets atm_w=0, then _blob_maybe_delete fires). */ static void _disk_chop_zero_cb(u3_noun kev, void* ptr_v) { (void)ptr_v; - u3_noun val = u3t(kev); c3_w off_w = 0; - u3r_safe_word(val, &off_w); + u3r_safe_word(u3t(kev), &off_w); u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); blb_u->log_w = 0; blb_u->les_w = 0; @@ -1570,15 +1571,7 @@ _disk_chop_delete_cb(u3_noun kev, void* ptr_v) u3r_safe_word(val, &off_w); u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); - fprintf(stderr, "chop: blob mug=%u seq=%u log_w=%u les_w=%u\r\n", - (unsigned)blb_u->mug_h, (unsigned)blb_u->seq_w, - (unsigned)blb_u->log_w, (unsigned)blb_u->les_w); - - // delete when no event-log or lease refs remain. 
- // if ( 0 == blb_u->log_w && 0 == blb_u->les_w && 0 == blb_u->atm_w ) { - fprintf(stderr, "chop: DELETING blob mug=%u seq=%u\r\n", - (unsigned)blb_u->mug_h, (unsigned)blb_u->seq_w); u3_blob_delete(del_u->pax_c, blb_u->mug_h, blb_u->seq_w); // collect bid for post-walk blb_p cleanup @@ -1704,8 +1697,6 @@ u3_disk_chop(u3_disk* log_u, c3_d eve_d) // step 2: scan remaining LMDB events for bob atoms, rebuild log_w // step 3: delete blobs with all-zero refcounts // - fprintf(stderr, "chop: rebuilding blob log refs (blb_p entries: %u)...\r\n", - (unsigned)u3h_wyt(u3H->blb_p)); _disk_chop_rebuild_log_w(log_u); fprintf(stderr, "chop: event log truncation complete\r\n"); diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index ee66370947..18c279f2a8 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -109,31 +109,24 @@ _mars_pq_pop(_mars_lease_pq* pq_u) return r_u; } -/* _mars_blob_del(): mars-side del_f — delete blob file + u3a_blob struct. +/* _mars_blob_del(): delete blob file and clean up blb_p entry. 
*/ static void _mars_blob_del(c3_h mug_h, c3_w seq_w) { - fprintf(stderr, "_mars_blob_del: mug=%u seq=%u (blb_p was %u entries)\r\n", - (unsigned)mug_h, (unsigned)seq_w, - (unsigned)u3h_wyt(u3H->blb_p)); - u3_blob_delete(u3C.dir_c, mug_h, seq_w); - // free the u3a_blob struct and remove from blb_p - // c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; u3_noun bid = u3i_chub(bid_d); u3_weak bv = u3h_get(u3H->blb_p, bid); + if ( u3_none != bv ) { c3_w off_w = 0; u3r_safe_word(bv, &off_w); - u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); - fprintf(stderr, "_mars_blob_del: removing blb_p entry (log_w=%u les_w=%u)\r\n", - (unsigned)blb_u->log_w, (unsigned)blb_u->les_w); - u3a_wfree(blb_u); + u3a_wfree((void*)u3a_into(off_w)); u3h_del(u3H->blb_p, bid); } + u3z(bid); } @@ -435,15 +428,6 @@ _mars_fact(u3_mars* mar_u, u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); if ( blb_u ) { blb_u->log_w++; - fprintf(stderr, "fact: log_w++ [%x/%u] log=%u les=%u (wyt=%u)\r\n", - (unsigned)mug_h, (unsigned)seq_w, - (unsigned)blb_u->log_w, (unsigned)blb_u->les_w, - (unsigned)u3h_wyt(u3H->blb_p)); - } - else { - fprintf(stderr, "fact: blob NOT FOUND [%x/%u] (wyt=%u)\r\n", - (unsigned)mug_h, (unsigned)seq_w, - (unsigned)u3h_wyt(u3H->blb_p)); } // TODO: write blob-ref log-inc event to LMDB (tag 0x02, op 0x03) @@ -868,28 +852,19 @@ _mars_work(u3_mars* mar_u, u3_noun jar) pre_w = u3a_open(u3R); mar_u->sen_d++; - { - u3p(u3h_root) _pre = u3H->blb_p; + if ( c3y == _mars_poke(mil_h, &job, &pro) ) { + mar_u->dun_d = mar_u->sen_d; + mar_u->mug_h = u3r_mug(u3A->roc); + mar_u->fag_w |= _mars_fag_mute; - if ( c3y == _mars_poke(mil_h, &job, &pro) ) { - if ( _pre != u3H->blb_p ) { - fprintf(stderr, "!!! 
POKE CLOBBERED blb_p: was %lu now %lu\r\n", - (unsigned long)_pre, (unsigned long)u3H->blb_p); - } + pro = _mars_sure_feck(mar_u, pre_w, pro); - mar_u->dun_d = mar_u->sen_d; - mar_u->mug_h = u3r_mug(u3A->roc); - mar_u->fag_w |= _mars_fag_mute; - - pro = _mars_sure_feck(mar_u, pre_w, pro); - - _mars_fact(mar_u, job, u3nt(c3__poke, c3y, pro)); - } - else { - mar_u->sen_d = mar_u->dun_d; - u3z(job); - _mars_gift(mar_u, u3nt(c3__poke, c3n, pro)); - } + _mars_fact(mar_u, job, u3nt(c3__poke, c3y, pro)); + } + else { + mar_u->sen_d = mar_u->dun_d; + u3z(job); + _mars_gift(mar_u, u3nt(c3__poke, c3n, pro)); } u3_assert( mar_u->dun_d == u3A->eve_d ); @@ -1031,9 +1006,6 @@ _mars_work(u3_mars* mar_u, u3_noun jar) blb_u->atm_w = 0; u3h_put(u3H->blb_p, bid, u3i_word(u3a_outa(blb_w))); u3z(bid); - fprintf(stderr, "blob: install blb_p[%x/%u] new (wyt=%u)\r\n", - (unsigned)mug_h, (unsigned)seq_w, - (unsigned)u3h_wyt(u3H->blb_p)); } blb_u->les_w++; } @@ -1282,8 +1254,6 @@ _mars_flush(u3_mars* mar_u) goto top; } else if ( u3_mars_exit_e == mar_u->sat_e ) { - fprintf(stderr, "mars: saving (blb_p entries: %u)\r\n", - (unsigned)u3h_wyt(u3H->blb_p)); u3m_save(); u3_disk_exit(mar_u->log_u); u3s_cue_xeno_done(mar_u->sil_u); @@ -1323,53 +1293,23 @@ u3_mars_kick(void* ram_u, c3_y ver_y, c3_d len_d, c3_y* hun_y) u3_mars* mar_u = ram_u; c3_o ret_o = c3n; - // watchdog: detect blb_p clobber - // - { - static u3p(u3h_root) _blb_watch = 0; - if ( _blb_watch && _blb_watch != u3H->blb_p ) { - fprintf(stderr, "!!! BLB_P CLOBBER: was %lu now %lu (wyt=%u)\r\n", - (unsigned long)_blb_watch, (unsigned long)u3H->blb_p, - (unsigned)u3h_wyt(u3H->blb_p)); - } - _blb_watch = u3H->blb_p; - } - _mars_step_trace(mar_u->dir_c); // XX optimize for stateless tasks w/ peek-next // if ( u3_mars_work_e == mar_u->sat_e ) { - // pick decoder by protocol version (0x01 = ram, 0x00 = jam) - // u3_weak jar = ( 0x01 == ver_y ) ? 
u3s_tap_xeno(len_d, hun_y) : u3s_cue_xeno_with(mar_u->sil_u, len_d, hun_y); - // parse errors are fatal - // if ( (u3_none == jar) || (c3n == _mars_work(mar_u, jar)) ) { fprintf(stderr, "mars: bad\r\n"); - // XX error cb? - // exit(1); } _mars_post(mar_u); - - // update watchdog after post - // - { - static u3p(u3h_root) _blb_post = 0; - if ( _blb_post && _blb_post != u3H->blb_p ) { - fprintf(stderr, "!!! BLB_P CHANGED IN POST: was %lu now %lu\r\n", - (unsigned long)_blb_post, (unsigned long)u3H->blb_p); - } - _blb_post = u3H->blb_p; - } - ret_o = c3y; } From 0888234a51802363679477360040df330dcb61f2 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Mon, 27 Apr 2026 20:07:21 -0500 Subject: [PATCH 21/31] wip: refactors blob ids --- pkg/noun/allocate.c | 21 ++--- pkg/noun/allocate.h | 27 ++++--- pkg/noun/imprison.c | 30 +++---- pkg/noun/imprison.h | 7 +- pkg/noun/jets/e/bytestream.c | 2 +- pkg/noun/options.h | 2 +- pkg/noun/retrieve.c | 20 ++--- pkg/noun/retrieve_tests.c | 6 +- pkg/noun/serial.c | 8 +- pkg/noun/serial_tests.c | 8 +- pkg/noun/vortex.c | 6 ++ pkg/vere/blob.c | 96 +++++++++++----------- pkg/vere/blob.h | 34 ++++---- pkg/vere/blob_tests.c | 150 +++++++++++++++++------------------ pkg/vere/disk.c | 15 ++-- pkg/vere/io/http.c | 14 ++-- pkg/vere/io/mesa.c | 6 +- pkg/vere/io/unix.c | 14 ++-- pkg/vere/king.c | 4 +- pkg/vere/lord.c | 18 ++--- pkg/vere/mars.c | 75 ++++++++---------- pkg/vere/vere.h | 8 +- 22 files changed, 277 insertions(+), 294 deletions(-) diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index 72c1145a00..5b6436db3d 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -853,15 +853,11 @@ _me_gain_south(u3_noun dog) static void _me_bob_dead(u3a_atom* atm_u) { - c3_h mug_h = atm_u->mug_h; - c3_w seq_w = atm_u->buf_w[0]; - c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - u3_noun bid = u3i_chub(bid_d); + c3_h mug_h = (c3_h)atm_u->mug_w; + c3_h seq_h = (c3_h)atm_u->buf_w[0]; + c3_w bid_w = ((c3_w)mug_h << 32) | (c3_w)seq_h; - // 
clear the interned atom pointer in blb_p - // - u3_weak bv = u3h_get(u3H->blb_p, bid); - u3z(bid); + u3_weak bv = u3h_get(u3H->blb_p, bid_w); if ( u3_none != bv ) { c3_w off_w = 0; @@ -869,20 +865,15 @@ _me_bob_dead(u3a_atom* atm_u) u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); blb_u->atm_w = 0; - // if all refs are zero, delete the blob - // if ( u3C.blob_del_f && 0 == blb_u->log_w && 0 == blb_u->les_w ) { - u3C.blob_del_f(mug_h, seq_w); + u3C.blob_del_f(mug_h, seq_h); } } else if ( u3C.blob_del_f ) { - // no blb_p entry — blob was never registered or already deleted. - // notify king so it can release its lease. - // - u3C.blob_del_f(mug_h, seq_w); + u3C.blob_del_f(mug_h, seq_h); } } diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index 5b369a9dd8..b5eaa1d381 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -157,9 +157,10 @@ /* u3a_blob: loom-resident metadata for a blob file. ** - ** Stored in u3H->blb_p HAMT keyed by bid = (mug_h << 32) | seq_w. - ** Three independent ref-sources protect the backing file: + ** Stored in u3H->blb_p keyed by bid = (mug_h << 32) | seq_h. + ** bid is always a direct atom on VERE64 (63 bits max). ** + ** Three independent ref-sources protect the backing file: ** log_w — event-log refs (inc on commit, dec on chop) ** les_w — lease refs (inc on king acquire, dec on release/expiry) ** atm_w — interned bob atom loom offset (0 = no live atom) @@ -171,7 +172,7 @@ c3_w log_w; // event-log refcount c3_w les_w; // lease refcount c3_h mug_h; // 31-bit content mug (= bucket dir name) - c3_w seq_w; // sequence number within bucket + c3_h seq_h; // sequence number within bucket c3_d siz_d; // byte size of blob file c3_w atm_w; // loom offset of interned bob atom (0 = none) } u3a_blob; @@ -652,22 +653,26 @@ typedef struct { return (atm_u->len_w & u3a_blob_flag) ? c3y : c3n; } - /* u3a_bob_mug(): 31-bit mug of a bob atom's content (= blob directory name). - ** [som] must be a bob atom. 
+ /* u3a_bob_mug(): content mug of a bob atom (= blob directory name). */ static inline c3_h u3a_bob_mug(u3_atom som) { - u3a_atom* atm_u = u3a_to_ptr(som); - return atm_u->mug_h; + return (c3_h)((u3a_atom*)u3a_to_ptr(som))->mug_w; } - /* u3a_bob_seq(): sequence number of a bob atom within its mug bucket. - ** [som] must be a bob atom. + /* u3a_bob_seq(): sequence number within mug bucket. */ - static inline c3_w + static inline c3_h u3a_bob_seq(u3_atom som) { + return (c3_h)((u3a_atom*)u3a_to_ptr(som))->buf_w[0]; + } + + /* u3a_bob_bid(): blob ID = (mug << 32) | seq. Direct atom on VERE64. + */ + static inline c3_w + u3a_bob_bid(u3_atom som) { u3a_atom* atm_u = u3a_to_ptr(som); - return atm_u->buf_w[0]; + return ((c3_w)(c3_h)atm_u->mug_w << 32) | (c3_w)(c3_h)atm_u->buf_w[0]; } /** Functions. diff --git a/pkg/noun/imprison.c b/pkg/noun/imprison.c index 8b4e7fd400..abbec37b9a 100644 --- a/pkg/noun/imprison.c +++ b/pkg/noun/imprison.c @@ -836,26 +836,19 @@ u3i_vmolt(u3_noun som, u3i_molt_pair pairs[], c3_z len_z) /* u3i_blob(): construct or intern a bob atom (blob reference). ** -** A bob atom is an indirect atom with the MSB of len_w set. -** [mug_h] is the 31-bit mug of the content (= blob directory name). -** [seq_w] is the sequence number within $pier/.urb/bob//. -** ** If a u3a_blob exists in blb_p with a live interned atom (atm_w != 0), ** returns the existing atom. Otherwise allocates a fresh bob atom and ** stores its offset in blb_u->atm_w (if a blb_p entry exists). 
*/ u3_atom -u3i_blob(c3_h mug_h, c3_w seq_w) +u3i_blob(c3_h mug_h, c3_h seq_h) { - // blb_p lives on the home road - // u3_assert( &(u3H->rod_u) == u3R ); - // check blb_p for an existing interned atom + // bid is a direct atom on VERE64 (63 bits max) // - c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - u3_noun bid = u3i_chub(bid_d); - u3_weak bv = u3h_get(u3H->blb_p, bid); + c3_w bid = ((c3_w)mug_h << 32) | (c3_w)seq_h; + u3_weak bv = u3h_get(u3H->blb_p, bid); if ( u3_none != bv ) { c3_w off_w = 0; @@ -863,7 +856,6 @@ u3i_blob(c3_h mug_h, c3_w seq_w) u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); if ( blb_u->atm_w ) { - u3z(bid); return u3k(u3a_to_pug(blb_u->atm_w)); } } @@ -874,21 +866,17 @@ u3i_blob(c3_h mug_h, c3_w seq_w) u3a_atom* vat_u = (void *)nov_w; vat_u->use_w = 1; - vat_u->mug_h = mug_h; + vat_u->mug_w = mug_h; vat_u->len_w = 1 | u3a_blob_flag; - vat_u->buf_w[0] = seq_w; + vat_u->buf_w[0] = seq_h; - c3_w atm_off_w = u3a_outa(nov_w); + c3_w atm_w = u3a_outa(nov_w); - // store in blb_p entry if one exists (created by %blob IPC handler) - // if ( u3_none != bv ) { c3_w off_w = 0; u3r_safe_word(bv, &off_w); - u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); - blb_u->atm_w = atm_off_w; + ((u3a_blob*)u3a_into(off_w))->atm_w = atm_w; } - u3z(bid); - return u3a_to_pug(atm_off_w); + return u3a_to_pug(atm_w); } diff --git a/pkg/noun/imprison.h b/pkg/noun/imprison.h index fe3da6a8ea..0663523094 100644 --- a/pkg/noun/imprison.h +++ b/pkg/noun/imprison.h @@ -95,13 +95,10 @@ u3i_bytes(c3_w a_w, const c3_y* b_y); - /* u3i_blob(): construct a bob atom (blob reference). - ** - ** [mug_h] is the 31-bit mug of the blob content (= blob directory name). - ** [seq_w] is the sequence number within $pier/.urb/bob//. + /* u3i_blob(): construct or intern a bob atom (blob reference). */ u3_atom - u3i_blob(c3_h mug_h, c3_w seq_w); + u3i_blob(c3_h mug_h, c3_h seq_h); /* u3i_words(): Copy [a] words from [b] into an atom. 
*/ diff --git a/pkg/noun/jets/e/bytestream.c b/pkg/noun/jets/e/bytestream.c index 70eae0a62d..7733e01c87 100644 --- a/pkg/noun/jets/e/bytestream.c +++ b/pkg/noun/jets/e/bytestream.c @@ -33,7 +33,7 @@ _x_octs(u3_noun octs, u3_atom* p_octs, u3_atom* q_octs) { // zero bytes (p_octs - met). // // The legacy version read (c3_y*)ptr_a->buf_w directly, which for a -// bob atom returned seq_w rather than the blob's content. Going +// bob atom returned seq_h rather than the blob's content. Going // through u3r_view gets the real bytes (mmap for bobs). // static c3_o diff --git a/pkg/noun/options.h b/pkg/noun/options.h index ea34d901e0..69cb435d85 100644 --- a/pkg/noun/options.h +++ b/pkg/noun/options.h @@ -23,7 +23,7 @@ void (*slog_f)(u3_noun); // function pointer for slog void (*sign_hold_f)(void); // suspend system signal regime void (*sign_move_f)(void); // restore system signal regime - void (*blob_del_f)(c3_h, c3_w); // blob uninstall: king=release lease, mars=delete file + void (*blob_del_f)(c3_h, c3_h); // blob uninstall: king=release lease, mars=delete file } u3o_config; /* u3o_flag: process/system flags. diff --git a/pkg/noun/retrieve.c b/pkg/noun/retrieve.c index 964875f0d6..1b69ab1bb3 100644 --- a/pkg/noun/retrieve.c +++ b/pkg/noun/retrieve.c @@ -286,7 +286,7 @@ _cr_sing_atom(u3_atom a, u3_noun b) if ( (c3y == a_bob) && (c3y == b_bob) ) { u3a_atom* a_u = u3a_to_ptr(a); u3a_atom* b_u = u3a_to_ptr(b); - return ( (a_u->mug_h == b_u->mug_h) + return ( (a_u->mug_w == b_u->mug_w) && (a_u->buf_w[0] == b_u->buf_w[0]) ) ? 
c3y : c3n; } // bob vs normal (or normal vs bob): materialize the bob @@ -2224,8 +2224,8 @@ _cr_mug_next(u3a_pile* pil_u, u3_noun veb) // materialize only if somehow missing (should not occur) // if ( c3y == u3a_is_bob(veb) ) { - if ( vat_u->mug_h ) { - return (c3_h)vat_u->mug_h; + if ( vat_u->mug_w ) { + return (c3_h)vat_u->mug_w; } u3_atom mat = u3r_blob_load(veb, u3C.dir_c); if ( u3_none == mat ) { @@ -2237,7 +2237,7 @@ _cr_mug_next(u3a_pile* pil_u, u3_noun veb) else { mug_h = u3r_mug_words(vat_u->buf_w, vat_u->len_w); } - vat_u->mug_h = mug_h; + vat_u->mug_w = mug_h; return mug_h; } // veb is a cell, push a stack frame to mark head-recursion @@ -2522,13 +2522,13 @@ u3r_blob_load(u3_atom a, const c3_c* pax_c) u3_assert( c3y == u3a_is_bob(a) ); c3_h mug_h = u3a_bob_mug(a); - c3_w seq_w = u3a_bob_seq(a); + c3_h seq_h = u3a_bob_seq(a); // build path: $pier/.urb/bob// // c3_c fil_c[8192]; - snprintf(fil_c, sizeof(fil_c), "%s/.urb/bob/%" PRIc3_h "/%" PRIc3_w, - pax_c, mug_h, seq_w); + snprintf(fil_c, sizeof(fil_c), "%s/.urb/bob/%" PRIc3_h "/%" PRIc3_h, + pax_c, mug_h, seq_h); struct stat st_u; if ( -1 == stat(fil_c, &st_u) ) { @@ -2580,11 +2580,11 @@ u3r_blob_map(u3_atom a, c3_d* len_d) u3_assert( c3y == u3a_is_bob(a) ); c3_h mug_h = u3a_bob_mug(a); - c3_w seq_w = u3a_bob_seq(a); + c3_h seq_h = u3a_bob_seq(a); c3_c fil_c[8192]; - snprintf(fil_c, sizeof(fil_c), "%s/.urb/bob/%" PRIc3_h "/%" PRIc3_w, - u3C.dir_c, mug_h, seq_w); + snprintf(fil_c, sizeof(fil_c), "%s/.urb/bob/%" PRIc3_h "/%" PRIc3_h, + u3C.dir_c, mug_h, seq_h); struct stat st_u; if ( -1 == stat(fil_c, &st_u) ) { diff --git a/pkg/noun/retrieve_tests.c b/pkg/noun/retrieve_tests.c index f3cec6620d..3ee46fb566 100644 --- a/pkg/noun/retrieve_tests.c +++ b/pkg/noun/retrieve_tests.c @@ -1212,7 +1212,7 @@ _test_view(void) snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob", dir_c); mkdir(pax_c, 0755); const c3_h mug_h = 0xabcd1234; - const c3_w seq_w = 7; + const c3_h seq_h = 7; snprintf(pax_c, sizeof(pax_c), 
"%s/.urb/bob/%u", dir_c, (unsigned)mug_h); mkdir(pax_c, 0755); @@ -1221,7 +1221,7 @@ _test_view(void) const c3_d bob_d = sizeof(bob_y) - 1; snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob/%u/%u", - dir_c, (unsigned)mug_h, (unsigned)seq_w); + dir_c, (unsigned)mug_h, (unsigned)seq_h); FILE* fil_f = fopen(pax_c, "wb"); if ( !fil_f ) { fprintf(stderr, "_test_view(): fopen %s: %s\r\n", pax_c, strerror(errno)); @@ -1234,7 +1234,7 @@ _test_view(void) // u3C.dir_c = dir_c; - u3_atom a = u3i_blob(mug_h, seq_w); + u3_atom a = u3i_blob(mug_h, seq_h); u3r_view vu_u; u3r_view_init(&vu_u, a); diff --git a/pkg/noun/serial.c b/pkg/noun/serial.c index 761c5ab49e..25a2671a88 100644 --- a/pkg/noun/serial.c +++ b/pkg/noun/serial.c @@ -962,7 +962,7 @@ u3s_cue_atom(u3_atom a) } // bob atom: mmap the backing file instead of dereferencing buf_w - // (which for a bob would yield seq_w). The view stays live for + // (which for a bob would yield seq_h). The view stays live for // the whole cue so the bitstream reader can scan freely. // if ( c3y == u3a_is_bob(a) ) { @@ -1610,7 +1610,7 @@ static inline void _cs_ram_bsw_bob(ur_bsw_t* rit_u, u3_atom a) { c3_h mug_h = u3a_bob_mug(a); - c3_w seq_w = u3a_bob_seq(a); + c3_h seq_h = u3a_bob_seq(a); // write 2-bit tag 01 // @@ -1619,7 +1619,7 @@ _cs_ram_bsw_bob(ur_bsw_t* rit_u, u3_atom a) // write mat(mug) and mat(seq) // ur_bsw_mat64(rit_u, u3r_met(0, (u3_atom)mug_h), (c3_d)mug_h); - ur_bsw_mat64(rit_u, u3r_met(0, (u3_atom)seq_w), (c3_d)seq_w); + ur_bsw_mat64(rit_u, u3r_met(0, (u3_atom)seq_h), (c3_d)seq_h); } /* _cs_ram_bsw_back(): encode a backref as tag 11 + mat(bit-position). 
@@ -1855,7 +1855,7 @@ _cs_tap_xeno_next(u3a_pile* pil_u, } seq_d = ur_bsr64_any(red_u, len_d); - *out = u3i_blob((c3_h)mug_d, (c3_w)seq_d); + *out = u3i_blob((c3_h)mug_d, (c3_h)seq_d); ur_dictn_put(rot_u, dic_u, bit_d, *out); return ur_cue_good; diff --git a/pkg/noun/serial_tests.c b/pkg/noun/serial_tests.c index cdbf9d0033..57e2fbdd2f 100644 --- a/pkg/noun/serial_tests.c +++ b/pkg/noun/serial_tests.c @@ -396,13 +396,13 @@ _ram_cleanup_tmp(void) } static c3_o -_ram_make_blob(c3_h mug_h, c3_w seq_w, const c3_y* dat_y, c3_d len_d) +_ram_make_blob(c3_h mug_h, c3_h seq_h, const c3_y* dat_y, c3_d len_d) { c3_c pax_c[2048]; snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob/%" PRIc3_h, _ram_tmp_dir, mug_h); mkdir(pax_c, 0755); - snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob/%" PRIc3_h "/%" PRIc3_w, - _ram_tmp_dir, mug_h, seq_w); + snprintf(pax_c, sizeof(pax_c), "%s/.urb/bob/%" PRIc3_h "/%" PRIc3_h, + _ram_tmp_dir, mug_h, seq_h); FILE* fil_f = fopen(pax_c, "wb"); if ( !fil_f ) { fprintf(stderr, "serial_tests: fopen %s: %s\r\n", pax_c, strerror(errno)); @@ -490,7 +490,7 @@ _test_ram_bob_roundtrip(void) || (u3a_bob_seq(out) != 1) ) { fprintf(stderr, "\033[31mram bob solo fail: mug/seq mismatch " - "(got %" PRIc3_h "/%" PRIc3_w ")\033[0m\r\n", + "(got %" PRIc3_h "/%" PRIc3_h ")\033[0m\r\n", u3a_bob_mug(out), u3a_bob_seq(out)); ret_i = 0; } diff --git a/pkg/noun/vortex.c b/pkg/noun/vortex.c index 86868be1d2..64bb9f10f3 100644 --- a/pkg/noun/vortex.c +++ b/pkg/noun/vortex.c @@ -404,6 +404,12 @@ u3v_reclaim(void) if ( &(u3H->rod_u) == u3R ) { u3z(u3A->yot); u3A->yot = u3_nul; + + // // clear ford cache to release refs (e.g., blob atoms held + // // by cached computation results after |tomb) + // // + // u3h_free(u3R->cax.for_p); + // u3R->cax.for_p = u3h_new_cache(u3C.per_w); } } diff --git a/pkg/vere/blob.c b/pkg/vere/blob.c index 943197d017..dbba934b87 100644 --- a/pkg/vere/blob.c +++ b/pkg/vere/blob.c @@ -45,10 +45,10 @@ _blob_lock_path(c3_c* out_c, const c3_c* pax_c, c3_h mug_h) /* 
u3_blob_path(): write filesystem path for a blob into [out_c]. */ void -u3_blob_path(c3_c* out_c, const c3_c* pax_c, c3_h mug_h, c3_w seq_w) +u3_blob_path(c3_c* out_c, const c3_c* pax_c, c3_h mug_h, c3_h seq_h) { - snprintf(out_c, 8192, "%s/.urb/bob/%" PRIc3_h "/%" PRIc3_w, - pax_c, mug_h, seq_w); + snprintf(out_c, 8192, "%s/.urb/bob/%" PRIc3_h "/%" PRIc3_h, + pax_c, mug_h, seq_h); } /* u3_blob_init(): initialize blob store; create .urb/bob/ if needed. @@ -121,7 +121,7 @@ u3_blob_stg_init(const c3_c* pax_c) ** Creates the mug directory and lockfile if needed. ** Returns 0 on failure. */ -static c3_w +static c3_h _blob_lock_acquire(const c3_c* pax_c, c3_h mug_h) { c3_c dir_c[8192]; @@ -161,9 +161,9 @@ _blob_lock_acquire(const c3_c* pax_c, c3_h mug_h) // read current next-seq (0 means empty/new file) c3_c buf_c[32] = {0}; ssize_t red_i = read(lok_i, buf_c, sizeof(buf_c) - 1); - c3_w nex_w = ( red_i > 0 ) ? (c3_w)strtoul(buf_c, 0, 10) : 1; - if ( 0 == nex_w ) { - nex_w = 1; + c3_h nex_h = ( red_i > 0 ) ? (c3_h)strtoul(buf_c, 0, 10) : 1; + if ( 0 == nex_h ) { + nex_h = 1; } // write incremented value back @@ -181,7 +181,7 @@ _blob_lock_acquire(const c3_c* pax_c, c3_h mug_h) } c3_c wri_c[32]; - snprintf(wri_c, sizeof(wri_c), "%" PRIc3_w, nex_w + 1); + snprintf(wri_c, sizeof(wri_c), "%" PRIc3_h, nex_h + 1); if ( -1 == write(lok_i, wri_c, strlen(wri_c)) ) { fprintf(stderr, "blob: failed to write lock %s: %s\r\n", lck_c, strerror(errno)); @@ -193,20 +193,20 @@ _blob_lock_acquire(const c3_c* pax_c, c3_h mug_h) fsync(lok_i); close(lok_i); - return nex_w; + return nex_h; } /* _blob_dedup(): scan bucket for byte-equal content. ** ** Returns the sequence number of an existing equal blob, or 0 if none. 
*/ -static c3_w -_blob_dedup(const c3_c* pax_c, c3_h mug_h, c3_w max_w, +static c3_h +_blob_dedup(const c3_c* pax_c, c3_h mug_h, c3_h max_h, const c3_y* dat_y, c3_d len_d) { - for ( c3_w seq_w = 1; seq_w < max_w; seq_w++ ) { + for ( c3_h seq_h = 1; seq_h < max_h; seq_h++ ) { c3_c fil_c[8192]; - u3_blob_path(fil_c, pax_c, mug_h, seq_w); + u3_blob_path(fil_c, pax_c, mug_h, seq_h); struct stat st_u; if ( -1 == stat(fil_c, &st_u) ) { @@ -241,7 +241,7 @@ _blob_dedup(const c3_c* pax_c, c3_h mug_h, c3_w max_w, close(fid_i); if ( c3y == eql_o ) { - return seq_w; + return seq_h; } } return 0; @@ -284,20 +284,20 @@ u3_blob_save(const c3_c* pax_c, const c3_y* dat_y, c3_d len_d, c3_h* mug_h, - c3_w* seq_w) + c3_h* seq_h) { *mug_h = _blob_mug(dat_y, len_d); // acquire lock and get next sequence number - c3_w nex_w = _blob_lock_acquire(pax_c, *mug_h); - if ( 0 == nex_w ) { + c3_h nex_h = _blob_lock_acquire(pax_c, *mug_h); + if ( 0 == nex_h ) { return c3n; } // check for duplicate before writing - c3_w dup_w = _blob_dedup(pax_c, *mug_h, nex_w, dat_y, len_d); - if ( 0 != dup_w ) { - *seq_w = dup_w; + c3_h dup_h = _blob_dedup(pax_c, *mug_h, nex_h, dat_y, len_d); + if ( 0 != dup_h ) { + *seq_h = dup_h; // we already incremented the lock counter, but that's harmless — // nex_w slot will simply be skipped (sparse sequence numbers are fine) return c3y; @@ -305,7 +305,7 @@ u3_blob_save(const c3_c* pax_c, // write blob file c3_c fil_c[8192]; - u3_blob_path(fil_c, pax_c, *mug_h, nex_w); + u3_blob_path(fil_c, pax_c, *mug_h, nex_h); c3_i fid_i = open(fil_c, O_WRONLY | O_CREAT | O_EXCL, 0400); if ( -1 == fid_i ) { @@ -333,7 +333,7 @@ u3_blob_save(const c3_c* pax_c, fsync(fid_i); close(fid_i); - *seq_w = nex_w; + *seq_h = nex_h; return c3y; } @@ -348,7 +348,7 @@ u3_blob_save_fd(const c3_c* pax_c, c3_i fid_i, c3_d len_d, c3_h* mug_h, - c3_w* seq_w) + c3_h* seq_h) { if ( 0 == len_d ) { fprintf(stderr, "blob: refusing to save empty file\r\n"); @@ -363,7 +363,7 @@ u3_blob_save_fd(const c3_c* 
pax_c, } madvise(map_v, (size_t)len_d, MADV_SEQUENTIAL); - c3_o ret_o = u3_blob_save(pax_c, (const c3_y*)map_v, len_d, mug_h, seq_w); + c3_o ret_o = u3_blob_save(pax_c, (const c3_y*)map_v, len_d, mug_h, seq_h); munmap(map_v, (size_t)len_d); return ret_o; } @@ -374,15 +374,15 @@ u3_blob_save_fd(const c3_c* pax_c, ** The mapping is released immediately after the loom copy. */ u3_weak -u3_blob_load(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) +u3_blob_load(const c3_c* pax_c, c3_h mug_h, c3_h seq_h) { c3_c fil_c[8192]; - u3_blob_path(fil_c, pax_c, mug_h, seq_w); + u3_blob_path(fil_c, pax_c, mug_h, seq_h); struct stat st_u; if ( -1 == stat(fil_c, &st_u) ) { - fprintf(stderr, "blob: missing blob %" PRIc3_h "/%" PRIc3_w ": %s\r\n", - mug_h, seq_w, strerror(errno)); + fprintf(stderr, "blob: missing blob %" PRIc3_h "/%" PRIc3_h ": %s\r\n", + mug_h, seq_h, strerror(errno)); return u3_none; } @@ -422,22 +422,22 @@ u3_blob_load(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) /* u3_blob_exists(): check whether a blob file exists. */ c3_o -u3_blob_exists(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) +u3_blob_live(const c3_c* pax_c, c3_h mug_h, c3_h seq_h) { c3_c fil_c[8192]; - u3_blob_path(fil_c, pax_c, mug_h, seq_w); + u3_blob_path(fil_c, pax_c, mug_h, seq_h); struct stat st_u; return ( 0 == stat(fil_c, &st_u) ) ? c3y : c3n; } -/* u3_blob_delete(): delete a blob file. +/* u3_blob_wipe(): delete a blob file. */ void -u3_blob_delete(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) +u3_blob_wipe(const c3_c* pax_c, c3_h mug_h, c3_h seq_h) { c3_c fil_c[8192]; - u3_blob_path(fil_c, pax_c, mug_h, seq_w); + u3_blob_path(fil_c, pax_c, mug_h, seq_h); if ( 0 != unlink(fil_c) && ENOENT != errno ) { fprintf(stderr, "blob: failed to delete %s: %s\r\n", @@ -484,16 +484,16 @@ u3_blob_delete(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) ** [stg_c] is the path to a temp file under $pier/.urb/bob/stg/. 
** Computes the mug of its content, checks for duplicates, then either ** renames the staging file into bob// (no dup) or unlinks it -** (dup found). On success sets *mug_h and *seq_w. +** (dup found). On success sets *mug_h and *seq_h. ** ** The staging file is always consumed (renamed or unlinked) on success. ** On failure the staging file is left in place. */ c3_o -u3_blob_install_stg(const c3_c* pax_c, +u3_blob_move_stg(const c3_c* pax_c, const c3_c* stg_c, c3_h* mug_h, - c3_w* seq_w) + c3_h* seq_h) { struct stat st_u; if ( -1 == stat(stg_c, &st_u) ) { @@ -531,30 +531,30 @@ u3_blob_install_stg(const c3_c* pax_c, // acquire mug-bucket lock and get next sequence number // - c3_w nex_w = _blob_lock_acquire(pax_c, *mug_h); - if ( 0 == nex_w ) { + c3_h nex_h = _blob_lock_acquire(pax_c, *mug_h); + if ( 0 == nex_h ) { munmap(map_v, (size_t)len_d); return c3n; } // check for duplicate content // - c3_w dup_w = _blob_dedup(pax_c, *mug_h, nex_w, + c3_h dup_h = _blob_dedup(pax_c, *mug_h, nex_h, (const c3_y*)map_v, len_d); munmap(map_v, (size_t)len_d); - if ( 0 != dup_w ) { + if ( 0 != dup_h ) { // duplicate found — consume staging file and return existing seq // c3_unlink(stg_c); - *seq_w = dup_w; + *seq_h = dup_h; return c3y; } // rename staging file into final location // c3_c dst_c[8192]; - u3_blob_path(dst_c, pax_c, *mug_h, nex_w); + u3_blob_path(dst_c, pax_c, *mug_h, nex_h); if ( 0 != rename(stg_c, dst_c) ) { // rename can fail cross-device; fall back to copy-and-unlink @@ -589,17 +589,17 @@ u3_blob_install_stg(const c3_c* pax_c, c3_unlink(stg_c); } - *seq_w = nex_w; + *seq_h = nex_h; return c3y; } /* u3_blob_map(): mmap blob file for direct byte access. 
*/ const c3_y* -u3_blob_map(const c3_c* pax_c, c3_h mug_h, c3_w seq_w, c3_d* len_d) +u3_blob_mmap(const c3_c* pax_c, c3_h mug_h, c3_h seq_h, c3_d* len_d) { c3_c fil_c[8192]; - u3_blob_path(fil_c, pax_c, mug_h, seq_w); + u3_blob_path(fil_c, pax_c, mug_h, seq_h); struct stat st_u; if ( -1 == stat(fil_c, &st_u) ) { @@ -635,7 +635,7 @@ u3_blob_map(const c3_c* pax_c, c3_h mug_h, c3_w seq_w, c3_d* len_d) /* u3_blob_unmap(): release mapping returned by u3_blob_map(). */ void -u3_blob_unmap(const c3_y* ptr_y, c3_d len_d) +u3_blob_umap(const c3_y* ptr_y, c3_d len_d) { if ( ptr_y && len_d ) { munmap((void*)ptr_y, (size_t)len_d); @@ -649,10 +649,10 @@ u3_blob_unmap(const c3_y* ptr_y, c3_d len_d) ** Returns 0 if blob is missing, empty, or all-zero bytes. */ c3_d -u3_blob_met(const c3_c* pax_c, c3_h mug_h, c3_w seq_w) +u3_blob_met(const c3_c* pax_c, c3_h mug_h, c3_h seq_h) { c3_c fil_c[8192]; - u3_blob_path(fil_c, pax_c, mug_h, seq_w); + u3_blob_path(fil_c, pax_c, mug_h, seq_h); struct stat st_u; if ( -1 == stat(fil_c, &st_u) || 0 == st_u.st_size ) { diff --git a/pkg/vere/blob.h b/pkg/vere/blob.h index db234f89e8..1e12eb98ba 100644 --- a/pkg/vere/blob.h +++ b/pkg/vere/blob.h @@ -36,59 +36,59 @@ /* u3_blob_save(): write bytes to blob store. ** ** Deduplicates within the mug bucket (byte-for-byte comparison). - ** On success, returns c3y and sets *mug_h and *seq_w. + ** On success, returns c3y and sets *mug_h and *seq_h. */ c3_o u3_blob_save(const c3_c* pax_c, const c3_y* dat_y, c3_d len_d, c3_h* mug_h, - c3_w* seq_w); + c3_h* seq_h); /* u3_blob_save_fd(): streaming write from open file descriptor. ** ** Reads [len_d] bytes from [fid_i], writes to blob store. ** Avoids double-buffering for large file ingestion. - ** On success, returns c3y and sets *mug_h and *seq_w. + ** On success, returns c3y and sets *mug_h and *seq_h. */ c3_o u3_blob_save_fd(const c3_c* pax_c, c3_i fid_i, c3_d len_d, c3_h* mug_h, - c3_w* seq_w); + c3_h* seq_h); /* u3_blob_load(): read blob into a loom atom. 
** ** Returns u3_none on failure. */ u3_weak - u3_blob_load(const c3_c* pax_c, c3_h mug_h, c3_w seq_w); + u3_blob_load(const c3_c* pax_c, c3_h mug_h, c3_h seq_h); - /* u3_blob_exists(): check whether a blob file exists. + /* u3_blob_live(): check whether a blob file exists. */ c3_o - u3_blob_exists(const c3_c* pax_c, c3_h mug_h, c3_w seq_w); + u3_blob_live(const c3_c* pax_c, c3_h mug_h, c3_h seq_h); - /* u3_blob_delete(): delete a blob file. + /* u3_blob_wipe(): delete a blob file. ** ** Called when a bob atom's total refcount reaches zero. */ void - u3_blob_delete(const c3_c* pax_c, c3_h mug_h, c3_w seq_w); + u3_blob_wipe(const c3_c* pax_c, c3_h mug_h, c3_h seq_h); - /* u3_blob_install_stg(): install a staging file into the blob store. + /* u3_blob_move_stg(): install a staging file into the blob store. ** ** [stg_c] is the path of a temp file in $pier/.urb/bob/stg/. ** Computes mug, deduplicates, then rename(2)s into bob//. ** The staging file is always consumed on success. - ** On success, returns c3y and sets *mug_h and *seq_w. + ** On success, returns c3y and sets *mug_h and *seq_h. */ c3_o - u3_blob_install_stg(const c3_c* pax_c, + u3_blob_move_stg(const c3_c* pax_c, const c3_c* stg_c, c3_h* mug_h, - c3_w* seq_w); + c3_h* seq_h); /* u3_blob_path(): write filesystem path for a blob into [out_c]. ** @@ -98,7 +98,7 @@ u3_blob_path(c3_c* out_c, const c3_c* pax_c, c3_h mug_h, - c3_w seq_w); + c3_h seq_h); /* u3_blob_map(): mmap a blob file for direct byte access. ** @@ -107,12 +107,12 @@ ** No loom allocation is performed. */ const c3_y* - u3_blob_map(const c3_c* pax_c, c3_h mug_h, c3_w seq_w, c3_d* len_d); + u3_blob_mmap(const c3_c* pax_c, c3_h mug_h, c3_h seq_h, c3_d* len_d); /* u3_blob_unmap(): release a mapping returned by u3_blob_map(). */ void - u3_blob_unmap(const c3_y* ptr_y, c3_d len_d); + u3_blob_umap(const c3_y* ptr_y, c3_d len_d); /* u3_blob_met(): compute the bit-length of a blob without full materialization. 
** @@ -121,6 +121,6 @@ ** Returns 0 on error (blob missing or empty). */ c3_d - u3_blob_met(const c3_c* pax_c, c3_h mug_h, c3_w seq_w); + u3_blob_met(const c3_c* pax_c, c3_h mug_h, c3_h seq_h); #endif /* ifndef U3_VERE_BLOB_H */ diff --git a/pkg/vere/blob_tests.c b/pkg/vere/blob_tests.c index 6312a57cd6..911f144e9e 100644 --- a/pkg/vere/blob_tests.c +++ b/pkg/vere/blob_tests.c @@ -191,35 +191,35 @@ _test_save_load(void) const c3_y dat_y[] = "the quick brown fox jumps over the lazy dog"; const c3_d dat_d = sizeof(dat_y) - 1; // drop trailing NUL c3_h mug_h = 0; - c3_w seq_w = 0; + c3_h seq_h = 0; - if ( c3y != u3_blob_save(_tmp_pier, dat_y, dat_d, &mug_h, &seq_w) ) { + if ( c3y != u3_blob_save(_tmp_pier, dat_y, dat_d, &mug_h, &seq_h) ) { fprintf(stderr, "\033[31mblob save fail\033[0m\r\n"); exit(1); } - if ( 1 != seq_w ) { - fprintf(stderr, "\033[31mblob save: expected seq=1, got %" PRIc3_w "\033[0m\r\n", - seq_w); + if ( 1 != seq_h ) { + fprintf(stderr, "\033[31mblob save: expected seq=1, got %" PRIc3_h "\033[0m\r\n", + seq_h); exit(1); } // file should exist at computed path // c3_c fil_c[8192]; - u3_blob_path(fil_c, _tmp_pier, mug_h, seq_w); + u3_blob_path(fil_c, _tmp_pier, mug_h, seq_h); if ( c3y != _path_exists(fil_c) ) { fprintf(stderr, "\033[31mblob save: %s missing\033[0m\r\n", fil_c); exit(1); } - if ( c3y != u3_blob_exists(_tmp_pier, mug_h, seq_w) ) { + if ( c3y != u3_blob_live(_tmp_pier, mug_h, seq_h) ) { fprintf(stderr, "\033[31mblob exists fail\033[0m\r\n"); exit(1); } // load and verify bytes // - u3_weak atm = u3_blob_load(_tmp_pier, mug_h, seq_w); + u3_weak atm = u3_blob_load(_tmp_pier, mug_h, seq_h); if ( u3_none == atm ) { fprintf(stderr, "\033[31mblob load: u3_none\033[0m\r\n"); exit(1); @@ -254,13 +254,13 @@ _test_dedup(void) const c3_d dat_d = sizeof(dat_y) - 1; c3_h mug1_h, mug2_h; - c3_w seq1_w, seq2_w; + c3_h seq1_h, seq2_h; - if ( c3y != u3_blob_save(_tmp_pier, dat_y, dat_d, &mug1_h, &seq1_w) ) { + if ( c3y != u3_blob_save(_tmp_pier, dat_y, 
dat_d, &mug1_h, &seq1_h) ) { fprintf(stderr, "\033[31mblob dedup: first save failed\033[0m\r\n"); exit(1); } - if ( c3y != u3_blob_save(_tmp_pier, dat_y, dat_d, &mug2_h, &seq2_w) ) { + if ( c3y != u3_blob_save(_tmp_pier, dat_y, dat_d, &mug2_h, &seq2_h) ) { fprintf(stderr, "\033[31mblob dedup: second save failed\033[0m\r\n"); exit(1); } @@ -270,9 +270,9 @@ _test_dedup(void) " vs %" PRIc3_h ")\033[0m\r\n", mug1_h, mug2_h); exit(1); } - if ( seq1_w != seq2_w ) { + if ( seq1_h != seq2_h ) { fprintf(stderr, "\033[31mblob dedup: expected seq reuse, " - "got %" PRIc3_w "+%" PRIc3_w "\033[0m\r\n", seq1_w, seq2_w); + "got %" PRIc3_h "+%" PRIc3_h "\033[0m\r\n", seq1_h, seq2_h); exit(1); } @@ -282,13 +282,13 @@ _test_dedup(void) const c3_y alt_y[] = "a completely different payload"; const c3_d alt_d = sizeof(alt_y) - 1; c3_h mug3_h = 0; - c3_w seq3_w = 0; - if ( c3y != u3_blob_save(_tmp_pier, alt_y, alt_d, &mug3_h, &seq3_w) ) { + c3_h seq3_h = 0; + if ( c3y != u3_blob_save(_tmp_pier, alt_y, alt_d, &mug3_h, &seq3_h) ) { fprintf(stderr, "\033[31mblob dedup: alt save failed\033[0m\r\n"); exit(1); } if ( mug1_h == mug3_h - && seq1_w == seq3_w ) + && seq1_h == seq3_h ) { fprintf(stderr, "\033[31mblob dedup: distinct content got same blob\033[0m\r\n"); exit(1); @@ -325,8 +325,8 @@ _test_save_fd(void) } c3_h mug_h = 0; - c3_w seq_w = 0; - c3_o ret_o = u3_blob_save_fd(_tmp_pier, fid_i, dat_d, &mug_h, &seq_w); + c3_h seq_h = 0; + c3_o ret_o = u3_blob_save_fd(_tmp_pier, fid_i, dat_d, &mug_h, &seq_h); close(fid_i); if ( c3y != ret_o ) { @@ -340,8 +340,8 @@ _test_save_fd(void) fclose(ef); // truncate to zero c3_i efid_i = open(src_c, O_RDONLY); c3_h emh = 0; - c3_w esw = 0; - if ( c3n != u3_blob_save_fd(_tmp_pier, efid_i, 0, &emh, &esw) ) { + c3_h esh = 0; + if ( c3n != u3_blob_save_fd(_tmp_pier, efid_i, 0, &emh, &esh) ) { fprintf(stderr, "\033[31mblob save_fd: should reject empty\033[0m\r\n"); exit(1); } @@ -349,7 +349,7 @@ _test_save_fd(void) // verify loaded content matches // - 
u3_weak atm = u3_blob_load(_tmp_pier, mug_h, seq_w); + u3_weak atm = u3_blob_load(_tmp_pier, mug_h, seq_h); if ( u3_none == atm ) { fprintf(stderr, "\033[31mblob save_fd: load u3_none\033[0m\r\n"); exit(1); @@ -378,11 +378,11 @@ _test_delete_empty_bucket(void) const c3_y dat_y[] = "ephemeral blob"; c3_h mug_h = 0; - c3_w seq_w = 0; - u3_blob_save(_tmp_pier, dat_y, sizeof(dat_y) - 1, &mug_h, &seq_w); + c3_h seq_h = 0; + u3_blob_save(_tmp_pier, dat_y, sizeof(dat_y) - 1, &mug_h, &seq_h); c3_c fil_c[8192], dir_c[8192]; - u3_blob_path(fil_c, _tmp_pier, mug_h, seq_w); + u3_blob_path(fil_c, _tmp_pier, mug_h, seq_h); snprintf(dir_c, sizeof(dir_c), "%s/.urb/bob/%" PRIc3_h, _tmp_pier, mug_h); if ( c3y != _path_exists(dir_c) ) { @@ -390,14 +390,14 @@ _test_delete_empty_bucket(void) exit(1); } - u3_blob_delete(_tmp_pier, mug_h, seq_w); + u3_blob_wipe(_tmp_pier, mug_h, seq_h); if ( c3y == _path_exists(fil_c) ) { fprintf(stderr, "\033[31mblob delete: file %s still exists\033[0m\r\n", fil_c); exit(1); } - if ( c3y == u3_blob_exists(_tmp_pier, mug_h, seq_w) ) { + if ( c3y == u3_blob_live(_tmp_pier, mug_h, seq_h) ) { fprintf(stderr, "\033[31mblob delete: exists() still true\033[0m\r\n"); exit(1); } @@ -409,7 +409,7 @@ _test_delete_empty_bucket(void) // deleting a nonexistent blob is a no-op (no error) // - u3_blob_delete(_tmp_pier, 0xdeadbeef, 999); + u3_blob_wipe(_tmp_pier, 0xdeadbeef, 999); _tmp_clean(); fprintf(stderr, "test blob delete (empty bucket): ok\r\n"); @@ -440,14 +440,14 @@ _test_install_stg(void) } c3_h mug_h = 0; - c3_w seq_w = 0; - if ( c3y != u3_blob_install_stg(_tmp_pier, stg_c, &mug_h, &seq_w) ) { + c3_h seq_h = 0; + if ( c3y != u3_blob_move_stg(_tmp_pier, stg_c, &mug_h, &seq_h) ) { fprintf(stderr, "\033[31mblob install_stg failed\033[0m\r\n"); exit(1); } - if ( 1 != seq_w ) { - fprintf(stderr, "\033[31mblob install_stg: expected seq=1, got %" PRIc3_w - "\033[0m\r\n", seq_w); + if ( 1 != seq_h ) { + fprintf(stderr, "\033[31mblob install_stg: expected seq=1, got 
%" PRIc3_h + "\033[0m\r\n", seq_h); exit(1); } @@ -458,14 +458,14 @@ _test_install_stg(void) exit(1); } - if ( c3y != u3_blob_exists(_tmp_pier, mug_h, seq_w) ) { + if ( c3y != u3_blob_live(_tmp_pier, mug_h, seq_h) ) { fprintf(stderr, "\033[31mblob install_stg: blob not present after install\033[0m\r\n"); exit(1); } // content preserved // - u3_weak atm = u3_blob_load(_tmp_pier, mug_h, seq_w); + u3_weak atm = u3_blob_load(_tmp_pier, mug_h, seq_h); if ( u3_none == atm ) { fprintf(stderr, "\033[31mblob install_stg: load u3_none\033[0m\r\n"); exit(1); @@ -500,24 +500,24 @@ _test_install_stg_dedup(void) // first save via u3_blob_save // c3_h mug1_h = 0; - c3_w seq1_w = 0; - u3_blob_save(_tmp_pier, dat_y, dat_d, &mug1_h, &seq1_w); + c3_h seq1_h = 0; + u3_blob_save(_tmp_pier, dat_y, dat_d, &mug1_h, &seq1_h); // then stage same content and install // c3_c* stg_c = _write_tmp_file(dat_y, dat_d); c3_h mug2_h = 0; - c3_w seq2_w = 0; - if ( c3y != u3_blob_install_stg(_tmp_pier, stg_c, &mug2_h, &seq2_w) ) { + c3_h seq2_h = 0; + if ( c3y != u3_blob_move_stg(_tmp_pier, stg_c, &mug2_h, &seq2_h) ) { fprintf(stderr, "\033[31mblob install_stg dedup: install failed\033[0m\r\n"); exit(1); } - if ( mug1_h != mug2_h || seq1_w != seq2_w ) { + if ( mug1_h != mug2_h || seq1_h != seq2_h ) { fprintf(stderr, "\033[31mblob install_stg dedup: expected %" - PRIc3_h "/%" PRIc3_w ", got %" PRIc3_h "/%" PRIc3_w - "\033[0m\r\n", mug1_h, seq1_w, mug2_h, seq2_w); + PRIc3_h "/%" PRIc3_h ", got %" PRIc3_h "/%" PRIc3_h + "\033[0m\r\n", mug1_h, seq1_h, mug2_h, seq2_h); exit(1); } @@ -530,14 +530,14 @@ _test_install_stg_dedup(void) // reject missing and empty staging files // - c3_h m = 0; c3_w s = 0; - if ( c3n != u3_blob_install_stg(_tmp_pier, "/no/such/path", &m, &s) ) { + c3_h m = 0; c3_h s = 0; + if ( c3n != u3_blob_move_stg(_tmp_pier, "/no/such/path", &m, &s) ) { fprintf(stderr, "\033[31mblob install_stg: should reject missing file\033[0m\r\n"); exit(1); } c3_c* empty_c = _write_tmp_file((const 
c3_y*)"", 0); - if ( c3n != u3_blob_install_stg(_tmp_pier, empty_c, &m, &s) ) { + if ( c3n != u3_blob_move_stg(_tmp_pier, empty_c, &m, &s) ) { fprintf(stderr, "\033[31mblob install_stg: should reject empty\033[0m\r\n"); exit(1); } @@ -570,17 +570,17 @@ _test_met(void) { const c3_y dat_y[] = { 0xab, 0xcd, 0xef, 0x01 }; const c3_d dat_d = sizeof(dat_y); - c3_h mug_h = 0; c3_w seq_w = 0; - u3_blob_save(_tmp_pier, dat_y, dat_d, &mug_h, &seq_w); + c3_h mug_h = 0; c3_h seq_h = 0; + u3_blob_save(_tmp_pier, dat_y, dat_d, &mug_h, &seq_h); - c3_d bit_d = u3_blob_met(_tmp_pier, mug_h, seq_w); + c3_d bit_d = u3_blob_met(_tmp_pier, mug_h, seq_h); if ( 25 != bit_d ) { fprintf(stderr, "\033[31mblob met: dense got %" PRIc3_d ", expected 25" "\033[0m\r\n", bit_d); exit(1); } - u3_weak atm = u3_blob_load(_tmp_pier, mug_h, seq_w); + u3_weak atm = u3_blob_load(_tmp_pier, mug_h, seq_h); if ( u3_none == atm ) { fprintf(stderr, "\033[31mblob met: load u3_none\033[0m\r\n"); exit(1); @@ -601,10 +601,10 @@ _test_met(void) { const c3_y dat_y[] = { 0xff, 0xff, 0x00, 0x00, 0x00 }; const c3_d dat_d = sizeof(dat_y); - c3_h mug_h = 0; c3_w seq_w = 0; - u3_blob_save(_tmp_pier, dat_y, dat_d, &mug_h, &seq_w); + c3_h mug_h = 0; c3_h seq_h = 0; + u3_blob_save(_tmp_pier, dat_y, dat_d, &mug_h, &seq_h); - c3_d bit_d = u3_blob_met(_tmp_pier, mug_h, seq_w); + c3_d bit_d = u3_blob_met(_tmp_pier, mug_h, seq_h); // 16 significant bits; high byte 0xff → 8 bits // total = 1*8 + 8 = 16 // @@ -628,7 +628,7 @@ _test_met(void) fprintf(stderr, "test blob met: ok\r\n"); } -/* _test_map(): u3_blob_map returns byte-accurate pointer. +/* _test_map(): u3_blob_mmap returns byte-accurate pointer. 
*/ static void _test_map(void) @@ -639,11 +639,11 @@ _test_map(void) const c3_y dat_y[] = "mapped bytes should round-trip exactly"; const c3_d dat_d = sizeof(dat_y) - 1; - c3_h mug_h = 0; c3_w seq_w = 0; - u3_blob_save(_tmp_pier, dat_y, dat_d, &mug_h, &seq_w); + c3_h mug_h = 0; c3_h seq_h = 0; + u3_blob_save(_tmp_pier, dat_y, dat_d, &mug_h, &seq_h); c3_d mlen_d = 0; - const c3_y* map_y = u3_blob_map(_tmp_pier, mug_h, seq_w, &mlen_d); + const c3_y* map_y = u3_blob_mmap(_tmp_pier, mug_h, seq_h, &mlen_d); if ( !map_y ) { fprintf(stderr, "\033[31mblob map: returned NULL\033[0m\r\n"); exit(1); @@ -657,15 +657,15 @@ _test_map(void) fprintf(stderr, "\033[31mblob map: byte mismatch\033[0m\r\n"); exit(1); } - u3_blob_unmap(map_y, mlen_d); + u3_blob_umap(map_y, mlen_d); // mapping nonexistent returns NULL // c3_d dlen_d = 0; - const c3_y* miss_y = u3_blob_map(_tmp_pier, 0xdeadbeef, 999, &dlen_d); + const c3_y* miss_y = u3_blob_mmap(_tmp_pier, 0xdeadbeef, 999, &dlen_d); if ( miss_y ) { fprintf(stderr, "\033[31mblob map: missing should be NULL\033[0m\r\n"); - u3_blob_unmap(miss_y, dlen_d); + u3_blob_umap(miss_y, dlen_d); exit(1); } @@ -710,13 +710,13 @@ _test_lifecycle(void) const c3_d dat2_d = sizeof(dat2_y) - 1; c3_h mug1_h = 0, mug2_h = 0, mug3_h = 0; - c3_w seq1_w = 0, seq2_w = 0, seq3_w = 0; + c3_h seq1_h = 0, seq2_h = 0, seq3_h = 0; - if ( c3y != u3_blob_save(_tmp_pier, dat1_y, dat1_d, &mug1_h, &seq1_w) ) { + if ( c3y != u3_blob_save(_tmp_pier, dat1_y, dat1_d, &mug1_h, &seq1_h) ) { fprintf(stderr, "\033[31mlifecycle: save1 failed\033[0m\r\n"); exit(1); } - if ( c3y != u3_blob_save(_tmp_pier, dat2_y, dat2_d, &mug2_h, &seq2_w) ) { + if ( c3y != u3_blob_save(_tmp_pier, dat2_y, dat2_d, &mug2_h, &seq2_h) ) { fprintf(stderr, "\033[31mlifecycle: save2 failed\033[0m\r\n"); exit(1); } @@ -725,16 +725,16 @@ _test_lifecycle(void) // { c3_c* stg_c = _write_tmp_file(dat1_y, dat1_d); - if ( c3y != u3_blob_install_stg(_tmp_pier, stg_c, &mug3_h, &seq3_w) ) { + if ( c3y != 
u3_blob_move_stg(_tmp_pier, stg_c, &mug3_h, &seq3_h) ) { fprintf(stderr, "\033[31mlifecycle: install_stg failed\033[0m\r\n"); exit(1); } free(stg_c); } - if ( mug1_h != mug3_h || seq1_w != seq3_w ) { + if ( mug1_h != mug3_h || seq1_h != seq3_h ) { fprintf(stderr, "\033[31mlifecycle: install_stg should dedup; " - "got %" PRIc3_h "/%" PRIc3_w " vs %" PRIc3_h "/%" - PRIc3_w "\033[0m\r\n", mug3_h, seq3_w, mug1_h, seq1_w); + "got %" PRIc3_h "/%" PRIc3_h " vs %" PRIc3_h "/%" + PRIc3_h "\033[0m\r\n", mug3_h, seq3_h, mug1_h, seq1_h); exit(1); } @@ -744,8 +744,8 @@ _test_lifecycle(void) // // shape: [%blob-evt [bob1 bob2] bob1 42] // - u3_noun bob1 = u3i_blob(mug1_h, seq1_w); - u3_noun bob2 = u3i_blob(mug2_h, seq2_w); + u3_noun bob1 = u3i_blob(mug1_h, seq1_h); + u3_noun bob2 = u3i_blob(mug2_h, seq2_h); u3_noun ref = u3nq(c3__blob, u3nc(u3k(bob1), u3k(bob2)), u3k(bob1), @@ -819,19 +819,19 @@ _test_lifecycle(void) exit(1); } if ( u3a_bob_mug(bob1_d) != mug1_h - || u3a_bob_seq(bob1_d) != seq1_w ) + || u3a_bob_seq(bob1_d) != seq1_h ) { fprintf(stderr, "\033[31mlifecycle: bob1 mug/seq mismatch\033[0m\r\n"); exit(1); } if ( u3a_bob_mug(b2) != mug1_h - || u3a_bob_seq(b2) != seq1_w ) + || u3a_bob_seq(b2) != seq1_h ) { fprintf(stderr, "\033[31mlifecycle: backref bob1 mug/seq mismatch\033[0m\r\n"); exit(1); } if ( u3a_bob_mug(bob2_d) != mug2_h - || u3a_bob_seq(bob2_d) != seq2_w ) + || u3a_bob_seq(bob2_d) != seq2_h ) { fprintf(stderr, "\033[31mlifecycle: bob2 mug/seq mismatch\033[0m\r\n"); exit(1); @@ -865,7 +865,7 @@ _test_lifecycle(void) // { c3_d bit_d = u3r_blob_met(bob1_d); - u3_weak mat = u3_blob_load(_tmp_pier, mug1_h, seq1_w); + u3_weak mat = u3_blob_load(_tmp_pier, mug1_h, seq1_h); if ( u3_none == mat ) { fprintf(stderr, "\033[31mlifecycle: u3_blob_load failed\033[0m\r\n"); exit(1); @@ -885,10 +885,10 @@ _test_lifecycle(void) // tear down and confirm blob files are deleted cleanly // - u3_blob_delete(_tmp_pier, mug1_h, seq1_w); - u3_blob_delete(_tmp_pier, mug2_h, seq2_w); - 
if ( c3y == u3_blob_exists(_tmp_pier, mug1_h, seq1_w) - || c3y == u3_blob_exists(_tmp_pier, mug2_h, seq2_w) ) + u3_blob_wipe(_tmp_pier, mug1_h, seq1_h); + u3_blob_wipe(_tmp_pier, mug2_h, seq2_h); + if ( c3y == u3_blob_live(_tmp_pier, mug1_h, seq1_h) + || c3y == u3_blob_live(_tmp_pier, mug2_h, seq2_h) ) { fprintf(stderr, "\033[31mlifecycle: blobs still present after delete\033[0m\r\n"); exit(1); diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index bc230e1de2..5cb1f19795 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1571,8 +1571,13 @@ _disk_chop_delete_cb(u3_noun kev, void* ptr_v) u3r_safe_word(val, &off_w); u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + fprintf(stderr, "chop: blob [%x/%u] log=%u les=%u atm=%u\r\n", + (unsigned)blb_u->mug_h, (unsigned)blb_u->seq_h, + (unsigned)blb_u->log_w, (unsigned)blb_u->les_w, + (unsigned)blb_u->atm_w); + if ( 0 == blb_u->log_w && 0 == blb_u->les_w && 0 == blb_u->atm_w ) { - u3_blob_delete(del_u->pax_c, blb_u->mug_h, blb_u->seq_w); + u3_blob_wipe(del_u->pax_c, blb_u->mug_h, blb_u->seq_h); // collect bid for post-walk blb_p cleanup // @@ -1581,7 +1586,7 @@ _disk_chop_delete_cb(u3_noun kev, void* ptr_v) del_u->bid_d = c3_realloc(del_u->bid_d, del_u->cap_z * sizeof(c3_d)); } del_u->bid_d[del_u->len_z++] = - ((c3_d)blb_u->mug_h << 32) | (c3_d)blb_u->seq_w; + ((c3_d)blb_u->mug_h << 32) | (c3_d)blb_u->seq_h; } } @@ -1622,7 +1627,7 @@ _disk_chop_rebuild_log_w(u3_disk* log_u) u3a_walk_fore(job, &acc, _disk_chop_bob_atom, _disk_chop_bob_cell); for ( c3_z i = 0; i < acc.len; i++ ) { - u3_noun bid = u3i_chub(acc.ids[i]); + c3_w bid = (c3_w)acc.ids[i]; u3_weak bv = u3h_get(u3H->blb_p, bid); if ( u3_none != bv ) { c3_w off_w = 0; @@ -1630,7 +1635,6 @@ _disk_chop_rebuild_log_w(u3_disk* log_u) u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); blb_u->log_w++; } - u3z(bid); } c3_free(acc.ids); @@ -1647,7 +1651,7 @@ _disk_chop_rebuild_log_w(u3_disk* log_u) u3h_walk_with(u3H->blb_p, _disk_chop_delete_cb, &del_u); for ( c3_z i_z = 0; i_z < 
del_u.len_z; i_z++ ) { - u3_noun bid = u3i_chub(del_u.bid_d[i_z]); + c3_w bid = (c3_w)del_u.bid_d[i_z]; u3_weak bv = u3h_get(u3H->blb_p, bid); if ( u3_none != bv ) { c3_w off_w = 0; @@ -1655,7 +1659,6 @@ _disk_chop_rebuild_log_w(u3_disk* log_u) u3a_wfree((void*)u3a_into(off_w)); u3h_del(u3H->blb_p, bid); } - u3z(bid); } c3_free(del_u.bid_d); } diff --git a/pkg/vere/io/http.c b/pkg/vere/io/http.c index 0d8113442a..d00125a2e9 100644 --- a/pkg/vere/io/http.c +++ b/pkg/vere/io/http.c @@ -190,11 +190,11 @@ _http_vec_to_octs(h2o_iovec_t vec_u) if ( (c3_d)vec_u.len >= U3_BLOB_THRESH ) { c3_h mug_h; - c3_w seq_w; + c3_h seq_h; if ( c3y == u3_blob_save(u3C.dir_c, (const c3_y*)vec_u.base, - (c3_d)vec_u.len, &mug_h, &seq_w) ) + (c3_d)vec_u.len, &mug_h, &seq_h) ) { - bod = u3i_blob(mug_h, seq_w); + bod = u3i_blob(mug_h, seq_h); } } if ( u3_none == bod ) { @@ -221,7 +221,7 @@ _cttp_bods_free(u3_hbod* bod_u) if ( bod_u->own_y ) { // owner: release the whole mapping - u3_blob_unmap(bod_u->own_y, bod_u->map_d); + u3_blob_umap(bod_u->own_y, bod_u->map_d); } else if ( bod_u->map_y ) { // view: hint kernel to drop page-cache pages we've already sent @@ -255,13 +255,13 @@ static u3_hbod* _cttp_bod_from_bob(u3_atom a, c3_w len_w) { c3_h mug_h = u3a_bob_mug(a); - c3_w seq_w = u3a_bob_seq(a); + c3_h seq_h = u3a_bob_seq(a); c3_d map_d = 0; - const c3_y* map_y = u3_blob_map(u3C.dir_c, mug_h, seq_w, &map_d); + const c3_y* map_y = u3_blob_mmap(u3C.dir_c, mug_h, seq_h, &map_d); if ( !map_y || (c3_d)len_w > map_d ) { if ( map_y ) { - u3_blob_unmap(map_y, map_d); + u3_blob_umap(map_y, map_d); } return 0; } diff --git a/pkg/vere/io/mesa.c b/pkg/vere/io/mesa.c index 3a9efea348..34d05393a6 100644 --- a/pkg/vere/io/mesa.c +++ b/pkg/vere/io/mesa.c @@ -2361,12 +2361,12 @@ _mesa_hear_page(u3_mesa_pict* pic_u, sockaddr_in lan_u) // if ( (c3_d)res_h > U3_BLOB_THRESH ) { c3_h bob_mug_h; - c3_w bob_seq_w; + c3_h bob_seq_h; if ( c3y == u3_blob_save(sam_u->pir_u->pax_c, buf_y, - (c3_d)res_h, &bob_mug_h, 
&bob_seq_w) ) + (c3_d)res_h, &bob_mug_h, &bob_seq_h) ) { - pac = u3i_blob(bob_mug_h, bob_seq_w); + pac = u3i_blob(bob_mug_h, bob_seq_h); } else { pac = u3i_bytes(res_h, buf_y); diff --git a/pkg/vere/io/unix.c b/pkg/vere/io/unix.c index dbc99c11a3..bdb2320c12 100644 --- a/pkg/vere/io/unix.c +++ b/pkg/vere/io/unix.c @@ -459,9 +459,9 @@ _unix_write_file_hard(c3_c* pax_c, u3_noun mim) // if ( c3y == u3a_is_bob(dat) ) { c3_h bob_mug_h = u3a_bob_mug(dat); - c3_w bob_seq_w = u3a_bob_seq(dat); + c3_h bob_seq_h = u3a_bob_seq(dat); c3_c src_c[8192]; - u3_blob_path(src_c, u3C.dir_c, bob_mug_h, bob_seq_w); + u3_blob_path(src_c, u3C.dir_c, bob_mug_h, bob_seq_h); c3_i src_i = open(src_c, O_RDONLY); if ( src_i < 0 ) { @@ -964,7 +964,7 @@ static u3_noun _unix_update_node(u3_unix* unx_u, u3_unod* nod_u); static void _unix_blob_install_cb(void* ptr_v, c3_h mug_h, - c3_w seq_w, + c3_h seq_h, c3_o ok_o) { u3_unix_bob_ctx* ctx = ptr_v; @@ -983,7 +983,7 @@ _unix_blob_install_cb(void* ptr_v, ctx->fil_u->gum_w = mug_h; } - u3_atom atm = u3i_blob(mug_h, seq_w); + u3_atom atm = u3i_blob(mug_h, seq_h); u3_noun dat = u3nt(ctx->mim, (u3_atom)ctx->len_ws, atm); u3_noun can = u3nc(u3nt(ctx->pax, u3_nul, dat), u3_nul); u3_noun wir = u3nt(c3__sync, @@ -1430,10 +1430,10 @@ _unix_initial_update_file(c3_c* pax_c, c3_c* bas_c) // if ( (c3_d)len_ws > U3_BLOB_THRESH ) { c3_h bob_mug_h; - c3_w bob_seq_w; + c3_h bob_seq_h; c3_o ok_o = u3_blob_save_fd(u3C.dir_c, fid_i, - (c3_d)len_ws, &bob_mug_h, &bob_seq_w); + (c3_d)len_ws, &bob_mug_h, &bob_seq_h); if ( close(fid_i) < 0 ) { u3l_log("error closing initial file %s: %s", pax_c, strerror(errno)); @@ -1447,7 +1447,7 @@ _unix_initial_update_file(c3_c* pax_c, c3_c* bas_c) { u3_noun rel_pax = _unix_string_to_path_helper(pax_c + strlen(bas_c) + 1); u3_noun mim = u3nt(c3__text, u3i_string("plain"), u3_nul); - u3_atom atm = u3i_blob(bob_mug_h, bob_seq_w); + u3_atom atm = u3i_blob(bob_mug_h, bob_seq_h); u3_noun dat = u3nt(mim, (u3_atom)len_ws, atm); return 
u3nc(u3nt(rel_pax, u3_nul, dat), u3_nul); diff --git a/pkg/vere/king.c b/pkg/vere/king.c index c433fd621c..3ea018376f 100644 --- a/pkg/vere/king.c +++ b/pkg/vere/king.c @@ -22,10 +22,10 @@ static const c3_c* ver_hos_c = "https://bootstrap.urbit.org/vere"; /* _king_blob_del(): king-side del_f — release a blob lease via IPC. */ static void -_king_blob_del(c3_h mug_h, c3_w seq_w) +_king_blob_del(c3_h mug_h, c3_h seq_h) { if ( u3K.pir_u && u3K.pir_u->god_u ) { - u3_lord_blob_release(u3K.pir_u->god_u, mug_h, seq_w); + u3_lord_blob_release(u3K.pir_u->god_u, mug_h, seq_h); } } diff --git a/pkg/vere/lord.c b/pkg/vere/lord.c index 6b650dc659..44303ca393 100644 --- a/pkg/vere/lord.c +++ b/pkg/vere/lord.c @@ -423,7 +423,7 @@ _lord_plea_blob(u3_lord* god_u, u3_noun dat) } void* ptr_v = wit_u->blb_u.ptr_v; - void (*fun_f)(void*, c3_h, c3_w, c3_o) = wit_u->blb_u.fun_f; + void (*fun_f)(void*, c3_h, c3_h, c3_o) = wit_u->blb_u.fun_f; c3_free(wit_u->blb_u.pax_c); c3_free(wit_u); @@ -440,13 +440,13 @@ _lord_plea_blob(u3_lord* god_u, u3_noun dat) // u3_noun mug_a, seq_a; c3_h mug_h = 0; - c3_w seq_w = 0; + c3_h seq_h = 0; if ( (c3y == u3r_cell(u3t(dat), &mug_a, &seq_a)) ) { u3r_safe_half(mug_a, &mug_h); - u3r_safe_word(seq_a, &seq_w); + u3r_safe_half(seq_a, &seq_h); } - if ( fun_f ) fun_f(ptr_v, mug_h, seq_w, c3y); + if ( fun_f ) fun_f(ptr_v, mug_h, seq_h, c3y); } else { // [c3n reason] @@ -833,7 +833,7 @@ void u3_lord_blob_install(u3_lord* god_u, c3_c* pax_c, void* ptr_v, - void (*fun_f)(void*, c3_h, c3_w, c3_o)) + void (*fun_f)(void*, c3_h, c3_h, c3_o)) { u3_writ* wit_u = _lord_writ_new(god_u); wit_u->typ_e = u3_writ_blob; @@ -847,21 +847,21 @@ u3_lord_blob_install(u3_lord* god_u, /* u3_lord_blob_lease(): tell Mars king is acquiring a blob lease. 
*/ void -u3_lord_blob_lease(u3_lord* god_u, c3_h mug_h, c3_w seq_w) +u3_lord_blob_lease(u3_lord* god_u, c3_h mug_h, c3_h seq_h) { _lord_send(god_u, u3nt(c3_s4('b','l','a','s'), u3i_word(mug_h), - u3i_word(seq_w))); + u3i_word(seq_h))); } /* u3_lord_blob_release(): tell Mars king is releasing a blob lease. */ void -u3_lord_blob_release(u3_lord* god_u, c3_h mug_h, c3_w seq_w) +u3_lord_blob_release(u3_lord* god_u, c3_h mug_h, c3_h seq_h) { _lord_send(god_u, u3nt(c3_s4('b','l','r','l'), u3i_word(mug_h), - u3i_word(seq_w))); + u3i_word(seq_h))); } /* u3_lord_save(): save a snapshot. diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index 18c279f2a8..47ee10ee76 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -26,7 +26,7 @@ typedef struct _u3v_lease { c3_d exp_d; // expiry time (Unix ms) c3_h mug_h; // blob mug - c3_w seq_w; // blob seq within mug bucket + c3_h seq_h; // blob seq within mug bucket c3_o dead_o; // c3y if lease already released } u3v_lease; @@ -112,13 +112,12 @@ _mars_pq_pop(_mars_lease_pq* pq_u) /* _mars_blob_del(): delete blob file and clean up blb_p entry. */ static void -_mars_blob_del(c3_h mug_h, c3_w seq_w) +_mars_blob_del(c3_h mug_h, c3_h seq_h) { - u3_blob_delete(u3C.dir_c, mug_h, seq_w); + u3_blob_wipe(u3C.dir_c, mug_h, seq_h); - c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - u3_noun bid = u3i_chub(bid_d); - u3_weak bv = u3h_get(u3H->blb_p, bid); + c3_w bid = ((c3_w)mug_h << 32) | (c3_w)seq_h; + u3_weak bv = u3h_get(u3H->blb_p, bid); if ( u3_none != bv ) { c3_w off_w = 0; @@ -126,19 +125,15 @@ _mars_blob_del(c3_h mug_h, c3_w seq_w) u3a_wfree((void*)u3a_into(off_w)); u3h_del(u3H->blb_p, bid); } - - u3z(bid); } /* _blob_lookup(): get u3a_blob* for a bid, or NULL. 
*/ static u3a_blob* -_blob_lookup(c3_h mug_h, c3_w seq_w) +_blob_lookup(c3_h mug_h, c3_h seq_h) { - c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - u3_noun bid = u3i_chub(bid_d); - u3_weak bv = u3h_get(u3H->blb_p, bid); - u3z(bid); + c3_w bid = ((c3_w)mug_h << 32) | (c3_w)seq_h; + u3_weak bv = u3h_get(u3H->blb_p, bid); if ( u3_none == bv ) return 0; @@ -152,14 +147,14 @@ _blob_lookup(c3_h mug_h, c3_w seq_w) ** Deletion condition: log_w == 0 && les_w == 0 && atm_w == 0 */ static void -_blob_maybe_delete(c3_h mug_h, c3_w seq_w) +_blob_maybe_delete(c3_h mug_h, c3_h seq_h) { - u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); + u3a_blob* blb_u = _blob_lookup(mug_h, seq_h); if ( !blb_u ) return; if ( 0 != blb_u->log_w || 0 != blb_u->les_w || 0 != blb_u->atm_w ) return; - _mars_blob_del(mug_h, seq_w); + _mars_blob_del(mug_h, seq_h); } /* @@ -394,8 +389,8 @@ _mars_blob_bobs_atom(u3_atom a, void* ptr_v) acc->ids = c3_realloc(acc->ids, acc->cap * sizeof(c3_d)); } c3_h mug_h = u3a_bob_mug(a); - c3_w seq_w = u3a_bob_seq(a); - acc->ids[acc->len++] = ((c3_d)mug_h << 32) | (c3_d)seq_w; + c3_h seq_h = u3a_bob_seq(a); + acc->ids[acc->len++] = ((c3_d)mug_h << 32) | (c3_d)seq_h; } /* _mars_blob_bobs_cell(): u3a_walk_fore cell callback — always descend. 
@@ -423,9 +418,9 @@ _mars_fact(u3_mars* mar_u, for ( c3_z i_z = 0; i_z < acc.len; i_z++ ) { c3_h mug_h = (c3_h)(acc.ids[i_z] >> 32); - c3_w seq_w = (c3_w)(acc.ids[i_z] & 0xFFFFFFFFULL); + c3_h seq_h = (c3_h)(acc.ids[i_z] & 0xFFFFFFFF); - u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); + u3a_blob* blb_u = _blob_lookup(mug_h, seq_h); if ( blb_u ) { blb_u->log_w++; } @@ -798,14 +793,14 @@ _mars_work(u3_mars* mar_u, u3_noun jar) _mars_pq_pop(&_mars_pq); { - u3a_blob* blb_u = _blob_lookup(top_u->mug_h, top_u->seq_w); + u3a_blob* blb_u = _blob_lookup(top_u->mug_h, top_u->seq_h); if ( blb_u && blb_u->les_w > 0 ) { blb_u->les_w--; } } // TODO: write blob-ref lease-release event to LMDB (tag 0x02, op 0x02) - _blob_maybe_delete(top_u->mug_h, top_u->seq_w); + _blob_maybe_delete(top_u->mug_h, top_u->seq_h); c3_free(top_u); } @@ -975,7 +970,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) // [%blob path-atom] — install staging file from king // c3_h mug_h = 0; - c3_w seq_w = 0; + c3_h seq_h = 0; c3_o ok_o = c3n; // extract path string from atom @@ -985,27 +980,25 @@ _mars_work(u3_mars* mar_u, u3_noun jar) c3_c stg_c[8192] = {0}; u3r_bytes(0, (c3_w)len_d, (c3_y*)stg_c, dat); - ok_o = u3_blob_install_stg(u3C.dir_c, stg_c, &mug_h, &seq_w); + ok_o = u3_blob_move_stg(u3C.dir_c, stg_c, &mug_h, &seq_h); if ( c3y == ok_o ) { // create u3a_blob (if not present) with les_w = 1 (implicit // first lease for king). push PQ entry for TTL expiry. 
// { - u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); + u3a_blob* blb_u = _blob_lookup(mug_h, seq_h); if ( !blb_u ) { - c3_d bid_d = ((c3_d)mug_h << 32) | (c3_d)seq_w; - u3_noun bid = u3i_chub(bid_d); + c3_w bid = ((c3_w)mug_h << 32) | (c3_w)seq_h; c3_w* blb_w = u3a_walloc(c3_wiseof(u3a_blob)); blb_u = (u3a_blob*)blb_w; blb_u->log_w = 0; blb_u->les_w = 0; blb_u->mug_h = mug_h; - blb_u->seq_w = seq_w; + blb_u->seq_h = seq_h; blb_u->siz_d = 0; blb_u->atm_w = 0; u3h_put(u3H->blb_p, bid, u3i_word(u3a_outa(blb_w))); - u3z(bid); } blb_u->les_w++; } @@ -1013,7 +1006,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) { u3v_lease* lea_u = c3_malloc(sizeof(*lea_u)); lea_u->mug_h = mug_h; - lea_u->seq_w = seq_w; + lea_u->seq_h = seq_h; lea_u->dead_o = c3n; { struct timeval tv_u; @@ -1040,7 +1033,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) if ( c3y == ok_o ) { _mars_gift(mar_u, u3nt(c3__blob, c3y, - u3nc(u3i_word(mug_h), u3i_word(seq_w)))); + u3nc(u3i_word(mug_h), u3i_word(seq_h)))); } else { _mars_gift(mar_u, u3nc(c3__blob, c3n)); @@ -1056,11 +1049,11 @@ _mars_work(u3_mars* mar_u, u3_noun jar) return c3n; } c3_h mug_h = 0; - c3_w seq_w = 0; + c3_h seq_h = 0; u3r_safe_half(mug_n, &mug_h); - u3r_safe_word(seq_n, &seq_w); + u3r_safe_half(seq_n, &seq_h); - u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); + u3a_blob* blb_u = _blob_lookup(mug_h, seq_h); if ( blb_u ) { blb_u->les_w++; @@ -1068,7 +1061,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) // u3v_lease* lea_u = c3_malloc(sizeof(*lea_u)); lea_u->mug_h = mug_h; - lea_u->seq_w = seq_w; + lea_u->seq_h = seq_h; lea_u->dead_o = c3n; { struct timeval tv_u; @@ -1092,11 +1085,11 @@ _mars_work(u3_mars* mar_u, u3_noun jar) return c3n; } c3_h mug_h = 0; - c3_w seq_w = 0; + c3_h seq_h = 0; u3r_safe_half(mug_n, &mug_h); - u3r_safe_word(seq_n, &seq_w); + u3r_safe_half(seq_n, &seq_h); - u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); + u3a_blob* blb_u = _blob_lookup(mug_h, seq_h); if ( blb_u && blb_u->les_w > 0 ) { blb_u->les_w--; } @@ -1110,7 +1103,7 
@@ _mars_work(u3_mars* mar_u, u3_noun jar) // already 0 → clamped by the >0 check → safe. // - _blob_maybe_delete(mug_h, seq_w); + _blob_maybe_delete(mug_h, seq_h); u3z(jar); } break; @@ -1395,8 +1388,8 @@ _mars_poke_play(u3_mars* mar_u, const u3_fact* tac_u) for ( c3_z i_z = 0; i_z < acc.len; i_z++ ) { c3_h mug_h = (c3_h)(acc.ids[i_z] >> 32); - c3_w seq_w = (c3_w)(acc.ids[i_z] & 0xFFFFFFFFULL); - u3a_blob* blb_u = _blob_lookup(mug_h, seq_w); + c3_h seq_h = (c3_h)(acc.ids[i_z] & 0xFFFFFFFF); + u3a_blob* blb_u = _blob_lookup(mug_h, seq_h); if ( blb_u ) { blb_u->log_w++; } diff --git a/pkg/vere/vere.h b/pkg/vere/vere.h index c4c9c63c6f..abd4b13570 100644 --- a/pkg/vere/vere.h +++ b/pkg/vere/vere.h @@ -499,7 +499,7 @@ struct { // blob-install: c3_c* pax_c; // staging path (heap-alloc'd) void* ptr_v; // callback context - void (*fun_f)(void*, c3_h, c3_w, c3_o); // ack cb(ctx, mug, seq, ok) + void (*fun_f)(void*, c3_h, c3_h, c3_o); // ack cb(ctx, mug, seq, ok) } blb_u; // }; } u3_writ; @@ -1083,17 +1083,17 @@ u3_lord_blob_install(u3_lord* god_u, c3_c* pax_c, void* ptr_v, - void (*fun_f)(void*, c3_h, c3_w, c3_o)); + void (*fun_f)(void*, c3_h, c3_h, c3_o)); /* u3_lord_blob_lease(): tell Mars king is acquiring a blob lease. */ void - u3_lord_blob_lease(u3_lord* god_u, c3_h mug_h, c3_w seq_w); + u3_lord_blob_lease(u3_lord* god_u, c3_h mug_h, c3_h seq_h); /* u3_lord_blob_release(): tell Mars king is releasing a blob lease. */ void - u3_lord_blob_release(u3_lord* god_u, c3_h mug_h, c3_w seq_w); + u3_lord_blob_release(u3_lord* god_u, c3_h mug_h, c3_h seq_h); /** Filesystem (async). 
**/ From 0a90694ca2802527a6f4a7d747f792f724c41a39 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Mon, 27 Apr 2026 20:33:23 -0500 Subject: [PATCH 22/31] wip: ensures interned atom is correct post-canonicalization --- pkg/noun/allocate.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index 5b6436db3d..d7d50ccd9e 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -863,13 +863,20 @@ _me_bob_dead(u3a_atom* atm_u) c3_w off_w = 0; u3r_safe_word(bv, &off_w); u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); - blb_u->atm_w = 0; - if ( u3C.blob_del_f - && 0 == blb_u->log_w - && 0 == blb_u->les_w ) - { - u3C.blob_del_f(mug_h, seq_h); + // only clear atm_w if this dying atom IS the interned one. + // meld may canonicalize duplicate bob atoms, freeing one while + // keeping the other — don't clear the survivor's pointer. + // + if ( blb_u->atm_w == u3a_outa(atm_u) ) { + blb_u->atm_w = 0; + + if ( u3C.blob_del_f + && 0 == blb_u->log_w + && 0 == blb_u->les_w ) + { + u3C.blob_del_f(mug_h, seq_h); + } } } else if ( u3C.blob_del_f ) { From c4c23e6a3d79fabda441b996153605302064a3cc Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Wed, 29 Apr 2026 10:38:03 -0500 Subject: [PATCH 23/31] wip: use atom blob ids and cells for blobref count metadata in `blb_p` --- pkg/noun/allocate.c | 41 ++++----- pkg/noun/allocate.h | 29 +++--- pkg/noun/imprison.c | 35 +++---- pkg/noun/jets/c/cut.c | 122 ++++++++++++------------- pkg/noun/jets/c/met.c | 11 ++- pkg/noun/manage.c | 46 ++++++++-- pkg/noun/vortex.c | 11 +-- pkg/noun/vortex.h | 7 +- pkg/vere/disk.c | 147 +++++++++++++++++------------ pkg/vere/mars.c | 208 ++++++++++++++++++++++++++---------------- 10 files changed, 376 insertions(+), 281 deletions(-) diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index d7d50ccd9e..b5cec76bff 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -847,41 +847,32 @@ _me_gain_south(u3_noun dog) /* 
_me_bob_dead(): handle a bob atom whose loom refcount just hit zero. ** -** Clears blb_u->atm_w and checks deletion condition: -** log_w == 0 && les_w == 0 && atm_w == 0. +** The blb_p cell holds a strong ref to the bob atom, so this only +** fires AFTER _mars_blob_del already removed the entry (u3h_del → +** u3z(cell) → u3z(bob) → here). Nothing left to do for tracked +** blobs. For untracked bob atoms (no blb_p entry — e.g., temporary +** atoms from cueing), notify the king to release its lease. */ static void _me_bob_dead(u3a_atom* atm_u) { - c3_h mug_h = (c3_h)atm_u->mug_w; - c3_h seq_h = (c3_h)atm_u->buf_w[0]; - c3_w bid_w = ((c3_w)mug_h << 32) | (c3_w)seq_h; + if ( !u3C.blob_del_f ) return; - u3_weak bv = u3h_get(u3H->blb_p, bid_w); + c3_h mug_h = (c3_h)atm_u->mug_w; + c3_h seq_h = (c3_h)atm_u->buf_w[0]; + u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); - if ( u3_none != bv ) { - c3_w off_w = 0; - u3r_safe_word(bv, &off_w); - u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + u3_weak bv = u3h_get(u3H->blb_p, bid); + u3z(bid); - // only clear atm_w if this dying atom IS the interned one. - // meld may canonicalize duplicate bob atoms, freeing one while - // keeping the other — don't clear the survivor's pointer. + if ( u3_none == bv ) { + // no entry — untracked bob atom (king-side lease release) // - if ( blb_u->atm_w == u3a_outa(atm_u) ) { - blb_u->atm_w = 0; - - if ( u3C.blob_del_f - && 0 == blb_u->log_w - && 0 == blb_u->les_w ) - { - u3C.blob_del_f(mug_h, seq_h); - } - } - } - else if ( u3C.blob_del_f ) { u3C.blob_del_f(mug_h, seq_h); } + else { + u3z(bv); + } } /* _me_lose_north(): lose on a north road. diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index b5eaa1d381..54f15bb485 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -155,27 +155,18 @@ u3_noun tel; } u3a_cell; - /* u3a_blob: loom-resident metadata for a blob file. 
+ /* blb_p value layout: noun cell [log les bob] ** ** Stored in u3H->blb_p keyed by bid = (mug_h << 32) | seq_h. ** bid is always a direct atom on VERE64 (63 bits max). ** ** Three independent ref-sources protect the backing file: - ** log_w — event-log refs (inc on commit, dec on chop) - ** les_w — lease refs (inc on king acquire, dec on release/expiry) - ** atm_w — interned bob atom loom offset (0 = no live atom) + ** log — direct atom: event-log refcount (inc on commit, dec on chop) + ** les — direct atom: lease refcount (inc on king acquire, dec on release/expiry) + ** bob — 0 or the interned bob atom noun (GC-tracked) ** - ** The blob file is deleted when: - ** log_w == 0 && les_w == 0 && atm_w == 0 - */ - typedef struct __attribute__((aligned(4))) { - c3_w log_w; // event-log refcount - c3_w les_w; // lease refcount - c3_h mug_h; // 31-bit content mug (= bucket dir name) - c3_h seq_h; // sequence number within bucket - c3_d siz_d; // byte size of blob file - c3_w atm_w; // loom offset of interned bob atom (0 = none) - } u3a_blob; + ** The blob file is deleted when the value is [0 0 0]. + */ STATIC_ASSERT( (((c3_w)1) << u3a_min_log) == u3a_minimum, "log2 minimum allocation" ); @@ -667,12 +658,14 @@ typedef struct { return (c3_h)((u3a_atom*)u3a_to_ptr(som))->buf_w[0]; } - /* u3a_bob_bid(): blob ID = (mug << 32) | seq. Direct atom on VERE64. + /* u3a_bob_bid(): blob ID = (mug << 32) | seq. + ** On VERE64 this is a direct atom (63 bits). + ** On 32-bit this is a c3_d that must go through u3i_chub(). */ - static inline c3_w + static inline c3_d u3a_bob_bid(u3_atom som) { u3a_atom* atm_u = u3a_to_ptr(som); - return ((c3_w)(c3_h)atm_u->mug_w << 32) | (c3_w)(c3_h)atm_u->buf_w[0]; + return ((c3_d)(c3_h)atm_u->mug_w << 32) | (c3_d)(c3_h)atm_u->buf_w[0]; } /** Functions. 
diff --git a/pkg/noun/imprison.c b/pkg/noun/imprison.c index abbec37b9a..40f0b477bb 100644 --- a/pkg/noun/imprison.c +++ b/pkg/noun/imprison.c @@ -836,27 +836,26 @@ u3i_vmolt(u3_noun som, u3i_molt_pair pairs[], c3_z len_z) /* u3i_blob(): construct or intern a bob atom (blob reference). ** -** If a u3a_blob exists in blb_p with a live interned atom (atm_w != 0), -** returns the existing atom. Otherwise allocates a fresh bob atom and -** stores its offset in blb_u->atm_w (if a blb_p entry exists). +** If a blb_p entry exists with a live bob atom (bob != 0), +** returns the existing atom. Otherwise allocates a fresh bob atom +** and stores it in the blb_p entry (if one exists). */ u3_atom u3i_blob(c3_h mug_h, c3_h seq_h) { u3_assert( &(u3H->rod_u) == u3R ); - // bid is a direct atom on VERE64 (63 bits max) - // - c3_w bid = ((c3_w)mug_h << 32) | (c3_w)seq_h; + u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); u3_weak bv = u3h_get(u3H->blb_p, bid); if ( u3_none != bv ) { - c3_w off_w = 0; - u3r_safe_word(bv, &off_w); - u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); + u3_noun bob = u3t(u3t(bv)); // third element of [log les bob] - if ( blb_u->atm_w ) { - return u3k(u3a_to_pug(blb_u->atm_w)); + if ( bob != 0 ) { + u3_atom ret = u3k(bob); + u3z(bv); + u3z(bid); + return ret; } } @@ -870,13 +869,17 @@ u3i_blob(c3_h mug_h, c3_h seq_h) vat_u->len_w = 1 | u3a_blob_flag; vat_u->buf_w[0] = seq_h; - c3_w atm_w = u3a_outa(nov_w); + u3_atom atm = u3a_to_pug(u3a_outa(nov_w)); + // update blb_p entry if one exists + // if ( u3_none != bv ) { - c3_w off_w = 0; - u3r_safe_word(bv, &off_w); - ((u3a_blob*)u3a_into(off_w))->atm_w = atm_w; + u3_noun log = u3k(u3h(bv)); + u3_noun les = u3k(u3h(u3t(bv))); + u3h_put(u3H->blb_p, bid, u3nt(log, les, u3k(atm))); + u3z(bv); } - return u3a_to_pug(atm_w); + u3z(bid); + return atm; } diff --git a/pkg/noun/jets/c/cut.c b/pkg/noun/jets/c/cut.c index a189f55d5e..987e5f5bb4 100644 --- a/pkg/noun/jets/c/cut.c +++ b/pkg/noun/jets/c/cut.c @@ 
-13,19 +13,68 @@ u3_atom c, u3_atom d) { - c3_w b_w, c_w; if ( !_(u3a_is_cat(a)) || (a >= u3a_word_bits) ) { return u3m_bail(c3__fail); } - if ( !_(u3r_safe_word(b, &b_w)) ) { - return u3m_bail(c3__fail); - } - if ( !_(u3r_safe_word(c, &c_w)) ) { - return u3m_bail(c3__fail); + + c3_g a_g = a; + + // blob fast path: uses c3_d offsets so files > 4GB work on 32-bit. + // must come before u3r_safe_word which would bail on large offsets. + // + if ( (a_g >= 3) && (c3y == u3a_is_bob(d)) ) { + c3_d b_d, c_d; + if ( c3n == u3r_safe_chub(b, &b_d) ) return u3m_bail(c3__fail); + if ( c3n == u3r_safe_chub(c, &c_d) ) return u3m_bail(c3__fail); + if ( 0 == c_d ) return 0; + + c3_d map_d = 0; + const c3_y* map_y = u3r_blob_map(d, &map_d); + + if ( map_y ) { + c3_g shf_g = a_g - 3; + c3_d off_d = b_d << shf_g; + c3_d byt_d = c_d << shf_g; + + c3_d cpy_d = byt_d; + if ( off_d >= map_d ) { + cpy_d = 0; + } + else if ( off_d + cpy_d > map_d ) { + cpy_d = map_d - off_d; + } + + // c_d must fit in c3_w for slab_init (max 4GB slab per cut) + // + if ( c_d > (c3_d)c3_w_max ) { + u3r_blob_unmap(map_y, map_d); + return u3m_bail(c3__fail); + } + + u3i_slab sab_u; + u3i_slab_init(&sab_u, a_g, (c3_w)c_d); + + if ( cpy_d ) { + memcpy(sab_u.buf_y, map_y + off_d, (size_t)cpy_d); + } + + u3r_blob_unmap(map_y, map_d); + return u3i_slab_mint(&sab_u); + } + // mmap failed — fall through to generic path } + // non-blob path: uses c3_w (offsets must fit in 32 bits) + // { - c3_g a_g = a; + c3_w b_w, c_w; + if ( !_(u3r_safe_word(b, &b_w)) ) { + return u3m_bail(c3__fail); + } + if ( !_(u3r_safe_word(c, &c_w)) ) { + return u3m_bail(c3__fail); + } + c3_w len_w = u3r_met(a_g, d); if ( (0 == c_w) || (b_w >= len_w) ) { @@ -38,63 +87,12 @@ return u3k(d); } - // bob-aware fast path for byte-aligned cuts: mmap the blob and - // memcpy the requested byte range directly into the slab. 
No - // full-blob materialization (u3r_chop on a bob atom would call - // u3r_blob_load, which allocates and copies the entire file into - // the loom). - // - // We require bloq >= 3 so the cut range is a whole number of - // bytes; bit-level cuts (a_g < 3) are rare and fall through to - // the generic path below. - // - if ( (a_g >= 3) && (c3y == u3a_is_bob(d)) ) { - c3_d map_d = 0; - const c3_y* map_y = u3r_blob_map(d, &map_d); - - if ( map_y ) { - c3_g shf_g = a_g - 3; // bloq -> byte shift - c3_d off_d = (c3_d)b_w << shf_g; // byte offset in blob - c3_d byt_d = (c3_d)c_w << shf_g; // bytes to copy - - // clamp against actual file size. len_w (from u3r_met) - // reflects the atom's significant-bit length with trailing - // zeros stripped; the on-disk file may be a bit shorter - // than implied by the bloq count if the tail-word is - // partially significant. read only what's in the file; - // u3i_slab_init already zero-initialized the slab so any - // bytes past EOF remain as implicit zeros. - // - c3_d cpy_d = byt_d; - if ( off_d >= map_d ) { - cpy_d = 0; - } - else if ( off_d + cpy_d > map_d ) { - cpy_d = map_d - off_d; - } - - u3i_slab sab_u; - u3i_slab_init(&sab_u, a_g, c_w); - - if ( cpy_d ) { - memcpy(sab_u.buf_y, map_y + off_d, (size_t)cpy_d); - } + u3i_slab sab_u; + u3i_slab_init(&sab_u, a_g, c_w); - u3r_blob_unmap(map_y, map_d); - return u3i_slab_mint(&sab_u); - } - // mmap failed (missing blob file) — fall through to u3r_chop, - // which will silently return zero via u3r_blob_load → u3_none. 
- } - - { - u3i_slab sab_u; - u3i_slab_init(&sab_u, a_g, c_w); + u3r_chop(a_g, b_w, c_w, 0, sab_u.buf_w, d); - u3r_chop(a_g, b_w, c_w, 0, sab_u.buf_w, d); - - return u3i_slab_mint(&sab_u); - } + return u3i_slab_mint(&sab_u); } } u3_noun diff --git a/pkg/noun/jets/c/met.c b/pkg/noun/jets/c/met.c index 88426b7341..ccb046b799 100644 --- a/pkg/noun/jets/c/met.c +++ b/pkg/noun/jets/c/met.c @@ -14,7 +14,16 @@ return 0; } else if ( !_(u3a_is_cat(a)) || (a >= u3a_word_bits) ) { - return u3m_bail(c3__fail);; + return u3m_bail(c3__fail); + } + // blob atoms: compute from u3r_blob_met (returns c3_d) to avoid + // c3_w truncation in u3r_met on 32-bit builds. + // + else if ( (c3y == u3a_is_bob(b)) && (3 <= a) ) { + c3_d bit_d = u3r_blob_met(b); + if ( 0 == bit_d ) return (u3_noun)u3m_bail(c3__fail); + c3_d rnd_d = ((c3_d)1 << a) - 1; + return u3i_chub((bit_d + rnd_d) >> a); } else { c3_w met_w = u3r_met(a, b); diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index 498843bed6..4121cacfad 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -575,16 +575,22 @@ STATIC_ASSERT( ((c3_wiseof(u3v_home) * sizeof(c3_w)) == sizeof(u3v_home)), STATIC_ASSERT( U3N_VERLAT < (1U << 5), "5-bit bytecode version" ); -/* _find_home_zero_les(): u3h_walk_with callback — zero les_w on boot. +/* _find_home_collect_bid(): u3h_walk_with callback — collect blb_p bids. */ static void -_find_home_zero_les(u3_noun kev, void* ptr_v) +_find_home_collect_bid(u3_noun kev, void* ptr_v) { - (void)ptr_v; - c3_w off_w = 0; - u3r_safe_word(u3t(kev), &off_w); - u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); - blb_u->les_w = 0; + struct { c3_d* bid_d; c3_z len_z; c3_z cap_z; } *acc = ptr_v; + u3_noun key = u3h(kev); + + c3_d bid_d = 0; + u3r_safe_chub(key, &bid_d); + + if ( acc->len_z == acc->cap_z ) { + acc->cap_z = acc->cap_z ? acc->cap_z * 2 : 8; + acc->bid_d = c3_realloc(acc->bid_d, acc->cap_z * sizeof(c3_d)); + } + acc->bid_d[acc->len_z++] = bid_d; } /* _find_home(): in restored image, point to home road.
@@ -689,11 +695,31 @@ _find_home(void) } - // reset all les_w to 0: leases are transient IPC state backed by a + // reset all les to 0: leases are transient IPC state backed by a // C-heap PQ that is not persisted. after restart the PQ is empty, - // so the entries that would decrement les_w are gone. + // so the entries that would decrement les are gone. // - u3h_walk_with(u3H->blb_p, _find_home_zero_les, 0); + // can't modify HAMT entries during walk, so collect bids first. + // + { + struct { c3_d* bid_d; c3_z len_z; c3_z cap_z; } acc = {0, 0, 0}; + u3h_walk_with(u3H->blb_p, _find_home_collect_bid, &acc); + + for ( c3_z i_z = 0; i_z < acc.len_z; i_z++ ) { + u3_noun bid = u3i_chub(acc.bid_d[i_z]); + u3_weak bv = u3h_get(u3H->blb_p, bid); + + if ( u3_none != bv ) { + u3_noun log = u3k(u3h(bv)); + u3_noun bob = u3k(u3t(u3t(bv))); + u3h_put(u3H->blb_p, bid, u3nt(log, 0, bob)); + u3z(bv); + } + + u3z(bid); + } + c3_free(acc.bid_d); + } if ( !u3R->lop_p ) u3R->lop_p = u3h_new(); if ( !u3R->cax.for_p ) u3R->cax.for_p = u3h_new_cache(u3C.per_w); } diff --git a/pkg/noun/vortex.c b/pkg/noun/vortex.c index 64bb9f10f3..af2ca82896 100644 --- a/pkg/noun/vortex.c +++ b/pkg/noun/vortex.c @@ -375,8 +375,8 @@ u3v_mark() qua_u[1]->nam_c = strdup("wish cache"); qua_u[1]->siz_w = u3a_mark_noun(arv_u->yot) * sizeof(c3_w); - // mark blob bank HAMT as live GC root so its nodes - // aren't swept during u3m_pack / u3a_sweep. + // mark blob bank HAMT. values are noun cells [log les bob], + // so u3h_mark handles everything (nodes, keys, and value cells). 
// if ( u3H->blb_p ) { u3h_mark(u3H->blb_p); @@ -404,12 +404,6 @@ u3v_reclaim(void) if ( &(u3H->rod_u) == u3R ) { u3z(u3A->yot); u3A->yot = u3_nul; - - // // clear ford cache to release refs (e.g., blob atoms held - // // by cached computation results after |tomb) - // // - // u3h_free(u3R->cax.for_p); - // u3R->cax.for_p = u3h_new_cache(u3C.per_w); } } @@ -424,6 +418,7 @@ u3v_rewrite_compact(void) u3a_relocate_noun(&(u3A->yot)); // relocate blob bank HAMT root for compaction. + // values are noun cells [log les bob], so u3h_relocate handles them. // if ( u3H->blb_p ) { u3h_relocate(&(u3H->blb_p)); diff --git a/pkg/noun/vortex.h b/pkg/noun/vortex.h index 0dfff098cf..2ffd0d8d2b 100644 --- a/pkg/noun/vortex.h +++ b/pkg/noun/vortex.h @@ -21,15 +21,14 @@ /* u3v_home: all internal (within image) state. ** NB: version must be first for ease of migration. ** - ** blb_p is the blob bank HAMT (bid -> u3a_blob*), checkpointed - ** in image.bin. A blob file is deleted when: - ** log_w == 0 && les_w == 0 && atm_w == 0 + ** blb_p is the blob bank HAMT (bid -> [log les bob]), checkpointed + ** in image.bin. A blob file is deleted when value is [0 0 0]. */ typedef struct _u3v_home { u3v_version ver_d; // version number c3_d pam_d; // parameters u3v_arvo arv_u; // arvo state - u3p(u3h_root) blb_p; // blob bank: bid -> u3a_blob* + u3p(u3h_root) blb_p; // blob bank: bid -> [log les bob] u3a_road rod_u; // storage state } u3v_home; diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index 5cb1f19795..22c3bce5f8 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1253,7 +1253,7 @@ _disk_epoc_kill(u3_disk* log_u, c3_d epo_d) c3_c epo_c[8193]; snprintf(epo_c, sizeof(epo_c), "%s/0i%" PRIc3_d, log_u->com_u->pax_c, epo_d); - // blob log_w is rebuilt post-chop by u3_disk_chop, not per-epoch. + // blob log is rebuilt post-chop by u3_disk_chop, not per-epoch. 
// // delete files in epoch directory @@ -1531,53 +1531,65 @@ _disk_vere_diff(u3_disk* log_u) return c3n; } -/* _disk_chop_zero_cb(): u3h_walk_with callback — zero log_w and les_w. -** -** log_w: will be rebuilt by rescanning remaining events. -** les_w: transient IPC state (lease PQ is C-heap, lost on restart). -** atm_w: NOT zeroed — if a live atom exists, the blob file is kept. -** it will be deleted during live operation when the atom dies -** (_me_bob_dead sets atm_w=0, then _blob_maybe_delete fires). -*/ -static void -_disk_chop_zero_cb(u3_noun kev, void* ptr_v) -{ - (void)ptr_v; - c3_w off_w = 0; - u3r_safe_word(u3t(kev), &off_w); - u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); - blb_u->log_w = 0; - blb_u->les_w = 0; -} - -/* _disk_chop_del: accumulator for collecting blb_p keys to delete. +/* _disk_chop_collect_bid: accumulator for collecting blb_p keys. */ typedef struct { const c3_c* pax_c; - c3_d* bid_d; // array of bid keys to delete + c3_d* bid_d; // array of bid keys c3_z len_z; c3_z cap_z; -} _disk_chop_del; +} _disk_chop_collect; + +/* _disk_chop_collect_cb(): u3h_walk_with callback — collect all bids. +*/ +static void +_disk_chop_collect_cb(u3_noun kev, void* ptr_v) +{ + _disk_chop_collect* col_u = ptr_v; + u3_noun key = u3h(kev); + + c3_d bid_d = 0; + u3r_safe_chub(key, &bid_d); + + if ( col_u->len_z == col_u->cap_z ) { + col_u->cap_z = col_u->cap_z ? col_u->cap_z * 2 : 8; + col_u->bid_d = c3_realloc(col_u->bid_d, col_u->cap_z * sizeof(c3_d)); + } + col_u->bid_d[col_u->len_z++] = bid_d; +} /* _disk_chop_delete_cb(): u3h_walk_with callback — collect dead blobs. +** +** Checks if blb_p value is [0 0 0]; if so, wipes the blob file +** and collects the bid for post-walk HAMT deletion. 
*/ static void _disk_chop_delete_cb(u3_noun kev, void* ptr_v) { - _disk_chop_del* del_u = ptr_v; + _disk_chop_collect* del_u = ptr_v; u3_noun key = u3h(kev); u3_noun val = u3t(kev); - c3_w off_w = 0; - u3r_safe_word(val, &off_w); - u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); - fprintf(stderr, "chop: blob [%x/%u] log=%u les=%u atm=%u\r\n", - (unsigned)blb_u->mug_h, (unsigned)blb_u->seq_h, - (unsigned)blb_u->log_w, (unsigned)blb_u->les_w, - (unsigned)blb_u->atm_w); + u3_noun log = u3h(val); + u3_noun les = u3h(u3t(val)); + u3_noun bob = u3t(u3t(val)); + + c3_d bid_d = 0; + u3r_safe_chub(key, &bid_d); + c3_h mug_h = (c3_h)(bid_d >> 32); + c3_h seq_h = (c3_h)(bid_d & 0xFFFFFFFF); - if ( 0 == blb_u->log_w && 0 == blb_u->les_w && 0 == blb_u->atm_w ) { - u3_blob_wipe(del_u->pax_c, blb_u->mug_h, blb_u->seq_h); + // bob use_w == 1 means only the blb_p cell holds it (arvo lost its refs) + // + c3_o ded_o = ( 0 == bob ) ? c3y : __( 1 == u3a_use(bob) ); + + fprintf(stderr, "chop: blob [%x/%u] log=%u les=%u bob=%s\r\n", + (unsigned)mug_h, (unsigned)seq_h, + (unsigned)log, (unsigned)les, + (c3y == ded_o) ? "dead" : "live"); + + if ( 0 == log && 0 == les && c3y == ded_o ) { + u3_blob_wipe(del_u->pax_c, mug_h, seq_h); // collect bid for post-walk blb_p cleanup // @@ -1585,23 +1597,39 @@ _disk_chop_delete_cb(u3_noun kev, void* ptr_v) del_u->cap_z = del_u->cap_z ? del_u->cap_z * 2 : 8; del_u->bid_d = c3_realloc(del_u->bid_d, del_u->cap_z * sizeof(c3_d)); } - del_u->bid_d[del_u->len_z++] = - ((c3_d)blb_u->mug_h << 32) | (c3_d)blb_u->seq_h; + del_u->bid_d[del_u->len_z++] = bid_d; } } -/* _disk_chop_rebuild_log_w(): rebuild blob log_w after epoch deletion. +/* _disk_chop_rebuild_log_w(): rebuild blob log after epoch deletion. ** -** 1. zero all log_w via HAMT walk -** 2. scan remaining LMDB events, increment log_w for each bob atom +** 1. zero all log and les via collect-then-modify pattern +** 2. scan remaining LMDB events, increment log for each bob atom ** 3. 
delete blob files whose total refcount is now zero */ static void _disk_chop_rebuild_log_w(u3_disk* log_u) { - // step 1: zero all log_w + // step 1: zero all log and les (collect bids, then modify after walk) // - u3h_walk_with(u3H->blb_p, _disk_chop_zero_cb, 0); + { + _disk_chop_collect col_u = {0}; + u3h_walk_with(u3H->blb_p, _disk_chop_collect_cb, &col_u); + + for ( c3_z i_z = 0; i_z < col_u.len_z; i_z++ ) { + u3_noun bid = u3i_chub(col_u.bid_d[i_z]); + u3_weak bv = u3h_get(u3H->blb_p, bid); + + if ( u3_none != bv ) { + u3_noun bob = u3k(u3t(u3t(bv))); + u3h_put(u3H->blb_p, bid, u3nt(0, 0, bob)); + u3z(bv); + } + + u3z(bid); + } + c3_free(col_u.bid_d); + } // step 2: scan remaining events for bob atoms // @@ -1627,14 +1655,18 @@ _disk_chop_rebuild_log_w(u3_disk* log_u) u3a_walk_fore(job, &acc, _disk_chop_bob_atom, _disk_chop_bob_cell); for ( c3_z i = 0; i < acc.len; i++ ) { - c3_w bid = (c3_w)acc.ids[i]; + u3_noun bid = u3i_chub(acc.ids[i]); u3_weak bv = u3h_get(u3H->blb_p, bid); + if ( u3_none != bv ) { - c3_w off_w = 0; - u3r_safe_word(bv, &off_w); - u3a_blob* blb_u = (u3a_blob*)u3a_into(off_w); - blb_u->log_w++; + u3_noun log = u3h(bv); + u3_noun les = u3k(u3h(u3t(bv))); + u3_noun bob = u3k(u3t(u3t(bv))); + u3h_put(u3H->blb_p, bid, u3nt(log + 1, les, bob)); + u3z(bv); } + + u3z(bid); } c3_free(acc.ids); @@ -1647,18 +1679,13 @@ _disk_chop_rebuild_log_w(u3_disk* log_u) // step 3: delete unreferenced blobs and clean up blb_p // { - _disk_chop_del del_u = { .pax_c = log_u->dir_u->pax_c }; + _disk_chop_collect del_u = { .pax_c = log_u->dir_u->pax_c }; u3h_walk_with(u3H->blb_p, _disk_chop_delete_cb, &del_u); for ( c3_z i_z = 0; i_z < del_u.len_z; i_z++ ) { - c3_w bid = (c3_w)del_u.bid_d[i_z]; - u3_weak bv = u3h_get(u3H->blb_p, bid); - if ( u3_none != bv ) { - c3_w off_w = 0; - u3r_safe_word(bv, &off_w); - u3a_wfree((void*)u3a_into(off_w)); - u3h_del(u3H->blb_p, bid); - } + u3_noun bid = u3i_chub(del_u.bid_d[i_z]); + u3h_del(u3H->blb_p, bid); + u3z(bid); } 
c3_free(del_u.bid_d); } @@ -1694,10 +1721,10 @@ u3_disk_chop(u3_disk* log_u, c3_d eve_d) c3_free(sot_d); - // rebuild blob log_w after chop. + // rebuild blob log after chop. // - // step 1: zero all log_w via u3h_walk_with on blb_p - // step 2: scan remaining LMDB events for bob atoms, rebuild log_w + // step 1: zero all log and les via collect-then-modify on blb_p + // step 2: scan remaining LMDB events for bob atoms, rebuild log // step 3: delete blobs with all-zero refcounts // _disk_chop_rebuild_log_w(log_u); @@ -2055,7 +2082,7 @@ typedef enum { /* NOTE: _disk_blb_rebuild_from_epochs removed. ** Blob log-refs are now tracked via LMDB blob-ref events (tag 0x02), -** not via blobs.txt files. u3a_blob structs in blb_p persist in the +** not via blobs.txt files. blb_p cells [log les bob] persist in the ** loom snapshot; on replay, blob-ref events reconstruct the counters. */ @@ -2231,8 +2258,8 @@ _disk_epoc_load(u3_disk* log_u, c3_d lat_d, u3_disk_load_e lod_e) u3m_boot(log_u->dir_u->pax_c, (size_t)1 << u3_Host.ops_u.lom_y); // XX confirm - // blob refcounts (u3a_blob in blb_p) persist in the loom snapshot. - // on replay, LMDB blob-ref events will reconstruct log_w/les_w. + // blob refcounts ([log les bob] in blb_p) persist in the loom snapshot. + // on replay, LMDB blob-ref events will reconstruct log/les. // if ( log_u->dun_d < u3A->eve_d ) { diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index 47ee10ee76..b4a81cfdb7 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -116,45 +116,41 @@ _mars_blob_del(c3_h mug_h, c3_h seq_h) { u3_blob_wipe(u3C.dir_c, mug_h, seq_h); - c3_w bid = ((c3_w)mug_h << 32) | (c3_w)seq_h; - u3_weak bv = u3h_get(u3H->blb_p, bid); - - if ( u3_none != bv ) { - c3_w off_w = 0; - u3r_safe_word(bv, &off_w); - u3a_wfree((void*)u3a_into(off_w)); - u3h_del(u3H->blb_p, bid); - } -} - -/* _blob_lookup(): get u3a_blob* for a bid, or NULL. 
-*/ -static u3a_blob* -_blob_lookup(c3_h mug_h, c3_h seq_h) -{ - c3_w bid = ((c3_w)mug_h << 32) | (c3_w)seq_h; - u3_weak bv = u3h_get(u3H->blb_p, bid); - - if ( u3_none == bv ) return 0; - - c3_w off_w = 0; - u3r_safe_word(bv, &off_w); - return (u3a_blob*)u3a_into(off_w); + u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); + u3h_del(u3H->blb_p, bid); + u3z(bid); } /* _blob_maybe_delete(): delete blob iff all refs are zero. ** -** Deletion condition: log_w == 0 && les_w == 0 && atm_w == 0 +** bob is a strong ref held by the cell. If use_w == 1, the cell +** is the ONLY holder — the kernel dropped its reference. Safe to +** delete. If use_w > 1, something else still references the atom. */ static void _blob_maybe_delete(c3_h mug_h, c3_h seq_h) { - u3a_blob* blb_u = _blob_lookup(mug_h, seq_h); - if ( !blb_u ) return; + u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); + u3_weak bv = u3h_get(u3H->blb_p, bid); + u3z(bid); + + if ( u3_none == bv ) return; - if ( 0 != blb_u->log_w || 0 != blb_u->les_w || 0 != blb_u->atm_w ) return; + u3_noun log = u3h(bv); + u3_noun les = u3h(u3t(bv)); + u3_noun bob = u3t(u3t(bv)); - _mars_blob_del(mug_h, seq_h); + // bob == 0: no atom. bob != 0 && use_w == 1: cell is only holder. + // + c3_o bob_dead = ( 0 == bob ) ? c3y + : __( 1 == u3a_use(bob) ); + + c3_o dead_o = ( 0 == log && 0 == les && c3y == bob_dead ) ? c3y : c3n; + u3z(bv); + + if ( c3y == dead_o ) { + _mars_blob_del(mug_h, seq_h); + } } /* @@ -410,7 +406,7 @@ _mars_fact(u3_mars* mar_u, u3_noun pro) { // find all bob atoms in the committed event and - // increment their event-log refcount (u3a_blob.log_w). + // increment their event-log refcount (log in [log les bob]). 
// { struct { c3_d* ids; c3_z len; c3_z cap; } acc = {0, 0, 0}; @@ -420,11 +416,19 @@ _mars_fact(u3_mars* mar_u, c3_h mug_h = (c3_h)(acc.ids[i_z] >> 32); c3_h seq_h = (c3_h)(acc.ids[i_z] & 0xFFFFFFFF); - u3a_blob* blb_u = _blob_lookup(mug_h, seq_h); - if ( blb_u ) { - blb_u->log_w++; + u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); + u3_weak bv = u3h_get(u3H->blb_p, bid); + + if ( u3_none != bv ) { + u3_noun log = u3h(bv); + u3_noun les = u3k(u3h(u3t(bv))); + u3_noun bob = u3k(u3t(u3t(bv))); + u3h_put(u3H->blb_p, bid, u3nt(log + 1, les, bob)); + u3z(bv); } + u3z(bid); + // TODO: write blob-ref log-inc event to LMDB (tag 0x02, op 0x03) } @@ -793,10 +797,24 @@ _mars_work(u3_mars* mar_u, u3_noun jar) _mars_pq_pop(&_mars_pq); { - u3a_blob* blb_u = _blob_lookup(top_u->mug_h, top_u->seq_h); - if ( blb_u && blb_u->les_w > 0 ) { - blb_u->les_w--; + u3_noun bid = u3i_chub(((c3_d)top_u->mug_h << 32) | (c3_d)top_u->seq_h); + u3_weak bv = u3h_get(u3H->blb_p, bid); + + if ( u3_none != bv ) { + u3_noun log = u3k(u3h(bv)); + u3_noun les = u3h(u3t(bv)); + u3_noun bob = u3k(u3t(u3t(bv))); + + if ( les > 0 ) { + u3h_put(u3H->blb_p, bid, u3nt(log, les - 1, bob)); + } + else { + u3h_put(u3H->blb_p, bid, u3nt(log, 0, bob)); + } + u3z(bv); } + + u3z(bid); } // TODO: write blob-ref lease-release event to LMDB (tag 0x02, op 0x02) @@ -983,24 +1001,25 @@ _mars_work(u3_mars* mar_u, u3_noun jar) ok_o = u3_blob_move_stg(u3C.dir_c, stg_c, &mug_h, &seq_h); if ( c3y == ok_o ) { - // create u3a_blob (if not present) with les_w = 1 (implicit - // first lease for king). push PQ entry for TTL expiry. + // create blb_p entry (if not present) and increment les. + // push PQ entry for TTL expiry. 
// { - u3a_blob* blb_u = _blob_lookup(mug_h, seq_h); - if ( !blb_u ) { - c3_w bid = ((c3_w)mug_h << 32) | (c3_w)seq_h; - c3_w* blb_w = u3a_walloc(c3_wiseof(u3a_blob)); - blb_u = (u3a_blob*)blb_w; - blb_u->log_w = 0; - blb_u->les_w = 0; - blb_u->mug_h = mug_h; - blb_u->seq_h = seq_h; - blb_u->siz_d = 0; - blb_u->atm_w = 0; - u3h_put(u3H->blb_p, bid, u3i_word(u3a_outa(blb_w))); + u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); + u3_weak bv = u3h_get(u3H->blb_p, bid); + + if ( u3_none == bv ) { + u3h_put(u3H->blb_p, bid, u3nt(0, 1, 0)); + } + else { + u3_noun log = u3k(u3h(bv)); + u3_noun les = u3h(u3t(bv)); + u3_noun bob = u3k(u3t(u3t(bv))); + u3h_put(u3H->blb_p, bid, u3nt(log, les + 1, bob)); + u3z(bv); } - blb_u->les_w++; + + u3z(bid); } { @@ -1053,24 +1072,34 @@ _mars_work(u3_mars* mar_u, u3_noun jar) u3r_safe_half(mug_n, &mug_h); u3r_safe_half(seq_n, &seq_h); - u3a_blob* blb_u = _blob_lookup(mug_h, seq_h); - if ( blb_u ) { - blb_u->les_w++; + { + u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); + u3_weak bv = u3h_get(u3H->blb_p, bid); - // push PQ entry for TTL failsafe (15 min) - // - u3v_lease* lea_u = c3_malloc(sizeof(*lea_u)); - lea_u->mug_h = mug_h; - lea_u->seq_h = seq_h; - lea_u->dead_o = c3n; - { - struct timeval tv_u; - gettimeofday(&tv_u, 0); - lea_u->exp_d = (c3_d)tv_u.tv_sec * 1000ULL - + (c3_d)tv_u.tv_usec / 1000ULL - + 900000ULL; + if ( u3_none != bv ) { + u3_noun log = u3k(u3h(bv)); + u3_noun les = u3h(u3t(bv)); + u3_noun bob = u3k(u3t(u3t(bv))); + u3h_put(u3H->blb_p, bid, u3nt(log, les + 1, bob)); + u3z(bv); + + // push PQ entry for TTL failsafe (15 min) + // + u3v_lease* lea_u = c3_malloc(sizeof(*lea_u)); + lea_u->mug_h = mug_h; + lea_u->seq_h = seq_h; + lea_u->dead_o = c3n; + { + struct timeval tv_u; + gettimeofday(&tv_u, 0); + lea_u->exp_d = (c3_d)tv_u.tv_sec * 1000ULL + + (c3_d)tv_u.tv_usec / 1000ULL + + 900000ULL; + } + _mars_pq_push(&_mars_pq, lea_u); } - _mars_pq_push(&_mars_pq, lea_u); + + u3z(bid); } u3z(jar); @@ 
-1089,17 +1118,33 @@ _mars_work(u3_mars* mar_u, u3_noun jar) u3r_safe_half(mug_n, &mug_h); u3r_safe_half(seq_n, &seq_h); - u3a_blob* blb_u = _blob_lookup(mug_h, seq_h); - if ( blb_u && blb_u->les_w > 0 ) { - blb_u->les_w--; + { + u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); + u3_weak bv = u3h_get(u3H->blb_p, bid); + + if ( u3_none != bv ) { + u3_noun log = u3k(u3h(bv)); + u3_noun les = u3h(u3t(bv)); + u3_noun bob = u3k(u3t(u3t(bv))); + + if ( les > 0 ) { + u3h_put(u3H->blb_p, bid, u3nt(log, les - 1, bob)); + } + else { + u3h_put(u3H->blb_p, bid, u3nt(log, 0, bob)); + } + u3z(bv); + } + + u3z(bid); } // mark the corresponding PQ entry dead (if findable) // expiry sweeper will skip it // - // TODO: for now we rely on les_w accounting. PQ entries + // TODO: for now we rely on les accounting. PQ entries // with dead_o=c3n that outlive their lease's release will - // try to decrement les_w again on expiry, but les_w is + // try to decrement les again on expiry, but les is // already 0 → clamped by the >0 check → safe. // @@ -1378,8 +1423,8 @@ _mars_poke_play(u3_mars* mar_u, const u3_fact* tac_u) u3A->roc = u3k(cor); u3A->eve_d++; - // increment log_w for any bob atoms in the replayed event. - // snapshot has log_w correct up to snapshot time; replay covers + // increment log for any bob atoms in the replayed event. + // snapshot has log correct up to snapshot time; replay covers // the gap from snapshot to head. 
// { @@ -1389,10 +1434,19 @@ _mars_poke_play(u3_mars* mar_u, const u3_fact* tac_u) for ( c3_z i_z = 0; i_z < acc.len; i_z++ ) { c3_h mug_h = (c3_h)(acc.ids[i_z] >> 32); c3_h seq_h = (c3_h)(acc.ids[i_z] & 0xFFFFFFFF); - u3a_blob* blb_u = _blob_lookup(mug_h, seq_h); - if ( blb_u ) { - blb_u->log_w++; + + u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); + u3_weak bv = u3h_get(u3H->blb_p, bid); + + if ( u3_none != bv ) { + u3_noun log = u3h(bv); + u3_noun les = u3k(u3h(u3t(bv))); + u3_noun bob = u3k(u3t(u3t(bv))); + u3h_put(u3H->blb_p, bid, u3nt(log + 1, les, bob)); + u3z(bv); } + + u3z(bid); } c3_free(acc.ids); } From 5799d2ad0fffdfec3d9e53ef974077859f93ba07 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Wed, 29 Apr 2026 17:09:34 -0500 Subject: [PATCH 24/31] wip: blob gc working in both 32 and 64 bit modes --- pkg/noun/allocate.h | 2 - pkg/noun/imprison.c | 6 +- pkg/vere/blob.c | 10 +-- pkg/vere/db/lmdb.c | 184 +++++++++++++++++++++++++++++++++++++++++++- pkg/vere/db/lmdb.h | 25 ++++++ pkg/vere/disk.c | 94 ++++++++-------------- pkg/vere/mars.c | 88 ++++++++++----------- 7 files changed, 289 insertions(+), 120 deletions(-) diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index 54f15bb485..2b60216cf1 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -971,8 +971,6 @@ u3a_dash(void); void (*pat_f)(u3_atom, void*), c3_o (*cel_f)(u3_noun, void*)); - - /* u3a_string(): `a` as an on-loom c-string. */ c3_c* diff --git a/pkg/noun/imprison.c b/pkg/noun/imprison.c index 40f0b477bb..fd2d4b1aea 100644 --- a/pkg/noun/imprison.c +++ b/pkg/noun/imprison.c @@ -836,9 +836,9 @@ u3i_vmolt(u3_noun som, u3i_molt_pair pairs[], c3_z len_z) /* u3i_blob(): construct or intern a bob atom (blob reference). ** -** If a blb_p entry exists with a live bob atom (bob != 0), -** returns the existing atom. Otherwise allocates a fresh bob atom -** and stores it in the blb_p entry (if one exists). 
+** If a blb_p entry exists with a live bob atom (bob > 0), +** returns the existing atom. Otherwise, it allocates a fresh bob +** atom and stores it in the blb_p entry (if one exists). */ u3_atom u3i_blob(c3_h mug_h, c3_h seq_h) diff --git a/pkg/vere/blob.c b/pkg/vere/blob.c index dbba934b87..bb55b44d19 100644 --- a/pkg/vere/blob.c +++ b/pkg/vere/blob.c @@ -266,13 +266,13 @@ _blob_mug(const c3_y* dat_y, c3_d len_d) // for files larger than 4 GiB, also fold in the high length bits // and hash the last 4 GiB window for tail sensitivity // - c3_h hi_h = (c3_h)(len_d >> 32); - if ( hi_h ) { - mug_h = u3r_mug_both(mug_h, hi_h); + c3_h hig_h = (c3_h)(len_d >> 32); + if ( hig_h ) { + mug_h = u3r_mug_both(mug_h, hig_h); // hash the final window (last min(len_d, 0xFFFFFFFF) bytes) c3_d off_d = len_d > 0xFFFFFFFFULL ? len_d - 0xFFFFFFFFULL : 0; - c3_h tail_h = u3r_mug_bytes(dat_y + off_d, (c3_h)(len_d - off_d)); - mug_h = u3r_mug_both(mug_h, tail_h); + c3_h tel_h = u3r_mug_bytes(dat_y + off_d, (c3_h)(len_d - off_d)); + mug_h = u3r_mug_both(mug_h, tel_h); } return mug_h; } diff --git a/pkg/vere/db/lmdb.c b/pkg/vere/db/lmdb.c index 6827137f72..b9f50d89b7 100644 --- a/pkg/vere/db/lmdb.c +++ b/pkg/vere/db/lmdb.c @@ -48,9 +48,9 @@ u3_lmdb_init(const c3_c* pax_c, size_t siz_i) return 0; } - // Our databases have two tables: META and EVENTS + // Our databases have three tables: META, EVENTS, and BLOBS // - if ( (ret_h = mdb_env_set_maxdbs(env_u, 2)) ) { + if ( (ret_h = mdb_env_set_maxdbs(env_u, 3)) ) { mdb_logerror(stderr, ret_h, "lmdb: failed to set number of databases"); // XX dispose env_u // @@ -614,6 +614,186 @@ u3_lmdb_walk_done(u3_lmdb_walk* itr_u) mdb_txn_abort(itr_u->txn_u); } +/* u3_lmdb_save_blobs(): save blob IDs for event [eve_d] into BLOBS table. 
+*/ +c3_o +u3_lmdb_save_blobs(MDB_env* env_u, + c3_d eve_d, + c3_d* ids_d, + c3_z len_z) +{ + MDB_txn* txn_u; + MDB_dbi mdb_u; + c3_h ret_h; + + if ( !len_z ) { + return c3y; + } + + // create a write transaction + // + if ( (ret_h = mdb_txn_begin(env_u, 0, 0, &txn_u)) ) { + mdb_logerror(stderr, ret_h, "lmdb: blobs write: txn_begin fail"); + return c3n; + } + + // open the BLOBS database + // + { + c3_h ops_h = MDB_CREATE | MDB_INTEGERKEY; + + if ( (ret_h = mdb_dbi_open(txn_u, "BLOBS", ops_h, &mdb_u)) ) { + mdb_logerror(stderr, ret_h, "lmdb: blobs write: dbi_open fail"); + mdb_txn_abort(txn_u); + return c3n; + } + } + + // write packed array of blob IDs keyed by event number + // + { + MDB_val key_u = { .mv_size = sizeof(c3_d), .mv_data = &eve_d }; + MDB_val val_u = { .mv_size = len_z * sizeof(c3_d), .mv_data = ids_d }; + + if ( (ret_h = mdb_put(txn_u, mdb_u, &key_u, &val_u, 0)) ) { + mdb_logerror(stderr, ret_h, "lmdb: blobs write: put fail"); + mdb_txn_abort(txn_u); + return c3n; + } + } + + // commit transaction + // + if ( (ret_h = mdb_txn_commit(txn_u)) ) { + mdb_logerror(stderr, ret_h, "lmdb: blobs write: commit fail"); + return c3n; + } + + return c3y; +} + +/* u3_lmdb_read_blobs(): read blob IDs for event [eve_d] from BLOBS table. +** +** on success, sets [out_d] to a malloc'd array and [out_z] to its length. +** caller must c3_free(*out_d). returns c3n if no entry exists. 
+*/ +c3_o +u3_lmdb_read_blobs(MDB_env* env_u, + c3_d eve_d, + c3_d** out_d, + c3_z* out_z) +{ + MDB_txn* txn_u; + MDB_dbi mdb_u; + c3_h ret_h; + + if ( (ret_h = mdb_txn_begin(env_u, 0, MDB_RDONLY, &txn_u)) ) { + mdb_logerror(stderr, ret_h, "lmdb: blobs read: txn_begin fail"); + return c3n; + } + + // open the BLOBS database (read-only; may not exist yet) + // + { + c3_h ops_h = MDB_INTEGERKEY; + + if ( (ret_h = mdb_dbi_open(txn_u, "BLOBS", ops_h, &mdb_u)) ) { + // database doesn't exist yet -- not an error + // + mdb_txn_abort(txn_u); + *out_d = 0; + *out_z = 0; + return c3n; + } + } + + // look up by event number + // + { + MDB_val key_u = { .mv_size = sizeof(c3_d), .mv_data = &eve_d }; + MDB_val val_u; + + if ( (ret_h = mdb_get(txn_u, mdb_u, &key_u, &val_u)) ) { + mdb_txn_abort(txn_u); + *out_d = 0; + *out_z = 0; + return c3n; + } + + *out_z = val_u.mv_size / sizeof(c3_d); + *out_d = c3_malloc(val_u.mv_size); + memcpy(*out_d, val_u.mv_data, val_u.mv_size); + } + + mdb_txn_abort(txn_u); + return c3y; +} + +/* u3_lmdb_walk_blobs(): iterate BLOBS table for events in [lo_d, hi_d]. +** +** calls [fun_f] for each event that has blob refs. 
+*/ +void +u3_lmdb_walk_blobs(MDB_env* env_u, + c3_d lo_d, + c3_d hi_d, + void* ptr_v, + void (*fun_f)(void*, c3_d, c3_d*, c3_z)) +{ + MDB_txn* txn_u; + MDB_dbi mdb_u; + c3_h ret_h; + + if ( (ret_h = mdb_txn_begin(env_u, 0, MDB_RDONLY, &txn_u)) ) { + mdb_logerror(stderr, ret_h, "lmdb: blobs walk: txn_begin fail"); + return; + } + + { + c3_h ops_h = MDB_INTEGERKEY; + + if ( (ret_h = mdb_dbi_open(txn_u, "BLOBS", ops_h, &mdb_u)) ) { + // database doesn't exist yet -- nothing to walk + // + mdb_txn_abort(txn_u); + return; + } + } + + { + MDB_cursor* cur_u; + MDB_val key_u = { .mv_size = sizeof(c3_d), .mv_data = &lo_d }; + MDB_val val_u; + + if ( (ret_h = mdb_cursor_open(txn_u, mdb_u, &cur_u)) ) { + mdb_logerror(stderr, ret_h, "lmdb: blobs walk: cursor_open fail"); + mdb_txn_abort(txn_u); + return; + } + + // position at first key >= lo_d + // + ret_h = mdb_cursor_get(cur_u, &key_u, &val_u, MDB_SET_RANGE); + + while ( !ret_h ) { + c3_d eve_d = c3_sift_chub(key_u.mv_data); + + if ( eve_d > hi_d ) { + break; + } + + c3_z len_z = val_u.mv_size / sizeof(c3_d); + fun_f(ptr_v, eve_d, (c3_d*)val_u.mv_data, len_z); + + ret_h = mdb_cursor_get(cur_u, &key_u, &val_u, MDB_NEXT); + } + + mdb_cursor_close(cur_u); + } + + mdb_txn_abort(txn_u); +} + /* mdb_logerror(): writes an error message and lmdb error code to f. */ void mdb_logerror(FILE* f, int err, const char* fmt, ...) diff --git a/pkg/vere/db/lmdb.h b/pkg/vere/db/lmdb.h index e630cf4dd2..34e9252955 100644 --- a/pkg/vere/db/lmdb.h +++ b/pkg/vere/db/lmdb.h @@ -91,4 +91,29 @@ void u3_lmdb_walk_done(u3_lmdb_walk* itr_u); + /* u3_lmdb_save_blobs(): save blob IDs for an event into BLOBS table. + */ + c3_o + u3_lmdb_save_blobs(MDB_env* env_u, + c3_d eve_d, + c3_d* ids_d, + c3_z len_z); + + /* u3_lmdb_read_blobs(): read blob IDs for an event from BLOBS table. + */ + c3_o + u3_lmdb_read_blobs(MDB_env* env_u, + c3_d eve_d, + c3_d** out_d, + c3_z* out_z); + + /* u3_lmdb_walk_blobs(): iterate BLOBS table for events in a range. 
+ */ + void + u3_lmdb_walk_blobs(MDB_env* env_u, + c3_d lo_d, + c3_d hi_d, + void* ptr_v, + void (*fun_f)(void*, c3_d, c3_d*, c3_z)); + #endif /* ifndef U3_VERE_DB_LMDB_H */ diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index 22c3bce5f8..9815627be7 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1220,30 +1220,6 @@ _disk_epoc_roll(u3_disk* log_u, c3_d epo_d) return c3n; } -/* _disk_chop_bob_atom(): u3a_walk_fore atom callback — collect bob bids. -*/ -static void -_disk_chop_bob_atom(u3_atom a, void* ptr_v) -{ - if ( c3y != u3a_is_bob(a) ) return; - struct { c3_d* ids; c3_z len; c3_z cap; } *acc = ptr_v; - if ( acc->len == acc->cap ) { - acc->cap = acc->cap ? acc->cap * 2 : 8; - acc->ids = c3_realloc(acc->ids, acc->cap * sizeof(c3_d)); - } - acc->ids[acc->len++] = - ((c3_d)u3a_bob_mug(a) << 32) | (c3_d)u3a_bob_seq(a); -} - -/* _disk_chop_bob_cell(): u3a_walk_fore cell callback — always descend. -*/ -static c3_o -_disk_chop_bob_cell(u3_noun n, void* ptr_v) -{ - (void)n; (void)ptr_v; - return c3y; -} - /* _disk_epoc_kill: delete an epoch. */ static c3_o @@ -1586,7 +1562,7 @@ _disk_chop_delete_cb(u3_noun kev, void* ptr_v) fprintf(stderr, "chop: blob [%x/%u] log=%u les=%u bob=%s\r\n", (unsigned)mug_h, (unsigned)seq_h, (unsigned)log, (unsigned)les, - (c3y == ded_o) ? "dead" : "live"); + (c3y == ded_o) ? "ded" : "liv"); if ( 0 == log && 0 == les && c3y == ded_o ) { u3_blob_wipe(del_u->pax_c, mug_h, seq_h); @@ -1601,10 +1577,35 @@ _disk_chop_delete_cb(u3_noun kev, void* ptr_v) } } +/* _disk_chop_blobs_cb(): u3_lmdb_walk_blobs callback — increment log for +** each blob ID referenced by an event. 
+*/ +static void +_disk_chop_blobs_cb(void* ptr_v, c3_d eve_d, c3_d* ids_d, c3_z len_z) +{ + (void)ptr_v; + (void)eve_d; + + for ( c3_z i_z = 0; i_z < len_z; i_z++ ) { + u3_noun bid = u3i_chub(ids_d[i_z]); + u3_weak bv = u3h_get(u3H->blb_p, bid); + + if ( u3_none != bv ) { + u3_noun log = u3h(bv); + u3_noun les = u3k(u3h(u3t(bv))); + u3_noun bob = u3k(u3t(u3t(bv))); + u3h_put(u3H->blb_p, bid, u3nt(log + 1, les, bob)); + u3z(bv); + } + + u3z(bid); + } +} + /* _disk_chop_rebuild_log_w(): rebuild blob log after epoch deletion. ** ** 1. zero all log and les via collect-then-modify pattern -** 2. scan remaining LMDB events, increment log for each bob atom +** 2. scan BLOBS table for remaining events, increment log for each ref ** 3. delete blob files whose total refcount is now zero */ static void @@ -1631,49 +1632,14 @@ _disk_chop_rebuild_log_w(u3_disk* log_u) c3_free(col_u.bid_d); } - // step 2: scan remaining events for bob atoms + // step 2: scan BLOBS table for remaining events, rebuild log // c3_d lo_d = 0, hi_d = 0; u3_lmdb_gulf(log_u->mdb_u, &lo_d, &hi_d); if ( lo_d && hi_d >= lo_d ) { - u3_lmdb_walk itr_u; - if ( c3y == u3_lmdb_walk_init(log_u->mdb_u, &itr_u, lo_d, hi_d) ) { - while ( itr_u.nex_d <= itr_u.las_d ) { - size_t len_i; - void* buf_v; - - if ( c3n == u3_lmdb_walk_next(&itr_u, &len_i, &buf_v) ) break; - if ( len_i <= 4 ) continue; - - c3_y* pay_y = (c3_y*)buf_v + 4; - c3_d pay_d = len_i - 4; - u3_weak job = u3s_tap_xeno(pay_d, pay_y); - if ( u3_none == job ) continue; // jam event — no bob atoms - - struct { c3_d* ids; c3_z len; c3_z cap; } acc = {0, 0, 0}; - u3a_walk_fore(job, &acc, _disk_chop_bob_atom, _disk_chop_bob_cell); - - for ( c3_z i = 0; i < acc.len; i++ ) { - u3_noun bid = u3i_chub(acc.ids[i]); - u3_weak bv = u3h_get(u3H->blb_p, bid); - - if ( u3_none != bv ) { - u3_noun log = u3h(bv); - u3_noun les = u3k(u3h(u3t(bv))); - u3_noun bob = u3k(u3t(u3t(bv))); - u3h_put(u3H->blb_p, bid, u3nt(log + 1, les, bob)); - u3z(bv); - } - - u3z(bid); - } - 
- c3_free(acc.ids); - u3z(job); - } - u3_lmdb_walk_done(&itr_u); - } + u3_lmdb_walk_blobs(log_u->mdb_u, lo_d, hi_d, 0, + _disk_chop_blobs_cb); } // step 3: delete unreferenced blobs and clean up blb_p diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index b4a81cfdb7..707b964415 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -18,16 +18,16 @@ /* u3v_lease: PQ entry for lease TTL expiry. ** -** Tracks a single les_w increment for a blob. If the king +** tracks a single les_w increment for a blob. if the king ** releases the lease (via %blob-release IPC) before expiry, -** dead_o is set to c3y and the PQ sweeper skips the decrement. -** If the king crashes, the TTL fires and les_w is decremented. +** ded_o is set to c3y and the PQ sweeper skips the decrement. +** if the king crashes, the TTL fires and les_w is decremented. */ typedef struct _u3v_lease { c3_d exp_d; // expiry time (Unix ms) c3_h mug_h; // blob mug c3_h seq_h; // blob seq within mug bucket - c3_o dead_o; // c3y if lease already released + c3_o ded_o; // c3y if lease already released } u3v_lease; c3_c tac_c[256]; // tracing label @@ -36,7 +36,7 @@ c3_c tac_c[256]; // tracing label ** ** C-heap structure (not in loom). Leases are owned by the PQ — ** it is the sole place that c3_free()s them. Committed leases are -** marked dead_o=c3y when their lease is released; the sweeper pops +** marked ded_o=c3y when their lease is released; the sweeper pops ** and frees them when they bubble to the top. */ typedef struct _mars_lease_pq { @@ -123,9 +123,9 @@ _mars_blob_del(c3_h mug_h, c3_h seq_h) /* _blob_maybe_delete(): delete blob iff all refs are zero. ** -** bob is a strong ref held by the cell. If use_w == 1, the cell -** is the ONLY holder — the kernel dropped its reference. Safe to -** delete. If use_w > 1, something else still references the atom. +** bob is a strong ref held by the cell. if use_w == 1, the cell +** is the ONLY holder and the kernel lost its references: safe to +** delete. 
if use_w > 1, the arvo still references the atom. */ static void _blob_maybe_delete(c3_h mug_h, c3_h seq_h) @@ -140,15 +140,14 @@ _blob_maybe_delete(c3_h mug_h, c3_h seq_h) u3_noun les = u3h(u3t(bv)); u3_noun bob = u3t(u3t(bv)); - // bob == 0: no atom. bob != 0 && use_w == 1: cell is only holder. + // bob == 0: no atom. bob != 0 && use_w == 1: cell is only holder. // - c3_o bob_dead = ( 0 == bob ) ? c3y - : __( 1 == u3a_use(bob) ); + c3_o bob_o = ( 0 == bob ) ? c3y : __( 1 == u3a_use(bob) ); - c3_o dead_o = ( 0 == log && 0 == les && c3y == bob_dead ) ? c3y : c3n; + c3_o ded_o = ( 0 == log && 0 == les && c3y == bob_o ) ? c3y : c3n; u3z(bv); - if ( c3y == dead_o ) { + if ( c3y == ded_o ) { _mars_blob_del(mug_h, seq_h); } } @@ -428,8 +427,15 @@ _mars_fact(u3_mars* mar_u, } u3z(bid); + } - // TODO: write blob-ref log-inc event to LMDB (tag 0x02, op 0x03) + // persist blob refs to LMDB for this event + // + if ( acc.len ) { + u3_lmdb_save_blobs(mar_u->log_u->mdb_u, + mar_u->dun_d, + acc.ids, + acc.len); } c3_free(acc.ids); @@ -766,7 +772,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) // Uses a min-heap PQ keyed by exp_d: peek at the root, stop once the // earliest-expiring lease is still in the future. // - // Released leases are marked dead_o=c3y by %blob-release and + // Released leases are marked ded_o=c3y by %blob-release and // left in the PQ; they are freed here when they bubble to the top. 
// { @@ -780,7 +786,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) // dead lease (already released) — free and continue scanning // - if ( c3y == top_u->dead_o ) { + if ( c3y == top_u->ded_o ) { _mars_pq_pop(&_mars_pq); c3_free(top_u); continue; @@ -817,7 +823,6 @@ _mars_work(u3_mars* mar_u, u3_noun jar) u3z(bid); } - // TODO: write blob-ref lease-release event to LMDB (tag 0x02, op 0x02) _blob_maybe_delete(top_u->mug_h, top_u->seq_h); c3_free(top_u); @@ -1026,7 +1031,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) u3v_lease* lea_u = c3_malloc(sizeof(*lea_u)); lea_u->mug_h = mug_h; lea_u->seq_h = seq_h; - lea_u->dead_o = c3n; + lea_u->ded_o = c3n; { struct timeval tv_u; gettimeofday(&tv_u, 0); @@ -1037,8 +1042,6 @@ _mars_work(u3_mars* mar_u, u3_noun jar) _mars_pq_push(&_mars_pq, lea_u); } - // TODO: write blob-ref lease-issue event to LMDB (tag 0x02, op 0x01) - // save blob bank to snapshot so entries survive crash // mar_u->fag_w |= _mars_fag_mute; @@ -1088,7 +1091,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) u3v_lease* lea_u = c3_malloc(sizeof(*lea_u)); lea_u->mug_h = mug_h; lea_u->seq_h = seq_h; - lea_u->dead_o = c3n; + lea_u->ded_o = c3n; { struct timeval tv_u; gettimeofday(&tv_u, 0); @@ -1142,12 +1145,6 @@ _mars_work(u3_mars* mar_u, u3_noun jar) // mark the corresponding PQ entry dead (if findable) // expiry sweeper will skip it // - // TODO: for now we rely on les accounting. PQ entries - // with dead_o=c3n that outlive their lease's release will - // try to decrement les again on expiry, but les is - // already 0 → clamped by the >0 check → safe. - // - _blob_maybe_delete(mug_h, seq_h); u3z(jar); @@ -1428,27 +1425,30 @@ _mars_poke_play(u3_mars* mar_u, const u3_fact* tac_u) // the gap from snapshot to head. 
// { - struct { c3_d* ids; c3_z len; c3_z cap; } acc = {0, 0, 0}; - u3a_walk_fore(tac_u->job, &acc, _mars_blob_bobs_atom, _mars_blob_bobs_cell); - - for ( c3_z i_z = 0; i_z < acc.len; i_z++ ) { - c3_h mug_h = (c3_h)(acc.ids[i_z] >> 32); - c3_h seq_h = (c3_h)(acc.ids[i_z] & 0xFFFFFFFF); + c3_d* ids_d = 0; + c3_z len_z = 0; - u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); - u3_weak bv = u3h_get(u3H->blb_p, bid); + if ( c3y == u3_lmdb_read_blobs(mar_u->log_u->mdb_u, + tac_u->eve_d, + &ids_d, + &len_z) ) + { + for ( c3_z i_z = 0; i_z < len_z; i_z++ ) { + u3_noun bid = u3i_chub(ids_d[i_z]); + u3_weak bv = u3h_get(u3H->blb_p, bid); + + if ( u3_none != bv ) { + u3_noun log = u3h(bv); + u3_noun les = u3k(u3h(u3t(bv))); + u3_noun bob = u3k(u3t(u3t(bv))); + u3h_put(u3H->blb_p, bid, u3nt(log + 1, les, bob)); + u3z(bv); + } - if ( u3_none != bv ) { - u3_noun log = u3h(bv); - u3_noun les = u3k(u3h(u3t(bv))); - u3_noun bob = u3k(u3t(u3t(bv))); - u3h_put(u3H->blb_p, bid, u3nt(log + 1, les, bob)); - u3z(bv); + u3z(bid); } - - u3z(bid); + c3_free(ids_d); } - c3_free(acc.ids); } } From 36ee292bf204bf5342b9fd856559dd9af440409f Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Thu, 30 Apr 2026 08:33:06 -0500 Subject: [PATCH 25/31] wip: cleanup and comment --- pkg/noun/allocate.c | 13 ++++++++----- pkg/noun/allocate.h | 3 ++- pkg/noun/vortex.h | 6 +++++- pkg/vere/disk.c | 5 ----- pkg/vere/mars.c | 23 +++++++++++++++++++++++ 5 files changed, 38 insertions(+), 12 deletions(-) diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index b5cec76bff..a87033b5c2 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -847,11 +847,14 @@ _me_gain_south(u3_noun dog) /* _me_bob_dead(): handle a bob atom whose loom refcount just hit zero. ** -** The blb_p cell holds a strong ref to the bob atom, so this only -** fires AFTER _mars_blob_del already removed the entry (u3h_del → -** u3z(cell) → u3z(bob) → here). Nothing left to do for tracked -** blobs. 
For untracked bob atoms (no blb_p entry — e.g., temporary -** atoms from cueing), notify the king to release its lease. +** Called during GC when a bob-flagged atom's use_w reaches zero. +** For tracked blobs (with a blb_p entry), this fires only after +** _mars_blob_del already removed the entry (u3h_del -> u3z(cell) +** -> u3z(bob) -> here), so there is nothing left to do. +** +** For untracked bob atoms on the king side (e.g., transient atoms +** from cueing that were never installed in blb_p), this notifies +** the king to release its IPC lease. */ static void _me_bob_dead(u3a_atom* atm_u) diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index 2b60216cf1..7ef09221e6 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -342,7 +342,8 @@ STATIC_ASSERT( u3a_vits <= u3a_min_log, # define u3a_is_cell(som) u3a_is_pom(som) /* u3a_blob_flag: MSB of u3a_atom.len_w marks an indirect atom as a bob - ** (blob reference). The remaining bits hold the actual data word count. + ** (blob reference backed by an on-disk file rather than loom data). + ** The remaining bits hold the actual data word count. ** In VERE64, len_w is uint64_t so we use bit 63; in 32-bit we use bit 31. */ # ifdef VERE64 diff --git a/pkg/noun/vortex.h b/pkg/noun/vortex.h index 2ffd0d8d2b..942f30de2a 100644 --- a/pkg/noun/vortex.h +++ b/pkg/noun/vortex.h @@ -22,7 +22,11 @@ ** NB: version must be first for ease of migration. ** ** blb_p is the blob bank HAMT (bid -> [log les bob]), checkpointed - ** in image.bin. A blob file is deleted when value is [0 0 0]. + ** in image.bin. Each value is a cell [log les bob] where: + ** log = event-log refcount + ** les = lease refcount (transient, zeroed on restart) + ** bob = 0 or the interned bob atom + ** A blob file is deleted when log == 0, les == 0, and bob is dead. 
*/ typedef struct _u3v_home { u3v_version ver_d; // version number diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index 9815627be7..f54b731aaa 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1559,11 +1559,6 @@ _disk_chop_delete_cb(u3_noun kev, void* ptr_v) // c3_o ded_o = ( 0 == bob ) ? c3y : __( 1 == u3a_use(bob) ); - fprintf(stderr, "chop: blob [%x/%u] log=%u les=%u bob=%s\r\n", - (unsigned)mug_h, (unsigned)seq_h, - (unsigned)log, (unsigned)les, - (c3y == ded_o) ? "ded" : "liv"); - if ( 0 == log && 0 == les && c3y == ded_o ) { u3_blob_wipe(del_u->pax_c, mug_h, seq_h); diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index 707b964415..90b379362d 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -16,6 +16,29 @@ #include #include +/* Blob storage lifecycle +** +** blb_p HAMT: bid -> [log les bob] +** bid = (mug_h << 32) | seq_h (direct atom on VERE64) +** log = event-log refcount (incremented on commit, rebuilt on chop) +** les = lease refcount (transient IPC state, zeroed on restart) +** bob = 0 or the interned bob atom +** +** Blob files live at $pier/.urb/bob//. +** +** Deletion condition: log == 0 && les == 0 && use_w(bob) <= 1 +** (use_w == 1 means only the cell holds the atom; kernel dropped it) +** +** Lifecycle: +** 1. King detects large file, saves to .urb/bob/stg/, sends %blob IPC +** 2. Serf installs blob: moves staging file, creates blb_p entry [0 1 0] +** 3. King sends poke event with bob atom (RAM-serialized with BOB tag) +** 4. Serf commits: _mars_fact increments log, writes blob-ref to LMDB +** 5. File deletion (|rm): kernel drops blob ref, les expires/released +** 6. Epoch deletion (chop): rebuilds log from LMDB BLOBS table +** 7. When log == 0 && les == 0 && use_w(bob) <= 1: delete file + entry +*/ + /* u3v_lease: PQ entry for lease TTL expiry. ** ** tracks a single les_w increment for a blob. 
if the king From da4181713c0c57ea43408b8cfeab675e3282c36b Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Thu, 30 Apr 2026 11:16:57 -0500 Subject: [PATCH 26/31] wip: cleans up includes --- pkg/c3/platform/windows/mman.h | 12 ++++++++++++ pkg/noun/retrieve.c | 2 -- pkg/vere/blob.c | 2 -- pkg/vere/blob_tests.c | 2 -- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pkg/c3/platform/windows/mman.h b/pkg/c3/platform/windows/mman.h index 965fecfd8c..abb43675d0 100644 --- a/pkg/c3/platform/windows/mman.h +++ b/pkg/c3/platform/windows/mman.h @@ -23,4 +23,16 @@ int mprotect(void *addr, size_t len, int prot); #define MS_SYNC 0 /* Synchronous memory sync. */ #define MS_INVALIDATE 2 /* Invalidate the caches. */ +/* madvise: no-op on Windows. Advisory only — safe to skip. +*/ +#define MADV_NORMAL 0 +#define MADV_SEQUENTIAL 2 +#define MADV_DONTNEED 4 + +static inline int madvise(void* addr, size_t len, int advice) +{ + (void)addr; (void)len; (void)advice; + return 0; +} + #endif//_SYS_MMAN_H diff --git a/pkg/noun/retrieve.c b/pkg/noun/retrieve.c index 1b69ab1bb3..cc04bf6f99 100644 --- a/pkg/noun/retrieve.c +++ b/pkg/noun/retrieve.c @@ -15,9 +15,7 @@ #include #include #include -#include #include -#include // declarations of inline functions diff --git a/pkg/vere/blob.c b/pkg/vere/blob.c index bb55b44d19..229f3f0c30 100644 --- a/pkg/vere/blob.c +++ b/pkg/vere/blob.c @@ -8,9 +8,7 @@ #include #include #include -#include #include -#include // maximum bytes per single read()/write() call. // POSIX allows read()/write() to return EINVAL if count > SSIZE_MAX; diff --git a/pkg/vere/blob_tests.c b/pkg/vere/blob_tests.c index 911f144e9e..edf8cd1795 100644 --- a/pkg/vere/blob_tests.c +++ b/pkg/vere/blob_tests.c @@ -9,9 +9,7 @@ #include #include #include -#include #include -#include /* Tests for pkg/vere/blob.c — the content-addressed blob store. 
** From 074e1560acf7876bfc61ab1e9bbf3c9dc96f12fd Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Fri, 1 May 2026 08:19:47 -0500 Subject: [PATCH 27/31] wip: fixes blobs for windows --- pkg/vere/blob.c | 45 ++++++++++++++++++++++++++++++--------------- pkg/vere/io/unix.c | 2 +- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/pkg/vere/blob.c b/pkg/vere/blob.c index 229f3f0c30..71d622a957 100644 --- a/pkg/vere/blob.c +++ b/pkg/vere/blob.c @@ -142,19 +142,34 @@ _blob_lock_acquire(const c3_c* pax_c, c3_h mug_h) return 0; } - // exclusive advisory lock - struct flock flk_u = { - .l_type = F_WRLCK, - .l_whence = SEEK_SET, - .l_start = 0, - .l_len = 0, - }; - if ( -1 == fcntl(lok_i, F_SETLKW, &flk_u) ) { - fprintf(stderr, "blob: failed to lock %s: %s\r\n", - lck_c, strerror(errno)); - close(lok_i); - return 0; + // exclusive advisory lock (blocking) + // +#ifdef U3_OS_windows + { + HANDLE han_u = (HANDLE)_get_osfhandle(lok_i); + OVERLAPPED olp_u = {0}; + if ( !LockFileEx(han_u, LOCKFILE_EXCLUSIVE_LOCK, 0, MAXDWORD, MAXDWORD, &olp_u) ) { + fprintf(stderr, "blob: failed to lock %s\r\n", lck_c); + close(lok_i); + return 0; + } + } +#else + { + struct flock flk_u = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0, + }; + if ( -1 == fcntl(lok_i, F_SETLKW, &flk_u) ) { + fprintf(stderr, "blob: failed to lock %s: %s\r\n", + lck_c, strerror(errno)); + close(lok_i); + return 0; + } } +#endif // read current next-seq (0 means empty/new file) c3_c buf_c[32] = {0}; @@ -188,7 +203,7 @@ _blob_lock_acquire(const c3_c* pax_c, c3_h mug_h) } // fsync and close (releases lock) - fsync(lok_i); + c3_sync(lok_i); close(lok_i); return nex_h; @@ -328,7 +343,7 @@ u3_blob_save(const c3_c* pax_c, rem_d -= wrt_i; } - fsync(fid_i); + c3_sync(fid_i); close(fid_i); *seq_h = nex_h; @@ -581,7 +596,7 @@ u3_blob_move_stg(const c3_c* pax_c, return c3n; } } - fsync(dst_i); + c3_sync(dst_i); close(src_i); close(dst_i); c3_unlink(stg_c); diff --git 
a/pkg/vere/io/unix.c b/pkg/vere/io/unix.c index bdb2320c12..30ca3daa9c 100644 --- a/pkg/vere/io/unix.c +++ b/pkg/vere/io/unix.c @@ -1101,7 +1101,7 @@ _unix_update_file(u3_unix* unx_u, u3_ufil* fil_u) return u3_nul; } - fsync(stg_i); + c3_sync(stg_i); close(stg_i); // find the mount name for this file (walk parent dirs up to mon_u) From c3bd045ff8b42fa1c3f60d063387f01bded9a5d6 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Fri, 1 May 2026 09:59:27 -0500 Subject: [PATCH 28/31] wip: fixes serial tests for 32-bit mode --- pkg/noun/serial_tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/noun/serial_tests.c b/pkg/noun/serial_tests.c index 57e2fbdd2f..14453250f9 100644 --- a/pkg/noun/serial_tests.c +++ b/pkg/noun/serial_tests.c @@ -522,7 +522,7 @@ _test_ram_bob_roundtrip(void) u3i_blob(0x12345678, 1), u3nc(c3__fast, u3i_blob(0x7a0b0000, 7)), u3i_blob(0x12345678, 2), - 0x1234567890abcdefULL); + 0x12345678); ret_i &= _test_ram_bob_spec("mixed", ref); u3z(ref); } From f425672cc6755369a1d3b0576fa61d630f6abde8 Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Wed, 6 May 2026 14:58:03 -0500 Subject: [PATCH 29/31] blob: returns `u3a_blob` metadata struct --- pkg/noun/allocate.c | 81 +++++++++++++++++++++++------ pkg/noun/allocate.h | 38 ++++++++++++-- pkg/noun/imprison.c | 35 ++++--------- pkg/noun/manage.c | 48 +++++------------ pkg/noun/vortex.c | 37 +++++++++++-- pkg/noun/vortex.h | 12 ++--- pkg/vere/disk.c | 116 +++++++++++++++++++---------------------- pkg/vere/mars.c | 124 ++++++++++++-------------------------------- 8 files changed, 247 insertions(+), 244 deletions(-) diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index a87033b5c2..c8060a79ec 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -845,36 +845,87 @@ _me_gain_south(u3_noun dog) } } +/* u3a_blob_get(): look up blb_p entry for (mug_h, seq_h). RETAINS. 
+*/ +u3a_blob* +u3a_blob_get(c3_h mug_h, c3_h seq_h) +{ + u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); + u3_weak bv = u3h_get(u3H->blb_p, bid); + u3z(bid); + + if ( u3_none == bv ) return 0; + + c3_d off_d = 0; + u3r_safe_chub(bv, &off_d); + u3z(bv); + + return (u3a_blob*)u3a_into((u3_post)off_d); +} + +/* u3a_blob_new(): allocate fresh u3a_blob and install it under (mug_h, seq_h). +*/ +u3a_blob* +u3a_blob_new(c3_h mug_h, c3_h seq_h) +{ + u3a_blob* blb_u = u3a_walloc(c3_wiseof(u3a_blob)); + blb_u->log_h = 0; + blb_u->les_h = 0; + blb_u->atm_p = 0; + + u3_post off_p = u3a_outa(blb_u); + u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); + u3h_put(u3H->blb_p, bid, u3i_chub((c3_d)off_p)); + u3z(bid); + + return blb_u; +} + +/* u3a_blob_drop(): remove blb_p entry and free underlying u3a_blob. +*/ +void +u3a_blob_drop(c3_h mug_h, c3_h seq_h) +{ + u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); + if ( !blb_u ) return; + + u3a_wfree(blb_u); + + u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); + u3h_del(u3H->blb_p, bid); + u3z(bid); +} + /* _me_bob_dead(): handle a bob atom whose loom refcount just hit zero. ** -** Called during GC when a bob-flagged atom's use_w reaches zero. -** For tracked blobs (with a blb_p entry), this fires only after -** _mars_blob_del already removed the entry (u3h_del -> u3z(cell) -** -> u3z(bob) -> here), so there is nothing left to do. +** Clears blb_u->atm_p so the blob is no longer pinned by a live atom. +** If log_h and les_h are also zero, calls blob_del_f to delete the +** file and the HAMT entry. ** -** For untracked bob atoms on the king side (e.g., transient atoms -** from cueing that were never installed in blb_p), this notifies -** the king to release its IPC lease. +** For untracked bob atoms (no blb_p entry — e.g., king-side transient +** atoms from cueing), notifies the king to release its IPC lease. 
*/ static void _me_bob_dead(u3a_atom* atm_u) { if ( !u3C.blob_del_f ) return; - c3_h mug_h = (c3_h)atm_u->mug_w; - c3_h seq_h = (c3_h)atm_u->buf_w[0]; - u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); + c3_h mug_h = (c3_h)atm_u->mug_w; + c3_h seq_h = (c3_h)atm_u->buf_w[0]; - u3_weak bv = u3h_get(u3H->blb_p, bid); - u3z(bid); + u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); - if ( u3_none == bv ) { + if ( !blb_u ) { // no entry — untracked bob atom (king-side lease release) // u3C.blob_del_f(mug_h, seq_h); + return; } - else { - u3z(bv); + + blb_u->atm_p = 0; + + if ( 0 == blb_u->log_h && 0 == blb_u->les_h ) { + u3C.blob_del_f(mug_h, seq_h); } } diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index 7ef09221e6..5828492133 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -155,18 +155,27 @@ u3_noun tel; } u3a_cell; - /* blb_p value layout: noun cell [log les bob] + /* u3a_blob: loom-resident metadata for a blob file. ** ** Stored in u3H->blb_p keyed by bid = (mug_h << 32) | seq_h. ** bid is always a direct atom on VERE64 (63 bits max). ** ** Three independent ref-sources protect the backing file: - ** log — direct atom: event-log refcount (inc on commit, dec on chop) - ** les — direct atom: lease refcount (inc on king acquire, dec on release/expiry) - ** bob — 0 or the interned bob atom noun (GC-tracked) + ** log_h — event-log refcount (inc on commit, rebuilt on chop) + ** les_h — lease refcount (inc on king acquire, dec on release/expiry) + ** atm_p — loom offset of the interned bob atom (0 = none) ** - ** The blob file is deleted when the value is [0 0 0]. + ** The blob file is deleted when log_h == 0 && les_h == 0 && atm_p == 0. + ** + ** Refcounts are fixed-width (c3_h) so the struct layout is + ** identical on VERE32 and VERE64; atm_p is platform-sized + ** (u3_post) since loom offsets widen on VERE64. 
*/ + typedef struct __attribute__((aligned(4))) { + c3_h log_h; // event-log refcount + c3_h les_h; // lease refcount (transient, zeroed on restart) + u3p(u3a_atom) atm_p; // loom offset of interned bob atom (0 = none) + } u3a_blob; STATIC_ASSERT( (((c3_w)1) << u3a_min_log) == u3a_minimum, "log2 minimum allocation" ); @@ -875,6 +884,25 @@ u3a_post_info(u3_post); c3_w u3a_idle(u3a_road* rod_u); + /* u3a_blob_get(): look up blb_p entry by (mug_h, seq_h). + ** Returns NULL if no entry exists. RETAINS. + */ + u3a_blob* + u3a_blob_get(c3_h mug_h, c3_h seq_h); + + /* u3a_blob_new(): allocate a fresh u3a_blob and install it in + ** blb_p under (mug_h, seq_h). Caller is responsible for + ** ensuring no entry exists; struct is zero-initialized. + */ + u3a_blob* + u3a_blob_new(c3_h mug_h, c3_h seq_h); + + /* u3a_blob_drop(): remove blb_p entry for (mug_h, seq_h) and + ** free the underlying u3a_blob. No-op if no entry. + */ + void + u3a_blob_drop(c3_h mug_h, c3_h seq_h); + /* u3a_ream(): ream free-lists. */ void diff --git a/pkg/noun/imprison.c b/pkg/noun/imprison.c index fd2d4b1aea..344b9501eb 100644 --- a/pkg/noun/imprison.c +++ b/pkg/noun/imprison.c @@ -836,27 +836,20 @@ u3i_vmolt(u3_noun som, u3i_molt_pair pairs[], c3_z len_z) /* u3i_blob(): construct or intern a bob atom (blob reference). ** -** If a blb_p entry exists with a live bob atom (bob > 0), -** returns the existing atom. Otherwise, it allocates a fresh bob -** atom and stores it in the blb_p entry (if one exists). +** If a u3a_blob entry exists in blb_p with a live interned atom +** (atm_p != 0), returns the existing atom. Otherwise allocates a +** fresh bob atom and stores its offset in blb_u->atm_p (if a +** blb_p entry exists). 
*/ u3_atom u3i_blob(c3_h mug_h, c3_h seq_h) { u3_assert( &(u3H->rod_u) == u3R ); - u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); - u3_weak bv = u3h_get(u3H->blb_p, bid); + u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); - if ( u3_none != bv ) { - u3_noun bob = u3t(u3t(bv)); // third element of [log les bob] - - if ( bob != 0 ) { - u3_atom ret = u3k(bob); - u3z(bv); - u3z(bid); - return ret; - } + if ( blb_u && blb_u->atm_p ) { + return u3k(u3a_to_pug(blb_u->atm_p)); } // allocate fresh bob atom @@ -869,17 +862,11 @@ u3i_blob(c3_h mug_h, c3_h seq_h) vat_u->len_w = 1 | u3a_blob_flag; vat_u->buf_w[0] = seq_h; - u3_atom atm = u3a_to_pug(u3a_outa(nov_w)); + u3_post atm_p = u3a_outa(nov_w); - // update blb_p entry if one exists - // - if ( u3_none != bv ) { - u3_noun log = u3k(u3h(bv)); - u3_noun les = u3k(u3h(u3t(bv))); - u3h_put(u3H->blb_p, bid, u3nt(log, les, u3k(atm))); - u3z(bv); + if ( blb_u ) { + blb_u->atm_p = atm_p; } - u3z(bid); - return atm; + return u3a_to_pug(atm_p); } diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index 4121cacfad..2f764830f6 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -575,22 +575,22 @@ STATIC_ASSERT( ((c3_wiseof(u3v_home) * sizeof(c3_w)) == sizeof(u3v_home)), STATIC_ASSERT( U3N_VERLAT < (1U << 5), "5-bit bytecode version" ); -/* _find_home_collect_bid(): u3h_walk_with callback — collect blb_p bids. +/* _find_home_zero_les_cb(): u3h_walk_with callback — zero les_h on each +** u3a_blob in place. Leases are transient IPC state backed by a +** C-heap PQ that is not persisted; after restart the PQ is empty, +** so any les_h count from the previous boot is stale. 
*/ static void -_find_home_collect_bid(u3_noun kev, void* ptr_v) +_find_home_zero_les_cb(u3_noun kev, void* ptr_v) { - struct { c3_d* bid_d; c3_z len_z; c3_z cap_z; } *acc = ptr_v; - u3_noun key = u3h(kev); + (void)ptr_v; + u3_noun val = u3t(kev); - c3_d bid_d = 0; - u3r_safe_chub(key, &bid_d); + c3_d off_d = 0; + u3r_safe_chub(val, &off_d); - if ( acc->len_z == acc->cap_z ) { - acc->cap_z = acc->cap_z ? acc->cap_z * 2 : 8; - acc->bid_d = c3_realloc(acc->bid_d, acc->cap_z * sizeof(c3_d)); - } - acc->bid_d[acc->len_z++] = bid_d; + u3a_blob* blb_u = (u3a_blob*)u3a_into((u3_post)off_d); + blb_u->les_h = 0; } /* _find_home(): in restored image, point to home road. @@ -695,31 +695,11 @@ _find_home(void) } - // reset all les to 0: leases are transient IPC state backed by a + // reset all les_h to 0: leases are transient IPC state backed by a // C-heap PQ that is not persisted. after restart the PQ is empty, - // so the entries that would decrement les are gone. - // - // can't modify HAMT entries during walk, so collect bids first. + // so any les_h count from the previous boot is stale. 
// - { - struct { c3_d* bid_d; c3_z len_z; c3_z cap_z; } acc = {0, 0, 0}; - u3h_walk_with(u3H->blb_p, _find_home_collect_bid, &acc); - - for ( c3_z i_z = 0; i_z < acc.len_z; i_z++ ) { - u3_noun bid = u3i_chub(acc.bid_d[i_z]); - u3_weak bv = u3h_get(u3H->blb_p, bid); - - if ( u3_none != bv ) { - u3_noun log = u3k(u3h(bv)); - u3_noun bob = u3k(u3t(u3t(bv))); - u3h_put(u3H->blb_p, bid, u3nt(log, 0, bob)); - u3z(bv); - } - - u3z(bid); - } - c3_free(acc.bid_d); - } + u3h_walk_with(u3H->blb_p, _find_home_zero_les_cb, 0); if ( !u3R->lop_p ) u3R->lop_p = u3h_new(); if ( !u3R->cax.for_p ) u3R->cax.for_p = u3h_new_cache(u3C.per_w); } diff --git a/pkg/noun/vortex.c b/pkg/noun/vortex.c index af2ca82896..b8924a8612 100644 --- a/pkg/noun/vortex.c +++ b/pkg/noun/vortex.c @@ -375,8 +375,9 @@ u3v_mark() qua_u[1]->nam_c = strdup("wish cache"); qua_u[1]->siz_w = u3a_mark_noun(arv_u->yot) * sizeof(c3_w); - // mark blob bank HAMT. values are noun cells [log les bob], - // so u3h_mark handles everything (nodes, keys, and value cells). + // mark blob bank HAMT. values are atoms encoding loom offsets of + // u3a_blob structs; the structs themselves are walloc'd blocks not + // subject to noun mark/sweep. u3h_mark covers nodes, keys, values. // if ( u3H->blb_p ) { u3h_mark(u3H->blb_p); @@ -407,6 +408,31 @@ u3v_reclaim(void) } } +/* _v_rewrite_blb_cb(): u3h_walk_with callback for compaction. +** +** Each blb_p value atom encodes the loom offset of a u3a_blob struct. +** pack_seek determines new offsets but the integer values stored +** inside the value atoms are not noun pointers, so u3h_relocate +** doesn't update them. We mutate cell->tel in place to the new +** offset. Must run BEFORE u3h_relocate(&blb_p) — at that point +** slot pointers and cells are still at their pre-pack addresses, +** so u3a_to_ptr(kev) resolves correctly. 
+*/ +static void +_v_rewrite_blb_cb(u3_noun kev, void* ptr_v) +{ + (void)ptr_v; + u3a_cell* cel_u = (u3a_cell*)u3a_to_ptr(kev); + + // tel is a direct atom; its value is the loom offset of the + // u3a_blob. loom offsets always fit in cat range on both VERE32 + // (≤30 bits) and VERE64 (≤34 bits). + // + u3_post off_p = (u3_post)cel_u->tel; + u3a_relocate_post(&off_p); + cel_u->tel = (u3_noun)off_p; +} + /* u3v_rewrite_compact(): rewrite arvo kernel for compaction. */ void @@ -417,10 +443,13 @@ u3v_rewrite_compact(void) u3a_relocate_noun(&(u3A->roc)); u3a_relocate_noun(&(u3A->yot)); - // relocate blob bank HAMT root for compaction. - // values are noun cells [log les bob], so u3h_relocate handles them. + // relocate blob bank HAMT. The values are atoms encoding loom + // offsets of u3a_blob structs; rewrite those offsets BEFORE the + // HAMT structure is relocated so we can still read kev cells at + // their pre-pack addresses. // if ( u3H->blb_p ) { + u3h_walk_with(u3H->blb_p, _v_rewrite_blb_cb, 0); u3h_relocate(&(u3H->blb_p)); } } diff --git a/pkg/noun/vortex.h b/pkg/noun/vortex.h index 942f30de2a..68b892d350 100644 --- a/pkg/noun/vortex.h +++ b/pkg/noun/vortex.h @@ -21,18 +21,16 @@ /* u3v_home: all internal (within image) state. ** NB: version must be first for ease of migration. ** - ** blb_p is the blob bank HAMT (bid -> [log les bob]), checkpointed - ** in image.bin. Each value is a cell [log les bob] where: - ** log = event-log refcount - ** les = lease refcount (transient, zeroed on restart) - ** bob = 0 or the interned bob atom - ** A blob file is deleted when log == 0, les == 0, and bob is dead. + ** blb_p is the blob bank HAMT (bid -> u3a_blob*), checkpointed in + ** image.bin. Each value is an atom carrying the loom offset of a + ** u3a_blob struct (see allocate.h). A blob file is deleted when + ** log_h == 0 && les_h == 0 && atm_p == 0. 
*/ typedef struct _u3v_home { u3v_version ver_d; // version number c3_d pam_d; // parameters u3v_arvo arv_u; // arvo state - u3p(u3h_root) blb_p; // blob bank: bid -> [log les bob] + u3p(u3h_root) blb_p; // blob bank: bid -> u3a_blob* u3a_road rod_u; // storage state } u3v_home; diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index f54b731aaa..3ec06a0d9e 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1507,7 +1507,7 @@ _disk_vere_diff(u3_disk* log_u) return c3n; } -/* _disk_chop_collect_bid: accumulator for collecting blb_p keys. +/* _disk_chop_collect: accumulator for collecting blb_p keys to delete. */ typedef struct { const c3_c* pax_c; @@ -1516,28 +1516,27 @@ typedef struct { c3_z cap_z; } _disk_chop_collect; -/* _disk_chop_collect_cb(): u3h_walk_with callback — collect all bids. +/* _disk_chop_zero_cb(): u3h_walk_with callback — zero log_h and les_h +** in place on each u3a_blob. */ static void -_disk_chop_collect_cb(u3_noun kev, void* ptr_v) +_disk_chop_zero_cb(u3_noun kev, void* ptr_v) { - _disk_chop_collect* col_u = ptr_v; - u3_noun key = u3h(kev); + (void)ptr_v; + u3_noun val = u3t(kev); - c3_d bid_d = 0; - u3r_safe_chub(key, &bid_d); + c3_d off_d = 0; + u3r_safe_chub(val, &off_d); - if ( col_u->len_z == col_u->cap_z ) { - col_u->cap_z = col_u->cap_z ? col_u->cap_z * 2 : 8; - col_u->bid_d = c3_realloc(col_u->bid_d, col_u->cap_z * sizeof(c3_d)); - } - col_u->bid_d[col_u->len_z++] = bid_d; + u3a_blob* blb_u = (u3a_blob*)u3a_into((u3_post)off_d); + blb_u->log_h = 0; + blb_u->les_h = 0; } /* _disk_chop_delete_cb(): u3h_walk_with callback — collect dead blobs. ** -** Checks if blb_p value is [0 0 0]; if so, wipes the blob file -** and collects the bid for post-walk HAMT deletion. +** Checks if u3a_blob is fully unreferenced; if so, wipes the blob +** file and collects the bid for post-walk HAMT/struct cleanup. 
*/ static void _disk_chop_delete_cb(u3_noun kev, void* ptr_v) @@ -1546,20 +1545,27 @@ _disk_chop_delete_cb(u3_noun kev, void* ptr_v) u3_noun key = u3h(kev); u3_noun val = u3t(kev); - u3_noun log = u3h(val); - u3_noun les = u3h(u3t(val)); - u3_noun bob = u3t(u3t(val)); - c3_d bid_d = 0; u3r_safe_chub(key, &bid_d); c3_h mug_h = (c3_h)(bid_d >> 32); c3_h seq_h = (c3_h)(bid_d & 0xFFFFFFFF); - // bob use_w == 1 means only the blb_p cell holds it (arvo lost its refs) - // - c3_o ded_o = ( 0 == bob ) ? c3y : __( 1 == u3a_use(bob) ); + c3_d off_d = 0; + u3r_safe_chub(val, &off_d); + u3a_blob* blb_u = (u3a_blob*)u3a_into((u3_post)off_d); + + c3_o ded_o = ( 0 == blb_u->log_h + && 0 == blb_u->les_h + && 0 == blb_u->atm_p ) ? c3y : c3n; - if ( 0 == log && 0 == les && c3y == ded_o ) { + fprintf(stderr, + "chop: %010" PRIc3_h "/%010" PRIc3_h + " log=%" PRIc3_h " les=%" PRIc3_h " atm=0x%" PRIxPTR "%s\r\n", + mug_h, seq_h, + blb_u->log_h, blb_u->les_h, (uintptr_t)blb_u->atm_p, + (c3y == ded_o) ? " [DELETE]" : ""); + + if ( c3y == ded_o ) { u3_blob_wipe(del_u->pax_c, mug_h, seq_h); // collect bid for post-walk blb_p cleanup @@ -1572,8 +1578,8 @@ _disk_chop_delete_cb(u3_noun kev, void* ptr_v) } } -/* _disk_chop_blobs_cb(): u3_lmdb_walk_blobs callback — increment log for -** each blob ID referenced by an event. +/* _disk_chop_blobs_cb(): u3_lmdb_walk_blobs callback — increment log_h +** in place for each blob ID referenced by an event. 
*/ static void _disk_chop_blobs_cb(void* ptr_v, c3_d eve_d, c3_d* ids_d, c3_z len_z) @@ -1582,52 +1588,28 @@ _disk_chop_blobs_cb(void* ptr_v, c3_d eve_d, c3_d* ids_d, c3_z len_z) (void)eve_d; for ( c3_z i_z = 0; i_z < len_z; i_z++ ) { - u3_noun bid = u3i_chub(ids_d[i_z]); - u3_weak bv = u3h_get(u3H->blb_p, bid); - - if ( u3_none != bv ) { - u3_noun log = u3h(bv); - u3_noun les = u3k(u3h(u3t(bv))); - u3_noun bob = u3k(u3t(u3t(bv))); - u3h_put(u3H->blb_p, bid, u3nt(log + 1, les, bob)); - u3z(bv); - } + c3_h mug_h = (c3_h)(ids_d[i_z] >> 32); + c3_h seq_h = (c3_h)(ids_d[i_z] & 0xFFFFFFFF); - u3z(bid); + u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); + if ( blb_u ) blb_u->log_h += 1; } } -/* _disk_chop_rebuild_log_w(): rebuild blob log after epoch deletion. +/* _disk_chop_rebuild_log_h(): rebuild blob log after epoch deletion. ** -** 1. zero all log and les via collect-then-modify pattern -** 2. scan BLOBS table for remaining events, increment log for each ref +** 1. zero all log_h and les_h in place +** 2. scan BLOBS table for remaining events, increment log_h for each ref ** 3. 
delete blob files whose total refcount is now zero */ static void -_disk_chop_rebuild_log_w(u3_disk* log_u) +_disk_chop_rebuild_log_h(u3_disk* log_u) { - // step 1: zero all log and les (collect bids, then modify after walk) + // step 1: zero all log_h and les_h in place // - { - _disk_chop_collect col_u = {0}; - u3h_walk_with(u3H->blb_p, _disk_chop_collect_cb, &col_u); - - for ( c3_z i_z = 0; i_z < col_u.len_z; i_z++ ) { - u3_noun bid = u3i_chub(col_u.bid_d[i_z]); - u3_weak bv = u3h_get(u3H->blb_p, bid); - - if ( u3_none != bv ) { - u3_noun bob = u3k(u3t(u3t(bv))); - u3h_put(u3H->blb_p, bid, u3nt(0, 0, bob)); - u3z(bv); - } - - u3z(bid); - } - c3_free(col_u.bid_d); - } + u3h_walk_with(u3H->blb_p, _disk_chop_zero_cb, 0); - // step 2: scan BLOBS table for remaining events, rebuild log + // step 2: scan BLOBS table for remaining events, rebuild log_h // c3_d lo_d = 0, hi_d = 0; u3_lmdb_gulf(log_u->mdb_u, &lo_d, &hi_d); @@ -1640,14 +1622,22 @@ _disk_chop_rebuild_log_w(u3_disk* log_u) // step 3: delete unreferenced blobs and clean up blb_p // { + c3_w tot_w = u3h_wyt(u3H->blb_p); + fprintf(stderr, "chop: scanning %" PRIc3_w " blob(s)\r\n", tot_w); + _disk_chop_collect del_u = { .pax_c = log_u->dir_u->pax_c }; u3h_walk_with(u3H->blb_p, _disk_chop_delete_cb, &del_u); for ( c3_z i_z = 0; i_z < del_u.len_z; i_z++ ) { - u3_noun bid = u3i_chub(del_u.bid_d[i_z]); - u3h_del(u3H->blb_p, bid); - u3z(bid); + c3_h mug_h = (c3_h)(del_u.bid_d[i_z] >> 32); + c3_h seq_h = (c3_h)(del_u.bid_d[i_z] & 0xFFFFFFFF); + u3a_blob_drop(mug_h, seq_h); } + + fprintf(stderr, + "chop: deleted %" PRIc3_z " blob(s), kept %" PRIc3_w "\r\n", + del_u.len_z, tot_w - (c3_w)del_u.len_z); + c3_free(del_u.bid_d); } } @@ -1688,7 +1678,7 @@ u3_disk_chop(u3_disk* log_u, c3_d eve_d) // step 2: scan remaining LMDB events for bob atoms, rebuild log // step 3: delete blobs with all-zero refcounts // - _disk_chop_rebuild_log_w(log_u); + _disk_chop_rebuild_log_h(log_u); fprintf(stderr, "chop: event log truncation 
complete\r\n"); } diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index 90b379362d..a152767062 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -18,25 +18,26 @@ /* Blob storage lifecycle ** -** blb_p HAMT: bid -> [log les bob] +** blb_p HAMT: bid -> u3a_blob* ** bid = (mug_h << 32) | seq_h (direct atom on VERE64) -** log = event-log refcount (incremented on commit, rebuilt on chop) -** les = lease refcount (transient IPC state, zeroed on restart) -** bob = 0 or the interned bob atom +** log_h = event-log refcount (incremented on commit, rebuilt on chop) +** les_h = lease refcount (transient IPC state, zeroed on restart) +** atm_p = loom offset of interned bob atom (0 = no live atom) ** ** Blob files live at $pier/.urb/bob//. ** -** Deletion condition: log == 0 && les == 0 && use_w(bob) <= 1 -** (use_w == 1 means only the cell holds the atom; kernel dropped it) +** Deletion condition: log_h == 0 && les_h == 0 && atm_p == 0. +** atm_p is cleared in _me_bob_dead when the bob atom's last +** refcount is dropped. ** ** Lifecycle: ** 1. King detects large file, saves to .urb/bob/stg/, sends %blob IPC -** 2. Serf installs blob: moves staging file, creates blb_p entry [0 1 0] +** 2. Serf installs blob: moves staging file, creates u3a_blob with les_h=1 ** 3. King sends poke event with bob atom (RAM-serialized with BOB tag) -** 4. Serf commits: _mars_fact increments log, writes blob-ref to LMDB -** 5. File deletion (|rm): kernel drops blob ref, les expires/released -** 6. Epoch deletion (chop): rebuilds log from LMDB BLOBS table -** 7. When log == 0 && les == 0 && use_w(bob) <= 1: delete file + entry +** 4. Serf commits: _mars_fact increments log_h, writes blob-ref to LMDB +** 5. File deletion (|rm): kernel drops blob ref → _me_bob_dead clears atm_p +** 6. Epoch deletion (chop): rebuilds log_h from LMDB BLOBS table +** 7. When log_h == 0 && les_h == 0 && atm_p == 0: delete file + entry */ /* u3v_lease: PQ entry for lease TTL expiry. 
@@ -138,39 +139,21 @@ static void _mars_blob_del(c3_h mug_h, c3_h seq_h) { u3_blob_wipe(u3C.dir_c, mug_h, seq_h); - - u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); - u3h_del(u3H->blb_p, bid); - u3z(bid); + u3a_blob_drop(mug_h, seq_h); } /* _blob_maybe_delete(): delete blob iff all refs are zero. -** -** bob is a strong ref held by the cell. if use_w == 1, the cell -** is the ONLY holder and the kernel lost its references: safe to -** delete. if use_w > 1, the arvo still references the atom. */ static void _blob_maybe_delete(c3_h mug_h, c3_h seq_h) { - u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); - u3_weak bv = u3h_get(u3H->blb_p, bid); - u3z(bid); - - if ( u3_none == bv ) return; - - u3_noun log = u3h(bv); - u3_noun les = u3h(u3t(bv)); - u3_noun bob = u3t(u3t(bv)); - - // bob == 0: no atom. bob != 0 && use_w == 1: cell is only holder. - // - c3_o bob_o = ( 0 == bob ) ? c3y : __( 1 == u3a_use(bob) ); - - c3_o ded_o = ( 0 == log && 0 == les && c3y == bob_o ) ? c3y : c3n; - u3z(bv); + u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); + if ( !blb_u ) return; - if ( c3y == ded_o ) { + if ( 0 == blb_u->log_h + && 0 == blb_u->les_h + && 0 == blb_u->atm_p ) + { _mars_blob_del(mug_h, seq_h); } } @@ -428,7 +411,7 @@ _mars_fact(u3_mars* mar_u, u3_noun pro) { // find all bob atoms in the committed event and - // increment their event-log refcount (log in [log les bob]). + // increment their event-log refcount (log_h on the u3a_blob). 
// { struct { c3_d* ids; c3_z len; c3_z cap; } acc = {0, 0, 0}; @@ -438,18 +421,8 @@ _mars_fact(u3_mars* mar_u, c3_h mug_h = (c3_h)(acc.ids[i_z] >> 32); c3_h seq_h = (c3_h)(acc.ids[i_z] & 0xFFFFFFFF); - u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); - u3_weak bv = u3h_get(u3H->blb_p, bid); - - if ( u3_none != bv ) { - u3_noun log = u3h(bv); - u3_noun les = u3k(u3h(u3t(bv))); - u3_noun bob = u3k(u3t(u3t(bv))); - u3h_put(u3H->blb_p, bid, u3nt(log + 1, les, bob)); - u3z(bv); - } - - u3z(bid); + u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); + if ( blb_u ) blb_u->log_h += 1; } // persist blob refs to LMDB for this event @@ -1029,25 +1002,13 @@ _mars_work(u3_mars* mar_u, u3_noun jar) ok_o = u3_blob_move_stg(u3C.dir_c, stg_c, &mug_h, &seq_h); if ( c3y == ok_o ) { - // create blb_p entry (if not present) and increment les. + // create blb_p entry (if not present) and increment les_h. // push PQ entry for TTL expiry. // { - u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); - u3_weak bv = u3h_get(u3H->blb_p, bid); - - if ( u3_none == bv ) { - u3h_put(u3H->blb_p, bid, u3nt(0, 1, 0)); - } - else { - u3_noun log = u3k(u3h(bv)); - u3_noun les = u3h(u3t(bv)); - u3_noun bob = u3k(u3t(u3t(bv))); - u3h_put(u3H->blb_p, bid, u3nt(log, les + 1, bob)); - u3z(bv); - } - - u3z(bid); + u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); + if ( !blb_u ) blb_u = u3a_blob_new(mug_h, seq_h); + blb_u->les_h += 1; } { @@ -1099,15 +1060,10 @@ _mars_work(u3_mars* mar_u, u3_noun jar) u3r_safe_half(seq_n, &seq_h); { - u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); - u3_weak bv = u3h_get(u3H->blb_p, bid); + u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); - if ( u3_none != bv ) { - u3_noun log = u3k(u3h(bv)); - u3_noun les = u3h(u3t(bv)); - u3_noun bob = u3k(u3t(u3t(bv))); - u3h_put(u3H->blb_p, bid, u3nt(log, les + 1, bob)); - u3z(bv); + if ( blb_u ) { + blb_u->les_h += 1; // push PQ entry for TTL failsafe (15 min) // @@ -1124,8 +1080,6 @@ _mars_work(u3_mars* mar_u, u3_noun 
jar) } _mars_pq_push(&_mars_pq, lea_u); } - - u3z(bid); } u3z(jar); @@ -1145,24 +1099,10 @@ _mars_work(u3_mars* mar_u, u3_noun jar) u3r_safe_half(seq_n, &seq_h); { - u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); - u3_weak bv = u3h_get(u3H->blb_p, bid); - - if ( u3_none != bv ) { - u3_noun log = u3k(u3h(bv)); - u3_noun les = u3h(u3t(bv)); - u3_noun bob = u3k(u3t(u3t(bv))); - - if ( les > 0 ) { - u3h_put(u3H->blb_p, bid, u3nt(log, les - 1, bob)); - } - else { - u3h_put(u3H->blb_p, bid, u3nt(log, 0, bob)); - } - u3z(bv); + u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); + if ( blb_u && blb_u->les_h > 0 ) { + blb_u->les_h -= 1; } - - u3z(bid); } // mark the corresponding PQ entry dead (if findable) From d45c1dffa6e8a050df802159b86f30998ca800eb Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Thu, 7 May 2026 15:48:49 -0500 Subject: [PATCH 30/31] wip: catches old cell-based blob refcounting --- pkg/vere/disk.c | 6 +++--- pkg/vere/mars.c | 38 +++++++++----------------------------- 2 files changed, 12 insertions(+), 32 deletions(-) diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index 7b89d877ad..bce48f4503 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -2030,7 +2030,7 @@ typedef enum { /* NOTE: _disk_blb_rebuild_from_epochs removed. ** Blob log-refs are now tracked via LMDB blob-ref events (tag 0x02), -** not via blobs.txt files. blb_p cells [log les bob] persist in the +** not via blobs.txt files. u3a_blob structs in blb_p persist in the ** loom snapshot; on replay, blob-ref events reconstruct the counters. */ @@ -2206,8 +2206,8 @@ _disk_epoc_load(u3_disk* log_u, c3_d lat_d, u3_disk_load_e lod_e) u3m_boot(log_u->dir_u->pax_c, (size_t)1 << u3_Host.ops_u.lom_y); // XX confirm - // blob refcounts ([log les bob] in blb_p) persist in the loom snapshot. - // on replay, LMDB blob-ref events will reconstruct log/les. + // u3a_blob structs in blb_p persist in the loom snapshot. + // on replay, LMDB blob-ref events will reconstruct log_h. 
// if ( log_u->dun_d < u3A->eve_d ) { diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index 8ccd6c98d4..679d2260d6 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -794,29 +794,15 @@ _mars_work(u3_mars* mar_u, u3_noun jar) break; } - // expired lease — decrement les_w, check deletion condition + // expired lease — decrement les_h, check deletion condition // _mars_pq_pop(&_mars_pq); { - u3_noun bid = u3i_chub(((c3_d)top_u->mug_h << 32) | (c3_d)top_u->seq_h); - u3_weak bv = u3h_get(u3H->blb_p, bid); - - if ( u3_none != bv ) { - u3_noun log = u3k(u3h(bv)); - u3_noun les = u3h(u3t(bv)); - u3_noun bob = u3k(u3t(u3t(bv))); - - if ( les > 0 ) { - u3h_put(u3H->blb_p, bid, u3nt(log, les - 1, bob)); - } - else { - u3h_put(u3H->blb_p, bid, u3nt(log, 0, bob)); - } - u3z(bv); + u3a_blob* blb_u = u3a_blob_get(top_u->mug_h, top_u->seq_h); + if ( blb_u && blb_u->les_h > 0 ) { + blb_u->les_h -= 1; } - - u3z(bid); } _blob_maybe_delete(top_u->mug_h, top_u->seq_h); @@ -1405,18 +1391,12 @@ _mars_poke_play(u3_mars* mar_u, const u3_fact* tac_u) &len_z) ) { for ( c3_z i_z = 0; i_z < len_z; i_z++ ) { - u3_noun bid = u3i_chub(ids_d[i_z]); - u3_weak bv = u3h_get(u3H->blb_p, bid); - - if ( u3_none != bv ) { - u3_noun log = u3h(bv); - u3_noun les = u3k(u3h(u3t(bv))); - u3_noun bob = u3k(u3t(u3t(bv))); - u3h_put(u3H->blb_p, bid, u3nt(log + 1, les, bob)); - u3z(bv); - } + c3_h mug_h = (c3_h)(ids_d[i_z] >> 32); + c3_h seq_h = (c3_h)(ids_d[i_z] & 0xFFFFFFFFULL); - u3z(bid); + u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); + if ( !blb_u ) blb_u = u3a_blob_new(mug_h, seq_h); + blb_u->log_h += 1; } c3_free(ids_d); } From 3a426cd5c0a071b2b56a99bbfcb23f21d780643c Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Tue, 12 May 2026 14:09:11 -0500 Subject: [PATCH 31/31] wip: blob refcounting + `u3a_atom.len_w` stores `u3a_blob*` --- pkg/noun/allocate.c | 50 +++++++++++++++++++++++---------------------- pkg/noun/allocate.h | 46 ++++++++++++++++++++++++++--------------- pkg/noun/events.h | 11 +++------- 
pkg/noun/imprison.c | 30 +++++++++------------------ pkg/noun/manage.c | 11 +++++----- pkg/vere/disk.c | 26 ++++++++++++----------- pkg/vere/king.c | 5 ++++- pkg/vere/mars.c | 31 +++++++++++++++++----------- 8 files changed, 111 insertions(+), 99 deletions(-) diff --git a/pkg/noun/allocate.c b/pkg/noun/allocate.c index c8060a79ec..4489990d63 100644 --- a/pkg/noun/allocate.c +++ b/pkg/noun/allocate.c @@ -869,9 +869,11 @@ u3a_blob* u3a_blob_new(c3_h mug_h, c3_h seq_h) { u3a_blob* blb_u = u3a_walloc(c3_wiseof(u3a_blob)); - blb_u->log_h = 0; + blb_u->use_w = 0; + blb_u->eve_w = 0; blb_u->les_h = 0; - blb_u->atm_p = 0; + blb_u->mug_h = mug_h; + blb_u->seq_h = seq_h; u3_post off_p = u3a_outa(blb_u); u3_noun bid = u3i_chub(((c3_d)mug_h << 32) | (c3_d)seq_h); @@ -896,36 +898,26 @@ u3a_blob_drop(c3_h mug_h, c3_h seq_h) u3z(bid); } -/* _me_bob_dead(): handle a bob atom whose loom refcount just hit zero. +/* _me_bob_dead(): bob atom's loom refcount just hit zero. ** -** Clears blb_u->atm_p so the blob is no longer pinned by a live atom. -** If log_h and les_h are also zero, calls blob_del_f to delete the -** file and the HAMT entry. -** -** For untracked bob atoms (no blb_p entry — e.g., king-side transient -** atoms from cueing), notifies the king to release its IPC lease. +** Decrements u3a_blob.use_w (atom cardinality contribution). If +** use_w hits zero, calls blob_del_f to wipe the file and drop the +** blb_p entry. 
*/ static void _me_bob_dead(u3a_atom* atm_u) { if ( !u3C.blob_del_f ) return; - c3_h mug_h = (c3_h)atm_u->mug_w; - c3_h seq_h = (c3_h)atm_u->buf_w[0]; - - u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); + u3a_blob* blb_u = (u3a_blob*)u3a_into((u3_post)atm_u->buf_w[0]); + if ( !blb_u ) return; - if ( !blb_u ) { - // no entry — untracked bob atom (king-side lease release) - // - u3C.blob_del_f(mug_h, seq_h); - return; + if ( blb_u->use_w > 0 ) { + blb_u->use_w -= 1; } - blb_u->atm_p = 0; - - if ( 0 == blb_u->log_h && 0 == blb_u->les_h ) { - u3C.blob_del_f(mug_h, seq_h); + if ( 0 == blb_u->use_w ) { + u3C.blob_del_f(blb_u->mug_h, blb_u->seq_h); } } @@ -1247,8 +1239,18 @@ u3a_relocate_noun(u3_noun *som) old_p = u3a_to_off(old); if ( c3n == u3a_is_cell(old) ) { - new_p = _pack_relocate(old_p); - *som = u3a_to_pug(new_p); + // indirect atom: mark-tracked relocate so bob atoms can rewrite + // their u3a_blob pointer at old_p exactly once. + // + new_p = _pack_relocate_mark(old_p, &fir_t); + *som = u3a_to_pug(new_p); + + if ( fir_t ) { + u3a_atom* atm_u = u3to(u3a_atom, old_p); + if ( atm_u->len_w & u3a_blob_flag ) { + u3a_relocate_post((u3_post*)&atm_u->buf_w[0]); + } + } return; } diff --git a/pkg/noun/allocate.h b/pkg/noun/allocate.h index 44e835cc8c..913c73638e 100644 --- a/pkg/noun/allocate.h +++ b/pkg/noun/allocate.h @@ -226,24 +226,27 @@ /* u3a_blob: loom-resident metadata for a blob file. ** - ** Stored in u3H->blb_p keyed by bid = (mug_h << 32) | seq_h. - ** bid is always a direct atom on VERE64 (63 bits max). + ** Stored in u3H->blb_p keyed by bid = (mug_h << 32) | seq_h, with one + ** entry per (mug_h, seq_h). Each bob atom's buf_w[0] is the loom + ** offset of its u3a_blob. 
** - ** Three independent ref-sources protect the backing file: - ** log_h — event-log refcount (inc on commit, rebuilt on chop) - ** les_h — lease refcount (inc on king acquire, dec on release/expiry) - ** atm_p — loom offset of the interned bob atom (0 = none) + ** Single-counter design: the blob file is deleted iff use_w == 0. + ** use_w is the sum of three component sources: + ** - eve_w: event-log refcount (rebuilt on chop) + ** - les_h: active king-held lease count (transient; zeroed on boot) + ** - implicit atom cardinality: number of live bob atoms whose + ** buf_w[0] points here. Updated only on atom alloc/free; not + ** affected by normal noun-refcount transitions. ** - ** The blob file is deleted when log_h == 0 && les_h == 0 && atm_p == 0. - ** - ** Refcounts are fixed-width (c3_h) so the struct layout is - ** identical on VERE32 and VERE64; atm_p is platform-sized - ** (u3_post) since loom offsets widen on VERE64. + ** On boot we zero les_h (and subtract from use_w); eve_w and atom + ** cardinality survive the snapshot. */ typedef struct __attribute__((aligned(4))) { - c3_h log_h; // event-log refcount - c3_h les_h; // lease refcount (transient, zeroed on restart) - u3p(u3a_atom) atm_p; // loom offset of interned bob atom (0 = none) + c3_w use_w; // total refs: eve_w + les_h + atom cardinality + c3_w eve_w; // event-log refcount (rebuildable from LMDB) + c3_h les_h; // active king-held leases (transient; zeroed on boot) + c3_h mug_h; // blob mug — identifies file in .urb/bob + c3_h seq_h; // blob seq — identifies file in .urb/bob } u3a_blob; STATIC_ASSERT( (((c3_w)1) << u3a_min_log) == u3a_minimum, @@ -822,7 +825,16 @@ typedef struct { return (atm_u->len_w & u3a_blob_flag) ? c3y : c3n; } + /* u3a_bob_blob(): u3a_blob* referenced by a bob atom (via buf_w[0]). 
+ */ + static inline u3a_blob* + u3a_bob_blob(u3_atom som) { + u3a_atom* atm_u = u3a_to_ptr(som); + return (u3a_blob*)u3a_into((u3_post)atm_u->buf_w[0]); + } + /* u3a_bob_mug(): content mug of a bob atom (= blob directory name). + ** Stored redundantly on the atom (mug_w) for fast u3r_mug. */ static inline c3_h u3a_bob_mug(u3_atom som) { @@ -833,7 +845,7 @@ typedef struct { */ static inline c3_h u3a_bob_seq(u3_atom som) { - return (c3_h)((u3a_atom*)u3a_to_ptr(som))->buf_w[0]; + return u3a_bob_blob(som)->seq_h; } /* u3a_bob_bid(): blob ID = (mug << 32) | seq. @@ -842,8 +854,8 @@ typedef struct { */ static inline c3_d u3a_bob_bid(u3_atom som) { - u3a_atom* atm_u = u3a_to_ptr(som); - return ((c3_d)(c3_h)atm_u->mug_w << 32) | (c3_d)(c3_h)atm_u->buf_w[0]; + u3a_blob* blb_u = u3a_bob_blob(som); + return ((c3_d)blb_u->mug_h << 32) | (c3_d)blb_u->seq_h; } /** Functions. diff --git a/pkg/noun/events.h b/pkg/noun/events.h index 02f13f0603..e53b0d97be 100644 --- a/pkg/noun/events.h +++ b/pkg/noun/events.h @@ -128,8 +128,7 @@ c3_i u3e_image_open_any(c3_c* nam_c, c3_c* dir_c, c3_z* len_z, c3_i mod_i); - /* u3_{32,64}_load(): locate u3v_{32,64}_home in the mapped 32-bit / - ** 64-bit image. + /* u3_{32,64}_load(): locate u3v_{32,64}_home in the mapped image. */ void u3_32_load(c3_z wor_i); @@ -137,16 +136,12 @@ void u3_64_load(c3_z wor_i); - /* u3_migrate_32(): migrate the loaded 64-bit snapshot into the native - ** 32-bit loom. Called from disk.c on a 32-bit vere reading a 64-bit - ** snapshot. + /* u3_migrate_32(): migrate 64 -> 32. */ void u3_migrate_32(c3_d eve_d); - /* u3_migrate_64(): migrate the loaded 32-bit snapshot into the native - ** 64-bit loom. Called from disk.c on a 64-bit vere reading a 32-bit - ** snapshot. + /* u3_migrate_64(): migrate 32 -> 64. 
*/ void u3_migrate_64(c3_d eve_d); diff --git a/pkg/noun/imprison.c b/pkg/noun/imprison.c index 344b9501eb..d3a2e48c23 100644 --- a/pkg/noun/imprison.c +++ b/pkg/noun/imprison.c @@ -834,12 +834,12 @@ u3i_vmolt(u3_noun som, u3i_molt_pair pairs[], c3_z len_z) return pro; } -/* u3i_blob(): construct or intern a bob atom (blob reference). +/* u3i_blob(): construct a bob atom (blob reference). ** -** If a u3a_blob entry exists in blb_p with a live interned atom -** (atm_p != 0), returns the existing atom. Otherwise allocates a -** fresh bob atom and stores its offset in blb_u->atm_p (if a -** blb_p entry exists). +** Allocates a fresh u3a_atom whose buf_w[0] points at the u3a_blob +** for (mug_h, seq_h). Looks up or creates the u3a_blob and bumps +** its use_w (atom cardinality). No interning: each call yields a +** new atom. */ u3_atom u3i_blob(c3_h mug_h, c3_h seq_h) @@ -847,26 +847,16 @@ u3i_blob(c3_h mug_h, c3_h seq_h) u3_assert( &(u3H->rod_u) == u3R ); u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); + if ( !blb_u ) blb_u = u3a_blob_new(mug_h, seq_h); + blb_u->use_w += 1; - if ( blb_u && blb_u->atm_p ) { - return u3k(u3a_to_pug(blb_u->atm_p)); - } - - // allocate fresh bob atom - // c3_w* nov_w = u3a_walloc(1 + c3_wiseof(u3a_atom)); u3a_atom* vat_u = (void *)nov_w; vat_u->use_w = 1; vat_u->mug_w = mug_h; - vat_u->len_w = 1 | u3a_blob_flag; - vat_u->buf_w[0] = seq_h; - - u3_post atm_p = u3a_outa(nov_w); + vat_u->len_w = 0 | u3a_blob_flag; + vat_u->buf_w[0] = (c3_w)u3a_outa(blb_u); - if ( blb_u ) { - blb_u->atm_p = atm_p; - } - - return u3a_to_pug(atm_p); + return u3a_to_pug(u3a_outa(nov_w)); } diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index 3288cabfe7..5f19306915 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -575,10 +575,10 @@ STATIC_ASSERT( ((c3_wiseof(u3v_home) * sizeof(c3_w)) == sizeof(u3v_home)), STATIC_ASSERT( U3N_VERLAT < (1U << 5), "5-bit bytecode version" ); -/* _find_home_zero_les_cb(): u3h_walk_with callback — zero les_h on each -** 
u3a_blob in place. Leases are transient IPC state backed by a -** C-heap PQ that is not persisted; after restart the PQ is empty, -** so any les_h count from the previous boot is stale. +/* _find_home_zero_les_cb(): u3h_walk_with callback — drop les_h from +** use_w and zero les_h on each u3a_blob. Leases are transient IPC +** state backed by a C-heap PQ that is not persisted; after restart +** the PQ is empty, so any les_h count from the previous boot is stale. */ static void _find_home_zero_les_cb(u3_noun kev, void* ptr_v) @@ -590,7 +590,8 @@ _find_home_zero_les_cb(u3_noun kev, void* ptr_v) u3r_safe_chub(val, &off_d); u3a_blob* blb_u = (u3a_blob*)u3a_into((u3_post)off_d); - blb_u->les_h = 0; + blb_u->use_w -= blb_u->les_h; + blb_u->les_h = 0; } /* _find_home(): in restored image, point to home road. diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index bce48f4503..43c95b1bdc 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1513,8 +1513,9 @@ typedef struct { c3_z cap_z; } _disk_chop_collect; -/* _disk_chop_zero_cb(): u3h_walk_with callback — zero log_h and les_h -** in place on each u3a_blob. +/* _disk_chop_zero_cb(): u3h_walk_with callback — subtract eve_w from +** use_w and zero eve_w. Preserves atom cardinality + lease counts. +** The walk over LMDB blob refs (step 2) re-increments both. */ static void _disk_chop_zero_cb(u3_noun kev, void* ptr_v) @@ -1526,8 +1527,8 @@ _disk_chop_zero_cb(u3_noun kev, void* ptr_v) u3r_safe_chub(val, &off_d); u3a_blob* blb_u = (u3a_blob*)u3a_into((u3_post)off_d); - blb_u->log_h = 0; - blb_u->les_h = 0; + blb_u->use_w -= blb_u->eve_w; + blb_u->eve_w = 0; } /* _disk_chop_delete_cb(): u3h_walk_with callback — collect dead blobs. @@ -1551,15 +1552,13 @@ _disk_chop_delete_cb(u3_noun kev, void* ptr_v) u3r_safe_chub(val, &off_d); u3a_blob* blb_u = (u3a_blob*)u3a_into((u3_post)off_d); - c3_o ded_o = ( 0 == blb_u->log_h - && 0 == blb_u->les_h - && 0 == blb_u->atm_p ) ? c3y : c3n; + c3_o ded_o = ( 0 == blb_u->use_w ) ? 
c3y : c3n; fprintf(stderr, "chop: %010" PRIc3_h "/%010" PRIc3_h - " log=%" PRIc3_h " les=%" PRIc3_h " atm=0x%" PRIxPTR "%s\r\n", + " use=%" PRIc3_w " eve=%" PRIc3_w " les=%" PRIc3_h "%s\r\n", mug_h, seq_h, - blb_u->log_h, blb_u->les_h, (uintptr_t)blb_u->atm_p, + blb_u->use_w, blb_u->eve_w, blb_u->les_h, (c3y == ded_o) ? " [DELETE]" : ""); if ( c3y == ded_o ) { @@ -1575,8 +1574,8 @@ _disk_chop_delete_cb(u3_noun kev, void* ptr_v) } } -/* _disk_chop_blobs_cb(): u3_lmdb_walk_blobs callback — increment log_h -** in place for each blob ID referenced by an event. +/* _disk_chop_blobs_cb(): u3_lmdb_walk_blobs callback — increment eve_w +** and use_w in place for each blob ID referenced by an event. */ static void _disk_chop_blobs_cb(void* ptr_v, c3_d eve_d, c3_d* ids_d, c3_z len_z) @@ -1589,7 +1588,10 @@ _disk_chop_blobs_cb(void* ptr_v, c3_d eve_d, c3_d* ids_d, c3_z len_z) c3_h seq_h = (c3_h)(ids_d[i_z] & 0xFFFFFFFF); u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); - if ( blb_u ) blb_u->log_h += 1; + if ( blb_u ) { + blb_u->eve_w += 1; + blb_u->use_w += 1; + } } } diff --git a/pkg/vere/king.c b/pkg/vere/king.c index f4eb1d7957..17165d4635 100644 --- a/pkg/vere/king.c +++ b/pkg/vere/king.c @@ -19,7 +19,9 @@ u3_king u3_King; static const c3_c* ver_hos_c = "https://bootstrap.urbit.org/vere"; -/* _king_blob_del(): king-side del_f — release a blob lease via IPC. +/* _king_blob_del(): king-side del_f — release a blob lease via IPC and +** drop the king's transient u3a_blob registry entry. King never wipes +** files; only serf owns filesystem-side blob persistence. 
*/ static void _king_blob_del(c3_h mug_h, c3_h seq_h) @@ -27,6 +29,7 @@ _king_blob_del(c3_h mug_h, c3_h seq_h) if ( u3K.pir_u && u3K.pir_u->god_u ) { u3_lord_blob_release(u3K.pir_u->god_u, mug_h, seq_h); } + u3a_blob_drop(mug_h, seq_h); } // stash config flags for worker diff --git a/pkg/vere/mars.c b/pkg/vere/mars.c index 679d2260d6..b12367903d 100644 --- a/pkg/vere/mars.c +++ b/pkg/vere/mars.c @@ -142,7 +142,7 @@ _mars_blob_del(c3_h mug_h, c3_h seq_h) u3a_blob_drop(mug_h, seq_h); } -/* _blob_maybe_delete(): delete blob iff all refs are zero. +/* _blob_maybe_delete(): delete blob iff use_w == 0. */ static void _blob_maybe_delete(c3_h mug_h, c3_h seq_h) @@ -150,10 +150,7 @@ _blob_maybe_delete(c3_h mug_h, c3_h seq_h) u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); if ( !blb_u ) return; - if ( 0 == blb_u->log_h - && 0 == blb_u->les_h - && 0 == blb_u->atm_p ) - { + if ( 0 == blb_u->use_w ) { _mars_blob_del(mug_h, seq_h); } } @@ -410,8 +407,9 @@ _mars_fact(u3_mars* mar_u, u3_noun job, u3_noun pro) { - // find all bob atoms in the committed event and - // increment their event-log refcount (log_h on the u3a_blob). + // find all bob atoms in the committed event and increment their + // event-log refcount (eve_w on the u3a_blob). Also bumps use_w + // so the blob survives the lifetime of the log entry. 
// { struct { c3_d* ids; c3_z len; c3_z cap; } acc = {0, 0, 0}; @@ -422,7 +420,10 @@ _mars_fact(u3_mars* mar_u, c3_h seq_h = (c3_h)(acc.ids[i_z] & 0xFFFFFFFF); u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); - if ( blb_u ) blb_u->log_h += 1; + if ( blb_u ) { + blb_u->eve_w += 1; + blb_u->use_w += 1; + } } // persist blob refs to LMDB for this event @@ -794,7 +795,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) break; } - // expired lease — decrement les_h, check deletion condition + // expired lease — decrement les_h and use_w, check deletion // _mars_pq_pop(&_mars_pq); @@ -802,6 +803,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) u3a_blob* blb_u = u3a_blob_get(top_u->mug_h, top_u->seq_h); if ( blb_u && blb_u->les_h > 0 ) { blb_u->les_h -= 1; + blb_u->use_w -= 1; } } @@ -996,13 +998,14 @@ _mars_work(u3_mars* mar_u, u3_noun jar) ok_o = u3_blob_move_stg(u3C.dir_c, stg_c, &mug_h, &seq_h); if ( c3y == ok_o ) { - // create blb_p entry (if not present) and increment les_h. + // create blb_p entry (if not present) and bump les_h + use_w. // push PQ entry for TTL expiry. // { u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); if ( !blb_u ) blb_u = u3a_blob_new(mug_h, seq_h); blb_u->les_h += 1; + blb_u->use_w += 1; } { @@ -1055,9 +1058,11 @@ _mars_work(u3_mars* mar_u, u3_noun jar) { u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); + if ( !blb_u ) blb_u = u3a_blob_new(mug_h, seq_h); - if ( blb_u ) { + { blb_u->les_h += 1; + blb_u->use_w += 1; // push PQ entry for TTL failsafe (15 min) // @@ -1096,6 +1101,7 @@ _mars_work(u3_mars* mar_u, u3_noun jar) u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); if ( blb_u && blb_u->les_h > 0 ) { blb_u->les_h -= 1; + blb_u->use_w -= 1; } } @@ -1396,7 +1402,8 @@ _mars_poke_play(u3_mars* mar_u, const u3_fact* tac_u) u3a_blob* blb_u = u3a_blob_get(mug_h, seq_h); if ( !blb_u ) blb_u = u3a_blob_new(mug_h, seq_h); - blb_u->log_h += 1; + blb_u->eve_w += 1; + blb_u->use_w += 1; } c3_free(ids_d); }