From d0c379ee3f325f4a80dada6c3129c23496b1c83e Mon Sep 17 00:00:00 2001 From: Kevin Bowling Date: Wed, 6 May 2026 22:06:14 -0700 Subject: [PATCH] expireover: Add bloom filter for fast history existence checks expireover checks every article in the overview database against the history file to detect orphaned entries. This requires a per-article HISlookup, which does random pread() calls into the DBZ index and history text file. On large spools (1B+ articles), this takes months. Add a bloom filter that is built from a single sequential HISwalk of the history file at startup. The bloom filter acts as a positive-only cache in OVhisthasmsgid: bloom hits skip the slow HISlookup, bloom misses fall through to HISlookup for correctness. False positives are benign (an orphaned overview entry survives one extra cycle). The bloom filter is controlled by the new inn.conf parameter expirebloomfp, which specifies the false positive rate as a reciprocal (default 10000 = 0.01%). Setting it to 0 disables the bloom filter. Memory usage is approximately 20 bits per article (48 MB for 20M articles, 2.4 GB for 1B articles). Changes: - Add lib/bloom.c and include/inn/bloom.h (bloom filter implementation using enhanced double hashing, Kirsch & Mitzenmacher 2006) - Extend HISwalk callback signature to include the message-ID HASH (HISwalk has had zero callers since it was added in 2001) - Set hisv6_walk ignore=true so corrupt lines don't abort the walk - Add OVTOKENCACHE to OVctl for passing the bloom filter to OVhisthasmsgid - Add expirebloomfp to innconf - Add unit tests (lib/bloom-t.c) and integration tests (lib/bloom-hiswalk-t.c) --- .gitignore | 2 + MANIFEST | 4 + doc/pod/expireover.pod | 16 +++ doc/pod/inn.conf.pod | 20 ++++ doc/pod/libinnhist.pod | 15 ++- expire/expireover.c | 61 +++++++++- history/his.c | 3 +- history/hisinterface.h | 3 +- history/hisv6/hisv6-private.h | 3 +- history/hisv6/hisv6.c | 15 ++- history/hisv6/hisv6.h | 4 +- include/inn/bloom.h | 69 +++++++++++ include/inn/history.h | 4 +- include/inn/innconf.h | 1 + include/inn/ov.h | 3 +- lib/Makefile | 4 +- lib/bloom.c | 213 ++++++++++++++++++++++++++++++++++ lib/innconf.c | 1 + samples/inn.conf.in | 1 + storage/expire.c | 18 +++ storage/ov.c | 3 + storage/ovinterface.h | 2 + support/mkmanifest | 2 + tests/Makefile | 9 +- tests/TESTS | 2 + tests/lib/bloom-hiswalk-t.c | 169 +++++++++++++++++++++++++++ tests/lib/bloom-t.c | 128 ++++++++++++++++++++ 27 files changed, 753 insertions(+), 22 deletions(-) create mode 100644 include/inn/bloom.h create mode 100644 lib/bloom.c create mode 100644 tests/lib/bloom-hiswalk-t.c create mode 100644 tests/lib/bloom-t.c diff --git a/.gitignore b/.gitignore index 4fc5e98b0..f933b3c2d 100644 --- a/.gitignore +++ b/.gitignore @@ -195,6 +195,8 @@ /tests/innd/chan.t /tests/lib/artnumber.t /tests/lib/asprintf.t +/tests/lib/bloom.t +/tests/lib/bloom-hiswalk.t /tests/lib/buffer.t /tests/lib/canlock.t /tests/lib/concat.t diff --git a/MANIFEST b/MANIFEST index 094b0a020..70bef9d76 100644 --- a/MANIFEST +++ b/MANIFEST @@ -412,6 +412,7 @@ include/Makefile Makefile for header files include/conffile.h Header file for reading *.conf files include/config.h.in Template configuration data include/inn Installed header files (Directory) +include/inn/bloom.h Header file for bloom filter include/inn/buffer.h Header file for reusable counted buffers include/inn/concat.h Header file for string concatenation include/inn/confparse.h Header file for configuration parser @@ -518,6 +519,7 @@ lib/Makefile Makefile for library lib/argparse.c Functions for parsing arguments lib/artnumber.c Manipulation of article numbers lib/asprintf.c asprintf replacement +lib/bloom.c Bloom filter implementation lib/buffer.c Reusable counted buffer lib/canlock.c Routines for Cancel-Lock lib/cleanfrom.c Clean out a From line @@ -938,6 +940,8 @@ tests/innd/fakeinnd.c Provide symbols defined by innd/innd.c tests/lib Test suite for libinn (Directory) tests/lib/artnumber-t.c Tests for lib/artnumber.c tests/lib/asprintf-t.c Tests for lib/asprintf.c +tests/lib/bloom-hiswalk-t.c Integration test for bloom filter with HISwalk +tests/lib/bloom-t.c Tests for lib/bloom.c tests/lib/buffer-t.c Tests for lib/buffer.c tests/lib/canlock-t.c Tests for lib/canlock.c tests/lib/concat-t.c Tests for lib/concat.c diff --git a/doc/pod/expireover.pod b/doc/pod/expireover.pod index 239b354b6..318ad73e5 100644 --- a/doc/pod/expireover.pod +++ b/doc/pod/expireover.pod @@ -39,6 +39,22 @@ By default, B purges all overview information for newsgroups that have been removed from the server; this behavior is suppressed if B<-f> is given. +To speed up the existence check for each article, B builds +a bloom filter from the history file at startup. This replaces +per-article random I/O into the history file with a single sequential +read, which is critical for large spools. The bloom filter is a +positive-only cache: if it reports an article probably exists, the +slow history lookup is skipped. If it reports the article is not found, +B falls back to a direct history lookup for correctness. +False positives (the bloom filter incorrectly reporting an article +exists) are benign; the orphaned overview entry will be cleaned up on +the next expiration run. + +The false positive rate and memory usage of the bloom filter are +controlled by the I setting in F. Setting it +to C<0> disables the bloom filter. The bloom filter is also disabled +when the B<-s> flag is used. + =head1 OPTIONS =over 4 diff --git a/doc/pod/inn.conf.pod b/doc/pod/inn.conf.pod index a65fe4abd..cfb446c48 100644 --- a/doc/pod/inn.conf.pod +++ b/doc/pod/inn.conf.pod @@ -526,6 +526,26 @@ will run much faster, but reading news from the system will be impossible true, I must also be set. This is a boolean value and the default is true. +=item I + +Controls the bloom filter used by B to accelerate overview +expiration. The value is the reciprocal of the desired false positive +rate: for example, C<10000> means a 1-in-10,000 (0.01%) false positive +rate. Higher values use more memory but produce fewer false positives. +At the default of C<10000>, memory usage is approximately 20 bits per +article in the history file (e.g., 48S for 20 million articles, +2.4S for 1 billion articles). + +Setting this to C<0> disables the bloom filter entirely, falling back +to per-article history lookups (the pre-existing behavior). This is +not recommended for large spools as it results in random I/O into the +history file for every article in the overview database. + +The bloom filter has no effect when B is run with the B<-s> +flag (which forces a filesystem stat of every article). + +This is a non-negative integer and the default is C<10000>. + =item I Besides the seven standard overview fields (which are in order C, diff --git a/doc/pod/libinnhist.pod b/doc/pod/libinnhist.pod index 05a4acc8d..89b51b42c 100644 --- a/doc/pod/libinnhist.pod +++ b/doc/pod/libinnhist.pod @@ -67,8 +67,9 @@ his - routines for managing INN history bool HISwalk(struct history *history, const char *reason, void *cookie, - bool (*callback)(void *cookie, time_t arrived, - time_t posted, time_t expires, + bool (*callback)(void *cookie, const HASH *hash, + time_t arrived, time_t posted, + time_t expires, const TOKEN *token)); struct histstats HISstats(struct history *history); @@ -210,10 +211,12 @@ unspecified. B provides an iteration function for the specified I database. For every entry in the history database, I is -invoked, passing the I, arrival, posting, and expiry times, in -addition to the token associated with the entry. If the I() -returns B the iteration is aborted and B returns -B to the caller. +invoked, passing the I, the message-ID I, arrival, +posting, and expiry times, in addition to the token associated with +the entry. If the entry has no storage token (a remembered +message-ID), I is B. If the I() returns +B the iteration is aborted and B returns B to +the caller. Malformed history lines are silently skipped. To process the entire database in the presence of a running server, I may be passed; if this argument is not B, it is used diff --git a/expire/expireover.c b/expire/expireover.c index 0548cae53..3eccae081 100644 --- a/expire/expireover.c +++ b/expire/expireover.c @@ -14,6 +14,10 @@ #include #include +#include + +#include "inn/bloom.h" +#include "inn/history.h" #include "inn/innconf.h" #include "inn/libinn.h" #include "inn/messages.h" @@ -45,6 +49,22 @@ fatal_signal(int sig) } +/* +** Callback for HISwalk that adds history entries with storage tokens to the +** bloom filter. Entries without tokens (remembered message-IDs) are skipped +** so that OVhisthasmsgid correctly identifies them as missing. +*/ +static bool +build_bloom_cb(void *cookie, const HASH *hash, + time_t arrived UNUSED, time_t posted UNUSED, + time_t expires UNUSED, const TOKEN *token) +{ + if (token != NULL) + bloom_add(cookie, hash); + return true; +} + + int main(int argc, char *argv[]) { @@ -60,6 +80,8 @@ main(int argc, char *argv[]) bool purge_deleted = false; bool always_stat = false; struct history *history; + struct bloom_filter *bloom = NULL; + struct bloom_filter *null_bloom = NULL; /* First thing, set up logging and our identity. */ openlog("expireover", L_OPENLOG_FLAGS | LOG_PID, LOG_INN_PROG); @@ -181,12 +203,43 @@ main(int argc, char *argv[]) if (!OVctl(OVSTATALL, &always_stat)) die("can't configure overview stat behavior"); - /* We want to be careful about being interrupted from this point on, so - set up our signal handlers. */ + /* Set up signal handlers before the bloom walk, which can take several + minutes on very large history files. */ xsignal(SIGTERM, fatal_signal); xsignal(SIGINT, fatal_signal); xsignal(SIGHUP, fatal_signal); + /* Build a bloom filter from the history file for fast existence checks. + This replaces millions of random pread() calls into the history file + with a single sequential read, making expireover feasible on large + spools (1B+ articles). The bloom filter is used as a positive-only + cache: hits skip the slow history lookup, misses fall through to + HISlookup for correctness (handles articles added after the walk). */ + if (innconf->expirebloomfp > 0 && !always_stat) { + struct stat st; + char *histpath; + size_t estimated = 0; + /* Minimum history line: 34 (hash) + 1 (tab) + 1 (arrived) + * + 1 (newline). Dividing file size by this gives a conservative + * overestimate of entries, which is what we want for bloom sizing. */ + const size_t min_history_line = 37; + + histpath = concatpath(innconf->pathdb, INN_PATH_HISTORY); + if (stat(histpath, &st) == 0) + estimated = st.st_size / min_history_line; + else + warn("can't stat %s, bloom filter will be undersized", histpath); + bloom = bloom_create(estimated, innconf->expirebloomfp); + if (!HISwalk(history, NULL, bloom, build_bloom_cb)) { + warn("can't walk history for bloom filter, using per-article" + " lookups"); + bloom_free(bloom); + bloom = NULL; + } + OVctl(OVTOKENCACHE, &bloom); + free(histpath); + } + /* Loop through each line of the input file and process each group, writing data to the lowmark file if desired. */ line = QIOread(qp); @@ -212,6 +265,10 @@ main(int argc, char *argv[]) warn("can't expire deleted newsgroups"); /* Close everything down in an orderly fashion. */ + if (bloom) { + OVctl(OVTOKENCACHE, &null_bloom); + bloom_free(bloom); + } QIOclose(qp); OVclose(); SMshutdown(); diff --git a/history/his.c b/history/his.c index 03127a358..f8dc7b8c7 100644 --- a/history/his.c +++ b/history/his.c @@ -328,7 +328,8 @@ HISreplace(struct history *h, const char *key, time_t arrived, time_t posted, bool HISwalk(struct history *h, const char *reason, void *cookie, - bool (*callback)(void *, time_t, time_t, time_t, const TOKEN *)) + bool (*callback)(void *, const HASH *, time_t, time_t, time_t, + const TOKEN *)) { bool r; diff --git a/history/hisinterface.h b/history/hisinterface.h index ce3ae72f1..cda2cd16b 100644 --- a/history/hisinterface.h +++ b/history/hisinterface.h @@ -6,6 +6,7 @@ #define HISINTERFACE_H #include "config.h" +#include "inn/libinn.h" #include struct token; @@ -27,7 +28,7 @@ typedef struct hismethod { bool (*expire)(void *, const char *, const char *, bool, void *, time_t, bool (*)(void *, time_t, time_t, time_t, struct token *)); bool (*walk)(void *, const char *, void *, - bool (*)(void *, time_t, time_t, time_t, + bool (*)(void *, const HASH *, time_t, time_t, time_t, const struct token *)); bool (*remember)(void *, const char *, time_t, time_t); bool (*ctl)(void *, int, void *); diff --git a/history/hisv6/hisv6-private.h b/history/hisv6/hisv6-private.h index 003d0d8dc..be29d2b01 100644 --- a/history/hisv6/hisv6-private.h +++ b/history/hisv6/hisv6-private.h @@ -64,7 +64,8 @@ struct hisv6 { struct hisv6_walkstate { union { bool (*expire)(void *, time_t, time_t, time_t, TOKEN *); - bool (*walk)(void *, time_t, time_t, time_t, const TOKEN *); + bool (*walk)(void *, const HASH *, time_t, time_t, time_t, + const TOKEN *); } cb; void *cookie; bool paused; diff --git a/history/hisv6/hisv6.c b/history/hisv6/hisv6.c index 72045cf2d..686dc50f6 100644 --- a/history/hisv6/hisv6.c +++ b/history/hisv6/hisv6.c @@ -1071,14 +1071,14 @@ hisv6_traverse(struct hisv6 *h, struct hisv6_walkstate *cookie, ** parameters the user callback expects **/ static bool -hisv6_traversecb(struct hisv6 *h UNUSED, void *cookie, const HASH *hash UNUSED, +hisv6_traversecb(struct hisv6 *h UNUSED, void *cookie, const HASH *hash, time_t arrived, time_t posted, time_t expires, const TOKEN *token) { struct hisv6_walkstate *hiscookie = cookie; - return (*hiscookie->cb.walk)(hiscookie->cookie, arrived, posted, expires, - token); + return (*hiscookie->cb.walk)(hiscookie->cookie, hash, arrived, posted, + expires, token); } @@ -1087,7 +1087,8 @@ hisv6_traversecb(struct hisv6 *h UNUSED, void *cookie, const HASH *hash UNUSED, */ bool hisv6_walk(void *history, const char *reason, void *cookie, - bool (*callback)(void *, time_t, time_t, time_t, const TOKEN *)) + bool (*callback)(void *, const HASH *, time_t, time_t, time_t, + const TOKEN *)) { struct hisv6 *h = history; struct hisv6_walkstate hiscookie; @@ -1099,7 +1100,11 @@ hisv6_walk(void *history, const char *reason, void *cookie, hiscookie.cookie = cookie; hiscookie.new = NULL; hiscookie.paused = false; - hiscookie.ignore = false; + /* Ignore malformed history lines during walk. The walk is a read-only + operation (e.g., building a bloom filter for expireover); aborting + over one corrupt line in a potentially 180 GB file would be + catastrophic. expire is what fixes corrupt history entries. */ + hiscookie.ignore = true; r = hisv6_traverse(h, &hiscookie, reason, hisv6_traversecb); diff --git a/history/hisv6/hisv6.h b/history/hisv6/hisv6.h index 30fcae787..8f9564f9c 100644 --- a/history/hisv6/hisv6.h +++ b/history/hisv6/hisv6.h @@ -5,6 +5,8 @@ #ifndef HISV6_H #define HISV6_H +#include "inn/libinn.h" + struct token; struct histopts; struct history; @@ -32,7 +34,7 @@ bool hisv6_expire(void *, const char *, const char *, bool, void *, struct token *)); bool hisv6_walk(void *, const char *, void *, - bool (*)(void *, time_t, time_t, time_t, + bool (*)(void *, const HASH *, time_t, time_t, time_t, const struct token *)); const char *hisv6_error(void *); diff --git a/include/inn/bloom.h b/include/inn/bloom.h new file mode 100644 index 000000000..d0f31d7ea --- /dev/null +++ b/include/inn/bloom.h @@ -0,0 +1,69 @@ +/* +** Bloom filter for fast set membership testing. +** +** A space-efficient probabilistic data structure that can test whether +** an element is a member of a set. False positive matches are possible, +** but false negatives are not: a query returns either "possibly in set" +** or "definitely not in set." +** +** Uses enhanced double hashing (Kirsch & Mitzenmacher 2006) to derive +** multiple hash positions from a single HASH value. +*/ + +#ifndef INN_BLOOM_H +#define INN_BLOOM_H + +#include "inn/libinn.h" +#include "inn/portable-macros.h" +#include "inn/portable-stdbool.h" + +#include + +BEGIN_DECLS + +/* The layout of this struct is entirely internal to the implementation. */ +struct bloom_filter; + +/* +** Create a new bloom filter sized for the given number of estimated entries +** and false positive rate expressed as a reciprocal (e.g., 10000 means +** 1-in-10,000 or 0.01% false positive rate). Uses xmalloc internally, +** so dies on allocation failure. +*/ +struct bloom_filter *bloom_create(size_t estimated_entries, unsigned long fp_inv); + +/* +** Add a HASH to the bloom filter. +*/ +void bloom_add(struct bloom_filter *bf, const HASH *hash); + +/* +** Check whether a HASH is possibly in the bloom filter. Returns true if +** the element is probably in the set (with false positive rate as configured), +** or false if the element is definitely not in the set. +*/ +bool bloom_check(const struct bloom_filter *bf, const HASH *hash); + +/* +** Free a bloom filter and all associated memory. Safe to call with NULL. +*/ +void bloom_free(struct bloom_filter *bf); + +/* +** Return the number of entries that have been added to the bloom filter. +*/ +size_t bloom_count(const struct bloom_filter *bf); + +/* +** Return the number of hash functions (k) used by the bloom filter. +*/ +unsigned int bloom_nhash(const struct bloom_filter *bf); + +/* +** Return the total number of bits (m) in the bloom filter. +*/ +size_t bloom_bits(const struct bloom_filter *bf); + +END_DECLS + +#endif /* INN_BLOOM_H */ diff --git a/include/inn/history.h b/include/inn/history.h index f14d9536f..5be46d984 100644 --- a/include/inn/history.h +++ b/include/inn/history.h @@ -5,6 +5,7 @@ #ifndef INN_HISTORY_H #define INN_HISTORY_H +#include "inn/libinn.h" #include "inn/macros.h" #include "inn/portable-stdbool.h" #include @@ -96,7 +97,8 @@ bool HISexpire(struct history *, const char *, const char *, bool, void *, time_t, bool (*)(void *, time_t, time_t, time_t, struct token *)); bool HISwalk(struct history *, const char *, void *, - bool (*)(void *, time_t, time_t, time_t, const struct token *)); + bool (*)(void *, const HASH *, time_t, time_t, time_t, + const struct token *)); struct histstats HISstats(struct history *); const char *HISerror(struct history *); bool HISctl(struct history *, int, void *); diff --git a/include/inn/innconf.h b/include/inn/innconf.h index f1add2628..a24c2d1d8 100644 --- a/include/inn/innconf.h +++ b/include/inn/innconf.h @@ -72,6 +72,7 @@ struct innconf { /* Article Storage */ unsigned long cnfscheckfudgesize; /* Additional CNFS integrity checking */ bool enableoverview; /* Store overview info for articles? */ + unsigned long expirebloomfp; /* Bloom filter FP rate 1/N (0 = disabled) */ struct vector *extraoverviewadvertised; /* Extra overview fields for LIST OVERVIEW.FMT */ struct vector diff --git a/include/inn/ov.h b/include/inn/ov.h index 2f2259276..ef7ba8541 100644 --- a/include/inn/ov.h +++ b/include/inn/ov.h @@ -19,7 +19,8 @@ typedef enum { OVSTATICSEARCH, OVSTATALL, OVCACHEKEEP, - OVCACHEFREE + OVCACHEFREE, + OVTOKENCACHE } OVCTLTYPE; #define OV_NOSPACE 100 typedef enum { diff --git a/lib/Makefile b/lib/Makefile index ae22203e5..0f4fb8ec5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,8 +12,8 @@ top = .. CFLAGS = $(GCFLAGS) # The base library files that are always compiled and included. -SOURCES = argparse.c artnumber.c buffer.c cleanfrom.c clientactive.c \ - clientlib.c \ +SOURCES = argparse.c artnumber.c bloom.c buffer.c cleanfrom.c \ + clientactive.c clientlib.c \ commands.c concat.c conffile.c confparse.c \ date.c dbz.c defdist.c dispatch.c fdflag.c fdlimit.c \ getfqdn.c getmodaddr.c hash.c hashtab.c headers.c hex.c \ diff --git a/lib/bloom.c b/lib/bloom.c new file mode 100644 index 000000000..cfd669e7a --- /dev/null +++ b/lib/bloom.c @@ -0,0 +1,213 @@ +/* +** Bloom filter implementation. +** +** A space-efficient probabilistic data structure for set membership testing. +** Uses enhanced double hashing (Kirsch & Mitzenmacher 2006) to derive k +** independent bit positions from the two 64-bit halves of a 16-byte MD5 +** HASH value. +** +** Sizing uses the standard formulas: +** m = -n * ln(p) / (ln(2))^2 (optimal number of bits) +** k = (m / n) * ln(2) (optimal number of hash functions) +** where n = estimated entries, p = desired false positive rate. +** +** The create function takes the false positive rate as 1/fp_inv (an integer +** reciprocal) to avoid floating point in the library. Common values: +** fp_inv = 100 => 1% FP, ~10 bits/entry, k=7 +** fp_inv = 1000 => 0.1% FP, ~15 bits/entry, k=10 +** fp_inv = 10000 => 0.01% FP, ~20 bits/entry, k=14 +** fp_inv = 100000 => 0.001% FP, ~24 bits/entry, k=17 +*/ + +#include "portable/system.h" + +#include + +#include "inn/bloom.h" +#include "inn/messages.h" +#include "inn/xmalloc.h" + +struct bloom_filter { + uint8_t *bits; /* bit array */ + size_t nbits; /* total bits (m) */ + unsigned int nhash; /* number of hash functions (k) */ + size_t count; /* entries added */ +}; + + +/* +** Compute k bit positions for a given HASH using enhanced double hashing. +** h(i) = (h1 + i * h2) mod m, where h1 and h2 are the two 64-bit halves +** of the MD5 hash. Unsigned overflow on (h1 + i * h2) is well-defined +** in C and acts as additional mixing before the final mod m. +*/ +static void +bloom_positions(const struct bloom_filter *bf, const HASH *hash, + size_t *positions) +{ + uint64_t h1, h2; + unsigned int i; + + memcpy(&h1, hash->hash, 8); + memcpy(&h2, hash->hash + 8, 8); + for (i = 0; i < bf->nhash; i++) + positions[i] = (size_t) ((h1 + (uint64_t) i * h2) % bf->nbits); +} + + +/* +** Pre-computed bits-per-entry and optimal k for common false positive rates. +** Values are ceil(-ln(1/fp_inv) / ln(2)^2) and ceil(bits_per_entry * ln(2)). +** For fp_inv values not in the table, we interpolate conservatively. +*/ +static const struct { + unsigned long fp_inv; + unsigned int bits_per_entry; + unsigned int nhash; +} bloom_params[] = { + { 10, 5, 4 }, /* 10% FP */ + { 20, 7, 5 }, + { 50, 9, 6 }, + { 100, 10, 7 }, /* 1% FP */ + { 200, 12, 8 }, + { 500, 13, 9 }, + { 1000, 15, 10 }, /* 0.1% FP */ + { 2000, 16, 11 }, + { 5000, 18, 13 }, + { 10000, 20, 14 }, /* 0.01% FP */ + { 20000, 21, 15 }, + { 50000, 23, 16 }, + { 100000, 24, 17 }, /* 0.001% FP */ + { 1000000, 29, 20 }, /* 0.0001% FP */ + { 10000000, 34, 24 }, +}; +#define BLOOM_NPARAMS (sizeof(bloom_params) / sizeof(bloom_params[0])) +#define BLOOM_MAX_NHASH 24 /* must match max nhash in bloom_params table */ + +/* Maximum bloom filter size on 32-bit platforms where size_t overflow + * is a real concern. On 64-bit, there is no cap, xmalloc will die if + * the system doesn't have enough memory, which is the correct behavior + * for a batch job. On 32-bit, cap at SIZE_MAX/16 so that the conversion + * to bits (multiply by 8) stays within size_t. */ +#if SIZE_MAX <= UINT32_MAX +# define BLOOM_MAX_BITS ((SIZE_MAX / 16) * 8) +#endif + + +struct bloom_filter * +bloom_create(size_t estimated_entries, unsigned long fp_inv) +{ + struct bloom_filter *bf; + unsigned int bits_per_entry; + unsigned int nhash; + size_t nbits; + size_t nbytes; + size_t i; + + /* Look up parameters from the table. Use the entry with the smallest + * fp_inv that is >= the requested fp_inv (i.e., the FP rate at least as + * good as requested). If fp_inv exceeds all table entries, use the + * last (most conservative) entry. */ + bits_per_entry = bloom_params[BLOOM_NPARAMS - 1].bits_per_entry; + nhash = bloom_params[BLOOM_NPARAMS - 1].nhash; + for (i = 0; i < BLOOM_NPARAMS; i++) { + if (bloom_params[i].fp_inv >= fp_inv) { + bits_per_entry = bloom_params[i].bits_per_entry; + nhash = bloom_params[i].nhash; + break; + } + } + + bf = xmalloc(sizeof(*bf)); + + if (estimated_entries == 0) + estimated_entries = 1; + + /* Guard against size_t overflow on 32-bit platforms where + * bits_per_entry * estimated_entries can exceed SIZE_MAX. + * On 64-bit this check is effectively unreachable but harmless. + * On 32-bit, the cap degrades the FP rate but does not affect + * correctness. */ + if (estimated_entries > SIZE_MAX / bits_per_entry) +#if SIZE_MAX <= UINT32_MAX + nbits = BLOOM_MAX_BITS; +#else + die("bloom filter: entry count too large for size_t"); +#endif + else + nbits = (size_t) bits_per_entry * estimated_entries; +#if SIZE_MAX <= UINT32_MAX + if (nbits > BLOOM_MAX_BITS) + nbits = BLOOM_MAX_BITS; +#endif + if (nbits < 64) + nbits = 64; + + bf->nbits = nbits; + bf->nhash = nhash; + bf->count = 0; + + nbytes = (nbits + 7) / 8; + bf->bits = xcalloc(nbytes, 1); + + return bf; +} + + +void +bloom_add(struct bloom_filter *bf, const HASH *hash) +{ + size_t positions[BLOOM_MAX_NHASH]; + unsigned int i; + + bloom_positions(bf, hash, positions); + for (i = 0; i < bf->nhash; i++) + bf->bits[positions[i] / 8] |= (uint8_t) (1U << (positions[i] % 8)); + bf->count++; +} + + +bool +bloom_check(const struct bloom_filter *bf, const HASH *hash) +{ + size_t positions[BLOOM_MAX_NHASH]; + unsigned int i; + + bloom_positions(bf, hash, positions); + for (i = 0; i < bf->nhash; i++) { + if (!(bf->bits[positions[i] / 8] & (1U << (positions[i] % 8)))) + return false; + } + return true; +} + + +void +bloom_free(struct bloom_filter *bf) +{ + if (bf == NULL) + return; + free(bf->bits); + free(bf); +} + + +size_t +bloom_count(const struct bloom_filter *bf) +{ + return bf->count; +} + + +unsigned int +bloom_nhash(const struct bloom_filter *bf) +{ + return bf->nhash; +} + + +size_t +bloom_bits(const struct bloom_filter *bf) +{ + return bf->nbits; +} diff --git a/lib/innconf.c b/lib/innconf.c index 145e66b5d..199e7009a 100644 --- a/lib/innconf.c +++ b/lib/innconf.c @@ -205,6 +205,7 @@ static const struct config config_table[] = { /* The following settings are specific to the storage subsystem. */ {K(articlemmap), BOOL(true) }, {K(cnfscheckfudgesize), UNUMBER(0) }, + {K(expirebloomfp), UNUMBER(10000) }, {K(immediatecancel), BOOL(false) }, {K(keepmmappedthreshold), UNUMBER(1024) }, {K(nfswriter), BOOL(false) }, diff --git a/samples/inn.conf.in b/samples/inn.conf.in index 2e761ff67..089b0a60e 100644 --- a/samples/inn.conf.in +++ b/samples/inn.conf.in @@ -64,6 +64,7 @@ wipexpire: 10 cnfscheckfudgesize: 0 enableoverview: true +expirebloomfp: 10000 extraoverviewadvertised: [ ] extraoverviewhidden: [ ] groupbaseexpiry: true diff --git a/storage/expire.c b/storage/expire.c index 8836805c2..abd9f4e6c 100644 --- a/storage/expire.c +++ b/storage/expire.c @@ -12,6 +12,7 @@ #include #include +#include "inn/bloom.h" #include "inn/innconf.h" #include "inn/libinn.h" #include "inn/ov.h" @@ -27,6 +28,9 @@ enum KRP { Poison }; +/* Bloom filter token cache for fast OVhisthasmsgid lookups. */ +struct bloom_filter *OVtokencache = NULL; + /* Statistics */ long EXPprocessed; long EXPunlinked; @@ -803,6 +807,20 @@ OVhisthasmsgid(struct history *h, const char *data) } if ((p = OVERGetHeader(data, Messageidindex)) == NULL) return false; + + /* Fast path: if article is in the bloom filter, it (probably) exists. + * Bloom hits skip the slow history lookup. Bloom misses fall through + * to HISlookup to handle articles added after the filter was built. + * False positives (bloom says "yes" for an expired article) are benign: + * the orphaned overview entry is cleaned up on the next run. */ + if (OVtokencache) { + HASH hash = HashMessageID(p); + if (bloom_check(OVtokencache, &hash)) + return true; + } + + /* Slow path: per-article history lookup (original behavior). + * Only reached for bloom misses or when no bloom filter is loaded. */ return HISlookup(h, p, NULL, NULL, NULL, NULL); } diff --git a/storage/ov.c b/storage/ov.c index a8ff34c97..62ddd061d 100644 --- a/storage/ov.c +++ b/storage/ov.c @@ -399,6 +399,9 @@ OVctl(OVCTLTYPE type, void *val) case OVSTATALL: OVstatall = *(bool *) val; return true; + case OVTOKENCACHE: + OVtokencache = *(struct bloom_filter **) val; + return true; default: return ((*ov.ctl)(type, val)); } diff --git a/storage/ovinterface.h b/storage/ovinterface.h index 41ed2f16b..31eefb428 100644 --- a/storage/ovinterface.h +++ b/storage/ovinterface.h @@ -35,10 +35,12 @@ typedef struct overview_method { bool OVgroupbasedexpire(TOKEN token, const char *group, const char *data, int len, time_t arrived, time_t expires); +struct bloom_filter; bool OVhisthasmsgid(struct history *, const char *data); void OVEXPremove(TOKEN token, bool deletedgroups, char **xref, int ngroups); void OVEXPcleanup(void); +extern struct bloom_filter *OVtokencache; extern time_t OVnow; extern FILE *EXPunlinkfile; extern bool OVignoreselfexpire; diff --git a/support/mkmanifest b/support/mkmanifest index c75f9a701..e079cdac1 100755 --- a/support/mkmanifest +++ b/support/mkmanifest @@ -282,6 +282,8 @@ tests/innd/artparse.t tests/innd/chan.t tests/lib/artnumber.t tests/lib/asprintf.t +tests/lib/bloom.t +tests/lib/bloom-hiswalk.t tests/lib/buffer.t tests/lib/canlock.t tests/lib/concat.t diff --git a/tests/Makefile b/tests/Makefile index fa35b1a37..64d80256e 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -17,7 +17,8 @@ LIBM_LDFLAGS = '-lm' ## added to EXTRA. TESTS = authprogs/ident.t innd/artparse.t innd/chan.t lib/artnumber.t \ - lib/asprintf.t lib/buffer.t lib/canlock.t lib/concat.t lib/conffile.t \ + lib/asprintf.t lib/bloom.t lib/bloom-hiswalk.t lib/buffer.t \ + lib/canlock.t lib/concat.t lib/conffile.t \ lib/confparse.t lib/daemon.t lib/date.t \ lib/dispatch.t lib/fdflag.t \ lib/getaddrinfo.t lib/getnameinfo.t lib/hash.t \ @@ -150,6 +151,12 @@ lib/getnameinfo.t: lib/getnameinfo.o lib/getnameinfo-t.o tap/basic.o $(LIBINN) $(LINK) lib/getnameinfo.o lib/getnameinfo-t.o tap/basic.o \ $(LIBINN) $(LIBS) +lib/bloom.t: lib/bloom-t.o tap/basic.o $(LIBINN) + $(LINK) lib/bloom-t.o tap/basic.o $(LIBINN) + +lib/bloom-hiswalk.t: lib/bloom-hiswalk-t.o tap/basic.o $(STORAGEDEPS) + $(LINKDEPS) lib/bloom-hiswalk-t.o tap/basic.o $(STORAGELIBS) $(LIBS) + lib/hash.t: lib/hash-t.o tap/basic.o $(LIBINN) $(LINK) lib/hash-t.o tap/basic.o $(LIBINN) diff --git a/tests/TESTS b/tests/TESTS index b995af703..8cbc076ab 100644 --- a/tests/TESTS +++ b/tests/TESTS @@ -7,6 +7,8 @@ innd/artparse innd/chan lib/artnumber lib/asprintf +lib/bloom +lib/bloom-hiswalk lib/buffer lib/canlock lib/concat diff --git a/tests/lib/bloom-hiswalk-t.c b/tests/lib/bloom-hiswalk-t.c new file mode 100644 index 000000000..f6244de1e --- /dev/null +++ b/tests/lib/bloom-hiswalk-t.c @@ -0,0 +1,169 @@ +/* Integration test: HISwalk + bloom filter for expireover token cache. + * + * Creates a temporary history file with a mix of entries (some with tokens, + * some remembered-only), builds a bloom filter via HISwalk, and verifies + * that the bloom filter correctly identifies articles with tokens vs. + * remembered entries. */ + +#include "portable/system.h" + +#include +#include + +#include "inn/bloom.h" +#include "inn/history.h" +#include "inn/libinn.h" +#include "inn/messages.h" +#include "inn/storage.h" +#include "tap/basic.h" + +#define N_WITH_TOKEN 500 +#define N_REMEMBERED 100 +#define N_NOT_IN_HIST 200 + + +/* +** Generate a deterministic message-ID from an integer. +*/ +static char * +make_msgid(unsigned long n) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "", n); + return xstrdup(buf); +} + + +/* +** HISwalk callback: add entries with tokens to the bloom filter. +** Same logic as build_bloom_cb in expireover.c. +*/ +static bool +build_bloom_cb(void *cookie, const HASH *hash, + time_t arrived UNUSED, time_t posted UNUSED, + time_t expires UNUSED, const TOKEN *token) +{ + if (token != NULL) + bloom_add(cookie, hash); + return true; +} + + +int +main(void) +{ + struct history *h; + struct bloom_filter *bloom; + char tmpdir[64]; + char histpath[128]; + TOKEN token; + unsigned long i; + unsigned long bloom_misses; + unsigned long false_negatives; + bool walk_ok; + + test_init(8); + + /* Create temporary directory for the history database. */ + strlcpy(tmpdir, "bloom-hiswalk-XXXXXX", sizeof(tmpdir)); + if (mkdtemp(tmpdir) == NULL) + sysbail("can't create temp directory"); + snprintf(histpath, sizeof(histpath), "%s/history", tmpdir); + + /* Create and populate the history database. */ + h = HISopen(histpath, "hisv6", HIS_CREAT | HIS_RDWR); + if (h == NULL) + bail("can't create history at %s", histpath); + + memset(&token, 0, sizeof(token)); + token.type = 1; + + /* Write entries with storage tokens. */ + for (i = 0; i < N_WITH_TOKEN; i++) { + char *msgid = make_msgid(i); + if (!HISwrite(h, msgid, (time_t) 1000000 + i, (time_t) 1000000 + i, + (time_t) 0, &token)) + bail("can't write history entry %lu: %s", i, HISerror(h)); + free(msgid); + } + + /* Write remembered entries (no token). */ + for (i = N_WITH_TOKEN; i < N_WITH_TOKEN + N_REMEMBERED; i++) { + char *msgid = make_msgid(i); + if (!HISremember(h, msgid, (time_t) 1000000 + i, + (time_t) 1000000 + i)) + bail("can't remember history entry %lu: %s", i, HISerror(h)); + free(msgid); + } + + HISsync(h); + HISclose(h); + ok(1, true); /* history created and populated */ + + /* Reopen read-only (as expireover does). */ + h = HISopen(histpath, "hisv6", HIS_RDONLY); + if (h == NULL) + bail("can't reopen history at %s", histpath); + ok(2, true); /* history reopened */ + + /* Build the bloom filter via HISwalk. */ + bloom = bloom_create(N_WITH_TOKEN + N_REMEMBERED, 10000); + walk_ok = HISwalk(h, NULL, bloom, build_bloom_cb); + ok(3, walk_ok); /* HISwalk succeeded */ + ok(4, bloom_count(bloom) == N_WITH_TOKEN); /* only token entries added */ + + /* Verify: all token entries should be bloom hits. */ + false_negatives = 0; + for (i = 0; i < N_WITH_TOKEN; i++) { + char *msgid = make_msgid(i); + HASH hash = HashMessageID(msgid); + if (!bloom_check(bloom, &hash)) + false_negatives++; + free(msgid); + } + ok(5, false_negatives == 0); /* no false negatives for token entries */ + if (false_negatives > 0) + diag("false negatives: %lu out of %d", false_negatives, N_WITH_TOKEN); + + /* Verify: remembered entries should NOT be in the bloom filter. + * (Some may be false positives, but most should miss.) */ + bloom_misses = 0; + for (i = N_WITH_TOKEN; i < N_WITH_TOKEN + N_REMEMBERED; i++) { + char *msgid = make_msgid(i); + HASH hash = HashMessageID(msgid); + if (!bloom_check(bloom, &hash)) + bloom_misses++; + free(msgid); + } + ok(6, bloom_misses > N_REMEMBERED * 9 / 10); /* >90% should miss */ + diag("remembered entries: %lu/%d were bloom misses (expected most)", + bloom_misses, N_REMEMBERED); + + /* Verify: entries not in history at all should mostly miss. */ + bloom_misses = 0; + for (i = N_WITH_TOKEN + N_REMEMBERED; + i < N_WITH_TOKEN + N_REMEMBERED + N_NOT_IN_HIST; i++) { + char *msgid = make_msgid(i); + HASH hash = HashMessageID(msgid); + if (!bloom_check(bloom, &hash)) + bloom_misses++; + free(msgid); + } + ok(7, bloom_misses > N_NOT_IN_HIST * 9 / 10); /* >90% should miss */ + diag("not-in-history entries: %lu/%d were bloom misses (expected most)", + bloom_misses, N_NOT_IN_HIST); + + bloom_free(bloom); + HISclose(h); + + /* Cleanup temp directory. */ + { + char cmd[128]; + snprintf(cmd, sizeof(cmd), "/bin/rm -rf %s", tmpdir); + if (system(cmd) < 0) + sysdiag("can't clean up %s", tmpdir); + } + ok(8, true); /* cleanup */ + + return 0; +} diff --git a/tests/lib/bloom-t.c b/tests/lib/bloom-t.c new file mode 100644 index 000000000..ef3b62bd1 --- /dev/null +++ b/tests/lib/bloom-t.c @@ -0,0 +1,128 @@ +/* Test suite for lib/bloom.c. */ + +#include "portable/system.h" + +#include + +#include "inn/bloom.h" +#include "inn/libinn.h" +#include "tap/basic.h" + + +/* +** Generate a deterministic HASH from an integer for testing. +*/ +static HASH +make_hash(unsigned long n) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "", n); + return HashMessageID(buf); +} + + +int +main(void) +{ + struct bloom_filter *bf; + HASH h1, h2, h3; + unsigned long i; + unsigned long false_positives; + unsigned long n_check; + + test_init(20); + + /* Basic creation. */ + bf = bloom_create(1000, 10000); + ok(1, bf != NULL); + ok(2, bloom_bits(bf) >= 1000 * 20); /* 0.01% FP needs ~20 bits/entry */ + ok(3, bloom_nhash(bf) == 14); + ok(4, bloom_count(bf) == 0); + + /* Add and check: true positives. */ + h1 = make_hash(1); + h2 = make_hash(2); + h3 = make_hash(3); + bloom_add(bf, &h1); + bloom_add(bf, &h2); + ok(5, bloom_count(bf) == 2); + ok(6, bloom_check(bf, &h1)); + ok(7, bloom_check(bf, &h2)); + + /* True negative. */ + ok(8, !bloom_check(bf, &h3)); + + bloom_free(bf); + + /* Larger test: verify false positive rate. + * Add 10,000 items, then check 100,000 items that were NOT added. + * At 0.01% target FP rate, we expect ~10 false positives out of + * 100,000 checks. Allow up to 50 (0.05%) to account for variance. */ + bf = bloom_create(10000, 10000); + ok(9, bf != NULL); + + for (i = 0; i < 10000; i++) { + HASH h = make_hash(i); + bloom_add(bf, &h); + } + ok(10, bloom_count(bf) == 10000); + + /* Verify all added items are found (no false negatives). */ + for (i = 0; i < 10000; i++) { + HASH h = make_hash(i); + if (!bloom_check(bf, &h)) { + ok(11, false); + diag("false negative at i=%lu", i); + goto fp_test; + } + } + ok(11, true); + +fp_test: + /* Count false positives from items never added. */ + false_positives = 0; + n_check = 100000; + for (i = 10000; i < 10000 + n_check; i++) { + HASH h = make_hash(i); + if (bloom_check(bf, &h)) + false_positives++; + } + ok(12, false_positives <= 50); + if (false_positives > 50) + diag("false positives: %lu out of %lu (expected <= 50)", + false_positives, n_check); + else + diag("false positives: %lu out of %lu (%.4f%%)", + false_positives, n_check, + 100.0 * (double) false_positives / (double) n_check); + + bloom_free(bf); + + /* Test with different FP rate parameters. */ + bf = bloom_create(1000, 100); /* 1% FP rate */ + ok(13, bf != NULL); + ok(14, bloom_nhash(bf) == 7); /* k=7 for 1% FP */ + bloom_free(bf); + + bf = bloom_create(1000, 1000); /* 0.1% FP rate */ + ok(15, bf != NULL); + ok(16, bloom_nhash(bf) == 10); /* k=10 for 0.1% FP */ + bloom_free(bf); + + /* Edge case: very small filter. */ + bf = bloom_create(1, 10000); + ok(17, bf != NULL); + ok(18, bloom_bits(bf) >= 64); /* minimum size */ + bloom_free(bf); + + /* Test with max nhash (fp_inv >= 10000000, nhash=24). + * Exercises the full positions array to catch overflow. */ + bf = bloom_create(100, 10000000); + ok(19, bf != NULL); + h1 = make_hash(42); + bloom_add(bf, &h1); + ok(20, bloom_check(bf, &h1)); + bloom_free(bf); + + return 0; +}