From 48e99a440c84bd230ddc6ea72d57aa073663bd26 Mon Sep 17 00:00:00 2001
From: pHash Audit <audit@local>
Date: Mon, 25 May 2026 10:58:10 -0700
Subject: [PATCH 1/2] Fix audio hash leaks, threading, and resource bugs

Addresses audit findings #7, #8, #9, #13, #14, #19, #20, #21, #29.

Correctness:
- #7/#8 Removed the entire HAVE_PTHREAD block (ph_audio_thread,
  ph_audio_hashes). It referenced types `slice` and `DP` and a
  function `ph_num_threads()` that were never declared anywhere in
  the public headers (so the block could not actually be compiled),
  the worker function had no return statement (UB on the void*
  return), and the slicing math walked one element past the input
  (count=10, threads=3 gave 5+3+3=11). Callers should drive
  ph_readaudio / ph_audiohash from their own thread pool.
- #9 ph_readaudio2 now reads src_data.output_frames_gen (the count
  libsamplerate actually produced) instead of output_frames (the
  caller-supplied capacity), eliminating the downstream over-read.
- ph_audiohash: corrected the nb_frames formula. The old expression
  could go negative for short inputs and was then passed to malloc.

Memory and safety:
- #13 readaudio_mp3: every error path now closes/deletes the
  mpg123_handle and calls mpg123_exit. Adds NULL checks on the
  malloc'd output, validates channels and samplerate (channels==0
  would have produced an infinite loop), clamps the per-encoding
  inner loop so a decode that returns fewer bytes than `channels`
  cannot over-read the decode buffer.
- #14 readaudio_snd: validates channels/samplerate/frames, checks
  every malloc, and frees inbuf on read failure. ph_audiohash's
  malloc is now checked and frame_length-sized buffers are
  std::vectors (no more VLAs).
- #20/#21 Replaced double window[4096], double frame[4096], double
  magnF[2048], freqs/binbarks VLAs, and the per-filter new[] grid
  with std::vector. ph_audio_distance_ber preallocates `dist` once
  outside the per-lag loop instead of realloc'ing on every
  iteration; the worst-case M is just N1/block_size.
- ph_compare_blocks now returns 1.0 (worst distance) rather than
  dividing by zero when block_size <= 0.
- ph_audio_distance_ber and ph_audiohash zero their out parameters
  on every failure path; the previous code left Nc / nb_frames
  uninitialised when allocation failed.

Performance:
- #19 ph_bitcount uses __builtin_popcount, matching how
  ph_hamming_distance does its bit counting.

Header hygiene (#29):
- ph_fft.h includes <complex> (C++) instead of <complex.h> (C99)
  and drops the `using namespace std;` that was being injected
  into every translation unit that included it. fft() takes
  std::complex<double>*.
- fft() now validates that N is a power of 2 (the radix-2
  recursion silently misbehaves otherwise).
---
 src/audiophash.cpp | 585 +++++++++++++++++++++++----------------------
 src/ph_fft.cpp     |  33 +--
 src/ph_fft.h       |  10 +-
 3 files changed, 322 insertions(+), 306 deletions(-)
diff --git a/src/audiophash.cpp b/src/audiophash.cpp
index 3df017a2..08cfaf54 100644
--- a/src/audiophash.cpp
+++ b/src/audiophash.cpp
@@ -23,20 +23,23 @@
 */
 
 #include "audiophash.h"
+
 #include <samplerate.h>
 #include <sndfile.h>
+
+#include <vector>
 #ifdef HAVE_LIBMPG123
 #include <mpg123.h>
 #endif
 
-int ph_count_samples(const char *filename, int sr, int channels) {
+int ph_count_samples(const char *filename, int /*sr*/, int /*channels*/) {
     SF_INFO sf_info;
     sf_info.format = 0;
     SNDFILE *sndfile = sf_open(filename, SFM_READ, &sf_info);
     if (sndfile == NULL) {
         return -1;
     }
-    int count = sf_info.frames;
+    int count = (int)sf_info.frames;
     sf_close(sndfile);
     return count;
 }
@@ -45,174 +48,243 @@ int ph_count_samples(const char *filename, int sr, int channels) {
 
 static float *readaudio_mp3(const char *filename, long *sr, const float nbsecs,
                             unsigned int *buflen) {
-    mpg123_handle *m;
+    mpg123_handle *m = NULL;
+    unsigned char *decbuf = NULL;
+    float *buffer = NULL;
+    bool mpg_inited = false;
     int ret;
 
-    if (mpg123_init() != MPG123_OK || ((m = mpg123_new(NULL, &ret)) == NULL) ||
-        mpg123_open(m, filename) != MPG123_OK) {
+    *buflen = 0;
+
+    if (mpg123_init() != MPG123_OK) {
         fprintf(stderr, "unable to init mpg\n");
         return NULL;
     }
+    mpg_inited = true;
 
-    /*turn off logging */
-    mpg123_param(m, MPG123_ADD_FLAGS, MPG123_QUIET, 0);
+    m = mpg123_new(NULL, &ret);
+    if (m == NULL) {
+        fprintf(stderr, "unable to init mpg\n");
+        goto fail;
+    }
 
-    off_t totalsamples;
+    if (mpg123_open(m, filename) != MPG123_OK) {
+        fprintf(stderr, "unable to open mp3\n");
+        goto fail;
+    }
 
+    mpg123_param(m, MPG123_ADD_FLAGS, MPG123_QUIET, 0);
     mpg123_scan(m);
-    totalsamples = mpg123_length(m);
-
-    int meta = mpg123_meta_check(m);
 
-    int channels, encoding;
+    {
+        off_t totalsamples = mpg123_length(m);
+        if (totalsamples <= 0) goto fail;
 
-    if (mpg123_getformat(m, sr, &channels, &encoding) != MPG123_OK) {
-        fprintf(stderr, "unable to get format\n");
-        return NULL;
-    }
-
-    mpg123_format_none(m);
-    mpg123_format(m, *sr, channels, encoding);
-
-    size_t decbuflen = mpg123_outblock(m);
-    unsigned char *decbuf = (unsigned char *)malloc(decbuflen);
-    if (decbuf == NULL) {
-        printf("mem alloc error\n");
-        return NULL;
-    }
-
-    unsigned int nbsamples = (nbsecs <= 0) ? totalsamples : nbsecs * (*sr);
-    nbsamples = (nbsamples < totalsamples) ? nbsamples : totalsamples;
+        int channels = 0, encoding = 0;
+        if (mpg123_getformat(m, sr, &channels, &encoding) != MPG123_OK) {
+            fprintf(stderr, "unable to get format\n");
+            goto fail;
+        }
+        if (channels <= 0 || *sr <= 0) goto fail;
 
-    size_t i, j, index = 0, done;
+        mpg123_format_none(m);
+        if (mpg123_format(m, *sr, channels, encoding) != MPG123_OK) goto fail;
 
-    float *buffer = (float *)malloc(nbsamples * sizeof(float));
-    *buflen = nbsamples;
+        size_t decbuflen = mpg123_outblock(m);
+        decbuf = (unsigned char *)malloc(decbuflen);
+        if (decbuf == NULL) {
+            fprintf(stderr, "mem alloc error\n");
+            goto fail;
+        }
 
-    do {
-        ret = mpg123_read(m, decbuf, decbuflen, &done);
-        switch (encoding) {
-            case MPG123_ENC_SIGNED_16:
-                for (i = 0; i < done / sizeof(short); i += channels) {
-                    buffer[index] = 0.0f;
-                    for (j = 0; j < channels; j++) {
-                        buffer[index] +=
-                            (float)(((short *)decbuf)[i + j]) / (float)SHRT_MAX;
+        unsigned int nbsamples =
+            (nbsecs <= 0) ? (unsigned int)totalsamples
+                          : (unsigned int)(nbsecs * (*sr));
+        if ((off_t)nbsamples > totalsamples) {
+            nbsamples = (unsigned int)totalsamples;
+        }
+        if (nbsamples == 0) goto fail;
+
+        buffer = (float *)malloc((size_t)nbsamples * sizeof(float));
+        if (buffer == NULL) goto fail;
+        *buflen = nbsamples;
+
+        size_t i, j, index = 0, done = 0;
+        do {
+            ret = mpg123_read(m, decbuf, decbuflen, &done);
+            switch (encoding) {
+                case MPG123_ENC_SIGNED_16: {
+                    size_t samples = done / sizeof(short);
+                    if (samples < (size_t)channels) break;
+                    size_t limit = samples - (size_t)channels + 1;
+                    for (i = 0; i < limit; i += channels) {
+                        float acc = 0.0f;
+                        for (j = 0; j < (size_t)channels; j++) {
+                            acc += (float)(((short *)decbuf)[i + j]) /
+                                   (float)SHRT_MAX;
+                        }
+                        buffer[index++] = acc / channels;
+                        if (index >= nbsamples) break;
                     }
-                    buffer[index++] /= channels;
-                    if (index >= nbsamples) break;
-                }
-                break;
-            case MPG123_ENC_SIGNED_8:
-                for (i = 0; i < done / sizeof(char); i += channels) {
-                    buffer[index] = 0.0f;
-                    for (j = 0; j < channels; j++) {
-                        buffer[index] +=
-                            (float)(((char *)decbuf)[i + j]) / (float)SCHAR_MAX;
+                } break;
+                case MPG123_ENC_SIGNED_8: {
+                    size_t samples = done / sizeof(char);
+                    if (samples < (size_t)channels) break;
+                    size_t limit = samples - (size_t)channels + 1;
+                    for (i = 0; i < limit; i += channels) {
+                        float acc = 0.0f;
+                        for (j = 0; j < (size_t)channels; j++) {
+                            acc += (float)(((char *)decbuf)[i + j]) /
+                                   (float)SCHAR_MAX;
+                        }
+                        buffer[index++] = acc / channels;
+                        if (index >= nbsamples) break;
                     }
-                    buffer[index++] /= channels;
-                    if (index >= nbsamples) break;
-                }
-                break;
-            case MPG123_ENC_FLOAT_32:
-                for (i = 0; i < done / sizeof(float); i += channels) {
-                    buffer[index] = 0.0f;
-                    for (j = 0; j < channels; j++) {
-                        buffer[index] += ((float *)decbuf)[i + j];
+                } break;
+                case MPG123_ENC_FLOAT_32: {
+                    size_t samples = done / sizeof(float);
+                    if (samples < (size_t)channels) break;
+                    size_t limit = samples - (size_t)channels + 1;
+                    for (i = 0; i < limit; i += channels) {
+                        float acc = 0.0f;
+                        for (j = 0; j < (size_t)channels; j++) {
+                            acc += ((float *)decbuf)[i + j];
+                        }
+                        buffer[index++] = acc / channels;
+                        if (index >= nbsamples) break;
                     }
-                    buffer[index++] /= channels;
-                    if (index >= nbsamples) break;
-                }
-                break;
-            default:
-                done = 0;
-        }
+                } break;
+                default:
+                    done = 0;
+            }
+        } while (ret == MPG123_OK && index < nbsamples);
 
-    } while (ret == MPG123_OK && index < nbsamples);
+        if (index == 0) {
+            free(buffer);
+            buffer = NULL;
+            *buflen = 0;
+            goto fail;
+        }
+        *buflen = (unsigned int)index;
+    }
 
     free(decbuf);
     mpg123_close(m);
     mpg123_delete(m);
     mpg123_exit();
-
     return buffer;
+
+fail:
+    free(decbuf);
+    if (m) {
+        mpg123_close(m);
+        mpg123_delete(m);
+    }
+    if (mpg_inited) mpg123_exit();
+    return buffer ? buffer : NULL;
 }
 
 #endif /*HAVE_LIBMPG123*/
 
 static float *readaudio_snd(const char *filename, long *sr, const float nbsecs,
                             unsigned int *buflen) {
+    *buflen = 0;
     SF_INFO sf_info;
     sf_info.format = 0;
     SNDFILE *sndfile = sf_open(filename, SFM_READ, &sf_info);
     if (sndfile == NULL) {
         return NULL;
     }
+    if (sf_info.channels <= 0 || sf_info.samplerate <= 0 ||
+        sf_info.frames <= 0) {
+        sf_close(sndfile);
+        return NULL;
+    }
 
-    /* normalize */
     sf_command(sndfile, SFC_SET_NORM_FLOAT, NULL, SF_TRUE);
 
     *sr = (long)sf_info.samplerate;
 
-    // allocate input buffer for signal
     unsigned int src_frames =
-        (nbsecs <= 0) ? sf_info.frames : (nbsecs * sf_info.samplerate);
-    src_frames = (sf_info.frames < src_frames) ? sf_info.frames : src_frames;
-    float *inbuf =
-        (float *)malloc(src_frames * sf_info.channels * sizeof(float));
+        (nbsecs <= 0) ? (unsigned int)sf_info.frames
+                      : (unsigned int)(nbsecs * sf_info.samplerate);
+    if ((sf_count_t)src_frames > sf_info.frames) {
+        src_frames = (unsigned int)sf_info.frames;
+    }
+    if (src_frames == 0) {
+        sf_close(sndfile);
+        return NULL;
+    }
+
+    float *inbuf = (float *)malloc((size_t)src_frames *
+                                   (size_t)sf_info.channels * sizeof(float));
+    if (inbuf == NULL) {
+        sf_close(sndfile);
+        return NULL;
+    }
 
-    /*read frames */
     sf_count_t cnt_frames = sf_readf_float(sndfile, inbuf, src_frames);
+    sf_close(sndfile);
+    if (cnt_frames <= 0) {
+        free(inbuf);
+        return NULL;
+    }
 
-    float *buf = (float *)malloc(cnt_frames * sizeof(float));
+    float *buf = (float *)malloc((size_t)cnt_frames * sizeof(float));
+    if (buf == NULL) {
+        free(inbuf);
+        return NULL;
+    }
 
-    // average across all channels
-    int i, j, indx = 0;
-    for (i = 0; i < cnt_frames * sf_info.channels; i += sf_info.channels) {
-        buf[indx] = 0;
-        for (j = 0; j < sf_info.channels; j++) {
-            buf[indx] += inbuf[i + j];
+    int indx = 0;
+    for (sf_count_t i = 0; i < cnt_frames * sf_info.channels;
+         i += sf_info.channels) {
+        float acc = 0.0f;
+        for (int j = 0; j < sf_info.channels; j++) {
+            acc += inbuf[i + j];
         }
-        buf[indx++] /= sf_info.channels;
+        buf[indx++] = acc / sf_info.channels;
     }
     free(inbuf);
 
-    *buflen = indx;
+    *buflen = (unsigned int)indx;
     return buf;
 }
 
-float *ph_readaudio2(const char *filename, int sr, float *sigbuf, int &buflen,
-                     const float nbsecs) {
-    long orig_sr;
-    float *inbuffer = NULL;
-    unsigned int inbufferlength;
+float *ph_readaudio2(const char *filename, int sr, float * /*sigbuf*/,
+                     int &buflen, const float nbsecs) {
     buflen = 0;
+    if (filename == NULL || sr <= 0) return NULL;
+
+    long orig_sr = 0;
+    float *inbuffer = NULL;
+    unsigned int inbufferlength = 0;
 
     const char *suffix = strrchr(filename, '.');
     if (suffix == NULL) return NULL;
     if (!strcasecmp(suffix + 1, "mp3")) {
 #ifdef HAVE_LIBMPG123
         inbuffer = readaudio_mp3(filename, &orig_sr, nbsecs, &inbufferlength);
-#endif /* HAVE_LIBMPG123 */
+#endif
     } else {
         inbuffer = readaudio_snd(filename, &orig_sr, nbsecs, &inbufferlength);
     }
 
-    if (inbuffer == NULL) {
+    if (inbuffer == NULL || inbufferlength == 0 || orig_sr <= 0) {
+        free(inbuffer);
         return NULL;
     }
 
-    /* resample float array */
-    /* set desired sr ratio */
     double sr_ratio = (double)(sr) / (double)orig_sr;
     if (src_is_valid_ratio(sr_ratio) == 0) {
         free(inbuffer);
         return NULL;
     }
 
-    /* allocate output buffer for conversion */
-    unsigned int outbufferlength = sr_ratio * inbufferlength;
-    float *outbuffer = (float *)malloc(outbufferlength * sizeof(float));
+    unsigned int outbufferlength =
+        (unsigned int)(sr_ratio * (double)inbufferlength) + 1;
+    float *outbuffer =
+        (float *)malloc((size_t)outbufferlength * sizeof(float));
     if (!outbuffer) {
         free(inbuffer);
         return NULL;
@@ -234,7 +306,6 @@ float *ph_readaudio2(const char *filename, int sr, float *sigbuf, int &buflen,
     src_data.end_of_input = SF_TRUE;
     src_data.src_ratio = sr_ratio;
 
-    /* sample rate conversion */
     if ((error = src_process(src_state, &src_data)) != 0) {
         free(inbuffer);
         free(outbuffer);
@@ -242,115 +313,117 @@ float *ph_readaudio2(const char *filename, int sr, float *sigbuf, int &buflen,
         return NULL;
     }
 
-    buflen = src_data.output_frames;
+    // libsamplerate fills output_frames_gen with the count actually produced;
+    // output_frames is the capacity the caller supplied. Using the latter
+    // (the original bug) over-reads outbuffer downstream.
+    buflen = (int)src_data.output_frames_gen;
 
     src_delete(src_state);
     free(inbuffer);
 
+    if (buflen <= 0) {
+        free(outbuffer);
+        return NULL;
+    }
+
     return outbuffer;
 }
 
-float *ph_readaudio(const char *filename, int sr, int channels, float *sigbuf,
-                    int &buflen, const float nbsecs) {
+float *ph_readaudio(const char *filename, int sr, int /*channels*/,
+                    float *sigbuf, int &buflen, const float nbsecs) {
     if (!filename || sr <= 0) return NULL;
     return ph_readaudio2(filename, sr, sigbuf, buflen, nbsecs);
 }
 
 uint32_t *ph_audiohash(float *buf, int N, int sr, int &nb_frames) {
-    int frame_length = 4096;  // 2^12
-    int nfft = frame_length;
-    int nfft_half = 2048;
-    int start = 0;
-    int end = start + frame_length - 1;
-    int overlap = (int)(31 * frame_length / 32);
-    int advance = frame_length - overlap;
-    int index = 0;
-    nb_frames = (int)(floor(N / advance) - floor(frame_length / advance) + 1);
-    double window[frame_length];
-    for (int i = 0; i < frame_length; i++) {
-        // hamming window
-        window[i] = 0.54 - 0.46 * cos(2 * M_PI * i / (frame_length - 1));
+    nb_frames = 0;
+    const int frame_length = 4096;
+    const int nfft = frame_length;
+    const int nfft_half = 2048;
+    if (buf == NULL || N < frame_length || sr <= 0) return NULL;
+
+    const int overlap = 31 * frame_length / 32;
+    const int advance = frame_length - overlap;
+    if (advance <= 0) return NULL;
+
+    nb_frames = (N - frame_length) / advance + 1;
+    if (nb_frames <= 0) {
+        nb_frames = 0;
+        return NULL;
     }
 
-    double frame[frame_length];
-    complex<double> *pF = new complex<double>[nfft];
-
-    double magnF[nfft_half];
-    double maxF = 0.0;
-    double maxB = 0.0;
-
-    double minfreq = 300;
-    double maxfreq = 3000;
-    double minbark = 6 * asinh(minfreq / 600.0);
-    double maxbark = 6 * asinh(maxfreq / 600.0);
-    double nyqbark = maxbark - minbark;
-    int nfilts = 33;
-    double stepbarks = nyqbark / (nfilts - 1);
-    int nb_barks = (int)(floor(nfft_half / 2 + 1));
-    double barkwidth = 1.06;
-
-    double freqs[nb_barks];
-    double binbarks[nb_barks];
-    double curr_bark[nfilts];
-    double prev_bark[nfilts];
-    for (int i = 0; i < nfilts; i++) {
-        prev_bark[i] = 0.0;
+    std::vector<double> window(frame_length);
+    for (int i = 0; i < frame_length; i++) {
+        window[i] = 0.54 - 0.46 * cos(2 * M_PI * i / (frame_length - 1));
     }
-    uint32_t *hash = (uint32_t *)malloc(nb_frames * sizeof(uint32_t));
-    double lof, hif;
 
+    std::vector<double> frame(frame_length);
+    std::vector<std::complex<double>> pF(nfft);
+    std::vector<double> magnF(nfft_half);
+
+    const double minfreq = 300;
+    const double maxfreq = 3000;
+    const double minbark = 6 * asinh(minfreq / 600.0);
+    const double maxbark = 6 * asinh(maxfreq / 600.0);
+    const double nyqbark = maxbark - minbark;
+    const int nfilts = 33;
+    const double stepbarks = nyqbark / (nfilts - 1);
+    const int nb_barks = nfft_half / 2 + 1;
+    const double barkwidth = 1.06;
+
+    std::vector<double> binbarks(nb_barks);
+    std::vector<double> curr_bark(nfilts, 0.0);
+    std::vector<double> prev_bark(nfilts, 0.0);
     for (int i = 0; i < nb_barks; i++) {
-        binbarks[i] = 6 * asinh(i * sr / nfft_half / 600.0);
-        freqs[i] = i * sr / nfft_half;
-    }
-    double **wts = new double *[nfilts];
-    for (int i = 0; i < nfilts; i++) {
-        wts[i] = new double[nfft_half];
-    }
-    for (int i = 0; i < nfilts; i++) {
-        for (int j = 0; j < nfft_half; j++) {
-            wts[i][j] = 0.0;
-        }
+        binbarks[i] = 6 * asinh((double)i * sr / nfft_half / 600.0);
     }
 
-    // calculate wts for each filter
+    // wts[i*nfft_half + j]
+    std::vector<double> wts((size_t)nfilts * (size_t)nfft_half, 0.0);
     for (int i = 0; i < nfilts; i++) {
         double f_bark_mid = minbark + i * stepbarks;
         for (int j = 0; j < nb_barks; j++) {
             double barkdiff = binbarks[j] - f_bark_mid;
-            lof = -2.5 * (barkdiff / barkwidth - 0.5);
-            hif = barkdiff / barkwidth + 0.5;
+            double lof = -2.5 * (barkdiff / barkwidth - 0.5);
+            double hif = barkdiff / barkwidth + 0.5;
             double m = std::min(lof, hif);
             m = std::min(0.0, m);
             m = pow(10, m);
-            wts[i][j] = m;
+            wts[(size_t)i * nfft_half + j] = m;
         }
     }
 
-    // p = fftw_plan_dft_r2c_1d(frame_length,frame,pF,FFTW_ESTIMATE);
+    uint32_t *hash = (uint32_t *)malloc((size_t)nb_frames * sizeof(uint32_t));
+    if (hash == NULL) {
+        nb_frames = 0;
+        return NULL;
+    }
 
-    while (end < N) {
-        maxF = 0.0;
-        maxB = 0.0;
+    int start = 0;
+    int end = start + frame_length - 1;
+    int index = 0;
+
+    while (end < N && index < nb_frames) {
+        double maxF = 0.0;
+        double maxB = 0.0;
         for (int i = 0; i < frame_length; i++) {
             frame[i] = window[i] * buf[start + i];
         }
-        // fftw_execute(p);
-        if (fft(frame, frame_length, pF) < 0) {
+        if (fft(frame.data(), frame_length, pF.data()) < 0) {
+            free(hash);
+            nb_frames = 0;
             return NULL;
         }
         for (int i = 0; i < nfft_half; i++) {
-            // magnF[i] = sqrt(pF[i][0]*pF[i][0] +  pF[i][1]*pF[i][1] );
-            magnF[i] = abs(pF[i]);
-            if (magnF[i] > maxF) {
-                maxF = magnF[i];
-            }
+            magnF[i] = std::abs(pF[i]);
+            if (magnF[i] > maxF) maxF = magnF[i];
         }
 
         for (int i = 0; i < nfilts; i++) {
             curr_bark[i] = 0;
+            const double *wts_row = &wts[(size_t)i * nfft_half];
             for (int j = 0; j < nfft_half; j++) {
-                curr_bark[i] += wts[i][j] * magnF[j];
+                curr_bark[i] += wts_row[j] * magnF[j];
             }
             if (curr_bark[i] > maxB) maxB = curr_bark[i];
         }
@@ -363,51 +436,43 @@ uint32_t *ph_audiohash(float *buf, int N, int sr, int &nb_frames) {
             if (H > 0) curr_hash |= 0x00000001;
         }
 
-        hash[index] = curr_hash;
-        for (int i = 0; i < nfilts; i++) {
-            prev_bark[i] = curr_bark[i];
-        }
-        index += 1;
+        hash[index++] = curr_hash;
+        for (int i = 0; i < nfilts; i++) prev_bark[i] = curr_bark[i];
         start += advance;
         end += advance;
     }
 
-    // fftw_destroy_plan(p);
-    // fftw_free(pF);
-    delete[] pF;
-    for (int i = 0; i < nfilts; i++) {
-        delete[] wts[i];
-    }
-    delete[] wts;
+    nb_frames = index;
     return hash;
 }
 
 int ph_bitcount(uint32_t n) {
-// parallel bit count
-#define MASK_01010101 (((uint32_t)(-1)) / 3)
-#define MASK_00110011 (((uint32_t)(-1)) / 5)
-#define MASK_00001111 (((uint32_t)(-1)) / 17)
-
-    n = (n & MASK_01010101) + ((n >> 1) & MASK_01010101);
-    n = (n & MASK_00110011) + ((n >> 2) & MASK_00110011);
-    n = (n & MASK_00001111) + ((n >> 4) & MASK_00001111);
-    return n % 255;
+    return __builtin_popcount(n);
 }
 
 double ph_compare_blocks(const uint32_t *ptr_blockA, const uint32_t *ptr_blockB,
                          const int block_size) {
+    if (block_size <= 0) return 1.0;
     double result = 0;
     for (int i = 0; i < block_size; i++) {
         uint32_t xordhash = ptr_blockA[i] ^ ptr_blockB[i];
         result += ph_bitcount(xordhash);
     }
-    result = result / (32 * block_size);
+    result = result / (32.0 * block_size);
     return result;
 }
+
 double *ph_audio_distance_ber(uint32_t *hash_a, const int Na, uint32_t *hash_b,
                               const int Nb, const float threshold,
                               const int block_size, int &Nc) {
-    uint32_t *ptrA, *ptrB;
+    Nc = 0;
+    if (hash_a == NULL || hash_b == NULL || Na <= 0 || Nb <= 0 ||
+        block_size <= 0) {
+        return NULL;
+    }
+
+    uint32_t *ptrA;
+    uint32_t *ptrB;
     int N1, N2;
     if (Na <= Nb) {
         ptrA = hash_a;
@@ -423,124 +488,74 @@ double *ph_audio_distance_ber(uint32_t *hash_a, const int Na, uint32_t *hash_b,
         N2 = Na;
     }
 
-    double *pC = new double[Nc];
-    if (!pC) return NULL;
-    int k, M, nb_above, nb_below, hash1_index, hash2_index;
-    double sum_above, sum_below, above_factor, below_factor;
+    double *pC = (double *)malloc((size_t)Nc * sizeof(double));
+    if (!pC) {
+        Nc = 0;
+        return NULL;
+    }
 
-    uint32_t *pha, *phb;
-    double *dist = NULL;
+    // Worst-case M occurs at i=0 (smallest constraint): floor(N1/block_size).
+    // Allocate once outside the loop instead of realloc'ing per iteration.
+    int max_M = N1 / block_size;
+    if (max_M < 1) max_M = 1;
+    double *dist = (double *)malloc((size_t)max_M * sizeof(double));
+    if (!dist) {
+        free(pC);
+        Nc = 0;
+        return NULL;
+    }
 
     for (int i = 0; i < Nc; i++) {
-        M = (int)floor(std::min(N1, N2 - i) / block_size);
-
-        pha = ptrA;
-        phb = ptrB + i;
-
-        double *tmp_dist = (double *)realloc(dist, M * sizeof(double));
-        if (!tmp_dist) {
-            return NULL;
+        int M = std::min(N1, N2 - i) / block_size;
+        if (M <= 0) {
+            pC[i] = 0.5;
+            continue;
         }
-        dist = tmp_dist;
-        dist[0] = ph_compare_blocks(pha, phb, block_size);
 
-        k = 1;
+        uint32_t *pha = ptrA;
+        uint32_t *phb = ptrB + i;
+        int hash1_index = 0;
+        int hash2_index = i;
+        int k = 0;
 
-        pha += block_size;
-        phb += block_size;
-
-        hash1_index = block_size;
-        hash2_index = i + block_size;
-
-        while ((hash1_index < N1 - block_size) &&
-               (hash2_index < N2 - block_size)) {
+        while (k < M && hash1_index + block_size <= N1 &&
+               hash2_index + block_size <= N2) {
             dist[k++] = ph_compare_blocks(pha, phb, block_size);
             hash1_index += block_size;
             hash2_index += block_size;
             pha += block_size;
             phb += block_size;
         }
-        sum_above = 0;
-        sum_below = 0;
-        nb_above = 0;
-        nb_below = 0;
-        for (int n = 0; n < M; n++) {
+
+        if (k == 0) {
+            pC[i] = 0.5;
+            continue;
+        }
+
+        double sum_above = 0, sum_below = 0;
+        for (int n = 0; n < k; n++) {
             if (dist[n] <= threshold) {
                 sum_below += 1 - dist[n];
-                nb_below++;
             } else {
                 sum_above += 1 - dist[n];
-                nb_above++;
             }
         }
-        above_factor = sum_above / M;
-        below_factor = sum_below / M;
+        double above_factor = sum_above / k;
+        double below_factor = sum_below / k;
         pC[i] = 0.5 * (1 + below_factor - above_factor);
     }
 
     free(dist);
     return pC;
 }
-#ifdef HAVE_PTHREAD
-
-void *ph_audio_thread(void *p) {
-    slice *s = (slice *)p;
-    for (int i = 0; i < s->n; ++i) {
-        DP *dp = (DP *)s->hash_p[i];
-        int N, count;
-        pair<int, int> *p = (pair<int, int> *)s->hash_params;
-        float *buf = ph_readaudio(dp->id, p->first, p->second, NULL, N);
-        uint32_t *hash = ph_audiohash(buf, N, p->first, count);
-        free(buf);
-        buf = NULL;
-        dp->hash = hash;
-        dp->hash_length = count;
-    }
-}
-
-DP **ph_audio_hashes(char *files[], int count, int sr, int channels,
-                     int threads) {
-    if (!files || count == 0) return NULL;
 
-    int num_threads;
-    if (threads > count) {
-        num_threads = count;
-    } else if (threads > 0) {
-        num_threads = threads;
-    } else {
-        num_threads = ph_num_threads();
-    }
-
-    DP **hashes = (DP **)malloc(count * sizeof(DP *));
-
-    for (int i = 0; i < count; ++i) {
-        hashes[i] = (DP *)malloc(sizeof(DP));
-        hashes[i]->id = strdup(files[i]);
-    }
-
-    pthread_t thds[num_threads];
-
-    int rem = count % num_threads;
-    int start = 0;
-    int off = 0;
-    slice *s = new slice[num_threads];
-    for (int n = 0; n < num_threads; ++n) {
-        off = (int)floor((count / (float)num_threads) +
-                         (rem > 0 ? num_threads - (count % num_threads) : 0));
-
-        s[n].hash_p = &hashes[start];
-        s[n].n = off;
-        s[n].hash_params = new pair<int, int>(sr, channels);
-        start += off;
-        --rem;
-        pthread_create(&thds[n], NULL, ph_audio_thread, &s[n]);
-    }
-    for (int i = 0; i < num_threads; ++i) {
-        pthread_join(thds[i], NULL);
-        delete (pair<int, int> *)s[i].hash_params;
-    }
-    delete[] s;
-
-    return hashes;
-}
-#endif
+/*
+ * The previous version of this file contained a HAVE_PTHREAD-gated
+ * ph_audio_thread / ph_audio_hashes pair that referenced types `slice` and
+ * `DP` and a function `ph_num_threads()` that were never declared anywhere in
+ * the public headers, so the block could not actually be compiled. The
+ * slicing math was also off-by-one: for count=10, threads=3 it produced
+ * (5, 3, 3) which walks one element past the input. The dead code has been
+ * removed; callers can drive ph_readaudio / ph_audiohash from their own
+ * thread pool.
+ */
diff --git a/src/ph_fft.cpp b/src/ph_fft.cpp
index db261576..0e5d81d3 100644
--- a/src/ph_fft.cpp
+++ b/src/ph_fft.cpp
@@ -24,10 +24,12 @@
 
 #include "ph_fft.h"
 
-void fft_calc(const int N, const double *x, complex<double> *X,
-              complex<double> *P, const int step,
-              const complex<double> *twids) {
-    complex<double> *S = P + N / 2;
+#include <vector>
+
+static void fft_calc(const int N, const double *x, std::complex<double> *X,
+                     std::complex<double> *P, const int step,
+                     const std::complex<double> *twids) {
+    std::complex<double> *S = P + N / 2;
     if (N == 1) {
         X[0] = x[0];
         return;
@@ -36,26 +38,25 @@ void fft_calc(const int N, const double *x, complex<double> *X,
     fft_calc(N / 2, x, S, X, 2 * step, twids);
     fft_calc(N / 2, x + step, P, X, 2 * step, twids);
 
-    int k;
-    for (k = 0; k < N / 2; k++) {
+    for (int k = 0; k < N / 2; k++) {
         P[k] = P[k] * twids[k * step];
         X[k] = S[k] + P[k];
         X[k + N / 2] = S[k] - P[k];
     }
 }
 
-int fft(double *x, int N, complex<double> *X) {
-    complex<double> *twiddle_factors = new complex<double>[N / 2];
-    complex<double> *Xt = new complex<double>[N];
+int fft(double *x, int N, std::complex<double> *X) {
+    if (x == NULL || X == NULL || N <= 0) return -1;
+    // Cooley-Tukey radix-2 requires N to be a power of 2.
+    if ((N & (N - 1)) != 0) return -1;
 
-    int k;
-    for (k = 0; k < N / 2; k++) {
-        twiddle_factors[k] = polar(1.0, 2.0 * M_PI * k / N);
-    }
-    fft_calc(N, x, X, Xt, 1, twiddle_factors);
+    std::vector<std::complex<double>> twiddle_factors(N / 2);
+    std::vector<std::complex<double>> Xt(N);
 
-    delete[](twiddle_factors);
-    delete[](Xt);
+    for (int k = 0; k < N / 2; k++) {
+        twiddle_factors[k] = std::polar(1.0, 2.0 * M_PI * k / N);
+    }
+    fft_calc(N, x, X, Xt.data(), 1, twiddle_factors.data());
 
     return 0;
 }
diff --git a/src/ph_fft.h b/src/ph_fft.h
index 69ece097..7631e552 100644
--- a/src/ph_fft.h
+++ b/src/ph_fft.h
@@ -25,10 +25,10 @@
 #ifndef _FFT_H
 #define _FFT_H
 
-#include <complex.h>
-#include <math.h>
-#include <stdlib.h>
-using namespace std;
-int fft(double *x, int N, complex<double> *X);
+#include <complex>
+#include <cmath>
+#include <cstdlib>
+
+int fft(double *x, int N, std::complex<double> *X);
 
 #endif

From b32ff93b479bff377fb7de6a27bce7999775fe45 Mon Sep 17 00:00:00 2001
From: pHash Audit <audit@local>
Date: Mon, 25 May 2026 11:25:15 -0700
Subject: [PATCH 2/2] Align audio hash params with Haitsma-Kalker reference

Audit \xc2\xa74: the bit derivation, filterbank shape, and confidence
score in ph_audiohash already matched Haitsma-Kalker 2002, but several
parameters did not:

- frame_length was hard-coded to 4096 samples regardless of sample
  rate. At the example's sr=8000 that is 512 ms, far from the paper's
  0.37 s frame. Now derived as the power of 2 closest to sr * 0.37.

- frame advance was frame_length / 32 (97% overlap) giving ~62 frames/s
  at sr=8000. The paper specifies 31.25 frames/s (~32 ms advance).
  Now derived as round(sr / 31.25).

- maxfreq was 3000 Hz; the paper specifies 2000 Hz. The previous range
  extended the upper band by 1 kHz beyond Haitsma-Kalker. Now 2000 Hz.

nfft_half is now frame_length / 2 (was hard-coded 2048). Bit derivation
and filter weights are unchanged.

Note: this changes the temporal density of the fingerprint. Callers
passing block_size to ph_audio_distance_ber will likely want a smaller
value (e.g. 64 instead of the example's 256) because the absolute
frame count for the same audio drops by ~2x. Hashes produced by this
code are not compatible with hashes produced by the old parameters.
---
 src/audiophash.cpp | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/src/audiophash.cpp b/src/audiophash.cpp
index 08cfaf54..bfff707d 100644
--- a/src/audiophash.cpp
+++ b/src/audiophash.cpp
@@ -335,16 +335,32 @@ float *ph_readaudio(const char *filename, int sr, int /*channels*/,
     return ph_readaudio2(filename, sr, sigbuf, buflen, nbsecs);
 }
 
+// Round target up to the nearest power of 2, then choose whichever of
+// {that, that/2} is closer to target. Used to size FFT frames given a
+// target duration. The custom radix-2 FFT in ph_fft requires a power
+// of 2.
+static int closest_pow2(double target) {
+    if (target <= 1.0) return 1;
+    int p = 1;
+    while ((double)p < target) p *= 2;
+    int prev = p / 2;
+    return (target - (double)prev) > ((double)p - target) ? p : prev;
+}
+
 uint32_t *ph_audiohash(float *buf, int N, int sr, int &nb_frames) {
     nb_frames = 0;
-    const int frame_length = 4096;
+    if (buf == NULL || sr <= 0) return NULL;
+
+    // Haitsma-Kalker parameters: 0.37 s frames, 31.25 frames/sec
+    // (i.e. ~32 ms advance). Frame size is rounded to a power of 2
+    // because the bundled FFT is radix-2.
+    const int frame_length = closest_pow2((double)sr * 0.37);
     const int nfft = frame_length;
-    const int nfft_half = 2048;
-    if (buf == NULL || N < frame_length || sr <= 0) return NULL;
+    const int nfft_half = frame_length / 2;
+    if (N < frame_length) return NULL;
 
-    const int overlap = 31 * frame_length / 32;
-    const int advance = frame_length - overlap;
-    if (advance <= 0) return NULL;
+    int advance = (int)std::round((double)sr / 31.25);
+    if (advance <= 0) advance = 1;
 
     nb_frames = (N - frame_length) / advance + 1;
     if (nb_frames <= 0) {
@@ -361,8 +377,11 @@ uint32_t *ph_audiohash(float *buf, int N, int sr, int &nb_frames) {
     std::vector<std::complex<double>> pF(nfft);
     std::vector<double> magnF(nfft_half);
 
+    // Haitsma-Kalker: 33 log-spaced bands from 300 to 2000 Hz. The
+    // previous value of 3000 Hz extended the upper band by 1 kHz beyond
+    // the paper.
     const double minfreq = 300;
-    const double maxfreq = 3000;
+    const double maxfreq = 2000;
     const double minbark = 6 * asinh(minfreq / 600.0);
     const double maxbark = 6 * asinh(maxfreq / 600.0);
     const double nyqbark = maxbark - minbark;