Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a3b33c6
port: drive low-pass + keep_const to poly Ouroboros
jmxpearson Jun 4, 2026
11677b5
port: autonomy_score + integrate_poly_autonomous + generate_autonomous
jmxpearson Jun 4, 2026
818f14b
port: int16-aware voc-window loaders + blk445 staging script
jmxpearson Jun 4, 2026
7013d11
feat: edge-biased segment sampler with onset/offset/mid categories
jmxpearson Jun 4, 2026
33a0cdd
port: multi-resolution STFT loss module (rollout_refine.py)
jmxpearson Jun 4, 2026
a419b27
feat: spectral-rollout training step + loss_mode='spectral_rollout' b…
jmxpearson Jun 4, 2026
3a766c2
feat: model_seed_cv_spectral -- seed loop + cull + cold-start selection
jmxpearson Jun 4, 2026
17d7187
feat: blk445 spectral training entry point
jmxpearson Jun 4, 2026
68ebcc4
docs: spectral_rollout_blk445 recipe
jmxpearson Jun 4, 2026
4aed0d4
gitignore: poly_spectral_*/ training output dirs
jmxpearson Jun 4, 2026
e9ad1ab
fix: move periodic save_model out of the val block
jmxpearson Jun 4, 2026
63f1a22
fix: spectral val-block continue was skipping save_model too
jmxpearson Jun 5, 2026
b553863
feat: spec_warmup_epochs + precomputed tf_var for stable spectral tra…
jmxpearson Jun 5, 2026
c0ac15c
feat: --max-saved retains all per-epoch checkpoints for long runs
jmxpearson Jun 5, 2026
ce570f2
fix: monitor latest-checkpoint sort + add spectrogram plot script
jmxpearson Jun 5, 2026
d56fabb
plot_blk445_specgram: share waveform y-axis between target and autono…
jmxpearson Jun 5, 2026
16d0d32
monitor diagnose CLI + plot_loss_panels auto-smoothing
jmxpearson Jun 5, 2026
3de020d
plot_loss_panels: dashed warmup boundary per run
jmxpearson Jun 5, 2026
3524155
perf: drop duplicate Mamba forward in spectral_rollout_step
jmxpearson Jun 6, 2026
0a654ab
perf: speed up spectral-rollout training loop hot path
jmxpearson Jun 6, 2026
5d56174
feat: env_warmup_epochs (mirror of spec_warmup_epochs)
jmxpearson Jun 6, 2026
dbbb84f
plot_loss_panels: env1e4 in tab:green
jmxpearson Jun 6, 2026
7ce85fe
build: pull torch from the cu126 index so it runs on Pascal GPUs
jmxpearson Jun 6, 2026
a31bc63
perf: pluggable RK4 rollout backend (cudagraph/compile) + pow2 horizo…
jmxpearson Jun 6, 2026
28ef0d4
config: H_max=2048 (clean pow2 buckets) + expose rollout_backend/pow2…
jmxpearson Jun 6, 2026
6fbaded
feat: scan-based RK4 rollout core (rollout_backend='scan'), experimental
jmxpearson Jun 6, 2026
3f9b28c
plot_loss_panels: env1e5 in tab:red
jmxpearson Jun 6, 2026
ff13404
feat: osc_init van der Pol limit-cycle initialization (Strategy 1)
jmxpearson Jun 6, 2026
bc127d1
plot_loss_panels: osc1e5 in tab:purple
jmxpearson Jun 6, 2026
e81607a
feat: --lam-env-log log-ratio envelope loss
jmxpearson Jun 6, 2026
5fb1c71
plot_loss_panels: osc_none in tab:cyan, osc_log1e4 in tab:brown
jmxpearson Jun 6, 2026
c8df1f2
feat: signed amp_pen alongside autonomy_score (~zero extra compute go…
jmxpearson Jun 7, 2026
22eb3b7
feat: --spec-warmup-steps / --env-warmup-steps / --H-total-steps
jmxpearson Jun 7, 2026
b5411e6
plot_loss_panels: full_osc in tab:olive
jmxpearson Jun 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,6 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Training outputs from this PR's pipeline (checkpoints, plots, manifests, TB logs).
poly_spectral_*/
111 changes: 101 additions & 10 deletions data/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,14 @@ class aud_neur_ds(Dataset):
"""
think about how to change this to accept lists, rather than arrays
(probably just map, but make sure this works with arrays still)

If `categories` is passed (np.ndarray of int, shape (N,)), __getitem__ returns a
4-tuple (x, dxdt, dx2dt2, category) instead of the legacy 3-tuple. Used by the
edge-biased sampler so the training step knows which examples are ONSET/OFFSET/MID
(and should get the cold-start noise IC for the spectral rollout).
"""

def __init__(self, data, deriv_approx="nine-point", dxdt=[], dx2dt2=[]):
def __init__(self, data, deriv_approx="nine-point", dxdt=[], dx2dt2=[], categories=None):
self.x = data
if len(dxdt) > 0:
self.dxdt = dxdt
Expand All @@ -41,6 +46,13 @@ def __init__(self, data, deriv_approx="nine-point", dxdt=[], dx2dt2=[]):
)
elif deriv_approx == "nine-point":
self.dx2dt2 = deriv_approx_d2y(self.x)
# Optional per-example category labels (0=ONSET, 1=OFFSET, 2=MID) used by the
# edge-biased training loop to pick which examples get the cold-start noise IC.
if categories is not None:
assert len(categories) == self.x.shape[0], "categories must match data length"
self.categories = np.asarray(categories, dtype=np.int64)
else:
self.categories = None

def __len__(self):

Expand All @@ -52,12 +64,17 @@ def __getitem__(self, idx):
dxdt = self.dxdt[idx]
dx2dt2 = self.dx2dt2[idx]

# Store batches as float32 -- the model runs in float32, so float64 here just
# doubled the host->device transfer bytes and forced a post-copy GPU cast. The
# derivative numpy arrays remain float64; only the per-item tensor is downcast.
x, dxdt, dx2dt2 = (
torch.from_numpy(x).type(torch.DoubleTensor),
torch.from_numpy(dxdt).type(torch.DoubleTensor),
torch.from_numpy(dx2dt2).type(torch.DoubleTensor),
torch.from_numpy(x).type(torch.FloatTensor),
torch.from_numpy(dxdt).type(torch.FloatTensor),
torch.from_numpy(dx2dt2).type(torch.FloatTensor),
)

if self.categories is not None:
return x, dxdt, dx2dt2, int(self.categories[idx])
return x, dxdt, dx2dt2

def interpolate_oversample(self, oversample_prop, dt):
Expand Down Expand Up @@ -125,17 +142,20 @@ def get_loaders(
if oversample_prop > 1:
dsVal.interpolate_oversample(oversample_prop=oversample_prop, dt=dt)
dls["val"] = DataLoader(
dsVal, num_workers=num_workers, batch_size=batch_size, shuffle=False
dsVal, num_workers=num_workers, batch_size=batch_size, shuffle=False,
pin_memory=True,
)
dsTrain, dsTest = aud_neur_ds(X_train), aud_neur_ds(X_test)
if oversample_prop > 1:
dsTrain.interpolate_oversample(oversample_prop=oversample_prop, dt=dt)
dsTest.interpolate_oversample(oversample_prop=oversample_prop, dt=dt)
dls["train"] = DataLoader(
dsTrain, num_workers=num_workers, batch_size=batch_size, shuffle=True
dsTrain, num_workers=num_workers, batch_size=batch_size, shuffle=True,
pin_memory=True,
)
dls["test"] = DataLoader(
dsTest, num_workers=num_workers, batch_size=batch_size, shuffle=False
dsTest, num_workers=num_workers, batch_size=batch_size, shuffle=False,
pin_memory=True,
)

return dls
Expand Down Expand Up @@ -200,7 +220,8 @@ def get_loaders_interp(
if oversample_prop > 1:
dsVal.interpolate_oversample(oversample_prop=oversample_prop, dt=dt)
dls["val"] = DataLoader(
dsVal, num_workers=num_workers, batch_size=batch_size, shuffle=False
dsVal, num_workers=num_workers, batch_size=batch_size, shuffle=False,
pin_memory=True,
)
t_test = np.tile(t[None, :], (X_test.shape[0], 1))

Expand All @@ -210,10 +231,80 @@ def get_loaders_interp(
dsTrain.interpolate_oversample(oversample_prop=oversample_prop, dt=dt)
dsTest.interpolate_oversample(oversample_prop=oversample_prop, dt=dt)
dls["train"] = DataLoader(
dsTrain, num_workers=num_workers, batch_size=batch_size, shuffle=True
dsTrain, num_workers=num_workers, batch_size=batch_size, shuffle=True,
pin_memory=True,
)
dls["test"] = DataLoader(
dsTest, num_workers=num_workers, batch_size=batch_size, shuffle=False
dsTest, num_workers=num_workers, batch_size=batch_size, shuffle=False,
pin_memory=True,
)

return dls


# --- Edge-biased loaders for spectral-rollout training -------------------------------

def _stratified_split(N, categories, test_size, seed):
    """Split example indices into (train_idx, test_idx), one category at a time.

    Each distinct label in `categories` is shuffled and divided independently, so
    both halves keep roughly the label proportions of the input.

    Args:
        N: Number of examples. Unused -- indices are derived from `categories`;
            the parameter is kept so existing call sites keep working.
        categories: 1-D integer label array, one entry per example.
        test_size: Fraction of each category that goes to the test half.
        seed: Seed for `np.random.default_rng`.

    Returns:
        (train_idx, test_idx): integer index arrays into `categories`, grouped by
        category in ascending label order. A category with a single example goes
        entirely to train; a larger one contributes at least one test index.
        Both arrays are empty when `categories` is empty.
    """
    categories = np.asarray(categories)
    rng = np.random.default_rng(seed)

    # Start each list with an empty index array: np.concatenate raises on an empty
    # list, which crashed the split whenever `categories` had no entries (e.g. an
    # empty hold-out set being divided into val/test).
    no_indices = np.empty(0, dtype=np.intp)
    train_parts, test_parts = [no_indices], [no_indices]

    for cat in np.unique(categories):
        members = np.where(categories == cat)[0]
        rng.shuffle(members)
        if len(members) > 1:
            # Always hold out at least one example of a category that can spare it.
            n_test = max(1, int(round(test_size * len(members))))
        else:
            # A lone example cannot be split; keep it for training.
            n_test = 0
        test_parts.append(members[:n_test])
        train_parts.append(members[n_test:])

    return np.concatenate(train_parts), np.concatenate(test_parts)


def get_loaders_edge(
    data,
    categories,
    num_workers=4,
    batch_size=8,
    train_size=0.8,
    cv=True,
    seed=None,
):
    """
    Build category-aware DataLoaders for the edge-biased segment sampler.

    `data` is converted with np.asarray and indexed along its first axis; the
    original note gives its shape as (N, L, 1). `categories` is a length-N integer
    array (ONSET=0 / OFFSET=1 / MID=2). Every dataset is constructed with the
    matching `categories=` slice so the per-example label travels with each item.

    Splitting is stratified: each category is divided independently, first into
    train vs. hold-out using `1 - train_size`, then (only when `cv` is true) the
    hold-out is divided 50/50 into val and test. Both splits use the same `seed`.
    A category with a single hold-out example ends up in val only.

    Returns a dict of DataLoaders keyed "train" and "test", plus "val" when `cv`
    is true. Only the train loader shuffles; all loaders use pin_memory=True.
    """
    examples = np.asarray(data)
    labels = np.asarray(categories, dtype=np.int64)

    def _loader_for(indices, shuffle):
        # Dataset and loader construction shared by all three partitions.
        subset = aud_neur_ds(examples[indices], categories=labels[indices])
        return DataLoader(
            subset,
            num_workers=num_workers,
            batch_size=batch_size,
            shuffle=shuffle,
            pin_memory=True,
        )

    train_idx, held_out = _stratified_split(
        len(examples), labels, 1 - train_size, seed
    )

    loaders = {}
    # Without cross-validation the whole hold-out set is the test set.
    test_idx = held_out
    if cv:
        # The second split works in positions local to `held_out`; index back
        # through `held_out` to recover positions in the full dataset.
        val_pos, test_pos = _stratified_split(
            len(held_out), labels[held_out], test_size=0.5, seed=seed
        )
        test_idx = held_out[test_pos]
        loaders["val"] = _loader_for(held_out[val_pos], shuffle=False)

    loaders["train"] = _loader_for(train_idx, shuffle=True)
    loaders["test"] = _loader_for(test_idx, shuffle=False)
    return loaders
163 changes: 163 additions & 0 deletions data/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,166 @@ def get_segmented_audio(
)

return audio_segments, sr


# --- Edge-biased segment sampler for spectral-rollout training -----------------------

# Category labels attached to every sampled window.
ONSET = 0  # window that starts shortly before a syllable onset
OFFSET = 1  # window that ends shortly after a syllable offset
MID = 2  # window taken from the syllable interior, away from both edges


def _collect_edge_windows(
    aud, on_off, sr, context_len, edge_ms, silence_prefix_ms, silence_suffix_ms, pools
):
    """Append every valid ONSET/OFFSET/MID window of one recording to `pools`."""
    L_seg = int(round(context_len * sr))
    pre = int(round(silence_prefix_ms / 1000.0 * sr))
    suff = int(round(silence_suffix_ms / 1000.0 * sr))
    edge = int(round(edge_ms / 1000.0 * sr))

    for on_s, off_s in on_off:
        on_i = int(round(on_s * sr))
        off_i = int(round(off_s * sr))

        # ONSET: the window starts `pre` samples before the onset. It must fit
        # inside the recording and extend past the onset itself.
        start = on_i - pre
        end = start + L_seg
        if start >= 0 and end <= len(aud) and end > on_i:
            pools[ONSET].append(aud[start:end])

        # OFFSET: the window ends `suff` samples after the offset. It must fit
        # inside the recording and begin before the offset itself.
        end = off_i + suff
        start = end - L_seg
        if start >= 0 and end <= len(aud) and start < off_i:
            pools[OFFSET].append(aud[start:end])

        # MID: back-to-back, non-overlapping windows inside [on + edge, off - edge].
        mid_start = on_i + edge
        mid_end = off_i - edge
        if mid_end - mid_start >= L_seg:
            for ms in range(mid_start, mid_end - L_seg + 1, L_seg):
                pools[MID].append(aud[ms:ms + L_seg])


def _category_counts(ratio, max_segs):
    """Convert a category ratio into integer counts that never exceed `max_segs`."""
    ratio = np.asarray(ratio, dtype=np.float64)
    ratio = ratio / ratio.sum()
    counts = np.round(ratio * max_segs).astype(int)
    counts[-1] = max_segs - counts[:-1].sum()  # exact total
    # Rounding the leading categories up can overshoot max_segs (e.g. ratio
    # (0.5, 0.5, 0) with max_segs=3 rounds to 2 + 2), leaving a negative remainder
    # that was previously skipped, so more than max_segs segments were returned.
    # Hand the overshoot back from the largest leading count.
    while counts[-1] < 0:
        counts[np.argmax(counts[:-1])] -= 1
        counts[-1] += 1
    return counts


def _shuffled_result(segs, cats, rng, sr):
    """Jointly shuffle segments and labels; return (segments, categories, sr)."""
    order = rng.permutation(len(segs))
    return (
        [segs[i] for i in order],
        np.array([cats[i] for i in order], dtype=np.int64),
        sr,
    )


def get_audio_training_edge_weighted(
    audio_files: list[str],
    seg_files: list[str],
    *,
    context_len: float,
    edge_ms: float = 10.0,
    silence_prefix_ms: float = 25.0,
    silence_suffix_ms: float = 25.0,
    ratio=(0.4, 0.4, 0.2),
    max_segs: int = 5000,
    seed: int = 0,
    int16_norm: bool = True,
):
    """
    Categorized segment sampler that oversamples syllable onsets and offsets.

    For each (wav, txt) pair, three pools of fixed-length windows
    (round(context_len * sr) samples) are built:
      - ONSET (cat=0): window starts silence_prefix_ms before the onset, so the
        audio preceding the onset forms the start of the window.
      - OFFSET (cat=1): window ends silence_suffix_ms after the offset, so the
        audio following the offset forms the end of the window.
      - MID (cat=2): non-overlapping windows lying inside
        [on + edge_ms, off - edge_ms].
    Windows that would run past either end of the recording are dropped.

    Args:
        audio_files: Paths of wav files.
        seg_files: Paths of text files, paired with `audio_files` by position.
            The first two columns of each row are read as onset and offset times
            in seconds.
        context_len: Window length in seconds.
        edge_ms: Margin, in ms, kept between MID windows and the syllable edges.
        silence_prefix_ms: Lead-in, in ms, before the onset in ONSET windows.
        silence_suffix_ms: Tail, in ms, after the offset in OFFSET windows.
        ratio: Relative (ONSET, OFFSET, MID) weights; normalized internally.
        max_segs: Target number of returned segments. If <= 0, every pooled
            window is returned without resampling.
        seed: Seed of the generator used for sampling and the final shuffle.
        int16_norm: Scale int16 audio by 1/32768 before use.

    Returns:
        (segments, categories, sr): a list of float32 arrays reshaped to
        (-1, 1), an int64 array of matching category labels, and the sample rate
        of the last file that was read (None if no pair existed on disk).
        Pairs with a missing file or an empty segment file are skipped. A pool is
        drawn with replacement only when it is smaller than its share; an empty
        pool contributes nothing, so fewer than `max_segs` segments may come
        back. At most `max_segs` segments are returned.

    Per the original author's note, the spectral-rollout training step uses the
    labels to apply the cold-start noise IC to ONSET examples only.
    """
    rng = np.random.default_rng(seed)
    pools = {ONSET: [], OFFSET: [], MID: []}
    sr = None

    for a, s in zip(audio_files, seg_files):
        if not (os.path.isfile(a) and os.path.isfile(s)):
            continue
        sr, aud = wavfile.read(a)
        # NOTE(review): windows are cut along axis 0 and flattened with
        # reshape(-1, 1), which presumably assumes mono audio -- confirm.
        if aud.dtype == np.int16 and int16_norm:
            aud = aud / -np.iinfo(aud.dtype).min
        aud = aud.astype(np.float64)

        # np.loadtxt warns on an empty file; that case is handled just below.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            on_off = np.loadtxt(s, usecols=(0, 1))
        if on_off.size == 0:
            continue
        if on_off.ndim == 1:
            # A single-row file loads as a 1-D array; restore the row axis.
            on_off = on_off[None, :]

        _collect_edge_windows(
            aud, on_off, sr, context_len, edge_ms,
            silence_prefix_ms, silence_suffix_ms, pools,
        )

    if not any(pools[c] for c in (ONSET, OFFSET, MID)):
        return [], np.array([], dtype=np.int64), sr

    out_segs, out_cats = [], []

    if max_segs <= 0:
        # Return everything, no resampling.
        for cat in (ONSET, OFFSET, MID):
            for seg in pools[cat]:
                out_segs.append(seg.reshape(-1, 1).astype(np.float32))
                out_cats.append(cat)
        return _shuffled_result(out_segs, out_cats, rng, sr)

    counts = _category_counts(ratio, max_segs)
    for cat, n in zip((ONSET, OFFSET, MID), counts):
        pool = pools[cat]
        if not pool or n <= 0:
            continue
        # Sample with replacement only when the pool cannot cover its share.
        replace = n > len(pool)
        for i in rng.choice(len(pool), size=n, replace=replace):
            out_segs.append(pool[i].reshape(-1, 1).astype(np.float32))
            out_cats.append(cat)

    return _shuffled_result(out_segs, out_cats, rng, sr)


def get_segmented_audio_edge_weighted(
    audio_path: str,
    seg_path: str,
    *,
    audio_id: str = ".wav",
    context_len: float,
    edge_ms: float = 10.0,
    silence_prefix_ms: float = 25.0,
    silence_suffix_ms: float = 25.0,
    ratio=(0.4, 0.4, 0.2),
    max_segs: int = 5000,
    seed: int = 0,
    shuffle_files: bool = True,
    int16_norm: bool = True,
):
    """Glob audio files, pair each with its segment file, and sample edge windows.

    Every file in `audio_path` whose name ends with `audio_id` is paired with
    `seg_path/<name without audio_id>.txt`, and the pairs are passed to
    get_audio_training_edge_weighted together with the remaining keyword
    arguments.

    Args:
        audio_path: Directory searched (non-recursively) for audio files.
        seg_path: Directory holding the onset/offset text files.
        audio_id: Filename suffix that identifies audio files.
        context_len, edge_ms, silence_prefix_ms, silence_suffix_ms, ratio,
            max_segs, int16_norm: Forwarded unchanged to
            get_audio_training_edge_weighted.
        seed: Seeds both the file shuffle here and the sampling in the callee.
        shuffle_files: Shuffle the (sorted) file list before pooling.

    Returns:
        (segments, categories, sr) as returned by
        get_audio_training_edge_weighted.
    """
    gen = np.random.default_rng(seed)
    # glob's result order depends on the filesystem; sort first so that a given
    # seed always yields the same file order, and therefore the same samples.
    audio_files = sorted(glob.glob(os.path.join(audio_path, "*" + audio_id)))
    if shuffle_files:
        order = gen.permutation(len(audio_files))
        audio_files = [audio_files[o] for o in order]

    seg_files = []
    for a in audio_files:
        # basename handles any path separator; stripping by suffix length keeps
        # names intact even if they contain `audio_id` somewhere in the middle.
        name = os.path.basename(a)
        tag = name[: len(name) - len(audio_id)] if audio_id else name
        seg_files.append(os.path.join(seg_path, tag + ".txt"))

    return get_audio_training_edge_weighted(
        audio_files,
        seg_files,
        context_len=context_len,
        edge_ms=edge_ms,
        silence_prefix_ms=silence_prefix_ms,
        silence_suffix_ms=silence_suffix_ms,
        ratio=ratio,
        max_segs=max_segs,
        seed=seed,
        int16_norm=int16_norm,
    )
Loading