From b6c84c4094a7a2742e908ad13b7c9fa401ea6d9a Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 19 Jun 2026 23:17:55 +0200 Subject: [PATCH 01/33] ngmix: restore original-PSF fit; split the two PSF families by name (#749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Metacalibration has two PSFs and ngmix was exporting only one of them under both column families, mislabeled. Fix the substance and the names together. Substance — the original image PSF is fit again. average_original_psf fits each epoch's pristine gal_obs.psf (the psfex/mccd model stamp) with the existing psf_runner, BEFORE boot.go reconvolves it, weight-averaged by the galaxy obs.weight.sum() — the same scheme average_multiepoch_psf uses for the reconvolution kernel. A shared _average_psf_fits core backs both averagers; do_ngmix_metacal now returns (resdict, psf_res, psfo_res), with psfo_res computed before boot.go so it sees the un-reconvolved PSF. Names — the two families are now self-naming and never alias: *_psf_orig original image PSF (average_original_psf) *_psf_reconv reconvolution kernel (average_multiepoch_psf) Each carries g1/g2 (+_err), T (+_err) and r50 (+_err). The back-fill and compile_results emit loop copy these keys straight through, so the column name is the value's provenance. Galaxy ellipticity is the scalar g1/g2 pair. The previous code emitted one reconvolution-kernel result under both g*_psfo_ngmix and g1_psf/g2_psf; that double-emit is gone. Docstrings (compile_results, the two averagers, do_ngmix_metacal) now name both PSFs explicitly; the old "psfo is the original image psf" note — false since the original fit was dropped — is replaced by an accurate one. Co-Authored-By: Claude Opus 4.8 --- src/shapepipe/modules/ngmix_package/ngmix.py | 278 +++++++++++++------ 1 file changed, 199 insertions(+), 79 deletions(-) diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index 386c25ef..ced2d392 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -355,8 +355,19 @@ def compile_results(self, results): Returns ------- dict - Compiled results ready to be written to a file - note: psfo is the original image psf from psfex or mccd + Compiled results ready to be written to a file. + + Two PSF column families — each carrying ellipticity *and* size, + for *different* PSFs (shapepipe#749): + + * ``*_psf_orig`` (``g1``/``g2`` + ``*_err``, ``T``, ``r50``) — the + ORIGINAL image PSF (the psfex/mccd model stamp), fit before + metacal reconvolution by :func:`average_original_psf`. This is + the PSF whose true ellipticity and size enter object-wise + PSF-leakage diagnostics. + * ``*_psf_reconv`` — the metacal RECONVOLUTION kernel (round and + enlarged by construction, used for the Tgal/Tpsf size cut and a + g~0 sanity check), fit by :func:`average_multiepoch_psf`. Raises ------ @@ -378,31 +389,40 @@ def compile_results(self, results): 'n_epoch_model', 'mcal_types_fail', 'nfev_fit', - 'g1_psfo_ngmix', - 'g2_psfo_ngmix', - 'g1_err_psfo_ngmix', - 'g2_err_psfo_ngmix', + # galaxy 'g1', 'g1_err', 'g2', 'g2_err', 'T', 'T_err', - 'Tpsf', - 'Tpsf_err', 'r50', 'r50_err', - 'r50psf', - 'r50psf_err', - 'g1_psf', - 'g2_psf', 'flux', 'flux_err', 's2n', 'mag', 'mag_err', 'flags', - 'mcal_flags' + 'mcal_flags', + # original image PSF (psfex/mccd), fit by average_original_psf + 'g1_psf_orig', + 'g2_psf_orig', + 'g1_err_psf_orig', + 'g2_err_psf_orig', + 'T_psf_orig', + 'T_err_psf_orig', + 'r50_psf_orig', + 'r50_err_psf_orig', + # metacal reconvolution kernel, fit by average_multiepoch_psf + 'g1_psf_reconv', + 'g2_psf_reconv', + 'g1_err_psf_reconv', + 'g2_err_psf_reconv', + 'T_psf_reconv', + 'T_err_psf_reconv', + 'r50_psf_reconv', + 'r50_err_psf_reconv', ] output_dict = {k: {kk: [] for kk in names2} for k in names} for idx in range(len(results)): @@ -439,24 +459,23 @@ def compile_results(self, results): output_dict[name]["nfev_fit"].append( fit.get("nfev", np.nan) ) - output_dict[name]["g1_psfo_ngmix"].append( - results[idx]["g_PSFo"][0] - ) - output_dict[name]["g2_psfo_ngmix"].append( - results[idx]["g_PSFo"][1] - ) - output_dict[name]["g1_err_psfo_ngmix"].append( - results[idx]["g_err_PSFo"][0] - ) - output_dict[name]["g2_err_psfo_ngmix"].append( - results[idx]["g_err_PSFo"][1] - ) - output_dict[name]["T"].append(T_gal) - output_dict[name]["T_err"].append(T_gal_err) - output_dict[name]["Tpsf"].append(results[idx]["T_PSFo"]) - output_dict[name]["Tpsf_err"].append(results[idx]["T_err_PSFo"]) - output_dict[name]["g1_psf"].append(results[idx]["g_PSFo"][0]) - output_dict[name]["g2_psf"].append(results[idx]["g_PSFo"][1]) + # The two PSF families are object-level (one value per + # object, not per shear type) and self-named: every key + # below is copied straight through from compile-loop input to + # output, so the column name *is* the value's provenance. + # *_psf_orig = original image PSF (average_original_psf) + # *_psf_reconv = reconvolution kernel (average_multiepoch_psf) + for psf_key in ( + 'g1_psf_orig', 'g2_psf_orig', + 'g1_err_psf_orig', 'g2_err_psf_orig', + 'T_psf_orig', 'T_err_psf_orig', + 'r50_psf_orig', 'r50_err_psf_orig', + 'g1_psf_reconv', 'g2_psf_reconv', + 'g1_err_psf_reconv', 'g2_err_psf_reconv', + 'T_psf_reconv', 'T_err_psf_reconv', + 'r50_psf_reconv', 'r50_err_psf_reconv', + ): + output_dict[name][psf_key].append(results[idx][psf_key]) # Galaxy half-light radius from the fitted area T = 2 sigma^2: # r50 = sqrt(ln 2 * T), with d r50 / d T = r50 / (2 T) @@ -465,16 +484,14 @@ def compile_results(self, results): r50_gal_err = r50_gal * T_gal_err / (2 * T_gal) else: r50_gal = r50_gal_err = np.nan - output_dict[name]['r50'].append(r50_gal) - output_dict[name]['r50_err'].append(r50_gal_err) - output_dict[name]['r50psf'].append(results[idx]["r50_PSFo"]) - output_dict[name]['r50psf_err'].append( - results[idx]["r50_err_PSFo"] - ) output_dict[name]["g1"].append(g[0]) output_dict[name]["g2"].append(g[1]) output_dict[name]["g1_err"].append(np.sqrt(g_cov[0, 0])) output_dict[name]["g2_err"].append(np.sqrt(g_cov[1, 1])) + output_dict[name]["T"].append(T_gal) + output_dict[name]["T_err"].append(T_gal_err) + output_dict[name]['r50'].append(r50_gal) + output_dict[name]['r50_err'].append(r50_gal_err) output_dict[name]["flux"].append(flux) output_dict[name]["flux_err"].append(flux_err) output_dict[name]["mag"].append(mag) @@ -662,7 +679,7 @@ def process(self): if tile_cat.flux is not None else 1.0 ) - res, psf_res = do_ngmix_metacal( + res, psf_res, psfo_res = do_ngmix_metacal( stamp, prior, flux_guess, @@ -689,17 +706,38 @@ def process(self): if res.get(k, {}).get('flags', 0) != 0 ) res['mcal_flags'] = get_mcal_flags(res) + # Two distinct PSF families (shapepipe#749), each with its own + # ellipticity AND size, written under self-naming res-keys: + # reconvolution kernel (psf_res) -> *_psf_reconv + # original image PSF (psfo_res) -> *_psf_orig # PSF half-light radius r50 = sqrt(2 ln 2) * sigma with - # sigma = sqrt(T / 2); error from d sigma / d T = 1 / (4 sigma) - sigma_psfo = np.sqrt(max(psf_res['T_psf'], 0) / 2) - res['g_PSFo'] = psf_res['g_psf'] - res['g_err_PSFo'] = psf_res['g_psf_err'] - res['T_PSFo'] = psf_res['T_psf'] - res['T_err_PSFo'] = psf_res['T_psf_err'] - res['r50_PSFo'] = SIGMA_TO_R50 * sigma_psfo - res['r50_err_PSFo'] = ( - SIGMA_TO_R50 * psf_res['T_psf_err'] / (4 * sigma_psfo) - if sigma_psfo > 0 else np.nan + # sigma = sqrt(T / 2); error from d sigma / d T = 1 / (4 sigma). + def _psf_r50(T_psf, T_psf_err): + sigma = np.sqrt(max(T_psf, 0) / 2) + r50 = SIGMA_TO_R50 * sigma + r50_err = ( + SIGMA_TO_R50 * T_psf_err / (4 * sigma) + if sigma > 0 else np.nan + ) + return r50, r50_err + + res['g1_psf_reconv'] = psf_res['g_psf'][0] + res['g2_psf_reconv'] = psf_res['g_psf'][1] + res['g1_err_psf_reconv'] = psf_res['g_psf_err'][0] + res['g2_err_psf_reconv'] = psf_res['g_psf_err'][1] + res['T_psf_reconv'] = psf_res['T_psf'] + res['T_err_psf_reconv'] = psf_res['T_psf_err'] + res['r50_psf_reconv'], res['r50_err_psf_reconv'] = _psf_r50( + psf_res['T_psf'], psf_res['T_psf_err'] + ) + res['g1_psf_orig'] = psfo_res['g_psf'][0] + res['g2_psf_orig'] = psfo_res['g_psf'][1] + res['g1_err_psf_orig'] = psfo_res['g_psf_err'][0] + res['g2_err_psf_orig'] = psfo_res['g_psf_err'][1] + res['T_psf_orig'] = psfo_res['T_psf'] + res['T_err_psf_orig'] = psfo_res['T_psf_err'] + res['r50_psf_orig'], res['r50_err_psf_orig'] = _psf_r50( + psfo_res['T_psf'], psfo_res['T_psf_err'] ) final_res.append(res) n_fitted += 1 @@ -1122,55 +1160,126 @@ def make_ngmix_observation( noise=noise_img, ) -def average_multiepoch_psf(obsdict): - """ averages psf information over multiple epochs - we may need to do this for original psf as well +def _average_psf_fits(results_and_weights): + """Weight-average a set of per-epoch ngmix PSF-fit results. + + Shared core for both PSF families this module exports: the metacal + reconvolution kernel (:func:`average_multiepoch_psf`) and the original + image PSF (:func:`average_original_psf`). Epochs whose PSF fit failed + (``flags != 0``, carrying only flags/pars and no T/g) are dropped. + Parameters ---------- - obsdict : dict - Observation dict returned by MetacalBootstrapper.go(). + results_and_weights : iterable of (dict, float) + Per-epoch ``(result, weight)`` pairs, where ``result`` is an ngmix + Fitter result with keys ``flags``, ``g``, ``g_err``, ``T``, + ``T_err`` and ``weight`` is the epoch's averaging weight. Returns ------- dict - Keys: 'g_psf', 'g_psf_err', 'T_psf', 'T_psf_err' (weighted - averages over the epochs whose PSF fit succeeded) and 'n_epoch' - (the number of those surviving epochs). + Keys ``g_psf``, ``g_psf_err``, ``T_psf``, ``T_psf_err`` (weighted + averages over the surviving epochs) and ``n_epoch`` (their count). """ - psf_dict = {} - nepoch = len(obsdict['noshear']) n_epoch_used = 0 wsum = 0 g_psf_sum = np.array([0., 0.]) g_psf_err_sum = np.array([0., 0.]) T_psf_sum = 0 T_psf_err_sum = 0 - for n_e in np.arange(nepoch): - result = obsdict['noshear'][n_e].psf.meta['result'] - # ignore_failed_psf=True drops failed-PSF epochs from the galaxy - # fit but keeps them in obsdict; their result carries only - # flags/pars (no T/g), so skip them here too. + for result, weight in results_and_weights: if result['flags'] != 0: continue - ne_wsum = obsdict['noshear'][n_e].weight.sum() - n_epoch_used += 1 - wsum += ne_wsum - g_psf_sum += result['g'] * ne_wsum - g_psf_err_sum += result['g_err'] * ne_wsum - T_psf_sum += result['T'] * ne_wsum - T_psf_err_sum += result['T_err'] * ne_wsum + wsum += weight + g_psf_sum += result['g'] * weight + g_psf_err_sum += result['g_err'] * weight + T_psf_sum += result['T'] * weight + T_psf_err_sum += result['T_err'] * weight if wsum == 0: raise ZeroDivisionError('Sum of weights = 0, division by zero') - psf_dict['g_psf'] = g_psf_sum / wsum - psf_dict['g_psf_err'] = g_psf_err_sum / wsum - psf_dict['T_psf'] = T_psf_sum / wsum - psf_dict['T_psf_err'] = T_psf_err_sum / wsum - psf_dict['n_epoch'] = n_epoch_used + return { + 'g_psf': g_psf_sum / wsum, + 'g_psf_err': g_psf_err_sum / wsum, + 'T_psf': T_psf_sum / wsum, + 'T_psf_err': T_psf_err_sum / wsum, + 'n_epoch': n_epoch_used, + } + + +def average_multiepoch_psf(obsdict): + """Average the metacal *reconvolution* PSF over epochs. + + The PSF carried by each metacal observation (``obs.psf``) is the + Gaussian reconvolution kernel that metacal fit and convolved back in — + round by construction and slightly enlarged relative to the original + PSF. This is the kernel defining the sheared galaxy images, exported to + the reconvolution-kernel columns + (``NGMIX_G1/G2_PSF_RECONV``, ``NGMIX_T_PSF_RECONV``). The independent fit + of the *original* image PSF is :func:`average_original_psf`. + + Parameters + ---------- + obsdict : dict + Observation dict returned by MetacalBootstrapper.go(). - return psf_dict + Returns + ------- + dict + Keys: 'g_psf', 'g_psf_err', 'T_psf', 'T_psf_err' (weighted + averages over the epochs whose PSF fit succeeded) and 'n_epoch' + (the number of those surviving epochs). + """ + # ignore_failed_psf=True drops failed-PSF epochs from the galaxy fit but + # keeps them in obsdict; _average_psf_fits skips them on flags != 0. + return _average_psf_fits( + (obs.psf.meta['result'], obs.weight.sum()) + for obs in obsdict['noshear'] + ) + + +def average_original_psf(gal_obs_list, psf_runner): + """Fit and average the *original* image PSF over epochs. + + The original PSF is the psfex/mccd model stamp handed to ngmix + (``gal_obs.psf``), fit here with the same ``psf_runner`` machinery — and + so the same ``psf_fit_prior`` — used inside metacal, but on the PSF + *before* metacal's reconvolution. Exported to the original-PSF columns + (``NGMIX_G1/G2_PSF_ORIG``, ``NGMIX_T_PSF_ORIG``). Distinct from the + reconvolution-kernel fit (:func:`average_multiepoch_psf`): the original + PSF retains its true ellipticity and size, whereas the reconvolution + kernel is round and enlarged by construction. This is the PSF whose true + shape and size enter object-wise PSF-leakage diagnostics. + + Epochs are weighted by the *galaxy* inverse-variance weight + (``gal_obs.weight.sum()``), matching :func:`average_multiepoch_psf`, so + the two PSF families share an averaging scheme and differ only in which + PSF is fit. + + Parameters + ---------- + gal_obs_list : ngmix.observation.ObsList + Per-epoch galaxy observations; each ``gal_obs.psf`` is the original + (pre-metacal) PSF observation to fit, with no further ``.psf`` of + its own so the runner fits the stamp itself. + psf_runner : ngmix.runners.PSFRunner + The module's PSF runner, carrying the resolved ``psf_fit_prior``. + + Returns + ------- + dict + Same keys as :func:`average_multiepoch_psf`. + """ + def fit(gal_obs): + # PSFRunner.go fits gal_obs.psf (the original PSF stamp) and sets + # gal_obs.psf.meta['result']; failed fits keep flags != 0 and are + # dropped by _average_psf_fits. + psf_runner.go(gal_obs) + return gal_obs.psf.meta['result'], gal_obs.weight.sum() + + return _average_psf_fits(fit(gal_obs) for gal_obs in gal_obs_list) def make_runners(prior, flux_guess, rng): @@ -1233,8 +1342,12 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): Returns ------- tuple - (resdict, psf_res) where resdict is the MetacalBootstrapper result - dict and psf_res is the averaged PSF dict from average_multiepoch_psf. + ``(resdict, psf_res, psfo_res)`` where ``resdict`` is the + MetacalBootstrapper result dict, ``psf_res`` is the averaged metacal + *reconvolution*-kernel PSF dict (:func:`average_multiepoch_psf`), and + ``psfo_res`` is the averaged *original* image-PSF dict + (:func:`average_original_psf`). The two PSF dicts share keys but + describe different PSFs. """ n_epoch = len(stamp.gals) if n_epoch == 0: @@ -1260,6 +1373,13 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): runner, psf_runner = make_runners(prior, flux_guess, rng) + # Fit the ORIGINAL (psfex/mccd) PSF before metacal reconvolves it, using + # the same psf_runner (so the same psf_fit_prior and centroid). This is + # the PSF_ORIG family; metacal below produces the PSF_RECONV family. Run + # first so the original PSF is fit on its own stamp, distinct from the + # round, enlarged kernel metacal convolves back in. + psfo_res = average_original_psf(gal_obs_list, psf_runner) + metacal_pars = { 'types': ['noshear', '1p', '1m', '2p', '2m'], 'step': 0.01, @@ -1277,4 +1397,4 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): ) resdict, obsdict = boot.go(gal_obs_list) psf_res = average_multiepoch_psf(obsdict) - return resdict, psf_res + return resdict, psf_res, psfo_res From 7e7887b22c93f1a2e1cf499a51159c19f287786a Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 19 Jun 2026 23:18:08 +0200 Subject: [PATCH 02/33] make_cat: rename ngmix columns to the ESTIMATOR_COMPONENT_OBJECT grammar (#749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adopt NGMIX[m]_[_ERR]__, with OBJECT one of GAL, PSF_ORIG (original psfex/mccd image PSF) or PSF_RECONV (metacal reconvolution kernel), reading the self-named keys ngmix now writes: galaxy ELL (2-vec) -> scalar NGMIX_G1/G2_GAL, NGMIX_G1/G2_ERR_GAL galaxy NGMIX_T_ -> NGMIX_T_GAL (+ _ERR), SNR/FLUX/MAG/FLAGS_GAL NGMIX_ELL_PSFo (mis- -> genuine NGMIX_G1/G2_PSF_ORIG (from the restored labeled reconv) original fit) + NGMIX_G1/G2_PSF_RECONV NGMIX_T_PSFo, NGMIX_ -> NGMIX_T_PSF_ORIG and NGMIX_T_PSF_RECONV, each its Tpsf own family's size (+ _ERR on both) The reconvolution-kernel ellipticity gains its _ERR columns too, for symmetry with PSF_ORIG. The d6531b1b dedup (NGMIX_T_PSFo <- "Tpsf") is reverted: PSF_ORIG size now comes from the original fit ("T_psf_orig"), not the reconvolution-kernel "Tpsf" — they are different PSFs again. N_EPOCH / MCAL_FLAGS / MCAL_TYPES_FAIL keep their names (no PSF meaning). Co-Authored-By: Claude Opus 4.8 --- .../modules/make_cat_package/make_cat.py | 196 +++++++++++++----- 1 file changed, 142 insertions(+), 54 deletions(-) diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index 79b0571a..235fd14f 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -329,10 +329,22 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): Save the NGMIX catalogue into the final one. + Column grammar: ``NGMIX[m]_[_ERR]__``, with + ``OBJECT`` one of ``GAL`` (galaxy), ``PSF_ORIG`` (the original + psfex/mccd image PSF, fit by ``ngmix.average_original_psf`` — its true + ellipticity and size, feeding object-wise PSF leakage), or + ``PSF_RECONV`` (the metacal reconvolution kernel, round and enlarged + by construction, fit by ``ngmix.average_multiepoch_psf`` — the Tgal / + Tpsf size cut and a g~0 sanity check). ``PSF_ORIG`` and ``PSF_RECONV`` + are independent fits of different PSFs, no longer the single aliased + value of the pre-fix code (shapepipe#749). + Parameters ---------- ngmix_cat_path : str Path to NGMIX catalogue + moments : bool, optional + If True, write the parallel ``NGMIXm_*`` (moments-branch) columns. """ self._key_ends = ["1M", "1P", "2M", "2P", "NOSHEAR"] @@ -374,29 +386,47 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): prefix = f"NGMIX{m}" + # Galaxy (GAL), original image PSF (PSF_ORIG) and metacal + # reconvolution kernel (PSF_RECONV). G1/G2 are scalar reduced-shear + # components, not a 2-vector. Sentinels: sizes/fluxes/mags/flags 0, + # *_ERR fluxes/mags -1, ellipticities -10, *_ERR sizes 1e30. for key_str in ( - f"{prefix}_T_", - f"{prefix}_Tpsf_", - f"{prefix}_SNR_", - f"{prefix}_FLUX_", - f"{prefix}_MAG_", - f"{prefix}_FLAGS_", - f"{prefix}_T_PSFo_", + f"{prefix}_T_GAL_", + f"{prefix}_SNR_GAL_", + f"{prefix}_FLUX_GAL_", + f"{prefix}_MAG_GAL_", + f"{prefix}_FLAGS_GAL_", + f"{prefix}_T_PSF_ORIG_", + f"{prefix}_T_PSF_RECONV_", ): self._update_dict(key_str, np.zeros(n_obj)) - for key_str in (f"NGMIX{m}_FLUX_ERR_", f"NGMIX{m}_MAG_ERR_"): - self._update_dict(key_str, np.ones(len(self._obj_id)) * -1) for key_str in ( - f"NGMIX{m}_ELL_", - f"NGMIX{m}_ELL_ERR_", - f"NGMIX{m}_ELL_PSFo_", + f"{prefix}_FLUX_ERR_GAL_", + f"{prefix}_MAG_ERR_GAL_", ): - self._update_dict(key_str, np.ones((len(self._obj_id), 2)) * -10.0) - self._update_dict( - f"NGMIX{m}_T_ERR_", - np.ones(len(self._obj_id)) * 1e30, - ) - self._add2dict(f"NGMIX{m}_MCAL_FLAGS", np.zeros(len(self._obj_id))) + self._update_dict(key_str, np.ones(n_obj) * -1) + for key_str in ( + f"{prefix}_G1_GAL_", + f"{prefix}_G2_GAL_", + f"{prefix}_G1_ERR_GAL_", + f"{prefix}_G2_ERR_GAL_", + f"{prefix}_G1_PSF_ORIG_", + f"{prefix}_G2_PSF_ORIG_", + f"{prefix}_G1_ERR_PSF_ORIG_", + f"{prefix}_G2_ERR_PSF_ORIG_", + f"{prefix}_G1_PSF_RECONV_", + f"{prefix}_G2_PSF_RECONV_", + f"{prefix}_G1_ERR_PSF_RECONV_", + f"{prefix}_G2_ERR_PSF_RECONV_", + ): + self._update_dict(key_str, np.ones(n_obj) * -10.0) + for key_str in ( + f"{prefix}_T_ERR_GAL_", + f"{prefix}_T_ERR_PSF_ORIG_", + f"{prefix}_T_ERR_PSF_RECONV_", + ): + self._update_dict(key_str, np.ones(n_obj) * 1e30) + self._add2dict(f"{prefix}_MCAL_FLAGS", np.zeros(n_obj)) for idx, id_tmp in enumerate(self._obj_id): ind = np.where(id_tmp == ngmix_id)[0] @@ -406,46 +436,104 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): ncf_data = ngmix_cat_file.get_data(key) - g = (ncf_data["g1"][ind[0]], ncf_data["g2"][ind[0]]) - g_err = ( - ncf_data["g1_err"][ind[0]], - ncf_data["g2_err"][ind[0]], + # Galaxy shape, size and photometry. + self._add2dict( + f"{prefix}_G1_GAL_{key}", ncf_data["g1"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_GAL_{key}", ncf_data["g2"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G1_ERR_GAL_{key}", + ncf_data["g1_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_ERR_GAL_{key}", + ncf_data["g2_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_GAL_{key}", ncf_data["T"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_ERR_GAL_{key}", + ncf_data["T_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_SNR_GAL_{key}", ncf_data["s2n"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_FLUX_GAL_{key}", + ncf_data["flux"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_FLUX_ERR_GAL_{key}", + ncf_data["flux_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_MAG_GAL_{key}", ncf_data["mag"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_MAG_ERR_GAL_{key}", + ncf_data["mag_err"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_FLAGS_GAL_{key}", + ncf_data["flags"][ind[0]], idx ) - self._add2dict(f"NGMIX{m}_ELL_{key}", g, idx) - self._add2dict(f"NGMIX{m}_ELL_ERR_{key}", g_err, idx) - - t = ncf_data["T"][ind[0]] - t_err = ncf_data["T_err"][ind[0]] - tpsf = ncf_data["Tpsf"][ind[0]] - self._add2dict(f"NGMIX{m}_T_{key}", t, idx) - self._add2dict(f"NGMIX{m}_T_ERR_{key}", t_err, idx) - self._add2dict(f"NGMIX{m}_Tpsf_{key}", tpsf, idx) - - s2n = ncf_data["s2n"][ind[0]] - self._add2dict(f"NGMIX{m}_SNR_{key}", s2n, idx) - - flux = ncf_data["flux"][ind[0]] - flux_err = ncf_data["flux_err"][ind[0]] - self._add2dict(f"NGMIX{m}_FLUX_{key}", flux, idx) - self._add2dict(f"NGMIX{m}_FLUX_ERR_{key}", flux_err, idx) - - mag = ncf_data["mag"][ind[0]] - mag_err = ncf_data["mag_err"][ind[0]] - self._add2dict(f"NGMIX{m}_MAG_{key}", mag, idx) - self._add2dict(f"NGMIX{m}_MAG_ERR_{key}", mag_err, idx) - - flags = ncf_data["flags"][ind[0]] - self._add2dict(f"NGMIX{m}_FLAGS_{key}", flags, idx) - - g_psf = ( - ncf_data["g1_psfo_ngmix"][ind[0]], - ncf_data["g2_psfo_ngmix"][ind[0]], + # Original image PSF (psfex/mccd) — its genuine, + # un-reconvolved ellipticity and size. + self._add2dict( + f"{prefix}_G1_PSF_ORIG_{key}", + ncf_data["g1_psf_orig"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_PSF_ORIG_{key}", + ncf_data["g2_psf_orig"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G1_ERR_PSF_ORIG_{key}", + ncf_data["g1_err_psf_orig"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_ERR_PSF_ORIG_{key}", + ncf_data["g2_err_psf_orig"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_PSF_ORIG_{key}", + ncf_data["T_psf_orig"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_ERR_PSF_ORIG_{key}", + ncf_data["T_err_psf_orig"][ind[0]], idx ) - self._add2dict(f"NGMIX{m}_ELL_PSFo_{key}", g_psf, idx) - t_psfo = ncf_data["Tpsf"][ind[0]] - self._add2dict(f"NGMIX{m}_T_PSFo_{key}", t_psfo, idx) + # Metacal reconvolution kernel — round, enlarged by + # construction; the Tgal/Tpsf cut and g~0 sanity check. + self._add2dict( + f"{prefix}_G1_PSF_RECONV_{key}", + ncf_data["g1_psf_reconv"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_PSF_RECONV_{key}", + ncf_data["g2_psf_reconv"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G1_ERR_PSF_RECONV_{key}", + ncf_data["g1_err_psf_reconv"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_ERR_PSF_RECONV_{key}", + ncf_data["g2_err_psf_reconv"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_PSF_RECONV_{key}", + ncf_data["T_psf_reconv"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_ERR_PSF_RECONV_{key}", + ncf_data["T_err_psf_reconv"][ind[0]], idx + ) self._add2dict( f"NGMIX{m}_MCAL_FLAGS", From c3fbebebd648d350916d840b7e5518eda598913b Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 19 Jun 2026 23:18:20 +0200 Subject: [PATCH 03/33] tests: cover both PSF families and the 3-tuple do_ngmix_metacal (#749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _fake_metacal_result now emits both self-named PSF key families with distinct values (original PSF elliptical and smaller; reconvolution kernel rounder and larger), so any regression to the old single-source aliasing fails loudly. New tests: test_compile_results_psf_families_are_unaliased — g1/g2_psf_orig differ from g1/g2_psf_reconv, each tracing its own source test_average_original_psf_fits_each_gal_psf_with_galaxy_weight — the runner fits every epoch's gal_obs.psf, weight-averaged by the galaxy inverse-variance weight, failed-PSF epochs dropped The size-columns test now checks both families' r50/T (orig != reconv). Every do_ngmix_metacal call site unpacks the (res, psf_res, psfo_res) 3-tuple: test_ngmix, test_ngmix_weight_validation, test_mbias, test_star_response. Co-Authored-By: Claude Opus 4.8 --- tests/module/test_ngmix.py | 206 +++++++++++++++---- tests/module/test_ngmix_weight_validation.py | 4 +- tests/science/test_mbias.py | 2 +- 3 files changed, 175 insertions(+), 37 deletions(-) diff --git a/tests/module/test_ngmix.py b/tests/module/test_ngmix.py index dcdcb2d2..e035a0e8 100644 --- a/tests/module/test_ngmix.py +++ b/tests/module/test_ngmix.py @@ -119,7 +119,7 @@ def _metacal_noshear_g(seed): flags, jacobs, ) - res, _ = do_ngmix_metacal(stamp, prior, 1.0, rng) + res, _, _ = do_ngmix_metacal(stamp, prior, 1.0, rng) return np.asarray(res["noshear"]["g"]) @@ -135,15 +135,46 @@ def test_metacal_is_reproducible_with_fixed_seed(): npt.assert_array_equal(_metacal_noshear_g(42), _metacal_noshear_g(42)) +# The two PSF families carry distinct ellipticity AND size (shapepipe#749): +# the original image PSF (-> *_psf_orig) and the metacal reconvolution kernel +# (-> *_psf_reconv) are independent fits, so a regression to the old aliasing +# (both from one source) would surface here. The original PSF is elliptical +# and smaller than the round, enlarged reconvolution kernel. +RECONV_PSF_G = [0.001, 0.002] +RECONV_PSF_G_ERR = [1e-5, 1e-5] +ORIG_PSF_G = [0.004, -0.003] +ORIG_PSF_G_ERR = [2e-5, 3e-5] +# original-PSF size, deliberately != the reconvolution T_psf the builder is +# handed, so the size un-aliasing is exercised end to end. +ORIG_PSF_T = 0.07 +ORIG_PSF_T_ERR = 0.0008 + + +def _psf_r50_pair(T_psf, T_psf_err, sigma_to_r50): + """``(r50, r50_err)`` for a PSF size, mirroring the source conversion.""" + sigma = np.sqrt(max(T_psf, 0) / 2) + return ( + sigma_to_r50 * sigma, + sigma_to_r50 * T_psf_err / (4 * sigma) if sigma > 0 else np.nan, + ) + + def _fake_metacal_result(T, T_err, T_psf, T_psf_err): """Build one minimal metacal result as produced by the process loop. - The PSF size entries mirror the conversion done at the source: - ``r50_PSFo = sqrt(2 ln 2) * sigma`` with ``sigma = sqrt(T_psf / 2)``. + Both PSF families are filled with self-named scalar keys, as the process + loop back-fills them: ``*_psf_orig`` (original image PSF) and + ``*_psf_reconv`` (metacal reconvolution kernel). The reconvolution-kernel + size is the builder's ``T_psf`` argument; the original-PSF size is the + fixed ``ORIG_PSF_T``, deliberately different so the un-aliasing is tested. + PSF r50 = ``sqrt(2 ln 2) * sigma`` with ``sigma = sqrt(T / 2)``. """ from shapepipe.modules.ngmix_package.ngmix import SIGMA_TO_R50 - sigma_psf = np.sqrt(max(T_psf, 0) / 2) + r50_reconv, r50_err_reconv = _psf_r50_pair(T_psf, T_psf_err, SIGMA_TO_R50) + r50_orig, r50_err_orig = _psf_r50_pair( + ORIG_PSF_T, ORIG_PSF_T_ERR, SIGMA_TO_R50 + ) per_type = { "nfev": 1, "g": [0.01, -0.02], @@ -159,16 +190,24 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): "obj_id": 1, "n_epoch_model": 1, "mcal_types_fail": 0, - "g_PSFo": [0.001, 0.002], - "g_err_PSFo": [1e-5, 1e-5], - "T_PSFo": T_psf, - "T_err_PSFo": T_psf_err, - "r50_PSFo": SIGMA_TO_R50 * sigma_psf, - "r50_err_PSFo": ( - SIGMA_TO_R50 * T_psf_err / (4 * sigma_psf) - if sigma_psf > 0 - else np.nan - ), + # original image PSF (psfex/mccd) family + "g1_psf_orig": ORIG_PSF_G[0], + "g2_psf_orig": ORIG_PSF_G[1], + "g1_err_psf_orig": ORIG_PSF_G_ERR[0], + "g2_err_psf_orig": ORIG_PSF_G_ERR[1], + "T_psf_orig": ORIG_PSF_T, + "T_err_psf_orig": ORIG_PSF_T_ERR, + "r50_psf_orig": r50_orig, + "r50_err_psf_orig": r50_err_orig, + # metacal reconvolution-kernel family + "g1_psf_reconv": RECONV_PSF_G[0], + "g2_psf_reconv": RECONV_PSF_G[1], + "g1_err_psf_reconv": RECONV_PSF_G_ERR[0], + "g2_err_psf_reconv": RECONV_PSF_G_ERR[1], + "T_psf_reconv": T_psf, + "T_err_psf_reconv": T_psf_err, + "r50_psf_reconv": r50_reconv, + "r50_err_psf_reconv": r50_err_reconv, "mcal_flags": 0, } res.update( @@ -178,11 +217,13 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): def test_compile_results_size_columns_are_half_light_radii(): - """Every r50 column is a true half-light radius, on both sides. + """Every r50 column is a true half-light radius, on every side. - Galaxy ``r50 = sqrt(ln 2 * T)`` (not the raw area ``pars[4]``), PSF - ``r50psf = sqrt(2 ln 2) * sigma_psf`` (not bare sigma), and the - redundant ``*_psfo_ngmix`` size duplicates are gone. + Galaxy ``r50 = sqrt(ln 2 * T)`` (not the raw area ``pars[4]``) and both + PSF families ``r50 = sqrt(2 ln 2) * sigma`` (not bare sigma), in the same + convention so galaxy / PSF radii are commensurable. The reconvolution + kernel (``*_psf_reconv``) and the original image PSF (``*_psf_orig``) are + independent fits with their own, different sizes (shapepipe#749). """ from shapepipe.modules.ngmix_package.ngmix import Ngmix, SIGMA_TO_R50 @@ -200,31 +241,66 @@ def test_compile_results_size_columns_are_half_light_radii(): npt.assert_allclose(noshear["r50"], [r50_expected]) npt.assert_allclose(noshear["r50_err"], [r50_expected * T_err / (2 * T)]) - # PSF: r50psf = sqrt(2 ln2) * sigma, sigma = sqrt(T_psf / 2) - sigma_psf = np.sqrt(T_psf / 2) - npt.assert_allclose(noshear["r50psf"], [SIGMA_TO_R50 * sigma_psf]) + # reconvolution kernel: r50 = sqrt(2 ln2) * sigma, sigma = sqrt(T_psf / 2) + sigma_reconv = np.sqrt(T_psf / 2) + npt.assert_allclose(noshear["r50_psf_reconv"], [SIGMA_TO_R50 * sigma_reconv]) npt.assert_allclose( - noshear["r50psf_err"], [SIGMA_TO_R50 * T_psf_err / (4 * sigma_psf)] + noshear["r50_err_psf_reconv"], + [SIGMA_TO_R50 * T_psf_err / (4 * sigma_reconv)], ) + npt.assert_allclose(noshear["T_psf_reconv"], [T_psf]) + npt.assert_allclose(noshear["T_err_psf_reconv"], [T_psf_err]) - # galaxy/PSF r50 are now commensurable: same convention on both sides + # galaxy/reconv-PSF r50 are commensurable: same convention on both sides npt.assert_allclose( - np.array(noshear["r50"]) / np.array(noshear["r50psf"]), + np.array(noshear["r50"]) / np.array(noshear["r50_psf_reconv"]), [np.sqrt(T / T_psf)], ) - # areas pass through untouched, with the err asymmetry fixed - npt.assert_allclose(noshear["Tpsf"], [T_psf]) - npt.assert_allclose(noshear["Tpsf_err"], [T_psf_err]) + # original image PSF carries its OWN size, un-aliased from the kernel + sigma_orig = np.sqrt(ORIG_PSF_T / 2) + npt.assert_allclose(noshear["T_psf_orig"], [ORIG_PSF_T]) + npt.assert_allclose(noshear["T_err_psf_orig"], [ORIG_PSF_T_ERR]) + npt.assert_allclose(noshear["r50_psf_orig"], [SIGMA_TO_R50 * sigma_orig]) + npt.assert_allclose( + noshear["r50_err_psf_orig"], + [SIGMA_TO_R50 * ORIG_PSF_T_ERR / (4 * sigma_orig)], + ) + assert noshear["T_psf_orig"] != noshear["T_psf_reconv"] + assert noshear["r50_psf_orig"] != noshear["r50_psf_reconv"] - # the *_psfo_ngmix size duplicates are retired - for retired in ( - "T_psfo_ngmix", - "T_err_psfo_ngmix", - "r50_psfo_ngmix", - "r50_err_psfo_ngmix", - ): - assert retired not in noshear + +def test_compile_results_psf_families_are_unaliased(): + """The two PSF-ellipticity families document *different* PSFs (#749). + + ``*_psf_reconv`` carries the metacal RECONVOLUTION kernel; ``*_psf_orig`` + carries the ORIGINAL image PSF, fit independently. Before the fix both + came from one ``average_multiepoch_psf`` result and so were byte- + identical; here they must differ, each tracing its own source. The + companion size un-aliasing is checked in + ``test_compile_results_size_columns_are_half_light_radii``. + """ + from shapepipe.modules.ngmix_package.ngmix import Ngmix + + inst = object.__new__(Ngmix) + inst._zero_point = 30.0 + noshear = inst.compile_results( + [_fake_metacal_result(0.18, 0.02, 0.09, 0.001)] + )["noshear"] + + # reconvolution kernel + npt.assert_allclose(noshear["g1_psf_reconv"], [RECONV_PSF_G[0]]) + npt.assert_allclose(noshear["g2_psf_reconv"], [RECONV_PSF_G[1]]) + npt.assert_allclose(noshear["g1_err_psf_reconv"], [RECONV_PSF_G_ERR[0]]) + npt.assert_allclose(noshear["g2_err_psf_reconv"], [RECONV_PSF_G_ERR[1]]) + # original image PSF + npt.assert_allclose(noshear["g1_psf_orig"], [ORIG_PSF_G[0]]) + npt.assert_allclose(noshear["g2_psf_orig"], [ORIG_PSF_G[1]]) + npt.assert_allclose(noshear["g1_err_psf_orig"], [ORIG_PSF_G_ERR[0]]) + npt.assert_allclose(noshear["g2_err_psf_orig"], [ORIG_PSF_G_ERR[1]]) + # the un-aliasing: the two families are no longer the same value + assert noshear["g1_psf_orig"] != noshear["g1_psf_reconv"] + assert noshear["g2_psf_orig"] != noshear["g2_psf_reconv"] def test_compile_results_nan_fills_failed_fit_types(): @@ -333,6 +409,66 @@ def epoch(result, weight_value): assert psf_res["n_epoch"] == 1 +def test_average_original_psf_fits_each_gal_psf_with_galaxy_weight(): + """The original-PSF average fits ``gal_obs.psf`` and weights by gal S/N. + + Restores the independent original-image-PSF fit (shapepipe#749): each + epoch's psfex/mccd PSF stamp (``gal_obs.psf``) is fit by the shared + ``psf_runner``, then weight-averaged by the *galaxy* inverse-variance + weight (matching :func:`average_multiepoch_psf`). A pre-set + ``psf.meta['result']`` per epoch stands in for the runner's fit so the + averaging math is checked in isolation. + """ + from types import SimpleNamespace + from shapepipe.modules.ngmix_package.ngmix import average_original_psf + + def gal_epoch(psf_result, weight_value): + # gal_obs carries a .psf whose meta['result'] is what the runner + # would set; .weight is the galaxy inverse-variance map. + return SimpleNamespace( + psf=SimpleNamespace(meta={"result": psf_result}), + weight=np.full((2, 2), weight_value), + ) + + # runner stub: a no-op .go, since meta['result'] is pre-populated. It + # must be *called* (the fit happens), so record that it was. + calls = [] + runner = SimpleNamespace(go=lambda obs: calls.append(obs)) + + good_a = { + "flags": 0, "T": 0.30, "T_err": 0.02, + "g": np.array([0.04, -0.03]), "g_err": np.array([1e-3, 1e-3]), + } + good_b = { + "flags": 0, "T": 0.20, "T_err": 0.01, + "g": np.array([0.06, -0.01]), "g_err": np.array([2e-3, 2e-3]), + } + failed = {"flags": 3, "nfev": 5, "pars": np.zeros(6)} # no T/g keys + + gal_obs_list = [ + gal_epoch(good_a, 1.0), # galaxy weight-sum = 4 + gal_epoch(good_b, 2.0), # galaxy weight-sum = 8 + gal_epoch(failed, 4.0), # dropped on flags != 0 + ] + psfo_res = average_original_psf(gal_obs_list, runner) + + # the runner fit every epoch's PSF + assert len(calls) == 3 + # weighted over the two surviving epochs (weights 4 and 8) + w = np.array([4.0, 8.0]) + npt.assert_allclose( + psfo_res["g_psf"], + (good_a["g"] * w[0] + good_b["g"] * w[1]) / w.sum(), + ) + npt.assert_allclose( + psfo_res["T_psf"], + (good_a["T"] * w[0] + good_b["T"] * w[1]) / w.sum(), + ) + assert psfo_res["n_epoch"] == 2 + # original PSF is elliptical — not the round reconvolution kernel + assert abs(psfo_res["g_psf"][0]) > 1e-3 + + def test_weight_map_recovers_injected_inverse_variance(): """A supplied inverse-variance map must not be renormalized twice.""" from shapepipe.modules.ngmix_package.ngmix import prepare_ngmix_weights diff --git a/tests/module/test_ngmix_weight_validation.py b/tests/module/test_ngmix_weight_validation.py index 243cbe83..2dad3a8c 100644 --- a/tests/module/test_ngmix_weight_validation.py +++ b/tests/module/test_ngmix_weight_validation.py @@ -145,7 +145,9 @@ def fit_metacal(gal, psf_im, bkg_rms, seed): stamp.jacobs = [WCS.jacobian()] if bkg_rms is not None: stamp.bkg_rms = [bkg_rms] - res, _ = do_ngmix_metacal(stamp, get_prior(PIX_SCALE, rng), GAL_FLUX, rng) + res, _, _ = do_ngmix_metacal( + stamp, get_prior(PIX_SCALE, rng), GAL_FLUX, rng + ) return res["noshear"] diff --git a/tests/science/test_mbias.py b/tests/science/test_mbias.py index 347bb5be..d7a0ebec 100644 --- a/tests/science/test_mbias.py +++ b/tests/science/test_mbias.py @@ -56,7 +56,7 @@ def _recover_g1_with_response(seed=42): stamp.gals, stamp.psfs, stamp.weights, stamp.flags, stamp.jacobs = ( gals, psfs, weights, flags, jacobs, ) - res, _ = do_ngmix_metacal(stamp, prior, 1.0, rng) + res, _, _ = do_ngmix_metacal(stamp, prior, 1.0, rng) g1_noshear = res["noshear"]["g"][0] R11 = (res["1p"]["g"][0] - res["1m"]["g"][0]) / (2 * METACAL_STEP) From b7ababa3a28376a951eaf1da5efbbd0a206d5ed5 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 19 Jun 2026 23:48:35 +0200 Subject: [PATCH 04/33] make_cat: update shipped param files to the new ngmix column grammar (#749) The two in-repo param files are the renamed columns' shipped consumer (read by scripts/python/create_final_cat.py). Map every NGMIX token to the NGMIX_[_ERR]_[_] grammar: NGMIX_ELL_PSFo_NOSHEAR -> NGMIX_G1/G2_PSF_ORIG_NOSHEAR NGMIX_ELL_ -> NGMIX_G1/G2_GAL_ NGMIX_ELL_ERR_NOSHEAR -> NGMIX_G1/G2_ERR_GAL_NOSHEAR NGMIX_T[_ERR]_ -> NGMIX_T[_ERR]_GAL_ NGMIX_Tpsf_ -> NGMIX_T_PSF_RECONV_ NGMIX_T_PSFo_NOSHEAR -> NGMIX_T_PSF_ORIG_NOSHEAR NGMIX_FLUX[_ERR]_ -> NGMIX_FLUX[_ERR]_GAL_ NGMIX_FLAGS_ -> NGMIX_FLAGS_GAL_ Non-NGMIX columns (NGMIX_MCAL_FLAGS, NGMIX_N_EPOCH, NGMIX_MOM_FAIL, and all SExtractor/external columns) are untouched. Every uncommented NGMIX column now matches a column make_cat._save_ngmix_data produces. Co-Authored-By: Claude Opus 4.8 --- example/cfis/final_cat.param | 99 +++++++++++++++------------- example/unions_800/cat_matched.param | 99 +++++++++++++++------------- 2 files changed, 110 insertions(+), 88 deletions(-) diff --git a/example/cfis/final_cat.param b/example/cfis/final_cat.param index eae7a2c0..a469aac7 100644 --- a/example/cfis/final_cat.param +++ b/example/cfis/final_cat.param @@ -11,8 +11,9 @@ FLAGS IMAFLAGS_ISO NGMIX_MCAL_FLAGS -# PSF ellipticity -NGMIX_ELL_PSFo_NOSHEAR +# PSF ellipticity (original image PSF) +NGMIX_G1_PSF_ORIG_NOSHEAR +NGMIX_G2_PSF_ORIG_NOSHEAR # spread class #SPREAD_CLASS @@ -29,52 +30,62 @@ NGMIX_N_EPOCH ## Ngmix: model fitting # galaxy ellipticity -NGMIX_ELL_1M -NGMIX_ELL_1P -NGMIX_ELL_2M -NGMIX_ELL_2P -NGMIX_ELL_NOSHEAR -#NGMIX_ELL_ERR_1M -#NGMIX_ELL_ERR_1P -#NGMIX_ELL_ERR_2M -#NGMIX_ELL_ERR_2P -NGMIX_ELL_ERR_NOSHEAR +NGMIX_G1_GAL_1M +NGMIX_G2_GAL_1M +NGMIX_G1_GAL_1P +NGMIX_G2_GAL_1P +NGMIX_G1_GAL_2M +NGMIX_G2_GAL_2M +NGMIX_G1_GAL_2P +NGMIX_G2_GAL_2P +NGMIX_G1_GAL_NOSHEAR +NGMIX_G2_GAL_NOSHEAR +#NGMIX_G1_ERR_GAL_1M +#NGMIX_G2_ERR_GAL_1M +#NGMIX_G1_ERR_GAL_1P +#NGMIX_G2_ERR_GAL_1P +#NGMIX_G1_ERR_GAL_2M +#NGMIX_G2_ERR_GAL_2M +#NGMIX_G1_ERR_GAL_2P +#NGMIX_G2_ERR_GAL_2P +NGMIX_G1_ERR_GAL_NOSHEAR +NGMIX_G2_ERR_GAL_NOSHEAR # flags -NGMIX_FLAGS_1M -NGMIX_FLAGS_1P -NGMIX_FLAGS_2M -NGMIX_FLAGS_2P -NGMIX_FLAGS_NOSHEAR +NGMIX_FLAGS_GAL_1M +NGMIX_FLAGS_GAL_1P +NGMIX_FLAGS_GAL_2M +NGMIX_FLAGS_GAL_2P +NGMIX_FLAGS_GAL_NOSHEAR # size and error -NGMIX_T_1M -NGMIX_T_1P -NGMIX_T_2M -NGMIX_T_2P -NGMIX_T_NOSHEAR -NGMIX_T_ERR_1M -NGMIX_T_ERR_1P -NGMIX_T_ERR_2M -NGMIX_T_ERR_2P -NGMIX_T_ERR_NOSHEAR -NGMIX_Tpsf_1M -NGMIX_Tpsf_1P -NGMIX_Tpsf_2M -NGMIX_Tpsf_2P -NGMIX_Tpsf_NOSHEAR +NGMIX_T_GAL_1M +NGMIX_T_GAL_1P +NGMIX_T_GAL_2M +NGMIX_T_GAL_2P +NGMIX_T_GAL_NOSHEAR +NGMIX_T_ERR_GAL_1M +NGMIX_T_ERR_GAL_1P +NGMIX_T_ERR_GAL_2M +NGMIX_T_ERR_GAL_2P +NGMIX_T_ERR_GAL_NOSHEAR +NGMIX_T_PSF_RECONV_1M +NGMIX_T_PSF_RECONV_1P +NGMIX_T_PSF_RECONV_2M +NGMIX_T_PSF_RECONV_2P +NGMIX_T_PSF_RECONV_NOSHEAR # flux and error -NGMIX_FLUX_1M -NGMIX_FLUX_1P -NGMIX_FLUX_2M -NGMIX_FLUX_2P -NGMIX_FLUX_NOSHEAR -NGMIX_FLUX_ERR_1M -NGMIX_FLUX_ERR_1P -NGMIX_FLUX_ERR_2M -NGMIX_FLUX_ERR_2P -NGMIX_FLUX_ERR_NOSHEAR +NGMIX_FLUX_GAL_1M +NGMIX_FLUX_GAL_1P +NGMIX_FLUX_GAL_2M +NGMIX_FLUX_GAL_2P +NGMIX_FLUX_GAL_NOSHEAR +NGMIX_FLUX_ERR_GAL_1M +NGMIX_FLUX_ERR_GAL_1P +NGMIX_FLUX_ERR_GAL_2M +NGMIX_FLUX_ERR_GAL_2P +NGMIX_FLUX_ERR_GAL_NOSHEAR # magnitudes MAG_AUTO @@ -94,10 +105,10 @@ FWHM_IMAGE FWHM_WORLD # PSF size measured on original image -NGMIX_T_PSFo_NOSHEAR +NGMIX_T_PSF_ORIG_NOSHEAR # PSF size measured on reconvolved image -# NGMIX_Tpsf_NOSHEAR +# NGMIX_T_PSF_RECONV_NOSHEAR # ngmix moment failure flag NGMIX_MOM_FAIL diff --git a/example/unions_800/cat_matched.param b/example/unions_800/cat_matched.param index 0df13407..e1869df5 100644 --- a/example/unions_800/cat_matched.param +++ b/example/unions_800/cat_matched.param @@ -11,8 +11,9 @@ FLAGS IMAFLAGS_ISO NGMIX_MCAL_FLAGS -# PSF ellipticity -NGMIX_ELL_PSFo_NOSHEAR +# PSF ellipticity (original image PSF) +NGMIX_G1_PSF_ORIG_NOSHEAR +NGMIX_G2_PSF_ORIG_NOSHEAR # spread class SPREAD_CLASS @@ -29,52 +30,62 @@ NGMIX_N_EPOCH ## Ngmix: model fitting # galaxy ellipticity -NGMIX_ELL_1M -NGMIX_ELL_1P -NGMIX_ELL_2M -NGMIX_ELL_2P -NGMIX_ELL_NOSHEAR -#NGMIX_ELL_ERR_1M -#NGMIX_ELL_ERR_1P -#NGMIX_ELL_ERR_2M -#NGMIX_ELL_ERR_2P -NGMIX_ELL_ERR_NOSHEAR +NGMIX_G1_GAL_1M +NGMIX_G2_GAL_1M +NGMIX_G1_GAL_1P +NGMIX_G2_GAL_1P +NGMIX_G1_GAL_2M +NGMIX_G2_GAL_2M +NGMIX_G1_GAL_2P +NGMIX_G2_GAL_2P +NGMIX_G1_GAL_NOSHEAR +NGMIX_G2_GAL_NOSHEAR +#NGMIX_G1_ERR_GAL_1M +#NGMIX_G2_ERR_GAL_1M +#NGMIX_G1_ERR_GAL_1P +#NGMIX_G2_ERR_GAL_1P +#NGMIX_G1_ERR_GAL_2M +#NGMIX_G2_ERR_GAL_2M +#NGMIX_G1_ERR_GAL_2P +#NGMIX_G2_ERR_GAL_2P +NGMIX_G1_ERR_GAL_NOSHEAR +NGMIX_G2_ERR_GAL_NOSHEAR # flags -NGMIX_FLAGS_1M -NGMIX_FLAGS_1P -NGMIX_FLAGS_2M -NGMIX_FLAGS_2P -NGMIX_FLAGS_NOSHEAR +NGMIX_FLAGS_GAL_1M +NGMIX_FLAGS_GAL_1P +NGMIX_FLAGS_GAL_2M +NGMIX_FLAGS_GAL_2P +NGMIX_FLAGS_GAL_NOSHEAR # size and error -NGMIX_T_1M -NGMIX_T_1P -NGMIX_T_2M -NGMIX_T_2P -NGMIX_T_NOSHEAR -NGMIX_T_ERR_1M -NGMIX_T_ERR_1P -NGMIX_T_ERR_2M -NGMIX_T_ERR_2P -NGMIX_T_ERR_NOSHEAR -NGMIX_Tpsf_1M -NGMIX_Tpsf_1P -NGMIX_Tpsf_2M -NGMIX_Tpsf_2P -NGMIX_Tpsf_NOSHEAR +NGMIX_T_GAL_1M +NGMIX_T_GAL_1P +NGMIX_T_GAL_2M +NGMIX_T_GAL_2P +NGMIX_T_GAL_NOSHEAR +NGMIX_T_ERR_GAL_1M +NGMIX_T_ERR_GAL_1P +NGMIX_T_ERR_GAL_2M +NGMIX_T_ERR_GAL_2P +NGMIX_T_ERR_GAL_NOSHEAR +NGMIX_T_PSF_RECONV_1M +NGMIX_T_PSF_RECONV_1P +NGMIX_T_PSF_RECONV_2M +NGMIX_T_PSF_RECONV_2P +NGMIX_T_PSF_RECONV_NOSHEAR # flux and error -NGMIX_FLUX_1M -NGMIX_FLUX_1P -NGMIX_FLUX_2M -NGMIX_FLUX_2P -NGMIX_FLUX_NOSHEAR -NGMIX_FLUX_ERR_1M -NGMIX_FLUX_ERR_1P -NGMIX_FLUX_ERR_2M -NGMIX_FLUX_ERR_2P -NGMIX_FLUX_ERR_NOSHEAR +NGMIX_FLUX_GAL_1M +NGMIX_FLUX_GAL_1P +NGMIX_FLUX_GAL_2M +NGMIX_FLUX_GAL_2P +NGMIX_FLUX_GAL_NOSHEAR +NGMIX_FLUX_ERR_GAL_1M +NGMIX_FLUX_ERR_GAL_1P +NGMIX_FLUX_ERR_GAL_2M +NGMIX_FLUX_ERR_GAL_2P +NGMIX_FLUX_ERR_GAL_NOSHEAR # Flag for multiple objects due to overlapping # tiles @@ -87,10 +98,10 @@ MAG_AUTO SNR_WIN # PSF size measured on original image -NGMIX_T_PSFo_NOSHEAR +NGMIX_T_PSF_ORIG_NOSHEAR # PSF size measured on reconvolved image -# NGMIX_Tpsf_NOSHEAR +# NGMIX_T_PSF_RECONV_NOSHEAR # ngmix moment failure flag NGMIX_MOM_FAIL From 9b3025dcedf4662af5a18cd8ec28fbe70e5b8f8d Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 19 Jun 2026 23:48:51 +0200 Subject: [PATCH 05/33] make_cat: add _save_ngmix_data tests; soften the grammar docstring (#749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No make_cat test existed for the renamed ngmix columns. Add four tests driving SaveCatalogue._save_ngmix_data against a synthetic ngmix catalogue (distinct orig vs reconv sentinels): - produced columns follow the new grammar; no old name (_PSFo, _Tpsf, NGMIX_ELL_) survives - NGMIX_G1_PSF_ORIG_* traces the orig source and NGMIX_G1_PSF_RECONV_* the reconv source, and the two differ (the un-aliasing #749 restores) - NGMIX_T_PSF_ORIG_* != NGMIX_T_PSF_RECONV_* - an obj_id absent from the ngmix cat keeps its sentinel fill - end-to-end: make_cat reads a catalogue ngmix's own compile_results + save_results serialised, guarding the reader against key-set drift Soften the _save_ngmix_data grammar docstring so OBJECT and SHEAR read as optional (NGMIX[m]_[_ERR][_][_]) — the metadata columns NGMIX_MCAL_FLAGS / NGMIX_N_EPOCH / NGMIX_MCAL_TYPES_FAIL carry neither. Co-Authored-By: Claude Opus 4.8 --- .../modules/make_cat_package/make_cat.py | 21 +- tests/module/test_make_cat.py | 297 ++++++++++++++++++ 2 files changed, 309 insertions(+), 9 deletions(-) create mode 100644 tests/module/test_make_cat.py diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index 235fd14f..0122f9e4 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -329,15 +329,18 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): Save the NGMIX catalogue into the final one. - Column grammar: ``NGMIX[m]_[_ERR]__``, with - ``OBJECT`` one of ``GAL`` (galaxy), ``PSF_ORIG`` (the original - psfex/mccd image PSF, fit by ``ngmix.average_original_psf`` — its true - ellipticity and size, feeding object-wise PSF leakage), or - ``PSF_RECONV`` (the metacal reconvolution kernel, round and enlarged - by construction, fit by ``ngmix.average_multiepoch_psf`` — the Tgal / - Tpsf size cut and a g~0 sanity check). ``PSF_ORIG`` and ``PSF_RECONV`` - are independent fits of different PSFs, no longer the single aliased - value of the pre-fix code (shapepipe#749). + Column grammar: ``NGMIX[m]_[_ERR][_][_]``. + ``OBJECT`` and ``SHEAR`` are both optional: the per-object metadata + columns (``NGMIX_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, + ``NGMIX_MCAL_TYPES_FAIL``) carry neither. When present, ``OBJECT`` is + one of ``GAL`` (galaxy), ``PSF_ORIG`` (the original psfex/mccd image + PSF, fit by ``ngmix.average_original_psf`` — its true ellipticity and + size, feeding object-wise PSF leakage), or ``PSF_RECONV`` (the metacal + reconvolution kernel, round and enlarged by construction, fit by + ``ngmix.average_multiepoch_psf`` — the Tgal / Tpsf size cut and a g~0 + sanity check). ``PSF_ORIG`` and ``PSF_RECONV`` are independent fits of + different PSFs, no longer the single aliased value of the pre-fix code + (shapepipe#749). Parameters ---------- diff --git a/tests/module/test_make_cat.py b/tests/module/test_make_cat.py new file mode 100644 index 00000000..c3728cef --- /dev/null +++ b/tests/module/test_make_cat.py @@ -0,0 +1,297 @@ +"""UNIT TESTS FOR MODULE PACKAGE: MAKE_CAT. + +Drives ``SaveCatalogue._save_ngmix_data`` against a synthetic ngmix catalogue +to lock in the renamed PSF-column grammar +``NGMIX_[_ERR]_[_]`` (shapepipe#749): the original +image PSF (``PSF_ORIG``) and the metacal reconvolution kernel (``PSF_RECONV``) +are independent fits of *different* PSFs, no longer the single aliased value of +the pre-fix code. +""" + +import numpy as np +import numpy.testing as npt +from astropy.io import fits + +from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue +from shapepipe.modules.ngmix_package.ngmix import Ngmix + + +class _NullLogger: + def info(self, *_args, **_kwargs): + pass + + +# Per-object key set that compile_results emits into every shear-type +# extension (mirrors ngmix.compile_results ``names2``). Sentinel values are +# distinct per key so a mis-routed column is caught by value, and the orig vs +# reconv PSF families carry *different* sentinels so any aliasing surfaces. +NGMIX_KEYS = [ + "id", + "n_epoch_model", + "mcal_types_fail", + "nfev_fit", + "g1", "g1_err", "g2", "g2_err", + "T", "T_err", "r50", "r50_err", + "flux", "flux_err", "s2n", "mag", "mag_err", + "flags", "mcal_flags", + "g1_psf_orig", "g2_psf_orig", + "g1_err_psf_orig", "g2_err_psf_orig", + "T_psf_orig", "T_err_psf_orig", "r50_psf_orig", "r50_err_psf_orig", + "g1_psf_reconv", "g2_psf_reconv", + "g1_err_psf_reconv", "g2_err_psf_reconv", + "T_psf_reconv", "T_err_psf_reconv", "r50_psf_reconv", "r50_err_psf_reconv", +] + +SHEAR_EXTS = ["1M", "1P", "2M", "2P", "NOSHEAR"] +SHEAR_EXTS_LOWER = ["1m", "1p", "2m", "2p", "noshear"] + + +def _ngmix_row(obj_id): + """One object's per-key values, distinct for orig vs reconv PSF families. + + Built so every PSF column carries a sentinel that names its source: the + orig family from ``*_orig`` integers, the reconv family from ``*_reconv`` + integers, deliberately disjoint. + """ + return { + "id": obj_id, + "n_epoch_model": 3, + "mcal_types_fail": 0, + "nfev_fit": 7, + "g1": 0.10, "g1_err": 0.011, "g2": -0.20, "g2_err": 0.022, + "T": 0.30, "T_err": 0.033, "r50": 0.40, "r50_err": 0.044, + "flux": 100.0, "flux_err": 1.0, "s2n": 55.0, + "mag": 21.0, "mag_err": 0.05, + "flags": 0, "mcal_flags": 0, + # original image PSF — its own ellipticity and size + "g1_psf_orig": 0.0041, "g2_psf_orig": -0.0031, + "g1_err_psf_orig": 2e-5, "g2_err_psf_orig": 3e-5, + "T_psf_orig": 0.071, "T_err_psf_orig": 8e-4, + "r50_psf_orig": 0.221, "r50_err_psf_orig": 1.2e-3, + # metacal reconvolution kernel — round and enlarged, distinct values + "g1_psf_reconv": 0.0012, "g2_psf_reconv": 0.0022, + "g1_err_psf_reconv": 1e-5, "g2_err_psf_reconv": 1e-5, + "T_psf_reconv": 0.092, "T_err_psf_reconv": 1e-3, + "r50_psf_reconv": 0.252, "r50_err_psf_reconv": 1.0e-3, + } + + +def _write_ngmix_cat(path, obj_ids): + """Write a synthetic ngmix FITS with the five shear-type extensions. + + Each extension carries the exact ``compile_results`` key set, with the + same per-object values across extensions (the PSF families are + object-level, not per-shear). + """ + rows = [_ngmix_row(oid) for oid in obj_ids] + hdus = [fits.PrimaryHDU()] + for ext in SHEAR_EXTS: + cols = [ + fits.Column( + name=key, + format="K" if key in ("id", "n_epoch_model", "mcal_types_fail", + "nfev_fit", "flags", "mcal_flags") else "D", + array=np.array([row[key] for row in rows]), + ) + for key in NGMIX_KEYS + ] + hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) + fits.HDUList(hdus).writeto(path, overwrite=True) + + +def _run_save_ngmix(ngmix_path, obj_id, cat_size_target=None): + """Drive ``_save_ngmix_data`` and return its populated output dict.""" + inst = object.__new__(SaveCatalogue) + inst._obj_id = np.asarray(obj_id) + inst._output_dict = {} + inst._cat_size_target = ( + len(inst._obj_id) if cat_size_target is None else cat_size_target + ) + inst._w_log = _NullLogger() + + err_msg = inst._save_ngmix_data(str(ngmix_path)) + assert err_msg is None + return inst._output_dict + + +def test_save_ngmix_data_uses_new_grammar_and_no_old_names(tmp_path): + """Produced columns follow the new grammar; no old token survives. + + The renamed grammar drops ``_PSFo``, ``_Tpsf`` and the ellipticity + 2-vector ``NGMIX_ELL_*`` entirely (shapepipe#749). A regression to any of + those would be a silent break for the shipped param-file consumer + (``create_final_cat.py``), so the absence is asserted directly. + """ + ngmix_path = tmp_path / "ngmix-0.fits" + obj_ids = [11, 22, 33] + _write_ngmix_cat(ngmix_path, obj_ids) + + out = _run_save_ngmix(ngmix_path, obj_ids) + + # Every per-shear family is present under the new grammar. + for shear in SHEAR_EXTS: + for col in ( + f"NGMIX_G1_GAL_{shear}", f"NGMIX_G2_GAL_{shear}", + f"NGMIX_G1_ERR_GAL_{shear}", f"NGMIX_G2_ERR_GAL_{shear}", + f"NGMIX_T_GAL_{shear}", f"NGMIX_T_ERR_GAL_{shear}", + f"NGMIX_SNR_GAL_{shear}", + f"NGMIX_FLUX_GAL_{shear}", f"NGMIX_FLUX_ERR_GAL_{shear}", + f"NGMIX_MAG_GAL_{shear}", f"NGMIX_MAG_ERR_GAL_{shear}", + f"NGMIX_FLAGS_GAL_{shear}", + f"NGMIX_G1_PSF_ORIG_{shear}", f"NGMIX_G2_PSF_ORIG_{shear}", + f"NGMIX_T_PSF_ORIG_{shear}", + f"NGMIX_G1_PSF_RECONV_{shear}", f"NGMIX_G2_PSF_RECONV_{shear}", + f"NGMIX_T_PSF_RECONV_{shear}", + ): + assert col in out, f"missing {col}" + + # Object-level metadata columns carry no OBJECT/SHEAR token. + for col in ("NGMIX_MCAL_FLAGS", "NGMIX_N_EPOCH", "NGMIX_MCAL_TYPES_FAIL"): + assert col in out, f"missing {col}" + + # No old name survives anywhere in the produced columns. + for col in out: + assert "_PSFo" not in col, col + assert "_Tpsf" not in col and "TPSF" not in col, col + assert not col.startswith("NGMIX_ELL_"), col + assert "NGMIXm" not in col, col # moments branch off by default + + +def test_save_ngmix_data_psf_families_trace_distinct_sources(tmp_path): + """The two PSF families read from their own ngmix columns, un-aliased. + + ``NGMIX_G1_PSF_ORIG_*`` must equal the orig source and + ``NGMIX_G1_PSF_RECONV_*`` the reconv source, and the two must differ — + the un-aliasing that shapepipe#749 restores. Likewise the sizes: + ``NGMIX_T_PSF_ORIG_*`` != ``NGMIX_T_PSF_RECONV_*``. + """ + ngmix_path = tmp_path / "ngmix-1.fits" + obj_ids = [11, 22, 33] + _write_ngmix_cat(ngmix_path, obj_ids) + row = _ngmix_row(11) + + out = _run_save_ngmix(ngmix_path, obj_ids) + + for shear in SHEAR_EXTS: + npt.assert_allclose( + out[f"NGMIX_G1_PSF_ORIG_{shear}"], + [row["g1_psf_orig"]] * len(obj_ids), + ) + npt.assert_allclose( + out[f"NGMIX_G1_PSF_RECONV_{shear}"], + [row["g1_psf_reconv"]] * len(obj_ids), + ) + # the un-aliasing: orig and reconv ellipticities differ + assert not np.allclose( + out[f"NGMIX_G1_PSF_ORIG_{shear}"], + out[f"NGMIX_G1_PSF_RECONV_{shear}"], + ) + # sizes are independent too + npt.assert_allclose( + out[f"NGMIX_T_PSF_ORIG_{shear}"], [row["T_psf_orig"]] * len(obj_ids) + ) + npt.assert_allclose( + out[f"NGMIX_T_PSF_RECONV_{shear}"], + [row["T_psf_reconv"]] * len(obj_ids), + ) + assert not np.allclose( + out[f"NGMIX_T_PSF_ORIG_{shear}"], + out[f"NGMIX_T_PSF_RECONV_{shear}"], + ) + + +def test_save_ngmix_data_fills_sentinels_for_absent_objects(tmp_path): + """An obj_id absent from the ngmix cat keeps its sentinel fill. + + make_cat pre-fills every column with a type-specific sentinel and only + overwrites the rows whose ``NUMBER`` matches an ngmix ``id``. An object + SExtractor saw but ngmix never fit (no matching id) must therefore keep + the sentinels: 0 for sizes/flags, -10 for ellipticities, 1e30 for + ``T_ERR``, -1 for flux/mag errors. + """ + ngmix_path = tmp_path / "ngmix-2.fits" + # ngmix fit only object 22; the final cat also carries 11 and 99. + _write_ngmix_cat(ngmix_path, [22]) + obj_ids = [11, 22, 99] + + out = _run_save_ngmix(ngmix_path, obj_ids, cat_size_target=3) + + present = obj_ids.index(22) + absent = [obj_ids.index(11), obj_ids.index(99)] + + # The matched object got its measured value; the others keep sentinels. + row = _ngmix_row(22) + g1_orig = np.asarray(out["NGMIX_G1_PSF_ORIG_NOSHEAR"]) + npt.assert_allclose(g1_orig[present], row["g1_psf_orig"]) + npt.assert_allclose(g1_orig[absent], [-10.0, -10.0]) + + t_orig = np.asarray(out["NGMIX_T_PSF_ORIG_NOSHEAR"]) + npt.assert_allclose(t_orig[present], row["T_psf_orig"]) + npt.assert_allclose(t_orig[absent], [0.0, 0.0]) + + t_err_gal = np.asarray(out["NGMIX_T_ERR_GAL_NOSHEAR"]) + npt.assert_allclose(t_err_gal[present], row["T_err"]) + npt.assert_allclose(t_err_gal[absent], [1e30, 1e30]) + + flux_err = np.asarray(out["NGMIX_FLUX_ERR_GAL_NOSHEAR"]) + npt.assert_allclose(flux_err[present], row["flux_err"]) + npt.assert_allclose(flux_err[absent], [-1.0, -1.0]) + + n_epoch = np.asarray(out["NGMIX_N_EPOCH"]) + npt.assert_allclose(n_epoch[present], row["n_epoch_model"]) + npt.assert_allclose(n_epoch[absent], [0.0, 0.0]) + + +def test_save_ngmix_data_matches_module_serialised_catalogue(tmp_path): + """End-to-end: make_cat reads a catalogue ngmix itself serialised. + + Rather than hand-rolling the FITS, drive ngmix's own + ``compile_results`` + ``save_results`` (the real write path), then read it + back through ``_save_ngmix_data``. This guards the make_cat reader against + drift in the ngmix key set: the keys must line up end to end. + """ + obj_ids = [5, 7] + results = [] + for oid in obj_ids: + row = _ngmix_row(oid) + per_type = { + "nfev": row["nfev_fit"], + "g": [row["g1"], row["g2"]], + "g_cov": np.diag([row["g1_err"] ** 2, row["g2_err"] ** 2]), + "T": row["T"], "T_err": row["T_err"], + "flux": row["flux"], "flux_err": row["flux_err"], + "s2n": row["s2n"], "flags": 0, + } + res = { + "obj_id": oid, + "n_epoch_model": row["n_epoch_model"], + "mcal_types_fail": row["mcal_types_fail"], + "mcal_flags": row["mcal_flags"], + } + for key in NGMIX_KEYS: + if key.endswith("_psf_orig") or key.endswith("_psf_reconv"): + res[key] = row[key] + res.update({name: dict(per_type) for name in SHEAR_EXTS_LOWER}) + results.append(res) + + ngmix_inst = object.__new__(Ngmix) + ngmix_inst._zero_point = 30.0 + ngmix_inst._output_dir = str(tmp_path) + ngmix_inst._file_number_string = "-0" + out_dict = ngmix_inst.compile_results(results) + ngmix_inst.save_results(out_dict) + + ngmix_path = ngmix_inst.get_output_path(str(tmp_path)) + out = _run_save_ngmix(ngmix_path, obj_ids) + + # Both PSF families survive the round trip, distinct, on every object. + npt.assert_allclose( + out["NGMIX_G1_PSF_ORIG_NOSHEAR"], [_ngmix_row(o)["g1_psf_orig"] for o in obj_ids] + ) + npt.assert_allclose( + out["NGMIX_G1_PSF_RECONV_NOSHEAR"], + [_ngmix_row(o)["g1_psf_reconv"] for o in obj_ids], + ) + assert not np.allclose( + out["NGMIX_G1_PSF_ORIG_NOSHEAR"], out["NGMIX_G1_PSF_RECONV_NOSHEAR"] + ) From 9ab43af0a73cfe224955dda0ddeca3569720d146 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 19 Jun 2026 23:49:10 +0200 Subject: [PATCH 06/33] ngmix: fit a copy of the original PSF so gal_obs.psf is never mutated (#749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SCIENCE-CRITICAL. average_original_psf called psf_runner.go(gal_obs), and PSFRunner.go sets .gmix (and .meta['result']) on the observation it fits — here gal_obs.psf, the PSF metacal later deep-copies and consumes via boot.go(gal_obs_list). A stray gmix survives that copy and is reused as the MetacalFitGaussPSF fallback when its own admom+ML PSF fits both fail, so objects the base branch dropped (BootPSFFailure) silently survived on this branch, changing the galaxy/shear result set. A rename+add-column refactor must be bit-identical on galaxy results, so fit gal_obs.psf.copy() and read from the copy, leaving gal_obs pristine (verified: gal_obs.psf carries no gmix and no leaked meta['result'] afterward, matching base-branch state). Also in do_ngmix_metacal: - return a MetacalResult NamedTuple (resdict, reconv, orig) so the two PSF families are named not positional, guarding against a reconv/orig transposition; still a tuple, so positional unpacking is unchanged. - rename psfo_res -> psf_orig_res everywhere (no trailing-o shorthand). - soften the average_original_psf weighting note: same form as average_multiepoch_psf (weight.sum(), skip flags!=0) but the reconv path weights by the fixnoise-combined inverse variance of the noshear metacal image while the orig path uses the raw galaxy inverse variance. - note the r50_psf_orig/reconv columns are intermediate-only (make_cat reads T_psf_*/g, not r50, so they do not reach the final catalogue). Tests: a do_ngmix_metacal guard asserting gal_obs.psf carries no gmix after the original-PSF pre-fit; and, on an elliptical PSF stamp, T_psf_reconv > T_psf_orig and |g_psf_reconv| < |g_psf_orig| (true by construction — the reconvolution kernel is round and dilated), which catches a psf_res / psf_orig_res transposition. Co-Authored-By: Claude Opus 4.8 --- src/shapepipe/modules/ngmix_package/ngmix.py | 109 +++++++++++----- tests/module/test_ngmix.py | 129 ++++++++++++++++++- 2 files changed, 201 insertions(+), 37 deletions(-) diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index ced2d392..b50f94f9 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -8,6 +8,8 @@ import os import re +from typing import NamedTuple + import ngmix import galsim import numpy as np @@ -26,6 +28,30 @@ METACAL_TYPES = ('noshear', '1p', '1m', '2p', '2m') +class MetacalResult(NamedTuple): + """Return of :func:`do_ngmix_metacal`: the metacal fit plus both PSFs. + + A NamedTuple so the two PSF families are named, not positional — guarding + against a reconv/orig transposition at call sites. Still a plain tuple, so + ``resdict, psf_res, psf_orig_res = do_ngmix_metacal(...)`` keeps working. + + Attributes + ---------- + resdict : dict + MetacalBootstrapper result dict (one entry per metacal type). + reconv : dict + Averaged metacal *reconvolution*-kernel PSF (round, enlarged): + :func:`average_multiepoch_psf`. + orig : dict + Averaged *original* image-PSF (psfex/mccd, its true shape and size): + :func:`average_original_psf`. + """ + + resdict: dict + reconv: dict + orig: dict + + def get_mcal_flags(res): """Get Metacal Flags. @@ -405,7 +431,10 @@ def compile_results(self, results): 'mag_err', 'flags', 'mcal_flags', - # original image PSF (psfex/mccd), fit by average_original_psf + # original image PSF (psfex/mccd), fit by average_original_psf. + # The r50_psf_* columns here are intermediate-only: make_cat reads + # the T_psf_* (and g) columns, not r50, so these do not reach the + # final catalogue. 'g1_psf_orig', 'g2_psf_orig', 'g1_err_psf_orig', @@ -679,7 +708,7 @@ def process(self): if tile_cat.flux is not None else 1.0 ) - res, psf_res, psfo_res = do_ngmix_metacal( + res, psf_res, psf_orig_res = do_ngmix_metacal( stamp, prior, flux_guess, @@ -708,8 +737,8 @@ def process(self): res['mcal_flags'] = get_mcal_flags(res) # Two distinct PSF families (shapepipe#749), each with its own # ellipticity AND size, written under self-naming res-keys: - # reconvolution kernel (psf_res) -> *_psf_reconv - # original image PSF (psfo_res) -> *_psf_orig + # reconvolution kernel (psf_res) -> *_psf_reconv + # original image PSF (psf_orig_res) -> *_psf_orig # PSF half-light radius r50 = sqrt(2 ln 2) * sigma with # sigma = sqrt(T / 2); error from d sigma / d T = 1 / (4 sigma). def _psf_r50(T_psf, T_psf_err): @@ -730,14 +759,14 @@ def _psf_r50(T_psf, T_psf_err): res['r50_psf_reconv'], res['r50_err_psf_reconv'] = _psf_r50( psf_res['T_psf'], psf_res['T_psf_err'] ) - res['g1_psf_orig'] = psfo_res['g_psf'][0] - res['g2_psf_orig'] = psfo_res['g_psf'][1] - res['g1_err_psf_orig'] = psfo_res['g_psf_err'][0] - res['g2_err_psf_orig'] = psfo_res['g_psf_err'][1] - res['T_psf_orig'] = psfo_res['T_psf'] - res['T_err_psf_orig'] = psfo_res['T_psf_err'] + res['g1_psf_orig'] = psf_orig_res['g_psf'][0] + res['g2_psf_orig'] = psf_orig_res['g_psf'][1] + res['g1_err_psf_orig'] = psf_orig_res['g_psf_err'][0] + res['g2_err_psf_orig'] = psf_orig_res['g_psf_err'][1] + res['T_psf_orig'] = psf_orig_res['T_psf'] + res['T_err_psf_orig'] = psf_orig_res['T_psf_err'] res['r50_psf_orig'], res['r50_err_psf_orig'] = _psf_r50( - psfo_res['T_psf'], psfo_res['T_psf_err'] + psf_orig_res['T_psf'], psf_orig_res['T_psf_err'] ) final_res.append(res) n_fitted += 1 @@ -1254,16 +1283,31 @@ def average_original_psf(gal_obs_list, psf_runner): shape and size enter object-wise PSF-leakage diagnostics. Epochs are weighted by the *galaxy* inverse-variance weight - (``gal_obs.weight.sum()``), matching :func:`average_multiepoch_psf`, so - the two PSF families share an averaging scheme and differ only in which - PSF is fit. + (``gal_obs.weight.sum()``). This is the same *form* as + :func:`average_multiepoch_psf` (``weight.sum()`` per epoch, skipping + ``flags != 0`` fits) but not the identical weight: the reconvolution path + weights by the fixnoise-combined inverse variance of the noshear metacal + image, whereas this path uses the raw galaxy inverse variance. So the two + PSF families share an averaging *scheme* and differ in which PSF is fit + and in the precise per-epoch weighting factor. + + The fit runs on a *copy* of each PSF observation so ``gal_obs.psf`` — + the object metacal later deep-copies and consumes via + ``boot.go(gal_obs_list)`` — is never mutated. ``PSFRunner.go`` sets both + ``.meta['result']`` and, on success, the ``.gmix`` attribute of the + observation it fits; were that ``gal_obs.psf`` itself, the stray gmix + would survive metacal's deep copy and be reused as the + ``MetacalFitGaussPSF`` fallback when its own admom+ML PSF fits both fail, + silently rescuing objects the base branch dropped (``BootPSFFailure``) and + changing the galaxy/shear result set. Fitting a copy keeps this add-column + refactor bit-identical on the galaxy results. Parameters ---------- gal_obs_list : ngmix.observation.ObsList Per-epoch galaxy observations; each ``gal_obs.psf`` is the original (pre-metacal) PSF observation to fit, with no further ``.psf`` of - its own so the runner fits the stamp itself. + its own so the runner fits the stamp itself. Left pristine. psf_runner : ngmix.runners.PSFRunner The module's PSF runner, carrying the resolved ``psf_fit_prior``. @@ -1273,11 +1317,14 @@ def average_original_psf(gal_obs_list, psf_runner): Same keys as :func:`average_multiepoch_psf`. """ def fit(gal_obs): - # PSFRunner.go fits gal_obs.psf (the original PSF stamp) and sets - # gal_obs.psf.meta['result']; failed fits keep flags != 0 and are - # dropped by _average_psf_fits. - psf_runner.go(gal_obs) - return gal_obs.psf.meta['result'], gal_obs.weight.sum() + # Fit a COPY of the PSF observation: PSFRunner.go fits the PSF stamp + # and sets its .meta['result'] (and .gmix on success). Reading from + # the copy leaves gal_obs.psf pristine — no gmix to leak through + # metacal's deep copy into the MetacalFitGaussPSF fallback. Failed + # fits keep flags != 0 and are dropped by _average_psf_fits. + psf_obs = gal_obs.psf.copy() + psf_runner.go(psf_obs) + return psf_obs.meta['result'], gal_obs.weight.sum() return _average_psf_fits(fit(gal_obs) for gal_obs in gal_obs_list) @@ -1341,13 +1388,13 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): Returns ------- - tuple - ``(resdict, psf_res, psfo_res)`` where ``resdict`` is the - MetacalBootstrapper result dict, ``psf_res`` is the averaged metacal - *reconvolution*-kernel PSF dict (:func:`average_multiepoch_psf`), and - ``psfo_res`` is the averaged *original* image-PSF dict - (:func:`average_original_psf`). The two PSF dicts share keys but - describe different PSFs. + MetacalResult + Named 3-tuple ``(resdict, reconv, orig)``: the MetacalBootstrapper + result dict, the averaged metacal *reconvolution*-kernel PSF dict + (:func:`average_multiepoch_psf`), and the averaged *original* image-PSF + dict (:func:`average_original_psf`). The two PSF dicts share keys but + describe different PSFs; the named fields guard against transposing + them. Unpacks positionally as ``resdict, psf_res, psf_orig_res``. """ n_epoch = len(stamp.gals) if n_epoch == 0: @@ -1377,8 +1424,10 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): # the same psf_runner (so the same psf_fit_prior and centroid). This is # the PSF_ORIG family; metacal below produces the PSF_RECONV family. Run # first so the original PSF is fit on its own stamp, distinct from the - # round, enlarged kernel metacal convolves back in. - psfo_res = average_original_psf(gal_obs_list, psf_runner) + # round, enlarged kernel metacal convolves back in. average_original_psf + # fits a COPY of each gal_obs.psf, so gal_obs_list reaches boot.go below + # pristine — no stray gmix to leak into MetacalFitGaussPSF's fallback. + psf_orig_res = average_original_psf(gal_obs_list, psf_runner) metacal_pars = { 'types': ['noshear', '1p', '1m', '2p', '2m'], @@ -1397,4 +1446,4 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): ) resdict, obsdict = boot.go(gal_obs_list) psf_res = average_multiepoch_psf(obsdict) - return resdict, psf_res, psfo_res + return MetacalResult(resdict=resdict, reconv=psf_res, orig=psf_orig_res) diff --git a/tests/module/test_ngmix.py b/tests/module/test_ngmix.py index e035a0e8..157dc726 100644 --- a/tests/module/test_ngmix.py +++ b/tests/module/test_ngmix.py @@ -424,9 +424,16 @@ def test_average_original_psf_fits_each_gal_psf_with_galaxy_weight(): def gal_epoch(psf_result, weight_value): # gal_obs carries a .psf whose meta['result'] is what the runner - # would set; .weight is the galaxy inverse-variance map. + # would set; .weight is the galaxy inverse-variance map. The PSF + # observation must expose .copy() (average_original_psf fits a copy + # so gal_obs.psf is never mutated); the copy carries the same + # pre-populated meta so the averaging math is unchanged. + psf_obs = SimpleNamespace(meta={"result": psf_result}) + psf_obs.copy = lambda _self=psf_obs: SimpleNamespace( + meta=dict(_self.meta) + ) return SimpleNamespace( - psf=SimpleNamespace(meta={"result": psf_result}), + psf=psf_obs, weight=np.full((2, 2), weight_value), ) @@ -450,23 +457,131 @@ def gal_epoch(psf_result, weight_value): gal_epoch(good_b, 2.0), # galaxy weight-sum = 8 gal_epoch(failed, 4.0), # dropped on flags != 0 ] - psfo_res = average_original_psf(gal_obs_list, runner) + psf_orig_res = average_original_psf(gal_obs_list, runner) # the runner fit every epoch's PSF assert len(calls) == 3 # weighted over the two surviving epochs (weights 4 and 8) w = np.array([4.0, 8.0]) npt.assert_allclose( - psfo_res["g_psf"], + psf_orig_res["g_psf"], (good_a["g"] * w[0] + good_b["g"] * w[1]) / w.sum(), ) npt.assert_allclose( - psfo_res["T_psf"], + psf_orig_res["T_psf"], (good_a["T"] * w[0] + good_b["T"] * w[1]) / w.sum(), ) - assert psfo_res["n_epoch"] == 2 + assert psf_orig_res["n_epoch"] == 2 # original PSF is elliptical — not the round reconvolution kernel - assert abs(psfo_res["g_psf"][0]) > 1e-3 + assert abs(psf_orig_res["g_psf"][0]) > 1e-3 + + +def _do_ngmix_metacal_on_psf(psf_shear, seed=7, n_epochs=2): + """Run do_ngmix_metacal on a simulated stamp with a given PSF shape. + + Returns ``(resdict, psf_reconv, psf_orig, gal_obs_list)``, where the last + is the list metacal consumed — so a caller can assert that the original-PSF + pre-fit left it pristine. + """ + from shapepipe.modules.ngmix_package.ngmix import ( + Postage_stamp, + do_ngmix_metacal, + get_prior, + ) + from shapepipe.testing.simulate import make_data + + rng = np.random.RandomState(seed) + prior = get_prior(0.1857, rng) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), + shear=(0.0, 0.0), + noise=1e-5, + n_epochs=n_epochs, + img_size=51, + psf_shear=psf_shear, + ) + stamp = Postage_stamp(bkg_sub=False, megacam_flip=False) + stamp.gals, stamp.psfs, stamp.weights, stamp.flags, stamp.jacobs = ( + gals, + psfs, + weights, + flags, + jacobs, + ) + resdict, psf_reconv, psf_orig = do_ngmix_metacal(stamp, prior, 1.0, rng) + return resdict, psf_reconv, psf_orig + + +def test_original_psf_prefit_leaves_gal_obs_psf_pristine(): + """The original-PSF pre-fit must not mutate the PSF metacal consumes (#749). + + ``average_original_psf`` fits each epoch's psfex/mccd PSF with the shared + ``psf_runner``. ``PSFRunner.go`` sets ``.gmix`` (and ``.meta['result']``) + on the observation it fits; if that were ``gal_obs.psf`` itself, the gmix + would survive metacal's deep copy and be reused as the + ``MetacalFitGaussPSF`` fallback when admom+ML both fail — silently + rescuing objects the base branch dropped (``BootPSFFailure``). The fix + fits a *copy*, so ``gal_obs.psf`` carries NO gmix afterward, matching the + base-branch state that makes this add-column refactor bit-identical on the + galaxy results. + """ + from ngmix.observation import ObsList + from shapepipe.modules.ngmix_package.ngmix import ( + average_original_psf, + get_prior, + make_ngmix_observation, + make_runners, + ) + from shapepipe.testing.simulate import make_data + + rng = np.random.RandomState(7) + prior = get_prior(0.1857, rng) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), + shear=(0.0, 0.0), + noise=1e-5, + n_epochs=2, + img_size=51, + psf_shear=(0.05, -0.04), + ) + gal_obs_list = ObsList() + for n_e in range(2): + gal_obs_list.append( + make_ngmix_observation( + gals[n_e], weights[n_e], flags[n_e], psfs[n_e], jacobs[n_e], + rng, + ) + ) + + # Pre-state: a freshly built PSF observation carries no gmix. + assert not any(obs.psf.has_gmix() for obs in gal_obs_list) + + runner, psf_runner = make_runners(prior, 1.0, rng) + average_original_psf(gal_obs_list, psf_runner) + + # The pre-fit fit a copy, so the originals are untouched: no gmix and no + # leaked fit result in meta. + assert not any(obs.psf.has_gmix() for obs in gal_obs_list), ( + "original-PSF pre-fit leaked a gmix into gal_obs.psf" + ) + assert not any("result" in obs.psf.meta for obs in gal_obs_list), ( + "original-PSF pre-fit leaked a fit result into gal_obs.psf.meta" + ) + + +def test_reconv_psf_is_rounder_and_larger_than_orig_on_elliptical_psf(): + """Reconvolution kernel is round + dilated relative to the original PSF. + + On an ELLIPTICAL PSF stamp the metacal reconvolution kernel is round and + enlarged by construction, so ``T_psf_reconv > T_psf_orig`` and + ``|g_psf_reconv| < |g_psf_orig|``. This end-to-end guard catches a + ``psf_res`` / ``psf_orig_res`` transposition in :func:`do_ngmix_metacal` + (the two families share keys, so a swap is otherwise silent). + """ + _, psf_reconv, psf_orig = _do_ngmix_metacal_on_psf((0.05, -0.04)) + + assert psf_reconv["T_psf"] > psf_orig["T_psf"] + assert np.hypot(*psf_reconv["g_psf"]) < np.hypot(*psf_orig["g_psf"]) def test_weight_map_recovers_injected_inverse_variance(): From b690d3f41d3f4d59ff75bf445b83f6edaf02f8ce Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 00:35:03 +0200 Subject: [PATCH 07/33] ngmix: drop dead r50 columns (derivable from T) r50 = SIGMA_TO_R50*sqrt(T/2) is a deterministic function of T, which is already exported; the r50_* keys were written to the intermediate ngmix catalogue and read by nobody (confirmed across shapepipe + sp_validation). Remove the computation, schema entries, and the local SIGMA_TO_R50 constant; downstream r50 is derivable via cs_util.size.T_to_r50. Co-Authored-By: Claude Opus 4.8 --- src/shapepipe/modules/ngmix_package/ngmix.py | 46 +----------- tests/module/test_make_cat.py | 10 +-- tests/module/test_ngmix.py | 78 ++++---------------- 3 files changed, 19 insertions(+), 115 deletions(-) diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index b50f94f9..da21af66 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -20,11 +20,6 @@ from shapepipe.pipeline import file_io -# Gaussian half-light radius per unit sigma: r50 = sqrt(2 ln 2) * sigma -# = 1.17741 * sigma. With the ngmix area parameter T = 2 sigma^2 this -# gives r50 = SIGMA_TO_R50 * sqrt(T / 2) = sqrt(ln 2 * T). -SIGMA_TO_R50 = np.sqrt(2.0 * np.log(2.0)) - METACAL_TYPES = ('noshear', '1p', '1m', '2p', '2m') @@ -386,7 +381,7 @@ def compile_results(self, results): Two PSF column families — each carrying ellipticity *and* size, for *different* PSFs (shapepipe#749): - * ``*_psf_orig`` (``g1``/``g2`` + ``*_err``, ``T``, ``r50``) — the + * ``*_psf_orig`` (``g1``/``g2`` + ``*_err``, ``T``) — the ORIGINAL image PSF (the psfex/mccd model stamp), fit before metacal reconvolution by :func:`average_original_psf`. This is the PSF whose true ellipticity and size enter object-wise @@ -422,8 +417,6 @@ def compile_results(self, results): 'g2_err', 'T', 'T_err', - 'r50', - 'r50_err', 'flux', 'flux_err', 's2n', @@ -431,18 +424,13 @@ def compile_results(self, results): 'mag_err', 'flags', 'mcal_flags', - # original image PSF (psfex/mccd), fit by average_original_psf. - # The r50_psf_* columns here are intermediate-only: make_cat reads - # the T_psf_* (and g) columns, not r50, so these do not reach the - # final catalogue. + # original image PSF (psfex/mccd), fit by average_original_psf 'g1_psf_orig', 'g2_psf_orig', 'g1_err_psf_orig', 'g2_err_psf_orig', 'T_psf_orig', 'T_err_psf_orig', - 'r50_psf_orig', - 'r50_err_psf_orig', # metacal reconvolution kernel, fit by average_multiepoch_psf 'g1_psf_reconv', 'g2_psf_reconv', @@ -450,8 +438,6 @@ def compile_results(self, results): 'g2_err_psf_reconv', 'T_psf_reconv', 'T_err_psf_reconv', - 'r50_psf_reconv', - 'r50_err_psf_reconv', ] output_dict = {k: {kk: [] for kk in names2} for k in names} for idx in range(len(results)): @@ -498,29 +484,18 @@ def compile_results(self, results): 'g1_psf_orig', 'g2_psf_orig', 'g1_err_psf_orig', 'g2_err_psf_orig', 'T_psf_orig', 'T_err_psf_orig', - 'r50_psf_orig', 'r50_err_psf_orig', 'g1_psf_reconv', 'g2_psf_reconv', 'g1_err_psf_reconv', 'g2_err_psf_reconv', 'T_psf_reconv', 'T_err_psf_reconv', - 'r50_psf_reconv', 'r50_err_psf_reconv', ): output_dict[name][psf_key].append(results[idx][psf_key]) - # Galaxy half-light radius from the fitted area T = 2 sigma^2: - # r50 = sqrt(ln 2 * T), with d r50 / d T = r50 / (2 T) - if T_gal > 0: - r50_gal = SIGMA_TO_R50 * np.sqrt(T_gal / 2) - r50_gal_err = r50_gal * T_gal_err / (2 * T_gal) - else: - r50_gal = r50_gal_err = np.nan output_dict[name]["g1"].append(g[0]) output_dict[name]["g2"].append(g[1]) output_dict[name]["g1_err"].append(np.sqrt(g_cov[0, 0])) output_dict[name]["g2_err"].append(np.sqrt(g_cov[1, 1])) output_dict[name]["T"].append(T_gal) output_dict[name]["T_err"].append(T_gal_err) - output_dict[name]['r50'].append(r50_gal) - output_dict[name]['r50_err'].append(r50_gal_err) output_dict[name]["flux"].append(flux) output_dict[name]["flux_err"].append(flux_err) output_dict[name]["mag"].append(mag) @@ -739,35 +714,18 @@ def process(self): # ellipticity AND size, written under self-naming res-keys: # reconvolution kernel (psf_res) -> *_psf_reconv # original image PSF (psf_orig_res) -> *_psf_orig - # PSF half-light radius r50 = sqrt(2 ln 2) * sigma with - # sigma = sqrt(T / 2); error from d sigma / d T = 1 / (4 sigma). - def _psf_r50(T_psf, T_psf_err): - sigma = np.sqrt(max(T_psf, 0) / 2) - r50 = SIGMA_TO_R50 * sigma - r50_err = ( - SIGMA_TO_R50 * T_psf_err / (4 * sigma) - if sigma > 0 else np.nan - ) - return r50, r50_err - res['g1_psf_reconv'] = psf_res['g_psf'][0] res['g2_psf_reconv'] = psf_res['g_psf'][1] res['g1_err_psf_reconv'] = psf_res['g_psf_err'][0] res['g2_err_psf_reconv'] = psf_res['g_psf_err'][1] res['T_psf_reconv'] = psf_res['T_psf'] res['T_err_psf_reconv'] = psf_res['T_psf_err'] - res['r50_psf_reconv'], res['r50_err_psf_reconv'] = _psf_r50( - psf_res['T_psf'], psf_res['T_psf_err'] - ) res['g1_psf_orig'] = psf_orig_res['g_psf'][0] res['g2_psf_orig'] = psf_orig_res['g_psf'][1] res['g1_err_psf_orig'] = psf_orig_res['g_psf_err'][0] res['g2_err_psf_orig'] = psf_orig_res['g_psf_err'][1] res['T_psf_orig'] = psf_orig_res['T_psf'] res['T_err_psf_orig'] = psf_orig_res['T_psf_err'] - res['r50_psf_orig'], res['r50_err_psf_orig'] = _psf_r50( - psf_orig_res['T_psf'], psf_orig_res['T_psf_err'] - ) final_res.append(res) n_fitted += 1 count_batch += 1 diff --git a/tests/module/test_make_cat.py b/tests/module/test_make_cat.py index c3728cef..b5077d3a 100644 --- a/tests/module/test_make_cat.py +++ b/tests/module/test_make_cat.py @@ -31,15 +31,15 @@ def info(self, *_args, **_kwargs): "mcal_types_fail", "nfev_fit", "g1", "g1_err", "g2", "g2_err", - "T", "T_err", "r50", "r50_err", + "T", "T_err", "flux", "flux_err", "s2n", "mag", "mag_err", "flags", "mcal_flags", "g1_psf_orig", "g2_psf_orig", "g1_err_psf_orig", "g2_err_psf_orig", - "T_psf_orig", "T_err_psf_orig", "r50_psf_orig", "r50_err_psf_orig", + "T_psf_orig", "T_err_psf_orig", "g1_psf_reconv", "g2_psf_reconv", "g1_err_psf_reconv", "g2_err_psf_reconv", - "T_psf_reconv", "T_err_psf_reconv", "r50_psf_reconv", "r50_err_psf_reconv", + "T_psf_reconv", "T_err_psf_reconv", ] SHEAR_EXTS = ["1M", "1P", "2M", "2P", "NOSHEAR"] @@ -59,7 +59,7 @@ def _ngmix_row(obj_id): "mcal_types_fail": 0, "nfev_fit": 7, "g1": 0.10, "g1_err": 0.011, "g2": -0.20, "g2_err": 0.022, - "T": 0.30, "T_err": 0.033, "r50": 0.40, "r50_err": 0.044, + "T": 0.30, "T_err": 0.033, "flux": 100.0, "flux_err": 1.0, "s2n": 55.0, "mag": 21.0, "mag_err": 0.05, "flags": 0, "mcal_flags": 0, @@ -67,12 +67,10 @@ def _ngmix_row(obj_id): "g1_psf_orig": 0.0041, "g2_psf_orig": -0.0031, "g1_err_psf_orig": 2e-5, "g2_err_psf_orig": 3e-5, "T_psf_orig": 0.071, "T_err_psf_orig": 8e-4, - "r50_psf_orig": 0.221, "r50_err_psf_orig": 1.2e-3, # metacal reconvolution kernel — round and enlarged, distinct values "g1_psf_reconv": 0.0012, "g2_psf_reconv": 0.0022, "g1_err_psf_reconv": 1e-5, "g2_err_psf_reconv": 1e-5, "T_psf_reconv": 0.092, "T_err_psf_reconv": 1e-3, - "r50_psf_reconv": 0.252, "r50_err_psf_reconv": 1.0e-3, } diff --git a/tests/module/test_ngmix.py b/tests/module/test_ngmix.py index 157dc726..9d0e640f 100644 --- a/tests/module/test_ngmix.py +++ b/tests/module/test_ngmix.py @@ -150,15 +150,6 @@ def test_metacal_is_reproducible_with_fixed_seed(): ORIG_PSF_T_ERR = 0.0008 -def _psf_r50_pair(T_psf, T_psf_err, sigma_to_r50): - """``(r50, r50_err)`` for a PSF size, mirroring the source conversion.""" - sigma = np.sqrt(max(T_psf, 0) / 2) - return ( - sigma_to_r50 * sigma, - sigma_to_r50 * T_psf_err / (4 * sigma) if sigma > 0 else np.nan, - ) - - def _fake_metacal_result(T, T_err, T_psf, T_psf_err): """Build one minimal metacal result as produced by the process loop. @@ -167,14 +158,7 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): ``*_psf_reconv`` (metacal reconvolution kernel). The reconvolution-kernel size is the builder's ``T_psf`` argument; the original-PSF size is the fixed ``ORIG_PSF_T``, deliberately different so the un-aliasing is tested. - PSF r50 = ``sqrt(2 ln 2) * sigma`` with ``sigma = sqrt(T / 2)``. """ - from shapepipe.modules.ngmix_package.ngmix import SIGMA_TO_R50 - - r50_reconv, r50_err_reconv = _psf_r50_pair(T_psf, T_psf_err, SIGMA_TO_R50) - r50_orig, r50_err_orig = _psf_r50_pair( - ORIG_PSF_T, ORIG_PSF_T_ERR, SIGMA_TO_R50 - ) per_type = { "nfev": 1, "g": [0.01, -0.02], @@ -197,8 +181,6 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): "g2_err_psf_orig": ORIG_PSF_G_ERR[1], "T_psf_orig": ORIG_PSF_T, "T_err_psf_orig": ORIG_PSF_T_ERR, - "r50_psf_orig": r50_orig, - "r50_err_psf_orig": r50_err_orig, # metacal reconvolution-kernel family "g1_psf_reconv": RECONV_PSF_G[0], "g2_psf_reconv": RECONV_PSF_G[1], @@ -206,8 +188,6 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): "g2_err_psf_reconv": RECONV_PSF_G_ERR[1], "T_psf_reconv": T_psf, "T_err_psf_reconv": T_psf_err, - "r50_psf_reconv": r50_reconv, - "r50_err_psf_reconv": r50_err_reconv, "mcal_flags": 0, } res.update( @@ -216,16 +196,16 @@ def _fake_metacal_result(T, T_err, T_psf, T_psf_err): return res -def test_compile_results_size_columns_are_half_light_radii(): - """Every r50 column is a true half-light radius, on every side. +def test_compile_results_size_columns_are_unaliased(): + """Each PSF family carries its OWN fitted area ``T``, un-aliased (#749). - Galaxy ``r50 = sqrt(ln 2 * T)`` (not the raw area ``pars[4]``) and both - PSF families ``r50 = sqrt(2 ln 2) * sigma`` (not bare sigma), in the same - convention so galaxy / PSF radii are commensurable. The reconvolution - kernel (``*_psf_reconv``) and the original image PSF (``*_psf_orig``) are - independent fits with their own, different sizes (shapepipe#749). + The galaxy area ``T`` is the fitted ``pars[4]``; the reconvolution kernel + (``*_psf_reconv``) and the original image PSF (``*_psf_orig``) are + independent fits with their own, different sizes. Half-light radius is no + longer carried in the catalogue — it is derivable from ``T`` downstream + via :func:`cs_util.size.T_to_r50` — so only the ``T`` columns are checked. """ - from shapepipe.modules.ngmix_package.ngmix import Ngmix, SIGMA_TO_R50 + from shapepipe.modules.ngmix_package.ngmix import Ngmix T, T_err = 0.18, 0.02 T_psf, T_psf_err = 0.09, 0.001 @@ -236,38 +216,18 @@ def test_compile_results_size_columns_are_half_light_radii(): noshear = out["noshear"] - # galaxy: r50 = sqrt(ln2 * T) = 1.17741 * sqrt(T / 2), error dT * r50/(2T) - r50_expected = np.sqrt(np.log(2) * T) - npt.assert_allclose(noshear["r50"], [r50_expected]) - npt.assert_allclose(noshear["r50_err"], [r50_expected * T_err / (2 * T)]) + # galaxy area is the fitted T + npt.assert_allclose(noshear["T"], [T]) + npt.assert_allclose(noshear["T_err"], [T_err]) - # reconvolution kernel: r50 = sqrt(2 ln2) * sigma, sigma = sqrt(T_psf / 2) - sigma_reconv = np.sqrt(T_psf / 2) - npt.assert_allclose(noshear["r50_psf_reconv"], [SIGMA_TO_R50 * sigma_reconv]) - npt.assert_allclose( - noshear["r50_err_psf_reconv"], - [SIGMA_TO_R50 * T_psf_err / (4 * sigma_reconv)], - ) + # reconvolution kernel npt.assert_allclose(noshear["T_psf_reconv"], [T_psf]) npt.assert_allclose(noshear["T_err_psf_reconv"], [T_psf_err]) - # galaxy/reconv-PSF r50 are commensurable: same convention on both sides - npt.assert_allclose( - np.array(noshear["r50"]) / np.array(noshear["r50_psf_reconv"]), - [np.sqrt(T / T_psf)], - ) - # original image PSF carries its OWN size, un-aliased from the kernel - sigma_orig = np.sqrt(ORIG_PSF_T / 2) npt.assert_allclose(noshear["T_psf_orig"], [ORIG_PSF_T]) npt.assert_allclose(noshear["T_err_psf_orig"], [ORIG_PSF_T_ERR]) - npt.assert_allclose(noshear["r50_psf_orig"], [SIGMA_TO_R50 * sigma_orig]) - npt.assert_allclose( - noshear["r50_err_psf_orig"], - [SIGMA_TO_R50 * ORIG_PSF_T_ERR / (4 * sigma_orig)], - ) assert noshear["T_psf_orig"] != noshear["T_psf_reconv"] - assert noshear["r50_psf_orig"] != noshear["r50_psf_reconv"] def test_compile_results_psf_families_are_unaliased(): @@ -330,7 +290,7 @@ def test_compile_results_nan_fills_failed_fit_types(): failed = out["1p"] for col in ( "g1", "g2", "g1_err", "g2_err", "T", "T_err", "flux", "flux_err", - "s2n", "mag", "mag_err", "r50", "r50_err", + "s2n", "mag", "mag_err", ): assert np.isnan(failed[col]).all(), col assert failed["flags"] == [0x8] @@ -359,18 +319,6 @@ def test_get_mcal_flags_ors_per_type_fit_flags(): assert get_mcal_flags(res) == 0xA -def test_compile_results_nonpositive_T_gives_nan_r50(): - """A non-positive fitted area cannot yield a real half-light radius.""" - from shapepipe.modules.ngmix_package.ngmix import Ngmix - - inst = object.__new__(Ngmix) - inst._zero_point = 30.0 - out = inst.compile_results([_fake_metacal_result(-0.05, 0.02, 0.09, 0.001)]) - - assert np.isnan(out["noshear"]["r50"]).all() - assert np.isnan(out["noshear"]["r50_err"]).all() - - def test_average_multiepoch_psf_skips_failed_psf_epochs(): """A failed-PSF epoch must be skipped, not KeyError the whole object. From 85db8c134e366df0d131583500e28db698aa498f Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 00:35:03 +0200 Subject: [PATCH 08/33] tests: hypothesis property tests for the PSF column refactor Four property-test modules, each mutation-verified non-vacuous: - grammar: orig/reconv never crossed for any distinct inputs; every emitted column matches the NGMIX_[_ERR]_[_] grammar; every NGMIX token in final_cat.param is producible by the writer. - physics: T_reconv > T_orig and |g_reconv| <= |g_orig| over random elliptical PSFs (the dilation clause is the robust transposition catcher). - no-op: average_original_psf leaves gal_obs.psf pristine (no gmix leak), so the restored original-PSF fit cannot alter the galaxy/shear results. - averaging: epoch-weighted mean within per-epoch range, flagged epochs excluded, single survivor passes through, sentinel fills for absent objects. Co-Authored-By: Claude Opus 4.8 --- tests/module/test_psf_averaging_properties.py | 329 ++++++++++++++++ tests/module/test_psf_grammar_properties.py | 353 ++++++++++++++++++ tests/module/test_psf_noop_property.py | 180 +++++++++ tests/module/test_psf_physics_properties.py | 116 ++++++ 4 files changed, 978 insertions(+) create mode 100644 tests/module/test_psf_averaging_properties.py create mode 100644 tests/module/test_psf_grammar_properties.py create mode 100644 tests/module/test_psf_noop_property.py create mode 100644 tests/module/test_psf_physics_properties.py diff --git a/tests/module/test_psf_averaging_properties.py b/tests/module/test_psf_averaging_properties.py new file mode 100644 index 00000000..25c0fc77 --- /dev/null +++ b/tests/module/test_psf_averaging_properties.py @@ -0,0 +1,329 @@ +"""PROPERTY-BASED TESTS FOR PSF EPOCH-AVERAGING + make_cat SENTINELS. + +The two PSF families this module exports (the metacal reconvolution kernel via +``average_multiepoch_psf`` and the original image PSF via +``average_original_psf``) share a single averaging core, +:func:`shapepipe.modules.ngmix_package.ngmix._average_psf_fits`. These +hypothesis properties pin the contract of that core directly — a weighted mean +over the surviving (``flags == 0``) epochs — and the companion sentinel-fill +contract on the make_cat reader, which must leave an obj_id absent from the +ngmix catalogue at its type-specific sentinel. + +Strategies are kept in physically sensible ranges (positive epoch weights, +positive sizes ``T``, ``|g| < 1``) so every generated input is a valid PSF-fit +result the averaging core would actually be handed in production. +""" + +import os +import tempfile +from itertools import zip_longest + +import numpy as np +import numpy.testing as npt +from astropy.io import fits +from hypothesis import given, settings +from hypothesis import strategies as st + +from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue +from shapepipe.modules.ngmix_package.ngmix import _average_psf_fits + + +# --------------------------------------------------------------------------- # +# Strategies for one valid per-epoch PSF-fit result. +# --------------------------------------------------------------------------- # + +# Bounded, non-degenerate floats: positive sizes, |g| < 1, strictly positive +# weights. min_magnitude keeps weights away from 0 so wsum can never vanish on +# the surviving epochs (the core raises ZeroDivisionError there by contract). +_g_component = st.floats(min_value=-0.9, max_value=0.9, allow_nan=False) +_positive_T = st.floats(min_value=1e-3, max_value=10.0, allow_nan=False) +_positive_err = st.floats(min_value=1e-6, max_value=1.0, allow_nan=False) +_weight = st.floats( + min_value=1e-3, max_value=1e3, allow_nan=False, allow_infinity=False +) + + +def _result(g1, g2, t, g1_err, g2_err, t_err, flags=0): + """One ngmix PSF-fit result dict, as the averaging core consumes it.""" + return { + "flags": flags, + "g": np.array([g1, g2]), + "g_err": np.array([g1_err, g2_err]), + "T": t, + "T_err": t_err, + } + + +@st.composite +def _good_epoch(draw): + """A surviving (flags == 0) epoch paired with its positive weight.""" + return ( + _result( + draw(_g_component), draw(_g_component), draw(_positive_T), + draw(_positive_err), draw(_positive_err), draw(_positive_err), + ), + draw(_weight), + ) + + +# --------------------------------------------------------------------------- # +# (a) weighted average lies within [min, max] of the per-epoch values. +# --------------------------------------------------------------------------- # + +@settings(deadline=None, max_examples=50) +@given(st.lists(_good_epoch(), min_size=1, max_size=8)) +def test_average_lies_within_per_epoch_range(epochs): + """A positive-weight weighted mean is a convex combination of its inputs. + + For every averaged component (g1, g2, T) the result must sit within the + [min, max] envelope of the per-epoch values. This is the defining property + of a weighted mean with strictly positive weights; it fails immediately if + the core ever divided by the wrong weight sum, summed unweighted, or let a + value escape the convex hull. + """ + out = _average_psf_fits(epochs) + + g1_vals = np.array([r["g"][0] for r, _ in epochs]) + g2_vals = np.array([r["g"][1] for r, _ in epochs]) + t_vals = np.array([r["T"] for r, _ in epochs]) + + # rtol absorbs only floating-point round-off, not a real bound violation. + npt.assert_array_less(out["g_psf"][0], g1_vals.max() + 1e-9) + npt.assert_array_less(g1_vals.min() - 1e-9, out["g_psf"][0]) + npt.assert_array_less(out["g_psf"][1], g2_vals.max() + 1e-9) + npt.assert_array_less(g2_vals.min() - 1e-9, out["g_psf"][1]) + npt.assert_array_less(out["T_psf"], t_vals.max() + 1e-9) + npt.assert_array_less(t_vals.min() - 1e-9, out["T_psf"]) + + assert out["n_epoch"] == len(epochs) + + +@settings(deadline=None, max_examples=50) +@given(st.lists(_good_epoch(), min_size=1, max_size=8)) +def test_average_matches_explicit_weighted_mean(epochs): + """The core's output equals the textbook weighted mean of the survivors. + + Stronger than the [min, max] envelope: pins the exact value, so a swap of + weighting factor or an off-by-one in the accumulation is caught. + """ + out = _average_psf_fits(epochs) + w = np.array([weight for _, weight in epochs]) + + g = np.array([r["g"] for r, _ in epochs]) + t = np.array([r["T"] for r, _ in epochs]) + npt.assert_allclose(out["g_psf"], (g * w[:, None]).sum(0) / w.sum()) + npt.assert_allclose(out["T_psf"], (t * w).sum() / w.sum()) + + +# --------------------------------------------------------------------------- # +# (b) epochs with flags != 0 are excluded from the average. +# --------------------------------------------------------------------------- # + +@settings(deadline=None, max_examples=50) +@given( + st.lists(_good_epoch(), min_size=1, max_size=6), + st.lists( + st.tuples(_weight, st.integers(min_value=1, max_value=255)), + min_size=1, + max_size=6, + ), +) +def test_flagged_epochs_are_excluded(good_epochs, flagged_specs): + """A failed-PSF epoch (flags != 0) must not enter the average at all. + + Each flagged epoch carries poisoned NaN measurement fields and a huge, + out-of-range T — values that would wreck the mean (NaN-poison it, or shove + it far outside the good-epoch envelope) if they leaked in. The averaged + result and n_epoch must match a clean average over the good epochs only, + proving the flagged ones were dropped, not merely down-weighted. + """ + flagged = [ + ( + _result( + np.nan, np.nan, 1e6, np.nan, np.nan, np.nan, flags=flags + ), + weight, + ) + for weight, flags in flagged_specs + ] + # Interleave good and flagged epochs WITHOUT duplicating any good epoch, so + # exclusion can't be a happy accident of ordering. itertools.zip_longest + # threads them together; the leftover tail of the longer list follows. + mixed = [ + e + for pair in zip_longest(good_epochs, flagged) + for e in pair + if e is not None + ] + + out = _average_psf_fits(mixed) + expected = _average_psf_fits(good_epochs) + + npt.assert_allclose(out["g_psf"], expected["g_psf"]) + npt.assert_allclose(out["T_psf"], expected["T_psf"]) + npt.assert_allclose(out["T_psf_err"], expected["T_psf_err"]) + assert out["n_epoch"] == len(good_epochs) + assert np.isfinite(out["g_psf"]).all() and np.isfinite(out["T_psf"]) + + +# --------------------------------------------------------------------------- # +# (c) a single surviving epoch returns that epoch's value. +# --------------------------------------------------------------------------- # + +@settings(deadline=None, max_examples=50) +@given( + _good_epoch(), + st.lists( + st.tuples(_weight, st.integers(min_value=1, max_value=255)), + max_size=5, + ), +) +def test_single_survivor_returns_its_own_value(survivor, flagged_specs): + """When exactly one epoch survives, its values pass through untouched. + + The lone survivor's weight cancels in mean = (v*w)/w, so the result must be + the survivor's value exactly, regardless of how many flagged epochs (with + arbitrary weights) surround it. n_epoch must be 1. + """ + result, weight = survivor + flagged = [ + (_result(np.nan, np.nan, 1e6, np.nan, np.nan, np.nan, flags=f), w) + for w, f in flagged_specs + ] + out = _average_psf_fits(flagged + [survivor] + flagged) + + npt.assert_allclose(out["g_psf"], result["g"]) + npt.assert_allclose(out["g_psf_err"], result["g_err"]) + npt.assert_allclose(out["T_psf"], result["T"]) + npt.assert_allclose(out["T_psf_err"], result["T_err"]) + assert out["n_epoch"] == 1 + + +# --------------------------------------------------------------------------- # +# (d) make_cat: an obj_id absent from the ngmix cat keeps its sentinel fill. +# --------------------------------------------------------------------------- # + +# The sentinel value each column family is pre-filled with before the matched +# rows are overwritten (mirrors make_cat._save_ngmix_data). The property: a row +# whose obj_id never appears among the ngmix ids keeps exactly these. +_SENTINELS = { + "NGMIX_T_GAL_NOSHEAR": 0.0, + "NGMIX_SNR_GAL_NOSHEAR": 0.0, + "NGMIX_FLAGS_GAL_NOSHEAR": 0.0, + "NGMIX_T_PSF_ORIG_NOSHEAR": 0.0, + "NGMIX_T_PSF_RECONV_NOSHEAR": 0.0, + "NGMIX_FLUX_ERR_GAL_NOSHEAR": -1.0, + "NGMIX_MAG_ERR_GAL_NOSHEAR": -1.0, + "NGMIX_G1_GAL_NOSHEAR": -10.0, + "NGMIX_G2_GAL_NOSHEAR": -10.0, + "NGMIX_G1_PSF_ORIG_NOSHEAR": -10.0, + "NGMIX_G1_PSF_RECONV_NOSHEAR": -10.0, + "NGMIX_T_ERR_GAL_NOSHEAR": 1e30, + "NGMIX_T_ERR_PSF_ORIG_NOSHEAR": 1e30, + "NGMIX_T_ERR_PSF_RECONV_NOSHEAR": 1e30, + "NGMIX_N_EPOCH": 0.0, + "NGMIX_MCAL_FLAGS": 0.0, + "NGMIX_MCAL_TYPES_FAIL": 0.0, +} + +# Per-key write format and a measured value distinct from every sentinel, so a +# matched row is unmistakably "overwritten" and an absent row unmistakably not. +_NGMIX_KEYS = [ + "id", "n_epoch_model", "mcal_types_fail", "nfev_fit", + "g1", "g1_err", "g2", "g2_err", "T", "T_err", + "flux", "flux_err", "s2n", "mag", "mag_err", "flags", "mcal_flags", + "g1_psf_orig", "g2_psf_orig", "g1_err_psf_orig", "g2_err_psf_orig", + "T_psf_orig", "T_err_psf_orig", + "g1_psf_reconv", "g2_psf_reconv", "g1_err_psf_reconv", "g2_err_psf_reconv", + "T_psf_reconv", "T_err_psf_reconv", +] +_INT_KEYS = { + "id", "n_epoch_model", "mcal_types_fail", "nfev_fit", "flags", "mcal_flags" +} +_SHEAR_EXTS = ["1M", "1P", "2M", "2P", "NOSHEAR"] + + +class _NullLogger: + def info(self, *_args, **_kwargs): + pass + + +def _measured_row(obj_id): + """One fit object whose every value is far from any sentinel (5 / 0.5).""" + row = {key: (5 if key in _INT_KEYS else 0.5) for key in _NGMIX_KEYS} + row["id"] = obj_id + return row + + +def _write_ngmix_cat(path, obj_ids): + rows = [_measured_row(oid) for oid in obj_ids] + hdus = [fits.PrimaryHDU()] + for ext in _SHEAR_EXTS: + cols = [ + fits.Column( + name=key, + format="K" if key in _INT_KEYS else "D", + array=np.array([row[key] for row in rows]), + ) + for key in _NGMIX_KEYS + ] + hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) + fits.HDUList(hdus).writeto(path, overwrite=True) + + +def _run_save_ngmix(ngmix_path, obj_id): + inst = object.__new__(SaveCatalogue) + inst._obj_id = np.asarray(obj_id) + inst._output_dict = {} + inst._cat_size_target = len(inst._obj_id) + inst._w_log = _NullLogger() + err_msg = inst._save_ngmix_data(str(ngmix_path)) + assert err_msg is None + return inst._output_dict + + +# Distinct positive integer obj_ids; split into "fit by ngmix" vs "absent". +_distinct_ids = st.lists( + st.integers(min_value=1, max_value=10_000), + min_size=2, + max_size=8, + unique=True, +) + + +@settings(deadline=None, max_examples=25) +@given(_distinct_ids, st.data()) +def test_absent_obj_id_keeps_sentinel_fill(all_ids, data): + """An obj_id SExtractor saw but ngmix never fit keeps every sentinel. + + The final catalogue carries all of ``all_ids``; ngmix fit only a non-empty + proper subset. The unfit rows must retain the exact per-column sentinel + (0 / -10 / 1e30 / -1), while the fit rows must have been overwritten to the + measured value — so this is not vacuously satisfied by an all-sentinel cat. + """ + fit_ids = data.draw( + st.lists(st.sampled_from(all_ids), min_size=1, unique=True).filter( + lambda s: 0 < len(s) < len(all_ids) + ) + ) + absent_ids = [oid for oid in all_ids if oid not in fit_ids] + + with tempfile.TemporaryDirectory() as tmp: + ngmix_path = os.path.join(tmp, "ngmix.fits") + _write_ngmix_cat(ngmix_path, fit_ids) + out = _run_save_ngmix(ngmix_path, all_ids) + + absent_idx = [all_ids.index(oid) for oid in absent_ids] + fit_idx = [all_ids.index(oid) for oid in fit_ids] + + for col, sentinel in _SENTINELS.items(): + arr = np.asarray(out[col]) + npt.assert_allclose( + arr[absent_idx], sentinel, + err_msg=f"{col}: absent rows lost their sentinel {sentinel}", + ) + # The fit rows were overwritten — measured value 5 (int cols) or 0.5, + # both distinct from every sentinel, so the fill wasn't global. + assert not np.any(np.isclose(arr[fit_idx], sentinel)), ( + f"{col}: a fit row still carries the sentinel {sentinel}" + ) diff --git a/tests/module/test_psf_grammar_properties.py b/tests/module/test_psf_grammar_properties.py new file mode 100644 index 00000000..5123d7f3 --- /dev/null +++ b/tests/module/test_psf_grammar_properties.py @@ -0,0 +1,353 @@ +"""PROPERTY-BASED TESTS FOR THE NGMIX PSF COLUMN GRAMMAR. + +Drives ``SaveCatalogue._save_ngmix_data`` (the make_cat writer) under +hypothesis to pin three invariants of the renamed grammar +``NGMIX[m]_[_ERR]_[_]`` (shapepipe#749), where the +ORIGINAL image PSF (``PSF_ORIG``) and the metacal RECONVOLUTION kernel +(``PSF_RECONV``) are independent fits of *different* PSFs: + +(a) un-aliasing holds for ALL inputs: for arbitrary distinct values fed as the + orig vs reconv ngmix source columns, every emitted ``*_PSF_ORIG_*`` column + traces to its orig source and every ``*_PSF_RECONV_*`` to its reconv + source, and the two families are never crossed; +(b) every column name the writer emits matches the grammar regex; +(c) every ``NGMIX_*`` token the shipped param file + (``example/cfis/final_cat.param``) names is a column the writer can + produce — writer/param-file consistency. + +Setup mirrors ``tests/module/test_make_cat.py``: a synthetic ngmix FITS with +the five shear-type extensions carrying the exact ``compile_results`` key set, +read back through ``_save_ngmix_data``. +""" + +import re +from pathlib import Path + +import numpy as np +import numpy.testing as npt +from astropy.io import fits +from hypothesis import given, settings +from hypothesis import strategies as st + +from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue + + +# --------------------------------------------------------------------------- +# Harness (mirrors tests/module/test_make_cat.py) +# --------------------------------------------------------------------------- + +class _NullLogger: + def info(self, *_args, **_kwargs): + pass + + +SHEAR_EXTS = ["1M", "1P", "2M", "2P", "NOSHEAR"] + +# The full per-object key set ngmix.compile_results emits into every +# shear-type extension. Integer-typed keys carry FITS format "K", the rest +# "D"; everything else mirrors test_make_cat._ngmix_row. +INT_KEYS = { + "id", "n_epoch_model", "mcal_types_fail", "nfev_fit", "flags", "mcal_flags", +} +NGMIX_KEYS = [ + "id", "n_epoch_model", "mcal_types_fail", "nfev_fit", + "g1", "g1_err", "g2", "g2_err", + "T", "T_err", + "flux", "flux_err", "s2n", "mag", "mag_err", + "flags", "mcal_flags", + "g1_psf_orig", "g2_psf_orig", + "g1_err_psf_orig", "g2_err_psf_orig", + "T_psf_orig", "T_err_psf_orig", + "g1_psf_reconv", "g2_psf_reconv", + "g1_err_psf_reconv", "g2_err_psf_reconv", + "T_psf_reconv", "T_err_psf_reconv", +] + +# The six orig-PSF source keys and the six reconv-PSF source keys, paired by +# the output column they feed. Each pair is (output-column infix, source key); +# the infix slots into f"NGMIX_{infix}_{shear}". Holding this mapping +# explicitly is what makes the un-aliasing property check each column against +# its OWN source rather than against a single family-wide value, so a within- +# family swap (e.g. G1 <-> T, or value <-> error) is caught too. +ORIG_COLS = { + "G1_PSF_ORIG": "g1_psf_orig", + "G2_PSF_ORIG": "g2_psf_orig", + "G1_ERR_PSF_ORIG": "g1_err_psf_orig", + "G2_ERR_PSF_ORIG": "g2_err_psf_orig", + "T_PSF_ORIG": "T_psf_orig", + "T_ERR_PSF_ORIG": "T_err_psf_orig", +} +RECONV_COLS = { + "G1_PSF_RECONV": "g1_psf_reconv", + "G2_PSF_RECONV": "g2_psf_reconv", + "G1_ERR_PSF_RECONV": "g1_err_psf_reconv", + "G2_ERR_PSF_RECONV": "g2_err_psf_reconv", + "T_PSF_RECONV": "T_psf_reconv", + "T_ERR_PSF_RECONV": "T_err_psf_reconv", +} + + +def _base_row(obj_id): + """One object's per-key values; PSF keys overwritten by the caller.""" + return { + "id": obj_id, + "n_epoch_model": 3, "mcal_types_fail": 0, "nfev_fit": 7, + "g1": 0.10, "g1_err": 0.011, "g2": -0.20, "g2_err": 0.022, + "T": 0.30, "T_err": 0.033, + "flux": 100.0, "flux_err": 1.0, "s2n": 55.0, + "mag": 21.0, "mag_err": 0.05, + "flags": 0, "mcal_flags": 0, + "g1_psf_orig": 0.0041, "g2_psf_orig": -0.0031, + "g1_err_psf_orig": 2e-5, "g2_err_psf_orig": 3e-5, + "T_psf_orig": 0.071, "T_err_psf_orig": 8e-4, + "g1_psf_reconv": 0.0012, "g2_psf_reconv": 0.0022, + "g1_err_psf_reconv": 1e-5, "g2_err_psf_reconv": 1e-5, + "T_psf_reconv": 0.092, "T_err_psf_reconv": 1e-3, + } + + +def _write_ngmix_cat(path, rows): + """Write a synthetic ngmix FITS with the five shear-type extensions. + + Each extension carries the exact compile_results key set; PSF families are + object-level, so the same per-object values are written across extensions. + """ + hdus = [fits.PrimaryHDU()] + for ext in SHEAR_EXTS: + cols = [ + fits.Column( + name=key, + format="K" if key in INT_KEYS else "D", + array=np.array([row[key] for row in rows]), + ) + for key in NGMIX_KEYS + ] + hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) + fits.HDUList(hdus).writeto(path, overwrite=True) + + +def _run_save_ngmix(ngmix_path, obj_ids, cat_size_target=None): + """Drive _save_ngmix_data and return its populated output dict.""" + inst = object.__new__(SaveCatalogue) + inst._obj_id = np.asarray(obj_ids) + inst._output_dict = {} + inst._cat_size_target = ( + len(inst._obj_id) if cat_size_target is None else cat_size_target + ) + inst._w_log = _NullLogger() + + err_msg = inst._save_ngmix_data(str(ngmix_path)) + assert err_msg is None + return inst._output_dict + + +# --------------------------------------------------------------------------- +# Grammar regex + producible-column model +# --------------------------------------------------------------------------- + +# NGMIX[m]_[_ERR]__, plus the three per-object +# metadata columns that carry neither OBJECT nor SHEAR. Anchored so a stray +# legacy token (_PSFo, _Tpsf, NGMIX_ELL_*) fails to match. +_COMPONENT = "G1|G2|T|SNR|FLUX|MAG|FLAGS" +_OBJECT = "GAL|PSF_ORIG|PSF_RECONV" +_SHEAR = "NOSHEAR|1P|1M|2P|2M" +GRAMMAR_RE = re.compile( + rf"^NGMIXm?_(?:{_COMPONENT})(?:_ERR)?_(?:{_OBJECT})_(?:{_SHEAR})$" + rf"|^NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)$" +) + +# example/cfis/final_cat.param lives two levels up from tests/module/. +PARAM_PATH = ( + Path(__file__).resolve().parents[2] / "example" / "cfis" / "final_cat.param" +) + +# The one param-file NGMIX token outside the _save_ngmix_data grammar: the +# moments-failure flag, set by a different code path (not the metacal writer). +# Named explicitly so the consistency check stays honest — it is excluded by +# name, not silently dropped, and the test asserts it really is absent from the +# producible set so this carve-out can never mask a real grammar regression. +NON_WRITER_PARAM_TOKENS = {"NGMIX_MOM_FAIL"} + + +def _param_ngmix_tokens(): + """Every distinct NGMIX_* token named in the shipped param file.""" + text = PARAM_PATH.read_text() + return set(re.findall(r"\bNGMIX[A-Za-z0-9_]*", text)) + + +def _produced_columns(obj_ids): + """The set of column names the writer emits for these obj_ids.""" + rows = [_base_row(oid) for oid in obj_ids] + import tempfile + with tempfile.NamedTemporaryFile(suffix=".fits", delete=False) as fh: + path = fh.name + _write_ngmix_cat(path, rows) + out = _run_save_ngmix(path, obj_ids) + Path(path).unlink() + return set(out) + + +# --------------------------------------------------------------------------- +# Strategies (physically-sensible ranges so generated inputs are valid) +# --------------------------------------------------------------------------- + +# Distinct, finite object NUMBERs. +obj_ids_strategy = st.lists( + st.integers(min_value=1, max_value=10_000), + min_size=1, max_size=5, unique=True, +) + +# A finite, well-separated value: ellipticity-/size-scale magnitudes the writer +# copies verbatim. Bounded away from zero and from each other (see below). +_finite = st.floats( + min_value=-1.0, max_value=1.0, + allow_nan=False, allow_infinity=False, +).filter(lambda x: abs(x) > 1e-6) + + +# --------------------------------------------------------------------------- +# (a) Un-aliasing for ALL inputs +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given( + orig_seed=_finite, + reconv_seed=_finite, + obj_ids=obj_ids_strategy, +) +def test_psf_orig_reconv_unaliased_for_all_distinct_sources( + orig_seed, reconv_seed, obj_ids, tmp_path_factory +): + """Orig/reconv PSF columns trace to their OWN source, never crossed (#749). + + For arbitrary distinct seeds, fill the six orig-PSF source keys with values + derived from ``orig_seed`` and the six reconv-PSF source keys from + ``reconv_seed`` — each of the twelve keys a DISTINCT value (seed times a + per-key multiplier). Then assert every emitted ``*_PSF_ORIG_*`` / + ``*_PSF_RECONV_*`` column equals the value of the exact source key it must + read. A family swap (orig<->reconv) OR a within-family swap (G1<->T, + value<->error) breaks at least one equality, so the property is non-vacuous. + """ + # Multipliers so each of the six keys per family gets its own value; the + # two families also never collide because orig_seed != reconv_seed is + # enforced below and the multiplier set is shared. + mult = { + "g1": 1.0, "g2": 1.3, "g1_err": 1.7, "g2_err": 2.1, + "T": 2.6, "T_err": 3.1, + } + # Guard against orig_seed and reconv_seed mapping any key to equal values + # (would make a cross undetectable for that key). assume() would discard; + # instead nudge reconv so the families are provably disjoint per key. + if abs(orig_seed - reconv_seed) < 1e-3: + reconv_seed = reconv_seed + (0.5 if reconv_seed < 0 else -0.5) + + def _src_val(suffix, family_seed): + # suffix e.g. "g1", "g1_err", "T"; matched to a multiplier by stripping + # the "_psf_orig"/"_psf_reconv" tail the caller already removed. + return family_seed * mult[suffix] + + rows = [] + for oid in obj_ids: + row = _base_row(oid) + for col_infix, src_key in ORIG_COLS.items(): + suffix = src_key.replace("_psf_orig", "") + row[src_key] = _src_val(suffix, orig_seed) + for col_infix, src_key in RECONV_COLS.items(): + suffix = src_key.replace("_psf_reconv", "") + row[src_key] = _src_val(suffix, reconv_seed) + rows.append(row) + + ngmix_path = tmp_path_factory.mktemp("ng") / "ngmix.fits" + _write_ngmix_cat(ngmix_path, rows) + out = _run_save_ngmix(ngmix_path, obj_ids) + + n = len(obj_ids) + for shear in SHEAR_EXTS: + for col_infix, src_key in ORIG_COLS.items(): + suffix = src_key.replace("_psf_orig", "") + npt.assert_allclose( + out[f"NGMIX_{col_infix}_{shear}"], + [_src_val(suffix, orig_seed)] * n, + err_msg=f"NGMIX_{col_infix}_{shear} not from {src_key}", + ) + for col_infix, src_key in RECONV_COLS.items(): + suffix = src_key.replace("_psf_reconv", "") + npt.assert_allclose( + out[f"NGMIX_{col_infix}_{shear}"], + [_src_val(suffix, reconv_seed)] * n, + err_msg=f"NGMIX_{col_infix}_{shear} not from {src_key}", + ) + # The two families are genuinely distinct everywhere — the un-aliasing. + assert not np.allclose( + out[f"NGMIX_G1_PSF_ORIG_{shear}"], + out[f"NGMIX_G1_PSF_RECONV_{shear}"], + ) + assert not np.allclose( + out[f"NGMIX_T_PSF_ORIG_{shear}"], + out[f"NGMIX_T_PSF_RECONV_{shear}"], + ) + + +# --------------------------------------------------------------------------- +# (b) Every emitted column name matches the grammar +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given(obj_ids=obj_ids_strategy) +def test_emitted_column_names_match_grammar(obj_ids, tmp_path_factory): + """Every column _save_ngmix_data emits matches the grammar regex. + + The grammar is ``NGMIX[m]_[_ERR]__`` plus the + three metadata columns. The regex is anchored, so a legacy token (``_PSFo``, + ``_Tpsf``, ``NGMIX_ELL_*``) or a malformed name would fail to match. Run + over hypothesis-varied object sets to confirm the emitted name set is + input-independent and always well-formed. + """ + rows = [_base_row(oid) for oid in obj_ids] + ngmix_path = tmp_path_factory.mktemp("ng") / "ngmix.fits" + _write_ngmix_cat(ngmix_path, rows) + out = _run_save_ngmix(ngmix_path, obj_ids) + + assert out, "writer produced no columns" + for col in out: + assert GRAMMAR_RE.match(col), f"off-grammar column emitted: {col!r}" + + +# --------------------------------------------------------------------------- +# (c) Param-file tokens are all producible by the writer +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=15) +@given(obj_ids=obj_ids_strategy) +def test_param_file_ngmix_tokens_are_producible(obj_ids): + """Every NGMIX_* token the param file names is a column the writer produces. + + ``example/cfis/final_cat.param`` is the consumer contract for the final + catalogue; ``create_final_cat`` keeps only the listed columns, so a token it + names that the writer cannot emit is a silent, empty column downstream. The + one known exception — ``NGMIX_MOM_FAIL``, the moments-failure flag set by a + different path — is excluded BY NAME, and we assert it is genuinely outside + the producible set so the carve-out cannot hide a real grammar regression. + + Driven over varied object sets to confirm the producible set is stable + across inputs (the contract is structural, not data-dependent). + """ + produced = _produced_columns(obj_ids) + param_tokens = _param_ngmix_tokens() + + # The carve-out is real: the excluded token is NOT producible. (If a future + # change made the writer emit it, this fails and we drop the exclusion.) + for tok in NON_WRITER_PARAM_TOKENS: + assert tok not in produced, ( + f"{tok} is now producible — remove it from NON_WRITER_PARAM_TOKENS" + ) + + missing = (param_tokens - NON_WRITER_PARAM_TOKENS) - produced + assert not missing, ( + f"param file names NGMIX columns the writer cannot produce: " + f"{sorted(missing)}" + ) + + # Non-vacuous: the writer DOES cover real grammar tokens from the param + # file (so the assertion above is exercised against a non-empty set). + assert (param_tokens - NON_WRITER_PARAM_TOKENS) & produced diff --git a/tests/module/test_psf_noop_property.py b/tests/module/test_psf_noop_property.py new file mode 100644 index 00000000..003efcaf --- /dev/null +++ b/tests/module/test_psf_noop_property.py @@ -0,0 +1,180 @@ +"""PROPERTY-BASED TESTS: the original-PSF pre-fit is a science no-op. + +``average_original_psf`` fits each epoch's psfex/mccd PSF stamp +(``gal_obs.psf``) with the shared ``psf_runner`` to populate the independent +original-PSF columns (``NGMIX_*_PSF_ORIG``; shapepipe#749). It fits a *copy* of +each PSF observation, so the observation metacal later deep-copies and consumes +(``boot.go(gal_obs_list)``) is left untouched. + +This is the invariant that makes the add-column refactor bit-identical to a +no-original-fit branch on the galaxy/shear results: if the pre-fit cannot alter +``gal_obs.psf``, it cannot alter what metacal consumes, so the galaxy results +are unchanged. ``PSFRunner.go`` sets ``.gmix`` (and ``.meta['result']``) on the +observation it fits; were that ``gal_obs.psf`` itself, a stray gmix would +survive metacal's deep copy and be reused as the ``MetacalFitGaussPSF`` fallback +when admom+ML both fail, silently rescuing objects the base branch dropped +(``BootPSFFailure``). + +The unit guard ``test_original_psf_prefit_leaves_gal_obs_psf_pristine`` in +``tests/module/test_ngmix.py`` pins this on one fixed input; here it is +generalised over varied PSF shapes, noise, epoch counts, stamp sizes, and RNG +seeds with hypothesis, so the no-op holds across the input space rather than at +a single point. The harness (``make_data`` -> ``make_ngmix_observation`` -> +``make_runners`` -> ``average_original_psf``) mirrors that unit test exactly. +""" + +import copy + +from hypothesis import given, settings +from hypothesis import strategies as st +import numpy as np +import numpy.testing as npt +from ngmix.observation import ObsList + +from shapepipe.modules.ngmix_package.ngmix import ( + average_original_psf, + get_prior, + make_ngmix_observation, + make_runners, +) +from shapepipe.testing.simulate import make_data + +PIXEL_SCALE = 0.1857 + + +def _jacobian_snapshot(jacobian): + """The six floats fully defining an ngmix Jacobian (affine + origin).""" + return ( + jacobian.dvdrow, jacobian.dvdcol, + jacobian.dudrow, jacobian.dudcol, + jacobian.row0, jacobian.col0, + ) + + +def _build_gal_obs_list(psf_shear, noise, n_epochs, img_size, data_seed, obs_seed): + """Build the per-epoch galaxy ObsList metacal would consume. + + Mirrors the construction in + ``test_original_psf_prefit_leaves_gal_obs_psf_pristine``: simulate epochs + with ``make_data`` (here over hypothesis-varied PSF shape, noise, epoch + count, stamp size, and seed), then wrap each through + ``make_ngmix_observation`` so each ``gal_obs.psf`` is a real, freshly built + PSF observation carrying no gmix. + """ + rng = np.random.RandomState(obs_seed) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(data_seed), + shear=(0.0, 0.0), + noise=noise, + n_epochs=n_epochs, + img_size=img_size, + psf_shear=psf_shear, + ) + gal_obs_list = ObsList() + for n_e in range(n_epochs): + gal_obs_list.append( + make_ngmix_observation( + gals[n_e], weights[n_e], flags[n_e], psfs[n_e], jacobs[n_e], + rng, + ) + ) + return gal_obs_list + + +# Physically sensible strategies: |g| well below 1 (Moffat shear must stay +# realisable), positive noise, >=1 epoch, odd stamp large enough to host the +# 0.55" FWHM PSF, and bounded RNG seeds. +psf_g_component = st.floats( + min_value=-0.3, max_value=0.3, allow_nan=False, allow_infinity=False +) +noise_strategy = st.floats( + min_value=1e-6, max_value=1e-3, allow_nan=False, allow_infinity=False +) +n_epochs_strategy = st.integers(min_value=1, max_value=3) +img_size_strategy = st.sampled_from([41, 51, 65]) +seed_strategy = st.integers(min_value=0, max_value=2**31 - 1) + + +@given( + g1=psf_g_component, + g2=psf_g_component, + noise=noise_strategy, + n_epochs=n_epochs_strategy, + img_size=img_size_strategy, + data_seed=seed_strategy, + obs_seed=seed_strategy, +) +@settings(deadline=None, max_examples=25) +def test_average_original_psf_leaves_gal_obs_psf_pristine_property( + g1, g2, noise, n_epochs, img_size, data_seed, obs_seed +): + """For arbitrary inputs, the original-PSF pre-fit leaves gal_obs.psf pristine. + + The no-op invariant that guarantees bit-identical galaxy/shear results: + after ``average_original_psf``, every epoch's ``gal_obs.psf`` is unchanged + in every channel metacal reads — + + * ``has_gmix()`` is still False (no fitted gmix leaked in), and no fit + ``result`` leaked into ``.meta``; + * the PSF ``image``, ``weight``, and ``jacobian`` are bit-for-bit what they + were before the call. + + Each assertion is load-bearing: were the pre-fit to fit ``gal_obs.psf`` + itself instead of a copy, ``PSFRunner.go`` would set ``.gmix`` / + ``.meta['result']`` (first two assertions catch that), and centroid/weight + handling inside the fit could perturb the arrays (the array/jacobian + snapshots catch that). The fit still *runs* on every epoch — this pins that + it runs harmlessly on a copy, not that it is skipped. + """ + psf_shear = (g1, g2) + rng = np.random.RandomState(obs_seed) + prior = get_prior(PIXEL_SCALE, rng) + gal_obs_list = _build_gal_obs_list( + psf_shear, noise, n_epochs, img_size, data_seed, obs_seed + ) + + # Pre-state: a freshly built PSF observation carries no gmix or result, and + # we snapshot every array channel metacal will consume. + assert not any(obs.psf.has_gmix() for obs in gal_obs_list) + pre = [ + { + "image": obs.psf.image.copy(), + "weight": obs.psf.weight.copy(), + "jacobian": _jacobian_snapshot(obs.psf.jacobian), + "meta": copy.deepcopy(dict(obs.psf.meta)), + } + for obs in gal_obs_list + ] + + runner, psf_runner = make_runners(prior, 1.0, rng) + psf_orig_res = average_original_psf(gal_obs_list, psf_runner) + + # The pre-fit actually did work (so the no-op is non-vacuous): at least one + # epoch's PSF was fit and averaged in. + assert psf_orig_res["n_epoch"] >= 1 + + for obs, snapshot in zip(gal_obs_list, pre): + psf = obs.psf + # No fitted gmix leaked into the observation metacal consumes. + assert not psf.has_gmix(), ( + "original-PSF pre-fit leaked a gmix into gal_obs.psf" + ) + # No fit result leaked into its metadata. + assert "result" not in psf.meta, ( + "original-PSF pre-fit leaked a fit result into gal_obs.psf.meta" + ) + assert dict(psf.meta) == snapshot["meta"], ( + "original-PSF pre-fit mutated gal_obs.psf.meta" + ) + # The image, weight, and jacobian metacal reads are bit-for-bit intact. + npt.assert_array_equal( + psf.image, snapshot["image"], + err_msg="original-PSF pre-fit mutated gal_obs.psf.image", + ) + npt.assert_array_equal( + psf.weight, snapshot["weight"], + err_msg="original-PSF pre-fit mutated gal_obs.psf.weight", + ) + assert _jacobian_snapshot(psf.jacobian) == snapshot["jacobian"], ( + "original-PSF pre-fit mutated gal_obs.psf.jacobian" + ) diff --git a/tests/module/test_psf_physics_properties.py b/tests/module/test_psf_physics_properties.py new file mode 100644 index 00000000..6b67f164 --- /dev/null +++ b/tests/module/test_psf_physics_properties.py @@ -0,0 +1,116 @@ +"""PROPERTY-BASED TESTS FOR NGMIX METACAL PSF PHYSICS. + +Hypothesis tests over *random elliptical PSF stamps*. The metacal +reconvolution kernel is round and dilated by construction, so for ANY input +PSF ellipticity / size it must come back rounder and larger than the original +image-PSF fit: + + T_reconv >= T_orig and |g_reconv| <= |g_orig|. + +This is a genuine metacal physics invariant (shapepipe#749) and, because the +two PSF families (``reconv`` -> ``average_multiepoch_psf``, ``orig`` -> +``average_original_psf``) share key names, it also catches any orig/reconv +routing transposition in :func:`do_ngmix_metacal` — a swap that is otherwise +silent. The single-example sibling lives in +``test_ngmix.test_reconv_psf_is_rounder_and_larger_than_orig_on_elliptical_psf``; +this widens it to a swept family of PSFs. +""" + +import numpy as np +from hypothesis import given, settings +from hypothesis import strategies as st + + +def _do_ngmix_metacal_on_psf(psf_shear, psf_fwhm=0.55, img_size=51, seed=7, + n_epochs=2): + """Run do_ngmix_metacal on a simulated stamp with a given PSF shape. + + Mirrors ``test_ngmix._do_ngmix_metacal_on_psf`` (the harness this file is + asked to reuse), exposing ``psf_fwhm`` so the PSF *size* can be swept too. + The galaxy stamp is drawn from a fixed seed (123); only the metacal RNG + varies with ``seed``. Returns ``(resdict, reconv, orig)`` — the + :class:`MetacalResult` NamedTuple, still unpacking positionally. + """ + from shapepipe.modules.ngmix_package.ngmix import ( + Postage_stamp, + do_ngmix_metacal, + get_prior, + ) + from shapepipe.testing.simulate import make_data + + rng = np.random.RandomState(seed) + prior = get_prior(0.1857, rng) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), + shear=(0.0, 0.0), + noise=1e-5, + n_epochs=n_epochs, + img_size=img_size, + psf_fwhm=psf_fwhm, + psf_shear=psf_shear, + ) + stamp = Postage_stamp(bkg_sub=False, megacam_flip=False) + stamp.gals, stamp.psfs, stamp.weights, stamp.flags, stamp.jacobs = ( + gals, + psfs, + weights, + flags, + jacobs, + ) + return do_ngmix_metacal(stamp, prior, 1.0, rng) + + +# Physically-sensible PSF strategies: ellipticity well inside |g| < 1 (real +# PSFs are mildly elliptical; a magnitude up to ~0.18 keeps the Moffat shear +# valid and the fit well-behaved), and a Moffat FWHM in arcsec spanning the +# realistic ground-based range. NB: the original-PSF fit uses the default galaxy +# prior (psf_fit_prior="galaxy"), which shrinks the *recovered* |g_orig| to +# O(1e-4) even for input |g|~0.1 (the shapepipe#749 prior-domination) -- so the +# dilation (T) inequality, not the rounding (|g|) one, is the primary guard. +_g_complex = st.complex_numbers( + min_magnitude=0.02, max_magnitude=0.18, allow_nan=False, allow_infinity=False +) +_psf_fwhm = st.floats(min_value=0.5, max_value=0.7, allow_nan=False, + allow_infinity=False) + + +@settings(deadline=None, max_examples=25) +@given(g=_g_complex, psf_fwhm=_psf_fwhm) +def test_reconv_psf_rounder_and_larger_than_orig_over_random_psfs(g, psf_fwhm): + """For ANY elliptical PSF, the reconv kernel is rounder and larger. + + Sweeps the input PSF ellipticity ``(g1, g2)`` (magnitude 0.02-0.18, any + orientation) and the Moffat FWHM, runs the full :func:`do_ngmix_metacal`, + and asserts the metacal reconvolution kernel is round + dilated relative to + the independently-fit original image PSF: + + ``T_reconv >= T_orig`` and ``|g_reconv| <= |g_orig|``. + + The ``T`` inequality is the robust transposition catcher: the kernel is + dilated, so ``T_reconv`` exceeds ``T_orig`` by a clear margin (~0.01-0.03) + and a strict ``>`` is asserted below -- a transposition makes ``T_reconv`` + the smaller value and fails it hard. The ``|g|`` inequality corroborates but + with a thin margin: under the default galaxy prior the recovered ``|g_orig|`` + is only O(1e-4) (shapepipe#749 prior-domination), so it is a secondary check. + """ + _, reconv, orig = _do_ngmix_metacal_on_psf( + (g.real, g.imag), psf_fwhm=psf_fwhm + ) + + g_reconv = np.hypot(*reconv["g_psf"]) + g_orig = np.hypot(*orig["g_psf"]) + + # Round: the reconvolution kernel carries less ellipticity than the + # original PSF it was built to dominate (tiny tolerance for fit noise). + assert g_reconv <= g_orig + 1e-6, ( + f"reconv |g|={g_reconv:.5f} not <= orig |g|={g_orig:.5f} " + f"(input PSF g=({g.real:.3f},{g.imag:.3f}), fwhm={psf_fwhm:.3f})" + ) + + # Larger: the reconvolution kernel is strictly dilated relative to the + # original PSF (observed dT ~0.01-0.03, far above fit noise); a transposition + # makes T_reconv the smaller value and fails this hard. + assert reconv["T_psf"] > orig["T_psf"], ( + f"reconv T={reconv['T_psf']:.5f} not dilated above orig T={orig['T_psf']:.5f} " + f"(input PSF g=({g.real:.3f},{g.imag:.3f}), fwhm={psf_fwhm:.3f})" + ) From dfac16211119488031ec871dec82f8dfdc6ebdb4 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 01:21:16 +0200 Subject: [PATCH 09/33] deps: bump cs_util 0.2 -> 0.2.1 so cs_util.size is in the image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pyproject pins cs_util>=0.1.9 and the lock already resolved it from PyPI, but to 0.2 — the last release *before* the size module. So the dev image installs cs_util, yet `import cs_util.size` raises ModuleNotFoundError: every path that would use the size-conversion helpers is broken in-container. cs_util 0.2.1 (the current PyPI release, "first release with cs_util.size") ships cs_util/size.py with the Gaussian size web (T_to_sigma / sigma_to_fwhm / sigma_to_r50 / ...). `uv lock --upgrade-package cs-util` bumps only that pin; its dependency set is unchanged, so no new heavy deps land. After the CI image rebuild (`uv sync --frozen` in the Dockerfile) `import cs_util.size` resolves. Co-Authored-By: Claude Opus 4.8 --- uv.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/uv.lock b/uv.lock index 7360bb6c..b483439a 100644 --- a/uv.lock +++ b/uv.lock @@ -654,7 +654,7 @@ wheels = [ [[package]] name = "cs-util" -version = "0.2" +version = "0.2.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "astropy", marker = "sys_platform == 'linux'" }, @@ -670,9 +670,9 @@ dependencies = [ { name = "swig", marker = "sys_platform == 'linux'" }, { name = "vos", marker = "sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5a/1d/068d7522652fed361d80502c958f37080a9f0cfee5ad201b9dea0197b562/cs_util-0.2.tar.gz", hash = "sha256:4cbe01556fb47e832b5d517af812a6cf4a94856ad134be8c21e74874b4d56de6", size = 24932, upload-time = "2026-04-07T07:59:21.174Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/08/d759e6af88c8a53b9c1c863a3bc3b9b5b26cf434f7232875a4bb75bec514/cs_util-0.2.1.tar.gz", hash = "sha256:5dd9be50249c412743ed116c0cb3364d5bcf1ad907be692dc52db0059a00412d", size = 26713, upload-time = "2026-06-10T23:58:23.017Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/63/9dcd114438db8b7be74dee98dc3841e43335b1c0cc6aba64ec4bd1de3c01/cs_util-0.2-py3-none-any.whl", hash = "sha256:e329ea5f2a4f3ea9b8f0649f5c9321bc18ddf4ad76bffc8152fb22e3f4168ef4", size = 29723, upload-time = "2026-04-07T07:59:19.764Z" }, + { url = "https://files.pythonhosted.org/packages/9a/2f/c8d89e8013567c3cf5535b3b941b81033e3c4a81101f3953d611af817dca/cs_util-0.2.1-py3-none-any.whl", hash = "sha256:577abc9ba04da8c78bdba9f6d235deecac99c266a0937e2c44ea1df5b50224de", size = 32209, upload-time = "2026-06-10T23:58:21.766Z" }, ] [[package]] From 413c5ddb89067ae0cec5b4cb77fde4243d1fad1a Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 01:21:26 +0200 Subject: [PATCH 10/33] refactor: route size conversions through cs_util.size The Gaussian size conversions are now sourced from cs_util.size, the single source of truth shared with sp_validation, instead of being re-derived locally: - ngmix.py get_noise window: np.sqrt(guess[4]/2) -> cs_size.T_to_sigma (guess[4] is the ngmix area parameter T = 2 sigma^2; bit-exact). - make_cat.py PSF_FWHM: galaxy.sigma_to_fwhm -> cs_size.sigma_to_fwhm (bare conversion, no pixel_scale). - utilities/galaxy.sigma_to_fwhm becomes a thin wrapper that keeps the pixel_scale rescaling and ShapePipe input validation but delegates the bare 2*sqrt(2 ln 2) factor to cs_util.size; this preserves its second caller (spread_model.py, which uses pixel_scale) and the test_utilities contract. Matches the old hard-coded 2.35482004503 constant to <1e-11 relative. Co-Authored-By: Claude Opus 4.8 --- src/shapepipe/modules/make_cat_package/make_cat.py | 4 ++-- src/shapepipe/modules/ngmix_package/ngmix.py | 3 ++- src/shapepipe/utilities/galaxy.py | 10 +++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index 0122f9e4..ca78dc75 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -13,10 +13,10 @@ from astropy import coordinates as coords from astropy import units as u from astropy.wcs import WCS +from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io -from shapepipe.utilities import galaxy def get_output_name(output_dir, file_number_string): @@ -725,7 +725,7 @@ def _save_psf_data(self, galaxy_psf_path): ) self._add2dict(f"PSF_ELL_{epoch + 1}", e_psf, idx) - psf_fwhm = galaxy.sigma_to_fwhm( + psf_fwhm = cs_size.sigma_to_fwhm( gpc_data["SHAPES"]["SIGMA_PSF_HSM"] ) self._add2dict(f"PSF_FWHM_{epoch + 1}", psf_fwhm, idx) diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index da21af66..56b974d5 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -14,6 +14,7 @@ import galsim import numpy as np from astropy.io import fits +from cs_util import size as cs_size from modopt.math.stats import sigma_mad from ngmix.observation import Observation, ObsList from sqlitedict import SqliteDict @@ -970,7 +971,7 @@ def get_noise(gal, weight, guess, pixel_scale, thresh=1.2): sig_tmp = sigma_mad(gal[m_weight]) - gauss_win = galsim.Gaussian(sigma=np.sqrt(guess[4] / 2), flux=guess[5]) + gauss_win = galsim.Gaussian(sigma=cs_size.T_to_sigma(guess[4]), flux=guess[5]) gauss_win = gauss_win.shear(g1=guess[2], g2=guess[3]) gauss_win = gauss_win.drawImage( nx=img_shape[0], ny=img_shape[1], scale=pixel_scale diff --git a/src/shapepipe/utilities/galaxy.py b/src/shapepipe/utilities/galaxy.py index 40948a8f..b593df45 100644 --- a/src/shapepipe/utilities/galaxy.py +++ b/src/shapepipe/utilities/galaxy.py @@ -7,6 +7,7 @@ """ import numpy as np +from cs_util import size as cs_size def sigma_to_fwhm(sigma, pixel_scale=1.0): @@ -15,6 +16,11 @@ def sigma_to_fwhm(sigma, pixel_scale=1.0): Transform standard deviation of a 1D Gaussian, sigma, to FWHM (Full Width Half Maximum). + Thin wrapper over :func:`cs_util.size.sigma_to_fwhm`, the single + source of truth for the bare ``FWHM = 2 sqrt(2 ln 2) sigma`` + conversion; this layer adds the optional ``pixel_scale`` rescaling + and ShapePipe's input validation. + Parameters ---------- sigma : numpy.ndarray @@ -89,6 +95,4 @@ def sigma_to_fwhm(sigma, pixel_scale=1.0): f"Invalid pixel scale {pixel_scale}, needs to be greater than 0.0." ) - cst = 2.35482004503 - - return sigma * cst * pixel_scale + return cs_size.sigma_to_fwhm(sigma) * pixel_scale From e2f37be9334cd819c86ae122951e108bca3c26fd Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 02:15:14 +0200 Subject: [PATCH 11/33] test: make_data gains psf_shear for an elliptical PSF The PSF property tests (reconv rounder/larger than orig, pristine prefit) need a non-round PSF to exercise. Add an optional psf_shear=(0,0) param to the make_data test helper that shears the Moffat PSF; the default is a no-op, so every existing caller is unchanged. Test-helper only, no pipeline behaviour change. Co-Authored-By: Claude Opus 4.8 --- src/shapepipe/testing/simulate.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/shapepipe/testing/simulate.py b/src/shapepipe/testing/simulate.py index 81b9b6ab..8ab5a06f 100644 --- a/src/shapepipe/testing/simulate.py +++ b/src/shapepipe/testing/simulate.py @@ -21,6 +21,7 @@ def make_data( psf_fwhm=0.55, pixel_scale=0.1857, img_size=201, + psf_shear=(0.0, 0.0), ): """Simulate an exponential galaxy with Moffat PSF. @@ -48,6 +49,12 @@ def make_data( Pixel scale in arcsec/pixel. Default 0.1857. img_size : int, optional Stamp size in pixels (square). Default 201. + psf_shear : tuple of float, optional + Ellipticity (g1, g2) applied to the Moffat PSF. Default (0, 0), a + round PSF. A non-round PSF is what the PSF-leakage / additive-bias + guardrail needs: the PSF carries a shape that an unbiased deconvolution + must remove from a round galaxy. The same sheared PSF is used both to + convolve the object and as the PSF model stamp. Returns ------- @@ -71,7 +78,9 @@ def make_data( if not share_shift: dy, dx = rng.uniform(low=-scale / 2, high=scale / 2, size=2) - psf = galsim.Moffat(beta=2.5, fwhm=psf_fwhm) + psf = galsim.Moffat(beta=2.5, fwhm=psf_fwhm).shear( + g1=psf_shear[0], g2=psf_shear[1] + ) obj = galsim.Convolve( psf, galsim.Exponential(half_light_radius=gal_hlr, flux=gal_flux).shear( From 31d2080676863745be0cf2fc2f1c90dfbd1c402f Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 16:27:28 +0200 Subject: [PATCH 12/33] review: address PR #761 code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - do_ngmix_metacal: seed the original-PSF pre-fit from a snapshot of the metacal RNG so it no longer advances the rng boot.go consumes — galaxy/shear results are now bit-identical to the pre-PR base (only the *_PSF_ORIG columns are added). Conservative: galaxy science unchanged vs base. - remove the redundant galaxy.sigma_to_fwhm wrapper; inline cs_size.sigma_to_fwhm(sigma) * pixel_scale at call sites (spread_model); cs_util unchanged. - drop leftover psf_fit_prior knob references (docstrings/comments/tests). - canonicalize the ESTIMATOR_COMPONENT_OBJECT grammar spec string across all sites. - trim over-explained copy-PSF / leakage / weighting prose to one canonical spot; document that bit-identity rests on BOTH pristine gal_obs.psf AND the snapshot RNG. - add tests: PSF columns survive a failed-fit NaN-fill; all-epochs-failed raises; grammar token check runs over both shipped param files. Fix a stale cross-ref. Co-Authored-By: Claude Opus 4.8 --- .../modules/make_cat_package/make_cat.py | 37 +++--- src/shapepipe/modules/ngmix_package/ngmix.py | 66 +++++----- .../spread_model_package/spread_model.py | 4 +- src/shapepipe/utilities/galaxy.py | 91 ------------- tests/module/test_make_cat.py | 9 +- tests/module/test_ngmix.py | 10 +- tests/module/test_psf_averaging_properties.py | 26 ++++ tests/module/test_psf_grammar_properties.py | 44 ++++--- tests/module/test_psf_noop_property.py | 19 +-- tests/module/test_psf_physics_properties.py | 14 +- tests/module/test_utilities.py | 123 ------------------ 11 files changed, 137 insertions(+), 306 deletions(-) diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index ca78dc75..d9aaa5f2 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -329,18 +329,16 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): Save the NGMIX catalogue into the final one. - Column grammar: ``NGMIX[m]_[_ERR][_][_]``. - ``OBJECT`` and ``SHEAR`` are both optional: the per-object metadata - columns (``NGMIX_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, - ``NGMIX_MCAL_TYPES_FAIL``) carry neither. When present, ``OBJECT`` is - one of ``GAL`` (galaxy), ``PSF_ORIG`` (the original psfex/mccd image - PSF, fit by ``ngmix.average_original_psf`` — its true ellipticity and - size, feeding object-wise PSF leakage), or ``PSF_RECONV`` (the metacal - reconvolution kernel, round and enlarged by construction, fit by - ``ngmix.average_multiepoch_psf`` — the Tgal / Tpsf size cut and a g~0 - sanity check). ``PSF_ORIG`` and ``PSF_RECONV`` are independent fits of - different PSFs, no longer the single aliased value of the pre-fix code - (shapepipe#749). + Column grammar: ``NGMIX[m]_[_ERR]__``, plus + three OBJECT/SHEAR-less per-object metadata columns + (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, + ``NGMIX_MCAL_TYPES_FAIL``). ``OBJECT`` is one of ``GAL`` (galaxy), + ``PSF_ORIG`` (the original image PSF, fit by + :func:`ngmix.average_original_psf`), or ``PSF_RECONV`` (the metacal + reconvolution kernel, fit by :func:`ngmix.average_multiepoch_psf`); + see those functions for what each PSF family IS. ``PSF_ORIG`` and + ``PSF_RECONV`` are independent fits of different PSFs, no longer the + single aliased value of the pre-fix code (shapepipe#749). Parameters ---------- @@ -390,9 +388,11 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): prefix = f"NGMIX{m}" # Galaxy (GAL), original image PSF (PSF_ORIG) and metacal - # reconvolution kernel (PSF_RECONV). G1/G2 are scalar reduced-shear - # components, not a 2-vector. Sentinels: sizes/fluxes/mags/flags 0, - # *_ERR fluxes/mags -1, ellipticities -10, *_ERR sizes 1e30. + # reconvolution kernel (PSF_RECONV); see ngmix.average_original_psf / + # average_multiepoch_psf for what each PSF family is. G1/G2 are scalar + # reduced-shear components, not a 2-vector. Sentinels: + # sizes/fluxes/mags/flags 0, *_ERR fluxes/mags -1, ellipticities -10, + # *_ERR sizes 1e30. for key_str in ( f"{prefix}_T_GAL_", f"{prefix}_SNR_GAL_", @@ -484,8 +484,7 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): ncf_data["flags"][ind[0]], idx ) - # Original image PSF (psfex/mccd) — its genuine, - # un-reconvolved ellipticity and size. + # Original image PSF (see ngmix.average_original_psf). self._add2dict( f"{prefix}_G1_PSF_ORIG_{key}", ncf_data["g1_psf_orig"][ind[0]], idx @@ -511,8 +510,8 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): ncf_data["T_err_psf_orig"][ind[0]], idx ) - # Metacal reconvolution kernel — round, enlarged by - # construction; the Tgal/Tpsf cut and g~0 sanity check. + # Metacal reconvolution kernel (see + # ngmix.average_multiepoch_psf). self._add2dict( f"{prefix}_G1_PSF_RECONV_{key}", ncf_data["g1_psf_reconv"][ind[0]], idx diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index 56b974d5..33222106 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -380,16 +380,13 @@ def compile_results(self, results): Compiled results ready to be written to a file. Two PSF column families — each carrying ellipticity *and* size, - for *different* PSFs (shapepipe#749): + for *different* PSFs (shapepipe#749). See the function docstrings + for what each family IS: - * ``*_psf_orig`` (``g1``/``g2`` + ``*_err``, ``T``) — the - ORIGINAL image PSF (the psfex/mccd model stamp), fit before - metacal reconvolution by :func:`average_original_psf`. This is - the PSF whose true ellipticity and size enter object-wise - PSF-leakage diagnostics. - * ``*_psf_reconv`` — the metacal RECONVOLUTION kernel (round and - enlarged by construction, used for the Tgal/Tpsf size cut and a - g~0 sanity check), fit by :func:`average_multiepoch_psf`. + * ``*_psf_orig`` (``g1``/``g2`` + ``*_err``, ``T``) — the original + image PSF, fit by :func:`average_original_psf`. + * ``*_psf_reconv`` — the metacal reconvolution kernel, fit by + :func:`average_multiepoch_psf`. Raises ------ @@ -1232,23 +1229,18 @@ def average_original_psf(gal_obs_list, psf_runner): """Fit and average the *original* image PSF over epochs. The original PSF is the psfex/mccd model stamp handed to ngmix - (``gal_obs.psf``), fit here with the same ``psf_runner`` machinery — and - so the same ``psf_fit_prior`` — used inside metacal, but on the PSF - *before* metacal's reconvolution. Exported to the original-PSF columns + (``gal_obs.psf``), fit here with the same ``psf_runner`` (hence the same + fit prior and guesser) used inside metacal, but on the PSF *before* + metacal's reconvolution. Exported to the original-PSF columns (``NGMIX_G1/G2_PSF_ORIG``, ``NGMIX_T_PSF_ORIG``). Distinct from the reconvolution-kernel fit (:func:`average_multiepoch_psf`): the original PSF retains its true ellipticity and size, whereas the reconvolution kernel is round and enlarged by construction. This is the PSF whose true shape and size enter object-wise PSF-leakage diagnostics. - Epochs are weighted by the *galaxy* inverse-variance weight - (``gal_obs.weight.sum()``). This is the same *form* as - :func:`average_multiepoch_psf` (``weight.sum()`` per epoch, skipping - ``flags != 0`` fits) but not the identical weight: the reconvolution path - weights by the fixnoise-combined inverse variance of the noshear metacal - image, whereas this path uses the raw galaxy inverse variance. So the two - PSF families share an averaging *scheme* and differ in which PSF is fit - and in the precise per-epoch weighting factor. + Weighted by the raw galaxy inverse variance (``gal_obs.weight.sum()`` per + epoch); :func:`average_multiepoch_psf` uses the same scheme but the + fixnoise-combined metacal-image weight instead. The fit runs on a *copy* of each PSF observation so ``gal_obs.psf`` — the object metacal later deep-copies and consumes via @@ -1258,8 +1250,11 @@ def average_original_psf(gal_obs_list, psf_runner): would survive metacal's deep copy and be reused as the ``MetacalFitGaussPSF`` fallback when its own admom+ML PSF fits both fail, silently rescuing objects the base branch dropped (``BootPSFFailure``) and - changing the galaxy/shear result set. Fitting a copy keeps this add-column - refactor bit-identical on the galaxy results. + changing the galaxy/shear result set. Fitting a copy closes this + PSF-aliasing channel; combined with :func:`do_ngmix_metacal` seeding this + pre-fit from a *snapshot* of the metacal RNG (so the pre-fit does not + advance it), the add-column refactor stays bit-identical on the galaxy + results. Parameters ---------- @@ -1268,7 +1263,8 @@ def average_original_psf(gal_obs_list, psf_runner): (pre-metacal) PSF observation to fit, with no further ``.psf`` of its own so the runner fits the stamp itself. Left pristine. psf_runner : ngmix.runners.PSFRunner - The module's PSF runner, carrying the resolved ``psf_fit_prior``. + The module's PSF runner (built by :func:`make_runners` from the + shared ``prior``). Returns ------- @@ -1276,11 +1272,8 @@ def average_original_psf(gal_obs_list, psf_runner): Same keys as :func:`average_multiepoch_psf`. """ def fit(gal_obs): - # Fit a COPY of the PSF observation: PSFRunner.go fits the PSF stamp - # and sets its .meta['result'] (and .gmix on success). Reading from - # the copy leaves gal_obs.psf pristine — no gmix to leak through - # metacal's deep copy into the MetacalFitGaussPSF fallback. Failed - # fits keep flags != 0 and are dropped by _average_psf_fits. + # Fit a COPY so gal_obs.psf stays pristine for metacal — see docstring. + # Failed fits keep flags != 0 and are dropped by _average_psf_fits. psf_obs = gal_obs.psf.copy() psf_runner.go(psf_obs) return psf_obs.meta['result'], gal_obs.weight.sum() @@ -1379,14 +1372,15 @@ def do_ngmix_metacal(stamp, prior, flux_guess, rng, centroid_source="hsm"): runner, psf_runner = make_runners(prior, flux_guess, rng) - # Fit the ORIGINAL (psfex/mccd) PSF before metacal reconvolves it, using - # the same psf_runner (so the same psf_fit_prior and centroid). This is - # the PSF_ORIG family; metacal below produces the PSF_RECONV family. Run - # first so the original PSF is fit on its own stamp, distinct from the - # round, enlarged kernel metacal convolves back in. average_original_psf - # fits a COPY of each gal_obs.psf, so gal_obs_list reaches boot.go below - # pristine — no stray gmix to leak into MetacalFitGaussPSF's fallback. - psf_orig_res = average_original_psf(gal_obs_list, psf_runner) + # Fit the ORIGINAL (psfex/mccd) PSF before metacal reconvolves it. Use a + # psf_runner seeded from a snapshot of rng's state so this prefit does NOT + # advance the rng that boot.go consumes below — keeping the galaxy/shear + # results bit-identical to the no-prefit branch. average_original_psf fits a + # COPY of each gal_obs.psf, so gal_obs_list also reaches boot.go pristine. + prefit_rng = np.random.RandomState() + prefit_rng.set_state(rng.get_state()) + _, prefit_psf_runner = make_runners(prior, flux_guess, prefit_rng) + psf_orig_res = average_original_psf(gal_obs_list, prefit_psf_runner) metacal_pars = { 'types': ['noshear', '1p', '1m', '2p', '2m'], diff --git a/src/shapepipe/modules/spread_model_package/spread_model.py b/src/shapepipe/modules/spread_model_package/spread_model.py index 43140598..1461ecce 100644 --- a/src/shapepipe/modules/spread_model_package/spread_model.py +++ b/src/shapepipe/modules/spread_model_package/spread_model.py @@ -8,10 +8,10 @@ import galsim import numpy as np +from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io -from shapepipe.utilities import galaxy def get_sm(obj_vign, psf_vign, model_vign, weight_vign): @@ -102,7 +102,7 @@ def get_model(sigma, flux, img_shape, pixel_scale=0.186): """ # Get scale radius - scale_radius = 1 / 16 * galaxy.sigma_to_fwhm(sigma, pixel_scale=pixel_scale) + scale_radius = 1 / 16 * cs_size.sigma_to_fwhm(sigma) * pixel_scale # Get galaxy model gal_obj = galsim.Exponential(scale_radius=scale_radius, flux=flux) diff --git a/src/shapepipe/utilities/galaxy.py b/src/shapepipe/utilities/galaxy.py index b593df45..c533eac5 100644 --- a/src/shapepipe/utilities/galaxy.py +++ b/src/shapepipe/utilities/galaxy.py @@ -5,94 +5,3 @@ :Author: Martin Kilbinger """ - -import numpy as np -from cs_util import size as cs_size - - -def sigma_to_fwhm(sigma, pixel_scale=1.0): - r"""Convert Sigma to FWHM. - - Transform standard deviation of a 1D Gaussian, sigma, to FWHM - (Full Width Half Maximum). - - Thin wrapper over :func:`cs_util.size.sigma_to_fwhm`, the single - source of truth for the bare ``FWHM = 2 sqrt(2 ln 2) sigma`` - conversion; this layer adds the optional ``pixel_scale`` rescaling - and ShapePipe's input validation. - - Parameters - ---------- - sigma : numpy.ndarray - input standard deviation(s) - pixel_scale : float, optional, default=1 - pixel size in arcsec, set to 1 if no scaling - required - - Returns - ------- - fwhm : (array of) float - output fwhm(s) - - Raises - ------ - TypeError - If ``sigma`` is not of type numpy array or float - TypeError - If ``sigma`` array values are not of type float - TypeError - If ``pixel_scale`` is not of type float - ValueError - If ``sigma`` array values are not greater than 0.0 - ValueError - If ``sigma`` is not greater than 0.0 - ValueError - If ``pixel_scale`` is not greater than 0.0 - - Notes - ----- - To compute the FWHMh for a 1D Gaussian N(x), solve the equation - - .. math:: - - N(x) = (\sigma \sqrt{2\pi})^{-1} \exp[x^2/2\sigma^2] = \frac 1 2 N(x) - - - for :math:`x`. The FWHM is :math:`x + (-x) = 2x`. The solution is - - .. math:: - - \textrm{FWHM} = 2 \sqrt(2 \ln 2) \sigma \approx 2.355 \sigma - - """ - if not isinstance(sigma, (np.ndarray, float)): - raise TypeError( - f"Sigma must be of type numpy array or float, not {type(sigma)}." - ) - elif isinstance(sigma, np.ndarray) and sigma.dtype != np.float64: - raise TypeError( - f"Sigma array values must be of type float, not {sigma.dtype}." - ) - - if not isinstance(pixel_scale, float): - raise TypeError( - f"The pixel scale must of type float, not {type(pixel_scale)}." - ) - - if isinstance(sigma, np.ndarray) and np.any(sigma <= 0.0): - raise ValueError( - f"Found {sigma[sigma <=0].size} invalid standard deviation array " - + "values, all elements must to be greater than 0.0." - ) - elif isinstance(sigma, float) and sigma <= 0.0: - raise ValueError( - f"Invalid standard deviation {sigma}, needs to be greater than " - + "0.0." - ) - - if pixel_scale <= 0.0: - raise ValueError( - f"Invalid pixel scale {pixel_scale}, needs to be greater than 0.0." - ) - - return cs_size.sigma_to_fwhm(sigma) * pixel_scale diff --git a/tests/module/test_make_cat.py b/tests/module/test_make_cat.py index b5077d3a..e2960701 100644 --- a/tests/module/test_make_cat.py +++ b/tests/module/test_make_cat.py @@ -2,10 +2,11 @@ Drives ``SaveCatalogue._save_ngmix_data`` against a synthetic ngmix catalogue to lock in the renamed PSF-column grammar -``NGMIX_[_ERR]_[_]`` (shapepipe#749): the original -image PSF (``PSF_ORIG``) and the metacal reconvolution kernel (``PSF_RECONV``) -are independent fits of *different* PSFs, no longer the single aliased value of -the pre-fix code. +``NGMIX[m]_[_ERR]__`` (shapepipe#749), plus the three +OBJECT/SHEAR-less metadata columns (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, +``NGMIX_MCAL_TYPES_FAIL``): the original image PSF (``PSF_ORIG``) and the metacal +reconvolution kernel (``PSF_RECONV``) are independent fits of *different* PSFs, +no longer the single aliased value of the pre-fix code. """ import numpy as np diff --git a/tests/module/test_ngmix.py b/tests/module/test_ngmix.py index 9d0e640f..37db4e3f 100644 --- a/tests/module/test_ngmix.py +++ b/tests/module/test_ngmix.py @@ -238,7 +238,7 @@ def test_compile_results_psf_families_are_unaliased(): came from one ``average_multiepoch_psf`` result and so were byte- identical; here they must differ, each tracing its own source. The companion size un-aliasing is checked in - ``test_compile_results_size_columns_are_half_light_radii``. + ``test_compile_results_size_columns_are_unaliased``. """ from shapepipe.modules.ngmix_package.ngmix import Ngmix @@ -296,6 +296,14 @@ def test_compile_results_nan_fills_failed_fit_types(): assert failed["flags"] == [0x8] assert failed["mcal_flags"] == [0x8] and failed["mcal_flags"][0] != 0 + # The object-level PSF columns survive on a failed fit type: they are copied + # outside the flags==0 guard, so the failed type keeps a full-length PSF row + # (gating that copy on the galaxy flags would shorten it -> save crash). + npt.assert_allclose(failed["T_psf_orig"], [ORIG_PSF_T]) + npt.assert_allclose(failed["T_psf_reconv"], [0.09]) + npt.assert_allclose(failed["g1_psf_orig"], [ORIG_PSF_G[0]]) + npt.assert_allclose(failed["g1_psf_reconv"], [RECONV_PSF_G[0]]) + # successful types are untouched, but share the object's mcal_flags npt.assert_allclose(out["noshear"]["flux"], [100.0]) assert out["noshear"]["flags"] == [0] diff --git a/tests/module/test_psf_averaging_properties.py b/tests/module/test_psf_averaging_properties.py index 25c0fc77..2314b6ca 100644 --- a/tests/module/test_psf_averaging_properties.py +++ b/tests/module/test_psf_averaging_properties.py @@ -20,6 +20,7 @@ import numpy as np import numpy.testing as npt +import pytest from astropy.io import fits from hypothesis import given, settings from hypothesis import strategies as st @@ -199,6 +200,31 @@ def test_single_survivor_returns_its_own_value(survivor, flagged_specs): assert out["n_epoch"] == 1 +@given( + st.lists( + st.tuples(_weight, st.integers(min_value=1, max_value=255)), + min_size=1, max_size=5, + ) +) +@settings(deadline=None, max_examples=25) +def test_all_epochs_failed_raises_zero_division(flagged_specs): + """All-epochs-failed is the contract that wsum == 0 raises, not returns 0/NaN. + + An object whose PSF fit failed on every epoch reaches both PSF-family + wrappers (reconv and orig) with zero survivors, so ``wsum`` stays 0. The + core must raise ``ZeroDivisionError`` rather than silently divide — a + regression returning NaN/0 would be caught here. Every input epoch is + flagged (``flags != 0``) with an arbitrary positive weight, so no survivor + can rescue the sum. + """ + flagged = [ + (_result(np.nan, np.nan, 1e6, np.nan, np.nan, np.nan, flags=f), w) + for w, f in flagged_specs + ] + with pytest.raises(ZeroDivisionError): + _average_psf_fits(flagged) + + # --------------------------------------------------------------------------- # # (d) make_cat: an obj_id absent from the ngmix cat keeps its sentinel fill. # --------------------------------------------------------------------------- # diff --git a/tests/module/test_psf_grammar_properties.py b/tests/module/test_psf_grammar_properties.py index 5123d7f3..c7c361dc 100644 --- a/tests/module/test_psf_grammar_properties.py +++ b/tests/module/test_psf_grammar_properties.py @@ -2,9 +2,11 @@ Drives ``SaveCatalogue._save_ngmix_data`` (the make_cat writer) under hypothesis to pin three invariants of the renamed grammar -``NGMIX[m]_[_ERR]_[_]`` (shapepipe#749), where the -ORIGINAL image PSF (``PSF_ORIG``) and the metacal RECONVOLUTION kernel -(``PSF_RECONV``) are independent fits of *different* PSFs: +``NGMIX[m]_[_ERR]__`` (shapepipe#749), plus the three +OBJECT/SHEAR-less metadata columns (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, +``NGMIX_MCAL_TYPES_FAIL``), where the ORIGINAL image PSF (``PSF_ORIG``) and the +metacal RECONVOLUTION kernel (``PSF_RECONV``) are independent fits of +*different* PSFs: (a) un-aliasing holds for ALL inputs: for arbitrary distinct values fed as the orig vs reconv ngmix source columns, every emitted ``*_PSF_ORIG_*`` column @@ -25,6 +27,7 @@ import numpy as np import numpy.testing as npt +import pytest from astropy.io import fits from hypothesis import given, settings from hypothesis import strategies as st @@ -156,10 +159,13 @@ def _run_save_ngmix(ngmix_path, obj_ids, cat_size_target=None): rf"|^NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)$" ) -# example/cfis/final_cat.param lives two levels up from tests/module/. -PARAM_PATH = ( - Path(__file__).resolve().parents[2] / "example" / "cfis" / "final_cat.param" -) +# The shipped final-catalogue param files, two levels up from tests/module/. +# Both are consumer contracts updated to the new grammar, so both are checked. +_EXAMPLE = Path(__file__).resolve().parents[2] / "example" +PARAM_PATHS = [ + _EXAMPLE / "cfis" / "final_cat.param", + _EXAMPLE / "unions_800" / "cat_matched.param", +] # The one param-file NGMIX token outside the _save_ngmix_data grammar: the # moments-failure flag, set by a different code path (not the metacal writer). @@ -169,9 +175,9 @@ def _run_save_ngmix(ngmix_path, obj_ids, cat_size_target=None): NON_WRITER_PARAM_TOKENS = {"NGMIX_MOM_FAIL"} -def _param_ngmix_tokens(): - """Every distinct NGMIX_* token named in the shipped param file.""" - text = PARAM_PATH.read_text() +def _param_ngmix_tokens(param_path): + """Every distinct NGMIX_* token named in a shipped param file.""" + text = param_path.read_text() return set(re.findall(r"\bNGMIX[A-Za-z0-9_]*", text)) @@ -317,15 +323,21 @@ def test_emitted_column_names_match_grammar(obj_ids, tmp_path_factory): # (c) Param-file tokens are all producible by the writer # --------------------------------------------------------------------------- +@pytest.mark.parametrize( + "param_path", PARAM_PATHS, ids=lambda p: f"{p.parent.name}/{p.name}" +) @settings(deadline=None, max_examples=15) @given(obj_ids=obj_ids_strategy) -def test_param_file_ngmix_tokens_are_producible(obj_ids): +def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): """Every NGMIX_* token the param file names is a column the writer produces. - ``example/cfis/final_cat.param`` is the consumer contract for the final - catalogue; ``create_final_cat`` keeps only the listed columns, so a token it - names that the writer cannot emit is a silent, empty column downstream. The - one known exception — ``NGMIX_MOM_FAIL``, the moments-failure flag set by a + Each shipped final-catalogue param file (``example/cfis/final_cat.param`` + and ``example/unions_800/cat_matched.param``) is a consumer contract for the + final catalogue; ``create_final_cat`` keeps only the listed columns, so a + token it names that the writer cannot emit is a silent, empty column + downstream. Both files are checked so a future divergence in either (a + typo'd or stale NGMIX token) cannot escape the consistency check. The one + known exception — ``NGMIX_MOM_FAIL``, the moments-failure flag set by a different path — is excluded BY NAME, and we assert it is genuinely outside the producible set so the carve-out cannot hide a real grammar regression. @@ -333,7 +345,7 @@ def test_param_file_ngmix_tokens_are_producible(obj_ids): across inputs (the contract is structural, not data-dependent). """ produced = _produced_columns(obj_ids) - param_tokens = _param_ngmix_tokens() + param_tokens = _param_ngmix_tokens(param_path) # The carve-out is real: the excluded token is NOT producible. (If a future # change made the writer emit it, this fails and we drop the exclusion.) diff --git a/tests/module/test_psf_noop_property.py b/tests/module/test_psf_noop_property.py index 003efcaf..98f47b2b 100644 --- a/tests/module/test_psf_noop_property.py +++ b/tests/module/test_psf_noop_property.py @@ -6,14 +6,17 @@ each PSF observation, so the observation metacal later deep-copies and consumes (``boot.go(gal_obs_list)``) is left untouched. -This is the invariant that makes the add-column refactor bit-identical to a -no-original-fit branch on the galaxy/shear results: if the pre-fit cannot alter -``gal_obs.psf``, it cannot alter what metacal consumes, so the galaxy results -are unchanged. ``PSFRunner.go`` sets ``.gmix`` (and ``.meta['result']``) on the -observation it fits; were that ``gal_obs.psf`` itself, a stray gmix would -survive metacal's deep copy and be reused as the ``MetacalFitGaussPSF`` fallback -when admom+ML both fail, silently rescuing objects the base branch dropped -(``BootPSFFailure``). +Pristine ``gal_obs.psf`` is *one* of the two conditions for the add-column +refactor to be bit-identical to a no-original-fit branch on the galaxy/shear +results: it closes the PSF-aliasing channel, which this test pins. +``PSFRunner.go`` sets ``.gmix`` (and ``.meta['result']``) on the observation it +fits; were that ``gal_obs.psf`` itself, a stray gmix would survive metacal's +deep copy and be reused as the ``MetacalFitGaussPSF`` fallback when admom+ML +both fail, silently rescuing objects the base branch dropped +(``BootPSFFailure``). The *second* condition — the pre-fit not advancing the +RNG that metacal consumes — is ensured in ``do_ngmix_metacal`` by seeding the +pre-fit from a snapshot of the RNG state; together they leave the galaxy +results unchanged. The unit guard ``test_original_psf_prefit_leaves_gal_obs_psf_pristine`` in ``tests/module/test_ngmix.py`` pins this on one fixed input; here it is diff --git a/tests/module/test_psf_physics_properties.py b/tests/module/test_psf_physics_properties.py index 6b67f164..b0feeab5 100644 --- a/tests/module/test_psf_physics_properties.py +++ b/tests/module/test_psf_physics_properties.py @@ -63,10 +63,11 @@ def _do_ngmix_metacal_on_psf(psf_shear, psf_fwhm=0.55, img_size=51, seed=7, # Physically-sensible PSF strategies: ellipticity well inside |g| < 1 (real # PSFs are mildly elliptical; a magnitude up to ~0.18 keeps the Moffat shear # valid and the fit well-behaved), and a Moffat FWHM in arcsec spanning the -# realistic ground-based range. NB: the original-PSF fit uses the default galaxy -# prior (psf_fit_prior="galaxy"), which shrinks the *recovered* |g_orig| to -# O(1e-4) even for input |g|~0.1 (the shapepipe#749 prior-domination) -- so the -# dilation (T) inequality, not the rounding (|g|) one, is the primary guard. +# realistic ground-based range. NB: the original-PSF fit uses the same +# (galaxy-shaped) prior as the galaxy fit, which shrinks the *recovered* +# |g_orig| to O(1e-4) even for input |g|~0.1 (the shapepipe#749 +# prior-domination) -- so the dilation (T) inequality, not the rounding (|g|) +# one, is the primary guard. _g_complex = st.complex_numbers( min_magnitude=0.02, max_magnitude=0.18, allow_nan=False, allow_infinity=False ) @@ -90,8 +91,9 @@ def test_reconv_psf_rounder_and_larger_than_orig_over_random_psfs(g, psf_fwhm): dilated, so ``T_reconv`` exceeds ``T_orig`` by a clear margin (~0.01-0.03) and a strict ``>`` is asserted below -- a transposition makes ``T_reconv`` the smaller value and fails it hard. The ``|g|`` inequality corroborates but - with a thin margin: under the default galaxy prior the recovered ``|g_orig|`` - is only O(1e-4) (shapepipe#749 prior-domination), so it is a secondary check. + with a thin margin: under the same (galaxy-shaped) prior as the galaxy fit + the recovered ``|g_orig|`` is only O(1e-4) (shapepipe#749 prior-domination), + so it is a secondary check. """ _, reconv, orig = _do_ngmix_metacal_on_psf( (g.real, g.imag), psf_fwhm=psf_fwhm diff --git a/tests/module/test_utilities.py b/tests/module/test_utilities.py index da098d33..453455cb 100644 --- a/tests/module/test_utilities.py +++ b/tests/module/test_utilities.py @@ -5,126 +5,3 @@ :Author: Samuel Farrens """ - -from unittest import TestCase - -from hypothesis import given -from hypothesis import strategies as st -import numpy as np -import numpy.testing as npt - -from shapepipe.utilities import galaxy - -positive_floats = st.floats( - min_value=1.0e-100, - max_value=1.0e100, - allow_nan=False, - allow_infinity=False, -) - - -class GalaxyTestCase(TestCase): - - def setUp(self): - - self.sigma_float = 5.5 - self.sigma_array = np.arange(1, 4) * 0.1 - self.sigma_int = 1 - self.sigma_array_int = np.arange(3) - self.pixel_scale = 2.0 - self.pixel_int = 1 - self.sigma_float_exp = 12.9515102 - self.sigma_float_ps_exp = 25.9030204 - self.sigma_array_exp = np.array([0.235482, 0.47096401, 0.70644601]) - - def tearDown(self): - - self.sigma_float = None - self.sigma_array = None - self.sigma_int = None - self.sigma_array_int = None - self.pixel_scale = None - self.pixel_int = None - self.sigma_float_exp = None - self.sigma_float_ps_exp = None - self.sigma_array_exp = None - - def test_sigma_to_fwhm(self): - - npt.assert_almost_equal( - galaxy.sigma_to_fwhm(self.sigma_float), - self.sigma_float_exp, - err_msg="sigma_to_fwhm gave invalid result for float input", - ) - - npt.assert_almost_equal( - galaxy.sigma_to_fwhm(self.sigma_float, self.pixel_scale), - self.sigma_float_ps_exp, - err_msg=( - "sigma_to_fwhm gave invalid result for float input with " - + "non-default pixel scale" - ), - ) - - npt.assert_allclose( - galaxy.sigma_to_fwhm(self.sigma_array), - self.sigma_array_exp, - err_msg="sigma_to_fwhm gave invalid result for array input", - ) - - npt.assert_raises(TypeError, galaxy.sigma_to_fwhm, self.sigma_int) - - npt.assert_raises( - TypeError, - galaxy.sigma_to_fwhm, - self.sigma_array_int, - ) - - npt.assert_raises( - TypeError, - galaxy.sigma_to_fwhm, - self.sigma_float, - self.pixel_int, - ) - - npt.assert_raises(ValueError, galaxy.sigma_to_fwhm, -self.sigma_float) - - npt.assert_raises( - ValueError, - galaxy.sigma_to_fwhm, - np.negative(self.sigma_array), - ) - - npt.assert_raises( - ValueError, - galaxy.sigma_to_fwhm, - self.sigma_float, - -self.pixel_scale, - ) - - -@given(positive_floats, positive_floats) -def test_sigma_to_fwhm_is_linear_in_sigma_and_pixel_scale(sigma, pixel_scale): - - fwhm = galaxy.sigma_to_fwhm(sigma, pixel_scale) - - assert fwhm > 0.0 - npt.assert_allclose( - galaxy.sigma_to_fwhm(sigma) * pixel_scale, - fwhm, - rtol=1.0e-12, - ) - - -@given( - st.lists(positive_floats, min_size=1, max_size=20), - positive_floats, -) -def test_sigma_to_fwhm_preserves_array_shape_and_scale(sigmas, pixel_scale): - - sigma_array = np.array(sigmas, dtype=np.float64) - fwhm = galaxy.sigma_to_fwhm(sigma_array, pixel_scale) - - assert fwhm.shape == sigma_array.shape - assert np.all(fwhm > 0.0) - npt.assert_allclose(fwhm / sigma_array, fwhm[0] / sigma_array[0]) From 8ae32035cf2cd2ea49b4fa486b79220a36791102 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 17:08:57 +0200 Subject: [PATCH 13/33] refactor(ngmix): build the per-family PSF res-keys from one loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 12 hand-written g*/T_psf_{reconv,orig} assignments collapse to a single template applied over (family, source) — same keys, same values, but the two PSF families are now generated symmetrically, so one can't gain or misname a column the other lacks. Addresses a drift/hardcoding risk Cail flagged in review. Co-Authored-By: Claude Opus 4.8 --- src/shapepipe/modules/ngmix_package/ngmix.py | 29 +++++++++----------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index 33222106..cc7ab87b 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -708,22 +708,19 @@ def process(self): if res.get(k, {}).get('flags', 0) != 0 ) res['mcal_flags'] = get_mcal_flags(res) - # Two distinct PSF families (shapepipe#749), each with its own - # ellipticity AND size, written under self-naming res-keys: - # reconvolution kernel (psf_res) -> *_psf_reconv - # original image PSF (psf_orig_res) -> *_psf_orig - res['g1_psf_reconv'] = psf_res['g_psf'][0] - res['g2_psf_reconv'] = psf_res['g_psf'][1] - res['g1_err_psf_reconv'] = psf_res['g_psf_err'][0] - res['g2_err_psf_reconv'] = psf_res['g_psf_err'][1] - res['T_psf_reconv'] = psf_res['T_psf'] - res['T_err_psf_reconv'] = psf_res['T_psf_err'] - res['g1_psf_orig'] = psf_orig_res['g_psf'][0] - res['g2_psf_orig'] = psf_orig_res['g_psf'][1] - res['g1_err_psf_orig'] = psf_orig_res['g_psf_err'][0] - res['g2_err_psf_orig'] = psf_orig_res['g_psf_err'][1] - res['T_psf_orig'] = psf_orig_res['T_psf'] - res['T_err_psf_orig'] = psf_orig_res['T_psf_err'] + # Two distinct PSF families (shapepipe#749), each carrying its own + # ellipticity AND size: the metacal reconvolution kernel (psf_res) + # and the original image PSF (psf_orig_res). Tag both into res from + # ONE template so the families stay symmetric — same quantities, + # parallel ``*_psf_{family}`` names — and cannot drift apart by a + # hand-edit to one and not the other. + for family, psf in (("reconv", psf_res), ("orig", psf_orig_res)): + res[f"g1_psf_{family}"] = psf["g_psf"][0] + res[f"g2_psf_{family}"] = psf["g_psf"][1] + res[f"g1_err_psf_{family}"] = psf["g_psf_err"][0] + res[f"g2_err_psf_{family}"] = psf["g_psf_err"][1] + res[f"T_psf_{family}"] = psf["T_psf"] + res[f"T_err_psf_{family}"] = psf["T_psf_err"] final_res.append(res) n_fitted += 1 count_batch += 1 From 20878746d8222e1156364e65931810dae4d8d544 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Sat, 20 Jun 2026 17:23:08 +0200 Subject: [PATCH 14/33] refactor(make_cat): family-loop the PSF column write block The two parallel 6-call blocks (PSF_ORIG / PSF_RECONV) collapse to one template over (family, obj): the FITS column name and the res-key it reads are now generated from the same pair, so the write seam (where the grammar drift bit us) can't desync the two families. Behaviour-identical column set + values. Co-Authored-By: Claude Opus 4.8 --- .../modules/make_cat_package/make_cat.py | 85 +++++++------------ 1 file changed, 33 insertions(+), 52 deletions(-) diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index d9aaa5f2..1c7c5a6b 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -484,58 +484,39 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): ncf_data["flags"][ind[0]], idx ) - # Original image PSF (see ngmix.average_original_psf). - self._add2dict( - f"{prefix}_G1_PSF_ORIG_{key}", - ncf_data["g1_psf_orig"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_G2_PSF_ORIG_{key}", - ncf_data["g2_psf_orig"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_G1_ERR_PSF_ORIG_{key}", - ncf_data["g1_err_psf_orig"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_G2_ERR_PSF_ORIG_{key}", - ncf_data["g2_err_psf_orig"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_T_PSF_ORIG_{key}", - ncf_data["T_psf_orig"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_T_ERR_PSF_ORIG_{key}", - ncf_data["T_err_psf_orig"][ind[0]], idx - ) - - # Metacal reconvolution kernel (see - # ngmix.average_multiepoch_psf). - self._add2dict( - f"{prefix}_G1_PSF_RECONV_{key}", - ncf_data["g1_psf_reconv"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_G2_PSF_RECONV_{key}", - ncf_data["g2_psf_reconv"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_G1_ERR_PSF_RECONV_{key}", - ncf_data["g1_err_psf_reconv"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_G2_ERR_PSF_RECONV_{key}", - ncf_data["g2_err_psf_reconv"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_T_PSF_RECONV_{key}", - ncf_data["T_psf_reconv"][ind[0]], idx - ) - self._add2dict( - f"{prefix}_T_ERR_PSF_RECONV_{key}", - ncf_data["T_err_psf_reconv"][ind[0]], idx - ) + # Original image PSF (average_original_psf) and metacal + # reconvolution kernel (average_multiepoch_psf). Both PSF + # families share ONE write template, so the FITS column + # name (``{obj}``) and the res-key it reads (``{family}``) + # are generated from the same pair and cannot drift apart. + for family, obj in ( + ("orig", "PSF_ORIG"), + ("reconv", "PSF_RECONV"), + ): + self._add2dict( + f"{prefix}_G1_{obj}_{key}", + ncf_data[f"g1_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_{obj}_{key}", + ncf_data[f"g2_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G1_ERR_{obj}_{key}", + ncf_data[f"g1_err_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_G2_ERR_{obj}_{key}", + ncf_data[f"g2_err_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_{obj}_{key}", + ncf_data[f"T_psf_{family}"][ind[0]], idx + ) + self._add2dict( + f"{prefix}_T_ERR_{obj}_{key}", + ncf_data[f"T_err_psf_{family}"][ind[0]], idx + ) self._add2dict( f"NGMIX{m}_MCAL_FLAGS", From ade9cf323cf065a86fa527b205b1424b2101eca2 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Tue, 30 Jun 2026 22:01:05 +0200 Subject: [PATCH 15/33] docs(CLAUDE): PYTHONPATH-shadow recipe for testing a library build in-container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document how to run the container against a different pure-Python library build (a worktree, an unreleased branch, this repo's own src/ against a stale image) by prepending the host checkout to PYTHONPATH — the local-testing counterpart of a git-ref dependency like `cs_util @ develop`. Includes the -o addopts='' tip for when a pytest plugin the config references isn't in the image. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MgDK9SKn2PsDZGEbr4WFb3 --- CLAUDE.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index c80029b1..aaec7943 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -36,6 +36,30 @@ way to get all of that is the container. sandbox with a host clone of the repo bind-mounted in and `pip install -e` pointed at it, so edits on the host are live inside the container. +**Shadow a library build with `PYTHONPATH` (no image rebuild).** When you need +the container to run a *different* build of a pure-Python library than the one +baked into its venv — a feature worktree, an unreleased branch, this repo's own +`src/` against a stale image — prepend the host checkout to `PYTHONPATH`. Python +resolves the prepended path first, so the on-disk version shadows +`/app/.venv/...` without touching the image. This is the local-testing +counterpart of a git-ref dependency (e.g. `cs_util @ develop` in +`pyproject.toml`): the dep change makes CI build the right version; the shadow +lets you test that version *now*, before any rebuild. The recipe: + +```bash +apptainer exec --bind /n17data,/automnt bash -c \ + "cd && \ + PYTHONPATH=/path/to/libfoo-checkout:/src \ + python -m pytest -o addopts='' -q" +``` + +Notes: the checkout path is the **parent** of the importable package dir (the +dir containing `foo/`, not `foo/` itself); list several `:`-separated to stack +shadows; `-o addopts=''` clears `pyproject.toml`'s pytest defaults when a plugin +they reference (e.g. `pytest-cov`) isn't in the image. Use this for a quick +verify; land the real fix as the `pyproject.toml` / `uv.lock` dep change so CI +and the next image agree. + **Testing container changes: build remotely, pull locally.** Don't `apptainer build` images on a cluster — quotas are tight and the build is slow. The loop for any change to `Dockerfile` / `pyproject.toml` / `uv.lock` is: edit From 98e822ad953a3c3b5a785c8a3691e07b7fd47593 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Tue, 30 Jun 2026 22:22:40 +0200 Subject: [PATCH 16/33] fix(hsm): store true e-type distortion in E*_HSM columns, not g-type shear MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SCIENCE-AFFECTING — the one value change in the column-grammar refactor. The HSM star/PSF moment columns are named E1/E2_*_HSM (e-type, the distortion convention the star/PSF residual ρ/τ statistics expect), but the producers were storing galsim's reduced shear `observed_shape.g1/.g2` (g-type). Name and value disagreed. This swaps the five producer sites to `observed_shape.e1/.e2` so the stored value is the true e-type distortion (|e| = 2|g|/(1+|g|²)), matching the column name. Sites (galsim.Shear.e1/.e2, distortion): psfex_interp.py _get_psfshapes (325-326), _get_starshapes (440-441) mccd_interpolation_script.py PSF (222-223), STAR (280-281) shapepipe_auxiliary_mccd.py PSF (421-422) Closes Sacha's review point (#761): HSM is e-type, ngmix is g-type — name and value must agree. Downstream consequence: ρ/τ-statistic numbers change. Fabian /Axel to confirm the e-type convention is what the residual stats want. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MgDK9SKn2PsDZGEbr4WFb3 --- .../modules/mccd_package/mccd_interpolation_script.py | 8 ++++---- .../modules/mccd_package/shapepipe_auxiliary_mccd.py | 4 ++-- .../modules/psfex_interp_package/psfex_interp.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py index d6625bc3..4626ff25 100644 --- a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py +++ b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py @@ -219,8 +219,8 @@ def _get_psfshapes(self): self.psf_shapes = np.array( [ [ - moms.observed_shape.g1, - moms.observed_shape.g2, + moms.observed_shape.e1, + moms.observed_shape.e2, moms.moments_sigma, int(bool(moms.error_message)), ] @@ -277,8 +277,8 @@ def _get_starshapes(self, star_vign): self.star_shapes = np.array( [ [ - moms.observed_shape.g1, - moms.observed_shape.g2, + moms.observed_shape.e1, + moms.observed_shape.e2, moms.moments_sigma, int(bool(moms.error_message)), ] diff --git a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py index 44b7cf55..cc38b407 100644 --- a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py +++ b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py @@ -418,8 +418,8 @@ def mccd_interpolation_pipeline( PSF_shapes = np.array( [ [ - moms.observed_shape.g1, - moms.observed_shape.g2, + moms.observed_shape.e1, + moms.observed_shape.e2, moms.moments_sigma, int(bool(moms.error_message)), ] diff --git a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py index 956092f2..2f1a2db8 100644 --- a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py +++ b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py @@ -322,8 +322,8 @@ def _get_psfshapes(self): self.psf_shapes = np.array( [ [ - moms.observed_shape.g1, - moms.observed_shape.g2, + moms.observed_shape.e1, + moms.observed_shape.e2, moms.moments_sigma, int(bool(moms.error_message)), ] @@ -437,8 +437,8 @@ def _get_starshapes(self, star_vign): self.star_shapes = np.array( [ [ - moms.observed_shape.g1, - moms.observed_shape.g2, + moms.observed_shape.e1, + moms.observed_shape.e2, moms.moments_sigma, int(bool(moms.error_message)), ] From 4b17f7ec769306f0e3fd8a586286cd3547e10f88 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Tue, 30 Jun 2026 22:47:25 +0200 Subject: [PATCH 17/33] refactor(hsm): one column grammar for HSM star/PSF moments, size stored as T Rename every HSM moment column to the ShapePipe-v2 grammar ESTIMATOR_COMPONENT_OBJECT and store size as T = 2*sigma^2 (an area) sourced from cs_util.size, across the PSFEx and MCCD producers and their consumers: E1/E2_PSF_HSM -> HSM_E1/E2_PSF E1/E2_STAR_HSM -> HSM_E1/E2_STAR SIGMA_PSF/STAR_HSM -> HSM_T_PSF / HSM_T_STAR (value now T, not sigma) FLAG_PSF/STAR_HSM -> HSM_FLAG_PSF / HSM_FLAG_STAR Size is converted at the earliest honest point: the producers (psfex_interp, mccd_interpolation_script, shapepipe_auxiliary_mccd) store cs_size.sigma_to_T under the HSM_T_* keys -- in the FITS columns AND the SHAPES SqliteDict -- so every consumer reads T straight through. The hand-coded conversions in the consumers drop out: merge_starcat's np.sqrt(size) / **2 and mccd_plot_utilities' 2*sigma^2 arithmetic become plain HSM_T_* reads. One intended numeric change: the merge_starcat MCCD branch derives its output size from the external PSF_MOM_LIST/STAR_MOM_LIST[:,2] (a sigma), now via cs_size.sigma_to_T -- so that validation catalogue's size column changes from sigma^2 to T = 2*sigma^2. Extends test_psfex_interp with an HSM e-type property test (elliptical-PSF fixture, since e vs g vanishes on round shapes) and a producer test pinning the new grammar + T routing. Downstream consumers (sp_validation) break on both the rename and the sigma->T semantics; that ripple is absorbed by the separate #205 PR. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MgDK9SKn2PsDZGEbr4WFb3 --- .../mccd_package/mccd_interpolation_script.py | 37 ++--- .../mccd_package/mccd_plot_utilities.py | 18 +-- .../mccd_package/shapepipe_auxiliary_mccd.py | 9 +- .../merge_starcat_package/merge_starcat.py | 67 +++++---- .../psfex_interp_package/psfex_interp.py | 37 ++--- tests/module/test_psfex_interp.py | 141 ++++++++++++++++++ 6 files changed, 228 insertions(+), 81 deletions(-) diff --git a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py index 4626ff25..ddce0194 100644 --- a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py +++ b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py @@ -10,6 +10,7 @@ import mccd import numpy as np +from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io @@ -243,10 +244,10 @@ def _write_output(self): if self._compute_shape: data = { "VIGNET": self.interp_PSFs, - "E1_PSF_HSM": self.psf_shapes[:, 0], - "E2_PSF_HSM": self.psf_shapes[:, 1], - "SIGMA_PSF_HSM": self.psf_shapes[:, 2], - "FLAG_PSF_HSM": self.psf_shapes[:, 3].astype(int), + "HSM_E1_PSF": self.psf_shapes[:, 0], + "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), + "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), } else: data = {"VIGNET": self.interp_PSFs} @@ -339,14 +340,14 @@ def _write_output_validation(self, star_dict, psfex_cat_dict): ) data = { - "E1_PSF_HSM": self.psf_shapes[:, 0], - "E2_PSF_HSM": self.psf_shapes[:, 1], - "SIGMA_PSF_HSM": self.psf_shapes[:, 2], - "FLAG_PSF_HSM": self.psf_shapes[:, 3].astype(int), - "E1_STAR_HSM": self.star_shapes[:, 0], - "E2_STAR_HSM": self.star_shapes[:, 1], - "SIGMA_STAR_HSM": self.star_shapes[:, 2], - "FLAG_STAR_HSM": self.star_shapes[:, 3].astype(int), + "HSM_E1_PSF": self.psf_shapes[:, 0], + "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), + "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), + "HSM_E1_STAR": self.star_shapes[:, 0], + "HSM_E2_STAR": self.star_shapes[:, 1], + "HSM_T_STAR": cs_size.sigma_to_T(self.star_shapes[:, 2]), + "HSM_FLAG_STAR": self.star_shapes[:, 3].astype(int), } data = {**data, **star_dict} @@ -536,16 +537,16 @@ def _interpolate_me(self): ] = final_list[j][1][where_res[0]] if self._compute_shape: shape_dict = {} - shape_dict["E1_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_E1_PSF"] = final_list[j][2][ where_res[0] ][0] - shape_dict["E2_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_E2_PSF"] = final_list[j][2][ where_res[0] ][1] - shape_dict["SIGMA_PSF_HSM"] = final_list[j][2][ - where_res[0] - ][2] - shape_dict["FLAG_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_T_PSF"] = cs_size.sigma_to_T( + final_list[j][2][where_res[0]][2] + ) + shape_dict["HSM_FLAG_PSF"] = final_list[j][2][ where_res[0] ][3] output_dict[id_tmp][final_list[j][3][where_res[0]]][ diff --git a/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py b/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py index cc20b651..1cf33ff8 100644 --- a/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py +++ b/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py @@ -254,23 +254,23 @@ def plot_meanshapes( ) # Flag mask - star_flags = starcat[hdu_no].data["FLAG_STAR_HSM"] - psf_flags = starcat[hdu_no].data["FLAG_PSF_HSM"] + star_flags = starcat[hdu_no].data["HSM_FLAG_STAR"] + psf_flags = starcat[hdu_no].data["HSM_FLAG_PSF"] flagmask = np.abs(star_flags - 1) * np.abs(psf_flags - 1) - # convert sigma to R^2's + # size column already holds T = 2 sigma^2 all_star_shapes = np.array( [ - starcat[hdu_no].data["E1_STAR_HSM"], - starcat[hdu_no].data["E2_STAR_HSM"], - 2.0 * starcat[hdu_no].data["SIGMA_STAR_HSM"] ** 2, + starcat[hdu_no].data["HSM_E1_STAR"], + starcat[hdu_no].data["HSM_E2_STAR"], + starcat[hdu_no].data["HSM_T_STAR"], ] ) all_psf_shapes = np.array( [ - starcat[hdu_no].data["E1_PSF_HSM"], - starcat[hdu_no].data["E2_PSF_HSM"], - 2.0 * starcat[hdu_no].data["SIGMA_PSF_HSM"] ** 2, + starcat[hdu_no].data["HSM_E1_PSF"], + starcat[hdu_no].data["HSM_E2_PSF"], + starcat[hdu_no].data["HSM_T_PSF"], ] ) all_CCDs = starcat[hdu_no].data["CCD_NB"] diff --git a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py index cc38b407..99dea6b9 100644 --- a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py +++ b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py @@ -14,6 +14,7 @@ import mccd import numpy as np from astropy.io import fits +from cs_util import size as cs_size from shapepipe.pipeline import file_io @@ -471,10 +472,10 @@ def shapepipe_write_output( if get_shapes: data = { "VIGNET": interp_PSFs, - "E1_PSF_HSM": PSF_shapes[:, 0], - "E2_PSF_HSM": PSF_shapes[:, 1], - "SIGMA_PSF_HSM": PSF_shapes[:, 2], - "FLAG_PSF_HSM": PSF_shapes[:, 3].astype(int), + "HSM_E1_PSF": PSF_shapes[:, 0], + "HSM_E2_PSF": PSF_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(PSF_shapes[:, 2]), + "HSM_FLAG_PSF": PSF_shapes[:, 3].astype(int), } else: data = {"VIGNET": interp_PSFs} diff --git a/src/shapepipe/modules/merge_starcat_package/merge_starcat.py b/src/shapepipe/modules/merge_starcat_package/merge_starcat.py index 3b06cd07..c5d3eccd 100644 --- a/src/shapepipe/modules/merge_starcat_package/merge_starcat.py +++ b/src/shapepipe/modules/merge_starcat_package/merge_starcat.py @@ -12,6 +12,7 @@ import numpy as np from astropy.io import fits +from cs_util import size as cs_size from shapepipe.pipeline import file_io @@ -344,7 +345,7 @@ def process(self): ) ) - # shapes (convert sigmas to R^2) + # shapes (convert sigmas to T = 2 sigma^2) g1_psf += list( starcat_j[self._hdu_table].data["PSF_MOM_LIST"][:, 0] ) @@ -352,12 +353,16 @@ def process(self): starcat_j[self._hdu_table].data["PSF_MOM_LIST"][:, 1] ) size_psf += list( - starcat_j[self._hdu_table].data["PSF_MOM_LIST"][:, 2] ** 2 + cs_size.sigma_to_T( + starcat_j[self._hdu_table].data["PSF_MOM_LIST"][:, 2] + ) ) g1 += list(starcat_j[self._hdu_table].data["STAR_MOM_LIST"][:, 0]) g2 += list(starcat_j[self._hdu_table].data["STAR_MOM_LIST"][:, 1]) size += list( - starcat_j[self._hdu_table].data["STAR_MOM_LIST"][:, 2] ** 2 + cs_size.sigma_to_T( + starcat_j[self._hdu_table].data["STAR_MOM_LIST"][:, 2] + ) ) # flags @@ -479,21 +484,20 @@ def process(self): SEx_catalogue=True, ) - # Collect columns - # convert back to sigma for consistency + # Collect columns (size stored as T = 2 sigma^2) data = { "X": x, "Y": y, "RA": ra, "DEC": dec, - "E1_PSF_HSM": g1_psf, - "E2_PSF_HSM": g2_psf, - "SIGMA_PSF_HSM": np.sqrt(size_psf), - "E1_STAR_HSM": g1, - "E2_STAR_HSM": g2, - "SIGMA_STAR_HSM": np.sqrt(size), - "FLAG_PSF_HSM": flag_psf, - "FLAG_STAR_HSM": flag_star, + "HSM_E1_PSF": g1_psf, + "HSM_E2_PSF": g2_psf, + "HSM_T_PSF": size_psf, + "HSM_E1_STAR": g1, + "HSM_E2_STAR": g2, + "HSM_T_STAR": size, + "HSM_FLAG_PSF": flag_psf, + "HSM_FLAG_STAR": flag_star, "CCD_NB": ccd_nb, } @@ -580,17 +584,17 @@ def process(self): ra += list(data_j["RA"]) dec += list(data_j["DEC"]) - # shapes (convert sigmas to R^2) - g1_psf += list(data_j["E1_PSF_HSM"]) - g2_psf += list(data_j["E2_PSF_HSM"]) - size_psf += list(data_j["SIGMA_PSF_HSM"] ** 2) - g1 += list(data_j["E1_STAR_HSM"]) - g2 += list(data_j["E2_STAR_HSM"]) - size += list(data_j["SIGMA_STAR_HSM"] ** 2) + # shapes (size column already holds T = 2 sigma^2) + g1_psf += list(data_j["HSM_E1_PSF"]) + g2_psf += list(data_j["HSM_E2_PSF"]) + size_psf += list(data_j["HSM_T_PSF"]) + g1 += list(data_j["HSM_E1_STAR"]) + g2 += list(data_j["HSM_E2_STAR"]) + size += list(data_j["HSM_T_STAR"]) # flags - flag_psf += list(data_j["FLAG_PSF_HSM"]) - flag_star += list(data_j["FLAG_STAR_HSM"]) + flag_psf += list(data_j["HSM_FLAG_PSF"]) + flag_star += list(data_j["HSM_FLAG_STAR"]) # misc @@ -623,21 +627,20 @@ def process(self): SEx_catalogue=False, ) - # Collect columns - # convert back to sigma for consistency + # Collect columns (size stored as T = 2 sigma^2) data = { "X": x, "Y": y, "RA": ra, "DEC": dec, - "E1_PSF_HSM": g1_psf, - "E2_PSF_HSM": g2_psf, - "SIGMA_PSF_HSM": np.sqrt(size_psf), - "E1_STAR_HSM": g1, - "E2_STAR_HSM": g2, - "SIGMA_STAR_HSM": np.sqrt(size), - "FLAG_PSF_HSM": flag_psf, - "FLAG_STAR_HSM": flag_star, + "HSM_E1_PSF": g1_psf, + "HSM_E2_PSF": g2_psf, + "HSM_T_PSF": size_psf, + "HSM_E1_STAR": g1, + "HSM_E2_STAR": g2, + "HSM_T_STAR": size, + "HSM_FLAG_PSF": flag_psf, + "HSM_FLAG_STAR": flag_star, "MAG": mag, "SNR": snr, "ACCEPTED": psfex_acc, diff --git a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py index 2f1a2db8..7428281d 100644 --- a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py +++ b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py @@ -11,6 +11,7 @@ import numpy as np from astropy.io import fits +from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io @@ -346,10 +347,10 @@ def _write_output(self): if self._compute_shape: data = { "VIGNET": self.interp_PSFs, - "E1_PSF_HSM": self.psf_shapes[:, 0], - "E2_PSF_HSM": self.psf_shapes[:, 1], - "SIGMA_PSF_HSM": self.psf_shapes[:, 2], - "FLAG_PSF_HSM": self.psf_shapes[:, 3].astype(int), + "HSM_E1_PSF": self.psf_shapes[:, 0], + "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), + "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), } else: data = {"VIGNET": self.interp_PSFs} @@ -499,14 +500,14 @@ def _write_output_validation(self, star_dict, psfex_cat_dict): ) data = { - "E1_PSF_HSM": self.psf_shapes[:, 0], - "E2_PSF_HSM": self.psf_shapes[:, 1], - "SIGMA_PSF_HSM": self.psf_shapes[:, 2], - "FLAG_PSF_HSM": self.psf_shapes[:, 3].astype(int), - "E1_STAR_HSM": self.star_shapes[:, 0], - "E2_STAR_HSM": self.star_shapes[:, 1], - "SIGMA_STAR_HSM": self.star_shapes[:, 2], - "FLAG_STAR_HSM": self.star_shapes[:, 3].astype(int), + "HSM_E1_PSF": self.psf_shapes[:, 0], + "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), + "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), + "HSM_E1_STAR": self.star_shapes[:, 0], + "HSM_E2_STAR": self.star_shapes[:, 1], + "HSM_T_STAR": cs_size.sigma_to_T(self.star_shapes[:, 2]), + "HSM_FLAG_STAR": self.star_shapes[:, 3].astype(int), } data = {**data, **star_dict} @@ -754,16 +755,16 @@ def _interpolate_me(self): ] = final_list[j][1][where_res[0]] if self._compute_shape: shape_dict = {} - shape_dict["E1_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_E1_PSF"] = final_list[j][2][ where_res[0] ][0] - shape_dict["E2_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_E2_PSF"] = final_list[j][2][ where_res[0] ][1] - shape_dict["SIGMA_PSF_HSM"] = final_list[j][2][ - where_res[0] - ][2] - shape_dict["FLAG_PSF_HSM"] = final_list[j][2][ + shape_dict["HSM_T_PSF"] = cs_size.sigma_to_T( + final_list[j][2][where_res[0]][2] + ) + shape_dict["HSM_FLAG_PSF"] = final_list[j][2][ where_res[0] ][3] output_dict[id_tmp][final_list[j][3][where_res[0]]][ diff --git a/tests/module/test_psfex_interp.py b/tests/module/test_psfex_interp.py index 34da52e0..cb644875 100644 --- a/tests/module/test_psfex_interp.py +++ b/tests/module/test_psfex_interp.py @@ -2,11 +2,15 @@ from types import SimpleNamespace +import galsim from astropy.io import fits import numpy as np import numpy.testing as npt import pytest +from hypothesis import assume, given, settings +from hypothesis import strategies as st +from shapepipe.pipeline import file_io from shapepipe.modules.psfex_interp_package.psfex_interp import ( PSFExInterpolator, ) @@ -32,6 +36,45 @@ def _write_galaxy_catalogue(path, **columns): ).writeto(path) +def _elliptical_psf_stamps(shears, sigma=3.0, stamp=51, scale=1.0): + """Draw elliptical Gaussian PSF stamps with given e-type distortions. + + Each ``(e1, e2)`` is applied as a galsim *distortion* (``shear(e1=, e2=)``) + so the stamp is genuinely non-round; round stamps make ``e == g == 0`` and + the e/g grammar fix becomes invisible. + """ + return np.array( + [ + galsim.Gaussian(sigma=sigma) + .shear(e1=e1, e2=e2) + .drawImage(nx=stamp, ny=stamp, scale=scale, method="no_pixel") + .array + for e1, e2 in shears + ] + ) + + +@pytest.fixture(scope="module") +def psfex_interpolator(tmp_path_factory): + """A shape-computing interpolator backed by a minimal galaxy catalogue. + + Module-scoped so it can be driven by the hypothesis property test without + tripping the function-scoped-fixture health check; ``_get_psfshapes`` only + reads ``interp_PSFs`` and overwrites ``psf_shapes``, so reuse is safe. + """ + cat_path = tmp_path_factory.mktemp("psfex") / "galaxies.fits" + _write_galaxy_catalogue(cat_path, X_IMAGE=[1.5, 2.5], Y_IMAGE=[3.5, 4.5]) + return PSFExInterpolator( + dotpsf_path=None, + galcat_path=str(cat_path), + output_path=str(cat_path.parent), + img_number="001", + w_log=SimpleNamespace(info=lambda message: None), + pos_params=["X_IMAGE", "Y_IMAGE"], + get_shapes=True, + ) + + def test_get_galaxy_positions_reads_requested_columns(tmp_path): cat_path = tmp_path / "galaxies.fits" @@ -66,3 +109,101 @@ def test_get_galaxy_positions_raises_for_missing_column(tmp_path): with pytest.raises(KeyError, match="Y_IMAGE"): interp._get_galaxy_positions() + + +# Distortion magnitude well inside |e| < 1, large enough that the e-type and +# g-type representations of the same shape differ by far more than fit noise +# (so the property has teeth), any orientation. +_e_complex = st.complex_numbers( + min_magnitude=0.05, max_magnitude=0.6, allow_nan=False, allow_infinity=False +) + + +@settings(deadline=None, max_examples=25) +@given(e=_e_complex) +def test_get_psfshapes_stores_e_type_distortion(psfex_interpolator, e): + """The stored HSM PSF shapes are e-type distortion, not g-type shear. + + ``_get_psfshapes`` reads ``moms.observed_shape.e1/.e2``; this pins it to the + galsim *distortion* accessor and away from the reduced-shear ``.g1/.g2``. + The check is exact against the same image's ``observed_shape`` (independent + of how well HSM recovers the input), and the magnitude inequality is the + tooth: for any non-round shape ``|e| > |g|`` strictly, so storing ``.g`` + instead (the pre-fix bug) would fail it. + """ + psfex_interpolator.interp_PSFs = _elliptical_psf_stamps([(e.real, e.imag)]) + psfex_interpolator._get_psfshapes() + + moms = galsim.hsm.FindAdaptiveMom( + galsim.Image(psfex_interpolator.interp_PSFs[0]), strict=False + ) + e1, e2 = moms.observed_shape.e1, moms.observed_shape.e2 + g1, g2 = moms.observed_shape.g1, moms.observed_shape.g2 + + mag_e = np.hypot(e1, e2) + mag_g = np.hypot(g1, g2) + assume(mag_e - mag_g > 1e-3) # ensure a non-degenerate e-vs-g distinction + + # Stored values are exactly the e-type distortion components ... + npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 0], e1) + npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 1], e2) + + # ... whose magnitude strictly exceeds the g-type shear it would carry if + # the .g/.e swap regressed. + mag_stored = np.hypot( + psfex_interpolator.psf_shapes[0, 0], psfex_interpolator.psf_shapes[0, 1] + ) + npt.assert_allclose(mag_stored, mag_e) + assert mag_stored > mag_g + 1e-4 + + +def test_write_output_uses_hsm_grammar_and_routes_size_to_T( + tmp_path, monkeypatch +): + """``_write_output`` writes the HSM grammar names and stores T, not sigma. + + Captures the dict handed to ``save_as_fits`` and asserts the frozen + grammar (``HSM_E1_PSF`` / ``HSM_E2_PSF`` / ``HSM_T_PSF`` / ``HSM_FLAG_PSF``, + no legacy ``*_PSF_HSM``) and that the size column is routed through + ``cs_util.size.sigma_to_T`` (``T = 2 sigma^2``) at the producer. + """ + cat_path = tmp_path / "galaxies.fits" + _write_galaxy_catalogue(cat_path, X_IMAGE=[1.5, 2.5], Y_IMAGE=[3.5, 4.5]) + interp = PSFExInterpolator( + dotpsf_path=None, + galcat_path=str(cat_path), + output_path=str(tmp_path), + img_number="001", + w_log=SimpleNamespace(info=lambda message: None), + pos_params=["X_IMAGE", "Y_IMAGE"], + get_shapes=True, + ) + interp.interp_PSFs = _elliptical_psf_stamps([(0.3, 0.1), (-0.2, 0.25)]) + interp._get_psfshapes() + + captured = {} + + def _capture(self, data=None, **kwargs): + captured["data"] = data + + monkeypatch.setattr(file_io.FITSCatalogue, "save_as_fits", _capture) + + interp._write_output() + data = captured["data"] + + assert { + "VIGNET", + "HSM_E1_PSF", + "HSM_E2_PSF", + "HSM_T_PSF", + "HSM_FLAG_PSF", + } <= set(data) + for legacy in ("E1_PSF_HSM", "E2_PSF_HSM", "SIGMA_PSF_HSM", "FLAG_PSF_HSM"): + assert legacy not in data + + # e-type distortion stored straight through from _get_psfshapes ... + npt.assert_array_equal(data["HSM_E1_PSF"], interp.psf_shapes[:, 0]) + npt.assert_array_equal(data["HSM_E2_PSF"], interp.psf_shapes[:, 1]) + # ... and the size column is T = 2 sigma^2 (sigma_to_T), not raw sigma. + npt.assert_allclose(data["HSM_T_PSF"], 2.0 * interp.psf_shapes[:, 2] ** 2) + assert not np.allclose(data["HSM_T_PSF"], interp.psf_shapes[:, 2]) From 85a18ab558e445d120f5bb09bab56ce7eb66801d Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Tue, 30 Jun 2026 22:47:44 +0200 Subject: [PATCH 18/33] refactor: remove the spread_model pipeline module (unused, never cut on) Axel's review point #2: spread_model must never be cut on -- and it isn't used as a pipeline step. Delete the module and unwire it: - delete spread_model_package/ and spread_model_runner.py (only the runner imported the package, so the removal is self-contained) - make_cat_runner: drop spread_model_runner from the input modules, drop the sexcat_sm input pattern/ext and the SM_* config, remove the save_sm_data() call, and collapse the now-dead 3/4/5-input branching (the len==5 galsim-dual-shape path was dormant -- no galsim_shapes_runner exists) - example configs, module docstring, pipeline tutorial: drop spread_model The make_cat-side writer (save_sm_data) and its SPREAD_* columns are removed in the column-grammar commit that follows. summary_params_pre_v2.py keeps its historical v1 runner list untouched. Downstream, sp_validation cuts on SPREAD_MODEL (classification_galaxy_base, do_spread_model default True) -- that paired change rides the separate #205 PR. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MgDK9SKn2PsDZGEbr4WFb3 --- docs/source/pipeline_tutorial.md | 11 +- example/cfis/config_make_cat_mccd.ini | 13 +- example/cfis/config_make_cat_psfex.ini | 10 +- example/cfis/config_make_cat_psfex_nosm.ini | 2 - .../modules/make_cat_package/__init__.py | 20 +- src/shapepipe/modules/make_cat_runner.py | 69 +--- .../modules/spread_model_package/__init__.py | 36 -- .../spread_model_package/spread_model.py | 309 ------------------ src/shapepipe/modules/spread_model_runner.py | 69 ---- 9 files changed, 24 insertions(+), 515 deletions(-) delete mode 100644 src/shapepipe/modules/spread_model_package/__init__.py delete mode 100644 src/shapepipe/modules/spread_model_package/spread_model.py delete mode 100644 src/shapepipe/modules/spread_model_runner.py diff --git a/docs/source/pipeline_tutorial.md b/docs/source/pipeline_tutorial.md index 95a0aa94..5279b5f9 100644 --- a/docs/source/pipeline_tutorial.md +++ b/docs/source/pipeline_tutorial.md @@ -289,16 +289,15 @@ job_sp TILE_ID -j 16 is the selection of galaxies as extended objects compared to the PSF. First, the PSF model is interpolated to galaxy positions, according to the PSF model with `psfex_interp` or `mccd_interp`. Next, postage stamps around galaxies -of the weights maps are created via `vignetmaker`. Then, the spread model -is computed by the `spread_model` module. Finally, postage stamps +of the weights maps are created via `vignetmaker`. Finally, postage stamps around galaxies of single-exposure data is extracted with another call to `vignetmaker`. The output directory is -- `run_sp_MiViSmVi` if the PSF model is `mccd`; -- `run_sp_tile_PsViSmVi` for the `PSFEx` PSF model. +- `run_sp_MiViVi` if the PSF model is `mccd`; +- `run_sp_tile_PsViVi` for the `PSFEx` PSF model. -This corresponds to the MCCD/PSFex interpolation (`Mi`/`Pi`), `vignetmaker` (`Vi`), `spread_model` (`Sm`), and the +This corresponds to the MCCD/PSFex interpolation (`Mi`/`Pi`), `vignetmaker` (`Vi`), and the second call to `vignetmaker` (`Vi`). @@ -325,7 +324,7 @@ job_sp TILE_ID -j 64 This task first merges the `NJOB` parallel `ngmix` output files from the previous step into one output file. Then, previously obtained information are pasted into a _final_ shape catalogue via `make_cat`. Included are galaxy detection and basic measurement parameters, the PSF model at -galaxy positions, the spread-model classification, and the shape measurement. +galaxy positions, and the shape measurement. Two output directories are created. The first one is `run_sp_Ms` for the `merge_sep` run. diff --git a/example/cfis/config_make_cat_mccd.ini b/example/cfis/config_make_cat_mccd.ini index 5aa33d27..4ac61823 100644 --- a/example/cfis/config_make_cat_mccd.ini +++ b/example/cfis/config_make_cat_mccd.ini @@ -34,7 +34,7 @@ LOG_NAME = log_sp RUN_LOG_NAME = log_run_sp # Input directory, containing input files, single string or list of names with length matching FILE_PATTERN -INPUT_DIR = last:sextractor_runner_run_1, last:spread_model_runner, last:mccd_interp_runner, last:merge_sep_cats_runner +INPUT_DIR = last:sextractor_runner_run_1, last:mccd_interp_runner, last:merge_sep_cats_runner # Output directory OUTPUT_DIR = ./output @@ -56,12 +56,10 @@ TIMEOUT = 96:00:00 # Input file pattern(s), list of strings with length matching number of expected input file types # Cannot contain wild cards -FILE_PATTERN = sexcat, sexcat_sm, galaxy_psf, ngmix -#, galsim +FILE_PATTERN = sexcat, galaxy_psf, ngmix # FILE_EXT (optional) list of string extensions to identify input files -FILE_EXT = .fits, .fits, .sqlite, .fits -#, .fits +FILE_EXT = .fits, .sqlite, .fits # Numbering convention, string that exemplifies a numbering pattern. # Matches input single exposures (with 'p' removed) @@ -69,9 +67,4 @@ FILE_EXT = .fits, .fits, .sqlite, .fits # sections below NUMBERING_SCHEME = -000-000 -SM_DO_CLASSIFICATION = True -SM_STAR_THRESH = 0.003 -SM_GAL_THRESH = 0.01 - SHAPE_MEASUREMENT_TYPE = ngmix -#, galsim diff --git a/example/cfis/config_make_cat_psfex.ini b/example/cfis/config_make_cat_psfex.ini index a7407d99..552c578e 100644 --- a/example/cfis/config_make_cat_psfex.ini +++ b/example/cfis/config_make_cat_psfex.ini @@ -55,14 +55,14 @@ TIMEOUT = 96:00:00 [MAKE_CAT_RUNNER] # Input directory, containing input files, single string or list of names with length matching FILE_PATTERN -INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:spread_model_runner, last:psfex_interp_runner, last:merge_sep_cats_runner +INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:psfex_interp_runner, last:merge_sep_cats_runner # Input file pattern(s), list of strings with length matching number of expected input file types # Cannot contain wild cards -FILE_PATTERN = sexcat, sexcat_sm, galaxy_psf, ngmix +FILE_PATTERN = sexcat, galaxy_psf, ngmix # FILE_EXT (optional) list of string extensions to identify input files -FILE_EXT = .fits, .fits, .sqlite, .fits +FILE_EXT = .fits, .sqlite, .fits # Numbering convention, string that exemplifies a numbering pattern. # Matches input single exposures (with 'p' removed) @@ -70,8 +70,4 @@ FILE_EXT = .fits, .fits, .sqlite, .fits # sections below NUMBERING_SCHEME = -000-000 -SM_DO_CLASSIFICATION = True -SM_STAR_THRESH = 0.003 -SM_GAL_THRESH = 0.01 - SHAPE_MEASUREMENT_TYPE = ngmix diff --git a/example/cfis/config_make_cat_psfex_nosm.ini b/example/cfis/config_make_cat_psfex_nosm.ini index 1983f91c..fc458232 100644 --- a/example/cfis/config_make_cat_psfex_nosm.ini +++ b/example/cfis/config_make_cat_psfex_nosm.ini @@ -71,6 +71,4 @@ FILE_EXT = .fits, .sqlite, .fits # sections below NUMBERING_SCHEME = -000-000 -SM_DO_CLASSIFICATION = False - SHAPE_MEASUREMENT_TYPE = ngmix diff --git a/src/shapepipe/modules/make_cat_package/__init__.py b/src/shapepipe/modules/make_cat_package/__init__.py index 1bd1cee5..84e6c97a 100644 --- a/src/shapepipe/modules/make_cat_package/__init__.py +++ b/src/shapepipe/modules/make_cat_package/__init__.py @@ -7,7 +7,6 @@ :Parent modules: - ``sextractor_runner`` -- ``spread_model_runner`` - ``psfex_interp_runner`` or ``mccd_interp_runner`` - ``ngmix_runner`` @@ -21,27 +20,12 @@ This module creates a *final* catalogue combining the output of various previous module runs. This gathers all relevant information on the measured galaxies for weak-lensing post-processing. This includes galaxy detection and -basic measurement parameters, the PSF model at galaxy positions, the -spread-model classification, and the shape measurement. +basic measurement parameters, the PSF model at galaxy positions, and the +shape measurement. Module-specific config file entries =================================== -SM_DO_CLASSIFICATION : bool, optional - Adds spread-model star/galaxy classification flag as the column - ``SPREAD_CLASS`` to the output if ``True`` -SM_STAR_THRESH : float, optional - Threshold :math:`s_{\rm star, thresh}` for star selection; object is - classified as a star if - :math:`|x s + 2 \sigma_s | < s_{\textrm{star, thresh}}` - where :math:`s` is the spread model and :math:`\sigma_s` is the spread - model error; default value is ``0.003`` -SM_GAL_THRESH : float, optional - Threshold :math:`s_{\rm gal, thresh}` for galaxy selection; object is - classified as a galaxy if - :math:`s + 2 \sigma_s > s_{\textrm{gal, thresh}}` where :math:`s` is the - spread model and :math:`\sigma_s` is the spread model error; default value - is ``0.01`` SHAPE_MEASUREMENT_TYPE : list Shape measurement method, valid options are ``ngmix`` and/or ``galsim`` SAVE_PSF_DATA : bool, optional diff --git a/src/shapepipe/modules/make_cat_runner.py b/src/shapepipe/modules/make_cat_runner.py index fa1fc43f..6ab6449d 100644 --- a/src/shapepipe/modules/make_cat_runner.py +++ b/src/shapepipe/modules/make_cat_runner.py @@ -16,17 +16,15 @@ version="1.1", input_module=[ "sextractor_runner", - "spread_model_runner", "psfex_interp_runner", "ngmix_runner", ], file_pattern=[ "tile_sexcat", - "sexcat_sm", "galaxy_psf", "ngmix", ], - file_ext=[".fits", ".fits", ".sqlite", ".fits"], + file_ext=[".fits", ".sqlite", ".fits"], depends=["numpy", "sqlitedict"], ) def make_cat_runner( @@ -39,37 +37,11 @@ def make_cat_runner( ): """Define The Make Catalogue Runner.""" # Set input file paths - if len(input_file_list) == 3: - # No spread model input - ( - tile_sexcat_path, - galaxy_psf_path, - shape1_cat_path, - ) = input_file_list - sexcat_sm_path = None - else: - # With spread model input - ( - tile_sexcat_path, - sexcat_sm_path, - galaxy_psf_path, - shape1_cat_path, - ) = input_file_list[0:4] - if len(input_file_list) == 5: - # With second shape catalogue input - shape2_cat_path = input_file_list[4] - - # Fetch classification options - do_classif = config.getboolean( - module_config_sec, - "SM_DO_CLASSIFICATION", - ) - if do_classif: - star_thresh = config.getfloat(module_config_sec, "SM_STAR_THRESH") - gal_thresh = config.getfloat(module_config_sec, "SM_GAL_THRESH") - else: - star_thresh = None - gal_thresh = None + ( + tile_sexcat_path, + galaxy_psf_path, + shape1_cat_path, + ) = input_file_list # Fetch shape measurement type shape_type_list = config.getlist( @@ -99,35 +71,16 @@ def make_cat_runner( n_obj = make_cat.save_sextractor_data(final_cat_file, tile_sexcat_path) cat_size_sextractor = n_obj - # Save spread-model data - if sexcat_sm_path is None: - w_log.info("No sm cat input, setting spread model to 99") - else: - w_log.info("Save spread-model data") - cat_size_sm = make_cat.save_sm_data( - final_cat_file, - sexcat_sm_path, - do_classif, - star_thresh, - gal_thresh, - n_obj=n_obj - ) - - if cat_size_sextractor != cat_size_sm: - w_log( - f"Warnign: SExtractor catalogue {tile_sexcat_path} has different size" - + f" ({cat_size_sextractor} than spread_model catalogue" - + f" {sexcat_sm_path} ({cat_size_sm})" - ) - # Save shape data sc_inst = make_cat.SaveCatalogue(final_cat_file, cat_size_sextractor, w_log) w_log.info("Save shape measurement data") for shape_type in shape_type_list: w_log.info(f"Save {shape_type.lower()} data") - cat_path = ( - shape2_cat_path if shape_type == "galsim" else shape1_cat_path - ) + # A second, galsim-specific shape catalogue path (shape2_cat_path) + # was never produced by any runner in this pipeline (no + # galsim_shapes_runner exists) — galsim shapes are read from the + # same input as ngmix (shape1_cat_path). + cat_path = shape1_cat_path err_msg = sc_inst.process(shape_type.lower(), cat_path) diff --git a/src/shapepipe/modules/spread_model_package/__init__.py b/src/shapepipe/modules/spread_model_package/__init__.py deleted file mode 100644 index 336283e7..00000000 --- a/src/shapepipe/modules/spread_model_package/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -"""SPREAD MODEL PACKAGE. - -This package contains the module for ``spread_model``. - -:Author: Axel Guinot - -:Parent modules: - -- ``sextractor_runner`` -- ``psfex_interp_runner`` or ``mccd_interp_runner`` - and ``vignetmaker_runner`` - -:Input: SExtractor output catalogue, PSFEx output catalogue and vignet files - -:Output: Updated SExtractor or new FITS file - -Description -=========== - -This module refines the galaxy sample that will be used for shape measurement -using the spread model method of :cite:`desai:12` and :cite:`mohr:12`. - -Module-specific config file entries -=================================== - -PREFIX : str, optional - Ouput file prefix -PIXEL_SCALE : float - Pixel scale in arcseconds -OUTPUT_MODE : str - Output mode, options are ``add`` to add the outputs to the input - SExtractor catalogue or ``new`` to generte a new output catalogue - -""" - -__all__ = ["spread_model"] diff --git a/src/shapepipe/modules/spread_model_package/spread_model.py b/src/shapepipe/modules/spread_model_package/spread_model.py deleted file mode 100644 index 1461ecce..00000000 --- a/src/shapepipe/modules/spread_model_package/spread_model.py +++ /dev/null @@ -1,309 +0,0 @@ -"""SPREAD MODEL. - -Class to compute the spread model, criterion to select galaxies - -:Author: Axel Guinot - -""" - -import galsim -import numpy as np -from cs_util import size as cs_size -from sqlitedict import SqliteDict - -from shapepipe.pipeline import file_io - - -def get_sm(obj_vign, psf_vign, model_vign, weight_vign): - """Get Spread Model. - - This method compute the spread moel for an object. - - Parameters - ---------- - obj_vign : numpy.ndarray - Vignet of the object - psf_vign : numpy.ndarray - Vignet of the gaussian model of the PSF - model_vign : numpy.ndarray - Vignet of the galaxy model - weight_vign : numpy.ndarray - Vignet of the weight at the object position - - Returns - ------- - tuple - Spread model and corresponding error values - - """ - # Mask invalid pixels - m = (obj_vign > -1e29) & (weight_vign > 0) - w = m.astype(float) - - # Set noise as inverse weight - noise_v = (1 / weight_vign).ravel() - - # Remove infinite noise pixels - noise_v[np.isinf(noise_v)] = 0 - - # Transform 2D vignets to 1D vectors - t_v = model_vign.ravel() - g_v = obj_vign.ravel() - psf_v = psf_vign.ravel() - w_v = w.ravel() - - # Compute scalar products used in spread model - tg = np.sum(t_v * w_v * g_v) - pg = np.sum(psf_v * w_v * g_v) - tp = np.sum(t_v * w_v * psf_v) - pp = np.sum(psf_v * w_v * psf_v) - - tnt = np.sum(t_v * noise_v * t_v * w_v) - pnp = np.sum(psf_v * noise_v * psf_v * w_v) - tnp = np.sum(t_v * noise_v * psf_v * w_v) - err = tnt * pg**2 + pnp * tg**2 - 2 * tnp * pg * tg - - # Compute spread model - if pg > 0: - sm = (tg / pg) - (tp / pp) - else: - sm = 1 - - if (pg > 0) & (err > 0): - sm_err = np.sqrt(err) / pg**2 - else: - sm_err = 1 - - return sm, sm_err - - -def get_model(sigma, flux, img_shape, pixel_scale=0.186): - """Get Model. - - This method computes - - an exponential galaxy model with scale radius = 1/16 FWHM - - a Gaussian model for the PSF - - Parameters - ---------- - sigma : float - Sigma of the PSF (in pixel units) - flux : float - Flux of the galaxy for the model - img_shape : list - Size of the output vignet ``[xsize, ysize]`` - pixel_scale : float, optional - Pixel scale to use for the model (in arcsec); default is ``0.186`` - - Returns - ------- - tuple - Vignet of the galaxy model and of the PSF model - - """ - # Get scale radius - scale_radius = 1 / 16 * cs_size.sigma_to_fwhm(sigma) * pixel_scale - - # Get galaxy model - gal_obj = galsim.Exponential(scale_radius=scale_radius, flux=flux) - - # Get PSF - psf_obj = galsim.Gaussian(sigma=sigma * pixel_scale) - - # Convolve both - gal_obj = galsim.Convolve(gal_obj, psf_obj) - - # Draw galaxy and PSF on vignets - gal_vign = gal_obj.drawImage( - nx=img_shape[0], ny=img_shape[1], scale=pixel_scale - ).array - - psf_vign = psf_obj.drawImage( - nx=img_shape[0], ny=img_shape[1], scale=pixel_scale - ).array - - return gal_vign, psf_vign - - -class SpreadModel(object): - """The Spread Model Class. - - Parameters - ---------- - sex_cat_path : str - Path to SExtractor catalogue - psf_cat_path : str - Path to PSF catalogue - weight_cat_path : str - Path to weight catalogue - output_path : str - Output file path of pasted catalog - pixel_scale : float - Pixel scale in arcsec - output_mode : str - Options are ``new`` or ``add`` - - Notes - ----- - For the ``output_mode``: - - - ``new`` will create a new catalogue with - ``[number, mag, sm, sm_err]`` - - ``add`` will output a copy of the input SExtractor with the columns - ``sm`` and ``sm_err`` - - """ - - def __init__( - self, - sex_cat_path, - psf_cat_path, - weight_cat_path, - output_path, - pixel_scale, - output_mode, - ): - - self._sex_cat_path = sex_cat_path - self._psf_cat_path = psf_cat_path - self._weight_cat_path = weight_cat_path - self._output_path = output_path - self._pixel_scale = pixel_scale - self._output_mode = output_mode - - def process(self): - """Process. - - Process the spread model computation - - """ - # Get data - sex_cat = file_io.FITSCatalogue(self._sex_cat_path, SEx_catalogue=True) - sex_cat.open() - obj_id = np.copy(sex_cat.get_data()["NUMBER"]) - obj_vign = np.copy(sex_cat.get_data()["VIGNET"]) - obj_mag = None - if self._output_mode == "new": - obj_mag = np.copy(sex_cat.get_data()["MAG_AUTO"]) - sex_cat.close() - - psf_cat = SqliteDict(self._psf_cat_path) - - weight_cat = file_io.FITSCatalogue( - self._weight_cat_path, - SEx_catalogue=True, - ) - weight_cat.open() - weigh_vign = weight_cat.get_data()["VIGNET"] - weight_cat.close() - - # Get spread model - skip_obj = False - spread_model_final = [] - spread_model_err_final = [] - for idx, id_tmp in enumerate(obj_id): - sigma_list = [] - - if psf_cat[str(id_tmp)] == "empty": - spread_model_final.append(-1) - spread_model_err_final.append(1) - continue - - psf_expccd_name = list(psf_cat[str(id_tmp)].keys()) - - for expccd_name_tmp in psf_expccd_name: - psf_cat_id_ccd = psf_cat[str(id_tmp)][expccd_name_tmp] - sigma_list.append(psf_cat_id_ccd["SHAPES"]["SIGMA_PSF_HSM"]) - - obj_sigma_tmp = np.mean(sigma_list) - if obj_sigma_tmp > 0: - obj_vign_tmp = obj_vign[idx] - obj_flux_tmp = 1.0 - obj_weight_tmp = weigh_vign[idx] - obj_model_tmp, obj_psf_tmp = get_model( - obj_sigma_tmp, - obj_flux_tmp, - obj_vign_tmp.shape, - self._pixel_scale, - ) - - obj_sm, obj_sm_err = get_sm( - obj_vign_tmp, obj_psf_tmp, obj_model_tmp, obj_weight_tmp - ) - else: - # size < 0, something is not right with this object - obj_sm, obj_sm_err = -1.0, -1.0 - - spread_model_final.append(obj_sm) - spread_model_err_final.append(obj_sm_err) - - spread_model_final = np.array(spread_model_final, dtype="float64") - spread_model_err_final = np.array( - spread_model_err_final, - dtype="float64", - ) - - psf_cat.close() - - self.save_results( - spread_model_final, spread_model_err_final, obj_mag, obj_id - ) - - def save_results(self, sm, sm_err, mag, number): - """Save Results. - - Save output catalogue with spread model and errors. - - Parameters - ---------- - sm : numpy.ndarray - Value of the spread model for all objects - sm_err : numpy.ndarray - Value of the spread model error for all objects - mag : numpy.ndarray - Magnitude of all objects (only for a new catalogue) - number : numpy.ndarray - ID of all objects (only for a new catalogue) - - Raises - ------ - ValueError - For incorrect output mode - - """ - if self._output_mode == "new": - new_cat = file_io.FITSCatalogue( - self._output_path, - SEx_catalogue=True, - open_mode=file_io.BaseCatalogue.OpenMode.ReadWrite, - ) - dict_data = { - "NUMBER": number, - "MAG": mag, - "SPREAD_MODEL": sm, - "SPREADERR_MODEL": sm_err, - } - new_cat.save_as_fits( - data=dict_data, sex_cat_path=self._sex_cat_path - ) - elif self._output_mode == "add": - ori_cat = file_io.FITSCatalogue( - self._sex_cat_path, - SEx_catalogue=True, - ) - ori_cat.open() - new_cat = file_io.FITSCatalogue( - self._output_path, - SEx_catalogue=True, - open_mode=file_io.BaseCatalogue.OpenMode.ReadWrite, - ) - ori_cat.add_col( - "SPREAD_MODEL", sm, new_cat=True, new_cat_inst=new_cat - ) - ori_cat.add_col("SPREAD_MODEL", sm, new_cat=True, new_cat_inst=new_cat) - ori_cat.close() - new_cat.open() - new_cat.add_col("SPREADERR_MODEL", sm_err) - new_cat.close() - else: - raise ValueError("Mode must be in [new, add].") diff --git a/src/shapepipe/modules/spread_model_runner.py b/src/shapepipe/modules/spread_model_runner.py deleted file mode 100644 index 16c9a2d3..00000000 --- a/src/shapepipe/modules/spread_model_runner.py +++ /dev/null @@ -1,69 +0,0 @@ -"""SPREAD MODEL RUNNER. - -Module runner for ``spread_model``. - -:Author: Axel Guinot - -""" - -from shapepipe.modules.module_decorator import module_runner -from shapepipe.modules.spread_model_package.spread_model import SpreadModel - - -@module_runner( - version="1.1", - input_module=[ - "sextractor_runner", - "psfex_interp_runner", - "vignetmaker_runner", - ], - file_pattern=["sexcat", "galaxy_psf", "weight_vign"], - file_ext=[".fits", ".sqlite", ".fits"], - depends=["numpy", "galsim"], - run_method="parallel", -) -def spread_model_runner( - input_file_list, - run_dirs, - file_number_string, - config, - module_config_sec, - w_log, -): - """Define The Spread Model Runner.""" - # Get input files - sex_cat_path, psf_cat_path, weight_cat_path = input_file_list - - # Get file prefix (optional) - if config.has_option(module_config_sec, "PREFIX"): - prefix = config.get(module_config_sec, "PREFIX") - if (prefix.lower() != "none") & (prefix != ""): - prefix = prefix + "_" - else: - prefix = "" - else: - prefix = "" - - # Get pixel scale and output mode - pixel_scale = config.getfloat(module_config_sec, "PIXEL_SCALE") - output_mode = config.get(module_config_sec, "OUTPUT_MODE") - - # Set output file path - file_name = f"{prefix}sexcat_sm{file_number_string}.fits" - output_path = f'{run_dirs["output"]}/{file_name}' - - # Create spread model class instance - sm_inst = SpreadModel( - sex_cat_path, - psf_cat_path, - weight_cat_path, - output_path, - pixel_scale, - output_mode, - ) - - # Process spread model computation - sm_inst.process() - - # No return objects - return None, None From 4f0756d0e60d343ecfba18582f37cff649661804 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Tue, 30 Jun 2026 22:48:04 +0200 Subject: [PATCH 19/33] refactor(make_cat): one column grammar for ngmix, galsim & HSM-sink columns Apply the ShapePipe-v2 grammar (ESTIMATOR_COMPONENT_OBJECT, uppercase; galaxy the implicit default object with no token; ellipticity split into named scalar G1/G2 for g-type and E1/E2 for e-type; one stored size T = 2*sigma^2 via cs_util.size) to every column make_cat writes, and drop the spread_model writer: ngmix galaxy: NGMIX_*_GAL_ -> NGMIX_*_ (drop GAL; realigns with what sp_validation already reads, e.g. NGMIX_T_NOSHEAR) galsim: GALSIM_GAL_ELL[_ERR|_UNCORR]_ packed(N,2) -> scalar GALSIM_{G1,G2}[_ERR|_UNCORR]_; GALSIM_PSF_ELL_ -> GALSIM_{G1,G2}_PSF_; GALSIM_*_SIGMA_ -> GALSIM_T[_PSF]_ via cs_size.sigma_to_T. Also fixes a latent NameError (undefined n_obj -> len(self._obj_id)) in this dormant block. HSM sink: _save_psf_data's PSF_ELL_ -> HSM_E1/E2_PSF_, PSF_FWHM_ -> HSM_T_PSF_ (read straight from the SHAPES dict, now T), PSF_FLAG_ -> HSM_FLAG_PSF_ spread_model: delete the save_sm_data writer and its SPREAD_MODEL / SPREADERR_MODEL / SPREAD_CLASS columns Updates the example .param allow-lists (final_cat, cat_matched) and the make_cat /grammar/averaging property tests; adds test_galsim_grammar_properties. ngmix PSF (PSF_ORIG/PSF_RECONV) columns were already grammar-correct. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MgDK9SKn2PsDZGEbr4WFb3 --- example/cfis/final_cat.param | 97 +++---- example/unions_800/cat_matched.param | 97 +++---- .../modules/make_cat_package/make_cat.py | 272 ++++++++---------- .../module/test_galsim_grammar_properties.py | 251 ++++++++++++++++ tests/module/test_make_cat.py | 36 ++- tests/module/test_psf_averaging_properties.py | 16 +- tests/module/test_psf_grammar_properties.py | 128 ++++++++- 7 files changed, 609 insertions(+), 288 deletions(-) create mode 100644 tests/module/test_galsim_grammar_properties.py diff --git a/example/cfis/final_cat.param b/example/cfis/final_cat.param index a469aac7..d403b034 100644 --- a/example/cfis/final_cat.param +++ b/example/cfis/final_cat.param @@ -15,13 +15,6 @@ NGMIX_MCAL_FLAGS NGMIX_G1_PSF_ORIG_NOSHEAR NGMIX_G2_PSF_ORIG_NOSHEAR -# spread class -#SPREAD_CLASS - -# spread model flag and error -#SPREAD_MODEL -#SPREADERR_MODEL - # Number of epochs (exposures) N_EPOCH NGMIX_N_EPOCH @@ -30,45 +23,45 @@ NGMIX_N_EPOCH ## Ngmix: model fitting # galaxy ellipticity -NGMIX_G1_GAL_1M -NGMIX_G2_GAL_1M -NGMIX_G1_GAL_1P -NGMIX_G2_GAL_1P -NGMIX_G1_GAL_2M -NGMIX_G2_GAL_2M -NGMIX_G1_GAL_2P -NGMIX_G2_GAL_2P -NGMIX_G1_GAL_NOSHEAR -NGMIX_G2_GAL_NOSHEAR -#NGMIX_G1_ERR_GAL_1M -#NGMIX_G2_ERR_GAL_1M -#NGMIX_G1_ERR_GAL_1P -#NGMIX_G2_ERR_GAL_1P -#NGMIX_G1_ERR_GAL_2M -#NGMIX_G2_ERR_GAL_2M -#NGMIX_G1_ERR_GAL_2P -#NGMIX_G2_ERR_GAL_2P -NGMIX_G1_ERR_GAL_NOSHEAR -NGMIX_G2_ERR_GAL_NOSHEAR +NGMIX_G1_1M +NGMIX_G2_1M +NGMIX_G1_1P +NGMIX_G2_1P +NGMIX_G1_2M +NGMIX_G2_2M +NGMIX_G1_2P +NGMIX_G2_2P +NGMIX_G1_NOSHEAR +NGMIX_G2_NOSHEAR +#NGMIX_G1_ERR_1M +#NGMIX_G2_ERR_1M +#NGMIX_G1_ERR_1P +#NGMIX_G2_ERR_1P +#NGMIX_G1_ERR_2M +#NGMIX_G2_ERR_2M +#NGMIX_G1_ERR_2P +#NGMIX_G2_ERR_2P +NGMIX_G1_ERR_NOSHEAR +NGMIX_G2_ERR_NOSHEAR # flags -NGMIX_FLAGS_GAL_1M -NGMIX_FLAGS_GAL_1P -NGMIX_FLAGS_GAL_2M -NGMIX_FLAGS_GAL_2P -NGMIX_FLAGS_GAL_NOSHEAR +NGMIX_FLAGS_1M +NGMIX_FLAGS_1P +NGMIX_FLAGS_2M +NGMIX_FLAGS_2P +NGMIX_FLAGS_NOSHEAR # size and error -NGMIX_T_GAL_1M -NGMIX_T_GAL_1P -NGMIX_T_GAL_2M -NGMIX_T_GAL_2P -NGMIX_T_GAL_NOSHEAR -NGMIX_T_ERR_GAL_1M -NGMIX_T_ERR_GAL_1P -NGMIX_T_ERR_GAL_2M -NGMIX_T_ERR_GAL_2P -NGMIX_T_ERR_GAL_NOSHEAR +NGMIX_T_1M +NGMIX_T_1P +NGMIX_T_2M +NGMIX_T_2P +NGMIX_T_NOSHEAR +NGMIX_T_ERR_1M +NGMIX_T_ERR_1P +NGMIX_T_ERR_2M +NGMIX_T_ERR_2P +NGMIX_T_ERR_NOSHEAR NGMIX_T_PSF_RECONV_1M NGMIX_T_PSF_RECONV_1P NGMIX_T_PSF_RECONV_2M @@ -76,16 +69,16 @@ NGMIX_T_PSF_RECONV_2P NGMIX_T_PSF_RECONV_NOSHEAR # flux and error -NGMIX_FLUX_GAL_1M -NGMIX_FLUX_GAL_1P -NGMIX_FLUX_GAL_2M -NGMIX_FLUX_GAL_2P -NGMIX_FLUX_GAL_NOSHEAR -NGMIX_FLUX_ERR_GAL_1M -NGMIX_FLUX_ERR_GAL_1P -NGMIX_FLUX_ERR_GAL_2M -NGMIX_FLUX_ERR_GAL_2P -NGMIX_FLUX_ERR_GAL_NOSHEAR +NGMIX_FLUX_1M +NGMIX_FLUX_1P +NGMIX_FLUX_2M +NGMIX_FLUX_2P +NGMIX_FLUX_NOSHEAR +NGMIX_FLUX_ERR_1M +NGMIX_FLUX_ERR_1P +NGMIX_FLUX_ERR_2M +NGMIX_FLUX_ERR_2P +NGMIX_FLUX_ERR_NOSHEAR # magnitudes MAG_AUTO diff --git a/example/unions_800/cat_matched.param b/example/unions_800/cat_matched.param index e1869df5..f387be00 100644 --- a/example/unions_800/cat_matched.param +++ b/example/unions_800/cat_matched.param @@ -15,13 +15,6 @@ NGMIX_MCAL_FLAGS NGMIX_G1_PSF_ORIG_NOSHEAR NGMIX_G2_PSF_ORIG_NOSHEAR -# spread class -SPREAD_CLASS - -# spread model flag and error -SPREAD_MODEL -SPREADERR_MODEL - # Number of epochs (exposures) N_EPOCH NGMIX_N_EPOCH @@ -30,45 +23,45 @@ NGMIX_N_EPOCH ## Ngmix: model fitting # galaxy ellipticity -NGMIX_G1_GAL_1M -NGMIX_G2_GAL_1M -NGMIX_G1_GAL_1P -NGMIX_G2_GAL_1P -NGMIX_G1_GAL_2M -NGMIX_G2_GAL_2M -NGMIX_G1_GAL_2P -NGMIX_G2_GAL_2P -NGMIX_G1_GAL_NOSHEAR -NGMIX_G2_GAL_NOSHEAR -#NGMIX_G1_ERR_GAL_1M -#NGMIX_G2_ERR_GAL_1M -#NGMIX_G1_ERR_GAL_1P -#NGMIX_G2_ERR_GAL_1P -#NGMIX_G1_ERR_GAL_2M -#NGMIX_G2_ERR_GAL_2M -#NGMIX_G1_ERR_GAL_2P -#NGMIX_G2_ERR_GAL_2P -NGMIX_G1_ERR_GAL_NOSHEAR -NGMIX_G2_ERR_GAL_NOSHEAR +NGMIX_G1_1M +NGMIX_G2_1M +NGMIX_G1_1P +NGMIX_G2_1P +NGMIX_G1_2M +NGMIX_G2_2M +NGMIX_G1_2P +NGMIX_G2_2P +NGMIX_G1_NOSHEAR +NGMIX_G2_NOSHEAR +#NGMIX_G1_ERR_1M +#NGMIX_G2_ERR_1M +#NGMIX_G1_ERR_1P +#NGMIX_G2_ERR_1P +#NGMIX_G1_ERR_2M +#NGMIX_G2_ERR_2M +#NGMIX_G1_ERR_2P +#NGMIX_G2_ERR_2P +NGMIX_G1_ERR_NOSHEAR +NGMIX_G2_ERR_NOSHEAR # flags -NGMIX_FLAGS_GAL_1M -NGMIX_FLAGS_GAL_1P -NGMIX_FLAGS_GAL_2M -NGMIX_FLAGS_GAL_2P -NGMIX_FLAGS_GAL_NOSHEAR +NGMIX_FLAGS_1M +NGMIX_FLAGS_1P +NGMIX_FLAGS_2M +NGMIX_FLAGS_2P +NGMIX_FLAGS_NOSHEAR # size and error -NGMIX_T_GAL_1M -NGMIX_T_GAL_1P -NGMIX_T_GAL_2M -NGMIX_T_GAL_2P -NGMIX_T_GAL_NOSHEAR -NGMIX_T_ERR_GAL_1M -NGMIX_T_ERR_GAL_1P -NGMIX_T_ERR_GAL_2M -NGMIX_T_ERR_GAL_2P -NGMIX_T_ERR_GAL_NOSHEAR +NGMIX_T_1M +NGMIX_T_1P +NGMIX_T_2M +NGMIX_T_2P +NGMIX_T_NOSHEAR +NGMIX_T_ERR_1M +NGMIX_T_ERR_1P +NGMIX_T_ERR_2M +NGMIX_T_ERR_2P +NGMIX_T_ERR_NOSHEAR NGMIX_T_PSF_RECONV_1M NGMIX_T_PSF_RECONV_1P NGMIX_T_PSF_RECONV_2M @@ -76,16 +69,16 @@ NGMIX_T_PSF_RECONV_2P NGMIX_T_PSF_RECONV_NOSHEAR # flux and error -NGMIX_FLUX_GAL_1M -NGMIX_FLUX_GAL_1P -NGMIX_FLUX_GAL_2M -NGMIX_FLUX_GAL_2P -NGMIX_FLUX_GAL_NOSHEAR -NGMIX_FLUX_ERR_GAL_1M -NGMIX_FLUX_ERR_GAL_1P -NGMIX_FLUX_ERR_GAL_2M -NGMIX_FLUX_ERR_GAL_2P -NGMIX_FLUX_ERR_GAL_NOSHEAR +NGMIX_FLUX_1M +NGMIX_FLUX_1P +NGMIX_FLUX_2M +NGMIX_FLUX_2P +NGMIX_FLUX_NOSHEAR +NGMIX_FLUX_ERR_1M +NGMIX_FLUX_ERR_1P +NGMIX_FLUX_ERR_2M +NGMIX_FLUX_ERR_2P +NGMIX_FLUX_ERR_NOSHEAR # Flag for multiple objects due to overlapping # tiles diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index 1c7c5a6b..e28ac643 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -139,77 +139,6 @@ def save_sextractor_data(final_cat_file, sexcat_path, remove_vignet=True): return cat_size -def save_sm_data( - final_cat_file, - sexcat_sm_path, - do_classif=True, - star_thresh=0.003, - gal_thresh=0.01, - n_obj=-1, -): - r"""Save Spread-Model Data. - - Save the spread-model data into the final catalogue. - - Parameters - ---------- - final_cat_file : file_io.FITSCatalogue - Final catalogue - sexcat_sm_path : str - Path to spread-model catalogue to save. If ``None``, spread_model is - set to 99 - do_classif : bool - If ``True`` objects will be classified into stars, galaxies, and other, - using the classifier - :math:`{\rm class} = {\rm sm} + 2 * {\rm sm}_{\rm err}` - star_thresh : float - Threshold for star selection; object is classified as star if - :math:`|{\rm class}| <` ``star_thresh`` - gal_thresh : float - Threshold for galaxy selection; object is classified as galaxy if - :math:`{\rm class} >` ``gal_thresh`` - nobj : int, optional - Number of objects, only used if sexcat_sm_path is ``-1`` - - Returns - ------- - int - Number of objects saved - """ - final_cat_file.open() - - if sexcat_sm_path is not None: - sexcat_sm_file = file_io.FITSCatalogue( - sexcat_sm_path, - SEx_catalogue=True, - ) - sexcat_sm_file.open() - - sm = np.copy(sexcat_sm_file.get_data()["SPREAD_MODEL"]) - sm_err = np.copy(sexcat_sm_file.get_data()["SPREADERR_MODEL"]) - - sexcat_sm_file.close() - - else: - sm = np.ones(n_obj) * 99 - sm_err = np.ones(n_obj) * 99 - - final_cat_file.add_col("SPREAD_MODEL", sm) - final_cat_file.add_col("SPREADERR_MODEL", sm_err) - - if do_classif: - obj_flag = np.ones_like(sm, dtype="int16") * 2 - classif = sm + 2.0 * sm_err - obj_flag[np.where(np.abs(classif) < star_thresh)] = 0 - obj_flag[np.where(classif > gal_thresh)] = 1 - - final_cat_file.add_col("SPREAD_CLASS", obj_flag) - - final_cat_file.close() - - return n_obj - - class SaveCatalogue: """Save Catalogue. @@ -329,12 +258,15 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): Save the NGMIX catalogue into the final one. - Column grammar: ``NGMIX[m]_[_ERR]__``, plus - three OBJECT/SHEAR-less per-object metadata columns + Column grammar: ``NGMIX[m]_[_ERR][_]_``, + plus three OBJECT/SHEAR-less per-object metadata columns (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, - ``NGMIX_MCAL_TYPES_FAIL``). ``OBJECT`` is one of ``GAL`` (galaxy), - ``PSF_ORIG`` (the original image PSF, fit by - :func:`ngmix.average_original_psf`), or ``PSF_RECONV`` (the metacal + ``NGMIX_MCAL_TYPES_FAIL``). The galaxy is the implicit default object + and carries NO ``OBJECT`` token (``NGMIX_G1_NOSHEAR``, dropping the + ``GAL`` segment carried by the pre-#761 names). The explicit PSF + objects are ``PSF_ORIG`` + (the original image PSF, fit by + :func:`ngmix.average_original_psf`) and ``PSF_RECONV`` (the metacal reconvolution kernel, fit by :func:`ngmix.average_multiepoch_psf`); see those functions for what each PSF family IS. ``PSF_ORIG`` and ``PSF_RECONV`` are independent fits of different PSFs, no longer the @@ -387,32 +319,32 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): prefix = f"NGMIX{m}" - # Galaxy (GAL), original image PSF (PSF_ORIG) and metacal + # Galaxy (no object token), original image PSF (PSF_ORIG) and metacal # reconvolution kernel (PSF_RECONV); see ngmix.average_original_psf / # average_multiepoch_psf for what each PSF family is. G1/G2 are scalar # reduced-shear components, not a 2-vector. Sentinels: # sizes/fluxes/mags/flags 0, *_ERR fluxes/mags -1, ellipticities -10, # *_ERR sizes 1e30. for key_str in ( - f"{prefix}_T_GAL_", - f"{prefix}_SNR_GAL_", - f"{prefix}_FLUX_GAL_", - f"{prefix}_MAG_GAL_", - f"{prefix}_FLAGS_GAL_", + f"{prefix}_T_", + f"{prefix}_SNR_", + f"{prefix}_FLUX_", + f"{prefix}_MAG_", + f"{prefix}_FLAGS_", f"{prefix}_T_PSF_ORIG_", f"{prefix}_T_PSF_RECONV_", ): self._update_dict(key_str, np.zeros(n_obj)) for key_str in ( - f"{prefix}_FLUX_ERR_GAL_", - f"{prefix}_MAG_ERR_GAL_", + f"{prefix}_FLUX_ERR_", + f"{prefix}_MAG_ERR_", ): self._update_dict(key_str, np.ones(n_obj) * -1) for key_str in ( - f"{prefix}_G1_GAL_", - f"{prefix}_G2_GAL_", - f"{prefix}_G1_ERR_GAL_", - f"{prefix}_G2_ERR_GAL_", + f"{prefix}_G1_", + f"{prefix}_G2_", + f"{prefix}_G1_ERR_", + f"{prefix}_G2_ERR_", f"{prefix}_G1_PSF_ORIG_", f"{prefix}_G2_PSF_ORIG_", f"{prefix}_G1_ERR_PSF_ORIG_", @@ -424,7 +356,7 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): ): self._update_dict(key_str, np.ones(n_obj) * -10.0) for key_str in ( - f"{prefix}_T_ERR_GAL_", + f"{prefix}_T_ERR_", f"{prefix}_T_ERR_PSF_ORIG_", f"{prefix}_T_ERR_PSF_RECONV_", ): @@ -441,46 +373,46 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): # Galaxy shape, size and photometry. self._add2dict( - f"{prefix}_G1_GAL_{key}", ncf_data["g1"][ind[0]], idx + f"{prefix}_G1_{key}", ncf_data["g1"][ind[0]], idx ) self._add2dict( - f"{prefix}_G2_GAL_{key}", ncf_data["g2"][ind[0]], idx + f"{prefix}_G2_{key}", ncf_data["g2"][ind[0]], idx ) self._add2dict( - f"{prefix}_G1_ERR_GAL_{key}", + f"{prefix}_G1_ERR_{key}", ncf_data["g1_err"][ind[0]], idx ) self._add2dict( - f"{prefix}_G2_ERR_GAL_{key}", + f"{prefix}_G2_ERR_{key}", ncf_data["g2_err"][ind[0]], idx ) self._add2dict( - f"{prefix}_T_GAL_{key}", ncf_data["T"][ind[0]], idx + f"{prefix}_T_{key}", ncf_data["T"][ind[0]], idx ) self._add2dict( - f"{prefix}_T_ERR_GAL_{key}", + f"{prefix}_T_ERR_{key}", ncf_data["T_err"][ind[0]], idx ) self._add2dict( - f"{prefix}_SNR_GAL_{key}", ncf_data["s2n"][ind[0]], idx + f"{prefix}_SNR_{key}", ncf_data["s2n"][ind[0]], idx ) self._add2dict( - f"{prefix}_FLUX_GAL_{key}", + f"{prefix}_FLUX_{key}", ncf_data["flux"][ind[0]], idx ) self._add2dict( - f"{prefix}_FLUX_ERR_GAL_{key}", + f"{prefix}_FLUX_ERR_{key}", ncf_data["flux_err"][ind[0]], idx ) self._add2dict( - f"{prefix}_MAG_GAL_{key}", ncf_data["mag"][ind[0]], idx + f"{prefix}_MAG_{key}", ncf_data["mag"][ind[0]], idx ) self._add2dict( - f"{prefix}_MAG_ERR_GAL_{key}", + f"{prefix}_MAG_ERR_{key}", ncf_data["mag_err"][ind[0]], idx ) self._add2dict( - f"{prefix}_FLAGS_GAL_{key}", + f"{prefix}_FLAGS_{key}", ncf_data["flags"][ind[0]], idx ) @@ -559,21 +491,25 @@ def _save_galsim_shapes(self, galsim_cat_path): galsim_id = galsim_cat_file.get_data()["id"] for key_str in ( - "GALSIM_GAL_SIGMA_", - "GALSIM_PSF_SIGMA_", + "GALSIM_T_", + "GALSIM_T_PSF_", "GALSIM_FLUX_", "GALSIM_MAG_", ): self._update_dict(key_str, np.zeros(len(self._obj_id))) for key_str in ("GALSIM_FLUX_ERR_", "GALSIM_MAG_ERR_", "GALSIM_RES_"): - self._update_dict(key_str, np.ones(n_obj) * -1) + self._update_dict(key_str, np.ones(len(self._obj_id)) * -1) for key_str in ( - "GALSIM_GAL_ELL_", - "GALSIM_GAL_ELL_ERR_", - "GALSIM_GAL_ELL_UNCORR_", - "GALSIM_PSF_ELL_", + "GALSIM_G1_", + "GALSIM_G2_", + "GALSIM_G1_ERR_", + "GALSIM_G2_ERR_", + "GALSIM_G1_UNCORR_", + "GALSIM_G2_UNCORR_", + "GALSIM_G1_PSF_", + "GALSIM_G2_PSF_", ): - self._update_dict(key_str, np.ones((len(self._obj_id), 2)) * -10.0) + self._update_dict(key_str, np.ones(len(self._obj_id)) * -10.0) self._update_dict( "GALSIM_FLAGS_", np.ones(len(self._obj_id), dtype="int16"), @@ -589,50 +525,66 @@ def _save_galsim_shapes(self, galsim_cat_path): if key == "ORIGINAL_PSF": - uncorr_g = ( - gcf_data["gal_uncorr_g1"][ind[0]], - gcf_data["gal_uncorr_g2"][ind[0]], + # PSF columns sourced from the galaxy uncorr fields + # for this special extension (asymmetry preserved). + self._add2dict( + f"GALSIM_G1_PSF_{key}", + gcf_data["gal_uncorr_g1"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G2_PSF_{key}", + gcf_data["gal_uncorr_g2"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_T_PSF_{key}", + cs_size.sigma_to_T(gcf_data["gal_sigma"][ind[0]]), + idx ) - psf_sig = gcf_data["gal_sigma"][ind[0]] - self._add2dict(f"GALSIM_PSF_ELL_{key}", uncorr_g, idx) - self._add2dict(f"GALSIM_PSF_SIGMA_{key}", psf_sig, idx) else: - g = ( - gcf_data["gal_g1"][ind[0]], - gcf_data["gal_g2"][ind[0]], + self._add2dict( + f"GALSIM_G1_{key}", gcf_data["gal_g1"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G2_{key}", gcf_data["gal_g2"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G1_ERR_{key}", + gcf_data["gal_g1_err"][ind[0]], idx ) - g_err = ( - gcf_data["gal_g1_err"][ind[0]], - gcf_data["gal_g2_err"][ind[0]], + self._add2dict( + f"GALSIM_G2_ERR_{key}", + gcf_data["gal_g2_err"][ind[0]], idx ) - self._add2dict(f"GALSIM_GAL_ELL_{key}", g, idx) - self._add2dict(f"GALSIM_GAL_ELL_ERR_{key}", g_err, idx) - uncorr_g = ( - gcf_data["gal_uncorr_g1"][ind[0]], - gcf_data["gal_uncorr_g2"][ind[0]], + self._add2dict( + f"GALSIM_G1_UNCORR_{key}", + gcf_data["gal_uncorr_g1"][ind[0]], idx ) self._add2dict( - f"GALSIM_GAL_ELL_UNCORR_{key}", - uncorr_g, - idx, + f"GALSIM_G2_UNCORR_{key}", + gcf_data["gal_uncorr_g2"][ind[0]], idx ) - sigma = gcf_data["gal_sigma"][ind[0]] - self._add2dict(f"GALSIM_GAL_SIGMA_{key}", sigma, idx) + self._add2dict( + f"GALSIM_T_{key}", + cs_size.sigma_to_T(gcf_data["gal_sigma"][ind[0]]), + idx + ) - psf_g = ( - gcf_data["psf_g1"][ind[0]], - gcf_data["psf_g2"][ind[0]], + self._add2dict( + f"GALSIM_G1_PSF_{key}", + gcf_data["psf_g1"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G2_PSF_{key}", + gcf_data["psf_g2"][ind[0]], idx ) - psf_sigma = gcf_data["psf_sigma"][ind[0]] - self._add2dict(f"GALSIM_PSF_ELL_{key}", psf_g, idx) self._add2dict( - f"GALSIM_PSF_SIGMA_{key}", - psf_sigma, - idx, + f"GALSIM_T_PSF_{key}", + cs_size.sigma_to_T(gcf_data["psf_sigma"][ind[0]]), + idx ) flux = gcf_data["gal_flux"][ind[0]] @@ -669,20 +621,29 @@ def _save_psf_data(self, galaxy_psf_path): max_epoch = np.max(self._final_cat_file.get_data()["N_EPOCH"]) + 1 self._output_dict = { - f"PSF_ELL_{idx + 1}": np.ones((len(self._obj_id), 2)) * -10.0 + f"HSM_E1_PSF_{idx + 1}": np.ones(len(self._obj_id)) * -10.0 for idx in range(max_epoch) } self._output_dict = { **self._output_dict, **{ - f"PSF_FWHM_{idx + 1}": np.zeros(len(self._obj_id)) + f"HSM_E2_PSF_{idx + 1}": np.ones(len(self._obj_id)) * -10.0 for idx in range(max_epoch) }, } self._output_dict = { **self._output_dict, **{ - f"PSF_FLAG_{idx + 1}": np.ones(len(self._obj_id), dtype="int16") + f"HSM_T_PSF_{idx + 1}": np.zeros(len(self._obj_id)) + for idx in range(max_epoch) + }, + } + self._output_dict = { + **self._output_dict, + **{ + f"HSM_FLAG_PSF_{idx + 1}": np.ones( + len(self._obj_id), dtype="int16" + ) for idx in range(max_epoch) }, } @@ -696,21 +657,28 @@ def _save_psf_data(self, galaxy_psf_path): gpc_data = galaxy_psf_cat[str(id_tmp)][key] - if gpc_data["SHAPES"]["FLAG_PSF_HSM"] != 0: + if gpc_data["SHAPES"]["HSM_FLAG_PSF"] != 0: continue - e_psf = ( - gpc_data["SHAPES"]["E1_PSF_HSM"], - gpc_data["SHAPES"]["E2_PSF_HSM"], + self._add2dict( + f"HSM_E1_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_E1_PSF"], idx + ) + self._add2dict( + f"HSM_E2_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_E2_PSF"], idx ) - self._add2dict(f"PSF_ELL_{epoch + 1}", e_psf, idx) - psf_fwhm = cs_size.sigma_to_fwhm( - gpc_data["SHAPES"]["SIGMA_PSF_HSM"] + # HSM_T_PSF already holds T (sigma_to_T applied at the + # producer's _interpolate_me); read straight through. + self._add2dict( + f"HSM_T_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_T_PSF"], idx ) - self._add2dict(f"PSF_FWHM_{epoch + 1}", psf_fwhm, idx) - flag_psf = gpc_data["SHAPES"]["FLAG_PSF_HSM"] - self._add2dict(f"PSF_FLAG_{epoch + 1}", flag_psf, idx) + self._add2dict( + f"HSM_FLAG_PSF_{epoch + 1}", + gpc_data["SHAPES"]["HSM_FLAG_PSF"], idx + ) galaxy_psf_cat.close() diff --git a/tests/module/test_galsim_grammar_properties.py b/tests/module/test_galsim_grammar_properties.py new file mode 100644 index 00000000..28346eaf --- /dev/null +++ b/tests/module/test_galsim_grammar_properties.py @@ -0,0 +1,251 @@ +"""PROPERTY-BASED TESTS FOR THE GALSIM GALAXY/PSF COLUMN GRAMMAR. + +Drives ``SaveCatalogue._save_galsim_shapes`` (the make_cat writer) to pin the +renamed galsim grammar (shapepipe#761): the packed length-2 +``GALSIM_GAL_ELL_*`` / ``GALSIM_PSF_ELL_*`` columns are split into scalar +``GALSIM_G1_*`` / ``GALSIM_G2_*`` (g-type — galsim is correctly g-type, no +e/g value fix needed here), the galaxy carries no ``GAL`` token (implicit +default object, same rule as ngmix), and the raw ``GALSIM_GAL_SIGMA_*`` / +``GALSIM_PSF_SIGMA_*`` columns are replaced by the single stored size +``GALSIM_T_*`` (``T = 2 sigma^2``, via ``cs_util.size.sigma_to_T``). + +The ``ORIGINAL_PSF`` extension is a deliberate asymmetry, preserved rather +than normalized away: its PSF columns are sourced from the *galaxy's* +uncorrected fields (``gal_uncorr_g1/g2``, ``gal_sigma``), not the +per-extension ``psf_g1/g2``/``psf_sigma`` fields every other extension uses. +""" + +import re +import tempfile +from pathlib import Path + +import numpy as np +import numpy.testing as npt +from astropy.io import fits +from cs_util import size as cs_size +from hypothesis import given, settings +from hypothesis import strategies as st + +from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue + +# --------------------------------------------------------------------------- +# Harness +# --------------------------------------------------------------------------- + +# Source columns _save_galsim_shapes reads per extension. The ORIGINAL_PSF +# extension only consumes the gal_uncorr_g1/g2 + gal_sigma subset, but the +# real galsim catalogue carries the full set on every extension, so the +# fixture mirrors that rather than special-casing the row shape. +GALSIM_KEYS = [ + "id", + "gal_g1", "gal_g2", "gal_g1_err", "gal_g2_err", + "gal_uncorr_g1", "gal_uncorr_g2", "gal_sigma", + "psf_g1", "psf_g2", "psf_sigma", + "gal_flux", "gal_flux_err", "gal_mag", "gal_mag_err", + "gal_flag", "gal_resolution", +] +INT_KEYS = {"id", "gal_flag"} + +# Per-key multiplier so a single scalar ``seed`` fans out into twelve +# distinct values — a swapped source (e.g. psf_g1 <-> gal_uncorr_g1) breaks +# at least one equality below. +_MULT = { + "gal_g1": 1.0, "gal_g2": 1.3, "gal_g1_err": 1.7, "gal_g2_err": 2.1, + "gal_uncorr_g1": 2.6, "gal_uncorr_g2": 3.1, "gal_sigma": 0.4, + "psf_g1": 0.6, "psf_g2": 0.8, "psf_sigma": 0.5, + "gal_flux": 10.0, "gal_flux_err": 1.0, "gal_mag": 5.0, + "gal_mag_err": 0.2, +} + + +def _galsim_row(obj_id, seed): + """One object's per-key values for one extension, scaled by ``seed``.""" + row = {"id": obj_id, "gal_flag": 0, "gal_resolution": 0.5} + row.update({key: mult * seed for key, mult in _MULT.items()}) + return row + + +def _write_galsim_cat(path, obj_ids, seed_by_ext): + """Write a synthetic galsim FITS, one extension per ``seed_by_ext`` key. + + Extension order is preserved (dict order), matching how + ``get_ext_name()[1:]`` drives ``self._key_ends`` in production. + """ + hdus = [fits.PrimaryHDU()] + for ext, seed in seed_by_ext.items(): + rows = [_galsim_row(oid, seed) for oid in obj_ids] + cols = [ + fits.Column( + name=key, + format="K" if key in INT_KEYS else "D", + array=np.array([row[key] for row in rows]), + ) + for key in GALSIM_KEYS + ] + hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) + fits.HDUList(hdus).writeto(path, overwrite=True) + + +def _run_save_galsim(galsim_path, obj_ids): + """Drive ``_save_galsim_shapes`` and return its populated output dict.""" + inst = object.__new__(SaveCatalogue) + inst._obj_id = np.asarray(obj_ids) + inst._output_dict = {} + inst._save_galsim_shapes(str(galsim_path)) + return inst._output_dict + + +# --------------------------------------------------------------------------- +# Strategies +# --------------------------------------------------------------------------- + +obj_ids_strategy = st.lists( + st.integers(min_value=1, max_value=10_000), min_size=1, max_size=5, + unique=True, +) +# A finite seed bounded away from zero, so every per-key value it scales is +# itself non-zero and the family-vs-family / key-vs-key comparisons below are +# never vacuously satisfied by an accidental 0 == 0. +_seed = st.floats( + min_value=0.05, max_value=5.0, allow_nan=False, allow_infinity=False, +) + + +# --------------------------------------------------------------------------- +# (a) Scalar split + size routing, regular extension +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given(obj_ids=obj_ids_strategy, seed=_seed) +def test_regular_extension_scalar_split_and_size(obj_ids, seed): + """A non-``ORIGINAL_PSF`` extension routes each source to its own column. + + ``GALSIM_G1/G2_`` from ``gal_g1/g2``, the ``_ERR``/``_UNCORR`` + variants from their namesake sources, ``GALSIM_T_`` from + ``sigma_to_T(gal_sigma)``, and the PSF family + (``GALSIM_G1/G2_PSF_``, ``GALSIM_T_PSF_``) from ``psf_g1/g2`` / + ``sigma_to_T(psf_sigma)`` — never from the galaxy's own fields. + """ + ext = "NOSHEAR" + path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name + _write_galsim_cat(path, obj_ids, {ext: seed}) + out = _run_save_galsim(path, obj_ids) + Path(path).unlink() + + n = len(obj_ids) + row = _galsim_row(obj_ids[0], seed) + npt.assert_allclose(out[f"GALSIM_G1_{ext}"], [row["gal_g1"]] * n) + npt.assert_allclose(out[f"GALSIM_G2_{ext}"], [row["gal_g2"]] * n) + npt.assert_allclose(out[f"GALSIM_G1_ERR_{ext}"], [row["gal_g1_err"]] * n) + npt.assert_allclose(out[f"GALSIM_G2_ERR_{ext}"], [row["gal_g2_err"]] * n) + npt.assert_allclose( + out[f"GALSIM_G1_UNCORR_{ext}"], [row["gal_uncorr_g1"]] * n + ) + npt.assert_allclose( + out[f"GALSIM_G2_UNCORR_{ext}"], [row["gal_uncorr_g2"]] * n + ) + npt.assert_allclose( + out[f"GALSIM_T_{ext}"], [cs_size.sigma_to_T(row["gal_sigma"])] * n + ) + npt.assert_allclose(out[f"GALSIM_G1_PSF_{ext}"], [row["psf_g1"]] * n) + npt.assert_allclose(out[f"GALSIM_G2_PSF_{ext}"], [row["psf_g2"]] * n) + npt.assert_allclose( + out[f"GALSIM_T_PSF_{ext}"], [cs_size.sigma_to_T(row["psf_sigma"])] * n + ) + # The galaxy and PSF families must stay distinct (un-aliased), and the + # raw-sigma value must never leak through unconverted. + assert not np.allclose(out[f"GALSIM_T_{ext}"], row["gal_sigma"]) + assert not np.allclose(out[f"GALSIM_G1_{ext}"], out[f"GALSIM_G1_PSF_{ext}"]) + + npt.assert_allclose(out[f"GALSIM_FLUX_{ext}"], [row["gal_flux"]] * n) + npt.assert_allclose(out[f"GALSIM_FLUX_ERR_{ext}"], [row["gal_flux_err"]] * n) + npt.assert_allclose(out[f"GALSIM_MAG_{ext}"], [row["gal_mag"]] * n) + npt.assert_allclose(out[f"GALSIM_MAG_ERR_{ext}"], [row["gal_mag_err"]] * n) + npt.assert_allclose(out[f"GALSIM_RES_{ext}"], [row["gal_resolution"]] * n) + + +# --------------------------------------------------------------------------- +# (b) ORIGINAL_PSF asymmetry: sourced from the galaxy's uncorrected fields +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given(obj_ids=obj_ids_strategy, seed_main=_seed, seed_origpsf=_seed) +def test_original_psf_extension_sourced_from_gal_uncorr( + obj_ids, seed_main, seed_origpsf +): + """``ORIGINAL_PSF`` columns trace to ``gal_uncorr_*``/``gal_sigma``, NOT + ``psf_*`` — the one preserved asymmetry in the galsim grammar. + + The two extensions are written with independent seeds, and the + ``ORIGINAL_PSF`` row's own ``psf_g1/g2``/``psf_sigma`` are deliberately + distinct from its ``gal_uncorr_*``/``gal_sigma`` (the seed multipliers + differ), so a regression that wired the regular per-extension ``psf_*`` + sourcing into ``ORIGINAL_PSF`` would be caught, not masked by equal + values. + """ + seed_by_ext = {"NOSHEAR": seed_main, "ORIGINAL_PSF": seed_origpsf} + path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name + _write_galsim_cat(path, obj_ids, seed_by_ext) + out = _run_save_galsim(path, obj_ids) + Path(path).unlink() + + n = len(obj_ids) + op_row = _galsim_row(obj_ids[0], seed_origpsf) + npt.assert_allclose( + out["GALSIM_G1_PSF_ORIGINAL_PSF"], [op_row["gal_uncorr_g1"]] * n + ) + npt.assert_allclose( + out["GALSIM_G2_PSF_ORIGINAL_PSF"], [op_row["gal_uncorr_g2"]] * n + ) + npt.assert_allclose( + out["GALSIM_T_PSF_ORIGINAL_PSF"], + [cs_size.sigma_to_T(op_row["gal_sigma"])] * n, + ) + # Not sourced from this extension's own psf_g1/g2/psf_sigma (those are + # scaled by the same seed but through different multipliers, so equality + # here would mean the asymmetry was lost). + assert not np.allclose( + out["GALSIM_G1_PSF_ORIGINAL_PSF"], [op_row["psf_g1"]] * n + ) + assert not np.allclose( + out["GALSIM_T_PSF_ORIGINAL_PSF"], + [cs_size.sigma_to_T(op_row["psf_sigma"])] * n, + ) + + +# --------------------------------------------------------------------------- +# (c) Every emitted column matches the new grammar; no legacy tokens survive +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given(obj_ids=obj_ids_strategy, seed=_seed) +def test_emitted_columns_match_grammar_no_legacy_tokens(obj_ids, seed): + """Every ``GALSIM_*`` column matches the scalar grammar; nothing packed. + + No ``GALSIM_GAL_ELL*``/``GALSIM_PSF_ELL*`` (packed length-2 ellipticity) + and no ``GALSIM_*_SIGMA*`` (raw size) survives — both replaced by scalar + ``G1``/``G2`` components and the single stored size ``T``. The galaxy + also carries no ``GAL`` token (implicit default object, same rule as + ngmix). + """ + ext_names = ["NOSHEAR", "ORIGINAL_PSF"] + seed_by_ext = {ext: seed * (i + 1) for i, ext in enumerate(ext_names)} + path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name + _write_galsim_cat(path, obj_ids, seed_by_ext) + out = _run_save_galsim(path, obj_ids) + Path(path).unlink() + + assert out, "writer produced no columns" + + _component = "G1|G2|T" + _qual = "(?:_ERR|_UNCORR)?" + _ext = "|".join(re.escape(e) for e in ext_names) + grammar_re = re.compile( + rf"^GALSIM_(?:{_component}){_qual}(?:_PSF)?_(?:{_ext})$" + rf"|^GALSIM_(?:FLUX|FLUX_ERR|MAG|MAG_ERR|FLAGS|RES)_(?:{_ext})$" + ) + for col in out: + assert grammar_re.match(col), f"off-grammar column emitted: {col!r}" + assert "_ELL" not in col, col + assert "SIGMA" not in col, col + assert "_GAL_" not in col and not col.endswith("_GAL"), col diff --git a/tests/module/test_make_cat.py b/tests/module/test_make_cat.py index e2960701..d5d24942 100644 --- a/tests/module/test_make_cat.py +++ b/tests/module/test_make_cat.py @@ -1,12 +1,16 @@ """UNIT TESTS FOR MODULE PACKAGE: MAKE_CAT. Drives ``SaveCatalogue._save_ngmix_data`` against a synthetic ngmix catalogue -to lock in the renamed PSF-column grammar -``NGMIX[m]_[_ERR]__`` (shapepipe#749), plus the three -OBJECT/SHEAR-less metadata columns (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, -``NGMIX_MCAL_TYPES_FAIL``): the original image PSF (``PSF_ORIG``) and the metacal -reconvolution kernel (``PSF_RECONV``) are independent fits of *different* PSFs, -no longer the single aliased value of the pre-fix code. +to lock in the column grammar ``ESTIMATOR_COMPONENT[_ERR]_OBJECT[_SHEAR]`` +(shapepipe#749, #761): galaxy is the implicit default object and carries no +``GAL`` token (``NGMIX_G1_NOSHEAR``, never ``NGMIX_G1_GAL_NOSHEAR``), while +the PSF families keep an explicit object token — +``NGMIX_[_ERR]__`` for ``PSF_ORIG``/``PSF_RECONV`` — +plus the three OBJECT/SHEAR-less metadata columns (``NGMIX[m]_MCAL_FLAGS``, +``NGMIX_N_EPOCH``, ``NGMIX_MCAL_TYPES_FAIL``). The original image PSF +(``PSF_ORIG``) and the metacal reconvolution kernel (``PSF_RECONV``) are +independent fits of *different* PSFs, no longer the single aliased value of +the pre-#749 code. """ import numpy as np @@ -130,13 +134,13 @@ def test_save_ngmix_data_uses_new_grammar_and_no_old_names(tmp_path): # Every per-shear family is present under the new grammar. for shear in SHEAR_EXTS: for col in ( - f"NGMIX_G1_GAL_{shear}", f"NGMIX_G2_GAL_{shear}", - f"NGMIX_G1_ERR_GAL_{shear}", f"NGMIX_G2_ERR_GAL_{shear}", - f"NGMIX_T_GAL_{shear}", f"NGMIX_T_ERR_GAL_{shear}", - f"NGMIX_SNR_GAL_{shear}", - f"NGMIX_FLUX_GAL_{shear}", f"NGMIX_FLUX_ERR_GAL_{shear}", - f"NGMIX_MAG_GAL_{shear}", f"NGMIX_MAG_ERR_GAL_{shear}", - f"NGMIX_FLAGS_GAL_{shear}", + f"NGMIX_G1_{shear}", f"NGMIX_G2_{shear}", + f"NGMIX_G1_ERR_{shear}", f"NGMIX_G2_ERR_{shear}", + f"NGMIX_T_{shear}", f"NGMIX_T_ERR_{shear}", + f"NGMIX_SNR_{shear}", + f"NGMIX_FLUX_{shear}", f"NGMIX_FLUX_ERR_{shear}", + f"NGMIX_MAG_{shear}", f"NGMIX_MAG_ERR_{shear}", + f"NGMIX_FLAGS_{shear}", f"NGMIX_G1_PSF_ORIG_{shear}", f"NGMIX_G2_PSF_ORIG_{shear}", f"NGMIX_T_PSF_ORIG_{shear}", f"NGMIX_G1_PSF_RECONV_{shear}", f"NGMIX_G2_PSF_RECONV_{shear}", @@ -154,6 +158,8 @@ def test_save_ngmix_data_uses_new_grammar_and_no_old_names(tmp_path): assert "_Tpsf" not in col and "TPSF" not in col, col assert not col.startswith("NGMIX_ELL_"), col assert "NGMIXm" not in col, col # moments branch off by default + # Galaxy is the implicit default object — no GAL token (shapepipe#761). + assert "_GAL_" not in col and not col.endswith("_GAL"), col def test_save_ngmix_data_psf_families_trace_distinct_sources(tmp_path): @@ -228,11 +234,11 @@ def test_save_ngmix_data_fills_sentinels_for_absent_objects(tmp_path): npt.assert_allclose(t_orig[present], row["T_psf_orig"]) npt.assert_allclose(t_orig[absent], [0.0, 0.0]) - t_err_gal = np.asarray(out["NGMIX_T_ERR_GAL_NOSHEAR"]) + t_err_gal = np.asarray(out["NGMIX_T_ERR_NOSHEAR"]) npt.assert_allclose(t_err_gal[present], row["T_err"]) npt.assert_allclose(t_err_gal[absent], [1e30, 1e30]) - flux_err = np.asarray(out["NGMIX_FLUX_ERR_GAL_NOSHEAR"]) + flux_err = np.asarray(out["NGMIX_FLUX_ERR_NOSHEAR"]) npt.assert_allclose(flux_err[present], row["flux_err"]) npt.assert_allclose(flux_err[absent], [-1.0, -1.0]) diff --git a/tests/module/test_psf_averaging_properties.py b/tests/module/test_psf_averaging_properties.py index 2314b6ca..3afaa7bb 100644 --- a/tests/module/test_psf_averaging_properties.py +++ b/tests/module/test_psf_averaging_properties.py @@ -233,18 +233,18 @@ def test_all_epochs_failed_raises_zero_division(flagged_specs): # rows are overwritten (mirrors make_cat._save_ngmix_data). The property: a row # whose obj_id never appears among the ngmix ids keeps exactly these. _SENTINELS = { - "NGMIX_T_GAL_NOSHEAR": 0.0, - "NGMIX_SNR_GAL_NOSHEAR": 0.0, - "NGMIX_FLAGS_GAL_NOSHEAR": 0.0, + "NGMIX_T_NOSHEAR": 0.0, + "NGMIX_SNR_NOSHEAR": 0.0, + "NGMIX_FLAGS_NOSHEAR": 0.0, "NGMIX_T_PSF_ORIG_NOSHEAR": 0.0, "NGMIX_T_PSF_RECONV_NOSHEAR": 0.0, - "NGMIX_FLUX_ERR_GAL_NOSHEAR": -1.0, - "NGMIX_MAG_ERR_GAL_NOSHEAR": -1.0, - "NGMIX_G1_GAL_NOSHEAR": -10.0, - "NGMIX_G2_GAL_NOSHEAR": -10.0, + "NGMIX_FLUX_ERR_NOSHEAR": -1.0, + "NGMIX_MAG_ERR_NOSHEAR": -1.0, + "NGMIX_G1_NOSHEAR": -10.0, + "NGMIX_G2_NOSHEAR": -10.0, "NGMIX_G1_PSF_ORIG_NOSHEAR": -10.0, "NGMIX_G1_PSF_RECONV_NOSHEAR": -10.0, - "NGMIX_T_ERR_GAL_NOSHEAR": 1e30, + "NGMIX_T_ERR_NOSHEAR": 1e30, "NGMIX_T_ERR_PSF_ORIG_NOSHEAR": 1e30, "NGMIX_T_ERR_PSF_RECONV_NOSHEAR": 1e30, "NGMIX_N_EPOCH": 0.0, diff --git a/tests/module/test_psf_grammar_properties.py b/tests/module/test_psf_grammar_properties.py index c7c361dc..09eaf181 100644 --- a/tests/module/test_psf_grammar_properties.py +++ b/tests/module/test_psf_grammar_properties.py @@ -2,7 +2,9 @@ Drives ``SaveCatalogue._save_ngmix_data`` (the make_cat writer) under hypothesis to pin three invariants of the renamed grammar -``NGMIX[m]_[_ERR]__`` (shapepipe#749), plus the three +``NGMIX[m]_[_ERR][_]_`` (shapepipe#749, #761): the +galaxy is the implicit default object and carries NO ``OBJECT`` token +(``NGMIX_G1_NOSHEAR``, never ``NGMIX_G1_GAL_NOSHEAR``), plus the three OBJECT/SHEAR-less metadata columns (``NGMIX[m]_MCAL_FLAGS``, ``NGMIX_N_EPOCH``, ``NGMIX_MCAL_TYPES_FAIL``), where the ORIGINAL image PSF (``PSF_ORIG``) and the metacal RECONVOLUTION kernel (``PSF_RECONV``) are independent fits of @@ -12,14 +14,25 @@ orig vs reconv ngmix source columns, every emitted ``*_PSF_ORIG_*`` column traces to its orig source and every ``*_PSF_RECONV_*`` to its reconv source, and the two families are never crossed; -(b) every column name the writer emits matches the grammar regex; +(b) every column name the writer emits matches the grammar regex (the OBJECT + group is optional, so a regressed ``GAL`` token is rejected, not + absorbed); (c) every ``NGMIX_*`` token the shipped param file (``example/cfis/final_cat.param``) names is a column the writer can - produce — writer/param-file consistency. - -Setup mirrors ``tests/module/test_make_cat.py``: a synthetic ngmix FITS with -the five shear-type extensions carrying the exact ``compile_results`` key set, -read back through ``_save_ngmix_data``. + produce — writer/param-file consistency; +(d) the FULL frozen grammar (shapepipe#761) — ``ESTIMATOR_COMPONENT[_ERR]_ + OBJECT[_metacaltype]`` — holds across all three estimator families: ngmix + and galsim split ellipticity into ``G1``/``G2`` (g-type), HSM into + ``E1``/``E2`` (e-type); every family stores exactly one size, ``T``; HSM + keeps the singular ``FLAG`` token, ngmix/galsim keep plural ``FLAGS``. + This part is a static example-based check (not writer-driven), so it + covers galsim/HSM without depending on those producer modules — see + ``test_galsim_grammar_properties.py`` and ``test_psfex_interp.py`` for + the writer-driven coverage of those families. + +Setup for (a)-(c) mirrors ``tests/module/test_make_cat.py``: a synthetic +ngmix FITS with the five shear-type extensions carrying the exact +``compile_results`` key set, read back through ``_save_ngmix_data``. """ import re @@ -151,11 +164,18 @@ def _run_save_ngmix(ngmix_path, obj_ids, cat_size_target=None): # NGMIX[m]_[_ERR]__, plus the three per-object # metadata columns that carry neither OBJECT nor SHEAR. Anchored so a stray # legacy token (_PSFo, _Tpsf, NGMIX_ELL_*) fails to match. +# Galaxy is the implicit default object — no token (shapepipe#761), so the +# OBJECT group is OPTIONAL: ``NGMIX_G1_NOSHEAR`` (galaxy, no object token) +# matches alongside ``NGMIX_G1_PSF_ORIG_NOSHEAR`` (explicit PSF object). This +# also makes the regex auto-reject any future ``GAL``-token regression: a +# stray ``_GAL_`` between COMPONENT and SHEAR cannot be absorbed by the +# optional group (``GAL`` is not one of its alternatives) and there is no +# other slot for it to land in. _COMPONENT = "G1|G2|T|SNR|FLUX|MAG|FLAGS" -_OBJECT = "GAL|PSF_ORIG|PSF_RECONV" +_OBJECT = "PSF_ORIG|PSF_RECONV" _SHEAR = "NOSHEAR|1P|1M|2P|2M" GRAMMAR_RE = re.compile( - rf"^NGMIXm?_(?:{_COMPONENT})(?:_ERR)?_(?:{_OBJECT})_(?:{_SHEAR})$" + rf"^NGMIXm?_(?:{_COMPONENT})(?:_ERR)?(?:_(?:{_OBJECT}))?_(?:{_SHEAR})$" rf"|^NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)$" ) @@ -317,6 +337,10 @@ def test_emitted_column_names_match_grammar(obj_ids, tmp_path_factory): assert out, "writer produced no columns" for col in out: assert GRAMMAR_RE.match(col), f"off-grammar column emitted: {col!r}" + # Galaxy is the implicit default object — a regressed GAL token would + # match neither the optional OBJECT group nor any other slot, but + # assert it directly so the failure message names the real defect. + assert "_GAL_" not in col and not col.endswith("_GAL"), col # --------------------------------------------------------------------------- @@ -363,3 +387,89 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): # Non-vacuous: the writer DOES cover real grammar tokens from the param # file (so the assertion above is exercised against a non-empty set). assert (param_tokens - NON_WRITER_PARAM_TOKENS) & produced + + +# --------------------------------------------------------------------------- +# (d) The FULL frozen grammar, across all three estimator families +# --------------------------------------------------------------------------- +# +# ``ESTIMATOR_COMPONENT[_ERR]_OBJECT[_metacaltype]`` (shapepipe#761). This +# section encodes the grammar itself as a static property — example column +# names, not a driven writer — so it covers ngmix, galsim, and HSM together +# without depending on the galsim/HSM producer modules (covered by their own +# writer-driving tests: ``test_galsim_grammar_properties.py``, +# ``test_psfex_interp.py``). What it pins: +# +# - galaxy is the implicit default object, no token, for BOTH g-type writers +# (ngmix, galsim); +# - ellipticity is split into named scalar components, never a packed +# 2-vector: ``G1``/``G2`` for g-type (ngmix, galsim), ``E1``/``E2`` for +# e-type (HSM); +# - exactly one stored size, ``T`` (no ``SIGMA``/``FWHM``/``r50``); +# - HSM keeps the singular ``FLAG`` token; ngmix/galsim keep plural ``FLAGS``. + +FROZEN_GRAMMAR_RE = re.compile( + r"^(?:" + # ngmix: galaxy (no object token) or explicit PSF_ORIG/PSF_RECONV. + r"NGMIXm?_(?:G1|G2|T|SNR|FLUX|MAG|FLAGS)(?:_ERR)?" + r"(?:_(?:PSF_ORIG|PSF_RECONV))?_(?:NOSHEAR|1P|1M|2P|2M)" + r"|NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)" + # galsim: galaxy (no object token) or explicit PSF object; is a + # data-driven FITS extension name, so left as a generic uppercase token. + r"|GALSIM_(?:G1|G2|T)(?:_ERR|_UNCORR)?(?:_PSF)?_[A-Z0-9_]+" + r"|GALSIM_(?:FLUX|FLUX_ERR|MAG|MAG_ERR|FLAGS|RES)_[A-Z0-9_]+" + # HSM: e-type, explicit PSF/STAR object, singular FLAG; the multi-epoch + # sink in make_cat._save_psf_data appends a bare epoch index. + r"|HSM_(?:E1|E2|T)_(?:PSF|STAR)(?:_\d+)?" + r"|HSM_FLAG_(?:PSF|STAR)(?:_\d+)?" + r")$" +) + +# Representative columns the new grammar PRODUCES — one or more per family, +# chosen to exercise every rule above (GAL-drop, G1/G2 vs E1/E2, single T, +# FLAG vs FLAGS). +_GRAMMAR_VALID_EXAMPLES = [ + "NGMIX_G1_NOSHEAR", # galaxy, no object token + "NGMIX_T_ERR_PSF_ORIG_1M", + "NGMIX_FLAGS_2P", + "NGMIXm_G1_PSF_RECONV_NOSHEAR", + "NGMIX_MCAL_FLAGS", + "GALSIM_G1_NOSHEAR", # galaxy, no object token + "GALSIM_G2_UNCORR_NOSHEAR", + "GALSIM_T_PSF_ORIGINAL_PSF", + "GALSIM_FLAGS_NOSHEAR", + "HSM_E1_PSF", + "HSM_T_STAR", + "HSM_FLAG_PSF", + "HSM_E1_PSF_3", # make_cat multi-epoch sink + "HSM_T_PSF_2", + "HSM_FLAG_STAR_1", +] + +# Pre-#761 / off-grammar columns the rename REMOVES — each violates exactly +# one rule, named so a failure here points at the specific regression. +_GRAMMAR_INVALID_EXAMPLES = [ + "NGMIX_G1_GAL_NOSHEAR", # GAL token regression + "NGMIX_ELL_NOSHEAR", # packed ellipticity + "E1_PSF_HSM", # pre-rename HSM naming (not HSM_-prefixed) + "SIGMA_PSF_HSM", # raw sigma, not T + "HSM_SIGMA_PSF", # stored sigma instead of T + "HSM_FLAGS_PSF", # plural — HSM is singular FLAG + "GALSIM_GAL_ELL_NOSHEAR", # GAL token + packed ellipticity + "GALSIM_GAL_SIGMA_NOSHEAR", # GAL token + raw sigma + "GALSIM_PSF_ELL_NOSHEAR", # packed ellipticity + "GALSIM_FWHM_NOSHEAR", # FWHM, not T + "SPREAD_MODEL", # removed entirely, not renamed +] + + +@pytest.mark.parametrize("col", _GRAMMAR_VALID_EXAMPLES) +def test_frozen_grammar_accepts_new_grammar_examples(col): + """One representative column per family/rule matches the frozen grammar.""" + assert FROZEN_GRAMMAR_RE.match(col), f"should match grammar: {col!r}" + + +@pytest.mark.parametrize("col", _GRAMMAR_INVALID_EXAMPLES) +def test_frozen_grammar_rejects_legacy_examples(col): + """Each pre-#761 / removed column is rejected, not silently absorbed.""" + assert not FROZEN_GRAMMAR_RE.match(col), f"should NOT match grammar: {col!r}" From f9a8f0509e8b0e198e1c2676ef849113fa85ebe8 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Tue, 30 Jun 2026 22:49:52 +0200 Subject: [PATCH 20/33] build: track cs_util @ develop (git ref) for the size/shape helpers shapepipe's column grammar sources size (T = 2*sigma^2) from cs_util.size, which landed in cs_util 0.2.x. Point the dependency at cs_util's develop branch via [tool.uv.sources] (a git ref, mirroring the ngmix entry) so new cs_util helpers are available without shipping a release and rebuilding the image each time, and raise the floor cs_util>=0.2.1 to make the cs_util.size requirement explicit rather than coincidental on PyPI's current latest. uv.lock pins the resolved develop commit. CI rebuilds the image from this pyproject/lock; local in-container runs use the PYTHONPATH-shadow of an on-disk cs_util checkout until the dev image is rebuilt (see CLAUDE.md). Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MgDK9SKn2PsDZGEbr4WFb3 --- pyproject.toml | 7 ++++++- uv.lock | 8 ++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9097b3cc..69c73f06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ "astropy>=7.0", # major 6 → 7 "astroquery", "canfar", - "cs_util>=0.1.9", + "cs_util>=0.2.1", "galsim>=2.8", "h5py", "joblib>=1.4", @@ -90,6 +90,11 @@ environments = ["sys_platform == 'linux'"] [tool.uv.sources] ngmix = { git = "https://github.com/esheldon/ngmix", tag = "v2.4.0" } +# Track cs_util's develop branch (a git ref, not a pinned release) so new +# cs_util.size / cs_util.shape helpers are available without shipping a release +# and rebuilding the image each time. shapepipe's column grammar sources size +# (T = 2*sigma^2) and ellipticity conversions from cs_util. +cs_util = { git = "https://github.com/CosmoStat/cs_util", branch = "develop" } [build-system] # Declaring a build backend makes shapepipe an installed package (not a uv diff --git a/uv.lock b/uv.lock index b483439a..f5fe9c70 100644 --- a/uv.lock +++ b/uv.lock @@ -655,7 +655,7 @@ wheels = [ [[package]] name = "cs-util" version = "0.2.1" -source = { registry = "https://pypi.org/simple" } +source = { git = "https://github.com/CosmoStat/cs_util?branch=develop#ff8fa2b2168737353ac867cc21a8e95be082f17b" } dependencies = [ { name = "astropy", marker = "sys_platform == 'linux'" }, { name = "camb", marker = "sys_platform == 'linux'" }, @@ -670,10 +670,6 @@ dependencies = [ { name = "swig", marker = "sys_platform == 'linux'" }, { name = "vos", marker = "sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e0/08/d759e6af88c8a53b9c1c863a3bc3b9b5b26cf434f7232875a4bb75bec514/cs_util-0.2.1.tar.gz", hash = "sha256:5dd9be50249c412743ed116c0cb3364d5bcf1ad907be692dc52db0059a00412d", size = 26713, upload-time = "2026-06-10T23:58:23.017Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/2f/c8d89e8013567c3cf5535b3b941b81033e3c4a81101f3953d611af817dca/cs_util-0.2.1-py3-none-any.whl", hash = "sha256:577abc9ba04da8c78bdba9f6d235deecac99c266a0937e2c44ea1df5b50224de", size = 32209, upload-time = "2026-06-10T23:58:21.766Z" }, -] [[package]] name = "cycler" @@ -3515,7 +3511,7 @@ requires-dist = [ { name = "black", marker = "extra == 'lint'" }, { name = "build", marker = "extra == 'release'" }, { name = "canfar" }, - { name = "cs-util", specifier = ">=0.1.9" }, + { name = "cs-util", git = "https://github.com/CosmoStat/cs_util?branch=develop" }, { name = "fitsio", marker = "extra == 'fitsio'" }, { name = "galsim", specifier = ">=2.8" }, { name = "h5py" }, From d5b3e9251cdafc13b01b88cefe6fb016fb9b3d93 Mon Sep 17 00:00:00 2001 From: Sacha Guerrini Date: Wed, 1 Jul 2026 08:51:21 +0200 Subject: [PATCH 21/33] Go back to g-type ellipticities --- .../modules/mccd_package/mccd_interpolation_script.py | 8 ++++---- .../modules/mccd_package/shapepipe_auxiliary_mccd.py | 4 ++-- .../modules/psfex_interp_package/psfex_interp.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py index ddce0194..69d6c181 100644 --- a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py +++ b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py @@ -220,8 +220,8 @@ def _get_psfshapes(self): self.psf_shapes = np.array( [ [ - moms.observed_shape.e1, - moms.observed_shape.e2, + moms.observed_shape.g1, + moms.observed_shape.g2, moms.moments_sigma, int(bool(moms.error_message)), ] @@ -278,8 +278,8 @@ def _get_starshapes(self, star_vign): self.star_shapes = np.array( [ [ - moms.observed_shape.e1, - moms.observed_shape.e2, + moms.observed_shape.g1, + moms.observed_shape.g2, moms.moments_sigma, int(bool(moms.error_message)), ] diff --git a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py index 99dea6b9..9b2c94a7 100644 --- a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py +++ b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py @@ -419,8 +419,8 @@ def mccd_interpolation_pipeline( PSF_shapes = np.array( [ [ - moms.observed_shape.e1, - moms.observed_shape.e2, + moms.observed_shape.g1, + moms.observed_shape.g2, moms.moments_sigma, int(bool(moms.error_message)), ] diff --git a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py index 7428281d..db7ac3fe 100644 --- a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py +++ b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py @@ -323,8 +323,8 @@ def _get_psfshapes(self): self.psf_shapes = np.array( [ [ - moms.observed_shape.e1, - moms.observed_shape.e2, + moms.observed_shape.g1, + moms.observed_shape.g2, moms.moments_sigma, int(bool(moms.error_message)), ] @@ -438,8 +438,8 @@ def _get_starshapes(self, star_vign): self.star_shapes = np.array( [ [ - moms.observed_shape.e1, - moms.observed_shape.e2, + moms.observed_shape.g1, + moms.observed_shape.g2, moms.moments_sigma, int(bool(moms.error_message)), ] From 7d527d3da3d6646053e2fab9e271aebba3fcb1ba Mon Sep 17 00:00:00 2001 From: Sacha Guerrini Date: Wed, 1 Jul 2026 08:56:08 +0200 Subject: [PATCH 22/33] Rename the HSM columns with G1/2 instead of E1/2 --- .../mccd_package/mccd_interpolation_script.py | 12 +++++----- .../mccd_package/mccd_plot_utilities.py | 8 +++---- .../mccd_package/shapepipe_auxiliary_mccd.py | 4 ++-- .../merge_starcat_package/merge_starcat.py | 24 +++++++++---------- .../psfex_interp_package/psfex_interp.py | 16 ++++++------- 5 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py index 69d6c181..c288ac9e 100644 --- a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py +++ b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py @@ -340,12 +340,12 @@ def _write_output_validation(self, star_dict, psfex_cat_dict): ) data = { - "HSM_E1_PSF": self.psf_shapes[:, 0], - "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_G1_PSF": self.psf_shapes[:, 0], + "HSM_G2_PSF": self.psf_shapes[:, 1], "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), - "HSM_E1_STAR": self.star_shapes[:, 0], - "HSM_E2_STAR": self.star_shapes[:, 1], + "HSM_G1_STAR": self.star_shapes[:, 0], + "HSM_G2_STAR": self.star_shapes[:, 1], "HSM_T_STAR": cs_size.sigma_to_T(self.star_shapes[:, 2]), "HSM_FLAG_STAR": self.star_shapes[:, 3].astype(int), } @@ -537,10 +537,10 @@ def _interpolate_me(self): ] = final_list[j][1][where_res[0]] if self._compute_shape: shape_dict = {} - shape_dict["HSM_E1_PSF"] = final_list[j][2][ + shape_dict["HSM_G1_PSF"] = final_list[j][2][ where_res[0] ][0] - shape_dict["HSM_E2_PSF"] = final_list[j][2][ + shape_dict["HSM_G2_PSF"] = final_list[j][2][ where_res[0] ][1] shape_dict["HSM_T_PSF"] = cs_size.sigma_to_T( diff --git a/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py b/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py index 1cf33ff8..02bc55a8 100644 --- a/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py +++ b/src/shapepipe/modules/mccd_package/mccd_plot_utilities.py @@ -261,15 +261,15 @@ def plot_meanshapes( # size column already holds T = 2 sigma^2 all_star_shapes = np.array( [ - starcat[hdu_no].data["HSM_E1_STAR"], - starcat[hdu_no].data["HSM_E2_STAR"], + starcat[hdu_no].data["HSM_G1_STAR"], + starcat[hdu_no].data["HSM_G2_STAR"], starcat[hdu_no].data["HSM_T_STAR"], ] ) all_psf_shapes = np.array( [ - starcat[hdu_no].data["HSM_E1_PSF"], - starcat[hdu_no].data["HSM_E2_PSF"], + starcat[hdu_no].data["HSM_G1_PSF"], + starcat[hdu_no].data["HSM_G2_PSF"], starcat[hdu_no].data["HSM_T_PSF"], ] ) diff --git a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py index 9b2c94a7..943433e8 100644 --- a/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py +++ b/src/shapepipe/modules/mccd_package/shapepipe_auxiliary_mccd.py @@ -472,8 +472,8 @@ def shapepipe_write_output( if get_shapes: data = { "VIGNET": interp_PSFs, - "HSM_E1_PSF": PSF_shapes[:, 0], - "HSM_E2_PSF": PSF_shapes[:, 1], + "HSM_G1_PSF": PSF_shapes[:, 0], + "HSM_G2_PSF": PSF_shapes[:, 1], "HSM_T_PSF": cs_size.sigma_to_T(PSF_shapes[:, 2]), "HSM_FLAG_PSF": PSF_shapes[:, 3].astype(int), } diff --git a/src/shapepipe/modules/merge_starcat_package/merge_starcat.py b/src/shapepipe/modules/merge_starcat_package/merge_starcat.py index c5d3eccd..7bc0cb76 100644 --- a/src/shapepipe/modules/merge_starcat_package/merge_starcat.py +++ b/src/shapepipe/modules/merge_starcat_package/merge_starcat.py @@ -490,11 +490,11 @@ def process(self): "Y": y, "RA": ra, "DEC": dec, - "HSM_E1_PSF": g1_psf, - "HSM_E2_PSF": g2_psf, + "HSM_G1_PSF": g1_psf, + "HSM_G2_PSF": g2_psf, "HSM_T_PSF": size_psf, - "HSM_E1_STAR": g1, - "HSM_E2_STAR": g2, + "HSM_G1_STAR": g1, + "HSM_G2_STAR": g2, "HSM_T_STAR": size, "HSM_FLAG_PSF": flag_psf, "HSM_FLAG_STAR": flag_star, @@ -585,11 +585,11 @@ def process(self): dec += list(data_j["DEC"]) # shapes (size column already holds T = 2 sigma^2) - g1_psf += list(data_j["HSM_E1_PSF"]) - g2_psf += list(data_j["HSM_E2_PSF"]) + g1_psf += list(data_j["HSM_G1_PSF"]) + g2_psf += list(data_j["HSM_G2_PSF"]) size_psf += list(data_j["HSM_T_PSF"]) - g1 += list(data_j["HSM_E1_STAR"]) - g2 += list(data_j["HSM_E2_STAR"]) + g1 += list(data_j["HSM_G1_STAR"]) + g2 += list(data_j["HSM_G2_STAR"]) size += list(data_j["HSM_T_STAR"]) # flags @@ -633,11 +633,11 @@ def process(self): "Y": y, "RA": ra, "DEC": dec, - "HSM_E1_PSF": g1_psf, - "HSM_E2_PSF": g2_psf, + "HSM_G1_PSF": g1_psf, + "HSM_G2_PSF": g2_psf, "HSM_T_PSF": size_psf, - "HSM_E1_STAR": g1, - "HSM_E2_STAR": g2, + "HSM_G1_STAR": g1, + "HSM_G2_STAR": g2, "HSM_T_STAR": size, "HSM_FLAG_PSF": flag_psf, "HSM_FLAG_STAR": flag_star, diff --git a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py index db7ac3fe..c14aeb13 100644 --- a/src/shapepipe/modules/psfex_interp_package/psfex_interp.py +++ b/src/shapepipe/modules/psfex_interp_package/psfex_interp.py @@ -347,8 +347,8 @@ def _write_output(self): if self._compute_shape: data = { "VIGNET": self.interp_PSFs, - "HSM_E1_PSF": self.psf_shapes[:, 0], - "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_G1_PSF": self.psf_shapes[:, 0], + "HSM_G2_PSF": self.psf_shapes[:, 1], "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), } @@ -500,12 +500,12 @@ def _write_output_validation(self, star_dict, psfex_cat_dict): ) data = { - "HSM_E1_PSF": self.psf_shapes[:, 0], - "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_G1_PSF": self.psf_shapes[:, 0], + "HSM_G2_PSF": self.psf_shapes[:, 1], "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), - "HSM_E1_STAR": self.star_shapes[:, 0], - "HSM_E2_STAR": self.star_shapes[:, 1], + "HSM_G1_STAR": self.star_shapes[:, 0], + "HSM_G2_STAR": self.star_shapes[:, 1], "HSM_T_STAR": cs_size.sigma_to_T(self.star_shapes[:, 2]), "HSM_FLAG_STAR": self.star_shapes[:, 3].astype(int), } @@ -755,10 +755,10 @@ def _interpolate_me(self): ] = final_list[j][1][where_res[0]] if self._compute_shape: shape_dict = {} - shape_dict["HSM_E1_PSF"] = final_list[j][2][ + shape_dict["HSM_G1_PSF"] = final_list[j][2][ where_res[0] ][0] - shape_dict["HSM_E2_PSF"] = final_list[j][2][ + shape_dict["HSM_G2_PSF"] = final_list[j][2][ where_res[0] ][1] shape_dict["HSM_T_PSF"] = cs_size.sigma_to_T( From 0df4f2c313c06e061edf45354d7fa25fe8cfa008 Mon Sep 17 00:00:00 2001 From: Sacha Guerrini Date: Wed, 1 Jul 2026 09:31:00 +0200 Subject: [PATCH 23/33] (fix) Remaining E1/2 HSM column instead of G1/2 --- .../modules/mccd_package/mccd_interpolation_script.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py index c288ac9e..bfd9f341 100644 --- a/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py +++ b/src/shapepipe/modules/mccd_package/mccd_interpolation_script.py @@ -244,8 +244,8 @@ def _write_output(self): if self._compute_shape: data = { "VIGNET": self.interp_PSFs, - "HSM_E1_PSF": self.psf_shapes[:, 0], - "HSM_E2_PSF": self.psf_shapes[:, 1], + "HSM_G1_PSF": self.psf_shapes[:, 0], + "HSM_G2_PSF": self.psf_shapes[:, 1], "HSM_T_PSF": cs_size.sigma_to_T(self.psf_shapes[:, 2]), "HSM_FLAG_PSF": self.psf_shapes[:, 3].astype(int), } From 726207b5ff277e3f13c549abfa0407a40084dd21 Mon Sep 17 00:00:00 2001 From: Sacha Guerrini Date: Wed, 1 Jul 2026 09:33:00 +0200 Subject: [PATCH 24/33] (fix) Change the HSM label in the associated pytest --- tests/module/test_psfex_interp.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/module/test_psfex_interp.py b/tests/module/test_psfex_interp.py index cb644875..ba8bf550 100644 --- a/tests/module/test_psfex_interp.py +++ b/tests/module/test_psfex_interp.py @@ -163,7 +163,7 @@ def test_write_output_uses_hsm_grammar_and_routes_size_to_T( """``_write_output`` writes the HSM grammar names and stores T, not sigma. Captures the dict handed to ``save_as_fits`` and asserts the frozen - grammar (``HSM_E1_PSF`` / ``HSM_E2_PSF`` / ``HSM_T_PSF`` / ``HSM_FLAG_PSF``, + grammar (``HSM_G1_PSF`` / ``HSM_G2_PSF`` / ``HSM_T_PSF`` / ``HSM_FLAG_PSF``, no legacy ``*_PSF_HSM``) and that the size column is routed through ``cs_util.size.sigma_to_T`` (``T = 2 sigma^2``) at the producer. """ @@ -193,17 +193,17 @@ def _capture(self, data=None, **kwargs): assert { "VIGNET", - "HSM_E1_PSF", - "HSM_E2_PSF", + "HSM_G1_PSF", + "HSM_G2_PSF", "HSM_T_PSF", "HSM_FLAG_PSF", } <= set(data) for legacy in ("E1_PSF_HSM", "E2_PSF_HSM", "SIGMA_PSF_HSM", "FLAG_PSF_HSM"): assert legacy not in data - # e-type distortion stored straight through from _get_psfshapes ... - npt.assert_array_equal(data["HSM_E1_PSF"], interp.psf_shapes[:, 0]) - npt.assert_array_equal(data["HSM_E2_PSF"], interp.psf_shapes[:, 1]) + # g-type distortion stored straight through from _get_psfshapes ... + npt.assert_array_equal(data["HSM_G1_PSF"], interp.psf_shapes[:, 0]) + npt.assert_array_equal(data["HSM_G2_PSF"], interp.psf_shapes[:, 1]) # ... and the size column is T = 2 sigma^2 (sigma_to_T), not raw sigma. npt.assert_allclose(data["HSM_T_PSF"], 2.0 * interp.psf_shapes[:, 2] ** 2) assert not np.allclose(data["HSM_T_PSF"], interp.psf_shapes[:, 2]) From bcf4def2ba2884d16e6108c3d20067dafb432b69 Mon Sep 17 00:00:00 2001 From: Sacha Guerrini Date: Wed, 1 Jul 2026 09:45:28 +0200 Subject: [PATCH 25/33] (fix) Non-passing test because test was based on e-type ellipticity instead of g-type --- tests/module/test_psfex_interp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/module/test_psfex_interp.py b/tests/module/test_psfex_interp.py index ba8bf550..52c21e5a 100644 --- a/tests/module/test_psfex_interp.py +++ b/tests/module/test_psfex_interp.py @@ -144,9 +144,9 @@ def test_get_psfshapes_stores_e_type_distortion(psfex_interpolator, e): mag_g = np.hypot(g1, g2) assume(mag_e - mag_g > 1e-3) # ensure a non-degenerate e-vs-g distinction - # Stored values are exactly the e-type distortion components ... - npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 0], e1) - npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 1], e2) + # Stored values are exactly the g-type distortion components ... + npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 0], g1) + npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 1], g2) # ... whose magnitude strictly exceeds the g-type shear it would carry if # the .g/.e swap regressed. From c5b428b6015ad9d8ebd93937d66e84379960ed61 Mon Sep 17 00:00:00 2001 From: Sacha Guerrini Date: Wed, 1 Jul 2026 10:02:39 +0200 Subject: [PATCH 26/33] (fix) Fix the magnitude comparison test between e- and g-type ellipticities. --- tests/module/test_psfex_interp.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/module/test_psfex_interp.py b/tests/module/test_psfex_interp.py index 52c21e5a..5e133fda 100644 --- a/tests/module/test_psfex_interp.py +++ b/tests/module/test_psfex_interp.py @@ -148,13 +148,12 @@ def test_get_psfshapes_stores_e_type_distortion(psfex_interpolator, e): npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 0], g1) npt.assert_array_equal(psfex_interpolator.psf_shapes[0, 1], g2) - # ... whose magnitude strictly exceeds the g-type shear it would carry if - # the .g/.e swap regressed. + # ... whose magnitude strictly subceeds the e-type shear. mag_stored = np.hypot( psfex_interpolator.psf_shapes[0, 0], psfex_interpolator.psf_shapes[0, 1] ) - npt.assert_allclose(mag_stored, mag_e) - assert mag_stored > mag_g + 1e-4 + npt.assert_allclose(mag_stored, mag_g) + assert mag_e > mag_stored + 1e-4 def test_write_output_uses_hsm_grammar_and_routes_size_to_T( From 66b338d29a7b09d47756192b15f86da065e7e0f2 Mon Sep 17 00:00:00 2001 From: Cail McLean Daley Date: Wed, 1 Jul 2026 13:41:40 +0200 Subject: [PATCH 27/33] ngmix: give the PSF observation a finite weight map (#749) (#779) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ngmix: give the PSF observation a finite weight map (#749) make_ngmix_observation built psf_obs weightless, so ngmix defaulted to unit weight on the unit-flux PSF stamp and the galaxy GPriorBA(0.4) prior handed to the diagnostic PSF fitter swamped the likelihood. The recovered PSF shape collapsed toward the prior (zero ellipticity), corrupting the *_psf_orig diagnostic columns (#749), and the small finite-weight noise budget present in the pre-v2.0 esheldon/aguinot code was lost in the rewrite (the now-retired #774 — the same bug from the noise-budget angle). Build psf_obs with a flat weight = 1/PSF_NOISE**2 (PSF_NOISE = 1e-5, the esheldon/aguinot value Axel's reproduction used), forced float for parity with the galaxy weight — the PSF analogue of the galaxy weight built just below. Weight only; PSFEx/MCCD models already carry a little noise, so no explicit stamp-noise injection is needed (Axel's guidance on #749). The exact value is non-critical: validated flat across 1e-4..1e-6 on the twin. Calibration is untouched: metacal fits this same psf_obs with a prior-free AdmomFitter, which is insensitive to a flat weight's absolute scale, so the calibrated shear is invariant (verified bit-for-bit on the twin). Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_017KQn6mXoL4XMgscZDsaTkB * tests: guard the PSF-obs weight end-to-end (#749) Three assertions in test_ngmix_weight_validation.py, on the real production path now that this sits on the column grammar (#761): - structural: make_ngmix_observation builds psf_obs with the finite 1/PSF_NOISE**2 weight, never unit weight; - recovery (#749 done-condition): through do_ngmix_metacal / average_original_psf, the *_psf_orig column recovers the injected PSF moment ellipticity to 1e-3 with the weight, and collapses below 10% without it (the contrast that proves the weight is load-bearing for the diagnostic columns); - invariance: every metacal-type shear is bit-for-bit identical between the unit-weight (pre-fix) and shipped PSF_NOISE builds (metacal's prior-free AdmomFitter is insensitive to the flat weight's scale). Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_017KQn6mXoL4XMgscZDsaTkB --------- Co-authored-by: Claude Opus 4.8 --- src/shapepipe/modules/ngmix_package/ngmix.py | 23 +++- tests/module/test_ngmix_weight_validation.py | 112 +++++++++++++++++++ 2 files changed, 134 insertions(+), 1 deletion(-) diff --git a/src/shapepipe/modules/ngmix_package/ngmix.py b/src/shapepipe/modules/ngmix_package/ngmix.py index cc7ab87b..371264e2 100644 --- a/src/shapepipe/modules/ngmix_package/ngmix.py +++ b/src/shapepipe/modules/ngmix_package/ngmix.py @@ -23,6 +23,15 @@ METACAL_TYPES = ('noshear', '1p', '1m', '2p', '2m') +# Noise budget for the PSF observation's flat weight map (psf_wt = +# 1/PSF_NOISE**2). Mirrors the esheldon/aguinot pattern (sigma ~ 1e-5/1e-6); +# 1e-5 is the value Axel Guinot's #749 reproduction used. The fit is driven +# by the *relative* weighting of the likelihood vs the prior, so the exact +# value is non-critical once it is finite (validated on the digital twin: the +# recovered PSF shape/size are flat across 1e-4..1e-6). See +# make_ngmix_observation. +PSF_NOISE = 1e-5 + class MetacalResult(NamedTuple): """Return of :func:`do_ngmix_metacal`: the metacal fit plus both PSFs. @@ -1091,7 +1100,19 @@ def make_ngmix_observation( col=(psf.shape[1] - 1) / 2, wcs=wcs, ) - psf_obs = Observation(psf, jacobian=psf_jacob) + # A model fit always needs a weight: without one ngmix defaults to unit + # weights on a unit-flux PSF stamp, so the galaxy GPriorBA(0.4) prior + # handed to the diagnostic PSF fitter swamps the (tiny) likelihood and the + # recovered PSF shape collapses toward the prior (#749). PSFEx/MCCD models + # already carry a little noise, so a finite flat weight matching the + # esheldon/aguinot noise budget (psf_noise ~ 1e-5) suffices to restore the + # likelihood — no explicit stamp-noise injection needed (#774). This is the + # PSF analogue of the galaxy weight built just below; force float so an + # integer-typed stamp cannot truncate the weight (matching that build). + # Metacal's own PSF fit is prior-free (AdmomFitter) and so is insensitive + # to this flat weight's scale, leaving the calibration untouched. + psf_wt = np.full_like(psf, 1.0 / PSF_NOISE ** 2, dtype=float) + psf_obs = Observation(psf, weight=psf_wt, jacobian=psf_jacob) gal_masked, weight_map, noise_img = prepare_ngmix_weights( gal, weight, flag, rng, bkg_rms=bkg_rms diff --git a/tests/module/test_ngmix_weight_validation.py b/tests/module/test_ngmix_weight_validation.py index 2dad3a8c..abc908fd 100644 --- a/tests/module/test_ngmix_weight_validation.py +++ b/tests/module/test_ngmix_weight_validation.py @@ -41,6 +41,7 @@ import pytest from shapepipe.modules.ngmix_package.ngmix import ( + PSF_NOISE, Postage_stamp, do_ngmix_metacal, get_prior, @@ -318,3 +319,114 @@ def test_metacal_ivar_beats_binary_under_noise_gradient( f"metacal ivar/binary shape-scatter ratio {ratio:.3f} >= 0.9: the" " rms-weight advantage does not survive the metacal/fixnoise path" ) + + +# --------------------------------------------------------------------------- +# PSF-observation weight (#749 / #774) +# +# make_ngmix_observation built the PSF observation weightless, so ngmix +# defaulted to unit weight on the unit-flux PSF stamp and the galaxy +# GPriorBA(0.4) prior shared with the PSF fitter swamped the likelihood. The +# recovered PSF *shape* then collapsed toward the prior (zero ellipticity). +# With the column grammar (#761) this corrupts the *_psf_orig diagnostic +# columns, fit from the original image PSF by average_original_psf. A finite +# flat weight = 1/PSF_NOISE**2 restores the likelihood. The fix touches only +# the diagnostic original-PSF fit (run with the galaxy prior); metacal fits the +# same psf_obs with a prior-free AdmomFitter, which is insensitive to a flat +# weight's scale, so the calibrated shear is unchanged. +# --------------------------------------------------------------------------- + +PSF_SHEAR = (0.05, -0.03) # injected PSF ellipticity for the recovery test + + +def _psf_orig_via_metacal(psf_noise, psf_shear=PSF_SHEAR, seed=7): + """Run the real production path and return (HSM truth, psf_orig, resdict). + + Builds a sheared-PSF stamp with the module's own ``make_data`` simulator, + pushes it through ``do_ngmix_metacal`` (which fits the original image PSF + via ``average_original_psf`` into the ``*_psf_orig`` columns), under a + given ``PSF_NOISE``. ``psf_noise=1.0`` reproduces the pre-fix unit weight. + """ + import shapepipe.modules.ngmix_package.ngmix as ngm + from shapepipe.testing.simulate import make_data + + old = ngm.PSF_NOISE + ngm.PSF_NOISE = psf_noise + try: + rng = np.random.RandomState(seed) + prior = get_prior(PIX_SCALE, rng) + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), shear=(0.0, 0.0), noise=1e-5, + n_epochs=2, img_size=51, psf_shear=psf_shear, + ) + hsm = galsim.hsm.FindAdaptiveMom(galsim.Image(psfs[0], scale=PIX_SCALE)) + g_truth = np.array([hsm.observed_shape.g1, hsm.observed_shape.g2]) + stamp = Postage_stamp(bkg_sub=False, megacam_flip=False) + (stamp.gals, stamp.psfs, stamp.weights, stamp.flags, stamp.jacobs) = ( + gals, psfs, weights, flags, jacobs, + ) + resdict, _psf_reconv, psf_orig = do_ngmix_metacal( + stamp, prior, 1.0, rng, + ) + return g_truth, psf_orig, resdict + finally: + ngm.PSF_NOISE = old + + +def test_make_ngmix_observation_psf_obs_is_weighted(): + """Structural guard: psf_obs carries the finite 1/PSF_NOISE**2 weight. + + The whole #749 fix is this weight; a regression to the weightless build + (unit weight) is what we must never ship again. + """ + from shapepipe.testing.simulate import make_data + + gals, psfs, _, weights, flags, jacobs = make_data( + rng=np.random.RandomState(123), shear=(0.0, 0.0), noise=1e-5, + n_epochs=1, img_size=51, psf_shear=PSF_SHEAR, + ) + obs = make_ngmix_observation( + gals[0], weights[0], flags[0], psfs[0], jacobs[0], + np.random.RandomState(0), + ) + npt.assert_allclose(obs.psf.weight, 1.0 / PSF_NOISE ** 2) + assert (obs.psf.weight > 1.0).all(), "PSF obs fell back to unit weight" + + +def test_psf_weight_recovers_original_psf_ellipticity(): + """#749 done-condition, end to end: *_psf_orig recovers the PSF shape. + + Through the real ``do_ngmix_metacal`` / ``average_original_psf`` path, the + original-PSF ellipticity matches the injected PSF moment shape with the + weight, and collapses toward the prior (zero) without it — the contrast + that proves the weight is load-bearing for the diagnostic columns. + """ + g_truth, psf_orig_w, _ = _psf_orig_via_metacal(PSF_NOISE) + _, psf_orig_u, _ = _psf_orig_via_metacal(1.0) # pre-fix unit weight + + # The fix: original-PSF column recovers the PSF moment shape. + npt.assert_allclose(psf_orig_w["g_psf"], g_truth, atol=1e-3) + + # The bug it cures: weightless collapses the shape toward the prior. + assert np.linalg.norm(psf_orig_u["g_psf"]) < 0.1 * np.linalg.norm(g_truth), ( + "weightless original-PSF fit did not collapse -- the #749 contrast is" + " gone, so this test no longer proves the weight is load-bearing" + ) + + +def test_psf_weight_leaves_metacal_shear_invariant(): + """The fix must not move the calibration: metacal shear flat in PSF_NOISE. + + Metacal fits the same psf_obs with a prior-free AdmomFitter (then dilates + to a round reconvolution kernel); a prior-free moment fit is insensitive to + a flat weight's absolute scale, so every metacal-type shear is invariant to + the diagnostic weight. Asserted bit-for-bit between the unit weight + (pre-fix) and the shipped PSF_NOISE. + """ + _, _, res_w = _psf_orig_via_metacal(PSF_NOISE) + _, _, res_u = _psf_orig_via_metacal(1.0) + for mtype in ("noshear", "1p", "1m", "2p", "2m"): + npt.assert_allclose( + res_w[mtype]["g"], res_u[mtype]["g"], rtol=0, atol=1e-6, + err_msg=f"PSF-obs weight moved the metacal {mtype} shear", + ) From e036a95aaca5e393002b359c7258dbebe1e2054c Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Wed, 1 Jul 2026 18:33:33 +0200 Subject: [PATCH 28/33] refactor(make_cat): remove the dead galsim shape serialization path ShapePipe has no galsim shape-measurement runner -- only ngmix_runner measures shapes and writes NGMIX_* columns. The galsim family in make_cat (_save_galsim_shapes, the process() "galsim" branch) could only be populated by reading an external shape catalogue that nothing in the pipeline produces; every production config pins SHAPE_MEASUREMENT_TYPE = ngmix. #761 renamed these columns onto the v2 grammar, but they are not creatable end-to-end -- confirmed from the sp_validation side, where the mirror galsim consumer was removed as dead code (sp_validation #201). Remove the dormant writer rather than carry grammar-renamed columns no pipeline can exercise. - delete SaveCatalogue._save_galsim_shapes and its process() "galsim" dispatch branch; process modes are now "ngmix" or "psf" - drop the now-orphaned cs_util.size import (only the galsim block used it) - delete tests/module/test_galsim_grammar_properties.py, which drove the removed writer Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_0191bMtgT2gcmcT7fNYbGLS1 --- .../modules/make_cat_package/make_cat.py | 142 +--------- .../module/test_galsim_grammar_properties.py | 251 ------------------ 2 files changed, 3 insertions(+), 390 deletions(-) delete mode 100644 tests/module/test_galsim_grammar_properties.py diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index e28ac643..ff8dd455 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -13,7 +13,6 @@ from astropy import coordinates as coords from astropy import units as u from astropy.wcs import WCS -from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io @@ -172,7 +171,7 @@ def process( Parameters ---------- mode : str - Run mode, options are ``ngmix``, ``galsim`` or ``psf`` + Run mode, options are ``ngmix`` or ``psf`` cat_path : str Path to input catalogue moments : bool @@ -192,15 +191,13 @@ def process( err_msg = None if mode == "ngmix": err_msg = self._save_ngmix_data(cat_path, moments) - elif mode == "galsim": - self._save_galsim_shapes(cat_path) elif mode == "psf": self._save_psf_data(cat_path) else: err_msg = ( f"Invalid process mode ({mode}) for " - + '``make_cat.Savecatalogue``. Options are "ngmix", ' - + '"galsim" or "psf".' + + '``make_cat.Savecatalogue``. Options are "ngmix" ' + + 'or "psf".' ) if err_msg is None: @@ -472,139 +469,6 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): return None - def _save_galsim_shapes(self, galsim_cat_path): - """Save GalSim Shapes. - - Save the GalSim catalogue into the final one. - - Parameters - ---------- - galsim_cat_path : str - Path to GalSim catalogue to save - - """ - galsim_cat_file = file_io.FITSCatalogue(galsim_cat_path) - galsim_cat_file.open() - - self._key_ends = galsim_cat_file.get_ext_name()[1:] - - galsim_id = galsim_cat_file.get_data()["id"] - - for key_str in ( - "GALSIM_T_", - "GALSIM_T_PSF_", - "GALSIM_FLUX_", - "GALSIM_MAG_", - ): - self._update_dict(key_str, np.zeros(len(self._obj_id))) - for key_str in ("GALSIM_FLUX_ERR_", "GALSIM_MAG_ERR_", "GALSIM_RES_"): - self._update_dict(key_str, np.ones(len(self._obj_id)) * -1) - for key_str in ( - "GALSIM_G1_", - "GALSIM_G2_", - "GALSIM_G1_ERR_", - "GALSIM_G2_ERR_", - "GALSIM_G1_UNCORR_", - "GALSIM_G2_UNCORR_", - "GALSIM_G1_PSF_", - "GALSIM_G2_PSF_", - ): - self._update_dict(key_str, np.ones(len(self._obj_id)) * -10.0) - self._update_dict( - "GALSIM_FLAGS_", - np.ones(len(self._obj_id), dtype="int16"), - ) - - for idx, id_tmp in enumerate(self._obj_id): - ind = np.where(id_tmp == galsim_id)[0] - if len(ind) > 0: - - for key in self._key_ends: - - gcf_data = galsim_cat_file.get_data(key) - - if key == "ORIGINAL_PSF": - - # PSF columns sourced from the galaxy uncorr fields - # for this special extension (asymmetry preserved). - self._add2dict( - f"GALSIM_G1_PSF_{key}", - gcf_data["gal_uncorr_g1"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_G2_PSF_{key}", - gcf_data["gal_uncorr_g2"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_T_PSF_{key}", - cs_size.sigma_to_T(gcf_data["gal_sigma"][ind[0]]), - idx - ) - - else: - - self._add2dict( - f"GALSIM_G1_{key}", gcf_data["gal_g1"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_G2_{key}", gcf_data["gal_g2"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_G1_ERR_{key}", - gcf_data["gal_g1_err"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_G2_ERR_{key}", - gcf_data["gal_g2_err"][ind[0]], idx - ) - - self._add2dict( - f"GALSIM_G1_UNCORR_{key}", - gcf_data["gal_uncorr_g1"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_G2_UNCORR_{key}", - gcf_data["gal_uncorr_g2"][ind[0]], idx - ) - - self._add2dict( - f"GALSIM_T_{key}", - cs_size.sigma_to_T(gcf_data["gal_sigma"][ind[0]]), - idx - ) - - self._add2dict( - f"GALSIM_G1_PSF_{key}", - gcf_data["psf_g1"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_G2_PSF_{key}", - gcf_data["psf_g2"][ind[0]], idx - ) - self._add2dict( - f"GALSIM_T_PSF_{key}", - cs_size.sigma_to_T(gcf_data["psf_sigma"][ind[0]]), - idx - ) - - flux = gcf_data["gal_flux"][ind[0]] - flux_err = gcf_data["gal_flux_err"][ind[0]] - self._add2dict(f"GALSIM_FLUX_{key}", flux, idx) - self._add2dict(f"GALSIM_FLUX_ERR_{key}", flux_err, idx) - - mag = gcf_data["gal_mag"][ind[0]] - mag_err = gcf_data["gal_mag_err"][ind[0]] - self._add2dict(f"GALSIM_MAG_{key}", mag, idx) - self._add2dict(f"GALSIM_MAG_ERR_{key}", mag_err, idx) - - flags = gcf_data["gal_flag"][ind[0]] - self._add2dict(f"GALSIM_FLAGS_{key}", flags, idx) - - res = gcf_data["gal_resolution"][ind[0]] - self._add2dict(f"GALSIM_RES_{key}", res, idx) - - galsim_cat_file.close() - def _save_psf_data(self, galaxy_psf_path): """Save PSF data. diff --git a/tests/module/test_galsim_grammar_properties.py b/tests/module/test_galsim_grammar_properties.py deleted file mode 100644 index 28346eaf..00000000 --- a/tests/module/test_galsim_grammar_properties.py +++ /dev/null @@ -1,251 +0,0 @@ -"""PROPERTY-BASED TESTS FOR THE GALSIM GALAXY/PSF COLUMN GRAMMAR. - -Drives ``SaveCatalogue._save_galsim_shapes`` (the make_cat writer) to pin the -renamed galsim grammar (shapepipe#761): the packed length-2 -``GALSIM_GAL_ELL_*`` / ``GALSIM_PSF_ELL_*`` columns are split into scalar -``GALSIM_G1_*`` / ``GALSIM_G2_*`` (g-type — galsim is correctly g-type, no -e/g value fix needed here), the galaxy carries no ``GAL`` token (implicit -default object, same rule as ngmix), and the raw ``GALSIM_GAL_SIGMA_*`` / -``GALSIM_PSF_SIGMA_*`` columns are replaced by the single stored size -``GALSIM_T_*`` (``T = 2 sigma^2``, via ``cs_util.size.sigma_to_T``). - -The ``ORIGINAL_PSF`` extension is a deliberate asymmetry, preserved rather -than normalized away: its PSF columns are sourced from the *galaxy's* -uncorrected fields (``gal_uncorr_g1/g2``, ``gal_sigma``), not the -per-extension ``psf_g1/g2``/``psf_sigma`` fields every other extension uses. -""" - -import re -import tempfile -from pathlib import Path - -import numpy as np -import numpy.testing as npt -from astropy.io import fits -from cs_util import size as cs_size -from hypothesis import given, settings -from hypothesis import strategies as st - -from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue - -# --------------------------------------------------------------------------- -# Harness -# --------------------------------------------------------------------------- - -# Source columns _save_galsim_shapes reads per extension. The ORIGINAL_PSF -# extension only consumes the gal_uncorr_g1/g2 + gal_sigma subset, but the -# real galsim catalogue carries the full set on every extension, so the -# fixture mirrors that rather than special-casing the row shape. -GALSIM_KEYS = [ - "id", - "gal_g1", "gal_g2", "gal_g1_err", "gal_g2_err", - "gal_uncorr_g1", "gal_uncorr_g2", "gal_sigma", - "psf_g1", "psf_g2", "psf_sigma", - "gal_flux", "gal_flux_err", "gal_mag", "gal_mag_err", - "gal_flag", "gal_resolution", -] -INT_KEYS = {"id", "gal_flag"} - -# Per-key multiplier so a single scalar ``seed`` fans out into twelve -# distinct values — a swapped source (e.g. psf_g1 <-> gal_uncorr_g1) breaks -# at least one equality below. -_MULT = { - "gal_g1": 1.0, "gal_g2": 1.3, "gal_g1_err": 1.7, "gal_g2_err": 2.1, - "gal_uncorr_g1": 2.6, "gal_uncorr_g2": 3.1, "gal_sigma": 0.4, - "psf_g1": 0.6, "psf_g2": 0.8, "psf_sigma": 0.5, - "gal_flux": 10.0, "gal_flux_err": 1.0, "gal_mag": 5.0, - "gal_mag_err": 0.2, -} - - -def _galsim_row(obj_id, seed): - """One object's per-key values for one extension, scaled by ``seed``.""" - row = {"id": obj_id, "gal_flag": 0, "gal_resolution": 0.5} - row.update({key: mult * seed for key, mult in _MULT.items()}) - return row - - -def _write_galsim_cat(path, obj_ids, seed_by_ext): - """Write a synthetic galsim FITS, one extension per ``seed_by_ext`` key. - - Extension order is preserved (dict order), matching how - ``get_ext_name()[1:]`` drives ``self._key_ends`` in production. - """ - hdus = [fits.PrimaryHDU()] - for ext, seed in seed_by_ext.items(): - rows = [_galsim_row(oid, seed) for oid in obj_ids] - cols = [ - fits.Column( - name=key, - format="K" if key in INT_KEYS else "D", - array=np.array([row[key] for row in rows]), - ) - for key in GALSIM_KEYS - ] - hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) - fits.HDUList(hdus).writeto(path, overwrite=True) - - -def _run_save_galsim(galsim_path, obj_ids): - """Drive ``_save_galsim_shapes`` and return its populated output dict.""" - inst = object.__new__(SaveCatalogue) - inst._obj_id = np.asarray(obj_ids) - inst._output_dict = {} - inst._save_galsim_shapes(str(galsim_path)) - return inst._output_dict - - -# --------------------------------------------------------------------------- -# Strategies -# --------------------------------------------------------------------------- - -obj_ids_strategy = st.lists( - st.integers(min_value=1, max_value=10_000), min_size=1, max_size=5, - unique=True, -) -# A finite seed bounded away from zero, so every per-key value it scales is -# itself non-zero and the family-vs-family / key-vs-key comparisons below are -# never vacuously satisfied by an accidental 0 == 0. -_seed = st.floats( - min_value=0.05, max_value=5.0, allow_nan=False, allow_infinity=False, -) - - -# --------------------------------------------------------------------------- -# (a) Scalar split + size routing, regular extension -# --------------------------------------------------------------------------- - -@settings(deadline=None, max_examples=25) -@given(obj_ids=obj_ids_strategy, seed=_seed) -def test_regular_extension_scalar_split_and_size(obj_ids, seed): - """A non-``ORIGINAL_PSF`` extension routes each source to its own column. - - ``GALSIM_G1/G2_`` from ``gal_g1/g2``, the ``_ERR``/``_UNCORR`` - variants from their namesake sources, ``GALSIM_T_`` from - ``sigma_to_T(gal_sigma)``, and the PSF family - (``GALSIM_G1/G2_PSF_``, ``GALSIM_T_PSF_``) from ``psf_g1/g2`` / - ``sigma_to_T(psf_sigma)`` — never from the galaxy's own fields. - """ - ext = "NOSHEAR" - path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name - _write_galsim_cat(path, obj_ids, {ext: seed}) - out = _run_save_galsim(path, obj_ids) - Path(path).unlink() - - n = len(obj_ids) - row = _galsim_row(obj_ids[0], seed) - npt.assert_allclose(out[f"GALSIM_G1_{ext}"], [row["gal_g1"]] * n) - npt.assert_allclose(out[f"GALSIM_G2_{ext}"], [row["gal_g2"]] * n) - npt.assert_allclose(out[f"GALSIM_G1_ERR_{ext}"], [row["gal_g1_err"]] * n) - npt.assert_allclose(out[f"GALSIM_G2_ERR_{ext}"], [row["gal_g2_err"]] * n) - npt.assert_allclose( - out[f"GALSIM_G1_UNCORR_{ext}"], [row["gal_uncorr_g1"]] * n - ) - npt.assert_allclose( - out[f"GALSIM_G2_UNCORR_{ext}"], [row["gal_uncorr_g2"]] * n - ) - npt.assert_allclose( - out[f"GALSIM_T_{ext}"], [cs_size.sigma_to_T(row["gal_sigma"])] * n - ) - npt.assert_allclose(out[f"GALSIM_G1_PSF_{ext}"], [row["psf_g1"]] * n) - npt.assert_allclose(out[f"GALSIM_G2_PSF_{ext}"], [row["psf_g2"]] * n) - npt.assert_allclose( - out[f"GALSIM_T_PSF_{ext}"], [cs_size.sigma_to_T(row["psf_sigma"])] * n - ) - # The galaxy and PSF families must stay distinct (un-aliased), and the - # raw-sigma value must never leak through unconverted. - assert not np.allclose(out[f"GALSIM_T_{ext}"], row["gal_sigma"]) - assert not np.allclose(out[f"GALSIM_G1_{ext}"], out[f"GALSIM_G1_PSF_{ext}"]) - - npt.assert_allclose(out[f"GALSIM_FLUX_{ext}"], [row["gal_flux"]] * n) - npt.assert_allclose(out[f"GALSIM_FLUX_ERR_{ext}"], [row["gal_flux_err"]] * n) - npt.assert_allclose(out[f"GALSIM_MAG_{ext}"], [row["gal_mag"]] * n) - npt.assert_allclose(out[f"GALSIM_MAG_ERR_{ext}"], [row["gal_mag_err"]] * n) - npt.assert_allclose(out[f"GALSIM_RES_{ext}"], [row["gal_resolution"]] * n) - - -# --------------------------------------------------------------------------- -# (b) ORIGINAL_PSF asymmetry: sourced from the galaxy's uncorrected fields -# --------------------------------------------------------------------------- - -@settings(deadline=None, max_examples=25) -@given(obj_ids=obj_ids_strategy, seed_main=_seed, seed_origpsf=_seed) -def test_original_psf_extension_sourced_from_gal_uncorr( - obj_ids, seed_main, seed_origpsf -): - """``ORIGINAL_PSF`` columns trace to ``gal_uncorr_*``/``gal_sigma``, NOT - ``psf_*`` — the one preserved asymmetry in the galsim grammar. - - The two extensions are written with independent seeds, and the - ``ORIGINAL_PSF`` row's own ``psf_g1/g2``/``psf_sigma`` are deliberately - distinct from its ``gal_uncorr_*``/``gal_sigma`` (the seed multipliers - differ), so a regression that wired the regular per-extension ``psf_*`` - sourcing into ``ORIGINAL_PSF`` would be caught, not masked by equal - values. - """ - seed_by_ext = {"NOSHEAR": seed_main, "ORIGINAL_PSF": seed_origpsf} - path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name - _write_galsim_cat(path, obj_ids, seed_by_ext) - out = _run_save_galsim(path, obj_ids) - Path(path).unlink() - - n = len(obj_ids) - op_row = _galsim_row(obj_ids[0], seed_origpsf) - npt.assert_allclose( - out["GALSIM_G1_PSF_ORIGINAL_PSF"], [op_row["gal_uncorr_g1"]] * n - ) - npt.assert_allclose( - out["GALSIM_G2_PSF_ORIGINAL_PSF"], [op_row["gal_uncorr_g2"]] * n - ) - npt.assert_allclose( - out["GALSIM_T_PSF_ORIGINAL_PSF"], - [cs_size.sigma_to_T(op_row["gal_sigma"])] * n, - ) - # Not sourced from this extension's own psf_g1/g2/psf_sigma (those are - # scaled by the same seed but through different multipliers, so equality - # here would mean the asymmetry was lost). - assert not np.allclose( - out["GALSIM_G1_PSF_ORIGINAL_PSF"], [op_row["psf_g1"]] * n - ) - assert not np.allclose( - out["GALSIM_T_PSF_ORIGINAL_PSF"], - [cs_size.sigma_to_T(op_row["psf_sigma"])] * n, - ) - - -# --------------------------------------------------------------------------- -# (c) Every emitted column matches the new grammar; no legacy tokens survive -# --------------------------------------------------------------------------- - -@settings(deadline=None, max_examples=25) -@given(obj_ids=obj_ids_strategy, seed=_seed) -def test_emitted_columns_match_grammar_no_legacy_tokens(obj_ids, seed): - """Every ``GALSIM_*`` column matches the scalar grammar; nothing packed. - - No ``GALSIM_GAL_ELL*``/``GALSIM_PSF_ELL*`` (packed length-2 ellipticity) - and no ``GALSIM_*_SIGMA*`` (raw size) survives — both replaced by scalar - ``G1``/``G2`` components and the single stored size ``T``. The galaxy - also carries no ``GAL`` token (implicit default object, same rule as - ngmix). - """ - ext_names = ["NOSHEAR", "ORIGINAL_PSF"] - seed_by_ext = {ext: seed * (i + 1) for i, ext in enumerate(ext_names)} - path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name - _write_galsim_cat(path, obj_ids, seed_by_ext) - out = _run_save_galsim(path, obj_ids) - Path(path).unlink() - - assert out, "writer produced no columns" - - _component = "G1|G2|T" - _qual = "(?:_ERR|_UNCORR)?" - _ext = "|".join(re.escape(e) for e in ext_names) - grammar_re = re.compile( - rf"^GALSIM_(?:{_component}){_qual}(?:_PSF)?_(?:{_ext})$" - rf"|^GALSIM_(?:FLUX|FLUX_ERR|MAG|MAG_ERR|FLAGS|RES)_(?:{_ext})$" - ) - for col in out: - assert grammar_re.match(col), f"off-grammar column emitted: {col!r}" - assert "_ELL" not in col, col - assert "SIGMA" not in col, col - assert "_GAL_" not in col and not col.endswith("_GAL"), col From df84064f1bf569f234e44c803dc8fe4006c59935 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Wed, 1 Jul 2026 18:33:48 +0200 Subject: [PATCH 29/33] refactor(make_cat): restrict SHAPE_MEASUREMENT_TYPE to ngmix ngmix is now the pipeline's only shape estimator, so make_cat_runner validates SHAPE_MEASUREMENT_TYPE against ngmix alone and raises on anything else. The knob is kept (rather than hardcoded) as the extension point for a future estimator family, mirroring sp_validation's retained `prefix`. The shape-write loop drops the galsim-vs-ngmix catalogue selection -- shapes are always read from the single ngmix input. Docstring and the cat_matched.param comment lose their galsim references. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_0191bMtgT2gcmcT7fNYbGLS1 --- example/unions_800/cat_matched.param | 2 +- src/shapepipe/modules/make_cat_package/__init__.py | 2 +- src/shapepipe/modules/make_cat_runner.py | 14 +++----------- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/example/unions_800/cat_matched.param b/example/unions_800/cat_matched.param index f387be00..bfe7c160 100644 --- a/example/unions_800/cat_matched.param +++ b/example/unions_800/cat_matched.param @@ -87,7 +87,7 @@ FLAG_TILING # magnitude, mainly for plots MAG_AUTO -# SNR from SExtractor, used for cuts on GALSIM shapes +# SNR from SExtractor, used for shape cuts SNR_WIN # PSF size measured on original image diff --git a/src/shapepipe/modules/make_cat_package/__init__.py b/src/shapepipe/modules/make_cat_package/__init__.py index 84e6c97a..22b48d81 100644 --- a/src/shapepipe/modules/make_cat_package/__init__.py +++ b/src/shapepipe/modules/make_cat_package/__init__.py @@ -27,7 +27,7 @@ =================================== SHAPE_MEASUREMENT_TYPE : list - Shape measurement method, valid options are ``ngmix`` and/or ``galsim`` + Shape measurement method; the only supported option is ``ngmix`` SAVE_PSF_DATA : bool, optional Save PSF information if ``True``; default value is ``False`` TILE_LIST : str, optional diff --git a/src/shapepipe/modules/make_cat_runner.py b/src/shapepipe/modules/make_cat_runner.py index 6ab6449d..7bf293cd 100644 --- a/src/shapepipe/modules/make_cat_runner.py +++ b/src/shapepipe/modules/make_cat_runner.py @@ -49,10 +49,8 @@ def make_cat_runner( "SHAPE_MEASUREMENT_TYPE", ) for shape_type in shape_type_list: - if shape_type.lower() not in ["ngmix", "galsim"]: - raise ValueError( - "SHAPE_MEASUREMENT_TYPE must be in [ngmix, galsim]" - ) + if shape_type.lower() != "ngmix": + raise ValueError("SHAPE_MEASUREMENT_TYPE must be ngmix") # Fetch PSF data option if config.has_option(module_config_sec, "SAVE_PSF_DATA"): @@ -76,13 +74,7 @@ def make_cat_runner( w_log.info("Save shape measurement data") for shape_type in shape_type_list: w_log.info(f"Save {shape_type.lower()} data") - # A second, galsim-specific shape catalogue path (shape2_cat_path) - # was never produced by any runner in this pipeline (no - # galsim_shapes_runner exists) — galsim shapes are read from the - # same input as ngmix (shape1_cat_path). - cat_path = shape1_cat_path - err_msg = sc_inst.process(shape_type.lower(), cat_path) - + err_msg = sc_inst.process(shape_type.lower(), shape1_cat_path) # If error message: delete (incomplete) output file and raise error if err_msg is not None: From 1c69af55fca03d253982eb796f23d35856e123f5 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Wed, 1 Jul 2026 18:33:58 +0200 Subject: [PATCH 30/33] test: drop galsim from the frozen column-grammar examples The static grammar test covered ngmix, galsim, and HSM. With the galsim writer gone, drop its regex branch and its valid/invalid example columns; the frozen grammar now spans the two families ShapePipe actually produces (ngmix, HSM). Also drop the docstring pointer to the deleted test_galsim_grammar_properties.py. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_0191bMtgT2gcmcT7fNYbGLS1 --- tests/module/test_psf_grammar_properties.py | 46 +++++++-------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/tests/module/test_psf_grammar_properties.py b/tests/module/test_psf_grammar_properties.py index 09eaf181..593614a1 100644 --- a/tests/module/test_psf_grammar_properties.py +++ b/tests/module/test_psf_grammar_properties.py @@ -21,14 +21,13 @@ (``example/cfis/final_cat.param``) names is a column the writer can produce — writer/param-file consistency; (d) the FULL frozen grammar (shapepipe#761) — ``ESTIMATOR_COMPONENT[_ERR]_ - OBJECT[_metacaltype]`` — holds across all three estimator families: ngmix - and galsim split ellipticity into ``G1``/``G2`` (g-type), HSM into - ``E1``/``E2`` (e-type); every family stores exactly one size, ``T``; HSM - keeps the singular ``FLAG`` token, ngmix/galsim keep plural ``FLAGS``. - This part is a static example-based check (not writer-driven), so it - covers galsim/HSM without depending on those producer modules — see - ``test_galsim_grammar_properties.py`` and ``test_psfex_interp.py`` for - the writer-driven coverage of those families. + OBJECT[_metacaltype]`` — holds across both estimator families: ngmix + splits ellipticity into ``G1``/``G2`` (g-type), HSM into ``E1``/``E2`` + (e-type); every family stores exactly one size, ``T``; HSM keeps the + singular ``FLAG`` token, ngmix keeps plural ``FLAGS``. This part is a + static example-based check (not writer-driven), so it covers HSM without + depending on its producer module — see ``test_psfex_interp.py`` for the + writer-driven coverage of that family. Setup for (a)-(c) mirrors ``tests/module/test_make_cat.py``: a synthetic ngmix FITS with the five shear-type extensions carrying the exact @@ -390,23 +389,22 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): # --------------------------------------------------------------------------- -# (d) The FULL frozen grammar, across all three estimator families +# (d) The FULL frozen grammar, across both estimator families # --------------------------------------------------------------------------- # # ``ESTIMATOR_COMPONENT[_ERR]_OBJECT[_metacaltype]`` (shapepipe#761). This # section encodes the grammar itself as a static property — example column -# names, not a driven writer — so it covers ngmix, galsim, and HSM together -# without depending on the galsim/HSM producer modules (covered by their own -# writer-driving tests: ``test_galsim_grammar_properties.py``, -# ``test_psfex_interp.py``). What it pins: +# names, not a driven writer — so it covers ngmix and HSM together without +# depending on the HSM producer module (covered by its own writer-driving +# test, ``test_psfex_interp.py``). What it pins: # -# - galaxy is the implicit default object, no token, for BOTH g-type writers -# (ngmix, galsim); +# - galaxy is the implicit default object, no token, for the g-type ngmix +# writer; # - ellipticity is split into named scalar components, never a packed -# 2-vector: ``G1``/``G2`` for g-type (ngmix, galsim), ``E1``/``E2`` for -# e-type (HSM); +# 2-vector: ``G1``/``G2`` for g-type (ngmix), ``E1``/``E2`` for e-type +# (HSM); # - exactly one stored size, ``T`` (no ``SIGMA``/``FWHM``/``r50``); -# - HSM keeps the singular ``FLAG`` token; ngmix/galsim keep plural ``FLAGS``. +# - HSM keeps the singular ``FLAG`` token; ngmix keeps plural ``FLAGS``. FROZEN_GRAMMAR_RE = re.compile( r"^(?:" @@ -414,10 +412,6 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): r"NGMIXm?_(?:G1|G2|T|SNR|FLUX|MAG|FLAGS)(?:_ERR)?" r"(?:_(?:PSF_ORIG|PSF_RECONV))?_(?:NOSHEAR|1P|1M|2P|2M)" r"|NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)" - # galsim: galaxy (no object token) or explicit PSF object; is a - # data-driven FITS extension name, so left as a generic uppercase token. - r"|GALSIM_(?:G1|G2|T)(?:_ERR|_UNCORR)?(?:_PSF)?_[A-Z0-9_]+" - r"|GALSIM_(?:FLUX|FLUX_ERR|MAG|MAG_ERR|FLAGS|RES)_[A-Z0-9_]+" # HSM: e-type, explicit PSF/STAR object, singular FLAG; the multi-epoch # sink in make_cat._save_psf_data appends a bare epoch index. r"|HSM_(?:E1|E2|T)_(?:PSF|STAR)(?:_\d+)?" @@ -434,10 +428,6 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): "NGMIX_FLAGS_2P", "NGMIXm_G1_PSF_RECONV_NOSHEAR", "NGMIX_MCAL_FLAGS", - "GALSIM_G1_NOSHEAR", # galaxy, no object token - "GALSIM_G2_UNCORR_NOSHEAR", - "GALSIM_T_PSF_ORIGINAL_PSF", - "GALSIM_FLAGS_NOSHEAR", "HSM_E1_PSF", "HSM_T_STAR", "HSM_FLAG_PSF", @@ -455,10 +445,6 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): "SIGMA_PSF_HSM", # raw sigma, not T "HSM_SIGMA_PSF", # stored sigma instead of T "HSM_FLAGS_PSF", # plural — HSM is singular FLAG - "GALSIM_GAL_ELL_NOSHEAR", # GAL token + packed ellipticity - "GALSIM_GAL_SIGMA_NOSHEAR", # GAL token + raw sigma - "GALSIM_PSF_ELL_NOSHEAR", # packed ellipticity - "GALSIM_FWHM_NOSHEAR", # FWHM, not T "SPREAD_MODEL", # removed entirely, not renamed ] From de138b6083e72798376ae07be2ec05dbc43139de Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 3 Jul 2026 18:51:05 +0200 Subject: [PATCH 31/33] =?UTF-8?q?Revert=20"remove=20the=20galsim=20dead=20?= =?UTF-8?q?shape=20path"=20=E2=80=94=20carve=20out=20of=20#761?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restores the galsim shape-serialization path that commits e036a95a, df84064f and 1c69af55 removed: - _save_galsim_shapes() writer in make_cat.py (new-grammar form) - GALSIM in SHAPE_MEASUREMENT_TYPE (make_cat_runner, __init__) - test_galsim_grammar_properties.py and the galsim frozen-grammar case Per Axel Guinot's 2026-07-02 review of #761: the galsim dead-path removal is unrelated to the reconvolved-PSF / column-grammar purpose of this PR ("too many things at once"). The removal is good work and stands on its own — it moves to a dedicated PR against ngmix_v2.0 rather than riding here. This revert keeps #761 scoped to the PSF column grammar. This reverts the net effect of e036a95a, df84064f and 1c69af55. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01GQSyaXLnLZtksntYAYD41E --- example/unions_800/cat_matched.param | 2 +- .../modules/make_cat_package/__init__.py | 2 +- .../modules/make_cat_package/make_cat.py | 142 +++++++++- src/shapepipe/modules/make_cat_runner.py | 14 +- .../module/test_galsim_grammar_properties.py | 251 ++++++++++++++++++ tests/module/test_psf_grammar_properties.py | 46 ++-- 6 files changed, 433 insertions(+), 24 deletions(-) create mode 100644 tests/module/test_galsim_grammar_properties.py diff --git a/example/unions_800/cat_matched.param b/example/unions_800/cat_matched.param index bfe7c160..f387be00 100644 --- a/example/unions_800/cat_matched.param +++ b/example/unions_800/cat_matched.param @@ -87,7 +87,7 @@ FLAG_TILING # magnitude, mainly for plots MAG_AUTO -# SNR from SExtractor, used for shape cuts +# SNR from SExtractor, used for cuts on GALSIM shapes SNR_WIN # PSF size measured on original image diff --git a/src/shapepipe/modules/make_cat_package/__init__.py b/src/shapepipe/modules/make_cat_package/__init__.py index 22b48d81..84e6c97a 100644 --- a/src/shapepipe/modules/make_cat_package/__init__.py +++ b/src/shapepipe/modules/make_cat_package/__init__.py @@ -27,7 +27,7 @@ =================================== SHAPE_MEASUREMENT_TYPE : list - Shape measurement method; the only supported option is ``ngmix`` + Shape measurement method, valid options are ``ngmix`` and/or ``galsim`` SAVE_PSF_DATA : bool, optional Save PSF information if ``True``; default value is ``False`` TILE_LIST : str, optional diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index ff8dd455..e28ac643 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -13,6 +13,7 @@ from astropy import coordinates as coords from astropy import units as u from astropy.wcs import WCS +from cs_util import size as cs_size from sqlitedict import SqliteDict from shapepipe.pipeline import file_io @@ -171,7 +172,7 @@ def process( Parameters ---------- mode : str - Run mode, options are ``ngmix`` or ``psf`` + Run mode, options are ``ngmix``, ``galsim`` or ``psf`` cat_path : str Path to input catalogue moments : bool @@ -191,13 +192,15 @@ def process( err_msg = None if mode == "ngmix": err_msg = self._save_ngmix_data(cat_path, moments) + elif mode == "galsim": + self._save_galsim_shapes(cat_path) elif mode == "psf": self._save_psf_data(cat_path) else: err_msg = ( f"Invalid process mode ({mode}) for " - + '``make_cat.Savecatalogue``. Options are "ngmix" ' - + 'or "psf".' + + '``make_cat.Savecatalogue``. Options are "ngmix", ' + + '"galsim" or "psf".' ) if err_msg is None: @@ -469,6 +472,139 @@ def _save_ngmix_data(self, ngmix_cat_path, moments=False): return None + def _save_galsim_shapes(self, galsim_cat_path): + """Save GalSim Shapes. + + Save the GalSim catalogue into the final one. + + Parameters + ---------- + galsim_cat_path : str + Path to GalSim catalogue to save + + """ + galsim_cat_file = file_io.FITSCatalogue(galsim_cat_path) + galsim_cat_file.open() + + self._key_ends = galsim_cat_file.get_ext_name()[1:] + + galsim_id = galsim_cat_file.get_data()["id"] + + for key_str in ( + "GALSIM_T_", + "GALSIM_T_PSF_", + "GALSIM_FLUX_", + "GALSIM_MAG_", + ): + self._update_dict(key_str, np.zeros(len(self._obj_id))) + for key_str in ("GALSIM_FLUX_ERR_", "GALSIM_MAG_ERR_", "GALSIM_RES_"): + self._update_dict(key_str, np.ones(len(self._obj_id)) * -1) + for key_str in ( + "GALSIM_G1_", + "GALSIM_G2_", + "GALSIM_G1_ERR_", + "GALSIM_G2_ERR_", + "GALSIM_G1_UNCORR_", + "GALSIM_G2_UNCORR_", + "GALSIM_G1_PSF_", + "GALSIM_G2_PSF_", + ): + self._update_dict(key_str, np.ones(len(self._obj_id)) * -10.0) + self._update_dict( + "GALSIM_FLAGS_", + np.ones(len(self._obj_id), dtype="int16"), + ) + + for idx, id_tmp in enumerate(self._obj_id): + ind = np.where(id_tmp == galsim_id)[0] + if len(ind) > 0: + + for key in self._key_ends: + + gcf_data = galsim_cat_file.get_data(key) + + if key == "ORIGINAL_PSF": + + # PSF columns sourced from the galaxy uncorr fields + # for this special extension (asymmetry preserved). + self._add2dict( + f"GALSIM_G1_PSF_{key}", + gcf_data["gal_uncorr_g1"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G2_PSF_{key}", + gcf_data["gal_uncorr_g2"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_T_PSF_{key}", + cs_size.sigma_to_T(gcf_data["gal_sigma"][ind[0]]), + idx + ) + + else: + + self._add2dict( + f"GALSIM_G1_{key}", gcf_data["gal_g1"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G2_{key}", gcf_data["gal_g2"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G1_ERR_{key}", + gcf_data["gal_g1_err"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G2_ERR_{key}", + gcf_data["gal_g2_err"][ind[0]], idx + ) + + self._add2dict( + f"GALSIM_G1_UNCORR_{key}", + gcf_data["gal_uncorr_g1"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G2_UNCORR_{key}", + gcf_data["gal_uncorr_g2"][ind[0]], idx + ) + + self._add2dict( + f"GALSIM_T_{key}", + cs_size.sigma_to_T(gcf_data["gal_sigma"][ind[0]]), + idx + ) + + self._add2dict( + f"GALSIM_G1_PSF_{key}", + gcf_data["psf_g1"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_G2_PSF_{key}", + gcf_data["psf_g2"][ind[0]], idx + ) + self._add2dict( + f"GALSIM_T_PSF_{key}", + cs_size.sigma_to_T(gcf_data["psf_sigma"][ind[0]]), + idx + ) + + flux = gcf_data["gal_flux"][ind[0]] + flux_err = gcf_data["gal_flux_err"][ind[0]] + self._add2dict(f"GALSIM_FLUX_{key}", flux, idx) + self._add2dict(f"GALSIM_FLUX_ERR_{key}", flux_err, idx) + + mag = gcf_data["gal_mag"][ind[0]] + mag_err = gcf_data["gal_mag_err"][ind[0]] + self._add2dict(f"GALSIM_MAG_{key}", mag, idx) + self._add2dict(f"GALSIM_MAG_ERR_{key}", mag_err, idx) + + flags = gcf_data["gal_flag"][ind[0]] + self._add2dict(f"GALSIM_FLAGS_{key}", flags, idx) + + res = gcf_data["gal_resolution"][ind[0]] + self._add2dict(f"GALSIM_RES_{key}", res, idx) + + galsim_cat_file.close() + def _save_psf_data(self, galaxy_psf_path): """Save PSF data. diff --git a/src/shapepipe/modules/make_cat_runner.py b/src/shapepipe/modules/make_cat_runner.py index 7bf293cd..6ab6449d 100644 --- a/src/shapepipe/modules/make_cat_runner.py +++ b/src/shapepipe/modules/make_cat_runner.py @@ -49,8 +49,10 @@ def make_cat_runner( "SHAPE_MEASUREMENT_TYPE", ) for shape_type in shape_type_list: - if shape_type.lower() != "ngmix": - raise ValueError("SHAPE_MEASUREMENT_TYPE must be ngmix") + if shape_type.lower() not in ["ngmix", "galsim"]: + raise ValueError( + "SHAPE_MEASUREMENT_TYPE must be in [ngmix, galsim]" + ) # Fetch PSF data option if config.has_option(module_config_sec, "SAVE_PSF_DATA"): @@ -74,7 +76,13 @@ def make_cat_runner( w_log.info("Save shape measurement data") for shape_type in shape_type_list: w_log.info(f"Save {shape_type.lower()} data") - err_msg = sc_inst.process(shape_type.lower(), shape1_cat_path) + # A second, galsim-specific shape catalogue path (shape2_cat_path) + # was never produced by any runner in this pipeline (no + # galsim_shapes_runner exists) — galsim shapes are read from the + # same input as ngmix (shape1_cat_path). + cat_path = shape1_cat_path + err_msg = sc_inst.process(shape_type.lower(), cat_path) + # If error message: delete (incomplete) output file and raise error if err_msg is not None: diff --git a/tests/module/test_galsim_grammar_properties.py b/tests/module/test_galsim_grammar_properties.py new file mode 100644 index 00000000..28346eaf --- /dev/null +++ b/tests/module/test_galsim_grammar_properties.py @@ -0,0 +1,251 @@ +"""PROPERTY-BASED TESTS FOR THE GALSIM GALAXY/PSF COLUMN GRAMMAR. + +Drives ``SaveCatalogue._save_galsim_shapes`` (the make_cat writer) to pin the +renamed galsim grammar (shapepipe#761): the packed length-2 +``GALSIM_GAL_ELL_*`` / ``GALSIM_PSF_ELL_*`` columns are split into scalar +``GALSIM_G1_*`` / ``GALSIM_G2_*`` (g-type — galsim is correctly g-type, no +e/g value fix needed here), the galaxy carries no ``GAL`` token (implicit +default object, same rule as ngmix), and the raw ``GALSIM_GAL_SIGMA_*`` / +``GALSIM_PSF_SIGMA_*`` columns are replaced by the single stored size +``GALSIM_T_*`` (``T = 2 sigma^2``, via ``cs_util.size.sigma_to_T``). + +The ``ORIGINAL_PSF`` extension is a deliberate asymmetry, preserved rather +than normalized away: its PSF columns are sourced from the *galaxy's* +uncorrected fields (``gal_uncorr_g1/g2``, ``gal_sigma``), not the +per-extension ``psf_g1/g2``/``psf_sigma`` fields every other extension uses. +""" + +import re +import tempfile +from pathlib import Path + +import numpy as np +import numpy.testing as npt +from astropy.io import fits +from cs_util import size as cs_size +from hypothesis import given, settings +from hypothesis import strategies as st + +from shapepipe.modules.make_cat_package.make_cat import SaveCatalogue + +# --------------------------------------------------------------------------- +# Harness +# --------------------------------------------------------------------------- + +# Source columns _save_galsim_shapes reads per extension. The ORIGINAL_PSF +# extension only consumes the gal_uncorr_g1/g2 + gal_sigma subset, but the +# real galsim catalogue carries the full set on every extension, so the +# fixture mirrors that rather than special-casing the row shape. +GALSIM_KEYS = [ + "id", + "gal_g1", "gal_g2", "gal_g1_err", "gal_g2_err", + "gal_uncorr_g1", "gal_uncorr_g2", "gal_sigma", + "psf_g1", "psf_g2", "psf_sigma", + "gal_flux", "gal_flux_err", "gal_mag", "gal_mag_err", + "gal_flag", "gal_resolution", +] +INT_KEYS = {"id", "gal_flag"} + +# Per-key multiplier so a single scalar ``seed`` fans out into twelve +# distinct values — a swapped source (e.g. psf_g1 <-> gal_uncorr_g1) breaks +# at least one equality below. +_MULT = { + "gal_g1": 1.0, "gal_g2": 1.3, "gal_g1_err": 1.7, "gal_g2_err": 2.1, + "gal_uncorr_g1": 2.6, "gal_uncorr_g2": 3.1, "gal_sigma": 0.4, + "psf_g1": 0.6, "psf_g2": 0.8, "psf_sigma": 0.5, + "gal_flux": 10.0, "gal_flux_err": 1.0, "gal_mag": 5.0, + "gal_mag_err": 0.2, +} + + +def _galsim_row(obj_id, seed): + """One object's per-key values for one extension, scaled by ``seed``.""" + row = {"id": obj_id, "gal_flag": 0, "gal_resolution": 0.5} + row.update({key: mult * seed for key, mult in _MULT.items()}) + return row + + +def _write_galsim_cat(path, obj_ids, seed_by_ext): + """Write a synthetic galsim FITS, one extension per ``seed_by_ext`` key. + + Extension order is preserved (dict order), matching how + ``get_ext_name()[1:]`` drives ``self._key_ends`` in production. + """ + hdus = [fits.PrimaryHDU()] + for ext, seed in seed_by_ext.items(): + rows = [_galsim_row(oid, seed) for oid in obj_ids] + cols = [ + fits.Column( + name=key, + format="K" if key in INT_KEYS else "D", + array=np.array([row[key] for row in rows]), + ) + for key in GALSIM_KEYS + ] + hdus.append(fits.BinTableHDU.from_columns(cols, name=ext)) + fits.HDUList(hdus).writeto(path, overwrite=True) + + +def _run_save_galsim(galsim_path, obj_ids): + """Drive ``_save_galsim_shapes`` and return its populated output dict.""" + inst = object.__new__(SaveCatalogue) + inst._obj_id = np.asarray(obj_ids) + inst._output_dict = {} + inst._save_galsim_shapes(str(galsim_path)) + return inst._output_dict + + +# --------------------------------------------------------------------------- +# Strategies +# --------------------------------------------------------------------------- + +obj_ids_strategy = st.lists( + st.integers(min_value=1, max_value=10_000), min_size=1, max_size=5, + unique=True, +) +# A finite seed bounded away from zero, so every per-key value it scales is +# itself non-zero and the family-vs-family / key-vs-key comparisons below are +# never vacuously satisfied by an accidental 0 == 0. +_seed = st.floats( + min_value=0.05, max_value=5.0, allow_nan=False, allow_infinity=False, +) + + +# --------------------------------------------------------------------------- +# (a) Scalar split + size routing, regular extension +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given(obj_ids=obj_ids_strategy, seed=_seed) +def test_regular_extension_scalar_split_and_size(obj_ids, seed): + """A non-``ORIGINAL_PSF`` extension routes each source to its own column. + + ``GALSIM_G1/G2_`` from ``gal_g1/g2``, the ``_ERR``/``_UNCORR`` + variants from their namesake sources, ``GALSIM_T_`` from + ``sigma_to_T(gal_sigma)``, and the PSF family + (``GALSIM_G1/G2_PSF_``, ``GALSIM_T_PSF_``) from ``psf_g1/g2`` / + ``sigma_to_T(psf_sigma)`` — never from the galaxy's own fields. + """ + ext = "NOSHEAR" + path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name + _write_galsim_cat(path, obj_ids, {ext: seed}) + out = _run_save_galsim(path, obj_ids) + Path(path).unlink() + + n = len(obj_ids) + row = _galsim_row(obj_ids[0], seed) + npt.assert_allclose(out[f"GALSIM_G1_{ext}"], [row["gal_g1"]] * n) + npt.assert_allclose(out[f"GALSIM_G2_{ext}"], [row["gal_g2"]] * n) + npt.assert_allclose(out[f"GALSIM_G1_ERR_{ext}"], [row["gal_g1_err"]] * n) + npt.assert_allclose(out[f"GALSIM_G2_ERR_{ext}"], [row["gal_g2_err"]] * n) + npt.assert_allclose( + out[f"GALSIM_G1_UNCORR_{ext}"], [row["gal_uncorr_g1"]] * n + ) + npt.assert_allclose( + out[f"GALSIM_G2_UNCORR_{ext}"], [row["gal_uncorr_g2"]] * n + ) + npt.assert_allclose( + out[f"GALSIM_T_{ext}"], [cs_size.sigma_to_T(row["gal_sigma"])] * n + ) + npt.assert_allclose(out[f"GALSIM_G1_PSF_{ext}"], [row["psf_g1"]] * n) + npt.assert_allclose(out[f"GALSIM_G2_PSF_{ext}"], [row["psf_g2"]] * n) + npt.assert_allclose( + out[f"GALSIM_T_PSF_{ext}"], [cs_size.sigma_to_T(row["psf_sigma"])] * n + ) + # The galaxy and PSF families must stay distinct (un-aliased), and the + # raw-sigma value must never leak through unconverted. + assert not np.allclose(out[f"GALSIM_T_{ext}"], row["gal_sigma"]) + assert not np.allclose(out[f"GALSIM_G1_{ext}"], out[f"GALSIM_G1_PSF_{ext}"]) + + npt.assert_allclose(out[f"GALSIM_FLUX_{ext}"], [row["gal_flux"]] * n) + npt.assert_allclose(out[f"GALSIM_FLUX_ERR_{ext}"], [row["gal_flux_err"]] * n) + npt.assert_allclose(out[f"GALSIM_MAG_{ext}"], [row["gal_mag"]] * n) + npt.assert_allclose(out[f"GALSIM_MAG_ERR_{ext}"], [row["gal_mag_err"]] * n) + npt.assert_allclose(out[f"GALSIM_RES_{ext}"], [row["gal_resolution"]] * n) + + +# --------------------------------------------------------------------------- +# (b) ORIGINAL_PSF asymmetry: sourced from the galaxy's uncorrected fields +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given(obj_ids=obj_ids_strategy, seed_main=_seed, seed_origpsf=_seed) +def test_original_psf_extension_sourced_from_gal_uncorr( + obj_ids, seed_main, seed_origpsf +): + """``ORIGINAL_PSF`` columns trace to ``gal_uncorr_*``/``gal_sigma``, NOT + ``psf_*`` — the one preserved asymmetry in the galsim grammar. + + The two extensions are written with independent seeds, and the + ``ORIGINAL_PSF`` row's own ``psf_g1/g2``/``psf_sigma`` are deliberately + distinct from its ``gal_uncorr_*``/``gal_sigma`` (the seed multipliers + differ), so a regression that wired the regular per-extension ``psf_*`` + sourcing into ``ORIGINAL_PSF`` would be caught, not masked by equal + values. + """ + seed_by_ext = {"NOSHEAR": seed_main, "ORIGINAL_PSF": seed_origpsf} + path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name + _write_galsim_cat(path, obj_ids, seed_by_ext) + out = _run_save_galsim(path, obj_ids) + Path(path).unlink() + + n = len(obj_ids) + op_row = _galsim_row(obj_ids[0], seed_origpsf) + npt.assert_allclose( + out["GALSIM_G1_PSF_ORIGINAL_PSF"], [op_row["gal_uncorr_g1"]] * n + ) + npt.assert_allclose( + out["GALSIM_G2_PSF_ORIGINAL_PSF"], [op_row["gal_uncorr_g2"]] * n + ) + npt.assert_allclose( + out["GALSIM_T_PSF_ORIGINAL_PSF"], + [cs_size.sigma_to_T(op_row["gal_sigma"])] * n, + ) + # Not sourced from this extension's own psf_g1/g2/psf_sigma (those are + # scaled by the same seed but through different multipliers, so equality + # here would mean the asymmetry was lost). + assert not np.allclose( + out["GALSIM_G1_PSF_ORIGINAL_PSF"], [op_row["psf_g1"]] * n + ) + assert not np.allclose( + out["GALSIM_T_PSF_ORIGINAL_PSF"], + [cs_size.sigma_to_T(op_row["psf_sigma"])] * n, + ) + + +# --------------------------------------------------------------------------- +# (c) Every emitted column matches the new grammar; no legacy tokens survive +# --------------------------------------------------------------------------- + +@settings(deadline=None, max_examples=25) +@given(obj_ids=obj_ids_strategy, seed=_seed) +def test_emitted_columns_match_grammar_no_legacy_tokens(obj_ids, seed): + """Every ``GALSIM_*`` column matches the scalar grammar; nothing packed. + + No ``GALSIM_GAL_ELL*``/``GALSIM_PSF_ELL*`` (packed length-2 ellipticity) + and no ``GALSIM_*_SIGMA*`` (raw size) survives — both replaced by scalar + ``G1``/``G2`` components and the single stored size ``T``. The galaxy + also carries no ``GAL`` token (implicit default object, same rule as + ngmix). + """ + ext_names = ["NOSHEAR", "ORIGINAL_PSF"] + seed_by_ext = {ext: seed * (i + 1) for i, ext in enumerate(ext_names)} + path = tempfile.NamedTemporaryFile(suffix=".fits", delete=False).name + _write_galsim_cat(path, obj_ids, seed_by_ext) + out = _run_save_galsim(path, obj_ids) + Path(path).unlink() + + assert out, "writer produced no columns" + + _component = "G1|G2|T" + _qual = "(?:_ERR|_UNCORR)?" + _ext = "|".join(re.escape(e) for e in ext_names) + grammar_re = re.compile( + rf"^GALSIM_(?:{_component}){_qual}(?:_PSF)?_(?:{_ext})$" + rf"|^GALSIM_(?:FLUX|FLUX_ERR|MAG|MAG_ERR|FLAGS|RES)_(?:{_ext})$" + ) + for col in out: + assert grammar_re.match(col), f"off-grammar column emitted: {col!r}" + assert "_ELL" not in col, col + assert "SIGMA" not in col, col + assert "_GAL_" not in col and not col.endswith("_GAL"), col diff --git a/tests/module/test_psf_grammar_properties.py b/tests/module/test_psf_grammar_properties.py index 593614a1..09eaf181 100644 --- a/tests/module/test_psf_grammar_properties.py +++ b/tests/module/test_psf_grammar_properties.py @@ -21,13 +21,14 @@ (``example/cfis/final_cat.param``) names is a column the writer can produce — writer/param-file consistency; (d) the FULL frozen grammar (shapepipe#761) — ``ESTIMATOR_COMPONENT[_ERR]_ - OBJECT[_metacaltype]`` — holds across both estimator families: ngmix - splits ellipticity into ``G1``/``G2`` (g-type), HSM into ``E1``/``E2`` - (e-type); every family stores exactly one size, ``T``; HSM keeps the - singular ``FLAG`` token, ngmix keeps plural ``FLAGS``. This part is a - static example-based check (not writer-driven), so it covers HSM without - depending on its producer module — see ``test_psfex_interp.py`` for the - writer-driven coverage of that family. + OBJECT[_metacaltype]`` — holds across all three estimator families: ngmix + and galsim split ellipticity into ``G1``/``G2`` (g-type), HSM into + ``E1``/``E2`` (e-type); every family stores exactly one size, ``T``; HSM + keeps the singular ``FLAG`` token, ngmix/galsim keep plural ``FLAGS``. + This part is a static example-based check (not writer-driven), so it + covers galsim/HSM without depending on those producer modules — see + ``test_galsim_grammar_properties.py`` and ``test_psfex_interp.py`` for + the writer-driven coverage of those families. Setup for (a)-(c) mirrors ``tests/module/test_make_cat.py``: a synthetic ngmix FITS with the five shear-type extensions carrying the exact @@ -389,22 +390,23 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): # --------------------------------------------------------------------------- -# (d) The FULL frozen grammar, across both estimator families +# (d) The FULL frozen grammar, across all three estimator families # --------------------------------------------------------------------------- # # ``ESTIMATOR_COMPONENT[_ERR]_OBJECT[_metacaltype]`` (shapepipe#761). This # section encodes the grammar itself as a static property — example column -# names, not a driven writer — so it covers ngmix and HSM together without -# depending on the HSM producer module (covered by its own writer-driving -# test, ``test_psfex_interp.py``). What it pins: +# names, not a driven writer — so it covers ngmix, galsim, and HSM together +# without depending on the galsim/HSM producer modules (covered by their own +# writer-driving tests: ``test_galsim_grammar_properties.py``, +# ``test_psfex_interp.py``). What it pins: # -# - galaxy is the implicit default object, no token, for the g-type ngmix -# writer; +# - galaxy is the implicit default object, no token, for BOTH g-type writers +# (ngmix, galsim); # - ellipticity is split into named scalar components, never a packed -# 2-vector: ``G1``/``G2`` for g-type (ngmix), ``E1``/``E2`` for e-type -# (HSM); +# 2-vector: ``G1``/``G2`` for g-type (ngmix, galsim), ``E1``/``E2`` for +# e-type (HSM); # - exactly one stored size, ``T`` (no ``SIGMA``/``FWHM``/``r50``); -# - HSM keeps the singular ``FLAG`` token; ngmix keeps plural ``FLAGS``. +# - HSM keeps the singular ``FLAG`` token; ngmix/galsim keep plural ``FLAGS``. FROZEN_GRAMMAR_RE = re.compile( r"^(?:" @@ -412,6 +414,10 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): r"NGMIXm?_(?:G1|G2|T|SNR|FLUX|MAG|FLAGS)(?:_ERR)?" r"(?:_(?:PSF_ORIG|PSF_RECONV))?_(?:NOSHEAR|1P|1M|2P|2M)" r"|NGMIXm?_(?:MCAL_FLAGS|MCAL_TYPES_FAIL|N_EPOCH)" + # galsim: galaxy (no object token) or explicit PSF object; is a + # data-driven FITS extension name, so left as a generic uppercase token. + r"|GALSIM_(?:G1|G2|T)(?:_ERR|_UNCORR)?(?:_PSF)?_[A-Z0-9_]+" + r"|GALSIM_(?:FLUX|FLUX_ERR|MAG|MAG_ERR|FLAGS|RES)_[A-Z0-9_]+" # HSM: e-type, explicit PSF/STAR object, singular FLAG; the multi-epoch # sink in make_cat._save_psf_data appends a bare epoch index. r"|HSM_(?:E1|E2|T)_(?:PSF|STAR)(?:_\d+)?" @@ -428,6 +434,10 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): "NGMIX_FLAGS_2P", "NGMIXm_G1_PSF_RECONV_NOSHEAR", "NGMIX_MCAL_FLAGS", + "GALSIM_G1_NOSHEAR", # galaxy, no object token + "GALSIM_G2_UNCORR_NOSHEAR", + "GALSIM_T_PSF_ORIGINAL_PSF", + "GALSIM_FLAGS_NOSHEAR", "HSM_E1_PSF", "HSM_T_STAR", "HSM_FLAG_PSF", @@ -445,6 +455,10 @@ def test_param_file_ngmix_tokens_are_producible(param_path, obj_ids): "SIGMA_PSF_HSM", # raw sigma, not T "HSM_SIGMA_PSF", # stored sigma instead of T "HSM_FLAGS_PSF", # plural — HSM is singular FLAG + "GALSIM_GAL_ELL_NOSHEAR", # GAL token + packed ellipticity + "GALSIM_GAL_SIGMA_NOSHEAR", # GAL token + raw sigma + "GALSIM_PSF_ELL_NOSHEAR", # packed ellipticity + "GALSIM_FWHM_NOSHEAR", # FWHM, not T "SPREAD_MODEL", # removed entirely, not renamed ] From 714847c29d144e96f5c601ffb7573fcb1536786f Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 3 Jul 2026 18:53:48 +0200 Subject: [PATCH 32/33] =?UTF-8?q?Restore=20spread=5Fmodel=20to=20#761=20?= =?UTF-8?q?=E2=80=94=20it=20does=20not=20belong=20in=20this=20PR?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Undoes the spread_model removal that this branch carried (85a18ab5 in full, plus the save_sm_data half of 4f0756d0), so #761 no longer touches spread_model at all: - spread_model_package/ + spread_model_runner.py restored - make_cat_runner: spread_model_runner input, sexcat_sm pattern, SM_* config, and the save_sm_data() call restored - make_cat.save_sm_data() writer + SPREAD_MODEL / SPREADERR_MODEL / SPREAD_CLASS columns restored (re-added by hand alongside the new grammar, since 4f0756d0 deleted the writer while unifying the grammar) - example configs, .param allow-lists, pipeline tutorial restored Per Axel Guinot's 2026-07-02 review: his position is that spread_model must not be *cut on* for shear selection — not that it should be deleted ("it could still be useful for other things... it does not hurt to keep it... this is not the right PR to remove it"). The earlier deletion over-read that point. Whether spread_model should be removed in a dedicated future PR is a separate question, addressed in a standalone report; #761 is scoped to the PSF column grammar and leaves it in place. The grammar unification of 4f0756d0 (ngmix / galsim / HSM-sink columns) is untouched — only the spread_model writer it also deleted is restored. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01GQSyaXLnLZtksntYAYD41E --- docs/source/pipeline_tutorial.md | 11 +- example/cfis/config_make_cat_mccd.ini | 13 +- example/cfis/config_make_cat_psfex.ini | 10 +- example/cfis/config_make_cat_psfex_nosm.ini | 2 + example/cfis/final_cat.param | 7 + example/unions_800/cat_matched.param | 7 + .../modules/make_cat_package/__init__.py | 20 +- .../modules/make_cat_package/make_cat.py | 71 ++++ src/shapepipe/modules/make_cat_runner.py | 69 +++- .../modules/spread_model_package/__init__.py | 36 ++ .../spread_model_package/spread_model.py | 309 ++++++++++++++++++ src/shapepipe/modules/spread_model_runner.py | 69 ++++ 12 files changed, 600 insertions(+), 24 deletions(-) create mode 100644 src/shapepipe/modules/spread_model_package/__init__.py create mode 100644 src/shapepipe/modules/spread_model_package/spread_model.py create mode 100644 src/shapepipe/modules/spread_model_runner.py diff --git a/docs/source/pipeline_tutorial.md b/docs/source/pipeline_tutorial.md index 5279b5f9..95a0aa94 100644 --- a/docs/source/pipeline_tutorial.md +++ b/docs/source/pipeline_tutorial.md @@ -289,15 +289,16 @@ job_sp TILE_ID -j 16 is the selection of galaxies as extended objects compared to the PSF. First, the PSF model is interpolated to galaxy positions, according to the PSF model with `psfex_interp` or `mccd_interp`. Next, postage stamps around galaxies -of the weights maps are created via `vignetmaker`. Finally, postage stamps +of the weights maps are created via `vignetmaker`. Then, the spread model +is computed by the `spread_model` module. Finally, postage stamps around galaxies of single-exposure data is extracted with another call to `vignetmaker`. The output directory is -- `run_sp_MiViVi` if the PSF model is `mccd`; -- `run_sp_tile_PsViVi` for the `PSFEx` PSF model. +- `run_sp_MiViSmVi` if the PSF model is `mccd`; +- `run_sp_tile_PsViSmVi` for the `PSFEx` PSF model. -This corresponds to the MCCD/PSFex interpolation (`Mi`/`Pi`), `vignetmaker` (`Vi`), and the +This corresponds to the MCCD/PSFex interpolation (`Mi`/`Pi`), `vignetmaker` (`Vi`), `spread_model` (`Sm`), and the second call to `vignetmaker` (`Vi`). @@ -324,7 +325,7 @@ job_sp TILE_ID -j 64 This task first merges the `NJOB` parallel `ngmix` output files from the previous step into one output file. Then, previously obtained information are pasted into a _final_ shape catalogue via `make_cat`. Included are galaxy detection and basic measurement parameters, the PSF model at -galaxy positions, and the shape measurement. +galaxy positions, the spread-model classification, and the shape measurement. Two output directories are created. The first one is `run_sp_Ms` for the `merge_sep` run. diff --git a/example/cfis/config_make_cat_mccd.ini b/example/cfis/config_make_cat_mccd.ini index 4ac61823..5aa33d27 100644 --- a/example/cfis/config_make_cat_mccd.ini +++ b/example/cfis/config_make_cat_mccd.ini @@ -34,7 +34,7 @@ LOG_NAME = log_sp RUN_LOG_NAME = log_run_sp # Input directory, containing input files, single string or list of names with length matching FILE_PATTERN -INPUT_DIR = last:sextractor_runner_run_1, last:mccd_interp_runner, last:merge_sep_cats_runner +INPUT_DIR = last:sextractor_runner_run_1, last:spread_model_runner, last:mccd_interp_runner, last:merge_sep_cats_runner # Output directory OUTPUT_DIR = ./output @@ -56,10 +56,12 @@ TIMEOUT = 96:00:00 # Input file pattern(s), list of strings with length matching number of expected input file types # Cannot contain wild cards -FILE_PATTERN = sexcat, galaxy_psf, ngmix +FILE_PATTERN = sexcat, sexcat_sm, galaxy_psf, ngmix +#, galsim # FILE_EXT (optional) list of string extensions to identify input files -FILE_EXT = .fits, .sqlite, .fits +FILE_EXT = .fits, .fits, .sqlite, .fits +#, .fits # Numbering convention, string that exemplifies a numbering pattern. # Matches input single exposures (with 'p' removed) @@ -67,4 +69,9 @@ FILE_EXT = .fits, .sqlite, .fits # sections below NUMBERING_SCHEME = -000-000 +SM_DO_CLASSIFICATION = True +SM_STAR_THRESH = 0.003 +SM_GAL_THRESH = 0.01 + SHAPE_MEASUREMENT_TYPE = ngmix +#, galsim diff --git a/example/cfis/config_make_cat_psfex.ini b/example/cfis/config_make_cat_psfex.ini index 552c578e..a7407d99 100644 --- a/example/cfis/config_make_cat_psfex.ini +++ b/example/cfis/config_make_cat_psfex.ini @@ -55,14 +55,14 @@ TIMEOUT = 96:00:00 [MAKE_CAT_RUNNER] # Input directory, containing input files, single string or list of names with length matching FILE_PATTERN -INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:psfex_interp_runner, last:merge_sep_cats_runner +INPUT_DIR = run_sp_tile_Sx:sextractor_runner, last:spread_model_runner, last:psfex_interp_runner, last:merge_sep_cats_runner # Input file pattern(s), list of strings with length matching number of expected input file types # Cannot contain wild cards -FILE_PATTERN = sexcat, galaxy_psf, ngmix +FILE_PATTERN = sexcat, sexcat_sm, galaxy_psf, ngmix # FILE_EXT (optional) list of string extensions to identify input files -FILE_EXT = .fits, .sqlite, .fits +FILE_EXT = .fits, .fits, .sqlite, .fits # Numbering convention, string that exemplifies a numbering pattern. # Matches input single exposures (with 'p' removed) @@ -70,4 +70,8 @@ FILE_EXT = .fits, .sqlite, .fits # sections below NUMBERING_SCHEME = -000-000 +SM_DO_CLASSIFICATION = True +SM_STAR_THRESH = 0.003 +SM_GAL_THRESH = 0.01 + SHAPE_MEASUREMENT_TYPE = ngmix diff --git a/example/cfis/config_make_cat_psfex_nosm.ini b/example/cfis/config_make_cat_psfex_nosm.ini index fc458232..1983f91c 100644 --- a/example/cfis/config_make_cat_psfex_nosm.ini +++ b/example/cfis/config_make_cat_psfex_nosm.ini @@ -71,4 +71,6 @@ FILE_EXT = .fits, .sqlite, .fits # sections below NUMBERING_SCHEME = -000-000 +SM_DO_CLASSIFICATION = False + SHAPE_MEASUREMENT_TYPE = ngmix diff --git a/example/cfis/final_cat.param b/example/cfis/final_cat.param index d403b034..8ddfebd3 100644 --- a/example/cfis/final_cat.param +++ b/example/cfis/final_cat.param @@ -15,6 +15,13 @@ NGMIX_MCAL_FLAGS NGMIX_G1_PSF_ORIG_NOSHEAR NGMIX_G2_PSF_ORIG_NOSHEAR +# spread class +#SPREAD_CLASS + +# spread model flag and error +#SPREAD_MODEL +#SPREADERR_MODEL + # Number of epochs (exposures) N_EPOCH NGMIX_N_EPOCH diff --git a/example/unions_800/cat_matched.param b/example/unions_800/cat_matched.param index f387be00..ce19045d 100644 --- a/example/unions_800/cat_matched.param +++ b/example/unions_800/cat_matched.param @@ -15,6 +15,13 @@ NGMIX_MCAL_FLAGS NGMIX_G1_PSF_ORIG_NOSHEAR NGMIX_G2_PSF_ORIG_NOSHEAR +# spread class +SPREAD_CLASS + +# spread model flag and error +SPREAD_MODEL +SPREADERR_MODEL + # Number of epochs (exposures) N_EPOCH NGMIX_N_EPOCH diff --git a/src/shapepipe/modules/make_cat_package/__init__.py b/src/shapepipe/modules/make_cat_package/__init__.py index 84e6c97a..1bd1cee5 100644 --- a/src/shapepipe/modules/make_cat_package/__init__.py +++ b/src/shapepipe/modules/make_cat_package/__init__.py @@ -7,6 +7,7 @@ :Parent modules: - ``sextractor_runner`` +- ``spread_model_runner`` - ``psfex_interp_runner`` or ``mccd_interp_runner`` - ``ngmix_runner`` @@ -20,12 +21,27 @@ This module creates a *final* catalogue combining the output of various previous module runs. This gathers all relevant information on the measured galaxies for weak-lensing post-processing. This includes galaxy detection and -basic measurement parameters, the PSF model at galaxy positions, and the -shape measurement. +basic measurement parameters, the PSF model at galaxy positions, the +spread-model classification, and the shape measurement. Module-specific config file entries =================================== +SM_DO_CLASSIFICATION : bool, optional + Adds spread-model star/galaxy classification flag as the column + ``SPREAD_CLASS`` to the output if ``True`` +SM_STAR_THRESH : float, optional + Threshold :math:`s_{\rm star, thresh}` for star selection; object is + classified as a star if + :math:`|x s + 2 \sigma_s | < s_{\textrm{star, thresh}}` + where :math:`s` is the spread model and :math:`\sigma_s` is the spread + model error; default value is ``0.003`` +SM_GAL_THRESH : float, optional + Threshold :math:`s_{\rm gal, thresh}` for galaxy selection; object is + classified as a galaxy if + :math:`s + 2 \sigma_s > s_{\textrm{gal, thresh}}` where :math:`s` is the + spread model and :math:`\sigma_s` is the spread model error; default value + is ``0.01`` SHAPE_MEASUREMENT_TYPE : list Shape measurement method, valid options are ``ngmix`` and/or ``galsim`` SAVE_PSF_DATA : bool, optional diff --git a/src/shapepipe/modules/make_cat_package/make_cat.py b/src/shapepipe/modules/make_cat_package/make_cat.py index e28ac643..ae4ac648 100644 --- a/src/shapepipe/modules/make_cat_package/make_cat.py +++ b/src/shapepipe/modules/make_cat_package/make_cat.py @@ -139,6 +139,77 @@ def save_sextractor_data(final_cat_file, sexcat_path, remove_vignet=True): return cat_size +def save_sm_data( + final_cat_file, + sexcat_sm_path, + do_classif=True, + star_thresh=0.003, + gal_thresh=0.01, + n_obj=-1, +): + r"""Save Spread-Model Data. + + Save the spread-model data into the final catalogue. + + Parameters + ---------- + final_cat_file : file_io.FITSCatalogue + Final catalogue + sexcat_sm_path : str + Path to spread-model catalogue to save. If ``None``, spread_model is + set to 99 + do_classif : bool + If ``True`` objects will be classified into stars, galaxies, and other, + using the classifier + :math:`{\rm class} = {\rm sm} + 2 * {\rm sm}_{\rm err}` + star_thresh : float + Threshold for star selection; object is classified as star if + :math:`|{\rm class}| <` ``star_thresh`` + gal_thresh : float + Threshold for galaxy selection; object is classified as galaxy if + :math:`{\rm class} >` ``gal_thresh`` + nobj : int, optional + Number of objects, only used if sexcat_sm_path is ``-1`` + + Returns + ------- + int + Number of objects saved + """ + final_cat_file.open() + + if sexcat_sm_path is not None: + sexcat_sm_file = file_io.FITSCatalogue( + sexcat_sm_path, + SEx_catalogue=True, + ) + sexcat_sm_file.open() + + sm = np.copy(sexcat_sm_file.get_data()["SPREAD_MODEL"]) + sm_err = np.copy(sexcat_sm_file.get_data()["SPREADERR_MODEL"]) + + sexcat_sm_file.close() + + else: + sm = np.ones(n_obj) * 99 + sm_err = np.ones(n_obj) * 99 + + final_cat_file.add_col("SPREAD_MODEL", sm) + final_cat_file.add_col("SPREADERR_MODEL", sm_err) + + if do_classif: + obj_flag = np.ones_like(sm, dtype="int16") * 2 + classif = sm + 2.0 * sm_err + obj_flag[np.where(np.abs(classif) < star_thresh)] = 0 + obj_flag[np.where(classif > gal_thresh)] = 1 + + final_cat_file.add_col("SPREAD_CLASS", obj_flag) + + final_cat_file.close() + + return n_obj + + class SaveCatalogue: """Save Catalogue. diff --git a/src/shapepipe/modules/make_cat_runner.py b/src/shapepipe/modules/make_cat_runner.py index 6ab6449d..fa1fc43f 100644 --- a/src/shapepipe/modules/make_cat_runner.py +++ b/src/shapepipe/modules/make_cat_runner.py @@ -16,15 +16,17 @@ version="1.1", input_module=[ "sextractor_runner", + "spread_model_runner", "psfex_interp_runner", "ngmix_runner", ], file_pattern=[ "tile_sexcat", + "sexcat_sm", "galaxy_psf", "ngmix", ], - file_ext=[".fits", ".sqlite", ".fits"], + file_ext=[".fits", ".fits", ".sqlite", ".fits"], depends=["numpy", "sqlitedict"], ) def make_cat_runner( @@ -37,11 +39,37 @@ def make_cat_runner( ): """Define The Make Catalogue Runner.""" # Set input file paths - ( - tile_sexcat_path, - galaxy_psf_path, - shape1_cat_path, - ) = input_file_list + if len(input_file_list) == 3: + # No spread model input + ( + tile_sexcat_path, + galaxy_psf_path, + shape1_cat_path, + ) = input_file_list + sexcat_sm_path = None + else: + # With spread model input + ( + tile_sexcat_path, + sexcat_sm_path, + galaxy_psf_path, + shape1_cat_path, + ) = input_file_list[0:4] + if len(input_file_list) == 5: + # With second shape catalogue input + shape2_cat_path = input_file_list[4] + + # Fetch classification options + do_classif = config.getboolean( + module_config_sec, + "SM_DO_CLASSIFICATION", + ) + if do_classif: + star_thresh = config.getfloat(module_config_sec, "SM_STAR_THRESH") + gal_thresh = config.getfloat(module_config_sec, "SM_GAL_THRESH") + else: + star_thresh = None + gal_thresh = None # Fetch shape measurement type shape_type_list = config.getlist( @@ -71,16 +99,35 @@ def make_cat_runner( n_obj = make_cat.save_sextractor_data(final_cat_file, tile_sexcat_path) cat_size_sextractor = n_obj + # Save spread-model data + if sexcat_sm_path is None: + w_log.info("No sm cat input, setting spread model to 99") + else: + w_log.info("Save spread-model data") + cat_size_sm = make_cat.save_sm_data( + final_cat_file, + sexcat_sm_path, + do_classif, + star_thresh, + gal_thresh, + n_obj=n_obj + ) + + if cat_size_sextractor != cat_size_sm: + w_log( + f"Warnign: SExtractor catalogue {tile_sexcat_path} has different size" + + f" ({cat_size_sextractor} than spread_model catalogue" + + f" {sexcat_sm_path} ({cat_size_sm})" + ) + # Save shape data sc_inst = make_cat.SaveCatalogue(final_cat_file, cat_size_sextractor, w_log) w_log.info("Save shape measurement data") for shape_type in shape_type_list: w_log.info(f"Save {shape_type.lower()} data") - # A second, galsim-specific shape catalogue path (shape2_cat_path) - # was never produced by any runner in this pipeline (no - # galsim_shapes_runner exists) — galsim shapes are read from the - # same input as ngmix (shape1_cat_path). - cat_path = shape1_cat_path + cat_path = ( + shape2_cat_path if shape_type == "galsim" else shape1_cat_path + ) err_msg = sc_inst.process(shape_type.lower(), cat_path) diff --git a/src/shapepipe/modules/spread_model_package/__init__.py b/src/shapepipe/modules/spread_model_package/__init__.py new file mode 100644 index 00000000..336283e7 --- /dev/null +++ b/src/shapepipe/modules/spread_model_package/__init__.py @@ -0,0 +1,36 @@ +"""SPREAD MODEL PACKAGE. + +This package contains the module for ``spread_model``. + +:Author: Axel Guinot + +:Parent modules: + +- ``sextractor_runner`` +- ``psfex_interp_runner`` or ``mccd_interp_runner`` + and ``vignetmaker_runner`` + +:Input: SExtractor output catalogue, PSFEx output catalogue and vignet files + +:Output: Updated SExtractor or new FITS file + +Description +=========== + +This module refines the galaxy sample that will be used for shape measurement +using the spread model method of :cite:`desai:12` and :cite:`mohr:12`. + +Module-specific config file entries +=================================== + +PREFIX : str, optional + Ouput file prefix +PIXEL_SCALE : float + Pixel scale in arcseconds +OUTPUT_MODE : str + Output mode, options are ``add`` to add the outputs to the input + SExtractor catalogue or ``new`` to generte a new output catalogue + +""" + +__all__ = ["spread_model"] diff --git a/src/shapepipe/modules/spread_model_package/spread_model.py b/src/shapepipe/modules/spread_model_package/spread_model.py new file mode 100644 index 00000000..1461ecce --- /dev/null +++ b/src/shapepipe/modules/spread_model_package/spread_model.py @@ -0,0 +1,309 @@ +"""SPREAD MODEL. + +Class to compute the spread model, criterion to select galaxies + +:Author: Axel Guinot + +""" + +import galsim +import numpy as np +from cs_util import size as cs_size +from sqlitedict import SqliteDict + +from shapepipe.pipeline import file_io + + +def get_sm(obj_vign, psf_vign, model_vign, weight_vign): + """Get Spread Model. + + This method compute the spread moel for an object. + + Parameters + ---------- + obj_vign : numpy.ndarray + Vignet of the object + psf_vign : numpy.ndarray + Vignet of the gaussian model of the PSF + model_vign : numpy.ndarray + Vignet of the galaxy model + weight_vign : numpy.ndarray + Vignet of the weight at the object position + + Returns + ------- + tuple + Spread model and corresponding error values + + """ + # Mask invalid pixels + m = (obj_vign > -1e29) & (weight_vign > 0) + w = m.astype(float) + + # Set noise as inverse weight + noise_v = (1 / weight_vign).ravel() + + # Remove infinite noise pixels + noise_v[np.isinf(noise_v)] = 0 + + # Transform 2D vignets to 1D vectors + t_v = model_vign.ravel() + g_v = obj_vign.ravel() + psf_v = psf_vign.ravel() + w_v = w.ravel() + + # Compute scalar products used in spread model + tg = np.sum(t_v * w_v * g_v) + pg = np.sum(psf_v * w_v * g_v) + tp = np.sum(t_v * w_v * psf_v) + pp = np.sum(psf_v * w_v * psf_v) + + tnt = np.sum(t_v * noise_v * t_v * w_v) + pnp = np.sum(psf_v * noise_v * psf_v * w_v) + tnp = np.sum(t_v * noise_v * psf_v * w_v) + err = tnt * pg**2 + pnp * tg**2 - 2 * tnp * pg * tg + + # Compute spread model + if pg > 0: + sm = (tg / pg) - (tp / pp) + else: + sm = 1 + + if (pg > 0) & (err > 0): + sm_err = np.sqrt(err) / pg**2 + else: + sm_err = 1 + + return sm, sm_err + + +def get_model(sigma, flux, img_shape, pixel_scale=0.186): + """Get Model. + + This method computes + - an exponential galaxy model with scale radius = 1/16 FWHM + - a Gaussian model for the PSF + + Parameters + ---------- + sigma : float + Sigma of the PSF (in pixel units) + flux : float + Flux of the galaxy for the model + img_shape : list + Size of the output vignet ``[xsize, ysize]`` + pixel_scale : float, optional + Pixel scale to use for the model (in arcsec); default is ``0.186`` + + Returns + ------- + tuple + Vignet of the galaxy model and of the PSF model + + """ + # Get scale radius + scale_radius = 1 / 16 * cs_size.sigma_to_fwhm(sigma) * pixel_scale + + # Get galaxy model + gal_obj = galsim.Exponential(scale_radius=scale_radius, flux=flux) + + # Get PSF + psf_obj = galsim.Gaussian(sigma=sigma * pixel_scale) + + # Convolve both + gal_obj = galsim.Convolve(gal_obj, psf_obj) + + # Draw galaxy and PSF on vignets + gal_vign = gal_obj.drawImage( + nx=img_shape[0], ny=img_shape[1], scale=pixel_scale + ).array + + psf_vign = psf_obj.drawImage( + nx=img_shape[0], ny=img_shape[1], scale=pixel_scale + ).array + + return gal_vign, psf_vign + + +class SpreadModel(object): + """The Spread Model Class. + + Parameters + ---------- + sex_cat_path : str + Path to SExtractor catalogue + psf_cat_path : str + Path to PSF catalogue + weight_cat_path : str + Path to weight catalogue + output_path : str + Output file path of pasted catalog + pixel_scale : float + Pixel scale in arcsec + output_mode : str + Options are ``new`` or ``add`` + + Notes + ----- + For the ``output_mode``: + + - ``new`` will create a new catalogue with + ``[number, mag, sm, sm_err]`` + - ``add`` will output a copy of the input SExtractor with the columns + ``sm`` and ``sm_err`` + + """ + + def __init__( + self, + sex_cat_path, + psf_cat_path, + weight_cat_path, + output_path, + pixel_scale, + output_mode, + ): + + self._sex_cat_path = sex_cat_path + self._psf_cat_path = psf_cat_path + self._weight_cat_path = weight_cat_path + self._output_path = output_path + self._pixel_scale = pixel_scale + self._output_mode = output_mode + + def process(self): + """Process. + + Process the spread model computation + + """ + # Get data + sex_cat = file_io.FITSCatalogue(self._sex_cat_path, SEx_catalogue=True) + sex_cat.open() + obj_id = np.copy(sex_cat.get_data()["NUMBER"]) + obj_vign = np.copy(sex_cat.get_data()["VIGNET"]) + obj_mag = None + if self._output_mode == "new": + obj_mag = np.copy(sex_cat.get_data()["MAG_AUTO"]) + sex_cat.close() + + psf_cat = SqliteDict(self._psf_cat_path) + + weight_cat = file_io.FITSCatalogue( + self._weight_cat_path, + SEx_catalogue=True, + ) + weight_cat.open() + weigh_vign = weight_cat.get_data()["VIGNET"] + weight_cat.close() + + # Get spread model + skip_obj = False + spread_model_final = [] + spread_model_err_final = [] + for idx, id_tmp in enumerate(obj_id): + sigma_list = [] + + if psf_cat[str(id_tmp)] == "empty": + spread_model_final.append(-1) + spread_model_err_final.append(1) + continue + + psf_expccd_name = list(psf_cat[str(id_tmp)].keys()) + + for expccd_name_tmp in psf_expccd_name: + psf_cat_id_ccd = psf_cat[str(id_tmp)][expccd_name_tmp] + sigma_list.append(psf_cat_id_ccd["SHAPES"]["SIGMA_PSF_HSM"]) + + obj_sigma_tmp = np.mean(sigma_list) + if obj_sigma_tmp > 0: + obj_vign_tmp = obj_vign[idx] + obj_flux_tmp = 1.0 + obj_weight_tmp = weigh_vign[idx] + obj_model_tmp, obj_psf_tmp = get_model( + obj_sigma_tmp, + obj_flux_tmp, + obj_vign_tmp.shape, + self._pixel_scale, + ) + + obj_sm, obj_sm_err = get_sm( + obj_vign_tmp, obj_psf_tmp, obj_model_tmp, obj_weight_tmp + ) + else: + # size < 0, something is not right with this object + obj_sm, obj_sm_err = -1.0, -1.0 + + spread_model_final.append(obj_sm) + spread_model_err_final.append(obj_sm_err) + + spread_model_final = np.array(spread_model_final, dtype="float64") + spread_model_err_final = np.array( + spread_model_err_final, + dtype="float64", + ) + + psf_cat.close() + + self.save_results( + spread_model_final, spread_model_err_final, obj_mag, obj_id + ) + + def save_results(self, sm, sm_err, mag, number): + """Save Results. + + Save output catalogue with spread model and errors. + + Parameters + ---------- + sm : numpy.ndarray + Value of the spread model for all objects + sm_err : numpy.ndarray + Value of the spread model error for all objects + mag : numpy.ndarray + Magnitude of all objects (only for a new catalogue) + number : numpy.ndarray + ID of all objects (only for a new catalogue) + + Raises + ------ + ValueError + For incorrect output mode + + """ + if self._output_mode == "new": + new_cat = file_io.FITSCatalogue( + self._output_path, + SEx_catalogue=True, + open_mode=file_io.BaseCatalogue.OpenMode.ReadWrite, + ) + dict_data = { + "NUMBER": number, + "MAG": mag, + "SPREAD_MODEL": sm, + "SPREADERR_MODEL": sm_err, + } + new_cat.save_as_fits( + data=dict_data, sex_cat_path=self._sex_cat_path + ) + elif self._output_mode == "add": + ori_cat = file_io.FITSCatalogue( + self._sex_cat_path, + SEx_catalogue=True, + ) + ori_cat.open() + new_cat = file_io.FITSCatalogue( + self._output_path, + SEx_catalogue=True, + open_mode=file_io.BaseCatalogue.OpenMode.ReadWrite, + ) + ori_cat.add_col( + "SPREAD_MODEL", sm, new_cat=True, new_cat_inst=new_cat + ) + ori_cat.add_col("SPREAD_MODEL", sm, new_cat=True, new_cat_inst=new_cat) + ori_cat.close() + new_cat.open() + new_cat.add_col("SPREADERR_MODEL", sm_err) + new_cat.close() + else: + raise ValueError("Mode must be in [new, add].") diff --git a/src/shapepipe/modules/spread_model_runner.py b/src/shapepipe/modules/spread_model_runner.py new file mode 100644 index 00000000..16c9a2d3 --- /dev/null +++ b/src/shapepipe/modules/spread_model_runner.py @@ -0,0 +1,69 @@ +"""SPREAD MODEL RUNNER. + +Module runner for ``spread_model``. + +:Author: Axel Guinot + +""" + +from shapepipe.modules.module_decorator import module_runner +from shapepipe.modules.spread_model_package.spread_model import SpreadModel + + +@module_runner( + version="1.1", + input_module=[ + "sextractor_runner", + "psfex_interp_runner", + "vignetmaker_runner", + ], + file_pattern=["sexcat", "galaxy_psf", "weight_vign"], + file_ext=[".fits", ".sqlite", ".fits"], + depends=["numpy", "galsim"], + run_method="parallel", +) +def spread_model_runner( + input_file_list, + run_dirs, + file_number_string, + config, + module_config_sec, + w_log, +): + """Define The Spread Model Runner.""" + # Get input files + sex_cat_path, psf_cat_path, weight_cat_path = input_file_list + + # Get file prefix (optional) + if config.has_option(module_config_sec, "PREFIX"): + prefix = config.get(module_config_sec, "PREFIX") + if (prefix.lower() != "none") & (prefix != ""): + prefix = prefix + "_" + else: + prefix = "" + else: + prefix = "" + + # Get pixel scale and output mode + pixel_scale = config.getfloat(module_config_sec, "PIXEL_SCALE") + output_mode = config.get(module_config_sec, "OUTPUT_MODE") + + # Set output file path + file_name = f"{prefix}sexcat_sm{file_number_string}.fits" + output_path = f'{run_dirs["output"]}/{file_name}' + + # Create spread model class instance + sm_inst = SpreadModel( + sex_cat_path, + psf_cat_path, + weight_cat_path, + output_path, + pixel_scale, + output_mode, + ) + + # Process spread model computation + sm_inst.process() + + # No return objects + return None, None From e9bfd5382bab0acb197f284f9010d0d73346d8fc Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Fri, 3 Jul 2026 20:16:06 +0200 Subject: [PATCH 33/33] fix(spread_model): read PSF size under the new HSM grammar The HSM column-grammar rename (4b17f7ec) renamed the SqliteDict SHAPES key SIGMA_PSF_HSM -> HSM_T_PSF and changed its value from the Gaussian sigma to T = 2 sigma^2. spread_model still read the old key, so with spread_model restored to this PR it would KeyError at runtime. Read HSM_T_PSF and convert back to sigma via cs_util.size.T_to_sigma, which the rest of the module (get_model, the >0 guard, sigma_to_fwhm, the galsim Gaussian) expects. Mean-of-sigma semantics are preserved: each per-exp/ccd T is converted to sigma before averaging, exactly as before. This completes the grammar rename for spread_model's one consumer so #761 no longer ships it broken. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01JSoGrXLyNr74SjiFw5FVkB --- src/shapepipe/modules/spread_model_package/spread_model.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/shapepipe/modules/spread_model_package/spread_model.py b/src/shapepipe/modules/spread_model_package/spread_model.py index 1461ecce..ed72aa58 100644 --- a/src/shapepipe/modules/spread_model_package/spread_model.py +++ b/src/shapepipe/modules/spread_model_package/spread_model.py @@ -213,7 +213,11 @@ def process(self): for expccd_name_tmp in psf_expccd_name: psf_cat_id_ccd = psf_cat[str(id_tmp)][expccd_name_tmp] - sigma_list.append(psf_cat_id_ccd["SHAPES"]["SIGMA_PSF_HSM"]) + # The HSM grammar stores PSF size as T = 2 sigma^2 under + # HSM_T_PSF; spread model needs the Gaussian sigma (pixels). + sigma_list.append( + cs_size.T_to_sigma(psf_cat_id_ccd["SHAPES"]["HSM_T_PSF"]) + ) obj_sigma_tmp = np.mean(sigma_list) if obj_sigma_tmp > 0: