From bae87e6a66028f459b27aaa1246a897f40c6fc9e Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Fri, 30 Jan 2026 14:09:07 -0500 Subject: [PATCH 01/38] initial commit for converter script --- .../convert_to_csv/convert_to_csv.cc | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc new file mode 100644 index 000000000..c61012ab4 --- /dev/null +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -0,0 +1,50 @@ +/** + * @file convert_to_csv.cc + * @author Kevin Scheuer + * @brief + * @date 2026-01-30 + * + * TODO: + * this is basically replacement for python script, so it will need to loop over args + * but instead provide a vector of files to the individual scripts + */ + +#include +#include + +int main(std::vector args(argv, argv + argc)) +{ + + // parse args + std::vector input_files; + std::string output_file; + for (const auto &arg : args) + { + + // parse input files + if (arg.find("--input") != std::string_view::npos || arg.find("-i") != std::string_view::npos) + { + // Skip the flag itself and collect all following arguments until next flag + auto it = std::find(args.begin(), args.end(), arg); + if (it != args.end()) { + ++it; + while (it != args.end() && !it->starts_with("-")) { + input_files.push_back(*it); + ++it; + } + } + } + else if (arg.find("--output") != std::string_view::npos || arg.find("-o") != std::string_view::npos) + { + // parse output file + } + } + + // TEMP: print out input files + for (const auto &file : input_files) + { + std::cout << "Input file: " << file << std::endl; + } + + return 0; +} \ No newline at end of file From afeb6f41e6693940096ff3348909afc97a562e31 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Fri, 30 Jan 2026 16:15:16 -0500 Subject: [PATCH 02/38] add needed sconscript files --- src/programs/AmplitudeAnalysis/SConscript | 2 +- .../convert_to_csv/SConscript | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 src/programs/AmplitudeAnalysis/convert_to_csv/SConscript diff --git a/src/programs/AmplitudeAnalysis/SConscript b/src/programs/AmplitudeAnalysis/SConscript index 2e64c9056..d704834d0 100644 --- a/src/programs/AmplitudeAnalysis/SConscript +++ b/src/programs/AmplitudeAnalysis/SConscript @@ -3,7 +3,7 @@ import sbms Import('*') -subdirs = ['fit', 'twopi_plotter', 'twopi_plotter_amp', 'twopi_plotter_mom', 'twopi_plotter_primakoff', 'twolepton_plotter', 'twoleptonGJ_plotter', 'split_mass', 'split_t', 'threepi_plotter_schilling', 'omega_radiative_plotter', 'project_moments', 'plot_etapi_delta', 'project_moments_polarized', 'Bootstrap_plot_etapi_delta_SPDG_allamps_mass_t_bins', 'Pol_moments_viafittedPW', 'project_moments_SPD_etapi0_posepsilon', 'omegapi_plotter', 'vecps_plotter', 'plot_etapi0'] +subdirs = ['fit', 'twopi_plotter', 'twopi_plotter_amp', 'twopi_plotter_mom', 'twopi_plotter_primakoff', 'twolepton_plotter', 'twoleptonGJ_plotter', 'split_mass', 'split_t', 'threepi_plotter_schilling', 'omega_radiative_plotter', 'project_moments', 'plot_etapi_delta', 'project_moments_polarized', 'Bootstrap_plot_etapi_delta_SPDG_allamps_mass_t_bins', 'Pol_moments_viafittedPW', 'project_moments_SPD_etapi0_posepsilon', 'omegapi_plotter', 'vecps_plotter', 'plot_etapi0', 'convert_to_csv'] SConscript(dirs=subdirs, exports='env osname', duplicate=0) diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/SConscript b/src/programs/AmplitudeAnalysis/convert_to_csv/SConscript new file mode 100644 index 000000000..423109ba1 --- /dev/null +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/SConscript @@ -0,0 +1,20 @@ +import os +import sbms + +# get env object and clone it +Import('*') + +# Verify AMPTOOLS environment variable is set +if os.getenv('AMPTOOLS', 'nada')!='nada': + + env = env.Clone() + + AMPTOOLS_LIBS = "AMPTOOLS_AMPS AMPTOOLS_DATAIO AMPTOOLS_MCGEN UTILITIES" + env.AppendUnique(LIBS = AMPTOOLS_LIBS.split()) + + sbms.AddHDDM(env) + sbms.AddAmpTools(env) + sbms.AddUtilities(env) + sbms.AddROOT(env) + + sbms.executable(env) \ No newline at end of file From dc30e7cc1e7154d1fdc182bae77ac48844f1b45a Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Sat, 31 Jan 2026 16:27:49 -0500 Subject: [PATCH 03/38] added cli args and helper functions --- .../convert_to_csv/convert_to_csv.cc | 285 ++++++++++++++++-- 1 file changed, 268 insertions(+), 17 deletions(-) diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index c61012ab4..166e5e0b6 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -9,42 +9,293 @@ * but instead provide a vector of files to the individual scripts */ +#include +#include +#include +#include +#include +#include #include #include -int main(std::vector args(argv, argv + argc)) +// forward declarations +std::vector sort_files(const std::vector &files, int sort_index); +bool are_valid_fit_files(const std::vector &files); + +int main(int argc, char *argv[]) { + std::vector args(argv, argv + argc); - // parse args + // ==== PARSE ARGUMENTS ==== + // required arguments std::vector input_files; - std::string output_file; + std::string output_file = ""; + + // optional arguments + bool sorted = false; + int sort_index = -1; + bool acceptance_corrected = false; + bool create_data_file = false; + bool covariance = false; + bool correlation = false; + bool verbose = false; + bool preview = false; + std::string mass_branch = "M4Pi"; // TODO: change by building up from AmpTools 4-vectors + + auto print_help = []() + { + std::cout << "Usage: convert_to_csv [-h] [-i INPUT_FILES] [-o OUTPUT_PATH]" + << " [-s] [--sort-index INDEX] [-a] [-d] [-m MASS_BRANCH] [-p] [-v]" + << " [--corelation] [--covariance] [-d]\n" + << " -i INPUT_FILES:\t\tFull path to the .fit file(s)\n" + << " -o OUTPUT_PATH:\t\tFull path to the output .csv file\n" + << " -s, --sort:\t\t\tSort files by last number in the file name or path (default:true)\n" + << " --sort-index INDEX:\t\tWhat number in file path to sort by (default:-1, so last number in path is used)\n" + << " -a --acceptance-correct:\tWhether to acceptance correct intensities (default:true)\n" + << " -d --data-file:\t\tCreate separate data file (default:false)\n" + << " -m --mass-branch:\t\tBranch name for final mass spectrum of interest (default:M4Pi)\n" + << " -p --preview:\t\t\tPrint files to be processed, but don't run (default:false)\n" + << " -v --verbose:\t\t\tPrint information from converter scripts as they run\n" + << " --correlation:\t\tSave correlation matrix to separate csv file\n" + << " --covariance:\t\t\tSave covariance matrix to separate csv file\n" + << " -h, --help:\t\t\tShow this help message\n"; + }; + + // first check if help message is requested for (const auto &arg : args) { + if (arg == "-h" || arg == "--help") + { + print_help(); + return 0; + } + } + + // helper to distinguish flags from negative numbers + auto is_flag = [](std::string_view s) + { + return s.starts_with("-") && s.size() > 1 && !std::isdigit(static_cast(s[1])); + }; + + for (size_t i = 1; i < args.size(); ++i) + { + auto arg = args[i]; - // parse input files - if (arg.find("--input") != std::string_view::npos || arg.find("-i") != std::string_view::npos) + if (arg == "-i" || arg == "--input") + { + // collect all following arguments until next flag + while (i + 1 < args.size() && !is_flag(args[i + 1])) + { + input_files.emplace_back(args[++i]); + } + } + else if (arg == "-o" || arg == "--output") { - // Skip the flag itself and collect all following arguments until next flag - auto it = std::find(args.begin(), args.end(), arg); - if (it != args.end()) { - ++it; - while (it != args.end() && !it->starts_with("-")) { - input_files.push_back(*it); - ++it; + if (i + 1 < args.size() && !is_flag(args[i + 1])) + { + output_file = std::string(args[++i]); + } + else + { + std::cerr << "Error: -o/--output requires a value. See help message.\n"; + print_help(); + return 1; + } + } + else if (arg == "-s" || arg == "--sort") + { + sorted = true; + } + else if (arg == "--sort-index") + { + if (i + 1 < args.size() && !is_flag(args[i + 1])) + { + auto val = args[++i]; + auto [ptr, ec] = std::from_chars(val.data(), val.data() + val.size(), sort_index); + if (ec != std::errc()) + { + std::cerr << "Error: --sort-index requires a valid integer. See help message.\n"; + print_help(); + return 1; } } + else + { + std::cerr << "Error: --sort-index requires a value. See help message.\n"; + print_help(); + return 1; + } } - else if (arg.find("--output") != std::string_view::npos || arg.find("-o") != std::string_view::npos) + else if (arg == "-a" || arg == "--acceptance-correct") { - // parse output file + acceptance_corrected = true; } + else if (arg == "-d" || arg == "--data-file") + { + create_data_file = true; + } + else if (arg == "--covariance") + { + covariance = true; + } + else if (arg == "--correlation") + { + correlation = true; + } + else if (arg == "-v" || arg == "--verbose") + { + verbose = true; + } + else if (arg == "-p" || arg == "--preview") + { + preview = true; + } + else if (arg == "-m" || arg == "--mass-branch") + { + if (i + 1 < args.size() && !is_flag(args[i + 1])) + { + mass_branch = std::string(args[++i]); + } + else + { + std::cerr << "Error: -m/--mass-branch requires a value. See help message.\n"; + print_help(); + return 1; + } + } + else if (is_flag(arg)) + { + std::cerr << "Unknown argument: " << arg << "\n"; + print_help(); + return 1; + } + } + + if (input_files.empty()) + { + std::cerr << "Input files must be provided. See help message." << "\n"; + print_help(); + return 1; + } + if (output_file.empty()) + { + std::cerr << "Output file must be provided. See help message." << "\n"; + print_help(); + return 1; } - // TEMP: print out input files - for (const auto &file : input_files) + // ensure only .fit files are provided + if (!are_valid_fit_files(input_files)) { - std::cout << "Input file: " << file << std::endl; + std::cerr << "One or more input files are not valid .fit files.\n"; + return 1; } + // sort files if requested + if (sorted) + input_files = sort_files(input_files, sort_index); + + if (preview) + { + std::cout << "Preview mode enabled. Printing files and exiting\n"; + for (const auto &file : input_files) + { + std::cout << "\t" << file << "\n"; + } + return 0; + } + + // extract fit results + // TODO: use Utilities to get fit result extractor class + + if (create_data_file) + ; // TODO: execute extract_data to create data file + + if (correlation) + ; // TODO: implement saving correlation matrix + if (covariance) + ; // TODO: implement saving covariance matrix + return 0; +} + + +/** + * @brief Sort files based on a number extracted from the file path + * + * Extracts all numbers (including decimals) from each file path and sorts based on + * the number at the specified index position. + * + * @param[in] files Vector of file paths to sort + * @param[in] sort_index Index of the number to sort by (supports negative indexing). + * Default -1 means use the last number found in the path. + * @return std::vector Sorted vector of file paths + */ +std::vector sort_files(const std::vector &files, int sort_index) +{ + auto extract_number = [sort_index](const std::string &path) -> double + { + // Regex pattern to match integers and decimal numbers + std::regex number_pattern(R"(\d*\.?\d+)"); + std::vector numbers; + + // Find all numbers in the path + auto begin = std::sregex_iterator(path.begin(), path.end(), number_pattern); + auto end = std::sregex_iterator(); + + for (auto it = begin; it != end; ++it) + { + numbers.push_back(std::stod(it->str())); + } + + if (numbers.empty()) + { + return std::numeric_limits::infinity(); + } + + int index = sort_index; + + // handle negative indexing + if (index < 0) + { + index = static_cast(numbers.size()) + index; + } + + // Return the number at the specified index, or infinity if out of bounds + if (index >= 0 && index < static_cast(numbers.size())) + { + return numbers[index]; + } + + return std::numeric_limits::infinity(); + }; + + // Create a copy and sort it + std::vector sorted_files = files; + std::sort(sorted_files.begin(), sorted_files.end(), + [&extract_number](const std::string &a, const std::string &b) + { + return extract_number(a) < extract_number(b); + }); + + return sorted_files; +} + +/** + * @brief Check if all provided files are .fit files + * + * @param[in] files Vector of file paths to check + * @return true + * @return false if any file is not a .fit file or does not exist + */ +bool are_valid_fit_files(const std::vector &files) +{ + for (const auto &file : files) + { + if (std::filesystem::path(file).extension() != ".fit" || !std::filesystem::exists(file)) + { + return false; + } + } + return true; } \ No newline at end of file From b970ad7de3b803e12cae0f2b8ad0686cb0333131 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Sun, 1 Feb 2026 11:44:50 -0500 Subject: [PATCH 04/38] Modify sconscript files and rename converter dir Converter directory now reflects that any other data converters may be added in the future, not just CSV. --- src/libraries/UTILITIES/SConscript | 11 +++++++---- src/programs/AmplitudeAnalysis/SConscript | 2 +- .../{convert_to_csv => converters}/SConscript | 0 .../{convert_to_csv => converters}/convert_to_csv.cc | 9 ++++++++- 4 files changed, 16 insertions(+), 6 deletions(-) rename src/programs/AmplitudeAnalysis/{convert_to_csv => converters}/SConscript (100%) rename src/programs/AmplitudeAnalysis/{convert_to_csv => converters}/convert_to_csv.cc (97%) diff --git a/src/libraries/UTILITIES/SConscript b/src/libraries/UTILITIES/SConscript index 42c557c7b..c7c73ad06 100644 --- a/src/libraries/UTILITIES/SConscript +++ b/src/libraries/UTILITIES/SConscript @@ -5,8 +5,11 @@ import sbms # get env object and clone it Import('*') -env = env.Clone() +if os.getenv('AMPTOOLS', 'nada')!='nada': -sbms.AddROOT(env) -sbms.AddCCDB(env) -sbms.library(env) + env = env.Clone() + + sbms.AddROOT(env) + sbms.AddAmpTools(env) + sbms.AddCCDB(env) + sbms.library(env) diff --git a/src/programs/AmplitudeAnalysis/SConscript b/src/programs/AmplitudeAnalysis/SConscript index d704834d0..5c326824d 100644 --- a/src/programs/AmplitudeAnalysis/SConscript +++ b/src/programs/AmplitudeAnalysis/SConscript @@ -3,7 +3,7 @@ import sbms Import('*') -subdirs = ['fit', 'twopi_plotter', 'twopi_plotter_amp', 'twopi_plotter_mom', 'twopi_plotter_primakoff', 'twolepton_plotter', 'twoleptonGJ_plotter', 'split_mass', 'split_t', 'threepi_plotter_schilling', 'omega_radiative_plotter', 'project_moments', 'plot_etapi_delta', 'project_moments_polarized', 'Bootstrap_plot_etapi_delta_SPDG_allamps_mass_t_bins', 'Pol_moments_viafittedPW', 'project_moments_SPD_etapi0_posepsilon', 'omegapi_plotter', 'vecps_plotter', 'plot_etapi0', 'convert_to_csv'] +subdirs = ['fit', 'twopi_plotter', 'twopi_plotter_amp', 'twopi_plotter_mom', 'twopi_plotter_primakoff', 'twolepton_plotter', 'twoleptonGJ_plotter', 'split_mass', 'split_t', 'threepi_plotter_schilling', 'omega_radiative_plotter', 'project_moments', 'plot_etapi_delta', 'project_moments_polarized', 'Bootstrap_plot_etapi_delta_SPDG_allamps_mass_t_bins', 'Pol_moments_viafittedPW', 'project_moments_SPD_etapi0_posepsilon', 'omegapi_plotter', 'vecps_plotter', 'plot_etapi0', 'converters'] SConscript(dirs=subdirs, exports='env osname', duplicate=0) diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/SConscript b/src/programs/AmplitudeAnalysis/converters/SConscript similarity index 100% rename from src/programs/AmplitudeAnalysis/convert_to_csv/SConscript rename to src/programs/AmplitudeAnalysis/converters/SConscript diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/converters/convert_to_csv.cc similarity index 97% rename from src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc rename to src/programs/AmplitudeAnalysis/converters/convert_to_csv.cc index 166e5e0b6..953fd70dd 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/converters/convert_to_csv.cc @@ -18,6 +18,8 @@ #include #include +#include "UTILITIES/FitConverter.h" + // forward declarations std::vector sort_files(const std::vector &files, int sort_index); bool are_valid_fit_files(const std::vector &files); @@ -205,8 +207,13 @@ int main(int argc, char *argv[]) return 0; } + // TODO: create csv file and stringstream to hold data + // extract fit results - // TODO: use Utilities to get fit result extractor class + for (const auto& file : input_files) { + FitConverter converter(file, acceptance_corrected); + break; // TODO: remove this break after testing + } if (create_data_file) ; // TODO: execute extract_data to create data file From 8b012b0bb3decf4ccf01d73f2ea1dbd3c73fbe5c Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Sun, 1 Feb 2026 12:28:17 -0500 Subject: [PATCH 05/38] revert so binary properly created --- src/programs/AmplitudeAnalysis/SConscript | 2 +- .../{converters => convert_to_csv}/SConscript | 0 .../{converters => convert_to_csv}/convert_to_csv.cc | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) rename src/programs/AmplitudeAnalysis/{converters => convert_to_csv}/SConscript (100%) rename src/programs/AmplitudeAnalysis/{converters => convert_to_csv}/convert_to_csv.cc (99%) diff --git a/src/programs/AmplitudeAnalysis/SConscript b/src/programs/AmplitudeAnalysis/SConscript index 5c326824d..d704834d0 100644 --- a/src/programs/AmplitudeAnalysis/SConscript +++ b/src/programs/AmplitudeAnalysis/SConscript @@ -3,7 +3,7 @@ import sbms Import('*') -subdirs = ['fit', 'twopi_plotter', 'twopi_plotter_amp', 'twopi_plotter_mom', 'twopi_plotter_primakoff', 'twolepton_plotter', 'twoleptonGJ_plotter', 'split_mass', 'split_t', 'threepi_plotter_schilling', 'omega_radiative_plotter', 'project_moments', 'plot_etapi_delta', 'project_moments_polarized', 'Bootstrap_plot_etapi_delta_SPDG_allamps_mass_t_bins', 'Pol_moments_viafittedPW', 'project_moments_SPD_etapi0_posepsilon', 'omegapi_plotter', 'vecps_plotter', 'plot_etapi0', 'converters'] +subdirs = ['fit', 'twopi_plotter', 'twopi_plotter_amp', 'twopi_plotter_mom', 'twopi_plotter_primakoff', 'twolepton_plotter', 'twoleptonGJ_plotter', 'split_mass', 'split_t', 'threepi_plotter_schilling', 'omega_radiative_plotter', 'project_moments', 'plot_etapi_delta', 'project_moments_polarized', 'Bootstrap_plot_etapi_delta_SPDG_allamps_mass_t_bins', 'Pol_moments_viafittedPW', 'project_moments_SPD_etapi0_posepsilon', 'omegapi_plotter', 'vecps_plotter', 'plot_etapi0', 'convert_to_csv'] SConscript(dirs=subdirs, exports='env osname', duplicate=0) diff --git a/src/programs/AmplitudeAnalysis/converters/SConscript b/src/programs/AmplitudeAnalysis/convert_to_csv/SConscript similarity index 100% rename from src/programs/AmplitudeAnalysis/converters/SConscript rename to src/programs/AmplitudeAnalysis/convert_to_csv/SConscript diff --git a/src/programs/AmplitudeAnalysis/converters/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc similarity index 99% rename from src/programs/AmplitudeAnalysis/converters/convert_to_csv.cc rename to src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index 953fd70dd..ca318fd21 100644 --- a/src/programs/AmplitudeAnalysis/converters/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -210,7 +210,8 @@ int main(int argc, char *argv[]) // TODO: create csv file and stringstream to hold data // extract fit results - for (const auto& file : input_files) { + for (const auto &file : input_files) + { FitConverter converter(file, acceptance_corrected); break; // TODO: remove this break after testing } @@ -226,7 +227,6 @@ int main(int argc, char *argv[]) return 0; } - /** * @brief Sort files based on a number extracted from the file path * From 5766b6fbbf07095945f55b336c95019a2ef481c8 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Mon, 2 Feb 2026 14:05:22 -0500 Subject: [PATCH 06/38] Basic FitConverter functionality completed --- src/libraries/UTILITIES/FitConverter.cc | 221 ++++++++++++++++++ src/libraries/UTILITIES/FitConverter.h | 179 ++++++++++++++ .../convert_to_csv/convert_to_csv.cc | 6 +- 3 files changed, 403 insertions(+), 3 deletions(-) create mode 100644 src/libraries/UTILITIES/FitConverter.cc create mode 100644 src/libraries/UTILITIES/FitConverter.h diff --git a/src/libraries/UTILITIES/FitConverter.cc b/src/libraries/UTILITIES/FitConverter.cc new file mode 100644 index 000000000..cf8027f00 --- /dev/null +++ b/src/libraries/UTILITIES/FitConverter.cc @@ -0,0 +1,221 @@ +/** + * @file FitConverter.cc + * @author Kevin Scheuer + * @brief Implementation of FitConverter class for converting AmpTools fit results to CSV + * @date 2026-02-01 + * + * TODO: + * - Add coherent sum extraction. This is amplitude name dependent, so may need custom + * AmplitudeParsers for different naming schemes. + */ + +#include + +#include "FitConverter.h" +#include "AmplitudeParser.h" +#include "IUAmpTools/ConfigurationInfo.h" +#include "IUAmpTools/FitResults.h" +#include "IUAmpTools/report.h" + +const char *FitConverter::kModule = "FitConverter"; + +FitConverter::FitConverter(const std::string &filename, const bool &acceptance_correct) : m_fit_file(filename), + m_fit_results(filename), + m_cfg_info(m_fit_results.configInfo()), + m_is_acceptance_corrected(acceptance_correct) +{ + if (!m_fit_results.valid()) + { + report(ERROR, kModule) << "FitConverter ERROR: Unable to read fit results from file " + m_fit_file << "\n"; + assert(false); + } + extract(); +} + +void FitConverter::extract() +{ + // Store the standard AmpTools fit outputs + m_standard_results["eMatrixStatus"] = m_fit_results.eMatrixStatus(); + m_standard_results["lastMinuitCommandStatus"] = m_fit_results.lastMinuitCommandStatus(); + m_standard_results["likelihood"] = m_fit_results.likelihood(); + m_standard_results["intensity"] = m_fit_results.intensity(false).first; + m_standard_results["intensity_err"] = m_fit_results.intensity(false).second; + m_standard_results["ac_intensity"] = m_fit_results.intensity(true).first; + m_standard_results["ac_intensity_err"] = m_fit_results.intensity(true).second; + for (const auto &pair : m_standard_results) + { + report(DEBUG, kModule) << "Standard result: " << pair.first << " = " << pair.second << "\n"; + } + + // Store parameters, but ignore production coefficients (typically have "::" in name) + for (const auto &par_name : m_fit_results.parNameList()) + { + if (par_name.find("::") == std::string::npos) + m_parameters[par_name] = m_fit_results.parValue(par_name); + } + for (const auto &pair : m_parameters) + { + report(DEBUG, kModule) << "Parameter: " << pair.first << " = " << pair.second << "\n"; + } + + // Build map of unique amplitude names and their constrained amplitudes + m_constrained_amps = findConstrainedAmps(); + for (const auto &pair : m_constrained_amps) + { + report(DEBUG, kModule) << "Unique amplitude: " + << pair.first + << " has constrained amplitudes:\n"; + for (const auto & : pair.second) + report(DEBUG, kModule) << "\t" << amp << "\n"; + } + + // Calculate single amplitude intensities + for (const auto &pair : m_constrained_amps) + { + const std::string &unique_amp = pair.first; + const std::vector &full_amps = pair.second; + + std::pair intensity = m_fit_results.intensity(full_amps, m_is_acceptance_corrected); + m_single_amp_intensities[unique_amp] = intensity; + + report(DEBUG, kModule) << "Single amplitude intensity for " + << unique_amp + << " = " + << intensity.first + << " +/- " + << intensity.second + << "\n"; + } + + // Extract production coefficients for each unique amplitude name + for (const auto &unique_amp : uniqueAmpNames()) + { + // any full amplitude name will do, they are all constrained so their + // production coefficients are identical + std::string full_amp_name = m_constrained_amps[unique_amp][0]; + m_production_coefficients[unique_amp] = m_fit_results.scaledProductionParameter(full_amp_name); + } + + // Calculate phase differences between all pairs of unique amplitude names that + // share the same "reaction::sum" + for (const auto &pair : m_constrained_amps) + { + const std::string &1_name = pair.first; + const std::vector &1_full_names = pair.second; + + for (const auto &2_pair : m_constrained_amps) + { + const std::string &2_name = amp2_pair.first; + if (amp1_name >= amp2_name) + continue; // avoid self-comparison and symmetric duplicates + const std::vector &2_full_names = amp2_pair.second; + + // this will ensure we only calculate one phase difference per unique amplitude pair + bool found_phase_diff = false; + + // search for shared reaction::sum between amp1 and amp2 + for (const auto &full_amp1 : amp1_full_names) + { + for (const auto &full_amp2 : amp2_full_names) + { + // check if they share reaction::sum + size_t pos1 = full_amp1.find("::", 0); + size_t pos2 = full_amp1.find("::", pos1 + 2); + std::string reaction_sum1 = full_amp1.substr(0, pos2); + + pos1 = full_amp2.find("::", 0); + pos2 = full_amp2.find("::", pos1 + 2); + std::string reaction_sum2 = full_amp2.substr(0, pos2); + + if (reaction_sum1 == reaction_sum2) + { + // they share a coherent sum, so calculate phase difference + std::pair phase_diff = m_fit_results.phaseDiff(full_amp1, full_amp2); + m_phase_differences[{full_amp1, full_amp2}] = phase_diff; + + report(DEBUG, kModule) << "Phase difference between " + << full_amp1 + << " and " + << full_amp2 + << " = " + << phase_diff.first + << " +/- " + << phase_diff.second + << "\n"; + + found_phase_diff = true; + break; // break inner loop + } + } + if (found_phase_diff) + break; // break outer loop + } + } + } +} + +std::set FitConverter::uniqueAmpNames() const +{ + std::set unique_names; + for (const auto &litude : m_fit_results.ampList()) + { + size_t last_colon = amplitude.rfind("::"); + if (last_colon != std::string::npos) + { + std::string amp_name = amplitude.substr(last_colon + 2); + unique_names.insert(amp_name); + } + } + return unique_names; +} + +std::map> FitConverter::findConstrainedAmps() const +{ + std::map> constrained_amp_map; + + for (const auto &unique_amp : uniqueAmpNames()) + { + std::vector shared_terms = m_cfg_info->termList("", "", unique_amp); + + // we assume terms with common amplitude names are all constrained to each other + // so use first term to check this + std::vector constrained_terms = shared_terms[0]->constraints(); + + // filter constrained terms to only those with the same amplitude name + for (auto it = constrained_terms.begin(); it != constrained_terms.end();) + { + if ((*it)->fullName().find(unique_amp) == std::string::npos) + it = constrained_terms.erase(it); + else + ++it; + } + + // The constrained terms does not include the self term, so add it back in for comparison + constrained_terms.push_back(shared_terms[0]); + + // Compare the two sets of terms to ensure they are the same + std::sort(shared_terms.begin(), shared_terms.end()); + std::sort(constrained_terms.begin(), constrained_terms.end()); + if (shared_terms != constrained_terms) + { + report(ERROR, kModule) << "amplitude name " + << unique_amp + << " has inconsistent constraints among terms sharing this name." + << "\n"; + report(ERROR, kModule) << "shared terms:\n"; + for (const auto &term : shared_terms) + report(ERROR, kModule) << "\t" << term->fullName() << "\n"; + report(ERROR, kModule) << "constrained terms:\n"; + for (const auto &term : constrained_terms) + report(ERROR, kModule) << "\t" << term->fullName() << "\n"; + + assert(false); + } + + // store the full names of the constrained amplitudes + for (const auto &term : constrained_terms) + constrained_amp_map[unique_amp].push_back(term->fullName()); + } + + return constrained_amp_map; +} \ No newline at end of file diff --git a/src/libraries/UTILITIES/FitConverter.h b/src/libraries/UTILITIES/FitConverter.h new file mode 100644 index 000000000..edb5fcea4 --- /dev/null +++ b/src/libraries/UTILITIES/FitConverter.h @@ -0,0 +1,179 @@ +/** + * @file FitConverter.h + * @author Kevin Scheuer + * @brief Class for converting AmpTools fit results to any format + * @date 2026-01-31 + * + * Note that this currently is built to convert to CSV, but the class can be extended + * to convert to any other format by accessing the various containers that store + * the fit results. + */ + +#ifndef FIT_CONVERTER_H +#define FIT_CONVERTER_H + +#include +#include +#include +#include +#include +#include + +#include "IUAmpTools/FitResults.h" + +/** + * @class FitConverter + * @brief Converts AmpTools FitResults to any format + * + * This class extracts fit results from a single AmpTools .fit file. It can then output + * the data in various formats, such as CSV. The extracted data includes: + * - Standard fit outputs (likelihood, event counts, status codes) + * - AmpTools parameters + * - Real/Imaginary parts of production coefficients + * - Coherent sums of amplitudes by quantum numbers + * - Phase differences + * + * Example usage in convert_to_csv.cc: + * @code + * std::ofstream csv_file(output_file); + * std::stringstream csv_data; + * bool header_written = false; + * + * for (const auto& file : input_files) { + * FitConverter converter(file, acceptance_corrected); + * + * if (!header_written) { + * csv_data << converter.getCSVHeader() << "\n"; + * header_written = true; + * } + * csv_data << converter.getCSVRow() << "\n"; + * } + * + * csv_file << csv_data.str(); + * csv_file.close(); + * @endcode + */ +class FitConverter +{ +public: + /** + * @brief Construct a FitConverter for a single fit file + * @param[in] filename Path to the .fit file + * @param[in] acceptance_correct Whether to use acceptance-corrected intensities + * @throw ERROR If the fit file cannot be read or is invalid + */ + FitConverter(const std::string &filename, const bool &acceptance_correct = false); + + /** + * @brief Extract fit data from the .fit file + */ + void extract(); + + // TODO: implement these methods + std::string getCSVHeader() const; + std::string getCSVRow() const; + + /** + * @brief return set of all unique amplitude names in a fit result + * + * A "full" amplitude is a "reaction::sum::ampName" string, so this extracts all + * unique "ampName" strings + */ + std::set uniqueAmpNames() const; + + bool isValid() const { return m_fit_results.valid(); } + + /** + * @brief Map of AmpTool standard fit outputs and their values + * + * Standard outputs include values such as: + * - eMatrixStatus + * - lastMinuitCommandStatus + * - likelihood + * - intensity + */ + const std::map &standardResults() const { return m_standard_results; } + + /** + * @brief Map of AmpTools parameters and their values + * + * Ignores production coefficients, which typically have "::" in their names + */ + const std::map ¶meters() const { return m_parameters; } + + /** + * @brief Map of unique amplitude names and their related constrained amplitudes + * + * A "full" amplitude is a "reaction::sum::ampName" string, and often amplitudes + * are constrained across reactions and sums. This map groups all such constrained + * amplitudes by their common "ampName". + * "myAmpName" -> { + * "reaction1::sum1::myAmpName", + * "reaction1::sum2::myAmpName", + * "reaction2::sum1::myAmpName", + * ...} + * Note that the self-constraint is included in the list, so even amplitudes with + * no constraints will have a vector of size one (itself). + */ + const std::map> & + constrainedAmps() const { return m_constrained_amps; } + + /** + * @brief Map of single amplitude intensities to their values and errors + * + * Contains the intensity (and error) for each unique amplitude name, + * calculated using all the constrained amplitudes for that name. + */ + const std::map> & + singleAmpIntensities() const { return m_single_amp_intensities; } + + /** + * @brief Map of unique amplitude names to their production coefficients + * + * Contains complex production coefficients for each unique amplitude name + * extracted from the fit results. + */ + const std::map> & + productionCoefficients() const { return m_production_coefficients; } + + /** + * @brief Map of full amplitude pairs to their phase differences and errors + * + * Only includes pairs of unique amplitude names that share a reaction and sum. For + * 2 amplitudes "amp1" and "amp2", we search through every combination in + * m_constrained_amps[amp1] and m_constrained_amps[amp2] to find pairs that share + * the same "reaction::sum". The key is a vector of the two full amplitude strings, + * and the value is a pair of the phase difference and its error. + */ + const std::map, std::pair> & + phaseDifferences() const { return m_phase_differences; } + + ~FitConverter() = default; + +private: + const std::string m_fit_file; ///< Path to the .fit file + const FitResults m_fit_results; ///< AmpTools FitResults object + const ConfigurationInfo *m_cfg_info; ///< ConfigurationInfo from the fit + const bool m_is_acceptance_corrected; ///< Whether to use acceptance-corrected intensities + + // see accessor methods for descriptions + std::map m_standard_results; + std::map m_parameters; + std::map> m_constrained_amps; + std::map> m_single_amp_intensities; + std::map> m_production_coefficients; + std::map, std::pair> m_phase_differences; + + /** + * @brief Finds and returns a map of unique amplitude names to their constrained full amplitudes + * + * This assumes that all terms sharing the same amplitude name are constrained to each other. + * + * @throw ERROR If the above assumption is violated + */ + std::map> findConstrainedAmps() const; + + static const char *kModule; +}; + +#endif // FIT_CONVERTER_H \ No newline at end of file diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index ca318fd21..ffea6a168 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -4,9 +4,6 @@ * @brief * @date 2026-01-30 * - * TODO: - * this is basically replacement for python script, so it will need to loop over args - * but instead provide a vector of files to the individual scripts */ #include @@ -213,6 +210,9 @@ int main(int argc, char *argv[]) for (const auto &file : input_files) { FitConverter converter(file, acceptance_corrected); + + // TODO: write header and rows to csv, and check that header is identical + // for all files. Use AmpTools report feature to error out if not. break; // TODO: remove this break after testing } From 09c467fdb16e8387fd1d006dcbc6d2c30e6255b4 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Mon, 2 Feb 2026 15:17:07 -0500 Subject: [PATCH 07/38] Added CSV output The parameters are now saved with their errors. The verbose flag now controls the amount of output during processing. --- src/libraries/UTILITIES/FitConverter.cc | 93 +++++++++++++++++-- src/libraries/UTILITIES/FitConverter.h | 40 ++++++-- .../convert_to_csv/convert_to_csv.cc | 53 +++++++++-- 3 files changed, 165 insertions(+), 21 deletions(-) diff --git a/src/libraries/UTILITIES/FitConverter.cc b/src/libraries/UTILITIES/FitConverter.cc index cf8027f00..35dbe56eb 100644 --- a/src/libraries/UTILITIES/FitConverter.cc +++ b/src/libraries/UTILITIES/FitConverter.cc @@ -19,10 +19,10 @@ const char *FitConverter::kModule = "FitConverter"; -FitConverter::FitConverter(const std::string &filename, const bool &acceptance_correct) : m_fit_file(filename), - m_fit_results(filename), - m_cfg_info(m_fit_results.configInfo()), - m_is_acceptance_corrected(acceptance_correct) +FitConverter::FitConverter(const std::string &filename, const bool &acceptance_correct, bool mute_warning) : m_fit_file(filename), + m_fit_results(filename, mute_warning), + m_cfg_info(m_fit_results.configInfo()), + m_is_acceptance_corrected(acceptance_correct) { if (!m_fit_results.valid()) { @@ -51,11 +51,11 @@ void FitConverter::extract() for (const auto &par_name : m_fit_results.parNameList()) { if (par_name.find("::") == std::string::npos) - m_parameters[par_name] = m_fit_results.parValue(par_name); + m_parameters[par_name] = {m_fit_results.parValue(par_name), m_fit_results.parError(par_name)}; } for (const auto &pair : m_parameters) { - report(DEBUG, kModule) << "Parameter: " << pair.first << " = " << pair.second << "\n"; + report(DEBUG, kModule) << "Parameter: " << pair.first << " = " << pair.second.first << " +/- " << pair.second.second << "\n"; } // Build map of unique amplitude names and their constrained amplitudes @@ -169,6 +169,87 @@ std::set FitConverter::uniqueAmpNames() const return unique_names; } +std::string FitConverter::getCSVHeader() const +{ + std::string header = "file,"; + // standard results + for (const auto &pair : m_standard_results) + { + header += pair.first + ","; + } + // parameters + for (const auto &pair : m_parameters) + { + header += pair.first + "," + pair.first + "_err,"; + } + // production coefficients + for (const auto &pair : m_production_coefficients) + { + header += pair.first + "_re," + pair.first + "_im,"; + } + // single amplitude intensities + for (const auto &pair : m_single_amp_intensities) + { + header += pair.first + "," + pair.first + "_err,"; + } + // phase differences + for (const auto &pair : m_phase_differences) + { + std::string full_amp1 = pair.first.first; + std::string full_amp2 = pair.first.second; + + // extract unique amplitude names from full amplitude strings + size_t last_colon1 = full_amp1.rfind("::"); + size_t last_colon2 = full_amp2.rfind("::"); + std::string amp1 = (last_colon1 != std::string::npos) ? full_amp1.substr(last_colon1 + 2) : full_amp1; + std::string amp2 = (last_colon2 != std::string::npos) ? full_amp2.substr(last_colon2 + 2) : full_amp2; + + header += amp1 + "_" + amp2 + "," + amp1 + "_" + amp2 + "_err,"; + } + // remove trailing comma + if (!header.empty() && header.back() == ',') + { + header.pop_back(); + } + return header; +} + +std::string FitConverter::getCSVRow() const +{ + std::string row = m_fit_file + ","; + // standard results + for (const auto &pair : m_standard_results) + { + row += std::to_string(pair.second) + ","; + } + // parameters + for (const auto &pair : m_parameters) + { + row += std::to_string(pair.second.first) + "," + std::to_string(pair.second.second) + ","; + } + // production coefficients + for (const auto &pair : m_production_coefficients) + { + row += std::to_string(pair.second.real()) + "," + std::to_string(pair.second.imag()) + ","; + } + // single amplitude intensities + for (const auto &pair : m_single_amp_intensities) + { + row += std::to_string(pair.second.first) + "," + std::to_string(pair.second.second) + ","; + } + // phase differences + for (const auto &pair : m_phase_differences) + { + row += std::to_string(pair.second.first) + "," + std::to_string(pair.second.second) + ","; + } + // remove trailing comma + if (!row.empty() && row.back() == ',') + { + row.pop_back(); + } + return row; +} + std::map> FitConverter::findConstrainedAmps() const { std::map> constrained_amp_map; diff --git a/src/libraries/UTILITIES/FitConverter.h b/src/libraries/UTILITIES/FitConverter.h index edb5fcea4..16495c97a 100644 --- a/src/libraries/UTILITIES/FitConverter.h +++ b/src/libraries/UTILITIES/FitConverter.h @@ -60,17 +60,43 @@ class FitConverter * @brief Construct a FitConverter for a single fit file * @param[in] filename Path to the .fit file * @param[in] acceptance_correct Whether to use acceptance-corrected intensities + * @param[in] mute_warning Suppress warning about free parameters if true * @throw ERROR If the fit file cannot be read or is invalid */ - FitConverter(const std::string &filename, const bool &acceptance_correct = false); + FitConverter( + const std::string &filename, + const bool &acceptance_correct = false, + bool mute_warning = false); /** * @brief Extract fit data from the .fit file */ void extract(); - // TODO: implement these methods + /** + * @brief Get CSV header string for the fit results + * + * Constructs a comma-separated header string based on the keys of the various + * containers storing the fit results. + * + * @note It is crucial that the order of entries in the header matches the order of + * values in getCSVRow(). + * + * @return Comma-separated header string + */ std::string getCSVHeader() const; + + /** + * @brief Get CSV row string for the fit results + * + * Constructs a comma-separated row string based on the values of the various + * containers storing the fit results. + * + * @note It is crucial that the order of values in this row matches the order of + * entries in getCSVHeader(). + * + * @return Comma-separated row string + */ std::string getCSVRow() const; /** @@ -95,11 +121,11 @@ class FitConverter const std::map &standardResults() const { return m_standard_results; } /** - * @brief Map of AmpTools parameters and their values + * @brief Map of AmpTools parameters and their values and errors * * Ignores production coefficients, which typically have "::" in their names */ - const std::map ¶meters() const { return m_parameters; } + const std::map> ¶meters() const { return m_parameters; } /** * @brief Map of unique amplitude names and their related constrained amplitudes @@ -158,7 +184,7 @@ class FitConverter // see accessor methods for descriptions std::map m_standard_results; - std::map m_parameters; + std::map> m_parameters; std::map> m_constrained_amps; std::map> m_single_amp_intensities; std::map> m_production_coefficients; @@ -166,9 +192,9 @@ class FitConverter /** * @brief Finds and returns a map of unique amplitude names to their constrained full amplitudes - * + * * This assumes that all terms sharing the same amplitude name are constrained to each other. - * + * * @throw ERROR If the above assumption is violated */ std::map> findConstrainedAmps() const; diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index ffea6a168..919dd3ba3 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -10,12 +10,17 @@ #include #include #include +#include +#include #include #include #include #include #include "UTILITIES/FitConverter.h" +#include "IUAmpTools/report.h" + +const char *kModule = "convert_to_csv"; // forward declarations std::vector sort_files(const std::vector &files, int sort_index); @@ -183,6 +188,8 @@ int main(int argc, char *argv[]) return 1; } + // ==== VALIDATE INPUTS ==== + // ensure only .fit files are provided if (!are_valid_fit_files(input_files)) { @@ -190,6 +197,12 @@ int main(int argc, char *argv[]) return 1; } + // guarantee that output file ends with .csv + if (std::filesystem::path(output_file).extension() != ".csv") + { + output_file += ".csv"; + } + // sort files if requested if (sorted) input_files = sort_files(input_files, sort_index); @@ -204,25 +217,49 @@ int main(int argc, char *argv[]) return 0; } - // TODO: create csv file and stringstream to hold data + // ==== PROCESS FILES AND WRITE TO CSV ==== + std::ofstream result_file(output_file); + std::stringstream csv_data; + bool header_written = false; + std::string first_file_header; // extract fit results for (const auto &file : input_files) { - FitConverter converter(file, acceptance_corrected); + if (verbose) + report(INFO, kModule) << "Processing file: " << file << "\n"; + FitConverter converter(file, acceptance_corrected, !verbose); - // TODO: write header and rows to csv, and check that header is identical - // for all files. Use AmpTools report feature to error out if not. - break; // TODO: remove this break after testing + if (!header_written) + { + std::string header = converter.getCSVHeader(); + csv_data << header << "\n"; + header_written = true; + first_file_header = header; + } + else + { + std::string header = converter.getCSVHeader(); + if (header != first_file_header) + { + report(ERROR, kModule) << "File " + << file + << " has a different CSV header than the first file. Aborting\n"; + return 1; + } + } + csv_data << converter.getCSVRow() << "\n"; } + result_file << csv_data.str(); + result_file.close(); if (create_data_file) - ; // TODO: execute extract_data to create data file + ; // TODO: execute extract_data to create data file. Use output_file_data.csv as output name if (correlation) - ; // TODO: implement saving correlation matrix + ; // TODO: implement saving correlation matrix. Use output_file_correlation.csv as output name if (covariance) - ; // TODO: implement saving covariance matrix + ; // TODO: implement saving covariance matrix. Use output_file_covariance.csv as output name return 0; } From b268a0d1b53491edbe5bc198d9063633d1f5a669 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Mon, 2 Feb 2026 16:15:12 -0500 Subject: [PATCH 08/38] Add cov/corr matrix conversions --- src/libraries/UTILITIES/FitConverter.cc | 90 ++++++++++++++++++- src/libraries/UTILITIES/FitConverter.h | 43 +++++++++ .../convert_to_csv/convert_to_csv.cc | 83 ++++++++++++++--- 3 files changed, 201 insertions(+), 15 deletions(-) diff --git a/src/libraries/UTILITIES/FitConverter.cc b/src/libraries/UTILITIES/FitConverter.cc index 35dbe56eb..7eee2687b 100644 --- a/src/libraries/UTILITIES/FitConverter.cc +++ b/src/libraries/UTILITIES/FitConverter.cc @@ -22,7 +22,8 @@ const char *FitConverter::kModule = "FitConverter"; FitConverter::FitConverter(const std::string &filename, const bool &acceptance_correct, bool mute_warning) : m_fit_file(filename), m_fit_results(filename, mute_warning), m_cfg_info(m_fit_results.configInfo()), - m_is_acceptance_corrected(acceptance_correct) + m_is_acceptance_corrected(acceptance_correct), + m_error_matrix(m_fit_results.errorMatrix()) { if (!m_fit_results.valid()) { @@ -250,6 +251,93 @@ std::string FitConverter::getCSVRow() const return row; } +std::string FitConverter::getCSVCovarianceMatrixHeader() const +{ + std::string header = "file,parameter"; + for (const auto &par : m_fit_results.parNameList()) + { + header += "," + par; + } + return header; +} + +std::string FitConverter::getCSVCorrelationMatrixHeader() const +{ + return getCSVCovarianceMatrixHeader(); // same header format +} + +std::string FitConverter::getCSVCovarianceMatrix() const +{ + std::string csv; + for (long unsigned int row = 0; row < m_error_matrix.size(); row++) + { + const std::string row_par = m_fit_results.parNameList()[row]; + csv += m_fit_file + "," + row_par; + + for (long unsigned int col = 0; col < m_error_matrix[row].size(); col++) + { + const std::string col_par = m_fit_results.parNameList()[col]; + + // double check that our indexing is correct + if (m_error_matrix[row][col] != m_fit_results.covariance(row_par, col_par)) + { + report(ERROR, kModule) << "Mismatch in covariance values between parameters " + << row_par << " and " << col_par << ".\n"; + assert(false); + } + + csv += "," + std::to_string(m_error_matrix[row][col]); + } + + // don't add newline to last row, to avoid extra blank line at end of file + // and to match the style of the other csv functions + if (row != m_error_matrix.size() - 1) + csv += "\n"; + } + return csv; +} + +std::string FitConverter::getCSVCorrelationMatrix() const +{ + std::string csv; + for (long unsigned int row = 0; row < m_error_matrix.size(); row++) + { + const std::string row_par = m_fit_results.parNameList()[row]; + const double row_par_error = m_fit_results.parError(row_par); + csv += m_fit_file + "," + row_par; + + for (long unsigned int col = 0; col < m_error_matrix[row].size(); col++) + { + const std::string col_par = m_fit_results.parNameList()[col]; + const double col_par_error = m_fit_results.parError(col_par); + + // double check that our indexing is correct + if (m_error_matrix[row][col] != m_fit_results.covariance(row_par, col_par)) + { + report(ERROR, kModule) << "Mismatch in covariance values between parameters " + << row_par << " and " << col_par << ".\n"; + assert(false); + } + + // avoid division by zero + if (row_par_error == 0.0 || col_par_error == 0.0) + { + csv += ",0.0"; // set correlation to 0 if error is zero + continue; + } + + double correlation = m_error_matrix[row][col] / (row_par_error * col_par_error); + csv += "," + std::to_string(correlation); + } + + // don't add newline to last row, to avoid extra blank line at end of file + // and to match the style of the other csv functions + if (row != m_error_matrix.size() - 1) + csv += "\n"; + } + return csv; +} + std::map> FitConverter::findConstrainedAmps() const { std::map> constrained_amp_map; diff --git a/src/libraries/UTILITIES/FitConverter.h b/src/libraries/UTILITIES/FitConverter.h index 16495c97a..5c58720fd 100644 --- a/src/libraries/UTILITIES/FitConverter.h +++ b/src/libraries/UTILITIES/FitConverter.h @@ -99,6 +99,36 @@ class FitConverter */ std::string getCSVRow() const; + /** + * @brief Get the CSV header string for the covariance matrix + * + * @return std::string "file, parameter, par1, par2, ..., parN" + */ + std::string getCSVCovarianceMatrixHeader() const; + + /** + * @brief Get the CSV representation of the covariance matrix + * + * Every row corresponds to a parameter, with the first two columns being the file + * and parameter name, followed by the covariance values with all other parameters. + */ + std::string getCSVCovarianceMatrix() const; + + /** + * @brief Get the CSV header string for the correlation matrix + * + * @return std::string "file, parameter, par1, par2, ..., parN" + */ + std::string getCSVCorrelationMatrixHeader() const; + + /** + * @brief Get the CSV representation of the correlation matrix + * + * Every row corresponds to a parameter, with the first two columns being the file + * and parameter name, followed by the correlation values with all other parameters. + */ + std::string getCSVCorrelationMatrix() const; + /** * @brief return set of all unique amplitude names in a fit result * @@ -174,6 +204,18 @@ class FitConverter const std::map, std::pair> & phaseDifferences() const { return m_phase_differences; } + /** + * @brief Get the covariance matrix of the fit parameters. + * + * The error matrix is a 2D vector where each entry [i][j] corresponds to + * the covariance between parameter i and parameter j. The order of the rows and + * columns matches the order of parameters in FitResults::parNameList(). Note + * that we do include production parameters in this matrix, as opposed to the + * m_parameters map which excludes them. + */ + const std::vector> & + errorMatrix() const { return m_error_matrix; } + ~FitConverter() = default; private: @@ -189,6 +231,7 @@ class FitConverter std::map> m_single_amp_intensities; std::map> m_production_coefficients; std::map, std::pair> m_phase_differences; + std::vector> m_error_matrix; /** * @brief Finds and returns a map of unique amplitude names to their constrained full amplitudes diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index 919dd3ba3..29570f3d6 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -218,12 +218,11 @@ int main(int argc, char *argv[]) } // ==== PROCESS FILES AND WRITE TO CSV ==== - std::ofstream result_file(output_file); - std::stringstream csv_data; + std::stringstream csv_result_data, csv_cov_data, csv_corr_data; bool header_written = false; - std::string first_file_header; + std::string first_file_result_header, first_file_cov_header, first_file_corr_header; - // extract fit results + // extract fit results, and optionally the cov/corr matrices for (const auto &file : input_files) { if (verbose) @@ -233,34 +232,90 @@ int main(int argc, char *argv[]) if (!header_written) { std::string header = converter.getCSVHeader(); - csv_data << header << "\n"; + csv_result_data << header << "\n"; header_written = true; - first_file_header = header; + first_file_result_header = header; + + if (covariance) + { + std::string cov_header = converter.getCSVCovarianceMatrixHeader(); + csv_cov_data << cov_header << "\n"; + first_file_cov_header = cov_header; + } + + if (correlation) + { + std::string corr_header = converter.getCSVCorrelationMatrixHeader(); + csv_corr_data << corr_header << "\n"; + first_file_corr_header = corr_header; + } } else { std::string header = converter.getCSVHeader(); - if (header != first_file_header) + if (header != first_file_result_header) { report(ERROR, kModule) << "File " << file << " has a different CSV header than the first file. Aborting\n"; return 1; } + + if (covariance) + { + std::string cov_header = converter.getCSVCovarianceMatrixHeader(); + if (cov_header != first_file_cov_header) + { + report(ERROR, kModule) << "File " + << file + << " has a different covariance CSV header than the first file. Aborting\n"; + return 1; + } + } + if (correlation) + { + std::string corr_header = converter.getCSVCorrelationMatrixHeader(); + if (corr_header != first_file_corr_header) + { + report(ERROR, kModule) << "File " + << file + << " has a different correlation CSV header than the first file. Aborting\n"; + return 1; + } + } } - csv_data << converter.getCSVRow() << "\n"; + csv_result_data << converter.getCSVRow() << "\n"; + + if (covariance) + csv_cov_data << converter.getCSVCovarianceMatrix() << "\n"; + if (correlation) + csv_corr_data << converter.getCSVCorrelationMatrix() << "\n"; } - result_file << csv_data.str(); + std::ofstream result_file(output_file); + result_file << csv_result_data.str(); result_file.close(); + if (covariance) + { + std::filesystem::path output_path(output_file); + std::string covariance_file = (output_path.parent_path() / (output_path.stem().string() + "_covariance.csv")).string(); + std::ofstream cov_file(covariance_file); + std::cout << covariance_file << "\n"; + cov_file << csv_cov_data.str(); + cov_file.close(); + } + if (correlation) + { + std::filesystem::path output_path(output_file); + std::string correlation_file = (output_path.parent_path() / (output_path.stem().string() + "_correlation.csv")).string(); + std::ofstream corr_file(correlation_file); + corr_file << csv_corr_data.str(); + corr_file.close(); + } + if (create_data_file) ; // TODO: execute extract_data to create data file. Use output_file_data.csv as output name - if (correlation) - ; // TODO: implement saving correlation matrix. Use output_file_correlation.csv as output name - if (covariance) - ; // TODO: implement saving covariance matrix. Use output_file_covariance.csv as output name - return 0; } From 633ef96d81a99653544426c9acaadf1e5301f29e Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Mon, 2 Feb 2026 18:01:09 -0500 Subject: [PATCH 09/38] Relax assumption of constrained amp names Was requiring that amplitudes with common amp names in "reaction::sum::ampName" format be constrained to each other. Now it will save the mapping for unique amplitude groups, e.g. "ampName", "sum::ampName", or the full "reaction::sum::ampName" strings. --- src/libraries/UTILITIES/FitConverter.cc | 134 +++++++++++++++++++----- src/libraries/UTILITIES/FitConverter.h | 58 ++++++++-- 2 files changed, 156 insertions(+), 36 deletions(-) diff --git a/src/libraries/UTILITIES/FitConverter.cc b/src/libraries/UTILITIES/FitConverter.cc index 7eee2687b..3873caa29 100644 --- a/src/libraries/UTILITIES/FitConverter.cc +++ b/src/libraries/UTILITIES/FitConverter.cc @@ -70,16 +70,16 @@ void FitConverter::extract() report(DEBUG, kModule) << "\t" << amp << "\n"; } - // Calculate single amplitude intensities + // Calculate unique amplitude intensities for (const auto &pair : m_constrained_amps) { const std::string &unique_amp = pair.first; const std::vector &full_amps = pair.second; std::pair intensity = m_fit_results.intensity(full_amps, m_is_acceptance_corrected); - m_single_amp_intensities[unique_amp] = intensity; + m_unique_amp_intensities[unique_amp] = intensity; - report(DEBUG, kModule) << "Single amplitude intensity for " + report(DEBUG, kModule) << "Unique amplitude intensity for " << unique_amp << " = " << intensity.first @@ -170,6 +170,22 @@ std::set FitConverter::uniqueAmpNames() const return unique_names; } +std::set FitConverter::uniqueSumAmpNames() const +{ + std::set unique_names; + for (const auto &litude : m_fit_results.ampList()) + { + size_t first_colon = amplitude.find("::"); + size_t last_colon = amplitude.rfind("::"); + if (first_colon != std::string::npos && last_colon != std::string::npos && first_colon != last_colon) + { + std::string sum_amp_name = amplitude.substr(first_colon + 2, last_colon - first_colon - 2); + unique_names.insert(sum_amp_name); + } + } + return unique_names; +} + std::string FitConverter::getCSVHeader() const { std::string header = "file,"; @@ -188,8 +204,8 @@ std::string FitConverter::getCSVHeader() const { header += pair.first + "_re," + pair.first + "_im,"; } - // single amplitude intensities - for (const auto &pair : m_single_amp_intensities) + // unique amplitude intensities + for (const auto &pair : m_unique_amp_intensities) { header += pair.first + "," + pair.first + "_err,"; } @@ -233,8 +249,8 @@ std::string FitConverter::getCSVRow() const { row += std::to_string(pair.second.real()) + "," + std::to_string(pair.second.imag()) + ","; } - // single amplitude intensities - for (const auto &pair : m_single_amp_intensities) + // unique amplitude intensities + for (const auto &pair : m_unique_amp_intensities) { row += std::to_string(pair.second.first) + "," + std::to_string(pair.second.second) + ","; } @@ -342,12 +358,56 @@ std::map> FitConverter::findConstrainedAmp { std::map> constrained_amp_map; + if (ampNamesAreConstrained()) + { + for (const auto &unique_amp : uniqueAmpNames()) + { + // the bool assures us that all terms with this amp name are constrained to + // each other + std::vector shared_terms = m_cfg_info->termList("", "", unique_amp); + + // store the full names of the constrained amplitudes + for (const auto &term : shared_terms) + constrained_amp_map[unique_amp].push_back(term->fullName()); + } + return constrained_amp_map; + } + + if (sumAmpNamesAreConstrained()) + { + for (const auto &unique_sum_amp : uniqueSumAmpNames()) + { + std::string sum = unique_sum_amp.substr(0, unique_sum_amp.find("::")); + std::string amp = unique_sum_amp.substr(unique_sum_amp.find("::") + 2); + // the bool assures us that all terms with this sum::amp name are + // constrained to each other + std::vector shared_terms = m_cfg_info->termList("", sum, amp); + + // store the full names of the constrained amplitudes + for (const auto &term : shared_terms) + constrained_amp_map[unique_sum_amp].push_back(term->fullName()); + } + return constrained_amp_map; + } + + // if we reach here, neither assumption held, so just map full amplitude names to + // themselves + for (const auto &litude : m_fit_results.ampList()) + { + constrained_amp_map[amplitude] = {amplitude}; + } + return constrained_amp_map; +} + + +bool FitConverter::ampNamesAreConstrained() const +{ for (const auto &unique_amp : uniqueAmpNames()) { std::vector shared_terms = m_cfg_info->termList("", "", unique_amp); - // we assume terms with common amplitude names are all constrained to each other - // so use first term to check this + // we assume terms with common amplitude names are all constrained to each + // other, so use first term to check this std::vector constrained_terms = shared_terms[0]->constraints(); // filter constrained terms to only those with the same amplitude name @@ -359,7 +419,8 @@ std::map> FitConverter::findConstrainedAmp ++it; } - // The constrained terms does not include the self term, so add it back in for comparison + // The constrained terms does not include the self term, so add it back in for + // comparison constrained_terms.push_back(shared_terms[0]); // Compare the two sets of terms to ensure they are the same @@ -367,24 +428,45 @@ std::map> FitConverter::findConstrainedAmp std::sort(constrained_terms.begin(), constrained_terms.end()); if (shared_terms != constrained_terms) { - report(ERROR, kModule) << "amplitude name " - << unique_amp - << " has inconsistent constraints among terms sharing this name." - << "\n"; - report(ERROR, kModule) << "shared terms:\n"; - for (const auto &term : shared_terms) - report(ERROR, kModule) << "\t" << term->fullName() << "\n"; - report(ERROR, kModule) << "constrained terms:\n"; - for (const auto &term : constrained_terms) - report(ERROR, kModule) << "\t" << term->fullName() << "\n"; + return false; + } + } + return true; +} - assert(false); +bool FitConverter::sumAmpNamesAreConstrained() const +{ + for (const auto &unique_sum_amp : uniqueSumAmpNames()) + { + std::string sum = unique_sum_amp.substr(0, unique_sum_amp.find("::")); + std::string amp = unique_sum_amp.substr(unique_sum_amp.find("::") + 2); + + std::vector shared_terms = m_cfg_info->termList("", sum, amp); + + // we assume terms with common sum::amp names are all constrained to each + // other, so use first term to check this + std::vector constrained_terms = shared_terms[0]->constraints(); + + // filter constrained terms to only those with the same sum::amp name + for (auto it = constrained_terms.begin(); it != constrained_terms.end();) + { + if ((*it)->fullName().find(unique_sum_amp) == std::string::npos) + it = constrained_terms.erase(it); + else + ++it; } - // store the full names of the constrained amplitudes - for (const auto &term : constrained_terms) - constrained_amp_map[unique_amp].push_back(term->fullName()); - } + // The constrained terms does not include the self term, so add it back in for + // comparison + constrained_terms.push_back(shared_terms[0]); - return constrained_amp_map; + // Compare the two sets of terms to ensure they are the same + std::sort(shared_terms.begin(), shared_terms.end()); + std::sort(constrained_terms.begin(), constrained_terms.end()); + if (shared_terms != constrained_terms) + { + return false; + } + } + return true; } \ No newline at end of file diff --git a/src/libraries/UTILITIES/FitConverter.h b/src/libraries/UTILITIES/FitConverter.h index 5c58720fd..f9df5cc6d 100644 --- a/src/libraries/UTILITIES/FitConverter.h +++ b/src/libraries/UTILITIES/FitConverter.h @@ -2,9 +2,9 @@ * @file FitConverter.h * @author Kevin Scheuer * @brief Class for converting AmpTools fit results to any format - * @date 2026-01-31 + * @date 2026-02-01 * - * Note that this currently is built to convert to CSV, but the class can be extended + * @note that this currently is built to convert to CSV, but the class can be extended * to convert to any other format by accessing the various containers that store * the fit results. */ @@ -137,6 +137,16 @@ class FitConverter */ std::set uniqueAmpNames() const; + /** + * @brief return set of all unique coherent sum + amp names in a fit result + * + * A "full" amplitude is a "reaction::sum::ampName" string, so this extracts all + * unique "sum::ampName" strings + * + * @return std::set + */ + std::set uniqueSumAmpNames() const; + bool isValid() const { return m_fit_results.valid(); } /** @@ -175,13 +185,13 @@ class FitConverter constrainedAmps() const { return m_constrained_amps; } /** - * @brief Map of single amplitude intensities to their values and errors + * @brief Map of unique amplitude intensities to their values and errors * - * Contains the intensity (and error) for each unique amplitude name, - * calculated using all the constrained amplitudes for that name. + * Contains the intensity (and error) for each unique amplitude grouping, + * calculated using all the constrained amplitudes for that group. */ const std::map> & - singleAmpIntensities() const { return m_single_amp_intensities; } + uniqueAmpIntensities() const { return m_unique_amp_intensities; } /** * @brief Map of unique amplitude names to their production coefficients @@ -228,20 +238,48 @@ class FitConverter std::map m_standard_results; std::map> m_parameters; std::map> m_constrained_amps; - std::map> m_single_amp_intensities; + std::map> m_unique_amp_intensities; std::map> m_production_coefficients; std::map, std::pair> m_phase_differences; std::vector> m_error_matrix; /** - * @brief Finds and returns a map of unique amplitude names to their constrained full amplitudes + * @brief Get map of unique amplitude groups to their constrained full amplitudes * - * This assumes that all terms sharing the same amplitude name are constrained to each other. + * This attempts to identify the unique strings that can be a coupled to a set of + * constrained amplitudes. For a "full" amplitude string of the form + * "reaction::sum::ampName", it first assumes that amplitudes with the same ampName + * are constrained to each other, across reactions and sums. If not, it peels back a + * layer and assumes that amplitudes with the same "sum::ampName" are constrained to + * each other, across reactions. If that fails, it simply saves a mapping of the + * full amplitude to itself. The map keys are whats extracted as the unique + * amplitude names (ampName, sum::ampName, or full amplitude), and the values are + * vectors of the full amplitude strings that are constrained to each other. * - * @throw ERROR If the above assumption is violated + * @note the keys are whats written to the CSV, and is why it first attempts to + * extract just the ampName for simplicity. */ std::map> findConstrainedAmps() const; + + /** + * @brief Checks if amplitude names are constrained to each other + * + * A "full" amplitude is a "reaction::sum::ampName" string, so this checks if all + * amplitudes with the same "ampName" are constrained to each other across reactions + * and sums. + */ + bool ampNamesAreConstrained() const; + + /** + * @brief Checks if coherent sum + amplitude names are constrained to each other + * + * A "full" amplitude is a "reaction::sum::ampName" string, so this checks if all + * amplitudes with the same "sum::ampName" are constrained to each other across + * reactions. + */ + bool sumAmpNamesAreConstrained() const; + static const char *kModule; }; From c1981a3d3f25bd0b5f3ebd113d5ffb534443a879 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Tue, 3 Feb 2026 00:08:48 -0500 Subject: [PATCH 10/38] basic structure of root data converter done --- src/libraries/UTILITIES/RootDataConverter.cc | 407 +++++++++++++++++++ src/libraries/UTILITIES/RootDataConverter.h | 311 ++++++++++++++ 2 files changed, 718 insertions(+) create mode 100644 src/libraries/UTILITIES/RootDataConverter.cc create mode 100644 src/libraries/UTILITIES/RootDataConverter.h diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc new file mode 100644 index 000000000..13e3519e7 --- /dev/null +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -0,0 +1,407 @@ +/** + * @file RootDataConverter.cc + * @author Kevin Scheuer + * @brief Implementation of RootDataConverter class + * @date 2026-02-02 + * + */ + +#include +#include + +#include "RootDataConverter.h" +#include "IUAmpTools/report.h" + +#include "TFile.h" +#include "TKey.h" +#include "TClass.h" +#include "TTree.h" + +const char *RootDataConverter::kModule = "RootDataConverter"; + +RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warnings) : m_fit_file(filename), + m_fit_results(filename, mute_warnings), + m_cfg_info(m_fit_results.configInfo()), + m_mute_warnings(mute_warnings) +{ + if (!m_fit_results.valid()) + { + report(ERROR, kModule) << "RootDataConverter ERROR: Unable to read fit results from file " + m_fit_file << "\n"; + assert(false); + } + setLowerVertexIndices({1}); // default to index 1 being the lower vertex particle + extract(); +} + +void RootDataConverter::extract() +{ + // extract data files, background files, genMC, and accMC files + std::vector data_files = findFiles("data"); + std::vector background_files = findFiles("background"); + std::vector genMC_files = findFiles("genMC"); + std::vector accMC_files = findFiles("accMC"); + + if (data_files.empty()) + { + report(ERROR, kModule) << "No data files found in fit results for file: " + << m_fit_file << "\n"; + } + if (background_files.empty() && !m_mute_warnings) + { + report(WARNING, kModule) << "No background files found in fit results for file: " + << m_fit_file + << ". Assuming weights are stored directly in the data" + << " trees.\n"; + } + if (genMC_files.empty()) + { + report(ERROR, kModule) << "No genMC files found in fit results for file: " + << m_fit_file << "\n"; + } + if (accMC_files.empty()) + { + report(ERROR, kModule) << "No accMC files found in fit results for file: " + << m_fit_file << "\n"; + } + + // get tree names for each file + std::string data_tree_name = dataTreeName(); + std::string background_tree_name = (background_files.empty()) ? "" : backgroundTreeName(); + std::string genMC_tree_name = genMCTreeName(); + std::string accMC_tree_name = accMCTreeName(); + + // get weight branch name. If weight branch is not found, will return empty string + // and we'll assume weights of 1.0 later + std::string weight_branch_name; + if (background_files.empty()) + weight_branch_name = weightBranchName("data", data_tree_name); + else + weight_branch_name = weightBranchName("background", background_tree_name); + + // get beamE branch name + std::string beamE_branch_name = beamEBranchName("data"); + + // TODO: next step is to calculate t from the beam and lower vertex particle + // 4-vectors. Look at other plotters for reference. Then calculate the upper + // vertex mass from the remaining particles. Finally, fill histograms and + // calculate bin info with a better mapping between the header and value storage. +} + +std::vector RootDataConverter::findFiles(const std::string &file_type) const +{ + std::vector files; + + // get directory of the fit file + std::filesystem::path dir = std::filesystem::path(m_fit_file).parent_path(); + + const std::vector reaction_list = m_fit_results.reactionList(); + for (std::string reaction : reaction_list) + { + std::pair> file_pair; + + if (file_type == "data") + file_pair = m_cfg_info->reaction(reaction)->data(); + else if (file_type == "background") + file_pair = m_cfg_info->reaction(reaction)->background(); + else if (file_type == "genMC") + file_pair = m_cfg_info->reaction(reaction)->genMC(); + else if (file_type == "accMC") + file_pair = m_cfg_info->reaction(reaction)->accMC(); + else + { + report(ERROR, kModule) << "Unknown file type requested: " + << file_type + << "\n"; + assert(false); + } + + std::string data_reader_name = file_pair.first; + std::vector data_reader_args = file_pair.second; + + std::filesystem::path file_path = data_reader_args[0]; + + // assumes that relative paths are relative to the fit file directory + if (!file_path.is_absolute()) + file_path = dir / file_path; + + // canonical will remove any ../ or ./ from the path, while also checking + // for existence + try + { + files.push_back(std::filesystem::canonical(file_path).string()); + } + catch (const std::filesystem::filesystem_error &e) + { + report(ERROR, kModule) << "Error accessing " + << file_type + << " file: " + << file_path << "\n" + << e.what() << "\n"; + assert(false); + } + } + return files; +} + +std::string RootDataConverter::findTreeName(const std::string &file_type) const +{ + const std::vector reaction_list = m_fit_results.reactionList(); + + // assume all reactions use the same tree name, so just get the first one + std::string reaction = reaction_list[0]; + std::pair> file_pair; + std::string file_path; + if (file_type == "data") + { + file_pair = m_cfg_info->reaction(reaction)->data(); + file_path = findFiles("data")[0]; + } + else if (file_type == "background") + { + file_pair = m_cfg_info->reaction(reaction)->background(); + file_path = findFiles("background")[0]; + } + else if (file_type == "genMC") + { + file_pair = m_cfg_info->reaction(reaction)->genMC(); + file_path = findFiles("genMC")[0]; + } + else if (file_type == "accMC") + { + file_pair = m_cfg_info->reaction(reaction)->accMC(); + file_path = findFiles("accMC")[0]; + } + else + { + report(ERROR, kModule) << "Unknown file type requested: " + << file_type + << "\n"; + assert(false); + } + + // if only one tree exists in the file, return that tree name + std::vector tree_names = treesInFile(file_path); + if (tree_names.size() == 1) + return tree_names[0]; + + // otherwise, we have to extract the tree name from the data reader arguments + // which unfortunately differ for each data reader + std::string data_reader_name = file_pair.first; + std::vector data_reader_args = file_pair.second; + + if (data_reader_name == "ROOTDataReader" && data_reader_args.size() == 2) + { + return data_reader_args[1]; + } + else if (data_reader_name == "ROOTDataReaderBootstrap" && data_reader_args.size() == 3) + { + return data_reader_args[2]; + } + else if (data_reader_name == "FSRootDataReader") + { + return data_reader_args[1]; + } + else if (data_reader_name == "FSRootDataReaderBootstrap") + { + return data_reader_args[1]; + } + else + { + if (!m_mute_warnings) + report(WARNING, kModule) << "Unknown data reader or insufficient arguments to" + << " determine tree name. Defaulting to 'kin'\n"; + return "kin"; // default tree name + } +} + +std::vector RootDataConverter::treesInFile(const std::string &filename) const +{ + std::vector tree_names; + + TFile *f = TFile::Open(filename.c_str()); + if (!f || f->IsZombie()) + { + report(ERROR, kModule) << "Error opening ROOT file: " + filename + "\n"; + assert(false); + } + + TIter nextkey(f->GetListOfKeys()); + TKey *key; + while ((key = (TKey *)nextkey())) + { + TClass *cl = gROOT->GetClass(key->GetClassName()); + if (cl->InheritsFrom("TTree")) + { + tree_names.push_back(key->GetName()); + } + } + + f->Close(); + return tree_names; +} + +std::string RootDataConverter::weightBranchName( + const std::string &file_type, + const std::string &tree_name) const +{ + // TODO: verify that this friend tree works. May have to add friend tree to each + // file to access the weight branch later + + // FSRoot-based data readers can optionally pass a friend weight branch name as an + // argument, so check explicitly for that first + const std::vector reaction_list = m_fit_results.reactionList(); + std::string reaction = reaction_list[0]; // assume all reactions use the same weight branch + const std::pair> file_pair; + std::string root_file; + if (file_type == "data") + { + file_pair = m_cfg_info->reaction(reaction)->data(); + root_file = findFiles("data")[0]; + } + else if (file_type == "background") + { + file_pair = m_cfg_info->reaction(reaction)->background(); + root_file = findFiles("background")[0]; + } + else + { + report(ERROR, kModule) << "Unknown file type requested: " + << file_type + << "\n"; + assert(false); + } + + std::string data_reader_name = file_pair.first; + std::vector data_reader_args = file_pair.second; + + if (data_reader_name.find("FSRootDataReader") != std::string::npos && data_reader_args.size() >= 6) + { + std::string friend_file_name = data_reader_args[3]; + std::string friend_tree_name = data_reader_args[4]; + std::string weight_branch_name = data_reader_args[5]; + + // to access the weight branch, we need to add the friend tree + TFile *f = TFile::Open(root_file.c_str()); + if (!f || f->IsZombie()) + { + report(ERROR, kModule) << "Error opening ROOT file: " + root_file + "\n"; + assert(false); + } + TTree *tree = f->Get(tree_name.c_str()); + if (!tree) + { + report(ERROR, kModule) << "Error: Tree '" + tree_name + "' not found in file: " + root_file + "\n"; + assert(false); + } + tree->AddFriend(friend_tree_name.c_str(), friend_file_name.c_str()); + return friend_tree_name + "." + weight_branch_name; + } + else // not FSDataReader, so no friend tree weights + { + // proceed to check for weight branches in the main tree + } + + // otherwise, simply check if "weight" or "Weight" branch exists in the tree + TFile *f = TFile::Open(root_file.c_str()); + if (!f || f->IsZombie()) + { + report(ERROR, kModule) << "Error opening ROOT file: " + root_file + "\n"; + assert(false); + } + TTree *tree = f->Get(tree_name.c_str()); + if (!tree) + { + report(ERROR, kModule) << "Error: Tree '" + tree_name + "' not found in file: " + root_file + "\n"; + assert(false); + } + // check if weight branch exists before returning it. Most analyses use "weight" or + // "Weight" branches + if (tree->GetBranch("weight")) + { + f->Close(); + return "weight"; + } + else if (tree->GetBranch("Weight")) + { + f->Close(); + return "Weight"; + } + else + { + f->Close(); + if (!m_mute_warnings) + report(WARNING, kModule) << "Error: Weight branch not found in tree '" + << tree_name + << "' in file: " + << root_file + << ". Setting weights to 1.0\n"; + return ""; // return empty string to indicate no weight branch found + } +} + +std::string beamEBranchName(const std::string &file_type) const +{ + std::string file_path; + std::string tree_name; + if (file_type == "data") + { + file_path = findFiles("data")[0]; + tree_name = dataTreeName(); + } + else if (file_type == "background") + { + file_path = findFiles("background")[0]; + tree_name = backgroundTreeName(); + } + else if (file_type == "genMC") + { + file_path = findFiles("genMC")[0]; + tree_name = genMCTreeName(); + } + else if (file_type == "accMC") + { + file_path = findFiles("accMC")[0]; + tree_name = accMCTreeName(); + } + else + { + report(ERROR, kModule) << "Unknown file type requested: " + << file_type + << "\n"; + assert(false); + } + + TFile *f = TFile::Open(file_path.c_str()); + if (!f || f->IsZombie()) + { + report(ERROR, kModule) << "Error opening ROOT file: " + file_path + "\n"; + assert(false); + } + TTree *tree = f->Get(tree_name.c_str()); + if (!tree) + { + report(ERROR, kModule) << "Error: Tree '" + tree_name + "' not found in file: " + file_path + "\n"; + assert(false); + } + + if (tree->GetBranch("E_Beam")) + { + f->Close(); + return "E_Beam"; + } + else if (tree->GetBranch("EnPB")) + { + f->Close(); + return "EnPB"; + } + else + { + f->Close(); + report(ERROR, kModule) << "Error: beamE branch not found in tree '" + << tree_name + << "' in file: " + << file_path + << "\n"; + assert(false); + } +} \ No newline at end of file diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h new file mode 100644 index 000000000..b17423e12 --- /dev/null +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -0,0 +1,311 @@ +/** + * @file RootDataConverter.h + * @author Kevin Scheuer + * @brief Class for converting ROOT tree data associated with an AmpTools fit to any format + * @date 2026-02-02 + * + */ + +#ifndef DATA_CONVERTER_H +#define DATA_CONVERTER_H + +#include +#include +#include + +#include "TH1D.h" +#include "TCanvas.h" +#include "TFile.h" +#include "TTree.h" + +#include "IUAmpTools/FitResults.h" +#include "IUAmpTools/ConfigurationInfo.h" + +/** + * @class RootDataConverter + * @brief Converts AmpTools fit result data to any format + * + * This class extracts data information from a single AmpTools .fit file. It can then + * output the data in various formats, such as CSV. The extracted data includes: + * - Bin edges, centers, averages, and RMS values for t, beam energy, and mass + * histograms + * - Total number of events and errors for data and acceptance-corrected data + * - Efficiency calculation based on genMC and accMC event counts + * + * Example usage in convert_to_csv.cc: + * @code + * TODO: complete example + * @endcode + */ +class RootDataConverter +{ +public: + /** + * @brief Construct a RootDataConverter for a single fit file + * + * This assumes the 4-momenta indices are as follows: + * - index 0 (B): beam photon + * - index 1: single lower vertex particle, typically the recoil proton + * - indices 2 to N: final state particles from upper vertex + * + * @param[in] filename path to the .fit file + * @param[in] mute_warnings whether to mute warnings from FitResults and when + * background files are not found + */ + RootDataConverter(const std::string &filename, bool mute_warnings = false); + + /** + * @brief Construct a new Data Converter object with specified lower vertex indices + * + * The 4-momenta indices are as follows: + * - index 0 (B): beam photon + * - indices 1 to M: lower vertex particles, e.g. proton, pi+ that decay from Delta++ + * - indices M+1 to N: final state particles from upper vertex + * + * @param[in] filename path to the .fit file + * @param[in] lower_vertex_indices 4-momenta indices for lower vertex particles + * @param[in] mute_warnings whether to mute warnings from FitResults and when + * background files are not found + */ + RootDataConverter(const std::string &filename, + const std::vector &lower_vertex_indices, + bool mute_warnings = false); + + /** + * @brief Construct a new Data Converter object with specified lower vertex and isobar indices + * + * This will add new headers and values for isobar mass and efficiency calculations. + * + * The 4-momenta indices are as follows: + * - index 0 (B): beam photon + * - indices 1 to M: lower vertex particles, e.g. proton, pi+ that decay from Delta++ + * - indices M+1 to P: isobar particles, e.g. pi+ pi- from rho + * - indices P+1 to N: remaining final state particles from upper vertex + * + * Note that indices M+1 to N are still used to calculate the total mass values. + * + * TODO: implement. make sure no conflicts with lower vertex indices. Also this + * should add isobar mass and event counts to the output. + * + * @param[in] filename path to the .fit file + * @param[in] lower_vertex_indices 4-momenta indices for lower vertex particles + * @param[in] isobar_indices 4-momenta indices for isobar particles + * @param[in] mute_warnings whether to mute warnings from FitResults and when + * background files are not found + */ + RootDataConverter(const std::string &filename, + const std::vector &lower_vertex_indices, + const std::vector &isobar_indices, + bool mute_warnings = false); + + /** + * @brief Extract fit data from the .fit file + */ + void extract(); + + /** + * @brief Data files associated with the fit results + * @note It assumes one data file per reaction, and that it is the first argument + * passed to the data reader. + * @return std::vector where each element is an absolute path to a data file + */ + std::vector dataFiles() const { return findFiles("data"); }; + + /** + * @brief Background files associated with the fit results, if they exist + * @note It assumes one background file per reaction, and that it is the first + * argument passed to the data reader. + * @return std::vector where each element is an absolute path to a background file + */ + std::vector backgroundFiles() const { return findFiles("background"); }; + + /** + * @brief Generated MC files associated with the fit results + * @note It assumes one genMC file per reaction, and that it is the first argument + * passed to the data reader. + * @return std::vector where each element is an absolute path to a genMC file + */ + std::vector genMCFiles() const { return findFiles("genMC"); }; + + /** + * @brief Accepted MC files associated with the fit results + * @note It assumes one accMC file per reaction, and that it is the first argument + * passed to the data reader. + * @return std::vector where each element is an absolute path to an accMC file + */ + std::vector accMCFiles() const { return findFiles("accMC"); }; + + /** + * @brief data tree name associated with the fit results + * If one tree exists in the data files, that tree name is returned. Otherwise, + * the tree name is extracted based on the arguments given to known data readers. + * If the data reader is unknown, it will warn and default to "kin". + * @return std::string tree name + */ + std::string dataTreeName() const { return findTreeName("data"); }; + + /** + * @brief background tree name associated with the fit results + * If one tree exists in the data files, that tree name is returned. Otherwise, + * the tree name is extracted based on the arguments given to known data readers. + * If the data reader is unknown, it will warn and default to "kin". + * @return std::string tree name + */ + std::string backgroundTreeName() const { return findTreeName("background"); }; + + /** + * @brief generated MC tree name associated with the fit results + * If one tree exists in the data files, that tree name is returned. Otherwise, + * the tree name is extracted based on the arguments given to known data readers. + * If the data reader is unknown, it will warn and default to "kin". + * @return std::string tree name + */ + std::string genMCTreeName() const { return findTreeName("genMC"); }; + + /** + * @brief accepted MC tree name associated with the fit results + * If one tree exists in the data files, that tree name is returned. Otherwise, + * the tree name is extracted based on the arguments given to known data readers. + * If the data reader is unknown, it will warn and default to "kin". + * @return std::string tree name + */ + std::string accMCTreeName() const { return findTreeName("accMC"); }; + + /** + * @brief Get the name of the weight branch in the specified tree and file type + * + * Some data readers, such as FSRootDataReader, can store the event weights in a + * friend tree. This function will check for that case first, and return the + * appropriate weight branch name. If no friend tree is used, it will check for + * "weight" or "Weight" branches in the main tree. If those branches do not exist, + * it will return an empty string to indicate no weight branch found. + * + * @param[in] file_type The type of file ("data", "background") + * @param[in] tree_name The name of the tree within the file + * @return std::string The name of the weight branch, or empty string if none found + * + * @todo FSRoot friend tree case untested + */ + std::string weightBranchName( + const std::string &file_type, + const std::string &tree_name) const; + + /** + * @brief Get the name of the beam energy branch + * + * @note Currently only searches for "E_Beam" and "EnPB" branch names, as these are + * most common. + * + * @param[in] file_type The type of file ("data", "background", "genMC", "accMC") + * @return std::string beam energy branch name + */ + std::string beamEBranchName(const std::string &file_type) const; + + /** + * @brief Get 4-momenta indices for lower vertex particles + * + * An AmpTools ROOT tree contains 4-vectors for all particles in the event. The + * first index is always the beam photon, but there can be multiple particles + * associated with the lower vertex like a proton, Delta+, etc. The rest of the + * particles are assumed to be final state particles from the upper vertex. This + * function returns the indices of the lower vertex particles in the data 4-vectors. + */ + std::vector getLowerVertexIndices() const { return m_lower_vertex_indices; } + + /** + * @brief Set 4-momenta indices for lower vertex particles + * + * See getLowerVertexIndices for more details. + */ + void setLowerVertexIndices(const std::vector &indices) { m_lower_vertex_indices = indices; } + + /** + * @brief Get the headers for the CSV output + */ + std::vector getHeaders() const { return headers; } + + /** + * @brief Set the headers for the CSV output + */ + void setHeaders(const std::vector &new_headers) { headers = new_headers; } + + /** + * @brief Map of header names to their values + */ + std::map getValues() const { return m_values; } + + bool isValid() const { return m_fit_results.valid(); } + + ~RootDataConverter() = default; + +private: + const std::string m_fit_file; ///< Path to the .fit file + const FitResults m_fit_results; ///< AmpTools FitResults object + const ConfigurationInfo *m_cfg_info; ///< ConfigurationInfo from the fit + std::vector m_lower_vertex_indices; ///< Indices of lower vertex particles in the data 4-vectors + std::vector m_isobar_indices; ///< Indices of isobar particles in the data 4-vectors + bool m_mute_warnings; ///< Whether to mute warnings about missing background files + + // these are the standard headers for the CSV output + std::vector m_headers = { + // TODO: ugly, should be better way like was done in FitConverter + "file", + "t_low", + "t_high", + "t_center", + "t_avg", + "t_rms", + "e_low", + "e_high", + "e_center", + "e_avg", + "e_rms", + "m_low", + "m_high", + "m_center", + "m_avg", + "m_rms", + "events", + "events_err", + "ac_events", + "ac_events_err", + "efficiency", + }; + + std::map m_values; + + /** + * @brief Find files of a given type associated with the fit results + * @param[in] file_type data, background, genMC, or accMC + * @note It assumes one file per reaction, and that it is the first argument passed + * to the data reader. + * @return std::vector where each element is an absolute path to a file of the given type + */ + std::vector findFiles(const std::string &file_type) const; + + /** + * @brief Find the tree name associated with the fit results + * + * This function will first attempt to see if only one tree exists in the ROOT file. + * If multiple trees exist, it will extract the tree name based on the arguments + * given to known data readers in halld_sim. If the data reader is unknown, it will + * warn and default to "kin". + * + * @param[in] file_type data, background, genMC, or accMC + * + * @return std::string tree name + */ + std::string findTreeName(const std::string &file_type) const; + + /** + * @brief Get the names of all trees in a ROOT file + * + * @param filename Path to the ROOT file + * @return std::vector List of tree names in the file + */ + std::vector treesInFile(const std::string &filename) const; + + static const char *kModule; +}; + +#endif // DATA_CONVERTER_H \ No newline at end of file From 9505423b157e77cadddd2a58f595144be0cbedc8 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Sun, 8 Feb 2026 14:23:34 -0500 Subject: [PATCH 11/38] Add file validation and save files to member vars Files are accessed so many times it makes more sense to save them. File loading happens in the constructor now. Also added a background file bool for easy tracking of whether or not the background files are present. A template for getting the -t values is also added, but not yet implemented. This will also effect how the other distributions are handled. --- src/libraries/UTILITIES/RootDataConverter.cc | 173 ++++++++----------- src/libraries/UTILITIES/RootDataConverter.h | 33 ++-- 2 files changed, 92 insertions(+), 114 deletions(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc index 13e3519e7..9432ef83a 100644 --- a/src/libraries/UTILITIES/RootDataConverter.cc +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -16,6 +16,7 @@ #include "TKey.h" #include "TClass.h" #include "TTree.h" +#include "TLorentzVector.h" const char *RootDataConverter::kModule = "RootDataConverter"; @@ -30,56 +31,37 @@ RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warn assert(false); } setLowerVertexIndices({1}); // default to index 1 being the lower vertex particle + + // set data, background, genMC, and accMC files + m_data_files = dataFiles(); + m_background_files = backgroundFiles(); + m_genMC_files = genMCFiles(); + m_accMC_files = accMCFiles(); + + m_background_files_exist = !m_background_files.empty(); + + validateDataFiles(); + validateBackgroundFiles(); // if no bkgnd files exist, warns user that weights assumed to be in data tree + validateGenMCFiles(); + validateAccMCFiles(); + + m_data_tree_name = dataTreeName(); + m_background_tree_name = backgroundTreeName(); + m_genMC_tree_name = genMCTreeName(); + m_accMC_tree_name = accMCTreeName(); + extract(); } void RootDataConverter::extract() { - // extract data files, background files, genMC, and accMC files - std::vector data_files = findFiles("data"); - std::vector background_files = findFiles("background"); - std::vector genMC_files = findFiles("genMC"); - std::vector accMC_files = findFiles("accMC"); - - if (data_files.empty()) - { - report(ERROR, kModule) << "No data files found in fit results for file: " - << m_fit_file << "\n"; - } - if (background_files.empty() && !m_mute_warnings) - { - report(WARNING, kModule) << "No background files found in fit results for file: " - << m_fit_file - << ". Assuming weights are stored directly in the data" - << " trees.\n"; - } - if (genMC_files.empty()) - { - report(ERROR, kModule) << "No genMC files found in fit results for file: " - << m_fit_file << "\n"; - } - if (accMC_files.empty()) - { - report(ERROR, kModule) << "No accMC files found in fit results for file: " - << m_fit_file << "\n"; - } - - // get tree names for each file - std::string data_tree_name = dataTreeName(); - std::string background_tree_name = (background_files.empty()) ? "" : backgroundTreeName(); - std::string genMC_tree_name = genMCTreeName(); - std::string accMC_tree_name = accMCTreeName(); - // get weight branch name. If weight branch is not found, will return empty string // and we'll assume weights of 1.0 later std::string weight_branch_name; - if (background_files.empty()) - weight_branch_name = weightBranchName("data", data_tree_name); + if (m_background_files_exist) + weight_branch_name = weightBranchName("background", m_background_tree_name); else - weight_branch_name = weightBranchName("background", background_tree_name); - - // get beamE branch name - std::string beamE_branch_name = beamEBranchName("data"); + weight_branch_name = weightBranchName("data", m_data_tree_name); // TODO: next step is to calculate t from the beam and lower vertex particle // 4-vectors. Look at other plotters for reference. Then calculate the upper @@ -154,22 +136,22 @@ std::string RootDataConverter::findTreeName(const std::string &file_type) const if (file_type == "data") { file_pair = m_cfg_info->reaction(reaction)->data(); - file_path = findFiles("data")[0]; + file_path = m_data_files[0]; } else if (file_type == "background") { file_pair = m_cfg_info->reaction(reaction)->background(); - file_path = findFiles("background")[0]; + file_path = m_background_files[0]; } else if (file_type == "genMC") { file_pair = m_cfg_info->reaction(reaction)->genMC(); - file_path = findFiles("genMC")[0]; + file_path = m_genMC_files[0]; } else if (file_type == "accMC") { file_pair = m_cfg_info->reaction(reaction)->accMC(); - file_path = findFiles("accMC")[0]; + file_path = m_accMC_files[0]; } else { @@ -256,12 +238,12 @@ std::string RootDataConverter::weightBranchName( if (file_type == "data") { file_pair = m_cfg_info->reaction(reaction)->data(); - root_file = findFiles("data")[0]; + root_file = m_data_files[0]; } else if (file_type == "background") { file_pair = m_cfg_info->reaction(reaction)->background(); - root_file = findFiles("background")[0]; + root_file = m_background_files[0]; } else { @@ -301,7 +283,7 @@ std::string RootDataConverter::weightBranchName( // proceed to check for weight branches in the main tree } - // otherwise, simply check if "weight" or "Weight" branch exists in the tree + // simply check if "weight" or "Weight" branch exists in the tree TFile *f = TFile::Open(root_file.c_str()); if (!f || f->IsZombie()) { @@ -339,69 +321,54 @@ std::string RootDataConverter::weightBranchName( } } -std::string beamEBranchName(const std::string &file_type) const + +double RootDataConverter::calculateMandelstamT() const { - std::string file_path; - std::string tree_name; - if (file_type == "data") - { - file_path = findFiles("data")[0]; - tree_name = dataTreeName(); - } - else if (file_type == "background") - { - file_path = findFiles("background")[0]; - tree_name = backgroundTreeName(); - } - else if (file_type == "genMC") - { - file_path = findFiles("genMC")[0]; - tree_name = genMCTreeName(); - } - else if (file_type == "accMC") - { - file_path = findFiles("accMC")[0]; - tree_name = accMCTreeName(); - } - else - { - report(ERROR, kModule) << "Unknown file type requested: " - << file_type - << "\n"; - assert(false); - } + // TODO: this is the basic start, and then you can get the lower vertex particle + // 4-vector. Its simple for a proton since its just the PxP1, etc 4 vectors, but + // for baryon decays I'll need to look at other plotters to see if boosts need to + // occur first. The other hangup is that if I'm doing all this, I might as well + // make the -t histogram as well, so need to think about how to structure this. + // probably make some generic tHist, massHist, and beamHist functions that can + // do all these calculations and histogram filling. Then from there the edges, + // centers, errors, etc can be calculated. I suppose if 4-vectors are used I don't + // need to calculate bin edges from the hist, because I can just get the max/min + // values directly from the 4-vectors. + TLorentzVector beam = beamLorentzVector(tree); + TLorentzVector target(0, 0, 0, 0.938); // target proton at rest +} - TFile *f = TFile::Open(file_path.c_str()); - if (!f || f->IsZombie()) - { - report(ERROR, kModule) << "Error opening ROOT file: " + file_path + "\n"; - assert(false); - } - TTree *tree = f->Get(tree_name.c_str()); - if (!tree) - { - report(ERROR, kModule) << "Error: Tree '" + tree_name + "' not found in file: " + file_path + "\n"; - assert(false); - } - if (tree->GetBranch("E_Beam")) +TLorentzVector RootDataConverter::beamLorentzVector(TTree* tree) const +{ + double EnPB, PxPB, PyPB, PzPB; + + tree->SetBranchAddress("EnPB", &EnPB); + tree->SetBranchAddress("PxPB", &PxPB); + tree->SetBranchAddress("PyPB", &PyPB); + tree->SetBranchAddress("PzPB", &PzPB); + + return TLorentzVector(PxPB, PyPB, PzPB, EnPB); +} + +void RootDataConverter::validateFiles(const std::vector &files, + const std::string &file_type) const +{ + if (files.empty() && file_type != "background") { - f->Close(); - return "E_Beam"; + report(ERROR, kModule) << "No " + << file_type << " files found in fit results for file: " + << m_fit_file << "\n"; } - else if (tree->GetBranch("EnPB")) + else if (files.empty() && file_type == "background" && !m_mute_warnings) { - f->Close(); - return "EnPB"; + report(WARNING, kModule) << "No background files found in fit results for file: " + << m_fit_file + << ". Assuming weights are stored directly in the data" + << " trees.\n"; } else { - f->Close(); - report(ERROR, kModule) << "Error: beamE branch not found in tree '" - << tree_name - << "' in file: " - << file_path - << "\n"; - assert(false); + return; // files are valid } } \ No newline at end of file diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index b17423e12..140b81cac 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -135,6 +135,13 @@ class RootDataConverter */ std::vector accMCFiles() const { return findFiles("accMC"); }; + + void validateDataFiles() const { validateFiles(m_data_files, "data"); } + void validateBackgroundFiles() const { validateFiles(m_background_files, "background"); } + void validateGenMCFiles() const { validateFiles(m_genMC_files, "genMC"); } + void validateAccMCFiles() const { validateFiles(m_accMC_files, "accMC"); } + + /** * @brief data tree name associated with the fit results * If one tree exists in the data files, that tree name is returned. Otherwise, @@ -190,17 +197,6 @@ class RootDataConverter const std::string &file_type, const std::string &tree_name) const; - /** - * @brief Get the name of the beam energy branch - * - * @note Currently only searches for "E_Beam" and "EnPB" branch names, as these are - * most common. - * - * @param[in] file_type The type of file ("data", "background", "genMC", "accMC") - * @return std::string beam energy branch name - */ - std::string beamEBranchName(const std::string &file_type) const; - /** * @brief Get 4-momenta indices for lower vertex particles * @@ -246,6 +242,18 @@ class RootDataConverter std::vector m_isobar_indices; ///< Indices of isobar particles in the data 4-vectors bool m_mute_warnings; ///< Whether to mute warnings about missing background files + const std::vector m_data_files; + const std::vector m_background_files; + const std::vector m_genMC_files; + const std::vector m_accMC_files; + + bool m_background_files_exist; + + const std::string m_data_tree_name; + const std::string m_background_tree_name; + const std::string m_genMC_tree_name; + const std::string m_accMC_tree_name; + // these are the standard headers for the CSV output std::vector m_headers = { // TODO: ugly, should be better way like was done in FitConverter @@ -283,6 +291,9 @@ class RootDataConverter */ std::vector findFiles(const std::string &file_type) const; + void validateFiles(const std::vector &files, + const std::string &file_type) const; + /** * @brief Find the tree name associated with the fit results * From 1db3e5189911bf30f7365a7fe06e14830099d6e7 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Sun, 8 Feb 2026 14:26:07 -0500 Subject: [PATCH 12/38] minor todo edit --- src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index 29570f3d6..da96a038c 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -44,7 +44,7 @@ int main(int argc, char *argv[]) bool correlation = false; bool verbose = false; bool preview = false; - std::string mass_branch = "M4Pi"; // TODO: change by building up from AmpTools 4-vectors + std::string mass_branch = "M4Pi"; // TODO: change by building up from AmpTools 4-vectors. Instead option for lower vertex indices, and possibly isobar indices auto print_help = []() { From 70003115d1d00b9a11272324279729c42f14d7d3 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Mon, 4 May 2026 18:10:15 -0400 Subject: [PATCH 13/38] Improved some comments and variable names --- src/libraries/UTILITIES/FitConverter.cc | 4 +- src/libraries/UTILITIES/FitConverter.h | 8 +-- .../convert_to_csv/convert_to_csv.cc | 54 +++++++++++-------- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/libraries/UTILITIES/FitConverter.cc b/src/libraries/UTILITIES/FitConverter.cc index 3873caa29..def394315 100644 --- a/src/libraries/UTILITIES/FitConverter.cc +++ b/src/libraries/UTILITIES/FitConverter.cc @@ -145,11 +145,11 @@ void FitConverter::extract() << "\n"; found_phase_diff = true; - break; // break inner loop + break; // break inner amp2 loop } } if (found_phase_diff) - break; // break outer loop + break; // break outer amp1 loop } } } diff --git a/src/libraries/UTILITIES/FitConverter.h b/src/libraries/UTILITIES/FitConverter.h index f9df5cc6d..393777346 100644 --- a/src/libraries/UTILITIES/FitConverter.h +++ b/src/libraries/UTILITIES/FitConverter.h @@ -150,7 +150,7 @@ class FitConverter bool isValid() const { return m_fit_results.valid(); } /** - * @brief Map of AmpTool standard fit outputs and their values + * @brief Map of AmpTools standard fit outputs and their values * * Standard outputs include values such as: * - eMatrixStatus @@ -178,7 +178,7 @@ class FitConverter * "reaction1::sum2::myAmpName", * "reaction2::sum1::myAmpName", * ...} - * Note that the self-constraint is included in the list, so even amplitudes with + * @note that the self-constraint is included in the list, so even amplitudes with * no constraints will have a vector of size one (itself). */ const std::map> & @@ -208,8 +208,8 @@ class FitConverter * Only includes pairs of unique amplitude names that share a reaction and sum. For * 2 amplitudes "amp1" and "amp2", we search through every combination in * m_constrained_amps[amp1] and m_constrained_amps[amp2] to find pairs that share - * the same "reaction::sum". The key is a vector of the two full amplitude strings, - * and the value is a pair of the phase difference and its error. + * the same "reaction::sum". The key is a size 2 vector of the full amplitude + * strings, and the value is a pair of the phase difference and its error. */ const std::map, std::pair> & phaseDifferences() const { return m_phase_differences; } diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index da96a038c..13e7fd803 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -36,12 +36,12 @@ int main(int argc, char *argv[]) std::string output_file = ""; // optional arguments - bool sorted = false; + bool is_sorted = false; int sort_index = -1; - bool acceptance_corrected = false; + bool is_acceptance_corrected = false; bool create_data_file = false; - bool covariance = false; - bool correlation = false; + bool create_covariance = false; + bool create_correlation = false; bool verbose = false; bool preview = false; std::string mass_branch = "M4Pi"; // TODO: change by building up from AmpTools 4-vectors. Instead option for lower vertex indices, and possibly isobar indices @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) { std::cout << "Usage: convert_to_csv [-h] [-i INPUT_FILES] [-o OUTPUT_PATH]" << " [-s] [--sort-index INDEX] [-a] [-d] [-m MASS_BRANCH] [-p] [-v]" - << " [--corelation] [--covariance] [-d]\n" + << " [--correlation] [--covariance] [-d]\n" << " -i INPUT_FILES:\t\tFull path to the .fit file(s)\n" << " -o OUTPUT_PATH:\t\tFull path to the output .csv file\n" << " -s, --sort:\t\t\tSort files by last number in the file name or path (default:true)\n" @@ -75,12 +75,13 @@ int main(int argc, char *argv[]) } } - // helper to distinguish flags from negative numbers + // helper to distinguish flags from negative numbers, since both use "-" character auto is_flag = [](std::string_view s) { return s.starts_with("-") && s.size() > 1 && !std::isdigit(static_cast(s[1])); }; + // parse arguments for (size_t i = 1; i < args.size(); ++i) { auto arg = args[i]; @@ -108,7 +109,7 @@ int main(int argc, char *argv[]) } else if (arg == "-s" || arg == "--sort") { - sorted = true; + is_sorted = true; } else if (arg == "--sort-index") { @@ -132,7 +133,7 @@ int main(int argc, char *argv[]) } else if (arg == "-a" || arg == "--acceptance-correct") { - acceptance_corrected = true; + is_acceptance_corrected = true; } else if (arg == "-d" || arg == "--data-file") { @@ -140,11 +141,11 @@ int main(int argc, char *argv[]) } else if (arg == "--covariance") { - covariance = true; + create_covariance = true; } else if (arg == "--correlation") { - correlation = true; + create_correlation = true; } else if (arg == "-v" || arg == "--verbose") { @@ -175,6 +176,7 @@ int main(int argc, char *argv[]) } } + // Error checks for required arguments if (input_files.empty()) { std::cerr << "Input files must be provided. See help message." << "\n"; @@ -204,7 +206,7 @@ int main(int argc, char *argv[]) } // sort files if requested - if (sorted) + if (is_sorted) input_files = sort_files(input_files, sort_index); if (preview) @@ -227,7 +229,7 @@ int main(int argc, char *argv[]) { if (verbose) report(INFO, kModule) << "Processing file: " << file << "\n"; - FitConverter converter(file, acceptance_corrected, !verbose); + FitConverter converter(file, is_acceptance_corrected, !verbose); if (!header_written) { @@ -236,14 +238,14 @@ int main(int argc, char *argv[]) header_written = true; first_file_result_header = header; - if (covariance) + if (create_covariance) { std::string cov_header = converter.getCSVCovarianceMatrixHeader(); csv_cov_data << cov_header << "\n"; first_file_cov_header = cov_header; } - if (correlation) + if (create_correlation) { std::string corr_header = converter.getCSVCorrelationMatrixHeader(); csv_corr_data << corr_header << "\n"; @@ -251,7 +253,10 @@ int main(int argc, char *argv[]) } } else - { + { + // this block ensures all files have the same csv header format. If this is + // not the case, then the csv files will be malformed and difficult to work + // with, so we enforce that here before processing any files std::string header = converter.getCSVHeader(); if (header != first_file_result_header) { @@ -261,7 +266,7 @@ int main(int argc, char *argv[]) return 1; } - if (covariance) + if (create_covariance) { std::string cov_header = converter.getCSVCovarianceMatrixHeader(); if (cov_header != first_file_cov_header) @@ -272,7 +277,7 @@ int main(int argc, char *argv[]) return 1; } } - if (correlation) + if (create_correlation) { std::string corr_header = converter.getCSVCorrelationMatrixHeader(); if (corr_header != first_file_corr_header) @@ -286,16 +291,21 @@ int main(int argc, char *argv[]) } csv_result_data << converter.getCSVRow() << "\n"; - if (covariance) + if (create_covariance) csv_cov_data << converter.getCSVCovarianceMatrix() << "\n"; - if (correlation) + if (create_correlation) csv_corr_data << converter.getCSVCorrelationMatrix() << "\n"; } + + // write the csv data to the output file std::ofstream result_file(output_file); result_file << csv_result_data.str(); result_file.close(); - if (covariance) + // covariance and correlation matrix files are written to separate files in the same + // directory as the main output file, with suffixes _covariance.csv and + // _correlation.csv + if (create_covariance) { std::filesystem::path output_path(output_file); std::string covariance_file = (output_path.parent_path() / (output_path.stem().string() + "_covariance.csv")).string(); @@ -304,7 +314,7 @@ int main(int argc, char *argv[]) cov_file << csv_cov_data.str(); cov_file.close(); } - if (correlation) + if (create_correlation) { std::filesystem::path output_path(output_file); std::string correlation_file = (output_path.parent_path() / (output_path.stem().string() + "_correlation.csv")).string(); @@ -384,7 +394,7 @@ std::vector sort_files(const std::vector &files, int s * @brief Check if all provided files are .fit files * * @param[in] files Vector of file paths to check - * @return true + * @return true if all files have a .fit extension and exist, false otherwise * @return false if any file is not a .fit file or does not exist */ bool are_valid_fit_files(const std::vector &files) From ac209088874e303f1e5d38d23f893eb5bab404c2 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Mon, 4 May 2026 18:59:22 -0400 Subject: [PATCH 14/38] added comments --- src/libraries/UTILITIES/RootDataConverter.cc | 11 +++++------ src/libraries/UTILITIES/RootDataConverter.h | 11 +++++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc index 9432ef83a..9d8baeb3e 100644 --- a/src/libraries/UTILITIES/RootDataConverter.cc +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -233,7 +233,7 @@ std::string RootDataConverter::weightBranchName( // argument, so check explicitly for that first const std::vector reaction_list = m_fit_results.reactionList(); std::string reaction = reaction_list[0]; // assume all reactions use the same weight branch - const std::pair> file_pair; + const std::pair> file_pair; // data reader name and args std::string root_file; if (file_type == "data") { @@ -256,6 +256,7 @@ std::string RootDataConverter::weightBranchName( std::string data_reader_name = file_pair.first; std::vector data_reader_args = file_pair.second; + // the FSRootDataReader can have special friend trees that hold the event weights if (data_reader_name.find("FSRootDataReader") != std::string::npos && data_reader_args.size() >= 6) { std::string friend_file_name = data_reader_args[3]; @@ -278,12 +279,9 @@ std::string RootDataConverter::weightBranchName( tree->AddFriend(friend_tree_name.c_str(), friend_file_name.c_str()); return friend_tree_name + "." + weight_branch_name; } - else // not FSDataReader, so no friend tree weights - { - // proceed to check for weight branches in the main tree - } - // simply check if "weight" or "Weight" branch exists in the tree + // not FSROOTDataReader, so no friend tree weights. Proceed to check for weight + // branches in the main tree TFile *f = TFile::Open(root_file.c_str()); if (!f || f->IsZombie()) { @@ -359,6 +357,7 @@ void RootDataConverter::validateFiles(const std::vector &files, report(ERROR, kModule) << "No " << file_type << " files found in fit results for file: " << m_fit_file << "\n"; + assert(false); } else if (files.empty() && file_type == "background" && !m_mute_warnings) { diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index 140b81cac..1cf0063ff 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -291,6 +291,17 @@ class RootDataConverter */ std::vector findFiles(const std::string &file_type) const; + + /** + * @brief Confirm that files vector is non-empty, unless its background + * + * Generally will throw an error if no files are present for a given type. If + * file_type is "background", this can happen, but it warns the user that it assumes + * any event weights are thus stored directly in the data trees. + * + * @param files absolute paths of files associated with the fit results for a given type + * @param file_type data, background, genMC, or accMC + */ void validateFiles(const std::vector &files, const std::string &file_type) const; From 0a6ce6d80c0a7a40dd1a5721979ce608b77f2bff Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Mon, 4 May 2026 22:31:47 -0400 Subject: [PATCH 15/38] add some basics for rootDataConverter in csv script --- .../convert_to_csv/convert_to_csv.cc | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index 13e7fd803..f9d4b58da 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -18,6 +18,7 @@ #include #include "UTILITIES/FitConverter.h" +#include "UTILITIES/RootDataConverter.h" #include "IUAmpTools/report.h" const char *kModule = "convert_to_csv"; @@ -55,10 +56,10 @@ int main(int argc, char *argv[]) << " -o OUTPUT_PATH:\t\tFull path to the output .csv file\n" << " -s, --sort:\t\t\tSort files by last number in the file name or path (default:true)\n" << " --sort-index INDEX:\t\tWhat number in file path to sort by (default:-1, so last number in path is used)\n" - << " -a --acceptance-correct:\tWhether to acceptance correct intensities (default:true)\n" - << " -d --data-file:\t\tCreate separate data file (default:false)\n" + << " -a --acceptance-correct:\tAcceptance correct the intensities\n" + << " -d --data-file:\t\tCreate separate data file\n" << " -m --mass-branch:\t\tBranch name for final mass spectrum of interest (default:M4Pi)\n" - << " -p --preview:\t\t\tPrint files to be processed, but don't run (default:false)\n" + << " -p --preview:\t\t\tPrint files to be processed, but don't run\n" << " -v --verbose:\t\t\tPrint information from converter scripts as they run\n" << " --correlation:\t\tSave correlation matrix to separate csv file\n" << " --covariance:\t\t\tSave covariance matrix to separate csv file\n" @@ -220,9 +221,9 @@ int main(int argc, char *argv[]) } // ==== PROCESS FILES AND WRITE TO CSV ==== - std::stringstream csv_result_data, csv_cov_data, csv_corr_data; + std::stringstream csv_result_data, csv_cov_data, csv_corr_data, csv_root_data; bool header_written = false; - std::string first_file_result_header, first_file_cov_header, first_file_corr_header; + std::string first_file_result_header, first_file_cov_header, first_file_corr_header, first_file_data_header; // extract fit results, and optionally the cov/corr matrices for (const auto &file : input_files) @@ -230,6 +231,10 @@ int main(int argc, char *argv[]) if (verbose) report(INFO, kModule) << "Processing file: " << file << "\n"; FitConverter converter(file, is_acceptance_corrected, !verbose); + if (create_data_file) + { + RootDataConverter data_converter(file, !verbose); + } if (!header_written) { @@ -251,6 +256,11 @@ int main(int argc, char *argv[]) csv_corr_data << corr_header << "\n"; first_file_corr_header = corr_header; } + + if (create_data_file) + { + // TODO: implement data header check + } } else { @@ -288,6 +298,10 @@ int main(int argc, char *argv[]) return 1; } } + if (create_data_file) + { + // TODO: implement data header check + } } csv_result_data << converter.getCSVRow() << "\n"; @@ -295,6 +309,8 @@ int main(int argc, char *argv[]) csv_cov_data << converter.getCSVCovarianceMatrix() << "\n"; if (create_correlation) csv_corr_data << converter.getCSVCorrelationMatrix() << "\n"; + if (create_data_file) + ;//csv_root_data << data_converter.getCSVRow() << "\n"; // TODO: implement getCSVData in RootDataConverter to extract relevant data for csv output } // write the csv data to the output file @@ -310,7 +326,6 @@ int main(int argc, char *argv[]) std::filesystem::path output_path(output_file); std::string covariance_file = (output_path.parent_path() / (output_path.stem().string() + "_covariance.csv")).string(); std::ofstream cov_file(covariance_file); - std::cout << covariance_file << "\n"; cov_file << csv_cov_data.str(); cov_file.close(); } @@ -323,9 +338,6 @@ int main(int argc, char *argv[]) corr_file.close(); } - if (create_data_file) - ; // TODO: execute extract_data to create data file. Use output_file_data.csv as output name - return 0; } From 0428e9b8628904ccff41ce6a04983f393056e0c5 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Mon, 4 May 2026 23:49:09 -0400 Subject: [PATCH 16/38] removed deprecated header file --- src/libraries/UTILITIES/FitConverter.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/libraries/UTILITIES/FitConverter.cc b/src/libraries/UTILITIES/FitConverter.cc index def394315..4eb186c34 100644 --- a/src/libraries/UTILITIES/FitConverter.cc +++ b/src/libraries/UTILITIES/FitConverter.cc @@ -12,7 +12,6 @@ #include #include "FitConverter.h" -#include "AmplitudeParser.h" #include "IUAmpTools/ConfigurationInfo.h" #include "IUAmpTools/FitResults.h" #include "IUAmpTools/report.h" From 938b35ef54c9cc21020b1cdb071a8cca1b458910 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Tue, 5 May 2026 18:17:13 -0400 Subject: [PATCH 17/38] Added energy stat extraction and cleaned up class The largest addition is a function that extracts the values of interest for the beam energy, which incorporates signal and background subtraction. To help with this, a min/max finder function was added to find a common min/max value for a branch across files. A few other report lines were added, and some fixes to compile properly. --- src/libraries/UTILITIES/RootDataConverter.cc | 265 ++++++++++++++++--- src/libraries/UTILITIES/RootDataConverter.h | 85 +++--- 2 files changed, 271 insertions(+), 79 deletions(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc index 9d8baeb3e..5a5f06044 100644 --- a/src/libraries/UTILITIES/RootDataConverter.cc +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -8,6 +8,8 @@ #include #include +#include +#include #include "RootDataConverter.h" #include "IUAmpTools/report.h" @@ -15,6 +17,8 @@ #include "TFile.h" #include "TKey.h" #include "TClass.h" +#include "TCollection.h" +#include "TROOT.h" #include "TTree.h" #include "TLorentzVector.h" @@ -23,8 +27,19 @@ const char *RootDataConverter::kModule = "RootDataConverter"; RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warnings) : m_fit_file(filename), m_fit_results(filename, mute_warnings), m_cfg_info(m_fit_results.configInfo()), - m_mute_warnings(mute_warnings) + m_mute_warnings(mute_warnings), + m_data_files(dataFiles()), + m_background_files(backgroundFiles()), + m_genMC_files(genMCFiles()), + m_accMC_files(accMCFiles()), + m_data_tree_name(findTreeName("data")), + m_background_tree_name(findTreeName("background")), + m_genMC_tree_name(findTreeName("genMC")), + m_accMC_tree_name(findTreeName("accMC")) { + + report(DEBUG, kModule) << "Constructing RootDataConverter for file: " << m_fit_file << "\n"; + if (!m_fit_results.valid()) { report(ERROR, kModule) << "RootDataConverter ERROR: Unable to read fit results from file " + m_fit_file << "\n"; @@ -32,12 +47,6 @@ RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warn } setLowerVertexIndices({1}); // default to index 1 being the lower vertex particle - // set data, background, genMC, and accMC files - m_data_files = dataFiles(); - m_background_files = backgroundFiles(); - m_genMC_files = genMCFiles(); - m_accMC_files = accMCFiles(); - m_background_files_exist = !m_background_files.empty(); validateDataFiles(); @@ -45,28 +54,35 @@ RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warn validateGenMCFiles(); validateAccMCFiles(); - m_data_tree_name = dataTreeName(); - m_background_tree_name = backgroundTreeName(); - m_genMC_tree_name = genMCTreeName(); - m_accMC_tree_name = accMCTreeName(); - extract(); } void RootDataConverter::extract() { // get weight branch name. If weight branch is not found, will return empty string - // and we'll assume weights of 1.0 later + // and we'll assume weights of 1.0 throughout our stats functions std::string weight_branch_name; if (m_background_files_exist) weight_branch_name = weightBranchName("background", m_background_tree_name); else weight_branch_name = weightBranchName("data", m_data_tree_name); - // TODO: next step is to calculate t from the beam and lower vertex particle + if (weight_branch_name.empty()) + { + report(DEBUG, kModule) << "No weight branch found. Assuming weights of 1.0 for all events.\n"; + } + else + { + report(DEBUG, kModule) << "Weight branch found: " << weight_branch_name << "\n"; + } + + // // Extract beam energy statistics + extractBeamEnergyStats(weight_branch_name); + + // TODO: next step is to calculate t from the at-rest proton and lower vertex particle // 4-vectors. Look at other plotters for reference. Then calculate the upper - // vertex mass from the remaining particles. Finally, fill histograms and - // calculate bin info with a better mapping between the header and value storage. + // vertex mass from the remaining particles. Finally, get number of events, and + // calculate efficiency from MC. Then use efficiency to get ac_events. } std::vector RootDataConverter::findFiles(const std::string &file_type) const @@ -84,7 +100,7 @@ std::vector RootDataConverter::findFiles(const std::string &file_ty if (file_type == "data") file_pair = m_cfg_info->reaction(reaction)->data(); else if (file_type == "background") - file_pair = m_cfg_info->reaction(reaction)->background(); + file_pair = m_cfg_info->reaction(reaction)->bkgnd(); else if (file_type == "genMC") file_pair = m_cfg_info->reaction(reaction)->genMC(); else if (file_type == "accMC") @@ -140,7 +156,7 @@ std::string RootDataConverter::findTreeName(const std::string &file_type) const } else if (file_type == "background") { - file_pair = m_cfg_info->reaction(reaction)->background(); + file_pair = m_cfg_info->reaction(reaction)->bkgnd(); file_path = m_background_files[0]; } else if (file_type == "genMC") @@ -233,7 +249,7 @@ std::string RootDataConverter::weightBranchName( // argument, so check explicitly for that first const std::vector reaction_list = m_fit_results.reactionList(); std::string reaction = reaction_list[0]; // assume all reactions use the same weight branch - const std::pair> file_pair; // data reader name and args + std::pair> file_pair; // data reader name and args std::string root_file; if (file_type == "data") { @@ -242,7 +258,7 @@ std::string RootDataConverter::weightBranchName( } else if (file_type == "background") { - file_pair = m_cfg_info->reaction(reaction)->background(); + file_pair = m_cfg_info->reaction(reaction)->bkgnd(); root_file = m_background_files[0]; } else @@ -320,33 +336,161 @@ std::string RootDataConverter::weightBranchName( } -double RootDataConverter::calculateMandelstamT() const -{ - // TODO: this is the basic start, and then you can get the lower vertex particle - // 4-vector. Its simple for a proton since its just the PxP1, etc 4 vectors, but - // for baryon decays I'll need to look at other plotters to see if boosts need to - // occur first. The other hangup is that if I'm doing all this, I might as well - // make the -t histogram as well, so need to think about how to structure this. - // probably make some generic tHist, massHist, and beamHist functions that can - // do all these calculations and histogram filling. Then from there the edges, - // centers, errors, etc can be calculated. I suppose if 4-vectors are used I don't - // need to calculate bin edges from the hist, because I can just get the max/min - // values directly from the 4-vectors. - TLorentzVector beam = beamLorentzVector(tree); - TLorentzVector target(0, 0, 0, 0.938); // target proton at rest -} +// double RootDataConverter::calculateMandelstamT() const +// { +// // TODO: this is the basic start, and then you can get the lower vertex particle +// // 4-vector. Its simple for a proton since its just the PxP1, etc 4 vectors, but +// // for baryon decays I'll need to look at other plotters to see if boosts need to +// // occur first. + +// TLorentzVector target(0, 0, 0, 0.938); // target proton at rest +// } -TLorentzVector RootDataConverter::beamLorentzVector(TTree* tree) const +void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_name) { - double EnPB, PxPB, PyPB, PzPB; + // we will add all the data (and possible) weighted background energy histograms + // together to get overall beam energy stats + TH1D *h_energy_total = nullptr; + + double min, max; + std::tie(min, max) = findMinMaxOfBranch(m_data_files, m_data_tree_name, "EnPB"); + + if(m_background_files_exist) + { + double bg_min, bg_max; + std::tie(bg_min, bg_max) = findMinMaxOfBranch(m_background_files, m_background_tree_name, "EnPB"); + + // set the min and max for the total histogram to encompass both data and background + min = std::min(min, bg_min); + max = std::max(max, bg_max); + } + + for (const std::string &data_file : m_data_files) + { + TFile *f = TFile::Open(data_file.c_str()); + if (!f || f->IsZombie()) + { + report(ERROR, kModule) << "Error opening data ROOT file: " << data_file << "\n"; + assert(false); + } + TTree *tree = f->Get(m_data_tree_name.c_str()); + if (!tree) + { + report(ERROR, kModule) << "Error retrieving tree " << m_data_tree_name << " from file " << data_file << "\n"; + f->Close(); + assert(false); + } + + // Set up branch addresses + double energy; + tree->SetBranchAddress("EnPB", &energy); + + // draw into temporary histogram + const int n_bins = 200; + TH1D *h_energy = new TH1D("h_energy", "", n_bins, min, max); + + // If no background files exist, we assume weights are stored in the data tree + if (!m_background_files_exist && !weight_branch_name.empty() && tree->GetBranch(weight_branch_name.c_str())) + { + double weight; + tree->SetBranchAddress(weight_branch_name.c_str(), &weight); + tree->Draw("EnPB>>h_energy", weight_branch_name.c_str(), "goff"); + } + else if (m_background_files_exist) + { + tree->Draw("EnPB>>h_energy", "1.0", "goff"); // if bkgnd files exist, we will subtract them later, so just use weight of 1.0 here + } + else + { + tree->Draw("EnPB>>h_energy", "1.0", "goff"); + if (!m_mute_warnings) + report(WARNING, kModule) << "No weight branch found in tree '" + << m_data_tree_name + << "' in file: " + << data_file + << "\n" + << "Assuming weights of 1.0 for all events.\n"; + } + + // if this is first hist, clone it to the total hist. Otherwise, add to the total hist + if (!h_energy_total) + { + h_energy_total = (TH1D *)h_energy->Clone("h_energy_total"); + h_energy_total->SetDirectory(0); // detach from file + } + else + { + h_energy_total->Add(h_energy); + } + + delete h_energy; // clean up temporary histogram + f->Close(); + } + + // if background files exist, we'll subtract the background energy histograms from + // the total energy histogram to get the data-only energy distribution, which we + // will use for our stats. + for (const std::string &bg_file : m_background_files) + { + TFile *f = TFile::Open(bg_file.c_str()); + if (!f || f->IsZombie()) + { + report(ERROR, kModule) << "Error opening background ROOT file: " << bg_file << "\n"; + assert(false); + } + TTree *tree = f->Get(m_background_tree_name.c_str()); + if (!tree) + { + report(ERROR, kModule) << "Error retrieving tree " << m_background_tree_name << " from file " << bg_file << "\n"; + f->Close(); + assert(false); + } + + // Set up branch addresses + double energy; + double weight = 1.0; // default weight is 1.0 if no weight branch exists + + tree->SetBranchAddress("EnPB", &energy); + + // If weight branch exists, set its address + if (!weight_branch_name.empty() && tree->GetBranch(weight_branch_name.c_str())) + { + tree->SetBranchAddress(weight_branch_name.c_str(), &weight); + } + + // draw into temporary histogram + const int n_bins = 200; + TH1D *h_energy = new TH1D("h_energy", "", n_bins, min, max); + tree->Draw("EnPB>>h_energy", weight_branch_name.c_str(), "goff"); + + // subtract background histogram from total histogram + if (h_energy_total) + h_energy_total->Add(h_energy, -1.0); - tree->SetBranchAddress("EnPB", &EnPB); - tree->SetBranchAddress("PxPB", &PxPB); - tree->SetBranchAddress("PyPB", &PyPB); - tree->SetBranchAddress("PzPB", &PzPB); + delete h_energy; // clean up temporary histogram + f->Close(); + } - return TLorentzVector(PxPB, PyPB, PzPB, EnPB); + const double center_energy = 0.5 * (min + max); + const double mean_energy = h_energy_total->GetMean(); + const double rms_energy = h_energy_total->GetRMS(); + + // Store values in the map + m_values["e_low"] = min; + m_values["e_high"] = max; + m_values["e_mid"] = center_energy; + m_values["e_avg"] = mean_energy; + m_values["e_rms"] = rms_energy; + + delete h_energy_total; // Clean up total histogram + + report(DEBUG, kModule) << "Beam energy statistics:\n" + << " Min: " << min << "\n" + << " Max: " << max << "\n" + << " Center: " << center_energy << "\n" + << " Mean: " << mean_energy << "\n" + << " RMS: " << rms_energy << "\n"; } void RootDataConverter::validateFiles(const std::vector &files, @@ -370,4 +514,41 @@ void RootDataConverter::validateFiles(const std::vector &files, { return; // files are valid } +} + +std::pair RootDataConverter::findMinMaxOfBranch(const std::vector &files, + const std::string &tree_name, + const std::string &branch_name) const +{ + double global_min = std::numeric_limits::max(); + double global_max = std::numeric_limits::lowest(); + + for (const std::string &file : files) + { + TFile *f = TFile::Open(file.c_str()); + if (!f || f->IsZombie()) + { + report(ERROR, kModule) << "Error opening ROOT file: " << file << "\n"; + assert(false); + } + TTree *tree = f->Get(tree_name.c_str()); + if (!tree) + { + report(ERROR, kModule) << "Error retrieving tree " << tree_name << " from file " << file << "\n"; + f->Close(); + assert(false); + } + + double local_min = tree->GetMinimum(branch_name.c_str()); + double local_max = tree->GetMaximum(branch_name.c_str()); + + if (local_min < global_min) + global_min = local_min; + if (local_max > global_max) + global_max = local_max; + + f->Close(); + } + + return {global_min, global_max}; } \ No newline at end of file diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index 1cf0063ff..38f61f9cc 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -215,16 +215,6 @@ class RootDataConverter */ void setLowerVertexIndices(const std::vector &indices) { m_lower_vertex_indices = indices; } - /** - * @brief Get the headers for the CSV output - */ - std::vector getHeaders() const { return headers; } - - /** - * @brief Set the headers for the CSV output - */ - void setHeaders(const std::vector &new_headers) { headers = new_headers; } - /** * @brief Map of header names to their values */ @@ -254,33 +244,31 @@ class RootDataConverter const std::string m_genMC_tree_name; const std::string m_accMC_tree_name; - // these are the standard headers for the CSV output - std::vector m_headers = { - // TODO: ugly, should be better way like was done in FitConverter - "file", - "t_low", - "t_high", - "t_center", - "t_avg", - "t_rms", - "e_low", - "e_high", - "e_center", - "e_avg", - "e_rms", - "m_low", - "m_high", - "m_center", - "m_avg", - "m_rms", - "events", - "events_err", - "ac_events", - "ac_events_err", - "efficiency", - }; - - std::map m_values; + // TODO: variables we'll want to implement + // "file", + // "t_low", + // "t_high", + // "t_center", + // "t_avg", + // "t_rms", + // "e_low", DONE + // "e_high", DONE + // "e_center", DONE + // "e_avg", DONE + // "e_rms", DONE + // "m_low", + // "m_high", + // "m_center", + // "m_avg", + // "m_rms", + // "events", + // "events_err", + // "ac_events", + // "ac_events_err", + // "efficiency", + + + std::map m_values; //< Map of headers to their values for the CSV output (except for the "file" header) /** * @brief Find files of a given type associated with the fit results @@ -327,6 +315,29 @@ class RootDataConverter */ std::vector treesInFile(const std::string &filename) const; + /** + * @brief Extract beam energy statistics from the data tree + * + * Calculates min, max, mean, and RMS values for the "EnPB" branch from the data + * file. Stores results in m_values with keys: "e_low", "e_high", "e_center", + * "e_avg", "e_rms" + * + * @param[in] weight_branch_name Name of the weight branch (if empty, weights assumed to be 1.0) + */ + void extractBeamEnergyStats(const std::string &weight_branch_name); + + /** + * @brief Get the max/min values of a branch for a set of files + * + * @param files set of ROOT files containing a common tree and branch name + * @param tree_name ROOT tree containing the branch of interest + * @param branch_name branch name we want to find the max/min values of + * @return std::pair minimum and maximum values of the branch across all files + */ + std::pair findMinMaxOfBranch(const std::vector &files, + const std::string &tree_name, + const std::string &branch_name) const; + static const char *kModule; }; From 5cbccc08d1a9c1736adfff4b8dbba8404050f3ee Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Tue, 5 May 2026 18:35:53 -0400 Subject: [PATCH 18/38] added todo --- src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index f9d4b58da..5f4bc59cc 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -4,6 +4,8 @@ * @brief * @date 2026-01-30 * + * @todo: complete docstring. Also make a function in the fit converter to write out + * the normalized integral matrix to a csv file, and use that like the corr/cov ones. */ #include From 91f04a00ff4be7df0d320a7e98c104e172d0ec5b Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Tue, 5 May 2026 18:38:36 -0400 Subject: [PATCH 19/38] added some todos and cleaned up comments --- src/libraries/UTILITIES/RootDataConverter.h | 30 +++++++++++++-------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index 38f61f9cc..9e0528c24 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -61,6 +61,8 @@ class RootDataConverter * - index 0 (B): beam photon * - indices 1 to M: lower vertex particles, e.g. proton, pi+ that decay from Delta++ * - indices M+1 to N: final state particles from upper vertex + * + * TODO: implement * * @param[in] filename path to the .fit file * @param[in] lower_vertex_indices 4-momenta indices for lower vertex particles @@ -135,7 +137,7 @@ class RootDataConverter */ std::vector accMCFiles() const { return findFiles("accMC"); }; - + // TODO: write these docstrings void validateDataFiles() const { validateFiles(m_data_files, "data"); } void validateBackgroundFiles() const { validateFiles(m_background_files, "background"); } void validateGenMCFiles() const { validateFiles(m_genMC_files, "genMC"); } @@ -198,22 +200,28 @@ class RootDataConverter const std::string &tree_name) const; /** - * @brief Get 4-momenta indices for lower vertex particles + * @brief Set 4-momenta indices for lower vertex particles * - * An AmpTools ROOT tree contains 4-vectors for all particles in the event. The - * first index is always the beam photon, but there can be multiple particles - * associated with the lower vertex like a proton, Delta+, etc. The rest of the - * particles are assumed to be final state particles from the upper vertex. This - * function returns the indices of the lower vertex particles in the data 4-vectors. + * An AmpTools ROOT tree contains 4-vectors for all particles in the event, labelled + * "EnX", "PxX", "PyX", "PzX" where X is the particle index. The "B" index is always + * the beam photon, but the other indices are not guaranteed to be in any particular + * order. The simplest case is a recoil proton at the lower vertex, which would just + * be index 1. However, there can be multiple particles associated with the lower + * vertex like a Delta++ -> p + pi+ decay. + * + * This function allows the user to specify which indices correspond to the lower + * vertex system. The remaining indices are assumed to be final state particles from + * the upper vertex. This is important for correctly calculating the 4-momentum + * transfer t and the upper vertex mass. */ - std::vector getLowerVertexIndices() const { return m_lower_vertex_indices; } + void setLowerVertexIndices(const std::vector &indices) { m_lower_vertex_indices = indices; } /** - * @brief Set 4-momenta indices for lower vertex particles + * @brief Get 4-momenta indices for lower vertex particles * - * See getLowerVertexIndices for more details. + * See setLowerVertexIndices for more details. */ - void setLowerVertexIndices(const std::vector &indices) { m_lower_vertex_indices = indices; } + std::vector getLowerVertexIndices() const { return m_lower_vertex_indices; } /** * @brief Map of header names to their values From 35c113261dd39d8b58050d30c2409e1fc7ed2f49 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Tue, 5 May 2026 23:19:51 -0400 Subject: [PATCH 20/38] Added a method to calculate mandelstam t Uses a RDataFrame method to compute t from the various 4-vector component branches, then fills a histogram with the t values. If background files are present, also computes a background histogram and subtracts it from the data histogram before calculating statistics. Aside from this, small reports and comments were added. --- src/libraries/UTILITIES/RootDataConverter.cc | 227 +++++++++++++++++-- src/libraries/UTILITIES/RootDataConverter.h | 19 +- 2 files changed, 222 insertions(+), 24 deletions(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc index 5a5f06044..bd7ed25ad 100644 --- a/src/libraries/UTILITIES/RootDataConverter.cc +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -21,6 +21,7 @@ #include "TROOT.h" #include "TTree.h" #include "TLorentzVector.h" +#include "ROOT/RDataFrame.hxx" const char *RootDataConverter::kModule = "RootDataConverter"; @@ -45,8 +46,20 @@ RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warn report(ERROR, kModule) << "RootDataConverter ERROR: Unable to read fit results from file " + m_fit_file << "\n"; assert(false); } + setLowerVertexIndices({1}); // default to index 1 being the lower vertex particle + // print debug info detailing which particles are labeled as upper or lower vertex + // note that index 0 is always the beam photon + const std::vector particle_list = m_cfg_info->reaction(m_fit_results.reactionList()[0])->particleList(); + report(DEBUG, kModule) << "The following particles have been labeled as lower vertex particles by default (index 1):\n"; + report(DEBUG, kModule) << "i = 1 : " << particle_list[1] << "\n"; + report(DEBUG, kModule) << "The following particles are thus labeled as upper vertex particles by default (indices 2 to N):\n"; + for (size_t i = 2; i < particle_list.size(); i++) + { + report(DEBUG, kModule) << "i = " << i << " : " << particle_list[i] << "\n"; + } + m_background_files_exist = !m_background_files.empty(); validateDataFiles(); @@ -54,9 +67,21 @@ RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warn validateGenMCFiles(); validateAccMCFiles(); + // fill the m_values map with all the relevant information from the fit results and + // associated files that we want to save to csv later. This includes: + // - Bin edges, centers, averages, and RMS values + // - total number of detected and acceptance-corrected events + // - efficiency based on genMC and accMC event counts extract(); } +// TODO: this block should be used for the constructor that specifies custom lv indices +// for (size_t i = 0; i < particle_list.size(); i++) +// { +// if (std::find(getLowerVertexIndices().begin(), getLowerVertexIndices().end(), i) != getLowerVertexIndices().end()) +// report(DEBUG, kModule) << "i = " << i << " : " << particle_list[i] << "\n"; +// } + void RootDataConverter::extract() { // get weight branch name. If weight branch is not found, will return empty string @@ -76,8 +101,10 @@ void RootDataConverter::extract() report(DEBUG, kModule) << "Weight branch found: " << weight_branch_name << "\n"; } - // // Extract beam energy statistics + // Extract min, max, mean, and RMS of the beam energy, -t, and mass of the upper + // vertex system. Background subtraction and proper weighting are all included. extractBeamEnergyStats(weight_branch_name); + extractFourMomentumTransferStats(weight_branch_name); // TODO: next step is to calculate t from the at-rest proton and lower vertex particle // 4-vectors. Look at other plotters for reference. Then calculate the upper @@ -336,17 +363,6 @@ std::string RootDataConverter::weightBranchName( } -// double RootDataConverter::calculateMandelstamT() const -// { -// // TODO: this is the basic start, and then you can get the lower vertex particle -// // 4-vector. Its simple for a proton since its just the PxP1, etc 4 vectors, but -// // for baryon decays I'll need to look at other plotters to see if boosts need to -// // occur first. - -// TLorentzVector target(0, 0, 0, 0.938); // target proton at rest -// } - - void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_name) { // we will add all the data (and possible) weighted background energy histograms @@ -366,6 +382,7 @@ void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_ max = std::max(max, bg_max); } + // ==== DATA ENERGY HISTOGRAMS ==== for (const std::string &data_file : m_data_files) { TFile *f = TFile::Open(data_file.c_str()); @@ -428,9 +445,9 @@ void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_ f->Close(); } - // if background files exist, we'll subtract the background energy histograms from - // the total energy histogram to get the data-only energy distribution, which we - // will use for our stats. + // ==== BACKGROUND ENERGY HISTOGRAMS ==== + // if background files exist, we'll subtract the weighted background energy + // histograms from the total energy histogram, which we will use for our stats. for (const std::string &bg_file : m_background_files) { TFile *f = TFile::Open(bg_file.c_str()); @@ -468,7 +485,7 @@ void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_ if (h_energy_total) h_energy_total->Add(h_energy, -1.0); - delete h_energy; // clean up temporary histogram + delete h_energy; f->Close(); } @@ -483,16 +500,180 @@ void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_ m_values["e_avg"] = mean_energy; m_values["e_rms"] = rms_energy; - delete h_energy_total; // Clean up total histogram + delete h_energy_total; + + report(DEBUG, kModule) << "Beam energy statistics:\n"; + report(DEBUG, kModule) << " Min: " << min << "\n"; + report(DEBUG, kModule) << " Max: " << max << "\n"; + report(DEBUG, kModule) << " Center: " << center_energy << "\n"; + report(DEBUG, kModule) << " Mean: " << mean_energy << "\n"; + report(DEBUG, kModule) << " RMS: " << rms_energy << "\n"; +} + +void RootDataConverter::extractFourMomentumTransferStats(const std::string &weight_branch_name) +{ + // target proton mass (GeV) + const double m_proton = 0.938; + + if (m_lower_vertex_indices.empty()) { + report(ERROR, kModule) << "No lower vertex indices set; cannot compute -t\n"; + assert(false); + } + + // Prepare branch expression strings like "PxP1 + PxP2 + ..." + std::string recoil_px_expr; + std::string recoil_py_expr; + std::string recoil_pz_expr; + std::string recoil_e_expr; + for (size_t i = 0; i < m_lower_vertex_indices.size(); ++i) { + int idx = m_lower_vertex_indices[i]; + std::string sep = (i == 0) ? "" : " + "; + recoil_px_expr += sep + "PxP" + std::to_string(idx); + recoil_py_expr += sep + "PyP" + std::to_string(idx); + recoil_pz_expr += sep + "PzP" + std::to_string(idx); + recoil_e_expr += sep + "EnP" + std::to_string(idx); + } + + // ---- DATA histogram via RDataFrame ---- + try { + ROOT::RDataFrame df_data(m_data_tree_name, m_data_files); + + // Define recoil components and t (Mandelstam t = (target - recoil)^2) + auto df = df_data.Define("recoil_px", recoil_px_expr) + .Define("recoil_py", recoil_py_expr) + .Define("recoil_pz", recoil_pz_expr) + .Define("recoil_E", recoil_e_expr) + .Define("t_value", + [m_proton](double rE, double rpx, double rpy, double rpz) { + double dE = m_proton - rE; + double p2 = rpx * rpx + rpy * rpy + rpz * rpz; + double t = dE * dE - p2; + return std::fabs(t); + }, + {"recoil_E", "recoil_px", "recoil_py", "recoil_pz"}); + + // Determine min and max from data (unweighted) + auto data_min_r = df.Min("t_value"); + auto data_max_r = df.Max("t_value"); + double data_min = *data_min_r; + double data_max = *data_max_r; + + double global_min = data_min; + double global_max = data_max; + + // Build data histogram (if background exists, don't apply weights to data here) + const int n_bins = 200; + ROOT::RDF::RResultPtr h_data; + if (m_background_files_exist) { + h_data = df.Histo1D({"h_t_data", "t (data)", n_bins, global_min, global_max}, "t_value"); + } + else { + // if no background files, use weight branch from data (if provided) + if (!weight_branch_name.empty()) { + // if weight branch references a friend (contains a '.'), unclear if RDataFrame can handle that, so skip weights in that case with a warning + // TODO: test the friend tree case + if (weight_branch_name.find('.') != std::string::npos) { + report(WARNING, kModule) << "Weight branch appears to reference a friend tree (" << weight_branch_name << ") - skipping weights for RDataFrame histogram\n"; + h_data = df.Histo1D({"h_t_data", "t (data)", n_bins, global_min, global_max}, "t_value"); + } + else { + h_data = df.Histo1D({"h_t_data", "t (data)", n_bins, global_min, global_max}, "t_value", weight_branch_name); + } + } + else { + h_data = df.Histo1D({"h_t_data", "t (data)", n_bins, global_min, global_max}, "t_value"); + } + } - report(DEBUG, kModule) << "Beam energy statistics:\n" - << " Min: " << min << "\n" - << " Max: " << max << "\n" - << " Center: " << center_energy << "\n" - << " Mean: " << mean_energy << "\n" - << " RMS: " << rms_energy << "\n"; + // ---- BACKGROUND histogram via RDataFrame (if present) ---- + if (m_background_files_exist && !m_background_files.empty()) { + ROOT::RDF::RResultPtr h_bkg; + ROOT::RDataFrame df_bg(m_background_tree_name, m_background_files); + auto dfb = df_bg.Define("recoil_px", recoil_px_expr) + .Define("recoil_py", recoil_py_expr) + .Define("recoil_pz", recoil_pz_expr) + .Define("recoil_E", recoil_e_expr) + .Define("t_value", + [m_proton](double rE, double rpx, double rpy, double rpz) { + double dE = m_proton - rE; + double p2 = rpx * rpx + rpy * rpy + rpz * rpz; + double t = dE * dE - p2; + return std::fabs(t); + }, + {"recoil_E", "recoil_px", "recoil_py", "recoil_pz"}); + + // Expand global range to include background extremes + auto bg_min_r = dfb.Min("t_value"); + auto bg_max_r = dfb.Max("t_value"); + double bg_min = *bg_min_r; + double bg_max = *bg_max_r; + global_min = std::min(global_min, bg_min); + global_max = std::max(global_max, bg_max); + + // Recreate histograms with unified ranges + h_data = df.Histo1D({"h_t_data", "t (data)", n_bins, global_min, global_max}, "t_value"); + + // background histogram should use weight branch (weight_branch_name was chosen from background earlier) + if (!weight_branch_name.empty() && weight_branch_name.find('.') == std::string::npos) { + h_bkg = dfb.Histo1D({"h_t_total", "t (bg)", n_bins, global_min, global_max}, "t_value", weight_branch_name); + } + else { + if (weight_branch_name.find('.') != std::string::npos) + report(WARNING, kModule) << "Background weight branch references friend tree; skipping weights for bg RDataFrame histogram\n"; + h_bkg = dfb.Histo1D({"h_t_total", "t (bg)", n_bins, global_min, global_max}, "t_value"); + } + + // Subtract background from a standalone clone so RResultPtr ownership stays intact. + TH1D *h_result = (TH1D *)h_data->Clone("h_t_result"); + h_result->SetDirectory(nullptr); + h_result->Add(h_bkg.GetPtr(), -1.0); + + double mean_t = h_result->GetMean(); + double rms_t = h_result->GetRMS(); + double center_t = 0.5 * (global_min + global_max); + + m_values["t_low"] = global_min; + m_values["t_high"] = global_max; + m_values["t_mid"] = center_t; + m_values["t_avg"] = mean_t; + m_values["t_rms"] = rms_t; + + report(DEBUG, kModule) << "-t statistics (data - background):" << "\n"; + report(DEBUG, kModule) << " Min: " << global_min << "\n"; + report(DEBUG, kModule) << " Max: " << global_max << "\n"; + report(DEBUG, kModule) << " Center: " << center_t << "\n"; + report(DEBUG, kModule) << " Mean: " << mean_t << "\n"; + report(DEBUG, kModule) << " RMS: " << rms_t << "\n"; + + delete h_result; + } + else { + // No background files: final histogram is h_data + double mean_t = h_data->GetMean(); + double rms_t = h_data->GetRMS(); + double center_t = 0.5 * (global_min + global_max); + + m_values["t_low"] = global_min; + m_values["t_high"] = global_max; + m_values["t_mid"] = center_t; + m_values["t_avg"] = mean_t; + m_values["t_rms"] = rms_t; + + report(DEBUG, kModule) << "-t statistics (data):\n"; + report(DEBUG, kModule) << " Min: " << global_min << "\n"; + report(DEBUG, kModule) << " Max: " << global_max << "\n"; + report(DEBUG, kModule) << " Center: " << center_t << "\n"; + report(DEBUG, kModule) << " Mean: " << mean_t << "\n"; + report(DEBUG, kModule) << " RMS: " << rms_t << "\n"; + } + } + catch (const std::exception &e) { + report(ERROR, kModule) << "RDataFrame error computing -t: " << e.what() << "\n"; + assert(false); + } } + void RootDataConverter::validateFiles(const std::vector &files, const std::string &file_type) const { diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index 9e0528c24..c74294ed3 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -324,7 +324,7 @@ class RootDataConverter std::vector treesInFile(const std::string &filename) const; /** - * @brief Extract beam energy statistics from the data tree + * @brief Extract beam energy statistics from the trees * * Calculates min, max, mean, and RMS values for the "EnPB" branch from the data * file. Stores results in m_values with keys: "e_low", "e_high", "e_center", @@ -334,6 +334,23 @@ class RootDataConverter */ void extractBeamEnergyStats(const std::string &weight_branch_name); + /** + * @brief Extract -t 4-momentum transfer statistics from the trees + * + * Calculates min, max, mean, and RMS values from the momentum transfer between + * the at-rest proton and the lower-vertex recoil system. This is calculated as + * t = (P_proton - P_recoil)^2, where P_recoil is the sum of the 4-vectors of the + * lower vertex particles (see setLowerVertexIndices) + * Stores results in m_values with keys: "t_low", "t_high", "t_center", "t_avg", and + * "t_rms" + * + * @param weight_branch_name Name of the weight branch (if empty, weights assumed to be 1.0) + * + * @todo The function currently has not been tested for the FSRootFriendTree scenario, + * and so warns the user and assigns a weight of 1.0 for this case. + */ + void extractFourMomentumTransferStats(const std::string &weight_branch_name); + /** * @brief Get the max/min values of a branch for a set of files * From 52b6b979a66cba6f980abc342c2816d3e9df5a26 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Tue, 5 May 2026 23:54:39 -0400 Subject: [PATCH 21/38] made function for determining upper vertex indices --- src/libraries/UTILITIES/RootDataConverter.cc | 30 +++++++++++++++----- src/libraries/UTILITIES/RootDataConverter.h | 25 ++++++++++++++-- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc index bd7ed25ad..edbbf1e4c 100644 --- a/src/libraries/UTILITIES/RootDataConverter.cc +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -48,6 +48,7 @@ RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warn } setLowerVertexIndices({1}); // default to index 1 being the lower vertex particle + setUpperVertexIndices(); // print debug info detailing which particles are labeled as upper or lower vertex // note that index 0 is always the beam photon @@ -55,9 +56,9 @@ RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warn report(DEBUG, kModule) << "The following particles have been labeled as lower vertex particles by default (index 1):\n"; report(DEBUG, kModule) << "i = 1 : " << particle_list[1] << "\n"; report(DEBUG, kModule) << "The following particles are thus labeled as upper vertex particles by default (indices 2 to N):\n"; - for (size_t i = 2; i < particle_list.size(); i++) + for (const auto &idx : m_upper_vertex_indices) { - report(DEBUG, kModule) << "i = " << i << " : " << particle_list[i] << "\n"; + report(DEBUG, kModule) << "i = " << idx << " : " << particle_list[idx] << "\n"; } m_background_files_exist = !m_background_files.empty(); @@ -78,7 +79,7 @@ RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warn // TODO: this block should be used for the constructor that specifies custom lv indices // for (size_t i = 0; i < particle_list.size(); i++) // { -// if (std::find(getLowerVertexIndices().begin(), getLowerVertexIndices().end(), i) != getLowerVertexIndices().end()) +// if (std::find(m_lower_vertex_indices.begin(), m_lower_vertex_indices.end(), i) != m_lower_vertex_indices.end()) // report(DEBUG, kModule) << "i = " << i << " : " << particle_list[i] << "\n"; // } @@ -105,11 +106,11 @@ void RootDataConverter::extract() // vertex system. Background subtraction and proper weighting are all included. extractBeamEnergyStats(weight_branch_name); extractFourMomentumTransferStats(weight_branch_name); + // extractUpperVertexMassStats(weight_branch_name); - // TODO: next step is to calculate t from the at-rest proton and lower vertex particle - // 4-vectors. Look at other plotters for reference. Then calculate the upper - // vertex mass from the remaining particles. Finally, get number of events, and - // calculate efficiency from MC. Then use efficiency to get ac_events. + // TODO: next step is to calculate the upper vertex mass from the remaining + // particles. Finally, get number of events, and calculate efficiency from MC. Then + // use efficiency to get ac_events. } std::vector RootDataConverter::findFiles(const std::string &file_type) const @@ -732,4 +733,19 @@ std::pair RootDataConverter::findMinMaxOfBranch(const std::vecto } return {global_min, global_max}; +} + +void RootDataConverter::setUpperVertexIndices() +{ + m_upper_vertex_indices.clear(); + const std::vector particle_list = m_cfg_info->reaction(m_fit_results.reactionList()[0])->particleList(); + + // Note that we always skip particle i=0, as this will be the beam photon + for (size_t i = 1; i < particle_list.size(); i++) + { + if (std::find(m_lower_vertex_indices.begin(), m_lower_vertex_indices.end(), i) == m_lower_vertex_indices.end()) + { + m_upper_vertex_indices.push_back(i); + } + } } \ No newline at end of file diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index c74294ed3..8e299fc1a 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -200,7 +200,7 @@ class RootDataConverter const std::string &tree_name) const; /** - * @brief Set 4-momenta indices for lower vertex particles + * @brief Set indices that belong to the lower vertex final state particles * * An AmpTools ROOT tree contains 4-vectors for all particles in the event, labelled * "EnX", "PxX", "PyX", "PzX" where X is the particle index. The "B" index is always @@ -217,12 +217,32 @@ class RootDataConverter void setLowerVertexIndices(const std::vector &indices) { m_lower_vertex_indices = indices; } /** - * @brief Get 4-momenta indices for lower vertex particles + * @brief Get indices for the lower vertex particles * * See setLowerVertexIndices for more details. */ std::vector getLowerVertexIndices() const { return m_lower_vertex_indices; } + /** + * @brief Set indices that belong to the upper vertex final state particles + * + * Any particle indices not in m_lower_vertex_indices that are also not 0 are thus + * assumed to belong to the upper vertex. This saves all leftover indices to + * m_upper_vertex_indices. This is important for correctly calculating the upper + * vertex mass. + * + * @note This function must be called AFTER setLowerVertexIndices + */ + void setUpperVertexIndices(); + + /** + * @brief Get indices for the upper vertex particles + * + * See setUpperVertexIndices for more details. + */ + std::vector getUpperVertexIndices() const { return m_upper_vertex_indices; } + + /** * @brief Map of header names to their values */ @@ -237,6 +257,7 @@ class RootDataConverter const FitResults m_fit_results; ///< AmpTools FitResults object const ConfigurationInfo *m_cfg_info; ///< ConfigurationInfo from the fit std::vector m_lower_vertex_indices; ///< Indices of lower vertex particles in the data 4-vectors + std::vector m_upper_vertex_indices; ///< Indices of upper vertex particles in the data 4-vectors std::vector m_isobar_indices; ///< Indices of isobar particles in the data 4-vectors bool m_mute_warnings; ///< Whether to mute warnings about missing background files From 17a25578d67e52ff81add864295d9667ed09da7c Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Wed, 6 May 2026 17:30:48 -0400 Subject: [PATCH 22/38] Added a function for calculating upper vertex mass --- src/libraries/UTILITIES/RootDataConverter.cc | 158 ++++++++++++++++++- src/libraries/UTILITIES/RootDataConverter.h | 15 ++ 2 files changed, 166 insertions(+), 7 deletions(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc index edbbf1e4c..767536c12 100644 --- a/src/libraries/UTILITIES/RootDataConverter.cc +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -106,11 +106,7 @@ void RootDataConverter::extract() // vertex system. Background subtraction and proper weighting are all included. extractBeamEnergyStats(weight_branch_name); extractFourMomentumTransferStats(weight_branch_name); - // extractUpperVertexMassStats(weight_branch_name); - - // TODO: next step is to calculate the upper vertex mass from the remaining - // particles. Finally, get number of events, and calculate efficiency from MC. Then - // use efficiency to get ac_events. + extractUpperVertexMassStats(weight_branch_name); } std::vector RootDataConverter::findFiles(const std::string &file_type) const @@ -616,12 +612,12 @@ void RootDataConverter::extractFourMomentumTransferStats(const std::string &weig // background histogram should use weight branch (weight_branch_name was chosen from background earlier) if (!weight_branch_name.empty() && weight_branch_name.find('.') == std::string::npos) { - h_bkg = dfb.Histo1D({"h_t_total", "t (bg)", n_bins, global_min, global_max}, "t_value", weight_branch_name); + h_bkg = dfb.Histo1D({"h_t_bkg", "t (bg)", n_bins, global_min, global_max}, "t_value", weight_branch_name); } else { if (weight_branch_name.find('.') != std::string::npos) report(WARNING, kModule) << "Background weight branch references friend tree; skipping weights for bg RDataFrame histogram\n"; - h_bkg = dfb.Histo1D({"h_t_total", "t (bg)", n_bins, global_min, global_max}, "t_value"); + h_bkg = dfb.Histo1D({"h_t_bkg", "t (bg)", n_bins, global_min, global_max}, "t_value"); } // Subtract background from a standalone clone so RResultPtr ownership stays intact. @@ -674,6 +670,154 @@ void RootDataConverter::extractFourMomentumTransferStats(const std::string &weig } } +void RootDataConverter::extractUpperVertexMassStats(const std::string &weight_branch_name) +{ + if (m_upper_vertex_indices.empty()) { + report(ERROR, kModule) << "No upper vertex indices set; cannot compute mass\n"; + assert(false); + } + + // Prepare branch expression strings like "PxP2 + PxP3 + ..." + std::string upper_px_expr; + std::string upper_py_expr; + std::string upper_pz_expr; + std::string upper_e_expr; + for (size_t i = 0; i < m_upper_vertex_indices.size(); ++i) { + int idx = m_upper_vertex_indices[i]; + std::string sep = (i == 0) ? "" : " + "; + upper_px_expr += sep + "PxP" + std::to_string(idx); + upper_py_expr += sep + "PyP" + std::to_string(idx); + upper_pz_expr += sep + "PzP" + std::to_string(idx); + upper_e_expr += sep + "EnP" + std::to_string(idx); + } + + try { + // ---- DATA histogram via RDataFrame ---- + ROOT::RDataFrame df_data(m_data_tree_name, m_data_files); + + auto df = df_data.Define("upper_px", upper_px_expr) + .Define("upper_py", upper_py_expr) + .Define("upper_pz", upper_pz_expr) + .Define("upper_E", upper_e_expr) + .Define("m_value", + [](double E, double px, double py, double pz) { + double p2 = px * px + py * py + pz * pz; + double m2 = E * E - p2; + if (m2 < 0 && m2 > -1e-12) m2 = 0.0; + return std::sqrt(std::max(0.0, m2)); + }, + {"upper_E", "upper_px", "upper_py", "upper_pz"}); + + // Determine min and max from data (unweighted) + auto data_min_r = df.Min("m_value"); + auto data_max_r = df.Max("m_value"); + double data_min = *data_min_r; + double data_max = *data_max_r; + + double global_min = data_min; + double global_max = data_max; + + // Build data histogram (if background exists, don't apply weights to data here) + const int n_bins = 200; + ROOT::RDF::RResultPtr h_data; + if (m_background_files_exist) { + h_data = df.Histo1D({"h_m_data", "M (data)", n_bins, global_min, global_max}, "m_value"); + } + else { + // if no background files, use weight branch from data (if provided) + if (!weight_branch_name.empty() && weight_branch_name.find('.') == std::string::npos) { + h_data = df.Histo1D({"h_m_data", "M (data)", n_bins, global_min, global_max}, "m_value", weight_branch_name); + } + else { + h_data = df.Histo1D({"h_m_data", "M (data)", n_bins, global_min, global_max}, "m_value"); + } + } + + // ---- BACKGROUND histogram via RDataFrame (if present) ---- + if (m_background_files_exist && !m_background_files.empty()) { + ROOT::RDF::RResultPtr h_bkg; + ROOT::RDataFrame df_bg(m_background_tree_name, m_background_files); + auto dfb = df_bg.Define("upper_px", upper_px_expr) + .Define("upper_py", upper_py_expr) + .Define("upper_pz", upper_pz_expr) + .Define("upper_E", upper_e_expr) + .Define("m_value", + [](double E, double px, double py, double pz) { + double p2 = px * px + py * py + pz * pz; + double m2 = E * E - p2; + if (m2 < 0 && m2 > -1e-12) m2 = 0.0; + return std::sqrt(std::max(0.0, m2)); + }, + {"upper_E", "upper_px", "upper_py", "upper_pz"}); + + // Expand global range to include background extremes + auto bg_min_r = dfb.Min("m_value"); + auto bg_max_r = dfb.Max("m_value"); + double bg_min = *bg_min_r; + double bg_max = *bg_max_r; + global_min = std::min(global_min, bg_min); + global_max = std::max(global_max, bg_max); + + // Recreate histograms with unified ranges + h_data = df.Histo1D({"h_m_data", "M (data)", n_bins, global_min, global_max}, "m_value"); + + if (!weight_branch_name.empty() && weight_branch_name.find('.') == std::string::npos) { + h_bkg = dfb.Histo1D({"h_m_bkg", "M (bg)", n_bins, global_min, global_max}, "m_value", weight_branch_name); + } + else { + h_bkg = dfb.Histo1D({"h_m_bkg", "M (bg)", n_bins, global_min, global_max}, "m_value"); + } + + // Subtract background from a standalone clone so RResultPtr ownership stays intact. + TH1D *h_result = (TH1D *)h_data->Clone("h_m_result"); + h_result->SetDirectory(nullptr); + h_result->Add(h_bkg.GetPtr(), -1.0); + + double mean_m = h_result->GetMean(); + double rms_m = h_result->GetRMS(); + double center_m = 0.5 * (global_min + global_max); + + m_values["m_low"] = global_min; + m_values["m_high"] = global_max; + m_values["m_mid"] = center_m; + m_values["m_avg"] = mean_m; + m_values["m_rms"] = rms_m; + + report(DEBUG, kModule) << "M statistics (data - background):" << "\n"; + report(DEBUG, kModule) << " Min: " << global_min << "\n"; + report(DEBUG, kModule) << " Max: " << global_max << "\n"; + report(DEBUG, kModule) << " Center: " << center_m << "\n"; + report(DEBUG, kModule) << " Mean: " << mean_m << "\n"; + report(DEBUG, kModule) << " RMS: " << rms_m << "\n"; + + delete h_result; + } + else { + // No background files: final histogram is h_data + double mean_m = h_data->GetMean(); + double rms_m = h_data->GetRMS(); + double center_m = 0.5 * (global_min + global_max); + + m_values["m_low"] = global_min; + m_values["m_high"] = global_max; + m_values["m_mid"] = center_m; + m_values["m_avg"] = mean_m; + m_values["m_rms"] = rms_m; + + report(DEBUG, kModule) << "M statistics (data):\n"; + report(DEBUG, kModule) << " Min: " << global_min << "\n"; + report(DEBUG, kModule) << " Max: " << global_max << "\n"; + report(DEBUG, kModule) << " Center: " << center_m << "\n"; + report(DEBUG, kModule) << " Mean: " << mean_m << "\n"; + report(DEBUG, kModule) << " RMS: " << rms_m << "\n"; + } + } + catch (const std::exception &e) { + report(ERROR, kModule) << "RDataFrame error computing mass: " << e.what() << "\n"; + assert(false); + } +} + void RootDataConverter::validateFiles(const std::vector &files, const std::string &file_type) const diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index 8e299fc1a..15e93c696 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -372,6 +372,21 @@ class RootDataConverter */ void extractFourMomentumTransferStats(const std::string &weight_branch_name); + /** + * @brief Extract mass statistics for the upper vertex system from the trees + * + * Calculates min, max, mean, and RMS values from the total upper-vertex mass + * system, which is calculated as the invariant mass of the sum of the 4-vectors of + * the upper vertex particles (see setUpperVertexIndices). Stores the results in + * m_values with keys: "m_low", "m_high", "m_center", "m_avg", and "m_rms" + * + * @param weight_branch_name Name of the weight branch (if empty, weights assumed to be 1.0) + * + * @todo The function currently has not been tested for the FSRootFriendTree scenario, + * and so warns the user and assigns a weight of 1.0 for this case. + */ + void extractUpperVertexMassStats(const std::string &weight_branch_name); + /** * @brief Get the max/min values of a branch for a set of files * From fc3472b96635f338dd16fe8eba2ecfc77b4a18e9 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Wed, 6 May 2026 18:06:13 -0400 Subject: [PATCH 23/38] Lower vertex indices have been fully implemented Removed the mass-branch arg, as the mass can be calculated from the labeled 4-vectors. The indices can now be set by the user. Aside from this, the files have been formatted. --- src/libraries/UTILITIES/RootDataConverter.cc | 257 ++++++++++-------- src/libraries/UTILITIES/RootDataConverter.h | 71 +++-- .../convert_to_csv/convert_to_csv.cc | 41 ++- 3 files changed, 202 insertions(+), 167 deletions(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc index 767536c12..58a328b6b 100644 --- a/src/libraries/UTILITIES/RootDataConverter.cc +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -25,20 +25,23 @@ const char *RootDataConverter::kModule = "RootDataConverter"; -RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warnings) : m_fit_file(filename), - m_fit_results(filename, mute_warnings), - m_cfg_info(m_fit_results.configInfo()), - m_mute_warnings(mute_warnings), - m_data_files(dataFiles()), - m_background_files(backgroundFiles()), - m_genMC_files(genMCFiles()), - m_accMC_files(accMCFiles()), - m_data_tree_name(findTreeName("data")), - m_background_tree_name(findTreeName("background")), - m_genMC_tree_name(findTreeName("genMC")), - m_accMC_tree_name(findTreeName("accMC")) +RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warnings) : RootDataConverter(filename, {1}, mute_warnings) {} + +RootDataConverter::RootDataConverter(const std::string &filename, + const std::vector &lower_vertex_indices, + bool mute_warnings) : m_fit_file(filename), + m_fit_results(filename, mute_warnings), + m_cfg_info(m_fit_results.configInfo()), + m_mute_warnings(mute_warnings), + m_data_files(dataFiles()), + m_background_files(backgroundFiles()), + m_genMC_files(genMCFiles()), + m_accMC_files(accMCFiles()), + m_data_tree_name(findTreeName("data")), + m_background_tree_name(findTreeName("background")), + m_genMC_tree_name(findTreeName("genMC")), + m_accMC_tree_name(findTreeName("accMC")) { - report(DEBUG, kModule) << "Constructing RootDataConverter for file: " << m_fit_file << "\n"; if (!m_fit_results.valid()) @@ -47,15 +50,18 @@ RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warn assert(false); } - setLowerVertexIndices({1}); // default to index 1 being the lower vertex particle + setLowerVertexIndices(lower_vertex_indices); setUpperVertexIndices(); - // print debug info detailing which particles are labeled as upper or lower vertex + // print debug info detailing which particles are labeled as upper or lower vertex // note that index 0 is always the beam photon const std::vector particle_list = m_cfg_info->reaction(m_fit_results.reactionList()[0])->particleList(); - report(DEBUG, kModule) << "The following particles have been labeled as lower vertex particles by default (index 1):\n"; - report(DEBUG, kModule) << "i = 1 : " << particle_list[1] << "\n"; - report(DEBUG, kModule) << "The following particles are thus labeled as upper vertex particles by default (indices 2 to N):\n"; + report(DEBUG, kModule) << "The following particles have been labeled as lower vertex particles:\n"; + for (const auto &idx : m_lower_vertex_indices) + { + report(DEBUG, kModule) << "i = " << idx << " : " << particle_list[idx] << "\n"; + } + report(DEBUG, kModule) << "The following particles are thus labeled as upper vertex particles:\n"; for (const auto &idx : m_upper_vertex_indices) { report(DEBUG, kModule) << "i = " << idx << " : " << particle_list[idx] << "\n"; @@ -76,20 +82,13 @@ RootDataConverter::RootDataConverter(const std::string &filename, bool mute_warn extract(); } -// TODO: this block should be used for the constructor that specifies custom lv indices -// for (size_t i = 0; i < particle_list.size(); i++) -// { -// if (std::find(m_lower_vertex_indices.begin(), m_lower_vertex_indices.end(), i) != m_lower_vertex_indices.end()) -// report(DEBUG, kModule) << "i = " << i << " : " << particle_list[i] << "\n"; -// } - void RootDataConverter::extract() { // get weight branch name. If weight branch is not found, will return empty string // and we'll assume weights of 1.0 throughout our stats functions std::string weight_branch_name; if (m_background_files_exist) - weight_branch_name = weightBranchName("background", m_background_tree_name); + weight_branch_name = weightBranchName("background", m_background_tree_name); else weight_branch_name = weightBranchName("data", m_data_tree_name); @@ -102,7 +101,7 @@ void RootDataConverter::extract() report(DEBUG, kModule) << "Weight branch found: " << weight_branch_name << "\n"; } - // Extract min, max, mean, and RMS of the beam energy, -t, and mass of the upper + // Extract min, max, mean, and RMS of the beam energy, -t, and mass of the upper // vertex system. Background subtraction and proper weighting are all included. extractBeamEnergyStats(weight_branch_name); extractFourMomentumTransferStats(weight_branch_name); @@ -272,7 +271,7 @@ std::string RootDataConverter::weightBranchName( // FSRoot-based data readers can optionally pass a friend weight branch name as an // argument, so check explicitly for that first const std::vector reaction_list = m_fit_results.reactionList(); - std::string reaction = reaction_list[0]; // assume all reactions use the same weight branch + std::string reaction = reaction_list[0]; // assume all reactions use the same weight branch std::pair> file_pair; // data reader name and args std::string root_file; if (file_type == "data") @@ -359,7 +358,6 @@ std::string RootDataConverter::weightBranchName( } } - void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_name) { // we will add all the data (and possible) weighted background energy histograms @@ -369,7 +367,7 @@ void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_ double min, max; std::tie(min, max) = findMinMaxOfBranch(m_data_files, m_data_tree_name, "EnPB"); - if(m_background_files_exist) + if (m_background_files_exist) { double bg_min, bg_max; std::tie(bg_min, bg_max) = findMinMaxOfBranch(m_background_files, m_background_tree_name, "EnPB"); @@ -401,8 +399,8 @@ void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_ tree->SetBranchAddress("EnPB", &energy); // draw into temporary histogram - const int n_bins = 200; - TH1D *h_energy = new TH1D("h_energy", "", n_bins, min, max); + const int n_bins = 200; + TH1D *h_energy = new TH1D("h_energy", "", n_bins, min, max); // If no background files exist, we assume weights are stored in the data tree if (!m_background_files_exist && !weight_branch_name.empty() && tree->GetBranch(weight_branch_name.c_str())) @@ -425,7 +423,7 @@ void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_ << data_file << "\n" << "Assuming weights of 1.0 for all events.\n"; - } + } // if this is first hist, clone it to the total hist. Otherwise, add to the total hist if (!h_energy_total) @@ -444,7 +442,7 @@ void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_ // ==== BACKGROUND ENERGY HISTOGRAMS ==== // if background files exist, we'll subtract the weighted background energy - // histograms from the total energy histogram, which we will use for our stats. + // histograms from the total energy histogram, which we will use for our stats. for (const std::string &bg_file : m_background_files) { TFile *f = TFile::Open(bg_file.c_str()); @@ -476,14 +474,14 @@ void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_ // draw into temporary histogram const int n_bins = 200; TH1D *h_energy = new TH1D("h_energy", "", n_bins, min, max); - tree->Draw("EnPB>>h_energy", weight_branch_name.c_str(), "goff"); + tree->Draw("EnPB>>h_energy", weight_branch_name.c_str(), "goff"); // subtract background histogram from total histogram if (h_energy_total) h_energy_total->Add(h_energy, -1.0); delete h_energy; - f->Close(); + f->Close(); } const double center_energy = 0.5 * (min + max); @@ -512,7 +510,8 @@ void RootDataConverter::extractFourMomentumTransferStats(const std::string &weig // target proton mass (GeV) const double m_proton = 0.938; - if (m_lower_vertex_indices.empty()) { + if (m_lower_vertex_indices.empty()) + { report(ERROR, kModule) << "No lower vertex indices set; cannot compute -t\n"; assert(false); } @@ -522,32 +521,35 @@ void RootDataConverter::extractFourMomentumTransferStats(const std::string &weig std::string recoil_py_expr; std::string recoil_pz_expr; std::string recoil_e_expr; - for (size_t i = 0; i < m_lower_vertex_indices.size(); ++i) { + for (size_t i = 0; i < m_lower_vertex_indices.size(); ++i) + { int idx = m_lower_vertex_indices[i]; std::string sep = (i == 0) ? "" : " + "; recoil_px_expr += sep + "PxP" + std::to_string(idx); recoil_py_expr += sep + "PyP" + std::to_string(idx); recoil_pz_expr += sep + "PzP" + std::to_string(idx); - recoil_e_expr += sep + "EnP" + std::to_string(idx); + recoil_e_expr += sep + "EnP" + std::to_string(idx); } // ---- DATA histogram via RDataFrame ---- - try { + try + { ROOT::RDataFrame df_data(m_data_tree_name, m_data_files); // Define recoil components and t (Mandelstam t = (target - recoil)^2) auto df = df_data.Define("recoil_px", recoil_px_expr) - .Define("recoil_py", recoil_py_expr) - .Define("recoil_pz", recoil_pz_expr) - .Define("recoil_E", recoil_e_expr) - .Define("t_value", - [m_proton](double rE, double rpx, double rpy, double rpz) { - double dE = m_proton - rE; - double p2 = rpx * rpx + rpy * rpy + rpz * rpz; - double t = dE * dE - p2; - return std::fabs(t); - }, - {"recoil_E", "recoil_px", "recoil_py", "recoil_pz"}); + .Define("recoil_py", recoil_py_expr) + .Define("recoil_pz", recoil_pz_expr) + .Define("recoil_E", recoil_e_expr) + .Define("t_value", + [m_proton](double rE, double rpx, double rpy, double rpz) + { + double dE = m_proton - rE; + double p2 = rpx * rpx + rpy * rpy + rpz * rpz; + double t = dE * dE - p2; + return std::fabs(t); + }, + {"recoil_E", "recoil_px", "recoil_py", "recoil_pz"}); // Determine min and max from data (unweighted) auto data_min_r = df.Min("t_value"); @@ -561,43 +563,51 @@ void RootDataConverter::extractFourMomentumTransferStats(const std::string &weig // Build data histogram (if background exists, don't apply weights to data here) const int n_bins = 200; ROOT::RDF::RResultPtr h_data; - if (m_background_files_exist) { + if (m_background_files_exist) + { h_data = df.Histo1D({"h_t_data", "t (data)", n_bins, global_min, global_max}, "t_value"); } - else { + else + { // if no background files, use weight branch from data (if provided) - if (!weight_branch_name.empty()) { + if (!weight_branch_name.empty()) + { // if weight branch references a friend (contains a '.'), unclear if RDataFrame can handle that, so skip weights in that case with a warning // TODO: test the friend tree case - if (weight_branch_name.find('.') != std::string::npos) { + if (weight_branch_name.find('.') != std::string::npos) + { report(WARNING, kModule) << "Weight branch appears to reference a friend tree (" << weight_branch_name << ") - skipping weights for RDataFrame histogram\n"; h_data = df.Histo1D({"h_t_data", "t (data)", n_bins, global_min, global_max}, "t_value"); } - else { + else + { h_data = df.Histo1D({"h_t_data", "t (data)", n_bins, global_min, global_max}, "t_value", weight_branch_name); } } - else { + else + { h_data = df.Histo1D({"h_t_data", "t (data)", n_bins, global_min, global_max}, "t_value"); } } - // ---- BACKGROUND histogram via RDataFrame (if present) ---- - if (m_background_files_exist && !m_background_files.empty()) { + // ---- BACKGROUND histogram via RDataFrame (if present) ---- + if (m_background_files_exist && !m_background_files.empty()) + { ROOT::RDF::RResultPtr h_bkg; ROOT::RDataFrame df_bg(m_background_tree_name, m_background_files); auto dfb = df_bg.Define("recoil_px", recoil_px_expr) - .Define("recoil_py", recoil_py_expr) - .Define("recoil_pz", recoil_pz_expr) - .Define("recoil_E", recoil_e_expr) - .Define("t_value", - [m_proton](double rE, double rpx, double rpy, double rpz) { - double dE = m_proton - rE; - double p2 = rpx * rpx + rpy * rpy + rpz * rpz; - double t = dE * dE - p2; - return std::fabs(t); - }, - {"recoil_E", "recoil_px", "recoil_py", "recoil_pz"}); + .Define("recoil_py", recoil_py_expr) + .Define("recoil_pz", recoil_pz_expr) + .Define("recoil_E", recoil_e_expr) + .Define("t_value", + [m_proton](double rE, double rpx, double rpy, double rpz) + { + double dE = m_proton - rE; + double p2 = rpx * rpx + rpy * rpy + rpz * rpz; + double t = dE * dE - p2; + return std::fabs(t); + }, + {"recoil_E", "recoil_px", "recoil_py", "recoil_pz"}); // Expand global range to include background extremes auto bg_min_r = dfb.Min("t_value"); @@ -611,10 +621,12 @@ void RootDataConverter::extractFourMomentumTransferStats(const std::string &weig h_data = df.Histo1D({"h_t_data", "t (data)", n_bins, global_min, global_max}, "t_value"); // background histogram should use weight branch (weight_branch_name was chosen from background earlier) - if (!weight_branch_name.empty() && weight_branch_name.find('.') == std::string::npos) { + if (!weight_branch_name.empty() && weight_branch_name.find('.') == std::string::npos) + { h_bkg = dfb.Histo1D({"h_t_bkg", "t (bg)", n_bins, global_min, global_max}, "t_value", weight_branch_name); } - else { + else + { if (weight_branch_name.find('.') != std::string::npos) report(WARNING, kModule) << "Background weight branch references friend tree; skipping weights for bg RDataFrame histogram\n"; h_bkg = dfb.Histo1D({"h_t_bkg", "t (bg)", n_bins, global_min, global_max}, "t_value"); @@ -644,7 +656,8 @@ void RootDataConverter::extractFourMomentumTransferStats(const std::string &weig delete h_result; } - else { + else + { // No background files: final histogram is h_data double mean_t = h_data->GetMean(); double rms_t = h_data->GetRMS(); @@ -664,7 +677,8 @@ void RootDataConverter::extractFourMomentumTransferStats(const std::string &weig report(DEBUG, kModule) << " RMS: " << rms_t << "\n"; } } - catch (const std::exception &e) { + catch (const std::exception &e) + { report(ERROR, kModule) << "RDataFrame error computing -t: " << e.what() << "\n"; assert(false); } @@ -672,7 +686,8 @@ void RootDataConverter::extractFourMomentumTransferStats(const std::string &weig void RootDataConverter::extractUpperVertexMassStats(const std::string &weight_branch_name) { - if (m_upper_vertex_indices.empty()) { + if (m_upper_vertex_indices.empty()) + { report(ERROR, kModule) << "No upper vertex indices set; cannot compute mass\n"; assert(false); } @@ -682,31 +697,35 @@ void RootDataConverter::extractUpperVertexMassStats(const std::string &weight_br std::string upper_py_expr; std::string upper_pz_expr; std::string upper_e_expr; - for (size_t i = 0; i < m_upper_vertex_indices.size(); ++i) { + for (size_t i = 0; i < m_upper_vertex_indices.size(); ++i) + { int idx = m_upper_vertex_indices[i]; std::string sep = (i == 0) ? "" : " + "; upper_px_expr += sep + "PxP" + std::to_string(idx); upper_py_expr += sep + "PyP" + std::to_string(idx); upper_pz_expr += sep + "PzP" + std::to_string(idx); - upper_e_expr += sep + "EnP" + std::to_string(idx); + upper_e_expr += sep + "EnP" + std::to_string(idx); } - try { + try + { // ---- DATA histogram via RDataFrame ---- ROOT::RDataFrame df_data(m_data_tree_name, m_data_files); auto df = df_data.Define("upper_px", upper_px_expr) - .Define("upper_py", upper_py_expr) - .Define("upper_pz", upper_pz_expr) - .Define("upper_E", upper_e_expr) - .Define("m_value", - [](double E, double px, double py, double pz) { - double p2 = px * px + py * py + pz * pz; - double m2 = E * E - p2; - if (m2 < 0 && m2 > -1e-12) m2 = 0.0; - return std::sqrt(std::max(0.0, m2)); - }, - {"upper_E", "upper_px", "upper_py", "upper_pz"}); + .Define("upper_py", upper_py_expr) + .Define("upper_pz", upper_pz_expr) + .Define("upper_E", upper_e_expr) + .Define("m_value", + [](double E, double px, double py, double pz) + { + double p2 = px * px + py * py + pz * pz; + double m2 = E * E - p2; + if (m2 < 0 && m2 > -1e-12) + m2 = 0.0; + return std::sqrt(std::max(0.0, m2)); + }, + {"upper_E", "upper_px", "upper_py", "upper_pz"}); // Determine min and max from data (unweighted) auto data_min_r = df.Min("m_value"); @@ -720,35 +739,42 @@ void RootDataConverter::extractUpperVertexMassStats(const std::string &weight_br // Build data histogram (if background exists, don't apply weights to data here) const int n_bins = 200; ROOT::RDF::RResultPtr h_data; - if (m_background_files_exist) { + if (m_background_files_exist) + { h_data = df.Histo1D({"h_m_data", "M (data)", n_bins, global_min, global_max}, "m_value"); } - else { + else + { // if no background files, use weight branch from data (if provided) - if (!weight_branch_name.empty() && weight_branch_name.find('.') == std::string::npos) { + if (!weight_branch_name.empty() && weight_branch_name.find('.') == std::string::npos) + { h_data = df.Histo1D({"h_m_data", "M (data)", n_bins, global_min, global_max}, "m_value", weight_branch_name); } - else { + else + { h_data = df.Histo1D({"h_m_data", "M (data)", n_bins, global_min, global_max}, "m_value"); } } // ---- BACKGROUND histogram via RDataFrame (if present) ---- - if (m_background_files_exist && !m_background_files.empty()) { + if (m_background_files_exist && !m_background_files.empty()) + { ROOT::RDF::RResultPtr h_bkg; ROOT::RDataFrame df_bg(m_background_tree_name, m_background_files); auto dfb = df_bg.Define("upper_px", upper_px_expr) - .Define("upper_py", upper_py_expr) - .Define("upper_pz", upper_pz_expr) - .Define("upper_E", upper_e_expr) - .Define("m_value", - [](double E, double px, double py, double pz) { - double p2 = px * px + py * py + pz * pz; - double m2 = E * E - p2; - if (m2 < 0 && m2 > -1e-12) m2 = 0.0; - return std::sqrt(std::max(0.0, m2)); - }, - {"upper_E", "upper_px", "upper_py", "upper_pz"}); + .Define("upper_py", upper_py_expr) + .Define("upper_pz", upper_pz_expr) + .Define("upper_E", upper_e_expr) + .Define("m_value", + [](double E, double px, double py, double pz) + { + double p2 = px * px + py * py + pz * pz; + double m2 = E * E - p2; + if (m2 < 0 && m2 > -1e-12) + m2 = 0.0; + return std::sqrt(std::max(0.0, m2)); + }, + {"upper_E", "upper_px", "upper_py", "upper_pz"}); // Expand global range to include background extremes auto bg_min_r = dfb.Min("m_value"); @@ -761,10 +787,12 @@ void RootDataConverter::extractUpperVertexMassStats(const std::string &weight_br // Recreate histograms with unified ranges h_data = df.Histo1D({"h_m_data", "M (data)", n_bins, global_min, global_max}, "m_value"); - if (!weight_branch_name.empty() && weight_branch_name.find('.') == std::string::npos) { + if (!weight_branch_name.empty() && weight_branch_name.find('.') == std::string::npos) + { h_bkg = dfb.Histo1D({"h_m_bkg", "M (bg)", n_bins, global_min, global_max}, "m_value", weight_branch_name); } - else { + else + { h_bkg = dfb.Histo1D({"h_m_bkg", "M (bg)", n_bins, global_min, global_max}, "m_value"); } @@ -792,7 +820,8 @@ void RootDataConverter::extractUpperVertexMassStats(const std::string &weight_br delete h_result; } - else { + else + { // No background files: final histogram is h_data double mean_m = h_data->GetMean(); double rms_m = h_data->GetRMS(); @@ -812,19 +841,19 @@ void RootDataConverter::extractUpperVertexMassStats(const std::string &weight_br report(DEBUG, kModule) << " RMS: " << rms_m << "\n"; } } - catch (const std::exception &e) { + catch (const std::exception &e) + { report(ERROR, kModule) << "RDataFrame error computing mass: " << e.what() << "\n"; assert(false); } } - void RootDataConverter::validateFiles(const std::vector &files, - const std::string &file_type) const + const std::string &file_type) const { if (files.empty() && file_type != "background") { - report(ERROR, kModule) << "No " + report(ERROR, kModule) << "No " << file_type << " files found in fit results for file: " << m_fit_file << "\n"; assert(false); @@ -843,8 +872,8 @@ void RootDataConverter::validateFiles(const std::vector &files, } std::pair RootDataConverter::findMinMaxOfBranch(const std::vector &files, - const std::string &tree_name, - const std::string &branch_name) const + const std::string &tree_name, + const std::string &branch_name) const { double global_min = std::numeric_limits::max(); double global_max = std::numeric_limits::lowest(); diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index 15e93c696..4c9170793 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -59,10 +59,8 @@ class RootDataConverter * * The 4-momenta indices are as follows: * - index 0 (B): beam photon - * - indices 1 to M: lower vertex particles, e.g. proton, pi+ that decay from Delta++ - * - indices M+1 to N: final state particles from upper vertex - * - * TODO: implement + * - lower_vertex_indices: lower vertex particles, e.g. proton, pi+ that decay from Delta++ + * - All leftover indices: final state particles from the upper vertex * * @param[in] filename path to the .fit file * @param[in] lower_vertex_indices 4-momenta indices for lower vertex particles @@ -80,14 +78,15 @@ class RootDataConverter * * The 4-momenta indices are as follows: * - index 0 (B): beam photon - * - indices 1 to M: lower vertex particles, e.g. proton, pi+ that decay from Delta++ - * - indices M+1 to P: isobar particles, e.g. pi+ pi- from rho - * - indices P+1 to N: remaining final state particles from upper vertex + * - lower_vertex_indices: lower vertex particles, e.g. proton, pi+ that decay from Delta++ + * - isobar_indices: isobar particles, e.g. pi+ pi- from rho + * - all indices not in lower_vertex_indices or 0: final state particles from upper vertex * - * Note that indices M+1 to N are still used to calculate the total mass values. - * - * TODO: implement. make sure no conflicts with lower vertex indices. Also this - * should add isobar mass and event counts to the output. + * @todo: implement. make sure no conflicts with lower vertex indices. Also this + * should add isobar mass and event counts to the output. Usually one is interested + * in the distribution though, rather than just the bin centers/edges like we are + * for the total binned mass. For now then, we will leave this as a future + * improvement until a use case arises that requires it. * * @param[in] filename path to the .fit file * @param[in] lower_vertex_indices 4-momenta indices for lower vertex particles @@ -143,7 +142,6 @@ class RootDataConverter void validateGenMCFiles() const { validateFiles(m_genMC_files, "genMC"); } void validateAccMCFiles() const { validateFiles(m_accMC_files, "accMC"); } - /** * @brief data tree name associated with the fit results * If one tree exists in the data files, that tree name is returned. Otherwise, @@ -207,9 +205,9 @@ class RootDataConverter * the beam photon, but the other indices are not guaranteed to be in any particular * order. The simplest case is a recoil proton at the lower vertex, which would just * be index 1. However, there can be multiple particles associated with the lower - * vertex like a Delta++ -> p + pi+ decay. - * - * This function allows the user to specify which indices correspond to the lower + * vertex like a Delta++ -> p + pi+ decay. + * + * This function allows the user to specify which indices correspond to the lower * vertex system. The remaining indices are assumed to be final state particles from * the upper vertex. This is important for correctly calculating the 4-momentum * transfer t and the upper vertex mass. @@ -225,7 +223,7 @@ class RootDataConverter /** * @brief Set indices that belong to the upper vertex final state particles - * + * * Any particle indices not in m_lower_vertex_indices that are also not 0 are thus * assumed to belong to the upper vertex. This saves all leftover indices to * m_upper_vertex_indices. This is important for correctly calculating the upper @@ -237,12 +235,11 @@ class RootDataConverter /** * @brief Get indices for the upper vertex particles - * + * * See setUpperVertexIndices for more details. */ std::vector getUpperVertexIndices() const { return m_upper_vertex_indices; } - /** * @brief Map of header names to their values */ @@ -273,7 +270,7 @@ class RootDataConverter const std::string m_genMC_tree_name; const std::string m_accMC_tree_name; - // TODO: variables we'll want to implement + // TODO: variables we'll want to implement // "file", // "t_low", // "t_high", @@ -295,7 +292,6 @@ class RootDataConverter // "ac_events", // "ac_events_err", // "efficiency", - std::map m_values; //< Map of headers to their values for the CSV output (except for the "file" header) @@ -308,14 +304,13 @@ class RootDataConverter */ std::vector findFiles(const std::string &file_type) const; - /** * @brief Confirm that files vector is non-empty, unless its background - * - * Generally will throw an error if no files are present for a given type. If + * + * Generally will throw an error if no files are present for a given type. If * file_type is "background", this can happen, but it warns the user that it assumes * any event weights are thus stored directly in the data trees. - * + * * @param files absolute paths of files associated with the fit results for a given type * @param file_type data, background, genMC, or accMC */ @@ -357,16 +352,16 @@ class RootDataConverter /** * @brief Extract -t 4-momentum transfer statistics from the trees - * - * Calculates min, max, mean, and RMS values from the momentum transfer between - * the at-rest proton and the lower-vertex recoil system. This is calculated as - * t = (P_proton - P_recoil)^2, where P_recoil is the sum of the 4-vectors of the + * + * Calculates min, max, mean, and RMS values from the momentum transfer between + * the at-rest proton and the lower-vertex recoil system. This is calculated as + * t = (P_proton - P_recoil)^2, where P_recoil is the sum of the 4-vectors of the * lower vertex particles (see setLowerVertexIndices) * Stores results in m_values with keys: "t_low", "t_high", "t_center", "t_avg", and * "t_rms" - * + * * @param weight_branch_name Name of the weight branch (if empty, weights assumed to be 1.0) - * + * * @todo The function currently has not been tested for the FSRootFriendTree scenario, * and so warns the user and assigns a weight of 1.0 for this case. */ @@ -374,30 +369,30 @@ class RootDataConverter /** * @brief Extract mass statistics for the upper vertex system from the trees - * + * * Calculates min, max, mean, and RMS values from the total upper-vertex mass * system, which is calculated as the invariant mass of the sum of the 4-vectors of - * the upper vertex particles (see setUpperVertexIndices). Stores the results in + * the upper vertex particles (see setUpperVertexIndices). Stores the results in * m_values with keys: "m_low", "m_high", "m_center", "m_avg", and "m_rms" - * + * * @param weight_branch_name Name of the weight branch (if empty, weights assumed to be 1.0) - * + * * @todo The function currently has not been tested for the FSRootFriendTree scenario, * and so warns the user and assigns a weight of 1.0 for this case. - */ + */ void extractUpperVertexMassStats(const std::string &weight_branch_name); /** * @brief Get the max/min values of a branch for a set of files - * + * * @param files set of ROOT files containing a common tree and branch name * @param tree_name ROOT tree containing the branch of interest * @param branch_name branch name we want to find the max/min values of * @return std::pair minimum and maximum values of the branch across all files */ std::pair findMinMaxOfBranch(const std::vector &files, - const std::string &tree_name, - const std::string &branch_name) const; + const std::string &tree_name, + const std::string &branch_name) const; static const char *kModule; }; diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index 5f4bc59cc..43ed267c9 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -42,25 +42,25 @@ int main(int argc, char *argv[]) bool is_sorted = false; int sort_index = -1; bool is_acceptance_corrected = false; + std::vector lower_vertex_indices; bool create_data_file = false; bool create_covariance = false; bool create_correlation = false; bool verbose = false; bool preview = false; - std::string mass_branch = "M4Pi"; // TODO: change by building up from AmpTools 4-vectors. Instead option for lower vertex indices, and possibly isobar indices auto print_help = []() { std::cout << "Usage: convert_to_csv [-h] [-i INPUT_FILES] [-o OUTPUT_PATH]" - << " [-s] [--sort-index INDEX] [-a] [-d] [-m MASS_BRANCH] [-p] [-v]" - << " [--correlation] [--covariance] [-d]\n" + << " [-s] [--sort-index INDEX] [-a] [-d] [-l LOWER_VERTEX_INDICES] [-p] [-v]" + << " [--correlation] [--covariance] \n" << " -i INPUT_FILES:\t\tFull path to the .fit file(s)\n" << " -o OUTPUT_PATH:\t\tFull path to the output .csv file\n" << " -s, --sort:\t\t\tSort files by last number in the file name or path (default:true)\n" << " --sort-index INDEX:\t\tWhat number in file path to sort by (default:-1, so last number in path is used)\n" << " -a --acceptance-correct:\tAcceptance correct the intensities\n" << " -d --data-file:\t\tCreate separate data file\n" - << " -m --mass-branch:\t\tBranch name for final mass spectrum of interest (default:M4Pi)\n" + << " -l --lower-vertex-indices:\tSpecify indices of lower vertex particles in the data 4-vectors\n" << " -p --preview:\t\t\tPrint files to be processed, but don't run\n" << " -v --verbose:\t\t\tPrint information from converter scripts as they run\n" << " --correlation:\t\tSave correlation matrix to separate csv file\n" @@ -158,17 +158,21 @@ int main(int argc, char *argv[]) { preview = true; } - else if (arg == "-m" || arg == "--mass-branch") + else if (arg == "-l" || arg == "--lower-vertex-indices") { - if (i + 1 < args.size() && !is_flag(args[i + 1])) - { - mass_branch = std::string(args[++i]); - } - else + // collect all following arguments until next flag + while (i + 1 < args.size() && !is_flag(args[i + 1])) { - std::cerr << "Error: -m/--mass-branch requires a value. See help message.\n"; - print_help(); - return 1; + auto val = args[++i]; + int index; + auto [ptr, ec] = std::from_chars(val.data(), val.data() + val.size(), index); + if (ec != std::errc()) + { + std::cerr << "Error: -l/--lower-vertex-indices requires valid integers. See help message.\n"; + print_help(); + return 1; + } + lower_vertex_indices.push_back(index); } } else if (is_flag(arg)) @@ -234,8 +238,15 @@ int main(int argc, char *argv[]) report(INFO, kModule) << "Processing file: " << file << "\n"; FitConverter converter(file, is_acceptance_corrected, !verbose); if (create_data_file) - { - RootDataConverter data_converter(file, !verbose); + { + if (!lower_vertex_indices.empty()) + { + RootDataConverter data_converter(file, lower_vertex_indices, !verbose); + } + else + { + RootDataConverter data_converter(file, !verbose); + } } if (!header_written) From 2c63a7f58699730a5f4cb6992fc3e8c5974a7dda Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Wed, 6 May 2026 18:18:56 -0400 Subject: [PATCH 24/38] updated comment and added error check --- src/libraries/UTILITIES/RootDataConverter.cc | 6 ++++++ src/libraries/UTILITIES/RootDataConverter.h | 18 +++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc index 58a328b6b..02c1fccf1 100644 --- a/src/libraries/UTILITIES/RootDataConverter.cc +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -910,6 +910,12 @@ std::pair RootDataConverter::findMinMaxOfBranch(const std::vecto void RootDataConverter::setUpperVertexIndices() { + if (m_lower_vertex_indices.empty()) + { + report(ERROR, kModule) << "Lower vertex indices must be set before setting upper vertex indices\n"; + assert(false); + } + m_upper_vertex_indices.clear(); const std::vector particle_list = m_cfg_info->reaction(m_fit_results.reactionList()[0])->particleList(); diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index 4c9170793..0ef4685ec 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -4,6 +4,9 @@ * @brief Class for converting ROOT tree data associated with an AmpTools fit to any format * @date 2026-02-02 * + * @note that this currently is built to convert to CSV, but the class can be extended + * to convert to any other format by accessing the various containers that store + * the fit results. */ #ifndef DATA_CONVERTER_H @@ -25,12 +28,14 @@ * @class RootDataConverter * @brief Converts AmpTools fit result data to any format * - * This class extracts data information from a single AmpTools .fit file. It can then - * output the data in various formats, such as CSV. The extracted data includes: - * - Bin edges, centers, averages, and RMS values for t, beam energy, and mass - * histograms + * This class is designed to take a single AmpTools .fit file, extract the associated + * data files and trees used in the fit, and then extract the relevant data from those + * trees to be converted into any desired output format (e.g. CSV). The extracted data + * includes: + * - Bin edges, centers, averages, and RMS values for t, beam energy, and upper vertex + * mass histograms * - Total number of events and errors for data and acceptance-corrected data - * - Efficiency calculation based on genMC and accMC event counts + * - Efficiency calculation based on generated and accepted MC event counts * * Example usage in convert_to_csv.cc: * @code @@ -101,6 +106,9 @@ class RootDataConverter /** * @brief Extract fit data from the .fit file + * + * Fills the m_values map with all the relevant information from the fit results and + * associated files that we want to save to csv later. */ void extract(); From c6754e82be18a235dba6784d474fea83930ef934 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Wed, 6 May 2026 19:04:18 -0400 Subject: [PATCH 25/38] Changed extractors to return the histograms Having the functions return the created histogram makes it: 1. Easier to understand the purpose of the function, and doesn't hide the map filling in the implementation 2. Allows for possibility of printing the hist for debugging purposes Also added a function to return the total number of events and its error --- src/libraries/UTILITIES/RootDataConverter.cc | 184 +++++++++---------- src/libraries/UTILITIES/RootDataConverter.h | 74 ++++---- 2 files changed, 126 insertions(+), 132 deletions(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc index 02c1fccf1..e02a60222 100644 --- a/src/libraries/UTILITIES/RootDataConverter.cc +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -102,10 +102,58 @@ void RootDataConverter::extract() } // Extract min, max, mean, and RMS of the beam energy, -t, and mass of the upper - // vertex system. Background subtraction and proper weighting are all included. - extractBeamEnergyStats(weight_branch_name); - extractFourMomentumTransferStats(weight_branch_name); - extractUpperVertexMassStats(weight_branch_name); + // vertex system. Background subtraction and proper weighting are all included. + TH1D* h_beam_energy = beamEnergyHist(weight_branch_name); + m_values["e_low"] = h_beam_energy->GetXaxis()->GetXmin(); + m_values["e_high"] = h_beam_energy->GetXaxis()->GetXmax(); + m_values["e_center"] = 0.5 * (m_values["e_low"] + m_values["e_high"]); + m_values["e_avg"] = h_beam_energy->GetMean(); + m_values["e_rms"] = h_beam_energy->GetRMS(); + + report(DEBUG, kModule) << "Beam energy statistics:\n"; + report(DEBUG, kModule) << " Min: " << m_values["e_low"] << "\n"; + report(DEBUG, kModule) << " Max: " << m_values["e_high"] << "\n"; + report(DEBUG, kModule) << " Center: " << m_values["e_center"] << "\n"; + report(DEBUG, kModule) << " Mean: " << m_values["e_avg"] << "\n"; + report(DEBUG, kModule) << " RMS: " << m_values["e_rms"] << "\n"; + + TH1D* h_four_momentum_transfer = tHist(weight_branch_name); + m_values["t_low"] = h_four_momentum_transfer->GetXaxis()->GetXmin(); + m_values["t_high"] = h_four_momentum_transfer->GetXaxis()->GetXmax(); + m_values["t_center"] = 0.5 * (m_values["t_low"] + m_values["t_high"]); + m_values["t_avg"] = h_four_momentum_transfer->GetMean(); + m_values["t_rms"] = h_four_momentum_transfer->GetRMS(); + + report(DEBUG, kModule) << "-t statistics:" << "\n"; + report(DEBUG, kModule) << " Min: " << m_values["t_low"] << "\n"; + report(DEBUG, kModule) << " Max: " << m_values["t_high"] << "\n"; + report(DEBUG, kModule) << " Center: " << m_values["t_center"] << "\n"; + report(DEBUG, kModule) << " Mean: " << m_values["t_avg"] << "\n"; + report(DEBUG, kModule) << " RMS: " << m_values["t_rms"] << "\n"; + + TH1D* h_upper_vertex_mass = massHist(weight_branch_name); + m_values["m_low"] = h_upper_vertex_mass->GetXaxis()->GetXmin(); + m_values["m_high"] = h_upper_vertex_mass->GetXaxis()->GetXmax(); + m_values["m_center"] = 0.5 * (m_values["m_low"] + m_values["m_high"]); + m_values["m_avg"] = h_upper_vertex_mass->GetMean(); + m_values["m_rms"] = h_upper_vertex_mass->GetRMS(); + + report(DEBUG, kModule) << "Mass statistics:" << "\n"; + report(DEBUG, kModule) << " Min: " << m_values["m_low"] << "\n"; + report(DEBUG, kModule) << " Max: " << m_values["m_high"] << "\n"; + report(DEBUG, kModule) << " Center: " << m_values["m_center"] << "\n"; + report(DEBUG, kModule) << " Mean: " << m_values["m_avg"] << "\n"; + report(DEBUG, kModule) << " RMS: " << m_values["m_rms"] << "\n"; + + // Its not particularly important which histogram is used to calculate the number + // of events, as they all use the same weights. + double events, events_err; + std::tie(events, events_err) = numberOfEvents(h_beam_energy); + m_values["events"] = events; + m_values["events_err"] = events_err; + + report(DEBUG, kModule) << "Total number of events: " << m_values["events"] + << " +/- " << m_values["events_err"] << "\n"; } std::vector RootDataConverter::findFiles(const std::string &file_type) const @@ -358,7 +406,7 @@ std::string RootDataConverter::weightBranchName( } } -void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_name) +TH1D* RootDataConverter::beamEnergyHist(const std::string &weight_branch_name) { // we will add all the data (and possible) weighted background energy histograms // together to get overall beam energy stats @@ -484,28 +532,10 @@ void RootDataConverter::extractBeamEnergyStats(const std::string &weight_branch_ f->Close(); } - const double center_energy = 0.5 * (min + max); - const double mean_energy = h_energy_total->GetMean(); - const double rms_energy = h_energy_total->GetRMS(); - - // Store values in the map - m_values["e_low"] = min; - m_values["e_high"] = max; - m_values["e_mid"] = center_energy; - m_values["e_avg"] = mean_energy; - m_values["e_rms"] = rms_energy; - - delete h_energy_total; - - report(DEBUG, kModule) << "Beam energy statistics:\n"; - report(DEBUG, kModule) << " Min: " << min << "\n"; - report(DEBUG, kModule) << " Max: " << max << "\n"; - report(DEBUG, kModule) << " Center: " << center_energy << "\n"; - report(DEBUG, kModule) << " Mean: " << mean_energy << "\n"; - report(DEBUG, kModule) << " RMS: " << rms_energy << "\n"; + return h_energy_total; } -void RootDataConverter::extractFourMomentumTransferStats(const std::string &weight_branch_name) +TH1D* RootDataConverter::tHist(const std::string &weight_branch_name) { // target proton mass (GeV) const double m_proton = 0.938; @@ -532,6 +562,7 @@ void RootDataConverter::extractFourMomentumTransferStats(const std::string &weig } // ---- DATA histogram via RDataFrame ---- + TH1D *h_result = nullptr; try { ROOT::RDataFrame df_data(m_data_tree_name, m_data_files); @@ -633,48 +664,15 @@ void RootDataConverter::extractFourMomentumTransferStats(const std::string &weig } // Subtract background from a standalone clone so RResultPtr ownership stays intact. - TH1D *h_result = (TH1D *)h_data->Clone("h_t_result"); + h_result = (TH1D *)h_data->Clone("h_t_result"); h_result->SetDirectory(nullptr); h_result->Add(h_bkg.GetPtr(), -1.0); - - double mean_t = h_result->GetMean(); - double rms_t = h_result->GetRMS(); - double center_t = 0.5 * (global_min + global_max); - - m_values["t_low"] = global_min; - m_values["t_high"] = global_max; - m_values["t_mid"] = center_t; - m_values["t_avg"] = mean_t; - m_values["t_rms"] = rms_t; - - report(DEBUG, kModule) << "-t statistics (data - background):" << "\n"; - report(DEBUG, kModule) << " Min: " << global_min << "\n"; - report(DEBUG, kModule) << " Max: " << global_max << "\n"; - report(DEBUG, kModule) << " Center: " << center_t << "\n"; - report(DEBUG, kModule) << " Mean: " << mean_t << "\n"; - report(DEBUG, kModule) << " RMS: " << rms_t << "\n"; - - delete h_result; } else { - // No background files: final histogram is h_data - double mean_t = h_data->GetMean(); - double rms_t = h_data->GetRMS(); - double center_t = 0.5 * (global_min + global_max); - - m_values["t_low"] = global_min; - m_values["t_high"] = global_max; - m_values["t_mid"] = center_t; - m_values["t_avg"] = mean_t; - m_values["t_rms"] = rms_t; - - report(DEBUG, kModule) << "-t statistics (data):\n"; - report(DEBUG, kModule) << " Min: " << global_min << "\n"; - report(DEBUG, kModule) << " Max: " << global_max << "\n"; - report(DEBUG, kModule) << " Center: " << center_t << "\n"; - report(DEBUG, kModule) << " Mean: " << mean_t << "\n"; - report(DEBUG, kModule) << " RMS: " << rms_t << "\n"; + // set h_result for consistent return variable + h_result = (TH1D *)h_data->Clone("h_t_result"); + h_result->SetDirectory(nullptr); } } catch (const std::exception &e) @@ -682,9 +680,11 @@ void RootDataConverter::extractFourMomentumTransferStats(const std::string &weig report(ERROR, kModule) << "RDataFrame error computing -t: " << e.what() << "\n"; assert(false); } + + return h_result; } -void RootDataConverter::extractUpperVertexMassStats(const std::string &weight_branch_name) +TH1D* RootDataConverter::massHist(const std::string &weight_branch_name) { if (m_upper_vertex_indices.empty()) { @@ -707,6 +707,7 @@ void RootDataConverter::extractUpperVertexMassStats(const std::string &weight_br upper_e_expr += sep + "EnP" + std::to_string(idx); } + TH1D *h_result = nullptr; try { // ---- DATA histogram via RDataFrame ---- @@ -797,48 +798,15 @@ void RootDataConverter::extractUpperVertexMassStats(const std::string &weight_br } // Subtract background from a standalone clone so RResultPtr ownership stays intact. - TH1D *h_result = (TH1D *)h_data->Clone("h_m_result"); + h_result = (TH1D *)h_data->Clone("h_m_result"); h_result->SetDirectory(nullptr); h_result->Add(h_bkg.GetPtr(), -1.0); - - double mean_m = h_result->GetMean(); - double rms_m = h_result->GetRMS(); - double center_m = 0.5 * (global_min + global_max); - - m_values["m_low"] = global_min; - m_values["m_high"] = global_max; - m_values["m_mid"] = center_m; - m_values["m_avg"] = mean_m; - m_values["m_rms"] = rms_m; - - report(DEBUG, kModule) << "M statistics (data - background):" << "\n"; - report(DEBUG, kModule) << " Min: " << global_min << "\n"; - report(DEBUG, kModule) << " Max: " << global_max << "\n"; - report(DEBUG, kModule) << " Center: " << center_m << "\n"; - report(DEBUG, kModule) << " Mean: " << mean_m << "\n"; - report(DEBUG, kModule) << " RMS: " << rms_m << "\n"; - - delete h_result; } else { - // No background files: final histogram is h_data - double mean_m = h_data->GetMean(); - double rms_m = h_data->GetRMS(); - double center_m = 0.5 * (global_min + global_max); - - m_values["m_low"] = global_min; - m_values["m_high"] = global_max; - m_values["m_mid"] = center_m; - m_values["m_avg"] = mean_m; - m_values["m_rms"] = rms_m; - - report(DEBUG, kModule) << "M statistics (data):\n"; - report(DEBUG, kModule) << " Min: " << global_min << "\n"; - report(DEBUG, kModule) << " Max: " << global_max << "\n"; - report(DEBUG, kModule) << " Center: " << center_m << "\n"; - report(DEBUG, kModule) << " Mean: " << mean_m << "\n"; - report(DEBUG, kModule) << " RMS: " << rms_m << "\n"; + // set h_result for consistent return variable + h_result = (TH1D *)h_data->Clone("h_m_result"); + h_result->SetDirectory(nullptr); } } catch (const std::exception &e) @@ -846,6 +814,22 @@ void RootDataConverter::extractUpperVertexMassStats(const std::string &weight_br report(ERROR, kModule) << "RDataFrame error computing mass: " << e.what() << "\n"; assert(false); } + + return h_result; +} + +std::pair RootDataConverter::numberOfEvents(TH1D* hist) +{ + if (!hist) + { + report(ERROR, kModule) << "Null histogram pointer passed to extractNumberOfEvents\n"; + assert(false); + } + + double n_events_err; + double n_events = hist->IntegralAndError(0, hist->GetNbinsX() + 1, n_events_err); + + return {n_events, n_events_err}; } void RootDataConverter::validateFiles(const std::vector &files, diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index 0ef4685ec..6fcd3f88e 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -280,23 +280,23 @@ class RootDataConverter // TODO: variables we'll want to implement // "file", - // "t_low", - // "t_high", - // "t_center", - // "t_avg", - // "t_rms", + // "t_low", DONE + // "t_high", DONE + // "t_center", DONE + // "t_avg", DONE + // "t_rms", DONE // "e_low", DONE // "e_high", DONE // "e_center", DONE // "e_avg", DONE // "e_rms", DONE - // "m_low", - // "m_high", - // "m_center", - // "m_avg", - // "m_rms", - // "events", - // "events_err", + // "m_low", DONE + // "m_high", DONE + // "m_center", DONE + // "m_avg", DONE + // "m_rms", DONE + // "events", DONE + // "events_err", DONE // "ac_events", // "ac_events_err", // "efficiency", @@ -348,47 +348,57 @@ class RootDataConverter std::vector treesInFile(const std::string &filename) const; /** - * @brief Extract beam energy statistics from the trees - * - * Calculates min, max, mean, and RMS values for the "EnPB" branch from the data - * file. Stores results in m_values with keys: "e_low", "e_high", "e_center", - * "e_avg", "e_rms" + * @brief Get the histogram of the beam energy distribution * * @param[in] weight_branch_name Name of the weight branch (if empty, weights assumed to be 1.0) + * + * @return TH1D* pointer to the beam energy histogram (background subtracted if background files exist) */ - void extractBeamEnergyStats(const std::string &weight_branch_name); + TH1D* beamEnergyHist(const std::string &weight_branch_name); /** - * @brief Extract -t 4-momentum transfer statistics from the trees + * @brief Get the -t 4-momentum transfer histogram * - * Calculates min, max, mean, and RMS values from the momentum transfer between - * the at-rest proton and the lower-vertex recoil system. This is calculated as - * t = (P_proton - P_recoil)^2, where P_recoil is the sum of the 4-vectors of the - * lower vertex particles (see setLowerVertexIndices) - * Stores results in m_values with keys: "t_low", "t_high", "t_center", "t_avg", and - * "t_rms" + * This is calculated as t = (P_proton - P_recoil)^2, where P_recoil is the sum of + * the 4-vectors of the lower vertex particles (see setLowerVertexIndices) * * @param weight_branch_name Name of the weight branch (if empty, weights assumed to be 1.0) + * + * @return TH1D* pointer to the -t histogram (background subtracted if background files exist) * * @todo The function currently has not been tested for the FSRootFriendTree scenario, * and so warns the user and assigns a weight of 1.0 for this case. */ - void extractFourMomentumTransferStats(const std::string &weight_branch_name); + TH1D* tHist(const std::string &weight_branch_name); /** - * @brief Extract mass statistics for the upper vertex system from the trees + * @brief Get the mass histogram for the upper vertex system * - * Calculates min, max, mean, and RMS values from the total upper-vertex mass - * system, which is calculated as the invariant mass of the sum of the 4-vectors of - * the upper vertex particles (see setUpperVertexIndices). Stores the results in - * m_values with keys: "m_low", "m_high", "m_center", "m_avg", and "m_rms" + * Calculated as the invariant mass of the sum of the 4-vectors of the upper vertex + * particles (see setUpperVertexIndices). * * @param weight_branch_name Name of the weight branch (if empty, weights assumed to be 1.0) + * + * @return TH1D* pointer to the upper vertex mass histogram (background subtracted if background files exist) * * @todo The function currently has not been tested for the FSRootFriendTree scenario, * and so warns the user and assigns a weight of 1.0 for this case. */ - void extractUpperVertexMassStats(const std::string &weight_branch_name); + TH1D* massHist(const std::string &weight_branch_name); + + /** + * @brief Extract the total number of events and associated error from a given histogram + * + * Use one of the weighted histograms from the beam energy, -t, or mass calculations + * to extract the total number of events and associated error using the + * IntegralAndError method. + * + * @param hist a pointer to a TH1D histogram from which to extract the total number of events and error + * + * @return std::pair where the first element is the total number of + * events and the second element is the associated error + */ + std::pair numberOfEvents(TH1D *hist); /** * @brief Get the max/min values of a branch for a set of files From 5d4c8b7eacd2035dda37bec2503592d8bcfe704b Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Wed, 6 May 2026 20:30:15 -0400 Subject: [PATCH 26/38] Added efficiency & acc-correction + formatting --- src/libraries/UTILITIES/RootDataConverter.cc | 92 +++++++++++++++++--- src/libraries/UTILITIES/RootDataConverter.h | 45 +++++++--- 2 files changed, 114 insertions(+), 23 deletions(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc index e02a60222..6f87855ae 100644 --- a/src/libraries/UTILITIES/RootDataConverter.cc +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -102,8 +102,8 @@ void RootDataConverter::extract() } // Extract min, max, mean, and RMS of the beam energy, -t, and mass of the upper - // vertex system. Background subtraction and proper weighting are all included. - TH1D* h_beam_energy = beamEnergyHist(weight_branch_name); + // vertex system. Background subtraction and proper weighting are all included. + TH1D *h_beam_energy = beamEnergyHist(weight_branch_name); m_values["e_low"] = h_beam_energy->GetXaxis()->GetXmin(); m_values["e_high"] = h_beam_energy->GetXaxis()->GetXmax(); m_values["e_center"] = 0.5 * (m_values["e_low"] + m_values["e_high"]); @@ -117,7 +117,7 @@ void RootDataConverter::extract() report(DEBUG, kModule) << " Mean: " << m_values["e_avg"] << "\n"; report(DEBUG, kModule) << " RMS: " << m_values["e_rms"] << "\n"; - TH1D* h_four_momentum_transfer = tHist(weight_branch_name); + TH1D *h_four_momentum_transfer = tHist(weight_branch_name); m_values["t_low"] = h_four_momentum_transfer->GetXaxis()->GetXmin(); m_values["t_high"] = h_four_momentum_transfer->GetXaxis()->GetXmax(); m_values["t_center"] = 0.5 * (m_values["t_low"] + m_values["t_high"]); @@ -131,7 +131,7 @@ void RootDataConverter::extract() report(DEBUG, kModule) << " Mean: " << m_values["t_avg"] << "\n"; report(DEBUG, kModule) << " RMS: " << m_values["t_rms"] << "\n"; - TH1D* h_upper_vertex_mass = massHist(weight_branch_name); + TH1D *h_upper_vertex_mass = massHist(weight_branch_name); m_values["m_low"] = h_upper_vertex_mass->GetXaxis()->GetXmin(); m_values["m_high"] = h_upper_vertex_mass->GetXaxis()->GetXmax(); m_values["m_center"] = 0.5 * (m_values["m_low"] + m_values["m_high"]); @@ -144,7 +144,7 @@ void RootDataConverter::extract() report(DEBUG, kModule) << " Center: " << m_values["m_center"] << "\n"; report(DEBUG, kModule) << " Mean: " << m_values["m_avg"] << "\n"; report(DEBUG, kModule) << " RMS: " << m_values["m_rms"] << "\n"; - + // Its not particularly important which histogram is used to calculate the number // of events, as they all use the same weights. double events, events_err; @@ -152,8 +152,19 @@ void RootDataConverter::extract() m_values["events"] = events; m_values["events_err"] = events_err; - report(DEBUG, kModule) << "Total number of events: " << m_values["events"] - << " +/- " << m_values["events_err"] << "\n"; + report(DEBUG, kModule) << "Total number of events: " << m_values["events"] + << " +/- " << m_values["events_err"] << "\n"; + + // Calculate efficiency and acceptance-corrected number of events + double eff = efficiency(); + m_values["efficiency"] = eff; + report(DEBUG, kModule) << " Efficiency: " << eff << "\n"; + + double ac_events, ac_events_err; + std::tie(ac_events, ac_events_err) = acceptanceCorrectedEvents(events, events_err, eff); + m_values["ac_events"] = ac_events; + m_values["ac_events_err"] = ac_events_err; + report(DEBUG, kModule) << "Acceptance corrected events: " << ac_events << " +/- " << ac_events_err << "\n"; } std::vector RootDataConverter::findFiles(const std::string &file_type) const @@ -406,7 +417,7 @@ std::string RootDataConverter::weightBranchName( } } -TH1D* RootDataConverter::beamEnergyHist(const std::string &weight_branch_name) +TH1D *RootDataConverter::beamEnergyHist(const std::string &weight_branch_name) { // we will add all the data (and possible) weighted background energy histograms // together to get overall beam energy stats @@ -535,7 +546,7 @@ TH1D* RootDataConverter::beamEnergyHist(const std::string &weight_branch_name) return h_energy_total; } -TH1D* RootDataConverter::tHist(const std::string &weight_branch_name) +TH1D *RootDataConverter::tHist(const std::string &weight_branch_name) { // target proton mass (GeV) const double m_proton = 0.938; @@ -684,7 +695,7 @@ TH1D* RootDataConverter::tHist(const std::string &weight_branch_name) return h_result; } -TH1D* RootDataConverter::massHist(const std::string &weight_branch_name) +TH1D *RootDataConverter::massHist(const std::string &weight_branch_name) { if (m_upper_vertex_indices.empty()) { @@ -818,7 +829,7 @@ TH1D* RootDataConverter::massHist(const std::string &weight_branch_name) return h_result; } -std::pair RootDataConverter::numberOfEvents(TH1D* hist) +std::pair RootDataConverter::numberOfEvents(TH1D *hist) { if (!hist) { @@ -832,6 +843,65 @@ std::pair RootDataConverter::numberOfEvents(TH1D* hist) return {n_events, n_events_err}; } +double RootDataConverter::efficiency() +{ + // Count events in genMC and accMC files + // Since MC events are not weighted, we just count them directly + + if (m_genMC_files.empty() || m_accMC_files.empty()) + { + report(WARNING, kModule) << "GenMC or AccMC files not found; cannot compute efficiency\n"; + return 0.0; + } + + try + { + // Count generated events + ROOT::RDataFrame df_gen(m_genMC_tree_name, m_genMC_files); + auto gen_count_r = df_gen.Count(); + double gen_events = *gen_count_r; + + // Count accepted events + ROOT::RDataFrame df_acc(m_accMC_tree_name, m_accMC_files); + auto acc_count_r = df_acc.Count(); + double acc_events = *acc_count_r; + + if (gen_events == 0) + { + report(WARNING, kModule) << "No generated events found\n"; + return 0.0; + } + + report(DEBUG, kModule) << "Efficiency calculation:\n"; + report(DEBUG, kModule) << " Generated events: " << gen_events << "\n"; + report(DEBUG, kModule) << " Accepted events: " << acc_events << "\n"; + + return acc_events / gen_events; + } + catch (const std::exception &e) + { + report(ERROR, kModule) << "RDataFrame error computing efficiency: " << e.what() << "\n"; + return 0.0; + } +} + +std::pair RootDataConverter::acceptanceCorrectedEvents( + double events, + double events_err, + double efficiency) const +{ + if (efficiency <= 0.0 || efficiency > 1.0) + { + report(WARNING, kModule) << "Efficiency value out of bounds (0 < eff <= 1): " << efficiency << "\n"; + return {0.0, 0.0}; + } + + double corrected_events = events / efficiency; + double corrected_events_err = events_err / efficiency; + + return {corrected_events, corrected_events_err}; +} + void RootDataConverter::validateFiles(const std::vector &files, const std::string &file_type) const { diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index 6fcd3f88e..b56e79713 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -106,7 +106,7 @@ class RootDataConverter /** * @brief Extract fit data from the .fit file - * + * * Fills the m_values map with all the relevant information from the fit results and * associated files that we want to save to csv later. */ @@ -351,10 +351,10 @@ class RootDataConverter * @brief Get the histogram of the beam energy distribution * * @param[in] weight_branch_name Name of the weight branch (if empty, weights assumed to be 1.0) - * + * * @return TH1D* pointer to the beam energy histogram (background subtracted if background files exist) */ - TH1D* beamEnergyHist(const std::string &weight_branch_name); + TH1D *beamEnergyHist(const std::string &weight_branch_name); /** * @brief Get the -t 4-momentum transfer histogram @@ -363,13 +363,13 @@ class RootDataConverter * the 4-vectors of the lower vertex particles (see setLowerVertexIndices) * * @param weight_branch_name Name of the weight branch (if empty, weights assumed to be 1.0) - * + * * @return TH1D* pointer to the -t histogram (background subtracted if background files exist) * * @todo The function currently has not been tested for the FSRootFriendTree scenario, * and so warns the user and assigns a weight of 1.0 for this case. */ - TH1D* tHist(const std::string &weight_branch_name); + TH1D *tHist(const std::string &weight_branch_name); /** * @brief Get the mass histogram for the upper vertex system @@ -378,28 +378,49 @@ class RootDataConverter * particles (see setUpperVertexIndices). * * @param weight_branch_name Name of the weight branch (if empty, weights assumed to be 1.0) - * + * * @return TH1D* pointer to the upper vertex mass histogram (background subtracted if background files exist) * * @todo The function currently has not been tested for the FSRootFriendTree scenario, * and so warns the user and assigns a weight of 1.0 for this case. */ - TH1D* massHist(const std::string &weight_branch_name); + TH1D *massHist(const std::string &weight_branch_name); /** * @brief Extract the total number of events and associated error from a given histogram - * + * * Use one of the weighted histograms from the beam energy, -t, or mass calculations - * to extract the total number of events and associated error using the + * to extract the total number of events and associated error using the * IntegralAndError method. - * + * * @param hist a pointer to a TH1D histogram from which to extract the total number of events and error - * - * @return std::pair where the first element is the total number of + * + * @return std::pair where the first element is the total number of * events and the second element is the associated error */ std::pair numberOfEvents(TH1D *hist); + /** + * @brief Calculate the efficiency based on the number of generated and accepted MC events + * + * It's assumed this is a binned fit, so we are not interested in the shape of the + * acceptance within the bin, just the efficiency value. + * + * @return double accepted / generated number of events + */ + double efficiency(); + + /** + * @brief Calculate the acceptance-corrected number of events and associated error + * + * @param events total number of events. See numberOfEvents. + * @param events_err error on total number of events. See numberOfEvents. + * @param efficiency acceptance efficiency. See efficiency(). + * @return std::pair acceptance-corrected number of events and + * associated error + */ + std::pair acceptanceCorrectedEvents(double events, double events_err, double efficiency) const; + /** * @brief Get the max/min values of a branch for a set of files * From 1de878c2f304058866f8d525b5ca0d3f0b8857ec Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Wed, 6 May 2026 20:43:23 -0400 Subject: [PATCH 27/38] Added CSV methods to data converter --- src/libraries/UTILITIES/RootDataConverter.cc | 21 ++++++ src/libraries/UTILITIES/RootDataConverter.h | 66 ++++++++++++------- .../convert_to_csv/convert_to_csv.cc | 31 +++++++-- 3 files changed, 88 insertions(+), 30 deletions(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc index 6f87855ae..f27d9c4cc 100644 --- a/src/libraries/UTILITIES/RootDataConverter.cc +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -167,6 +167,27 @@ void RootDataConverter::extract() report(DEBUG, kModule) << "Acceptance corrected events: " << ac_events << " +/- " << ac_events_err << "\n"; } +std::string RootDataConverter::getCSVHeader() const +{ + std::string header = "file"; + for (const auto &pair : m_values) + { + header += "," + pair.first; + } + return header; +} + +std::string RootDataConverter::getCSVRow() const +{ + std::string row = m_fit_file; + for (const auto &pair : m_values) + { + row += "," + std::to_string(pair.second); + } + return row; +} + + std::vector RootDataConverter::findFiles(const std::string &file_type) const { std::vector files; diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index b56e79713..57bb1e59d 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -112,6 +112,30 @@ class RootDataConverter */ void extract(); + /** + * @brief Get the CSV header row + * + * Constructs a comma-separated header string based on the keys of the m_values map. + * + * @note It is crucial that the order of entries in the header matches the order of + * values in getCSVRow(). + * + * @return std::string Comma-separated header string + */ + std::string getCSVHeader() const; + + /** + * @brief Get the CSV row + * + * Constructs a comma-separated row string based on the values of the m_values map. + * + * @note It is crucial that the order of values in this row matches the order of + * entries in getCSVHeader(). + * + * @return std::string Comma-separated row string + */ + std::string getCSVRow() const; + /** * @brief Data files associated with the fit results * @note It assumes one data file per reaction, and that it is the first argument @@ -144,10 +168,27 @@ class RootDataConverter */ std::vector accMCFiles() const { return findFiles("accMC"); }; - // TODO: write these docstrings + /** + * @brief Validate that data files were found + */ + void validateDataFiles() const { validateFiles(m_data_files, "data"); } + /** + * @brief Validate that background files were found + * + * If no background files are found, it will warn the user and assume that weights + * are stored directly in the data tree. + */ void validateBackgroundFiles() const { validateFiles(m_background_files, "background"); } + + /** + * @brief Validate that generated Monte Carlo files were found + */ void validateGenMCFiles() const { validateFiles(m_genMC_files, "genMC"); } + + /** + * @brief Validate that accepted Monte Carlo files were found + */ void validateAccMCFiles() const { validateFiles(m_accMC_files, "accMC"); } /** @@ -278,29 +319,6 @@ class RootDataConverter const std::string m_genMC_tree_name; const std::string m_accMC_tree_name; - // TODO: variables we'll want to implement - // "file", - // "t_low", DONE - // "t_high", DONE - // "t_center", DONE - // "t_avg", DONE - // "t_rms", DONE - // "e_low", DONE - // "e_high", DONE - // "e_center", DONE - // "e_avg", DONE - // "e_rms", DONE - // "m_low", DONE - // "m_high", DONE - // "m_center", DONE - // "m_avg", DONE - // "m_rms", DONE - // "events", DONE - // "events_err", DONE - // "ac_events", - // "ac_events_err", - // "efficiency", - std::map m_values; //< Map of headers to their values for the CSV output (except for the "file" header) /** diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index 43ed267c9..455900ceb 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -237,15 +238,16 @@ int main(int argc, char *argv[]) if (verbose) report(INFO, kModule) << "Processing file: " << file << "\n"; FitConverter converter(file, is_acceptance_corrected, !verbose); + std::unique_ptr data_converter; if (create_data_file) - { + { if (!lower_vertex_indices.empty()) { - RootDataConverter data_converter(file, lower_vertex_indices, !verbose); + data_converter = std::make_unique(file, lower_vertex_indices, !verbose); } else { - RootDataConverter data_converter(file, !verbose); + data_converter = std::make_unique(file, !verbose); } } @@ -272,7 +274,9 @@ int main(int argc, char *argv[]) if (create_data_file) { - // TODO: implement data header check + std::string data_header = data_converter->getCSVHeader(); + csv_root_data << data_header << "\n"; + first_file_data_header = data_header; } } else @@ -313,7 +317,14 @@ int main(int argc, char *argv[]) } if (create_data_file) { - // TODO: implement data header check + std::string data_header = data_converter->getCSVHeader(); + if (data_header != first_file_data_header) + { + report(ERROR, kModule) << "File " + << file + << " has a different data CSV header than the first file. Aborting\n"; + return 1; + } } } csv_result_data << converter.getCSVRow() << "\n"; @@ -323,7 +334,7 @@ int main(int argc, char *argv[]) if (create_correlation) csv_corr_data << converter.getCSVCorrelationMatrix() << "\n"; if (create_data_file) - ;//csv_root_data << data_converter.getCSVRow() << "\n"; // TODO: implement getCSVData in RootDataConverter to extract relevant data for csv output + csv_root_data << data_converter->getCSVRow() << "\n"; } // write the csv data to the output file @@ -350,6 +361,14 @@ int main(int argc, char *argv[]) corr_file << csv_corr_data.str(); corr_file.close(); } + if (create_data_file) + { + std::filesystem::path output_path(output_file); + std::string data_file = (output_path.parent_path() / (output_path.stem().string() + "_data.csv")).string(); + std::ofstream data_out_file(data_file); + data_out_file << csv_root_data.str(); + data_out_file.close(); + } return 0; } From d1c86962a1af39221d5bf0fa367cf90c49f18aee Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Wed, 6 May 2026 20:53:54 -0400 Subject: [PATCH 28/38] added usage example --- src/libraries/UTILITIES/RootDataConverter.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index 57bb1e59d..e078af9b6 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -39,7 +39,22 @@ * * Example usage in convert_to_csv.cc: * @code - * TODO: complete example + * std::ofstream csv_file(output_file); + * std::stringstream csv_data; + * bool header_written = false; + * + * for (const auto& file : input_files) { + * RootDataConverter converter(file); + * + * if (!header_written) { + * csv_data << converter.getCSVHeader() << "\n"; + * header_written = true; + * } + * csv_data << converter.getCSVRow() << "\n"; + * } + * + * csv_file << csv_data.str(); + * csv_file.close(); * @endcode */ class RootDataConverter From 30f2cbd7b56958cedbc89ee7926182939dd0c58d Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Thu, 7 May 2026 21:28:46 -0400 Subject: [PATCH 29/38] Coherent sums can now be saved from fit results In order to save the coherent sums, a new AmplitudeParser class was created to parse the amplitude names and categorize them into groups based on the quantum numbers they contain. This relies on known "naming schemes" for the amplitudes. Currently the most common schemes are supported, with instructions for how to add new schemes. --- src/libraries/UTILITIES/AmplitudeParser.cc | 259 +++++++++++++++++++++ src/libraries/UTILITIES/AmplitudeParser.h | 207 ++++++++++++++++ src/libraries/UTILITIES/FitConverter.cc | 53 ++++- src/libraries/UTILITIES/FitConverter.h | 44 +++- 4 files changed, 545 insertions(+), 18 deletions(-) create mode 100644 src/libraries/UTILITIES/AmplitudeParser.cc create mode 100644 src/libraries/UTILITIES/AmplitudeParser.h diff --git a/src/libraries/UTILITIES/AmplitudeParser.cc b/src/libraries/UTILITIES/AmplitudeParser.cc new file mode 100644 index 000000000..05ddd9011 --- /dev/null +++ b/src/libraries/UTILITIES/AmplitudeParser.cc @@ -0,0 +1,259 @@ +/** + * @file AmplitudeParser.cc + * @author Kevin Scheuer + * @brief Implementation of the amplitude naming parser and sum grouping logic. + * @date 2026-05-06 + */ + +#include +#include +#include + +#include "AmplitudeParser.h" +#include "IUAmpTools/report.h" + +const char *AmplitudeParser::kModule = "AmplitudeParser"; + +AmplitudeParser::AmplitudeParser(const std::string &naming_scheme) : m_requested_scheme(namingSchemeFromString(naming_scheme)) {} + +AmplitudeParser::NamingScheme +AmplitudeParser::namingSchemeFromString(const std::string &scheme_name) +{ + if (scheme_name == "JLme") + return NamingScheme::JLme; + if (scheme_name == "eJPmL") + return NamingScheme::eJPmL; + if (scheme_name == "Lme") + return NamingScheme::Lme; + return NamingScheme::Auto; +} + +AmplitudeParser::NamingScheme +AmplitudeParser::inferNamingScheme(const std::string &_name) +{ + if (amp_name.empty()) + { + report(ERROR, kModule) << "Amplitude name is empty, cannot infer naming scheme.\n"; + assert(false); + } + + if (std::isdigit(static_cast(amp_name.front()))) + return NamingScheme::JLme; + + if (std::isupper(static_cast(amp_name.front())) && + (amp_name.back() == '+' || amp_name.back() == '-')) + return NamingScheme::Lme; + + if ((amp_name.front() == 'p' || amp_name.front() == 'm') && + std::isupper(static_cast(amp_name.back()))) + return NamingScheme::eJPmL; + + return NamingScheme::Auto; // default if we can't infer +} + +std::vector +AmplitudeParser::filterAmplitudesByScheme( + const std::vector &full_amplitudes, + NamingScheme scheme) const +{ + std::vector filtered; + for (const auto &full_amp : full_amplitudes) + { + const std::string amp_name = ampName(full_amp); + if (inferNamingScheme(amp_name) == scheme) + filtered.push_back(full_amp); + else + { + report(WARNING, kModule) << "Amplitude '" + << full_amp + << "' does not match the inferred naming scheme and will be excluded from sum groups.\n"; + } + } + return filtered; +} + +std::string AmplitudeParser::ampName(const std::string &full_name) +{ + const size_t pos = full_name.rfind("::"); + return (pos == std::string::npos) ? full_name : full_name.substr(pos + 2); +} + +AmplitudeParser::ParsedAmplitude +AmplitudeParser::parseAmplitudeName(const std::string &_name, NamingScheme scheme) const +{ + ParsedAmplitude parsed; + parsed.amp_name = amp_name; + + // Parse quantum numbers according to the structure of the naming scheme + switch (scheme) + { + case NamingScheme::JLme: + parsed.J = amp_name.substr(0, 1); + parsed.L = amp_name.substr(1, 1); + parsed.m = amp_name.substr(2, amp_name.size() - 3); + parsed.e = amp_name.substr(amp_name.size() - 1, 1); + break; + case NamingScheme::Lme: + parsed.L = amp_name.substr(0, 1); + parsed.m = amp_name.substr(1, amp_name.size() - 2); + parsed.e = amp_name.substr(amp_name.size() - 1, 1); + break; + case NamingScheme::eJPmL: + parsed.e = amp_name.substr(0, 1); + parsed.J = amp_name.substr(1, 1); + parsed.P = amp_name.substr(2, 1); + parsed.m = amp_name.substr(3, amp_name.size() - 4); + parsed.L = amp_name.substr(amp_name.size() - 1, 1); + break; + default: + report(WARNING, kModule) << "Unknown naming scheme, cannot parse amplitude name '" << amp_name << "'.\n"; + break; + } + + return parsed; +} + +std::vector +AmplitudeParser::sumsForScheme(NamingScheme scheme) const +{ + if (scheme == NamingScheme::Lme) + { + return { + {{'e'}}, + {{'L'}}, + {{'e', 'L'}}, + {{'L', 'm'}}, + }; + } + + if (scheme == NamingScheme::JLme) + { + return { + {{'e'}}, + {{'J'}}, + {{'J', 'e'}}, + {{'J', 'L'}}, + {{'J', 'L', 'e'}}, + {{'J', 'L', 'm'}}, + }; + } + if (scheme == NamingScheme::eJPmL) + { + return { + {{'e'}}, + {{'J', 'P'}}, + {{'e', 'J', 'P'}}, + {{'J', 'P', 'L'}}, + {{'e', 'J', 'P', 'L'}}, + {{'J', 'P', 'm', 'L'}}, + }; + } + else + { + report(WARNING, kModule) << "Unknown naming scheme, cannot determine sum groups.\n"; + return {}; + } +} + +std::string AmplitudeParser::buildSumLabel( + const SumGroupDef &sum_group, + const ParsedAmplitude &parsed) const +{ + std::string label; + for (const auto &qn : sum_group.quantum_numbers) + { + std::string value = ""; + switch (qn) + { + case 'e': + value = parsed.e; + break; + case 'J': + value = parsed.J; + break; + case 'P': + value = parsed.P; + break; + case 'm': + value = parsed.m; + break; + case 'L': + value = parsed.L; + break; + default: + report(WARNING, kModule) << "Unknown quantum number '" << qn << "' in sum group definition.\n"; + break; + } + label += value; + } + return label; +} + +std::map> +AmplitudeParser::buildSumGroups(const std::vector &full_amplitudes) const +{ + std::map> groups; + + NamingScheme effective_scheme = m_requested_scheme; + + if (full_amplitudes.empty()) + { + report(WARNING, kModule) << "No amplitudes found in fit results, so no sums will be built.\n"; + return groups; + } + + // "auto" means we need to infer the naming scheme from the amplitude labels. + // We'll use the first amplitude label as a sample to infer the scheme. + if (effective_scheme == NamingScheme::Auto) + effective_scheme = inferNamingScheme(ampName(full_amplitudes.front())); + + // If we still can't infer a scheme, warn the user and don't attempt to build sums + // NOTE: if you have gotten this message and would like to build your own scheme, + // see the header file for instructions to do so. + if (effective_scheme == NamingScheme::Auto) + { + report(WARNING, kModule) << "Amplitude naming scheme could not be inferred, and so no sums will be built.\n"; + report(WARNING, kModule) << "This likely means your amplitude names don't follow a recognized pattern. Supported patterns include:\n"; + report(WARNING, kModule) << "\t1) JLme: e.g. 1P-1+ \n"; + report(WARNING, kModule) << "\t2) Lme: e.g. P1+ \n"; + report(WARNING, kModule) << "\t3) eJPmL: e.g. p1ppS \n"; + report(WARNING, kModule) << "You can also specify the naming scheme pattern (from the choices above) directly in the FitConverter constructor to avoid inference.\n"; + return groups; + } + + // Ensure that all amplitudes we attempt to group follow the same naming scheme, + // otherwise we will get undefined behavior when parsing the amplitude names. If we + // find any amplitudes that don't match the effective scheme, we will skip them and + // warn the user. + std::vector amplitudes_in_scheme = filterAmplitudesByScheme(full_amplitudes, effective_scheme); + + // determine what sum groups will be made + const std::vector sum_groups = sumsForScheme(effective_scheme); + + // loop over all amplitudes across all reactions and sums, parse the amplitude name, + // and group according to the requested scheme + for (const auto &full_amp : full_amplitudes) + { + const ParsedAmplitude parsed = parseAmplitudeName(ampName(full_amp), effective_scheme); + + for (const auto &sum_group : sum_groups) + { + // create the label that will be used as the CSV header + const std::string label = buildSumLabel(sum_group, parsed); + if (label.empty()) + continue; + + // add the full amplitude name that belongs to this sum group in the map + groups[label].push_back(full_amp); + } + } + + // remove duplicates from the sum groups + for (auto &pair : groups) + { + std::sort(pair.second.begin(), pair.second.end()); + pair.second.erase(std::unique(pair.second.begin(), pair.second.end()), pair.second.end()); + } + + return groups; +} \ No newline at end of file diff --git a/src/libraries/UTILITIES/AmplitudeParser.h b/src/libraries/UTILITIES/AmplitudeParser.h new file mode 100644 index 000000000..0902eb280 --- /dev/null +++ b/src/libraries/UTILITIES/AmplitudeParser.h @@ -0,0 +1,207 @@ +/** + * @file AmplitudeParser.h + * @author Kevin Scheuer + * @brief Helpers for parsing amplitude labels and building sum groups. + * @date 2026-05-06 + * + * @note All amplitudes that match the criteria for a sum group will be included, + * meaning amplitudes will be gathered across reactions and sums as long as they share + * the relevant quantum numbers based on the selected naming scheme. For example, if we + * have the following amplitudes: + * - "reaction1::sum1::p1ppS" + * - "reaction1::sum2::p1ppS" + * - "reaction2::sum1::p1ppS" + * Then all three of these amplitudes would be included in the same sum group "JPL" + * because they all share the same J, P, and L values. + * + * @page How to Build Your A Custom Naming Scheme + * The AmplitudeParser is designed to be flexible and support different amplitude naming + * schemes. If your amplitudes follow a different naming convention than the ones + * currently supported, you can easily extend the AmplitudeParser to accommodate your + * scheme. + * + * Add the label for your naming scheme in the NamingScheme enum, and then implement + * the parsing logic in the inferNamingScheme() and parseAmplitudeName() methods. Build + * your desired sum groups in sumsForScheme(), and make sure your label will be + * properly made in buildSumLabel(). The parser will then be able to extract the + * relevant quantum numbers from your amplitude names and group them into sums according + * to your definitions. + * + * It's very important that your scheme has unique identifiers for the sums you want to + * build. For example, if you want to build sums over all amplitudes with the same J, + * and another sum just over amplitudes with the same m, you need to ensure the J and + * m values can be uniquely extracted from the amplitude name. If both use digits, + * then the parser will not know which digits correspond to J and which correspond to m. + * In that case, you would need to modify the amplitude parser to look for specific + * delimiters or patterns in the amplitude name to extract the quantum numbers + * correctly. + * + */ + +#ifndef FIT_AMPLITUDE_PARSER_H +#define FIT_AMPLITUDE_PARSER_H + +#include +#include +#include + +class AmplitudeParser +{ +public: + enum class NamingScheme + { + Auto, + JLme, + eJPmL, + Lme + }; + + /** + * @brief Construct a new Amplitude Parser object + * + * @param naming_scheme what amplitude naming scheme to use for parsing amplitude names and building sum groups. + * Supported schemes include: + * - "auto": the parser will attempt to infer the naming scheme from the amplitude labels + * - "JLme": amp names like "1P-1+" (the one recommended for most amplitude analyses) + * - "eJPmL": amp names like "p1ppS" (a common vector-pseudoscalar naming scheme) + * - "Lme": amp names like "P1+" (a common two-pseudoscalar naming scheme) + */ + explicit AmplitudeParser(const std::string &naming_scheme = "auto"); + + /** + * @brief + * + * @param full_amplitudes + * @return std::map> + */ + std::map> + buildSumGroups(const std::vector &full_amplitudes) const; + +private: + /** + * @brief struct to contain the parsed quantum numbers from an amplitude name + * + * For example: + * "reaction::sum::p1ppS" would be parsed to: + * amp_name = "p1ppS" + * e = "p" + * J = "1" + * P = "p" + * m = "p" + * L = "S" + * + * "reaction::sum::2P1+" would be parsed to: + * amp_name = "2P1+" + * e = "+" + * J = "2" + * L = "P" + * m = "1" + * P is not included in this naming scheme, so it would be empty. + */ + struct ParsedAmplitude + { + std::string amp_name; ///< The "ampName" in the full "reaction::sum::ampName" + std::string e; ///< Reflectivity quantum number + std::string J; ///< Total angular momentum + std::string P; ///< Parity (sometimes excluded as its unnecessary) + std::string m; ///< Spin projection + std::string L; ///< Orbital angular momentum + }; + + NamingScheme m_requested_scheme; ///< The naming scheme to use, or "Auto" to infer from amplitude labels + + /** + * @brief Set the naming scheme based on the input string from the user. + * + * If the input is "auto" or is an unrecognized string, we will attempt to infer the + * naming scheme from the amplitude labels later. + * + * @param scheme_name user string indicating the naming scheme + * @return NamingScheme objective naming scheme enum value + */ + static NamingScheme namingSchemeFromString(const std::string &scheme_name); + + /** + * @brief Infer the naming scheme from the amplitude name format if "auto" is selected. + * + * @param amp_name the part of the full amplitude string after the last "::" + * + * @throw ERROR if the name is empty and we cannot infer a scheme + * + * @return NamingScheme::JLme if the name starts with a digit + * NamingScheme::Lme if the name starts with an uppercase letter and ends with + or - + * NamingScheme::eJPmL if the name starts with "p" or "m" and ends with an uppercase letter + * NamingScheme::Auto if we cannot infer a scheme + */ + static NamingScheme inferNamingScheme(const std::string &_name); + + /** + * @brief Filter amplitudes to belong to the chosen naming scheme + * + * To avoid undefined behavior when parsing amplitude names, we need to ensure that + * all amplitudes we attempt to group follow the same naming scheme. Any amps not + * belonging to the scheme will be reported in a printed warning, and excluded from + * the returned list. + * + * @param full_amplitudes all amplitudes from a fit result, in their full "reaction::sum::ampName" form + * @param scheme naming scheme to filter by + * @return std::vector full amplitudes that match the naming scheme + */ + std::vector filterAmplitudesByScheme(const std::vector &full_amplitudes, NamingScheme scheme) const; + + /** + * @brief Extract the amplitude name from the full amplitude string. + * + * For example, for "reaction::sum::amp", this would return "amp". This is the part + * of the amplitude label that typically contains the quantum numbers and is used + * for grouping into sums. + * + * @param full_name "reaction::sum::ampName" style amplitude string + * @return std::string ampName, the part of the full amplitude string after the last "::" + */ + static std::string ampName(const std::string &full_name); + + /** + * @brief Extract quantum numbers from the amplitude name according to the scheme + * + * @param amp_name ampName in "reaction::sum::ampName" + * @param scheme naming scheme to use for parsing + * @return ParsedAmplitude object that contains the original amp_name and the parsed quantum numbers + */ + ParsedAmplitude parseAmplitudeName(const std::string &_name, NamingScheme scheme) const; + + /** + * @brief Define the grouping of quantum numbers that will become a sum label + */ + struct SumGroupDef + { + std::vector quantum_numbers; ///< Quantum number fields to concatenate for the group label + }; + + /** + * @brief Get the sum definitions for a given naming scheme + * + * A sum label is denoted by an absence of a quantum number in the amplitude + * naming scheme. For example, the JLme scheme will have a "JL" sum group that + * includes all amplitudes with the same J and L, but any m and e. Since the + * ordering of the quantum numbers is dependent on the scheme, the group labels are + * thus also scheme-dependent. + * + * @param scheme naming scheme to get group definitions for + * @return std::vector + */ + std::vector sumsForScheme(NamingScheme scheme) const; + + /** + * @brief concatenates the quantum numbers for a SumGroupDef into a string label + * + * @param sum_group a grouping of quantum numbers to concatenate into a sum label, e.g. J and L + * @param parsed a parsed amplitude name + * @return std::string the label to be used as the key in sum map e.g. "1P" for a JL group with J=1 and L=P + */ + std::string buildSumLabel(const SumGroupDef &sum_group, const ParsedAmplitude &parsed) const; + + static const char *kModule; +}; + +#endif // FIT_AMPLITUDE_PARSER_H \ No newline at end of file diff --git a/src/libraries/UTILITIES/FitConverter.cc b/src/libraries/UTILITIES/FitConverter.cc index 4eb186c34..c6cb18cc6 100644 --- a/src/libraries/UTILITIES/FitConverter.cc +++ b/src/libraries/UTILITIES/FitConverter.cc @@ -4,9 +4,6 @@ * @brief Implementation of FitConverter class for converting AmpTools fit results to CSV * @date 2026-02-01 * - * TODO: - * - Add coherent sum extraction. This is amplitude name dependent, so may need custom - * AmplitudeParsers for different naming schemes. */ #include @@ -18,11 +15,12 @@ const char *FitConverter::kModule = "FitConverter"; -FitConverter::FitConverter(const std::string &filename, const bool &acceptance_correct, bool mute_warning) : m_fit_file(filename), - m_fit_results(filename, mute_warning), - m_cfg_info(m_fit_results.configInfo()), - m_is_acceptance_corrected(acceptance_correct), - m_error_matrix(m_fit_results.errorMatrix()) +FitConverter::FitConverter(const std::string &filename, const bool &acceptance_correct, bool mute_warning, const std::string &naming_scheme) : m_fit_file(filename), + m_fit_results(filename, mute_warning), + m_cfg_info(m_fit_results.configInfo()), + m_is_acceptance_corrected(acceptance_correct), + m_amplitude_parser(naming_scheme), + m_error_matrix(m_fit_results.errorMatrix()) { if (!m_fit_results.valid()) { @@ -87,12 +85,37 @@ void FitConverter::extract() << "\n"; } + // Build coherent-sum groups based on the selected amplitude naming scheme. + const std::map> coherent_sum_groups = + m_amplitude_parser.buildSumGroups(m_fit_results.ampList()); + + for (const auto &pair : coherent_sum_groups) + { + m_coherent_sum_intensities[pair.first] = m_fit_results.intensity(pair.second, m_is_acceptance_corrected); + + report(DEBUG, kModule) << "Coherent sum group: " << pair.first << " includes amplitudes:\n"; + for (const auto & : pair.second) + report(DEBUG, kModule) << "\t" << amp << "\n"; + + report(DEBUG, kModule) << "Coherent sum intensity for " + << pair.first + << " = " + << m_coherent_sum_intensities[pair.first].first + << " +/- " + << m_coherent_sum_intensities[pair.first].second + << "\n"; + } + // Extract production coefficients for each unique amplitude name - for (const auto &unique_amp : uniqueAmpNames()) + for (const auto &pair : m_constrained_amps) { // any full amplitude name will do, they are all constrained so their // production coefficients are identical - std::string full_amp_name = m_constrained_amps[unique_amp][0]; + const std::string &unique_amp = pair.first; + if (pair.second.empty()) + continue; + + const std::string &full_amp_name = pair.second[0]; m_production_coefficients[unique_amp] = m_fit_results.scaledProductionParameter(full_amp_name); } @@ -208,6 +231,11 @@ std::string FitConverter::getCSVHeader() const { header += pair.first + "," + pair.first + "_err,"; } + // coherent sum intensities + for (const auto &pair : m_coherent_sum_intensities) + { + header += pair.first + "," + pair.first + "_err,"; + } // phase differences for (const auto &pair : m_phase_differences) { @@ -253,6 +281,11 @@ std::string FitConverter::getCSVRow() const { row += std::to_string(pair.second.first) + "," + std::to_string(pair.second.second) + ","; } + // coherent sum intensities + for (const auto &pair : m_coherent_sum_intensities) + { + row += std::to_string(pair.second.first) + "," + std::to_string(pair.second.second) + ","; + } // phase differences for (const auto &pair : m_phase_differences) { diff --git a/src/libraries/UTILITIES/FitConverter.h b/src/libraries/UTILITIES/FitConverter.h index 393777346..f7d2cd6fb 100644 --- a/src/libraries/UTILITIES/FitConverter.h +++ b/src/libraries/UTILITIES/FitConverter.h @@ -19,6 +19,7 @@ #include #include +#include "AmplitudeParser.h" #include "IUAmpTools/FitResults.h" /** @@ -30,6 +31,8 @@ * - Standard fit outputs (likelihood, event counts, status codes) * - AmpTools parameters * - Real/Imaginary parts of production coefficients + * - Intensities of unique amplitudes. See constrainedAmps() for how unique amplitudes + * groups are defined. * - Coherent sums of amplitudes by quantum numbers * - Phase differences * @@ -61,12 +64,14 @@ class FitConverter * @param[in] filename Path to the .fit file * @param[in] acceptance_correct Whether to use acceptance-corrected intensities * @param[in] mute_warning Suppress warning about free parameters if true + * @param[in] naming_scheme Amplitude naming scheme: auto, JLme, eJPmL, or Lme * @throw ERROR If the fit file cannot be read or is invalid */ FitConverter( const std::string &filename, const bool &acceptance_correct = false, - bool mute_warning = false); + bool mute_warning = false, + const std::string &naming_scheme = "auto"); /** * @brief Extract fit data from the .fit file @@ -171,13 +176,25 @@ class FitConverter * @brief Map of unique amplitude names and their related constrained amplitudes * * A "full" amplitude is a "reaction::sum::ampName" string, and often amplitudes - * are constrained across reactions and sums. This map groups all such constrained - * amplitudes by their common "ampName". - * "myAmpName" -> { - * "reaction1::sum1::myAmpName", - * "reaction1::sum2::myAmpName", - * "reaction2::sum1::myAmpName", - * ...} + * are constrained across reactions and sums. The goal is to identify the shortest + * identifying string that can be used to group amplitudes that are constrained to + * each other. It will first attempt to group by "ampName", then by "sum::ampName", + * and if neither of those assumptions hold, it will just group by the full + * amplitude string. + * + * For example, if we have the following full amplitude strings: + * - "reaction1::sum1::myAmpName" + * - "reaction1::sum2::myAmpName" + * - "reaction2::sum1::myAmpName" + * + * We first check if all amplitudes with the same "ampName" (myAmpName) are + * constrained to each other, and if so, we group them under the unique amplitude + * name "myAmpName" and save the intensity + error for that group. If that + * assumption does not hold, we check if all amplitudes with the same "sum::ampName" + * are constrained to each other (i.e. across reactions) and if so, we group them + * under the unique amplitude name "sum::ampName". If neither assumption holds, we + * just group by the full amplitude name. + * * @note that the self-constraint is included in the list, so even amplitudes with * no constraints will have a vector of size one (itself). */ @@ -193,6 +210,15 @@ class FitConverter const std::map> & uniqueAmpIntensities() const { return m_unique_amp_intensities; } + /** + * @brief Map of coherent-sum labels to their values and errors + * + * The labels are built from the selected amplitude naming scheme and the + * configured coherent-sum groupings + */ + const std::map> & + coherentSumIntensities() const { return m_coherent_sum_intensities; } + /** * @brief Map of unique amplitude names to their production coefficients * @@ -233,12 +259,14 @@ class FitConverter const FitResults m_fit_results; ///< AmpTools FitResults object const ConfigurationInfo *m_cfg_info; ///< ConfigurationInfo from the fit const bool m_is_acceptance_corrected; ///< Whether to use acceptance-corrected intensities + const AmplitudeParser m_amplitude_parser; ///< Parser for amplitude labels and sums // see accessor methods for descriptions std::map m_standard_results; std::map> m_parameters; std::map> m_constrained_amps; std::map> m_unique_amp_intensities; + std::map> m_coherent_sum_intensities; std::map> m_production_coefficients; std::map, std::pair> m_phase_differences; std::vector> m_error_matrix; From fe35b584d8a54648682cefa8e0450349bd7c4d3a Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Thu, 7 May 2026 22:36:45 -0400 Subject: [PATCH 30/38] add the naming_scheme, and some comments --- .../convert_to_csv/convert_to_csv.cc | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index 455900ceb..ec1ad770a 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -4,8 +4,12 @@ * @brief * @date 2026-01-30 * - * @todo: complete docstring. Also make a function in the fit converter to write out - * the normalized integral matrix to a csv file, and use that like the corr/cov ones. + * This program converts AmpTools fit results in .fit files to CSV format. It can also + * optionally create separate CSV files for the covariance and correlation matrices, as + * well as a CSV file containing the data used in the fit. The program is designed to be + * flexible and can handle various amplitude naming schemes, which it uses to group + * amplitudes into coherent sums based on shared quantum numbers. The output CSV files + * can then be easily imported into just about any data analysis or plotting software. */ #include @@ -44,6 +48,7 @@ int main(int argc, char *argv[]) int sort_index = -1; bool is_acceptance_corrected = false; std::vector lower_vertex_indices; + std::string naming_scheme = "auto"; bool create_data_file = false; bool create_covariance = false; bool create_correlation = false; @@ -53,7 +58,7 @@ int main(int argc, char *argv[]) auto print_help = []() { std::cout << "Usage: convert_to_csv [-h] [-i INPUT_FILES] [-o OUTPUT_PATH]" - << " [-s] [--sort-index INDEX] [-a] [-d] [-l LOWER_VERTEX_INDICES] [-p] [-v]" + << " [-s] [--sort-index INDEX] [-a] [-d] [-l LOWER_VERTEX_INDICES] [-n NAMING_SCHEME] [-p] [-v]" << " [--correlation] [--covariance] \n" << " -i INPUT_FILES:\t\tFull path to the .fit file(s)\n" << " -o OUTPUT_PATH:\t\tFull path to the output .csv file\n" @@ -62,6 +67,7 @@ int main(int argc, char *argv[]) << " -a --acceptance-correct:\tAcceptance correct the intensities\n" << " -d --data-file:\t\tCreate separate data file\n" << " -l --lower-vertex-indices:\tSpecify indices of lower vertex particles in the data 4-vectors\n" + << " -n --naming-scheme:\t\tSpecify amplitude naming scheme (auto, JLme, eJPmL, or Lme) to use for grouping amplitudes into coherent sums. Default is auto, which will attempt to infer the naming scheme from the amplitude names in the fit file.\n" << " -p --preview:\t\t\tPrint files to be processed, but don't run\n" << " -v --verbose:\t\t\tPrint information from converter scripts as they run\n" << " --correlation:\t\tSave correlation matrix to separate csv file\n" @@ -176,6 +182,19 @@ int main(int argc, char *argv[]) lower_vertex_indices.push_back(index); } } + else if (arg == "-n" || arg == "--naming-scheme") + { + if (i + 1 < args.size() && !is_flag(args[i + 1])) + { + naming_scheme = std::string(args[++i]); + } + else + { + std::cerr << "Error: -n/--naming-scheme requires a value. See help message.\n"; + print_help(); + return 1; + } + } else if (is_flag(arg)) { std::cerr << "Unknown argument: " << arg << "\n"; @@ -184,6 +203,8 @@ int main(int argc, char *argv[]) } } + // ==== VALIDATE INPUTS ==== + // Error checks for required arguments if (input_files.empty()) { @@ -198,8 +219,6 @@ int main(int argc, char *argv[]) return 1; } - // ==== VALIDATE INPUTS ==== - // ensure only .fit files are provided if (!are_valid_fit_files(input_files)) { @@ -231,13 +250,12 @@ int main(int argc, char *argv[]) std::stringstream csv_result_data, csv_cov_data, csv_corr_data, csv_root_data; bool header_written = false; std::string first_file_result_header, first_file_cov_header, first_file_corr_header, first_file_data_header; - - // extract fit results, and optionally the cov/corr matrices + for (const auto &file : input_files) { if (verbose) report(INFO, kModule) << "Processing file: " << file << "\n"; - FitConverter converter(file, is_acceptance_corrected, !verbose); + FitConverter converter(file, is_acceptance_corrected, !verbose, naming_scheme); std::unique_ptr data_converter; if (create_data_file) { @@ -335,6 +353,8 @@ int main(int argc, char *argv[]) csv_corr_data << converter.getCSVCorrelationMatrix() << "\n"; if (create_data_file) csv_root_data << data_converter->getCSVRow() << "\n"; + + // loop to next input file to make a new row } // write the csv data to the output file From b03c8458e9da70b2dd5369fd70e8bae2a9abc2c5 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Thu, 7 May 2026 22:57:39 -0400 Subject: [PATCH 31/38] changed cout and cerr to report --- .../convert_to_csv/convert_to_csv.cc | 52 +++++++++---------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index ec1ad770a..de73dd8b1 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -57,22 +57,20 @@ int main(int argc, char *argv[]) auto print_help = []() { - std::cout << "Usage: convert_to_csv [-h] [-i INPUT_FILES] [-o OUTPUT_PATH]" - << " [-s] [--sort-index INDEX] [-a] [-d] [-l LOWER_VERTEX_INDICES] [-n NAMING_SCHEME] [-p] [-v]" - << " [--correlation] [--covariance] \n" - << " -i INPUT_FILES:\t\tFull path to the .fit file(s)\n" - << " -o OUTPUT_PATH:\t\tFull path to the output .csv file\n" - << " -s, --sort:\t\t\tSort files by last number in the file name or path (default:true)\n" - << " --sort-index INDEX:\t\tWhat number in file path to sort by (default:-1, so last number in path is used)\n" - << " -a --acceptance-correct:\tAcceptance correct the intensities\n" - << " -d --data-file:\t\tCreate separate data file\n" - << " -l --lower-vertex-indices:\tSpecify indices of lower vertex particles in the data 4-vectors\n" - << " -n --naming-scheme:\t\tSpecify amplitude naming scheme (auto, JLme, eJPmL, or Lme) to use for grouping amplitudes into coherent sums. Default is auto, which will attempt to infer the naming scheme from the amplitude names in the fit file.\n" - << " -p --preview:\t\t\tPrint files to be processed, but don't run\n" - << " -v --verbose:\t\t\tPrint information from converter scripts as they run\n" - << " --correlation:\t\tSave correlation matrix to separate csv file\n" - << " --covariance:\t\t\tSave covariance matrix to separate csv file\n" - << " -h, --help:\t\t\tShow this help message\n"; + report(INFO, kModule) << "Usage: convert_to_csv [-h] [-i INPUT_FILES] [-o OUTPUT_PATH] [-s] [--sort-index INDEX] [-a] [-d] [-l LOWER_VERTEX_INDICES] [-n NAMING_SCHEME] [-p] [-v] [--correlation] [--covariance] \n"; + report(INFO, kModule) << " -i INPUT_FILES:\t\tFull path to the .fit file(s)\n"; + report(INFO, kModule) << " -o OUTPUT_PATH:\t\tFull path to the output .csv file\n"; + report(INFO, kModule) << " -s, --sort:\t\t\tSort files by last number in the file name or path (default:true)\n"; + report(INFO, kModule) << " --sort-index INDEX:\t\tWhat number in file path to sort by (default:-1, so last number in path is used)\n"; + report(INFO, kModule) << " -a --acceptance-correct:\tAcceptance correct the intensities\n"; + report(INFO, kModule) << " -d --data-file:\t\tCreate separate data file\n"; + report(INFO, kModule) << " -l --lower-vertex-indices:\tSpecify indices of lower vertex particles in the data 4-vectors\n"; + report(INFO, kModule) << " -n --naming-scheme:\t\tAmp naming scheme for coherent sum groupings. Default is auto (infers scheme from the amp names)\n"; + report(INFO, kModule) << " -p --preview:\t\t\tPrint files to be processed, but don't run\n"; + report(INFO, kModule) << " -v --verbose:\t\t\tPrint information from converter scripts as they run\n"; + report(INFO, kModule) << " --correlation:\t\tSave correlation matrix to separate csv file\n"; + report(INFO, kModule) << " --covariance:\t\t\tSave covariance matrix to separate csv file\n"; + report(INFO, kModule) << " -h, --help:\t\t\tShow this help message\n"; }; // first check if help message is requested @@ -112,7 +110,7 @@ int main(int argc, char *argv[]) } else { - std::cerr << "Error: -o/--output requires a value. See help message.\n"; + report(ERROR, kModule) << "Error: -o/--output requires a value. See help message.\n"; print_help(); return 1; } @@ -129,14 +127,14 @@ int main(int argc, char *argv[]) auto [ptr, ec] = std::from_chars(val.data(), val.data() + val.size(), sort_index); if (ec != std::errc()) { - std::cerr << "Error: --sort-index requires a valid integer. See help message.\n"; + report(ERROR, kModule) << "Error: --sort-index requires a valid integer. See help message.\n"; print_help(); return 1; } } else { - std::cerr << "Error: --sort-index requires a value. See help message.\n"; + report(ERROR, kModule) << "Error: --sort-index requires a value. See help message.\n"; print_help(); return 1; } @@ -175,7 +173,7 @@ int main(int argc, char *argv[]) auto [ptr, ec] = std::from_chars(val.data(), val.data() + val.size(), index); if (ec != std::errc()) { - std::cerr << "Error: -l/--lower-vertex-indices requires valid integers. See help message.\n"; + report(ERROR, kModule) << "Error: -l/--lower-vertex-indices requires valid integers. See help message.\n"; print_help(); return 1; } @@ -190,14 +188,14 @@ int main(int argc, char *argv[]) } else { - std::cerr << "Error: -n/--naming-scheme requires a value. See help message.\n"; + report(ERROR, kModule) << "Error: -n/--naming-scheme requires a value. See help message.\n"; print_help(); return 1; } } else if (is_flag(arg)) { - std::cerr << "Unknown argument: " << arg << "\n"; + report(ERROR, kModule) << "Unknown argument: " << arg << "\n"; print_help(); return 1; } @@ -208,13 +206,13 @@ int main(int argc, char *argv[]) // Error checks for required arguments if (input_files.empty()) { - std::cerr << "Input files must be provided. See help message." << "\n"; + report(ERROR, kModule) << "Input files must be provided. See help message." << "\n"; print_help(); return 1; } if (output_file.empty()) { - std::cerr << "Output file must be provided. See help message." << "\n"; + report(ERROR, kModule) << "Output file must be provided. See help message." << "\n"; print_help(); return 1; } @@ -222,7 +220,7 @@ int main(int argc, char *argv[]) // ensure only .fit files are provided if (!are_valid_fit_files(input_files)) { - std::cerr << "One or more input files are not valid .fit files.\n"; + report(ERROR, kModule) << "One or more input files are not valid .fit files.\n"; return 1; } @@ -238,10 +236,10 @@ int main(int argc, char *argv[]) if (preview) { - std::cout << "Preview mode enabled. Printing files and exiting\n"; + report(INFO, kModule) << "Preview mode enabled. Printing files and exiting\n"; for (const auto &file : input_files) { - std::cout << "\t" << file << "\n"; + report(INFO, kModule) << "\t" << file << "\n"; } return 0; } From 5747436f74cd1a595ff6eb9e11b66050b6e5c4f3 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Fri, 8 May 2026 19:00:06 -0400 Subject: [PATCH 32/38] Added way to get normalization integral matrix Also added a quick method to get the reaction string, which was helpful for the normInt functions. This commit also includes some formatting. --- src/libraries/UTILITIES/FitConverter.cc | 116 ++++++++++++++++-- src/libraries/UTILITIES/FitConverter.h | 72 +++++++---- .../convert_to_csv/convert_to_csv.cc | 40 +++++- 3 files changed, 192 insertions(+), 36 deletions(-) diff --git a/src/libraries/UTILITIES/FitConverter.cc b/src/libraries/UTILITIES/FitConverter.cc index c6cb18cc6..bec3911d8 100644 --- a/src/libraries/UTILITIES/FitConverter.cc +++ b/src/libraries/UTILITIES/FitConverter.cc @@ -7,20 +7,23 @@ */ #include +#include +#include #include "FitConverter.h" #include "IUAmpTools/ConfigurationInfo.h" #include "IUAmpTools/FitResults.h" +#include "IUAmpTools/NormIntInterface.h" #include "IUAmpTools/report.h" const char *FitConverter::kModule = "FitConverter"; FitConverter::FitConverter(const std::string &filename, const bool &acceptance_correct, bool mute_warning, const std::string &naming_scheme) : m_fit_file(filename), - m_fit_results(filename, mute_warning), - m_cfg_info(m_fit_results.configInfo()), - m_is_acceptance_corrected(acceptance_correct), - m_amplitude_parser(naming_scheme), - m_error_matrix(m_fit_results.errorMatrix()) + m_fit_results(filename, mute_warning), + m_cfg_info(m_fit_results.configInfo()), + m_is_acceptance_corrected(acceptance_correct), + m_amplitude_parser(naming_scheme), + m_error_matrix(m_fit_results.errorMatrix()) { if (!m_fit_results.valid()) { @@ -96,7 +99,7 @@ void FitConverter::extract() report(DEBUG, kModule) << "Coherent sum group: " << pair.first << " includes amplitudes:\n"; for (const auto & : pair.second) report(DEBUG, kModule) << "\t" << amp << "\n"; - + report(DEBUG, kModule) << "Coherent sum intensity for " << pair.first << " = " @@ -386,6 +389,87 @@ std::string FitConverter::getCSVCorrelationMatrix() const return csv; } +std::string FitConverter::getCSVNormIntMatrixHeader() const +{ + std::string header = "file,amplitude"; + for (const auto & : m_fit_results.ampList()) + { + header += "," + amp; + } + return header; +} + +std::string FitConverter::getCSVNormIntMatrix() const +{ + std::string csv; + + // build the norm_int_map of reactions -> normIntInterfaces + std::map norm_int_map; + for (const std::string &reaction : m_fit_results.reactionList()) + { + const NormIntInterface *normInt = m_fit_results.normInt(reaction); + if (normInt) + { + norm_int_map[reaction] = normInt; + } + else + { + report(ERROR, kModule) << "Could not retrieve normalization integral interface for reaction " << reaction << "\n"; + assert(false); + } + } + + std::vector amp_list = m_fit_results.ampList(); + for (size_t row_idx = 0; row_idx < m_fit_results.ampList().size(); row_idx++) + { + const std::string &row_amp = amp_list.at(row_idx); + std::string row_reaction = getReactionString(row_amp); + + csv += m_fit_file + "," + row_amp; // write file name and this amplitude name as first two columns of this row + + for (size_t col_idx = 0; col_idx < m_fit_results.ampList().size(); col_idx++) + { + const std::string &col_amp = amp_list.at(col_idx); + std::string col_reaction = getReactionString(col_amp); + + if (row_reaction != col_reaction) + { + csv += ",0+0j"; // normInt is zero for amplitudes from different reactions + continue; // skip to next column + } + + const NormIntInterface *normInt = norm_int_map[row_reaction]; + if (!normInt) + { + report(ERROR, kModule) << "Null normalization integral interface for reaction " << row_reaction << "\n"; + assert(false); + } + + std::complex value = normInt->normInt(row_amp, col_amp); + + std::cout << "NormInt for " << row_amp << " and " << col_amp << " = " << value.real() << "+" << value.imag() << "j\n"; + + // Format as pandas-readable complex number e.g. 1+2j or 1-2j + std::ostringstream oss; + oss << std::scientific << std::setprecision(std::numeric_limits::max_digits10 - 1); + oss << value.real(); + if (value.imag() >= 0) + { + oss << "+"; + } + oss << value.imag() << "j"; + csv += "," + oss.str(); + } // end column loop + + // don't add newline to last row, to avoid extra blank line at end of file + if (row_idx != m_fit_results.ampList().size() - 1) + csv += "\n"; + + } // end row loop + + return csv; +} + std::map> FitConverter::findConstrainedAmps() const { std::map> constrained_amp_map; @@ -422,7 +506,7 @@ std::map> FitConverter::findConstrainedAmp return constrained_amp_map; } - // if we reach here, neither assumption held, so just map full amplitude names to + // if we reach here, neither assumption held, so just map full amplitude names to // themselves for (const auto &litude : m_fit_results.ampList()) { @@ -431,7 +515,6 @@ std::map> FitConverter::findConstrainedAmp return constrained_amp_map; } - bool FitConverter::ampNamesAreConstrained() const { for (const auto &unique_amp : uniqueAmpNames()) @@ -451,7 +534,7 @@ bool FitConverter::ampNamesAreConstrained() const ++it; } - // The constrained terms does not include the self term, so add it back in for + // The constrained terms does not include the self term, so add it back in for // comparison constrained_terms.push_back(shared_terms[0]); @@ -488,7 +571,7 @@ bool FitConverter::sumAmpNamesAreConstrained() const ++it; } - // The constrained terms does not include the self term, so add it back in for + // The constrained terms does not include the self term, so add it back in for // comparison constrained_terms.push_back(shared_terms[0]); @@ -501,4 +584,17 @@ bool FitConverter::sumAmpNamesAreConstrained() const } } return true; +} + +std::string FitConverter::getReactionString(const std::string &full_amplitude) +{ + size_t first_colon = full_amplitude.find("::"); + size_t second_colon = full_amplitude.find("::", first_colon + 2); + if (first_colon == std::string::npos || second_colon == std::string::npos) + { + report(ERROR, kModule) << "Amplitude name " << full_amplitude << " does not match expected format 'reaction::sum::amp'.\n"; + assert(false); + } + std::string reaction = full_amplitude.substr(0, first_colon); + return reaction; } \ No newline at end of file diff --git a/src/libraries/UTILITIES/FitConverter.h b/src/libraries/UTILITIES/FitConverter.h index f7d2cd6fb..3fa35de87 100644 --- a/src/libraries/UTILITIES/FitConverter.h +++ b/src/libraries/UTILITIES/FitConverter.h @@ -31,7 +31,7 @@ * - Standard fit outputs (likelihood, event counts, status codes) * - AmpTools parameters * - Real/Imaginary parts of production coefficients - * - Intensities of unique amplitudes. See constrainedAmps() for how unique amplitudes + * - Intensities of unique amplitudes. See constrainedAmps() for how unique amplitudes * groups are defined. * - Coherent sums of amplitudes by quantum numbers * - Phase differences @@ -134,6 +134,23 @@ class FitConverter */ std::string getCSVCorrelationMatrix() const; + /** + * @brief Get the CSV header string for the normalization integral matrix + * + * @return std::string "file, amplitude, amp1, amp2, ..., ampN" + */ + std::string getCSVNormIntMatrixHeader() const; + + /** + * @brief Get the CSV representation of the normalization integral matrix + * + * Every row corresponds to an amplitude, with the first two columns being the file + * and amplitude names, followed by complex normalization integral values with all + * other amplitudes. Complex values are formatted as pandas-readable strings (e.g., + * "1+2j"). + */ + std::string getCSVNormIntMatrix() const; + /** * @brief return set of all unique amplitude names in a fit result * @@ -144,11 +161,11 @@ class FitConverter /** * @brief return set of all unique coherent sum + amp names in a fit result - * + * * A "full" amplitude is a "reaction::sum::ampName" string, so this extracts all * unique "sum::ampName" strings - * - * @return std::set + * + * @return std::set */ std::set uniqueSumAmpNames() const; @@ -179,22 +196,22 @@ class FitConverter * are constrained across reactions and sums. The goal is to identify the shortest * identifying string that can be used to group amplitudes that are constrained to * each other. It will first attempt to group by "ampName", then by "sum::ampName", - * and if neither of those assumptions hold, it will just group by the full - * amplitude string. - * + * and if neither of those assumptions hold, it will just group by the full + * amplitude string. + * * For example, if we have the following full amplitude strings: * - "reaction1::sum1::myAmpName" * - "reaction1::sum2::myAmpName" * - "reaction2::sum1::myAmpName" - * - * We first check if all amplitudes with the same "ampName" (myAmpName) are - * constrained to each other, and if so, we group them under the unique amplitude - * name "myAmpName" and save the intensity + error for that group. If that + * + * We first check if all amplitudes with the same "ampName" (myAmpName) are + * constrained to each other, and if so, we group them under the unique amplitude + * name "myAmpName" and save the intensity + error for that group. If that * assumption does not hold, we check if all amplitudes with the same "sum::ampName" - * are constrained to each other (i.e. across reactions) and if so, we group them - * under the unique amplitude name "sum::ampName". If neither assumption holds, we + * are constrained to each other (i.e. across reactions) and if so, we group them + * under the unique amplitude name "sum::ampName". If neither assumption holds, we * just group by the full amplitude name. - * + * * @note that the self-constraint is included in the list, so even amplitudes with * no constraints will have a vector of size one (itself). */ @@ -255,10 +272,10 @@ class FitConverter ~FitConverter() = default; private: - const std::string m_fit_file; ///< Path to the .fit file - const FitResults m_fit_results; ///< AmpTools FitResults object - const ConfigurationInfo *m_cfg_info; ///< ConfigurationInfo from the fit - const bool m_is_acceptance_corrected; ///< Whether to use acceptance-corrected intensities + const std::string m_fit_file; ///< Path to the .fit file + const FitResults m_fit_results; ///< AmpTools FitResults object + const ConfigurationInfo *m_cfg_info; ///< ConfigurationInfo from the fit + const bool m_is_acceptance_corrected; ///< Whether to use acceptance-corrected intensities const AmplitudeParser m_amplitude_parser; ///< Parser for amplitude labels and sums // see accessor methods for descriptions @@ -284,15 +301,14 @@ class FitConverter * amplitude names (ampName, sum::ampName, or full amplitude), and the values are * vectors of the full amplitude strings that are constrained to each other. * - * @note the keys are whats written to the CSV, and is why it first attempts to + * @note the keys are whats written to the CSV, and is why it first attempts to * extract just the ampName for simplicity. */ std::map> findConstrainedAmps() const; - /** * @brief Checks if amplitude names are constrained to each other - * + * * A "full" amplitude is a "reaction::sum::ampName" string, so this checks if all * amplitudes with the same "ampName" are constrained to each other across reactions * and sums. @@ -301,13 +317,23 @@ class FitConverter /** * @brief Checks if coherent sum + amplitude names are constrained to each other - * + * * A "full" amplitude is a "reaction::sum::ampName" string, so this checks if all * amplitudes with the same "sum::ampName" are constrained to each other across - * reactions. + * reactions. */ bool sumAmpNamesAreConstrained() const; + /** + * @brief Get the reaction string from a full amplitude name + * + * This extracts the reaction part from a "reaction::sum::ampName" string. + * + * @param full_amp_name The full amplitude name + * @return The reaction string + */ + static std::string getReactionString(const std::string &full_amp_name); + static const char *kModule; }; diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index de73dd8b1..7f049a181 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -52,12 +52,13 @@ int main(int argc, char *argv[]) bool create_data_file = false; bool create_covariance = false; bool create_correlation = false; + bool create_norm_int = false; bool verbose = false; bool preview = false; auto print_help = []() { - report(INFO, kModule) << "Usage: convert_to_csv [-h] [-i INPUT_FILES] [-o OUTPUT_PATH] [-s] [--sort-index INDEX] [-a] [-d] [-l LOWER_VERTEX_INDICES] [-n NAMING_SCHEME] [-p] [-v] [--correlation] [--covariance] \n"; + report(INFO, kModule) << "Usage: convert_to_csv [-h] [-i INPUT_FILES] [-o OUTPUT_PATH] [-s] [--sort-index INDEX] [-a] [-d] [-l LOWER_VERTEX_INDICES] [-n NAMING_SCHEME] [-p] [-v] [--correlation] [--covariance] [--norm-int] \n"; report(INFO, kModule) << " -i INPUT_FILES:\t\tFull path to the .fit file(s)\n"; report(INFO, kModule) << " -o OUTPUT_PATH:\t\tFull path to the output .csv file\n"; report(INFO, kModule) << " -s, --sort:\t\t\tSort files by last number in the file name or path (default:true)\n"; @@ -70,6 +71,7 @@ int main(int argc, char *argv[]) report(INFO, kModule) << " -v --verbose:\t\t\tPrint information from converter scripts as they run\n"; report(INFO, kModule) << " --correlation:\t\tSave correlation matrix to separate csv file\n"; report(INFO, kModule) << " --covariance:\t\t\tSave covariance matrix to separate csv file\n"; + report(INFO, kModule) << " --norm-int:\t\t\tSave normalization integral matrix to separate csv file\n"; report(INFO, kModule) << " -h, --help:\t\t\tShow this help message\n"; }; @@ -155,6 +157,10 @@ int main(int argc, char *argv[]) { create_correlation = true; } + else if (arg == "--norm-int") + { + create_norm_int = true; + } else if (arg == "-v" || arg == "--verbose") { verbose = true; @@ -245,9 +251,9 @@ int main(int argc, char *argv[]) } // ==== PROCESS FILES AND WRITE TO CSV ==== - std::stringstream csv_result_data, csv_cov_data, csv_corr_data, csv_root_data; + std::stringstream csv_result_data, csv_cov_data, csv_corr_data, csv_norm_int_data, csv_root_data; bool header_written = false; - std::string first_file_result_header, first_file_cov_header, first_file_corr_header, first_file_data_header; + std::string first_file_result_header, first_file_cov_header, first_file_corr_header, first_file_norm_int_header, first_file_data_header; for (const auto &file : input_files) { @@ -288,6 +294,13 @@ int main(int argc, char *argv[]) first_file_corr_header = corr_header; } + if (create_norm_int) + { + std::string norm_int_header = converter.getCSVNormIntMatrixHeader(); + csv_norm_int_data << norm_int_header << "\n"; + first_file_norm_int_header = norm_int_header; + } + if (create_data_file) { std::string data_header = data_converter->getCSVHeader(); @@ -331,6 +344,17 @@ int main(int argc, char *argv[]) return 1; } } + if (create_norm_int) + { + std::string norm_int_header = converter.getCSVNormIntMatrixHeader(); + if (norm_int_header != first_file_norm_int_header) + { + report(ERROR, kModule) << "File " + << file + << " has a different normalization integral CSV header than the first file. Aborting\n"; + return 1; + } + } if (create_data_file) { std::string data_header = data_converter->getCSVHeader(); @@ -349,6 +373,8 @@ int main(int argc, char *argv[]) csv_cov_data << converter.getCSVCovarianceMatrix() << "\n"; if (create_correlation) csv_corr_data << converter.getCSVCorrelationMatrix() << "\n"; + if (create_norm_int) + csv_norm_int_data << converter.getCSVNormIntMatrix() << "\n"; if (create_data_file) csv_root_data << data_converter->getCSVRow() << "\n"; @@ -379,6 +405,14 @@ int main(int argc, char *argv[]) corr_file << csv_corr_data.str(); corr_file.close(); } + if (create_norm_int) + { + std::filesystem::path output_path(output_file); + std::string norm_int_file = (output_path.parent_path() / (output_path.stem().string() + "_norm_int.csv")).string(); + std::ofstream norm_int_out_file(norm_int_file); + norm_int_out_file << csv_norm_int_data.str(); + norm_int_out_file.close(); + } if (create_data_file) { std::filesystem::path output_path(output_file); From 920ddab8add2ce9bf31e6a49b9cb9705e80ac3d0 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Fri, 8 May 2026 19:04:47 -0400 Subject: [PATCH 33/38] updated docstring --- .../convert_to_csv/convert_to_csv.cc | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index 7f049a181..a69e01521 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -5,11 +5,18 @@ * @date 2026-01-30 * * This program converts AmpTools fit results in .fit files to CSV format. It can also - * optionally create separate CSV files for the covariance and correlation matrices, as - * well as a CSV file containing the data used in the fit. The program is designed to be - * flexible and can handle various amplitude naming schemes, which it uses to group - * amplitudes into coherent sums based on shared quantum numbers. The output CSV files - * can then be easily imported into just about any data analysis or plotting software. + * optionally create separate CSV files for the covariance, correlation, and + * normalization integral matrices, as well as a CSV file containing the data used in + * the fit. The program is designed to be flexible and can handle various amplitude + * naming schemes, which it uses to group amplitudes into coherent sums based on shared + * quantum numbers. The output CSV files can then be easily imported into just about any + * data analysis or plotting software. + * + * Print the help message with + * @verbatim + * user@host:~$ convert_to_csv -h + * @endverbatim + * for more details. */ #include From c083b54f7bd327e7ed315ca75a92602c147e6bc2 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Fri, 8 May 2026 19:05:18 -0400 Subject: [PATCH 34/38] docstring update --- src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index a69e01521..fac8b1804 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -1,7 +1,7 @@ /** * @file convert_to_csv.cc * @author Kevin Scheuer - * @brief + * @brief A utility for converting AmpTools .fit files to CSV format * @date 2026-01-30 * * This program converts AmpTools fit results in .fit files to CSV format. It can also From cc843c426ee08150e11954ed66adc714d0eaf354 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Fri, 8 May 2026 20:20:36 -0400 Subject: [PATCH 35/38] Updated dates and some comments --- src/libraries/UTILITIES/AmplitudeParser.cc | 2 +- src/libraries/UTILITIES/AmplitudeParser.h | 35 +++++++++++++------ src/libraries/UTILITIES/FitConverter.cc | 2 +- src/libraries/UTILITIES/FitConverter.h | 2 +- src/libraries/UTILITIES/RootDataConverter.cc | 2 +- src/libraries/UTILITIES/RootDataConverter.h | 2 +- .../convert_to_csv/convert_to_csv.cc | 2 +- 7 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/libraries/UTILITIES/AmplitudeParser.cc b/src/libraries/UTILITIES/AmplitudeParser.cc index 05ddd9011..6b999eda6 100644 --- a/src/libraries/UTILITIES/AmplitudeParser.cc +++ b/src/libraries/UTILITIES/AmplitudeParser.cc @@ -2,7 +2,7 @@ * @file AmplitudeParser.cc * @author Kevin Scheuer * @brief Implementation of the amplitude naming parser and sum grouping logic. - * @date 2026-05-06 + * @date 2026-05-08 */ #include diff --git a/src/libraries/UTILITIES/AmplitudeParser.h b/src/libraries/UTILITIES/AmplitudeParser.h index 0902eb280..9dcf83b06 100644 --- a/src/libraries/UTILITIES/AmplitudeParser.h +++ b/src/libraries/UTILITIES/AmplitudeParser.h @@ -2,17 +2,7 @@ * @file AmplitudeParser.h * @author Kevin Scheuer * @brief Helpers for parsing amplitude labels and building sum groups. - * @date 2026-05-06 - * - * @note All amplitudes that match the criteria for a sum group will be included, - * meaning amplitudes will be gathered across reactions and sums as long as they share - * the relevant quantum numbers based on the selected naming scheme. For example, if we - * have the following amplitudes: - * - "reaction1::sum1::p1ppS" - * - "reaction1::sum2::p1ppS" - * - "reaction2::sum1::p1ppS" - * Then all three of these amplitudes would be included in the same sum group "JPL" - * because they all share the same J, P, and L values. + * @date 2026-05-08 * * @page How to Build Your A Custom Naming Scheme * The AmplitudeParser is designed to be flexible and support different amplitude naming @@ -45,6 +35,29 @@ #include #include +/** + * @class AmplitudeParser + * @brief Parses amplitude names and builds coherent sums based on naming schemes + * + * This class is responsible for parsing amplitude names from AmpTools fit results and + * grouping them into coherent sums based on shared quantum numbers. It supports + * multiple naming schemes, which can be selected by the user or inferred from the + * amplitude labels. The parser identifies the relevant quantum numbers from the + * amplitude names and groups amplitudes that share those quantum numbers into sums, + * which can then be used to calculate intensities of sums e.g. total reflectivity + * contributions, or total spin contributions. + * + * @note All amplitudes that match the criteria for a sum group will be included, + * meaning amplitudes will be gathered across reactions and sums as long as they share + * the relevant quantum numbers based on the selected naming scheme. For example, if we + * have the following amplitudes in the "eJPmL" naming scheme: + * - "reaction1::sum1::p1ppS" + * - "reaction1::sum2::p1ppS" + * - "reaction2::sum1::p1ppS" + * Then all three of these amplitudes would be included in the same sum group "JPL" + * because they all share the same J, P, and L values. + * + */ class AmplitudeParser { public: diff --git a/src/libraries/UTILITIES/FitConverter.cc b/src/libraries/UTILITIES/FitConverter.cc index bec3911d8..6ce591399 100644 --- a/src/libraries/UTILITIES/FitConverter.cc +++ b/src/libraries/UTILITIES/FitConverter.cc @@ -2,7 +2,7 @@ * @file FitConverter.cc * @author Kevin Scheuer * @brief Implementation of FitConverter class for converting AmpTools fit results to CSV - * @date 2026-02-01 + * @date 2026-05-08 * */ diff --git a/src/libraries/UTILITIES/FitConverter.h b/src/libraries/UTILITIES/FitConverter.h index 3fa35de87..2e78450ef 100644 --- a/src/libraries/UTILITIES/FitConverter.h +++ b/src/libraries/UTILITIES/FitConverter.h @@ -2,7 +2,7 @@ * @file FitConverter.h * @author Kevin Scheuer * @brief Class for converting AmpTools fit results to any format - * @date 2026-02-01 + * @date 2026-05-08 * * @note that this currently is built to convert to CSV, but the class can be extended * to convert to any other format by accessing the various containers that store diff --git a/src/libraries/UTILITIES/RootDataConverter.cc b/src/libraries/UTILITIES/RootDataConverter.cc index f27d9c4cc..57bc513b2 100644 --- a/src/libraries/UTILITIES/RootDataConverter.cc +++ b/src/libraries/UTILITIES/RootDataConverter.cc @@ -2,7 +2,7 @@ * @file RootDataConverter.cc * @author Kevin Scheuer * @brief Implementation of RootDataConverter class - * @date 2026-02-02 + * @date 2026-05-08 * */ diff --git a/src/libraries/UTILITIES/RootDataConverter.h b/src/libraries/UTILITIES/RootDataConverter.h index e078af9b6..0da07423f 100644 --- a/src/libraries/UTILITIES/RootDataConverter.h +++ b/src/libraries/UTILITIES/RootDataConverter.h @@ -2,7 +2,7 @@ * @file RootDataConverter.h * @author Kevin Scheuer * @brief Class for converting ROOT tree data associated with an AmpTools fit to any format - * @date 2026-02-02 + * @date 2026-05-08 * * @note that this currently is built to convert to CSV, but the class can be extended * to convert to any other format by accessing the various containers that store diff --git a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc index fac8b1804..40f02fa94 100644 --- a/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc +++ b/src/programs/AmplitudeAnalysis/convert_to_csv/convert_to_csv.cc @@ -2,7 +2,7 @@ * @file convert_to_csv.cc * @author Kevin Scheuer * @brief A utility for converting AmpTools .fit files to CSV format - * @date 2026-01-30 + * @date 2026-05-08 * * This program converts AmpTools fit results in .fit files to CSV format. It can also * optionally create separate CSV files for the covariance, correlation, and From 904cd5dbd3d74d16fb4b96b77e2eaf5d3f95166f Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Sun, 10 May 2026 17:08:38 -0400 Subject: [PATCH 36/38] removed forgotten cout line --- src/libraries/UTILITIES/FitConverter.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/libraries/UTILITIES/FitConverter.cc b/src/libraries/UTILITIES/FitConverter.cc index 6ce591399..773f71588 100644 --- a/src/libraries/UTILITIES/FitConverter.cc +++ b/src/libraries/UTILITIES/FitConverter.cc @@ -447,8 +447,6 @@ std::string FitConverter::getCSVNormIntMatrix() const std::complex value = normInt->normInt(row_amp, col_amp); - std::cout << "NormInt for " << row_amp << " and " << col_amp << " = " << value.real() << "+" << value.imag() << "j\n"; - // Format as pandas-readable complex number e.g. 1+2j or 1-2j std::ostringstream oss; oss << std::scientific << std::setprecision(std::numeric_limits::max_digits10 - 1); From 958d1629703c008333843320cfe5c9c11a955af7 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Tue, 2 Jun 2026 10:12:54 -0400 Subject: [PATCH 37/38] fixed function description --- src/libraries/UTILITIES/FitConverter.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/libraries/UTILITIES/FitConverter.h b/src/libraries/UTILITIES/FitConverter.h index 2e78450ef..d65bbba65 100644 --- a/src/libraries/UTILITIES/FitConverter.h +++ b/src/libraries/UTILITIES/FitConverter.h @@ -205,12 +205,11 @@ class FitConverter * - "reaction2::sum1::myAmpName" * * We first check if all amplitudes with the same "ampName" (myAmpName) are - * constrained to each other, and if so, we group them under the unique amplitude - * name "myAmpName" and save the intensity + error for that group. If that - * assumption does not hold, we check if all amplitudes with the same "sum::ampName" - * are constrained to each other (i.e. across reactions) and if so, we group them - * under the unique amplitude name "sum::ampName". If neither assumption holds, we - * just group by the full amplitude name. + * constrained to each other, and if so, we group them under the unique key + * "myAmpName". If that assumption does not hold, we check if all amplitudes with + * the same "sum::ampName" are constrained to each other (i.e. across reactions) and + * if so, we group them under the unique key "sum::ampName". If neither assumption + * holds, we just group by the full amplitude name. * * @note that the self-constraint is included in the list, so even amplitudes with * no constraints will have a vector of size one (itself). From ad6bf3d4207de34c608d937c7a071475d9a4ec35 Mon Sep 17 00:00:00 2001 From: kevScheuer Date: Wed, 3 Jun 2026 05:05:34 -0400 Subject: [PATCH 38/38] Add estDistToMinimum to standard output --- src/libraries/UTILITIES/FitConverter.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libraries/UTILITIES/FitConverter.cc b/src/libraries/UTILITIES/FitConverter.cc index 773f71588..4f04872d6 100644 --- a/src/libraries/UTILITIES/FitConverter.cc +++ b/src/libraries/UTILITIES/FitConverter.cc @@ -43,6 +43,7 @@ void FitConverter::extract() m_standard_results["intensity_err"] = m_fit_results.intensity(false).second; m_standard_results["ac_intensity"] = m_fit_results.intensity(true).first; m_standard_results["ac_intensity_err"] = m_fit_results.intensity(true).second; + m_standard_results["estDistToMinimum"] = m_fit_results.estDistToMinimum(); for (const auto &pair : m_standard_results) { report(DEBUG, kModule) << "Standard result: " << pair.first << " = " << pair.second << "\n";