From 89ae8b986512174493b77b70a38d2c9e73d2eb33 Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Thu, 5 Feb 2026 15:31:50 +0000
Subject: [PATCH 1/5] NPI-4453 introduce DataFrame hashing and test baselining
 functionality

---
 gnssanalysis/gn_utils.py                      | 372 +++++++++++++++++-
 .../test_duplicate_df_rejection.pickledlist   | Bin 0 -> 755 bytes
 ..._duplicate_df_rejection.pickledlist_sha256 |   1 +
 .../test_repeat_caller_rejection.pickledlist  | Bin 0 -> 595 bytes
 ...repeat_caller_rejection.pickledlist_sha256 |   1 +
 ...t_verify_refusal_in_wrong_mode.pickledlist | Bin 0 -> 595 bytes
 ...y_refusal_in_wrong_mode.pickledlist_sha256 |   1 +
 tests/test_utils.py                           | 130 +++++-
 8 files changed, 503 insertions(+), 2 deletions(-)
 create mode 100644 tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist
 create mode 100644 tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist_sha256
 create mode 100644 tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist
 create mode 100644 tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist_sha256
 create mode 100644 tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist
 create mode 100644 tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist_sha256

diff --git a/gnssanalysis/gn_utils.py b/gnssanalysis/gn_utils.py
index f381a37..3dfc5c0 100644
--- a/gnssanalysis/gn_utils.py
+++ b/gnssanalysis/gn_utils.py
@@ -1,15 +1,22 @@
+import hashlib
+import inspect
 import logging as _logging
 import os as _os
+import pickle
 import sys as _sys
 import pathlib as _pathlib
 from time import perf_counter
+import warnings
 
 import click as _click
 
-from typing import Union
+from pandas import DataFrame
+from typing import Literal, Optional, Union
 
 from gnssanalysis.enum_meta_properties import EnumMetaProperties
 
+DEFAULT_DATAFRAME_HASH_BASELINE_DIR = _pathlib.Path("./baseline_dataframe_records")
+
 
 class StrictMode(metaclass=EnumMetaProperties):
     name: str
@@ -986,3 +993,366 @@ def __exit__(self, type, value, traceback):
         )
         if self.print_time:
             print(self.readout)
+
+
+class DataFrameHashUtils:
+
+    mode: Literal["baseline", "verify"] = "verify"
+
+    # Unpickling is off by default for security reasons (arbitrary code injection via serialised objects)
+    # Enable temporarily when needed to debug a test regression / change, and ensure input data is trusted.
+    enable_unpickling: bool = False  # DO NOT commit changes to this
+
+    # Record of (test) functions which have called either baseline or verify functions.
+    # If the same function calls twice, this indicates multiple data sets are being stored / checked, under a single
+    # name. This will cause the last to overwrite all previous, and we will only test that last one.
+    caller_record: set[str] = set()
+
+    @staticmethod
+    def get_paths_for_pickle_and_hash(
+        filename_prefix: str,
+        parent_dir: _pathlib.Path = DEFAULT_DATAFRAME_HASH_BASELINE_DIR,
+        subdir: Optional[_pathlib.Path] = None,
+    ) -> tuple[_pathlib.Path, _pathlib.Path]:
+
+        cwd: str = _pathlib.Path.cwd().as_posix()
+        if not cwd.endswith("/gnssanalysis/tests"):
+            raise ValueError(
+                f"DataFrameHashUtils invoked in invalid workdir: '{cwd}'. "
+                "It should only be run within 'gnssanalysis/tests'"
+            )
+
+        dir = parent_dir / subdir if subdir is not None else parent_dir
+        ensure_folders([dir])  # Create if it doesn't already exist
+
+        pickled_list_path = _pathlib.Path(f"{dir}/{filename_prefix}.pickledlist")
+        pickled_list_hash_path = _pathlib.Path(f"{dir}/{filename_prefix}.pickledlist_sha256")
+        return (pickled_list_path, pickled_list_hash_path)
+
+    @staticmethod
+    def get_grandparent_caller_id() -> tuple[str, str]:
+        # This function uses Python frame inspection to determine the *2nd level* caller's name. I.e. finds
+        # the grandparent class and function on the stack.
+
+        # --- AI declaration ---: This function leverages suggestions from Google Gemini.
+
+        # For example, if this is *called by* a function which was itself called by TestClk.test_diff_clk(), the
+        # return would be: (TestClk, test_diff_clk)
+
+        # Note, because navigation is simply a question of how far to walk the stack, it is important to be mindful
+        # of where you call this from!
+        # I.e. don't call it from within a function which in turn is called by
+        # something, the *caller* of which you want to know about... that would be frame -3, not frame -2.
+
+        # The following depicts the typical frame structure of intended usage:
+        # TestClk.test_diff_clk() -> DataFrameHashUtils.create_and_verify_pickled_df_list() -> get_caller_names()
+        #         ^Frame -2                             ^Frame -1                              ^ current frame
+        # We want the name of frame -2, our 'grandparent'.
+
+        # Set up try block to ensure we delete the frame ref created by calling this function
+        try:
+            caller_frame = None
+            # The calling function's calling function frame. I.e the frame of the grandparent function.
+            # We have to step back two, because the first frame is us, the next is the function leveraging us,
+            # and the one after that is whatever called *that* function.
+
+            # Leveraging a lot of linter ignores here, as almost everything in these chains can return None, making
+            # it easier and much simpler, to just catch the exceptions.
+            callers_callers_frame = inspect.currentframe().f_back.f_back  # type: ignore
+            func_name = callers_callers_frame.f_code.co_name  # type: ignore
+            if "self" in callers_callers_frame.f_locals:  # type: ignore
+                calling_class_name = callers_callers_frame.f_locals["self"].__class__.__name__  # type: ignore
+            elif "cls" in callers_callers_frame.f_locals:  # type: ignore
+                calling_class_name = callers_callers_frame.f_locals["cls"].__class__.__name__  # type: ignore
+            else:
+                raise AttributeError("Class not found via either self or cls")
+
+            # If nothing has raised an AttributeError yet, we have a class and function name.
+            # Check it's not accidentally us:
+            if calling_class_name == __class__.__name__:
+                raise ValueError(
+                    f"Calling error: somehow, the grandparent of get_caller_pretty_string() was "
+                    f"us {__class__.__name__}. That shouldn't happen. Got: {calling_class_name}"
+                )
+            # TODO can we check if it's a test, or lives in a 'tests' package?
+            # return f"{calling_class_name}.{func_name}"
+            return (calling_class_name, func_name)
+
+        except AttributeError as a_ex:
+            raise ValueError(
+                f"Failed to find name of caller. Please set filename_prefix and subdir explicity. Exception: {a_ex}"
+            )
+
+        finally:
+            del caller_frame  # Avoid creating ref cycle and leaking memory. I.e. help the garbage collector.
+            # See doc here: https://docs.python.org/3/library/inspect.html#inspect.Traceback.positions
+
+    @staticmethod
+    def ensure_unique_df_objects(dataframes: list[DataFrame]) -> None:
+
+        _logging.debug("Verifying no duplicate object references in DataFrame list to hash")
+
+        unique_addresses: set[int] = set([id(df) for df in dataframes])
+
+        addr_count = len(unique_addresses)
+        df_count = len(dataframes)
+        if len(unique_addresses) != len(dataframes):
+            raise ValueError(
+                f"Count of unique addresses ({addr_count}) didn't match length of dataframe list ({df_count}). "
+                "Two references to the same DF may have been passed, please investigate!"
+            )
+
+    @staticmethod
+    def record_baseline(  # Was baseline_pickled_df_list_and_hash()
+        dataframes: list[DataFrame],
+        parent_dir: _pathlib.Path = DEFAULT_DATAFRAME_HASH_BASELINE_DIR,
+        # Used to differentiate between multiple sets of dataframes in a single test function
+        # TODO can't we just bundle them:
+        # TODO in any case we need to detect and throw an exception when the same function calls us twice in a run...
+        test_index: Optional[int] = None,
+        # These are used to describe the calling class and function, and are inferred automatically. If needed they
+        # can be explicitly set here:
+        subdir: Optional[_pathlib.Path] = None,
+        filename_prefix: Optional[str] = None,
+    ) -> None:
+
+        if test_index is not None:
+            raise NotImplementedError()
+
+        # Structure here is:
+        # pickled_list: bytes -> created from an array of DataFrames. Pickled into a single bytes object.
+        # pickled_list_sha256: str -> sha256 hash of the above pickled DataFrame list.
+
+        if DataFrameHashUtils.mode != "baseline":
+            raise ValueError(
+                "Refusing to create baseline of pickled DF and hash, while not in 'baseline' mode. "
+                "Set DataframeHashUtils.mode = 'baseline' first"
+            )
+
+        if filename_prefix is None:
+            # Try to determine filename prefix from class name and function which is calling us...
+            caller_class, caller_func = DataFrameHashUtils.get_grandparent_caller_id()
+            _logging.debug(
+                f"No filename_prefix provided. "
+                f"Using grandparent class and func (found using frame inspection): {caller_class}, {caller_func}"
+            )
+            filename_prefix = caller_func
+            subdir = _pathlib.Path(caller_class)
+
+            caller_id = f"{caller_class}.{caller_func}"
+        else:
+            caller_id = filename_prefix
+
+        # Check if we've been called before by this class,function pair (i.e. caller_id).
+        # If this is not our first call, continuing will overwrite previous results. So we raise.
+        if caller_id in DataFrameHashUtils.caller_record:
+            raise ValueError(
+                f"Multiple calls from '{caller_id}'! Please consolidate your dataframes and "
+                "only pass one list per test function / filename_prefix."
+            )
+        DataFrameHashUtils.caller_record.add(caller_id)
+
+        pickled_objects_path, aggregate_sha256_path = DataFrameHashUtils.get_paths_for_pickle_and_hash(
+            filename_prefix, parent_dir=parent_dir, subdir=subdir
+        )
+
+        DataFrameHashUtils.ensure_unique_df_objects(dataframes)
+
+        pickled_list: bytes = pickle.dumps(dataframes)
+        pickled_list_sha256: str = hashlib.sha256(pickled_list).hexdigest()
+
+        warnings.warn(
+            "Baselining should only be done supervised (in a dev environment). "
+            "If you see this message in a pipeline run, something needs fixing!"
+        )
+        _logging.debug(f"About to write baseline: '{pickled_objects_path.as_posix()}': {pickled_list_sha256}...")
+
+        with open(aggregate_sha256_path, "wb") as hash_file:
+            hash_file.write(pickled_list_sha256.encode())
+        with open(pickled_objects_path, "wb") as pickled_objects_file:
+            pickled_objects_file.write(pickled_list)
+
+        _logging.info(
+            "TEST BASELINED -->> **Please ensure you commit both pickle and hash files with your changes**: "
+            f"'{pickled_objects_path.as_posix()}': {pickled_list_sha256}.\n"
+        )
+
+    @staticmethod
+    def verify(  # Was create_and_verify_pickled_df_list()
+        dataframes: list[DataFrame],
+        parent_dir: _pathlib.Path = DEFAULT_DATAFRAME_HASH_BASELINE_DIR,
+        # Option to strictly enforce that a baseline must exist for anything this function is invoked to check:
+        raise_for_missing_baseline: bool = False,
+        raise_rather_than_continue_for_incorrect_mode: bool = False,
+        # The expected pickled list hash will be read from disk, at a path constructed using the name of the
+        # calling class and function. While it should not be necessary, you can optionally override the expected hash:
+        expected_pickled_list_sha256: Optional[str] = None,
+        # These are used to describe the calling class and function, and are inferred automatically. If needed they
+        # can be explicitly set here:
+        subdir: Optional[_pathlib.Path] = None,
+        filename_prefix: Optional[str] = None,
+    ) -> bool:
+        # Return options:
+        # - True if verification successful.
+        # - False if baseline incomplete or missing (unable to verify). OR, if not running as mode != 'verify'
+        # NOTE: Raises for verification failed.
+
+        if DataFrameHashUtils.mode != "verify":
+
+            # TODO could change this to just politely state that it is skipping as in baseline mode. But we don't
+            # want to leave things in baseline mode, so...? Is failing tests sufficient? Hopefully.
+            if raise_rather_than_continue_for_incorrect_mode:
+                raise ValueError(
+                    "Refusing to run verify method while not in verify mode. "
+                    "Set DataframeHashUtils.mode = 'verify' first"
+                )
+            warnings.warn(
+                "Refusing to run verify method while not in verify mode. "
+                "Set DataframeHashUtils.mode = 'verify' first"
+            )
+            return False
+
+        # Verify we didn't get passed multiple, overwritten copies of the same reference
+        DataFrameHashUtils.ensure_unique_df_objects(dataframes)
+
+        if filename_prefix is None:
+            # Try to determine filename prefix from class name and function which is calling us...
+            caller_class, caller_func = DataFrameHashUtils.get_grandparent_caller_id()
+            _logging.debug(
+                f"No filename_prefix provided. "
+                f"Using grandparent class and func (found using frame inspection): {caller_class}, {caller_func}"
+            )
+            filename_prefix = caller_func
+            subdir = _pathlib.Path(caller_class)
+
+            caller_id = f"{caller_class}.{caller_func}"
+        else:
+            caller_id = filename_prefix
+
+        # Check if we've been called before by this class,function pair (i.e. caller_id).
+        if caller_id in DataFrameHashUtils.caller_record:
+            raise ValueError(
+                f"Multiple calls from '{caller_id}'! Please consolidate your dataframes and "
+                "only pass one list per test function / filename_prefix."
+            )
+        DataFrameHashUtils.caller_record.add(caller_id)
+
+        # Determine paths on disk...
+        pickled_list_path, pickled_list_hash_path = DataFrameHashUtils.get_paths_for_pickle_and_hash(
+            filename_prefix, parent_dir=parent_dir, subdir=subdir
+        )
+
+        # Check if pickled_df_list or hash exist on disk
+        pickle_exists = pickled_list_path.exists()
+        hash_exists = pickled_list_hash_path.exists()
+
+        if hash_exists == False:
+            if raise_for_missing_baseline:
+                raise ValueError(
+                    f"Cannot verify DFs against baseline (hash file: {'present' if hash_exists else 'missing'}, "
+                    f"pickled list file: {'present' if pickle_exists else 'missing'}) "
+                    f"for '{caller_id}'."
+                )
+            warnings.warn(
+                f"Cannot verify DFs against baseline (hash file: {'present' if hash_exists else 'missing'}, "
+                f"pickled list file: {'present' if pickle_exists else 'missing'}) "
+                f"for '{caller_id}'."
+            )
+            return False
+
+        if expected_pickled_list_sha256 is None:  # Expected hash not provided, load it from disk
+            # Load old aggregate hash (of pickled list)...
+            _logging.debug(f"No expected hash value provided for '{pickled_list_path}', attempting to load...")
+            with open(pickled_list_hash_path, "rb") as pickled_list_hash_file:
+                expected_pickled_list_sha256 = pickled_list_hash_file.read().decode()
+
+        # Data ready, now do comparison
+        # Generate pickled list and aggregate hash
+        pickled_list = pickle.dumps(dataframes)
+        pickled_list_sha256 = hashlib.sha256(pickled_list).hexdigest()
+
+        if pickled_list_sha256 != expected_pickled_list_sha256:
+            _logging.debug(
+                f"Hashes did not match for '{pickled_list_path}'. Expected: {expected_pickled_list_sha256} Actual: {pickled_list_sha256}"
+            )
+            # Load old DataFrames (pickled list)...
+            with open(pickled_list_path, "rb") as pickled_list_hash_file:
+                pickled_dfs = pickled_list_hash_file.read()
+
+            # And print out diffs for them...
+            diff_pickled_dfs(pickled_dfs, dataframes)
+
+            # Raise to ensure the test fails and this change / regression gets investigated
+            raise ValueError("Dataframes did not match baseline. Please investigate using above diffs")
+        else:
+            _logging.debug(f"Hashes matched for '{pickled_list_path}': {pickled_list_sha256}")
+            return True
+
+
+def diff_pickled_dfs(pickled_df_list: bytes, current_dfs_list: list[DataFrame]) -> None:
+
+    # CAUTION: deserialising can present arbitrary code execution potential. Ensure the data passed in is trustworthy.
+    if DataFrameHashUtils.enable_unpickling != True:
+        raise ValueError(
+            "Cannot load baselined DataFrames from pickle for analysis as unpickling is off (default for security). "
+            "Temporarily set DataFrameHashUtils.enable_unpickling = True to allow deserialisation of old DFs from disk."
+        )
+    old_df_list: list[DataFrame] = pickle.loads(pickled_df_list)
+
+    old_length = len(old_df_list)
+    current_length = len(current_dfs_list)
+    if old_length != current_length:
+        raise ValueError(
+            f"Unpickled DataFrame list had {old_length} elements, " f"whereas the current one has {current_length}"
+        )
+    for i in range(current_length):
+        old_df = old_df_list[i]
+        current_df = current_dfs_list[i]
+
+        _logging.info(f"Diffing DataFrame #{i}...")
+
+        # DF.equals() may be useful, but does not check that the row/column index datatypes are the same
+        _logging.info(f"DataFrame.equals(): {current_df.equals(old_df)}")
+
+        try:
+            _logging.info(f"current_dataframe.compare(old_dataframe): {current_df.compare(old_df)}")
+        except ValueError:
+            _logging.info(
+                f"current_dataframe.compare(old_dataframe): FAILED! Indexes / columns likely differ. Running diff of those..."
+            )
+            diff_indexes_and_columns(old_df, current_df)
+
+
+def diff_indexes_and_columns(existing_df: DataFrame, current_df: DataFrame) -> None:
+    # Utility function to output diffs of DataFrame indexes and columns, as DataFrame.compare() will not run if
+    # they differ.
+
+    # Handle diffing of indexes
+    existing_df_index = existing_df.index.to_list()
+    current_df_index = current_df.index.to_list()
+    index_diff = set(existing_df_index).symmetric_difference(current_df_index)
+    if existing_df_index != current_df_index:
+        if len(index_diff) == 0:  # Diff must've been in order, not values
+            _logging.info("Indexes differed in order, but not values. Outputting full indexes:")
+            _logging.info(f"Existing DF indexes: {str(existing_df.index.to_list())}")
+            _logging.info(f"Current DF indexes: {str(current_df.index.to_list())}")
+        else:
+            _logging.info(f"The following index values are in one DF but not the other: {str(index_diff)}")
+
+    # Handle diffing of columns
+    existing_df_colums = existing_df.columns.to_list()
+    current_df_columns = current_df.columns.to_list()
+
+    column_diff = set(existing_df_colums).symmetric_difference(current_df_columns)
+    if existing_df_colums != current_df_columns:
+        if len(column_diff) == 0:  # Diff must've been in order, not values
+            _logging.info("Columns differed in order, but not values. Outputting full column listing:")
+            _logging.info(f"Existing DF columns: {str(existing_df.columns.to_list())}")
+            _logging.info(f"Current DF columns: {str(current_df.columns.to_list())}")
+        else:
+            _logging.info(f"The following column names are in one DF but not the other: {str(column_diff)}")
+
+
+# NOTE: for aggregate tests, the revised multi-dataframe functions in PickleHashUtils are suggested
+def pickle_and_sha256(obj: object) -> str:
+    return hashlib.sha256(pickle.dumps(obj)).hexdigest()
diff --git a/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist b/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist
new file mode 100644
index 0000000000000000000000000000000000000000..d5a567fa3f8bc53eb74b4c280c196eeb6362782c
GIT binary patch
literal 755
zcmZuvTWi%o5I#AVYNb$>;sr#|2dOU+M6GYt)(aXuLf>WCB-=Bgn@!l=P!D)OA6i@3
zw=@1n|Ao%%X^XYpkj-S~o7tJ~+wa3)zx%zozTkSe(MnTFmllmx>8_Qv!m#{CdigHe
zbA0>*XM`JydSWr${})~vudI>UrL{D2qO8NPvZb41@6+NR=NN9CWC&g5E<H=Zuqry!
zR>huHBIle4HeKCLQxUtVJMAlJt(;O>VO7x>=WSPbMvfv9!xid2LgtwD4sbik@h9%!
zdX~^LI7GM}Mx~#&ij(@EU*hr6B0dPoHyn@0AOFybTej&jKW0)AiGd1*q(DW!a5e8L
z>c!A}Bd4o^i)B4Nf@@`c-Vcy7@0{nSR9}@#bLoiHYr?2~@xhRfd%1fV(n>R7=nj8R
zp^<*W%8coFFRiD;!_fJr#Vlb%vC$n;7w`n05+0t7;d$1B7uf(dPC|;~q$80*5|<?L
zsobJgUb2Hm2qWn|2|2^CBzCo&MDw=Py4iQ4>{=RMdL^{Xm3DaKx*P_JI|56v3@fk-
zH(3c-JFmk%W)>d6!;qAbRd{qMg%>7B&L+4J`foVcgjetywqSdP@Rr{9+@4DR0_XT6
AS^xk5

literal 0
HcmV?d00001

diff --git a/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist_sha256 b/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist_sha256
new file mode 100644
index 0000000..387921b
--- /dev/null
+++ b/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist_sha256
@@ -0,0 +1 @@
+6b5020201b08f64a2e7412422e03f94a6e7b0479f3a69a792967cec80b17a08b
\ No newline at end of file
diff --git a/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist b/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist
new file mode 100644
index 0000000000000000000000000000000000000000..7d0d08814c4f6ca87c081972fc68c3cbee3d5cf3
GIT binary patch
literal 595
zcmZuuOH0Hs5Z>;?7lL~6aq+B|3gYV~sEAsu;9bJBjoV_HmLx?N6!f6(LT)?$UY*1T
z3Z{@uXTEP<-(&afq}}r83hhD-G6Nl^mEz&Gf|8@1J^};B-X7!n6F%UY?Xt43(60W5
z7t(MgLFljq34N|K+Svn9r8noF|2Rgw_{CtNNObr^K|9A9Sr_Sz;4E<*50*_?4?^a<
zDH~x5PznZ=mMNZAQXAEz#vywiiFT5@_mDA0t!F$85`4u=T#EwwIxm>d+g@e{HFu=;
z#U74^pYe_j?lF$zvv)fFiuMgAqbnLV0JN0I1}Sp#*{P%{s254|^|UBbmn>?z5-wNN
zcK0(n=JMbDWc-fnFab?loneySvQtSu*UQR(NCmR*pqK9bhWhnOr%X)c4Jbp!ZC9JB
z#wc(GcSiF<UE~}2CgJm~INy$1`A*cycfUg1#Ysn621%Te#NTqWk{fUi8o~C!7!vXU
g?F73P(D&vN2vOZ?mNhkv4~8=V2^SiNx=A4X09(oI!2kdN

literal 0
HcmV?d00001

diff --git a/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist_sha256 b/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist_sha256
new file mode 100644
index 0000000..257e40d
--- /dev/null
+++ b/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist_sha256
@@ -0,0 +1 @@
+1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e
\ No newline at end of file
diff --git a/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist b/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist
new file mode 100644
index 0000000000000000000000000000000000000000..7d0d08814c4f6ca87c081972fc68c3cbee3d5cf3
GIT binary patch
literal 595
zcmZuuOH0Hs5Z>;?7lL~6aq+B|3gYV~sEAsu;9bJBjoV_HmLx?N6!f6(LT)?$UY*1T
z3Z{@uXTEP<-(&afq}}r83hhD-G6Nl^mEz&Gf|8@1J^};B-X7!n6F%UY?Xt43(60W5
z7t(MgLFljq34N|K+Svn9r8noF|2Rgw_{CtNNObr^K|9A9Sr_Sz;4E<*50*_?4?^a<
zDH~x5PznZ=mMNZAQXAEz#vywiiFT5@_mDA0t!F$85`4u=T#EwwIxm>d+g@e{HFu=;
z#U74^pYe_j?lF$zvv)fFiuMgAqbnLV0JN0I1}Sp#*{P%{s254|^|UBbmn>?z5-wNN
zcK0(n=JMbDWc-fnFab?loneySvQtSu*UQR(NCmR*pqK9bhWhnOr%X)c4Jbp!ZC9JB
z#wc(GcSiF<UE~}2CgJm~INy$1`A*cycfUg1#Ysn621%Te#NTqWk{fUi8o~C!7!vXU
g?F73P(D&vN2vOZ?mNhkv4~8=V2^SiNx=A4X09(oI!2kdN

literal 0
HcmV?d00001

diff --git a/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist_sha256 b/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist_sha256
new file mode 100644
index 0000000..257e40d
--- /dev/null
+++ b/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist_sha256
@@ -0,0 +1 @@
+1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e
\ No newline at end of file
diff --git a/tests/test_utils.py b/tests/test_utils.py
index c3225b4..6c8d9b3 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,8 +1,11 @@
 import logging
+import os
+import unittest
+from pandas import DataFrame
 from pyfakefs.fake_filesystem_unittest import TestCase
 from pathlib import Path
 
-from gnssanalysis.gn_utils import delete_entire_directory
+from gnssanalysis.gn_utils import DataFrameHashUtils, delete_entire_directory
 import gnssanalysis.gn_utils as ga_utils
 
 
@@ -64,3 +67,128 @@ def test_configure_logging(self):
 
         # Verify
         self.assertEqual(logger_not_output, None)
+
+
+class TestDataFrameHashUtils(unittest.TestCase):
+
+    def test_verify_refusal_in_wrong_mode(self):
+        mode_backup = DataFrameHashUtils.mode
+        try:
+            df = DataFrame(["a", "b", "c"])
+
+            # Baseline (do not commit uncommented!) Note: every function needs its own baseline, becuase the
+            # function name determines the filename, unless we override that.
+            # DataFrameHashUtils.mode = "baseline"
+            # DataFrameHashUtils.record_baseline([df])
+
+            # In baseline (write) mode, verify should be refused.
+            DataFrameHashUtils.mode = "baseline"
+
+            with self.assertWarns(Warning) as warning_assessor:
+                self.assertFalse(
+                    DataFrameHashUtils.verify([df]),
+                    "DF list verification should not succeed in 'baseline' mode",
+                )
+            # Ensure the expected warning, and only that warning, was raised
+            captured_warnings = warning_assessor.warnings
+            self.assertEqual(
+                "Refusing to run verify method while not in verify mode. Set DataframeHashUtils.mode = 'verify' first",
+                str(captured_warnings[0].message),
+            )
+            self.assertEqual(
+                len(captured_warnings),
+                1,
+                "Expected exactly 1 warning. Check what other warnings are being raised!",
+            )
+
+            # Should succeed in correct mode.
+            DataFrameHashUtils.mode = "verify"
+            self.assertTrue(
+                DataFrameHashUtils.verify([df]),
+                "DF list verification should succeed in 'verify' mode",
+            )
+        finally:
+            # Ensure flag reset to avoid impacts on other tests (across the whole suite)
+            DataFrameHashUtils.mode = mode_backup
+
+    def test_repeat_caller_rejection(self):
+        # These functions determine what files to write/read baselines from, based on the identity of the (test)
+        # function that called them. Therefore, calling twice from the same function would cause the *same baseline
+        # files* to be read/written for a different part of the unit test.
+        # That would have the effect of:
+        # - in write mode: overwriting the baseline file for a previous part of the test function.
+        # - in read mode: repeating verification of the same file against a different DF list (which would likely fail).
+
+        # We're only testing it with the verify function below, but both verify and baseline functions use the same
+        # caller check logic, and store the caller record statically in a class variable. ?
+
+        df = DataFrame(["a", "b", "c"])
+
+        # Baseline (every function needs its own baseline, becuase the function name determines the filename,
+        # unless we override that)
+        # DataFrameHashUtils.mode = "baseline"
+        # DataFrameHashUtils.record_baseline([df])
+
+        self.assertTrue(
+            DataFrameHashUtils.verify([df]),
+            "DF list verification should succeed on *first* call from a function.",
+        )
+        with self.assertRaises(ValueError):
+            DataFrameHashUtils.verify([df])
+            self.fail("DF list verification should fail on *second*/repeated calls from a function.")
+
+    def test_duplicate_df_rejection(self):
+
+        # List to aggregate DFs for hashing
+        dfs_to_hash: list[DataFrame] = []
+
+        df = DataFrame(["a", "b", "c"])  # Let's call this Dataframe 'a'
+        dfs_to_hash.extend([df])
+
+        # Overwrite local variable, as often happens in our unit tests
+        df = DataFrame(["b", "c", "d"])  # Let's call this Dataframe 'b'
+
+        # This might look questionable, but is ok, because we saved a reference to dataframe 'a' to the list,
+        # before overwriting local var 'df' to point at dataframe 'b'.
+        dfs_to_hash.extend([df])
+
+        # Baseline this test (this should only be committed commented out!)
+        # DataFrameHashUtils.mode = "baseline"
+        # DataFrameHashUtils.record_baseline(dfs_to_hash)
+
+        # Will return True if verification succeeded. False if baseline missing or mode != verify
+        self.assertTrue(
+            DataFrameHashUtils.verify(dfs_to_hash),
+            "DF list verification should succeed here (unless baseline files are missing, or baselining has been turned on)",
+        )
+
+        # The local variable df still points to the same DF, so now the list contains [a,b,b]. This should be an error.
+        dfs_to_hash.extend([df])
+        with self.assertRaises(ValueError):
+            DataFrameHashUtils.verify(dfs_to_hash)
+
+    def test_caller_identity_fetch(self):
+        def wrapper_function():
+            class_name, func_name = DataFrameHashUtils.get_grandparent_caller_id()
+            self.assertEqual(class_name, "TestDataFrameHashUtils")
+            self.assertEqual(func_name, "test_caller_identity_fetch")
+
+        # We have to do this (create an extra stack frame) because the function looks for
+        # the *grandparent* caller, not parent caller.
+        wrapper_function()
+
+
+# For use with debugger
+# if __name__ == "__main__":
+
+#     logging.basicConfig(format="%(levelname)s: %(message)s")
+#     logger = logging.getLogger()
+#     logger.setLevel(logging.DEBUG)
+
+#     os.chdir("./tests")
+
+#     df_hash_tests = TestDataFrameHashUtils()
+#     df_hash_tests.test_duplicate_df_rejection()
+#     df_hash_tests.test_verify_refusal_in_wrong_mode
+#     df_hash_tests.test_repeat_caller_rejection()
+#     df_hash_tests.test_caller_identity_fetch()

From 17a0b97137cf0febf1577835cff55dd9f7c181e9 Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Thu, 5 Feb 2026 16:22:12 +0000
Subject: [PATCH 2/5] NPI-4453 improvements to handle launches from both
 project root dir (used by pipeine), and tests subdir (common in development)

---
 gnssanalysis/gn_utils.py | 41 ++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/gnssanalysis/gn_utils.py b/gnssanalysis/gn_utils.py
index 3dfc5c0..bcbfa63 100644
--- a/gnssanalysis/gn_utils.py
+++ b/gnssanalysis/gn_utils.py
@@ -15,7 +15,9 @@
 
 from gnssanalysis.enum_meta_properties import EnumMetaProperties
 
-DEFAULT_DATAFRAME_HASH_BASELINE_DIR = _pathlib.Path("./baseline_dataframe_records")
+# Two options, as a convenience feature to allow invoking from the project root or the tests subdir.
+BASELINE_DATAFRAME_RECORDS_DIR_ROOT_RELATIVE = _pathlib.Path("./tests/baseline_dataframe_records")
+BASELINE_DATAFRAME_RECORDS_DIR_TESTS_RELATIVE = _pathlib.Path("./baseline_dataframe_records")
 
 
 class StrictMode(metaclass=EnumMetaProperties):
@@ -1011,22 +1013,37 @@ class DataFrameHashUtils:
     @staticmethod
     def get_paths_for_pickle_and_hash(
         filename_prefix: str,
-        parent_dir: _pathlib.Path = DEFAULT_DATAFRAME_HASH_BASELINE_DIR,
+        # parent_dir: _pathlib.Path = BASELINE_DATAFRAME_RECORDS_DIR_ROOT_RELATIVE,
         subdir: Optional[_pathlib.Path] = None,
     ) -> tuple[_pathlib.Path, _pathlib.Path]:
 
         cwd: str = _pathlib.Path.cwd().as_posix()
-        if not cwd.endswith("/gnssanalysis/tests"):
+
+        # The following is a quality of life feature, allowing test invocation from either:
+        #  - the project root dir --> python -m unittest discover -v -s tests
+        #  - the tests subdir     --> python -m unittest discover -v
+        if cwd.endswith("/gnssanalysis"):
+            parent_dir = BASELINE_DATAFRAME_RECORDS_DIR_ROOT_RELATIVE
+        elif cwd.endswith("/gnssanalysis/tests"):
+            parent_dir = BASELINE_DATAFRAME_RECORDS_DIR_TESTS_RELATIVE
+        else:
             raise ValueError(
                 f"DataFrameHashUtils invoked in invalid workdir: '{cwd}'. "
-                "It should only be run within 'gnssanalysis/tests'"
+                "It should be run within the top level gnssanalysis project dir (preferred), or the tests subdir"
             )
 
-        dir = parent_dir / subdir if subdir is not None else parent_dir
-        ensure_folders([dir])  # Create if it doesn't already exist
+        if not parent_dir.is_dir():
+            raise ValueError(f"Test baselining dir not found at: '{parent_dir.as_posix()}'")
+
+        target_dir = parent_dir / subdir if subdir is not None else parent_dir
+        if not target_dir.is_dir():
+            # Create directory (fail if parent dirs don't exist). We take this more conservative approach because if
+            # the baseline directory doesn't exist *where we are looking*, that may indicate our workdir is wrong
+            # and we should stop.
+            target_dir.mkdir()
 
-        pickled_list_path = _pathlib.Path(f"{dir}/{filename_prefix}.pickledlist")
-        pickled_list_hash_path = _pathlib.Path(f"{dir}/{filename_prefix}.pickledlist_sha256")
+        pickled_list_path = _pathlib.Path(f"{target_dir}/{filename_prefix}.pickledlist")
+        pickled_list_hash_path = _pathlib.Path(f"{target_dir}/{filename_prefix}.pickledlist_sha256")
         return (pickled_list_path, pickled_list_hash_path)
 
     @staticmethod
@@ -1105,7 +1122,7 @@ def ensure_unique_df_objects(dataframes: list[DataFrame]) -> None:
     @staticmethod
     def record_baseline(  # Was baseline_pickled_df_list_and_hash()
         dataframes: list[DataFrame],
-        parent_dir: _pathlib.Path = DEFAULT_DATAFRAME_HASH_BASELINE_DIR,
+        # parent_dir: _pathlib.Path = BASELINE_DATAFRAME_RECORDS_DIR_ROOT_RELATIVE,
         # Used to differentiate between multiple sets of dataframes in a single test function
         # TODO can't we just bundle them:
         # TODO in any case we need to detect and throw an exception when the same function calls us twice in a run...
@@ -1153,7 +1170,7 @@ def record_baseline(  # Was baseline_pickled_df_list_and_hash()
         DataFrameHashUtils.caller_record.add(caller_id)
 
         pickled_objects_path, aggregate_sha256_path = DataFrameHashUtils.get_paths_for_pickle_and_hash(
-            filename_prefix, parent_dir=parent_dir, subdir=subdir
+            filename_prefix, subdir=subdir
         )
 
         DataFrameHashUtils.ensure_unique_df_objects(dataframes)
@@ -1180,7 +1197,7 @@ def record_baseline(  # Was baseline_pickled_df_list_and_hash()
     @staticmethod
     def verify(  # Was create_and_verify_pickled_df_list()
         dataframes: list[DataFrame],
-        parent_dir: _pathlib.Path = DEFAULT_DATAFRAME_HASH_BASELINE_DIR,
+        # parent_dir: _pathlib.Path = BASELINE_DATAFRAME_RECORDS_DIR_ROOT_RELATIVE,
         # Option to strictly enforce that a baseline must exist for anything this function is invoked to check:
         raise_for_missing_baseline: bool = False,
         raise_rather_than_continue_for_incorrect_mode: bool = False,
@@ -1239,7 +1256,7 @@ def verify(  # Was create_and_verify_pickled_df_list()
 
         # Determine paths on disk...
         pickled_list_path, pickled_list_hash_path = DataFrameHashUtils.get_paths_for_pickle_and_hash(
-            filename_prefix, parent_dir=parent_dir, subdir=subdir
+            filename_prefix, subdir=subdir
         )
 
         # Check if pickled_df_list or hash exist on disk

From 5d94a26e385481dc088b8f745eca2faba7ba3d71 Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Thu, 5 Feb 2026 16:25:11 +0000
Subject: [PATCH 3/5] NPI-4453 move comment for clarity

---
 gnssanalysis/gn_utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/gnssanalysis/gn_utils.py b/gnssanalysis/gn_utils.py
index bcbfa63..b8301c5 100644
--- a/gnssanalysis/gn_utils.py
+++ b/gnssanalysis/gn_utils.py
@@ -1136,10 +1136,6 @@ def record_baseline(  # Was baseline_pickled_df_list_and_hash()
         if test_index is not None:
             raise NotImplementedError()
 
-        # Structure here is:
-        # pickled_list: bytes -> created from an array of DataFrames. Pickled into a single bytes object.
-        # pickled_list_sha256: str -> sha256 hash of the above pickled DataFrame list.
-
         if DataFrameHashUtils.mode != "baseline":
             raise ValueError(
                 "Refusing to create baseline of pickled DF and hash, while not in 'baseline' mode. "
@@ -1173,8 +1169,13 @@ def record_baseline(  # Was baseline_pickled_df_list_and_hash()
             filename_prefix, subdir=subdir
         )
 
+        # Safety check that we did not get two references to the same DataFrame in the list
         DataFrameHashUtils.ensure_unique_df_objects(dataframes)
 
+        # Structure here is:
+        # pickled_list: bytes -> created from an array of DataFrames. Pickled into a single bytes object.
+        # pickled_list_sha256: str -> sha256 hash of the above pickled DataFrame list.
+
         pickled_list: bytes = pickle.dumps(dataframes)
         pickled_list_sha256: str = hashlib.sha256(pickled_list).hexdigest()
 

From c429792ca7673f34e1719e73d23602450234c05e Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Thu, 5 Feb 2026 16:31:13 +0000
Subject: [PATCH 4/5] NPI-4453 restructure into single class for consistency

---
 gnssanalysis/gn_utils.py | 124 +++++++++++++++++++--------------------
 1 file changed, 62 insertions(+), 62 deletions(-)

diff --git a/gnssanalysis/gn_utils.py b/gnssanalysis/gn_utils.py
index b8301c5..53c1315 100644
--- a/gnssanalysis/gn_utils.py
+++ b/gnssanalysis/gn_utils.py
@@ -1298,7 +1298,7 @@ def verify(  # Was create_and_verify_pickled_df_list()
                 pickled_dfs = pickled_list_hash_file.read()
 
             # And print out diffs for them...
-            diff_pickled_dfs(pickled_dfs, dataframes)
+            DataFrameHashUtils.diff_pickled_dfs(pickled_dfs, dataframes)
 
             # Raise to ensure the test fails and this change / regression gets investigated
             raise ValueError("Dataframes did not match baseline. Please investigate using above diffs")
@@ -1306,71 +1306,71 @@ def verify(  # Was create_and_verify_pickled_df_list()
             _logging.debug(f"Hashes matched for '{pickled_list_path}': {pickled_list_sha256}")
             return True
 
+    @staticmethod
+    def diff_pickled_dfs(pickled_df_list: bytes, current_dfs_list: list[DataFrame]) -> None:
 
-def diff_pickled_dfs(pickled_df_list: bytes, current_dfs_list: list[DataFrame]) -> None:
-
-    # CAUTION: deserialising can present arbitrary code execution potential. Ensure the data passed in is trustworthy.
-    if DataFrameHashUtils.enable_unpickling != True:
-        raise ValueError(
-            "Cannot load baselined DataFrames from pickle for analysis as unpickling is off (default for security). "
-            "Temporarily set DataFrameHashUtils.enable_unpickling = True to allow deserialisation of old DFs from disk."
-        )
-    old_df_list: list[DataFrame] = pickle.loads(pickled_df_list)
+        # CAUTION: deserialising can present arbitrary code execution potential. Ensure the data passed in is trustworthy.
+        if DataFrameHashUtils.enable_unpickling != True:
+            raise ValueError(
+                "Cannot load baselined DataFrames from pickle for analysis as unpickling is off (default for security). "
+                "Temporarily set DataFrameHashUtils.enable_unpickling = True to allow deserialisation of old DFs from disk."
+            )
+        old_df_list: list[DataFrame] = pickle.loads(pickled_df_list)
 
-    old_length = len(old_df_list)
-    current_length = len(current_dfs_list)
-    if old_length != current_length:
-        raise ValueError(
-            f"Unpickled DataFrame list had {old_length} elements, " f"whereas the current one has {current_length}"
-        )
-    for i in range(current_length):
-        old_df = old_df_list[i]
-        current_df = current_dfs_list[i]
+        old_length = len(old_df_list)
+        current_length = len(current_dfs_list)
+        if old_length != current_length:
+            raise ValueError(
+                f"Unpickled DataFrame list had {old_length} elements, " f"whereas the current one has {current_length}"
+            )
+        for i in range(current_length):
+            old_df = old_df_list[i]
+            current_df = current_dfs_list[i]
 
-        _logging.info(f"Diffing DataFrame #{i}...")
+            _logging.info(f"Diffing DataFrame #{i}...")
 
-        # DF.equals() may be useful, but does not check that the row/column index datatypes are the same
-        _logging.info(f"DataFrame.equals(): {current_df.equals(old_df)}")
+            # DF.equals() may be useful, but does not check that the row/column index datatypes are the same
+            _logging.info(f"DataFrame.equals(): {current_df.equals(old_df)}")
 
-        try:
-            _logging.info(f"current_dataframe.compare(old_dataframe): {current_df.compare(old_df)}")
-        except ValueError:
-            _logging.info(
-                f"current_dataframe.compare(old_dataframe): FAILED! Indexes / columns likely differ. Running diff of those..."
-            )
-            diff_indexes_and_columns(old_df, current_df)
-
-
-def diff_indexes_and_columns(existing_df: DataFrame, current_df: DataFrame) -> None:
-    # Utility function to output diffs of DataFrame indexes and columns, as DataFrame.compare() will not run if
-    # they differ.
-
-    # Handle diffing of indexes
-    existing_df_index = existing_df.index.to_list()
-    current_df_index = current_df.index.to_list()
-    index_diff = set(existing_df_index).symmetric_difference(current_df_index)
-    if existing_df_index != current_df_index:
-        if len(index_diff) == 0:  # Diff must've been in order, not values
-            _logging.info("Indexes differed in order, but not values. Outputting full indexes:")
-            _logging.info(f"Existing DF indexes: {str(existing_df.index.to_list())}")
-            _logging.info(f"Current DF indexes: {str(current_df.index.to_list())}")
-        else:
-            _logging.info(f"The following index values are in one DF but not the other: {str(index_diff)}")
-
-    # Handle diffing of columns
-    existing_df_colums = existing_df.columns.to_list()
-    current_df_columns = current_df.columns.to_list()
-
-    column_diff = set(existing_df_colums).symmetric_difference(current_df_columns)
-    if existing_df_colums != current_df_columns:
-        if len(column_diff) == 0:  # Diff must've been in order, not values
-            _logging.info("Columns differed in order, but not values. Outputting full column listing:")
-            _logging.info(f"Existing DF columns: {str(existing_df.columns.to_list())}")
-            _logging.info(f"Current DF columns: {str(current_df.columns.to_list())}")
-        else:
-            _logging.info(f"The following column names are in one DF but not the other: {str(column_diff)}")
+            try:
+                _logging.info(f"current_dataframe.compare(old_dataframe): {current_df.compare(old_df)}")
+            except ValueError:
+                _logging.info(
+                    f"current_dataframe.compare(old_dataframe): FAILED! Indexes / columns likely differ. Running diff of those..."
+                )
+                DataFrameHashUtils.diff_indexes_and_columns(old_df, current_df)
 
+    @staticmethod
+    def diff_indexes_and_columns(existing_df: DataFrame, current_df: DataFrame) -> None:
+        # Utility function to output diffs of DataFrame indexes and columns, as DataFrame.compare() will not run if
+        # they differ.
+
+        # Handle diffing of indexes
+        existing_df_index = existing_df.index.to_list()
+        current_df_index = current_df.index.to_list()
+        index_diff = set(existing_df_index).symmetric_difference(current_df_index)
+        if existing_df_index != current_df_index:
+            if len(index_diff) == 0:  # Diff must've been in order, not values
+                _logging.info("Indexes differed in order, but not values. Outputting full indexes:")
+                _logging.info(f"Existing DF indexes: {str(existing_df.index.to_list())}")
+                _logging.info(f"Current DF indexes: {str(current_df.index.to_list())}")
+            else:
+                _logging.info(f"The following index values are in one DF but not the other: {str(index_diff)}")
+
+        # Handle diffing of columns
+        existing_df_colums = existing_df.columns.to_list()
+        current_df_columns = current_df.columns.to_list()
+
+        column_diff = set(existing_df_colums).symmetric_difference(current_df_columns)
+        if existing_df_colums != current_df_columns:
+            if len(column_diff) == 0:  # Diff must've been in order, not values
+                _logging.info("Columns differed in order, but not values. Outputting full column listing:")
+                _logging.info(f"Existing DF columns: {str(existing_df.columns.to_list())}")
+                _logging.info(f"Current DF columns: {str(current_df.columns.to_list())}")
+            else:
+                _logging.info(f"The following column names are in one DF but not the other: {str(column_diff)}")
 
-# NOTE: for aggregate tests, the revised multi-dataframe functions in PickleHashUtils are suggested
-def pickle_and_sha256(obj: object) -> str:
-    return hashlib.sha256(pickle.dumps(obj)).hexdigest()
+    # NOTE: for aggregate tests, the revised multi-dataframe functions above are suggested
+    @staticmethod
+    def pickle_and_sha256(obj: object) -> str:
+        return hashlib.sha256(pickle.dumps(obj)).hexdigest()

From ee4fae0f7dd4c39f6b356a39a6abbf4ebf5ba5e7 Mon Sep 17 00:00:00 2001
From: Nathan <95725385+treefern@users.noreply.github.com>
Date: Fri, 6 Feb 2026 11:19:14 +0000
Subject: [PATCH 5/5] NPI-4453 clean up, refactor and rename things to allow
 easy extention to support any object type, not just DataFrames

---
 gnssanalysis/gn_utils.py                      | 175 +++++++++++-------
 tests/test_utils.py                           |  77 ++++----
 ...st_duplicate_object_rejection.pickledlist} | Bin
 ...icate_object_rejection.pickledlist_sha256} |   0
 .../test_repeat_caller_rejection.pickledlist  | Bin
 ...repeat_caller_rejection.pickledlist_sha256 |   0
 ...t_verify_refusal_in_wrong_mode.pickledlist | Bin
 ...y_refusal_in_wrong_mode.pickledlist_sha256 |   0
 8 files changed, 142 insertions(+), 110 deletions(-)
 rename tests/{baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist => unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist} (100%)
 rename tests/{baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist_sha256 => unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist_sha256} (100%)
 rename tests/{baseline_dataframe_records/TestDataFrameHashUtils => unittest_baselines/TestUnitTestBaseliner}/test_repeat_caller_rejection.pickledlist (100%)
 rename tests/{baseline_dataframe_records/TestDataFrameHashUtils => unittest_baselines/TestUnitTestBaseliner}/test_repeat_caller_rejection.pickledlist_sha256 (100%)
 rename tests/{baseline_dataframe_records/TestDataFrameHashUtils => unittest_baselines/TestUnitTestBaseliner}/test_verify_refusal_in_wrong_mode.pickledlist (100%)
 rename tests/{baseline_dataframe_records/TestDataFrameHashUtils => unittest_baselines/TestUnitTestBaseliner}/test_verify_refusal_in_wrong_mode.pickledlist_sha256 (100%)

diff --git a/gnssanalysis/gn_utils.py b/gnssanalysis/gn_utils.py
index 53c1315..8abb9b1 100644
--- a/gnssanalysis/gn_utils.py
+++ b/gnssanalysis/gn_utils.py
@@ -16,8 +16,8 @@
 from gnssanalysis.enum_meta_properties import EnumMetaProperties
 
 # Two options, as a convenience feature to allow invoking from the project root or the tests subdir.
-BASELINE_DATAFRAME_RECORDS_DIR_ROOT_RELATIVE = _pathlib.Path("./tests/baseline_dataframe_records")
-BASELINE_DATAFRAME_RECORDS_DIR_TESTS_RELATIVE = _pathlib.Path("./baseline_dataframe_records")
+UNITTEST_BASELINE_FILES_ROOT_RELATIVE = _pathlib.Path("./tests/unittest_baselines")
+UNITTEST_BASELINE_FILES_TESTS_RELATIVE = _pathlib.Path("./unittest_baselines")
 
 
 class StrictMode(metaclass=EnumMetaProperties):
@@ -997,7 +997,14 @@ def __exit__(self, type, value, traceback):
             print(self.readout)
 
 
-class DataFrameHashUtils:
+def sha256(bytes_to_hash: bytes) -> str:
+    """
+    Convenience wrapper to quickly call hashlib.sha256 and return a hex digest string
+    """
+    return hashlib.sha256(bytes_to_hash).hexdigest()
+
+
+class UnitTestBaseliner:
 
     mode: Literal["baseline", "verify"] = "verify"
 
@@ -1013,22 +1020,20 @@ class DataFrameHashUtils:
     @staticmethod
     def get_paths_for_pickle_and_hash(
         filename_prefix: str,
-        # parent_dir: _pathlib.Path = BASELINE_DATAFRAME_RECORDS_DIR_ROOT_RELATIVE,
         subdir: Optional[_pathlib.Path] = None,
     ) -> tuple[_pathlib.Path, _pathlib.Path]:
 
         cwd: str = _pathlib.Path.cwd().as_posix()
-
         # The following is a quality of life feature, allowing test invocation from either:
         #  - the project root dir --> python -m unittest discover -v -s tests
         #  - the tests subdir     --> python -m unittest discover -v
         if cwd.endswith("/gnssanalysis"):
-            parent_dir = BASELINE_DATAFRAME_RECORDS_DIR_ROOT_RELATIVE
+            parent_dir = UNITTEST_BASELINE_FILES_ROOT_RELATIVE
         elif cwd.endswith("/gnssanalysis/tests"):
-            parent_dir = BASELINE_DATAFRAME_RECORDS_DIR_TESTS_RELATIVE
+            parent_dir = UNITTEST_BASELINE_FILES_TESTS_RELATIVE
         else:
             raise ValueError(
-                f"DataFrameHashUtils invoked in invalid workdir: '{cwd}'. "
+                f"UnitTestBaseliner invoked in invalid workdir: '{cwd}'. "
                 "It should be run within the top level gnssanalysis project dir (preferred), or the tests subdir"
             )
 
@@ -1062,8 +1067,8 @@ def get_grandparent_caller_id() -> tuple[str, str]:
         # something, the *caller* of which you want to know about... that would be frame -3, not frame -2.
 
         # The following depicts the typical frame structure of intended usage:
-        # TestClk.test_diff_clk() -> DataFrameHashUtils.create_and_verify_pickled_df_list() -> get_caller_names()
-        #         ^Frame -2                             ^Frame -1                              ^ current frame
+        # TestClk.test_diff_clk() -> UnitTestBaseliner.verify() -> get_caller_names()
+        #         ^Frame -2                            ^Frame -1   ^ current frame
         # We want the name of frame -2, our 'grandparent'.
 
         # Set up try block to ensure we delete the frame ref created by calling this function
@@ -1105,46 +1110,38 @@ def get_grandparent_caller_id() -> tuple[str, str]:
             # See doc here: https://docs.python.org/3/library/inspect.html#inspect.Traceback.positions
 
     @staticmethod
-    def ensure_unique_df_objects(dataframes: list[DataFrame]) -> None:
+    def ensure_unique_objects(objects: list[object]) -> None:
 
-        _logging.debug("Verifying no duplicate object references in DataFrame list to hash")
+        _logging.debug("Verifying no duplicate object references in object list to hash")
 
-        unique_addresses: set[int] = set([id(df) for df in dataframes])
+        unique_addresses: set[int] = set([id(obj) for obj in objects])
 
         addr_count = len(unique_addresses)
-        df_count = len(dataframes)
-        if len(unique_addresses) != len(dataframes):
+        obj_count = len(objects)
+        if addr_count != obj_count:
             raise ValueError(
-                f"Count of unique addresses ({addr_count}) didn't match length of dataframe list ({df_count}). "
-                "Two references to the same DF may have been passed, please investigate!"
+                f"Count of unique addresses ({addr_count}) didn't match length of object list ({obj_count}). "
+                "Two references to the same DF / other object may have been passed, please investigate!"
             )
 
     @staticmethod
-    def record_baseline(  # Was baseline_pickled_df_list_and_hash()
-        dataframes: list[DataFrame],
-        # parent_dir: _pathlib.Path = BASELINE_DATAFRAME_RECORDS_DIR_ROOT_RELATIVE,
-        # Used to differentiate between multiple sets of dataframes in a single test function
-        # TODO can't we just bundle them:
-        # TODO in any case we need to detect and throw an exception when the same function calls us twice in a run...
-        test_index: Optional[int] = None,
+    def create_baseline(  # Was baseline_pickled_df_list_and_hash()
+        current_object_list: list[object],
         # These are used to describe the calling class and function, and are inferred automatically. If needed they
         # can be explicitly set here:
         subdir: Optional[_pathlib.Path] = None,
         filename_prefix: Optional[str] = None,
     ) -> None:
 
-        if test_index is not None:
-            raise NotImplementedError()
-
-        if DataFrameHashUtils.mode != "baseline":
+        if UnitTestBaseliner.mode != "baseline":
             raise ValueError(
-                "Refusing to create baseline of pickled DF and hash, while not in 'baseline' mode. "
-                "Set DataframeHashUtils.mode = 'baseline' first"
+                "Refusing to create baseline of pickled DFs / objects and hash, while not in 'baseline' mode. "
+                "Set UnitTestBaseliner.mode = 'baseline' first"
             )
 
         if filename_prefix is None:
             # Try to determine filename prefix from class name and function which is calling us...
-            caller_class, caller_func = DataFrameHashUtils.get_grandparent_caller_id()
+            caller_class, caller_func = UnitTestBaseliner.get_grandparent_caller_id()
             _logging.debug(
                 f"No filename_prefix provided. "
                 f"Using grandparent class and func (found using frame inspection): {caller_class}, {caller_func}"
@@ -1158,25 +1155,34 @@ def record_baseline(  # Was baseline_pickled_df_list_and_hash()
 
         # Check if we've been called before by this class,function pair (i.e. caller_id).
         # If this is not our first call, continuing will overwrite previous results. So we raise.
-        if caller_id in DataFrameHashUtils.caller_record:
+        if caller_id in UnitTestBaseliner.caller_record:
             raise ValueError(
-                f"Multiple calls from '{caller_id}'! Please consolidate your dataframes and "
+                f"Multiple calls from '{caller_id}'! Please consolidate your dataframes / objects to verify, and "
                 "only pass one list per test function / filename_prefix."
             )
-        DataFrameHashUtils.caller_record.add(caller_id)
+        UnitTestBaseliner.caller_record.add(caller_id)
 
-        pickled_objects_path, aggregate_sha256_path = DataFrameHashUtils.get_paths_for_pickle_and_hash(
+        pickled_objects_path, aggregate_sha256_path = UnitTestBaseliner.get_paths_for_pickle_and_hash(
             filename_prefix, subdir=subdir
         )
 
-        # Safety check that we did not get two references to the same DataFrame in the list
-        DataFrameHashUtils.ensure_unique_df_objects(dataframes)
+        # Safety check that we did not get two references to the same DataFrame / object in the list
+        UnitTestBaseliner.ensure_unique_objects(current_object_list)
 
         # Structure here is:
-        # pickled_list: bytes -> created from an array of DataFrames. Pickled into a single bytes object.
-        # pickled_list_sha256: str -> sha256 hash of the above pickled DataFrame list.
+        # pickled_list: bytes -> created from an array of DataFrames / objects. Pickled into a single bytes object.
+        # pickled_list_sha256: str -> sha256 hash of the above pickled DataFrame / object list.
+
+        current_df_list: list[DataFrame] = [df for df in current_object_list if isinstance(df, DataFrame)]
+        if len(current_object_list) > len(current_df_list):
+            warnings.warn(
+                "Creating a unittest baseline containing objects other than DataFrames! This can be hash "
+                "verified, but verify() will crash if any changes are detected. Please implement support for "
+                "other required object types!"
+            )
+        # TODO other object support to be added here
 
-        pickled_list: bytes = pickle.dumps(dataframes)
+        pickled_list: bytes = pickle.dumps(current_object_list)
         pickled_list_sha256: str = hashlib.sha256(pickled_list).hexdigest()
 
         warnings.warn(
@@ -1197,7 +1203,7 @@ def record_baseline(  # Was baseline_pickled_df_list_and_hash()
 
     @staticmethod
     def verify(  # Was create_and_verify_pickled_df_list()
-        dataframes: list[DataFrame],
+        current_object_list: list[object],
         # parent_dir: _pathlib.Path = BASELINE_DATAFRAME_RECORDS_DIR_ROOT_RELATIVE,
         # Option to strictly enforce that a baseline must exist for anything this function is invoked to check:
         raise_for_missing_baseline: bool = False,
@@ -1215,27 +1221,26 @@ def verify(  # Was create_and_verify_pickled_df_list()
         # - False if baseline incomplete or missing (unable to verify). OR, if not running as mode != 'verify'
         # NOTE: Raises for verification failed.
 
-        if DataFrameHashUtils.mode != "verify":
+        if UnitTestBaseliner.mode != "verify":
 
             # TODO could change this to just politely state that it is skipping as in baseline mode. But we don't
             # want to leave things in baseline mode, so...? Is failing tests sufficient? Hopefully.
             if raise_rather_than_continue_for_incorrect_mode:
                 raise ValueError(
                     "Refusing to run verify method while not in verify mode. "
-                    "Set DataframeHashUtils.mode = 'verify' first"
+                    "Set UnitTestBaseliner.mode = 'verify' first"
                 )
             warnings.warn(
-                "Refusing to run verify method while not in verify mode. "
-                "Set DataframeHashUtils.mode = 'verify' first"
+                "Refusing to run verify method while not in verify mode. " "Set UnitTestBaseliner.mode = 'verify' first"
             )
             return False
 
         # Verify we didn't get passed multiple, overwritten copies of the same reference
-        DataFrameHashUtils.ensure_unique_df_objects(dataframes)
+        UnitTestBaseliner.ensure_unique_objects(current_object_list)
 
         if filename_prefix is None:
             # Try to determine filename prefix from class name and function which is calling us...
-            caller_class, caller_func = DataFrameHashUtils.get_grandparent_caller_id()
+            caller_class, caller_func = UnitTestBaseliner.get_grandparent_caller_id()
             _logging.debug(
                 f"No filename_prefix provided. "
                 f"Using grandparent class and func (found using frame inspection): {caller_class}, {caller_func}"
@@ -1248,31 +1253,31 @@ def verify(  # Was create_and_verify_pickled_df_list()
             caller_id = filename_prefix
 
         # Check if we've been called before by this class,function pair (i.e. caller_id).
-        if caller_id in DataFrameHashUtils.caller_record:
+        if caller_id in UnitTestBaseliner.caller_record:
             raise ValueError(
-                f"Multiple calls from '{caller_id}'! Please consolidate your dataframes and "
+                f"Multiple calls from '{caller_id}'! Please consolidate your dataframes / objects to validate, and "
                 "only pass one list per test function / filename_prefix."
             )
-        DataFrameHashUtils.caller_record.add(caller_id)
+        UnitTestBaseliner.caller_record.add(caller_id)
 
         # Determine paths on disk...
-        pickled_list_path, pickled_list_hash_path = DataFrameHashUtils.get_paths_for_pickle_and_hash(
+        pickled_list_path, pickled_list_hash_path = UnitTestBaseliner.get_paths_for_pickle_and_hash(
             filename_prefix, subdir=subdir
         )
 
-        # Check if pickled_df_list or hash exist on disk
+        # Check if pickled list or hash exist on disk
         pickle_exists = pickled_list_path.exists()
         hash_exists = pickled_list_hash_path.exists()
 
         if hash_exists == False:
             if raise_for_missing_baseline:
                 raise ValueError(
-                    f"Cannot verify DFs against baseline (hash file: {'present' if hash_exists else 'missing'}, "
+                    f"Cannot verify DFs / objects against baseline (hash file: {'present' if hash_exists else 'missing'}, "
                     f"pickled list file: {'present' if pickle_exists else 'missing'}) "
                     f"for '{caller_id}'."
                 )
             warnings.warn(
-                f"Cannot verify DFs against baseline (hash file: {'present' if hash_exists else 'missing'}, "
+                f"Cannot verify DFs / objects against baseline (hash file: {'present' if hash_exists else 'missing'}, "
                 f"pickled list file: {'present' if pickle_exists else 'missing'}) "
                 f"for '{caller_id}'."
             )
@@ -1286,36 +1291,62 @@ def verify(  # Was create_and_verify_pickled_df_list()
 
         # Data ready, now do comparison
         # Generate pickled list and aggregate hash
-        pickled_list = pickle.dumps(dataframes)
-        pickled_list_sha256 = hashlib.sha256(pickled_list).hexdigest()
+        pickled_list = pickle.dumps(current_object_list)
+        pickled_list_sha256 = sha256(pickled_list)
 
         if pickled_list_sha256 != expected_pickled_list_sha256:
             _logging.debug(
                 f"Hashes did not match for '{pickled_list_path}'. Expected: {expected_pickled_list_sha256} Actual: {pickled_list_sha256}"
             )
-            # Load old DataFrames (pickled list)...
+            # Load old DataFrames / other objects (pickled list)...
             with open(pickled_list_path, "rb") as pickled_list_hash_file:
-                pickled_dfs = pickled_list_hash_file.read()
+                pickled_list = pickled_list_hash_file.read()
+
+            # Unpickle if the safety is turned off
+            # CAUTION: deserialising can present arbitrary code execution potential. Ensure the data passed in is trustworthy.
+            if UnitTestBaseliner.enable_unpickling != True:
+                raise ValueError(
+                    "Cannot load baselined DataFrames / objects from pickle for analysis as unpickling is "
+                    "off (default for security). Temporarily set UnitTestBaseliner.enable_unpickling = True to "
+                    "allow deserialisation of old DFs / objects from disk."
+                )
+            warnings.warn(
+                "Unpickling object list from unittest baseline, to create diff with current results. This may "
+                "present a security risk, and should NOT be left enabled when not needed. Please ensure "
+                "UnitTestBaseliner.enable_unpickling defaults to False"
+            )
+            unpickled_object_list: list[object] = pickle.loads(pickled_list)
+
+            # Filter OLD (baseline) object list by datatype
+            old_df_list: list[DataFrame] = [df for df in unpickled_object_list if isinstance(df, DataFrame)]
+            if len(unpickled_object_list) > len(old_df_list):
+                raise NotImplementedError(
+                    "Outputting diffs for non-DataFrame objects during verification, is not yet supported"
+                )
+            # TODO filtering to extract other supported datatypes will go here in future, rather than the above exception
 
-            # And print out diffs for them...
-            DataFrameHashUtils.diff_pickled_dfs(pickled_dfs, dataframes)
+            # Filter NEW (being verified) object list by datatype
+            current_df_list: list[DataFrame] = [df for df in current_object_list if isinstance(df, DataFrame)]
+            if len(current_object_list) > len(current_df_list):
+                raise NotImplementedError(
+                    "Outputting diffs for non-DataFrame objects during verification, is not yet supported"
+                )
+            # TODO as above for OLD objects, filtering for NEW objects will go here
+
+            # And print out diffs for the DataFrames. This in turn calls the index and column diff
+            # utility, if dataframe.diff() raises.
+            UnitTestBaseliner.diff_dfs(old_df_list, current_df_list)
+
+            # TODO when adding other supported object types, calculate diffs for them here.
 
             # Raise to ensure the test fails and this change / regression gets investigated
-            raise ValueError("Dataframes did not match baseline. Please investigate using above diffs")
+            raise ValueError("Dataframes / objects did not match baseline. Please investigate using above diffs")
         else:
             _logging.debug(f"Hashes matched for '{pickled_list_path}': {pickled_list_sha256}")
             return True
 
     @staticmethod
-    def diff_pickled_dfs(pickled_df_list: bytes, current_dfs_list: list[DataFrame]) -> None:
-
-        # CAUTION: deserialising can present arbitrary code execution potential. Ensure the data passed in is trustworthy.
-        if DataFrameHashUtils.enable_unpickling != True:
-            raise ValueError(
-                "Cannot load baselined DataFrames from pickle for analysis as unpickling is off (default for security). "
-                "Temporarily set DataFrameHashUtils.enable_unpickling = True to allow deserialisation of old DFs from disk."
-            )
-        old_df_list: list[DataFrame] = pickle.loads(pickled_df_list)
+    def diff_dfs(old_df_list: list[DataFrame], current_dfs_list: list[DataFrame]) -> None:
 
         old_length = len(old_df_list)
         current_length = len(current_dfs_list)
@@ -1338,7 +1369,7 @@ def diff_pickled_dfs(pickled_df_list: bytes, current_dfs_list: list[DataFrame])
                 _logging.info(
                     f"current_dataframe.compare(old_dataframe): FAILED! Indexes / columns likely differ. Running diff of those..."
                 )
-                DataFrameHashUtils.diff_indexes_and_columns(old_df, current_df)
+                UnitTestBaseliner.diff_indexes_and_columns(old_df, current_df)
 
     @staticmethod
     def diff_indexes_and_columns(existing_df: DataFrame, current_df: DataFrame) -> None:
@@ -1373,4 +1404,4 @@ def diff_indexes_and_columns(existing_df: DataFrame, current_df: DataFrame) -> N
     # NOTE: for aggregate tests, the revised multi-dataframe functions above are suggested
     @staticmethod
     def pickle_and_sha256(obj: object) -> str:
-        return hashlib.sha256(pickle.dumps(obj)).hexdigest()
+        return sha256(pickle.dumps(obj))
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 6c8d9b3..6937de2 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -5,7 +5,7 @@
 from pyfakefs.fake_filesystem_unittest import TestCase
 from pathlib import Path
 
-from gnssanalysis.gn_utils import DataFrameHashUtils, delete_entire_directory
+from gnssanalysis.gn_utils import UnitTestBaseliner, delete_entire_directory
 import gnssanalysis.gn_utils as ga_utils
 
 
@@ -69,30 +69,30 @@ def test_configure_logging(self):
         self.assertEqual(logger_not_output, None)
 
 
-class TestDataFrameHashUtils(unittest.TestCase):
+class TestUnitTestBaseliner(unittest.TestCase):
 
     def test_verify_refusal_in_wrong_mode(self):
-        mode_backup = DataFrameHashUtils.mode
+        mode_backup = UnitTestBaseliner.mode
         try:
             df = DataFrame(["a", "b", "c"])
 
             # Baseline (do not commit uncommented!) Note: every function needs its own baseline, becuase the
             # function name determines the filename, unless we override that.
-            # DataFrameHashUtils.mode = "baseline"
-            # DataFrameHashUtils.record_baseline([df])
+            # UnitTestBaseliner.mode = "baseline"
+            # UnitTestBaseliner.record_baseline([df])
 
             # In baseline (write) mode, verify should be refused.
-            DataFrameHashUtils.mode = "baseline"
+            UnitTestBaseliner.mode = "baseline"
 
             with self.assertWarns(Warning) as warning_assessor:
                 self.assertFalse(
-                    DataFrameHashUtils.verify([df]),
-                    "DF list verification should not succeed in 'baseline' mode",
+                    UnitTestBaseliner.verify([df]),
+                    "DF / object list verification should not succeed in 'baseline' mode",
                 )
             # Ensure the expected warning, and only that warning, was raised
             captured_warnings = warning_assessor.warnings
             self.assertEqual(
-                "Refusing to run verify method while not in verify mode. Set DataframeHashUtils.mode = 'verify' first",
+                "Refusing to run verify method while not in verify mode. Set UnitTestBaseliner.mode = 'verify' first",
                 str(captured_warnings[0].message),
             )
             self.assertEqual(
@@ -102,14 +102,14 @@ def test_verify_refusal_in_wrong_mode(self):
             )
 
             # Should succeed in correct mode.
-            DataFrameHashUtils.mode = "verify"
+            UnitTestBaseliner.mode = "verify"
             self.assertTrue(
-                DataFrameHashUtils.verify([df]),
-                "DF list verification should succeed in 'verify' mode",
+                UnitTestBaseliner.verify([df]),
+                "DF / object list verification should succeed in 'verify' mode",
             )
         finally:
             # Ensure flag reset to avoid impacts on other tests (across the whole suite)
-            DataFrameHashUtils.mode = mode_backup
+            UnitTestBaseliner.mode = mode_backup
 
     def test_repeat_caller_rejection(self):
         # These functions determine what files to write/read baselines from, based on the identity of the (test)
@@ -117,7 +117,8 @@ def test_repeat_caller_rejection(self):
         # files* to be read/written for a different part of the unit test.
         # That would have the effect of:
         # - in write mode: overwriting the baseline file for a previous part of the test function.
-        # - in read mode: repeating verification of the same file against a different DF list (which would likely fail).
+        # - in read mode: repeating verification of the same file against a different DF / object list (which would
+        #   likely fail).
 
         # We're only testing it with the verify function below, but both verify and baseline functions use the same
         # caller check logic, and store the caller record statically in a class variable. ?
@@ -126,51 +127,51 @@ def test_repeat_caller_rejection(self):
 
         # Baseline (every function needs its own baseline, becuase the function name determines the filename,
         # unless we override that)
-        # DataFrameHashUtils.mode = "baseline"
-        # DataFrameHashUtils.record_baseline([df])
+        # UnitTestBaseliner.mode = "baseline"
+        # UnitTestBaseliner.record_baseline([df])
 
         self.assertTrue(
-            DataFrameHashUtils.verify([df]),
-            "DF list verification should succeed on *first* call from a function.",
+            UnitTestBaseliner.verify([df]),
+            "DF / object list verification should succeed on *first* call from a function.",
         )
         with self.assertRaises(ValueError):
-            DataFrameHashUtils.verify([df])
-            self.fail("DF list verification should fail on *second*/repeated calls from a function.")
+            UnitTestBaseliner.verify([df])
+            self.fail("DF / object list verification should fail on *second*/repeated calls from a function.")
 
-    def test_duplicate_df_rejection(self):
+    def test_duplicate_object_rejection(self):
 
-        # List to aggregate DFs for hashing
-        dfs_to_hash: list[DataFrame] = []
+        # List to aggregate DFs / objects for hashing
+        objects_to_hash: list[object] = []
 
         df = DataFrame(["a", "b", "c"])  # Let's call this Dataframe 'a'
-        dfs_to_hash.extend([df])
+        objects_to_hash.extend([df])
 
         # Overwrite local variable, as often happens in our unit tests
         df = DataFrame(["b", "c", "d"])  # Let's call this Dataframe 'b'
 
         # This might look questionable, but is ok, because we saved a reference to dataframe 'a' to the list,
         # before overwriting local var 'df' to point at dataframe 'b'.
-        dfs_to_hash.extend([df])
+        objects_to_hash.extend([df])
 
         # Baseline this test (this should only be committed commented out!)
-        # DataFrameHashUtils.mode = "baseline"
-        # DataFrameHashUtils.record_baseline(dfs_to_hash)
+        # UnitTestBaseliner.mode = "baseline"
+        # UnitTestBaseliner.record_baseline(dfs_to_hash)
 
         # Will return True if verification succeeded. False if baseline missing or mode != verify
         self.assertTrue(
-            DataFrameHashUtils.verify(dfs_to_hash),
-            "DF list verification should succeed here (unless baseline files are missing, or baselining has been turned on)",
+            UnitTestBaseliner.verify(objects_to_hash),
+            "DF / object list verification should succeed here (unless baseline files are missing, or baselining has been turned on)",
         )
 
         # The local variable df still points to the same DF, so now the list contains [a,b,b]. This should be an error.
-        dfs_to_hash.extend([df])
+        objects_to_hash.extend([df])
         with self.assertRaises(ValueError):
-            DataFrameHashUtils.verify(dfs_to_hash)
+            UnitTestBaseliner.verify(objects_to_hash)
 
     def test_caller_identity_fetch(self):
         def wrapper_function():
-            class_name, func_name = DataFrameHashUtils.get_grandparent_caller_id()
-            self.assertEqual(class_name, "TestDataFrameHashUtils")
+            class_name, func_name = UnitTestBaseliner.get_grandparent_caller_id()
+            self.assertEqual(class_name, "TestUnitTestBaseliner")
             self.assertEqual(func_name, "test_caller_identity_fetch")
 
         # We have to do this (create an extra stack frame) because the function looks for
@@ -187,8 +188,8 @@ def wrapper_function():
 
 #     os.chdir("./tests")
 
-#     df_hash_tests = TestDataFrameHashUtils()
-#     df_hash_tests.test_duplicate_df_rejection()
-#     df_hash_tests.test_verify_refusal_in_wrong_mode
-#     df_hash_tests.test_repeat_caller_rejection()
-#     df_hash_tests.test_caller_identity_fetch()
+#     baseliner_tests = TestUnitTestBaseliner()
+#     baseliner_tests.test_duplicate_object_rejection()
+#     baseliner_tests.test_verify_refusal_in_wrong_mode
+#     baseliner_tests.test_repeat_caller_rejection()
+#     baseliner_tests.test_caller_identity_fetch()
diff --git a/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist b/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist
similarity index 100%
rename from tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist
rename to tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist
diff --git a/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist_sha256 b/tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist_sha256
similarity index 100%
rename from tests/baseline_dataframe_records/TestDataFrameHashUtils/test_duplicate_df_rejection.pickledlist_sha256
rename to tests/unittest_baselines/TestUnitTestBaseliner/test_duplicate_object_rejection.pickledlist_sha256
diff --git a/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist b/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist
similarity index 100%
rename from tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist
rename to tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist
diff --git a/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist_sha256 b/tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist_sha256
similarity index 100%
rename from tests/baseline_dataframe_records/TestDataFrameHashUtils/test_repeat_caller_rejection.pickledlist_sha256
rename to tests/unittest_baselines/TestUnitTestBaseliner/test_repeat_caller_rejection.pickledlist_sha256
diff --git a/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist b/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist
similarity index 100%
rename from tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist
rename to tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist
diff --git a/tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist_sha256 b/tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist_sha256
similarity index 100%
rename from tests/baseline_dataframe_records/TestDataFrameHashUtils/test_verify_refusal_in_wrong_mode.pickledlist_sha256
rename to tests/unittest_baselines/TestUnitTestBaseliner/test_verify_refusal_in_wrong_mode.pickledlist_sha256