-
Notifications
You must be signed in to change notification settings - Fork 14
NPI-4453 Framework for DataFrame hashing & test baselining #110
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
89ae8b9
17a0b97
5d94a26
c429792
ee4fae0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,11 @@ | ||
| import logging | ||
| import os | ||
| import unittest | ||
| from pandas import DataFrame | ||
| from pyfakefs.fake_filesystem_unittest import TestCase | ||
| from pathlib import Path | ||
|
|
||
| from gnssanalysis.gn_utils import delete_entire_directory | ||
| from gnssanalysis.gn_utils import UnitTestBaseliner, delete_entire_directory | ||
| import gnssanalysis.gn_utils as ga_utils | ||
|
|
||
|
|
||
|
|
@@ -64,3 +67,129 @@ def test_configure_logging(self): | |
|
|
||
| # Verify | ||
| self.assertEqual(logger_not_output, None) | ||
|
|
||
|
|
||
| class TestUnitTestBaseliner(unittest.TestCase): | ||
|
|
||
| def test_verify_refusal_in_wrong_mode(self): | ||
| mode_backup = UnitTestBaseliner.mode | ||
| try: | ||
| df = DataFrame(["a", "b", "c"]) | ||
|
|
||
| # Baseline (do not commit uncommented!) Note: every function needs its own baseline, because the | |
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo: |
||
| # function name determines the filename, unless we override that. | ||
| # UnitTestBaseliner.mode = "baseline" | ||
| # UnitTestBaseliner.record_baseline([df]) | ||
|
|
||
| # In baseline (write) mode, verify should be refused. | ||
| UnitTestBaseliner.mode = "baseline" | ||
|
|
||
| with self.assertWarns(Warning) as warning_assessor: | ||
| self.assertFalse( | ||
| UnitTestBaseliner.verify([df]), | ||
| "DF / object list verification should not succeed in 'baseline' mode", | ||
| ) | ||
| # Ensure the expected warning, and only that warning, was raised | ||
| captured_warnings = warning_assessor.warnings | ||
| self.assertEqual( | ||
| "Refusing to run verify method while not in verify mode. Set UnitTestBaseliner.mode = 'verify' first", | ||
| str(captured_warnings[0].message), | ||
| ) | ||
| self.assertEqual( | ||
| len(captured_warnings), | ||
| 1, | ||
| "Expected exactly 1 warning. Check what other warnings are being raised!", | ||
| ) | ||
|
|
||
| # Should succeed in correct mode. | ||
| UnitTestBaseliner.mode = "verify" | ||
| self.assertTrue( | ||
| UnitTestBaseliner.verify([df]), | ||
| "DF / object list verification should succeed in 'verify' mode", | ||
| ) | ||
| finally: | ||
| # Ensure flag reset to avoid impacts on other tests (across the whole suite) | ||
| UnitTestBaseliner.mode = mode_backup | ||
|
|
||
| def test_repeat_caller_rejection(self): | ||
| # These functions determine what files to write/read baselines from, based on the identity of the (test) | ||
| # function that called them. Therefore, calling twice from the same function would cause the *same baseline | ||
| # files* to be read/written for a different part of the unit test. | ||
| # That would have the effect of: | ||
| # - in write mode: overwriting the baseline file for a previous part of the test function. | ||
| # - in read mode: repeating verification of the same file against a different DF / object list (which would | ||
| # likely fail). | ||
|
|
||
| # We're only testing it with the verify function below, but both verify and baseline functions use the same | ||
| # caller check logic, and store the caller record statically in a class variable. | |
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the question mark here for a reason?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wasn't sure on the specific terminology. Checked and updated with a clarification. |
||
|
|
||
| df = DataFrame(["a", "b", "c"]) | ||
|
|
||
| # Baseline (every function needs its own baseline, because the function name determines the filename, | |
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo: |
||
| # unless we override that) | ||
| # UnitTestBaseliner.mode = "baseline" | ||
| # UnitTestBaseliner.record_baseline([df]) | ||
|
|
||
| self.assertTrue( | ||
| UnitTestBaseliner.verify([df]), | ||
| "DF / object list verification should succeed on *first* call from a function.", | ||
| ) | ||
| with self.assertRaises(ValueError): | ||
| UnitTestBaseliner.verify([df]) | ||
| self.fail("DF / object list verification should fail on *second*/repeated calls from a function.") | ||
|
|
||
| def test_duplicate_object_rejection(self): | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No description of the test here, whereas the other previous has an intro to what is being tested |
||
| # List to aggregate DFs / objects for hashing | ||
| objects_to_hash: list[object] = [] | ||
|
|
||
| df = DataFrame(["a", "b", "c"]) # Let's call this Dataframe 'a' | ||
| objects_to_hash.extend([df]) | ||
|
|
||
| # Overwrite local variable, as often happens in our unit tests | ||
| df = DataFrame(["b", "c", "d"]) # Let's call this Dataframe 'b' | ||
|
|
||
| # This might look questionable, but is ok, because we saved a reference to dataframe 'a' to the list, | ||
| # before overwriting local var 'df' to point at dataframe 'b'. | ||
| objects_to_hash.extend([df]) | ||
|
|
||
| # Baseline this test (this should only be committed commented out!) | ||
| # UnitTestBaseliner.mode = "baseline" | ||
| # UnitTestBaseliner.record_baseline(objects_to_hash) | |
|
|
||
| # Will return True if verification succeeded. False if baseline missing or mode != verify | ||
| self.assertTrue( | ||
| UnitTestBaseliner.verify(objects_to_hash), | ||
| "DF / object list verification should succeed here (unless baseline files are missing, or baselining has been turned on)", | ||
| ) | ||
|
|
||
| # The local variable df still points to DataFrame 'b', so extending the list again makes it hold the objects [a, b, b] (a duplicate reference to 'b'). This should be an error. | |
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure where the dataframe
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That was intended to be shorthand for the different DataFrame objects, rather than their content. |
||
| objects_to_hash.extend([df]) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are trying to add the same dataframe here to
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep. This checks for duplicate references to the same objects at the top level (as a safety check). It's not recursive, but the top-level check is arguably the most important. |
||
| with self.assertRaises(ValueError): | ||
| UnitTestBaseliner.verify(objects_to_hash) | ||
|
|
||
| def test_caller_identity_fetch(self): | ||
| def wrapper_function(): | ||
| class_name, func_name = UnitTestBaseliner.get_grandparent_caller_id() | ||
| self.assertEqual(class_name, "TestUnitTestBaseliner") | ||
| self.assertEqual(func_name, "test_caller_identity_fetch") | ||
|
|
||
| # We have to do this (create an extra stack frame) because the function looks for | ||
| # the *grandparent* caller, not parent caller. | ||
| wrapper_function() | ||
|
|
||
|
|
||
| # For use with debugger | ||
| # if __name__ == "__main__": | ||
|
|
||
| # logging.basicConfig(format="%(levelname)s: %(message)s") | ||
| # logger = logging.getLogger() | ||
| # logger.setLevel(logging.DEBUG) | ||
|
|
||
| # os.chdir("./tests") | ||
|
|
||
| # baseliner_tests = TestUnitTestBaseliner() | ||
| # baseliner_tests.test_duplicate_object_rejection() | ||
| # baseliner_tests.test_verify_refusal_in_wrong_mode() | |
| # baseliner_tests.test_repeat_caller_rejection() | ||
| # baseliner_tests.test_caller_identity_fetch() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| 6b5020201b08f64a2e7412422e03f94a6e7b0479f3a69a792967cec80b17a08b |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| 1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| 1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Short intro to what is being tested would be good here (kinda implied in the name, but could be good to add in)