diff --git a/Makefile.in b/Makefile.in index 2c4f120b..cca8d9cd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -246,6 +246,7 @@ test_python: python env TEST_NAME=Test/test_cmor_frequency_required.py make test_a_python env TEST_NAME=Test/test_cmor_parent_attrs.py make test_a_python env TEST_NAME=Test/test_cmor_variable_attr_comment.py make test_a_python + env TEST_NAME=Test/test_cmor_cv_string_too_long.py make test_a_python test_cmip6_cv: python env TEST_NAME=Test/test_python_CMIP6_CV_sub_experimentnotset.py make test_a_python env TEST_NAME=Test/test_python_CMIP6_CV_sub_experimentbad.py make test_a_python diff --git a/Src/cmor.c b/Src/cmor.c index 0f550349..22f6f338 100644 --- a/Src/cmor.c +++ b/Src/cmor.c @@ -722,7 +722,7 @@ int cmor_have_NetCDF41min(void) } /************************************************************************/ -/* cmor_handle_error_internal() */ +/* cmor_handle_error_internal() */ /************************************************************************/ void cmor_handle_error_internal(char *error_msg, int level) { @@ -732,83 +732,68 @@ void cmor_handle_error_internal(char *error_msg, int level) if (output_logfile == NULL) output_logfile = stderr; +#ifdef COLOREDOUTPUT + int use_color = isatty(fileno(output_logfile)); + #define ANSI_COLOR(f, ...) do { if (use_color) fprintf(f, __VA_ARGS__); } while(0) +#else + #define ANSI_COLOR(f, ...) 
do {} while(0) +#endif + if (CMOR_VERBOSITY != CMOR_QUIET) { fprintf(output_logfile, "\n"); } + if (level == CMOR_WARNING) { cmor_nwarnings++; if (CMOR_VERBOSITY != CMOR_QUIET) { - -#ifdef COLOREDOUTPUT - fprintf(output_logfile, "%c[%d;%d;%dm", 0X1B, 2, 34, 47); -#endif - + ANSI_COLOR(output_logfile, "%c[%d;%d;%dm", 0x1B, 2, 34, 47); fprintf(output_logfile, "C Traceback:\nIn function: %s", cmor_traceback_info); - -#ifdef COLOREDOUTPUT - fprintf(output_logfile, "%c[%dm", 0X1B, 0); -#endif - + ANSI_COLOR(output_logfile, "%c[%dm", 0x1B, 0); fprintf(output_logfile, "\n\n"); - -#ifdef COLOREDOUTPUT - fprintf(output_logfile, "%c[%d;%d;%dm", 0X1B, 1, 34, 47); -#endif } } else { cmor_nerrors++; - -#ifdef COLOREDOUTPUT - fprintf(output_logfile, "%c[%d;%d;%dm", 0X1B, 2, 31, 47); -#endif - + ANSI_COLOR(output_logfile, "%c[%d;%d;%dm", 0x1B, 2, 31, 47); fprintf(output_logfile, "C Traceback:\n! In function: %s", cmor_traceback_info); - -#ifdef COLOREDOUTPUT - fprintf(output_logfile, "%c[%dm", 0X1B, 0); -#endif - + ANSI_COLOR(output_logfile, "%c[%dm", 0x1B, 0); fprintf(output_logfile, "\n\n"); - -#ifdef COLOREDOUTPUT - fprintf(output_logfile, "%c[%d;%d;%dm", 0X1B, 1, 31, 47); -#endif } - // fprintf(stderr, "%s ERROR LEVEL %d\n", error_msg, level); + if (CMOR_VERBOSITY != CMOR_QUIET || level != CMOR_WARNING) { - for (i = 0; i < 25; i++) { - fprintf(output_logfile, "!"); + /* Color the entire box: borders and message in one region */ + if (level == CMOR_WARNING) { + ANSI_COLOR(output_logfile, "%c[%d;%d;%dm", 0x1B, 1, 34, 47); + } else { + ANSI_COLOR(output_logfile, "%c[%d;%d;%dm", 0x1B, 1, 31, 47); } + + for (i = 0; i < 25; i++) + fprintf(output_logfile, "!"); fprintf(output_logfile, "\n"); fprintf(output_logfile, "!\n"); - + if (level == CMOR_WARNING) fprintf(output_logfile, "! Warning: %s\n", error_msg); else fprintf(output_logfile, "! 
Error: %s\n", error_msg); fprintf(output_logfile, "!\n"); - for (i = 0; i < 25; i++) fprintf(output_logfile, "!"); -#ifdef COLOREDOUTPUT - fprintf(output_logfile, "%c[%dm", 0X1B, 0); -#endif - + ANSI_COLOR(output_logfile, "%c[%dm", 0x1B, 0); fprintf(output_logfile, "\n\n"); } CV_ERROR = 1; if (level == CMOR_NOT_SETUP) { exit(1); - } if ((CMOR_MODE == CMOR_EXIT_ON_WARNING) || (level == CMOR_CRITICAL)) { - fflush(stdout); - fflush(output_logfile); + fflush(stdout); + fflush(output_logfile); kill(getpid(), SIGTERM); } fflush(output_logfile); diff --git a/Src/cmor_tables.c b/Src/cmor_tables.c index f80fa44e..66182555 100644 --- a/Src/cmor_tables.c +++ b/Src/cmor_tables.c @@ -1036,7 +1036,10 @@ int cmor_load_table_internal(char szTable[CMOR_MAX_STRING], int *table_id, done = 1; } else if (strncmp(key, JSON_KEY_CV_ENTRY, 2) == 0) { - cmor_validate_cv(value, NULL); + if (cmor_validate_cv(value, NULL) == TABLE_ERROR) { + cmor_pop_traceback(); + return (TABLE_ERROR); + } if (cmor_CV_set_entry(&cmor_tables[cmor_ntables], value) == 1) { cmor_pop_traceback(); @@ -1203,18 +1206,53 @@ int cmor_validate_json(json_object *json) /************************************************************************/ /* cmor_validate_cv() */ /************************************************************************/ -void cmor_validate_cv(json_object *cv, char *parent_attr) +int cmor_validate_cv(json_object *cv, char *parent_attr) { array_list *array; json_object *array_obj; - size_t length, i; + size_t length, str_len, i; int single_value_pairs; + enum { partial_len = 25 }; + char partial_str[partial_len]; + cmor_add_traceback("cmor_validate_cv"); json_object_object_foreach(cv, attr, value) { single_value_pairs = 0; + // Attribute names and string values must be at most 1023 characters long + str_len = strlen(attr); + if (str_len >= CMOR_MAX_STRING) { + strncpy(partial_str, attr, partial_len); + partial_str[partial_len - 1] = '\0'; + cmor_handle_error_variadic( + "Attribute \"%s...\" has a length of %zu characters, " + 
"which exceeds the %d character limit.", + CMOR_CRITICAL, + partial_str, str_len, CMOR_MAX_STRING - 1); + cmor_pop_traceback(); + return TABLE_ERROR; + } + + if (json_object_is_type(value, json_type_string)) { + str_len = json_object_get_string_len(value); + if (str_len >= CMOR_MAX_STRING) { + strncpy(partial_str, + json_object_get_string(value), + partial_len); + partial_str[partial_len - 1] = '\0'; + cmor_handle_error_variadic( + "Attribute \"%s\" has value \"%s...\" " + "with a length of %zu characters, which " + "exceeds the %d character limit.", + CMOR_CRITICAL, + attr, partial_str, str_len, CMOR_MAX_STRING - 1); + cmor_pop_traceback(); + return TABLE_ERROR; + } + } + if (parent_attr == NULL) { if (strcmp(attr, CV_KEY_BRANDING_TEMPLATE) == 0) { if (!json_object_is_type(value, json_type_string)) { @@ -1334,11 +1372,39 @@ void cmor_validate_cv(json_object *cv, char *parent_attr) CMOR_WARNING, attr); break; + } else { + str_len = json_object_get_string_len(array_obj); + if (str_len >= CMOR_MAX_STRING) { + strncpy(partial_str, + json_object_get_string(array_obj), + partial_len); + partial_str[partial_len - 1] = '\0'; + cmor_handle_error_variadic( + "Attribute \"%s\" has value \"%s...\" in its " + "array with a length of %zu characters, which " + "exceeds the %d character limit.", + CMOR_CRITICAL, + attr, partial_str, str_len, CMOR_MAX_STRING - 1); + cmor_pop_traceback(); + return TABLE_ERROR; + } } } } else if (json_object_is_type(value, json_type_object)) { json_object_object_foreach(value, k, v) { - if (json_object_is_type(v, json_type_array)) { + str_len = strlen(k); + if (str_len >= CMOR_MAX_STRING) { + strncpy(partial_str, k, partial_len); + partial_str[partial_len - 1] = '\0'; + cmor_handle_error_variadic( + "Key value \"%s...\" in attribute \"%s\" " + "has a length of %zu characters, which " + "exceeds the %d character limit.", + CMOR_CRITICAL, + partial_str, attr, str_len, CMOR_MAX_STRING - 1); + cmor_pop_traceback(); + return TABLE_ERROR; + } else if 
(json_object_is_type(v, json_type_array)) { cmor_handle_error_variadic( "Value for \"%s\" in attribute \"%s\" " "cannot be an array", @@ -1360,5 +1426,5 @@ void cmor_validate_cv(json_object *cv, char *parent_attr) } cmor_pop_traceback(); - return; + return TABLE_SUCCESS; } diff --git a/Test/test_cmor_cv_string_too_long.py b/Test/test_cmor_cv_string_too_long.py new file mode 100644 index 00000000..bb564212 --- /dev/null +++ b/Test/test_cmor_cv_string_too_long.py @@ -0,0 +1,175 @@ +import json +import cmor +import unittest +import os +from collections.abc import Mapping +from pathlib import Path + +from netCDF4 import Dataset +from base_CMIP6_CV import BaseCVsTest + +DATASET_INFO = { + "_AXIS_ENTRY_FILE": "Tables/CMIP6_coordinate.json", + "_FORMULA_VAR_FILE": "Tables/CMIP6_formula_terms.json", + "_controlled_vocabulary_file": "", + "activity_id": "CMIP", + "branch_method": "standard", + "branch_time_in_child": 30.0, + "branch_time_in_parent": 10800.0, + "calendar": "360_day", + "cv_version": "6.2.19.0", + "domain_id": "EUR-50", + "experiment": "AMIP", + "experiment_id": "amip", + "forcing_index": "3", + "further_info_url": "https://furtherinfo.es-doc.org/CMIP6.MOHC.HadGEM3-GC31-LL.amip.none.r1i1p1f3", + "grid": "N96", + "grid_label": "gn", + "initialization_index": "1", + "institution": "Met Office Hadley Centre, Fitzroy Road, Exeter, Devon, EX1 3PB, UK", + "institution_id": "MOHC", + "license": "CMIP6 model data produced by the Met Office Hadley Centre is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License (https://creativecommons.org/licenses). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6 output, including citation requirements and proper acknowledgment. Further information about this data, including some limitations, can be found via the further_info_url (recorded as a global attribute in this file) and at https://ukesm.ac.uk/cmip6. 
The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.", + "mip_era": "CMIP6", + "nominal_resolution": "250 km", + "outpath": ".", + "parent_activity_id": "no parent", + "physics_index": "1", + "realization_index": "1", + "source": "HadGEM3-GC31-LL (2016): \naerosol: UKCA-GLOMAP-mode\natmos: MetUM-HadGEM3-GA7.1 (N96; 192 x 144 longitude/latitude; 85 levels; top level 85 km)\natmosChem: none\nland: JULES-HadGEM3-GL7.1\nlandIce: none\nocean: NEMO-HadGEM3-GO6.0 (eORCA1 tripolar primarily 1 deg with meridional refinement down to 1/3 degree in the tropics; 360 x 330 longitude/latitude; 75 levels; top grid cell 0-1 m)\nocnBgchem: none\nseaIce: CICE-HadGEM3-GSI8 (eORCA1 tripolar primarily 1 deg; 360 x 330 longitude/latitude)", + "source_id": "HadGEM3-GC31-LL", + "source_type": "AGCM", + "sub_experiment": "none", + "sub_experiment_id": "none", + "tracking_prefix": "hdl:21.14100", + "variant_label": "r1i1p1f3" +} + + +def deep_update(source, overrides): + """Recursively merges overrides into source dictionary.""" + for key, value in overrides.items(): + if isinstance(value, Mapping): + # If the value is a dictionary, dive deeper + source[key] = deep_update(source.get(key, {}), value) + else: + # Otherwise, override or add the value + source[key] = value + return source + + +class TestCVStringTooLong(BaseCVsTest): + def setUp(self): + """ + Write out a simple file using CMOR + """ + super().setUp() + + self.cv_file = Path("Test/CMIP6_CV_nested_attribute.json") + self.user_input_file = Path("Test/input_nested_attribute.json") + + # Set up CMOR + cmor.setup(inpath="Tables", netcdf_file_action=cmor.CMOR_REPLACE, + logfile=self.tmpfile, create_subdirectories=0) + + # Add 'mip_era' that is longer 
than 1023 characters + mip_era_too_long = "x" * 1024 + with open("Tables/CMIP6_CV.json", "r") as cv_infile: + cv = json.load(cv_infile) + cv["CV"]["mip_era"] = mip_era_too_long + with open(self.cv_file, "w") as cv_outfile: + json.dump(cv, cv_outfile, sort_keys=True, indent=4) + + # Define dataset using DATASET_INFO + with open(self.user_input_file, "w") as input_file: + user_input = DATASET_INFO.copy() + user_input["_controlled_vocabulary_file"] = str(self.cv_file) + json.dump(user_input, input_file, sort_keys=True, indent=4) + + def tearDown(self): + super().tearDown() + + self.cv_file.unlink() + self.user_input_file.unlink() + + def setup_cv(self, update_values: dict): + + # Update values in CV file + with open("Tables/CMIP6_CV.json", "r") as cv_infile: + cv = json.load(cv_infile) + deep_update(cv["CV"], update_values) + with open(self.cv_file, "w") as cv_outfile: + json.dump(cv, cv_outfile, sort_keys=True, indent=4) + + # Define dataset using DATASET_INFO + with open(self.user_input_file, "w") as input_file: + user_input = DATASET_INFO.copy() + user_input["_controlled_vocabulary_file"] = str(self.cv_file) + json.dump(user_input, input_file, sort_keys=True, indent=4) + + # read dataset info + error_flag = cmor.dataset_json(str(self.user_input_file)) + if error_flag: + raise RuntimeError("CMOR dataset_json call failed") + + def test_mip_era_too_long(self): + attr = "mip_era" + length = 1024 + value = "x" * length + self.setup_cv({attr: value}) + + mip_table = "CMIP6_Omon.json" + with self.assertRaises(cmor.CMORError): + _ = cmor.load_table(mip_table) + + self.assertCV( + f"Attribute \"{attr}\" has value \"{value[:24]}...\" with a length of " + f"{length} characters, which exceeds the 1023 character limit." 
+ ) + + def test_nominal_resolution_too_long(self): + attr = "nominal_resolution" + length = 2000 + value = "x" * length + self.setup_cv({attr: [value]}) + + mip_table = "CMIP6_Omon.json" + with self.assertRaises(cmor.CMORError): + _ = cmor.load_table(mip_table) + + self.assertCV( + f"Attribute \"{attr}\" has value \"{value[:24]}...\" in its array with " + f"a length of {length} characters, which exceeds the 1023 character limit." + ) + + def test_activity_id_too_long(self): + attr = "activity_id" + length = 1080 + value = "x" * length + self.setup_cv({attr: {value: "Key is too long."}}) + + mip_table = "CMIP6_Omon.json" + with self.assertRaises(cmor.CMORError): + _ = cmor.load_table(mip_table) + + self.assertCV( + f"Key value \"{value[:24]}...\" in attribute \"{attr}\" has " + f"a length of {length} characters, which exceeds the 1023 character limit." + ) + + def test_attribute_name_too_long(self): + length = 3000 + attr = "x" * length + self.setup_cv({attr: "Attribute name is too long."}) + + mip_table = "CMIP6_Omon.json" + with self.assertRaises(cmor.CMORError): + _ = cmor.load_table(mip_table) + + self.assertCV( + f"Attribute \"{attr[:24]}...\" has a length of {length} characters, " + "which exceeds the 1023 character limit." 
+ ) + + +if __name__ == '__main__': + unittest.main() diff --git a/ci-support/test-wheel.sh b/ci-support/test-wheel.sh index 8b02c26e..1208c5be 100755 --- a/ci-support/test-wheel.sh +++ b/ci-support/test-wheel.sh @@ -212,6 +212,7 @@ wheel_python_tests=( "Test/test_cmor_frequency_required.py" "Test/test_cmor_parent_attrs.py" "Test/test_cmor_variable_attr_comment.py" + "Test/test_cmor_cv_string_too_long.py" "Test/test_python_CMIP6_CV_sub_experimentnotset.py" "Test/test_python_CMIP6_CV_sub_experimentbad.py" "Test/test_python_CMIP6_CV_furtherinfourl.py" diff --git a/cmip7-cmor-tables b/cmip7-cmor-tables index 37232b1b..81cdc464 160000 --- a/cmip7-cmor-tables +++ b/cmip7-cmor-tables @@ -1 +1 @@ -Subproject commit 37232b1b2a53d6ccf5494bc6a2c4bfca8b6374f8 +Subproject commit 81cdc4647a9ca50e0d8617d58ce911432e62300d diff --git a/include/cmor_func_def.h b/include/cmor_func_def.h index 8d79ee10..8658e1ed 100644 --- a/include/cmor_func_def.h +++ b/include/cmor_func_def.h @@ -311,7 +311,7 @@ extern int cmor_load_table_internal( char table[CMOR_MAX_STRING], extern int cmor_search_table( char szTable[CMOR_MAX_STRING], int *table_id); extern int cmor_validate_json(json_object *json); -extern void cmor_validate_cv(json_object *json, char *parent_attr); +extern int cmor_validate_cv(json_object *json, char *parent_attr); extern json_object *cmor_open_inpathFile( char *szFilename);