From 245a9cd001d23a10ccdff06671aceb4d2ecdbeb9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 13:46:51 +0000 Subject: [PATCH 1/9] Add autoload subcommand to thesaurus management command, task in tasks.py, and entrypoint call Agent-Logs-Url: https://github.com/GeoNode/geonode/sessions/9a26ad60-2d0f-4255-861e-2bd45a3dee10 Co-authored-by: etj <717359+etj@users.noreply.github.com> --- entrypoint.sh | 1 + geonode/base/management/commands/thesaurus.py | 34 +++++- geonode/tests/test_autoload_thesaurus.py | 112 ++++++++++++++++++ tasks.py | 9 ++ 4 files changed, 154 insertions(+), 2 deletions(-) create mode 100644 geonode/tests/test_autoload_thesaurus.py diff --git a/entrypoint.sh b/entrypoint.sh index ed9469efac2..18c8ab5269f 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -56,6 +56,7 @@ else fi invoke statics + invoke loadthesauri echo "Executing UWSGI server $cmd for Production" fi diff --git a/geonode/base/management/commands/thesaurus.py b/geonode/base/management/commands/thesaurus.py index 95802eaf451..0aa561a345c 100644 --- a/geonode/base/management/commands/thesaurus.py +++ b/geonode/base/management/commands/thesaurus.py @@ -1,3 +1,6 @@ +import os + +from django.apps import apps from django.conf import settings from django.core.management.base import BaseCommand, CommandError @@ -9,14 +12,15 @@ DUMP_FORMAT_SORTED, ) from geonode.base.management.commands.thesaurus_subcommands.list import list_thesauri -from geonode.base.management.commands.thesaurus_subcommands.load import load_thesaurus, ACTIONS, ACTION_CREATE +from geonode.base.management.commands.thesaurus_subcommands.load import load_thesaurus, ACTIONS, ACTION_CREATE, ACTION_UPDATE logger = setup_logger() COMMAND_LIST = "list" COMMAND_DUMP = "dump" COMMAND_LOAD = "load" -COMMANDS = [COMMAND_LIST, COMMAND_LOAD, COMMAND_DUMP] +COMMAND_AUTOLOAD = "autoload" +COMMANDS = [COMMAND_LIST, COMMAND_LOAD, COMMAND_DUMP, COMMAND_AUTOLOAD] class Command(BaseCommand): @@ -109,5 +113,31 @@ def handle(self, *args, **options): load_thesaurus(input_file, identifier, action) + elif subcommand == COMMAND_AUTOLOAD: + autoload_thesauri() + else: raise CommandError(f"Unknown subcommand: {subcommand}") + + +def autoload_thesauri(): + """ + Discover and load all thesauri (.rdf files) found in a `thesauri/` directory + within each installed Django app. Uses the `update` action so existing entries + are updated and new ones are created without duplicates. + """ + loaded = 0 + for app_config in apps.get_app_configs(): + thesauri_dir = os.path.join(app_config.path, "thesauri") + if not os.path.isdir(thesauri_dir): + continue + rdf_files = [f for f in os.listdir(thesauri_dir) if f.lower().endswith(".rdf")] + for rdf_file in sorted(rdf_files): + rdf_path = os.path.join(thesauri_dir, rdf_file) + logger.info(f"Autoloading thesaurus from app '{app_config.name}': {rdf_path}") + try: + load_thesaurus(rdf_path, identifier=None, action=ACTION_UPDATE) + loaded += 1 + except Exception as e: + logger.error(f"Failed to load thesaurus '{rdf_path}': {e}") + logger.info(f"Autoload complete: {loaded} thesaurus file(s) loaded.") diff --git a/geonode/tests/test_autoload_thesaurus.py b/geonode/tests/test_autoload_thesaurus.py new file mode 100644 index 00000000000..6b9013fc001 --- /dev/null +++ b/geonode/tests/test_autoload_thesaurus.py @@ -0,0 +1,112 @@ +######################################################################### +# +# Copyright (C) 2016 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +######################################################################### + +import os +import shutil +import tempfile +from unittest.mock import patch, MagicMock + +from django.test import TestCase + +from geonode.base.management.commands.thesaurus import autoload_thesauri +from geonode.base.models import Thesaurus + + +RDF_CONTENT = """\ + + + + Autoload Test Thesaurus + 2024-01-01 + + + Concept One + + + +""" + + +class TestAutoloadThesauri(TestCase): + def setUp(self): + self.tmp_dir = tempfile.mkdtemp() + self.thesauri_dir = os.path.join(self.tmp_dir, "thesauri") + os.makedirs(self.thesauri_dir) + self.rdf_file = os.path.join(self.thesauri_dir, "autoload_test.rdf") + with open(self.rdf_file, "w", encoding="utf-8") as f: + f.write(RDF_CONTENT) + + def tearDown(self): + shutil.rmtree(self.tmp_dir, ignore_errors=True) + + def _make_app_config(self, name, path): + app_config = MagicMock() + app_config.name = name + app_config.path = path + return app_config + + def test_autoload_loads_rdf_files_from_thesauri_dirs(self): + """autoload_thesauri should load .rdf files found in thesauri/ dirs of installed apps.""" + app_configs = [self._make_app_config("fake_app", self.tmp_dir)] + with patch("geonode.base.management.commands.thesaurus.apps.get_app_configs", return_value=app_configs): + autoload_thesauri() + + self.assertTrue( + Thesaurus.objects.filter(about="http://example.com/autoload-test-scheme").exists(), + "The thesaurus should have been loaded from the app's thesauri/ directory", + ) + + def test_autoload_is_idempotent(self): + """Calling autoload_thesauri twice should not create duplicate thesauri (uses update action).""" + app_configs = [self._make_app_config("fake_app", self.tmp_dir)] + with patch("geonode.base.management.commands.thesaurus.apps.get_app_configs", return_value=app_configs): + autoload_thesauri() + autoload_thesauri() + + count = Thesaurus.objects.filter(about="http://example.com/autoload-test-scheme").count() + self.assertEqual(1, count, "Running autoload twice should not create duplicate thesauri") + + def test_autoload_skips_apps_without_thesauri_dir(self): + """autoload_thesauri should silently skip apps that have no thesauri/ directory.""" + app_without_thesauri = self._make_app_config("no_thesauri_app", self.tmp_dir.rstrip("/") + "_no_dir") + app_configs = [app_without_thesauri] + with patch("geonode.base.management.commands.thesaurus.apps.get_app_configs", return_value=app_configs): + # Should not raise + autoload_thesauri() + + self.assertEqual(0, Thesaurus.objects.count(), "No thesauri should be loaded when thesauri/ dir is absent") + + def test_autoload_continues_after_error(self): + """autoload_thesauri should continue loading other files if one fails.""" + bad_rdf = os.path.join(self.thesauri_dir, "aaaa_bad.rdf") + with open(bad_rdf, "w") as f: + f.write("THIS IS NOT VALID RDF") + + app_configs = [self._make_app_config("fake_app", self.tmp_dir)] + with patch("geonode.base.management.commands.thesaurus.apps.get_app_configs", return_value=app_configs): + # Should not raise despite the bad file + autoload_thesauri() + + # The valid thesaurus (sorted after the bad one alphabetically: autoload_test > aaaa_bad) should still load + self.assertTrue( + Thesaurus.objects.filter(about="http://example.com/autoload-test-scheme").exists(), + "Valid thesaurus should be loaded even if another file in the same dir fails", + ) diff --git a/tasks.py b/tasks.py index 4f5585ec6aa..86513d3380b 100755 --- a/tasks.py +++ b/tasks.py @@ -392,6 +392,15 @@ def updateadmin(ctx): ) +@task +def loadthesauri(ctx): + print("**************************thesauri*******************************") + ctx.run( + f"python manage.py thesaurus autoload --settings={_localsettings()}", + pty=True, + ) + + @task def collectmetrics(ctx): print("************************collect metrics******************************") From 0bb7e73047757a6beefcad6706649eed2a7e0da2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 13:56:35 +0000 Subject: [PATCH 2/9] Add documentation for thesaurus autoload command and boot-time thesauri loading Agent-Logs-Url: https://github.com/GeoNode/geonode/sessions/50827a39-23e0-47b0-9269-7c87c0ed4434 Co-authored-by: etj <717359+etj@users.noreply.github.com> --- docs/src/admin/thesauri/thesauri.md | 65 ++++++++++++++++++- .../docker/vanilla-docker-installation.md | 9 +++ 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/docs/src/admin/thesauri/thesauri.md b/docs/src/admin/thesauri/thesauri.md index 59f0577105d..74f3996deee 100644 --- a/docs/src/admin/thesauri/thesauri.md +++ b/docs/src/admin/thesauri/thesauri.md @@ -92,6 +92,7 @@ GeoNode provides a single command (``thesaurus``) with multiple actions: * ``list``: list existing thesauri * ``load``: load a RDF file * ``dump``: dump a thesaurus into a file +* ``autoload``: automatically discover and load all thesauri shipped by installed apps .. code-block:: @@ -102,12 +103,13 @@ GeoNode provides a single command (``thesaurus``) with multiple actions: [--format {json-ld,n3,nt,pretty-xml,sorted-xml,trig,ttl,xml}] [--default-lang LANG] [--version] [-v {0,1,2,3}] [--settings SETTINGS] [--pythonpath PYTHONPATH] [--traceback] [--no-color] [--force-color] [--skip-checks] - [{list,load,dump}] + [{list,load,dump,autoload}] - Handles thesaurus commands ['list', 'load', 'dump'] + Handles thesaurus commands ['list', 'load', 'dump', 'autoload'] positional arguments: - {list,load,dump} thesaurus operation to run + {list,load,dump,autoload} + thesaurus operation to run options: -h, --help show this help message and exit @@ -227,6 +229,63 @@ In order to only export the entries we edited, we'll issue the command:: python manage.py thesaurus dump -i labels-i18n --include "proj1_*" --include "*_ovr" -f labels-i18n.proj1.rdf +### Auto-loading thesauri: ``thesaurus autoload`` + +The ``autoload`` subcommand scans every installed Django app for a ``thesauri/`` directory +at the top level of the app package, then loads all ``.rdf`` files it finds there. +This is how GeoNode and third-party apps can ship thesauri that are loaded automatically at start-up. + +```bash +python manage.py thesaurus autoload +``` + +For each ``.rdf`` file discovered, the command runs the equivalent of ``thesaurus load --action update``, +so the operation is **idempotent**: running it multiple times will not create duplicates; instead, +existing records are updated and missing ones are created. + +**Convention for app-provided thesauri** + +Place one or more ``.rdf`` files inside a ``thesauri/`` directory at the root of your app package: + +``` +my_geonode_app/ + thesauri/ + my_vocabulary.rdf + another_vocab.rdf + models.py + ... +``` + +All ``.rdf`` files in that directory are picked up automatically whenever ``thesaurus autoload`` +(or ``invoke loadthesauri``) is executed. + +!!! note + The ``autoload`` command is automatically run during GeoNode's Docker container start-up sequence (see [Initialization at boot](#initialization-at-boot)). + + +## Initialization at boot { #initialization-at-boot } + +When GeoNode starts (e.g. via the Docker entrypoint), the following initialization steps are executed in order: + +1. **Database migrations** – applies any pending schema migrations. +2. **Fixtures** – loads default OAuth2 apps, admin user, and site data (only on first boot or when ``FORCE_REINIT=true``). +3. **Static files** – collects static assets. +4. **Thesauri autoload** – runs ``thesaurus autoload`` to load or update all ``.rdf`` files found in any installed app's ``thesauri/`` directory. This step runs on **every** boot so that thesaurus updates shipped with an upgraded app are applied automatically. + +To run the thesaurus autoload step manually: + +```bash +# Inside the GeoNode container +python manage.py thesaurus autoload +``` + +Or using the invoke task: + +```bash +invoke loadthesauri +``` + + ## Configuring a Thesaurus diff --git a/docs/src/setup/docker/vanilla-docker-installation.md b/docs/src/setup/docker/vanilla-docker-installation.md index 5136c1a2d0d..bef557550a8 100644 --- a/docs/src/setup/docker/vanilla-docker-installation.md +++ b/docs/src/setup/docker/vanilla-docker-installation.md @@ -81,6 +81,15 @@ Executing UWSGI server uwsgi --ini /usr/src/app/uwsgi.ini for Production [uWSGI] getting INI configuration from /usr/src/app/uwsgi.ini ``` +The container performs these initialization steps before starting the application server: + +1. **Database migrations** – applies any pending schema migrations. +2. **Fixtures** – loads default OAuth2 apps, admin user and site data (only on first boot or when ``FORCE_REINIT=true``). +3. **Static files** – collects static assets. +4. **Thesauri autoload** – scans all installed apps for a ``thesauri/`` directory and loads (or updates) any ``.rdf`` files found there. This makes sure thesauri shipped by GeoNode apps are always up-to-date. + +See [Thesauri – Initialization at boot](../../../admin/thesauri/thesauri.md#initialization-at-boot) for more details on the thesaurus autoload step. + To exit just hit `CTRL+C`. This message means that the GeoNode containers have been started. Browsing to `http://localhost/` will show the GeoNode home page. You should be able to successfully log with the credentials of admin user which are defined in the .env file and start using it right away. From fa0f85006394b65f9490d37deccac7ebfb7b76c8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 14:05:27 +0000 Subject: [PATCH 3/9] Move autoload_thesauri into its own thesaurus_subcommands/autoload.py Agent-Logs-Url: https://github.com/GeoNode/geonode/sessions/ca967273-78e4-4330-b1ee-7b10f8f774d9 Co-authored-by: etj <717359+etj@users.noreply.github.com> --- geonode/base/management/commands/thesaurus.py | 29 ++--------------- .../thesaurus_subcommands/autoload.py | 31 +++++++++++++++++++ 2 files changed, 33 insertions(+), 27 deletions(-) create mode 100644 geonode/base/management/commands/thesaurus_subcommands/autoload.py diff --git a/geonode/base/management/commands/thesaurus.py b/geonode/base/management/commands/thesaurus.py index 0aa561a345c..d5c1eda1943 100644 --- a/geonode/base/management/commands/thesaurus.py +++ b/geonode/base/management/commands/thesaurus.py @@ -1,10 +1,8 @@ -import os - -from django.apps import apps from django.conf import settings from django.core.management.base import BaseCommand, CommandError from geonode.base.management.command_utils import setup_logger +from geonode.base.management.commands.thesaurus_subcommands.autoload import autoload_thesauri from geonode.base.management.commands.thesaurus_subcommands.dump import ( dump_thesaurus, DUMP_FORMATS, @@ -12,7 +10,7 @@ DUMP_FORMAT_SORTED, ) from geonode.base.management.commands.thesaurus_subcommands.list import list_thesauri -from geonode.base.management.commands.thesaurus_subcommands.load import load_thesaurus, ACTIONS, ACTION_CREATE, ACTION_UPDATE +from geonode.base.management.commands.thesaurus_subcommands.load import load_thesaurus, ACTIONS, ACTION_CREATE logger = setup_logger() @@ -118,26 +116,3 @@ def handle(self, *args, **options): else: raise CommandError(f"Unknown subcommand: {subcommand}") - - -def autoload_thesauri(): - """ - Discover and load all thesauri (.rdf files) found in a `thesauri/` directory - within each installed Django app. Uses the `update` action so existing entries - are updated and new ones are created without duplicates. - """ - loaded = 0 - for app_config in apps.get_app_configs(): - thesauri_dir = os.path.join(app_config.path, "thesauri") - if not os.path.isdir(thesauri_dir): - continue - rdf_files = [f for f in os.listdir(thesauri_dir) if f.lower().endswith(".rdf")] - for rdf_file in sorted(rdf_files): - rdf_path = os.path.join(thesauri_dir, rdf_file) - logger.info(f"Autoloading thesaurus from app '{app_config.name}': {rdf_path}") - try: - load_thesaurus(rdf_path, identifier=None, action=ACTION_UPDATE) - loaded += 1 - except Exception as e: - logger.error(f"Failed to load thesaurus '{rdf_path}': {e}") - logger.info(f"Autoload complete: {loaded} thesaurus file(s) loaded.") diff --git a/geonode/base/management/commands/thesaurus_subcommands/autoload.py b/geonode/base/management/commands/thesaurus_subcommands/autoload.py new file mode 100644 index 00000000000..404839b96a4 --- /dev/null +++ b/geonode/base/management/commands/thesaurus_subcommands/autoload.py @@ -0,0 +1,31 @@ +import os + +from django.apps import apps + +from geonode.base.management.command_utils import setup_logger +from geonode.base.management.commands.thesaurus_subcommands.load import load_thesaurus, ACTION_UPDATE + +logger = setup_logger() + + +def autoload_thesauri(): + """ + Discover and load all thesauri (.rdf files) found in a `thesauri/` directory + within each installed Django app. Uses the `update` action so existing entries + are updated and new ones are created without duplicates. + """ + loaded = 0 + for app_config in apps.get_app_configs(): + thesauri_dir = os.path.join(app_config.path, "thesauri") + if not os.path.isdir(thesauri_dir): + continue + rdf_files = [f for f in os.listdir(thesauri_dir) if f.lower().endswith(".rdf")] + for rdf_file in sorted(rdf_files): + rdf_path = os.path.join(thesauri_dir, rdf_file) + logger.info(f"Autoloading thesaurus from app '{app_config.name}': {rdf_path}") + try: + load_thesaurus(rdf_path, identifier=None, action=ACTION_UPDATE) + loaded += 1 + except Exception as e: + logger.error(f"Failed to load thesaurus '{rdf_path}': {e}") + logger.info(f"Autoload complete: {loaded} thesaurus file(s) loaded.") From 4b4462a14df3ce08091fb0a18df7016094aa7253 Mon Sep 17 00:00:00 2001 From: Emanuele Tajariol Date: Thu, 30 Apr 2026 18:11:22 +0200 Subject: [PATCH 4/9] Log improvements, lang selection --- geonode/base/management/commands/thesaurus.py | 10 +++- .../thesaurus_subcommands/autoload.py | 5 +- .../commands/thesaurus_subcommands/load.py | 46 ++++++++++++++----- 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/geonode/base/management/commands/thesaurus.py b/geonode/base/management/commands/thesaurus.py index d5c1eda1943..d922b66b761 100644 --- a/geonode/base/management/commands/thesaurus.py +++ b/geonode/base/management/commands/thesaurus.py @@ -43,6 +43,12 @@ def add_arguments(self, parser): choices=ACTIONS, help="Actions to run upon data loading (default: create)", ) + load_group.add_argument( + "--langs", + dest="langs", + action="append", + help="Only import labels for the requested languages; can be repeated", + ) dump_group = parser.add_argument_group('Params for "dump" subcommand') dump_group.add_argument("-o", "--out", nargs="?", help="Full path to the output file to be created") @@ -101,6 +107,8 @@ def handle(self, *args, **options): input_file = options.get("file") action = options.get("action") identifier = options.get("identifier") + lang = options.get("lang") + langs = options.get("langs") or [] if not input_file: raise CommandError("'load' command requires the parameter.") @@ -109,7 +117,7 @@ def handle(self, *args, **options): action = ACTION_CREATE logger.info(f"Missing action param: setting actions as '{action}'") - load_thesaurus(input_file, identifier, action) + load_thesaurus(input_file, identifier, action, lang=lang, langs=langs) elif subcommand == COMMAND_AUTOLOAD: autoload_thesauri() diff --git a/geonode/base/management/commands/thesaurus_subcommands/autoload.py b/geonode/base/management/commands/thesaurus_subcommands/autoload.py index 404839b96a4..d9a9d871133 100644 --- a/geonode/base/management/commands/thesaurus_subcommands/autoload.py +++ b/geonode/base/management/commands/thesaurus_subcommands/autoload.py @@ -17,6 +17,7 @@ def autoload_thesauri(): loaded = 0 for app_config in apps.get_app_configs(): thesauri_dir = os.path.join(app_config.path, "thesauri") + logger.debug(f"Looking for auto thesaurus in app '{app_config.name}' path: {thesauri_dir}") if not os.path.isdir(thesauri_dir): continue rdf_files = [f for f in os.listdir(thesauri_dir) if f.lower().endswith(".rdf")] @@ -24,8 +25,8 @@ def autoload_thesauri(): rdf_path = os.path.join(thesauri_dir, rdf_file) logger.info(f"Autoloading thesaurus from app '{app_config.name}': {rdf_path}") try: - load_thesaurus(rdf_path, identifier=None, action=ACTION_UPDATE) + load_thesaurus(rdf_path, identifier=None, action=ACTION_UPDATE, log_details=False) loaded += 1 except Exception as e: - logger.error(f"Failed to load thesaurus '{rdf_path}': {e}") + logger.error(f"Failed to load thesaurus '{rdf_path}': {e}", exc_info=True) logger.info(f"Autoload complete: {loaded} thesaurus file(s) loaded.") diff --git a/geonode/base/management/commands/thesaurus_subcommands/load.py b/geonode/base/management/commands/thesaurus_subcommands/load.py index 09498010a6a..6bf1089c93c 100644 --- a/geonode/base/management/commands/thesaurus_subcommands/load.py +++ b/geonode/base/management/commands/thesaurus_subcommands/load.py @@ -45,13 +45,16 @@ FAKE_BASE_URI = "http://automatically/added/uri/" -def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE): +def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE, default_lang: str = None, langs: List[str] = [], log_details=True): g = Graph() # if the input_file is an UploadedFile object rather than a file path the Graph.parse() # method may not have enough info to correctly guess the type; in this case supply the # name, which should include the extension, to guess_format manually... + # explodes list of comma separated langs into single list of langs + langs = [lang for item in langs for lang in item.split(",")] + filename = input_file.name if isinstance(input_file, UploadedFile) else input_file rdf_format = guess_format(filename) if not identifier: @@ -65,7 +68,7 @@ def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE): if scheme is None: raise CommandError("ConceptScheme not found in file") - default_lang = getattr(settings, "THESAURUS_DEFAULT_LANG", None) + default_lang = default_lang or getattr(settings, "THESAURUS_DEFAULT_LANG", None) or getattr(settings, "LANGUAGE_CODE", 'en') available_titles = [t for t in itertools.chain(g.objects(scheme, DC.title), g.objects(scheme, DCTERMS.title)) @@ -81,15 +84,22 @@ def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE): Thesaurus, {"identifier": identifier}, {"date": date_issued, "description": description, "title": thesaurus_title, "about": str(scheme)}, - {"card_min": 0, "card_max": 0, "facet": False} + {"card_min": 0, "card_max": 0, "facet": False}, + log_details ) tl_cnt = tl_add = 0 tk_cnt = tk_add = 0 tkl_cnt = tkl_add = 0 + tkl_skp = 0 for lang in available_titles: if lang.language is not None: + tl_cnt += 1 + if langs and lang.language not in langs: + logger.debug(f"Skipping label for language '{lang.language}' not in requested langs {langs}") + tkl_skp += 1 + continue thesaurus_label, c = _run_action( action, ThesaurusLabel, @@ -99,8 +109,8 @@ def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE): }, {"label": lang.value}, {}, + log_details ) - tl_cnt += 1 tl_add += 1 if c else 0 for concept in g.subjects(RDF.type, SKOS.Concept): @@ -115,7 +125,8 @@ def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE): available_labels = [t for t in g.objects(concept, SKOS.prefLabel) if isinstance(t, Literal)] alt_label = value_for_language(available_labels, default_lang) or about - logger.info(f" - Parsed Concept -> about:'{about}' alt:'{alt_label}' pref:'{str(pref)}' ") + if log_details: + logger.info(f" - Parsed Concept -> about:'{about}' alt:'{alt_label}' pref:'{str(pref)}' ") tk, c = _run_action( action, @@ -126,14 +137,21 @@ def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE): }, {"alt_label": alt_label}, {}, + log_details ) tk_cnt += 1 tk_add += 1 if c else 0 for _, pref_label in preferredLabel(g, concept): + tkl_cnt += 1 lang = pref_label.language + if langs and lang not in langs: + logger.debug(f"Skipping label for language '{lang}' not in requested langs {langs}") + tkl_skp += 1 + continue label = str(pref_label) - logger.info(f" - Label {lang}: {label}") + if log_details: + logger.info(f" - Label {lang}: {label}") tkl, c = _run_action( action, @@ -144,8 +162,8 @@ def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE): }, {"label": label}, {}, + log_details ) - tkl_cnt += 1 tkl_add += 1 if c else 0 logger.warning(f"Thesaurus added: {cr_t}") @@ -154,7 +172,7 @@ def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE): logger.warning(f"ThesaurusKeywordLabel added: {tkl_add:3}/{tkl_cnt:3}") -def _run_action(action: str, model: type[models.Model], pk_dict, upd_dict, create_dict) -> tuple[models.Model, bool]: +def _run_action(action: str, model: type[models.Model], pk_dict, upd_dict, create_dict, log_details) -> tuple[models.Model, bool]: def update_or_create(defaults=upd_dict, create_defaults=create_dict, **pk_dict): # this signature is available since django 5 obj, created = model.objects.get_or_create(defaults=upd_dict | create_dict, **pk_dict) @@ -162,7 +180,8 @@ def update_or_create(defaults=upd_dict, create_defaults=create_dict, **pk_dict): if not created: rows = model.objects.filter(pk=obj.pk).update(**upd_dict) if rows != 1: - logger.error(f"UPDATED {rows} rows for {model.__name__} -> {pk_dict}") + if log_details: + logger.error(f"UPDATED {rows} rows for {model.__name__} -> {pk_dict}") return obj, created @@ -176,14 +195,17 @@ def update_or_create(defaults=upd_dict, create_defaults=create_dict, **pk_dict): elif action == ACTION_UPDATE: obj, created = update_or_create(defaults=upd_dict, create_defaults=create_dict, **pk_dict) if created: - logger.info(f"{model.__name__} -> Created id:{pk_dict}") + if log_details: + logger.info(f"{model.__name__} -> Created id:{pk_dict}") else: - logger.info(f"{model.__name__} -> Updated id:{pk_dict} DATA:{upd_dict}") + if log_details: + logger.info(f"{model.__name__} -> Updated id:{pk_dict} DATA:{upd_dict}") elif action == ACTION_APPEND: obj, created = model.objects.get_or_create(defaults=upd_dict | create_dict, **pk_dict) if created: - logger.info(f"{model.__name__} -> Created {pk_dict}") + if log_details: + logger.info(f"{model.__name__} -> Created {pk_dict}") else: raise CommandError("No valid action found") From 36a43c3807b0a5df6c6eabbf0b2345c5fd5bf997 Mon Sep 17 00:00:00 2001 From: Emanuele Tajariol Date: Tue, 12 May 2026 18:35:33 +0200 Subject: [PATCH 5/9] Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- geonode/base/management/commands/thesaurus.py | 2 +- .../commands/thesaurus_subcommands/autoload.py | 9 ++++++++- .../management/commands/thesaurus_subcommands/load.py | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/geonode/base/management/commands/thesaurus.py b/geonode/base/management/commands/thesaurus.py index d922b66b761..c7cfd9d0c1c 100644 --- a/geonode/base/management/commands/thesaurus.py +++ b/geonode/base/management/commands/thesaurus.py @@ -117,7 +117,7 @@ def handle(self, *args, **options): action = ACTION_CREATE logger.info(f"Missing action param: setting actions as '{action}'") - load_thesaurus(input_file, identifier, action, lang=lang, langs=langs) + load_thesaurus(input_file, identifier, action, default_lang=lang, langs=langs) elif subcommand == COMMAND_AUTOLOAD: autoload_thesauri() diff --git a/geonode/base/management/commands/thesaurus_subcommands/autoload.py b/geonode/base/management/commands/thesaurus_subcommands/autoload.py index d9a9d871133..df2725d6dfa 100644 --- a/geonode/base/management/commands/thesaurus_subcommands/autoload.py +++ b/geonode/base/management/commands/thesaurus_subcommands/autoload.py @@ -20,7 +20,14 @@ def autoload_thesauri(): logger.debug(f"Looking for auto thesaurus in app '{app_config.name}' path: {thesauri_dir}") if not os.path.isdir(thesauri_dir): continue - rdf_files = [f for f in os.listdir(thesauri_dir) if f.lower().endswith(".rdf")] + try: + rdf_files = [f for f in os.listdir(thesauri_dir) if f.lower().endswith(".rdf")] + except OSError as e: + logger.error( + f"Failed to scan thesauri directory for app '{app_config.name}' at '{thesauri_dir}': {e}", + exc_info=True, + ) + continue for rdf_file in sorted(rdf_files): rdf_path = os.path.join(thesauri_dir, rdf_file) logger.info(f"Autoloading thesaurus from app '{app_config.name}': {rdf_path}") diff --git a/geonode/base/management/commands/thesaurus_subcommands/load.py b/geonode/base/management/commands/thesaurus_subcommands/load.py index 6bf1089c93c..c0c8b028ec9 100644 --- a/geonode/base/management/commands/thesaurus_subcommands/load.py +++ b/geonode/base/management/commands/thesaurus_subcommands/load.py @@ -53,7 +53,7 @@ def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE, def # name, which should include the extension, to guess_format manually... # explodes list of comma separated langs into single list of langs - langs = [lang for item in langs for lang in item.split(",")] + langs = [lang.strip() for item in langs for lang in item.split(",") if lang.strip()] filename = input_file.name if isinstance(input_file, UploadedFile) else input_file rdf_format = guess_format(filename) From cfad81dfb0f114991784826e537232ad277d4789 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 14 May 2026 14:01:01 +0000 Subject: [PATCH 6/9] Fix mutable default argument for langs parameter in load_thesaurus Agent-Logs-Url: https://github.com/GeoNode/geonode/sessions/5771cb43-8066-4e9c-aa9d-1f71a79fadc5 Co-authored-by: etj <717359+etj@users.noreply.github.com> --- .../base/management/commands/thesaurus_subcommands/load.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/geonode/base/management/commands/thesaurus_subcommands/load.py b/geonode/base/management/commands/thesaurus_subcommands/load.py index c0c8b028ec9..586ff400077 100644 --- a/geonode/base/management/commands/thesaurus_subcommands/load.py +++ b/geonode/base/management/commands/thesaurus_subcommands/load.py @@ -45,7 +45,7 @@ FAKE_BASE_URI = "http://automatically/added/uri/" -def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE, default_lang: str = None, langs: List[str] = [], log_details=True): +def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE, default_lang: str = None, langs: List[str] = None, log_details=True): g = Graph() # if the input_file is an UploadedFile object rather than a file path the Graph.parse() @@ -53,7 +53,7 @@ def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE, def # name, which should include the extension, to guess_format manually... # explodes list of comma separated langs into single list of langs - langs = [lang.strip() for item in langs for lang in item.split(",") if lang.strip()] + langs = [lang.strip() for item in (langs or []) for lang in item.split(",") if lang.strip()] filename = input_file.name if isinstance(input_file, UploadedFile) else input_file rdf_format = guess_format(filename) From 6ab74c23dc3154e7b582ba2979df6e604a2f2403 Mon Sep 17 00:00:00 2001 From: Niraj Adhikari Date: Mon, 25 May 2026 12:48:14 +0545 Subject: [PATCH 7/9] Fix patch apps.get_app_configs in correct autoload module --- geonode/tests/test_autoload_thesaurus.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/geonode/tests/test_autoload_thesaurus.py b/geonode/tests/test_autoload_thesaurus.py index 6b9013fc001..32cbcc9cb92 100644 --- a/geonode/tests/test_autoload_thesaurus.py +++ b/geonode/tests/test_autoload_thesaurus.py @@ -24,7 +24,7 @@ from django.test import TestCase -from geonode.base.management.commands.thesaurus import autoload_thesauri +from geonode.base.management.commands.thesaurus_subcommands.autoload import autoload_thesauri from geonode.base.models import Thesaurus @@ -66,7 +66,7 @@ def _make_app_config(self, name, path): def test_autoload_loads_rdf_files_from_thesauri_dirs(self): """autoload_thesauri should load .rdf files found in thesauri/ dirs of installed apps.""" app_configs = [self._make_app_config("fake_app", self.tmp_dir)] - with patch("geonode.base.management.commands.thesaurus.apps.get_app_configs", return_value=app_configs): + with patch("geonode.base.management.commands.thesaurus_subcommands.autoload.apps.get_app_configs", return_value=app_configs): autoload_thesauri() self.assertTrue( @@ -77,7 +77,7 @@ def test_autoload_loads_rdf_files_from_thesauri_dirs(self): def test_autoload_is_idempotent(self): """Calling autoload_thesauri twice should not create duplicate thesauri (uses update action).""" app_configs = [self._make_app_config("fake_app", self.tmp_dir)] - with patch("geonode.base.management.commands.thesaurus.apps.get_app_configs", return_value=app_configs): + with patch("geonode.base.management.commands.thesaurus_subcommands.autoload.apps.get_app_configs", return_value=app_configs): autoload_thesauri() autoload_thesauri() @@ -88,7 +88,7 @@ def test_autoload_skips_apps_without_thesauri_dir(self): """autoload_thesauri should silently skip apps that have no thesauri/ directory.""" app_without_thesauri = self._make_app_config("no_thesauri_app", self.tmp_dir.rstrip("/") + "_no_dir") app_configs = [app_without_thesauri] - with patch("geonode.base.management.commands.thesaurus.apps.get_app_configs", return_value=app_configs): + with patch("geonode.base.management.commands.thesaurus_subcommands.autoload.apps.get_app_configs", return_value=app_configs): # Should not raise autoload_thesauri() @@ -101,7 +101,7 @@ def test_autoload_continues_after_error(self): f.write("THIS IS NOT VALID RDF") app_configs = [self._make_app_config("fake_app", self.tmp_dir)] - with patch("geonode.base.management.commands.thesaurus.apps.get_app_configs", return_value=app_configs): + with patch("geonode.base.management.commands.thesaurus_subcommands.autoload.apps.get_app_configs", return_value=app_configs): # Should not raise despite the bad file autoload_thesauri() From a59ac4d8ab7d0b6881dbe6c4b3b2cfdbd9ae963b Mon Sep 17 00:00:00 2001 From: Niraj Adhikari Date: Mon, 25 May 2026 12:55:09 +0545 Subject: [PATCH 8/9] Fix code formatting --- geonode/tests/test_autoload_thesaurus.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/geonode/tests/test_autoload_thesaurus.py b/geonode/tests/test_autoload_thesaurus.py index 32cbcc9cb92..a0c60e49689 100644 --- a/geonode/tests/test_autoload_thesaurus.py +++ b/geonode/tests/test_autoload_thesaurus.py @@ -66,7 +66,10 @@ def _make_app_config(self, name, path): def test_autoload_loads_rdf_files_from_thesauri_dirs(self): """autoload_thesauri should load .rdf files found in thesauri/ dirs of installed apps.""" app_configs = [self._make_app_config("fake_app", self.tmp_dir)] - with patch("geonode.base.management.commands.thesaurus_subcommands.autoload.apps.get_app_configs", return_value=app_configs): + with patch( + "geonode.base.management.commands.thesaurus_subcommands.autoload.apps.get_app_configs", + return_value=app_configs, + ): autoload_thesauri() self.assertTrue( @@ -77,7 +80,10 @@ def test_autoload_loads_rdf_files_from_thesauri_dirs(self): def test_autoload_is_idempotent(self): """Calling autoload_thesauri twice should not create duplicate thesauri (uses update action).""" app_configs = [self._make_app_config("fake_app", self.tmp_dir)] - with patch("geonode.base.management.commands.thesaurus_subcommands.autoload.apps.get_app_configs", return_value=app_configs): + with patch( + "geonode.base.management.commands.thesaurus_subcommands.autoload.apps.get_app_configs", + return_value=app_configs, + ): autoload_thesauri() autoload_thesauri() @@ -88,7 +94,10 @@ def test_autoload_skips_apps_without_thesauri_dir(self): """autoload_thesauri should silently skip apps that have no thesauri/ directory.""" app_without_thesauri = self._make_app_config("no_thesauri_app", self.tmp_dir.rstrip("/") + "_no_dir") app_configs = [app_without_thesauri] - with patch("geonode.base.management.commands.thesaurus_subcommands.autoload.apps.get_app_configs", return_value=app_configs): + with patch( + "geonode.base.management.commands.thesaurus_subcommands.autoload.apps.get_app_configs", + return_value=app_configs, + ): # Should not raise autoload_thesauri() @@ -101,7 +110,10 @@ def test_autoload_continues_after_error(self): f.write("THIS IS NOT VALID RDF") app_configs = [self._make_app_config("fake_app", self.tmp_dir)] - with patch("geonode.base.management.commands.thesaurus_subcommands.autoload.apps.get_app_configs", return_value=app_configs): + with patch( + "geonode.base.management.commands.thesaurus_subcommands.autoload.apps.get_app_configs", + return_value=app_configs, + ): # Should not raise despite the bad file autoload_thesauri() From 75ad2ed94a5feff0f9258bd6c62c7704216df5dd Mon Sep 17 00:00:00 2001 From: Emanuele Tajariol Date: Tue, 26 May 2026 17:39:05 +0200 Subject: [PATCH 9/9] Improve load final log --- .../commands/thesaurus_subcommands/load.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/geonode/base/management/commands/thesaurus_subcommands/load.py b/geonode/base/management/commands/thesaurus_subcommands/load.py index 586ff400077..d64d664cf45 100644 --- a/geonode/base/management/commands/thesaurus_subcommands/load.py +++ b/geonode/base/management/commands/thesaurus_subcommands/load.py @@ -88,17 +88,16 @@ def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE, def log_details ) - tl_cnt = tl_add = 0 + tl_cnt = tl_add = tl_skp = 0 tk_cnt = tk_add = 0 - tkl_cnt = tkl_add = 0 - tkl_skp = 0 + tkl_cnt = tkl_add = tkl_skp = 0 for lang in available_titles: if lang.language is not None: tl_cnt += 1 if langs and lang.language not in langs: - logger.debug(f"Skipping label for language '{lang.language}' not in requested langs {langs}") - tkl_skp += 1 + logger.debug(f"Skipping thesaurus label for language '{lang.language}' not in requested langs {langs}") + tl_skp += 1 continue thesaurus_label, c = _run_action( action, @@ -166,10 +165,10 @@ def load_thesaurus(input_file, identifier: str, action: str = ACTION_CREATE, def ) tkl_add += 1 if c else 0 - logger.warning(f"Thesaurus added: {cr_t}") - logger.warning(f"ThesaurusLabel added: {tl_add:3}/{tl_cnt:3}") - logger.warning(f"ThesaurusKeyword added: {tk_add:3}/{tk_cnt:3}") - logger.warning(f"ThesaurusKeywordLabel added: {tkl_add:3}/{tkl_cnt:3}") + logger.warning(f"Thesaurus added: {cr_t}") + logger.warning(f"ThesaurusLabel: found: {tl_cnt:3} - added: {tl_add:3} - skipped: {tl_skp:3}") + logger.warning(f"ThesaurusKeyword: found: {tk_cnt:3} - added: {tk_add:3}") + logger.warning(f"ThesaurusKeywordLabel: found: {tkl_cnt:3} - added: {tkl_add:3} - skipped: {tkl_skp:3}") def _run_action(action: str, model: type[models.Model], pk_dict, upd_dict, create_dict, log_details) -> tuple[models.Model, bool]: