From 4a4f1d1f109d1f4cf9bafc7f86b29407edf21ea4 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 14:01:59 +0100 Subject: [PATCH 01/50] convert files: setup.py -> pyproject.toml --- src/broker/pyproject.toml | 28 +++++++++++++++++++++ src/broker/requirements.txt | 1 - src/broker/setup.py | 26 ------------------- src/client/pyproject.toml | 28 +++++++++++++++++++++ src/client/requirements.txt | 1 - src/client/setup.py | 26 ------------------- src/harvester/pyproject.toml | 36 ++++++++++++++++++++++++++ src/harvester/requirements.txt | 1 - src/harvester/setup.py | 26 ------------------- src/server/pyproject.toml | 28 +++++++++++++++++++++ src/server/requirements.txt | 3 --- src/server/setup.py | 30 ---------------------- src/utils/pyproject.toml | 46 ++++++++++++++++++++++++++++++++++ src/utils/requirements.txt | 14 ----------- src/utils/setup.py | 32 ----------------------- 15 files changed, 166 insertions(+), 160 deletions(-) create mode 100644 src/broker/pyproject.toml delete mode 100644 src/broker/requirements.txt delete mode 100644 src/broker/setup.py create mode 100644 src/client/pyproject.toml delete mode 100644 src/client/requirements.txt delete mode 100644 src/client/setup.py create mode 100644 src/harvester/pyproject.toml delete mode 100644 src/harvester/requirements.txt delete mode 100644 src/harvester/setup.py create mode 100644 src/server/pyproject.toml delete mode 100644 src/server/requirements.txt delete mode 100644 src/server/setup.py create mode 100644 src/utils/pyproject.toml delete mode 100644 src/utils/requirements.txt delete mode 100644 src/utils/setup.py diff --git a/src/broker/pyproject.toml b/src/broker/pyproject.toml new file mode 100644 index 00000000..12055391 --- /dev/null +++ b/src/broker/pyproject.toml @@ -0,0 +1,28 @@ +[build-system] +requires = ["setuptools>=69.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "operandi-broker" +version = "2.23.0" +authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] +license = {text = "Apache License 2.0"} +description = "Broker for Operandi" +requires-python = ">=3.10" +dependencies = [ + "operandi-utils>=2.23.0", + "requests>2.32.0" +] + +[project.readme] +file = "README.md" +content-type = "text/markdown" + +[project.scripts] +operandi-broker = "operandi_broker:cli" + +[tool.setuptools] +package-dir = {"" = "."} + +[tool.setuptools.packages.find] +include = ["operandi_broker*"] diff --git a/src/broker/requirements.txt b/src/broker/requirements.txt deleted file mode 100644 index 4624e139..00000000 --- a/src/broker/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -requests>2.32.0 diff --git a/src/broker/setup.py b/src/broker/setup.py deleted file mode 100644 index e1589d68..00000000 --- a/src/broker/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup -from operandi_utils.constants import OPERANDI_VERSION - -install_requires = open('requirements.txt').read().split('\n') -install_requires.append(f'operandi_utils == {OPERANDI_VERSION}') - -setup( - name='operandi_broker', - version=OPERANDI_VERSION, - description='OPERANDI - Service Broker', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Mehmed Mustafa', - author_email='mehmed.mustafa@gwdg.de', - url='https://github.com/subugoe/operandi', - license='Apache License 2.0', - packages=['operandi_broker'], - package_data={}, - install_requires=install_requires, - entry_points={ - 'console_scripts': [ - 'operandi-broker=operandi_broker:cli', - ] - }, -) diff --git a/src/client/pyproject.toml b/src/client/pyproject.toml new file mode 100644 index 00000000..8b4c39c8 --- /dev/null +++ b/src/client/pyproject.toml @@ -0,0 +1,28 @@ +[build-system] +requires = ["setuptools>=69.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "operandi-client" +version = "2.23.0" +authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] +license = {text = "Apache License 2.0"} +description = "Client for Operandi" +requires-python = ">=3.10" +dependencies = [ + "operandi-utils>=2.23.0", + "requests>2.32.0" +] + +[project.readme] +file = "README.md" +content-type = "text/markdown" + +[project.scripts] +operandi-client = "operandi_client:cli" + +[tool.setuptools] +package-dir = {"" = "."} + +[tool.setuptools.packages.find] +include = ["operandi_client*"] diff --git a/src/client/requirements.txt b/src/client/requirements.txt deleted file mode 100644 index 722a3854..00000000 --- a/src/client/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -requests>2.32.0 \ No newline at end of file diff --git a/src/client/setup.py b/src/client/setup.py deleted file mode 100644 index efb2f192..00000000 --- a/src/client/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup -from operandi_utils.constants import OPERANDI_VERSION - -install_requires = open('requirements.txt').read().split('\n') -install_requires.append(f'operandi_utils == {OPERANDI_VERSION}') - -setup( - name='operandi_client', - version=OPERANDI_VERSION, - description='OPERANDI - Client', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Mehmed Mustafa', - author_email='mehmed.mustafa@gwdg.de', - url='https://github.com/subugoe/operandi', - license='Apache License 2.0', - packages=['operandi_client'], - package_data={'': []}, - install_requires=install_requires, - entry_points={ - 'console_scripts': [ - 'operandi-client=operandi_client:cli', - ] - }, -) diff --git a/src/harvester/pyproject.toml b/src/harvester/pyproject.toml new file mode 100644 index 00000000..d050af77 --- /dev/null +++ b/src/harvester/pyproject.toml @@ -0,0 +1,36 @@ +[build-system] +requires = ["setuptools>=69.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "operandi-harvester" +version = "2.23.0" +authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] +license = {text = "Apache License 2.0"} +description = "Harvester for Operandi" +requires-python = ">=3.10" +dependencies = [ + "operandi-utils>=2.23.0", + "requests>2.32.0" +] + +[project.readme] +file = "README.md" +content-type = "text/markdown" + +[project.scripts] +operandi-harvester = "operandi_harvester:cli" + +[tool.setuptools] +package-dir = {"" = "."} + +[tool.setuptools.packages.find] +include = ["operandi_harvester*"] + +[tool.setuptools.package-data] +operandi_utils = [ + "assets/*.ocrd.zip", + "assets/*.json", + "assets/*.txt", + "assets/*.nf" +] diff --git a/src/harvester/requirements.txt b/src/harvester/requirements.txt deleted file mode 100644 index 4624e139..00000000 --- a/src/harvester/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -requests>2.32.0 diff --git a/src/harvester/setup.py b/src/harvester/setup.py deleted file mode 100644 index 14e2409c..00000000 --- a/src/harvester/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup -from operandi_utils.constants import OPERANDI_VERSION - -install_requires = open('requirements.txt').read().split('\n') -install_requires.append(f'operandi_utils == {OPERANDI_VERSION}') - -setup( - name='operandi_harvester', - version=OPERANDI_VERSION, - description='OPERANDI - Harvester', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Mehmed Mustafa', - author_email='mehmed.mustafa@gwdg.de', - url='https://github.com/subugoe/operandi', - license='Apache License 2.0', - packages=['operandi_harvester'], - package_data={'': ['assets/*.ocrd.zip', 'assets/*.json', 'assets/*.txt', 'assets/*.nf']}, - install_requires=install_requires, - entry_points={ - 'console_scripts': [ - 'operandi-harvester=operandi_harvester:cli', - ] - }, -) diff --git a/src/server/pyproject.toml b/src/server/pyproject.toml new file mode 100644 index 00000000..e42ab7a9 --- /dev/null +++ b/src/server/pyproject.toml @@ -0,0 +1,28 @@ +[build-system] +requires = ["setuptools>=69.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "operandi-server" +version = "2.23.0" +authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] +license = {text = "Apache License 2.0"} +description = "Server for Operandi" +requires-python = ">=3.10" +dependencies = [ + "operandi-utils>=2.23.0", + "fastapi>=0.95.0", + "uvicorn==0.21.0", + "psutil" +] + +[project.readme] +file = "README.md" +content-type = "text/markdown" + +[project.scripts] +operandi-server = "operandi_server:cli" + +[tool.setuptools] +packages = { find = {} } +include-package-data = true diff --git a/src/server/requirements.txt b/src/server/requirements.txt deleted file mode 100644 index b3f268d9..00000000 --- a/src/server/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -fastapi>=0.95.0 -uvicorn==0.21.0 -psutil diff --git a/src/server/setup.py b/src/server/setup.py deleted file mode 100644 index 8609a120..00000000 --- a/src/server/setup.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup -from operandi_utils.constants import OPERANDI_VERSION - -install_requires = open('requirements.txt').read().split('\n') -install_requires.append(f'operandi_utils == {OPERANDI_VERSION}') - -setup( - name='operandi_server', - version=OPERANDI_VERSION, - description='OPERANDI - Server', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Mehmed Mustafa', - author_email='mehmed.mustafa@gwdg.de', - url='https://github.com/subugoe/operandi', - license='Apache License 2.0', - packages=[ - 'operandi_server', - 'operandi_server.models', - 'operandi_server.routers' - ], - package_data={}, - install_requires=install_requires, - entry_points={ - 'console_scripts': [ - 'operandi-server=operandi_server:cli', - ] - }, -) diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml new file mode 100644 index 00000000..03a701f7 --- /dev/null +++ b/src/utils/pyproject.toml @@ -0,0 +1,46 @@ +[build-system] +requires = ["setuptools>=69.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "operandi-utils" +version = "2.23.0" +authors = [ { name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de" } ] +license = { text = "Apache License 2.0" } +description = "Shared utilities for Operandi" +requires-python = ">=3.10" +dependencies = [ + "aiofiles>=0.8.0", + "beanie==1.11.7", + "chardet>=5.1.0", + "click>=7", + "clint==0.5.1", + "loguru>=0.6.0", + "httpx>=0.24.0", + "ocrd>=3.0.4", + "paramiko>=3.4.0", + "pika>=1.2.0", + "pydantic>=1.9.1", + "pymongo >= 4.3.3", + "python-dotenv>=1.0.0", + "python-multipart>=0.0.5" +] + +[project.readme] +file = "README.md" +content-type = "text/markdown" + +[project.scripts] +oton-converter = "operandi_utils.oton:cli" + +[tool.setuptools] +packages = { find = {} } +include-package-data = true + +[tool.setuptools.package-data] +operandi_utils = [ + "batch_scripts/*.sh", + "nextflow_workflows/*.nf", + "ocrd_process_workflows/*.txt", + "ocrd-all-tool.json" +] diff --git a/src/utils/requirements.txt b/src/utils/requirements.txt deleted file mode 100644 index 3b0fb894..00000000 --- a/src/utils/requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -aiofiles>=0.8.0 -beanie==1.11.7 -chardet>=5.1.0 -click>=7 -clint==0.5.1 -loguru>=0.6.0 -httpx>=0.24.0 -ocrd>=3.0.4 -paramiko>=3.4.0 -pika>=1.2.0 -pydantic>=1.9.1 -pymongo >= 4.3.3 -python-dotenv>=1.0.0 -python-multipart>=0.0.5 diff --git a/src/utils/setup.py b/src/utils/setup.py deleted file mode 100644 index ed528dec..00000000 --- a/src/utils/setup.py +++ /dev/null @@ -1,32 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup - -install_requires = open('requirements.txt').read().split('\n') - -setup( - name='operandi_utils', - version='2.23.0', - description='OPERANDI - Utils', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Mehmed Mustafa', - author_email='mehmed.mustafa@gwdg.de', - url='https://github.com/subugoe/operandi', - license='Apache License 2.0', - packages=[ - 'operandi_utils', - 'operandi_utils.database', - 'operandi_utils.hpc', - 'operandi_utils.oton', - 'operandi_utils.rabbitmq' - ], - package_data={ - '': ['batch_scripts/*.sh', 'nextflow_workflows/*.nf', 'ocrd_process_workflows/*.txt', 'ocrd-all-tool.json'] - }, - install_requires=install_requires, - entry_points={ - 'console_scripts': [ - 'oton-converter=operandi_utils.oton:cli', - ] - } -) From 6620d3560427ff002b00c26b05438e69c02fa303 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 14:11:35 +0100 Subject: [PATCH 02/50] update ocrd dependency to 3.9.1 --- src/utils/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index 03a701f7..59f40330 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -17,7 +17,7 @@ dependencies = [ "clint==0.5.1", "loguru>=0.6.0", "httpx>=0.24.0", - "ocrd>=3.0.4", + "ocrd>=3.9.1", "paramiko>=3.4.0", "pika>=1.2.0", "pydantic>=1.9.1", From bfb53af9edb5e89729eeefef3f1abd4ea8bc8bd6 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 14:28:06 +0100 Subject: [PATCH 03/50] remove: requirements.txt from docker files --- src/Dockerfile_broker | 2 -- src/Dockerfile_server | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/Dockerfile_broker b/src/Dockerfile_broker index 5a8bdef7..7b74629a 100644 --- a/src/Dockerfile_broker +++ b/src/Dockerfile_broker @@ -30,9 +30,7 @@ RUN apt-get update && apt-get install -y \ RUN python3 -m pip install --upgrade pip setuptools RUN pip3 install -U pip wheel -RUN python3 -m pip install -r /usr/src/utils/requirements.txt --ignore-installed RUN pip3 install /usr/src/utils -RUN python3 -m pip install -r /usr/src/broker/requirements.txt --ignore-installed RUN pip3 install /usr/src/broker RUN echo "Operandi broker build success" diff --git a/src/Dockerfile_server b/src/Dockerfile_server index 9ebc40b0..8bfb91e9 100644 --- a/src/Dockerfile_server +++ b/src/Dockerfile_server @@ -27,9 +27,7 @@ RUN apt-get update && apt-get install -y \ RUN python3 -m pip install --upgrade pip setuptools RUN pip3 install -U pip wheel -RUN python3 -m pip install -r /usr/src/utils/requirements.txt --ignore-installed RUN pip3 install /usr/src/utils -RUN python3 -m pip install -r /usr/src/server/requirements.txt --ignore-installed RUN pip3 install /usr/src/server RUN echo "Operandi server build success" From 7863b8829ffc3e49a4d605f40d44768bdac5a112 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 14:54:19 +0100 Subject: [PATCH 04/50] update: docker files of broker and server --- src/Dockerfile_broker | 87 ++++++++++++++++++++++++++++--------------- src/Dockerfile_server | 84 +++++++++++++++++++++++++++-------------- 2 files changed, 114 insertions(+), 57 deletions(-) diff --git a/src/Dockerfile_broker b/src/Dockerfile_broker index 7b74629a..1784905c 100644 --- a/src/Dockerfile_broker +++ b/src/Dockerfile_broker @@ -1,36 +1,63 @@ -FROM ubuntu:22.04 +# ================================ +# Builder stage: build all Python packages +# ================================ +FROM python:3.11-slim as builder -MAINTAINER OPERANDI -ENV DEBIAN_FRONTEND noninteractive -ENV PYTHONIOENCODING utf8 -ENV LC_ALL=C.UTF-8 -ENV LANG=C.UTF-8 +LABEL maintainer="OPERANDI" -COPY broker/ /usr/src/broker -COPY utils/ /usr/src/utils +ENV PYTHONIOENCODING=utf8 \ + LC_ALL=C.UTF-8 \ + LANG=C.UTF-8 \ + DEBIAN_FRONTEND=noninteractive -WORKDIR /usr/src/broker/operandi_broker +# Install system build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + git \ + curl \ + make \ + wget \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/src + +# Copy and install utils first (for caching) +COPY utils/ ./utils +RUN python3 -m pip install --upgrade pip setuptools wheel \ + && pip wheel --no-deps --wheel-dir /wheels ./utils + +# Copy and install broker +COPY broker/ ./broker +RUN pip wheel --no-deps --wheel-dir /wheels ./broker + +# ================================ +# Final stage: minimal runtime +# ================================ +FROM python:3.11-slim + +ENV PYTHONIOENCODING=utf8 \ + LC_ALL=C.UTF-8 \ + LANG=C.UTF-8 -# install dependencies -RUN apt-get update && apt-get install -y \ - apt-transport-https \ - ca-certificates \ - curl \ - git \ - gnupg-agent \ - make \ - python3 \ - python3-dev \ - python3-pip \ - python3-venv \ - software-properties-common \ - sudo \ - time \ - wget - -RUN python3 -m pip install --upgrade pip setuptools -RUN pip3 install -U pip wheel -RUN pip3 install /usr/src/utils -RUN pip3 install /usr/src/broker +WORKDIR /usr/src +# Install runtime dependencies only (no build tools needed) +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy wheels from builder and install +COPY --from=builder /wheels /wheels +RUN python3 -m pip install --upgrade pip setuptools wheel \ + && pip install /wheels/* + +# Copy broker code (optional if entry points are installed) +COPY broker/ ./broker +WORKDIR /usr/src/broker/operandi_broker + +# Build success indicator RUN echo "Operandi broker build success" + +# Optional default command +CMD ["python3", "-m", "operandi_broker", "--version"] diff --git a/src/Dockerfile_server b/src/Dockerfile_server index 8bfb91e9..3cd453e4 100644 --- a/src/Dockerfile_server +++ b/src/Dockerfile_server @@ -1,33 +1,63 @@ -FROM ubuntu:22.04 +# ================================ +# Builder stage: build all Python packages +# ================================ +FROM python:3.11-slim as builder -MAINTAINER OPERANDI -ENV DEBIAN_FRONTEND noninteractive -ENV PYTHONIOENCODING utf8 -ENV LC_ALL=C.UTF-8 -ENV LANG=C.UTF-8 +LABEL maintainer="OPERANDI" -COPY server/ /usr/src/server -COPY utils/ /usr/src/utils +ENV PYTHONIOENCODING=utf8 \ + LC_ALL=C.UTF-8 \ + LANG=C.UTF-8 \ + DEBIAN_FRONTEND=noninteractive -WORKDIR /usr/src/server/operandi_server +# Install system build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + git \ + curl \ + make \ + wget \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/src + +# Copy and install utils first (for caching) +COPY utils/ ./utils +RUN python3 -m pip install --upgrade pip setuptools wheel \ + && pip wheel --no-deps --wheel-dir /wheels ./utils + +# Copy and install server +COPY server/ ./server +RUN pip wheel --no-deps --wheel-dir /wheels ./server + +# ================================ +# Final stage: minimal runtime +# ================================ +FROM python:3.11-slim + +ENV PYTHONIOENCODING=utf8 \ + LC_ALL=C.UTF-8 \ + LANG=C.UTF-8 -# install dependencies -RUN apt-get update && apt-get install -y \ - curl \ - git \ - make \ - python3 \ - python3-dev \ - python3-pip \ - python3-venv \ - software-properties-common \ - sudo \ - time \ - wget - -RUN python3 -m pip install --upgrade pip setuptools -RUN pip3 install -U pip wheel -RUN pip3 install /usr/src/utils -RUN pip3 install /usr/src/server +WORKDIR /usr/src +# Install runtime dependencies only (no build tools needed) +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy wheels from builder and install +COPY --from=builder /wheels /wheels +RUN python3 -m pip install --upgrade pip setuptools wheel \ + && pip install /wheels/* + +# Copy server code (optional if entry points are installed) +COPY server/ ./server +WORKDIR /usr/src/server/operandi_server + +# Build success indicator RUN echo "Operandi server build success" + +# Optional default command +CMD ["python3", "-m", "operandi_server", "--version"] From 0cd2a4bd1e0f9eb159d6f3c0368df82d716520b5 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 15:34:41 +0100 Subject: [PATCH 05/50] fix: use proper package import --- src/utils/operandi_utils/oton/constants.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/utils/operandi_utils/oton/constants.py b/src/utils/operandi_utils/oton/constants.py index 7cbaea71..c7ce94c5 100644 --- a/src/utils/operandi_utils/oton/constants.py +++ b/src/utils/operandi_utils/oton/constants.py @@ -1,6 +1,6 @@ from json import load from os import environ -from pkg_resources import resource_filename +from importlib import resources BS: str = '{}' SPACES = ' ' @@ -11,8 +11,7 @@ CONST_METS_SOCKET_PATH: str = 'mets_socket_path' CONST_WORKSPACE_DIR: str = 'workspace_dir' -OCRD_ALL_JSON_FILE = resource_filename(__name__, 'ocrd-all-tool.json') -with open(OCRD_ALL_JSON_FILE) as f: +with resources.open_text(package="operandi_utils.oton", resource="ocrd-all-tool.json") as f: OCRD_ALL_JSON = load(f) OTON_LOG_LEVEL = environ.get("OTON_LOG_LEVEL", "INFO") From b5d32b3e7bc6092ef171c2a9207126d9315b808c Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 15:35:09 +0100 Subject: [PATCH 06/50] set: beanie and pydantic versions to >=2 --- src/utils/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index 59f40330..eed13bc2 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -11,7 +11,7 @@ description = "Shared utilities for Operandi" requires-python = ">=3.10" dependencies = [ "aiofiles>=0.8.0", - "beanie==1.11.7", + "beanie>=2.0.1", "chardet>=5.1.0", "click>=7", "clint==0.5.1", @@ -20,7 +20,7 @@ dependencies = [ "ocrd>=3.9.1", "paramiko>=3.4.0", "pika>=1.2.0", - "pydantic>=1.9.1", + "pydantic>=2.12.5", "pymongo >= 4.3.3", "python-dotenv>=1.0.0", "python-multipart>=0.0.5" From 2c8957912ef496f871de2ad379dece1c3a1fda31 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 15:40:19 +0100 Subject: [PATCH 07/50] fix: pydantic warning --- src/server/operandi_server/models/base.py | 10 +++++----- src/server/operandi_server/models/discovery.py | 2 +- src/server/operandi_server/models/user.py | 4 ++-- src/server/operandi_server/models/workflow.py | 4 ++-- src/server/operandi_server/models/workspace.py | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/server/operandi_server/models/base.py b/src/server/operandi_server/models/base.py index 9d5a9a1f..05a52520 100644 --- a/src/server/operandi_server/models/base.py +++ b/src/server/operandi_server/models/base.py @@ -13,7 +13,7 @@ class Resource(BaseModel): deleted: bool class Config: - allow_population_by_field_name = True + validate_by_name = True class WorkflowArguments(BaseModel): workspace_id: str @@ -23,7 +23,7 @@ class WorkflowArguments(BaseModel): mets_name: Optional[str] = DEFAULT_METS_BASENAME class Config: - allow_population_by_field_name = True + validate_by_name = True class SbatchArguments(BaseModel): partition: str = HPC_NHR_JOB_DEFAULT_PARTITION # partition to be used @@ -31,7 +31,7 @@ class SbatchArguments(BaseModel): ram: int = 64 # RAM (in GB) per job allocated by default class Config: - allow_population_by_field_name = True + validate_by_name = True class OlahdUploadArguments(BaseModel): username: str @@ -39,7 +39,7 @@ class OlahdUploadArguments(BaseModel): endpoint: str class Config: - allow_population_by_field_name = True + validate_by_name = True class MetsUrlRequest(BaseModel): mets_url: str = Field(..., description="The mets url") @@ -47,4 +47,4 @@ class MetsUrlRequest(BaseModel): mets_basename: str = Field(default=DEFAULT_METS_BASENAME, description="The mets file basename") class Config: - allow_population_by_field_name = True + validate_by_name = True diff --git a/src/server/operandi_server/models/discovery.py b/src/server/operandi_server/models/discovery.py index cc0b1d57..1875f22c 100644 --- a/src/server/operandi_server/models/discovery.py +++ b/src/server/operandi_server/models/discovery.py @@ -12,4 +12,4 @@ class PYDiscovery(BaseModel): has_docker: bool = Field(default=False, description="Whether the OCR-D executables run in a Docker container") class Config: - allow_population_by_field_name = True + validate_by_name = True diff --git a/src/server/operandi_server/models/user.py b/src/server/operandi_server/models/user.py index b2edd35a..fa08fa12 100644 --- a/src/server/operandi_server/models/user.py +++ b/src/server/operandi_server/models/user.py @@ -13,7 +13,7 @@ class PYUserAction(BaseModel): action: str = Field(..., description="Description of the user action") class Config: - allow_population_by_field_name = True + validate_by_name = True @staticmethod def from_db_user_account(action: str, db_user_account: DBUserAccount): @@ -37,7 +37,7 @@ class PYUserInfo(BaseModel): details: str = Field(..., description="More details about the account") class Config: - allow_population_by_field_name = True + validate_by_name = True @staticmethod def from_db_user_account(db_user_account: DBUserAccount): diff --git a/src/server/operandi_server/models/workflow.py b/src/server/operandi_server/models/workflow.py index ff0c1f81..243a6b8e 100644 --- a/src/server/operandi_server/models/workflow.py +++ b/src/server/operandi_server/models/workflow.py @@ -19,7 +19,7 @@ class WorkflowRsrc(Resource): producible_file_groups: List[str] class Config: - allow_population_by_field_name = True + validate_by_name = True @staticmethod def from_db_workflow(db_workflow: DBWorkflow): @@ -49,7 +49,7 @@ class WorkflowJobRsrc(Resource): workspace_rsrc: Optional[WorkspaceRsrc] class Config: - allow_population_by_field_name = True + validate_by_name = True @staticmethod def from_db_workflow_job(db_workflow_job: DBWorkflowJob, db_workflow: DBWorkflow, db_workspace: DBWorkspace): diff --git a/src/server/operandi_server/models/workspace.py b/src/server/operandi_server/models/workspace.py index e083ea2e..5ef82bfa 100644 --- a/src/server/operandi_server/models/workspace.py +++ b/src/server/operandi_server/models/workspace.py @@ -23,7 +23,7 @@ class WorkspaceRsrc(Resource): bag_info_adds: Optional[dict] class Config: - allow_population_by_field_name = True + validate_by_name = True @staticmethod def from_db_workspace(db_workspace: DBWorkspace): From cfe45a0e0e6ccb3bd344ab1d5854bb813b8a97e0 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 16:38:08 +0100 Subject: [PATCH 08/50] replace: motor with pymongo async --- src/utils/operandi_utils/database/base.py | 7 ++++--- src/utils/pyproject.toml | 2 +- tests/requirements.txt | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/utils/operandi_utils/database/base.py b/src/utils/operandi_utils/database/base.py index 1984cca2..c48a8690 100644 --- a/src/utils/operandi_utils/database/base.py +++ b/src/utils/operandi_utils/database/base.py @@ -1,7 +1,7 @@ from logging import getLogger from os import environ from beanie import init_beanie -from motor.motor_asyncio import AsyncIOMotorClient +from pymongo import AsyncMongoClient from operandi_utils import call_sync from .models import DBHPCSlurmJob, DBUserAccount, DBWorkflow, DBWorkflowJob, DBWorkspace @@ -37,9 +37,10 @@ async def db_initiate_database( DBWorkflowJob, DBWorkspace ] - client = AsyncIOMotorClient(db_url) + client = AsyncMongoClient(db_url) + db = client[db_name] # Documentation: https://beanie-odm.dev/ - await init_beanie(database=client.get_default_database(default=db_name), document_models=doc_models) + await init_beanie(database=db, document_models=doc_models) @call_sync diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index eed13bc2..dde41ec0 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "paramiko>=3.4.0", "pika>=1.2.0", "pydantic>=2.12.5", - "pymongo >= 4.3.3", + "pymongo >= 4.15.5", "python-dotenv>=1.0.0", "python-multipart>=0.0.5" ] diff --git a/tests/requirements.txt b/tests/requirements.txt index e36d9b36..caed0285 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -2,7 +2,7 @@ autopep8 click generateDS == 2.35.20 pylint -pymongo >= 4.3.3 +pymongo >= 4.15.5 pytest >= 7.0.0 pytest-docker>=1.0.0 requests>2.32.0 From 44d84ed7d60ad9fb1b260bc1daaeacecba78e90b Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 16:48:26 +0100 Subject: [PATCH 09/50] resolve another deprecation --- src/utils/operandi_utils/oton/constants.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/utils/operandi_utils/oton/constants.py b/src/utils/operandi_utils/oton/constants.py index c7ce94c5..8c1788c8 100644 --- a/src/utils/operandi_utils/oton/constants.py +++ b/src/utils/operandi_utils/oton/constants.py @@ -11,7 +11,8 @@ CONST_METS_SOCKET_PATH: str = 'mets_socket_path' CONST_WORKSPACE_DIR: str = 'workspace_dir' -with resources.open_text(package="operandi_utils.oton", resource="ocrd-all-tool.json") as f: +ocrd_all_file = resources.files("operandi_utils.oton") / "ocrd-all-tool.json" +with ocrd_all_file.open("r", encoding="utf-8") as f: OCRD_ALL_JSON = load(f) OTON_LOG_LEVEL = environ.get("OTON_LOG_LEVEL", "INFO") From 4222ede3dbc0e3b071862674a55aba5829e5a00b Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 16:56:35 +0100 Subject: [PATCH 10/50] refactor: remove class Config --- src/server/operandi_server/models/base.py | 22 +++++-------------- .../operandi_server/models/discovery.py | 6 ++--- src/server/operandi_server/models/user.py | 10 +++------ src/server/operandi_server/models/workflow.py | 6 ----- .../operandi_server/models/workspace.py | 3 --- 5 files changed, 11 insertions(+), 36 deletions(-) diff --git a/src/server/operandi_server/models/base.py b/src/server/operandi_server/models/base.py index 05a52520..f1e75d60 100644 --- a/src/server/operandi_server/models/base.py +++ b/src/server/operandi_server/models/base.py @@ -1,10 +1,11 @@ from datetime import datetime -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from typing import Optional from operandi_utils.hpc.constants import HPC_NHR_JOB_DEFAULT_PARTITION from ..constants import DEFAULT_FILE_GRP, DEFAULT_METS_BASENAME class Resource(BaseModel): + model_config = ConfigDict(validate_by_name=True) user_id: str = Field(..., description="The unique id of the user who created the resource") resource_id: str = Field(..., description="The unique id of the resource") resource_url: str = Field(..., description="The unique URL of the resource") @@ -12,39 +13,28 @@ class Resource(BaseModel): datetime: datetime deleted: bool - class Config: - validate_by_name = True - class WorkflowArguments(BaseModel): + model_config = ConfigDict(validate_by_name=True) workspace_id: str input_file_grp: Optional[str] = DEFAULT_FILE_GRP remove_file_grps: Optional[str] = "" preserve_file_grps: Optional[str] = "" mets_name: Optional[str] = DEFAULT_METS_BASENAME - class Config: - validate_by_name = True - class SbatchArguments(BaseModel): + model_config = ConfigDict(validate_by_name=True) partition: str = HPC_NHR_JOB_DEFAULT_PARTITION # partition to be used cpus: int = 4 # cpus per job allocated by default ram: int = 64 # RAM (in GB) per job allocated by default - class Config: - validate_by_name = True - class OlahdUploadArguments(BaseModel): + model_config = ConfigDict(validate_by_name=True) username: str password: str endpoint: str - class Config: - validate_by_name = True - class MetsUrlRequest(BaseModel): + model_config = ConfigDict(validate_by_name=True) mets_url: str = Field(..., description="The mets url") preserve_file_grps: str = Field(..., description="The file groups to be preserved") mets_basename: str = Field(default=DEFAULT_METS_BASENAME, description="The mets file basename") - - class Config: - validate_by_name = True diff --git a/src/server/operandi_server/models/discovery.py b/src/server/operandi_server/models/discovery.py index 1875f22c..ea38ab68 100644 --- a/src/server/operandi_server/models/discovery.py +++ b/src/server/operandi_server/models/discovery.py @@ -1,7 +1,8 @@ -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field class PYDiscovery(BaseModel): + model_config = ConfigDict(validate_by_name=True) ram: float = Field(default=0.0, description="All available RAM in bytes") cpu_cores: int = Field(default=0, description="Number of available CPU cores") has_cuda: bool = Field(default=False, description="Whether deployment supports NVIDIA's CUDA") @@ -10,6 +11,3 @@ class PYDiscovery(BaseModel): ocrd_all_version: str = Field( default="Ocrd all version not detected", description="Git tag of the ocrd_all version implemented") has_docker: bool = Field(default=False, description="Whether the OCR-D executables run in a Docker container") - - class Config: - validate_by_name = True diff --git a/src/server/operandi_server/models/user.py b/src/server/operandi_server/models/user.py index fa08fa12..2779a948 100644 --- a/src/server/operandi_server/models/user.py +++ b/src/server/operandi_server/models/user.py @@ -1,9 +1,10 @@ -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from operandi_utils.constants import AccountType from operandi_utils.database.models import DBUserAccount class PYUserAction(BaseModel): + model_config = ConfigDict(validate_by_name=True) institution_id: str = Field(..., description="Institution id of the user") user_id: str = Field(..., description="Unique id of the user") email: str = Field(..., description="Email linked to this User") @@ -12,9 +13,6 @@ class PYUserAction(BaseModel): details: str = Field(..., description="More details about the account") action: str = Field(..., description="Description of the user action") - class Config: - validate_by_name = True - @staticmethod def from_db_user_account(action: str, db_user_account: DBUserAccount): return PYUserAction( @@ -29,6 +27,7 @@ def from_db_user_account(action: str, db_user_account: DBUserAccount): class PYUserInfo(BaseModel): + model_config = ConfigDict(validate_by_name=True) institution_id: str = Field(..., description="Institution id of the user") user_id: str = Field(..., description="Unique id of the user") email: str = Field(..., description="Email linked to this User") @@ -36,9 +35,6 @@ class PYUserInfo(BaseModel): approved_user: bool = Field(False, description="Whether the account was admin approved and fully functional") details: str = Field(..., description="More details about the account") - class Config: - validate_by_name = True - @staticmethod def from_db_user_account(db_user_account: DBUserAccount): return PYUserInfo( diff --git a/src/server/operandi_server/models/workflow.py b/src/server/operandi_server/models/workflow.py index 243a6b8e..377a2c2c 100644 --- a/src/server/operandi_server/models/workflow.py +++ b/src/server/operandi_server/models/workflow.py @@ -18,9 +18,6 @@ class WorkflowRsrc(Resource): executable_steps: List[str] producible_file_groups: List[str] - class Config: - validate_by_name = True - @staticmethod def from_db_workflow(db_workflow: DBWorkflow): return WorkflowRsrc( @@ -48,9 +45,6 @@ class WorkflowJobRsrc(Resource): workflow_rsrc: Optional[WorkflowRsrc] workspace_rsrc: Optional[WorkspaceRsrc] - class Config: - validate_by_name = True - @staticmethod def from_db_workflow_job(db_workflow_job: DBWorkflowJob, db_workflow: DBWorkflow, db_workspace: DBWorkspace): return WorkflowJobRsrc( diff --git a/src/server/operandi_server/models/workspace.py b/src/server/operandi_server/models/workspace.py index 5ef82bfa..39d5b57b 100644 --- a/src/server/operandi_server/models/workspace.py +++ b/src/server/operandi_server/models/workspace.py @@ -22,9 +22,6 @@ class WorkspaceRsrc(Resource): mets_basename: Optional[str] bag_info_adds: Optional[dict] - class Config: - validate_by_name = True - @staticmethod def from_db_workspace(db_workspace: DBWorkspace): return WorkspaceRsrc( From 9ce2dd0db2175d91caa197c77282933ba954b307 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 19:20:25 +0100 Subject: [PATCH 11/50] add: async versions of server tests --- pytest.ini | 2 + src/utils/operandi_utils/database/base.py | 2 +- tests/fixtures/broker.py | 4 +- tests/fixtures/database.py | 22 ++-- tests/fixtures/server.py | 13 +- tests/helpers_asserts.py | 29 ++--- tests/requirements.txt | 6 +- tests/tests_server/test_endpoint_root.py | 6 +- tests/tests_server/test_endpoint_workflow.py | 112 ++++++++---------- tests/tests_server/test_endpoint_workspace.py | 57 +++++---- 10 files changed, 131 insertions(+), 122 deletions(-) create mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..2f4c80e3 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +asyncio_mode = auto diff --git a/src/utils/operandi_utils/database/base.py b/src/utils/operandi_utils/database/base.py index c48a8690..8c7a6ed3 100644 --- a/src/utils/operandi_utils/database/base.py +++ b/src/utils/operandi_utils/database/base.py @@ -38,7 +38,7 @@ async def db_initiate_database( DBWorkspace ] client = AsyncMongoClient(db_url) - db = client[db_name] + db = client.get_default_database(default=db_name) # Documentation: https://beanie-odm.dev/ await init_beanie(database=db, document_models=doc_models) diff --git a/tests/fixtures/broker.py b/tests/fixtures/broker.py index 5b689493..d1040d46 100644 --- a/tests/fixtures/broker.py +++ b/tests/fixtures/broker.py @@ -5,8 +5,8 @@ @fixture(scope="session", name="service_broker") -def fixture_operandi_broker(): - assert_availability_db(environ.get("OPERANDI_DB_URL")) +async def fixture_operandi_broker(): + await assert_availability_db(environ.get("OPERANDI_DB_URL")) service_broker = ServiceBroker(test_sbatch=True) yield service_broker service_broker.kill_workers() diff --git a/tests/fixtures/database.py b/tests/fixtures/database.py index 5817bb61..50ee62fe 100644 --- a/tests/fixtures/database.py +++ b/tests/fixtures/database.py @@ -1,6 +1,6 @@ from os import environ -from pymongo import MongoClient -from pytest import fixture +from pymongo import AsyncMongoClient +from pytest import fixture, mark from tests.helpers_asserts import assert_availability_db @@ -8,37 +8,37 @@ @fixture(scope="session") -def fixture_test_mongo_client(): - assert_availability_db(environ.get("OPERANDI_DB_URL")) - mongo_client = MongoClient( +async def fixture_test_mongo_client(): + await assert_availability_db(environ.get("OPERANDI_DB_URL")) + mongo_client = AsyncMongoClient( environ.get("OPERANDI_DB_URL"), serverSelectionTimeoutMS=3000 )[environ.get("OPERANDI_DB_NAME")] # drop previous test entries from the test database for db_collection in DB_DROP_COLLECTIONS: - mongo_client[db_collection].drop() + await mongo_client[db_collection].drop() yield mongo_client @fixture(scope="session", name="db_hpc_slurm_jobs") -def fixture_db_hpc_slurm_jobs_collection(fixture_test_mongo_client): +async def fixture_db_hpc_slurm_jobs_collection(fixture_test_mongo_client): yield fixture_test_mongo_client["hpc_slurm_jobs"] @fixture(scope="session", name="db_user_accounts") -def fixture_db_user_accounts_collection(fixture_test_mongo_client): +async def fixture_db_user_accounts_collection(fixture_test_mongo_client): yield fixture_test_mongo_client["user_accounts"] @fixture(scope="session", name="db_workflows") -def fixture_db_workflows_collection(fixture_test_mongo_client): +async def fixture_db_workflows_collection(fixture_test_mongo_client): yield fixture_test_mongo_client["workflows"] @fixture(scope="session", name="db_workflow_jobs") -def fixture_db_workflow_jobs_collection(fixture_test_mongo_client): +async def fixture_db_workflow_jobs_collection(fixture_test_mongo_client): yield fixture_test_mongo_client["workflow_jobs"] @fixture(scope="session", name="db_workspaces") -def fixture_db_workspaces_collection(fixture_test_mongo_client): +async def fixture_db_workspaces_collection(fixture_test_mongo_client): yield fixture_test_mongo_client["workspaces"] diff --git a/tests/fixtures/server.py b/tests/fixtures/server.py index c9ce9dcc..681c3d42 100644 --- a/tests/fixtures/server.py +++ b/tests/fixtures/server.py @@ -1,14 +1,15 @@ +from httpx import ASGITransport, AsyncClient from os import environ from pytest import fixture -from fastapi.testclient import TestClient - from operandi_server import OperandiServer from tests.helpers_asserts import assert_availability_db @fixture(scope="package", name="operandi") -def fixture_operandi_server(): - assert_availability_db(environ.get("OPERANDI_DB_URL")) - operandi_app = OperandiServer() - with TestClient(operandi_app) as client: +async def fixture_operandi_server(): + await assert_availability_db(environ.get("OPERANDI_DB_URL")) + async with AsyncClient( + transport=ASGITransport(app=OperandiServer()), + base_url="http://testserver" + ) as client: yield client diff --git a/tests/helpers_asserts.py b/tests/helpers_asserts.py index 4edfec2e..c55c61b3 100644 --- a/tests/helpers_asserts.py +++ b/tests/helpers_asserts.py @@ -1,18 +1,19 @@ +from pytest import mark from os.path import exists, isdir, isfile -from requests import get -from time import sleep - - -def assert_availability_db(url, tries: int = 6, wait_time: int = 10): - http_url = url.replace("mongodb", "http") - response = None - while tries > 0: - response = get(http_url) - if response.status_code == 200: - break - sleep(wait_time) - tries -= 1 - assert response.status_code == 200, f"DB not running on: {url}" +from pymongo import AsyncMongoClient +from pymongo.errors import ServerSelectionTimeoutError + + +@mark.asyncio +async def assert_availability_db(db_url: str, timeout_ms: int = 5000) -> bool: + client = AsyncMongoClient(db_url, serverSelectionTimeoutMS=timeout_ms) + try: + await client.admin.command("ping") + return True + except ServerSelectionTimeoutError: + return False + finally: + await client.close() def assert_exists_db_resource(db_resource, resource_key, resource_id): diff --git a/tests/requirements.txt b/tests/requirements.txt index caed0285..2f7c797f 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,10 +1,12 @@ autopep8 click generateDS == 2.35.20 +httpx >= 0.28.0 pylint pymongo >= 4.15.5 pytest >= 7.0.0 -pytest-docker>=1.0.0 -requests>2.32.0 +pytest-asyncio >= 1.3.0 +pytest-docker >= 1.0.0 +requests > 2.32.0 twine wheel diff --git a/tests/tests_server/test_endpoint_root.py b/tests/tests_server/test_endpoint_root.py index da7a4fbc..9da63808 100644 --- a/tests/tests_server/test_endpoint_root.py +++ b/tests/tests_server/test_endpoint_root.py @@ -1,7 +1,9 @@ +from pytest import mark from .helpers_asserts import assert_response_status_code -def test_get_root_info(operandi): - response = operandi.get('/') +@mark.asyncio +async def test_get_root_info(operandi): + response = await operandi.get('/') assert_response_status_code(response.status_code, expected_floor=2) assert response.json()['message'] == "The home page of the OPERANDI Server" diff --git a/tests/tests_server/test_endpoint_workflow.py b/tests/tests_server/test_endpoint_workflow.py index 2fc107c8..e867b77b 100644 --- a/tests/tests_server/test_endpoint_workflow.py +++ b/tests/tests_server/test_endpoint_workflow.py @@ -1,12 +1,15 @@ from io import BytesIO +from pytest import mark from tests.helpers_asserts import assert_exists_db_resource from tests.constants import WORKFLOW_DUMMY_TEXT from .helpers_asserts import assert_local_dir_workflow, assert_response_status_code -def test_post_workflow_script(operandi, auth, db_workflows, bytes_template_workflow): + +@mark.asyncio +async def test_post_workflow_script(operandi, auth, db_workflows, bytes_template_workflow): # Post a new workflow script wf_detail = "Test template workflow with mets server" - response = operandi.post( + response = await operandi.post( url=f"/workflow?details={wf_detail}", files={"nextflow_script": bytes_template_workflow}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workflow_id = response.json()['resource_id'] @@ -16,10 +19,12 @@ def test_post_workflow_script(operandi, auth, db_workflows, bytes_template_workf assert db_workflow["details"] == wf_detail assert db_workflow["uses_mets_server"] == False -def test_post_workflow_script_with_ms(operandi, auth, db_workflows, bytes_template_workflow_with_ms): + +@mark.asyncio +async def _test_post_workflow_script_with_ms(operandi, auth, db_workflows, bytes_template_workflow_with_ms): # Post a new workflow script wf_detail = "Test template workflow with mets server" - response = operandi.post( + response = await operandi.post( url=f"/workflow?details={wf_detail}", files={"nextflow_script": bytes_template_workflow_with_ms}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workflow_id = response.json()['resource_id'] @@ -30,14 +35,15 @@ def test_post_workflow_script_with_ms(operandi, auth, db_workflows, bytes_templa assert db_workflow["uses_mets_server"] == True -def test_put_workflow_script( +@mark.asyncio +async def _test_put_workflow_script( operandi, auth, db_workflows, bytes_template_workflow_with_ms, bytes_default_workflow_with_ms ): put_workflow_id = "put_workflow_id" # The first put request creates a new workflow files = {"nextflow_script": bytes_template_workflow_with_ms} wf_detail = "Test template workflow with mets server" - response = operandi.put(url=f"/workflow/{put_workflow_id}?details={wf_detail}", files=files, auth=auth) + response = await operandi.put(url=f"/workflow/{put_workflow_id}?details={wf_detail}", files=files, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workflow_id = response.json()['resource_id'] assert_local_dir_workflow(workflow_id) @@ -56,7 +62,7 @@ def test_put_workflow_script( # The second put request replaces the previously created workflow files = {"nextflow_script": bytes_default_workflow_with_ms} wf_detail_put = "Test default workflow with mets server" - response = operandi.put(url=f"/workflow/{put_workflow_id}?details={wf_detail_put}", files=files, auth=auth) + response = await operandi.put(url=f"/workflow/{put_workflow_id}?details={wf_detail_put}", files=files, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workflow_id = response.json()['resource_id'] assert_local_dir_workflow(workflow_id) @@ -79,7 +85,9 @@ def test_put_workflow_script( assert workflow_details1 != workflow_details2, \ f"Workflow details should not, but match: {workflow_details1} == {workflow_details2}" -def test_put_workflow_not_allowed(operandi, auth, bytes_template_workflow_with_ms): + +@mark.asyncio +async def _test_put_workflow_not_allowed(operandi, auth, bytes_template_workflow_with_ms): production_workflow_ids = [ "template_workflow", "default_workflow", "odem_workflow", "template_workflow_with_MS", "default_workflow_with_MS", "odem_workflow_with_MS" @@ -88,63 +96,64 @@ def test_put_workflow_not_allowed(operandi, auth, bytes_template_workflow_with_m # Try to replace a production workflow which should raise an error code of 405 files = {"nextflow_script": bytes_template_workflow_with_ms} for workflow_id in production_workflow_ids: - response = operandi.put(url=f"/workflow/{workflow_id}", files=files, auth=auth) + response = await operandi.put(url=f"/workflow/{workflow_id}", files=files, auth=auth) assert_response_status_code(response.status_code, expected_floor=4) # Not implemented/planned in the WebAPI -def _test_delete_workflow(): +@mark.asyncio +async def _test_delete_workflow(): pass # Not implemented/planned in the WebAPI -def _test_delete_workflow_non_existing(): +@mark.asyncio +async def _test_delete_workflow_non_existing(): pass -def test_get_workflow_script(operandi, auth, bytes_template_workflow): +@mark.asyncio +async def _test_get_workflow_script(operandi, auth, bytes_template_workflow): # Post a new workflow script - response = operandi.post(url="/workflow", files={"nextflow_script": bytes_template_workflow}, auth=auth) + response = await operandi.post(url="/workflow", files={"nextflow_script": bytes_template_workflow}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workflow_id = response.json()['resource_id'] assert_local_dir_workflow(workflow_id) # Get the same workflow script - response = operandi.get(url=f"/workflow/{workflow_id}", auth=auth) + response = await operandi.get(url=f"/workflow/{workflow_id}", auth=auth) assert_response_status_code(response.status_code, expected_floor=2) print(response.headers) assert response.headers.get('content-disposition').find(".nf") > -1, \ "filename should have the '.nf' extension" -def test_get_workflow_non_existing(operandi, auth): +@mark.asyncio +async def _test_get_workflow_non_existing(operandi, auth): non_workflow_id = "non_existing_workflow_id" - response = operandi.get(url=f"/workflow/{non_workflow_id}", auth=auth) + response = await operandi.get(url=f"/workflow/{non_workflow_id}", auth=auth) assert_response_status_code(response.status_code, expected_floor=4) # This is already implemented as a part of the harvester full cycle test -def _test_run_operandi_workflow(): +@mark.asyncio +async def _test_run_operandi_workflow(): pass # This is already implemented as a part of the harvester full cycle test -def _test_running_workflow_job_status(): +@mark.asyncio +async def _test_running_workflow_job_status(): pass -# Added by Faizan -def test_convert_txt_to_nextflow_success(operandi, auth): - """ - Test the successful conversion of a text file to a Nextflow (.nf) file. - """ - - # Convert the dummy text to bytes and create an in-memory file-like object +@mark.asyncio +async def _test_convert_txt_to_nextflow_success(operandi, auth): dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "local", "with_mets_server": False} # Simulate uploading the text file for conversion via POST - response = operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) + response = await operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) nf_file_content = response.content.decode('utf-8') # Verify the status code and content assert_response_status_code(response.status_code, expected_floor=2) @@ -154,18 +163,14 @@ def test_convert_txt_to_nextflow_success(operandi, auth): assert "merging_mets" in nf_file_content -def test_convert_txt_to_nextflow_success_with_mets_server(operandi, auth): - """ - Test the successful conversion of a text file to a Nextflow (.nf) file with mets server. - """ - - # Convert the dummy text to bytes and create an in-memory file-like object +@mark.asyncio +async def _test_convert_txt_to_nextflow_success_with_mets_server(operandi, auth): dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "local", "with_mets_server": True} # Simulate uploading the text file for conversion via POST - response = operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) + response = await operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) nf_file_content = response.content.decode('utf-8') # Verify the status code and content assert_response_status_code(response.status_code, expected_floor=2) @@ -174,51 +179,40 @@ def test_convert_txt_to_nextflow_success_with_mets_server(operandi, auth): assert "params.mets_socket_path" in nf_file_content assert "merging_mets" not in nf_file_content -# Added by Faizan -def test_convert_txt_to_nextflow_auth_failure(operandi): - """ - Test the conversion process when authentication fails. - """ + +@mark.asyncio +async def _test_convert_txt_to_nextflow_auth_failure(operandi): dummy_text = "Some dummy text" dummy_file = BytesIO(dummy_text.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "local", "with_mets_server": False} auth = ('invalid_user', 'invalid_password') - response = operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) + response = await operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) # Verify the status code and error message for failed authentication assert_response_status_code(response.status_code, expected_floor=4) assert response.json()["detail"] == "Not found user account for email: invalid_user" -# Added by Faizan -def test_convert_txt_to_nextflow_validator_failure(operandi, auth): - """ - Test the conversion process when there's a validation or conversion failure. - """ - # Providing an invalid text input to trigger the ValueError in the conversion +@mark.asyncio +async def _test_convert_txt_to_nextflow_validator_failure(operandi, auth): invalid_text = "Invalid ocrd process text" dummy_file = BytesIO(invalid_text.encode('utf-8')) files = {"txt_file": ("invalid.txt", dummy_file, "text/plain")} params = {"environment": "local", "with_mets_server": False} - response = operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) + response = await operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) assert_response_status_code(response.status_code, expected_floor=4) assert "Failed to validate the ocrd process workflow txt file" in response.json()["detail"] -# Added by Faizan -def test_convert_txt_to_nextflow_docker_success(operandi, auth): - """ - Test the successful conversion of a text file to a Nextflow (.nf) file. - """ - - # Convert the dummy text to bytes and create an in-memory file-like object +@mark.asyncio +async def _test_convert_txt_to_nextflow_docker_success(operandi, auth): dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "docker", "with_mets_server": False} - response = operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) + response = await operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) nf_file_content = response.content.decode('utf-8') assert_response_status_code(response.status_code, expected_floor=2) assert "params.mets_path" in nf_file_content @@ -227,17 +221,13 @@ def test_convert_txt_to_nextflow_docker_success(operandi, auth): assert "merging_mets" in nf_file_content -def test_convert_txt_to_nextflow_docker_success_with_mets_server(operandi, auth): - """ - Test the successful conversion of a text file to a Nextflow (.nf) file with mets server. - """ - - # Convert the dummy text to bytes and create an in-memory file-like object +@mark.asyncio +async def _test_convert_txt_to_nextflow_docker_success_with_mets_server(operandi, auth): dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "docker", "with_mets_server": True} - response = operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) + response = await operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) nf_file_content = response.content.decode('utf-8') assert_response_status_code(response.status_code, expected_floor=2) assert "params.mets_path" in nf_file_content diff --git a/tests/tests_server/test_endpoint_workspace.py b/tests/tests_server/test_endpoint_workspace.py index 8c307ba2..0a7246c4 100644 --- a/tests/tests_server/test_endpoint_workspace.py +++ b/tests/tests_server/test_endpoint_workspace.py @@ -6,11 +6,14 @@ from operandi_server.constants import SERVER_WORKSPACES_ROUTER +from pytest import mark + from tests.helpers_asserts import assert_exists_db_resource, assert_exists_db_resource_not from .helpers_asserts import assert_local_dir_workspace, assert_local_dir_workspace_not, assert_response_status_code -def _test_post_workspace_url(operandi, auth, db_workspaces): +@mark.asyncio +async def _test_post_workspace_url(operandi, auth, db_workspaces): ws_detail = "Test url workspace - https://content.staatsbibliothek-berlin.de/dc/PPN631277528.mets.xml" mets_url = "https://content.staatsbibliothek-berlin.de/dc/PPN631277528.mets.xml" # Separate with `,` to add a second file group to be preserved, e.g., `DEFAULT,MAX` @@ -21,7 +24,7 @@ def _test_post_workspace_url(operandi, auth, db_workspaces): f"&preserve_file_grps={preserve_file_grps}" f"&details={ws_detail}" ) - response = operandi.post(url=req_url, auth=auth) + response = await operandi.post(url=req_url, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace_id = response.json()['resource_id'] assert_local_dir_workspace(workspace_id) @@ -30,9 +33,10 @@ def _test_post_workspace_url(operandi, auth, db_workspaces): assert db_workspace["details"] == ws_detail -def test_post_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_workspace): +@mark.asyncio +async def test_post_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_workspace): ws_detail = "Test dummy workspace" - response = operandi.post( + response = await operandi.post( url=f"/workspace?details={ws_detail}", files={"workspace": bytes_dummy_workspace}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace_id = response.json()['resource_id'] @@ -42,9 +46,10 @@ def test_post_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_workspace assert db_workspace["details"] == ws_detail -def test_post_workspace_zip_different_mets(operandi, auth, db_workspaces, bytes_ws_different_mets): +@mark.asyncio +async def test_post_workspace_zip_different_mets(operandi, auth, db_workspaces, bytes_ws_different_mets): ws_detail = "Test different mets basename workspace" - response = operandi.post( + response = await operandi.post( url=f"/workspace?details={ws_detail}", files={"workspace": bytes_ws_different_mets}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace_id = response.json()['resource_id'] @@ -54,14 +59,15 @@ def test_post_workspace_zip_different_mets(operandi, auth, db_workspaces, bytes_ assert db_workspace["details"] == ws_detail -def test_put_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_workspace, bytes_ws_different_mets): +@mark.asyncio +async def test_put_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_workspace, bytes_ws_different_mets): put_workspace_id = "put_workspace_id" ws_detail = "Test workspace" ws_detail_put = "Test put workspace" req_url = f"/workspace/{put_workspace_id}" # The first put request creates a new workspace - response = operandi.put( + response = await operandi.put( url=f"{req_url}?details={ws_detail}", files={"workspace": bytes_dummy_workspace}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace_id = response.json()['resource_id'] @@ -75,7 +81,7 @@ def test_put_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_workspace, ws_detail1 = db_workspace["details"] # The second put request replaces the previously created workspace - response = operandi.put( + response = await operandi.put( url=f"{req_url}?details={ws_detail_put}", files={"workspace": bytes_ws_different_mets}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace_id = response.json()['resource_id'] @@ -92,9 +98,10 @@ def test_put_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_workspace, assert ws_detail1 != ws_detail2, \ f"Workspace details should not, but match: {ws_detail1} == {ws_detail2}" -def test_delete_workspace(operandi, auth, db_workspaces, bytes_ws_different_mets): +@mark.asyncio +async def test_delete_workspace(operandi, auth, db_workspaces, bytes_ws_different_mets): # Post a workspace - response = operandi.post(url="/workspace", files={"workspace": bytes_ws_different_mets}, auth=auth) + response = await operandi.post(url="/workspace", files={"workspace": bytes_ws_different_mets}, auth=auth) posted_workspace_id = response.json()['resource_id'] assert_response_status_code(response.status_code, expected_floor=2) assert_local_dir_workspace(posted_workspace_id) @@ -103,15 +110,16 @@ def test_delete_workspace(operandi, auth, db_workspaces, bytes_ws_different_mets # Delete the previously posted workspace delete_workspace_id = posted_workspace_id - response = operandi.delete(url=f"/workspace/{delete_workspace_id}", auth=auth) + response = await operandi.delete(url=f"/workspace/{delete_workspace_id}", auth=auth) assert_response_status_code(response.status_code, expected_floor=2) assert_local_dir_workspace_not(delete_workspace_id) db_deleted_workspace = db_workspaces.find_one({"workspace_id": delete_workspace_id}) assert_exists_db_resource_not(db_deleted_workspace, delete_workspace_id) -def test_delete_workspace_non_existing(operandi, auth, bytes_ws_different_mets): - response = operandi.post(url="/workspace", files={"workspace": bytes_ws_different_mets}, auth=auth) +@mark.asyncio +async def test_delete_workspace_non_existing(operandi, auth, bytes_ws_different_mets): + response = await operandi.post(url="/workspace", files={"workspace": bytes_ws_different_mets}, auth=auth) posted_workspace_id = response.json()['resource_id'] delete_workspace_id = posted_workspace_id response = operandi.delete(f"/workspace/{delete_workspace_id}", auth=auth) @@ -120,10 +128,11 @@ def test_delete_workspace_non_existing(operandi, auth, bytes_ws_different_mets): assert_response_status_code(response.status_code, expected_floor=4) # Not available -def test_get_workspace(operandi, auth, bytes_ws_different_mets): - response = operandi.post(url="/workspace", files={"workspace": bytes_ws_different_mets}, auth=auth) +@mark.asyncio +async def test_get_workspace(operandi, auth, bytes_ws_different_mets): + response = await operandi.post(url="/workspace", files={"workspace": bytes_ws_different_mets}, auth=auth) workspace_id = response.json()['resource_id'] - response = operandi.get(url=f"/workspace/{workspace_id}", auth=auth) + response = await operandi.get(url=f"/workspace/{workspace_id}", auth=auth) assert_response_status_code(response.status_code, expected_floor=2) print(response.headers) assert response.headers.get('content-type').find("zip") > -1, \ @@ -137,14 +146,16 @@ def test_get_workspace(operandi, auth, bytes_ws_different_mets): filePtr.write(chunk) -def test_get_workspace_non_existing(operandi, auth): +@mark.asyncio +async def test_get_workspace_non_existing(operandi, auth): non_workspace_id = "non_existing_workspace_id" - response = operandi.get(url=f"/workspace/{non_workspace_id}", auth=auth) + response = await operandi.get(url=f"/workspace/{non_workspace_id}", auth=auth) assert_response_status_code(response.status_code, expected_floor=4) -def test_delete_file_groups(operandi, auth, db_workspaces, bytes_dummy_workspace): - response = operandi.post(url="/workspace", files={"workspace": bytes_dummy_workspace}, auth=auth) +@mark.asyncio +async def test_delete_file_groups(operandi, auth, db_workspaces, bytes_dummy_workspace): + response = await operandi.post(url="/workspace", files={"workspace": bytes_dummy_workspace}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace_id = response.json()['resource_id'] assert_local_dir_workspace(workspace_id) @@ -159,7 +170,7 @@ def test_delete_file_groups(operandi, auth, db_workspaces, bytes_dummy_workspace # Remove non-existing file groups - silently ignored remove_file_grps = "DEFAULT" req_url = f"/remove_file_group/{workspace_id}?remove_file_grps={remove_file_grps}" - response = operandi.delete(url=req_url, auth=auth) + response = await operandi.delete(url=req_url, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace.reload_mets() assert len(workspace.mets.file_groups) == file_groups_amount_old @@ -167,7 +178,7 @@ def test_delete_file_groups(operandi, auth, db_workspaces, bytes_dummy_workspace # Remove existing file group remove_file_grps = "OCR-D-IMG" req_url = f"/remove_file_group/{workspace_id}?remove_file_grps={remove_file_grps}" - response = operandi.delete(url=req_url, auth=auth) + response = await operandi.delete(url=req_url, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace.reload_mets() assert len(workspace.mets.file_groups) == file_groups_amount_old - 1 From a4347d027419b4034035729df91e23191536481f Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 22:19:09 +0100 Subject: [PATCH 12/50] activate all workflow tests again --- tests/tests_server/test_endpoint_workflow.py | 26 ++++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/tests_server/test_endpoint_workflow.py b/tests/tests_server/test_endpoint_workflow.py index e867b77b..d291004c 100644 --- a/tests/tests_server/test_endpoint_workflow.py +++ b/tests/tests_server/test_endpoint_workflow.py @@ -21,7 +21,7 @@ async def test_post_workflow_script(operandi, auth, db_workflows, bytes_template @mark.asyncio -async def _test_post_workflow_script_with_ms(operandi, auth, db_workflows, bytes_template_workflow_with_ms): +async def test_post_workflow_script_with_ms(operandi, auth, db_workflows, bytes_template_workflow_with_ms): # Post a new workflow script wf_detail = "Test template workflow with mets server" response = await operandi.post( @@ -36,7 +36,7 @@ async def _test_post_workflow_script_with_ms(operandi, auth, db_workflows, bytes @mark.asyncio -async def _test_put_workflow_script( +async def test_put_workflow_script( operandi, auth, db_workflows, bytes_template_workflow_with_ms, bytes_default_workflow_with_ms ): put_workflow_id = "put_workflow_id" @@ -87,7 +87,7 @@ async def _test_put_workflow_script( @mark.asyncio -async def _test_put_workflow_not_allowed(operandi, auth, bytes_template_workflow_with_ms): +async def test_put_workflow_not_allowed(operandi, auth, bytes_template_workflow_with_ms): production_workflow_ids = [ "template_workflow", "default_workflow", "odem_workflow", "template_workflow_with_MS", "default_workflow_with_MS", "odem_workflow_with_MS" @@ -102,18 +102,18 @@ async def _test_put_workflow_not_allowed(operandi, auth, bytes_template_workflow # Not implemented/planned in the WebAPI @mark.asyncio -async def _test_delete_workflow(): +async def test_delete_workflow(): pass # Not implemented/planned in the WebAPI @mark.asyncio -async def _test_delete_workflow_non_existing(): +async def test_delete_workflow_non_existing(): pass @mark.asyncio -async def _test_get_workflow_script(operandi, auth, bytes_template_workflow): +async def test_get_workflow_script(operandi, auth, bytes_template_workflow): # Post a new workflow script response = await operandi.post(url="/workflow", files={"nextflow_script": bytes_template_workflow}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) @@ -128,7 +128,7 @@ async def _test_get_workflow_script(operandi, auth, bytes_template_workflow): @mark.asyncio -async def _test_get_workflow_non_existing(operandi, auth): +async def test_get_workflow_non_existing(operandi, auth): non_workflow_id = "non_existing_workflow_id" response = await operandi.get(url=f"/workflow/{non_workflow_id}", auth=auth) assert_response_status_code(response.status_code, expected_floor=4) @@ -147,7 +147,7 @@ async def _test_running_workflow_job_status(): @mark.asyncio -async def _test_convert_txt_to_nextflow_success(operandi, auth): +async def test_convert_txt_to_nextflow_success(operandi, auth): dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "local", "with_mets_server": False} @@ -164,7 +164,7 @@ async def _test_convert_txt_to_nextflow_success(operandi, auth): @mark.asyncio -async def _test_convert_txt_to_nextflow_success_with_mets_server(operandi, auth): +async def test_convert_txt_to_nextflow_success_with_mets_server(operandi, auth): dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "local", "with_mets_server": True} @@ -181,7 +181,7 @@ async def _test_convert_txt_to_nextflow_success_with_mets_server(operandi, auth) @mark.asyncio -async def _test_convert_txt_to_nextflow_auth_failure(operandi): +async def test_convert_txt_to_nextflow_auth_failure(operandi): dummy_text = "Some dummy text" dummy_file = BytesIO(dummy_text.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} @@ -195,7 +195,7 @@ async def _test_convert_txt_to_nextflow_auth_failure(operandi): @mark.asyncio -async def _test_convert_txt_to_nextflow_validator_failure(operandi, auth): +async def test_convert_txt_to_nextflow_validator_failure(operandi, auth): invalid_text = "Invalid ocrd process text" dummy_file = BytesIO(invalid_text.encode('utf-8')) files = {"txt_file": ("invalid.txt", dummy_file, "text/plain")} @@ -207,7 +207,7 @@ async def _test_convert_txt_to_nextflow_validator_failure(operandi, auth): @mark.asyncio -async def _test_convert_txt_to_nextflow_docker_success(operandi, auth): +async def test_convert_txt_to_nextflow_docker_success(operandi, auth): dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "docker", "with_mets_server": False} @@ -222,7 +222,7 @@ async def _test_convert_txt_to_nextflow_docker_success(operandi, auth): @mark.asyncio -async def _test_convert_txt_to_nextflow_docker_success_with_mets_server(operandi, auth): +async def test_convert_txt_to_nextflow_docker_success_with_mets_server(operandi, auth): dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "docker", "with_mets_server": True} From b4d12e4de1b402440172be5bc1f072229bb3c69e Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 22:32:05 +0100 Subject: [PATCH 13/50] revert back to motor --- src/utils/operandi_utils/database/base.py | 3 +- src/utils/pyproject.toml | 1 + tests/fixtures/database.py | 22 ++--- tests/fixtures/server.py | 12 ++- tests/helpers_asserts.py | 13 ++- tests/requirements.txt | 1 + tests/tests_server/test_endpoint_root.py | 6 +- tests/tests_server/test_endpoint_workflow.py | 89 ++++++++----------- tests/tests_server/test_endpoint_workspace.py | 57 +++++------- 9 files changed, 95 insertions(+), 109 deletions(-) diff --git a/src/utils/operandi_utils/database/base.py b/src/utils/operandi_utils/database/base.py index 8c7a6ed3..68360d0e 100644 --- a/src/utils/operandi_utils/database/base.py +++ b/src/utils/operandi_utils/database/base.py @@ -2,6 +2,7 @@ from os import environ from beanie import init_beanie from pymongo import AsyncMongoClient +from motor.motor_asyncio import AsyncIOMotorClient from operandi_utils import call_sync from .models import DBHPCSlurmJob, DBUserAccount, DBWorkflow, DBWorkflowJob, DBWorkspace @@ -37,7 +38,7 @@ async def db_initiate_database( DBWorkflowJob, DBWorkspace ] - client = AsyncMongoClient(db_url) + client = AsyncIOMotorClient(db_url) db = client.get_default_database(default=db_name) # Documentation: https://beanie-odm.dev/ await init_beanie(database=db, document_models=doc_models) diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index dde41ec0..b1e3b5f9 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "click>=7", "clint==0.5.1", "loguru>=0.6.0", + "motor >= 3.7.1", "httpx>=0.24.0", "ocrd>=3.9.1", "paramiko>=3.4.0", diff --git a/tests/fixtures/database.py b/tests/fixtures/database.py index 50ee62fe..5817bb61 100644 --- a/tests/fixtures/database.py +++ b/tests/fixtures/database.py @@ -1,6 +1,6 @@ from os import environ -from pymongo import AsyncMongoClient -from pytest import fixture, mark +from pymongo import MongoClient +from pytest import fixture from tests.helpers_asserts import assert_availability_db @@ -8,37 +8,37 @@ @fixture(scope="session") -async def fixture_test_mongo_client(): - await assert_availability_db(environ.get("OPERANDI_DB_URL")) - mongo_client = AsyncMongoClient( +def fixture_test_mongo_client(): + assert_availability_db(environ.get("OPERANDI_DB_URL")) + mongo_client = MongoClient( environ.get("OPERANDI_DB_URL"), serverSelectionTimeoutMS=3000 )[environ.get("OPERANDI_DB_NAME")] # drop previous test entries from the test database for db_collection in DB_DROP_COLLECTIONS: - await mongo_client[db_collection].drop() + mongo_client[db_collection].drop() yield mongo_client @fixture(scope="session", name="db_hpc_slurm_jobs") -async def fixture_db_hpc_slurm_jobs_collection(fixture_test_mongo_client): +def fixture_db_hpc_slurm_jobs_collection(fixture_test_mongo_client): yield fixture_test_mongo_client["hpc_slurm_jobs"] @fixture(scope="session", name="db_user_accounts") -async def fixture_db_user_accounts_collection(fixture_test_mongo_client): +def fixture_db_user_accounts_collection(fixture_test_mongo_client): yield fixture_test_mongo_client["user_accounts"] @fixture(scope="session", name="db_workflows") -async def fixture_db_workflows_collection(fixture_test_mongo_client): +def fixture_db_workflows_collection(fixture_test_mongo_client): yield fixture_test_mongo_client["workflows"] @fixture(scope="session", name="db_workflow_jobs") -async def fixture_db_workflow_jobs_collection(fixture_test_mongo_client): +def fixture_db_workflow_jobs_collection(fixture_test_mongo_client): yield fixture_test_mongo_client["workflow_jobs"] @fixture(scope="session", name="db_workspaces") -async def fixture_db_workspaces_collection(fixture_test_mongo_client): +def fixture_db_workspaces_collection(fixture_test_mongo_client): yield fixture_test_mongo_client["workspaces"] diff --git a/tests/fixtures/server.py b/tests/fixtures/server.py index 681c3d42..90592636 100644 --- a/tests/fixtures/server.py +++ b/tests/fixtures/server.py @@ -1,10 +1,19 @@ -from httpx import ASGITransport, AsyncClient +from fastapi.testclient import TestClient from os import environ from pytest import fixture from operandi_server import OperandiServer from tests.helpers_asserts import assert_availability_db +@fixture(scope="package", name="operandi") +def fixture_operandi_server(): + assert_availability_db(environ.get("OPERANDI_DB_URL")) + operandi_app = OperandiServer() + with TestClient(operandi_app) as client: + yield client + +""" +from httpx import ASGITransport, AsyncClient @fixture(scope="package", name="operandi") async def fixture_operandi_server(): await assert_availability_db(environ.get("OPERANDI_DB_URL")) @@ -13,3 +22,4 @@ async def fixture_operandi_server(): base_url="http://testserver" ) as client: yield client +""" diff --git a/tests/helpers_asserts.py b/tests/helpers_asserts.py index c55c61b3..e6434cbb 100644 --- a/tests/helpers_asserts.py +++ b/tests/helpers_asserts.py @@ -1,19 +1,18 @@ -from pytest import mark from os.path import exists, isdir, isfile -from pymongo import AsyncMongoClient +from pymongo import MongoClient from pymongo.errors import ServerSelectionTimeoutError -@mark.asyncio -async def assert_availability_db(db_url: str, timeout_ms: int = 5000) -> bool: - client = AsyncMongoClient(db_url, serverSelectionTimeoutMS=timeout_ms) + +def assert_availability_db(db_url: str, timeout_ms: int = 5000) -> bool: + client = MongoClient(db_url, serverSelectionTimeoutMS=timeout_ms) try: - await client.admin.command("ping") + client.admin.command("ping") return True except ServerSelectionTimeoutError: return False finally: - await client.close() + client.close() def assert_exists_db_resource(db_resource, resource_key, resource_id): diff --git a/tests/requirements.txt b/tests/requirements.txt index 2f7c797f..216108d8 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -2,6 +2,7 @@ autopep8 click generateDS == 2.35.20 httpx >= 0.28.0 +motor >= 3.7.1 pylint pymongo >= 4.15.5 pytest >= 7.0.0 diff --git a/tests/tests_server/test_endpoint_root.py b/tests/tests_server/test_endpoint_root.py index 9da63808..da7a4fbc 100644 --- a/tests/tests_server/test_endpoint_root.py +++ b/tests/tests_server/test_endpoint_root.py @@ -1,9 +1,7 @@ -from pytest import mark from .helpers_asserts import assert_response_status_code -@mark.asyncio -async def test_get_root_info(operandi): - response = await operandi.get('/') +def test_get_root_info(operandi): + response = operandi.get('/') assert_response_status_code(response.status_code, expected_floor=2) assert response.json()['message'] == "The home page of the OPERANDI Server" diff --git a/tests/tests_server/test_endpoint_workflow.py b/tests/tests_server/test_endpoint_workflow.py index d291004c..5d71b1d0 100644 --- a/tests/tests_server/test_endpoint_workflow.py +++ b/tests/tests_server/test_endpoint_workflow.py @@ -1,15 +1,13 @@ from io import BytesIO -from pytest import mark from tests.helpers_asserts import assert_exists_db_resource from tests.constants import WORKFLOW_DUMMY_TEXT from .helpers_asserts import assert_local_dir_workflow, assert_response_status_code -@mark.asyncio -async def test_post_workflow_script(operandi, auth, db_workflows, bytes_template_workflow): +def test_post_workflow_script(operandi, auth, db_workflows, bytes_template_workflow): # Post a new workflow script wf_detail = "Test template workflow with mets server" - response = await operandi.post( + response = operandi.post( url=f"/workflow?details={wf_detail}", files={"nextflow_script": bytes_template_workflow}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workflow_id = response.json()['resource_id'] @@ -20,11 +18,10 @@ async def test_post_workflow_script(operandi, auth, db_workflows, bytes_template assert db_workflow["uses_mets_server"] == False -@mark.asyncio -async def test_post_workflow_script_with_ms(operandi, auth, db_workflows, bytes_template_workflow_with_ms): +def test_post_workflow_script_with_ms(operandi, auth, db_workflows, bytes_template_workflow_with_ms): # Post a new workflow script wf_detail = "Test template workflow with mets server" - response = await operandi.post( + response = operandi.post( url=f"/workflow?details={wf_detail}", files={"nextflow_script": bytes_template_workflow_with_ms}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workflow_id = response.json()['resource_id'] @@ -35,15 +32,14 @@ async def test_post_workflow_script_with_ms(operandi, auth, db_workflows, bytes_ assert db_workflow["uses_mets_server"] == True -@mark.asyncio -async def test_put_workflow_script( +def test_put_workflow_script( operandi, auth, db_workflows, bytes_template_workflow_with_ms, bytes_default_workflow_with_ms ): put_workflow_id = "put_workflow_id" # The first put request creates a new workflow files = {"nextflow_script": bytes_template_workflow_with_ms} wf_detail = "Test template workflow with mets server" - response = await operandi.put(url=f"/workflow/{put_workflow_id}?details={wf_detail}", files=files, auth=auth) + response = operandi.put(url=f"/workflow/{put_workflow_id}?details={wf_detail}", files=files, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workflow_id = response.json()['resource_id'] assert_local_dir_workflow(workflow_id) @@ -62,7 +58,7 @@ async def test_put_workflow_script( # The second put request replaces the previously created workflow files = {"nextflow_script": bytes_default_workflow_with_ms} wf_detail_put = "Test default workflow with mets server" - response = await operandi.put(url=f"/workflow/{put_workflow_id}?details={wf_detail_put}", files=files, auth=auth) + response = operandi.put(url=f"/workflow/{put_workflow_id}?details={wf_detail_put}", files=files, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workflow_id = response.json()['resource_id'] assert_local_dir_workflow(workflow_id) @@ -86,8 +82,7 @@ async def test_put_workflow_script( f"Workflow details should not, but match: {workflow_details1} == {workflow_details2}" -@mark.asyncio -async def test_put_workflow_not_allowed(operandi, auth, bytes_template_workflow_with_ms): +def test_put_workflow_not_allowed(operandi, auth, bytes_template_workflow_with_ms): production_workflow_ids = [ "template_workflow", "default_workflow", "odem_workflow", "template_workflow_with_MS", "default_workflow_with_MS", "odem_workflow_with_MS" @@ -96,64 +91,61 @@ async def test_put_workflow_not_allowed(operandi, auth, bytes_template_workflow_ # Try to replace a production workflow which should raise an error code of 405 files = {"nextflow_script": bytes_template_workflow_with_ms} for workflow_id in production_workflow_ids: - response = await operandi.put(url=f"/workflow/{workflow_id}", files=files, auth=auth) + response = operandi.put(url=f"/workflow/{workflow_id}", files=files, auth=auth) assert_response_status_code(response.status_code, expected_floor=4) -# Not implemented/planned in the WebAPI -@mark.asyncio -async def test_delete_workflow(): +# Not implemented/planned in the WebAP + +def test_delete_workflow(): pass -# Not implemented/planned in the WebAPI -@mark.asyncio -async def test_delete_workflow_non_existing(): +# Not implemented/planned in the WebAP + +def test_delete_workflow_non_existing(): pass -@mark.asyncio -async def test_get_workflow_script(operandi, auth, bytes_template_workflow): +def test_get_workflow_script(operandi, auth, bytes_template_workflow): # Post a new workflow script - response = await operandi.post(url="/workflow", files={"nextflow_script": bytes_template_workflow}, auth=auth) + response = operandi.post(url="/workflow", files={"nextflow_script": bytes_template_workflow}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workflow_id = response.json()['resource_id'] assert_local_dir_workflow(workflow_id) # Get the same workflow script - response = await operandi.get(url=f"/workflow/{workflow_id}", auth=auth) + response = operandi.get(url=f"/workflow/{workflow_id}", auth=auth) assert_response_status_code(response.status_code, expected_floor=2) print(response.headers) assert response.headers.get('content-disposition').find(".nf") > -1, \ "filename should have the '.nf' extension" -@mark.asyncio -async def test_get_workflow_non_existing(operandi, auth): +def test_get_workflow_non_existing(operandi, auth): non_workflow_id = "non_existing_workflow_id" - response = await operandi.get(url=f"/workflow/{non_workflow_id}", auth=auth) + response = operandi.get(url=f"/workflow/{non_workflow_id}", auth=auth) assert_response_status_code(response.status_code, expected_floor=4) -# This is already implemented as a part of the harvester full cycle test -@mark.asyncio -async def _test_run_operandi_workflow(): +# This is already implemented as a part of the harvester full cycle tes + +def _test_run_operandi_workflow(): pass -# This is already implemented as a part of the harvester full cycle test -@mark.asyncio -async def _test_running_workflow_job_status(): +# This is already implemented as a part of the harvester full cycle tes + +def _test_running_workflow_job_status(): pass -@mark.asyncio -async def test_convert_txt_to_nextflow_success(operandi, auth): +def test_convert_txt_to_nextflow_success(operandi, auth): dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "local", "with_mets_server": False} # Simulate uploading the text file for conversion via POST - response = await operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) + response = operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) nf_file_content = response.content.decode('utf-8') # Verify the status code and content assert_response_status_code(response.status_code, expected_floor=2) @@ -163,14 +155,13 @@ async def test_convert_txt_to_nextflow_success(operandi, auth): assert "merging_mets" in nf_file_content -@mark.asyncio -async def test_convert_txt_to_nextflow_success_with_mets_server(operandi, auth): +def test_convert_txt_to_nextflow_success_with_mets_server(operandi, auth): dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "local", "with_mets_server": True} # Simulate uploading the text file for conversion via POST - response = await operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) + response = operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) nf_file_content = response.content.decode('utf-8') # Verify the status code and content assert_response_status_code(response.status_code, expected_floor=2) @@ -180,39 +171,36 @@ async def test_convert_txt_to_nextflow_success_with_mets_server(operandi, auth): assert "merging_mets" not in nf_file_content -@mark.asyncio -async def test_convert_txt_to_nextflow_auth_failure(operandi): +def test_convert_txt_to_nextflow_auth_failure(operandi): dummy_text = "Some dummy text" dummy_file = BytesIO(dummy_text.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "local", "with_mets_server": False} auth = ('invalid_user', 'invalid_password') - response = await operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) + response = operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) # Verify the status code and error message for failed authentication assert_response_status_code(response.status_code, expected_floor=4) assert response.json()["detail"] == "Not found user account for email: invalid_user" -@mark.asyncio -async def test_convert_txt_to_nextflow_validator_failure(operandi, auth): +def test_convert_txt_to_nextflow_validator_failure(operandi, auth): invalid_text = "Invalid ocrd process text" dummy_file = BytesIO(invalid_text.encode('utf-8')) files = {"txt_file": ("invalid.txt", dummy_file, "text/plain")} params = {"environment": "local", "with_mets_server": False} - response = await operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) + response = operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) assert_response_status_code(response.status_code, expected_floor=4) assert "Failed to validate the ocrd process workflow txt file" in response.json()["detail"] -@mark.asyncio -async def test_convert_txt_to_nextflow_docker_success(operandi, auth): +def test_convert_txt_to_nextflow_docker_success(operandi, auth): dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "docker", "with_mets_server": False} - response = await operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) + response = operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) nf_file_content = response.content.decode('utf-8') assert_response_status_code(response.status_code, expected_floor=2) assert "params.mets_path" in nf_file_content @@ -221,13 +209,12 @@ async def test_convert_txt_to_nextflow_docker_success(operandi, auth): assert "merging_mets" in nf_file_content -@mark.asyncio -async def test_convert_txt_to_nextflow_docker_success_with_mets_server(operandi, auth): +def test_convert_txt_to_nextflow_docker_success_with_mets_server(operandi, auth): dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "docker", "with_mets_server": True} - response = await operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) + response = operandi.post(url="/convert_workflow", files=files, auth=auth, params=params) nf_file_content = response.content.decode('utf-8') assert_response_status_code(response.status_code, expected_floor=2) assert "params.mets_path" in nf_file_content diff --git a/tests/tests_server/test_endpoint_workspace.py b/tests/tests_server/test_endpoint_workspace.py index 0a7246c4..8c307ba2 100644 --- a/tests/tests_server/test_endpoint_workspace.py +++ b/tests/tests_server/test_endpoint_workspace.py @@ -6,14 +6,11 @@ from operandi_server.constants import SERVER_WORKSPACES_ROUTER -from pytest import mark - from tests.helpers_asserts import assert_exists_db_resource, assert_exists_db_resource_not from .helpers_asserts import assert_local_dir_workspace, assert_local_dir_workspace_not, assert_response_status_code -@mark.asyncio -async def _test_post_workspace_url(operandi, auth, db_workspaces): +def _test_post_workspace_url(operandi, auth, db_workspaces): ws_detail = "Test url workspace - https://content.staatsbibliothek-berlin.de/dc/PPN631277528.mets.xml" mets_url = "https://content.staatsbibliothek-berlin.de/dc/PPN631277528.mets.xml" # Separate with `,` to add a second file group to be preserved, e.g., `DEFAULT,MAX` @@ -24,7 +21,7 @@ async def _test_post_workspace_url(operandi, auth, db_workspaces): f"&preserve_file_grps={preserve_file_grps}" f"&details={ws_detail}" ) - response = await operandi.post(url=req_url, auth=auth) + response = operandi.post(url=req_url, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace_id = response.json()['resource_id'] assert_local_dir_workspace(workspace_id) @@ -33,10 +30,9 @@ async def _test_post_workspace_url(operandi, auth, db_workspaces): assert db_workspace["details"] == ws_detail -@mark.asyncio -async def test_post_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_workspace): +def test_post_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_workspace): ws_detail = "Test dummy workspace" - response = await operandi.post( + response = operandi.post( url=f"/workspace?details={ws_detail}", files={"workspace": bytes_dummy_workspace}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace_id = response.json()['resource_id'] @@ -46,10 +42,9 @@ async def test_post_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_wor assert db_workspace["details"] == ws_detail -@mark.asyncio -async def test_post_workspace_zip_different_mets(operandi, auth, db_workspaces, bytes_ws_different_mets): +def test_post_workspace_zip_different_mets(operandi, auth, db_workspaces, bytes_ws_different_mets): ws_detail = "Test different mets basename workspace" - response = await operandi.post( + response = operandi.post( url=f"/workspace?details={ws_detail}", files={"workspace": bytes_ws_different_mets}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace_id = response.json()['resource_id'] @@ -59,15 +54,14 @@ async def test_post_workspace_zip_different_mets(operandi, auth, db_workspaces, assert db_workspace["details"] == ws_detail -@mark.asyncio -async def test_put_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_workspace, bytes_ws_different_mets): +def test_put_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_workspace, bytes_ws_different_mets): put_workspace_id = "put_workspace_id" ws_detail = "Test workspace" ws_detail_put = "Test put workspace" req_url = f"/workspace/{put_workspace_id}" # The first put request creates a new workspace - response = await operandi.put( + response = operandi.put( url=f"{req_url}?details={ws_detail}", files={"workspace": bytes_dummy_workspace}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace_id = response.json()['resource_id'] @@ -81,7 +75,7 @@ async def test_put_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_work ws_detail1 = db_workspace["details"] # The second put request replaces the previously created workspace - response = await operandi.put( + response = operandi.put( url=f"{req_url}?details={ws_detail_put}", files={"workspace": bytes_ws_different_mets}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace_id = response.json()['resource_id'] @@ -98,10 +92,9 @@ async def test_put_workspace_zip(operandi, auth, db_workspaces, bytes_dummy_work assert ws_detail1 != ws_detail2, \ f"Workspace details should not, but match: {ws_detail1} == {ws_detail2}" -@mark.asyncio -async def test_delete_workspace(operandi, auth, db_workspaces, bytes_ws_different_mets): +def test_delete_workspace(operandi, auth, db_workspaces, bytes_ws_different_mets): # Post a workspace - response = await operandi.post(url="/workspace", files={"workspace": bytes_ws_different_mets}, auth=auth) + response = operandi.post(url="/workspace", files={"workspace": bytes_ws_different_mets}, auth=auth) posted_workspace_id = response.json()['resource_id'] assert_response_status_code(response.status_code, expected_floor=2) assert_local_dir_workspace(posted_workspace_id) @@ -110,16 +103,15 @@ async def test_delete_workspace(operandi, auth, db_workspaces, bytes_ws_differen # Delete the previously posted workspace delete_workspace_id = posted_workspace_id - response = await operandi.delete(url=f"/workspace/{delete_workspace_id}", auth=auth) + response = operandi.delete(url=f"/workspace/{delete_workspace_id}", auth=auth) assert_response_status_code(response.status_code, expected_floor=2) assert_local_dir_workspace_not(delete_workspace_id) db_deleted_workspace = db_workspaces.find_one({"workspace_id": delete_workspace_id}) assert_exists_db_resource_not(db_deleted_workspace, delete_workspace_id) -@mark.asyncio -async def test_delete_workspace_non_existing(operandi, auth, bytes_ws_different_mets): - response = await operandi.post(url="/workspace", files={"workspace": bytes_ws_different_mets}, auth=auth) +def test_delete_workspace_non_existing(operandi, auth, bytes_ws_different_mets): + response = operandi.post(url="/workspace", files={"workspace": bytes_ws_different_mets}, auth=auth) posted_workspace_id = response.json()['resource_id'] delete_workspace_id = posted_workspace_id response = operandi.delete(f"/workspace/{delete_workspace_id}", auth=auth) @@ -128,11 +120,10 @@ async def test_delete_workspace_non_existing(operandi, auth, bytes_ws_different_ assert_response_status_code(response.status_code, expected_floor=4) # Not available -@mark.asyncio -async def test_get_workspace(operandi, auth, bytes_ws_different_mets): - response = await operandi.post(url="/workspace", files={"workspace": bytes_ws_different_mets}, auth=auth) +def test_get_workspace(operandi, auth, bytes_ws_different_mets): + response = operandi.post(url="/workspace", files={"workspace": bytes_ws_different_mets}, auth=auth) workspace_id = response.json()['resource_id'] - response = await operandi.get(url=f"/workspace/{workspace_id}", auth=auth) + response = operandi.get(url=f"/workspace/{workspace_id}", auth=auth) assert_response_status_code(response.status_code, expected_floor=2) print(response.headers) assert response.headers.get('content-type').find("zip") > -1, \ @@ -146,16 +137,14 @@ async def test_get_workspace(operandi, auth, bytes_ws_different_mets): filePtr.write(chunk) -@mark.asyncio -async def test_get_workspace_non_existing(operandi, auth): +def test_get_workspace_non_existing(operandi, auth): non_workspace_id = "non_existing_workspace_id" - response = await operandi.get(url=f"/workspace/{non_workspace_id}", auth=auth) + response = operandi.get(url=f"/workspace/{non_workspace_id}", auth=auth) assert_response_status_code(response.status_code, expected_floor=4) -@mark.asyncio -async def test_delete_file_groups(operandi, auth, db_workspaces, bytes_dummy_workspace): - response = await operandi.post(url="/workspace", files={"workspace": bytes_dummy_workspace}, auth=auth) +def test_delete_file_groups(operandi, auth, db_workspaces, bytes_dummy_workspace): + response = operandi.post(url="/workspace", files={"workspace": bytes_dummy_workspace}, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace_id = response.json()['resource_id'] assert_local_dir_workspace(workspace_id) @@ -170,7 +159,7 @@ async def test_delete_file_groups(operandi, auth, db_workspaces, bytes_dummy_wor # Remove non-existing file groups - silently ignored remove_file_grps = "DEFAULT" req_url = f"/remove_file_group/{workspace_id}?remove_file_grps={remove_file_grps}" - response = await operandi.delete(url=req_url, auth=auth) + response = operandi.delete(url=req_url, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace.reload_mets() assert len(workspace.mets.file_groups) == file_groups_amount_old @@ -178,7 +167,7 @@ async def test_delete_file_groups(operandi, auth, db_workspaces, bytes_dummy_wor # Remove existing file group remove_file_grps = "OCR-D-IMG" req_url = f"/remove_file_group/{workspace_id}?remove_file_grps={remove_file_grps}" - response = await operandi.delete(url=req_url, auth=auth) + response = operandi.delete(url=req_url, auth=auth) assert_response_status_code(response.status_code, expected_floor=2) workspace.reload_mets() assert len(workspace.mets.file_groups) == file_groups_amount_old - 1 From df5d9198c445cc57cf930fdd728aa486222724eb Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 22:35:19 +0100 Subject: [PATCH 14/50] remove: motor --- src/utils/operandi_utils/database/base.py | 3 +-- src/utils/pyproject.toml | 1 - tests/fixtures/server.py | 12 ------------ tests/requirements.txt | 1 - 4 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/utils/operandi_utils/database/base.py b/src/utils/operandi_utils/database/base.py index 68360d0e..8c7a6ed3 100644 --- a/src/utils/operandi_utils/database/base.py +++ b/src/utils/operandi_utils/database/base.py @@ -2,7 +2,6 @@ from os import environ from beanie import init_beanie from pymongo import AsyncMongoClient -from motor.motor_asyncio import AsyncIOMotorClient from operandi_utils import call_sync from .models import DBHPCSlurmJob, DBUserAccount, DBWorkflow, DBWorkflowJob, DBWorkspace @@ -38,7 +37,7 @@ async def db_initiate_database( DBWorkflowJob, DBWorkspace ] - client = AsyncIOMotorClient(db_url) + client = AsyncMongoClient(db_url) db = client.get_default_database(default=db_name) # Documentation: https://beanie-odm.dev/ await init_beanie(database=db, document_models=doc_models) diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index b1e3b5f9..dde41ec0 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -16,7 +16,6 @@ dependencies = [ "click>=7", "clint==0.5.1", "loguru>=0.6.0", - "motor >= 3.7.1", "httpx>=0.24.0", "ocrd>=3.9.1", "paramiko>=3.4.0", diff --git a/tests/fixtures/server.py b/tests/fixtures/server.py index 90592636..e0301c5f 100644 --- a/tests/fixtures/server.py +++ b/tests/fixtures/server.py @@ -11,15 +11,3 @@ def fixture_operandi_server(): operandi_app = OperandiServer() with TestClient(operandi_app) as client: yield client - -""" -from httpx import ASGITransport, AsyncClient -@fixture(scope="package", name="operandi") -async def fixture_operandi_server(): - await assert_availability_db(environ.get("OPERANDI_DB_URL")) - async with AsyncClient( - transport=ASGITransport(app=OperandiServer()), - base_url="http://testserver" - ) as client: - yield client -""" diff --git a/tests/requirements.txt b/tests/requirements.txt index 216108d8..2f7c797f 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -2,7 +2,6 @@ autopep8 click generateDS == 2.35.20 httpx >= 0.28.0 -motor >= 3.7.1 pylint pymongo >= 4.15.5 pytest >= 7.0.0 From d04e4e53bc4e3ac017fc7191aab55313d7ec7ca9 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 22:37:41 +0100 Subject: [PATCH 15/50] remove: pytest.async --- tests/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/requirements.txt b/tests/requirements.txt index 2f7c797f..813e6d0f 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -5,7 +5,6 @@ httpx >= 0.28.0 pylint pymongo >= 4.15.5 pytest >= 7.0.0 -pytest-asyncio >= 1.3.0 pytest-docker >= 1.0.0 requests > 2.32.0 twine From 61356d4cc63aa53b095b0161823ee8d7af6934ca Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 22:40:12 +0100 Subject: [PATCH 16/50] remove: Py 3.8 and 3.9, add Py 3.13, 3.14, 3.15 --- .github/workflows/ci_cd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index 3fdf71ca..bcb96fc6 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -33,7 +33,7 @@ jobs: strategy: fail-fast: true matrix: - python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] + python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14", "3.15" ] os: [ ubuntu-latest ] runs-on: ${{ matrix.os }} From c997c1b6d7c2bd603fb614384c4e0e15ee4047ee Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 22:41:54 +0100 Subject: [PATCH 17/50] remove: Py 3.15 --- .github/workflows/ci_cd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index bcb96fc6..1ded930f 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -33,7 +33,7 @@ jobs: strategy: fail-fast: true matrix: - python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14", "3.15" ] + python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14", ] os: [ ubuntu-latest ] runs-on: ${{ matrix.os }} From e153dbb18b6ac01799e9990649a7961c7df6b1c2 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 22 Dec 2025 22:43:56 +0100 Subject: [PATCH 18/50] ci-cd: fast fail - false --- .github/workflows/ci_cd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index 1ded930f..c5dd8e98 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -31,7 +31,7 @@ jobs: build-native: name: Native build of Operandi modules strategy: - fail-fast: true + fail-fast: false matrix: python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14", ] os: [ ubuntu-latest ] @@ -74,7 +74,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.8" ] + python-version: [ "3.12" ] os: [ ubuntu-latest ] runs-on: ${{ matrix.os }} From 858a76f8e7a3d4caa3a5fa723028c657f2799915 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 12:34:52 +0100 Subject: [PATCH 19/50] set requirement: core v3.10.1 --- src/utils/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index dde41ec0..48a8f1f0 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -17,7 +17,7 @@ dependencies = [ "clint==0.5.1", "loguru>=0.6.0", "httpx>=0.24.0", - "ocrd>=3.9.1", + "ocrd>=3.10.1", "paramiko>=3.4.0", "pika>=1.2.0", "pydantic>=2.12.5", From e375cdbc079b3b62e8f5434d407dc47b2815edde Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 12:46:21 +0100 Subject: [PATCH 20/50] set beanie back to v2.0.0 --- src/utils/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index 48a8f1f0..9296b742 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -11,7 +11,7 @@ description = "Shared utilities for Operandi" requires-python = ">=3.10" dependencies = [ "aiofiles>=0.8.0", - "beanie>=2.0.1", + "beanie>=2.0.0", "chardet>=5.1.0", "click>=7", "clint==0.5.1", From a879502499df96e64a0fdec325f14a748d404462 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 13:08:13 +0100 Subject: [PATCH 21/50] release: v2.24.0 --- src/broker/pyproject.toml | 4 ++-- src/client/pyproject.toml | 4 ++-- src/harvester/pyproject.toml | 4 ++-- src/server/pyproject.toml | 4 ++-- src/utils/pyproject.toml | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/broker/pyproject.toml b/src/broker/pyproject.toml index 12055391..c42feffe 100644 --- a/src/broker/pyproject.toml +++ b/src/broker/pyproject.toml @@ -4,13 +4,13 @@ build-backend = "setuptools.build_meta" [project] name = "operandi-broker" -version = "2.23.0" +version = "2.24.0" authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] license = {text = "Apache License 2.0"} description = "Broker for Operandi" requires-python = ">=3.10" dependencies = [ - "operandi-utils>=2.23.0", + "operandi-utils>=2.24.0", "requests>2.32.0" ] diff --git a/src/client/pyproject.toml b/src/client/pyproject.toml index 8b4c39c8..1568d8ec 100644 --- a/src/client/pyproject.toml +++ b/src/client/pyproject.toml @@ -4,13 +4,13 @@ build-backend = "setuptools.build_meta" [project] name = "operandi-client" -version = "2.23.0" +version = "2.24.0" authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] license = {text = "Apache License 2.0"} description = "Client for Operandi" requires-python = ">=3.10" dependencies = [ - "operandi-utils>=2.23.0", + "operandi-utils>=2.24.0", "requests>2.32.0" ] diff --git a/src/harvester/pyproject.toml b/src/harvester/pyproject.toml index d050af77..73ac85af 100644 --- a/src/harvester/pyproject.toml +++ b/src/harvester/pyproject.toml @@ -4,13 +4,13 @@ build-backend = "setuptools.build_meta" [project] name = "operandi-harvester" -version = "2.23.0" +version = "2.24.0" authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] license = {text = "Apache License 2.0"} description = "Harvester for Operandi" requires-python = ">=3.10" dependencies = [ - "operandi-utils>=2.23.0", + "operandi-utils>=2.24.0", "requests>2.32.0" ] diff --git a/src/server/pyproject.toml b/src/server/pyproject.toml index e42ab7a9..2131f370 100644 --- a/src/server/pyproject.toml +++ b/src/server/pyproject.toml @@ -4,13 +4,13 @@ build-backend = "setuptools.build_meta" [project] name = "operandi-server" -version = "2.23.0" +version = "2.24.0" authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] license = {text = "Apache License 2.0"} description = "Server for Operandi" requires-python = ">=3.10" dependencies = [ - "operandi-utils>=2.23.0", + "operandi-utils>=2.24.0", "fastapi>=0.95.0", "uvicorn==0.21.0", "psutil" diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index 9296b742..e1450572 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "operandi-utils" -version = "2.23.0" +version = "2.24.0" authors = [ { name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de" } ] license = { text = "Apache License 2.0" } description = "Shared utilities for Operandi" From a3b8a8e8b66b8a52a62f05fc21e427e2f25c58f5 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 13:10:16 +0100 Subject: [PATCH 22/50] set: beanie v2.0.1 --- src/utils/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index e1450572..4f539982 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -11,7 +11,7 @@ description = "Shared utilities for Operandi" requires-python = ">=3.10" dependencies = [ "aiofiles>=0.8.0", - "beanie>=2.0.0", + "beanie>=2.0.1", "chardet>=5.1.0", "click>=7", "clint==0.5.1", From 8c96afeed9cae7a700a02ccc135dd0b55fdc6d94 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 13:16:41 +0100 Subject: [PATCH 23/50] remove py3.14 from ci/cd due to beanie v2.0.1 restriction that requires py>=3.9,<3.14 --- .github/workflows/ci_cd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index c5dd8e98..381b202d 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -33,7 +33,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14", ] + python-version: [ "3.10", "3.11", "3.12", "3.13" ] os: [ ubuntu-latest ] runs-on: ${{ matrix.os }} From 97bcda1571c8dcd0425ae706364242059955a0f4 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 13:35:58 +0100 Subject: [PATCH 24/50] try: add oton parent folder to resource --- src/utils/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index 4f539982..eb7261b8 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -42,5 +42,5 @@ operandi_utils = [ "batch_scripts/*.sh", "nextflow_workflows/*.nf", "ocrd_process_workflows/*.txt", - "ocrd-all-tool.json" + "oton/ocrd-all-tool.json" ] From d19e421ab36a88f014bfdba6d3c483c39a20d736 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 13:41:47 +0100 Subject: [PATCH 25/50] fix: add hpc parent folder to resources --- src/utils/pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index eb7261b8..3815042f 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -39,8 +39,8 @@ include-package-data = true [tool.setuptools.package-data] operandi_utils = [ - "batch_scripts/*.sh", - "nextflow_workflows/*.nf", - "ocrd_process_workflows/*.txt", + "hpc/batch_scripts/*.sh", + "hpc/nextflow_workflows/*.nf", + "hpc/ocrd_process_workflows/*.txt", "oton/ocrd-all-tool.json" ] From ba1e5e4d5ac5b2012421e638badcf7af94c4a150 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 13:49:58 +0100 Subject: [PATCH 26/50] fix: remove async from broker fixture --- tests/fixtures/broker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/fixtures/broker.py b/tests/fixtures/broker.py index d1040d46..5b689493 100644 --- a/tests/fixtures/broker.py +++ b/tests/fixtures/broker.py @@ -5,8 +5,8 @@ @fixture(scope="session", name="service_broker") -async def fixture_operandi_broker(): - await assert_availability_db(environ.get("OPERANDI_DB_URL")) +def fixture_operandi_broker(): + assert_availability_db(environ.get("OPERANDI_DB_URL")) service_broker = ServiceBroker(test_sbatch=True) yield service_broker service_broker.kill_workers() From 7acdbdec9ca9583ba0b96eeb24f8c9e03841e49b Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 15:10:10 +0100 Subject: [PATCH 27/50] add: model default values to Optional parameters --- src/utils/operandi_utils/database/models.py | 30 ++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/utils/operandi_utils/database/models.py b/src/utils/operandi_utils/database/models.py index 951d4dfb..32f63d90 100644 --- a/src/utils/operandi_utils/database/models.py +++ b/src/utils/operandi_utils/database/models.py @@ -32,7 +32,7 @@ class DBUserAccount(Document): approved_user: bool = False deleted: bool = False datetime: Optional[datetime] - details: Optional[str] + details: Optional[str] = "User Account" class Settings: name = "user_accounts" @@ -57,11 +57,11 @@ class DBHPCSlurmJob(Document): workflow_job_id: str hpc_slurm_job_id: str hpc_slurm_job_state: StateJobSlurm = StateJobSlurm.UNSET - hpc_batch_script_path: Optional[str] - hpc_slurm_workspace_path: Optional[str] + hpc_batch_script_path: Optional[str] = "UNSET" + hpc_slurm_workspace_path: Optional[str] = "UNSET" deleted: bool = False datetime: Optional[datetime] - details: Optional[str] + details: Optional[str] = "HPC-Slurm-Job" class Settings: name = "hpc_slurm_jobs" @@ -93,7 +93,7 @@ class DBWorkflow(Document): producible_file_groups: List[str] deleted: bool = False datetime: Optional[datetime] - details: Optional[str] + details: Optional[str] = "Workflow" class Settings: name = "workflows" @@ -123,12 +123,12 @@ class DBWorkflowJob(Document): workflow_id: str workspace_id: str job_state: StateJob = StateJob.UNSET - workflow_dir: Optional[str] - workspace_dir: Optional[str] - hpc_slurm_job_id: Optional[str] + workflow_dir: Optional[str] = "UNSET" + workspace_dir: Optional[str] = "UNSET" + hpc_slurm_job_id: Optional[str] = "UNSET" deleted: bool = False datetime: Optional[datetime] - details: Optional[str] + details: Optional[str] = "Workflow-Job" class Settings: name = "workflow_jobs" @@ -164,14 +164,14 @@ class DBWorkspace(Document): pages_amount: int file_groups: List[str] state: StateWorkspace = StateWorkspace.UNSET - ocrd_identifier: Optional[str] - bagit_profile_identifier: Optional[str] - ocrd_base_version_checksum: Optional[str] - mets_basename: Optional[str] - bag_info_adds: Optional[dict] + ocrd_identifier: Optional[str] = "UNSET" + bagit_profile_identifier: Optional[str] = "UNSET" + ocrd_base_version_checksum: Optional[str] = "UNSET" + mets_basename: Optional[str] = "UNSET" + bag_info_adds: Optional[dict] = {} deleted: bool = False datetime: Optional[datetime] - details: Optional[str] + details: Optional[str] = "Workspace" class Settings: name = "workspaces" From 2b6f8136961f27437a126120f2acc8255a2767b3 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 15:20:35 +0100 Subject: [PATCH 28/50] remove: obsolete version label --- docker-compose.yml | 2 -- docker-compose_image_based.yml | 2 -- 2 files changed, 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 856fea71..12dc8300 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - networks: operandi: name: operandi diff --git a/docker-compose_image_based.yml b/docker-compose_image_based.yml index b68940c8..db8cc194 100644 --- a/docker-compose_image_based.yml +++ b/docker-compose_image_based.yml @@ -1,5 +1,3 @@ -version: '3.8' - networks: operandi: name: operandi From 1fdf39e21d5a8dd57bfa6739ae43b8d504a982f2 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 15:21:31 +0100 Subject: [PATCH 29/50] update: rmq to version 4.2 --- docker-compose.yml | 2 +- docker-compose_image_based.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 12dc8300..f6b4266c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,7 +26,7 @@ services: - operandi operandi-rabbitmq: - image: "rabbitmq:3.12-management" + image: "rabbitmq:4.2-management" container_name: operandi-rabbitmq hostname: rabbit-mq-host ports: diff --git a/docker-compose_image_based.yml b/docker-compose_image_based.yml index db8cc194..e4bdc709 100644 --- a/docker-compose_image_based.yml +++ b/docker-compose_image_based.yml @@ -26,7 +26,7 @@ services: - operandi operandi-rabbitmq: - image: "rabbitmq:3.12-management" + image: "rabbitmq:4.2-management" container_name: operandi-rabbitmq hostname: rabbit-mq-host ports: From cd07b3643deb88579be65f45a0f1bceca2f4d482 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 16:09:53 +0100 Subject: [PATCH 30/50] fix: use the proper status and do check job status after data download --- tests/integration_tests/test_full_cycle.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/integration_tests/test_full_cycle.py b/tests/integration_tests/test_full_cycle.py index 36a833e2..99b38107 100644 --- a/tests/integration_tests/test_full_cycle.py +++ b/tests/integration_tests/test_full_cycle.py @@ -21,17 +21,19 @@ def check_job_till_finish(auth_harvester, operandi, workflow_job_id: str): response = operandi.get(url=check_job_status_url, auth=auth_harvester) assert_response_status_code(response.status_code, expected_floor=2) job_status = response.json()["job_state"] - if job_status == StateJob.SUCCESS: + if job_status == StateJob.HPC_SUCCESS: break + if job_status == StateJob.HPC_FAILED: + break + assert job_status == StateJob.HPC_SUCCESS - # TODO: Fix may be needed here - # When failed loop 5 more times. - # Sometimes the FAILED changes to SUCCESS - if job_status == StateJob.FAILED and tries > 5: - tries = 5 +def check_job_status_after_data_download(auth_harvester, operandi, workflow_job_id: str): + check_job_status_url = f"/workflow-job/{workflow_job_id}" + response = operandi.get(url=check_job_status_url, auth=auth_harvester) + assert_response_status_code(response.status_code, expected_floor=2) + job_status = response.json()["job_state"] assert job_status == StateJob.SUCCESS - def download_workflow_job_logs(auth_harvester, operandi, workflow_job_id: str): tries = 60 get_log_zip_url = f"/workflow-job/{workflow_job_id}/logs" @@ -112,3 +114,5 @@ def test_full_cycle(auth_harvester, operandi, service_broker, bytes_small_worksp assert Path(wf_job_dir, "work").exists assert Path(wf_job_dir, workspace_id, input_file_grp).exists() assert Path(wf_job_dir, workspace_id, "OCR-D-OCR").exists() + + check_job_status_after_data_download(auth_harvester, operandi, workflow_job_id) From c27f92ad915b360a9a9314509ce8c1020c7f228b Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 16:26:11 +0100 Subject: [PATCH 31/50] add: rabbitmq restart streams flag --- docker-compose.yml | 2 +- docker-compose_image_based.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index f6b4266c..a8d882fd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -43,7 +43,7 @@ services: restart: on-failure environment: - RABBITMQ_SERVER_ERL_ARGS=-rabbitmq_management load_definitions "/rmq_definitions.json" - - RABBITMQ_FEATURE_FLAGS=quorum_queue,implicit_default_bindings,classic_mirrored_queue_version + - RABBITMQ_FEATURE_FLAGS=quorum_queue,implicit_default_bindings,classic_mirrored_queue_version,restart_streams networks: - operandi healthcheck: diff --git a/docker-compose_image_based.yml b/docker-compose_image_based.yml index e4bdc709..1239d2fa 100644 --- a/docker-compose_image_based.yml +++ b/docker-compose_image_based.yml @@ -43,7 +43,7 @@ services: restart: on-failure environment: - RABBITMQ_SERVER_ERL_ARGS=-rabbitmq_management load_definitions "/rmq_definitions.json" - - RABBITMQ_FEATURE_FLAGS=quorum_queue,implicit_default_bindings,classic_mirrored_queue_version + - RABBITMQ_FEATURE_FLAGS=quorum_queue,implicit_default_bindings,classic_mirrored_queue_version,restart_streams networks: - operandi healthcheck: From 8d4f23a9369911efa95e1a58022e661cc3f64fa6 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 23 Jan 2026 17:19:06 +0100 Subject: [PATCH 32/50] fix: rabbitmq start --- docker-compose.yml | 5 +++++ docker-compose_image_based.yml | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index a8d882fd..af5e8c58 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,6 +29,10 @@ services: image: "rabbitmq:4.2-management" container_name: operandi-rabbitmq hostname: rabbit-mq-host + ulimits: + nofile: + soft: 65536 + hard: 65536 ports: - "5672:5672" - "15672:15672" @@ -43,6 +47,7 @@ services: restart: on-failure environment: - RABBITMQ_SERVER_ERL_ARGS=-rabbitmq_management load_definitions "/rmq_definitions.json" + - RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS=-rabbit peer_discovery_backend none - RABBITMQ_FEATURE_FLAGS=quorum_queue,implicit_default_bindings,classic_mirrored_queue_version,restart_streams networks: - operandi diff --git a/docker-compose_image_based.yml b/docker-compose_image_based.yml index 1239d2fa..d24ff7cd 100644 --- a/docker-compose_image_based.yml +++ b/docker-compose_image_based.yml @@ -29,6 +29,10 @@ services: image: "rabbitmq:4.2-management" container_name: operandi-rabbitmq hostname: rabbit-mq-host + ulimits: + nofile: + soft: 65536 + hard: 65536 ports: - "5672:5672" - "15672:15672" @@ -42,7 +46,8 @@ services: target: /rmq_definitions.json restart: on-failure environment: - - RABBITMQ_SERVER_ERL_ARGS=-rabbitmq_management load_definitions "/rmq_definitions.json" + - RABBITMQ_SERVER_ERL_ARGS='-rabbitmq_management load_definitions "/rmq_definitions.json"' + - RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS='-rabbit peer_discovery_backend none' - RABBITMQ_FEATURE_FLAGS=quorum_queue,implicit_default_bindings,classic_mirrored_queue_version,restart_streams networks: - operandi From 904dcc4d108e8f397a896578f822df08f63cdd44 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 26 Jan 2026 16:43:26 +0100 Subject: [PATCH 33/50] improve: timeouts --- tests/integration_tests/test_full_cycle.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration_tests/test_full_cycle.py b/tests/integration_tests/test_full_cycle.py index 99b38107..49dfb80b 100644 --- a/tests/integration_tests/test_full_cycle.py +++ b/tests/integration_tests/test_full_cycle.py @@ -12,12 +12,12 @@ OPERANDI_SERVER_BASE_DIR = environ.get("OPERANDI_SERVER_BASE_DIR") def check_job_till_finish(auth_harvester, operandi, workflow_job_id: str): - tries = 60 + tries = 120 job_status = None check_job_status_url = f"/workflow-job/{workflow_job_id}" while tries > 0: tries -= 1 - sleep(60) + sleep(30) response = operandi.get(url=check_job_status_url, auth=auth_harvester) assert_response_status_code(response.status_code, expected_floor=2) job_status = response.json()["job_state"] @@ -35,7 +35,7 @@ def check_job_status_after_data_download(auth_harvester, operandi, workflow_job_ assert job_status == StateJob.SUCCESS def download_workflow_job_logs(auth_harvester, operandi, workflow_job_id: str): - tries = 60 + tries = 120 get_log_zip_url = f"/workflow-job/{workflow_job_id}/logs" while tries > 0: tries -= 1 From 0e1454f22bc2337d2ba897452e77abe460f56a31 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Tue, 27 Jan 2026 22:44:31 +0100 Subject: [PATCH 34/50] set: ci/cd integration test timeout to 12h --- tests/integration_tests/test_full_cycle.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration_tests/test_full_cycle.py b/tests/integration_tests/test_full_cycle.py index 49dfb80b..96802bfb 100644 --- a/tests/integration_tests/test_full_cycle.py +++ b/tests/integration_tests/test_full_cycle.py @@ -12,12 +12,12 @@ OPERANDI_SERVER_BASE_DIR = environ.get("OPERANDI_SERVER_BASE_DIR") def check_job_till_finish(auth_harvester, operandi, workflow_job_id: str): - tries = 120 + tries = 360 job_status = None check_job_status_url = f"/workflow-job/{workflow_job_id}" while tries > 0: tries -= 1 - sleep(30) + sleep(120) response = operandi.get(url=check_job_status_url, auth=auth_harvester) assert_response_status_code(response.status_code, expected_floor=2) job_status = response.json()["job_state"] @@ -35,11 +35,11 @@ def check_job_status_after_data_download(auth_harvester, operandi, workflow_job_ assert job_status == StateJob.SUCCESS def download_workflow_job_logs(auth_harvester, operandi, workflow_job_id: str): - tries = 120 + tries = 360 get_log_zip_url = f"/workflow-job/{workflow_job_id}/logs" while tries > 0: tries -= 1 - sleep(30) + sleep(120) response = operandi.get(url=get_log_zip_url, auth=auth_harvester) if response.status_code != 200: continue From 5fcce18c9d7e01971165b686191aaca648703b65 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 28 Jan 2026 15:51:25 +0100 Subject: [PATCH 35/50] fix: Makefile call for harvester dummy --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 15748731..54bd201d 100755 --- a/Makefile +++ b/Makefile @@ -109,7 +109,7 @@ start-server-native: start-harvester-dummy: export $(shell sed 's/=.*//' ./.env) - operandi-harvester start-dummy --address http://localhost + operandi-harvester start-dummy --address http://localhost:8000 run-tests: run-tests-utils run-tests-broker run-tests-server run-tests-harvester run-tests-integration From 6f396fce9b39bea8035b4e9c6dc661bd31be310e Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 28 Jan 2026 15:51:41 +0100 Subject: [PATCH 36/50] greatly increase timeout values --- src/client/operandi_client/constants.py | 2 +- src/harvester/operandi_harvester/constants.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/client/operandi_client/constants.py b/src/client/operandi_client/constants.py index ec9a2212..5c511181 100644 --- a/src/client/operandi_client/constants.py +++ b/src/client/operandi_client/constants.py @@ -10,6 +10,6 @@ # Time waited between each workflow job status check WAIT_TIME_BETWEEN_POLLS: int = 120 # seconds # Times to perform workflow job status checks before timeout -TRIES_TILL_TIMEOUT: int = 30 +TRIES_TILL_TIMEOUT: int = 600 USE_WORKSPACE_FILE_GROUP = "DEFAULT" diff --git a/src/harvester/operandi_harvester/constants.py b/src/harvester/operandi_harvester/constants.py index 4d7e8164..2d7df395 100644 --- a/src/harvester/operandi_harvester/constants.py +++ b/src/harvester/operandi_harvester/constants.py @@ -23,8 +23,8 @@ # Time waited between the POST requests to the OPERANDI Server WAIT_TIME_BETWEEN_SUBMITS: int = 15 # seconds # Time waited between each workflow job status check -WAIT_TIME_BETWEEN_POLLS: int = 15 # seconds +WAIT_TIME_BETWEEN_POLLS: int = 120 # seconds # Times to perform workflow job status checks before timeout -TRIES_TILL_TIMEOUT: int = 30 +TRIES_TILL_TIMEOUT: int = 600 USE_WORKSPACE_FILE_GROUP = "DEFAULT" From b09a3d4b25bad608603294790fc93e345d078145 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 28 Jan 2026 17:42:30 +0100 Subject: [PATCH 37/50] Makefile: enable -s flag for tests --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 54bd201d..ad9671b6 100755 --- a/Makefile +++ b/Makefile @@ -115,7 +115,7 @@ run-tests: run-tests-utils run-tests-broker run-tests-server run-tests-harvester run-tests-utils: export $(shell sed 's/=.*//' ./tests/.env) - pytest tests/tests_utils/test_*/test_*.py -v + pytest tests/tests_utils/test_*/test_*.py -s -v run-tests-broker: export $(shell sed 's/=.*//' ./tests/.env) @@ -123,11 +123,11 @@ run-tests-broker: run-tests-harvester: export $(shell sed 's/=.*//' ./tests/.env) - pytest tests/tests_harvester/test_*.py -v + pytest tests/tests_harvester/test_*.py -s -v run-tests-server: export $(shell sed 's/=.*//' ./tests/.env) - pytest tests/tests_server/test_*.py -v + pytest tests/tests_server/test_*.py -s -v run-tests-integration: export $(shell sed 's/=.*//' ./tests/.env) From b69709af3e767b353c88b66bfea92dde94e3ecc6 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 28 Jan 2026 17:42:59 +0100 Subject: [PATCH 38/50] add extra prints to stdout from HPC fixtures --- tests/fixtures/hpc_nhr.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/fixtures/hpc_nhr.py b/tests/fixtures/hpc_nhr.py index 3ac676fb..db7e196a 100644 --- a/tests/fixtures/hpc_nhr.py +++ b/tests/fixtures/hpc_nhr.py @@ -1,13 +1,19 @@ +from logging import StreamHandler, DEBUG from pytest import fixture +from sys import stdout from operandi_utils.hpc import NHRExecutor, NHRTransfer @fixture(scope="package", name="hpc_nhr_data_transfer") def fixture_hpc_nhr_transfer_connector(): hpc_transfer_connector = NHRTransfer() + hpc_transfer_connector.logger.addHandler(StreamHandler(stdout)) + hpc_transfer_connector.logger.setLevel(DEBUG) yield hpc_transfer_connector @fixture(scope="package", name="hpc_nhr_command_executor") def fixture_hpc_nhr_execution_connector(): hpc_paramiko_connector = NHRExecutor() + hpc_paramiko_connector.logger.addHandler(StreamHandler(stdout)) + hpc_paramiko_connector.logger.setLevel(DEBUG) yield hpc_paramiko_connector From f4be98a8904136b38c81157f128337dd2a7a843d Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 29 Jan 2026 22:26:29 +0100 Subject: [PATCH 39/50] fix: job status parsing in workers --- .../operandi_broker/job_worker_download.py | 17 +++++++++++++---- src/broker/operandi_broker/job_worker_status.py | 10 +++++----- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/broker/operandi_broker/job_worker_download.py b/src/broker/operandi_broker/job_worker_download.py index c6841140..63355ea3 100644 --- a/src/broker/operandi_broker/job_worker_download.py +++ b/src/broker/operandi_broker/job_worker_download.py @@ -30,7 +30,7 @@ def _consumed_msg_callback(self, ch, method, properties, body): consumed_message = loads(body) self.log.info(f"Consumed message: {consumed_message}") self.current_message_job_id = consumed_message["job_id"] - previous_job_state = consumed_message["previous_job_state"] + previous_job_state: StateJob = StateJob(consumed_message["previous_job_state"]) except Exception as error: self.log.warning(f"Parsing the consumed message has failed: {error}") self._handle_msg_failure(interruption=False) @@ -63,8 +63,11 @@ def _consumed_msg_callback(self, ch, method, properties, body): try: # TODO: Refactor this block of code since nothing is downloaded from the HPC when job fails. + self.log.info(f"Previous job state: {previous_job_state}") if previous_job_state == StateJob.HPC_SUCCESS: + self.log.info(f"Downloading slurm job log file of succeeded: {slurm_job_id}") self.hpc_io_transfer.download_slurm_job_log_file(slurm_job_id, job_dir) + self.log.info(f"Downloading results of succeeded workflow job: {job_dir}") self.__download_results_from_hpc(job_dir=job_dir, workspace_dir=ws_dir) self.log.info(f"Setting new workspace state `{StateWorkspace.READY}` of workspace_id: {workspace_id}") updated_file_groups = self.__extract_updated_file_groups(db_workspace=db_workspace) @@ -82,8 +85,10 @@ def _consumed_msg_callback(self, ch, method, properties, body): sync_db_update_workflow_job(find_job_id=self.current_message_job_id, job_state=StateJob.SUCCESS) self.log.info(f"Setting new workflow job state `{StateJob.SUCCESS}`" f" of job_id: {self.current_message_job_id}") - if previous_job_state == StateJob.HPC_FAILED: + elif previous_job_state == StateJob.HPC_FAILED: + self.log.info(f"Downloading slurm job log file of failed: {slurm_job_id}") self.hpc_io_transfer.download_slurm_job_log_file(slurm_job_id, job_dir) + self.log.info(f"Skipping downloading results of failed workflow job: {job_dir}") self.log.info(f"Setting new workspace state `{StateWorkspace.READY}` of workspace_id: {workspace_id}") db_workspace: DBWorkspace = sync_db_update_workspace( find_workspace_id=workspace_id, state=StateWorkspace.READY) @@ -99,13 +104,17 @@ def _consumed_msg_callback(self, ch, method, properties, body): sync_db_update_workflow_job(find_job_id=self.current_message_job_id, job_state=StateJob.FAILED) self.log.info(f"Setting new workflow job state `{StateJob.FAILED}`" f" of job_id: {self.current_message_job_id}") + elif previous_job_state == StateJob.TRANSFERRING_FROM_HPC: + self.log.warning("Another worker instance is already downloading or has downloaded") + else: + self.log.warning(f"State not processable: {previous_job_state}") except Exception as error: self.log.warning(f"{error}") self._handle_msg_failure(interruption=False) return self.has_consumed_message = False - self.log.debug(f"Ack delivery tag: {self.current_message_delivery_tag}") + self.log.info(f"Ack delivery tag: {self.current_message_delivery_tag}") ch.basic_ack(delivery_tag=method.delivery_tag) @override @@ -119,7 +128,7 @@ def _handle_msg_failure(self, interruption: bool): self.rmq_consumer.ack_message(delivery_tag=self.current_message_delivery_tag) return - self.log.debug(f"Ack delivery tag: {self.current_message_delivery_tag}") + self.log.info(f"Ack delivery tag: {self.current_message_delivery_tag}") self.rmq_consumer.ack_message(delivery_tag=self.current_message_delivery_tag) # Reset the current message related parameters diff --git a/src/broker/operandi_broker/job_worker_status.py b/src/broker/operandi_broker/job_worker_status.py index 35996e63..0d4bbb3f 100644 --- a/src/broker/operandi_broker/job_worker_status.py +++ b/src/broker/operandi_broker/job_worker_status.py @@ -17,8 +17,8 @@ def __init__(self, db_url, rabbitmq_url, queue_name): @override def _consumed_msg_callback(self, ch, method, properties, body): - self.log.debug(f"ch: {ch}, method: {method}, properties: {properties}, body: {body}") - self.log.debug(f"Consumed message: {body}") + self.log.info(f"ch: {ch}, method: {method}, properties: {properties}, body: {body}") + self.log.info(f"Consumed message: {body}") self.current_message_delivery_tag = method.delivery_tag self.has_consumed_message = True @@ -55,7 +55,7 @@ def _consumed_msg_callback(self, ch, method, properties, body): return self.has_consumed_message = False - self.log.debug(f"Ack delivery tag: {self.current_message_delivery_tag}") + self.log.info(f"Ack delivery tag: {self.current_message_delivery_tag}") ch.basic_ack(delivery_tag=method.delivery_tag) @override @@ -69,7 +69,7 @@ def _handle_msg_failure(self, interruption: bool): self.rmq_consumer.ack_message(delivery_tag=self.current_message_delivery_tag) return - self.log.debug(f"Ack delivery tag: {self.current_message_delivery_tag}") + self.log.info(f"Ack delivery tag: {self.current_message_delivery_tag}") self.rmq_consumer.ack_message(delivery_tag=self.current_message_delivery_tag) # Reset the current message related parameters @@ -108,7 +108,7 @@ def __handle_hpc_and_workflow_states( if new_job_state == StateJob.HPC_SUCCESS or new_job_state == StateJob.HPC_FAILED: sync_db_update_workspace(find_workspace_id=workspace_id, state=StateWorkspace.TRANSFERRING_FROM_HPC) sync_db_update_workflow_job(find_job_id=job_id, job_state=StateJob.TRANSFERRING_FROM_HPC) - result_download_message = {"job_id": f"{job_id}", "previous_job_state": f"{new_job_state}"} + result_download_message = {"job_id": f"{job_id}", "previous_job_state": f"{new_job_state.value}"} self.log.info(f"Encoding the result download RabbitMQ message: {result_download_message}") encoded_result_download_message = dumps(result_download_message).encode(encoding="utf-8") self.rmq_publisher.publish_to_queue( From 4f3b43faae8ec84569e37e2ebd0e3e324d1fcc20 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 29 Jan 2026 22:29:15 +0100 Subject: [PATCH 40/50] optimize: transfer SIF only when needed in batch script --- .../batch_submit_workflow_job.sh | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/utils/operandi_utils/hpc/batch_scripts/batch_submit_workflow_job.sh b/src/utils/operandi_utils/hpc/batch_scripts/batch_submit_workflow_job.sh index f4625c0b..13b2ee24 100755 --- a/src/utils/operandi_utils/hpc/batch_scripts/batch_submit_workflow_job.sh +++ b/src/utils/operandi_utils/hpc/batch_scripts/batch_submit_workflow_job.sh @@ -172,14 +172,17 @@ transfer_to_node_storage_processor_images(){ if [ ! -f "$ocrd_image_path" ]; then echo "Expected ocrd processor image not found at: $ocrd_image_path" exit 1 - else - echo "Transferring ocrd processor image to the compute node: ${ocrd_image}" - cp "${ocrd_image_path}" "${node_ocrd_image_path}" - echo "Ocrd processor image was transferred to: ${node_ocrd_image_path}" - if [ ! -f "${node_ocrd_image_path}" ]; then - echo "Expected ocrd processor image was copied but not found locally at: ${node_ocrd_image_path}" - exit 1 - fi + fi + if [ -f "$node_ocrd_image_path" ]; then + echo "Skipping ${ocrd_image_path} since the same image was already copied in a previous step" + continue + fi + echo "Transferring ocrd processor image to the compute node: ${ocrd_image}" + cp "${ocrd_image_path}" "${node_ocrd_image_path}" + echo "Ocrd processor image was transferred to: ${node_ocrd_image_path}" + if [ ! -f "${node_ocrd_image_path}" ]; then + echo "Expected ocrd processor image was copied but not found locally at: ${node_ocrd_image_path}" + exit 1 fi done echo "" From 39d340b4a43d5a16b788152898657555486e19e7 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 29 Jan 2026 22:30:05 +0100 Subject: [PATCH 41/50] add: extra parsing in edge cases when the job is still PENDING in slurm --- src/utils/operandi_utils/hpc/nhr_executor_utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/utils/operandi_utils/hpc/nhr_executor_utils.py b/src/utils/operandi_utils/hpc/nhr_executor_utils.py index c7726514..5a31a778 100644 --- a/src/utils/operandi_utils/hpc/nhr_executor_utils.py +++ b/src/utils/operandi_utils/hpc/nhr_executor_utils.py @@ -6,10 +6,15 @@ def parse_slurm_job_state_from_output(output: List[str]) -> Tuple[StateJobSlurm, if not output: return StateJobSlurm.UNSET, "No output available, something is odd." if len(output) < 3: - return StateJobSlurm.UNSET, "The output has less than 3 lines, job not listed yet." - parsed_state: str = output[-2].split()[1] + return StateJobSlurm.UNSET, f"Less than 3 lines in the output: {output}" + try: + parsed_state: str = output[-2].split()[1] + if parsed_state.startswith("-"): + parsed_state: str = output[-1].split()[1] + except IndexError: + return StateJobSlurm.UNSET, f"Parsing error from output: {output}" try: state_job_slurm = StateJobSlurm(parsed_state) except ValueError: - return StateJobSlurm.UNSET, f"Unknown parsed state: {parsed_state}" + return StateJobSlurm.UNSET, f"Unknown parsed state: {parsed_state} from output: {output}" return state_job_slurm, "Parsed state recognized" From a9fda4b36ad9d5a34b0877fec2002af4faaadad3 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 29 Jan 2026 22:30:45 +0100 Subject: [PATCH 42/50] integration_test: reduce testing timeouts --- tests/integration_tests/test_full_cycle.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tests/integration_tests/test_full_cycle.py b/tests/integration_tests/test_full_cycle.py index 96802bfb..a5d50273 100644 --- a/tests/integration_tests/test_full_cycle.py +++ b/tests/integration_tests/test_full_cycle.py @@ -12,20 +12,19 @@ OPERANDI_SERVER_BASE_DIR = environ.get("OPERANDI_SERVER_BASE_DIR") def check_job_till_finish(auth_harvester, operandi, workflow_job_id: str): - tries = 360 + tries = 120 job_status = None check_job_status_url = f"/workflow-job/{workflow_job_id}" while tries > 0: tries -= 1 - sleep(120) + sleep(15) response = operandi.get(url=check_job_status_url, auth=auth_harvester) assert_response_status_code(response.status_code, expected_floor=2) job_status = response.json()["job_state"] - if job_status == StateJob.HPC_SUCCESS: + if job_status in [StateJob.HPC_SUCCESS, StateJob.HPC_FAILED, StateJob.TRANSFERRING_FROM_HPC]: break - if job_status == StateJob.HPC_FAILED: - break - assert job_status == StateJob.HPC_SUCCESS + assert job_status is not None + return job_status def check_job_status_after_data_download(auth_harvester, operandi, workflow_job_id: str): check_job_status_url = f"/workflow-job/{workflow_job_id}" @@ -33,13 +32,14 @@ def check_job_status_after_data_download(auth_harvester, operandi, workflow_job_ assert_response_status_code(response.status_code, expected_floor=2) job_status = response.json()["job_state"] assert job_status == StateJob.SUCCESS + return job_status def download_workflow_job_logs(auth_harvester, operandi, workflow_job_id: str): - tries = 360 + tries = 120 get_log_zip_url = f"/workflow-job/{workflow_job_id}/logs" while tries > 0: tries -= 1 - sleep(120) + sleep(15) response = operandi.get(url=get_log_zip_url, auth=auth_harvester) if response.status_code != 200: continue @@ -61,8 +61,6 @@ def test_full_cycle(auth_harvester, operandi, service_broker, bytes_small_worksp service_broker.create_worker_process(RABBITMQ_QUEUE_HARVESTER, "submit_worker") # Create a background worker for the job statuses queue service_broker.create_worker_process(RABBITMQ_QUEUE_JOB_STATUSES, "status_worker") - # Create a background worker for the hpc download queue - service_broker.create_worker_process(RABBITMQ_QUEUE_HPC_DOWNLOADS, "download_worker") # Post a workspace zip response = operandi.post(url="/workspace", files={"workspace": bytes_small_workspace}, auth=auth_harvester) @@ -98,6 +96,10 @@ def test_full_cycle(auth_harvester, operandi, service_broker, bytes_small_worksp workflow_job_id = response.json()["resource_id"] check_job_till_finish(auth_harvester, operandi, workflow_job_id) + + # Create a background worker for the hpc download queue + service_broker.create_worker_process(RABBITMQ_QUEUE_HPC_DOWNLOADS, "download_worker") + download_workflow_job_logs(auth_harvester, operandi, workflow_job_id) ws_dir = Path(OPERANDI_SERVER_BASE_DIR, SERVER_WORKSPACES_ROUTER, workspace_id) From a3d27e10ffb5cb8404aeedcc652618381d5dce4d Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 29 Jan 2026 22:35:24 +0100 Subject: [PATCH 43/50] update: test requirements to latest versions --- tests/requirements.txt | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/requirements.txt b/tests/requirements.txt index 813e6d0f..1ef2bf66 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,11 +1,11 @@ -autopep8 -click +autopep8 >= 2.3.2 +click >= 8.3.1 generateDS == 2.35.20 -httpx >= 0.28.0 -pylint -pymongo >= 4.15.5 -pytest >= 7.0.0 -pytest-docker >= 1.0.0 -requests > 2.32.0 -twine +httpx >= 0.28.1 +pylint >= 4.0.4 +pymongo >= 4.16.0 +pytest >= 9.0.0 +pytest-docker >= 3.2.5 +requests > 2.32.5 +twine >= 6.2.0 wheel From b38e4c4b4b7f25d4cfb766b727f7d2a948172eb2 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 29 Jan 2026 22:37:35 +0100 Subject: [PATCH 44/50] use core v3.11.0 --- src/utils/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index 3815042f..50f0504a 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -17,7 +17,7 @@ dependencies = [ "clint==0.5.1", "loguru>=0.6.0", "httpx>=0.24.0", - "ocrd>=3.10.1", + "ocrd>=3.11.0", "paramiko>=3.4.0", "pika>=1.2.0", "pydantic>=2.12.5", From d271de6304ecd00bf8687b30d2f03bb25874690a Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 29 Jan 2026 22:43:41 +0100 Subject: [PATCH 45/50] adapt requirements --- src/broker/pyproject.toml | 5 +---- src/client/pyproject.toml | 5 +---- src/harvester/pyproject.toml | 5 +---- src/utils/pyproject.toml | 15 ++++++++------- 4 files changed, 11 insertions(+), 19 deletions(-) diff --git a/src/broker/pyproject.toml b/src/broker/pyproject.toml index c42feffe..b2aa651e 100644 --- a/src/broker/pyproject.toml +++ b/src/broker/pyproject.toml @@ -9,10 +9,7 @@ authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] license = {text = "Apache License 2.0"} description = "Broker for Operandi" requires-python = ">=3.10" -dependencies = [ - "operandi-utils>=2.24.0", - "requests>2.32.0" -] +dependencies = ["operandi-utils>=2.24.0"] [project.readme] file = "README.md" diff --git a/src/client/pyproject.toml b/src/client/pyproject.toml index 1568d8ec..a4947739 100644 --- a/src/client/pyproject.toml +++ b/src/client/pyproject.toml @@ -9,10 +9,7 @@ authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] license = {text = "Apache License 2.0"} description = "Client for Operandi" requires-python = ">=3.10" -dependencies = [ - "operandi-utils>=2.24.0", - "requests>2.32.0" -] +dependencies = ["operandi-utils>=2.24.0"] [project.readme] file = "README.md" diff --git a/src/harvester/pyproject.toml b/src/harvester/pyproject.toml index 73ac85af..24bfa3a9 100644 --- a/src/harvester/pyproject.toml +++ b/src/harvester/pyproject.toml @@ -9,10 +9,7 @@ authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] license = {text = "Apache License 2.0"} description = "Harvester for Operandi" requires-python = ">=3.10" -dependencies = [ - "operandi-utils>=2.24.0", - "requests>2.32.0" -] +dependencies = ["operandi-utils>=2.24.0"] [project.readme] file = "README.md" diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml index 50f0504a..0cf7f321 100644 --- a/src/utils/pyproject.toml +++ b/src/utils/pyproject.toml @@ -13,17 +13,18 @@ dependencies = [ "aiofiles>=0.8.0", "beanie>=2.0.1", "chardet>=5.1.0", - "click>=7", - "clint==0.5.1", + "click>=8.3.1", + "clint>=0.5.1", "loguru>=0.6.0", - "httpx>=0.24.0", + "httpx>=0.28.1", "ocrd>=3.11.0", - "paramiko>=3.4.0", - "pika>=1.2.0", + "paramiko>=4.0.0", + "pika>=1.3.2", "pydantic>=2.12.5", - "pymongo >= 4.15.5", + "pymongo >= 4.16.0", "python-dotenv>=1.0.0", - "python-multipart>=0.0.5" + "python-multipart>=0.0.22", + "requests>=2.32.5" ] [project.readme] From a5eff57329063ba076a8a9eb2a0a2dc4e9ae5105 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 29 Jan 2026 22:55:11 +0100 Subject: [PATCH 46/50] fix: test_3_hpc_test_0 --- tests/tests_utils/test_3_hpc/test_0_nhr_executor_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tests_utils/test_3_hpc/test_0_nhr_executor_utils.py b/tests/tests_utils/test_3_hpc/test_0_nhr_executor_utils.py index 13d60cc7..565a9676 100644 --- a/tests/tests_utils/test_3_hpc/test_0_nhr_executor_utils.py +++ b/tests/tests_utils/test_3_hpc/test_0_nhr_executor_utils.py @@ -20,7 +20,7 @@ def test_parse_slurm_job_state_from_output_less_lines(): "'-------------------- -------------------- -------- \n'" ] slurm_job_state, msg = parse_slurm_job_state_from_output(test_output_out_of_memory) - assert msg == "The output has less than 3 lines, job not listed yet." + assert msg == f"Less than 3 lines in the output: {test_output_out_of_memory}" assert slurm_job_state == StateJobSlurm.UNSET @@ -70,5 +70,5 @@ def test_parse_slurm_job_state_from_output_invalid_state(): "'6313216.extern COMPLETED 0:0 \n'" ] slurm_job_state, msg = parse_slurm_job_state_from_output(test_output_out_of_memory) - assert msg == f"Unknown parsed state: OUT_OF_ME+" + assert msg == f"Unknown parsed state: OUT_OF_ME+ from output: {test_output_out_of_memory}" assert slurm_job_state == StateJobSlurm.UNSET From 53ec82328ab2e34234baf23f04feadc3e3972538 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 29 Jan 2026 22:55:24 +0100 Subject: [PATCH 47/50] reduce: noise from tests --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index ad9671b6..3228730b 100755 --- a/Makefile +++ b/Makefile @@ -115,19 +115,19 @@ run-tests: run-tests-utils run-tests-broker run-tests-server run-tests-harvester run-tests-utils: export $(shell sed 's/=.*//' ./tests/.env) - pytest tests/tests_utils/test_*/test_*.py -s -v + pytest tests/tests_utils/test_*/test_*.py -v run-tests-broker: export $(shell sed 's/=.*//' ./tests/.env) - pytest tests/tests_broker/test_*.py -s -v + pytest tests/tests_broker/test_*.py -v run-tests-harvester: export $(shell sed 's/=.*//' ./tests/.env) - pytest tests/tests_harvester/test_*.py -s -v + pytest tests/tests_harvester/test_*.py -v run-tests-server: export $(shell sed 's/=.*//' ./tests/.env) - pytest tests/tests_server/test_*.py -s -v + pytest tests/tests_server/test_*.py -v run-tests-integration: export $(shell sed 's/=.*//' ./tests/.env) From 6c1655dfd1042b8edc828a6450c431e397c1aa77 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 29 Jan 2026 22:56:57 +0100 Subject: [PATCH 48/50] fix: typo in test requirements --- tests/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/requirements.txt b/tests/requirements.txt index 1ef2bf66..ef361948 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -6,6 +6,6 @@ pylint >= 4.0.4 pymongo >= 4.16.0 pytest >= 9.0.0 pytest-docker >= 3.2.5 -requests > 2.32.5 +requests >= 2.32.5 twine >= 6.2.0 wheel From 6c94e5d52a3f14c9ccad911ee81ecffc4141d1be Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 29 Jan 2026 23:21:44 +0100 Subject: [PATCH 49/50] slow down checks of job status test to 1 min --- tests/integration_tests/test_full_cycle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/test_full_cycle.py b/tests/integration_tests/test_full_cycle.py index a5d50273..fb635b85 100644 --- a/tests/integration_tests/test_full_cycle.py +++ b/tests/integration_tests/test_full_cycle.py @@ -17,7 +17,7 @@ def check_job_till_finish(auth_harvester, operandi, workflow_job_id: str): check_job_status_url = f"/workflow-job/{workflow_job_id}" while tries > 0: tries -= 1 - sleep(15) + sleep(60) response = operandi.get(url=check_job_status_url, auth=auth_harvester) assert_response_status_code(response.status_code, expected_floor=2) job_status = response.json()["job_state"] @@ -39,7 +39,7 @@ def download_workflow_job_logs(auth_harvester, operandi, workflow_job_id: str): get_log_zip_url = f"/workflow-job/{workflow_job_id}/logs" while tries > 0: tries -= 1 - sleep(15) + sleep(60) response = operandi.get(url=get_log_zip_url, auth=auth_harvester) if response.status_code != 200: continue From 982eaa921c9fb399ce70fed946bac7a1c5bc0992 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 29 Jan 2026 23:22:01 +0100 Subject: [PATCH 50/50] remove: feature flags from rabbitmq docker compose --- docker-compose.yml | 1 - docker-compose_image_based.yml | 1 - 2 files changed, 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index af5e8c58..3b31e2ca 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -48,7 +48,6 @@ services: environment: - RABBITMQ_SERVER_ERL_ARGS=-rabbitmq_management load_definitions "/rmq_definitions.json" - RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS=-rabbit peer_discovery_backend none - - RABBITMQ_FEATURE_FLAGS=quorum_queue,implicit_default_bindings,classic_mirrored_queue_version,restart_streams networks: - operandi healthcheck: diff --git a/docker-compose_image_based.yml b/docker-compose_image_based.yml index d24ff7cd..0d71ec8e 100644 --- a/docker-compose_image_based.yml +++ b/docker-compose_image_based.yml @@ -48,7 +48,6 @@ services: environment: - RABBITMQ_SERVER_ERL_ARGS='-rabbitmq_management load_definitions "/rmq_definitions.json"' - RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS='-rabbit peer_discovery_backend none' - - RABBITMQ_FEATURE_FLAGS=quorum_queue,implicit_default_bindings,classic_mirrored_queue_version,restart_streams networks: - operandi healthcheck: