diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index 3fdf71ca..381b202d 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -31,9 +31,9 @@ jobs: build-native: name: Native build of Operandi modules strategy: - fail-fast: true + fail-fast: false matrix: - python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] + python-version: [ "3.10", "3.11", "3.12", "3.13" ] os: [ ubuntu-latest ] runs-on: ${{ matrix.os }} @@ -74,7 +74,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.8" ] + python-version: [ "3.12" ] os: [ ubuntu-latest ] runs-on: ${{ matrix.os }} diff --git a/Makefile b/Makefile index 15748731..3228730b 100755 --- a/Makefile +++ b/Makefile @@ -109,7 +109,7 @@ start-server-native: start-harvester-dummy: export $(shell sed 's/=.*//' ./.env) - operandi-harvester start-dummy --address http://localhost + operandi-harvester start-dummy --address http://localhost:8000 run-tests: run-tests-utils run-tests-broker run-tests-server run-tests-harvester run-tests-integration @@ -119,7 +119,7 @@ run-tests-utils: run-tests-broker: export $(shell sed 's/=.*//' ./tests/.env) - pytest tests/tests_broker/test_*.py -s -v + pytest tests/tests_broker/test_*.py -v run-tests-harvester: export $(shell sed 's/=.*//' ./tests/.env) diff --git a/docker-compose.yml b/docker-compose.yml index 856fea71..3b31e2ca 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - networks: operandi: name: operandi @@ -28,9 +26,13 @@ services: - operandi operandi-rabbitmq: - image: "rabbitmq:3.12-management" + image: "rabbitmq:4.2-management" container_name: operandi-rabbitmq hostname: rabbit-mq-host + ulimits: + nofile: + soft: 65536 + hard: 65536 ports: - "5672:5672" - "15672:15672" @@ -45,7 +47,7 @@ services: restart: on-failure environment: - RABBITMQ_SERVER_ERL_ARGS=-rabbitmq_management load_definitions "/rmq_definitions.json" - - RABBITMQ_FEATURE_FLAGS=quorum_queue,implicit_default_bindings,classic_mirrored_queue_version + - RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS=-rabbit peer_discovery_backend none networks: - operandi healthcheck: diff --git a/docker-compose_image_based.yml b/docker-compose_image_based.yml index b68940c8..0d71ec8e 100644 --- a/docker-compose_image_based.yml +++ b/docker-compose_image_based.yml @@ -1,5 +1,3 @@ -version: '3.8' - networks: operandi: name: operandi @@ -28,9 +26,13 @@ services: - operandi operandi-rabbitmq: - image: "rabbitmq:3.12-management" + image: "rabbitmq:4.2-management" container_name: operandi-rabbitmq hostname: rabbit-mq-host + ulimits: + nofile: + soft: 65536 + hard: 65536 ports: - "5672:5672" - "15672:15672" @@ -44,8 +46,8 @@ services: target: /rmq_definitions.json restart: on-failure environment: - - RABBITMQ_SERVER_ERL_ARGS=-rabbitmq_management load_definitions "/rmq_definitions.json" - - RABBITMQ_FEATURE_FLAGS=quorum_queue,implicit_default_bindings,classic_mirrored_queue_version + - RABBITMQ_SERVER_ERL_ARGS='-rabbitmq_management load_definitions "/rmq_definitions.json"' + - RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS='-rabbit peer_discovery_backend none' networks: - operandi healthcheck: diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..2f4c80e3 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +asyncio_mode = auto diff --git a/src/Dockerfile_broker b/src/Dockerfile_broker index 5a8bdef7..1784905c 100644 --- a/src/Dockerfile_broker +++ b/src/Dockerfile_broker @@ -1,38 +1,63 @@ -FROM ubuntu:22.04 +# ================================ +# Builder stage: build all Python packages +# ================================ +FROM python:3.11-slim as builder -MAINTAINER OPERANDI -ENV DEBIAN_FRONTEND noninteractive -ENV PYTHONIOENCODING utf8 -ENV LC_ALL=C.UTF-8 -ENV LANG=C.UTF-8 +LABEL maintainer="OPERANDI" -COPY broker/ /usr/src/broker -COPY utils/ /usr/src/utils +ENV PYTHONIOENCODING=utf8 \ + LC_ALL=C.UTF-8 \ + LANG=C.UTF-8 \ + DEBIAN_FRONTEND=noninteractive -WORKDIR /usr/src/broker/operandi_broker +# Install system build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + git \ + curl \ + make \ + wget \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/src + +# Copy and install utils first (for caching) +COPY utils/ ./utils +RUN python3 -m pip install --upgrade pip setuptools wheel \ + && pip wheel --no-deps --wheel-dir /wheels ./utils + +# Copy and install broker +COPY broker/ ./broker +RUN pip wheel --no-deps --wheel-dir /wheels ./broker + +# ================================ +# Final stage: minimal runtime +# ================================ +FROM python:3.11-slim + +ENV PYTHONIOENCODING=utf8 \ + LC_ALL=C.UTF-8 \ + LANG=C.UTF-8 -# install dependencies -RUN apt-get update && apt-get install -y \ - apt-transport-https \ - ca-certificates \ - curl \ - git \ - gnupg-agent \ - make \ - python3 \ - python3-dev \ - python3-pip \ - python3-venv \ - software-properties-common \ - sudo \ - time \ - wget - -RUN python3 -m pip install --upgrade pip setuptools -RUN pip3 install -U pip wheel -RUN python3 -m pip install -r /usr/src/utils/requirements.txt --ignore-installed -RUN pip3 install /usr/src/utils -RUN python3 -m pip install -r /usr/src/broker/requirements.txt --ignore-installed -RUN pip3 install /usr/src/broker +WORKDIR /usr/src +# Install runtime dependencies only (no build tools needed) +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy wheels from builder and install +COPY --from=builder /wheels /wheels +RUN python3 -m pip install --upgrade pip setuptools wheel \ + && pip install /wheels/* + +# Copy broker code (optional if entry points are installed) +COPY broker/ ./broker +WORKDIR /usr/src/broker/operandi_broker + +# Build success indicator RUN echo "Operandi broker build success" + +# Optional default command +CMD ["python3", "-m", "operandi_broker", "--version"] diff --git a/src/Dockerfile_server b/src/Dockerfile_server index 9ebc40b0..3cd453e4 100644 --- a/src/Dockerfile_server +++ b/src/Dockerfile_server @@ -1,35 +1,63 @@ -FROM ubuntu:22.04 +# ================================ +# Builder stage: build all Python packages +# ================================ +FROM python:3.11-slim as builder -MAINTAINER OPERANDI -ENV DEBIAN_FRONTEND noninteractive -ENV PYTHONIOENCODING utf8 -ENV LC_ALL=C.UTF-8 -ENV LANG=C.UTF-8 +LABEL maintainer="OPERANDI" -COPY server/ /usr/src/server -COPY utils/ /usr/src/utils +ENV PYTHONIOENCODING=utf8 \ + LC_ALL=C.UTF-8 \ + LANG=C.UTF-8 \ + DEBIAN_FRONTEND=noninteractive -WORKDIR /usr/src/server/operandi_server +# Install system build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + git \ + curl \ + make \ + wget \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/src + +# Copy and install utils first (for caching) +COPY utils/ ./utils +RUN python3 -m pip install --upgrade pip setuptools wheel \ + && pip wheel --no-deps --wheel-dir /wheels ./utils + +# Copy and install server +COPY server/ ./server +RUN pip wheel --no-deps --wheel-dir /wheels ./server + +# ================================ +# Final stage: minimal runtime +# ================================ +FROM python:3.11-slim + +ENV PYTHONIOENCODING=utf8 \ + LC_ALL=C.UTF-8 \ + LANG=C.UTF-8 -# install dependencies -RUN apt-get update && apt-get install -y \ - curl \ - git \ - make \ - python3 \ - python3-dev \ - python3-pip \ - python3-venv \ - software-properties-common \ - sudo \ - time \ - wget - -RUN python3 -m pip install --upgrade pip setuptools -RUN pip3 install -U pip wheel -RUN python3 -m pip install -r /usr/src/utils/requirements.txt --ignore-installed -RUN pip3 install /usr/src/utils -RUN python3 -m pip install -r /usr/src/server/requirements.txt --ignore-installed -RUN pip3 install /usr/src/server +WORKDIR /usr/src +# Install runtime dependencies only (no build tools needed) +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy wheels from builder and install +COPY --from=builder /wheels /wheels +RUN python3 -m pip install --upgrade pip setuptools wheel \ + && pip install /wheels/* + +# Copy server code (optional if entry points are installed) +COPY server/ ./server +WORKDIR /usr/src/server/operandi_server + +# Build success indicator RUN echo "Operandi server build success" + +# Optional default command +CMD ["python3", "-m", "operandi_server", "--version"] diff --git a/src/broker/operandi_broker/job_worker_download.py b/src/broker/operandi_broker/job_worker_download.py index c6841140..63355ea3 100644 --- a/src/broker/operandi_broker/job_worker_download.py +++ b/src/broker/operandi_broker/job_worker_download.py @@ -30,7 +30,7 @@ def _consumed_msg_callback(self, ch, method, properties, body): consumed_message = loads(body) self.log.info(f"Consumed message: {consumed_message}") self.current_message_job_id = consumed_message["job_id"] - previous_job_state = consumed_message["previous_job_state"] + previous_job_state: StateJob = StateJob(consumed_message["previous_job_state"]) except Exception as error: self.log.warning(f"Parsing the consumed message has failed: {error}") self._handle_msg_failure(interruption=False) @@ -63,8 +63,11 @@ def _consumed_msg_callback(self, ch, method, properties, body): try: # TODO: Refactor this block of code since nothing is downloaded from the HPC when job fails. + self.log.info(f"Previous job state: {previous_job_state}") if previous_job_state == StateJob.HPC_SUCCESS: + self.log.info(f"Downloading slurm job log file of succeeded: {slurm_job_id}") self.hpc_io_transfer.download_slurm_job_log_file(slurm_job_id, job_dir) + self.log.info(f"Downloading results of succeeded workflow job: {job_dir}") self.__download_results_from_hpc(job_dir=job_dir, workspace_dir=ws_dir) self.log.info(f"Setting new workspace state `{StateWorkspace.READY}` of workspace_id: {workspace_id}") updated_file_groups = self.__extract_updated_file_groups(db_workspace=db_workspace) @@ -82,8 +85,10 @@ def _consumed_msg_callback(self, ch, method, properties, body): sync_db_update_workflow_job(find_job_id=self.current_message_job_id, job_state=StateJob.SUCCESS) self.log.info(f"Setting new workflow job state `{StateJob.SUCCESS}`" f" of job_id: {self.current_message_job_id}") - if previous_job_state == StateJob.HPC_FAILED: + elif previous_job_state == StateJob.HPC_FAILED: + self.log.info(f"Downloading slurm job log file of failed: {slurm_job_id}") self.hpc_io_transfer.download_slurm_job_log_file(slurm_job_id, job_dir) + self.log.info(f"Skipping downloading results of failed workflow job: {job_dir}") self.log.info(f"Setting new workspace state `{StateWorkspace.READY}` of workspace_id: {workspace_id}") db_workspace: DBWorkspace = sync_db_update_workspace( find_workspace_id=workspace_id, state=StateWorkspace.READY) @@ -99,13 +104,17 @@ def _consumed_msg_callback(self, ch, method, properties, body): sync_db_update_workflow_job(find_job_id=self.current_message_job_id, job_state=StateJob.FAILED) self.log.info(f"Setting new workflow job state `{StateJob.FAILED}`" f" of job_id: {self.current_message_job_id}") + elif previous_job_state == StateJob.TRANSFERRING_FROM_HPC: + self.log.warning("Another worker instance is already downloading or has downloaded") + else: + self.log.warning(f"State not processable: {previous_job_state}") except Exception as error: self.log.warning(f"{error}") self._handle_msg_failure(interruption=False) return self.has_consumed_message = False - self.log.debug(f"Ack delivery tag: {self.current_message_delivery_tag}") + self.log.info(f"Ack delivery tag: {self.current_message_delivery_tag}") ch.basic_ack(delivery_tag=method.delivery_tag) @override @@ -119,7 +128,7 @@ def _handle_msg_failure(self, interruption: bool): self.rmq_consumer.ack_message(delivery_tag=self.current_message_delivery_tag) return - self.log.debug(f"Ack delivery tag: {self.current_message_delivery_tag}") + self.log.info(f"Ack delivery tag: {self.current_message_delivery_tag}") self.rmq_consumer.ack_message(delivery_tag=self.current_message_delivery_tag) # Reset the current message related parameters diff --git a/src/broker/operandi_broker/job_worker_status.py b/src/broker/operandi_broker/job_worker_status.py index 35996e63..0d4bbb3f 100644 --- a/src/broker/operandi_broker/job_worker_status.py +++ b/src/broker/operandi_broker/job_worker_status.py @@ -17,8 +17,8 @@ def __init__(self, db_url, rabbitmq_url, queue_name): @override def _consumed_msg_callback(self, ch, method, properties, body): - self.log.debug(f"ch: {ch}, method: {method}, properties: {properties}, body: {body}") - self.log.debug(f"Consumed message: {body}") + self.log.info(f"ch: {ch}, method: {method}, properties: {properties}, body: {body}") + self.log.info(f"Consumed message: {body}") self.current_message_delivery_tag = method.delivery_tag self.has_consumed_message = True @@ -55,7 +55,7 @@ def _consumed_msg_callback(self, ch, method, properties, body): return self.has_consumed_message = False - self.log.debug(f"Ack delivery tag: {self.current_message_delivery_tag}") + self.log.info(f"Ack delivery tag: {self.current_message_delivery_tag}") ch.basic_ack(delivery_tag=method.delivery_tag) @override @@ -69,7 +69,7 @@ def _handle_msg_failure(self, interruption: bool): self.rmq_consumer.ack_message(delivery_tag=self.current_message_delivery_tag) return - self.log.debug(f"Ack delivery tag: {self.current_message_delivery_tag}") + self.log.info(f"Ack delivery tag: {self.current_message_delivery_tag}") self.rmq_consumer.ack_message(delivery_tag=self.current_message_delivery_tag) # Reset the current message related parameters @@ -108,7 +108,7 @@ def __handle_hpc_and_workflow_states( if new_job_state == StateJob.HPC_SUCCESS or new_job_state == StateJob.HPC_FAILED: sync_db_update_workspace(find_workspace_id=workspace_id, state=StateWorkspace.TRANSFERRING_FROM_HPC) sync_db_update_workflow_job(find_job_id=job_id, job_state=StateJob.TRANSFERRING_FROM_HPC) - result_download_message = {"job_id": f"{job_id}", "previous_job_state": f"{new_job_state}"} + result_download_message = {"job_id": f"{job_id}", "previous_job_state": f"{new_job_state.value}"} self.log.info(f"Encoding the result download RabbitMQ message: {result_download_message}") encoded_result_download_message = dumps(result_download_message).encode(encoding="utf-8") self.rmq_publisher.publish_to_queue( diff --git a/src/broker/pyproject.toml b/src/broker/pyproject.toml new file mode 100644 index 00000000..b2aa651e --- /dev/null +++ b/src/broker/pyproject.toml @@ -0,0 +1,25 @@ +[build-system] +requires = ["setuptools>=69.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "operandi-broker" +version = "2.24.0" +authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] +license = {text = "Apache License 2.0"} +description = "Broker for Operandi" +requires-python = ">=3.10" +dependencies = ["operandi-utils>=2.24.0"] + +[project.readme] +file = "README.md" +content-type = "text/markdown" + +[project.scripts] +operandi-broker = "operandi_broker:cli" + +[tool.setuptools] +package-dir = {"" = "."} + +[tool.setuptools.packages.find] +include = ["operandi_broker*"] diff --git a/src/broker/requirements.txt b/src/broker/requirements.txt deleted file mode 100644 index 4624e139..00000000 --- a/src/broker/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -requests>2.32.0 diff --git a/src/broker/setup.py b/src/broker/setup.py deleted file mode 100644 index e1589d68..00000000 --- a/src/broker/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup -from operandi_utils.constants import OPERANDI_VERSION - -install_requires = open('requirements.txt').read().split('\n') -install_requires.append(f'operandi_utils == {OPERANDI_VERSION}') - -setup( - name='operandi_broker', - version=OPERANDI_VERSION, - description='OPERANDI - Service Broker', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Mehmed Mustafa', - author_email='mehmed.mustafa@gwdg.de', - url='https://github.com/subugoe/operandi', - license='Apache License 2.0', - packages=['operandi_broker'], - package_data={}, - install_requires=install_requires, - entry_points={ - 'console_scripts': [ - 'operandi-broker=operandi_broker:cli', - ] - }, -) diff --git a/src/client/operandi_client/constants.py b/src/client/operandi_client/constants.py index ec9a2212..5c511181 100644 --- a/src/client/operandi_client/constants.py +++ b/src/client/operandi_client/constants.py @@ -10,6 +10,6 @@ # Time waited between each workflow job status check WAIT_TIME_BETWEEN_POLLS: int = 120 # seconds # Times to perform workflow job status checks before timeout -TRIES_TILL_TIMEOUT: int = 30 +TRIES_TILL_TIMEOUT: int = 600 USE_WORKSPACE_FILE_GROUP = "DEFAULT" diff --git a/src/client/pyproject.toml b/src/client/pyproject.toml new file mode 100644 index 00000000..a4947739 --- /dev/null +++ b/src/client/pyproject.toml @@ -0,0 +1,25 @@ +[build-system] +requires = ["setuptools>=69.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "operandi-client" +version = "2.24.0" +authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] +license = {text = "Apache License 2.0"} +description = "Client for Operandi" +requires-python = ">=3.10" +dependencies = ["operandi-utils>=2.24.0"] + +[project.readme] +file = "README.md" +content-type = "text/markdown" + +[project.scripts] +operandi-client = "operandi_client:cli" + +[tool.setuptools] +package-dir = {"" = "."} + +[tool.setuptools.packages.find] +include = ["operandi_client*"] diff --git a/src/client/requirements.txt b/src/client/requirements.txt deleted file mode 100644 index 722a3854..00000000 --- a/src/client/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -requests>2.32.0 \ No newline at end of file diff --git a/src/client/setup.py b/src/client/setup.py deleted file mode 100644 index efb2f192..00000000 --- a/src/client/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup -from operandi_utils.constants import OPERANDI_VERSION - -install_requires = open('requirements.txt').read().split('\n') -install_requires.append(f'operandi_utils == {OPERANDI_VERSION}') - -setup( - name='operandi_client', - version=OPERANDI_VERSION, - description='OPERANDI - Client', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Mehmed Mustafa', - author_email='mehmed.mustafa@gwdg.de', - url='https://github.com/subugoe/operandi', - license='Apache License 2.0', - packages=['operandi_client'], - package_data={'': []}, - install_requires=install_requires, - entry_points={ - 'console_scripts': [ - 'operandi-client=operandi_client:cli', - ] - }, -) diff --git a/src/harvester/operandi_harvester/constants.py b/src/harvester/operandi_harvester/constants.py index 4d7e8164..2d7df395 100644 --- a/src/harvester/operandi_harvester/constants.py +++ b/src/harvester/operandi_harvester/constants.py @@ -23,8 +23,8 @@ # Time waited between the POST requests to the OPERANDI Server WAIT_TIME_BETWEEN_SUBMITS: int = 15 # seconds # Time waited between each workflow job status check -WAIT_TIME_BETWEEN_POLLS: int = 15 # seconds +WAIT_TIME_BETWEEN_POLLS: int = 120 # seconds # Times to perform workflow job status checks before timeout -TRIES_TILL_TIMEOUT: int = 30 +TRIES_TILL_TIMEOUT: int = 600 USE_WORKSPACE_FILE_GROUP = "DEFAULT" diff --git a/src/harvester/pyproject.toml b/src/harvester/pyproject.toml new file mode 100644 index 00000000..24bfa3a9 --- /dev/null +++ b/src/harvester/pyproject.toml @@ -0,0 +1,33 @@ +[build-system] +requires = ["setuptools>=69.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "operandi-harvester" +version = "2.24.0" +authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] +license = {text = "Apache License 2.0"} +description = "Harvester for Operandi" +requires-python = ">=3.10" +dependencies = ["operandi-utils>=2.24.0"] + +[project.readme] +file = "README.md" +content-type = "text/markdown" + +[project.scripts] +operandi-harvester = "operandi_harvester:cli" + +[tool.setuptools] +package-dir = {"" = "."} + +[tool.setuptools.packages.find] +include = ["operandi_harvester*"] + +[tool.setuptools.package-data] +operandi_utils = [ + "assets/*.ocrd.zip", + "assets/*.json", + "assets/*.txt", + "assets/*.nf" +] diff --git a/src/harvester/requirements.txt b/src/harvester/requirements.txt deleted file mode 100644 index 4624e139..00000000 --- a/src/harvester/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -requests>2.32.0 diff --git a/src/harvester/setup.py b/src/harvester/setup.py deleted file mode 100644 index 14e2409c..00000000 --- a/src/harvester/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup -from operandi_utils.constants import OPERANDI_VERSION - -install_requires = open('requirements.txt').read().split('\n') -install_requires.append(f'operandi_utils == {OPERANDI_VERSION}') - -setup( - name='operandi_harvester', - version=OPERANDI_VERSION, - description='OPERANDI - Harvester', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Mehmed Mustafa', - author_email='mehmed.mustafa@gwdg.de', - url='https://github.com/subugoe/operandi', - license='Apache License 2.0', - packages=['operandi_harvester'], - package_data={'': ['assets/*.ocrd.zip', 'assets/*.json', 'assets/*.txt', 'assets/*.nf']}, - install_requires=install_requires, - entry_points={ - 'console_scripts': [ - 'operandi-harvester=operandi_harvester:cli', - ] - }, -) diff --git a/src/server/operandi_server/models/base.py b/src/server/operandi_server/models/base.py index 9d5a9a1f..f1e75d60 100644 --- a/src/server/operandi_server/models/base.py +++ b/src/server/operandi_server/models/base.py @@ -1,10 +1,11 @@ from datetime import datetime -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from typing import Optional from operandi_utils.hpc.constants import HPC_NHR_JOB_DEFAULT_PARTITION from ..constants import DEFAULT_FILE_GRP, DEFAULT_METS_BASENAME class Resource(BaseModel): + model_config = ConfigDict(validate_by_name=True) user_id: str = Field(..., description="The unique id of the user who created the resource") resource_id: str = Field(..., description="The unique id of the resource") resource_url: str = Field(..., description="The unique URL of the resource") @@ -12,39 +13,28 @@ class Resource(BaseModel): datetime: datetime deleted: bool - class Config: - allow_population_by_field_name = True - class WorkflowArguments(BaseModel): + model_config = ConfigDict(validate_by_name=True) workspace_id: str input_file_grp: Optional[str] = DEFAULT_FILE_GRP remove_file_grps: Optional[str] = "" preserve_file_grps: Optional[str] = "" mets_name: Optional[str] = DEFAULT_METS_BASENAME - class Config: - allow_population_by_field_name = True - class SbatchArguments(BaseModel): + model_config = ConfigDict(validate_by_name=True) partition: str = HPC_NHR_JOB_DEFAULT_PARTITION # partition to be used cpus: int = 4 # cpus per job allocated by default ram: int = 64 # RAM (in GB) per job allocated by default - class Config: - allow_population_by_field_name = True - class OlahdUploadArguments(BaseModel): + model_config = ConfigDict(validate_by_name=True) username: str password: str endpoint: str - class Config: - allow_population_by_field_name = True - class MetsUrlRequest(BaseModel): + model_config = ConfigDict(validate_by_name=True) mets_url: str = Field(..., description="The mets url") preserve_file_grps: str = Field(..., description="The file groups to be preserved") mets_basename: str = Field(default=DEFAULT_METS_BASENAME, description="The mets file basename") - - class Config: - allow_population_by_field_name = True diff --git a/src/server/operandi_server/models/discovery.py b/src/server/operandi_server/models/discovery.py index cc0b1d57..ea38ab68 100644 --- a/src/server/operandi_server/models/discovery.py +++ b/src/server/operandi_server/models/discovery.py @@ -1,7 +1,8 @@ -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field class PYDiscovery(BaseModel): + model_config = ConfigDict(validate_by_name=True) ram: float = Field(default=0.0, description="All available RAM in bytes") cpu_cores: int = Field(default=0, description="Number of available CPU cores") has_cuda: bool = Field(default=False, description="Whether deployment supports NVIDIA's CUDA") @@ -10,6 +11,3 @@ class PYDiscovery(BaseModel): ocrd_all_version: str = Field( default="Ocrd all version not detected", description="Git tag of the ocrd_all version implemented") has_docker: bool = Field(default=False, description="Whether the OCR-D executables run in a Docker container") - - class Config: - allow_population_by_field_name = True diff --git a/src/server/operandi_server/models/user.py b/src/server/operandi_server/models/user.py index b2edd35a..2779a948 100644 --- a/src/server/operandi_server/models/user.py +++ b/src/server/operandi_server/models/user.py @@ -1,9 +1,10 @@ -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from operandi_utils.constants import AccountType from operandi_utils.database.models import DBUserAccount class PYUserAction(BaseModel): + model_config = ConfigDict(validate_by_name=True) institution_id: str = Field(..., description="Institution id of the user") user_id: str = Field(..., description="Unique id of the user") email: str = Field(..., description="Email linked to this User") @@ -12,9 +13,6 @@ class PYUserAction(BaseModel): details: str = Field(..., description="More details about the account") action: str = Field(..., description="Description of the user action") - class Config: - allow_population_by_field_name = True - @staticmethod def from_db_user_account(action: str, db_user_account: DBUserAccount): return PYUserAction( @@ -29,6 +27,7 @@ def from_db_user_account(action: str, db_user_account: DBUserAccount): class PYUserInfo(BaseModel): + model_config = ConfigDict(validate_by_name=True) institution_id: str = Field(..., description="Institution id of the user") user_id: str = Field(..., description="Unique id of the user") email: str = Field(..., description="Email linked to this User") @@ -36,9 +35,6 @@ class PYUserInfo(BaseModel): approved_user: bool = Field(False, description="Whether the account was admin approved and fully functional") details: str = Field(..., description="More details about the account") - class Config: - allow_population_by_field_name = True - @staticmethod def from_db_user_account(db_user_account: DBUserAccount): return PYUserInfo( diff --git a/src/server/operandi_server/models/workflow.py b/src/server/operandi_server/models/workflow.py index ff0c1f81..377a2c2c 100644 --- a/src/server/operandi_server/models/workflow.py +++ b/src/server/operandi_server/models/workflow.py @@ -18,9 +18,6 @@ class WorkflowRsrc(Resource): executable_steps: List[str] producible_file_groups: List[str] - class Config: - allow_population_by_field_name = True - @staticmethod def from_db_workflow(db_workflow: DBWorkflow): return WorkflowRsrc( @@ -48,9 +45,6 @@ class WorkflowJobRsrc(Resource): workflow_rsrc: Optional[WorkflowRsrc] workspace_rsrc: Optional[WorkspaceRsrc] - class Config: - allow_population_by_field_name = True - @staticmethod def from_db_workflow_job(db_workflow_job: DBWorkflowJob, db_workflow: DBWorkflow, db_workspace: DBWorkspace): return WorkflowJobRsrc( diff --git a/src/server/operandi_server/models/workspace.py b/src/server/operandi_server/models/workspace.py index e083ea2e..39d5b57b 100644 --- a/src/server/operandi_server/models/workspace.py +++ b/src/server/operandi_server/models/workspace.py @@ -22,9 +22,6 @@ class WorkspaceRsrc(Resource): mets_basename: Optional[str] bag_info_adds: Optional[dict] - class Config: - allow_population_by_field_name = True - @staticmethod def from_db_workspace(db_workspace: DBWorkspace): return WorkspaceRsrc( diff --git a/src/server/pyproject.toml b/src/server/pyproject.toml new file mode 100644 index 00000000..2131f370 --- /dev/null +++ b/src/server/pyproject.toml @@ -0,0 +1,28 @@ +[build-system] +requires = ["setuptools>=69.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "operandi-server" +version = "2.24.0" +authors = [{name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de"}] +license = {text = "Apache License 2.0"} +description = "Server for Operandi" +requires-python = ">=3.10" +dependencies = [ + "operandi-utils>=2.24.0", + "fastapi>=0.95.0", + "uvicorn==0.21.0", + "psutil" +] + +[project.readme] +file = "README.md" +content-type = "text/markdown" + +[project.scripts] +operandi-server = "operandi_server:cli" + +[tool.setuptools] +packages = { find = {} } +include-package-data = true diff --git a/src/server/requirements.txt b/src/server/requirements.txt deleted file mode 100644 index b3f268d9..00000000 --- a/src/server/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -fastapi>=0.95.0 -uvicorn==0.21.0 -psutil diff --git a/src/server/setup.py b/src/server/setup.py deleted file mode 100644 index 8609a120..00000000 --- a/src/server/setup.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup -from operandi_utils.constants import OPERANDI_VERSION - -install_requires = open('requirements.txt').read().split('\n') -install_requires.append(f'operandi_utils == {OPERANDI_VERSION}') - -setup( - name='operandi_server', - version=OPERANDI_VERSION, - description='OPERANDI - Server', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Mehmed Mustafa', - author_email='mehmed.mustafa@gwdg.de', - url='https://github.com/subugoe/operandi', - license='Apache License 2.0', - packages=[ - 'operandi_server', - 'operandi_server.models', - 'operandi_server.routers' - ], - package_data={}, - install_requires=install_requires, - entry_points={ - 'console_scripts': [ - 'operandi-server=operandi_server:cli', - ] - }, -) diff --git a/src/utils/operandi_utils/database/base.py b/src/utils/operandi_utils/database/base.py index 1984cca2..8c7a6ed3 100644 --- a/src/utils/operandi_utils/database/base.py +++ b/src/utils/operandi_utils/database/base.py @@ -1,7 +1,7 @@ from logging import getLogger from os import environ from beanie import init_beanie -from motor.motor_asyncio import AsyncIOMotorClient +from pymongo import AsyncMongoClient from operandi_utils import call_sync from .models import DBHPCSlurmJob, DBUserAccount, DBWorkflow, DBWorkflowJob, DBWorkspace @@ -37,9 +37,10 @@ async def db_initiate_database( DBWorkflowJob, DBWorkspace ] - client = AsyncIOMotorClient(db_url) + client = AsyncMongoClient(db_url) + db = client.get_default_database(default=db_name) # Documentation: https://beanie-odm.dev/ - await init_beanie(database=client.get_default_database(default=db_name), document_models=doc_models) + await init_beanie(database=db, document_models=doc_models) @call_sync diff --git a/src/utils/operandi_utils/database/models.py b/src/utils/operandi_utils/database/models.py index 951d4dfb..32f63d90 100644 --- a/src/utils/operandi_utils/database/models.py +++ b/src/utils/operandi_utils/database/models.py @@ -32,7 +32,7 @@ class DBUserAccount(Document): approved_user: bool = False deleted: bool = False datetime: Optional[datetime] - details: Optional[str] + details: Optional[str] = "User Account" class Settings: name = "user_accounts" @@ -57,11 +57,11 @@ class DBHPCSlurmJob(Document): workflow_job_id: str hpc_slurm_job_id: str hpc_slurm_job_state: StateJobSlurm = StateJobSlurm.UNSET - hpc_batch_script_path: Optional[str] - hpc_slurm_workspace_path: Optional[str] + hpc_batch_script_path: Optional[str] = "UNSET" + hpc_slurm_workspace_path: Optional[str] = "UNSET" deleted: bool = False datetime: Optional[datetime] - details: Optional[str] + details: Optional[str] = "HPC-Slurm-Job" class Settings: name = "hpc_slurm_jobs" @@ -93,7 +93,7 @@ class DBWorkflow(Document): producible_file_groups: List[str] deleted: bool = False datetime: Optional[datetime] - details: Optional[str] + details: Optional[str] = "Workflow" class Settings: name = "workflows" @@ -123,12 +123,12 @@ class DBWorkflowJob(Document): workflow_id: str workspace_id: str job_state: StateJob = StateJob.UNSET - workflow_dir: Optional[str] - workspace_dir: Optional[str] - hpc_slurm_job_id: Optional[str] + workflow_dir: Optional[str] = "UNSET" + workspace_dir: Optional[str] = "UNSET" + hpc_slurm_job_id: Optional[str] = "UNSET" deleted: bool = False datetime: Optional[datetime] - details: Optional[str] + details: Optional[str] = "Workflow-Job" class Settings: name = "workflow_jobs" @@ -164,14 +164,14 @@ class DBWorkspace(Document): pages_amount: int file_groups: List[str] state: StateWorkspace = StateWorkspace.UNSET - ocrd_identifier: Optional[str] - bagit_profile_identifier: Optional[str] - ocrd_base_version_checksum: Optional[str] - mets_basename: Optional[str] - bag_info_adds: Optional[dict] + ocrd_identifier: Optional[str] = "UNSET" + bagit_profile_identifier: Optional[str] = "UNSET" + ocrd_base_version_checksum: Optional[str] = "UNSET" + mets_basename: Optional[str] = "UNSET" + bag_info_adds: Optional[dict] = {} deleted: bool = False datetime: Optional[datetime] - details: Optional[str] + details: Optional[str] = "Workspace" class Settings: name = "workspaces" diff --git a/src/utils/operandi_utils/hpc/batch_scripts/batch_submit_workflow_job.sh b/src/utils/operandi_utils/hpc/batch_scripts/batch_submit_workflow_job.sh index f4625c0b..13b2ee24 100755 --- a/src/utils/operandi_utils/hpc/batch_scripts/batch_submit_workflow_job.sh +++ b/src/utils/operandi_utils/hpc/batch_scripts/batch_submit_workflow_job.sh @@ -172,14 +172,17 @@ transfer_to_node_storage_processor_images(){ if [ ! -f "$ocrd_image_path" ]; then echo "Expected ocrd processor image not found at: $ocrd_image_path" exit 1 - else - echo "Transferring ocrd processor image to the compute node: ${ocrd_image}" - cp "${ocrd_image_path}" "${node_ocrd_image_path}" - echo "Ocrd processor image was transferred to: ${node_ocrd_image_path}" - if [ ! -f "${node_ocrd_image_path}" ]; then - echo "Expected ocrd processor image was copied but not found locally at: ${node_ocrd_image_path}" - exit 1 - fi + fi + if [ -f "$node_ocrd_image_path" ]; then + echo "Skipping ${ocrd_image_path} since the same image was already copied in a previous step" + continue + fi + echo "Transferring ocrd processor image to the compute node: ${ocrd_image}" + cp "${ocrd_image_path}" "${node_ocrd_image_path}" + echo "Ocrd processor image was transferred to: ${node_ocrd_image_path}" + if [ ! -f "${node_ocrd_image_path}" ]; then + echo "Expected ocrd processor image was copied but not found locally at: ${node_ocrd_image_path}" + exit 1 fi done echo "" diff --git a/src/utils/operandi_utils/hpc/nhr_executor_utils.py b/src/utils/operandi_utils/hpc/nhr_executor_utils.py index c7726514..5a31a778 100644 --- a/src/utils/operandi_utils/hpc/nhr_executor_utils.py +++ b/src/utils/operandi_utils/hpc/nhr_executor_utils.py @@ -6,10 +6,15 @@ def parse_slurm_job_state_from_output(output: List[str]) -> Tuple[StateJobSlurm, if not output: return StateJobSlurm.UNSET, "No output available, something is odd." if len(output) < 3: - return StateJobSlurm.UNSET, "The output has less than 3 lines, job not listed yet." - parsed_state: str = output[-2].split()[1] + return StateJobSlurm.UNSET, f"Less than 3 lines in the output: {output}" + try: + parsed_state: str = output[-2].split()[1] + if parsed_state.startswith("-"): + parsed_state: str = output[-1].split()[1] + except IndexError: + return StateJobSlurm.UNSET, f"Parsing error from output: {output}" try: state_job_slurm = StateJobSlurm(parsed_state) except ValueError: - return StateJobSlurm.UNSET, f"Unknown parsed state: {parsed_state}" + return StateJobSlurm.UNSET, f"Unknown parsed state: {parsed_state} from output: {output}" return state_job_slurm, "Parsed state recognized" diff --git a/src/utils/operandi_utils/oton/constants.py b/src/utils/operandi_utils/oton/constants.py index 7cbaea71..8c1788c8 100644 --- a/src/utils/operandi_utils/oton/constants.py +++ b/src/utils/operandi_utils/oton/constants.py @@ -1,6 +1,6 @@ from json import load from os import environ -from pkg_resources import resource_filename +from importlib import resources BS: str = '{}' SPACES = ' ' @@ -11,8 +11,8 @@ CONST_METS_SOCKET_PATH: str = 'mets_socket_path' CONST_WORKSPACE_DIR: str = 'workspace_dir' -OCRD_ALL_JSON_FILE = resource_filename(__name__, 'ocrd-all-tool.json') -with open(OCRD_ALL_JSON_FILE) as f: +ocrd_all_file = resources.files("operandi_utils.oton") / "ocrd-all-tool.json" +with ocrd_all_file.open("r", encoding="utf-8") as f: OCRD_ALL_JSON = load(f) OTON_LOG_LEVEL = environ.get("OTON_LOG_LEVEL", "INFO") diff --git a/src/utils/pyproject.toml b/src/utils/pyproject.toml new file mode 100644 index 00000000..0cf7f321 --- /dev/null +++ b/src/utils/pyproject.toml @@ -0,0 +1,47 @@ +[build-system] +requires = ["setuptools>=69.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "operandi-utils" +version = "2.24.0" +authors = [ { name = "Mehmed Mustafa", email = "mehmed.mustafa@gwdg.de" } ] +license = { text = "Apache License 2.0" } +description = "Shared utilities for Operandi" +requires-python = ">=3.10" +dependencies = [ + "aiofiles>=0.8.0", + "beanie>=2.0.1", + "chardet>=5.1.0", + "click>=8.3.1", + "clint>=0.5.1", + "loguru>=0.6.0", + "httpx>=0.28.1", + "ocrd>=3.11.0", + "paramiko>=4.0.0", + "pika>=1.3.2", + "pydantic>=2.12.5", + "pymongo >= 4.16.0", + "python-dotenv>=1.0.0", + "python-multipart>=0.0.22", + "requests>=2.32.5" +] + +[project.readme] +file = "README.md" +content-type = "text/markdown" + +[project.scripts] +oton-converter = "operandi_utils.oton:cli" + +[tool.setuptools] +packages = { find = {} } +include-package-data = true + +[tool.setuptools.package-data] +operandi_utils = [ + "hpc/batch_scripts/*.sh", + "hpc/nextflow_workflows/*.nf", + "hpc/ocrd_process_workflows/*.txt", + "oton/ocrd-all-tool.json" +] diff --git a/src/utils/requirements.txt b/src/utils/requirements.txt deleted file mode 100644 index 3b0fb894..00000000 --- a/src/utils/requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -aiofiles>=0.8.0 -beanie==1.11.7 -chardet>=5.1.0 -click>=7 -clint==0.5.1 -loguru>=0.6.0 -httpx>=0.24.0 -ocrd>=3.0.4 -paramiko>=3.4.0 -pika>=1.2.0 -pydantic>=1.9.1 -pymongo >= 4.3.3 -python-dotenv>=1.0.0 -python-multipart>=0.0.5 diff --git a/src/utils/setup.py b/src/utils/setup.py deleted file mode 100644 index ed528dec..00000000 --- a/src/utils/setup.py +++ /dev/null @@ -1,32 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup - -install_requires = open('requirements.txt').read().split('\n') - -setup( - name='operandi_utils', - version='2.23.0', - description='OPERANDI - Utils', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Mehmed Mustafa', - author_email='mehmed.mustafa@gwdg.de', - url='https://github.com/subugoe/operandi', - license='Apache License 2.0', - packages=[ - 'operandi_utils', - 'operandi_utils.database', - 'operandi_utils.hpc', - 'operandi_utils.oton', - 'operandi_utils.rabbitmq' - ], - package_data={ - '': ['batch_scripts/*.sh', 'nextflow_workflows/*.nf', 'ocrd_process_workflows/*.txt', 'ocrd-all-tool.json'] - }, - install_requires=install_requires, - entry_points={ - 'console_scripts': [ - 'oton-converter=operandi_utils.oton:cli', - ] - } -) diff --git a/tests/fixtures/hpc_nhr.py b/tests/fixtures/hpc_nhr.py index 3ac676fb..db7e196a 100644 --- a/tests/fixtures/hpc_nhr.py +++ b/tests/fixtures/hpc_nhr.py @@ -1,13 +1,19 @@ +from logging import StreamHandler, DEBUG from pytest import fixture +from sys import stdout from operandi_utils.hpc import NHRExecutor, NHRTransfer @fixture(scope="package", name="hpc_nhr_data_transfer") def fixture_hpc_nhr_transfer_connector(): hpc_transfer_connector = NHRTransfer() + hpc_transfer_connector.logger.addHandler(StreamHandler(stdout)) + hpc_transfer_connector.logger.setLevel(DEBUG) yield hpc_transfer_connector @fixture(scope="package", name="hpc_nhr_command_executor") def fixture_hpc_nhr_execution_connector(): hpc_paramiko_connector = NHRExecutor() + hpc_paramiko_connector.logger.addHandler(StreamHandler(stdout)) + hpc_paramiko_connector.logger.setLevel(DEBUG) yield hpc_paramiko_connector diff --git a/tests/fixtures/server.py b/tests/fixtures/server.py index c9ce9dcc..e0301c5f 100644 --- a/tests/fixtures/server.py +++ b/tests/fixtures/server.py @@ -1,7 +1,6 @@ +from fastapi.testclient import TestClient from os import environ from pytest import fixture -from fastapi.testclient import TestClient - from operandi_server import OperandiServer from tests.helpers_asserts import assert_availability_db diff --git a/tests/helpers_asserts.py b/tests/helpers_asserts.py index 4edfec2e..e6434cbb 100644 --- a/tests/helpers_asserts.py +++ b/tests/helpers_asserts.py @@ -1,18 +1,18 @@ from os.path import exists, isdir, isfile -from requests import get -from time import sleep - - -def assert_availability_db(url, tries: int = 6, wait_time: int = 10): - http_url = url.replace("mongodb", "http") - response = None - while tries > 0: - response = get(http_url) - if response.status_code == 200: - break - sleep(wait_time) - tries -= 1 - assert response.status_code == 200, f"DB not running on: {url}" +from pymongo import MongoClient +from pymongo.errors import ServerSelectionTimeoutError + + + +def assert_availability_db(db_url: str, timeout_ms: int = 5000) -> bool: + client = MongoClient(db_url, serverSelectionTimeoutMS=timeout_ms) + try: + client.admin.command("ping") + return True + except ServerSelectionTimeoutError: + return False + finally: + client.close() def assert_exists_db_resource(db_resource, resource_key, resource_id): diff --git a/tests/integration_tests/test_full_cycle.py b/tests/integration_tests/test_full_cycle.py index 36a833e2..fb635b85 100644 --- a/tests/integration_tests/test_full_cycle.py +++ b/tests/integration_tests/test_full_cycle.py @@ -12,7 +12,7 @@ OPERANDI_SERVER_BASE_DIR = environ.get("OPERANDI_SERVER_BASE_DIR") def check_job_till_finish(auth_harvester, operandi, workflow_job_id: str): - tries = 60 + tries = 120 job_status = None check_job_status_url = f"/workflow-job/{workflow_job_id}" while tries > 0: @@ -21,23 +21,25 @@ def check_job_till_finish(auth_harvester, operandi, workflow_job_id: str): response = operandi.get(url=check_job_status_url, auth=auth_harvester) assert_response_status_code(response.status_code, expected_floor=2) job_status = response.json()["job_state"] - if job_status == StateJob.SUCCESS: + if job_status in [StateJob.HPC_SUCCESS, StateJob.HPC_FAILED, StateJob.TRANSFERRING_FROM_HPC]: break + assert job_status is not None + return job_status - # TODO: Fix may be needed here - # When failed loop 5 more times. - # Sometimes the FAILED changes to SUCCESS - if job_status == StateJob.FAILED and tries > 5: - tries = 5 +def check_job_status_after_data_download(auth_harvester, operandi, workflow_job_id: str): + check_job_status_url = f"/workflow-job/{workflow_job_id}" + response = operandi.get(url=check_job_status_url, auth=auth_harvester) + assert_response_status_code(response.status_code, expected_floor=2) + job_status = response.json()["job_state"] assert job_status == StateJob.SUCCESS - + return job_status def download_workflow_job_logs(auth_harvester, operandi, workflow_job_id: str): - tries = 60 + tries = 120 get_log_zip_url = f"/workflow-job/{workflow_job_id}/logs" while tries > 0: tries -= 1 - sleep(30) + sleep(60) response = operandi.get(url=get_log_zip_url, auth=auth_harvester) if response.status_code != 200: continue @@ -59,8 +61,6 @@ def test_full_cycle(auth_harvester, operandi, service_broker, bytes_small_worksp service_broker.create_worker_process(RABBITMQ_QUEUE_HARVESTER, "submit_worker") # Create a background worker for the job statuses queue service_broker.create_worker_process(RABBITMQ_QUEUE_JOB_STATUSES, "status_worker") - # Create a background worker for the hpc download queue - service_broker.create_worker_process(RABBITMQ_QUEUE_HPC_DOWNLOADS, "download_worker") # Post a workspace zip response = operandi.post(url="/workspace", files={"workspace": bytes_small_workspace}, auth=auth_harvester) @@ -96,6 +96,10 @@ def test_full_cycle(auth_harvester, operandi, service_broker, bytes_small_worksp workflow_job_id = response.json()["resource_id"] check_job_till_finish(auth_harvester, operandi, workflow_job_id) + + # Create a background worker for the hpc download queue + service_broker.create_worker_process(RABBITMQ_QUEUE_HPC_DOWNLOADS, "download_worker") + download_workflow_job_logs(auth_harvester, operandi, workflow_job_id) ws_dir = Path(OPERANDI_SERVER_BASE_DIR, SERVER_WORKSPACES_ROUTER, workspace_id) @@ -112,3 +116,5 @@ def test_full_cycle(auth_harvester, operandi, service_broker, bytes_small_worksp assert Path(wf_job_dir, "work").exists assert Path(wf_job_dir, workspace_id, input_file_grp).exists() assert Path(wf_job_dir, workspace_id, "OCR-D-OCR").exists() + + check_job_status_after_data_download(auth_harvester, operandi, workflow_job_id) diff --git a/tests/requirements.txt b/tests/requirements.txt index e36d9b36..ef361948 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,10 +1,11 @@ -autopep8 -click +autopep8 >= 2.3.2 +click >= 8.3.1 generateDS == 2.35.20 -pylint -pymongo >= 4.3.3 -pytest >= 7.0.0 -pytest-docker>=1.0.0 -requests>2.32.0 -twine +httpx >= 0.28.1 +pylint >= 4.0.4 +pymongo >= 4.16.0 +pytest >= 9.0.0 +pytest-docker >= 3.2.5 +requests >= 2.32.5 +twine >= 6.2.0 wheel diff --git a/tests/tests_server/test_endpoint_workflow.py b/tests/tests_server/test_endpoint_workflow.py index 2fc107c8..5d71b1d0 100644 --- a/tests/tests_server/test_endpoint_workflow.py +++ b/tests/tests_server/test_endpoint_workflow.py @@ -3,6 +3,7 @@ from tests.constants import WORKFLOW_DUMMY_TEXT from .helpers_asserts import assert_local_dir_workflow, assert_response_status_code + def test_post_workflow_script(operandi, auth, db_workflows, bytes_template_workflow): # Post a new workflow script wf_detail = "Test template workflow with mets server" @@ -16,6 +17,7 @@ def test_post_workflow_script(operandi, auth, db_workflows, bytes_template_workf assert db_workflow["details"] == wf_detail assert db_workflow["uses_mets_server"] == False + def test_post_workflow_script_with_ms(operandi, auth, db_workflows, bytes_template_workflow_with_ms): # Post a new workflow script wf_detail = "Test template workflow with mets server" @@ -79,6 +81,7 @@ def test_put_workflow_script( assert workflow_details1 != workflow_details2, \ f"Workflow details should not, but match: {workflow_details1} == {workflow_details2}" + def test_put_workflow_not_allowed(operandi, auth, bytes_template_workflow_with_ms): production_workflow_ids = [ "template_workflow", "default_workflow", "odem_workflow", @@ -92,13 +95,15 @@ def test_put_workflow_not_allowed(operandi, auth, bytes_template_workflow_with_m assert_response_status_code(response.status_code, expected_floor=4) -# Not implemented/planned in the WebAPI -def _test_delete_workflow(): +# Not implemented/planned in the WebAP + +def test_delete_workflow(): pass -# Not implemented/planned in the WebAPI -def _test_delete_workflow_non_existing(): +# Not implemented/planned in the WebAP + +def test_delete_workflow_non_existing(): pass @@ -122,23 +127,19 @@ def test_get_workflow_non_existing(operandi, auth): assert_response_status_code(response.status_code, expected_floor=4) -# This is already implemented as a part of the harvester full cycle test +# This is already implemented as a part of the harvester full cycle tes + def _test_run_operandi_workflow(): pass -# This is already implemented as a part of the harvester full cycle test +# This is already implemented as a part of the harvester full cycle tes + def _test_running_workflow_job_status(): pass -# Added by Faizan def test_convert_txt_to_nextflow_success(operandi, auth): - """ - Test the successful conversion of a text file to a Nextflow (.nf) file. - """ - - # Convert the dummy text to bytes and create an in-memory file-like object dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "local", "with_mets_server": False} @@ -155,11 +156,6 @@ def test_convert_txt_to_nextflow_success(operandi, auth): def test_convert_txt_to_nextflow_success_with_mets_server(operandi, auth): - """ - Test the successful conversion of a text file to a Nextflow (.nf) file with mets server. - """ - - # Convert the dummy text to bytes and create an in-memory file-like object dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "local", "with_mets_server": True} @@ -174,11 +170,8 @@ def test_convert_txt_to_nextflow_success_with_mets_server(operandi, auth): assert "params.mets_socket_path" in nf_file_content assert "merging_mets" not in nf_file_content -# Added by Faizan + def test_convert_txt_to_nextflow_auth_failure(operandi): - """ - Test the conversion process when authentication fails. - """ dummy_text = "Some dummy text" dummy_file = BytesIO(dummy_text.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} @@ -191,12 +184,7 @@ def test_convert_txt_to_nextflow_auth_failure(operandi): assert response.json()["detail"] == "Not found user account for email: invalid_user" -# Added by Faizan def test_convert_txt_to_nextflow_validator_failure(operandi, auth): - """ - Test the conversion process when there's a validation or conversion failure. - """ - # Providing an invalid text input to trigger the ValueError in the conversion invalid_text = "Invalid ocrd process text" dummy_file = BytesIO(invalid_text.encode('utf-8')) files = {"txt_file": ("invalid.txt", dummy_file, "text/plain")} @@ -207,13 +195,7 @@ def test_convert_txt_to_nextflow_validator_failure(operandi, auth): assert "Failed to validate the ocrd process workflow txt file" in response.json()["detail"] -# Added by Faizan def test_convert_txt_to_nextflow_docker_success(operandi, auth): - """ - Test the successful conversion of a text file to a Nextflow (.nf) file. - """ - - # Convert the dummy text to bytes and create an in-memory file-like object dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "docker", "with_mets_server": False} @@ -228,11 +210,6 @@ def test_convert_txt_to_nextflow_docker_success(operandi, auth): def test_convert_txt_to_nextflow_docker_success_with_mets_server(operandi, auth): - """ - Test the successful conversion of a text file to a Nextflow (.nf) file with mets server. - """ - - # Convert the dummy text to bytes and create an in-memory file-like object dummy_file = BytesIO(WORKFLOW_DUMMY_TEXT.encode('utf-8')) files = {"txt_file": ("dummy.txt", dummy_file, "text/plain")} params = {"environment": "docker", "with_mets_server": True} diff --git a/tests/tests_utils/test_3_hpc/test_0_nhr_executor_utils.py b/tests/tests_utils/test_3_hpc/test_0_nhr_executor_utils.py index 13d60cc7..565a9676 100644 --- a/tests/tests_utils/test_3_hpc/test_0_nhr_executor_utils.py +++ b/tests/tests_utils/test_3_hpc/test_0_nhr_executor_utils.py @@ -20,7 +20,7 @@ def test_parse_slurm_job_state_from_output_less_lines(): "'-------------------- -------------------- -------- \n'" ] slurm_job_state, msg = parse_slurm_job_state_from_output(test_output_out_of_memory) - assert msg == "The output has less than 3 lines, job not listed yet." + assert msg == f"Less than 3 lines in the output: {test_output_out_of_memory}" assert slurm_job_state == StateJobSlurm.UNSET @@ -70,5 +70,5 @@ def test_parse_slurm_job_state_from_output_invalid_state(): "'6313216.extern COMPLETED 0:0 \n'" ] slurm_job_state, msg = parse_slurm_job_state_from_output(test_output_out_of_memory) - assert msg == f"Unknown parsed state: OUT_OF_ME+" + assert msg == f"Unknown parsed state: OUT_OF_ME+ from output: {test_output_out_of_memory}" assert slurm_job_state == StateJobSlurm.UNSET