diff --git a/playbooks/roles/nextcloud/README.md b/playbooks/roles/nextcloud/README.md index cf76bb0..dce5aae 100644 --- a/playbooks/roles/nextcloud/README.md +++ b/playbooks/roles/nextcloud/README.md @@ -19,3 +19,32 @@ For targeted deployment: ```bash ansible-playbook install.yml --tags nextcloud ``` + +## Timeout Configuration for AI/LLM Tasks + +This role configures extended timeouts (3600s / 1 hour) to support long-running AI and LLM tasks: + +### PHP-FPM Configuration +- **request_terminate_timeout**: 3600s (configurable via `nextcloud_php_fpm_request_terminate_timeout`) +- **Slow log**: Enabled at 60s threshold with 20-level stack traces +- **Config file**: `/usr/local/etc/php-fpm.d/zzz-docker.conf` (loads AFTER www.conf) + +### nginx Configuration +- **fastcgi_read_timeout**: 3600s +- **fastcgi_send_timeout**: 3600s + +### Nextcloud Apps +- **Context Chat**: 3600s (configurable via `nextcloud_context_chat_request_timeout`) +- **integration_openai**: 3600s (configurable via `nextcloud_integration_openai_request_timeout`) + +### Rationale +Analysis of task execution history showed: +- 62% of tasks exceed 300s (old timeout) +- 26% of tasks exceed 600s (10 minutes) +- Maximum observed: 5,348s (89 minutes) +- Typical AI/LLM tasks: 10-25 minutes + +These timeouts let typical AI tasks (text generation, summarization, context analysis) complete without being prematurely terminated; outliers that run longer than 3600s (such as the 5,348s maximum above) are still cut off. + +### Monitoring +Slow requests (>60s) are logged to `/var/log/php-fpm-slow.log` inside the container for debugging. diff --git a/playbooks/roles/nextcloud/defaults/main.yml b/playbooks/roles/nextcloud/defaults/main.yml index af6c710..9c13d5c 100644 --- a/playbooks/roles/nextcloud/defaults/main.yml +++ b/playbooks/roles/nextcloud/defaults/main.yml @@ -12,7 +12,7 @@ harp_version: "release" redis_image: "redis" redis_version: "8-alpine" nextcloud_image: "nextcloud" -nextcloud_version: "32" +nextcloud_version: "33" # PHP-FPM tuning to reduce request starvation under periodic background load. 
nextcloud_php_fpm_pm_max_children: 48 @@ -20,7 +20,7 @@ nextcloud_php_fpm_pm_start_servers: 6 nextcloud_php_fpm_pm_min_spare_servers: 4 nextcloud_php_fpm_pm_max_spare_servers: 12 nextcloud_php_fpm_pm_max_requests: 500 -nextcloud_php_fpm_request_terminate_timeout: "300s" +nextcloud_php_fpm_request_terminate_timeout: "3600s" # Preview generation is useful for photo-heavy setups. # This schedule keeps it enabled, but prevents overlap and long runaway runs. @@ -53,9 +53,9 @@ nextcloud_defaultapp_list: nextcloud_integration_openai_url: "http://webui-api-proxy:8080/v1" nextcloud_integration_openai_default_completion_model_id: "ministral-3:3b" -nextcloud_integration_openai_request_timeout: 600 +nextcloud_integration_openai_request_timeout: 3600 nextcloud_integration_openai_max_tokens: 768 -nextcloud_context_chat_request_timeout: 900 +nextcloud_context_chat_request_timeout: 3600 # nomic-embed-text v1.5: 8192-token context, Q4 GGUF (stable, no NaN), works well for French. # bge-m3 F16 was tried but produces NaN values in Ollama for many document types. 
nextcloud_context_chat_embedding_model: "nomic-embed-text" diff --git a/playbooks/roles/nextcloud/files/nextcloud/supervisord.conf b/playbooks/roles/nextcloud/files/nextcloud/supervisord.conf index b3508c8..2ebb015 100644 --- a/playbooks/roles/nextcloud/files/nextcloud/supervisord.conf +++ b/playbooks/roles/nextcloud/files/nextcloud/supervisord.conf @@ -46,4 +46,18 @@ environment=PORT="7867" stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 + +[program:taskprocessing_worker] +; Task Processing worker for NC33+ - processes AI/background tasks +; Restarts every 5 minutes (300s) to pick up config changes +command=php occ taskprocessing:worker --timeout=300 +directory=/var/www/html +user=www-data +autorestart=true +startsecs=5 +startretries=3 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr stderr_logfile_maxbytes=0 \ No newline at end of file diff --git a/playbooks/roles/nextcloud/tasks/main.yml b/playbooks/roles/nextcloud/tasks/main.yml index dada364..70bf691 100644 --- a/playbooks/roles/nextcloud/tasks/main.yml +++ b/playbooks/roles/nextcloud/tasks/main.yml @@ -1,5 +1,40 @@ # code: language=ansible --- +- name: Gather traefik_backend IPv4 network subnet for HaRP trusted proxy + ansible.builtin.shell: | + set -o pipefail + docker network inspect traefik_backend 2>/dev/null | \ + jq -r '.[0].IPAM.Config[] | select(.Subnet | test("^[0-9]+\\.")) | .Subnet' | head -1 + args: + executable: /bin/bash + register: traefik_network_subnet_ipv4 + changed_when: false + failed_when: false + +- name: Gather traefik_backend IPv6 network subnet for HaRP trusted proxy + ansible.builtin.shell: | + set -o pipefail + docker network inspect traefik_backend 2>/dev/null | \ + jq -r '.[0].IPAM.Config[] | select(.Subnet | test(":")) | .Subnet' | head -1 + args: + executable: /bin/bash + register: traefik_network_subnet_ipv6 + changed_when: false + failed_when: false + +- name: Set
harp_trusted_proxy_ips from detected subnets or fallback + ansible.builtin.set_fact:  # The '-' whitespace control in each Jinja tag stops the folded template from emitting leading spaces; the IPv4-mapped IPv6 prefix is 96 + the IPv4 prefix length. + harp_trusted_proxy_ips: >- + {%- set ipv4 = traefik_network_subnet_ipv4.stdout | default('') | trim -%} + {%- set ipv6 = traefik_network_subnet_ipv6.stdout | default('') | trim -%} + {%- set ipv4_net = ipv4 if ipv4 else '172.18.0.0/16' -%} + {%- set ipv4_base = ipv4_net | regex_replace('/.*', '') -%} + {%- set parts = [] -%} + {%- set _ = parts.append(ipv4_net) -%} + {%- set _ = parts.append('::ffff:' ~ ipv4_base ~ '/' ~ (96 + (ipv4_net.split('/')[1] | int))) -%} + {%- if ipv6 -%}{%- set _ = parts.append(ipv6) -%}{%- endif -%} + {{- parts | join(',') -}} + - name: Verify stacks directory exists ansible.builtin.file: path: "/home/{{ ansible_user }}/home-server/stacks/nc" @@ -128,25 +163,25 @@ mode: u=rwx,g=rx,o=rx - name: Wait for Elasticsearch to be ready - ansible.builtin.uri: - url: "http://localhost:9200/_cluster/health" - user: "elastic" - password: "{{ nc_elastic_secret }}" - method: GET - status_code: 200 + ansible.builtin.command: + cmd: > + docker exec nc-elasticsearch-1 curl -sf -u elastic:{{ nc_elastic_secret }} + http://localhost:9200/_cluster/health register: es_health - until: es_health.status == 200 + until: es_health.rc == 0 and (es_health.stdout | from_json).status is defined retries: 30 delay: 2 + changed_when: false failed_when: false - name: Configure Elasticsearch for single-node (0 replicas) - ansible.builtin.shell: - cmd: /home/{{ ansible_user }}/home-server/stacks/nc/elasticsearch_setup.sh - environment: - ELASTIC_HOST: "localhost:9200" - ELASTIC_USER: "elastic" - ELASTIC_PASS: "{{ nc_elastic_secret }}" + ansible.builtin.command: + cmd: >- + docker exec nc-elasticsearch-1 curl -X PUT -s + -u elastic:{{ nc_elastic_secret }} + http://localhost:9200/_index_template/nextcloud_defaults + -H "Content-Type: application/json" + -d '{"index_patterns": ["*"], "priority": 1, "template": {"settings": {"number_of_replicas": 0}}}' register: es_config failed_when: false changed_when: "'acknowledged' in es_config.stdout" @@ -212,6 
+247,14 @@ failed_when: false changed_when: add_db_objects.rc == 0 +- name: Disable maintenance mode after upgrade + when: config_php.stat.exists + ansible.builtin.command: + cmd: 'docker exec --user www-data nc-nextcloud-1 php occ maintenance:mode --off' + register: maintenance_off + failed_when: false + changed_when: "'disabled' in maintenance_off.stdout and 'already' not in maintenance_off.stdout"  # occ prints "Maintenance mode already disabled" when nothing changed + - name: Install apps become: true # necessary to read docker_volumes_path content ansible.builtin.command: diff --git a/playbooks/roles/nextcloud/templates/docker-compose.yml b/playbooks/roles/nextcloud/templates/docker-compose.yml index 84e3d38..bc6e91c 100644 --- a/playbooks/roles/nextcloud/templates/docker-compose.yml +++ b/playbooks/roles/nextcloud/templates/docker-compose.yml @@ -163,7 +163,9 @@ services: HP_SHARED_KEY: "{{AppAPISecret}}" NC_INSTANCE_URL: "https://{{ app_domain_name }}" HP_FRP_DISABLE_TLS: "false" - # HP_TRUSTED_PROXY_IPS: "127.0.0.1/32" + # Trust Traefik as reverse proxy - allows forwarded requests from traefik_backend. + # Without this, HaRP rejects requests with "Only requests from AppAPI allowed". + HP_TRUSTED_PROXY_IPS: "{{ harp_trusted_proxy_ips | trim }}"  # trim guards against stray whitespace in the templated fact # HAProxy binds IPv4-only by default but the container has an IPv6 address on # traefik_backend. PHP AppAPI (Guzzle) resolves 'harp' to IPv6 and gets # "Connection refused". :::8780 listens on both IPv4 and IPv6. 
diff --git a/playbooks/roles/nextcloud/templates/nextcloud/Dockerfile b/playbooks/roles/nextcloud/templates/nextcloud/Dockerfile index 01b16e0..8908d9d 100644 --- a/playbooks/roles/nextcloud/templates/nextcloud/Dockerfile +++ b/playbooks/roles/nextcloud/templates/nextcloud/Dockerfile @@ -52,12 +52,19 @@ RUN set -ex; \ rm -rf /var/lib/apt/lists/* # php-fpm configuration tuned for sustained load -RUN echo 'pm.max_children = {{ nextcloud_php_fpm_pm_max_children }}' >> /usr/local/etc/php-fpm.d/zz-docker.conf && \ - echo 'pm.start_servers = {{ nextcloud_php_fpm_pm_start_servers }}' >> /usr/local/etc/php-fpm.d/zz-docker.conf && \ - echo 'pm.min_spare_servers = {{ nextcloud_php_fpm_pm_min_spare_servers }}' >> /usr/local/etc/php-fpm.d/zz-docker.conf && \ - echo 'pm.max_spare_servers = {{ nextcloud_php_fpm_pm_max_spare_servers }}' >> /usr/local/etc/php-fpm.d/zz-docker.conf && \ - echo 'pm.max_requests = {{ nextcloud_php_fpm_pm_max_requests }}' >> /usr/local/etc/php-fpm.d/zz-docker.conf && \ - echo 'request_terminate_timeout = {{ nextcloud_php_fpm_request_terminate_timeout }}' >> /usr/local/etc/php-fpm.d/zz-docker.conf +# IMPORTANT: zzz-docker.conf (with 3 z's) loads AFTER www.conf (alphabetical order) +# This ensures our settings override defaults instead of being overridden. +# The explicit [www] header keeps these directives in the www pool regardless of which section the previously included file left open. 
+RUN echo '[www]' > /usr/local/etc/php-fpm.d/zzz-docker.conf && \ + echo 'pm.max_children = {{ nextcloud_php_fpm_pm_max_children }}' >> /usr/local/etc/php-fpm.d/zzz-docker.conf && \ + echo 'pm.start_servers = {{ nextcloud_php_fpm_pm_start_servers }}' >> /usr/local/etc/php-fpm.d/zzz-docker.conf && \ + echo 'pm.min_spare_servers = {{ nextcloud_php_fpm_pm_min_spare_servers }}' >> /usr/local/etc/php-fpm.d/zzz-docker.conf && \ + echo 'pm.max_spare_servers = {{ nextcloud_php_fpm_pm_max_spare_servers }}' >> /usr/local/etc/php-fpm.d/zzz-docker.conf && \ + echo 'pm.max_requests = {{ nextcloud_php_fpm_pm_max_requests }}' >> /usr/local/etc/php-fpm.d/zzz-docker.conf && \ + echo 'request_terminate_timeout = {{ nextcloud_php_fpm_request_terminate_timeout }}' >> /usr/local/etc/php-fpm.d/zzz-docker.conf && \ + echo 'slowlog = /var/log/php-fpm-slow.log' >> /usr/local/etc/php-fpm.d/zzz-docker.conf && \ + echo 'request_slowlog_timeout = 60s' >> /usr/local/etc/php-fpm.d/zzz-docker.conf && \ + echo 'request_slowlog_trace_depth = 20' >> /usr/local/etc/php-fpm.d/zzz-docker.conf # Keep preview generation enabled, but avoid overlap and cap runtime. RUN echo '{{ nextcloud_preview_pregenerate_cron }}' >> /var/spool/cron/crontabs/www-data diff --git a/playbooks/roles/nextcloud/templates/nginx.conf b/playbooks/roles/nextcloud/templates/nginx.conf index fe9f6b4..f2c84cc 100644 --- a/playbooks/roles/nextcloud/templates/nginx.conf +++ b/playbooks/roles/nextcloud/templates/nginx.conf @@ -166,8 +166,8 @@ http { fastcgi_param modHeadersAvailable true; # Avoid sending the security headers twice fastcgi_param front_controller_active true; # Enable pretty urls fastcgi_pass php-handler; - fastcgi_read_timeout 1800; - fastcgi_send_timeout 1800; + fastcgi_read_timeout 3600; + fastcgi_send_timeout 3600; fastcgi_intercept_errors on; fastcgi_request_buffering off;