diff --git a/.gitignore b/.gitignore index 600d2d3..a5c8936 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -.vscode \ No newline at end of file +.vscode +.env \ No newline at end of file diff --git a/backend/Dockerfile b/backend/Dockerfile index 78e845b..3f8766d 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -19,4 +19,4 @@ ENV PYTHONUNBUFFERED=1 WORKDIR /app EXPOSE 8000 COPY . /app/ -CMD ["gunicorn", "-b", "0.0.0.0:8000", "-w", "4", "--log-level", "debug", "--access-logfile", "-", "minicommerce.wsgi"] \ No newline at end of file +CMD ["gunicorn", "minicommerce.wsgi:application", "-c", "gunicorn.conf.py"] \ No newline at end of file diff --git a/backend/env.template b/backend/env.template index f232d50..6d35bf8 100644 --- a/backend/env.template +++ b/backend/env.template @@ -1,7 +1,9 @@ SECRET_KEY="your app secret key" DEBUG=False +SECRET_KEY=testsecurekey +APP_ENV=production ALLOWED_HOSTS="host1,host2,host3" # only the host -LOGIN_REDIRECT_URL="frontend url to redirect after login with openid" +LOGIN_REDIRECT_URL=/ AT_USERNAME="your africas talking app username" AT_APIKEY = "your africas talking app api key" # OIDC provider details for google auth (use any proider) @@ -18,3 +20,7 @@ POSTGRES_DB="my db" POSTGRES_USER="database user" POSTGRES_PASSWORD="database password" POSTGRES_HOST="database server url" +OTEL_EXPORTER_OTLP_ENDPOINT=http://172.17.0.3:4317 +OTEL_EXPORTER_OTLP_PROTOCOL=grpc +BACKEND_IMAGE_TAG=otel +FRONTEND_IMAGE_TAG=a046499 \ No newline at end of file diff --git a/backend/gunicorn.conf.py b/backend/gunicorn.conf.py new file mode 100644 index 0000000..619ed14 --- /dev/null +++ b/backend/gunicorn.conf.py @@ -0,0 +1,28 @@ +import multiprocessing +import os + +# ── Tell the app it is managed by Gunicorn ─────────────────────────────────── +# This must be set at module level (before workers are forked) so that +# AppConfig.ready() can skip its own tracing init and defer to post_fork. +os.environ["GUNICORN_MANAGED"] = "true" + +# ── gunicorn ──────────────────────────────────────────────────────────── +bind = os.environ.get("GUNICORN_BIND", "0.0.0.0:8000") +workers = int(os.environ.get("GUNICORN_WORKERS", multiprocessing.cpu_count() * 2 + 1)) +worker_class = "sync" +timeout = int(os.environ.get("GUNICORN_TIMEOUT", 30)) + +# ── Logging ────────────────────────────────────────────────────────────────── +accesslog = "-" # stdout +errorlog = "-" # stdout +loglevel = os.environ.get("GUNICORN_LOG_LEVEL", "info") + +# ── OpenTelemetry ───────────────────────────────────────────────────────────── +# Each Gunicorn worker is a *forked* child process. +# The TracerProvider must be initialised AFTER the fork — never before — +# because forking after SDK init causes broken background threads. +def post_fork(server, worker): + """Called once per worker after fork(). Safe place to init the SDK.""" + from minicommerce.telemetry import configure_tracing + configure_tracing() + server.log.info("OTel tracing initialised in worker pid=%s", worker.pid) \ No newline at end of file diff --git a/backend/manage.py b/backend/manage.py index 246bd37..4f0a8a7 100755 --- a/backend/manage.py +++ b/backend/manage.py @@ -7,6 +7,7 @@ def main(): """Run administrative tasks.""" os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'minicommerce.settings') + try: from django.core.management import execute_from_command_line except ImportError as exc: diff --git a/backend/minicommerce/apps.py b/backend/minicommerce/apps.py new file mode 100644 index 0000000..63c718f --- /dev/null +++ b/backend/minicommerce/apps.py @@ -0,0 +1,18 @@ +import os +from django.apps import AppConfig + + +class CoreConfig(AppConfig): + name = "minicommerce" + default_auto_field = "django.db.models.BigAutoField" + + def ready(self) -> None: + from opentelemetry.instrumentation.django import DjangoInstrumentor + DjangoInstrumentor().instrument() + + # Gunicorn initialises tracing per-worker in post_fork (fork-safe). + # Every other runner (manage.py runserver, pytest, celery, etc.) + # gets it here instead. + if not os.environ.get("GUNICORN_MANAGED"): + from minicommerce.telemetry import configure_tracing + configure_tracing() \ No newline at end of file diff --git a/backend/minicommerce/settings.py b/backend/minicommerce/settings.py index bca3388..b42900b 100644 --- a/backend/minicommerce/settings.py +++ b/backend/minicommerce/settings.py @@ -14,10 +14,16 @@ from os import getenv as environ from dotenv import load_dotenv -load_dotenv() +# Load .env from project BASE_DIR so env vars are available regardless of CWD +# (useful when wrapping with opentelemetry-instrument or running from repo root) +BASE_DIR = Path(__file__).resolve().parent.parent +env_path = BASE_DIR / '.env' +# print(f"Loading environment variables from: {env_path}") +load_dotenv(dotenv_path=env_path) +# load_dotenv(dotenv_path=env_path, override=True) # Build paths inside the project like this: BASE_DIR / 'subdir'. -BASE_DIR = Path(__file__).resolve().parent.parent +# BASE_DIR is already defined above # Quick-start development settings - unsuitable for production @@ -43,9 +49,11 @@ 'orders', 'users', 'corsheaders', + 'minicommerce.apps.CoreConfig', ] MIDDLEWARE = [ + 'django_prometheus.middleware.PrometheusBeforeMiddleware', 'django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'corsheaders.middleware.CorsMiddleware', @@ -55,6 +63,7 @@ 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', 'mozilla_django_oidc.middleware.SessionRefresh', + 'django_prometheus.middleware.PrometheusAfterMiddleware', ] AUTHENTICATION_BACKENDS = ( @@ -88,7 +97,7 @@ DATABASES = { 'default': { - 'ENGINE': 'django.db.backends.postgresql', + 'ENGINE': 'django_prometheus.db.backends.postgresql', 'NAME': environ('POSTGRES_DB', 'mydatabase'), 'USER': environ('POSTGRES_USER', 'myuser'), 'PASSWORD': environ('POSTGRES_PASSWORD', 'mypassword'), @@ -151,6 +160,41 @@ ], } +# Structured logging to stdout so containers and collectors can capture logs +import logging + +LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'standard': { + 'format': '%(asctime)s %(levelname)s %(name)s %(message)s' + } + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'formatter': 'standard', + } + }, + 'root': { + 'handlers': ['console'], + 'level': 'INFO', + }, + 'loggers': { + 'django': { + 'handlers': ['console'], + 'level': 'INFO', + 'propagate': False, + }, + "minicommerce": { + "handlers": ["console"], + "level": "INFO", + "propagate": False, + }, + } +} + OIDC_RP_SIGN_ALGO = 'RS256' OIDC_OP_JWKS_ENDPOINT = environ('OIDC_OP_JWKS_ENDPOINT') diff --git a/backend/minicommerce/telemetry.py b/backend/minicommerce/telemetry.py new file mode 100644 index 0000000..322ae07 --- /dev/null +++ b/backend/minicommerce/telemetry.py @@ -0,0 +1,75 @@ +import logging +import os + +from opentelemetry import trace +from opentelemetry.sdk.resources import Resource, SERVICE_NAME, SERVICE_VERSION +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ( + BatchSpanProcessor, + ConsoleSpanExporter, + SimpleSpanProcessor, +) + +logger = logging.getLogger(__name__) + + +def _build_resource() -> Resource: + """Builds the resource descriptor attached to every span.""" + return Resource.create( + { + SERVICE_NAME: os.environ.get("OTEL_SERVICE_NAME", "minicommerce_api"), + SERVICE_VERSION: os.environ.get("APP_VERSION", "1.0.0"), + "deployment.environment": os.environ.get("APP_ENV", "development"), + } + ) + + +def configure_tracing() -> None: + """ + Initialise the global TracerProvider. + + Behaviour is driven entirely by environment variables so no code + changes are needed to switch between dev and prod: + + APP_ENV=development → ConsoleSpanExporter (stdout, human-readable) + APP_ENV=production → OTLPSpanExporter (gRPC to collector) + + Call this ONCE per process (Gunicorn worker post_fork hook handles that). + """ + app_env = os.environ.get("APP_ENV", "development") + + provider = TracerProvider(resource=_build_resource()) + + if app_env == "production": + _attach_otlp_exporter(provider) + else: + _attach_console_exporter(provider) + + trace.set_tracer_provider(provider) + + logger.info("OpenTelemetry tracing configured [env=%s]", app_env) + + +def _attach_otlp_exporter(provider: TracerProvider) -> None: + """Sends spans over gRPC to an OTel Collector / Jaeger / Tempo.""" + from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter + + endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT", "http://otel-collector:4317") + + exporter = OTLPSpanExporter( + endpoint=endpoint, + # insecure=True is fine inside a private Docker network; + # set OTEL_EXPORTER_OTLP_INSECURE=false and provide certs for public endpoints. + insecure=os.environ.get("OTEL_EXPORTER_OTLP_INSECURE", "true").lower() == "true", + ) + + # BatchSpanProcessor buffers & sends in background threads — correct for prod. + provider.add_span_processor(BatchSpanProcessor(exporter)) + logger.info("OTLP exporter attached [endpoint=%s]", endpoint) + + +def _attach_console_exporter(provider: TracerProvider) -> None: + """Prints spans to stdout — great for local dev / CI.""" + # SimpleSpanProcessor flushes synchronously on every span — fine for dev. + provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter())) + logger.info("Console span exporter attached") \ No newline at end of file diff --git a/backend/minicommerce/urls.py b/backend/minicommerce/urls.py index 56dccc3..ae1ebd5 100644 --- a/backend/minicommerce/urls.py +++ b/backend/minicommerce/urls.py @@ -1,6 +1,6 @@ from django.contrib import admin from django.urls import path, include -from mozilla_django_oidc.views import OIDCAuthenticationCallbackView, OIDCAuthenticationRequestView +# from mozilla_django_oidc.views import OIDCAuthenticationCallbackView, OIDCAuthenticationRequestView urlpatterns = [ path('admin/', admin.site.urls), @@ -8,4 +8,5 @@ path('user/', include('users.urls')), path('auth/', include('rest_framework.urls', namespace='rest_framework')), path('oidc/', include('mozilla_django_oidc.urls')), + path('', include('django_prometheus.urls')), ] diff --git a/backend/orders/sms.py b/backend/orders/sms.py index ed2bd5d..784fdda 100644 --- a/backend/orders/sms.py +++ b/backend/orders/sms.py @@ -1,6 +1,11 @@ +import logging import africastalking +from opentelemetry import trace +from opentelemetry.trace import Status, StatusCode from minicommerce.settings import AT_USERNAME, AT_APIKEY +logger = logging.getLogger(__name__) + class SMS: def __init__(self): @@ -13,15 +18,27 @@ def __init__(self): # Get the SMS service self.sms = africastalking.SMS + # Tracer for SMS operations + self.tracer = trace.get_tracer(__name__) - def send(self, recipients: list, message: str) -> str: - try: - # send message - response = self.sms.send(message, recipients)['SMSMessageData']['Recipients'][0]['status'] - print(f"Order message sent successful with status: {response}") - # return response - except Exception as e: - print(str(e)) + def send(self, recipients: list, message: str) -> str | None: + # Create a span that represents the outbound SMS operation + with self.tracer.start_as_current_span("sms.send", attributes={ + 'sms.recipients': str(recipients), + 'sms.message_length': len(message), + }) as span: + try: + # send message + response = self.sms.send(message, recipients)['SMSMessageData']['Recipients'][0]['status'] + span.set_attribute('sms.status', str(response)) + logger.info('Order message sent successfully', extra={'status': response, 'recipients': recipients}) + return response + except Exception as e: + # record exception and mark span as error + span.record_exception(e) + span.set_status(Status(StatusCode.ERROR, str(e))) + logger.exception('Failed to send SMS') + return None send_sms = SMS() diff --git a/backend/requirements.txt b/backend/requirements.txt index 0885c5d..393c782 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -13,10 +13,13 @@ cryptography==43.0.1 distlib==0.3.8 Django==5.1.1 django-cors-headers==4.4.0 +django-prometheus==2.5.0 django-rest-framework==0.1.0 djangorestframework==3.15.2 filelock==3.16.1 flake8==7.1.1 +googleapis-common-protos==1.75.0 +grpcio==1.80.0 gunicorn==23.0.0 idna==3.10 josepy==1.14.0 @@ -24,10 +27,25 @@ markdown-it-py==3.0.0 mccabe==0.7.0 mdurl==0.1.2 mozilla-django-oidc==4.0.1 +opentelemetry-api==1.42.1 +opentelemetry-exporter-otlp==1.42.1 +opentelemetry-exporter-otlp-proto-common==1.42.1 +opentelemetry-exporter-otlp-proto-grpc==1.42.1 +opentelemetry-exporter-otlp-proto-http==1.42.1 +opentelemetry-instrumentation==0.63b1 +opentelemetry-instrumentation-django==0.63b1 +opentelemetry-instrumentation-logging==0.63b1 +opentelemetry-instrumentation-wsgi==0.63b1 +opentelemetry-proto==1.42.1 +opentelemetry-sdk==1.42.1 +opentelemetry-semantic-conventions==0.63b1 +opentelemetry-util-http==0.63b1 packaging==24.1 pbr==6.1.0 platformdirs==4.3.6 pluggy==1.5.0 +prometheus_client==0.25.0 +protobuf==6.33.6 psycopg==3.2.3 psycopg-pool==3.2.4 pycodestyle==2.12.1 @@ -45,6 +63,8 @@ sqlparse==0.5.1 stevedore==5.3.0 tox==4.20.0 typing_extensions==4.12.2 +tzdata==2026.2 urllib3==2.2.3 virtualenv==20.26.5 wheel==0.44.0 +wrapt==2.2.1 diff --git a/docker-compose.yaml b/docker-compose.yaml index 128542a..9797935 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,9 +1,38 @@ name: m-commerce services: + + otel-collector: + image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.142.0 + container_name: otel-collector + command: ["--config=/etc/otel-collector-config.yaml"] + volumes: + - ./otel/otel-collector-config.yaml:/etc/otel-collector-config.yaml + # ports: + # - 4317:4317 # OTLP gRPC receiver + expose: + - "4317" # OTLP gRPC receiver (from backend) + - "4318" # OTLP HTTP receiver (from frontend) + # - "9411:9411" # Zipkin receiver (optional) + # - "8888:8888" # Prometheus metrics (self-metrics) + environment: + - GOGC=80 + + jaeger: + image: jaegertracing/jaeger:2.12.0 + container_name: jaeger + ports: + - 16686:16686 + expose: + - "4317" + environment: + - COLLECTOR_OTLP_ENABLED=true + depends_on: + - otel-collector + frontend: image: hawkinswinja/m-commerce-frontend:${FRONTEND_IMAGE_TAG} ports: - - 80:80 + - 3000:80 depends_on: - backend volumes: @@ -12,6 +41,7 @@ services: backend: image: hawkinswinja/m-commerce-backend:${BACKEND_IMAGE_TAG} + command: "gunicorn minicommerce.wsgi:application -b 0.0.0.0:8000 --workers 3 --log-level warning --access-logfile - --error-logfile -" expose: - "8000" restart: "unless-stopped" @@ -20,9 +50,11 @@ services: environment: - SECRET_KEY=${SECRET_KEY} - DEBUG=${DEBUG} + - APP_ENV=${APP_ENV} - ALLOWED_HOSTS=${ALLOWED_HOSTS} - AT_USERNAME=${AT_USERNAME} - AT_APIKEY=${AT_APIKEY} + - GUNICORN_WORKERS=2 - OIDC_RP_CLIENT_ID=${OIDC_RP_CLIENT_ID} - OIDC_RP_CLIENT_SECRET=${OIDC_RP_CLIENT_SECRET} - OIDC_OP_AUTHORIZATION_ENDPOINT=${OIDC_OP_AUTHORIZATION_ENDPOINT} @@ -35,14 +67,20 @@ services: - POSTGRES_DB=${POSTGRES_DB} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - POSTGRES_HOST=postgres + - POSTGRES_PORT=5432 + - OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317 + - OTEL_EXPORTER_OTLP_PROTOCOL=grpc + - OTEL_SERVICE_NAME=minicommerce_api + - OTEL_TRACES_EXPORTER=otlp + - OTEL_METRICS_EXPORTER=otlp + - OTEL_LOGS_EXPORTER=otlp depends_on: - migrations + - otel-collector migrations: image: hawkinswinja/m-commerce-backend:${BACKEND_IMAGE_TAG} command: "python manage.py migrate" - labels: - - "com.centurylinklabs.watchtower.enable=false" environment: - SECRET_KEY=${SECRET_KEY} - DEBUG=False @@ -61,6 +99,7 @@ services: - POSTGRES_DB=${POSTGRES_DB} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - POSTGRES_HOST=postgres + - POSTGRES_PORT=5432 restart: "no" depends_on: postgres: @@ -68,8 +107,6 @@ services: postgres: image: postgres:alpine3.19 - labels: - - "com.centurylinklabs.watchtower.enable=false" expose: - 5432 restart: "unless-stopped" @@ -85,18 +122,6 @@ services: volumes: - db:/var/lib/postgresql/data - # watchtower: - # image: containrrr/watchtower - # command: --interval 30 - # labels: - # - "com.centurylinklabs.watchtower.enable=false" - # volumes: - # - /var/run/docker.sock:/var/run/docker.sock - # environment: - # WATCHTOWER_CLEANUP: true - # WATCHTOWER_ROLLING_RESTART: true - # restart: "unless-stopped" - volumes: db: diff --git a/helm/.gitignore b/helm/.gitignore index 37d98b5..0850f60 100644 --- a/helm/.gitignore +++ b/helm/.gitignore @@ -1,2 +1,3 @@ charts *-values.yaml +*.env \ No newline at end of file diff --git a/helm/Chart.yaml b/helm/Chart.yaml index 257d32c..5b3122d 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -15,13 +15,13 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 +version: 0.1.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "1.0.1" +appVersion: "1.0.2" dependencies: - name: postgresql diff --git a/helm/values.yaml b/helm/values.yaml index 06e757c..5b5d49d 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -12,7 +12,7 @@ frontend: # This sets the pull policy for images. pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "7753bb1" + tag: "a046499" service: port: 80 type: ClusterIP @@ -46,7 +46,7 @@ backend: # This sets the pull policy for images. pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "v2" + tag: "4d2dd11" # This is for the secretes for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ imagePullSecrets: [] @@ -132,7 +132,7 @@ serviceAccount: # Specifies whether a service account should be created create: false # Automatically mount a ServiceAccount's API credentials? - automount: true + automount: false # Annotations to add to the service account annotations: {} # The name of the service account to use. diff --git a/otel/otel-collector-config.yaml b/otel/otel-collector-config.yaml new file mode 100644 index 0000000..1280912 --- /dev/null +++ b/otel/otel-collector-config.yaml @@ -0,0 +1,42 @@ +# otel-collector-config.yaml +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + + # prometheus: # collector scrapes your django /metrics endpoint + # config: + # scrape_configs: + # - job_name: django_metrics + # static_configs: + # - targets: ['backend:8000'] # replace with your django service name and port + # metrics_path: /metrics + +exporters: + debug: + verbosity: basic + otlp/jaeger: + endpoint: jaeger:4317 + tls: + insecure: true + +service: + telemetry: + logs: + level: debug + metrics: + level: none # Turns off internal operational metrics completely + + pipelines: + traces: + receivers: [otlp] + exporters: [debug, otlp/jaeger] + logs: + receivers: [otlp] + exporters: [debug] + metrics: + receivers: [otlp] + exporters: [debug] \ No newline at end of file