diff --git a/.gitignore b/.gitignore index f9a9ea4..58a5bb0 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,11 @@ ext/* *.swp share/webroot/css/epiceditor share/webroot/js/epiceditor.min.js +coverage.out +lib/coverage* +lib_coverage* + +# Test environment and build artifacts +bin/ +test/config/ +test/var/ diff --git a/Dockerfile.test b/Dockerfile.test new file mode 100644 index 0000000..63fa936 --- /dev/null +++ b/Dockerfile.test @@ -0,0 +1,15 @@ +FROM golang:1.21-alpine + +RUN apk add --no-cache postgresql-client openssl git build-base + +WORKDIR /app + +# Copy go.mod and go.sum first to leverage build cache +COPY go.mod go.sum ./ +RUN go mod download + +# Copy the rest of the code +COPY . . + +# Run tests by default +CMD ["/bin/sh", "scripts/run_tests.sh"] diff --git a/cmd/setup/main/main.go b/cmd/setup/main/main.go new file mode 100644 index 0000000..535e3d8 --- /dev/null +++ b/cmd/setup/main/main.go @@ -0,0 +1,25 @@ +package main + +import ( + pf "trident.li/pitchfork/lib" + pf_cmd_setup "trident.li/pitchfork/cmd/setup" +) + +func newctx() pf.PfCtx { + return pf.NewPfCtx(nil, nil, nil, nil, nil) +} + +func main() { + pf_cmd_setup.Setup( + "pitchfork-setup", // tname + "pitchfork", // ldname + "Pitchfork", // appname + "0.0.1", // version + "Trident Project", // copyright + "https://trident.li", // website + 0, // app_schema_version + "", // env_server + "", // server + newctx, // newctx + ) +} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..39616fb --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,34 @@ +version: '3.8' + +services: + db: + image: postgres:14-alpine + environment: + POSTGRES_PASSWORD: postgres_password + ports: + - "5432:5432" + volumes: + - pgdata:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 5s + timeout: 5s + retries: 5 + + tester: + build: + context: . + dockerfile: Dockerfile.test + depends_on: + db: + condition: service_healthy + environment: + - DB_HOST=db + - DB_ADMIN_PASS=postgres_password + - PITCHFORK_TOOLNAME=pitchfork + - PITCHFORK_CONFROOT=/app/test/config + volumes: + - .:/app + +volumes: + pgdata: diff --git a/docs/core_properties.md b/docs/core_properties.md new file mode 100644 index 0000000..5634385 --- /dev/null +++ b/docs/core_properties.md @@ -0,0 +1,136 @@ +# Pitchfork Architecture and Core Properties + +Pitchfork is a highly consistent, modular, and secure web application and command-line framework written in Go. It acts as the foundation for [Trident](https://trident.li) and other identity, group, and wiki management systems. + +This document describes the core properties, design patterns, and architecture of the Pitchfork framework. + +--- + +## 1. System Architecture and Directory Structure + +The codebase is organized into clear, single-responsibility domains: + +* **`cmd/`**: Application entry points. + * `cmd/server/`: Web server daemon (`server.go`). + * `cmd/cli/`: Command-line client (`cli.go`). + * `cmd/setup/`: System initialization, database schema creation, migrations, and administrative tools (adds first user, resets passwords, sudo testing). + * `cmd/wikiexport/`: Command utility to bulk export wiki structures. +* **`lib/`**: Core business logic, framework, ORM, state handling, and domain modules. + * `app.go` & `cfg.go`: Global state and configuration (JSON-based parsing with comments stripping). + * `ctx.go`: Context definition (`PfCtx`) containing the request session, transactional DB scope, client IP, language translation, and scopes. + * `db.go`: Postgres driver wrapper enforcing auditing, connection pooling, and raw SQL transaction handling. + * `struct.go`: Proprietary reflection-based Object-Relational Mapping (ORM) engine. + * `system.go`: General system configuration (stored in DB) and auditing extraction. + * `user.go`, `group.go`, `ml.go`: Core domain models (users, groups, mailing lists, 2FA, emails, wiki pages). + * `iptrk.go`: Goroutine-driven, database-backed IP rate tracking and lockout engine. +* **`ui/`**: Web front-end. + * `ui.go`: Extended context (`PfUI`) adding HTTP parsing, cookie management, response writing, headers, and page templates rendering. + * `form.go`: Reflection-based HTML forms compiler. + * `root.go`: Main HTTP handler and tree-based path router. +* **`share/`**: Static assets and resources. + * `share/dbschemas/`: Incremental PostgreSQL schema migrations (`DB_0.psql` to `DB_20.psql`). + * `share/languages/`: Localization translations. + * `share/templates/`: HTML templates. + +--- + +## 2. The Unified Execution Engine Design Pattern + +One of Pitchfork's most powerful architectural properties is its **Unified Web & CLI Execution Engine**. + +Instead of duplicating business logic between CLI flags/commands and Web UI routes, Pitchfork routes **all** user actions through a single command-tree system: + +```mermaid +graph TD + CLI[CLI Application] -->|Parses line| CmdParser[lib.Ctx.Cmd] + WebForm[Web UI POST Form] -->|Parses form keys| FormHandler[ui.HandleFormS] + FormHandler -->|Constructs equivalent CLI args| CmdParser + CmdParser -->|Permission check| CheckPerms[lib.Ctx.CheckPerms] + CheckPerms -->|Allowed| Exec[Command Function: lib.PfFunc] +``` + +### Mechanics +1. **Command Definition**: Business actions are defined as `PfMEntry` items, specifying parameter counts, argument types, permission requirements, and the implementation `PfFunc` callback: + ```go + type PfFunc func(ctx PfCtx, args []string) error + ``` +2. **Web Form Binding**: When a web form is submitted via POST, `ui.HandleFormS` automatically: + * Evaluates struct tags to list valid updateable fields. + * Matches keys present in the form submission. + * Synthesizes a space-separated CLI command array (e.g., `[]string{"user", "set", "email", "user@example.com"}`). + * Dispatches the synthesized command directly into `ctx.Cmd()`. +3. **Benefits**: + * **100% Consistency**: The CLI and Web UI are guaranteed to behave identically. + * **Zero Duplication**: Developers write a change or validation step once, and it automatically supports both terminal scripts and web interfaces. + * **Auditability**: Because all writes resolve to commands, the system can audit operations uniformly. + +--- + +## 3. Context & Session State Lifecycle (`PfCtx` & `PfUI`) + +Pitchfork maintains request isolation using an explicit context passing pattern: + +* **`PfCtx` Interface**: Extends across all operations. It carries: + * `abort <-chan bool`: Client cancellation channel. + * `user PfUser`: Currently authenticated caller. + * `db_Tx *Tx`: Transaction boundary for the request. + * `sel_user`, `sel_group`: Target scopes of the command (who or what group is being acted upon). + * `client_ip`: Fully resolved client IP. + * `tfunc`: Current localization translation hook. +* **`PfUI` Interface**: Extends `PfCtx` with Web-only attributes (`http.Request`, `http.ResponseWriter`, cookies, templates metadata). +* **Impersonation (`Become`)**: Administrative commands or test routines can swap the execution identity inside the context using `ctx.Become(targetUser)`. + +--- + +## 4. Strict Database Auditing & Postgres Wrapper + +To meet operational compliance standards, the database wrapper `PfDB` implements **Mandatory Auditing Constraints**: + +1. **Explicit Audit Messages**: All SQL writes (inserts, updates, deletes) executed via `DB.Exec` or `DB.QueryRowA` **must** supply a non-empty `audittxt` string. Running a non-`SELECT` query without an audit message triggers a runtime `panic`. +2. **Transactional Integrity**: If a write is made outside an explicit context transaction, `PfDB` opens a short local transaction, executes the statement, inserts the audit entry into the `audit_history` table, and commits or rolls back atomically. +3. **Audit Ingestion**: The audit log captures the acting user (`member`), target user (`username`), target group (`trustgroup`), exact formatted SQL action text, and the remote client IP. + +--- + +## 5. Reflection-Based ORM and Forms Compilation + +To reduce boilerplate, Pitchfork avoids third-party heavyweight ORM libraries and instead utilizes two custom Go-reflection engines: + +### Dynamic SQL ORM (`lib/struct.go`) +Pitchfork maps Go structs directly to Postgres tables using struct tags: +* `pfcol`: Specifies the database column name. +* `pftable`: Specifies the database table name. +* `coalesce`: Specifies the default value to return for nullable rows. + +`StructFetchFields` reads struct tags to automatically construct SQL `SELECT` projection clauses and yields matching scanner destination pointers. `StructFetchStore` maps completed SQL row scans back into the instantiated struct fields. + +### Reflective Forms Compiler (`ui/form.go`) +Web forms are rendered directly from database-backed Go structs using the custom `pfform` template function: +* Reads field datatypes (e.g., `string`, `bool`, `int`, `struct`). +* Inspects validation constraints such as `min`, `max`, and custom config regex patterns (e.g., `username_regexp`). +* Enforces structural access rules: + * `isvisible`: Points to a struct method that decides if the user has permissions to see the field. + * `readonly`: Compiles inputs into static, un-editable text fields accompanied by hidden inputs to keep POST requests intact. + +--- + +## 6. Security and Robustness Measures + +Pitchfork implements multiple defensive programming guards: + +* **CSRF Protection**: `ui/ui.go` strictly validates a cryptographic hidden value (`CSRF_TOKENNAME` or `X-XSRF-TOKEN` header) for all POST form requests. +* **Proxy-Safe IP Resolution (XFF)**: `ParseClientIP` walks the `X-Forwarded-For` chain from right-to-left to locate the first untrusted IP outside of the configured reverse proxy CIDRs (`xff_trusted_cidr`). This prevents header-injection spoofing of client IP addresses. +* **SysAdmin IP Restrictions (SAR)**: Even if a user's account has the `SysAdmin` flag enabled, administrative actions are rejected unless the resolved client IP falls within the space-separated CIDR block specified in `SARestrict` (excluding localhost/loopback). +* **IP Lockout Engine (`iptrk`)**: Failed logins or invalid CSRF requests increment a database-backed IP tracking count. When this threshold (`IPtrk_Max`) is breached, requests are locked out. Updates are processed asynchronously via a dedicated background Goroutine to prevent thread congestion during flood attacks. +* **Stateless JWT Sessions**: Signed private/public JWT tokens track authenticated sessions. To mitigate token leaks, Pitchfork supports token invalidation via a `jwt_invalid` table. +* **HTML Sanitization**: Utilizes `bluemonday` to cleanse dynamic HTML inputs before they are rendered on Wiki templates. + +--- + +## 7. Multi-Namespace Wiki System + +The built-in Wiki system supports dynamic, multi-tenant namespace routing: +* **Data model**: Wiki paths are mapped in `wiki_namespace` while full histories are kept under `wiki_page_rev`. +* **Mount points (`Wiki_ModOpts`)**: Wikis can be mounted under different paths (e.g., a global wiki at `/system/wiki` or a group-specific wiki at `/group//wiki`). +* **Search & Revisions**: Includes built-in search projecting matching snippets and standard revision list retrieval. +* **Markup**: Uses `blackfriday` to translate markdown to structured, safe HTML bodies alongside automatically synthesized tables of contents (`html_toc`). diff --git a/docs/modernization_plan.md b/docs/modernization_plan.md new file mode 100644 index 0000000..d485c7f --- /dev/null +++ b/docs/modernization_plan.md @@ -0,0 +1,119 @@ +# Pitchfork Codebase Modernization & Security Review Plan + +This document outlines a phased plan to modernize the Pitchfork framework, improve its testability and coverage, and conduct a thorough security review. + +--- + +## Executive Summary + +Pitchfork is a modular and secure web application and CLI framework written in Go. While it has a solid architectural foundation (Unified Execution Engine, strict database auditing, proxy-safe IP resolution), it currently faces several technical debt challenges: +* **Outdated Go Version:** Currently using Go 1.18 (released in early 2022). +* **Outdated Dependencies:** Many third-party libraries are outdated or deprecated. +* **High Barrier to Testing:** Existing tests are integration tests requiring an active PostgreSQL database and specific environment variables, resulting in 0% actual unit test coverage during standard `go test` runs. +* **Custom Components:** Relying on custom reflection-based ORM and Forms compilers which are hard to maintain and audit. + +This plan proposes a **5-Phase approach** to address these issues, targeting modern Go standards, near 100% test coverage for critical components, and a robust security posture. + +--- + +## Phase 1: Assessment & Local Test Environment Setup [COMPLETE] + +**Goal:** Enable reliable, repeatable execution of existing integration tests locally and in CI. + +### Actions: +1. **Create a Containerized Test Environment:** (Completed) + * Develop a `docker-compose.yml` file to spin up a local PostgreSQL database. + * Define a Dockerfile for Pitchfork that can run migrations and execute tests. +2. **Document and Automate Test Execution:** (Completed) + * Create a script (e.g., `scripts/run_tests.sh`) that automatically sets up the database, applies schema migrations (`share/dbschemas/`), sets required environment variables (`PITCHFORK_TOOLNAME`, `PITCHFORK_CONFROOT`), and runs the tests. + * Fix the early-exit behavior in `lib/test/helpers.go` so that tests fail visibly if the environment is misconfigured, rather than silently passing with 0% coverage. +3. **Baseline Coverage Assessment:** (Completed) + * Run the existing tests in the containerized environment to get a true baseline coverage report. + +--- + +## Phase 2: Go & Dependency Modernization + +**Goal:** Upgrade the core Go runtime and modernize third-party dependencies. + +### Actions: +1. **Upgrade Go Version:** + * Upgrade `go.mod` to **Go 1.21** (or **1.22**). + * Update `debian/control` to reflect the newer Go version requirement for packaging. +2. **Update Key Dependencies:** + * `github.com/golang-jwt/jwt`: Upgrade to `github.com/golang-jwt/jwt/v5` (or v4) as v3 is deprecated and has known security issues. + * `github.com/russross/blackfriday`: Upgrade to v2 or migrate to `github.com/yuin/goldmark` (the standard markdown parser in modern Go). + * `github.com/nicksnyder/go-i18n`: Upgrade to v2 for better localization support. + * `github.com/pborman/uuid`: Replace with the more standard `github.com/google/uuid`. +3. **Refactor for Modern Go Features:** + * **Structured Logging (`slog`):** Evaluate replacing custom logging in `lib/db.go` and `lib/misc.go` with the standard library's `charcoal/slog` (available in Go 1.21+) for structured, level-based logging. + * **Generics:** Explore using generics to simplify the custom ORM (`lib/struct.go`) or other utility functions. + +--- + +## Phase 3: Test Suite Refactoring & Decoupling (Unit Testing) + +**Goal:** Decouple business logic from the database to allow fast, hermetic unit tests, aiming for high coverage. + +### Actions: +1. **Introduce Database Mocking:** + * Integrate `github.com/DATA-DOG/go-sqlmock` into the test suite. + * Refactor `PfDB` connection logic to allow injecting a mocked `*sql.DB` connection during tests. +2. **Refactor to Interfaces (Dependency Injection):** + * Define interfaces for core services (e.g., User Service, Group Service, Wiki Service). + * Modify the handlers and commands to accept these interfaces rather than relying strictly on global state. This allows mocking services in tests. +3. **Write Comprehensive Unit Tests:** + * **Priority 1 (Core Logic):** Write unit tests with mocked DB for `lib/user.go`, `lib/group.go`, `lib/jwt.go`, and `lib/iptrk.go`. + * **Priority 2 (Custom Framework):** Write tests for the reflection-based ORM (`lib/struct.go`) and forms compiler (`ui/form.go`) with various edge cases. + * **Priority 3 (UI Handlers):** Use `net/http/httptest` to test HTTP handlers in `ui/root.go` and `ui/form.go` with simulated requests. +4. **Target Coverage:** + * Aim for **>90% coverage** on core business logic (`lib/`) and security-critical components (`lib/jwt.go`, `lib/user_2fa.go`, `lib/iptrk.go`). + +--- + +## Phase 4: CI/CD Pipeline & Code Quality + +**Goal:** Automate testing, linting, and coverage reporting to prevent regression. + +### Actions: +1. **GitHub Actions Integration:** + * Create `.github/workflows/test.yml` to run on every Pull Request and commit to `main`. + * **Job 1 (Linting):** Run `golangci-lint` to enforce style, check for common bugs, and ensure formatting. + * **Job 2 (Unit Tests):** Run unit tests (using mocks) without needing a database. + * **Job 3 (Integration Tests):** Spin up a Postgres service container, run migrations, and execute database-dependent integration tests. +2. **Coverage Reporting:** + * Integrate a coverage reporting tool (e.g., Codecov or GitHub Actions artifacts) to track coverage over time and block PRs that decrease coverage. + +--- + +## Phase 5: Security Review & Hardening + +**Goal:** Conduct a deep, structured security review and establish a vulnerability management process. + +### Threat Modeling & Review Areas: +1. **Custom ORM & SQL Injection:** + * *Risk:* Dynamic SQL generation in `lib/struct.go` and query helper methods (`Q_AddWhere`, `Q_AddArg`) in `lib/db.go`. + * *Action:* Audit how queries are constructed. Ensure all user input is strictly parameterized and no raw string concatenation of user input occurs. +2. **Authentication & Session Management:** + * *Risk:* JWT token leakage, signature bypass, or weak key usage. + * *Action:* Review JWT signing and verification logic in `lib/jwt.go`. Verify token invalidation mechanism (`jwt_invalid` table) works under load. Ensure keys are managed securely and not hardcoded. +3. **Impersonation (`Become`):** + * *Risk:* Unauthorized users triggering the `Become` function to escalate privileges. + * *Action:* Thoroughly audit all usages of `ctx.Become`. Enforce that only validated administrators from trusted IPs (SAR) can trigger it. +4. **Proxy-Safe IP Resolution (XFF):** + * *Risk:* IP spoofing leading to SAR bypass or IP lockout evasion. + * *Action:* Audit `ParseClientIP` in `lib/misc.go` against various proxy configurations (multiple proxies, spoofed headers). +5. **Input Validation & XSS:** + * *Risk:* XSS in Wiki pages or custom forms. + * *Action:* Verify `bluemonday` HTML sanitization is applied consistently to all user-supplied markdown/HTML before rendering. Audit the reflection-based forms validation constraints. +6. **CSRF Protection:** + * *Risk:* CSRF bypass on state-changing POST requests. + * *Action:* Verify CSRF token validation in `ui/ui.go` is robust and cannot be bypassed (e.g., by changing content type or omitting the token). + +### Security Tooling & Finding Management: +* **Static Application Security Testing (SAST):** Integrate `gosec` into the linting phase to automatically scan for common Go security issues. +* **Dependency Scanning:** Integrate `govulncheck` into the CI pipeline to detect known vulnerabilities in dependencies. +* **Finding Management Plan:** + * **Triage:** Classify findings using CVSS (Common Vulnerability Scoring System). + * **Remediation SLA:** Establish timelines for fixing vulnerabilities (e.g., Critical: 7 days, High: 30 days, Medium: 90 days). + * **Regression Testing:** For every security fix, write a dedicated test case to prevent reintroduction. diff --git a/lib/file_test.go b/lib/file_test.go index 61ab6f6..0c7ceae 100644 --- a/lib/file_test.go +++ b/lib/file_test.go @@ -19,11 +19,11 @@ func TestPathOffset(t *testing.T) { obj_path := tsts[i].obj_path query_path := tsts[i].query_path value := tsts[i].value - out := pathOffset(obj_path, query_path) + out := PathOffset(obj_path, query_path) if out == value { - t.Logf("pathOffset(%s,%s) == %d ok", obj_path, query_path, value) + t.Logf("PathOffset(%s,%s) == %d ok", obj_path, query_path, value) } else { - t.Errorf("pathOffset(%s,%s) != %d failed, got %d", obj_path, query_path, value, out) + t.Errorf("PathOffset(%s,%s) != %d failed, got %d", obj_path, query_path, value, out) } } diff --git a/lib/system.go b/lib/system.go index 2dd12d3..fcd7aad 100755 --- a/lib/system.go +++ b/lib/system.go @@ -387,9 +387,9 @@ func System_db_test_setup() (err error) { /* Find the Application specific test data */ fn := System_findfile("dbschemas/", "APP_test_data.psql") - if fn == "" { + if fn != "" { /* Found it, execute it */ - err = DB.executeFile(fn) + err = DB.executeFile("APP_test_data.psql") } return diff --git a/lib/test/helpers.go b/lib/test/helpers.go index d324d50..83c5bcd 100644 --- a/lib/test/helpers.go +++ b/lib/test/helpers.go @@ -22,8 +22,8 @@ func Test_setup() (toolname string) { if toolname == "" || confroot == "" { pf.Errf("Refusing to test: PITCHFORK_TOOLNAME and PITCHFORK_CONFROOT are not configured") - /* Note: we exit(0) here, thus 'go test' won't complain as all looks okay */ - os.Exit(0) + /* Exit with error so that tests fail visibly if misconfigured */ + os.Exit(1) } /* Extra flags */ diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh new file mode 100755 index 0000000..b3ebd6c --- /dev/null +++ b/scripts/run_tests.sh @@ -0,0 +1,43 @@ +#!/bin/sh +set -e + +DB_HOST=${DB_HOST:-127.0.0.1} +DB_ADMIN_PASS=${DB_ADMIN_PASS:-postgres_password} +CONF_DIR=${PITCHFORK_CONFROOT:-$PWD/test/config} + +# Wait for PostgreSQL to be ready if we are in docker (DB_HOST is not localhost) +echo "Waiting for database at $DB_HOST to be ready..." +until pg_isready -h "$DB_HOST" -p 5432 -U postgres; do + sleep 1 +done + +echo "Database is ready!" + +# Ensure directories exist +mkdir -p "$CONF_DIR" +mkdir -p test/var + +# Generate JWT keys if they don't exist +if [ ! -f "$CONF_DIR/jwt.prv" ]; then + echo "Generating JWT keys..." + openssl ecparam -name secp521r1 -genkey -noout -out "$CONF_DIR/jwt.prv" + openssl ec -in "$CONF_DIR/jwt.prv" -pubout -out "$CONF_DIR/jwt.pub" +fi + +# Generate pitchfork.conf from template +echo "Generating pitchfork.conf..." +sed -e "s/DB_HOST_PLACEHOLDER/$DB_HOST/" \ + -e "s/DB_ADMIN_PASS_PLACEHOLDER/$DB_ADMIN_PASS/" \ + test/pitchfork.conf.template > "$CONF_DIR/pitchfork.conf" + +# Build setup tool +echo "Building setup tool..." +go build -o bin/pfsetup cmd/setup/main/main.go + +# Run migrations and setup test DB +echo "Setting up test database..." +PITCHFORK_TOOLNAME=pitchfork PITCHFORK_CONFROOT="$CONF_DIR" ./bin/pfsetup --force-db-destroy setup_test_db + +# Run tests +echo "Running tests..." +PITCHFORK_TOOLNAME=pitchfork PITCHFORK_CONFROOT="$CONF_DIR" go test -v ./... diff --git a/share/dbschemas/test_data.psql b/share/dbschemas/test_data.psql new file mode 100644 index 0000000..9b992d9 --- /dev/null +++ b/share/dbschemas/test_data.psql @@ -0,0 +1,3 @@ +-- Pitchfork Test Data +-- This file is intentionally empty for base framework tests. +-- Applications built on Pitchfork can populate this or APP_test_data.psql. diff --git a/test/pitchfork.conf.template b/test/pitchfork.conf.template new file mode 100644 index 0000000..67e8908 --- /dev/null +++ b/test/pitchfork.conf.template @@ -0,0 +1,16 @@ +{ + "var_root": "test/var", + "file_roots": ["share"], + "db_host": "DB_HOST_PLACEHOLDER", + "db_port": "5432", + "db_name": "pitchfork_test", + "db_user": "pitchfork_test_user", + "db_pass": "password", + "db_ssl_mode": "disable", + "db_admin_db": "postgres", + "db_admin_user": "postgres", + "db_admin_pass": "DB_ADMIN_PASS_PLACEHOLDER", + "smtp_host": "localhost", + "smtp_port": "25", + "smtp_ssl": "ignore" +}