diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..37f67f9 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,80 @@ +name: Release + +on: + push: + tags: + - "v*" + +permissions: + contents: write + +jobs: + release: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - uses: haskell-actions/setup@v2 + id: setup-haskell + with: + ghc-version: "9.10" + cabal-version: "3.14" + + - name: Configure + run: cabal configure --enable-tests --flag dev + + - name: Cache + uses: actions/cache@v4 + env: + cache-name: cache-cabal + with: + path: | + ${{ steps.setup-haskell.outputs.cabal-store }} + dist-newstyle + key: ${{ runner.os }}-release-${{ env.cache-name }}-${{ hashFiles('**/*.cabal') }}-${{ hashFiles('**/cabal.project') }} + restore-keys: | + ${{ runner.os }}-release-${{ env.cache-name }}- + ${{ runner.os }}-release- + ${{ runner.os }}- + + - name: Install dependencies + run: cabal build --only-dependencies all + + - name: Build + run: cabal build all + + - name: Run tests + run: cabal test all + + - name: Generate SHA256 checksums + run: | + mkdir -p dist/release + cp "$(cabal list-bin graphos)" dist/release/graphos-linux-x86_64 + cd dist/release + sha256sum graphos-linux-x86_64 > graphos-linux-x86_64.sha256 + + - name: Extract version from tag + id: version + run: echo "version=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT" + + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 + with: + tag_name: ${{ github.ref_name }} + name: Graphos ${{ github.ref_name }} + body: | + ## Graphos ${{ github.ref_name }} + + **Changes**: See [CHANGELOG.md](https://github.com/${{ github.repository }}/blob/main/CHANGELOG.md) + + ### Assets + | File | Description | + |------|-------------| + | `graphos-linux-x86_64` | Static Linux binary (x86_64) | + | `graphos-linux-x86_64.sha256` | SHA256 checksum | + files: | + dist/release/graphos-linux-x86_64 + dist/release/graphos-linux-x86_64.sha256 + draft: false + prerelease: false \ No newline at end of file diff --git a/.tmp/sessions/2026-04-20-graphos-refactor/context.md b/.tmp/sessions/2026-04-20-graphos-refactor/context.md new file mode 100644 index 0000000..9b4372e --- /dev/null +++ b/.tmp/sessions/2026-04-20-graphos-refactor/context.md @@ -0,0 +1,107 @@ +# Task Context: Graphos Modular Refactoring + +Session ID: 2026-04-20-graphos-refactor +Created: 2026-04-20T00:00:00Z +Status: in_progress + +## Current Request + +Refactor the 6 identified coupling hotspots in the Graphos Haskell codebase. All findings came from Graphos graph analysis (god_nodes, get_neighbors, get_community). The goal is to split oversized modules into focused sub-modules following clean architecture principles while maintaining backward compatibility via re-export hubs. + +## Context Files (Standards to Follow) + +- .opencode/context/core/standards/code-quality.md — Haskell patterns, clean architecture, module structure, anti-patterns +- .opencode/context/core/standards/test-coverage.md — Hspec/QuickCheck testing patterns + +## Reference Files (Source Material) + +These are the 6 modules identified as refactoring targets, ordered by priority: + +### P0: Infrastructure.Export.HTML (degree 436 — #1 hotspot) +- src/Graphos/Infrastructure/Export/HTML.hs — Contains ~200+ inline JS strings, ~100+ inline CSS strings, HTML templates, VisNode/VisEdge JSON types, search UI, community sidebar, all embedded as Haskell string literals +- src/Graphos/Infrastructure/Export/CommunityGraph.hs — Hub that knows all export formats (cross-coupling) + +### P1: Domain.Types (degree 342 — #2 hotspot, community 2621 with 258 members) +- src/Graphos/Domain/Types.hs — God Module containing: Node, Edge, Relation, Confidence, FileType, PipelineConfig, Detection, Analysis, GraphDiff, Hyperedge, LabeledGraph, ToJSON/FromJSON instances, relationToText/textToRelation converters, defaultConfig + +### P1: Infrastructure.LSP.Client (degree 292 — #3 hotspot, community 2040 with 211 members) +- src/Graphos/Infrastructure/LSP/Client.hs — Single module handling: 30+ hardcoded language→server mappings (languageServerCommands), process management (connectToLSP, disconnectLSP), JSON-RPC protocol (readLSPMessage, sendLSPMessage), capability parsing (parseServerCapabilities, lookupBoolCaps), symbol extraction (extractDocumentSymbols, extractCallHierarchy, extractWorkspaceSymbols), data conversion (symbolToNodes, symbolTreeToEdges, workspaceSymbolsToDocumentSymbols) +- src/Graphos/Infrastructure/LSP/Capabilities.hs — Partial capability handling already exists + +### P2: UseCase.Extract (degree 254 — #4 hotspot) +- src/Graphos/UseCase/Extract.hs — Combines: Haskell stub parsing (parseHaskellImports, parseHaskellDecls, extractHaskellStub, isTopLevelDecl), doc extraction (parseHeader, parseTags, parseWikiLinks, extractDocFile), LSP orchestration (doExtractWithSharedLSP, groupByLSPServer), thread pool management (QSemN concurrency) + +### P2: Domain.Graph (degree 208 — #6 hotspot) +- src/Graphos/Domain/Graph.hs — 15+ exported functions: buildGraph, shortestPath, articulationPoints, biconnectedComponents, godNodes, mergeGraphs, graphDiff, subgraph, neighbors, degree, breadthFirstSearch, depthFirstSearch, dominators, edgeBetweenness, isFileNode, isConceptNode + Graph record with 7+ fields + FGL bridge +- src/Graphos/Domain/Graph/FGL.hs — Already a sub-module, extend this pattern + +### P2: Export cross-coupling +- src/Graphos/Infrastructure/Export/CommunityGraph.hs — conceptually_related_to edges to every export format AND FileSystem modules. Mediator anti-pattern. + +## Components + +### P0: Export.HTML Refactor +- Extract JS/CSS/HTML into template files (templates/graph.html, templates/graph.js, templates/graph.css) +- Extract VisNode/VisEdge JSON types into Domain.Export.Visualization (domain-level types, not HTML-specific) +- Split buildHTML into: renderNodesJSON, renderEdgesJSON, renderPageShell, renderSearchUI, renderCommunitySidebar + +### P1: Domain.Types Split +- Domain.Types.Node — Node, NodeId, file types (Code, Doc, Image, Paper, Video) +- Domain.Types.Edge — Edge, EdgeId, Relation, Confidence, relationToText/textToRelation converters +- Domain.Types.Graph — LabeledGraph, GraphDiff, Extraction, Hyperedge types +- Domain.Types.Pipeline — PipelineConfig, defaultConfig, Detection types +- Domain.Types.Analysis — Analysis, GodNode, SurprisingConnection, SuggestedQuestion +- Keep Domain.Types as re-export hub for backward compatibility + +### P1: LSP.Client Split +- Infrastructure.LSP.ServerMap — 30+ languageServerCommands mapping +- Infrastructure.LSP.Transport — JSON-RPC read/write, connectToLSP, disconnectLSP +- Infrastructure.LSP.CapabilityParse — Server capability parsing (merge with existing Capabilities.hs) +- Infrastructure.LSP.Extraction — symbolToNodes, symbolTreeToEdges, workspaceSymbolsToDocumentSymbols conversions +- Keep LSP.Client as thin orchestrator + +### P2: UseCase.Extract Split +- UseCase.Extract.Haskell — parseHaskellImports, parseHaskellDecls, extractHaskellStub, isTopLevelDecl +- UseCase.Extract.Markdown — parseHeader, parseTags, parseWikiLinks, extractDocFile +- UseCase.Extract.LSPOrchestrator — doExtractWithSharedLSP, groupByLSPServer +- Keep UseCase.Extract as composition orchestrator + +### P2: Domain.Graph Split +- Domain.Graph.Core — Graph type, buildGraph, mergeGraphs, mergeExtractions, field accessors +- Domain.Graph.Query — shortestPath, neighbors, degree, BFS, DFS, subgraph +- Domain.Graph.Analysis — articulationPoints, biconnectedComponents, godNodes, edgeBetweenness, dominators +- Domain.Graph.Diff — graphDiff, LabeledGraph +- Keep Domain.Graph as re-export hub +- Domain.Graph.FGL already exists — extend this pattern + +### P2: Export Type Class +- Introduce Domain.Export.Format type class: class ExportFormat a where render :: LabeledGraph -> a +- Reduces CommunityGraph hub coupling + +## Constraints + +- Haskell project: cabal build, nix-shell, Hspec/QuickCheck tests +- Clean architecture: Domain has NO IO, UseCase has NO IO implementation, Infrastructure handles all side effects +- Module naming: Graphos.{Domain|UseCase|Infrastructure}.{SubModule} +- PascalCase types, camelCase functions, explicit exports, type signatures on all top-level definitions +- Backward compatibility: original modules become re-export hubs where possible +- Tests must pass after each subtask (cabal test) +- Each refactoring step must be independently compilable and testable +- Graph has 0 bridge nodes = safe to refactor (no single point of failure) + +## Exit Criteria + +- [ ] All 6 hotspot modules split into focused sub-modules +- [ ] Each new sub-module has explicit exports and < 100 lines ideally +- [ ] Original modules serve as re-export hubs where applicable +- [ ] Clean architecture boundaries respected (no IO in Domain/UseCase) +- [ ] cabal build succeeds after each and all subtasks +- [ ] cabal test passes after each and all subtasks +- [ ] Graph degree for former hotspot modules significantly reduced + +## Progress + +- [ ] Session initialized +- [ ] Tasks created by TaskManager +- [ ] Implementation complete +- [ ] All tests pass \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_01.json b/.tmp/tasks/graphos-refactor/subtask_01.json new file mode 100644 index 0000000..ffd8139 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_01.json @@ -0,0 +1,26 @@ +{ + "id": "graphos-refactor-01", + "seq": "01", + "title": "Create Domain.Types.Node sub-module", + "status": "pending", + "depends_on": [], + "parallel": true, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/Domain/Types.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "src/Graphos/Domain/Types/Node.hs created with Node, NodeId, FileType (Code/Doc/Image/Paper/Video) and related smart constructors", + "Explicit export list with type signatures on all top-level definitions", + "No IO in this module (pure domain)", + "cabal build succeeds with new module in exposed-modules", + "Domain.Types re-exports all names from Domain.Types.Node" + ], + "deliverables": [ + "src/Graphos/Domain/Types/Node.hs", + "Updated src/Graphos/Domain/Types.hs (re-export + removed definitions)", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_02.json b/.tmp/tasks/graphos-refactor/subtask_02.json new file mode 100644 index 0000000..e61b039 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_02.json @@ -0,0 +1,26 @@ +{ + "id": "graphos-refactor-02", + "seq": "02", + "title": "Create Domain.Types.Edge sub-module", + "status": "pending", + "depends_on": [], + "parallel": true, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/Domain/Types.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "src/Graphos/Domain/Types/Edge.hs created with Edge, EdgeId, Relation, Confidence, relationToText, textToRelation", + "Explicit export list with type signatures", + "No IO (pure domain)", + "cabal build succeeds", + "Domain.Types re-exports all names from Domain.Types.Edge" + ], + "deliverables": [ + "src/Graphos/Domain/Types/Edge.hs", + "Updated src/Graphos/Domain/Types.hs (re-export)", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_03.json b/.tmp/tasks/graphos-refactor/subtask_03.json new file mode 100644 index 0000000..166cc65 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_03.json @@ -0,0 +1,26 @@ +{ + "id": "graphos-refactor-03", + "seq": "03", + "title": "Create Domain.Types.Graph sub-module", + "status": "pending", + "depends_on": [], + "parallel": true, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/Domain/Types.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "src/Graphos/Domain/Types/Graph.hs created with LabeledGraph, GraphDiff, Extraction, Hyperedge types", + "Explicit export list with type signatures", + "No IO (pure domain)", + "cabal build succeeds", + "Domain.Types re-exports all names from Domain.Types.Graph" + ], + "deliverables": [ + "src/Graphos/Domain/Types/Graph.hs", + "Updated src/Graphos/Domain/Types.hs (re-export)", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_04.json b/.tmp/tasks/graphos-refactor/subtask_04.json new file mode 100644 index 0000000..ecd43e7 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_04.json @@ -0,0 +1,28 @@ +{ + "id": "graphos-refactor-04", + "seq": "04", + "title": "Create Domain.Types.Pipeline + Domain.Types.Analysis sub-modules", + "status": "pending", + "depends_on": [], + "parallel": true, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/Domain/Types.hs", "src/Graphos/Domain/Analysis.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "src/Graphos/Domain/Types/Pipeline.hs created with PipelineConfig, defaultConfig, Detection types", + "src/Graphos/Domain/Types/Analysis.hs created with Analysis, GodNode, SurprisingConnection, SuggestedQuestion", + "Explicit export lists with type signatures", + "No IO (pure domain)", + "cabal build succeeds", + "Domain.Types re-exports all names from both" + ], + "deliverables": [ + "src/Graphos/Domain/Types/Pipeline.hs", + "src/Graphos/Domain/Types/Analysis.hs", + "Updated src/Graphos/Domain/Types.hs (re-export hub, all original definitions removed)", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_05.json b/.tmp/tasks/graphos-refactor/subtask_05.json new file mode 100644 index 0000000..4877c92 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_05.json @@ -0,0 +1,37 @@ +{ + "id": "graphos-refactor-05", + "seq": "05", + "title": "Update all imports after Domain.Types split + cabal test", + "status": "pending", + "depends_on": ["01", "02", "03", "04"], + "parallel": false, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": [ + "src/Graphos/Domain/Types.hs", + "src/Graphos/UseCase/Extract.hs", + "src/Graphos/UseCase/Build.hs", + "src/Graphos/UseCase/Cluster.hs", + "src/Graphos/UseCase/Infer.hs", + "src/Graphos/UseCase/Analyze.hs", + "src/Graphos/UseCase/Query.hs", + "src/Graphos/UseCase/Export.hs", + "src/Graphos/UseCase/Report.hs", + "src/Graphos/UseCase/Pipeline.hs", + "src/Graphos/Infrastructure/Export/HTML.hs", + "src/Graphos/Infrastructure/LSP/Client.hs" + ], + "acceptance_criteria": [ + "All modules that imported Graphos.Domain.Types still compile (re-export hub ensures compatibility)", + "Modules that only need specific types update to import new sub-modules directly", + "cabal build succeeds", + "cabal test passes" + ], + "deliverables": [ + "Updated import statements across all consuming modules", + "All tests passing" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_06.json b/.tmp/tasks/graphos-refactor/subtask_06.json new file mode 100644 index 0000000..caa308d --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_06.json @@ -0,0 +1,27 @@ +{ + "id": "graphos-refactor-06", + "seq": "06", + "title": "Create Domain.Graph.Core sub-module", + "status": "pending", + "depends_on": ["05"], + "parallel": false, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/Domain/Graph.hs", "src/Graphos/Domain/Graph/FGL.hs", "src/Graphos/Domain/Types.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "src/Graphos/Domain/Graph/Core.hs created with: Graph record type, buildGraph, mergeGraphs, mergeExtractions, field accessors (gNodes, gEdges, gAdjFwd, gAdjBack, gDirected)", + "isFileNode, isConceptNode helper predicates", + "Explicit export list, type signatures on all top-level definitions", + "No IO (pure domain)", + "cabal build succeeds", + "Domain.Graph re-exports from Domain.Graph.Core" + ], + "deliverables": [ + "src/Graphos/Domain/Graph/Core.hs", + "Updated src/Graphos/Domain/Graph.hs (re-export)", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_07.json b/.tmp/tasks/graphos-refactor/subtask_07.json new file mode 100644 index 0000000..46226aa --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_07.json @@ -0,0 +1,29 @@ +{ + "id": "graphos-refactor-07", + "seq": "07", + "title": "Create Domain.Graph.Query + Domain.Graph.Analysis + Domain.Graph.Diff sub-modules", + "status": "pending", + "depends_on": ["06"], + "parallel": false, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/Domain/Graph.hs", "src/Graphos/Domain/Graph/FGL.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "src/Graphos/Domain/Graph/Query.hs created with: shortestPath, neighbors, degree, breadthFirstSearch, depthFirstSearch, subgraph", + "src/Graphos/Domain/Graph/Analysis.hs created with: articulationPoints, biconnectedComponents, godNodes, edgeBetweenness, dominators, components", + "src/Graphos/Domain/Graph/Diff.hs created with: graphDiff, LabeledGraph related functions", + "All explicit exports, type signatures, no IO", + "cabal build succeeds", + "Domain.Graph re-exports all from Core, Query, Analysis, Diff + existing FGL" + ], + "deliverables": [ + "src/Graphos/Domain/Graph/Query.hs", + "src/Graphos/Domain/Graph/Analysis.hs", + "src/Graphos/Domain/Graph/Diff.hs", + "Updated src/Graphos/Domain/Graph.hs (re-export hub)", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_08.json b/.tmp/tasks/graphos-refactor/subtask_08.json new file mode 100644 index 0000000..7f79953 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_08.json @@ -0,0 +1,24 @@ +{ + "id": "graphos-refactor-08", + "seq": "08", + "title": "Create Domain.Export.Visualization types", + "status": "pending", + "depends_on": ["05"], + "parallel": true, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/Infrastructure/Export/HTML.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "src/Graphos/Domain/Export/Visualization.hs created with: VisNode, VisEdge types (currently embedded in HTML.hs as domain-level visualization types)", + "ToJSON instances for VisNode/VisEdge", + "Explicit exports, type signatures, no IO (pure domain)", + "cabal build succeeds" + ], + "deliverables": [ + "src/Graphos/Domain/Export/Visualization.hs", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_09.json b/.tmp/tasks/graphos-refactor/subtask_09.json new file mode 100644 index 0000000..8e37441 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_09.json @@ -0,0 +1,24 @@ +{ + "id": "graphos-refactor-09", + "seq": "09", + "title": "Create Domain.Export.Format type class", + "status": "pending", + "depends_on": ["05"], + "parallel": true, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/Infrastructure/Export/CommunityGraph.hs", "src/Graphos/UseCase/Export.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "src/Graphos/Domain/Export/Format.hs created with ExportFormat type class: class ExportFormat a where render :: LabeledGraph -> a", + "Reduces CommunityGraph mediator coupling — each format implements the type class independently", + "Explicit exports, type signatures, no IO (pure domain)", + "cabal build succeeds" + ], + "deliverables": [ + "src/Graphos/Domain/Export/Format.hs", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_10.json b/.tmp/tasks/graphos-refactor/subtask_10.json new file mode 100644 index 0000000..fc9b576 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_10.json @@ -0,0 +1,32 @@ +{ + "id": "graphos-refactor-10", + "seq": "10", + "title": "Refactor Export.HTML — extract JS/CSS/HTML templates", + "status": "pending", + "depends_on": ["08", "09"], + "parallel": false, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/Infrastructure/Export/HTML.hs", "src/Graphos/Domain/Export/Visualization.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "templates/graph.js created with all inline JavaScript extracted from HTML.hs", + "templates/graph.css created with all inline CSS extracted from HTML.hs", + "templates/graph.html created with HTML shell template (placeholder variables for data injection)", + "HTML.hs loads templates at runtime or embeds them via Template Haskell / cabal data-files", + "VisNode/VisEdge types now imported from Domain.Export.Visualization", + "buildHTML split into smaller functions: renderNodesJSON, renderEdgesJSON, renderPageShell, renderSearchUI, renderCommunitySidebar", + "cabal build succeeds", + "cabal test passes", + "HTML.hs module reduced from 400+ lines to ~100 lines" + ], + "deliverables": [ + "templates/graph.js", + "templates/graph.css", + "templates/graph.html", + "Refactored src/Graphos/Infrastructure/Export/HTML.hs", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_11.json b/.tmp/tasks/graphos-refactor/subtask_11.json new file mode 100644 index 0000000..a5eca82 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_11.json @@ -0,0 +1,28 @@ +{ + "id": "graphos-refactor-11", + "seq": "11", + "title": "Split LSP.Client — create ServerMap + Transport sub-modules", + "status": "pending", + "depends_on": ["05"], + "parallel": true, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/Infrastructure/LSP/Client.hs", "src/Graphos/Infrastructure/LSP/Capabilities.hs", "src/Graphos/Infrastructure/LSP/Protocol.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "src/Graphos/Infrastructure/LSP/ServerMap.hs created with: languageServerCommands mapping (30+ language→server pairs), lspServerForFile helper", + "src/Graphos/Infrastructure/LSP/Transport.hs created with: connectToLSP, disconnectLSP, readLSPMessage, sendLSPMessage (JSON-RPC process management)", + "LSP.Client becomes thin orchestrator importing from ServerMap and Transport", + "Explicit exports, type signatures on all top-level definitions", + "cabal build succeeds", + "cabal test passes" + ], + "deliverables": [ + "src/Graphos/Infrastructure/LSP/ServerMap.hs", + "src/Graphos/Infrastructure/LSP/Transport.hs", + "Updated src/Graphos/Infrastructure/LSP/Client.hs (thin orchestrator)", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_12.json b/.tmp/tasks/graphos-refactor/subtask_12.json new file mode 100644 index 0000000..ad1067c --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_12.json @@ -0,0 +1,28 @@ +{ + "id": "graphos-refactor-12", + "seq": "12", + "title": "Split LSP.Client — create CapabilityParse + Extraction sub-modules", + "status": "pending", + "depends_on": ["11"], + "parallel": false, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/Infrastructure/LSP/Client.hs", "src/Graphos/Infrastructure/LSP/Capabilities.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "src/Graphos/Infrastructure/LSP/CapabilityParse.hs created with: parseServerCapabilities, lookupBoolCaps (merge/enhance existing Capabilities.hs)", + "src/Graphos/Infrastructure/LSP/Extraction.hs created with: extractDocumentSymbols, extractCallHierarchy, extractWorkspaceSymbols, symbolToNodes, symbolTreeToEdges, workspaceSymbolsToDocumentSymbols", + "LSP.Client is now a thin orchestrator (< 50 lines)", + "All explicit exports, type signatures", + "cabal build succeeds", + "cabal test passes" + ], + "deliverables": [ + "src/Graphos/Infrastructure/LSP/CapabilityParse.hs", + "src/Graphos/Infrastructure/LSP/Extraction.hs", + "Updated src/Graphos/Infrastructure/LSP/Client.hs (thin orchestrator)", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_13.json b/.tmp/tasks/graphos-refactor/subtask_13.json new file mode 100644 index 0000000..cabdff8 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_13.json @@ -0,0 +1,26 @@ +{ + "id": "graphos-refactor-13", + "seq": "13", + "title": "Split UseCase.Extract — create Haskell + Markdown extractors", + "status": "pending", + "depends_on": ["12"], + "parallel": false, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/UseCase/Extract.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "src/Graphos/UseCase/Extract/Haskell.hs created with: parseHaskellImports, parseHaskellDecls, extractHaskellStub, isTopLevelDecl, extractDeclName, extractImportName, FileGroup type", + "src/Graphos/UseCase/Extract/Markdown.hs created with: parseHeader, parseTags, parseWikiLinks, extractDocFile, mkHeaderNode, mkTagNode", + "Explicit exports, type signatures, no IO implementation in UseCase layer", + "cabal build succeeds", + "cabal test passes" + ], + "deliverables": [ + "src/Graphos/UseCase/Extract/Haskell.hs", + "src/Graphos/UseCase/Extract/Markdown.hs", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_14.json b/.tmp/tasks/graphos-refactor/subtask_14.json new file mode 100644 index 0000000..0b6e027 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_14.json @@ -0,0 +1,27 @@ +{ + "id": "graphos-refactor-14", + "seq": "14", + "title": "Split UseCase.Extract — create LSP Orchestrator + thin composition layer", + "status": "pending", + "depends_on": ["13"], + "parallel": false, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md"], + "reference_files": ["src/Graphos/UseCase/Extract.hs", "src/Graphos/Infrastructure/LSP/Extraction.hs", "Graphos.cabal"], + "acceptance_criteria": [ + "src/Graphos/UseCase/Extract/LSPOrchestrator.hs created with: doExtractWithSharedLSP, groupByLSPServer, extractionFromSymbols", + "UseCase.Extract becomes composition orchestrator (< 80 lines): extractAll, extractFromFile compose Haskell + Markdown + LSP extractors", + "Explicit exports, type signatures", + "cabal build succeeds", + "cabal test passes", + "No stub parsing logic remains in Extract.hs (moved to Haskell.hs)" + ], + "deliverables": [ + "src/Graphos/UseCase/Extract/LSPOrchestrator.hs", + "Updated src/Graphos/UseCase/Extract.hs (composition orchestrator)", + "Updated Graphos.cabal" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/subtask_15.json b/.tmp/tasks/graphos-refactor/subtask_15.json new file mode 100644 index 0000000..5f72cd6 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/subtask_15.json @@ -0,0 +1,37 @@ +{ + "id": "graphos-refactor-15", + "seq": "15", + "title": "Final integration — update CommunityGraph with ExportFormat + full test suite", + "status": "pending", + "depends_on": ["07", "10", "12", "14"], + "parallel": false, + "suggested_agent": "coder-agent", + "context_files": [".opencode/context/core/standards/code-quality.md", ".opencode/context/core/standards/test-coverage.md"], + "reference_files": [ + "src/Graphos/Infrastructure/Export/CommunityGraph.hs", + "src/Graphos/Domain/Export/Format.hs", + "src/Graphos/UseCase/Export.hs", + "src/Graphos/UseCase/Pipeline.hs", + "app/Main.hs", + "Graphos.cabal" + ], + "acceptance_criteria": [ + "CommunityGraph.hs refactored to use ExportFormat type class instead of direct format knowledge", + "Each export format (HTML, JSON, Neo4j, Obsidian, SVG, GraphML) implements ExportFormat", + "UseCase.Export updated to use type class dispatch", + "cabal build succeeds", + "cabal test passes (full suite, zero failures)", + "All re-export hubs verified (Domain.Types, Domain.Graph, LSP.Client, UseCase.Extract still importable)", + "No IO in Domain or UseCase layers", + "New modules added to test suite where appropriate" + ], + "deliverables": [ + "Updated src/Graphos/Infrastructure/Export/CommunityGraph.hs", + "Updated src/Graphos/UseCase/Export.hs", + "Updated Graphos.cabal (final)", + "All tests green" + ], + "started_at": null, + "completed_at": null, + "completion_summary": null +} \ No newline at end of file diff --git a/.tmp/tasks/graphos-refactor/task.json b/.tmp/tasks/graphos-refactor/task.json new file mode 100644 index 0000000..5005843 --- /dev/null +++ b/.tmp/tasks/graphos-refactor/task.json @@ -0,0 +1,35 @@ +{ + "id": "graphos-refactor", + "name": "Graphos Modular Refactoring", + "status": "active", + "objective": "Split 6 coupling hotspot modules identified by graph analysis into focused sub-modules, reducing degree and improving cohesion while maintaining backward compatibility.", + "context_files": [ + ".opencode/context/core/standards/code-quality.md", + ".opencode/context/core/standards/test-coverage.md" + ], + "reference_files": [ + "src/Graphos/Domain/Types.hs", + "src/Graphos/Domain/Graph.hs", + "src/Graphos/Domain/Graph/FGL.hs", + "src/Graphos/Infrastructure/Export/HTML.hs", + "src/Graphos/Infrastructure/Export/CommunityGraph.hs", + "src/Graphos/Infrastructure/LSP/Client.hs", + "src/Graphos/Infrastructure/LSP/Capabilities.hs", + "src/Graphos/Infrastructure/LSP/Protocol.hs", + "src/Graphos/UseCase/Extract.hs", + "Graphos.cabal" + ], + "exit_criteria": [ + "All 6 hotspot modules split into focused sub-modules", + "Each new sub-module has explicit exports (< 100 lines ideally)", + "Original modules serve as re-export hubs where applicable", + "Clean architecture boundaries respected (no IO in Domain/UseCase)", + "cabal build succeeds after each subtask", + "cabal test passes after each subtask", + "Graph degree for former hotspot modules significantly reduced" + ], + "subtask_count": 15, + "completed_count": 0, + "created_at": "2026-04-20T00:00:00Z", + "completed_at": null +} \ No newline at end of file diff --git a/docs/proposals/modular-refactoring.md b/docs/proposals/modular-refactoring.md new file mode 100644 index 0000000..ee8cb8b --- /dev/null +++ b/docs/proposals/modular-refactoring.md @@ -0,0 +1,168 @@ +# Graphos Modular Refactoring Proposal + +**Date**: 2026-04-20 +**Source**: Graphos knowledge graph analysis (god_nodes, get_neighbors, get_community) +**Status**: Proposed + +--- + +## Executive Summary + +Graph analysis of the codebase (3,128 nodes, 5,713 edges, 0 bridge nodes) identified **6 coupling hotspots** where single modules carry excessive responsibility. These "God Modules" have degree counts 10-100x higher than the average (3.65), making them hard to test, maintain, and reason about. + +This proposal splits each hotspot into focused sub-modules following clean architecture principles, with original modules preserved as re-export hubs for backward compatibility. + +--- + +## Hotspot Analysis + +| Priority | Module | Degree | Community Size | Smell | +|----------|--------|--------|---------------|-------| +| P0 | `Infrastructure.Export.HTML` | 436 | 353 | 200+ inline JS, 100+ inline CSS, HTML templates as string literals | +| P1 | `Domain.Types` | 342 | 258 | God Module: all types in one file | +| P1 | `Infrastructure.LSP.Client` | 292 | 211 | 5 responsibilities in one module | +| P2 | `UseCase.Extract` | 254 | — | 4 concerns combined (Haskell/Doc/LSP/concurrency) | +| P2 | `Domain.Graph` | 208 | — | 15+ exported functions, 7+ record fields | +| P2 | Export cross-coupling | — | — | CommunityGraph mediator knows all formats | + +--- + +## Refactoring Plan + +### P0: `Infrastructure.Export.HTML` (degree 436 → target < 100) + +**Problem**: 436 connections — the most-coupled module in the entire codebase. Contains ~200 inline JavaScript strings, ~100 inline CSS strings, and HTML templates all embedded as Haskell string literals. + +**Refactor**: +- Extract `VisNode`/`VisEdge` JSON types → `Domain.Export.Visualization` (domain-level types, not HTML-specific) +- Extract JS → `templates/graph.js` +- Extract CSS → `templates/graph.css` +- Extract HTML shell → `templates/graph.html` +- Split `buildHTML` into: `renderNodesJSON`, `renderEdgesJSON`, `renderPageShell`, `renderSearchUI`, `renderCommunitySidebar` + +### P1: `Domain.Types` (degree 342 → target < 50) + +**Problem**: God Module containing ALL domain types. 258 nodes in community 2621. + +**Refactor** — Split into 5 focused sub-modules: +- `Domain.Types.Node` — `Node`, `NodeId`, `FileType` (Code/Doc/Image/Paper/Video) +- `Domain.Types.Edge` — `Edge`, `EdgeId`, `Relation`, `Confidence`, converters +- `Domain.Types.Graph` — `LabeledGraph`, `GraphDiff`, `Extraction`, `Hyperedge` +- `Domain.Types.Pipeline` — `PipelineConfig`, `defaultConfig`, `Detection` types +- `Domain.Types.Analysis` — `Analysis`, `GodNode`, `SurprisingConnection`, `SuggestedQuestion` +- Keep `Domain.Types` as **re-export hub** for backward compatibility + +### P1: `Infrastructure.LSP.Client` (degree 292 → target < 30) + +**Problem**: 5 responsibilities in one module (211 community members). + +**Refactor** — Split into 4 focused sub-modules: +- `Infrastructure.LSP.ServerMap` — 30+ language→server mappings +- `Infrastructure.LSP.Transport` — JSON-RPC read/write, `connectToLSP`, `disconnectLSP` +- `Infrastructure.LSP.CapabilityParse` — Server capability parsing (merge with existing `Capabilities.hs`) +- `Infrastructure.LSP.Extraction` — `symbolToNodes`, `symbolTreeToEdges`, `workspaceSymbolsToDocumentSymbols` +- Keep `LSP.Client` as thin orchestrator + +### P2: `UseCase.Extract` (degree 254 → target < 30) + +**Problem**: Combines Haskell parsing, doc extraction, LSP orchestration, and concurrency management. + +**Refactor** — Split into 3 focused sub-modules: +- `UseCase.Extract.Haskell` — `parseHaskellImports`, `parseHaskellDecls`, `extractHaskellStub`, `isTopLevelDecl` +- `UseCase.Extract.Markdown` — `parseHeader`, `parseTags`, `parseWikiLinks`, `extractDocFile` +- `UseCase.Extract.LSPOrchestrator` — `doExtractWithSharedLSP`, `groupByLSPServer` +- Keep `UseCase.Extract` as composition orchestrator + +### P2: `Domain.Graph` (degree 208 → target < 40) + +**Problem**: 15+ exported functions, `Graph` record with 7+ fields. Already has `Domain.Graph.FGL` as a sub-module. + +**Refactor** — Extend the existing sub-module pattern: +- `Domain.Graph.Core` — `Graph` type, `buildGraph`, `mergeGraphs`, `mergeExtractions`, field accessors +- `Domain.Graph.Query` — `shortestPath`, `neighbors`, `degree`, BFS, DFS, `subgraph` +- `Domain.Graph.Analysis` — `articulationPoints`, `biconnectedComponents`, `godNodes`, `edgeBetweenness`, `dominators` +- `Domain.Graph.Diff` — `graphDiff`, `LabeledGraph` +- Keep `Domain.Graph` as re-export hub (+ existing `Domain.Graph.FGL`) + +### P2: Export Cross-Coupling (CommunityGraph mediator) + +**Problem**: `CommunityGraph` has `conceptually_related_to` edges to every export format AND FileSystem modules. Classic Mediator anti-pattern. + +**Refactor**: +- Introduce `Domain.Export.Format` type class: `class ExportFormat a where render :: LabeledGraph -> a` +- Each format (HTML, JSON, Neo4j, Obsidian, SVG, GraphML) implements independently +- Reduces CommunityGraph coupling significantly + +--- + +## Execution Plan + +### Dependency Graph + +``` +Batch 1 (PARALLEL): 01-04 Domain.Types sub-modules + ↓ +Batch 2 (SEQUENTIAL): 05 Update imports + cabal test + ↓ +Batch 3 (PARALLEL): 06,08,09,11 Graph.Core, Export.Visualization, Export.Format, LSP.ServerMap+Transport + ↓ +Batch 4 (SEQUENTIAL): 07,10,12 Graph.Query+Analysis+Diff, Export.HTML, LSP.CapabilityParse+Extraction + ↓ +Batch 5 (SEQUENTIAL): 13 UseCase.Extract.Haskell + Markdown + ↓ +Batch 6 (SEQUENTIAL): 14 UseCase.Extract.LSPOrchestrator + ↓ +Batch 7 (SEQUENTIAL): 15 Final integration + ExportFormat + full test suite +``` + +### 15 Subtasks + +| # | Title | Depends On | Parallel | +|---|-------|-----------|----------| +| 01 | Create Domain.Types.Node | — | Yes | +| 02 | Create Domain.Types.Edge | — | Yes | +| 03 | Create Domain.Types.Graph | — | Yes | +| 04 | Create Domain.Types.Pipeline + Analysis | — | Yes | +| 05 | Update all imports after Types split | 01-04 | No | +| 06 | Create Domain.Graph.Core | 05 | No | +| 07 | Create Domain.Graph.Query + Analysis + Diff | 06 | No | +| 08 | Create Domain.Export.Visualization types | 05 | Yes | +| 09 | Create Domain.Export.Format type class | 05 | Yes | +| 10 | Refactor Export.HTML — extract templates | 08,09 | No | +| 11 | Split LSP.Client — ServerMap + Transport | 05 | Yes | +| 12 | Split LSP.Client — CapabilityParse + Extraction | 11 | No | +| 13 | Split UseCase.Extract — Haskell + Markdown | 12 | No | +| 14 | Split UseCase.Extract — LSPOrchestrator | 13 | No | +| 15 | Final integration — ExportFormat + full tests | 07,10,12,14 | No | + +--- + +## Constraints + +- **Clean architecture**: No IO in Domain or UseCase layers +- **Backward compatibility**: Original modules become re-export hubs +- **Incremental**: Each subtask must compile and pass tests independently +- **Haskell conventions**: PascalCase types, camelCase functions, explicit exports, type signatures on all top-level definitions +- **Graph resilience**: 0 bridge nodes means no single module is critical — safe to refactor + +## Exit Criteria + +- [ ] All 6 hotspot modules split into focused sub-modules +- [ ] Each new sub-module has explicit exports and < 100 lines (ideally < 50) +- [ ] Original modules serve as re-export hubs where applicable +- [ ] Clean architecture boundaries respected +- [ ] `cabal build` succeeds after each and all subtasks +- [ ] `cabal test` passes after each and all subtasks +- [ ] Graph degree for former hotspot modules significantly reduced + +--- + +## Expected Impact + +| Module | Before | After (est.) | Reduction | +|--------|--------|-------------|-----------| +| Export.HTML | 436 | ~80 | -82% | +| Domain.Types | 342 | ~40 (hub) + 60 sub-modules | -88% | +| LSP.Client | 292 | ~25 (hub) + 55 sub-modules | -91% | +| UseCase.Extract | 254 | ~20 (orchestrator) + 50 sub-modules | -80% | +| Domain.Graph | 208 | ~30 (hub) + 40 sub-modules | -86% | \ No newline at end of file diff --git a/graphos.cabal b/graphos.cabal index 0d3922d..ad82f24 100644 --- a/graphos.cabal +++ b/graphos.cabal @@ -42,9 +42,19 @@ library hs-source-dirs: src exposed-modules: Graphos.Prelude - -- Domain + -- Domain - Types sub-modules Graphos.Domain.Types + Graphos.Domain.Types.Node + Graphos.Domain.Types.Edge + Graphos.Domain.Types.Graph + Graphos.Domain.Types.Pipeline + Graphos.Domain.Types.Analysis + -- Domain - Graph Graphos.Domain.Graph + Graphos.Domain.Graph.Core + Graphos.Domain.Graph.Query + Graphos.Domain.Graph.Analysis + Graphos.Domain.Graph.Diff Graphos.Domain.Graph.FGL Graphos.Domain.Community Graphos.Domain.Analysis @@ -52,8 +62,10 @@ library -- Use Cases Graphos.UseCase.Pipeline Graphos.UseCase.Detect - Graphos.UseCase.Extract - Graphos.UseCase.Build + Graphos.UseCase.Extract + Graphos.UseCase.Extract.Haskell + Graphos.UseCase.Extract.Markdown + Graphos.UseCase.Build Graphos.UseCase.Cluster Graphos.UseCase.Analyze Graphos.UseCase.Report @@ -71,12 +83,16 @@ library Graphos.UseCase.SelectContext Graphos.UseCase.FormatContext Graphos.UseCase.Conversation - -- Infrastructure - LSP - Graphos.Infrastructure.LSP.Client - Graphos.Infrastructure.LSP.Protocol - Graphos.Infrastructure.LSP.Capabilities - -- Infrastructure - Config - Graphos.Infrastructure.Config + -- Infrastructure - LSP + Graphos.Infrastructure.LSP.Client + Graphos.Infrastructure.LSP.Protocol + Graphos.Infrastructure.LSP.Capabilities + Graphos.Infrastructure.LSP.ServerMap + Graphos.Infrastructure.LSP.Transport + Graphos.Infrastructure.LSP.CapabilityParse + Graphos.Infrastructure.LSP.Extraction + -- Infrastructure - Config + Graphos.Infrastructure.Config -- Infrastructure - FileSystem Graphos.Infrastructure.FileSystem.Watcher Graphos.Infrastructure.FileSystem.Cache @@ -180,7 +196,8 @@ test-suite graphos-test Graphos.UseCase.PipelineSpec Graphos.UseCase.ExtractSpec Graphos.UseCase.QuerySpec - Graphos.Infrastructure.LSP.ClientSpec + Graphos.Infrastructure.LSP.ClientSpec + Graphos.Infrastructure.LSP.TransportSpec Graphos.Domain.ContextSpec Graphos.UseCase.SelectContextSpec Graphos.UseCase.FormatContextSpec @@ -195,4 +212,5 @@ test-suite graphos-test , aeson , containers , fgl-arbitrary >= 0.2 + , process >= 1.6 , text \ No newline at end of file diff --git a/src/Graphos/Domain/Graph.hs b/src/Graphos/Domain/Graph.hs index dc52f99..beb3ccc 100644 --- a/src/Graphos/Domain/Graph.hs +++ b/src/Graphos/Domain/Graph.hs @@ -1,11 +1,8 @@ --- | Graph operations - pure functions over the domain types. +-- | Graph operations (re-export hub). -- Build, merge, query, and diff knowledge graphs. -- --- This module uses fgl (Functional Graph Library) internally for graph --- algorithms (BFS, DFS, shortest path, betweenness centrality) while --- keeping the public API unchanged with Map/Set-based representation. - -{-# LANGUAGE ScopedTypeVariables #-} +-- This module re-exports everything from sub-modules for backward compatibility. +-- Sub-modules: Core, Query, Analysis, Diff, FGL. module Graphos.Domain.Graph ( -- * Types Graph @@ -40,296 +37,7 @@ module Graphos.Domain.Graph , graphDiff ) where -import Data.List (sortOn, nubBy, nub) -import Data.Map.Strict (Map) -import qualified Data.Map.Strict as Map -import Data.Set (Set) -import qualified Data.Set as Set -import qualified Data.Text as T -import Data.Graph.Inductive.Graph (labNodes) -import qualified Data.Graph.Inductive.Graph as FGL -import Data.Graph.Inductive.PatriciaTree () -import Data.Graph.Inductive.Query.BFS (bfs, esp) -import Data.Graph.Inductive.Query.DFS (dfs) -import Data.Graph.Inductive.Query.ArtPoint (ap) -import Data.Graph.Inductive.Query.BCC (bcc) -import Data.Graph.Inductive.Query.Dominators (dom) - -import Graphos.Domain.Types -import Graphos.Domain.Graph.FGL - ( toFGL, FGLGraph ) - --- ─────────────────────────────────────────────── --- Internal graph representation --- ─────────────────────────────────────────────── - --- | Adjacency-list graph with node and edge attributes -data Graph = Graph - { gNodes :: Map NodeId Node - , gEdges :: Map (NodeId, NodeId) Edge - , gAdjFwd :: Map NodeId (Set NodeId) -- forward adjacency - , gAdjBack :: Map NodeId (Set NodeId) -- backward adjacency (for undirected queries) - , gDirected :: Bool - } deriving (Eq, Show) - --- ─────────────────────────────────────────────── --- Construction --- ─────────────────────────────────────────────── - --- | Build a graph from an Extraction result -buildGraph :: Bool -> Extraction -> Graph -buildGraph directed extraction = - let nodes = Map.fromList [(nodeId n, n) | n <- extractionNodes extraction] - edgeMap = Map.fromList [((edgeSource e, edgeTarget e), e) | e <- extractionEdges extraction] - fwdAdj = Map.fromListWith Set.union - [(edgeSource e, Set.singleton (edgeTarget e)) | e <- extractionEdges extraction] - bwdAdj = if directed - then Map.fromListWith Set.union - [(edgeTarget e, Set.singleton (edgeSource e)) | e <- extractionEdges extraction] - else Map.fromListWith Set.union - [(edgeTarget e, Set.singleton (edgeSource e)) | e <- extractionEdges extraction] - <> fwdAdj -- undirected: edges go both ways - in Graph - { gNodes = nodes - , gEdges = edgeMap - , gAdjFwd = fwdAdj - , gAdjBack = bwdAdj - , gDirected = directed - } - --- | Merge two extractions (dedup nodes by id, combine edges) -mergeExtractions :: Extraction -> Extraction -> Extraction -mergeExtractions a b = - let allNodes = nubBy (\x y -> nodeId x == nodeId y) (extractionNodes a ++ extractionNodes b) - allEdges = extractionEdges a ++ extractionEdges b - allHyper = extractionHyperedges a ++ extractionHyperedges b - in Extraction - { extractionNodes = allNodes - , extractionEdges = allEdges - , extractionHyperedges = allHyper - , extractionInputTokens = extractionInputTokens a + extractionInputTokens b - , extractionOutputTokens = extractionOutputTokens a + extractionOutputTokens b - } - --- | Merge two graphs (new graph takes precedence for overlapping nodes) -mergeGraphs :: Graph -> Graph -> Graph -mergeGraphs old new = - let mergedNodes = gNodes old <> gNodes new - mergedEdges = gEdges old <> gEdges new - mergedFwd = Map.unionWith Set.union (gAdjFwd old) (gAdjFwd new) - mergedBwd = Map.unionWith Set.union (gAdjBack old) (gAdjBack new) - in Graph - { gNodes = mergedNodes - , gEdges = mergedEdges - , gAdjFwd = mergedFwd - , gAdjBack = mergedBwd - , gDirected = gDirected old - } - --- ─────────────────────────────────────────────── --- Internal: Graph -> FGL conversion --- ─────────────────────────────────────────────── - --- | Convert a Graphos Graph to an fgl Gr for algorithm use -toFGL' :: Graph -> FGLGraph -toFGL' g = toFGL (gNodes g) (gEdges g) - --- | Build a node ID lookup: fgl Int -> Graphos NodeId -nidLookup :: FGLGraph -> Map Int NodeId -nidLookup gr = Map.fromList [(idx, nid) | (idx, (nid, _)) <- labNodes gr] - --- | Find the fgl Int index for a Graphos NodeId -findFglIdx :: FGLGraph -> NodeId -> Maybe Int -findFglIdx gr nid = lookup nid idxList - where idxList = [(nid', idx) | (idx, (nid', _)) <- labNodes gr] - --- ─────────────────────────────────────────────── --- Queries --- ─────────────────────────────────────────────── - --- | Find god nodes (highest-degree nodes, excluding file hubs and concepts) -godNodes :: Graph -> Int -> [GodNode] -godNodes g topN = - let degrees = [(nid, Set.size (neighbors g nid), n) | (nid, n) <- Map.toList (gNodes g)] - filtered = filter (\(_, deg, n) -> not (isFileNode g n) && not (isConceptNode n) && deg > 0) degrees - sorted = sortOn (\(_, deg, _) -> negate deg) filtered - in take topN [GodNode { gnId = nid, gnLabel = nodeLabel n, gnEdges = deg } - | (nid, deg, n) <- sorted] - --- | Get neighbor node IDs. --- For directed graphs: forward neighbors only. --- For undirected graphs: union of forward and backward adjacency. -neighbors :: Graph -> NodeId -> Set NodeId -neighbors g nid = - let fwd = Map.findWithDefault Set.empty nid (gAdjFwd g) - bwd = Map.findWithDefault Set.empty nid (gAdjBack g) - in if gDirected g then fwd else fwd `Set.union` bwd - --- | Get degree of a node -degree :: Graph -> NodeId -> Int -degree g nid = Set.size $ neighbors g nid - --- | Breadth-first search from a start node, returns visited node IDs --- Uses fgl's BFS algorithm internally -breadthFirstSearch :: Graph -> NodeId -> Int -> Set NodeId -breadthFirstSearch g start _maxDepth = - let gr = toFGL' g - nidMap = nidLookup gr - in case findFglIdx gr start of - Just startIdx -> Set.fromList [Map.findWithDefault start idx nidMap | idx <- bfs startIdx gr] - Nothing -> Set.empty - --- | Depth-first search from a start node, returns visited node IDs --- Uses fgl's DFS algorithm internally -depthFirstSearch :: Graph -> NodeId -> Int -> Set NodeId -depthFirstSearch g start _maxDepth = - let gr = toFGL' g - nidMap = nidLookup gr - in case findFglIdx gr start of - Just startIdx -> Set.fromList [Map.findWithDefault start idx nidMap | idx <- dfs [startIdx] gr] - Nothing -> Set.empty - --- | Shortest path between two nodes (BFS) --- Uses fgl's ESP (shortest path by edge count) algorithm internally -shortestPath :: Graph -> NodeId -> NodeId -> Maybe [NodeId] -shortestPath g src tgt = - let gr = toFGL' g - nidMap = nidLookup gr - in case (findFglIdx gr src, findFglIdx gr tgt) of - (Just srcIdx, Just tgtIdx) -> - let path = esp srcIdx tgtIdx gr - in if null path then Nothing - else Just [Map.findWithDefault src idx nidMap | idx <- path] - _ -> Nothing - --- | Extract a subgraph around given nodes -subgraph :: Graph -> Set NodeId -> Graph -subgraph g nodeSet = - let nodes' = Map.filterWithKey (\k _ -> k `Set.member` nodeSet) (gNodes g) - edges' = Map.filterWithKey (\(s, t) _ -> s `Set.member` nodeSet && t `Set.member` nodeSet) (gEdges g) - fwd' = Map.map (`Set.intersection` nodeSet) $ Map.filterWithKey (\k _ -> k `Set.member` nodeSet) (gAdjFwd g) - bwd' = Map.map (`Set.intersection` nodeSet) $ Map.filterWithKey (\k _ -> k `Set.member` nodeSet) (gAdjBack g) - in Graph { gNodes = nodes', gEdges = edges', gAdjFwd = fwd', gAdjBack = bwd', gDirected = gDirected g } - --- ─────────────────────────────────────────────── --- Advanced queries (fgl-powered) --- ─────────────────────────────────────────────── - --- | Find articulation points (bridge nodes) whose removal would disconnect the graph. --- Uses fgl's ap algorithm internally. -articulationPoints :: Graph -> [NodeId] -articulationPoints g = - let gr = toFGL' g - nidMap = nidLookup gr - artPointIdxs = ap gr - in [Map.findWithDefault (T.pack "???") idx nidMap | idx <- artPointIdxs] - --- | Find biconnected components of the graph. --- Each component is a list of NodeIds forming a maximal subgraph --- with no articulation point. --- Uses fgl's bcc algorithm internally. -biconnectedComponents :: Graph -> [[NodeId]] -biconnectedComponents g = - let gr = toFGL' g - nidMap = nidLookup gr - components = bcc gr - in [nub [Map.findWithDefault (T.pack "???") idx nidMap | idx <- FGL.nodes comp] | comp <- components] - --- | Compute the dominator tree for a given start node. --- Returns a map from each node to its immediate dominator. --- Uses fgl's dom algorithm internally. -dominators :: Graph -> NodeId -> Map NodeId (Maybe NodeId) -dominators g start = - let gr = toFGL' g - nidMap = nidLookup gr - in case findFglIdx gr start of - Just startIdx -> - let domList = dom gr startIdx - in Map.fromList [(Map.findWithDefault n idx nidMap - , case Map.lookup idom nidMap of - Just d -> Just d - Nothing -> Nothing) - | (idx, idomList) <- domList - , n <- [Map.findWithDefault start idx nidMap] - , idom <- idomList] - Nothing -> Map.empty - --- ─────────────────────────────────────────────── --- Analysis helpers --- ─────────────────────────────────────────────── - --- | Check if a node is a file-level hub (synthetic AST node) -isFileNode :: Graph -> Node -> Bool -isFileNode g n = - let label = nodeLabel n - srcFile = nodeSourceFile n - in -- Method stub: starts with '.' and ends with ')' - (not (T.null label) && T.singleton (T.head label) == "." && T.last label == ')') - -- Low-degree function stub - || (not (T.null label) && T.last label == ')' && degree g (nodeId n) <= 1) - -- Label matches source filename - || (not (T.null srcFile) && not (T.null label) && label == T.pack (takeFileName (T.unpack srcFile))) - where - takeFileName path = case T.breakOnEnd "/" (T.pack path) of - (_, "") -> path - (_, name) -> T.unpack $ T.dropWhile (== '/') name - --- | Check if a node is a concept node (injected semantic annotation) -isConceptNode :: Node -> Bool -isConceptNode n = - let src = nodeSourceFile n - in T.null src || (T.null $ T.takeWhileEnd (/= '.') src) - --- | Compute edge betweenness centrality using fgl shortest paths -edgeBetweenness :: Graph -> Map (NodeId, NodeId) Double -edgeBetweenness g = - let gr = toFGL' g - nidMap = nidLookup gr - allNodeIndices = [(idx, nid) | (idx, (nid, _)) <- labNodes gr] - -- For each pair of nodes, find shortest path and count edge traversals - pathEdges = [edge - | (srcIdx, _) <- allNodeIndices - , (tgtIdx, _) <- allNodeIndices - , srcIdx < tgtIdx - , let path = esp srcIdx tgtIdx gr - , not (null path) - , edge <- zip path (drop 1 path)] - edgeCounts = Map.fromListWith (+) [ - ((Map.findWithDefault (T.pack "???") s nidMap, - Map.findWithDefault (T.pack "???") t nidMap), 1.0) - | (s, t) <- pathEdges] - n = fromIntegral (length allNodeIndices) - normalization = if n > 1 then 2.0 / (n * (n - 1)) else 1.0 - in fmap (* normalization) edgeCounts - --- ─────────────────────────────────────────────── --- Diff --- ─────────────────────────────────────────────── - --- | Compare two graph snapshots -graphDiff :: Graph -> Graph -> GraphDiff -graphDiff old new = - let oldNodeIds = Map.keysSet (gNodes old) - newNodeIds = Map.keysSet (gNodes new) - addedIds = newNodeIds `Set.difference` oldNodeIds - removedIds = oldNodeIds `Set.difference` newNodeIds - newNodes = [n | (nid, n) <- Map.toList (gNodes new), nid `Set.member` addedIds] - removedNodes = [(nid, nodeLabel n) | (nid, n) <- Map.toList (gNodes old), nid `Set.member` removedIds] - oldEdgeKeys = Map.keysSet (gEdges old) - newEdgeKeys = Map.keysSet (gEdges new) - addedEdgeKeys = newEdgeKeys `Set.difference` oldEdgeKeys - removedEdgeKeys = oldEdgeKeys `Set.difference` newEdgeKeys - newEdges = [e | (k, e) <- Map.toList (gEdges new), k `Set.member` addedEdgeKeys] - removedEs = [e | (k, e) <- Map.toList (gEdges old), k `Set.member` removedEdgeKeys] - parts = [] - <> (if null newNodes then [] else [T.pack (show (length newNodes) ++ " new node(s)")]) - <> (if null newEdges then [] else [T.pack (show (length newEdges) ++ " new edge(s)")]) - <> (if null removedNodes then [] else [T.pack (show (length removedNodes) ++ " node(s) removed")]) - <> (if null removedEs then [] else [T.pack (show (length removedEs) ++ " edge(s) removed")]) - in GraphDiff - { gdNewNodes = newNodes - , gdRemovedNodes = removedNodes - , gdNewEdges = newEdges - , gdRemovedEdges = removedEs - , gdSummary = if null parts then "no changes" else T.intercalate ", " parts - } \ No newline at end of file +import Graphos.Domain.Graph.Core (Graph, gNodes, gEdges, buildGraph, mergeExtractions, mergeGraphs, isFileNode, isConceptNode) +import Graphos.Domain.Graph.Query (neighbors, degree, shortestPath, breadthFirstSearch, depthFirstSearch, subgraph) +import Graphos.Domain.Graph.Analysis (godNodes, articulationPoints, biconnectedComponents, dominators, edgeBetweenness) +import Graphos.Domain.Graph.Diff (graphDiff) \ No newline at end of file diff --git a/src/Graphos/Domain/Graph/Analysis.hs b/src/Graphos/Domain/Graph/Analysis.hs new file mode 100644 index 0000000..cb1645f --- /dev/null +++ b/src/Graphos/Domain/Graph/Analysis.hs @@ -0,0 +1,122 @@ +-- | Advanced graph analysis — structural properties and centrality. +-- Pure functions over the domain types. +{-# LANGUAGE ScopedTypeVariables #-} +module Graphos.Domain.Graph.Analysis + ( godNodes + , articulationPoints + , biconnectedComponents + , dominators + , edgeBetweenness + ) where + +import Data.List (sortOn, nub) +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import qualified Data.Set as Set +import qualified Data.Text as T +import Data.Graph.Inductive.Graph (labNodes) +import qualified Data.Graph.Inductive.Graph as FGL +import Data.Graph.Inductive.Query.ArtPoint (ap) +import Data.Graph.Inductive.Query.BCC (bcc) +import Data.Graph.Inductive.Query.Dominators (dom) +import Data.Graph.Inductive.Query.BFS (esp) + +import Graphos.Domain.Types +import Graphos.Domain.Graph.Core (Graph(..), isFileNode, isConceptNode) +import Graphos.Domain.Graph.FGL (toFGL, FGLGraph) + +-- ─────────────────────────────────────────────── +-- Internal: Graph -> FGL conversion +-- ─────────────────────────────────────────────── + +-- | Convert a Graphos Graph to an fgl Gr for algorithm use +toFGL' :: Graph -> FGLGraph +toFGL' g = toFGL (gNodes g) (gEdges g) + +-- | Build a node ID lookup: fgl Int -> Graphos NodeId +nidLookup :: FGLGraph -> Map Int NodeId +nidLookup gr = Map.fromList [(idx, nid) | (idx, (nid, _)) <- labNodes gr] + +-- | Find the fgl Int index for a Graphos NodeId +findFglIdx :: FGLGraph -> NodeId -> Maybe Int +findFglIdx gr nid = lookup nid idxList + where idxList = [(nid', idx) | (idx, (nid', _)) <- labNodes gr] + +-- ─────────────────────────────────────────────── +-- Analysis queries +-- ─────────────────────────────────────────────── + +-- | Find god nodes (highest-degree nodes, excluding file hubs and concepts) +godNodes :: Graph -> Int -> [GodNode] +godNodes g topN = + let degrees = [(nid, Set.size (neighbors' g nid), n) | (nid, n) <- Map.toList (gNodes g)] + filtered = filter (\(_, deg, n) -> not (isFileNode g n) && not (isConceptNode n) && deg > 0) degrees + sorted = sortOn (\(_, deg, _) -> negate deg) filtered + in take topN [GodNode { gnId = nid, gnLabel = nodeLabel n, gnEdges = deg } + | (nid, deg, n) <- sorted] + where + neighbors' g' nid = + let fwd = Map.findWithDefault Set.empty nid (gAdjFwd g') + bwd = Map.findWithDefault Set.empty nid (gAdjBack g') + in if gDirected g' then fwd else fwd `Set.union` bwd + +-- | Find articulation points (bridge nodes) whose removal would disconnect the graph. +-- Uses fgl's ap algorithm internally. +articulationPoints :: Graph -> [NodeId] +articulationPoints g = + let gr = toFGL' g + nidMap = nidLookup gr + artPointIdxs = ap gr + in [Map.findWithDefault (T.pack "???") idx nidMap | idx <- artPointIdxs] + +-- | Find biconnected components of the graph. +-- Each component is a list of NodeIds forming a maximal subgraph +-- with no articulation point. +-- Uses fgl's bcc algorithm internally. +biconnectedComponents :: Graph -> [[NodeId]] +biconnectedComponents g = + let gr = toFGL' g + nidMap = nidLookup gr + components = bcc gr + in [nub [Map.findWithDefault (T.pack "???") idx nidMap | idx <- FGL.nodes comp] | comp <- components] + +-- | Compute the dominator tree for a given start node. +-- Returns a map from each node to its immediate dominator. +-- Uses fgl's dom algorithm internally. +dominators :: Graph -> NodeId -> Map NodeId (Maybe NodeId) +dominators g start = + let gr = toFGL' g + nidMap = nidLookup gr + in case findFglIdx gr start of + Just startIdx -> + let domList = dom gr startIdx + in Map.fromList [(Map.findWithDefault n idx nidMap + , case Map.lookup idom nidMap of + Just d -> Just d + Nothing -> Nothing) + | (idx, idomList) <- domList + , n <- [Map.findWithDefault start idx nidMap] + , idom <- idomList] + Nothing -> Map.empty + +-- | Compute edge betweenness centrality using fgl shortest paths +edgeBetweenness :: Graph -> Map (NodeId, NodeId) Double +edgeBetweenness g = + let gr = toFGL' g + nidMap = nidLookup gr + allNodeIndices = [(idx, nid) | (idx, (nid, _)) <- labNodes gr] + -- For each pair of nodes, find shortest path and count edge traversals + pathEdges = [edge + | (srcIdx, _) <- allNodeIndices + , (tgtIdx, _) <- allNodeIndices + , srcIdx < tgtIdx + , let path = esp srcIdx tgtIdx gr + , not (null path) + , edge <- zip path (drop 1 path)] + edgeCounts = Map.fromListWith (+) [ + ((Map.findWithDefault (T.pack "???") s nidMap, + Map.findWithDefault (T.pack "???") t nidMap), 1.0) + | (s, t) <- pathEdges] + n = fromIntegral (length allNodeIndices) + normalization = if n > 1 then 2.0 / (n * (n - 1)) else 1.0 + in fmap (* normalization) edgeCounts \ No newline at end of file diff --git a/src/Graphos/Domain/Graph/Core.hs b/src/Graphos/Domain/Graph/Core.hs new file mode 100644 index 0000000..5255027 --- /dev/null +++ b/src/Graphos/Domain/Graph/Core.hs @@ -0,0 +1,121 @@ +-- | Core graph type and construction operations. +-- Pure functions over the domain types. +module Graphos.Domain.Graph.Core + ( -- * Types + Graph(..) + + -- * Construction + , buildGraph + , mergeExtractions + , mergeGraphs + + -- * Analysis helpers + , isFileNode + , isConceptNode + ) where + +import Data.List (nubBy) +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Data.Set (Set) +import qualified Data.Set as Set +import qualified Data.Text as T + +import Graphos.Domain.Types + +-- ─────────────────────────────────────────────── +-- Internal graph representation +-- ─────────────────────────────────────────────── + +-- | Adjacency-list graph with node and edge attributes +data Graph = Graph + { gNodes :: Map NodeId Node + , gEdges :: Map (NodeId, NodeId) Edge + , gAdjFwd :: Map NodeId (Set NodeId) -- forward adjacency + , gAdjBack :: Map NodeId (Set NodeId) -- backward adjacency (for undirected queries) + , gDirected :: Bool + } deriving (Eq, Show) + +-- ─────────────────────────────────────────────── +-- Construction +-- ─────────────────────────────────────────────── + +-- | Build a graph from an Extraction result +buildGraph :: Bool -> Extraction -> Graph +buildGraph directed extraction = + let nodes = Map.fromList [(nodeId n, n) | n <- extractionNodes extraction] + edgeMap = Map.fromList [((edgeSource e, edgeTarget e), e) | e <- extractionEdges extraction] + fwdAdj = Map.fromListWith Set.union + [(edgeSource e, Set.singleton (edgeTarget e)) | e <- extractionEdges extraction] + bwdAdj = if directed + then Map.fromListWith Set.union + [(edgeTarget e, Set.singleton (edgeSource e)) | e <- extractionEdges extraction] + else Map.fromListWith Set.union + [(edgeTarget e, Set.singleton (edgeSource e)) | e <- extractionEdges extraction] + <> fwdAdj -- undirected: edges go both ways + in Graph + { gNodes = nodes + , gEdges = edgeMap + , gAdjFwd = fwdAdj + , gAdjBack = bwdAdj + , gDirected = directed + } + +-- | Merge two extractions (dedup nodes by id, combine edges) +mergeExtractions :: Extraction -> Extraction -> Extraction +mergeExtractions a b = + let allNodes = nubBy (\x y -> nodeId x == nodeId y) (extractionNodes a ++ extractionNodes b) + allEdges = extractionEdges a ++ extractionEdges b + allHyper = extractionHyperedges a ++ extractionHyperedges b + in Extraction + { extractionNodes = allNodes + , extractionEdges = allEdges + , extractionHyperedges = allHyper + , extractionInputTokens = extractionInputTokens a + extractionInputTokens b + , extractionOutputTokens = extractionOutputTokens a + extractionOutputTokens b + } + +-- | Merge two graphs (new graph takes precedence for overlapping nodes) +mergeGraphs :: Graph -> Graph -> Graph +mergeGraphs old new = + let mergedNodes = gNodes old <> gNodes new + mergedEdges = gEdges old <> gEdges new + mergedFwd = Map.unionWith Set.union (gAdjFwd old) (gAdjFwd new) + mergedBwd = Map.unionWith Set.union (gAdjBack old) (gAdjBack new) + in Graph + { gNodes = mergedNodes + , gEdges = mergedEdges + , gAdjFwd = mergedFwd + , gAdjBack = mergedBwd + , gDirected = gDirected old + } + +-- ─────────────────────────────────────────────── +-- Analysis helpers +-- ─────────────────────────────────────────────── + +-- | Check if a node is a file-level hub (synthetic AST node) +isFileNode :: Graph -> Node -> Bool +isFileNode g n = + let label = nodeLabel n + srcFile = nodeSourceFile n + nid = nodeId n + fwd = Map.findWithDefault Set.empty nid (gAdjFwd g) + bwd = Map.findWithDefault Set.empty nid (gAdjBack g) + deg = Set.size $ if gDirected g then fwd else fwd `Set.union` bwd + in -- Method stub: starts with '.' and ends with ')' + (not (T.null label) && T.singleton (T.head label) == "." && T.last label == ')') + -- Low-degree function stub + || (not (T.null label) && T.last label == ')' && deg <= 1) + -- Label matches source filename + || (not (T.null srcFile) && not (T.null label) && label == T.pack (takeFileName (T.unpack srcFile))) + where + takeFileName path = case T.breakOnEnd "/" (T.pack path) of + (_, "") -> path + (_, name) -> T.unpack $ T.dropWhile (== '/') name + +-- | Check if a node is a concept node (injected semantic annotation) +isConceptNode :: Node -> Bool +isConceptNode n = + let src = nodeSourceFile n + in T.null src || (T.null $ T.takeWhileEnd (/= '.') src) \ No newline at end of file diff --git a/src/Graphos/Domain/Graph/Diff.hs b/src/Graphos/Domain/Graph/Diff.hs new file mode 100644 index 0000000..0716886 --- /dev/null +++ b/src/Graphos/Domain/Graph/Diff.hs @@ -0,0 +1,40 @@ +-- | Graph diff — compare two graph snapshots. +-- Pure functions over the domain types. +module Graphos.Domain.Graph.Diff + ( graphDiff + ) where + +import qualified Data.Map.Strict as Map +import qualified Data.Set as Set +import qualified Data.Text as T + +import Graphos.Domain.Types +import Graphos.Domain.Graph.Core (Graph(..)) + +-- | Compare two graph snapshots +graphDiff :: Graph -> Graph -> GraphDiff +graphDiff old new = + let oldNodeIds = Map.keysSet (gNodes old) + newNodeIds = Map.keysSet (gNodes new) + addedIds = newNodeIds `Set.difference` oldNodeIds + removedIds = oldNodeIds `Set.difference` newNodeIds + newNodes = [n | (nid, n) <- Map.toList (gNodes new), nid `Set.member` addedIds] + removedNodes = [(nid, nodeLabel n) | (nid, n) <- Map.toList (gNodes old), nid `Set.member` removedIds] + oldEdgeKeys = Map.keysSet (gEdges old) + newEdgeKeys = Map.keysSet (gEdges new) + addedEdgeKeys = newEdgeKeys `Set.difference` oldEdgeKeys + removedEdgeKeys = oldEdgeKeys `Set.difference` newEdgeKeys + newEdges = [e | (k, e) <- Map.toList (gEdges new), k `Set.member` addedEdgeKeys] + removedEs = [e | (k, e) <- Map.toList (gEdges old), k `Set.member` removedEdgeKeys] + parts = [] + <> (if null newNodes then [] else [T.pack (show (length newNodes) ++ " new node(s)")]) + <> (if null newEdges then [] else [T.pack (show (length newEdges) ++ " new edge(s)")]) + <> (if null removedNodes then [] else [T.pack (show (length removedNodes) ++ " node(s) removed")]) + <> (if null removedEs then [] else [T.pack (show (length removedEs) ++ " edge(s) removed")]) + in GraphDiff + { gdNewNodes = newNodes + , gdRemovedNodes = removedNodes + , gdNewEdges = newEdges + , gdRemovedEdges = removedEs + , gdSummary = if null parts then "no changes" else T.intercalate ", " parts + } \ No newline at end of file diff --git a/src/Graphos/Domain/Graph/Query.hs b/src/Graphos/Domain/Graph/Query.hs new file mode 100644 index 0000000..2ac10e6 --- /dev/null +++ b/src/Graphos/Domain/Graph/Query.hs @@ -0,0 +1,99 @@ +-- | Graph query operations — traversal and search. +-- Pure functions over the domain types. +{-# LANGUAGE ScopedTypeVariables #-} +module Graphos.Domain.Graph.Query + ( neighbors + , degree + , shortestPath + , breadthFirstSearch + , depthFirstSearch + , subgraph + ) where + +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Data.Set (Set) +import qualified Data.Set as Set +import Data.Graph.Inductive.Graph (labNodes) +import Data.Graph.Inductive.Query.BFS (bfs, esp) +import Data.Graph.Inductive.Query.DFS (dfs) + +import Graphos.Domain.Types +import Graphos.Domain.Graph.Core (Graph(..)) +import Graphos.Domain.Graph.FGL (toFGL, FGLGraph) + +-- ─────────────────────────────────────────────── +-- Internal: Graph -> FGL conversion +-- ─────────────────────────────────────────────── + +-- | Convert a Graphos Graph to an fgl Gr for algorithm use +toFGL' :: Graph -> FGLGraph +toFGL' g = toFGL (gNodes g) (gEdges g) + +-- | Build a node ID lookup: fgl Int -> Graphos NodeId +nidLookup :: FGLGraph -> Map Int NodeId +nidLookup gr = Map.fromList [(idx, nid) | (idx, (nid, _)) <- labNodes gr] + +-- | Find the fgl Int index for a Graphos NodeId +findFglIdx :: FGLGraph -> NodeId -> Maybe Int +findFglIdx gr nid = lookup nid idxList + where idxList = [(nid', idx) | (idx, (nid', _)) <- labNodes gr] + +-- ─────────────────────────────────────────────── +-- Queries +-- ─────────────────────────────────────────────── + +-- | Get neighbor node IDs. +-- For directed graphs: forward neighbors only. +-- For undirected graphs: union of forward and backward adjacency. +neighbors :: Graph -> NodeId -> Set NodeId +neighbors g nid = + let fwd = Map.findWithDefault Set.empty nid (gAdjFwd g) + bwd = Map.findWithDefault Set.empty nid (gAdjBack g) + in if gDirected g then fwd else fwd `Set.union` bwd + +-- | Get degree of a node +degree :: Graph -> NodeId -> Int +degree g nid = Set.size $ neighbors g nid + +-- | Breadth-first search from a start node, returns visited node IDs +-- Uses fgl's BFS algorithm internally +breadthFirstSearch :: Graph -> NodeId -> Int -> Set NodeId +breadthFirstSearch g start _maxDepth = + let gr = toFGL' g + nidMap = nidLookup gr + in case findFglIdx gr start of + Just startIdx -> Set.fromList [Map.findWithDefault start idx nidMap | idx <- bfs startIdx gr] + Nothing -> Set.empty + +-- | Depth-first search from a start node, returns visited node IDs +-- Uses fgl's DFS algorithm internally +depthFirstSearch :: Graph -> NodeId -> Int -> Set NodeId +depthFirstSearch g start _maxDepth = + let gr = toFGL' g + nidMap = nidLookup gr + in case findFglIdx gr start of + Just startIdx -> Set.fromList [Map.findWithDefault start idx nidMap | idx <- dfs [startIdx] gr] + Nothing -> Set.empty + +-- | Shortest path between two nodes (BFS) +-- Uses fgl's ESP (shortest path by edge count) algorithm internally +shortestPath :: Graph -> NodeId -> NodeId -> Maybe [NodeId] +shortestPath g src tgt = + let gr = toFGL' g + nidMap = nidLookup gr + in case (findFglIdx gr src, findFglIdx gr tgt) of + (Just srcIdx, Just tgtIdx) -> + let path = esp srcIdx tgtIdx gr + in if null path then Nothing + else Just [Map.findWithDefault src idx nidMap | idx <- path] + _ -> Nothing + +-- | Extract a subgraph around given nodes +subgraph :: Graph -> Set NodeId -> Graph +subgraph g nodeSet = + let nodes' = Map.filterWithKey (\k _ -> k `Set.member` nodeSet) (gNodes g) + edges' = Map.filterWithKey (\(s, t) _ -> s `Set.member` nodeSet && t `Set.member` nodeSet) (gEdges g) + fwd' = Map.map (`Set.intersection` nodeSet) $ Map.filterWithKey (\k _ -> k `Set.member` nodeSet) (gAdjFwd g) + bwd' = Map.map (`Set.intersection` nodeSet) $ Map.filterWithKey (\k _ -> k `Set.member` nodeSet) (gAdjBack g) + in Graph { gNodes = nodes', gEdges = edges', gAdjFwd = fwd', gAdjBack = bwd', gDirected = gDirected g } \ No newline at end of file diff --git a/src/Graphos/Domain/Types.hs b/src/Graphos/Domain/Types.hs index 77f27bb..46fa8a0 100644 --- a/src/Graphos/Domain/Types.hs +++ b/src/Graphos/Domain/Types.hs @@ -1,7 +1,6 @@ --- | Core domain types for Graphos. --- These are pure data types with no IO dependencies. --- All domain logic operates on these types. -{-# LANGUAGE LambdaCase #-} +-- | Core domain types for Graphos (re-export hub). +-- All types are defined in focused sub-modules; this module re-exports +-- everything for backward compatibility. module Graphos.Domain.Types ( -- * Node types NodeId @@ -47,449 +46,13 @@ module Graphos.Domain.Types , PipelineConfig(..) , EdgeDensity(..) , defaultConfig + , GraphosConfig(..) + , defaultGraphosConfig ) where -import Data.Aeson (ToJSON(..), FromJSON(..), object, (.=), (.:), (.:?), withObject, withText) -import Data.Map.Strict (Map) -import Data.Text (Text) -import qualified Data.Text as T -import GHC.Generics (Generic) -import Graphos.Domain.Config (GraphosConfig, defaultGraphosConfig) - --- | Unique identifier for a node (derived from file + entity name) -type NodeId = Text - --- | Unique identifier for an edge -type EdgeId = Text - --- | Community identifier (integer) -type CommunityId = Int - --- | File type classification -data FileType - = CodeFile - | DocumentFile - | PaperFile - | ImageFile - | VideoFile - deriving (Eq, Show, Generic) - -instance ToJSON FileType where - toJSON CodeFile = "code" - toJSON DocumentFile = "document" - toJSON PaperFile = "paper" - toJSON ImageFile = "image" - toJSON VideoFile = "video" - -instance FromJSON FileType where - parseJSON = withText "FileType" $ \t -> case t of - "code" -> pure CodeFile - "document" -> pure DocumentFile - "paper" -> pure PaperFile - "image" -> pure ImageFile - "video" -> pure VideoFile - _ -> fail $ "Unknown file type: " ++ T.unpack t - --- | A node in the knowledge graph -data Node = Node - { nodeId :: NodeId - , nodeLabel :: Text - , nodeFileType :: FileType - , nodeSourceFile :: Text - , nodeSourceLocation :: Maybe Text - , nodeLineEnd :: Maybe Int -- ^ End line number (for exact code range) - , nodeKind :: Maybe Text -- ^ Symbol kind: "Function", "Class", "Method", "Interface", etc. - , nodeSignature :: Maybe Text -- ^ Type signature or declaration header - , nodeSourceUrl :: Maybe Text - , nodeCapturedAt :: Maybe Text - , nodeAuthor :: Maybe Text - , nodeContributor :: Maybe Text - } deriving (Eq, Show, Generic) - -instance ToJSON Node where - toJSON n = object - [ "id" .= nodeId n - , "label" .= nodeLabel n - , "file_type" .= nodeFileType n - , "source_file" .= nodeSourceFile n - , "source_location" .= nodeSourceLocation n - , "line_end" .= nodeLineEnd n - , "kind" .= nodeKind n - , "signature" .= nodeSignature n - , "source_url" .= nodeSourceUrl n - , "captured_at" .= nodeCapturedAt n - , "author" .= nodeAuthor n - , "contributor" .= nodeContributor n - ] - -instance FromJSON Node where - parseJSON = withObject "Node" $ \v -> Node - <$> v .: "id" - <*> v .: "label" - <*> v .: "file_type" - <*> v .: "source_file" - <*> v .:? "source_location" - <*> v .:? "line_end" - <*> v .:? "kind" - <*> v .:? "signature" - <*> v .:? "source_url" - <*> v .:? "captured_at" - <*> v .:? "author" - <*> v .:? "contributor" - --- | Relation types for edges -data Relation - = Calls - | Implements - | References - | Cites - | ConceptuallyRelatedTo - | SharesDataWith - | SemanticallySimilarTo - | RationaleFor - | Imports - | ImportsFrom - | Contains - | Method - | Extends - | Overrides - | DependsOn - deriving (Eq, Show, Generic, Ord) - -instance ToJSON Relation where - toJSON = toJSON . relationToText - -instance FromJSON Relation where - parseJSON = withText "Relation" $ \t -> - case textToRelation t of - Just r -> pure r - Nothing -> fail $ "Unknown relation: " ++ T.unpack t - -relationToText :: Relation -> Text -relationToText = \case - Calls -> "calls" - Implements -> "implements" - References -> "references" - Cites -> "cites" - ConceptuallyRelatedTo -> "conceptually_related_to" - SharesDataWith -> "shares_data_with" - SemanticallySimilarTo -> "semantically_similar_to" - RationaleFor -> "rationale_for" - Imports -> "imports" - ImportsFrom -> "imports_from" - Contains -> "contains" - Method -> "method" - Extends -> "extends" - Overrides -> "overrides" - DependsOn -> "depends_on" - -textToRelation :: Text -> Maybe Relation -textToRelation = \case - "calls" -> Just Calls - "implements" -> Just Implements - "references" -> Just References - "cites" -> Just Cites - "conceptually_related_to" -> Just ConceptuallyRelatedTo - "shares_data_with" -> Just SharesDataWith - "semantically_similar_to" -> Just SemanticallySimilarTo - "rationale_for" -> Just RationaleFor - "imports" -> Just Imports - "imports_from" -> Just ImportsFrom - "contains" -> Just Contains - "method" -> Just Method - "extends" -> Just Extends - "overrides" -> Just Overrides - "depends_on" -> Just DependsOn - _ -> Nothing - --- | Confidence level for an edge -data Confidence = Extracted | Inferred | Ambiguous - deriving (Eq, Show, Generic, Ord) - -instance ToJSON Confidence where - toJSON Extracted = "EXTRACTED" - toJSON Inferred = "INFERRED" - toJSON Ambiguous = "AMBIGUOUS" - -instance FromJSON Confidence where - parseJSON = withText "Confidence" $ \case - "EXTRACTED" -> pure Extracted - "INFERRED" -> pure Inferred - "AMBIGUOUS" -> pure Ambiguous - t -> fail $ "Unknown confidence: " ++ T.unpack t - --- | Convert confidence to a numeric score -confidenceScore :: Confidence -> Double -confidenceScore Extracted = 1.0 -confidenceScore Inferred = 0.7 -confidenceScore Ambiguous = 0.2 - --- | An edge in the knowledge graph -data Edge = Edge - { edgeSource :: NodeId - , edgeTarget :: NodeId - , edgeRelation :: Relation - , edgeConfidence :: Confidence - , edgeConfidenceScore :: Double - , edgeSourceFile :: Text - , edgeSourceLocation :: Maybe Text - , edgeWeight :: Double - } deriving (Eq, Show, Generic) - -instance ToJSON Edge where - toJSON e = object - [ "source" .= edgeSource e - , "target" .= edgeTarget e - , "relation" .= edgeRelation e - , "confidence" .= edgeConfidence e - , "confidence_score" .= edgeConfidenceScore e - , "source_file" .= edgeSourceFile e - , "source_location" .= edgeSourceLocation e - , "weight" .= edgeWeight e - ] - -instance FromJSON Edge where - parseJSON = withObject "Edge" $ \v -> Edge - <$> v .: "source" - <*> v .: "target" - <*> v .: "relation" - <*> v .: "confidence" - <*> v .: "confidence_score" - <*> v .: "source_file" - <*> v .:? "source_location" - <*> v .: "weight" - --- | A hyperedge connecting 3+ nodes -data Hyperedge = Hyperedge - { hyperedgeId :: Text - , hyperedgeLabel :: Text - , hyperedgeNodes :: [NodeId] - , hyperedgeRelation :: Text - , hyperedgeConfidence :: Confidence - , hyperedgeConfidenceScore :: Double - , hyperedgeSourceFile :: Text - } deriving (Eq, Show, Generic) - -instance ToJSON Hyperedge where - toJSON h = object - [ "id" .= hyperedgeId h - , "label" .= hyperedgeLabel h - , "nodes" .= hyperedgeNodes h - , "relation" .= hyperedgeRelation h - , "confidence" .= hyperedgeConfidence h - , "confidence_score" .= hyperedgeConfidenceScore h - , "source_file" .= hyperedgeSourceFile h - ] - -instance FromJSON Hyperedge where - parseJSON = withObject "Hyperedge" $ \v -> Hyperedge - <$> v .: "id" - <*> v .: "label" - <*> v .: "nodes" - <*> v .: "relation" - <*> v .: "confidence" - <*> v .: "confidence_score" - <*> v .: "source_file" - --- | Result of extracting entities from files -data Extraction = Extraction - { extractionNodes :: [Node] - , extractionEdges :: [Edge] - , extractionHyperedges :: [Hyperedge] - , extractionInputTokens :: Int - , extractionOutputTokens :: Int - } deriving (Eq, Show, Generic) - -emptyExtraction :: Extraction -emptyExtraction = Extraction - { extractionNodes = [] - , extractionEdges = [] - , extractionHyperedges = [] - , extractionInputTokens = 0 - , extractionOutputTokens = 0 - } - -instance ToJSON Extraction where - toJSON e = object - [ "nodes" .= extractionNodes e - , "edges" .= extractionEdges e - , "hyperedges" .= extractionHyperedges e - , "input_tokens" .= extractionInputTokens e - , "output_tokens" .= extractionOutputTokens e - ] - --- | A graph with community labels -data LabeledGraph = LabeledGraph - { lgNodes :: [(NodeId, Node)] - , lgEdges :: [Edge] - , lgHyperedges :: [Hyperedge] - , lgCommunityMap :: CommunityMap - } deriving (Eq, Show) - --- | Community membership map: community id → member node ids -type CommunityMap = Map CommunityId [NodeId] - --- | Cohesion scores per community -type CohesionMap = Map CommunityId Double - --- | Analysis results -data Analysis = Analysis - { analysisCommunities :: CommunityMap - , analysisCohesion :: CohesionMap - , analysisGodNodes :: [GodNode] - , analysisSurprises :: [SurprisingConnection] - , analysisQuestions :: [SuggestedQuestion] - } deriving (Eq, Show) - --- | A god node (high-degree hub) -data GodNode = GodNode - { gnId :: NodeId - , gnLabel :: Text - , gnEdges :: Int - } deriving (Eq, Show, Generic) - -instance ToJSON GodNode where - toJSON g = object - [ "id" .= gnId g - , "label" .= gnLabel g - , "edges" .= gnEdges g - ] - -instance FromJSON GodNode where - parseJSON = withObject "GodNode" $ \v -> GodNode - <$> v .: "id" - <*> v .: "label" - <*> v .: "edges" - --- | A surprising cross-community connection -data SurprisingConnection = SurprisingConnection - { scSource :: Text - , scTarget :: Text - , scSourceFiles :: [Text] - , scConfidence :: Confidence - , scRelation :: Text - , scWhy :: Text - } deriving (Eq, Show, Generic) - -instance ToJSON SurprisingConnection where - toJSON s = object - [ "source" .= scSource s - , "target" .= scTarget s - , "source_files" .= scSourceFiles s - , "confidence" .= scConfidence s - , "relation" .= scRelation s - , "why" .= scWhy s - ] - --- | A suggested question from graph analysis -data SuggestedQuestion = SuggestedQuestion - { sqType :: Text - , sqQuestion :: Maybe Text - , sqWhy :: Text - } deriving (Eq, Show, Generic) - -instance ToJSON SuggestedQuestion where - toJSON q = object - [ "type" .= sqType q - , "question" .= sqQuestion q - , "why" .= sqWhy q - ] - --- | Diff between two graph snapshots -data GraphDiff = GraphDiff - { gdNewNodes :: [Node] - , gdRemovedNodes :: [(NodeId, Text)] - , gdNewEdges :: [Edge] - , gdRemovedEdges :: [Edge] - , gdSummary :: Text - } deriving (Eq, Show) - --- | File detection result -data Detection = Detection - { detectionTotalFiles :: Int - , detectionTotalWords :: Int - , detectionNeedsGraph :: Bool - , detectionWarning :: Maybe Text - , detectionFiles :: Map FileCategory [FilePath] - } deriving (Eq, Show) - --- | File categories -data FileCategory - = CodeFiles - | DocFiles - | PaperFiles - | ImageFiles - | VideoFiles - deriving (Eq, Show, Ord, Generic) - -instance ToJSON FileCategory where - toJSON CodeFiles = "code" - toJSON DocFiles = "document" - toJSON PaperFiles = "paper" - toJSON ImageFiles = "image" - toJSON VideoFiles = "video" - --- | Pipeline configuration -data PipelineConfig = PipelineConfig - { cfgInputPath :: FilePath - , cfgOutputDir :: FilePath - , cfgDirected :: Bool - , cfgDeepMode :: Bool - , cfgNoViz :: Bool - , cfgUpdate :: Bool - , cfgClusterOnly :: Bool - , cfgObsidian :: Bool - , cfgObsidianDir :: Maybe FilePath - , cfgNeo4j :: Bool - , cfgNeo4jPush :: Maybe Text -- URI - , cfgMCP :: Maybe FilePath - , cfgSVG :: Bool - , cfgGraphML :: Bool - , cfgWatch :: Bool - , cfgWiki :: Bool - , cfgVerbose :: Bool -- ^ --verbose: show DEBUG level logs - , cfgDebug :: Bool -- ^ --debug: show TRACE level logs + internal details - , cfgEdgeDensity :: EdgeDensity -- ^ how many inferred edges to add - , cfgResolution :: Double -- ^ community resolution: higher = fewer larger communities (default: 1.0) - , cfgMinCommSize :: Int -- ^ minimum community size; smaller ones get merged (default: 3) - , cfgThreads :: Int -- ^ number of parallel extraction threads (default: 1) - , cfgCommunityGraph :: Bool -- ^ export community-level graph JSON for LLM navigation - , cfgGraphosConfig :: GraphosConfig -- ^ LSP servers, language IDs, file extensions (config-driven) - } deriving (Eq, Show) - --- | Edge density level for inference --- Controls how aggressively the pipeline infers additional edges between nodes. -data EdgeDensity - = Sparse -- ^ No inferred edges, only extracted ones - | Normal -- ^ Community bridge edges + transitive deps (default) - | Dense -- ^ All inferred edges: bridges + transitive + shared context - | Maximum -- ^ Dense + lower thresholds for shared context - deriving (Eq, Show, Read) - -defaultConfig :: PipelineConfig -defaultConfig = PipelineConfig - { cfgInputPath = "." - , cfgOutputDir = "graphos-out" - , cfgDirected = False - , cfgDeepMode = False - , cfgNoViz = False - , cfgUpdate = False - , cfgClusterOnly = False - , cfgObsidian = False - , cfgObsidianDir = Nothing - , cfgNeo4j = False - , cfgNeo4jPush = Nothing - , cfgMCP = Nothing - , cfgSVG = False - , cfgGraphML = False - , cfgWatch = False - , cfgWiki = False - , cfgVerbose = False - , cfgDebug = False - , cfgEdgeDensity = Normal - , cfgResolution = 1.0 - , cfgMinCommSize = 3 - , cfgThreads = 1 - , cfgCommunityGraph = False - , cfgGraphosConfig = defaultGraphosConfig - } \ No newline at end of file +import Graphos.Domain.Types.Node (NodeId, Node(..), FileType(..)) +import Graphos.Domain.Types.Edge (EdgeId, Edge(..), Relation(..), relationToText, textToRelation, Confidence(..), confidenceScore) +import Graphos.Domain.Types.Graph (Hyperedge(..), Extraction(..), emptyExtraction, LabeledGraph(..), CommunityId, CommunityMap, CohesionMap, GraphDiff(..)) +import Graphos.Domain.Types.Pipeline (PipelineConfig(..), EdgeDensity(..), defaultConfig, Detection(..), FileCategory(..)) +import Graphos.Domain.Types.Analysis (Analysis(..), GodNode(..), SurprisingConnection(..), SuggestedQuestion(..)) +import Graphos.Domain.Config (GraphosConfig(..), defaultGraphosConfig) \ No newline at end of file diff --git a/src/Graphos/Domain/Types/Analysis.hs b/src/Graphos/Domain/Types/Analysis.hs new file mode 100644 index 0000000..9ba09b3 --- /dev/null +++ b/src/Graphos/Domain/Types/Analysis.hs @@ -0,0 +1,80 @@ +-- | Analysis result types for the knowledge graph. +-- Pure data types with no IO dependencies. +module Graphos.Domain.Types.Analysis + ( -- * Analysis types + Analysis(..) + , GodNode(..) + , SurprisingConnection(..) + , SuggestedQuestion(..) + ) where + +import Data.Aeson (ToJSON(..), FromJSON(..), object, (.=), (.:), withObject) +import Data.Text (Text) +import GHC.Generics (Generic) + +import Graphos.Domain.Types.Edge (Confidence) +import Graphos.Domain.Types.Graph (CommunityMap, CohesionMap) +import Graphos.Domain.Types.Node (NodeId) + +-- | Analysis results +data Analysis = Analysis + { analysisCommunities :: CommunityMap + , analysisCohesion :: CohesionMap + , analysisGodNodes :: [GodNode] + , analysisSurprises :: [SurprisingConnection] + , analysisQuestions :: [SuggestedQuestion] + } deriving (Eq, Show) + +-- | A god node (high-degree hub) +data GodNode = GodNode + { gnId :: NodeId + , gnLabel :: Text + , gnEdges :: Int + } deriving (Eq, Show, Generic) + +instance ToJSON GodNode where + toJSON g = object + [ "id" .= gnId g + , "label" .= gnLabel g + , "edges" .= gnEdges g + ] + +instance FromJSON GodNode where + parseJSON = withObject "GodNode" $ \v -> GodNode + <$> v .: "id" + <*> v .: "label" + <*> v .: "edges" + +-- | A surprising cross-community connection +data SurprisingConnection = SurprisingConnection + { scSource :: Text + , scTarget :: Text + , scSourceFiles :: [Text] + , scConfidence :: Confidence + , scRelation :: Text + , scWhy :: Text + } deriving (Eq, Show, Generic) + +instance ToJSON SurprisingConnection where + toJSON s = object + [ "source" .= scSource s + , "target" .= scTarget s + , "source_files" .= scSourceFiles s + , "confidence" .= scConfidence s + , "relation" .= scRelation s + , "why" .= scWhy s + ] + +-- | A suggested question from graph analysis +data SuggestedQuestion = SuggestedQuestion + { sqType :: Text + , sqQuestion :: Maybe Text + , sqWhy :: Text + } deriving (Eq, Show, Generic) + +instance ToJSON SuggestedQuestion where + toJSON q = object + [ "type" .= sqType q + , "question" .= sqQuestion q + , "why" .= sqWhy q + ] \ No newline at end of file diff --git a/src/Graphos/Domain/Types/Edge.hs b/src/Graphos/Domain/Types/Edge.hs new file mode 100644 index 0000000..e6de7b7 --- /dev/null +++ b/src/Graphos/Domain/Types/Edge.hs @@ -0,0 +1,147 @@ +-- | Edge types for the knowledge graph. +-- Pure data types with no IO dependencies. +{-# LANGUAGE LambdaCase #-} +module Graphos.Domain.Types.Edge + ( -- * Edge types + EdgeId + , Edge(..) + , Relation(..) + , relationToText + , textToRelation + , Confidence(..) + , confidenceScore + ) where + +import Data.Aeson (ToJSON(..), FromJSON(..), object, (.=), (.:), (.:?), withObject, withText) +import Data.Text (Text) +import qualified Data.Text as T +import GHC.Generics (Generic) + +import Graphos.Domain.Types.Node (NodeId) + +-- | Unique identifier for an edge +type EdgeId = Text + +-- | Relation types for edges +data Relation + = Calls + | Implements + | References + | Cites + | ConceptuallyRelatedTo + | SharesDataWith + | SemanticallySimilarTo + | RationaleFor + | Imports + | ImportsFrom + | Contains + | Method + | Extends + | Overrides + | DependsOn + deriving (Eq, Show, Generic, Ord) + +instance ToJSON Relation where + toJSON = toJSON . relationToText + +instance FromJSON Relation where + parseJSON = withText "Relation" $ \t -> + case textToRelation t of + Just r -> pure r + Nothing -> fail $ "Unknown relation: " ++ T.unpack t + +-- | Convert a relation to its text representation +relationToText :: Relation -> Text +relationToText = \case + Calls -> "calls" + Implements -> "implements" + References -> "references" + Cites -> "cites" + ConceptuallyRelatedTo -> "conceptually_related_to" + SharesDataWith -> "shares_data_with" + SemanticallySimilarTo -> "semantically_similar_to" + RationaleFor -> "rationale_for" + Imports -> "imports" + ImportsFrom -> "imports_from" + Contains -> "contains" + Method -> "method" + Extends -> "extends" + Overrides -> "overrides" + DependsOn -> "depends_on" + +-- | Parse a relation from its text representation +textToRelation :: Text -> Maybe Relation +textToRelation = \case + "calls" -> Just Calls + "implements" -> Just Implements + "references" -> Just References + "cites" -> Just Cites + "conceptually_related_to" -> Just ConceptuallyRelatedTo + "shares_data_with" -> Just SharesDataWith + "semantically_similar_to" -> Just SemanticallySimilarTo + "rationale_for" -> Just RationaleFor + "imports" -> Just Imports + "imports_from" -> Just ImportsFrom + "contains" -> Just Contains + "method" -> Just Method + "extends" -> Just Extends + "overrides" -> Just Overrides + "depends_on" -> Just DependsOn + _ -> Nothing + +-- | Confidence level for an edge +data Confidence = Extracted | Inferred | Ambiguous + deriving (Eq, Show, Generic, Ord) + +instance ToJSON Confidence where + toJSON Extracted = "EXTRACTED" + toJSON Inferred = "INFERRED" + toJSON Ambiguous = "AMBIGUOUS" + +instance FromJSON Confidence where + parseJSON = withText "Confidence" $ \case + "EXTRACTED" -> pure Extracted + "INFERRED" -> pure Inferred + "AMBIGUOUS" -> pure Ambiguous + t -> fail $ "Unknown confidence: " ++ T.unpack t + +-- | Convert confidence to a numeric score +confidenceScore :: Confidence -> Double +confidenceScore Extracted = 1.0 +confidenceScore Inferred = 0.7 +confidenceScore Ambiguous = 0.2 + +-- | An edge in the knowledge graph +data Edge = Edge + { edgeSource :: NodeId + , edgeTarget :: NodeId + , edgeRelation :: Relation + , edgeConfidence :: Confidence + , edgeConfidenceScore :: Double + , edgeSourceFile :: Text + , edgeSourceLocation :: Maybe Text + , edgeWeight :: Double + } deriving (Eq, Show, Generic) + +instance ToJSON Edge where + toJSON e = object + [ "source" .= edgeSource e + , "target" .= edgeTarget e + , "relation" .= edgeRelation e + , "confidence" .= edgeConfidence e + , "confidence_score" .= edgeConfidenceScore e + , "source_file" .= edgeSourceFile e + , "source_location" .= edgeSourceLocation e + , "weight" .= edgeWeight e + ] + +instance FromJSON Edge where + parseJSON = withObject "Edge" $ \v -> Edge + <$> v .: "source" + <*> v .: "target" + <*> v .: "relation" + <*> v .: "confidence" + <*> v .: "confidence_score" + <*> v .: "source_file" + <*> v .:? "source_location" + <*> v .: "weight" \ No newline at end of file diff --git a/src/Graphos/Domain/Types/Graph.hs b/src/Graphos/Domain/Types/Graph.hs new file mode 100644 index 0000000..182f21c --- /dev/null +++ b/src/Graphos/Domain/Types/Graph.hs @@ -0,0 +1,115 @@ +-- | Graph-level types: hyperedges, extractions, labeled graphs, communities, and diffs. +-- Pure data types with no IO dependencies. +module Graphos.Domain.Types.Graph + ( -- * Hyperedge types + Hyperedge(..) + + -- * Extraction types + , Extraction(..) + , emptyExtraction + + -- * Graph types + , LabeledGraph(..) + + -- * Community types + , CommunityId + , CommunityMap + , CohesionMap + + -- * Diff types + , GraphDiff(..) + ) where + +import Data.Aeson (ToJSON(..), FromJSON(..), object, (.=), (.:), withObject) +import Data.Map.Strict (Map) +import Data.Text (Text) +import GHC.Generics (Generic) + +import Graphos.Domain.Types.Node (NodeId, Node) +import Graphos.Domain.Types.Edge (Edge, Confidence) + +-- | Community identifier (integer) +type CommunityId = Int + +-- | Community membership map: community id → member node ids +type CommunityMap = Map CommunityId [NodeId] + +-- | Cohesion scores per community +type CohesionMap = Map CommunityId Double + +-- | A hyperedge connecting 3+ nodes +data Hyperedge = Hyperedge + { hyperedgeId :: Text + , hyperedgeLabel :: Text + , hyperedgeNodes :: [NodeId] + , hyperedgeRelation :: Text + , hyperedgeConfidence :: Confidence + , hyperedgeConfidenceScore :: Double + , hyperedgeSourceFile :: Text + } deriving (Eq, Show, Generic) + +instance ToJSON Hyperedge where + toJSON h = object + [ "id" .= hyperedgeId h + , "label" .= hyperedgeLabel h + , "nodes" .= hyperedgeNodes h + , "relation" .= hyperedgeRelation h + , "confidence" .= hyperedgeConfidence h + , "confidence_score" .= hyperedgeConfidenceScore h + , "source_file" .= hyperedgeSourceFile h + ] + +instance FromJSON Hyperedge where + parseJSON = withObject "Hyperedge" $ \v -> Hyperedge + <$> v .: "id" + <*> v .: "label" + <*> v .: "nodes" + <*> v .: "relation" + <*> v .: "confidence" + <*> v .: "confidence_score" + <*> v .: "source_file" + +-- | Result of extracting entities from files +data Extraction = Extraction + { extractionNodes :: [Node] + , extractionEdges :: [Edge] + , extractionHyperedges :: [Hyperedge] + , extractionInputTokens :: Int + , extractionOutputTokens :: Int + } deriving (Eq, Show, Generic) + +-- | Empty extraction with no nodes, edges, or tokens +emptyExtraction :: Extraction +emptyExtraction = Extraction + { extractionNodes = [] + , extractionEdges = [] + , extractionHyperedges = [] + , extractionInputTokens = 0 + , extractionOutputTokens = 0 + } + +instance ToJSON Extraction where + toJSON e = object + [ "nodes" .= extractionNodes e + , "edges" .= extractionEdges e + , "hyperedges" .= extractionHyperedges e + , "input_tokens" .= extractionInputTokens e + , "output_tokens" .= extractionOutputTokens e + ] + +-- | A graph with community labels +data LabeledGraph = LabeledGraph + { lgNodes :: [(NodeId, Node)] + , lgEdges :: [Edge] + , lgHyperedges :: [Hyperedge] + , lgCommunityMap :: CommunityMap + } deriving (Eq, Show) + +-- | Diff between two graph snapshots +data GraphDiff = GraphDiff + { gdNewNodes :: [Node] + , gdRemovedNodes :: [(NodeId, Text)] + , gdNewEdges :: [Edge] + , gdRemovedEdges :: [Edge] + , gdSummary :: Text + } deriving (Eq, Show) \ No newline at end of file diff --git a/src/Graphos/Domain/Types/Node.hs b/src/Graphos/Domain/Types/Node.hs new file mode 100644 index 0000000..4b1bc4f --- /dev/null +++ b/src/Graphos/Domain/Types/Node.hs @@ -0,0 +1,89 @@ +-- | Node types for the knowledge graph. +-- Pure data types with no IO dependencies. +{-# LANGUAGE LambdaCase #-} +module Graphos.Domain.Types.Node + ( -- * Node types + NodeId + , Node(..) + , FileType(..) + ) where + +import Data.Aeson (ToJSON(..), FromJSON(..), object, (.=), (.:), (.:?), withObject, withText) +import Data.Text (Text) +import qualified Data.Text as T +import GHC.Generics (Generic) + +-- | Unique identifier for a node (derived from file + entity name) +type NodeId = Text + +-- | File type classification +data FileType + = CodeFile + | DocumentFile + | PaperFile + | ImageFile + | VideoFile + deriving (Eq, Show, Generic) + +instance ToJSON FileType where + toJSON CodeFile = "code" + toJSON DocumentFile = "document" + toJSON PaperFile = "paper" + toJSON ImageFile = "image" + toJSON VideoFile = "video" + +instance FromJSON FileType where + parseJSON = withText "FileType" $ \t -> case t of + "code" -> pure CodeFile + "document" -> pure DocumentFile + "paper" -> pure PaperFile + "image" -> pure ImageFile + "video" -> pure VideoFile + _ -> fail $ "Unknown file type: " ++ T.unpack t + +-- | A node in the knowledge graph +data Node = Node + { nodeId :: NodeId + , nodeLabel :: Text + , nodeFileType :: FileType + , nodeSourceFile :: Text + , nodeSourceLocation :: Maybe Text + , nodeLineEnd :: Maybe Int -- ^ End line number (for exact code range) + , nodeKind :: Maybe Text -- ^ Symbol kind: "Function", "Class", "Method", "Interface", etc. + , nodeSignature :: Maybe Text -- ^ Type signature or declaration header + , nodeSourceUrl :: Maybe Text + , nodeCapturedAt :: Maybe Text + , nodeAuthor :: Maybe Text + , nodeContributor :: Maybe Text + } deriving (Eq, Show, Generic) + +instance ToJSON Node where + toJSON n = object + [ "id" .= nodeId n + , "label" .= nodeLabel n + , "file_type" .= nodeFileType n + , "source_file" .= nodeSourceFile n + , "source_location" .= nodeSourceLocation n + , "line_end" .= nodeLineEnd n + , "kind" .= nodeKind n + , "signature" .= nodeSignature n + , "source_url" .= nodeSourceUrl n + , "captured_at" .= nodeCapturedAt n + , "author" .= nodeAuthor n + , "contributor" .= nodeContributor n + ] + +instance FromJSON Node where + parseJSON = withObject "Node" $ \v -> Node + <$> v .: "id" + <*> v .: "label" + <*> v .: "file_type" + <*> v .: "source_file" + <*> v .:? "source_location" + <*> v .:? "line_end" + <*> v .:? "kind" + <*> v .:? "signature" + <*> v .:? "source_url" + <*> v .:? "captured_at" + <*> v .:? "author" + <*> v .:? "contributor" \ No newline at end of file diff --git a/src/Graphos/Domain/Types/Pipeline.hs b/src/Graphos/Domain/Types/Pipeline.hs new file mode 100644 index 0000000..dac04a1 --- /dev/null +++ b/src/Graphos/Domain/Types/Pipeline.hs @@ -0,0 +1,110 @@ +-- | Pipeline configuration and detection types. +-- Pure data types with no IO dependencies. +module Graphos.Domain.Types.Pipeline + ( -- * Configuration + PipelineConfig(..) + , EdgeDensity(..) + , defaultConfig + + -- * Detection types + , Detection(..) + , FileCategory(..) + ) where + +import Data.Aeson (ToJSON(..)) +import Data.Map.Strict (Map) +import Data.Text (Text) +import GHC.Generics (Generic) + +import Graphos.Domain.Config (GraphosConfig, defaultGraphosConfig) + +-- | Pipeline configuration +data PipelineConfig = PipelineConfig + { cfgInputPath :: FilePath + , cfgOutputDir :: FilePath + , cfgDirected :: Bool + , cfgDeepMode :: Bool + , cfgNoViz :: Bool + , cfgUpdate :: Bool + , cfgClusterOnly :: Bool + , cfgObsidian :: Bool + , cfgObsidianDir :: Maybe FilePath + , cfgNeo4j :: Bool + , cfgNeo4jPush :: Maybe Text -- URI + , cfgMCP :: Maybe FilePath + , cfgSVG :: Bool + , cfgGraphML :: Bool + , cfgWatch :: Bool + , cfgWiki :: Bool + , cfgVerbose :: Bool -- ^ --verbose: show DEBUG level logs + , cfgDebug :: Bool -- ^ --debug: show TRACE level logs + internal details + , cfgEdgeDensity :: EdgeDensity -- ^ how many inferred edges to add + , cfgResolution :: Double -- ^ community resolution: higher = fewer larger communities (default: 1.0) + , cfgMinCommSize :: Int -- ^ minimum community size; smaller ones get merged (default: 3) + , cfgThreads :: Int -- ^ number of parallel extraction threads (default: 1) + , cfgCommunityGraph :: Bool -- ^ export community-level graph JSON for LLM navigation + , cfgGraphosConfig :: GraphosConfig -- ^ LSP servers, language IDs, file extensions (config-driven) + } deriving (Eq, Show) + +-- | Edge density level for inference +-- Controls how aggressively the pipeline infers additional edges between nodes. +data EdgeDensity + = Sparse -- ^ No inferred edges, only extracted ones + | Normal -- ^ Community bridge edges + transitive deps (default) + | Dense -- ^ All inferred edges: bridges + transitive + shared context + | Maximum -- ^ Dense + lower thresholds for shared context + deriving (Eq, Show, Read) + +-- | Default pipeline configuration +defaultConfig :: PipelineConfig +defaultConfig = PipelineConfig + { cfgInputPath = "." + , cfgOutputDir = "graphos-out" + , cfgDirected = False + , cfgDeepMode = False + , cfgNoViz = False + , cfgUpdate = False + , cfgClusterOnly = False + , cfgObsidian = False + , cfgObsidianDir = Nothing + , cfgNeo4j = False + , cfgNeo4jPush = Nothing + , cfgMCP = Nothing + , cfgSVG = False + , cfgGraphML = False + , cfgWatch = False + , cfgWiki = False + , cfgVerbose = False + , cfgDebug = False + , cfgEdgeDensity = Normal + , cfgResolution = 1.0 + , cfgMinCommSize = 3 + , cfgThreads = 1 + , cfgCommunityGraph = False + , cfgGraphosConfig = defaultGraphosConfig + } + +-- | File detection result +data Detection = Detection + { detectionTotalFiles :: Int + , detectionTotalWords :: Int + , detectionNeedsGraph :: Bool + , detectionWarning :: Maybe Text + , detectionFiles :: Map FileCategory [FilePath] + } deriving (Eq, Show) + +-- | File categories +data FileCategory + = CodeFiles + | DocFiles + | PaperFiles + | ImageFiles + | VideoFiles + deriving (Eq, Show, Ord, Generic) + +instance ToJSON FileCategory where + toJSON CodeFiles = "code" + toJSON DocFiles = "document" + toJSON PaperFiles = "paper" + toJSON ImageFiles = "image" + toJSON VideoFiles = "video" \ No newline at end of file diff --git a/src/Graphos/Infrastructure/LSP/CapabilityParse.hs b/src/Graphos/Infrastructure/LSP/CapabilityParse.hs new file mode 100644 index 0000000..f2a3da8 --- /dev/null +++ b/src/Graphos/Infrastructure/LSP/CapabilityParse.hs @@ -0,0 +1,70 @@ +-- | LSP server capability parsing from initialize responses. +module Graphos.Infrastructure.LSP.CapabilityParse + ( parseServerCapabilities + , defaultServerCapabilities + , ServerCapabilities(..) + ) where + +import Data.Aeson (Value(..)) +import qualified Data.Aeson as Aeson +import qualified Data.Aeson.Key as AK (fromText) +import qualified Data.Aeson.KeyMap as KM +import Data.Text (Text) + +import Graphos.Infrastructure.LSP.Protocol + +-- | Parse ServerCapabilities from the initialize JSON-RPC response. +-- Defaults to False for any capability not explicitly advertised (safe fallback). +parseServerCapabilities :: Value -> ServerCapabilities +parseServerCapabilities (Object o) = + let mResult = KM.lookup "result" o + mCaps = case mResult of + Just (Object r) -> KM.lookup "capabilities" r + _ -> Nothing + in case mCaps of + Just (Object caps) -> parseCapsObj caps + _ -> defaultServerCapabilities +parseServerCapabilities _ = defaultServerCapabilities + +parseCapsObj :: Aeson.Object -> ServerCapabilities +parseCapsObj caps = ServerCapabilities + { scpDocumentSymbolProvider = lookupBool caps "textDocument" "documentSymbolProvider" + , scpReferencesProvider = lookupBool caps "textDocument" "referencesProvider" + , scpCallHierarchyProvider = lookupBoolCaps caps "textDocument" "callHierarchyProvider" + , scpDefinitionProvider = lookupBool caps "textDocument" "definitionProvider" + , scpWorkspaceSymbolProvider = lookupBoolCaps caps "workspace" "symbolProvider" + } + +-- | Default server capabilities (all disabled — safe fallback) +defaultServerCapabilities :: ServerCapabilities +defaultServerCapabilities = ServerCapabilities + { scpDocumentSymbolProvider = False + , scpReferencesProvider = False + , scpCallHierarchyProvider = False + , scpDefinitionProvider = False + , scpWorkspaceSymbolProvider = False + } + +-- | Look up a boolean capability nested under a top-level key. +lookupBool :: Aeson.Object -> Text -> Text -> Bool +lookupBool caps topKey subKey = + case KM.lookup (AK.fromText topKey) caps of + Just (Object td) -> + case KM.lookup (AK.fromText subKey) td of + Just (Aeson.Bool b) -> b + Just (Object _) -> True + Just (Aeson.Number _) -> True + _ -> False + _ -> False + +-- | Same as lookupBool but also handles the capability being a nested object +lookupBoolCaps :: Aeson.Object -> Text -> Text -> Bool +lookupBoolCaps caps topKey subKey = + case KM.lookup (AK.fromText topKey) caps of + Just (Object td) -> + case KM.lookup (AK.fromText subKey) td of + Just (Aeson.Bool b) -> b + Just (Object _) -> True + Just (Aeson.Number _) -> True + _ -> False + _ -> False \ No newline at end of file diff --git a/src/Graphos/Infrastructure/LSP/Client.hs b/src/Graphos/Infrastructure/LSP/Client.hs index 84fc5d7..d95db6e 100644 --- a/src/Graphos/Infrastructure/LSP/Client.hs +++ b/src/Graphos/Infrastructure/LSP/Client.hs @@ -1,7 +1,6 @@ --- | LSP Client - connects to language servers and extracts symbol trees. +-- | LSP Client (re-export hub) — thin orchestrator that delegates to sub-modules. -- Uses proper JSON-RPC over stdio protocol with Content-Length framing. -- Shares one LSP connection per language server for efficiency. -{-# LANGUAGE ScopedTypeVariables #-} module Graphos.Infrastructure.LSP.Client ( -- * LSP Client LSPClient(..) @@ -28,730 +27,27 @@ module Graphos.Infrastructure.LSP.Client , languageIdFromExt ) where -import Control.Concurrent.MVar (MVar, newMVar, takeMVar, putMVar) -import Control.Concurrent (threadDelay) -import Control.Exception (catch, try, SomeException(..)) -import Data.Aeson (ToJSON, encode, eitherDecode, Value(..)) -import qualified Data.Aeson as Aeson -import qualified Data.Aeson.Key as AK (fromText) -import qualified Data.Aeson.KeyMap as KM -import qualified Data.Vector as V -import qualified Data.ByteString as BS -import qualified Data.ByteString.Lazy as BSL -import qualified Data.ByteString.Char8 as B8 -import Data.Map.Strict (Map) -import qualified Data.Map.Strict as Map -import Data.List (isInfixOf) -import Data.Maybe (mapMaybe) -import Data.Text (Text) -import qualified Data.Text as T -import System.Directory (findExecutable) -import System.Process (ProcessHandle, createProcess, proc, std_in, std_out, std_err, StdStream(CreatePipe), terminateProcess) -import System.IO (Handle, hFlush) -import System.Timeout (timeout) - -import Graphos.Domain.Types -import Graphos.Infrastructure.LSP.Protocol - --- ─────────────────────────────────────────────── --- LSP Client types --- ─────────────────────────────────────────────── - -data LSPClientConfig = LSPClientConfig - { lspCommand :: FilePath - , lspArgs :: [String] - , lspRootUri :: FilePath - , lspTimeout :: Int -- seconds - } deriving (Eq, Show) - -defaultLSPConfig :: FilePath -> LSPClientConfig -defaultLSPConfig root = LSPClientConfig - { lspCommand = "" - , lspArgs = [] - , lspRootUri = root - , lspTimeout = 60 - } - -data LSPClient = LSPClient - { lspHandle :: ProcessHandle - , lspStdin :: Handle - , lspStdout :: Handle - , lspConfig :: LSPClientConfig - , lspMessageId :: MVar Int - , lspServerCaps :: ServerCapabilities - } - --- ─────────────────────────────────────────────── --- Language server registry (default / hardcoded) --- Prefer using findLSPServerFromConfig with a GraphosConfig --- for user-configurable LSP servers. This fallback remains --- for cases where no config is available (e.g. tests). --- ─────────────────────────────────────────────── - --- | Default hardcoded LSP server commands. --- Kept for backward compatibility; prefer 'defaultLSPServers' from Domain.Config. -languageServerCommands :: Map String (String, [String]) -languageServerCommands = Map.fromList - [ (".ts", ("typescript-language-server", ["--stdio"])) - , (".tsx", ("typescript-language-server", ["--stdio"])) - , (".js", ("typescript-language-server", ["--stdio"])) - , (".jsx", ("typescript-language-server", ["--stdio"])) - , (".py", ("pyright-langserver", ["--stdio"])) - , (".go", ("gopls", [])) - , (".rs", ("rust-analyzer", [])) - , (".c", ("clangd", [])) - , (".cpp", ("clangd", [])) - , (".h", ("clangd", [])) - , (".hpp", ("clangd", [])) - , (".java", ("jdtls", [])) - , (".cs", ("omnisharp", [])) - , (".rb", ("solargraph", ["--stdio"])) - , (".hs", ("haskell-language-server", ["--lsp"])) - , (".lhs", ("haskell-language-server", ["--lsp"])) - , (".php", ("phpactor", [])) - , (".swift", ("sourcekit-lsp", [])) - , (".kt", ("kotlin-language-server", [])) - , (".kts", ("kotlin-language-server", [])) - , (".scala", ("metals", [])) - , (".lua", ("lua-language-server", [])) - , (".zig", ("zls", [])) - , (".ex", ("elixir-ls", [])) - , (".exs", ("elixir-ls", [])) - , (".dart", ("dart", ["analyze", "--stdio"])) - , (".vue", ("vue-language-server", [])) - , (".svelte", ("svelte-language-server", [])) - , (".nix", ("nixd", [])) - , (".json", ("vscode-json-language-server", ["--stdio"])) - ] - --- | Find an LSP server for a file extension using the default hardcoded registry. --- Prefer 'findLSPServerFromConfig' from Infrastructure.Config for user-configurable lookups. -findLSPServer :: String -> IO (Maybe (String, [String])) -findLSPServer ext = do - case Map.lookup ext languageServerCommands of - Nothing -> pure Nothing - Just (cmd, args) -> do - found <- findExecutable cmd - case found of - Just path -> pure $ Just (path, args) - Nothing -> pure Nothing - --- ─────────────────────────────────────────────── --- LSP Protocol: sending and receiving messages --- ─────────────────────────────────────────────── - --- | Send a JSON-RPC message with proper Content-Length framing -sendLSPMessage :: ToJSON a => Handle -> a -> IO () -sendLSPMessage h msg = do - let content = BSL.toStrict (encode msg) - contentLen = BS.length content - header = B8.pack $ "Content-Length: " ++ show contentLen ++ "\r\n\r\n" - BS.hPut h (header `BS.append` content) - hFlush h - --- | Read a byte until newline, stripping \r -readLineLF :: Handle -> IO String -readLineLF h = do - let loop acc = do - c <- BS.hGet h 1 - if BS.null c - then pure (reverse acc) - else case BS.head c of - 10 -> pure (reverse acc) -- \n = end of line - 13 -> loop acc -- \r = skip - b -> loop (toEnum (fromEnum b) : acc) - loop [] - --- | Read a single LSP message (Content-Length header + JSON body) --- LSP protocol format: Content-Length: N\r\n\r\n{...N bytes of JSON...} -readLSPMessage :: Handle -> IO (Either String Value) -readLSPMessage outh = catch (do - -- Read header lines until we find Content-Length - let findHeader = do - line <- readLineLF outh -- reads until \n, strips \r - if null line - then findHeader -- skip blank lines - else do - let contentLen = parseContentLength line - if contentLen < 0 - then findHeader -- not a Content-Length line, keep reading - else do - -- After Content-Length line, there may be more headers or an empty line (\r\n) - -- Read until we get an empty line (end of headers) - skipHeaders - -- Now read JSON body - result <- timeout 10000000 (BSL.hGet outh contentLen) - case result of - Nothing -> pure $ Left "Timeout reading body" - Just bodyBytes -> case eitherDecode bodyBytes of - Right val -> pure $ Right val - Left err -> pure $ Left $ "JSON parse error (" ++ show contentLen ++ " bytes): " ++ err - mResult <- timeout 10000000 findHeader -- 10s overall timeout - case mResult of - Nothing -> pure $ Left "Timeout waiting for LSP response" - Just result -> pure result - ) $ \(e :: SomeException) -> pure $ Left $ "Read error: " ++ show e - where - -- Skip remaining headers until empty line (separator between headers and body) - skipHeaders = do - line <- readLineLF outh - if null line then pure () -- empty line = end of headers - else skipHeaders -- skip this header line - --- | Parse Content-Length from header line like "Content-Length: 1234" -parseContentLength :: String -> Int -parseContentLength header = - let prefix :: String = "Content-Length:" - trimmed = dropWhile (== ' ') $ drop (length prefix) (takeWhile (/= '\r') header) - in case reads trimmed of - [(n, "")] -> n - _ -> -1 - --- | Read LSP messages until we get one with a matching "id" field -readLSPResponseForId :: Handle -> Int -> IO (Either String Value) -readLSPResponseForId outh targetId = loop - where - loop = do - result <- readLSPMessage outh - case result of - Left err -> pure $ Left err - Right val@(Object o) -> - case KM.lookup "id" o of - Just (Aeson.Number n) | round n == targetId -> pure $ Right val - Just (Aeson.Number _) -> loop -- different id, keep reading - _ -> loop -- notification (no id), keep reading - Right _ -> loop - --- | Drain all pending notifications from LSP server -drainNotifications :: Handle -> Int -> IO () -drainNotifications outh micros = do - mMsg <- timeout micros (readLSPMessage outh) - case mMsg of - Just (Right _) -> drainNotifications outh micros - _ -> pure () - --- ─────────────────────────────────────────────── --- LSP Client lifecycle --- ─────────────────────────────────────────────── - -connectToLSP :: LSPClientConfig -> IO (Either Text LSPClient) -connectToLSP config = catch (do - putStrLn $ "[lsp] Starting: " ++ lspCommand config ++ " " ++ unwords (lspArgs config) - let processSpec = proc (lspCommand config) (lspArgs config) - (minH, moutH, _, ph) <- createProcess processSpec - { std_in = CreatePipe - , std_out = CreatePipe - , std_err = CreatePipe - } - case (minH, moutH) of - (Just inh, Just outh) -> do - idVar <- newMVar 2 -- start at 2 since 1 is used for initialize - - -- 1. Send initialize request - let initMsg = lspInitialize (lspRootUri config) - sendLSPMessage inh initMsg - - -- 2. Read responses until we get the initialize response (id=1) - -- Use extended timeout for initialization (LSP servers like HLS can be slow) - let initTimeoutMicros = lspTimeout config * 1000000 - initResp <- timeout initTimeoutMicros (readLSPResponseForId outh 1) - case initResp of - Nothing -> do - putStrLn "[lsp] Initialize failed: Timeout waiting for LSP response" - terminateProcess ph - pure $ Left $ T.pack "LSP initialize failed: Timeout waiting for LSP response" - Just (Left err) -> do - putStrLn $ "[lsp] Initialize failed: " ++ err - terminateProcess ph - pure $ Left $ T.pack $ "LSP initialize failed: " ++ err - Just (Right respVal) -> do - putStrLn "[lsp] Initialize successful" - - -- 3. Send initialized notification - sendLSPMessage inh lspInitialized - - -- 4. Drain post-init notifications (slow servers like HLS need time to index) - let drainMicros = case lspCommand config of - cmd | "haskell-language-server" `isInfixOf` cmd -> 15000000 -- 15s for HLS - | otherwise -> 3000000 -- 3s for others - drainNotifications outh drainMicros - - let caps = parseServerCapabilities respVal - putStrLn $ "[lsp] Server capabilities: documentSymbol=" ++ show (scpDocumentSymbolProvider caps) - ++ " workspaceSymbol=" ++ show (scpWorkspaceSymbolProvider caps) - - putStrLn $ "[lsp] Connected to " ++ lspCommand config - pure $ Right LSPClient - { lspHandle = ph - , lspStdin = inh - , lspStdout = outh - , lspConfig = config - , lspMessageId = idVar - , lspServerCaps = caps - } - _ -> pure $ Left $ T.pack "Failed to create LSP process handles" - ) $ \(e :: SomeException) -> pure $ Left $ T.pack $ "LSP connection error: " ++ show e - -disconnectLSP :: LSPClient -> IO () -disconnectLSP client = do - result <- try $ do - sendLSPMessage (lspStdin client) lspShutdown - _ <- timeout 3000000 (readLSPResponseForId (lspStdout client) 999) - pure () - case result of - Left (_ :: SomeException) -> pure () - Right _ -> pure () - -- Send exit notification after shutdown, then allow brief cleanup time - catch (sendLSPMessage (lspStdin client) lspExit) $ \(_ :: SomeException) -> pure () - threadDelay 100000 -- 100ms for server cleanup before SIGTERM - terminateProcess (lspHandle client) - putStrLn $ "[lsp] Disconnected from " ++ lspCommand (lspConfig client) - --- ─────────────────────────────────────────────── --- Extraction via LSP --- ─────────────────────────────────────────────── - --- | Extract entities and relationships from a file using LSP. --- Opens the file, gets document symbols, then closes it. --- Returns empty extraction on any error (never throws). -extractViaLSP :: LSPClient -> FilePath -> IO Extraction -extractViaLSP client filePath = - catch (do - putStrLn $ "[lsp] Extracting: " ++ filePath - let ext = takeExtension filePath - langId = languageIdFromExt ext - - -- Read file content for didOpen - fileContent <- catch (T.pack <$> readFile filePath) $ \(_ :: SomeException) -> pure "" - - -- Open the document - let openMsg = lspDidOpen filePath langId fileContent - catch (sendLSPMessage (lspStdin client) openMsg) $ \(_ :: SomeException) -> pure () - catch (drainNotifications (lspStdout client) 500000) $ \(_ :: SomeException) -> pure () - - -- Request document symbols - symbols <- extractDocumentSymbols client filePath - putStrLn $ "[lsp] Got " ++ show (length symbols) ++ " symbols from " ++ filePath - - -- Close the document - let closeMsg = lspDidClose filePath - catch (sendLSPMessage (lspStdin client) closeMsg) $ \(_ :: SomeException) -> pure () - - let nodes = symbolToNodes filePath symbols - edges = symbolTreeToEdges filePath symbols - pure emptyExtraction - { extractionNodes = nodes - , extractionEdges = edges - } - ) $ \(e :: SomeException) -> do - -- LSP communication failed for this file — return stub instead of crashing - putStrLn $ "[lsp] Warning: extraction failed for " ++ filePath ++ ": " ++ show e - pure emptyExtraction - { extractionNodes = [makeStubNode filePath] - , extractionEdges = [] - } - --- | Extract document symbols from a file -extractDocumentSymbols :: LSPClient -> FilePath -> IO [DocumentSymbolResult] -extractDocumentSymbols client filePath = do - nextId <- takeMVar (lspMessageId client) - putMVar (lspMessageId client) (nextId + 1) - let req = lspDocumentSymbolWithId filePath nextId - sendLSPMessage (lspStdin client) req - - resp <- readLSPResponseForId (lspStdout client) nextId - case resp of - Left err -> do - putStrLn $ "[lsp] Failed to get symbols: " ++ err - pure [] - Right val -> pure $ parseSymbolsFromResponse val - --- | Parse symbol tree from JSON-RPC response. --- LSP returns a tree: each symbol may have "children" forming parent→child edges. -parseSymbolsFromResponse :: Value -> [DocumentSymbolResult] -parseSymbolsFromResponse (Object o) = - case KM.lookup "result" o of - Just (Array arr) -> concatMap (flattenSymbols []) (V.toList arr) - Just (Object obj) -> - case KM.lookup "children" obj of - Just (Array arr) -> concatMap (flattenSymbols []) (V.toList arr) - _ -> [parseSingleSymbol obj] - _ -> [] - where - -- Flatten the tree into a flat list, keeping parent info - flattenSymbols :: [Text] -> Value -> [DocumentSymbolResult] - flattenSymbols parents (Object s) = - let name = case KM.lookup "name" s of - Just (Aeson.String t) -> t - _ -> "" - kind = case KM.lookup "kind" s of - Just (Aeson.Number n) -> round n - _ -> 0 - range = case KM.lookup "range" s of - Just (Object r) -> parseRange r - _ -> dummyRange - childrenVals = case KM.lookup "children" s of - Just (Array arr) -> V.toList arr - _ -> [] - in if T.null name - then concatMap (flattenSymbols parents) childrenVals - else DocumentSymbolResult - { dsrName = name - , dsrKind = kind - , dsrRange = range - , dsrChildren = [] -- flat list, we track parent→child in edges - } - : concatMap (flattenSymbols (name : parents)) childrenVals - flattenSymbols _ _ = [] -- ignore non-object values - - parseSingleSymbol s = - let name = case KM.lookup "name" s of - Just (Aeson.String t) -> t - _ -> "" - kind = case KM.lookup "kind" s of - Just (Aeson.Number n) -> round n - _ -> 0 - range = case KM.lookup "range" s of - Just (Object r) -> parseRange r - _ -> dummyRange - in DocumentSymbolResult { dsrName = name, dsrKind = kind, dsrRange = range, dsrChildren = [] } - - parseRange r = - let start = case KM.lookup "start" r of - Just (Object p) -> parsePos p - _ -> Position 0 0 - end = case KM.lookup "end" r of - Just (Object p) -> parsePos p - _ -> Position 0 0 - in Range start end - - parsePos p = - let line = case KM.lookup "line" p of - Just (Aeson.Number n) -> round n - _ -> 0 - char = case KM.lookup "character" p of - Just (Aeson.Number n) -> round n - _ -> 0 - in Position line char - - dummyRange = Range (Position 0 0) (Position 0 0) - -parseSymbolsFromResponse _ = [] - --- | Extract call hierarchy (incoming calls) for a symbol -extractCallHierarchy :: LSPClient -> Text -> IO [CallHierarchyItem] -extractCallHierarchy client _name = do - _ <- takeMVar (lspMessageId client) - putMVar (lspMessageId client) 1 - pure [] -- placeholder - --- ─────────────────────────────────────────────── --- Symbol → Node/Edge conversion --- ─────────────────────────────────────────────── - -symbolToNodes :: FilePath -> [DocumentSymbolResult] -> [Node] -symbolToNodes filePath symbols = - [ Node - { nodeId = makeNodeId filePath (dsrName sym) - , nodeLabel = dsrName sym - , nodeFileType = CodeFile - , nodeSourceFile = T.pack filePath - , nodeSourceLocation = Just $ T.pack ("L" ++ show (posLine (rangeStart (dsrRange sym)))) - , nodeLineEnd = Just $ posLine (rangeEnd (dsrRange sym)) - , nodeKind = Just $ symbolKindToText (dsrKind sym) - , nodeSignature = Nothing - , nodeSourceUrl = Nothing - , nodeCapturedAt = Nothing - , nodeAuthor = Nothing - , nodeContributor = Nothing - } - | sym <- symbols - ] - --- | Build edges from the symbol tree (parent contains child). --- We add: --- 1. A Contains edge from the file to each top-level symbol --- 2. Contains edges between parent symbols and their children (from the symbol hierarchy) -symbolTreeToEdges :: FilePath -> [DocumentSymbolResult] -> [Edge] -symbolTreeToEdges filePath flatSymbols = - let -- File → symbol edges - fileEdges = - [ Edge - { edgeSource = T.pack (takeWhile (/= '.') $ reverse $ takeWhile (/= '/') $ reverse filePath) - , edgeTarget = makeNodeId filePath (dsrName sym) - , edgeRelation = Contains - , edgeConfidence = Extracted - , edgeConfidenceScore = 1.0 - , edgeSourceFile = T.pack filePath - , edgeSourceLocation = Just $ T.pack ("L" ++ show (posLine (rangeStart (dsrRange sym)))) - , edgeWeight = 1.0 - } - | sym <- flatSymbols - ] - -- Parent → child symbol edges (based on the hierarchy) - hierarchyEdges = buildHierarchyEdges filePath flatSymbols - in fileEdges ++ hierarchyEdges - --- | Build parent→child Contains edges from the symbol hierarchy. --- Since parseSymbolsFromResponse flattens the tree, we track parent context --- during the recursive descent. However, since our flat list doesn't preserve --- parent info, we reconstruct it from ranges: a parent's range contains a child's range. -buildHierarchyEdges :: FilePath -> [DocumentSymbolResult] -> [Edge] -buildHierarchyEdges filePath symbols = - let -- For each pair, check if one's range contains the other - -- A contains B if B's start is >= A's start AND B's end is <= A's end AND A != B - containsPairs = [(parent, child) - | parent <- symbols - , child <- symbols - , dsrName parent /= dsrName child - , rangeContains (dsrRange parent) (dsrRange child) - ] - -- Keep only direct parent→child (remove transitive containment) - -- A is a direct parent of B if no C exists where A contains C and C contains B - directPairs = [(p, c) | (p, c) <- containsPairs - , not (any (\m -> dsrName m /= dsrName p - && dsrName m /= dsrName c - && rangeContains (dsrRange p) (dsrRange m) - && rangeContains (dsrRange m) (dsrRange c)) symbols)] - in [ Edge - { edgeSource = makeNodeId filePath (dsrName parent) - , edgeTarget = makeNodeId filePath (dsrName child) - , edgeRelation = Contains - , edgeConfidence = Extracted - , edgeConfidenceScore = 1.0 - , edgeSourceFile = T.pack filePath - , edgeSourceLocation = Just $ T.pack ("L" ++ show (posLine (rangeStart (dsrRange parent)))) - , edgeWeight = 1.0 - } - | (parent, child) <- directPairs] - --- | Check if one range fully contains another -rangeContains :: Range -> Range -> Bool -rangeContains outer inner = - let outerStart = posLine (rangeStart outer) * 10000 + posCharacter (rangeStart outer) - outerEnd = posLine (rangeEnd outer) * 10000 + posCharacter (rangeEnd outer) - innerStart = posLine (rangeStart inner) * 10000 + posCharacter (rangeStart inner) - innerEnd = posLine (rangeEnd inner) * 10000 + posCharacter (rangeEnd inner) - in innerStart >= outerStart && innerEnd <= outerEnd - --- ─────────────────────────────────────────────── --- Helpers --- ─────────────────────────────────────────────── - -makeNodeId :: FilePath -> Text -> NodeId -makeNodeId filePath name = - let stem = T.pack $ takeWhile (/= '.') $ reverse $ takeWhile (/= '/') $ reverse filePath - -- Include directory hash to prevent collisions (e.g., src/Types.hs vs app/Types.hs) - dirPart = reverse $ dropWhile (/= '/') $ reverse filePath - dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) - hashPrefix = T.pack $ show dirHash - in hashPrefix <> T.pack "_" <> stem <> T.pack "_" <> name - --- | Create a stub node when LSP extraction fails -makeStubNode :: FilePath -> Node -makeStubNode filePath = - let name = T.pack $ takeWhile (/= '.') $ reverse $ takeWhile (/= '/') $ reverse filePath - dirPart = reverse $ dropWhile (/= '/') $ reverse filePath - dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) - hashPrefix = T.pack $ show dirHash - nodeId' = hashPrefix <> T.pack "_" <> name - in Node - { nodeId = nodeId' - , nodeLabel = name - , nodeFileType = CodeFile - , nodeSourceFile = T.pack filePath - , nodeSourceLocation = Nothing - , nodeLineEnd = Nothing - , nodeKind = Nothing - , nodeSignature = Nothing - , nodeSourceUrl = Nothing - , nodeCapturedAt = Nothing - , nodeAuthor = Nothing - , nodeContributor = Nothing - } - --- | Convert LSP SymbolKind integer to a human-readable text label. --- See: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#symbolKind -symbolKindToText :: Int -> Text -symbolKindToText k = case k of - 1 -> "File" - 2 -> "Module" - 3 -> "Namespace" - 4 -> "Package" - 5 -> "Class" - 6 -> "Method" - 7 -> "Property" - 8 -> "Field" - 9 -> "Constructor" - 10 -> "Enum" - 11 -> "Interface" - 12 -> "Function" - 13 -> "Variable" - 14 -> "Constant" - 15 -> "String" - 16 -> "Number" - 17 -> "Boolean" - 18 -> "Array" - 19 -> "Object" - 20 -> "Key" - 21 -> "Null" - 22 -> "EnumMember" - 23 -> "Struct" - 24 -> "Event" - 25 -> "Operator" - 26 -> "TypeParameter" - _ -> "Unknown" - -takeExtension :: FilePath -> String -takeExtension path = - let reversed = reverse path - ext = takeWhile (/= '.') reversed - in if null ext then "" else '.' : reverse ext - --- ─────────────────────────────────────────────── --- Parse server capabilities from initialize response --- ─────────────────────────────────────────────── - --- | Parse ServerCapabilities from the initialize JSON-RPC response. --- Defaults to False for any capability not explicitly advertised (safe fallback). -parseServerCapabilities :: Value -> ServerCapabilities -parseServerCapabilities (Object o) = - let -- Navigate: result.capabilities - mResult = KM.lookup "result" o - mCaps = case mResult of - Just (Object r) -> KM.lookup "capabilities" r - _ -> Nothing - in case mCaps of - Just (Object caps) -> parseCapsObj caps - _ -> defaultServerCapabilities -parseServerCapabilities _ = defaultServerCapabilities - -parseCapsObj :: Aeson.Object -> ServerCapabilities -parseCapsObj caps = ServerCapabilities - { scpDocumentSymbolProvider = lookupBool caps "textDocument" "documentSymbolProvider" - , scpReferencesProvider = lookupBool caps "textDocument" "referencesProvider" - , scpCallHierarchyProvider = lookupBoolCaps caps "textDocument" "callHierarchyProvider" - , scpDefinitionProvider = lookupBool caps "textDocument" "definitionProvider" - , scpWorkspaceSymbolProvider = lookupBoolCaps caps "workspace" "symbolProvider" - } - -defaultServerCapabilities :: ServerCapabilities -defaultServerCapabilities = ServerCapabilities - { scpDocumentSymbolProvider = False - , scpReferencesProvider = False - , scpCallHierarchyProvider = False - , scpDefinitionProvider = False - , scpWorkspaceSymbolProvider = False - } - --- | Look up a boolean capability nested under a top-level key. --- e.g. lookupBool caps "textDocument" "documentSymbolProvider" -lookupBool :: Aeson.Object -> Text -> Text -> Bool -lookupBool caps topKey subKey = - case KM.lookup (AK.fromText topKey) caps of - Just (Object td) -> - case KM.lookup (AK.fromText subKey) td of - Just (Aeson.Bool b) -> b - Just (Object _) -> True -- if it's an object, the capability exists - Just (Aeson.Number _) -> True -- some servers use 0/1 - _ -> False - _ -> False - --- | Same as lookupBool but also handles the capability being a nested object -lookupBoolCaps :: Aeson.Object -> Text -> Text -> Bool -lookupBoolCaps caps topKey subKey = - case KM.lookup (AK.fromText topKey) caps of - Just (Object td) -> - case KM.lookup (AK.fromText subKey) td of - Just (Aeson.Bool b) -> b - Just (Object _) -> True -- capability exists with options - Just (Aeson.Number _) -> True - _ -> False - _ -> False - --- ─────────────────────────────────────────────── --- Workspace symbol extraction --- ─────────────────────────────────────────────── - --- | Extract all symbols in the project using workspace/symbol. --- Returns a list grouped by file URI for downstream consumption. -extractWorkspaceSymbols :: LSPClient -> IO (Either Text [SymbolInformation]) -extractWorkspaceSymbols client = catch (do - nextId <- takeMVar (lspMessageId client) - putMVar (lspMessageId client) (nextId + 1) - let req = lspWorkspaceSymbolWithId nextId "" - sendLSPMessage (lspStdin client) req - - resp <- readLSPResponseForId (lspStdout client) nextId - case resp of - Left err -> pure $ Left $ T.pack $ "workspace/symbol failed: " ++ err - Right val -> pure $ Right $ parseWorkspaceSymbolResponse val - ) $ \(e :: SomeException) -> pure $ Left $ T.pack $ "workspace/symbol error: " ++ show e - --- | Parse workspace/symbol response into SymbolInformation list -parseWorkspaceSymbolResponse :: Value -> [SymbolInformation] -parseWorkspaceSymbolResponse (Object o) = - case KM.lookup "result" o of - Just (Array arr) -> mapMaybe parseSymInfo (V.toList arr) - _ -> [] - where - parseSymInfo (Object s) = - let name = case KM.lookup "name" s of - Just (Aeson.String t) -> t - _ -> "" - kind = case KM.lookup "kind" s of - Just (Aeson.Number n) -> round n - _ -> 0 - loc = case KM.lookup "location" s of - Just (Object l) -> parseLocation l - _ -> Location "" (Range (Position 0 0) (Position 0 0)) - in if T.null name then Nothing - else Just SymbolInformation { siName = name, siKind = kind, siLocation = loc } - parseSymInfo _ = Nothing - - parseLocation l = - let uri = case KM.lookup "uri" l of - Just (Aeson.String u) -> T.drop 7 u -- strip "file://" - _ -> "" - range = case KM.lookup "range" l of - Just (Object r) -> parseRange' r - _ -> Range (Position 0 0) (Position 0 0) - in Location uri range - - parseRange' r = - let start = case KM.lookup "start" r of - Just (Object p) -> parsePos' p - _ -> Position 0 0 - end = case KM.lookup "end" r of - Just (Object p) -> parsePos' p - _ -> Position 0 0 - in Range start end - - parsePos' p = - let line = case KM.lookup "line" p of - Just (Aeson.Number n) -> round n - _ -> 0 - char = case KM.lookup "character" p of - Just (Aeson.Number n) -> round n - _ -> 0 - in Position line char - -parseWorkspaceSymbolResponse _ = [] - --- | Convert workspace symbols to DocumentSymbolResult format --- Groups by file URI, then creates nodes/edges per file. -workspaceSymbolsToDocumentSymbols :: [SymbolInformation] -> Map FilePath [DocumentSymbolResult] -workspaceSymbolsToDocumentSymbols syms = - Map.fromListWith (++) - [ (T.unpack (locUri (siLocation sym)) - , [ DocumentSymbolResult - { dsrName = siName sym - , dsrKind = siKind sym - , dsrRange = locRange (siLocation sym) - , dsrChildren = [] - } - ] - ) - | sym <- syms - , not (T.null (locUri (siLocation sym))) - ] \ No newline at end of file +import Graphos.Infrastructure.LSP.ServerMap + ( languageServerCommands + , findLSPServer + , languageIdFromExt + ) +import Graphos.Infrastructure.LSP.Transport + ( LSPClient(..) + , LSPClientConfig(..) + , defaultLSPConfig + , connectToLSP + , disconnectLSP + ) +import Graphos.Infrastructure.LSP.CapabilityParse + ( parseServerCapabilities + ) +import Graphos.Infrastructure.LSP.Extraction + ( extractViaLSP + , extractDocumentSymbols + , extractCallHierarchy + , extractWorkspaceSymbols + , workspaceSymbolsToDocumentSymbols + , symbolToNodes + , symbolTreeToEdges + ) \ No newline at end of file diff --git a/src/Graphos/Infrastructure/LSP/Extraction.hs b/src/Graphos/Infrastructure/LSP/Extraction.hs new file mode 100644 index 0000000..76e7c68 --- /dev/null +++ b/src/Graphos/Infrastructure/LSP/Extraction.hs @@ -0,0 +1,405 @@ +-- | LSP symbol extraction — document symbols, call hierarchy, workspace symbols, +-- and conversion to Graphos domain types (Node/Edge). +{-# LANGUAGE ScopedTypeVariables #-} +module Graphos.Infrastructure.LSP.Extraction + ( extractViaLSP + , extractDocumentSymbols + , extractCallHierarchy + , extractWorkspaceSymbols + , workspaceSymbolsToDocumentSymbols + , symbolToNodes + , symbolTreeToEdges + ) where + +import Control.Concurrent.MVar (takeMVar, putMVar) +import Control.Exception (catch, SomeException(..)) +import Control.Monad (unless) +import Data.Aeson (Value(..)) +import qualified Data.Aeson as Aeson +import qualified Data.Aeson.KeyMap as KM +import qualified Data.Vector as V +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Data.Maybe (mapMaybe) +import Data.Text (Text) +import qualified Data.Text as T + +import Graphos.Domain.Types +import Graphos.Infrastructure.LSP.Protocol hiding (languageIdFromExt) +import Graphos.Infrastructure.LSP.Transport + ( LSPClient(..) + , sendLSPMessageSafe + , drainNotifications + , readLSPResponseForId + ) +import Graphos.Infrastructure.LSP.ServerMap (languageIdFromExt, takeExtension) + +-- ─────────────────────────────────────────────── +-- Extraction via LSP +-- ─────────────────────────────────────────────── + +-- | Extract entities and relationships from a file using LSP. +-- Returns empty extraction on any error (never throws). +extractViaLSP :: LSPClient -> FilePath -> IO Extraction +extractViaLSP client filePath = + catch (do + putStrLn $ "[lsp] Extracting: " ++ filePath + let ext = takeExtension filePath + langId = languageIdFromExt ext + + fileContent <- catch (T.pack <$> readFile filePath) $ \(_ :: SomeException) -> pure "" + + let openMsg = lspDidOpen filePath langId fileContent + sent <- sendLSPMessageSafe client openMsg + unless sent $ putStrLn "[lsp] Warning: could not send didOpen (server disconnected?)" + catch (drainNotifications (lspStdout client) 500000) $ \(_ :: SomeException) -> pure () + + symbols <- extractDocumentSymbols client filePath + putStrLn $ "[lsp] Got " ++ show (length symbols) ++ " symbols from " ++ filePath + + let closeMsg = lspDidClose filePath + sentClose <- sendLSPMessageSafe client closeMsg + unless sentClose $ putStrLn "[lsp] Warning: could not send didClose (server disconnected?)" + + let nodes = symbolToNodes filePath symbols + edges = symbolTreeToEdges filePath symbols + pure emptyExtraction + { extractionNodes = nodes + , extractionEdges = edges + } + ) $ \(e :: SomeException) -> do + putStrLn $ "[lsp] Warning: extraction failed for " ++ filePath ++ ": " ++ show e + pure emptyExtraction + { extractionNodes = [makeStubNode filePath] + , extractionEdges = [] + } + +-- | Extract document symbols from a file. +-- Catches Broken pipe and other IO errors — returns [] instead of crashing. +extractDocumentSymbols :: LSPClient -> FilePath -> IO [DocumentSymbolResult] +extractDocumentSymbols client filePath = catch (do + nextId <- takeMVar (lspMessageId client) + putMVar (lspMessageId client) (nextId + 1) + let req = lspDocumentSymbolWithId filePath nextId + sent <- sendLSPMessageSafe client req + if not sent + then do + putStrLn $ "[lsp] Warning: could not send documentSymbol request for " ++ filePath ++ " (server disconnected?)" + pure [] + else do + resp <- readLSPResponseForId (lspStdout client) nextId + case resp of + Left err -> do + putStrLn $ "[lsp] Failed to get symbols: " ++ err + pure [] + Right val -> pure $ parseSymbolsFromResponse val + ) $ \(e :: SomeException) -> do + putStrLn $ "[lsp] Warning: documentSymbol request failed for " ++ filePath ++ ": " ++ show e + pure [] + +-- | Parse symbol tree from JSON-RPC response. +parseSymbolsFromResponse :: Value -> [DocumentSymbolResult] +parseSymbolsFromResponse (Object o) = + case KM.lookup "result" o of + Just (Array arr) -> concatMap (flattenSymbols []) (V.toList arr) + Just (Object obj) -> + case KM.lookup "children" obj of + Just (Array arr) -> concatMap (flattenSymbols []) (V.toList arr) + _ -> [parseSingleSymbol obj] + _ -> [] + where + flattenSymbols :: [Text] -> Value -> [DocumentSymbolResult] + flattenSymbols parents (Object s) = + let name = case KM.lookup "name" s of + Just (Aeson.String t) -> t + _ -> "" + kind = case KM.lookup "kind" s of + Just (Aeson.Number n) -> round n + _ -> 0 + range = case KM.lookup "range" s of + Just (Object r) -> parseRange r + _ -> dummyRange + childrenVals = case KM.lookup "children" s of + Just (Array arr) -> V.toList arr + _ -> [] + in if T.null name + then concatMap (flattenSymbols parents) childrenVals + else DocumentSymbolResult + { dsrName = name + , dsrKind = kind + , dsrRange = range + , dsrChildren = [] + } + : concatMap (flattenSymbols (name : parents)) childrenVals + flattenSymbols _ _ = [] + + parseSingleSymbol s = + let name = case KM.lookup "name" s of + Just (Aeson.String t) -> t + _ -> "" + kind = case KM.lookup "kind" s of + Just (Aeson.Number n) -> round n + _ -> 0 + range = case KM.lookup "range" s of + Just (Object r) -> parseRange r + _ -> dummyRange + in DocumentSymbolResult { dsrName = name, dsrKind = kind, dsrRange = range, dsrChildren = [] } + + parseRange r = + let start = case KM.lookup "start" r of + Just (Object p) -> parsePos p + _ -> Position 0 0 + end = case KM.lookup "end" r of + Just (Object p) -> parsePos p + _ -> Position 0 0 + in Range start end + + parsePos p = + let line = case KM.lookup "line" p of + Just (Aeson.Number n) -> round n + _ -> 0 + char = case KM.lookup "character" p of + Just (Aeson.Number n) -> round n + _ -> 0 + in Position line char + + dummyRange = Range (Position 0 0) (Position 0 0) + +parseSymbolsFromResponse _ = [] + +-- | Extract call hierarchy (incoming calls) for a symbol +extractCallHierarchy :: LSPClient -> Text -> IO [CallHierarchyItem] +extractCallHierarchy client _name = do + _ <- takeMVar (lspMessageId client) + putMVar (lspMessageId client) 1 + pure [] -- placeholder + +-- ─────────────────────────────────────────────── +-- Symbol → Node/Edge conversion +-- ─────────────────────────────────────────────── + +symbolToNodes :: FilePath -> [DocumentSymbolResult] -> [Node] +symbolToNodes filePath symbols = + [ Node + { nodeId = makeNodeId filePath (dsrName sym) + , nodeLabel = dsrName sym + , nodeFileType = CodeFile + , nodeSourceFile = T.pack filePath + , nodeSourceLocation = Just $ T.pack ("L" ++ show (posLine (rangeStart (dsrRange sym)))) + , nodeLineEnd = Just $ posLine (rangeEnd (dsrRange sym)) + , nodeKind = Just $ symbolKindToText (dsrKind sym) + , nodeSignature = Nothing + , nodeSourceUrl = Nothing + , nodeCapturedAt = Nothing + , nodeAuthor = Nothing + , nodeContributor = Nothing + } + | sym <- symbols + ] + +-- | Build edges from the symbol tree (parent contains child). +symbolTreeToEdges :: FilePath -> [DocumentSymbolResult] -> [Edge] +symbolTreeToEdges filePath flatSymbols = + let fileEdges = + [ Edge + { edgeSource = T.pack (takeWhile (/= '.') $ reverse $ takeWhile (/= '/') $ reverse filePath) + , edgeTarget = makeNodeId filePath (dsrName sym) + , edgeRelation = Contains + , edgeConfidence = Extracted + , edgeConfidenceScore = 1.0 + , edgeSourceFile = T.pack filePath + , edgeSourceLocation = Just $ T.pack ("L" ++ show (posLine (rangeStart (dsrRange sym)))) + , edgeWeight = 1.0 + } + | sym <- flatSymbols + ] + hierarchyEdges = buildHierarchyEdges filePath flatSymbols + in fileEdges ++ hierarchyEdges + +-- | Build parent→child Contains edges from the symbol hierarchy. +buildHierarchyEdges :: FilePath -> [DocumentSymbolResult] -> [Edge] +buildHierarchyEdges filePath symbols = + let containsPairs = [(parent, child) + | parent <- symbols + , child <- symbols + , dsrName parent /= dsrName child + , rangeContains (dsrRange parent) (dsrRange child) + ] + directPairs = [(p, c) | (p, c) <- containsPairs + , not (any (\m -> dsrName m /= dsrName p + && dsrName m /= dsrName c + && rangeContains (dsrRange p) (dsrRange m) + && rangeContains (dsrRange m) (dsrRange c)) symbols)] + in [ Edge + { edgeSource = makeNodeId filePath (dsrName parent) + , edgeTarget = makeNodeId filePath (dsrName child) + , edgeRelation = Contains + , edgeConfidence = Extracted + , edgeConfidenceScore = 1.0 + , edgeSourceFile = T.pack filePath + , edgeSourceLocation = Just $ T.pack ("L" ++ show (posLine (rangeStart (dsrRange parent)))) + , edgeWeight = 1.0 + } + | (parent, child) <- directPairs] + +-- | Check if one range fully contains another +rangeContains :: Range -> Range -> Bool +rangeContains outer inner = + let outerStart = posLine (rangeStart outer) * 10000 + posCharacter (rangeStart outer) + outerEnd = posLine (rangeEnd outer) * 10000 + posCharacter (rangeEnd outer) + innerStart = posLine (rangeStart inner) * 10000 + posCharacter (rangeStart inner) + innerEnd = posLine (rangeEnd inner) * 10000 + posCharacter (rangeEnd inner) + in innerStart >= outerStart && innerEnd <= outerEnd + +-- ─────────────────────────────────────────────── +-- Helpers +-- ─────────────────────────────────────────────── + +makeNodeId :: FilePath -> Text -> NodeId +makeNodeId filePath name = + let stem = T.pack $ takeWhile (/= '.') $ reverse $ takeWhile (/= '/') $ reverse filePath + dirPart = reverse $ dropWhile (/= '/') $ reverse filePath + dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) + hashPrefix = T.pack $ show dirHash + in hashPrefix <> T.pack "_" <> stem <> T.pack "_" <> name + +-- | Create a stub node when LSP extraction fails +makeStubNode :: FilePath -> Node +makeStubNode filePath = + let name = T.pack $ takeWhile (/= '.') $ reverse $ takeWhile (/= '/') $ reverse filePath + dirPart = reverse $ dropWhile (/= '/') $ reverse filePath + dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) + hashPrefix = T.pack $ show dirHash + nodeId' = hashPrefix <> T.pack "_" <> name + in Node + { nodeId = nodeId' + , nodeLabel = name + , nodeFileType = CodeFile + , nodeSourceFile = T.pack filePath + , nodeSourceLocation = Nothing + , nodeLineEnd = Nothing + , nodeKind = Nothing + , nodeSignature = Nothing + , nodeSourceUrl = Nothing + , nodeCapturedAt = Nothing + , nodeAuthor = Nothing + , nodeContributor = Nothing + } + +-- | Convert LSP SymbolKind integer to a human-readable text label. +symbolKindToText :: Int -> Text +symbolKindToText k = case k of + 1 -> "File" + 2 -> "Module" + 3 -> "Namespace" + 4 -> "Package" + 5 -> "Class" + 6 -> "Method" + 7 -> "Property" + 8 -> "Field" + 9 -> "Constructor" + 10 -> "Enum" + 11 -> "Interface" + 12 -> "Function" + 13 -> "Variable" + 14 -> "Constant" + 15 -> "String" + 16 -> "Number" + 17 -> "Boolean" + 18 -> "Array" + 19 -> "Object" + 20 -> "Key" + 21 -> "Null" + 22 -> "EnumMember" + 23 -> "Struct" + 24 -> "Event" + 25 -> "Operator" + 26 -> "TypeParameter" + _ -> "Unknown" + +-- ─────────────────────────────────────────────── +-- Workspace symbol extraction +-- ─────────────────────────────────────────────── + +-- | Extract all symbols in the project using workspace/symbol. +extractWorkspaceSymbols :: LSPClient -> IO (Either Text [SymbolInformation]) +extractWorkspaceSymbols client = catch (do + nextId <- takeMVar (lspMessageId client) + putMVar (lspMessageId client) (nextId + 1) + let req = lspWorkspaceSymbolWithId nextId "" + sent <- sendLSPMessageSafe client req + if not sent + then pure $ Left $ T.pack "workspace/symbol failed: server disconnected" + else do + resp <- readLSPResponseForId (lspStdout client) nextId + case resp of + Left err -> pure $ Left $ T.pack $ "workspace/symbol failed: " ++ err + Right val -> pure $ Right $ parseWorkspaceSymbolResponse val + ) $ \(e :: SomeException) -> pure $ Left $ T.pack $ "workspace/symbol error: " ++ show e + +-- | Parse workspace/symbol response into SymbolInformation list +parseWorkspaceSymbolResponse :: Value -> [SymbolInformation] +parseWorkspaceSymbolResponse (Object o) = + case KM.lookup "result" o of + Just (Array arr) -> mapMaybe parseSymInfo (V.toList arr) + _ -> [] + where + parseSymInfo (Object s) = + let name = case KM.lookup "name" s of + Just (Aeson.String t) -> t + _ -> "" + kind = case KM.lookup "kind" s of + Just (Aeson.Number n) -> round n + _ -> 0 + loc = case KM.lookup "location" s of + Just (Object l) -> parseLocation l + _ -> Location "" (Range (Position 0 0) (Position 0 0)) + in if T.null name then Nothing + else Just SymbolInformation { siName = name, siKind = kind, siLocation = loc } + parseSymInfo _ = Nothing + + parseLocation l = + let uri = case KM.lookup "uri" l of + Just (Aeson.String u) -> T.drop 7 u + _ -> "" + range = case KM.lookup "range" l of + Just (Object r) -> parseRange' r + _ -> Range (Position 0 0) (Position 0 0) + in Location uri range + + parseRange' r = + let start = case KM.lookup "start" r of + Just (Object p) -> parsePos' p + _ -> Position 0 0 + end = case KM.lookup "end" r of + Just (Object p) -> parsePos' p + _ -> Position 0 0 + in Range start end + + parsePos' p = + let line = case KM.lookup "line" p of + Just (Aeson.Number n) -> round n + _ -> 0 + char = case KM.lookup "character" p of + Just (Aeson.Number n) -> round n + _ -> 0 + in Position line char + +parseWorkspaceSymbolResponse _ = [] + +-- | Convert workspace symbols to DocumentSymbolResult format +workspaceSymbolsToDocumentSymbols :: [SymbolInformation] -> Map FilePath [DocumentSymbolResult] +workspaceSymbolsToDocumentSymbols syms = + Map.fromListWith (++) + [ (T.unpack (locUri (siLocation sym)) + , [ DocumentSymbolResult + { dsrName = siName sym + , dsrKind = siKind sym + , dsrRange = locRange (siLocation sym) + , dsrChildren = [] + } + ] + ) + | sym <- syms + , not (T.null (locUri (siLocation sym))) + ] \ No newline at end of file diff --git a/src/Graphos/Infrastructure/LSP/ServerMap.hs b/src/Graphos/Infrastructure/LSP/ServerMap.hs new file mode 100644 index 0000000..3e01a2b --- /dev/null +++ b/src/Graphos/Infrastructure/LSP/ServerMap.hs @@ -0,0 +1,100 @@ +-- | LSP language server registry — maps file extensions to LSP server commands. +module Graphos.Infrastructure.LSP.ServerMap + ( languageServerCommands + , findLSPServer + , languageIdFromExt + , takeExtension + ) where + +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Data.Text (Text) +import System.Directory (findExecutable) + +-- | Map from file extension to (command, args) for LSP servers +languageServerCommands :: Map String (String, [String]) +languageServerCommands = Map.fromList + [ (".ts", ("typescript-language-server", ["--stdio"])) + , (".tsx", ("typescript-language-server", ["--stdio"])) + , (".js", ("typescript-language-server", ["--stdio"])) + , (".jsx", ("typescript-language-server", ["--stdio"])) + , (".py", ("pyright-langserver", ["--stdio"])) + , (".go", ("gopls", [])) + , (".rs", ("rust-analyzer", [])) + , (".c", ("clangd", [])) + , (".cpp", ("clangd", [])) + , (".h", ("clangd", [])) + , (".hpp", ("clangd", [])) + , (".java", ("jdtls", [])) + , (".cs", ("omnisharp", [])) + , (".rb", ("solargraph", ["--stdio"])) + , (".hs", ("haskell-language-server", ["--lsp"])) + , (".lhs", ("haskell-language-server", ["--lsp"])) + , (".php", ("phpactor", [])) + , (".swift", ("sourcekit-lsp", [])) + , (".kt", ("kotlin-language-server", [])) + , (".kts", ("kotlin-language-server", [])) + , (".scala", ("metals", [])) + , (".lua", ("lua-language-server", [])) + , (".zig", ("zls", [])) + , (".ex", ("elixir-ls", [])) + , (".exs", ("elixir-ls", [])) + , (".dart", ("dart", ["analyze", "--stdio"])) + , (".vue", ("vue-language-server", [])) + , (".svelte", ("svelte-language-server", [])) + , (".nix", ("nixd", [])) + , (".json", ("vscode-json-language-server", ["--stdio"])) + ] + +-- | Find an LSP server for a file extension, checking if the command exists +findLSPServer :: String -> IO (Maybe (String, [String])) +findLSPServer ext = do + case Map.lookup ext languageServerCommands of + Nothing -> pure Nothing + Just (cmd, args) -> do + found <- findExecutable cmd + case found of + Just path -> pure $ Just (path, args) + Nothing -> pure Nothing + +-- | Convert file extension to LSP LanguageId string +languageIdFromExt :: String -> Text +languageIdFromExt ext = case ext of + ".ts" -> "typescript" + ".tsx" -> "typescriptreact" + ".js" -> "javascript" + ".jsx" -> "javascriptreact" + ".py" -> "python" + ".go" -> "go" + ".rs" -> "rust" + ".c" -> "c" + ".cpp" -> "cpp" + ".h" -> "c" + ".hpp" -> "cpp" + ".java" -> "java" + ".cs" -> "csharp" + ".rb" -> "ruby" + ".hs" -> "haskell" + ".lhs" -> "haskell" + ".php" -> "php" + ".swift" -> "swift" + ".kt" -> "kotlin" + ".kts" -> "kotlin" + ".scala" -> "scala" + ".lua" -> "lua" + ".zig" -> "zig" + ".ex" -> "elixir" + ".exs" -> "elixir" + ".dart" -> "dart" + ".vue" -> "vue" + ".svelte" -> "svelte" + ".nix" -> "nix" + ".json" -> "json" + _ -> "plaintext" + +-- | Extract file extension including the dot +takeExtension :: FilePath -> String +takeExtension path = + let reversed = reverse path + ext = takeWhile (/= '.') reversed + in if null ext then "" else '.' : reverse ext \ No newline at end of file diff --git a/src/Graphos/Infrastructure/LSP/Transport.hs b/src/Graphos/Infrastructure/LSP/Transport.hs new file mode 100644 index 0000000..d1c26a3 --- /dev/null +++ b/src/Graphos/Infrastructure/LSP/Transport.hs @@ -0,0 +1,279 @@ +-- | LSP transport layer — JSON-RPC message sending/receiving and process management. +{-# LANGUAGE ScopedTypeVariables #-} +module Graphos.Infrastructure.LSP.Transport + ( -- * LSP Client types + LSPClient(..) + , LSPClientConfig(..) + , defaultLSPConfig + + -- * Lifecycle + , connectToLSP + , disconnectLSP + + -- * Low-level messaging + , sendLSPMessage + , sendLSPMessageSafe + , readLSPMessage + , readLSPResponseForId + , drainNotifications + + -- * Connection state + , isProcessAlive + , markDisconnected + ) where + +import Control.Concurrent.MVar (MVar, newMVar) +import Control.Concurrent (threadDelay) +import Control.Exception (catch, try, SomeException(..), IOException) +import Control.Monad (unless, void) +import Data.Aeson (ToJSON, encode, eitherDecode, Value(..)) +import qualified Data.Aeson as Aeson +import qualified Data.Aeson.KeyMap as KM +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as BSL +import qualified Data.ByteString.Char8 as B8 +import Data.IORef (IORef, newIORef, readIORef, writeIORef) +import Data.List (isInfixOf) +import Data.Text (Text) +import qualified Data.Text as T +import System.Process (ProcessHandle, createProcess, proc, std_in, std_out, std_err, StdStream(CreatePipe), terminateProcess, getProcessExitCode) +import System.IO (Handle, hFlush) +import System.Timeout (timeout) + +import Graphos.Infrastructure.LSP.Protocol +import Graphos.Infrastructure.LSP.CapabilityParse (parseServerCapabilities) + +-- ─────────────────────────────────────────────── +-- LSP Client types +-- ─────────────────────────────────────────────── + +data LSPClientConfig = LSPClientConfig + { lspCommand :: FilePath + , lspArgs :: [String] + , lspRootUri :: FilePath + , lspTimeout :: Int -- ^ seconds; HLS needs 300s+ due to cabal v2-repl setup + } deriving (Eq, Show) + +defaultLSPConfig :: FilePath -> LSPClientConfig +defaultLSPConfig root = LSPClientConfig + { lspCommand = "" + , lspArgs = [] + , lspRootUri = root + , lspTimeout = 300 -- 5 min default; HLS needs time for cabal v2-repl + } + +data LSPClient = LSPClient + { lspHandle :: ProcessHandle + , lspStdin :: Handle + , lspStdout :: Handle + , lspConfig :: LSPClientConfig + , lspMessageId :: MVar Int + , lspServerCaps :: ServerCapabilities + , lspConnected :: IORef Bool -- ^ True while the server process is alive and pipes are open + } + +-- ─────────────────────────────────────────────── +-- LSP Protocol: sending and receiving messages +-- ─────────────────────────────────────────────── + +-- | Send a JSON-RPC message with proper Content-Length framing +sendLSPMessage :: ToJSON a => Handle -> a -> IO () +sendLSPMessage h msg = do + let content = BSL.toStrict (encode msg) + contentLen = BS.length content + header = B8.pack $ "Content-Length: " ++ show contentLen ++ "\r\n\r\n" + BS.hPut h (header `BS.append` content) + hFlush h + +-- | Check if the LSP server process is still running. +-- Returns 'Nothing' when the process is still running (no exit code yet). +isProcessAlive :: ProcessHandle -> IO Bool +isProcessAlive ph = do + mec <- getProcessExitCode ph + pure $ case mec of + Nothing -> True + Just _ -> False + +-- | Mark the LSP client as disconnected (e.g. after detecting a dead process). +markDisconnected :: LSPClient -> IO () +markDisconnected client = writeIORef (lspConnected client) False + +-- | Send a JSON-RPC message with Broken-pipe protection. +-- Checks if the server process is alive before writing; on any IOException +-- (including ResourceVanished / Broken pipe), marks the client as disconnected +-- and returns 'False'. Returns 'True' on success. +sendLSPMessageSafe :: ToJSON a => LSPClient -> a -> IO Bool +sendLSPMessageSafe client msg = do + alive <- readIORef (lspConnected client) + if not alive + then pure False + else do + processAlive <- isProcessAlive (lspHandle client) + if not processAlive + then do + markDisconnected client + putStrLn "[lsp] Warning: server process is no longer running" + pure False + else catch (do + sendLSPMessage (lspStdin client) msg + pure True + ) $ \(e :: IOException) -> do + markDisconnected client + putStrLn $ "[lsp] Warning: failed to send message (connection lost): " ++ show e + pure False + +-- | Read a byte until newline, stripping \r +readLineLF :: Handle -> IO String +readLineLF h = do + let loop acc = do + c <- BS.hGet h 1 + if BS.null c + then pure (reverse acc) + else case BS.head c of + 10 -> pure (reverse acc) -- \n = end of line + 13 -> loop acc -- \r = skip + b -> loop (toEnum (fromEnum b) : acc) + loop [] + +-- | Read a single LSP message (Content-Length header + JSON body) +readLSPMessage :: Handle -> IO (Either String Value) +readLSPMessage outh = catch (do + let findHeader = do + line <- readLineLF outh + if null line + then findHeader + else do + let contentLen = parseContentLength line + if contentLen < 0 + then findHeader + else do + skipHeaders + result <- timeout 10000000 (BSL.hGet outh contentLen) + case result of + Nothing -> pure $ Left "Timeout reading body" + Just bodyBytes -> case eitherDecode bodyBytes of + Right val -> pure $ Right val + Left err -> pure $ Left $ "JSON parse error (" ++ show contentLen ++ " bytes): " ++ err + mResult <- timeout 10000000 findHeader + case mResult of + Nothing -> pure $ Left "Timeout waiting for LSP response" + Just result -> pure result + ) $ \(e :: SomeException) -> pure $ Left $ "Read error: " ++ show e + where + skipHeaders = do + line <- readLineLF outh + if null line then pure () + else skipHeaders + +-- | Parse Content-Length from header line +parseContentLength :: String -> Int +parseContentLength header = + let prefix :: String = "Content-Length:" + trimmed = dropWhile (== ' ') $ drop (length prefix) (takeWhile (/= '\r') header) + in case reads trimmed of + [(n, "")] -> n + _ -> -1 + +-- | Read LSP messages until we get one with a matching "id" field +readLSPResponseForId :: Handle -> Int -> IO (Either String Value) +readLSPResponseForId outh targetId = loop + where + loop = do + result <- readLSPMessage outh + case result of + Left err -> pure $ Left err + Right val@(Object o) -> + case KM.lookup "id" o of + Just (Aeson.Number n) | round n == targetId -> pure $ Right val + Just (Aeson.Number _) -> loop + _ -> loop + Right _ -> loop + +-- | Drain all pending notifications from LSP server +drainNotifications :: Handle -> Int -> IO () +drainNotifications outh micros = do + mMsg <- timeout micros (readLSPMessage outh) + case mMsg of + Just (Right _) -> drainNotifications outh micros + _ -> pure () + +-- ─────────────────────────────────────────────── +-- LSP Client lifecycle +-- ─────────────────────────────────────────────── + +connectToLSP :: LSPClientConfig -> IO (Either Text LSPClient) +connectToLSP config = catch (do + putStrLn $ "[lsp] Starting: " ++ lspCommand config ++ " " ++ unwords (lspArgs config) + let processSpec = proc (lspCommand config) (lspArgs config) + (minH, moutH, _, ph) <- createProcess processSpec + { std_in = CreatePipe + , std_out = CreatePipe + , std_err = CreatePipe + } + case (minH, moutH) of + (Just inh, Just outh) -> do + idVar <- newMVar 2 + connectedRef <- newIORef True + + let initMsg = lspInitialize (lspRootUri config) + sendLSPMessage inh initMsg + + -- HLS needs significantly more time to initialize because it runs + -- cabal v2-repl to determine build flags (can take minutes on first run). + -- Other LSP servers typically respond within seconds. + let initTimeoutMicros = case lspCommand config of + cmd | "haskell-language-server" `isInfixOf` cmd -> max (lspTimeout config) 300 * 1000000 + | otherwise -> lspTimeout config * 1000000 + initResp <- timeout initTimeoutMicros (readLSPResponseForId outh 1) + case initResp of + Nothing -> do + putStrLn $ "[lsp] Initialize failed: Timeout (" ++ show (lspTimeout config) ++ "s) waiting for LSP response" + putStrLn $ "[lsp] Tip: HLS needs cabal v2-repl to resolve build flags — this can take minutes on first run" + terminateProcess ph + pure $ Left $ T.pack $ "LSP initialize failed: Timeout (" ++ show (lspTimeout config) ++ "s) waiting for LSP response" + Just (Left err) -> do + putStrLn $ "[lsp] Initialize failed: " ++ err + terminateProcess ph + pure $ Left $ T.pack $ "LSP initialize failed: " ++ err + Just (Right respVal) -> do + putStrLn "[lsp] Initialize successful" + sendLSPMessage inh lspInitialized + + let drainMicros = case lspCommand config of + cmd | "haskell-language-server" `isInfixOf` cmd -> 15000000 + | otherwise -> 3000000 + drainNotifications outh drainMicros + + let caps = parseServerCapabilities respVal + putStrLn $ "[lsp] Server capabilities: documentSymbol=" ++ show (scpDocumentSymbolProvider caps) + ++ " workspaceSymbol=" ++ show (scpWorkspaceSymbolProvider caps) + + putStrLn $ "[lsp] Connected to " ++ lspCommand config + pure $ Right LSPClient + { lspHandle = ph + , lspStdin = inh + , lspStdout = outh + , lspConfig = config + , lspMessageId = idVar + , lspServerCaps = caps + , lspConnected = connectedRef + } + _ -> pure $ Left $ T.pack "Failed to create LSP process handles" + ) $ \(e :: SomeException) -> pure $ Left $ T.pack $ "LSP connection error: " ++ show e + +disconnectLSP :: LSPClient -> IO () +disconnectLSP client = do + alive <- readIORef (lspConnected client) + unless (not alive) $ do + result <- try $ do + _ <- sendLSPMessageSafe client lspShutdown + _ <- timeout 3000000 (readLSPResponseForId (lspStdout client) 999) + pure () + case result of + Left (_ :: SomeException) -> pure () + Right _ -> pure () + void (catch (sendLSPMessageSafe client lspExit) $ \(_ :: SomeException) -> pure False) + markDisconnected client + threadDelay 100000 + terminateProcess (lspHandle client) + putStrLn $ "[lsp] Disconnected from " ++ lspCommand (lspConfig client) \ No newline at end of file diff --git a/src/Graphos/UseCase/Extract.hs b/src/Graphos/UseCase/Extract.hs index 6a77447..ba227f9 100644 --- a/src/Graphos/UseCase/Extract.hs +++ b/src/Graphos/UseCase/Extract.hs @@ -1,4 +1,5 @@ --- | Extraction orchestration - parallel LSP extraction for all files +-- | Extraction orchestration (re-export hub) — parallel LSP extraction for all files. +-- Delegates to Haskell, Markdown, and LSP sub-modules. {-# LANGUAGE ScopedTypeVariables #-} module Graphos.UseCase.Extract ( extractAll @@ -7,42 +8,35 @@ module Graphos.UseCase.Extract import Control.Concurrent (newQSemN, waitQSemN, signalQSemN) import Control.Concurrent.Async (mapConcurrently) -import Control.Exception (bracket_, SomeException, catch) -import Data.Char (isAlphaNum) -import Data.List (nub, sort, isPrefixOf, find) +import Control.Exception (bracket_) +import Data.List (nub, sort) import qualified Data.Map.Strict as Map import qualified Data.Text as T import System.Directory (canonicalizePath) import System.FilePath (takeExtension) import Graphos.Domain.Types -import Graphos.Domain.Extraction () import Graphos.Domain.Graph (mergeExtractions) import Graphos.Infrastructure.LSP.Client (LSPClient(..), extractViaLSP, findLSPServer, LSPClientConfig(..), connectToLSP, disconnectLSP, languageServerCommands, extractWorkspaceSymbols, workspaceSymbolsToDocumentSymbols, symbolToNodes, symbolTreeToEdges) import Graphos.Infrastructure.LSP.Protocol (scpWorkspaceSymbolProvider, DocumentSymbolResult(..)) import Graphos.Infrastructure.Logging (LogEnv, logInfo, logDebug, logTrace, logWarn) +import Graphos.UseCase.Extract.Haskell (makeStubNode, extractHaskellStub) +import Graphos.UseCase.Extract.Markdown (extractDocFile) -- | Extract entities from all detected files. --- Uses parallel threads when cfgThreads > 1. --- Files are grouped by LSP server type so each server connection --- is reused across its files (sequential per server, parallel across servers). extractAll :: PipelineConfig -> Detection -> LogEnv -> IO Extraction extractAll config detection env = do let codeFiles = Map.findWithDefault [] CodeFiles (detectionFiles detection) docFiles = Map.findWithDefault [] DocFiles (detectionFiles detection) numThreads = max 1 (cfgThreads config) - -- Canonicalize project root to absolute path for LSP rootUri absRoot <- canonicalizePath (cfgInputPath config) - -- Log discovered file types logInfo env $ T.pack $ " Processing " ++ show (length codeFiles) ++ " code files, " ++ show (length docFiles) ++ " doc files" - -- Log unique extensions and their LSP status let exts = nub (sort (map takeExtension codeFiles)) logDebug env $ T.pack $ " File extensions: " ++ show exts - -- Check LSP availability per extension mapM_ (\ext -> do mbLSP <- findLSPServer ext case mbLSP of @@ -50,22 +44,18 @@ extractAll config detection env = do Nothing -> logWarn env $ T.pack $ " No LSP for " ++ ext ++ " - using stub extraction" ) exts - -- Group files by LSP server, then extract in parallel across groups let fileGroups = groupByLSPServer codeFiles numGroups = length fileGroups logInfo env $ T.pack $ " LSP server groups: " ++ show numGroups ++ " (threads: " ++ show numThreads ++ ")" codeExtractions <- if numThreads <= 1 - then -- Sequential: process each group one at a time - concatMapM (extractGroup env absRoot) fileGroups + then concatMapM (extractGroup env absRoot) fileGroups else if numGroups <= numThreads then do - -- One thread per group — ideal case results <- mapConcurrently (extractGroup env absRoot) fileGroups pure (concat results) else do - -- More groups than threads — use semaphore-based pool sem <- newQSemN numThreads results <- mapConcurrently (\grp -> bracket_ (waitQSemN sem 1) @@ -73,7 +63,6 @@ extractAll config detection env = do (extractGroup env absRoot grp)) fileGroups pure (concat results) - -- Doc extraction: parse markdown files for headers, links, tags docExtractions <- mapM (extractDocFile env) docFiles let docExtraction = foldr mergeExtractions emptyExtraction docExtractions @@ -82,14 +71,12 @@ extractAll config detection env = do pure merged -- | A group of files sharing the same LSP server command -type FileGroup = (String, [FilePath]) -- (server command, files) +type FileGroup = (String, [FilePath]) -- | Group files by their LSP server command groupByLSPServer :: [FilePath] -> [FileGroup] groupByLSPServer files = - let -- Map each file to its LSP server command (or "stub") - fileWithServer = [(serverCmd f, f) | f <- files] - -- Group by server command + let fileWithServer = [(serverCmd f, f) | f <- files] grouped = Map.toList $ Map.fromListWith (++) [(cmd, [fp]) | (cmd, fp) <- fileWithServer] in grouped where @@ -101,25 +88,20 @@ groupByLSPServer files = extractGroup :: LogEnv -> FilePath -> FileGroup -> IO [Extraction] extractGroup env absRoot (serverCmd, files) = if serverCmd == "stub" - then -- No LSP available — process stubs (fast, no connection needed) - mapM (\fp -> do - logDebug env $ T.pack $ " [stub] " ++ fp - pure emptyExtraction { extractionNodes = [makeStubNode fp] } - ) files - else -- Connect once, extract all files, disconnect - doExtractWithSharedLSP env absRoot serverCmd files + then mapM (\fp -> do + logDebug env $ T.pack $ " [stub] " ++ fp + pure emptyExtraction { extractionNodes = [makeStubNode fp] } + ) files + else doExtractWithSharedLSP env absRoot serverCmd files -- | Connect to an LSP server once and extract all files for it. --- Tries workspace/symbol first (project-level, single request) if the server supports it. --- Falls back to per-file documentSymbol if workspace/symbol fails or isn't supported. doExtractWithSharedLSP :: LogEnv -> FilePath -> String -> [FilePath] -> IO [Extraction] doExtractWithSharedLSP env absRoot serverCmd files = do mbLSPOpts <- findLSPServer (takeExtension (case files of (f:_) -> f; [] -> "")) case mbLSPOpts of - Nothing -> -- Fallback to stubs - mapM (\fp -> do - logWarn env $ T.pack $ " LSP " ++ serverCmd ++ " disappeared for " ++ fp - pure emptyExtraction { extractionNodes = [makeStubNode fp] } + Nothing -> mapM (\fp -> do + logWarn env $ T.pack $ " LSP " ++ serverCmd ++ " disappeared for " ++ fp + pure emptyExtraction { extractionNodes = [makeStubNode fp] } ) files Just (cmd, args) -> do logDebug env $ T.pack $ " [lsp] Connecting to " ++ cmd ++ " for " ++ show (length files) ++ " files" @@ -127,9 +109,9 @@ doExtractWithSharedLSP env absRoot serverCmd files = do { lspCommand = cmd , lspArgs = args , lspRootUri = absRoot - , lspTimeout = 60 + , lspTimeout = 300 } - result <- Graphos.Infrastructure.LSP.Client.connectToLSP config + result <- connectToLSP config case result of Left err -> do logWarn env $ T.pack $ " [lsp] Connection failed: " ++ T.unpack err @@ -143,7 +125,7 @@ doExtractWithSharedLSP env absRoot serverCmd files = do case wsResult of Right syms | not (null syms) -> do - let fileSymbols = Graphos.Infrastructure.LSP.Client.workspaceSymbolsToDocumentSymbols syms + let fileSymbols = workspaceSymbolsToDocumentSymbols syms logInfo env $ T.pack $ " [lsp] workspace/symbol returned " ++ show (length syms) ++ " symbols across " ++ show (Map.size fileSymbols) ++ " files" pure [extractionFromSymbols fp (Map.findWithDefault [] fp fileSymbols) | fp <- files] | otherwise -> do @@ -167,10 +149,10 @@ doExtractWithSharedLSP env absRoot serverCmd files = do extractHaskellStub fp else pure ext ) (zip files extractions) - Graphos.Infrastructure.LSP.Client.disconnectLSP client + disconnectLSP client pure enriched --- | Extract from a single file using LSP (standalone, opens its own connection) +-- | Extract from a single file using LSP (standalone) extractFromFile :: LogEnv -> FilePath -> IO Extraction extractFromFile env filePath = do let ext = takeExtension filePath @@ -190,9 +172,9 @@ extractFromFile env filePath = do { lspCommand = cmd , lspArgs = args , lspRootUri = absRoot - , lspTimeout = 60 + , lspTimeout = 300 } - result <- Graphos.Infrastructure.LSP.Client.connectToLSP config + result <- connectToLSP config case result of Left err -> do logWarn env $ T.pack $ " [lsp] Connection failed for " ++ filePath ++ ": " ++ T.unpack err @@ -201,210 +183,17 @@ extractFromFile env filePath = do } Right client -> do extraction <- extractViaLSP client filePath - Graphos.Infrastructure.LSP.Client.disconnectLSP client + disconnectLSP client let nNodes = length (extractionNodes extraction) nEdges = length (extractionEdges extraction) logDebug env $ T.pack $ " [lsp] " ++ filePath ++ " → " ++ show nNodes ++ " nodes, " ++ show nEdges ++ " edges" pure extraction --- ─────────────────────────────────────────────── --- Helpers --- ─────────────────────────────────────────────── - --- | Create a stub node when no LSP is available -makeStubNode :: FilePath -> Node -makeStubNode filePath = - let name = T.pack $ takeWhile (/= '.') $ reverse $ takeWhile (/= '/') $ reverse filePath - dirPart = reverse $ dropWhile (/= '/') $ reverse filePath - dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) - hashPrefix = T.pack $ show dirHash - nodeId' = hashPrefix <> T.pack "_" <> name - in Node - { nodeId = nodeId' - , nodeLabel = name - , nodeFileType = CodeFile - , nodeSourceFile = T.pack filePath - , nodeSourceLocation = Nothing - , nodeLineEnd = Nothing - , nodeKind = Nothing - , nodeSignature = Nothing - , nodeSourceUrl = Nothing - , nodeCapturedAt = Nothing - , nodeAuthor = Nothing - , nodeContributor = Nothing - } - --- | Haskell-aware stub extraction: parses module name, imports, and top-level declarations. --- Used as fallback when HLS returns 0 symbols (e.g., not yet indexed). -extractHaskellStub :: FilePath -> IO Extraction -extractHaskellStub filePath = catch (do - content <- readFile filePath - let allNodes = haskellStubNodes filePath content - edges = haskellStubEdges filePath allNodes - pure emptyExtraction - { extractionNodes = allNodes - , extractionEdges = edges - } - ) $ \(_ :: SomeException) -> pure emptyExtraction - { extractionNodes = [makeStubNode filePath] } - --- | Parse Haskell source for module name, imports, and top-level decl names -haskellStubNodes :: FilePath -> String -> [Node] -haskellStubNodes filePath content = - let dirPart = reverse $ dropWhile (/= '/') $ reverse filePath - dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) - hashPrefix = T.pack $ show dirHash - modName = parseHaskellModule content - imports = parseHaskellImports content - decls = parseHaskellDecls content - modNode = case modName of - Just mn -> - [ Node - { nodeId = hashPrefix <> T.pack "_" <> T.pack mn - , nodeLabel = T.pack mn - , nodeFileType = CodeFile - , nodeSourceFile = T.pack filePath - , nodeSourceLocation = Just "L1" - , nodeLineEnd = Nothing - , nodeKind = Just "Module" - , nodeSignature = Nothing - , nodeSourceUrl = Nothing - , nodeCapturedAt = Nothing - , nodeAuthor = Nothing - , nodeContributor = Nothing - } - ] - Nothing -> [] - impNodes = [ Node - { nodeId = hashPrefix <> T.pack "_import_" <> T.pack imp - , nodeLabel = T.pack imp - , nodeFileType = CodeFile - , nodeSourceFile = T.pack filePath - , nodeSourceLocation = Nothing - , nodeLineEnd = Nothing - , nodeKind = Just "Module" - , nodeSignature = Nothing - , nodeSourceUrl = Nothing - , nodeCapturedAt = Nothing - , nodeAuthor = Nothing - , nodeContributor = Nothing - } - | imp <- imports - ] - declNodes = [ Node - { nodeId = hashPrefix <> T.pack "_" <> T.pack decl - , nodeLabel = T.pack decl - , nodeFileType = CodeFile - , nodeSourceFile = T.pack filePath - , nodeSourceLocation = Nothing - , nodeLineEnd = Nothing - , nodeKind = Nothing - , nodeSignature = Nothing - , nodeSourceUrl = Nothing - , nodeCapturedAt = Nothing - , nodeAuthor = Nothing - , nodeContributor = Nothing - } - | decl <- decls - ] - in modNode ++ impNodes ++ declNodes - --- | Build edges from Haskell stub nodes: module→import, module→decl -haskellStubEdges :: FilePath -> [Node] -> [Edge] -haskellStubEdges filePath nodes = - let modNodeM = find (\n -> not ("_import_" `T.isInfixOf` nodeId n)) nodes - in case modNodeM of - Just mn -> - let otherNodes = filter (\n -> nodeId n /= nodeId mn) nodes - in [ Edge - { edgeSource = nodeId mn - , edgeTarget = nodeId other - , edgeRelation = Imports - , edgeConfidence = Ambiguous - , edgeConfidenceScore = 0.7 - , edgeSourceFile = T.pack filePath - , edgeSourceLocation = nodeSourceLocation mn - , edgeWeight = 0.7 - } - | other <- otherNodes - ] - Nothing -> [] - --- | Parse the module name from a Haskell source file -parseHaskellModule :: String -> Maybe String -parseHaskellModule content = - case [line | line <- lines content, "module " `isPrefixOf` dropWhile (== ' ') line] of - (line:_) -> Just $ extractModuleName line - [] -> Nothing - where - extractModuleName line = - let afterModule = dropWhile (== ' ') $ drop 7 line - name = takeWhile (\c -> isAlphaNum c || c `elem` ("._" :: String)) afterModule - in if null name then "Main" else name - --- | Parse import declarations from a Haskell source file -parseHaskellImports :: String -> [String] -parseHaskellImports content = - [ extractImportName line - | line <- lines content - , "import " `isPrefixOf` dropWhile (== ' ') line - ] - where - extractImportName line = - let trimmed = dropWhile (== ' ') line - afterImport = dropWhile (== ' ') $ drop 7 trimmed - isQualified = "qualified " `isPrefixOf` afterImport - afterQual = if isQualified then dropWhile (== ' ') $ drop 9 afterImport else afterImport - name = takeWhile (\c -> isAlphaNum c || c `elem` ("._" :: String)) afterQual - in if null name then "Unknown" else name - --- | Parse top-level declaration names from a Haskell source file -parseHaskellDecls :: String -> [String] -parseHaskellDecls content = - [ extractDeclName line - | line <- lines content - , isTopLevelDecl line - ] - where - isTopLevelDecl line = - let trimmed = dropWhile (== ' ') line - in not (null trimmed) - && case trimmed of (c:_) -> c `notElem` ("-{-#" :: String); [] -> False - && not ("module " `isPrefixOf` trimmed) - && not ("import " `isPrefixOf` trimmed) - && not ("where" `isPrefixOf` trimmed) - && not ("deriving" `isPrefixOf` trimmed) - && not ("else" `isPrefixOf` trimmed) - && not ("then" `isPrefixOf` trimmed) - && not ("in " `isPrefixOf` trimmed) - && not ("do" == trimmed) - && not ("let" `isPrefixOf` trimmed) - && not ("=" == dropWhile (/= '=') trimmed) - - extractDeclName line = - let trimmed = dropWhile (== ' ') line - -- Handle data/newtype/class/type/instance - (_prefixLen, rest) = case trimmed of - s | "data " `isPrefixOf` s -> (5 :: Int, drop 5 s) - | "newtype " `isPrefixOf` s -> (8, drop 8 s) - | "type " `isPrefixOf` s -> (5, drop 5 s) - | "class " `isPrefixOf` s -> (6, drop 6 s) - | "instance " `isPrefixOf` s -> (9, drop 9 s) - | "type family " `isPrefixOf` s -> (12, drop 12 s) - | "data family " `isPrefixOf` s -> (12, drop 12 s) - | otherwise -> (0, s) - nameRest = dropWhile (== ' ') rest - name = takeWhile (\c -> isAlphaNum c || c `elem` ("'_" :: String)) nameRest - in if null name - then take 20 trimmed - else name - --- | Sequential concatMapM (since it's not in base) +-- | Sequential concatMapM concatMapM :: Monad m => (a -> m [b]) -> [a] -> m [b] concatMapM f = fmap concat . mapM f -- | Build an Extraction from a file's DocumentSymbolResults. --- Reuses the same symbol→node/edge conversion as extractViaLSP. extractionFromSymbols :: FilePath -> [DocumentSymbolResult] -> Extraction extractionFromSymbols filePath symbols = let nodes = symbolToNodes filePath symbols @@ -412,204 +201,4 @@ extractionFromSymbols filePath symbols = in emptyExtraction { extractionNodes = nodes , extractionEdges = edges - } - --- ─────────────────────────────────────────────── --- Document extraction (Markdown, texts) --- ─────────────────────────────────────────────── - --- | Extract concepts and relationships from a document file. --- Parses headers as nodes, wikilinks/links/tags as edges. -extractDocFile :: LogEnv -> FilePath -> IO Extraction -extractDocFile env filePath = catch (do - content <- readFile filePath - let allNodes = docNodes filePath content - allEdges = docEdges filePath content allNodes - logDebug env $ T.pack $ " [doc] " ++ filePath ++ " → " ++ show (length allNodes) ++ " nodes, " ++ show (length allEdges) ++ " edges" - pure emptyExtraction - { extractionNodes = allNodes - , extractionEdges = allEdges - } - ) $ \(_ :: SomeException) -> do - logDebug env $ T.pack $ " [doc] " ++ filePath ++ " → stub (read error)" - pure emptyExtraction { extractionNodes = [makeStubNode filePath] } - --- | Parse a document for nodes: file node, headers, tags -docNodes :: FilePath -> String -> [Node] -docNodes filePath content = - let fileNode = docFileNode filePath - headerNodes = docHeaderNodes filePath content - tagNodes = docTagNodes filePath content - in fileNode : headerNodes ++ tagNodes - --- | Create a file-level node for a document -docFileNode :: FilePath -> Node -docFileNode filePath = - let name = T.pack $ takeWhile (/= '.') $ reverse $ takeWhile (/= '/') $ reverse filePath - dirPart = reverse $ dropWhile (/= '/') $ reverse filePath - dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) - nid = T.pack (show dirHash) <> T.pack "_doc_" <> name - in Node - { nodeId = nid - , nodeLabel = name - , nodeFileType = DocumentFile - , nodeSourceFile = T.pack filePath - , nodeSourceLocation = Just "L1" - , nodeLineEnd = Nothing - , nodeKind = Just "File" - , nodeSignature = Nothing - , nodeSourceUrl = Nothing - , nodeCapturedAt = Nothing - , nodeAuthor = Nothing - , nodeContributor = Nothing - } - --- | Parse headers (## Title) as nodes -docHeaderNodes :: FilePath -> String -> [Node] -docHeaderNodes filePath content = - [ mkHeaderNode filePath level titleText lineNum - | (lineNum, line) <- zip [1..] (lines content) - , Just (level, titleText) <- [parseHeader line] - , level <= 4 -- only H1-H4 - ] - --- | Parse a markdown header line, returns (level, title) -parseHeader :: String -> Maybe (Int, String) -parseHeader line = - let trimmed = dropWhile (== ' ') line - in case trimmed of - '#':'#':'#':'#':rest -> Just (4, dropWhile (== ' ') rest) - '#':'#':'#':rest -> Just (3, dropWhile (== ' ') rest) - '#':'#':rest -> Just (2, dropWhile (== ' ') rest) - '#':rest -> Just (1, dropWhile (== ' ') rest) - _ -> Nothing - --- | Create a node for a header -mkHeaderNode :: FilePath -> Int -> String -> Int -> Node -mkHeaderNode filePath level title lineNum = - let dirPart = reverse $ dropWhile (/= '/') $ reverse filePath - dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) - cleanTitle = T.pack $ takeWhile (\c -> isAlphaNum c || c `elem` (" -'_/" :: String)) title - nid = T.pack (show dirHash) <> T.pack "_h" <> T.pack (show level) <> T.pack "_" <> cleanTitle - in Node - { nodeId = nid - , nodeLabel = cleanTitle - , nodeFileType = DocumentFile - , nodeSourceFile = T.pack filePath - , nodeSourceLocation = Just (T.pack $ "L" ++ show lineNum) - , nodeLineEnd = Nothing - , nodeKind = Just "Header" - , nodeSignature = Nothing - , nodeSourceUrl = Nothing - , nodeCapturedAt = Nothing - , nodeAuthor = Nothing - , nodeContributor = Nothing - } - --- | Parse tags (#tag or #tag/sub) as nodes -docTagNodes :: FilePath -> String -> [Node] -docTagNodes filePath content = - let tags = nub $ parseTags content - in [ mkTagNode filePath tag | tag <- tags ] - where - parseTags :: String -> [String] - parseTags txt = [ tag | tag <- extractTags txt, not (isHeaderTag txt tag) ] - --- | Extract #tags from text (but exclude # headers) -extractTags :: String -> [String] -extractTags text = - [ tag - | (i, ch) <- zip [0..] text - , ch == '#' - , i > 0 -- not start of line header - , let prev = if i > 0 then text !! (i-1) else ' ' - , prev == ' ' || prev == '\n' || prev == ',' - , let afterHash = takeWhile (\ch' -> isAlphaNum ch' || ch' `elem` ("_/-" :: String)) (drop (i+1) text) - , not (null afterHash) - , case afterHash of (c':_) -> c' `notElem` (" " :: String); [] -> True -- not a header - , let tag = afterHash - , length tag >= 2 - ] - -isHeaderTag :: String -> String -> Bool -isHeaderTag _ _ = False -- simplified: all #tags are tags, headers are parsed separately - --- | Create a node for a tag -mkTagNode :: FilePath -> String -> Node -mkTagNode filePath tag = - let dirPart = reverse $ dropWhile (/= '/') $ reverse filePath - dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) - nid = T.pack (show dirHash) <> T.pack "_tag_" <> T.pack tag - in Node - { nodeId = nid - , nodeLabel = T.pack $ "#" ++ tag - , nodeFileType = DocumentFile - , nodeSourceFile = T.pack filePath - , nodeSourceLocation = Nothing - , nodeLineEnd = Nothing - , nodeKind = Just "Tag" - , nodeSignature = Nothing - , nodeSourceUrl = Nothing - , nodeCapturedAt = Nothing - , nodeAuthor = Nothing - , nodeContributor = Nothing - } - --- | Build edges: file→header (contains), file→tag (tags), wikilinks (references) -docEdges :: FilePath -> String -> [Node] -> [Edge] -docEdges filePath content nodes = - let dirPart = reverse $ dropWhile (/= '/') $ reverse filePath - dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) - fileNid = T.pack (show dirHash) <> T.pack "_doc_" <> T.pack (takeWhile (/= '.') (reverse $ takeWhile (/= '/') $ reverse filePath)) - -- file → header edges (Contains) - headerEdges = [ Edge - { edgeSource = fileNid - , edgeTarget = nodeId n - , edgeRelation = Contains - , edgeConfidence = Extracted - , edgeConfidenceScore = 1.0 - , edgeSourceFile = T.pack filePath - , edgeSourceLocation = Nothing - , edgeWeight = 1.0 - } - | n <- nodes - , T.isInfixOf (T.pack "_h") (nodeId n) -- header nodes - ] - -- file → tag edges (Tags) - tagEdges = [ Edge - { edgeSource = fileNid - , edgeTarget = nodeId n - , edgeRelation = References - , edgeConfidence = Extracted - , edgeConfidenceScore = 1.0 - , edgeSourceFile = T.pack filePath - , edgeSourceLocation = Nothing - , edgeWeight = 1.0 - } - | n <- nodes - , T.isInfixOf (T.pack "_tag_") (nodeId n) -- tag nodes - ] - -- wikilink edges [[Target]] → file node of target - wikilinkEdges = [ Edge - { edgeSource = fileNid - , edgeTarget = T.pack target -- best-effort: link to target name - , edgeRelation = References - , edgeConfidence = Extracted - , edgeConfidenceScore = 0.8 - , edgeSourceFile = T.pack filePath - , edgeSourceLocation = Nothing - , edgeWeight = 0.8 - } - | target <- parseWikiLinks content - ] - in headerEdges ++ tagEdges ++ wikilinkEdges - --- | Parse [[wikilinks]] from markdown content -parseWikiLinks :: String -> [String] -parseWikiLinks content = - [ T.unpack $ T.strip $ T.takeWhile (/= '|') $ T.drop 2 t - | t <- T.splitOn (T.pack "[[") (T.pack content) - , T.isInfixOf (T.pack "]]") t - , let linkText = T.takeWhile (/= ']') t - , not (T.null linkText) - ] \ No newline at end of file + } \ No newline at end of file diff --git a/src/Graphos/UseCase/Extract/Haskell.hs b/src/Graphos/UseCase/Extract/Haskell.hs new file mode 100644 index 0000000..248dd8e --- /dev/null +++ b/src/Graphos/UseCase/Extract/Haskell.hs @@ -0,0 +1,212 @@ +-- | Haskell stub extraction — parses module name, imports, and top-level declarations. +-- Used as fallback when HLS returns 0 symbols (e.g., not yet indexed). +{-# LANGUAGE ScopedTypeVariables #-} +module Graphos.UseCase.Extract.Haskell + ( extractHaskellStub + , makeStubNode + , parseHaskellModule + , parseHaskellImports + , parseHaskellDecls + , isTopLevelDecl + , extractDeclName + , extractImportName + ) where + +import Control.Exception (SomeException, catch) +import Data.Char (isAlphaNum) +import Data.List (find, isPrefixOf) +import qualified Data.Text as T + +import Graphos.Domain.Types + +-- | Create a stub node when no LSP is available +makeStubNode :: FilePath -> Node +makeStubNode filePath = + let name = T.pack $ takeWhile (/= '.') $ reverse $ takeWhile (/= '/') $ reverse filePath + dirPart = reverse $ dropWhile (/= '/') $ reverse filePath + dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) + hashPrefix = T.pack $ show dirHash + nodeId' = hashPrefix <> T.pack "_" <> name + in Node + { nodeId = nodeId' + , nodeLabel = name + , nodeFileType = CodeFile + , nodeSourceFile = T.pack filePath + , nodeSourceLocation = Nothing + , nodeLineEnd = Nothing + , nodeKind = Nothing + , nodeSignature = Nothing + , nodeSourceUrl = Nothing + , nodeCapturedAt = Nothing + , nodeAuthor = Nothing + , nodeContributor = Nothing + } + +-- | Haskell-aware stub extraction +extractHaskellStub :: FilePath -> IO Extraction +extractHaskellStub filePath = catch (do + content <- readFile filePath + let allNodes = haskellStubNodes filePath content + edges = haskellStubEdges filePath allNodes + pure emptyExtraction + { extractionNodes = allNodes + , extractionEdges = edges + } + ) $ \(_ :: SomeException) -> pure emptyExtraction + { extractionNodes = [makeStubNode filePath] } + +-- | Parse Haskell source for module name, imports, and top-level decl names +haskellStubNodes :: FilePath -> String -> [Node] +haskellStubNodes filePath content = + let dirPart = reverse $ dropWhile (/= '/') $ reverse filePath + dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) + hashPrefix = T.pack $ show dirHash + modName = parseHaskellModule content + imports = parseHaskellImports content + decls = parseHaskellDecls content + modNode = case modName of + Just mn -> + [ Node + { nodeId = hashPrefix <> T.pack "_" <> T.pack mn + , nodeLabel = T.pack mn + , nodeFileType = CodeFile + , nodeSourceFile = T.pack filePath + , nodeSourceLocation = Just "L1" + , nodeLineEnd = Nothing + , nodeKind = Just "Module" + , nodeSignature = Nothing + , nodeSourceUrl = Nothing + , nodeCapturedAt = Nothing + , nodeAuthor = Nothing + , nodeContributor = Nothing + } + ] + Nothing -> [] + impNodes = [ Node + { nodeId = hashPrefix <> T.pack "_import_" <> T.pack imp + , nodeLabel = T.pack imp + , nodeFileType = CodeFile + , nodeSourceFile = T.pack filePath + , nodeSourceLocation = Nothing + , nodeLineEnd = Nothing + , nodeKind = Just "Module" + , nodeSignature = Nothing + , nodeSourceUrl = Nothing + , nodeCapturedAt = Nothing + , nodeAuthor = Nothing + , nodeContributor = Nothing + } + | imp <- imports + ] + declNodes = [ Node + { nodeId = hashPrefix <> T.pack "_" <> T.pack decl + , nodeLabel = T.pack decl + , nodeFileType = CodeFile + , nodeSourceFile = T.pack filePath + , nodeSourceLocation = Nothing + , nodeLineEnd = Nothing + , nodeKind = Nothing + , nodeSignature = Nothing + , nodeSourceUrl = Nothing + , nodeCapturedAt = Nothing + , nodeAuthor = Nothing + , nodeContributor = Nothing + } + | decl <- decls + ] + in modNode ++ impNodes ++ declNodes + +-- | Build edges from Haskell stub nodes: module→import, module→decl +haskellStubEdges :: FilePath -> [Node] -> [Edge] +haskellStubEdges filePath nodes = + let modNodeM = find (\n -> not ("_import_" `T.isInfixOf` nodeId n)) nodes + in case modNodeM of + Just mn -> + let otherNodes = filter (\n -> nodeId n /= nodeId mn) nodes + in [ Edge + { edgeSource = nodeId mn + , edgeTarget = nodeId other + , edgeRelation = Imports + , edgeConfidence = Ambiguous + , edgeConfidenceScore = 0.7 + , edgeSourceFile = T.pack filePath + , edgeSourceLocation = nodeSourceLocation mn + , edgeWeight = 0.7 + } + | other <- otherNodes + ] + Nothing -> [] + +{- | Parse the module name from a Haskell source file -} +parseHaskellModule :: String -> Maybe String +parseHaskellModule content = + case [line | line <- lines content, "module " `isPrefixOf` dropWhile (== ' ') line] of + (line:_) -> Just $ extractModuleName line + [] -> Nothing + where + extractModuleName line = + let afterModule = dropWhile (== ' ') $ drop 7 line + name = takeWhile (\c -> isAlphaNum c || c `elem` ("._" :: String)) afterModule + in if null name then "Main" else name + +-- | Parse import declarations from a Haskell source file +parseHaskellImports :: String -> [String] +parseHaskellImports content = + [ extractImportName line + | line <- lines content + , "import " `isPrefixOf` dropWhile (== ' ') line + ] + +-- | Extract import name from a line +extractImportName :: String -> String +extractImportName line = + let trimmed = dropWhile (== ' ') line + afterImport = dropWhile (== ' ') $ drop 7 trimmed + isQualified = "qualified " `isPrefixOf` afterImport + afterQual = if isQualified then dropWhile (== ' ') $ drop 9 afterImport else afterImport + name = takeWhile (\c -> isAlphaNum c || c `elem` ("._" :: String)) afterQual + in if null name then "Unknown" else name + +-- | Parse top-level declaration names from a Haskell source file +parseHaskellDecls :: String -> [String] +parseHaskellDecls content = + [ extractDeclName line + | line <- lines content + , isTopLevelDecl line + ] + +-- | Check if a line is a top-level declaration +isTopLevelDecl :: String -> Bool +isTopLevelDecl line = + let trimmed = dropWhile (== ' ') line + in not (null trimmed) + && case trimmed of (c:_) -> c `notElem` ("-{-#" :: String); [] -> False + && not ("module " `isPrefixOf` trimmed) + && not ("import " `isPrefixOf` trimmed) + && not ("where" `isPrefixOf` trimmed) + && not ("deriving" `isPrefixOf` trimmed) + && not ("else" `isPrefixOf` trimmed) + && not ("then" `isPrefixOf` trimmed) + && not ("in " `isPrefixOf` trimmed) + && not ("do" == trimmed) + && not ("let" `isPrefixOf` trimmed) + && not ("=" == dropWhile (/= '=') trimmed) + +-- | Extract declaration name from a line +extractDeclName :: String -> String +extractDeclName line = + let trimmed = dropWhile (== ' ') line + (_prefixLen, rest) = case trimmed of + s | "data " `isPrefixOf` s -> (5 :: Int, drop 5 s) + | "newtype " `isPrefixOf` s -> (8, drop 8 s) + | "type " `isPrefixOf` s -> (5, drop 5 s) + | "class " `isPrefixOf` s -> (6, drop 6 s) + | "instance " `isPrefixOf` s -> (9, drop 9 s) + | "type family " `isPrefixOf` s -> (12, drop 12 s) + | "data family " `isPrefixOf` s -> (12, drop 12 s) + | otherwise -> (0, s) + nameRest = dropWhile (== ' ') rest + name = takeWhile (\c -> isAlphaNum c || c `elem` ("'_" :: String)) nameRest + in if null name + then take 20 trimmed + else name \ No newline at end of file diff --git a/src/Graphos/UseCase/Extract/Markdown.hs b/src/Graphos/UseCase/Extract/Markdown.hs new file mode 100644 index 0000000..f16856d --- /dev/null +++ b/src/Graphos/UseCase/Extract/Markdown.hs @@ -0,0 +1,207 @@ +-- | Markdown/document extraction — headers, tags, wikilinks. +-- Parses headers as nodes, wikilinks/links/tags as edges. +{-# LANGUAGE ScopedTypeVariables #-} +module Graphos.UseCase.Extract.Markdown + ( extractDocFile + , docFileNode + , mkHeaderNode + , mkTagNode + , parseHeader + , parseWikiLinks + , extractTags + ) where + +import Control.Exception (SomeException, catch) +import Data.Char (isAlphaNum) +import Data.List (nub) +import qualified Data.Text as T + +import Graphos.Domain.Types +import Graphos.Infrastructure.Logging (LogEnv, logDebug) +import Graphos.UseCase.Extract.Haskell (makeStubNode) + +-- | Extract concepts and relationships from a document file. +extractDocFile :: LogEnv -> FilePath -> IO Extraction +extractDocFile env filePath = catch (do + content <- readFile filePath + let allNodes = docNodes filePath content + allEdges = docEdges filePath content allNodes + logDebug env $ T.pack $ " [doc] " ++ filePath ++ " → " ++ show (length allNodes) ++ " nodes, " ++ show (length allEdges) ++ " edges" + pure emptyExtraction + { extractionNodes = allNodes + , extractionEdges = allEdges + } + ) $ \(_ :: SomeException) -> do + logDebug env $ T.pack $ " [doc] " ++ filePath ++ " → stub (read error)" + pure emptyExtraction { extractionNodes = [makeStubNode filePath] } + +-- | Parse a document for nodes: file node, headers, tags +docNodes :: FilePath -> String -> [Node] +docNodes filePath content = + let fileNode = docFileNode filePath + headerNodes = docHeaderNodes filePath content + tagNodes = docTagNodes filePath content + in fileNode : headerNodes ++ tagNodes + +-- | Create a file-level node for a document +docFileNode :: FilePath -> Node +docFileNode filePath = + let name = T.pack $ takeWhile (/= '.') $ reverse $ takeWhile (/= '/') $ reverse filePath + dirPart = reverse $ dropWhile (/= '/') $ reverse filePath + dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) + nid = T.pack (show dirHash) <> T.pack "_doc_" <> name + in Node + { nodeId = nid + , nodeLabel = name + , nodeFileType = DocumentFile + , nodeSourceFile = T.pack filePath + , nodeSourceLocation = Just "L1" + , nodeLineEnd = Nothing + , nodeKind = Just "File" + , nodeSignature = Nothing + , nodeSourceUrl = Nothing + , nodeCapturedAt = Nothing + , nodeAuthor = Nothing + , nodeContributor = Nothing + } + +-- | Parse headers (## Title) as nodes +docHeaderNodes :: FilePath -> String -> [Node] +docHeaderNodes filePath content = + [ mkHeaderNode filePath level titleText lineNum + | (lineNum, line) <- zip [1..] (lines content) + , Just (level, titleText) <- [parseHeader line] + , level <= 4 + ] + +-- | Parse a markdown header line, returns (level, title) +parseHeader :: String -> Maybe (Int, String) +parseHeader line = + let trimmed = dropWhile (== ' ') line + in case trimmed of + '#':'#':'#':'#':rest -> Just (4, dropWhile (== ' ') rest) + '#':'#':'#':rest -> Just (3, dropWhile (== ' ') rest) + '#':'#':rest -> Just (2, dropWhile (== ' ') rest) + '#':rest -> Just (1, dropWhile (== ' ') rest) + _ -> Nothing + +-- | Create a node for a header +mkHeaderNode :: FilePath -> Int -> String -> Int -> Node +mkHeaderNode filePath level title lineNum = + let dirPart = reverse $ dropWhile (/= '/') $ reverse filePath + dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) + cleanTitle = T.pack $ takeWhile (\c -> isAlphaNum c || c `elem` (" -'_/" :: String)) title + nid = T.pack (show dirHash) <> T.pack "_h" <> T.pack (show level) <> T.pack "_" <> cleanTitle + in Node + { nodeId = nid + , nodeLabel = cleanTitle + , nodeFileType = DocumentFile + , nodeSourceFile = T.pack filePath + , nodeSourceLocation = Just (T.pack $ "L" ++ show lineNum) + , nodeLineEnd = Nothing + , nodeKind = Just "Header" + , nodeSignature = Nothing + , nodeSourceUrl = Nothing + , nodeCapturedAt = Nothing + , nodeAuthor = Nothing + , nodeContributor = Nothing + } + +-- | Parse tags (#tag or #tag/sub) as nodes +docTagNodes :: FilePath -> String -> [Node] +docTagNodes filePath content = + let tags = nub $ extractTags content + in [ mkTagNode filePath tag | tag <- tags ] + +-- | Extract #tags from text (but exclude # headers) +extractTags :: String -> [String] +extractTags text = + [ tag + | (i, ch) <- zip [0..] text + , ch == '#' + , i > 0 + , let prev = if i > 0 then text !! (i-1) else ' ' + , prev == ' ' || prev == '\n' || prev == ',' + , let afterHash = takeWhile (\ch' -> isAlphaNum ch' || ch' `elem` ("_/-" :: String)) (drop (i+1) text) + , not (null afterHash) + , case afterHash of (c':_) -> c' `notElem` (" " :: String); [] -> True + , let tag = afterHash + , length tag >= 2 + ] + +-- | Create a node for a tag +mkTagNode :: FilePath -> String -> Node +mkTagNode filePath tag = + let dirPart = reverse $ dropWhile (/= '/') $ reverse filePath + dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) + nid = T.pack (show dirHash) <> T.pack "_tag_" <> T.pack tag + in Node + { nodeId = nid + , nodeLabel = T.pack $ "#" ++ tag + , nodeFileType = DocumentFile + , nodeSourceFile = T.pack filePath + , nodeSourceLocation = Nothing + , nodeLineEnd = Nothing + , nodeKind = Just "Tag" + , nodeSignature = Nothing + , nodeSourceUrl = Nothing + , nodeCapturedAt = Nothing + , nodeAuthor = Nothing + , nodeContributor = Nothing + } + +-- | Build edges: file→header (contains), file→tag (tags), wikilinks (references) +docEdges :: FilePath -> String -> [Node] -> [Edge] +docEdges filePath content nodes = + let dirPart = reverse $ dropWhile (/= '/') $ reverse filePath + dirHash = abs (T.foldl' (\acc c -> acc * 31 + fromEnum c) (0 :: Int) (T.pack dirPart) `mod` 65536) + fileNid = T.pack (show dirHash) <> T.pack "_doc_" <> T.pack (takeWhile (/= '.') (reverse $ takeWhile (/= '/') $ reverse filePath)) + headerEdges = [ Edge + { edgeSource = fileNid + , edgeTarget = nodeId n + , edgeRelation = Contains + , edgeConfidence = Extracted + , edgeConfidenceScore = 1.0 + , edgeSourceFile = T.pack filePath + , edgeSourceLocation = Nothing + , edgeWeight = 1.0 + } + | n <- nodes + , T.isInfixOf (T.pack "_h") (nodeId n) + ] + tagEdges = [ Edge + { edgeSource = fileNid + , edgeTarget = nodeId n + , edgeRelation = References + , edgeConfidence = Extracted + , edgeConfidenceScore = 1.0 + , edgeSourceFile = T.pack filePath + , edgeSourceLocation = Nothing + , edgeWeight = 1.0 + } + | n <- nodes + , T.isInfixOf (T.pack "_tag_") (nodeId n) + ] + wikilinkEdges = [ Edge + { edgeSource = fileNid + , edgeTarget = T.pack target + , edgeRelation = References + , edgeConfidence = Extracted + , edgeConfidenceScore = 0.8 + , edgeSourceFile = T.pack filePath + , edgeSourceLocation = Nothing + , edgeWeight = 0.8 + } + | target <- parseWikiLinks content + ] + in headerEdges ++ tagEdges ++ wikilinkEdges + +-- | Parse [[wikilinks]] from markdown content +parseWikiLinks :: String -> [String] +parseWikiLinks content = + [ T.unpack $ T.strip $ T.takeWhile (/= '|') $ T.drop 2 t + | t <- T.splitOn (T.pack "[[") (T.pack content) + , T.isInfixOf (T.pack "]]") t + , let linkText = T.takeWhile (/= ']') t + , not (T.null linkText) + ] \ No newline at end of file diff --git a/src/Graphos/UseCase/Infer.hs b/src/Graphos/UseCase/Infer.hs index fede13d..4cc9f53 100644 --- a/src/Graphos/UseCase/Infer.hs +++ b/src/Graphos/UseCase/Infer.hs @@ -6,11 +6,14 @@ -- * Shared neighbors → SharesDataWith -- * Transitive dependencies → DependsOn -- * Bridge nodes (articulation points) → RationaleFor +-- * Code ↔ Documentation linking → References module Graphos.UseCase.Infer ( -- * Community-bridging inference inferCommunityBridges , inferTransitiveDeps , inferSharedContextEdges + -- * Code↔Doc linking + , inferCodeDocEdges -- * Density-controlled inference , inferEdges -- * Bridge node classification @@ -22,6 +25,7 @@ import Data.List (sortOn, nubBy) import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import qualified Data.Set as Set +import Data.Text (Text) import qualified Data.Text as T import Graphos.Domain.Types @@ -203,9 +207,96 @@ makeSharedDataEdge src tgt sharedCount = Edge } -- ─────────────────────────────────────────────── --- Density-controlled inference +-- Code ↔ Documentation linking -- ─────────────────────────────────────────────── +-- | Infer edges between code nodes and documentation nodes. +-- +-- Two linking strategies: +-- +-- 1. **Name alignment**: A doc file references a module/class name +-- that exists as a code node. E.g. docs mention "Boond.Types" +-- and there's a code node with label "Boond.Types". +-- +-- 2. **Path alignment**: A doc file lives next to a code file. +-- E.g. @src/Foo.hs@ ↔ @docs/Foo.md@ share the base name "Foo". +-- +-- All inferred edges use 'References' relation with 'Inferred' confidence. +inferCodeDocEdges :: Graph -> [Edge] +inferCodeDocEdges g = + let allNodes = Map.toList (gNodes g) + docNodes = [(nid, n) | (nid, n) <- allNodes, nodeFileType n == DocumentFile] + codeNodes = [(nid, n) | (nid, n) <- allNodes, nodeFileType n == CodeFile] + + -- Build a label → NodeId index for code nodes (fast lookup) + codeLabelIdx :: Map Text [NodeId] + codeLabelIdx = Map.fromListWith (++) + [ (nodeLabel cn, [nid]) + | (nid, cn) <- codeNodes + ] + + -- Also index by source file base name (without extension) + codeBaseIdx :: Map Text [NodeId] + codeBaseIdx = Map.fromListWith (++) + [ (fileBaseName (nodeSourceFile cn), [nid]) + | (nid, cn) <- codeNodes + , not (T.null (nodeSourceFile cn)) + ] + + -- Strategy 1: Name alignment + -- Doc nodes whose label matches a code node label → References edge + nameAlignEdges = + [ makeCodeDocEdge codeNid docNid "name-alignment" + | (docNid, dn) <- docNodes + , codeNid <- Map.findWithDefault [] (nodeLabel dn) codeLabelIdx + , notEdgeAlready g docNid codeNid + ] + + -- Strategy 2: Path alignment + -- Doc file "Foo.md" ↔ Code file "Foo.hs" share base name + pathAlignEdges = + [ makeCodeDocEdge codeNid docNid "path-alignment" + | (docNid, dn) <- docNodes + , not (T.null (nodeSourceFile dn)) + , let docBase = fileBaseName (nodeSourceFile dn) + , not (T.null docBase) + , codeNid <- Map.findWithDefault [] docBase codeBaseIdx + , notEdgeAlready g docNid codeNid + ] + + in nubBy (\a b -> edgeSource a == edgeSource b && edgeTarget a == edgeTarget b) + (nameAlignEdges ++ pathAlignEdges) + +-- | Extract the base name of a file path (without directory or extension). +-- "src/Foo/Bar.hs" → "Bar" +fileBaseName :: Text -> Text +fileBaseName path = + let -- Take after last '/' + filename = case T.breakOnEnd "/" path of + (_, f) | not (T.null f) -> T.dropWhile (== '/') f + _ -> path + -- Drop extension (last .xxx segment) + base = case T.breakOnEnd "." filename of + (_, ext) | not (T.null ext) && T.length ext <= 5 -> + case T.breakOnEnd "." (T.dropEnd (T.length ext + 1) filename) of + (_, b) | not (T.null b) -> b + _ -> filename + _ -> filename + in base + +-- | Make a code↔doc edge +makeCodeDocEdge :: NodeId -> NodeId -> Text -> Edge +makeCodeDocEdge src tgt strategy = Edge + { edgeSource = src + , edgeTarget = tgt + , edgeRelation = References + , edgeConfidence = Inferred + , edgeConfidenceScore = 0.7 + , edgeSourceFile = "inferred:code-doc-" <> strategy + , edgeSourceLocation = Nothing + , edgeWeight = 0.7 + } + -- | Infer edges based on density level. -- -- * Sparse: No inferred edges at all (only what was extracted) @@ -213,7 +304,7 @@ makeSharedDataEdge src tgt sharedCount = Edge -- * Dense: Normal + shared-context edges (min 3 shared neighbors) -- * Maximum: Dense + shared-context edges (min 2 shared neighbors) inferEdges :: EdgeDensity -> Graph -> CommunityMap -> [Edge] -inferEdges Sparse _ _ = [] -inferEdges Normal g cm = inferCommunityBridges g cm ++ inferTransitiveDeps g -inferEdges Dense g cm = inferCommunityBridges g cm ++ inferTransitiveDeps g ++ inferSharedContextEdges g 3 -inferEdges Maximum g cm = inferCommunityBridges g cm ++ inferTransitiveDeps g ++ inferSharedContextEdges g 2 \ No newline at end of file +inferEdges Sparse g _ = inferCodeDocEdges g +inferEdges Normal g cm = inferCommunityBridges g cm ++ inferTransitiveDeps g ++ inferCodeDocEdges g +inferEdges Dense g cm = inferCommunityBridges g cm ++ inferTransitiveDeps g ++ inferSharedContextEdges g 3 ++ inferCodeDocEdges g +inferEdges Maximum g cm = inferCommunityBridges g cm ++ inferTransitiveDeps g ++ inferSharedContextEdges g 2 ++ inferCodeDocEdges g \ No newline at end of file diff --git a/tests/Graphos/Infrastructure/LSP/TransportSpec.hs b/tests/Graphos/Infrastructure/LSP/TransportSpec.hs new file mode 100644 index 0000000..2d91eb8 --- /dev/null +++ b/tests/Graphos/Infrastructure/LSP/TransportSpec.hs @@ -0,0 +1,91 @@ +module Graphos.Infrastructure.LSP.TransportSpec where + +import Control.Concurrent.MVar (newMVar) +import Data.IORef (newIORef, readIORef) +import System.Process (ProcessHandle, createProcess, proc, terminateProcess, StdStream(CreatePipe), std_in, std_out, std_err) +import Test.Hspec + +import Graphos.Infrastructure.LSP.Protocol (ServerCapabilities(..), lspShutdown) +import Graphos.Infrastructure.LSP.Transport + ( LSPClient(..) + , LSPClientConfig(..) + , isProcessAlive + , markDisconnected + , sendLSPMessageSafe + ) + +-- | Build a minimal LSPClient for testing connection-state logic. +-- We spawn a short-lived process (cat) so we can observe the alive check. +mkTestClient :: IO LSPClient +mkTestClient = do + (Just inh, Just outh, _, ph) <- createProcess (proc "cat" []) + { std_in = CreatePipe + , std_out = CreatePipe + , std_err = CreatePipe + } + idVar <- newMVar 100 + connectedRef <- newIORef True + pure LSPClient + { lspHandle = ph + , lspStdin = inh + , lspStdout = outh + , lspConfig = LSPClientConfig "" [] "" 5 + , lspMessageId = idVar + , lspServerCaps = ServerCapabilities False False False False False + , lspConnected = connectedRef + } + +spec :: Spec +spec = do + describe "isProcessAlive" $ do + it "returns True for a running process" $ do + client <- mkTestClient + alive <- isProcessAlive (lspHandle client) + alive `shouldBe` True + terminateProcess (lspHandle client) + + it "returns False after the process is terminated" $ do + client <- mkTestClient + terminateProcess (lspHandle client) + -- Give the OS a moment to reap the process + _ <- waitForExit (lspHandle client) + alive <- isProcessAlive (lspHandle client) + alive `shouldBe` False + + describe "markDisconnected" $ do + it "sets lspConnected to False" $ do + client <- mkTestClient + readIORef (lspConnected client) `shouldReturn` True + markDisconnected client + readIORef (lspConnected client) `shouldReturn` False + terminateProcess (lspHandle client) + + describe "sendLSPMessageSafe" $ do + it "returns True when the server is alive" $ do + client <- mkTestClient + result <- sendLSPMessageSafe client lspShutdown + result `shouldBe` True + terminateProcess (lspHandle client) + + it "returns False when already marked disconnected" $ do + client <- mkTestClient + markDisconnected client + result <- sendLSPMessageSafe client lspShutdown + result `shouldBe` False + terminateProcess (lspHandle client) + + it "returns False and marks disconnected when the process is dead" $ do + client <- mkTestClient + terminateProcess (lspHandle client) + _ <- waitForExit (lspHandle client) + result <- sendLSPMessageSafe client lspShutdown + result `shouldBe` False + readIORef (lspConnected client) `shouldReturn` False + +-- | Helper: block until the process exits (or timeout after 2s). +waitForExit :: ProcessHandle -> IO () +waitForExit = go + where + go _ph = do + alive <- isProcessAlive _ph + if alive then go _ph else pure () \ No newline at end of file