From a8e0bb93a9c8db4f97556be276240d8ce507973d Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 20 Mar 2026 03:55:09 +0000 Subject: [PATCH] feat: add emojis to all log messages and upgrade silent skip levels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add professional emojis to all 78 slog calls across 7 files for quick visual scanning of log output - Upgrade "skipping binary file" from Debug to Warn so users see when files are silently rejected during indexing - Upgrade "skipping unchanged file/page" from Debug to Info - Fix pdfcpu "config not found" error by disabling pdfcpu's config directory lookup (model.ConfigPath = "disable") Emoji legend: 📄 file ops 🌐 web/crawl 🔍 search 🔗 graph 🧩 pipeline phases 📦 upload 📊 embedding ⚙️ config 🚀 start ✅ done ⏭️ skip ⚠️ warning ❌ error 🛑 shutdown 💾 storage 📋 claims 📂 store https://claude.ai/code/session_011Ryet7uu9j6VyzNGmUuaaj --- cmd/index.go | 18 ++++----- cmd/root.go | 4 +- cmd/serve.go | 14 +++---- internal/api/handlers.go | 16 ++++---- internal/config/config.go | 6 +-- internal/crawler/crawler.go | 30 +++++++-------- internal/loader/pdf.go | 6 +++ internal/pipeline/pipeline.go | 72 +++++++++++++++++------------------ 8 files changed, 86 insertions(+), 80 deletions(-) diff --git a/cmd/index.go b/cmd/index.go index 49047a3..9287c6a 100644 --- a/cmd/index.go +++ b/cmd/index.go @@ -39,12 +39,12 @@ var indexCmd = &cobra.Command{ return fmt.Errorf("llm provider: %w", err) } pl := pipeline.New(st, prov, cfg) - slog.Info("running Phase 3-4: community detection + summaries") + slog.Info("🧩 running Phase 3-4: community detection + summaries") if err := pl.Finalize(cmd.Context(), indexVerbose); err != nil { - slog.Error("finalization failed", "err", err) + slog.Error("❌ finalization failed", "err", err) return err } - slog.Info("finalization complete") + slog.Info("✅ finalization complete") return nil } @@ -63,13 +63,13 @@ var indexCmd = &cobra.Command{ } if indexURL != "" { - slog.Info("crawling documentation site", "url", indexURL, "workers", indexWorkers, + slog.Info("🌐 crawling documentation site", "url", indexURL, "workers", indexWorkers, "max_pages", indexMaxPages, "max_depth", indexMaxDepth) if err := pl.IndexURL(cmd.Context(), indexURL, opts); err != nil { - slog.Error("web indexing failed", "url", indexURL, "err", err) + slog.Error("❌ web indexing failed", "url", indexURL, "err", err) return err } - slog.Info("web indexing complete", "url", indexURL) + slog.Info("✅ web indexing complete", "url", indexURL) return nil } @@ -77,12 +77,12 @@ var indexCmd = &cobra.Command{ return fmt.Errorf("path or --url required (or use --finalize)") } - slog.Info("indexing path", "path", args[0], "workers", indexWorkers, "force", indexForce) + slog.Info("📄 indexing path", "path", args[0], "workers", indexWorkers, "force", indexForce) if err := pl.IndexPath(cmd.Context(), args[0], opts); err != nil { - slog.Error("indexing failed", "path", args[0], "err", err) + slog.Error("❌ indexing failed", "path", args[0], "err", err) return err } - slog.Info("indexing complete", "path", args[0]) + slog.Info("✅ indexing complete", "path", args[0]) return nil }, } diff --git a/cmd/root.go b/cmd/root.go index 13521ac..b44c2c6 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -55,11 +55,11 @@ func initConfig() { var err error cfg, err = config.Load(cfgFile) if err != nil { - slog.Error("config error", "err", err) + slog.Error("❌ config error", "err", err) os.Exit(1) } if err := os.MkdirAll(cfg.DataDir, 0755); err != nil { - slog.Error("failed to create data directory", "path", cfg.DataDir, "err", err) + slog.Error("❌ failed to create data directory", "path", cfg.DataDir, "err", err) os.Exit(1) } } diff --git a/cmd/serve.go b/cmd/serve.go index 9cc7ea3..74f00b3 100644 --- a/cmd/serve.go +++ b/cmd/serve.go @@ -39,13 +39,13 @@ var serveCmd = &cobra.Command{ return fmt.Errorf("open store: %w", err) } defer st.Close() - slog.Info("store opened", "path", cfg.DBPath()) + slog.Info("📂 store opened", "path", cfg.DBPath()) prov, err := llm.NewProvider(&cfg.LLM) if err != nil { return fmt.Errorf("llm provider: %w", err) } - slog.Info("LLM provider initialised", "provider", prov.Name(), "model", prov.ModelID()) + slog.Info("⚙️ LLM provider initialised", "provider", prov.Name(), "model", prov.ModelID()) emb := embedder.New(prov, cfg.Indexing.BatchSize) router := api.NewRouter(st, prov, emb, cfg) @@ -58,7 +58,7 @@ var serveCmd = &cobra.Command{ srv := &http.Server{Handler: router, ReadTimeout: 60 * time.Second, WriteTimeout: 120 * time.Second} - slog.Info("server started", + slog.Info("🚀 server started", "addr", "http://"+addr, "ui", "http://"+addr+"/", "mcp", "http://"+addr+"/mcp", @@ -70,19 +70,19 @@ var serveCmd = &cobra.Command{ go func() { if err := srv.Serve(ln); err != nil && err != http.ErrServerClosed { - slog.Error("server error", "err", err) + slog.Error("❌ server error", "err", err) } }() <-ctx.Done() - slog.Info("shutting down...") + slog.Info("🛑 shutting down...") shutCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() if err := srv.Shutdown(shutCtx); err != nil { - slog.Error("shutdown error", "err", err) + slog.Error("❌ shutdown error", "err", err) return err } - slog.Info("shutdown complete") + slog.Info("✅ shutdown complete") return nil }, } diff --git a/internal/api/handlers.go b/internal/api/handlers.go index d45fce3..4205143 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -38,7 +38,7 @@ func writeJSON(w http.ResponseWriter, status int, v any) { func writeError(w http.ResponseWriter, r *http.Request, status int, msg string, err error) { if status >= 500 && err != nil { - slog.ErrorContext(r.Context(), "handler error", "path", r.URL.Path, "err", err) + slog.ErrorContext(r.Context(), "❌ handler error", "path", r.URL.Path, "err", err) } writeJSON(w, status, map[string]string{"error": msg}) } @@ -124,7 +124,7 @@ func (h *handlers) search(w http.ResponseWriter, r *http.Request) { req.GraphDepth = 2 } - slog.InfoContext(r.Context(), "search request", "mode", req.Mode, "query", req.Query, "top_k", req.TopK) + slog.InfoContext(r.Context(), "🔍 search request", "mode", req.Mode, "query", req.Query, "top_k", req.TopK) ctx := r.Context() switch req.Mode { @@ -156,7 +156,7 @@ func (h *handlers) graphNeighborhood(w http.ResponseWriter, r *http.Request) { return } - slog.DebugContext(r.Context(), "graph neighborhood request", "entity", name, "depth", depth) + slog.DebugContext(r.Context(), "🔗 graph neighborhood request", "entity", name, "depth", depth) ctx := r.Context() entity, err := h.store.GetEntityByName(ctx, name) @@ -206,7 +206,7 @@ func (h *handlers) graphNeighborhood(w http.ResponseWriter, r *http.Request) { }) } - slog.DebugContext(r.Context(), "graph neighborhood result", "entity", name, "nodes", len(nodes), "edges", len(edges)) + slog.DebugContext(r.Context(), "🔗 graph neighborhood result", "entity", name, "nodes", len(nodes), "edges", len(edges)) writeJSON(w, 200, map[string]any{"nodes": nodes, "edges": edges}) } @@ -297,7 +297,7 @@ func (h *handlers) upload(w http.ResponseWriter, r *http.Request) { } jobID := fmt.Sprintf("job-%d", len(h.jobProgress)) - slog.Info("upload job queued", "job_id", jobID, "files", len(paths)) + slog.Info("📦 upload job queued", "job_id", jobID, "files", len(paths)) h.uploadMu.Lock() h.jobProgress = append(h.jobProgress, fmt.Sprintf("queued: %d files", len(paths))) @@ -307,15 +307,15 @@ func (h *handlers) upload(w http.ResponseWriter, r *http.Request) { defer os.RemoveAll(tmpDir) pl := pipeline.New(h.store, h.provider, h.cfg) for _, p := range paths { - slog.Info("upload indexing file", "job_id", jobID, "file", filepath.Base(p)) + slog.Info("📦 upload indexing file", "job_id", jobID, "file", filepath.Base(p)) h.setProgress(jobID, fmt.Sprintf("indexing: %s", filepath.Base(p))) if err := pl.IndexPath(r.Context(), p, pipeline.IndexOptions{}); err != nil { - slog.Error("upload indexing failed", "job_id", jobID, "file", filepath.Base(p), "err", err) + slog.Error("❌ upload indexing failed", "job_id", jobID, "file", filepath.Base(p), "err", err) h.setProgress(jobID, fmt.Sprintf("error: %v", err)) return } } - slog.Info("upload job complete", "job_id", jobID, "files", len(paths)) + slog.Info("✅ upload job complete", "job_id", jobID, "files", len(paths)) h.setProgress(jobID, "done") }() diff --git a/internal/config/config.go b/internal/config/config.go index 4bda3a0..97eaff7 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -113,14 +113,14 @@ func Load(cfgFile string) (*Config, error) { if err := v.ReadInConfig(); err != nil { if _, ok := err.(viper.ConfigFileNotFoundError); ok { - slog.Warn("no config file found, using defaults", + slog.Warn("⚠️ no config file found, using defaults", "searched_paths", []string{defaultCfgDirLower, defaultCfgDir, "."}, "expected_names", "config.yaml or config.yml") } else { return nil, fmt.Errorf("reading config: %w", err) } } else { - slog.Info("loaded config file", "path", v.ConfigFileUsed()) + slog.Info("⚙️ loaded config file", "path", v.ConfigFileUsed()) } var cfg Config @@ -128,7 +128,7 @@ func Load(cfgFile string) (*Config, error) { return nil, fmt.Errorf("unmarshaling config: %w", err) } - slog.Info("resolved LLM config", "provider", cfg.LLM.Provider) + slog.Info("⚙️ resolved LLM config", "provider", cfg.LLM.Provider) // Expand home dir if strings.HasPrefix(cfg.DataDir, "~/") { diff --git a/internal/crawler/crawler.go b/internal/crawler/crawler.go index 52d1631..a806bea 100644 --- a/internal/crawler/crawler.go +++ b/internal/crawler/crawler.go @@ -56,26 +56,26 @@ func Crawl(ctx context.Context, rootURL string, opts Options) ([]*Page, error) { if !opts.SkipSitemap { urls, err = discoverSitemap(client, base) if err != nil { - slog.Debug("sitemap not found, falling back to BFS", "url", rootURL, "reason", err) + slog.Debug("🔍 sitemap not found, falling back to BFS", "url", rootURL, "reason", err) } else { - slog.Info("sitemap discovered", "url", rootURL, "urls", len(urls)) + slog.Info("🔍 sitemap discovered", "url", rootURL, "urls", len(urls)) } } // Fall back to BFS if len(urls) == 0 { - slog.Info("starting BFS crawl", "url", rootURL, "max_pages", opts.MaxPages, "max_depth", opts.MaxDepth) + slog.Info("🌐 starting BFS crawl", "url", rootURL, "max_pages", opts.MaxPages, "max_depth", opts.MaxDepth) urls = bfsCrawl(ctx, client, base, opts) - slog.Info("BFS crawl complete", "url", rootURL, "pages_found", len(urls)) + slog.Info("✅ BFS crawl complete", "url", rootURL, "pages_found", len(urls)) } // Cap if opts.MaxPages > 0 && len(urls) > opts.MaxPages { - slog.Debug("capping URLs at max_pages", "total", len(urls), "max_pages", opts.MaxPages) + slog.Debug("⏭️ capping URLs at max_pages", "total", len(urls), "max_pages", opts.MaxPages) urls = urls[:opts.MaxPages] } - slog.Info("fetching pages", "count", len(urls), "concurrency", opts.Concurrency) + slog.Info("🌐 fetching pages", "count", len(urls), "concurrency", opts.Concurrency) // Fetch pages concurrently pages := make([]*Page, len(urls)) @@ -91,7 +91,7 @@ func Crawl(ctx context.Context, rootURL string, opts Options) ([]*Page, error) { defer func() { <-sem }() p, err := wl.LoadURL(pageURL) if err != nil { - slog.Debug("failed to fetch page", "url", pageURL, "err", err) + slog.Debug("⚠️ failed to fetch page", "url", pageURL, "err", err) } pages[idx] = p errs[idx] = err @@ -111,9 +111,9 @@ func Crawl(ctx context.Context, rootURL string, opts Options) ([]*Page, error) { } if fetchErrs > 0 { - slog.Warn("some pages failed to fetch", "failed", fetchErrs, "succeeded", len(result)) + slog.Warn("⚠️ some pages failed to fetch", "failed", fetchErrs, "succeeded", len(result)) } - slog.Info("crawl finished", "url", rootURL, "pages_fetched", len(result)) + slog.Info("✅ crawl finished", "url", rootURL, "pages_fetched", len(result)) return result, nil } @@ -141,7 +141,7 @@ func discoverSitemap(client *http.Client, base *url.URL) ([]string, error) { for _, candidate := range candidates { urls, err := parseSitemap(client, candidate, base) if err == nil && len(urls) > 0 { - slog.Debug("sitemap parsed", "url", candidate, "entries", len(urls)) + slog.Debug("🔍 sitemap parsed", "url", candidate, "entries", len(urls)) return urls, nil } } @@ -163,7 +163,7 @@ func parseSitemap(client *http.Client, sitemapURL string, base *url.URL) ([]stri // Try sitemap index first var idx sitemapIndex if err := xml.Unmarshal(body, &idx); err == nil && len(idx.Sitemaps) > 0 { - slog.Debug("sitemap index found", "url", sitemapURL, "sub_sitemaps", len(idx.Sitemaps)) + slog.Debug("🔍 sitemap index found", "url", sitemapURL, "sub_sitemaps", len(idx.Sitemaps)) var all []string for _, s := range idx.Sitemaps { sub, err := parseSitemap(client, s.Loc, base) @@ -202,7 +202,7 @@ func bfsCrawl(ctx context.Context, client *http.Client, base *url.URL, opts Opti for len(queue) > 0 && (opts.MaxPages == 0 || len(found) < opts.MaxPages) { select { case <-ctx.Done(): - slog.Debug("BFS crawl cancelled by context", "pages_found", len(found)) + slog.Debug("🛑 BFS crawl cancelled by context", "pages_found", len(found)) return found default: } @@ -221,7 +221,7 @@ func bfsCrawl(ctx context.Context, client *http.Client, base *url.URL, opts Opti } links := extractLinks(client, item.u, base) - slog.Debug("BFS page links extracted", "url", item.u, "depth", item.depth, "links", len(links)) + slog.Debug("🔗 BFS page links extracted", "url", item.u, "depth", item.depth, "links", len(links)) for _, l := range links { if !visited[l] { queue = append(queue, struct { @@ -238,7 +238,7 @@ func extractLinks(client *http.Client, pageURL string, base *url.URL) []string { resp, err := client.Get(pageURL) if err != nil || resp.StatusCode != http.StatusOK { if err != nil { - slog.Debug("failed to fetch page for link extraction", "url", pageURL, "err", err) + slog.Debug("⚠️ failed to fetch page for link extraction", "url", pageURL, "err", err) } return nil } @@ -246,7 +246,7 @@ func extractLinks(client *http.Client, pageURL string, base *url.URL) []string { doc, err := html.Parse(resp.Body) if err != nil { - slog.Debug("failed to parse HTML", "url", pageURL, "err", err) + slog.Debug("⚠️ failed to parse HTML", "url", pageURL, "err", err) return nil } diff --git a/internal/loader/pdf.go b/internal/loader/pdf.go index fac15fd..b1020b9 100644 --- a/internal/loader/pdf.go +++ b/internal/loader/pdf.go @@ -12,6 +12,12 @@ import ( "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model" ) +func init() { + // Disable pdfcpu's config directory lookup to avoid "config not found" + // errors when ~/.config/pdfcpu/ does not exist. + model.ConfigPath = "disable" +} + type PDFLoader struct{} func (l *PDFLoader) Supports(ext string) bool { return ext == ".pdf" } diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go index 5500839..d62f267 100644 --- a/internal/pipeline/pipeline.go +++ b/internal/pipeline/pipeline.go @@ -81,7 +81,7 @@ func (p *Pipeline) IndexPath(ctx context.Context, path string, opts IndexOptions return fmt.Errorf("no supported files found in %s", path) } - slog.Info("indexing files", "path", path, "count", len(files), "workers", workers) + slog.Info("📄 indexing files", "path", path, "count", len(files), "workers", workers) bar := progressbar.NewOptions(len(files), progressbar.OptionSetDescription("Indexing"), @@ -103,7 +103,7 @@ func (p *Pipeline) IndexPath(ctx context.Context, path string, opts IndexOptions defer bar.Add(1) if err := p.indexFile(ctx, filePath, opts); err != nil { - slog.Warn("failed to index file", "path", filePath, "err", err) + slog.Warn("⚠️ failed to index file", "path", filePath, "err", err) mu.Lock() errs = append(errs, fmt.Sprintf("%s: %v", filePath, err)) mu.Unlock() @@ -113,10 +113,10 @@ func (p *Pipeline) IndexPath(ctx context.Context, path string, opts IndexOptions wg.Wait() if len(errs) > 0 { - slog.Error("indexing finished with errors", "failed", len(errs), "total", len(files)) + slog.Error("❌ indexing finished with errors", "failed", len(errs), "total", len(files)) return fmt.Errorf("indexing errors:\n%s", strings.Join(errs, "\n")) } - slog.Info("indexing complete", "files", len(files)) + slog.Info("✅ indexing complete", "files", len(files)) return nil } @@ -127,7 +127,7 @@ func (p *Pipeline) IndexURL(ctx context.Context, rootURL string, opts IndexOptio workers = p.cfg.Indexing.Workers } - slog.Info("crawling site", "url", rootURL) + slog.Info("🌐 crawling site", "url", rootURL) pages, err := crawler.Crawl(ctx, rootURL, crawler.Options{ MaxPages: opts.MaxPages, MaxDepth: opts.MaxDepth, @@ -140,7 +140,7 @@ func (p *Pipeline) IndexURL(ctx context.Context, rootURL string, opts IndexOptio if len(pages) == 0 { return fmt.Errorf("no pages found at %s", rootURL) } - slog.Info("crawl complete, indexing pages", "url", rootURL, "pages", len(pages)) + slog.Info("✅ crawl complete, indexing pages", "url", rootURL, "pages", len(pages)) bar := progressbar.NewOptions(len(pages), progressbar.OptionSetDescription("Indexing pages"), @@ -161,7 +161,7 @@ func (p *Pipeline) IndexURL(ctx context.Context, rootURL string, opts IndexOptio defer func() { <-sem }() defer bar.Add(1) if err := p.indexRawDoc(ctx, doc, opts); err != nil { - slog.Warn("failed to index page", "url", doc.Path, "err", err) + slog.Warn("⚠️ failed to index page", "url", doc.Path, "err", err) mu.Lock() errs = append(errs, fmt.Sprintf("%s: %v", doc.Path, err)) mu.Unlock() @@ -171,10 +171,10 @@ func (p *Pipeline) IndexURL(ctx context.Context, rootURL string, opts IndexOptio wg.Wait() if len(errs) > 0 { - slog.Error("web indexing finished with errors", "failed", len(errs), "total", len(pages)) + slog.Error("❌ web indexing finished with errors", "failed", len(errs), "total", len(pages)) return fmt.Errorf("indexing errors:\n%s", strings.Join(errs, "\n")) } - slog.Info("web indexing complete", "url", rootURL, "pages", len(pages)) + slog.Info("✅ web indexing complete", "url", rootURL, "pages", len(pages)) return nil } @@ -192,11 +192,11 @@ func (p *Pipeline) indexFile(ctx context.Context, path string, opts IndexOptions return err } if existing != nil && existing.FileHash == hash && !opts.Force { - slog.Debug("skipping unchanged file", "path", path) + slog.Info("⏭️ skipping unchanged file", "path", path) return nil } if existing != nil { - slog.Info("superseding existing document version", "path", path, "old_version", existing.Version) + slog.Info("📄 superseding existing document version", "path", path, "old_version", existing.Version) if err := p.store.SupersedeDocument(ctx, existing.ID); err != nil { return fmt.Errorf("supersede old version: %w", err) } @@ -206,7 +206,7 @@ func (p *Pipeline) indexFile(ctx context.Context, path string, opts IndexOptions doc, err := loader.Load(path) if err != nil { if errors.Is(err, loader.ErrBinaryFile) { - slog.Debug("skipping binary file", "path", path) + slog.Warn("⏭️ skipping binary file", "path", path) return nil } return fmt.Errorf("load: %w", err) @@ -216,11 +216,11 @@ func (p *Pipeline) indexFile(ctx context.Context, path string, opts IndexOptions chunks := p.chunker.Split(doc.Content) if len(chunks) == 0 { - slog.Warn("no chunks produced for file, skipping", "path", path) + slog.Warn("⏭️ no chunks produced for file, skipping", "path", path) return nil } - slog.Debug("indexing file", "path", path, "version", nextVersion, "chunks", len(chunks), "doc_type", doc.DocType) + slog.Debug("📄 indexing file", "path", path, "version", nextVersion, "chunks", len(chunks), "doc_type", doc.DocType) docID := uuid.New().String() if err := p.store.UpsertDocument(ctx, &store.Document{ @@ -262,7 +262,7 @@ func (p *Pipeline) indexFile(ctx context.Context, path string, opts IndexOptions if err != nil { return fmt.Errorf("embed: %w", err) } - slog.Debug("chunks embedded", "path", path, "chunks", len(vecs)) + slog.Debug("📊 chunks embedded", "path", path, "chunks", len(vecs)) if err := p.store.BatchUpsertEmbeddings(ctx, p.provider.ModelID(), chunkIDs, vecs); err != nil { return fmt.Errorf("batch store embeddings: %w", err) @@ -302,7 +302,7 @@ func (p *Pipeline) indexFile(ctx context.Context, path string, opts IndexOptions for label, e := range map[string]error{"graph": graphErr, "claims": claimsErr, "structure": structureErr} { if e != nil { - slog.Warn("extraction warning", "phase", label, "path", path, "err", e) + slog.Warn("⚠️ extraction warning", "phase", label, "path", path, "err", e) } } @@ -335,7 +335,7 @@ func (p *Pipeline) extractGraph(ctx context.Context, docID string, texts []strin } res, err := extractor.ExtractEntities(ctx, p.provider, texts[start:end]) if err != nil { - slog.Debug("entity extraction batch failed", "doc_id", docID, "batch", idx, "err", err) + slog.Debug("⚠️ entity extraction batch failed", "doc_id", docID, "batch", idx, "err", err) } results[idx] = batchResult{res, err} }(bi) @@ -424,7 +424,7 @@ func (p *Pipeline) extractGraph(ctx context.Context, docID string, texts []strin } } - slog.Debug("graph extraction complete", "doc_id", docID, + slog.Debug("🔗 graph extraction complete", "doc_id", docID, "entities", len(toUpsert), "relationships", len(rels)) return p.store.BatchInsertRelationships(ctx, rels) @@ -464,7 +464,7 @@ func (p *Pipeline) extractClaims(ctx context.Context, docID string, texts []stri }) } - slog.Debug("claims extracted", "doc_id", docID, "claims", len(claims)) + slog.Debug("📋 claims extracted", "doc_id", docID, "claims", len(claims)) return p.store.BatchInsertClaims(ctx, claims) } @@ -495,7 +495,7 @@ func (p *Pipeline) structureDocument(ctx context.Context, docID, content string) // Finalize runs Phases 3-4: community detection + parallel summaries. func (p *Pipeline) Finalize(ctx context.Context, verbose bool) error { - slog.Info("Phase 3: loading entities and relationships") + slog.Info("🧩 Phase 3: loading entities and relationships") entities, err := p.store.AllEntities(ctx) if err != nil { return fmt.Errorf("load entities: %w", err) @@ -508,7 +508,7 @@ func (p *Pipeline) Finalize(ctx context.Context, verbose bool) error { if err != nil { return fmt.Errorf("load relationships: %w", err) } - slog.Info("Phase 3: running Louvain community detection", + slog.Info("🧩 Phase 3: running Louvain community detection", "entities", len(entities), "relationships", len(rels)) nodes := make([]string, len(entities)) @@ -525,7 +525,7 @@ func (p *Pipeline) Finalize(ctx context.Context, verbose bool) error { g := community.NewGraph(nodes, edges) levels := community.HierarchicalLouvain(g, p.cfg.Community.MaxLevels, 100) - slog.Info("Phase 3: community detection complete", "levels", len(levels)) + slog.Info("✅ Phase 3: community detection complete", "levels", len(levels)) if err := p.store.ClearCommunities(ctx); err != nil { return err @@ -598,7 +598,7 @@ func (p *Pipeline) Finalize(ctx context.Context, verbose bool) error { } } - slog.Info("Phase 4: summarising communities", "communities", len(workItems)) + slog.Info("🧩 Phase 4: summarising communities", "communities", len(workItems)) type commResult struct { work commWork @@ -620,13 +620,13 @@ func (p *Pipeline) Finalize(ctx context.Context, verbose bool) error { res := commResult{work: work} report, err := community.Summarize(ctx, p.provider, work.entityDescs, work.relDescs) if err != nil { - slog.Warn("community summary failed", "community_idx", idx, "level", work.level, "err", err) + slog.Warn("⚠️ community summary failed", "community_idx", idx, "level", work.level, "err", err) res.title = fmt.Sprintf("Community %d (Level %d)", idx, work.level) res.summary = fmt.Sprintf("Contains %d entities.", work.rank) } else { res.title = report.Title res.summary = report.Summary - slog.Debug("community summarised", "idx", idx, "level", work.level, "title", report.Title) + slog.Debug("✅ community summarised", "idx", idx, "level", work.level, "title", report.Title) } if res.summary != "" { @@ -634,7 +634,7 @@ func (p *Pipeline) Finalize(ctx context.Context, verbose bool) error { if err == nil { res.vector = vec } else { - slog.Warn("community summary embedding failed", "community_idx", idx, "err", err) + slog.Warn("⚠️ community summary embedding failed", "community_idx", idx, "err", err) } } results[idx] = res @@ -642,7 +642,7 @@ func (p *Pipeline) Finalize(ctx context.Context, verbose bool) error { } wg.Wait() - slog.Info("Phase 4: writing communities to store") + slog.Info("💾 Phase 4: writing communities to store") communityAssignments := map[string]string{} for _, res := range results { if err := p.store.UpsertCommunity(ctx, &store.Community{ @@ -685,7 +685,7 @@ func (p *Pipeline) Finalize(ctx context.Context, verbose bool) error { } } if len(toEmbed) > 0 { - slog.Info("embedding entity descriptions", "count", len(toEmbed)) + slog.Info("📊 embedding entity descriptions", "count", len(toEmbed)) descTexts := make([]string, len(toEmbed)) for i, e := range toEmbed { descTexts[i] = e.Description @@ -699,11 +699,11 @@ func (p *Pipeline) Finalize(ctx context.Context, verbose bool) error { } p.store.BatchUpsertEntities(ctx, toEmbed) } else { - slog.Warn("entity description embedding failed", "err", err) + slog.Warn("⚠️ entity description embedding failed", "err", err) } } - slog.Info("Finalize complete", + slog.Info("✅ Finalize complete", "communities", len(workItems), "entities_updated", len(communityAssignments)) return nil @@ -731,11 +731,11 @@ func (p *Pipeline) indexRawDoc(ctx context.Context, doc *loader.RawDocument, opt return err } if existing != nil && existing.FileHash == hash && !opts.Force { - slog.Debug("skipping unchanged page", "url", doc.Path) + slog.Info("⏭️ skipping unchanged page", "url", doc.Path) return nil } if existing != nil { - slog.Info("superseding existing page version", "url", doc.Path, "old_version", existing.Version) + slog.Info("📄 superseding existing page version", "url", doc.Path, "old_version", existing.Version) if err := p.store.SupersedeDocument(ctx, existing.ID); err != nil { return fmt.Errorf("supersede old version: %w", err) } @@ -744,11 +744,11 @@ func (p *Pipeline) indexRawDoc(ctx context.Context, doc *loader.RawDocument, opt nextVersion, canonicalID := versionInfo(existing) chunks := p.chunker.Split(doc.Content) if len(chunks) == 0 { - slog.Warn("no chunks produced for page, skipping", "url", doc.Path) + slog.Warn("⏭️ no chunks produced for page, skipping", "url", doc.Path) return nil } - slog.Debug("indexing page", "url", doc.Path, "version", nextVersion, "chunks", len(chunks)) + slog.Debug("🌐 indexing page", "url", doc.Path, "version", nextVersion, "chunks", len(chunks)) docID := uuid.New().String() if err := p.store.UpsertDocument(ctx, &store.Document{ @@ -788,7 +788,7 @@ func (p *Pipeline) indexRawDoc(ctx context.Context, doc *loader.RawDocument, opt if err != nil { return fmt.Errorf("embed: %w", err) } - slog.Debug("chunks embedded", "url", doc.Path, "chunks", len(vecs)) + slog.Debug("📊 chunks embedded", "url", doc.Path, "chunks", len(vecs)) if err := p.store.BatchUpsertEmbeddings(ctx, p.provider.ModelID(), chunkIDs, vecs); err != nil { return fmt.Errorf("batch store embeddings: %w", err) @@ -823,7 +823,7 @@ func (p *Pipeline) indexRawDoc(ctx context.Context, doc *loader.RawDocument, opt for label, e := range map[string]error{"graph": graphErr, "claims": claimsErr, "structure": structureErr} { if e != nil { - slog.Warn("extraction warning", "phase", label, "url", doc.Path, "err", e) + slog.Warn("⚠️ extraction warning", "phase", label, "url", doc.Path, "err", e) } } return nil