Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions cmd/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ var indexCmd = &cobra.Command{
return fmt.Errorf("llm provider: %w", err)
}
pl := pipeline.New(st, prov, cfg)
slog.Info("running Phase 3-4: community detection + summaries")
slog.Info("🧩 running Phase 3-4: community detection + summaries")
if err := pl.Finalize(cmd.Context(), indexVerbose); err != nil {
slog.Error("finalization failed", "err", err)
slog.Error("finalization failed", "err", err)
return err
}
slog.Info("finalization complete")
slog.Info("finalization complete")
return nil
}

Expand All @@ -63,26 +63,26 @@ var indexCmd = &cobra.Command{
}

if indexURL != "" {
slog.Info("crawling documentation site", "url", indexURL, "workers", indexWorkers,
slog.Info("🌐 crawling documentation site", "url", indexURL, "workers", indexWorkers,
"max_pages", indexMaxPages, "max_depth", indexMaxDepth)
if err := pl.IndexURL(cmd.Context(), indexURL, opts); err != nil {
slog.Error("web indexing failed", "url", indexURL, "err", err)
slog.Error("web indexing failed", "url", indexURL, "err", err)
return err
}
slog.Info("web indexing complete", "url", indexURL)
slog.Info("web indexing complete", "url", indexURL)
return nil
}

if len(args) == 0 {
return fmt.Errorf("path or --url required (or use --finalize)")
}

slog.Info("indexing path", "path", args[0], "workers", indexWorkers, "force", indexForce)
slog.Info("📄 indexing path", "path", args[0], "workers", indexWorkers, "force", indexForce)
if err := pl.IndexPath(cmd.Context(), args[0], opts); err != nil {
slog.Error("indexing failed", "path", args[0], "err", err)
slog.Error("indexing failed", "path", args[0], "err", err)
return err
}
slog.Info("indexing complete", "path", args[0])
slog.Info("indexing complete", "path", args[0])
return nil
},
}
Expand Down
4 changes: 2 additions & 2 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ func initConfig() {
var err error
cfg, err = config.Load(cfgFile)
if err != nil {
slog.Error("config error", "err", err)
slog.Error("config error", "err", err)
os.Exit(1)
}
if err := os.MkdirAll(cfg.DataDir, 0755); err != nil {
slog.Error("failed to create data directory", "path", cfg.DataDir, "err", err)
slog.Error("failed to create data directory", "path", cfg.DataDir, "err", err)
os.Exit(1)
}
}
Expand Down
14 changes: 7 additions & 7 deletions cmd/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ var serveCmd = &cobra.Command{
return fmt.Errorf("open store: %w", err)
}
defer st.Close()
slog.Info("store opened", "path", cfg.DBPath())
slog.Info("📂 store opened", "path", cfg.DBPath())

prov, err := llm.NewProvider(&cfg.LLM)
if err != nil {
return fmt.Errorf("llm provider: %w", err)
}
slog.Info("LLM provider initialised", "provider", prov.Name(), "model", prov.ModelID())
slog.Info("⚙️ LLM provider initialised", "provider", prov.Name(), "model", prov.ModelID())

emb := embedder.New(prov, cfg.Indexing.BatchSize)
router := api.NewRouter(st, prov, emb, cfg)
Expand All @@ -58,7 +58,7 @@ var serveCmd = &cobra.Command{

srv := &http.Server{Handler: router, ReadTimeout: 60 * time.Second, WriteTimeout: 120 * time.Second}

slog.Info("server started",
slog.Info("🚀 server started",
"addr", "http://"+addr,
"ui", "http://"+addr+"/",
"mcp", "http://"+addr+"/mcp",
Expand All @@ -70,19 +70,19 @@ var serveCmd = &cobra.Command{

go func() {
if err := srv.Serve(ln); err != nil && err != http.ErrServerClosed {
slog.Error("server error", "err", err)
slog.Error("server error", "err", err)
}
}()

<-ctx.Done()
slog.Info("shutting down...")
slog.Info("🛑 shutting down...")
shutCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if err := srv.Shutdown(shutCtx); err != nil {
slog.Error("shutdown error", "err", err)
slog.Error("shutdown error", "err", err)
return err
}
slog.Info("shutdown complete")
slog.Info("shutdown complete")
return nil
},
}
Expand Down
16 changes: 8 additions & 8 deletions internal/api/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func writeJSON(w http.ResponseWriter, status int, v any) {

// writeError reports a failed request to the client and, for server-side
// failures, to the log.
//
// The client only ever receives msg: it is handed to writeJSON, together with
// status, as a map with the single key "error". The underlying err is not
// included in that payload; it is logged via slog.ErrorContext only when
// status is 500 or above and err is non-nil, so lower statuses are not logged
// here.
//
// NOTE(review): the two identical slog.ErrorContext calls below look like the
// before/after pair of a rendered diff line rather than intentional double
// logging — confirm against the real file.
func writeError(w http.ResponseWriter, r *http.Request, status int, msg string, err error) {
if status >= 500 && err != nil {
// Log with the request's context and URL path so the entry can be tied back to the request.
slog.ErrorContext(r.Context(), "handler error", "path", r.URL.Path, "err", err)
slog.ErrorContext(r.Context(), "handler error", "path", r.URL.Path, "err", err)
}
writeJSON(w, status, map[string]string{"error": msg})
}
Expand Down Expand Up @@ -124,7 +124,7 @@ func (h *handlers) search(w http.ResponseWriter, r *http.Request) {
req.GraphDepth = 2
}

slog.InfoContext(r.Context(), "search request", "mode", req.Mode, "query", req.Query, "top_k", req.TopK)
slog.InfoContext(r.Context(), "🔍 search request", "mode", req.Mode, "query", req.Query, "top_k", req.TopK)

ctx := r.Context()
switch req.Mode {
Expand Down Expand Up @@ -156,7 +156,7 @@ func (h *handlers) graphNeighborhood(w http.ResponseWriter, r *http.Request) {
return
}

slog.DebugContext(r.Context(), "graph neighborhood request", "entity", name, "depth", depth)
slog.DebugContext(r.Context(), "🔗 graph neighborhood request", "entity", name, "depth", depth)

ctx := r.Context()
entity, err := h.store.GetEntityByName(ctx, name)
Expand Down Expand Up @@ -206,7 +206,7 @@ func (h *handlers) graphNeighborhood(w http.ResponseWriter, r *http.Request) {
})
}

slog.DebugContext(r.Context(), "graph neighborhood result", "entity", name, "nodes", len(nodes), "edges", len(edges))
slog.DebugContext(r.Context(), "🔗 graph neighborhood result", "entity", name, "nodes", len(nodes), "edges", len(edges))
writeJSON(w, 200, map[string]any{"nodes": nodes, "edges": edges})
}

Expand Down Expand Up @@ -297,7 +297,7 @@ func (h *handlers) upload(w http.ResponseWriter, r *http.Request) {
}

jobID := fmt.Sprintf("job-%d", len(h.jobProgress))
slog.Info("upload job queued", "job_id", jobID, "files", len(paths))
slog.Info("📦 upload job queued", "job_id", jobID, "files", len(paths))

h.uploadMu.Lock()
h.jobProgress = append(h.jobProgress, fmt.Sprintf("queued: %d files", len(paths)))
Expand All @@ -307,15 +307,15 @@ func (h *handlers) upload(w http.ResponseWriter, r *http.Request) {
defer os.RemoveAll(tmpDir)
pl := pipeline.New(h.store, h.provider, h.cfg)
for _, p := range paths {
slog.Info("upload indexing file", "job_id", jobID, "file", filepath.Base(p))
slog.Info("📦 upload indexing file", "job_id", jobID, "file", filepath.Base(p))
h.setProgress(jobID, fmt.Sprintf("indexing: %s", filepath.Base(p)))
if err := pl.IndexPath(r.Context(), p, pipeline.IndexOptions{}); err != nil {
slog.Error("upload indexing failed", "job_id", jobID, "file", filepath.Base(p), "err", err)
slog.Error("upload indexing failed", "job_id", jobID, "file", filepath.Base(p), "err", err)
h.setProgress(jobID, fmt.Sprintf("error: %v", err))
return
}
}
slog.Info("upload job complete", "job_id", jobID, "files", len(paths))
slog.Info("upload job complete", "job_id", jobID, "files", len(paths))
h.setProgress(jobID, "done")
}()

Expand Down
6 changes: 3 additions & 3 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,22 +113,22 @@ func Load(cfgFile string) (*Config, error) {

if err := v.ReadInConfig(); err != nil {
if _, ok := err.(viper.ConfigFileNotFoundError); ok {
slog.Warn("no config file found, using defaults",
slog.Warn("⚠️ no config file found, using defaults",
"searched_paths", []string{defaultCfgDirLower, defaultCfgDir, "."},
"expected_names", "config.yaml or config.yml")
} else {
return nil, fmt.Errorf("reading config: %w", err)
}
} else {
slog.Info("loaded config file", "path", v.ConfigFileUsed())
slog.Info("⚙️ loaded config file", "path", v.ConfigFileUsed())
}

var cfg Config
if err := v.Unmarshal(&cfg); err != nil {
return nil, fmt.Errorf("unmarshaling config: %w", err)
}

slog.Info("resolved LLM config", "provider", cfg.LLM.Provider)
slog.Info("⚙️ resolved LLM config", "provider", cfg.LLM.Provider)

// Expand home dir
if strings.HasPrefix(cfg.DataDir, "~/") {
Expand Down
30 changes: 15 additions & 15 deletions internal/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,26 +56,26 @@ func Crawl(ctx context.Context, rootURL string, opts Options) ([]*Page, error) {
if !opts.SkipSitemap {
urls, err = discoverSitemap(client, base)
if err != nil {
slog.Debug("sitemap not found, falling back to BFS", "url", rootURL, "reason", err)
slog.Debug("🔍 sitemap not found, falling back to BFS", "url", rootURL, "reason", err)
} else {
slog.Info("sitemap discovered", "url", rootURL, "urls", len(urls))
slog.Info("🔍 sitemap discovered", "url", rootURL, "urls", len(urls))
}
}

// Fall back to BFS
if len(urls) == 0 {
slog.Info("starting BFS crawl", "url", rootURL, "max_pages", opts.MaxPages, "max_depth", opts.MaxDepth)
slog.Info("🌐 starting BFS crawl", "url", rootURL, "max_pages", opts.MaxPages, "max_depth", opts.MaxDepth)
urls = bfsCrawl(ctx, client, base, opts)
slog.Info("BFS crawl complete", "url", rootURL, "pages_found", len(urls))
slog.Info("BFS crawl complete", "url", rootURL, "pages_found", len(urls))
}

// Cap
if opts.MaxPages > 0 && len(urls) > opts.MaxPages {
slog.Debug("capping URLs at max_pages", "total", len(urls), "max_pages", opts.MaxPages)
slog.Debug("⏭️ capping URLs at max_pages", "total", len(urls), "max_pages", opts.MaxPages)
urls = urls[:opts.MaxPages]
}

slog.Info("fetching pages", "count", len(urls), "concurrency", opts.Concurrency)
slog.Info("🌐 fetching pages", "count", len(urls), "concurrency", opts.Concurrency)

// Fetch pages concurrently
pages := make([]*Page, len(urls))
Expand All @@ -91,7 +91,7 @@ func Crawl(ctx context.Context, rootURL string, opts Options) ([]*Page, error) {
defer func() { <-sem }()
p, err := wl.LoadURL(pageURL)
if err != nil {
slog.Debug("failed to fetch page", "url", pageURL, "err", err)
slog.Debug("⚠️ failed to fetch page", "url", pageURL, "err", err)
}
pages[idx] = p
errs[idx] = err
Expand All @@ -111,9 +111,9 @@ func Crawl(ctx context.Context, rootURL string, opts Options) ([]*Page, error) {
}

if fetchErrs > 0 {
slog.Warn("some pages failed to fetch", "failed", fetchErrs, "succeeded", len(result))
slog.Warn("⚠️ some pages failed to fetch", "failed", fetchErrs, "succeeded", len(result))
}
slog.Info("crawl finished", "url", rootURL, "pages_fetched", len(result))
slog.Info("crawl finished", "url", rootURL, "pages_fetched", len(result))
return result, nil
}

Expand Down Expand Up @@ -141,7 +141,7 @@ func discoverSitemap(client *http.Client, base *url.URL) ([]string, error) {
for _, candidate := range candidates {
urls, err := parseSitemap(client, candidate, base)
if err == nil && len(urls) > 0 {
slog.Debug("sitemap parsed", "url", candidate, "entries", len(urls))
slog.Debug("🔍 sitemap parsed", "url", candidate, "entries", len(urls))
return urls, nil
}
}
Expand All @@ -163,7 +163,7 @@ func parseSitemap(client *http.Client, sitemapURL string, base *url.URL) ([]stri
// Try sitemap index first
var idx sitemapIndex
if err := xml.Unmarshal(body, &idx); err == nil && len(idx.Sitemaps) > 0 {
slog.Debug("sitemap index found", "url", sitemapURL, "sub_sitemaps", len(idx.Sitemaps))
slog.Debug("🔍 sitemap index found", "url", sitemapURL, "sub_sitemaps", len(idx.Sitemaps))
var all []string
for _, s := range idx.Sitemaps {
sub, err := parseSitemap(client, s.Loc, base)
Expand Down Expand Up @@ -202,7 +202,7 @@ func bfsCrawl(ctx context.Context, client *http.Client, base *url.URL, opts Opti
for len(queue) > 0 && (opts.MaxPages == 0 || len(found) < opts.MaxPages) {
select {
case <-ctx.Done():
slog.Debug("BFS crawl cancelled by context", "pages_found", len(found))
slog.Debug("🛑 BFS crawl cancelled by context", "pages_found", len(found))
return found
default:
}
Expand All @@ -221,7 +221,7 @@ func bfsCrawl(ctx context.Context, client *http.Client, base *url.URL, opts Opti
}

links := extractLinks(client, item.u, base)
slog.Debug("BFS page links extracted", "url", item.u, "depth", item.depth, "links", len(links))
slog.Debug("🔗 BFS page links extracted", "url", item.u, "depth", item.depth, "links", len(links))
for _, l := range links {
if !visited[l] {
queue = append(queue, struct {
Expand All @@ -238,15 +238,15 @@ func extractLinks(client *http.Client, pageURL string, base *url.URL) []string {
resp, err := client.Get(pageURL)
if err != nil || resp.StatusCode != http.StatusOK {
if err != nil {
slog.Debug("failed to fetch page for link extraction", "url", pageURL, "err", err)
slog.Debug("⚠️ failed to fetch page for link extraction", "url", pageURL, "err", err)
}
return nil
}
defer resp.Body.Close()

doc, err := html.Parse(resp.Body)
if err != nil {
slog.Debug("failed to parse HTML", "url", pageURL, "err", err)
slog.Debug("⚠️ failed to parse HTML", "url", pageURL, "err", err)
return nil
}

Expand Down
6 changes: 6 additions & 0 deletions internal/loader/pdf.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ import (
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model"
)

func init() {
	// Point pdfcpu's config path at the "disable" sentinel so it skips the
	// config directory lookup; otherwise a missing ~/.config/pdfcpu/ leads to
	// "config not found" errors.
	const noConfigDir = "disable"
	model.ConfigPath = noConfigDir
}

// PDFLoader is an empty struct: it carries no fields or per-instance state.
type PDFLoader struct{}

// Supports reports whether ext is exactly ".pdf". The comparison is
// case-sensitive and expects the leading dot to be present.
func (*PDFLoader) Supports(ext string) bool {
	const pdfExt = ".pdf"
	return ext == pdfExt
}
Expand Down
Loading
Loading