diff --git a/api/src/org/labkey/api/ApiModule.java b/api/src/org/labkey/api/ApiModule.java index d613909b2b6..5eecd9f5647 100644 --- a/api/src/org/labkey/api/ApiModule.java +++ b/api/src/org/labkey/api/ApiModule.java @@ -186,6 +186,7 @@ import org.labkey.api.view.ViewServlet; import org.labkey.api.view.WebPartFactory; import org.labkey.api.webdav.WebdavResolverImpl; +import org.labkey.api.wiki.WikiRendererType; import org.labkey.api.writer.ContainerUser; import org.labkey.filters.ContentSecurityPolicyFilter; @@ -527,6 +528,7 @@ public void registerServlets(ServletContext servletCtx) UserManager.TestCase.class, ViewCategoryManager.TestCase.class, WebdavResolverImpl.TestCase.class, + WikiRendererType.TestCase.class, WorkbookContainerType.TestCase.class, WriteableLookAndFeelProperties.TestCase.class ); diff --git a/api/src/org/labkey/api/mcp/McpService.java b/api/src/org/labkey/api/mcp/McpService.java index 6ad04872537..011d3800712 100644 --- a/api/src/org/labkey/api/mcp/McpService.java +++ b/api/src/org/labkey/api/mcp/McpService.java @@ -19,6 +19,7 @@ import org.springframework.ai.support.ToolCallbacks; import org.springframework.ai.tool.ToolCallback; import org.springframework.ai.tool.ToolCallbackProvider; +import org.springframework.ai.document.Document; import org.springframework.ai.vectorstore.VectorStore; import java.util.Arrays; @@ -183,4 +184,15 @@ default List sendMessageEx(ChatClient chat, String message) * CONSIDER: Is it possible to implement VectorStoreRetriever wrapper for SearchService??? */ VectorStore getVectorStore(); + + /** + * Adds documents to the vector store, automatically splitting any document whose token + * count exceeds the embedding model's input limit. Prefer this over + * {@code getVectorStore().add(...)} for indexing — it prevents the + * {@code IllegalArgumentException} that {@code TokenCountBatchingStrategy} throws on + * oversized inputs. + */ + void addDocuments(List documents); + + void saveVectorStore(); } diff --git a/api/src/org/labkey/api/mcp/NoopMcpService.java b/api/src/org/labkey/api/mcp/NoopMcpService.java index f6f63534ce0..57583655059 100644 --- a/api/src/org/labkey/api/mcp/NoopMcpService.java +++ b/api/src/org/labkey/api/mcp/NoopMcpService.java @@ -8,6 +8,7 @@ import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.model.ToolContext; import org.springframework.ai.tool.ToolCallback; +import org.springframework.ai.document.Document; import org.springframework.ai.vectorstore.VectorStore; import java.util.List; @@ -84,4 +85,14 @@ public VectorStore getVectorStore() { return null; } + + @Override + public void addDocuments(List documents) + { + } + + @Override + public void saveVectorStore() + { + } } diff --git a/api/src/org/labkey/api/wiki/WikiRendererType.java b/api/src/org/labkey/api/wiki/WikiRendererType.java index 7260049370c..bf7c2e916a7 100644 Binary files a/api/src/org/labkey/api/wiki/WikiRendererType.java and b/api/src/org/labkey/api/wiki/WikiRendererType.java differ diff --git a/api/src/org/labkey/api/wiki/WikiService.java b/api/src/org/labkey/api/wiki/WikiService.java index 58fa73db6d9..0247ab4b50d 100644 --- a/api/src/org/labkey/api/wiki/WikiService.java +++ b/api/src/org/labkey/api/wiki/WikiService.java @@ -50,6 +50,15 @@ record RenderedWiki (String name, String title, HtmlString html, String entityId RenderedWiki getRenderedWiki(Container c, String name); + record WikiMarkdown(String name, String title, String markdown, String entityId) {} + + /** + * Returns a best-effort Markdown rendering of the wiki's raw source, intended for indexing + * (search, embedding, vector stores) — NOT for user display. Conversion is lossy and may + * drop or mangle markup details that don't have a direct Markdown equivalent. + */ + WikiMarkdown getWikiMarkdown(Container c, String name); + default HtmlString getHtml(Container c, String name) { var wiki = getRenderedWiki(c, name); diff --git a/core/src/org/labkey/core/CoreMcp.java b/core/src/org/labkey/core/CoreMcp.java index fa18a2a55c1..ceff5a4df00 100644 --- a/core/src/org/labkey/core/CoreMcp.java +++ b/core/src/org/labkey/core/CoreMcp.java @@ -4,11 +4,13 @@ import io.modelcontextprotocol.spec.McpSchema.ReadResourceResult; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.json.JSONArray; import org.json.JSONObject; import org.labkey.api.collections.LabKeyCollectors; import org.labkey.api.data.Container; import org.labkey.api.data.ContainerManager; import org.labkey.api.mcp.McpService; +import org.labkey.api.module.ModuleLoader; import org.labkey.api.security.RequiresNoPermission; import org.labkey.api.security.RequiresPermission; import org.labkey.api.security.User; @@ -130,6 +132,27 @@ String setContainer(ToolContext context, @ToolParam(description = "Container pat return message; } + + // TODO replace/augment with available feature list + @Tool(description = "List the modules installed on this server, this may be useful in inferring the available funcitonality. For instance, " + + "the presence of the `premium` module implies the availability of premium featues.") + @RequiresNoPermission + public String listModules(ToolContext context) + { + JSONArray modules = new JSONArray(); + ModuleLoader.getInstance().getModules().stream() + .map(module -> { + JSONObject obj = new JSONObject(); + obj.put("name", module.getName()); + if (StringUtils.isNotEmpty(module.getLabel())) + obj.put("label", module.getLabel()); + return obj; + }) + .forEach(modules::put); + return new JSONObject(Map.of("modules",modules)).toString(); + } + + @McpResource( uri = "resource://org/labkey/core/FileBasedModules.md", mimeType = "application/markdown", diff --git a/devtools/src/org/labkey/devtools/DevtoolsModule.java b/devtools/src/org/labkey/devtools/DevtoolsModule.java index d391f62baa0..178b254ca33 100644 --- a/devtools/src/org/labkey/devtools/DevtoolsModule.java +++ b/devtools/src/org/labkey/devtools/DevtoolsModule.java @@ -18,6 +18,7 @@ import org.jetbrains.annotations.NotNull; import org.labkey.api.exp.property.Domain; +import org.labkey.api.mcp.McpService; import org.labkey.api.module.CodeOnlyModule; import org.labkey.api.module.ModuleContext; import org.labkey.api.security.AuthenticationManager; @@ -71,6 +72,7 @@ protected void init() @Override public void doStartup(ModuleContext moduleContext) { + McpService.get().register(new TestController.DocumentationMCP()); } @Override diff --git a/devtools/src/org/labkey/devtools/TestController.java b/devtools/src/org/labkey/devtools/TestController.java index 5c8cf796240..548e3e5a2bc 100644 --- a/devtools/src/org/labkey/devtools/TestController.java +++ b/devtools/src/org/labkey/devtools/TestController.java @@ -17,8 +17,12 @@ package org.labkey.devtools; import jakarta.servlet.http.HttpServletResponse; +import org.apache.commons.io.IOUtils; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; +import org.json.JSONArray; +import org.json.JSONObject; import org.labkey.api.action.ApiResponse; import org.labkey.api.action.ApiSimpleResponse; import org.labkey.api.action.ConfirmAction; @@ -28,8 +32,16 @@ import org.labkey.api.action.SimpleResponse; import org.labkey.api.action.SimpleViewAction; import org.labkey.api.action.SpringActionController; +import org.labkey.api.announcements.CommSchema; +import org.labkey.api.collections.CaseInsensitiveHashMap; +import org.labkey.api.collections.LabKeyCollectors; import org.labkey.api.data.Container; import org.labkey.api.data.ContainerManager; +import org.labkey.api.data.SQLFragment; +import org.labkey.api.data.SimpleFilter; +import org.labkey.api.data.SqlSelector; +import org.labkey.api.data.TableInfo; +import org.labkey.api.data.TableSelector; import org.labkey.api.mcp.AbstractAgentAction; import org.labkey.api.mcp.McpService; import org.labkey.api.security.CSRF; @@ -52,6 +64,7 @@ import org.labkey.api.util.HtmlString; import org.labkey.api.util.HtmlStringBuilder; import org.labkey.api.util.PageFlowUtil; +import org.labkey.api.util.Path; import org.labkey.api.util.URLHelper; import org.labkey.api.view.ActionURL; import org.labkey.api.view.HtmlView; @@ -64,7 +77,11 @@ import org.labkey.api.view.template.ClientDependency; import org.labkey.api.view.template.PageConfig; import org.labkey.api.wiki.WikiService; +import org.springframework.ai.chat.model.ToolContext; import org.springframework.ai.document.Document; +import org.springframework.ai.tool.annotation.Tool; +import org.springframework.ai.tool.annotation.ToolParam; +import org.springframework.ai.vectorstore.SearchRequest; import org.springframework.ai.vectorstore.SimpleVectorStore; import org.springframework.ai.vectorstore.VectorStore; import org.springframework.dao.PessimisticLockingFailureException; @@ -76,14 +93,16 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Gatherers; +import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.labkey.api.util.DOM.Attribute.name; import static org.labkey.api.util.DOM.Attribute.src; import static org.labkey.api.util.DOM.Attribute.style; @@ -1332,12 +1351,9 @@ public static class PopulateVectorStoreAction extends ConfirmAction @Override public ModelAndView getConfirmView(Object o, BindException errors) { - var db = FileUtil.getTempDirectoryFileLike().resolveChild("VectorStore.database"); HtmlStringBuilder message = HtmlStringBuilder.of(); message.append("This will add the contents of /Documention wikis to the vector store.").append(HtmlString.BR); message.append("This may take a few minutes."); - if (db.exists()) - message.unsafeAppend("

").append("I see a vector store file already exists. Just FYI."); return new HtmlView(message); } @@ -1366,35 +1382,42 @@ public boolean handlePost(Object o, BindException errors) Container documentsContainer = ContainerManager.getForPath("/Documentation"); if (null == documentsContainer) throw new NotFoundException(); - VectorStore vs = McpService.get().getVectorStore(); - if (null == vs) - throw new NotFoundException("/Documentation project was not found"); + McpService mcp = McpService.get(); + if (null == mcp.getVectorStore()) + throw new NotFoundException("VectorStore not enabled."); ActionURL wikiBase = new ActionURL("wiki","page",documentsContainer); WikiService service = Objects.requireNonNull(WikiService.get()); List all = service.getNames(documentsContainer); all.stream() - .map(name -> service.getRenderedWiki(documentsContainer, name)) + .map(name -> service.getWikiMarkdown(documentsContainer, name)) .filter(Objects::nonNull) .map(wiki -> { - count.incrementAndGet(); var metadata = Map.of( - "Content-Type", "text/html", - "filename", wiki.name() + ".html", + "Content-Type", "text/markdown", + "filename", wiki.name() + ".md", "title", (Object)wiki.title(), "source", wikiBase.clone().addParameter("name",wiki.name()).getURIString() ); - return new Document(wiki.entityId(), wiki.html().toString(), metadata); + return new Document(wiki.entityId(), wiki.markdown(), metadata); }) - .gather(Gatherers.windowFixed(50)) - .forEach(vs); + .forEach(d -> { + try + { + mcp.addDocuments(List.of(d)); + count.incrementAndGet(); + } + catch (IllegalArgumentException x) + { + LogManager.getLogger(TestController.class).info(d.getMetadata().get("filename"),x); + } + }); - var db = FileUtil.getTempDirectoryFileLike().resolveChild("VectorStore.database"); try { - ((SimpleVectorStore)vs).save(db.toNioPathForRead().toFile()); + McpService.get().saveVectorStore(); return true; } catch (Exception x) @@ -1404,4 +1427,149 @@ public boolean handlePost(Object o, BindException errors) } } } + + public static class DocumentationMCP implements McpService.McpImpl + { + static JSONObject full_index = null; + + static + { + try + { + full_index = new JSONObject(IOUtils.resourceToString("org/labkey/devtools/FULL_INDEX.json", null, DevtoolsModule.class.getClassLoader())); + } + catch(Exception x) + { + } + } + + + @Tool(description = "List of available documents from the LabKey user and administration manuals.") + @RequiresNoPermission + String listDocuments(ToolContext toolContext, + @ToolParam(description = "Index to start listing for paginatation (staring at 0)") Integer start, + @ToolParam(description = "Count of listings to return for pagination") Integer count) + { + Container documentsContainer = ContainerManager.getForPath("/Documentation"); + if (null == documentsContainer) + return new JSONObject(Map.of("error","There is no /Documentation project on this server")).toString(); + + if (null == full_index) + { + // CONSIDER include hierarchy or paths + // TODO WikiService doesn't expose this, just do a query for now (even though this info is cached) + TableInfo currentWikiVersions = CommSchema.getInstance().getSchema().getTable("CurrentWikiVersions"); + SimpleFilter filter = SimpleFilter.createContainerFilter(documentsContainer); + Collection> rows = new TableSelector(currentWikiVersions, Set.of("Name","Title","RowId","Parent","EntityId"), filter, null).getMapCollection(); + + JSONArray array = new JSONArray(); + for (var row : rows) + { + CaseInsensitiveHashMap copy = new CaseInsensitiveHashMap<>(row); + copy.put("id", String.valueOf(copy.get("EntityId"))); + copy.remove("EntityId"); + array.put(new JSONObject(copy)); + } + var j = new JSONObject(); + j.put("pages", array); + full_index = j; + } + + int index = start instanceof Integer i && i >= 0 ? i : 0; + int num = count instanceof Integer i && i >= 0 ? i : Integer.MAX_VALUE; + + JSONArray pages = full_index.getJSONArray("pages"); + int total = pages.length(); + int end = (int) Math.min((long) index + num, total); + + JSONArray subset = new JSONArray(); + for (int i = index; i < end; i++) + subset.put(pages.get(i)); + + var ret = new JSONObject(); + ret.put("total", total); + ret.put("start", index); + ret.put("count", subset.length()); + ret.put("pages", subset); + return ret.toString(); + } + + @Tool(description = "Return the entire document from the LabKey documentation using the `id` as returned by `searchDocumentation`.") + @RequiresNoPermission + String retrieveDocument( + ToolContext context, + @ToolParam(description = "Id of the document to return") String id) + { + WikiService service = Objects.requireNonNull(WikiService.get()); + Container documentsContainer = ContainerManager.getForPath("/Documentation"); + if (null == documentsContainer) + return new JSONObject(Map.of("error","There is not /Documentation project on this server")).toString(); + + ActionURL wikiBase = new ActionURL("wiki","page",documentsContainer); + var sql = new SQLFragment("SELECT Name FROM ").append(CommSchema.getInstance().getTableInfoPages(), "p").append(" WHERE EntityId = ").appendValue(id); + var name = new SqlSelector(CommSchema.getInstance().getSchema(), sql).getObject(String.class); + var wiki = service.getRenderedWiki(documentsContainer, name); + if (null == wiki) + throw new NotFoundException(); + + var ret = new JSONObject(); + ret.put("Content-Type", "text/html"); + ret.put("filename", wiki.name() + ".html"); + ret.put("id", wiki.entityId()); + ret.put("title", wiki.title()); + ret.put("source", wikiBase.clone().addParameter("name",wiki.name()).getURIString()); + ret.put("contents", wiki.html().toString()); + return ret.toString(); + } + + @Tool(description = "Search the LabKey documentation for documents semantically similar to a natural language query. " + + "Returns matching documents with their content, metadata (title, source URL, content type), and similarity scores.") + @RequiresNoPermission + String searchDocumentation( + ToolContext context, + @ToolParam(description = "Natural language search query describing what you're looking for") String query, + @ToolParam(required = false, description = "Maximum number of results to return, defaults to 5") String topK) + { + VectorStore vs = McpService.get().getVectorStore(); + if (vs == null) + throw new IllegalStateException("Vector store is not available. An embedding model may not be configured."); + + int k = 5; + if (isNotBlank(topK)) + { + try { k = Math.clamp(Integer.parseInt(topK), 1, 20); } + catch (NumberFormatException ignored) {} + } + + SearchRequest request = SearchRequest.builder() + .query(query) + .topK(k) + .build(); + + List results = vs.similaritySearch(request); + + var docs = results.stream() + .map(doc -> { + var obj = new JSONObject(); + obj.put("id", doc.getId()); + String text = doc.getText(); + if (text != null && text.length() > 2000) + text = text.substring(0, 2000) + "..."; + obj.put("content", text); + obj.put("metadata", new JSONObject(doc.getMetadata())); + if (doc.getScore() != null) + obj.put("score", doc.getScore()); + return obj; + }) + .collect(LabKeyCollectors.toJSONArray()); + + var ret = new JSONObject(Map.of( + "query", query, + "resultCount", results.size(), + "results", docs + )); +// LogManager.getLogger(TestController.class).info("Search: " + query + "\nResult: " +ret); + return ret.toString(); + } + } } diff --git a/wiki/src/org/labkey/wiki/WikiManager.java b/wiki/src/org/labkey/wiki/WikiManager.java index c487e0e1259..316b0bd49e8 100644 --- a/wiki/src/org/labkey/wiki/WikiManager.java +++ b/wiki/src/org/labkey/wiki/WikiManager.java @@ -869,6 +869,28 @@ public RenderedWiki getRenderedWiki(Container c, String name) } } + @Override + public WikiMarkdown getWikiMarkdown(Container c, String name) + { + if (null == c || null == name) + return null; + + try + { + Wiki wiki = WikiSelectManager.getWiki(c, name); + if (null == wiki) + return null; + WikiVersion version = wiki.getLatestVersion(); + String body = version.getBody(); + String markdown = version.getRendererTypeEnum().bestAttemptConvertToMarkdown(null == body ? "" : body); + return new WikiMarkdown(name, version.getTitle(), markdown, wiki.getEntityId()); + } + catch (Exception x) + { + throw new RuntimeException(x); + } + } + @Override public void insertWiki(User user, Container c, String name, String body, WikiRendererType renderType, String title) {