diff --git a/.gitignore b/.gitignore
index 3e874b4d..6a09bbfd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,5 @@ _wavelink/logs
chat_history.db
dev.env
chroma.log
-emojis.yaml
\ No newline at end of file
+emojis.yaml
+.zed/
diff --git a/Dockerfile b/Dockerfile
index 4c36d030..414d7010 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,9 +9,9 @@ RUN useradd -u 6969 --home-dir /jakeybot jakey
# Copy the source code
COPY . .
-# Install C compiler
+# Install C compiler and Nano text editor
RUN apt-get update
-RUN apt-get install g++ --no-install-recommends --yes
+RUN apt-get install g++ nano --no-install-recommends --yes
# Correct ownership
RUN chown -R 6969:6969 /jakeybot
@@ -19,8 +19,11 @@ RUN chown -R 6969:6969 /jakeybot
# Change the user
USER jakey
-# Install dependencies
-RUN pip install --no-cache-dir -r requirements.txt
+# Install base dependencies and optionally plugin dependencies
+RUN pip install --no-cache-dir -r /jakeybot/requirements.txt && \
+ if [ -f /jakeybot/plugins/requirements.txt ]; then \
+ pip install --no-cache-dir -r /jakeybot/plugins/requirements.txt; \
+ fi
# Start the bot
-CMD ["python", "main.py"]
+ENTRYPOINT ["/bin/bash", "/jakeybot/run.sh"]
diff --git a/cogs/ai/generative_chat.py b/cogs/ai/generative_chat.py
index d3ed68f4..ce83433e 100644
--- a/cogs/ai/generative_chat.py
+++ b/cogs/ai/generative_chat.py
@@ -1,4 +1,4 @@
-from core.exceptions import *
+from core.exceptions import CustomErrorMessage
from core.database import History as typehint_History
from discord import Message
from models.core import set_assistant_type
@@ -49,11 +49,11 @@ async def _ask(self, prompt: Message):
_chat_history = await load_history(user_id=prompt.author.id, thread_name=_thread_name, db_conn=self.DBConn)
# Check for /chat:ephemeral only if enable_threads is true
- if not "/chat:ephemeral" in prompt.content:
+ if "/chat:ephemeral" not in prompt.content:
_append_history = True
else:
- await prompt.channel.send("🔒 This conversation is not saved and Jakey won't remember this")
- _append_history = False
+ await prompt.channel.send("> -# This conversation is not saved and Jakey won't remember this")
+ _append_history = False
else:
await prompt.channel.send("> -# ⚠️ This model doesn't support threads and therefore this interaction won't remember the previous and won't be saved.")
_chat_history = None
@@ -70,7 +70,6 @@ async def _ask(self, prompt: Message):
if prompt.attachments:
if _model_props.enable_files:
_uploadedFilesCount = 0
- _processFileInterstitial = await prompt.channel.send("⬆️ Please wait...")
for _attachment in prompt.attachments:
# Check for alt text
_extraMetadata = inspect.cleandoc(
@@ -83,10 +82,9 @@ async def _ask(self, prompt: Message):
""")
await _chat_session.upload_files(attachment=_attachment, extra_metadata=_extraMetadata)
-
+
# Update status
_uploadedFilesCount += 1
- await _processFileInterstitial.edit(f"✅ Added: **{_uploadedFilesCount}** file(s)...")
else:
raise CustomErrorMessage("⚠️ This model doesn't support file attachments, please choose another model to continue")
@@ -130,7 +128,7 @@ async def on_message(self, message: Message):
_command = message.content.split(" ")[0].replace(self.bot.command_prefix, "")
if self.bot.get_command(_command):
return
-
+
# User ID
_userID = message.author.id
@@ -138,13 +136,13 @@ async def on_message(self, message: Message):
if _userID in self.pending_ids:
await message.reply("⚠️ I'm still processing your previous request, please wait for a moment...")
return
-
+
# Check if the bot was only mentioned without any content or image attachments
# If none, then on main.py event, proceed sending the introductory message
if not message.attachments \
and not re.sub(f"<@{self.bot.user.id}>", '', message.content).strip():
return
-
+
# Remove the mention from the prompt
message.content = re.sub(f"<@{self.bot.user.id}>", '', message.content).strip()
@@ -154,13 +152,13 @@ async def on_message(self, message: Message):
# Skip if the mentioned user is the bot itself
if _mentioned_user.id == self.bot.user.id:
continue
-
+
# Get member object for guild-specific display name, fallback to user if not in guild
if message.guild:
_member = message.guild.get_member(_mentioned_user.id)
else:
_member = None
-
+
_user_metadata = inspect.cleandoc(
f"""
Username: @{_mentioned_user.name}
@@ -172,7 +170,7 @@ async def on_message(self, message: Message):
"""
)
_mentioned_users_metadata.append(_user_metadata)
-
+
# Append mentioned users metadata to the message content
if _mentioned_users_metadata:
message.content = message.content + "\n\n" + "\n".join(_mentioned_users_metadata)
@@ -183,20 +181,20 @@ async def on_message(self, message: Message):
_context_message = await message.channel.fetch_message(message.reference.message_id)
message.content = inspect.cleandoc(
f"""
-
+
# Replying to referenced message excerpt from {_context_message.author.display_name} (username: @{_context_message.author.name}):
<|begin_msg_contexts|diff>
{_context_message.content}
<|end_msg_contexts|diff>
-
+
Do not echo this metadata, only use for retrieval purposes
{message.content}"""
)
- await message.channel.send(f"✅ Referenced message: {_context_message.jump_url}")
+ await message.channel.send(f"> -# Referenced message: {_context_message.jump_url}")
- # For now the entire function is under try
+ # For now the entire function is under try
# Maybe this can be separated into another function
try:
# Add the user to the pending list
@@ -232,4 +230,3 @@ async def on_message(self, message: Message):
# Remove the user from the pending list
if _userID in self.pending_ids:
self.pending_ids.remove(_userID)
-
diff --git a/cogs/ai/tasks/avatartools.py b/cogs/ai/tasks/avatartools.py
index 62afb038..0aecc899 100644
--- a/cogs/ai/tasks/avatartools.py
+++ b/cogs/ai/tasks/avatartools.py
@@ -5,7 +5,6 @@
from discord.ext import commands
from discord import Member, DiscordException
from os import environ
-import asyncio
import base64
import discord
import importlib
@@ -197,13 +196,15 @@ async def remix(self, ctx: discord.ApplicationContext, style: str, user: Member
_params = {
"prompt": _crafted_prompt,
"image_urls": [_avatar_url],
+ "limit_generations": True,
+ "num_images": 1
}
# Run the image generation
- _imageURL = await run_image(
- model_name="gemini-25-flash-image/edit",
+ _image_payload = await run_image(
+ model_name="nano-banana-2/edit",
aiohttp_session=self.bot.aiohttp_instance,
- send_url_only=True,
+ send_bytes=False,
**_params
)
@@ -213,8 +214,8 @@ async def remix(self, ctx: discord.ApplicationContext, style: str, user: Member
description=f"Here's a remixed avatar of {_user.name}",
color=discord.Color.random()
)
- _embed.set_image(url=_imageURL[0])
- _embed.set_footer(text=f"Powered by Nano Banana")
+ _embed.set_image(url=_image_payload["images_urls"][0])
+ _embed.set_footer(text=f"Powered by Nano Banana 2")
await ctx.respond(embed=_embed, ephemeral=True)
@remix.error
diff --git a/core/startup.py b/core/startup.py
index 2abe6b55..289c2d2f 100644
--- a/core/startup.py
+++ b/core/startup.py
@@ -1,3 +1,6 @@
+# plugins
+from plugins.storage_plugin import StoragePluginLoader
+
from discord.ext import bridge
from google import genai
from os import environ
@@ -10,7 +13,22 @@ class SubClassBotPlugServices(bridge.Bot):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
- async def start_services(self):
+ # Load storage plugin
+ self.plugins_storage = StoragePluginLoader()
+
+ def start_plugins(self):
+ # Start storage plugin client if it has start method
+ if hasattr(self.plugins_storage, 'start_storage_client'):
+ self.plugins_storage.start_storage_client()
+ logging.info("Storage plugin client started successfully")
+
+ async def stop_plugins(self):
+ # Close storage plugin client if it has close method
+ if hasattr(self.plugins_storage, 'close_storage_client'):
+ await self.plugins_storage.close_storage_client()
+ logging.info("Storage plugin client closed successfully")
+
+ def start_services(self):
# Gemini API Client
self.gemini_api_client = genai.Client(api_key=environ.get("GEMINI_API_KEY"))
logging.info("Gemini API client initialized successfully")
@@ -27,14 +45,6 @@ async def start_services(self):
)
logging.info("OpenAI client for OpenRouter initialized successfully")
- # OpenAI client for Groq based models
- # NOTE: Use litellm SDK instead of OpenAI SDK for Groq models
- #self.openai_client_groq = openai.AsyncClient(
- # api_key=environ.get("GROQ_API_KEY"),
- # base_url="https://api.groq.com/openai/v1"
- #)
- #logging.info("OpenAI client for Groq initialized successfully")
-
async def stop_services(self):
# Close aiohttp client sessions
await self.aiohttp_instance.close()
diff --git a/main.py b/main.py
index 40d20ce8..b8087e35 100644
--- a/main.py
+++ b/main.py
@@ -18,8 +18,8 @@
dotenv.load_dotenv("dev.env")
# Logging
-logging.basicConfig(format='%(levelname)s %(asctime)s [%(pathname)s:%(lineno)d - %(module)s.%(funcName)s()]: %(message)s',
- datefmt='%m/%d/%Y %I:%M:%S %p',
+logging.basicConfig(format='%(levelname)s %(asctime)s [%(pathname)s:%(lineno)d - %(module)s.%(funcName)s()]: %(message)s',
+ datefmt='%m/%d/%Y %I:%M:%S %p',
level=logging.INFO)
# Check if TOKEN is set
@@ -51,14 +51,17 @@ def __init__(self, *args, **kwargs):
mkdir(environ.get("TEMP_DIR"))
# Initialize SDK clients
- self.loop.create_task(self.start_services())
+ self.start_services()
logging.info("Services initialized successfully")
+ # Initialize Plugins
+ self.start_plugins()
+ logging.info("Plugins initialized successfully")
+
# HTTP Client
self.aiohttp_instance = aiohttp.ClientSession(loop=self.loop)
logging.info("HTTP client session initialized successfully")
-
def _lock_socket_instance(self, port):
try:
self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -67,15 +70,19 @@ def _lock_socket_instance(self, port):
except socket.error as e:
logging.error("Failed to bind socket port: %s, reason: %s", port, str(e))
raise e
-
+
async def on_ready(self):
- await self.change_presence(activity=discord.Game(f"Preparing the bot for it's first use..."))
+ await self.change_presence(activity=discord.Game("Preparing the bot for it's first use..."))
#https://stackoverflow.com/a/65780398 - for multiple statuses
- await self.change_presence(activity=discord.Game(f"@ me to get started!"))
+ await self.change_presence(activity=discord.Game("@ me to get started!"))
logging.info("%s is ready and online!", self.user)
# Shutdown the bot
async def close(self):
+ # Close Plugins
+ await self.stop_plugins()
+ logging.info("Plugins stopped successfully")
+
# Close services
await self.stop_services()
logging.info("Services stopped successfully")
@@ -84,7 +91,7 @@ async def close(self):
if Path(environ.get("TEMP_DIR", "temp")).exists():
for file in Path(environ.get("TEMP_DIR", "temp")).iterdir():
await aiofiles.os.remove(file)
-
+
# Close socket
self._socket.close()
@@ -102,7 +109,7 @@ async def on_message(message: discord.Message):
if message.author == bot.user:
return
-
+
# Check if the bot was only mentioned without any content or image attachments
# On generative ask command, the same logic is used but it will just invoke return and the bot will respond with this
if bot.user.mentioned_in(message) \
@@ -113,15 +120,15 @@ async def on_message(message: discord.Message):
I am an AI bot and I can also make your server fun and entertaining! 🎉
You just pinged me, but what can I do for you? 🤔
-
- - You can ask me anything by typing **/ask** and get started or by mentioning me again but with a message
- - You can access most of my useful commands with **/**slash commands or use `{bot.command_prefix}help` to see the list prefixed commands I have.
+
+ - You can ask me anything by mentioning me with a message
+ - You can access most of my useful commands with **/**slash commands or ask me what I can do to pull my internal knowledge base.
- You can access my apps by **tapping and holding any message** or **clicking the three-dots menu** and click **Apps** to see the list of apps I have
-
+
You can ask me questions, such as:
- - **@{bot.user.name}** How many R's in the word strawberry?
- - **/ask** `prompt:`Can you tell me a joke?
- - Hey **@{bot.user.name}** can you give me quotes for today?
+ - **@{bot.user.name}** How many R's in the word strawberry?
+ - Hey **@{bot.user.name}** can you give me quotes for today?
+ - **@{bot.user.name}** list me your slash commands
If you have any questions, you can visit my [documentation or contact me here](https://zavocc.github.io)"""))
@@ -135,4 +142,4 @@ async def on_message(message: discord.Message):
logging.error("cogs.%s failed to load, skipping... The following error of the cog: %s", command, e)
continue
-bot.run(environ.get('TOKEN'))
+bot.run(environ.get('TOKEN'))
diff --git a/models/chat_utils.py b/models/chat_utils.py
index d0cf7e38..10b06f99 100644
--- a/models/chat_utils.py
+++ b/models/chat_utils.py
@@ -2,10 +2,19 @@
from core.database import History
from core.exceptions import CustomErrorMessage
import aiofiles
+import aiohttp
import logging
import yaml
# Methods for generative_chat.py
+# Download File Attachments method
+async def download_attachment_to_file(attachment_url: str, file_path: str, aiohttp_session: aiohttp.ClientSession) -> None:
+    """Stream attachment_url into file_path in 8 KiB chunks; a non-2xx response raises aiohttp.ClientResponseError before the file is opened."""
+    async with aiohttp_session.get(attachment_url, allow_redirects=True, raise_for_status=True) as file_dl, aiofiles.open(file_path, "wb") as filepath:
+        async for _chunk in file_dl.content.iter_chunked(8192):
+            await filepath.write(_chunk)
+    logging.info("File downloaded successfully to %s", file_path)
+
# Fetch and validate models
async def fetch_model(model_alias: str) -> ModelProps:
# Load the models list from YAML file
diff --git a/models/providers/google/completion.py b/models/providers/google/completion.py
index c6a9057c..441e3486 100644
--- a/models/providers/google/completion.py
+++ b/models/providers/google/completion.py
@@ -99,38 +99,29 @@ async def send_message(self, prompt: str, chat_history: list = None, system_inst
if not self.model_props.model_id:
raise ValueError("Model is required, chose nothing")
- # Additional model params
- # Log
- if self.model_props.additional_params:
- logging.info("Merging additional_params into model_params: %s", self.model_props.additional_params)
-
- # Reverse merge
- _merged_params = self.model_props.additional_params.copy() if self.model_props.additional_params else {}
-
- # Remove model and messages if they exist in additional_params to avoid conflicts
- logging.info("Removing conflicting keys from additional_params if present")
- # Remove core conflicting keys
- _merged_params.pop("system_instruction", None)
- _merged_params.pop("tools", None)
-
- # Remove others found in model_params
- for _keys in self.model_params.keys():
- if _keys in _merged_params:
- logging.info("Removing key from additional_params to avoid conflict: %s", _keys)
- _merged_params.pop(_keys, None)
-
- # Update with model defaults
- _merged_params.update(self.model_params)
+ # Merge additional model params with defaults.
+ # Order matters: additional_params is loaded first, model_params overrides conflicts.
+ _additional_params = self.model_props.additional_params or {}
+ if _additional_params:
+ logging.info("Merging additional_params into model_params: %s", _additional_params)
+ _merged_params = {
+ **_additional_params,
+ **self.model_params,
+ }
+ logging.info("Final merged model parameters: %s", _merged_params)
+
+ # Keep system instruction authoritative at request time.
+ _request_config = {
+ **_merged_params,
+ "system_instruction": system_instructions or None,
+ }
# Generate
try:
_response: google_genai_types.GenerateContentResponse = await self.google_genai_client.aio.models.generate_content(
model=self.model_props.model_id,
contents=chat_history,
- config={
- "system_instruction": system_instructions or None,
- **_merged_params
- }
+ config=_request_config
)
except google_genai_errors.ClientError as e:
# Attempt to clear all file URLs since they may be expired
@@ -190,10 +181,7 @@ async def send_message(self, prompt: str, chat_history: list = None, system_inst
_response: google_genai_types.GenerateContentResponse = await self.google_genai_client.aio.models.generate_content(
model=self.model_props.model_id,
contents=chat_history,
- config={
- **self.model_params,
- "system_instruction": system_instructions or None
- }
+ config=_request_config
)
# Check if we need to run tools again, this block will stop the loop and response should have been sent
diff --git a/models/providers/litellm/completion.py b/models/providers/litellm/completion.py
index 7c40d74a..830a8586 100644
--- a/models/providers/litellm/completion.py
+++ b/models/providers/litellm/completion.py
@@ -1,17 +1,17 @@
from .utils import LiteLLMUtils
from core.database import History as typehint_History
-from core.exceptions import CustomErrorMessage
+
from models.validation import ModelParamsOpenAIDefaults as typehint_ModelParams
from models.validation import ModelProps as typehint_ModelProps
-from os import environ
+
import discord as typehint_Discord
import litellm
import logging
import models.core
class ChatSession(LiteLLMUtils):
- def __init__(self,
- user_id: int,
+ def __init__(self,
+ user_id: int,
model_props: typehint_ModelProps,
discord_bot: typehint_Discord.Bot = None,
discord_message: typehint_Discord.Message = None,
@@ -48,11 +48,11 @@ def __init__(self,
# Database
self.db_conn: typehint_History = db_conn or None
-
+
# Chat
async def send_message(self, prompt: str, chat_history: list = None, system_instructions: str = None):
# Load chat history and system instructions
- if chat_history is None or type(chat_history) != list:
+ if chat_history is None or type(chat_history) is not list:
chat_history = []
if self.model_props.enable_system_instruction and system_instructions:
chat_history.append({
@@ -90,38 +90,29 @@ async def send_message(self, prompt: str, chat_history: list = None, system_inst
if not self.model_props.model_id:
raise ValueError("Model is required, chose nothing")
- # Additional model params
- # Log
- if self.model_props.additional_params:
- logging.info("Merging additional_params into model_params: %s", self.model_props.additional_params)
-
- # Reverse merge
- _merged_params = self.model_props.additional_params.copy() if self.model_props.additional_params else {}
-
- # Remove model and messages if they exist in additional_params to avoid conflicts
- logging.info("Removing conflicting keys from additional_params if present")
- # Remove core conflicting keys
- _merged_params.pop("model", None)
- _merged_params.pop("messages", None)
- _merged_params.pop("tools", None)
-
- # Remove others found in model_params
- for _keys in self.model_params.keys():
- if _keys in _merged_params:
- logging.info("Removing key from additional_params to avoid conflict: %s", _keys)
- _merged_params.pop(_keys, None)
-
- # Update with model defaults
- _merged_params.update(self.model_params)
+ # Merge additional model params with defaults.
+ # Order matters: additional_params is loaded first, model_params overrides conflicts.
+ _additional_params = self.model_props.additional_params or {}
+ if _additional_params:
+ logging.info("Merging additional_params into model_params: %s", _additional_params)
+ _merged_params = {
+ **_additional_params,
+ **self.model_params,
+ }
logging.info("Final merged model parameters: %s", _merged_params)
+
+ # Keep request-owned fields authoritative.
+ _base_request_kwargs = {
+ **_merged_params,
+ "model": self.model_props.model_id,
+ }
# Drop unnecessary params
litellm.drop_params = True
- _response = await litellm.acompletion(
- model=self.model_props.model_id,
- messages=chat_history,
- **_merged_params
- )
+ _response = await litellm.acompletion(**{
+ **_base_request_kwargs,
+ "messages": chat_history,
+ })
# Check for tool calls
while True:
@@ -144,11 +135,10 @@ async def send_message(self, prompt: str, chat_history: list = None, system_inst
chat_history.extend(_tool_parts)
# Run the response the second time
- _response = await litellm.acompletion(
- model=self.model_props.model_id,
- messages=chat_history,
- **_merged_params
- )
+ _response = await litellm.acompletion(**{
+ **_base_request_kwargs,
+ "messages": chat_history,
+ })
# Check if we need to run tools again, this block will stop the loop and send the response
if not _response.choices[0].message.tool_calls:
diff --git a/models/providers/litellm/utils.py b/models/providers/litellm/utils.py
index 1f1949e3..c77c7a9e 100644
--- a/models/providers/litellm/utils.py
+++ b/models/providers/litellm/utils.py
@@ -1,5 +1,11 @@
from core.exceptions import CustomErrorMessage
+from models.chat_utils import download_attachment_to_file
+from os import environ
+from pathlib import Path
from tools.utils import fetch_tool_schema, return_builtin_tool_object, return_api_tools_object
+from uuid import uuid4
+import aiofiles
+import aiohttp
import discord as typehint_Discord
import json
import logging
@@ -8,6 +14,7 @@ class LiteLLMUtils:
# Handle multimodal
# Remove one per image restrictions so we'll just
async def upload_files(self, attachment: typehint_Discord.Attachment, extra_metadata: str = None):
+ # Handle multimodal
# Check if the attachment is an image
if not attachment.content_type.startswith("image"):
raise CustomErrorMessage("⚠️ This model only supports image attachments")
@@ -15,11 +22,54 @@ async def upload_files(self, attachment: typehint_Discord.Attachment, extra_meta
if not hasattr(self, "uploaded_files"):
self.uploaded_files = []
+ # Test if we have "self.discord_bot.aiohttp_instance"
+ if hasattr(self.discord_bot, "aiohttp_instance"):
+ logging.info("Found aiohttp_instance in discord bot, using that for downloading the file")
+ _aiohttp_session: aiohttp.ClientSession = self.discord_bot.aiohttp_instance
+ else:
+ # Raise exception since we don't have a session
+ logging.warning("No aiohttp_instance found in discord bot, aborting")
+ raise CustomErrorMessage("⚠️ An error has occurred while processing the file, please try again later.")
+
+ # Check if we have 'plugins_storage' from discord_bot
+ if not hasattr(self.discord_bot, "plugins_storage"):
+ logging.warning("No plugins_storage found in discord bot, aborting file upload")
+ raise CustomErrorMessage("⚠️ An error has occurred while processing the file, please try again later.")
+
+ # Grab filename
+ _filename = f"{environ.get('TEMP_DIR')}/JAKEY.{uuid4()}.{attachment.filename}"
+ try:
+ # Check if enabled is set in config
+ if self.discord_bot.plugins_storage.enabled:
+ # Download file using shared chunked helper.
+ await download_attachment_to_file(
+ attachment_url=attachment.url,
+ file_path=_filename,
+ aiohttp_session=_aiohttp_session,
+ )
+
+ # Upload the file to blob storage
+ _blob_url = await self.discord_bot.plugins_storage.upload_files(file_path=_filename, file_name=Path(_filename).name)
+
+ # Log
+ logging.info("The file %s has been uploaded to storage successfully, direct link URL: %s", attachment.filename, _blob_url)
+ else:
+ # If not enabled, use the attachment URL directly but with a warning about TTL
+ _blob_url = attachment.url
+ logging.warning("Storage plugin is disabled, attached with filename %s using Discord CDN URL directly which may expire: %s", attachment.filename, _blob_url)
+ except Exception as e:
+ # Raise exception
+ raise e
+ finally:
+ # Remove the file if it exists ensuring no data persists even on failure
+ if Path(_filename).exists():
+ await aiofiles.os.remove(_filename)
+
self.uploaded_files.append(
{
"type": "image_url",
"image_url": {
- "url": attachment.url
+ "url": _blob_url
}
}
)
diff --git a/models/providers/openai/completion.py b/models/providers/openai/completion.py
index 8b9263f3..68e6e827 100644
--- a/models/providers/openai/completion.py
+++ b/models/providers/openai/completion.py
@@ -1,6 +1,5 @@
from .utils import OpenAIUtils
from core.database import History as typehint_History
-from core.exceptions import CustomErrorMessage
from models.validation import ModelParamsOpenAIDefaults as typehint_ModelParams
from models.validation import ModelProps as typehint_ModelProps
from os import environ
@@ -99,42 +98,35 @@ async def send_message(self, prompt: str, chat_history: list = None, system_inst
if not self.model_props.model_id:
raise ValueError("Model is required, chose nothing")
- # Additional model params
- # Log
- if self.model_props.additional_params:
- logging.info("Merging additional_params into model_params: %s", self.model_props.additional_params)
-
- # Reverse merge
- _merged_params = self.model_props.additional_params.copy() if self.model_props.additional_params else {}
-
- # Remove model and messages if they exist in additional_params to avoid conflicts
- logging.info("Removing conflicting keys from additional_params if present")
- # Remove core conflicting keys
- _merged_params.pop("model", None)
- _merged_params.pop("messages", None)
- _merged_params.pop("tools", None)
-
- # Check if reasoning_effort exists as max_tokens cannot coexist and must be max_completion_tokens instead
- if "reasoning_effort" in _merged_params:
+ # Merge additional model params with defaults.
+ # Order matters: additional_params is loaded first, model_params overrides conflicts.
+ _additional_params = (self.model_props.additional_params or {}).copy()
+ _effective_model_params = self.model_params.copy()
+ if _additional_params:
+ logging.info("Merging additional_params into model_params: %s", _additional_params)
+
+ # reasoning_effort cannot coexist with max_tokens; map defaults to max_completion_tokens.
+ if "reasoning_effort" in _additional_params:
logging.info("reasoning_effort found in additional_params, converting from max_tokens to max_completion_tokens")
- _merged_params["max_completion_tokens"] = self.model_params.pop("max_tokens", 16000)
+ _additional_params["max_completion_tokens"] = _effective_model_params.pop("max_tokens", 16000)
- # Remove others found in model_params
- for _keys in self.model_params.keys():
- if _keys in _merged_params:
- logging.info("Removing key from additional_params to avoid conflict: %s", _keys)
- _merged_params.pop(_keys, None)
-
- # Update with model defaults
- _merged_params.update(self.model_params)
+ _merged_params = {
+ **_additional_params,
+ **_effective_model_params,
+ }
logging.info("Final merged model parameters: %s", _merged_params)
+
+ # Keep request-owned fields authoritative.
+ _base_request_kwargs = {
+ **_merged_params,
+ "model": self.model_props.model_id,
+ }
# Generate responses
- _response = await self.openai_client.chat.completions.create(
- model=self.model_props.model_id,
- messages=chat_history,
- **_merged_params
- )
+ _response = await self.openai_client.chat.completions.create(**{
+ **_base_request_kwargs,
+ "messages": chat_history,
+ })
# Check for tool calls
while True:
@@ -157,11 +149,10 @@ async def send_message(self, prompt: str, chat_history: list = None, system_inst
chat_history.extend(_tool_parts)
# Run the response the second time
- _response = await self.openai_client.chat.completions.create(
- model=self.model_props.model_id,
- messages=chat_history,
- **_merged_params
- )
+ _response = await self.openai_client.chat.completions.create(**{
+ **_base_request_kwargs,
+ "messages": chat_history,
+ })
# Check if we need to run tools again, this block will stop the loop and send the response
if not _response.choices[0].message.tool_calls:
diff --git a/models/providers/openai/utils.py b/models/providers/openai/utils.py
index 037e9ffb..09b84ce6 100644
--- a/models/providers/openai/utils.py
+++ b/models/providers/openai/utils.py
@@ -1,12 +1,17 @@
from core.exceptions import CustomErrorMessage
+from models.chat_utils import download_attachment_to_file
+from os import environ
+from pathlib import Path
from tools.utils import fetch_tool_schema, return_builtin_tool_object, return_api_tools_object
+from uuid import uuid4
+import aiofiles
+import aiohttp
import discord as typehint_Discord
import json
import logging
class OpenAIUtils:
# Handle multimodal
- # Remove one per image restrictions so we'll just
async def upload_files(self, attachment: typehint_Discord.Attachment, extra_metadata: str = None):
# Check if the attachment is an image
if not attachment.content_type.startswith("image"):
@@ -15,11 +20,54 @@ async def upload_files(self, attachment: typehint_Discord.Attachment, extra_meta
if not hasattr(self, "uploaded_files"):
self.uploaded_files = []
+ # Test if we have "self.discord_bot.aiohttp_instance"
+ if hasattr(self.discord_bot, "aiohttp_instance"):
+ logging.info("Found aiohttp_instance in discord bot, using that for downloading the file")
+ _aiohttp_session: aiohttp.ClientSession = self.discord_bot.aiohttp_instance
+ else:
+ # Raise exception since we don't have a session
+ logging.warning("No aiohttp_instance found in discord bot, aborting")
+ raise CustomErrorMessage("⚠️ An error has occurred while processing the file, please try again later.")
+
+ # Check if we have 'plugins_storage' from discord_bot
+ if not hasattr(self.discord_bot, "plugins_storage"):
+ logging.warning("No plugins_storage found in discord bot, aborting file upload")
+ raise CustomErrorMessage("⚠️ An error has occurred while processing the file, please try again later.")
+
+ # Grab filename
+ _filename = f"{environ.get('TEMP_DIR')}/JAKEY.{uuid4()}.{attachment.filename}"
+ try:
+ # Check if enabled is set in config
+ if self.discord_bot.plugins_storage.enabled:
+ # Download file using shared chunked helper.
+ await download_attachment_to_file(
+ attachment_url=attachment.url,
+ file_path=_filename,
+ aiohttp_session=_aiohttp_session,
+ )
+
+ # Upload the file to blob storage
+ _blob_url = await self.discord_bot.plugins_storage.upload_files(file_path=_filename, file_name=Path(_filename).name)
+
+ # Log
+ logging.info("The file %s has been uploaded to storage successfully, direct link URL: %s", attachment.filename, _blob_url)
+ else:
+ # If not enabled, use the attachment URL directly but with a warning about TTL
+ _blob_url = attachment.url
+ logging.warning("Storage plugin is disabled, attached with filename %s using Discord CDN URL directly which may expire: %s", attachment.filename, _blob_url)
+ except Exception as e:
+ # Raise exception
+ raise e
+ finally:
+ # Remove the file if it exists ensuring no data persists even on failure
+ if Path(_filename).exists():
+ await aiofiles.os.remove(_filename)
+
self.uploaded_files.append(
{
"type": "image_url",
"image_url": {
- "url": attachment.url
+ "url": _blob_url
}
}
)
diff --git a/models/tasks/media/fal_ai.py b/models/tasks/media/fal_ai.py
index 4bd84693..09197aee 100644
--- a/models/tasks/media/fal_ai.py
+++ b/models/tasks/media/fal_ai.py
@@ -1,5 +1,5 @@
from os import environ
-from typing import Union
+from typing import Any, Union
import aiohttp
import fal_client
@@ -7,13 +7,13 @@
async def run_image(
model_name: str,
aiohttp_session: aiohttp.ClientSession = None,
- send_url_only: bool = False,
+ send_bytes: bool = True,
**additional_client_args
-) -> Union[list[bytes], list[str]]:
+) -> dict[str, list[Any]]:
# Check if we have aiohttp session supplied or use the default one
- if not send_url_only:
+ if send_bytes:
if not aiohttp_session:
- raise ValueError("aiohttp_session must be provided if send_url_only is False")
+ raise ValueError("aiohttp_session must be provided if send_bytes is True")
_aiohttp_session = aiohttp_session
# check if FAL_KEY is set
@@ -37,25 +37,32 @@ async def run_image(
# Wait for the result
_result = await _status.get()
- if send_url_only:
- return [_image["url"] for _image in _result["images"]]
- else:
+ # Extract the image URLs.
+ _images_urls = [_image["url"] for _image in _result["images"]]
+ _response_payload: dict[str, list[Any]] = {
+ "images_urls": _images_urls,
+ "images_in_bytes": []
+ }
+
+ if send_bytes:
# Image in bytes
_images_in_bytes = []
# Download images
- for _images in _result["images"]:
- async with _aiohttp_session.get(_images["url"]) as response:
+ for _image_url in _images_urls:
+ async with _aiohttp_session.get(_image_url) as response:
if response.status == 200:
_image_data = await response.read()
# Send the image
_images_in_bytes.append(_image_data)
else:
- raise ValueError(f"Failed to download image from {_images}, status code: {response.status}")
+ raise ValueError(f"Failed to download image from {_image_url}, status code: {response.status}")
- # Cleanup
- return _images_in_bytes
+ _response_payload["images_in_bytes"] = _images_in_bytes
+
+ # Cleanup
+ return _response_payload
async def run_audio(
model_name: str,
diff --git a/models/tasks/text/openai.py b/models/tasks/text/openai.py
index 992d5472..acdd56b3 100644
--- a/models/tasks/text/openai.py
+++ b/models/tasks/text/openai.py
@@ -44,6 +44,10 @@ async def completion(prompt: Union[str, list],
**_oparams
)
+ # If client session was not provided, we should close the default client session to prevent resource leaks
+ if not client_session:
+ await _client.close()
+
if return_text:
return _response.choices[0].message.content
else:
diff --git a/models/validation.py b/models/validation.py
index 4f607a88..b8078714 100644
--- a/models/validation.py
+++ b/models/validation.py
@@ -41,9 +41,7 @@ class GeminiSafetySetting(BaseModel):
class ModelParamsGeminiDefaults(BaseModel):
candidate_count: int = Field(default=1)
max_output_tokens: int = Field(default=8192)
- temperature: float = Field(default=0.7)
- top_p: float = Field(default=0.95)
- top_k: int = Field(default=40)
+ temperature: float = Field(default=1)
safety_settings: List[GeminiSafetySetting] = Field(
default=[
GeminiSafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="BLOCK_MEDIUM_AND_ABOVE"),
diff --git a/plugins/abc/storage.py b/plugins/abc/storage.py
new file mode 100644
index 00000000..1a2880c9
--- /dev/null
+++ b/plugins/abc/storage.py
@@ -0,0 +1,25 @@
+from abc import ABC, abstractmethod
+
+# Storage ABC without client object.
+class StorageOneOff(ABC):
+    """Interface for storage plugins that upload without a start/close client lifecycle."""
+
+    @abstractmethod
+    async def upload_files(self, file_path: str, file_name: str):
+        """Upload the file at file_path under the name file_name."""
+
+
+class Storage(ABC):
+    """Interface for storage plugins that expose an explicit client start/close lifecycle."""
+
+    @abstractmethod
+    def start_storage_client(self):
+        """Create the storage client."""
+
+    @abstractmethod
+    async def upload_files(self, file_path: str, file_name: str) -> str:
+        """Upload the file at file_path under the name file_name and return a string."""
+
+    @abstractmethod
+    async def close_storage_client(self):
+        """Close the storage client."""
\ No newline at end of file
diff --git a/plugins/config.yaml b/plugins/config.yaml
new file mode 100644
index 00000000..f3564a2c
--- /dev/null
+++ b/plugins/config.yaml
@@ -0,0 +1,4 @@
+# Used in /models/providers/litellm/utils.py and /models/providers/openai/utils.py for file attachments
+storage:
+ name: azure
+ enabled: true # If disabled, it will use Discord CDN but files would expire
\ No newline at end of file
diff --git a/plugins/requirements.txt b/plugins/requirements.txt
new file mode 100644
index 00000000..f26f22f6
--- /dev/null
+++ b/plugins/requirements.txt
@@ -0,0 +1 @@
+azure-storage-blob
diff --git a/plugins/storage/azure.py b/plugins/storage/azure.py
new file mode 100644
index 00000000..c8e12d17
--- /dev/null
+++ b/plugins/storage/azure.py
@@ -0,0 +1,71 @@
+from azure.storage.blob.aio import BlobServiceClient
+from azure.storage.blob import ContentSettings
+from core.exceptions import CustomErrorMessage
+from os import environ
+from plugins.abc.storage import Storage
+import aiofiles
+import filetype
+import logging
+
+class StoragePlugin(Storage):
+    """Azure Blob Storage implementation of the Storage plugin interface.
+
+    Uploads reuse the client created by start_storage_client(); when none
+    is active, each upload opens and closes its own one-off client.
+    """
+
+    def __init__(self):
+        self.blob_service_client = None
+
+    def start_storage_client(self):
+        """Create the shared client from AZURE_STORAGE_CONNECTION_STRING."""
+        self.blob_service_client = BlobServiceClient.from_connection_string(
+            conn_str=environ.get("AZURE_STORAGE_CONNECTION_STRING"),
+            max_block_size=8*1024*1024, # 8 MB chunk size
+            max_single_put_size=8*1024*1024
+        )
+        logging.info("Blob service client initialized successfully")
+
+    async def upload_files(self, file_path: str, file_name: str) -> str:
+        """Upload a local file as blob file_name and return the blob's URL.
+
+        The blob goes to the container named by AZURE_STORAGE_CONTAINER_NAME and is
+        uploaded with overwrite=False. Any failure is logged and raised as CustomErrorMessage.
+        """
+        # Decide once whether this call owns its client, so the cleanup below
+        # closes exactly the client created here even if shared state changes meanwhile.
+        _is_one_off = self.blob_service_client is None
+        if _is_one_off:
+            _blob_service_client = BlobServiceClient.from_connection_string(environ.get("AZURE_STORAGE_CONNECTION_STRING"))
+        else:
+            _blob_service_client = self.blob_service_client
+
+        # Upload the file
+        try:
+            _blob_client = _blob_service_client.get_blob_client(container=environ.get("AZURE_STORAGE_CONTAINER_NAME"), blob=file_name)
+
+            async with aiofiles.open(file_path, "rb") as _file_data:
+                _file_bytes = await _file_data.read()
+                _mime_type = filetype.guess(_file_bytes)
+                await _blob_client.upload_blob(_file_bytes,
+                    overwrite=False,
+                    content_settings=ContentSettings(content_type=_mime_type.mime if _mime_type else "application/octet-stream"))
+
+            # Return the blob URL
+            return _blob_client.url
+        except Exception as e:
+            logging.error("Error uploading file %s to blob storage, reason: %s", file_name, e)
+            raise CustomErrorMessage("⚠️ There was an error uploading your file, please try again later.") from e
+        finally:
+            if _is_one_off:
+                logging.info("Closing one-off BlobServiceClient instance.")
+                await _blob_service_client.close()
+
+    async def close_storage_client(self):
+        """Close the shared client if one was started; do nothing otherwise."""
+        # __init__ always defines the attribute (as None), so test its value:
+        # a hasattr() check is always true and would end up awaiting None.close().
+        if self.blob_service_client is not None:
+            await self.blob_service_client.close()
+            self.blob_service_client = None
+            logging.info("Blob service client session closed successfully")
\ No newline at end of file
diff --git a/plugins/storage_plugin.py b/plugins/storage_plugin.py
new file mode 100644
index 00000000..dc2c4bae
--- /dev/null
+++ b/plugins/storage_plugin.py
@@ -0,0 +1,41 @@
+from plugins.abc.storage import Storage, StorageOneOff
+from plugins.validation import PluginsConfig
+from pydantic import ValidationError
+from typing import Union
+import importlib
+import yaml
+
+class StoragePluginLoader:
+    """Load the storage plugin named in plugins/config.yaml and expose its methods on this object."""
+
+    def __init__(self):
+        # Parse the plugin config; an empty file is treated as an empty mapping.
+        with open("plugins/config.yaml", "r") as _config_file:
+            self._loaded_config = yaml.safe_load(_config_file) or {}
+
+        # Report schema problems as ValueError, keeping the pydantic error as the cause.
+        try:
+            self.storage_config = PluginsConfig(**self._loaded_config).storage
+        except ValidationError as e:
+            raise ValueError(f"Storage configuration validation failed: {e}") from e
+
+        # The plugin is the module plugins.storage.<name> and must define StoragePlugin.
+        _storage_name = self.storage_config.name
+        self._imported_module = importlib.import_module(f"plugins.storage.{_storage_name}")
+        if not hasattr(self._imported_module, "StoragePlugin"):
+            raise AttributeError(f"The storage plugin module 'plugins.storage.{_storage_name}' does not have a 'StoragePlugin' class.")
+
+        _plugin: Union[Storage, StorageOneOff] = self._imported_module.StoragePlugin()
+        self._storagepluginobject = _plugin
+
+        # Expose storage enabled flag from validated config.
+        self.enabled = self.storage_config.enabled
+
+        # Re-export the plugin's bound methods; only Storage plugins provide
+        # the client start/close pair, every accepted plugin provides upload_files.
+        if not isinstance(_plugin, (Storage, StorageOneOff)):
+            raise TypeError("The storage plugin must implement either Storage or StorageOneOff interface.")
+        if isinstance(_plugin, Storage):
+            self.start_storage_client = _plugin.start_storage_client
+            self.close_storage_client = _plugin.close_storage_client
+        self.upload_files = _plugin.upload_files
\ No newline at end of file
diff --git a/plugins/validation.py b/plugins/validation.py
new file mode 100644
index 00000000..27124efc
--- /dev/null
+++ b/plugins/validation.py
@@ -0,0 +1,21 @@
+from pydantic import BaseModel, Field, field_validator
+
+
+# Settings for the storage plugin: which plugin module to load and whether to use it.
+class StorageConfig(BaseModel):
+    name: str = Field(..., description="Storage backend plugin module name")
+    enabled: bool = Field(default=False, description="If disabled, it will use Discord CDN instead but not recommended due to TTL.")
+
+    @field_validator("name")
+    @classmethod
+    def validate_name(cls, value: str) -> str:
+        """Return the name stripped and lowercased; reject names that end up blank."""
+        _cleaned = value.strip().lower()
+        if _cleaned == "":
+            raise ValueError("Storage plugin name cannot be empty")
+        return _cleaned
+
+
+# Top-level shape of the plugins config: a single required "storage" section.
+class PluginsConfig(BaseModel):
+    storage: StorageConfig
\ No newline at end of file
diff --git a/pyrightconfig.json b/pyrightconfig.json
new file mode 100644
index 00000000..a8fcf120
--- /dev/null
+++ b/pyrightconfig.json
@@ -0,0 +1,12 @@
+{
+ "typeCheckingMode": "off",
+ "include": ["."],
+ "exclude": [
+ "**/node_modules",
+ "**/__pycache__",
+ "**/.venv",
+ "**/venv",
+ "**/build",
+ "**/dist"
+ ]
+}
diff --git a/run.sh b/run.sh
new file mode 100755
index 00000000..cdb2030d
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+JAKEYBOT_HOME=/jakeybot
+
+# Symlink a user-supplied config file from /data into the bot's data directory.
+# $1 is the file name. The override is used only when it exists and is readable
+# by the current user; otherwise the bundled default file is left in place.
+link_override() {
+    local name="$1"
+    if [ -f "/data/$name" ] && [ -r "/data/$name" ]; then
+        echo "[*] /data/$name found, symlinking..."
+        if ln -frs "/data/$name" "$JAKEYBOT_HOME/data/$name"; then
+            echo "[*] /data/$name symlinked successfully."
+        else
+            echo "[!] Failed to symlink /data/$name. Using default $name."
+        fi
+    else
+        echo "[!] /data/$name is not readable or does not exist. Using default $name."
+    fi
+}
+
+link_override models.yaml
+link_override text_models.yaml
+
+# Run the main application. exec replaces this shell with the Python process
+# so that container stop signals are delivered to the bot itself.
+exec python3 main.py
\ No newline at end of file
diff --git a/tools/apis/AudioTools/tool.py b/tools/apis/AudioTools/tool.py
index c977dec9..1e7149f8 100644
--- a/tools/apis/AudioTools/tool.py
+++ b/tools/apis/AudioTools/tool.py
@@ -1,5 +1,4 @@
from models.tasks.media.fal_ai import run_audio
-from os import environ
import aiohttp
import datetime
import discord
diff --git a/tools/apis/ImageGen/manifest.yaml b/tools/apis/ImageGen/manifest.yaml
index 70894ec3..c3d05051 100644
--- a/tools/apis/ImageGen/manifest.yaml
+++ b/tools/apis/ImageGen/manifest.yaml
@@ -1,34 +1,5 @@
tool_name: Image Generation and Editing
tool_list:
- - name: imagen_image_gen
- description: Generate high quality image using Imagen 4 Ultra, A diffusion based model. This provides faster way of photorealistic and surrealistic image generation.
- parameters:
- type: object
- properties:
- prompt:
- type: string
- description: The prompt to generate the image.
- aspect_ratio:
- type: string
- enum:
- - "1:1"
- - "9:16"
- - "16:9"
- - "4:3"
- - "3:4"
- description: The aspect ratio of the generated image.
- resolution:
- type: string
- enum:
- - 1K
- - 2K
- description: Using native resolution which quality to output for the generated image.
- negative_prompt:
- type: string
- description: The negative prompt to avoid certain elements in the generated image. This is only supported for Imagen 4.
- required:
- - prompt
-
- name: gpt_image_gen
description: Generate high fidelity, diverse, and knowledge-driven images using GPT-4o. Use this to provide more style variety, stronger text inclusion, and stronger instruction following. Unlike DALL-E 3, this is the latest autoregressive image generation model.
parameters:
@@ -70,9 +41,9 @@ tool_list:
required:
- prompt
- # Image editing tool powered by Nano Banana and Seedream 4
- - name: nb_sd_image_editor
- description: Edit images with strong image referencing powered by Gemini 2.5 Flash (Nano Banana) and Seedream 4.
+ # Image Generation and Editing tool powered by Nano Banana 2
+ - name: nano_banana_ii_gen
+ description: Create or edit images with 4K support using Nano Banana 2 powered by Gemini 3.1 Flash Image. Offers superior quality plus speed than 4o, stronger world knowledge such as infographics, factuality, and consistency. Including prompt adherence. It is the strongest image model to date.
parameters:
type: object
properties:
@@ -83,13 +54,29 @@ tool_list:
type: array
items:
type: string
- description: The URL of the image to edit. It's recommended to keep the image in order from attached images for consistency.
- model:
+ description: The URL of the image to be used as reference for editing or reference. It's recommended to put the images in order same as from user prompt's ordering.
+ aspect_ratio:
type: string
enum:
- - gemini-25-flash-image/edit
- - bytedance/seedream/v4/edit
- description: The model to use for image editing. It's recommended to use Gemini 2.5 Flash for precise and consistently locked edits like photo restoration, while Seedream 4 for quality 4K edits, upscales, and remasters but may result in drift.
+ - "21:9"
+ - "16:9"
+ - "3:2"
+ - "4:3"
+ - "5:4"
+ - "1:1"
+ - "4:5"
+ - "3:4"
+ - "2:3"
+ - "9:16"
+ resolution:
+ type: string
+ enum:
+ - 1K
+ - 2K
+ - 4K
+ description: Native resolution, 2K being the default. 4K provides highest quality possible image but requires bit more time to generate.
+ enable_web_search:
+ type: boolean
+ description: Connects to Google Search to retrieve latest and up-to-date information to inform image generation process.
required:
- prompt
- - image_url
diff --git a/tools/apis/ImageGen/tool.py b/tools/apis/ImageGen/tool.py
index 263417f9..98192ae0 100644
--- a/tools/apis/ImageGen/tool.py
+++ b/tools/apis/ImageGen/tool.py
@@ -12,10 +12,10 @@ def __init__(self, discord_message, discord_bot):
self.discord_bot = discord_bot
# Image generator
- async def tool_imagen_image_gen(self, prompt: str, aspect_ratio: str = "1:1", resolution: str = "1K", negative_prompt: str = None):
+ async def tool_gpt_image_gen(self, prompt: str, image_url: list = None, image_size: str = "auto", quality: str = "auto", background: str = "auto", input_fidelity: str = "high"):
# Create image
- _message_curent = await self.discord_message.channel.send(f"⌛ Generating image using Imagen 4 with prompt **{prompt}**")
-
+ _message_curent = await self.discord_message.channel.send(f"⌛ Generating image using GPT Images 1.5 with prompt **{prompt}**")
+
if hasattr(self.discord_bot, "aiohttp_instance"):
logging.info("Using existing aiohttp instance from discord bot subclass for Image Generation tool")
_aiohttp_client_session: aiohttp.ClientSession = self.discord_bot.aiohttp_instance
@@ -29,22 +29,28 @@ async def tool_imagen_image_gen(self, prompt: str, aspect_ratio: str = "1:1", re
"prompt": prompt
}
- logging.info("Using Imagen 4 model for generation")
+ logging.info("Using GPT Images 1.5 model for generation")
_params.update({
- "aspect_ratio": aspect_ratio,
- "resolution": resolution
+ "image_size": image_size,
+ "quality": quality,
+ "background": background
})
- if negative_prompt:
- _params["negative_prompt"] = negative_prompt
+ # Check if image_url is provided
+ if image_url:
+ _model_endpoint = "gpt-image-1.5/edit-image"
+ _params["image_urls"] = image_url
+ _params["input_fidelity"] = input_fidelity
+ else:
+ _model_endpoint = "gpt-image-1.5/text-to-image"
# Generate image
- _discordImageURLs = []
- _imagesInBytes = await run_image(
- model_name="imagen4/preview/ultra",
+ _imagesInBytesPayload = await run_image(
+ model_name=_model_endpoint,
aiohttp_session=_aiohttp_client_session,
**_params
)
+ _imagesInBytes = _imagesInBytesPayload["images_in_bytes"]
# Send the image and add each of the discord message to the list so we can add it as context later
for _index, _images in enumerate(_imagesInBytes):
@@ -62,8 +68,7 @@ async def tool_imagen_image_gen(self, prompt: str, aspect_ratio: str = "1:1", re
# Filename
_fileName = f"image_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}_index_{_index}.{_formatExtension}"
- _sentImg = await self.discord_message.channel.send(file=discord.File(io.BytesIO(_images), filename=_fileName))
- _discordImageURLs.append(_sentImg.attachments[0].url)
+ await self.discord_message.channel.send(file=discord.File(io.BytesIO(_images), filename=_fileName))
# Delete the _imagesInBytes to save memory
@@ -75,13 +80,16 @@ async def tool_imagen_image_gen(self, prompt: str, aspect_ratio: str = "1:1", re
# Cleanup
return {
"guidelines": "The image is already sent to the UI, no need to print the URLs as it will just cause previews to display images twice.",
- "context_results": _discordImageURLs,
+ "context_results": _imagesInBytesPayload["images_urls"],
"status": "Image generated successfully"
}
- async def tool_gpt_image_gen(self, prompt: str, image_url: list = None, image_size: str = "auto", quality: str = "auto", background: str = "auto", input_fidelity: str = "high"):
+ async def tool_nano_banana_ii_gen(self, prompt: str, image_url: list = None, aspect_ratio: str = "16:9", resolution: str = "2K", enable_web_search: bool = False):
# Create image
- _message_curent = await self.discord_message.channel.send(f"⌛ Generating image using GPT-4o with prompt **{prompt}**")
+ if enable_web_search:
+ _message_curent = await self.discord_message.channel.send(f"🔍 Searching the web for information and generating an image using Nano Banana 2 with prompt **{prompt}**")
+ else:
+ _message_curent = await self.discord_message.channel.send(f"🍌 Generating image using Nano Banana 2 with prompt **{prompt}**")
if hasattr(self.discord_bot, "aiohttp_instance"):
logging.info("Using existing aiohttp instance from discord bot subclass for Image Generation tool")
@@ -96,120 +104,48 @@ async def tool_gpt_image_gen(self, prompt: str, image_url: list = None, image_si
"prompt": prompt
}
- logging.info("Using GPT-4o model for generation")
+ logging.info("Using Nano Banana 2 model for generation")
_params.update({
- "image_size": image_size,
- "quality": quality,
- "background": background
+ "aspect_ratio": aspect_ratio,
+ "num_images": 1,
+ "output_format": "png",
+ "resolution": resolution,
+ "enable_web_search": enable_web_search
})
# Check if image_url is provided
if image_url:
- _model_endpoint = "gpt-image-1/edit-image"
+ _model_endpoint = "nano-banana-2/edit"
_params["image_urls"] = image_url
- _params["input_fidelity"] = input_fidelity
else:
- _model_endpoint = "gpt-image-1/text-to-image"
+ _model_endpoint = "nano-banana-2"
- # Generate image
- _discordImageURLs = []
- _imagesInBytes = await run_image(
+ # If 4k was set, we use embeds
+ _useEmbeds = resolution == "4K"
+ _imagesInBytesPayload = await run_image(
model_name=_model_endpoint,
aiohttp_session=_aiohttp_client_session,
+ send_bytes=not _useEmbeds,
**_params
)
-
- # Send the image and add each of the discord message to the list so we can add it as context later
- for _index, _images in enumerate(_imagesInBytes):
- # Check the image type
- _magicType = filetype.guess(_images)
- if _magicType.mime == "image/jpeg":
- _formatExtension = "jpg"
- elif _magicType.mime == "image/png":
- _formatExtension = "png"
- elif _magicType.mime == "image/webp":
- _formatExtension = "webp"
- else:
- _formatExtension = "bin"
-
- # Filename
- _fileName = f"image_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}_index_{_index}.{_formatExtension}"
-
- _sentImg = await self.discord_message.channel.send(file=discord.File(io.BytesIO(_images), filename=_fileName))
- _discordImageURLs.append(_sentImg.attachments[0].url)
-
-
- # Delete the _imagesInBytes to save memory
- del _imagesInBytes
-
- # Delete status
- await _message_curent.delete()
-
- # Cleanup
- return {
- "guidelines": "The image is already sent to the UI, no need to print the URLs as it will just cause previews to display images twice.",
- "context_results": _discordImageURLs,
- "status": "Image generated successfully"
- }
-
- # Image editor
- async def tool_nb_sd_image_editor(self, prompt: str, image_url: list[str], enable_safety_checker: bool = True, model: str = "gemini-25-flash-image"):
- # Create image
- _message_curent = await self.discord_message.channel.send(f"⌛ I will now edit the images with prompt **{prompt}**... this may take few minutes")
-
- if hasattr(self.discord_bot, "aiohttp_instance"):
- logging.info("Using existing aiohttp instance from discord bot subclass for Image Editing tool")
- _aiohttp_client_session: aiohttp.ClientSession = self.discord_bot.aiohttp_instance
- else:
- logging.warning("No aiohttp_instance found in discord bot subclass, aborting")
- raise Exception("HTTP Client has not been initialized properly, please try again later.")
-
- # Construct params
- _additional_params = {"prompt": prompt, "image_urls": image_url}
-
- # Output in 4k for seedream
- if model == "bytedance/seedream/v4/edit":
- logging.info("Using Seedream 4 model for editing, setting width and height to 4K")
- _additional_params.update({
- "enable_safety_checker": enable_safety_checker,
- "image_size": {
- "width": 3840,
- "height": 2160
- }
- })
+ _falImageURLs = _imagesInBytesPayload["images_urls"]
+
+ # Send the image. For 4K, send embed URL directly from FAL.
+ if _useEmbeds:
+ for _image_url in _falImageURLs:
+ _embed = discord.Embed(title="🍌 Generated Nano Banana 2 Image.", color=discord.Colour.yellow())
+ _embed.set_footer(text="Powered by Nano Banana 2 (also known as Gemini 3.1 Flash Image)")
+ _embed.set_image(url=_image_url)
+ await self.discord_message.channel.send(embed=_embed)
else:
- logging.info("Using Gemini 2.5 Flash model for editing")
-
- # Generate image
- _discordImageURLs = []
- _imagesInBytes = await run_image(
- model_name=model,
- aiohttp_session=_aiohttp_client_session,
- **_additional_params
- )
+ _imagesInBytes = _imagesInBytesPayload["images_in_bytes"]
+ for _images in _imagesInBytes:
+ # Filename
+ _fileName = f"image_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}_nb2.png"
+ await self.discord_message.channel.send(file=discord.File(io.BytesIO(_images), filename=_fileName))
- # Send the image and add each of the discord message to the list so we can add it as context later
- for _index, _images in enumerate(_imagesInBytes):
- # Check the image type
- _magicType = filetype.guess(_images)
- if _magicType.mime == "image/jpeg":
- _formatExtension = "jpg"
- elif _magicType.mime == "image/png":
- _formatExtension = "png"
- elif _magicType.mime == "image/webp":
- _formatExtension = "webp"
- else:
- _formatExtension = "bin"
-
- # Filename
- _fileName = f"image_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}_index_{_index}.{_formatExtension}"
-
- _sentImg = await self.discord_message.channel.send(file=discord.File(io.BytesIO(_images), filename=_fileName))
- _discordImageURLs.append(_sentImg.attachments[0].url)
-
-
- # Delete the _imagesInBytes to save memory
- del _imagesInBytes
+ # Delete the _imagesInBytes to save memory
+ del _imagesInBytes
# Delete status
await _message_curent.delete()
@@ -217,6 +153,6 @@ async def tool_nb_sd_image_editor(self, prompt: str, image_url: list[str], enabl
# Cleanup
return {
"guidelines": "The image is already sent to the UI, no need to print the URLs as it will just cause previews to display images twice.",
- "context_results": _discordImageURLs,
+ "context_results": _imagesInBytesPayload["images_urls"],
"status": "Image generated successfully"
}
\ No newline at end of file
diff --git a/tools/apis/InternetSearch/manifest.yaml b/tools/apis/InternetSearch/manifest.yaml
index 7a219195..c62216f9 100644
--- a/tools/apis/InternetSearch/manifest.yaml
+++ b/tools/apis/InternetSearch/manifest.yaml
@@ -1,5 +1,5 @@
tool_name: Web Search
-tool_list:
+tool_list:
# Search - Powered by Tavily https://tavily.com/
- name: web_search
description: Search the web to fetch up-to-date information. Before searching the web, call use fetch_date_time tool to get current date and time to make search more relevant.
@@ -18,6 +18,9 @@ tool_list:
max_results:
type: integer
description: The maximum number of search results to return. Default is 5, and maximum is 20.
+ show_images:
+ type: boolean
+ description: Whether to include image URLs in the search results alongside text results with the search query. By default, this is disabled.
include_domains:
type: array
items:
@@ -34,19 +37,21 @@ tool_list:
required:
- query
- search_depth
-
- # Browse - Powered by Jina AI https://jina.ai/
+
+ # Browse - Powered by Tavily https://tavily.com/
- name: url_browse
description: Reads the content of the webpage at the specified URL.
parameters:
type: object
properties:
- url:
- type: string
- description: The URL of the webpage to read
+ urls:
+ type: array
+ items:
+ type: string
+ description: The URLs of the webpage to read
required:
- - url
-
+ - urls
+
# YouTube Search
- name: youtube_video_search
description: Search for YouTube videos.
@@ -76,4 +81,4 @@ tool_list:
description: A prompt for VQA model to answer questions about the video content
required:
- video_id
- - question
\ No newline at end of file
+ - question
diff --git a/tools/apis/InternetSearch/tool.py b/tools/apis/InternetSearch/tool.py
index 47db9ffb..6161fbe3 100644
--- a/tools/apis/InternetSearch/tool.py
+++ b/tools/apis/InternetSearch/tool.py
@@ -1,7 +1,8 @@
+from models.tasks.text.openai import completion as VQAModelCompletion
from os import environ
import aiohttp
import discord
-import io
+import inspect
import logging
# Function implementations
@@ -10,10 +11,10 @@ def __init__(self, discord_message, discord_bot):
self.discord_message = discord_message
self.discord_bot = discord_bot
- async def tool_web_search(self, query: str, search_depth: str = "basic", max_results: int = 5, include_domains: list = None, exclude_domains: list = None, show_sources_list: bool = False):
+ async def tool_web_search(self, query: str, search_depth: str = "basic", max_results: int = 5, show_images = False, include_domains: list = None, exclude_domains: list = None, show_sources_list: bool = False):
if not query or not query.strip():
raise ValueError("query parameter is required and cannot be empty")
-
+
if hasattr(self.discord_bot, "aiohttp_instance"):
logging.info("Using existing aiohttp client session for post requests")
_session: aiohttp.ClientSession = self.discord_bot.aiohttp_instance
@@ -25,7 +26,7 @@ async def tool_web_search(self, query: str, search_depth: str = "basic", max_res
# Bing Subscription Key
if not environ.get("TAVILY_SEARCH_API_KEY"):
raise ValueError("TAVILY_SEARCH_API_KEY key not set, sign up at https://tavily.com/ and get an API key from the dashboard")
-
+
# Construct params with proper validation
if max_results < 0:
max_results = 5
@@ -36,6 +37,8 @@ async def tool_web_search(self, query: str, search_depth: str = "basic", max_res
"query": query.strip(),
"search_depth": search_depth,
"max_results": max_results,
+ "include_images": show_images,
+ "include_image_descriptions": show_images
}
# Add include_domains if provided
@@ -53,7 +56,7 @@ async def tool_web_search(self, query: str, search_depth: str = "basic", max_res
# Endpoint
_endpoint = "https://api.tavily.com/search"
-
+
# Make a request
async with _session.post(_endpoint, json=_params, headers=_headers) as _response:
# Raise an exception
@@ -62,7 +65,7 @@ async def tool_web_search(self, query: str, search_depth: str = "basic", max_res
# Hide sensitive data by abstracting it
except aiohttp.ClientConnectionError:
raise Exception(f"Failed to fetch web search results with code {_response.status}, reason: {_response.reason}")
-
+
_searchResults = await _response.json()
# Check if the results is empty
@@ -76,7 +79,12 @@ async def tool_web_search(self, query: str, search_depth: str = "basic", max_res
"scores": "Utilize the score field to rank the relevance of the search results. A higher score indicates a more relevant result to the query. Use this score to prioritize which sources to reference in your response.",
"results": _searchResults["results"]
}
-
+
+        if show_images:  # guidance values must be plain strings; a trailing comma would store a 1-tuple
+            _output["url_browse_rules"] = "Do not call url_browse tool if the tasks involves image search."
+            _output["image_guidelines"] = "When showing images, format as [description](url) and without the exclamation mark as Discord does not do inline images natively within text. IDEALLY and PRIMARILY you can alternatively use discord_embed_tool to show inline images using ONLY title and image_url only for cleaner presentation, unless otherwise asked."
+            _output["images"] = _searchResults.get("images", None)
+
# Embed that contains first 10 sources
if show_sources_list:
_sembed = discord.Embed(
@@ -96,33 +104,56 @@ async def tool_web_search(self, query: str, search_depth: str = "basic", max_res
else:
_sembed = None
await self.discord_message.channel.send(f"🔍 Searched for **{query}**", embed=_sembed)
-
+
return _output
- async def tool_url_browse(self, url: str):
- # Powered by Jina AI
-
+ async def tool_url_browse(self, urls: list[str]):
+ # Powered by Tavily
if hasattr(self.discord_bot, "aiohttp_instance"):
logging.info("Using existing aiohttp client session for GET requests using Jina AI")
- _session = self.discord_bot.aiohttp_instance
+ _session: aiohttp.ClientSession = self.discord_bot.aiohttp_instance
else:
# Throw exception since we don't have a session
logging.warning("No aiohttp_instance found in discord bot subclass, aborting")
raise Exception("HTTP Client has not been initialized properly, please try again later.")
- _endpoint = f"https://r.jina.ai/{url}"
+        # Up to 10 URLs only
+ if len(urls) > 10:
+ raise ValueError("Only up to 10 URLs are allowed")
+
+ # Check if TAVILY_SEARCH_API_KEY is set
+ if not environ.get("TAVILY_SEARCH_API_KEY"):
+ raise ValueError("TAVILY_SEARCH_API_KEY key not set, sign up at https://tavily.com/ and get an API key from the dashboard")
+
+ _endpoint = "https://api.tavily.com/extract"
+
+ # Params
+ _params = {
+ "urls": urls,
+ "include_images": False
+ }
+
+ # Headers
+ _headers = {
+ "Authorization": f"Bearer {environ.get('TAVILY_SEARCH_API_KEY')}",
+ "Content-Type": "application/json",
+ }
- await self.discord_message.channel.send(f"🖱️ Browsing: **`{url}`**")
+ for _url in urls:
+ await self.discord_message.channel.send(f"🖱️ Reading: **`{_url}`**")
- async with _session.get(_endpoint) as _response:
+ async with _session.post(_endpoint, json=_params, headers=_headers) as _response:
if _response.status != 200:
- raise Exception(f"Failed to fetch URL content with code {_response.status}, reason: {_response.reason}")
- _data = await _response.text()
+ raise Exception(f"Failed to fetch provided URLs content with code {_response.status}, reason: {_response.reason}")
+ _data = await _response.json()
+
+ # Check if results are not empty
+ if not _data.get("results", []):
+ raise Exception("No results found for the provided URLs")
# Return the data
return {
- "url": url,
- "content": _data
+ "results": _data.get("results")
}
@@ -163,11 +194,11 @@ async def tool_youtube_video_search(self, query: str, n_results: int = 10):
# If the Content-Type is not application/json
if "application/json" not in _response.headers["Content-Type"]:
raise Exception("The response from the YouTube API is not in JSON format")
-
+
# If the response is not successful
if _response.status != 200:
raise Exception(f"Failed to fetch YouTube search results with code {_response.status}, reason: {_response.reason}")
-
+
# Iterate over items list
_videos = [
{
@@ -202,7 +233,7 @@ async def tool_youtube_video_search(self, query: str, n_results: int = 10):
# YouTube video watcher
async def tool_youtube_video_watcher(self, video_id: str, question: str):
# System instruction
- _sysprompt = inspect.cleandoc("""Your name is Video QA tool.
+ _sysprompt = inspect.cleandoc("""Your name is Video QA tool.
Your goal is to summarize and gain insights from the given video based on the user's question.
## Guidelines:
- Provide timestamps to ensure accuracy and trustworthiness of the information in each summary.
@@ -233,11 +264,12 @@ async def tool_youtube_video_watcher(self, video_id: str, question: str):
# Requires OpenRouter client session to be specified from startup.py by instantating OpenAI AsyncClient with BaseURL to OpenRouter
_response = await VQAModelCompletion(
prompt=_prompt,
- model_name="google/gemini-2.5-flash-lite",
+ model_name="google/gemini-3.1-flash-lite-preview",
+ system_instruction=_sysprompt,
return_text=True,
client_session=self.discord_bot.openai_client_openrouter
)
return {
"answer": _response
- }
\ No newline at end of file
+ }
diff --git a/tools/builtin/manifest.yaml b/tools/builtin/manifest.yaml
index 9d755ebe..127904da 100644
--- a/tools/builtin/manifest.yaml
+++ b/tools/builtin/manifest.yaml
@@ -37,6 +37,71 @@ builtin_tool_list:
- question
- multi_select
- choices
+ - name: discord_embed_tool
+ description: Creates a Discord embed message in-line for visual appeal and better formatting and presentation.
+ parameters:
+ type: object
+ properties:
+ title:
+ type: string
+ description: Title of the embed
+ description:
+ type: string
+ color:
+ type: string
+ description: Hex color code for the embed
+ fields:
+ type: array
+ items:
+ type: object
+ properties:
+ name:
+ type: string
+ description: The name of the field.
+ value:
+ type: string
+ description: The value of the field.
+ inline:
+ type: boolean
+ description: Whether the field should be displayed inline.
+ required:
+ - name
+ - value
+ - inline
+ description: A list of fields to include in the embed (up to 25 fields).
+ footer:
+ type: object
+ properties:
+ text:
+ type: string
+ description: The text to display in the footer of the embed.
+ icon_url:
+ type: string
+ description: The URL of the icon to display in the footer of the embed.
+ required:
+ - text
+ author:
+ type: object
+ properties:
+ name:
+ type: string
+ description: The name of the author to display in the embed.
+ url:
+ type: string
+ description: The URL of the author to display in the embed.
+ icon_url:
+ type: string
+ description: The URL of the icon to display for the author in the embed.
+ required:
+ - name
+ thumbnail_url:
+ type: string
+ description: The URL of the thumbnail image to display in the embed.
+ image_url:
+ type: string
+ description: The URL of the image to display in the embed.
+ required:
+ - title
- name: fetch_date_time
description: Fetches the current date and time in a specified timezone.
parameters:
diff --git a/tools/builtin/tools/create_polls.py b/tools/builtin/tools/create_polls.py
index b0bd9600..32bfbc2d 100644
--- a/tools/builtin/tools/create_polls.py
+++ b/tools/builtin/tools/create_polls.py
@@ -1,5 +1,4 @@
from tools.builtin._base import BuiltInToolDiscordStateBase
-from typing import Literal
import discord
# Built-in tools regardless of tool selection unless Disabled
diff --git a/tools/builtin/tools/discord_embed_tool.py b/tools/builtin/tools/discord_embed_tool.py
new file mode 100644
index 00000000..c072d4eb
--- /dev/null
+++ b/tools/builtin/tools/discord_embed_tool.py
@@ -0,0 +1,99 @@
+from tools.builtin._base import BuiltInToolDiscordStateBase
+import discord
+
+# Built-in tools regardless of tool selection unless Disabled
+class BuiltInTool(BuiltInToolDiscordStateBase):
+    """Built-in tool that sends a Discord embed to the channel of self.discord_message."""
+
+    @staticmethod
+    def _parse_embed_color(color: str | None) -> discord.Color | None:
+        """Parse a hex color ("#RRGGBB", "0xRRGGBB" or "RRGGBB") into a discord.Color.
+
+        Returns None when the value is missing, not a string, not hexadecimal,
+        or outside 0x000000-0xFFFFFF, so the embed is created without a color.
+        """
+        # Guard against non-string input, which would otherwise raise on .strip()
+        if not color or not isinstance(color, str):
+            return None
+
+        _raw = color.strip().lower()
+        if _raw.startswith("#"):
+            _raw = _raw[1:]
+        elif _raw.startswith("0x"):
+            _raw = _raw[2:]
+
+        try:
+            _value = int(_raw, 16)
+        except ValueError:
+            return None
+
+        if not 0 <= _value <= 0xFFFFFF:
+            return None
+        return discord.Color(_value)
+
+    async def tool_discord_embed_tool(
+        self,
+        title: str,
+        description: str | None = None,
+        color: str | None = None,
+        fields: list[dict] | None = None,
+        footer: dict | None = None,
+        author: dict | None = None,
+        thumbnail_url: str | None = None,
+        image_url: str | None = None,
+    ):
+        """Build an embed from the given parts and send it to the message's channel.
+
+        Text is truncated to the lengths used below (title and names 256,
+        description 4096, footer text 2048, field values 1024) and at most 25
+        fields are added. Footer, author and field entries that are not dicts or
+        lack their required text are skipped instead of raising.
+
+        Returns the confirmation string "Embed sent successfully".
+        """
+        _embed = discord.Embed(
+            title=title[:256],
+            description=description[:4096] if description else None,
+            color=self._parse_embed_color(color),
+        )
+
+        # Coerce dict-sourced text with str() so a non-string value cannot break slicing
+        if isinstance(footer, dict) and footer.get("text"):
+            _embed.set_footer(
+                text=str(footer["text"])[:2048],
+                icon_url=footer.get("icon_url"),
+            )
+
+        if isinstance(author, dict) and author.get("name"):
+            _embed.set_author(
+                name=str(author["name"])[:256],
+                url=author.get("url"),
+                icon_url=author.get("icon_url"),
+            )
+
+        if thumbnail_url:
+            _embed.set_thumbnail(url=thumbnail_url)
+
+        if image_url:
+            _embed.set_image(url=image_url)
+
+        _field_count = 0
+        for _field in fields or []:
+            if _field_count >= 25:
+                break
+            # Skip entries that are not objects or that lack a name or value
+            if not isinstance(_field, dict):
+                continue
+            _name, _value = _field.get("name"), _field.get("value")
+            if not _name or not _value:
+                continue
+
+            _embed.add_field(
+                name=str(_name)[:256],
+                value=str(_value)[:1024],
+                inline=bool(_field.get("inline", False)),
+            )
+            _field_count += 1
+
+        await self.discord_message.channel.send(embed=_embed)
+        return "Embed sent successfully"
diff --git a/tools/builtin/tools/get_user_info.py b/tools/builtin/tools/get_user_info.py
index 7bf25911..806cd945 100644
--- a/tools/builtin/tools/get_user_info.py
+++ b/tools/builtin/tools/get_user_info.py
@@ -8,6 +8,7 @@ async def tool_get_user_info(self):
return {
"username": _user.name,
"display_name": _user.display_name,
- "snowflake": _user.id,
- "created_at": _user.created_at.isoformat()
+            # Stringify the snowflake: passing it as an integer in the tool result may cause an integer overflow
+ "snowflake": str(_user.id),
+ "created_at": _user.created_at.isoformat(),
}