diff --git a/.env.example b/.env.example index c4e2000..05f1028 100644 --- a/.env.example +++ b/.env.example @@ -42,3 +42,7 @@ TELEGRAM_API_ID=your_telegram_api_id_here TELEGRAM_API_HASH=your_telegram_api_hash_here TELEGRAM_USE_LOCAL_API=true TELEGRAM_LOCAL_API_URL=http://localhost:8081 + +# Image Processing Configuration +MAX_IMAGE_RESOLUTION_VISION=1024 +MAX_IMAGE_RESOLUTION_EDIT=4096 diff --git a/.gitignore b/.gitignore index c3bc098..431cd95 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,8 @@ __pycache__/ .DS_Store data/ .vscode/ + +# Temporary media directories +temp_photos/ +temp_docs/ +temp_audio/ diff --git a/app/agents/image_tools.py b/app/agents/image_tools.py index e1b9eca..b8de7c8 100644 --- a/app/agents/image_tools.py +++ b/app/agents/image_tools.py @@ -10,12 +10,15 @@ from google import genai from google.genai import types import PIL.Image +import PIL.ImageOps +from image_utils import JPEG_FORMATS load_dotenv() openai_api_key = os.getenv("OPENAI_API_KEY") openai_client = OpenAI(api_key=openai_api_key) google_api_key = os.getenv("GOOGLE_API_KEY") genai_client = genai.Client(api_key=google_api_key) +max_image_resolution_edit = int(os.getenv("MAX_IMAGE_RESOLUTION_EDIT", "4096") or "4096") class TextPart: def __init__(self, text): @@ -273,6 +276,43 @@ def _resolve_image_path(self, image_path: str) -> Path: return relative_to_images return p # Return original so the caller can report the correct path + def _prepare_image_for_edit(self, image_path: Path, temp_paths: List[Path]) -> Path: + """Create a temporary JPEG for resized or non-JPEG edit inputs.""" + try: + with PIL.Image.open(image_path) as img: + image_format = (img.format or "").upper() + needs_resizing = max_image_resolution_edit > 0 and max(img.size) > max_image_resolution_edit + needs_conversion = image_format not in JPEG_FORMATS + + if needs_resizing or needs_conversion: + img = PIL.ImageOps.exif_transpose(img) + + if needs_resizing: + img.thumbnail( + (max_image_resolution_edit, max_image_resolution_edit), + getattr(PIL.Image, "Resampling", PIL.Image).LANCZOS + ) + + if img.mode != "RGB": + img = img.convert("RGB") + + temp_path = self.images_path / f"edit_input_{uuid.uuid4()}.jpg" + temp_paths.append(temp_path) + img.save(temp_path, format="JPEG", quality=90, optimize=True) + return temp_path + except Exception as e: + print(f"Failed to prepare image for edit, using original file: {e}") + + return image_path + + def _cleanup_temp_images(self, temp_paths: List[Path]) -> None: + for temp_path in temp_paths: + try: + if temp_path.exists(): + temp_path.unlink() + except Exception as e: + print(f"Failed to remove temporary image {temp_path}: {e}") + def _resolve_gpt_size(self, resolution: str, aspect_ratio: str) -> str: """Translate resolution + aspect_ratio into a GPT Image 2 pixel size string. @@ -418,6 +458,8 @@ async def _generate_multimodal_image_and_text(self, prompt: str, style: str = "3 async def _image_editing(self, prompt: str, image_path: str, model: str = "Normal", aspect_ratio: str = "16:9", resolution: str = "2K", gpt_quality: str = "auto", variants: int = 1, caption: str = "Here is your edited image") -> str: """Edit an existing image using Gemini or GPT Image 2""" print(f"Editing image - Prompt: {prompt}, Image: {image_path}, Model: {model}, Aspect Ratio: {aspect_ratio}, Resolution: {resolution}, GPT Quality: {gpt_quality}, Variants: {variants}") + temp_paths: List[Path] = [] + source_image = None try: image_path_obj = self._resolve_image_path(image_path) if not image_path_obj.exists(): @@ -433,8 +475,9 @@ async def _image_editing(self, prompt: str, image_path: str, model: str = "Norma ) all_results.append(r) return "\n".join(all_results) - - source_image = PIL.Image.open(image_path_obj) + + source_image_path = self._prepare_image_for_edit(image_path_obj, temp_paths) + source_image = PIL.Image.open(source_image_path) model_name = "gemini-3-pro-image-preview" if model.lower() == "pro" else "gemini-3.1-flash-image-preview" @@ -465,7 +508,7 @@ async def _image_editing(self, prompt: str, image_path: str, model: str = "Norma ) contents = response.candidates[0].content.parts - + for content in contents: if 'text' in content.model_fields_set: try: @@ -490,24 +533,32 @@ async def _image_editing(self, prompt: str, image_path: str, model: str = "Norma document=file, caption=variant_caption ) - + all_saved_paths.append(str(transformed_image_path)) - + if all_saved_paths: paths_str = "\n".join(f" - {p}" for p in all_saved_paths) result_message += f"Edited {len(all_saved_paths)} image(s) and sent to user.\nSaved to:\n{paths_str}\nImage transformation completed successfully.\n" else: result_message += "Warning: No transformed image was generated. The model only provided text response.\n" - + return result_message except Exception as e: return f"Error editing image: {str(e)}" + finally: + if source_image: + source_image.close() + self._cleanup_temp_images(temp_paths) async def _gpt_image_edit(self, prompt: str, image_paths: List[Path], size: str = "2048x1152", quality: str = "auto", caption: str = "Here is your edited image") -> str: """Edit one or more images using OpenAI GPT Image 2""" print(f"Editing image(s) with GPT Image 2 - Prompt: {prompt}, Images: {image_paths}, Size: {size}, Quality: {quality}") + temp_paths: List[Path] = [] try: - image_files = [open(str(p), "rb") for p in image_paths] + image_files = [ + open(str(self._prepare_image_for_edit(p, temp_paths)), "rb") + for p in image_paths + ] kwargs = { "model": "gpt-image-2-2026-04-21", @@ -526,6 +577,7 @@ async def _gpt_image_edit(self, prompt: str, image_paths: List[Path], size: str finally: for f in image_files: f.close() + self._cleanup_temp_images(temp_paths) image_base64 = result.data[0].b64_json image_bytes = base64.b64decode(image_base64) @@ -658,6 +710,8 @@ async def _gpt_image_generate(self, prompt: str, size: str = "2048x1152", qualit async def _image_composition(self, prompt: str, image_paths: List[str], model: str = "Normal", aspect_ratio: str = "16:9", resolution: str = "2K", gpt_quality: str = "auto", variants: int = 1, caption: str = "Here is your composed image") -> str: """Compose a new image from multiple input images using Gemini or GPT Image 2""" print(f"Composing image - Prompt: {prompt}, Images: {image_paths}, Model: {model}, Aspect Ratio: {aspect_ratio}, Resolution: {resolution}, GPT Quality: {gpt_quality}, Variants: {variants}") + temp_paths: List[Path] = [] + images: List[PIL.Image.Image] = [] try: resolved_paths = [] for image_path in image_paths: @@ -683,7 +737,11 @@ async def _image_composition(self, prompt: str, image_paths: List[str], model: s if len(resolved_paths) > 3: return "Error: Maximum 3 images are supported for composition with Normal/Pro mode. Use GPT mode for more images." - images = [PIL.Image.open(p) for p in resolved_paths] + prepared_paths = [ + self._prepare_image_for_edit(p, temp_paths) + for p in resolved_paths + ] + images = [PIL.Image.open(p) for p in prepared_paths] input_contents = [] for image in images: @@ -716,7 +774,7 @@ async def _image_composition(self, prompt: str, image_paths: List[str], model: s ) response_parts = response.candidates[0].content.parts - + for part in response_parts: if 'text' in part.model_fields_set and part.text: try: @@ -753,8 +811,11 @@ async def _image_composition(self, prompt: str, image_paths: List[str], model: s result_message += f"Composed {len(all_saved_paths)} image(s) and sent to user.\nSaved to:\n{paths_str}\nImage composition completed successfully.\n" else: result_message += "Warning: No composed image was generated. The model only provided text response.\n" - + return result_message except Exception as e: return f"Error composing image: {str(e)}" - + finally: + for image in images: + image.close() + self._cleanup_temp_images(temp_paths) diff --git a/app/image_utils.py b/app/image_utils.py new file mode 100644 index 0000000..a29ab49 --- /dev/null +++ b/app/image_utils.py @@ -0,0 +1,37 @@ +from pathlib import Path + +from PIL import Image, ImageOps +from pillow_heif import register_heif_opener + + +register_heif_opener() + +IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".heic", ".heif") +JPEG_FORMATS = {"JPEG", "JPG"} + + +def is_jpeg_image(image_path: str | Path) -> bool: + with Image.open(image_path) as img: + return (img.format or "").upper() in JPEG_FORMATS + + +def save_image_as_jpeg( + source_path: str | Path, + target_path: str | Path, + *, + max_resolution: int = 0, + quality: int = 90, +) -> None: + with Image.open(source_path) as img: + img = ImageOps.exif_transpose(img) + + if max_resolution > 0 and max(img.size) > max_resolution: + img.thumbnail( + (max_resolution, max_resolution), + getattr(Image, "Resampling", Image).LANCZOS, + ) + + if img.mode != "RGB": + img = img.convert("RGB") + + img.save(target_path, format="JPEG", quality=quality, optimize=True) diff --git a/app/main_bot.py b/app/main_bot.py index 23b4baa..fe1f2a2 100644 --- a/app/main_bot.py +++ b/app/main_bot.py @@ -22,6 +22,8 @@ from secure_container.main import initialize_secure_containers, cleanup_containers from stats import stats_tracker, DEFAULT_ACTION_LIMIT import time +from PIL import Image +from image_utils import IMAGE_EXTENSIONS, is_jpeg_image, save_image_as_jpeg # Streaming config - read directly from env, not imported streaming_enabled = os.getenv("STREAMING_ENABLED", "false").lower() == "true" @@ -85,6 +87,7 @@ def get_prober_name(): anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") anthropic_client = anthropic.AsyncAnthropic(api_key=anthropic_api_key) send_reasoning = True +max_image_resolution_vision = int(os.getenv("MAX_IMAGE_RESOLUTION_VISION", "1024") or "1024") user_invites = {} authorized_users = set(telegram_chat_id) @@ -115,6 +118,7 @@ def get_user_lock(user_id: str) -> asyncio.Lock: media_group_captions = {} media_group_waiting_message = {} media_group_tasks = {} +media_group_processing = {} MEDIA_GROUP_TIMEOUT = 10.0 @@ -249,8 +253,39 @@ def validate_semantic_max_results(self, max_results: int) -> int: return max(1, min(20, max_results)) def encode_image(image_path): - with open(image_path, "rb") as image_file: - return base64.b64encode(image_file.read()).decode('utf-8') + image_bytes = b"" + try: + with Image.open(image_path) as img: + needs_resizing = max(img.size) > max_image_resolution_vision + is_jpeg = (img.format or "").upper() in ("JPEG", "JPG") + + if needs_resizing or not is_jpeg: + if needs_resizing: + resample_filter = getattr(Image, 'Resampling', Image).LANCZOS + img.thumbnail((max_image_resolution_vision, max_image_resolution_vision), resample_filter) + + buffer = io.BytesIO() + if img.mode != "RGB": + img = img.convert("RGB") + img.save(buffer, format="JPEG", quality=85) + image_bytes = buffer.getvalue() + except Exception as e: + logger.warning(f"Error compressing image: {e}. Falling back to original size.") + + if not image_bytes: + with open(image_path, "rb") as image_file: + image_bytes = image_file.read() + + + return base64.b64encode(image_bytes).decode('utf-8') + + +def prepare_downloaded_image_for_vision(source_path: str, target_path: str) -> None: + if is_jpeg_image(source_path): + shutil.copy(source_path, target_path) + return + + save_image_as_jpeg(source_path, target_path, quality=90) def split_text_intelligently(text: str, max_length: int = 4000) -> list[str]: @@ -1462,133 +1497,69 @@ async def voice_button(update: Update, context: ContextTypes.DEFAULT_TYPE): parse_mode="markdown" ) +async def queue_image_media_group(update: Update, context: ContextTypes.DEFAULT_TYPE, user_id: str, image_ref: Any, caption: str | None): + """Collect Telegram album image refs and process them after updates stop arriving.""" + is_same_media_group = user_id in media_group_id and media_group_id[user_id] == update.message.media_group_id + is_processing = media_group_processing.get(user_id) and is_same_media_group + + if user_id in media_group_tasks and media_group_tasks[user_id] and not is_processing: + media_group_tasks[user_id].cancel() + + if user_id not in media_group_id or media_group_id[user_id] != update.message.media_group_id: + media_group_id[user_id] = update.message.media_group_id + media_group_photos[user_id] = [] + media_group_captions[user_id] = None + media_group_waiting_message[user_id] = await update.message.reply_text( + "🖼️ *Image media group received... Waiting for images...*", + parse_mode="markdown" + ) + + if caption and media_group_captions[user_id] is None: + media_group_captions[user_id] = caption + + if image_ref not in media_group_photos[user_id]: + media_group_photos[user_id].append(image_ref) + waiting_message = media_group_waiting_message.get(user_id) + if waiting_message and not is_processing: + await waiting_message.edit_text( + f"🖼️ *Image {len(media_group_photos[user_id])} received... Waiting for other images...*", + parse_mode="markdown" + ) + + if is_processing: + return + + async def process_media_group_with_timeout(): + try: + await asyncio.sleep(MEDIA_GROUP_TIMEOUT) + await process_media_group(update, context, user_id) + except asyncio.CancelledError: + pass + except Exception as e: + logging.error(f"Error in process_media_group_with_timeout: {str(e)}") + + task = asyncio.create_task(process_media_group_with_timeout()) + media_group_tasks[user_id] = task + async def process_media_group(update: Update, context: ContextTypes.DEFAULT_TYPE, user_id: str): - """Process all photos in a media group""" + """Process all images in a media group""" + media_group_processing[user_id] = True try: - if not user_id in media_group_photos: + if user_id not in media_group_photos or not media_group_photos[user_id]: print(f"User {user_id} is waiting for media group, but it is not in media_group_photos") return - if user_id in media_group_waiting_message and media_group_waiting_message[user_id]: - await media_group_waiting_message[user_id].edit_text("🖼️ *Processing media group...*", parse_mode="markdown") - - photos = media_group_photos[user_id] + status_message = media_group_waiting_message.get(user_id) + + image_refs = media_group_photos[user_id] # Track media group processed - stats_tracker.track_media_group_processed(user_id, len(photos)) - - print(f"User {user_id} has {len(photos)} photos in media group") + stats_tracker.track_media_group_processed(user_id, len(image_refs)) - if media_group_captions[user_id] == None: - caption = "Describe what is in this image in user language." - describe_question = caption - else: - caption = media_group_captions[user_id] - describe_question = f"Describe what is in this image and answer to this question: {caption}" + print(f"User {user_id} has {len(image_refs)} images in media group") - - # Send initial status message - status_message = await update.message.reply_text("🖼️ *Analyzing images...*", parse_mode="markdown") - - temp_photos = [] # Keep track of temporary files for cleanup - all_descriptions = [] # Store descriptions for all photos - image_paths = [] # Store paths to downloaded images - - try: - # Process each photo - for i, photo_group in enumerate(photos, 1): - try: - photo = photo_group[0] # Get the photo from the group - # Download the photo - photo_file = await context.bot.get_file(photo.file_id) - temp_dir = f"./data/{user_id}/temp_photos" - os.makedirs(temp_dir, exist_ok=True) - temp_photo = os.path.join(temp_dir, f"photo_{uuid.uuid4()}.jpg") - temp_photos.append(temp_photo) - await photo_file.download_to_drive(temp_photo) - - # Save the permanent copy to user's directory - user_images_dir = os.path.join("data", user_id, "images") - os.makedirs(user_images_dir, exist_ok=True) - permanent_image_path = os.path.join(user_images_dir, f"image_{uuid.uuid4()}.jpg") - # Copy the image to the permanent location - shutil.copy(temp_photo, permanent_image_path) - image_paths.append(permanent_image_path) - - # Get descriptions from both services - await status_message.edit_text(f"🤖 *Getting Anthropic description for image {i}...*", parse_mode="markdown") - anthropic_description = await describe_image_anthropic(question=describe_question, image_path=temp_photo) - stats_tracker.track_describe_used(user_id, "image_anthropic") - - await status_message.edit_text(f"🤖 *Getting OpenAI description for image {i}...*", parse_mode="markdown") - openai_description = await describe_image_openai(question=describe_question, image_path=temp_photo) - stats_tracker.track_describe_used(user_id, "image_openai") - - all_descriptions.append({ - 'anthropic': anthropic_description, - 'openai': openai_description, - 'path': permanent_image_path - }) - - except Exception as e: - logging.error(f"Error processing photo {i}: {str(e)}") - all_descriptions.append({ - 'anthropic': f"Error processing image {i}", - 'openai': f"Error processing image {i}", - 'path': "error_path" - }) - - # Craft the user question combining caption and all descriptions - descriptions_text = "\n\n".join([ - f"Image {i+1} (path: {desc['path']}):\n" - f"Anthropic description: {desc['anthropic']}\n" - f"OpenAI description: {desc['openai']}" - for i, desc in enumerate(all_descriptions) - ]) - - user_question = f"{caption}\n\nUser attached {len(all_descriptions)} image(s) to this message. Here are the details about each image from Anthropic and OpenAI:\n\n{descriptions_text}" - - await status_message.edit_text("🤖 *Processing...*", parse_mode="markdown") - # Process like a regular message - await context.bot.send_chat_action(chat_id=update.message.chat_id, action='typing', message_thread_id=get_thread_id(update)) - thinking_message = await update.message.reply_text("💭 *Thinking...*", parse_mode="markdown") - _, _, mg_limit = check_user_limits(user_id) - - async def update_thinking_message(step: str, details: str, iteration: int, critique: int): - if step == "saving": - iteration = "final" - critique = "end" - live_limit_info = "" - if mg_limit: - live_used = stats_tracker.get_user_action_count(user_id, days=30) - live_limit_info = f"📊 *Usage:* _{live_used}/{mg_limit} actions (30d)_\n" - await thinking_message.edit_text( - f"💭 *Thinking...*\n" - f"- - - - \n" - f"{live_limit_info}" - f"📝 *Step:* _{step.replace('_', '-')}_\n" - f"📋 *Details:* _{details.replace('_', '-')}_\n" - f"🔄 *Iterations:* _{iteration}_\n" - f"🎯 *Critiques:* _{critique}_", - parse_mode="markdown" - ) - - # Get response using the same logic as handle_message - _thread_id = get_thread_id(update) - on_text_chunk = create_streaming_callback(context.bot, user_id, _thread_id) - response, messages = await get_answer(user_question, user_id, update_thinking_message, update, context, on_text_chunk=on_text_chunk, message_thread_id=_thread_id) - await send_response_to_user(update, thinking_message, response, user_id) - await send_reasoning_file(update, messages, user_id) - await status_message.edit_text("🤖 *Done!*", parse_mode="markdown") - - finally: - # Clean up all temporary files - for temp_photo in temp_photos: - try: - if os.path.exists(temp_photo): - os.remove(temp_photo) - except Exception as e: - print(f"Error cleaning up temporary photo file {temp_photo}: {str(e)}") + _, _, mg_limit = check_user_limits(user_id) + await process_image_message(update, context, user_id, image_refs, media_group_captions[user_id], mg_limit, status_message) finally: # Clean up media group data media_group_id[user_id] = None @@ -1599,6 +1570,8 @@ async def update_thinking_message(step: str, details: str, iteration: int, criti media_group_waiting_message[user_id] = None if user_id in media_group_tasks: del media_group_tasks[user_id] + if user_id in media_group_processing: + del media_group_processing[user_id] async def handle_photo_message(update: Update, context: ContextTypes.DEFAULT_TYPE): """Handle incoming photo messages with support for multiple photos""" @@ -1619,55 +1592,26 @@ async def handle_photo_message(update: Update, context: ContextTypes.DEFAULT_TYP stats_tracker.track_message_received(user_id, "photo") if update.message.media_group_id: - # Media group collection stays outside lock - multiple photos arrive rapidly - if user_id in media_group_tasks and media_group_tasks[user_id]: - media_group_tasks[user_id].cancel() - - if user_id not in media_group_id or media_group_id[user_id] != update.message.media_group_id: - media_group_id[user_id] = update.message.media_group_id - media_group_photos[user_id] = [] - media_group_captions[user_id] = None - media_group_waiting_message[user_id] = await update.message.reply_text( - "🖼️ *Image media group received... Waiting for images...*", - parse_mode="markdown" - ) - - if update.message.caption and media_group_captions[user_id] == None: - media_group_captions[user_id] = update.message.caption - - photos = [update.message.photo[-1]] - if photos not in media_group_photos[user_id]: - media_group_photos[user_id].append(photos) - await media_group_waiting_message[user_id].edit_text( - f"🖼️ *Image {len(media_group_photos[user_id])} received... Waiting for other images...*", - parse_mode="markdown" - ) - - async def process_media_group_with_timeout(): - try: - await asyncio.sleep(MEDIA_GROUP_TIMEOUT) - async with get_user_lock(user_id): - await process_media_group(update, context, user_id) - except asyncio.CancelledError: - pass - except Exception as e: - logging.error(f"Error in process_media_group_with_timeout: {str(e)}") - - task = asyncio.create_task(process_media_group_with_timeout()) - media_group_tasks[user_id] = task + await queue_image_media_group(update, context, user_id, update.message.photo[-1], update.message.caption) return # Handle single photo message + await process_image_message(update, context, user_id, [update.message.photo[-1]], update.message.caption, limit) + +async def process_image_message(update: Update, context: ContextTypes.DEFAULT_TYPE, user_id: str, image_refs: list, caption: str | None, limit: int | None = None, status_message: Any = None): + """Process one or more Telegram image file refs through the photo analysis pipeline.""" async with get_user_lock(user_id): - photos = [update.message.photo[-1]] - if update.message.caption == None: + photos = image_refs + if caption is None: caption = "Describe what is in this image in user language." describe_question = caption else: - caption = update.message.caption describe_question = f"Describe what is in this image and answer to this question: {caption}" - status_message = await update.message.reply_text("🖼️ *Analyzing images...*", parse_mode="markdown") + if status_message: + await status_message.edit_text("🖼️ *Analyzing images...*", parse_mode="markdown") + else: + status_message = await update.message.reply_text("🖼️ *Analyzing images...*", parse_mode="markdown") temp_photos = [] all_descriptions = [] @@ -1679,9 +1623,17 @@ async def process_media_group_with_timeout(): photo_file = await context.bot.get_file(photo.file_id) temp_dir = "temp_photos" os.makedirs(temp_dir, exist_ok=True) + original_extension = os.path.splitext(getattr(photo, "file_name", "") or "")[1].lower() + if original_extension not in IMAGE_EXTENSIONS: + original_extension = ".jpg" + + temp_download = os.path.join(temp_dir, f"photo_{uuid.uuid4()}{original_extension}") + temp_photos.append(temp_download) + await photo_file.download_to_drive(temp_download) + temp_photo = os.path.join(temp_dir, f"photo_{uuid.uuid4()}.jpg") temp_photos.append(temp_photo) - await photo_file.download_to_drive(temp_photo) + prepare_downloaded_image_for_vision(temp_download, temp_photo) user_images_dir = os.path.join("data", user_id, "images") os.makedirs(user_images_dir, exist_ok=True) @@ -1689,11 +1641,12 @@ async def process_media_group_with_timeout(): shutil.copy(temp_photo, permanent_image_path) image_paths.append(permanent_image_path) - await status_message.edit_text(f"🤖 *Getting Anthropic description...*", parse_mode="markdown") + image_suffix = f" for image {i}" if len(photos) > 1 else "" + await status_message.edit_text(f"🤖 *Getting Anthropic description{image_suffix}...*", parse_mode="markdown") anthropic_description = await describe_image_anthropic(question=describe_question, image_path=temp_photo) stats_tracker.track_describe_used(user_id, "image_anthropic") - await status_message.edit_text(f"🤖 *Getting OpenAI description...*", parse_mode="markdown") + await status_message.edit_text(f"🤖 *Getting OpenAI description{image_suffix}...*", parse_mode="markdown") openai_description = await describe_image_openai(question=describe_question, image_path=temp_photo) stats_tracker.track_describe_used(user_id, "image_openai") @@ -1787,6 +1740,15 @@ async def handle_document_message(update: Update, context: ContextTypes.DEFAULT_ await handle_video_message(update, context) return + image_extensions = IMAGE_EXTENSIONS + if file_extension in image_extensions: + stats_tracker.track_message_received(user_id, "photo") + if update.message.media_group_id: + await queue_image_media_group(update, context, user_id, update.message.document, update.message.caption) + return + await process_image_message(update, context, user_id, [update.message.document], update.message.caption, limit) + return + # Track message received stats_tracker.track_message_received(user_id, "document") @@ -1794,7 +1756,8 @@ async def handle_document_message(update: Update, context: ContextTypes.DEFAULT_ if file_extension not in supported_extensions: supported_formats = ", ".join([ext.replace(".", "").upper() for ext in supported_extensions]) - await update.message.reply_text(f"❌ Only {supported_formats} documents are supported.") + supported_image_formats = "/".join(ext.replace(".", "").upper() for ext in image_extensions) + await update.message.reply_text(f"❌ Only {supported_formats} documents and {supported_image_formats} images are supported.") return async with get_user_lock(user_id): diff --git a/app/requirements.txt b/app/requirements.txt index 7a8636d..d93c36b 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -8,6 +8,7 @@ boto3>=1.36.0 loguru>=0.7.0 elevenlabs>=0.3.0 pydub>=0.25.1 # winget install ffmpeg +audioop-lts>=0.2.2; python_version >= "3.13" ffmpeg-downloader # ffdl install --add-path requests>=2.31.0 beautifulsoup4>=4.12.0 @@ -20,4 +21,5 @@ docx2txt>=0.8 pandas>=2.0.0 openpyxl>=3.1.2 google-genai>=1.45.0 -pillow>=11.1.0 \ No newline at end of file +pillow>=11.1.0 +pillow-heif>=1.4.0