From 2b08bc3e3b7c3843eb9fe72cc5e8dd1640cb47b4 Mon Sep 17 00:00:00 2001
From: mbv06 <mbv06.dev@gmail.com>
Date: Thu, 11 Jun 2026 16:19:35 +0300
Subject: [PATCH 1/7] Support JPEG documents as image inputs

---
 app/main_bot.py | 138 +++++++-----------------------------------------
 1 file changed, 20 insertions(+), 118 deletions(-)

diff --git a/app/main_bot.py b/app/main_bot.py
index 23b4baa..f6ca529 100644
--- a/app/main_bot.py
+++ b/app/main_bot.py
@@ -1479,116 +1479,9 @@ async def process_media_group(update: Update, context: ContextTypes.DEFAULT_TYPE
 
         print(f"User {user_id} has {len(photos)} photos in media group")
 
-        if media_group_captions[user_id] == None:
-            caption = "Describe what is in this image in user language."
-            describe_question = caption
-        else:
-            caption = media_group_captions[user_id]
-            describe_question = f"Describe what is in this image and answer to this question: {caption}"
-
-        
-        # Send initial status message
-        status_message = await update.message.reply_text("🖼️ *Analyzing images...*", parse_mode="markdown")
-        
-        temp_photos = []  # Keep track of temporary files for cleanup
-        all_descriptions = []  # Store descriptions for all photos
-        image_paths = []  # Store paths to downloaded images
-        
-        try:
-            # Process each photo
-            for i, photo_group in enumerate(photos, 1):
-                try:
-                    photo = photo_group[0]  # Get the photo from the group
-                    # Download the photo
-                    photo_file = await context.bot.get_file(photo.file_id)
-                    temp_dir = f"./data/{user_id}/temp_photos"
-                    os.makedirs(temp_dir, exist_ok=True)
-                    temp_photo = os.path.join(temp_dir, f"photo_{uuid.uuid4()}.jpg")
-                    temp_photos.append(temp_photo)
-                    await photo_file.download_to_drive(temp_photo)
-                    
-                    # Save the permanent copy to user's directory
-                    user_images_dir = os.path.join("data", user_id, "images")
-                    os.makedirs(user_images_dir, exist_ok=True)
-                    permanent_image_path = os.path.join(user_images_dir, f"image_{uuid.uuid4()}.jpg")
-                    # Copy the image to the permanent location
-                    shutil.copy(temp_photo, permanent_image_path)
-                    image_paths.append(permanent_image_path)
-                    
-                    # Get descriptions from both services
-                    await status_message.edit_text(f"🤖 *Getting Anthropic description for image {i}...*", parse_mode="markdown")
-                    anthropic_description = await describe_image_anthropic(question=describe_question, image_path=temp_photo)
-                    stats_tracker.track_describe_used(user_id, "image_anthropic")
-                    
-                    await status_message.edit_text(f"🤖 *Getting OpenAI description for image {i}...*", parse_mode="markdown")
-                    openai_description = await describe_image_openai(question=describe_question, image_path=temp_photo)
-                    stats_tracker.track_describe_used(user_id, "image_openai")
-                    
-                    all_descriptions.append({
-                        'anthropic': anthropic_description,
-                        'openai': openai_description,
-                        'path': permanent_image_path
-                    })
-                    
-                except Exception as e:
-                    logging.error(f"Error processing photo {i}: {str(e)}")
-                    all_descriptions.append({
-                        'anthropic': f"Error processing image {i}",
-                        'openai': f"Error processing image {i}",
-                        'path': "error_path"
-                    })
-            
-            # Craft the user question combining caption and all descriptions
-            descriptions_text = "\n\n".join([
-                f"Image {i+1} (path: {desc['path']}):\n"
-                f"Anthropic description: {desc['anthropic']}\n"
-                f"OpenAI description: {desc['openai']}"
-                for i, desc in enumerate(all_descriptions)
-            ])
-            
-            user_question = f"{caption}\n\nUser attached {len(all_descriptions)} image(s) to this message. Here are the details about each image from Anthropic and OpenAI:\n\n{descriptions_text}"
-            
-            await status_message.edit_text("🤖 *Processing...*", parse_mode="markdown")
-            # Process like a regular message
-            await context.bot.send_chat_action(chat_id=update.message.chat_id, action='typing', message_thread_id=get_thread_id(update))
-            thinking_message = await update.message.reply_text("💭 *Thinking...*", parse_mode="markdown")
-            _, _, mg_limit = check_user_limits(user_id)
-            
-            async def update_thinking_message(step: str, details: str, iteration: int, critique: int):
-                if step == "saving":
-                    iteration = "final"
-                    critique = "end"
-                live_limit_info = ""
-                if mg_limit:
-                    live_used = stats_tracker.get_user_action_count(user_id, days=30)
-                    live_limit_info = f"📊 *Usage:* _{live_used}/{mg_limit} actions (30d)_\n"
-                await thinking_message.edit_text(
-                    f"💭 *Thinking...*\n"
-                    f"- - - - \n"
-                    f"{live_limit_info}"
-                    f"📝 *Step:* _{step.replace('_', '-')}_\n"
-                    f"📋 *Details:* _{details.replace('_', '-')}_\n"
-                    f"🔄 *Iterations:* _{iteration}_\n"
-                    f"🎯 *Critiques:* _{critique}_",
-                    parse_mode="markdown"
-                )
-            
-            # Get response using the same logic as handle_message
-            _thread_id = get_thread_id(update)
-            on_text_chunk = create_streaming_callback(context.bot, user_id, _thread_id)
-            response, messages = await get_answer(user_question, user_id, update_thinking_message, update, context, on_text_chunk=on_text_chunk, message_thread_id=_thread_id)
-            await send_response_to_user(update, thinking_message, response, user_id)
-            await send_reasoning_file(update, messages, user_id)
-            await status_message.edit_text("🤖 *Done!*", parse_mode="markdown")
-            
-        finally:
-            # Clean up all temporary files
-            for temp_photo in temp_photos:
-                try:
-                    if os.path.exists(temp_photo):
-                        os.remove(temp_photo)
-                except Exception as e:
-                    print(f"Error cleaning up temporary photo file {temp_photo}: {str(e)}")
+        image_refs = [photo_group[0] for photo_group in photos]
+        _, _, mg_limit = check_user_limits(user_id)
+        await process_image_message(update, context, user_id, image_refs, media_group_captions[user_id], mg_limit)
     finally:
         # Clean up media group data
         media_group_id[user_id] = None
@@ -1646,8 +1539,7 @@ async def handle_photo_message(update: Update, context: ContextTypes.DEFAULT_TYP
         async def process_media_group_with_timeout():
             try:
                 await asyncio.sleep(MEDIA_GROUP_TIMEOUT)
-                async with get_user_lock(user_id):
-                    await process_media_group(update, context, user_id)
+                await process_media_group(update, context, user_id)
             except asyncio.CancelledError:
                 pass
             except Exception as e:
@@ -1658,13 +1550,16 @@ async def process_media_group_with_timeout():
         return
     
     # Handle single photo message
+    await process_image_message(update, context, user_id, [update.message.photo[-1]], update.message.caption, limit)
+
+async def process_image_message(update: Update, context: ContextTypes.DEFAULT_TYPE, user_id: str, image_refs: list, caption: str, limit: int | None = None):
+    """Process one or more Telegram image file refs through the photo analysis pipeline."""
     async with get_user_lock(user_id):
-        photos = [update.message.photo[-1]]
-        if update.message.caption == None:
+        photos = image_refs
+        if caption is None:
             caption = "Describe what is in this image in user language."
             describe_question = caption
         else:
-            caption = update.message.caption
             describe_question = f"Describe what is in this image and answer to this question: {caption}"
         
         status_message = await update.message.reply_text("🖼️ *Analyzing images...*", parse_mode="markdown")
@@ -1689,11 +1584,12 @@ async def process_media_group_with_timeout():
                     shutil.copy(temp_photo, permanent_image_path)
                     image_paths.append(permanent_image_path)
                     
-                    await status_message.edit_text(f"🤖 *Getting Anthropic description...*", parse_mode="markdown")
+                    image_suffix = f" for image {i}" if len(photos) > 1 else ""
+                    await status_message.edit_text(f"🤖 *Getting Anthropic description{image_suffix}...*", parse_mode="markdown")
                     anthropic_description = await describe_image_anthropic(question=describe_question, image_path=temp_photo)
                     stats_tracker.track_describe_used(user_id, "image_anthropic")
                     
-                    await status_message.edit_text(f"🤖 *Getting OpenAI description...*", parse_mode="markdown")
+                    await status_message.edit_text(f"🤖 *Getting OpenAI description{image_suffix}...*", parse_mode="markdown")
                     openai_description = await describe_image_openai(question=describe_question, image_path=temp_photo)
                     stats_tracker.track_describe_used(user_id, "image_openai")
                     
@@ -1787,6 +1683,12 @@ async def handle_document_message(update: Update, context: ContextTypes.DEFAULT_
         await handle_video_message(update, context)
         return
     
+    image_extensions = ['.jpg', '.jpeg']
+    if file_extension in image_extensions:
+        stats_tracker.track_message_received(user_id, "photo")
+        await process_image_message(update, context, user_id, [update.message.document], update.message.caption, limit)
+        return
+
     # Track message received
     stats_tracker.track_message_received(user_id, "document")
     
@@ -1794,7 +1696,7 @@ async def handle_document_message(update: Update, context: ContextTypes.DEFAULT_
     
     if file_extension not in supported_extensions:
         supported_formats = ", ".join([ext.replace(".", "").upper() for ext in supported_extensions])
-        await update.message.reply_text(f"❌ Only {supported_formats} documents are supported.")
+        await update.message.reply_text(f"❌ Only {supported_formats} documents and JPG/JPEG images are supported.")
         return
 
     async with get_user_lock(user_id):

From 20fd4c4391857d022f5eebaf29c8fb7ca42b73aa Mon Sep 17 00:00:00 2001
From: mbv06 <mbv06.dev@gmail.com>
Date: Thu, 11 Jun 2026 19:17:34 +0300
Subject: [PATCH 2/7] Handle grouped JPEG image documents

---
 .env.example    |   3 ++
 app/main_bot.py | 115 ++++++++++++++++++++++++++++++------------------
 2 files changed, 75 insertions(+), 43 deletions(-)

diff --git a/.env.example b/.env.example
index c4e2000..45f2758 100644
--- a/.env.example
+++ b/.env.example
@@ -42,3 +42,6 @@ TELEGRAM_API_ID=your_telegram_api_id_here
 TELEGRAM_API_HASH=your_telegram_api_hash_here
 TELEGRAM_USE_LOCAL_API=true
 TELEGRAM_LOCAL_API_URL=http://localhost:8081
+
+# Image Processing Configuration
+MAX_IMAGE_SIZE=1024
diff --git a/app/main_bot.py b/app/main_bot.py
index f6ca529..67ed7ec 100644
--- a/app/main_bot.py
+++ b/app/main_bot.py
@@ -22,6 +22,7 @@
 from secure_container.main import initialize_secure_containers, cleanup_containers
 from stats import stats_tracker, DEFAULT_ACTION_LIMIT
 import time
+from PIL import Image
 
 # Streaming config - read directly from env, not imported
 streaming_enabled = os.getenv("STREAMING_ENABLED", "false").lower() == "true"
@@ -85,6 +86,7 @@ def get_prober_name():
 anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
 anthropic_client = anthropic.AsyncAnthropic(api_key=anthropic_api_key)
 send_reasoning = True
+MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "1024") or "1024")
 
 user_invites = {}
 authorized_users = set(telegram_chat_id)
@@ -249,8 +251,31 @@ def validate_semantic_max_results(self, max_results: int) -> int:
         return max(1, min(20, max_results))
 
 def encode_image(image_path):
-    with open(image_path, "rb") as image_file:
-        return base64.b64encode(image_file.read()).decode('utf-8')
+    image_bytes = b""
+    try:
+        with Image.open(image_path) as img:
+            needs_resizing = max(img.size) > MAX_IMAGE_SIZE
+            is_jpeg = (img.format or "").upper() in ("JPEG", "JPG")
+
+            if needs_resizing or not is_jpeg:
+                if needs_resizing:
+                    resample_filter = getattr(Image, 'Resampling', Image).LANCZOS
+                    img.thumbnail((MAX_IMAGE_SIZE, MAX_IMAGE_SIZE), resample_filter)
+
+                buffer = io.BytesIO()
+                if img.mode != "RGB":
+                    img = img.convert("RGB")
+                img.save(buffer, format="JPEG", quality=85)
+                image_bytes = buffer.getvalue()
+    except Exception as e:
+        logger.warning(f"Error compressing image: {e}. Falling back to original size.")
+
+    if not image_bytes:
+        with open(image_path, "rb") as image_file:
+            image_bytes = image_file.read()
+
+
+    return base64.b64encode(image_bytes).decode('utf-8')
 
 
 def split_text_intelligently(text: str, max_length: int = 4000) -> list[str]:
@@ -1462,24 +1487,59 @@ async def voice_button(update: Update, context: ContextTypes.DEFAULT_TYPE):
             parse_mode="markdown"
         )
 
+async def queue_image_media_group(update: Update, context: ContextTypes.DEFAULT_TYPE, user_id: str, image_ref, caption: str | None):
+    """Collect Telegram album image refs and process them after updates stop arriving."""
+    if user_id in media_group_tasks and media_group_tasks[user_id]:
+        media_group_tasks[user_id].cancel()
+
+    if user_id not in media_group_id or media_group_id[user_id] != update.message.media_group_id:
+        media_group_id[user_id] = update.message.media_group_id
+        media_group_photos[user_id] = []
+        media_group_captions[user_id] = None
+        media_group_waiting_message[user_id] = await update.message.reply_text(
+            "🖼️ *Image media group received... Waiting for images...*",
+            parse_mode="markdown"
+        )
+
+    if caption and media_group_captions[user_id] is None:
+        media_group_captions[user_id] = caption
+
+    if image_ref not in media_group_photos[user_id]:
+        media_group_photos[user_id].append(image_ref)
+        await media_group_waiting_message[user_id].edit_text(
+            f"🖼️ *Image {len(media_group_photos[user_id])} received... Waiting for other images...*",
+            parse_mode="markdown"
+        )
+
+    async def process_media_group_with_timeout():
+        try:
+            await asyncio.sleep(MEDIA_GROUP_TIMEOUT)
+            await process_media_group(update, context, user_id)
+        except asyncio.CancelledError:
+            pass
+        except Exception as e:
+            logging.error(f"Error in process_media_group_with_timeout: {str(e)}")
+
+    task = asyncio.create_task(process_media_group_with_timeout())
+    media_group_tasks[user_id] = task
+
 async def process_media_group(update: Update, context: ContextTypes.DEFAULT_TYPE, user_id: str):
-    """Process all photos in a media group"""
+    """Process all images in a media group"""
     try:
-        if not user_id in media_group_photos:
+        if user_id not in media_group_photos or not media_group_photos[user_id]:
             print(f"User {user_id} is waiting for media group, but it is not in media_group_photos")
             return
 
         if user_id in media_group_waiting_message and media_group_waiting_message[user_id]:
             await media_group_waiting_message[user_id].edit_text("🖼️ *Processing media group...*", parse_mode="markdown")
         
-        photos = media_group_photos[user_id]
+        image_refs = media_group_photos[user_id]
         
         # Track media group processed
-        stats_tracker.track_media_group_processed(user_id, len(photos))
+        stats_tracker.track_media_group_processed(user_id, len(image_refs))
 
-        print(f"User {user_id} has {len(photos)} photos in media group")
+        print(f"User {user_id} has {len(image_refs)} images in media group")
 
-        image_refs = [photo_group[0] for photo_group in photos]
         _, _, mg_limit = check_user_limits(user_id)
         await process_image_message(update, context, user_id, image_refs, media_group_captions[user_id], mg_limit)
     finally:
@@ -1512,41 +1572,7 @@ async def handle_photo_message(update: Update, context: ContextTypes.DEFAULT_TYP
     stats_tracker.track_message_received(user_id, "photo")
     
     if update.message.media_group_id:
-        # Media group collection stays outside lock - multiple photos arrive rapidly
-        if user_id in media_group_tasks and media_group_tasks[user_id]:
-            media_group_tasks[user_id].cancel()
-        
-        if user_id not in media_group_id or media_group_id[user_id] != update.message.media_group_id:
-            media_group_id[user_id] = update.message.media_group_id
-            media_group_photos[user_id] = []
-            media_group_captions[user_id] = None
-            media_group_waiting_message[user_id] = await update.message.reply_text(
-                "🖼️ *Image media group received... Waiting for images...*",
-                parse_mode="markdown"
-            )
-        
-        if update.message.caption and media_group_captions[user_id] == None:
-            media_group_captions[user_id] = update.message.caption
-
-        photos = [update.message.photo[-1]]
-        if photos not in media_group_photos[user_id]:
-            media_group_photos[user_id].append(photos)
-            await media_group_waiting_message[user_id].edit_text(
-                f"🖼️ *Image {len(media_group_photos[user_id])} received... Waiting for other images...*",
-                parse_mode="markdown"
-            )
-        
-        async def process_media_group_with_timeout():
-            try:
-                await asyncio.sleep(MEDIA_GROUP_TIMEOUT)
-                await process_media_group(update, context, user_id)
-            except asyncio.CancelledError:
-                pass
-            except Exception as e:
-                logging.error(f"Error in process_media_group_with_timeout: {str(e)}")
-        
-        task = asyncio.create_task(process_media_group_with_timeout())
-        media_group_tasks[user_id] = task
+        await queue_image_media_group(update, context, user_id, update.message.photo[-1], update.message.caption)
         return
     
     # Handle single photo message
@@ -1686,6 +1712,9 @@ async def handle_document_message(update: Update, context: ContextTypes.DEFAULT_
     image_extensions = ['.jpg', '.jpeg']
     if file_extension in image_extensions:
         stats_tracker.track_message_received(user_id, "photo")
+        if update.message.media_group_id:
+            await queue_image_media_group(update, context, user_id, update.message.document, update.message.caption)
+            return
         await process_image_message(update, context, user_id, [update.message.document], update.message.caption, limit)
         return
 

From f7459583eaa8cfd17a170f8fe8014be97ee5ac41 Mon Sep 17 00:00:00 2001
From: mbv06 <mbv06.dev@gmail.com>
Date: Thu, 11 Jun 2026 19:18:43 +0300
Subject: [PATCH 3/7] Add audioop-lts dependency for Python 3.13 compatibility

---
 app/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/requirements.txt b/app/requirements.txt
index 7a8636d..8db5241 100644
--- a/app/requirements.txt
+++ b/app/requirements.txt
@@ -8,6 +8,7 @@ boto3>=1.36.0
 loguru>=0.7.0
 elevenlabs>=0.3.0
 pydub>=0.25.1 # winget install ffmpeg
+audioop-lts>=0.2.2; python_version >= "3.13"
 ffmpeg-downloader # ffdl install --add-path
 requests>=2.31.0
 beautifulsoup4>=4.12.0

From b92b21ad7c186bb071732b6e887bdeef73014b01 Mon Sep 17 00:00:00 2001
From: mbv06 <mbv06.dev@gmail.com>
Date: Fri, 12 Jun 2026 00:34:23 +0300
Subject: [PATCH 4/7] Limit image edit input resolution

---
 .env.example              |  3 +-
 app/agents/image_tools.py | 76 +++++++++++++++++++++++++++++++++------
 app/main_bot.py           | 23 ++++++------
 3 files changed, 80 insertions(+), 22 deletions(-)

diff --git a/.env.example b/.env.example
index 45f2758..05f1028 100644
--- a/.env.example
+++ b/.env.example
@@ -44,4 +44,5 @@ TELEGRAM_USE_LOCAL_API=true
 TELEGRAM_LOCAL_API_URL=http://localhost:8081
 
 # Image Processing Configuration
-MAX_IMAGE_SIZE=1024
+MAX_IMAGE_RESOLUTION_VISION=1024
+MAX_IMAGE_RESOLUTION_EDIT=4096
diff --git a/app/agents/image_tools.py b/app/agents/image_tools.py
index e1b9eca..a5fd804 100644
--- a/app/agents/image_tools.py
+++ b/app/agents/image_tools.py
@@ -10,12 +10,14 @@
 from google import genai
 from google.genai import types
 import PIL.Image
+import PIL.ImageOps
 load_dotenv()
 
 openai_api_key = os.getenv("OPENAI_API_KEY")
 openai_client = OpenAI(api_key=openai_api_key)
 google_api_key = os.getenv("GOOGLE_API_KEY")
 genai_client = genai.Client(api_key=google_api_key)
+max_image_resolution_edit = int(os.getenv("MAX_IMAGE_RESOLUTION_EDIT", "4096") or "4096")
 
 class TextPart:
     def __init__(self, text):
@@ -273,6 +275,37 @@ def _resolve_image_path(self, image_path: str) -> Path:
             return relative_to_images
         return p  # Return original so the caller can report the correct path
 
+    def _prepare_image_for_edit(self, image_path: Path, temp_paths: List[Path]) -> Path:
+        """Create a temporary downsized JPEG for oversized edit inputs."""
+        if max_image_resolution_edit > 0:
+            try:
+                with PIL.Image.open(image_path) as img:
+                    img = PIL.ImageOps.exif_transpose(img)
+                    if max(img.size) > max_image_resolution_edit:
+                        img.thumbnail(
+                            (max_image_resolution_edit, max_image_resolution_edit),
+                            getattr(PIL.Image, "Resampling", PIL.Image).LANCZOS
+                        )
+                        if img.mode != "RGB":
+                            img = img.convert("RGB")
+
+                        temp_path = self.images_path / f"edit_input_{uuid.uuid4()}.jpg"
+                        temp_paths.append(temp_path)
+                        img.save(temp_path, format="JPEG", quality=90, optimize=True)
+                        return temp_path
+            except Exception as e:
+                print(f"Failed to prepare image for edit, using original file: {e}")
+
+        return image_path
+
+    def _cleanup_temp_images(self, temp_paths: List[Path]) -> None:
+        for temp_path in temp_paths:
+            try:
+                if temp_path.exists():
+                    temp_path.unlink()
+            except Exception as e:
+                print(f"Failed to remove temporary image {temp_path}: {e}")
+
     def _resolve_gpt_size(self, resolution: str, aspect_ratio: str) -> str:
         """Translate resolution + aspect_ratio into a GPT Image 2 pixel size string.
         
@@ -418,6 +451,8 @@ async def _generate_multimodal_image_and_text(self, prompt: str, style: str = "3
     async def _image_editing(self, prompt: str, image_path: str, model: str = "Normal", aspect_ratio: str = "16:9", resolution: str = "2K", gpt_quality: str = "auto", variants: int = 1, caption: str = "Here is your edited image") -> str:
         """Edit an existing image using Gemini or GPT Image 2"""
         print(f"Editing image - Prompt: {prompt}, Image: {image_path}, Model: {model}, Aspect Ratio: {aspect_ratio}, Resolution: {resolution}, GPT Quality: {gpt_quality}, Variants: {variants}")
+        temp_paths: List[Path] = []
+        source_image = None
         try:
             image_path_obj = self._resolve_image_path(image_path)
             if not image_path_obj.exists():
@@ -433,8 +468,9 @@ async def _image_editing(self, prompt: str, image_path: str, model: str = "Norma
                     )
                     all_results.append(r)
                 return "\n".join(all_results)
-            
-            source_image = PIL.Image.open(image_path_obj)
+
+            source_image_path = self._prepare_image_for_edit(image_path_obj, temp_paths)
+            source_image = PIL.Image.open(source_image_path)
             
             model_name = "gemini-3-pro-image-preview" if model.lower() == "pro" else "gemini-3.1-flash-image-preview"
             
@@ -465,7 +501,7 @@ async def _image_editing(self, prompt: str, image_path: str, model: str = "Norma
                 )
                 
                 contents = response.candidates[0].content.parts
-                
+
                 for content in contents:
                     if 'text' in content.model_fields_set:
                         try:
@@ -490,24 +526,32 @@ async def _image_editing(self, prompt: str, image_path: str, model: str = "Norma
                                 document=file,
                                 caption=variant_caption
                             )
-                        
+
                         all_saved_paths.append(str(transformed_image_path))
-            
+
             if all_saved_paths:
                 paths_str = "\n".join(f"  - {p}" for p in all_saved_paths)
                 result_message += f"Edited {len(all_saved_paths)} image(s) and sent to user.\nSaved to:\n{paths_str}\nImage transformation completed successfully.\n"
             else:
                 result_message += "Warning: No transformed image was generated. The model only provided text response.\n"
-                
+
             return result_message
         except Exception as e:
             return f"Error editing image: {str(e)}"
+        finally:
+            if source_image:
+                source_image.close()
+            self._cleanup_temp_images(temp_paths)
 
     async def _gpt_image_edit(self, prompt: str, image_paths: List[Path], size: str = "2048x1152", quality: str = "auto", caption: str = "Here is your edited image") -> str:
         """Edit one or more images using OpenAI GPT Image 2"""
         print(f"Editing image(s) with GPT Image 2 - Prompt: {prompt}, Images: {image_paths}, Size: {size}, Quality: {quality}")
+        temp_paths: List[Path] = []
         try:
-            image_files = [open(str(p), "rb") for p in image_paths]
+            image_files = [
+                open(str(self._prepare_image_for_edit(p, temp_paths)), "rb")
+                for p in image_paths
+            ]
             
             kwargs = {
                 "model": "gpt-image-2-2026-04-21",
@@ -526,6 +570,7 @@ async def _gpt_image_edit(self, prompt: str, image_paths: List[Path], size: str
             finally:
                 for f in image_files:
                     f.close()
+                self._cleanup_temp_images(temp_paths)
 
             image_base64 = result.data[0].b64_json
             image_bytes = base64.b64decode(image_base64)
@@ -658,6 +703,8 @@ async def _gpt_image_generate(self, prompt: str, size: str = "2048x1152", qualit
     async def _image_composition(self, prompt: str, image_paths: List[str], model: str = "Normal", aspect_ratio: str = "16:9", resolution: str = "2K", gpt_quality: str = "auto", variants: int = 1, caption: str = "Here is your composed image") -> str:
         """Compose a new image from multiple input images using Gemini or GPT Image 2"""
         print(f"Composing image - Prompt: {prompt}, Images: {image_paths}, Model: {model}, Aspect Ratio: {aspect_ratio}, Resolution: {resolution}, GPT Quality: {gpt_quality}, Variants: {variants}")
+        temp_paths: List[Path] = []
+        images: List[PIL.Image.Image] = []
         try:
             resolved_paths = []
             for image_path in image_paths:
@@ -683,7 +730,11 @@ async def _image_composition(self, prompt: str, image_paths: List[str], model: s
             if len(resolved_paths) > 3:
                 return "Error: Maximum 3 images are supported for composition with Normal/Pro mode. Use GPT mode for more images."
             
-            images = [PIL.Image.open(p) for p in resolved_paths]
+            prepared_paths = [
+                self._prepare_image_for_edit(p, temp_paths)
+                for p in resolved_paths
+            ]
+            images = [PIL.Image.open(p) for p in prepared_paths]
             
             input_contents = []
             for image in images:
@@ -716,7 +767,7 @@ async def _image_composition(self, prompt: str, image_paths: List[str], model: s
                 )
                 
                 response_parts = response.candidates[0].content.parts
-                
+
                 for part in response_parts:
                     if 'text' in part.model_fields_set and part.text:
                         try:
@@ -753,8 +804,11 @@ async def _image_composition(self, prompt: str, image_paths: List[str], model: s
                 result_message += f"Composed {len(all_saved_paths)} image(s) and sent to user.\nSaved to:\n{paths_str}\nImage composition completed successfully.\n"
             else:
                 result_message += "Warning: No composed image was generated. The model only provided text response.\n"
-                
+
             return result_message
         except Exception as e:
             return f"Error composing image: {str(e)}"
-
+        finally:
+            for image in images:
+                image.close()
+            self._cleanup_temp_images(temp_paths)
diff --git a/app/main_bot.py b/app/main_bot.py
index 67ed7ec..4a68bb3 100644
--- a/app/main_bot.py
+++ b/app/main_bot.py
@@ -86,7 +86,7 @@ def get_prober_name():
 anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
 anthropic_client = anthropic.AsyncAnthropic(api_key=anthropic_api_key)
 send_reasoning = True
-MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "1024") or "1024")
+max_image_resolution_vision = int(os.getenv("MAX_IMAGE_RESOLUTION_VISION", "1024") or "1024")
 
 user_invites = {}
 authorized_users = set(telegram_chat_id)
@@ -254,13 +254,13 @@ def encode_image(image_path):
     image_bytes = b""
     try:
         with Image.open(image_path) as img:
-            needs_resizing = max(img.size) > MAX_IMAGE_SIZE
+            needs_resizing = max(img.size) > max_image_resolution_vision
             is_jpeg = (img.format or "").upper() in ("JPEG", "JPG")
 
             if needs_resizing or not is_jpeg:
                 if needs_resizing:
                     resample_filter = getattr(Image, 'Resampling', Image).LANCZOS
-                    img.thumbnail((MAX_IMAGE_SIZE, MAX_IMAGE_SIZE), resample_filter)
+                    img.thumbnail((max_image_resolution_vision, max_image_resolution_vision), resample_filter)
 
                 buffer = io.BytesIO()
                 if img.mode != "RGB":
@@ -1487,7 +1487,7 @@ async def voice_button(update: Update, context: ContextTypes.DEFAULT_TYPE):
             parse_mode="markdown"
         )
 
-async def queue_image_media_group(update: Update, context: ContextTypes.DEFAULT_TYPE, user_id: str, image_ref, caption: str | None):
+async def queue_image_media_group(update: Update, context: ContextTypes.DEFAULT_TYPE, user_id: str, image_ref: Any, caption: str | None):
     """Collect Telegram album image refs and process them after updates stop arriving."""
     if user_id in media_group_tasks and media_group_tasks[user_id]:
         media_group_tasks[user_id].cancel()
@@ -1530,9 +1530,9 @@ async def process_media_group(update: Update, context: ContextTypes.DEFAULT_TYPE
             print(f"User {user_id} is waiting for media group, but it is not in media_group_photos")
             return
 
-        if user_id in media_group_waiting_message and media_group_waiting_message[user_id]:
-            await media_group_waiting_message[user_id].edit_text("🖼️ *Processing media group...*", parse_mode="markdown")
-        
+        status_message = media_group_waiting_message.get(user_id)
+        media_group_waiting_message[user_id] = None
+
         image_refs = media_group_photos[user_id]
         
         # Track media group processed
@@ -1541,7 +1541,7 @@ async def process_media_group(update: Update, context: ContextTypes.DEFAULT_TYPE
         print(f"User {user_id} has {len(image_refs)} images in media group")
 
         _, _, mg_limit = check_user_limits(user_id)
-        await process_image_message(update, context, user_id, image_refs, media_group_captions[user_id], mg_limit)
+        await process_image_message(update, context, user_id, image_refs, media_group_captions[user_id], mg_limit, status_message)
     finally:
         # Clean up media group data
         media_group_id[user_id] = None
@@ -1578,7 +1578,7 @@ async def handle_photo_message(update: Update, context: ContextTypes.DEFAULT_TYP
     # Handle single photo message
     await process_image_message(update, context, user_id, [update.message.photo[-1]], update.message.caption, limit)
 
-async def process_image_message(update: Update, context: ContextTypes.DEFAULT_TYPE, user_id: str, image_refs: list, caption: str, limit: int | None = None):
+async def process_image_message(update: Update, context: ContextTypes.DEFAULT_TYPE, user_id: str, image_refs: list, caption: str | None, limit: int | None = None, status_message: Any = None):
     """Process one or more Telegram image file refs through the photo analysis pipeline."""
     async with get_user_lock(user_id):
         photos = image_refs
@@ -1588,7 +1588,10 @@ async def process_image_message(update: Update, context: ContextTypes.DEFAULT_TY
         else:
             describe_question = f"Describe what is in this image and answer to this question: {caption}"
         
-        status_message = await update.message.reply_text("🖼️ *Analyzing images...*", parse_mode="markdown")
+        if status_message:
+            await status_message.edit_text("🖼️ *Analyzing images...*", parse_mode="markdown")
+        else:
+            status_message = await update.message.reply_text("🖼️ *Analyzing images...*", parse_mode="markdown")
         
         temp_photos = []
         all_descriptions = []

From 2ec0621723b86a238fe11c56375664e3a91fc38b Mon Sep 17 00:00:00 2001
From: mbv06 <mbv06.dev@gmail.com>
Date: Fri, 12 Jun 2026 01:09:29 +0300
Subject: [PATCH 5/7] chore: add temporary media directories to .gitignore

---
 .gitignore | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.gitignore b/.gitignore
index c3bc098..431cd95 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,8 @@ __pycache__/
 .DS_Store
 data/
 .vscode/
+
+# Temporary media directories
+temp_photos/
+temp_docs/
+temp_audio/

From f9adc6527658ec2f32080d04bb6fc16a0cf1b3d8 Mon Sep 17 00:00:00 2001
From: mbv06 <mbv06.dev@gmail.com>
Date: Fri, 12 Jun 2026 01:35:52 +0300
Subject: [PATCH 6/7] Add HEIC and HEIF image support

---
 app/agents/image_tools.py | 33 ++++++++++++++++++++-------------
 app/image_utils.py        | 37 +++++++++++++++++++++++++++++++++++++
 app/main_bot.py           | 24 +++++++++++++++++++++---
 app/requirements.txt      |  3 ++-
 4 files changed, 80 insertions(+), 17 deletions(-)
 create mode 100644 app/image_utils.py

diff --git a/app/agents/image_tools.py b/app/agents/image_tools.py
index a5fd804..b8de7c8 100644
--- a/app/agents/image_tools.py
+++ b/app/agents/image_tools.py
@@ -11,6 +11,7 @@
 from google.genai import types
 import PIL.Image
 import PIL.ImageOps
+from image_utils import JPEG_FORMATS
 load_dotenv()
 
 openai_api_key = os.getenv("OPENAI_API_KEY")
@@ -276,25 +277,31 @@ def _resolve_image_path(self, image_path: str) -> Path:
         return p  # Return original so the caller can report the correct path
 
     def _prepare_image_for_edit(self, image_path: Path, temp_paths: List[Path]) -> Path:
-        """Create a temporary downsized JPEG for oversized edit inputs."""
-        if max_image_resolution_edit > 0:
-            try:
-                with PIL.Image.open(image_path) as img:
+        """Create a temporary JPEG for resized or non-JPEG edit inputs."""
+        try:
+            with PIL.Image.open(image_path) as img:
+                image_format = (img.format or "").upper()
+                needs_resizing = max_image_resolution_edit > 0 and max(img.size) > max_image_resolution_edit
+                needs_conversion = image_format not in JPEG_FORMATS
+
+                if needs_resizing or needs_conversion:
                     img = PIL.ImageOps.exif_transpose(img)
-                    if max(img.size) > max_image_resolution_edit:
+
+                    if needs_resizing:
                         img.thumbnail(
                             (max_image_resolution_edit, max_image_resolution_edit),
                             getattr(PIL.Image, "Resampling", PIL.Image).LANCZOS
                         )
-                        if img.mode != "RGB":
-                            img = img.convert("RGB")
 
-                        temp_path = self.images_path / f"edit_input_{uuid.uuid4()}.jpg"
-                        temp_paths.append(temp_path)
-                        img.save(temp_path, format="JPEG", quality=90, optimize=True)
-                        return temp_path
-            except Exception as e:
-                print(f"Failed to prepare image for edit, using original file: {e}")
+                    if img.mode != "RGB":
+                        img = img.convert("RGB")
+
+                    temp_path = self.images_path / f"edit_input_{uuid.uuid4()}.jpg"
+                    temp_paths.append(temp_path)
+                    img.save(temp_path, format="JPEG", quality=90, optimize=True)
+                    return temp_path
+        except Exception as e:
+            print(f"Failed to prepare image for edit, using original file: {e}")
 
         return image_path
 
diff --git a/app/image_utils.py b/app/image_utils.py
new file mode 100644
index 0000000..a29ab49
--- /dev/null
+++ b/app/image_utils.py
@@ -0,0 +1,37 @@
+from pathlib import Path
+
+from PIL import Image, ImageOps
+from pillow_heif import register_heif_opener
+
+
+register_heif_opener()
+
+IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".heic", ".heif")
+JPEG_FORMATS = {"JPEG", "JPG"}
+
+
+def is_jpeg_image(image_path: str | Path) -> bool:
+    with Image.open(image_path) as img:
+        return (img.format or "").upper() in JPEG_FORMATS
+
+
+def save_image_as_jpeg(
+    source_path: str | Path,
+    target_path: str | Path,
+    *,
+    max_resolution: int = 0,
+    quality: int = 90,
+) -> None:
+    with Image.open(source_path) as img:
+        img = ImageOps.exif_transpose(img)
+
+        if max_resolution > 0 and max(img.size) > max_resolution:
+            img.thumbnail(
+                (max_resolution, max_resolution),
+                getattr(Image, "Resampling", Image).LANCZOS,
+            )
+
+        if img.mode != "RGB":
+            img = img.convert("RGB")
+
+        img.save(target_path, format="JPEG", quality=quality, optimize=True)
diff --git a/app/main_bot.py b/app/main_bot.py
index 4a68bb3..ecd9857 100644
--- a/app/main_bot.py
+++ b/app/main_bot.py
@@ -23,6 +23,7 @@
 from stats import stats_tracker, DEFAULT_ACTION_LIMIT
 import time
 from PIL import Image
+from image_utils import IMAGE_EXTENSIONS, is_jpeg_image, save_image_as_jpeg
 
 # Streaming config - read directly from env, not imported
 streaming_enabled = os.getenv("STREAMING_ENABLED", "false").lower() == "true"
@@ -278,6 +279,14 @@ def encode_image(image_path):
     return base64.b64encode(image_bytes).decode('utf-8')
 
 
+def prepare_downloaded_image_for_vision(source_path: str, target_path: str) -> None:
+    if is_jpeg_image(source_path):
+        shutil.copy(source_path, target_path)
+        return
+
+    save_image_as_jpeg(source_path, target_path, quality=90)
+
+
 def split_text_intelligently(text: str, max_length: int = 4000) -> list[str]:
     """
     Split text into chunks with a maximum length, trying to split at natural boundaries:
@@ -1603,9 +1612,17 @@ async def process_image_message(update: Update, context: ContextTypes.DEFAULT_TY
                     photo_file = await context.bot.get_file(photo.file_id)
                     temp_dir = "temp_photos"
                     os.makedirs(temp_dir, exist_ok=True)
+                    original_extension = os.path.splitext(getattr(photo, "file_name", "") or "")[1].lower()
+                    if original_extension not in IMAGE_EXTENSIONS:
+                        original_extension = ".jpg"
+
+                    temp_download = os.path.join(temp_dir, f"photo_{uuid.uuid4()}{original_extension}")
+                    temp_photos.append(temp_download)
+                    await photo_file.download_to_drive(temp_download)
+
                     temp_photo = os.path.join(temp_dir, f"photo_{uuid.uuid4()}.jpg")
                     temp_photos.append(temp_photo)
-                    await photo_file.download_to_drive(temp_photo)
+                    prepare_downloaded_image_for_vision(temp_download, temp_photo)
                     
                     user_images_dir = os.path.join("data", user_id, "images")
                     os.makedirs(user_images_dir, exist_ok=True)
@@ -1712,7 +1729,7 @@ async def handle_document_message(update: Update, context: ContextTypes.DEFAULT_
         await handle_video_message(update, context)
         return
     
-    image_extensions = ['.jpg', '.jpeg']
+    image_extensions = IMAGE_EXTENSIONS
     if file_extension in image_extensions:
         stats_tracker.track_message_received(user_id, "photo")
         if update.message.media_group_id:
@@ -1728,7 +1745,8 @@ async def handle_document_message(update: Update, context: ContextTypes.DEFAULT_
     
     if file_extension not in supported_extensions:
         supported_formats = ", ".join([ext.replace(".", "").upper() for ext in supported_extensions])
-        await update.message.reply_text(f"❌ Only {supported_formats} documents and JPG/JPEG images are supported.")
+        supported_image_formats = "/".join(ext.replace(".", "").upper() for ext in image_extensions)
+        await update.message.reply_text(f"❌ Only {supported_formats} documents and {supported_image_formats} images are supported.")
         return
 
     async with get_user_lock(user_id):
diff --git a/app/requirements.txt b/app/requirements.txt
index 8db5241..d93c36b 100644
--- a/app/requirements.txt
+++ b/app/requirements.txt
@@ -21,4 +21,5 @@ docx2txt>=0.8
 pandas>=2.0.0
 openpyxl>=3.1.2
 google-genai>=1.45.0
-pillow>=11.1.0
\ No newline at end of file
+pillow>=11.1.0
+pillow-heif>=1.4.0

From 9630112120cdce73b1f0dcb9c94eb415e4c010eb Mon Sep 17 00:00:00 2001
From: mbv06 <mbv06.dev@gmail.com>
Date: Fri, 12 Jun 2026 01:45:50 +0300
Subject: [PATCH 7/7] Handle late media group image updates

---
 app/main_bot.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/app/main_bot.py b/app/main_bot.py
index ecd9857..fe1f2a2 100644
--- a/app/main_bot.py
+++ b/app/main_bot.py
@@ -118,6 +118,7 @@ def get_user_lock(user_id: str) -> asyncio.Lock:
 media_group_captions = {}
 media_group_waiting_message = {}
 media_group_tasks = {}
+media_group_processing = {}
 MEDIA_GROUP_TIMEOUT = 10.0
 
 
@@ -1498,7 +1499,10 @@ async def voice_button(update: Update, context: ContextTypes.DEFAULT_TYPE):
 
 async def queue_image_media_group(update: Update, context: ContextTypes.DEFAULT_TYPE, user_id: str, image_ref: Any, caption: str | None):
     """Collect Telegram album image refs and process them after updates stop arriving."""
-    if user_id in media_group_tasks and media_group_tasks[user_id]:
+    is_same_media_group = user_id in media_group_id and media_group_id[user_id] == update.message.media_group_id
+    is_processing = media_group_processing.get(user_id) and is_same_media_group
+
+    if user_id in media_group_tasks and media_group_tasks[user_id] and not is_processing:
         media_group_tasks[user_id].cancel()
 
     if user_id not in media_group_id or media_group_id[user_id] != update.message.media_group_id:
@@ -1515,10 +1519,15 @@ async def queue_image_media_group(update: Update, context: ContextTypes.DEFAULT_
 
     if image_ref not in media_group_photos[user_id]:
         media_group_photos[user_id].append(image_ref)
-        await media_group_waiting_message[user_id].edit_text(
-            f"🖼️ *Image {len(media_group_photos[user_id])} received... Waiting for other images...*",
-            parse_mode="markdown"
-        )
+        waiting_message = media_group_waiting_message.get(user_id)
+        if waiting_message and not is_processing:
+            await waiting_message.edit_text(
+                f"🖼️ *Image {len(media_group_photos[user_id])} received... Waiting for other images...*",
+                parse_mode="markdown"
+            )
+
+    if is_processing:
+        return
 
     async def process_media_group_with_timeout():
         try:
@@ -1534,13 +1543,13 @@ async def process_media_group_with_timeout():
 
 async def process_media_group(update: Update, context: ContextTypes.DEFAULT_TYPE, user_id: str):
     """Process all images in a media group"""
+    media_group_processing[user_id] = True
     try:
         if user_id not in media_group_photos or not media_group_photos[user_id]:
             print(f"User {user_id} is waiting for media group, but it is not in media_group_photos")
             return
 
         status_message = media_group_waiting_message.get(user_id)
-        media_group_waiting_message[user_id] = None
 
         image_refs = media_group_photos[user_id]
         
@@ -1561,6 +1570,8 @@ async def process_media_group(update: Update, context: ContextTypes.DEFAULT_TYPE
         media_group_waiting_message[user_id] = None
         if user_id in media_group_tasks:
             del media_group_tasks[user_id]
+        if user_id in media_group_processing:
+            del media_group_processing[user_id]
 
 async def handle_photo_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
     """Handle incoming photo messages with support for multiple photos"""