From ee13d10649454bdced8e560adb28c09c2a528771 Mon Sep 17 00:00:00 2001 From: Crosve Lucero <92947897+crosve@users.noreply.github.com> Date: Sat, 20 Dec 2025 14:58:24 -0500 Subject: [PATCH 01/75] first phase of remodeling the worker --- .github/workflows/ci_cd.yml | 23 +++++++ backend/pyproject.toml | 2 +- csphere-worker/processors/__init__.py | 10 +++ csphere-worker/processors/base.py | 64 +++++++++++++++++ csphere-worker/processors/content.py | 98 +++++++++++++++++++++++++++ csphere-worker/utils/utils.py | 3 +- csphere-worker/worker.py | 2 + 7 files changed, 200 insertions(+), 2 deletions(-) create mode 100644 csphere-worker/processors/__init__.py create mode 100644 csphere-worker/processors/base.py create mode 100644 csphere-worker/processors/content.py diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index fc75ad4..ea6f338 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -58,3 +58,26 @@ jobs: ./build-csphere-backend.sh echo "deployment was a success" ' <<< "${{secrets.PASSWORD}}" + deploy-worker: + name: deploy worker to the backend servers + runs-on: ununtu-latest + + steps: + - name: Execute remote ssh commands + uses: appleboy/ssh-action@v1 + + with: + host: ${{secrets.HOST}} + username: ${{secrets.USERNAME}} + password: ${{secrets.PASSWORD}} + port: ${{secrets.PORT}} + + script: | + echo "Starting deployment process" + # NOTE: Use 'sh' to avoid issues with inline script quoting + sudo -S bash -c ' + cd /root/docker/csphere + #run the deployment script + ./build-csphere-worker.sh + echo "deployment was a success" + ' <<< "${{secrets.PASSWORD}}" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 702a55b..403e839 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -84,7 +84,7 @@ dependencies = [ "platformdirs==4.3.8", "propcache==0.3.2", "psutil==7.0.0", - "psycopg2==2.9.11", + "psycopg-binary", "pyasn1==0.6.1", "pyasn1-modules==0.4.2", "pycparser==2.22", diff --git a/csphere-worker/processors/__init__.py b/csphere-worker/processors/__init__.py new file mode 100644 index 0000000..c0442a0 --- /dev/null +++ b/csphere-worker/processors/__init__.py @@ -0,0 +1,10 @@ +from .content import ContentProcessor + +PROCESSOR_MAP ={ + 'process_message': ContentProcessor + +} + + +def get_processor(task_type: str): + return PROCESSOR_MAP.get(task_type) \ No newline at end of file diff --git a/csphere-worker/processors/base.py b/csphere-worker/processors/base.py new file mode 100644 index 0000000..0d0d1c6 --- /dev/null +++ b/csphere-worker/processors/base.py @@ -0,0 +1,64 @@ +from abc import ABC, abstractmethod + +from database import get_db +import logging +from data_models.content import Content +from utils.utils import handle_existing_content + + +logger = logging.getLogger(__name__) + + +class BaseProcessor(ABC): + + def __init__(self): + self.db = get_db() + @abstractmethod + def process(self, payload: dict): + """Standard method all processors must implement.""" + pass + + + @staticmethod + def get_db(self): + ''' + Method to get the database instant + + :param self: base + ''' + db_gen = get_db() + db = next(db_gen) + return db + + + def extract_data(self, message:dict): + ''' + Method to extract and return the data stored inside message + + :param message: data of the message + :type message: dict + ''' + user_id = message.get('user_id') + notes = message.get('notes') + folder_id = message.get('folder_id', '') + content_data = message.get('content_payload', {}) + + + if content_data == {}: + logger.error("Content data is empty, returning") + raise ValueError("Content data was empty, no content payload available") + + return (user_id, notes, folder_id, content_data) + + def handle_if_exists(self, content_url: str, user_id: int, notes:str, folder_id: int) -> bool : + existing_content = self.db.query(Content).filter(Content.url == content_url).first() + + if existing_content: + handle_existing_content(existing_content, user_id, self.db, notes, folder_id) + logger.info("Bookmark succesfully saved to user") + return True + + return False + + + diff --git a/csphere-worker/processors/content.py b/csphere-worker/processors/content.py new file mode 100644 index 0000000..981eb1d --- /dev/null +++ b/csphere-worker/processors/content.py @@ -0,0 +1,98 @@ +from .base import BaseProcessor +import logging +from data_models.content import Content + +from datetime import datetime, timezone +from data_models.content_item import ContentItem +from classes.EmbeddingManager import ContentEmbeddingManager +from data_models.folder_item import folder_item + +from uuid import uuid4 +import time + + +logger = logging.getLogger(__name__) + + +class ContentProcessor(BaseProcessor): + + + def __init__(): + super(BaseProcessor) + + + def process(self, message: dict): + + user_id, notes, folder_id, content_data = BaseProcessor.extract_data(message) + + content_url = content_data.get('url') + + if BaseProcessor.handle_if_exists(content_url, user_id, notes, folder_id): + logger.info('Content existed and was saved appropiatly') + return + + new_content = Content(**content_data) + + try: + self.db.add(new_content) + self.db.flush() + + #update the content Embedding manager when necessary + content_manager = ContentEmbeddingManager(db=self.db, content_url=new_content.url) + + raw_html = message.get('raw_html', '') + + if raw_html == '': + logging.info("No raw html provided, categorization and summarization may be poor") + + content_ai = content_manager.process_content(new_content, raw_html) + + self.db.commit() + + if not content_ai: + logging.info("Embedding generation failed or skipped.") + else: + logging.debug(f"Summary Generated: {content_ai.ai_summary}") + + # Check if this user already saved this content + existing_item = self.db.query(ContentItem).filter( + ContentItem.user_id == user_id, + ContentItem.content_id == new_content.content_id + ).first() + + + utc_time = datetime.now(timezone.utc) + + if not existing_item: + new_item = ContentItem( + user_id=user_id, + content_id=new_content.content_id, + saved_at=utc_time, + notes=notes + ) + self.db.add(new_item) + self.db.commit() + + + # Add to the corresponding folder if any + if folder_id and folder_id != '' and folder_id != 'default': + new_folder_item = folder_item( + folder_item_id=uuid4(), + folder_id=folder_id, + user_id=user_id, + content_id=new_content.content_id, + added_at=datetime.utcnow() + ) + + self.db.add(new_folder_item) + self.db.commit() + self.db.refresh(new_folder_item) + else: + print("No valid folder id found, skipping this part") + + logging.info("Successfully saved content for user.") + + except Exception as e: + logging.error(f"Error occurred while saving the bookmark: {str(e)}") + + diff --git a/csphere-worker/utils/utils.py b/csphere-worker/utils/utils.py index dba3b39..9ccf230 100644 --- a/csphere-worker/utils/utils.py +++ b/csphere-worker/utils/utils.py @@ -72,4 +72,5 @@ def handle_existing_content(existing_content, user_id: str, db, notes : str, fol except Exception as e: logging.error("issue offucred: ", str(e)) - return False \ No newline at end of file + return False + diff --git a/csphere-worker/worker.py b/csphere-worker/worker.py index a138563..af1708b 100644 --- a/csphere-worker/worker.py +++ b/csphere-worker/worker.py @@ -38,6 +38,8 @@ def handle_message(message): + # + db_gen = get_db() db = next(db_gen) From 565d796bd807cf25d966f7d87dc55800df19cb52 Mon Sep 17 00:00:00 2001 From: Crosve Lucero <92947897+crosve@users.noreply.github.com> Date: Sat, 20 Dec 2025 15:29:45 -0500 Subject: [PATCH 02/75] working logic - still need to add in web parsing --- backend/app/routes/content.py | 14 ++- csphere-worker/processors/__init__.py | 4 +- csphere-worker/processors/base.py | 6 +- csphere-worker/processors/content.py | 8 +- csphere-worker/worker.py | 137 +++++++++++++------------- 5 files changed, 86 insertions(+), 83 deletions(-) diff --git a/backend/app/routes/content.py b/backend/app/routes/content.py index c10e861..6d98137 100644 --- a/backend/app/routes/content.py +++ b/backend/app/routes/content.py @@ -166,18 +166,16 @@ def save_content_by_url(content: ContentSavedByUrl, user_id: UUID = Depends(get_ if existing_content: return {"status": "unsuccessful", "message": "Content already exists"} - response = requests.get(url=safe_url, timeout=10) + # response = requests.get(url=safe_url, timeout=10) - if response.status_code != 200: - raise HTTPException(status_code=response.status_code, detail="Failed to fetch content") + # if response.status_code != 200: + # raise HTTPException(status_code=response.status_code, detail="Failed to fetch content") - html = response.text - meta = ContentPreprocessor().extract(html) + html = '' - title = meta["title"] or None + title =safe_url - if not html.strip(): - raise HTTPException(status_code=400, detail="Fetched page is empty") + _enqueue_new_content( url=safe_url, diff --git a/csphere-worker/processors/__init__.py b/csphere-worker/processors/__init__.py index c0442a0..1958ae5 100644 --- a/csphere-worker/processors/__init__.py +++ b/csphere-worker/processors/__init__.py @@ -1,8 +1,8 @@ from .content import ContentProcessor PROCESSOR_MAP ={ - 'process_message': ContentProcessor - + 'process_message': ContentProcessor() + } diff --git a/csphere-worker/processors/base.py b/csphere-worker/processors/base.py index 0d0d1c6..88b5e3f 100644 --- a/csphere-worker/processors/base.py +++ b/csphere-worker/processors/base.py @@ -12,15 +12,15 @@ class BaseProcessor(ABC): def __init__(self): - self.db = get_db() + self.db = self.get_db() @abstractmethod - def process(self, payload: dict): + def process(self, message: dict): """Standard method all processors must implement.""" pass @staticmethod - def get_db(self): + def get_db(): ''' Method to get the database instant diff --git a/csphere-worker/processors/content.py b/csphere-worker/processors/content.py index 981eb1d..0e1d5ee 100644 --- a/csphere-worker/processors/content.py +++ b/csphere-worker/processors/content.py @@ -17,17 +17,17 @@ class ContentProcessor(BaseProcessor): - def __init__(): - super(BaseProcessor) + def __init__(self): + super().__init__() def process(self, message: dict): - user_id, notes, folder_id, content_data = BaseProcessor.extract_data(message) + user_id, notes, folder_id, content_data = self.extract_data(message=message) content_url = content_data.get('url') - if BaseProcessor.handle_if_exists(content_url, user_id, notes, folder_id): + if self.handle_if_exists(content_url, user_id, notes, folder_id): logger.info('Content existed and was saved appropiatly') return diff --git a/csphere-worker/worker.py b/csphere-worker/worker.py index af1708b..c8f8355 100644 --- a/csphere-worker/worker.py +++ b/csphere-worker/worker.py @@ -22,6 +22,8 @@ from classes.EmbeddingManager import ContentEmbeddingManager +from processors import get_processor + #Logging config stuff logging.basicConfig( level=logging.DEBUG, @@ -40,103 +42,106 @@ def handle_message(message): # + messageProcessor = get_processor('process_message') + messageProcessor.process(message=message) + - db_gen = get_db() - db = next(db_gen) + # db_gen = get_db() + # db = next(db_gen) - logger.info(f"The current message: {message}") + # logger.info(f"The current message: {message}") - #Create the Content object - user_id = message.get('user_id') - notes = message.get('notes') - folder_id = message.get('folder_id', '') - content_data = message.get('content_payload', {}) + # #Create the Content object + # user_id = message.get('user_id') + # notes = message.get('notes') + # folder_id = message.get('folder_id', '') + # content_data = message.get('content_payload', {}) - #filter based on the content paylaod + # #filter based on the content paylaod - if content_data == {}: - logger.error("Content data is empty, returning") - return + # if content_data == {}: + # logger.error("Content data is empty, returning") + # return - content_url = content_data.get('url') + # content_url = content_data.get('url') - existing_content = db.query(Content).filter(Content.url == content_url).first() + # existing_content = db.query(Content).filter(Content.url == content_url).first() - if existing_content: - #done some logic and don't continue on , end it here + # if existing_content: + # #done some logic and don't continue on , end it here - handle_existing_content(existing_content, user_id, db, notes, folder_id) - logger.info("Bookmark succesfully saved to user") - return + # handle_existing_content(existing_content, user_id, db, notes, folder_id) + # logger.info("Bookmark succesfully saved to user") + # return - new_content = Content(**content_data) + # new_content = Content(**content_data) - try: - db.add(new_content) - db.flush() + # try: + # db.add(new_content) + # db.flush() - #update the content Embedding manager when necessary - content_manager = ContentEmbeddingManager(db=db, content_url=new_content.url) + # #update the content Embedding manager when necessary + # content_manager = ContentEmbeddingManager(db=db, content_url=new_content.url) - raw_html = message.get('raw_html', '') + # raw_html = message.get('raw_html', '') - if raw_html == '': - logging.info("No raw html provided, categorization and summarization may be poor") + # if raw_html == '': + # logging.info("No raw html provided, categorization and summarization may be poor") - content_ai = content_manager.process_content(new_content, raw_html) + # content_ai = content_manager.process_content(new_content, raw_html) - db.commit() + # db.commit() - if not content_ai: - logging.info("Embedding generation failed or skipped.") - else: - logging.debug(f"Summary Generated: {content_ai.ai_summary}") + # if not content_ai: + # logging.info("Embedding generation failed or skipped.") + # else: + # logging.debug(f"Summary Generated: {content_ai.ai_summary}") - # Check if this user already saved this content - existing_item = db.query(ContentItem).filter( - ContentItem.user_id == user_id, - ContentItem.content_id == new_content.content_id - ).first() + # # Check if this user already saved this content + # existing_item = db.query(ContentItem).filter( + # ContentItem.user_id == user_id, + # ContentItem.content_id == new_content.content_id + # ).first() - utc_time = datetime.now(timezone.utc) + # utc_time = datetime.now(timezone.utc) - if not existing_item: - new_item = ContentItem( - user_id=user_id, - content_id=new_content.content_id, - saved_at=utc_time, - notes=notes - ) - db.add(new_item) - db.commit() + # if not existing_item: + # new_item = ContentItem( + # user_id=user_id, + # content_id=new_content.content_id, + # saved_at=utc_time, + # notes=notes + # ) + # db.add(new_item) + # db.commit() - # Add to the corresponding folder if any - if folder_id and folder_id != '' and folder_id != 'default': - new_folder_item = folder_item( - folder_item_id=uuid4(), - folder_id=folder_id, - user_id=user_id, - content_id=new_content.content_id, - added_at=datetime.utcnow() - ) + # # Add to the corresponding folder if any + # if folder_id and folder_id != '' and folder_id != 'default': + # new_folder_item = folder_item( + # folder_item_id=uuid4(), + # folder_id=folder_id, + # user_id=user_id, + # content_id=new_content.content_id, + # added_at=datetime.utcnow() + # ) - db.add(new_folder_item) - db.commit() - db.refresh(new_folder_item) - else: - print("No valid folder id found, skipping this part") + # db.add(new_folder_item) + # db.commit() + # db.refresh(new_folder_item) + # else: + # print("No valid folder id found, skipping this part") - logging.info("Successfully saved content for user.") + # logging.info("Successfully saved content for user.") - except Exception as e: - logging.error(f"Error occurred while saving the bookmark: {str(e)}") + # except Exception as e: + # logging.error(f"Error occurred while saving the bookmark: {str(e)}") From 6ff88ead281941c989e48cba2e25c70a810285dd Mon Sep 17 00:00:00 2001 From: Crosve Lucero <92947897+crosve@users.noreply.github.com> Date: Sun, 21 Dec 2025 21:47:12 -0500 Subject: [PATCH 03/75] bucketing ui deisgn v1 --- csphere-worker/processors/bucket.py | 14 + frontend/next.config.mjs | 5 + frontend/package-lock.json | 16 + frontend/package.json | 1 + .../home/folders/[folderId]/page.tsx | 405 +++++++++++++++++- .../src/app/(content)/home/folders/page.tsx | 12 +- frontend/src/components/ui/input.tsx | 4 +- frontend/src/components/ui/settings.tsx | 83 ++++ 8 files changed, 509 insertions(+), 31 deletions(-) create mode 100644 csphere-worker/processors/bucket.py create mode 100644 frontend/src/components/ui/settings.tsx diff --git a/csphere-worker/processors/bucket.py b/csphere-worker/processors/bucket.py new file mode 100644 index 0000000..31c1dc0 --- /dev/null +++ b/csphere-worker/processors/bucket.py @@ -0,0 +1,14 @@ +from .base import BaseProcessor +import logging +from data_models.content import Content + +from datetime import datetime, timezone +from data_models.content_item import ContentItem +from classes.EmbeddingManager import ContentEmbeddingManager +from data_models.folder_item import folder_item + +from uuid import uuid4 +import time + + +logger = logging.getLogger(__name__) \ No newline at end of file diff --git a/frontend/next.config.mjs b/frontend/next.config.mjs index 69c4aa4..a5cf4bc 100644 --- a/frontend/next.config.mjs +++ b/frontend/next.config.mjs @@ -6,4 +6,9 @@ const nextConfig = { }, }; +// module.exports = { +// images: { +// qualities: [25, 50, 75], +// }, +// }; export default nextConfig; diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 10e4061..ca2ecf9 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -34,6 +34,7 @@ "react": "^19.2.1", "react-dom": "^19.2.1", "react-hook-form": "^7.55.0", + "react-icons": "^5.5.0", "sonner": "^2.0.3", "tailwind-merge": "^3.2.0", "tailwindcss": "^4.1.3", @@ -3321,6 +3322,15 @@ "react": "^16.8.0 || ^17 || ^18 || ^19" } }, + "node_modules/react-icons": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/react-icons/-/react-icons-5.5.0.tgz", + "integrity": "sha512-MEFcXdkP3dLo8uumGI5xN3lDFNsRtrjbOEKDLD7yv76v4wpnEq2Lt2qeHaQOr34I/wPN3s3+N08WkQ+CW37Xiw==", + "license": "MIT", + "peerDependencies": { + "react": "*" + } + }, "node_modules/react-remove-scroll": { "version": "2.6.3", "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.6.3.tgz", @@ -5146,6 +5156,12 @@ "integrity": "sha512-XRnjsH3GVMQz1moZTW53MxfoWN7aDpUg/GpVNc4A3eXRVNdGXfbzJ4vM4aLQ8g6XCUh1nIbx70aaNCl7kxnjog==", "requires": {} }, + "react-icons": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/react-icons/-/react-icons-5.5.0.tgz", + "integrity": "sha512-MEFcXdkP3dLo8uumGI5xN3lDFNsRtrjbOEKDLD7yv76v4wpnEq2Lt2qeHaQOr34I/wPN3s3+N08WkQ+CW37Xiw==", + "requires": {} + }, "react-remove-scroll": { "version": "2.6.3", "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.6.3.tgz", diff --git a/frontend/package.json b/frontend/package.json index eec6d28..fb1a518 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -35,6 +35,7 @@ "react": "^19.2.1", "react-dom": "^19.2.1", "react-hook-form": "^7.55.0", + "react-icons": "^5.5.0", "sonner": "^2.0.3", "tailwind-merge": "^3.2.0", "tailwindcss": "^4.1.3", diff --git a/frontend/src/app/(content)/home/folders/[folderId]/page.tsx b/frontend/src/app/(content)/home/folders/[folderId]/page.tsx index dbd8a4a..45434e5 100644 --- a/frontend/src/app/(content)/home/folders/[folderId]/page.tsx +++ b/frontend/src/app/(content)/home/folders/[folderId]/page.tsx @@ -2,30 +2,360 @@ import React, { useEffect, useState, use } from "react"; import FolderIdLayout from "./FolderIdLayout"; import { - Popover, - PopoverContent, - PopoverTrigger, -} from "@/components/ui/popover"; -import { Plus, Filter, ChevronDown } from "lucide-react"; + DropdownMenu, + DropdownMenuContent, + DropdownMenuGroup, + DropdownMenuItem, + DropdownMenuLabel, + DropdownMenuSeparator, + DropdownMenuShortcut, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, + DialogTrigger, +} from "@/components/ui/dialog"; +import { Plus, Filter, ChevronDown, Sparkles, X } from "lucide-react"; import { fetchToken } from "@/functions/user/UserData"; import BookmarkList from "@/components/BookmarkList"; import { Breadcrumb } from "../foldercomponents/Breadcrumb"; import { Switch } from "@/components/ui/switch"; import { Label } from "@/components/ui/label"; +import { Input } from "@/components/ui/input"; +import { Button } from "@/components/ui/button"; +import SettingsIcon from "@/components/ui/settings"; +import { CiSettings } from "react-icons/ci"; +import { DropdownMenuArrow } from "@radix-ui/react-dropdown-menu"; interface PathProps { id: string; name: string; } + +interface FolderMetadata { + name: string; + keywords: string[]; + urlPatterns: string[]; + smartBucketingEnabled: boolean; +} + +// Folder Settings Dialog Component +function FolderSettingsDialog({ + folderId, + initialMetadata, + onSave, +}: { + folderId: string; + initialMetadata: FolderMetadata; + onSave: (metadata: FolderMetadata) => void; +}) { + const [isOpen, setIsOpen] = useState(false); + const [metadata, setMetadata] = useState(initialMetadata); + const [newKeyword, setNewKeyword] = useState(""); + const [newPattern, setNewPattern] = useState(""); + const [showAdvanced, setShowAdvanced] = useState(false); + + const addKeyword = () => { + if ( + newKeyword.trim() && + !metadata.keywords.includes(newKeyword.trim().toLowerCase()) + ) { + setMetadata({ + ...metadata, + keywords: [...metadata.keywords, newKeyword.trim().toLowerCase()], + }); + setNewKeyword(""); + } + }; + + const removeKeyword = (index: number) => { + setMetadata({ + ...metadata, + keywords: metadata.keywords.filter((_, i) => i !== index), + }); + }; + + const addPattern = () => { + if ( + newPattern.trim() && + !metadata.urlPatterns.includes(newPattern.trim()) + ) { + setMetadata({ + ...metadata, + urlPatterns: [...metadata.urlPatterns, newPattern.trim()], + }); + setNewPattern(""); + } + }; + + const removePattern = (index: number) => { + setMetadata({ + ...metadata, + urlPatterns: metadata.urlPatterns.filter((_, i) => i !== index), + }); + }; + + const generateKeywords = () => { + const name = metadata.name.toLowerCase(); + const generated = name.split(" ").filter((word) => word.length > 3); + const suggestions = [...new Set([...metadata.keywords, ...generated])]; + setMetadata({ ...metadata, keywords: suggestions }); + }; + + const handleSave = () => { + onSave(metadata); + setIsOpen(false); + }; + + return ( + +
+ + + + + + + Folder Settings + + + Configure smart bucketing for this folder + + + +
+ {/* Folder Name */} +
+ + + setMetadata({ ...metadata, name: e.target.value }) + } + className="mt-1.5 bg-white border-gray-300 focus:border-gray-400 focus:ring-gray-400" + /> +
+ + {/* Smart Bucketing Toggle */} +
+
+
+ +
+
+ Smart Bucketing +
+
+ Automatically suggest bookmarks for this folder +
+
+
+ + setMetadata({ ...metadata, smartBucketingEnabled: checked }) + } + className="data-[state=unchecked]:bg-gray-300 data-[state=checked]:bg-gray-900" + /> +
+
+ + {/* Advanced Options */} + {metadata.smartBucketingEnabled && ( + <> + + + {showAdvanced && ( +
+ {/* Keywords Section */} +
+
+ + +
+

+ Bookmarks containing these words will be suggested for + this folder +

+ +
+ {metadata.keywords.map((keyword, index) => ( + + {keyword} + + + ))} +
+ +
+ setNewKeyword(e.target.value)} + // onKeyPress={(e) => e.key === "Enter" && addKeyword()} + placeholder="Add keyword..." + className="flex-1 text-sm bg-white border-gray-300" + /> + +
+
+ + {/* URL Patterns Section */} +
+ +

+ Bookmarks matching these patterns will be suggested (use + * as wildcard) +

+ +
+ {metadata.urlPatterns.map((pattern, index) => ( + + {pattern} + + + ))} +
+ +
+ setNewPattern(e.target.value)} + onKeyPress={(e) => e.key === "Enter" && addPattern()} + placeholder="e.g., *.github.com or docs.*.com" + className="flex-1 text-sm font-mono bg-white border-gray-300" + /> + +
+
+ + {/* Preview */} +
+
+ Preview +
+
+ Bookmarks with{" "} + + {metadata.keywords.slice(0, 3).join(", ")} + {metadata.keywords.length > 3 && + `... (+${metadata.keywords.length - 3})`} + + {metadata.urlPatterns.length > 0 && ( + <> + {" "} + or from{" "} + + {metadata.urlPatterns.slice(0, 2).join(", ")} + {metadata.urlPatterns.length > 2 && + `... (+${metadata.urlPatterns.length - 2})`} + + + )}{" "} + will be suggested +
+
+
+ )} + + )} + + {/* Action Buttons */} +
+ + +
+
+
+
+
+ ); +} + export default function Page({ params, }: { params: Promise<{ folderId: string }>; }) { - // const [folderId, setFolderId] = useState(null); const { folderId } = use(params); const [bookmarks, setBookmarks] = useState([]); const [paths, setPaths] = useState([]); + const [folderMetadata, setFolderMetadata] = useState({ + name: "", + keywords: [], + urlPatterns: [], + smartBucketingEnabled: false, + }); useEffect(() => { const fetchBookmarks = async (id: string) => { @@ -78,7 +408,48 @@ export default function Page({ if (folderId) { fetchPathStructure(folderId); } - }, []); + }, [folderId]); + + // TODO: Fetch folder metadata from your API + useEffect(() => { + const fetchFolderMetadata = async (id: string) => { + // Implement your API call here + // const API_URL = `${process.env.NEXT_PUBLIC_API_BASE_URL}/folder-metadata/${id}`; + // const token = fetchToken(); + // const response = await fetch(API_URL, { ... }); + // const data = await response.json(); + // setFolderMetadata(data); + }; + + if (folderId) { + // fetchFolderMetadata(folderId); + // For now, using mock data + setFolderMetadata({ + name: paths[paths.length - 1]?.name || "", + keywords: [], + urlPatterns: [], + smartBucketingEnabled: false, + }); + } + }, [folderId, paths]); + + const handleSaveMetadata = async (metadata: FolderMetadata) => { + // TODO: Save to your API + // const API_URL = `${process.env.NEXT_PUBLIC_API_BASE_URL}/folder-metadata/${folderId}`; + // const token = fetchToken(); + // await fetch(API_URL, { + // method: 'PUT', + // headers: { + // 'Content-Type': 'application/json', + // Authorization: `Bearer ${token}`, + // }, + // body: JSON.stringify(metadata), + // }); + + setFolderMetadata(metadata); + console.log("Saved metadata:", metadata); + }; + return (
@@ -86,22 +457,10 @@ export default function Page({
- -
diff --git a/frontend/src/app/(content)/home/folders/page.tsx b/frontend/src/app/(content)/home/folders/page.tsx index 9411b1f..17001d2 100644 --- a/frontend/src/app/(content)/home/folders/page.tsx +++ b/frontend/src/app/(content)/home/folders/page.tsx @@ -1,7 +1,7 @@ "use client"; -import React, { useState, useEffect } from "react"; +import { useState, useEffect } from "react"; import FolderLayout from "./FolderLayout"; -import { Plus, ChevronDown } from "lucide-react"; +import { ChevronDown } from "lucide-react"; import FolderCard from "./foldercomponents/FolderCard"; import { createFolder } from "./functions/foldercreate"; import { toast } from "sonner"; @@ -31,7 +31,7 @@ interface ResponseModel { const sortOptions = ["Latest", "Oldest", "Name A-Z", "Name Z-A"]; function page() { - const[open, setOpen] = useState(false); + const [open, setOpen] = useState(false); const [sortBy, setSortBy] = useState("Latest"); const [isDropdownOpen, setIsDropdownOpen] = useState(false); const [folderName, setFolderName] = useState(""); @@ -200,8 +200,10 @@ function page() { className="border border-black focus:border-gray-300 focus:outline-none text-black px-3 py-2 rounded-md" />
- From 1244064d3116cb36ca0d989bf928cead31b9ccaf Mon Sep 17 00:00:00 2001 From: Crosve Lucero <92947897+crosve@users.noreply.github.com> Date: Tue, 23 Dec 2025 14:49:31 -0500 Subject: [PATCH 05/75] folder put route working and connecting with the frontend --- ...dding_bucketing_mode_boolean_column_to_.py | 53 +++++++++++++++++ backend/app/data_models/folder.py | 3 +- backend/app/exceptions/folder.py | 3 + backend/app/routes/content.py | 2 + backend/app/routes/folder.py | 56 +++++++++++++++++- backend/app/schemas/folder.py | 12 ++++ backend/app/services/folder.py | 45 +++++++++++++++ .../home/folders/[folderId]/page.tsx | 57 +++++++++---------- frontend/src/components/BookmarkList.tsx | 1 - 9 files changed, 200 insertions(+), 32 deletions(-) create mode 100644 backend/alembic/versions/2994a72baf17_adding_bucketing_mode_boolean_column_to_.py create mode 100644 backend/app/exceptions/folder.py create mode 100644 backend/app/services/folder.py diff --git a/backend/alembic/versions/2994a72baf17_adding_bucketing_mode_boolean_column_to_.py b/backend/alembic/versions/2994a72baf17_adding_bucketing_mode_boolean_column_to_.py new file mode 100644 index 0000000..83fb709 --- /dev/null +++ b/backend/alembic/versions/2994a72baf17_adding_bucketing_mode_boolean_column_to_.py @@ -0,0 +1,53 @@ +"""adding bucketing mode boolean column to Folder table + +Revision ID: 2994a72baf17 +Revises: 9076b42a5b56 +Create Date: 2025-12-23 12:56:56.367544 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = '2994a72baf17' +down_revision: Union[str, None] = '9076b42a5b56' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('folder', sa.Column('bucketing_mode', sa.Boolean(), nullable=True)) + + op.execute("UPDATE folder SET bucketing_mode = false") + + op.alter_column("folder", "bucketing_mode", nullable=False) + + op.execute("UPDATE folder SET keywords = ARRAY[]::VARCHAR[]") + + op.execute("UPDATE folder SET url_patterns = ARRAY[]::VARCHAR[]") + + op.alter_column('folder', 'keywords', + existing_type=postgresql.ARRAY(sa.VARCHAR()), + nullable=False) + op.alter_column('folder', 'url_patterns', + existing_type=postgresql.ARRAY(sa.VARCHAR()), + nullable=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column('folder', 'url_patterns', + existing_type=postgresql.ARRAY(sa.VARCHAR()), + nullable=True) + op.alter_column('folder', 'keywords', + existing_type=postgresql.ARRAY(sa.VARCHAR()), + nullable=True) + op.drop_column('folder', 'bucketing_mode') + # ### end Alembic commands ### diff --git a/backend/app/data_models/folder.py b/backend/app/data_models/folder.py index add6aa0..6d3ee28 100644 --- a/backend/app/data_models/folder.py +++ b/backend/app/data_models/folder.py @@ -1,4 +1,4 @@ -from sqlalchemy import Column, String, TIMESTAMP, ForeignKey +from sqlalchemy import Column, String, TIMESTAMP, ForeignKey, Boolean from sqlalchemy.dialects.postgresql import UUID from app.db.database import Base from pydantic import BaseModel, EmailStr @@ -18,6 +18,7 @@ class Folder(Base): user_id = Column(UUID(as_uuid=True), ForeignKey("users.id", ondelete="CASCADE"), nullable=False) parent_id = Column(UUID(as_uuid=True), ForeignKey("folder.folder_id", ondelete="CASCADE"), nullable=False) folder_name = Column(String, nullable=False) + bucketing_mode : Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default="false") keywords : Mapped[list[str]] = mapped_column(ARRAY(String)) url_patterns : Mapped[list[str]] = mapped_column(ARRAY(String)) created_at = Column(TIMESTAMP, server_default="NOW()") diff --git a/backend/app/exceptions/folder.py b/backend/app/exceptions/folder.py new file mode 100644 index 0000000..25dfb4b --- /dev/null +++ b/backend/app/exceptions/folder.py @@ -0,0 +1,3 @@ +class FolderNotFound(Exception): + """Raised when a folder with a given ID cannot be found for a user.""" + pass \ No newline at end of file diff --git a/backend/app/routes/content.py b/backend/app/routes/content.py index 6d98137..6a2d5af 100644 --- a/backend/app/routes/content.py +++ b/backend/app/routes/content.py @@ -209,6 +209,8 @@ def get_unread_count(user_id: UUID = Depends(get_current_user_id), db: Session = return {'status' : 'unsuccesfull', 'error' : str(e)} + + @router.get("/content/unread", response_model=UserSavedContentResponse) def get_unread_content(cursor: str = None, user_id: UUID = Depends(get_current_user_id), db: Session = Depends(get_db)): print("in here") diff --git a/backend/app/routes/folder.py b/backend/app/routes/folder.py index 2b37bc9..5c35267 100644 --- a/backend/app/routes/folder.py +++ b/backend/app/routes/folder.py @@ -10,8 +10,10 @@ from app.data_models.content_item import ContentItem from app.data_models.content_ai import ContentAI +from app.services.folder import update_folder_metadata + from app.db.database import get_db -from app.schemas.folder import FolderDetails, FolderItem +from app.schemas.folder import FolderDetails, FolderItem, FolderMetadata, FolderNotFound from app.utils.hashing import get_current_user_id from datetime import datetime @@ -73,6 +75,58 @@ def get_folder_path(folder_id: UUID, user_id: UUID=Depends(get_current_user_id), return {"path": path} +@router.get('/folder/metadata/{folder_id}') +def get_folder_metadata(folder_id : str, db: Session = Depends(get_db)): + + try: + folder : Folder = db.query(Folder).filter(Folder.folder_id ==folder_id ).first() + if not folder: + return {'success' : False, 'message' : 'No folder found for this folder id '} + + payload = { + "name" : folder.folder_name if not None else '', + "keywords" : folder.keywords if not None else [], + "urlPatterns" : folder.url_patterns if not None else [], + "smartBucketingEnabled" : folder.bucketing_mode if not None else False + } + + return {'success' : True, 'message': 'Data fetched successfully', 'data' : payload } + + + except Exception as e: + logging.error(f"Error occured trying to fet folder metadata: {e} ") + + +from sqlalchemy.exc import SQLAlchemyError + + +@router.put("/folder/{folder_id}") +def process_folder_metadata( + folder_id: UUID, + metadata: FolderMetadata, + user_id: UUID = Depends(get_current_user_id), + db: Session = Depends(get_db), +): + try: + folder = update_folder_metadata( + db=db, + folder_id=folder_id, + user_id=user_id, + metadata=metadata, + ) + return {"success": True, "folder_id": folder.folder_id} + + except FolderNotFound: + raise HTTPException(status_code=404, detail="Folder not found") + + + + + + + + + @router.get("/folder/{folder_id}") diff --git a/backend/app/schemas/folder.py b/backend/app/schemas/folder.py index 56f8433..85c378f 100644 --- a/backend/app/schemas/folder.py +++ b/backend/app/schemas/folder.py @@ -19,3 +19,15 @@ class FolderCreate(BaseModel): class FolderItem(BaseModel): folderId: str contentId: str + + + # name: string; +# keywords: string[]; +# urlPatterns: string[]; +# smartBucketingEnabled: boolean; + +class FolderMetadata(BaseModel): + name: str + smartBucketingEnabled: bool + keywords: list[str] + urlPatterns: list[str] \ No newline at end of file diff --git a/backend/app/services/folder.py b/backend/app/services/folder.py new file mode 100644 index 0000000..0ba2bfd --- /dev/null +++ b/backend/app/services/folder.py @@ -0,0 +1,45 @@ +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import Session + +from app.data_models.folder import Folder +from app.schemas.folder import FolderDetails, FolderItem, FolderMetadata +from uuid import UUID + + + +class FolderNotFound(Exception): + pass + + +def update_folder_metadata( + *, + db: Session, + folder_id: UUID, + user_id: UUID, + metadata: FolderMetadata, +) -> Folder: + folder = ( + db.query(Folder) + .filter( + Folder.folder_id == folder_id, + Folder.user_id == user_id, + ) + .first() + ) + + if not folder: + raise FolderNotFound() + + folder.folder_name = metadata.name + folder.bucketing_mode = metadata.smartBucketingEnabled + + if metadata.smartBucketingEnabled: + folder.keywords = metadata.keywords + folder.url_patterns = metadata.urlPatterns + else: + folder.keywords = [] + folder.url_patterns = [] + + db.commit() + db.refresh(folder) + return folder diff --git a/frontend/src/app/(content)/home/folders/[folderId]/page.tsx b/frontend/src/app/(content)/home/folders/[folderId]/page.tsx index 0273371..eb5ee44 100644 --- a/frontend/src/app/(content)/home/folders/[folderId]/page.tsx +++ b/frontend/src/app/(content)/home/folders/[folderId]/page.tsx @@ -10,7 +10,7 @@ import { DialogTitle, DialogTrigger, } from "@/components/ui/dialog"; -import { Plus, Filter, ChevronDown, Sparkles, X } from "lucide-react"; +import { Plus, ChevronDown, Sparkles, X } from "lucide-react"; import { fetchToken } from "@/functions/user/UserData"; import BookmarkList from "@/components/BookmarkList"; import { Breadcrumb } from "../foldercomponents/Breadcrumb"; @@ -18,9 +18,7 @@ import { Switch } from "@/components/ui/switch"; import { Label } from "@/components/ui/label"; import { Input } from "@/components/ui/input"; import { Button } from "@/components/ui/button"; -import SettingsIcon from "@/components/ui/settings"; import { CiSettings } from "react-icons/ci"; -import { DropdownMenuArrow } from "@radix-ui/react-dropdown-menu"; interface PathProps { id: string; @@ -49,9 +47,12 @@ function FolderSettingsDialog({ const [newKeyword, setNewKeyword] = useState(""); const [newPattern, setNewPattern] = useState(""); const [showAdvanced, setShowAdvanced] = useState(false); + console.log("Current UI State:", metadata); useEffect(() => { + console.log("metadata being set: ", initialMetadata); setMetadata(initialMetadata); + console.log("new data has been set"); }, [initialMetadata]); const addKeyword = () => { @@ -129,7 +130,7 @@ function FolderSettingsDialog({ htmlFor="folder-name" className="text-sm font-medium text-gray-700" > - Folder Name + Folder Name,

{metadata.name}

{ const fetchFolderMetadata = async (id: string) => { // Implement your API call here - // const API_URL = `${process.env.NEXT_PUBLIC_API_BASE_URL}/folder-metadata/${id}`; - // const token = fetchToken(); - // const response = await fetch(API_URL, { ... }); - // const data = await response.json(); - // setFolderMetadata(data); + const API_URL = `${process.env.NEXT_PUBLIC_API_BASE_URL}/folder/metadata/${id}`; + const token = fetchToken(); + const response = await fetch(API_URL, { + method: "GET", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${token}`, + }, + }); + const data = await response.json(); + console.log("Data returned: ", data); + setFolderMetadata(data.data); }; if (folderId) { - // fetchFolderMetadata(folderId); - // For now, using mock data - setFolderMetadata({ - name: paths[paths.length - 1]?.name || "", - keywords: [], - urlPatterns: [], - smartBucketingEnabled: false, - }); + fetchFolderMetadata(folderId); } }, [folderId, paths]); const handleSaveMetadata = async (metadata: FolderMetadata) => { // TODO: Save to your API - // const API_URL = `${process.env.NEXT_PUBLIC_API_BASE_URL}/folder-metadata/${folderId}`; - // const token = fetchToken(); - // await fetch(API_URL, { - // method: 'PUT', - // headers: { - // 'Content-Type': 'application/json', - // Authorization: `Bearer ${token}`, - // }, - // body: JSON.stringify(metadata), - // }); + const API_URL = `${process.env.NEXT_PUBLIC_API_BASE_URL}/folder/${folderId}`; + const token = fetchToken(); + await fetch(API_URL, { + method: "PUT", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${token}`, + }, + body: JSON.stringify(metadata), + }); setFolderMetadata(metadata); - console.log("Saved metadata:", metadata); }; return ( diff --git a/frontend/src/components/BookmarkList.tsx b/frontend/src/components/BookmarkList.tsx index 053d930..f0bf179 100644 --- a/frontend/src/components/BookmarkList.tsx +++ b/frontend/src/components/BookmarkList.tsx @@ -17,7 +17,6 @@ type Bookmark = { export default function BookmarkList({ items }: { items: Bookmark[] }) { const viewMode = useContext(LayoutContext); - console.log("current conext value: ", viewMode); if (items.length === 0) { return

No bookmarks found

; } From cd89a8c462917e551f110bb447a8e945900b9dff Mon Sep 17 00:00:00 2001 From: angvit Date: Tue, 23 Dec 2025 21:49:19 -0500 Subject: [PATCH 06/75] reverting to hardcoded ActiveMQ enviornment variables --- csphere-worker/worker.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/csphere-worker/worker.py b/csphere-worker/worker.py index 92f2cd1..04dfe3a 100644 --- a/csphere-worker/worker.py +++ b/csphere-worker/worker.py @@ -140,10 +140,16 @@ def handle_message(message): def poll_and_process(): - ACTIVEMQ_URL=os.getenv('ACTIVEMQ_URL') - ACTIVEMQ_QUEUE= os.getenv('ACTIVEMQ_QUEUE') - ACTIVEMQ_USER= os.getenv('ACTIVEMQ_USER') - ACTIVEMQ_PASS= os.getenv('ACTIVEMQ_PASS') + # ACTIVEMQ_URL=os.getenv('ACTIVEMQ_URL') + # ACTIVEMQ_QUEUE= os.getenv('ACTIVEMQ_QUEUE') + # ACTIVEMQ_USER= os.getenv('ACTIVEMQ_USER') + # ACTIVEMQ_PASS= os.getenv('ACTIVEMQ_PASS') + + ACTIVEMQ_URL='http://feeltiptop.com:8161/' + ACTIVEMQ_QUEUE='CSPHEREQUEUETEST' + ACTIVEMQ_USER='admin' + ACTIVEMQ_PASS='tiptop' + queue_url = f"{ACTIVEMQ_URL}/api/message/{ACTIVEMQ_QUEUE}?type=queue&oneShot=true" From c6a7c671d45a5c871eef48e70b93350d1cf961d9 Mon Sep 17 00:00:00 2001 From: Crosve Lucero <92947897+crosve@users.noreply.github.com> Date: Wed, 24 Dec 2025 08:35:57 -0500 Subject: [PATCH 07/75] indenting properly --- backend/app/routes/content.py | 95 +---------------------------------- 1 file changed, 1 insertion(+), 94 deletions(-) diff --git a/backend/app/routes/content.py b/backend/app/routes/content.py index d76cb7d..b0fe74e 100644 --- a/backend/app/routes/content.py +++ b/backend/app/routes/content.py @@ -141,100 +141,7 @@ def save_content(content: ContentCreate, user: User = Depends(get_current_user), folder_id=content.folder_id, ) - return {"status": "Success", 'message': 'Bookmark details sent to message queue'} - - push_to_activemq(message=message) - #create the new content - new_content = Content( - url=content.url, - title=content.title, - source="chrome_extension", - first_saved_at=utc_time, - ) - db.add(new_content) - db.flush() # generate content_id - - # Generate embedding only for new content - print("generating manager") - pre, sumz, emb = get_shared_services() - categorizer = Categorizer(file_url=content.url) - embedding_manager = ContentEmbeddingManager( - db, - preprocessor=pre, - summarizer=sumz, - embedder=emb, - categorizer=categorizer, - content_url=content.url, - ) - print("done generating") - raw_html = content.html - - try: - content_ai = embedding_manager.process_content(new_content, raw_html) - db.commit() - except Exception as e: - db.rollback() - print(f"Embedding generation failed: {e}") - # Prevent downstream foreign key error - return {"status": "unsuccessful", "error": "Failed to generate summary"} - - if not content_ai: - print("Embedding generation failed or skipped.") - - else: - print("Existing content link") - new_content = existing_content - content_ai = db.query(ContentAI).filter_by(content_id=new_content.content_id).first() - - # Check if this user already saved this content - existing_item = db.query(ContentItem).filter( - ContentItem.user_id == user_id, - ContentItem.content_id == new_content.content_id - ).first() - - print("current utc timezone: ", datetime.now(timezone.utc)) - - utc_time = datetime.now(timezone.utc) - - if not existing_item: - - new_item = ContentItem( - user_id=user_id, - content_id=new_content.content_id, - saved_at=utc_time, - notes=notes, - read=False - ) - db.add(new_item) - db.commit() - - saved_item = db.query(ContentItem).order_by(ContentItem.saved_at.desc()).first() - print(f"Retrieved from DB: {saved_item.saved_at}") - - - #add to the corresponding folder if any - - if content.folder_id and content.folder_id != '' and content.folder_id != 'default': - - new_item = folder_item( - folder_item_id = uuid4(), - folder_id = content.folder_id, - user_id = user_id, - content_id = new_content.content_id, - added_at = datetime.utcnow() - - ) - - db.add(new_item) - db.commit() - db.refresh(new_item) - else: - print("no valid fodler id found so skipping this part") - - - print("Successfully saved content for user.") - - return {"status": "Success"} + return {"status": "Success", 'message': 'Bookmark details sent to message queue'} except Exception as e: logger.error(f"Error occurred in saving the bookmark: {str(e)}", exc_info=True) From 5121351afccf47019865a3cada716813247f955a Mon Sep 17 00:00:00 2001 From: Crosve Lucero <92947897+crosve@users.noreply.github.com> Date: Wed, 24 Dec 2025 08:56:34 -0500 Subject: [PATCH 08/75] removing hard coded env variables --- csphere-worker/worker.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/csphere-worker/worker.py b/csphere-worker/worker.py index 04dfe3a..9aa70cb 100644 --- a/csphere-worker/worker.py +++ b/csphere-worker/worker.py @@ -140,15 +140,15 @@ def handle_message(message): def poll_and_process(): - # ACTIVEMQ_URL=os.getenv('ACTIVEMQ_URL') - # ACTIVEMQ_QUEUE= os.getenv('ACTIVEMQ_QUEUE') - # ACTIVEMQ_USER= os.getenv('ACTIVEMQ_USER') - # ACTIVEMQ_PASS= os.getenv('ACTIVEMQ_PASS') - - ACTIVEMQ_URL='http://feeltiptop.com:8161/' - ACTIVEMQ_QUEUE='CSPHEREQUEUETEST' - ACTIVEMQ_USER='admin' - ACTIVEMQ_PASS='tiptop' + ACTIVEMQ_URL=os.getenv('ACTIVEMQ_URL') + ACTIVEMQ_QUEUE= os.getenv('ACTIVEMQ_QUEUE') + ACTIVEMQ_USER= os.getenv('ACTIVEMQ_USER') + ACTIVEMQ_PASS= os.getenv('ACTIVEMQ_PASS') + + # ACTIVEMQ_URL='http://feeltiptop.com:8161/' + # ACTIVEMQ_QUEUE='CSPHEREQUEUETEST' + # ACTIVEMQ_USER='admin' + # ACTIVEMQ_PASS='tiptop' queue_url = f"{ACTIVEMQ_URL}/api/message/{ACTIVEMQ_QUEUE}?type=queue&oneShot=true" From 65e9d975f781da74b357550a60828f2833a75214 Mon Sep 17 00:00:00 2001 From: Crosve Lucero <92947897+crosve@users.noreply.github.com> Date: Wed, 24 Dec 2025 09:01:25 -0500 Subject: [PATCH 09/75] adding psycong2 to the requirements.txt file --- csphere-worker/requirements.txt | Bin 2064 -> 2096 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/csphere-worker/requirements.txt b/csphere-worker/requirements.txt index 7176a3fc90c9682c8714c23e0c439a40929919b4..5a9decbd93d219006a1b0ccdaf4009a410f379e3 100644 GIT binary patch delta 30 fcmbOrumK1;CcCln2^lfi0-+Iu9)l$i8v-E!WX=Tq delta 7 OcmdlWFhO9000#gHe*!50 From 2e786a110b4fcb80378e23c5ae810e6fe29d15e4 Mon Sep 17 00:00:00 2001 From: Crosve Lucero <92947897+crosve@users.noreply.github.com> Date: Wed, 24 Dec 2025 19:36:05 -0500 Subject: [PATCH 10/75] bucket base processing(prior to testing) --- backend/app/routes/folder.py | 3 +- csphere-worker/data_models/folder.py | 16 +- .../exceptions/bucket_excpetions.py | 5 + csphere-worker/processors/__init__.py | 13 +- csphere-worker/processors/base.py | 25 +++- csphere-worker/processors/bucket.py | 141 +++++++++++++++++- csphere-worker/processors/content.py | 1 - csphere-worker/schemas/content_schemas.py | 14 ++ csphere-worker/schemas/folder_schemas.py | 21 +++ csphere-worker/worker.py | 3 + .../home/folders/[folderId]/page.tsx | 2 +- 11 files changed, 230 insertions(+), 14 deletions(-) create mode 100644 csphere-worker/exceptions/bucket_excpetions.py create mode 100644 csphere-worker/schemas/content_schemas.py create mode 100644 csphere-worker/schemas/folder_schemas.py diff --git a/backend/app/routes/folder.py b/backend/app/routes/folder.py index 5c35267..6b98abf 100644 --- a/backend/app/routes/folder.py +++ b/backend/app/routes/folder.py @@ -13,7 +13,8 @@ from app.services.folder import update_folder_metadata from app.db.database import get_db -from app.schemas.folder import FolderDetails, FolderItem, FolderMetadata, FolderNotFound +from app.schemas.folder import FolderDetails, FolderItem, FolderMetadata +from app.exceptions.folder import FolderNotFound from app.utils.hashing import get_current_user_id from datetime import datetime diff --git a/csphere-worker/data_models/folder.py b/csphere-worker/data_models/folder.py index 4334654..d2dc6da 100644 --- a/csphere-worker/data_models/folder.py +++ b/csphere-worker/data_models/folder.py @@ -1,10 +1,15 @@ -from sqlalchemy import Column, String, TIMESTAMP, ForeignKey +from sqlalchemy import Column, String, TIMESTAMP, ForeignKey, Boolean from sqlalchemy.dialects.postgresql import UUID -from database import Base +from app.db.database import Base from pydantic import BaseModel, EmailStr from datetime import datetime import uuid +from sqlalchemy.orm import Mapped, mapped_column + +from sqlalchemy.dialects.postgresql import ARRAY + + class Folder(Base): __tablename__ = "folder" @@ -13,5 +18,12 @@ class Folder(Base): user_id = Column(UUID(as_uuid=True), ForeignKey("users.id", ondelete="CASCADE"), nullable=False) parent_id = Column(UUID(as_uuid=True), ForeignKey("folder.folder_id", ondelete="CASCADE"), nullable=False) folder_name = Column(String, nullable=False) + bucketing_mode : Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default="false") + keywords : Mapped[list[str]] = mapped_column(ARRAY(String)) + url_patterns : Mapped[list[str]] = mapped_column(ARRAY(String)) created_at = Column(TIMESTAMP, server_default="NOW()") + + + + diff --git a/csphere-worker/exceptions/bucket_excpetions.py b/csphere-worker/exceptions/bucket_excpetions.py new file mode 100644 index 0000000..2c29b0e --- /dev/null +++ b/csphere-worker/exceptions/bucket_excpetions.py @@ -0,0 +1,5 @@ + + + +class FoldersNotFound(Exception): + pass \ No newline at end of file diff --git a/csphere-worker/processors/__init__.py b/csphere-worker/processors/__init__.py index 1958ae5..d853361 100644 --- a/csphere-worker/processors/__init__.py +++ b/csphere-worker/processors/__init__.py @@ -1,10 +1,21 @@ from .content import ContentProcessor +from .bucket import BucketProcessor PROCESSOR_MAP ={ - 'process_message': ContentProcessor() + 'process_message': ContentProcessor(), + 'process_folder' : BucketProcessor() } def get_processor(task_type: str): + ''' + Returns the processor based on task_type + Possible task_types to input: + process_message + process_folder + + :param task_type: processor key name you want + :type task_type: str + ''' return PROCESSOR_MAP.get(task_type) \ No newline at end of file diff --git a/csphere-worker/processors/base.py b/csphere-worker/processors/base.py index 88b5e3f..cc6c799 100644 --- a/csphere-worker/processors/base.py +++ b/csphere-worker/processors/base.py @@ -4,8 +4,9 @@ import logging from data_models.content import Content from utils.utils import handle_existing_content +from classes.EmbeddingManager import ContentEmbeddingManager - +import requests logger = logging.getLogger(__name__) @@ -13,6 +14,9 @@ class BaseProcessor(ABC): def __init__(self): self.db = self.get_db() + self.embedding_manager = ContentEmbeddingManager() + + @abstractmethod def process(self, message: dict): """Standard method all processors must implement.""" @@ -31,6 +35,25 @@ def get_db(): return db + @staticmethod + def get_html_content(self, url: str) -> str: + try: + response = requests.get(url) + if response.status_code == 200: + # Get the HTML content as a string + html_content = response.text + return html_content + + else: + logging.error(f"Failed to retrieve the page. Status code: {response.status_code}") + + except requests.exceptions.RequestException as e: + logging.error(f"An error occurred: {e}") + + + except Exception as e: + logging.error(f"Error fetching the html content: {e}") + def extract_data(self, message:dict): ''' Method to extract and return the data stored inside message diff --git a/csphere-worker/processors/bucket.py b/csphere-worker/processors/bucket.py index 31c1dc0..a9d37ac 100644 --- a/csphere-worker/processors/bucket.py +++ b/csphere-worker/processors/bucket.py @@ -1,14 +1,141 @@ -from .base import BaseProcessor import logging -from data_models.content import Content +from uuid import uuid4 -from datetime import datetime, timezone +# Standardized imports: Grouping by standard library, third-party, then local modules +from .base import BaseProcessor +from data_models.content import Content from data_models.content_item import ContentItem +from data_models.folder_item import folder_item # Consider renaming to CamelCase if it's a class +from data_models.folder import Folder from classes.EmbeddingManager import ContentEmbeddingManager -from data_models.folder_item import folder_item +from exceptions.bucket_excpetions import FoldersNotFound +from schemas.folder_schemas import FolderBucketData +from schemas.content_schemas import ContentPayload -from uuid import uuid4 -import time +from typing import List, Tuple + +from sklearn.metrics.pairwise import cosine_similarity +import re +import numpy as np + +# Use the module-level logger consistently +logger = logging.getLogger(__name__) + +class BucketProcessor(BaseProcessor): + def __init__(self): + super().__init__() + + + def process(self, message: dict) -> bool: + """ + Process the message metadata to match with the current users folders. + """ + # 1. Added a try-except block around data extraction to handle malformed messages + try: + content_data : ContentPayload = None + user_id, notes, folder_id, content_data = self.extract_data(message=message) + except (KeyError, ValueError) as e: + logger.error(f"Failed to extract data from message: {e}") + return False + + try: + user_folder_data = self.get_user_folders(user_id=user_id) + + # html_content = self.get_html_content(content_data.url) + + # 1. Prepare Content for Matching + # We combine title and notes to create a 'searchable' string + content_text = f"{content_data.get('title', '')} {notes or ''}".lower() + content_url = content_data.get('url', '').lower() + + # 2. Run the Matching Engine + matched_folder_id = self.find_best_matching_folder( + content_text=content_text, + content_url=content_url, + folders=user_folder_data + ) + + if matched_folder_id: + logger.info(f"Content matched to folder: {matched_folder_id}") + self.assign_to_folder(content_data, matched_folder_id) + + return True + + + except FoldersNotFound: + # 2. Changed logging to use 'logger' instance instead of 'logging' root + logger.info(f"No bucketing folders found for user {user_id}, skipping.") + return True + + except Exception as e: + logger.error(f"Unexpected error processing folders for user {user_id}: {e}", exc_info=True) + return False + + def get_user_folders(self, user_id: str) -> list[FolderBucketData]: + """ + Retrieves active bucketing folders for a specific user. + """ + + user_folders = self.db.query(Folder).filter( + Folder.user_id == user_id, + Folder.bucketing_mode == True + ).all() + + if not user_folders: + # 4. FoldersNotFound should be raised if the list is empty + raise FoldersNotFound() + + # 5. List comprehension is more "Pythonic" and faster than manual for-loops + return [ + FolderBucketData( + folder_id=f.folder_id, + folder_name=f.folder_name, + keywords=f.keywords, + url_patterns=f.url_patterns + ) for f in user_folders + ] + + def find_best_matching_folder(self, content_text: str, content_url: str, folders: List[FolderBucketData]) -> str: + """ + Amazon-level matching using a Weighted Scoring Algorithm. + """ + scores = [] + + for folder in folders: + score = 0.0 + + # LAYER 1: URL Pattern Matching (Weight: 1.0 - Instant Match) + if folder.url_patterns: + for pattern in folder.url_patterns: + if re.search(pattern.lower(), content_url): + return folder.folder_id # Early exit for deterministic matches + + # LAYER 2: Keyword Boosting (Weight: 0.6) + # We count how many keywords appear in the content + if folder.keywords: + matches = sum(1 for kw in folder.keywords if kw.lower() in content_text) + score += (matches / len(folder.keywords)) * 0.6 if folder.keywords else 0 + + # LAYER 3: Semantic Vector Similarity (Weight: 0.4) + # Using your EmbeddingManager to compare intent + try: + folder_vector = self.embedding_manager._generate_embedding(folder.folder_name) + content_vector = self.embedding_manager._generate_embedding(content_text) + + # Cosine similarity returns a value between 0 and 1 + semantic_score = cosine_similarity([folder_vector], [content_vector])[0][0] + score += semantic_score * 0.4 + except Exception as e: + logger.warning(f"Semantic match failed for folder {folder.folder_id}: {e}") + + scores.append((folder.folder_id, score)) + + # Sort by score descending and pick the best one above a threshold + scores.sort(key=lambda x: x[1], reverse=True) + + if scores and scores[0][1] > 0.35: # Confidence Threshold + return scores[0][0] + + return None -logger = logging.getLogger(__name__) \ No newline at end of file diff --git a/csphere-worker/processors/content.py b/csphere-worker/processors/content.py index 0e1d5ee..962efc8 100644 --- a/csphere-worker/processors/content.py +++ b/csphere-worker/processors/content.py @@ -8,7 +8,6 @@ from data_models.folder_item import folder_item from uuid import uuid4 -import time logger = logging.getLogger(__name__) diff --git a/csphere-worker/schemas/content_schemas.py b/csphere-worker/schemas/content_schemas.py new file mode 100644 index 0000000..c35c13e --- /dev/null +++ b/csphere-worker/schemas/content_schemas.py @@ -0,0 +1,14 @@ +from pydantic import BaseModel, Field + + + #"content_payload": { + # "url": url, + # "title": title, + # "source": source, + # "first_saved_at": utc_time.isoformat(), + # }, +class ContentPayload(BaseModel): + url: str + title: str + source: str + first_saved_at : str diff --git a/csphere-worker/schemas/folder_schemas.py b/csphere-worker/schemas/folder_schemas.py new file mode 100644 index 0000000..8f0e59e --- /dev/null +++ b/csphere-worker/schemas/folder_schemas.py @@ -0,0 +1,21 @@ +from pydantic import BaseModel, Field +from typing import Optional +from uuid import UUID +from datetime import datetime + + + + # 'folder_id' : folder.folder_id, + # 'folder_name' : folder.folder_name, + # 'keywords' : folder.keywords, + # 'url_patterns': folder.url_patterns + + +class FolderBucketData(BaseModel): + folder_id : str + folder_name: str + keywords: Optional[list[str] ] = None or [] + url_patterns: Optional[list[str]] = None or [] + + + diff --git a/csphere-worker/worker.py b/csphere-worker/worker.py index c8f8355..a686166 100644 --- a/csphere-worker/worker.py +++ b/csphere-worker/worker.py @@ -45,6 +45,9 @@ def handle_message(message): messageProcessor = get_processor('process_message') messageProcessor.process(message=message) + bucketProcessor = get_processor('process_folder') + bucketProcessor.process(message=message) + # db_gen = get_db() # db = next(db_gen) diff --git a/frontend/src/app/(content)/home/folders/[folderId]/page.tsx b/frontend/src/app/(content)/home/folders/[folderId]/page.tsx index eb5ee44..cc14804 100644 --- a/frontend/src/app/(content)/home/folders/[folderId]/page.tsx +++ b/frontend/src/app/(content)/home/folders/[folderId]/page.tsx @@ -130,7 +130,7 @@ function FolderSettingsDialog({ htmlFor="folder-name" className="text-sm font-medium text-gray-700" > - Folder Name,

{metadata.name}

+ Folder Name Date: Wed, 24 Dec 2025 19:57:06 -0500 Subject: [PATCH 11/75] finished v1 for bucket - not tested yet --- csphere-worker/processors/bucket.py | 38 ++++++++- csphere-worker/processors/content.py | 5 +- csphere-worker/worker.py | 120 +++------------------------ 3 files changed, 49 insertions(+), 114 deletions(-) diff --git a/csphere-worker/processors/bucket.py b/csphere-worker/processors/bucket.py index a9d37ac..aea7859 100644 --- a/csphere-worker/processors/bucket.py +++ b/csphere-worker/processors/bucket.py @@ -1,5 +1,6 @@ import logging from uuid import uuid4 +import datetime # Standardized imports: Grouping by standard library, third-party, then local modules from .base import BaseProcessor @@ -26,7 +27,7 @@ def __init__(self): super().__init__() - def process(self, message: dict) -> bool: + def process(self, message: dict, content_id : str) -> bool: """ Process the message metadata to match with the current users folders. """ @@ -39,7 +40,7 @@ def process(self, message: dict) -> bool: return False try: - user_folder_data = self.get_user_folders(user_id=user_id) + user_folder_data : list[FolderBucketData] = self.get_user_folders(user_id=user_id) # html_content = self.get_html_content(content_data.url) @@ -57,7 +58,7 @@ def process(self, message: dict) -> bool: if matched_folder_id: logger.info(f"Content matched to folder: {matched_folder_id}") - self.assign_to_folder(content_data, matched_folder_id) + self.assign_to_folder(content_data, matched_folder_id, content_id, user_id) return True @@ -139,3 +140,34 @@ def find_best_matching_folder(self, content_text: str, content_url: str, folders return None + + def assign_to_folder(self, content_data : ContentPayload, matched_folder_id : str, content_id : str, user_id : str) -> bool: + + db = self.db + present = db.query(folder_item).filter(content_id == folder_item.content_id, matched_folder_id == folder_item.folder_id, user_id == folder_item.user_id).first() + + if present: + raise + + try: + new_item = folder_item( + folder_item_id = uuid4(), + folder_id = matched_folder_id, + user_id = user_id, + content_id = content_id, + added_at = datetime.utcnow() + + ) + + db.add(new_item) + db.commit() + db.refresh(new_item) + + return {'success' : True, 'message' : 'Bookmark added to folder'} + + + + except Exception as e: + logging.error(f"Error matching folder {e}" ) + + diff --git a/csphere-worker/processors/content.py b/csphere-worker/processors/content.py index 962efc8..2a341ed 100644 --- a/csphere-worker/processors/content.py +++ b/csphere-worker/processors/content.py @@ -20,7 +20,7 @@ def __init__(self): super().__init__() - def process(self, message: dict): + def process(self, message: dict) -> str: user_id, notes, folder_id, content_data = self.extract_data(message=message) @@ -91,7 +91,10 @@ def process(self, message: dict): logging.info("Successfully saved content for user.") + return new_content.content_id + except Exception as e: logging.error(f"Error occurred while saving the bookmark: {str(e)}") + return '' diff --git a/csphere-worker/worker.py b/csphere-worker/worker.py index a686166..d16b5c7 100644 --- a/csphere-worker/worker.py +++ b/csphere-worker/worker.py @@ -6,23 +6,15 @@ from email.utils import quote import time -from datetime import datetime, timezone import logging -from data_models.content import Content -from data_models.content_item import ContentItem -from data_models.folder_item import folder_item -from data_models.content_ai import ContentAI -from database import get_db - - -from utils.utils import handle_existing_content from dotenv import load_dotenv -from classes.EmbeddingManager import ContentEmbeddingManager from processors import get_processor +from processors.bucket import BucketProcessor +from processors.content import ContentProcessor #Logging config stuff logging.basicConfig( @@ -41,110 +33,18 @@ def handle_message(message): # + try: + messageProcessor : ContentProcessor = get_processor('process_message') + content_id : str = messageProcessor.process(message=message) - messageProcessor = get_processor('process_message') - messageProcessor.process(message=message) - - bucketProcessor = get_processor('process_folder') - bucketProcessor.process(message=message) - - - # db_gen = get_db() - # db = next(db_gen) - - # logger.info(f"The current message: {message}") - - - - - # #Create the Content object - # user_id = message.get('user_id') - # notes = message.get('notes') - # folder_id = message.get('folder_id', '') - # content_data = message.get('content_payload', {}) - - # #filter based on the content paylaod - - - # if content_data == {}: - # logger.error("Content data is empty, returning") - # return - - # content_url = content_data.get('url') - - # existing_content = db.query(Content).filter(Content.url == content_url).first() - - # if existing_content: - # #done some logic and don't continue on , end it here - - # handle_existing_content(existing_content, user_id, db, notes, folder_id) - # logger.info("Bookmark succesfully saved to user") - # return - - - # new_content = Content(**content_data) - - # try: - # db.add(new_content) - # db.flush() - - # #update the content Embedding manager when necessary - # content_manager = ContentEmbeddingManager(db=db, content_url=new_content.url) - - # raw_html = message.get('raw_html', '') - - # if raw_html == '': - # logging.info("No raw html provided, categorization and summarization may be poor") - - # content_ai = content_manager.process_content(new_content, raw_html) - - # db.commit() - - # if not content_ai: - # logging.info("Embedding generation failed or skipped.") - # else: - # logging.debug(f"Summary Generated: {content_ai.ai_summary}") - - # # Check if this user already saved this content - # existing_item = db.query(ContentItem).filter( - # ContentItem.user_id == user_id, - # ContentItem.content_id == new_content.content_id - # ).first() - - - # utc_time = datetime.now(timezone.utc) - - # if not existing_item: - # new_item = ContentItem( - # user_id=user_id, - # content_id=new_content.content_id, - # saved_at=utc_time, - # notes=notes - # ) - # db.add(new_item) - # db.commit() - - - # # Add to the corresponding folder if any - # if folder_id and folder_id != '' and folder_id != 'default': - # new_folder_item = folder_item( - # folder_item_id=uuid4(), - # folder_id=folder_id, - # user_id=user_id, - # content_id=new_content.content_id, - # added_at=datetime.utcnow() - # ) + if content_id != '': + bucketProcessor : BucketProcessor = get_processor('process_folder') + bucketProcessor.process(message=message, content_id=content_id) - # db.add(new_folder_item) - # db.commit() - # db.refresh(new_folder_item) - # else: - # print("No valid folder id found, skipping this part") + except Exception as e: + logging.error(f"Failed to fully process message: {e}") - # logging.info("Successfully saved content for user.") - # except Exception as e: - # logging.error(f"Error occurred while saving the bookmark: {str(e)}") From 10be0e8b11e96efa10867bbff78e45becbd318e9 Mon Sep 17 00:00:00 2001 From: Crosve Lucero <92947897+crosve@users.noreply.github.com> Date: Sun, 28 Dec 2025 18:53:09 -0500 Subject: [PATCH 12/75] bucketing for reggex is working - need work on semantic matching' --- backend/app/routes/folder.py | 5 +++++ backend/app/services/folder.py | 4 +++- csphere-worker/data_models/folder.py | 3 +-- csphere-worker/exceptions/bucket_excpetions.py | 8 +++++++- csphere-worker/processors/base.py | 2 +- csphere-worker/processors/bucket.py | 17 ++++++++++++++--- csphere-worker/schemas/folder_schemas.py | 2 +- 7 files changed, 32 insertions(+), 9 deletions(-) diff --git a/backend/app/routes/folder.py b/backend/app/routes/folder.py index 6b98abf..36aeeb1 100644 --- a/backend/app/routes/folder.py +++ b/backend/app/routes/folder.py @@ -109,6 +109,7 @@ def process_folder_metadata( db: Session = Depends(get_db), ): try: + logging.info(f"Folder metdata being processed: {metadata}") folder = update_folder_metadata( db=db, folder_id=folder_id, @@ -246,6 +247,9 @@ def create_folder(folderDetails: FolderDetails, user_id: UUID=Depends(get_curren user_id= user_id, parent_id = folderDetails.folderId if folderDetails.folderId else folder_uuid, folder_name = folderDetails.foldername, + bucketing_mode = False, + keywords = [], + url_patterns = [], created_at=datetime.utcnow() ) db.add(new_folder) @@ -266,6 +270,7 @@ def create_folder(folderDetails: FolderDetails, user_id: UUID=Depends(get_curren except Exception as e: + logging.error(f"Failed to create folder for user: {e}") return {'success' : False, 'message' : str(e)} diff --git a/backend/app/services/folder.py b/backend/app/services/folder.py index 0ba2bfd..53b1112 100644 --- a/backend/app/services/folder.py +++ b/backend/app/services/folder.py @@ -18,7 +18,7 @@ def update_folder_metadata( user_id: UUID, metadata: FolderMetadata, ) -> Folder: - folder = ( + folder : Folder = ( db.query(Folder) .filter( Folder.folder_id == folder_id, @@ -33,6 +33,8 @@ def update_folder_metadata( folder.folder_name = metadata.name folder.bucketing_mode = metadata.smartBucketingEnabled + print("current url patterns: ", metadata) + if metadata.smartBucketingEnabled: folder.keywords = metadata.keywords folder.url_patterns = metadata.urlPatterns diff --git a/csphere-worker/data_models/folder.py b/csphere-worker/data_models/folder.py index d2dc6da..7ae6311 100644 --- a/csphere-worker/data_models/folder.py +++ b/csphere-worker/data_models/folder.py @@ -1,7 +1,6 @@ from sqlalchemy import Column, String, TIMESTAMP, ForeignKey, Boolean from sqlalchemy.dialects.postgresql import UUID -from app.db.database import Base -from pydantic import BaseModel, EmailStr +from database import Base from datetime import datetime import uuid diff --git a/csphere-worker/exceptions/bucket_excpetions.py b/csphere-worker/exceptions/bucket_excpetions.py index 2c29b0e..adfc6de 100644 --- a/csphere-worker/exceptions/bucket_excpetions.py +++ b/csphere-worker/exceptions/bucket_excpetions.py @@ -2,4 +2,10 @@ class FoldersNotFound(Exception): - pass \ No newline at end of file + pass + +class ItemExistInFolder(Exception): + def __init__(self, item_id: str, folder_id: str): + self.item_id = item_id + self.folder_id = folder_id + super().__init__(f"Item {item_id} already exists in folder {folder_id}") \ No newline at end of file diff --git a/csphere-worker/processors/base.py b/csphere-worker/processors/base.py index cc6c799..043f2e7 100644 --- a/csphere-worker/processors/base.py +++ b/csphere-worker/processors/base.py @@ -14,7 +14,7 @@ class BaseProcessor(ABC): def __init__(self): self.db = self.get_db() - self.embedding_manager = ContentEmbeddingManager() + self.embedding_manager = ContentEmbeddingManager(self.db) @abstractmethod diff --git a/csphere-worker/processors/bucket.py b/csphere-worker/processors/bucket.py index aea7859..6c85856 100644 --- a/csphere-worker/processors/bucket.py +++ b/csphere-worker/processors/bucket.py @@ -9,7 +9,7 @@ from data_models.folder_item import folder_item # Consider renaming to CamelCase if it's a class from data_models.folder import Folder from classes.EmbeddingManager import ContentEmbeddingManager -from exceptions.bucket_excpetions import FoldersNotFound +from exceptions.bucket_excpetions import FoldersNotFound, ItemExistInFolder from schemas.folder_schemas import FolderBucketData from schemas.content_schemas import ContentPayload @@ -19,6 +19,9 @@ import re import numpy as np +from datetime import datetime, timezone + + # Use the module-level logger consistently logger = logging.getLogger(__name__) @@ -56,9 +59,13 @@ def process(self, message: dict, content_id : str) -> bool: folders=user_folder_data ) + logging.info(f"matched folder: {matched_folder_id}") + if matched_folder_id: logger.info(f"Content matched to folder: {matched_folder_id}") self.assign_to_folder(content_data, matched_folder_id, content_id, user_id) + + logging.info("Succesfully bucketed the new content") return True @@ -133,10 +140,13 @@ def find_best_matching_folder(self, content_text: str, content_url: str, folders # Sort by score descending and pick the best one above a threshold scores.sort(key=lambda x: x[1], reverse=True) + logging.info(f"Scores for current content: {scores}") if scores and scores[0][1] > 0.35: # Confidence Threshold return scores[0][0] + + return None @@ -147,7 +157,7 @@ def assign_to_folder(self, content_data : ContentPayload, matched_folder_id : st present = db.query(folder_item).filter(content_id == folder_item.content_id, matched_folder_id == folder_item.folder_id, user_id == folder_item.user_id).first() if present: - raise + raise ItemExistInFolder(item_id=content_id, folder_id=matched_folder_id) try: new_item = folder_item( @@ -155,13 +165,14 @@ def assign_to_folder(self, content_data : ContentPayload, matched_folder_id : st folder_id = matched_folder_id, user_id = user_id, content_id = content_id, - added_at = datetime.utcnow() + added_at = datetime.now(tz=timezone.utc) ) db.add(new_item) db.commit() db.refresh(new_item) + logging.info('succesfully saved the content to the folder') return {'success' : True, 'message' : 'Bookmark added to folder'} diff --git a/csphere-worker/schemas/folder_schemas.py b/csphere-worker/schemas/folder_schemas.py index 8f0e59e..07eac12 100644 --- a/csphere-worker/schemas/folder_schemas.py +++ b/csphere-worker/schemas/folder_schemas.py @@ -12,7 +12,7 @@ class FolderBucketData(BaseModel): - folder_id : str + folder_id : UUID folder_name: str keywords: Optional[list[str] ] = None or [] url_patterns: Optional[list[str]] = None or [] From fd0c24215696fdf916836995caf7c28c8a783cd0 Mon Sep 17 00:00:00 2001 From: Crosve Lucero <92947897+crosve@users.noreply.github.com> Date: Mon, 29 Dec 2025 15:02:18 -0500 Subject: [PATCH 13/75] description column added to the folder and started to test matching algo --- ...dding_decription_column_to_folder_table.py | 36 +++++++++++++ backend/app/data_models/folder.py | 1 + backend/app/routes/folder.py | 3 ++ backend/app/schemas/folder.py | 1 + backend/app/services/folder.py | 7 ++- csphere-worker/tests/bucket_test.py | 53 +++++++++++++++++++ .../home/folders/[folderId]/page.tsx | 34 ++++++++++-- frontend/src/components/ui/textarea.tsx | 18 +++++++ 8 files changed, 148 insertions(+), 5 deletions(-) create mode 100644 backend/alembic/versions/c32fb8abe107_adding_decription_column_to_folder_table.py create mode 100644 csphere-worker/tests/bucket_test.py create mode 100644 frontend/src/components/ui/textarea.tsx diff --git a/backend/alembic/versions/c32fb8abe107_adding_decription_column_to_folder_table.py b/backend/alembic/versions/c32fb8abe107_adding_decription_column_to_folder_table.py new file mode 100644 index 0000000..d72c848 --- /dev/null +++ b/backend/alembic/versions/c32fb8abe107_adding_decription_column_to_folder_table.py @@ -0,0 +1,36 @@ +"""adding decription column to Folder table + +Revision ID: c32fb8abe107 +Revises: 2994a72baf17 +Create Date: 2025-12-29 14:34:22.233508 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'c32fb8abe107' +down_revision: Union[str, None] = '2994a72baf17' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + op.add_column('folder', sa.Column('description', sa.String(), nullable=True)) + + op.execute("UPDATE folder SET description = ''") + + op.alter_column('folder', 'description', nullable=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('folder', 'description') + # ### end Alembic commands ### diff --git a/backend/app/data_models/folder.py b/backend/app/data_models/folder.py index 6d3ee28..c3a57db 100644 --- a/backend/app/data_models/folder.py +++ b/backend/app/data_models/folder.py @@ -21,6 +21,7 @@ class Folder(Base): bucketing_mode : Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, server_default="false") keywords : Mapped[list[str]] = mapped_column(ARRAY(String)) url_patterns : Mapped[list[str]] = mapped_column(ARRAY(String)) + description : Mapped[str] = mapped_column(String) created_at = Column(TIMESTAMP, server_default="NOW()") diff --git a/backend/app/routes/folder.py b/backend/app/routes/folder.py index 36aeeb1..ba22865 100644 --- a/backend/app/routes/folder.py +++ b/backend/app/routes/folder.py @@ -88,7 +88,9 @@ def get_folder_metadata(folder_id : str, db: Session = Depends(get_db)): "name" : folder.folder_name if not None else '', "keywords" : folder.keywords if not None else [], "urlPatterns" : folder.url_patterns if not None else [], + "description" : folder.description if not None else '', "smartBucketingEnabled" : folder.bucketing_mode if not None else False + } return {'success' : True, 'message': 'Data fetched successfully', 'data' : payload } @@ -250,6 +252,7 @@ def create_folder(folderDetails: FolderDetails, user_id: UUID=Depends(get_curren bucketing_mode = False, keywords = [], url_patterns = [], + description='', created_at=datetime.utcnow() ) db.add(new_folder) diff --git a/backend/app/schemas/folder.py b/backend/app/schemas/folder.py index 85c378f..0945e25 100644 --- a/backend/app/schemas/folder.py +++ b/backend/app/schemas/folder.py @@ -29,5 +29,6 @@ class FolderItem(BaseModel): class FolderMetadata(BaseModel): name: str smartBucketingEnabled: bool + description: Optional[str] = '' keywords: list[str] urlPatterns: list[str] \ No newline at end of file diff --git a/backend/app/services/folder.py b/backend/app/services/folder.py index 53b1112..6264bb5 100644 --- a/backend/app/services/folder.py +++ b/backend/app/services/folder.py @@ -38,9 +38,12 @@ def update_folder_metadata( if metadata.smartBucketingEnabled: folder.keywords = metadata.keywords folder.url_patterns = metadata.urlPatterns + folder.description = metadata.description + else: - folder.keywords = [] - folder.url_patterns = [] + folder.bucketing_mode = False + # folder.keywords = [] + # folder.url_patterns = [] db.commit() db.refresh(folder) diff --git a/csphere-worker/tests/bucket_test.py b/csphere-worker/tests/bucket_test.py new file mode 100644 index 0000000..f72f9b7 --- /dev/null +++ b/csphere-worker/tests/bucket_test.py @@ -0,0 +1,53 @@ +from processors import get_processor +from processors.bucket import BucketProcessor + + + # "content_payload": { + # "url": url, + # "title": title, + # "source": source, + # "first_saved_at": utc_time.isoformat(), + # }, + + + # payload = { + # "content_payload": { + # "url": url, + # "title": title, + # "source": source, + # "first_saved_at": utc_time.isoformat(), + # }, + # "raw_html": html[0:50], + # "user_id": str(user_id), + # "notes": notes, + # "folder_id": str(folder_id) if folder_id else None, + # } + +test_data = { + "content_payload" : { + "url" : "https://huggingface.co/datasets/nvidia/NitroGen", + "title" : "https://huggingface.co/datasets/nvidia/NitroGen", + "source" : "web_app", + "first_saved_at" : '2025-12-28 18:51:42.471089-05' + }, + 'raw_html' : None, + 'user_id' : 'fc1ee8c1-fd30-4670-ad22-c4cd4d886807', + "notes" : '', + 'folder_id' : None, + + +} + +content_id = '8ed8a17b-53f1-4c86-9f7d-39a60085fe85' + + +def testBucket(): + + bucket_processor : BucketProcessor = get_processor('process_folder') + status = bucket_processor.process(test_data, content_id) + + if status == True: + print('sucesfully bucket the item') + else: + print("something went wrong") + diff --git a/frontend/src/app/(content)/home/folders/[folderId]/page.tsx b/frontend/src/app/(content)/home/folders/[folderId]/page.tsx index cc14804..eb82838 100644 --- a/frontend/src/app/(content)/home/folders/[folderId]/page.tsx +++ b/frontend/src/app/(content)/home/folders/[folderId]/page.tsx @@ -19,6 +19,7 @@ import { Label } from "@/components/ui/label"; import { Input } from "@/components/ui/input"; import { Button } from "@/components/ui/button"; import { CiSettings } from "react-icons/ci"; +import { Textarea } from "@/components/ui/textarea"; interface PathProps { id: string; @@ -29,6 +30,7 @@ interface FolderMetadata { name: string; keywords: string[]; urlPatterns: string[]; + description: string; smartBucketingEnabled: boolean; } @@ -47,14 +49,20 @@ function FolderSettingsDialog({ const [newKeyword, setNewKeyword] = useState(""); const [newPattern, setNewPattern] = useState(""); const [showAdvanced, setShowAdvanced] = useState(false); - console.log("Current UI State:", metadata); useEffect(() => { - console.log("metadata being set: ", initialMetadata); setMetadata(initialMetadata); - console.log("new data has been set"); }, [initialMetadata]); + const updateDescription = (e: React.ChangeEvent) => { + e.preventDefault(); + console.log("current target value; ", e.target.value); + setMetadata({ + ...metadata, + description: e.target.value, + }); + }; + const addKeyword = () => { if ( newKeyword.trim() && @@ -285,6 +293,24 @@ function FolderSettingsDialog({
+ {/*User inputted description (optional) */} +
+ +

+ Description of this folder to help define the purpose of + this folder. +

+ +
0/280
-
-
+
-
- -
- -
+ +
- +
- -
-
-
-
📚
-

No recent bookmarks

- Your recently saved bookmarks will appear here -
+ + +
+
+

Loading recent bookmarks...

- -
-
-
-
📁
-

Loading folders...

-
+ +
+
+

Loading folders...

-

+

`; - setupTabNavigation(); - setupTagSystem(); - getRecentFolders(); - loadRecentBookmarks(); - setupBookmarkHandler(); - setupCharacterCounter(); + attachEventListeners(); + loadContextualData(); } -function renderLoginInterface() { - app.innerHTML = ` - - - `; - - document.getElementById("loginForm").addEventListener("submit", async (e) => { - e.preventDefault(); - const username = document.getElementById("username").value; - const password = document.getElementById("password").value; - try { - const LOGIN_URL = `${backend_url}/user/browser/login`; - const response = await fetch(LOGIN_URL, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ username, password }), - }); - const data = await response.json(); - if (data?.detail?.trim() === "Incorrect password") { - insertMessage("Incorrect password", "error"); - return; - } - if (data?.detail?.trim() === "User not found") { - insertMessage("User not found", "error"); - return; - } - if (data?.detail?.trim() === "sucessful login") { - browser.storage.local.set({ csphere_user_token: data.token }, () => { - renderInterface(); - }); - } - } catch (error) { - console.log("error logging in: ", error); - } +/** * ========================================== + * LOGIC & EVENTS + * ========================================== */ +function attachEventListeners() { + document.querySelectorAll(".tab-btn").forEach((btn) => { + btn.addEventListener("click", () => { + activeTab = btn.dataset.tab; + renderMainView(); + }); }); - document.getElementById("googleAuthBtn").addEventListener("click", () => { - browser.identity.launchWebAuthFlow( - { - url: `${backend_url}/auth/google`, - interactive: true, - }, - (redirectUrl) => { - if (browser.runtime.lastError || !redirectUrl) { - console.error("Google login failed", browser.runtime.lastError); - return; - } - const url = new URL(redirectUrl); - const code = url.searchParams.get("code"); - if (!code) { - console.error("Token missing from redirect"); - return; - } - fetch(`${backend_url}/auth/google/callback`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ code }), - }) - .then((res) => res.json()) - .then((data) => { - if (!data.token) { - console.error("Token missing from backend response"); - return; - } - browser.storage.local.set({ csphere_user_token: data.token }, () => { - renderInterface(); - }); - }) - .catch((err) => { - console.error("Error fetching token from backend", err); - }); - } - ); + document.getElementById("logoutBtn")?.addEventListener("click", () => { + browser.storage.local.remove("csphere_user_token", () => renderLoginView()); }); -} - -/** * ========================= * New UI Functions * ========================= */ -function setupTabNavigation() { - const tabBtns = document.querySelectorAll(".tab-btn"); - const tabPanels = document.querySelectorAll(".tab-panel"); - - tabBtns.forEach((btn) => { - btn.addEventListener("click", () => { - const targetTab = btn.dataset.tab; - // Update active states - tabBtns.forEach((b) => b.classList.remove("active")); - tabPanels.forEach((p) => p.classList.remove("active")); - - btn.classList.add("active"); - document.getElementById(`${targetTab}-panel`).classList.add("active"); - - currentTab = targetTab; - - // Load content for specific tabs - if (targetTab === "folders") { - loadFoldersView(); - } + const textarea = document.getElementById("notesTextarea"); + if (textarea) { + textarea.addEventListener("input", (e) => { + document.getElementById("charCount").textContent = e.target.value.length; }); + } + + document.getElementById("addTagBtn")?.addEventListener("click", handleAddTag); + document.getElementById("tagInput")?.addEventListener("keypress", (e) => { + if (e.key === "Enter") handleAddTag(); }); + + document + .getElementById("saveBookmarkBtn") + ?.addEventListener("click", handleSaveBookmark); } -function setupTagSystem() { - const tagInput = document.getElementById("tagInput"); - const addTagBtn = document.getElementById("addTagBtn"); +async function loadContextualData() { + //Fetch the folder once to improve speed - const addTag = () => { - const tagValue = tagInput.value.trim(); - if (tagValue && !tags.includes(tagValue)) { - tags.push(tagValue); - tagInput.value = ""; - renderTags(); - } - }; - - addTagBtn.addEventListener("click", addTag); - tagInput.addEventListener("keypress", (e) => { - if (e.key === "Enter") { - addTag(); - } - }); -} + if ( + (activeTab === "bookmark" && cachedFolders.length !== 0) || + cachedFolders.length === 0 + ) { + await fetchFolders(); + } -function renderTags() { - const container = document.getElementById("tagsContainer"); - container.innerHTML = tags - .map( - (tag) => ` - - ${tag} - - - ` - ) - .join(""); + if (activeTab === "recent") { + await renderRecentBookmarks(); + } else if (activeTab === "folders") { + renderFoldersList(); + } } -function removeTag(tagToRemove) { - tags = tags.filter((tag) => tag !== tagToRemove); - renderTags(); +async function fetchFolderBookmarks() { + try { + console.log("selected folder view data: ", selectedViewFolder); + + // selectedViewFolder = { + // id: card.dataset.folderId, + // name: card.dataset.name, + // }; + const data = await apiRequest(`/folder/${selectedViewFolder.id}`, "GET"); + console.log(`data from folder id ${selectedViewFolder.id}`, data); + + if (data) { + selectedViewFolderBookmarks = data; + } else { + selectedViewFolder = null; + } + } catch (err) { + console.log( + "error occured trying to fetch the bookmarks for a folder", + err, + ); + } } -function setupCharacterCounter() { - const textarea = document.getElementById("notesTextarea"); - const charCount = document.getElementById("charCount"); - - textarea.addEventListener("input", () => { - charCount.textContent = textarea.value.length; - }); +async function fetchRecentBookmarks() { + try { + const data = await apiRequest("/content/recent", "POST"); + console.log("recent bookmark list: ", data); + recentBookmarksList = data; + } catch (err) { + console.log("failed to fetch the folder data: ", err); + } } -async function loadRecentBookmarks() { +async function fetchFolders() { try { - const token = await fetchToken(); - console.log("token from loading recent: ", token); - const response = await fetch(`${backend_url}/content/recent`, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${token}`, - }, - }); - - if (response.ok) { - const data = await response.json(); - console.log("data for recent bookmarks: ", data); - recentBookmarks = data || []; - renderRecentBookmarks(); + const response = await apiRequest("/folder"); + cachedFolders = response.data || []; + + // Update dropdown if we are on the bookmark tab + const dropdown = document.getElementById("folderDropdown"); + if (dropdown) { + dropdown.innerHTML = ``; + cachedFolders.forEach((f) => { + const opt = document.createElement("option"); + opt.value = f.folderId; + opt.textContent = f.folderName; + dropdown.appendChild(opt); + }); + dropdown.value = activeFolderId; + dropdown.addEventListener( + "change", + (e) => (activeFolderId = e.target.value), + ); } - } catch (error) { - console.log("Error loading recent bookmarks:", error); + } catch (err) { + console.error("Folder fetch error:", err); } } -function renderRecentBookmarks() { - const container = document.getElementById("recentBookmarks"); +function renderFoldersList() { + const container = document.getElementById("foldersListContainer"); + if (!container) return; - if (recentBookmarks.length === 0) { - container.innerHTML = ` -
-
📚
-

No recent bookmarks

- Your recently saved bookmarks will appear here -
- `; + // If a folder is selected, show the "Inside Folder" view (Placeholder) + if (selectedViewFolder) { + renderFolderDetailView(container); return; } - container.innerHTML = recentBookmarks - .map( - (bookmark) => ` -
-
-

${bookmark.title || "Untitled"}

- ${formatDate( - bookmark.first_saved_at || bookmark.created_at - )} -
- visit -