+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index fc75ad4..67d372e 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -6,43 +6,68 @@ on: - main jobs: - confirm-changes: - name: confirm there are changes in the backend + deploy-backend: + name: deploy backend to tiptops server runs-on: ubuntu-latest - outputs: - folder_changed: ${{ steps.check_files.outputs.folder_changed }} steps: - - uses: actions/checkout@v4 + - name: Execute remote ssh commands using passwords + uses: appleboy/ssh-action@v1 with: - fetch-depth: 2 - - - name: Find name of changes files with git - id: check_files - run: | - set +e - CHANGED_FILES=$(git diff --name-only ${{github.event.before}} ${{github.sha}} || true) - echo "changed files found: $CHANGED_FILES" - - CHANGED_COUNT=$(echo "$CHANGED_FILES" | grep -c "backend") + host: ${{secrets.HOST}} + username: ${{secrets.USERNAME}} + password: ${{secrets.PASSWORD}} + port: ${{secrets.PORT}} - set -e - echo "file count that was changed: $CHANGED_COUNT" + script: | + echo "Starting deployment process" + # NOTE: Use 'sh' to avoid issues with inline script quoting + sudo -S bash -c ' + cd /root/docker/csphere + #run the deployment script + ./build-csphere-backend.sh + echo "deployment was a success" + ' <<< "${{secrets.PASSWORD}}" - if [ "$CHANGED_COUNT" -gt 0 ]; then - echo "folder_changed=true" >> $GITHUB_OUTPUT + - name: Slack Notification + if: always() + run: | + if [[ "${{ job.status }}" == "success" ]]; then + STATUS_TEXT="Deployment Successful" + COLOR="#098824" else - echo "folder_changed=false" >> $GITHUB_OUTPUT + STATUS_TEXT="Deployment Failed" + COLOR="#a80a0a" fi - deploy-backend: - name: deploy backend to tiptops server - runs-on: ubuntu-latest - # needs: confirm-changes + REPO_URL="https://github.com/${{ github.repository }}" + RUN_URL="$REPO_URL/actions/runs/${{ github.run_id }}" + COMMIT_URL="$REPO_URL/commit/${{ github.sha }}" + + curl -X POST -H 'Content-type: 
application/json; charset=utf-8' \ + -H "Authorization: Bearer ${{ secrets.SLACK_TOKEN }}" \ + --data "{ + \"channel\": \"${{ secrets.SLACK_CHANNEL }}\", + \"attachments\": [ + { + \"color\": \"$COLOR\", + \"title\": \"Csphere CI/CD Update - Backend\", + \"title_link\": \"$RUN_URL\", + \"text\": \"$STATUS_TEXT\n\n*Repository:* ${{ github.repository }}\n*Branch:* \`${{ github.ref_name }}\` \n*Commit:* <$COMMIT_URL|${{ github.event.head_commit.message }}>\n*Author:* ${{ github.actor }}\", + \"footer\": \"GitHub Actions • Build #${{ github.run_number }}\", + \"ts\": $(date +%s) + } + ] + }" \ + https://slack.com/api/chat.postMessage + + deploy-worker: + name: deploy worker to the backend servers + runs-on: ununtu-latest steps: - - name: Execute remote ssh commands using passwords - # if: needs.confirm-changes.outputs.folder_changed == 'true' + - name: Execute remote ssh commands uses: appleboy/ssh-action@v1 + with: host: ${{secrets.HOST}} username: ${{secrets.USERNAME}} @@ -55,6 +80,38 @@ jobs: sudo -S bash -c ' cd /root/docker/csphere #run the deployment script - ./build-csphere-backend.sh + ./build-csphere-worker.sh echo "deployment was a success" ' <<< "${{secrets.PASSWORD}}" + + - name: Slack Notification + if: always() + run: | + if [[ "${{ job.status }}" == "success" ]]; then + STATUS_TEXT="Deployment Successful" + COLOR="#098824" + else + STATUS_TEXT="Deployment Failed" + COLOR="#a80a0a" + fi + + REPO_URL="https://github.com/${{ github.repository }}" + RUN_URL="$REPO_URL/actions/runs/${{ github.run_id }}" + COMMIT_URL="$REPO_URL/commit/${{ github.sha }}" + + curl -X POST -H 'Content-type: application/json; charset=utf-8' \ + -H "Authorization: Bearer ${{ secrets.SLACK_TOKEN }}" \ + --data "{ + \"channel\": \"${{ secrets.SLACK_CHANNEL }}\", + \"attachments\": [ + { + \"color\": \"$COLOR\", + \"title\": \"Csphere CI/CD Update - worker\", + \"title_link\": \"$RUN_URL\", + \"text\": \"$STATUS_TEXT\n\n*Repository:* ${{ github.repository }}\n*Branch:* \`${{ 
github.ref_name }}\` \n*Commit:* <$COMMIT_URL|${{ github.event.head_commit.message }}>\n*Author:* ${{ github.actor }}\", + \"footer\": \"GitHub Actions • Build #${{ github.run_number }}\", + \"ts\": $(date +%s) + } + ] + }" \ + https://slack.com/api/chat.postMessage diff --git a/.github/workflows/heroku.yaml b/.github/workflows/heroku.yaml deleted file mode 100644 index 7bcc422..0000000 --- a/.github/workflows/heroku.yaml +++ /dev/null @@ -1,42 +0,0 @@ -name: Backend deployment via Heroku - -on: - push: - branches: - - main - -jobs: - deploy: - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - ref: main - - # - name: Set up Python environment - # uses: actions/setup-python@v5 - # with: - # python-version: "3.10" - - # - name: Install Heroku CLI - # run: | - # curl https://cli-assets.heroku.com/install.sh | sh - - # - name: Deploy to Heroku - # uses: akhileshns/heroku-deploy@v3.12.14 - # with: - # heroku_api_key: ${{ secrets.HEROKU_API_KEY }} - # heroku_app_name: ${{ secrets.HEROKU_APP_NAME }} - # heroku_email: ${{ secrets.HEROKU_EMAIL }} - # branch: main - # appdir: backend - - # - name: Show git remotes - # run: git remote -v - - # - name: Test Heroku login - # run: heroku auth:whoami - # env: - # HEROKU_API_KEY: ${{ secrets.HEROKU_API_KEY }} diff --git a/backend/.env.example b/backend/.env.example new file mode 100644 index 0000000..f2b12b8 --- /dev/null +++ b/backend/.env.example @@ -0,0 +1,22 @@ +DATABASE_URL= + +SECRET_KEY= + +OPENROUTER_API_KEY= + +NEXT_PUBLIC_API_BASE_URL=http://127.0.0.1:8000 +GOOGLE_API_KEY= +AWS_ACCESS_KEY= +AWS_SECRET_KEY= +BUCKET_NAME = +GOOGLE_REDIRECT_URI= +GOOGLE_CLIENT_ID= +GOOGLE_CLIENT_SECRET= + +ACTIVEMQ_QUEUE= + + +ACTIVEMQ_URL= +ACTIVEMQ_QUEUE= +ACTIVEMQ_USER= +ACTIVEMQ_PASS= \ No newline at end of file diff --git a/backend/.gitignore b/backend/.gitignore index 1d39339..4cf92fb 100644 --- a/backend/.gitignore +++ b/backend/.gitignore @@ -20,3 +20,6 @@ __pycache__/ 
app/data_models/__pycache__/** /dockerfile + + +/archives/* \ No newline at end of file diff --git a/backend/alembic/env.py b/backend/alembic/env.py index f7646bc..7c923b7 100644 --- a/backend/alembic/env.py +++ b/backend/alembic/env.py @@ -38,6 +38,7 @@ from app.data_models.folder_item import folder_item from app.data_models.category import Category from app.data_models.content_category import ContentCategory +from app.data_models.tag import Tag target_metadata = Base.metadata diff --git a/backend/alembic/versions/10a2bc716159_adding_html_url_for_static_web_rendering.py b/backend/alembic/versions/10a2bc716159_adding_html_url_for_static_web_rendering.py new file mode 100644 index 0000000..e496b69 --- /dev/null +++ b/backend/alembic/versions/10a2bc716159_adding_html_url_for_static_web_rendering.py @@ -0,0 +1,32 @@ +"""adding html url for static web rendering + +Revision ID: 10a2bc716159 +Revises: 82732e62263b +Create Date: 2026-02-10 12:10:19.134266 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '10a2bc716159' +down_revision: Union[str, None] = '82732e62263b' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('content', sa.Column('html_content_url', sa.String(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('content', 'html_content_url') + # ### end Alembic commands ### diff --git a/backend/alembic/versions/1e2cab304cac_folder_embedding_column_in_folder_table.py b/backend/alembic/versions/1e2cab304cac_folder_embedding_column_in_folder_table.py new file mode 100644 index 0000000..51a215f --- /dev/null +++ b/backend/alembic/versions/1e2cab304cac_folder_embedding_column_in_folder_table.py @@ -0,0 +1,36 @@ +"""folder_embedding column in folder table + +Revision ID: 1e2cab304cac +Revises: c32fb8abe107 +Create Date: 2025-12-29 15:15:04.787853 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from pgvector.sqlalchemy import Vector + + + +# revision identifiers, used by Alembic. +revision: str = '1e2cab304cac' +down_revision: Union[str, None] = 'c32fb8abe107' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + + #sa.Column('embedding', Vector(dim=1536), nullable=True), + op.add_column('folder', sa.Column('folder_embedding', Vector(dim=1536), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('folder', 'folder_embedding') + # ### end Alembic commands ### diff --git a/backend/alembic/versions/2994a72baf17_adding_bucketing_mode_boolean_column_to_.py b/backend/alembic/versions/2994a72baf17_adding_bucketing_mode_boolean_column_to_.py new file mode 100644 index 0000000..83fb709 --- /dev/null +++ b/backend/alembic/versions/2994a72baf17_adding_bucketing_mode_boolean_column_to_.py @@ -0,0 +1,53 @@ +"""adding bucketing mode boolean column to Folder table + +Revision ID: 2994a72baf17 +Revises: 9076b42a5b56 +Create Date: 2025-12-23 12:56:56.367544 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = '2994a72baf17' +down_revision: Union[str, None] = '9076b42a5b56' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('folder', sa.Column('bucketing_mode', sa.Boolean(), nullable=True)) + + op.execute("UPDATE folder SET bucketing_mode = false") + + op.alter_column("folder", "bucketing_mode", nullable=False) + + op.execute("UPDATE folder SET keywords = ARRAY[]::VARCHAR[]") + + op.execute("UPDATE folder SET url_patterns = ARRAY[]::VARCHAR[]") + + op.alter_column('folder', 'keywords', + existing_type=postgresql.ARRAY(sa.VARCHAR()), + nullable=False) + op.alter_column('folder', 'url_patterns', + existing_type=postgresql.ARRAY(sa.VARCHAR()), + nullable=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column('folder', 'url_patterns', + existing_type=postgresql.ARRAY(sa.VARCHAR()), + nullable=True) + op.alter_column('folder', 'keywords', + existing_type=postgresql.ARRAY(sa.VARCHAR()), + nullable=True) + op.drop_column('folder', 'bucketing_mode') + # ### end Alembic commands ### diff --git a/backend/alembic/versions/4a23460e20b9_adding_in_foreign_contraint.py b/backend/alembic/versions/4a23460e20b9_adding_in_foreign_contraint.py new file mode 100644 index 0000000..8313945 --- /dev/null +++ b/backend/alembic/versions/4a23460e20b9_adding_in_foreign_contraint.py @@ -0,0 +1,36 @@ +"""adding in foreign contraint + +Revision ID: 4a23460e20b9 +Revises: 55170ec071fa +Create Date: 2026-01-23 12:11:14.992338 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '4a23460e20b9' +down_revision: Union[str, None] = '55170ec071fa' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('content_tag', sa.Column('user_id', sa.UUID(), nullable=False)) + op.drop_constraint(op.f('content_tag_content_id_fkey'), 'content_tag', type_='foreignkey') + op.create_foreign_key(None, 'content_tag', 'content_item', ['content_id', 'user_id'], ['content_id', 'user_id']) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_constraint(None, 'content_tag', type_='foreignkey') + op.create_foreign_key(op.f('content_tag_content_id_fkey'), 'content_tag', 'content', ['content_id'], ['content_id']) + op.drop_column('content_tag', 'user_id') + # ### end Alembic commands ### diff --git a/backend/alembic/versions/5249d1cdad1b_adding_in_contenttag_table_for_the_.py b/backend/alembic/versions/5249d1cdad1b_adding_in_contenttag_table_for_the_.py new file mode 100644 index 0000000..66c29a0 --- /dev/null +++ b/backend/alembic/versions/5249d1cdad1b_adding_in_contenttag_table_for_the_.py @@ -0,0 +1,38 @@ +"""adding in contenttag table for the relationship between content and tags + +Revision ID: 5249d1cdad1b +Revises: 59bbe3b9cb96 +Create Date: 2026-01-23 11:27:20.100185 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '5249d1cdad1b' +down_revision: Union[str, None] = '59bbe3b9cb96' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('content_tag', + sa.Column('content_id', sa.UUID(), nullable=False), + sa.Column('tag_id', sa.UUID(), nullable=False), + sa.ForeignKeyConstraint(['content_id'], ['content.content_id'], ), + sa.ForeignKeyConstraint(['tag_id'], ['tag.tag_id'], ), + sa.PrimaryKeyConstraint('content_id', 'tag_id') + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('content_tag') + # ### end Alembic commands ### diff --git a/backend/alembic/versions/55170ec071fa_putting_tags_relationship_inside_.py b/backend/alembic/versions/55170ec071fa_putting_tags_relationship_inside_.py new file mode 100644 index 0000000..4fe6ec7 --- /dev/null +++ b/backend/alembic/versions/55170ec071fa_putting_tags_relationship_inside_.py @@ -0,0 +1,32 @@ +"""putting tags relationship inside ContentItem + +Revision ID: 55170ec071fa +Revises: 5249d1cdad1b +Create Date: 2026-01-23 11:56:29.320314 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '55170ec071fa' +down_revision: Union[str, None] = '5249d1cdad1b' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### diff --git a/backend/alembic/versions/59bbe3b9cb96_getting_rid_of_the_usertag_table_for_.py b/backend/alembic/versions/59bbe3b9cb96_getting_rid_of_the_usertag_table_for_.py new file mode 100644 index 0000000..ae931e7 --- /dev/null +++ b/backend/alembic/versions/59bbe3b9cb96_getting_rid_of_the_usertag_table_for_.py @@ -0,0 +1,77 @@ +"""getting rid of the UserTag table for simplicity + +Revision ID: 59bbe3b9cb96 +Revises: 8c28d27938c8 +Create Date: 2026-01-21 11:43:35.990670 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision: str = '59bbe3b9cb96' +down_revision: Union[str, None] = '8c28d27938c8' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # 1. Add the column as nullable first + op.add_column('tag', sa.Column('user_id', sa.UUID(), nullable=True)) + + # 2. DATA MIGRATION: Copy user_id from user_tag into tag + # This matches the user_id to the tag_id so we don't lose ownership + op.execute( + """ + UPDATE tag + SET user_id = user_tag.user_id + FROM user_tag + WHERE tag.tag_id = user_tag.tag_id + """ + ) + + # 3. Clean up: Delete any tags that didn't have an owner (optional but safer) + # If a tag doesn't have a user_id now, the NOT NULL constraint will still fail. + op.execute("DELETE FROM tag WHERE user_id IS NULL") + + # 4. Now that every row has a user_id, we can safely set NOT NULL + op.alter_column('tag', 'user_id', nullable=False) + + # 5. Apply the rest of your changes + op.create_foreign_key(op.f('tag_user_id_fkey'), 'tag', 'users', ['user_id'], ['id']) + + op.alter_column('tag', 'first_created_at', + existing_type=postgresql.TIMESTAMP(timezone=True), + type_=sa.TIMESTAMP(), + existing_nullable=True, + existing_server_default=sa.text('now()')) + + op.drop_constraint('tag_tag_name_key', 'tag', type_='unique') + + # 6. Drop the old table LAST + op.drop_table('user_tag') + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_constraint(None, 'tag', type_='foreignkey') + op.create_unique_constraint(op.f('tag_tag_name_key'), 'tag', ['tag_name']) + op.alter_column('tag', 'first_created_at', + existing_type=sa.TIMESTAMP(), + type_=postgresql.TIMESTAMP(timezone=True), + existing_nullable=True, + existing_server_default=sa.text('now()')) + op.drop_column('tag', 'user_id') + op.create_table('user_tag', + sa.Column('user_id', sa.UUID(), autoincrement=False, nullable=False), + sa.Column('tag_id', sa.UUID(), autoincrement=False, nullable=False), + sa.Column('first_created_at', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('now()'), autoincrement=False, nullable=True), + sa.ForeignKeyConstraint(['tag_id'], ['tag.tag_id'], name=op.f('user_tag_tag_id_fkey'), ondelete='CASCADE'), + sa.ForeignKeyConstraint(['user_id'], ['users.id'], name=op.f('user_tag_user_id_fkey'), ondelete='CASCADE'), + sa.PrimaryKeyConstraint('user_id', 'tag_id', name=op.f('user_tag_pkey')) + ) + # ### end Alembic commands ### diff --git a/backend/alembic/versions/82732e62263b_adding_html_url_for_static_web_rendering.py b/backend/alembic/versions/82732e62263b_adding_html_url_for_static_web_rendering.py new file mode 100644 index 0000000..dd79d38 --- /dev/null +++ b/backend/alembic/versions/82732e62263b_adding_html_url_for_static_web_rendering.py @@ -0,0 +1,32 @@ +"""adding html url for static web rendering + +Revision ID: 82732e62263b +Revises: c4ba502e213e +Create Date: 2026-02-10 11:52:24.769869 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '82732e62263b' +down_revision: Union[str, None] = 'c4ba502e213e' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + pass + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### diff --git a/backend/alembic/versions/8c28d27938c8_tag_database_details.py b/backend/alembic/versions/8c28d27938c8_tag_database_details.py new file mode 100644 index 0000000..fff5877 --- /dev/null +++ b/backend/alembic/versions/8c28d27938c8_tag_database_details.py @@ -0,0 +1,47 @@ +"""tag database details + +Revision ID: 8c28d27938c8 +Revises: 9d82db84f6ea +Create Date: 2026-01-07 17:50:18.492186 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '8c28d27938c8' +down_revision: Union[str, None] = '9d82db84f6ea' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('tag', + sa.Column('tag_id', sa.UUID(), nullable=False), + sa.Column('tag_name', sa.String(), nullable=False), + sa.Column('first_created_at', sa.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.PrimaryKeyConstraint('tag_id'), + sa.UniqueConstraint('tag_name') + ) + op.create_table('user_tag', + sa.Column('user_id', sa.UUID(), nullable=False), + sa.Column('tag_id', sa.UUID(), nullable=False), + sa.Column('first_created_at', sa.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.ForeignKeyConstraint(['tag_id'], ['tag.tag_id'], ondelete='CASCADE'), + sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('user_id', 'tag_id') + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('user_tag') + op.drop_table('tag') + # ### end Alembic commands ### diff --git a/backend/alembic/versions/9076b42a5b56_added_keyowrds_and_url_patterns_in_the_.py b/backend/alembic/versions/9076b42a5b56_added_keyowrds_and_url_patterns_in_the_.py new file mode 100644 index 0000000..1068ba8 --- /dev/null +++ b/backend/alembic/versions/9076b42a5b56_added_keyowrds_and_url_patterns_in_the_.py @@ -0,0 +1,34 @@ +"""Added keyowrds and url_patterns in the Folder table + +Revision ID: 9076b42a5b56 +Revises: bb902acb986a +Create Date: 2025-12-22 12:43:59.420387 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = '9076b42a5b56' +down_revision: Union[str, None] = 'bb902acb986a' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('folder', sa.Column('keywords', postgresql.ARRAY(sa.String()), nullable=True)) + op.add_column('folder', sa.Column('url_patterns', postgresql.ARRAY(sa.String()), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('folder', 'url_patterns') + op.drop_column('folder', 'keywords') + # ### end Alembic commands ### diff --git a/backend/alembic/versions/9d82db84f6ea_tag_tables_and_many_to_many_tags.py b/backend/alembic/versions/9d82db84f6ea_tag_tables_and_many_to_many_tags.py new file mode 100644 index 0000000..2a37107 --- /dev/null +++ b/backend/alembic/versions/9d82db84f6ea_tag_tables_and_many_to_many_tags.py @@ -0,0 +1,32 @@ +"""tag tables and many to many tags + +Revision ID: 9d82db84f6ea +Revises: 1e2cab304cac +Create Date: 2026-01-07 17:42:13.168622 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '9d82db84f6ea' +down_revision: Union[str, None] = '1e2cab304cac' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f('content_ai_embedding_idx'), table_name='content_ai', postgresql_with={'m': '16', 'ef_construction': '64'}, postgresql_using='hnsw') + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.create_index(op.f('content_ai_embedding_idx'), 'content_ai', ['embedding'], unique=False, postgresql_with={'m': '16', 'ef_construction': '64'}, postgresql_using='hnsw') + # ### end Alembic commands ### diff --git a/backend/alembic/versions/c32fb8abe107_adding_decription_column_to_folder_table.py b/backend/alembic/versions/c32fb8abe107_adding_decription_column_to_folder_table.py new file mode 100644 index 0000000..d72c848 --- /dev/null +++ b/backend/alembic/versions/c32fb8abe107_adding_decription_column_to_folder_table.py @@ -0,0 +1,36 @@ +"""adding decription column to Folder table + +Revision ID: c32fb8abe107 +Revises: 2994a72baf17 +Create Date: 2025-12-29 14:34:22.233508 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'c32fb8abe107' +down_revision: Union[str, None] = '2994a72baf17' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + op.add_column('folder', sa.Column('description', sa.String(), nullable=True)) + + op.execute("UPDATE folder SET description = ''") + + op.alter_column('folder', 'description', nullable=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('folder', 'description') + # ### end Alembic commands ### diff --git a/backend/alembic/versions/c4ba502e213e_adding_in_embedding_update_timestamp.py b/backend/alembic/versions/c4ba502e213e_adding_in_embedding_update_timestamp.py new file mode 100644 index 0000000..8821de2 --- /dev/null +++ b/backend/alembic/versions/c4ba502e213e_adding_in_embedding_update_timestamp.py @@ -0,0 +1,32 @@ +"""adding in embedding update timestamp + +Revision ID: c4ba502e213e +Revises: dac411465e74 +Create Date: 2026-01-24 21:32:46.319882 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'c4ba502e213e' +down_revision: Union[str, None] = 'dac411465e74' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('users', sa.Column('last_embedding_update', sa.TIMESTAMP(), server_default='NOW()', nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('users', 'last_embedding_update') + # ### end Alembic commands ### diff --git a/backend/alembic/versions/dac411465e74_adding_in_user_embedding.py b/backend/alembic/versions/dac411465e74_adding_in_user_embedding.py new file mode 100644 index 0000000..1b260c5 --- /dev/null +++ b/backend/alembic/versions/dac411465e74_adding_in_user_embedding.py @@ -0,0 +1,35 @@ +"""adding in user embedding + +Revision ID: dac411465e74 +Revises: 4a23460e20b9 +Create Date: 2026-01-24 21:11:01.473125 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from pgvector.sqlalchemy import Vector + + + +# revision identifiers, used by Alembic. 
+revision: str = 'dac411465e74' +down_revision: Union[str, None] = '4a23460e20b9' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('users', sa.Column('user_embedding', Vector(dim=1536), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('users', 'user_embedding') + # ### end Alembic commands ### diff --git a/backend/app/ai/embedder.py b/backend/app/ai/embedder.py index 1a0e008..413b04c 100644 --- a/backend/app/ai/embedder.py +++ b/backend/app/ai/embedder.py @@ -1,5 +1,6 @@ import os from typing import List +from app.core.settings import Settings from openai import OpenAI diff --git a/backend/app/ai/summarizer.py b/backend/app/ai/summarizer.py index b412b0f..913bd2e 100644 --- a/backend/app/ai/summarizer.py +++ b/backend/app/ai/summarizer.py @@ -1,13 +1,16 @@ import os from openai import OpenAI +from app.core.logging import logger +from app.core.settings import get_settings +settings = get_settings() class Summarizer: def __init__(self, model: str = "openrouter/auto:floor", system_prompt: str | None = None): self.model = model self.client = OpenAI( base_url="https://openrouter.ai/api/v1", - api_key=os.getenv("OPENROUTER_API_KEY"), + api_key=settings.OPENROUTER_API_KEY, ) self.system_prompt = system_prompt or ( "You are a concise technical summarizer. 
" @@ -27,7 +30,5 @@ def summarize(self, text: str) -> str | None: return response.choices[0].message.content.strip() except Exception as e: # Preserve behavior: return None on failure - import logging - - logging.error(f"OpenRouter summarization failed: {e}") + logger.error(f"OpenRouter summarization failed: {e}") return None diff --git a/backend/app/api/main.py b/backend/app/api/main.py index 669132e..60a14ea 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -2,31 +2,18 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from starlette.requests import Request - +from app.core.logging import setup_logging, logger from dotenv import load_dotenv import os -import logging -import sys - -from app.routes import user_router, folder_router, auth_router, content_router, setting_router +from app.routes import user_router, folder_router, auth_router, content_router, setting_router, tag_router # Load environment variables from a .env file load_dotenv() - app = FastAPI() -logger = logging.getLogger(__name__) - -# StreamHandler -logging.basicConfig( - level=logging.DEBUG, - format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", - handlers=[logging.StreamHandler(sys.stdout)] -) -logger.info('API is starting up') - +setup_logging() # Update CORS origins origins = ["*"] @@ -45,6 +32,7 @@ app.include_router(auth_router) app.include_router(content_router) app.include_router(setting_router) +app.include_router(tag_router) @app.middleware("http") diff --git a/backend/app/core/logging.py b/backend/app/core/logging.py new file mode 100644 index 0000000..bbf0115 --- /dev/null +++ b/backend/app/core/logging.py @@ -0,0 +1,9 @@ +import logging + +def setup_logging(): + logging.basicConfig( + level=logging.DEBUG, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s" +) + +logger = logging.getLogger(__name__) diff --git a/backend/app/core/settings.py b/backend/app/core/settings.py new file mode 100644 index 0000000..ea6f2ac --- 
/dev/null +++ b/backend/app/core/settings.py @@ -0,0 +1,36 @@ +from pydantic_settings import BaseSettings +from pydantic_settings import SettingsConfigDict +from functools import lru_cache + +class Settings(BaseSettings): + model_config = SettingsConfigDict( + env_file=".env", + env_file_encoding="utf-8", + case_sensitive=False, + extra="ignore" + ) + + DATABASE_URL: str + + SECRET_KEY: str + + AWS_ACCESS_KEY: str + AWS_SECRET_KEY: str + BUCKET_NAME: str + + GOOGLE_CLIENT_ID: str + GOOGLE_CLIENT_SECRET: str + GOOGLE_REDIRECT_URI: str + + OPENAI_API_KEY: str + OPENROUTER_API_KEY: str + + ACTIVEMQ_URL: str + ACTIVEMQ_QUEUE: str + ACTIVEMQ_USER: str + ACTIVEMQ_PASS: str + + +@lru_cache() +def get_settings() -> Settings: + return Settings() diff --git a/backend/app/data_models/content.py b/backend/app/data_models/content.py index 0a6bb2d..22ae89f 100644 --- a/backend/app/data_models/content.py +++ b/backend/app/data_models/content.py @@ -17,6 +17,7 @@ class Content(Base): title = Column(String, nullable=True) source = Column(String, nullable=True) first_saved_at = Column(TIMESTAMP(timezone=True), default=func.now()) + html_content_url = Column(String, nullable=True) content_ai = relationship("ContentAI", backref="content", uselist=False) @@ -24,4 +25,5 @@ class Content(Base): "Category", secondary=ContentCategory, back_populates="contents" - ) \ No newline at end of file + ) + diff --git a/backend/app/data_models/content_item.py b/backend/app/data_models/content_item.py index d28aceb..0ad228e 100644 --- a/backend/app/data_models/content_item.py +++ b/backend/app/data_models/content_item.py @@ -3,6 +3,8 @@ from sqlalchemy.sql import func from sqlalchemy.orm import relationship from app.db.database import Base +from app.data_models.content_tag import ContentTag + class ContentItem(Base): __tablename__ = "content_item" @@ -14,6 +16,12 @@ class ContentItem(Base): content = relationship("Content", backref="content_items") read = Column(Boolean, nullable=False, 
server_default=text('false')) + tags = relationship( + 'Tag', + secondary=ContentTag, + back_populates="contents" + ) + # class ContentItem(Base): diff --git a/backend/app/data_models/content_tag.py b/backend/app/data_models/content_tag.py new file mode 100644 index 0000000..c111c49 --- /dev/null +++ b/backend/app/data_models/content_tag.py @@ -0,0 +1,22 @@ + +from sqlalchemy import Column, ForeignKey, Table, ForeignKeyConstraint + +from sqlalchemy.dialects.postgresql import UUID + + +from app.db.database import Base + + + +ContentTag = Table( + "content_tag", + Base.metadata, + Column("content_id", UUID(as_uuid=True), primary_key=True), + Column("user_id", UUID(as_uuid=True), primary_key=True), + Column("tag_id", UUID(as_uuid=True), ForeignKey("tag.tag_id"), primary_key=True), + + ForeignKeyConstraint( + ["content_id", "user_id"], + ["content_item.content_id", "content_item.user_id"] + ) +) \ No newline at end of file diff --git a/backend/app/data_models/folder.py b/backend/app/data_models/folder.py index 25de86a..bee4abc 100644 --- a/backend/app/data_models/folder.py +++ b/backend/app/data_models/folder.py @@ -1,9 +1,16 @@ -from sqlalchemy import Column, String, TIMESTAMP, ForeignKey +from sqlalchemy import Column, String, TIMESTAMP, ForeignKey, Boolean from sqlalchemy.dialects.postgresql import UUID from app.db.database import Base from pydantic import BaseModel, EmailStr from datetime import datetime import uuid +from pgvector.sqlalchemy import Vector + + +from sqlalchemy.orm import Mapped, mapped_column + +from sqlalchemy.dialects.postgresql import ARRAY + class Folder(Base): @@ -13,5 +20,12 @@ class Folder(Base): user_id = Column(UUID(as_uuid=True), ForeignKey("users.id", ondelete="CASCADE"), nullable=False) parent_id = Column(UUID(as_uuid=True), ForeignKey("folder.folder_id", ondelete="CASCADE"), nullable=False) folder_name = Column(String, nullable=False) + bucketing_mode : Mapped[bool] = mapped_column(Boolean, nullable=False, default=False, 
server_default="false") + keywords : Mapped[list[str]] = mapped_column(ARRAY(String)) + url_patterns : Mapped[list[str]] = mapped_column(ARRAY(String)) + description : Mapped[str] = mapped_column(String) + folder_embedding = Column(Vector(1536), nullable=True) #1536 for the gpt model param (small model) created_at = Column(TIMESTAMP, server_default="NOW()") + + diff --git a/backend/app/data_models/tag.py b/backend/app/data_models/tag.py new file mode 100644 index 0000000..5cf53d6 --- /dev/null +++ b/backend/app/data_models/tag.py @@ -0,0 +1,28 @@ +from sqlalchemy import Column, String, TIMESTAMP, ForeignKey +from sqlalchemy.orm import relationship, Mapped +from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.sql import func + +from app.db.database import Base +import uuid +from app.data_models.content_category import ContentCategory +from app.data_models.content_tag import ContentTag +from app.data_models.category import Category + + + +class Tag(Base): + __tablename__ = "tag" + + tag_id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + tag_name = Column(String, nullable=False) + user_id = Column(UUID(as_uuid=True), ForeignKey("users.id"), nullable=False) + first_created_at = Column(TIMESTAMP, server_default="NOW()") + + owner: Mapped["User"] = relationship("User", back_populates="user_tags") + + contents = relationship( + "ContentItem", + secondary=ContentTag, + back_populates="tags" + ) \ No newline at end of file diff --git a/backend/app/data_models/user.py b/backend/app/data_models/user.py index d653bab..44bdf96 100644 --- a/backend/app/data_models/user.py +++ b/backend/app/data_models/user.py @@ -1,22 +1,27 @@ from sqlalchemy import Column, String, TIMESTAMP from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.orm import relationship, Mapped +from pgvector.sqlalchemy import Vector + from app.db.database import Base -from pydantic import BaseModel, EmailStr -from datetime import datetime import uuid - class User(Base): 
__tablename__ = "users" - id = Column(UUID(as_uuid=True), primary_key=True, default=uuid) + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) email = Column(String, unique=True, nullable=False) created_at = Column(TIMESTAMP, server_default="NOW()") username = Column(String, nullable=False) password = Column(String, nullable=False) google_id = Column(String, nullable=True) profile_path = Column(String, default='') + user_embedding = Column(Vector(1536), nullable=True) + last_embedding_update = Column(TIMESTAMP, server_default="NOW()") + # Updated relationship: Point directly to Tag + # back_populates should match the attribute name in your Tag model (e.g., 'owner') + user_tags: Mapped[list["Tag"]] = relationship("Tag", back_populates="owner", cascade="all, delete-orphan") # class UserCreate(BaseModel): diff --git a/backend/app/db/database.py b/backend/app/db/database.py index bfa36be..652229d 100644 --- a/backend/app/db/database.py +++ b/backend/app/db/database.py @@ -1,26 +1,25 @@ -import os + from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker, declarative_base -from dotenv import load_dotenv - -load_dotenv() +from app.core.settings import get_settings -DATABASE_URL = os.getenv("DATABASE_URL") - -if DATABASE_URL.startswith('postgres://'): - DATABASE_URL = DATABASE_URL.replace('postgres://', 'postgresql://', 1) -print("DB URL: ", DATABASE_URL) +settings = get_settings() try: - engine = create_engine(DATABASE_URL, connect_args={ + engine = create_engine( + settings.DATABASE_URL, connect_args={ "options": "-c timezone=UTC" }) print("Connected") except Exception as e: print("Connection falied: ", e) -# managing transactions and DB state -SessionLocal = sessionmaker(bind=engine) + +SessionLocal = sessionmaker( + autocommit=False, + autoflush=False, + bind=engine +) #Initialize the base for all datamodels Base = declarative_base() diff --git a/backend/app/db/init_db.py b/backend/app/db/init_db.py index dc34f22..2098ec6 100644 
--- a/backend/app/db/init_db.py +++ b/backend/app/db/init_db.py @@ -1,5 +1,5 @@ from app.db.database import engine, Base -from app.data_models import content, content_ai, content_item, user, folder, folder_item +from app.data_models import tag, user_tag, content, content_ai, content_item, user, folder, folder_item # Base.metadata.create_all(bind=engine) # print("All tables created") \ No newline at end of file diff --git a/backend/app/dependencies.py b/backend/app/dependencies.py index b582669..d782bb9 100644 --- a/backend/app/dependencies.py +++ b/backend/app/dependencies.py @@ -19,11 +19,13 @@ from pathlib import Path +from app.core.settings import get_settings dotenv_path = Path(__file__).resolve().parent.parent / "app"/ "api" / ".env" print("Loading .env file from:", dotenv_path) load_dotenv(dotenv_path) -SECRET_KEY = os.getenv('SECRET_KEY') +settings = get_settings() +SECRET_KEY = settings.SECRET_KEY print("Secret key from .env within dependencies file:", SECRET_KEY) if isinstance(SECRET_KEY, str): diff --git a/backend/app/embeddings/embedding_manager.py b/backend/app/embeddings/embedding_manager.py index 69421d2..d0930ae 100644 --- a/backend/app/embeddings/embedding_manager.py +++ b/backend/app/embeddings/embedding_manager.py @@ -1,5 +1,3 @@ -import logging - from uuid import UUID from sqlalchemy.orm import Session from sqlalchemy import exists @@ -22,10 +20,12 @@ from app.embeddings.semantic_cache import SemanticCache from collections import defaultdict +import logging -load_dotenv() logger = logging.getLogger(__name__) +load_dotenv() + class ContentEmbeddingManager: ''' Manages: @@ -160,7 +160,20 @@ def query_similar_content(self, query, user_id:UUID, start_date=None,end_date=No # .filter(ContentItem.user_id == user_id) # ) - TOP_K_FETCH = 50 + + # cosine_dist = Folder.folder_embedding.cosine_distance(metadataVector) + # similarity = (1 - cosine_dist).label("similarity") + + # results = ( + # self.db.query(Folder, similarity) + # .filter(Folder.user_id == 
user_id) + # .filter(Folder.bucketing_mode == True) + # .order_by(cosine_dist) # Nearest distance first + # .limit(5) + # .all() + # ) + + TOP_K_FETCH = 6 query = ( self.db.query( diff --git a/backend/app/exceptions/content_exceptions.py b/backend/app/exceptions/content_exceptions.py new file mode 100644 index 0000000..b186fa5 --- /dev/null +++ b/backend/app/exceptions/content_exceptions.py @@ -0,0 +1,22 @@ + + +class EmbeddingManagerNotFound(Exception): + pass + + +class NoMatchedContent(Exception): + pass + + +class ContentItemNotFound(Exception): + def __init__(self, content_id: str): + super().__init__(f"Content item with content id {content_id} not found") + + +class NotesNotFound(Exception): + def __init__(self, content_id: str): + super().__init__(f"Notes for bookmark {content_id} not found") + + +class ContentNotFound(Exception): + pass \ No newline at end of file diff --git a/backend/app/exceptions/folder.py b/backend/app/exceptions/folder.py new file mode 100644 index 0000000..058130a --- /dev/null +++ b/backend/app/exceptions/folder.py @@ -0,0 +1,16 @@ +class FolderNotFound(Exception): + """Raised when a folder with a given ID cannot be found for a user.""" + pass + + +class FolderNotFound(Exception): + pass + +class DuplicateFolder(Exception): + pass + +class FolderEmbeddingError(Exception): + pass + +class FolderItemNotFound(Exception): + pass \ No newline at end of file diff --git a/backend/app/exceptions/tag_exceptions.py b/backend/app/exceptions/tag_exceptions.py new file mode 100644 index 0000000..cc16179 --- /dev/null +++ b/backend/app/exceptions/tag_exceptions.py @@ -0,0 +1,14 @@ + + +class TagsNotFound(Exception): + pass + + +class TagAlreadyExists(Exception): + pass + +class TagNotFound(Exception): + pass + +class UserTagRelationNotFound(Exception): + pass \ No newline at end of file diff --git a/backend/app/functions/AWS_s3.py b/backend/app/functions/AWS_s3.py index 507eb25..2f3cad3 100644 --- a/backend/app/functions/AWS_s3.py +++ 
b/backend/app/functions/AWS_s3.py @@ -1,25 +1,22 @@ -from dotenv import load_dotenv +from app.core.settings import get_settings +from urllib.parse import urlparse import os import boto3 -load_dotenv() - -BUCKET_NAME = os.environ.get('BUCKET_NAME') - -from urllib.parse import urlparse +settings = get_settings() +BUCKET_NAME = settings.BUCKET_NAME s3 = boto3.client( "s3", - region_name="us-east-1", # change this to your S3 region - aws_access_key_id=os.environ.get("AWS_ACCESS_KEY"), - aws_secret_access_key=os.environ.get("AWS_SECRET_KEY"), + region_name="us-east-1", + aws_access_key_id=settings.AWS_ACCESS_KEY, + aws_secret_access_key=settings.AWS_SECRET_KEY, ) def extract_s3_key(s3_url: str) -> str: parsed = urlparse(s3_url) - print("parsed values: ", parsed) # parsed.path is like '/pfps/58b59edcb9034a9db9a488185f56d5af_pixil-frame-0.png' return parsed.path.lstrip('/') # Remove leading slash @@ -33,10 +30,5 @@ def get_presigned_url(profile_url: str) -> str: "Key": extract_s3_key(profile_url) }, ExpiresIn=3600 # seconds = 1 hour - - - ) - - print("pre signed url: ", presigned_url) - + ) return presigned_url \ No newline at end of file diff --git a/backend/app/routes/__init__.py b/backend/app/routes/__init__.py index 417d010..8bd8291 100644 --- a/backend/app/routes/__init__.py +++ b/backend/app/routes/__init__.py @@ -3,6 +3,7 @@ from app.routes.auth import router as auth_router from app.routes.content import router as content_router from app.routes.settings import router as setting_router +from app.routes.tags import router as tag_router __all__ =[ @@ -10,5 +11,6 @@ folder_router, auth_router, content_router, - setting_router + setting_router, + tag_router, ] \ No newline at end of file diff --git a/backend/app/routes/auth.py b/backend/app/routes/auth.py index 6f94c44..d1d6d9f 100644 --- a/backend/app/routes/auth.py +++ b/backend/app/routes/auth.py @@ -1,35 +1,41 @@ -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, HTTPException, status 
from fastapi.responses import RedirectResponse, JSONResponse from app.db.database import get_db from app.data_models.user import User from app.functions.AWS_s3 import get_presigned_url +from app.core.settings import get_settings from app.utils.hashing import create_access_token from sqlalchemy.orm import Session from urllib.parse import urlencode - +import logging import httpx +import os +logger = logging.getLogger(__name__) -import os +settings = get_settings() -BUCKET_NAME = os.environ.get('BUCKET_NAME') -GOOGLE_REDIRECT_URI = os.environ.get('GOOGLE_REDIRECT_URI') -GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID") -GOOGLE_CLIENT_SECRET = os.getenv("GOOGLE_CLIENT_SECRET") +BUCKET_NAME = settings.BUCKET_NAME +GOOGLE_REDIRECT_URI = settings.GOOGLE_REDIRECT_URI +GOOGLE_CLIENT_ID = settings.GOOGLE_CLIENT_ID +GOOGLE_CLIENT_SECRET = settings.GOOGLE_CLIENT_SECRET router = APIRouter( prefix="/auth" ) -@router.get("/google") + +#implement CSRF when you get the chance +@router.get("/google", status_code=status.HTTP_307_TEMPORARY_REDIRECT) def handle_google_session(): try: - - print("google redirect uri ", GOOGLE_REDIRECT_URI ) - print("google client id: ", GOOGLE_CLIENT_ID) - print("google client secret: ", GOOGLE_CLIENT_SECRET) + if not GOOGLE_CLIENT_ID or not GOOGLE_REDIRECT_URI: + raise HTTPException( + status_code = status.HTTP_500_INTERNAL_SERVER_ERROR, + detail = 'google client id or redirect url was not found', + ) params = { "client_id": GOOGLE_CLIENT_ID, "redirect_uri": GOOGLE_REDIRECT_URI, @@ -42,8 +48,11 @@ def handle_google_session(): return RedirectResponse(google_auth_url) except Exception as e: - print("error occured in the backend: ", e) - return + logger.error(f"OAuth Initiation Error: {e}") + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Authentication service temporarily unavailable" + ) diff --git a/backend/app/routes/content.py b/backend/app/routes/content.py index d76cb7d..5664fd5 100644 --- 
a/backend/app/routes/content.py +++ b/backend/app/routes/content.py @@ -1,134 +1,152 @@ -from fastapi import APIRouter, Depends, HTTPException, Query +# 1. Standard Library Imports +import logging +from uuid import UUID + +# 2. Third-Party Imports (FastAPI, SQLAlchemy) +from fastapi import APIRouter, Depends, HTTPException, Query, status +from sqlalchemy.orm import Session + + +#AWS imports +import boto3 +from app.functions.AWS_s3 import extract_s3_key, get_presigned_url + + +# 3. Database & Models (Internal Data Structure) from app.db.database import get_db +from app.data_models.user import User from app.data_models.content import Content from app.data_models.content_item import ContentItem -from app.data_models.content_ai import ContentAI -from app.data_models.folder_item import folder_item -from app.data_models.folder import Folder -from app.schemas.content import ContentCreate, ContentSavedByUrl, ContentWithSummary, UserSavedContent, DBContent, TabRemover, NoteContentUpdate, UserSavedContentResponse, CategoryOut -from app.preprocessing.content_preprocessor import ContentPreprocessor -from app.preprocessing.query_preprocessor import QueryPreprocessor -from app.embeddings.embedding_manager import ContentEmbeddingManager -from app.deps.services import get_embedding_manager -from app.ai.categorizer import Categorizer -from app.data_models.user import User -from datetime import datetime, timezone -from uuid import uuid4 -import logging -from sqlalchemy.orm import joinedload -from dateutil.parser import isoparse +from app.core.settings import get_settings + + +# 4. Schemas (Pydantic / Request-Response shapes) +from app.schemas.content import ( + ContentCreate, + ContentSavedByUrl, + ContentWithSummary, + TabRemover, + NoteContentUpdate, + UserSavedContentResponse, + BookmarkImportRequest +) +# 5. 
Utilities & Security from app.utils.hashing import get_current_user_id from app.utils.user import get_current_user from app.utils.url import ensure_safe_url -from sqlalchemy.orm import Session -from uuid import UUID -from sqlalchemy import desc, select -import requests -import json +# 6. Service Layer (Business Logic) +from app.services.content_services import ( + search_content, + get_total_unread_count, + get_unread_content_service, + get_content_service, + update_note_service, + tab_content, + untabContent, + delete_content, + get_recent_saved_content, + import_browser_bookmarks_service, + _enqueue_new_content, + get_discover_content_service +) -from email.utils import quote +# 7. Exceptions +from app.exceptions.content_exceptions import ( + EmbeddingManagerNotFound, + NoMatchedContent, + NotesNotFound, + ContentItemNotFound, + ContentNotFound +) -import os -from dotenv import load_dotenv +logger = logging.getLogger(__name__) router = APIRouter( - # prefix="/content" + prefix="/content", + tags=["content"], ) -logger = logging.getLogger(__name__) +settings = get_settings() +settings.BUCKET_NAME = settings.BUCKET_NAME +s3 = boto3.client( + "s3", + region_name="us-east-1", + aws_access_key_id=settings.AWS_ACCESS_KEY, + aws_secret_access_key=settings.AWS_SECRET_KEY, +) -@router.get("/content/search", response_model=UserSavedContentResponse) -def search(query: str, user: User = Depends(get_current_user), db: Session = Depends(get_db)): - manager = get_embedding_manager() - manager.db = db - parsed_query = QueryPreprocessor().preprocess_query(query) - results = manager.query_similar_content( - query=parsed_query, - user_id=user.id - ) +# class UserSavedContentResponse(BaseModel): +# bookmarks: list[UserSavedContent] +# categories: Optional[list[CategoryOut] ] = [] +# next_cursor: Optional[str] = '' +# has_next: Optional[bool] = False - bookmark_data = [] - for content_ai, content in results: - bookmark_data.append( - UserSavedContent( - 
content_id=content_ai.content_id, - title=content.title, - url=content.url, - source=content.source, - first_saved_at=content.first_saved_at, - ai_summary=content_ai.ai_summary, - notes="", - tags=[] - ) +@router.get("/search", response_model=UserSavedContentResponse, status_code=status.HTTP_200_OK) +def search(query: str, user: User = Depends(get_current_user), db: Session = Depends(get_db)): + + try: + response_json = search_content(db=db, query=query, user=user ) + return response_json + + except EmbeddingManagerNotFound: + logging.error("Embedding manager not found ") + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="AI search engine is currently offline or broken" ) + + except NoMatchedContent: + raise HTTPException( + status_code=status.HTTP_204_NO_CONTENT, + detail="No Matched content found for this search query" + ) + except Exception as e: + logger.error(f"Search for query {query} failed. Error is as follows: {e}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Search is currently unavailable, please try again" + ) + - logger.info(f"Data for search: {bookmark_data}") - return { - "bookmarks": bookmark_data, - "categories": [], # or `None`, depending on how you define Optional - "has_next" : False - } +@router.get("/rediscover", status_code=200) +def get_discover_content(user_id: UUID = Depends(get_current_user_id), db: Session = Depends(get_db)): + try: + return get_discover_content_service(user_id=user_id, db=db) + except Exception as e: + logging.error(f"An error occured trying to fetch the users content: {e}") -def push_to_activemq(message: str): - ACTIVEMQ_URL=os.getenv('ACTIVEMQ_URL') - ACTIVEMQ_QUEUE= os.getenv('ACTIVEMQ_QUEUE') - ACTIVEMQ_USER= os.getenv('ACTIVEMQ_USER') - ACTIVEMQ_PASS= os.getenv('ACTIVEMQ_PASS') +@router.get("/{content_id}/archive") +def get_content_from_html(content_id: str, user_id: UUID = Depends(get_current_user_id), db: Session = 
Depends(get_db)): try: - url = f"{ACTIVEMQ_URL}/api/message/{quote(ACTIVEMQ_QUEUE)}?type=queue" - headers = {'Content-Type': 'text/plain'} + item : Content= db.query(Content).filter(Content.content_id == content_id).first() - response = requests.post(url, data=message, headers=headers, auth=(ACTIVEMQ_USER, ACTIVEMQ_PASS)) + #html_content_url - logging.debug(f"Response from ActiveMQ: {response.status_code} - {response.text}") - return response.status_code == 200 - - except requests.exceptions.RequestException as e: - logging.error(f"Error pushing to ActiveMQ: {e}") - return False - - -def _enqueue_new_content( - *, - url: str, - title: str | None, - source: str, - html: str | None, - user_id: UUID, - notes: str | None, - folder_id: str | UUID | None, -) -> None: - utc_time = datetime.now(timezone.utc) - payload = { - "content_payload": { - "url": url, - "title": title, - "source": source, - "first_saved_at": utc_time.isoformat(), - }, - "raw_html": html, - "user_id": str(user_id), - "notes": notes, - "folder_id": str(folder_id) if folder_id else None, - } - message = json.dumps(payload) - result = push_to_activemq(message=message) - if not result: - raise HTTPException(status_code=503, detail="Failed to push to ActiveMQ") - - -@router.post("/content/save") + presigned_url = get_presigned_url(str(item.html_content_url)) + + return {"url": presigned_url, 'success': True} + + + + + + + except Exception as e: + logging.error(f"Failed to get presigned url for the html contnt; {e}") + + +@router.post("/save") def save_content(content: ContentCreate, user: User = Depends(get_current_user), db: Session = Depends(get_db)): try: _enqueue_new_content( @@ -138,121 +156,35 @@ def save_content(content: ContentCreate, user: User = Depends(get_current_user), html=content.html, user_id=user.id, notes=content.notes, + tags=content.tags, folder_id=content.folder_id, ) - return {"status": "Success", 'message': 'Bookmark details sent to message queue'} - - 
push_to_activemq(message=message) - #create the new content - new_content = Content( - url=content.url, - title=content.title, - source="chrome_extension", - first_saved_at=utc_time, - ) - db.add(new_content) - db.flush() # generate content_id - - # Generate embedding only for new content - print("generating manager") - pre, sumz, emb = get_shared_services() - categorizer = Categorizer(file_url=content.url) - embedding_manager = ContentEmbeddingManager( - db, - preprocessor=pre, - summarizer=sumz, - embedder=emb, - categorizer=categorizer, - content_url=content.url, - ) - print("done generating") - raw_html = content.html - - try: - content_ai = embedding_manager.process_content(new_content, raw_html) - db.commit() - except Exception as e: - db.rollback() - print(f"Embedding generation failed: {e}") - # Prevent downstream foreign key error - return {"status": "unsuccessful", "error": "Failed to generate summary"} - - if not content_ai: - print("Embedding generation failed or skipped.") - - else: - print("Existing content link") - new_content = existing_content - content_ai = db.query(ContentAI).filter_by(content_id=new_content.content_id).first() - - # Check if this user already saved this content - existing_item = db.query(ContentItem).filter( - ContentItem.user_id == user_id, - ContentItem.content_id == new_content.content_id - ).first() - - print("current utc timezone: ", datetime.now(timezone.utc)) - - utc_time = datetime.now(timezone.utc) - - if not existing_item: - - new_item = ContentItem( - user_id=user_id, - content_id=new_content.content_id, - saved_at=utc_time, - notes=notes, - read=False - ) - db.add(new_item) - db.commit() - - saved_item = db.query(ContentItem).order_by(ContentItem.saved_at.desc()).first() - print(f"Retrieved from DB: {saved_item.saved_at}") - - - #add to the corresponding folder if any - - if content.folder_id and content.folder_id != '' and content.folder_id != 'default': - - new_item = folder_item( - folder_item_id = uuid4(), - 
folder_id = content.folder_id, - user_id = user_id, - content_id = new_content.content_id, - added_at = datetime.utcnow() - - ) - - db.add(new_item) - db.commit() - db.refresh(new_item) - else: - print("no valid fodler id found so skipping this part") - - - print("Successfully saved content for user.") - - return {"status": "Success"} + return {"status": "Success", 'message': 'Bookmark details sent to message queue'} except Exception as e: logger.error(f"Error occurred in saving the bookmark: {str(e)}", exc_info=True) return {'status': "unsuccessful", 'error': "Failed to save bookmark from chrome extension"} -@router.post("/content/save/url") +@router.post("/save/url") def save_content_by_url(content: ContentSavedByUrl, user: User = Depends(get_current_user), db: Session = Depends(get_db)): try: safe_url = ensure_safe_url(content.url) + html = '' + + title =safe_url + logger.info(f"safe url being set: {safe_url}") + _enqueue_new_content( - url=safe_url, - title=None, + url=str(safe_url) if safe_url else content.url, + title=content.url, source="web_app", html=None, user_id=user.id, notes=None, + tags=None, folder_id="default", ) return {'status': "Success", 'message': 'Bookmark details sent to message queue'} @@ -263,345 +195,162 @@ def save_content_by_url(content: ContentSavedByUrl, user: User = Depends(get_cur -@router.get("/content/unread/count") +@router.get("/unread/count", status_code=status.HTTP_200_OK) def get_unread_count(user_id: UUID = Depends(get_current_user_id), db: Session = Depends(get_db)): try: - total_count = db.query(ContentItem).filter(ContentItem.user_id == user_id, ContentItem.read == False).count() - - logger.debug(f"Total count fetched for user id {user_id} : {total_count}") - return {'status' : "succesful", 'total_count' : total_count} - + return get_total_unread_count(user_id=user_id, db=db) except Exception as e: logger.error(f"Error occured in count api router: {e}") - return {'status' : 'unsuccesfull', 'error' : str(e)} + + raise 
HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Unable to get the total count at this moment" + ) -@router.get("/content/unread", response_model=UserSavedContentResponse) -def get_unread_content(cursor: str = None, user_id: UUID = Depends(get_current_user_id), db: Session = Depends(get_db)): - print("in here") - PAGE_SIZE = 18 - cursor_dt = None - if cursor: - try: - cursor_dt = isoparse(cursor) - except ValueError: - raise HTTPException(status_code=400, detail="Invalid cursor format. Use ISO8601 datetime.") - +@router.get("/unread", response_model=UserSavedContentResponse, status_code=status.HTTP_200_OK) +def get_unread_content(cursor: str = None, user_id: UUID = Depends(get_current_user_id), db: Session = Depends(get_db)): - query = ( - db.query(ContentItem, Content, ContentAI.ai_summary) - .join(Content, ContentItem.content_id == Content.content_id) - .outerjoin(ContentAI, Content.content_id == ContentAI.content_id) - .options(joinedload(Content.categories)) - .filter(ContentItem.user_id == user_id, ContentItem.read == False) - ) - if cursor_dt: - query.filter(ContentItem.saved_at < cursor_dt) + try: + return get_unread_content_service(cursor=cursor, filter_category_names=[], user_id=user_id, db=db) - query = query.order_by(desc(ContentItem.saved_at)).limit(PAGE_SIZE + 1) + #catches the previous message we're bubbling up + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + + except Exception as e: + logging.error(f"failed to get unread content for user id {user_id}: {e}", exc_info=True) + raise HTTPException( + status_code=500, + detail="Server error trying to fetch the unread content for the users unread content" + ) - results = query.all() - # Check if we have more results - has_next = len(results) > PAGE_SIZE - results = results[:PAGE_SIZE] - category_list = [] - bookmark_data = [] - - results = ( - db.query(ContentItem, Content, ContentAI.ai_summary) - .join(Content, ContentItem.content_id == 
Content.content_id) - .outerjoin(ContentAI, Content.content_id == ContentAI.content_id) - .options(joinedload(Content.categories)) # Eager load categories - .filter(ContentItem.user_id == user_id, ContentItem.read == False) - .order_by(desc(ContentItem.saved_at)) - .all() - ) - - bookmark_data = [] - category_list = [] - - for item, content, ai_summary in results: - tags = [CategoryOut.from_orm(cat) for cat in content.categories] - bookmark_data.append( - UserSavedContent( - content_id=content.content_id, - url=content.url, - title=content.title, - source=content.source, - ai_summary=ai_summary, - first_saved_at=item.saved_at, - notes=item.notes, - tags=tags - ) - ) - category_list.extend(tags) - - unique_categories = {cat.category_id: cat for cat in category_list}.values() - - # The new cursor = last item’s saved_at - next_cursor = bookmark_data[-1].first_saved_at.isoformat() if bookmark_data else None - - return { - "bookmarks": bookmark_data, - "categories": list(unique_categories)[:10], - "next_cursor": next_cursor, - "has_next": has_next - } - - - # for item, content, ai_summary in results: - # tags = [CategoryOut.from_orm(cat) for cat in content.categories] - # bookmark_data.append( - # UserSavedContent( - # content_id=content.content_id, - # url=content.url, - # title=content.title, - # source=content.source, - # ai_summary=ai_summary, - # first_saved_at=item.saved_at, - # notes=item.notes, - # tags=tags - # ) - # ) - # category_list.extend(tags) - - # # Deduplicate categories by category_id - # unique_categories = {cat.category_id: cat for cat in category_list}.values() - - # return { - # "bookmarks": bookmark_data, - # "categories": list(unique_categories), - # "has_next": True, - # "next_cursor": '' - # } - - - -@router.get("/content", response_model=UserSavedContentResponse) +@router.get("/", response_model=UserSavedContentResponse) def get_user_content( cursor: str = None, categories: list[str] = None, user_id: UUID = Depends(get_current_user_id), db: 
Session = Depends(get_db) ): - PAGE_SIZE = 10 - - if categories: - categories = set(categories) - - # Parse cursor into datetime if provided - - #note: adding in another param - filters of categories we need to fetch - cursor_dt = None - if cursor: - try: - cursor_dt = isoparse(cursor) - except ValueError: - raise HTTPException(status_code=400, detail="Invalid cursor format. Use ISO8601 datetime.") - - # Base query - query = ( - db.query(ContentItem, Content, ContentAI.ai_summary) - .join(Content, ContentItem.content_id == Content.content_id) - .outerjoin(ContentAI, Content.content_id == ContentAI.content_id) - .options(joinedload(Content.categories)) - .filter(ContentItem.user_id == user_id) - ) - - if cursor_dt: - query = query.filter(ContentItem.saved_at < cursor_dt) - - query = query.order_by(desc(ContentItem.saved_at)).limit(PAGE_SIZE + 1) - - results = query.all() - - # Check if we have more results - has_next = len(results) > PAGE_SIZE - results = results[:PAGE_SIZE] - - category_list = [] - bookmark_data = [] - - for item, content, ai_summary in results: - tags = [CategoryOut.from_orm(cat) for cat in content.categories] - - #calculate the intersection between the two - - if categories: - common_tags = set(tags).intersection(categories) - - - if len(common_tags) >= 1: - bookmark_data.append( - UserSavedContent( - content_id=content.content_id, - url=content.url, - title=content.title, - source=content.source, - ai_summary=ai_summary, - first_saved_at=item.saved_at, - notes=item.notes, - tags=tags - ) - ) - category_list.extend(tags) - - #no categories being filteres - Just add them in - else: - bookmark_data.append( - UserSavedContent( - content_id=content.content_id, - url=content.url, - title=content.title, - source=content.source, - ai_summary=ai_summary, - first_saved_at=item.saved_at, - notes=item.notes, - tags=tags - ) - ) - category_list.extend(tags) - - - - unique_categories = {cat.category_id: cat for cat in category_list}.values() + try: + return 
get_content_service(cursor=cursor, user_id=user_id, db=db, filter_category_names=categories) + + except ValueError as e: + db.rollback() + raise HTTPException( + status_code=400, + detail=str(e) + ) + + except Exception as e: + db.rollback() + logging.error(f"Following error happened when fetching the content for user id {user_id}: {e}") + raise HTTPException( + status_code=500, + detail="Server side error trying to fetch the content for the user" + ) - # The new cursor = last item’s saved_at - next_cursor = bookmark_data[-1].first_saved_at.isoformat() if bookmark_data else None - return { - "bookmarks": bookmark_data, - "categories": list(unique_categories)[:10], - "next_cursor": next_cursor, - "has_next": has_next - } + -@router.post("/content/update/notes") +@router.post("/update/notes") def updatenote(data: NoteContentUpdate, user_id: UUID = Depends(get_current_user_id), db: Session = Depends(get_db)): - previous_note = db.query(ContentItem).filter(ContentItem.content_id == data.bookmarkID).first() - if not previous_note: - raise HTTPException(status_code=404, detail="Content item not found") + try: + return update_note_service(data=data, user_id=user_id, db=db) - previous_note.notes = data.notes - - # Commit the change - db.commit() - - return {"message": "Note updated successfully", "bookmarkID": str(data.bookmarkID)} - - - - - - + except NotesNotFound as e: + db.rollback() + logger.info("Notes for user was not found") + raise HTTPException(status_code=404, detail=str(e)) + + except Exception as e: + db.rollback() + logger.error(f"User notes failed to update: {e}") + raise HTTPException( + status_code=500, + detail="A server side error occured when trying to update the users notes" + ) -@router.post("/content/tab") +@router.post("/tab", status_code=200) def tab_user_content(content: TabRemover,user_id: UUID = Depends(get_current_user_id), db: Session = Depends(get_db)): try: - content_id = content.content_id - - query = db.query(Content).filter( - 
Content.content_id == content_id - ) - - DBcontent = query.one_or_none() - - if not DBContent: - raise HTTPException( - status_code=400, - detail="Content not found in the Contents table" + return tab_content(content=content, user_id=user_id, db = db) + + except ContentItemNotFound as e: + db.rollback() + raise HTTPException( + status_code=404, + detail=str(e) ) - - existing_item = db.query(ContentItem).filter( - ContentItem.user_id == user_id, - ContentItem.content_id == DBcontent.content_id - ).first() - - utc_time = datetime.now(timezone.utc) - - if not existing_item: - new_item = ContentItem( - user_id=user_id, - content_id=DBcontent.content_id, - saved_at=utc_time, - notes='' - ) - db.add(new_item) - db.commit() - - return {'success' : True} - except Exception as e: - print("error in the backend: ", e) - return {'success': False} - - - - + db.rollback() + logger.error(f"error in the backend: {e}") + raise HTTPException( + status_code=500, + detail="An error occured when trying to tab the content for the user" + ) -@router.post("/content/untab") +@router.post("/untab", status_code=200) def untab_user_content(content: TabRemover,user_id: UUID = Depends(get_current_user_id), db: Session = Depends(get_db)): - #remove based on user_id and content_id - content_id_to_delete = content.content_id + try: + return untabContent(content=content, user_id=user_id, db=db) - # Construct the query to find the specific ContentItem to delete - query = db.query(ContentItem).filter( - ContentItem.user_id == user_id, - ContentItem.content_id == content_id_to_delete - ) + except ContentItemNotFound as e: + logger.error(f"Content item could not be untabbed because it was not found: {e}") + db.rollback() + raise HTTPException( + status_code=400, + detail=str(e) + ) - deleted_row_count = query.delete(synchronize_session='fetch') + except Exception as e: + db.rollback() + logger.error(f"Error occured trying to untab for user {user_id}: {e}") - if deleted_row_count == 0: - raise 
HTTPException( - status_code=400, - detail="Content item not found for the specified user and content ID." + status_code=500, + detail="An error occured when trying to untab the users content. Try again in a little bit. " ) - db.commit() - - return { - "message": "Content item successfully untabbed (deleted).", - "user_id": user_id, - "content_id": content_id_to_delete, - "deleted_count": deleted_row_count - } + +@router.delete("/{content_id}", status_code=204) +def delete_content(content_id: UUID, user_id: UUID = Depends(get_current_user_id), db: Session=Depends(get_db)): -@router.delete("/content/{content_id}", status_code=204) -def delete_content(content_id: UUID, user_id: UUID, db: Session=Depends(get_db)): - content = db.query(Content).filter(Content.content_id == content_id, Content.user_id == user_id).first() - if not content: - raise HTTPException(status_code=404, detail="Content not found or not owned by user") + try: + return delete_content(content_id=content_id, user_id=user_id, db=db) - db.delete(content) - db.commit() - return + except Exception as e: + logger.error(f"Failed to delete content: {e}") + db.rollback() + raise HTTPException( + status_code=500, + detail="Failed to delete content. Please try again." 
+ ) -@router.post("/user/content/{content_id}") +@router.post("/read/{content_id}") def update_read(content_id: UUID, user_id: UUID = Depends(get_current_user_id), db: Session = Depends(get_db)): content = db.query(ContentItem).filter(ContentItem.content_id == content_id, ContentItem.user_id == user_id).first() @@ -615,7 +364,7 @@ def update_read(content_id: UUID, user_id: UUID = Depends(get_current_user_id), return {"success": True} -@router.get("/content/{content_id}", response_model=ContentWithSummary) +@router.get("/{content_id}", response_model=ContentWithSummary) def get_piece_content(content_id: UUID, user_id: UUID = Query(...), db: Session = Depends(get_db)): content = db.query(Content).filter(Content.content_id == content_id, Content.user_id == user_id).first() @@ -624,38 +373,38 @@ def get_piece_content(content_id: UUID, user_id: UUID = Query(...), db: Session return content -@router.post("/content/recent", response_model=list[ContentWithSummary]) +@router.post("/recent", response_model=list[ContentWithSummary], status_code=200) def get_recent_content(user_id: UUID = Depends(get_current_user_id), db: Session = Depends(get_db)): try: - results = ( - db.query(Content, Folder, ContentItem) - .join(ContentAI, ContentAI.content_id == Content.content_id) - .outerjoin(folder_item, folder_item.content_id == Content.content_id) - .join(ContentItem, ContentItem.content_id == Content.content_id) - .outerjoin(Folder, folder_item.folder_id == Folder.folder_id) - .filter(ContentItem.user_id == user_id) - .order_by(ContentItem.saved_at.desc()) - .limit(10) - .all() + return get_recent_saved_content(user_id=user_id, db=db) + + except ContentNotFound: + logger.error(f"Couldn't find any content recenty saved for user id {user_id}") + raise HTTPException( + status_code=204, + detail="No content found for user" ) + except Exception as e: + logger.error(f"Error occured in api endpoint '/content/recent' : {e}") + return [] - response = [] - for content, folder, _ in results: - 
response.append(ContentWithSummary( - content_id=content.content_id, - title=content.title, - url=content.url, - source=content.source, - first_saved_at=content.first_saved_at, - ai_summary=content.content_ai.ai_summary if content.content_ai else None, - folder = folder.folder_name if folder and folder.folder_name else 'none' - )) - logger.info(f"Recent content for user id {user_id} being returned: {response}") - return response +@router.post("/import", status_code=200) +def import_browser_bookmarks(bookmark_data : BookmarkImportRequest, user_id: UUID = Depends(get_current_user_id), db: Session = Depends(get_db)): + + try: + return import_browser_bookmarks_service(bookmark_data=bookmark_data, user_id=user_id, db=db) + except Exception as e: - logger.error(f"Error occured in api endpoint '/content/recent' : {e}") - return [] + logging.error(f"Error occured when trying to sync all bookmarks: {e}") + raise HTTPException( + status_code=500, + detail="Failed to save the browser data, try again" + ) + + + + diff --git a/backend/app/routes/folder.py b/backend/app/routes/folder.py index 2b37bc9..cae19cd 100644 --- a/backend/app/routes/folder.py +++ b/backend/app/routes/folder.py @@ -1,6 +1,6 @@ from typing import Dict, List, Optional -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, status from sqlalchemy.orm import Session from app.dependencies import get_current_user_id from app.data_models.folder import Folder @@ -10,24 +10,24 @@ from app.data_models.content_item import ContentItem from app.data_models.content_ai import ContentAI -from app.db.database import get_db -from app.schemas.folder import FolderDetails, FolderItem +from app.services.folder import update_folder_metadata, create_user_folder, addItemToFolder, remove_contents_from_folder +from app.db.database import get_db +from app.schemas.folder import FolderDetails, FolderItem, FolderMetadata, RemoveContentPayload +from app.exceptions.folder import 
FolderNotFound, FolderItemNotFound from app.utils.hashing import get_current_user_id from datetime import datetime from uuid import uuid4 from uuid import UUID -import logging +import logging +logger = logging.getLogger(__name__) router = APIRouter( tags=['folder'], ) -logger = logging.getLogger(__name__) - - @router.get("/folder") def get_folders( user_id: UUID=Depends(get_current_user_id), db:Session = Depends(get_db)): @@ -73,6 +73,61 @@ def get_folder_path(folder_id: UUID, user_id: UUID=Depends(get_current_user_id), return {"path": path} +@router.get('/folder/metadata/{folder_id}') +def get_folder_metadata(folder_id : str, db: Session = Depends(get_db)): + + try: + folder : Folder = db.query(Folder).filter(Folder.folder_id ==folder_id ).first() + if not folder: + return {'success' : False, 'message' : 'No folder found for this folder id '} + + payload = { + "name" : folder.folder_name if folder.folder_name is not None else '', + "keywords" : folder.keywords if folder.keywords is not None else [], + "urlPatterns" : folder.url_patterns if folder.url_patterns is not None else [], + "description" : folder.description if folder.description is not None else '', + "smartBucketingEnabled" : folder.bucketing_mode if folder.bucketing_mode is not None else False + + } + + return {'success' : True, 'message': 'Data fetched successfully', 'data' : payload } + + + except Exception as e: + logger.error(f"Error occured trying to fet folder metadata: {e} ") + + +from sqlalchemy.exc import SQLAlchemyError + + +@router.put("/folder/{folder_id}") +def process_folder_metadata( + folder_id: UUID, + metadata: FolderMetadata, + user_id: UUID = Depends(get_current_user_id), + db: Session = Depends(get_db), +): + try: + logger.info(f"Folder metdata being processed: {metadata}") + folder = update_folder_metadata( + db=db, + folder_id=folder_id, + user_id=user_id, + metadata=metadata, + ) + return {"success": True, "folder_id": folder.folder_id} + + except FolderNotFound: + raise HTTPException(status_code=404, detail="Folder not found") + + + + + + + + + @router.get("/folder/{folder_id}") @@ 
-113,31 +168,18 @@ def get_folder_items( @router.post("/users/folder/add") def add_to_folder(itemDetails: FolderItem, user_id: UUID=Depends(get_current_user_id), db: Session = Depends(get_db)): - #make sure item isn't already in the DB - - present = db.query(folder_item).filter(itemDetails.contentId == folder_item.content_id, itemDetails.folderId == folder_item.folder_id, user_id == folder_item.user_id).first() - - if present: - raise HTTPException(status_code=400, detail="Item already in the folder") - try: - new_item = folder_item( - folder_item_id = uuid4(), - folder_id = itemDetails.folderId, - user_id = user_id, - content_id = itemDetails.contentId, - added_at = datetime.utcnow() - ) - - db.add(new_item) - db.commit() - db.refresh(new_item) - - return {'success' : True, 'message' : 'Bookmark added to folder'} + res = addItemToFolder(db=db, user_id=user_id, folder_id=itemDetails.folderId, itemDetails=itemDetails) + if res.get('success', False): + logging.info(f'Succesfully inserted item to folder') + else: + logging.warning(f"Something went wrong, Check out the logic ") + return res except Exception as e: + logging.error(f"Error occured trying to add the item to the folder: {e}") return {'success': False, 'message' : str(e)} @@ -170,48 +212,64 @@ def get_users_folders( user_id: UUID=Depends(get_current_user_id), db: Session = #Edit the api endpoint protocol later @router.post("/user/folder/create") def create_folder(folderDetails: FolderDetails, user_id: UUID=Depends(get_current_user_id), db: Session = Depends(get_db)): - print("folder details: ", folderDetails) - - #check for existing folders with the same name under the same user_id - duplicates = db.query(Folder).filter( - Folder.user_id == user_id, - Folder.folder_name == folderDetails.foldername - ).all() - print(f"Found {len(duplicates)} folders with same name and user.") - - if duplicates: - print("folder already exists: ", duplicates) - raise HTTPException(status_code=400, detail="Folder already 
exists") + + try: + folder_creation_details = create_user_folder(db=db, folderDetails=folderDetails, user_id=user_id) + + return folder_creation_details + + + except Exception as e: + logging.error(f"failed to create user folder: {e}") + raise HTTPException(status_code=500, detail=f"Failed to create folder: {e}") + + # print("folder details: ", folderDetails) + + # #check for existing folders with the same name under the same user_id + # duplicates = db.query(Folder).filter( + # Folder.user_id == user_id, + # Folder.folder_name == folderDetails.foldername + # ).all() + # print(f"Found {len(duplicates)} folders with same name and user.") + + # if duplicates: + # print("folder already exists: ", duplicates) + # raise HTTPException(status_code=400, detail="Folder already exists") - folder_uuid = uuid4() + # folder_uuid = uuid4() - try: - new_folder = Folder( - folder_id = folder_uuid, - user_id= user_id, - parent_id = folderDetails.folderId if folderDetails.folderId else folder_uuid, - folder_name = folderDetails.foldername, - created_at=datetime.utcnow() - ) - db.add(new_folder) - db.commit() - db.refresh(new_folder) + # try: + # new_folder = Folder( + # folder_id = folder_uuid, + # user_id= user_id, + # parent_id = folderDetails.folderId if folderDetails.folderId else folder_uuid, + # folder_name = folderDetails.foldername, + # bucketing_mode = False, + # keywords = [], + # url_patterns = [], + # description='', + # created_at=datetime.utcnow() + # ) + # db.add(new_folder) + # db.commit() + # db.refresh(new_folder) - folder_details = { - 'folder_id' : new_folder.folder_id, - 'created_at' : new_folder.created_at, - 'folder_name' : new_folder.folder_name, - 'parent_id' : new_folder.parent_id, - 'file_count' : 0 + # folder_details = { + # 'folder_id' : new_folder.folder_id, + # 'created_at' : new_folder.created_at, + # 'folder_name' : new_folder.folder_name, + # 'parent_id' : new_folder.parent_id, + # 'file_count' : 0 - } + # } - return {'success' : True, 'message' : 
'folder created successfully', 'folder_details': folder_details} + # return {'success' : True, 'message' : 'folder created successfully', 'folder_details': folder_details} - except Exception as e: - return {'success' : False, 'message' : str(e)} + # except Exception as e: + # logging.error(f"Failed to create folder for user: {e}") + # return {'success' : False, 'message' : str(e)} @router.delete("/folder/{folder_id}") @@ -246,3 +304,41 @@ def deleteFolder(folder_id: UUID, user_id: UUID=Depends(get_current_user_id), db return {'success' : False, 'message' : str(e)} + + + +@router.delete("/folder/{folder_id}/content") +def delete_content_from_folder( + folder_id: UUID, + payload: RemoveContentPayload, + user_id: UUID = Depends(get_current_user_id), + db: Session = Depends(get_db), +): + if not payload.content_ids: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="content_ids list cannot be empty", + ) + + try: + return remove_contents_from_folder( + db=db, + folder_id=folder_id, + user_id=user_id, + content_ids=payload.content_ids, + ) + + except FolderNotFound: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Folder not found", + ) + + except FolderItemNotFound: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="No matching content found in folder", + ) + + + diff --git a/backend/app/routes/tags.py b/backend/app/routes/tags.py new file mode 100644 index 0000000..6bad165 --- /dev/null +++ b/backend/app/routes/tags.py @@ -0,0 +1,103 @@ +from typing import Dict, List, Optional + +from fastapi import APIRouter, Depends, HTTPException, status +from sqlalchemy.orm import Session +from app.dependencies import get_current_user_id +from app.data_models.folder import Folder +from app.data_models.folder_item import folder_item +from sqlalchemy import desc, func, delete +from app.data_models.content import Content +from app.data_models.content_item import ContentItem +from app.data_models.content_ai 
import ContentAI + +from app.services.tag_services import create_tag_service, get_user_tags_service, delete_user_tags_service, update_tag_service, fetch_tag_bookmark_service +from app.schemas.tag import TagCreationData, TagDeleteData, TagUpdateData +from app.db.database import get_db +from app.exceptions.tag_exceptions import TagAlreadyExists, UserTagRelationNotFound, TagNotFound + +from app.utils.hashing import get_current_user_id +from datetime import datetime +from uuid import uuid4 +from uuid import UUID +import logging + + + +router = APIRouter() + +logger = logging.getLogger(__name__) + + +@router.post('/tag', status_code=200) +def create_tag(tag_data: TagCreationData , user_id: UUID=Depends(get_current_user_id), db:Session = Depends(get_db)): + + try: + print("tag data; ", tag_data) + return create_tag_service(tag_data=tag_data, db=db, user_id=user_id) + + + except TagAlreadyExists: + logging.warning('User already has tag saved') + raise HTTPException( + status_code=400 + ) + + except Exception as e: + logging.error(f"Failed to create a new tag for the user: {e}") + raise HTTPException( + status_code = 500, + detail="Failed to create the tag for the user" + ) + + + +@router.get('/tag', status_code=200) +def get_tags(user_id: UUID = Depends(get_current_user_id), db : Session = Depends(get_db)): + try: + print("getting the tags") + return get_user_tags_service(user_id=user_id, db=db) + + except Exception as e: + logging.error(f"Failed to fetch user {user_id}'s tags: {e}") + + +@router.delete('/tag', status_code=200) +def delete_tags(tags : TagDeleteData, user_id: UUID = Depends(get_current_user_id), db : Session = Depends(get_db)): + try: + return delete_user_tags_service(tag_ids=tags.tag_ids, user_id=user_id, db=db) + + except Exception as e: + logging.error(f"Failed to delete tags for user, {e}") + + + +@router.put("/tag/{tag_id}") +def update_tag(tag_id: str, updateTagBody : TagUpdateData, user_id: UUID = Depends(get_current_user_id), db: Session = 
Depends(get_db)): + try: + + return update_tag_service(user_id=user_id, tag_id=tag_id, updated_tag_name=updateTagBody.tag_name, db= db) + + except UserTagRelationNotFound: + logging.error("UserTag relation does not exists between user and tag") + raise HTTPException( + status_code=400 + ) + + except TagNotFound: + logging.error("Tag not found in table") + raise HTTPException( + status_code=400 + ) + + except Exception as e: + logging.error(f"Failed to update the users tags, {e}") + + +@router.get("/tag/bookmark/{tag_id}") +def get_tag_bookmarks(tag_id: str, user_id: UUID = Depends(get_current_user_id), db : Session = Depends(get_db)): + try: + return fetch_tag_bookmark_service(tag_id=tag_id, user_id=user_id, db=db) + + + except Exception as e: + logging.error(f"Failed to fetch bookmarks connected to the id:, {e} ") \ No newline at end of file diff --git a/backend/app/routes/users.py b/backend/app/routes/users.py index edfac90..2a72334 100644 --- a/backend/app/routes/users.py +++ b/backend/app/routes/users.py @@ -6,36 +6,31 @@ from app.schemas.user import UserCreate, UserSignIn, UserGoogleCreate, UserGoogleSignIn from app.utils.hashing import get_password_hash, verify_password, create_access_token, get_current_user_id from app.data_models.user import User +from app.core.settings import get_settings from app.functions.AWS_s3 import extract_s3_key, get_presigned_url from datetime import datetime from uuid import uuid4 from uuid import UUID import boto3 import logging - - - import os +from app.embeddings.embedding_manager import ContentEmbeddingManager - -router = APIRouter( - prefix="/user", - tags=['user'], - dependencies=[] -) +logger = logging.getLogger(__name__) -logger = logging.getLogger(__name__) +router = APIRouter(prefix="/user", tags=['user']) -BUCKET_NAME = os.environ.get('BUCKET_NAME') +settings = get_settings() +settings.BUCKET_NAME = settings.BUCKET_NAME s3 = boto3.client( "s3", region_name="us-east-1", - 
aws_access_key_id=os.environ.get("AWS_ACCESS_KEY"), - aws_secret_access_key=os.environ.get("AWS_SECRET_KEY"), + aws_access_key_id=settings.AWS_ACCESS_KEY, + aws_secret_access_key=settings.AWS_SECRET_KEY, ) @router.post("/signup") @@ -52,6 +47,9 @@ def create_user(user: UserCreate, db: Session = Depends(get_db)): if existing_user: logger.error(f"Username {user.username} already exists") raise HTTPException(status_code=400, detail="Username already registered") + + + embedder = ContentEmbeddingManager(db=db) #Insert user into the database new_user = User( @@ -60,6 +58,8 @@ def create_user(user: UserCreate, db: Session = Depends(get_db)): email=user.email, password=user.password, created_at=datetime.utcnow() if not user.created_at else user.created_at, + user_embedding=embedder._generate_embedding(text=f'initial embedding for user {user.username}'), + last_embedding_update = datetime.utcnow() if not user.created_at else user.created_at ) db.add(new_user) db.commit() @@ -112,7 +112,7 @@ def get_profile_picture(profile_url: str = Query(...), user_id: UUID = Depends(g presigned_url = s3.generate_presigned_url( ClientMethod="get_object", Params={ - "Bucket": BUCKET_NAME, + "Bucket": settings.BUCKET_NAME, "Key": extract_s3_key(profile_url) }, ExpiresIn=3600 # seconds = 1 hour @@ -149,7 +149,7 @@ def upload_user_media(pfp: UploadFile = File(...), user_id: UUID = Depends(get_c try: s3.upload_fileobj( pfp.file, - BUCKET_NAME, + settings.BUCKET_NAME, filename, ExtraArgs={ @@ -157,7 +157,7 @@ def upload_user_media(pfp: UploadFile = File(...), user_id: UUID = Depends(get_c }, ) - image_url = f"https://{BUCKET_NAME}.s3.amazonaws.com/{filename}" + image_url = f"https://{settings.BUCKET_NAME}.s3.amazonaws.com/{filename}" presigned_url = get_presigned_url(image_url) #save to the users DB @@ -248,9 +248,9 @@ def google_login(user : UserGoogleSignIn, db : Session = Depends(get_db)): - +@router.post("/browser/login") # aliasing login from form via extension, generalizing the name to be 
cross-browser compatible @router.post("/chrome/login") -def chrome_login(user: UserSignIn, db: Session = Depends(get_db)): +def chrome_login(user: UserSignIn, db: Session = Depends(get_db)): try: if not user: raise HTTPException(status_code=400, detail="Invalid user data") diff --git a/backend/app/schemas/content.py b/backend/app/schemas/content.py index 022b0fe..38827a5 100644 --- a/backend/app/schemas/content.py +++ b/backend/app/schemas/content.py @@ -1,15 +1,23 @@ from pydantic import BaseModel, HttpUrl -from typing import Optional +from typing import Optional, List from uuid import UUID from datetime import datetime +from app.schemas.tag import TagOut class NoteContentUpdate(BaseModel): notes: str bookmarkID: UUID + +class ContentCreatTags(BaseModel): + tag_name: str + tag_id: str + + class ContentCreate(BaseModel): url: str title: Optional[str] + tags: Optional[list[ContentCreatTags]] notes: Optional[str] folder_id: Optional[UUID] = None html: str @@ -35,7 +43,7 @@ class Config: from_attributes = True class ContentSavedByUrl(BaseModel): - url: HttpUrl + url: str class CategoryOut(BaseModel): category_id: UUID @@ -52,7 +60,9 @@ class UserSavedContent(BaseModel): ai_summary: Optional[str] first_saved_at: datetime notes: Optional[str] - tags: Optional[list[CategoryOut]] + tags: Optional[list[TagOut]] + categories: Optional[list[CategoryOut]] + html_url: Optional[str] = '' class CategoryItem(BaseModel): category_id: str @@ -62,4 +72,28 @@ class UserSavedContentResponse(BaseModel): bookmarks: list[UserSavedContent] categories: Optional[list[CategoryOut] ] = [] next_cursor: Optional[str] = '' - has_next: Optional[bool] = False \ No newline at end of file + has_next: Optional[bool] = False + + + +class BookmarkNode(BaseModel): + id: str + title: str + parentId: Optional[str] = None + index: Optional[int] = None + url: Optional[str] = None # Only present on bookmarks + dateAdded: Optional[float] = None + dateLastUsed: Optional[float] = None + # 'children' makes the 
model recursive + children: Optional[List["BookmarkNode"]] = None + + # These fields appear on specific folders like the Bookmarks Bar + folderType: Optional[str] = None + dateGroupModified: Optional[float] = None + + class Config: + # This allows the model to handle the recursive 'BookmarkNode' reference + arbitrary_types_allowed = True + +class BookmarkImportRequest(BaseModel): + bookmarks: List[BookmarkNode] \ No newline at end of file diff --git a/backend/app/schemas/folder.py b/backend/app/schemas/folder.py index 56f8433..f565dbf 100644 --- a/backend/app/schemas/folder.py +++ b/backend/app/schemas/folder.py @@ -19,3 +19,20 @@ class FolderCreate(BaseModel): class FolderItem(BaseModel): folderId: str contentId: str + + + # name: string; +# keywords: string[]; +# urlPatterns: string[]; +# smartBucketingEnabled: boolean; + +class FolderMetadata(BaseModel): + name: str + smartBucketingEnabled: bool + description: Optional[str] = '' + keywords: list[str] + urlPatterns: list[str] + + +class RemoveContentPayload(BaseModel): + content_ids: list[str] \ No newline at end of file diff --git a/backend/app/schemas/tag.py b/backend/app/schemas/tag.py new file mode 100644 index 0000000..84ec65d --- /dev/null +++ b/backend/app/schemas/tag.py @@ -0,0 +1,24 @@ +from pydantic import BaseModel, ConfigDict +from uuid import UUID +from datetime import datetime + +class TagCreationData(BaseModel): + tag_name: str = '' + + +class TagDeleteData(BaseModel): + tag_ids : list[str] = [] + + +class TagUpdateData(BaseModel): + tag_name: str + + +class TagOut(BaseModel): + tag_id: UUID + tag_name: str + user_id: UUID + # Optional: include this if you want to show when the tag was made + # first_created_at: datetime + + model_config = ConfigDict(from_attributes=True) diff --git a/backend/app/services/content_services.py b/backend/app/services/content_services.py new file mode 100644 index 0000000..8292741 --- /dev/null +++ b/backend/app/services/content_services.py @@ -0,0 +1,672 @@ +from 
fastapi import HTTPException +from app.data_models.content import Content +from app.data_models.content_item import ContentItem +from app.data_models.content_ai import ContentAI +from app.data_models.folder_item import folder_item +from app.data_models.folder import Folder +from app.schemas.content import ContentWithSummary, UserSavedContent, TabRemover, NoteContentUpdate, CategoryOut, BookmarkImportRequest +from app.preprocessing.query_preprocessor import QueryPreprocessor +from app.deps.services import get_embedding_manager +from app.data_models.user import User +from datetime import datetime, timezone +import logging +from sqlalchemy.orm import joinedload +from dateutil.parser import isoparse +from app.core.settings import get_settings +from app.schemas.content import ContentCreatTags +from app.schemas.tag import TagOut + +from sqlalchemy.orm import Session +from uuid import UUID +from sqlalchemy import desc + +import requests +import json + +from urllib.parse import quote + + +from app.exceptions.content_exceptions import EmbeddingManagerNotFound, NoMatchedContent, ContentItemNotFound, NotesNotFound, ContentNotFound + +import logging + + +from dateutil.relativedelta import relativedelta + +from app.functions.AWS_s3 import extract_s3_key, get_presigned_url +logger = logging.getLogger(__name__) +settings = get_settings() + + +# class UserSavedContentResponse(BaseModel): +# bookmarks: list[UserSavedContent] +# categories: Optional[list[CategoryOut] ] = [] +# next_cursor: Optional[str] = '' +# has_next: Optional[bool] = False + +def search_content(*, db : Session, query: str,user: User): + + manager = get_embedding_manager() + + if not manager: + raise EmbeddingManagerNotFound() + manager.db = db + + #Probably the one that takes the longest to query + parsed_query = QueryPreprocessor().preprocess_query(query) + + results = manager.query_similar_content( + query=parsed_query, + user_id=user.id + ) + + bookmark_data = [] + +# class UserSavedContent(BaseModel): +# 
content_id: UUID +# url: str +# title: Optional[str] +# source: Optional[str] +# ai_summary: Optional[str] +# first_saved_at: datetime +# notes: Optional[str] +# tags: Optional[list[TagOut]] +# categories: Optional[list[CategoryOut]] + + for content_ai, content in results: + bookmark_data.append( + UserSavedContent( + content_id=content_ai.content_id, + title=content.title, + url=content.url, + source=content.source, + first_saved_at=content.first_saved_at, + ai_summary=content_ai.ai_summary, + notes="", + tags=[], + categories=[] + ) + ) + + logger.info(f"Data for search: {bookmark_data}") + + if len(bookmark_data) == 0: + raise NoMatchedContent() + return { + "bookmarks": bookmark_data, + "categories": [], # or `None`, depending on how you define Optional + "has_next" : False, + "next_cursor": '', + } + + + + + +def push_to_activemq(message: str): + ACTIVEMQ_URL=settings.ACTIVEMQ_URL + ACTIVEMQ_QUEUE= settings.ACTIVEMQ_QUEUE + ACTIVEMQ_USER= settings.ACTIVEMQ_USER + ACTIVEMQ_PASS= settings.ACTIVEMQ_PASS + + try: + url = f"{ACTIVEMQ_URL}/api/message/{quote(ACTIVEMQ_QUEUE)}?type=queue" + headers = {'Content-Type': 'text/plain'} + + response = requests.post(url, data=message, headers=headers, auth=(ACTIVEMQ_USER, ACTIVEMQ_PASS)) + + logging.debug(f"Response from ActiveMQ: {response.status_code} - {response.text}") + return response.status_code == 200 + + except requests.exceptions.RequestException as e: + logging.error(f"Error pushing to ActiveMQ: {e}") + return False + + +def _enqueue_new_content( + *, + url: str, + title: str | None, + source: str, + html: str | None, + user_id: UUID, + notes: str | None, + tags: list[ContentCreatTags ]| None, + folder_id: str | UUID | None, +) -> None: + utc_time = datetime.now(timezone.utc) + + #pase out only the content id's + tag_ids = [] + if tags: + for tag in tags: + tag_ids.append(tag.tag_id) + + payload = { + "content_payload": { + "url": url, + "title": title, + "source": source, + "first_saved_at": utc_time.isoformat(), 
+ }, + "raw_html": html, + "user_id": str(user_id), + "notes": notes, + "folder_id": str(folder_id) if folder_id else None, + "tag_ids" : tag_ids + } + message = json.dumps(payload) + result = push_to_activemq(message=message) + + if not result: + raise HTTPException(status_code=503, detail="Failed to push to ActiveMQ") + + + + +def get_total_unread_count(user_id: str, db: Session): + total_count = db.query(ContentItem).filter(ContentItem.user_id == user_id, ContentItem.read == False).count() + + logger.debug(f"Total count fetched for user id {user_id} : {total_count}") + return {'status' : "succesful", 'total_count' : total_count} + + +def get_content_service( + cursor: str, + filter_category_names: list[str], + user_id: UUID, + db: Session +): + PAGE_SIZE = 9 + + cursor_dt = None + if cursor: + try: + cursor_dt = isoparse(cursor) + except (ValueError, TypeError): + raise ValueError("Datetime for cursor is wrongly formatted") + + + query = ( + db.query(ContentItem, Content, ContentAI.ai_summary) + .join(Content, ContentItem.content_id == Content.content_id) + .outerjoin(ContentAI, Content.content_id == ContentAI.content_id) + .options( + joinedload(Content.categories), + joinedload(ContentItem.tags) + ) + .filter(ContentItem.user_id == user_id) + ) + + if cursor_dt: + query = query.filter(ContentItem.saved_at < cursor_dt) + + results = query.order_by(desc(ContentItem.saved_at)).limit(PAGE_SIZE + 1).all() + + has_next = len(results) > PAGE_SIZE + paged_results = results[:PAGE_SIZE] + + bookmarks = [] + global_categories_seen = {} + + filter_set = set(filter_category_names) if filter_category_names else None + + for item, content, ai_summary in paged_results: + item_categories = [CategoryOut.from_orm(cat) for cat in content.categories] + item_user_tags = [TagOut.from_orm(t) for t in item.tags] + + if filter_set: + category_names = {cat.category_name for cat in item_categories} + if not category_names.intersection(filter_set): + continue + + for cat in 
item_categories: + global_categories_seen[cat.category_id] = cat + + +# class UserSavedContent(BaseModel): +# content_id: UUID +# url: str +# title: Optional[str] +# source: Optional[str] +# ai_summary: Optional[str] +# first_saved_at: datetime +# notes: Optional[str] +# tags: Optional[list[CategoryOut]] +# html_url: Optional[str] = '' + bookmarks.append( + UserSavedContent( + content_id=content.content_id, + url=content.url, + title=content.title, + source=content.source, + ai_summary=ai_summary, + first_saved_at=item.saved_at, + notes=item.notes, + tags=item_user_tags, + categories=item_categories, + html_url=get_presigned_url(content.html_content_url) if content.html_content_url else '' + + ) + ) + + # 4. Prepare Response + next_cursor = paged_results[-1][0].saved_at.isoformat() if paged_results else None + + return { + "bookmarks": bookmarks, + "categories": list(global_categories_seen.values())[:10], + "next_cursor": next_cursor, + "has_next": has_next + } + + + +def get_unread_content_service( + cursor: str, + filter_category_names: list[str], + user_id: UUID, + db: Session +): + PAGE_SIZE = 9 + + cursor_dt = None + if cursor: + try: + cursor_dt = isoparse(cursor) + except (ValueError, TypeError): + raise ValueError("Datetime for cursor is wrongly formatted") + + + query = ( + db.query(ContentItem, Content, ContentAI.ai_summary) + .join(Content, ContentItem.content_id == Content.content_id) + .outerjoin(ContentAI, Content.content_id == ContentAI.content_id) + .options( + joinedload(Content.categories), + joinedload(ContentItem.tags) + ) + .filter(ContentItem.user_id == user_id, ContentItem.read == False) + ) + + if cursor_dt: + query = query.filter(ContentItem.saved_at < cursor_dt) + + results = query.order_by(desc(ContentItem.saved_at)).limit(PAGE_SIZE + 1).all() + + has_next = len(results) > PAGE_SIZE + paged_results = results[:PAGE_SIZE] + + bookmarks = [] + global_categories_seen = {} + + filter_set = set(filter_category_names) if filter_category_names else 
None + + for item, content, ai_summary in paged_results: + item_categories = [CategoryOut.from_orm(cat) for cat in content.categories] + item_user_tags = [TagOut.from_orm(t) for t in item.tags] + + if filter_set: + category_names = {cat.category_name for cat in item_categories} + if not category_names.intersection(filter_set): + continue + + for cat in item_categories: + global_categories_seen[cat.category_id] = cat + + +# class UserSavedContent(BaseModel): +# content_id: UUID +# url: str +# title: Optional[str] +# source: Optional[str] +# ai_summary: Optional[str] +# first_saved_at: datetime +# notes: Optional[str] +# tags: Optional[list[CategoryOut]] + bookmarks.append( + UserSavedContent( + content_id=content.content_id, + url=content.url, + title=content.title, + source=content.source, + ai_summary=ai_summary, + first_saved_at=item.saved_at, + notes=item.notes, + tags=item_user_tags, + categories=item_categories + + ) + ) + + # 4. Prepare Response + next_cursor = bookmarks[-1].first_saved_at.isoformat() if bookmarks else None + + return { + "bookmarks": bookmarks, + "categories": list(global_categories_seen.values())[:10], + "next_cursor": next_cursor, + "has_next": has_next + } + +# def get_unread_content_service(cursor: str, user_id: UUID, db: Session): +# PAGE_SIZE = 9 +# cursor_dt = None + +# if cursor: +# try: +# cursor_dt = isoparse(cursor) +# except (ValueError, TypeError): +# raise ValueError("Invalid cursor format") + +# # 1. Build the base query +# query = ( +# db.query(ContentItem, Content, ContentAI.ai_summary) +# .join(Content, ContentItem.content_id == Content.content_id) +# .outerjoin(ContentAI, Content.content_id == ContentAI.content_id) +# .options(joinedload(Content.categories)) +# .filter(ContentItem.user_id == user_id, ContentItem.read == False) +# ) + +# # 2. IMPORTANT: Re-assign the filtered query +# if cursor_dt: +# query = query.filter(ContentItem.saved_at < cursor_dt) + +# # 3. 
def update_note_service(*, data: NoteContentUpdate, user_id: UUID, db: Session):
    """Replace the notes on the caller's saved item for ``data.bookmarkID``.

    Raises:
        NotesNotFound: If the user has no content item for that bookmark id.
    """
    bookmark_id = data.bookmarkID

    owned_item_query = db.query(ContentItem).filter(
        ContentItem.content_id == bookmark_id,
        ContentItem.user_id == user_id,
    )
    saved_item = owned_item_query.first()

    if not saved_item:
        raise NotesNotFound(content_id=bookmark_id)

    saved_item.notes = data.notes
    db.commit()

    response = {
        "message": "Note updated successfully",
        "bookmarkID": str(bookmark_id),
    }
    return response
def untabContent(*, content: TabRemover, user_id: UUID, db: Session):
    """Delete the caller's saved item for ``content.content_id``.

    Raises:
        ContentItemNotFound: If no row matched the user and content id.
    """
    target_content_id = content.content_id

    # Bulk delete restricted to this user's row for the given content.
    removed_rows = (
        db.query(ContentItem)
        .filter(
            ContentItem.user_id == user_id,
            ContentItem.content_id == target_content_id,
        )
        .delete(synchronize_session='fetch')
    )

    if removed_rows == 0:
        raise ContentItemNotFound(content_id=target_content_id)

    db.commit()

    summary = {
        "message": "Content item successfully untabbed (deleted).",
        "user_id": user_id,
        "content_id": target_content_id,
        "deleted_count": removed_rows,
    }
    return summary
def webkit_to_iso(webkit_timestamp):
    """Convert a browser bookmark timestamp to an ISO-8601 UTC string.

    Accepted inputs:

    * WebKit/Chrome timestamps counted from 1601-01-01, in microseconds
      (values above 1e15) or milliseconds.
    * Unix-epoch timestamps in the same units. A value that would decode to a
      date before 1970 under the 1601 epoch is taken to be Unix-based already.
      NOTE(review): the caller passes ``node.dateAdded``, which looks like the
      chrome.bookmarks field (Unix-epoch milliseconds) - confirm against the
      client payload.

    Args:
        webkit_timestamp: Numeric timestamp, or a falsy value.

    Returns:
        The timestamp as an ISO-8601 string with a ``+00:00`` offset, or
        ``None`` when ``webkit_timestamp`` is falsy (``None`` or ``0``).
    """
    if not webkit_timestamp:
        return None

    # Seconds between 1601-01-01 and 1970-01-01.
    epoch_offset_seconds = 11_644_473_600

    if webkit_timestamp > 1e15:
        # Microseconds
        seconds = webkit_timestamp / 1_000_000
    else:
        # Milliseconds
        seconds = webkit_timestamp / 1_000

    # Only shift epochs when the value is large enough to be 1601-based;
    # otherwise subtracting the offset would push a Unix timestamp ~369 years
    # into the past.
    if seconds >= epoch_offset_seconds:
        seconds -= epoch_offset_seconds

    return datetime.fromtimestamp(seconds, tz=timezone.utc).isoformat()
def get_discover_content_service(user_id: str, db: Session):
    """Suggest unread saved items per month, ranked by similarity to the user.

    For the current month and each of the five preceding months, selects the
    user's unread items saved in that month and orders them by cosine distance
    between ``ContentAI.embedding`` and ``User.user_embedding``.

    Args:
        user_id: Id of the user whose saved content is searched.
        db: Active SQLAlchemy session.

    Returns:
        ``[]`` when the user does not exist; otherwise a dict mapping a month
        label such as ``"March 2025"`` to a list of at most 6
        ``ContentWithSummary`` objects (possibly empty).
    """
    user = db.query(User).filter(User.id == user_id).first()

    if not user:
        return []

    user_embedding = user.user_embedding

    months_back = 6
    items_per_month = 6

    # Read the clock once so every window derives from the same instant, and
    # zero the microseconds so the window starts exactly at midnight.
    # NOTE(review): UTC is used because saved_at is written with
    # datetime.now(timezone.utc) elsewhere in this module - confirm the column
    # is timezone-aware.
    current_month_start = datetime.now(timezone.utc).replace(
        day=1, hour=0, minute=0, second=0, microsecond=0
    )

    discovered_content = {}

    for months_ago in range(months_back):
        start_date = current_month_start - relativedelta(months=months_ago)
        end_date = start_date + relativedelta(months=1)

        current_month_name = start_date.strftime('%B %Y')

        # Closest items for this month (pgvector cosine distance, ascending).
        month_items = (
            db.query(ContentItem, Content, ContentAI)
            .join(Content, Content.content_id == ContentItem.content_id)
            .join(ContentAI, Content.content_id == ContentAI.content_id)
            .filter(ContentItem.user_id == user_id)
            .filter(ContentItem.saved_at >= start_date)
            .filter(ContentItem.saved_at < end_date)
            .filter(ContentItem.read == False)  # noqa: E712
            .order_by(ContentAI.embedding.cosine_distance(user_embedding))
            .limit(items_per_month)
            .all()
        )

        discovered_content[current_month_name] = [
            ContentWithSummary(
                content_id=content.content_id,
                title=content.title,
                url=content.url,
                source=content.source,
                first_saved_at=content_item.saved_at,
                ai_summary=ai.ai_summary,
                folder=''
            )
            for content_item, content, ai in month_items
        ]

    return discovered_content
def update_folder_metadata(
    *,
    db: Session,
    folder_id: UUID,
    user_id: UUID,
    metadata: FolderMetadata,
) -> Folder:
    """Update a folder's name and bucketing settings and refresh its embedding.

    The metadata is committed first. A new embedding is then generated from
    the updated metadata and, when the folder already had an embedding,
    blended 70/30 (new/old) and re-normalised.

    Args:
        db: Active SQLAlchemy session.
        folder_id: Folder to update.
        user_id: Owner of the folder; the lookup is scoped to this user.
        metadata: New name and smart-bucketing settings.

    Returns:
        The updated ``Folder``.

    Raises:
        FolderNotFound: If the user has no folder with ``folder_id``.
    """
    folder = (
        db.query(Folder)
        .filter(
            Folder.folder_id == folder_id,
            Folder.user_id == user_id,
        )
        .first()
    )

    if not folder:
        raise FolderNotFound()

    # Keep the embedding learned so far; it is blended with the new one below.
    prev_embedding = folder.folder_embedding

    folder.folder_name = metadata.name

    logger.debug("Updating folder %s with metadata: %s", folder_id, metadata)

    if metadata.smartBucketingEnabled:
        folder.bucketing_mode = metadata.smartBucketingEnabled
        folder.keywords = metadata.keywords
        folder.url_patterns = metadata.urlPatterns
        folder.description = metadata.description
    else:
        # Keywords, URL patterns and description are left in place so they
        # are still there if bucketing is re-enabled.
        folder.bucketing_mode = False

    db.commit()
    db.refresh(folder)

    new_folder_embedding = create_folder_embedding(db=db, folder=folder)

    if new_folder_embedding is None:
        # Embedding generation is best-effort; a failure must not wipe the
        # embedding the folder already has.
        logger.warning(
            "Embedding generation failed for folder %s; keeping previous embedding",
            folder_id,
        )
        return folder

    if prev_embedding is None:
        folder.folder_embedding = new_folder_embedding
    else:
        alpha = 0.7
        old_vec = np.array(prev_embedding)
        meta_vec = np.array(new_folder_embedding)

        if old_vec.shape != meta_vec.shape:
            # Vectors of different sizes cannot be blended; start over from
            # the metadata embedding.
            logger.warning(
                "Embedding shape changed for folder %s (%s -> %s); replacing",
                folder_id, old_vec.shape, meta_vec.shape,
            )
            folder.folder_embedding = new_folder_embedding
        else:
            blended_vec = (alpha * meta_vec) + ((1 - alpha) * old_vec)

            # Re-normalize
            norm = np.linalg.norm(blended_vec)
            if norm > 0:
                blended_vec = blended_vec / norm

            folder.folder_embedding = blended_vec.tolist()

    db.commit()
    return folder
def create_user_folder(
    *,
    db: Session,
    folderDetails: FolderDetails,
    user_id: UUID
):
    """Create a folder for the user and, best-effort, give it an embedding.

    Args:
        db: Active SQLAlchemy session.
        folderDetails: Requested folder name and optional parent folder id.
        user_id: Owner of the new folder.

    Returns:
        A dict with ``success``, ``message`` and ``folder_details``
        (``folder_id``, ``created_at``, ``folder_name``, ``parent_id``,
        ``file_count``).

    Raises:
        HTTPException: 400 if the user already has a folder with that name;
            500 if the folder row could not be stored.
    """
    exists = db.query(Folder).filter(
        Folder.user_id == user_id,
        Folder.folder_name == folderDetails.foldername
    ).first()

    if exists:
        raise HTTPException(status_code=400, detail="Folder already exists")

    folder_uuid = uuid4()

    new_folder = Folder(
        folder_id=folder_uuid,
        user_id=user_id,
        # A folder without an explicit parent points at itself.
        parent_id=folderDetails.folderId or folder_uuid,
        folder_name=folderDetails.foldername,
        bucketing_mode=False,
        keywords=[],
        url_patterns=[],
        description='',
        folder_embedding=None,
        created_at=datetime.utcnow()
    )

    try:
        db.add(new_folder)
        db.commit()
        db.refresh(new_folder)
    except Exception:
        db.rollback()
        logging.exception("Failed to create folder")
        raise HTTPException(status_code=500, detail="Failed to create folder")

    # Best-effort embedding: the folder is already committed, so a failure
    # here is logged instead of being reported as a failed creation.
    embedding = create_folder_embedding(db, new_folder)
    # Explicit None/length checks: truth-testing an array-like embedding is
    # ambiguous.
    if embedding is not None and len(embedding) > 0:
        try:
            new_folder.folder_embedding = embedding
            db.commit()
        except Exception:
            db.rollback()
            logging.exception("Failed to store embedding for new folder")

    return {
        'success': True,
        'message': 'folder created successfully',
        'folder_details': {
            'folder_id': new_folder.folder_id,
            'created_at': new_folder.created_at,
            'folder_name': new_folder.folder_name,
            'parent_id': new_folder.parent_id,
            'file_count': 0
        }
    }
def update_folder_learning(db: Session, folder_id: str, content_id: str):
    """
    Nudge the folder's embedding toward the embedding of newly added content.

    The stored vector becomes a 90/10 mix of the current folder vector and the
    content vector, rescaled to unit length when its norm is positive.
    Returns True when the folder was updated and committed, False when data
    was missing, shapes differed, or any error occurred (after a rollback).
    """
    learning_rate = 0.1

    try:
        target_folder = db.query(Folder).filter(Folder.folder_id == folder_id).first()
        item_embedding = get_content_embedding(db=db, content_id=content_id)

        # Identity checks only: truth-testing an array is ambiguous.
        inputs_missing = (
            target_folder is None
            or target_folder.folder_embedding is None
            or item_embedding is None
        )
        if inputs_missing:
            logging.error(f"Learning skipped: Missing data for folder {folder_id} or content {content_id}")
            return False

        folder_vec = np.array(target_folder.folder_embedding)
        item_vec = np.array(item_embedding)

        # Mixing needs both vectors to have the same shape.
        if folder_vec.shape != item_vec.shape:
            logging.error(f"Shape mismatch: Folder({folder_vec.shape}) vs Content({item_vec.shape})")
            return False

        shifted_vec = ((1 - learning_rate) * folder_vec) + (learning_rate * item_vec)

        # Rescale to unit length for cosine-similarity comparisons.
        magnitude = np.linalg.norm(shifted_vec)
        if magnitude > 0:
            shifted_vec = shifted_vec / magnitude

        target_folder.folder_embedding = shifted_vec.tolist()
        db.commit()

        logging.info(f"Folder {folder_id} successfully shifted toward content {content_id}")
        return True

    except Exception as e:
        # Undo any partial change before reporting failure.
        db.rollback()
        logging.error(f"Error occurred when trying to shift the folder embedding model: {e}")
        return False
def _update_folder_embedding(db: Session, folder: Folder) -> Optional[list[float]]:
    """Generate an embedding from the folder's current metadata.

    The embedding text is built from the folder name plus, when present, the
    description, keywords and URL patterns, one per line. The result is only
    returned; it is not written to ``folder``.

    Args:
        db: Session handed to ``ContentEmbeddingManager``.
        folder: Folder whose metadata is embedded.

    Returns:
        The value returned by ``ContentEmbeddingManager._generate_embedding``,
        or ``None`` if any step raised (the exception is logged).
    """
    try:
        parts = [
            f"Folder name: {folder.folder_name}",
            f"Description: {folder.description}" if folder.description else None,
            f"Keywords: {', '.join(folder.keywords)}" if folder.keywords else None,
            f"URL patterns: {', '.join(folder.url_patterns)}" if folder.url_patterns else None,
        ]

        # Skip the fields that were empty.
        embedding_text = "\n".join(p for p in parts if p)

        embedding_mgr = ContentEmbeddingManager(db=db)
        return embedding_mgr._generate_embedding(embedding_text)

    except Exception:
        logging.exception("Failed to create folder embedding")
        return None
def remove_contents_from_folder(
    db: Session,
    folder_id: UUID,
    user_id: UUID,
    content_ids: list[str],
):
    """Remove content from a folder and penalize the folder embedding for it.

    Only ids that were actually in the folder (for this user) are used to
    penalize the folder embedding; ids that matched nothing are ignored.

    Args:
        db: Active SQLAlchemy session.
        folder_id: Folder to remove content from.
        user_id: Owner of the folder items.
        content_ids: Content ids to remove.

    Returns:
        ``{"status": "success", "removed": <rows deleted>}``; ``removed`` is 0
        when ``content_ids`` is empty.

    Raises:
        FolderNotFound: If the folder does not exist.
        FolderItemNotFound: If none of ``content_ids`` is in the folder.
    """
    if not content_ids:
        return {"status": "success", "removed": 0}

    try:
        get_folder_or_404(db, folder_id)

        membership_filter = (
            folder_item.folder_id == folder_id,
            folder_item.user_id == user_id,
            folder_item.content_id.in_(content_ids),
        )

        # Record which ids are really present before deleting them, so the
        # penalization below only considers content that was in the folder.
        # dict.fromkeys de-duplicates while keeping order.
        removed_ids = list(dict.fromkeys(
            row[0]
            for row in db.query(folder_item.content_id).filter(*membership_filter).all()
        ))

        deleted_count = (
            db.query(folder_item)
            .filter(*membership_filter)
            .delete(synchronize_session=False)
        )

        if deleted_count == 0:
            raise FolderItemNotFound("No matching content found in folder")

        db.commit()

        # Penalize the learning vector.
        # TODO: keep a vector of previously removed content to compare new
        # content against.
        logger.info('Penalizing folder with the removed data')
        for content_id in removed_ids:
            _penalize_folder_learning(db=db, folder_id=folder_id, content_id=content_id)

        return {
            "status": "success",
            "removed": deleted_count,
        }

    except Exception as e:
        db.rollback()
        logging.error(
            f"Failed to remove content from folder {folder_id}: {e}",
            exc_info=True,
        )
        raise
def fetch_tag_bookmark_service(tag_id: str, user_id: str, db: Session):
    """List the user's saved content carrying ``tag_id``, newest first.

    Returns a list of ``UserSavedContent``. If anything raises, the error is
    logged and an empty list is returned.
    """
    try:
        tagged_rows = (
            db.query(ContentItem, Content, ContentAI.ai_summary)
            .join(Content, ContentItem.content_id == Content.content_id)
            .outerjoin(ContentAI, Content.content_id == ContentAI.content_id)
            # ContentTag columns are reached through ``.c``.
            .join(ContentTag, ContentItem.content_id == ContentTag.c.content_id)
            .options(
                joinedload(ContentItem.tags),
                joinedload(Content.categories),
            )
            .filter(
                ContentItem.user_id == user_id,
                ContentTag.c.tag_id == tag_id,
            )
            .order_by(desc(ContentItem.saved_at))
            .all()
        )

        tagged_bookmarks = []
        for saved_item, content_row, summary in tagged_rows:
            user_tags = [TagOut.from_orm(tag) for tag in saved_item.tags]
            categories = [CategoryOut.from_orm(category) for category in content_row.categories]

            bookmark = UserSavedContent(
                content_id=content_row.content_id,
                url=content_row.url,
                title=content_row.title,
                source=content_row.source,
                ai_summary=summary,
                first_saved_at=saved_item.saved_at,
                notes=saved_item.notes,
                tags=user_tags,
                categories=categories,
            )
            tagged_bookmarks.append(bookmark)

        return tagged_bookmarks

    except Exception as e:
        # Best-effort endpoint: report the failure and return nothing.
        logging.error(f"Failed to fetch bookmarks connected to the id: {e}")
        return []
aws_access_key_id=settings.AWS_ACCESS_KEY, +# aws_secret_access_key=settings.AWS_SECRET_KEY, +# ) + + +# def creat_signed_url(): + + +# try: +# presigned_url = s3.generate_presigned_url( +# ClientMethod="get_object", +# Params={ +# "Bucket": settings.BUCKET_NAME, +# "Key": extract_s3_key(profile_url) +# }, +# ExpiresIn=3600 # seconds = 1 hour + + +# ) + +# logger.info(f"Presigned url created succesfully for user profile {profile_url}") + +# #set a new cookie with this + +# token_obj = Token(user_id) + +# new_jwt = token_obj.createAccessTokenWithUserId() + +# logger.info("new presigned url successfully generated: ", new_jwt) + + + +# return {'success' : True, "presigned_url": presigned_url, "jwt" : new_jwt} + +# except Exception as e: + \ No newline at end of file diff --git a/backend/archives/djo_test.html b/backend/archives/djo_test.html new file mode 100644 index 0000000..48a1b38 --- /dev/null +++ b/backend/archives/djo_test.html @@ -0,0 +1,2121 @@ + + + + + + + +