diff --git a/tests/test_document_related.py b/tests/test_document_related.py index 2167cc7..cf49cef 100644 --- a/tests/test_document_related.py +++ b/tests/test_document_related.py @@ -11,6 +11,9 @@ from welearn_database.data.models.corpus_related import Category, Corpus from welearn_database.data.models.document_related import ( ErrorDataQuality, + HistoricalQtyDocumentInQdrant, + HistoricalQtyDocumentInQdrantPerCorpus, + HistoricalQtyDocumentPerCorpus, ProcessState, WeLearnDocument, ) @@ -467,3 +470,66 @@ def test_error_data_quality(self): self.assertEqual(error_from_db.error_info, "Test Error") self.assertEqual(error_from_db.error_raiser, "welearn_database_test") self.assertEqual(error_from_db.document.id, doc_ids[0]) + + def test_historical_qty_document_per_corpus(self): + engine = create_engine("sqlite://") + s_maker = sessionmaker(engine) + handle_schema_with_sqlite(engine) + + test_session = s_maker() + Base.metadata.create_all(test_session.get_bind()) + + test_hqdpc = HistoricalQtyDocumentPerCorpus( + id=uuid.uuid4(), + source_name="corpus_test", + quantity=100, + ) + test_session.add(test_hqdpc) + test_session.commit() + + hqpdc_from_db = test_session.query(HistoricalQtyDocumentPerCorpus).first() + + self.assertEqual(hqpdc_from_db.source_name, "corpus_test") + self.assertEqual(hqpdc_from_db.quantity, 100) + + def test_historical_qty_document_in_qdrant_per_corpus(self): + engine = create_engine("sqlite://") + s_maker = sessionmaker(engine) + handle_schema_with_sqlite(engine) + + test_session = s_maker() + Base.metadata.create_all(test_session.get_bind()) + + test_hqdpc = HistoricalQtyDocumentInQdrantPerCorpus( + id=uuid.uuid4(), + source_name="corpus_test", + quantity=100, + ) + test_session.add(test_hqdpc) + test_session.commit() + + hqdiqpc_from_db = test_session.query( + HistoricalQtyDocumentInQdrantPerCorpus + ).first() + + self.assertEqual(hqdiqpc_from_db.source_name, "corpus_test") + self.assertEqual(hqdiqpc_from_db.quantity, 100) + + def 
test_historical_qty_document_in_qdrant(self): + engine = create_engine("sqlite://") + s_maker = sessionmaker(engine) + handle_schema_with_sqlite(engine) + + test_session = s_maker() + Base.metadata.create_all(test_session.get_bind()) + + test_hqdpc = HistoricalQtyDocumentInQdrant( + id=uuid.uuid4(), + quantity=100, + ) + test_session.add(test_hqdpc) + test_session.commit() + + hqdiq_from_db = test_session.query(HistoricalQtyDocumentInQdrant).first() + + self.assertEqual(hqdiq_from_db.quantity, 100) diff --git a/welearn_database/alembic/versions/5aad97149044_add_histo_table_for_materialized_views.py b/welearn_database/alembic/versions/5aad97149044_add_histo_table_for_materialized_views.py new file mode 100644 index 0000000..771b281 --- /dev/null +++ b/welearn_database/alembic/versions/5aad97149044_add_histo_table_for_materialized_views.py @@ -0,0 +1,71 @@ +"""add_histo_table_for_materialized_views + +Revision ID: 5aad97149044 +Revises: b84462ca3800 +Create Date: 2026-06-17 11:31:55.292978 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +from sqlalchemy import text +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
revision: str = "5aad97149044"
down_revision: Union[str, None] = "b84462ca3800"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the three history tables in the ``document_related`` schema.

    Every table gets a server-generated UUID ``id`` (primary key), an integer
    ``quantity`` and a ``created_at`` timestamp defaulting to ``NOW()``. The two
    ``*_per_corpus`` tables additionally carry a ``source_name`` column.
    """
    op.create_table(
        "historical_qty_document_per_corpus",
        sa.Column(
            "id", sa.Uuid(), server_default=text("gen_random_uuid()"), nullable=False
        ),
        sa.Column("source_name", sa.String(), nullable=False),
        sa.Column("quantity", sa.Integer(), nullable=False),
        sa.Column(
            "created_at",
            postgresql.TIMESTAMP(),
            # text() keeps NOW() an SQL expression; a bare Python string would
            # be emitted as the quoted literal 'NOW()' in the DDL.
            server_default=text("NOW()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        schema="document_related",
    )

    op.create_table(
        "historical_qty_document_in_qdrant_per_corpus",
        sa.Column(
            "id", sa.Uuid(), server_default=text("gen_random_uuid()"), nullable=False
        ),
        sa.Column("source_name", sa.String(), nullable=False),
        sa.Column("quantity", sa.Integer(), nullable=False),
        sa.Column(
            "created_at",
            postgresql.TIMESTAMP(),
            # Same reason as above: an expression, not a quoted string.
            server_default=text("NOW()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        schema="document_related",
    )

    op.create_table(
        "historical_qty_document_in_qdrant",
        sa.Column(
            "id", sa.Uuid(), server_default=text("gen_random_uuid()"), nullable=False
        ),
        sa.Column("quantity", sa.Integer(), nullable=False),
        sa.Column(
            "created_at",
            postgresql.TIMESTAMP(),
            # Same reason as above: an expression, not a quoted string.
            server_default=text("NOW()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        schema="document_related",
    )


def downgrade() -> None:
    """Drop the three history tables created by :func:`upgrade`."""
    op.drop_table("historical_qty_document_per_corpus", schema="document_related")
    op.drop_table(
        "historical_qty_document_in_qdrant_per_corpus", schema="document_related"
    )
    op.drop_table("historical_qty_document_in_qdrant", schema="document_related")
"""view_used_feature_per_session

Revision ID: 6d4346fad6f4
Revises: 5aad97149044
Create Date: 2026-06-17 11:54:52.441998

"""

from typing import Sequence, Union

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "6d4346fad6f4"
down_revision: Union[str, None] = "5aad97149044"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create or replace the ``grafana.used_feature_per_session`` view.

    The view pairs every session found in ``user_related.endpoint_request``
    with every feature of the hard-coded endpoint-to-feature mapping, and
    reports per pair the number of matching requests (``cnt``, 0 when none)
    and whether that count is positive (``is_feature_used``).
    """
    create_view_sql = """
    CREATE OR REPLACE VIEW grafana.used_feature_per_session
    AS WITH matching_features_endpoint AS (
SELECT
    endpoint_name,
    feature_name
FROM
    (
VALUES
    ('/api/v1/search/by_document', 'search'),
    ('/api/v1/qna/chat/answer', 'chat'),
    ('/api/v1/qna/chat/agent', 'chat'),
    ('/api/v1/tutor/syllabus', 'syllabus'),
    ('/api/v1/user/bookmarks/:document_id', 'bookmark'),
    ('/api/v1/user/:user_id/bookmarks/:document_id', 'bookmark')
    ) AS t(endpoint_name, feature_name)
),
session_feature_pair AS (
SELECT
    DISTINCT
    er.session_id,
    mfe.feature_name
FROM
    user_related.endpoint_request er
CROSS JOIN matching_features_endpoint mfe
ORDER BY
    er.session_id
),
actual_count AS (
SELECT
    er.session_id,
    mfe.feature_name,
    COUNT(1) AS cnt
FROM
    user_related.endpoint_request er
INNER JOIN
    user_related."session" s ON
    s.id = er.session_id
INNER JOIN
    matching_features_endpoint mfe ON
    mfe.endpoint_name = er.endpoint_name
GROUP BY
    er.session_id,
    mfe.feature_name
)
SELECT
    s.inferred_user_id,
    sfp.session_id,
    sfp.feature_name,
    COALESCE(ac.cnt, 0) AS cnt,
    COALESCE(ac.cnt, 0) > 0 AS is_feature_used,
    s.created_at AS session_created_at
FROM
    session_feature_pair sfp
LEFT JOIN actual_count ac ON
    ac.feature_name = sfp.feature_name
    AND ac.session_id = sfp.session_id
INNER JOIN user_related."session" s ON
    s.id = sfp.session_id
ORDER BY
    session_created_at
    """
    op.execute(create_view_sql)
# welearn_database/alembic/versions/6d4346fad6f4_view_used_feature_per_session.py
def downgrade() -> None:
    """Drop the ``grafana.used_feature_per_session`` view if it exists."""
    drop_view_sql = """
    DROP VIEW IF EXISTS grafana.used_feature_per_session;
    """
    op.execute(drop_view_sql)


# welearn_database/data/models/document_related.py
class HistoricalQtyDocumentPerCorpus(Base):
    """Row of the ``historical_qty_document_per_corpus`` table.

    :cvar id: Primary key, generated server-side when not supplied.
    :cvar source_name: Name of the source the quantity refers to.
    :cvar quantity: Recorded quantity.
    :cvar created_at: Creation timestamp (no time zone), filled by default.
    """

    __tablename__ = "historical_qty_document_per_corpus"
    __table_args__ = {"schema": schema_name}

    id = mapped_column(
        types.Uuid,
        primary_key=True,
        nullable=False,
        server_default=GEN_RANDOM_UUID,
    )
    source_name: Mapped[str] = mapped_column()
    quantity: Mapped[int] = mapped_column()
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        default=func.localtimestamp(),
        server_default=NOW,
        nullable=False,
    )


class HistoricalQtyDocumentInQdrantPerCorpus(Base):
    """Row of the ``historical_qty_document_in_qdrant_per_corpus`` table.

    :cvar id: Primary key, generated server-side when not supplied.
    :cvar source_name: Name of the source the quantity refers to.
    :cvar quantity: Recorded quantity.
    :cvar created_at: Creation timestamp (no time zone), filled by default.
    """

    __tablename__ = "historical_qty_document_in_qdrant_per_corpus"
    __table_args__ = {"schema": schema_name}

    id = mapped_column(
        types.Uuid,
        primary_key=True,
        nullable=False,
        server_default=GEN_RANDOM_UUID,
    )
    source_name: Mapped[str] = mapped_column()
    quantity: Mapped[int] = mapped_column()
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        default=func.localtimestamp(),
        server_default=NOW,
        nullable=False,
    )


class HistoricalQtyDocumentInQdrant(Base):
    """Row of the ``historical_qty_document_in_qdrant`` table.

    :cvar id: Primary key, generated server-side when not supplied.
    :cvar quantity: Recorded quantity.
    :cvar created_at: Creation timestamp (no time zone), filled by default.
    """

    __tablename__ = "historical_qty_document_in_qdrant"
    __table_args__ = {"schema": schema_name}

    id = mapped_column(
        types.Uuid,
        primary_key=True,
        nullable=False,
        server_default=GEN_RANDOM_UUID,
    )
    quantity: Mapped[int] = mapped_column()
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        default=func.localtimestamp(),
        server_default=NOW,
        nullable=False,
    )


# welearn_database/data/models/grafana.py
class UsedFeaturePerSession(Base):
    """Read-only mapping named ``used_feature_per_session``.

    The attribute names mirror the select list of the
    ``used_feature_per_session`` view; the ORM identity is the composite of
    ``inferred_user_id``, ``session_id`` and ``feature_name``.
    """

    __tablename__ = "used_feature_per_session"
    __table_args__ = {"schema": schema_name}
    __read_only__ = True

    inferred_user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
    )
    session_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
    )
    feature_name: Mapped[str] = mapped_column(primary_key=True)
    cnt: Mapped[int] = mapped_column()
    is_feature_used: Mapped[bool] = mapped_column()
    session_created_at: Mapped[datetime] = mapped_column()