Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions tests/test_document_related.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
from welearn_database.data.models.corpus_related import Category, Corpus
from welearn_database.data.models.document_related import (
ErrorDataQuality,
HistoricalQtyDocumentInQdrant,
HistoricalQtyDocumentInQdrantPerCorpus,
HistoricalQtyDocumentPerCorpus,
ProcessState,
WeLearnDocument,
)
Expand Down Expand Up @@ -467,3 +470,66 @@ def test_error_data_quality(self):
self.assertEqual(error_from_db.error_info, "Test Error")
self.assertEqual(error_from_db.error_raiser, "welearn_database_test")
self.assertEqual(error_from_db.document.id, doc_ids[0])

def test_historical_qty_document_per_corpus(self):
    """A HistoricalQtyDocumentPerCorpus row can be stored and read back."""
    # Fresh in-memory SQLite database for this test only.
    sqlite_engine = create_engine("sqlite://")
    session_factory = sessionmaker(sqlite_engine)
    handle_schema_with_sqlite(sqlite_engine)

    db_session = session_factory()
    Base.metadata.create_all(db_session.get_bind())

    # The id is passed explicitly instead of relying on the server default.
    db_session.add(
        HistoricalQtyDocumentPerCorpus(
            id=uuid.uuid4(),
            source_name="corpus_test",
            quantity=100,
        )
    )
    db_session.commit()

    stored_row = db_session.query(HistoricalQtyDocumentPerCorpus).first()

    self.assertEqual(stored_row.source_name, "corpus_test")
    self.assertEqual(stored_row.quantity, 100)

def test_historical_qty_document_in_qdrant_per_corpus(self):
    """A HistoricalQtyDocumentInQdrantPerCorpus row can be stored and read back."""
    # Fresh in-memory SQLite database for this test only.
    sqlite_engine = create_engine("sqlite://")
    session_factory = sessionmaker(sqlite_engine)
    handle_schema_with_sqlite(sqlite_engine)

    db_session = session_factory()
    Base.metadata.create_all(db_session.get_bind())

    # The id is passed explicitly instead of relying on the server default.
    db_session.add(
        HistoricalQtyDocumentInQdrantPerCorpus(
            id=uuid.uuid4(),
            source_name="corpus_test",
            quantity=100,
        )
    )
    db_session.commit()

    stored_row = db_session.query(HistoricalQtyDocumentInQdrantPerCorpus).first()

    self.assertEqual(stored_row.source_name, "corpus_test")
    self.assertEqual(stored_row.quantity, 100)

def test_historical_qty_document_in_qdrant(self):
    """A HistoricalQtyDocumentInQdrant row can be stored and read back."""
    # Fresh in-memory SQLite database for this test only.
    sqlite_engine = create_engine("sqlite://")
    session_factory = sessionmaker(sqlite_engine)
    handle_schema_with_sqlite(sqlite_engine)

    db_session = session_factory()
    Base.metadata.create_all(db_session.get_bind())

    # The id is passed explicitly instead of relying on the server default.
    db_session.add(
        HistoricalQtyDocumentInQdrant(
            id=uuid.uuid4(),
            quantity=100,
        )
    )
    db_session.commit()

    stored_row = db_session.query(HistoricalQtyDocumentInQdrant).first()

    self.assertEqual(stored_row.quantity, 100)
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""add_histo_table_for_materialized_views

Revision ID: 5aad97149044
Revises: b84462ca3800
Create Date: 2026-06-17 11:31:55.292978

"""

from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op
from sqlalchemy import text
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
# `revision` is the id of this migration and `down_revision` the id of the
# migration it revises (see the "Revises" line of the module docstring).
# No branch labels or cross-revision dependencies are declared here.
revision: str = "5aad97149044"
down_revision: Union[str, None] = "b84462ca3800"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the three historical document-quantity tables.

    All tables are created in the ``document_related`` schema. Each one has a
    server-generated UUID primary key ``id``, a non-null integer ``quantity``
    and a non-null ``created_at`` timestamp defaulting to the insertion time.
    The two ``*_per_corpus`` tables additionally store a non-null
    ``source_name``.
    """
    op.create_table(
        "historical_qty_document_per_corpus",
        sa.Column(
            "id", sa.Uuid(), server_default=text("gen_random_uuid()"), nullable=False
        ),
        sa.Column("source_name", sa.String(), nullable=False),
        sa.Column("quantity", sa.Integer(), nullable=False),
        sa.Column(
            "created_at",
            postgresql.TIMESTAMP(),
            # text() keeps NOW() an SQL expression evaluated per insert; a
            # plain str would be emitted as the quoted literal 'NOW()'.
            server_default=text("NOW()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        schema="document_related",
    )

    op.create_table(
        "historical_qty_document_in_qdrant_per_corpus",
        sa.Column(
            "id", sa.Uuid(), server_default=text("gen_random_uuid()"), nullable=False
        ),
        sa.Column("source_name", sa.String(), nullable=False),
        sa.Column("quantity", sa.Integer(), nullable=False),
        sa.Column(
            "created_at",
            postgresql.TIMESTAMP(),
            # Same reasoning as above: SQL expression, not a string literal.
            server_default=text("NOW()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        schema="document_related",
    )

    op.create_table(
        "historical_qty_document_in_qdrant",
        sa.Column(
            "id", sa.Uuid(), server_default=text("gen_random_uuid()"), nullable=False
        ),
        sa.Column("quantity", sa.Integer(), nullable=False),
        sa.Column(
            "created_at",
            postgresql.TIMESTAMP(),
            # Same reasoning as above: SQL expression, not a string literal.
            server_default=text("NOW()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id"),
        schema="document_related",
    )
Comment thread
lpi-tn marked this conversation as resolved.


def downgrade() -> None:
    """Drop the three historical quantity tables from ``document_related``."""
    # Same drop order as the original explicit calls.
    for table_name in (
        "historical_qty_document_per_corpus",
        "historical_qty_document_in_qdrant_per_corpus",
        "historical_qty_document_in_qdrant",
    ):
        op.drop_table(table_name, schema="document_related")
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""view_used_feature_per_session

Revision ID: 6d4346fad6f4
Revises: 5aad97149044
Create Date: 2026-06-17 11:54:52.441998

"""

from typing import Sequence, Union

from alembic import op

# revision identifiers, used by Alembic.
# `revision` is the id of this migration and `down_revision` the id of the
# migration it revises (see the "Revises" line of the module docstring).
# No branch labels or cross-revision dependencies are declared here.
revision: str = "6d4346fad6f4"
down_revision: Union[str, None] = "5aad97149044"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create or replace the ``grafana.used_feature_per_session`` view.

    The view yields one row per (session, feature) pair, for every session id
    found in ``user_related.endpoint_request`` that also exists in
    ``user_related.session``. ``cnt`` is the number of requests of that
    session whose endpoint maps to the feature (0 when there is none) and
    ``is_feature_used`` is true when ``cnt`` is greater than 0.
    """
    op.execute("""
        CREATE OR REPLACE VIEW grafana.used_feature_per_session
        AS WITH matching_features_endpoint AS (
            -- Static mapping from API endpoint to product feature.
            SELECT
                endpoint_name,
                feature_name
            FROM
                (
                    VALUES
                        ('/api/v1/search/by_document', 'search'),
                        ('/api/v1/qna/chat/answer', 'chat'),
                        ('/api/v1/qna/chat/agent', 'chat'),
                        ('/api/v1/tutor/syllabus', 'syllabus'),
                        ('/api/v1/user/bookmarks/:document_id', 'bookmark'),
                        ('/api/v1/user/:user_id/bookmarks/:document_id', 'bookmark')
                ) AS t(endpoint_name, feature_name)
        ),
        session_feature_pair AS (
            -- Every known session paired with every feature; DISTINCT removes
            -- the duplicates coming from repeated requests and from features
            -- mapped to several endpoints. No ORDER BY here: the final SELECT
            -- defines the ordering.
            SELECT
                DISTINCT
                er.session_id,
                mfe.feature_name
            FROM
                user_related.endpoint_request er
            CROSS JOIN matching_features_endpoint mfe
        ),
        actual_count AS (
            -- Number of matching requests per session and feature.
            SELECT
                er.session_id,
                mfe.feature_name,
                COUNT(1) AS cnt
            FROM
                user_related.endpoint_request er
            INNER JOIN
                user_related."session" s ON
                s.id = er.session_id
            INNER JOIN
                matching_features_endpoint mfe ON
                mfe.endpoint_name = er.endpoint_name
            GROUP BY
                er.session_id,
                mfe.feature_name
        )
        SELECT
            s.inferred_user_id,
            sfp.session_id,
            sfp.feature_name,
            COALESCE(ac.cnt, 0) AS cnt,
            COALESCE(ac.cnt, 0) > 0 AS is_feature_used,
            s.created_at AS session_created_at
        FROM
            session_feature_pair sfp
        LEFT JOIN actual_count ac ON
            ac.feature_name = sfp.feature_name
            AND ac.session_id = sfp.session_id
        INNER JOIN user_related."session" s ON
            s.id = sfp.session_id
        ORDER BY
            session_created_at
    """)


def downgrade() -> None:
    """Remove the ``grafana.used_feature_per_session`` view when it exists."""
    drop_view_statement = """
DROP VIEW IF EXISTS grafana.used_feature_per_session;
"""
    op.execute(drop_view_statement)
Comment thread
lpi-tn marked this conversation as resolved.
61 changes: 60 additions & 1 deletion welearn_database/data/models/document_related.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class WeLearnDocument(Base):
"""
This class represents a document in the WeLearn system.
:cvar id: The unique identifier of the document.
:cvar doi: The unique DOI identifier if it exist for this document
:cvar doi: The unique DOI identifier if it exists for this document
:cvar external_id: ID use by the document provider for identify it (ex: PubMed ID, ArXiv ID, OAI PMH identifier etc.)
:cvar external_id_type: The type of the external ID, represented as an enumeration
:cvar url: The URL of the document.
Expand Down Expand Up @@ -478,6 +478,65 @@ class ContextDocument(Base):
)


class HistoricalQtyDocumentPerCorpus(Base):
    """
    Row of the ``historical_qty_document_per_corpus`` table.
    :cvar id: UUID primary key, generated by the server when not provided.
    :cvar source_name: Name of the source the quantity was recorded for (presumably a corpus source name).
    :cvar quantity: The recorded quantity.
    :cvar created_at: Timestamp (without time zone) set when the row is created.
    """

    __tablename__ = "historical_qty_document_per_corpus"
    __table_args__ = {"schema": schema_name}

    id = mapped_column(
        types.Uuid,
        server_default=GEN_RANDOM_UUID,
        nullable=False,
        primary_key=True,
    )
    source_name: Mapped[str]
    quantity: Mapped[int]
    # Both a client-side and a server-side default are declared.
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        default=func.localtimestamp(),
        server_default=NOW,
        nullable=False,
    )


class HistoricalQtyDocumentInQdrantPerCorpus(Base):
    """
    Row of the ``historical_qty_document_in_qdrant_per_corpus`` table.
    :cvar id: UUID primary key, generated by the server when not provided.
    :cvar source_name: Name of the source the quantity was recorded for (presumably a corpus source name).
    :cvar quantity: The recorded quantity.
    :cvar created_at: Timestamp (without time zone) set when the row is created.
    """

    __tablename__ = "historical_qty_document_in_qdrant_per_corpus"
    __table_args__ = {"schema": schema_name}

    id = mapped_column(
        types.Uuid,
        server_default=GEN_RANDOM_UUID,
        nullable=False,
        primary_key=True,
    )
    source_name: Mapped[str]
    quantity: Mapped[int]
    # Both a client-side and a server-side default are declared.
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        default=func.localtimestamp(),
        server_default=NOW,
        nullable=False,
    )


class HistoricalQtyDocumentInQdrant(Base):
    """
    Row of the ``historical_qty_document_in_qdrant`` table.
    :cvar id: UUID primary key, generated by the server when not provided.
    :cvar quantity: The recorded quantity.
    :cvar created_at: Timestamp (without time zone) set when the row is created.
    """

    __tablename__ = "historical_qty_document_in_qdrant"
    __table_args__ = {"schema": schema_name}

    id = mapped_column(
        types.Uuid,
        server_default=GEN_RANDOM_UUID,
        nullable=False,
        primary_key=True,
    )
    quantity: Mapped[int]
    # Both a client-side and a server-side default are declared.
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        default=func.localtimestamp(),
        server_default=NOW,
        nullable=False,
    )


# Views
class QtyDocumentInQdrant(Base):
__tablename__ = "qty_document_in_qdrant"
Expand Down
15 changes: 15 additions & 0 deletions welearn_database/data/models/grafana.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,18 @@ class Session(Base):
created_at: Mapped[datetime] = mapped_column()
end_at: Mapped[datetime | None] = mapped_column(nullable=True)
host: Mapped[str | None] = mapped_column(nullable=True)


class UsedFeaturePerSession(Base):
    """
    Read-only mapping of ``used_feature_per_session``.
    :cvar inferred_user_id: Inferred user identifier, part of the composite primary key.
    :cvar session_id: Session identifier, part of the composite primary key.
    :cvar feature_name: Feature name, part of the composite primary key.
    :cvar cnt: Count associated with the feature for the session.
    :cvar is_feature_used: Whether the feature is flagged as used for the session.
    :cvar session_created_at: Creation timestamp of the session.
    """

    __tablename__ = "used_feature_per_session"
    __table_args__ = {"schema": schema_name}
    __read_only__ = True

    # Composite primary key used by the ORM to identify a row.
    inferred_user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
    )
    session_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
    )
    feature_name: Mapped[str] = mapped_column(primary_key=True)

    cnt: Mapped[int]
    is_feature_used: Mapped[bool]
    session_created_at: Mapped[datetime]
Loading