Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions backend/alembic/versions/875afd8e0b2e_add_summary_to_document.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Add summary to document

Revision ID: 875afd8e0b2e
Revises: 6e2d1d2b7c4a
Create Date: 2026-03-28 16:00:35.361232

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision: str = '875afd8e0b2e'
down_revision: Union[str, Sequence[str], None] = '6e2d1d2b7c4a'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Apply this revision: add a nullable ``summary`` text column to ``documents``."""
    # Nullable, so existing rows need no backfill when the column is added.
    summary_column = sa.Column('summary', sa.Text(), nullable=True)
    op.add_column('documents', summary_column)


def downgrade() -> None:
    """Revert this revision by dropping the ``summary`` column from ``documents``."""
    # Dropping the column discards any summaries stored in it.
    op.drop_column(table_name='documents', column_name='summary')
36 changes: 35 additions & 1 deletion backend/app/api/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
from app.utils.utils import get_page_count
from app.core.deps import get_current_user
from app.repositories.document_repository import DocumentRepository
from app.schemas.document import DocumentOut
from app.schemas.document import DocumentOut, DocumentSummaryOut
from app.utils.utils import process_document_task
from app.services.document_service import DocumentService


router = APIRouter(prefix="/documents", tags=["document"])
Expand Down Expand Up @@ -88,3 +89,36 @@ def delete_document(
if DocumentRepository.delete_document(db, document_id):
return {"message": "Document and all related data deleted"}

@router.post("/{document_id}/summary", response_model=DocumentSummaryOut)
def generate_document_summary_endpoint(
    document_id: UUID,
    db: Session = Depends(get_db),
    user: User = Depends(get_current_user)
):
    """Generate (or return the stored) summary for one of the caller's documents.

    Args:
        document_id: Identifier of the document to summarize.
        db: Database session injected by FastAPI.
        user: Authenticated user injected by FastAPI.

    Returns:
        A dict with ``summary`` and ``message`` keys matching ``DocumentSummaryOut``.

    Raises:
        HTTPException: 404 when the document does not exist or belongs to another
            user; 409 when the service rejects the request with a ``ValueError``
            (for example, no text has been extracted yet); 500 on any other
            failure during generation.
    """
    import logging

    document = DocumentRepository.get_document_by_id(db, document_id)
    # A document owned by someone else is reported as missing so that the
    # endpoint does not reveal which ids exist.
    if not document or document.user_id != user.id:
        raise HTTPException(status_code=404, detail="Document not found")

    if document.summary:
        return {"summary": document.summary, "message": "Summary already created"}

    try:
        summary = DocumentService.generate_document_summary(db, document_id)
    except ValueError as exc:
        # ValueError carries a message written by the service itself, so it is
        # safe to show to the client and is not a server fault.
        raise HTTPException(status_code=409, detail=str(exc)) from exc
    except Exception as exc:
        # Log the real cause server-side; never echo arbitrary exception text
        # (upstream URLs, SQL, stack details) back to the caller.
        logging.getLogger(__name__).exception(
            "Summary generation failed for document %s", document_id
        )
        raise HTTPException(
            status_code=500, detail="Failed to generate document summary"
        ) from exc

    return {"summary": summary, "message": "Summary generated successfully"}

@router.get("/{document_id}/summary", response_model=DocumentSummaryOut)
def get_document_summary_endpoint(
    document_id: UUID,
    db: Session = Depends(get_db),
    user: User = Depends(get_current_user)
):
    """Return the stored summary of one of the caller's documents.

    Responds with 404 when the document is missing, is owned by a different
    user, or has no summary stored yet. This endpoint never triggers generation.
    """
    record = DocumentRepository.get_document_by_id(db, document_id)

    # Missing and foreign documents produce the same 404 response.
    if not record:
        raise HTTPException(status_code=404, detail="Document not found")
    if record.user_id != user.id:
        raise HTTPException(status_code=404, detail="Document not found")

    stored_summary = record.summary
    if stored_summary:
        return {"summary": stored_summary, "message": "Summary retrieved from storage"}

    raise HTTPException(status_code=404, detail="Summary not generated yet")
4 changes: 3 additions & 1 deletion backend/app/models/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import enum

from sqlalchemy import Column, ForeignKey, Index, Integer, String, DateTime, Enum, func
from sqlalchemy import Column, ForeignKey, Index, Integer, String, DateTime, Enum, func, Text
from sqlalchemy.orm import relationship
from sqlalchemy.dialects.postgresql import UUID

Expand Down Expand Up @@ -44,6 +44,8 @@ class Document(Base):
nullable=False
)

summary = Column(Text, nullable=True)

user = relationship("User", back_populates="documents")
chunks = relationship("Chunk", back_populates="document", cascade="all, delete")
ai_interactions = relationship("AIInteraction", back_populates="document", cascade="all, delete-orphan")
Expand Down
16 changes: 16 additions & 0 deletions backend/app/repositories/document_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from uuid import UUID

from app.models.document import Document
from app.models.chunk import Chunk


class DocumentRepository:
Expand Down Expand Up @@ -54,3 +55,18 @@ def delete_document(db: Session, document_id: UUID) -> bool:
db.delete(document)
db.commit()
return True

@staticmethod
def update_document_summary(db: Session, document_id: UUID, summary: str) -> None:
    """Store ``summary`` on the document with the given id and commit.

    Raises:
        ValueError: If no document with ``document_id`` exists.
    """
    matching = db.query(Document).filter(Document.id == document_id)
    record = matching.first()

    if record:
        record.summary = summary
        db.commit()
        # Reload the instance from the database after the commit.
        db.refresh(record)
        return

    raise ValueError(f"Document with id {document_id} not found")

@staticmethod
def get_document_chunks(db: Session, document_id: UUID) -> list[Chunk]:
    """Return every chunk of the document, sorted by ascending ``chunk_index``."""
    chunk_query = db.query(Chunk).filter(Chunk.document_id == document_id)
    in_order = chunk_query.order_by(Chunk.chunk_index.asc())
    return in_order.all()
5 changes: 5 additions & 0 deletions backend/app/schemas/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ class DocumentOut(BaseModel):
filename: str
pages: int
status: str
summary: str | None = None
created_at: datetime

model_config = ConfigDict(from_attributes=True)

class DocumentSummaryOut(BaseModel):
    """Response payload carrying a document summary and an optional status note."""

    # The summary text itself; required.
    summary: str
    # Optional human-readable note about the response; defaults to None.
    message: str | None = None
29 changes: 29 additions & 0 deletions backend/app/services/document_service.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import fitz # PyMuPDF
from pptx import Presentation
from io import BytesIO
from sqlalchemy.orm import Session
from uuid import UUID
from app.repositories.document_repository import DocumentRepository
from app.services.llm_service import LLMService


class DocumentService:
Expand Down Expand Up @@ -49,3 +53,28 @@ def _clean_text(text: str) -> str:
text = text.replace("\x00", "")
text = text.replace("\n\n", "\n")
return text.strip()

@staticmethod
def generate_document_summary(db: Session, document_id: UUID) -> str:
    """Return the document's summary, generating and storing it on first use.

    Args:
        db: Database session used for all repository calls.
        document_id: Identifier of the document to summarize.

    Returns:
        The stored summary if one exists, otherwise the newly generated one.

    Raises:
        ValueError: If the document does not exist or has no usable chunk text.
        RuntimeError: If the LLM returns an empty summary. Nothing is stored
            in that case, so a later call can retry.
    """
    # Reuse a previously stored summary instead of calling the LLM again.
    document = DocumentRepository.get_document_by_id(db, document_id)
    if not document:
        raise ValueError(f"Document {document_id} not found")
    if document.summary:
        return document.summary

    chunks = DocumentRepository.get_document_chunks(db, document_id)

    # Skip chunks with no text so a None value cannot break the join and
    # empty chunks do not add blank separators to the prompt.
    combined_text = "\n\n".join(chunk.text for chunk in chunks if chunk.text)
    if not combined_text.strip():
        raise ValueError(f"No text extracted for document {document_id}")

    summary_markdown = LLMService.generate_summary(combined_text)

    # An empty summary is falsy, so storing it would defeat the cache check
    # above and present a blank result as a success.
    if not summary_markdown or not summary_markdown.strip():
        raise RuntimeError(
            f"LLM returned an empty summary for document {document_id}"
        )

    DocumentRepository.update_document_summary(db, document_id, summary_markdown)

    return summary_markdown

33 changes: 32 additions & 1 deletion backend/app/services/llm_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,37 @@ def generate(cls, prompt: str) -> str:

response.raise_for_status()

data = response.json()
return data.get("choices", [{}])[0].get("message", {}).get("content", "")

@classmethod
def generate_summary(cls, text: str, max_chars: int = 120000) -> str:
    """Ask the chat-completion API for a Markdown study summary of ``text``.

    Args:
        text: Full document text to summarize.
        max_chars: Maximum number of leading characters of ``text`` sent to
            the model; the rest is dropped to bound token usage.

    Returns:
        The content of the first returned choice, or ``""`` when the response
        has no choices or no content.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    system_prompt = (
        "You are an expert tutor. Provide a comprehensive, structured, and interactive summary of the provided text. "
        "Use Markdown formatting, bold key terms, use bullet points, and clear headings. "
        "Ensure everything a user needs to know to ace an exam on this topic is included. Be concise to save tokens but highly informative."
    )

    # Slicing already returns the whole string when it is shorter than the cap.
    truncated_text = text[:max_chars]

    payload = {
        "model": cls.MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Summarize this document comprehensively into key points:\n\n{truncated_text}"}
        ],
        "temperature": 0.3,
        "max_tokens": 1500
    }

    response = requests.post(
        cls.API_URL,
        headers=cls.HEADERS,
        json=payload,
        timeout=180
    )

    response.raise_for_status()
    data = response.json()

    # `or` fallbacks cover a missing key, an empty list and an explicit null,
    # none of which the plain `.get(key, default)` chain handled.
    choices = data.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content") or ""
109 changes: 109 additions & 0 deletions frontend/src/components/study-room/DocumentSummaryView.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import React, { useEffect, useState } from 'react';
import ReactMarkdown from 'react-markdown';
import remarkGfm from 'remark-gfm';
import { FileText, Check, Copy } from 'lucide-react';
import { documentService, type DocumentSummary } from '../../services/document.service';
import { Button } from '../ui/Button';
import { LoadingView } from './LoadingView';
import { Skeleton } from '../ui/Skeleton';

interface DocumentSummaryViewProps {
documentId: string;
}

/**
 * Shows the AI-generated summary for a document.
 *
 * On mount and whenever `documentId` changes, the component first requests the
 * stored summary; if that request fails with a 404 it asks the backend to
 * generate one. While waiting it renders a loading placeholder, on failure an
 * error message, and on success the Markdown summary with a copy button.
 */
export const DocumentSummaryView: React.FC<DocumentSummaryViewProps> = ({ documentId }) => {
  const [summary, setSummary] = useState<DocumentSummary | null>(null);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [isCopied, setIsCopied] = useState(false);

  useEffect(() => {
    if (!documentId) return;

    // Set by the cleanup function so a response belonging to a previous
    // documentId (or an unmounted component) cannot overwrite current state.
    let cancelled = false;

    // Drop the previous document's data so it is never shown for the new id.
    setSummary(null);
    setError(null);
    setIsLoading(true);

    const fetchOrGenerateSummary = async () => {
      try {
        let data: DocumentSummary;
        try {
          // Try getting the existing summary first
          data = await documentService.getSummary(documentId);
        } catch (e: unknown) {
          const err = e as { response?: { status?: number }, status?: number };
          // If not found (404), generate it
          if (err.response?.status === 404 || err.status === 404) {
            data = await documentService.generateSummary(documentId);
          } else {
            throw e;
          }
        }
        if (!cancelled) {
          setSummary(data);
        }
      } catch (e: unknown) {
        if (!cancelled) {
          console.error('Failed to load summary:', e);
          setError('Failed to generate or retrieve the document summary. Please try again later.');
        }
      } finally {
        if (!cancelled) {
          setIsLoading(false);
        }
      }
    };

    void fetchOrGenerateSummary();

    return () => {
      cancelled = true;
    };
    // Only documentId drives fetching; depending on `summary` blocked the
    // refetch when switching documents.
  }, [documentId]);

  const handleCopy = async () => {
    if (!summary) return;
    try {
      // Wait for the write so "Copied" is only shown when it succeeded.
      await navigator.clipboard.writeText(summary.summary);
      setIsCopied(true);
      setTimeout(() => setIsCopied(false), 2000);
    } catch (e: unknown) {
      console.error('Failed to copy summary:', e);
    }
  };

  return (
    <div className="flex flex-col h-full bg-surface">
      <div className="flex-1 overflow-y-auto px-6 py-6 scrollbar-hide">
        {isLoading ? (
          <div className="flex flex-col items-center justify-center h-full space-y-6 animate-in fade-in duration-700">
            <LoadingView text="AI is reading and summarizing your document..." />
            <div className="w-full max-w-sm mt-8 space-y-3 opacity-50">
              <Skeleton className="h-4 w-full rounded-md" />
              <Skeleton className="h-4 w-[90%] rounded-md" />
              <Skeleton className="h-4 w-[95%] rounded-md" />
              <Skeleton className="h-4 w-[80%] rounded-md" />
              <Skeleton className="h-4 w-full mt-6 rounded-md" />
              <Skeleton className="h-4 w-[85%] rounded-md" />
              <Skeleton className="h-4 w-[90%] rounded-md" />
            </div>
          </div>
        ) : error ? (
          <div className="flex flex-col items-center justify-center h-full py-12 text-center animate-fade-in">
            <div className="w-12 h-12 bg-destructive/10 rounded-2xl flex items-center justify-center mb-4">
              <FileText className="w-6 h-6 text-destructive" />
            </div>
            <p className="text-sm font-medium text-foreground">{error}</p>
          </div>
        ) : summary ? (
          <div className="prose prose-sm md:prose-base prose-invert max-w-none animate-fade-in-up pb-8 selection:bg-primary/30">
            <ReactMarkdown remarkPlugins={[remarkGfm]}>
              {summary.summary}
            </ReactMarkdown>
          </div>
        ) : null}
      </div>

      {/* Footer */}
      {summary && !isLoading && !error && (
        <div className="px-6 py-4 border-t border-border/10 bg-surface/80 backdrop-blur-md shrink-0 flex justify-end">
          <Button
            variant="secondary"
            size="sm"
            onClick={handleCopy}
            icon={isCopied ? <Check className="w-4 h-4" /> : <Copy className="w-4 h-4" />}
          >
            {isCopied ? 'Copied to Clipboard' : 'Copy Summary'}
          </Button>
        </div>
      )}
    </div>
  );
};
Loading
Loading