-
Notifications
You must be signed in to change notification settings - Fork 0
fix: improve grouped dormitory facility search #36
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -419,13 +419,36 @@ def _answer_unspecified_dormitory_chat( | |
| raise | ||
|
|
||
| try: | ||
| chunks = search_hybrid_chunks_for_dormitories( | ||
| db=db, | ||
| query_text=question, | ||
| query_embedding=query_embedding, | ||
| dormitories=settings.chat_grouped_dormitories, | ||
| top_k=settings.chat_grouped_dormitory_top_k, | ||
| ) | ||
| if _should_search_each_dormitory(question): | ||
| chunks = _search_chunks_by_each_dormitory( | ||
| db=db, | ||
| question=question, | ||
| query_embedding=query_embedding, | ||
| dormitories=settings.chat_grouped_dormitories, | ||
| top_k_per_dormitory=1, | ||
| ) | ||
| else: | ||
| chunks = search_hybrid_chunks_for_dormitories( | ||
| db=db, | ||
| query_text=question, | ||
| query_embedding=query_embedding, | ||
| dormitories=settings.chat_grouped_dormitories, | ||
| top_k=settings.chat_grouped_dormitory_top_k, | ||
| ) | ||
|
|
||
| print("===== GROUPED SEARCH DEBUG =====") | ||
| print("question:", question) | ||
| print("chunks_count:", len(chunks)) | ||
| for index, chunk in enumerate(chunks, start=1): | ||
| print( | ||
| index, | ||
| chunk.get("document_id"), | ||
| chunk.get("dormitory"), | ||
| chunk.get("similarity"), | ||
| chunk.get("source"), | ||
| (chunk.get("content") or "")[:300], | ||
| ) | ||
| print("================================") | ||
|
|
||
|
|
||
|
|
||
|
|
@@ -865,3 +888,58 @@ def _should_pre_expand_query(question: str) -> bool: | |
| return True | ||
|
|
||
| return False | ||
|
|
||
|
|
||
| def _should_search_each_dormitory(question: str) -> bool: | ||
| compact_question = question.replace(" ", "") | ||
|
|
||
| dormitory_specific_triggers = [ | ||
| "휴게실", | ||
| "다리미", | ||
| "편의점", | ||
| "전자레인지", | ||
| "전자렌지", | ||
| "정수기", | ||
| "세탁실", | ||
| "탕비실", | ||
| "수용인원", | ||
| "몇명", | ||
| "몇명수용", | ||
| "호실수", | ||
| ] | ||
|
Comment on lines
+896
to
+909
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
|
||
| return any(trigger in compact_question for trigger in dormitory_specific_triggers) | ||
|
|
||
def _search_chunks_by_each_dormitory(
    db: Session,
    *,
    question: str,
    query_embedding: list[float],
    dormitories: list[str],
    top_k_per_dormitory: int = 1,
    candidate_k: int = 20,
    keyword_weight: float = 0.3,
) -> list[dict]:
    """Run one hybrid search per dormitory and merge the results.

    Each dormitory in ``dormitories`` is queried separately through
    ``search_hybrid_chunks``; the per-dormitory results are concatenated in
    the order the dormitories are given.

    Args:
        db: Database session forwarded to ``search_hybrid_chunks``.
        question: Question text, forwarded as ``query_text``.
        query_embedding: Embedding of the question, forwarded unchanged.
        dormitories: Dormitory names to query, one search per entry.
        top_k_per_dormitory: ``top_k`` passed to each per-dormitory search.
        candidate_k: ``candidate_k`` passed to each per-dormitory search.
            Defaults to the value that was previously hard-coded.
        keyword_weight: ``keyword_weight`` passed to each per-dormitory
            search. Defaults to the value that was previously hard-coded.

    Returns:
        The merged chunk dicts. A chunk whose ``"regulation_chunk_id"`` was
        already returned for an earlier dormitory is skipped; chunks without
        that id are always kept.
    """
    merged_chunks: list[dict] = []
    seen_chunk_ids: set[int] = set()

    for dormitory in dormitories:
        dormitory_chunks = search_hybrid_chunks(
            db=db,
            query_text=question,
            query_embedding=query_embedding,
            dormitory=dormitory,
            top_k=top_k_per_dormitory,
            candidate_k=candidate_k,
            keyword_weight=keyword_weight,
        )

        for chunk in dormitory_chunks:
            regulation_chunk_id = chunk.get("regulation_chunk_id")

            # Only chunks that carry an id can be de-duplicated.
            if regulation_chunk_id is not None:
                if regulation_chunk_id in seen_chunk_ids:
                    continue
                seen_chunk_ids.add(regulation_chunk_id)

            merged_chunks.append(chunk)

    return merged_chunks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
디버깅을 위해 추가된 `print` 문들이 코드에 남아 있습니다. 운영 환경의 로그를 오염시킬 수 있으므로, 이를 제거하거나 `logging` 모듈을 사용하여 적절한 로그 레벨(예: `DEBUG`)로 기록하는 것을 권장합니다.