From eb8f00ca2f394d738e5522f981a5c3c620419b62 Mon Sep 17 00:00:00 2001 From: 1024andrew <1024andrew@naver.com> Date: Wed, 6 May 2026 00:52:55 +0900 Subject: [PATCH] =?UTF-8?q?=EC=83=9D=ED=99=9C=EA=B4=80=20=EB=AF=B8?= =?UTF-8?q?=EC=A7=80=EC=A0=95=20=EC=A7=88=EB=AC=B8=EC=9D=98=20=EC=83=9D?= =?UTF-8?q?=ED=99=9C=EA=B4=80=EB=B3=84=20=EA=B2=80=EC=83=89=20=EA=B0=9C?= =?UTF-8?q?=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/services/chat_service.py | 100 +++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 27 deletions(-) diff --git a/app/services/chat_service.py b/app/services/chat_service.py index 660c9f2..690c296 100644 --- a/app/services/chat_service.py +++ b/app/services/chat_service.py @@ -422,13 +422,18 @@ def _answer_unspecified_dormitory_chat( raise try: + + print("should each dorm:", _should_search_each_dormitory(question)) + print("question:", question) + + if _should_search_each_dormitory(question): chunks = _search_chunks_by_each_dormitory( db=db, question=question, query_embedding=query_embedding, dormitories=settings.chat_grouped_dormitories, - top_k_per_dormitory=1, + top_k_per_dormitory=3, ) else: chunks = search_hybrid_chunks_for_dormitories( @@ -438,20 +443,18 @@ def _answer_unspecified_dormitory_chat( dormitories=settings.chat_grouped_dormitories, top_k=settings.chat_grouped_dormitory_top_k, ) - - print("===== GROUPED SEARCH DEBUG =====") - print("question:", question) + + print("===== FINAL EACH DORMITORY CHUNKS =====") print("chunks_count:", len(chunks)) for index, chunk in enumerate(chunks, start=1): print( index, chunk.get("document_id"), - chunk.get("dormitory"), - chunk.get("similarity"), chunk.get("source"), - (chunk.get("content") or "")[:300], + (chunk.get("content") or "")[:200], ) - print("================================") + print("======================================") + @@ -883,7 +886,7 @@ def _should_pre_expand_query(question: str) -> bool: "해먹", "음식해", "음식해먹", - "음식 해먹" + "음식 해먹", "전기포트", "라면포트", "에어프라이어", @@ -896,25 +899,29 @@ def _should_pre_expand_query(question: str) -> bool: return False +DORMITORY_SPECIFIC_SEARCH_TRIGGERS = [ + "휴게실", + "다리미", + "편의점", + "전자레인지", + "전자렌지", + "정수기", + "세탁실", + "수용인원", + "몇명", + "몇명수용", + "호실수", +] + + def _should_search_each_dormitory(question: str) -> bool: compact_question = question.replace(" ", "") - dormitory_specific_triggers = [ - "휴게실", - "다리미", - "편의점", - "전자레인지", - "전자렌지", - "정수기", - "세탁실", - "탕비실", - "수용인원", - "몇명", - "몇명수용", - "호실수", - ] + return any( + trigger in compact_question + for trigger in DORMITORY_SPECIFIC_SEARCH_TRIGGERS + ) - return any(trigger in compact_question for trigger in dormitory_specific_triggers) def _search_chunks_by_each_dormitory( db: Session, @@ -922,7 +929,7 @@ def _search_chunks_by_each_dormitory( question: str, query_embedding: list[float], dormitories: list[str], - top_k_per_dormitory: int = 1, + top_k_per_dormitory: int = 3, ) -> list[dict]: merged_chunks: list[dict] = [] seen_chunk_ids: set[int] = set() @@ -938,7 +945,12 @@ def _search_chunks_by_each_dormitory( keyword_weight=0.3, ) - for chunk in dormitory_chunks: + dormitory_chunks = _prefer_target_dormitory_chunks( + dormitory_chunks, + dormitory, + ) + + for chunk in dormitory_chunks[:1]: regulation_chunk_id = chunk.get("regulation_chunk_id") if regulation_chunk_id is not None: @@ -948,4 +960,38 @@ def _search_chunks_by_each_dormitory( merged_chunks.append(chunk) - return merged_chunks \ No newline at end of file + return merged_chunks + +def _prefer_target_dormitory_chunks( + chunks: list[dict], + dormitory: str, +) -> list[dict]: + def score(chunk: dict) -> tuple[int, int, float]: + text = " ".join( + [ + str(chunk.get("dormitory") or ""), + str(chunk.get("title") or ""), + str(chunk.get("source") or ""), + str(chunk.get("content") or ""), + ] + ) + + source_type = str(chunk.get("source_type") or "") + + dormitory_match_score = 1 if dormitory in text else 0 + official_score = 1 if source_type == "official" else 0 + + similarity = float( + chunk.get("similarity") + or chunk.get("vector_score") + or chunk.get("vector_similarity") + or 0.0 + ) + + return ( + dormitory_match_score, + official_score, + similarity, + ) + + return sorted(chunks, key=score, reverse=True) \ No newline at end of file