diff --git a/apps/ai_analysis/repositories.py b/apps/ai_analysis/repositories.py index c1056d4..c135987 100644 --- a/apps/ai_analysis/repositories.py +++ b/apps/ai_analysis/repositories.py @@ -1,7 +1,8 @@ from django.db import transaction -from django.db.models import Avg - -from apps.ai_analysis.models import PhotoSubjectiveAttribute, PropertySubjectiveAttribute +from django.db.models import Avg + +from apps.ai_analysis.models import PhotoSubjectiveAttribute, PropertySubjectiveAttribute +from apps.search.embeddings import EmbeddingService class SubjectiveAttributeRepository: @@ -39,6 +40,8 @@ def refresh_property_aggregates(property_obj): defaults={"strength_mean": item["strength_mean"]}, ) - PropertySubjectiveAttribute.objects.filter(property=property_obj).exclude( - attribute_token__in=current_tokens - ).delete() + PropertySubjectiveAttribute.objects.filter(property=property_obj).exclude( + attribute_token__in=current_tokens + ).delete() + + EmbeddingService.refresh_property_embedding(property_obj) diff --git a/apps/properties/repositories.py b/apps/properties/repositories.py index 2ca6370..f480239 100644 --- a/apps/properties/repositories.py +++ b/apps/properties/repositories.py @@ -23,6 +23,7 @@ def create_property(*, rooms, rooms_extras, condo, validated_data): rooms=rooms, rooms_extras=rooms_extras, condo=condo, + embedding="[]", **validated_data, ) diff --git a/apps/properties/serializers/property_serializers.py b/apps/properties/serializers/property_serializers.py index 4700317..ecfe390 100644 --- a/apps/properties/serializers/property_serializers.py +++ b/apps/properties/serializers/property_serializers.py @@ -56,7 +56,7 @@ class PropertiesReadSerializer(serializers.ModelSerializer): images = PropertiesPhotosSerializer(many=True, read_only=True, source="photos") nearby_places = NearbyPlacesSerializer(many=True, read_only=True) average_rating = serializers.SerializerMethodField() - match_score = serializers.SerializerMethodField() + search_match_score = serializers.SerializerMethodField() owner_name = serializers.CharField(source="owner.name", read_only=True) subjective_attributes = PropertySubjectiveAttributeSerializer(many=True, read_only=True) @@ -65,8 +65,8 @@ def get_average_rating(self, obj): return obj.average_rating return ReviewUseCase.get_average_rating(obj) - def get_match_score(self, obj): - return getattr(obj, "match_score", None) + def get_search_match_score(self, obj): + return getattr(obj, "search_match_score", None) # se match_score não for calculado, remove ele da resposta def to_representation(self, instance): diff --git a/apps/properties/use_cases.py b/apps/properties/use_cases.py index 59cfbc1..397477b 100644 --- a/apps/properties/use_cases.py +++ b/apps/properties/use_cases.py @@ -4,6 +4,7 @@ from django.core.exceptions import ObjectDoesNotExist from apps.properties.repositories import PhotoRepository, PropertyRepository, ReviewRepository +from apps.search.embeddings import EmbeddingService class MatchScoreUseCase: @@ -277,12 +278,13 @@ def create_property(validated_data): if condo_data: condo, _ = PropertyRepository.get_or_create_condo(condo_data) - return PropertyRepository.create_property( + property_obj = PropertyRepository.create_property( rooms=rooms, rooms_extras=rooms_extras, condo=condo, validated_data=validated_data, ) + return EmbeddingService.refresh_property_embedding(property_obj) @staticmethod def update_property(instance, validated_data): @@ -304,7 +306,8 @@ def update_property(instance, validated_data): for attr, value in validated_data.items(): setattr(instance, attr, value) - return PropertyRepository.save_model(instance) + PropertyRepository.save_model(instance) + return EmbeddingService.refresh_property_embedding(instance) @staticmethod def delete_property(instance): diff --git a/apps/search/embeddings.py b/apps/search/embeddings.py new file mode 100644 index 0000000..edf4618 --- /dev/null +++ b/apps/search/embeddings.py @@ -0,0 +1,200 @@ +import hashlib +import json +import math +import re +import unicodedata +from decimal import Decimal + +from django.conf import settings + +try: + from openai import OpenAI +except ImportError: + OpenAI = None + + +LOCAL_EMBEDDING_DIMENSIONS = 256 + + +class EmbeddingService: + @staticmethod + def embed_text(text: str) -> list[float]: + text = (text or "").strip() + if not text: + return [] + + if OpenAI is not None and getattr(settings, "AI_API_KEY", None): + try: + client = OpenAI( + base_url=getattr(settings, "AI_API_BASE_URL", None), + api_key=settings.AI_API_KEY, + ) + response = client.embeddings.create( + model=getattr( + settings, + "SEARCH_EMBEDDING_MODEL", + "text-embedding-004", + ), + input=text, + ) + return [float(value) for value in response.data[0].embedding] + except Exception: + pass + + return EmbeddingService._local_embedding(text) + + @staticmethod + def serialize(embedding: list[float]) -> str: + return json.dumps(embedding or [], separators=(",", ":")) + + @staticmethod + def deserialize(raw_embedding) -> list[float]: + if not raw_embedding: + return [] + if isinstance(raw_embedding, list): + return [float(value) for value in raw_embedding] + try: + parsed = json.loads(raw_embedding) + except (TypeError, ValueError): + return [] + if not isinstance(parsed, list): + return [] + return [float(value) for value in parsed] + + @staticmethod + def cosine_similarity(left: list[float], right: list[float]) -> float: + if not left or not right or len(left) != len(right): + return 0.0 + + dot = sum(a * b for a, b in zip(left, right)) + left_norm = math.sqrt(sum(a * a for a in left)) + right_norm = math.sqrt(sum(b * b for b in right)) + if not left_norm or not right_norm: + return 0.0 + return dot / (left_norm * right_norm) + + @staticmethod + def refresh_property_embedding(property_obj): + document = PropertyEmbeddingDocumentBuilder.build(property_obj) + property_obj.embedding = EmbeddingService.serialize( + EmbeddingService.embed_text(document) + ) + property_obj.save(update_fields=["embedding"]) + return property_obj + + @staticmethod + def _local_embedding(text: str) -> list[float]: + vector = [0.0] * LOCAL_EMBEDDING_DIMENSIONS + tokens = re.findall(r"\w+", EmbeddingService._normalize_text(text)) + + for token in tokens: + digest = hashlib.sha256(token.encode("utf-8")).digest() + index = int.from_bytes(digest[:4], "big") % LOCAL_EMBEDDING_DIMENSIONS + sign = 1.0 if digest[4] % 2 == 0 else -1.0 + vector[index] += sign + + norm = math.sqrt(sum(value * value for value in vector)) + if not norm: + return [] + return [value / norm for value in vector] + + @staticmethod + def _normalize_text(text: str) -> str: + normalized = unicodedata.normalize("NFKD", text.lower()) + return "".join(char for char in normalized if not unicodedata.combining(char)) + + +class PropertyEmbeddingDocumentBuilder: + PROPERTY_TYPE_LABELS = {"A": "apartamento", "H": "casa"} + PURPOSE_LABELS = {"S": "venda", "R": "aluguel", "B": "venda ou aluguel"} + + ROOM_EXTRA_LABELS = { + "living_room": "sala de estar", + "garden": "jardim", + "kitchen": "cozinha", + "laundry_room": "lavanderia", + "pool": "piscina", + "office": "escritorio home office", + } + + CONDO_LABELS = { + "gym": "academia no condominio", + "pool": "piscina no condominio", + "court": "quadra no condominio", + "parks": "parques no condominio", + "party_spaces": "salao de festas", + "concierge": "portaria concierge", + } + + NEARBY_LABELS = { + "R": "restaurante perto", + "G": "academia perto", + "S": "escola perto", + "H": "hospital perto", + "SM": "supermercado mercado perto", + "P": "parque perto", + } + + @classmethod + def build(cls, property_obj) -> str: + parts = [ + cls.PROPERTY_TYPE_LABELS.get(property_obj.type, ""), + cls.PURPOSE_LABELS.get(property_obj.property_purpose, ""), + property_obj.description, + property_obj.address, + property_obj.neighborhood, + property_obj.city, + f"{property_obj.area} metros quadrados", + f"{cls._format_decimal(property_obj.price)} reais", + "mobiliado" if property_obj.has_mobilia else "sem mobilia", + ] + + rooms = getattr(property_obj, "rooms", None) + if rooms: + parts.extend( + [ + f"{rooms.bedrooms} quartos", + f"{rooms.bathrooms} banheiros", + f"{rooms.parking_spots} vagas garagem", + ] + ) + + extras = getattr(property_obj, "rooms_extras", None) + if extras: + parts.extend( + label + for field, label in cls.ROOM_EXTRA_LABELS.items() + if getattr(extras, field, False) + ) + + condo = getattr(property_obj, "condo", None) + if condo: + parts.extend([condo.name, condo.address]) + parts.extend( + label + for field, label in cls.CONDO_LABELS.items() + if getattr(condo, field, False) + ) + + nearby_places = getattr(property_obj, "nearby_places", None) + if nearby_places is not None: + parts.extend( + f"{place.name} {cls.NEARBY_LABELS.get(place.category, '')}" + for place in nearby_places.all() + ) + + subjective_attributes = getattr(property_obj, "subjective_attributes", None) + if subjective_attributes is not None: + parts.extend( + attribute.attribute_token.replace(".", " ") + for attribute in subjective_attributes.all() + if attribute.strength_mean >= 0.5 + ) + + return "\n".join(str(part) for part in parts if part not in (None, "")) + + @staticmethod + def _format_decimal(value) -> str: + if isinstance(value, Decimal): + return format(value, "f") + return str(value) diff --git a/apps/search/management/__init__.py b/apps/search/management/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/apps/search/management/__init__.py @@ -0,0 +1 @@ + diff --git a/apps/search/management/commands/__init__.py b/apps/search/management/commands/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/apps/search/management/commands/__init__.py @@ -0,0 +1 @@ + diff --git a/apps/search/management/commands/refresh_property_embeddings.py b/apps/search/management/commands/refresh_property_embeddings.py new file mode 100644 index 0000000..5255715 --- /dev/null +++ b/apps/search/management/commands/refresh_property_embeddings.py @@ -0,0 +1,31 @@ +from django.core.management.base import BaseCommand + +from apps.properties.models import Properties +from apps.search.embeddings import EmbeddingService + + +class Command(BaseCommand): + help = "Regenerates semantic search embeddings for active properties." + + def add_arguments(self, parser): + parser.add_argument( + "--all", + action="store_true", + help="Refresh inactive properties too.", + ) + + def handle(self, *args, **options): + queryset = ( + Properties.objects.select_related("rooms", "rooms_extras", "condo", "owner") + .prefetch_related("nearby_places", "subjective_attributes") + .order_by("id") + ) + if not options["all"]: + queryset = queryset.filter(status=True) + + total = queryset.count() + for index, property_obj in enumerate(queryset.iterator(), start=1): + EmbeddingService.refresh_property_embedding(property_obj) + self.stdout.write(f"[{index}/{total}] property {property_obj.id} refreshed") + + self.stdout.write(self.style.SUCCESS(f"Refreshed {total} property embeddings.")) diff --git a/apps/search/repositories.py b/apps/search/repositories.py index 17dedf0..2d4fc4f 100644 --- a/apps/search/repositories.py +++ b/apps/search/repositories.py @@ -1,4 +1,78 @@ -"""Search data-access layer. +"""Search data-access layer.""" -This module is intentionally minimal while the search feature is still under implementation. -""" +from django.db.models import Avg, Count + +from apps.properties.models import Properties +from apps.search.embeddings import EmbeddingService + + +class SearchRepository: + @staticmethod + def filter_properties(criteria): + queryset = ( + Properties.objects.filter(status=True) + .select_related("rooms", "rooms_extras", "condo", "owner") + .prefetch_related("photos", "nearby_places", "subjective_attributes") + .annotate( + average_rating=Avg("reviews__rating"), + favorite_count=Count("favorited_by", distinct=True), + ) + ) + + if criteria.get("property_type"): + queryset = queryset.filter(type=criteria["property_type"]) + + if criteria.get("min_price") is not None: + queryset = queryset.filter(price__gte=criteria["min_price"]) + + if criteria.get("max_price") is not None: + queryset = queryset.filter(price__lte=criteria["max_price"]) + + if criteria.get("city"): + queryset = queryset.filter(city__icontains=criteria["city"]) + + if criteria.get("neighborhood"): + queryset = queryset.filter(neighborhood__icontains=criteria["neighborhood"]) + + if criteria.get("bedrooms") is not None: + queryset = queryset.filter(rooms__bedrooms=criteria["bedrooms"]) + + if criteria.get("bathrooms") is not None: + queryset = queryset.filter(rooms__bathrooms=criteria["bathrooms"]) + + if criteria.get("parking_spots") is not None: + queryset = queryset.filter(rooms__parking_spots=criteria["parking_spots"]) + + nearby_categories = criteria.get("nearby_categories") or [] + if nearby_categories: + queryset = queryset.filter(nearby_places__category__in=nearby_categories) + + desired_attributes = criteria.get("desired_attributes") or [] + if desired_attributes: + queryset = queryset.filter( + subjective_attributes__attribute_token__in=desired_attributes, + subjective_attributes__strength_mean__gte=0.5, + ) + + return queryset.distinct().order_by("created_at") + + @staticmethod + def rank_properties_by_embedding(query_embedding, properties): + if not query_embedding: + return list(properties) + + ranked = [] + for property_obj in properties: + property_embedding = EmbeddingService.deserialize(property_obj.embedding) + score = EmbeddingService.cosine_similarity(query_embedding, property_embedding) + property_obj.search_match_score = round(score, 6) + ranked.append(property_obj) + + return sorted( + ranked, + key=lambda property_obj: ( + getattr(property_obj, "search_match_score", 0.0), + property_obj.created_at, + ), + reverse=True, + ) diff --git a/apps/search/serializers.py b/apps/search/serializers.py new file mode 100644 index 0000000..1711829 --- /dev/null +++ b/apps/search/serializers.py @@ -0,0 +1,65 @@ +from rest_framework import serializers + +from apps.ai_analysis.schema import VALID_TOKENS + + +class NaturalSearchRequestSerializer(serializers.Serializer): + query = serializers.CharField( + allow_blank=False, + trim_whitespace=True, + max_length=500, + ) + + +class NaturalSearchCriteriaSerializer(serializers.Serializer): + property_type = serializers.ChoiceField( + choices=["A", "H"], + allow_null=True, + required=False, + ) + min_price = serializers.DecimalField( + max_digits=15, + decimal_places=2, + min_value=0, + allow_null=True, + required=False, + ) + max_price = serializers.DecimalField( + max_digits=15, + decimal_places=2, + min_value=0, + allow_null=True, + required=False, + ) + city = serializers.CharField( + allow_blank=True, + allow_null=True, + required=False, + max_length=100, + ) + neighborhood = serializers.CharField( + allow_blank=True, + allow_null=True, + required=False, + max_length=100, + ) + bedrooms = serializers.IntegerField(min_value=0, allow_null=True, required=False) + bathrooms = serializers.IntegerField(min_value=0, allow_null=True, required=False) + parking_spots = serializers.IntegerField(min_value=0, allow_null=True, required=False) + desired_attributes = serializers.ListField( + child=serializers.ChoiceField(choices=sorted(VALID_TOKENS)), + required=False, + ) + nearby_categories = serializers.ListField( + child=serializers.ChoiceField(choices=["R", "G", "S", "H", "SM", "P"]), + required=False, + ) + + def validate(self, attrs): + min_price = attrs.get("min_price") + max_price = attrs.get("max_price") + if min_price is not None and max_price is not None and min_price > max_price: + raise serializers.ValidationError( + {"max_price": "max_price must be greater than or equal to min_price."} + ) + return attrs diff --git a/apps/search/services.py b/apps/search/services.py index e12651e..3c41127 100644 --- a/apps/search/services.py +++ b/apps/search/services.py @@ -1,5 +1,308 @@ -"""Search service layer. +import json +import re +from decimal import Decimal, InvalidOperation +from django.conf import settings -The search app is currently a placeholder, but this file defines where -future search orchestration must live to preserve layered architecture. -""" +from apps.ai_analysis.schema import VALID_TOKENS +from apps.search.embeddings import EmbeddingService +from apps.search.repositories import SearchRepository +from apps.search.serializers import NaturalSearchCriteriaSerializer + +try: + from openai import OpenAI +except ImportError: + OpenAI = None + + +PROPERTY_TYPE_MAP = { + "apartment": "A", + "apartamento": "A", + "apartamentos": "A", + "ap": "A", + "apt": "A", + "casa": "H", + "casas": "H", + "house": "H", +} + +NEARBY_CATEGORY_MAP = { + "restaurant": "R", + "restaurante": "R", + "restaurantes": "R", + "gym": "G", + "academia": "G", + "academias": "G", + "school": "S", + "escola": "S", + "escolas": "S", + "hospital": "H", + "hospitais": "H", + "supermarket": "SM", + "supermercado": "SM", + "mercado": "SM", + "mercados": "SM", + "park": "P", + "parque": "P", + "parques": "P", +} + +ATTRIBUTE_KEYWORDS = { + "iluminado": "aesthetics.color.brightness", + "iluminada": "aesthetics.color.brightness", + "claro": "aesthetics.color.brightness", + "clara": "aesthetics.color.brightness", + "aconchegante": "livability.coziness", + "confortavel": "livability.coziness", + "verde": "livability.verdancy", + "plantas": "livability.verdancy", + "amplo": "livability.spaciousness", + "ampla": "livability.spaciousness", + "espacoso": "livability.spaciousness", + "espacosa": "livability.spaciousness", + "ventilado": "current_state.ventilation", + "ventilada": "current_state.ventilation", + "limpo": "current_state.cleanliness", + "limpa": "current_state.cleanliness", + "lazer": "current_state.leisure", + "moderno": "aesthetics.architecture.contemporary", + "moderna": "aesthetics.architecture.contemporary", + "minimalista": "aesthetics.architecture.minimalist", +} + + +class NaturalLanguageQueryInterpreter: + """Turns a free-text real-estate search query into structured criteria.""" + + EMPTY_RESULT = { + "property_type": None, + "min_price": None, + "max_price": None, + "city": None, + "neighborhood": None, + "bedrooms": None, + "bathrooms": None, + "parking_spots": None, + "desired_attributes": [], + "nearby_categories": [], + } + + RESPONSE_FORMAT = { + "type": "json_schema", + "json_schema": { + "name": "natural_search_filters", + "strict": True, + "schema": { + "type": "object", + "additionalProperties": False, + "required": list(EMPTY_RESULT.keys()), + "properties": { + "property_type": {"type": ["string", "null"], "enum": ["A", "H", None]}, + "min_price": {"type": ["number", "null"]}, + "max_price": {"type": ["number", "null"]}, + "city": {"type": ["string", "null"]}, + "neighborhood": {"type": ["string", "null"]}, + "bedrooms": {"type": ["integer", "null"]}, + "bathrooms": {"type": ["integer", "null"]}, + "parking_spots": {"type": ["integer", "null"]}, + "desired_attributes": { + "type": "array", + "items": {"type": "string", "enum": sorted(VALID_TOKENS)}, + }, + "nearby_categories": { + "type": "array", + "items": {"type": "string", "enum": ["R", "G", "S", "H", "SM", "P"]}, + }, + }, + }, + }, + } + + @classmethod + def interpret(cls, query: str) -> dict: + query = (query or "").strip() + if not query: + return cls._validate(cls.EMPTY_RESULT.copy()) + + if OpenAI is None or not getattr(settings, "AI_API_KEY", None): + return cls._rule_based_interpret(query) + + try: + return cls._validate(cls._normalize(cls._llm_interpret(query))) + except Exception: + return cls._rule_based_interpret(query) + + @classmethod + def _llm_interpret(cls, query: str) -> dict: + client = OpenAI( + base_url=getattr(settings, "AI_API_BASE_URL", None), + api_key=settings.AI_API_KEY, + ) + response = client.chat.completions.create( + model=settings.AI_MODEL, + messages=[ + { + "role": "system", + "content": ( + "You extract structured real-estate search criteria from " + "Brazilian Portuguese or English user queries. Return only " + "the JSON requested by the schema. Use null when unknown. " + "Use property_type A for apartment and H for house." + ), + }, + {"role": "user", "content": query}, + ], + response_format=cls.RESPONSE_FORMAT, + temperature=0, + ) + content = response.choices[0].message.content + return json.loads(content) + + @classmethod + def _rule_based_interpret(cls, query: str) -> dict: + normalized = cls._strip_accents(query.lower()) + result = cls.EMPTY_RESULT.copy() + + for token, property_type in PROPERTY_TYPE_MAP.items(): + if re.search(rf"\b{re.escape(token)}\b", normalized): + result["property_type"] = property_type + break + + result["bedrooms"] = cls._extract_number_before( + normalized, ["quarto", "quartos", "dormitorio", "dormitorios"] + ) + result["bathrooms"] = cls._extract_number_before( + normalized, ["banheiro", "banheiros"] + ) + result["parking_spots"] = cls._extract_number_before( + normalized, ["vaga", "vagas", "garagem"] + ) + result["max_price"] = cls._extract_max_price(normalized) + + result["desired_attributes"] = sorted( + { + attribute + for keyword, attribute in ATTRIBUTE_KEYWORDS.items() + if re.search(rf"\b{re.escape(keyword)}\b", normalized) + } + ) + result["nearby_categories"] = sorted( + { + category + for keyword, category in NEARBY_CATEGORY_MAP.items() + if re.search(rf"\b{re.escape(keyword)}\b", normalized) + } + ) + + return cls._validate(cls._normalize(result)) + + @classmethod + def _normalize(cls, data: dict) -> dict: + result = cls.EMPTY_RESULT.copy() + result.update(data or {}) + + result["property_type"] = cls._normalize_property_type(result["property_type"]) + result["min_price"] = cls._normalize_decimal(result["min_price"]) + result["max_price"] = cls._normalize_decimal(result["max_price"]) + result["bedrooms"] = cls._normalize_int(result["bedrooms"]) + result["bathrooms"] = cls._normalize_int(result["bathrooms"]) + result["parking_spots"] = cls._normalize_int(result["parking_spots"]) + result["desired_attributes"] = [ + item for item in result["desired_attributes"] if item in VALID_TOKENS + ] + result["nearby_categories"] = [ + item + for item in result["nearby_categories"] + if item in {"R", "G", "S", "H", "SM", "P"} + ] + + for key in ("city", "neighborhood"): + value = result[key] + result[key] = value.strip() if isinstance(value, str) and value.strip() else None + + return result + + @staticmethod + def _validate(data: dict) -> dict: + serializer = NaturalSearchCriteriaSerializer(data=data) + serializer.is_valid(raise_exception=True) + return serializer.validated_data + + @staticmethod + def _normalize_property_type(value): + if value in {"A", "H", None}: + return value + return PROPERTY_TYPE_MAP.get(str(value).strip().lower()) + + @staticmethod + def _normalize_decimal(value): + if value in (None, ""): + return None + try: + return Decimal(str(value)) + except (InvalidOperation, TypeError, ValueError): + return None + + @staticmethod + def _normalize_int(value): + if value in (None, ""): + return None + try: + parsed = int(value) + except (TypeError, ValueError): + return None + return parsed if parsed >= 0 else None + + @staticmethod + def _extract_number_before(query, words): + pattern = rf"(\d+)\s+(?:{'|'.join(re.escape(word) for word in words)})" + match = re.search(pattern, query) + return int(match.group(1)) if match else None + + @staticmethod + def _extract_max_price(query): + match = re.search( + r"(?:ate|menos de|no maximo)\s+r?\$?\s*(\d+(?:[.,]\d+)?)\s*(milhao|milhoes|mil|k)?", + query, + ) + if not match: + return None + + value = Decimal(match.group(1).replace(",", ".")) + suffix = match.group(2) + if suffix in {"milhao", "milhoes"}: + value *= Decimal("1000000") + elif suffix in {"mil", "k"}: + value *= Decimal("1000") + return value + + @staticmethod + def _strip_accents(value: str) -> str: + translation = str.maketrans( + "áàãâäéèêëíìîïóòõôöúùûüç", + "aaaaaeeeeiiiiooooouuuuc", + ) + return value.translate(translation) + + +def query_interpreter(query: str) -> dict: + return NaturalLanguageQueryInterpreter.interpret(query) + + +def query_interpretor(query: str) -> dict: + return query_interpreter(query) + + +class NaturalSearchService: + @staticmethod + def search(query: str): + criteria = NaturalLanguageQueryInterpreter.interpret(query) + query_embedding = EmbeddingService.embed_text(query) + queryset = SearchRepository.filter_properties(criteria) + results = SearchRepository.rank_properties_by_embedding( + query_embedding, + queryset, + ) + return { + "interpreted_filters": criteria, + "results": results, + } diff --git a/apps/search/urls.py b/apps/search/urls.py index e39cb2c..9ffebdb 100644 --- a/apps/search/urls.py +++ b/apps/search/urls.py @@ -1,3 +1,6 @@ from django.urls import path +from .views import SearchNaturalView -urlpatterns = [] +urlpatterns = [ + path("natural/", SearchNaturalView.as_view(), name="search-natural"), +] diff --git a/apps/search/views.py b/apps/search/views.py index 91ea44a..f7c6a07 100644 --- a/apps/search/views.py +++ b/apps/search/views.py @@ -1,3 +1,31 @@ -from django.shortcuts import render +from rest_framework.permissions import AllowAny +from rest_framework.response import Response +from rest_framework.views import APIView -# Create your views here. +from apps.properties.serializers.property_serializers import PropertiesReadSerializer +from apps.search.serializers import NaturalSearchRequestSerializer +from apps.search.services import NaturalSearchService + + +class SearchNaturalView(APIView): + permission_classes = [AllowAny] + + def post(self, request): + serializer = NaturalSearchRequestSerializer(data=request.data) + serializer.is_valid(raise_exception=True) + + search_result = NaturalSearchService.search( + serializer.validated_data["query"] + ) + results_serializer = PropertiesReadSerializer( + search_result["results"], + many=True, + context={"request": request}, + ) + + return Response( + { + "interpreted_filters": search_result["interpreted_filters"], + "results": results_serializer.data, + } + ) diff --git a/config/settings.py b/config/settings.py index 840481e..0ebfeef 100644 --- a/config/settings.py +++ b/config/settings.py @@ -140,6 +140,7 @@ AI_API_BASE_URL = config("AI_API_BASE_URL", default="https://generativelanguage.googleapis.com/v1beta/openai/") AI_API_KEY = config("AI_API_KEY", default=None) AI_MODEL = config("AI_MODEL", default="gemini-3-flash-preview") +SEARCH_EMBEDDING_MODEL = config("SEARCH_EMBEDDING_MODEL", default="text-embedding-004") # Default analysis prompt # Centralised here so it can be overridden per-environment without touching code. # Accepts a custom prompt for future use cases. @@ -166,4 +167,4 @@ GOOGLE_PLACES_API_KEY = config("GOOGLE_PLACES_API_KEY", default="") -STATICFILES_DIRS = [BASE_DIR / "frontend"] \ No newline at end of file +STATICFILES_DIRS = [BASE_DIR / "frontend"] diff --git a/docs/diagrama_camadas.mermaid b/docs/diagrama_camadas.mermaid index ad3bca1..51d9609 100644 --- a/docs/diagrama_camadas.mermaid +++ b/docs/diagrama_camadas.mermaid @@ -11,6 +11,7 @@ flowchart TB V7["AnalyzePropertyView"] V8["RegisterUserView"] V9["UserViewSet"] + V10["SearchNaturalView"] end subgraph SERIAL["Serializers"] S1["PropertiesReadSerializer"] @@ -21,6 +22,8 @@ flowchart TB S6["UserSerializer"] S7["RegisterSerializer"] S8["AnalyzePropertyRequestSerializer"] + S9["NaturalSearchRequestSerializer"] + S10["NaturalSearchCriteriaSerializer"] end subgraph PERMS["Permissions"] P1["IsAdvertiser"] @@ -41,6 +44,10 @@ flowchart TB SV2["UserService"] SV3["FavoriteService"] CL1["AiVisionClient"] + SV4["NaturalSearchService"] + SV5["NaturalLanguageQueryInterpreter"] + SV6["EmbeddingService"] + SV7["PropertyEmbeddingDocumentBuilder"] end end @@ -54,6 +61,7 @@ flowchart TB R5["UserRepository"] R6["SearchPreferenceRepository"] R7["FavoriteRepository"] + R8["SearchRepository"] end subgraph MODELS["Models"] M1["Properties"] @@ -66,6 +74,7 @@ flowchart TB M8["SearchPreference"] M9["PhotoSubjectiveAttribute"] M10["PropertySubjectiveAttribute"] + M11["NearbyPlaces"] end end