Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""Tests for text index BSON type validation.

Verifies that text index key specifier and options reject invalid BSON types
and accept valid types.
"""

import pytest

from documentdb_tests.framework.assertions import assertFailureCode, assertNotError
from documentdb_tests.framework.bson_type_validator import (
BsonType,
BsonTypeTestCase,
generate_bson_acceptance_test_cases,
generate_bson_rejection_test_cases,
)
from documentdb_tests.framework.error_codes import CANNOT_CREATE_INDEX_ERROR, TYPE_MISMATCH_ERROR
from documentdb_tests.framework.executor import execute_command

# Module-level pytest marker: tags every test in this module as `text_search`.
pytestmark = pytest.mark.text_search

# Baseline text index definition. Each test copies it and replaces exactly one
# entry (the key specifier or a single option) with the sample value under test.
_DEFAULT_INDEX = dict(key={"content": "text"}, name="test_idx")

# Type-validation matrix for the text index key specifier and the text index
# options. Each entry lists the BSON types the keyword accepts, a sample value
# for every accepted type, and the error code expected for the remaining types
# (with per-type overrides where the code differs from the default).
TEXT_INDEX_PARAMS = [
    BsonTypeTestCase(
        id="key_specifier",
        keyword="key_specifier",
        msg="text index key specifier should only accept the string 'text'",
        valid_types=[BsonType.STRING],
        valid_inputs={BsonType.STRING: "text"},
        # A numeric specifier does not error; it builds a regular (non-text) index.
        # That is neither an accepted text specifier nor a rejection, so these
        # types are left out of the generated rejection cases.
        skip_rejection_types=[BsonType.DOUBLE, BsonType.INT, BsonType.LONG, BsonType.DECIMAL],
        default_error_code=CANNOT_CREATE_INDEX_ERROR,
    ),
    BsonTypeTestCase(
        id="default_language",
        keyword="default_language",
        msg="default_language should only accept string",
        valid_types=[BsonType.STRING],
        valid_inputs={BsonType.STRING: "english"},
        default_error_code=TYPE_MISMATCH_ERROR,
    ),
    BsonTypeTestCase(
        id="language_override",
        keyword="language_override",
        msg="language_override should only accept string",
        valid_types=[BsonType.STRING],
        valid_inputs={BsonType.STRING: "language"},
        default_error_code=TYPE_MISMATCH_ERROR,
    ),
    BsonTypeTestCase(
        id="weights",
        keyword="weights",
        msg="weights should only accept object",
        valid_types=[BsonType.OBJECT],
        valid_inputs={BsonType.OBJECT: {"content": 5}},
        default_error_code=TYPE_MISMATCH_ERROR,
        # String and array values are expected to fail with a different code
        # than the generic type mismatch.
        error_code_overrides={
            BsonType.STRING: CANNOT_CREATE_INDEX_ERROR,
            BsonType.ARRAY: CANNOT_CREATE_INDEX_ERROR,
        },
    ),
    BsonTypeTestCase(
        id="textIndexVersion",
        keyword="textIndexVersion",
        msg="textIndexVersion should only accept numeric",
        valid_types=[BsonType.INT, BsonType.LONG, BsonType.DOUBLE],
        valid_inputs={
            BsonType.INT: 3,
            BsonType.LONG: 3,
            BsonType.DOUBLE: 3.0,
        },
        default_error_code=TYPE_MISMATCH_ERROR,
        # Decimal is not in valid_types and is expected to fail with its own code.
        error_code_overrides={BsonType.DECIMAL: CANNOT_CREATE_INDEX_ERROR},
    ),
]


def _build_index(spec, sample_value):
    """Return a copy of the default index spec with one entry set to ``sample_value``.

    For the pseudo-keyword ``"key_specifier"`` the sample value becomes the
    specifier of the ``content`` field inside ``key``; for any other keyword it
    is stored as a top-level index option named after ``spec.keyword``.
    """
    index = dict(_DEFAULT_INDEX)
    if spec.keyword == "key_specifier":
        index["key"] = {"content": sample_value}
    else:
        index[spec.keyword] = sample_value
    return index


# Rejection cases generated from TEXT_INDEX_PARAMS; the parametrize decorator
# below unpacks each one as (bson_type, sample_value, spec).
REJECTION_CASES = generate_bson_rejection_test_cases(TEXT_INDEX_PARAMS)


@pytest.mark.parametrize("bson_type,sample_value,spec", REJECTION_CASES)
def test_text_index_type_rejected(collection, bson_type, sample_value, spec):
    """Verify createIndexes fails with the spec's expected code for a disallowed BSON type."""
    command = {
        "createIndexes": collection.name,
        "indexes": [_build_index(spec, sample_value)],
    }
    reply = execute_command(collection, command)
    # The expected code may differ per BSON type, so it is looked up on the spec.
    expected_code = spec.expected_code(bson_type)
    assertFailureCode(reply, expected_code, msg=spec.msg)


# Acceptance cases generated from TEXT_INDEX_PARAMS; the parametrize decorator
# below unpacks each one as (bson_type, sample_value, spec).
ACCEPTANCE_CASES = generate_bson_acceptance_test_cases(TEXT_INDEX_PARAMS)


@pytest.mark.parametrize("bson_type,sample_value,spec", ACCEPTANCE_CASES)
def test_text_index_type_accepted(collection, bson_type, sample_value, spec):
    """Verify createIndexes does not error when given a permitted BSON type."""
    command = {
        "createIndexes": collection.name,
        "indexes": [_build_index(spec, sample_value)],
    }
    reply = execute_command(collection, command)
    assertNotError(reply, msg=f"{spec.keyword} should accept {bson_type.value}")
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""Tests for text index creation and sparse query behavior.

Validates compound text index creation, and that documents lacking the
indexed field are excluded from $text query results (text indexes are sparse).
"""

import pytest

from documentdb_tests.framework.assertions import assertSuccess, assertSuccessPartial
from documentdb_tests.framework.executor import execute_command

# Module-level pytest marker: tags every test in this module as `text_search`.
pytestmark = pytest.mark.text_search


def test_text_compound_index(collection):
    """Verify a compound index with a text key between two ordinary keys can be created."""
    # Ascending key, text key, descending key, in that order.
    compound_index = {"key": {"a": 1, "content": "text", "b": -1}, "name": "a_content_b"}
    reply = execute_command(
        collection,
        {"createIndexes": collection.name, "indexes": [compound_index]},
    )
    assertSuccessPartial(reply, {"ok": 1.0}, msg="Compound text index creation should succeed")


def test_text_excludes_docs_without_indexed_field(collection):
    """Verify $text skips documents that lack the indexed field (text index is sparse)."""
    # Document 2 has no "content" field, so it must not appear in the results.
    documents = [
        {"_id": 1, "content": "hello world"},
        {"_id": 2, "other": "no text field"},
        {"_id": 3, "content": "hello there"},
    ]
    collection.create_index([("content", "text")])
    collection.insert_many(documents)

    # Project only _id and sort on it so the expected list is deterministic.
    query = {
        "find": collection.name,
        "filter": {"$text": {"$search": "hello"}},
        "projection": {"_id": 1},
        "sort": {"_id": 1},
    }
    reply = execute_command(collection, query)
    assertSuccess(
        reply,
        [{"_id": 1}, {"_id": 3}],
        msg="$text should exclude documents without the indexed field",
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""Tests for text index creation error cases.

Validates invalid key specifier, missing text index requirement, multiple
text indexes on the same collection, non-simple collation, invalid
textIndexVersion, non-adjacent text keys, and text combined with geospatial.
"""

import pytest

from documentdb_tests.framework.assertions import assertFailureCode
from documentdb_tests.framework.error_codes import (
CANNOT_CREATE_INDEX_ERROR,
INDEX_NOT_FOUND_ERROR,
INDEX_OPTIONS_CONFLICT_ERROR,
)
from documentdb_tests.framework.executor import execute_command

# Module-level pytest marker: tags every test in this module as `text_search`.
pytestmark = pytest.mark.text_search


def test_text_invalid_string_key_specifier_fails(collection):
    """Verify createIndexes rejects a string key specifier that is not a known index type."""
    bad_index = {"key": {"content": "invalid"}, "name": "idx"}
    reply = execute_command(
        collection,
        {"createIndexes": collection.name, "indexes": [bad_index]},
    )
    assertFailureCode(
        reply, CANNOT_CREATE_INDEX_ERROR, msg="Invalid string key specifier should fail"
    )


def test_text_without_index_fails(collection):
    """Verify a $text query fails when the collection has no text index."""
    # Insert a matching document so the failure is due to the missing index, not empty data.
    collection.insert_one({"_id": 1, "content": "hello"})
    text_query = {"find": collection.name, "filter": {"$text": {"$search": "hello"}}}
    reply = execute_command(collection, text_query)
    assertFailureCode(reply, INDEX_NOT_FOUND_ERROR, msg="$text without text index should fail")


def test_text_two_indexes_fails(collection):
    """Test creating a second text index on the same collection fails.

    Only the second ``createIndexes`` call is the behavior under test. The
    first text index is setup and is created through the collection handle so
    that a setup failure is reported at the point it happens instead of being
    silently discarded and later misreported as "Two text indexes should fail".
    """
    # Setup: first text index on field "a".
    # NOTE(review): assumes `collection` is a PyMongo-style Collection whose
    # create_index raises on failure — confirm against the fixture.
    collection.create_index([("a", "text")], name="a_text")

    # Behavior under test: a second text index on a different field must be rejected.
    result = execute_command(
        collection,
        {"createIndexes": collection.name, "indexes": [{"key": {"b": "text"}, "name": "b_text"}]},
    )
    assertFailureCode(result, INDEX_OPTIONS_CONFLICT_ERROR, msg="Two text indexes should fail")


def test_text_on_non_simple_collation_fails(database_client, collection):
    """Verify a text index cannot be created on a collection with a non-simple collation.

    Text indexes do not support collation. The index spec below sets no
    collation of its own, so it inherits the collection's ``en`` locale rather
    than overriding it with locale 'simple'.
    """
    # Create the collection explicitly so it carries a non-simple default collation.
    database_client.create_collection(collection.name, collation={"locale": "en"})
    text_index = {"key": {"a": "text"}, "name": "a_text"}
    reply = execute_command(
        collection,
        {"createIndexes": collection.name, "indexes": [text_index]},
    )
    assertFailureCode(
        reply, CANNOT_CREATE_INDEX_ERROR, msg="Text index on non-simple collation should fail"
    )


def test_text_index_version_zero_fails(collection):
    """Verify textIndexVersion 0 is rejected (only versions 1, 2, 3 are valid)."""
    index_v0 = {"key": {"a": "text"}, "name": "a_text_v0", "textIndexVersion": 0}
    reply = execute_command(
        collection,
        {"createIndexes": collection.name, "indexes": [index_v0]},
    )
    assertFailureCode(reply, CANNOT_CREATE_INDEX_ERROR, msg="textIndexVersion 0 should fail")


def test_text_index_version_four_fails(collection):
    """Verify textIndexVersion 4 is rejected (only versions 1, 2, 3 are valid)."""
    index_v4 = {"key": {"a": "text"}, "name": "a_text_v4", "textIndexVersion": 4}
    reply = execute_command(
        collection,
        {"createIndexes": collection.name, "indexes": [index_v4]},
    )
    assertFailureCode(reply, CANNOT_CREATE_INDEX_ERROR, msg="textIndexVersion 4 should fail")


def test_text_non_adjacent_text_keys_fails(collection):
    """Verify a compound index whose text keys are not adjacent is rejected."""
    # The ascending key "b" sits between the two text keys "a" and "c".
    split_text_index = {"key": {"a": "text", "b": 1, "c": "text"}, "name": "a_text_b_c_text"}
    reply = execute_command(
        collection,
        {"createIndexes": collection.name, "indexes": [split_text_index]},
    )
    assertFailureCode(reply, CANNOT_CREATE_INDEX_ERROR, msg="Non-adjacent text keys should fail")


def test_text_with_geospatial_key_fails(collection):
    """Verify a compound index mixing a text key with a 2dsphere key is rejected."""
    mixed_index = {"key": {"content": "text", "loc": "2dsphere"}, "name": "content_loc"}
    reply = execute_command(
        collection,
        {"createIndexes": collection.name, "indexes": [mixed_index]},
    )
    assertFailureCode(
        reply, CANNOT_CREATE_INDEX_ERROR, msg="Text combined with 2dsphere should fail"
    )
8 changes: 7 additions & 1 deletion documentdb_tests/framework/bson_type_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ class BsonTypeTestCase(BaseTestCase):
keyword: The operator keyword being tested (e.g. "minimum", "required").
valid_types: List of BsonType values the keyword accepts.
All other BSON types will be tested as rejections.
skip_rejection_types: Optional list of BsonType values to exclude from
rejection test generation. USE SPARINGLY — only for types that have
documented quirky behavior. If a type cleanly errors
or cleanly succeeds, it belongs in valid_types or as a rejection — not here.
requires: Optional sibling fields needed alongside the keyword
(e.g. {"minimum": 0} for exclusiveMinimum).
default_error_code: Expected error code for rejected types.
Expand All @@ -36,6 +40,7 @@ class BsonTypeTestCase(BaseTestCase):

keyword: Optional[str] = None
valid_types: Optional[list] = None
skip_rejection_types: Optional[list] = None
requires: Optional[dict] = None
default_error_code: int = TYPE_MISMATCH_ERROR
error_code_overrides: Optional[dict] = None
Expand All @@ -55,8 +60,9 @@ def generate_bson_rejection_test_cases(params):
cases = []
for spec in params:
accepted = set(spec.valid_types)
skipped = set(spec.skip_rejection_types or [])
for bson_type in BsonType:
if bson_type in accepted:
if bson_type in accepted or bson_type in skipped:
continue
sample_value = BSON_TYPE_SAMPLES[bson_type]
test_id = f"reject_{bson_type.value}_for_{spec.id}"
Expand Down
Loading