documentdb · vic-tsang · May 29, 2026 · May 29, 2026 · Jun 3, 2026
@@ -0,0 +1,120 @@
+"""Tests for text index BSON type validation.
+
+Verifies that text index key specifier and options reject invalid BSON types
+and accept valid types.
+"""
+
+import pytest
+
+from documentdb_tests.framework.assertions import assertFailureCode, assertNotError
+from documentdb_tests.framework.bson_type_validator import (
+    BsonType,
+    BsonTypeTestCase,
+    generate_bson_acceptance_test_cases,
+    generate_bson_rejection_test_cases,
+)
+from documentdb_tests.framework.error_codes import CANNOT_CREATE_INDEX_ERROR, TYPE_MISMATCH_ERROR
+from documentdb_tests.framework.executor import execute_command
+
+pytestmark = pytest.mark.text_search
+
+# Baseline text index spec: the "content" field indexed as text, named "test_idx".
+# _build_index copies this spec and overrides a single keyword with the sample value.
+_DEFAULT_INDEX = {"key": {"content": "text"}, "name": "test_idx"}
+
+# One BsonTypeTestCase per text index keyword under test. valid_types lists the
+# BSON types the keyword accepts; every other type is generated as a rejection
+# case (minus skip_rejection_types) and is expected to fail with
+# default_error_code unless error_code_overrides names a different code.
+# valid_inputs presumably supplies the concrete value sent for each accepted
+# type — confirm against generate_bson_acceptance_test_cases.
+TEXT_INDEX_PARAMS = [
+    BsonTypeTestCase(
+        id="key_specifier",
+        msg="text index key specifier should only accept the string 'text'",
+        keyword="key_specifier",
+        valid_types=[BsonType.STRING],
+        # Numeric types don't error but create a regular index instead of a text index,
+        # so they are neither valid text key specifiers nor rejected — skip them.
+        skip_rejection_types=[BsonType.DOUBLE, BsonType.INT, BsonType.LONG, BsonType.DECIMAL],
+        default_error_code=CANNOT_CREATE_INDEX_ERROR,
+        valid_inputs={
+            BsonType.STRING: "text",
+        },
+    ),
+    BsonTypeTestCase(
+        id="default_language",
+        msg="default_language should only accept string",
+        keyword="default_language",
+        valid_types=[BsonType.STRING],
+        default_error_code=TYPE_MISMATCH_ERROR,
+        valid_inputs={
+            BsonType.STRING: "english",
+        },
+    ),
+    BsonTypeTestCase(
+        id="language_override",
+        msg="language_override should only accept string",
+        keyword="language_override",
+        valid_types=[BsonType.STRING],
+        default_error_code=TYPE_MISMATCH_ERROR,
+        valid_inputs={
+            BsonType.STRING: "language",
+        },
+    ),
+    BsonTypeTestCase(
+        id="weights",
+        msg="weights should only accept object",
+        keyword="weights",
+        valid_types=[BsonType.OBJECT],
+        default_error_code=TYPE_MISMATCH_ERROR,
+        # String and array values fail with a different code than the other rejected types.
+        error_code_overrides={
+            BsonType.STRING: CANNOT_CREATE_INDEX_ERROR,
+            BsonType.ARRAY: CANNOT_CREATE_INDEX_ERROR,
+        },
+        valid_inputs={
+            BsonType.OBJECT: {"content": 5},
+        },
+    ),
+    BsonTypeTestCase(
+        id="textIndexVersion",
+        msg="textIndexVersion should only accept numeric",
+        keyword="textIndexVersion",
+        valid_types=[BsonType.INT, BsonType.LONG, BsonType.DOUBLE],
+        default_error_code=TYPE_MISMATCH_ERROR,
+        # Decimal is numeric but is still rejected, with a different code than non-numeric types.
+        error_code_overrides={
+            BsonType.DECIMAL: CANNOT_CREATE_INDEX_ERROR,
+        },
+        valid_inputs={
+            BsonType.INT: 3,
+            # NOTE(review): a plain Python int is presumably encoded as int32 by the
+            # driver, so this may not exercise BSON long unless the framework wraps
+            # the value (e.g. in bson.Int64) — confirm.
+            BsonType.LONG: 3,
+            BsonType.DOUBLE: 3.0,
+        },
+    ),
+]
+
+
+def _build_index(spec, sample_value):
+    """Return a copy of the default index spec with one keyword replaced.
+
+    The pseudo-keyword "key_specifier" replaces the specifier of the "content"
+    key; any other keyword is set as a top-level index option.
+    """
+    targets_key_specifier = spec.keyword == "key_specifier"
+    if targets_key_specifier:
+        override = {"key": {"content": sample_value}}
+    else:
+        override = {spec.keyword: sample_value}
+    return {**_DEFAULT_INDEX, **override}
+
+
+REJECTION_CASES = generate_bson_rejection_test_cases(TEXT_INDEX_PARAMS)
+
+
+@pytest.mark.parametrize("bson_type,sample_value,spec", REJECTION_CASES)
+def test_text_index_type_rejected(collection, bson_type, sample_value, spec):
+    """createIndexes must fail with the spec's expected code for a disallowed BSON type."""
+    candidate_index = _build_index(spec, sample_value)
+    create_cmd = {"createIndexes": collection.name, "indexes": [candidate_index]}
+    outcome = execute_command(collection, create_cmd)
+    # The expected code depends on the BSON type (per-type overrides live on the spec).
+    assertFailureCode(outcome, spec.expected_code(bson_type), msg=spec.msg)
+
+
+ACCEPTANCE_CASES = generate_bson_acceptance_test_cases(TEXT_INDEX_PARAMS)
+
+
+@pytest.mark.parametrize("bson_type,sample_value,spec", ACCEPTANCE_CASES)
+def test_text_index_type_accepted(collection, bson_type, sample_value, spec):
+    """createIndexes must not error when the keyword is given an allowed BSON type."""
+    candidate_index = _build_index(spec, sample_value)
+    create_cmd = {"createIndexes": collection.name, "indexes": [candidate_index]}
+    outcome = execute_command(collection, create_cmd)
+    assertNotError(outcome, msg=f"{spec.keyword} should accept {bson_type.value}")
@@ -0,0 +1,50 @@
+"""Tests for text index creation and sparse query behavior.
+
+Validates compound text index creation, and that documents lacking the
+indexed field are excluded from $text query results (text indexes are sparse).
+"""
+
+import pytest
+
+from documentdb_tests.framework.assertions import assertSuccess, assertSuccessPartial
+from documentdb_tests.framework.executor import execute_command
+
+pytestmark = pytest.mark.text_search
+
+
+def test_text_compound_index(collection):
+    """Creating a compound index with ascending, text, and descending keys succeeds."""
+    compound_index = {"key": {"a": 1, "content": "text", "b": -1}, "name": "a_content_b"}
+    create_cmd = {"createIndexes": collection.name, "indexes": [compound_index]}
+    outcome = execute_command(collection, create_cmd)
+    assertSuccessPartial(outcome, {"ok": 1.0}, msg="Compound text index creation should succeed")
+
+
+def test_text_excludes_docs_without_indexed_field(collection):
+    """A $text query returns only documents that contain the text-indexed field."""
+    collection.create_index([("content", "text")])
+    # _id 2 has no "content" field, so it must be absent from the results.
+    seed_docs = [
+        {"_id": 1, "content": "hello world"},
+        {"_id": 2, "other": "no text field"},
+        {"_id": 3, "content": "hello there"},
+    ]
+    collection.insert_many(seed_docs)
+    # Project only _id and sort by it so the expected result is deterministic.
+    find_cmd = {
+        "find": collection.name,
+        "filter": {"$text": {"$search": "hello"}},
+        "projection": {"_id": 1},
+        "sort": {"_id": 1},
+    }
+    outcome = execute_command(collection, find_cmd)
+    expected_docs = [{"_id": 1}, {"_id": 3}]
+    assertSuccess(
+        outcome,
+        expected_docs,
+        msg="$text should exclude documents without the indexed field",
+    )
@@ -0,0 +1,120 @@
+"""Tests for text index creation error cases.
+
+Covers: an invalid string key specifier, a $text query on a collection with no
+text index, a second text index on the same collection, a non-simple collection
+collation, out-of-range textIndexVersion values, non-adjacent text keys, and a
+text key combined with a geospatial (2dsphere) key.
+"""
+
+import pytest
+
+from documentdb_tests.framework.assertions import assertFailureCode
+from documentdb_tests.framework.error_codes import (
+    CANNOT_CREATE_INDEX_ERROR,
+    INDEX_NOT_FOUND_ERROR,
+    INDEX_OPTIONS_CONFLICT_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+
+pytestmark = pytest.mark.text_search
+
+
+def test_text_invalid_string_key_specifier_fails(collection):
+    """createIndexes rejects the string key specifier "invalid"."""
+    bad_index = {"key": {"content": "invalid"}, "name": "idx"}
+    create_cmd = {"createIndexes": collection.name, "indexes": [bad_index]}
+    outcome = execute_command(collection, create_cmd)
+    assertFailureCode(
+        outcome, CANNOT_CREATE_INDEX_ERROR, msg="Invalid string key specifier should fail"
+    )
+
+
+def test_text_without_index_fails(collection):
+    """A $text query on a collection that has no text index fails."""
+    collection.insert_one({"_id": 1, "content": "hello"})
+    text_query = {"find": collection.name, "filter": {"$text": {"$search": "hello"}}}
+    outcome = execute_command(collection, text_query)
+    assertFailureCode(outcome, INDEX_NOT_FOUND_ERROR, msg="$text without text index should fail")
+
+
+def test_text_two_indexes_fails(collection):
+    """Test creating a second text index on the same collection fails.
+
+    The first text index is setup, not the operation under test, so it is
+    created through the driver: a setup failure then raises immediately instead
+    of being silently discarded. Only the second createIndexes command is
+    asserted on.
+    """
+    # Setup: the explicit name matches the one the original createIndexes command used.
+    collection.create_index([("a", "text")], name="a_text")
+    result = execute_command(
+        collection,
+        {"createIndexes": collection.name, "indexes": [{"key": {"b": "text"}, "name": "b_text"}]},
+    )
+    assertFailureCode(result, INDEX_OPTIONS_CONFLICT_ERROR, msg="Two text indexes should fail")
+
+
+def test_text_on_non_simple_collation_fails(database_client, collection):
+    """Creating a text index fails when the collection has a non-simple collation.
+
+    Text indexes do not support collation; the collection's non-simple
+    collation is inherited unless overridden with locale 'simple'.
+    """
+    # Create the collection explicitly so it carries the "en" default collation.
+    database_client.create_collection(collection.name, collation={"locale": "en"})
+    text_index = {"key": {"a": "text"}, "name": "a_text"}
+    create_cmd = {"createIndexes": collection.name, "indexes": [text_index]}
+    outcome = execute_command(collection, create_cmd)
+    assertFailureCode(
+        outcome, CANNOT_CREATE_INDEX_ERROR, msg="Text index on non-simple collation should fail"
+    )
+
+
+def test_text_index_version_zero_fails(collection):
+    """textIndexVersion 0 is rejected (only versions 1, 2, 3 are valid)."""
+    versioned_index = {"key": {"a": "text"}, "name": "a_text_v0", "textIndexVersion": 0}
+    create_cmd = {"createIndexes": collection.name, "indexes": [versioned_index]}
+    outcome = execute_command(collection, create_cmd)
+    assertFailureCode(outcome, CANNOT_CREATE_INDEX_ERROR, msg="textIndexVersion 0 should fail")
+
+
+def test_text_index_version_four_fails(collection):
+    """textIndexVersion 4 is rejected (only versions 1, 2, 3 are valid)."""
+    versioned_index = {"key": {"a": "text"}, "name": "a_text_v4", "textIndexVersion": 4}
+    create_cmd = {"createIndexes": collection.name, "indexes": [versioned_index]}
+    outcome = execute_command(collection, create_cmd)
+    assertFailureCode(outcome, CANNOT_CREATE_INDEX_ERROR, msg="textIndexVersion 4 should fail")
+
+
+def test_text_non_adjacent_text_keys_fails(collection):
+    """A compound index whose text keys are separated by a non-text key is rejected."""
+    # "b" sits between the two text keys, so they are not adjacent.
+    split_text_index = {"key": {"a": "text", "b": 1, "c": "text"}, "name": "a_text_b_c_text"}
+    create_cmd = {"createIndexes": collection.name, "indexes": [split_text_index]}
+    outcome = execute_command(collection, create_cmd)
+    assertFailureCode(outcome, CANNOT_CREATE_INDEX_ERROR, msg="Non-adjacent text keys should fail")
+
+
+def test_text_with_geospatial_key_fails(collection):
+    """A compound index that mixes a text key with a 2dsphere key is rejected."""
+    mixed_index = {"key": {"content": "text", "loc": "2dsphere"}, "name": "content_loc"}
+    create_cmd = {"createIndexes": collection.name, "indexes": [mixed_index]}
+    outcome = execute_command(collection, create_cmd)
+    assertFailureCode(
+        outcome, CANNOT_CREATE_INDEX_ERROR, msg="Text combined with 2dsphere should fail"
+    )
@@ -24,6 +24,10 @@ class BsonTypeTestCase(BaseTestCase):
         keyword: The operator keyword being tested (e.g. "minimum", "required").
         valid_types: List of BsonType values the keyword accepts.
             All other BSON types will be tested as rejections.
+        skip_rejection_types: Optional list of BsonType values to exclude from
+            rejection test generation. USE SPARINGLY — only for types that have
+            documented quirky behavior. If a type cleanly errors
+            or cleanly succeeds, it belongs in valid_types or as a rejection — not here.
         requires: Optional sibling fields needed alongside the keyword
             (e.g. {"minimum": 0} for exclusiveMinimum).
         default_error_code: Expected error code for rejected types.
@@ -36,6 +40,7 @@ class BsonTypeTestCase(BaseTestCase):
 
     keyword: Optional[str] = None
     valid_types: Optional[list] = None
+    skip_rejection_types: Optional[list] = None
     requires: Optional[dict] = None
     default_error_code: int = TYPE_MISMATCH_ERROR
     error_code_overrides: Optional[dict] = None
@@ -55,8 +60,9 @@ def generate_bson_rejection_test_cases(params):
     cases = []
     for spec in params:
         accepted = set(spec.valid_types)
+        skipped = set(spec.skip_rejection_types or [])
         for bson_type in BsonType:
-            if bson_type in accepted:
+            if bson_type in accepted or bson_type in skipped:
                 continue
             sample_value = BSON_TYPE_SAMPLES[bson_type]
             test_id = f"reject_{bson_type.value}_for_{spec.id}"