From 3a8cee7419e0879aaf6e86ba72f7e5cff2ebe865 Mon Sep 17 00:00:00 2001 From: "Victor [C] Tsang" Date: Fri, 29 May 2026 00:25:26 +0000 Subject: [PATCH 1/3] Added index type tests for text Signed-off-by: Victor [C] Tsang --- .../text/test_text_bson_type_validation.py | 120 ++++++++++++++++++ .../indexes/types/text/test_text_errors.py | 53 ++++++++ .../framework/bson_type_validator.py | 8 +- 3 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_bson_type_validation.py create mode 100644 documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py diff --git a/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_bson_type_validation.py b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_bson_type_validation.py new file mode 100644 index 000000000..843466590 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_bson_type_validation.py @@ -0,0 +1,120 @@ +"""Tests for text index BSON type validation. + +Verifies that text index key specifier and options reject invalid BSON types +and accept valid types. +""" + +import pytest + +from documentdb_tests.framework.assertions import assertFailureCode, assertNotError +from documentdb_tests.framework.bson_type_validator import ( + BsonType, + BsonTypeTestCase, + generate_bson_acceptance_test_cases, + generate_bson_rejection_test_cases, +) +from documentdb_tests.framework.error_codes import CANNOT_CREATE_INDEX_ERROR, TYPE_MISMATCH_ERROR +from documentdb_tests.framework.executor import execute_command + +pytestmark = pytest.mark.text_search + +# Default text index spec — tests override individual keywords with sample values +_DEFAULT_INDEX = {"key": {"content": "text"}, "name": "test_idx"} + +TEXT_INDEX_PARAMS = [ + BsonTypeTestCase( + id="key_specifier", + msg="text index key specifier should only accept the string 'text'", + keyword="key_specifier", + valid_types=[BsonType.STRING], + # Numeric types don't error but create a regular index instead of a text index, + # so they are neither valid text key specifiers nor rejected — skip them. + skip_rejection_types=[BsonType.DOUBLE, BsonType.INT, BsonType.LONG, BsonType.DECIMAL], + default_error_code=CANNOT_CREATE_INDEX_ERROR, + valid_inputs={ + BsonType.STRING: "text", + }, + ), + BsonTypeTestCase( + id="default_language", + msg="default_language should only accept string", + keyword="default_language", + valid_types=[BsonType.STRING], + default_error_code=TYPE_MISMATCH_ERROR, + valid_inputs={ + BsonType.STRING: "english", + }, + ), + BsonTypeTestCase( + id="language_override", + msg="language_override should only accept string", + keyword="language_override", + valid_types=[BsonType.STRING], + default_error_code=TYPE_MISMATCH_ERROR, + valid_inputs={ + BsonType.STRING: "language", + }, + ), + BsonTypeTestCase( + id="weights", + msg="weights should only accept object", + keyword="weights", + valid_types=[BsonType.OBJECT], + default_error_code=TYPE_MISMATCH_ERROR, + error_code_overrides={ + BsonType.STRING: CANNOT_CREATE_INDEX_ERROR, + BsonType.ARRAY: CANNOT_CREATE_INDEX_ERROR, + }, + valid_inputs={ + BsonType.OBJECT: {"content": 5}, + }, + ), + BsonTypeTestCase( + id="textIndexVersion", + msg="textIndexVersion should only accept numeric", + keyword="textIndexVersion", + valid_types=[BsonType.INT, BsonType.LONG, BsonType.DOUBLE], + default_error_code=TYPE_MISMATCH_ERROR, + error_code_overrides={ + BsonType.DECIMAL: CANNOT_CREATE_INDEX_ERROR, + }, + valid_inputs={ + BsonType.INT: 3, + BsonType.LONG: 3, + BsonType.DOUBLE: 3.0, + }, + ), +] + + +def _build_index(spec, sample_value): + """Build index spec by overriding the keyword in the default index.""" + if spec.keyword == "key_specifier": + return {**_DEFAULT_INDEX, "key": {"content": sample_value}} + return {**_DEFAULT_INDEX, spec.keyword: sample_value} + + +REJECTION_CASES = generate_bson_rejection_test_cases(TEXT_INDEX_PARAMS) + + +@pytest.mark.parametrize("bson_type,sample_value,spec", REJECTION_CASES) +def test_text_index_type_rejected(collection, bson_type, sample_value, spec): + """Test text index creation rejects invalid BSON types.""" + result = execute_command( + collection, + {"createIndexes": collection.name, "indexes": [_build_index(spec, sample_value)]}, + ) + assertFailureCode(result, spec.expected_code(bson_type), msg=spec.msg) + + +ACCEPTANCE_CASES = generate_bson_acceptance_test_cases(TEXT_INDEX_PARAMS) + + +@pytest.mark.parametrize("bson_type,sample_value,spec", ACCEPTANCE_CASES) +def test_text_index_type_accepted(collection, bson_type, sample_value, spec): + """Test text index creation accepts valid BSON types.""" + result = execute_command( + collection, + {"createIndexes": collection.name, "indexes": [_build_index(spec, sample_value)]}, + ) + assertNotError(result, msg=f"{spec.keyword} should accept {bson_type.value}") diff --git a/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py new file mode 100644 index 000000000..c3b78ae41 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py @@ -0,0 +1,53 @@ +"""Tests for text index creation error cases. + +Validates invalid key specifier, missing text index requirement, +and multiple text indexes on the same collection. +""" + +import pytest + +from documentdb_tests.framework.assertions import assertFailureCode +from documentdb_tests.framework.error_codes import ( + CANNOT_CREATE_INDEX_ERROR, + INDEX_NOT_FOUND_ERROR, + INDEX_OPTIONS_CONFLICT_ERROR, +) +from documentdb_tests.framework.executor import execute_command + +pytestmark = pytest.mark.text_search + + +def test_text_invalid_string_key_specifier_fails(collection): + """Test index creation with invalid string key specifier fails.""" + result = execute_command( + collection, + { + "createIndexes": collection.name, + "indexes": [{"key": {"content": "invalid"}, "name": "idx"}], + }, + ) + assertFailureCode( + result, CANNOT_CREATE_INDEX_ERROR, msg="Invalid string key specifier should fail" + ) + + +def test_text_without_index_fails(collection): + """Test $text query without text index fails.""" + collection.insert_one({"_id": 1, "content": "hello"}) + result = execute_command( + collection, {"find": collection.name, "filter": {"$text": {"$search": "hello"}}} + ) + assertFailureCode(result, INDEX_NOT_FOUND_ERROR, msg="$text without text index should fail") + + +def test_text_two_indexes_fails(collection): + """Test creating two text indexes on same collection fails.""" + execute_command( + collection, + {"createIndexes": collection.name, "indexes": [{"key": {"a": "text"}, "name": "a_text"}]}, + ) + result = execute_command( + collection, + {"createIndexes": collection.name, "indexes": [{"key": {"b": "text"}, "name": "b_text"}]}, + ) + assertFailureCode(result, INDEX_OPTIONS_CONFLICT_ERROR, msg="Two text indexes should fail") diff --git a/documentdb_tests/framework/bson_type_validator.py b/documentdb_tests/framework/bson_type_validator.py index f6f2c362b..2b2b911e2 100644 --- a/documentdb_tests/framework/bson_type_validator.py +++ b/documentdb_tests/framework/bson_type_validator.py @@ -24,6 +24,10 @@ class BsonTypeTestCase(BaseTestCase): keyword: The operator keyword being tested (e.g. "minimum", "required"). valid_types: List of BsonType values the keyword accepts. All other BSON types will be tested as rejections. + skip_rejection_types: Optional list of BsonType values to exclude from + rejection test generation. USE SPARINGLY — only for types that have + documented quirky behavior. If a type cleanly errors + or cleanly succeeds, it belongs in valid_types or as a rejection — not here. requires: Optional sibling fields needed alongside the keyword (e.g. {"minimum": 0} for exclusiveMinimum). default_error_code: Expected error code for rejected types. @@ -36,6 +40,7 @@ class BsonTypeTestCase(BaseTestCase): keyword: Optional[str] = None valid_types: Optional[list] = None + skip_rejection_types: Optional[list] = None requires: Optional[dict] = None default_error_code: int = TYPE_MISMATCH_ERROR error_code_overrides: Optional[dict] = None @@ -55,8 +60,9 @@ def generate_bson_rejection_test_cases(params): cases = [] for spec in params: accepted = set(spec.valid_types) + skipped = set(spec.skip_rejection_types or []) for bson_type in BsonType: - if bson_type in accepted: + if bson_type in accepted or bson_type in skipped: continue sample_value = BSON_TYPE_SAMPLES[bson_type] test_id = f"reject_{bson_type.value}_for_{spec.id}" From d7834090881809104ba1344def22b91120c24a10 Mon Sep 17 00:00:00 2001 From: Victor Tsang Date: Fri, 29 May 2026 16:33:22 -0700 Subject: [PATCH 2/3] added missing test cases Signed-off-by: Victor Tsang --- .../indexes/types/text/test_text_creation.py | 50 +++++++++++++ .../indexes/types/text/test_text_errors.py | 74 ++++++++++++++++++- 2 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_creation.py diff --git a/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_creation.py b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_creation.py new file mode 100644 index 000000000..8d1db00e5 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_creation.py @@ -0,0 +1,50 @@ +"""Tests for text index creation and sparse query behavior. + +Validates compound text index creation, and that documents lacking the +indexed field are excluded from $text query results (text indexes are sparse). +""" + +import pytest + +from documentdb_tests.framework.assertions import assertSuccess, assertSuccessPartial +from documentdb_tests.framework.executor import execute_command + +pytestmark = pytest.mark.text_search + + +def test_text_compound_index(collection): + """Test compound text index creation succeeds.""" + result = execute_command( + collection, + { + "createIndexes": collection.name, + "indexes": [{"key": {"a": 1, "content": "text", "b": -1}, "name": "a_content_b"}], + }, + ) + assertSuccessPartial(result, {"ok": 1.0}, msg="Compound text index creation should succeed") + + +def test_text_excludes_docs_without_indexed_field(collection): + """Test $text excludes documents missing the indexed field (text index is sparse).""" + collection.create_index([("content", "text")]) + collection.insert_many( + [ + {"_id": 1, "content": "hello world"}, + {"_id": 2, "other": "no text field"}, + {"_id": 3, "content": "hello there"}, + ] + ) + result = execute_command( + collection, + { + "find": collection.name, + "filter": {"$text": {"$search": "hello"}}, + "projection": {"_id": 1}, + "sort": {"_id": 1}, + }, + ) + assertSuccess( + result, + [{"_id": 1}, {"_id": 3}], + msg="$text should exclude documents without the indexed field", + ) diff --git a/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py index c3b78ae41..08c8c44f8 100644 --- a/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py +++ b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py @@ -1,7 +1,8 @@ """Tests for text index creation error cases. -Validates invalid key specifier, missing text index requirement, -and multiple text indexes on the same collection. +Validates invalid key specifier, missing text index requirement, multiple +text indexes on the same collection, non-simple collation, invalid +textIndexVersion, non-adjacent text keys, and text combined with geospatial. """ import pytest @@ -51,3 +52,72 @@ def test_text_two_indexes_fails(collection): {"createIndexes": collection.name, "indexes": [{"key": {"b": "text"}, "name": "b_text"}]}, ) assertFailureCode(result, INDEX_OPTIONS_CONFLICT_ERROR, msg="Two text indexes should fail") + + +def test_text_on_non_simple_collation_fails(database_client): + """Test text index on a collection with non-simple collation fails. + + Text indexes do not support collation; the collection's non-simple + collation is inherited unless overridden with locale 'simple'. + """ + coll = database_client["text_non_simple_collation_test"] + coll.drop() + database_client.create_collection(coll.name, collation={"locale": "en"}) + result = execute_command( + coll, + {"createIndexes": coll.name, "indexes": [{"key": {"a": "text"}, "name": "a_text"}]}, + ) + assertFailureCode( + result, CANNOT_CREATE_INDEX_ERROR, msg="Text index on non-simple collation should fail" + ) + coll.drop() + + +def test_text_index_version_zero_fails(collection): + """Test textIndexVersion 0 fails (only versions 1, 2, 3 are valid).""" + result = execute_command( + collection, + { + "createIndexes": collection.name, + "indexes": [{"key": {"a": "text"}, "name": "a_text_v0", "textIndexVersion": 0}], + }, + ) + assertFailureCode(result, CANNOT_CREATE_INDEX_ERROR, msg="textIndexVersion 0 should fail") + + +def test_text_index_version_four_fails(collection): + """Test textIndexVersion 4 fails (only versions 1, 2, 3 are valid).""" + result = execute_command( + collection, + { + "createIndexes": collection.name, + "indexes": [{"key": {"a": "text"}, "name": "a_text_v4", "textIndexVersion": 4}], + }, + ) + assertFailureCode(result, CANNOT_CREATE_INDEX_ERROR, msg="textIndexVersion 4 should fail") + + +def test_text_non_adjacent_text_keys_fails(collection): + """Test compound index with non-adjacent text keys fails (text keys must be adjacent).""" + result = execute_command( + collection, + { + "createIndexes": collection.name, + "indexes": [{"key": {"a": "text", "b": 1, "c": "text"}, "name": "a_text_b_c_text"}], + }, + ) + assertFailureCode(result, CANNOT_CREATE_INDEX_ERROR, msg="Non-adjacent text keys should fail") + + +def test_text_with_geospatial_key_fails(collection): + """Test compound index combining text and 2dsphere keys fails.""" + result = execute_command( + collection, + { + "createIndexes": collection.name, + "indexes": [{"key": {"content": "text", "loc": "2dsphere"}, "name": "content_loc"}], + }, + ) + assertFailureCode( + result, CANNOT_CREATE_INDEX_ERROR, msg="Text combined with 2dsphere should fail" + ) From 4b725fe770860ea7c2c6ff1c39eb66646c3c0756 Mon Sep 17 00:00:00 2001 From: Victor Tsang Date: Tue, 2 Jun 2026 17:24:42 -0700 Subject: [PATCH 3/3] fixed test case to use collection Signed-off-by: Victor Tsang --- .../tests/core/indexes/types/text/test_text_errors.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py index 08c8c44f8..aa8193aab 100644 --- a/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py +++ b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py @@ -54,23 +54,20 @@ def test_text_two_indexes_fails(collection): assertFailureCode(result, INDEX_OPTIONS_CONFLICT_ERROR, msg="Two text indexes should fail") -def test_text_on_non_simple_collation_fails(database_client): +def test_text_on_non_simple_collation_fails(database_client, collection): """Test text index on a collection with non-simple collation fails. Text indexes do not support collation; the collection's non-simple collation is inherited unless overridden with locale 'simple'. """ - coll = database_client["text_non_simple_collation_test"] - coll.drop() - database_client.create_collection(coll.name, collation={"locale": "en"}) + database_client.create_collection(collection.name, collation={"locale": "en"}) result = execute_command( - coll, - {"createIndexes": coll.name, "indexes": [{"key": {"a": "text"}, "name": "a_text"}]}, + collection, + {"createIndexes": collection.name, "indexes": [{"key": {"a": "text"}, "name": "a_text"}]}, ) assertFailureCode( result, CANNOT_CREATE_INDEX_ERROR, msg="Text index on non-simple collation should fail" ) - coll.drop() def test_text_index_version_zero_fails(collection):