diff --git a/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_bson_type_validation.py b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_bson_type_validation.py
new file mode 100644
index 000000000..843466590
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_bson_type_validation.py
@@ -0,0 +1,120 @@
+"""Tests for text index BSON type validation.
+
+Verifies that text index key specifier and options reject invalid BSON types
+and accept valid types.
+"""
+
+import pytest
+
+from documentdb_tests.framework.assertions import assertFailureCode, assertNotError
+from documentdb_tests.framework.bson_type_validator import (
+    BsonType,
+    BsonTypeTestCase,
+    generate_bson_acceptance_test_cases,
+    generate_bson_rejection_test_cases,
+)
+from documentdb_tests.framework.error_codes import CANNOT_CREATE_INDEX_ERROR, TYPE_MISMATCH_ERROR
+from documentdb_tests.framework.executor import execute_command
+
+pytestmark = pytest.mark.text_search
+
+# Default text index spec — tests override individual keywords with sample values
+_DEFAULT_INDEX = {"key": {"content": "text"}, "name": "test_idx"}
+
+TEXT_INDEX_PARAMS = [
+    BsonTypeTestCase(
+        id="key_specifier",
+        msg="text index key specifier should only accept the string 'text'",
+        keyword="key_specifier",
+        valid_types=[BsonType.STRING],
+        # Numeric types don't error but create a regular index instead of a text index,
+        # so they are neither valid text key specifiers nor rejected — skip them.
+        skip_rejection_types=[BsonType.DOUBLE, BsonType.INT, BsonType.LONG, BsonType.DECIMAL],
+        default_error_code=CANNOT_CREATE_INDEX_ERROR,
+        valid_inputs={
+            BsonType.STRING: "text",
+        },
+    ),
+    BsonTypeTestCase(
+        id="default_language",
+        msg="default_language should only accept string",
+        keyword="default_language",
+        valid_types=[BsonType.STRING],
+        default_error_code=TYPE_MISMATCH_ERROR,
+        valid_inputs={
+            BsonType.STRING: "english",
+        },
+    ),
+    BsonTypeTestCase(
+        id="language_override",
+        msg="language_override should only accept string",
+        keyword="language_override",
+        valid_types=[BsonType.STRING],
+        default_error_code=TYPE_MISMATCH_ERROR,
+        valid_inputs={
+            BsonType.STRING: "language",
+        },
+    ),
+    BsonTypeTestCase(
+        id="weights",
+        msg="weights should only accept object",
+        keyword="weights",
+        valid_types=[BsonType.OBJECT],
+        default_error_code=TYPE_MISMATCH_ERROR,
+        error_code_overrides={
+            BsonType.STRING: CANNOT_CREATE_INDEX_ERROR,
+            BsonType.ARRAY: CANNOT_CREATE_INDEX_ERROR,
+        },
+        valid_inputs={
+            BsonType.OBJECT: {"content": 5},
+        },
+    ),
+    BsonTypeTestCase(
+        id="textIndexVersion",
+        msg="textIndexVersion should only accept numeric",
+        keyword="textIndexVersion",
+        valid_types=[BsonType.INT, BsonType.LONG, BsonType.DOUBLE],
+        default_error_code=TYPE_MISMATCH_ERROR,
+        error_code_overrides={
+            BsonType.DECIMAL: CANNOT_CREATE_INDEX_ERROR,
+        },
+        valid_inputs={
+            BsonType.INT: 3,
+            BsonType.LONG: 3,
+            BsonType.DOUBLE: 3.0,
+        },
+    ),
+]
+
+
+def _build_index(spec, sample_value):
+    """Copy _DEFAULT_INDEX, setting spec.keyword (or the "content" key type) to sample_value."""
+    if spec.keyword == "key_specifier":
+        return {**_DEFAULT_INDEX, "key": {"content": sample_value}}
+    return {**_DEFAULT_INDEX, spec.keyword: sample_value}
+
+
+REJECTION_CASES = generate_bson_rejection_test_cases(TEXT_INDEX_PARAMS)
+
+
+@pytest.mark.parametrize("bson_type,sample_value,spec", REJECTION_CASES)
+def test_text_index_type_rejected(collection, bson_type, sample_value, spec):
+    """Test text index creation rejects invalid BSON types."""
+    result = execute_command(
+        collection,
+        {"createIndexes": collection.name, "indexes": [_build_index(spec, sample_value)]},
+    )
+    assertFailureCode(result, spec.expected_code(bson_type), msg=spec.msg)
+
+
+ACCEPTANCE_CASES = generate_bson_acceptance_test_cases(TEXT_INDEX_PARAMS)
+
+
+@pytest.mark.parametrize("bson_type,sample_value,spec", ACCEPTANCE_CASES)
+def test_text_index_type_accepted(collection, bson_type, sample_value, spec):
+    """Test text index creation accepts valid BSON types."""
+    result = execute_command(
+        collection,
+        {"createIndexes": collection.name, "indexes": [_build_index(spec, sample_value)]},
+    )
+    assertNotError(result, msg=f"{spec.keyword} should accept {bson_type.value}")
diff --git a/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_creation.py b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_creation.py
new file mode 100644
index 000000000..8d1db00e5
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_creation.py
@@ -0,0 +1,50 @@
+"""Tests for text index creation and sparse query behavior.
+
+Validates compound text index creation, and that documents lacking the
+indexed field are excluded from $text query results (text indexes are sparse).
+"""
+
+import pytest
+
+from documentdb_tests.framework.assertions import assertSuccess, assertSuccessPartial
+from documentdb_tests.framework.executor import execute_command
+
+pytestmark = pytest.mark.text_search
+
+
+def test_text_compound_index(collection):
+    """Test compound text index creation succeeds."""
+    result = execute_command(
+        collection,
+        {
+            "createIndexes": collection.name,
+            "indexes": [{"key": {"a": 1, "content": "text", "b": -1}, "name": "a_content_b"}],
+        },
+    )
+    assertSuccessPartial(result, {"ok": 1.0}, msg="Compound text index creation should succeed")
+
+
+def test_text_excludes_docs_without_indexed_field(collection):
+    """Test $text excludes documents missing the indexed field (text index is sparse)."""
+    collection.create_index([("content", "text")])
+    collection.insert_many(
+        [
+            {"_id": 1, "content": "hello world"},
+            {"_id": 2, "other": "no text field"},
+            {"_id": 3, "content": "hello there"},
+        ]
+    )
+    result = execute_command(
+        collection,
+        {
+            "find": collection.name,
+            "filter": {"$text": {"$search": "hello"}},
+            "projection": {"_id": 1},
+            "sort": {"_id": 1},
+        },
+    )
+    assertSuccess(
+        result,
+        [{"_id": 1}, {"_id": 3}],
+        msg="$text should exclude documents without the indexed field",
+    )
diff --git a/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py
new file mode 100644
index 000000000..aa8193aab
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/indexes/types/text/test_text_errors.py
@@ -0,0 +1,120 @@
+"""Tests for text index creation error cases.
+
+Validates invalid key specifier, missing text index requirement, multiple
+text indexes on the same collection, non-simple collation, invalid
+textIndexVersion, non-adjacent text keys, and text combined with geospatial.
+"""
+
+import pytest
+
+from documentdb_tests.framework.assertions import assertFailureCode
+from documentdb_tests.framework.error_codes import (
+    CANNOT_CREATE_INDEX_ERROR,
+    INDEX_NOT_FOUND_ERROR,
+    INDEX_OPTIONS_CONFLICT_ERROR,
+)
+from documentdb_tests.framework.executor import execute_command
+
+pytestmark = pytest.mark.text_search
+
+
+def test_text_invalid_string_key_specifier_fails(collection):
+    """Test index creation with invalid string key specifier fails."""
+    result = execute_command(
+        collection,
+        {
+            "createIndexes": collection.name,
+            "indexes": [{"key": {"content": "invalid"}, "name": "idx"}],
+        },
+    )
+    assertFailureCode(
+        result, CANNOT_CREATE_INDEX_ERROR, msg="Invalid string key specifier should fail"
+    )
+
+
+def test_text_without_index_fails(collection):
+    """Test $text query without text index fails."""
+    collection.insert_one({"_id": 1, "content": "hello"})
+    result = execute_command(
+        collection, {"find": collection.name, "filter": {"$text": {"$search": "hello"}}}
+    )
+    assertFailureCode(result, INDEX_NOT_FOUND_ERROR, msg="$text without text index should fail")
+
+
+def test_text_two_indexes_fails(collection):
+    """Test creating a second text index on the same collection fails."""
+    execute_command(
+        collection,
+        {"createIndexes": collection.name, "indexes": [{"key": {"a": "text"}, "name": "a_text"}]},
+    )
+    result = execute_command(
+        collection,
+        {"createIndexes": collection.name, "indexes": [{"key": {"b": "text"}, "name": "b_text"}]},
+    )
+    assertFailureCode(result, INDEX_OPTIONS_CONFLICT_ERROR, msg="Two text indexes should fail")
+
+
+def test_text_on_non_simple_collation_fails(database_client, collection):
+    """Test text index on a collection with non-simple collation fails.
+
+    Text indexes do not support collation; the collection's non-simple
+    collation is inherited unless overridden with locale 'simple'.
+    """
+    database_client.create_collection(collection.name, collation={"locale": "en"})
+    result = execute_command(
+        collection,
+        {"createIndexes": collection.name, "indexes": [{"key": {"a": "text"}, "name": "a_text"}]},
+    )
+    assertFailureCode(
+        result, CANNOT_CREATE_INDEX_ERROR, msg="Text index on non-simple collation should fail"
+    )
+
+
+def test_text_index_version_zero_fails(collection):
+    """Test textIndexVersion 0 fails (only versions 1, 2, 3 are valid)."""
+    result = execute_command(
+        collection,
+        {
+            "createIndexes": collection.name,
+            "indexes": [{"key": {"a": "text"}, "name": "a_text_v0", "textIndexVersion": 0}],
+        },
+    )
+    assertFailureCode(result, CANNOT_CREATE_INDEX_ERROR, msg="textIndexVersion 0 should fail")
+
+
+def test_text_index_version_four_fails(collection):
+    """Test textIndexVersion 4 fails (only versions 1, 2, 3 are valid)."""
+    result = execute_command(
+        collection,
+        {
+            "createIndexes": collection.name,
+            "indexes": [{"key": {"a": "text"}, "name": "a_text_v4", "textIndexVersion": 4}],
+        },
+    )
+    assertFailureCode(result, CANNOT_CREATE_INDEX_ERROR, msg="textIndexVersion 4 should fail")
+
+
+def test_text_non_adjacent_text_keys_fails(collection):
+    """Test compound index with non-adjacent text keys fails (text keys must be adjacent)."""
+    result = execute_command(
+        collection,
+        {
+            "createIndexes": collection.name,
+            "indexes": [{"key": {"a": "text", "b": 1, "c": "text"}, "name": "a_text_b_c_text"}],
+        },
+    )
+    assertFailureCode(result, CANNOT_CREATE_INDEX_ERROR, msg="Non-adjacent text keys should fail")
+
+
+def test_text_with_geospatial_key_fails(collection):
+    """Test compound index combining text and 2dsphere keys fails."""
+    result = execute_command(
+        collection,
+        {
+            "createIndexes": collection.name,
+            "indexes": [{"key": {"content": "text", "loc": "2dsphere"}, "name": "content_loc"}],
+        },
+    )
+    assertFailureCode(
+        result, CANNOT_CREATE_INDEX_ERROR, msg="Text combined with 2dsphere should fail"
+    )
diff --git a/documentdb_tests/framework/bson_type_validator.py b/documentdb_tests/framework/bson_type_validator.py
index f6f2c362b..2b2b911e2 100644
--- a/documentdb_tests/framework/bson_type_validator.py
+++ b/documentdb_tests/framework/bson_type_validator.py
@@ -24,6 +24,10 @@ class BsonTypeTestCase(BaseTestCase):
         keyword: The operator keyword being tested (e.g. "minimum", "required").
         valid_types: List of BsonType values the keyword accepts.
             All other BSON types will be tested as rejections.
+        skip_rejection_types: Optional list of BsonType values to exclude from
+            rejection test generation; None (the default) skips nothing.
+            USE SPARINGLY — only for types with documented quirky behavior. A type that
+            cleanly errors or cleanly succeeds belongs in valid_types or as a rejection — not here.
         requires: Optional sibling fields needed alongside the keyword
             (e.g. {"minimum": 0} for exclusiveMinimum).
         default_error_code: Expected error code for rejected types.
@@ -36,6 +40,7 @@ class BsonTypeTestCase(BaseTestCase):
 
     keyword: Optional[str] = None
     valid_types: Optional[list] = None
+    skip_rejection_types: Optional[list] = None
     requires: Optional[dict] = None
     default_error_code: int = TYPE_MISMATCH_ERROR
     error_code_overrides: Optional[dict] = None
@@ -55,8 +60,9 @@ def generate_bson_rejection_test_cases(params):
     cases = []
     for spec in params:
         accepted = set(spec.valid_types)
+        skipped = set(spec.skip_rejection_types or [])
         for bson_type in BsonType:
-            if bson_type in accepted:
+            if bson_type in accepted or bson_type in skipped:
                 continue
             sample_value = BSON_TYPE_SAMPLES[bson_type]
             test_id = f"reject_{bson_type.value}_for_{spec.id}"