diff --git a/CHANGELOG.md b/CHANGELOG.md index dee6e4b..5985343 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## [Version 1.3.1](https://github.com/dataiku/dss-plugin-sharepoint-online/releases/tag/v1.3.1) - Security release - 2026-04-01 + +- Adding optional whitelist mechanism on certificate preset + ## [Version 1.3.0](https://github.com/dataiku/dss-plugin-sharepoint-online/releases/tag/v1.3.0) - Security release - 2026-02-26 - Increase the version of the package cryptography to 46.0.5 and msal to 1.34.0 in response to CVE-2026-26007 diff --git a/custom-recipes/sharepoint-online-append-list/recipe.py b/custom-recipes/sharepoint-online-append-list/recipe.py index 3d29873..c1a8a37 100644 --- a/custom-recipes/sharepoint-online-append-list/recipe.py +++ b/custom-recipes/sharepoint-online-append-list/recipe.py @@ -61,6 +61,7 @@ def convert_date_format(json_row): metadata_to_retrieve.append("Title") display_metadata = len(metadata_to_retrieve) > 0 client = SharePointClient(config) +client.assert_can_write_list(sharepoint_list_title) sharepoint_writer = client.get_writer({"columns": input_schema}, None, None, max_workers, batch_size, write_mode) with output_dataset.get_writer() as writer: diff --git a/parameter-sets/app-certificate/parameter-set.json b/parameter-sets/app-certificate/parameter-set.json index 0c9ba87..5e7ddaa 100644 --- a/parameter-sets/app-certificate/parameter-set.json +++ b/parameter-sets/app-certificate/parameter-set.json @@ -63,6 +63,80 @@ "type": "PASSWORD", "description": "If required by private key", "mandatory": false + }, + { + "name": "activate_libraries_whitelist", + "label": "Whitelists libraries", + "type": "BOOLEAN", + "description": "Not advised: access rights should be handled at Azure app level" + }, + { + "name": "libraries_whitelist", + "label": "Whitelisted libraries", + "type": "OBJECT_LIST", + "description": "", + "visibilityCondition": "model.activate_libraries_whitelist === true", + "subParams": [ + { + 
"name": "whitelist_name", + "label": "Library name", + "type": "STRING", + "description": "/sites/YourSite/Shared Documents/your folder path" + }, + { + "name": "whitelist_rights", + "label": "Access rights", + "type": "MULTISELECT", + "description": "", + "selectChoices": [ + { + "value": "read", + "label": "Read" + }, + { + "value": "write", + "label": "Write" + } + ] + } + ] + }, + { + "name": "activate_lists_whitelist", + "label": "Whitelists lists", + "type": "BOOLEAN", + "description": "Not advised: access rights should be handled at Azure app level" + }, + { + "name": "lists_whitelist", + "label": "Whitelisted lists", + "type": "OBJECT_LIST", + "description": "", + "visibilityCondition": "model.activate_lists_whitelist === true", + "subParams": [ + { + "name": "whitelist_name", + "label": "List name", + "type": "STRING", + "description": "List ID from the list's SharePoint URL" + }, + { + "name": "whitelist_rights", + "label": "Access rights", + "type": "MULTISELECT", + "description": "", + "selectChoices": [ + { + "value": "read", + "label": "Read" + }, + { + "value": "write", + "label": "Write" + } + ] + } + ] } ] } \ No newline at end of file diff --git a/plugin.json b/plugin.json index c22d842..f41d293 100644 --- a/plugin.json +++ b/plugin.json @@ -1,6 +1,6 @@ { "id": "sharepoint-online", - "version": "1.3.0", + "version": "1.3.1", "meta": { "label": "SharePoint Online", "description": "Read and write data from/to your SharePoint Online account", diff --git a/python-connectors/sharepoint-online_lists/connector.py b/python-connectors/sharepoint-online_lists/connector.py index cee368c..7bc1878 100644 --- a/python-connectors/sharepoint-online_lists/connector.py +++ b/python-connectors/sharepoint-online_lists/connector.py @@ -59,6 +59,7 @@ def is_column_expendable(column): def generate_rows(self, dataset_schema=None, dataset_partitioning=None, partition_id=None, records_limit=-1): + self.client.assert_can_read_list(self.sharepoint_list_title) if 
self.client.column_ids == {}: self.client.get_read_schema() @@ -114,6 +115,7 @@ def format_row(self, row): def get_writer(self, dataset_schema=None, dataset_partitioning=None, partition_id=None, write_mode="OVERWRITE"): assert_list_title(self.sharepoint_list_title) + self.client.assert_can_write_list(self.sharepoint_list_title) if write_mode != "APPEND": write_mode = SharePointConstants.WRITE_MODE_CREATE return self.client.get_writer(dataset_schema, dataset_partitioning, partition_id, self.max_workers, self.batch_size, write_mode) diff --git a/python-fs-providers/sharepoint-online_shared-documents/fs-provider.py b/python-fs-providers/sharepoint-online_shared-documents/fs-provider.py index 204fec3..577ac79 100644 --- a/python-fs-providers/sharepoint-online_shared-documents/fs-provider.py +++ b/python-fs-providers/sharepoint-online_shared-documents/fs-provider.py @@ -50,6 +50,7 @@ def close(self): def stat(self, path): assert_valid_sharepoint_path(path) full_path = get_lnt_path(self.get_full_path(path)) + self.client.assert_can_read_path(full_path) logger.info('stat:path="{}", full_path="{}"'.format(path, full_path)) files = self.client.get_files(full_path) folders = self.client.get_folders(full_path) @@ -94,6 +95,7 @@ def browse(self, path): path = get_rel_path(path) full_path = get_lnt_path(self.get_full_path(path)) logger.info('browse:path="{}", full_path="{}"'.format(path, full_path)) + self.client.assert_can_read_path(full_path) folders = self.client.get_folders(full_path) files = self.client.get_files(full_path) @@ -157,6 +159,7 @@ def enumerate(self, path, first_non_empty): assert_valid_sharepoint_path(path) full_path = get_lnt_path(self.get_full_path(path)) logger.info('enumerate:path="{}",fullpath="{}", first_non_empty="{}"'.format(path, full_path, first_non_empty)) + self.client.assert_can_read_path(full_path) path_to_item, item_name = os.path.split(full_path) is_file = self.client.is_file(full_path) if is_file: @@ -192,6 +195,7 @@ def 
delete_recursive(self, path): assert_valid_sharepoint_path(path) full_path = self.get_full_path(path) logger.info('delete_recursive:path={},fullpath={}'.format(path, full_path)) + self.client.assert_can_write_path(full_path) assert_path_is_not_root(full_path) path_to_item, item_name = os.path.split(full_path.rstrip("/")) files = self.client.get_files(path_to_item) @@ -220,6 +224,8 @@ def move(self, from_path, to_path): full_from_path = self.get_full_path(from_path) full_to_path = self.get_full_path(to_path) logger.info('move:from={},to={}'.format(full_from_path, full_to_path)) + self.client.assert_can_read_path(full_from_path) + self.client.assert_can_write_path(full_to_path) self.client.move_file(full_from_path, full_to_path) # SP Online now returns {'odata.null': True} @@ -229,6 +235,7 @@ def read(self, path, stream, limit): assert_valid_sharepoint_path(path) full_path = self.get_full_path(path) logger.info('read:full_path={}'.format(full_path)) + self.client.assert_can_read_path(full_path) response = self.client.get_file_content(full_path) bio = BytesIO(response.content) shutil.copyfileobj(bio, stream) @@ -237,6 +244,7 @@ def write(self, path, stream): assert_valid_sharepoint_path(path) full_path = self.get_full_path(path) logger.info('write:path="{}", full_path="{}"'.format(path, full_path)) + self.client.assert_can_write_path(full_path) bio = BytesIO() shutil.copyfileobj(stream, bio) bio.seek(0) diff --git a/python-lib/dss_constants.py b/python-lib/dss_constants.py index d46a3e5..0b60de9 100644 --- a/python-lib/dss_constants.py +++ b/python-lib/dss_constants.py @@ -38,7 +38,7 @@ class DSSConstants(object): "sharepoint_oauth": "The access token is missing" } PATH = 'path' - PLUGIN_VERSION = "1.3.0" + PLUGIN_VERSION = "1.3.1" SECRET_PARAMETERS_KEYS = ["Authorization", "sharepoint_username", "sharepoint_password", "client_secret", "client_certificate", "passphrase"] SITE_APP_DETAILS = { "sharepoint_tenant": "The tenant name is missing", diff --git 
a/python-lib/sharepoint_client.py b/python-lib/sharepoint_client.py index 898dbf3..7fed808 100644 --- a/python-lib/sharepoint_client.py +++ b/python-lib/sharepoint_client.py @@ -20,6 +20,7 @@ is_empty_path, get_lnt_path, format_private_key, format_certificate_thumbprint, url_encode ) +from sharepoint_whitelist import WhiteList from safe_logger import SafeLogger @@ -50,6 +51,7 @@ def __init__(self, config, root_name_overwrite_legacy_mode=False): self.column_entity_property_name = {} self.columns_to_format = [] self.column_sharepoint_type = {} + self.whitelist = WhiteList() if config.get('auth_type') == DSSConstants.AUTH_OAUTH: logger.info("SharePointClient:sharepoint_oauth") @@ -117,6 +119,7 @@ def __init__(self, config, root_name_overwrite_legacy_mode=False): elif config.get('auth_type') == DSSConstants.AUTH_APP_CERTIFICATE: logger.info("SharePointClient:app-certificate") login_details = config.get('app_certificate') + self.whitelist = WhiteList(login_details) self.assert_login_details(DSSConstants.APP_CERTIFICATE_DETAILS, login_details) self.setup_sharepoint_online_url(login_details) self.setup_login_details(login_details) @@ -1095,6 +1098,26 @@ def is_column_displayable(self, column, display_metadata=False, metadata_to_retr return True return (not column[SharePointConstants.HIDDEN_COLUMN]) + def assert_can_read_path(self, path): + full_path = self.get_site_path(path) + full_path = "/" + full_path.strip("/") + logger.info("Testing read access to path '{}'".format(full_path)) + self.whitelist.assert_can_read_path(full_path) + + def assert_can_write_path(self, path): + full_path = self.get_site_path(path) + full_path = "/" + full_path.strip("/") + logger.info("Testing write access to path '{}'".format(full_path)) + self.whitelist.assert_can_write_path(full_path) + + def assert_can_read_list(self, list_name): + logger.info("Testing read access to list '{}'".format(list_name)) + self.whitelist.assert_can_read_list(list_name) + + def assert_can_write_list(self, 
class WhiteList():
    """Optional allow-list limiting which libraries and lists a preset may access.

    The configuration dictionary may carry four keys:

    - ``activate_libraries_whitelist`` / ``libraries_whitelist``
    - ``activate_lists_whitelist`` / ``lists_whitelist``

    Each ``*_whitelist`` value is a list of ``{"whitelist_name": ...,
    "whitelist_rights": [...]}`` rows, where the rights are ``"read"``
    and/or ``"write"``. A whitelist that is not activated allows everything.

    Library paths are matched case-insensitively on whole path segments,
    and the most specific (deepest) matching entry decides, so an entry with
    no rights can close a sub-folder of an otherwise readable library.
    List names are matched case-insensitively on the exact name.
    """

    def __init__(self, config=None):
        """Build the lookup tables from the preset configuration.

        :param config: preset dictionary, or None for "no restriction"
        """
        self.config = config or {}
        self.activate_libraries_whitelist = self.config.get("activate_libraries_whitelist", False)
        self.activate_lists_whitelist = self.config.get("activate_lists_whitelist", False)
        self.libraries_whitelist = {}
        self.lists_whitelist = {}
        if self.activate_libraries_whitelist:
            self.libraries_whitelist = self._load_libraries(self.config.get("libraries_whitelist"))
        if self.activate_lists_whitelist:
            self.lists_whitelist = self._load_lists(self.config.get("lists_whitelist"))

        if self.activate_libraries_whitelist or self.activate_lists_whitelist:
            logger.info("Whitelisting with libraries:{} and lists:{}".format(self.libraries_whitelist, self.lists_whitelist))

    @staticmethod
    def _path_tokens(path):
        """Split a path into lower-cased segments with '.' and '..' resolved.

        Empty segments (leading, trailing or doubled slashes) and '.' are
        dropped. '..' removes the previous segment, so that a path such as
        'allowed/../secret' is checked as 'secret' and not as a child of
        'allowed'.

        :returns: the list of segments, or None when '..' climbs above the
            root (such a path can never be matched safely).
        """
        tokens = []
        for token in (path or "").lower().split("/"):
            if token in ("", "."):
                continue
            if token == "..":
                if not tokens:
                    return None
                tokens.pop()
                continue
            tokens.append(token)
        return tokens

    @classmethod
    def _load_libraries(cls, entries):
        """Turn the 'libraries_whitelist' rows into {normalised path: rights}."""
        libraries = {}
        for library in entries or []:
            tokens = cls._path_tokens(library.get("whitelist_name"))
            if not tokens:
                # Blank or unusable row: it cannot designate a library.
                continue
            # A row without rights is kept on purpose: it denies access to that path.
            libraries["/".join(tokens)] = library.get("whitelist_rights") or []
        return libraries

    @staticmethod
    def _load_lists(entries):
        """Turn the 'lists_whitelist' rows into {lower-cased list name: rights}."""
        lists = {}
        for list_item in entries or []:
            list_name = (list_item.get("whitelist_name") or "").lower()
            if not list_name:
                # Blank row: it cannot designate a list.
                continue
            lists[list_name] = list_item.get("whitelist_rights") or []
        return lists

    def assert_can_read_path(self, path):
        """Raise an Exception if the preset may not read `path`."""
        if not self.can_read_path(path):
            raise Exception("This preset does not have read access to '{}'".format(path))

    def assert_can_write_path(self, path):
        """Raise an Exception if the preset may not write to `path`."""
        if not self.can_write_path(path):
            raise Exception("This preset does not have write access to '{}'".format(path))

    def assert_can_read_list(self, list_name):
        """Raise an Exception if the preset may not read the list `list_name`."""
        if not self.can_read_list(list_name):
            raise Exception("This preset does not have read access to the list '{}'".format(list_name))

    def assert_can_write_list(self, list_name):
        """Raise an Exception if the preset may not write to the list `list_name`."""
        if not self.can_write_list(list_name):
            raise Exception("This preset does not have write access to the list '{}'".format(list_name))

    def _can_access_path(self, required_right, path):
        """Shared implementation of can_read_path / can_write_path."""
        if not self.activate_libraries_whitelist:
            return True
        tokens = self._path_tokens(path)
        if tokens is None:
            # The path escapes the root through '..': never allowed.
            return False
        return self.can_do(required_right, self.libraries_whitelist, tokens)

    def _can_access_list(self, required_right, list_name):
        """Shared implementation of can_read_list / can_write_list."""
        if not self.activate_lists_whitelist:
            return True
        # A missing list name is denied rather than crashing on .lower()
        return self.can_do(required_right, self.lists_whitelist, (list_name or "").lower())

    def can_read_path(self, path):
        """Return True if `path` is readable (always True when libraries are not whitelisted)."""
        return self._can_access_path("read", path)

    def can_write_path(self, path):
        """Return True if `path` is writable (always True when libraries are not whitelisted)."""
        return self._can_access_path("write", path)

    def can_read_list(self, list_name):
        """Return True if the list is readable (always True when lists are not whitelisted)."""
        return self._can_access_list("read", list_name)

    def can_write_list(self, list_name):
        """Return True if the list is writable (always True when lists are not whitelisted)."""
        return self._can_access_list("write", list_name)

    def can_do(self, required_right, rights, path_to_test):
        """Check `required_right` against a {name: rights} table.

        :param required_right: "read" or "write"
        :param rights: dictionary mapping a normalised name to its list of rights
        :param path_to_test: either a list of path segments (prefix matching,
            deepest entry wins) or a plain string (exact matching)
        :returns: True if the right is granted, False otherwise
        """
        if isinstance(path_to_test, list):
            # Walk from the full path up to its first segment; the first
            # entry found is the most specific one and settles the answer.
            for path_size in range(len(path_to_test), 0, -1):
                right_for_path = rights.get("/".join(path_to_test[:path_size]))
                if right_for_path is None:
                    # No rights defined for that path, skipping
                    continue
                return required_right in right_for_path
            return False
        return required_right in (rights.get(path_to_test) or [])
scenario_id="APPUSERNAMEPASSWORDAUTH") + + +def test_run_sharepoint_online_whitelisting(user_dss_clients): + dss_scenario.run(user_dss_clients, project_key=TEST_PROJECT_KEY, scenario_id="WHITELISTING") diff --git a/tests/python/unit/test_common.py b/tests/python/unit/test_common.py index aa6d87a..7b678ce 100644 --- a/tests/python/unit/test_common.py +++ b/tests/python/unit/test_common.py @@ -1,4 +1,5 @@ from common import get_value_from_path, is_request_performed, decode_retry_after_header +from sharepoint_whitelist import WhiteList from sharepoint_constants import SharePointConstants import pytest @@ -32,6 +33,57 @@ def setup_class(self): self.mock_response_http_429_date_in_past = MockResponse(429, {"Retry-After": "Wed, 21 Oct 2015 07:28:00 GMT"}) self.mock_response_http_429_date_in_future = MockResponse(429, {"Retry-After": "Wed, 21 Oct 9999 07:28:00 GMT"}) self.mock_response_http_429_garbage = MockResponse(429, {"Retry-After": "blablablabla"}) + self.app_certificate = { + 'libraries_whitelist': [ + { + '$$hashKey': 'object:540', + 'whitelist_rights': ['read'], + 'whitelist_name': 'site/Path/Shared Documents 1' + }, + { + '$$hashKey': 'object:540', + 'whitelist_rights': ['read'], + 'whitelist_name': '/site/Path/Shared Documents 2' + }, + { + '$$hashKey': 'object:540', + 'whitelist_rights': ['read'], + 'whitelist_name': 'site/Path/Shared Documents 3/' + }, + { + '$$hashKey': 'object:540', + 'whitelist_rights': ['read'], + 'whitelist_name': '/site/Path/Shared Documents 4/' + }, + { + '$$hashKey': 'object:540', + 'whitelist_rights': ['read'], + 'whitelist_name': '/site/Path/Shared Documents/' + }, + { + '$$hashKey': 'object:540', + 'whitelist_rights': [], + 'whitelist_name': '/site/Path/Shared Documents/subfolder/secret' + } + ], + 'lists_whitelist': [ + { + '$$hashKey': 'object:540', + 'whitelist_rights': ['read'], + 'whitelist_name': 'CanRead' + }, { + '$$hashKey': 'object:540', + 'whitelist_rights': ['read', 'write'], + 'whitelist_name': 'Can write' + }, { + 
'$$hashKey': 'object:540', + 'whitelist_rights': [], + 'whitelist_name': 'Cannot see' + } + ], + 'activate_lists_whitelist': True, + 'activate_libraries_whitelist': True + } def test_get_value_from_path_long_path(self): key = get_value_from_path(self.dictionary_to_search, self.ok_path_1) @@ -85,3 +137,88 @@ def test_decode_retry_after_header_garbage(self): def test_decode_retry_after_header_no_header(self): seconds_before_retry = decode_retry_after_header(self.mock_response_http_429_no_header) assert seconds_before_retry == SharePointConstants.DEFAULT_WAIT_BEFORE_RETRY + + def test_whitelist_read_start_slash_series_1(self): + whitelist = WhiteList(self.app_certificate) + assert whitelist.can_read_path("site/Path/Shared Documents 1") is True + assert whitelist.can_read_path("/site/Path/Shared Documents 1") is True + assert whitelist.can_read_path("site/Path/Shared Documents 1/subfolder") is True + assert whitelist.can_read_path("/site/Path/Shared Documents 1/subfolder") is True + + def test_whitelist_read_start_slash_series_2(self): + whitelist = WhiteList(self.app_certificate) + assert whitelist.can_read_path("site/Path/Shared Documents 2") is True + assert whitelist.can_read_path("/site/Path/Shared Documents 2") is True + assert whitelist.can_read_path("site/Path/Shared Documents 2/subfolder") is True + assert whitelist.can_read_path("/site/Path/Shared Documents 2/subfolder") is True + + def test_whitelist_read_start_slash_series_3(self): + whitelist = WhiteList(self.app_certificate) + assert whitelist.can_read_path("site/Path/Shared Documents 3") is True + assert whitelist.can_read_path("/site/Path/Shared Documents 3") is True + assert whitelist.can_read_path("site/Path/Shared Documents 3/subfolder") is True + assert whitelist.can_read_path("/site/Path/Shared Documents 3/subfolder") is True + + def test_whitelist_read_start_slash_series_4(self): + whitelist = WhiteList(self.app_certificate) + assert whitelist.can_read_path("site/Path/Shared Documents 4") is True 
+ assert whitelist.can_read_path("/site/Path/Shared Documents 4") is True + assert whitelist.can_read_path("site/Path/Shared Documents 4/subfolder") is True + assert whitelist.can_read_path("/site/Path/Shared Documents 4/subfolder") is True + + def test_whitelist_read_start_slash_series_5(self): + whitelist = WhiteList(self.app_certificate) + assert whitelist.can_read_path("site/Path/Shared Documents 5") is False + assert whitelist.can_read_path("/site/Path/Shared Documents 5") is False + assert whitelist.can_read_path("site/Path/Shared Documents 5/subfolder") is False + assert whitelist.can_read_path("/site/Path/Shared Documents 5/subfolder") is False + + def test_whitelist_read_casing(self): + whitelist = WhiteList(self.app_certificate) + assert whitelist.can_read_path("Site/path/shared documents 4/subFolder") is True + + def test_whitelist_read_list(self): + whitelist = WhiteList(self.app_certificate) + assert whitelist.can_read_list("CanRead") is True + + def test_whitelist_cannot_write_list(self): + whitelist = WhiteList(self.app_certificate) + assert whitelist.can_write_list("CanRead") is False + + def test_whitelist_list_casing_cannot(self): + whitelist = WhiteList(self.app_certificate) + assert whitelist.can_write_list("canread") is False + + def test_whitelist_list_casing(self): + whitelist = WhiteList(self.app_certificate) + assert whitelist.can_read_list("canread") is True + + def test_whitelist_list_not_lib(self): + local_app_certificate = self.app_certificate.copy() + local_app_certificate.pop("activate_libraries_whitelist", None) + whitelist = WhiteList(local_app_certificate) + assert whitelist.can_read_list("canread") is True + assert whitelist.can_write_list("canread") is False + assert whitelist.can_read_path("/site/Path/Shared Documents 5/subfolder") is True + assert whitelist.can_write_path("/site/Path/Shared Documents 2/subfolder") is True + + def test_whitelist_lib_not_list(self): + local_app_certificate = self.app_certificate.copy() + 
local_app_certificate.pop("activate_lists_whitelist", None) + whitelist = WhiteList(local_app_certificate) + assert whitelist.can_read_list("random") is True + assert whitelist.can_write_list("random") is True + assert whitelist.can_read_path("/site/Path/Shared Documents 2/subfolder") is True + assert whitelist.can_write_path("/site/Path/Shared Documents 2/subfolder") is False + assert whitelist.can_read_path("/site/Path/Shared Documents 5/subfolder") is False + + def test_whitelist_lib_no_right(self): + whitelist = WhiteList(self.app_certificate) + assert whitelist.can_read_path("/site/Path/Shared Documents") is True + assert whitelist.can_read_path("/site/Path/Shared Documents/subfolder") is True + assert whitelist.can_read_path("/site/Path/Shared Documents/subfolder/secret") is False + + def test_whitelist_list_no_right(self): + whitelist = WhiteList(self.app_certificate) + assert whitelist.can_read_list("Cannot see") is False + assert whitelist.can_write_list("Cannot see") is False