diff --git a/lms/djangoapps/certificates/api.py b/lms/djangoapps/certificates/api.py index aed7d1559d91..9c5fc9fffb3e 100644 --- a/lms/djangoapps/certificates/api.py +++ b/lms/djangoapps/certificates/api.py @@ -24,6 +24,7 @@ from common.djangoapps.student.models import CourseEnrollment from lms.djangoapps.branding import api as branding_api from lms.djangoapps.certificates.config import AUTO_CERTIFICATE_GENERATION as _AUTO_CERTIFICATE_GENERATION +from lms.djangoapps.certificates.config import ENABLE_REDACT_HISTORICAL_PII_RETIREMENT from lms.djangoapps.certificates.data import CertificateStatuses, GeneratedCertificateData from lms.djangoapps.certificates.generation_handler import generate_certificate_task as _generate_certificate_task from lms.djangoapps.certificates.generation_handler import is_on_certificate_allowlist as _is_on_certificate_allowlist @@ -977,6 +978,10 @@ def clear_pii_from_certificate_records_for_user(user): model's custom `save()` function, nor fire any Django signals (which is desired at the time of writing). There is nothing to update in our external systems by this update. + When the ``certificates.enable_redact_historical_pii_retirement`` waffle flag is enabled, the + django-simple-history audit table ``certificates_historicalgeneratedcertificate`` is also updated so that + no snapshot retains the learner's name. + Args: user (User): The User instance of the learner actively being retired. @@ -984,6 +989,8 @@ def clear_pii_from_certificate_records_for_user(user): None """ GeneratedCertificate.objects.filter(user=user).update(name="") + if ENABLE_REDACT_HISTORICAL_PII_RETIREMENT.is_enabled(): + GeneratedCertificate.history.filter(user=user).update(name="") def get_cert_history_for_course_id(course_id): diff --git a/lms/djangoapps/certificates/config.py b/lms/djangoapps/certificates/config.py index 1a5fd387581b..217cbecb1cec 100644 --- a/lms/djangoapps/certificates/config.py +++ b/lms/djangoapps/certificates/config.py @@ -2,7 +2,7 @@ This module contains various configuration settings via waffle switches for the Certificates app. """ -from edx_toggles.toggles import WaffleSwitch +from edx_toggles.toggles import WaffleFlag, WaffleSwitch # Namespace WAFFLE_NAMESPACE = 'certificates' @@ -14,3 +14,14 @@ # .. toggle_use_cases: open_edx # .. toggle_creation_date: 2017-09-14 AUTO_CERTIFICATE_GENERATION = WaffleSwitch(f"{WAFFLE_NAMESPACE}.auto_certificate_generation", __name__) + +# .. toggle_name: certificates.enable_redact_historical_pii_retirement +# .. toggle_implementation: WaffleFlag +# .. toggle_default: False +# .. toggle_description: Clears the `name` field in the django-simple-history audit table for +# retiring users' certificate records. +# .. toggle_use_cases: open_edx +# .. toggle_creation_date: 2026-05-29 +ENABLE_REDACT_HISTORICAL_PII_RETIREMENT = WaffleFlag( + f"{WAFFLE_NAMESPACE}.enable_redact_historical_pii_retirement", __name__ +) diff --git a/lms/djangoapps/certificates/management/commands/purge_pii_from_generatedcertificates.py b/lms/djangoapps/certificates/management/commands/purge_pii_from_generatedcertificates.py index c478fd4fe0c5..bfd8274c9ed7 100644 --- a/lms/djangoapps/certificates/management/commands/purge_pii_from_generatedcertificates.py +++ b/lms/djangoapps/certificates/management/commands/purge_pii_from_generatedcertificates.py @@ -1,6 +1,7 @@ """ A management command, designed to be run once by Open edX Operators, to obfuscate learner PII from the -`Certificates_GeneratedCertificate` table that should have been purged during learner retirement. +`certificates_generatedcertificate` and `certificates_historicalgeneratedcertificate` tables that should have been +purged during learner retirement. A fix has been included in the retirement pipeline to properly purge this data during learner retirement. This can be used to purge PII from accounts that have already been retired. @@ -11,6 +12,7 @@ from django.contrib.auth import get_user_model from django.core.management.base import BaseCommand +from lms.djangoapps.certificates.config import ENABLE_REDACT_HISTORICAL_PII_RETIREMENT from lms.djangoapps.certificates.models import GeneratedCertificate from openedx.core.djangoapps.user_api.api import get_retired_user_ids @@ -20,11 +22,12 @@ class Command(BaseCommand): """ - This management command performs a bulk update on `GeneratedCertificate` instances. This means that it will not - invoke the custom save() function defined as part of the `GeneratedCertificate` model, and thus will not emit any - Django signals throughout the system after the update occurs. This is desired behavior. We are using this - management command to purge remnant PII, retired elsewhere in the system, that should have already been removed - from the Certificates tables. We don't need updates to propogate to external systems (like the Credentials IDA). + This management command performs bulk updates on `GeneratedCertificate` instances and their django-simple-history + audit rows in `certificates_historicalgeneratedcertificate`. This means that it will not invoke the custom save() + function defined as part of the `GeneratedCertificate` model, and thus will not emit any Django signals throughout + the system after the update occurs. This is desired behavior. We are using this management command to purge remnant + PII, retired elsewhere in the system, that should have already been removed from the Certificates tables. We don't + need updates to propogate to external systems (like the Credentials IDA). This management command functions by requesting a list of learners' user_ids whom have completed their journey through the retirement pipeline. The `get_retired_user_ids` utility function is responsible for filtering out any @@ -41,8 +44,8 @@ class Command(BaseCommand): """ help = """ - Purges learners' full names from the `Certificates_GeneratedCertificate` table if their account has been - successfully retired. + Purges learners' full names from the `certificates_generatedcertificate` and + `certificates_historicalgeneratedcertificate` tables if their account has been successfully retired. """ def add_arguments(self, parser): @@ -59,6 +62,11 @@ def handle(self, *args, **options): f"Purging `name` from the certificate records of the following users: {retired_user_ids}" ) GeneratedCertificate.objects.filter(user_id__in=retired_user_ids).update(name="") + if ENABLE_REDACT_HISTORICAL_PII_RETIREMENT.is_enabled(): + log.warning( + f"Purging `name` from the historical certificate records of the following users: {retired_user_ids}" + ) + GeneratedCertificate.history.filter(user_id__in=retired_user_ids).update(name="") else: log.info( "DRY RUN: running this management command would purge `name` data from the following users: " diff --git a/lms/djangoapps/certificates/management/commands/tests/test_purge_pii_from_generatedcertificates.py b/lms/djangoapps/certificates/management/commands/tests/test_purge_pii_from_generatedcertificates.py index 50855ccaa804..2cc52f0ff32e 100644 --- a/lms/djangoapps/certificates/management/commands/tests/test_purge_pii_from_generatedcertificates.py +++ b/lms/djangoapps/certificates/management/commands/tests/test_purge_pii_from_generatedcertificates.py @@ -4,9 +4,11 @@ from django.core.management import call_command +from edx_toggles.toggles.testutils import override_waffle_flag from testfixtures import LogCapture from common.djangoapps.student.tests.factories import UserFactory +from lms.djangoapps.certificates.config import ENABLE_REDACT_HISTORICAL_PII_RETIREMENT from lms.djangoapps.certificates.data import CertificateStatuses from lms.djangoapps.certificates.models import GeneratedCertificate from lms.djangoapps.certificates.tests.factories import GeneratedCertificateFactory @@ -82,13 +84,24 @@ def test_management_command(self): cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired) assert cert_for_retired_user.name == self.user_retired_name - call_command("purge_pii_from_generatedcertificates") + with override_waffle_flag(ENABLE_REDACT_HISTORICAL_PII_RETIREMENT, active=True): + call_command("purge_pii_from_generatedcertificates") cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active) assert cert_for_active_user.name == self.user_active_name cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired) assert cert_for_retired_user.name == "" + active_history_names = list( + GeneratedCertificate.history.filter(user=self.user_active).values_list("name", flat=True) + ) + assert all(n == self.user_active_name for n in active_history_names) + + retired_history_names = list( + GeneratedCertificate.history.filter(user=self.user_retired).values_list("name", flat=True) + ) + assert all(n == "" for n in retired_history_names) + def test_management_command_dry_run(self): """ Verify that the management command does not purge any data when invoked with the `--dry-run` flag @@ -103,12 +116,18 @@ def test_management_command_dry_run(self): cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired) assert cert_for_retired_user.name == self.user_retired_name - with LogCapture() as logger: - call_command("purge_pii_from_generatedcertificates", "--dry-run") + with override_waffle_flag(ENABLE_REDACT_HISTORICAL_PII_RETIREMENT, active=True): + with LogCapture() as logger: + call_command("purge_pii_from_generatedcertificates", "--dry-run") cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active) assert cert_for_active_user.name == self.user_active_name cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired) assert cert_for_retired_user.name == self.user_retired_name + retired_history_names = list( + GeneratedCertificate.history.filter(user=self.user_retired).values_list("name", flat=True) + ) + assert all(n == self.user_retired_name for n in retired_history_names) + assert logger.records[0].msg == expected_log_msg diff --git a/lms/djangoapps/certificates/models.py b/lms/djangoapps/certificates/models.py index 755a9c784c53..166985bf6451 100644 --- a/lms/djangoapps/certificates/models.py +++ b/lms/djangoapps/certificates/models.py @@ -170,9 +170,9 @@ class GeneratedCertificate(models.Model): # noqa: DJ008 """ Base model for generated course certificates - .. pii: PII can exist in the generated certificate linked to in this model. Certificate data is currently retained. + .. pii: PII can exist in the generated certificate linked to in this model. .. pii_types: name, username - .. pii_retirement: retained + .. pii_retirement: local_api course_id - Course run key created_date - Date and time the certificate was created diff --git a/lms/djangoapps/certificates/tests/test_api.py b/lms/djangoapps/certificates/tests/test_api.py index c2673e36703e..a1915f899f50 100644 --- a/lms/djangoapps/certificates/tests/test_api.py +++ b/lms/djangoapps/certificates/tests/test_api.py @@ -14,7 +14,7 @@ from django.test.utils import override_settings from django.urls import reverse from django.utils import timezone -from edx_toggles.toggles.testutils import override_waffle_switch +from edx_toggles.toggles.testutils import override_waffle_flag, override_waffle_switch from freezegun import freeze_time from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.locator import CourseLocator @@ -54,7 +54,7 @@ remove_allowlist_entry, set_cert_generation_enabled, ) -from lms.djangoapps.certificates.config import AUTO_CERTIFICATE_GENERATION +from lms.djangoapps.certificates.config import AUTO_CERTIFICATE_GENERATION, ENABLE_REDACT_HISTORICAL_PII_RETIREMENT from lms.djangoapps.certificates.models import ( CertificateGenerationConfiguration, CertificateStatuses, @@ -1277,6 +1277,32 @@ def test_clear_pii_from_certificate_records(self): assert cert_course1.name == "" assert cert_course2.name == "" + def test_clear_pii_from_certificate_records_clears_history_table(self): + """ + Verify that `clear_pii_from_certificate_records_for_user` blanks `name` in the + django-simple-history audit table only when the ``certificates.enable_redact_historical_pii_retirement`` + waffle flag is enabled, and leaves it untouched when the flag is disabled. + """ + with override_waffle_flag(ENABLE_REDACT_HISTORICAL_PII_RETIREMENT, active=False): + clear_pii_from_certificate_records_for_user(self.user) + + history_names = list( + GeneratedCertificate.history.filter(user=self.user).values_list("name", flat=True) + ) + assert all(n == self.user_full_name for n in history_names), ( + "History rows should be untouched when the waffle flag is disabled." + ) + + with override_waffle_flag(ENABLE_REDACT_HISTORICAL_PII_RETIREMENT, active=True): + clear_pii_from_certificate_records_for_user(self.user) + + history_names_after = list( + GeneratedCertificate.history.filter(user=self.user).values_list("name", flat=True) + ) + assert all(n == "" for n in history_names_after), ( + "Expected all history rows to have name blanked after retirement." + ) + class GetCourseIdsForUsernameTests(TestCase): """