Skip to content
7 changes: 7 additions & 0 deletions lms/djangoapps/certificates/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from common.djangoapps.student.models import CourseEnrollment
from lms.djangoapps.branding import api as branding_api
from lms.djangoapps.certificates.config import AUTO_CERTIFICATE_GENERATION as _AUTO_CERTIFICATE_GENERATION
from lms.djangoapps.certificates.config import ENABLE_REDACT_HISTORICAL_PII_RETIREMENT
from lms.djangoapps.certificates.data import CertificateStatuses, GeneratedCertificateData
from lms.djangoapps.certificates.generation_handler import generate_certificate_task as _generate_certificate_task
from lms.djangoapps.certificates.generation_handler import is_on_certificate_allowlist as _is_on_certificate_allowlist
Expand Down Expand Up @@ -977,13 +978,19 @@ def clear_pii_from_certificate_records_for_user(user):
model's custom `save()` function, nor fire any Django signals (which is desired at the time of writing). There is
nothing to update in our external systems by this update.

When the ``certificates.enable_redact_historical_pii_retirement`` waffle flag is enabled, the
django-simple-history audit table ``certificates_historicalgeneratedcertificate`` is also updated so that
no snapshot retains the learner's name.

Args:
user (User): The User instance of the learner actively being retired.

Returns:
None
"""
GeneratedCertificate.objects.filter(user=user).update(name="")
if ENABLE_REDACT_HISTORICAL_PII_RETIREMENT.is_enabled():
GeneratedCertificate.history.filter(user=user).update(name="")


def get_cert_history_for_course_id(course_id):
Expand Down
13 changes: 12 additions & 1 deletion lms/djangoapps/certificates/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This module contains various configuration settings via
waffle switches for the Certificates app.
"""
from edx_toggles.toggles import WaffleSwitch
from edx_toggles.toggles import WaffleFlag, WaffleSwitch

# Namespace
WAFFLE_NAMESPACE = 'certificates'
Expand All @@ -14,3 +14,14 @@
# .. toggle_use_cases: open_edx
# .. toggle_creation_date: 2017-09-14
AUTO_CERTIFICATE_GENERATION = WaffleSwitch(f"{WAFFLE_NAMESPACE}.auto_certificate_generation", __name__)

# .. toggle_name: certificates.enable_redact_historical_pii_retirement
# .. toggle_implementation: WaffleFlag
# .. toggle_default: False
# .. toggle_description: Clears the `name` field in the django-simple-history audit table for
# retiring users' certificate records.
# .. toggle_use_cases: open_edx
# .. toggle_creation_date: 2026-05-29
ENABLE_REDACT_HISTORICAL_PII_RETIREMENT = WaffleFlag(
f"{WAFFLE_NAMESPACE}.enable_redact_historical_pii_retirement", __name__
)
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
A management command, designed to be run once by Open edX Operators, to obfuscate learner PII from the
`Certificates_GeneratedCertificate` table that should have been purged during learner retirement.
`certificates_generatedcertificate` and `certificates_historicalgeneratedcertificate` tables that should have been
purged during learner retirement.

A fix has been included in the retirement pipeline to properly purge this data during learner retirement. This can be
used to purge PII from accounts that have already been retired.
Expand All @@ -11,6 +12,7 @@
from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand

from lms.djangoapps.certificates.config import ENABLE_REDACT_HISTORICAL_PII_RETIREMENT
from lms.djangoapps.certificates.models import GeneratedCertificate
from openedx.core.djangoapps.user_api.api import get_retired_user_ids

Expand All @@ -20,11 +22,12 @@

class Command(BaseCommand):
"""
This management command performs a bulk update on `GeneratedCertificate` instances. This means that it will not
invoke the custom save() function defined as part of the `GeneratedCertificate` model, and thus will not emit any
Django signals throughout the system after the update occurs. This is desired behavior. We are using this
management command to purge remnant PII, retired elsewhere in the system, that should have already been removed
from the Certificates tables. We don't need updates to propogate to external systems (like the Credentials IDA).
This management command performs bulk updates on `GeneratedCertificate` instances and their django-simple-history
audit rows in `certificates_historicalgeneratedcertificate`. This means that it will not invoke the custom save()
function defined as part of the `GeneratedCertificate` model, and thus will not emit any Django signals throughout
the system after the update occurs. This is desired behavior. We are using this management command to purge remnant
PII, retired elsewhere in the system, that should have already been removed from the Certificates tables. We don't
need updates to propogate to external systems (like the Credentials IDA).

This management command functions by requesting a list of learners' user_ids whom have completed their journey
through the retirement pipeline. The `get_retired_user_ids` utility function is responsible for filtering out any
Expand All @@ -41,8 +44,8 @@ class Command(BaseCommand):
"""

help = """
Purges learners' full names from the `Certificates_GeneratedCertificate` table if their account has been
successfully retired.
Purges learners' full names from the `certificates_generatedcertificate` and
`certificates_historicalgeneratedcertificate` tables if their account has been successfully retired.
"""

def add_arguments(self, parser):
Expand All @@ -59,6 +62,11 @@ def handle(self, *args, **options):
f"Purging `name` from the certificate records of the following users: {retired_user_ids}"
)
GeneratedCertificate.objects.filter(user_id__in=retired_user_ids).update(name="")
if ENABLE_REDACT_HISTORICAL_PII_RETIREMENT.is_enabled():
log.warning(
f"Purging `name` from the historical certificate records of the following users: {retired_user_ids}"
)
GeneratedCertificate.history.filter(user_id__in=retired_user_ids).update(name="")
else:
log.info(
"DRY RUN: running this management command would purge `name` data from the following users: "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@


from django.core.management import call_command
from edx_toggles.toggles.testutils import override_waffle_flag
from testfixtures import LogCapture

from common.djangoapps.student.tests.factories import UserFactory
from lms.djangoapps.certificates.config import ENABLE_REDACT_HISTORICAL_PII_RETIREMENT
from lms.djangoapps.certificates.data import CertificateStatuses
from lms.djangoapps.certificates.models import GeneratedCertificate
from lms.djangoapps.certificates.tests.factories import GeneratedCertificateFactory
Expand Down Expand Up @@ -82,13 +84,24 @@ def test_management_command(self):
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired)
assert cert_for_retired_user.name == self.user_retired_name

call_command("purge_pii_from_generatedcertificates")
with override_waffle_flag(ENABLE_REDACT_HISTORICAL_PII_RETIREMENT, active=True):
call_command("purge_pii_from_generatedcertificates")

cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active)
assert cert_for_active_user.name == self.user_active_name
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired)
assert cert_for_retired_user.name == ""

active_history_names = list(
GeneratedCertificate.history.filter(user=self.user_active).values_list("name", flat=True)
)
assert all(n == self.user_active_name for n in active_history_names)

retired_history_names = list(
GeneratedCertificate.history.filter(user=self.user_retired).values_list("name", flat=True)
)
assert all(n == "" for n in retired_history_names)

def test_management_command_dry_run(self):
"""
Verify that the management command does not purge any data when invoked with the `--dry-run` flag
Expand All @@ -103,12 +116,18 @@ def test_management_command_dry_run(self):
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired)
assert cert_for_retired_user.name == self.user_retired_name

with LogCapture() as logger:
call_command("purge_pii_from_generatedcertificates", "--dry-run")
with override_waffle_flag(ENABLE_REDACT_HISTORICAL_PII_RETIREMENT, active=True):
with LogCapture() as logger:
call_command("purge_pii_from_generatedcertificates", "--dry-run")

cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active)
assert cert_for_active_user.name == self.user_active_name
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired)
assert cert_for_retired_user.name == self.user_retired_name

retired_history_names = list(
GeneratedCertificate.history.filter(user=self.user_retired).values_list("name", flat=True)
)
assert all(n == self.user_retired_name for n in retired_history_names)

assert logger.records[0].msg == expected_log_msg
4 changes: 2 additions & 2 deletions lms/djangoapps/certificates/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,9 @@ class GeneratedCertificate(models.Model): # noqa: DJ008
"""
Base model for generated course certificates

.. pii: PII can exist in the generated certificate linked to in this model. Certificate data is currently retained.
.. pii: PII can exist in the generated certificate linked to in this model.
.. pii_types: name, username
.. pii_retirement: retained
.. pii_retirement: local_api

course_id - Course run key
created_date - Date and time the certificate was created
Expand Down
30 changes: 28 additions & 2 deletions lms/djangoapps/certificates/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from django.test.utils import override_settings
from django.urls import reverse
from django.utils import timezone
from edx_toggles.toggles.testutils import override_waffle_switch
from edx_toggles.toggles.testutils import override_waffle_flag, override_waffle_switch
from freezegun import freeze_time
from opaque_keys.edx.keys import CourseKey
from opaque_keys.edx.locator import CourseLocator
Expand Down Expand Up @@ -54,7 +54,7 @@
remove_allowlist_entry,
set_cert_generation_enabled,
)
from lms.djangoapps.certificates.config import AUTO_CERTIFICATE_GENERATION
from lms.djangoapps.certificates.config import AUTO_CERTIFICATE_GENERATION, ENABLE_REDACT_HISTORICAL_PII_RETIREMENT
from lms.djangoapps.certificates.models import (
CertificateGenerationConfiguration,
CertificateStatuses,
Expand Down Expand Up @@ -1277,6 +1277,32 @@ def test_clear_pii_from_certificate_records(self):
assert cert_course1.name == ""
assert cert_course2.name == ""

def test_clear_pii_from_certificate_records_clears_history_table(self):
"""
Verify that `clear_pii_from_certificate_records_for_user` blanks `name` in the
django-simple-history audit table only when the ``certificates.enable_redact_historical_pii_retirement``
waffle flag is enabled, and leaves it untouched when the flag is disabled.
"""
with override_waffle_flag(ENABLE_REDACT_HISTORICAL_PII_RETIREMENT, active=False):
clear_pii_from_certificate_records_for_user(self.user)

history_names = list(
GeneratedCertificate.history.filter(user=self.user).values_list("name", flat=True)
)
assert all(n == self.user_full_name for n in history_names), (
"History rows should be untouched when the waffle flag is disabled."
)

with override_waffle_flag(ENABLE_REDACT_HISTORICAL_PII_RETIREMENT, active=True):
clear_pii_from_certificate_records_for_user(self.user)

history_names_after = list(
GeneratedCertificate.history.filter(user=self.user).values_list("name", flat=True)
)
assert all(n == "" for n in history_names_after), (
"Expected all history rows to have name blanked after retirement."
)


class GetCourseIdsForUsernameTests(TestCase):
"""
Expand Down
Loading