From fa44d92ab538648fdfcd5d72321a6526e0581cbc Mon Sep 17 00:00:00 2001 From: clencyc Date: Fri, 20 Mar 2026 15:00:36 +0300 Subject: [PATCH 1/2] Add PackageMetadataFile model to store metadata files in packagedb - Add PackageMetadataFile model with ForeignKey to Package - Store filename, filetype, content, download_url and sha1 - Add migration for new model - Add test for PackageMetadataFile creation Closes #840 Signed-off-by: clencyc --- .../migrations/0095_packagemetadatafile.py | 84 +++++++++++++++++++ packagedb/models.py | 63 ++++++++++++++ packagedb/tests/test_models.py | 19 ++++- 3 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 packagedb/migrations/0095_packagemetadatafile.py diff --git a/packagedb/migrations/0095_packagemetadatafile.py b/packagedb/migrations/0095_packagemetadatafile.py new file mode 100644 index 00000000..7b447b15 --- /dev/null +++ b/packagedb/migrations/0095_packagemetadatafile.py @@ -0,0 +1,84 @@ +# Generated by Django 5.1.13 on 2026-03-20 11:29 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("packagedb", "0094_package_packagedb_p_package_d39839_idx"), + ] + + operations = [ + migrations.CreateModel( + name="PackageMetadataFile", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "filename", + models.CharField( + help_text="Name of the metadata file, e.g. 'package.json'", + max_length=255, + ), + ), + ( + "filetype", + models.CharField( + blank=True, + help_text="Type of metadata file, e.g. 
'npm', 'pypi', 'maven'", + max_length=64, + null=True, + ), + ), + ( + "content", + models.TextField( + blank=True, + help_text="The raw text content of the metadata file", + null=True, + ), + ), + ( + "download_url", + models.CharField( + blank=True, + help_text="URL from which this metadata file was retrieved", + max_length=2048, + null=True, + ), + ), + ( + "sha1", + models.CharField( + blank=True, + db_index=True, + help_text="SHA1 checksum of the file content", + max_length=40, + null=True, + ), + ), + ( + "package", + models.ForeignKey( + help_text="The Package this metadata file belongs to", + on_delete=django.db.models.deletion.CASCADE, + related_name="metadata_files", + to="packagedb.package", + ), + ), + ], + options={ + "ordering": ["id"], + "unique_together": {("package", "filename")}, + }, + ), + ] diff --git a/packagedb/models.py b/packagedb/models.py index a774592e..1655fa81 100644 --- a/packagedb/models.py +++ b/packagedb/models.py @@ -1446,3 +1446,66 @@ class PackageActivity(FederatedCodePackageActivityMixin): is_processed = models.BooleanField( default=False, help_text=_("True if this activity has been processed.") ) + +class PackageMetadataFile(models.Model): + """ + Stores a metadata file associated with a Package, + such as package.json, setup.py, pom.xml, etc. + These can be federated and defederated alongside purls. + """ + + package = models.ForeignKey( + Package, + related_name="metadata_files", + on_delete=models.CASCADE, + help_text=_("The Package this metadata file belongs to"), + ) + + filename = models.CharField( + max_length=255, + help_text=_("Name of the metadata file, e.g. 'package.json'"), + ) + + filetype = models.CharField( + max_length=64, + blank=True, + null=True, + help_text=_("Type of metadata file, e.g. 
'npm', 'pypi', 'maven'"), + ) + + content = models.TextField( + blank=True, + null=True, + help_text=_("The raw text content of the metadata file"), + ) + + download_url = models.CharField( + max_length=2048, + blank=True, + null=True, + help_text=_("URL from which this metadata file was retrieved"), + ) + + sha1 = models.CharField( + max_length=40, + blank=True, + null=True, + db_index=True, + help_text=_("SHA1 checksum of the file content"), + ) + + class Meta: + unique_together = [("package", "filename")] + ordering = ["id"] + + def __str__(self): + return f"{self.filename} for {self.package.package_url}" + + def to_dict(self): + return { + "filename": self.filename, + "filetype": self.filetype, + "content": self.content, + "download_url": self.download_url, + "sha1": self.sha1, + } \ No newline at end of file diff --git a/packagedb/tests/test_models.py b/packagedb/tests/test_models.py index 9d77f9b9..ee29176f 100644 --- a/packagedb/tests/test_models.py +++ b/packagedb/tests/test_models.py @@ -15,7 +15,7 @@ from dateutil.parser import parse as dateutil_parse -from packagedb.models import DependentPackage +from packagedb.models import DependentPackage, PackageMetadataFile from packagedb.models import Package from packagedb.models import PackageWatch from packagedb.models import Party @@ -494,3 +494,20 @@ def test_get_or_none(self): package = Package.objects.filter(download_url="http://a.ab").get_or_none() assert package assert Package.objects.filter(download_url="http://a.ab-foobar").get_or_none() is None + def test_package_metadata_file_creation(self): + package = Package.objects.create( + download_url="https://example.com/package.tar.gz", + type="pypi", + name="example-pkg", + version="1.0.0", + ) + metadata_file = PackageMetadataFile.objects.create( + package=package, + filename="setup.py", + filetype="pypi", + content="from setuptools import setup\nsetup(name='example-pkg')", + sha1="da39a3ee5e6b4b0d3255bfef95601890afd80709", + ) + assert 
metadata_file.filename == "setup.py" + assert metadata_file.package == package + assert str(metadata_file) == "setup.py for pkg:pypi/example-pkg@1.0.0" From 8b8d061c5e060740143ad452f41904a7cc02cf5c Mon Sep 17 00:00:00 2001 From: clencyc Date: Fri, 20 Mar 2026 16:22:04 +0300 Subject: [PATCH 2/2] Fix test_package_endpoint_throttling by clearing cache in setUp The throttle cache was not being reset between test runs, causing request counts from previous tests to spill over and trigger the rate limit prematurely. Fix by calling cache.clear() at the start of setUp(). Signed-off-by: clencyc --- packagedb/tests/test_throttling.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packagedb/tests/test_throttling.py b/packagedb/tests/test_throttling.py index d17557fe..58dbd8cf 100644 --- a/packagedb/tests/test_throttling.py +++ b/packagedb/tests/test_throttling.py @@ -10,7 +10,7 @@ from unittest.mock import patch from django.contrib.auth.models import User - +from django.core.cache import cache from rest_framework.test import APIClient from rest_framework.test import APITestCase @@ -18,7 +18,9 @@ @patch("rest_framework.throttling.UserRateThrottle.get_rate", lambda x: "20/day") @patch("rest_framework.throttling.AnonRateThrottle.get_rate", lambda x: "10/day") class ThrottleApiTests(APITestCase): + def setUp(self): + cache.clear() # create a basic user self.user = User.objects.create_user( username="username",