Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 23 additions & 15 deletions src/poetry/repositories/link_sources/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from packaging.utils import canonicalize_name
from poetry.core.constraints.version import Version
from poetry.core.packages.package import Package
from poetry.core.packages.utils.utils import splitext
from poetry.core.version.exceptions import InvalidVersionError

from poetry.utils.patterns import sdist_file_re
Expand All @@ -19,12 +20,17 @@

if TYPE_CHECKING:
from collections import defaultdict
from collections.abc import Callable
from collections.abc import Iterator

from packaging.utils import NormalizedName
from poetry.core.packages.utils.link import Link

LinkCache = defaultdict[NormalizedName, defaultdict[Version, list[Link]]]
# The cache stores factories that build a Link on demand, so that Links are
# only constructed for the (few) versions actually retrieved rather than for
# every file listed by the repository.
LinkFactory = Callable[[], Link]
LinkCache = defaultdict[NormalizedName, defaultdict[Version, list[LinkFactory]]]


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -76,29 +82,30 @@ def packages(self) -> Iterator[Package]:
@property
def links(self) -> Iterator[Link]:
for links_per_version in self._link_cache.values():
for links in links_per_version.values():
yield from links
for link_factories in links_per_version.values():
for make_link in link_factories:
yield make_link()

@classmethod
def _link_package_name_and_version(
cls, link: Link
cls, filename: str
) -> tuple[NormalizedName, Version] | None:
"""Extract just the (normalized name, version) for a link.
"""Extract just the (normalized name, version) from a filename.

This is the hot path used when building the link cache: it avoids the
cost of constructing a full `Package` (which initializes a large
number of attributes) when only the name and version are needed as
cache keys.
This is the hot path used when building the link cache: it works on the
filename alone so that the cache can be populated without constructing a
`Link` (let alone a full `Package`) for every file. The `Link` for a
given file is only built when its version is actually retrieved.
"""
name: str | None = None
version_string: str | None = None
m = wheel_file_re.match(link.filename) or sdist_file_re.match(link.filename)
m = wheel_file_re.match(filename) or sdist_file_re.match(filename)

if m:
name = m.group("name")
version_string = m.group("ver")
else:
info, _ext = link.splitext()
info, _ext = splitext(filename, is_filename=True)
match = cls.VERSION_REGEX.match(info)
if match:
name = match.group(1)
Expand All @@ -111,8 +118,8 @@ def _link_package_name_and_version(
version = Version.parse(version_string)
except InvalidVersionError:
logger.debug(
"Skipping url (%s) due to invalid version (%s)",
link.url,
"Skipping file (%s) due to invalid version (%s)",
filename,
version_string,
)
return None
Expand All @@ -121,7 +128,7 @@ def _link_package_name_and_version(

@classmethod
def link_package_data(cls, link: Link) -> Package | None:
name_and_version = cls._link_package_name_and_version(link)
name_and_version = cls._link_package_name_and_version(link.filename)
if name_and_version is None:
return None

Expand All @@ -131,7 +138,8 @@ def link_package_data(cls, link: Link) -> Package | None:
def links_for_version(
self, name: NormalizedName, version: Version
) -> Iterator[Link]:
yield from self._link_cache[name][version]
for make_link in self._link_cache[name][version]:
yield make_link()

def clean_link(self, url: str) -> str:
"""Makes sure a link is fully encoded. That is, if a ' ' shows up in
Expand Down
14 changes: 12 additions & 2 deletions src/poetry/repositories/link_sources/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@

if TYPE_CHECKING:
from poetry.repositories.link_sources.base import LinkCache
from poetry.repositories.link_sources.base import LinkFactory


def _const_factory(link: Link) -> LinkFactory:
"""Wrap an already-built link in a factory for the link cache."""
return lambda: link


class HTMLPage(LinkSource):
Expand Down Expand Up @@ -60,10 +66,14 @@ def _link_cache(self) -> LinkCache:
if link.ext not in self.SUPPORTED_FORMATS:
continue

name_and_version = self._link_package_name_and_version(link)
# The HTML API has no separate filename field, so the filename
# (needed to parse name and version) has to be derived from the
# URL, which means the Link is built eagerly here. The cache
# stores factories, so it is wrapped in one that just returns it.
name_and_version = self._link_package_name_and_version(link.filename)
if name_and_version:
name, version = name_and_version
links[name][version].append(link)
links[name][version].append(_const_factory(link))

return links

Expand Down
72 changes: 38 additions & 34 deletions src/poetry/repositories/link_sources/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

from collections import defaultdict
from functools import cached_property
from functools import partial
from typing import TYPE_CHECKING
from typing import Any

from poetry.core.packages.utils.link import Link
from poetry.core.packages.utils.utils import splitext

from poetry.repositories.link_sources.base import LinkSource
from poetry.repositories.link_sources.base import SimpleRepositoryRootPage
Expand All @@ -25,49 +27,51 @@ def __init__(self, url: str, content: dict[str, Any]) -> None:

@cached_property
def _link_cache(self) -> LinkCache:
# Only the filename is needed to enumerate the available versions, so we
# defer building the Link (and cleaning its URL) to _make_link, which is
# only called when the version's links are actually retrieved. For large
# projects this avoids constructing tens of thousands of Link objects
# that are never used during resolution.
links: LinkCache = defaultdict(lambda: defaultdict(list))
for file in self.content["files"]:
url = self.clean_link(make_absolute_url(file["url"], self._url))
requires_python = file.get("requires-python")
hashes = file.get("hashes", {})
yanked = file.get("yanked", False)
size = file.get("size")
upload_time = file.get("upload-time")

# see https://peps.python.org/pep-0714/#clients
# and https://peps.python.org/pep-0691/#project-detail
metadata: dict[str, str] | bool = False
for metadata_key in ("core-metadata", "dist-info-metadata"):
if metadata_key in file:
metadata_value = file[metadata_key]
if metadata_value and isinstance(metadata_value, dict):
metadata = metadata_value
else:
metadata = bool(metadata_value)
break

# use filename for performance (and strictly speaking also for correctness)
link = Link(
url,
filename=file["filename"],
requires_python=requires_python,
hashes=hashes,
yanked=yanked,
metadata=metadata,
size=size,
upload_time=upload_time,
)

if link.ext not in self.SUPPORTED_FORMATS:
filename = file["filename"]
if splitext(filename, is_filename=True)[1] not in self.SUPPORTED_FORMATS:
continue

name_and_version = self._link_package_name_and_version(link)
name_and_version = self._link_package_name_and_version(filename)
if name_and_version:
name, version = name_and_version
links[name][version].append(link)
links[name][version].append(partial(self._make_link, file))

return links

def _make_link(self, file: dict[str, Any]) -> Link:
    """Build the `Link` described by one file entry of the JSON response.

    The entry's "url" is made absolute relative to this page's URL and then
    passed through `clean_link`. The "filename" is forwarded explicitly, and
    the optional "requires-python", "hashes", "yanked", "size" and
    "upload-time" fields are forwarded with the defaults used below when a
    field is absent.

    "url" and "filename" are read by subscript, so an entry lacking either
    one is expected to fail with a `KeyError` (assuming `file` is a plain
    dict).
    """
    absolute_url = make_absolute_url(file["url"], self._url)
    cleaned_url = self.clean_link(absolute_url)

    # see https://peps.python.org/pep-0714/#clients
    # and https://peps.python.org/pep-0691/#project-detail
    # Only the first key that is present is consulted, even if its value is
    # falsy; "core-metadata" therefore takes precedence over the older
    # "dist-info-metadata".
    metadata: dict[str, str] | bool = False
    present_key = next(
        (key for key in ("core-metadata", "dist-info-metadata") if key in file),
        None,
    )
    if present_key is not None:
        raw_metadata = file[present_key]
        if raw_metadata and isinstance(raw_metadata, dict):
            # A non-empty dict is passed through unchanged.
            metadata = raw_metadata
        else:
            # Anything else is reduced to a plain availability flag.
            metadata = bool(raw_metadata)

    # use filename for performance (and strictly speaking also for correctness)
    link_kwargs: dict[str, Any] = {
        "filename": file["filename"],
        "requires_python": file.get("requires-python"),
        "hashes": file.get("hashes", {}),
        "yanked": file.get("yanked", False),
        "metadata": metadata,
        "size": file.get("size"),
        "upload_time": file.get("upload-time"),
    }
    return Link(cleaned_url, **link_kwargs)


class SimpleRepositoryJsonRootPage(SimpleRepositoryRootPage):
"""
Expand Down
8 changes: 5 additions & 3 deletions tests/repositories/link_sources/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,12 @@ def link_source(mocker: MockerFixture) -> LinkSource:
list,
{
Version.parse("0.1.0"): [
Link(f"{url}/demo-0.1.0.tar.gz"),
Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"),
lambda: Link(f"{url}/demo-0.1.0.tar.gz"),
lambda: Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"),
],
Version.parse("0.1.1"): [
lambda: Link(f"{url}/demo-0.1.1.tar.gz")
],
Version.parse("0.1.1"): [Link(f"{url}/demo-0.1.1.tar.gz")],
},
),
},
Expand Down
Loading