# Source code for pypi_simple.html

from __future__ import annotations
from dataclasses import dataclass
import re
from typing import Optional
from urllib.parse import urljoin
from bs4 import BeautifulSoup, Tag
from .util import basejoin, check_repo_version

@dataclass
class RepositoryPage:
    """
    .. versionadded:: 1.0.0

    A parsed HTML page from a :pep:`503` simple repository
    """

    #: The repository version, if any, reported by the page in accordance with
    #: :pep:`629`
    repository_version: Optional[str]

    #: A list of hyperlinks found on the page
    links: list[Link]

    #: .. versionadded:: 1.4.0
    #:
    #: ``<meta/>`` tags found on the page whose ``name`` attributes start with
    #: ``pypi:``.  This is a dict in which the keys are ``name`` attributes
    #: with leading ``"pypi:"`` removed and in which the values are lists of
    #: the corresponding ``content`` attributes.
    pypi_meta: dict[str, list[str]]

    @property
    def tracks(self) -> list[str]:
        """
        .. versionadded:: 1.4.0

        Repository "tracks" metadata.  See `PEP 708`__.

        __ https://peps.python.org/pep-0708/
        """
        return self.pypi_meta.get("tracks", [])

    @property
    def alternate_locations(self) -> list[str]:
        """
        .. versionadded:: 1.4.0

        Repository "alternate locations" metadata.  See `PEP 708`__.

        __ https://peps.python.org/pep-0708/
        """
        return self.pypi_meta.get("alternate-locations", [])

    @classmethod
    def from_html(
        cls,
        html: str | bytes,
        base_url: Optional[str] = None,
        from_encoding: Optional[str] = None,
    ) -> RepositoryPage:
        """
        Parse an HTML page from a simple repository into a `RepositoryPage`.

        :param html: the HTML to parse
        :type html: str or bytes
        :param Optional[str] base_url: an optional URL to join to the front of
            the links' URLs (usually the URL of the page being parsed)
        :param Optional[str] from_encoding: an optional hint to Beautiful Soup
            as to the encoding of ``html`` when it is `bytes` (usually the
            ``charset`` parameter of the response's
            :mailheader:`Content-Type` header)
        :rtype: RepositoryPage
        :raises UnsupportedRepoVersionError: if the repository version has a
            greater major component than the supported repository version
        """
        soup = BeautifulSoup(html, "html.parser", from_encoding=from_encoding)
        # A <base href="..."> tag, if present, rebases all relative link URLs
        # on the page.
        base_tag = soup.find("base", href=True)
        if base_tag is not None:
            assert isinstance(base_tag, Tag)
            href = base_tag["href"]
            assert isinstance(href, str)
            if base_url is None:
                base_url = href
            else:
                base_url = urljoin(base_url, href)
        # Collect all <meta name="pypi:..." content="..."> tags, keyed by the
        # name with the "pypi:" prefix stripped; repeated names accumulate.
        meta: dict[str, list[str]] = {}
        for tag in soup.find_all(
            "meta", attrs={"name": re.compile(r"^pypi:"), "content": True}
        ):
            assert isinstance(tag, Tag)
            name = tag["name"]
            assert isinstance(name, str)
            assert name.startswith("pypi:")
            content = tag["content"]
            assert isinstance(content, str)
            meta.setdefault(name[5:], []).append(content)
        # Only the first "repository-version" value is honored (PEP 629).
        try:
            repository_version = meta["repository-version"][0]
        except LookupError:
            repository_version = None
        if repository_version is not None:
            check_repo_version(repository_version)
        links = []
        for link in soup.find_all("a", href=True):
            links.append(
                Link(
                    text="".join(link.strings).strip(),
                    url=basejoin(base_url, link["href"]),
                    attrs=link.attrs,
                )
            )
        return cls(repository_version=repository_version, links=links, pypi_meta=meta)