Source code for pypi_simple.classes

from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
import re
from typing import Any, Optional
from urllib.parse import urlparse, urlunparse
from mailbits import ContentType
import requests
from .errors import UnparsableFilenameError, UnsupportedContentTypeError
from .filenames import parse_filename
from .html import Link, RepositoryPage
from .pep691 import File, Project, ProjectList
from .util import basejoin, check_repo_version, url_add_suffix


@dataclass
class DistributionPackage:
    """
    Description of a single versioned archive file that can be used to
    install a release of a Python project

    .. versionchanged:: 1.0.0

        ``yanked`` field replaced with `is_yanked` and `yanked_reason`
    """

    #: Basename of the package file
    filename: str

    #: URL for downloading the package file; any hash digest fragment has
    #: been stripped from it
    url: str

    #: Project name, taken from the filename; `None` when the filename
    #: cannot be parsed
    project: Optional[str]

    #: Project version, taken from the filename; `None` when the filename
    #: cannot be parsed
    version: Optional[str]

    #: Package type; `None` when the filename cannot be parsed.  The package
    #: types that are recognized are:
    #:
    #: - ``"dumb"``
    #: - ``"egg"``
    #: - ``"msi"``
    #: - ``"rpm"``
    #: - ``"sdist"``
    #: - ``"wheel"``
    #: - ``"wininst"``
    package_type: Optional[str]

    #: Hash digests for the file, stored as a `dict` whose keys are hash
    #: algorithm names and whose values are hex-encoded digest strings
    digests: dict[str, str]

    #: Optional version specifier string stating which Python version(s) the
    #: package can be installed in
    requires_python: Optional[str]

    #: Whether a PGP signature file accompanies the package file.  `None`
    #: means the package repository does not report this information.
    has_sig: Optional[bool]

    #: Whether the package file has been "yanked" from the package
    #: repository (i.e., it should only be installed when that exact version
    #: is requested)
    is_yanked: bool = False

    #: The (possibly empty) reason for the yank, when the package file has
    #: been "yanked" and a reason was supplied
    yanked_reason: Optional[str] = None

    #: Whether a Core Metadata file accompanies the package file.  `None`
    #: means the package repository does not report this information.
    has_metadata: Optional[bool] = None

    #: When the package repository provides a Core Metadata file for the
    #: package, a (possibly empty) `dict` of that file's digests, mapping
    #: hash algorithm names to hex-encoded digest strings; `None` otherwise
    metadata_digests: Optional[dict[str, str]] = None

    #: .. versionadded:: 1.1.0
    #:
    #: Size of the package file in bytes; `None` if not specified
    #: [#pep700]_.
    size: Optional[int] = None

    #: .. versionadded:: 1.1.0
    #:
    #: Time at which the package file was uploaded to the server; `None` if
    #: not specified [#pep700]_.
    upload_time: Optional[datetime] = None

    #: .. versionadded:: 1.6.0
    #:
    #: SHA 256 digest of the :pep:`740` ``.provenance`` file belonging to the
    #: package file.
    #:
    #: A non-`None` `provenance_sha256` means the package repository
    #: provides a ``.provenance`` file for the package.  When it is `None`,
    #: no conclusions can be drawn.
    provenance_sha256: Optional[str] = None

    @property
    def sig_url(self) -> str:
        """
        URL of the PGP signature file for the package file, if such a file
        exists; cf. `has_sig`
        """
        suffix = ".asc"
        return url_add_suffix(self.url, suffix)

    @property
    def metadata_url(self) -> str:
        """
        URL of the Core Metadata file for the package file, if such a file
        exists; cf. `has_metadata`
        """
        suffix = ".metadata"
        return url_add_suffix(self.url, suffix)

    @property
    def provenance_url(self) -> str:
        """
        URL of the :pep:`740` ``.provenance`` file for the package file, if
        such a file exists; cf. `provenance_sha256`
        """
        suffix = ".provenance"
        return url_add_suffix(self.url, suffix)

    @classmethod
    def from_json_data(
        cls,
        data: Any,
        project_hint: Optional[str] = None,
        base_url: Optional[str] = None,
    ) -> DistributionPackage:
        """
        Build a `DistributionPackage` out of one entry of the ``"files"``
        field of a :pep:`691` project detail JSON response.

        :param data: a file dictionary
        :param Optional[str] project_hint:
            Optionally, the value the project name is expected to have
            (usually the name of the project page on which the link was
            found).  Normalizing the name is not required.
        :param Optional[str] base_url:
            an optional URL to join to the front of a relative file URL
            (usually the URL of the page being parsed)
        :rtype: DistributionPackage
        :raises ValueError: if ``data`` is not a `dict`
        """
        # Validate the raw JSON object first, then share the conversion
        # logic with `from_file()`.
        validated = File.model_validate(data)
        return cls.from_file(validated, project_hint, base_url)

    @classmethod
    def from_file(
        cls,
        file: File,
        project_hint: Optional[str] = None,
        base_url: Optional[str] = None,
    ) -> DistributionPackage:
        """:meta private:"""
        # An unparsable filename is not an error here: the three
        # filename-derived fields are simply left as `None`.
        try:
            parsed = parse_filename(file.filename, project_hint)
        except UnparsableFilenameError:
            parsed = (None, None, None)
        project, version, pkg_type = parsed
        return cls(
            filename=file.filename,
            url=basejoin(base_url, file.url),
            project=project,
            version=version,
            package_type=pkg_type,
            digests=file.hashes,
            requires_python=file.requires_python,
            has_sig=file.gpg_sig,
            is_yanked=file.is_yanked,
            yanked_reason=file.yanked_reason,
            has_metadata=file.has_metadata,
            metadata_digests=file.metadata_digests,
            size=file.size,
            upload_time=file.upload_time,
            provenance_sha256=file.provenance,
        )
@dataclass
class ProjectPage:
    """A parsed project page from a simple repository"""

    #: The name of the project the page is for
    project: str

    #: A list of packages (as `DistributionPackage` objects) listed on the
    #: project page
    packages: list[DistributionPackage]

    #: The repository version reported by the page, or `None` if not specified
    repository_version: Optional[str]

    #: The value of the :mailheader:`X-PyPI-Last-Serial` response header
    #: returned when fetching the page, or `None` if not specified
    last_serial: Optional[str]

    #: .. versionadded:: 1.1.0
    #:
    #: A list of the project's versions, or `None` if not specified [#pep700]_.
    versions: Optional[list[str]] = None

    #: .. versionadded:: 1.4.0
    #:
    #: Repository "tracks" metadata.  See `PEP 708`__.
    #:
    #: __ https://peps.python.org/pep-0708/#repository-tracks-metadata
    tracks: list[str] = field(default_factory=list)

    #: .. versionadded:: 1.4.0
    #:
    #: Repository "alternate locations" metadata.  See `PEP 708`__.
    #:
    #: __ https://peps.python.org/pep-0708/#alternate-locations-metadata
    alternate_locations: list[str] = field(default_factory=list)

    @classmethod
    def from_html(
        cls,
        project: str,
        html: str | bytes,
        base_url: Optional[str] = None,
        from_encoding: Optional[str] = None,
    ) -> ProjectPage:
        """
        .. versionadded:: 1.0.0

        Parse an HTML project page from a simple repository into a
        `ProjectPage`.  Note that the `last_serial` attribute will be `None`,
        and `versions` will be `None` as well.

        :param str project: The name of the project whose page is being parsed
        :param html: the HTML to parse
        :type html: str or bytes
        :param Optional[str] base_url:
            an optional URL to join to the front of the packages' URLs (usually
            the URL of the page being parsed)
        :param Optional[str] from_encoding:
            an optional hint to Beautiful Soup as to the encoding of ``html``
            when it is `bytes` (usually the ``charset`` parameter of the
            response's :mailheader:`Content-Type` header)
        :rtype: ProjectPage
        :raises UnsupportedRepoVersionError:
            if the repository version has a greater major component than the
            supported repository version
        """
        page = RepositoryPage.from_html(html, base_url, from_encoding)
        return cls(
            project=project,
            # NOTE(review): `DistributionPackage.from_link` is not defined in
            # the portion of the module visible here -- confirm it exists.
            packages=[
                DistributionPackage.from_link(link, project) for link in page.links
            ],
            repository_version=page.repository_version,
            last_serial=None,
            versions=None,
            tracks=page.tracks,
            alternate_locations=page.alternate_locations,
        )

    @classmethod
    def from_json_data(cls, data: Any, base_url: Optional[str] = None) -> ProjectPage:
        """
        .. versionadded:: 1.0.0

        Parse an object decoded from an
        :mimetype:`application/vnd.pypi.simple.v1+json` response (See
        :pep:`691`) into a `ProjectPage`.  The `last_serial` attribute will be
        set to the value of the ``.meta._last-serial`` field, if any.

        :param data: The decoded body of the JSON response
        :param Optional[str] base_url:
            an optional URL to join to the front of any relative file URLs
            (usually the URL of the page being parsed)
        :rtype: ProjectPage
        :raises ValueError: if ``data`` is not a `dict`
        :raises UnsupportedRepoVersionError:
            if the repository version has a greater major component than the
            supported repository version
        """
        project = Project.model_validate(data)
        check_repo_version(project.meta.api_version)
        # Build through ``cls`` (as `from_html()` does) so that calling this
        # constructor -- directly or via `from_response()` -- on a subclass
        # yields an instance of that subclass rather than a bare
        # `ProjectPage`.
        return cls(
            project=project.name,
            packages=[
                DistributionPackage.from_file(f, project.name, base_url)
                for f in project.files
            ],
            repository_version=project.meta.api_version,
            last_serial=project.meta.last_serial,
            versions=project.versions,
            tracks=project.meta.tracks,
            alternate_locations=project.meta.alternate_locations,
        )

    @classmethod
    def from_response(cls, r: requests.Response, project: str) -> ProjectPage:
        """
        .. versionadded:: 1.0.0

        Parse a project page from a `requests.Response` returned from a
        (non-streaming) request to a simple repository, and return a
        `ProjectPage`.

        A response without a :mailheader:`Content-Type` header is treated as
        :mimetype:`text/html`.  If the parsed page does not carry a
        `last_serial` of its own, the attribute is filled in from the
        response's :mailheader:`X-PyPI-Last-Serial` header (if present).

        :param requests.Response r: the response object to parse
        :param str project: the name of the project whose page is being parsed
        :rtype: ProjectPage
        :raises UnsupportedRepoVersionError:
            if the repository version has a greater major component than the
            supported repository version
        :raises UnsupportedContentTypeError:
            if the response has an unsupported :mailheader:`Content-Type`
        """
        ct = ContentType.parse(r.headers.get("content-type", "text/html"))
        if ct.content_type == "application/vnd.pypi.simple.v1+json":
            # ``project`` is not needed here: the JSON body names the project.
            page = cls.from_json_data(r.json(), r.url)
        elif ct.content_type in (
            "application/vnd.pypi.simple.v1+html",
            "text/html",
        ):
            page = cls.from_html(
                project=project,
                html=r.content,
                base_url=r.url,
                from_encoding=ct.params.get("charset"),
            )
        else:
            raise UnsupportedContentTypeError(r.url, str(ct))
        if page.last_serial is None:
            page.last_serial = r.headers.get("X-PyPI-Last-Serial")
        return page
@dataclass
class IndexPage:
    """A parsed index/root page from a simple repository"""

    #: The project names listed in the index.  The names are not normalized.
    projects: list[str]

    #: The repository version reported by the page, or `None` if not specified
    repository_version: Optional[str]

    #: The value of the :mailheader:`X-PyPI-Last-Serial` response header
    #: returned when fetching the page, or `None` if not specified
    last_serial: Optional[str]

    @classmethod
    def from_html(
        cls, html: str | bytes, from_encoding: Optional[str] = None
    ) -> IndexPage:
        """
        .. versionadded:: 1.0.0

        Parse an HTML index/root page from a simple repository into an
        `IndexPage`.  Note that the `last_serial` attribute will be `None`.

        :param html: the HTML to parse
        :type html: str or bytes
        :param Optional[str] from_encoding:
            an optional hint to Beautiful Soup as to the encoding of ``html``
            when it is `bytes` (usually the ``charset`` parameter of the
            response's :mailheader:`Content-Type` header)
        :rtype: IndexPage
        :raises UnsupportedRepoVersionError:
            if the repository version has a greater major component than the
            supported repository version
        """
        page = RepositoryPage.from_html(html, from_encoding=from_encoding)
        return cls(
            # The text of each link on the page is taken as a project name.
            projects=[link.text for link in page.links],
            repository_version=page.repository_version,
            last_serial=None,
        )

    @classmethod
    def from_json_data(cls, data: Any) -> IndexPage:
        """
        .. versionadded:: 1.0.0

        Parse an object decoded from an
        :mimetype:`application/vnd.pypi.simple.v1+json` response (See
        :pep:`691`) into an `IndexPage`.  The `last_serial` attribute will be
        set to the value of the ``.meta._last-serial`` field, if any.

        :param data: The decoded body of the JSON response
        :rtype: IndexPage
        :raises UnsupportedRepoVersionError:
            if the repository version has a greater major component than the
            supported repository version
        :raises ValueError: if ``data`` is not a `dict`
        """
        plist = ProjectList.model_validate(data)
        check_repo_version(plist.meta.api_version)
        # Build through ``cls`` (as `from_html()` does) so that calling this
        # constructor -- directly or via `from_response()` -- on a subclass
        # yields an instance of that subclass rather than a bare `IndexPage`.
        return cls(
            projects=[p.name for p in plist.projects],
            repository_version=plist.meta.api_version,
            last_serial=plist.meta.last_serial,
        )

    @classmethod
    def from_response(cls, r: requests.Response) -> IndexPage:
        """
        .. versionadded:: 1.0.0

        Parse an index page from a `requests.Response` returned from a
        (non-streaming) request to a simple repository, and return an
        `IndexPage`.

        A response without a :mailheader:`Content-Type` header is treated as
        :mimetype:`text/html`.  If the parsed page does not carry a
        `last_serial` of its own, the attribute is filled in from the
        response's :mailheader:`X-PyPI-Last-Serial` header (if present).

        :param requests.Response r: the response object to parse
        :rtype: IndexPage
        :raises UnsupportedRepoVersionError:
            if the repository version has a greater major component than the
            supported repository version
        :raises UnsupportedContentTypeError:
            if the response has an unsupported :mailheader:`Content-Type`
        """
        ct = ContentType.parse(r.headers.get("content-type", "text/html"))
        if ct.content_type == "application/vnd.pypi.simple.v1+json":
            page = cls.from_json_data(r.json())
        elif ct.content_type in (
            "application/vnd.pypi.simple.v1+html",
            "text/html",
        ):
            page = cls.from_html(
                html=r.content, from_encoding=ct.params.get("charset")
            )
        else:
            raise UnsupportedContentTypeError(r.url, str(ct))
        if page.last_serial is None:
            page.last_serial = r.headers.get("X-PyPI-Last-Serial")
        return page