Source code for pypi_simple.parse_repo

from typing import Any, Dict, List, Optional, Tuple, Union
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from mailbits import ContentType
import requests
from .classes import DistributionPackage, IndexPage, Link, ProjectPage
from .util import UnsupportedContentTypeError, basejoin, check_repo_version





def parse_repo_project_page(
    project: str,
    html: Union[str, bytes],
    base_url: Optional[str] = None,
    from_encoding: Optional[str] = None,
) -> ProjectPage:
    """
    .. versionadded:: 0.7.0

    Build a `ProjectPage` from the HTML of a simple repository's project
    page.  Each link returned by `parse_repo_links()` is converted into a
    `DistributionPackage`.  The `~ProjectPage.last_serial` attribute of the
    result is always `None`.

    :param str project: The name of the project whose page is being parsed
    :param html: the HTML to parse
    :type html: str or bytes
    :param Optional[str] base_url:
        an optional URL to join to the front of the packages' URLs (usually
        the URL of the page being parsed)
    :param Optional[str] from_encoding:
        an optional hint to Beautiful Soup as to the encoding of ``html``
        when it is `bytes` (usually the ``charset`` parameter of the
        response's :mailheader:`Content-Type` header)
    :rtype: ProjectPage
    :raises UnsupportedRepoVersionError:
        if the repository version has a greater major component than the
        supported repository version
    """
    page_meta, anchors = parse_repo_links(html, base_url, from_encoding)
    packages = []
    for anchor in anchors:
        packages.append(DistributionPackage.from_link(anchor, project))
    return ProjectPage(
        project=project,
        packages=packages,
        repository_version=page_meta.get("repository_version"),
        last_serial=None,
    )
def parse_repo_project_json(data: Any, base_url: Optional[str] = None) -> ProjectPage:
    """
    .. versionadded:: 0.10.0

    Build a `ProjectPage` from the decoded body of an
    :mimetype:`application/vnd.pypi.simple.v1+json` project response (See
    :pep:`691`).

    The repository version is read from ``.meta.api-version`` and checked
    before anything else is parsed.  `~ProjectPage.last_serial` is the
    stringified ``.meta._last-serial`` field, or `None` when that field is
    absent.

    :param data: The decoded body of the JSON response
    :param Optional[str] base_url:
        an optional URL to join to the front of any relative file URLs
        (usually the URL of the page being parsed)
    :rtype: ProjectPage
    :raises TypeError: if ``data`` is not a `dict`
    :raises UnsupportedRepoVersionError:
        if the repository version has a greater major component than the
        supported repository version
    """
    if not isinstance(data, dict):
        raise TypeError("JSON project details response is not a dict")
    meta = data["meta"]
    api_version = meta["api-version"]
    check_repo_version(api_version)
    serial: Optional[str]
    try:
        serial = str(meta["_last-serial"])
    except KeyError:
        # The serial is optional in the response metadata.
        serial = None
    # Look the name up before touching "files" so that a response missing
    # both keys reports the missing name first.
    project_name = data["name"]
    packages = []
    for file_details in data["files"]:
        packages.append(
            DistributionPackage.from_pep691_details(
                file_details, data["name"], base_url
            )
        )
    return ProjectPage(
        project=project_name,
        packages=packages,
        repository_version=api_version,
        last_serial=serial,
    )
def parse_repo_project_response(project: str, r: requests.Response) -> ProjectPage:
    """
    .. versionadded:: 0.7.0

    Turn a `requests.Response` from a (non-streaming) request for a project
    page of a simple repository into a `ProjectPage`.

    The parser is picked from the response's :mailheader:`Content-Type`
    header, which is treated as ``text/html`` when missing.  If the parsed
    page carries no serial, the value of the
    :mailheader:`X-PyPI-Last-Serial` response header (if any) is used.

    :param str project: The name of the project whose page is being parsed
    :param requests.Response r: the response object to parse
    :rtype: ProjectPage
    :raises UnsupportedRepoVersionError:
        if the repository version has a greater major component than the
        supported repository version
    :raises UnsupportedContentTypeError:
        if the response has an unsupported :mailheader:`Content-Type`
    """
    parsed_ct = ContentType.parse(r.headers.get("content-type", "text/html"))
    media_type = parsed_ct.content_type
    if media_type == "application/vnd.pypi.simple.v1+json":
        page = parse_repo_project_json(r.json(), r.url)
    elif media_type in ("application/vnd.pypi.simple.v1+html", "text/html"):
        # Only pass an encoding hint when the server declared a charset.
        encoding_hint: Optional[str] = (
            r.encoding if "charset" in parsed_ct.params else None
        )
        page = parse_repo_project_page(
            project=project,
            html=r.content,
            base_url=r.url,
            from_encoding=encoding_hint,
        )
    else:
        raise UnsupportedContentTypeError(r.url, str(parsed_ct))
    if page.last_serial is not None:
        return page
    return page._replace(last_serial=r.headers.get("X-PyPI-Last-Serial"))
def parse_repo_index_page(
    html: Union[str, bytes],
    from_encoding: Optional[str] = None,
) -> IndexPage:
    """
    .. versionadded:: 0.7.0

    Build an `IndexPage` from the HTML of a simple repository's index/root
    page.  The project names are the texts of the links returned by
    `parse_repo_links()`.  The `~IndexPage.last_serial` attribute of the
    result is always `None`.

    :param html: the HTML to parse
    :type html: str or bytes
    :param Optional[str] from_encoding:
        an optional hint to Beautiful Soup as to the encoding of ``html``
        when it is `bytes` (usually the ``charset`` parameter of the
        response's :mailheader:`Content-Type` header)
    :rtype: IndexPage
    :raises UnsupportedRepoVersionError:
        if the repository version has a greater major component than the
        supported repository version
    """
    page_meta, anchors = parse_repo_links(html, from_encoding=from_encoding)
    project_names = []
    for anchor in anchors:
        project_names.append(anchor.text)
    return IndexPage(
        projects=project_names,
        repository_version=page_meta.get("repository_version"),
        last_serial=None,
    )
def parse_repo_index_json(data: Any) -> IndexPage:
    """
    .. versionadded:: 0.10.0

    Build an `IndexPage` from the decoded body of an
    :mimetype:`application/vnd.pypi.simple.v1+json` index response (See
    :pep:`691`).

    The repository version is read from ``.meta.api-version`` and checked
    before the project list is read.  `~IndexPage.last_serial` is the
    stringified ``.meta._last-serial`` field, or `None` when that field is
    absent.

    :param data: The decoded body of the JSON response
    :rtype: IndexPage
    :raises UnsupportedRepoVersionError:
        if the repository version has a greater major component than the
        supported repository version
    :raises TypeError: if ``data`` is not a `dict`
    """
    if not isinstance(data, dict):
        raise TypeError("JSON project list response is not a dict")
    meta = data["meta"]
    api_version = meta["api-version"]
    check_repo_version(api_version)
    serial: Optional[str]
    try:
        serial = str(meta["_last-serial"])
    except KeyError:
        # The serial is optional in the response metadata.
        serial = None
    project_names = []
    for entry in data["projects"]:
        project_names.append(entry["name"])
    return IndexPage(
        projects=project_names,
        repository_version=api_version,
        last_serial=serial,
    )
def parse_repo_index_response(r: requests.Response) -> IndexPage:
    """
    .. versionadded:: 0.7.0

    Turn a `requests.Response` from a (non-streaming) request for the index
    page of a simple repository into an `IndexPage`.

    The parser is picked from the response's :mailheader:`Content-Type`
    header, which is treated as ``text/html`` when missing.  If the parsed
    page carries no serial, the value of the
    :mailheader:`X-PyPI-Last-Serial` response header (if any) is used.

    :param requests.Response r: the response object to parse
    :rtype: IndexPage
    :raises UnsupportedRepoVersionError:
        if the repository version has a greater major component than the
        supported repository version
    :raises UnsupportedContentTypeError:
        if the response has an unsupported :mailheader:`Content-Type`
    """
    parsed_ct = ContentType.parse(r.headers.get("content-type", "text/html"))
    media_type = parsed_ct.content_type
    if media_type == "application/vnd.pypi.simple.v1+json":
        page = parse_repo_index_json(r.json())
    elif media_type in ("application/vnd.pypi.simple.v1+html", "text/html"):
        # Only pass an encoding hint when the server declared a charset.
        encoding_hint: Optional[str] = (
            r.encoding if "charset" in parsed_ct.params else None
        )
        page = parse_repo_index_page(html=r.content, from_encoding=encoding_hint)
    else:
        raise UnsupportedContentTypeError(r.url, str(parsed_ct))
    if page.last_serial is not None:
        return page
    return page._replace(last_serial=r.headers.get("X-PyPI-Last-Serial"))