Source code for pypi_simple.parse_repo

from   typing       import Dict, List, Optional, Tuple, Union
from   urllib.parse import urljoin
from   bs4          import BeautifulSoup
import requests
from   .classes     import DistributionPackage, IndexPage, Link, ProjectPage
from   .util        import check_repo_version



[docs]def parse_repo_project_page( project: str, html: Union[str, bytes], base_url: Optional[str] = None, from_encoding: Optional[str] = None, ) -> ProjectPage: """ .. versionadded:: 0.7.0 Parse a project page from a simple repository into a `ProjectPage`. Note that the `~ProjectPage.last_serial` attribute will be `None`. :param str project: The name of the project whose page is being parsed :param html: the HTML to parse :type html: str or bytes :param Optional[str] base_url: an optional URL to join to the front of the packages' URLs (usually the URL of the page being parsed) :param Optional[str] from_encoding: an optional hint to Beautiful Soup as to the encoding of ``html`` when it is `bytes` (usually the ``charset`` parameter of the response's :mailheader:`Content-Type` header) :rtype: ProjectPage :raises UnsupportedRepoVersionError: if the repository version has a greater major component than the supported repository version """ metadata, links = parse_repo_links(html, base_url, from_encoding) return ProjectPage( project = project, packages = [ DistributionPackage.from_link(link, project) for link in links ], repository_version = metadata.get("repository_version"), last_serial = None, )
[docs]def parse_repo_project_response(project: str, r: requests.Response) \ -> ProjectPage: """ .. versionadded:: 0.7.0 Parse a project page from a `requests.Response` returned from a (non-streaming) request to a simple repository, and return a `ProjectPage`. :param str project: The name of the project whose page is being parsed :param requests.Response r: the response object to parse :rtype: ProjectPage :raises UnsupportedRepoVersionError: if the repository version has a greater major component than the supported repository version """ charset: Optional[str] if 'charset' in r.headers.get('content-type', '').lower(): charset = r.encoding else: charset = None page = parse_repo_project_page( project = project, html = r.content, base_url = r.url, from_encoding = charset, ) return page._replace(last_serial=r.headers.get("X-PyPI-Last-Serial"))
[docs]def parse_repo_index_page( html: Union[str, bytes], from_encoding: Optional[str] = None, ) -> IndexPage: """ .. versionadded:: 0.7.0 Parse an index/root page from a simple repository into an `IndexPage`. Note that the `~IndexPage.last_serial` attribute will be `None`. :param html: the HTML to parse :type html: str or bytes :param Optional[str] from_encoding: an optional hint to Beautiful Soup as to the encoding of ``html`` when it is `bytes` (usually the ``charset`` parameter of the response's :mailheader:`Content-Type` header) :rtype: IndexPage :raises UnsupportedRepoVersionError: if the repository version has a greater major component than the supported repository version """ metadata, links = parse_repo_links(html, from_encoding=from_encoding) return IndexPage( projects = [link.text for link in links], repository_version = metadata.get("repository_version"), last_serial = None, )
[docs]def parse_repo_index_response(r: requests.Response) -> IndexPage: """ .. versionadded:: 0.7.0 Parse an index page from a `requests.Response` returned from a (non-streaming) request to a simple repository, and return an `IndexPage`. :param requests.Response r: the response object to parse :rtype: IndexPage :raises UnsupportedRepoVersionError: if the repository version has a greater major component than the supported repository version """ charset: Optional[str] if 'charset' in r.headers.get('content-type', '').lower(): charset = r.encoding else: charset = None page = parse_repo_index_page(html=r.content, from_encoding=charset) return page._replace(last_serial=r.headers.get("X-PyPI-Last-Serial"))