Source code for pypi_simple.parse_old

from typing import Dict, Iterator, List, Optional, Tuple, Union
from urllib.parse import urljoin
from warnings import warn
from bs4 import BeautifulSoup
from .classes import DistributionPackage
from .parse_repo import parse_repo_links


def parse_simple_index(
    html: Union[str, bytes],
    base_url: Optional[str] = None,
    from_encoding: Optional[str] = None,
) -> Iterator[Tuple[str, str]]:
    """
    Parse a simple repository's index page and return a generator of
    ``(project name, project URL)`` pairs

    .. deprecated:: 0.7.0
        Use `parse_repo_index_page()` or `parse_links_stream()` instead

    .. note::
        This is a generator function, so the deprecation warning is emitted
        and `parse_repo_links()` is called when iteration starts, not when
        this function is called.

    :param html: the HTML to parse
    :type html: str or bytes
    :param Optional[str] base_url:
        an optional URL to join to the front of the URLs returned (usually the
        URL of the page being parsed)
    :param Optional[str] from_encoding:
        an optional hint to Beautiful Soup as to the encoding of ``html`` when
        it is `bytes` (usually the ``charset`` parameter of the response's
        :mailheader:`Content-Type` header)
    :rtype: Iterator[Tuple[str, str]]
    :raises UnsupportedRepoVersionError:
        if the repository version has a greater major component than the
        supported repository version
    """
    # stacklevel=2 attributes the warning to the code driving this generator
    # instead of to this module, so that the default warning filters (which
    # only display DeprecationWarning raised from user code) actually show it.
    warn(
        "parse_simple_index() is deprecated."
        " Use parse_repo_index_page() or parse_links_stream() instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    # parse_repo_links() returns a sequence whose element [1] is the iterable
    # of links; element [0] is not needed here.
    for link in parse_repo_links(html, base_url, from_encoding)[1]:
        yield (link.text, link.url)
def parse_project_page(
    html: Union[str, bytes],
    base_url: Optional[str] = None,
    from_encoding: Optional[str] = None,
    project_hint: Optional[str] = None,
) -> List[DistributionPackage]:
    """
    Parse a project page from a simple repository and return a list of
    `DistributionPackage` objects

    .. deprecated:: 0.7.0
        Use `parse_repo_project_page()` instead

    :param html: the HTML to parse
    :type html: str or bytes
    :param Optional[str] base_url:
        an optional URL to join to the front of the packages' URLs (usually the
        URL of the page being parsed)
    :param Optional[str] from_encoding:
        an optional hint to Beautiful Soup as to the encoding of ``html`` when
        it is `bytes` (usually the ``charset`` parameter of the response's
        :mailheader:`Content-Type` header)
    :param Optional[str] project_hint:
        The name of the project whose page is being parsed; used to
        disambiguate the parsing of certain filenames
    :rtype: List[DistributionPackage]
    :raises UnsupportedRepoVersionError:
        if the repository version has a greater major component than the
        supported repository version
    """
    # stacklevel=2 attributes the warning to the caller of this function
    # instead of to this module, so that the default warning filters (which
    # only display DeprecationWarning raised from user code) actually show it.
    warn(
        "parse_project_page() is deprecated."
        " Use parse_repo_project_page() instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    # parse_repo_links() returns a sequence whose element [1] is the iterable
    # of links; each link is converted to a DistributionPackage.
    return [
        DistributionPackage.from_link(link, project_hint)
        for link in parse_repo_links(html, base_url, from_encoding)[1]
    ]