12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825 |
- import email.feedparser
- import email.header
- import email.message
- import email.parser
- import email.policy
- import sys
- import typing
- from typing import (
- Any,
- Callable,
- Dict,
- Generic,
- List,
- Optional,
- Tuple,
- Type,
- Union,
- cast,
- )
- from . import requirements, specifiers, utils, version as version_module
# Type variable for the enriched value type produced by a ``_Validator``.
T = typing.TypeVar("T")

# ``Literal`` and ``TypedDict`` are imported from ``typing`` on Python 3.8+
# and from ``typing_extensions`` on older interpreters.
if sys.version_info[:2] >= (3, 8):  # pragma: no cover
    from typing import Literal, TypedDict
else:  # pragma: no cover
    if typing.TYPE_CHECKING:
        from typing_extensions import Literal, TypedDict
    else:
        try:
            from typing_extensions import Literal, TypedDict
        except ImportError:
            # Inert runtime stand-ins used when neither source is available.
            # They only need to survive being subclassed with keyword
            # arguments and being subscripted.

            class Literal:
                def __init_subclass__(*_args, **_kwargs):
                    pass

                def __class_getitem__(cls, _params):
                    # Allow ``Literal["1.0", ...]`` at module level instead of
                    # failing with "type object is not subscriptable".
                    return cls

            class TypedDict:
                def __init_subclass__(*_args, **_kwargs):
                    pass
try:
    ExceptionGroup
except NameError:  # pragma: no cover

    class ExceptionGroup(Exception):  # noqa: N818
        """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.

        If :external:exc:`ExceptionGroup` is already defined by Python itself,
        that version is used instead.
        """

        message: str
        exceptions: List[Exception]

        def __init__(self, message: str, exceptions: List[Exception]) -> None:
            self.message = message
            self.exceptions = exceptions

        def __repr__(self) -> str:
            return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"

else:  # pragma: no cover
    # Re-bind the builtin as a module-level name of this module.
    ExceptionGroup = ExceptionGroup
class InvalidMetadata(ValueError):
    """A metadata field contains invalid data."""

    field: str
    """The name of the field that contains invalid data."""

    def __init__(self, field: str, message: str) -> None:
        # Only ``message`` is forwarded to ``ValueError``; the field name is
        # kept on the instance as ``field``.
        self.field = field
        super().__init__(message)
# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# format offers some very basic primitives in *some* way then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
    """A dictionary of raw core metadata.

    Each field in core metadata maps to a key of this dictionary (when data is
    provided). The key is lower-case and underscores are used instead of dashes
    compared to the equivalent core metadata field. Any core metadata field that
    can be specified multiple times or can hold multiple values in a single
    field has a key with a plural name. See :class:`Metadata` whose attributes
    match the keys of this dictionary.

    Core metadata fields that can be specified multiple times are stored as a
    list or dict depending on which is appropriate for the field. Any fields
    which hold multiple values in a single field are stored as a list.
    """

    # Metadata 1.0 - PEP 241
    metadata_version: str
    name: str
    version: str
    platforms: List[str]
    summary: str
    description: str
    keywords: List[str]
    home_page: str
    author: str
    author_email: str
    license: str

    # Metadata 1.1 - PEP 314
    supported_platforms: List[str]
    download_url: str
    classifiers: List[str]
    requires: List[str]
    provides: List[str]
    obsoletes: List[str]

    # Metadata 1.2 - PEP 345
    maintainer: str
    maintainer_email: str
    requires_dist: List[str]
    provides_dist: List[str]
    obsoletes_dist: List[str]
    requires_python: str
    requires_external: List[str]
    project_urls: Dict[str, str]

    # Metadata 2.0
    # PEP 426 attempted to completely revamp the metadata format
    # but got stuck without ever being able to build consensus on
    # it and ultimately ended up withdrawn.
    #
    # However, a number of tools had started emitting METADATA with
    # `2.0` Metadata-Version, so for historical reasons, this version
    # was skipped.

    # Metadata 2.1 - PEP 566
    description_content_type: str
    provides_extra: List[str]

    # Metadata 2.2 - PEP 643
    dynamic: List[str]

    # Metadata 2.3 - PEP 685
    # No new fields were added in PEP 685, just some edge cases were
    # tightened up to provide better interoperability.
# RawMetadata keys whose value is a single string.
_STRING_FIELDS = {
    "author",
    "author_email",
    "description",
    "description_content_type",
    "download_url",
    "home_page",
    "license",
    "maintainer",
    "maintainer_email",
    "metadata_version",
    "name",
    "requires_python",
    "summary",
    "version",
}

# RawMetadata keys whose value is a list of strings.
_LIST_FIELDS = {
    "classifiers",
    "dynamic",
    "obsoletes",
    "obsoletes_dist",
    "platforms",
    "provides",
    "provides_dist",
    "provides_extra",
    "requires",
    "requires_dist",
    "requires_external",
    "supported_platforms",
}

# RawMetadata keys whose value is a mapping of strings to strings.
_DICT_FIELDS = {
    "project_urls",
}
def _parse_keywords(data: str) -> List[str]:
    """Split a string of comma-separated keywords into a list of keywords.

    Each keyword has surrounding whitespace stripped. Empty entries are kept,
    so an empty input string yields ``[""]``.
    """
    return [k.strip() for k in data.split(",")]
def _parse_project_urls(data: List[str]) -> Dict[str, str]:
    """Parse a list of label/URL string pairings separated by a comma.

    Each entry is split on its first comma into a label and a URL, both with
    surrounding whitespace stripped. An entry without a comma becomes a label
    with an empty URL.

    :raises KeyError: if the same label appears more than once.
    """
    urls: Dict[str, str] = {}
    for pair in data:
        # Our logic is slightly tricky here as we want to try and do
        # *something* reasonable with malformed data.
        #
        # The main thing that we have to worry about is data that does not
        # have a ',' at all to split the label from the value. There isn't a
        # single right answer here, but the missing half has to be an empty
        # str. If we let the *label* be the missing half, several malformed
        # entries would all map to '' and overwrite each other, so the text
        # becomes the label and the URL is left empty.
        #
        # TODO: The spec doesn't say anything about if the keys should be
        #       considered case sensitive or not... logically they should
        #       be case-preserving and case-insensitive, but doing that
        #       would open up more cases where we might have duplicate
        #       entries.
        label, _, url = pair.partition(",")
        label, url = label.strip(), url.strip()

        if label in urls:
            # The other potential issue is the same label appearing multiple
            # times, with no solid "right" answer for which value wins. We
            # refuse to guess and signal the problem to the caller instead.
            raise KeyError("duplicate labels in project urls")
        urls[label] = url

    return urls
def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str:
    """Get the body of the message.

    :param msg: The parsed message whose body is wanted.
    :param source: The data *msg* was parsed from; only its type is inspected,
        to decide whether the body still needs decoding.
    :raises ValueError: if *source* is bytes and the body is not valid UTF-8.
    """
    # If our source is a str, then our caller has managed encodings for us,
    # and we don't need to deal with it.
    if isinstance(source, str):
        payload: str = msg.get_payload()
        return payload

    # If our source is a bytes, then we're managing the encoding and we need
    # to deal with it.
    bpayload: bytes = msg.get_payload(decode=True)
    try:
        return bpayload.decode("utf8", "strict")
    except UnicodeDecodeError as exc:
        # Keep the decoding failure attached as the cause for debugging.
        raise ValueError("payload in an invalid encoding") from exc
# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.

# Map METADATA fields (lower-cased email header names) to RawMetadata keys.
_EMAIL_TO_RAW_MAPPING = {
    "author": "author",
    "author-email": "author_email",
    "classifier": "classifiers",
    "description": "description",
    "description-content-type": "description_content_type",
    "download-url": "download_url",
    "dynamic": "dynamic",
    "home-page": "home_page",
    "keywords": "keywords",
    "license": "license",
    "maintainer": "maintainer",
    "maintainer-email": "maintainer_email",
    "metadata-version": "metadata_version",
    "name": "name",
    "obsoletes": "obsoletes",
    "obsoletes-dist": "obsoletes_dist",
    "platform": "platforms",
    "project-url": "project_urls",
    "provides": "provides",
    "provides-dist": "provides_dist",
    "provides-extra": "provides_extra",
    "requires": "requires",
    "requires-dist": "requires_dist",
    "requires-external": "requires_external",
    "requires-python": "requires_python",
    "summary": "summary",
    "supported-platform": "supported_platforms",
    "version": "version",
}

# The inverse mapping: RawMetadata keys back to email header names. The loop
# variable is deliberately not called ``email`` so it cannot be confused with
# the ``email`` package.
_RAW_TO_EMAIL_MAPPING = {
    raw_key: header_name for header_name, raw_key in _EMAIL_TO_RAW_MAPPING.items()
}
def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]:
    """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).

    This function returns a two-item tuple of dicts. The first dict is of
    recognized fields from the core metadata specification. Fields that can be
    parsed and translated into Python's built-in types are converted
    appropriately. All other fields are left as-is. Fields that are allowed to
    appear multiple times are stored as lists.

    The second dict contains all other fields from the metadata. This includes
    any unrecognized fields. It also includes any fields which are expected to
    be parsed into a built-in type but were not formatted appropriately. Finally,
    any fields that are expected to appear only once but are repeated are
    included in this dict.
    """
    raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
    unparsed: Dict[str, List[str]] = {}

    if isinstance(data, str):
        parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
    else:
        parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)

    # We have to wrap parsed.keys() in a set, because in the case of multiple
    # values for a key (a list), the key will appear multiple times in the
    # list of keys, but we're avoiding that by using get_all().
    #
    # Header names in RFC are case insensitive, so we normalize to all lower
    # case *before* de-duplicating; otherwise ``Name`` and ``name`` would both
    # survive and the same header would be processed twice.
    for name in frozenset(key.lower() for key in parsed.keys()):
        # We use get_all() here, even for fields that aren't multiple use,
        # because otherwise someone could have e.g. two Name fields, and we
        # would just silently ignore it rather than doing something about it.
        headers = parsed.get_all(name) or []

        # The way the email module works when parsing bytes is that it
        # unconditionally decodes the bytes as ascii using the surrogateescape
        # handler. When you pull that data back out (such as with get_all() ),
        # it looks to see if the str has any surrogate escapes, and if it does
        # it wraps it in a Header object instead of returning the string.
        #
        # As such, we'll look for those Header objects, and fix up the encoding.
        value = []
        # Flag if we have run into any issues processing the headers, thus
        # signalling that the data belongs in 'unparsed'.
        valid_encoding = True
        for h in headers:
            # It's unclear if this can return more types than just a Header or
            # a str, so we'll just assert here to make sure.
            assert isinstance(h, (email.header.Header, str))

            # If it's a header object, we need to do our little dance to get
            # the real data out of it. In cases where there is invalid data
            # we're going to end up with mojibake, but there's no obvious, good
            # way around that without reimplementing parts of the Header object
            # ourselves.
            #
            # That should be fine since, if mojibake happens, this key is
            # going into the unparsed dict anyways.
            if isinstance(h, email.header.Header):
                # The Header object stores its data as chunks, and each chunk
                # can be independently encoded, so we'll need to check each
                # of them.
                chunks: List[Tuple[bytes, Optional[str]]] = []
                for chunk_bytes, encoding in email.header.decode_header(h):
                    try:
                        chunk_bytes.decode("utf8", "strict")
                    except UnicodeDecodeError:
                        # Enable mojibake.
                        encoding = "latin1"
                        valid_encoding = False
                    else:
                        encoding = "utf8"
                    chunks.append((chunk_bytes, encoding))

                # Turn our chunks back into a Header object, then let that
                # Header object do the right thing to turn them into a
                # string for us.
                value.append(str(email.header.make_header(chunks)))
            # This is already a string, so just add it.
            else:
                value.append(h)

        # We've processed all of our values to get them into a list of str,
        # but we may have mojibake data, in which case this is an unparsed
        # field.
        if not valid_encoding:
            unparsed[name] = value
            continue

        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
        if raw_name is None:
            # This is a bit of a weird situation, we've encountered a key that
            # we don't know what it means, so we don't know whether it's meant
            # to be a list or not.
            #
            # Since we can't really tell one way or another, we'll just leave it
            # as a list, even though it may be a single item list, because that's
            # what makes the most sense for email headers.
            unparsed[name] = value
            continue

        # If this is one of our string fields, then we'll check to see if our
        # value is a list of a single item. If it is then we'll assume that
        # it was emitted as a single string, and unwrap the str from inside
        # the list.
        #
        # If it's any other kind of data, then we haven't the faintest clue
        # what we should parse it as, and we have to just add it to our list
        # of unparsed stuff.
        if raw_name in _STRING_FIELDS and len(value) == 1:
            raw[raw_name] = value[0]
        # If this is one of our list of string fields, then we can just assign
        # the value, since email *only* has strings, and our get_all() call
        # above ensures that this is a list.
        elif raw_name in _LIST_FIELDS:
            raw[raw_name] = value
        # Special Case: Keywords
        # The keywords field is implemented in the metadata spec as a str,
        # but it conceptually is a list of strings, and is serialized using
        # ", ".join(keywords), so we'll do some light data massaging to turn
        # this into what it logically is.
        elif raw_name == "keywords" and len(value) == 1:
            raw[raw_name] = _parse_keywords(value[0])
        # Special Case: Project-URL
        # The project urls is implemented in the metadata spec as a list of
        # specially-formatted strings that represent a key and a value, which
        # is fundamentally a mapping, however the email format doesn't support
        # mappings in a sane way, so it was crammed into a list of strings
        # instead.
        #
        # We will do a little light data massaging to turn this into a map as
        # it logically should be.
        elif raw_name == "project_urls":
            try:
                raw[raw_name] = _parse_project_urls(value)
            except KeyError:
                unparsed[name] = value
        # Nothing that we've done has managed to parse this, so we'll just
        # throw it in our unparseable data and move on.
        else:
            unparsed[name] = value

    # We need to support getting the Description from the message payload in
    # addition to getting it from the headers. This does mean, though, there
    # is the possibility of it being set both ways, in which case we put both
    # in 'unparsed' since we don't know which is right.
    try:
        payload = _get_payload(parsed, data)
    except ValueError:
        unparsed.setdefault("description", []).append(
            parsed.get_payload(decode=isinstance(data, bytes))
        )
    else:
        if payload:
            # Check to see if we've already got a description, if so then both
            # it, and this body move to unparseable.
            if "description" in raw:
                description_header = cast(str, raw.pop("description"))
                unparsed.setdefault("description", []).extend(
                    [description_header, payload]
                )
            elif "description" in unparsed:
                unparsed["description"].append(payload)
            else:
                raw["description"] = payload

    # We need to cast our `raw` to a metadata, because a TypedDict only supports
    # literal key names, but we're computing our key names on purpose. The
    # way this function is implemented, our `TypedDict` can only have valid key
    # names.
    return cast(RawMetadata, raw), unparsed
# Unique sentinel object.
_NOT_FOUND = object()

# Keep the two values in sync. The list order matters: a version's index is
# used as its "age" when comparing metadata versions.
_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]
_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]

# Attributes that are always run through their validator, even when absent.
_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
class _Validator(Generic[T]):
    """Validate a metadata field.

    All _process_*() methods correspond to a core metadata field. The method is
    called with the field's raw value. If the raw value is valid it is returned
    in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
    If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
    as appropriate).
    """

    name: str
    raw_name: str
    added: _MetadataVersion

    def __init__(
        self,
        *,
        added: _MetadataVersion = "1.0",
    ) -> None:
        # The metadata version in which this field was introduced.
        self.added = added

    def __set_name__(self, _owner: "Metadata", name: str) -> None:
        # ``name`` is the attribute / RawMetadata key; ``raw_name`` is the
        # matching email header name, which is what error messages report.
        self.name = name
        self.raw_name = _RAW_TO_EMAIL_MAPPING[name]

    def __get__(self, instance: "Metadata", _owner: Type["Metadata"]) -> T:
        # With Python 3.8, the caching can be replaced with functools.cached_property().
        # No need to check the cache as attribute lookup will resolve into the
        # instance's __dict__ before __get__ is called.
        cache = instance.__dict__
        value = instance._raw.get(self.name)

        # To make the _process_* methods easier, we'll check if the value is None
        # and if this field is NOT a required attribute, and if both of those
        # things are true, we'll skip the converter. This will mean that the
        # converters never have to deal with the None union.
        if self.name in _REQUIRED_ATTRS or value is not None:
            # Fields without a dedicated ``_process_*`` method are passed
            # through unchanged.
            converter: Optional[Callable[[Any], T]] = getattr(
                self, f"_process_{self.name}", None
            )
            if converter is not None:
                value = converter(value)

        cache[self.name] = value
        # The enriched value now lives in the instance cache; drop the raw one.
        instance._raw.pop(self.name, None)  # type: ignore[misc]

        return cast(T, value)

    def _invalid_metadata(
        self, msg: str, cause: Optional[Exception] = None
    ) -> InvalidMetadata:
        """Build an :exc:`InvalidMetadata` for this field.

        Every ``{field}`` placeholder in *msg* is replaced by the repr of the
        field's email header name. Plain substitution is used rather than
        ``str.format_map`` because *msg* routinely embeds the repr of
        user-supplied values, and a stray ``{`` or ``}`` in such a value would
        make ``format_map`` raise instead of producing the intended error.
        """
        exc = InvalidMetadata(
            self.raw_name, msg.replace("{field}", repr(self.raw_name))
        )
        exc.__cause__ = cause
        return exc

    def _process_metadata_version(self, value: str) -> _MetadataVersion:
        # Implicitly makes Metadata-Version required.
        if value not in _VALID_METADATA_VERSIONS:
            raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
        return cast(_MetadataVersion, value)

    def _process_name(self, value: str) -> str:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        # Validate the name as a side-effect.
        try:
            utils.canonicalize_name(value, validate=True)
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            )
        else:
            return value

    def _process_version(self, value: str) -> version_module.Version:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        try:
            return version_module.parse(value)
        except version_module.InvalidVersion as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            )

    def _process_summary(self, value: str) -> str:
        """Check the field contains no newlines."""
        if "\n" in value:
            raise self._invalid_metadata("{field} must be a single line")
        return value

    def _process_description_content_type(self, value: str) -> str:
        """Check the content type, charset and Markdown variant are acceptable."""
        content_types = {"text/plain", "text/x-rst", "text/markdown"}
        invalid_type_msg = (
            f"{{field}} must be one of {sorted(content_types)}, not {value!r}"
        )

        message = email.message.EmailMessage()
        try:
            message["content-type"] = value
        except ValueError as exc:
            # The header could not be stored (the email policy rejects values
            # containing line breaks, for instance). Report it as invalid
            # metadata so that it is collected with the other validation errors.
            raise self._invalid_metadata(invalid_type_msg, cause=exc)

        content_type, parameters = (
            # Defaults to `text/plain` if parsing failed.
            message.get_content_type().lower(),
            message["content-type"].params,
        )
        # Check if content-type is valid or defaulted to `text/plain` and thus was
        # not parseable.
        if content_type not in content_types or content_type not in value.lower():
            raise self._invalid_metadata(invalid_type_msg)

        charset = parameters.get("charset", "UTF-8")
        if charset != "UTF-8":
            raise self._invalid_metadata(
                f"{{field}} can only specify the UTF-8 charset, not {charset!r}"
            )

        markdown_variants = {"GFM", "CommonMark"}
        variant = parameters.get("variant", "GFM")  # Use an acceptable default.
        if content_type == "text/markdown" and variant not in markdown_variants:
            raise self._invalid_metadata(
                f"valid Markdown variants for {{field}} are "
                f"{sorted(markdown_variants)}, not {variant!r}",
            )
        return value

    def _process_dynamic(self, value: List[str]) -> List[str]:
        """Lower-case the entries, rejecting forbidden or unknown field names."""
        lowered = [dynamic_field.lower() for dynamic_field in value]
        for dynamic_field in lowered:
            if dynamic_field in {"name", "version", "metadata-version"}:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not allowed as a dynamic field"
                )
            elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not a valid dynamic field"
                )
        return lowered

    def _process_provides_extra(
        self,
        value: List[str],
    ) -> List[utils.NormalizedName]:
        normalized_names = []
        for name in value:
            try:
                normalized_names.append(utils.canonicalize_name(name, validate=True))
            except utils.InvalidName as exc:
                raise self._invalid_metadata(
                    f"{name!r} is invalid for {{field}}", cause=exc
                )
        return normalized_names

    def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
        try:
            return specifiers.SpecifierSet(value)
        except specifiers.InvalidSpecifier as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            )

    def _process_requires_dist(
        self,
        value: List[str],
    ) -> List[requirements.Requirement]:
        reqs = []
        for req in value:
            try:
                reqs.append(requirements.Requirement(req))
            except requirements.InvalidRequirement as exc:
                raise self._invalid_metadata(
                    f"{req!r} is invalid for {{field}}", cause=exc
                )
        return reqs
class Metadata:
    """Representation of distribution metadata.

    Compared to :class:`RawMetadata`, this class provides objects representing
    metadata fields instead of only using built-in types. Any invalid metadata
    will cause :exc:`InvalidMetadata` to be raised (with a
    :py:attr:`~BaseException.__cause__` attribute as appropriate).
    """

    _raw: RawMetadata

    @classmethod
    def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> "Metadata":
        """Create an instance from :class:`RawMetadata`.

        If *validate* is true, all metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        ins = cls()
        ins._raw = data.copy()  # Mutations occur due to caching enriched values.

        if validate:
            exceptions: List[Exception] = []
            try:
                metadata_version = ins.metadata_version
                metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
            except InvalidMetadata as metadata_version_exc:
                exceptions.append(metadata_version_exc)
                metadata_version = None

            # Make sure to check for the fields that are present, plus the
            # required fields (so their absence can be reported).
            fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
            # Remove fields that have already been checked.
            fields_to_check -= {"metadata_version"}

            for key in fields_to_check:
                # Can't use getattr() on the class as that triggers the
                # descriptor protocol, which will fail due to no value for the
                # instance argument; look the validator up directly instead.
                #
                # This check runs regardless of whether the metadata version
                # was valid, and rejects any class attribute that is not a
                # validator, so an unknown key is always reported as invalid
                # metadata rather than escaping as an AttributeError.
                validator = cls.__dict__.get(key)
                if not isinstance(validator, _Validator):
                    exceptions.append(
                        InvalidMetadata(key, f"unrecognized field: {key!r}")
                    )
                    continue

                try:
                    if metadata_version:
                        field_metadata_version = validator.added
                        field_age = _VALID_METADATA_VERSIONS.index(
                            field_metadata_version
                        )
                        if field_age > metadata_age:
                            field = _RAW_TO_EMAIL_MAPPING[key]
                            exc = InvalidMetadata(
                                field,
                                f"{field} introduced in metadata version "
                                f"{field_metadata_version}, not {metadata_version}",
                            )
                            exceptions.append(exc)
                            continue
                    # Attribute access runs the field's validator.
                    getattr(ins, key)
                except InvalidMetadata as exc:
                    exceptions.append(exc)

            if exceptions:
                raise ExceptionGroup("invalid metadata", exceptions)

        return ins

    @classmethod
    def from_email(
        cls, data: Union[bytes, str], *, validate: bool = True
    ) -> "Metadata":
        """Parse metadata from email headers.

        If *validate* is true, the metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        raw, unparsed = parse_email(data)

        if validate:
            exceptions: List[Exception] = []
            for unparsed_key in unparsed:
                if unparsed_key in _EMAIL_TO_RAW_MAPPING:
                    message = f"{unparsed_key!r} has invalid data"
                else:
                    message = f"unrecognized field: {unparsed_key!r}"
                exceptions.append(InvalidMetadata(unparsed_key, message))

            if exceptions:
                raise ExceptionGroup("unparsed", exceptions)

        try:
            return cls.from_raw(raw, validate=validate)
        except ExceptionGroup as exc_group:
            raise ExceptionGroup(
                "invalid or unparsed metadata", exc_group.exceptions
            ) from None

    metadata_version: _Validator[_MetadataVersion] = _Validator()
    """:external:ref:`core-metadata-metadata-version`
    (required; validated to be a valid metadata version)"""
    name: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-name`
    (required; validated using :func:`~packaging.utils.canonicalize_name` and its
    *validate* parameter)"""
    version: _Validator[version_module.Version] = _Validator()
    """:external:ref:`core-metadata-version` (required)"""
    dynamic: _Validator[Optional[List[str]]] = _Validator(
        added="2.2",
    )
    """:external:ref:`core-metadata-dynamic`
    (validated against core metadata field names and lowercased)"""
    platforms: _Validator[Optional[List[str]]] = _Validator()
    """:external:ref:`core-metadata-platform`"""
    supported_platforms: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """:external:ref:`core-metadata-supported-platform`"""
    summary: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
    description: _Validator[Optional[str]] = _Validator()  # TODO 2.1: can be in body
    """:external:ref:`core-metadata-description`"""
    description_content_type: _Validator[Optional[str]] = _Validator(added="2.1")
    """:external:ref:`core-metadata-description-content-type` (validated)"""
    keywords: _Validator[Optional[List[str]]] = _Validator()
    """:external:ref:`core-metadata-keywords`"""
    home_page: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-home-page`"""
    download_url: _Validator[Optional[str]] = _Validator(added="1.1")
    """:external:ref:`core-metadata-download-url`"""
    author: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-author`"""
    author_email: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-author-email`"""
    maintainer: _Validator[Optional[str]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer`"""
    maintainer_email: _Validator[Optional[str]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer-email`"""
    license: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-license`"""
    classifiers: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """:external:ref:`core-metadata-classifier`"""
    requires_dist: _Validator[Optional[List[requirements.Requirement]]] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-dist`"""
    requires_python: _Validator[Optional[specifiers.SpecifierSet]] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-python`"""
    # Because `Requires-External` allows for non-PEP 440 version specifiers, we
    # don't do any processing on the values.
    requires_external: _Validator[Optional[List[str]]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-requires-external`"""
    project_urls: _Validator[Optional[Dict[str, str]]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-project-url`"""
    # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
    # regardless of metadata version.
    provides_extra: _Validator[Optional[List[utils.NormalizedName]]] = _Validator(
        added="2.1",
    )
    """:external:ref:`core-metadata-provides-extra`"""
    provides_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-provides-dist`"""
    obsoletes_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-obsoletes-dist`"""
    requires: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """``Requires`` (deprecated)"""
    provides: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """``Provides`` (deprecated)"""
    obsoletes: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """``Obsoletes`` (deprecated)"""
|