123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356 |
- """Handwritten parser of dependency specifiers.
- The docstring for each _parse_* function contains EBNF-inspired grammar representing
- the implementation.
- """
- import ast
- from typing import Any, List, NamedTuple, Optional, Tuple, Union
- from ._tokenizer import DEFAULT_RULES, Tokenizer
class Node:
    """Base class for the leaf nodes of a parsed marker expression.

    A node wraps the text of a single marker token. Subclasses decide how
    that text is rendered back into marker syntax through ``serialize``.
    """

    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        # Use the value's own repr so that embedded quotes and backslashes
        # are escaped instead of producing an ambiguous, hand-quoted string.
        return f"<{self.__class__.__name__}({self.value!r})>"

    def serialize(self) -> str:
        """Return the node rendered as marker source text.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError
class Variable(Node):
    """Marker node holding the name of a marker variable."""

    def serialize(self) -> str:
        """Return the variable name unchanged."""
        return f"{self}"
class Value(Node):
    """Marker node holding a literal string value."""

    def serialize(self) -> str:
        """Return the value wrapped in double quotes."""
        return '"' + str(self) + '"'
class Op(Node):
    """Marker node holding a comparison operator such as ``in`` or ``not in``."""

    def serialize(self) -> str:
        """Return the operator text unchanged."""
        return "{}".format(self)
# An operand of a marker comparison: a variable or a literal value.
MarkerVar = Union[Variable, Value]
# One comparison: (left operand, operator, right operand).
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]

# The precise recursive shapes would be:
#   MarkerAtom = Union[MarkerItem, List["MarkerAtom"]]
#   MarkerList = List[Union["MarkerList", MarkerAtom, str]]
# but mypy does not support recursive type definition
# (https://github.com/python/mypy/issues/731), so both are widened to Any.
MarkerAtom = Any
MarkerList = List[Any]
class ParsedRequirement(NamedTuple):
    """The raw pieces of a dependency specifier, as produced by the parser."""

    # Text of the leading IDENTIFIER token (the package name).
    name: str
    # URL following "@"; empty string when the requirement has no URL.
    url: str
    # Extra names found between square brackets, in source order.
    extras: List[str]
    # Concatenated version specifier text; empty string when absent.
    specifier: str
    # Parsed marker expression, or None when no marker was given.
    marker: Optional[MarkerList]
- # --------------------------------------------------------------------------------------
- # Recursive descent parser for dependency specifier
- # --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
    """Parse a dependency specifier string into a ``ParsedRequirement``.

    ``source`` is tokenized with ``DEFAULT_RULES`` and handed to the
    recursive descent parser.
    """
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_requirement(tokenizer)
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details

    Consumes the whole token stream: anything left over after the
    requirement details is reported as an error by the final ``expect``.
    """
    tokenizer.consume("WS")

    name = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    ).text
    tokenizer.consume("WS")

    extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    details = _parse_requirement_details(tokenizer)
    # Nothing may follow the requirement details.
    tokenizer.expect("END", expected="end of dependency specifier")

    url, specifier, marker = details
    return ParsedRequirement(
        name=name, url=url, extras=extras, specifier=specifier, marker=marker
    )
def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> Tuple[str, str, Optional[MarkerList]]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?

    Returns a ``(url, specifier, marker)`` tuple. Exactly one of ``url`` and
    ``specifier`` can be non-empty; ``marker`` is None when no marker follows.
    """
    if tokenizer.check("AT"):
        # URL form: "@ <url>", optionally followed by whitespace and a marker.
        tokenizer.read()
        tokenizer.consume("WS")

        span_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, "", None)

        tokenizer.expect("WS", expected="whitespace after URL")
        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url, "", None)

        url_marker = _parse_requirement_marker(
            tokenizer, span_start=span_start, after="URL and whitespace"
        )
        return (url, "", url_marker)

    # Version form: a (possibly empty) specifier, optionally followed by a marker.
    span_start = tokenizer.position
    specifier = _parse_specifier(tokenizer)
    tokenizer.consume("WS")
    if tokenizer.check("END", peek=True):
        return ("", specifier, None)

    if specifier:
        context = "version specifier"
    else:
        context = "name and no valid version specifier"
    version_marker = _parse_requirement_marker(
        tokenizer, span_start=span_start, after=context
    )
    return ("", specifier, version_marker)
def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?

    ``span_start`` and ``after`` are only used to build the syntax error
    reported when the next token is not a semicolon.
    """
    semicolon_next = tokenizer.check("SEMICOLON")
    if not semicolon_next:
        tokenizer.raise_syntax_error(
            f"Expected end or semicolon (after {after})",
            span_start=span_start,
        )
    # Step past the semicolon itself.
    tokenizer.read()

    parsed_marker = _parse_marker(tokenizer)
    tokenizer.consume("WS")
    return parsed_marker
def _parse_extras(tokenizer: Tokenizer) -> List[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?

    Returns an empty list when the next token is not an opening bracket.
    """
    if tokenizer.check("LEFT_BRACKET", peek=True):
        with tokenizer.enclosing_tokens(
            "LEFT_BRACKET",
            "RIGHT_BRACKET",
            around="extras",
        ):
            tokenizer.consume("WS")
            names = _parse_extras_list(tokenizer)
            tokenizer.consume("WS")
        return names

    return []
def _parse_extras_list(tokenizer: Tokenizer) -> List[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*

    Returns the extra names in source order; the list is empty when the
    next token is not an identifier.
    """
    if not tokenizer.check("IDENTIFIER"):
        return []

    names: List[str] = [tokenizer.read().text]

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            # Two names in a row without a separating comma.
            tokenizer.raise_syntax_error("Expected comma between extra names")
        elif not tokenizer.check("COMMA"):
            return names

        # Step past the comma, then require another name.
        tokenizer.read()
        tokenizer.consume("WS")
        names.append(
            tokenizer.expect("IDENTIFIER", expected="extra name after comma").text
        )
def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?

    Returns the concatenated specifier text (possibly empty).
    """
    # NOTE(review): per the grammar above the parentheses are optional;
    # presumably enclosing_tokens tolerates their absence -- confirm there.
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        versions = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return versions
def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?

    Returns the specifier and comma tokens concatenated without the
    whitespace between them; an empty string when no SPECIFIER is present.
    """
    pieces: List[str] = []

    while tokenizer.check("SPECIFIER"):
        clause_start = tokenizer.position
        pieces.append(tokenizer.read().text)

        # Reject trailers that the SPECIFIER token itself did not absorb.
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=clause_start,
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=clause_start,
                span_end=tokenizer.position,
            )

        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        # Keep the comma in the output so the clauses stay separated.
        pieces.append(tokenizer.read().text)
        tokenizer.consume("WS")

    return "".join(pieces)
- # --------------------------------------------------------------------------------------
- # Recursive descent parser for marker expression
- # --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
    """Parse a standalone marker expression string into a ``MarkerList``.

    ``source`` is tokenized with ``DEFAULT_RULES``; the whole input must be
    a marker expression.
    """
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_full_marker(tokenizer)
def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
    """Parse a marker expression and require that it ends the input."""
    parsed_marker = _parse_marker(tokenizer)
    # Nothing may follow the marker expression.
    tokenizer.expect("END", expected="end of marker expression")
    return parsed_marker
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)*

    Returns a flat list alternating atoms and boolean operator strings:
    ``[atom, "and", atom, "or", atom, ...]``. A single atom with no
    operator yields a one-element list.
    """
    parts = [_parse_marker_atom(tokenizer)]

    while tokenizer.check("BOOLOP"):
        boolop_token = tokenizer.read()
        right_atom = _parse_marker_atom(tokenizer)
        parts += [boolop_token.text, right_atom]

    return parts
def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?

    Returns a nested marker list for a parenthesized group, or a single
    ``(left, op, right)`` item otherwise.
    """
    tokenizer.consume("WS")

    if not tokenizer.check("LEFT_PARENTHESIS", peek=True):
        atom: MarkerAtom = _parse_marker_item(tokenizer)
    else:
        with tokenizer.enclosing_tokens(
            "LEFT_PARENTHESIS",
            "RIGHT_PARENTHESIS",
            around="marker expression",
        ):
            tokenizer.consume("WS")
            atom = _parse_marker(tokenizer)
            tokenizer.consume("WS")

    tokenizer.consume("WS")
    return atom
def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?

    Returns the comparison as a ``(left, op, right)`` tuple.
    """
    tokenizer.consume("WS")
    lhs = _parse_marker_var(tokenizer)

    tokenizer.consume("WS")
    operator = _parse_marker_op(tokenizer)

    tokenizer.consume("WS")
    rhs = _parse_marker_var(tokenizer)

    tokenizer.consume("WS")
    return (lhs, operator, rhs)
def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING

    Dots in a variable name are replaced by underscores before the name is
    passed to ``process_env_var``.
    """
    if tokenizer.check("VARIABLE"):
        raw_name = tokenizer.read().text
        return process_env_var(raw_name.replace(".", "_"))

    if tokenizer.check("QUOTED_STRING"):
        quoted = tokenizer.read().text
        return process_python_str(quoted)

    tokenizer.raise_syntax_error(
        message="Expected a marker variable or quoted string"
    )
def process_env_var(env_var: str) -> Variable:
    """Wrap a marker variable name in a ``Variable`` node.

    ``python_implementation`` is mapped to ``platform_python_implementation``;
    every other name is kept as given.
    """
    aliases = ("platform_python_implementation", "python_implementation")
    canonical = "platform_python_implementation" if env_var in aliases else env_var
    return Variable(canonical)
def process_python_str(python_str: str) -> Value:
    """Turn quoted-string source text into a ``Value`` node.

    The text is evaluated as a Python literal with ``ast.literal_eval`` and
    the result is converted to ``str``.
    """
    return Value(str(ast.literal_eval(python_str)))
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP

    ``NOT`` must be followed by whitespace and then ``IN``; the pair is
    returned as the single operator ``"not in"``.
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")

    if tokenizer.check("NOT"):
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")

    if tokenizer.check("OP"):
        op_token = tokenizer.read()
        return Op(op_token.text)

    return tokenizer.raise_syntax_error(
        "Expected marker operator, one of "
        "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
    )
|