123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192 |
- import contextlib
- import re
- from dataclasses import dataclass
- from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union
- from .specifiers import Specifier
@dataclass
class Token:
    """One lexical unit produced while scanning the source text."""

    # Name of the rule whose pattern produced this token.
    name: str
    # The exact slice of source text that the rule matched.
    text: str
    # Offset into the source string at which the match began.
    position: int
class ParserSyntaxError(Exception):
    """The provided source text could not be parsed correctly."""

    def __init__(
        self,
        message: str,
        *,
        source: str,
        span: Tuple[int, int],
    ) -> None:
        """Store the failure description and where it occurred.

        ``message`` describes the problem, ``source`` is the full text that
        was being parsed, and ``span`` is a ``(start, end)`` pair of offsets
        into ``source`` used to draw the marker line in ``__str__``.
        """
        # The base class deliberately receives no arguments; the rendered
        # text comes entirely from ``__str__`` below.
        super().__init__()
        self.message = message
        self.source = source
        self.span = span

    def __str__(self) -> str:
        """Render the message, the source, and a marker line under the span."""
        start = self.span[0]
        end = self.span[1]
        # Pad up to the span start, draw ``~`` across the span, then put a
        # caret at the span end.
        padding = " " * start
        underline = "~" * (end - start)
        marker = padding + underline + "^"
        return "\n ".join((self.message, self.source, marker))
# Rule table: token name -> regular expression (source string or compiled
# pattern).  Entries given as plain strings are compiled by ``Tokenizer``.
DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
    # --- Punctuation ------------------------------------------------------
    "LEFT_PARENTHESIS": r"\(",
    "RIGHT_PARENTHESIS": r"\)",
    "LEFT_BRACKET": r"\[",
    "RIGHT_BRACKET": r"\]",
    "SEMICOLON": r";",
    "COMMA": r",",
    # --- Literals -----------------------------------------------------------
    # A single- or double-quoted string that does not contain its own
    # delimiter.
    "QUOTED_STRING": re.compile(
        r"""
            (
                ('[^']*')
                |
                ("[^"]*")
            )
        """,
        flags=re.VERBOSE,
    ),
    # --- Operators and keywords ---------------------------------------------
    # Longer operators come first in the alternation so that e.g. ``===`` is
    # not matched as ``==``.
    "OP": r"(===|==|~=|!=|<=|>=|<|>)",
    "BOOLOP": r"\b(or|and)\b",
    "IN": r"\bin\b",
    "NOT": r"\bnot\b",
    # --- Variable names -----------------------------------------------------
    # Closed set of recognised variable names; several accept either ``.`` or
    # ``_`` as the separator.
    "VARIABLE": re.compile(
        r"""
            \b(
                python_version
                |python_full_version
                |os[._]name
                |sys[._]platform
                |platform_(release|system)
                |platform[._](version|machine|python_implementation)
                |python_implementation
                |implementation_(name|version)
                |extra
            )\b
        """,
        flags=re.VERBOSE,
    ),
    # --- Version specifiers -------------------------------------------------
    # Assembled from the operator and version regex fragments exposed by
    # ``Specifier``; compiled verbose and case-insensitive.
    "SPECIFIER": re.compile(
        Specifier._operator_regex_str + Specifier._version_regex_str,
        flags=re.VERBOSE | re.IGNORECASE,
    ),
    # --- Remaining tokens ---------------------------------------------------
    "AT": r"\@",
    # Any run of characters up to the next space or tab.
    "URL": r"[^ \t]+",
    "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
    "VERSION_PREFIX_TRAIL": r"\.\*",
    "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
    # Horizontal whitespace only (spaces and tabs).
    "WS": r"[ \t]+",
    # Zero-width match at the end of the input.
    "END": r"$",
}
class Tokenizer:
    """Context-sensitive token parsing.

    Provides methods to examine the input stream to check whether the next token
    matches.
    """

    def __init__(
        self,
        source: str,
        *,
        rules: "Dict[str, Union[str, re.Pattern[str]]]",
    ) -> None:
        """Set up a tokenizer over ``source`` using the given rule table.

        ``rules`` maps each token name to a regular expression, supplied
        either as a pattern string or as an already-compiled pattern; every
        entry is passed through ``re.compile``.
        """
        self.source = source
        self.rules: Dict[str, re.Pattern[str]] = {
            name: re.compile(pattern) for name, pattern in rules.items()
        }
        # Token loaded by a successful (non-peek) ``check`` that has not yet
        # been consumed by ``read``.
        self.next_token: Optional[Token] = None
        # Offset into ``source`` at which the next match is attempted.
        self.position = 0

    def consume(self, name: str) -> None:
        """Move beyond provided token name, if at current position.

        Does nothing when the rule ``name`` does not match at the current
        position.
        """
        if self.check(name):
            self.read()

    def check(self, name: str, *, peek: bool = False) -> bool:
        """Check whether the next token has the provided name.

        By default, if the check succeeds, the token *must* be read before
        another check. If `peek` is set to `True`, the token is not loaded and
        would need to be checked again.

        Returns ``True`` when the rule ``name`` matches at the current
        position and ``False`` otherwise.
        """
        # Internal invariants: no token may be pending, and the rule must
        # exist in the rule table.
        assert (
            self.next_token is None
        ), f"Cannot check for {name!r}, already have {self.next_token!r}"
        assert name in self.rules, f"Unknown token name: {name!r}"

        expression = self.rules[name]

        # ``Pattern.match`` with a ``pos`` argument anchors the match at the
        # current offset without slicing the source.
        match = expression.match(self.source, self.position)
        if match is None:
            return False
        if not peek:
            self.next_token = Token(name, match[0], self.position)
        return True

    def expect(self, name: str, *, expected: str) -> Token:
        """Expect a certain token name next, failing with a syntax error otherwise.

        On success the token is read (consumed) and returned. On failure a
        ``ParserSyntaxError`` with the message ``"Expected {expected}"`` is
        raised at the current position.
        """
        if not self.check(name):
            # ``raise_syntax_error`` never returns, so no ``raise`` is needed.
            self.raise_syntax_error(f"Expected {expected}")
        return self.read()

    def read(self) -> Token:
        """Consume the next token and return it.

        A token must have been loaded by a preceding successful ``check``.
        """
        token = self.next_token
        assert token is not None

        # Advance past the matched text and clear the pending slot so that
        # ``check`` may be called again.
        self.position += len(token.text)
        self.next_token = None

        return token

    def raise_syntax_error(
        self,
        message: str,
        *,
        span_start: Optional[int] = None,
        span_end: Optional[int] = None,
    ) -> NoReturn:
        """Raise ParserSyntaxError at the given position.

        Either end of the span that is left as ``None`` defaults to the
        tokenizer's current position.
        """
        span = (
            self.position if span_start is None else span_start,
            self.position if span_end is None else span_end,
        )
        raise ParserSyntaxError(
            message,
            source=self.source,
            span=span,
        )

    @contextlib.contextmanager
    def enclosing_tokens(
        self, open_token: str, close_token: str, *, around: str
    ) -> Iterator[None]:
        """Parse a section that may be wrapped in a matching token pair.

        On entry, ``open_token`` is consumed if it is present. On normal exit
        from the ``with`` body, ``close_token`` is required and consumed only
        if the opening token was consumed; otherwise nothing is checked. A
        missing closing token raises ``ParserSyntaxError`` with a span that
        starts at the opening token. ``around`` is only used in that error
        message to describe what was being parsed.
        """
        if self.check(open_token):
            open_position = self.position
            self.read()
        else:
            open_position = None

        yield

        # No opening token was consumed, so no closing token is required.
        if open_position is None:
            return

        if not self.check(close_token):
            self.raise_syntax_error(
                f"Expected matching {close_token} for {open_token}, after {around}",
                span_start=open_position,
            )

        self.read()
|