# _parser.py — TOML parser (vendored tomli)
  1. import string
  2. from types import MappingProxyType
  3. from typing import (
  4. TYPE_CHECKING,
  5. Any,
  6. Callable,
  7. Dict,
  8. FrozenSet,
  9. Iterable,
  10. Optional,
  11. TextIO,
  12. Tuple,
  13. )
  14. from pip._vendor.tomli._re import (
  15. RE_BIN,
  16. RE_DATETIME,
  17. RE_HEX,
  18. RE_LOCALTIME,
  19. RE_NUMBER,
  20. RE_OCT,
  21. match_to_datetime,
  22. match_to_localtime,
  23. match_to_number,
  24. )
if TYPE_CHECKING:
    from re import Pattern

# All ASCII control characters (0x00-0x1F) plus DEL (0x7F).
ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))

# Neither of these sets include quotation mark or backslash. They are
# currently handled as separate cases in the parser functions.
ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t")
ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t\n\r")

ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS
ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ASCII_CTRL - frozenset("\t\n")

ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS

# TOML whitespace is space and tab only; newlines terminate statements.
TOML_WS = frozenset(" \t")
TOML_WS_AND_NEWLINE = TOML_WS | frozenset("\n")
# Characters allowed in a bare (unquoted) key part.
BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_")
# A key part may start as a bare key or as a quoted ('...' / "...") string.
KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'")

# Replacement text for each recognized backslash escape in basic strings.
BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType(
    {
        "\\b": "\u0008",  # backspace
        "\\t": "\u0009",  # tab
        "\\n": "\u000A",  # linefeed
        "\\f": "\u000C",  # form feed
        "\\r": "\u000D",  # carriage return
        '\\"': "\u0022",  # quote
        "\\\\": "\u005C",  # backslash
    }
)

# Type annotations
ParseFloat = Callable[[str], Any]  # converts a TOML float string to a value
Key = Tuple[str, ...]  # a parsed (possibly dotted) key
Pos = int  # index into the source string
  54. class TOMLDecodeError(ValueError):
  55. """An error raised if a document is not valid TOML."""
  56. def load(fp: TextIO, *, parse_float: ParseFloat = float) -> Dict[str, Any]:
  57. """Parse TOML from a file object."""
  58. s = fp.read()
  59. return loads(s, parse_float=parse_float)
def loads(s: str, *, parse_float: ParseFloat = float) -> Dict[str, Any]:  # noqa: C901
    """Parse TOML from a string."""

    # The spec allows converting "\r\n" to "\n", even in string
    # literals. Let's do so to simplify parsing.
    src = s.replace("\r\n", "\n")
    pos = 0
    state = State()

    # Parse one statement at a time
    # (typically means one line in TOML source)
    while True:
        # 1. Skip line leading whitespace
        pos = skip_chars(src, pos, TOML_WS)

        # 2. Parse rules. Expect one of the following:
        #    - end of file
        #    - end of line
        #    - comment
        #    - key/value pair
        #    - append dict to list (and move to its namespace)
        #    - create dict (and move to its namespace)
        # Skip trailing whitespace when applicable.
        try:
            char = src[pos]
        except IndexError:
            break
        if char == "\n":
            # Blank line: nothing to parse on it.
            pos += 1
            continue
        if char in KEY_INITIAL_CHARS:
            pos = key_value_rule(src, pos, state, parse_float)
            pos = skip_chars(src, pos, TOML_WS)
        elif char == "[":
            # Peek one character ahead to distinguish an array-of-tables
            # header "[[...]]" from a plain table header "[...]".
            try:
                second_char: Optional[str] = src[pos + 1]
            except IndexError:
                second_char = None
            if second_char == "[":
                pos = create_list_rule(src, pos, state)
            else:
                pos = create_dict_rule(src, pos, state)
            pos = skip_chars(src, pos, TOML_WS)
        elif char != "#":
            raise suffixed_err(src, pos, "Invalid statement")

        # 3. Skip comment
        pos = skip_comment(src, pos)

        # 4. Expect end of line or end of file
        try:
            char = src[pos]
        except IndexError:
            break
        if char != "\n":
            raise suffixed_err(
                src, pos, "Expected newline or end of document after a statement"
            )
        pos += 1

    return state.out.dict
class State:
    """Mutable parsing state threaded through the statement-level rules."""

    def __init__(self) -> None:
        # Mutable, read-only
        self.out = NestedDict()  # the document being built
        self.flags = Flags()  # frozen/explicitly-created markers per key
        # Immutable, read and write
        self.header_namespace: Key = ()  # key of the current [table] header
class Flags:
    """Flags that map to parsed keys/namespaces."""

    # Marks an immutable namespace (inline array or inline table).
    FROZEN = 0
    # Marks a nest that has been explicitly created and can no longer
    # be opened using the "[table]" syntax.
    EXPLICIT_NEST = 1

    def __init__(self) -> None:
        # A trie keyed by key parts; each node holds "flags",
        # "recursive_flags" and its "nested" child nodes.
        self._flags: Dict[str, dict] = {}

    def unset_all(self, key: Key) -> None:
        # Drop the node at `key` (and everything below it), if present.
        cont = self._flags
        for k in key[:-1]:
            if k not in cont:
                return
            cont = cont[k]["nested"]
        cont.pop(key[-1], None)

    def set_for_relative_key(self, head_key: Key, rel_key: Key, flag: int) -> None:
        # Set `flag` on every node along `rel_key`, starting below `head_key`.
        cont = self._flags
        for k in head_key:
            if k not in cont:
                cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}}
            cont = cont[k]["nested"]
        for k in rel_key:
            if k in cont:
                cont[k]["flags"].add(flag)
            else:
                cont[k] = {"flags": {flag}, "recursive_flags": set(), "nested": {}}
            cont = cont[k]["nested"]

    def set(self, key: Key, flag: int, *, recursive: bool) -> None:  # noqa: A003
        # Set `flag` on the node at `key`; a recursive flag also covers
        # every key beneath it (see is_).
        cont = self._flags
        key_parent, key_stem = key[:-1], key[-1]
        for k in key_parent:
            if k not in cont:
                cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}}
            cont = cont[k]["nested"]
        if key_stem not in cont:
            cont[key_stem] = {"flags": set(), "recursive_flags": set(), "nested": {}}
        cont[key_stem]["recursive_flags" if recursive else "flags"].add(flag)

    def is_(self, key: Key, flag: int) -> bool:
        # True if `flag` applies to `key`, either set directly on it or
        # recursively on one of its ancestors.
        if not key:
            return False  # document root has no flags
        cont = self._flags
        for k in key[:-1]:
            if k not in cont:
                return False
            inner_cont = cont[k]
            if flag in inner_cont["recursive_flags"]:
                return True
            cont = inner_cont["nested"]
        key_stem = key[-1]
        if key_stem in cont:
            cont = cont[key_stem]
            return flag in cont["flags"] or flag in cont["recursive_flags"]
        return False
  176. class NestedDict:
  177. def __init__(self) -> None:
  178. # The parsed content of the TOML document
  179. self.dict: Dict[str, Any] = {}
  180. def get_or_create_nest(
  181. self,
  182. key: Key,
  183. *,
  184. access_lists: bool = True,
  185. ) -> dict:
  186. cont: Any = self.dict
  187. for k in key:
  188. if k not in cont:
  189. cont[k] = {}
  190. cont = cont[k]
  191. if access_lists and isinstance(cont, list):
  192. cont = cont[-1]
  193. if not isinstance(cont, dict):
  194. raise KeyError("There is no nest behind this key")
  195. return cont
  196. def append_nest_to_list(self, key: Key) -> None:
  197. cont = self.get_or_create_nest(key[:-1])
  198. last_key = key[-1]
  199. if last_key in cont:
  200. list_ = cont[last_key]
  201. if not isinstance(list_, list):
  202. raise KeyError("An object other than list found behind this key")
  203. list_.append({})
  204. else:
  205. cont[last_key] = [{}]
  206. def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
  207. try:
  208. while src[pos] in chars:
  209. pos += 1
  210. except IndexError:
  211. pass
  212. return pos
  213. def skip_until(
  214. src: str,
  215. pos: Pos,
  216. expect: str,
  217. *,
  218. error_on: FrozenSet[str],
  219. error_on_eof: bool,
  220. ) -> Pos:
  221. try:
  222. new_pos = src.index(expect, pos)
  223. except ValueError:
  224. new_pos = len(src)
  225. if error_on_eof:
  226. raise suffixed_err(src, new_pos, f'Expected "{expect!r}"')
  227. bad_chars = error_on.intersection(src[pos:new_pos])
  228. if bad_chars:
  229. bad_char = next(iter(bad_chars))
  230. bad_pos = src.index(bad_char, pos)
  231. raise suffixed_err(src, bad_pos, f'Found invalid character "{bad_char!r}"')
  232. return new_pos
  233. def skip_comment(src: str, pos: Pos) -> Pos:
  234. try:
  235. char: Optional[str] = src[pos]
  236. except IndexError:
  237. char = None
  238. if char == "#":
  239. return skip_until(
  240. src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
  241. )
  242. return pos
  243. def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
  244. while True:
  245. pos_before_skip = pos
  246. pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
  247. pos = skip_comment(src, pos)
  248. if pos == pos_before_skip:
  249. return pos
  250. def create_dict_rule(src: str, pos: Pos, state: State) -> Pos:
  251. pos += 1 # Skip "["
  252. pos = skip_chars(src, pos, TOML_WS)
  253. pos, key = parse_key(src, pos)
  254. if state.flags.is_(key, Flags.EXPLICIT_NEST) or state.flags.is_(key, Flags.FROZEN):
  255. raise suffixed_err(src, pos, f"Can not declare {key} twice")
  256. state.flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
  257. try:
  258. state.out.get_or_create_nest(key)
  259. except KeyError:
  260. raise suffixed_err(src, pos, "Can not overwrite a value")
  261. state.header_namespace = key
  262. if src[pos : pos + 1] != "]":
  263. raise suffixed_err(src, pos, 'Expected "]" at the end of a table declaration')
  264. return pos + 1
  265. def create_list_rule(src: str, pos: Pos, state: State) -> Pos:
  266. pos += 2 # Skip "[["
  267. pos = skip_chars(src, pos, TOML_WS)
  268. pos, key = parse_key(src, pos)
  269. if state.flags.is_(key, Flags.FROZEN):
  270. raise suffixed_err(src, pos, f"Can not mutate immutable namespace {key}")
  271. # Free the namespace now that it points to another empty list item...
  272. state.flags.unset_all(key)
  273. # ...but this key precisely is still prohibited from table declaration
  274. state.flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
  275. try:
  276. state.out.append_nest_to_list(key)
  277. except KeyError:
  278. raise suffixed_err(src, pos, "Can not overwrite a value")
  279. state.header_namespace = key
  280. end_marker = src[pos : pos + 2]
  281. if end_marker != "]]":
  282. raise suffixed_err(
  283. src,
  284. pos,
  285. f'Found "{end_marker!r}" at the end of an array declaration.'
  286. ' Expected "]]"',
  287. )
  288. return pos + 2
  289. def key_value_rule(src: str, pos: Pos, state: State, parse_float: ParseFloat) -> Pos:
  290. pos, key, value = parse_key_value_pair(src, pos, parse_float)
  291. key_parent, key_stem = key[:-1], key[-1]
  292. abs_key_parent = state.header_namespace + key_parent
  293. if state.flags.is_(abs_key_parent, Flags.FROZEN):
  294. raise suffixed_err(
  295. src, pos, f"Can not mutate immutable namespace {abs_key_parent}"
  296. )
  297. # Containers in the relative path can't be opened with the table syntax after this
  298. state.flags.set_for_relative_key(state.header_namespace, key, Flags.EXPLICIT_NEST)
  299. try:
  300. nest = state.out.get_or_create_nest(abs_key_parent)
  301. except KeyError:
  302. raise suffixed_err(src, pos, "Can not overwrite a value")
  303. if key_stem in nest:
  304. raise suffixed_err(src, pos, "Can not overwrite a value")
  305. # Mark inline table and array namespaces recursively immutable
  306. if isinstance(value, (dict, list)):
  307. abs_key = state.header_namespace + key
  308. state.flags.set(abs_key, Flags.FROZEN, recursive=True)
  309. nest[key_stem] = value
  310. return pos
  311. def parse_key_value_pair(
  312. src: str, pos: Pos, parse_float: ParseFloat
  313. ) -> Tuple[Pos, Key, Any]:
  314. pos, key = parse_key(src, pos)
  315. try:
  316. char: Optional[str] = src[pos]
  317. except IndexError:
  318. char = None
  319. if char != "=":
  320. raise suffixed_err(src, pos, 'Expected "=" after a key in a key/value pair')
  321. pos += 1
  322. pos = skip_chars(src, pos, TOML_WS)
  323. pos, value = parse_value(src, pos, parse_float)
  324. return pos, key, value
  325. def parse_key(src: str, pos: Pos) -> Tuple[Pos, Key]:
  326. pos, key_part = parse_key_part(src, pos)
  327. key = [key_part]
  328. pos = skip_chars(src, pos, TOML_WS)
  329. while True:
  330. try:
  331. char: Optional[str] = src[pos]
  332. except IndexError:
  333. char = None
  334. if char != ".":
  335. return pos, tuple(key)
  336. pos += 1
  337. pos = skip_chars(src, pos, TOML_WS)
  338. pos, key_part = parse_key_part(src, pos)
  339. key.append(key_part)
  340. pos = skip_chars(src, pos, TOML_WS)
  341. def parse_key_part(src: str, pos: Pos) -> Tuple[Pos, str]:
  342. try:
  343. char: Optional[str] = src[pos]
  344. except IndexError:
  345. char = None
  346. if char in BARE_KEY_CHARS:
  347. start_pos = pos
  348. pos = skip_chars(src, pos, BARE_KEY_CHARS)
  349. return pos, src[start_pos:pos]
  350. if char == "'":
  351. return parse_literal_str(src, pos)
  352. if char == '"':
  353. return parse_one_line_basic_str(src, pos)
  354. raise suffixed_err(src, pos, "Invalid initial character for a key part")
  355. def parse_one_line_basic_str(src: str, pos: Pos) -> Tuple[Pos, str]:
  356. pos += 1
  357. return parse_basic_str(src, pos, multiline=False)
  358. def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> Tuple[Pos, list]:
  359. pos += 1
  360. array: list = []
  361. pos = skip_comments_and_array_ws(src, pos)
  362. if src[pos : pos + 1] == "]":
  363. return pos + 1, array
  364. while True:
  365. pos, val = parse_value(src, pos, parse_float)
  366. array.append(val)
  367. pos = skip_comments_and_array_ws(src, pos)
  368. c = src[pos : pos + 1]
  369. if c == "]":
  370. return pos + 1, array
  371. if c != ",":
  372. raise suffixed_err(src, pos, "Unclosed array")
  373. pos += 1
  374. pos = skip_comments_and_array_ws(src, pos)
  375. if src[pos : pos + 1] == "]":
  376. return pos + 1, array
  377. def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> Tuple[Pos, dict]:
  378. pos += 1
  379. nested_dict = NestedDict()
  380. flags = Flags()
  381. pos = skip_chars(src, pos, TOML_WS)
  382. if src[pos : pos + 1] == "}":
  383. return pos + 1, nested_dict.dict
  384. while True:
  385. pos, key, value = parse_key_value_pair(src, pos, parse_float)
  386. key_parent, key_stem = key[:-1], key[-1]
  387. if flags.is_(key, Flags.FROZEN):
  388. raise suffixed_err(src, pos, f"Can not mutate immutable namespace {key}")
  389. try:
  390. nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
  391. except KeyError:
  392. raise suffixed_err(src, pos, "Can not overwrite a value")
  393. if key_stem in nest:
  394. raise suffixed_err(src, pos, f'Duplicate inline table key "{key_stem}"')
  395. nest[key_stem] = value
  396. pos = skip_chars(src, pos, TOML_WS)
  397. c = src[pos : pos + 1]
  398. if c == "}":
  399. return pos + 1, nested_dict.dict
  400. if c != ",":
  401. raise suffixed_err(src, pos, "Unclosed inline table")
  402. if isinstance(value, (dict, list)):
  403. flags.set(key, Flags.FROZEN, recursive=True)
  404. pos += 1
  405. pos = skip_chars(src, pos, TOML_WS)
  406. def parse_basic_str_escape(
  407. src: str, pos: Pos, *, multiline: bool = False
  408. ) -> Tuple[Pos, str]:
  409. escape_id = src[pos : pos + 2]
  410. pos += 2
  411. if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}:
  412. # Skip whitespace until next non-whitespace character or end of
  413. # the doc. Error if non-whitespace is found before newline.
  414. if escape_id != "\\\n":
  415. pos = skip_chars(src, pos, TOML_WS)
  416. char = src[pos : pos + 1]
  417. if not char:
  418. return pos, ""
  419. if char != "\n":
  420. raise suffixed_err(src, pos, 'Unescaped "\\" in a string')
  421. pos += 1
  422. pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
  423. return pos, ""
  424. if escape_id == "\\u":
  425. return parse_hex_char(src, pos, 4)
  426. if escape_id == "\\U":
  427. return parse_hex_char(src, pos, 8)
  428. try:
  429. return pos, BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
  430. except KeyError:
  431. if len(escape_id) != 2:
  432. raise suffixed_err(src, pos, "Unterminated string")
  433. raise suffixed_err(src, pos, 'Unescaped "\\" in a string')
  434. def parse_basic_str_escape_multiline(src: str, pos: Pos) -> Tuple[Pos, str]:
  435. return parse_basic_str_escape(src, pos, multiline=True)
  436. def parse_hex_char(src: str, pos: Pos, hex_len: int) -> Tuple[Pos, str]:
  437. hex_str = src[pos : pos + hex_len]
  438. if len(hex_str) != hex_len or any(c not in string.hexdigits for c in hex_str):
  439. raise suffixed_err(src, pos, "Invalid hex value")
  440. pos += hex_len
  441. hex_int = int(hex_str, 16)
  442. if not is_unicode_scalar_value(hex_int):
  443. raise suffixed_err(src, pos, "Escaped character is not a Unicode scalar value")
  444. return pos, chr(hex_int)
  445. def parse_literal_str(src: str, pos: Pos) -> Tuple[Pos, str]:
  446. pos += 1 # Skip starting apostrophe
  447. start_pos = pos
  448. pos = skip_until(
  449. src, pos, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
  450. )
  451. return pos + 1, src[start_pos:pos] # Skip ending apostrophe
def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> Tuple[Pos, str]:
    """Parse a multiline string; `pos` is at the first delimiter character.

    `literal` selects '''...''' (no escapes) vs \"\"\"...\"\"\" parsing.
    """
    pos += 3  # skip the three opening delimiter characters
    # A newline immediately following the opening delimiter is trimmed.
    if src[pos : pos + 1] == "\n":
        pos += 1

    if literal:
        delim = "'"
        end_pos = skip_until(
            src,
            pos,
            "'''",
            error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
            error_on_eof=True,
        )
        result = src[pos:end_pos]
        pos = end_pos + 3
    else:
        delim = '"'
        pos, result = parse_basic_str(src, pos, multiline=True)

    # Add at maximum two extra apostrophes/quotes if the end sequence
    # is 4 or 5 chars long instead of just 3.
    if src[pos : pos + 1] != delim:
        return pos, result
    pos += 1
    if src[pos : pos + 1] != delim:
        return pos, result + delim
    pos += 1
    return pos, result + (delim * 2)
def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> Tuple[Pos, str]:
    """Parse the body of a basic string up to its closing quote(s).

    `pos` points just past the opening delimiter. Returns the position
    after the closing delimiter and the unescaped string content.
    """
    if multiline:
        error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape_multiline
    else:
        error_on = ILLEGAL_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape
    result = ""
    start_pos = pos  # start of the current run of literal characters
    while True:
        try:
            char = src[pos]
        except IndexError:
            raise suffixed_err(src, pos, "Unterminated string")
        if char == '"':
            if not multiline:
                return pos + 1, result + src[start_pos:pos]
            # A multiline string ends only at three consecutive quotes.
            if src[pos + 1 : pos + 3] == '""':
                return pos + 3, result + src[start_pos:pos]
            pos += 1
            continue
        if char == "\\":
            # Flush the literal run, then resolve the escape sequence.
            result += src[start_pos:pos]
            pos, parsed_escape = parse_escapes(src, pos)
            result += parsed_escape
            start_pos = pos
            continue
        if char in error_on:
            raise suffixed_err(src, pos, f'Illegal character "{char!r}"')
        pos += 1
  509. def parse_regex(src: str, pos: Pos, regex: "Pattern") -> Tuple[Pos, str]:
  510. match = regex.match(src, pos)
  511. if not match:
  512. raise suffixed_err(src, pos, "Unexpected sequence")
  513. return match.end(), match.group()
  514. def parse_value( # noqa: C901
  515. src: str, pos: Pos, parse_float: ParseFloat
  516. ) -> Tuple[Pos, Any]:
  517. try:
  518. char: Optional[str] = src[pos]
  519. except IndexError:
  520. char = None
  521. # Basic strings
  522. if char == '"':
  523. if src[pos + 1 : pos + 3] == '""':
  524. return parse_multiline_str(src, pos, literal=False)
  525. return parse_one_line_basic_str(src, pos)
  526. # Literal strings
  527. if char == "'":
  528. if src[pos + 1 : pos + 3] == "''":
  529. return parse_multiline_str(src, pos, literal=True)
  530. return parse_literal_str(src, pos)
  531. # Booleans
  532. if char == "t":
  533. if src[pos + 1 : pos + 4] == "rue":
  534. return pos + 4, True
  535. if char == "f":
  536. if src[pos + 1 : pos + 5] == "alse":
  537. return pos + 5, False
  538. # Dates and times
  539. datetime_match = RE_DATETIME.match(src, pos)
  540. if datetime_match:
  541. try:
  542. datetime_obj = match_to_datetime(datetime_match)
  543. except ValueError:
  544. raise suffixed_err(src, pos, "Invalid date or datetime")
  545. return datetime_match.end(), datetime_obj
  546. localtime_match = RE_LOCALTIME.match(src, pos)
  547. if localtime_match:
  548. return localtime_match.end(), match_to_localtime(localtime_match)
  549. # Non-decimal integers
  550. if char == "0":
  551. second_char = src[pos + 1 : pos + 2]
  552. if second_char == "x":
  553. pos, hex_str = parse_regex(src, pos + 2, RE_HEX)
  554. return pos, int(hex_str, 16)
  555. if second_char == "o":
  556. pos, oct_str = parse_regex(src, pos + 2, RE_OCT)
  557. return pos, int(oct_str, 8)
  558. if second_char == "b":
  559. pos, bin_str = parse_regex(src, pos + 2, RE_BIN)
  560. return pos, int(bin_str, 2)
  561. # Decimal integers and "normal" floats.
  562. # The regex will greedily match any type starting with a decimal
  563. # char, so needs to be located after handling of non-decimal ints,
  564. # and dates and times.
  565. number_match = RE_NUMBER.match(src, pos)
  566. if number_match:
  567. return number_match.end(), match_to_number(number_match, parse_float)
  568. # Arrays
  569. if char == "[":
  570. return parse_array(src, pos, parse_float)
  571. # Inline tables
  572. if char == "{":
  573. return parse_inline_table(src, pos, parse_float)
  574. # Special floats
  575. first_three = src[pos : pos + 3]
  576. if first_three in {"inf", "nan"}:
  577. return pos + 3, parse_float(first_three)
  578. first_four = src[pos : pos + 4]
  579. if first_four in {"-inf", "+inf", "-nan", "+nan"}:
  580. return pos + 4, parse_float(first_four)
  581. raise suffixed_err(src, pos, "Invalid value")
  582. def suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError:
  583. """Return a `TOMLDecodeError` where error message is suffixed with
  584. coordinates in source."""
  585. def coord_repr(src: str, pos: Pos) -> str:
  586. if pos >= len(src):
  587. return "end of document"
  588. line = src.count("\n", 0, pos) + 1
  589. if line == 1:
  590. column = pos + 1
  591. else:
  592. column = pos - src.rindex("\n", 0, pos)
  593. return f"line {line}, column {column}"
  594. return TOMLDecodeError(f"{msg} (at {coord_repr(src, pos)})")
  595. def is_unicode_scalar_value(codepoint: int) -> bool:
  596. return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)