erlang.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. """
  2. pygments.lexers.erlang
  3. ~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for Erlang.
  5. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import Lexer, RegexLexer, bygroups, words, do_insertions, \
  10. include, default, line_re
  11. from pygments.token import Comment, Operator, Keyword, Name, String, \
  12. Number, Punctuation, Generic, Whitespace
  13. __all__ = ['ErlangLexer', 'ErlangShellLexer', 'ElixirConsoleLexer',
  14. 'ElixirLexer']
  15. class ErlangLexer(RegexLexer):
  16. """
  17. For the Erlang functional programming language.
  18. .. versionadded:: 0.9
  19. """
  20. name = 'Erlang'
  21. url = 'https://www.erlang.org/'
  22. aliases = ['erlang']
  23. filenames = ['*.erl', '*.hrl', '*.es', '*.escript']
  24. mimetypes = ['text/x-erlang']
  25. keywords = (
  26. 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if',
  27. 'let', 'of', 'query', 'receive', 'try', 'when',
  28. )
  29. builtins = ( # See erlang(3) man page
  30. 'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
  31. 'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
  32. 'byte_size', 'cancel_timer', 'check_process_code', 'delete_module',
  33. 'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit',
  34. 'float', 'float_to_list', 'fun_info', 'fun_to_list',
  35. 'function_exported', 'garbage_collect', 'get', 'get_keys',
  36. 'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary',
  37. 'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean',
  38. 'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list',
  39. 'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record',
  40. 'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom',
  41. 'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom',
  42. 'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple',
  43. 'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5',
  44. 'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor',
  45. 'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
  46. 'pid_to_list', 'port_close', 'port_command', 'port_connect',
  47. 'port_control', 'port_call', 'port_info', 'port_to_list',
  48. 'process_display', 'process_flag', 'process_info', 'purge_module',
  49. 'put', 'read_timer', 'ref_to_list', 'register', 'resume_process',
  50. 'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie',
  51. 'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor',
  52. 'spawn_opt', 'split_binary', 'start_timer', 'statistics',
  53. 'suspend_process', 'system_flag', 'system_info', 'system_monitor',
  54. 'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered',
  55. 'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list',
  56. 'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
  57. )
  58. operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)'
  59. word_operators = (
  60. 'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
  61. 'div', 'not', 'or', 'orelse', 'rem', 'xor'
  62. )
  63. atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')"
  64. variable_re = r'(?:[A-Z_]\w*)'
  65. esc_char_re = r'[bdefnrstv\'"\\]'
  66. esc_octal_re = r'[0-7][0-7]?[0-7]?'
  67. esc_hex_re = r'(?:x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\})'
  68. esc_ctrl_re = r'\^[a-zA-Z]'
  69. escape_re = r'(?:\\(?:'+esc_char_re+r'|'+esc_octal_re+r'|'+esc_hex_re+r'|'+esc_ctrl_re+r'))'
  70. macro_re = r'(?:'+variable_re+r'|'+atom_re+r')'
  71. base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'
  72. tokens = {
  73. 'root': [
  74. (r'\s+', Whitespace),
  75. (r'(%.*)(\n)', bygroups(Comment, Whitespace)),
  76. (words(keywords, suffix=r'\b'), Keyword),
  77. (words(builtins, suffix=r'\b'), Name.Builtin),
  78. (words(word_operators, suffix=r'\b'), Operator.Word),
  79. (r'^-', Punctuation, 'directive'),
  80. (operators, Operator),
  81. (r'"', String, 'string'),
  82. (r'<<', Name.Label),
  83. (r'>>', Name.Label),
  84. ('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)),
  85. ('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()',
  86. bygroups(Name.Function, Whitespace, Punctuation)),
  87. (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer),
  88. (r'[+-]?\d+', Number.Integer),
  89. (r'[+-]?\d+.\d+', Number.Float),
  90. (r'[]\[:_@\".{}()|;,]', Punctuation),
  91. (variable_re, Name.Variable),
  92. (atom_re, Name),
  93. (r'\?'+macro_re, Name.Constant),
  94. (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char),
  95. (r'#'+atom_re+r'(:?\.'+atom_re+r')?', Name.Label),
  96. # Erlang script shebang
  97. (r'\A#!.+\n', Comment.Hashbang),
  98. # EEP 43: Maps
  99. # http://www.erlang.org/eeps/eep-0043.html
  100. (r'#\{', Punctuation, 'map_key'),
  101. ],
  102. 'string': [
  103. (escape_re, String.Escape),
  104. (r'"', String, '#pop'),
  105. (r'~[0-9.*]*[~#+BPWXb-ginpswx]', String.Interpol),
  106. (r'[^"\\~]+', String),
  107. (r'~', String),
  108. ],
  109. 'directive': [
  110. (r'(define)(\s*)(\()('+macro_re+r')',
  111. bygroups(Name.Entity, Whitespace, Punctuation, Name.Constant), '#pop'),
  112. (r'(record)(\s*)(\()('+macro_re+r')',
  113. bygroups(Name.Entity, Whitespace, Punctuation, Name.Label), '#pop'),
  114. (atom_re, Name.Entity, '#pop'),
  115. ],
  116. 'map_key': [
  117. include('root'),
  118. (r'=>', Punctuation, 'map_val'),
  119. (r':=', Punctuation, 'map_val'),
  120. (r'\}', Punctuation, '#pop'),
  121. ],
  122. 'map_val': [
  123. include('root'),
  124. (r',', Punctuation, '#pop'),
  125. (r'(?=\})', Punctuation, '#pop'),
  126. ],
  127. }
  128. class ErlangShellLexer(Lexer):
  129. """
  130. Shell sessions in erl (for Erlang code).
  131. .. versionadded:: 1.1
  132. """
  133. name = 'Erlang erl session'
  134. aliases = ['erl']
  135. filenames = ['*.erl-sh']
  136. mimetypes = ['text/x-erl-shellsession']
  137. _prompt_re = re.compile(r'(?:\([\w@_.]+\))?\d+>(?=\s|\Z)')
  138. def get_tokens_unprocessed(self, text):
  139. erlexer = ErlangLexer(**self.options)
  140. curcode = ''
  141. insertions = []
  142. for match in line_re.finditer(text):
  143. line = match.group()
  144. m = self._prompt_re.match(line)
  145. if m is not None:
  146. end = m.end()
  147. insertions.append((len(curcode),
  148. [(0, Generic.Prompt, line[:end])]))
  149. curcode += line[end:]
  150. else:
  151. if curcode:
  152. yield from do_insertions(insertions,
  153. erlexer.get_tokens_unprocessed(curcode))
  154. curcode = ''
  155. insertions = []
  156. if line.startswith('*'):
  157. yield match.start(), Generic.Traceback, line
  158. else:
  159. yield match.start(), Generic.Output, line
  160. if curcode:
  161. yield from do_insertions(insertions,
  162. erlexer.get_tokens_unprocessed(curcode))
  163. def gen_elixir_string_rules(name, symbol, token):
  164. states = {}
  165. states['string_' + name] = [
  166. (r'[^#%s\\]+' % (symbol,), token),
  167. include('escapes'),
  168. (r'\\.', token),
  169. (r'(%s)' % (symbol,), bygroups(token), "#pop"),
  170. include('interpol')
  171. ]
  172. return states
  173. def gen_elixir_sigstr_rules(term, term_class, token, interpol=True):
  174. if interpol:
  175. return [
  176. (r'[^#%s\\]+' % (term_class,), token),
  177. include('escapes'),
  178. (r'\\.', token),
  179. (r'%s[a-zA-Z]*' % (term,), token, '#pop'),
  180. include('interpol')
  181. ]
  182. else:
  183. return [
  184. (r'[^%s\\]+' % (term_class,), token),
  185. (r'\\.', token),
  186. (r'%s[a-zA-Z]*' % (term,), token, '#pop'),
  187. ]
  188. class ElixirLexer(RegexLexer):
  189. """
  190. For the Elixir language.
  191. .. versionadded:: 1.5
  192. """
  193. name = 'Elixir'
  194. url = 'http://elixir-lang.org'
  195. aliases = ['elixir', 'ex', 'exs']
  196. filenames = ['*.ex', '*.eex', '*.exs', '*.leex']
  197. mimetypes = ['text/x-elixir']
  198. KEYWORD = ('fn', 'do', 'end', 'after', 'else', 'rescue', 'catch')
  199. KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in')
  200. BUILTIN = (
  201. 'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise',
  202. 'quote', 'unquote', 'unquote_splicing', 'throw', 'super',
  203. )
  204. BUILTIN_DECLARATION = (
  205. 'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop',
  206. 'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback',
  207. )
  208. BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias')
  209. CONSTANT = ('nil', 'true', 'false')
  210. PSEUDO_VAR = ('_', '__MODULE__', '__DIR__', '__ENV__', '__CALLER__')
  211. OPERATORS3 = (
  212. '<<<', '>>>', '|||', '&&&', '^^^', '~~~', '===', '!==',
  213. '~>>', '<~>', '|~>', '<|>',
  214. )
  215. OPERATORS2 = (
  216. '==', '!=', '<=', '>=', '&&', '||', '<>', '++', '--', '|>', '=~',
  217. '->', '<-', '|', '.', '=', '~>', '<~',
  218. )
  219. OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&')
  220. PUNCTUATION = (
  221. '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']',
  222. )
  223. def get_tokens_unprocessed(self, text):
  224. for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
  225. if token is Name:
  226. if value in self.KEYWORD:
  227. yield index, Keyword, value
  228. elif value in self.KEYWORD_OPERATOR:
  229. yield index, Operator.Word, value
  230. elif value in self.BUILTIN:
  231. yield index, Keyword, value
  232. elif value in self.BUILTIN_DECLARATION:
  233. yield index, Keyword.Declaration, value
  234. elif value in self.BUILTIN_NAMESPACE:
  235. yield index, Keyword.Namespace, value
  236. elif value in self.CONSTANT:
  237. yield index, Name.Constant, value
  238. elif value in self.PSEUDO_VAR:
  239. yield index, Name.Builtin.Pseudo, value
  240. else:
  241. yield index, token, value
  242. else:
  243. yield index, token, value
  244. def gen_elixir_sigil_rules():
  245. # all valid sigil terminators (excluding heredocs)
  246. terminators = [
  247. (r'\{', r'\}', '}', 'cb'),
  248. (r'\[', r'\]', r'\]', 'sb'),
  249. (r'\(', r'\)', ')', 'pa'),
  250. ('<', '>', '>', 'ab'),
  251. ('/', '/', '/', 'slas'),
  252. (r'\|', r'\|', '|', 'pipe'),
  253. ('"', '"', '"', 'quot'),
  254. ("'", "'", "'", 'apos'),
  255. ]
  256. # heredocs have slightly different rules
  257. triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')]
  258. token = String.Other
  259. states = {'sigils': []}
  260. for term, name in triquotes:
  261. states['sigils'] += [
  262. (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc),
  263. (name + '-end', name + '-intp')),
  264. (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc),
  265. (name + '-end', name + '-no-intp')),
  266. ]
  267. states[name + '-end'] = [
  268. (r'[a-zA-Z]+', token, '#pop'),
  269. default('#pop'),
  270. ]
  271. states[name + '-intp'] = [
  272. (r'^(\s*)(' + term + ')', bygroups(Whitespace, String.Heredoc), '#pop'),
  273. include('heredoc_interpol'),
  274. ]
  275. states[name + '-no-intp'] = [
  276. (r'^(\s*)(' + term +')', bygroups(Whitespace, String.Heredoc), '#pop'),
  277. include('heredoc_no_interpol'),
  278. ]
  279. for lterm, rterm, rterm_class, name in terminators:
  280. states['sigils'] += [
  281. (r'~[a-z]' + lterm, token, name + '-intp'),
  282. (r'~[A-Z]' + lterm, token, name + '-no-intp'),
  283. ]
  284. states[name + '-intp'] = \
  285. gen_elixir_sigstr_rules(rterm, rterm_class, token)
  286. states[name + '-no-intp'] = \
  287. gen_elixir_sigstr_rules(rterm, rterm_class, token, interpol=False)
  288. return states
  289. op3_re = "|".join(re.escape(s) for s in OPERATORS3)
  290. op2_re = "|".join(re.escape(s) for s in OPERATORS2)
  291. op1_re = "|".join(re.escape(s) for s in OPERATORS1)
  292. ops_re = r'(?:%s|%s|%s)' % (op3_re, op2_re, op1_re)
  293. punctuation_re = "|".join(re.escape(s) for s in PUNCTUATION)
  294. alnum = r'\w'
  295. name_re = r'(?:\.\.\.|[a-z_]%s*[!?]?)' % alnum
  296. modname_re = r'[A-Z]%(alnum)s*(?:\.[A-Z]%(alnum)s*)*' % {'alnum': alnum}
  297. complex_name_re = r'(?:%s|%s|%s)' % (name_re, modname_re, ops_re)
  298. special_atom_re = r'(?:\.\.\.|<<>>|%\{\}|%|\{\})'
  299. long_hex_char_re = r'(\\x\{)([\da-fA-F]+)(\})'
  300. hex_char_re = r'(\\x[\da-fA-F]{1,2})'
  301. escape_char_re = r'(\\[abdefnrstv])'
  302. tokens = {
  303. 'root': [
  304. (r'\s+', Whitespace),
  305. (r'#.*$', Comment.Single),
  306. # Various kinds of characters
  307. (r'(\?)' + long_hex_char_re,
  308. bygroups(String.Char,
  309. String.Escape, Number.Hex, String.Escape)),
  310. (r'(\?)' + hex_char_re,
  311. bygroups(String.Char, String.Escape)),
  312. (r'(\?)' + escape_char_re,
  313. bygroups(String.Char, String.Escape)),
  314. (r'\?\\?.', String.Char),
  315. # '::' has to go before atoms
  316. (r':::', String.Symbol),
  317. (r'::', Operator),
  318. # atoms
  319. (r':' + special_atom_re, String.Symbol),
  320. (r':' + complex_name_re, String.Symbol),
  321. (r':"', String.Symbol, 'string_double_atom'),
  322. (r":'", String.Symbol, 'string_single_atom'),
  323. # [keywords: ...]
  324. (r'(%s|%s)(:)(?=\s|\n)' % (special_atom_re, complex_name_re),
  325. bygroups(String.Symbol, Punctuation)),
  326. # @attributes
  327. (r'@' + name_re, Name.Attribute),
  328. # identifiers
  329. (name_re, Name),
  330. (r'(%%?)(%s)' % (modname_re,), bygroups(Punctuation, Name.Class)),
  331. # operators and punctuation
  332. (op3_re, Operator),
  333. (op2_re, Operator),
  334. (punctuation_re, Punctuation),
  335. (r'&\d', Name.Entity), # anon func arguments
  336. (op1_re, Operator),
  337. # numbers
  338. (r'0b[01]+', Number.Bin),
  339. (r'0o[0-7]+', Number.Oct),
  340. (r'0x[\da-fA-F]+', Number.Hex),
  341. (r'\d(_?\d)*\.\d(_?\d)*([eE][-+]?\d(_?\d)*)?', Number.Float),
  342. (r'\d(_?\d)*', Number.Integer),
  343. # strings and heredocs
  344. (r'(""")(\s*)', bygroups(String.Heredoc, Whitespace),
  345. 'heredoc_double'),
  346. (r"(''')(\s*)$", bygroups(String.Heredoc, Whitespace),
  347. 'heredoc_single'),
  348. (r'"', String.Double, 'string_double'),
  349. (r"'", String.Single, 'string_single'),
  350. include('sigils'),
  351. (r'%\{', Punctuation, 'map_key'),
  352. (r'\{', Punctuation, 'tuple'),
  353. ],
  354. 'heredoc_double': [
  355. (r'^(\s*)(""")', bygroups(Whitespace, String.Heredoc), '#pop'),
  356. include('heredoc_interpol'),
  357. ],
  358. 'heredoc_single': [
  359. (r"^\s*'''", String.Heredoc, '#pop'),
  360. include('heredoc_interpol'),
  361. ],
  362. 'heredoc_interpol': [
  363. (r'[^#\\\n]+', String.Heredoc),
  364. include('escapes'),
  365. (r'\\.', String.Heredoc),
  366. (r'\n+', String.Heredoc),
  367. include('interpol'),
  368. ],
  369. 'heredoc_no_interpol': [
  370. (r'[^\\\n]+', String.Heredoc),
  371. (r'\\.', String.Heredoc),
  372. (r'\n+', Whitespace),
  373. ],
  374. 'escapes': [
  375. (long_hex_char_re,
  376. bygroups(String.Escape, Number.Hex, String.Escape)),
  377. (hex_char_re, String.Escape),
  378. (escape_char_re, String.Escape),
  379. ],
  380. 'interpol': [
  381. (r'#\{', String.Interpol, 'interpol_string'),
  382. ],
  383. 'interpol_string': [
  384. (r'\}', String.Interpol, "#pop"),
  385. include('root')
  386. ],
  387. 'map_key': [
  388. include('root'),
  389. (r':', Punctuation, 'map_val'),
  390. (r'=>', Punctuation, 'map_val'),
  391. (r'\}', Punctuation, '#pop'),
  392. ],
  393. 'map_val': [
  394. include('root'),
  395. (r',', Punctuation, '#pop'),
  396. (r'(?=\})', Punctuation, '#pop'),
  397. ],
  398. 'tuple': [
  399. include('root'),
  400. (r'\}', Punctuation, '#pop'),
  401. ],
  402. }
  403. tokens.update(gen_elixir_string_rules('double', '"', String.Double))
  404. tokens.update(gen_elixir_string_rules('single', "'", String.Single))
  405. tokens.update(gen_elixir_string_rules('double_atom', '"', String.Symbol))
  406. tokens.update(gen_elixir_string_rules('single_atom', "'", String.Symbol))
  407. tokens.update(gen_elixir_sigil_rules())
  408. class ElixirConsoleLexer(Lexer):
  409. """
  410. For Elixir interactive console (iex) output like:
  411. .. sourcecode:: iex
  412. iex> [head | tail] = [1,2,3]
  413. [1,2,3]
  414. iex> head
  415. 1
  416. iex> tail
  417. [2,3]
  418. iex> [head | tail]
  419. [1,2,3]
  420. iex> length [head | tail]
  421. 3
  422. .. versionadded:: 1.5
  423. """
  424. name = 'Elixir iex session'
  425. aliases = ['iex']
  426. mimetypes = ['text/x-elixir-shellsession']
  427. _prompt_re = re.compile(r'(iex|\.{3})((?:\([\w@_.]+\))?\d+|\(\d+\))?> ')
  428. def get_tokens_unprocessed(self, text):
  429. exlexer = ElixirLexer(**self.options)
  430. curcode = ''
  431. in_error = False
  432. insertions = []
  433. for match in line_re.finditer(text):
  434. line = match.group()
  435. if line.startswith('** '):
  436. in_error = True
  437. insertions.append((len(curcode),
  438. [(0, Generic.Error, line[:-1])]))
  439. curcode += line[-1:]
  440. else:
  441. m = self._prompt_re.match(line)
  442. if m is not None:
  443. in_error = False
  444. end = m.end()
  445. insertions.append((len(curcode),
  446. [(0, Generic.Prompt, line[:end])]))
  447. curcode += line[end:]
  448. else:
  449. if curcode:
  450. yield from do_insertions(
  451. insertions, exlexer.get_tokens_unprocessed(curcode))
  452. curcode = ''
  453. insertions = []
  454. token = Generic.Error if in_error else Generic.Output
  455. yield match.start(), token, line
  456. if curcode:
  457. yield from do_insertions(
  458. insertions, exlexer.get_tokens_unprocessed(curcode))