cmdline.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668
  1. """
  2. pygments.cmdline
  3. ~~~~~~~~~~~~~~~~
  4. Command line interface.
  5. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import os
  9. import sys
  10. import shutil
  11. import argparse
  12. from textwrap import dedent
  13. from pygments import __version__, highlight
  14. from pygments.util import ClassNotFound, OptionError, docstring_headline, \
  15. guess_decode, guess_decode_from_terminal, terminal_encoding, \
  16. UnclosingTextIOWrapper
  17. from pygments.lexers import get_all_lexers, get_lexer_by_name, guess_lexer, \
  18. load_lexer_from_file, get_lexer_for_filename, find_lexer_class_for_filename
  19. from pygments.lexers.special import TextLexer
  20. from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter
  21. from pygments.formatters import get_all_formatters, get_formatter_by_name, \
  22. load_formatter_from_file, get_formatter_for_filename, find_formatter_class
  23. from pygments.formatters.terminal import TerminalFormatter
  24. from pygments.formatters.terminal256 import Terminal256Formatter, TerminalTrueColorFormatter
  25. from pygments.filters import get_all_filters, find_filter_class
  26. from pygments.styles import get_all_styles, get_style_by_name
  27. def _parse_options(o_strs):
  28. opts = {}
  29. if not o_strs:
  30. return opts
  31. for o_str in o_strs:
  32. if not o_str.strip():
  33. continue
  34. o_args = o_str.split(',')
  35. for o_arg in o_args:
  36. o_arg = o_arg.strip()
  37. try:
  38. o_key, o_val = o_arg.split('=', 1)
  39. o_key = o_key.strip()
  40. o_val = o_val.strip()
  41. except ValueError:
  42. opts[o_arg] = True
  43. else:
  44. opts[o_key] = o_val
  45. return opts
  46. def _parse_filters(f_strs):
  47. filters = []
  48. if not f_strs:
  49. return filters
  50. for f_str in f_strs:
  51. if ':' in f_str:
  52. fname, fopts = f_str.split(':', 1)
  53. filters.append((fname, _parse_options([fopts])))
  54. else:
  55. filters.append((f_str, {}))
  56. return filters
  57. def _print_help(what, name):
  58. try:
  59. if what == 'lexer':
  60. cls = get_lexer_by_name(name)
  61. print("Help on the %s lexer:" % cls.name)
  62. print(dedent(cls.__doc__))
  63. elif what == 'formatter':
  64. cls = find_formatter_class(name)
  65. print("Help on the %s formatter:" % cls.name)
  66. print(dedent(cls.__doc__))
  67. elif what == 'filter':
  68. cls = find_filter_class(name)
  69. print("Help on the %s filter:" % name)
  70. print(dedent(cls.__doc__))
  71. return 0
  72. except (AttributeError, ValueError):
  73. print("%s not found!" % what, file=sys.stderr)
  74. return 1
  75. def _print_list(what):
  76. if what == 'lexer':
  77. print()
  78. print("Lexers:")
  79. print("~~~~~~~")
  80. info = []
  81. for fullname, names, exts, _ in get_all_lexers():
  82. tup = (', '.join(names)+':', fullname,
  83. exts and '(filenames ' + ', '.join(exts) + ')' or '')
  84. info.append(tup)
  85. info.sort()
  86. for i in info:
  87. print(('* %s\n %s %s') % i)
  88. elif what == 'formatter':
  89. print()
  90. print("Formatters:")
  91. print("~~~~~~~~~~~")
  92. info = []
  93. for cls in get_all_formatters():
  94. doc = docstring_headline(cls)
  95. tup = (', '.join(cls.aliases) + ':', doc, cls.filenames and
  96. '(filenames ' + ', '.join(cls.filenames) + ')' or '')
  97. info.append(tup)
  98. info.sort()
  99. for i in info:
  100. print(('* %s\n %s %s') % i)
  101. elif what == 'filter':
  102. print()
  103. print("Filters:")
  104. print("~~~~~~~~")
  105. for name in get_all_filters():
  106. cls = find_filter_class(name)
  107. print("* " + name + ':')
  108. print(" %s" % docstring_headline(cls))
  109. elif what == 'style':
  110. print()
  111. print("Styles:")
  112. print("~~~~~~~")
  113. for name in get_all_styles():
  114. cls = get_style_by_name(name)
  115. print("* " + name + ':')
  116. print(" %s" % docstring_headline(cls))
  117. def _print_list_as_json(requested_items):
  118. import json
  119. result = {}
  120. if 'lexer' in requested_items:
  121. info = {}
  122. for fullname, names, filenames, mimetypes in get_all_lexers():
  123. info[fullname] = {
  124. 'aliases': names,
  125. 'filenames': filenames,
  126. 'mimetypes': mimetypes
  127. }
  128. result['lexers'] = info
  129. if 'formatter' in requested_items:
  130. info = {}
  131. for cls in get_all_formatters():
  132. doc = docstring_headline(cls)
  133. info[cls.name] = {
  134. 'aliases': cls.aliases,
  135. 'filenames': cls.filenames,
  136. 'doc': doc
  137. }
  138. result['formatters'] = info
  139. if 'filter' in requested_items:
  140. info = {}
  141. for name in get_all_filters():
  142. cls = find_filter_class(name)
  143. info[name] = {
  144. 'doc': docstring_headline(cls)
  145. }
  146. result['filters'] = info
  147. if 'style' in requested_items:
  148. info = {}
  149. for name in get_all_styles():
  150. cls = get_style_by_name(name)
  151. info[name] = {
  152. 'doc': docstring_headline(cls)
  153. }
  154. result['styles'] = info
  155. json.dump(result, sys.stdout)
  156. def main_inner(parser, argns):
  157. if argns.help:
  158. parser.print_help()
  159. return 0
  160. if argns.V:
  161. print('Pygments version %s, (c) 2006-2023 by Georg Brandl, Matthäus '
  162. 'Chajdas and contributors.' % __version__)
  163. return 0
  164. def is_only_option(opt):
  165. return not any(v for (k, v) in vars(argns).items() if k != opt)
  166. # handle ``pygmentize -L``
  167. if argns.L is not None:
  168. arg_set = set()
  169. for k, v in vars(argns).items():
  170. if v:
  171. arg_set.add(k)
  172. arg_set.discard('L')
  173. arg_set.discard('json')
  174. if arg_set:
  175. parser.print_help(sys.stderr)
  176. return 2
  177. # print version
  178. if not argns.json:
  179. main(['', '-V'])
  180. allowed_types = {'lexer', 'formatter', 'filter', 'style'}
  181. largs = [arg.rstrip('s') for arg in argns.L]
  182. if any(arg not in allowed_types for arg in largs):
  183. parser.print_help(sys.stderr)
  184. return 0
  185. if not largs:
  186. largs = allowed_types
  187. if not argns.json:
  188. for arg in largs:
  189. _print_list(arg)
  190. else:
  191. _print_list_as_json(largs)
  192. return 0
  193. # handle ``pygmentize -H``
  194. if argns.H:
  195. if not is_only_option('H'):
  196. parser.print_help(sys.stderr)
  197. return 2
  198. what, name = argns.H
  199. if what not in ('lexer', 'formatter', 'filter'):
  200. parser.print_help(sys.stderr)
  201. return 2
  202. return _print_help(what, name)
  203. # parse -O options
  204. parsed_opts = _parse_options(argns.O or [])
  205. # parse -P options
  206. for p_opt in argns.P or []:
  207. try:
  208. name, value = p_opt.split('=', 1)
  209. except ValueError:
  210. parsed_opts[p_opt] = True
  211. else:
  212. parsed_opts[name] = value
  213. # encodings
  214. inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding'))
  215. outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding'))
  216. # handle ``pygmentize -N``
  217. if argns.N:
  218. lexer = find_lexer_class_for_filename(argns.N)
  219. if lexer is None:
  220. lexer = TextLexer
  221. print(lexer.aliases[0])
  222. return 0
  223. # handle ``pygmentize -C``
  224. if argns.C:
  225. inp = sys.stdin.buffer.read()
  226. try:
  227. lexer = guess_lexer(inp, inencoding=inencoding)
  228. except ClassNotFound:
  229. lexer = TextLexer
  230. print(lexer.aliases[0])
  231. return 0
  232. # handle ``pygmentize -S``
  233. S_opt = argns.S
  234. a_opt = argns.a
  235. if S_opt is not None:
  236. f_opt = argns.f
  237. if not f_opt:
  238. parser.print_help(sys.stderr)
  239. return 2
  240. if argns.l or argns.INPUTFILE:
  241. parser.print_help(sys.stderr)
  242. return 2
  243. try:
  244. parsed_opts['style'] = S_opt
  245. fmter = get_formatter_by_name(f_opt, **parsed_opts)
  246. except ClassNotFound as err:
  247. print(err, file=sys.stderr)
  248. return 1
  249. print(fmter.get_style_defs(a_opt or ''))
  250. return 0
  251. # if no -S is given, -a is not allowed
  252. if argns.a is not None:
  253. parser.print_help(sys.stderr)
  254. return 2
  255. # parse -F options
  256. F_opts = _parse_filters(argns.F or [])
  257. # -x: allow custom (eXternal) lexers and formatters
  258. allow_custom_lexer_formatter = bool(argns.x)
  259. # select lexer
  260. lexer = None
  261. # given by name?
  262. lexername = argns.l
  263. if lexername:
  264. # custom lexer, located relative to user's cwd
  265. if allow_custom_lexer_formatter and '.py' in lexername:
  266. try:
  267. filename = None
  268. name = None
  269. if ':' in lexername:
  270. filename, name = lexername.rsplit(':', 1)
  271. if '.py' in name:
  272. # This can happen on Windows: If the lexername is
  273. # C:\lexer.py -- return to normal load path in that case
  274. name = None
  275. if filename and name:
  276. lexer = load_lexer_from_file(filename, name,
  277. **parsed_opts)
  278. else:
  279. lexer = load_lexer_from_file(lexername, **parsed_opts)
  280. except ClassNotFound as err:
  281. print('Error:', err, file=sys.stderr)
  282. return 1
  283. else:
  284. try:
  285. lexer = get_lexer_by_name(lexername, **parsed_opts)
  286. except (OptionError, ClassNotFound) as err:
  287. print('Error:', err, file=sys.stderr)
  288. return 1
  289. # read input code
  290. code = None
  291. if argns.INPUTFILE:
  292. if argns.s:
  293. print('Error: -s option not usable when input file specified',
  294. file=sys.stderr)
  295. return 2
  296. infn = argns.INPUTFILE
  297. try:
  298. with open(infn, 'rb') as infp:
  299. code = infp.read()
  300. except Exception as err:
  301. print('Error: cannot read infile:', err, file=sys.stderr)
  302. return 1
  303. if not inencoding:
  304. code, inencoding = guess_decode(code)
  305. # do we have to guess the lexer?
  306. if not lexer:
  307. try:
  308. lexer = get_lexer_for_filename(infn, code, **parsed_opts)
  309. except ClassNotFound as err:
  310. if argns.g:
  311. try:
  312. lexer = guess_lexer(code, **parsed_opts)
  313. except ClassNotFound:
  314. lexer = TextLexer(**parsed_opts)
  315. else:
  316. print('Error:', err, file=sys.stderr)
  317. return 1
  318. except OptionError as err:
  319. print('Error:', err, file=sys.stderr)
  320. return 1
  321. elif not argns.s: # treat stdin as full file (-s support is later)
  322. # read code from terminal, always in binary mode since we want to
  323. # decode ourselves and be tolerant with it
  324. code = sys.stdin.buffer.read() # use .buffer to get a binary stream
  325. if not inencoding:
  326. code, inencoding = guess_decode_from_terminal(code, sys.stdin)
  327. # else the lexer will do the decoding
  328. if not lexer:
  329. try:
  330. lexer = guess_lexer(code, **parsed_opts)
  331. except ClassNotFound:
  332. lexer = TextLexer(**parsed_opts)
  333. else: # -s option needs a lexer with -l
  334. if not lexer:
  335. print('Error: when using -s a lexer has to be selected with -l',
  336. file=sys.stderr)
  337. return 2
  338. # process filters
  339. for fname, fopts in F_opts:
  340. try:
  341. lexer.add_filter(fname, **fopts)
  342. except ClassNotFound as err:
  343. print('Error:', err, file=sys.stderr)
  344. return 1
  345. # select formatter
  346. outfn = argns.o
  347. fmter = argns.f
  348. if fmter:
  349. # custom formatter, located relative to user's cwd
  350. if allow_custom_lexer_formatter and '.py' in fmter:
  351. try:
  352. filename = None
  353. name = None
  354. if ':' in fmter:
  355. # Same logic as above for custom lexer
  356. filename, name = fmter.rsplit(':', 1)
  357. if '.py' in name:
  358. name = None
  359. if filename and name:
  360. fmter = load_formatter_from_file(filename, name,
  361. **parsed_opts)
  362. else:
  363. fmter = load_formatter_from_file(fmter, **parsed_opts)
  364. except ClassNotFound as err:
  365. print('Error:', err, file=sys.stderr)
  366. return 1
  367. else:
  368. try:
  369. fmter = get_formatter_by_name(fmter, **parsed_opts)
  370. except (OptionError, ClassNotFound) as err:
  371. print('Error:', err, file=sys.stderr)
  372. return 1
  373. if outfn:
  374. if not fmter:
  375. try:
  376. fmter = get_formatter_for_filename(outfn, **parsed_opts)
  377. except (OptionError, ClassNotFound) as err:
  378. print('Error:', err, file=sys.stderr)
  379. return 1
  380. try:
  381. outfile = open(outfn, 'wb')
  382. except Exception as err:
  383. print('Error: cannot open outfile:', err, file=sys.stderr)
  384. return 1
  385. else:
  386. if not fmter:
  387. if os.environ.get('COLORTERM','') in ('truecolor', '24bit'):
  388. fmter = TerminalTrueColorFormatter(**parsed_opts)
  389. elif '256' in os.environ.get('TERM', ''):
  390. fmter = Terminal256Formatter(**parsed_opts)
  391. else:
  392. fmter = TerminalFormatter(**parsed_opts)
  393. outfile = sys.stdout.buffer
  394. # determine output encoding if not explicitly selected
  395. if not outencoding:
  396. if outfn:
  397. # output file? use lexer encoding for now (can still be None)
  398. fmter.encoding = inencoding
  399. else:
  400. # else use terminal encoding
  401. fmter.encoding = terminal_encoding(sys.stdout)
  402. # provide coloring under Windows, if possible
  403. if not outfn and sys.platform in ('win32', 'cygwin') and \
  404. fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover
  405. # unfortunately colorama doesn't support binary streams on Py3
  406. outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding)
  407. fmter.encoding = None
  408. try:
  409. import colorama.initialise
  410. except ImportError:
  411. pass
  412. else:
  413. outfile = colorama.initialise.wrap_stream(
  414. outfile, convert=None, strip=None, autoreset=False, wrap=True)
  415. # When using the LaTeX formatter and the option `escapeinside` is
  416. # specified, we need a special lexer which collects escaped text
  417. # before running the chosen language lexer.
  418. escapeinside = parsed_opts.get('escapeinside', '')
  419. if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
  420. left = escapeinside[0]
  421. right = escapeinside[1]
  422. lexer = LatexEmbeddedLexer(left, right, lexer)
  423. # ... and do it!
  424. if not argns.s:
  425. # process whole input as per normal...
  426. try:
  427. highlight(code, lexer, fmter, outfile)
  428. finally:
  429. if outfn:
  430. outfile.close()
  431. return 0
  432. else:
  433. # line by line processing of stdin (eg: for 'tail -f')...
  434. try:
  435. while 1:
  436. line = sys.stdin.buffer.readline()
  437. if not line:
  438. break
  439. if not inencoding:
  440. line = guess_decode_from_terminal(line, sys.stdin)[0]
  441. highlight(line, lexer, fmter, outfile)
  442. if hasattr(outfile, 'flush'):
  443. outfile.flush()
  444. return 0
  445. except KeyboardInterrupt: # pragma: no cover
  446. return 0
  447. finally:
  448. if outfn:
  449. outfile.close()
  450. class HelpFormatter(argparse.HelpFormatter):
  451. def __init__(self, prog, indent_increment=2, max_help_position=16, width=None):
  452. if width is None:
  453. try:
  454. width = shutil.get_terminal_size().columns - 2
  455. except Exception:
  456. pass
  457. argparse.HelpFormatter.__init__(self, prog, indent_increment,
  458. max_help_position, width)
  459. def main(args=sys.argv):
  460. """
  461. Main command line entry point.
  462. """
  463. desc = "Highlight an input file and write the result to an output file."
  464. parser = argparse.ArgumentParser(description=desc, add_help=False,
  465. formatter_class=HelpFormatter)
  466. operation = parser.add_argument_group('Main operation')
  467. lexersel = operation.add_mutually_exclusive_group()
  468. lexersel.add_argument(
  469. '-l', metavar='LEXER',
  470. help='Specify the lexer to use. (Query names with -L.) If not '
  471. 'given and -g is not present, the lexer is guessed from the filename.')
  472. lexersel.add_argument(
  473. '-g', action='store_true',
  474. help='Guess the lexer from the file contents, or pass through '
  475. 'as plain text if nothing can be guessed.')
  476. operation.add_argument(
  477. '-F', metavar='FILTER[:options]', action='append',
  478. help='Add a filter to the token stream. (Query names with -L.) '
  479. 'Filter options are given after a colon if necessary.')
  480. operation.add_argument(
  481. '-f', metavar='FORMATTER',
  482. help='Specify the formatter to use. (Query names with -L.) '
  483. 'If not given, the formatter is guessed from the output filename, '
  484. 'and defaults to the terminal formatter if the output is to the '
  485. 'terminal or an unknown file extension.')
  486. operation.add_argument(
  487. '-O', metavar='OPTION=value[,OPTION=value,...]', action='append',
  488. help='Give options to the lexer and formatter as a comma-separated '
  489. 'list of key-value pairs. '
  490. 'Example: `-O bg=light,python=cool`.')
  491. operation.add_argument(
  492. '-P', metavar='OPTION=value', action='append',
  493. help='Give a single option to the lexer and formatter - with this '
  494. 'you can pass options whose value contains commas and equal signs. '
  495. 'Example: `-P "heading=Pygments, the Python highlighter"`.')
  496. operation.add_argument(
  497. '-o', metavar='OUTPUTFILE',
  498. help='Where to write the output. Defaults to standard output.')
  499. operation.add_argument(
  500. 'INPUTFILE', nargs='?',
  501. help='Where to read the input. Defaults to standard input.')
  502. flags = parser.add_argument_group('Operation flags')
  503. flags.add_argument(
  504. '-v', action='store_true',
  505. help='Print a detailed traceback on unhandled exceptions, which '
  506. 'is useful for debugging and bug reports.')
  507. flags.add_argument(
  508. '-s', action='store_true',
  509. help='Process lines one at a time until EOF, rather than waiting to '
  510. 'process the entire file. This only works for stdin, only for lexers '
  511. 'with no line-spanning constructs, and is intended for streaming '
  512. 'input such as you get from `tail -f`. '
  513. 'Example usage: `tail -f sql.log | pygmentize -s -l sql`.')
  514. flags.add_argument(
  515. '-x', action='store_true',
  516. help='Allow custom lexers and formatters to be loaded from a .py file '
  517. 'relative to the current working directory. For example, '
  518. '`-l ./customlexer.py -x`. By default, this option expects a file '
  519. 'with a class named CustomLexer or CustomFormatter; you can also '
  520. 'specify your own class name with a colon (`-l ./lexer.py:MyLexer`). '
  521. 'Users should be very careful not to use this option with untrusted '
  522. 'files, because it will import and run them.')
  523. flags.add_argument('--json', help='Output as JSON. This can '
  524. 'be only used in conjunction with -L.',
  525. default=False,
  526. action='store_true')
  527. special_modes_group = parser.add_argument_group(
  528. 'Special modes - do not do any highlighting')
  529. special_modes = special_modes_group.add_mutually_exclusive_group()
  530. special_modes.add_argument(
  531. '-S', metavar='STYLE -f formatter',
  532. help='Print style definitions for STYLE for a formatter '
  533. 'given with -f. The argument given by -a is formatter '
  534. 'dependent.')
  535. special_modes.add_argument(
  536. '-L', nargs='*', metavar='WHAT',
  537. help='List lexers, formatters, styles or filters -- '
  538. 'give additional arguments for the thing(s) you want to list '
  539. '(e.g. "styles"), or omit them to list everything.')
  540. special_modes.add_argument(
  541. '-N', metavar='FILENAME',
  542. help='Guess and print out a lexer name based solely on the given '
  543. 'filename. Does not take input or highlight anything. If no specific '
  544. 'lexer can be determined, "text" is printed.')
  545. special_modes.add_argument(
  546. '-C', action='store_true',
  547. help='Like -N, but print out a lexer name based solely on '
  548. 'a given content from standard input.')
  549. special_modes.add_argument(
  550. '-H', action='store', nargs=2, metavar=('NAME', 'TYPE'),
  551. help='Print detailed help for the object <name> of type <type>, '
  552. 'where <type> is one of "lexer", "formatter" or "filter".')
  553. special_modes.add_argument(
  554. '-V', action='store_true',
  555. help='Print the package version.')
  556. special_modes.add_argument(
  557. '-h', '--help', action='store_true',
  558. help='Print this help.')
  559. special_modes_group.add_argument(
  560. '-a', metavar='ARG',
  561. help='Formatter-specific additional argument for the -S (print '
  562. 'style sheet) mode.')
  563. argns = parser.parse_args(args[1:])
  564. try:
  565. return main_inner(parser, argns)
  566. except BrokenPipeError:
  567. # someone closed our stdout, e.g. by quitting a pager.
  568. return 0
  569. except Exception:
  570. if argns.v:
  571. print(file=sys.stderr)
  572. print('*' * 65, file=sys.stderr)
  573. print('An unhandled exception occurred while highlighting.',
  574. file=sys.stderr)
  575. print('Please report the whole traceback to the issue tracker at',
  576. file=sys.stderr)
  577. print('<https://github.com/pygments/pygments/issues>.',
  578. file=sys.stderr)
  579. print('*' * 65, file=sys.stderr)
  580. print(file=sys.stderr)
  581. raise
  582. import traceback
  583. info = traceback.format_exception(*sys.exc_info())
  584. msg = info[-1].strip()
  585. if len(info) >= 3:
  586. # extract relevant file and position info
  587. msg += '\n (f%s)' % info[-2].split('\n')[0].strip()[1:]
  588. print(file=sys.stderr)
  589. print('*** Error while highlighting:', file=sys.stderr)
  590. print(msg, file=sys.stderr)
  591. print('*** If this is a bug you want to report, please rerun with -v.',
  592. file=sys.stderr)
  593. return 1