  1. """
  2. pygments.lexers.special
  3. ~~~~~~~~~~~~~~~~~~~~~~~
  4. Special lexers.
  5. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import ast
  9. from pygments.lexer import Lexer, line_re
  10. from pygments.token import Token, Error, Text, Generic
  11. from pygments.util import get_choice_opt
  12. __all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer']

class TextLexer(Lexer):
    """
    "Null" lexer, doesn't highlight anything.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    priority = 0.01

    def get_tokens_unprocessed(self, text):
        yield 0, Text, text

    def analyse_text(text):
        return TextLexer.priority
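
# Illustrative usage sketch (not part of the original module): TextLexer
# simply yields the whole input as a single ``Token.Text`` token, e.g.
#
#   >>> list(TextLexer().get_tokens_unprocessed('hello\n'))
#   [(0, Token.Text, 'hello\n')]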

class OutputLexer(Lexer):
    """
    Simple lexer that highlights everything as ``Token.Generic.Output``.

    .. versionadded:: 2.10
    """
    name = 'Text output'
    aliases = ['output']

    def get_tokens_unprocessed(self, text):
        yield 0, Generic.Output, text
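
# Illustrative usage sketch (not part of the original module): OutputLexer
# behaves like TextLexer above but tags everything as program output, e.g.
#
#   >>> list(OutputLexer().get_tokens_unprocessed('hello\n'))
#   [(0, Token.Generic.Output, 'hello\n')]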

_ttype_cache = {}


class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = []
    filenames = []
    mimetypes = ['application/x-pygments-tokens']

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def get_tokens(self, text):
        if self.compress:
            if isinstance(text, str):
                text = text.encode('latin1')
            try:
                if self.compress == 'gz':
                    import gzip
                    text = gzip.decompress(text)
                elif self.compress == 'bz2':
                    import bz2
                    text = bz2.decompress(text)
            except OSError:
                yield Error, text.decode('latin1')
        if isinstance(text, bytes):
            text = text.decode('latin1')

        # do not call Lexer.get_tokens() because stripping is not optional.
        text = text.strip('\n') + '\n'
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v

    def get_tokens_unprocessed(self, text):
        length = 0
        for match in line_re.finditer(text):
            try:
                ttypestr, val = match.group().rstrip().split('\t', 1)
                ttype = _ttype_cache.get(ttypestr)
                if not ttype:
                    ttype = Token
                    ttypes = ttypestr.split('.')[1:]
                    for ttype_ in ttypes:
                        if not ttype_ or not ttype_[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, ttype_)
                    _ttype_cache[ttypestr] = ttype
                val = ast.literal_eval(val)
                if not isinstance(val, str):
                    raise ValueError('expected str')
            except (SyntaxError, ValueError):
                val = match.group()
                ttype = Error
            yield length, ttype, val
            length += len(val)
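

# A minimal round-trip sketch (not part of the original module), assuming a
# working Pygments installation: format a snippet with RawTokenFormatter,
# then feed the raw token dump back through the RawTokenLexer defined above.
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import RawTokenFormatter
    from pygments.lexers import PythonLexer

    source = 'print("hi")\n'
    # RawTokenFormatter emits bytes, one "Token.Type<TAB>repr(value)" per line.
    raw = highlight(source, PythonLexer(), RawTokenFormatter())
    # RawTokenLexer turns that dump back into (tokentype, value) pairs.
    for ttype, value in RawTokenLexer().get_tokens(raw):
        print(ttype, repr(value))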