123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365 |
- """
- pygments.lexers.crystal
- ~~~~~~~~~~~~~~~~~~~~~~~
- Lexer for Crystal.
- :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
- from pygments.lexer import ExtendedRegexLexer, include, bygroups, default, \
- words, line_re
- from pygments.token import Comment, Operator, Keyword, Name, String, Number, \
- Punctuation, Error, Whitespace
- __all__ = ['CrystalLexer']
# Operator spellings that Crystal lets appear as method names.  The lexer
# combines this list with a ':' prefix (operator symbols such as ``:<=>``)
# and with a '.' / '::' prefix (operator method calls such as ``a.+``).
# Written as a whitespace-separated table, like the keyword tables further
# down in this module; the resulting list and its order are unchanged.
CRYSTAL_OPERATORS = '''
    !=  !~  !  %  &&  &  **  *  +  -  /  <=>  <<  <=  <
    ===  ==  =~  =  >=  >>  >  []=  []?  []  ^  ||  |  ~
'''.split()
class CrystalLexer(ExtendedRegexLexer):
    """
    For Crystal source code.

    The token table is assembled in two parts: the hand-written states in
    ``tokens`` below, and the string/symbol/percent-literal states produced
    by ``gen_crystalstrings_rules()``, which are merged in at the end of the
    class body.  Heredocs are handled by ``heredoc_callback``, which needs
    the lexer context and is the reason this class derives from
    ``ExtendedRegexLexer``.

    .. versionadded:: 2.2
    """

    name = 'Crystal'
    url = 'http://crystal-lang.org'
    aliases = ['cr', 'crystal']
    filenames = ['*.cr']
    mimetypes = ['text/x-crystal']

    flags = re.DOTALL | re.MULTILINE

    def heredoc_callback(self, match, ctx):
        """Lex a heredoc opener, the rest of its line, and the heredoc body.

        ``match`` comes from one of the two heredoc rules in ``root`` and has
        these groups: 1 = ``<<`` or ``<<-``, 2 = optional opening quote,
        3 = heredoc name, 4 = closing quote (same as group 2), 5 = rest of
        the line including the newline.

        ``ctx`` is the lexer context.  A list named ``heredocstack`` is kept
        on it so that several heredocs opened on one line are all recorded
        before any body is consumed.  The call that finds the stack empty is
        the outermost one; it alone consumes the bodies and clears the stack.

        Yields ``(index, tokentype, text)`` tuples.
        """
        # okay, this is the hardest part of parsing Crystal...
        start = match.start(1)
        yield start, Operator, match.group(1)                   # <<-?
        yield match.start(2), String.Heredoc, match.group(2)    # quote ", ', `
        yield match.start(3), String.Delimiter, match.group(3)  # heredoc name
        yield match.start(4), String.Heredoc, match.group(4)    # quote again

        heredocstack = ctx.__dict__.setdefault('heredocstack', [])
        outermost = not heredocstack
        # Each entry is (tolerant, name); tolerant is True for the '<<-' form,
        # whose terminator line is compared with surrounding whitespace
        # stripped on both sides instead of only on the right.
        heredocstack.append((match.group(1) == '<<-', match.group(3)))

        # Lex the remainder of the opener line with the normal rules by
        # narrowing the context window to exactly that span.
        ctx.pos = match.start(5)
        ctx.end = match.end(5)
        # this may find other heredocs, so limit the recursion depth
        if len(heredocstack) < 100:
            yield from self.get_tokens_unprocessed(context=ctx)
        else:
            yield ctx.pos, String.Heredoc, match.group(5)
        ctx.pos = match.end()

        if outermost:
            # this is the outer heredoc again, now we can process them all
            for tolerant, hdname in heredocstack:
                lines = []
                for line_match in line_re.finditer(ctx.text, ctx.pos):
                    if tolerant:
                        check = line_match.group().strip()
                    else:
                        check = line_match.group().rstrip()
                    if check == hdname:
                        # Terminator found: everything collected so far is
                        # heredoc body, then the terminator line itself.
                        for body_line in lines:
                            yield (body_line.start(), String.Heredoc,
                                   body_line.group())
                        yield (line_match.start(), String.Delimiter,
                               line_match.group())
                        ctx.pos = line_match.end()
                        break
                    else:
                        lines.append(line_match)
                else:
                    # end of heredoc not found -- error!
                    for body_line in lines:
                        yield body_line.start(), Error, body_line.group()
            # Widen the window back to the whole text and reset the stack so
            # the next heredoc line starts from a clean state.
            ctx.end = len(ctx.text)
            del heredocstack[:]

    def gen_crystalstrings_rules():
        """Build the string, symbol and percent-literal states.

        Called once from the class body (hence no ``self``).  Returns a dict
        mapping state names to rule lists: the ``strings`` entry state, the
        three ``simple-*`` states for quoted literals, and for each of the
        five delimiter pairs an ``-intp-string``, ``-string`` and ``-regex``
        state.
        """
        states = {}
        states['strings'] = [
            (r'\:\w+[!?]?', String.Symbol),
            (words(CRYSTAL_OPERATORS, prefix=r'\:'), String.Symbol),
            (r":'(\\\\|\\[^\\]|[^'\\])*'", String.Symbol),
            # This allows arbitrary text after '\ for simplicity
            (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
            (r':"', String.Symbol, 'simple-sym'),
            # Crystal doesn't have "symbol:"s but this simplifies function args
            (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)),
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        # (the quoted-symbol state only recognises escapes; the string and
        # backtick states additionally recognise #{...} interpolation)
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-'+name] = [
                include('string-escaped' if name == 'sym' else 'string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # https://crystal-lang.org/docs/syntax_and_semantics/literals/string.html#percent-string-literals
        # lbrace/rbrace are regex fragments for the delimiters, bracecc is the
        # same pair spelled for use inside a character class, and name is the
        # state-name prefix.
        for lbrace, rbrace, bracecc, name in \
                ('\\{', '\\}', '{}', 'cb'), \
                ('\\[', '\\]', '\\[\\]', 'sb'), \
                ('\\(', '\\)', '()', 'pa'), \
                ('<', '>', '<>', 'ab'), \
                ('\\|', '\\|', '\\|', 'pi'):
            # ``(lbrace != rbrace) * [...]`` multiplies the list by a bool:
            # the '#push' rule that tracks nested delimiters is included only
            # when opener and closer differ (i.e. not for |...|).
            states[name+'-intp-string'] = [
                (r'\\' + lbrace, String.Other),
            ] + (lbrace != rbrace) * [
                (lbrace, String.Other, '#push'),
            ] + [
                (rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            states['strings'].append((r'%Q?' + lbrace, String.Other,
                                      name+'-intp-string'))
            states[name+'-string'] = [
                (r'\\[\\' + bracecc + ']', String.Other),
            ] + (lbrace != rbrace) * [
                (lbrace, String.Other, '#push'),
            ] + [
                (rbrace, String.Other, '#pop'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            # https://crystal-lang.org/docs/syntax_and_semantics/literals/array.html#percent-array-literals
            states['strings'].append((r'%[qwi]' + lbrace, String.Other,
                                      name+'-string'))
            states[name+'-regex'] = [
                (r'\\[\\' + bracecc + ']', String.Regex),
            ] + (lbrace != rbrace) * [
                (lbrace, String.Regex, '#push'),
            ] + [
                (rbrace + '[imsx]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + bracecc + ']', String.Regex),
                (r'[^\\#' + bracecc + ']+', String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      name+'-regex'))

        return states

    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            # keywords
            (words('''
                abstract asm begin break case do else elsif end ensure extend if in
                include next of private protected require rescue return select self super
                then unless until when while with yield
            '''.split(), suffix=r'\b'), Keyword),
            (words('''
                previous_def forall out uninitialized __DIR__ __FILE__ __LINE__
                __END_LINE__
            '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Keyword.Pseudo),
            # https://crystal-lang.org/docs/syntax_and_semantics/is_a.html
            (r'\.(is_a\?|nil\?|responds_to\?|as\?|as\b)', Keyword.Pseudo),
            (words(['true', 'false', 'nil'], suffix=r'\b'), Keyword.Constant),
            # start of function, class and module names
            (r'(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Whitespace, Name.Namespace)),
            (r'(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)',
             bygroups(Keyword, Whitespace, Name.Namespace), 'funcname'),
            # 'def' immediately followed by an operator method name.  The '-'
            # is escaped so that '+', '-' and '/' are listed literally;
            # unescaped, '+-/' is a character range that also admits ',' and
            # '.'.
            (r'def(?=[*%&^`~+\-/\[<>=])', Keyword, 'funcname'),
            (r'(annotation|class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)',
             bygroups(Keyword, Whitespace, Name.Namespace), 'classname'),
            # https://crystal-lang.org/api/toplevel.html
            (words('''
                instance_sizeof offsetof pointerof sizeof typeof
            '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Keyword.Pseudo),
            # macros
            (r'(?<!\.)(debugger\b|p!|pp!|record\b|spawn\b)', Name.Builtin.Pseudo),
            # builtins
            # NOTE: 'spawn' is listed here too, but the macro rule above has
            # the same guards and is tried first, so it always wins.
            (words('''
                abort at_exit caller exit gets loop main p pp print printf puts
                raise rand read_line sleep spawn sprintf system
            '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin),
            # https://crystal-lang.org/api/Object.html#macro-summary
            (r'(?<!\.)(((class_)?((getter|property)\b[!?]?|setter\b))|'
             r'(def_(clone|equals|equals_and_hash|hash)|delegate|forward_missing_to)\b)',
             Name.Builtin.Pseudo),
            # normal heredocs
            (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
             heredoc_callback),
            # empty string heredocs
            (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
            (r'__END__', Comment.Preproc, 'end-part'),
            # multiline regex (after keywords or assignments)
            (r'(?:^|(?<=[=<>~!:])|'
             r'(?<=(?:\s|;)when\s)|'
             r'(?<=(?:\s|;)or\s)|'
             r'(?<=(?:\s|;)and\s)|'
             r'(?<=\.index\s)|'
             r'(?<=\.scan\s)|'
             r'(?<=\.sub\s)|'
             r'(?<=\.sub!\s)|'
             r'(?<=\.gsub\s)|'
             r'(?<=\.gsub!\s)|'
             r'(?<=\.match\s)|'
             r'(?<=(?:\s|;)if\s)|'
             r'(?<=(?:\s|;)elsif\s)|'
             r'(?<=^when\s)|'
             r'(?<=^index\s)|'
             r'(?<=^scan\s)|'
             r'(?<=^sub\s)|'
             r'(?<=^gsub\s)|'
             r'(?<=^sub!\s)|'
             r'(?<=^gsub!\s)|'
             r'(?<=^match\s)|'
             r'(?<=^if\s)|'
             r'(?<=^elsif\s)'
             r')(\s*)(/)', bygroups(Whitespace, String.Regex), 'multiline-regex'),
            # multiline regex (in method calls or subscripts)
            (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'),
            # multiline regex (this time the funny no whitespace rule)
            (r'(\s+)(/)(?![\s=])', bygroups(Whitespace, String.Regex),
             'multiline-regex'),
            # lex numbers and ignore following regular expressions which
            # are division operators in fact (grrrr. i hate that. any
            # better ideas?)
            # since pygments 0.7 we also eat a "?" operator after numbers
            # so that the char operator does not work. Chars are not allowed
            # there so that you can use the ternary operator.
            # stupid example:
            #   x>=0?n[x]:""
            (r'(0o[0-7]+(?:_[0-7]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Oct, Whitespace, Operator)),
            (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Hex, Whitespace, Operator)),
            (r'(0b[01]+(?:_[01]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Bin, Whitespace, Operator)),
            # 3 separate expressions for floats because any of the 3 optional
            # parts makes it a float
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)(?:e[+-]?[0-9]+)?'
             r'(?:_?f[0-9]+)?)(\s*)([/?])?',
             bygroups(Number.Float, Whitespace, Operator)),
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)'
             r'(?:_?f[0-9]+)?)(\s*)([/?])?',
             bygroups(Number.Float, Whitespace, Operator)),
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)?'
             r'(?:_?f[0-9]+))(\s*)([/?])?',
             bygroups(Number.Float, Whitespace, Operator)),
            (r'(0\b|[1-9][\d]*(?:_\d+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Integer, Whitespace, Operator)),
            # Names
            (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
            (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
            (r'\$\w+', Name.Variable.Global),
            (r'\$[!@&`\'+~=/\\,;.<>_*$?:"^-]', Name.Variable.Global),
            (r'\$-[0adFiIlpvw]', Name.Variable.Global),
            (r'::', Operator),
            include('strings'),
            # https://crystal-lang.org/reference/syntax_and_semantics/literals/char.html
            (r'\?(\\[MC]-)*'  # modifiers
             r'(\\([\\abefnrtv#"\']|[0-7]{1,3}|x[a-fA-F0-9]{2}|u[a-fA-F0-9]{4}|u\{[a-fA-F0-9 ]+\})|\S)'
             r'(?!\w)',
             String.Char),
            (r'[A-Z][A-Z_]+\b(?!::|\.)', Name.Constant),
            # macro expansion
            (r'\{%', String.Interpol, 'in-macro-control'),
            (r'\{\{', String.Interpol, 'in-macro-expr'),
            # annotations
            (r'(@\[)(\s*)([A-Z]\w*(::[A-Z]\w*)*)',
             bygroups(Operator, Whitespace, Name.Decorator), 'in-annot'),
            # this is needed because Crystal attributes can look
            # like keywords (class) or like this: ` ?!?
            (words(CRYSTAL_OPERATORS, prefix=r'(\.|::)'),
             bygroups(Operator, Name.Operator)),
            (r'(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])',
             bygroups(Operator, Name)),
            # Names can end with [!?] unless it's "!="
            (r'[a-zA-Z_]\w*(?:[!?](?!=))?', Name),
            (r'(\[|\]\??|\*\*|<=>?|>=|<<?|>>?|=~|===|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&!^|~]=?', Operator),
            (r'[(){};,/?:\\]', Punctuation),
            (r'\s+', Whitespace)
        ],
        # entered right after def/fun/macro: take one (optionally
        # receiver-qualified) method name, then return to the caller state
        'funcname': [
            (r'(?:([a-zA-Z_]\w*)(\.))?'
             r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
             bygroups(Name.Class, Operator, Name.Function), '#pop'),
            default('#pop')
        ],
        # entered right after class/struct/enum/...: type names and
        # parenthesised type names, then return to the caller state
        'classname': [
            (r'[A-Z_]\w*', Name.Class),
            (r'(\()(\s*)([A-Z_]\w*)(\s*)(\))',
             bygroups(Punctuation, Whitespace, Name.Class, Whitespace, Punctuation)),
            default('#pop')
        ],
        # inside #{...}: ordinary code, with brace nesting tracked
        'in-intp': [
            (r'\{', String.Interpol, '#push'),
            (r'\}', String.Interpol, '#pop'),
            include('root'),
        ],
        'string-intp': [
            (r'#\{', String.Interpol, 'in-intp'),
        ],
        'string-escaped': [
            # https://crystal-lang.org/reference/syntax_and_semantics/literals/string.html
            (r'\\([\\abefnrtv#"\']|[0-7]{1,3}|x[a-fA-F0-9]{2}|u[a-fA-F0-9]{4}|u\{[a-fA-F0-9 ]+\})',
             String.Escape)
        ],
        'string-intp-escaped': [
            include('string-intp'),
            include('string-escaped'),
        ],
        'interpolated-regex': [
            include('string-intp'),
            (r'[\\#]', String.Regex),
            (r'[^\\#]+', String.Regex),
        ],
        'interpolated-string': [
            include('string-intp'),
            (r'[\\#]', String.Other),
            (r'[^\\#]+', String.Other),
        ],
        'multiline-regex': [
            include('string-intp'),
            (r'\\\\', String.Regex),
            (r'\\/', String.Regex),
            (r'[\\#]', String.Regex),
            (r'[^\\/#]+', String.Regex),
            (r'/[imsx]*', String.Regex, '#pop'),
        ],
        # after __END__: with re.DOTALL, '.+' takes all remaining text
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ],
        'in-macro-control': [
            (r'\{%', String.Interpol, '#push'),
            (r'%\}', String.Interpol, '#pop'),
            (r'(for|verbatim)\b', Keyword),
            include('root'),
        ],
        'in-macro-expr': [
            (r'\{\{', String.Interpol, '#push'),
            (r'\}\}', String.Interpol, '#pop'),
            include('root'),
        ],
        'in-annot': [
            (r'\[', Operator, '#push'),
            (r'\]', Operator, '#pop'),
            include('root'),
        ],
    }
    # merge in the generated string / symbol / percent-literal states
    tokens.update(gen_crystalstrings_rules())
|