123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265 |
- """
- pygments.lexers.grammar_notation
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Lexers for grammar notations like BNF.
- :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- from pygments.lexer import RegexLexer, bygroups, include, this, using, words
- from pygments.token import Comment, Keyword, Literal, Name, Number, \
- Operator, Punctuation, String, Text, Whitespace
- __all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']
class BnfLexer(RegexLexer):
    """
    Lexer for grammar notations that resemble the original BNF.

    So that as many BNF dialects as possible can be highlighted, the
    rules are kept deliberately small:

    * Terminal symbols receive no special treatment.
    * A nonterminal symbol is whatever sits between a pair of angle
      brackets.  Its name may be built from the printable ASCII
      characters from space up to ``~``, with the exception of the angle
      brackets themselves.  The generous character set is there for
      `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.
    * Comments are not recognised.
    * ``::=`` is the only operator that is singled out; every other
      operator or punctuation character is emitted as plain text.

    The resulting highlighting is sparse, but it is a reasonable
    compromise across dialects.

    .. versionadded:: 2.1
    """

    name = 'BNF'
    aliases = ['bnf']
    filenames = ['*.bnf']
    mimetypes = ['text/x-bnf']

    tokens = {
        'root': [
            # <nonterminal>: the brackets are punctuation, the name a class
            (
                r'(<)([ -;=?-~]+)(>)',
                bygroups(Punctuation, Name.Class, Punctuation),
            ),

            # the one and only operator
            (r'::=', Operator),

            # everything else is plain text; the first pattern swallows
            # long runs in one go (for performance), the second mops up
            # the characters the first one refuses
            (r'[^<>:]+', Text),
            (r'.', Text),
        ],
    }
class AbnfLexer(RegexLexer):
    """
    Lexer for IETF `RFC 7405 <http://www.ietf.org/rfc/rfc7405.txt>`_ ABNF
    grammars (RFC 7405 updates
    `RFC 5234 <http://www.ietf.org/rfc/rfc5234.txt>`_).

    .. versionadded:: 2.1
    """

    name = 'ABNF'
    url = 'http://www.ietf.org/rfc/rfc7405.txt'
    aliases = ['abnf']
    filenames = ['*.abnf']
    mimetypes = ['text/x-abnf']

    # Names of the ABNF "core rules"; highlighted as keywords below.
    _core_rules = (
        'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
        'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
        'SP', 'VCHAR', 'WSP')

    tokens = {
        'root': [
            # comment
            (r';.*$', Comment.Single),

            # quoted
            #   double quote itself in this state, it is as '%x22'.
            (r'(%[si])?"[^"]*"', Literal),

            # binary (but i have never seen...)
            (r'%b[01]+\-[01]+\b', Literal),          # range
            (r'%b[01]+(\.[01]+)*\b', Literal),       # concat

            # decimal
            (r'%d[0-9]+\-[0-9]+\b', Literal),        # range
            (r'%d[0-9]+(\.[0-9]+)*\b', Literal),     # concat

            # hexadecimal
            (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),     # range
            (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),  # concat

            # repetition (<a>*<b>element) including nRule
            (r'\b[0-9]+\*[0-9]+', Operator),
            (r'\b[0-9]+\*', Operator),
            (r'\b[0-9]+', Operator),
            (r'\*', Operator),

            # Strictly speaking, these are not keyword but
            # are called `Core Rule'.
            #
            # A bare ``\b`` is not enough as a terminator: ``-`` is a
            # legal rule-name character, and ``\b`` also matches right
            # before it.  Without the extra ``(?!-)`` a user rule such as
            # ``DIGIT-list`` would be split into a keyword ``DIGIT``
            # followed by stray text instead of being one nonterminal.
            (words(_core_rules, suffix=r'\b(?!-)'), Keyword),

            # nonterminals (ALPHA *(ALPHA / DIGIT / "-"))
            (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),

            # operators
            (r'(=/|=|/)', Operator),

            # punctuation
            (r'[\[\]()]', Punctuation),

            # fallback
            (r'\s+', Whitespace),
            (r'.', Text),
        ],
    }
class JsgfLexer(RegexLexer):
    """
    Lexer for grammars written in the JSpeech Grammar Format (JSGF).

    .. versionadded:: 2.2
    """

    name = 'JSGF'
    url = 'https://www.w3.org/TR/jsgf/'
    aliases = ['jsgf']
    filenames = ['*.jsgf']
    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']

    tokens = {
        # Comments are tried first, then everything else.
        'root': [
            include('comments'),
            include('non-comments'),
        ],

        'comments': [
            # "/**" (but not the empty comment "/**/") opens a
            # documentation comment, which has a state of its own
            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
            # any other block comment is consumed in a single match
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            # line comment
            (r'//.*$', Comment.Single),
        ],

        'non-comments': [
            # "#JSGF ..." header at the very start of the input, up to
            # (not including) the first semicolon
            (r'\A#JSGF[^;]*', Comment.Preproc),
            (r'\s+', Whitespace),
            (r';', Punctuation),
            (r'[=|()\[\]*+]', Operator),
            # text enclosed in a pair of slashes is emitted as a number
            (r'/[^/]+/', Number.Float),
            # quoted strings and {tags} are lexed in dedicated states
            (r'"', String.Double, 'string'),
            (r'\{', String.Other, 'tag'),
            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
            # "grammar" is followed by a (possibly dotted) grammar name
            (r'grammar\b', Keyword.Reserved, 'grammar name'),
            # the two special rule names, brackets included
            (
                r'(<)(NULL|VOID)(>)',
                bygroups(Punctuation, Name.Builtin, Punctuation),
            ),
            # any other "<" starts an ordinary rule name
            (r'<', Punctuation, 'rulename'),
            # a word, or a run of characters no rule above starts with
            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
        ],

        # Body of a double-quoted string; left at the closing quote.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\"]+', String.Double),
        ],

        # Body of a {tag}; left at the closing brace.
        'tag': [
            (r'\}', String.Other, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\}]+', String.Other),
        ],

        # Everything after the "grammar" keyword, up to and including
        # the terminating semicolon.
        'grammar name': [
            (r';', Punctuation, '#pop'),
            (r'\s+', Whitespace),
            (r'\.', Punctuation),
            (r'[^;\s.]+', Name.Namespace),
        ],

        # Inside "<...>"; left at the closing bracket.
        'rulename': [
            (r'>', Punctuation, '#pop'),
            (r'\*', Punctuation),
            (r'\s+', Whitespace),
            # a component followed by a dot is a namespace part ...
            (
                r'([^.>]+)(\s*)(\.)',
                bygroups(Name.Namespace, Text, Punctuation),
            ),
            # ... and what remains without a trailing dot is a constant
            (r'[^.>]+', Name.Constant),
        ],

        # Inside "/** ... */"; left at the closing "*/".
        'documentation comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            # "@example" / "@see": the text that follows, up to the next
            # tag line or the end of the comment, is handed back to this
            # lexer starting in the 'example' state
            (
                r'^(\s*)(\*?)(\s*)'
                r'(@(?:example|see))(\s+)'
                r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
                bygroups(
                    Whitespace, Comment.Multiline, Whitespace,
                    Comment.Special, Whitespace,
                    using(this, state='example'),
                ),
            ),
            # any other "@tag"
            (
                r'(^\s*\*?\s*)(@\S*)',
                bygroups(Comment.Multiline, Comment.Special),
            ),
            # ordinary comment text
            (r'[^*\n@]+|\w|\W', Comment.Multiline),
        ],

        # Text following "@example" / "@see" inside a documentation
        # comment; entered through the using(this, state='example') call
        # in the 'documentation comment' state.
        'example': [
            # the "*" that opens a continuation line stays comment text
            (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
            include('non-comments'),
            (r'.', Comment.Multiline),
        ],
    }
class PegLexer(RegexLexer):
    """
    This lexer is for Parsing Expression Grammars (PEG).

    Various implementations of PEG have made different decisions
    regarding the syntax, so let's try to be accommodating:

    * `<-`, `←`, `:`, and `=` are all accepted as rule operators.

    * Both `|` and `/` are choice operators.

    * `^`, `↑`, and `~` are cut operators.

    * A single `a-z` character immediately before a string, or
      multiple `a-z` characters following a string, are part of the
      string (e.g., `r"..."` or `"..."ilmsuxa`).

    * Inside a character class a backslash escapes the character that
      follows it, so an escaped closing bracket does not end the class.

    .. versionadded:: 2.6
    """

    name = 'PEG'
    url = 'https://bford.info/pub/lang/peg.pdf'
    aliases = ['peg']
    filenames = ['*.peg']
    mimetypes = ['text/x-peg']

    tokens = {
        'root': [
            # Comments
            (r'#.*$', Comment.Single),

            # All operators
            (r'<-|[←:=/|&!?*+^↑~]', Operator),

            # Other punctuation
            (r'[()]', Punctuation),

            # Keywords
            (r'\.', Keyword),

            # Character classes
            #
            # The leading run must stop at a backslash as well as at
            # ``]``; otherwise it swallows the backslash itself, the
            # escape branch never gets a chance, and a class such as
            # ``[a\]b]`` is cut short at the escaped bracket.  This
            # mirrors how the string rules below treat their delimiter.
            (r'(\[)([^\]\\]*(?:\\.[^\]\\]*)*)(\])',
             bygroups(Punctuation, String, Punctuation)),

            # Single and double quoted strings (with optional modifiers)
            (r'[a-z]?"[^"\\]*(?:\\.[^"\\]*)*"[a-z]*', String.Double),
            (r"[a-z]?'[^'\\]*(?:\\.[^'\\]*)*'[a-z]*", String.Single),

            # Nonterminals are not whitespace, operators, or punctuation
            (r'[^\s<←:=/|&!?*+\^↑~()\[\]"\'#]+', Name.Class),

            # Fallback
            (r'.', Text),
        ],
    }
|