{"id":24333,"library":"pygmars","title":"pygmars","description":"A library for crafting simple regex-based small language lexers and parsers. Build parsers from grammars and accept Pygments lexers as input. Derived from NLTK. Current version 1.0.0, released May 2024. Maintained by aboutcode-org, release cadence irregular.","status":"active","version":"1.0.0","language":"python","source_language":"en","source_url":"https://github.com/aboutcode-org/pygmars","tags":["lexer","parser","regex","grammar","nltk-derived"],"install":[{"cmd":"pip install pygmars","lang":"bash","label":"Install via pip"}],"dependencies":[{"reason":"Pygmars accepts Pygments lexers as input","package":"pygments","optional":true}],"imports":[{"note":"","wrong":"","symbol":"Lexer","correct":"from pygmars import Lexer"},{"note":"","wrong":"","symbol":"Grammar","correct":"from pygmars import Grammar"},{"note":"","wrong":"","symbol":"Parser","correct":"from pygmars import Parser"},{"note":"","wrong":"","symbol":"ParseString","correct":"from pygmars import ParseString"},{"note":"","wrong":"","symbol":"Tree","correct":"from pygmars import Tree"}],"quickstart":{"code":"from pygmars import Lexer, Grammar, Parser\n\n# Define a simple grammar for arithmetic expressions\nlexer = Lexer()\nlexer.add_token('NUM', r'\\d+')\nlexer.add_token('PLUS', r'\\+')\nlexer.add_token('MINUS', r'-')\nlexer.add_token('TIMES', r'\\*')\nlexer.add_token('DIVIDE', r'/')\nlexer.add_token('LPAREN', r'\\(')\nlexer.add_token('RPAREN', r'\\)')\nlexer.add_ignore(' ')\n\n# Right-recursive productions: left recursion (e.g. E -> E + T) is not supported\ngrammar = Grammar()\ngrammar.add_production('E', ['T', 'PLUS', 'E'])\ngrammar.add_production('E', ['T'])\ngrammar.add_production('T', ['F', 'TIMES', 'T'])\ngrammar.add_production('T', ['F'])\ngrammar.add_production('F', ['LPAREN', 'E', 'RPAREN'])\ngrammar.add_production('F', ['NUM'])\n\nparser = Parser(grammar)\n\ntokens = lexer.tokenize('2+3*4')\nparse_tree = parser.parse(tokens)\nprint(parse_tree.pformat())","lang":"python","description":"Create a lexer for arithmetic expressions, define a right-recursive grammar, and parse a simple expression."},"warnings":[{"fix":"If you parsed structured data and used str(ParseString(...)) expecting formatting, you now need to call .format() or access attributes directly.","message":"In version 1.0.0, the __str__ method of ParseString no longer formats the string, breaking code that relied on it (PR #14).","severity":"breaking","affected_versions":">=1.0.0"},{"fix":"Ensure you import only from pygmars, not from nltk.","message":"The library is derived from NLTK but has diverged. Do not mix imports; use pygmars exclusively for lexing/parsing tasks.","severity":"deprecated","affected_versions":"all"},{"fix":"Order token additions from most specific to least specific to avoid regex precedence issues.","message":"Lexer token regexes are compiled in order of addition; overlapping patterns may lead to unexpected tokenization. Add more specific tokens first.","severity":"gotcha","affected_versions":"all"},{"fix":"Restructure grammars to be right-recursive or use a different parsing strategy.","message":"The library does not support left-recursive grammars directly. Defining left-recursive productions (e.g., E -> E + T) will cause recursion depth error.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Rewrite grammar to eliminate left recursion. For example, replace E -> E + T | T with left-recursion removed grammar: E -> T E', E' -> + T E' | ε (use empty production via explicit epsilon).","cause":"Left-recursive grammar productions cause infinite recursion in the parser.","error":"RecursionError: maximum recursion depth exceeded"},{"fix":"Ensure add_ignore(' ') uses a string argument, and method calls are correct: lexer.add_ignore(' '), not lexer.add_ignore = ' '.","cause":"Trying to use add_ignore() or add_token() with a string that is not a valid regex or mis-calling the method.","error":"TypeError: 'str' object is not callable"},{"fix":"Check the regex syntax. Escaping backslashes correctly, e.g., r'\\d+' not '\\d+'.","cause":"Invalid regex pattern passed to add_token or add_ignore.","error":"ValueError: Token pattern '...' is not a valid regex"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}