SPARK Parser Toolkit

1.9.0 · active · verified Thu Apr 16

SPARK is a lightweight, pure-Python parser toolkit for context-free grammars, based on the Earley algorithm. It lets developers build scanners and parsers for custom languages or data formats, with grammar rules written in the docstrings of Python methods. The current version is 1.9.0; releases appear periodically to address Python compatibility and improve internal mechanics.

Common errors

Warnings

Install

Imports

Quickstart

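This quickstart demonstrates how to define a simple arithmetic scanner and parser by subclassing `GenericScanner` and `GenericParser`. It tokenizes an input string and then parses the resulting tokens according to the grammar rules to compute the result. Note that the scanner accepts the characters `+ - * / ( )`, but the grammar shown here defines only addition.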

from spark_parser import GenericParser, GenericScanner

# 1. Define your scanner (lexer) by subclassing GenericScanner
class SimpleCalcScanner(GenericScanner):
    """Hand-written scanner for the calculator example.

    Whitespace is skipped, runs of digits become ``('NUMBER', int)`` tokens,
    and each of the characters ``+ - * / ( )`` becomes a ``(char, char)``
    token. Tokens are plain tuples of the form ``(kind, value)``.
    """

    def tokenize(self, input_string):
        """Split *input_string* into a list of ``(kind, value)`` tuples.

        Raises:
            ValueError: on any character that is not whitespace, a digit,
                or one of ``+-*/()``.
        """
        length = len(input_string)
        found = []
        pos = 0
        while pos < length:
            ch = input_string[pos]
            if ch.isspace():
                pos += 1
            elif ch.isdigit():
                # Locate the end of the digit run, then convert it in one go.
                end = pos + 1
                while end < length and input_string[end].isdigit():
                    end += 1
                found.append(('NUMBER', int(input_string[pos:end])))
                pos = end
            elif ch in "+-*/()":
                # Operators and parentheses use the character as both the
                # token kind and the token value.
                found.append((ch, ch))
                pos += 1
            else:
                raise ValueError(f"Invalid character: {ch}")
        return found

# 2. Define your parser (grammar rules) by subclassing GenericParser
class SimpleCalcParser(GenericParser):
    """Parser that evaluates sums of integers while parsing.

    It consumes the ``(kind, value)`` tuples produced by
    ``SimpleCalcScanner.tokenize``. The docstrings of the ``p_*`` methods are
    the grammar rules themselves, so explanatory text for those methods is
    kept in ``#`` comments rather than in their docstrings.
    """

    def __init__(self, start_symbol='expr'):
        """Create the parser with *start_symbol* as the grammar's goal."""
        GenericParser.__init__(self, start_symbol)

    def typestring(self, token):
        """Return the terminal name that *token* stands for.

        The scanner emits plain tuples, which never compare equal to a
        terminal name such as ``'NUMBER'`` or ``'+'``. Reporting the first
        tuple element as the token's type lets the parser match tuple tokens
        against the terminals used in the grammar rules.
        """
        return token[0]

    # Grammar rules live in the docstrings of methods whose names start
    # with 'p_'; each method receives the values of the rule's right-hand
    # side symbols, in order.
    def p_expr_add(self, args):
        '''
        expr ::= expr + term
        '''
        # args[1] is the '+' token itself; only the operand values matter.
        return args[0] + args[2]

    def p_expr_term(self, args):
        '''
        expr ::= term
        '''
        return args[0]

    def p_term_num(self, args):
        '''
        term ::= NUMBER
        '''
        # args[0] is the raw ('NUMBER', value) token from the scanner;
        # return the integer so that p_expr_add adds numbers, not tuples.
        return args[0][1]

# 3. Instantiate scanner and parser, then tokenize and parse
# Create one scanner and one parser (the parser uses its default start symbol).
scanner = SimpleCalcScanner()
parser = SimpleCalcParser()

# Scan the sample expression into a token list, then hand that list to the parser.
text_to_parse = "10 + 5"
tokens = scanner.tokenize(text_to_parse)
result = parser.parse(tokens)

# Optional debug output, left disabled:
# print(f"Parsed result for '{text_to_parse}': {result}") # Expected: 15
# Self-check for the example: "10 + 5" is expected to evaluate to 15.
assert result == 15, "Parsing failed!"

view raw JSON →