Tree-sitter Java Grammar

0.23.5 · active · verified Thu Apr 09

The `tree-sitter-java` library provides the Tree-sitter grammar definition and source files for the Java programming language. It is designed to be used with the `tree-sitter` Python binding (or other language bindings) to parse Java code into a concrete syntax tree (CST), which can in turn serve as the basis for an abstract syntax tree (AST). The current version is `0.23.5`, and it follows the release cadence of the upstream Tree-sitter grammar.

Warnings

Install

Imports

Quickstart

This quickstart assumes that `tree-sitter-java` and the `tree-sitter` binding are already installed (for example with `pip install tree-sitter tree-sitter-java`). It loads the Java grammar, parses a simple Java code snippet, and prints the top-level nodes of the resulting syntax tree.

import tree_sitter
import os
import pathlib
import tree_sitter_java # This package provides the grammar source files

# Load the Java grammar.
# The tree-sitter-java package ships a compiled binding, so no C compiler or
# build step is required: `tree_sitter_java.language()` returns a handle to the
# compiled grammar and `tree_sitter.Language` wraps it. The legacy
# `Language.build_library(...)` / `Language(path, name)` workflow is not
# available in current py-tree-sitter releases, and the installed package does
# not contain a `grammar/parser.c` to compile.
java_language = tree_sitter.Language(tree_sitter_java.language())

# Create a parser for Java. The language is passed to the constructor;
# current py-tree-sitter releases no longer provide `Parser.set_language()`.
parser = tree_sitter.Parser(java_language)

# Example Java code
java_code = """
class MyClass {
    /* A simple main method */
    public static void main(String[] args) {
        String message = "Hello, Tree-sitter!";
        System.out.println(message);
    }
}
"""

# Parse the code. Tree-sitter operates on bytes, so encode the source first.
tree = parser.parse(java_code.encode('utf-8'))

# Get the root node of the syntax tree
root_node = tree.root_node

# Print the type of each top-level node and the first 70 characters of its
# source text. `child.text` is bytes because the tree was parsed from bytes.
print("\nParsed Java AST (first few children):")
for child in root_node.children:
    print(f"- Type: {child.type}, Text: {child.text.decode('utf-8')[:70].strip()}...")

# No explicit cleanup is needed: Parser and Tree have no close() method and
# are released by normal garbage collection.

view raw JSON →