1414# limitations under the License.
1515################################################################################
1616
17- """Java tree-sitter queries and helpers.
18-
19- Provides utilities to parse Java source text with tree-sitter and extract
20- classes, methods, interfaces, invocations, comments, and related metadata.
17+ """Java Tree-sitter queries and helpers module.
18+
19+ This module provides comprehensive utilities for parsing Java source code using
20+ Tree-sitter and extracting various code elements. It serves as the foundation
21+ for syntactic analysis in CLDK's Java support.
22+
23+ The module provides extraction for:
24+ - **Classes and interfaces**: Names, inheritance, implementations
25+ - **Methods**: Names, signatures, annotations, bodies
26+ - **Imports**: Package and type imports
27+ - **Invocations**: Method calls and type references
28+ - **Comments**: Block comments, line comments, and Javadoc
29+
30+ Key features:
31+ - S-expression query support for pattern matching
32+ - AST traversal utilities
33+ - Code transformation (comment removal, prettification)
34+ - Test method detection (JUnit annotations)
35+
36+ See Also:
37+ - :class:`~cldk.analysis.java.JavaAnalysis`: High-level Java analysis.
38+ - :class:`TreesitterPython`: Equivalent for Python parsing.
2139"""
2240import logging
2341from itertools import groupby
3149logger = logging .getLogger (__name__ )
3250
3351LANGUAGE : Language = Language (tsjava .language ())
52+ """The Tree-sitter Language object for Java grammar."""
53+
3454PARSER : Parser = Parser (LANGUAGE )
55+ """Global Tree-sitter parser instance configured for Java."""
3556
3657
3758# pylint: disable=too-many-public-methods
3859class TreesitterJava :
39- """Tree-sitter helpers for Java use cases."""
60+ """Tree-sitter helper class for Java source code parsing and analysis.
61+
62+ This class provides comprehensive utilities for parsing Java source code
63+ using Tree-sitter. It offers methods for:
64+ - Syntax validation
65+ - AST generation and traversal
66+ - Code element extraction (classes, methods, imports)
67+ - Pattern matching via S-expression queries
68+ - Code transformation (comment removal)
69+
70+ The class keeps no instance state; every call goes through the shared
71+ module-level parser and language objects (thread-safety is unverified).
72+
73+ Attributes:
74+ None. This class is stateless and provides only utility methods.
75+
76+ See Also:
77+ - :class:`~cldk.analysis.java.JavaAnalysis`: High-level analysis facade.
78+ - :class:`TreesitterPython`: Equivalent for Python.
79+ """
4080
4181 def __init__ (self ) -> None :
82+ """Initialize the TreesitterJava helper.
83+
84+ Creates a new instance of the Java Tree-sitter helper. This class
85+ is stateless; initialization performs no setup as all parsing uses
86+ module-level parser and language objects.
87+ """
4288 pass
4389
4490 def method_is_not_in_class (self , method_name : str , class_body : str ) -> bool :
45- """Return True if the method is not declared in the class body.
91+ """Check if a method is NOT declared in a class body.
92+
93+ Searches for method declarations in the given class body and checks
94+ if the specified method name is absent.
4695
4796 Args:
48- method_name (str): Method name to check.
49- class_body (str): Class source body.
97+ method_name: The method name to check for (without parentheses
98+ or parameters).
99+ class_body: The Java class source code to search within.
50100
51101 Returns:
52- bool: True if absent, False otherwise.
102+ ``True`` if the method is NOT found in the class body,
103+ ``False`` if the method IS found.
53104 """
54105 methods_in_class = self .frame_query_and_capture_output ("(method_declaration name: (identifier) @name)" , class_body )
55106
56107 return method_name not in {method .node .text .decode () for method in methods_in_class }
57108
58109 def is_parsable (self , code : str ) -> bool :
59- """Check whether the Java code parses without syntax errors.
110+ """Check if the given code is syntactically valid Java.
111+
112+ Parses the code using Tree-sitter and recursively checks for ERROR
113+ nodes in the resulting AST. Returns ``True`` only if the entire
114+ code parses without syntax errors.
60115
61116 Args:
62- code (str): Source code.
117+ code: A string containing Java source code to validate. Can be
118+ a complete compilation unit, a class, a method, or any
119+ syntactically valid Java fragment.
63120
64121 Returns:
65- bool: True if parsable, False otherwise.
122+ ``True`` if the code parses without syntax errors, ``False``
123+ otherwise. Also returns ``False`` if parsing triggers a
124+ RecursionError (for extremely nested code).
125+
126+ Note:
127+ This checks syntactic validity only, not semantic correctness.
128+ Code with undefined types or methods will still be "parsable".
129+
130+ See Also:
131+ :meth:`get_raw_ast`: To obtain the AST for further analysis.
66132 """
67133
68134 def syntax_error (node ):
@@ -83,13 +149,29 @@ def syntax_error(node):
83149 return False
84150
85151 def get_raw_ast (self , code : str ) -> Tree :
86- """Parse and return the raw AST.
152+ """Parse code and return the Tree-sitter AST.
153+
154+ Parses the provided Java source code using Tree-sitter and returns
155+ the resulting abstract syntax tree. The AST can be traversed to
156+ extract syntactic information about the code structure.
87157
88158 Args:
89- code (str): Source code.
159+ code: A string containing Java source code to parse.
90160
91161 Returns:
92- Tree: Parsed AST.
162+ A Tree-sitter ``Tree`` object representing the parsed AST. The
163+ tree's ``root_node`` provides access to the entire syntax tree:
164+ - ``root_node.type``: Typically ``"program"`` for Java
165+ - ``root_node.children``: Top-level declarations
166+ - ``root_node.text``: Original source bytes
167+
168+ Note:
169+ If the source contains syntax errors, Tree-sitter returns a tree
170+ with ERROR nodes at parse error locations. Use :meth:`is_parsable`
171+ to check for valid syntax first.
172+
173+ See Also:
174+ :meth:`is_parsable`: To check whether the code has syntax errors.
93175 """
94176 return PARSER .parse (bytes (code , "utf-8" ))
95177
@@ -168,14 +250,29 @@ def get_all_interfaces(self, source_code: str) -> Set[str]:
168250 return {interface .node .text .decode () for interface in interfaces }
169251
170252 def frame_query_and_capture_output (self , query : str , code_to_process : str ) -> Captures :
171- """Run a query and return captures from the AST.
253+ """Execute a Tree-sitter query and return captured nodes.
254+
255+ Parses the provided source code and runs the given S-expression
256+ query against the AST, returning all captured nodes.
172257
173258 Args:
174- query (str): S-expression query string.
175- code_to_process (str): Java source.
259+ query: A Tree-sitter S-expression query string defining the
260+ pattern to match and captures to extract. Captures are
261+ denoted with ``@name`` syntax.
262+ code_to_process: Java source code to parse and query.
176263
177264 Returns:
178- Captures: Query captures for the AST root.
265+ A :class:`~cldk.analysis.commons.treesitter.models.Captures`
266+ object containing all nodes matched by the query, with their
267+ capture names and node references.
268+
269+ Note:
270+ The query syntax follows Tree-sitter's S-expression format.
271+ See Tree-sitter documentation for query syntax details.
272+
273+ See Also:
274+ :class:`~cldk.analysis.commons.treesitter.models.Captures`:
275+ The return type for captured nodes.
179276 """
180277 framed_query : Query = LANGUAGE .query (query )
181278 tree = PARSER .parse (bytes (code_to_process , "utf-8" ))
0 commit comments