-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathFileHandle.java
More file actions
334 lines (308 loc) · 15.3 KB
/
FileHandle.java
File metadata and controls
334 lines (308 loc) · 15.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
package org.perlonjava.frontend.parser;
import org.perlonjava.frontend.astnode.IdentifierNode;
import org.perlonjava.frontend.astnode.Node;
import org.perlonjava.frontend.astnode.OperatorNode;
import org.perlonjava.frontend.lexer.LexerToken;
import org.perlonjava.frontend.lexer.LexerTokenType;
import org.perlonjava.runtime.runtimetypes.GlobalVariable;
import org.perlonjava.runtime.runtimetypes.NameNormalizer;

import java.util.regex.Pattern;

import static org.perlonjava.frontend.parser.TokenUtils.peek;
/**
 * FileHandle parser for PerlOnJava.
 * <p>
 * This class is responsible for parsing Perl file handle expressions in various contexts,
 * particularly in print/printf statements and other I/O operations. Perl has several ways
 * to specify file handles:
 *
 * <ul>
 * <li>Bareword file handles: {@code print STDOUT "hello"}</li>
 * <li>Glob references: {@code print *STDOUT "hello"}</li>
 * <li>Scalar variables containing file handles: {@code print $fh "hello"}</li>
 * <li>Bracketed forms: {@code print {STDOUT} "hello"} or {@code print {$fh} "hello"}</li>
 * </ul>
 * <p>
 * The parser must distinguish between file handle expressions and regular expressions,
 * which is particularly tricky for scalar variables that could be either a file handle
 * or part of an arithmetic expression.
 *
 * @see ParsePrimary
 * @see GlobalVariable
 */
public class FileHandle {

    /**
     * Shape of a bareword that may be vivified as a file handle on first use:
     * upper-case ASCII letters, digits and underscores, not starting with a digit.
     */
    private static final Pattern UPPERCASE_BAREWORD = Pattern.compile("^[A-Z_][A-Z0-9_]*$");

    /**
     * Parses a file handle expression from the token stream.
     * <p>
     * This method attempts to parse various forms of Perl file handle syntax:
     *
     * <h3>Bareword File Handles</h3>
     * Traditional Perl bareword file handles like STDOUT, STDERR, STDIN, or user-defined
     * file handles. These can appear with or without curly braces:
     * <pre>
     * print STDOUT "hello";      # bareword
     * print {STDOUT} "hello";    # bracketed bareword
     * </pre>
     *
     * <h3>Glob References</h3>
     * File handles can be specified as glob references using the * or \* syntax:
     * <pre>
     * print *STDOUT "hello";     # glob reference
     * print {*STDOUT} "hello";   # bracketed glob
     * print {\*STDOUT} "hello";  # reference to glob
     * </pre>
     *
     * <h3>Scalar Variables</h3>
     * Modern Perl often stores file handles in scalar variables:
     * <pre>
     * open my $fh, '&gt;', 'file.txt';
     * print $fh "hello";         # scalar file handle
     * print {$fh} "hello";       # bracketed scalar
     * </pre>
     *
     * <h3>Disambiguation</h3>
     * When parsing scalar variables without brackets, the parser must determine if the
     * scalar is a file handle or part of an expression. For example:
     * <pre>
     * print $fh + 2;             # $fh is NOT a file handle (arithmetic expression)
     * print $fh "text";          # $fh IS a file handle
     * print $fh;                 # treated as the value to print, not a file handle
     * </pre>
     * <p>
     * A leading <code>{</code> that looks like the start of an anonymous hash
     * (see {@link #looksLikeAnonymousHash(Parser)}) makes this method return {@code null}
     * without consuming anything.
     * <p>
     * NOTE(review): on the other {@code null} paths tokens may already have been consumed
     * (for example the scalar parsed by {@code ParsePrimary.parsePrimary}); callers
     * presumably save and restore {@code parser.tokenIndex} themselves - confirm.
     *
     * @param parser The parser instance containing the token stream and parse context
     * @return A Node representing the parsed file handle, or null if no valid file handle was found
     */
    public static Node parseFileHandle(Parser parser) {
        boolean hasBracket = false;

        // Perl allows {FILEHANDLE} syntax for disambiguation, but a `{` may also open
        // an anonymous hash; rule that out before consuming the brace.
        if (peek(parser).text.equals("{")) {
            if (looksLikeAnonymousHash(parser)) {
                return null;
            }
            TokenUtils.consume(parser);
            hasBracket = true;
        }

        LexerToken token = peek(parser);
        Node fileHandle = null;

        if (hasBracket && token.type == LexerTokenType.OPERATOR
                && (token.text.equals("*") || token.text.equals("\\") || token.text.equals("\""))) {
            // Bracketed glob or string: {*STDOUT}, {\*STDOUT}, {"STDOUT"}.
            // ParsePrimary.parsePrimary() builds the matching glob / reference / string node.
            fileHandle = ParsePrimary.parsePrimary(parser);
        } else if (token.type == LexerTokenType.IDENTIFIER) {
            // Bareword handles (STDOUT, LOG, Some::Package::HANDLE) or, when bracketed,
            // a call / method chain that yields the handle.
            fileHandle = parseIdentifierHandle(parser, hasBracket);
        } else if (token.text.equals("$")) {
            // Scalar handle. Bracketed form is parsed as a full expression so that
            // method chains such as { $self->stdout } are captured whole.
            fileHandle = hasBracket
                    ? parser.parseExpression(0)
                    : parseUnbracketedScalarHandle(parser);
        } else if (hasBracket) {
            // Any other bracketed content is parsed as a general expression.
            fileHandle = parser.parseExpression(0);
        }

        // A bracketed handle must be closed by the matching brace.
        if (hasBracket) {
            TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}");
        }
        return fileHandle;
    }

    /**
     * Lookahead that decides whether the <code>{</code> at {@code parser.tokenIndex} opens an
     * anonymous hash rather than a file handle block. Nothing is consumed and
     * {@code parser.tokenIndex} is not modified.
     * <p>
     * Recognised hash openings (whitespace tokens between the parts are ignored):
     * <pre>
     * { identifier =&gt; ...   { identifier , ...
     * { "string" =&gt; ...     { "string" , ...
     * { 'string' =&gt; ...     { 'string' , ...
     * { number , ...
     * </pre>
     * Limits of the heuristic, kept as they are: a number is only matched when followed
     * by a comma, and the string scan stops at the first operator token equal to the
     * opening quote, so it does not account for backslash-escaped quotes.
     *
     * @param parser parser whose {@code tokenIndex} refers to the <code>{</code> token
     * @return true if the tokens after the brace match one of the hash patterns above
     */
    private static boolean looksLikeAnonymousHash(Parser parser) {
        int firstIdx = skipWhitespaceTokens(parser, parser.tokenIndex + 1);
        if (firstIdx >= parser.tokens.size()) {
            return false;
        }

        LexerToken afterBrace = parser.tokens.get(firstIdx);

        if (afterBrace.type == LexerTokenType.IDENTIFIER) {
            // { a => ... } or { a, ... }
            return isListSeparatorAt(parser, firstIdx + 1, true);
        }

        // Strings are lexed with the opening quote as an OPERATOR token.
        if (afterBrace.type == LexerTokenType.OPERATOR
                && (afterBrace.text.equals("\"") || afterBrace.text.equals("'"))) {
            // { "a", ... } or { "a" => ... }
            int afterString = indexAfterClosingQuote(parser, firstIdx + 1, afterBrace.text);
            return isListSeparatorAt(parser, afterString, true);
        }

        if (afterBrace.type == LexerTokenType.NUMBER) {
            // { 1, ... }
            return isListSeparatorAt(parser, firstIdx + 1, false);
        }

        return false;
    }

    /**
     * Returns the index of the first non-whitespace token at or after {@code index},
     * or {@code parser.tokens.size()} if only whitespace tokens remain.
     */
    private static int skipWhitespaceTokens(Parser parser, int index) {
        while (index < parser.tokens.size()
                && parser.tokens.get(index).type == LexerTokenType.WHITESPACE) {
            index++;
        }
        return index;
    }

    /**
     * Checks whether the first non-whitespace token at or after {@code index} is a comma
     * or, when {@code acceptFatComma} is set, a fat comma ({@code =>}). Only the token
     * text is compared, not its type.
     */
    private static boolean isListSeparatorAt(Parser parser, int index, boolean acceptFatComma) {
        int sepIdx = skipWhitespaceTokens(parser, index);
        if (sepIdx >= parser.tokens.size()) {
            return false;
        }
        String text = parser.tokens.get(sepIdx).text;
        return text.equals(",") || (acceptFatComma && text.equals("=>"));
    }

    /**
     * Scans forward from {@code index} for an OPERATOR token whose text equals
     * {@code quoteChar} and returns the index just past it, or
     * {@code parser.tokens.size()} if no such token exists.
     */
    private static int indexAfterClosingQuote(Parser parser, int index, String quoteChar) {
        while (index < parser.tokens.size()) {
            LexerToken candidate = parser.tokens.get(index);
            index++;
            if (candidate.type == LexerTokenType.OPERATOR && candidate.text.equals(quoteChar)) {
                break;
            }
        }
        return index;
    }

    /**
     * Parses a file handle that starts with an identifier token.
     * <p>
     * Inside brackets, an identifier immediately followed by {@code (} or {@code ->}
     * is a function call or method chain (<code>{ get_fh() }</code>, <code>{ shift->stdout }</code>)
     * and is parsed as a full expression. Otherwise the identifier is read as a bareword
     * and resolved through {@link #parseBarewordHandle(Parser, String)}. An unknown
     * all-upper-case bareword is vivified as a global IO handle and resolved again.
     *
     * @param parser     parser whose {@code tokenIndex} refers to the identifier token
     * @param hasBracket whether an opening <code>{</code> was consumed before the identifier
     * @return the file handle node, or null if the identifier is not a file handle
     */
    private static Node parseIdentifierHandle(Parser parser, boolean hasBracket) {
        if (hasBracket) {
            // Guard the one-token lookahead so an identifier at the very end of the
            // token list cannot raise IndexOutOfBoundsException.
            int nextIdx = parser.tokenIndex + 1;
            if (nextIdx < parser.tokens.size()) {
                String nextText = parser.tokens.get(nextIdx).text;
                if (nextText.equals("(") || nextText.equals("->")) {
                    return parser.parseExpression(0);
                }
            }
        }

        // parseSubroutineIdentifier handles qualified names like Some::Package::HANDLE
        String name = IdentifierParser.parseSubroutineIdentifier(parser);
        if (name == null) {
            return null;
        }

        Node fileHandle = parseBarewordHandle(parser, name);

        // Do not treat compile-time magic like __PACKAGE__ as print filehandles:
        // they match the upper-case bareword shape but must fall through to the expression list
        // (perl5_t/t/comp/package.t test 13: print __PACKAGE__ eq 'Pkg' ? ...).
        if (fileHandle == null
                && UPPERCASE_BAREWORD.matcher(name).matches()
                && !isDoubleUnderscoreMagicBareword(name)) {
            GlobalVariable.vivifyGlobalIO(normalizeBarewordHandle(parser, name));
            fileHandle = parseBarewordHandle(parser, name);
        }
        return fileHandle;
    }

    /**
     * Parses an unbracketed scalar ({@code $fh}) and decides whether it is a file handle.
     * <pre>
     * print $fh "text";    # file handle
     * print $fh + 2;       # arithmetic        - not a file handle
     * print $fh{key};      # hash access       - not a file handle
     * print $fh[0];        # array access      - not a file handle
     * print $fh->method;   # method call       - not a file handle
     * print $fh;           # nothing follows   - $fh is the thing to print
     * </pre>
     *
     * @param parser parser positioned at the {@code $} token
     * @return the parsed scalar node if it is used as a file handle, otherwise null
     */
    private static Node parseUnbracketedScalarHandle(Parser parser) {
        Node fileHandle = ParsePrimary.parsePrimary(parser);

        String nextText = peek(parser).text;
        // `<<` is an infix operator, but it also introduces a heredoc, so it does not
        // disqualify the scalar.
        boolean startsHeredoc = "<<".equals(nextText);
        // "{[".contains(...) matches `{` and `[`; String.contains is also true for an
        // empty string, so an empty token text takes this path as well.
        if (!startsHeredoc
                && (ParserTables.INFIX_OP.contains(nextText)
                || "{[".contains(nextText)
                || "->".equals(nextText))) {
            fileHandle = null;
        }

        // "print $fh;" with nothing after is NOT a file handle,
        // but "print $fh 'text';" IS a file handle.
        if (ListParser.looksLikeEmptyList(parser)) {
            fileHandle = null;
        }
        return fileHandle;
    }

    /**
     * Resolves a bareword to a file handle node.
     * <p>
     * The name is first normalized with {@link #normalizeBarewordHandle(Parser, String)}.
     * It is rejected when a CODE ref is defined under that name, and accepted when it
     * exists in the global IO table, is one of the standard handles, or has an all-digit
     * base name.
     *
     * @param parser parser supplying the current package and the token index recorded in the nodes
     * @param name   bareword as written in the source
     * @return a node equivalent to {@code \*NAME} for an accepted name, otherwise null
     */
    public static Node parseBarewordHandle(Parser parser, String name) {
        name = normalizeBarewordHandle(parser, name);

        // A name with a CODE ref is a subroutine, not a filehandle. This covers
        // subroutines imported via typeglob assignment
        // (e.g., *main::myconfig = \&Config::myconfig), which creates a glob entry
        // with only a CODE slot, not an IO slot.
        if (GlobalVariable.isGlobalCodeRefDefined(name)) {
            return null;
        }

        boolean knownHandle = GlobalVariable.existsGlobalIO(name)
                || isStandardFilehandle(name)
                || isAllDigitGlobName(name);
        if (!knownHandle) {
            return null;
        }

        // Create a GLOB reference for the file handle, like `\*FH`
        int position = parser.tokenIndex;
        Node identifier = new IdentifierNode(name, position);
        Node glob = new OperatorNode("*", identifier, position);
        return new OperatorNode("\\", glob, position);
    }

    /**
     * Perl allows numeric typeglob names such as {@code open 0; print <0>}, where filehandle
     * {@code 0} shares the {@code *0} stash entry with {@code $0} (program name).
     * <p>
     * Only ASCII digits count: Perl's digit-only special identifiers are made of POSIX
     * digits, so other Unicode decimal digits do not qualify.
     *
     * @param normalizedName possibly package-qualified name; only the part after the last
     *                       {@code ::} is inspected
     * @return true if that base name is non-empty and consists solely of {@code 0}-{@code 9}
     */
    private static boolean isAllDigitGlobName(String normalizedName) {
        int separator = normalizedName.lastIndexOf("::");
        String base = separator >= 0 ? normalizedName.substring(separator + 2) : normalizedName;
        if (base.isEmpty()) {
            return false;
        }
        for (int i = 0; i < base.length(); i++) {
            char c = base.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks if a normalized name represents a standard filehandle.
     *
     * @param normalizedName The normalized filehandle name (e.g., "main::STDOUT")
     * @return true if the name is {@code main::STDOUT}, {@code main::STDERR} or {@code main::STDIN}
     */
    private static boolean isStandardFilehandle(String normalizedName) {
        return "main::STDOUT".equals(normalizedName)
                || "main::STDERR".equals(normalizedName)
                || "main::STDIN".equals(normalizedName);
    }

    /**
     * Qualifies a bareword handle name with a package.
     * <p>
     * {@code STDOUT}, {@code STDERR} and {@code STDIN} are always placed in {@code main};
     * any other name is normalized against the parser's current package via
     * {@code NameNormalizer.normalizeVariableName} (e.g. "HANDLE" becomes
     * "Package::HANDLE").
     *
     * @param parser parser whose symbol table supplies the current package
     * @param name   bareword handle name as written in the source
     * @return the normalized handle name
     */
    public static String normalizeBarewordHandle(Parser parser, String name) {
        boolean standardHandle =
                name.equals("STDOUT") || name.equals("STDERR") || name.equals("STDIN");
        // Standard handles belong to main:: regardless of the current package context.
        String packageName = standardHandle
                ? "main"
                : parser.ctx.symbolTable.getCurrentPackage();
        return NameNormalizer.normalizeVariableName(name, packageName);
    }

    /** {@code __FOO__} tokens (e.g. {@code __PACKAGE__}) are not print bareword filehandles. */
    private static boolean isDoubleUnderscoreMagicBareword(String name) {
        return name.length() >= 4 && name.startsWith("__") && name.endsWith("__");
    }
}