PerlOnJava/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java at 98796ea72c7de49403db552047d2c2ffb0d461f9 · fglock/PerlOnJava · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
package org.perlonjava.frontend.parser;

import org.perlonjava.app.cli.CompilerOptions;

import org.perlonjava.backend.jvm.EmitterContext;
import org.perlonjava.frontend.astnode.*;
import org.perlonjava.frontend.lexer.Lexer;
import org.perlonjava.frontend.lexer.LexerToken;
import org.perlonjava.frontend.lexer.LexerTokenType;
import org.perlonjava.runtime.runtimetypes.PerlCompilerException;

import java.util.ArrayList;
import java.util.List;
import java.util.Stack;

/**
 * Parser for double-quoted strings with case modification and quotemeta support.
 *
 * <p>This class extends StringSegmentParser to handle Perl's double-quoted string syntax,
 * which includes variable interpolation, escape sequences, and case modification operators.
 * Double-quoted strings in Perl support several advanced features:</p>
 *
 * <ul>
 *   <li><strong>Variable interpolation:</strong> $var, @array, ${expr}, @{expr}</li>
 *   <li><strong>Escape sequences:</strong> \n, \t, \x{hex}, \N{unicode_name}</li>
 *   <li><strong>Case modification:</strong> \U...\E (uppercase), \L...\E (lowercase)</li>
 *   <li><strong>Single character case:</strong> \\u (next char upper), \l (next char lower)</li>
 *   <li><strong>Quote metacharacters:</strong> \Q...\E (escape regex metacharacters)</li>
 * </ul>
 *
 * <h3>Case Modification System</h3>
 * <p>The parser maintains a stack of active case modifiers. When a case modifier like \U
 * is encountered, all subsequent text and interpolated variables are wrapped in the
 * appropriate case conversion function. Modifiers can be nested, but conflicting
 * modifiers (like \L inside \U) will terminate the previous modifier.</p>
 *
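 * <h3>Examples</h3>
 * <pre>
 * "Hello \U$name\E"        # uc($name)
 * "Hello \\u$name"          # ucfirst($name)
 * "\LABC\Udef\E"           # lc("ABC") followed by uc("def"): \U terminates the preceding \L
 * "\L\U$text"              # rejected with a syntax error: no content between \L and \U
 * "Value: \Q$special\E"    # quotemeta($special)
 * </pre>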
 *
 * @see StringSegmentParser
 * @see StringParser
 */
public class StringDoubleQuoted extends StringSegmentParser {

    /**
     * Stack of active case modifiers, with the most recently started one on top.
     *
     * <p>Case modifiers can be nested, so we use a stack to track them.
     * When \E is encountered, we pop and apply the most recent modifier.
     * The stack allows nesting like \U outer \Q inner \E \E. Note that \L and \U
     * never sit directly on top of each other: {@code startCaseModifier} applies
     * and removes the one on top before pushing the other.
     */
    private final Stack<CaseModifier> caseModifiers = new Stack<>();

    /**
     * Flag indicating whether we're inside a \Q...\E quotemeta region.
     *
     * <p>Set to true when \Q is parsed and reset to false by the \E that ends the
     * region. While true, {@code parseEscapeSequence} keeps every backslash as a
     * literal character; only \E (ends the region) and a repeated \Q (ignored)
     * are recognized.
     *
     * <p>NOTE(review): whether $ and @ are also treated as literals inside the
     * region is not decided in this class (presumably StringSegmentParser reads
     * this flag) - confirm.
     */
    private boolean inQuotemeta = false;

    /**
     * Private constructor for StringDoubleQuoted parser.
     *
     * <p>Use {@link #parseDoubleQuotedString} factory method to create instances.
     * All arguments are forwarded unchanged to the StringSegmentParser constructor.
     *
     * @param ctx                 The emitter context for error reporting
     * @param tokens              The tokenized string content
     * @param parser              The parser instance for complex expressions
     * @param tokenIndex          The starting token position
     * @param isRegex             True if parsing regex pattern (affects interpolation)
     * @param parseEscapes        True to process escape sequences, false to preserve them
     * @param interpolateVariable Whether to interpolate variables
     * @param isRegexReplacement  Whether this is in a regex replacement context
     */
    private StringDoubleQuoted(EmitterContext ctx, List<LexerToken> tokens, Parser parser, int tokenIndex, boolean isRegex, boolean parseEscapes, boolean interpolateVariable, boolean isRegexReplacement) {
        super(ctx, tokens, parser, tokenIndex, isRegex, parseEscapes, interpolateVariable, isRegexReplacement);
    }

    /**
     * Parses a double-quoted string, handling escape sequences and variable interpolation.
     *
     * <p>This is the simplest entry point for parsing double-quoted strings. It is a
     * convenience overload that delegates to the variant taking shared heredoc nodes,
     * passing {@code null} for them. The delegate chain ends in the overload that:
     * <ul>
     *   <li>Creates a lexer for the string content</li>
     *   <li>Tokenizes the content</li>
     *   <li>Creates a parser instance</li>
     *   <li>Runs the StringDoubleQuoted parser</li>
     * </ul>
     *
     * @param ctx                 The emitter context for logging and error handling
     * @param rawStr              The parsed string data containing the string content and position info
     * @param parseEscapes        Whether to process escape sequences or preserve them literally
     * @param interpolateVariable Whether to interpolate variables
     * @param isRegexReplacement  Whether this is in a regex replacement context
     * @return An AST node representing the parsed string (StringNode, BinaryOperatorNode for join, etc.)
     */
    static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedString rawStr, boolean parseEscapes, boolean interpolateVariable, boolean isRegexReplacement) {
        return parseDoubleQuotedString(ctx, rawStr, parseEscapes, interpolateVariable, isRegexReplacement, null);
    }

    /**
     * Parses a double-quoted string with optional shared heredoc state.
     *
     * <p>This overloaded version allows sharing heredoc nodes with a parent parser,
     * enabling proper handling of heredocs within string interpolation contexts.
     * It delegates to the variant that also accepts an original parser, passing
     * {@code null} for it, so no parser context flags are copied.
     *
     * @param ctx                 The emitter context for logging and error handling
     * @param rawStr              The parsed string data containing the string content and position info
     * @param parseEscapes        Whether to process escape sequences or preserve them literally
     * @param interpolateVariable Whether to interpolate variables
     * @param isRegexReplacement  Whether this is in a regex replacement context
     * @param sharedHeredocNodes  Optional list of heredoc nodes to share with parent parser (may be null)
     * @return An AST node representing the parsed string (StringNode, BinaryOperatorNode for join, etc.)
     */
    static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedString rawStr, boolean parseEscapes, boolean interpolateVariable, boolean isRegexReplacement, List<OperatorNode> sharedHeredocNodes) {
        return parseDoubleQuotedString(ctx, rawStr, parseEscapes, interpolateVariable, isRegexReplacement, sharedHeredocNodes, null);
    }

    /**
     * Parses a double-quoted string, optionally sharing heredoc state and inheriting
     * context flags from an enclosing parser.
     *
     * <p>The string content is re-tokenized with its own {@link Lexer}, wrapped in a
     * fresh {@link Parser}, and then handed to a StringDoubleQuoted instance that
     * produces the final AST node.
     *
     * @param ctx                 The emitter context for logging and error handling
     * @param rawStr              The parsed string data containing the string content and position info
     * @param parseEscapes        Whether to process escape sequences or preserve them literally
     * @param interpolateVariable Whether to interpolate variables
     * @param isRegexReplacement  Whether this is in a regex replacement context
     * @param sharedHeredocNodes  Optional list of heredoc nodes to share with parent parser (may be null)
     * @param originalParser      Optional original parser whose isInMethod / isInClassBlock flags are copied (may be null)
     * @return An AST node representing the parsed string
     */
    static Node parseDoubleQuotedString(EmitterContext ctx, StringParser.ParsedString rawStr, boolean parseEscapes, boolean interpolateVariable, boolean isRegexReplacement, List<OperatorNode> sharedHeredocNodes, Parser originalParser) {
        // Only the first buffer is used for a double-quoted string
        var content = rawStr.buffers.getFirst();
        var startIndex = rawStr.next;

        // When escapes are not parsed, the string is treated as a regex and
        // escapes are preserved for the regex engine
        var isRegex = !parseEscapes;
        if (CompilerOptions.DEBUG_ENABLED) {
            ctx.logDebug("parseDoubleQuotedString isRegex:" + isRegex);
        }

        // Tokenize the string content on its own
        var innerTokens = new Lexer(content).tokenize();

        // Build the inner parser, sharing heredoc nodes when they were supplied
        Parser innerParser;
        if (sharedHeredocNodes == null) {
            innerParser = new Parser(ctx, innerTokens);
        } else {
            innerParser = new Parser(ctx, innerTokens, sharedHeredocNodes);
        }

        // Carry over context flags from the enclosing parser, if there is one
        if (originalParser != null) {
            innerParser.isInMethod = originalParser.isInMethod;
            innerParser.isInClassBlock = originalParser.isInClassBlock;
        }

        // Base line number so that __LINE__ inside @{[...]} interpolation reports the
        // line of the original source; rawStr.index is the position of the opening
        // delimiter in the outer token list.
        innerParser.baseLineNumber = ctx.errorUtil.getLineNumberAccurate(rawStr.index);

        var stringParser = new StringDoubleQuoted(ctx, innerTokens, innerParser, startIndex, isRegex, parseEscapes, interpolateVariable, isRegexReplacement);

        // Offset and original content are recorded for error reporting
        stringParser.setOriginalTokenOffset(startIndex);
        stringParser.setOriginalStringContent(content);

        return stringParser.parse();
    }

    /**
     * Records a finished segment and registers it with every active case modifier.
     *
     * <p>Each modifier currently on the stack receives the node, so that nested
     * modifiers all see the same content. Once the node is registered, any
     * single-character modifier (\\u, \l) on top of the stack that has now affected
     * content is applied and removed.
     *
     * @param node The AST node to add as a segment
     */
    @Override
    protected void addStringSegment(Node node) {
        segments.add(node);

        // Every active modifier, from outermost to innermost, tracks the new node
        caseModifiers.forEach(active -> active.addSegment(node));

        // Single-char modifiers end as soon as they have something to act on
        checkSingleCharModifiers();
    }

    /**
     * Appends literal text and notes when a single-character modifier has been used.
     *
     * <p>After delegating to the base class, a non-empty append marks the modifier on
     * top of the stack as having affected content if that modifier is a
     * single-character one (\\u, \l). The modifier itself is applied later, when the
     * segment is added.
     *
     * @param text The literal text to append
     */
    @Override
    protected void appendToCurrentSegment(String text) {
        super.appendToCurrentSegment(text);

        // Nothing to record for empty text or when no modifier is active
        if (text.isEmpty() || caseModifiers.isEmpty()) {
            return;
        }

        var innermost = caseModifiers.peek();
        if (innermost.isSingleChar) {
            innermost.hasAffectedContent = true;
        }
    }

    /**
     * Parses the string, then closes every case modifier that is still open.
     *
     * <p>Modifiers left on the stack when parsing ends (a missing \E, as in
     * "text \U more text") are applied innermost first, which behaves as if the
     * missing \E markers were present at the end of the string. The result is rebuilt
     * from the final segment list.
     *
     * @return The final AST node representing the parsed string
     */
    @Override
    public Node parse() {
        // The base class fills the segment list; its return value is not used because
        // the result is rebuilt below after the remaining modifiers are applied.
        super.parse();

        while (!caseModifiers.isEmpty()) {
            var unclosed = caseModifiers.pop();
            applyCaseModifier(unclosed);
        }

        return createJoinNode(segments);
    }

    /**
     * Applies and removes single-character modifiers that have done their job.
     *
     * <p>Starting at the top of the stack, each single-character modifier (\\u or \l)
     * that has already affected content is popped and applied. The loop stops at the
     * first modifier that is range-based or has not affected anything yet.
     */
    private void checkSingleCharModifiers() {
        while (!caseModifiers.isEmpty()) {
            var innermost = caseModifiers.peek();
            if (!innermost.isSingleChar || !innermost.hasAffectedContent) {
                return;
            }
            caseModifiers.pop();
            applyCaseModifier(innermost);
        }
    }

    /**
     * Wraps the segments tracked by a modifier in the matching Perl function call.
     *
     * <p>Steps:
     * <ol>
     *   <li>Translate the modifier type into an operator name
     *       (uc, lc, fc, ucfirst, lcfirst, quotemeta)</li>
     *   <li>Join the tracked segments into a single node and wrap it in that operator</li>
     *   <li>Replace the tracked segments in the main list with the wrapped node,
     *       at the position of the first tracked segment</li>
     *   <li>Make the modifiers still on the stack track the wrapped node instead of
     *       the segments it replaced</li>
     * </ol>
     *
     * <p>Nothing happens when the modifier tracked no segments or has an unknown type.
     *
     * @param modifier The case modifier to apply
     */
    private void applyCaseModifier(CaseModifier modifier) {
        var affected = modifier.segments;
        if (affected.isEmpty()) {
            return;
        }

        // Modifier letter -> Perl function name
        String functionName;
        switch (modifier.type) {
            case "U":
                functionName = "uc";        // \U - uppercase
                break;
            case "L":
                functionName = "lc";        // \L - lowercase
                break;
            case "F":
                functionName = "fc";        // \F - foldcase
                break;
            case "u":
                functionName = "ucfirst";   // \\u - uppercase first
                break;
            case "l":
                functionName = "lcfirst";   // \l - lowercase first
                break;
            case "Q":
                functionName = "quotemeta"; // \Q - quote metacharacters
                break;
            default:
                return;
        }

        var joinedContent = createJoinNode(affected);
        var wrapped = new OperatorNode(functionName, joinedContent, parser.tokenIndex);

        // The wrapped node takes the place of the first tracked segment
        int insertAt = segments.indexOf(affected.getFirst());
        if (insertAt < 0) {
            return;
        }
        segments.removeAll(affected);
        segments.add(insertAt, wrapped);

        // Enclosing modifiers that tracked any of the replaced segments now track the
        // wrapped node instead, which keeps nesting intact
        for (CaseModifier enclosing : caseModifiers) {
            boolean trackedAny = enclosing.segments.removeAll(affected);
            if (trackedAny) {
                enclosing.segments.add(wrapped);
            }
        }
    }

    /**
     * Collapses a list of segments into one node.
     *
     * <ul>
     *   <li>No segments: an empty string node</li>
     *   <li>Exactly one segment that is a StringNode, or any single segment in regex
     *       context: that segment itself</li>
     *   <li>Anything else (including a single non-string segment outside regex
     *       context): join("", segments...)</li>
     * </ul>
     *
     * @param nodes The list of nodes to join
     * @return A single node representing the joined content
     */
    private Node createJoinNode(List<Node> nodes) {
        int count = nodes.size();
        if (count == 0) {
            return new StringNode("", parser.tokenIndex);
        }

        if (count == 1) {
            var only = nodes.getFirst();
            // A plain string needs no join; in regex context the node is returned
            // directly so qr overload can work
            if (only instanceof StringNode || isRegex) {
                return only;
            }
            // Otherwise fall through: a single non-string segment is still joined so
            // that it is converted to a string
        }

        var joinedElements = new ListNode(parser.tokenIndex);
        joinedElements.elements.addAll(nodes);
        var separator = new StringNode("", parser.tokenIndex);
        return new BinaryOperatorNode("join", separator, joinedElements, parser.tokenIndex);
    }

    /**
     * Dispatches an escape sequence to the handler for the current context.
     *
     * <ul>
     *   <li>inQuotemeta=true: only \E and \Q are recognized, everything else is literal</li>
     *   <li>parseEscapes=true: escapes such as \n are converted to the actual character</li>
     *   <li>parseEscapes=false: escapes are preserved for the regex engine</li>
     * </ul>
     */
    @Override
    protected void parseEscapeSequence() {
        if (inQuotemeta) {
            parseEscapeInsideQuotemeta();
        } else if (parseEscapes) {
            parseDoubleQuotedEscapes();
        } else {
            parseDoubleQuotedEscapesRegex();
        }
    }

    /**
     * Handles a backslash met inside a \Q...\E region.
     */
    private void parseEscapeInsideQuotemeta() {
        var following = tokens.get(parser.tokenIndex).text;

        if (following.startsWith("E")) {
            // \E closes the quotemeta region
            TokenUtils.consumeChar(parser);
            flushCurrentSegment();
            if (!caseModifiers.isEmpty() && "Q".equals(caseModifiers.peek().type)) {
                applyCaseModifier(caseModifiers.pop());
            }
            inQuotemeta = false;
            return;
        }

        if (following.startsWith("Q")) {
            // A repeated \Q inside the region is idempotent and is dropped
            TokenUtils.consumeChar(parser);
            return;
        }

        // Anything else stays literal, backslash included
        currentSegment.append("\\");
    }

    /**
     * Processes an escape sequence when escapes are preserved (regex context).
     *
     * <p>Only the case-modification and quotemeta markers are interpreted here.
     * Every other escape is copied to the current segment with its backslash so
     * that the regex engine can process it.
     */
    private void parseDoubleQuotedEscapesRegex() {
        // The character right after the backslash selects the behavior
        var escape = TokenUtils.consumeChar(parser);

        switch (escape) {
            // \E - end of the innermost case modifier
            case "E" -> {
                flushCurrentSegment();
                if (!caseModifiers.isEmpty()) {
                    applyCaseModifier(caseModifiers.pop());
                }
            }

            // \U, \L, \F - range modifiers, active until \E
            case "U", "L", "F" -> startCaseModifier(escape, false);

            // \\u, \l - modifiers for the next character only
            case "u", "l" -> startCaseModifier(escape, true);

            // \Q - start of a quotemeta region
            case "Q" -> {
                flushCurrentSegment();
                inQuotemeta = true;
                caseModifiers.push(new CaseModifier("Q", false));
            }

            // Everything else is kept verbatim, backslash included
            default -> appendToCurrentSegment("\\" + escape);
        }
    }

    /**
     * Processes escape sequences for double-quoted strings.
     *
     * <p>This method handles all escape sequences valid in double-quoted strings:
     * <ul>
     *   <li>Standard escapes: \n, \t, \r, etc.</li>
     *   <li>Literal escapes: \\, \"</li>
     *   <li>Octal escapes: \123 (a backslash followed by 8 or 9 is not octal and
     *       yields the digit itself)</li>
     *   <li>Hex escapes: \x41, \x{263A}</li>
     *   <li>Control chars: \cA</li>
     *   <li>Unicode names: \N{LATIN SMALL LETTER A}</li>
     *   <li>Case modifiers: \U, \L, \F, \\u, \l, \E</li>
     *   <li>Quotemeta: \Q...\E</li>
     * </ul>
     *
     * <p>An escape that is not recognized yields the escaped character itself.
     *
     * @throws PerlCompilerException if \c is not followed by a character
     */
    private void parseDoubleQuotedEscapes() {
        var token = tokens.get(parser.tokenIndex);

        // Handle octal escapes (\123)
        if (token.type == LexerTokenType.NUMBER) {
            var firstDigit = TokenUtils.consumeChar(parser);

            // A NUMBER token may start with 8 or 9, which are not octal digits.
            // Feeding them to Integer.parseInt(..., 8) would throw
            // NumberFormatException, so they are passed through as the literal
            // character, the same way any other unrecognized escape is handled.
            if (firstDigit.length() != 1 || firstDigit.charAt(0) < '0' || firstDigit.charAt(0) > '7') {
                appendToCurrentSegment(firstDigit);
                return;
            }

            var octalStr = new StringBuilder(firstDigit);
            var chr = TokenUtils.peekChar(parser);
            // Collect up to 3 octal digits
            while (octalStr.length() < 3 && chr.compareTo("0") >= 0 && chr.compareTo("7") <= 0) {
                octalStr.append(TokenUtils.consumeChar(parser));
                chr = TokenUtils.peekChar(parser);
            }
            // Convert octal to character
            appendToCurrentSegment(String.valueOf((char) Integer.parseInt(octalStr.toString(), 8)));
            return;
        }

        // Consume the character after the backslash
        var escape = TokenUtils.consumeChar(parser);

        // Trailing backslash at end of string content - treat as literal \
        // This happens when $\ consumes a \ for the variable name, leaving
        // a lone \ before end-of-string with no escape partner.
        if (escape.isEmpty()) {
            appendToCurrentSegment("\\");
            return;
        }

        switch (escape) {
            // Standard escapes - convert to actual characters
            case "\\" -> appendToCurrentSegment("\\");
            case "\"" -> appendToCurrentSegment("\"");
            case "n" -> appendToCurrentSegment("\n");
            case "t" -> appendToCurrentSegment("\t");
            case "r" -> appendToCurrentSegment("\r");
            case "f" -> appendToCurrentSegment("\f");
            case "b" -> appendToCurrentSegment("\b");
            case "a" -> appendToCurrentSegment(String.valueOf((char) 7));  // ASCII bell
            case "e" -> appendToCurrentSegment(String.valueOf((char) 27)); // ASCII escape
            case "$" -> appendToCurrentSegment("$");

            // Control character: \cX
            case "c" -> {
                var controlChar = TokenUtils.consumeChar(parser);
                if (controlChar.isEmpty()) {
                    throw new PerlCompilerException(parser.tokenIndex, "Missing control char name in \\c", parser.ctx.errorUtil);
                }
                var c = controlChar.charAt(0);
                // Letters map to 1..26, '@' to 0, '[' through '_' to 27..31, '?' to DEL;
                // any other character is kept as is
                var result = (c >= 'A' && c <= 'Z') ? String.valueOf((char) (c - 'A' + 1))
                        : (c >= 'a' && c <= 'z') ? String.valueOf((char) (c - 'a' + 1))
                        : c == '@' ? String.valueOf((char) 0)
                        : (c >= '[' && c <= '_') ? String.valueOf((char) (c - '[' + 27))
                        : c == '?' ? String.valueOf((char) 127)
                        : String.valueOf(c);
                appendToCurrentSegment(result);
            }

            // Case modification end marker
            case "E" -> {
                // Flush any pending literal text
                flushCurrentSegment();
                // Pop and apply the most recent case modifier
                if (!caseModifiers.isEmpty()) {
                    applyCaseModifier(caseModifiers.pop());
                }
            }

            // Case modifiers
            case "U" -> startCaseModifier("U", false);  // Uppercase until \E
            case "L" -> startCaseModifier("L", false);  // Lowercase until \E
            case "F" -> startCaseModifier("F", false);  // Foldcase until \E
            case "u" -> startCaseModifier("u", true);   // Uppercase next char
            case "l" -> startCaseModifier("l", true);   // Lowercase next char

            // Quotemeta modifier
            case "Q" -> {
                flushCurrentSegment();
                inQuotemeta = true;
                caseModifiers.push(new CaseModifier("Q", false));
            }

            // Other escape sequences
            case "x" -> handleHexEscape();           // \x41 or \x{263A}
            case "o" -> handleOctalEscape();         // \o{100}
            case "N" -> handleUnicodeNameEscape();   // \N{UNICODE NAME}

            // Unknown escape - treat as literal character
            default -> appendToCurrentSegment(escape);
        }
    }

    /**
     * Starts a new case modifier.
     *
     * <p>This method handles the interaction between case modifiers:
     * <ul>
     *   <li>Flushes pending literal text before starting the modifier</li>
     *   <li>Resolves the conflict between \L and \U: when one is started while the
     *       other is on top of the stack, the one on top is applied and removed first</li>
     *   <li>Rejects \L\U and \U\L when there is no content between them</li>
     *   <li>Pushes the new modifier onto the stack</li>
     * </ul>
     *
     * @param type         The modifier type ("U", "L", "F", "u", or "l")
     * @param isSingleChar True for \\u and \l (affect only next character)
     * @throws PerlCompilerException if \L and \U directly follow each other with no
     *                               content in between
     */
    private void startCaseModifier(String type, boolean isSingleChar) {
        // Flush any pending literal text
        flushCurrentSegment();

        if (!caseModifiers.isEmpty()) {
            var top = caseModifiers.peek();
            // \L and \U cancel each other out when they meet
            boolean conflicting = (top.type.equals("L") && type.equals("U"))
                    || (top.type.equals("U") && type.equals("L"));
            if (conflicting) {
                if (top.segments.isEmpty() && currentSegment.isEmpty()) {
                    // Perl doesn't allow \L\U or \U\L with no content between.
                    // Reported as a PerlCompilerException, like the other escape
                    // errors in this class, so the token position is carried along.
                    throw new PerlCompilerException(parser.tokenIndex,
                            "syntax error: \\" + top.type + "\\" + type + " is not allowed",
                            parser.ctx.errorUtil);
                }
                // Apply the previous modifier before starting the new one
                applyCaseModifier(caseModifiers.pop());
            }
        }

        // Push the new modifier onto the stack
        caseModifiers.push(new CaseModifier(type, isSingleChar));
    }

    /**
     * Bookkeeping record for one active case modifier.
     *
     * <p>Each instance holds:
     * <ul>
     *   <li>the modifier letter (U, L, F, u, l, Q)</li>
     *   <li>whether it is single-character (u, l) or range-based (U, L, F, Q)</li>
     *   <li>the segments added while the modifier was active</li>
     *   <li>whether a single-character modifier has affected any content yet</li>
     * </ul>
     */
    private static class CaseModifier {
        /**
         * Modifier letter: "U", "L", "F", "u", "l", or "Q"
         */
        final String type;

        /**
         * True for \\u and \l (next character only), false for range modifiers
         */
        final boolean isSingleChar;

        /**
         * Segments tracked by this modifier, in the order they were added
         */
        final List<Node> segments = new ArrayList<>();

        /**
         * Set once a single-character modifier has had content to act on
         */
        boolean hasAffectedContent = false;

        /**
         * Creates a modifier with no tracked segments.
         *
         * @param type         The modifier letter
         * @param isSingleChar Whether the modifier applies to the next character only
         */
        CaseModifier(String type, boolean isSingleChar) {
            this.isSingleChar = isSingleChar;
            this.type = type;
        }

        /**
         * Tracks a segment under this modifier.
         *
         * @param node The segment to track
         */
        void addSegment(Node node) {
            segments.add(node);
            // A single-char modifier counts as used as soon as it tracks a segment;
            // for range modifiers the flag is left unchanged
            hasAffectedContent |= isSingleChar;
        }
    }
}