PerlOnJava/src/main/java/org/perlonjava/frontend/parser/TokenUtils.java at 8ee80477265d9c3a6d74af70ba0b83610bf41ddf · fglock/PerlOnJava · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
package org.perlonjava.frontend.parser;

import org.perlonjava.app.cli.CompilerOptions;

import org.perlonjava.frontend.lexer.LexerToken;
import org.perlonjava.frontend.lexer.LexerTokenType;
import org.perlonjava.runtime.runtimetypes.PerlCompilerException;

import java.util.List;

/**
 * Static helpers for reading lexer tokens while parsing: joining a token range
 * back into text, peeking at or consuming the next non-whitespace token, and
 * peeking at or consuming a single character of the current token.
 *
 * <p>All methods that take a {@link Parser} read (and, for the consuming
 * variants, update) {@code parser.tokenIndex}.
 */
public class TokenUtils {

    /**
     * Converts a range of tokens into a single string of text, excluding EOF tokens.
     *
     * <p>The range is clamped to the bounds of the list, so an out-of-range
     * {@code codeStart} or {@code codeEnd} does not throw; an empty range yields
     * an empty string.
     *
     * @param tokens    The list of LexerToken objects to process.
     * @param codeStart The starting index in the list of tokens (inclusive).
     * @param codeEnd   The ending index in the list of tokens (inclusive).
     * @return A string representing the concatenated text of the specified token range.
     */
    public static String toText(List<LexerToken> tokens, int codeStart, int codeEnd) {
        StringBuilder sb = new StringBuilder();
        int first = Math.max(codeStart, 0);
        int last = Math.min(codeEnd, tokens.size() - 1);
        for (int i = first; i <= last; i++) {
            LexerToken tok = tokens.get(i);
            if (tok.type != LexerTokenType.EOF) {
                sb.append(tok.text);
            }
        }
        return sb.toString();
    }

    /**
     * Peeks at the next non-whitespace token in the parser's token list without consuming it.
     * Whitespace is consumed: {@code parser.tokenIndex} is moved to the index returned by
     * {@code Whitespace.skipWhitespace}.
     *
     * @param parser The parser containing the token list and current token index.
     * @return The next non-whitespace LexerToken, or a fresh EOF token if the end of the list is reached.
     */
    public static LexerToken peek(Parser parser) {
        parser.tokenIndex = Whitespace.skipWhitespace(parser, parser.tokenIndex, parser.tokens);
        if (parser.tokenIndex >= parser.tokens.size()) {
            return new LexerToken(LexerTokenType.EOF, "");
        }
        return parser.tokens.get(parser.tokenIndex);
    }

    /**
     * Consumes a single character from the current token in the parser's token list.
     * If the token contains only one character, it advances to the next token;
     * otherwise the token's text is shortened in place by one character.
     *
     * <p>Special case: when the remainder of a split token is {@code "="} and the
     * following token is also {@code "="}, the following token's text is rewritten
     * to {@code "=="} and the parser is advanced onto it.
     *
     * <p>Unlike {@link #peek(Parser)}, this method does not skip whitespace.
     *
     * @param parser The parser containing the token list and current token index.
     * @return The consumed character as a string, or an empty string at EOF or past
     *         the end of the token list.
     */
    public static String consumeChar(Parser parser) {
        if (parser.tokenIndex >= parser.tokens.size()) {
            return "";
        }
        LexerToken token = parser.tokens.get(parser.tokenIndex);
        if (token.type == LexerTokenType.EOF) {
            return "";
        }
        if (token.text.length() == 1) {
            // Whole token is used up: move on to the next one.
            parser.tokenIndex++;
            return token.text;
        }

        // Split the token: hand back the first character, keep the rest in place.
        String str = token.text.substring(0, 1);
        token.text = token.text.substring(1);
        if (CompilerOptions.DEBUG_ENABLED) parser.ctx.logDebug("consumeChar left: " + token);

        // Only look ahead when a following token actually exists; the split token
        // may be the last entry in the list.
        int nextIndex = parser.tokenIndex + 1;
        if (token.text.equals("=") && nextIndex < parser.tokens.size()) {
            LexerToken next = parser.tokens.get(nextIndex);
            if (next.text.equals("=")) {
                // Merge the two adjacent "=" into one "==" token and step onto it.
                next.text = "==";
                parser.tokenIndex = nextIndex;
                // Log the token directly rather than via peek(), so that enabling
                // debug output cannot move parser.tokenIndex.
                if (CompilerOptions.DEBUG_ENABLED) parser.ctx.logDebug("consumeChar resync: " + next);
            }
        }
        return str;
    }

    /**
     * Peeks at the next character in the current token in the parser's token list without consuming it.
     * Neither the token nor {@code parser.tokenIndex} is modified.
     *
     * @param parser The parser containing the token list and current token index.
     * @return The next character as a string, or an empty string at EOF or past
     *         the end of the token list.
     */
    public static String peekChar(Parser parser) {
        if (parser.tokenIndex >= parser.tokens.size()) {
            return "";
        }
        LexerToken token = parser.tokens.get(parser.tokenIndex);
        if (token.type == LexerTokenType.EOF) {
            return "";
        }
        // For a one-character token this is the whole text; otherwise its first character.
        return token.text.substring(0, 1);
    }

    /**
     * Consumes the next non-whitespace token in the parser's token list.
     * Whitespace is skipped first; {@code parser.tokenIndex} is then advanced past
     * the returned token. At the end of the list the index is not advanced.
     *
     * @param parser The parser containing the token list and current token index.
     * @return The consumed LexerToken, or a fresh EOF token if the end of the list is reached.
     */
    public static LexerToken consume(Parser parser) {
        parser.tokenIndex = Whitespace.skipWhitespace(parser, parser.tokenIndex, parser.tokens);
        if (parser.tokenIndex >= parser.tokens.size()) {
            return new LexerToken(LexerTokenType.EOF, "");
        }
        return parser.tokens.get(parser.tokenIndex++);
    }

    /**
     * Consumes the next non-whitespace token in the parser's token list and checks its type.
     * Throws an exception if the token type does not match the expected type.
     * The token is consumed before the check, so the exception is created with the
     * parser's token index as it stands after consumption.
     *
     * @param parser The parser containing the token list and current token index.
     * @param type   The expected LexerTokenType of the token to consume.
     * @return The consumed LexerToken.
     * @throws PerlCompilerException if the token type does not match the expected type.
     */
    public static LexerToken consume(Parser parser, LexerTokenType type) {
        LexerToken token = consume(parser);
        if (token.type != type) {
            throw new PerlCompilerException(
                    parser.tokenIndex, "syntax error", parser.ctx.errorUtil);
        }
        return token;
    }

    /**
     * Consumes the next non-whitespace token in the parser's token list and checks its type and text.
     * Throws an exception if the token type or text does not match the expected values.
     * The token is consumed before the check, so the exception is created with the
     * parser's token index as it stands after consumption.
     *
     * @param parser The parser containing the token list and current token index.
     * @param type   The expected LexerTokenType of the token to consume.
     * @param text   The expected text of the token to consume.
     * @throws PerlCompilerException if the token type or text does not match the expected values.
     */
    public static void consume(Parser parser, LexerTokenType type, String text) {
        LexerToken token = consume(parser);
        if (token.type != type || !token.text.equals(text)) {
            throw new PerlCompilerException(
                    parser.tokenIndex,
                    "syntax error",
                    parser.ctx.errorUtil);
        }
    }
}