Skip to content

Commit 59073f2

Browse files
committed
[WIP] Parser重写,支持StrictLevel解析严格程度(自定义ErrorStrategy),更好的ErrorListener等
1 parent 0183cec commit 59073f2

7 files changed

Lines changed: 328 additions & 8 deletions

File tree

i18n/Locale.Designer.cs

Lines changed: 72 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

i18n/Locale.resx

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,4 +178,28 @@
178178
<data name="ConnectingSlideIn103" xml:space="preserve">
179179
<value>The chart contains features not supported by MA2 1.03 (multi-segment connecting slides). Only the first slide segment was kept.</value>
180180
</data>
181+
<data name="AntlrUnknownError" xml:space="preserve">
182+
<value>Simai parser error: {0}</value>
183+
</data>
184+
<data name="LexerNoViableAltException" xml:space="preserve">
185+
<value>Unrecognized character: {0} (typo?). Ignored.</value>
186+
</data>
187+
<data name="LexerNoViableAltExceptionStrict" xml:space="preserve">
188+
<value>Unrecognized character: {0} (typo?)</value>
189+
</data>
190+
<data name="RecoverInlineExtraneousToken" xml:space="preserve">
191+
<value>Extraneous token: {0} (typo?). Ignored.</value>
192+
</data>
193+
<data name="RecoverInlineMissingToken" xml:space="preserve">
194+
<value>Missing token {1} before {0} (typo?). Inserted automatically.</value>
195+
</data>
196+
<data name="InputMismatchException" xml:space="preserve">
197+
<value>Unexpected token: {0}. Expected: {1}</value>
198+
</data>
199+
<data name="NoViableAltException" xml:space="preserve">
200+
<value>Could not parse chart markup: {0}</value>
201+
</data>
202+
<data name="LaxTryfixReminder" xml:space="preserve">
203+
<value>. The affected note(s) were discarded.</value>
204+
</data>
181205
</root>

i18n/Locale.zh-hans.resx

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,4 +178,28 @@
178178
<data name="ConnectingSlideIn103" xml:space="preserve">
179179
<value>谱面中含有MA2 1.03所不支持的特性(多段连接的星星)。仅留下第一段星星。</value>
180180
</data>
181+
<data name="AntlrUnknownError" xml:space="preserve">
182+
<value>Simai解析器发生错误: {0}</value>
183+
</data>
184+
<data name="LexerNoViableAltException" xml:space="preserve">
185+
<value>无法解析的字符: {0} (是不是手滑按错了?),已忽略。</value>
186+
</data>
187+
<data name="LexerNoViableAltExceptionStrict" xml:space="preserve">
188+
<value>无法解析的字符: {0} (是不是手滑按错了?)</value>
189+
</data>
190+
<data name="RecoverInlineExtraneousToken" xml:space="preserve">
191+
<value>多余的符号: {0} (是不是手滑按错了?),已忽略该符号。</value>
192+
</data>
193+
<data name="RecoverInlineMissingToken" xml:space="preserve">
194+
<value>在 {0} 前似乎缺失了符号 {1} (是不是漏打了?),已尝试帮您补全。</value>
195+
</data>
196+
<data name="InputMismatchException" xml:space="preserve">
197+
<value>遇到了不应在这里出现的符号: {0} ,这里应该出现的是 {1} </value>
198+
</data>
199+
<data name="NoViableAltException" xml:space="preserve">
200+
<value>无法解析的谱面标记: {0} </value>
201+
</data>
202+
<data name="LaxTryfixReminder" xml:space="preserve">
203+
<value>。出问题的音符已被丢弃。</value>
204+
</data>
181205
</root>

i18n/Locale.zh-hant.resx

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,4 +178,28 @@
178178
<data name="ConnectingSlideIn103" xml:space="preserve">
179179
<value>譜面中含有 MA2 1.03 所不支援的特性(多段連接的星星)。僅保留第一段星星。</value>
180180
</data>
181+
<data name="AntlrUnknownError" xml:space="preserve">
182+
<value>Simai 解析器發生錯誤: {0}</value>
183+
</data>
184+
<data name="LexerNoViableAltException" xml:space="preserve">
185+
<value>無法解析的字元: {0}(是否手誤?),已忽略。</value>
186+
</data>
187+
<data name="LexerNoViableAltExceptionStrict" xml:space="preserve">
188+
<value>無法解析的字元: {0}(是否手誤?)</value>
189+
</data>
190+
<data name="RecoverInlineExtraneousToken" xml:space="preserve">
191+
<value>多餘的符號: {0}(是否手誤?),已忽略該符號。</value>
192+
</data>
193+
<data name="RecoverInlineMissingToken" xml:space="preserve">
194+
<value>在 {0} 前似乎缺失了符號 {1}(是否漏打?),已嘗試為您補全。</value>
195+
</data>
196+
<data name="InputMismatchException" xml:space="preserve">
197+
<value>遇到了不應在這裡出現的符號: {0} ,這裡應該出現的是 {1} </value>
198+
</data>
199+
<data name="NoViableAltException" xml:space="preserve">
200+
<value>無法解析的譜面標記: {0} </value>
201+
</data>
202+
<data name="LaxTryfixReminder" xml:space="preserve">
203+
<value>。出問題的音符已被捨棄。</value>
204+
</data>
181205
</root>

parser/simai/ErrorStrategy.cs

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
using Antlr4.Runtime;
2+
using Antlr4.Runtime.Misc;
3+
using MuConvert.Antlr;
4+
using MuConvert.utils;
5+
using static MuConvert.utils.Alert.LEVEL;
6+
7+
namespace MuConvert.parser;
8+
9+
/// <summary>
/// Receives ANTLR lexer and parser errors and records each of them as a localized
/// alert on the owning Simai parser instead of writing to the console.
/// </summary>
/// <param name="simaiParser">
/// Parser whose <c>alerts</c> list receives the diagnostics and whose
/// <c>StrictLevel</c> decides their severity.
/// </param>
public class ErrorListener(SimaiParser simaiParser): BaseErrorListener, IAntlrErrorListener<int>
{
    /// <summary>
    /// Error callback for the syntax (parser) phase.
    /// </summary>
    /// <param name="output">Unused.</param>
    /// <param name="recognizer">The reporting recognizer; cast to <c>Parser</c>.</param>
    /// <param name="offendingSymbol">Token at which the error was detected.</param>
    /// <param name="line">Line number stored on the alert.</param>
    /// <param name="charPositionInLine">Unused.</param>
    /// <param name="msg">ANTLR's own message; only its prefix is inspected, or it is shown verbatim for unknown errors.</param>
    /// <param name="e">The recognition exception, or null when no exception accompanies the report.</param>
    public override void SyntaxError(TextWriter output, IRecognizer recognizer, IToken offendingSymbol, int line, int charPositionInLine, string msg, RecognitionException? e)
    {
        var parser = (Parser)recognizer;
        if (e == null)
        {
            // No exception means the error was handled by recoverInline, so a warning is enough.
            // (Original author's note: this only happens in the lenient modes; strict mode
            // forbids recoverInline and raises InputMismatch directly.)
            // ANTLR's messages are fixed English text, so the prefix is compared ordinally
            // rather than with the current culture.
            if (msg.StartsWith("extraneous", StringComparison.Ordinal))
            {
                simaiParser.alerts.Add(new Alert(Warning,
                    string.Format(Locale.RecoverInlineExtraneousToken, GetTokenErrorDisplay(offendingSymbol)),
                    line: line, relevantNote: _contextText(parser.Context)));
                return;
            }

            if (msg.StartsWith("missing", StringComparison.Ordinal))
            {
                simaiParser.alerts.Add(new Alert(Warning,
                    string.Format(Locale.RecoverInlineMissingToken, GetTokenErrorDisplay(offendingSymbol), parser.GetExpectedTokens().ToString(parser.Vocabulary)),
                    line: line, relevantNote: _contextText(parser.Context)));
                return;
            }
        }

        // In Lax mode these errors are downgraded to warnings and get a reminder appended.
        bool isLax = simaiParser.StrictLevel == SimaiParser.StrictLevelEnum.Lax;
        Alert.LEVEL level = isLax ? Warning : Error;
        string msgPostFix = isLax ? Locale.LaxTryfixReminder : "";
        string message;
        switch (e)
        {
            case InputMismatchException:
                message = string.Format(Locale.InputMismatchException, GetTokenErrorDisplay(offendingSymbol), e.GetExpectedTokens().ToString(recognizer.Vocabulary));
                message += msgPostFix;
                break;
            case NoViableAltException ne:
                // Show the input text spanning from the start token to the offending token.
                var iSt = (ITokenStream)parser.InputStream;
                message = string.Format(Locale.NoViableAltException, EscapeWSAndQuote(iSt == null ? "<unknown input>" : (ne.StartToken.Type != -1 ? iSt.GetText(ne.StartToken, ne.OffendingToken) : "<EOF>")));
                message += msgPostFix;
                break;
            default:
                // Anything else (including unrecognized inline recoveries) is passed through verbatim.
                message = string.Format(Locale.AntlrUnknownError, msg);
                break;
        }
        simaiParser.alerts.Add(new Alert(level, message, line: line, relevantNote: _contextText(parser.Context)));
    }

    /// <summary>
    /// Error callback for the lexical (lexer) phase.
    /// </summary>
    /// <param name="output">Unused.</param>
    /// <param name="recognizer">The reporting recognizer; cast to <c>Lexer</c>.</param>
    /// <param name="offendingSymbol">Unused.</param>
    /// <param name="line">Line number stored on the alert.</param>
    /// <param name="charPositionInLine">Unused.</param>
    /// <param name="msg">Unused; the message is rebuilt from the offending text.</param>
    /// <param name="e">The lexer exception; wrapped and rethrown in strict mode.</param>
    /// <exception cref="ParseCanceledException">Thrown in Strict mode after the error alert is recorded.</exception>
    public void SyntaxError(TextWriter output, IRecognizer recognizer, int offendingSymbol, int line, int charPositionInLine,
        string msg, RecognitionException e)
    {
        var lexer = (Lexer)recognizer;
        // Written after the implementation of Lexer.NotifyListeners: extracts the exact text of the offending token.
        var input = (ICharStream)lexer.InputStream;
        string errContent = lexer.GetErrorDisplay(input.GetText(Interval.Of(lexer.TokenStartCharIndex, input.Index)));

        if (simaiParser.StrictLevel == SimaiParser.StrictLevelEnum.Strict)
        {
            // Strict mode: no recovery allowed, record the error and abort by throwing.
            simaiParser.alerts.Add(new Alert(Error,
                string.Format(Locale.LexerNoViableAltExceptionStrict, errContent),
                line: line, relevantNote: errContent));
            throw new ParseCanceledException(e);
        }

        simaiParser.alerts.Add(new Alert(Warning,
            string.Format(Locale.LexerNoViableAltException, errContent),
            line: line, relevantNote: errContent));
    }

    // Turns a rule context into the text stored in an alert's relevantNote: walks up the
    // parents until a context with at least 5 characters of text is found; null if none is.
    private string? _contextText(RuleContext? context)
    {
        for (var current = context; current != null; current = current.Parent)
        {
            // GetText() is evaluated once per ancestor and reused for both the check and the result.
            string text = current.GetText();
            if (text.Length >= 5) return text;
        }
        return null;
    }

    # region Exposes DefaultErrorStrategy's GetTokenErrorDisplay / EscapeWSAndQuote helpers
    private class _ES : DefaultErrorStrategy
    {
        public new string GetTokenErrorDisplay(IToken t) => base.GetTokenErrorDisplay(t);
        public new string EscapeWSAndQuote(string s) => base.EscapeWSAndQuote(s);
    }
    private readonly _ES _es = new(); // only used to call the two methods above
    private string GetTokenErrorDisplay(IToken t) => _es.GetTokenErrorDisplay(t);
    private string EscapeWSAndQuote(string s) => _es.EscapeWSAndQuote(s);
    # endregion
}
97+
98+
/// <summary>
/// The most lenient error strategy: it tries as hard as it can to recover so that
/// a local mistake does not bring down the whole chart.
/// </summary>
public class LaxErrorStrategy : DefaultErrorStrategy
{
    /// <summary>
    /// Single-token deletion, except that a comma is never deleted.
    /// </summary>
    /// <returns>Null when the current token is a comma; otherwise the base result.</returns>
    protected override IToken SingleTokenDeletion(Parser recognizer)
    {
        if (recognizer.CurrentToken?.Type == SimaiLexer.COMMA) return null!; // commas must never be deleted
        return base.SingleTokenDeletion(recognizer);
    }

    // Commas, and tokens whose choice would be a guess and could introduce ambiguity,
    // must never be synthesized.
    private static readonly HashSet<int> insertionForbidden = [
        SimaiLexer.COMMA, SimaiLexer.KEY, SimaiLexer.SLIDE_TYPE, SimaiLexer.TOUCH_AREA, SimaiLexer.INT,
        SimaiLexer.CHART_END, SimaiLexer.MODIFIER, SimaiLexer.FALSE_EACH
    ];

    // Expected token types at the current parser state that are allowed to be inserted.
    private List<int> InsertionCandidates(Parser recognizer) =>
        GetExpectedTokens(recognizer).ToList().Where(x => !insertionForbidden.Contains(x)).ToList();

    /// <summary>
    /// Single-token insertion, vetoed when every expected token is forbidden from insertion.
    /// </summary>
    /// <remarks>
    /// The veto has to happen here rather than in <see cref="GetMissingSymbol"/>: the base
    /// implementation reports the "missing token" error before returning true, so failing
    /// later would leave a warning claiming a token was inserted when nothing was.
    /// </remarks>
    protected override bool SingleTokenInsertion(Parser recognizer)
    {
        if (InsertionCandidates(recognizer).Count == 0) return false;
        return base.SingleTokenInsertion(recognizer);
    }

    /// <summary>
    /// Builds the placeholder token for an inline insertion, choosing the first expected
    /// token type that is not in the forbidden set.
    /// </summary>
    /// <exception cref="InputMismatchException">
    /// Thrown when no insertable token type exists (defensive; <see cref="SingleTokenInsertion"/>
    /// normally rules this out beforehand).
    /// </exception>
    protected override IToken GetMissingSymbol(Parser recognizer)
    {
        var insertionCandidates = InsertionCandidates(recognizer);
        // Equivalent to SingleTokenInsertion returning false: inline recovery fails and the
        // error is handed over to the regular Recover path.
        if (insertionCandidates.Count == 0) throw new InputMismatchException(recognizer);
        int minElement = insertionCandidates[0];

        string tokenText = minElement != -1 ? $"<missing {recognizer.Vocabulary.GetDisplayName(minElement)}>" : "<missing EOF>";

        // When the current token is EOF (-1), anchor the synthetic token on the previous token if there is one.
        var tokens = (ITokenStream) recognizer.InputStream;
        IToken current = recognizer.CurrentToken;
        IToken previous = tokens.LT(-1);
        if (current.Type == -1 && previous != null)
            current = previous;
        return this.ConstructToken(tokens.TokenSource, minElement, tokenText, current);
    }

    // Maps a literal name (e.g. "'/'") to its token type. Token types without a literal name
    // yield null from GetLiteralName and are skipped, because a null dictionary key would make
    // this static initializer throw.
    private static readonly Dictionary<string, int> _literals = Enumerable.Range(1, SimaiLexer.ruleNames.Length)
        .Select(type => (Literal: SimaiLexer.DefaultVocabulary.GetLiteralName(type), Type: type))
        .Where(entry => entry.Literal != null)
        .ToDictionary(entry => entry.Literal!, entry => entry.Type);

    // During Recover, only these tokens may be used as synchronization points, so that the
    // whole invalid note is swallowed instead of leaving a fragment behind that would make
    // the parser report further errors.
    private static readonly HashSet<int> recoverySetAllowed = [
        SimaiLexer.COMMA, SimaiLexer.FALSE_EACH, _literals["'/'"], _literals["'('"], _literals["'{'"]
    ];

    /// <summary>
    /// Same bookkeeping as the default recovery, but the recovery set is narrowed to the
    /// allowed synchronization tokens before input is consumed.
    /// </summary>
    public override void Recover(Parser recognizer, RecognitionException e)
    {
        // Same position and state as the previous error: consume one token to guarantee progress.
        if (this.lastErrorIndex == recognizer.InputStream.Index && this.lastErrorStates != null && this.lastErrorStates.Contains(recognizer.State))
            recognizer.Consume();
        this.lastErrorIndex = recognizer.InputStream.Index;
        if (this.lastErrorStates == null)
            this.lastErrorStates = new IntervalSet(Array.Empty<int>());
        this.lastErrorStates.Add(recognizer.State);
        IntervalSet errorRecoverySet = this.GetErrorRecoverySet(recognizer);

        // Intersect with recoverySetAllowed.
        errorRecoverySet = new IntervalSet(errorRecoverySet.ToList().Where(x => recoverySetAllowed.Contains(x)).ToArray());

        this.ConsumeUntil(recognizer, errorRecoverySet);
    }
}
153+
154+
/// <summary>
/// Permits inline recovery (recoverInline) but forbids wide-range recovery.
/// </summary>
public class ModerateErrorStrategy : LaxErrorStrategy
{
    // Kept only so that its Recover implementation can be borrowed below.
    private BailErrorStrategy _bailDelegate = new();

    /// <summary>
    /// Recovery is not allowed in this mode, only recoverInline, so the error is handed
    /// straight to the bail strategy.
    /// </summary>
    public override void Recover(Parser recognizer, RecognitionException e)
    {
        _bailDelegate.Recover(recognizer, e);
    }
}

parser/simai/Simai.g4

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ options { language=CSharp; }
1212
// ---------------------------------------------------------------------------
1313

1414
WS: [ \t\r\n]+ -> channel(HIDDEN);
15+
COMMENT: '||' ~[\r\n]* -> channel(HIDDEN);
16+
17+
COMMA: ',';
1518

1619
TAP_TO_STAR: '$$' | '$';
1720
STAR_TO_TAP: '@';
@@ -35,20 +38,21 @@ modifiers: (MODIFIER | TAP_TO_STAR)*;
3538
// 语法
3639
// ---------------------------------------------------------------------------
3740

38-
chart: (notations ',')* CHART_END? EOF;
41+
chart: (notations COMMA)* CHART_END? EOF;
3942

4043
// 同一时刻的所有标记,包括note标记、bpm标记等等
4144
notations: (bpmTag | absulouteStepTag | metTag)* noteGroup?;
4245

4346
noteGroup: note eachNote*;
44-
eachSeparators: ('/' | '`')+;
47+
FALSE_EACH: '`';
48+
eachSeparators: '/' | FALSE_EACH+;
4549
eachNote: eachSeparators note;
4650

4751
bpmTag: '(' number ')';
4852
absulouteStepTag: '{' '#' number '}';
4953
metTag: '{' int '}';
5054

51-
note: slide (sharedHeadSlide)* | tap+ | hold | touch | touchHold; // tap+是因为,simai允许123这种语法、和1/2/3是等价的,但仅限tap之间。
55+
note: slide (sharedHeadSlide)* | tap | KEY+ | hold | touch | touchHold; // KEY+ is here because simai allows writing e.g. 123, which is equivalent to 1/2/3, but only between taps.
5256

5357
tap: KEY modifiers;
5458

0 commit comments

Comments
 (0)