Skip to content

Commit a06bdff

Browse files
committed
[+] 基于Token预处理(词法分析后、语法分析前)的方式,实现对“对星星头的修饰符,应该出现在键位号后、星星类型标记之前。”的错误处理,和类似括号的文法内方式实现对多余的双押符号的错误处理
1 parent 6ce6a44 commit a06bdff

9 files changed

Lines changed: 113 additions & 39 deletions

File tree

i18n/Locale.Designer.cs

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

i18n/Locale.resx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,4 +208,7 @@
208208
<data name="Fixed" xml:space="preserve">
209209
<value>. Fixed automatically.</value>
210210
</data>
211+
<data name="FixModifiersOnHead" xml:space="preserve">
212+
<value>For Slide's head, modifiers should appear after the key number and before the slide type notation.</value>
213+
</data>
211214
</root>

i18n/Locale.zh-hans.resx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,4 +208,7 @@
208208
<data name="Fixed" xml:space="preserve">
209209
<value>。已自动为您修复。</value>
210210
</data>
211+
<data name="FixModifiersOnHead" xml:space="preserve">
212+
<value>对星星头的修饰符,应该出现在键位号后、星星类型标记之前</value>
213+
</data>
211214
</root>

i18n/Locale.zh-hant.resx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,4 +208,7 @@
208208
<data name="Fixed" xml:space="preserve">
209209
<value>。已自動為您修復。</value>
210210
</data>
211+
<data name="FixModifiersOnHead" xml:space="preserve">
212+
<value>對星星頭的修飾符,應該出現在鍵位號後、星星類型標記之前</value>
213+
</data>
211214
</root>

parser/simai/ErrorStrategy.cs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
using Antlr4.Runtime;
22
using Antlr4.Runtime.Misc;
3-
using MuConvert.Antlr;
43
using MuConvert.utils;
54
using static MuConvert.utils.Alert.LEVEL;
65
using L = MuConvert.Antlr.SimaiLexer;
76
using P = MuConvert.Antlr.SimaiParser;
7+
using Utils = MuConvert.utils.Utils;
88

99
namespace MuConvert.parser.simai;
1010

@@ -112,7 +112,7 @@ protected override IToken SingleTokenDeletion(Parser recognizer)
112112
L.KEY, L.SLIDE_TYPE, L.TOUCH_AREA, L.INT, L.CHART_END, L.FALSE_EACH,
113113
L.MODIFIER, L.NO_STAR, L.STAR_TO_TAP, L.TAP_TO_STAR
114114
]; // 不确定的可能引起歧义的符号,一律不允许补充
115-
private HashSet<int> insertCommaOnlyWhen = [_literals["("], _literals["{"]];
115+
private HashSet<int> insertCommaOnlyWhen = [Utils.TokenType("("), Utils.TokenType("{")];
116116

117117
protected override IToken GetMissingSymbol(Parser recognizer)
118118
{
@@ -144,11 +144,8 @@ protected override void ReportMissingToken(Parser recognizer)
144144
catch (InputMismatchException) {} // ignored
145145
}
146146

147-
private static Dictionary<string, int> _literals = Enumerable.Range(1, SimaiLexer.ruleNames.Length)
148-
.Where(i=>SimaiLexer.DefaultVocabulary.GetLiteralName(i) != null)
149-
.ToDictionary(i => SimaiLexer.DefaultVocabulary.GetLiteralName(i)[1..^1], i => i);
150147
private List<int> recoverySetAllowed = [
151-
L.COMMA, L.FALSE_EACH, _literals["/"], _literals["("], _literals["{"]
148+
L.COMMA, L.FALSE_EACH, Utils.TokenType("/"), Utils.TokenType("("), Utils.TokenType("{")
152149
]; // recover时,为了确保整个吞掉不合法的音符,而不是出现残缺的东西导致parser报错,只准同步到上面这些字符当中
153150

154151
public override void Recover(Parser recognizer, RecognitionException e)
@@ -178,7 +175,7 @@ protected virtual bool SpecificRecover(P parser, RecognitionException e)
178175
var ctx = parser.Context;
179176
var rule = ctx.RuleIndex;
180177
if (rule == P.RULE_beats && e is InputMismatchException &&
181-
e.OffendingToken.Text == "-" && e.GetExpectedTokens().Contains(_literals[":"]))
178+
e.OffendingToken.Text == "-" && e.GetExpectedTokens().Contains(Utils.TokenType(":")))
182179
{ // [4:1]中,错把:打成-了
183180
simaiParser.alerts.Last().Level = Warning; // Error改为Warning,因为恢复了
184181
simaiParser.alerts.Last().Description += Locale.Fixed;

parser/simai/Simai.g4

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,7 @@ notations: (bpmTag | absulouteStepTag | metTag)* noteGroup?;
4747

4848
noteGroup: note eachNote*;
4949
FALSE_EACH: '`';
50-
eachSeparators: '/' | FALSE_EACH+;
51-
eachNote: eachSeparators note;
50+
eachNote: (sep+=('/' | FALSE_EACH))+ note;
5251

5352
bpmTag: (lp+='(')+ number (rp+=')')+;
5453
absulouteStepTag: (lp+='{')+ '#' number (rp+='}')+;

parser/simai/SimaiParser.cs

Lines changed: 72 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System.Text.RegularExpressions;
22
using Antlr4.Runtime;
3+
using Antlr4.Runtime.Misc;
34
using Antlr4.Runtime.Tree;
45
using MuConvert.Antlr;
56
using MuConvert.chart;
@@ -8,6 +9,8 @@
89
using Rationals;
910
using static MuConvert.utils.Alert.LEVEL;
1011
using P = MuConvert.Antlr.SimaiParser;
12+
using L = MuConvert.Antlr.SimaiLexer;
13+
using Utils = MuConvert.utils.Utils;
1114

1215
namespace MuConvert.parser;
1316

@@ -68,6 +71,48 @@ private string Preprocess(string text)
6871
return text;
6972
}
7073

74+
/**
 * Post-processes the token stream produced by the lexer before it is handed to the parser,
 * trying to repair one specific class of input mistake:
 * - modifiers that belong to a slide's head must appear after the key number and before the
 *   slide type marker (e.g. "1-b2[2:1]" is rewritten so that the modifier precedes the "-").
 *
 * src is fully buffered (Fill) by this method.
 * Returns src itself when nothing had to be changed; otherwise returns a fresh token stream
 * whose lexer runs over the rewritten text.
 */
private CommonTokenStream TokenProcess(CommonTokenStream src)
{
    List<Alert> alertsBuf = [];

    // Remember how many alerts existed before this lexer pass, so that only the alerts raised
    // while filling the stream are discarded if the lexer has to be rerun.
    int alertsBeforeLexing = alerts.Count;
    src.Fill();
    var tokens = src.GetTokens().Index().Where(x => x.Item.Channel == TokenConstants.DefaultChannel).ToList();
    var r = new TokenStreamRewriter(src);
    bool modified = false;
    // Start at 1 because a candidate slide type needs a KEY right before it; stop before the last
    // token because it needs at least one token (the modifier) after it.
    for (int i = 1; i < tokens.Count - 1; i++)
    {
        var (idx, token) = tokens[i];
        if (token.Type != L.SLIDE_TYPE) continue;
        if (!Utils.IsModifier(tokens[i + 1].Item.Type)) continue; // the slide type is not followed by a modifier
        // Only the first slide type of a slide qualifies: it is preceded by the head's KEY, and that
        // KEY is not itself the end point of an earlier slide segment. When the KEY is the very first
        // token there is nothing before it, so it is necessarily the head.
        if (tokens[i - 1].Item.Type != L.KEY) continue;
        if (i >= 2 && tokens[i - 2].Item.Type == L.SLIDE_TYPE) continue;

        // A case like "1-b2[2:1]": the head's modifiers were wrongly written after the first slide type.
        // Find the last token of the modifier run.
        int endPos = i + 1;
        while (endPos + 1 < tokens.Count && Utils.IsModifier(tokens[endPos + 1].Item.Type)) endPos++;
        // Move tokens[i] (the slide type) to just after the modifier run.
        r.Delete(idx);
        r.InsertAfter(tokens[endPos].Index, token.Text);
        // The reported snippet spans from the head KEY to the token following the modifiers, when there is one.
        int noteEnd = endPos + 1 < tokens.Count ? endPos + 1 : endPos;
        alertsBuf.Add(new Alert(Warning, Locale.FixModifiersOnHead + Locale.Fixed, line: token.Line,
            relevantNote: src.GetText(tokens[i - 1].Item, tokens[noteEnd].Item)));
        modified = true;
        i = endPos; // the tokens in between are modifiers and can never match; skip them
    }

    if (!modified) return src;
    // The text was changed, so the lexer has to run again.
    // Drop only the alerts reported by the first lexer pass (they would otherwise be reported twice);
    // alerts that were collected before lexing are kept.
    alerts.RemoveRange(alertsBeforeLexing, alerts.Count - alertsBeforeLexing);
    alerts.AddRange(alertsBuf);
    var inputStream = new AntlrInputStream(r.GetText());
    var lexer = new SimaiLexer(inputStream);
    lexer.RemoveErrorListeners();
    lexer.AddErrorListener(new ErrorListener(this));
    return new CommonTokenStream(lexer);
}
115+
71116
public (Chart, List<Alert>) Parse(string text)
72117
{
73118
if (now != 0) throw new Exception(Locale.InstanceMultipleUsage);
@@ -82,13 +127,14 @@ private string Preprocess(string text)
82127
lexer.RemoveErrorListeners();
83128
lexer.AddErrorListener(new ErrorListener(this));
84129
var tokens = new CommonTokenStream(lexer);
130+
if (StrictLevel != StrictLevelEnum.Strict) tokens = TokenProcess(tokens);
85131

86132
var parser = new P(tokens) { ErrorHandler = ErrorStrategy() }; // MuConvert.Antlr.SimaiParser
87133
parser.RemoveErrorListeners();
88134
parser.AddErrorListener(new ErrorListener(this));
89135
root = parser.chart();
90136
}
91-
catch (Antlr4.Runtime.Misc.ParseCanceledException e)
137+
catch (ParseCanceledException e)
92138
{ // ErrorListener里会把alerts加好的,因此这里直接抛异常就可以了。
93139
throw new ConversionException(alerts, e);
94140
}
@@ -191,7 +237,7 @@ public sealed override object VisitNotations(P.NotationsContext context)
191237
return true;
192238
}
193239

194-
private void WarnMoreParentheses(IList<IToken> ps)
240+
private void WarnMoreThanOneTokens(IList<IToken> ps)
195241
{
196242
if (ps.Count <= 1) return;
197243
var extraStr = "'" + string.Join("", ps.Skip(1).Select(x => x.Text)) + "'";
@@ -205,8 +251,8 @@ private void WarnMoreParentheses(IList<IToken> ps)
205251

206252
private void WarnMoreParentheses(IList<IToken> lp, IList<IToken> rp)
207253
{
208-
WarnMoreParentheses(lp);
209-
WarnMoreParentheses(rp);
254+
WarnMoreThanOneTokens(lp);
255+
WarnMoreThanOneTokens(rp);
210256
}
211257

212258
public sealed override object VisitAbsulouteStepTag(P.AbsulouteStepTagContext context)
@@ -263,25 +309,26 @@ public sealed override object VisitNoteGroup(P.NoteGroupContext context)
263309
{
264310
noteC = c2.note();
265311

266-
var separators = c2.eachSeparators().GetText()!;
267-
if (separators[0] == '`')
312+
var separators = c2._sep;
313+
if (separators.Count >= 2 && separators.All(x=>x.Type == L.FALSE_EACH))
268314
{
269-
if (separators.Length >= 2)
270-
{
271-
// 出现连续多个反引号的情况,如"2``3"。
272-
// 这并不是标准的simai语法。但是,MajdataView中对此提供了支持,将每个`实现为128分音。
273-
// 因此,我们也支持这一特性,在遇到大于一个`时,不实现成FalseEachIndex,而是直接给予相同的实现、每个`错后128分音。
274-
var length = separators.Length * new Rational(1, 128);
275-
now = (now + length).CanonicalForm;
276-
extendedFalseEach += length;
277-
falseEachIdx = 0;
278-
if (!extendedFalseEachWarned)
279-
{
280-
AddAlert(Warning, Locale.ExtenedFalseEach, context);
281-
extendedFalseEachWarned = true;
282-
}
315+
// 出现连续多个反引号的情况,如"2``3"。
316+
// 这并不是标准的simai语法。但是,MajdataView中对此提供了支持,将每个`实现为128分音。
317+
// 因此,我们也支持这一特性,在遇到大于一个`时,不实现成FalseEachIndex,而是直接给予相同的实现、每个`错后128分音。
318+
var length = separators.Count * new Rational(1, 128);
319+
now = (now + length).CanonicalForm;
320+
extendedFalseEach += length;
321+
falseEachIdx = 0;
322+
if (!extendedFalseEachWarned)
323+
{
324+
AddAlert(Warning, Locale.ExtenedFalseEach, context);
325+
extendedFalseEachWarned = true;
283326
}
284-
else falseEachIdx++;
327+
}
328+
else
329+
{
330+
WarnMoreThanOneTokens(separators);
331+
if (separators[0].Type == L.FALSE_EACH) falseEachIdx++;
285332
}
286333
}
287334
else throw Utils.Fail();
@@ -328,7 +375,7 @@ public sealed override object VisitNote(P.NoteContext context)
328375
case P.SharedHeadSlideContext shSlideC:
329376
note = (Slide)VisitSharedHeadSlide(shSlideC);
330377
break;
331-
case ITerminalNode n when n.Symbol.Type == SimaiLexer.KEY:
378+
case ITerminalNode n when n.Symbol.Type == L.KEY:
332379
note = new Tap(chart, now) { Key = int.Parse(n.GetText())};
333380
break;
334381
default:
@@ -384,7 +431,7 @@ public sealed override object VisitTap(P.TapContext context)
384431
Key = int.Parse(context.KEY().GetText())
385432
};
386433
ApplyModifiers([context.modifiers()], result);
387-
if (context.Parent is not P.SlideContext && GetModifier(SimaiLexer.TAP_TO_STAR)) result = new Star(result); // 发现了”TAP_TO_STAR“的标记,把Tap转换为星星
434+
if (context.Parent is not P.SlideContext && GetModifier(L.TAP_TO_STAR)) result = new Star(result); // 发现了”TAP_TO_STAR“的标记,把Tap转换为星星
388435
return result;
389436
}
390437

@@ -541,12 +588,12 @@ public sealed override object VisitSlide(P.SlideContext context)
541588

542589
// 处理星星头
543590
Tap? head = (Tap)VisitTap(context.tap());
544-
if (GetModifier(SimaiLexer.NO_STAR))
591+
if (GetModifier(L.NO_STAR))
545592
{ // 标记了NO_STAR的星星,则不要放head、但是需要手动设置Key
546593
result.Key = head.Key;
547594
head = null;
548595
}
549-
else if (!GetModifier(SimaiLexer.STAR_TO_TAP)) head = new Star(head); // 除非标记了STAR_TO_TAP,否则把tap转为star
596+
else if (!GetModifier(L.STAR_TO_TAP)) head = new Star(head); // 除非标记了STAR_TO_TAP,否则把tap转为star
550597
result.OwnHead = head;
551598

552599
currNote = result;

tests/Simai预处理纠错测试.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ public static IEnumerable<object[]> TryFix_Cases()
9595
yield return
9696
[
9797
"多打了一个双押符号",
98-
"(120){4}1//2,2//3,4,E",
99-
"(120){4}1/2,2//3,4,E"
98+
"(120){4}1/2,2/3,4,E",
99+
"(120){4}1//2,2//3,4,E"
100100
];
101101
}
102102

utils/Utils.cs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using System.Numerics;
33
using System.Reflection;
44
using Rationals;
5+
using L = MuConvert.Antlr.SimaiLexer;
56

67
namespace MuConvert.utils;
78

@@ -24,13 +25,18 @@ internal static Exception Fail(string msg = "")
2425
public static BigInteger LCM(BigInteger a, BigInteger b) => a / BigInteger.GreatestCommonDivisor(a, b) * b;
2526

2627
public static BigInteger LCM(IEnumerable<BigInteger> values) => values.Aggregate(LCM);
27-
28-
// 工作范围仅限正数
29-
public static Rational Ceil(Rational r) => r.WholePart + (r.FractionPart == 0 ? 0 : 1);
3028

3129
public static BigInteger Max(BigInteger a, BigInteger b) => a > b ? a : b;
3230

3331
public static Rational Min(Rational a, Rational b) => a < b ? a : b;
32+
33+
// Lookup table from the text of a literal token (its literal name with the first and last
// character, i.e. the surrounding quotes, removed) to its SimaiLexer token type.
private static readonly Dictionary<string, int> _simaiLexerMap = BuildSimaiLexerMap();

// Walks token types 1..L.ruleNames.Length and records every type for which the lexer
// vocabulary reports a literal name.
private static Dictionary<string, int> BuildSimaiLexerMap()
{
    var map = new Dictionary<string, int>();
    for (int type = 1; type <= L.ruleNames.Length; type++)
    {
        var literal = L.DefaultVocabulary.GetLiteralName(type);
        if (literal == null) continue;
        map.Add(literal[1..^1], type); // Add throws on a duplicate key
    }
    return map;
}

// Returns the SimaiLexer token type whose literal text is str.
// The dictionary indexer throws KeyNotFoundException when no such literal exists.
internal static int TokenType(string str)
{
    return _simaiLexerMap[str];
}

// True when tokenType is one of the four token types treated as note modifiers:
// MODIFIER, TAP_TO_STAR, STAR_TO_TAP or NO_STAR.
internal static bool IsModifier(int tokenType)
{
    return tokenType == L.MODIFIER
           || tokenType == L.TAP_TO_STAR
           || tokenType == L.STAR_TO_TAP
           || tokenType == L.NO_STAR;
}
3440
}
3541

3642
internal static class ExtensionUtils
@@ -49,4 +55,11 @@ internal static Dictionary<K, V> EnsureKeys<K, V>(
4955
foreach (var key in requiredKeys) dict.TryAdd(key, defaultValue);
5056
return dict;
5157
}
58+
59+
// 工作范围仅限正数
60+
public static Rational Ceil(this Rational r)
61+
{
62+
if (r < 0) throw new ArgumentOutOfRangeException(nameof(r));
63+
return r.WholePart + (r.FractionPart == 0 ? 0 : 1);
64+
}
5265
}

0 commit comments

Comments
 (0)