-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.go
More file actions
354 lines (334 loc) · 11.5 KB
/
parser.go
File metadata and controls
354 lines (334 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
package comb
import (
"math"
"sync"
)
const (
ParentUndefined = math.MinInt32 + iota // used for calling the root parser
ParentUnknown // used for bottom-up parsing
)
// ParserIDs is the base of every comb parser.
// It enables registering of all parsers and error recovery.
type ParserIDs struct {
id, parent int32
}
func (pids *ParserIDs) ID() int32 {
return pids.id
}
func (pids *ParserIDs) setID(id int32) {
pids.id = id
}
func (pids *ParserIDs) setParent(id int32) {
pids.parent = id
}
// ============================================================================
// Leaf Parser
//
type prsr[Output any] struct {
ParserIDs
expected string
parseWithData func(State, interface{}) (State, Output, *ParserError, interface{})
recoverer Recoverer
safeSpot bool
}
// NewParser is THE way to create simple leaf parsers.
// recover can be nil to signal that there is no optimized recoverer available.
// In case of an error, the parser will be called again and again moving forward
// one byte/rune at a time instead.
func NewParser[Output any](
expected string,
parse func(State) (State, Output, *ParserError),
recover Recoverer,
) Parser[Output] {
p := &prsr[Output]{
ParserIDs: ParserIDs{id: -1, parent: ParentUndefined},
expected: expected,
parseWithData: func(state State, data interface{}) (State, Output, *ParserError, interface{}) {
nState, out, err := parse(state)
return nState, out, err, nil
},
recoverer: recover,
}
return p
}
// NewParserWithData is the way to create leaf parsers that have partial results
// they want to save in case of an error.
// recover can be nil to signal that there is no optimized recoverer available.
// In case of an error, the parser will be called again and again moving forward
// one byte/rune at a time instead.
func NewParserWithData[Output any](
expected string,
parse func(State, interface{}) (State, Output, *ParserError, interface{}),
recover Recoverer,
) Parser[Output] {
p := &prsr[Output]{
ParserIDs: ParserIDs{id: -1, parent: ParentUndefined},
expected: expected,
parseWithData: parse,
recoverer: recover,
}
return p
}
func (p *prsr[Output]) Expected() string {
return p.expected
}
func (p *prsr[Output]) Parse(state State) (State, Output, *ParserError) {
nState, out, err, data := p.parseWithData(state, nil)
if err != nil && data != nil {
err.StoreParserData(p.ID(), data)
}
if err != nil && err.parserID < 0 {
err.parserID = p.ID()
}
return nState, out, err
}
func (p *prsr[Output]) ParseAny(parent int32, state State) (State, interface{}, *ParserError) {
if parent >= 0 {
p.setParent(parent)
}
return p.Parse(state)
}
func (p *prsr[Output]) parseAnyAfterError(err *ParserError, state State) (int32, State, interface{}, *ParserError) {
nState, out, newErr, data := p.parseWithData(state, err.ParserData(p.ID()))
if newErr != nil {
newErr.StoreParserData(p.ID(), data)
}
if newErr != nil && newErr.parserID < 0 {
newErr.parserID = p.ID()
}
return p.ParserIDs.parent, nState, out, newErr
}
func (p *prsr[Output]) IsSafeSpot() bool {
return p.safeSpot
}
func (p *prsr[Output]) setSafeSpot() {
p.safeSpot = true
}
func (p *prsr[Output]) Recover(state State, data interface{}) (int, interface{}) {
return p.recoverer(state, data)
}
func (p *prsr[Output]) IsStepRecoverer() bool {
return p.recoverer == nil
}
func (p *prsr[Output]) SwapRecoverer(newRecoverer Recoverer) {
p.recoverer = newRecoverer // this isn't concurrency safe, but it only happens in the initialization phase
}
// ============================================================================
// Branch Parser
//
type brnchprsr[Output any] struct {
ParserIDs
expected string
childs func() []AnyParser
prsAfterChild func(childID int32, childStartState, childState State, childOut interface{}, childErr *ParserError, data interface{},
) (State, Output, *ParserError, interface{})
}
// NewBranchParser is THE way to create branch parsers.
// parseAfterChild is called with a `childID < 0` during normal (top -> down) parsing.
// It will be called with a `childID >= 0` during error recovery (bottom -> up).
func NewBranchParser[Output any](
expected string,
children func() []AnyParser,
parseAfterChild func(childID int32, childStartState, childState State, childOut interface{}, childErr *ParserError, data interface{},
) (State, Output, *ParserError, interface{}),
) Parser[Output] {
return &brnchprsr[Output]{
ParserIDs: ParserIDs{id: -1, parent: ParentUndefined},
expected: expected,
childs: children,
prsAfterChild: parseAfterChild,
}
}
func (bp *brnchprsr[Output]) Expected() string {
return bp.expected
}
func (bp *brnchprsr[Output]) Parse(state State) (State, Output, *ParserError) {
nState, aOut, err := bp.ParseAny(ParentUnknown, state)
out, _ := aOut.(Output)
return nState, out, err
}
func (bp *brnchprsr[Output]) ParseAny(parentID int32, state State) (State, interface{}, *ParserError) {
bp.ensureIDs()
if parentID >= 0 {
bp.setParent(parentID)
}
nState, out, err, data := bp.prsAfterChild(-1, state, state, nil, nil, nil)
if err != nil && err.parserID < 0 {
err.parserID = bp.ID()
}
if err != nil && data != nil {
err.StoreParserData(bp.ID(), data)
}
return nState, out, err
}
func (bp *brnchprsr[Output]) parseAfterError(
err *ParserError, childID int32, childStartState, childState State, childOut interface{}, childErr *ParserError,
) (int32, State, interface{}, *ParserError) {
bp.ensureIDs()
nState, out, nErr, data := bp.prsAfterChild(childID, childStartState, childState, childOut, childErr, err.ParserData(bp.ID()))
if nErr != nil && data != nil {
nErr.StoreParserData(bp.ID(), data)
}
if nErr != nil && nErr.parserID < 0 {
nErr.parserID = bp.ID()
}
return bp.ParserIDs.parent, nState, out, nErr
}
func (bp *brnchprsr[Output]) parseAnyAfterError(_ *ParserError, _ State) (int32, State, interface{}, *ParserError) {
panic("a branch parser has to be called with `parseAfterError` instead")
}
func (bp *brnchprsr[Output]) IsSafeSpot() bool {
return false // a branch parser can never be a safe spot
}
func (bp *brnchprsr[Output]) setSafeSpot() {
panic("a branch parser can never be a safe spot")
}
func (bp *brnchprsr[Output]) Recover(_ State, _ interface{}) (int, interface{}) {
return RecoverNever, nil // never recover with a branch parser
}
func (bp *brnchprsr[Output]) IsStepRecoverer() bool {
return false
}
func (bp *brnchprsr[Output]) SwapRecoverer(_ Recoverer) {
panic("a branch parser can never have a special recoverer")
}
func (bp *brnchprsr[Output]) children() []AnyParser {
return bp.childs()
}
func (bp *brnchprsr[Output]) ensureIDs() { // only needed if Parse was called directly
if bp.ID() < 0 { // ensure sane IDs
bp.id = 0
for i, child := range bp.childs() {
child.setID(int32(i + 1))
}
}
}
// ============================================================================
// Lazy Branch Parser
//
type lazyprsr[Output any] struct {
cachedPrsr Parser[Output]
once sync.Once
makePrsr func() Parser[Output]
newRecoverer Recoverer
}
// LazyBranchParser just stores a function that creates the parser and evaluates the function later.
// This allows deferring the call to NewParser() and thus to define recursive grammars.
// Only branch parsers need this ability. A leaf parser can't be recursive by definition.
func LazyBranchParser[Output any](makeParser func() Parser[Output]) Parser[Output] {
return &lazyprsr[Output]{makePrsr: makeParser}
}
func (lp *lazyprsr[Output]) ensurePrsr() {
lp.cachedPrsr = lp.makePrsr()
if lp.newRecoverer != nil {
lp.cachedPrsr.SwapRecoverer(lp.newRecoverer)
}
}
func (lp *lazyprsr[Output]) ID() int32 {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.ID()
}
func (lp *lazyprsr[Output]) Expected() string {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.Expected()
}
func (lp *lazyprsr[Output]) Parse(state State) (State, Output, *ParserError) {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.Parse(state)
}
func (lp *lazyprsr[Output]) ParseAny(parent int32, state State) (State, interface{}, *ParserError) {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.ParseAny(parent, state)
}
func (lp *lazyprsr[Output]) parseAfterError(
err *ParserError, childID int32, childStartState, childState State, childOut interface{}, childErr *ParserError,
) (int32, State, interface{}, *ParserError) {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.(BranchParser).parseAfterError(err, childID, childStartState, childState, childOut, childErr)
}
func (lp *lazyprsr[Output]) parseAnyAfterError(_ *ParserError, _ State) (int32, State, interface{}, *ParserError) {
panic("a branch parser has to be called with `parseAfterError` instead")
}
func (lp *lazyprsr[Output]) IsSafeSpot() bool {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.IsSafeSpot()
}
func (lp *lazyprsr[Output]) setSafeSpot() {
lp.once.Do(lp.ensurePrsr)
lp.cachedPrsr.setSafeSpot()
}
func (lp *lazyprsr[Output]) Recover(state State, data interface{}) (int, interface{}) {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.Recover(state, data)
}
func (lp *lazyprsr[Output]) IsStepRecoverer() bool {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.IsStepRecoverer()
}
func (lp *lazyprsr[Output]) SwapRecoverer(newRecoverer Recoverer) {
if lp.cachedPrsr == nil {
lp.newRecoverer = newRecoverer
return
}
lp.cachedPrsr.SwapRecoverer(newRecoverer)
}
func (lp *lazyprsr[Output]) children() []AnyParser {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.(BranchParser).children()
}
func (lp *lazyprsr[Output]) setID(id int32) {
lp.once.Do(lp.ensurePrsr)
lp.cachedPrsr.setID(id)
}
func (lp *lazyprsr[Output]) setParent(id int32) {
lp.once.Do(lp.ensurePrsr)
lp.cachedPrsr.setParent(id)
}
// ============================================================================
// Save Spot Parser
//
// SafeSpot applies a sub-parser and marks the new state as a
// point of no return if successful.
// It really serves 3 slightly different purposes:
//
// 1. Prevent a `FirstSuccessful` parser from trying later sub-parsers even
// in case of an error.
// 2. Prevent other unnecessary backtracking in case of an error.
// 3. Mark a parser as a potential safe place to recover to
// when recovering from an error.
//
// So you don't need this parser at all if your input is always correct.
// SafeSpot is THE cornerstone of good and performant parsing otherwise.
//
// NOTE:
// - Parsers that accept the empty input or only perform look ahead are
// NOT allowed as sub-parsers.
// SafeSpot tests the optional recoverer of the parser during the
// construction phase to do a timely panic.
// This way we won't have to panic at the runtime of the parser.
// - Only leaf parsers MUST be given to SafeSpot as sub-parsers.
// SafeSpot will treat the sub-parser as a leaf parser.
// Any error will look as if coming from SafeSpot itself.
func SafeSpot[Output any](p Parser[Output]) Parser[Output] {
// call Recoverer to find a Forbidden recoverer during the construction phase and panic
if !p.IsStepRecoverer() {
waste, _ := p.Recover(NewFromBytes([]byte{}, 0), nil)
if waste == RecoverNever {
panic("can't make parser with Forbidden recoverer a safe spot")
}
}
pp, ok := p.(*prsr[Output])
if !ok {
panic("SafeSpot can only be applied to leaf parsers")
}
pp.setSafeSpot()
parse := pp.parseWithData
pp.parseWithData = func(state State, data interface{}) (State, Output, *ParserError, interface{}) {
nState, out, err, data2 := parse(state, data)
if err == nil {
return nState.MoveSafeSpot(), out, nil, data2
}
return nState, out, err, data2
}
return pp
}