parser.pony

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
use "collections"
use per = "collections/persistent"

use "debug"

actor Parser[
  S: (Any #read & Equatable[S]),
  D: Any #share = None,
  V: Any #share = None]
  """
  Stores a source of inputs to a parse, and a memo of parse results from prior
  parses. Used to initiate a parse attempt.
  """

  var _segments: Source[S]
  var _updates: Array[_SegmentUpdate[S]]

  let _memo: _Memo[S, D, V]
  let _memo_lr: _MemoLR[S, D, V]

  let _stack: Array[_RuleFrame[S, D, V]]

  new create(source: ReadSeq[Segment[S]] val) =>
    _segments = per.Lists[Segment[S]].from(source.values())
    _updates = _updates.create()
    _memo = _memo.create()
    _memo_lr = _memo_lr.create()
    _stack = _stack.create(1024)

  fun num_segments(): USize =>
    """
    Returns the number of segments currently in the source.
    """
    _segments.size()

  be insert_segment(index: USize, segment: Segment[S]) =>
    """
    Insert a source segment at the given index.  The insertion will happen upon the next call to `parse()`.
    """
    _updates.push(_InsertSeg[S](index, segment))

  be remove_segment(index: USize) =>
    """
    Removes the source segment at the given index.  The removal will happen upon the next call to `parse()`.
    """
    _updates.push(_RemoveSeg(index))

  fun ref _update_segments() =>
    for op in _updates.values() do
      match op
      | let insert: _InsertSeg[S] =>
        if insert.index > 0 then
          try
            _remove_memoized_spanning(
              _segments(insert.index - 1)?,
              _segments(insert.index)?)
          end
        end

        let left = _segments.take(insert.index)
        let right = _segments.drop(insert.index)
        _segments = left
          .concat(per.Cons[Segment[S]](insert.segment, per.Nil[Segment[S]]))
          .concat(right)
      | let remove: _RemoveSeg =>
        try
          if remove.index > 0 then
            _remove_memoized_spanning(
              _segments(remove.index - 1)?,
              _segments(remove.index)?)
          end
          _remove_memoized_spanning(
            _segments(remove.index)?,
            _segments(remove.index)?)
          if _segments.size() > (remove.index + 1) then
            _remove_memoized_spanning(
              _segments(remove.index)?,
              _segments(remove.index + 1)?)
          end
        end

        let left = _segments.take(remove.index)
        let right = _segments.drop(remove.index + 1)
        _segments = left.concat(right)
      end
    end
    _updates.clear()

  fun ref _remove_memoized_spanning(first: Segment[S], second: Segment[S]) =>
    // removes memoized results that span the first and second segments
    var keys_to_remove = HashSet[_MemoKey[S, D, V], _MemoHash[S, D, V]]
    for (key, result) in _memo.pairs() do
      if key._2.is_in(first) then
        match result
        | let success: Success[S, D, V] =>
          if success.next.is_in(second) then
            keys_to_remove.add(key)
          end
        end
      end
    end
    for key in keys_to_remove.values() do
      try
        _memo.remove(key)?
      end
    end

  be parse(
    rule: RuleNode[S, D, V] val,
    data: D,
    callback: ParseCallback[S, D, V],
    start: (Loc[S] | None) = None,
    clear_memo: Bool = false)
  =>
    """
    Initiates a parse attempt with the given rule.
    """
    if clear_memo then
      _memo.clear()
    end

    _update_segments()

    match _segments
    | let source: per.Cons[Segment[S]] =>
      let loc: Loc[S] =
        match start
        | let loc': Loc[S] =>
          loc'
        else
          Loc[S](source, 0)
        end

      _stack.clear()
      _stack.push(rule.call(0, loc))
      _parse(rule, loc, data, callback)
    else
      let loc =
        match start
        | let loc': Loc[S] =>
          loc'
        else
          Loc[S](per.Cons[Segment[S]]([], per.Nil[Segment[S]]), 0)
        end
      let failure = Failure[S, D, V](rule, loc, ErrorMsg.empty_source())
      callback(failure, [])
    end

  be _parse(
    top_rule: RuleNode[S, D, V] val,
    top_start: Loc[S],
    data: D,
    callback: ParseCallback[S, D, V])
  =>
    var last_child_result: (Result[S, D, V] | None) = None
    var i: USize = 0
    while true do
      i = i + 1

      let frame =
        try
          _stack(_stack.size() - 1)?
        else
          let failure = Failure[S, D, V](
            top_rule, top_start, ErrorMsg.internal_error())
          callback(failure, [])
          return
        end

      let frame_result =
        match frame
        | let named_rule_frame: _NamedRuleFrame[S, D, V] =>
          _parse_named_rule(named_rule_frame, last_child_result)
        | let rule_frame: _RuleFrame[S, D, V] =>
          rule_frame.run(last_child_result)
        end

      match frame_result
      | let result: Result[S, D, V] =>
        last_child_result = result
        if _stack.size() == 1 then
          match result
          | let success: Success[S, D, V] =>
            callback(success, success._values(data))
          else
            callback(result, [])
          end
          return
        else
          try _stack.pop()? end
        end
      | let rule_frame: _RuleFrame[S, D, V] =>
        last_child_result = None
        _stack.push(rule_frame)

        // this number is derived from empirical testing
        if i >= 100000 then
          // allow garbage collection
          _parse(top_rule, top_start, data, callback)
          return
        end
      end
    end

  fun ref _parse_named_rule(
    frame: _NamedRuleFrame[S, D, V],
    child_result: (Result[S, D, V] | None))
    : _FrameResult[S, D, V]
  =>
    if frame.rule.left_recursive then
      _parse_lr(frame, child_result)
    else
      _parse_non_lr(frame, child_result)
    end

  fun ref _parse_non_lr(
    frame: _NamedRuleFrame[S, D, V],
    child_result: (Result[S, D, V] | None))
    : _FrameResult[S, D, V]
  =>
    match child_result
    | None =>
      match try _memo((frame.rule, frame.loc))? end
      | let memoized: Result[S, D, V] =>
        _Dbg() and _Dbg.out(
          frame.depth, "RULE " + frame.rule.name + " @" + frame.loc.string() +
          " = MEMO FOUND " + memoized.string())
        memoized
      else
        _Dbg() and _Dbg.out(
          frame.depth, "RULE " + frame.rule.name + " @" + frame.loc.string())
        _call_body(frame)
      end
    | let child_result': Result[S, D, V] =>
      let rule_result =
        match child_result'
        | let success: Success[S, D, V] =>
          Success[S, D, V](frame.rule, success.start, success.next, [ success ])
        | let failure: Failure[S, D, V] =>
          Failure[S, D, V](
            frame.rule,
            failure.start,
            ErrorMsg.rule_expected(frame.rule.name, frame.loc.string()),
            failure)
        end
      if frame.rule.memoize then
        _Dbg() and _Dbg.out(
          frame.depth, "RULE " + frame.rule.name + " @" + frame.loc.string() +
          " = " + rule_result.string() + "; memoizing")
        _memo((frame.rule, frame.loc)) = rule_result
      else
        _Dbg() and _Dbg.out(
          frame.depth, "RULE " + frame.rule.name + " @" + frame.loc.string() +
          " = " + rule_result.string())
      end
      rule_result
    end

  fun ref _parse_lr(
    frame: _NamedRuleFrame[S, D, V],
    child_result: (Result[S, D, V] | None))
    : _FrameResult[S, D, V]
  =>
    match child_result
    | None => // new call
      _parse_lr_new_call(frame)
    | let child_result': Result[S, D, V] =>
      _parse_lr_with_child(frame, child_result')
    end

  fun ref _parse_lr_new_call(frame: _NamedRuleFrame[S, D, V])
    : _FrameResult[S, D, V]
  =>
    // look in memo
    match try _memo((frame.rule, frame.loc))? end
    | let memoized: Result[S, D, V] =>
      _Dbg() and _Dbg.out(
        frame.depth, "RULE " + frame.rule.name + " @" + frame.loc.string() +
        " = MEMO FOUND " + memoized.string())
      return memoized
    end

    // now see if we're in an LR at this position
    match _get_lr(frame.rule, frame.loc)
    | (let inv: _Involved[S, D, V], let exp: _Expansions[S, D, V]) =>
      try
        _Dbg() and _Dbg.out(
          frame.depth, "RULE " + frame.rule.name + " @" + frame.loc.string() +
          " = PREV EXPANSION " + (exp.size() - 1).string() + " " +
          exp(exp.size() - 1)?.string())
        return exp(exp.size() - 1)?
      else
        _Dbg() and _Dbg.out(
          frame.depth, "RULE " + frame.rule.name + " @" + frame.loc.string() +
          " = FAILURE NO EXPANSIONS!")
        return
          Failure[S, D, V](frame.rule, frame.loc, ErrorMsg.internal_error())
      end
    else
      _Dbg() and _Dbg.out(
        frame.depth, "RULE " + frame.rule.name + " @" + frame.loc.string() +
        ": START LR; MEMOIZE EXPANSION ZERO: FAILURE")
      let failure = Failure[S, D, V](
        frame.rule, frame.loc, ErrorMsg._lr_started())
      let expansions = Array[Result[S, D, V]](4)
      expansions.push(failure)
      _save_lr(frame.rule, frame.loc, expansions)
      _call_body(frame)
    end

  fun ref _parse_lr_with_child(
    frame: _NamedRuleFrame[S, D, V],
    child_result: Result[S, D, V])
    : _FrameResult[S, D, V]
  =>
    match _get_lr(frame.rule, frame.loc)
    | (let inv: _Involved[S, D, V], let exp: _Expansions[S, D, V]) =>
      match child_result
      | let cur_success: Success[S, D, V] =>
        let prev_expansion =
          try
            exp(exp.size() - 1)?
          else
            _Dbg() and _Dbg.out(
              frame.depth, "RULE " + frame.rule.name + " @" +
              frame.loc.string() + " FAILURE EXPANSION UNDERFLOW")
            return Failure[S, D, V](
              frame.rule, frame.loc, ErrorMsg.internal_error())
          end
        match prev_expansion
        | let prev_success: Success[S, D, V] =>
          if prev_success.next < cur_success.next then
            // try again
            _Dbg() and _Dbg.out(
              frame.depth, "RULE " + frame.rule.name + " @" +
              frame.loc.string() + " got expansion " +
              exp.size().string() + " = " + cur_success.string() +
              "; trying another expansion")
            exp.push(cur_success)
            _call_body(frame)
          else
            // we're done
            let result = Success[S, D, V](
              frame.rule,
              prev_success.start,
              prev_success.next,
              [ prev_success ])
            _del_lr(frame.rule, frame.loc)
            if frame.rule.memoize then
              _Dbg() and _Dbg.out(
                frame.depth, "RULE " + frame.rule.name + " @" +
                frame.loc.string() + " memozing expansion " +
                exp.size().string() + " = " + result.string())
              _memo((frame.rule, frame.loc)) = result
            else
              _Dbg() and _Dbg.out(
                frame.depth, "RULE " + frame.rule.name + " @" +
                frame.loc.string() + " found expansion " + exp.size().string() +
                " = " + result.string())
            end
            result
          end
        | let prev_failure: Failure[S, D, V] =>
          _Dbg() and _Dbg.out(
            frame.depth, "RULE " + frame.rule.name + " @" + frame.loc.string() +
            " got expansion " + exp.size().string() + " = " +
            cur_success.string() + "; trying another expansion")
          exp.push(cur_success)
          _call_body(frame)
        end
      | let failure: Failure[S, D, V] =>
        let result =
          match try exp(exp.size() - 1)? end
          | let prev_success: Success[S, D, V] =>
            _Dbg() and _Dbg.out(
              frame.depth, "RULE " + frame.rule.name + " @" +
              frame.loc.string() + " = " + prev_success.string() +
              " from previous expansion")
            prev_success
          else
            Failure[S, D, V](
              frame.rule,
              frame.loc,
              ErrorMsg.rule_expected(frame.rule.name, frame.loc.string()),
              failure)
          end

        if inv.size() == 1 then
          _Dbg() and _Dbg.out(
            frame.depth, "RULE " + frame.rule.name + " @" + frame.loc.string() +
            " = " + result.string() + "; not involved, memoizing")
          _memo((frame.rule, frame.loc)) = result
        else
          _Dbg() and _Dbg.out(
            frame.depth, "RULE " + frame.rule.name + " @" + frame.loc.string() +
            " = " + result.string() + "; involved, NOT memoizing")
        end
        _del_lr(frame.rule, frame.loc)
        result
      end
    else
      _Dbg() and _Dbg.out(
        frame.depth, "RULE " + frame.rule.name + " @" + frame.loc.string() +
        " FAILURE ITERATION WITH NO LR RECORD")
      Failure[S, D, V](frame.rule, frame.loc, ErrorMsg.internal_error())
    end

  fun ref _get_lr(rule: NamedRule[S, D, V] box, loc: Loc[S])
    : ((_Involved[S, D, V], _Expansions[S, D, V]) | None)
  =>
    match try _memo_lr(loc)? end
    | let involved: _Involved[S, D, V] =>
      match try involved(rule)? end
      | let expansions: _Expansions[S, D, V] =>
        (involved, expansions)
      end
    end

  fun ref _save_lr(
    rule: NamedRule[S, D, V] box,
    loc: Loc[S],
    expansions: _Expansions[S, D, V])
  =>
    let involved =
      match try _memo_lr(loc)? end
      | let involved': _Involved[S, D, V] =>
        involved'
      else
        let involved' = _Involved[S, D, V]
        _memo_lr(loc) = involved'
        involved'
      end
    involved(rule) = expansions

  fun ref _del_lr(rule: NamedRule[S, D, V] box, loc: Loc[S]) =>
    match try _memo_lr(loc)? end
    | let involved: _Involved[S, D, V] =>
      try involved.remove(rule)? end
    end

  fun _call_body(frame: _NamedRuleFrame[S, D, V]): _FrameResult[S, D, V] =>
    match frame.body
    | let node: RuleNode[S, D, V] box =>
      node.call(frame.depth + 1, frame.loc)
    else
      Failure[S, D, V](
        frame.rule, frame.loc, ErrorMsg.rule_empty(frame.rule.name))
    end