Skip to content

API Reference

Parser

A Parser is an object that wraps a function whose arguments are a string to be parsed and the index on which to begin parsing. The function should return either Ok(value, next_index), where the value is the yielded value and the next index is where to continue the parse, or Err(index, expected, stream), where index is the index of the failure, expected is a list of strings indicating what was expected, and stream is the input being parsed.

Source code in persil/parser.py
23
24
25
26
27
def __init__(
    self,
    wrapped_fn: Callable[[Input, int], Result[Output]],
) -> None:
    """
    Wrap a parsing function.

    Parameters
    ----------
    wrapped_fn
        Function taking the stream to parse and the index at which to start
        parsing, and returning a `Result`. It is stored as-is on the
        instance.
    """
    self.wrapped_fn = wrapped_fn

cut

Commit to the current branch by raising the error if one is returned.

Without cut, the error is returned, not raised, and bubbles up until it is handled by another parser (see optional for an example), or raised by the parse(_partial) top-level method.

Source code in persil/parser.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def cut(self) -> "Parser[Input, Output]":
    """
    Commit to the current branch: if the initial parser returns an error,
    that error is raised instead of being returned.

    Without `cut`, the error is _returned_, not _raised_, and bubbles up
    until it is handled by another parser (see `optional` for an example),
    or raised by the `parse(_partial)` top-level method.
    """

    def raise_on_failure(stream: Input, index: int) -> Result[Output]:
        # Delegate to the result itself: `ok_or_raise` hands back the result
        # or raises, so nothing downstream gets a chance to recover.
        return self(stream, index).ok_or_raise()

    return Parser(raise_on_failure)

then

Returns a parser which, if the initial parser succeeds, will continue parsing with other. This will produce the value produced by other.

PARAMETER DESCRIPTION
other

Other parser to apply if the initial parser succeeds.

TYPE: Parser[In, T]

Source code in persil/parser.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def then(self, other: "Parser[Input, T]") -> "Parser[Input, T]":
    """
    Returns a parser which, if the initial parser succeeds, will
    continue parsing with `other`. This will produce the
    value produced by `other`; the value of the initial parser is dropped.

    If the initial parser fails, its error is returned unchanged and
    `other` is not run.

    Parameters
    ----------
    other
        Other parser to apply if the initial parser succeeds. It reads the
        same input type as the initial parser.
    """

    @Parser
    def then_parser(stream: Input, index: int) -> Result[T]:
        first = self(stream, index)

        # A failure of the initial parser short-circuits the sequence.
        if isinstance(first, Err):
            return first

        # Resume exactly where the initial parser stopped.
        # NOTE(review): the ignore below presumably papered over the former
        # `In`/`Input` annotation mismatch - confirm and drop if unused.
        return other(stream, first.index)  # type: ignore

    return then_parser

skip

Returns a parser which, if the initial parser succeeds, will continue parsing with other. It will produce the value produced by the initial parser.

PARAMETER DESCRIPTION
other

Other parser to apply if the initial parser succeeds.

TYPE: Parser

Source code in persil/parser.py
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def skip(self, other: "Parser") -> "Parser[Input, Output]":
    """
    Returns a parser which, if the initial parser succeeds, will
    continue parsing with `other`. It will produce the
    value produced by the initial parser.

    The input matched by `other` is consumed: the resulting parser resumes
    after `other`, and only the value of `other` is discarded.

    The resulting parser fails if either parser fails.

    Parameters
    ----------
    other
        Other parser to apply if the initial parser succeeds.
    """

    @Parser
    def skip_parser(stream: Input, index: int) -> Result[Output]:
        result = self(stream, index)

        if isinstance(result, Err):
            return result

        other_result = other(stream, result.index)

        if isinstance(other_result, Err):
            return other_result

        # Keep the initial value but advance past `other`. Returning `result`
        # itself would rewind to the position before `other`, so the next
        # parser in a sequence would see the skipped input again.
        return Ok(result.value, other_result.index)

    return skip_parser

combine

Returns a parser which, if the initial parser succeeds, will continue parsing with other. It will produce a tuple containing the results from both parsers, in order.

The resulting parser fails if other fails.

PARAMETER DESCRIPTION
other

Other parser to combine.

TYPE: Parser[Input, T]

Source code in persil/parser.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def combine(
    self,
    other: "Parser[Input, T]",
) -> "Parser[Input, tuple[Output, T]]":
    """
    Returns a parser that runs the initial parser and then `other`, and
    produces a tuple holding both values, in that order.

    The resulting parser fails as soon as one of the two parsers fails;
    `other` starts where the initial parser stopped.

    Parameters
    ----------
    other
        Other parser to combine.
    """

    def combined_parser(stream: Input, index: int) -> Result[tuple[Output, T]]:
        left = self(stream, index)
        if isinstance(left, Err):
            return left

        right = other(stream, left.index)
        if isinstance(right, Err):
            return right

        # Both succeeded: pair the values and resume after `other`.
        pair = (left.value, right.value)
        return Ok(pair, right.index)

    return Parser(combined_parser)

parse

Parses a string or list of tokens and returns the result, or raises a ParseError.

PARAMETER DESCRIPTION
stream

Input to match the parser against.

TYPE: Input

Source code in persil/parser.py
152
153
154
155
156
157
158
159
160
161
162
163
164
165
def parse(
    self,
    stream: Input,
) -> Output:
    """
    Parses a string or list of tokens and returns the result, or raises
    a ParseError.

    The parser is chained with `eof` (via `<<`) before being run through
    `parse_partial`; the remainder returned by `parse_partial` is ignored.

    Parameters
    ----------
    stream
        Input to match the parser against.
    """
    whole_input_parser = self << eof
    value, _ = whole_input_parser.parse_partial(stream)
    return value

parse_partial

Parses the longest possible prefix of a given string. Returns a tuple of the result and the unparsed remainder, or raises ParseError.

PARAMETER DESCRIPTION
stream

Input to match the parser against.

TYPE: Input

Source code in persil/parser.py
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
def parse_partial(
    self,
    stream: Input,
) -> tuple[Output, Input]:
    """
    Runs the parser from the start of `stream`, without requiring it to
    reach the end of the input.
    Returns a tuple of the result and the unparsed remainder,
    or raises `ParseError`.

    Parameters
    ----------
    stream
        Input to match the parser against.
    """

    outcome = self(stream, 0)

    # A returned error is raised here.
    if isinstance(outcome, Err):
        raise outcome

    # Everything from the final index onwards was left unparsed.
    return (outcome.value, cast(Input, stream[outcome.index :]))

map

Returns a parser that transforms the produced value of the initial parser with map_function.

Source code in persil/parser.py
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def map(
    self,
    map_function: Callable[[Output], T],
) -> "Parser[Input, T]":
    """
    Returns a parser that applies `map_function` to the result of the
    initial parser.

    The transformation is delegated to the `map` method of the result
    returned by the initial parser.
    """

    def mapped_parser(stream: Input, index: int) -> Result[T]:
        return self(stream, index).map(map_function)

    return Parser(mapped_parser)

result

Returns a parser that, if the initial parser succeeds, always produces the passed in value.

Source code in persil/parser.py
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
def result(self, value: T) -> "Parser[Input, T]":
    """
    Returns a parser that runs the initial parser and, when it succeeds,
    replaces the produced value by `value`.

    The index reached by the initial parser is kept. A failure of the
    initial parser is returned unchanged.
    """

    def result_parser(stream: Input, index: int) -> Result[T]:
        outcome = self(stream, index)

        if not isinstance(outcome, Err):
            # Same position, constant value.
            return Ok(value, outcome.index)

        return outcome

    return Parser(result_parser)

times

Returns a parser that expects the initial parser at least min times, and at most max times, and produces a list of the results. If only one argument is given, the parser is expected exactly that number of times.

PARAMETER DESCRIPTION
min

Minimal number of times the parser should match.

TYPE: int

max

Maximal number of times the parser should match. Defaults to min.

TYPE: int | None DEFAULT: None

Source code in persil/parser.py
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
def times(
    self,
    min: int,
    max: int | None = None,
) -> "Parser[Input, list[Output]]":
    """
    Returns a parser that applies the initial parser repeatedly, at least
    `min` times and at most `max` times, and produces the list of values.
    With a single argument, the parser is expected exactly that number
    of times.

    Parameters
    ----------
    min
        Minimal number of times the parser should match.
    max
        Maximal number of times the parser should match.
        Defaults to `min`.
    """
    upper_bound = min if max is None else max

    def times_parser(stream: Input, index: int) -> Result[list[Output]]:
        collected = []
        matched = 0
        position = index

        while matched < upper_bound:
            outcome = self(stream, position)

            if not isinstance(outcome, Ok):
                if matched >= min:
                    # Enough items already: stop quietly, before the failure.
                    break
                # Too few items: report the failure of the initial parser.
                return outcome

            collected.append(outcome.value)
            position = outcome.index
            matched += 1

        return Ok(collected, position)

    return Parser(times_parser)

many

Returns a parser that expects the initial parser 0 or more times, and produces a list of the results.

Source code in persil/parser.py
267
268
269
270
271
272
def many(self) -> "Parser[Input, list[Output]]":
    """
    Returns a parser that applies the initial parser as many times as it
    matches (possibly zero) and produces the list of values.
    """
    # `times` needs an explicit upper bound; 9999999 is the bound used here.
    return self.times(min=0, max=9999999)

at_most

Returns a parser that expects the initial parser at most n times, and produces a list of the results.

PARAMETER DESCRIPTION
n

Maximum number of times the parser should match.

TYPE: int

Source code in persil/parser.py
274
275
276
277
278
279
280
281
282
283
284
def at_most(self, n: int) -> "Parser[Input, list[Output]]":
    """
    Returns a parser that applies the initial parser between zero and `n`
    times, and produces the list of values.

    Parameters
    ----------
    n
        Maximum number of times the parser should match.
    """
    return self.times(min=0, max=n)

at_least

Returns a parser that expects the initial parser at least n times, and produces a list of the results.

PARAMETER DESCRIPTION
n

Minimum number of times the parser should match.

TYPE: int

Source code in persil/parser.py
286
287
288
289
290
291
292
293
294
295
296
def at_least(self, n: int) -> "Parser[Input, list[Output]]":
    """
    Returns a parser that applies the initial parser `n` times or more,
    and produces the list of values.

    Parameters
    ----------
    n
        Minimum number of times the parser should match.
    """
    # Exactly `n` mandatory matches, followed by any number of extra ones;
    # the two parsers are joined with `+`.
    mandatory = self.times(n)
    extra = self.many()
    return mandatory + extra

optional

Returns a parser that expects the initial parser zero or once, and maps the result to a given default value in the case of no match. If no default value is given, None is used.

PARAMETER DESCRIPTION
default

Default value to output.

TYPE: T | None DEFAULT: None

Source code in persil/parser.py
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
def optional(self, default: T | None = None) -> "Parser[Input, Output | T | None]":
    """
    Returns a parser that tries the initial parser once. On a match, its
    value is produced; otherwise `default` is produced and no input is
    consumed. If no default value is given, `None` is used.

    Parameters
    ----------
    default
        Default value to output.
    """

    def optional_parser(stream: Input, index: int) -> Result[Output | T | None]:
        attempt = self(stream, index)

        if not isinstance(attempt, Ok):
            # No match: succeed anyway, at the index where we started.
            return Ok(default, index)

        return Ok(attempt.value, attempt.index)

    return Parser(optional_parser)

until

Returns a parser that expects the initial parser followed by other. The initial parser is expected at least min times and at most max times.

The new parser consumes other and returns it as part of an output tuple. If you are looking for a non-consuming parser, check out until_and_discard.

PARAMETER DESCRIPTION
other

Other parser to check.

TYPE: Parser[Input, T]

min

Minimum number of times that the initial parser should match before matching other

TYPE: int DEFAULT: 0

max

Maximum number of times that the initial parser should match before matching other

TYPE: int DEFAULT: 999999

Source code in persil/parser.py
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
def until(
    self,
    other: "Parser[Input, T]",
    min: int = 0,
    max: int = 999999,
) -> "Parser[Input, tuple[list[Output], T]]":
    """
    Returns a parser that expects the initial parser followed by `other`.
    The initial parser is expected at least `min` times and at most `max` times.

    On success, the new parser produces a tuple `(values, other_value)`:
    the list of values produced by the initial parser and the value
    produced by `other`. If you only need the list of values, check out
    `until_and_discard`.

    The parser fails when `other` is not found after `max` items, when the
    initial parser stops matching before `other` is found, or when fewer
    than `min` items were matched.

    Parameters
    ----------
    other
        Other parser to check.
    min
        Minimum number of times that the initial parser should match before
        matching `other`
    max
        Maximum number of times that the initial parser should match before
        matching `other`
    """

    @Parser
    def until_parser(stream: Input, index: int) -> Result[tuple[list[Output], T]]:
        values: list[Output] = []
        times = 0

        while True:
            # Try the terminator first, so that we stop as early as allowed.
            res = other(stream, index)

            if isinstance(res, Ok) and times >= min:
                # NOTE(review): the index returned here is the position
                # *before* `other`, so the input matched by `other` is left
                # in the stream even though its value is returned. Kept
                # as-is because `until_and_discard` builds on this parser and
                # is documented as not consuming `other` - confirm the intent.
                return Ok((values, res.value), index)

            # Another item is required at this point. Refuse to read it once
            # `max` items were already matched. The check must not depend on
            # `res` being a success: that case has already returned above,
            # which previously made this branch unreachable and left `max`
            # unenforced.
            if times >= max:
                return Err(index, [f"at most {max} items"], stream)

            # Terminator not acceptable yet: try the initial parser.
            result = self(stream, index)

            if isinstance(result, Ok):
                values.append(result.value)
                index = result.index
                times += 1
                continue

            if times >= min:
                # return failure, parser is not followed by other
                return Err(index, ["did not find other parser"], stream)
            else:
                # return failure, it did not match parser at least min times
                return Err(
                    index,
                    [f"at least {min} items; got {times} item(s)"],
                    stream,
                )

    return until_parser

until_and_discard

Returns a parser that expects the initial parser followed by other. The initial parser is expected at least min times and at most max times.

Does not consume other. If you are looking for that behaviour, check out until.

PARAMETER DESCRIPTION
other

Other parser to check.

TYPE: Parser[Input, T]

min

Minimum number of times that the initial parser should match before matching other.

TYPE: int DEFAULT: 0

max

Maximum number of times that the initial parser should match before matching other.

TYPE: int DEFAULT: 999999

Source code in persil/parser.py
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def until_and_discard(
    self,
    other: "Parser[Input, T]",
    min: int = 0,
    max: int = 999999,
) -> "Parser[Input, list[Output]]":
    """
    Returns a parser that expects the initial parser followed by `other`,
    and produces only the list of values of the initial parser.
    The initial parser is expected at least `min` times and at most `max` times.

    This is `until` with the value of `other` dropped from the output.
    If you need that value, check out `until`.

    Parameters
    ----------
    other
        Other parser to check.
    min
        Minimum number of times that the initial parser should match before
        matching `other`.
    max
        Maximum number of times that the initial parser should match before
        matching `other`.
    """
    with_terminator = self.until(other, min=min, max=max)

    # `until` produces `(values, other_value)`; keep the first element only.
    return with_terminator.map(lambda output: output[0])

sep_by

Returns a new parser that repeats the initial parser and collects the results in a list. Between each item, the sep parser is run (and its return value is discarded). By default it repeats with no limit, but minimum and maximum values can be supplied.

PARAMETER DESCRIPTION
sep

Parser that separates values.

TYPE: Parser

min

Minimum number of times that the initial parser should match.

TYPE: int DEFAULT: 0

max

Maximum number of times that the initial parser should match.

TYPE: int DEFAULT: 999999

Source code in persil/parser.py
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
def sep_by(
    self,
    sep: "Parser",
    *,
    min: int = 0,
    max: int = 999999,
) -> "Parser[Input, list[Output]]":
    """
    Returns a new parser that repeats the initial parser and
    collects the results in a list. Between each item, the `sep` parser
    is run (and its return value is discarded). The default bounds are
    0 and 999999; other minimum and maximum values can be supplied.

    Parameters
    ----------
    sep
        Parser that separates values.
    min
        Minimum number of times that the initial parser should match.
    max
        Maximum number of times that the initial parser should match.
    """
    empty: Parser[Input, list[Output]] = success([])

    # Nothing may be matched at all: the empty list is the only outcome.
    if max == 0:
        return empty

    # One leading item, then (separator, item) pairs for the remaining ones.
    first_item = self.times(1)
    following_items = (sep >> self).times(min - 1, max - 1)
    parser = first_item + following_items

    # When zero items are acceptable, fall back to the empty list.
    if min == 0:
        parser |= empty

    return parser

desc

Returns a new parser with a description added, which is used in the error message if parsing fails.

PARAMETER DESCRIPTION
description

Description in case of failure.

TYPE: str

Source code in persil/parser.py
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
def desc(
    self,
    description: str,
) -> "Parser[Input, Output]":
    """
    Returns a new parser that behaves like the initial parser, but reports
    `description` as what was expected when parsing fails.

    Parameters
    ----------
    description
        Description in case of failure.
    """

    def desc_parser(stream: Input, index: int) -> Result[Output]:
        outcome = self(stream, index)

        if not isinstance(outcome, Ok):
            # The original error is replaced by a new one carrying only the
            # description, located at the index where this parser started.
            return Err(index, [description], stream)

        return outcome

    return Parser(desc_parser)

should_fail

Returns a parser that fails when the initial parser succeeds, and succeeds when the initial parser fails (consuming no input). A description must be passed which is used in parse failure messages.

This is essentially a negative lookahead.

PARAMETER DESCRIPTION
description

Description in case of failure.

TYPE: str

Source code in persil/parser.py
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
def should_fail(
    self,
    description: str,
) -> "Parser[Input, Result[Output]]":
    """
    Returns a parser that succeeds when the initial parser fails
    (consuming no input), and fails when the initial parser succeeds.
    A description must be passed, which is used in parse failure messages.

    On success, the produced value is the failed result of the initial
    parser.

    This is essentially a negative lookahead.

    Parameters
    ----------
    description
        Description in case of failure.
    """

    def fail_parser(stream: Input, index: int):
        outcome = self(stream, index)

        if not isinstance(outcome, Ok):
            # Expected failure: succeed without moving from `index`.
            return Ok(outcome, index)

        return Err(index, [description], stream)

    return Parser(fail_parser)

Stream

The Stream API lets you apply parsers iteratively, and handles the index bookkeeping for you. Its design goal is to be used with the from_stream decorator.

Source code in persil/stream.py
24
25
26
def __init__(self, inner: In, index: int = 0) -> None:
    """
    Store the wrapped input and the current index.

    Parameters
    ----------
    inner
        The wrapped input (presumably the data parsers are applied to;
        confirm against the methods of the class).
    index
        Initial index, `0` by default.
    """
    self.inner = inner
    self.index = index

generate

Create a complex parser using the generator syntax.

You should prefer the from_stream syntax, which is an alternative that plays better with types.

Source code in persil/generator.py
47
48
49
50
51
52
53
54
55
56
57
58
def generate(gen):
    """
    Create a complex parser using the generator syntax.

    `gen` is either the function to convert, or a string. When a string is
    given, a decorator is returned instead: it converts the decorated
    function and attaches the string as description through `desc`.

    You should prefer the `from_stream` syntax, which is an alternative that
    plays better with types.
    """
    if not isinstance(gen, str):
        return _generate(gen)

    description = gen

    def decorator(f):
        return _generate(f).desc(description)

    return decorator

success

Returns a parser that does not consume any of the stream, but produces value.

PARAMETER DESCRIPTION
value

Value to return.

TYPE: T

Source code in persil/parser.py
534
535
536
537
538
539
540
541
542
543
544
545
546
def success(
    value: T,
) -> Parser[Input, T]:
    """
    Returns a parser that always succeeds with `value`, leaving the index
    untouched (no input is consumed).

    Parameters
    ----------
    value
        Value to return.
    """

    @Parser
    def success_parser(_stream, index):
        return Ok(value, index)

    return success_parser

fail

Returns a parser that always fails with the provided error message.

Source code in persil/parsers/outcome.py
 7
 8
 9
10
11
12
13
14
15
16
def fail(expected: str) -> Parser:
    """
    Returns a parser that fails unconditionally, at the index where it is
    applied, with `expected` as error message.
    """
    return Parser(lambda stream, index: Err(index, [expected], stream))

line_info

Return the line information (row, col)

Source code in persil/parsers/index.py
17
18
19
20
@Parser
def line_info(stream: S, index: int) -> Result[RowCol]:
    """Return the line information (row, col) at `index`, leaving the index unchanged."""
    position = line_info_at(stream, index)
    return Ok(position, index)

regex_groupdict

Returns a parser that leverages named groups to obtain the return value.

For instance, the pattern (?P<year>\d{4})-(?P<month>\d{2}) will return a dictionary with two keys, year and month.

Source code in persil/parsers/regex.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def regex_groupdict(
    exp: str | re.Pattern[str],
    flags=0,
) -> Parser[str, dict[str, str | None]]:
    r"""
    Returns a parser that matches a regular expression at the current index
    and produces the dictionary of its named groups.

    For instance, the pattern `(?P<year>\d{4})-(?P<month>\d{2})`
    will return a dictionary with two keys, `year` and `month`.

    `flags` is only used when `exp` is given as a string (or bytes) and has
    to be compiled here; an already compiled pattern is used as-is.
    On failure, the pattern itself is reported as what was expected.
    """
    pattern = re.compile(exp, flags) if isinstance(exp, (str, bytes)) else exp

    def regex_groupdict_parser(
        stream: str,
        index: int,
    ) -> Result[dict[str, str | None]]:
        found = pattern.match(stream, index)

        if not found:
            return Err(index, [pattern.pattern], stream)

        # Resume right after the matched text.
        return Ok(found.groupdict(), found.end())

    return Parser(regex_groupdict_parser)