tsemekwes

TS'emekwes — a PEG parser generator for TypeScript.

Parse grammar files, compile them into Grammar objects, and parse input files against compiled grammars.

All type annotations (Grammar, Rule, AnyExp, etc.) use TypedDict and map directly to the JSON serialization format.

Usage:

    from tsemekwes import api
 1# Copyright © 2017-2026 Juancarlo Añez (apalala@gmail.com)
 2# SPDX-License-Identifier: Apache-2.0
 3"""TS'emekwes — a PEG parser generator for TypeScript.
 4
 5Parse grammar files, compile them into Grammar objects, and
 6parse input files against compiled grammars.
 7
 8All type annotations (Grammar, Rule, AnyExp, etc.) use TypedDict
 9and map directly to the JSON serialization format.
10
11Usage:
12
13```python
14    from tsemekwes import api
15```
16"""
17
18from . import __version__, api, peg, tree  # noqa: F401
19from .__version__ import __toolname__, __version__
20from .api import (
21    boot_grammar,
22    boot_pretty,
23    compile,
24    loads_grammar,
25    parse_grammar,
26    parse_inputs,
27    read_grammar,
28)
29from .peg import Exp, Grammar, Rule
30from .tree import Tree
31
32__all__ = [  # names exported by ``from tsemekwes import *``
33    "__toolname__",
34    "__version__",
35    "boot_grammar",
36    "boot_pretty",
37    "compile",
38    "Exp",
39    "Grammar",
40    "loads_grammar",
41    "parse_grammar",
42    "parse_inputs",
43    "read_grammar",
44    "Rule",
45    "Tree",
46]
# Package metadata; both names are listed in the package's ``__all__``.
__toolname__ = 'TSemekwes'
__version__ = '0.1.0'
def boot_grammar(*, output: str | None = None) -> Grammar:
    """Get the bootstrapped TS'emekwes grammar.

    Runs the tool's ``boot --json`` command through ``bun.run`` and decodes
    the resulting JSON text into a ``Grammar``.

    Args:
        output: Optional value forwarded as ``output=`` to ``bun.run``
            (presumably a destination file path). When given, the text
            returned by ``bun.run`` is decoded. When omitted, ``bun.run``
            is pointed at a temporary ``.json`` file that is read back and
            removed when the ``with`` block exits.

    Returns:
        The decoded grammar.

    Raises:
        json.JSONDecodeError: If the text obtained from the tool is not
            valid JSON.
    """
    command = ["boot", "--json"]

    if output is not None:
        return Grammar(json.loads(bun.run(command, output=output)))

    # NOTE(review): the temporary file is still open while ``bun.run`` is
    # given its name; whether a second open by name succeeds is platform
    # dependent (see the tempfile docs) — confirm if Windows must work.
    with tempfile.NamedTemporaryFile(
        mode="w+", suffix=".json", encoding="utf-8", delete=True
    ) as tmp:
        bun.run(command, output=tmp.name)
        # Read back with the same encoding the file was created with; a bare
        # read_text() would decode using the locale's default encoding.
        result = Path(tmp.name).read_text(encoding="utf-8")
    return Grammar(json.loads(result))

Get the bootstrapped TS'emekwes grammar.

def boot_pretty(*, output: str | None = None) -> str:
    """Return the bootstrapped grammar as pretty-printed text.

    Invokes the tool's ``boot --pretty`` command through ``bun.run``,
    forwarding *output*, and returns whatever that call returns.
    """
    command = ["boot", "--pretty"]
    pretty_text = bun.run(command, output=output)
    return pretty_text

Get the bootstrapped grammar as a pretty-printed string.

def compile(path: str, *, output: str | None = None) -> Grammar:
    """Compile the grammar file at *path* and return it as a Grammar.

    Invokes the tool's ``grammar -j`` command through ``bun.run``,
    forwarding *output*, and decodes the returned text as JSON.

    Note that this public name is the same as the builtin ``compile``.
    """
    compiled_json = bun.run(["grammar", "-j", path], output=output)
    return Grammar(json.loads(compiled_json))

Compile a grammar file into a Grammar.

type Exp = tsemekwes.ts.types.NullExp | tsemekwes.ts.types.CutExp | tsemekwes.ts.types.VoidExp | tsemekwes.ts.types.FailExp | tsemekwes.ts.types.DotExp | tsemekwes.ts.types.EofExp | tsemekwes.ts.types.EolExp | tsemekwes.ts.types.EmptyClosureExp | tsemekwes.ts.types.TokenExp | tsemekwes.ts.types.PatternExp | tsemekwes.ts.types.ConstantExp | tsemekwes.ts.types.AlertExp | tsemekwes.ts.types.CallExp | tsemekwes.ts.types.RuleIncludeExp | tsemekwes.ts.types.GroupExp | tsemekwes.ts.types.SkipGroupExp | tsemekwes.ts.types.LookaheadExp | tsemekwes.ts.types.NegativeLookaheadExp | tsemekwes.ts.types.SkipToExp | tsemekwes.ts.types.AltExp | tsemekwes.ts.types.OptionalExp | tsemekwes.ts.types.ClosureExp | tsemekwes.ts.types.PositiveClosureExp | tsemekwes.ts.types.OverrideExp | tsemekwes.ts.types.OverrideListExp | tsemekwes.ts.types.NamedExp | tsemekwes.ts.types.NamedListExp | tsemekwes.ts.types.JoinExp | tsemekwes.ts.types.PositiveJoinExp | tsemekwes.ts.types.GatherExp | tsemekwes.ts.types.PositiveGatherExp | tsemekwes.ts.types.SeqExp | tsemekwes.ts.types.ChoiceExp
class Grammar(TypedDict):
    """A compiled grammar, as a typed dictionary.

    Per the package docstring, these TypedDict declarations map directly
    to the JSON serialization format. All four keys are required (the
    class uses the default ``total=True``).
    """

    name: str
    # Builtin generics replace the deprecated ``typing.Dict``/``typing.List``
    # aliases, matching the ``list[...]``/``X | None`` style used elsewhere.
    directives: dict[str, Any]
    keywords: list[str]
    rules: list[Rule]
name: str
directives: Dict[str, Any]
keywords: List[str]
rules: List[Rule]
def loads_grammar(json_str: str) -> Grammar:
    """Build a Grammar from its JSON text.

    The string is decoded with ``json.loads`` and the decoded value is
    passed to ``Grammar``; no further validation happens here.
    """
    decoded = json.loads(json_str)
    return Grammar(decoded)

Deserialize a JSON string into a Grammar.

def parse_grammar(
    path: str,
    *,
    trace: bool = False,
    output: str | None = None,
) -> Tree:
    """Parse the grammar file at *path* and return its parse tree.

    The command line is produced by ``_build_grammar_args`` from *path* and
    *trace*. The text returned by ``bun.run`` (called with *output*
    forwarded) is decoded as JSON and returned unchanged.
    """
    command = _build_grammar_args(path, trace=trace)
    raw_tree = bun.run(command, output=output)
    return json.loads(raw_tree)

Parse a grammar file and return the parse tree.

def parse_inputs(
    path: str,
    inputs: list[str],
    *,
    start: str | None = None,
    nproc: int | None = None,
    trace: bool = False,
    output: str | None = None,
) -> list[Tree]:
    """Parse every file in *inputs* with the grammar at *path*.

    The command line is produced by ``_build_run_args``; the text returned
    by ``bun.run`` is decoded with ``parse_jsonl``.

    Returns:
        The decoded trees, exactly one per entry in *inputs*.

    Raises:
        ValueError: If the number of decoded trees differs from the number
            of inputs.
    """
    run_args = _build_run_args(
        path, inputs, start=start, nproc=nproc, trace=trace
    )
    trees = parse_jsonl(bun.run(run_args, output=output))

    # Happy path first: one tree came back for each input.
    if len(trees) == len(inputs):
        return trees
    raise ValueError(
        f"parse_inputs: expected {len(inputs)} result(s), got {len(trees)}"
    )

Parse each input file against the grammar and return one Tree per input (decoded from the tool's JSONL output).

def read_grammar(
    path: str,
) -> Grammar:
    """Read a compiled grammar JSON file as a Grammar.

    Args:
        path: Location of the JSON file to load.

    Returns:
        The decoded file contents passed to ``Grammar``.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Hand json.loads the raw bytes so it applies JSON's own encoding
    # detection (UTF-8/16/32); a bare read_text() would decode with the
    # locale's default encoding and can corrupt non-ASCII content.
    raw = Path(path).read_bytes()
    return Grammar(json.loads(raw))

Read a compiled grammar JSON file as a Grammar.

class Rule(typing.TypedDict):
194class Rule(TypedDict):
195    name: str
196    params: List[str]
197    kwparams: Dict[str, str]
198    decorators: List[str]
199    base: str | None
200    is_name: bool
201    is_tokn: bool
202    no_memo: bool
203    no_stak: bool
204    is_memo: bool
205    is_lrec: bool
206    exp: 'Exp'
name: str
params: List[str]
kwparams: Dict[str, str]
decorators: List[str]
base: str | None
is_name: bool
is_tokn: bool
no_memo: bool
no_stak: bool
is_memo: bool
is_lrec: bool
exp: Exp
type Tree = dict[str, Tree] | list[Tree] | object | str | int | float | bool | None