updated grammar.js

2026-01-17 15:59:40 +01:00
parent 1b4a4db26e
commit b25e66c3d3
1 changed files with 224 additions and 3 deletions
--- a/grammar.js
+++ b/grammar.js
@@ -10,8 +10,229 @@
 module.exports = grammar({
  name: "mc",
  // Whitespace and comments are skipped between tokens.
  extras: ($) => [
    /\s/, // space, tab, newline, etc.
    $.comment,
  ],
  rules: {
-    // TODO: add the actual grammar rules
+    // Temporary top-level rule: just a sequence of tokens.
-    source_file: $ => "hello"
+    // Replace this with your real syntax later.
-  }
+    source_file: ($) => repeat($._token), // zero or more `_token`s, in any order
    // Comments: // line and /* block */
    comment: ($) =>
      token(
        choice(
          // // line comment
          seq("//", /.*/),
          // /* block comment */
          // Standard Tree-sitter pattern for non-nested C-style comments.
          seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/"),
        ),
      ),
    // A catch-all for every token type we care about.
    _token: ($) =>
      choice(
        // --- Punctuation / basics ---
        ";",
        ",",
        ".",
        "->",
        "(",
        ")",
        "[",
        "]",
        "{",
        "}",
        // --- Arithmetic operators ---
        "++",
        "--",
        "+=",
        "-=",
        "*=",
        "/=",
        "+",
        "-",
        "*",
        "/",
        // --- Boolean / comparison operators ---
        "&&",
        "||",
        "!",
        "!=",
        "==",
        // Both <= and =< are accepted; same for >= and =>.
        choice("<=", "=<"),
        choice(">=", "=>"),
        "<",
        ">",
        "=",
        // --- Keywords ---
        "if",
        "else",
        "for",
        "while",
        "break",
        "continue",
        "return",
        "function",
        "print",
        "const",
        "mut",
        // Types
        "void",
        "bool",
        "_Bool",
        "char",
        "uint",
        "U8",
        "uint8_t",
        "U16",
        "uint16_t",
        "U32",
        "uint32_t",
        "U64",
        "uint64_t",
        "int",
        "I8",
        "int8_t",
        "I16",
        "int16_t",
        "I32",
        "int32_t",
        "I64",
        "int64_t",
        "string",
        // Literals / constants
        "NULL",
        "true",
        "false",
        // Identifiers and literal tokens:
        $.identifier,
        $.signed_integer_literal,
        $.unsigned_integer_literal,
        $.plain_integer_literal,
        $.char_literal,
        $.string_literal,
      ),
    // --------------------
    // Identifiers
    // --------------------
    // identifier: {character}({nondigit}|{digit})*
    // character: [a-zA-Z]
    // nondigit: _ | character
    identifier: ($) => /[a-zA-Z][a-zA-Z0-9_]*/,
    // --------------------
    // Integer literals (with suffixes)
    // --------------------
    //
    // Flex:
    // integer             {digits}
    // digits              {onenine}{digit}*|{digit}
    // signed_suffix       [iI]{bit_size}?
    // unsigned_suffix     [uU]{bit_size}?
    // bit_size            8|16|32|64
    //
    // {integer}{signed_suffix}     -> SIGNED_LITERAL
    // {integer}{unsigned_suffix}   -> UNSIGNED_LITERAL
    // {integer}                    -> SIGNED_LITERAL (width 0)
    signed_integer_literal: ($) =>
      token(
        seq(
          /[0-9]+/, // integer
          /[iI]/, // signed suffix head
          optional(choice("8", "16", "32", "64")), // optional bit size
        ),
      ),
    unsigned_integer_literal: ($) =>
      token(
        seq(
          /[0-9]+/, // integer
          /[uU]/, // unsigned suffix head
          optional(choice("8", "16", "32", "64")), // optional bit size
        ),
      ),
    // Unsuffixed integer (still treated as signed in your Flex rules)
    plain_integer_literal: ($) => token(/[0-9]+/),
    // --------------------
    // Char + string literals (with escapes)
    // --------------------
    //
    // simple_escape_sequence:    \\[abfnrtv\\\'\"]
    // decimal_escape_sequence:   \\[0]
    // escape:                    simple | decimal
    // c_char:                    [^\\'\n]
    // s_char:                    [^\\"\n]
    //
    // '{c_char|escape}'
    // "{s_char|escape}*"
    char_literal: ($) =>
      token(
        seq(
          "'",
          choice(
            /[^\\'\n]/, // c_char
            seq(
              "\\",
              choice(
                // escape
                /[abfnrtv\\'"]/, // simple escapes
                "0", // \0
              ),
            ),
          ),
          "'",
        ),
      ),
    string_literal: ($) =>
      token(
        seq(
          '"',
          repeat(
            choice(
              /[^\\\"\n]/, // s_char
              seq(
                "\\",
                choice(
                  // escape
                  /[abfnrtv\\'"]/, // simple escapes
                  "0", // \0
                ),
              ),
            ),
          ),
          '"',
        ),
      ),
  },
 });