From b25e66c3d3e4966e34abd1e6bb50b6ff7b33fb1b Mon Sep 17 00:00:00 2001
From: Matthias Unterrainer
Date: Sat, 17 Jan 2026 15:59:40 +0100
Subject: [PATCH] updated grammar.js

---
Review notes (not part of the commit message):
- The integer-literal tokens use /[0-9]+/, which also accepts leading zeros
  (e.g. "007"); the Flex definition quoted in the comments
  ({onenine}{digit}*|{digit}) does not. Confirm the looser form is intended.
- Indentation inside the hunk was restored in two-space steps after the
  original whitespace was lost; re-check with `git apply --check`.

 grammar.js | 227 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 224 insertions(+), 3 deletions(-)

diff --git a/grammar.js b/grammar.js
index 1c7d2b0..6eef533 100644
--- a/grammar.js
+++ b/grammar.js
@@ -10,8 +10,229 @@
 module.exports = grammar({
   name: "mc",
 
+  // Whitespace and comments are skipped between tokens.
+  extras: ($) => [
+    /\s/, // space, tab, newline, etc.
+    $.comment,
+  ],
+
   rules: {
-    // TODO: add the actual grammar rules
-    source_file: $ => "hello"
-  }
+    // Temporary top-level rule: just a sequence of tokens.
+    // Replace this with your real syntax later.
+    source_file: ($) => repeat($._token),
+
+    // Comments: // line and /* block */
+    comment: ($) =>
+      token(
+        choice(
+          // // line comment
+          seq("//", /.*/),
+
+          // /* block comment */
+          // Standard Tree-sitter pattern for non-nested C-style comments.
+          seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/"),
+        ),
+      ),
+
+    // A catch-all for every token type we care about.
+    _token: ($) =>
+      choice(
+        // --- Punctuation / basics ---
+        ";",
+        ",",
+        ".",
+        "->",
+
+        "(",
+        ")",
+        "[",
+        "]",
+        "{",
+        "}",
+
+        // --- Arithmetic operators ---
+        "++",
+        "--",
+        "+=",
+        "-=",
+        "*=",
+        "/=",
+        "+",
+        "-",
+        "*",
+        "/",
+
+        // --- Boolean / comparison operators ---
+        "&&",
+        "||",
+        "!",
+        "!=",
+        "==",
+        // Both <= and =< are accepted; same for >= and =>.
+        choice("<=", "=<"),
+        choice(">=", "=>"),
+        "<",
+        ">",
+
+        "=",
+
+        // --- Keywords ---
+        "if",
+        "else",
+
+        "for",
+        "while",
+
+        "break",
+        "continue",
+
+        "return",
+
+        "function",
+
+        "print",
+
+        "const",
+        "mut",
+
+        // Types
+        "void",
+        "bool",
+        "_Bool",
+        "char",
+
+        "uint",
+        "U8",
+        "uint8_t",
+        "U16",
+        "uint16_t",
+        "U32",
+        "uint32_t",
+        "U64",
+        "uint64_t",
+
+        "int",
+        "I8",
+        "int8_t",
+        "I16",
+        "int16_t",
+        "I32",
+        "int32_t",
+        "I64",
+        "int64_t",
+
+        "string",
+
+        // Literals / constants
+        "NULL",
+        "true",
+        "false",
+
+        // Identifiers and literal tokens:
+        $.identifier,
+        $.signed_integer_literal,
+        $.unsigned_integer_literal,
+        $.plain_integer_literal,
+        $.char_literal,
+        $.string_literal,
+      ),
+
+    // --------------------
+    // Identifiers
+    // --------------------
+
+    // identifier: {character}({nondigit}|{digit})*
+    // character: [a-zA-Z]
+    // nondigit: _ | character
+    identifier: ($) => /[a-zA-Z][a-zA-Z0-9_]*/,
+
+    // --------------------
+    // Integer literals (with suffixes)
+    // --------------------
+    //
+    // Flex:
+    // integer {digits}
+    // digits {onenine}{digit}*|{digit}
+    // signed_suffix [iI]{bit_size}?
+    // unsigned_suffix [uU]{bit_size}?
+    // bit_size 8|16|32|64
+    //
+    // {integer}{signed_suffix} -> SIGNED_LITERAL
+    // {integer}{unsigned_suffix} -> UNSIGNED_LITERAL
+    // {integer} -> SIGNED_LITERAL (width 0)
+
+    signed_integer_literal: ($) =>
+      token(
+        seq(
+          /[0-9]+/, // integer
+          /[iI]/, // signed suffix head
+          optional(choice("8", "16", "32", "64")), // optional bit size
+        ),
+      ),
+
+    unsigned_integer_literal: ($) =>
+      token(
+        seq(
+          /[0-9]+/, // integer
+          /[uU]/, // unsigned suffix head
+          optional(choice("8", "16", "32", "64")), // optional bit size
+        ),
+      ),
+
+    // Unsuffixed integer (still treated as signed in your Flex rules)
+    plain_integer_literal: ($) => token(/[0-9]+/),
+
+    // --------------------
+    // Char + string literals (with escapes)
+    // --------------------
+    //
+    // simple_escape_sequence: \\[abfnrtv\\\'\"]
+    // decimal_escape_sequence: \\[0]
+    // escape: simple | decimal
+    // c_char: [^\\'\n]
+    // s_char: [^\\"\n]
+    //
+    // '{c_char|escape}'
+    // "{s_char|escape}*"
+
+    char_literal: ($) =>
+      token(
+        seq(
+          "'",
+          choice(
+            /[^\\'\n]/, // c_char
+            seq(
+              "\\",
+              choice(
+                // escape
+                /[abfnrtv\\'"]/, // simple escapes
+                "0", // \0
+              ),
+            ),
+          ),
+          "'",
+        ),
+      ),
+
+    string_literal: ($) =>
+      token(
+        seq(
+          '"',
+          repeat(
+            choice(
+              /[^\\\"\n]/, // s_char
+              seq(
+                "\\",
+                choice(
+                  // escape
+                  /[abfnrtv\\'"]/, // simple escapes
+                  "0", // \0
+                ),
+              ),
+            ),
+          ),
+          '"',
+        ),
+      ),
+  },
 });