updated grammar.js

This commit is contained in:
2026-01-17 15:59:40 +01:00
parent 1b4a4db26e
commit b25e66c3d3

View File

@@ -10,8 +10,229 @@
module.exports = grammar({ module.exports = grammar({
name: "mc", name: "mc",
// Whitespace and comments are skipped between tokens.
extras: ($) => [
/\s/, // space, tab, newline, etc.
$.comment,
],
rules: { rules: {
// TODO: add the actual grammar rules // Temporary top-level rule: just a sequence of tokens.
source_file: $ => "hello" // Replace this with your real syntax later.
} source_file: ($) => repeat($._token),
// Comments: // line and /* block */
comment: ($) =>
token(
choice(
// // line comment
seq("//", /.*/),
// /* block comment */
// Standard Tree-sitter pattern for non-nested C-style comments.
seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/"),
),
),
// A catch-all for every token type we care about.
_token: ($) =>
choice(
// --- Punctuation / basics ---
";",
",",
".",
"->",
"(",
")",
"[",
"]",
"{",
"}",
// --- Arithmetic operators ---
"++",
"--",
"+=",
"-=",
"*=",
"/=",
"+",
"-",
"*",
"/",
// --- Boolean / comparison operators ---
"&&",
"||",
"!",
"!=",
"==",
// Both <= and =< are accepted; same for >= and =>.
choice("<=", "=<"),
choice(">=", "=>"),
"<",
">",
"=",
// --- Keywords ---
"if",
"else",
"for",
"while",
"break",
"continue",
"return",
"function",
"print",
"const",
"mut",
// Types
"void",
"bool",
"_Bool",
"char",
"uint",
"U8",
"uint8_t",
"U16",
"uint16_t",
"U32",
"uint32_t",
"U64",
"uint64_t",
"int",
"I8",
"int8_t",
"I16",
"int16_t",
"I32",
"int32_t",
"I64",
"int64_t",
"string",
// Literals / constants
"NULL",
"true",
"false",
// Identifiers and literal tokens:
$.identifier,
$.signed_integer_literal,
$.unsigned_integer_literal,
$.plain_integer_literal,
$.char_literal,
$.string_literal,
),
// --------------------
// Identifiers
// --------------------
// identifier: {character}({nondigit}|{digit})*
// character: [a-zA-Z]
// nondigit: _ | character
identifier: ($) => /[a-zA-Z][a-zA-Z0-9_]*/,
// --------------------
// Integer literals (with suffixes)
// --------------------
//
// Flex:
// integer {digits}
// digits {onenine}{digit}*|{digit}
// signed_suffix [iI]{bit_size}?
// unsigned_suffix [uU]{bit_size}?
// bit_size 8|16|32|64
//
// {integer}{signed_suffix} -> SIGNED_LITERAL
// {integer}{unsigned_suffix} -> UNSIGNED_LITERAL
// {integer} -> SIGNED_LITERAL (width 0)
signed_integer_literal: ($) =>
token(
seq(
/[0-9]+/, // integer
/[iI]/, // signed suffix head
optional(choice("8", "16", "32", "64")), // optional bit size
),
),
unsigned_integer_literal: ($) =>
token(
seq(
/[0-9]+/, // integer
/[uU]/, // unsigned suffix head
optional(choice("8", "16", "32", "64")), // optional bit size
),
),
// Unsuffixed integer (still treated as signed in your Flex rules)
plain_integer_literal: ($) => token(/[0-9]+/),
// --------------------
// Char + string literals (with escapes)
// --------------------
//
// simple_escape_sequence: \\[abfnrtv\\\'\"]
// decimal_escape_sequence: \\[0]
// escape: simple | decimal
// c_char: [^\\'\n]
// s_char: [^\\"\n]
//
// '{c_char|escape}'
// "{s_char|escape}*"
char_literal: ($) =>
token(
seq(
"'",
choice(
/[^\\'\n]/, // c_char
seq(
"\\",
choice(
// escape
/[abfnrtv\\'"]/, // simple escapes
"0", // \0
),
),
),
"'",
),
),
string_literal: ($) =>
token(
seq(
'"',
repeat(
choice(
/[^\\\"\n]/, // s_char
seq(
"\\",
choice(
// escape
/[abfnrtv\\'"]/, // simple escapes
"0", // \0
),
),
),
),
'"',
),
),
},
}); });