Added MC v.1.0.0 parser #3
477
grammar.js
477
grammar.js
@@ -17,184 +17,55 @@ module.exports = grammar({
|
||||
],
|
||||
|
||||
rules: {
|
||||
// Temporary top-level rule: just a sequence of tokens.
|
||||
// Replace this with your real syntax later.
|
||||
source_file: ($) => repeat($._token),
|
||||
// ------------------------------------------------------------------------
|
||||
// Top level (Bison: program)
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
source_file: ($) => repeat($.translation_entity),
|
||||
|
||||
translation_entity: ($) => choice($.declaration, $.function_definition),
|
||||
|
||||
function_definition: ($) =>
|
||||
seq("function", $.identifier, $.function_signature, $.statement_block),
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Comments
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
// Comments: // line and /* block */
|
||||
comment: ($) =>
|
||||
token(
|
||||
choice(
|
||||
// // line comment
|
||||
seq("//", /.*/),
|
||||
|
||||
// /* block comment */
|
||||
// Standard Tree-sitter pattern for non-nested C-style comments.
|
||||
// /* block comment */ (non-nested)
|
||||
seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/"),
|
||||
),
|
||||
),
|
||||
|
||||
// A catch-all for every token type we care about.
|
||||
_token: ($) =>
|
||||
choice(
|
||||
// --- Punctuation / basics ---
|
||||
";",
|
||||
",",
|
||||
".",
|
||||
"->",
|
||||
|
||||
"(",
|
||||
")",
|
||||
"[",
|
||||
"]",
|
||||
"{",
|
||||
"}",
|
||||
|
||||
// --- Arithmetic operators ---
|
||||
"++",
|
||||
"--",
|
||||
"+=",
|
||||
"-=",
|
||||
"*=",
|
||||
"/=",
|
||||
"+",
|
||||
"-",
|
||||
"*",
|
||||
"/",
|
||||
|
||||
// --- Boolean / comparison operators ---
|
||||
"&&",
|
||||
"||",
|
||||
"!",
|
||||
"!=",
|
||||
"==",
|
||||
// Both <= and =< are accepted; same for >= and =>.
|
||||
choice("<=", "=<"),
|
||||
choice(">=", "=>"),
|
||||
"<",
|
||||
">",
|
||||
|
||||
"=",
|
||||
|
||||
// --- Keywords ---
|
||||
"if",
|
||||
"else",
|
||||
|
||||
"for",
|
||||
"while",
|
||||
|
||||
"break",
|
||||
"continue",
|
||||
|
||||
"return",
|
||||
|
||||
"function",
|
||||
|
||||
"print",
|
||||
|
||||
"const",
|
||||
"mut",
|
||||
|
||||
// Types
|
||||
"void",
|
||||
"bool",
|
||||
"_Bool",
|
||||
"char",
|
||||
|
||||
"uint",
|
||||
"U8",
|
||||
"uint8_t",
|
||||
"U16",
|
||||
"uint16_t",
|
||||
"U32",
|
||||
"uint32_t",
|
||||
"U64",
|
||||
"uint64_t",
|
||||
|
||||
"int",
|
||||
"I8",
|
||||
"int8_t",
|
||||
"I16",
|
||||
"int16_t",
|
||||
"I32",
|
||||
"int32_t",
|
||||
"I64",
|
||||
"int64_t",
|
||||
|
||||
"string",
|
||||
|
||||
// Literals / constants
|
||||
"NULL",
|
||||
"true",
|
||||
"false",
|
||||
|
||||
// Identifiers and literal tokens:
|
||||
$.identifier,
|
||||
$.signed_integer_literal,
|
||||
$.unsigned_integer_literal,
|
||||
$.plain_integer_literal,
|
||||
$.char_literal,
|
||||
$.string_literal,
|
||||
),
|
||||
|
||||
// --------------------
|
||||
// Identifiers
|
||||
// --------------------
|
||||
// ------------------------------------------------------------------------
|
||||
// Identifiers & literals (port of Flex patterns)
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
// identifier: {character}({nondigit}|{digit})*
|
||||
// character: [a-zA-Z]
|
||||
// nondigit: _ | character
|
||||
identifier: ($) => /[a-zA-Z][a-zA-Z0-9_]*/,
|
||||
|
||||
// --------------------
|
||||
// Integer literals (with suffixes)
|
||||
// --------------------
|
||||
//
|
||||
// Flex:
|
||||
// integer {digits}
|
||||
// digits {onenine}{digit}*|{digit}
|
||||
// signed_suffix [iI]{bit_size}?
|
||||
// unsigned_suffix [uU]{bit_size}?
|
||||
// bit_size 8|16|32|64
|
||||
//
|
||||
// {integer}{signed_suffix} -> SIGNED_LITERAL
|
||||
// {integer}{unsigned_suffix} -> UNSIGNED_LITERAL
|
||||
// {integer} -> SIGNED_LITERAL (width 0)
|
||||
|
||||
// Signed integer with suffix: {integer}{signed_suffix}
|
||||
// integer : [0-9]+
|
||||
// signed_suffix : [iI]{bit_size}?
|
||||
// bit_size : 8|16|32|64
|
||||
signed_integer_literal: ($) =>
|
||||
token(
|
||||
seq(
|
||||
/[0-9]+/, // integer
|
||||
/[iI]/, // signed suffix head
|
||||
optional(choice("8", "16", "32", "64")), // optional bit size
|
||||
),
|
||||
),
|
||||
token(seq(/[0-9]+/, /[iI]/, optional(choice("8", "16", "32", "64")))),
|
||||
|
||||
// Unsigned integer with suffix: {integer}{unsigned_suffix}
|
||||
// unsigned_suffix: [uU]{bit_size}?
|
||||
unsigned_integer_literal: ($) =>
|
||||
token(
|
||||
seq(
|
||||
/[0-9]+/, // integer
|
||||
/[uU]/, // unsigned suffix head
|
||||
optional(choice("8", "16", "32", "64")), // optional bit size
|
||||
),
|
||||
),
|
||||
token(seq(/[0-9]+/, /[uU]/, optional(choice("8", "16", "32", "64")))),
|
||||
|
||||
// Unsuffixed integer (still treated as signed in your Flex rules)
|
||||
plain_integer_literal: ($) => token(/[0-9]+/),
|
||||
|
||||
// --------------------
|
||||
// Char + string literals (with escapes)
|
||||
// --------------------
|
||||
//
|
||||
// simple_escape_sequence: \\[abfnrtv\\\'\"]
|
||||
// decimal_escape_sequence: \\[0]
|
||||
// escape: simple | decimal
|
||||
// c_char: [^\\'\n]
|
||||
// s_char: [^\\"\n]
|
||||
//
|
||||
// '{c_char|escape}'
|
||||
// "{s_char|escape}*"
|
||||
// Plain integer (no suffix)
|
||||
integer_literal: ($) => token(/[0-9]+/),
|
||||
|
||||
// Char literal: 'c' or '\n' etc.
|
||||
char_literal: ($) =>
|
||||
token(
|
||||
seq(
|
||||
@@ -214,6 +85,7 @@ module.exports = grammar({
|
||||
),
|
||||
),
|
||||
|
||||
// String literal: "text" with escapes
|
||||
string_literal: ($) =>
|
||||
token(
|
||||
seq(
|
||||
@@ -234,5 +106,294 @@ module.exports = grammar({
|
||||
'"',
|
||||
),
|
||||
),
|
||||
|
||||
// Any literal usable in expressions: integer, string, char, or predefined constant
|
||||
constant: ($) =>
|
||||
choice(
|
||||
$.signed_integer_literal,
|
||||
$.unsigned_integer_literal,
|
||||
$.integer_literal,
|
||||
$.string_literal,
|
||||
$.char_literal,
|
||||
$.predefined_constant,
|
||||
),
|
||||
|
||||
predefined_constant: ($) => choice("NULL", "true", "false"),
|
||||
|
||||
// <= / =< and >= / => are both accepted
|
||||
leq_operator: ($) => token(choice("<=", "=<")),
|
||||
geq_operator: ($) => token(choice(">=", "=>")),
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Expressions (port of Bison expression hierarchy)
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
base_expression: ($) =>
|
||||
choice($.identifier, $.constant, seq("(", $.expression, ")")),
|
||||
|
||||
// postfix_expression (Bison: left-recursive) -> base_expression plus a
|
||||
// sequence of postfix operations.
|
||||
postfix_expression: ($) =>
|
||||
seq(
|
||||
$.base_expression,
|
||||
repeat(
|
||||
choice(
|
||||
seq("[", $.expression, "]"), // subscript
|
||||
seq("(", optional($.argument_expression_list), ")"), // call
|
||||
"++", // post-inc
|
||||
"--", // post-dec
|
||||
),
|
||||
),
|
||||
),
|
||||
|
||||
// argument_expression_list (Bison: left-recursive list)
|
||||
argument_expression_list: ($) =>
|
||||
seq($.assignment_expression, repeat(seq(",", $.assignment_expression))),
|
||||
|
||||
unary_expression: ($) =>
|
||||
choice(
|
||||
$.postfix_expression,
|
||||
seq("++", $.unary_expression),
|
||||
seq("--", $.unary_expression),
|
||||
seq("+", $.unary_expression),
|
||||
seq("-", $.unary_expression),
|
||||
seq("!", $.unary_expression),
|
||||
),
|
||||
|
||||
cast_expression: ($) =>
|
||||
choice(
|
||||
$.unary_expression,
|
||||
seq("(", $.type_specifier_qualifier, ")", $.cast_expression),
|
||||
),
|
||||
|
||||
// multiplicative_expression: cast_expression (( * | / ) cast_expression)*
|
||||
multiplicative_expression: ($) =>
|
||||
seq($.cast_expression, repeat(seq(choice("*", "/"), $.cast_expression))),
|
||||
|
||||
// additive_expression: multiplicative_expression (( + | - ) multiplicative_expression)*
|
||||
additive_expression: ($) =>
|
||||
seq(
|
||||
$.multiplicative_expression,
|
||||
repeat(seq(choice("+", "-"), $.multiplicative_expression)),
|
||||
),
|
||||
|
||||
// relational_expression: additive_expression ( (< | > | <= | =< | >= | => ) additive_expression )*
|
||||
relational_expression: ($) =>
|
||||
seq(
|
||||
$.additive_expression,
|
||||
repeat(
|
||||
seq(
|
||||
choice("<", ">", $.leq_operator, $.geq_operator),
|
||||
$.additive_expression,
|
||||
),
|
||||
),
|
||||
),
|
||||
|
||||
// equality_expression: relational_expression ( (== | !=) relational_expression )*
|
||||
equality_expression: ($) =>
|
||||
seq(
|
||||
$.relational_expression,
|
||||
repeat(seq(choice("==", "!="), $.relational_expression)),
|
||||
),
|
||||
|
||||
// logical_expression: equality_expression ( (&& | ||) equality_expression )*
|
||||
logical_expression: ($) =>
|
||||
seq(
|
||||
$.equality_expression,
|
||||
repeat(seq(choice("&&", "||"), $.equality_expression)),
|
||||
),
|
||||
|
||||
// assignment_expression:
|
||||
// logical_expression
|
||||
// | unary_expression ( = | += | -= | *= | /= ) assignment_expression
|
||||
assignment_expression: ($) =>
|
||||
choice(
|
||||
$.logical_expression,
|
||||
seq(
|
||||
$.unary_expression,
|
||||
choice("=", "+=", "-=", "*=", "/="),
|
||||
$.assignment_expression,
|
||||
),
|
||||
),
|
||||
|
||||
expression: ($) => $.assignment_expression,
|
||||
|
||||
constant_expression: ($) => $.logical_expression,
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Types / declarations (port of Bison decl/type rules)
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
declaration: ($) => seq($.declaration_specifier, $.init_declarator, ";"),
|
||||
|
||||
declaration_specifier: ($) => $.type_specifier_qualifier,
|
||||
|
||||
type_specifier_qualifier: ($) =>
|
||||
seq(optional($.type_qualifier_list), $.type_specifier),
|
||||
|
||||
type_specifier: ($) =>
|
||||
choice(
|
||||
"void",
|
||||
"bool",
|
||||
"char",
|
||||
"string",
|
||||
$.int_type,
|
||||
$.uint_type,
|
||||
$.array_specifier,
|
||||
),
|
||||
|
||||
// All signed integer type names that previously mapped to INT
|
||||
int_type: ($) =>
|
||||
choice(
|
||||
"int",
|
||||
"I8",
|
||||
"int8_t",
|
||||
"I16",
|
||||
"int16_t",
|
||||
"I32",
|
||||
"int32_t",
|
||||
"I64",
|
||||
"int64_t",
|
||||
),
|
||||
|
||||
// All unsigned integer type names that previously mapped to UINT
|
||||
uint_type: ($) =>
|
||||
choice(
|
||||
"uint",
|
||||
"U8",
|
||||
"uint8_t",
|
||||
"U16",
|
||||
"uint16_t",
|
||||
"U32",
|
||||
"uint32_t",
|
||||
"U64",
|
||||
"uint64_t",
|
||||
),
|
||||
|
||||
array_specifier: ($) =>
|
||||
seq($.type_specifier, "[", $.constant_expression, "]"),
|
||||
|
||||
type_qualifier_list: ($) => seq($.type_qualifier, repeat($.type_qualifier)),
|
||||
|
||||
type_qualifier: ($) => choice("const", "mut"),
|
||||
|
||||
init_declarator: ($) =>
|
||||
seq($.identifier, optional(seq("=", $.initializer))),
|
||||
|
||||
braced_initializer: ($) =>
|
||||
choice(seq("{", "}"), seq("{", $.initializer_list, "}")),
|
||||
|
||||
initializer: ($) => choice($.assignment_expression, $.braced_initializer),
|
||||
|
||||
initializer_list: ($) =>
|
||||
seq($.initializer, repeat(seq(",", $.initializer))),
|
||||
|
||||
function_signature: ($) =>
|
||||
choice(
|
||||
// (params) -> returns
|
||||
seq("(", $.parameter_list, ")", "->", $.return_list),
|
||||
// () -> returns
|
||||
seq("(", ")", "->", $.return_list),
|
||||
// (params) (implicit void)
|
||||
seq("(", $.parameter_list, ")"),
|
||||
// () (implicit void)
|
||||
seq("(", ")"),
|
||||
),
|
||||
|
||||
parameter_list: ($) =>
|
||||
seq($.parameter_declaration, repeat(seq(",", $.parameter_declaration))),
|
||||
|
||||
parameter_declaration: ($) => seq($.declaration_specifier, $.identifier),
|
||||
|
||||
return_list: ($) => $.declaration_specifier,
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Statements (port of Bison statement rules)
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
statement: ($) =>
|
||||
choice(
|
||||
$.expression_statement,
|
||||
$.primary_block,
|
||||
$.jump_statement,
|
||||
$.print_statement,
|
||||
),
|
||||
|
||||
expression_statement: ($) => seq(optional($.expression), ";"),
|
||||
|
||||
primary_block: ($) =>
|
||||
choice($.statement_block, $.selection_statement, $.iteration_statement),
|
||||
|
||||
// secondary_block is just a single statement
|
||||
secondary_block: ($) => $.statement,
|
||||
|
||||
statement_block: ($) =>
|
||||
choice(seq("{", $.block_item_list, "}"), seq("{", "}")),
|
||||
|
||||
block_item_list: ($) => repeat1($.block_item),
|
||||
|
||||
block_item: ($) => choice($.declaration, $.statement),
|
||||
|
||||
// if (expr) block [else (block | if-statement)]
|
||||
selection_statement: ($) =>
|
||||
prec.right(
|
||||
seq(
|
||||
"if",
|
||||
"(",
|
||||
$.expression,
|
||||
")",
|
||||
$.statement_block,
|
||||
optional(
|
||||
seq(
|
||||
"else",
|
||||
choice(
|
||||
$.statement_block,
|
||||
$.selection_statement, // else-if chain
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
|
||||
iteration_statement: ($) =>
|
||||
choice(
|
||||
// while (expr) statement
|
||||
seq("while", "(", $.expression, ")", $.secondary_block),
|
||||
|
||||
// for (opt_expr ; opt_expr ; opt_expr) statement
|
||||
seq(
|
||||
"for",
|
||||
"(",
|
||||
optional($.expression),
|
||||
";",
|
||||
optional($.expression),
|
||||
";",
|
||||
optional($.expression),
|
||||
")",
|
||||
$.secondary_block,
|
||||
),
|
||||
|
||||
// for (declaration opt_expr ; opt_expr) statement
|
||||
seq(
|
||||
"for",
|
||||
"(",
|
||||
$.declaration,
|
||||
optional($.expression),
|
||||
";",
|
||||
optional($.expression),
|
||||
")",
|
||||
$.secondary_block,
|
||||
),
|
||||
),
|
||||
|
||||
jump_statement: ($) =>
|
||||
choice(
|
||||
seq("continue", ";"),
|
||||
seq("break", ";"),
|
||||
seq("return", ";"),
|
||||
seq("return", $.expression, ";"),
|
||||
),
|
||||
|
||||
print_statement: ($) => seq("print", "(", $.expression, ")", ";"),
|
||||
},
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user