// Files
// tree-sitter-mc/grammar.js
//
// 448 lines
// 12 KiB
// JavaScript

/**
* @file A parser for the MC programming language
* @author Matthias Unterrainer
* @license Apache-2.0
*/
/// <reference types="tree-sitter-cli/dsl" />
// @ts-check
module.exports = grammar({
name: "mc",
// Whitespace and comments are skipped between tokens.
extras: ($) => [
/\s/, // space, tab, newline, etc.
$.comment,
],
rules: {
// ------------------------------------------------------------------------
// Top level (Bison: program)
// ------------------------------------------------------------------------
// Start rule: a program is a (possibly empty) sequence of top-level entities.
source_file: ($) => repeat($.translation_entity),
translation_entity: ($) => choice($.declaration, $.function_definition),
function_definition: ($) =>
seq(
"function",
field("name", $.identifier),
field("signature", $.function_signature),
field("body", $.statement_block),
),
// ------------------------------------------------------------------------
// Comments
// ------------------------------------------------------------------------
comment: ($) =>
token(
choice(
// // line comment
seq("//", /.*/),
// /* block comment */ (non-nested)
seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/"),
),
),
// ------------------------------------------------------------------------
// Identifiers & literals (port of Flex patterns)
// ------------------------------------------------------------------------
// identifier: {character}({nondigit}|{digit})*
// The first character must be an ASCII letter; digits and underscores are only
// allowed after it, so a leading `_` does not start an identifier.
identifier: ($) => /[a-zA-Z][a-zA-Z0-9_]*/,
// Signed integer with suffix: {integer}{signed_suffix}
// integer : [0-9]+
// signed_suffix : [iI]{bit_size}?
// bit_size : 8|16|32|64
signed_integer_literal: ($) =>
token(seq(/[0-9]+/, /[iI]/, optional(choice("8", "16", "32", "64")))),
// Unsigned integer with suffix: {integer}{unsigned_suffix}
// unsigned_suffix: [uU]{bit_size}?
unsigned_integer_literal: ($) =>
token(seq(/[0-9]+/, /[uU]/, optional(choice("8", "16", "32", "64")))),
// Plain integer (no suffix)
// Decimal digits only; a sign is not part of the token (`+` and `-` are
// handled as prefix operators in unary_expression).
integer_literal: ($) => token(/[0-9]+/),
// Char literal: 'c' or '\n' etc.
char_literal: ($) =>
token(
seq(
"'",
choice(
/[^\\'\n]/, // c_char
seq(
"\\",
choice(
// escape
/[abfnrtv\\'"]/, // simple escapes
"0", // \0
),
),
),
"'",
),
),
// String literal: "text" with escapes
string_literal: ($) =>
token(
seq(
'"',
repeat(
choice(
/[^\\\"\n]/, // s_char
seq(
"\\",
choice(
// escape
/[abfnrtv\\'"]/, // simple escapes
"0", // \0
),
),
),
),
'"',
),
),
// Combined constant used in expressions: integer, string and char literals plus the predefined constants
constant: ($) =>
choice(
$.signed_integer_literal,
$.unsigned_integer_literal,
$.integer_literal,
$.string_literal,
$.char_literal,
$.predefined_constant,
),
predefined_constant: ($) => choice("NULL", "true", "false"),
// <= / =< and >= / => are both accepted
leq_operator: ($) => token(choice("<=", "=<")),
geq_operator: ($) => token(choice(">=", "=>")),
// ------------------------------------------------------------------------
// Expressions (port of Bison expression hierarchy)
// ------------------------------------------------------------------------
base_expression: ($) =>
choice(
$.identifier,
$.constant,
seq("(", field("expression", $.expression), ")"),
),
// postfix_expression (Bison: left-recursive) -> base_expression plus a
// sequence of postfix operations.
postfix_expression: ($) =>
seq(
field("base", $.base_expression),
repeat(
choice(
seq("[", field("index", $.expression), "]"), // subscript
seq("(", optional(field("arguments", $.argument_expression_list)), ")"), // call
"++", // post-inc
"--", // post-dec
),
),
),
// argument_expression_list (Bison: left-recursive list)
argument_expression_list: ($) =>
seq($.assignment_expression, repeat(seq(",", $.assignment_expression))),
unary_expression: ($) =>
choice(
$.postfix_expression,
seq("++", field("operand", $.unary_expression)),
seq("--", field("operand", $.unary_expression)),
seq("+", field("operand", $.unary_expression)),
seq("-", field("operand", $.unary_expression)),
seq("!", field("operand", $.unary_expression)),
),
cast_expression: ($) =>
choice(
$.unary_expression,
seq(
"(",
field("type", $.type_specifier_qualifier),
")",
field("value", $.cast_expression),
),
),
// multiplicative_expression: cast_expression (( * | / ) cast_expression)*
multiplicative_expression: ($) =>
seq(
field("left", $.cast_expression),
repeat(seq(choice("*", "/"), field("right", $.cast_expression))),
),
// additive_expression: multiplicative_expression (( + | - ) multiplicative_expression)*
additive_expression: ($) =>
seq(
field("left", $.multiplicative_expression),
repeat(seq(choice("+", "-"), field("right", $.multiplicative_expression))),
),
// relational_expression: additive_expression ( (< | > | <= | =< | >= | => ) additive_expression )*
relational_expression: ($) =>
seq(
field("left", $.additive_expression),
repeat(
seq(
choice("<", ">", $.leq_operator, $.geq_operator),
field("right", $.additive_expression),
),
),
),
// equality_expression: relational_expression ( (== | !=) relational_expression )*
equality_expression: ($) =>
seq(
field("left", $.relational_expression),
repeat(seq(choice("==", "!="), field("right", $.relational_expression))),
),
// logical_expression: equality_expression ( (&& | ||) equality_expression )*
logical_expression: ($) =>
seq(
field("left", $.equality_expression),
repeat(seq(choice("&&", "||"), field("right", $.equality_expression))),
),
// assignment_expression:
// logical_expression
// | unary_expression ( = | += | -= | *= | /= ) assignment_expression
assignment_expression: ($) =>
choice(
$.logical_expression,
seq(
field("left", $.unary_expression),
field("operator", choice("=", "+=", "-=", "*=", "/=")),
field("right", $.assignment_expression),
),
),
// Full expression: there is no comma operator, so this is just an
// assignment_expression.
expression: ($) => $.assignment_expression,
// Expression without assignment; used for the size in array_specifier.
constant_expression: ($) => $.logical_expression,
// ------------------------------------------------------------------------
// Types / declarations (port of Bison decl/type rules)
// ------------------------------------------------------------------------
declaration: ($) =>
seq(
field("type", $.declaration_specifier),
field("declarator", $.init_declarator),
";",
),
// Thin wrapper around type_specifier_qualifier; referenced by declaration,
// parameter_declaration and return_list.
declaration_specifier: ($) => $.type_specifier_qualifier,
type_specifier_qualifier: ($) =>
seq(optional($.type_qualifier_list), $.type_specifier),
type_specifier: ($) =>
choice(
"void",
"bool",
"char",
"string",
$.int_type,
$.uint_type,
$.array_specifier,
),
// All signed integer type names that previously mapped to INT
int_type: ($) =>
choice(
"int",
"I8",
"int8_t",
"I16",
"int16_t",
"I32",
"int32_t",
"I64",
"int64_t",
),
// All unsigned integer type names that previously mapped to UINT
uint_type: ($) =>
choice(
"uint",
"U8",
"uint8_t",
"U16",
"uint16_t",
"U32",
"uint32_t",
"U64",
"uint64_t",
),
array_specifier: ($) =>
seq(
field("element", $.type_specifier),
"[",
field("size", $.constant_expression),
"]",
),
type_qualifier_list: ($) => seq($.type_qualifier, repeat($.type_qualifier)),
type_qualifier: ($) => choice("const", "mut"),
init_declarator: ($) =>
seq(
field("name", $.identifier),
optional(seq("=", field("value", $.initializer))),
),
braced_initializer: ($) =>
choice(seq("{", "}"), seq("{", $.initializer_list, "}")),
initializer: ($) => choice($.assignment_expression, $.braced_initializer),
initializer_list: ($) =>
seq($.initializer, repeat(seq(",", $.initializer))),
function_signature: ($) =>
choice(
// (params) -> returns
seq(
"(",
field("parameters", $.parameter_list),
")",
"->",
field("return_type", $.return_list),
),
// () -> returns
seq("(", ")", "->", field("return_type", $.return_list)),
// (params) (implicit void)
seq("(", field("parameters", $.parameter_list), ")"),
// () (implicit void)
seq("(", ")"),
),
parameter_list: ($) =>
seq($.parameter_declaration, repeat(seq(",", $.parameter_declaration))),
parameter_declaration: ($) =>
seq(field("type", $.declaration_specifier), field("name", $.identifier)),
// Return type written after `->` in function_signature: a single
// declaration_specifier.
return_list: ($) => $.declaration_specifier,
// ------------------------------------------------------------------------
// Statements (port of Bison statement rules)
// ------------------------------------------------------------------------
statement: ($) =>
choice(
$.expression_statement,
$.primary_block,
$.jump_statement,
$.print_statement,
),
expression_statement: ($) =>
seq(optional(field("expression", $.expression)), ";"),
primary_block: ($) =>
choice($.statement_block, $.selection_statement, $.iteration_statement),
// secondary_block is just a single statement
// Used as the body of `while` and `for` loops in iteration_statement.
secondary_block: ($) => $.statement,
statement_block: ($) =>
choice(seq("{", $.block_item_list, "}"), seq("{", "}")),
// One or more items; the empty block `{}` is handled by statement_block.
block_item_list: ($) => repeat1($.block_item),
block_item: ($) => choice($.declaration, $.statement),
// if (expr) block [else (block | if-statement)]
selection_statement: ($) =>
prec.right(
seq(
"if",
"(",
field("condition", $.expression),
")",
field("consequence", $.statement_block),
optional(
seq(
"else",
field(
"alternative",
choice(
$.statement_block,
$.selection_statement, // else-if chain
),
),
),
),
),
),
iteration_statement: ($) =>
choice(
// while (expr) statement
seq(
"while",
"(",
field("condition", $.expression),
")",
field("body", $.secondary_block),
),
// for (opt_expr ; opt_expr ; opt_expr) statement
seq(
"for",
"(",
optional(field("initializer", $.expression)),
";",
optional(field("condition", $.expression)),
";",
optional(field("update", $.expression)),
")",
field("body", $.secondary_block),
),
// for (declaration opt_expr ; opt_expr) statement
seq(
"for",
"(",
field("initializer", $.declaration),
optional(field("condition", $.expression)),
";",
optional(field("update", $.expression)),
")",
field("body", $.secondary_block),
),
),
jump_statement: ($) =>
choice(
seq("continue", ";"),
seq("break", ";"),
seq("return", ";"),
seq("return", field("value", $.expression), ";"),
),
print_statement: ($) =>
seq("print", "(", field("value", $.expression), ")", ";"),
},
});