/** * @file A parser for the MC programming language * @author Matthias Unterrainer * @license Apache-2.0 */ /// // @ts-check module.exports = grammar({ name: "mc", // Whitespace and comments are skipped between tokens. extras: ($) => [ /\s/, // space, tab, newline, etc. $.comment, ], rules: { // ------------------------------------------------------------------------ // Top level (Bison: program) // ------------------------------------------------------------------------ source_file: ($) => repeat($.translation_entity), translation_entity: ($) => choice($.declaration, $.function_definition), function_definition: ($) => seq( "function", field("name", $.identifier), field("signature", $.function_signature), field("body", $.statement_block), ), // ------------------------------------------------------------------------ // Comments // ------------------------------------------------------------------------ comment: ($) => token( choice( // // line comment seq("//", /.*/), // /* block comment */ (non-nested) seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/"), ), ), // ------------------------------------------------------------------------ // Identifiers & literals (port of Flex patterns) // ------------------------------------------------------------------------ // identifier: {character}({nondigit}|{digit})* identifier: ($) => /[a-zA-Z][a-zA-Z0-9_]*/, // Signed integer with suffix: {integer}{signed_suffix} // integer : [0-9]+ // signed_suffix : [iI]{bit_size}? // bit_size : 8|16|32|64 signed_integer_literal: ($) => token(seq(/[0-9]+/, /[iI]/, optional(choice("8", "16", "32", "64")))), // Unsigned integer with suffix: {integer}{unsigned_suffix} // unsigned_suffix: [uU]{bit_size}? unsigned_integer_literal: ($) => token(seq(/[0-9]+/, /[uU]/, optional(choice("8", "16", "32", "64")))), // Plain integer (no suffix) integer_literal: ($) => token(/[0-9]+/), // Char literal: 'c' or '\n' etc. char_literal: ($) => token( seq( "'", choice( /[^\\'\n]/, // c_char seq( "\\", choice( // escape /[abfnrtv\\'"]/, // simple escapes "0", // \0 ), ), ), "'", ), ), // String literal: "text" with escapes string_literal: ($) => token( seq( '"', repeat( choice( /[^\\\"\n]/, // s_char seq( "\\", choice( // escape /[abfnrtv\\'"]/, // simple escapes "0", // \0 ), ), ), ), '"', ), ), // Combined integer constant used in expressions constant: ($) => choice( $.signed_integer_literal, $.unsigned_integer_literal, $.integer_literal, $.string_literal, $.char_literal, $.predefined_constant, ), predefined_constant: ($) => choice("NULL", "true", "false"), // <= / =< and >= / => are both accepted leq_operator: ($) => token(choice("<=", "=<")), geq_operator: ($) => token(choice(">=", "=>")), // ------------------------------------------------------------------------ // Expressions (port of Bison expression hierarchy) // ------------------------------------------------------------------------ base_expression: ($) => choice( $.identifier, $.constant, seq("(", field("expression", $.expression), ")"), ), // postfix_expression (Bison: left-recursive) -> base_expression plus a // sequence of postfix operations. postfix_expression: ($) => seq( field("base", $.base_expression), repeat( choice( seq("[", field("index", $.expression), "]"), // subscript seq("(", optional(field("arguments", $.argument_expression_list)), ")"), // call "++", // post-inc "--", // post-dec ), ), ), // argument_expression_list (Bison: left-recursive list) argument_expression_list: ($) => seq($.assignment_expression, repeat(seq(",", $.assignment_expression))), unary_expression: ($) => choice( $.postfix_expression, seq("++", field("operand", $.unary_expression)), seq("--", field("operand", $.unary_expression)), seq("+", field("operand", $.unary_expression)), seq("-", field("operand", $.unary_expression)), seq("!", field("operand", $.unary_expression)), ), cast_expression: ($) => choice( $.unary_expression, seq( "(", field("type", $.type_specifier_qualifier), ")", field("value", $.cast_expression), ), ), // multiplicative_expression: cast_expression (( * | / ) cast_expression)* multiplicative_expression: ($) => seq( field("left", $.cast_expression), repeat(seq(choice("*", "/"), field("right", $.cast_expression))), ), // additive_expression: multiplicative_expression (( + | - ) multiplicative_expression)* additive_expression: ($) => seq( field("left", $.multiplicative_expression), repeat(seq(choice("+", "-"), field("right", $.multiplicative_expression))), ), // relational_expression: additive_expression ( (< | > | <= | =< | >= | => ) additive_expression )* relational_expression: ($) => seq( field("left", $.additive_expression), repeat( seq( choice("<", ">", $.leq_operator, $.geq_operator), field("right", $.additive_expression), ), ), ), // equality_expression: relational_expression ( (== | !=) relational_expression )* equality_expression: ($) => seq( field("left", $.relational_expression), repeat(seq(choice("==", "!="), field("right", $.relational_expression))), ), // logical_expression: equality_expression ( (&& ||) equality_expression )* logical_expression: ($) => seq( field("left", $.equality_expression), repeat(seq(choice("&&", "||"), field("right", $.equality_expression))), ), // assignment_expression: // logical_expression // | unary_expression ( = | += | -= | *= | /= ) assignment_expression assignment_expression: ($) => choice( $.logical_expression, seq( field("left", $.unary_expression), field("operator", choice("=", "+=", "-=", "*=", "/=")), field("right", $.assignment_expression), ), ), expression: ($) => $.assignment_expression, constant_expression: ($) => $.logical_expression, // ------------------------------------------------------------------------ // Types / declarations (port of Bison decl/type rules) // ------------------------------------------------------------------------ declaration: ($) => seq( field("type", $.declaration_specifier), field("declarator", $.init_declarator), ";", ), declaration_specifier: ($) => $.type_specifier_qualifier, type_specifier_qualifier: ($) => seq(optional($.type_qualifier_list), $.type_specifier), type_specifier: ($) => choice( "void", "bool", "char", "string", $.int_type, $.uint_type, $.array_specifier, ), // All signed integer type names that previously mapped to INT int_type: ($) => choice( "int", "I8", "int8_t", "I16", "int16_t", "I32", "int32_t", "I64", "int64_t", ), // All unsigned integer type names that previously mapped to UINT uint_type: ($) => choice( "uint", "U8", "uint8_t", "U16", "uint16_t", "U32", "uint32_t", "U64", "uint64_t", ), array_specifier: ($) => seq( field("element", $.type_specifier), "[", field("size", $.constant_expression), "]", ), type_qualifier_list: ($) => seq($.type_qualifier, repeat($.type_qualifier)), type_qualifier: ($) => choice("const", "mut"), init_declarator: ($) => seq( field("name", $.identifier), optional(seq("=", field("value", $.initializer))), ), braced_initializer: ($) => choice(seq("{", "}"), seq("{", $.initializer_list, "}")), initializer: ($) => choice($.assignment_expression, $.braced_initializer), initializer_list: ($) => seq($.initializer, repeat(seq(",", $.initializer))), function_signature: ($) => choice( // (params) -> returns seq( "(", field("parameters", $.parameter_list), ")", "->", field("return_type", $.return_list), ), // () -> returns seq("(", ")", "->", field("return_type", $.return_list)), // (params) (implicit void) seq("(", field("parameters", $.parameter_list), ")"), // () (implicit void) seq("(", ")"), ), parameter_list: ($) => seq($.parameter_declaration, repeat(seq(",", $.parameter_declaration))), parameter_declaration: ($) => seq(field("type", $.declaration_specifier), field("name", $.identifier)), return_list: ($) => $.declaration_specifier, // ------------------------------------------------------------------------ // Statements (port of Bison statement rules) // ------------------------------------------------------------------------ statement: ($) => choice( $.expression_statement, $.primary_block, $.jump_statement, $.print_statement, ), expression_statement: ($) => seq(optional(field("expression", $.expression)), ";"), primary_block: ($) => choice($.statement_block, $.selection_statement, $.iteration_statement), // secondary_block is just a single statement secondary_block: ($) => $.statement, statement_block: ($) => choice(seq("{", $.block_item_list, "}"), seq("{", "}")), block_item_list: ($) => repeat1($.block_item), block_item: ($) => choice($.declaration, $.statement), // if (...) stmt [else stmt] selection_statement: ($) => prec.right( seq( "if", "(", field("condition", $.expression), ")", field("consequence", $.statement_block), optional( seq( "else", field( "alternative", choice( $.statement_block, $.selection_statement, // else-if chain ), ), ), ), ), ), iteration_statement: ($) => choice( // while (expr) statement seq( "while", "(", field("condition", $.expression), ")", field("body", $.secondary_block), ), // for (opt_expr ; opt_expr ; opt_expr) statement seq( "for", "(", optional(field("initializer", $.expression)), ";", optional(field("condition", $.expression)), ";", optional(field("update", $.expression)), ")", field("body", $.secondary_block), ), // for (declaration opt_expr ; opt_expr) statement seq( "for", "(", field("initializer", $.declaration), optional(field("condition", $.expression)), ";", optional(field("update", $.expression)), ")", field("body", $.secondary_block), ), ), jump_statement: ($) => choice( seq("continue", ";"), seq("break", ";"), seq("return", ";"), seq("return", field("value", $.expression), ";"), ), print_statement: ($) => seq("print", "(", field("value", $.expression), ")", ";"), }, });