/** * @file A parser for the MC programming language * @author Matthias Unterrainer * @license Apache-2.0 */ /// // @ts-check module.exports = grammar({ name: "mc", // Whitespace and comments are skipped between tokens. extras: ($) => [ /\s/, // space, tab, newline, etc. $.comment, ], rules: { // Temporary top-level rule: just a sequence of tokens. // Replace this with your real syntax later. source_file: ($) => repeat($._token), // Comments: // line and /* block */ comment: ($) => token( choice( // // line comment seq("//", /.*/), // /* block comment */ // Standard Tree-sitter pattern for non-nested C-style comments. seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/"), ), ), // A catch-all for every token type we care about. _token: ($) => choice( // --- Punctuation / basics --- ";", ",", ".", "->", "(", ")", "[", "]", "{", "}", // --- Arithmetic operators --- "++", "--", "+=", "-=", "*=", "/=", "+", "-", "*", "/", // --- Boolean / comparison operators --- "&&", "||", "!", "!=", "==", // Both <= and =< are accepted; same for >= and =>. choice("<=", "=<"), choice(">=", "=>"), "<", ">", "=", // --- Keywords --- "if", "else", "for", "while", "break", "continue", "return", "function", "print", "const", "mut", // Types "void", "bool", "_Bool", "char", "uint", "U8", "uint8_t", "U16", "uint16_t", "U32", "uint32_t", "U64", "uint64_t", "int", "I8", "int8_t", "I16", "int16_t", "I32", "int32_t", "I64", "int64_t", "string", // Literals / constants "NULL", "true", "false", // Identifiers and literal tokens: $.identifier, $.signed_integer_literal, $.unsigned_integer_literal, $.plain_integer_literal, $.char_literal, $.string_literal, ), // -------------------- // Identifiers // -------------------- // identifier: {character}({nondigit}|{digit})* // character: [a-zA-Z] // nondigit: _ | character identifier: ($) => /[a-zA-Z][a-zA-Z0-9_]*/, // -------------------- // Integer literals (with suffixes) // -------------------- // // Flex: // integer {digits} // digits {onenine}{digit}*|{digit} // signed_suffix [iI]{bit_size}? // unsigned_suffix [uU]{bit_size}? // bit_size 8|16|32|64 // // {integer}{signed_suffix} -> SIGNED_LITERAL // {integer}{unsigned_suffix} -> UNSIGNED_LITERAL // {integer} -> SIGNED_LITERAL (width 0) signed_integer_literal: ($) => token( seq( /[0-9]+/, // integer /[iI]/, // signed suffix head optional(choice("8", "16", "32", "64")), // optional bit size ), ), unsigned_integer_literal: ($) => token( seq( /[0-9]+/, // integer /[uU]/, // unsigned suffix head optional(choice("8", "16", "32", "64")), // optional bit size ), ), // Unsuffixed integer (still treated as signed in your Flex rules) plain_integer_literal: ($) => token(/[0-9]+/), // -------------------- // Char + string literals (with escapes) // -------------------- // // simple_escape_sequence: \\[abfnrtv\\\'\"] // decimal_escape_sequence: \\[0] // escape: simple | decimal // c_char: [^\\'\n] // s_char: [^\\"\n] // // '{c_char|escape}' // "{s_char|escape}*" char_literal: ($) => token( seq( "'", choice( /[^\\'\n]/, // c_char seq( "\\", choice( // escape /[abfnrtv\\'"]/, // simple escapes "0", // \0 ), ), ), "'", ), ), string_literal: ($) => token( seq( '"', repeat( choice( /[^\\\"\n]/, // s_char seq( "\\", choice( // escape /[abfnrtv\\'"]/, // simple escapes "0", // \0 ), ), ), ), '"', ), ), }, });