diff --git a/grammar.js b/grammar.js index 6eef533..414a63c 100644 --- a/grammar.js +++ b/grammar.js @@ -17,184 +17,55 @@ module.exports = grammar({ ], rules: { - // Temporary top-level rule: just a sequence of tokens. - // Replace this with your real syntax later. - source_file: ($) => repeat($._token), + // ------------------------------------------------------------------------ + // Top level (Bison: program) + // ------------------------------------------------------------------------ + + source_file: ($) => repeat($.translation_entity), + + translation_entity: ($) => choice($.declaration, $.function_definition), + + function_definition: ($) => + seq("function", $.identifier, $.function_signature, $.statement_block), + + // ------------------------------------------------------------------------ + // Comments + // ------------------------------------------------------------------------ - // Comments: // line and /* block */ comment: ($) => token( choice( // // line comment seq("//", /.*/), - // /* block comment */ - // Standard Tree-sitter pattern for non-nested C-style comments. + // /* block comment */ (non-nested) seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/"), ), ), - // A catch-all for every token type we care about. - _token: ($) => - choice( - // --- Punctuation / basics --- - ";", - ",", - ".", - "->", - - "(", - ")", - "[", - "]", - "{", - "}", - - // --- Arithmetic operators --- - "++", - "--", - "+=", - "-=", - "*=", - "/=", - "+", - "-", - "*", - "/", - - // --- Boolean / comparison operators --- - "&&", - "||", - "!", - "!=", - "==", - // Both <= and =< are accepted; same for >= and =>. 
- choice("<=", "=<"), - choice(">=", "=>"), - "<", - ">", - - "=", - - // --- Keywords --- - "if", - "else", - - "for", - "while", - - "break", - "continue", - - "return", - - "function", - - "print", - - "const", - "mut", - - // Types - "void", - "bool", - "_Bool", - "char", - - "uint", - "U8", - "uint8_t", - "U16", - "uint16_t", - "U32", - "uint32_t", - "U64", - "uint64_t", - - "int", - "I8", - "int8_t", - "I16", - "int16_t", - "I32", - "int32_t", - "I64", - "int64_t", - - "string", - - // Literals / constants - "NULL", - "true", - "false", - - // Identifiers and literal tokens: - $.identifier, - $.signed_integer_literal, - $.unsigned_integer_literal, - $.plain_integer_literal, - $.char_literal, - $.string_literal, - ), - - // -------------------- - // Identifiers - // -------------------- + // ------------------------------------------------------------------------ + // Identifiers & literals (port of Flex patterns) + // ------------------------------------------------------------------------ // identifier: {character}({nondigit}|{digit})* - // character: [a-zA-Z] - // nondigit: _ | character identifier: ($) => /[a-zA-Z][a-zA-Z0-9_]*/, - // -------------------- - // Integer literals (with suffixes) - // -------------------- - // - // Flex: - // integer {digits} - // digits {onenine}{digit}*|{digit} - // signed_suffix [iI]{bit_size}? - // unsigned_suffix [uU]{bit_size}? - // bit_size 8|16|32|64 - // - // {integer}{signed_suffix} -> SIGNED_LITERAL - // {integer}{unsigned_suffix} -> UNSIGNED_LITERAL - // {integer} -> SIGNED_LITERAL (width 0) - + // Signed integer with suffix: {integer}{signed_suffix} + // integer : [0-9]+ + // signed_suffix : [iI]{bit_size}? 
+ // bit_size : 8|16|32|64 signed_integer_literal: ($) => - token( - seq( - /[0-9]+/, // integer - /[iI]/, // signed suffix head - optional(choice("8", "16", "32", "64")), // optional bit size - ), - ), + token(seq(/[0-9]+/, /[iI]/, optional(choice("8", "16", "32", "64")))), + // Unsigned integer with suffix: {integer}{unsigned_suffix} + // unsigned_suffix: [uU]{bit_size}? unsigned_integer_literal: ($) => - token( - seq( - /[0-9]+/, // integer - /[uU]/, // unsigned suffix head - optional(choice("8", "16", "32", "64")), // optional bit size - ), - ), + token(seq(/[0-9]+/, /[uU]/, optional(choice("8", "16", "32", "64")))), - // Unsuffixed integer (still treated as signed in your Flex rules) - plain_integer_literal: ($) => token(/[0-9]+/), - - // -------------------- - // Char + string literals (with escapes) - // -------------------- - // - // simple_escape_sequence: \\[abfnrtv\\\'\"] - // decimal_escape_sequence: \\[0] - // escape: simple | decimal - // c_char: [^\\'\n] - // s_char: [^\\"\n] - // - // '{c_char|escape}' - // "{s_char|escape}*" + // Plain integer (no suffix) + integer_literal: ($) => token(/[0-9]+/), + // Char literal: 'c' or '\n' etc. 
char_literal: ($) => token( seq( @@ -214,6 +85,7 @@ module.exports = grammar({ ), ), + // String literal: "text" with escapes string_literal: ($) => token( seq( @@ -234,5 +106,294 @@ module.exports = grammar({ '"', ), ), + + // Combined constant (integer, string, char, or predefined) used in expressions + constant: ($) => + choice( + $.signed_integer_literal, + $.unsigned_integer_literal, + $.integer_literal, + $.string_literal, + $.char_literal, + $.predefined_constant, + ), + + predefined_constant: ($) => choice("NULL", "true", "false"), + + // <= / =< and >= / => are both accepted + leq_operator: ($) => token(choice("<=", "=<")), + geq_operator: ($) => token(choice(">=", "=>")), + + // ------------------------------------------------------------------------ + // Expressions (port of Bison expression hierarchy) + // ------------------------------------------------------------------------ + + base_expression: ($) => + choice($.identifier, $.constant, seq("(", $.expression, ")")), + + // postfix_expression (Bison: left-recursive) -> base_expression plus a + // sequence of postfix operations. 
+ postfix_expression: ($) => + seq( + $.base_expression, + repeat( + choice( + seq("[", $.expression, "]"), // subscript + seq("(", optional($.argument_expression_list), ")"), // call + "++", // post-inc + "--", // post-dec + ), + ), + ), + + // argument_expression_list (Bison: left-recursive list) + argument_expression_list: ($) => + seq($.assignment_expression, repeat(seq(",", $.assignment_expression))), + + unary_expression: ($) => + choice( + $.postfix_expression, + seq("++", $.unary_expression), + seq("--", $.unary_expression), + seq("+", $.unary_expression), + seq("-", $.unary_expression), + seq("!", $.unary_expression), + ), + + cast_expression: ($) => + choice( + $.unary_expression, + seq("(", $.type_specifier_qualifier, ")", $.cast_expression), + ), + + // multiplicative_expression: cast_expression (( * | / ) cast_expression)* + multiplicative_expression: ($) => + seq($.cast_expression, repeat(seq(choice("*", "/"), $.cast_expression))), + + // additive_expression: multiplicative_expression (( + | - ) multiplicative_expression)* + additive_expression: ($) => + seq( + $.multiplicative_expression, + repeat(seq(choice("+", "-"), $.multiplicative_expression)), + ), + + // relational_expression: additive_expression ( (< | > | <= | =< | >= | => ) additive_expression )* + relational_expression: ($) => + seq( + $.additive_expression, + repeat( + seq( + choice("<", ">", $.leq_operator, $.geq_operator), + $.additive_expression, + ), + ), + ), + + // equality_expression: relational_expression ( (== | !=) relational_expression )* + equality_expression: ($) => + seq( + $.relational_expression, + repeat(seq(choice("==", "!="), $.relational_expression)), + ), + + // logical_expression: equality_expression ( (&& | ||) equality_expression )* + logical_expression: ($) => + seq( + $.equality_expression, + repeat(seq(choice("&&", "||"), $.equality_expression)), + ), + + // assignment_expression: + // logical_expression + // | unary_expression ( = | += | -= | *= | /= ) 
assignment_expression + assignment_expression: ($) => + choice( + $.logical_expression, + seq( + $.unary_expression, + choice("=", "+=", "-=", "*=", "/="), + $.assignment_expression, + ), + ), + + expression: ($) => $.assignment_expression, + + constant_expression: ($) => $.logical_expression, + + // ------------------------------------------------------------------------ + // Types / declarations (port of Bison decl/type rules) + // ------------------------------------------------------------------------ + + declaration: ($) => seq($.declaration_specifier, $.init_declarator, ";"), + + declaration_specifier: ($) => $.type_specifier_qualifier, + + type_specifier_qualifier: ($) => + seq(optional($.type_qualifier_list), $.type_specifier), + + type_specifier: ($) => + choice( + "void", + "bool", + "char", + "string", + $.int_type, + $.uint_type, + $.array_specifier, + ), + + // All signed integer type names that previously mapped to INT + int_type: ($) => + choice( + "int", + "I8", + "int8_t", + "I16", + "int16_t", + "I32", + "int32_t", + "I64", + "int64_t", + ), + + // All unsigned integer type names that previously mapped to UINT + uint_type: ($) => + choice( + "uint", + "U8", + "uint8_t", + "U16", + "uint16_t", + "U32", + "uint32_t", + "U64", + "uint64_t", + ), + + array_specifier: ($) => + seq($.type_specifier, "[", $.constant_expression, "]"), + + type_qualifier_list: ($) => seq($.type_qualifier, repeat($.type_qualifier)), + + type_qualifier: ($) => choice("const", "mut"), + + init_declarator: ($) => + seq($.identifier, optional(seq("=", $.initializer))), + + braced_initializer: ($) => + choice(seq("{", "}"), seq("{", $.initializer_list, "}")), + + initializer: ($) => choice($.assignment_expression, $.braced_initializer), + + initializer_list: ($) => + seq($.initializer, repeat(seq(",", $.initializer))), + + function_signature: ($) => + choice( + // (params) -> returns + seq("(", $.parameter_list, ")", "->", $.return_list), + // () -> returns + seq("(", ")", "->", 
$.return_list), + // (params) (implicit void) + seq("(", $.parameter_list, ")"), + // () (implicit void) + seq("(", ")"), + ), + + parameter_list: ($) => + seq($.parameter_declaration, repeat(seq(",", $.parameter_declaration))), + + parameter_declaration: ($) => seq($.declaration_specifier, $.identifier), + + return_list: ($) => $.declaration_specifier, + + // ------------------------------------------------------------------------ + // Statements (port of Bison statement rules) + // ------------------------------------------------------------------------ + + statement: ($) => + choice( + $.expression_statement, + $.primary_block, + $.jump_statement, + $.print_statement, + ), + + expression_statement: ($) => seq(optional($.expression), ";"), + + primary_block: ($) => + choice($.statement_block, $.selection_statement, $.iteration_statement), + + // secondary_block is just a single statement + secondary_block: ($) => $.statement, + + statement_block: ($) => + choice(seq("{", $.block_item_list, "}"), seq("{", "}")), + + block_item_list: ($) => repeat1($.block_item), + + block_item: ($) => choice($.declaration, $.statement), + + // both branches must be a statement_block (else may also chain another if): if (...) 
stmt [else stmt] + selection_statement: ($) => + prec.right( + seq( + "if", + "(", + $.expression, + ")", + $.statement_block, + optional( + seq( + "else", + choice( + $.statement_block, + $.selection_statement, // else-if chain + ), + ), + ), + ), + ), + + iteration_statement: ($) => + choice( + // while (expr) statement + seq("while", "(", $.expression, ")", $.secondary_block), + + // for (opt_expr ; opt_expr ; opt_expr) statement + seq( + "for", + "(", + optional($.expression), + ";", + optional($.expression), + ";", + optional($.expression), + ")", + $.secondary_block, + ), + + // for (declaration opt_expr ; opt_expr) statement + seq( + "for", + "(", + $.declaration, + optional($.expression), + ";", + optional($.expression), + ")", + $.secondary_block, + ), + ), + + jump_statement: ($) => + choice( + seq("continue", ";"), + seq("break", ";"), + seq("return", ";"), + seq("return", $.expression, ";"), + ), + + print_statement: ($) => seq("print", "(", $.expression, ")", ";"), }, });