/**
 * @file A parser for the MC programming language
 * @author Matthias Unterrainer
 * @license Apache-2.0
 */

/// <reference types="tree-sitter-cli/dsl" />
// @ts-check

module.exports = grammar({
  name: "mc",

  // Whitespace and comments are skipped between tokens.
  extras: ($) => [
    /\s/, // space, tab, newline, etc.
    $.comment,
  ],

  rules: {
    // Temporary top-level rule: just a sequence of tokens.
    // Replace this with your real syntax later.
    source_file: ($) => repeat($._token),

    // Comments: // line and /* block */
    comment: ($) =>
      token(
        choice(
          // // line comment
          seq("//", /.*/),

          // /* block comment */
          // Standard Tree-sitter pattern for non-nested C-style comments.
          seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/"),
        ),
      ),

    // A catch-all for every token type we care about.
    _token: ($) =>
      choice(
        // --- Punctuation / basics ---
        ";",
        ",",
        ".",
        "->",

        "(",
        ")",
        "[",
        "]",
        "{",
        "}",

        // --- Arithmetic operators ---
        "++",
        "--",
        "+=",
        "-=",
        "*=",
        "/=",
        "+",
        "-",
        "*",
        "/",

        // --- Boolean / comparison operators ---
        "&&",
        "||",
        "!",
        "!=",
        "==",
        // Both <= and =< are accepted; same for >= and =>.
        choice("<=", "=<"),
        choice(">=", "=>"),
        "<",
        ">",

        "=",

        // --- Keywords ---
        "if",
        "else",

        "for",
        "while",

        "break",
        "continue",

        "return",

        "function",

        "print",

        "const",
        "mut",

        // Types
        "void",
        "bool",
        "_Bool",
        "char",

        "uint",
        "U8",
        "uint8_t",
        "U16",
        "uint16_t",
        "U32",
        "uint32_t",
        "U64",
        "uint64_t",

        "int",
        "I8",
        "int8_t",
        "I16",
        "int16_t",
        "I32",
        "int32_t",
        "I64",
        "int64_t",

        "string",

        // Literals / constants
        "NULL",
        "true",
        "false",

        // Identifiers and literal tokens:
        $.identifier,
        $.signed_integer_literal,
        $.unsigned_integer_literal,
        $.plain_integer_literal,
        $.char_literal,
        $.string_literal,
      ),

    // --------------------
    // Identifiers
    // --------------------

    // identifier: {character}({nondigit}|{digit})*
    // character: [a-zA-Z]
    // nondigit: _ | character
    identifier: ($) => /[a-zA-Z][a-zA-Z0-9_]*/,

    // --------------------
    // Integer literals (with suffixes)
    // --------------------
    //
    // Flex:
    // integer             {digits}
    // digits              {onenine}{digit}*|{digit}
    // signed_suffix       [iI]{bit_size}?
    // unsigned_suffix     [uU]{bit_size}?
    // bit_size            8|16|32|64
    //
    // {integer}{signed_suffix}     -> SIGNED_LITERAL
    // {integer}{unsigned_suffix}   -> UNSIGNED_LITERAL
    // {integer}                    -> SIGNED_LITERAL (width 0)

    signed_integer_literal: ($) =>
      token(
        seq(
          /[0-9]+/, // integer
          /[iI]/, // signed suffix head
          optional(choice("8", "16", "32", "64")), // optional bit size
        ),
      ),

    unsigned_integer_literal: ($) =>
      token(
        seq(
          /[0-9]+/, // integer
          /[uU]/, // unsigned suffix head
          optional(choice("8", "16", "32", "64")), // optional bit size
        ),
      ),

    // Unsuffixed integer (still treated as signed in your Flex rules)
    plain_integer_literal: ($) => token(/[0-9]+/),

    // --------------------
    // Char + string literals (with escapes)
    // --------------------
    //
    // simple_escape_sequence:    \\[abfnrtv\\\'\"]
    // decimal_escape_sequence:   \\[0]
    // escape:                    simple | decimal
    // c_char:                    [^\\'\n]
    // s_char:                    [^\\"\n]
    //
    // '{c_char|escape}'
    // "{s_char|escape}*"

    char_literal: ($) =>
      token(
        seq(
          "'",
          choice(
            /[^\\'\n]/, // c_char
            seq(
              "\\",
              choice(
                // escape
                /[abfnrtv\\'"]/, // simple escapes
                "0", // \0
              ),
            ),
          ),
          "'",
        ),
      ),

    string_literal: ($) =>
      token(
        seq(
          '"',
          repeat(
            choice(
              /[^\\\"\n]/, // s_char
              seq(
                "\\",
                choice(
                  // escape
                  /[abfnrtv\\'"]/, // simple escapes
                  "0", // \0
                ),
              ),
            ),
          ),
          '"',
        ),
      ),
  },
});