third_party_cangjie_runtime/stdlib/libs/std/ast/tokens.cj-代码预览-third_party_cangjie_runtime:基于 OpenHarmony 的仓颉运行时与标准库项目 - AtomGit

Wweinwfeat(cangjie_runtime): sync from upstream v1.2.0-alpha.06
/*
 * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
 * This source file is part of the Cangjie project, licensed under Apache-2.0
 * with Runtime Library Exception.
 *
 * See https://cangjie-lang.cn/pages/LICENSE for license information.
 */

// The Cangjie API is in Beta. For details on its capabilities and limitations, please refer to the README file.

package std.ast

import std.collection.ArrayList

public class TokensIterator <: Iterator<Token> {
    /* Fields */

    // Index of the token most recently produced by `next()`; -1 until `next()` is first called.
    private var position: Int64 = -1
    // The token sequence being traversed.
    private let data: Tokens

    /* Constructors */

    /** Creates an iterator over `tokens`, positioned before the first token. */
    public init(tokens: Tokens) {
        data = tokens
    }

    /* Methods */

    /**
     * Returns the token at the current position without moving the iterator.
     * Yields `None` before the first call to `next()` and once the position has passed the last token.
     */
    public func peek(): Option<Token> {
        if (position >= 0 && position < data.size) {
            return Some(data[position])
        }
        return None<Token>
    }

    /** Returns true when a current token exists and its kind equals `kind`; false otherwise. */
    public func seeing(kind: TokenKind): Bool {
        if (let Some(current) <- this.peek()) {
            return current.kind == kind
        }
        return false
    }

    /**
     * Moves to the following token and returns it, or `None` when the sequence is exhausted.
     * The position stops advancing once it equals `data.size`, so repeated calls keep returning `None`.
     */
    public func next(): Option<Token> {
        // NOTE (carried over from the original author): this is not quite right; it is unclear
        // why a Token(END) needs to be added at the end.
        if (position < data.size) {
            position += 1
        }
        return peek()
    }
}

// Used by macro desugaring. Implicitly used functions must be top-level, and we do not want
// `Tokens.refreshPosition()` itself to be public, so this package-level wrapper forwards to it.
func refreshTokensPosition(tks: Tokens) {
    tks.refreshPosition()
}

// Used by quote desugaring.
// When `input` is a Tokens, returns the concatenation of a fresh empty Tokens with `tks`;
// for any other `input` type, returns `tks` itself.
func transformTokens<T>(input: T, tks: Tokens): Tokens {
    if (input is Tokens) {
        return Tokens().concat(tks)
    }
    return tks
}

public open class Tokens <: ToString & Iterable<Token> & ToBytes {
    /*
     * `tokens` is the backing store of a plain Tokens. The array/list constructors copy their
     * input into it, so a caller's collection is never aliased. Note that `append`, `remove`
     * and `refreshPosition` do modify or replace it after construction.
     */

    /* Fields */
    protected var tokens: ArrayList<Token> = ArrayList<Token>(0)

    /**
     * Replaces `tokens` with a new list in which every token has been passed through
     * `refreshPos` together with its own fileID, line and column, then returns this object.
     *
     * NOTE(review): only the `tokens` field is visited, not `this`. ConcatTokens documents that
     * its inherited `tokens` field is always empty, so for a ConcatTokens this appears to do
     * nothing - confirm that this is intended.
     */
    func refreshPosition() {
        let refreshed: ArrayList<Token> = ArrayList<Token>(tokens.size)
        for (tk in tokens) {
            refreshed.add(refreshPos(tk, tk.pos.fileID, tk.pos.line, tk.pos.column))
        }
        tokens = refreshed
        return this
    }

    /* Constructors */

    /** Create an empty tokens. */
    public init() {
    }

    /**
     * Create a new tokens from tokens' bytes. An empty buffer yields an empty tokens.
     *
     * Layout as read here: a UInt32 token count, then for each token
     *   UInt16 kind id | UInt32 value length | value bytes | UInt32 fileID | Int32 line |
     *   Int32 column | UInt16 single-quote flag | UInt16 delimiter count (MULTILINE_RAW_STRING only).
     */
    init(buf: Array<UInt8>) {
        if (buf.size == 0) {
            return
        }
        var off: Int64 = 0
        let sizeOfTokens: UInt32 = getUInt32(buf[off..off + 4])
        off = off + 4
        for (_ in 0..Int64(sizeOfTokens)) {
            let tokenId: UInt16 = getUInt16(buf[off..off + 2])
            off = off + 2
            let strLen = getUInt32(buf[off..off + 4])
            off = off + 4
            let value: String = getString(buf, off, strLen)
            // NOTE(review): the cursor advances by the decoded string's size rather than by
            // `strLen`; presumably the two are always equal - confirm against getString.
            off = off + value.size
            let fileID: UInt32 = getUInt32(buf[off..off + 4])
            off = off + 4
            let line: Int32 = getInt32(buf[off..off + 4])
            off = off + 4
            let column: Int32 = getInt32(buf[off..off + 4])
            off = off + 4
            var tokenKind = getTokenKind(tokenId)
            let isSingle: UInt16 = getUInt16(buf[off..off + 2])
            // A string literal flagged as single-quoted is given its dedicated kind.
            if (tokenKind == STRING_LITERAL && isSingle == 1) {
                tokenKind = SINGLE_QUOTED_STRING_LITERAL
            }
            off = off + 2
            var token: Token = Token(tokenKind, value, fileID, line, column)
            if (isSingle == 1) {
                token.isSingleQuote = true
            }
            // Only multi-line raw strings carry the trailing delimiter count.
            if (tokenKind == TokenKind.MULTILINE_RAW_STRING) {
                token.delimiterNum = getUInt16(buf[off..off + 2])
                off = off + 2
            }
            tokens.add(token)
        }
    }

    /** Create a new tokens from token array. The elements are copied. */
    public init(tokArray: Array<Token>) {
        tokens.add(all: tokArray)
    }

    /** Create a new tokens from token array list. */
    public init(tokArrayList: ArrayList<Token>) {
        // Copy the elements so that later changes to the caller's list are not
        // visible through this object.
        tokens.add(all: tokArrayList)
    }

    /* Methods */

    /** Returns the size of tokens. */
    public open prop size: Int64 {
        get() {
            return tokens.size
        }
    }

    /** Returns the token at `index`; throws IndexOutOfBoundsException if the index is out of bounds. */
    public open func get(index: Int64): Token {
        if (index < 0 || index >= tokens.size) {
            throw IndexOutOfBoundsException("index is ${index} but the size of tokens is ${tokens.size}")
        }
        return tokens[index]
    }

    /** Returns an iterator over the tokens. */
    public func iterator(): TokensIterator {
        return TokensIterator(this)
    }

    /**
     * Returns the concatenation of this Tokens and the argument.
     * The result is a ConcatTokens that refers to both operands; neither is copied.
     */
    public func concat(tokens: Tokens): Tokens {
        return ConcatTokens(this, tokens)
    }

    /* Operator functions */

    /** Returns the token at the specified index. */
    public operator func [](index: Int64): Token {
        return this.get(index)
    }

    /** Returns a new Tokens holding the tokens selected by `range`. */
    public open operator func [](range: Range<Int64>): Tokens {
        return Tokens(tokens[range])
    }

    /** Returns a tokens which is the result of concatenating this and `r`. */
    public operator func +(r: Tokens): Tokens {
        return this.concat(r)
    }

    /** Returns a tokens which is the result of concatenating this and the single token `r`. */
    public operator func +(r: Token): Tokens {
        let single: Tokens = Tokens(Array<Token>(1, repeat: r))
        return this.concat(single)
    }

    /** Appends every token of `tokens` to this object in place and returns this object. */
    public open func append(tokens: Tokens): Tokens {
        if (tokens is ConcatTokens) {
            // The `tokens` field of a ConcatTokens is always empty, so walk it through its iterator.
            for (tk in tokens) {
                this.tokens.add(tk)
            }
        } else {
            this.tokens.add(all: tokens.tokens)
        }
        return this
    }

    /** Appends `token` to this object in place and returns this object. */
    public open func append(token: Token): Tokens {
        tokens.add(token)
        return this
    }

    /** Appends the tokens produced by `node.toTokens()` and returns this object. */
    public func append(node: Node): Tokens {
        this.append(node.toTokens())
        return this
    }

    /**
     * Removes the token at `index`.
     * For a plain Tokens the element is removed in place and this object is returned.
     * For a ConcatTokens the receiver is left unchanged and a new Tokens without that
     * element is returned, so callers should always use the return value.
     */
    public func remove(index: Int64): Tokens {
        /** The field `tokens` of ConcatTokens is always empty. */
        if (this is ConcatTokens) {
            return this[..index] + this[(index + 1)..]
        }
        tokens.remove(at: index)
        return this
    }

    /** Print the information of this tokens. */
    public func dump(): Unit {
        for (tk in this) {
            tk.dump()
        }
    }

    /** Convert Tokens to String. */
    public func toString(): String {
        // This API is just for pretty printing. Do not use this for production code.
        // For example, previously if we had a MULTILINE_RAW_STRING Token:  #"hello\123"#
        // calling toString on that Token gave: "hello\123", which is wrong and causes
        // an internal compiler error when lexing. That has been fixed in tokenToString,
        // but still, be careful when you use this API.
        if (size == 0) {
            return ""
        } else if (size == 1) {
            return tokenToString(this[0])
        }
        let tokBytes: CPointer<UInt8> = unsafePointerCastFromUint8Array((this).toBytes())
        let spaceFlag: CPointer<Bool> = unsafe { LibC.malloc<Bool>(count: size) }
        if (spaceFlag.isNull()) {
            unsafe { LibC.free(tokBytes) }
            throw IllegalMemoryException("malloc failed!")
        }
        let indentStr = "    "
        // Accumulate into a builder instead of re-allocating the whole string on every token.
        let builder = StringBuilder()
        try {
            // Fills `spaceFlag` with one entry per token; each entry is read below to decide
            // whether a space follows that token. Kept inside `try` so both native buffers
            // are released on every exit path.
            unsafe { CJ_CheckAddSpace(tokBytes, spaceFlag) }
            var indent = 0
            var afterNewline = false
            for (i in 0..this.size) {
                let tk = this[i]
                // A closing brace is printed one level shallower than the block it closes.
                if (tk.kind == TokenKind.RCURL && indent > 0) {
                    indent -= 1
                }
                // add indentation string at the start of each new line
                if (afterNewline) {
                    builder.append(indentStr * indent)
                }
                if (tk.kind == TokenKind.LCURL) {
                    indent += 1
                }
                builder.append(tokenToString(tk))
                // add space
                if (unsafe { spaceFlag.read(i) }) {
                    builder.append(" ")
                }
                afterNewline = tk.kind == TokenKind.NL
            }
        } finally {
            unsafe { LibC.free(tokBytes) }
            unsafe { LibC.free(spaceFlag) }
        }
        return builder.toString()
    }

    /**
     * Serializes all tokens: a UInt32 token count followed by each token's own byte encoding.
     * Returns an empty array when there are no tokens.
     */
    public func toBytes(): Array<UInt8> {
        return toBytes(0)
    }

    /**
     * Serializes the tokens from index `startFrom` to the end, in the same format as `toBytes()`.
     * Returns an empty array when `startFrom` is at or past the end.
     */
    func toBytes(startFrom: Int64): Array<UInt8> {
        let numberOfTokens: Int64 = this.size - startFrom
        if (numberOfTokens <= 0) {
            return Array<UInt8>(0, {_ => 0})
        }

        let byteCount = getSize(startFrom: startFrom)
        let arr = Array<UInt8>(byteCount, repeat: 0)

        // Header: number of tokens that follow.
        var cursor = castUInt32ToBytes(arr, 0, UInt32(numberOfTokens))

        for (i in startFrom..this.size) {
            cursor = this[i].toBytes(arr, cursor)
        }

        return arr
    }

    /**
     * Returns the number of bytes needed to serialize the tokens from `startFrom` to the end:
     * 4 bytes for the token-count header plus the sum of each token's `getSize()`.
     */
    func getSize(startFrom!: Int64 = 0) {
        let headerBytes = 4
        var total = 0
        for (i in startFrom..this.size) {
            total += this[i].getSize()
        }
        return total + headerBytes
    }
}

/**
 * Renders a single token as source-like text.
 * String-like kinds are wrapped in their quotes (single or double according to
 * `tk.isSingleQuote`) and prefixes; every other kind yields `tk.value` unchanged.
 */
func tokenToString(tk: Token): String {
    let quoteMark: String = if (tk.isSingleQuote) {
        "\'"
    } else {
        "\""
    }
    let kind = tk.kind
    // True when the value ends with a backslash; raw and J strings then get a backslash
    // placed before both the opening and the closing quote.
    let endsWithBackslash = tk.value.endsWith("\\")

    if (kind == TokenKind.MULTILINE_RAW_STRING) {
        // One '#' per recorded delimiter on each side of the literal.
        let delimiter: String = "#" * Int64(tk.delimiterNum)
        if (endsWithBackslash) {
            return delimiter + "\\" + quoteMark + tk.value + "\\" + quoteMark + delimiter
        }
        return delimiter + quoteMark + tk.value + quoteMark + delimiter
    }

    if (kind == TokenKind.MULTILINE_STRING) {
        let tripleQuote = quoteMark + quoteMark + quoteMark
        return tripleQuote + "\n" + tk.value + tripleQuote
    }

    if (kind == TokenKind.STRING_LITERAL || kind == TokenKind.SINGLE_QUOTED_STRING_LITERAL) {
        return quoteMark + tk.value + quoteMark
    }

    if (kind == TokenKind.JSTRING_LITERAL) {
        if (endsWithBackslash) {
            return "J\\" + quoteMark + tk.value + "\\" + quoteMark
        }
        return "J" + quoteMark + tk.value + quoteMark
    }

    if (kind == TokenKind.RUNE_LITERAL) {
        return "r" + quoteMark + tk.value + quoteMark
    }

    return tk.value
}

class ConcatTokens <: Tokens {
    /* Invariant: the `tokens` field inherited from Tokens is never populated by this class. */
    /* Invariant: `cachedSize` equals the combined size of the two children as recorded at
       construction and on each `append`. */

    /* Fields */
    private let leftChild: Tokens
    private var rightChild: Tokens
    private var cachedSize: Int64

    /* Constructors */

    /** Builds a node whose contents are the tokens of `lc` followed by the tokens of `rc`. */
    init(lc: Tokens, rc: Tokens) {
        // No rebalancing is attempted. Repeated concatenation tends to produce left-leaning
        // trees such as (((ts1 + ts2) + ts3) + ts4); a balanced scheme (AVL, red-black)
        // could be introduced here later.

        leftChild = lc
        rightChild = rc
        cachedSize = lc.size + rc.size
    }

    /* Methods */

    /** Returns the cached total number of tokens. */
    public override prop size: Int64 {
        get() {
            return cachedSize
        }
    }

    /**
     * Returns the token at `index`, taken from the left child when the index falls inside it
     * and from the right child otherwise.
     * @throws IndexOutOfBoundsException if `index` is negative or not less than `size`.
     */
    public override func get(index: Int64): Token {
        if (index < 0 || index >= size) {
            throw IndexOutOfBoundsException("index is ${index} but the size of tokens is ${size}")
        }
        let split = leftChild.size
        if (index >= split) {
            return rightChild[index - split]
        }
        return leftChild[index]
    }

    /**
     * Returns the tokens selected by `range`. A missing start means 0 and a missing end means `size`.
     * @throws IllegalArgumentException if the `step` of `range` is not equal to 1.
     * @throws IndexOutOfBoundsException if the `range` is invalid.
     */
    public operator func [](range: Range<Int64>): Tokens {
        if (range.step != 1) {
            throw IllegalArgumentException("Illegal step ${range.step}, step should be 1")
        }

        var start: Int64 = 0
        if (range.hasStart) {
            start = range.start
        }

        // Normalise to an exclusive upper bound.
        var end: Int64 = size
        if (range.hasEnd) {
            end = if (range.isClosed) {
                range.end + 1
            } else {
                range.end
            }
        }

        if (start < 0 || end < start || end > size) {
            throw IndexOutOfBoundsException(
                "The length of the array is ${size}, but the actual range is [${start}, ${end}).")
        }

        let split = leftChild.size

        // Entirely inside the left child.
        if (end <= split) {
            return leftChild[start..end]
        }
        // Entirely inside the right child; shift the indices into its coordinates.
        if (start >= split) {
            return rightChild[(start - split)..(end - split)]
        }
        // Straddles the boundary: tail of the left child followed by head of the right child.
        return leftChild[start..split] + rightChild[0..(end - split)]
    }

    /**
     * Extends this node with `tks` by replacing the right child with `rightChild + tks`,
     * then returns this node.
     */
    public override func append(tks: Tokens): Tokens {
        let extendedRight = rightChild + tks
        rightChild = extendedRight
        cachedSize += tks.size
        return this
    }

    /**
     * Extends this node with the single token `tk` by replacing the right child with
     * `rightChild + tk`, then returns this node.
     */
    public override func append(tk: Token): Tokens {
        let extendedRight = rightChild + tk
        rightChild = extendedRight
        cachedSize += 1
        return this
    }
}