/*
 * Copyright (c) Cangjie Library Team 2022-2022. All rights reserved.
 */

/**
 * @file
 * Read gbk encoded file stream
 */
package csv4cj

import std.fs.*

/**
 * The file is parsed using the gbk encoding format.
 * The caller needs to ensure the correct encoding format of the file,
 * otherwise the expected parsing result may not be obtained
 * @author LilFlameZ
 * @since 1.0.4
 */
public class GBKReaderStream <: CharReader {
    // Buffered byte source. Its mark/reset operations are what the lookAhead
    // functions rely on to rewind after peeking.
    let fileStream: BufferedReader

    /**
     * Creates a reader that decodes the given file as GBK.
     * The caller needs to ensure the correct encoding format of the file,
     * otherwise the expected parsing result may not be obtained
     *
     * @param s the file to read; it is wrapped in a BufferedReader
     * @since 1.0.4
     */
    public init(s: File) {
        fileStream = BufferedReader(s)
    }

    /**
     * Reads and consumes the next character from the stream.
     *
     * A byte in the range 0x81..0xFE is treated as the lead byte of a two-byte
     * GBK sequence: the following byte is consumed as well and the pair is
     * translated through the gbk2unicode table. Any other byte is converted
     * to a Rune directly.
     *
     * @return the decoded character, or None when the stream has no more bytes.
     *         None is also returned when a lead byte is the last byte of the
     *         stream (the lead byte has been consumed in that case).
     * @throws Exception raised by convertGBKToUnicode when it reports that the
     *         two-byte sequence is missing from the mapping table
     * @since 1.0.4
     */
    public func read(): Option<Rune> {
        var result = Option<Rune>.None
        if (let Some(leadByte) <- fileStream.read()) {
            if (isDoubleByteChar(leadByte)) {
                // Two-byte sequence: the trail byte is needed to decode it.
                if (let Some(trailByte) <- fileStream.read()) {
                    result = Rune(convertGBKToUnicode(leadByte, trailByte))
                }
            } else {
                // Single-byte character: the byte value is the code point.
                result = Rune(leadByte)
            }
        }

        return result
    }

    // Returns true when `byte` is a GBK lead byte (0x81..0xFE), meaning one
    // more byte belongs to the same character. Bytes outside that range,
    // including ASCII (0x00..0x7F), are handled as single-byte characters.
    private func isDoubleByteChar(byte: UInt8): Bool {
        return byte >= 0x81 && byte <= 0xFE
    }

    // Translates a two-byte GBK sequence to a Unicode code point by looking up
    // the 16-bit key (byte1 << 8) + byte2 in the gbk2unicode table.
    // The trail byte is not range-checked here; the table lookup is the only
    // validation performed.
    // NOTE(review): only NoneValueException is translated into the
    // "not found" Exception below. gbk2unicode is declared outside this file;
    // confirm that a missing key actually raises NoneValueException.
    private func convertGBKToUnicode(byte1: UInt8, byte2: UInt8): UInt32 {
        let gbkCode = (UInt32(byte1) << 8) + UInt32(byte2)
        try {
            return gbk2unicode[gbkCode]
        } catch (e: NoneValueException) {
            throw Exception("GBK character not found in mapping table")
        }
    }

    /**
     * Look at a few characters
     * Reads up to buf.size characters from the current position of the stream
     * into buf, then restores the stream to the position it had before the call.
     * It is equivalent to not moving the current position of the stream, but
     * just glancing at the contents of the next few characters
     *
     * If decoding throws, the exception propagates and the stream is not reset.
     *
     * @param buf destination array; entries [0, return value) are overwritten
     *
     * @return number of characters stored in buf; smaller than buf.size when
     *         the stream ends first, 0 when buf is empty
     * @since 1.0.4
     */
    public func lookAhead(buf: Array<Rune>): Int64 {
        if (buf.size == 0) {
            return 0
        }

        // Mark exactly once for the whole peek. Every character takes at most
        // 2 bytes, so buf.size * 2 covers the longest possible read.
        // The characters are decoded with read() rather than the
        // single-character lookAhead(): that function sets its own mark, which
        // would replace this one and make the final reset() rewind only to the
        // last character instead of the starting position.
        fileStream.mark(buf.size * 2)

        var count = 0
        while (count < buf.size) {
            if (let Some(chr) <- read()) {
                buf[count] = chr
                count++
            } else {
                // End of stream (or a truncated two-byte sequence).
                break
            }
        }

        fileStream.reset()
        return count
    }

    /**
     * Gets the next character without changing the current position of the stream
     *
     * If decoding throws, the exception propagates and the stream is not reset.
     *
     * @return the next character, or None under the same conditions as read()
     * @since 1.0.4
     */
    public func lookAhead(): Option<Rune> {
        fileStream.mark(2) // GBK characters can be up to 2 bytes
        let result = read()
        fileStream.reset()
        return result
    }
}