/* Snapshot of commit 3d79c591 ("Misc bugfixes") by dengxuyue, created 2021-03-06. */
/* -------------------------------------------------------------------------
 *
 * encode.c
 *	  Various data encoding/decoding things.
 *
 * Copyright (c) 2001-2012, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/encode.c
 *
 * -------------------------------------------------------------------------
 */
#include "postgres.h"
#include "knl/knl_variable.h"

#include <ctype.h>

#include "utils/builtins.h"

/*
 * Function table describing one binary<->text encoding.
 *
 * binary_encode()/binary_decode() first call the *_len member to size the
 * result buffer and then call encode/decode to fill it; the value returned by
 * encode/decode is the number of bytes actually written and must not exceed
 * the estimate returned by the matching *_len function.
 */
struct pg_encoding {
    /* Upper bound on the encoded size of dlen input bytes. */
    unsigned (*encode_len)(const char* data, unsigned dlen);
    /* Upper bound on the decoded size of dlen input bytes. */
    unsigned (*decode_len)(const char* data, unsigned dlen);
    /* Encode dlen bytes of data into res; returns bytes written. */
    unsigned (*encode)(const char* data, unsigned dlen, char* res);
    /* Decode dlen bytes of data into res; returns bytes written. */
    unsigned (*decode)(const char* data, unsigned dlen, char* res);
};

/* Look up an encoding by name; returns NULL when the name is not recognized. */
static const struct pg_encoding* pg_find_encoding(const char* name);

/*
 * SQL functions.
 */

Datum binary_encode(PG_FUNCTION_ARGS)
{
    bytea* data = PG_GETARG_BYTEA_P(0);
    Datum name = PG_GETARG_DATUM(1);
    text* result = NULL;
    char* namebuf = NULL;
    int datalen, resultlen, res;
    const struct pg_encoding* enc;

    datalen = VARSIZE(data) - VARHDRSZ;

    namebuf = TextDatumGetCString(name);

    enc = pg_find_encoding(namebuf);
    if (enc == NULL)
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized encoding: \"%s\"", namebuf)));

    resultlen = enc->encode_len(VARDATA(data), datalen);
    result = (text*)palloc(VARHDRSZ + resultlen);

    res = enc->encode(VARDATA(data), datalen, VARDATA(result));

    /* Make this FATAL 'cause we've trodden on memory ... */
    if (res > resultlen)
        ereport(FATAL, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("overflow - encode estimate too small")));

    SET_VARSIZE(result, VARHDRSZ + res);

    PG_RETURN_TEXT_P(result);
}

Datum binary_decode(PG_FUNCTION_ARGS)
{
    text* data = PG_GETARG_TEXT_P(0);
    Datum name = PG_GETARG_DATUM(1);
    bytea* result = NULL;
    char* namebuf = NULL;
    int datalen, resultlen, res;
    const struct pg_encoding* enc;

    datalen = VARSIZE(data) - VARHDRSZ;

    namebuf = TextDatumGetCString(name);

    enc = pg_find_encoding(namebuf);
    if (enc == NULL)
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized encoding: \"%s\"", namebuf)));

    resultlen = enc->decode_len(VARDATA(data), datalen);
    result = (bytea*)palloc(VARHDRSZ + resultlen);

    res = enc->decode(VARDATA(data), datalen, VARDATA(result));

    /* Make this FATAL 'cause we've trodden on memory ... */
    if (res > resultlen)
        ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("overflow - decode estimate too small")));

    SET_VARSIZE(result, VARHDRSZ + res);

    PG_RETURN_BYTEA_P(result);
}

/*
 * HEX
 */

/* Digit characters used by hex_encode(), indexed by nibble value. */
static const char hextbl[] = "0123456789abcdef";

/*
 * Value of each 7-bit character when read as a hexadecimal digit, or -1 if
 * the character is not a hex digit.  Both 'A'-'F' and 'a'-'f' map to 10-15.
 * One row per 16 character codes.
 */
static const int8 hexlookup[128] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x00 - 0x0F */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x10 - 0x1F */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x20 - 0x2F */
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,           /* 0x30 - 0x3F: '0'-'9' */
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x40 - 0x4F: 'A'-'F' */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x50 - 0x5F */
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x60 - 0x6F: 'a'-'f' */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x70 - 0x7F */
};

/*
 * hex_encode
 *    Write the lowercase hexadecimal form of len bytes from src into dst,
 *    two characters per input byte (high nibble first).
 *
 * dst must have room for len * 2 characters; no terminator is written.
 * Returns len * 2.
 */
unsigned hex_encode(const char* src, unsigned len, char* dst)
{
    const unsigned char* in = (const unsigned char*)src;
    const unsigned char* const stop = in + len;
    char* out = dst;

    for (; in < stop; in++) {
        *out++ = hextbl[(*in >> 4) & 0xF];
        *out++ = hextbl[*in & 0xF];
    }

    return len * 2;
}

/*
 * get_hex
 *    Return the numeric value (0-15) of the hexadecimal digit c.
 *
 * Only characters in the open range (0, 127) are looked up in hexlookup;
 * anything else, or any character that is not a hex digit, raises ERROR.
 */
static inline char get_hex(char c)
{
    int digit = (c > 0 && c < 127) ? hexlookup[(unsigned char)c] : -1;

    if (digit < 0) {
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid hexadecimal digit: \"%c\"", c)));
    }

    return (char)digit;
}

/*
 * hex_decode
 *    Convert len characters of hexadecimal text at src into bytes at dst.
 *
 * Space, newline, tab and carriage return are skipped where the first digit
 * of a pair is expected; the two digits of a pair must be adjacent.  Raises
 * ERROR (via get_hex) on a non-hex character, and ERROR if the input ends
 * after the first digit of a pair.
 *
 * Returns the number of bytes written to dst.
 */
unsigned hex_decode(const char* src, unsigned len, char* dst)
{
    const char* in = src;
    const char* const stop = src + len;
    char* out = dst;

    while (in < stop) {
        char hi;
        char lo;

        switch (*in) {
            case ' ':
            case '\n':
            case '\t':
            case '\r':
                in++;
                continue;
            default:
                break;
        }

        /* High nibble first; the low nibble must follow immediately. */
        hi = get_hex(*in++) << 4;
        if (in >= stop) {
            ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid hexadecimal data: odd number of digits")));
        }

        lo = get_hex(*in++);
        *out++ = hi | lo;
    }

    return out - dst;
}

/*
 * hex_enc_len
 *    Size of the hex encoding of srclen bytes: two characters per byte.
 *    The data pointer is not examined.
 */
static unsigned hex_enc_len(const char* src, unsigned srclen)
{
    (void)src;
    return srclen * 2;
}

/*
 * hex_dec_len
 *    Upper bound on the decoded size of srclen hex characters: one byte per
 *    two characters, rounded down.  The data pointer is not examined.
 */
static unsigned hex_dec_len(const char* src, unsigned srclen)
{
    (void)src;
    return srclen / 2;
}

/*
 * BASE64
 */

/* Base64 alphabet used by b64_encode(), indexed by 6-bit value. */
static const char _base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * 6-bit value of each 7-bit character in the base64 alphabet, or -1 if the
 * character is not part of the alphabet.  One row per 16 character codes.
 */
static const int8 b64lookup[128] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x00 - 0x0F */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x10 - 0x1F */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, /* 0x20 - 0x2F: '+' '/' */
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, /* 0x30 - 0x3F: '0'-'9' */
    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,           /* 0x40 - 0x4F: 'A'-'O' */
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 0x50 - 0x5F: 'P'-'Z' */
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 0x60 - 0x6F: 'a'-'o' */
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, /* 0x70 - 0x7F: 'p'-'z' */
};

/*
 * b64_encode
 *    Base64-encode len bytes from src into dst.
 *
 * Input bytes are packed three at a time into a 24-bit group and written out
 * as four alphabet characters.  A '\n' is written whenever the output position
 * reaches the current line limit, which sits 76 characters past the start of
 * the current line.  A trailing group of one or two bytes is padded with '='.
 *
 * Returns the number of characters written to dst; no terminator is written.
 */
static unsigned b64_encode(const char* src, unsigned len, char* dst)
{
    const char* in = src;
    const char* const stop = src + len;
    char* out = dst;
    char* linelimit = dst + 76;
    uint32 group = 0;
    int slot = 2; /* byte slot within the group still to fill: 2, 1, 0 */

    for (; in < stop; in++) {
        group |= (unsigned char)*in << (slot * 8);
        slot--;

        /* Three bytes collected: emit the four corresponding characters. */
        if (slot < 0) {
            out[0] = _base64[(group >> 18) & 0x3f];
            out[1] = _base64[(group >> 12) & 0x3f];
            out[2] = _base64[(group >> 6) & 0x3f];
            out[3] = _base64[group & 0x3f];
            out += 4;

            slot = 2;
            group = 0;
        }

        /* Break the line once the limit has been reached. */
        if (out >= linelimit) {
            *out++ = '\n';
            linelimit = out + 76;
        }
    }

    /* Flush a partial group: slot == 1 means one byte held, slot == 0 means two. */
    if (slot != 2) {
        *out++ = _base64[(group >> 18) & 0x3f];
        *out++ = _base64[(group >> 12) & 0x3f];
        *out++ = (slot == 0) ? _base64[(group >> 6) & 0x3f] : '=';
        *out++ = '=';
    }

    return out - dst;
}

static unsigned b64_decode(const char* src, unsigned len, char* dst)
{
    const char *srcend = src + len, *s = src;
    char* p = dst;
    char c;
    int b = 0;
    uint32 buf = 0;
    int pos = 0, end = 0;

    while (s < srcend) {
        c = *s++;

        if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
            continue;

        if (c == '=') {
            /* end sequence */
            if (!end) {
                if (pos == 2)
                    end = 1;
                else if (pos == 3)
                    end = 2;
                else
                    ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unexpected \"=\"")));
            }
            b = 0;
        } else {
            b = -1;
            if (c > 0 && c < 127)
                b = b64lookup[(unsigned char)c];
            if (b < 0)
                ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid symbol")));
        }
        /* add it to buffer */
        buf = (buf << 6) + b;
        pos++;
        if (pos == 4) {
            *p++ = (buf >> 16) & 255;
            if (end == 0 || end > 1)
                *p++ = (buf >> 8) & 255;
            if (end == 0 || end > 2)
                *p++ = buf & 255;
            buf = 0;
            pos = 0;
        }
    }

    if (pos != 0)
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid end sequence")));

    return p - dst;
}

/*
 * b64_enc_len
 *    Upper bound on the base64-encoded size of srclen bytes.
 *
 * Every 3 input bytes become 4 characters (rounded up generously), plus one
 * linefeed for each 57 input bytes, i.e. each full 76-character output line.
 * The data pointer is not examined.
 */
static unsigned b64_enc_len(const char* src, unsigned srclen)
{
    const unsigned bytes_per_line = 76 * 3 / 4;
    unsigned chars = (srclen + 2) * 4 / 3;
    unsigned linefeeds = srclen / bytes_per_line;

    (void)src;
    return chars + linefeeds;
}

/*
 * b64_dec_len
 *    Upper bound on the decoded size of srclen base64 characters: three
 *    bytes per four characters, rounded down.  The data pointer is not
 *    examined.
 */
static unsigned b64_dec_len(const char* src, unsigned srclen)
{
    (void)src;
    return (srclen * 3) / 4;
}

/*
 * Escape
 * Minimally escape bytea to text.
 * De-escape text to bytea.
 *
 * We must escape zero bytes and high-bit-set bytes to avoid generating
 * text that might be invalid in the current encoding, or that might
 * change to something else if passed through an encoding conversion
 * (leading to failing to de-escape to the original bytea value).
 * Also of course backslash itself has to be escaped.
 *
 * De-escaping processes \\ and any \### octal
 */

/* Numeric value of the digit character c (e.g. '7' -> 7). */
#define VAL(c) ((c) - '0')
/* Digit character for the small number n (e.g. 7 -> '7'). */
#define DIG(n) ((n) + '0')

/*
 * esc_encode
 *    Write the "escape" text form of srclen bytes from src into dst.
 *
 * Zero bytes and bytes with the high bit set become a backslash followed by
 * three octal digits; a backslash becomes two backslashes; every other byte
 * is copied unchanged.
 *
 * Returns the number of characters written to dst; no terminator is written.
 * The length is derived from the write pointer rather than from a separate
 * signed counter, because the output can be up to four times the input and a
 * signed int accumulator could overflow for very large inputs.
 */
static unsigned esc_encode(const char* src, unsigned srclen, char* dst)
{
    const char* end = src + srclen;
    char* rp = dst;

    for (; src < end; src++) {
        unsigned char c = (unsigned char)*src;

        if (c == '\0' || IS_HIGHBIT_SET(c)) {
            /* \ooo: three octal digits, most significant first */
            rp[0] = '\\';
            rp[1] = DIG(c >> 6);
            rp[2] = DIG((c >> 3) & 7);
            rp[3] = DIG(c & 7);
            rp += 4;
        } else if (c == '\\') {
            rp[0] = '\\';
            rp[1] = '\\';
            rp += 2;
        } else {
            *rp++ = c;
        }
    }

    return (unsigned)(rp - dst);
}

/*
 * esc_decode
 *    Convert srclen characters of "escape" text at src into bytes at dst.
 *
 * A backslash followed by three octal digits (first digit 0-3) yields the
 * byte with that octal value; two backslashes yield one backslash; any other
 * character is copied unchanged.  A backslash followed by anything else
 * raises ERROR.
 *
 * Returns the number of bytes written to dst.
 */
static unsigned esc_decode(const char* src, unsigned srclen, char* dst)
{
    const char* const stop = src + srclen;
    char* out = dst;
    int count = 0;

    /* Each pass through the loop produces exactly one output byte. */
    for (; src < stop; count++) {
        if (src[0] != '\\') {
            *out++ = *src++;
            continue;
        }

        if (src + 3 < stop && (src[1] >= '0' && src[1] <= '3') && (src[2] >= '0' && src[2] <= '7') &&
            (src[3] >= '0' && src[3] <= '7')) {
            unsigned int octal = VAL(src[1]);

            octal = (octal << 3) + VAL(src[2]);
            octal <<= 3;
            *out++ = octal + VAL(src[3]);
            src += 4;
        } else if (src + 1 < stop && (src[1] == '\\')) {
            *out++ = '\\';
            src += 2;
        } else {
            /*
             * A lone backslash that does not start a valid octal escape.
             * Not expected here, because esc_dec_len performs the same
             * check first.
             */
            ereport(
                ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type bytea")));
        }
    }

    return count;
}

/*
 * esc_enc_len
 *    Exact size of the "escape" text form of srclen bytes at src.
 *
 * Zero bytes and bytes with the high bit set take 4 characters (backslash
 * plus three octal digits), a backslash takes 2, everything else takes 1.
 *
 * The total is accumulated in an unsigned variable, matching the return
 * type: the result can be up to four times srclen, which would overflow a
 * signed int (undefined behaviour) for very large inputs.
 */
static unsigned esc_enc_len(const char* src, unsigned srclen)
{
    const char* end = src + srclen;
    unsigned len = 0;

    for (; src < end; src++) {
        if (*src == '\0' || IS_HIGHBIT_SET(*src))
            len += 4;
        else if (*src == '\\')
            len += 2;
        else
            len++;
    }

    return len;
}

/*
 * esc_dec_len
 *    Exact decoded size of srclen characters of "escape" text at src.
 *
 * Walks the input the same way esc_decode does, counting one output byte per
 * plain character, per backslash + three-octal-digit escape, and per doubled
 * backslash.  A backslash followed by anything else raises ERROR.
 */
static unsigned esc_dec_len(const char* src, unsigned srclen)
{
    const char* const stop = src + srclen;
    int count = 0;

    /* Each pass through the loop accounts for exactly one output byte. */
    for (; src < stop; count++) {
        if (src[0] != '\\') {
            src++;
            continue;
        }

        if (src + 3 < stop && (src[1] >= '0' && src[1] <= '3') && (src[2] >= '0' && src[2] <= '7') &&
            (src[3] >= '0' && src[3] <= '7')) {
            /* backslash followed by a valid three-digit octal value */
            src += 4;
        } else if (src + 1 < stop && (src[1] == '\\')) {
            /* doubled backslash stands for a single backslash */
            src += 2;
        } else {
            /* lone backslash that does not start a valid octal escape */
            ereport(
                ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type bytea")));
        }
    }

    return count;
}

/*
 * Common
 */

/*
 * Table of the supported encodings, searched by name in pg_find_encoding().
 * The entry whose name is NULL terminates the list.
 */
static const struct {
    const char* name;
    struct pg_encoding enc;
} enclist[] = {
    {"hex", {hex_enc_len, hex_dec_len, hex_encode, hex_decode}},
    {"base64", {b64_enc_len, b64_dec_len, b64_encode, b64_decode}},
    {"escape", {esc_enc_len, esc_dec_len, esc_encode, esc_decode}},
    {NULL, {NULL, NULL, NULL, NULL}}
};

/*
 * pg_find_encoding
 *    Return the function table for the encoding called name, compared
 *    case-insensitively against the entries of enclist, or NULL if no entry
 *    matches.
 */
static const struct pg_encoding* pg_find_encoding(const char* name)
{
    int idx = 0;

    while (enclist[idx].name != NULL) {
        if (pg_strcasecmp(enclist[idx].name, name) == 0) {
            return &enclist[idx].enc;
        }
        idx++;
    }

    return NULL;
}