/* -------------------------------------------------------------------------
 *
 * encode.c
 *    Various data encoding/decoding things.
 *
 * Copyright (c) 2001-2012, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *    src/backend/utils/adt/encode.c
 *
 * -------------------------------------------------------------------------
 */
#include "postgres.h"
#include "knl/knl_variable.h"
#include <ctype.h>
#include "utils/builtins.h"
/*
 * Callback set describing one binary <-> text encoding scheme.
 *
 * binary_encode() and binary_decode() use the *_len callbacks to size the
 * output buffer before calling encode/decode, and then verify that the
 * number of bytes reported by encode/decode does not exceed that size.
 */
struct pg_encoding {
/* Size, in bytes, to allocate for the encoded form of data[0..dlen). */
unsigned (*encode_len)(const char* data, unsigned dlen);
/* Size, in bytes, to allocate for the decoded form of data[0..dlen). */
unsigned (*decode_len)(const char* data, unsigned dlen);
/* Encodes data[0..dlen) into res; returns the number of bytes written. */
unsigned (*encode)(const char* data, unsigned dlen, char* res);
/* Decodes data[0..dlen) into res; returns the number of bytes written. */
unsigned (*decode)(const char* data, unsigned dlen, char* res);
};
/* Looks up an encoding by name; returns NULL when the name is not known. */
static const struct pg_encoding* pg_find_encoding(const char* name);
/*
 * SQL functions.
 */
Datum binary_encode(PG_FUNCTION_ARGS)
{
bytea* data = PG_GETARG_BYTEA_P(0);
Datum name = PG_GETARG_DATUM(1);
text* result = NULL;
char* namebuf = NULL;
int datalen, resultlen, res;
const struct pg_encoding* enc;
datalen = VARSIZE(data) - VARHDRSZ;
namebuf = TextDatumGetCString(name);
enc = pg_find_encoding(namebuf);
if (enc == NULL)
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized encoding: \"%s\"", namebuf)));
resultlen = enc->encode_len(VARDATA(data), datalen);
result = (text*)palloc(VARHDRSZ + resultlen);
res = enc->encode(VARDATA(data), datalen, VARDATA(result));
if (res > resultlen)
ereport(FATAL, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("overflow - encode estimate too small")));
SET_VARSIZE(result, VARHDRSZ + res);
PG_RETURN_TEXT_P(result);
}
Datum binary_decode(PG_FUNCTION_ARGS)
{
text* data = PG_GETARG_TEXT_P(0);
Datum name = PG_GETARG_DATUM(1);
bytea* result = NULL;
char* namebuf = NULL;
int datalen, resultlen, res;
const struct pg_encoding* enc;
datalen = VARSIZE(data) - VARHDRSZ;
namebuf = TextDatumGetCString(name);
enc = pg_find_encoding(namebuf);
if (enc == NULL)
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized encoding: \"%s\"", namebuf)));
resultlen = enc->decode_len(VARDATA(data), datalen);
result = (bytea*)palloc(VARHDRSZ + resultlen);
res = enc->decode(VARDATA(data), datalen, VARDATA(result));
if (res > resultlen)
ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("overflow - decode estimate too small")));
SET_VARSIZE(result, VARHDRSZ + res);
PG_RETURN_BYTEA_P(result);
}
/*
 * HEX
 */
/* Lowercase hexadecimal digit characters, indexed by 4-bit value; read by hex_encode(). */
static const char hextbl[] = "0123456789abcdef";
/*
 * Value of each 7-bit character code when read as a hexadecimal digit,
 * or -1 if the character is not a hexadecimal digit. Both 'A'-'F' and
 * 'a'-'f' map to 10-15. One row per 16 character codes.
 */
static const int8 hexlookup[128] = {
    /* 0x00-0x0f */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10-0x1f */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20-0x2f */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x30-0x3f */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
    /* 0x40-0x4f */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x50-0x5f */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x60-0x6f */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x70-0x7f */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/*
 * hex_encode
 *
 * Writes two hexadecimal digit characters (high nibble first) to dst for each
 * of the len bytes at src. No terminator is written.
 *
 * Returns len * 2.
 */
unsigned hex_encode(const char* src, unsigned len, char* dst)
{
    const char* const stop = src + len;
    const char* in;
    char* out = dst;

    for (in = src; in < stop; in++) {
        *out++ = hextbl[(*in >> 4) & 0xF];
        *out++ = hextbl[*in & 0xF];
    }
    return len * 2;
}
/*
 * get_hex
 *
 * Returns the numeric value (0-15) of the hexadecimal digit character c.
 * Any character that is outside the range 1..126 or that hexlookup marks
 * as -1 is reported as an ERROR.
 */
static inline char get_hex(char c)
{
    int digit = (c > 0 && c < 127) ? hexlookup[(unsigned char)c] : -1;

    if (digit < 0) {
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid hexadecimal digit: \"%c\"", c)));
    }
    return (char)digit;
}
/*
 * hex_decode
 *
 * Converts pairs of hexadecimal digits in src[0..len) into bytes at dst.
 * Space, newline, tab and carriage return are skipped when they appear
 * where the first digit of a pair is expected. Input that ends after the
 * first digit of a pair is reported as an ERROR; invalid digits are
 * reported by get_hex().
 *
 * Returns the number of bytes written to dst.
 */
unsigned hex_decode(const char* src, unsigned len, char* dst)
{
    const char* const stop = src + len;
    const char* in = src;
    char* out = dst;

    while (in < stop) {
        char high;
        char low;

        switch (*in) {
            case ' ':
            case '\n':
            case '\t':
            case '\r':
                in++;
                continue;
            default:
                break;
        }

        high = get_hex(*in++) << 4;
        if (in >= stop) {
            ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid hexadecimal data: odd number of digits")));
        }
        low = get_hex(*in++);
        *out++ = high | low;
    }
    return out - dst;
}
/*
 * hex_enc_len
 *
 * Output size for hex-encoding srclen bytes: two characters per byte.
 * The contents of src are not examined.
 */
static unsigned hex_enc_len(const char* src, unsigned srclen)
{
    (void)src;
    return srclen * 2;
}
/*
 * hex_dec_len
 *
 * Output size for hex-decoding srclen characters: one byte per two
 * characters, rounded down. The contents of src are not examined.
 */
static unsigned hex_dec_len(const char* src, unsigned srclen)
{
    (void)src;
    return srclen / 2;
}
/*
 * BASE64
 */
/* Base64 alphabet, indexed by 6-bit value; read by b64_encode(). */
static const char _base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
 * Value (0-63) of each 7-bit character code when read as a base64 symbol,
 * or -1 if the character is not part of the base64 alphabet. The padding
 * character '=' is not in this table. One row per 16 character codes.
 */
static const int8 b64lookup[128] = {
    /* 0x00-0x0f */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10-0x1f */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20-0x2f */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
    /* 0x30-0x3f */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    /* 0x40-0x4f */ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    /* 0x50-0x5f */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    /* 0x60-0x6f */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    /* 0x70-0x7f */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
};
/*
 * b64_encode
 *
 * Base64-encodes src[0..len) into dst. Input bytes are packed three at a
 * time into a 24-bit group that is emitted as four alphabet characters.
 * A '\n' is inserted whenever the output position reaches the current line
 * limit, which is then moved 76 characters further on. A trailing partial
 * group is padded with '='.
 *
 * Returns the number of characters written to dst.
 */
static unsigned b64_encode(const char* src, unsigned len, char* dst)
{
    const char* const stop = src + len;
    const char* in = src;
    char* out = dst;
    char* line_limit = dst + 76;
    uint32 group = 0;
    int filled = 0; /* input bytes currently held in group (0..2) */

    while (in < stop) {
        /* First byte of a group lands in bits 16-23, second in 8-15, third in 0-7. */
        group |= (unsigned char)*in << (8 * (2 - filled));
        filled++;
        in++;

        if (filled == 3) {
            *out++ = _base64[(group >> 18) & 0x3f];
            *out++ = _base64[(group >> 12) & 0x3f];
            *out++ = _base64[(group >> 6) & 0x3f];
            *out++ = _base64[group & 0x3f];
            filled = 0;
            group = 0;
        }

        if (out >= line_limit) {
            *out++ = '\n';
            line_limit = out + 76;
        }
    }

    /* Flush a trailing one- or two-byte group, padded out to four characters. */
    if (filled != 0) {
        *out++ = _base64[(group >> 18) & 0x3f];
        *out++ = _base64[(group >> 12) & 0x3f];
        if (filled == 2) {
            *out++ = _base64[(group >> 6) & 0x3f];
        } else {
            *out++ = '=';
        }
        *out++ = '=';
    }
    return out - dst;
}
static unsigned b64_decode(const char* src, unsigned len, char* dst)
{
const char *srcend = src + len, *s = src;
char* p = dst;
char c;
int b = 0;
uint32 buf = 0;
int pos = 0, end = 0;
while (s < srcend) {
c = *s++;
if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
continue;
if (c == '=') {
if (!end) {
if (pos == 2)
end = 1;
else if (pos == 3)
end = 2;
else
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unexpected \"=\"")));
}
b = 0;
} else {
b = -1;
if (c > 0 && c < 127)
b = b64lookup[(unsigned char)c];
if (b < 0)
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid symbol")));
}
buf = (buf << 6) + b;
pos++;
if (pos == 4) {
*p++ = (buf >> 16) & 255;
if (end == 0 || end > 1)
*p++ = (buf >> 8) & 255;
if (end == 0 || end > 2)
*p++ = buf & 255;
buf = 0;
pos = 0;
}
}
if (pos != 0)
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid end sequence")));
return p - dst;
}
/*
 * b64_enc_len
 *
 * Output size estimate for base64-encoding srclen bytes: four characters
 * per three input bytes, plus one extra character for every 57 input
 * bytes (57 = 76 * 3 / 4, the input that fills a 76-character line).
 * The contents of src are not examined.
 */
static unsigned b64_enc_len(const char* src, unsigned srclen)
{
    const unsigned bytes_per_line = 76 * 3 / 4;
    unsigned symbol_chars = (srclen + 2) * 4 / 3;
    unsigned newline_chars = srclen / bytes_per_line;

    (void)src;
    return symbol_chars + newline_chars;
}
/*
 * b64_dec_len
 *
 * Output size estimate for base64-decoding srclen characters: three bytes
 * per four input characters, rounded down. The contents of src are not
 * examined.
 */
static unsigned b64_dec_len(const char* src, unsigned srclen)
{
    (void)src;
    return (srclen * 3) / 4;
}
/*
 * Escape
 * Minimally escape bytea to text.
 * De-escape text to bytea.
 *
 * We must escape zero bytes and high-bit-set bytes to avoid generating
 * text that might be invalid in the current encoding, or that might
 * change to something else if passed through an encoding conversion
 * (leading to failing to de-escape to the original bytea value).
 * Also of course backslash itself has to be escaped.
 *
 * De-escaping processes \\ and any \### octal
 */
/* VAL: digit character -> its numeric value.  DIG: numeric value -> its digit character. */
#define VAL(ch) ((ch) - '0')
#define DIG(num) ((num) + '0')
/*
 * esc_encode
 *
 * Copies src[0..srclen) to dst with minimal escaping: a backslash becomes
 * two backslashes, a zero byte or a byte with the high bit set becomes a
 * backslash followed by three octal digits, and every other byte is copied
 * unchanged.
 *
 * Returns the number of characters written to dst.
 *
 * NOTE(review): the count is accumulated in an int, as in esc_enc_len().
 */
static unsigned esc_encode(const char* src, unsigned srclen, char* dst)
{
    const char* const stop = src + srclen;
    char* out = dst;
    int written = 0;

    for (; src < stop; src++) {
        unsigned char byte = (unsigned char)*src;

        if (byte == '\\') {
            *out++ = '\\';
            *out++ = '\\';
            written += 2;
        } else if (byte != '\0' && !IS_HIGHBIT_SET(byte)) {
            *out++ = byte;
            written += 1;
        } else {
            /* \ooo: three octal digits, most significant first */
            *out++ = '\\';
            *out++ = DIG(byte >> 6);
            *out++ = DIG((byte >> 3) & 7);
            *out++ = DIG(byte & 7);
            written += 4;
        }
    }
    return written;
}
static unsigned esc_decode(const char* src, unsigned srclen, char* dst)
{
const char* end = src + srclen;
char* rp = dst;
int len = 0;
while (src < end) {
if (src[0] != '\\')
*rp++ = *src++;
else if (src + 3 < end && (src[1] >= '0' && src[1] <= '3') && (src[2] >= '0' && src[2] <= '7') &&
(src[3] >= '0' && src[3] <= '7')) {
unsigned int val;
val = VAL(src[1]);
val <<= 3;
val += VAL(src[2]);
val <<= 3;
*rp++ = val + VAL(src[3]);
src += 4;
} else if (src + 1 < end && (src[1] == '\\')) {
*rp++ = '\\';
src += 2;
} else {
* One backslash, not followed by ### valid octal. Should never
* get here, since esc_dec_len does same check.
*/
ereport(
ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type bytea")));
}
len++;
}
return len;
}
/*
 * esc_enc_len
 *
 * Number of characters needed to escape-encode src[0..srclen): 4 for a zero
 * byte or a byte with the high bit set, 2 for a backslash, 1 for anything
 * else.
 *
 * NOTE(review): the total is accumulated in an int and each input byte can
 * add up to 4 to it.
 */
static unsigned esc_enc_len(const char* src, unsigned srclen)
{
    const char* const stop = src + srclen;
    const char* cur;
    int total = 0;

    for (cur = src; cur < stop; cur++) {
        if (*cur == '\\') {
            total += 2;
        } else if (*cur == '\0' || IS_HIGHBIT_SET(*cur)) {
            total += 4;
        } else {
            total += 1;
        }
    }
    return total;
}
static unsigned esc_dec_len(const char* src, unsigned srclen)
{
const char* end = src + srclen;
int len = 0;
while (src < end) {
if (src[0] != '\\')
src++;
else if (src + 3 < end && (src[1] >= '0' && src[1] <= '3') && (src[2] >= '0' && src[2] <= '7') &&
(src[3] >= '0' && src[3] <= '7')) {
* backslash + valid octal
*/
src += 4;
} else if (src + 1 < end && (src[1] == '\\')) {
* two backslashes = backslash
*/
src += 2;
} else {
* one backslash, not followed by ### valid octal
*/
ereport(
ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type bytea")));
}
len++;
}
return len;
}
/*
 * Common
 */
/*
 * Table of the supported encodings, searched by pg_find_encoding().
 * Each entry pairs a name with its callbacks, given positionally in
 * struct pg_encoding member order: encode_len, decode_len, encode, decode.
 * The entry whose name is NULL marks the end of the table.
 */
static const struct {
const char* name;
struct pg_encoding enc;
} enclist[] =
{{"hex", {hex_enc_len, hex_dec_len, hex_encode, hex_decode}},
{"base64", {b64_enc_len, b64_dec_len, b64_encode, b64_decode}},
{"escape", {esc_enc_len, esc_dec_len, esc_encode, esc_decode}},
{NULL, {NULL, NULL, NULL, NULL}}};
/*
 * pg_find_encoding
 *
 * Scans enclist for an entry whose name matches the given name according to
 * pg_strcasecmp() and returns a pointer to its callback set, or NULL when
 * the end of the table is reached without a match.
 */
static const struct pg_encoding* pg_find_encoding(const char* name)
{
    int idx = 0;

    while (enclist[idx].name != NULL) {
        if (pg_strcasecmp(enclist[idx].name, name) == 0) {
            return &enclist[idx].enc;
        }
        idx++;
    }
    return NULL;
}