/*
 * Encoding names and routines for working with them. Everything
 * in this file is shared between FE and BE.
 *
 * src/backend/utils/mb/encnames.c
 */
#ifdef FRONTEND
#include "postgres_fe.h"
#ifndef Assert
#define Assert(condition)
#endif
#else
#include "postgres.h"
#include "knl/knl_variable.h"
#include "utils/builtins.h"
#endif
#include <ctype.h>
#include <unistd.h>
#include "mb/pg_wchar.h"
/* ----------
 * All encoding names, sorted: *** A L P H A B E T I C ***
 *
 * All names must be without irrelevant chars; the search routines use
 * isalnum() chars only. This means ISO-8859-1, iso_8859-1 and Iso8859_1
 * are always converted to 'iso88591'. All names must be lower case.
 *
 * The table doesn't contain 'cs' aliases (like csISOLatin1). Are they needed?
 *
 * Karel Zak, Aug 2001
 * ----------
 */
pg_encname pg_encname_tbl[] = {
{"abc", PG_WIN1258},
{"alt", PG_WIN866},
{"big5", PG_BIG5},
{"binary", PG_SQL_ASCII},
{"euccn", PG_EUC_CN},
* Width for Japanese, standard JIS X 0213 */
{"eucjis2004", PG_EUC_JIS_2004},
{"eucjp", PG_EUC_JP},
{"euckr", PG_EUC_KR},
{"euctw", PG_EUC_TW},
{"gb18030", PG_GB18030},
{"gb180302022", PG_GB18030_2022},
{"gbk", PG_GBK},
{"iso88591", PG_LATIN1},
{"iso885910", PG_LATIN6},
{"iso885913", PG_LATIN7},
{"iso885914", PG_LATIN8},
{"iso885915", PG_LATIN9},
{"iso885916", PG_LATIN10},
{"iso88592", PG_LATIN2},
{"iso88593", PG_LATIN3},
{"iso88594", PG_LATIN4},
{"iso88595", PG_ISO_8859_5},
{"iso88596", PG_ISO_8859_6},
{"iso88597", PG_ISO_8859_7},
{"iso88598", PG_ISO_8859_8},
{"iso88599", PG_LATIN5},
{"johab", PG_JOHAB},
{"koi8", PG_KOI8R},
{"koi8r", PG_KOI8R},
{"koi8u", PG_KOI8U},
{"latin1", PG_LATIN1},
{"latin10", PG_LATIN10},
{"latin2", PG_LATIN2},
{"latin3", PG_LATIN3},
{"latin4", PG_LATIN4},
{"latin5", PG_LATIN5},
{"latin6", PG_LATIN6},
{"latin7", PG_LATIN7},
{"latin8", PG_LATIN8},
{"latin9", PG_LATIN9},
{"mskanji", PG_SJIS},
{"muleinternal", PG_MULE_INTERNAL},
{"shiftjis", PG_SJIS},
{"shiftjis2004", PG_SHIFT_JIS_2004},
{"sjis", PG_SJIS},
{"sqlascii", PG_SQL_ASCII},
{"tcvn", PG_WIN1258},
{"tcvn5712", PG_WIN1258},
{"uhc", PG_UHC},
{"unicode", PG_UTF8},
{"utf8", PG_UTF8},
{"utf8mb4", PG_UTF8},
{"vscii", PG_WIN1258},
{"win", PG_WIN1251},
{"win1250", PG_WIN1250},
{"win1251", PG_WIN1251},
{"win1252", PG_WIN1252},
{"win1253", PG_WIN1253},
{"win1254", PG_WIN1254},
{"win1255", PG_WIN1255},
{"win1256", PG_WIN1256},
{"win1257", PG_WIN1257},
{"win1258", PG_WIN1258},
{"win866", PG_WIN866},
{"win874", PG_WIN874},
{"win932", PG_SJIS},
{"win936", PG_GBK},
{"win949", PG_UHC},
{"win950", PG_BIG5},
{"windows1250", PG_WIN1250},
{"windows1251", PG_WIN1251},
{"windows1252", PG_WIN1252},
{"windows1253", PG_WIN1253},
{"windows1254", PG_WIN1254},
{"windows1255", PG_WIN1255},
{"windows1256", PG_WIN1256},
{"windows1257", PG_WIN1257},
{"windows1258", PG_WIN1258},
{"windows866", PG_WIN866},
{"windows874", PG_WIN874},
{"windows932", PG_SJIS},
{"windows936", PG_GBK},
{"windows949", PG_UHC},
{"windows950", PG_BIG5},
{NULL, (pg_enc)0}
};
unsigned int pg_encname_tbl_sz = sizeof(pg_encname_tbl) / sizeof(pg_encname_tbl[0]) - 1;
/*
 * Character sets (and their mapping) supported in B format. The charset can
 * be found quickly from a collation oid by using FAST_GET_CHARSET_BY_COLL.
 */
/*
 * Encoding ids listed in the fixed order SQL_ASCII, GBK, UTF8, GB18030.
 * NOTE(review): presumably indexed by FAST_GET_CHARSET_BY_COLL (defined
 * elsewhere) from a collation oid -- confirm before reordering or extending.
 */
pg_enc pg_enc_coll_map_b[] = {PG_SQL_ASCII, PG_GBK, PG_UTF8, PG_GB18030};
/* ----------
 * These are "official" encoding names.
 * XXX must be sorted in the same order as enum pg_enc (in mb/pg_wchar.h)
 * ----------
 */
/*
 * DEF_ENC2NAME(name, codepage) expands to one brace-enclosed initializer row:
 * the stringified name, the PG_<name> enumerator and, only when WIN32 is
 * defined, the codepage argument as a third member.
 */
#ifdef WIN32
#define DEF_ENC2NAME(name, codepage) {#name, PG_##name, codepage}
#else
#define DEF_ENC2NAME(name, codepage) {#name, PG_##name}
#endif
/*
 * Canonical encoding names, one DEF_ENC2NAME(name, codepage) row per encoding.
 *
 * pg_encoding_to_char() indexes this array directly with the encoding id and
 * asserts that the row's encoding member equals that id, so the row order
 * must not be changed independently of the encoding id values.
 */
pg_enc2name pg_enc2name_tbl[] = {
    DEF_ENC2NAME(SQL_ASCII, 0),
    DEF_ENC2NAME(EUC_JP, 20932),
    DEF_ENC2NAME(EUC_CN, 20936),
    DEF_ENC2NAME(EUC_KR, 51949),
    DEF_ENC2NAME(EUC_TW, 0),
    DEF_ENC2NAME(EUC_JIS_2004, 20932),
    DEF_ENC2NAME(GBK, 936),
    DEF_ENC2NAME(UTF8, 65001),
    DEF_ENC2NAME(MULE_INTERNAL, 0),
    DEF_ENC2NAME(LATIN1, 28591),
    DEF_ENC2NAME(LATIN2, 28592),
    DEF_ENC2NAME(LATIN3, 28593),
    DEF_ENC2NAME(LATIN4, 28594),
    DEF_ENC2NAME(LATIN5, 28599),
    DEF_ENC2NAME(LATIN6, 0),
    DEF_ENC2NAME(LATIN7, 0),
    DEF_ENC2NAME(LATIN8, 0),
    DEF_ENC2NAME(LATIN9, 28605),
    DEF_ENC2NAME(LATIN10, 0),
    DEF_ENC2NAME(WIN1256, 1256),
    DEF_ENC2NAME(WIN1258, 1258),
    DEF_ENC2NAME(WIN866, 866),
    DEF_ENC2NAME(WIN874, 874),
    DEF_ENC2NAME(KOI8R, 20866),
    DEF_ENC2NAME(WIN1251, 1251),
    DEF_ENC2NAME(WIN1252, 1252),
    DEF_ENC2NAME(ISO_8859_5, 28595),
    DEF_ENC2NAME(ISO_8859_6, 28596),
    DEF_ENC2NAME(ISO_8859_7, 28597),
    DEF_ENC2NAME(ISO_8859_8, 28598),
    DEF_ENC2NAME(WIN1250, 1250),
    DEF_ENC2NAME(WIN1253, 1253),
    DEF_ENC2NAME(WIN1254, 1254),
    DEF_ENC2NAME(WIN1255, 1255),
    DEF_ENC2NAME(WIN1257, 1257),
    DEF_ENC2NAME(KOI8U, 21866),
    DEF_ENC2NAME(GB18030, 54936),
    DEF_ENC2NAME(GB18030_2022, 54936),
    DEF_ENC2NAME(SJIS, 932),
    DEF_ENC2NAME(BIG5, 950),
    DEF_ENC2NAME(UHC, 0),
    DEF_ENC2NAME(JOHAB, 0),
    DEF_ENC2NAME(SHIFT_JIS_2004, 932)
};
/* ----------
 * These are encoding names for gettext.
 * ----------
 */
/*
 * Pairs each listed encoding id with the character-set name string used for
 * gettext. The list ends with a row whose name is NULL.
 */
pg_enc2gettext pg_enc2gettext_tbl[] = {
    {PG_UTF8, "UTF-8"},
    {PG_LATIN1, "LATIN1"},
    {PG_LATIN2, "LATIN2"},
    {PG_LATIN3, "LATIN3"},
    {PG_LATIN4, "LATIN4"},
    {PG_ISO_8859_5, "ISO-8859-5"},
    {PG_ISO_8859_6, "ISO_8859-6"},
    {PG_ISO_8859_7, "ISO-8859-7"},
    {PG_ISO_8859_8, "ISO-8859-8"},
    {PG_LATIN5, "LATIN5"},
    {PG_LATIN6, "LATIN6"},
    {PG_LATIN7, "LATIN7"},
    {PG_LATIN8, "LATIN8"},
    {PG_LATIN9, "LATIN-9"},
    {PG_LATIN10, "LATIN10"},
    {PG_KOI8R, "KOI8-R"},
    {PG_KOI8U, "KOI8-U"},
    {PG_WIN1250, "CP1250"},
    {PG_WIN1251, "CP1251"},
    {PG_WIN1252, "CP1252"},
    {PG_WIN1253, "CP1253"},
    {PG_WIN1254, "CP1254"},
    {PG_WIN1255, "CP1255"},
    {PG_WIN1256, "CP1256"},
    {PG_WIN1257, "CP1257"},
    {PG_WIN1258, "CP1258"},
    {PG_WIN866, "CP866"},
    {PG_WIN874, "CP874"},
    {PG_EUC_CN, "EUC-CN"},
    {PG_EUC_JP, "EUC-JP"},
    {PG_EUC_KR, "EUC-KR"},
    {PG_EUC_TW, "EUC-TW"},
    {PG_EUC_JIS_2004, "EUC-JP"},
    {(pg_enc)0, NULL} /* end-of-list marker */
};
/* ----------
 * Encoding checks: return -1 on error, otherwise the encoding id
 * ----------
 */
/*
 * Resolve an encoding name and check it with PG_VALID_FE_ENCODING.
 *
 * Returns the encoding id, or -1 when pg_char_to_encoding() does not
 * recognize the name or the id fails the PG_VALID_FE_ENCODING check.
 */
int pg_valid_client_encoding(const char* name)
{
    int encoding_id = pg_char_to_encoding(name);

    /* The validity macro is only consulted for a successfully resolved id. */
    if (encoding_id >= 0 && PG_VALID_FE_ENCODING(encoding_id)) {
        return encoding_id;
    }
    return -1;
}
/*
 * Resolve an encoding name and check it with PG_VALID_BE_ENCODING.
 *
 * Returns the encoding id, or -1 when pg_char_to_encoding() does not
 * recognize the name or the id fails the PG_VALID_BE_ENCODING check.
 */
int pg_valid_server_encoding(const char* name)
{
    int encoding_id = pg_char_to_encoding(name);

    /* The validity macro is only consulted for a successfully resolved id. */
    if (encoding_id >= 0 && PG_VALID_BE_ENCODING(encoding_id)) {
        return encoding_id;
    }
    return -1;
}
/*
 * Report the result of PG_VALID_BE_ENCODING for a numeric encoding id,
 * converted to int.
 */
int pg_valid_server_encoding_id(int encoding)
{
    int verdict = (int)PG_VALID_BE_ENCODING(encoding);

    return verdict;
}
/* ----------
 * Remove irrelevant chars from encoding name
 * ----------
 */
/*
 * Copy key into newkey, keeping only the characters accepted by isalnum()
 * and folding 'A'..'Z' to 'a'..'z'.
 *
 * key:    NUL-terminated input name.
 * newkey: output buffer; no length check is done here, so it must be able to
 *         hold strlen(key) + 1 bytes.
 * Returns newkey, always NUL-terminated.
 */
static char* clean_encoding_name(const char* key, char* newkey)
{
    const char* src = key;
    char* dst = newkey;

    while (*src != '\0') {
        char ch = *src++;

        /* Cast to unsigned char: isalnum() must not be handed a negative value. */
        if (!isalnum((unsigned char)ch)) {
            continue;
        }
        if (ch >= 'A' && ch <= 'Z') {
            ch = ch + 'a' - 'A';
        }
        *dst++ = ch;
    }
    *dst = '\0';
    return newkey;
}
/* ----------
 * Search encoding by encoding name
 * ----------
 */
/*
 * Look up an encoding name in pg_encname_tbl.
 *
 * name: encoding name as supplied by the caller; it is normalized with
 *       clean_encoding_name() before the table is searched.
 *
 * Returns a pointer to the matching table row, or NULL when name is NULL,
 * empty, or not present in the table.
 *
 * A name of NAMEDATALEN bytes or more is rejected before normalization:
 * frontend builds print a message to stderr and return NULL; backend builds
 * raise ERROR with ERRCODE_NAME_TOO_LONG, except under ENABLE_UT where NULL
 * is returned instead.
 */
pg_encname* pg_char_to_encname_struct(const char* name)
{
    /*
     * Binary-search bounds as indexes over the half-open range [lo, hi).
     * Indexes are used instead of moving base/last pointers so that no
     * pointer before the first table element is ever formed (the pointer
     * form computes "first element - 1" when the key sorts before every
     * row, which is undefined behaviour in C).
     */
    unsigned int lo = 0;
    unsigned int hi = pg_encname_tbl_sz;
    char buff[NAMEDATALEN];
    char* key = NULL;

    if (name == NULL || *name == '\0') {
        return NULL;
    }
    if (strlen(name) >= NAMEDATALEN) {
#ifdef FRONTEND
        fprintf(stderr, "encoding name too long\n");
        return NULL;
#else
#ifndef ENABLE_UT
        ereport(ERROR, (errcode(ERRCODE_NAME_TOO_LONG), errmsg("encoding name too long")));
#else
        return NULL;
#endif
#endif
    }

    /* The length check above guarantees the cleaned name fits into buff. */
    key = clean_encoding_name(name, buff);

    while (lo < hi) {
        unsigned int mid = lo + ((hi - lo) >> 1);
        pg_encname* entry = &pg_encname_tbl[mid];
        /* Compare first characters first; strcmp() only runs when they match. */
        int cmp = key[0] - entry->name[0];

        if (cmp == 0) {
            cmp = strcmp(key, entry->name);
            if (cmp == 0) {
                return entry;
            }
        }
        if (cmp < 0) {
            hi = mid;
        } else {
            lo = mid + 1;
        }
    }
    return NULL;
}
/*
 * Returns encoding or -1 for error
 */
/*
 * Translate an encoding name into its encoding id.
 *
 * Returns the encoding member of the row found by
 * pg_char_to_encname_struct(), or -1 when name is NULL or no row is found.
 */
int pg_char_to_encoding(const char* name)
{
    pg_encname* entry = NULL;

    if (name != NULL) {
        entry = pg_char_to_encname_struct(name);
    }
    if (entry == NULL) {
        return -1;
    }
    return entry->encoding;
}
#ifndef FRONTEND
/*
 * Function-manager wrapper (backend builds only): takes argument 0 as a Name,
 * resolves it with pg_char_to_encoding() and returns the result as an int32.
 */
Datum PG_char_to_encoding(PG_FUNCTION_ARGS)
{
    Name encoding_name = PG_GETARG_NAME(0);
    int encoding_id = pg_char_to_encoding(NameStr(*encoding_name));

    PG_RETURN_INT32(encoding_id);
}
#endif
/*
 * Translate an encoding id into its name from pg_enc2name_tbl.
 *
 * Returns the name member of row [encoding], or the empty string "" when the
 * id fails the PG_VALID_ENCODING check.
 */
const char* pg_encoding_to_char(int encoding)
{
    pg_enc2name* entry = NULL;

    if (!PG_VALID_ENCODING(encoding)) {
        return "";
    }

    entry = &pg_enc2name_tbl[encoding];
    /* The table is indexed by encoding id, so the row must describe that id. */
    Assert(encoding == entry->encoding);
    return entry->name;
}
#ifndef FRONTEND
/*
 * Function-manager wrapper (backend builds only): takes argument 0 as an
 * int32 encoding id, looks up its name with pg_encoding_to_char() and returns
 * the result of passing that C string to namein via DirectFunctionCall1.
 */
Datum PG_encoding_to_char(PG_FUNCTION_ARGS)
{
    int32 encoding_id = PG_GETARG_INT32(0);
    Datum name_cstring = CStringGetDatum(pg_encoding_to_char(encoding_id));

    return DirectFunctionCall1(namein, name_cstring);
}
#endif