* Copyright (C) 2005-2006 Evgeniy <dushistov@mail.ru>
* Copyright 2011 kubtek <kubtek@mail.com>
*
* This file is part of StarDict.
*
* StarDict is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* StarDict is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with StarDict. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <glib.h>
#include <glib/gi18n.h>
#include <glib/gstdio.h>
#include <gtk/gtk.h>
#include <cstring>
#include <cstdlib>
#include <fcntl.h>
#include <cerrno>
#ifdef CONFIG_GNOME
# include <libgnome/libgnome.h>
#elif defined(_WIN32)
# include <gdk/gdkwin32.h>
# include <Shlwapi.h>
# include <io.h>
# include <ERRNO.H>
#endif
#ifndef O_BINARY
#define O_BINARY 0
#endif
#include <list>
#include "libcommon.h"
#include "utils.h"
void fontname_to_family_and_size(const gchar *fontname, std::string &family, gint &size)
{
const gchar *a = strrchr(fontname, ' ');
if (a) {
family.assign(fontname, a - fontname);
a++;
size = atoi(a);
}
}
button, entry {
color: #ff00ea;
font: 12px "Comic Sans";
}
font_name:
Comic Sans 12
px_font:
12px "Comic Sans"
*/
#if GTK_MAJOR_VERSION >= 3
void font_name_to_px_font(const char *font_name, std::string &px_font)
{
const char *ab;
ab = strrchr(font_name, ' ');
if (ab) {
std::string str2(font_name, ab-font_name);
ab++;
px_font = ab;
px_font += "px \"";
px_font += str2;
px_font += '\"';
}
}
#endif
void ProcessGtkEvent()
{
#if GTK_MAJOR_VERSION >= 4
g_main_context_iteration(g_main_context_default(), FALSE);
#else
while (gtk_events_pending())
gtk_main_iteration();
#endif
}
std::string combnum2str(gint comb_code)
{
switch (comb_code) {
#ifdef _WIN32
case 0:
return "Shift";
case 1:
return "Alt";
case 2:
return "Ctrl";
case 3:
return "Ctrl+Alt";
#else
case 0:
return "Win";
case 1:
return "Shift";
case 2:
return "Alt";
case 3:
return "Ctrl";
case 4:
return "Ctrl+Alt";
case 5:
return "Ctrl+e";
case 6:
return "Win+d";
case 7:
return "F1";
case 8:
return "F2";
case 9:
return "F3";
case 10:
return "F4";
#endif
default:
return "";
}
}
std::vector<std::string> split(const std::string& str, char sep)
{
std::vector<std::string> res;
std::string::size_type prev_pos=0, pos = 0;
while ((pos=str.find(sep, prev_pos))!=std::string::npos) {
res.push_back(std::string(str, prev_pos, pos-prev_pos));
prev_pos=pos+1;
}
res.push_back(std::string(str, prev_pos, str.length()-prev_pos));
return res;
}
GdkPixbuf *load_image_from_file(const std::string& filename)
{
GError *err=NULL;
GdkPixbuf *res=gdk_pixbuf_new_from_file(filename.c_str(), &err);
if (!res) {
g_error(_("Can not load image. %s"), err->message);
g_error_free(err);
}
return res;
}
static gchar * byte_to_hex(unsigned char nr) {
gchar *result = NULL;
result = g_strdup_printf("%%%x%x", nr / 0x10, nr % 0x10);
return result;
}
char *common_build_dictdata(char type, const char *definition)
{
size_t len = strlen(definition);
guint32 size;
size = sizeof(char) + len + 1;
char *data = (char *)g_malloc(sizeof(guint32) + size);
char *p = data;
*((guint32 *)p)= size;
p += sizeof(guint32);
*p = type;
p++;
memcpy(p, definition, len+1);
return data;
}
char *common_encode_uri_string(const char *string)
{
gchar *newURIString;
gchar *hex, *tmp = NULL;
int i, j, len, bytes;
the characters bytewise and convert non-ASCII
compatible chars to URI hexcodes */
newURIString = g_strdup("");
len = strlen(string);
for(i = 0; i < len; i++) {
if(g_ascii_isalnum(string[i]) || strchr("-_.!~*'()", (int)string[i]))
tmp = g_strdup_printf("%s%c", newURIString, string[i]);
else if(string[i] == ' ')
tmp = g_strdup_printf("%s%%20", newURIString);
else if((unsigned char)string[i] <= 127) {
tmp = g_strdup_printf("%s%s", newURIString, hex = byte_to_hex(string[i]));g_free(hex);
} else {
bytes = 0;
if(((unsigned char)string[i] >= 192) && ((unsigned char)string[i] <= 223))
bytes = 2;
else if(((unsigned char)string[i] > 223) && ((unsigned char)string[i] <= 239))
bytes = 3;
else if(((unsigned char)string[i] > 239) && ((unsigned char)string[i] <= 247))
bytes = 4;
else if(((unsigned char)string[i] > 247) && ((unsigned char)string[i] <= 251))
bytes = 5;
else if(((unsigned char)string[i] > 247) && ((unsigned char)string[i] <= 251))
bytes = 6;
if(0 != bytes) {
if((i + (bytes - 1)) > len) {
g_warning(("Unexpected end of character sequence or corrupt UTF-8 encoding! Some characters were dropped!"));
break;
}
for(j=0; j < (bytes - 1); j++) {
tmp = g_strdup_printf("%s%s", newURIString, hex = byte_to_hex((unsigned char)string[i++]));
g_free(hex);
g_free(newURIString);
newURIString = tmp;
}
tmp = g_strdup_printf("%s%s", newURIString, hex = byte_to_hex((unsigned char)string[i]));
g_free(hex);
} else {
g_error("Internal error while converting UTF-8 chars to HTTP URI!");
}
}
g_free(newURIString);
newURIString = tmp;
}
return newURIString;
}
void common_terminal2pango(const char *t, std::string &pango)
{
pango.clear();
const char *p1;
std::string color_num;
std::string color;
std::string tmp_str;
while (*t) {
if ((*t == '_') && (t[1] == '')) {
color = "#FFFA00";
pango += "<span foreground=\"";
pango += color;
pango += "\">";
t+=2;
pango += *t;
pango += "</span>";
if (*t) {
t++;
continue;
} else {
break;
}
}
if ((*t == '') && (t[1] == '[')) {
p1 = strchr(t+2, 'm');
if (p1) {
color_num.assign(t+2, p1-t-2);
if (color_num == "30") {
color = "#000000";
} else if (color_num == "31") {
color = "#FF0000";
} else if (color_num == "32") {
color = "#008000";
} else if (color_num == "33") {
color = "#FFFF00";
} else if (color_num == "34") {
color = "#0000FF";
} else if (color_num == "35") {
color = "#FF00FF";
} else if (color_num == "36") {
color = "#00FFFF";
} else if (color_num == "37") {
color = "#FFFFFF";
} else {
color = "#EEEEEE";
}
t = p1 +1 ;
pango += "<span foreground=\"";
pango += color;
pango += "\">";
p1 = strstr(t, "[m");
if (p1) {
tmp_str.assign(t, p1-t);
pango += tmp_str;
t = p1 +3;
pango += "</span>";
} else {
pango += t;
pango += "</span>";
break;
}
} else {
break;
}
} else {
pango += *t;
t++;
}
}
}
void common_terminal2pango_2(const char *t, std::string &pango)
{
pango.clear();
std::string prev_str;
std::string prev_char;
const char *p1;
while (*t) {
if (*t == 8) {
if (g_str_has_prefix(t+1, prev_char.c_str())) {
t++;
pango += "<b>";
pango += prev_str;
pango += "</b>";
} else if (prev_char == "_") {
t++;
p1 = g_utf8_next_char(t);
char *mark = g_markup_escape_text(t, p1 -t);
pango += "<u>";
pango += mark;
pango += "</u>";
g_free(mark);
}
prev_str.clear();
} else {
pango += prev_str;
switch (*t) {
case '&':
prev_str = "&";
break;
case '<':
prev_str = "<";
break;
case '>':
prev_str = ">";
break;
case '\'':
prev_str = "'";
break;
case '"':
prev_str = """;
break;
default:
p1 = g_utf8_next_char(t);
prev_str.assign(t, p1-t);
break;
}
}
p1 = g_utf8_next_char(t);
prev_char.assign(t, p1-t);
t = p1;
}
pango += prev_str;
}
#if defined(_WIN32)
flags is type of operation allowed parameter of _wsopen_s function
mode is permission settings parameter of _wsopen_s function
#else
for the meaning of flags and mode parameters see g_mkstemp_full function
#endif
*/
gint
stardict_mkstemp_full (gchar *tmpl,
int flags,
int mode)
{
char *XXXXXX;
int count, fd;
static const char letters[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
static const int NLETTERS = sizeof (letters) - 1;
glong value;
gint64 tv_usec;
gint64 tv_sec;
static int counter = 0;
g_return_val_if_fail (tmpl != NULL, -1);
XXXXXX = g_strrstr (tmpl, "XXXXXX");
if (!XXXXXX || strncmp (XXXXXX, "XXXXXX", 6))
{
errno = EINVAL;
return -1;
}
#if defined(_WIN32)
std::string tmpl_utf8;
if(!file_name_to_utf8(tmpl, tmpl_utf8))
return -1;
std_win_string tmpl_win;
if(!utf8_to_windows(tmpl_utf8, tmpl_win))
return -1;
size_t XXXXXXind = tmpl_win.rfind(TEXT("XXXXXX"));
if(XXXXXXind == std_win_string::npos)
return -1;
#endif
tv_usec = g_get_real_time();
tv_sec = tv_usec / G_USEC_PER_SEC;
value = (tv_usec ^ tv_sec) + counter++;
for (count = 0; count < 100; value += 7777, ++count)
{
glong v = value;
XXXXXX[0] = letters[v % NLETTERS];
v /= NLETTERS;
XXXXXX[1] = letters[v % NLETTERS];
v /= NLETTERS;
XXXXXX[2] = letters[v % NLETTERS];
v /= NLETTERS;
XXXXXX[3] = letters[v % NLETTERS];
v /= NLETTERS;
XXXXXX[4] = letters[v % NLETTERS];
v /= NLETTERS;
XXXXXX[5] = letters[v % NLETTERS];
#if defined(_WIN32)
tmpl_win[XXXXXXind + 0] = (TCHAR)XXXXXX[0];
tmpl_win[XXXXXXind + 1] = (TCHAR)XXXXXX[1];
tmpl_win[XXXXXXind + 2] = (TCHAR)XXXXXX[2];
tmpl_win[XXXXXXind + 3] = (TCHAR)XXXXXX[3];
tmpl_win[XXXXXXind + 4] = (TCHAR)XXXXXX[4];
tmpl_win[XXXXXXind + 5] = (TCHAR)XXXXXX[5];
errno_t err = _wsopen_s(&fd, tmpl_win.c_str(),
flags | _O_CREAT | _O_EXCL,
_SH_DENYWR, mode);
if(err == EEXIST || err == EACCES)
continue;
if(err != 0)
return -1;
if(fd >= 0)
return fd;
#else
fd = g_open (tmpl, flags | O_CREAT | O_EXCL, mode);
if (fd >= 0)
return fd;
else if (errno != EEXIST)
* try, and there are 2^32 or so of them, so give up now.
*/
return -1;
#endif
}
errno = EEXIST;
return -1;
}
gint
stardict_mkstemp (gchar *tmpl)
{
#if defined(_WIN32)
return stardict_mkstemp_full (tmpl, _O_RDWR | _O_BINARY, _S_IREAD | _S_IWRITE);
#else
return stardict_mkstemp_full (tmpl, O_RDWR | O_BINARY, 0600);
#endif
}
gint
stardict_file_open_tmp (const gchar *tmpl,
gchar **name_used,
GError **error)
{
int retval;
const char *tmpdir;
const char *sep;
char *fulltemplate;
const char *slash;
if (tmpl == NULL)
tmpl = ".XXXXXX";
if ((slash = strchr (tmpl, G_DIR_SEPARATOR)) != NULL
#ifdef G_OS_WIN32
|| (strchr (tmpl, '/') != NULL && (slash = "/"))
#endif
)
{
gchar *display_tmpl = g_filename_display_name (tmpl);
char c[2];
c[0] = *slash;
c[1] = '\0';
g_set_error (error,
G_FILE_ERROR,
G_FILE_ERROR_FAILED,
_("Template '%s' invalid, should not contain a '%s'"),
display_tmpl, c);
g_free (display_tmpl);
return -1;
}
if (strstr (tmpl, "XXXXXX") == NULL)
{
gchar *display_tmpl = g_filename_display_name (tmpl);
g_set_error (error,
G_FILE_ERROR,
G_FILE_ERROR_FAILED,
_("Template '%s' doesn't contain XXXXXX"),
display_tmpl);
g_free (display_tmpl);
return -1;
}
tmpdir = g_get_tmp_dir ();
if (G_IS_DIR_SEPARATOR (tmpdir [strlen (tmpdir) - 1]))
sep = "";
else
sep = G_DIR_SEPARATOR_S;
fulltemplate = g_strconcat (tmpdir, sep, tmpl, NULL);
retval = stardict_mkstemp (fulltemplate);
if (retval == -1)
{
int save_errno = errno;
gchar *display_fulltemplate = g_filename_display_name (fulltemplate);
g_set_error (error,
G_FILE_ERROR,
g_file_error_from_errno (save_errno),
_("Failed to create file '%s': %s"),
display_fulltemplate, g_strerror (save_errno));
g_free (display_fulltemplate);
g_free (fulltemplate);
return -1;
}
if (name_used)
*name_used = fulltemplate;
else
g_free (fulltemplate);
return retval;
}
query_t analyse_query(const char *s, std::string& res)
{
if (!s || !*s) {
res="";
return qtSIMPLE;
}
if (*s=='/') {
res=s+1;
return qtFUZZY;
}
if (*s==':') {
res=s+1;
return qtREGEX;
}
if (*s=='|') {
res=s+1;
return qtFULLTEXT;
}
bool pattern=false;
const char *p=s;
res="";
for (; *p; res+=*p, ++p) {
if (*p=='\\') {
++p;
if (!*p)
break;
continue;
}
if (*p=='*' || *p=='?')
pattern=true;
}
if (pattern)
return qtPATTERN;
return qtSIMPLE;
}
void stardict_input_escape(const char *text, std::string &res)
{
res.clear();
const char *p = text;
if (*p == '/' || *p == '|' || *p == ':') {
res = "\\";
res += *p;
p++;
}
while (*p) {
if (*p == '\\' || *p == '*' || *p == '?') {
res += '\\';
res += *p;
} else {
res += *p;
}
p++;
}
}
static const char* html_entrs[] = { "lt;", "gt;", "amp;", "apos;", "quot;", 0 };
static const int html_entrs_len[] = { 3, 3, 4, 5, 5, 0 };
static const char html_raw_entrs[] = { '<', '>', '&', '\'', '\"', 0 };
static const char* html_tags[] = { "br>", 0 };
static const int html_tags_len[] = { 3, 0 };
static const char html_raw_tags[] = { '\n', 0 };
void html_decode(const char *str, std::string& decoded)
{
decoded.clear();
decoded.reserve(strlen(str));
int ind;
const char* p = str;
while (*p)
if (*p == '&') {
if(*(p+1) == '#') {
const char *q = strchr(p+2, ';');
if(q) {
long code = atol(p+2);
char buf[10];
buf[g_unichar_to_utf8(gunichar(code), buf)] = '\0';
decoded += buf;
p = q + 1;
} else {
g_debug("unknown entry %s", p);
break;
}
} else {
for(ind = 0; html_entrs[ind] != 0; ++ind)
if(strncmp(p + 1, html_entrs[ind], html_entrs_len[ind]) == 0) {
decoded += html_raw_entrs[ind];
p += html_entrs_len[ind]+1;
break;
}
if (html_entrs[ind] == 0) {
const char *q = strchr(p+1, ';');
if(q) {
++q;
g_debug("unknown entry %s", std::string(p, q-p).c_str());
p = q;
} else {
g_debug("unknown entry %s", p);
break;
}
}
}
} else if(*p == '<') {
for(ind = 0; html_tags[ind] != 0; ++ind)
if(strncmp(p + 1, html_tags[ind], html_tags_len[ind]) == 0) {
decoded += html_raw_tags[ind];
p += html_tags_len[ind]+1;
break;
}
if (html_tags[ind] == 0) {
const char *q = strchr(p+1, '>');
if(q) {
++q;
g_debug("unknown tag %s", std::string(p, q-p).c_str());
p = q;
} else {
g_debug("unknown tag %s", p);
break;
}
}
} else {
decoded += *p++;
}
}
src in utf-8,
dst must point to a buffer of size enough to hold the src string */
void GetPureEnglishAlpha(char *dst, const char *src)
{
while(*src && (uchar(*src) >= 128 || !isalpha(*src)))
src++;
while(*src && uchar(*src) < 128 && (isalpha(*src) || *src == ' '))
*dst++ = *src++;
*dst = '\0';
}
bool IsASCII(const char *str)
{
for(; *str; ++str)
if(uchar(*str) >= 128)
return false;
return true;
}
const char* skip_spaces(const char *str)
{
while(*str && g_unichar_isspace(g_utf8_get_char(str)))
str = g_utf8_next_char(str);
return str;
}
return value - new '\0' character */
char* copy_normalize_spaces(char *dst, const char *src)
{
while(*src) {
if (g_unichar_isspace(g_utf8_get_char(src))) {
*dst++=' ';
src = g_utf8_next_char(src);
while(g_unichar_isspace(g_utf8_get_char(src)))
src = g_utf8_next_char(src);
} else {
g_utf8_strncpy(dst,src,1);
src = g_utf8_next_char(src);
dst = g_utf8_next_char(dst);
}
}
*dst='\0';
return dst;
}
remove leading and trailing unicode spaces */
void copy_normalize_trim_spaces(char *dst, const char *src)
{
src = skip_spaces(src);
char* end = copy_normalize_spaces(dst, src);
delete_trailing_spaces_ASCII(dst, end);
}
begin - beginning of the string, end - '\0' character
return value - new '\0' character */
char* delete_trailing_spaces_ASCII(const char *begin, char *end)
{
while(begin < end && *(end - 1) == ' ')
--end;
*end = '\0';
return end;
}
In other word we replace the last ASCII space with '\0'.
begin - beginning of the string, end - '\0' character
return value - new '\0' character */
char* delete_trailing_word_ASCII(const char *begin, char *end)
{
while(begin < end && *(end-1) != ' ')
--end;
if(begin < end)
--end;
*end = '\0';
return end;
}
char* delete_trailing_char(char *begin, char *end)
{
char* p = g_utf8_find_prev_char(begin, end);
if(p) {
*p = '\0';
return p;
} else {
*begin = '\0';
return begin;
}
}
instead assignes begin and end to the first and next to last chars in the source string */
void extract_word_in_place(const char **begin, const char **end, const char* src,
int BeginPos, gboolean (*is_splitter)(gunichar c))
{
g_assert(begin);
g_assert(end);
g_assert(BeginPos >= 0);
*begin = *end = NULL;
const char* const pointer = src + BeginPos;
const char *word_begin = NULL;
while(true) {
while(*src && is_splitter(g_utf8_get_char(src)))
src = g_utf8_next_char(src);
if(!*src)
break;
if(src <= pointer || !word_begin)
word_begin = src;
if(pointer <= src)
break;
while(*src && !is_splitter(g_utf8_get_char(src)))
src = g_utf8_next_char(src);
if(!*src)
break;
}
if(!word_begin)
return;
src = word_begin;
while(*src && !is_splitter(g_utf8_get_char(src))) {
src = g_utf8_next_char(src);
}
*begin = word_begin;
*end = src;
}
word separator chars are identified by is_splitter function
If BeginPos points to a splitter, get the word to the left of the point.
If there is no word to the left, get the word to the right of the point.
copy result into dst */
void extract_word(char *dst, const char* src, int BeginPos, gboolean (*is_splitter)(gunichar c))
{
g_assert(BeginPos >= 0);
const char *begin, *end;
extract_word_in_place(&begin, &end, src, BeginPos, is_splitter);
if(!begin || !end) {
*dst = '\0';
return;
}
const int num = end - begin;
strncpy(dst, begin, num);
dst[num] = '\0';
}
the word must look like Word
where the first letter satisfies the is_first_letter function,
the first letter must be followed by at least one letter satisfing the is_second_letter function */
void extract_capitalized_word_in_place(const char **begin, const char **end,
const char* src, int BeginPos,
gboolean (*is_first_letter)(gunichar c), gboolean (*is_second_letter)(gunichar c))
{
g_assert(begin);
g_assert(end);
g_assert(BeginPos >= 0);
*begin = *end = NULL;
const char* const pointer = src + BeginPos;
const char *word_begin = NULL;
while(true) {
while(*src && !is_first_letter(g_utf8_get_char(src)))
src = g_utf8_next_char(src);
if(!*src)
break;
const char *cur_word_begin = src;
src = g_utf8_next_char(src);
if(!is_second_letter(g_utf8_get_char(src)))
continue;
if(cur_word_begin <= pointer || !word_begin)
word_begin = cur_word_begin;
if(pointer <= cur_word_begin)
break;
while(*src && is_second_letter(g_utf8_get_char(src)))
src = g_utf8_next_char(src);
if(!*src)
break;
}
if(!word_begin)
return;
src = g_utf8_next_char(word_begin);
while(*src && is_second_letter(g_utf8_get_char(src))) {
src = g_utf8_next_char(src);
}
*begin = word_begin;
*end = src;
}
void extract_capitalized_word(char *dst, const char* src, int BeginPos,
gboolean (*is_first_letter)(gunichar c), gboolean (*is_second_letter)(gunichar c))
{
g_assert(BeginPos >= 0);
const char *begin, *end;
extract_capitalized_word_in_place(&begin, &end, src, BeginPos, is_first_letter, is_second_letter);
if(!begin || !end) {
*dst = '\0';
return;
}
const int num = end - begin;
strncpy(dst, begin, num);
dst[num] = '\0';
}
const char* find_first(const char* src, gboolean (*isfunc)(gunichar c))
{
while(*src && !isfunc(g_utf8_get_char(src)))
src = g_utf8_next_char(src);
if(isfunc(g_utf8_get_char(src)))
return src;
else
return NULL;
}
const char* find_first_not(const char* src, gboolean (*isfunc)(gunichar c))
{
while(*src && isfunc(g_utf8_get_char(src)))
src = g_utf8_next_char(src);
if(isfunc(g_utf8_get_char(src)))
return NULL;
else
return src;
}
gboolean is_space_or_punct(gunichar c)
{
return g_unichar_isspace(c) || g_unichar_ispunct(c);
}
gboolean is_not_alpha(gunichar c)
{
return !g_unichar_isalpha(c);
}
gboolean is_not_upper(gunichar c)
{
return !g_unichar_isupper(c);
}
gboolean is_not_lower(gunichar c)
{
return !g_unichar_islower(c);
}