* Copyright 2011 kubtek <kubtek@mail.com>
* Copyright 2025 Hu Zheng <huzheng_001@hotmail.com>
*
* This file is part of StarDict.
*
* StarDict is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* StarDict is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with StarDict. If not, see <http://www.gnu.org/licenses/>.
*/
#include "parsedata_xdxf.h"
#include <glib/gi18n.h>
#include <vector>
#include <string>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#include <windows.h>
#endif
/* Name of this parser's configuration section. It is not referenced by the
 * code visible in this file — presumably used when settings are read; confirm. */
const char config_section[] = "xdxf";
/* Foreground colors used in the generated Pango markup. Each value is
 * formatted as "#rrggbb" by print_pango_color(), which keeps only the low
 * 24 bits. */
struct ColorScheme {
/* Color of <abr> spans. */
guint32 abr;
/* Color of <ex> spans. */
guint32 ex;
/* Color of every <k> span except the first one of an article. */
guint32 k;
/* Color of <c> spans that carry no c="..." attribute. */
guint32 c;
/* Color of the underlined <kref>/<iref> link text. */
guint32 ref;
};
/* Scheme read by the parser; filled in by set_default_color_scheme(). */
ColorScheme color_scheme;
/* Returns the number of characters of str as they would be displayed:
 *  - a recognised entity (&lt; &gt; &amp; &apos; &quot;) counts as one;
 *  - an '&' that does not start a recognised entity counts as one;
 *  - a tag "<...>" counts as zero, and so does a '<' with no closing '>';
 *  - any other UTF-8 character counts as one.
 */
static size_t xml_strlen(const std::string& str)
{
    static const char* xml_entrs[] = { "lt;", "gt;", "amp;", "apos;", "quot;", 0 };
    static const int xml_ent_len[] = { 3, 3, 4, 5, 5 };
    size_t char_count = 0;
    const char *q = str.c_str();

    while (*q) {
        if (*q == '&') {
            int i;
            for (i = 0; xml_entrs[i]; ++i)
                if (strncmp(xml_entrs[i], q + 1, xml_ent_len[i]) == 0)
                    break;
            // Skip the whole entity when recognised, otherwise just the '&'.
            q += xml_entrs[i] ? xml_ent_len[i] + 1 : 1;
            ++char_count;
        } else if (*q == '<') {
            // Markup contributes nothing to the visible length.
            const char *close = strchr(q + 1, '>');
            q = close ? close + 1 : q + 1;
        } else {
            // g_utf8_next_char() trusts the lead byte and does no validation.
            // Clamp the step at the terminator so a truncated multi-byte
            // sequence at the end of the string cannot move q out of bounds.
            const char *next_char = g_utf8_next_char(q);
            do {
                ++q;
            } while (q < next_char && *q);
            ++char_count;
        }
    }
    return char_count;
}
/* Replaces the five predefined XML entities in str (&lt; &gt; &amp; &apos;
 * &quot;) with the characters they stand for and stores the result in
 * decoded. Any other '&' is copied through unchanged.
 */
static void xml_decode(const char *str, std::string& decoded)
{
    struct Entity {
        const char *name;   /* text following the '&', including the ';' */
        int name_len;
        char raw;           /* character the entity decodes to */
    };
    static const Entity entities[] = {
        { "lt;",   3, '<'  },
        { "gt;",   3, '>'  },
        { "amp;",  4, '&'  },
        { "apos;", 5, '\'' },
        { "quot;", 5, '\"' }
    };
    static const size_t entity_count = sizeof(entities) / sizeof(entities[0]);

    const char *cursor = strchr(str, '&');
    if (!cursor) {
        // Nothing to decode: plain copy.
        decoded = str;
        return;
    }

    // Everything before the first '&' is taken over as is.
    decoded.assign(str, cursor - str);
    while (*cursor != '\0') {
        if (*cursor != '&') {
            decoded += *cursor++;
            continue;
        }
        size_t idx = 0;
        while (idx < entity_count
            && strncmp(cursor + 1, entities[idx].name, entities[idx].name_len) != 0)
            ++idx;
        if (idx == entity_count) {
            // Not one of the known entities: keep the '&' literally.
            decoded += *cursor++;
        } else {
            decoded += entities[idx].raw;
            cursor += entities[idx].name_len + 1;
        }
    }
}
/* Formats the low 24 bits of c as a "#rrggbb" color string.
 * Returns an empty string if the formatted text is not exactly 7 characters.
 */
static std::string print_pango_color(guint32 c)
{
    char hex[8];
    const gint written = g_snprintf(hex, sizeof(hex), "#%06x", c & 0xffffff);
    if (written == sizeof(hex) - 1)
        return hex;
    return "";
}
static void set_default_color_scheme(void)
{
color_scheme.abr = 0x007F00;
color_scheme.ex = 0x7F7F7F;
color_scheme.k = 0x000000;
color_scheme.c = 0x0066FF;
color_scheme.ref = 0x00007F;
}
/* One entry of the simple-tag table: text that may follow a '<' in the input
 * and the Pango markup written in its place.
 */
struct ReplaceTag {
    /* Tag text expected right after the '<', including the closing '>'.
     * Only the pointer is stored, not a copy of the text. */
    const char *match_;
    /* Number of bytes of match_ that are compared. */
    int match_len_;
    /* Markup appended to the output when the tag matches. */
    std::string replace_;
    /* Amount added to the parser's character position for replace_. */
    int char_len_;

    ReplaceTag(const char* match, int match_len, const std::string& replace, int char_len)
        : match_(match), match_len_(match_len), replace_(replace), char_len_(char_len)
    {
    }
};
/* Converts XDXF article markup into ParseResult items.
 * All the work is done by the constructor. The simple tags are only
 * recognised if fill_replace_arr() has populated the shared table first.
 */
class XDXFParser {
public:
/* Parses the NUL-terminated XDXF text at p and appends the generated items
 * to result.item_list. */
XDXFParser(const char *p, ParseResult &result);
/* Rebuilds the shared tag replacement table from the current color_scheme. */
static void fill_replace_arr(void);
private:
/* Pushes the markup pending in res_ as a mark item and resets the buffer. */
void flush(void);
private:
/* Receiver of the generated items. */
ParseResult& result_;
/* Pango markup accumulated since the last flush(). */
std::string res_;
/* Character count (as computed by xml_strlen) of the text accumulated in
 * res_; used as the start offset of recorded links. */
std::string::size_type cur_pos_;
/* Simple tag -> markup substitutions, shared by all parser instances. */
static std::vector<ReplaceTag> replace_arr_;
/* NOTE(review): the constructor and flush() also use a `links_list_` member
 * (a container of LinkDesc) that is not declared in this class — confirm
 * where it is meant to be declared. */
};
std::vector<ReplaceTag> XDXFParser::replace_arr_;
/* Rebuilds replace_arr_, the table of tags that map to a fixed piece of
 * Pango markup. The <abr> and <ex> openers embed colors taken from
 * color_scheme, so the table must be rebuilt after the scheme changes.
 * The entries are stored in the same order as before: abr group, plain
 * formatting tags, tr, ex group, closing c.
 */
void XDXFParser::fill_replace_arr(void)
{
    struct FixedTag {
        const char *match;
        int match_len;
        const char *replace;
        int char_len;
    };
    /* Entries that follow the colored "abr>" opener. */
    static const FixedTag after_abr[] = {
        { "/abr>",   5, "</span>",  0 },
        { "b>",      2, "<b>",      0 },
        { "/b>",     3, "</b>",     0 },
        { "i>",      2, "<i>",      0 },
        { "/i>",     3, "</i>",     0 },
        { "sub>",    4, "<sub>",    0 },
        { "/sub>",   5, "</sub>",   0 },
        { "sup>",    4, "<sup>",    0 },
        { "/sup>",   5, "</sup>",   0 },
        { "tt>",     3, "<tt>",     0 },
        { "/tt>",    4, "</tt>",    0 },
        { "big>",    4, "<big>",    0 },
        { "/big>",   5, "</big>",   0 },
        { "small>",  6, "<small>",  0 },
        { "/small>", 7, "</small>", 0 },
        /* The transcription brackets are visible, hence one character each. */
        { "tr>",     3, "<b>[",     1 },
        { "/tr>",    4, "]</b>",    1 }
    };
    /* Entries that follow the colored "ex>" opener. */
    static const FixedTag after_ex[] = {
        { "/ex>", 4, "</span>", 0 },
        { "/c>",  3, "</span>", 0 }
    };
    const std::string span_open("<span foreground=\"");

    replace_arr_.clear();

    replace_arr_.push_back(ReplaceTag("abr>", 4,
        span_open + print_pango_color(color_scheme.abr) + "\" style=\"italic\">",
        0));
    for (size_t n = 0; n < sizeof(after_abr) / sizeof(after_abr[0]); ++n)
        replace_arr_.push_back(ReplaceTag(after_abr[n].match, after_abr[n].match_len,
            after_abr[n].replace, after_abr[n].char_len));

    replace_arr_.push_back(ReplaceTag("ex>", 3,
        span_open + print_pango_color(color_scheme.ex) + "\">",
        0));
    for (size_t n = 0; n < sizeof(after_ex) / sizeof(after_ex[0]); ++n)
        replace_arr_.push_back(ReplaceTag(after_ex[n].match, after_ex[n].match_len,
            after_ex[n].replace, after_ex[n].char_len));
}
/* Parses the NUL-terminated XDXF fragment at p and appends the resulting
 * items to result.item_list.
 *
 * Text between tags is copied to the pending markup buffer res_ unchanged.
 * Tags are handled as follows:
 *  - tags listed in replace_arr_ are replaced by their fixed markup;
 *  - <k>: the first one is dropped together with its content (and a newline
 *    directly after its closing tag); later ones become a colored span;
 *  - <c ...>: opens a span, colored by the c="..." attribute if Pango can
 *    parse it, or by color_scheme.c if there is no such attribute;
 *  - <rref ...>: flushes the pending markup and emits a resource item;
 *  - <kref ...> / <iref ...>: emits underlined link text and records the link
 *    in links_list_;
 *  - <blockquote ...> / </blockquote>: flushes and emits indent begin/end
 *    format items;
 *  - any other tag is dropped; a '<' with no '>' after it is emitted as the
 *    escaped text "&lt;".
 */
XDXFParser::XDXFParser(const char *p, ParseResult &result) :
    result_(result)
{
    const char *tag, *next;
    std::string name;
    int i;
    bool is_first_k = true;
    for (cur_pos_ = 0; *p && (tag = strchr(p, '<')) != NULL;) {
        // Plain text up to the next tag goes to the output unchanged.
        std::string chunk(p, tag - p);
        res_ += chunk;
        cur_pos_ += xml_strlen(chunk);
        p = tag;
        // Tags with a fixed replacement.
        for (i = 0; i < static_cast<int>(replace_arr_.size()); ++i)
            if (strncmp(replace_arr_[i].match_, p + 1,
                    replace_arr_[i].match_len_) == 0) {
                res_ += replace_arr_[i].replace_;
                p += 1 + replace_arr_[i].match_len_;
                cur_pos_ += replace_arr_[i].char_len_;
                goto cycle_end;
            }
        if (strncmp("k>", p + 1, 2) == 0) {
            next = strstr(p + 3, "</k>");
            if (next) {
                if (is_first_k) {
                    // The first <k> is not rendered; also swallow a newline
                    // that directly follows its closing tag.
                    is_first_k = false;
                    if (*(next + 4) == '\n')
                        next++;
                } else {
                    res_ += std::string("<span foreground=\"") + print_pango_color(color_scheme.k) + "\">";
                    std::string chunk(p+3, next-(p+3));
                    res_ += chunk;
                    size_t xml_len = xml_strlen(chunk);
                    cur_pos_ += xml_len;
                    res_ += "</span>";
                }
                p = next + sizeof("</k>") - 1;
            } else
                // Unterminated <k>: drop just the opening tag.
                p += sizeof("<k>") - 1;
        } else if (*(p + 1) == 'c' && (*(p + 2) == ' ' || *(p + 2) == '>')) {
            next = strchr(p, '>');
            if (!next) {
                ++p;
                continue;
            }
            name.assign(p + 1, next - p - 1);
            std::string::size_type pos = name.find("c=\"");
            if (pos != std::string::npos) {
                pos += sizeof("c=\"") - 1;
                std::string::size_type end_pos = name.find("\"", pos);
                if (end_pos == std::string::npos)
                    end_pos = name.length();
                std::string color(name, pos, end_pos - pos);
                // Only colors that Pango accepts are put into the markup;
                // otherwise an attribute-less span keeps </c> balanced.
                if (pango_color_parse(NULL, color.c_str()))
                    res_ += "<span foreground=\"" + color + "\">";
                else
                    res_ += "<span>";
            } else
                res_ += std::string("<span foreground=\"") + print_pango_color(color_scheme.c) + "\">";
            p = next + 1;
        } else if (*(p + 1) == 'r' && *(p + 2) == 'r' && *(p + 3) == 'e'
            && *(p + 4) == 'f' && (*(p + 5) == ' ' || *(p + 5) == '>')) {
            next = strchr(p, '>');
            if (!next) {
                ++p;
                continue;
            }
            name.assign(p + 1, next - p - 1);
            std::string type;
            std::string::size_type pos = name.find("type=\"");
            if (pos != std::string::npos) {
                pos += sizeof("type=\"") - 1;
                std::string::size_type end_pos = name.find("\"", pos);
                if (end_pos == std::string::npos)
                    end_pos = name.length();
                type.assign(name, pos, end_pos - pos);
            }
            p = next + 1;
            next = strstr(p, "</rref>");
            if (!next)
                continue;
            std::string chunk(p, next - p);
            p = next + sizeof("</rref>") - 1;
            if (type.empty()) {
                // No explicit type: guess it from the file name suffix.
                if (g_str_has_suffix(chunk.c_str(), ".jpg")
                    || g_str_has_suffix(chunk.c_str(), ".png")
                    || g_str_has_suffix(chunk.c_str(), ".bmp")) {
                    type = "image";
                } else if (g_str_has_suffix(chunk.c_str(), ".wav")
                    || g_str_has_suffix(chunk.c_str(), ".mp3")
                    || g_str_has_suffix(chunk.c_str(), ".ogg")) {
                    type = "sound";
                } else if (g_str_has_suffix(chunk.c_str(), ".avi")
                    || g_str_has_suffix(chunk.c_str(), ".mpeg")
                    || g_str_has_suffix(chunk.c_str(), ".mpg")) {
                    type = "video";
                } else {
                    type = "attach";
                }
            }
            // The resource is its own item, so pending markup goes out first.
            flush();
            ParseResultItem item;
            item.type = ParseResultItemType_res;
            item.res = new ParseResultResItem;
            item.res->type = type;
            item.res->key = chunk;
            result_.item_list.push_back(item);
        } else if ((*(p + 1) == 'k' || *(p + 1) == 'i') && *(p + 2) == 'r'
            && *(p + 3) == 'e' && *(p + 4) == 'f' && (*(p + 5) == ' '
            || *(p + 5) == '>')) {
            // true for <kref>, false for <iref>
            bool is_k_or_i = (*(p + 1) == 'k');
            next = strchr(p, '>');
            if (!next) {
                ++p;
                continue;
            }
            name.assign(p + 1, next - p - 1);
            std::string key;
            std::string::size_type pos;
            if (is_k_or_i)
                pos = name.find("k=\"");
            else
                pos = name.find("href=\"");
            if (pos != std::string::npos) {
                if (is_k_or_i)
                    pos += sizeof("k=\"") - 1;
                else
                    pos += sizeof("href=\"") - 1;
                std::string::size_type end_pos = name.find("\"", pos);
                if (end_pos == std::string::npos)
                    end_pos = name.length();
                key.assign(name, pos, end_pos - pos);
            }
            p = next + 1;
            if (is_k_or_i)
                next = strstr(p, "</kref>");
            else
                next = strstr(p, "</iref>");
            if (!next)
                continue;
            res_ += std::string("<span foreground=\"") + print_pango_color(color_scheme.ref) + "\" underline=\"single\">";
            std::string::size_type link_len = next - p;
            std::string chunk(p, link_len);
            size_t xml_len = xml_strlen(chunk);
            // The link target is the attribute value if present, otherwise
            // the link text itself; either way with entities decoded.
            std::string xml_enc;
            if (key.empty())
                xml_decode(chunk.c_str(), xml_enc);
            else
                xml_decode(key.c_str(), xml_enc);
            std::string link;
            if (is_k_or_i)
                link = "query://";
            link += xml_enc;
            // Record where the link text sits inside the pending markup.
            links_list_.push_back(LinkDesc(cur_pos_, xml_len, link));
            res_ += chunk;
            cur_pos_ += xml_len;
            res_ += "</span>";
            if (is_k_or_i)
                p = next + sizeof("</kref>") - 1;
            else
                p = next + sizeof("</iref>") - 1;
        } else if (strncmp("blockquote", p + 1, 10) == 0 && (*(p + 11) == ' '
            || *(p + 11) == '>')) {
            next = strchr(p, '>');
            if (!next) {
                ++p;
                continue;
            }
            p = next + 1;
            flush();
            ParseResultItem item;
            item.type = ParseResultItemType_FormatBeg;
            item.format_beg = new ParseResultFormatBegItem;
            item.format_beg->type = ParseResultItemFormatType_Indent;
            result_.item_list.push_back(item);
        } else if (strncmp("/blockquote>", p + 1, 12) == 0) {
            // sizeof counts the terminating NUL, which stands in for the
            // leading '<' that p still points at.
            p += sizeof("/blockquote>");
            flush();
            ParseResultItem item;
            item.type = ParseResultItemType_FormatEnd;
            item.format_end = new ParseResultFormatEndItem;
            item.format_end->type = ParseResultItemFormatType_Indent;
            result_.item_list.push_back(item);
        } else {
            // Unknown tag: drop it.
            next = strchr(p+1, '>');
            if (!next) {
                // A '<' that never closes is literal text. It has to be
                // escaped, because res_ is Pango markup and a bare '<' would
                // make it unparsable. The entity displays as one character.
                p++;
                res_ += "&lt;";
                cur_pos_++;
                continue;
            }
            p = next + 1;
        }
cycle_end:
        ;
    }
    // Whatever follows the last tag is plain text.
    res_ += p;
    flush();
}
/* Pushes the markup accumulated in res_ onto result_.item_list as a mark
 * item and resets the per-item state (res_, cur_pos_, links_list_).
 * Does nothing when there is no pending markup.
 *
 * NOTE(review): the links recorded in links_list_ are not attached to any
 * item here, so they are discarded — presumably they should be published
 * together with the markup; confirm against the ParseResult API.
 */
void XDXFParser::flush(void)
{
    if (res_.empty()) {
        // Without pending markup there must be no pending position or links.
        g_assert(cur_pos_ == 0);
        g_assert(links_list_.empty());
        return;
    }
    ParseResultItem item;
    item.type = ParseResultItemType_mark;
    item.mark = new ParseResultMarkItem;
    item.mark->pango = res_;
    result_.item_list.push_back(item);
    res_.clear();
    cur_pos_ = 0;
    // Link offsets are relative to the markup just emitted. Keeping them
    // would leave stale positions behind and break the invariant asserted
    // above on the next flush with an empty buffer.
    links_list_.clear();
}
/* Parses the XDXF markup at p with the default color scheme and returns
 * result.pango. The sec_size and oword parameters are not used here.
 * NOTE(review): `result` is not declared in this function or anywhere in the
 * visible part of the file — presumably an object with a `pango` member that
 * is declared elsewhere; confirm.
 */
std::string xdxf2pango(const char *p, guint32 sec_size, const char *oword)
{
// Reset the colors and rebuild the tag table that depends on them.
set_default_color_scheme();
XDXFParser::fill_replace_arr();
// The temporary parser does all of its work in the constructor.
XDXFParser(p, result);
return result.pango;
}