* This file is part of StarDict.
*
* StarDict is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* StarDict is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with StarDict. If not, see <http://www.gnu.org/licenses/>.
*/
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <list>
#include <string>
#include <vector>
#include <libgen.h>
#include <glib.h>
#include <cstring>
#include <ctype.h>
#include <arpa/inet.h>
#include "dictbuilder-tree.h"
/**
 * State of the section currently being collected.
 *
 * `sign` holds the one-character mark read from a "%<sign>" input line;
 * '\0' means that no section is open.
 */
struct sectionEntry
{
    char sign;  // section mark, '\0' when no section is active
};
/**
 * One index record: a headword plus the location of its data in the
 * dictionary data file.
 */
struct indexEntry
{
    std::string word;  // headword text
    size_t offset;     // position of the entry's data in the dict file
    size_t size;       // number of bytes of data belonging to the entry
};
// Flat container holding every index record by value.
typedef std::list<indexEntry> entrylist_type;
// Tree whose nodes hold pointers to index records; the tree<> template is
// presumably declared in "dictbuilder-tree.h" (not visible here).
typedef tree<indexEntry*> entrytree_type;
/**
 * Program-wide settings: the input path, the names of all generated files
 * and the terminator used for progress output.
 */
struct _core
{
    std::string input;    // input script given on the command line
    std::string ifofile;  // "<output>.ifo"
    std::string tfofile;  // "<output>.tfo"
    std::string idxfile;  // "<output>.idx"
    std::string tdxfile;  // "<output>.tdx"
    std::string dicfile;  // "<output>.dict"
    std::string tmpfile;  // "<output>.tmp", scratch file for section data
    char endl;            // '\r' by default, '\n' when "-n" is given
};

// The single settings instance used by the whole program.
_core core;
// Output stream for the dictionary data file (core.dicfile); opened in main().
std::ofstream dictfs;
// Scratch stream on core.tmpfile; action() opens it for sections whose sign
// is an upper-case letter and writeSection() closes it again.
std::ofstream tmpfs;
// Section currently being collected; sign '\0' means none (see vaildSection()).
sectionEntry section;
// Entry currently being built, or NULL when none is open (see vaildEntry()).
indexEntry *entry;
// Every word entry created by action(); sorted and written to the idx file
// by main().
entrylist_type entrylist;
// Tree of entry pointers built from '+' lines; written to the tdx file by
// main() via writeTreeindex().
entrytree_type entrytree;
// Reset the current section to "none" (sign '\0').
void initsection();
// Reset the current entry pointer to NULL.
void initentry();
// Ordering predicate used to sort the entry list by word.
bool entrycmp(const indexEntry& left,
const indexEntry& right);
// Print the command-line usage line to standard output.
void help();
// True while a section is open (section.sign != '\0').
bool vaildSection();
// True while an entry is open (entry != NULL).
bool vaildEntry();
// Finish the current section in the dict file and reset the section state.
void writeSection();
// Finalize offset/size of the current entry and reset the entry pointer.
void writeEntry();
// Recursively serialize the tree node at `it` and all of its children to `os`.
void writeTreeindex(entrytree_type::iterator it, std::ostream& os);
// Read the input script from `is` line by line and act on each line.
void action(std::istream& is);
/**
 * Fill in every output file name in `core` from one common base path.
 *
 * Having a single place for the six assignments keeps the "-o" option and
 * the default (input-derived) naming in agreement.
 */
static void setOutputFiles(const std::string& output)
{
    core.ifofile = output + ".ifo";
    core.tfofile = output + ".tfo";
    core.idxfile = output + ".idx";
    core.tdxfile = output + ".tdx";
    core.dicfile = output + ".dict";
    core.tmpfile = output + ".tmp";
}

/**
 * Program entry point.
 *
 * Parses the command line, feeds the input script to action(), and then
 * writes the .idx file, the .tdx file (only when the entry tree has
 * children), the .ifo file and the .tfo file (only when a .tdx was written).
 *
 * Recognized options (matched case-insensitively):
 *   -o, --output <base>  base path for all generated files
 *   -n                   terminate progress lines with '\n' instead of '\r'
 *   -h, --help           print usage and exit
 * Any other argument is taken as the input file; the last one wins.
 *
 * @return 0 on success; 1 after printing help, on a usage error, or when a
 *         file cannot be opened.
 */
int main(int argc, char* argv[])
{
    core.endl = '\r';
    // "index < argc" (not "!=") so the loop can never step past argv.
    for (int index = 1; index < argc; index++)
    {
        if (strcasecmp(argv[index], "-o") == 0 ||
            strcasecmp(argv[index], "--output") == 0)
        {
            if (++index >= argc)
            {
                // The option was the last argument: there is no value to read.
                std::cerr << "option " << argv[index - 1]
                          << " requires an argument." << std::endl;
                help();
                return 1;
            }
            setOutputFiles(argv[index]);
            continue;
        }
        if (strcasecmp(argv[index], "-n") == 0)
        {
            core.endl = '\n';
            continue;
        }
        if (strcasecmp(argv[index], "-h") == 0 ||
            strcasecmp(argv[index], "--help") == 0)
        {
            help();
            return 1;
        }
        core.input = argv[index];
    }
    if (core.input.size() == 0)
    {
        std::cerr << "must specify a input file." << std::endl;
        help();
        return 1;
    }
    if (core.dicfile.size() == 0)
    {
        // No "-o": derive the base name from the input path by dropping the
        // extension of its last component.  basename() is allowed to modify
        // its argument, so it gets a private, writable copy of the path.
        std::vector<char> pathbuf(core.input.begin(), core.input.end());
        pathbuf.push_back('\0');
        const std::string filename = basename(&pathbuf[0]);
        std::string output;
        const std::string::size_type pos = filename.find_last_of('.');
        if (pos != std::string::npos)
        {
            output = core.input.substr(0,
                core.input.size() - (filename.size() - pos));
            std::cout << output << std::endl;
        }
        else
        {
            output = core.input;
        }
        setOutputFiles(output);
    }

    std::ifstream ifs(core.input.c_str());
    if (!ifs.is_open())
    {
        std::cerr << "can't read file: " << core.input << std::endl;
        return 1;
    }
    dictfs.open(core.dicfile.c_str(),
        std::ios_base::out | std::ios_base::trunc | std::ios_base::binary);
    if (!dictfs.is_open())
    {
        std::cerr << "cant's create dict file: " << core.dicfile << std::endl;
        return 1;
    }

    *entrytree.root() = NULL;
    initentry();
    initsection();
    action(ifs);
    // Flush whatever section/entry was still open at end of input.
    if (vaildSection())
        writeSection();
    if (vaildEntry())
        writeEntry();
    ifs.close();
    dictfs.close();

    size_t idx_size = 0, tdx_size = 0;
    {
        std::ofstream ofs(core.idxfile.c_str(),
            std::ios_base::out | std::ios_base::trunc | std::ios_base::binary);
        if (!ofs.is_open())
        {
            std::cerr << "cant's create idx file: "
                      << core.idxfile << std::endl;
            return 1;
        }
        entrylist.sort(entrycmp);
        const char zero = '\0';
        for (entrylist_type::const_iterator it = entrylist.begin();
             it != entrylist.end(); ++it)
        {
            // writeEntry() stored htonl() results in these size_t fields, so
            // each value fits in 32 bits.  Emit exactly four bytes per field;
            // dumping the whole size_t would produce 8-byte fields on LP64
            // platforms, which is not the idx record layout.
            const uint32_t offset = static_cast<uint32_t>(it->offset);
            const uint32_t size = static_cast<uint32_t>(it->size);
            ofs.write(it->word.c_str(), it->word.size());
            ofs.write(&zero, 1);
            ofs.write(reinterpret_cast<const char*>(&offset), sizeof(offset));
            ofs.write(reinterpret_cast<const char*>(&size), sizeof(size));
        }
        idx_size = ofs.tellp();
        ofs.close();
    }
    if (entrytree.root().size() != 0)
    {
        std::ofstream ofs(core.tdxfile.c_str(),
            std::ios_base::out | std::ios_base::trunc | std::ios_base::binary);
        if (!ofs.is_open())
        {
            std::cerr << "cant's create tdx file: "
                      << core.tdxfile << std::endl;
            return 1;
        }
        writeTreeindex(entrytree.root(), ofs);
        tdx_size = ofs.tellp();
        ofs.close();
    }
    {
        std::ofstream ofs(core.ifofile.c_str(),
            std::ios_base::out | std::ios_base::trunc);
        if (!ofs.is_open())
        {
            std::cerr << "cant's create ifo file: "
                      << core.ifofile << std::endl;
            return 1;
        }
        ofs << "StarDict's dict ifo file" << std::endl;
        ofs << "version=2.4.2" << std::endl;
        ofs << "bookname=" << core.input << std::endl;
        ofs << "wordcount=" << entrylist.size() << std::endl;
        ofs << "idxfilesize=" << idx_size << std::endl;
        ofs << "author=" << std::endl;
        ofs << "email=" << std::endl;
        ofs << "website=" << std::endl;
        ofs << "description=" << std::endl;
        ofs << "date=" << std::endl;
        ofs.close();
    }
    if (tdx_size != 0)
    {
        std::ofstream ofs(core.tfofile.c_str(),
            std::ios_base::out | std::ios_base::trunc);
        if (!ofs.is_open())
        {
            std::cerr << "cant's create tfo file: "
                      << core.tfofile << std::endl;
            return 1;
        }
        ofs << "StarDict's treedict ifo file" << std::endl;
        ofs << "version=2.4.2" << std::endl;
        ofs << "bookname=" << core.input << std::endl;
        ofs << "wordcount=" << entrylist.size() << std::endl;
        ofs << "tdxfilesize=" << tdx_size << std::endl;
        ofs << "author=" << std::endl;
        ofs << "email=" << std::endl;
        ofs << "website=" << std::endl;
        ofs << "description=" << std::endl;
        ofs << "date=" << std::endl;
        ofs.close();
    }
    return 0;
}
/**
 * Compare two C strings, first ignoring ASCII case and, when that finds no
 * difference, byte-wise with strcmp() so that the result is only 0 for
 * identical strings.
 *
 * @return negative, zero or positive like strcmp().
 */
gint stardict_strcmp(const gchar *s1, const gchar *s2)
{
    const gint caseless = g_ascii_strcasecmp(s1, s2);
    // Fall back to the exact comparison only to break case-insensitive ties.
    return (caseless != 0) ? caseless : strcmp(s1, s2);
}
/**
 * Strict-weak-ordering predicate for sorting index entries: `left` sorts
 * before `right` when stardict_strcmp() on their words is negative.
 */
bool entrycmp(const indexEntry& left,
              const indexEntry& right)
{
    const char* const lhs = left.word.c_str();
    const char* const rhs = right.word.c_str();
    return stardict_strcmp(lhs, rhs) < 0;
}
void initsection()
{
section.sign = '\0';
}
/**
 * Mark that no entry is open by resetting the global entry pointer.
 * The entry object itself is not touched.
 */
void initentry()
{
    entry = NULL;
}
/**
 * Report whether a section is currently open.
 *
 * @return true when the global section sign is anything other than '\0'.
 */
bool vaildSection()
{
    const bool open = !(section.sign == '\0');
    return open;
}
/**
 * Report whether an entry is currently open.
 *
 * @return true when the global entry pointer is not NULL.
 */
bool vaildEntry()
{
    const bool open = !(entry == NULL);
    return open;
}
/**
 * Finish the section that is currently open and reset the section state.
 *
 * When the section's data was collected in the scratch file (tmpfs is open),
 * the data length is written to the dict file as a 32-bit value in network
 * byte order, followed by the scratch file's contents.  Otherwise the data
 * already went straight to the dict file and only a terminating '\0' byte is
 * appended.
 */
void writeSection()
{
    if (tmpfs.is_open())
    {
        const std::streamoff length = tmpfs.tellp();
        // Exactly four bytes: writing a whole size_t would emit an 8-byte
        // prefix on LP64 platforms.
        const uint32_t size = htonl(static_cast<uint32_t>(length));
        tmpfs.close();
        tmpfs.clear();
        dictfs.write(reinterpret_cast<const char*>(&size), sizeof(size));

        std::ifstream ifs(core.tmpfile.c_str(),
            std::ios_base::in | std::ios_base::binary);
        if (ifs.is_open())
        {
            std::istreambuf_iterator<char> begin(ifs), end;
            std::copy(begin, end, std::ostreambuf_iterator<char>(dictfs));
            ifs.close();
        }
        else
        {
            // Previously silent: the size prefix would be left without data.
            std::cerr << "can't read tmp file: " << core.tmpfile << std::endl;
        }
    }
    else
    {
        const char zero = '\0';
        dictfs.write(&zero, 1);
    }
    initsection();
}
/**
 * Close the entry that is currently open.
 *
 * The entry's size becomes the distance from its recorded offset to the
 * current write position of the dict file; offset and size are then both
 * passed through htonl() and stored back before the entry pointer is reset.
 * Must only be called while an entry is open (entry != NULL).
 */
void writeEntry()
{
    const size_t endpos = size_t(dictfs.tellp());
    const size_t length = endpos - entry->offset;
    entry->offset = htonl(entry->offset);
    entry->size = htonl(length);
    initentry();
}
void action(std::istream& is)
{
std::string line;
while(std::getline(is, line))
{
switch(line[0])
{
case '%':
{
if (line[1] == '%')
{
if (vaildSection())
writeSection();
if (vaildEntry())
writeEntry();
entrylist.resize(entrylist.size() + 1);
entry = &entrylist.back();
entry->word = line.substr(2);
entry->offset = dictfs.tellp();
entry->size = 0;
if (entry->word.size() > 256)
{
std::cerr << "too long word which longer than 256, trunc to 255." << std::endl;
entry->word.resize(255);
}
std::cerr << "word: " << section.sign << core.endl;
} else
{
if (vaildSection())
writeSection();
section.sign = line[1];
if (vaildSection())
{
if (isupper(section.sign))
{
tmpfs.open(core.tmpfile.c_str(),
std::ios_base::out|std::ios_base::trunc|
std::ios_base::binary);
if (!tmpfs.is_open())
{
std::cerr << "can't create tmp file: "
<< core.tmpfile << std::endl;
exit(1);
}
}
dictfs.write(§ion.sign, 1);
}
}
}
continue;
case '+':
{
std::string path = line.substr(1), node;
std::string::size_type pos, oldpos = 0;
entrytree_type::iterator it = entrytree.root();
typedef std::vector<std::string> path_type;
path_type paths;
do
{
pos = path.find(':', oldpos);
node = path.substr(oldpos, pos);
paths.push_back(node);
oldpos = pos + 1;
} while(pos != std::string::npos);
if (paths.size() != 0)
{
if (*it == NULL || (*it)->word != paths[0])
{
if (*it != NULL && (*it)->size == 0)
delete *it;
indexEntry* tmpentry = new indexEntry;
tmpentry->word = paths[0];
tmpentry->offset = 0;
tmpentry->size = 0;
*it = tmpentry;
}
for(path_type::size_type index = 1;
index < paths.size(); index++)
{
entrytree_type::iterator child;
for(child = it.begin(); child != it.end(); child++)
{
if ((*child)->word == paths[index])
break;
}
if (child == it.end())
{
indexEntry* tmpentry = new indexEntry;
tmpentry->word = paths[index];
tmpentry->offset = 0;
tmpentry->size = 0;
child = it.append(tmpentry);
}
it = child;
}
{
entrytree_type::iterator child;
for(child = it.begin(); child != it.end(); child++)
{
if ((*child)->word == entry->word)
break;
}
if (child == it.end())
{
it.append(entry);
} else
{
if ((*child)->size == 0)
delete *child;
*child = entry;
}
}
} else
{
*it = entry;
}
continue;
}
continue;
case '!':
{
std::string filename = line.substr(1);
std::ifstream ifs(filename.c_str(),
std::ios_base::in|std::ios_base::binary);
if (ifs.is_open())
{
std::istreambuf_iterator<char> begin(ifs), end;
std::copy(begin, end,
std::ostreambuf_iterator<char>(
tmpfs.is_open()?tmpfs:dictfs));
ifs.close();
} else
std::cerr << "can't read file: " << filename << std::endl;
}
continue;
case '^':
{
std::string filename = line.substr(1);
std::ifstream ifs(filename.c_str());
if (ifs.is_open())
{
action(ifs);
ifs.close();
} else
std::cerr << "can't read file: " << filename << std::endl;
}
continue;
case '#':
continue;
}
if (!vaildSection())
continue;
std::ostream& ofs = tmpfs.is_open()?tmpfs:dictfs;
if (line.size() > 0 &&
line[line.size() - 1] == '\\')
line.resize(line.size() - 1);
else
line.push_back('\n');
ofs.write(line.c_str(), line.size());
}
}
/**
 * Serialize the tree node at `it`, followed recursively by all of its
 * children, to `os`.
 *
 * Each node is written as: the word, a '\0' terminator, then offset, size
 * and child count as three 32-bit fields.  The child count is converted
 * with htonl() here; offset and size are written as stored in the node
 * (writeEntry() already stores htonl() results there, stand-in nodes hold 0).
 *
 * @param it  node to write; `*it` must not be NULL
 * @param os  binary output stream receiving the index data
 */
void writeTreeindex(entrytree_type::iterator it, std::ostream& os)
{
    const indexEntry* node = *it;
    const char zero = '\0';
    // Emit exactly four bytes per field: dumping the size_t members directly
    // would produce 8-byte fields on LP64 platforms.
    const uint32_t offset = static_cast<uint32_t>(node->offset);
    const uint32_t size = static_cast<uint32_t>(node->size);
    const uint32_t count = htonl(static_cast<uint32_t>(it.size()));

    os.write(node->word.c_str(), node->word.size());
    os.write(&zero, 1);
    os.write(reinterpret_cast<const char*>(&offset), sizeof(offset));
    os.write(reinterpret_cast<const char*>(&size), sizeof(size));
    os.write(reinterpret_cast<const char*>(&count), sizeof(count));

    for (entrytree_type::iterator child = it.begin();
         child != it.end(); child++)
        writeTreeindex(child, os);
}
/**
 * Print the one-line command synopsis to standard output and flush it.
 */
void help()
{
    static const char usage[] = "dictbuilder [-o output] dictfile";
    std::cout << usage << std::endl;
}