* Copyright 2011 kubtek <kubtek@mail.com>
*
* This file is part of StarDict.
*
* StarDict is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* StarDict is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with StarDict. If not, see <http://www.gnu.org/licenses/>.
*/
#include <glib.h>
#include <string.h>
#include <glib/gi18n.h>
#include <glib/gstdio.h>
#ifdef _WIN32
# include <io.h>
#else
# include <unistd.h>
#endif
#include "storage.h"
#include "storage_impl.h"
#include "ifo_file.h"
#include "stddict.h"
#include "utils.h"
#include "dictziplib.h"
class FileBase
{
private:
explicit FileBase(const std::string& url_, bool temp_)
:
url(url_),
cnt(0),
temp(temp_)
{
}
virtual ~FileBase(void)
{
if(!url.empty() && temp)
if(g_remove(url.c_str()))
g_warning("Unable to remove temporary file: %s", url.c_str());
}
public:
static FileBase* create(const std::string& url, bool temp)
{
return new FileBase(url, temp);
}
const std::string& get_url(void) const { return url; }
void AddRef(void) { ++cnt; }
void Release(void)
{
--cnt;
if(cnt == 0)
delete this;
}
private:
std::string url;
size_t cnt;
bool temp;
};
FileHolder::FileHolder(void)
:
pFileBase(NULL)
{
}
FileHolder::FileHolder(const std::string &url, bool temp)
:
pFileBase(NULL)
{
if(!url.empty()) {
pFileBase = FileBase::create(url, temp);
if(pFileBase)
pFileBase->AddRef();
}
}
FileHolder::FileHolder(const FileHolder& right)
{
pFileBase = right.pFileBase;
if(pFileBase)
pFileBase->AddRef();
}
FileHolder::~FileHolder(void)
{
if(pFileBase)
pFileBase->Release();
}
const char* FileHolder::get_url(void) const
{
if(pFileBase)
return pFileBase->get_url().c_str();
else
return NULL;
}
FileHolder& FileHolder::operator=(const FileHolder& right)
{
if(this == &right)
return *this;
if(right.pFileBase)
right.pFileBase->AddRef();
if(pFileBase)
pFileBase->Release();
pFileBase = right.pFileBase;
return *this;
}
bool FileHolder::operator==(const FileHolder& right) const
{
if(pFileBase == right.pFileBase)
return true;
if(!pFileBase || !right.pFileBase)
return false;
return pFileBase->get_url() == right.pFileBase->get_url();
}
void FileHolder::clear(void)
{
if(pFileBase)
pFileBase->Release();
pFileBase = NULL;
}
*
* Parameters:
* pattern - file name pattern in file name encoding
* 1. blank, then default pattern is used
* 2. may be a string with "XXXXXX" substring indicating variable part
* 3. may be a simple string, then "XXXXXX" will be added to the front
*
* fh - file handle, close it with
#if defined(_WIN32)
* _close()
#else
* close()
#endif
* when not needed.
* -1 if file open failed. */
static FileHolder open_temp_file(const std::string &pattern, gint &fh)
{
std::string name_pattern(pattern);
if(!name_pattern.empty()) {
if(name_pattern.find("XXXXXX")==std::string::npos)
name_pattern.insert(0, "XXXXXX");
}
gchar * tmp_url = NULL;
fh = stardict_file_open_tmp(name_pattern.empty() ? NULL : name_pattern.c_str(),
&tmp_url, NULL);
FileHolder file(fh == -1 ? "" : tmp_url, true);
g_free(tmp_url);
return file;
}
struct ResCacheItem {
guint32 offset;
guint32 size;
gchar *data;
ResCacheItem(void) { data = NULL; }
~ResCacheItem(void) { g_free(data); }
};
class ResDict {
public:
ResDict(void);
~ResDict(void);
bool load(const std::string& base_url);
gchar *GetData(guint32 offset, guint32 size);
private:
static const size_t RES_DICT_CACHE_SIZE = 10;
FILE *dictfile;
std::unique_ptr<dictData> dictdzfile;
ResCacheItem cache[RES_DICT_CACHE_SIZE];
gint cache_cur;
};
rindex_file::rindex_file(void)
:
filecount(0)
{
}
rindex_file* rindex_file::Create(const std::string& filebasename,
const char* mainext, std::string& fullfilename)
{
rindex_file *rindex = NULL;
fullfilename = filebasename + "." + mainext + ".gz";
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
rindex = new compressed_rindex;
} else {
fullfilename = filebasename + "." + mainext;
rindex = new offset_rindex;
}
return rindex;
}
offset_rindex::offset_rindex(void)
:
oft_file(CacheFileType_oft, COLLATE_FUNC_NONE),
idxfile(NULL),
npages(0)
{
}
offset_rindex::~offset_rindex(void)
{
if (idxfile)
fclose(idxfile);
}
bool offset_rindex::load(const std::string& url, gulong _filecount, gulong fsize,
bool CreateCacheFile)
{
filecount = _filecount;
npages=(filecount-1)/ENTR_PER_PAGE+2;
if (!oft_file.load_cache(url, url, npages*sizeof(guint32))) {
MapFile map_file;
if (!map_file.open(url.c_str(), fsize))
return false;
const gchar *idxdatabuffer=map_file.begin();
oft_file.allocate_wordoffset(npages);
const gchar *p1 = idxdatabuffer;
gulong index_size;
guint32 j=0;
for (guint32 i=0; i<filecount; i++) {
index_size=strlen(p1) +1 + 2*sizeof(guint32);
if (i % ENTR_PER_PAGE==0) {
oft_file.get_wordoffset(j)=p1-idxdatabuffer;
++j;
}
p1 += index_size;
}
oft_file.get_wordoffset(j)=p1-idxdatabuffer;
map_file.close();
if (CreateCacheFile) {
if (!oft_file.save_cache(url))
g_printerr("Cache update failed.\n");
}
}
if (!(idxfile = fopen(url.c_str(), "rb"))) {
return false;
}
first.assign(0, read_first_on_page_key(0));
last.assign(npages-2, read_first_on_page_key(npages-2));
middle.assign((npages-2)/2, read_first_on_page_key((npages-2)/2));
real_last.assign(filecount-1, get_key(filecount-1));
return true;
}
bool offset_rindex::lookup(const char *str, guint32 &entry_offset, guint32 &entry_size)
{
glong idx;
if(!lookup(str, idx))
return false;
get_data(idx, entry_offset, entry_size);
return true;
}
bool offset_rindex::lookup(const char *str, glong &idx)
{
bool bFound=false;
glong iTo=npages-2;
glong iFrom;
glong iThisIndex;
gint cmpint;
if (strcmp(str, first.keystr.c_str())<0) {
return false;
} else if (strcmp(str, real_last.keystr.c_str()) >0) {
return false;
} else {
iFrom=0;
while (iFrom<=iTo) {
iThisIndex=(iFrom+iTo)/2;
cmpint = strcmp(str, get_first_on_page_key(iThisIndex));
if (cmpint>0)
iFrom=iThisIndex+1;
else if (cmpint<0)
iTo=iThisIndex-1;
else {
bFound=true;
break;
}
}
if (!bFound) {
idx = iTo;
} else {
idx = iThisIndex;
}
}
if (!bFound) {
gulong netr=load_page(idx);
iFrom=1;
iTo=netr-1;
iThisIndex=0;
while (iFrom<=iTo) {
iThisIndex=(iFrom+iTo)/2;
cmpint = strcmp(str, page.entries[iThisIndex].keystr);
if (cmpint>0)
iFrom=iThisIndex+1;
else if (cmpint<0)
iTo=iThisIndex-1;
else {
bFound=true;
break;
}
}
if(!bFound)
return false;
idx *= ENTR_PER_PAGE;
idx += iThisIndex;
return true;
} else {
idx *= ENTR_PER_PAGE;
return true;
}
}
const gchar *offset_rindex::read_first_on_page_key(glong page_idx)
{
fseek(idxfile, oft_file.get_wordoffset(page_idx), SEEK_SET);
size_t size_to_read = DEFAULT_KEY_SIZE;
const char* end_of_line;
int inewdata = 0;
size_t size_read;
while(true) {
buffer.resize(inewdata+size_to_read+1);
size_read = fread(&buffer[inewdata], 1, size_to_read, idxfile);
if(size_read == 0)
return NULL;
buffer[inewdata+size_read]='\0';
end_of_line = strchr(&buffer[inewdata], '\0');
if(end_of_line != &buffer[inewdata+size_read])
return &buffer[0];
if(size_read != size_to_read)
return NULL;
inewdata += size_read;
size_to_read *= 2;
}
return NULL;
}
const gchar *offset_rindex::get_first_on_page_key(glong page_idx)
{
if (page_idx<middle.page_idx) {
if (page_idx==first.page_idx)
return first.keystr.c_str();
return read_first_on_page_key(page_idx);
} else if (page_idx>middle.page_idx) {
if (page_idx==last.page_idx)
return last.keystr.c_str();
return read_first_on_page_key(page_idx);
} else
return middle.keystr.c_str();
}
const gchar *offset_rindex::get_key(glong idx)
{
load_page(idx/ENTR_PER_PAGE);
glong idx_in_page=idx%ENTR_PER_PAGE;
return page.entries[idx_in_page].keystr;
}
void offset_rindex::get_data(glong idx, guint32 &entry_offset, guint32 &entry_size)
{
load_page(idx/ENTR_PER_PAGE);
glong idx_in_page=idx%ENTR_PER_PAGE;
entry_offset = page.entries[idx_in_page].off;
entry_size = page.entries[idx_in_page].size;
}
gulong offset_rindex::load_page(glong page_idx)
{
g_assert(gulong(page_idx+1)<npages);
gulong nentr=ENTR_PER_PAGE;
if (page_idx==glong(npages-2))
if ((nentr=filecount%ENTR_PER_PAGE)==0)
nentr=ENTR_PER_PAGE;
if (page_idx!=page.page_idx) {
page_data.resize(oft_file.get_wordoffset(page_idx+1)-oft_file.get_wordoffset(page_idx));
fseek(idxfile, oft_file.get_wordoffset(page_idx), SEEK_SET);
size_t fread_size;
size_t page_data_size = page_data.size();
fread_size = fread(&page_data[0], 1, page_data_size, idxfile);
if (fread_size != page_data_size) {
g_print("fread error!\n");
}
page.fill(&page_data[0], nentr, page_idx);
}
return nentr;
}
void offset_rindex::page_t::fill(gchar *data, gint nent, glong page_idx_)
{
page_idx=page_idx_;
gchar *p=data;
glong len;
for (gint i=0; i<nent; ++i) {
entries[i].keystr=p;
len=strlen(p);
p+=len+1;
entries[i].off=g_ntohl(get_uint32(p));
p+=sizeof(guint32);
entries[i].size=g_ntohl(get_uint32(p));
p+=sizeof(guint32);
}
}
compressed_rindex::compressed_rindex(void)
:
idxdatabuf(NULL)
{
}
compressed_rindex::~compressed_rindex(void)
{
g_free(idxdatabuf);
}
bool compressed_rindex::load(const std::string& url, gulong _filecount,
gulong fsize, bool CreateCacheFile)
{
filecount = _filecount;
gzFile in = gzopen(url.c_str(), "rb");
if (in == NULL)
return false;
idxdatabuf = (gchar *)g_malloc(fsize);
gulong len = gzread(in, idxdatabuf, fsize);
gzclose(in);
if (len < 0)
return false;
if (len != fsize)
return false;
filelist.resize(filecount+1);
gchar *p1 = idxdatabuf;
guint32 i;
for (i=0; i<filecount; i++) {
filelist[i] = p1;
p1 += strlen(p1) +1 + 2*sizeof(guint32);
}
filelist[filecount] = p1;
return true;
}
bool compressed_rindex::lookup(const char *str, guint32 &entry_offset, guint32 &entry_size)
{
glong idx;
if(!lookup(str, idx))
return false;
get_data(idx, entry_offset, entry_size);
return true;
}
bool compressed_rindex::lookup(const char *str, glong &idx)
{
bool bFound=false;
glong iTo=filelist.size()-2;
if (strcmp(str, get_key(0))<0) {
} else if (strcmp(str, get_key(iTo)) >0) {
} else {
glong iThisIndex=0;
glong iFrom=0;
gint cmpint;
while (iFrom<=iTo) {
iThisIndex=(iFrom+iTo)/2;
cmpint = strcmp(str, get_key(iThisIndex));
if (cmpint>0)
iFrom=iThisIndex+1;
else if (cmpint<0)
iTo=iThisIndex-1;
else {
bFound=true;
break;
}
}
if (bFound) {
idx = iThisIndex;
}
}
return bFound;
}
const gchar *compressed_rindex::get_key(glong idx)
{
return filelist[idx];
}
void compressed_rindex::get_data(glong idx, guint32 &entry_offset, guint32 &entry_size)
{
gchar *p1 = filelist[idx]+strlen(filelist[idx])+sizeof(gchar);
entry_offset = g_ntohl(get_uint32(p1));
p1 += sizeof(guint32);
entry_size = g_ntohl(get_uint32(p1));
}
ResDict::ResDict(void)
:
dictfile(NULL),
cache_cur(0)
{
}
ResDict::~ResDict(void)
{
if(dictfile)
fclose(dictfile);
}
bool ResDict::load(const std::string& base_url)
{
std::string url;
url = base_url + ".rdic.dz";
if (g_file_test(url.c_str(), G_FILE_TEST_EXISTS)) {
dictdzfile.reset(new dictData);
if (!dictdzfile->open(url, 0)) {
return false;
}
} else {
url = base_url + ".rdic";
dictfile = fopen(url.c_str(),"rb");
if (!dictfile) {
return false;
}
}
return true;
}
gchar *ResDict::GetData(guint32 offset, guint32 size)
{
for(size_t i=0; i<RES_DICT_CACHE_SIZE; ++i)
if(cache[i].data && cache[i].offset == offset && cache[i].size == size)
return cache[i].data;
gchar *data = (gchar*)g_malloc(size + sizeof(guint32));
memcpy(data, &size, sizeof(guint32));
if(dictfile) {
fseek(dictfile, offset, SEEK_SET);
size_t fread_size;
fread_size = fread(data+sizeof(guint32), size, 1, dictfile);
if (fread_size != 1) {
g_print("fread error!\n");
}
} else {
dictdzfile->read(data+sizeof(guint32), offset, size);
}
g_free(cache[cache_cur].data);
cache[cache_cur].data = data;
cache[cache_cur].offset = offset;
cache[cache_cur].size = size;
cache_cur = (cache_cur + 1) % RES_DICT_CACHE_SIZE;
return data;
}
ResourceStorage::ResourceStorage()
:
storage_type(StorageType_UNKNOWN),
file_storage(NULL),
database_storage(NULL)
{
}
ResourceStorage::~ResourceStorage()
{
delete file_storage;
delete database_storage;
}
* Otherwise return NULL. */
ResourceStorage* ResourceStorage::create(const std::string &dirname,
bool CreateCacheFile, show_progress_t *sp)
{
ResourceStorage *storage = NULL;
std::string fullfilename;
fullfilename = build_path(dirname, "res.rifo");
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
storage = new ResourceStorage();
if(!storage->load_database(fullfilename, CreateCacheFile, sp)) {
delete storage;
storage = NULL;
}
} else {
fullfilename = build_path(dirname, "res");
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_DIR)) {
storage = new ResourceStorage();
if(!storage->load_filesdir(fullfilename, sp)) {
delete storage;
storage = NULL;
}
}
}
return storage;
}
bool ResourceStorage::load_filesdir(const std::string &resdir, show_progress_t *sp)
{
g_assert(storage_type == StorageType_UNKNOWN);
g_assert(file_storage == NULL && database_storage == NULL);
file_storage = new File_ResourceStorage(resdir);
storage_type = StorageType_FILE;
return true;
}
bool ResourceStorage::load_database(const std::string &rifofilename,
bool CreateCacheFile, show_progress_t *sp)
{
g_assert(storage_type == StorageType_UNKNOWN);
g_assert(file_storage == NULL && database_storage == NULL);
database_storage = new Database_ResourceStorage();
if(!database_storage->load(rifofilename, CreateCacheFile)) {
delete database_storage;
database_storage = NULL;
return false;
}
storage_type = StorageType_DATABASE;
return true;
}
FileHolder ResourceStorage::get_file_path(const std::string &key)
{
switch(storage_type) {
case StorageType_FILE:
return FileHolder(file_storage->get_file_path(key), false);
case StorageType_DATABASE:
return database_storage->get_file_path(key);
default:
g_assert_not_reached();
return FileHolder();
}
}
const char *ResourceStorage::get_file_content(const std::string &key)
{
switch(storage_type) {
case StorageType_FILE:
return file_storage->get_file_content(key);
case StorageType_DATABASE:
return database_storage->get_file_content(key);
default:
g_assert_not_reached();
return NULL;
}
}
File_ResourceStorage::File_ResourceStorage(const std::string &resdir_)
:
resdir(resdir_),
data(NULL)
{
}
File_ResourceStorage::~File_ResourceStorage(void)
{
g_free(data);
}
const std::string& File_ResourceStorage::get_file_path(const std::string &key)
{
filepath.clear();
if(key.empty())
return filepath;
std::string fs_key;
if(!utf8_to_file_name(dir_separator_db_to_fs(key), fs_key))
return filepath;
filepath = resdir;
filepath += G_DIR_SEPARATOR;
filepath += fs_key;
if(!g_file_test(filepath.c_str(), G_FILE_TEST_EXISTS))
filepath.clear();
return filepath;
}
const char *File_ResourceStorage::get_file_content(const std::string &key)
{
if(key.empty())
return NULL;
if(data) {
g_free(data);
data = NULL;
}
std::string fs_key;
if(!utf8_to_file_name(dir_separator_db_to_fs(key), fs_key))
return NULL;
std::string filename = resdir;
filename += G_DIR_SEPARATOR;
filename += fs_key;
gchar* contents = NULL;
gsize length = 0;
if(g_file_get_contents(filename.c_str(), &contents, &length, NULL)) {
data = (gchar *)g_malloc(length + sizeof(guint32));
guint32 t = length;
memcpy(data, &t, sizeof(guint32));
memcpy(data+sizeof(guint32), contents, length);
g_free(contents);
return static_cast<const char*>(data);
} else
return NULL;
}
Database_ResourceStorage::Database_ResourceStorage(void)
:
cur_cache_ind(0),
ridx_file(NULL),
dict(NULL)
{
}
Database_ResourceStorage::~Database_ResourceStorage(void)
{
delete ridx_file;
delete dict;
}
bool Database_ResourceStorage::load(const std::string& rifofilename,
bool CreateCacheFile)
{
gulong filecount, indexfilesize;
if(!load_rifofile(rifofilename, filecount, indexfilesize))
return false;
std::string filebasename
= rifofilename.substr(0, rifofilename.length()-sizeof(".rifo")+1);
delete dict;
dict = NULL;
dict = new ResDict;
if(!dict->load(filebasename))
return false;
std::string fullfilename;
delete ridx_file;
ridx_file = NULL;
ridx_file = rindex_file::Create(filebasename, "ridx", fullfilename);
if (!ridx_file->load(fullfilename, filecount, indexfilesize, CreateCacheFile))
return false;
return true;
}
FileHolder Database_ResourceStorage::get_file_path(const std::string& key)
{
int ind = find_in_cache(key);
if(ind >= 0)
return FileCache[ind].file;
guint32 entry_offset, entry_size;
if(!ridx_file->lookup(key.c_str(), entry_offset, entry_size))
return FileHolder();
gchar *data = dict->GetData(entry_offset, entry_size);
if(!data)
return FileHolder();
std::string name_pattern;
if(!utf8_to_file_name(key, name_pattern))
return FileHolder();
std::string::size_type pos = name_pattern.find_last_of("." DB_DIR_SEPARATOR_S);
if(pos != std::string::npos) {
if(name_pattern[pos] == '.')
name_pattern = name_pattern.substr(pos);
else
name_pattern.clear();
} else
name_pattern.clear();
gint fd;
FileHolder file(open_temp_file(name_pattern, fd));
if(file.empty())
return file;
ssize_t write_size;
#ifdef _WIN32
write_size = _write(fd, data+sizeof(guint32), entry_size);
if (write_size == -1) {
g_print("write error!\n");
}
_close(fd);
#else
write_size = write(fd, data+sizeof(guint32), entry_size);
if (write_size == -1) {
g_print("write error!\n");
}
close(fd);
#endif
ind = put_in_cache(key, file);
return FileCache[ind].file;
}
const char *Database_ResourceStorage::get_file_content(const std::string &key)
{
guint32 entry_offset, entry_size;
if(!ridx_file->lookup(key.c_str(), entry_offset, entry_size))
return NULL;
return dict->GetData(entry_offset, entry_size);
}
void Database_ResourceStorage::clear_cache(void)
{
for(size_t i=0; i<FILE_CACHE_SIZE; ++i) {
FileCache[i].key.clear();
FileCache[i].file.clear();
}
}
int Database_ResourceStorage::find_in_cache(const std::string& key) const
{
for(size_t i=0; i<FILE_CACHE_SIZE; ++i)
if(FileCache[i].key == key)
return i;
return -1;
}
size_t Database_ResourceStorage::put_in_cache(const std::string& key, const FileHolder& file)
{
size_t ind = cur_cache_ind;
FileCache[ind].key = key;
FileCache[ind].file = file;
cur_cache_ind = (ind + 1) % FILE_CACHE_SIZE;
return ind;
}
bool Database_ResourceStorage::load_rifofile(const std::string& rifofilename,
gulong& filecount, gulong& indexfilesize)
{
DictInfo dict_info;
if (!dict_info.load_from_ifo_file(rifofilename, DictInfoType_ResDb))
return false;
filecount = dict_info.get_filecount();
indexfilesize = dict_info.get_index_file_size();
return true;
}