mirror of
https://github.com/Dushistov/sdcv.git
synced 2025-12-15 17:31:56 +00:00
lib: c++11
This commit is contained in:
336
src/lib/lib.cpp
336
src/lib/lib.cpp
@@ -14,11 +14,16 @@
|
||||
#include "file.hpp"
|
||||
#include "mapfile.hpp"
|
||||
|
||||
#include "lib.h"
|
||||
#include "lib.hpp"
|
||||
|
||||
// Notice: read src/tools/DICTFILE_FORMAT for the dictionary
|
||||
// file's format information!
|
||||
|
||||
namespace {
|
||||
// One result slot used by the fuzzy lookup: a matched word together with
// the distance value stored for it.
struct Fuzzystruct {
|
||||
// Pointer to the matched word.
char * pMatchWord;
|
||||
// Distance value recorded for pMatchWord.
int iMatchWordDistance;
|
||||
};
|
||||
|
||||
static inline bool bIsVowel(gchar inputchar)
|
||||
{
|
||||
@@ -59,16 +64,27 @@ static inline gint stardict_strcmp(const gchar *s1, const gchar *s2)
|
||||
return a;
|
||||
}
|
||||
|
||||
// Converts a zero-terminated gunichar string to lower case in place,
// one element at a time, using g_unichar_tolower().
static void unicode_strdown(gunichar *str)
{
    for (gunichar *cur = str; *cur != 0; ++cur)
        *cur = g_unichar_tolower(*cur);
}
|
||||
|
||||
}
|
||||
|
||||
bool DictInfo::load_from_ifo_file(const std::string& ifofilename,
|
||||
bool istreedict)
|
||||
{
|
||||
ifo_file_name = ifofilename;
|
||||
gchar *buffer;
|
||||
if (!g_file_get_contents(ifofilename.c_str(), &buffer, NULL, NULL))
|
||||
if (!g_file_get_contents(ifofilename.c_str(), &buffer, nullptr, nullptr))
|
||||
return false;
|
||||
|
||||
#define TREEDICT_MAGIC_DATA "StarDict's treedict ifo file\nversion=2.4.2\n"
|
||||
#define DICT_MAGIC_DATA "StarDict's dict ifo file\nversion=2.4.2\n"
|
||||
|
||||
const gchar *magic_data = istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA;
|
||||
if (!g_str_has_prefix(buffer, magic_data)) {
|
||||
g_free(buffer);
|
||||
@@ -174,18 +190,6 @@ bool DictInfo::load_from_ifo_file(const std::string& ifofilename,
|
||||
|
||||
return true;
|
||||
}
|
||||
//===================================================================
|
||||
// Creates a DictBase with no dictionary file open (dictfile is NULL)
// and cache_cur set to 0.
DictBase::DictBase()
    : dictfile(NULL),
      cache_cur(0)
{
}
|
||||
|
||||
// Closes dictfile with fclose() when it is non-null.
DictBase::~DictBase()
{
    if (dictfile != NULL) {
        fclose(dictfile);
    }
}
|
||||
|
||||
gchar* DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
|
||||
{
|
||||
@@ -326,14 +330,6 @@ gchar* DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
|
||||
return data;
|
||||
}
|
||||
|
||||
inline bool DictBase::containSearchData()
|
||||
{
|
||||
if (sametypesequence.empty())
|
||||
return true;
|
||||
|
||||
return sametypesequence.find_first_of("mlgxty")!=std::string::npos;
|
||||
}
|
||||
|
||||
bool DictBase::SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data)
|
||||
{
|
||||
int nWord = SearchWords.size();
|
||||
@@ -435,15 +431,21 @@ bool DictBase::SearchData(std::vector<std::string> &SearchWords, guint32 idxitem
|
||||
return false;
|
||||
}
|
||||
|
||||
class offset_index : public index_file {
|
||||
namespace {
|
||||
class OffsetIndex : public IIndexFile {
|
||||
public:
|
||||
offset_index() : idxfile(NULL) {}
|
||||
~offset_index();
|
||||
bool load(const std::string& url, gulong wc, gulong fsize);
|
||||
const gchar *get_key(glong idx);
|
||||
void get_data(glong idx);
|
||||
const gchar *get_key_and_data(glong idx);
|
||||
bool lookup(const char *str, glong &idx);
|
||||
OffsetIndex() : idxfile(nullptr) {}
|
||||
~OffsetIndex() {
|
||||
if (idxfile)
|
||||
fclose(idxfile);
|
||||
}
|
||||
bool load(const std::string& url, gulong wc, gulong fsize) override;
|
||||
const gchar *get_key(glong idx) override;
|
||||
void get_data(glong idx) override { get_key(idx); }
|
||||
const gchar *get_key_and_data(glong idx) override {
|
||||
return get_key(idx);
|
||||
}
|
||||
bool lookup(const char *str, glong &idx) override;
|
||||
private:
|
||||
static const gint ENTR_PER_PAGE = 32;
|
||||
static const char *CACHE_MAGIC;
|
||||
@@ -469,10 +471,10 @@ private:
|
||||
};
|
||||
std::vector<gchar> page_data;
|
||||
struct page_t {
|
||||
glong idx;
|
||||
glong idx = -1;
|
||||
page_entry entries[ENTR_PER_PAGE];
|
||||
|
||||
page_t(): idx(-1) {}
|
||||
page_t() {}
|
||||
void fill(gchar *data, gint nent, glong idx_);
|
||||
} page;
|
||||
gulong load_page(glong page_idx);
|
||||
@@ -483,23 +485,27 @@ private:
|
||||
static strlist_t get_cache_variant(const std::string& url);
|
||||
};
|
||||
|
||||
const char *offset_index::CACHE_MAGIC="StarDict's Cache, Version: 0.1";
|
||||
const char *OffsetIndex::CACHE_MAGIC = "StarDict's Cache, Version: 0.1";
|
||||
|
||||
class wordlist_index : public index_file {
|
||||
|
||||
class WordListIndex : public IIndexFile {
|
||||
public:
|
||||
wordlist_index() : idxdatabuf(NULL) {}
|
||||
~wordlist_index();
|
||||
bool load(const std::string& url, gulong wc, gulong fsize);
|
||||
const gchar *get_key(glong idx);
|
||||
void get_data(glong idx);
|
||||
const gchar *get_key_and_data(glong idx);
|
||||
bool lookup(const char *str, glong &idx);
|
||||
WordListIndex() : idxdatabuf(nullptr) {}
|
||||
~WordListIndex() { g_free(idxdatabuf); }
|
||||
bool load(const std::string& url, gulong wc, gulong fsize) override;
|
||||
const gchar *get_key(glong idx) override { return wordlist[idx]; }
|
||||
void get_data(glong idx) override;
|
||||
const gchar *get_key_and_data(glong idx) override {
|
||||
get_data(idx);
|
||||
return get_key(idx);
|
||||
}
|
||||
bool lookup(const char *str, glong &idx) override;
|
||||
private:
|
||||
gchar *idxdatabuf;
|
||||
std::vector<gchar *> wordlist;
|
||||
};
|
||||
|
||||
void offset_index::page_t::fill(gchar *data, gint nent, glong idx_)
|
||||
void OffsetIndex::page_t::fill(gchar *data, gint nent, glong idx_)
|
||||
{
|
||||
idx=idx_;
|
||||
gchar *p=data;
|
||||
@@ -515,13 +521,7 @@ void offset_index::page_t::fill(gchar *data, gint nent, glong idx_)
|
||||
}
|
||||
}
|
||||
|
||||
// Closes idxfile with fclose() when it is non-null.
offset_index::~offset_index()
{
    if (idxfile != NULL) {
        fclose(idxfile);
    }
}
|
||||
|
||||
inline const gchar *offset_index::read_first_on_page_key(glong page_idx)
|
||||
inline const gchar *OffsetIndex::read_first_on_page_key(glong page_idx)
|
||||
{
|
||||
fseek(idxfile, wordoffset[page_idx], SEEK_SET);
|
||||
guint32 page_size=wordoffset[page_idx+1]-wordoffset[page_idx];
|
||||
@@ -532,7 +532,7 @@ inline const gchar *offset_index::read_first_on_page_key(glong page_idx)
|
||||
return wordentry_buf;
|
||||
}
|
||||
|
||||
inline const gchar *offset_index::get_first_on_page_key(glong page_idx)
|
||||
inline const gchar *OffsetIndex::get_first_on_page_key(glong page_idx)
|
||||
{
|
||||
if (page_idx<middle.idx) {
|
||||
if (page_idx==first.idx)
|
||||
@@ -546,19 +546,19 @@ inline const gchar *offset_index::get_first_on_page_key(glong page_idx)
|
||||
return middle.keystr.c_str();
|
||||
}
|
||||
|
||||
bool offset_index::load_cache(const std::string& url)
|
||||
bool OffsetIndex::load_cache(const std::string& url)
|
||||
{
|
||||
strlist_t vars=get_cache_variant(url);
|
||||
const strlist_t vars = get_cache_variant(url);
|
||||
|
||||
for (strlist_t::const_iterator it=vars.begin(); it!=vars.end(); ++it) {
|
||||
struct stat idxstat, cachestat;
|
||||
for (const std::string& item : vars) {
|
||||
struct ::stat idxstat, cachestat;
|
||||
if (g_stat(url.c_str(), &idxstat)!=0 ||
|
||||
g_stat(it->c_str(), &cachestat)!=0)
|
||||
g_stat(item.c_str(), &cachestat)!=0)
|
||||
continue;
|
||||
if (cachestat.st_mtime<idxstat.st_mtime)
|
||||
continue;
|
||||
MapFile mf;
|
||||
if (!mf.open(it->c_str(), cachestat.st_size))
|
||||
if (!mf.open(item.c_str(), cachestat.st_size))
|
||||
continue;
|
||||
if (strncmp(mf.begin(), CACHE_MAGIC, strlen(CACHE_MAGIC))!=0)
|
||||
continue;
|
||||
@@ -570,15 +570,14 @@ bool offset_index::load_cache(const std::string& url)
|
||||
return false;
|
||||
}
|
||||
|
||||
strlist_t offset_index::get_cache_variant(const std::string& url)
|
||||
strlist_t OffsetIndex::get_cache_variant(const std::string& url)
|
||||
{
|
||||
strlist_t res;
|
||||
res.push_back(url+".oft");
|
||||
strlist_t res = {url + ".oft"};
|
||||
if (!g_file_test(g_get_user_cache_dir(), G_FILE_TEST_EXISTS) &&
|
||||
g_mkdir(g_get_user_cache_dir(), 0700)==-1)
|
||||
return res;
|
||||
|
||||
std::string cache_dir=std::string(g_get_user_cache_dir())+G_DIR_SEPARATOR_S+"sdcv";
|
||||
const std::string cache_dir = std::string(g_get_user_cache_dir())+G_DIR_SEPARATOR_S+"sdcv";
|
||||
|
||||
if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_EXISTS)) {
|
||||
if (g_mkdir(cache_dir.c_str(), 0700)==-1)
|
||||
@@ -592,11 +591,11 @@ strlist_t offset_index::get_cache_variant(const std::string& url)
|
||||
return res;
|
||||
}
|
||||
|
||||
bool offset_index::save_cache(const std::string& url)
|
||||
bool OffsetIndex::save_cache(const std::string& url)
|
||||
{
|
||||
strlist_t vars=get_cache_variant(url);
|
||||
for (strlist_t::const_iterator it=vars.begin(); it!=vars.end(); ++it) {
|
||||
FILE *out=fopen(it->c_str(), "wb");
|
||||
const strlist_t vars = get_cache_variant(url);
|
||||
for (const std::string& item : vars) {
|
||||
FILE *out=fopen(item.c_str(), "wb");
|
||||
if (!out)
|
||||
continue;
|
||||
if (fwrite(CACHE_MAGIC, 1, strlen(CACHE_MAGIC), out)!=strlen(CACHE_MAGIC))
|
||||
@@ -610,7 +609,7 @@ bool offset_index::save_cache(const std::string& url)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool offset_index::load(const std::string& url, gulong wc, gulong fsize)
|
||||
bool OffsetIndex::load(const std::string& url, gulong wc, gulong fsize)
|
||||
{
|
||||
wordcount=wc;
|
||||
gulong npages=(wc-1)/ENTR_PER_PAGE+2;
|
||||
@@ -650,7 +649,7 @@ bool offset_index::load(const std::string& url, gulong wc, gulong fsize)
|
||||
return true;
|
||||
}
|
||||
|
||||
inline gulong offset_index::load_page(glong page_idx)
|
||||
inline gulong OffsetIndex::load_page(glong page_idx)
|
||||
{
|
||||
gulong nentr=ENTR_PER_PAGE;
|
||||
if (page_idx==glong(wordoffset.size()-2))
|
||||
@@ -668,7 +667,7 @@ inline gulong offset_index::load_page(glong page_idx)
|
||||
return nentr;
|
||||
}
|
||||
|
||||
const gchar *offset_index::get_key(glong idx)
|
||||
const gchar *OffsetIndex::get_key(glong idx)
|
||||
{
|
||||
load_page(idx/ENTR_PER_PAGE);
|
||||
glong idx_in_page=idx%ENTR_PER_PAGE;
|
||||
@@ -678,17 +677,7 @@ const gchar *offset_index::get_key(glong idx)
|
||||
return page.entries[idx_in_page].keystr;
|
||||
}
|
||||
|
||||
// Calls get_key(idx) and discards the returned key.
// NOTE(review): presumably get_key() also refreshes wordentry_offset and
// wordentry_size as a side effect -- its full body is not visible here; confirm.
void offset_index::get_data(glong idx)
|
||||
{
|
||||
get_key(idx);
|
||||
}
|
||||
|
||||
// Returns the key for entry idx by forwarding to get_key(idx).
// NOTE(review): the "data" part presumably comes from get_key()'s side
// effects on wordentry_offset / wordentry_size -- confirm against get_key().
const gchar *offset_index::get_key_and_data(glong idx)
|
||||
{
|
||||
return get_key(idx);
|
||||
}
|
||||
|
||||
bool offset_index::lookup(const char *str, glong &idx)
|
||||
bool OffsetIndex::lookup(const char *str, glong &idx)
|
||||
{
|
||||
bool bFound=false;
|
||||
glong iFrom;
|
||||
@@ -749,15 +738,10 @@ bool offset_index::lookup(const char *str, glong &idx)
|
||||
return bFound;
|
||||
}
|
||||
|
||||
// Releases idxdatabuf with g_free().
wordlist_index::~wordlist_index()
|
||||
{
|
||||
g_free(idxdatabuf);
|
||||
}
|
||||
|
||||
bool wordlist_index::load(const std::string& url, gulong wc, gulong fsize)
|
||||
bool WordListIndex::load(const std::string& url, gulong wc, gulong fsize)
|
||||
{
|
||||
gzFile in = gzopen(url.c_str(), "rb");
|
||||
if (in == NULL)
|
||||
if (in == nullptr)
|
||||
return false;
|
||||
|
||||
idxdatabuf = (gchar *)g_malloc(fsize);
|
||||
@@ -782,12 +766,7 @@ bool wordlist_index::load(const std::string& url, gulong wc, gulong fsize)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the pointer stored at position idx of wordlist.
// idx is used as-is; no range check is performed here.
const gchar *wordlist_index::get_key(glong idx)
|
||||
{
|
||||
return wordlist[idx];
|
||||
}
|
||||
|
||||
void wordlist_index::get_data(glong idx)
|
||||
void WordListIndex::get_data(glong idx)
|
||||
{
|
||||
gchar *p1 = wordlist[idx]+strlen(wordlist[idx])+sizeof(gchar);
|
||||
wordentry_offset = g_ntohl(get_uint32(p1));
|
||||
@@ -795,13 +774,7 @@ void wordlist_index::get_data(glong idx)
|
||||
wordentry_size = g_ntohl(get_uint32(p1));
|
||||
}
|
||||
|
||||
// Runs get_data(idx) first, then returns get_key(idx).
const gchar *wordlist_index::get_key_and_data(glong idx)
|
||||
{
|
||||
get_data(idx);
|
||||
|
||||
return get_key(idx);
|
||||
}
|
||||
|
||||
bool wordlist_index::lookup(const char *str, glong &idx)
|
||||
bool WordListIndex::lookup(const char *str, glong &idx)
|
||||
{
|
||||
bool bFound=false;
|
||||
glong iTo=wordlist.size()-2;
|
||||
@@ -833,6 +806,7 @@ bool wordlist_index::lookup(const char *str, glong &idx)
|
||||
}
|
||||
return bFound;
|
||||
}
|
||||
}
|
||||
|
||||
//===================================================================
|
||||
bool Dict::load(const std::string& ifofilename)
|
||||
@@ -863,10 +837,10 @@ bool Dict::load(const std::string& ifofilename)
|
||||
fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "idx.gz");
|
||||
|
||||
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
|
||||
idx_file.reset(new wordlist_index);
|
||||
idx_file.reset(new WordListIndex);
|
||||
} else {
|
||||
fullfilename.erase(fullfilename.length()-sizeof(".gz")+1, sizeof(".gz")-1);
|
||||
idx_file.reset(new offset_index);
|
||||
idx_file.reset(new OffsetIndex);
|
||||
}
|
||||
|
||||
if (!idx_file->load(fullfilename, wordcount, idxfilesize))
|
||||
@@ -884,8 +858,6 @@ bool Dict::load_ifofile(const std::string& ifofilename, gulong &idxfilesize)
|
||||
if (dict_info.wordcount==0)
|
||||
return false;
|
||||
|
||||
|
||||
|
||||
ifo_file_name=dict_info.ifo_file_name;
|
||||
wordcount=dict_info.wordcount;
|
||||
bookname=dict_info.bookname;
|
||||
@@ -901,26 +873,19 @@ bool Dict::LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen)
|
||||
{
|
||||
int iIndexCount = 0;
|
||||
|
||||
for(guint32 i=0; i<narticles() && iIndexCount<iBuffLen-1; i++)
|
||||
for (guint32 i=0; i < narticles() && iIndexCount < (iBuffLen - 1); i++)
|
||||
if (g_pattern_match_string(pspec, get_key(i)))
|
||||
aIndex[iIndexCount++] = i;
|
||||
|
||||
aIndex[iIndexCount] = -1; // -1 is the end.
|
||||
|
||||
return (iIndexCount>0);
|
||||
}
|
||||
|
||||
//===================================================================
|
||||
Libs::Libs(progress_func_t f)
|
||||
{
|
||||
progress_func=f;
|
||||
iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg.
|
||||
return iIndexCount > 0;
|
||||
}
|
||||
|
||||
Libs::~Libs()
|
||||
{
|
||||
for (std::vector<Dict *>::iterator p=oLib.begin(); p!=oLib.end(); ++p)
|
||||
delete *p;
|
||||
for (Dict *p : oLib)
|
||||
delete p;
|
||||
}
|
||||
|
||||
void Libs::load_dict(const std::string& url)
|
||||
@@ -932,81 +897,54 @@ void Libs::load_dict(const std::string& url)
|
||||
delete lib;
|
||||
}
|
||||
|
||||
class DictLoader {
|
||||
public:
|
||||
DictLoader(Libs& lib_): lib(lib_) {}
|
||||
void operator()(const std::string& url, bool disable) {
|
||||
if (!disable)
|
||||
lib.load_dict(url);
|
||||
}
|
||||
private:
|
||||
Libs& lib;
|
||||
};
|
||||
|
||||
void Libs::load(const strlist_t& dicts_dirs,
|
||||
const strlist_t& order_list,
|
||||
const strlist_t& disable_list)
|
||||
{
|
||||
for_each_file(dicts_dirs, ".ifo", order_list, disable_list,
|
||||
DictLoader(*this));
|
||||
[this](const std::string& url, bool disable) -> void {
|
||||
if (!disable)
|
||||
load_dict(url);
|
||||
});
|
||||
}
|
||||
|
||||
class DictReLoader {
|
||||
public:
|
||||
DictReLoader(std::vector<Dict *> &p, std::vector<Dict *> &f,
|
||||
Libs& lib_) : prev(p), future(f), lib(lib_)
|
||||
{
|
||||
}
|
||||
void operator()(const std::string& url, bool disable) {
|
||||
if (!disable) {
|
||||
Dict *dict=find(url);
|
||||
if (dict)
|
||||
future.push_back(dict);
|
||||
else
|
||||
lib.load_dict(url);
|
||||
}
|
||||
}
|
||||
private:
|
||||
std::vector<Dict *> &prev;
|
||||
std::vector<Dict *> &future;
|
||||
Libs& lib;
|
||||
|
||||
Dict *find(const std::string& url) {
|
||||
std::vector<Dict *>::iterator it;
|
||||
for (it=prev.begin(); it!=prev.end(); ++it)
|
||||
if ((*it)->ifofilename()==url)
|
||||
break;
|
||||
if (it!=prev.end()) {
|
||||
Dict *res=*it;
|
||||
prev.erase(it);
|
||||
return res;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
};
|
||||
|
||||
void Libs::reload(const strlist_t& dicts_dirs,
|
||||
const strlist_t& order_list,
|
||||
const strlist_t& disable_list)
|
||||
{
|
||||
std::vector<Dict *> prev(oLib);
|
||||
oLib.clear();
|
||||
|
||||
for_each_file(dicts_dirs, ".ifo", order_list, disable_list,
|
||||
DictReLoader(prev, oLib, *this));
|
||||
for (std::vector<Dict *>::iterator it=prev.begin(); it!=prev.end(); ++it)
|
||||
delete *it;
|
||||
[&prev, this](const std::string& url, bool disable) -> void {
|
||||
if (!disable) {
|
||||
auto it = prev.begin();
|
||||
for (; it != prev.end(); ++it)
|
||||
if ((*it)->ifofilename() == url)
|
||||
break;
|
||||
if (it != prev.end()) {
|
||||
Dict *res = *it;
|
||||
prev.erase(it);
|
||||
oLib.push_back(res);
|
||||
} else
|
||||
load_dict(url);
|
||||
}
|
||||
});
|
||||
|
||||
for (Dict *p : prev)
|
||||
delete p;
|
||||
}
|
||||
|
||||
const gchar *Libs::poGetCurrentWord(glong * iCurrent)
|
||||
{
|
||||
const gchar *poCurrentWord = NULL;
|
||||
const gchar *poCurrentWord = nullptr;
|
||||
const gchar *word;
|
||||
for (std::vector<Dict *>::size_type iLib=0; iLib<oLib.size(); iLib++) {
|
||||
if (iCurrent[iLib]==INVALID_INDEX)
|
||||
continue;
|
||||
if ( iCurrent[iLib]>=narticles(iLib) || iCurrent[iLib]<0)
|
||||
continue;
|
||||
if ( poCurrentWord == NULL ) {
|
||||
if ( poCurrentWord == nullptr ) {
|
||||
poCurrentWord = poGetWord(iCurrent[iLib],iLib);
|
||||
} else {
|
||||
word = poGetWord(iCurrent[iLib],iLib);
|
||||
@@ -1018,24 +956,23 @@ const gchar *Libs::poGetCurrentWord(glong * iCurrent)
|
||||
return poCurrentWord;
|
||||
}
|
||||
|
||||
const gchar *
|
||||
Libs::poGetNextWord(const gchar *sWord, glong *iCurrent)
|
||||
const gchar *Libs::poGetNextWord(const gchar *sWord, glong *iCurrent)
|
||||
{
|
||||
// the input can be:
|
||||
// (word,iCurrent),read word,write iNext to iCurrent,and return next word. used by TopWin::NextCallback();
|
||||
// (NULL,iCurrent),read iCurrent,write iNext to iCurrent,and return next word. used by AppCore::ListWords();
|
||||
const gchar *poCurrentWord = NULL;
|
||||
std::vector<Dict *>::size_type iCurrentLib=0;
|
||||
// (nullptr,iCurrent),read iCurrent,write iNext to iCurrent,and return next word. used by AppCore::ListWords();
|
||||
const gchar *poCurrentWord = nullptr;
|
||||
size_t iCurrentLib = 0;
|
||||
const gchar *word;
|
||||
|
||||
for (std::vector<Dict *>::size_type iLib=0;iLib<oLib.size();iLib++) {
|
||||
for (size_t iLib = 0; iLib < oLib.size(); ++iLib) {
|
||||
if (sWord)
|
||||
oLib[iLib]->Lookup(sWord, iCurrent[iLib]);
|
||||
if (iCurrent[iLib]==INVALID_INDEX)
|
||||
continue;
|
||||
if (iCurrent[iLib]>=narticles(iLib) || iCurrent[iLib]<0)
|
||||
continue;
|
||||
if (poCurrentWord == NULL ) {
|
||||
if (poCurrentWord == nullptr ) {
|
||||
poCurrentWord = poGetWord(iCurrent[iLib],iLib);
|
||||
iCurrentLib = iLib;
|
||||
} else {
|
||||
@@ -1069,7 +1006,7 @@ const gchar *
|
||||
Libs::poGetPreWord(glong * iCurrent)
|
||||
{
|
||||
// used by TopWin::PreviousCallback(); the iCurrent is cached by AppCore::TopWinWordChange();
|
||||
const gchar *poCurrentWord = NULL;
|
||||
const gchar *poCurrentWord = nullptr;
|
||||
std::vector<Dict *>::size_type iCurrentLib=0;
|
||||
const gchar *word;
|
||||
|
||||
@@ -1080,7 +1017,7 @@ Libs::poGetPreWord(glong * iCurrent)
|
||||
if ( iCurrent[iLib]>narticles(iLib) || iCurrent[iLib]<=0)
|
||||
continue;
|
||||
}
|
||||
if ( poCurrentWord == NULL ) {
|
||||
if ( poCurrentWord == nullptr ) {
|
||||
poCurrentWord = poGetWord(iCurrent[iLib]-1,iLib);
|
||||
iCurrentLib = iLib;
|
||||
} else {
|
||||
@@ -1449,29 +1386,6 @@ bool Libs::SimpleLookupWord(const gchar* sWord, glong & iWordIndex, int iLib)
|
||||
return bFound;
|
||||
}
|
||||
|
||||
// One result slot used by the fuzzy lookup: a matched word together with
// the distance value stored for it.
struct Fuzzystruct {
|
||||
// Pointer to the matched word.
char * pMatchWord;
|
||||
// Distance value recorded for pMatchWord.
int iMatchWordDistance;
|
||||
};
|
||||
|
||||
// Ordering used to sort fuzzy-match slots:
//   1. smaller iMatchWordDistance first;
//   2. at equal distance, a slot that holds a word precedes an empty
//      (null pMatchWord) slot;
//   3. two slots that both hold a word are ordered by stardict_strcmp().
//
// The earlier form returned false whenever either word pointer was null.
// That made an empty slot compare equivalent to every filled slot of the
// same distance while those filled slots were still ordered among
// themselves, so equivalence was not transitive. std::sort requires a
// strict weak ordering; rule 2 restores it without changing the result for
// inputs where filled and empty slots never share a distance.
inline bool operator<(const Fuzzystruct & lh, const Fuzzystruct & rh) {
    if (lh.iMatchWordDistance != rh.iMatchWordDistance)
        return lh.iMatchWordDistance < rh.iMatchWordDistance;

    if (lh.pMatchWord && rh.pMatchWord)
        return stardict_strcmp(lh.pMatchWord, rh.pMatchWord) < 0;

    // At most one side holds a word here: filled sorts before empty,
    // and two empty slots are equivalent.
    return lh.pMatchWord && !rh.pMatchWord;
}
|
||||
|
||||
// Converts a zero-terminated gunichar string to lower case in place,
// one element at a time, using g_unichar_tolower().
static inline void unicode_strdown(gunichar *str)
{
    for (gunichar *cur = str; *cur != 0; ++cur)
        *cur = g_unichar_tolower(*cur);
}
|
||||
|
||||
bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size)
|
||||
{
|
||||
if (sWord[0] == '\0')
|
||||
@@ -1480,7 +1394,7 @@ bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_si
|
||||
Fuzzystruct oFuzzystruct[reslist_size];
|
||||
|
||||
for (int i = 0; i < reslist_size; i++) {
|
||||
oFuzzystruct[i].pMatchWord = NULL;
|
||||
oFuzzystruct[i].pMatchWord = nullptr;
|
||||
oFuzzystruct[i].iMatchWordDistance = iMaxFuzzyDistance;
|
||||
}
|
||||
int iMaxDistance = iMaxFuzzyDistance;
|
||||
@@ -1496,13 +1410,13 @@ bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_si
|
||||
ucs4_str2 = g_utf8_to_ucs4_fast(sWord, -1, &ucs4_str2_len);
|
||||
unicode_strdown(ucs4_str2);
|
||||
|
||||
for (std::vector<Dict *>::size_type iLib=0; iLib<oLib.size(); iLib++) {
|
||||
for (size_t iLib = 0; iLib < oLib.size(); ++iLib) {
|
||||
if (progress_func)
|
||||
progress_func();
|
||||
|
||||
//if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) {
|
||||
//there are Chinese dicts and English dicts...
|
||||
if (TRUE) {
|
||||
|
||||
const int iwords = narticles(iLib);
|
||||
for (int index=0; index<iwords; index++) {
|
||||
sCheck = poGetWord(index,iLib);
|
||||
@@ -1511,7 +1425,7 @@ bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_si
|
||||
if (iCheckWordLen-ucs4_str2_len>=iMaxDistance ||
|
||||
ucs4_str2_len-iCheckWordLen>=iMaxDistance)
|
||||
continue;
|
||||
ucs4_str1 = g_utf8_to_ucs4_fast(sCheck, -1, NULL);
|
||||
ucs4_str1 = g_utf8_to_ucs4_fast(sCheck, -1, nullptr);
|
||||
if (iCheckWordLen > ucs4_str2_len)
|
||||
ucs4_str1[ucs4_str2_len]=0;
|
||||
unicode_strdown(ucs4_str1);
|
||||
@@ -1548,12 +1462,20 @@ bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_si
|
||||
} // add to list
|
||||
} // find one
|
||||
} // each word
|
||||
} // ok for search
|
||||
|
||||
} // each lib
|
||||
g_free(ucs4_str2);
|
||||
|
||||
if (Found)// sort with distance
|
||||
std::sort(oFuzzystruct, oFuzzystruct+reslist_size);
|
||||
std::sort(oFuzzystruct, oFuzzystruct + reslist_size, [](const Fuzzystruct& lh, const Fuzzystruct& rh) -> bool {
|
||||
if (lh.iMatchWordDistance!=rh.iMatchWordDistance)
|
||||
return lh.iMatchWordDistance<rh.iMatchWordDistance;
|
||||
|
||||
if (lh.pMatchWord && rh.pMatchWord)
|
||||
return stardict_strcmp(lh.pMatchWord, rh.pMatchWord)<0;
|
||||
|
||||
return false;
|
||||
});
|
||||
|
||||
for (gint i = 0; i < reslist_size; ++i)
|
||||
reslist[i] = oFuzzystruct[i].pMatchWord;
|
||||
@@ -1561,10 +1483,6 @@ bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_si
|
||||
return Found;
|
||||
}
|
||||
|
||||
inline bool less_for_compare(const char *lh, const char *rh) {
|
||||
return stardict_strcmp(lh, rh)<0;
|
||||
}
|
||||
|
||||
gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord)
|
||||
{
|
||||
glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1];
|
||||
@@ -1595,7 +1513,9 @@ gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord)
|
||||
g_pattern_spec_free(pspec);
|
||||
|
||||
if (iMatchCount)// sort it.
|
||||
std::sort(ppMatchWord, ppMatchWord+iMatchCount, less_for_compare);
|
||||
std::sort(ppMatchWord, ppMatchWord+iMatchCount, [](const char *lh, const char *rh) -> bool {
|
||||
return stardict_strcmp(lh, rh)<0;
|
||||
});
|
||||
|
||||
return iMatchCount;
|
||||
}
|
||||
@@ -1642,7 +1562,7 @@ bool Libs::LookupData(const gchar *sWord, std::vector<gchar *> *reslist)
|
||||
return false;
|
||||
|
||||
guint32 max_size =0;
|
||||
gchar *origin_data = NULL;
|
||||
gchar *origin_data = nullptr;
|
||||
for (std::vector<Dict *>::size_type i=0; i<oLib.size(); ++i) {
|
||||
if (!oLib[i]->containSearchData())
|
||||
continue;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
#ifndef __SD_LIB_H__
|
||||
#define __SD_LIB_H__
|
||||
#pragma once
|
||||
|
||||
#include <cstdio>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
||||
#include "dictziplib.hpp"
|
||||
|
||||
@@ -16,7 +16,7 @@ struct cacheItem {
|
||||
guint32 offset;
|
||||
gchar *data;
|
||||
//write code here to make it inline
|
||||
cacheItem() {data= NULL;}
|
||||
cacheItem() { data = nullptr;}
|
||||
~cacheItem() { g_free(data); }
|
||||
};
|
||||
|
||||
@@ -25,18 +25,27 @@ const int INVALID_INDEX=-100;
|
||||
|
||||
class DictBase {
|
||||
public:
|
||||
DictBase();
|
||||
~DictBase();
|
||||
DictBase() {}
|
||||
~DictBase() {
|
||||
if (dictfile)
|
||||
fclose(dictfile);
|
||||
}
|
||||
DictBase(const DictBase&) = delete;
|
||||
DictBase& operator=(const DictBase&) = delete;
|
||||
gchar * GetWordData(guint32 idxitem_offset, guint32 idxitem_size);
|
||||
bool containSearchData();
|
||||
bool containSearchData() const {
|
||||
if (sametypesequence.empty())
|
||||
return true;
|
||||
return sametypesequence.find_first_of("mlgxty") != std::string::npos;
|
||||
}
|
||||
bool SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data);
|
||||
protected:
|
||||
std::string sametypesequence;
|
||||
FILE *dictfile;
|
||||
std::auto_ptr<dictData> dictdzfile;
|
||||
FILE *dictfile = nullptr;
|
||||
std::unique_ptr<dictData> dictdzfile;
|
||||
private:
|
||||
cacheItem cache[WORDDATA_CACHE_NUM];
|
||||
gint cache_cur;
|
||||
gint cache_cur = 0;
|
||||
};
|
||||
|
||||
//this structure contain all information about dictionary
|
||||
@@ -51,15 +60,16 @@ struct DictInfo {
|
||||
std::string description;
|
||||
guint32 index_file_size;
|
||||
std::string sametypesequence;
|
||||
|
||||
bool load_from_ifo_file(const std::string& ifofilename, bool istreedict);
|
||||
};
|
||||
|
||||
class index_file {
|
||||
class IIndexFile {
|
||||
public:
|
||||
guint32 wordentry_offset;
|
||||
guint32 wordentry_size;
|
||||
|
||||
virtual ~index_file() {}
|
||||
virtual ~IIndexFile() {}
|
||||
virtual bool load(const std::string& url, gulong wc, gulong fsize) = 0;
|
||||
virtual const gchar *get_key(glong idx) = 0;
|
||||
virtual void get_data(glong idx) = 0;
|
||||
@@ -68,30 +78,22 @@ public:
|
||||
};
|
||||
|
||||
class Dict : public DictBase {
|
||||
private:
|
||||
std::string ifo_file_name;
|
||||
gulong wordcount;
|
||||
std::string bookname;
|
||||
|
||||
std::auto_ptr<index_file> idx_file;
|
||||
|
||||
bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize);
|
||||
public:
|
||||
Dict() {}
|
||||
Dict(const Dict&) = delete;
|
||||
Dict& operator=(const Dict&) = delete;
|
||||
bool load(const std::string& ifofilename);
|
||||
|
||||
gulong narticles() { return wordcount; }
|
||||
const std::string& dict_name() { return bookname; }
|
||||
const std::string& ifofilename() { return ifo_file_name; }
|
||||
gulong narticles() const { return wordcount; }
|
||||
const std::string& dict_name() const { return bookname; }
|
||||
const std::string& ifofilename() const { return ifo_file_name; }
|
||||
|
||||
const gchar *get_key(glong index) { return idx_file->get_key(index); }
|
||||
gchar *get_data(glong index)
|
||||
{
|
||||
gchar *get_data(glong index) {
|
||||
idx_file->get_data(index);
|
||||
return DictBase::GetWordData(idx_file->wordentry_offset, idx_file->wordentry_size);
|
||||
}
|
||||
void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size)
|
||||
{
|
||||
void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size) {
|
||||
*key = idx_file->get_key_and_data(index);
|
||||
*offset = idx_file->wordentry_offset;
|
||||
*size = idx_file->wordentry_size;
|
||||
@@ -99,16 +101,28 @@ public:
|
||||
bool Lookup(const char *str, glong &idx) { return idx_file->lookup(str, idx); }
|
||||
|
||||
bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
|
||||
private:
|
||||
std::string ifo_file_name;
|
||||
gulong wordcount;
|
||||
std::string bookname;
|
||||
|
||||
std::unique_ptr<IIndexFile> idx_file;
|
||||
|
||||
bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize);
|
||||
};
|
||||
|
||||
typedef std::list<std::string> strlist_t;
|
||||
|
||||
class Libs {
|
||||
public:
|
||||
typedef void (*progress_func_t)(void);
|
||||
|
||||
Libs(progress_func_t f=NULL);
|
||||
Libs(std::function<void(void)> f = std::function<void(void)>()) {
|
||||
progress_func = f;
|
||||
iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg.
|
||||
}
|
||||
~Libs();
|
||||
Libs(const Libs&) = delete;
|
||||
Libs& operator=(const Libs&) = delete;
|
||||
|
||||
void load_dict(const std::string& url);
|
||||
void load(const strlist_t& dicts_dirs,
|
||||
const strlist_t& order_list,
|
||||
@@ -117,8 +131,8 @@ public:
|
||||
const strlist_t& order_list,
|
||||
const strlist_t& disable_list);
|
||||
|
||||
glong narticles(int idict) { return oLib[idict]->narticles(); }
|
||||
const std::string& dict_name(int idict) { return oLib[idict]->dict_name(); }
|
||||
glong narticles(int idict) const { return oLib[idict]->narticles(); }
|
||||
const std::string& dict_name(int idict) const { return oLib[idict]->dict_name(); }
|
||||
gint ndicts() const { return oLib.size(); }
|
||||
|
||||
const gchar *poGetWord(glong iIndex, int iLib) {
|
||||
@@ -126,7 +140,7 @@ public:
|
||||
}
|
||||
gchar * poGetWordData(glong iIndex,int iLib) {
|
||||
if (iIndex == INVALID_INDEX)
|
||||
return NULL;
|
||||
return nullptr;
|
||||
return oLib[iLib]->get_data(iIndex);
|
||||
}
|
||||
const gchar *poGetCurrentWord(glong *iCurrent);
|
||||
@@ -145,14 +159,13 @@ public:
|
||||
private:
|
||||
std::vector<Dict *> oLib; // word Libs.
|
||||
int iMaxFuzzyDistance;
|
||||
progress_func_t progress_func;
|
||||
std::function<void(void)> progress_func;
|
||||
};
|
||||
|
||||
|
||||
typedef enum {
|
||||
enum query_t {
|
||||
qtSIMPLE, qtREGEXP, qtFUZZY, qtDATA
|
||||
} query_t;
|
||||
};
|
||||
|
||||
extern query_t analyze_query(const char *s, std::string& res);
|
||||
|
||||
#endif//!__SD_LIB_H__
|
||||
Reference in New Issue
Block a user