mirror of
https://github.com/Dushistov/sdcv.git
synced 2025-12-15 17:31:56 +00:00
Use binary search for synonyms, fixes #31
This commit is contained in:
@@ -833,21 +833,37 @@ bool SynFile::load(const std::string &url, gulong wc)
|
|||||||
{
|
{
|
||||||
struct stat stat_buf;
|
struct stat stat_buf;
|
||||||
if (!stat(url.c_str(), &stat_buf)) {
|
if (!stat(url.c_str(), &stat_buf)) {
|
||||||
MapFile syn;
|
|
||||||
if (!syn.open(url.c_str(), stat_buf.st_size))
|
FILE *in = fopen(url.c_str(), "rb");
|
||||||
|
if (!in)
|
||||||
return false;
|
return false;
|
||||||
const gchar *current = syn.begin();
|
|
||||||
|
fseek(in, 0, SEEK_END);
|
||||||
|
gulong fsize = ftell(in);
|
||||||
|
fseek(in, 0, SEEK_SET);
|
||||||
|
syndatabuf = (gchar *)g_malloc(fsize);
|
||||||
|
|
||||||
|
const int len = fread(syndatabuf, 1, fsize, in);
|
||||||
|
fclose(in);
|
||||||
|
if (len < 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (gulong(len) != fsize)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
synlist.resize(wc + 1);
|
||||||
|
gchar *p1 = syndatabuf;
|
||||||
|
|
||||||
for (unsigned long i = 0; i < wc; i++) {
|
for (unsigned long i = 0; i < wc; i++) {
|
||||||
// each entry in a syn-file is:
|
// each entry in a syn-file is:
|
||||||
// - 0-terminated string
|
// - 0-terminated string
|
||||||
// 4-byte index into .dict file in network byte order
|
// 4-byte index into .dict file in network byte order
|
||||||
glib::CharStr lower_string{ g_utf8_casefold(current, -1) };
|
|
||||||
std::string synonym{ get_impl(lower_string) };
|
synlist[i] = p1;
|
||||||
current += synonym.length() + 1;
|
p1 += strlen(p1) + 1 + 4;
|
||||||
const guint32 idx = g_ntohl(get_uint32(current));
|
|
||||||
current += sizeof(idx);
|
|
||||||
synonyms[synonym] = idx;
|
|
||||||
}
|
}
|
||||||
|
synlist[wc] = p1;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return false;
|
||||||
@@ -856,13 +872,35 @@ bool SynFile::load(const std::string &url, gulong wc)
|
|||||||
|
|
||||||
bool SynFile::lookup(const char *str, glong &idx)
|
bool SynFile::lookup(const char *str, glong &idx)
|
||||||
{
|
{
|
||||||
glib::CharStr lower_string{ g_utf8_casefold(str, -1) };
|
bool bFound = false;
|
||||||
auto it = synonyms.find(get_impl(lower_string));
|
glong iTo = synlist.size() - 2;
|
||||||
if (it != synonyms.end()) {
|
|
||||||
idx = it->second;
|
if (stardict_strcmp(str, get_key(0)) < 0) {
|
||||||
return true;
|
idx = 0;
|
||||||
|
} else if (stardict_strcmp(str, get_key(iTo)) > 0) {
|
||||||
|
idx = INVALID_INDEX;
|
||||||
|
} else {
|
||||||
|
glong iThisIndex = 0;
|
||||||
|
glong iFrom = 0;
|
||||||
|
gint cmpint;
|
||||||
|
while (iFrom <= iTo) {
|
||||||
|
iThisIndex = (iFrom + iTo) / 2;
|
||||||
|
cmpint = stardict_strcmp(str, get_key(iThisIndex));
|
||||||
|
if (cmpint > 0)
|
||||||
|
iFrom = iThisIndex + 1;
|
||||||
|
else if (cmpint < 0)
|
||||||
|
iTo = iThisIndex - 1;
|
||||||
|
else {
|
||||||
|
bFound = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!bFound)
|
||||||
|
idx = iFrom; //next
|
||||||
|
else
|
||||||
|
idx = iThisIndex;
|
||||||
}
|
}
|
||||||
return false;
|
return bFound;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dict::Lookup(const char *str, glong &idx)
|
bool Dict::Lookup(const char *str, glong &idx)
|
||||||
|
|||||||
@@ -102,11 +102,18 @@ public:
|
|||||||
class SynFile
|
class SynFile
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
SynFile()
|
||||||
|
: syndatabuf(nullptr)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
~SynFile() { g_free(syndatabuf); }
|
||||||
bool load(const std::string &url, gulong wc);
|
bool load(const std::string &url, gulong wc);
|
||||||
bool lookup(const char *str, glong &idx);
|
bool lookup(const char *str, glong &idx);
|
||||||
|
const gchar *get_key(glong idx) { return synlist[idx]; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::map<std::string, gulong> synonyms;
|
gchar *syndatabuf;
|
||||||
|
std::vector<gchar *> synlist;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Dict : public DictBase
|
class Dict : public DictBase
|
||||||
|
|||||||
Reference in New Issue
Block a user