Merge pull request #20 from ecraven/master

Add support for .syn synonym files.
This commit is contained in:
Evgeniy Dushistov
2017-07-06 23:57:19 +03:00
committed by GitHub
9 changed files with 121 additions and 1 deletions

View File

@@ -144,6 +144,7 @@ if (BUILD_TESTS)
add_sdcv_shell_test(t_list) add_sdcv_shell_test(t_list)
add_sdcv_shell_test(t_use) add_sdcv_shell_test(t_use)
add_sdcv_shell_test(t_synonyms)
add_sdcv_shell_test(t_interactive) add_sdcv_shell_test(t_interactive)
add_sdcv_shell_test(t_utf8output) add_sdcv_shell_test(t_utf8output)
add_sdcv_shell_test(t_utf8input) add_sdcv_shell_test(t_utf8input)

View File

@@ -172,6 +172,14 @@ bool DictInfo::load_from_ifo_file(const std::string& ifofilename,
sametypesequence.assign(p2, p3-p2); sametypesequence.assign(p2, p3-p2);
} }
p2 = strstr(p1,"\nsynwordcount=");
syn_wordcount = 0;
if (p2) {
p2+=sizeof("\nsynwordcount=")-1;
p3 = strchr(p2, '\n');
syn_wordcount = atol(std::string(p2, p3-p2).c_str());
}
return true; return true;
} }
@@ -803,6 +811,51 @@ namespace {
} }
} }
bool SynFile::load(const std::string& url, gulong wc) {
struct stat stat_buf;
if(!stat(url.c_str(), &stat_buf)) {
MapFile syn;
if(!syn.open(url.c_str(), stat_buf.st_size))
return false;
const gchar *current = syn.begin();
for(unsigned long i = 0; i < wc; i++) {
// each entry in a syn-file is:
// - 0-terminated string
// 4-byte index into .dict file in network byte order
gchar *lower_string = g_utf8_casefold(current, -1);
std::string synonym(lower_string);
g_free(lower_string);
current += synonym.length()+1;
unsigned int idx = * reinterpret_cast<const unsigned int*>(current);
idx = g_ntohl(idx);
current += sizeof(idx);
synonyms[synonym] = idx;
}
return true;
} else {
return false;
}
}
bool SynFile::lookup(const char *str, glong &idx) {
gchar *lower_string = g_utf8_casefold(str, -1);
auto it = synonyms.find(lower_string);
if(it != synonyms.end()) {
g_free(lower_string);
idx = it->second;
return true;
}
g_free(lower_string);
return false;
}
// Resolves `str` to an index: the synonym file is consulted first, and the
// regular index file is only queried when no synonym matches.
// Returns true if either lookup succeeds; `idx` is filled in by whichever
// lookup is called.
bool Dict::Lookup(const char *str, glong &idx) {
    return syn_file->lookup(str, idx) || idx_file->lookup(str, idx);
}
bool Dict::load(const std::string& ifofilename) bool Dict::load(const std::string& ifofilename)
{ {
gulong idxfilesize; gulong idxfilesize;
@@ -840,6 +893,11 @@ bool Dict::load(const std::string& ifofilename)
if (!idx_file->load(fullfilename, wordcount, idxfilesize)) if (!idx_file->load(fullfilename, wordcount, idxfilesize))
return false; return false;
fullfilename=ifofilename;
fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "syn");
syn_file.reset(new SynFile);
syn_file->load(fullfilename, syn_wordcount);
//g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles()); //g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles());
return true; return true;
} }
@@ -854,6 +912,7 @@ bool Dict::load_ifofile(const std::string& ifofilename, gulong &idxfilesize)
ifo_file_name=dict_info.ifo_file_name; ifo_file_name=dict_info.ifo_file_name;
wordcount=dict_info.wordcount; wordcount=dict_info.wordcount;
syn_wordcount=dict_info.syn_wordcount;
bookname=dict_info.bookname; bookname=dict_info.bookname;
idxfilesize=dict_info.index_file_size; idxfilesize=dict_info.index_file_size;

View File

@@ -7,6 +7,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <functional> #include <functional>
#include <map>
#include "dictziplib.hpp" #include "dictziplib.hpp"
@@ -66,6 +67,7 @@ private:
struct DictInfo { struct DictInfo {
std::string ifo_file_name; std::string ifo_file_name;
guint32 wordcount; guint32 wordcount;
guint32 syn_wordcount;
std::string bookname; std::string bookname;
std::string author; std::string author;
std::string email; std::string email;
@@ -73,6 +75,7 @@ struct DictInfo {
std::string date; std::string date;
std::string description; std::string description;
guint32 index_file_size; guint32 index_file_size;
guint32 syn_file_size;
std::string sametypesequence; std::string sametypesequence;
bool load_from_ifo_file(const std::string& ifofilename, bool istreedict); bool load_from_ifo_file(const std::string& ifofilename, bool istreedict);
@@ -91,6 +94,14 @@ public:
virtual bool lookup(const char *str, glong &idx) = 0; virtual bool lookup(const char *str, glong &idx) = 0;
}; };
// In-memory view of a dictionary's synonym (.syn) file.
class SynFile {
    // Synonym string -> index value read from the .syn file.
    std::map<std::string, gulong> synonyms;

public:
    // Reads `wc` entries from the file at `url`; returns false on failure.
    bool load(const std::string& url, gulong wc);

    // Stores the index mapped to `str` in `idx` and returns true when `str`
    // is a known synonym; returns false otherwise.
    bool lookup(const char *str, glong &idx);
};
class Dict : public DictBase { class Dict : public DictBase {
public: public:
Dict() {} Dict() {}
@@ -112,15 +123,17 @@ public:
*offset = idx_file->wordentry_offset; *offset = idx_file->wordentry_offset;
*size = idx_file->wordentry_size; *size = idx_file->wordentry_size;
} }
bool Lookup(const char *str, glong &idx) { return idx_file->lookup(str, idx); } bool Lookup(const char *str, glong &idx);
bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen); bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
private: private:
std::string ifo_file_name; std::string ifo_file_name;
gulong wordcount; gulong wordcount;
gulong syn_wordcount;
std::string bookname; std::string bookname;
std::unique_ptr<IIndexFile> idx_file; std::unique_ptr<IIndexFile> idx_file;
std::unique_ptr<SynFile> syn_file;
bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize); bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize);
}; };

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,7 @@
StarDict's dict ifo file
version=2.4.2
bookname=Test synonyms
wordcount=1
synwordcount=2
idxfilesize=13
sametypesequence=m

Binary file not shown.

View File

@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8" ?>
<stardict xmlns:xi="http://www.w3.org/2003/XInclude">
<info>
<version>2.4.2</version>
<bookname>Test synonyms</bookname>
<author></author>
<email></email>
<website></website>
<description></description>
<date></date>
<dicttype></dicttype>
</info>
<article><key>test</key><synonym>foo</synonym><synonym>bar</synonym>
<definition type="m">
<![CDATA[result of test]]>
</definition>
</article>
</stardict>

22
tests/t_synonyms Executable file
View File

@@ -0,0 +1,22 @@
#!/bin/sh
# Checks that both the headword and its synonyms resolve to the same
# article in the "Test synonyms" dictionary.
#
# Usage: t_synonyms <path-to-sdcv> <test-data-dir>

set -e

SDCV="$1"
TEST_DIR="$2"

unset SDCV_PAGER

# Looks up the given word and fails the test unless the output line
# containing "result" is exactly "result of test".
test_word() {
    WORD=$1
    # `|| true`: under `set -e` a non-matching grep would make the assignment
    # fail and abort the script before the diagnostic below is printed.
    RES=$("$SDCV" -n --data-dir "$TEST_DIR" -u "Test synonyms" "$WORD" | grep result || true)
    if [ "result of test" != "$RES" ]; then
        echo "synonym for $WORD should be 'result of test' but was '$RES'"
        exit 1
    fi
}

test_word foo
test_word bar
test_word test

exit 0