mirror of
https://github.com/Dushistov/sdcv.git
synced 2025-12-15 17:31:56 +00:00
Merge pull request #20 from ecraven/master
Add support for .syn synonym files.
This commit is contained in:
@@ -144,6 +144,7 @@ if (BUILD_TESTS)
|
|||||||
|
|
||||||
add_sdcv_shell_test(t_list)
|
add_sdcv_shell_test(t_list)
|
||||||
add_sdcv_shell_test(t_use)
|
add_sdcv_shell_test(t_use)
|
||||||
|
add_sdcv_shell_test(t_synonyms)
|
||||||
add_sdcv_shell_test(t_interactive)
|
add_sdcv_shell_test(t_interactive)
|
||||||
add_sdcv_shell_test(t_utf8output)
|
add_sdcv_shell_test(t_utf8output)
|
||||||
add_sdcv_shell_test(t_utf8input)
|
add_sdcv_shell_test(t_utf8input)
|
||||||
|
|||||||
@@ -172,6 +172,14 @@ bool DictInfo::load_from_ifo_file(const std::string& ifofilename,
|
|||||||
sametypesequence.assign(p2, p3-p2);
|
sametypesequence.assign(p2, p3-p2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
p2 = strstr(p1,"\nsynwordcount=");
|
||||||
|
syn_wordcount = 0;
|
||||||
|
if (p2) {
|
||||||
|
p2+=sizeof("\nsynwordcount=")-1;
|
||||||
|
p3 = strchr(p2, '\n');
|
||||||
|
syn_wordcount = atol(std::string(p2, p3-p2).c_str());
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -803,6 +811,51 @@ namespace {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool SynFile::load(const std::string& url, gulong wc) {
|
||||||
|
struct stat stat_buf;
|
||||||
|
if(!stat(url.c_str(), &stat_buf)) {
|
||||||
|
MapFile syn;
|
||||||
|
if(!syn.open(url.c_str(), stat_buf.st_size))
|
||||||
|
return false;
|
||||||
|
const gchar *current = syn.begin();
|
||||||
|
for(unsigned long i = 0; i < wc; i++) {
|
||||||
|
// each entry in a syn-file is:
|
||||||
|
// - 0-terminated string
|
||||||
|
// 4-byte index into .dict file in network byte order
|
||||||
|
gchar *lower_string = g_utf8_casefold(current, -1);
|
||||||
|
std::string synonym(lower_string);
|
||||||
|
g_free(lower_string);
|
||||||
|
current += synonym.length()+1;
|
||||||
|
unsigned int idx = * reinterpret_cast<const unsigned int*>(current);
|
||||||
|
idx = g_ntohl(idx);
|
||||||
|
current += sizeof(idx);
|
||||||
|
synonyms[synonym] = idx;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SynFile::lookup(const char *str, glong &idx) {
|
||||||
|
gchar *lower_string = g_utf8_casefold(str, -1);
|
||||||
|
auto it = synonyms.find(lower_string);
|
||||||
|
if(it != synonyms.end()) {
|
||||||
|
g_free(lower_string);
|
||||||
|
idx = it->second;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
g_free(lower_string);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Dict::Lookup(const char *str, glong &idx) {
|
||||||
|
if(syn_file->lookup(str, idx)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return idx_file->lookup(str, idx);
|
||||||
|
}
|
||||||
|
|
||||||
bool Dict::load(const std::string& ifofilename)
|
bool Dict::load(const std::string& ifofilename)
|
||||||
{
|
{
|
||||||
gulong idxfilesize;
|
gulong idxfilesize;
|
||||||
@@ -840,6 +893,11 @@ bool Dict::load(const std::string& ifofilename)
|
|||||||
if (!idx_file->load(fullfilename, wordcount, idxfilesize))
|
if (!idx_file->load(fullfilename, wordcount, idxfilesize))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
fullfilename=ifofilename;
|
||||||
|
fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "syn");
|
||||||
|
syn_file.reset(new SynFile);
|
||||||
|
syn_file->load(fullfilename, syn_wordcount);
|
||||||
|
|
||||||
//g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles());
|
//g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -854,6 +912,7 @@ bool Dict::load_ifofile(const std::string& ifofilename, gulong &idxfilesize)
|
|||||||
|
|
||||||
ifo_file_name=dict_info.ifo_file_name;
|
ifo_file_name=dict_info.ifo_file_name;
|
||||||
wordcount=dict_info.wordcount;
|
wordcount=dict_info.wordcount;
|
||||||
|
syn_wordcount=dict_info.syn_wordcount;
|
||||||
bookname=dict_info.bookname;
|
bookname=dict_info.bookname;
|
||||||
|
|
||||||
idxfilesize=dict_info.index_file_size;
|
idxfilesize=dict_info.index_file_size;
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
#include "dictziplib.hpp"
|
#include "dictziplib.hpp"
|
||||||
|
|
||||||
@@ -66,6 +67,7 @@ private:
|
|||||||
struct DictInfo {
|
struct DictInfo {
|
||||||
std::string ifo_file_name;
|
std::string ifo_file_name;
|
||||||
guint32 wordcount;
|
guint32 wordcount;
|
||||||
|
guint32 syn_wordcount;
|
||||||
std::string bookname;
|
std::string bookname;
|
||||||
std::string author;
|
std::string author;
|
||||||
std::string email;
|
std::string email;
|
||||||
@@ -73,6 +75,7 @@ struct DictInfo {
|
|||||||
std::string date;
|
std::string date;
|
||||||
std::string description;
|
std::string description;
|
||||||
guint32 index_file_size;
|
guint32 index_file_size;
|
||||||
|
guint32 syn_file_size;
|
||||||
std::string sametypesequence;
|
std::string sametypesequence;
|
||||||
|
|
||||||
bool load_from_ifo_file(const std::string& ifofilename, bool istreedict);
|
bool load_from_ifo_file(const std::string& ifofilename, bool istreedict);
|
||||||
@@ -91,6 +94,14 @@ public:
|
|||||||
virtual bool lookup(const char *str, glong &idx) = 0;
|
virtual bool lookup(const char *str, glong &idx) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class SynFile {
|
||||||
|
public:
|
||||||
|
bool load(const std::string& url, gulong wc);
|
||||||
|
bool lookup(const char *str, glong &idx);
|
||||||
|
private:
|
||||||
|
std::map<std::string, gulong> synonyms;
|
||||||
|
};
|
||||||
|
|
||||||
class Dict : public DictBase {
|
class Dict : public DictBase {
|
||||||
public:
|
public:
|
||||||
Dict() {}
|
Dict() {}
|
||||||
@@ -112,15 +123,17 @@ public:
|
|||||||
*offset = idx_file->wordentry_offset;
|
*offset = idx_file->wordentry_offset;
|
||||||
*size = idx_file->wordentry_size;
|
*size = idx_file->wordentry_size;
|
||||||
}
|
}
|
||||||
bool Lookup(const char *str, glong &idx) { return idx_file->lookup(str, idx); }
|
bool Lookup(const char *str, glong &idx);
|
||||||
|
|
||||||
bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
|
bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
|
||||||
private:
|
private:
|
||||||
std::string ifo_file_name;
|
std::string ifo_file_name;
|
||||||
gulong wordcount;
|
gulong wordcount;
|
||||||
|
gulong syn_wordcount;
|
||||||
std::string bookname;
|
std::string bookname;
|
||||||
|
|
||||||
std::unique_ptr<IIndexFile> idx_file;
|
std::unique_ptr<IIndexFile> idx_file;
|
||||||
|
std::unique_ptr<SynFile> syn_file;
|
||||||
|
|
||||||
bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize);
|
bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize);
|
||||||
};
|
};
|
||||||
|
|||||||
BIN
tests/stardict-test_synonyms-2.4.2/test.dict.dz
Normal file
BIN
tests/stardict-test_synonyms-2.4.2/test.dict.dz
Normal file
Binary file not shown.
BIN
tests/stardict-test_synonyms-2.4.2/test.idx
Normal file
BIN
tests/stardict-test_synonyms-2.4.2/test.idx
Normal file
Binary file not shown.
7
tests/stardict-test_synonyms-2.4.2/test.ifo
Normal file
7
tests/stardict-test_synonyms-2.4.2/test.ifo
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
StarDict's dict ifo file
|
||||||
|
version=2.4.2
|
||||||
|
bookname=Test synonyms
|
||||||
|
wordcount=1
|
||||||
|
synwordcount=2
|
||||||
|
idxfilesize=13
|
||||||
|
sametypesequence=m
|
||||||
BIN
tests/stardict-test_synonyms-2.4.2/test.syn
Normal file
BIN
tests/stardict-test_synonyms-2.4.2/test.syn
Normal file
Binary file not shown.
18
tests/stardict-test_synonyms-2.4.2/test.xml
Normal file
18
tests/stardict-test_synonyms-2.4.2/test.xml
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
<stardict xmlns:xi="http://www.w3.org/2003/XInclude">
|
||||||
|
<info>
|
||||||
|
<version>2.4.2</version>
|
||||||
|
<bookname>Test synonyms</bookname>
|
||||||
|
<author></author>
|
||||||
|
<email></email>
|
||||||
|
<website></website>
|
||||||
|
<description></description>
|
||||||
|
<date></date>
|
||||||
|
<dicttype></dicttype>
|
||||||
|
</info>
|
||||||
|
<article><key>test</key><synonym>foo</synonym><synonym>bar</synonym>
|
||||||
|
<definition type="m">
|
||||||
|
<![CDATA[result of test]]>
|
||||||
|
</definition>
|
||||||
|
</article>
|
||||||
|
</stardict>
|
||||||
22
tests/t_synonyms
Executable file
22
tests/t_synonyms
Executable file
@@ -0,0 +1,22 @@
|
|||||||
|
#!/bin/sh

# Checks that looking up either a synonym or the original key in the
# "Test synonyms" dictionary prints the article "result of test".
#
# Usage: t_synonyms <path-to-sdcv> <test-data-dir>

set -e

SDCV="$1"
TEST_DIR="$2"

unset SDCV_PAGER

# test_word WORD
# Looks WORD up and exits with status 1 unless the line of output containing
# "result" is exactly "result of test".
test_word() {
    WORD=$1
    # "|| true": with "set -e" a grep that matches nothing would abort the
    # script right here, before the diagnostic below could be printed.
    RES=$("$SDCV" -n --data-dir "$TEST_DIR" -u "Test synonyms" "$WORD" | grep result || true)
    if [ "result of test" != "$RES" ]; then
        echo "synonym for $WORD should be 'result of test' but was '$RES'"
        exit 1
    fi
}

test_word foo
test_word bar
test_word test

exit 0
|
||||||
Reference in New Issue
Block a user