From 4b52181898d77939353617e35ae010e50b18cf49 Mon Sep 17 00:00:00 2001
From: Peter
Date: Thu, 6 Jul 2017 09:09:56 +0200
Subject: [PATCH 1/2] Add support for .syn synonym files. Fixes #8.

---
 src/stardict_lib.cpp | 80 ++++++++++++++++++++++++++++++++++++++++++++
 src/stardict_lib.hpp | 17 +++++++++-
 2 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/src/stardict_lib.cpp b/src/stardict_lib.cpp
index 2ea23f9..74de6a7 100644
--- a/src/stardict_lib.cpp
+++ b/src/stardict_lib.cpp
@@ -178,6 +178,17 @@ bool DictInfo::load_from_ifo_file(const std::string& ifofilename,
 		sametypesequence.assign(p2, p3-p2);
 	}
 
+	// Optional "synwordcount=" key: the entry count handed to SynFile::load.
+	// It stays 0 when the key is absent.
+	p2 = strstr(p1,"\nsynwordcount=");
+	syn_wordcount = 0;
+	if (p2) {
+		p2+=sizeof("\nsynwordcount=")-1;
+		// atol stops at the first non-digit, so no end-of-line search is needed
+		// and a value on a last line without '\n' cannot yield a NULL end pointer.
+		syn_wordcount = atol(p2);
+	}
+
 	return true;
 }
 
@@ -809,6 +820,66 @@ namespace {
 	}
 }
 
+// Reads up to `wc` synonym entries from the .syn file at `url` into the
+// in-memory table. Returns false if the file cannot be stat'ed or mapped, or
+// if it ends before `wc` complete entries were read (entries parsed before
+// that point are kept).
+bool SynFile::load(const std::string& url, gulong wc) {
+	struct stat stat_buf;
+	if(stat(url.c_str(), &stat_buf))
+		return false;
+	MapFile syn;
+	if(!syn.open(url.c_str(), stat_buf.st_size))
+		return false;
+	const gchar *current = syn.begin();
+	const gchar *const end = current + stat_buf.st_size;
+	for(gulong i = 0; i < wc; i++) {
+		// each entry in a syn-file is:
+		// - 0-terminated UTF-8 string (the synonym)
+		// - 4-byte index of the original word in the .idx word list,
+		//   in network byte order
+		const gchar *terminator = static_cast<const gchar *>(
+			memchr(current, '\0', end - current));
+		guint32 idx;
+		// Stop on a truncated file or an overstated synwordcount instead of
+		// reading past the end of the mapping.
+		if(!terminator || static_cast<size_t>(end - (terminator+1)) < sizeof(idx))
+			return false;
+		gchar *lower_string = g_utf8_casefold(current, terminator - current);
+		std::string synonym(lower_string);
+		g_free(lower_string);
+		// Advance by the on-disk length: case folding may change the number
+		// of bytes, so synonym.length() must not be used for this.
+		current = terminator+1;
+		// Entries are packed, so the index may be unaligned: copy it out.
+		memcpy(&idx, current, sizeof(idx));
+		current += sizeof(idx);
+		synonyms[synonym] = g_ntohl(idx);
+	}
+	return true;
+}
+
+// Looks up the case-folded form of `str` among the loaded synonyms. On a hit
+// stores the associated index in `idx` and returns true; on a miss leaves
+// `idx` untouched and returns false.
+bool SynFile::lookup(const char *str, glong &idx) {
+	gchar *lower_string = g_utf8_casefold(str, -1);
+	auto it = synonyms.find(lower_string);
+	g_free(lower_string);
+	if(it == synonyms.end())
+		return false;
+	idx = it->second;
+	return true;
+}
+
+// Tries the synonym table first; only when `str` is not a known synonym is
+// the lookup delegated to idx_file.
+bool Dict::Lookup(const char *str, glong &idx) {
+	if(syn_file->lookup(str, idx))
+		return true;
+	return idx_file->lookup(str, idx);
+}
+
 bool Dict::load(const std::string& ifofilename)
 {
 	gulong idxfilesize;
@@ -846,6 +917,14 @@ bool Dict::load(const std::string& ifofilename)
 	if (!idx_file->load(fullfilename, wordcount, idxfilesize))
 		return false;
 
+	// The synonym file has the .ifo file's name with a "syn" extension.
+	// The result of load() is not checked, so a missing or unreadable .syn
+	// file does not make the dictionary load fail.
+	fullfilename=ifofilename;
+	fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "syn");
+	syn_file.reset(new SynFile);
+	syn_file->load(fullfilename, syn_wordcount);
+
 	//g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles());
 	return true;
 }
@@ -860,6 +939,7 @@ bool Dict::load_ifofile(const std::string& ifofilename, gulong &idxfilesize)
 
 	ifo_file_name=dict_info.ifo_file_name;
 	wordcount=dict_info.wordcount;
+	syn_wordcount=dict_info.syn_wordcount;
 	bookname=dict_info.bookname;
 
 	idxfilesize=dict_info.index_file_size;
diff --git a/src/stardict_lib.hpp b/src/stardict_lib.hpp
index cb85730..5b307e7 100644
--- a/src/stardict_lib.hpp
+++ b/src/stardict_lib.hpp
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include <map>
 
 #include "dictziplib.hpp"
 
@@ -66,6 +67,7 @@ private:
 struct DictInfo {
 	std::string ifo_file_name;
 	guint32 wordcount;
+	guint32 syn_wordcount;
 	std::string bookname;
 	std::string author;
 	std::string email;
@@ -73,6 +75,7 @@ struct DictInfo {
 	std::string date;
 	std::string description;
 	guint32 index_file_size;
+	guint32 syn_file_size;
 	std::string sametypesequence;
 
 	bool load_from_ifo_file(const std::string& ifofilename, bool istreedict);
@@ -91,6 +94,16 @@ public:
 	virtual bool lookup(const char *str, glong &idx) = 0;
 };
 
+// In-memory table of the synonyms read from a dictionary's .syn file.
+class SynFile {
+public:
+	bool load(const std::string& url, gulong wc);
+	bool lookup(const char *str, glong &idx);
+private:
+	// case-folded synonym -> index stored with it in the .syn file
+	std::map<std::string, guint32> synonyms;
+};
+
 class Dict : public DictBase {
 public:
 	Dict() {}
@@ -112,15 +125,17 @@ public:
 		*offset = idx_file->wordentry_offset;
 		*size = idx_file->wordentry_size;
 	}
-	bool Lookup(const char *str, glong &idx) { return idx_file->lookup(str, idx); }
+	bool Lookup(const char *str, glong &idx);
 
 	bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int 
iBuffLen); private: std::string ifo_file_name; gulong wordcount; + gulong syn_wordcount; std::string bookname; std::unique_ptr idx_file; + std::unique_ptr syn_file; bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize); }; From aa42132243d83a56c231ff5d469c73ec34bc6138 Mon Sep 17 00:00:00 2001 From: Peter Date: Thu, 6 Jul 2017 16:52:52 +0200 Subject: [PATCH 2/2] Add tests for synonyms --- CMakeLists.txt | 1 + .../stardict-test_synonyms-2.4.2/test.dict.dz | Bin 0 -> 64 bytes tests/stardict-test_synonyms-2.4.2/test.idx | Bin 0 -> 13 bytes tests/stardict-test_synonyms-2.4.2/test.ifo | 7 ++++++ tests/stardict-test_synonyms-2.4.2/test.syn | Bin 0 -> 16 bytes tests/stardict-test_synonyms-2.4.2/test.xml | 18 ++++++++++++++ tests/t_synonyms | 22 ++++++++++++++++++ 7 files changed, 48 insertions(+) create mode 100644 tests/stardict-test_synonyms-2.4.2/test.dict.dz create mode 100644 tests/stardict-test_synonyms-2.4.2/test.idx create mode 100644 tests/stardict-test_synonyms-2.4.2/test.ifo create mode 100644 tests/stardict-test_synonyms-2.4.2/test.syn create mode 100644 tests/stardict-test_synonyms-2.4.2/test.xml create mode 100755 tests/t_synonyms diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c95a26..eaf8048 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,6 +144,7 @@ if (BUILD_TESTS) add_sdcv_shell_test(t_list) add_sdcv_shell_test(t_use) + add_sdcv_shell_test(t_synonyms) add_sdcv_shell_test(t_interactive) add_sdcv_shell_test(t_utf8output) add_sdcv_shell_test(t_utf8input) diff --git a/tests/stardict-test_synonyms-2.4.2/test.dict.dz b/tests/stardict-test_synonyms-2.4.2/test.dict.dz new file mode 100644 index 0000000000000000000000000000000000000000..c081b8940a4b7aa800fa81d81e01c3acd9f23629 GIT binary patch literal 64 zcmb2|=HRI)jf-Sr=3xkO + + + 2.4.2 + Test synonyms + + + + + + + +
testfoobar + + + +
+
diff --git a/tests/t_synonyms b/tests/t_synonyms
new file mode 100755
index 0000000..3ad3951
--- /dev/null
+++ b/tests/t_synonyms
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+# Looks up foo, bar and test in the dictionary named "Test synonyms" and
+# requires each lookup to print the line "result of test".
+# Arguments: $1 = sdcv command, $2 = directory passed to --data-dir.
+
+set -e
+
+SDCV="$1"
+TEST_DIR="$2"
+
+unset SDCV_PAGER
+
+# test_word WORD: exit 1 with a message unless the "result" line of the
+# lookup output for WORD is exactly "result of test".
+test_word() {
+    WORD=$1
+    # grep exits non-zero when nothing matches; without "|| true" that would
+    # make "set -e" abort right here, before the message below is printed.
+    RES=$($SDCV -n --data-dir "$TEST_DIR" -u "Test synonyms" "$WORD" | grep result || true)
+    if [ "result of test" != "$RES" ]; then
+        echo "synonym for $WORD should be 'result of test' but was '$RES'"
+        exit 1
+    fi
+}
+
+test_word foo
+test_word bar
+test_word test
+
+exit 0