Merge pull request #20 from ecraven/master

Add support for .syn synonym files.
2025-12-15 17:31:56 +00:00 · 2017-07-06 23:57:19 +03:00
parent 72a15b70a7 aa42132243
commit f510300f59
9 changed files with 121 additions and 1 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -144,6 +144,7 @@ if (BUILD_TESTS)
  add_sdcv_shell_test(t_list)
  add_sdcv_shell_test(t_use)
  add_sdcv_shell_test(t_synonyms)
  add_sdcv_shell_test(t_interactive)
  add_sdcv_shell_test(t_utf8output)
  add_sdcv_shell_test(t_utf8input)
--- a/src/stardict_lib.cpp
+++ b/src/stardict_lib.cpp
@@ -172,6 +172,14 @@ bool DictInfo::load_from_ifo_file(const std::string& ifofilename,
    sametypesequence.assign(p2, p3-p2);
  }
  p2 = strstr(p1,"\nsynwordcount=");
  syn_wordcount = 0;
  if (p2) {
    p2+=sizeof("\nsynwordcount=")-1;
    p3 = strchr(p2, '\n');
    syn_wordcount = atol(std::string(p2, p3-p2).c_str());
  }
  return true;
 }
@@ -803,6 +811,51 @@ namespace {
    }
 }
 bool SynFile::load(const std::string& url, gulong wc) {
 	struct stat stat_buf;
 	if(!stat(url.c_str(), &stat_buf)) {
 		MapFile syn;
 		if(!syn.open(url.c_str(), stat_buf.st_size))
 			return false;
 		const gchar *current = syn.begin();
 		for(unsigned long i = 0; i < wc; i++) {
 			// each entry in a syn-file is:
 			// - 0-terminated string
 			// 4-byte index into .dict file in network byte order
 			gchar *lower_string = g_utf8_casefold(current, -1);
 			std::string synonym(lower_string);
 			g_free(lower_string);
 			current += synonym.length()+1;
 			unsigned int idx = * reinterpret_cast<const unsigned int*>(current);
 			idx = g_ntohl(idx);
 			current += sizeof(idx);
 			synonyms[synonym] = idx;
 		}
 		return true;
 	} else {
 		return false;
 	}
 }
 bool SynFile::lookup(const char *str, glong &idx) {
 	gchar *lower_string = g_utf8_casefold(str, -1);
 	auto it = synonyms.find(lower_string);
 	if(it != synonyms.end()) {
 		g_free(lower_string);
 		idx = it->second;
 		return true;
 	}
 	g_free(lower_string);
 	return false;
 }
 /**
  * Resolve `str` to a word index: the synonym table is consulted first and
  * the index file is only queried when no synonym matches.
  *
  * @param str word to look up.
  * @param idx receives the index written by whichever lookup is used.
  * @return true when either the synonym table or the index file finds `str`.
  */
 bool Dict::Lookup(const char *str, glong &idx) {
 	// Short-circuit: idx_file is not asked when a synonym already matched.
 	return syn_file->lookup(str, idx) || idx_file->lookup(str, idx);
 }
 bool Dict::load(const std::string& ifofilename)
 {
 	gulong idxfilesize;
@@ -840,6 +893,11 @@ bool Dict::load(const std::string& ifofilename)
 	if (!idx_file->load(fullfilename, wordcount, idxfilesize))
 		return false;
 	fullfilename=ifofilename;
 	fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "syn");
 	syn_file.reset(new SynFile);
 	syn_file->load(fullfilename, syn_wordcount);
 	//g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles());
 	return true;
 }
@@ -854,6 +912,7 @@ bool Dict::load_ifofile(const std::string& ifofilename, gulong &idxfilesize)
 	ifo_file_name=dict_info.ifo_file_name;
 	wordcount=dict_info.wordcount;
 	syn_wordcount=dict_info.syn_wordcount;
 	bookname=dict_info.bookname;
 	idxfilesize=dict_info.index_file_size;
--- a/src/stardict_lib.hpp
+++ b/src/stardict_lib.hpp
@@ -7,6 +7,7 @@
 #include <string>
 #include <vector>
 #include <functional>
 #include <map>
 #include "dictziplib.hpp"
@@ -66,6 +67,7 @@ private:
 struct DictInfo {
 	std::string ifo_file_name;
 	guint32 wordcount;
 	guint32 syn_wordcount;
 	std::string bookname;
 	std::string author;
 	std::string email;
@@ -73,6 +75,7 @@ struct DictInfo {
 	std::string date;
 	std::string description;
 	guint32 index_file_size;
 	guint32 syn_file_size;
 	std::string sametypesequence;
 	bool load_from_ifo_file(const std::string& ifofilename, bool istreedict);
@@ -91,6 +94,14 @@ public:
 	virtual bool lookup(const char *str, glong &idx) = 0;
 };
 // Synonym table of a dictionary, filled from its ".syn" file.
 class SynFile {
 public:
 	// Reads `wc` entries from the file at `url` into the table.
 	// Returns false when the file cannot be stat'ed or opened.
 	bool load(const std::string& url, gulong wc);
 	// Case-insensitive search for `str`; on a match stores the associated
 	// index in `idx` and returns true, otherwise returns false.
 	bool lookup(const char *str, glong &idx);
 private:
 	// Case-folded synonym -> index value read from the .syn file.
 	std::map<std::string, gulong> synonyms;
 };
 class Dict : public DictBase {
 public:
 	Dict() {}
@@ -112,15 +123,17 @@ public:
        *offset = idx_file->wordentry_offset;
        *size = idx_file->wordentry_size;
    }
-	bool Lookup(const char *str, glong &idx) { return idx_file->lookup(str, idx); }
+	bool Lookup(const char *str, glong &idx);
 	bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
 private:	
 	std::string ifo_file_name;
 	gulong wordcount;
 	gulong syn_wordcount;
 	std::string bookname;
 	std::unique_ptr<IIndexFile> idx_file;
  	std::unique_ptr<SynFile> syn_file;
 	bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize);
 };
--- a/tests/stardict-test_synonyms-2.4.2/test.dict.dz
+++ b/tests/stardict-test_synonyms-2.4.2/test.dict.dz
--- a/tests/stardict-test_synonyms-2.4.2/test.idx
+++ b/tests/stardict-test_synonyms-2.4.2/test.idx
--- a/tests/stardict-test_synonyms-2.4.2/test.ifo
+++ b/tests/stardict-test_synonyms-2.4.2/test.ifo
@@ -0,0 +1,7 @@
 StarDict's dict ifo file
 version=2.4.2
 bookname=Test synonyms
 wordcount=1
 synwordcount=2
 idxfilesize=13
 sametypesequence=m
--- a/tests/stardict-test_synonyms-2.4.2/test.syn
+++ b/tests/stardict-test_synonyms-2.4.2/test.syn
--- a/tests/stardict-test_synonyms-2.4.2/test.xml
+++ b/tests/stardict-test_synonyms-2.4.2/test.xml
@@ -0,0 +1,18 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <stardict xmlns:xi="http://www.w3.org/2003/XInclude">
  <info>
    <version>2.4.2</version>
    <bookname>Test synonyms</bookname>
    <author></author>
    <email></email>
    <website></website>
    <description></description>
    <date></date>
    <dicttype></dicttype>
  </info>
  <article><key>test</key><synonym>foo</synonym><synonym>bar</synonym>
  <definition type="m">
    <![CDATA[result of test]]>
  </definition>
  </article>
 </stardict>
--- a/tests/t_synonyms
+++ b/tests/t_synonyms
@@ -0,0 +1,22 @@
 #!/bin/sh
 set -e
 SDCV="$1"
 TEST_DIR="$2"
 unset SDCV_PAGER
 # Look up the word $1 in the "Test synonyms" dictionary and fail the test
 # (exit status 1, with a diagnostic on stdout) unless the only output line
 # containing "result" is exactly "result of test".
 test_word() {
    WORD=$1
    # grep exits non-zero when nothing matches; with `set -e` that would abort
    # the script right here, before the diagnostic below could be printed.
    RES=$("$SDCV" -n --data-dir "$TEST_DIR" -u "Test synonyms" "$WORD" | grep result || true)
    if [ "result of test" != "$RES" ]; then
 	echo "synonym for $WORD should be 'result of test' but was '$RES'"
 	exit 1
    fi
 }
 test_word foo
 test_word bar
 test_word test
 exit 0