From 4b52181898d77939353617e35ae010e50b18cf49 Mon Sep 17 00:00:00 2001
From: Peter
Date: Thu, 6 Jul 2017 09:09:56 +0200
Subject: [PATCH] Add support for .syn synonym files. Fixes #8.

---
Review notes (amended during review):
- SynFile::load bounds-checks every entry, advances by the on-disk string
  length instead of the case-folded length, and reads the index with
  memcpy instead of an unaligned pointer cast.
- load_from_ifo_file tolerates a synwordcount line without trailing newline.
- Dict::Lookup tolerates a missing syn_file.

 src/stardict_lib.cpp | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/stardict_lib.hpp | 16 +++++++++++++++-
 2 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/src/stardict_lib.cpp b/src/stardict_lib.cpp
index 2ea23f9..74de6a7 100644
--- a/src/stardict_lib.cpp
+++ b/src/stardict_lib.cpp
@@ -178,6 +178,16 @@ bool DictInfo::load_from_ifo_file(const std::string& ifofilename,
         sametypesequence.assign(p2, p3-p2);
     }
 
+    // Optional key: number of entries in the companion .syn file.
+    p2 = strstr(p1,"\nsynwordcount=");
+    syn_wordcount = 0;
+    if (p2) {
+        p2+=sizeof("\nsynwordcount=")-1;
+        p3 = strchr(p2, '\n');
+        // The key may be the last line of the file without a trailing newline.
+        syn_wordcount = atol(p3 ? std::string(p2, p3-p2).c_str() : p2);
+    }
+
     return true;
 }
 
@@ -809,6 +819,66 @@ namespace {
     }
 }
 
+// Loads the synonym table from the .syn file `url`, expected to hold `wc`
+// entries. Each entry is a 0-terminated UTF-8 string followed by a 4-byte
+// word index in network byte order (the same index space idx_file uses).
+// Returns false if the file cannot be stat'ed or mapped, or if it ends
+// before `wc` entries were read; entries parsed until then are kept.
+bool SynFile::load(const std::string& url, gulong wc) {
+    struct stat stat_buf;
+    if(stat(url.c_str(), &stat_buf))
+        return false;
+    MapFile syn;
+    if(!syn.open(url.c_str(), stat_buf.st_size))
+        return false;
+    const gchar *current = syn.begin();
+    const gchar *const end = current + stat_buf.st_size;
+    for(gulong i = 0; i < wc; i++) {
+        // Never read past the end of the file data: a truncated file or an
+        // overstated synwordcount must not cause an out-of-bounds access.
+        const void *nul = memchr(current, '\0', end - current);
+        if(!nul)
+            return false;
+        const size_t raw_len = static_cast<const gchar *>(nul) - current;
+        guint32 idx;
+        if(static_cast<size_t>(end - current) - raw_len - 1 < sizeof(idx))
+            return false;
+        gchar *lower_string = g_utf8_casefold(current, raw_len);
+        std::string synonym(lower_string);
+        g_free(lower_string);
+        // Advance by the on-disk length: case folding can change the
+        // number of bytes, so synonym.length() is not a safe stride.
+        current += raw_len+1;
+        // The index is not necessarily aligned, so copy it out.
+        memcpy(&idx, current, sizeof(idx));
+        current += sizeof(idx);
+        synonyms[synonym] = g_ntohl(idx);
+    }
+    return true;
+}
+
+// Looks up `str` case-insensitively among the loaded synonyms. On a hit,
+// stores the associated word index in `idx` and returns true; otherwise
+// leaves `idx` untouched and returns false.
+bool SynFile::lookup(const char *str, glong &idx) {
+    gchar *lower_string = g_utf8_casefold(str, -1);
+    const auto it = synonyms.find(lower_string);
+    g_free(lower_string);
+    if(it == synonyms.end())
+        return false;
+    idx = it->second;
+    return true;
+}
+
+// Resolves `str` to a word index, consulting the synonym table first and
+// falling back to the regular index file.
+bool Dict::Lookup(const char *str, glong &idx) {
+    // syn_file is only created by load(); tolerate lookups without it.
+    if(syn_file && syn_file->lookup(str, idx))
+        return true;
+    return idx_file->lookup(str, idx);
+}
+
 bool Dict::load(const std::string& ifofilename)
 {
     gulong idxfilesize;
@@ -846,6 +916,13 @@ bool Dict::load(const std::string& ifofilename)
     if (!idx_file->load(fullfilename, wordcount, idxfilesize))
         return false;
 
+    // Synonyms are best effort: syn_wordcount is 0 when the .ifo has no
+    // synwordcount key, and a failed load only means no synonym hits.
+    fullfilename=ifofilename;
+    fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "syn");
+    syn_file.reset(new SynFile);
+    syn_file->load(fullfilename, syn_wordcount);
+
     //g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles());
     return true;
 }
@@ -860,6 +937,7 @@ bool Dict::load_ifofile(const std::string& ifofilename, gulong &idxfilesize)
 
     ifo_file_name=dict_info.ifo_file_name;
     wordcount=dict_info.wordcount;
+    syn_wordcount=dict_info.syn_wordcount;
     bookname=dict_info.bookname;
 
     idxfilesize=dict_info.index_file_size;
diff --git a/src/stardict_lib.hpp b/src/stardict_lib.hpp
index cb85730..5b307e7 100644
--- a/src/stardict_lib.hpp
+++ b/src/stardict_lib.hpp
@@ -7,6 +7,7 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include <map>
 
 #include "dictziplib.hpp"
 
@@ -66,6 +67,7 @@ private:
 struct DictInfo {
     std::string ifo_file_name;
     guint32 wordcount;
+    guint32 syn_wordcount;
     std::string bookname;
     std::string author;
     std::string email;
@@ -73,6 +75,7 @@ struct DictInfo {
     std::string date;
     std::string description;
     guint32 index_file_size;
+    guint32 syn_file_size;
     std::string sametypesequence;
 
     bool load_from_ifo_file(const std::string& ifofilename, bool istreedict);
@@ -91,6 +94,15 @@ public:
     virtual bool lookup(const char *str, glong &idx) = 0;
 };
 
+// Case-insensitive synonym -> word index table read from a .syn file.
+class SynFile {
+public:
+    bool load(const std::string& url, gulong wc);
+    bool lookup(const char *str, glong &idx);
+private:
+    std::map<std::string, gulong> synonyms;
+};
+
 class Dict : public DictBase {
 public:
     Dict() {}
@@ -112,15 +124,17 @@ public:
         *offset = idx_file->wordentry_offset;
         *size = idx_file->wordentry_size;
     }
-    bool Lookup(const char *str, glong &idx) { return idx_file->lookup(str, idx); }
+    bool Lookup(const char *str, glong &idx);
 
     bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
 private:
     std::string ifo_file_name;
     gulong wordcount;
+    gulong syn_wordcount;
     std::string bookname;
 
     std::unique_ptr<IIndexFile> idx_file;
+    std::unique_ptr<SynFile> syn_file;
 
     bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize);
 };