mirror of
https://github.com/Dushistov/sdcv.git
synced 2025-12-15 17:31:56 +00:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
eeee360fb0 | ||
|
|
f69973e1fa | ||
|
|
931fc98478 | ||
|
|
6f30be7815 | ||
|
|
1a926d1b69 | ||
|
|
e89cfa18b1 | ||
|
|
12d9ea5b97 | ||
|
|
920c2bafb9 | ||
|
|
5d2332b0cb | ||
|
|
452a4e07fb | ||
|
|
59ef936288 |
@@ -15,7 +15,7 @@ BreakBeforeBinaryOperators: true
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: true
|
||||
BinPackParameters: true
|
||||
ColumnLimit: 0
|
||||
ColumnLimit: 120
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: false
|
||||
DerivePointerAlignment: false
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
|
||||
@@ -3,6 +3,10 @@ project(sdcv)
|
||||
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
|
||||
cmake_policy(VERSION 3.5)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED True)
|
||||
set(CMAKE_CXX_EXTENSIONS False)
|
||||
|
||||
include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/compiler.cmake")
|
||||
|
||||
set(ZLIB_FIND_REQUIRED True)
|
||||
@@ -91,7 +95,7 @@ set(CPACK_PACKAGE_VENDOR "Evgeniy Dushistov <dushistov@mail.ru>")
|
||||
set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.org")
|
||||
set(CPACK_PACKAGE_VERSION_MAJOR "0")
|
||||
set(CPACK_PACKAGE_VERSION_MINOR "5")
|
||||
set(CPACK_PACKAGE_VERSION_PATCH "3")
|
||||
set(CPACK_PACKAGE_VERSION_PATCH "4")
|
||||
|
||||
set(sdcv_VERSION
|
||||
"${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
|
||||
@@ -144,5 +148,6 @@ if (BUILD_TESTS)
|
||||
add_sdcv_shell_test(t_datadir)
|
||||
add_sdcv_shell_test(t_return_code)
|
||||
add_sdcv_shell_test(t_multiple_results)
|
||||
add_sdcv_shell_test(t_newlines_in_ifo)
|
||||
|
||||
endif (BUILD_TESTS)
|
||||
|
||||
@@ -16,19 +16,6 @@ if (NOT DEFINED SDCV_COMPILER_IS_GCC_COMPATIBLE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (MSVC AND (MSVC_VERSION LESS 1900))
|
||||
message(FATAL_ERROR "MSVC version ${MSVC_VERSION} have no full c++11 support")
|
||||
elseif (MSVC)
|
||||
add_definitions(-DNOMINMAX)
|
||||
elseif (NOT MSVC)
|
||||
check_cxx_compiler_flag("-std=c++11" CXX_SUPPORTS_CXX11)
|
||||
if (CXX_SUPPORTS_CXX11)
|
||||
append("-std=c++11" CMAKE_CXX_FLAGS)
|
||||
else ()
|
||||
message(FATAL_ERROR "sdcv requires C++11 support but the '-std=c++11' flag isn't supported.")
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
if (SDCV_COMPILER_IS_GCC_COMPATIBLE)
|
||||
append("-Wall" "-Wextra" "-Wformat-security" "-Wcast-align" "-Werror=format" "-Wcast-qual" CMAKE_C_FLAGS)
|
||||
append("-Wall" "-pedantic" "-Wextra" "-Wformat-security" "-Wcast-align" "-Werror=format" "-Wcast-qual" CMAKE_CXX_FLAGS)
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#ifdef HAVE_MMAP
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
@@ -40,20 +41,25 @@ inline bool MapFile::open(const char *file_name, unsigned long file_size)
|
||||
size = file_size;
|
||||
#ifdef HAVE_MMAP
|
||||
if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) {
|
||||
//g_print("Open file %s failed!\n",fullfilename);
|
||||
// g_print("Open file %s failed!\n",fullfilename);
|
||||
return false;
|
||||
}
|
||||
struct stat st;
|
||||
if (fstat(mmap_fd, &st) == -1 || st.st_size < 0 || (st.st_size == 0 && S_ISREG(st.st_mode))
|
||||
|| sizeof(st.st_size) > sizeof(file_size) || static_cast<unsigned long>(st.st_size) != file_size) {
|
||||
close(mmap_fd);
|
||||
return false;
|
||||
}
|
||||
|
||||
data = (gchar *)mmap(nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0);
|
||||
if ((void *)data == (void *)(-1)) {
|
||||
//g_print("mmap file %s failed!\n",idxfilename);
|
||||
// g_print("mmap file %s failed!\n",idxfilename);
|
||||
data = nullptr;
|
||||
return false;
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS,
|
||||
FILE_ATTRIBUTE_NORMAL, 0);
|
||||
hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0,
|
||||
file_size, nullptr);
|
||||
hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0);
|
||||
hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0, file_size, nullptr);
|
||||
data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size);
|
||||
#else
|
||||
gsize read_len;
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <glib/gstdio.h>
|
||||
@@ -47,8 +48,8 @@ static bool bIsPureEnglish(const gchar *str)
|
||||
{
|
||||
// i think this should work even when it is UTF8 string :).
|
||||
for (int i = 0; str[i] != 0; i++)
|
||||
//if(str[i]<0)
|
||||
//if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK.
|
||||
// if(str[i]<0)
|
||||
// if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK.
|
||||
// Better use isascii() but not str[i]<0 while char is default unsigned in arm
|
||||
if (!isascii(str[i]))
|
||||
return false;
|
||||
@@ -78,108 +79,93 @@ bool DictInfo::load_from_ifo_file(const std::string &ifofilename,
|
||||
{
|
||||
ifo_file_name = ifofilename;
|
||||
glib::CharStr buffer;
|
||||
if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), nullptr, nullptr))
|
||||
gsize length = 0;
|
||||
if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), &length, nullptr)) {
|
||||
fprintf(stderr, "Can not read from %s\n", ifofilename.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
static const char TREEDICT_MAGIC_DATA[] = "StarDict's treedict ifo file";
|
||||
static const char DICT_MAGIC_DATA[] = "StarDict's dict ifo file";
|
||||
|
||||
const gchar *magic_data = istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA;
|
||||
static const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF, '\0' };
|
||||
if (!g_str_has_prefix(
|
||||
g_str_has_prefix(get_impl(buffer), (const gchar *)(utf8_bom)) ? get_impl(buffer) + 3 : get_impl(buffer),
|
||||
magic_data)) {
|
||||
static const gchar utf8_bom[] = { (gchar)0xEF, (gchar)0xBB, (gchar)0xBF, '\0' };
|
||||
|
||||
const gchar *p = get_impl(buffer);
|
||||
const gchar *end = p + length;
|
||||
|
||||
if (g_str_has_prefix(p, utf8_bom)) {
|
||||
p += strlen(utf8_bom);
|
||||
}
|
||||
if (!g_str_has_prefix(p, magic_data)) {
|
||||
fprintf(stderr, "No magic header(%s) in ifo file\n", magic_data);
|
||||
return false;
|
||||
}
|
||||
p += strlen(magic_data);
|
||||
|
||||
gchar *p1 = get_impl(buffer) + strlen(magic_data) - 1;
|
||||
|
||||
gchar *p2 = strstr(p1, "\nwordcount=");
|
||||
if (p2 == nullptr)
|
||||
std::map<std::string, std::string> key_value_map;
|
||||
while (p != end) {
|
||||
auto key_it = std::find_if(p, end, [](gchar ch) { return !g_ascii_isspace(ch); });
|
||||
if (key_it == end) {
|
||||
break;
|
||||
}
|
||||
auto eq_it = std::find(key_it, end, gchar('='));
|
||||
if (eq_it == end) {
|
||||
fprintf(stderr, "Invalid part of ifo (no '=') here: %s\n", key_it);
|
||||
return false;
|
||||
}
|
||||
auto val_it = std::find_if(eq_it + 1, end, [](gchar ch) { return !g_ascii_isspace(ch); });
|
||||
if (val_it == end) {
|
||||
key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string()));
|
||||
break;
|
||||
}
|
||||
|
||||
gchar *p3 = strchr(p2 + sizeof("\nwordcount=") - 1, '\n');
|
||||
auto line_end_it = std::find_if(val_it, end, [](gchar ch) { return ch == '\r' || ch == '\n'; });
|
||||
key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string(val_it, line_end_it)));
|
||||
if (line_end_it == end)
|
||||
break;
|
||||
p = line_end_it + 1;
|
||||
}
|
||||
|
||||
wordcount = atol(std::string(p2 + sizeof("\nwordcount=") - 1, p3 - (p2 + sizeof("\nwordcount=") - 1)).c_str());
|
||||
std::map<std::string, std::string>::const_iterator it;
|
||||
#define FIND_KEY(_key_) \
|
||||
it = key_value_map.find(_key_); \
|
||||
if (it == key_value_map.end()) { \
|
||||
fprintf(stderr, "Can not find '%s' in ifo file\n", _key_); \
|
||||
return false; \
|
||||
}
|
||||
|
||||
FIND_KEY("wordcount")
|
||||
wordcount = atol(it->second.c_str());
|
||||
|
||||
if (istreedict) {
|
||||
p2 = strstr(p1, "\ntdxfilesize=");
|
||||
if (p2 == nullptr)
|
||||
return false;
|
||||
|
||||
p3 = strchr(p2 + sizeof("\ntdxfilesize=") - 1, '\n');
|
||||
|
||||
index_file_size = atol(std::string(p2 + sizeof("\ntdxfilesize=") - 1, p3 - (p2 + sizeof("\ntdxfilesize=") - 1)).c_str());
|
||||
|
||||
FIND_KEY("tdxfilesize")
|
||||
index_file_size = atol(it->second.c_str());
|
||||
} else {
|
||||
FIND_KEY("idxfilesize")
|
||||
index_file_size = atol(it->second.c_str());
|
||||
}
|
||||
FIND_KEY("bookname")
|
||||
bookname = it->second;
|
||||
|
||||
p2 = strstr(p1, "\nidxfilesize=");
|
||||
if (p2 == nullptr)
|
||||
return false;
|
||||
|
||||
p3 = strchr(p2 + sizeof("\nidxfilesize=") - 1, '\n');
|
||||
index_file_size = atol(std::string(p2 + sizeof("\nidxfilesize=") - 1, p3 - (p2 + sizeof("\nidxfilesize=") - 1)).c_str());
|
||||
#define SET_IF_EXISTS(_key_) \
|
||||
it = key_value_map.find(#_key_); \
|
||||
if (it != key_value_map.end()) { \
|
||||
_key_ = it->second; \
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\nbookname=");
|
||||
|
||||
if (p2 == nullptr)
|
||||
return false;
|
||||
|
||||
p2 = p2 + sizeof("\nbookname=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
bookname.assign(p2, p3 - p2);
|
||||
|
||||
p2 = strstr(p1, "\nauthor=");
|
||||
if (p2) {
|
||||
p2 = p2 + sizeof("\nauthor=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
author.assign(p2, p3 - p2);
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\nemail=");
|
||||
if (p2) {
|
||||
p2 = p2 + sizeof("\nemail=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
email.assign(p2, p3 - p2);
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\nwebsite=");
|
||||
if (p2) {
|
||||
p2 = p2 + sizeof("\nwebsite=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
website.assign(p2, p3 - p2);
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\ndate=");
|
||||
if (p2) {
|
||||
p2 = p2 + sizeof("\ndate=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
date.assign(p2, p3 - p2);
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\ndescription=");
|
||||
if (p2) {
|
||||
p2 = p2 + sizeof("\ndescription=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
description.assign(p2, p3 - p2);
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\nsametypesequence=");
|
||||
if (p2) {
|
||||
p2 += sizeof("\nsametypesequence=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
sametypesequence.assign(p2, p3 - p2);
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\nsynwordcount=");
|
||||
SET_IF_EXISTS(author)
|
||||
SET_IF_EXISTS(email)
|
||||
SET_IF_EXISTS(website)
|
||||
SET_IF_EXISTS(date)
|
||||
SET_IF_EXISTS(description)
|
||||
SET_IF_EXISTS(sametypesequence)
|
||||
syn_wordcount = 0;
|
||||
if (p2) {
|
||||
p2 += sizeof("\nsynwordcount=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
syn_wordcount = atol(std::string(p2, p3 - p2).c_str());
|
||||
}
|
||||
|
||||
it = key_value_map.find("synwordcount");
|
||||
if (it != key_value_map.end())
|
||||
syn_wordcount = atol(it->second.c_str());
|
||||
#undef FIND_KEY
|
||||
#undef SET_IF_EXISTS
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -204,10 +190,10 @@ gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
|
||||
|
||||
guint32 data_size;
|
||||
gint sametypesequence_len = sametypesequence.length();
|
||||
//there have sametypesequence_len char being omitted.
|
||||
// there have sametypesequence_len char being omitted.
|
||||
data_size = idxitem_size + sizeof(guint32) + sametypesequence_len;
|
||||
//if the last item's size is determined by the end up '\0',then +=sizeof(gchar);
|
||||
//if the last item's size is determined by the head guint32 type data,then +=sizeof(guint32);
|
||||
// if the last item's size is determined by the end up '\0',then +=sizeof(gchar);
|
||||
// if the last item's size is determined by the head guint32 type data,then +=sizeof(guint32);
|
||||
switch (sametypesequence[sametypesequence_len - 1]) {
|
||||
case 'm':
|
||||
case 't':
|
||||
@@ -234,7 +220,7 @@ gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
|
||||
p1 = data + sizeof(guint32);
|
||||
p2 = get_impl(origin_data);
|
||||
guint32 sec_size;
|
||||
//copy the head items.
|
||||
// copy the head items.
|
||||
for (int i = 0; i < sametypesequence_len - 1; i++) {
|
||||
*p1 = sametypesequence[i];
|
||||
p1 += sizeof(gchar);
|
||||
@@ -272,7 +258,7 @@ gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
|
||||
break;
|
||||
}
|
||||
}
|
||||
//calculate the last item 's size.
|
||||
// calculate the last item 's size.
|
||||
sec_size = idxitem_size - (p2 - get_impl(origin_data));
|
||||
*p1 = sametypesequence[sametypesequence_len - 1];
|
||||
p1 += sizeof(gchar);
|
||||
@@ -286,7 +272,7 @@ gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
|
||||
case 'k':
|
||||
memcpy(p1, p2, sec_size);
|
||||
p1 += sec_size;
|
||||
*p1 = '\0'; //add the end up '\0';
|
||||
*p1 = '\0'; // add the end up '\0';
|
||||
break;
|
||||
case 'W':
|
||||
case 'P':
|
||||
@@ -542,7 +528,7 @@ inline const gchar *OffsetIndex::read_first_on_page_key(glong page_idx)
|
||||
std::min(sizeof(wordentry_buf), static_cast<size_t>(page_size)),
|
||||
1, idxfile);
|
||||
THROW_IF_ERROR(nitems == 1);
|
||||
//TODO: check returned values, deal with word entry that strlen>255.
|
||||
// TODO: check returned values, deal with word entry that strlen>255.
|
||||
return wordentry_buf;
|
||||
}
|
||||
|
||||
@@ -634,7 +620,7 @@ bool OffsetIndex::load(const std::string &url, gulong wc, gulong fsize, bool ver
|
||||
wordcount = wc;
|
||||
gulong npages = (wc - 1) / ENTR_PER_PAGE + 2;
|
||||
wordoffset.resize(npages);
|
||||
if (!load_cache(url)) { //map file will close after finish of block
|
||||
if (!load_cache(url)) { // map file will close after finish of block
|
||||
MapFile map_file;
|
||||
if (!map_file.open(url.c_str(), fsize))
|
||||
return false;
|
||||
@@ -756,10 +742,10 @@ bool OffsetIndex::lookup(const char *str, std::set<glong> &idxs, glong &next_idx
|
||||
}
|
||||
|
||||
if (!bFound)
|
||||
next_idx = iPage*ENTR_PER_PAGE + iFrom; // next
|
||||
next_idx = iPage * ENTR_PER_PAGE + iFrom; // next
|
||||
else {
|
||||
// Convert the found in-page index to the dict index.
|
||||
iThisIndex = iPage*ENTR_PER_PAGE + iThisIndex;
|
||||
iThisIndex = iPage * ENTR_PER_PAGE + iThisIndex;
|
||||
// In order to return all idxs that match the search string, walk
|
||||
// linearly behind and ahead of the found index.
|
||||
glong iHeadIndex = iThisIndex - 1; // do not include iThisIndex
|
||||
@@ -834,7 +820,7 @@ bool WordListIndex::lookup(const char *str, std::set<glong> &idxs, glong &next_i
|
||||
}
|
||||
}
|
||||
if (!bFound)
|
||||
next_idx = iFrom; //next
|
||||
next_idx = iFrom; // next
|
||||
else {
|
||||
// In order to return all idxs that match the search string, walk
|
||||
// linearly behind and ahead of the found index.
|
||||
@@ -905,19 +891,19 @@ bool SynFile::lookup(const char *str, std::set<glong> &idxs, glong &next_idx)
|
||||
}
|
||||
}
|
||||
if (!bFound)
|
||||
next_idx = iFrom; //next
|
||||
next_idx = iFrom; // next
|
||||
else {
|
||||
// In order to return all idxs that match the search string, walk
|
||||
// linearly behind and ahead of the found index.
|
||||
glong iHeadIndex = iThisIndex - 1; // do not include iThisIndex
|
||||
while (iHeadIndex >= 0 && stardict_strcmp(str, get_key(iHeadIndex)) == 0) {
|
||||
const gchar *key = get_key(iHeadIndex--);
|
||||
idxs.insert(g_ntohl(get_uint32(key+strlen(key)+1)));
|
||||
idxs.insert(g_ntohl(get_uint32(key + strlen(key) + 1)));
|
||||
}
|
||||
do {
|
||||
// no need to double-check iThisIndex -- we know it's a match already
|
||||
const gchar *key = get_key(iThisIndex++);
|
||||
idxs.insert(g_ntohl(get_uint32(key+strlen(key)+1)));
|
||||
idxs.insert(g_ntohl(get_uint32(key + strlen(key) + 1)));
|
||||
} while (iThisIndex <= iLast && stardict_strcmp(str, get_key(iThisIndex)) == 0);
|
||||
}
|
||||
}
|
||||
@@ -944,14 +930,14 @@ bool Dict::load(const std::string &ifofilename, bool verbose)
|
||||
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
|
||||
dictdzfile.reset(new DictData);
|
||||
if (!dictdzfile->open(fullfilename, 0)) {
|
||||
//g_print("open file %s failed!\n",fullfilename);
|
||||
// g_print("open file %s failed!\n",fullfilename);
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
fullfilename.erase(fullfilename.length() - sizeof(".dz") + 1, sizeof(".dz") - 1);
|
||||
dictfile = fopen(fullfilename.c_str(), "rb");
|
||||
if (!dictfile) {
|
||||
//g_print("open file %s failed!\n",fullfilename);
|
||||
// g_print("open file %s failed!\n",fullfilename);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -974,7 +960,7 @@ bool Dict::load(const std::string &ifofilename, bool verbose)
|
||||
syn_file.reset(new SynFile);
|
||||
syn_file->load(fullfilename, syn_wordcount);
|
||||
|
||||
//g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles());
|
||||
// g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles());
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1082,7 +1068,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
|
||||
|
||||
gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1);
|
||||
|
||||
//cut one char "s" or "d"
|
||||
// cut one char "s" or "d"
|
||||
if (!bFound && iWordLen > 1) {
|
||||
isupcase = sWord[iWordLen - 1] == 'S' || !strncmp(&sWord[iWordLen - 2], "ED", 2);
|
||||
if (isupcase || sWord[iWordLen - 1] == 's' || !strncmp(&sWord[iWordLen - 2], "ed", 2)) {
|
||||
@@ -1101,14 +1087,14 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
|
||||
}
|
||||
}
|
||||
|
||||
//cut "ly"
|
||||
// cut "ly"
|
||||
if (!bFound && iWordLen > 2) {
|
||||
isupcase = !strncmp(&sWord[iWordLen - 2], "LY", 2);
|
||||
if (isupcase || (!strncmp(&sWord[iWordLen - 2], "ly", 2))) {
|
||||
strcpy(sNewWord, sWord);
|
||||
sNewWord[iWordLen - 2] = '\0'; // cut "ly"
|
||||
if (iWordLen > 5 && sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4]
|
||||
&& !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { //doubled
|
||||
&& !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { // doubled
|
||||
|
||||
sNewWord[iWordLen - 3] = '\0';
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
@@ -1123,7 +1109,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
|
||||
g_free(casestr);
|
||||
}
|
||||
if (!bFound)
|
||||
sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; //restore
|
||||
sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; // restore
|
||||
}
|
||||
}
|
||||
if (!bFound) {
|
||||
@@ -1141,14 +1127,14 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
|
||||
}
|
||||
}
|
||||
|
||||
//cut "ing"
|
||||
// cut "ing"
|
||||
if (!bFound && iWordLen > 3) {
|
||||
isupcase = !strncmp(&sWord[iWordLen - 3], "ING", 3);
|
||||
if (isupcase || !strncmp(&sWord[iWordLen - 3], "ing", 3)) {
|
||||
strcpy(sNewWord, sWord);
|
||||
sNewWord[iWordLen - 3] = '\0';
|
||||
if (iWordLen > 6 && (sNewWord[iWordLen - 4] == sNewWord[iWordLen - 5])
|
||||
&& !bIsVowel(sNewWord[iWordLen - 5]) && bIsVowel(sNewWord[iWordLen - 6])) { //doubled
|
||||
&& !bIsVowel(sNewWord[iWordLen - 5]) && bIsVowel(sNewWord[iWordLen - 6])) { // doubled
|
||||
sNewWord[iWordLen - 4] = '\0';
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
@@ -1162,7 +1148,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
|
||||
g_free(casestr);
|
||||
}
|
||||
if (!bFound)
|
||||
sNewWord[iWordLen - 4] = sNewWord[iWordLen - 5]; //restore
|
||||
sNewWord[iWordLen - 4] = sNewWord[iWordLen - 5]; // restore
|
||||
}
|
||||
}
|
||||
if (!bFound) {
|
||||
@@ -1196,7 +1182,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
|
||||
}
|
||||
}
|
||||
|
||||
//cut two char "es"
|
||||
// cut two char "es"
|
||||
if (!bFound && iWordLen > 3) {
|
||||
isupcase = (!strncmp(&sWord[iWordLen - 2], "ES", 2) && (sWord[iWordLen - 3] == 'S' || sWord[iWordLen - 3] == 'X' || sWord[iWordLen - 3] == 'O' || (iWordLen > 4 && sWord[iWordLen - 3] == 'H' && (sWord[iWordLen - 4] == 'C' || sWord[iWordLen - 4] == 'S'))));
|
||||
if (isupcase || (!strncmp(&sWord[iWordLen - 2], "es", 2) && (sWord[iWordLen - 3] == 's' || sWord[iWordLen - 3] == 'x' || sWord[iWordLen - 3] == 'o' || (iWordLen > 4 && sWord[iWordLen - 3] == 'h' && (sWord[iWordLen - 4] == 'c' || sWord[iWordLen - 4] == 's'))))) {
|
||||
@@ -1215,14 +1201,14 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
|
||||
}
|
||||
}
|
||||
|
||||
//cut "ed"
|
||||
// cut "ed"
|
||||
if (!bFound && iWordLen > 3) {
|
||||
isupcase = !strncmp(&sWord[iWordLen - 2], "ED", 2);
|
||||
if (isupcase || !strncmp(&sWord[iWordLen - 2], "ed", 2)) {
|
||||
strcpy(sNewWord, sWord);
|
||||
sNewWord[iWordLen - 2] = '\0';
|
||||
if (iWordLen > 5 && (sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4])
|
||||
&& !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { //doubled
|
||||
&& !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { // doubled
|
||||
sNewWord[iWordLen - 3] = '\0';
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
@@ -1236,7 +1222,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
|
||||
g_free(casestr);
|
||||
}
|
||||
if (!bFound)
|
||||
sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; //restore
|
||||
sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; // restore
|
||||
}
|
||||
}
|
||||
if (!bFound) {
|
||||
@@ -1386,8 +1372,8 @@ bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_si
|
||||
if (progress_func)
|
||||
progress_func();
|
||||
|
||||
//if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) {
|
||||
//there are Chinese dicts and English dicts...
|
||||
// if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) {
|
||||
// there are Chinese dicts and English dicts...
|
||||
|
||||
const int iwords = narticles(iLib);
|
||||
for (int index = 0; index < iwords; index++) {
|
||||
@@ -1409,11 +1395,11 @@ bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_si
|
||||
bool bAlreadyInList = false;
|
||||
int iMaxDistanceAt = 0;
|
||||
for (int j = 0; j < reslist_size; j++) {
|
||||
if (oFuzzystruct[j].pMatchWord && strcmp(oFuzzystruct[j].pMatchWord, sCheck) == 0) { //already in list
|
||||
if (oFuzzystruct[j].pMatchWord && strcmp(oFuzzystruct[j].pMatchWord, sCheck) == 0) { // already in list
|
||||
bAlreadyInList = true;
|
||||
break;
|
||||
}
|
||||
//find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time.
|
||||
// find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time.
|
||||
if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance) {
|
||||
iMaxDistanceAt = j;
|
||||
}
|
||||
@@ -1460,7 +1446,7 @@ gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord)
|
||||
GPatternSpec *pspec = g_pattern_spec_new(word);
|
||||
|
||||
for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) {
|
||||
//if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
|
||||
// if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
|
||||
// -iMatchCount,so save time,but may got less result and the word may repeat.
|
||||
|
||||
if (oLib[iLib]->LookupWithRule(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB + 1)) {
|
||||
@@ -1470,7 +1456,7 @@ gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord)
|
||||
const gchar *sMatchWord = poGetWord(aiIndex[i], iLib);
|
||||
bool bAlreadyInList = false;
|
||||
for (int j = 0; j < iMatchCount; j++) {
|
||||
if (strcmp(ppMatchWord[j], sMatchWord) == 0) { //already in list
|
||||
if (strcmp(ppMatchWord[j], sMatchWord) == 0) { // already in list
|
||||
bAlreadyInList = true;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
@@ -30,7 +28,7 @@ inline void set_uint32(gchar *addr, guint32 val)
|
||||
struct cacheItem {
|
||||
guint32 offset;
|
||||
gchar *data;
|
||||
//write code here to make it inline
|
||||
// write code here to make it inline
|
||||
cacheItem() { data = nullptr; }
|
||||
~cacheItem() { g_free(data); }
|
||||
};
|
||||
@@ -68,7 +66,7 @@ private:
|
||||
gint cache_cur = 0;
|
||||
};
|
||||
|
||||
//this structure contain all information about dictionary
|
||||
// this structure contain all information about dictionary
|
||||
struct DictInfo {
|
||||
std::string ifo_file_name;
|
||||
guint32 wordcount;
|
||||
@@ -98,7 +96,8 @@ public:
|
||||
virtual void get_data(glong idx) = 0;
|
||||
virtual const gchar *get_key_and_data(glong idx) = 0;
|
||||
virtual bool lookup(const char *str, std::set<glong> &idxs, glong &next_idx) = 0;
|
||||
virtual bool lookup(const char *str, std::set<glong> &idxs) {
|
||||
virtual bool lookup(const char *str, std::set<glong> &idxs)
|
||||
{
|
||||
glong unused_next_idx;
|
||||
return lookup(str, idxs, unused_next_idx);
|
||||
};
|
||||
@@ -144,7 +143,8 @@ public:
|
||||
*size = idx_file->wordentry_size;
|
||||
}
|
||||
bool Lookup(const char *str, std::set<glong> &idxs, glong &next_idx);
|
||||
bool Lookup(const char *str, std::set<glong> &idxs) {
|
||||
bool Lookup(const char *str, std::set<glong> &idxs)
|
||||
{
|
||||
glong unused_next_idx;
|
||||
return Lookup(str, idxs, unused_next_idx);
|
||||
}
|
||||
@@ -169,7 +169,7 @@ public:
|
||||
Libs(std::function<void(void)> f = std::function<void(void)>())
|
||||
{
|
||||
progress_func = f;
|
||||
iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg.
|
||||
iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; // need to read from cfg.
|
||||
}
|
||||
void setVerbose(bool verbose) { verbose_ = verbose; }
|
||||
void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; }
|
||||
|
||||
0
tests/not-unix-newlines-ifo/russian/russian.dict
Normal file
0
tests/not-unix-newlines-ifo/russian/russian.dict
Normal file
0
tests/not-unix-newlines-ifo/russian/russian.idx
Normal file
0
tests/not-unix-newlines-ifo/russian/russian.idx
Normal file
9
tests/not-unix-newlines-ifo/russian/russian.ifo
Normal file
9
tests/not-unix-newlines-ifo/russian/russian.ifo
Normal file
@@ -0,0 +1,9 @@
|
||||
StarDict's dict ifo file
|
||||
version=3.0.0
|
||||
bookname=Russian-English Dictionary (ru-en)
|
||||
wordcount=415144
|
||||
idxfilesize=12344255
|
||||
sametypesequence=h
|
||||
synwordcount=1277580
|
||||
author=Vuizur
|
||||
description=
|
||||
0
tests/not-unix-newlines-ifo/russian/russian.syn
Normal file
0
tests/not-unix-newlines-ifo/russian/russian.syn
Normal file
@@ -18,7 +18,8 @@ test_json() {
|
||||
fi
|
||||
}
|
||||
|
||||
test_json '[{"name": "Test synonyms", "wordcount": "2"},
|
||||
test_json '[{"name": "Russian-English Dictionary (ru-en)", "wordcount": "415144"},
|
||||
{"name": "Test synonyms", "wordcount": "2"},
|
||||
{"name": "Test multiple results", "wordcount": "246"},
|
||||
{"name": "Sample 1 test dictionary", "wordcount": "1"},
|
||||
{"name": "test_dict", "wordcount": "1"}]' -x -j -l -n --data-dir "$TEST_DIR"
|
||||
|
||||
18
tests/t_newlines_in_ifo
Executable file
18
tests/t_newlines_in_ifo
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
PATH_TO_SDCV="$1"
|
||||
TEST_DIR="$2"
|
||||
|
||||
unset SDCV_PAGER
|
||||
unset STARDICT_DATA_DIR
|
||||
|
||||
RES=$("$PATH_TO_SDCV" -n -x --data-dir="$TEST_DIR/not-unix-newlines-ifo" -l | tail -n 1)
|
||||
|
||||
if [ "$RES" = "Russian-English Dictionary (ru-en) 415144" ]; then
|
||||
exit 0
|
||||
else
|
||||
echo "test failed, unexpected result: $RES" >&2
|
||||
exit 1
|
||||
fi
|
||||
Reference in New Issue
Block a user