mirror of
https://github.com/Dushistov/sdcv.git
synced 2025-12-15 17:31:56 +00:00
Compare commits
31 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
eeee360fb0 | ||
|
|
f69973e1fa | ||
|
|
931fc98478 | ||
|
|
6f30be7815 | ||
|
|
1a926d1b69 | ||
|
|
e89cfa18b1 | ||
|
|
12d9ea5b97 | ||
|
|
920c2bafb9 | ||
|
|
5d2332b0cb | ||
|
|
452a4e07fb | ||
|
|
59ef936288 | ||
|
|
d054adb37c | ||
|
|
4a9b1dae3d | ||
|
|
6d385221d0 | ||
|
|
3d15ce3b07 | ||
|
|
51338ac5bb | ||
|
|
5ada75e08d | ||
|
|
c7d9944f7d | ||
|
|
3963e358cd | ||
|
|
3b26731b02 | ||
|
|
070a9fb0bd | ||
|
|
8f096629ec | ||
|
|
25768c6b80 | ||
|
|
4ae4207349 | ||
|
|
994c1c7ae6 | ||
|
|
d38f8f13c9 | ||
|
|
cc7bcb8b73 | ||
|
|
8e9f72ae57 | ||
|
|
88af1a077c | ||
|
|
b66799f358 | ||
|
|
be5c3a35bf |
@@ -15,7 +15,7 @@ BreakBeforeBinaryOperators: true
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: true
|
||||
BinPackParameters: true
|
||||
ColumnLimit: 0
|
||||
ColumnLimit: 120
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: false
|
||||
DerivePointerAlignment: false
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
|
||||
2
.github/workflows/main.yml
vendored
2
.github/workflows/main.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: 'recursive'
|
||||
- uses: jwlawson/actions-setup-cmake@v1.0
|
||||
- uses: jwlawson/actions-setup-cmake@v1.4
|
||||
with:
|
||||
cmake-version: '3.5.1'
|
||||
github-api-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -3,6 +3,10 @@ project(sdcv)
|
||||
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
|
||||
cmake_policy(VERSION 3.5)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED True)
|
||||
set(CMAKE_CXX_EXTENSIONS False)
|
||||
|
||||
include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/compiler.cmake")
|
||||
|
||||
set(ZLIB_FIND_REQUIRED True)
|
||||
@@ -91,7 +95,7 @@ set(CPACK_PACKAGE_VENDOR "Evgeniy Dushistov <dushistov@mail.ru>")
|
||||
set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.org")
|
||||
set(CPACK_PACKAGE_VERSION_MAJOR "0")
|
||||
set(CPACK_PACKAGE_VERSION_MINOR "5")
|
||||
set(CPACK_PACKAGE_VERSION_PATCH "3")
|
||||
set(CPACK_PACKAGE_VERSION_PATCH "4")
|
||||
|
||||
set(sdcv_VERSION
|
||||
"${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
|
||||
@@ -143,5 +147,7 @@ if (BUILD_TESTS)
|
||||
add_sdcv_shell_test(t_utf8input)
|
||||
add_sdcv_shell_test(t_datadir)
|
||||
add_sdcv_shell_test(t_return_code)
|
||||
add_sdcv_shell_test(t_multiple_results)
|
||||
add_sdcv_shell_test(t_newlines_in_ifo)
|
||||
|
||||
endif (BUILD_TESTS)
|
||||
|
||||
@@ -16,19 +16,6 @@ if (NOT DEFINED SDCV_COMPILER_IS_GCC_COMPATIBLE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (MSVC AND (MSVC_VERSION LESS 1900))
|
||||
message(FATAL_ERROR "MSVC version ${MSVC_VERSION} have no full c++11 support")
|
||||
elseif (MSVC)
|
||||
add_definitions(-DNOMINMAX)
|
||||
elseif (NOT MSVC)
|
||||
check_cxx_compiler_flag("-std=c++11" CXX_SUPPORTS_CXX11)
|
||||
if (CXX_SUPPORTS_CXX11)
|
||||
append("-std=c++11" CMAKE_CXX_FLAGS)
|
||||
else ()
|
||||
message(FATAL_ERROR "sdcv requires C++11 support but the '-std=c++11' flag isn't supported.")
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
if (SDCV_COMPILER_IS_GCC_COMPATIBLE)
|
||||
append("-Wall" "-Wextra" "-Wformat-security" "-Wcast-align" "-Werror=format" "-Wcast-qual" CMAKE_C_FLAGS)
|
||||
append("-Wall" "-pedantic" "-Wextra" "-Wformat-security" "-Wcast-align" "-Werror=format" "-Wcast-qual" CMAKE_CXX_FLAGS)
|
||||
|
||||
@@ -199,14 +199,18 @@ static std::string parse_data(const gchar *data, bool colorize_output)
|
||||
|
||||
void Library::SimpleLookup(const std::string &str, TSearchResultList &res_list)
|
||||
{
|
||||
glong ind;
|
||||
std::set<glong> wordIdxs;
|
||||
res_list.reserve(ndicts());
|
||||
for (gint idict = 0; idict < ndicts(); ++idict)
|
||||
if (SimpleLookupWord(str.c_str(), ind, idict))
|
||||
for (gint idict = 0; idict < ndicts(); ++idict) {
|
||||
wordIdxs.clear();
|
||||
if (SimpleLookupWord(str.c_str(), wordIdxs, idict))
|
||||
for (auto &wordIdx : wordIdxs)
|
||||
res_list.push_back(
|
||||
TSearchResult(dict_name(idict),
|
||||
poGetWord(ind, idict),
|
||||
parse_data(poGetWordData(ind, idict), colorize_output_)));
|
||||
poGetWord(wordIdx, idict),
|
||||
parse_data(poGetWordData(wordIdx, idict),
|
||||
colorize_output_)));
|
||||
}
|
||||
}
|
||||
|
||||
void Library::LookupWithFuzzy(const std::string &str, TSearchResultList &res_list)
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#ifdef HAVE_MMAP
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
@@ -43,6 +44,13 @@ inline bool MapFile::open(const char *file_name, unsigned long file_size)
|
||||
// g_print("Open file %s failed!\n",fullfilename);
|
||||
return false;
|
||||
}
|
||||
struct stat st;
|
||||
if (fstat(mmap_fd, &st) == -1 || st.st_size < 0 || (st.st_size == 0 && S_ISREG(st.st_mode))
|
||||
|| sizeof(st.st_size) > sizeof(file_size) || static_cast<unsigned long>(st.st_size) != file_size) {
|
||||
close(mmap_fd);
|
||||
return false;
|
||||
}
|
||||
|
||||
data = (gchar *)mmap(nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0);
|
||||
if ((void *)data == (void *)(-1)) {
|
||||
// g_print("mmap file %s failed!\n",idxfilename);
|
||||
@@ -50,10 +58,8 @@ inline bool MapFile::open(const char *file_name, unsigned long file_size)
|
||||
return false;
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS,
|
||||
FILE_ATTRIBUTE_NORMAL, 0);
|
||||
hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0,
|
||||
file_size, nullptr);
|
||||
hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0);
|
||||
hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0, file_size, nullptr);
|
||||
data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size);
|
||||
#else
|
||||
gsize read_len;
|
||||
|
||||
22
src/sdcv.cpp
22
src/sdcv.cpp
@@ -83,6 +83,7 @@ try {
|
||||
glib::CharStr opt_data_dir;
|
||||
gboolean only_data_dir = FALSE;
|
||||
gboolean colorize = FALSE;
|
||||
glib::StrArr word_list;
|
||||
|
||||
const GOptionEntry entries[] = {
|
||||
{ "version", 'v', 0, G_OPTION_ARG_NONE, &show_version,
|
||||
@@ -96,6 +97,8 @@ try {
|
||||
_("for use in scripts"), nullptr },
|
||||
{ "json-output", 'j', 0, G_OPTION_ARG_NONE, &json_output,
|
||||
_("print the result formatted as JSON"), nullptr },
|
||||
{ "json", 'j', 0, G_OPTION_ARG_NONE, &json_output,
|
||||
_("print the result formatted as JSON"), nullptr },
|
||||
{ "exact-search", 'e', 0, G_OPTION_ARG_NONE, &no_fuzzy,
|
||||
_("do not fuzzy-search for similar words, only return exact matches"), nullptr },
|
||||
{ "utf8-output", '0', 0, G_OPTION_ARG_NONE, &utf8_output,
|
||||
@@ -109,11 +112,13 @@ try {
|
||||
_("only use the dictionaries in data-dir, do not search in user and system directories"), nullptr },
|
||||
{ "color", 'c', 0, G_OPTION_ARG_NONE, &colorize,
|
||||
_("colorize the output"), nullptr },
|
||||
{ G_OPTION_REMAINING, 0, 0, G_OPTION_ARG_FILENAME_ARRAY, get_addr(word_list),
|
||||
_("search terms"), _(" words") },
|
||||
{},
|
||||
};
|
||||
|
||||
glib::Error error;
|
||||
GOptionContext *context = g_option_context_new(_(" words"));
|
||||
GOptionContext *context = g_option_context_new(nullptr);
|
||||
g_option_context_set_help_enabled(context, TRUE);
|
||||
g_option_context_add_main_entries(context, entries, nullptr);
|
||||
const gboolean parse_res = g_option_context_parse(context, &argc, &argv, get_addr(error));
|
||||
@@ -210,14 +215,19 @@ try {
|
||||
lib.load(dicts_dir_list, order_list, disable_list);
|
||||
|
||||
std::unique_ptr<IReadLine> io(create_readline_object());
|
||||
if (optind < argc) {
|
||||
if (word_list != nullptr) {
|
||||
search_result rval = SEARCH_SUCCESS;
|
||||
for (int i = optind; i < argc; ++i)
|
||||
if ((rval = lib.process_phrase(argv[i], *io, non_interactive)) != SEARCH_SUCCESS) {
|
||||
return rval;
|
||||
gchar **p = get_impl(word_list);
|
||||
while (*p) {
|
||||
search_result this_rval = lib.process_phrase(*p++, *io, non_interactive);
|
||||
// If we encounter any error, save it but continue through the word
|
||||
// list to check all requested words.
|
||||
if (rval == SEARCH_SUCCESS)
|
||||
rval = this_rval;
|
||||
}
|
||||
if (rval != SEARCH_SUCCESS)
|
||||
return rval;
|
||||
} else if (!non_interactive) {
|
||||
|
||||
std::string phrase;
|
||||
while (io->read(_("Enter word or phrase: "), phrase)) {
|
||||
if (lib.process_phrase(phrase.c_str(), *io) == SEARCH_FAILURE)
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <glib/gstdio.h>
|
||||
@@ -78,108 +79,93 @@ bool DictInfo::load_from_ifo_file(const std::string &ifofilename,
|
||||
{
|
||||
ifo_file_name = ifofilename;
|
||||
glib::CharStr buffer;
|
||||
if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), nullptr, nullptr))
|
||||
gsize length = 0;
|
||||
if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), &length, nullptr)) {
|
||||
fprintf(stderr, "Can not read from %s\n", ifofilename.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
static const char TREEDICT_MAGIC_DATA[] = "StarDict's treedict ifo file";
|
||||
static const char DICT_MAGIC_DATA[] = "StarDict's dict ifo file";
|
||||
|
||||
const gchar *magic_data = istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA;
|
||||
static const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF, '\0' };
|
||||
if (!g_str_has_prefix(
|
||||
g_str_has_prefix(get_impl(buffer), (const gchar *)(utf8_bom)) ? get_impl(buffer) + 3 : get_impl(buffer),
|
||||
magic_data)) {
|
||||
static const gchar utf8_bom[] = { (gchar)0xEF, (gchar)0xBB, (gchar)0xBF, '\0' };
|
||||
|
||||
const gchar *p = get_impl(buffer);
|
||||
const gchar *end = p + length;
|
||||
|
||||
if (g_str_has_prefix(p, utf8_bom)) {
|
||||
p += strlen(utf8_bom);
|
||||
}
|
||||
if (!g_str_has_prefix(p, magic_data)) {
|
||||
fprintf(stderr, "No magic header(%s) in ifo file\n", magic_data);
|
||||
return false;
|
||||
}
|
||||
p += strlen(magic_data);
|
||||
|
||||
gchar *p1 = get_impl(buffer) + strlen(magic_data) - 1;
|
||||
|
||||
gchar *p2 = strstr(p1, "\nwordcount=");
|
||||
if (p2 == nullptr)
|
||||
std::map<std::string, std::string> key_value_map;
|
||||
while (p != end) {
|
||||
auto key_it = std::find_if(p, end, [](gchar ch) { return !g_ascii_isspace(ch); });
|
||||
if (key_it == end) {
|
||||
break;
|
||||
}
|
||||
auto eq_it = std::find(key_it, end, gchar('='));
|
||||
if (eq_it == end) {
|
||||
fprintf(stderr, "Invalid part of ifo (no '=') here: %s\n", key_it);
|
||||
return false;
|
||||
}
|
||||
auto val_it = std::find_if(eq_it + 1, end, [](gchar ch) { return !g_ascii_isspace(ch); });
|
||||
if (val_it == end) {
|
||||
key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string()));
|
||||
break;
|
||||
}
|
||||
|
||||
gchar *p3 = strchr(p2 + sizeof("\nwordcount=") - 1, '\n');
|
||||
auto line_end_it = std::find_if(val_it, end, [](gchar ch) { return ch == '\r' || ch == '\n'; });
|
||||
key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string(val_it, line_end_it)));
|
||||
if (line_end_it == end)
|
||||
break;
|
||||
p = line_end_it + 1;
|
||||
}
|
||||
|
||||
wordcount = atol(std::string(p2 + sizeof("\nwordcount=") - 1, p3 - (p2 + sizeof("\nwordcount=") - 1)).c_str());
|
||||
std::map<std::string, std::string>::const_iterator it;
|
||||
#define FIND_KEY(_key_) \
|
||||
it = key_value_map.find(_key_); \
|
||||
if (it == key_value_map.end()) { \
|
||||
fprintf(stderr, "Can not find '%s' in ifo file\n", _key_); \
|
||||
return false; \
|
||||
}
|
||||
|
||||
FIND_KEY("wordcount")
|
||||
wordcount = atol(it->second.c_str());
|
||||
|
||||
if (istreedict) {
|
||||
p2 = strstr(p1, "\ntdxfilesize=");
|
||||
if (p2 == nullptr)
|
||||
return false;
|
||||
|
||||
p3 = strchr(p2 + sizeof("\ntdxfilesize=") - 1, '\n');
|
||||
|
||||
index_file_size = atol(std::string(p2 + sizeof("\ntdxfilesize=") - 1, p3 - (p2 + sizeof("\ntdxfilesize=") - 1)).c_str());
|
||||
|
||||
FIND_KEY("tdxfilesize")
|
||||
index_file_size = atol(it->second.c_str());
|
||||
} else {
|
||||
FIND_KEY("idxfilesize")
|
||||
index_file_size = atol(it->second.c_str());
|
||||
}
|
||||
FIND_KEY("bookname")
|
||||
bookname = it->second;
|
||||
|
||||
p2 = strstr(p1, "\nidxfilesize=");
|
||||
if (p2 == nullptr)
|
||||
return false;
|
||||
|
||||
p3 = strchr(p2 + sizeof("\nidxfilesize=") - 1, '\n');
|
||||
index_file_size = atol(std::string(p2 + sizeof("\nidxfilesize=") - 1, p3 - (p2 + sizeof("\nidxfilesize=") - 1)).c_str());
|
||||
#define SET_IF_EXISTS(_key_) \
|
||||
it = key_value_map.find(#_key_); \
|
||||
if (it != key_value_map.end()) { \
|
||||
_key_ = it->second; \
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\nbookname=");
|
||||
|
||||
if (p2 == nullptr)
|
||||
return false;
|
||||
|
||||
p2 = p2 + sizeof("\nbookname=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
bookname.assign(p2, p3 - p2);
|
||||
|
||||
p2 = strstr(p1, "\nauthor=");
|
||||
if (p2) {
|
||||
p2 = p2 + sizeof("\nauthor=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
author.assign(p2, p3 - p2);
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\nemail=");
|
||||
if (p2) {
|
||||
p2 = p2 + sizeof("\nemail=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
email.assign(p2, p3 - p2);
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\nwebsite=");
|
||||
if (p2) {
|
||||
p2 = p2 + sizeof("\nwebsite=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
website.assign(p2, p3 - p2);
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\ndate=");
|
||||
if (p2) {
|
||||
p2 = p2 + sizeof("\ndate=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
date.assign(p2, p3 - p2);
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\ndescription=");
|
||||
if (p2) {
|
||||
p2 = p2 + sizeof("\ndescription=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
description.assign(p2, p3 - p2);
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\nsametypesequence=");
|
||||
if (p2) {
|
||||
p2 += sizeof("\nsametypesequence=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
sametypesequence.assign(p2, p3 - p2);
|
||||
}
|
||||
|
||||
p2 = strstr(p1, "\nsynwordcount=");
|
||||
SET_IF_EXISTS(author)
|
||||
SET_IF_EXISTS(email)
|
||||
SET_IF_EXISTS(website)
|
||||
SET_IF_EXISTS(date)
|
||||
SET_IF_EXISTS(description)
|
||||
SET_IF_EXISTS(sametypesequence)
|
||||
syn_wordcount = 0;
|
||||
if (p2) {
|
||||
p2 += sizeof("\nsynwordcount=") - 1;
|
||||
p3 = strchr(p2, '\n');
|
||||
syn_wordcount = atol(std::string(p2, p3 - p2).c_str());
|
||||
}
|
||||
|
||||
it = key_value_map.find("synwordcount");
|
||||
if (it != key_value_map.end())
|
||||
syn_wordcount = atol(it->second.c_str());
|
||||
#undef FIND_KEY
|
||||
#undef SET_IF_EXISTS
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -450,7 +436,7 @@ public:
|
||||
{
|
||||
return get_key(idx);
|
||||
}
|
||||
bool lookup(const char *str, glong &idx) override;
|
||||
bool lookup(const char *str, std::set<glong> &idxs, glong &next_idx) override;
|
||||
|
||||
private:
|
||||
static const gint ENTR_PER_PAGE = 32;
|
||||
@@ -511,7 +497,7 @@ public:
|
||||
get_data(idx);
|
||||
return get_key(idx);
|
||||
}
|
||||
bool lookup(const char *str, glong &idx) override;
|
||||
bool lookup(const char *str, std::set<glong> &idxs, glong &next_idx) override;
|
||||
|
||||
private:
|
||||
gchar *idxdatabuf;
|
||||
@@ -698,47 +684,52 @@ const gchar *OffsetIndex::get_key(glong idx)
|
||||
return page.entries[idx_in_page].keystr;
|
||||
}
|
||||
|
||||
bool OffsetIndex::lookup(const char *str, glong &idx)
|
||||
bool OffsetIndex::lookup(const char *str, std::set<glong> &idxs, glong &next_idx)
|
||||
{
|
||||
bool bFound = false;
|
||||
glong iFrom;
|
||||
glong iTo = wordoffset.size() - 2;
|
||||
gint cmpint;
|
||||
glong iThisIndex;
|
||||
|
||||
if (stardict_strcmp(str, first.keystr.c_str()) < 0) {
|
||||
idx = 0;
|
||||
next_idx = 0;
|
||||
return false;
|
||||
} else if (stardict_strcmp(str, real_last.keystr.c_str()) > 0) {
|
||||
idx = INVALID_INDEX;
|
||||
next_idx = INVALID_INDEX;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Search for the first page where the word is likely to be located.
|
||||
glong iFrom = 0, iTo = wordoffset.size() - 2;
|
||||
glong iPage = 0, iThisIndex = 0;
|
||||
while (iFrom <= iTo) {
|
||||
iThisIndex = (iFrom + iTo) / 2;
|
||||
glong cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex));
|
||||
if (cmpint > 0)
|
||||
iFrom = iThisIndex + 1;
|
||||
else if (cmpint < 0)
|
||||
iTo = iThisIndex - 1;
|
||||
else {
|
||||
bFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (bFound) {
|
||||
// We can use this found index (even though it might not be the first)
|
||||
// because we will search backwards later and catch any entries on
|
||||
// previous pages.
|
||||
iPage = iThisIndex;
|
||||
iThisIndex = 0; // first item in the page
|
||||
} else {
|
||||
iPage = iTo; // prev
|
||||
// Not found at the start of a page, so search within the page that
|
||||
// should contain it. Binary search here is slightly overkill (we're
|
||||
// searching at most ENTR_PER_PAGE = 32 elements) but this way next_idx
|
||||
// is treated the same as other Lookup methods.
|
||||
gulong netr = load_page(iPage);
|
||||
iFrom = 0;
|
||||
iThisIndex = 0;
|
||||
while (iFrom <= iTo) {
|
||||
iThisIndex = (iFrom + iTo) / 2;
|
||||
cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex));
|
||||
if (cmpint > 0)
|
||||
iFrom = iThisIndex + 1;
|
||||
else if (cmpint < 0)
|
||||
iTo = iThisIndex - 1;
|
||||
else {
|
||||
bFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!bFound)
|
||||
idx = iTo; //prev
|
||||
else
|
||||
idx = iThisIndex;
|
||||
}
|
||||
if (!bFound) {
|
||||
gulong netr = load_page(idx);
|
||||
iFrom = 1; // Needn't search the first word anymore.
|
||||
iTo = netr - 1;
|
||||
iThisIndex = 0;
|
||||
while (iFrom <= iTo) {
|
||||
iThisIndex = (iFrom + iTo) / 2;
|
||||
cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr);
|
||||
glong cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr);
|
||||
if (cmpint > 0)
|
||||
iFrom = iThisIndex + 1;
|
||||
else if (cmpint < 0)
|
||||
@@ -748,13 +739,21 @@ bool OffsetIndex::lookup(const char *str, glong &idx)
|
||||
break;
|
||||
}
|
||||
}
|
||||
idx *= ENTR_PER_PAGE;
|
||||
}
|
||||
|
||||
if (!bFound)
|
||||
idx += iFrom; //next
|
||||
else
|
||||
idx += iThisIndex;
|
||||
} else {
|
||||
idx *= ENTR_PER_PAGE;
|
||||
next_idx = iPage * ENTR_PER_PAGE + iFrom; // next
|
||||
else {
|
||||
// Convert the found in-page index to the dict index.
|
||||
iThisIndex = iPage * ENTR_PER_PAGE + iThisIndex;
|
||||
// In order to return all idxs that match the search string, walk
|
||||
// linearly behind and ahead of the found index.
|
||||
glong iHeadIndex = iThisIndex - 1; // do not include iThisIndex
|
||||
while (iHeadIndex >= 0 && stardict_strcmp(str, get_key(iHeadIndex)) == 0)
|
||||
idxs.insert(iHeadIndex--);
|
||||
do // no need to double-check iThisIndex -- we know it's a match already
|
||||
idxs.insert(iThisIndex++);
|
||||
while (iThisIndex <= real_last.idx && stardict_strcmp(str, get_key(iThisIndex)) == 0);
|
||||
}
|
||||
return bFound;
|
||||
}
|
||||
@@ -795,18 +794,18 @@ void WordListIndex::get_data(glong idx)
|
||||
wordentry_size = g_ntohl(get_uint32(p1));
|
||||
}
|
||||
|
||||
bool WordListIndex::lookup(const char *str, glong &idx)
|
||||
bool WordListIndex::lookup(const char *str, std::set<glong> &idxs, glong &next_idx)
|
||||
{
|
||||
bool bFound = false;
|
||||
glong iTo = wordlist.size() - 2;
|
||||
glong iLast = wordlist.size() - 2;
|
||||
|
||||
if (stardict_strcmp(str, get_key(0)) < 0) {
|
||||
idx = 0;
|
||||
} else if (stardict_strcmp(str, get_key(iTo)) > 0) {
|
||||
idx = INVALID_INDEX;
|
||||
next_idx = 0;
|
||||
} else if (stardict_strcmp(str, get_key(iLast)) > 0) {
|
||||
next_idx = INVALID_INDEX;
|
||||
} else {
|
||||
glong iThisIndex = 0;
|
||||
glong iFrom = 0;
|
||||
glong iFrom = 0, iTo = iLast;
|
||||
gint cmpint;
|
||||
while (iFrom <= iTo) {
|
||||
iThisIndex = (iFrom + iTo) / 2;
|
||||
@@ -821,9 +820,17 @@ bool WordListIndex::lookup(const char *str, glong &idx)
|
||||
}
|
||||
}
|
||||
if (!bFound)
|
||||
idx = iFrom; //next
|
||||
else
|
||||
idx = iThisIndex;
|
||||
next_idx = iFrom; // next
|
||||
else {
|
||||
// In order to return all idxs that match the search string, walk
|
||||
// linearly behind and ahead of the found index.
|
||||
glong iHeadIndex = iThisIndex - 1; // do not include iThisIndex
|
||||
while (iHeadIndex >= 0 && stardict_strcmp(str, get_key(iHeadIndex)) == 0)
|
||||
idxs.insert(iHeadIndex--);
|
||||
do // no need to double-check iThisIndex -- we know it's a match already
|
||||
idxs.insert(iThisIndex++);
|
||||
while (iThisIndex <= iLast && stardict_strcmp(str, get_key(iThisIndex)) == 0);
|
||||
}
|
||||
}
|
||||
return bFound;
|
||||
}
|
||||
@@ -833,41 +840,82 @@ bool SynFile::load(const std::string &url, gulong wc)
|
||||
{
|
||||
struct stat stat_buf;
|
||||
if (!stat(url.c_str(), &stat_buf)) {
|
||||
MapFile syn;
|
||||
if (!syn.open(url.c_str(), stat_buf.st_size))
|
||||
|
||||
if (!synfile.open(url.c_str(), stat_buf.st_size))
|
||||
return false;
|
||||
const gchar *current = syn.begin();
|
||||
|
||||
synlist.resize(wc + 1);
|
||||
gchar *p1 = synfile.begin();
|
||||
|
||||
for (unsigned long i = 0; i < wc; i++) {
|
||||
// each entry in a syn-file is:
|
||||
// - 0-terminated string
|
||||
// 4-byte index into .dict file in network byte order
|
||||
glib::CharStr lower_string{ g_utf8_casefold(current, -1) };
|
||||
std::string synonym{ get_impl(lower_string) };
|
||||
current += synonym.length() + 1;
|
||||
const guint32 idx = g_ntohl(get_uint32(current));
|
||||
current += sizeof(idx);
|
||||
synonyms[synonym] = idx;
|
||||
|
||||
synlist[i] = p1;
|
||||
p1 += strlen(p1) + 1 + 4;
|
||||
}
|
||||
synlist[wc] = p1;
|
||||
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool SynFile::lookup(const char *str, glong &idx)
|
||||
bool SynFile::lookup(const char *str, std::set<glong> &idxs, glong &next_idx)
|
||||
{
|
||||
glib::CharStr lower_string{ g_utf8_casefold(str, -1) };
|
||||
auto it = synonyms.find(get_impl(lower_string));
|
||||
if (it != synonyms.end()) {
|
||||
idx = it->second;
|
||||
return true;
|
||||
}
|
||||
bool bFound = false;
|
||||
glong iLast = synlist.size() - 2;
|
||||
if (iLast < 0)
|
||||
return false;
|
||||
|
||||
if (stardict_strcmp(str, get_key(0)) < 0) {
|
||||
next_idx = 0;
|
||||
} else if (stardict_strcmp(str, get_key(iLast)) > 0) {
|
||||
next_idx = INVALID_INDEX;
|
||||
} else {
|
||||
glong iThisIndex = 0;
|
||||
glong iFrom = 0, iTo = iLast;
|
||||
gint cmpint;
|
||||
while (iFrom <= iTo) {
|
||||
iThisIndex = (iFrom + iTo) / 2;
|
||||
cmpint = stardict_strcmp(str, get_key(iThisIndex));
|
||||
if (cmpint > 0)
|
||||
iFrom = iThisIndex + 1;
|
||||
else if (cmpint < 0)
|
||||
iTo = iThisIndex - 1;
|
||||
else {
|
||||
bFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!bFound)
|
||||
next_idx = iFrom; // next
|
||||
else {
|
||||
// In order to return all idxs that match the search string, walk
|
||||
// linearly behind and ahead of the found index.
|
||||
glong iHeadIndex = iThisIndex - 1; // do not include iThisIndex
|
||||
while (iHeadIndex >= 0 && stardict_strcmp(str, get_key(iHeadIndex)) == 0) {
|
||||
const gchar *key = get_key(iHeadIndex--);
|
||||
idxs.insert(g_ntohl(get_uint32(key + strlen(key) + 1)));
|
||||
}
|
||||
do {
|
||||
// no need to double-check iThisIndex -- we know it's a match already
|
||||
const gchar *key = get_key(iThisIndex++);
|
||||
idxs.insert(g_ntohl(get_uint32(key + strlen(key) + 1)));
|
||||
} while (iThisIndex <= iLast && stardict_strcmp(str, get_key(iThisIndex)) == 0);
|
||||
}
|
||||
}
|
||||
return bFound;
|
||||
}
|
||||
|
||||
bool Dict::Lookup(const char *str, glong &idx)
|
||||
bool Dict::Lookup(const char *str, std::set<glong> &idxs, glong &next_idx)
|
||||
{
|
||||
return syn_file->lookup(str, idx) || idx_file->lookup(str, idx);
|
||||
bool found = false;
|
||||
found |= syn_file->lookup(str, idxs, next_idx);
|
||||
found |= idx_file->lookup(str, idxs, next_idx);
|
||||
return found;
|
||||
}
|
||||
|
||||
bool Dict::load(const std::string &ifofilename, bool verbose)
|
||||
@@ -975,120 +1023,8 @@ void Libs::load(const std::list<std::string> &dicts_dirs,
|
||||
});
|
||||
}
|
||||
|
||||
const gchar *Libs::poGetCurrentWord(glong *iCurrent)
|
||||
bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices, int iLib)
|
||||
{
|
||||
const gchar *poCurrentWord = nullptr;
|
||||
const gchar *word;
|
||||
for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) {
|
||||
if (iCurrent[iLib] == INVALID_INDEX)
|
||||
continue;
|
||||
if (iCurrent[iLib] >= narticles(iLib) || iCurrent[iLib] < 0)
|
||||
continue;
|
||||
if (poCurrentWord == nullptr) {
|
||||
poCurrentWord = poGetWord(iCurrent[iLib], iLib);
|
||||
} else {
|
||||
word = poGetWord(iCurrent[iLib], iLib);
|
||||
|
||||
if (stardict_strcmp(poCurrentWord, word) > 0)
|
||||
poCurrentWord = word;
|
||||
}
|
||||
}
|
||||
return poCurrentWord;
|
||||
}
|
||||
|
||||
const gchar *Libs::poGetNextWord(const gchar *sWord, glong *iCurrent)
|
||||
{
|
||||
// the input can be:
|
||||
// (word,iCurrent),read word,write iNext to iCurrent,and return next word. used by TopWin::NextCallback();
|
||||
// (nullptr,iCurrent),read iCurrent,write iNext to iCurrent,and return next word. used by AppCore::ListWords();
|
||||
const gchar *poCurrentWord = nullptr;
|
||||
size_t iCurrentLib = 0;
|
||||
const gchar *word;
|
||||
|
||||
for (size_t iLib = 0; iLib < oLib.size(); ++iLib) {
|
||||
if (sWord)
|
||||
oLib[iLib]->Lookup(sWord, iCurrent[iLib]);
|
||||
if (iCurrent[iLib] == INVALID_INDEX)
|
||||
continue;
|
||||
if (iCurrent[iLib] >= narticles(iLib) || iCurrent[iLib] < 0)
|
||||
continue;
|
||||
if (poCurrentWord == nullptr) {
|
||||
poCurrentWord = poGetWord(iCurrent[iLib], iLib);
|
||||
iCurrentLib = iLib;
|
||||
} else {
|
||||
word = poGetWord(iCurrent[iLib], iLib);
|
||||
|
||||
if (stardict_strcmp(poCurrentWord, word) > 0) {
|
||||
poCurrentWord = word;
|
||||
iCurrentLib = iLib;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (poCurrentWord) {
|
||||
iCurrent[iCurrentLib]++;
|
||||
for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) {
|
||||
if (iLib == iCurrentLib)
|
||||
continue;
|
||||
if (iCurrent[iLib] == INVALID_INDEX)
|
||||
continue;
|
||||
if (iCurrent[iLib] >= narticles(iLib) || iCurrent[iLib] < 0)
|
||||
continue;
|
||||
if (strcmp(poCurrentWord, poGetWord(iCurrent[iLib], iLib)) == 0)
|
||||
iCurrent[iLib]++;
|
||||
}
|
||||
poCurrentWord = poGetCurrentWord(iCurrent);
|
||||
}
|
||||
return poCurrentWord;
|
||||
}
|
||||
|
||||
const gchar *
|
||||
Libs::poGetPreWord(glong *iCurrent)
|
||||
{
|
||||
// used by TopWin::PreviousCallback(); the iCurrent is cached by AppCore::TopWinWordChange();
|
||||
const gchar *poCurrentWord = nullptr;
|
||||
std::vector<Dict *>::size_type iCurrentLib = 0;
|
||||
const gchar *word;
|
||||
|
||||
for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) {
|
||||
if (iCurrent[iLib] == INVALID_INDEX)
|
||||
iCurrent[iLib] = narticles(iLib);
|
||||
else {
|
||||
if (iCurrent[iLib] > narticles(iLib) || iCurrent[iLib] <= 0)
|
||||
continue;
|
||||
}
|
||||
if (poCurrentWord == nullptr) {
|
||||
poCurrentWord = poGetWord(iCurrent[iLib] - 1, iLib);
|
||||
iCurrentLib = iLib;
|
||||
} else {
|
||||
word = poGetWord(iCurrent[iLib] - 1, iLib);
|
||||
if (stardict_strcmp(poCurrentWord, word) < 0) {
|
||||
poCurrentWord = word;
|
||||
iCurrentLib = iLib;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (poCurrentWord) {
|
||||
iCurrent[iCurrentLib]--;
|
||||
for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) {
|
||||
if (iLib == iCurrentLib)
|
||||
continue;
|
||||
if (iCurrent[iLib] > narticles(iLib) || iCurrent[iLib] <= 0)
|
||||
continue;
|
||||
if (strcmp(poCurrentWord, poGetWord(iCurrent[iLib] - 1, iLib)) == 0) {
|
||||
iCurrent[iLib]--;
|
||||
} else {
|
||||
if (iCurrent[iLib] == narticles(iLib))
|
||||
iCurrent[iLib] = INVALID_INDEX;
|
||||
}
|
||||
}
|
||||
}
|
||||
return poCurrentWord;
|
||||
}
|
||||
|
||||
bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
{
|
||||
glong iIndex;
|
||||
bool bFound = false;
|
||||
gchar *casestr;
|
||||
|
||||
@@ -1096,7 +1032,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
// to lower case.
|
||||
casestr = g_utf8_strdown(sWord, -1);
|
||||
if (strcmp(casestr, sWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1104,7 +1040,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
if (!bFound) {
|
||||
casestr = g_utf8_strup(sWord, -1);
|
||||
if (strcmp(casestr, sWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1118,7 +1054,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
g_free(firstchar);
|
||||
g_free(nextchar);
|
||||
if (strcmp(casestr, sWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1138,12 +1074,12 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
if (isupcase || sWord[iWordLen - 1] == 's' || !strncmp(&sWord[iWordLen - 2], "ed", 2)) {
|
||||
strcpy(sNewWord, sWord);
|
||||
sNewWord[iWordLen - 1] = '\0'; // cut "s" or "d"
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1161,13 +1097,13 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
&& !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { // doubled
|
||||
|
||||
sNewWord[iWordLen - 3] = '\0';
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else {
|
||||
if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1177,12 +1113,12 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
}
|
||||
}
|
||||
if (!bFound) {
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1200,13 +1136,13 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
if (iWordLen > 6 && (sNewWord[iWordLen - 4] == sNewWord[iWordLen - 5])
|
||||
&& !bIsVowel(sNewWord[iWordLen - 5]) && bIsVowel(sNewWord[iWordLen - 6])) { // doubled
|
||||
sNewWord[iWordLen - 4] = '\0';
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else {
|
||||
if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1216,12 +1152,12 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
}
|
||||
}
|
||||
if (!bFound) {
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1232,12 +1168,12 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
strcat(sNewWord, "E"); // add a char "E"
|
||||
else
|
||||
strcat(sNewWord, "e"); // add a char "e"
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1252,12 +1188,12 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
if (isupcase || (!strncmp(&sWord[iWordLen - 2], "es", 2) && (sWord[iWordLen - 3] == 's' || sWord[iWordLen - 3] == 'x' || sWord[iWordLen - 3] == 'o' || (iWordLen > 4 && sWord[iWordLen - 3] == 'h' && (sWord[iWordLen - 4] == 'c' || sWord[iWordLen - 4] == 's'))))) {
|
||||
strcpy(sNewWord, sWord);
|
||||
sNewWord[iWordLen - 2] = '\0';
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1274,13 +1210,13 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
if (iWordLen > 5 && (sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4])
|
||||
&& !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { // doubled
|
||||
sNewWord[iWordLen - 3] = '\0';
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else {
|
||||
if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1290,12 +1226,12 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
}
|
||||
}
|
||||
if (!bFound) {
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1314,12 +1250,12 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
strcat(sNewWord, "Y"); // add a char "Y"
|
||||
else
|
||||
strcat(sNewWord, "y"); // add a char "y"
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1337,12 +1273,12 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
strcat(sNewWord, "Y"); // add a char "Y"
|
||||
else
|
||||
strcat(sNewWord, "y"); // add a char "y"
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1356,12 +1292,12 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
if (isupcase || (!strncmp(&sWord[iWordLen - 2], "er", 2))) {
|
||||
strcpy(sNewWord, sWord);
|
||||
sNewWord[iWordLen - 2] = '\0';
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1375,12 +1311,12 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
if (isupcase || (!strncmp(&sWord[iWordLen - 3], "est", 3))) {
|
||||
strcpy(sNewWord, sWord);
|
||||
sNewWord[iWordLen - 3] = '\0';
|
||||
if (oLib[iLib]->Lookup(sNewWord, iIndex))
|
||||
if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
|
||||
bFound = true;
|
||||
else if (isupcase || g_ascii_isupper(sWord[0])) {
|
||||
casestr = g_ascii_strdown(sNewWord, -1);
|
||||
if (strcmp(casestr, sNewWord)) {
|
||||
if (oLib[iLib]->Lookup(casestr, iIndex))
|
||||
if (oLib[iLib]->Lookup(casestr, iWordIndices))
|
||||
bFound = true;
|
||||
}
|
||||
g_free(casestr);
|
||||
@@ -1390,9 +1326,6 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
|
||||
g_free(sNewWord);
|
||||
}
|
||||
|
||||
if (bFound)
|
||||
iWordIndex = iIndex;
|
||||
#if 0
|
||||
else {
|
||||
//don't change iWordIndex here.
|
||||
@@ -1403,11 +1336,11 @@ bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
return bFound;
|
||||
}
|
||||
|
||||
bool Libs::SimpleLookupWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
bool Libs::SimpleLookupWord(const gchar *sWord, std::set<glong> &iWordIndices, int iLib)
|
||||
{
|
||||
bool bFound = oLib[iLib]->Lookup(sWord, iWordIndex);
|
||||
bool bFound = oLib[iLib]->Lookup(sWord, iWordIndices);
|
||||
if (!bFound && fuzzy_)
|
||||
bFound = LookupSimilarWord(sWord, iWordIndex, iLib);
|
||||
bFound = LookupSimilarWord(sWord, iWordIndices, iLib);
|
||||
return bFound;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@@ -96,17 +95,27 @@ public:
|
||||
virtual const gchar *get_key(glong idx) = 0;
|
||||
virtual void get_data(glong idx) = 0;
|
||||
virtual const gchar *get_key_and_data(glong idx) = 0;
|
||||
virtual bool lookup(const char *str, glong &idx) = 0;
|
||||
virtual bool lookup(const char *str, std::set<glong> &idxs, glong &next_idx) = 0;
|
||||
virtual bool lookup(const char *str, std::set<glong> &idxs)
|
||||
{
|
||||
glong unused_next_idx;
|
||||
return lookup(str, idxs, unused_next_idx);
|
||||
};
|
||||
};
|
||||
|
||||
class SynFile
|
||||
{
|
||||
public:
|
||||
SynFile() {}
|
||||
~SynFile() {}
|
||||
bool load(const std::string &url, gulong wc);
|
||||
bool lookup(const char *str, glong &idx);
|
||||
bool lookup(const char *str, std::set<glong> &idxs, glong &next_idx);
|
||||
bool lookup(const char *str, std::set<glong> &idxs);
|
||||
const gchar *get_key(glong idx) { return synlist[idx]; }
|
||||
|
||||
private:
|
||||
std::map<std::string, gulong> synonyms;
|
||||
MapFile synfile;
|
||||
std::vector<gchar *> synlist;
|
||||
};
|
||||
|
||||
class Dict : public DictBase
|
||||
@@ -133,7 +142,12 @@ public:
|
||||
*offset = idx_file->wordentry_offset;
|
||||
*size = idx_file->wordentry_size;
|
||||
}
|
||||
bool Lookup(const char *str, glong &idx);
|
||||
bool Lookup(const char *str, std::set<glong> &idxs, glong &next_idx);
|
||||
bool Lookup(const char *str, std::set<glong> &idxs)
|
||||
{
|
||||
glong unused_next_idx;
|
||||
return Lookup(str, idxs, unused_next_idx);
|
||||
}
|
||||
|
||||
bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
|
||||
|
||||
@@ -181,15 +195,12 @@ public:
|
||||
return nullptr;
|
||||
return oLib[iLib]->get_data(iIndex);
|
||||
}
|
||||
const gchar *poGetCurrentWord(glong *iCurrent);
|
||||
const gchar *poGetNextWord(const gchar *word, glong *iCurrent);
|
||||
const gchar *poGetPreWord(glong *iCurrent);
|
||||
bool LookupWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
bool LookupWord(const gchar *sWord, std::set<glong> &iWordIndices, int iLib)
|
||||
{
|
||||
return oLib[iLib]->Lookup(sWord, iWordIndex);
|
||||
return oLib[iLib]->Lookup(sWord, iWordIndices);
|
||||
}
|
||||
bool LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib);
|
||||
bool SimpleLookupWord(const gchar *sWord, glong &iWordIndex, int iLib);
|
||||
bool LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices, int iLib);
|
||||
bool SimpleLookupWord(const gchar *sWord, std::set<glong> &iWordIndices, int iLib);
|
||||
|
||||
bool LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size);
|
||||
gint LookupWithRule(const gchar *sWord, gchar *reslist[]);
|
||||
|
||||
0
tests/not-unix-newlines-ifo/russian/russian.dict
Normal file
0
tests/not-unix-newlines-ifo/russian/russian.dict
Normal file
0
tests/not-unix-newlines-ifo/russian/russian.idx
Normal file
0
tests/not-unix-newlines-ifo/russian/russian.idx
Normal file
9
tests/not-unix-newlines-ifo/russian/russian.ifo
Normal file
9
tests/not-unix-newlines-ifo/russian/russian.ifo
Normal file
@@ -0,0 +1,9 @@
|
||||
StarDict's dict ifo file
|
||||
version=3.0.0
|
||||
bookname=Russian-English Dictionary (ru-en)
|
||||
wordcount=415144
|
||||
idxfilesize=12344255
|
||||
sametypesequence=h
|
||||
synwordcount=1277580
|
||||
author=Vuizur
|
||||
description=
|
||||
0
tests/not-unix-newlines-ifo/russian/russian.syn
Normal file
0
tests/not-unix-newlines-ifo/russian/russian.syn
Normal file
BIN
tests/stardict-test_multiple_results-2.4.2/test.dict
Normal file
BIN
tests/stardict-test_multiple_results-2.4.2/test.dict
Normal file
Binary file not shown.
BIN
tests/stardict-test_multiple_results-2.4.2/test.idx
Normal file
BIN
tests/stardict-test_multiple_results-2.4.2/test.idx
Normal file
Binary file not shown.
7
tests/stardict-test_multiple_results-2.4.2/test.ifo
Normal file
7
tests/stardict-test_multiple_results-2.4.2/test.ifo
Normal file
@@ -0,0 +1,7 @@
|
||||
StarDict's dict ifo file
|
||||
version=3.0.0
|
||||
bookname=Test multiple results
|
||||
wordcount=246
|
||||
idxfilesize=5977
|
||||
synwordcount=124
|
||||
description=
|
||||
BIN
tests/stardict-test_multiple_results-2.4.2/test.syn
Normal file
BIN
tests/stardict-test_multiple_results-2.4.2/test.syn
Normal file
Binary file not shown.
@@ -18,8 +18,15 @@ test_json() {
|
||||
fi
|
||||
}
|
||||
|
||||
test_json '[{"name": "Test synonyms", "wordcount": "2"},{"name": "Sample 1 test dictionary", "wordcount": "1"},{"name": "test_dict", "wordcount": "1"}]' -x -j -l -n --data-dir "$TEST_DIR"
|
||||
test_json '[{"name": "Russian-English Dictionary (ru-en)", "wordcount": "415144"},
|
||||
{"name": "Test synonyms", "wordcount": "2"},
|
||||
{"name": "Test multiple results", "wordcount": "246"},
|
||||
{"name": "Sample 1 test dictionary", "wordcount": "1"},
|
||||
{"name": "test_dict", "wordcount": "1"}]' -x -j -l -n --data-dir "$TEST_DIR"
|
||||
test_json '[{"dict": "Test synonyms","word":"test","definition":"\u000aresult of test"}]' -x -j -n --data-dir "$TEST_DIR" foo
|
||||
test_json '[]' -x -j -n --data-dir "$TEST_DIR" foobarbaaz
|
||||
|
||||
# Test multiple searches, with the first failing.
|
||||
test_json '[][{"dict": "Test synonyms","word":"test","definition":"\u000aresult of test"}]' -x -j -n --data-dir "$TEST_DIR" foobarbaaz foo
|
||||
|
||||
exit 0
|
||||
|
||||
67
tests/t_multiple_results
Executable file
67
tests/t_multiple_results
Executable file
@@ -0,0 +1,67 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
SDCV="$1"
|
||||
TEST_DIR="$2"
|
||||
|
||||
unset SDCV_PAGER
|
||||
unset STARDICT_DATA_DIR
|
||||
|
||||
test_json() {
|
||||
word="$1"
|
||||
jq_cmp="$2"
|
||||
result="$("$SDCV" --data-dir "$TEST_DIR" -exjn "$word" | sed 's|\\n|\\u000a|g')"
|
||||
cmp_result="$(echo "$result" | jq "$jq_cmp")"
|
||||
if [ "$cmp_result" != "true" ]; then
|
||||
echo "expected '$jq_cmp' to return true, but $result didn't"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Basic two-result search for the same headword.
|
||||
test_json bark \
|
||||
'. == [
|
||||
{"dict":"Test multiple results","word":"bark","definition":"\u000aThe harsh sound made by a dog."},
|
||||
{"dict":"Test multiple results","word":"bark","definition":"\u000aThe tough outer covering of trees and other woody plants."}
|
||||
]'
|
||||
|
||||
# Multi-result search where one word exists as both a synonym and a separate
|
||||
# headword. This ensures that if there is a matching synonym we don't skip the
|
||||
# regular search.
|
||||
test_json cat \
|
||||
'. == [
|
||||
{"dict":"Test multiple results","word":"cat","definition":"\u000aA cute animal which (rarely) barks."},
|
||||
{"dict":"Test multiple results","word":"lion","definition":"\u000aA larger cat which might bite your head off."},
|
||||
{"dict":"Test multiple results","word":"panther","definition":"\u000aI know very little about panthers, sorry."}
|
||||
]'
|
||||
|
||||
# Many-result search for a word that matches 120 distinct headwords.
|
||||
test_json many_headwords 'length == 120'
|
||||
test_json many_headwords 'all(.word == "many_headwords")'
|
||||
test_json many_headwords \
|
||||
'to_entries | map(.value.definition == "\u000aDefinition for [many_headwords] entry #\(.key+1) (same headword).") | all'
|
||||
|
||||
# Many-result search for 120 words that have the same synonym.
|
||||
test_json many_synonyms 'length == 120'
|
||||
test_json many_synonyms \
|
||||
'to_entries | map(.value.word == "many_synonyms-\(.key+101)") | all'
|
||||
test_json many_synonyms \
|
||||
'to_entries | map(.value.definition == "\u000aDefinition for [many_synonyms-\(.key+101)] (same synonym).") | all'
|
||||
|
||||
# Ensure that we don't return more than one result even if a word can be
|
||||
# resolved in more than one way.
|
||||
#
|
||||
# Most well-formed dictionaries don't have entries like this (it basically
|
||||
# requires you to have a dictionary where there is a synonym that is identical
|
||||
# to a word's headword or multiple identical synonym entries).
|
||||
#
|
||||
# This entry was created by creating extra synonyms with different names then
|
||||
# modifying the .syn file manually.
|
||||
test_json many_resolution_paths \
|
||||
'. == [
|
||||
{"dict":"Test multiple results","word":"many_resolution_paths",
|
||||
"definition":"\u000aDefinition for [many_resolution_paths] headword (same word, multiple synonym entries)."}
|
||||
]'
|
||||
|
||||
exit 0
|
||||
18
tests/t_newlines_in_ifo
Executable file
18
tests/t_newlines_in_ifo
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
PATH_TO_SDCV="$1"
|
||||
TEST_DIR="$2"
|
||||
|
||||
unset SDCV_PAGER
|
||||
unset STARDICT_DATA_DIR
|
||||
|
||||
RES=$("$PATH_TO_SDCV" -n -x --data-dir="$TEST_DIR/not-unix-newlines-ifo" -l | tail -n 1)
|
||||
|
||||
if [ "$RES" = "Russian-English Dictionary (ru-en) 415144" ]; then
|
||||
exit 0
|
||||
else
|
||||
echo "test failed, unexpected result: $RES" >&2
|
||||
exit 1
|
||||
fi
|
||||
Reference in New Issue
Block a user