19 Commits

Author SHA1 Message Date
Evgeniy A. Dushistov
49c8094b53 version 0.5.5 2023-04-18 21:47:55 +03:00
Evgeniy A. Dushistov
4346e65bd3 fix CI build: ubuntu-18.04 not supported by github actions anymore 2023-04-18 21:44:18 +03:00
Evgeniy A. Dushistov
d144e0310c fix CI build 2023-01-16 16:44:09 +03:00
NiLuJe
6e36e7730c Warn on unknown dicts 2022-09-16 18:48:08 +03:00
NiLuJe
abe5e9e72f Check accesses to the bookname_to_ifo std::map
Avoid crashes when passing unknown dicts to the -u flag

Fix #87
2022-09-16 18:48:08 +03:00
NiLuJe
488ec68854 Use off_t for stuff mainly assigned to a stat.st_size value
Allows simplifying the mmap sanity checks in mapfile, and actually
ensuring they won't break when -D_FILE_OFFSET_BITS=64
2022-09-14 22:12:29 +03:00
Marcelino Alberdi Pereira
b698445ead Add a small summary of the project to the README 2022-09-07 17:51:13 +03:00
Evgeniy A. Dushistov
504e7807e6 add information about 0.5.4 into NEWS 2022-06-24 21:49:00 +03:00
Evgeniy A. Dushistov
6c80bf2d99 t_json: add data about new dictionary 2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov
8742575c33 fix bash syntax error 2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov
b294b76fb5 check file size before mapping on linux 2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov
823ec3d840 clang-format for mapfile 2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov
6ab8b51e6c version 0.5.4 2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov
881657b336 Revert "replace deprecated g_pattern_match_string function"
This reverts commit 452a4e07fb.
2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov
911fc2f561 more robust parsing of ifo file
fixes #79 fixes #81
2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov
f488f5350b stardict_lib.hpp: remove unused headers plus clang-format 2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov
e72220e748 use cmake to check if compiler supports c++11 2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov
b77c0e793a replace deprecated g_pattern_match_string function 2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov
ebaa6f2136 clang-format for stardict_lib.cpp 2022-06-24 21:34:47 +03:00
17 changed files with 211 additions and 176 deletions

View File

@@ -15,7 +15,7 @@ BreakBeforeBinaryOperators: true
BreakBeforeTernaryOperators: true BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: true BreakConstructorInitializersBeforeComma: true
BinPackParameters: true BinPackParameters: true
ColumnLimit: 0 ColumnLimit: 120
ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerAllOnOneLineOrOnePerLine: false
DerivePointerAlignment: false DerivePointerAlignment: false
ExperimentalAutoDetectBinPacking: false ExperimentalAutoDetectBinPacking: false

View File

@@ -20,12 +20,13 @@ jobs:
fail-fast: true fail-fast: true
matrix: matrix:
os: [ubuntu-latest] os: [ubuntu-20.04, ubuntu-latest]
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v3
with: with:
submodules: 'recursive' submodules: 'recursive'
- uses: jwlawson/actions-setup-cmake@v1.4 - uses: jwlawson/actions-setup-cmake@v1.4
if: matrix.os != 'ubuntu-latest'
with: with:
cmake-version: '3.5.1' cmake-version: '3.5.1'
github-api-token: ${{ secrets.GITHUB_TOKEN }} github-api-token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -3,6 +3,10 @@ project(sdcv)
cmake_minimum_required(VERSION 3.5 FATAL_ERROR) cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
cmake_policy(VERSION 3.5) cmake_policy(VERSION 3.5)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED True)
set(CMAKE_CXX_EXTENSIONS False)
include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/compiler.cmake") include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/compiler.cmake")
set(ZLIB_FIND_REQUIRED True) set(ZLIB_FIND_REQUIRED True)
@@ -91,7 +95,7 @@ set(CPACK_PACKAGE_VENDOR "Evgeniy Dushistov <dushistov@mail.ru>")
set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.org") set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.org")
set(CPACK_PACKAGE_VERSION_MAJOR "0") set(CPACK_PACKAGE_VERSION_MAJOR "0")
set(CPACK_PACKAGE_VERSION_MINOR "5") set(CPACK_PACKAGE_VERSION_MINOR "5")
set(CPACK_PACKAGE_VERSION_PATCH "3") set(CPACK_PACKAGE_VERSION_PATCH "5")
set(sdcv_VERSION set(sdcv_VERSION
"${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
@@ -144,5 +148,6 @@ if (BUILD_TESTS)
add_sdcv_shell_test(t_datadir) add_sdcv_shell_test(t_datadir)
add_sdcv_shell_test(t_return_code) add_sdcv_shell_test(t_return_code)
add_sdcv_shell_test(t_multiple_results) add_sdcv_shell_test(t_multiple_results)
add_sdcv_shell_test(t_newlines_in_ifo)
endif (BUILD_TESTS) endif (BUILD_TESTS)

12
NEWS
View File

@@ -1,3 +1,13 @@
Version 0.5.5
- Avoid crashes when passing unknown dicts to the -u flag (by NiLuJe)
- Use off_t for stuff mainly assigned to a stat.st_size value
Version 0.5.4
- Use binary search for synonyms
- Various improvements in work with synonyms
- Added --json (same as --json-output) to match man
- Show all matched result
- More robust parsing of ifo file
- Prevent crash if the size of a file does not match the expected one for .oft files
Version 0.5.3 Version 0.5.3
- Use single quotes around JSON data to reduce need for escaping - Use single quotes around JSON data to reduce need for escaping
- Store integer magic in cache file - Store integer magic in cache file
@@ -36,7 +46,7 @@ Version 0.4.2
* Russian translation update * Russian translation update
Version 0.4.1 Version 0.4.1
* Recreate cache if idx file was modified * Recreate cache if idx file was modified
* Ability to use pager(SDCV_PAGER) * Ability to use pager(SDCV_PAGER)
* Add Chinese (traditional) translation * Add Chinese (traditional) translation
* Add Ukrainian translation * Add Ukrainian translation

View File

@@ -1,6 +1,9 @@
#+OPTIONS: ^:nil #+OPTIONS: ^:nil
[[https://github.com/Dushistov/sdcv/actions?query=workflow%3ACI+branch%3Amaster][https://github.com/Dushistov/sdcv/workflows/CI/badge.svg]] [[https://github.com/Dushistov/sdcv/actions?query=workflow%3ACI+branch%3Amaster][https://github.com/Dushistov/sdcv/workflows/CI/badge.svg]]
[[https://github.com/Dushistov/sdcv/blob/master/LICENSE][https://img.shields.io/badge/license-GPL%202-brightgreen.svg]] [[https://github.com/Dushistov/sdcv/blob/master/LICENSE][https://img.shields.io/badge/license-GPL%202-brightgreen.svg]]
* sdcv
*sdcv* is a simple, cross-platform, text-based utility for working with dictionaries in [[http://stardict-4.sourceforge.net/][StarDict]] format.
* How to compile and install * How to compile and install
#+BEGIN_SRC sh #+BEGIN_SRC sh
mkdir /tmp/build-sdcv mkdir /tmp/build-sdcv

View File

@@ -16,19 +16,6 @@ if (NOT DEFINED SDCV_COMPILER_IS_GCC_COMPATIBLE)
endif() endif()
endif() endif()
if (MSVC AND (MSVC_VERSION LESS 1900))
message(FATAL_ERROR "MSVC version ${MSVC_VERSION} have no full c++11 support")
elseif (MSVC)
add_definitions(-DNOMINMAX)
elseif (NOT MSVC)
check_cxx_compiler_flag("-std=c++11" CXX_SUPPORTS_CXX11)
if (CXX_SUPPORTS_CXX11)
append("-std=c++11" CMAKE_CXX_FLAGS)
else ()
message(FATAL_ERROR "sdcv requires C++11 support but the '-std=c++11' flag isn't supported.")
endif()
endif ()
if (SDCV_COMPILER_IS_GCC_COMPATIBLE) if (SDCV_COMPILER_IS_GCC_COMPATIBLE)
append("-Wall" "-Wextra" "-Wformat-security" "-Wcast-align" "-Werror=format" "-Wcast-qual" CMAKE_C_FLAGS) append("-Wall" "-Wextra" "-Wformat-security" "-Wcast-align" "-Werror=format" "-Wcast-qual" CMAKE_C_FLAGS)
append("-Wall" "-pedantic" "-Wextra" "-Wformat-security" "-Wcast-align" "-Werror=format" "-Wcast-qual" CMAKE_CXX_FLAGS) append("-Wall" "-pedantic" "-Wextra" "-Wformat-security" "-Wcast-align" "-Werror=format" "-Wcast-qual" CMAKE_CXX_FLAGS)

View File

@@ -27,7 +27,7 @@ public:
private: private:
const char *start; /* start of mmap'd area */ const char *start; /* start of mmap'd area */
const char *end; /* end of mmap'd area */ const char *end; /* end of mmap'd area */
unsigned long size; /* size of mmap */ off_t size; /* size of mmap */
int type; int type;
z_stream zStream; z_stream zStream;
@@ -47,7 +47,7 @@ private:
std::string origFilename; std::string origFilename;
std::string comment; std::string comment;
unsigned long crc; unsigned long crc;
unsigned long length; off_t length;
unsigned long compressedLength; unsigned long compressedLength;
DictCache cache[DICT_CACHE_SIZE]; DictCache cache[DICT_CACHE_SIZE];
MapFile mapfile; MapFile mapfile;

View File

@@ -7,6 +7,7 @@
#ifdef HAVE_MMAP #ifdef HAVE_MMAP
#include <fcntl.h> #include <fcntl.h>
#include <sys/mman.h> #include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
#endif #endif
#ifdef _WIN32 #ifdef _WIN32
@@ -21,13 +22,13 @@ public:
~MapFile(); ~MapFile();
MapFile(const MapFile &) = delete; MapFile(const MapFile &) = delete;
MapFile &operator=(const MapFile &) = delete; MapFile &operator=(const MapFile &) = delete;
bool open(const char *file_name, unsigned long file_size); bool open(const char *file_name, off_t file_size);
gchar *begin() { return data; } gchar *begin() { return data; }
private: private:
char *data = nullptr; char *data = nullptr;
unsigned long size = 0ul;
#ifdef HAVE_MMAP #ifdef HAVE_MMAP
size_t size = 0u;
int mmap_fd = -1; int mmap_fd = -1;
#elif defined(_WIN32) #elif defined(_WIN32)
HANDLE hFile = 0; HANDLE hFile = 0;
@@ -35,25 +36,31 @@ private:
#endif #endif
}; };
inline bool MapFile::open(const char *file_name, unsigned long file_size) inline bool MapFile::open(const char *file_name, off_t file_size)
{ {
size = file_size;
#ifdef HAVE_MMAP #ifdef HAVE_MMAP
if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) { if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) {
//g_print("Open file %s failed!\n",fullfilename); // g_print("Open file %s failed!\n",fullfilename);
return false; return false;
} }
data = (gchar *)mmap(nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0); struct stat st;
if (fstat(mmap_fd, &st) == -1 || st.st_size < 0 || (st.st_size == 0 && S_ISREG(st.st_mode))
|| st.st_size != file_size) {
close(mmap_fd);
return false;
}
size = static_cast<size_t>(st.st_size);
data = (gchar *)mmap(nullptr, size, PROT_READ, MAP_SHARED, mmap_fd, 0);
if ((void *)data == (void *)(-1)) { if ((void *)data == (void *)(-1)) {
//g_print("mmap file %s failed!\n",idxfilename); // g_print("mmap file %s failed!\n",idxfilename);
size = 0u;
data = nullptr; data = nullptr;
return false; return false;
} }
#elif defined(_WIN32) #elif defined(_WIN32)
hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS, hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0);
FILE_ATTRIBUTE_NORMAL, 0); hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0, file_size, nullptr);
hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0,
file_size, nullptr);
data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size); data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size);
#else #else
gsize read_len; gsize read_len;

View File

@@ -186,10 +186,13 @@ try {
} }
// add bookname to list // add bookname to list
gchar **p = get_impl(use_dict_list); for (gchar **p = get_impl(use_dict_list); *p != nullptr; ++p) {
while (*p) { auto it = bookname_to_ifo.find(*p);
order_list.push_back(bookname_to_ifo.at(*p)); if (it != bookname_to_ifo.end()) {
++p; order_list.push_back(it->second);
} else {
fprintf(stderr, _("Unknown dictionary: %s\n"), *p);
}
} }
} else { } else {
std::string ordering_cfg_file = std::string(g_get_user_config_dir()) + G_DIR_SEPARATOR_S "sdcv_ordering"; std::string ordering_cfg_file = std::string(g_get_user_config_dir()) + G_DIR_SEPARATOR_S "sdcv_ordering";
@@ -201,7 +204,12 @@ try {
if (ordering_file != nullptr) { if (ordering_file != nullptr) {
std::string line; std::string line;
while (stdio_getline(ordering_file, line)) { while (stdio_getline(ordering_file, line)) {
order_list.push_back(bookname_to_ifo.at(line)); auto it = bookname_to_ifo.find(line);
if (it != bookname_to_ifo.end()) {
order_list.push_back(it->second);
} else {
fprintf(stderr, _("Unknown dictionary: %s\n"), line.c_str());
}
} }
fclose(ordering_file); fclose(ordering_file);
} }

View File

@@ -5,6 +5,7 @@
#include <algorithm> #include <algorithm>
#include <cctype> #include <cctype>
#include <cstring> #include <cstring>
#include <map>
#include <stdexcept> #include <stdexcept>
#include <glib/gstdio.h> #include <glib/gstdio.h>
@@ -47,9 +48,9 @@ static bool bIsPureEnglish(const gchar *str)
{ {
// i think this should work even when it is UTF8 string :). // i think this should work even when it is UTF8 string :).
for (int i = 0; str[i] != 0; i++) for (int i = 0; str[i] != 0; i++)
//if(str[i]<0) // if(str[i]<0)
//if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK. // if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK.
// Better use isascii() but not str[i]<0 while char is default unsigned in arm // Better use isascii() but not str[i]<0 while char is default unsigned in arm
if (!isascii(str[i])) if (!isascii(str[i]))
return false; return false;
return true; return true;
@@ -78,108 +79,93 @@ bool DictInfo::load_from_ifo_file(const std::string &ifofilename,
{ {
ifo_file_name = ifofilename; ifo_file_name = ifofilename;
glib::CharStr buffer; glib::CharStr buffer;
if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), nullptr, nullptr)) gsize length = 0;
if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), &length, nullptr)) {
fprintf(stderr, "Can not read from %s\n", ifofilename.c_str());
return false; return false;
}
static const char TREEDICT_MAGIC_DATA[] = "StarDict's treedict ifo file"; static const char TREEDICT_MAGIC_DATA[] = "StarDict's treedict ifo file";
static const char DICT_MAGIC_DATA[] = "StarDict's dict ifo file"; static const char DICT_MAGIC_DATA[] = "StarDict's dict ifo file";
const gchar *magic_data = istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA; const gchar *magic_data = istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA;
static const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF, '\0' }; static const gchar utf8_bom[] = { (gchar)0xEF, (gchar)0xBB, (gchar)0xBF, '\0' };
if (!g_str_has_prefix(
g_str_has_prefix(get_impl(buffer), (const gchar *)(utf8_bom)) ? get_impl(buffer) + 3 : get_impl(buffer), const gchar *p = get_impl(buffer);
magic_data)) { const gchar *end = p + length;
if (g_str_has_prefix(p, utf8_bom)) {
p += strlen(utf8_bom);
}
if (!g_str_has_prefix(p, magic_data)) {
fprintf(stderr, "No magic header(%s) in ifo file\n", magic_data);
return false; return false;
} }
p += strlen(magic_data);
gchar *p1 = get_impl(buffer) + strlen(magic_data) - 1; std::map<std::string, std::string> key_value_map;
while (p != end) {
auto key_it = std::find_if(p, end, [](gchar ch) { return !g_ascii_isspace(ch); });
if (key_it == end) {
break;
}
auto eq_it = std::find(key_it, end, gchar('='));
if (eq_it == end) {
fprintf(stderr, "Invalid part of ifo (no '=') here: %s\n", key_it);
return false;
}
auto val_it = std::find_if(eq_it + 1, end, [](gchar ch) { return !g_ascii_isspace(ch); });
if (val_it == end) {
key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string()));
break;
}
gchar *p2 = strstr(p1, "\nwordcount="); auto line_end_it = std::find_if(val_it, end, [](gchar ch) { return ch == '\r' || ch == '\n'; });
if (p2 == nullptr) key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string(val_it, line_end_it)));
return false; if (line_end_it == end)
break;
p = line_end_it + 1;
}
gchar *p3 = strchr(p2 + sizeof("\nwordcount=") - 1, '\n'); std::map<std::string, std::string>::const_iterator it;
#define FIND_KEY(_key_) \
it = key_value_map.find(_key_); \
if (it == key_value_map.end()) { \
fprintf(stderr, "Can not find '%s' in ifo file\n", _key_); \
return false; \
}
wordcount = atol(std::string(p2 + sizeof("\nwordcount=") - 1, p3 - (p2 + sizeof("\nwordcount=") - 1)).c_str()); FIND_KEY("wordcount")
wordcount = atol(it->second.c_str());
if (istreedict) { if (istreedict) {
p2 = strstr(p1, "\ntdxfilesize="); FIND_KEY("tdxfilesize")
if (p2 == nullptr) index_file_size = atol(it->second.c_str());
return false;
p3 = strchr(p2 + sizeof("\ntdxfilesize=") - 1, '\n');
index_file_size = atol(std::string(p2 + sizeof("\ntdxfilesize=") - 1, p3 - (p2 + sizeof("\ntdxfilesize=") - 1)).c_str());
} else { } else {
FIND_KEY("idxfilesize")
index_file_size = atol(it->second.c_str());
}
FIND_KEY("bookname")
bookname = it->second;
p2 = strstr(p1, "\nidxfilesize="); #define SET_IF_EXISTS(_key_) \
if (p2 == nullptr) it = key_value_map.find(#_key_); \
return false; if (it != key_value_map.end()) { \
_key_ = it->second; \
p3 = strchr(p2 + sizeof("\nidxfilesize=") - 1, '\n');
index_file_size = atol(std::string(p2 + sizeof("\nidxfilesize=") - 1, p3 - (p2 + sizeof("\nidxfilesize=") - 1)).c_str());
} }
p2 = strstr(p1, "\nbookname="); SET_IF_EXISTS(author)
SET_IF_EXISTS(email)
if (p2 == nullptr) SET_IF_EXISTS(website)
return false; SET_IF_EXISTS(date)
SET_IF_EXISTS(description)
p2 = p2 + sizeof("\nbookname=") - 1; SET_IF_EXISTS(sametypesequence)
p3 = strchr(p2, '\n');
bookname.assign(p2, p3 - p2);
p2 = strstr(p1, "\nauthor=");
if (p2) {
p2 = p2 + sizeof("\nauthor=") - 1;
p3 = strchr(p2, '\n');
author.assign(p2, p3 - p2);
}
p2 = strstr(p1, "\nemail=");
if (p2) {
p2 = p2 + sizeof("\nemail=") - 1;
p3 = strchr(p2, '\n');
email.assign(p2, p3 - p2);
}
p2 = strstr(p1, "\nwebsite=");
if (p2) {
p2 = p2 + sizeof("\nwebsite=") - 1;
p3 = strchr(p2, '\n');
website.assign(p2, p3 - p2);
}
p2 = strstr(p1, "\ndate=");
if (p2) {
p2 = p2 + sizeof("\ndate=") - 1;
p3 = strchr(p2, '\n');
date.assign(p2, p3 - p2);
}
p2 = strstr(p1, "\ndescription=");
if (p2) {
p2 = p2 + sizeof("\ndescription=") - 1;
p3 = strchr(p2, '\n');
description.assign(p2, p3 - p2);
}
p2 = strstr(p1, "\nsametypesequence=");
if (p2) {
p2 += sizeof("\nsametypesequence=") - 1;
p3 = strchr(p2, '\n');
sametypesequence.assign(p2, p3 - p2);
}
p2 = strstr(p1, "\nsynwordcount=");
syn_wordcount = 0; syn_wordcount = 0;
if (p2) { it = key_value_map.find("synwordcount");
p2 += sizeof("\nsynwordcount=") - 1; if (it != key_value_map.end())
p3 = strchr(p2, '\n'); syn_wordcount = atol(it->second.c_str());
syn_wordcount = atol(std::string(p2, p3 - p2).c_str()); #undef FIND_KEY
} #undef SET_IF_EXISTS
return true; return true;
} }
@@ -204,10 +190,10 @@ gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
guint32 data_size; guint32 data_size;
gint sametypesequence_len = sametypesequence.length(); gint sametypesequence_len = sametypesequence.length();
//there have sametypesequence_len char being omitted. // there have sametypesequence_len char being omitted.
data_size = idxitem_size + sizeof(guint32) + sametypesequence_len; data_size = idxitem_size + sizeof(guint32) + sametypesequence_len;
//if the last item's size is determined by the end up '\0',then +=sizeof(gchar); // if the last item's size is determined by the end up '\0',then +=sizeof(gchar);
//if the last item's size is determined by the head guint32 type data,then +=sizeof(guint32); // if the last item's size is determined by the head guint32 type data,then +=sizeof(guint32);
switch (sametypesequence[sametypesequence_len - 1]) { switch (sametypesequence[sametypesequence_len - 1]) {
case 'm': case 'm':
case 't': case 't':
@@ -234,7 +220,7 @@ gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
p1 = data + sizeof(guint32); p1 = data + sizeof(guint32);
p2 = get_impl(origin_data); p2 = get_impl(origin_data);
guint32 sec_size; guint32 sec_size;
//copy the head items. // copy the head items.
for (int i = 0; i < sametypesequence_len - 1; i++) { for (int i = 0; i < sametypesequence_len - 1; i++) {
*p1 = sametypesequence[i]; *p1 = sametypesequence[i];
p1 += sizeof(gchar); p1 += sizeof(gchar);
@@ -272,7 +258,7 @@ gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
break; break;
} }
} }
//calculate the last item 's size. // calculate the last item 's size.
sec_size = idxitem_size - (p2 - get_impl(origin_data)); sec_size = idxitem_size - (p2 - get_impl(origin_data));
*p1 = sametypesequence[sametypesequence_len - 1]; *p1 = sametypesequence[sametypesequence_len - 1];
p1 += sizeof(gchar); p1 += sizeof(gchar);
@@ -286,7 +272,7 @@ gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
case 'k': case 'k':
memcpy(p1, p2, sec_size); memcpy(p1, p2, sec_size);
p1 += sec_size; p1 += sec_size;
*p1 = '\0'; //add the end up '\0'; *p1 = '\0'; // add the end up '\0';
break; break;
case 'W': case 'W':
case 'P': case 'P':
@@ -443,7 +429,7 @@ public:
if (idxfile) if (idxfile)
fclose(idxfile); fclose(idxfile);
} }
bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) override; bool load(const std::string &url, gulong wc, off_t fsize, bool verbose) override;
const gchar *get_key(glong idx) override; const gchar *get_key(glong idx) override;
void get_data(glong idx) override { get_key(idx); } void get_data(glong idx) override { get_key(idx); }
const gchar *get_key_and_data(glong idx) override const gchar *get_key_and_data(glong idx) override
@@ -503,7 +489,7 @@ public:
{ {
} }
~WordListIndex() { g_free(idxdatabuf); } ~WordListIndex() { g_free(idxdatabuf); }
bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) override; bool load(const std::string &url, gulong wc, off_t fsize, bool verbose) override;
const gchar *get_key(glong idx) override { return wordlist[idx]; } const gchar *get_key(glong idx) override { return wordlist[idx]; }
void get_data(glong idx) override; void get_data(glong idx) override;
const gchar *get_key_and_data(glong idx) override const gchar *get_key_and_data(glong idx) override
@@ -542,7 +528,7 @@ inline const gchar *OffsetIndex::read_first_on_page_key(glong page_idx)
std::min(sizeof(wordentry_buf), static_cast<size_t>(page_size)), std::min(sizeof(wordentry_buf), static_cast<size_t>(page_size)),
1, idxfile); 1, idxfile);
THROW_IF_ERROR(nitems == 1); THROW_IF_ERROR(nitems == 1);
//TODO: check returned values, deal with word entry that strlen>255. // TODO: check returned values, deal with word entry that strlen>255.
return wordentry_buf; return wordentry_buf;
} }
@@ -629,12 +615,12 @@ bool OffsetIndex::save_cache(const std::string &url, bool verbose)
return false; return false;
} }
bool OffsetIndex::load(const std::string &url, gulong wc, gulong fsize, bool verbose) bool OffsetIndex::load(const std::string &url, gulong wc, off_t fsize, bool verbose)
{ {
wordcount = wc; wordcount = wc;
gulong npages = (wc - 1) / ENTR_PER_PAGE + 2; gulong npages = (wc - 1) / ENTR_PER_PAGE + 2;
wordoffset.resize(npages); wordoffset.resize(npages);
if (!load_cache(url)) { //map file will close after finish of block if (!load_cache(url)) { // map file will close after finish of block
MapFile map_file; MapFile map_file;
if (!map_file.open(url.c_str(), fsize)) if (!map_file.open(url.c_str(), fsize))
return false; return false;
@@ -756,10 +742,10 @@ bool OffsetIndex::lookup(const char *str, std::set<glong> &idxs, glong &next_idx
} }
if (!bFound) if (!bFound)
next_idx = iPage*ENTR_PER_PAGE + iFrom; // next next_idx = iPage * ENTR_PER_PAGE + iFrom; // next
else { else {
// Convert the found in-page index to the dict index. // Convert the found in-page index to the dict index.
iThisIndex = iPage*ENTR_PER_PAGE + iThisIndex; iThisIndex = iPage * ENTR_PER_PAGE + iThisIndex;
// In order to return all idxs that match the search string, walk // In order to return all idxs that match the search string, walk
// linearly behind and ahead of the found index. // linearly behind and ahead of the found index.
glong iHeadIndex = iThisIndex - 1; // do not include iThisIndex glong iHeadIndex = iThisIndex - 1; // do not include iThisIndex
@@ -772,7 +758,7 @@ bool OffsetIndex::lookup(const char *str, std::set<glong> &idxs, glong &next_idx
return bFound; return bFound;
} }
bool WordListIndex::load(const std::string &url, gulong wc, gulong fsize, bool) bool WordListIndex::load(const std::string &url, gulong wc, off_t fsize, bool)
{ {
gzFile in = gzopen(url.c_str(), "rb"); gzFile in = gzopen(url.c_str(), "rb");
if (in == nullptr) if (in == nullptr)
@@ -785,7 +771,7 @@ bool WordListIndex::load(const std::string &url, gulong wc, gulong fsize, bool)
if (len < 0) if (len < 0)
return false; return false;
if (gulong(len) != fsize) if (static_cast<off_t>(len) != fsize)
return false; return false;
wordlist.resize(wc + 1); wordlist.resize(wc + 1);
@@ -834,7 +820,7 @@ bool WordListIndex::lookup(const char *str, std::set<glong> &idxs, glong &next_i
} }
} }
if (!bFound) if (!bFound)
next_idx = iFrom; //next next_idx = iFrom; // next
else { else {
// In order to return all idxs that match the search string, walk // In order to return all idxs that match the search string, walk
// linearly behind and ahead of the found index. // linearly behind and ahead of the found index.
@@ -905,19 +891,19 @@ bool SynFile::lookup(const char *str, std::set<glong> &idxs, glong &next_idx)
} }
} }
if (!bFound) if (!bFound)
next_idx = iFrom; //next next_idx = iFrom; // next
else { else {
// In order to return all idxs that match the search string, walk // In order to return all idxs that match the search string, walk
// linearly behind and ahead of the found index. // linearly behind and ahead of the found index.
glong iHeadIndex = iThisIndex - 1; // do not include iThisIndex glong iHeadIndex = iThisIndex - 1; // do not include iThisIndex
while (iHeadIndex >= 0 && stardict_strcmp(str, get_key(iHeadIndex)) == 0) { while (iHeadIndex >= 0 && stardict_strcmp(str, get_key(iHeadIndex)) == 0) {
const gchar *key = get_key(iHeadIndex--); const gchar *key = get_key(iHeadIndex--);
idxs.insert(g_ntohl(get_uint32(key+strlen(key)+1))); idxs.insert(g_ntohl(get_uint32(key + strlen(key) + 1)));
} }
do { do {
// no need to double-check iThisIndex -- we know it's a match already // no need to double-check iThisIndex -- we know it's a match already
const gchar *key = get_key(iThisIndex++); const gchar *key = get_key(iThisIndex++);
idxs.insert(g_ntohl(get_uint32(key+strlen(key)+1))); idxs.insert(g_ntohl(get_uint32(key + strlen(key) + 1)));
} while (iThisIndex <= iLast && stardict_strcmp(str, get_key(iThisIndex)) == 0); } while (iThisIndex <= iLast && stardict_strcmp(str, get_key(iThisIndex)) == 0);
} }
} }
@@ -934,7 +920,7 @@ bool Dict::Lookup(const char *str, std::set<glong> &idxs, glong &next_idx)
bool Dict::load(const std::string &ifofilename, bool verbose) bool Dict::load(const std::string &ifofilename, bool verbose)
{ {
gulong idxfilesize; off_t idxfilesize;
if (!load_ifofile(ifofilename, idxfilesize)) if (!load_ifofile(ifofilename, idxfilesize))
return false; return false;
@@ -944,14 +930,14 @@ bool Dict::load(const std::string &ifofilename, bool verbose)
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) { if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
dictdzfile.reset(new DictData); dictdzfile.reset(new DictData);
if (!dictdzfile->open(fullfilename, 0)) { if (!dictdzfile->open(fullfilename, 0)) {
//g_print("open file %s failed!\n",fullfilename); // g_print("open file %s failed!\n",fullfilename);
return false; return false;
} }
} else { } else {
fullfilename.erase(fullfilename.length() - sizeof(".dz") + 1, sizeof(".dz") - 1); fullfilename.erase(fullfilename.length() - sizeof(".dz") + 1, sizeof(".dz") - 1);
dictfile = fopen(fullfilename.c_str(), "rb"); dictfile = fopen(fullfilename.c_str(), "rb");
if (!dictfile) { if (!dictfile) {
//g_print("open file %s failed!\n",fullfilename); // g_print("open file %s failed!\n",fullfilename);
return false; return false;
} }
} }
@@ -974,11 +960,11 @@ bool Dict::load(const std::string &ifofilename, bool verbose)
syn_file.reset(new SynFile); syn_file.reset(new SynFile);
syn_file->load(fullfilename, syn_wordcount); syn_file->load(fullfilename, syn_wordcount);
//g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles()); // g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles());
return true; return true;
} }
bool Dict::load_ifofile(const std::string &ifofilename, gulong &idxfilesize) bool Dict::load_ifofile(const std::string &ifofilename, off_t &idxfilesize)
{ {
DictInfo dict_info; DictInfo dict_info;
if (!dict_info.load_from_ifo_file(ifofilename, false)) if (!dict_info.load_from_ifo_file(ifofilename, false))
@@ -1082,7 +1068,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1); gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1);
//cut one char "s" or "d" // cut one char "s" or "d"
if (!bFound && iWordLen > 1) { if (!bFound && iWordLen > 1) {
isupcase = sWord[iWordLen - 1] == 'S' || !strncmp(&sWord[iWordLen - 2], "ED", 2); isupcase = sWord[iWordLen - 1] == 'S' || !strncmp(&sWord[iWordLen - 2], "ED", 2);
if (isupcase || sWord[iWordLen - 1] == 's' || !strncmp(&sWord[iWordLen - 2], "ed", 2)) { if (isupcase || sWord[iWordLen - 1] == 's' || !strncmp(&sWord[iWordLen - 2], "ed", 2)) {
@@ -1101,14 +1087,14 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
} }
} }
//cut "ly" // cut "ly"
if (!bFound && iWordLen > 2) { if (!bFound && iWordLen > 2) {
isupcase = !strncmp(&sWord[iWordLen - 2], "LY", 2); isupcase = !strncmp(&sWord[iWordLen - 2], "LY", 2);
if (isupcase || (!strncmp(&sWord[iWordLen - 2], "ly", 2))) { if (isupcase || (!strncmp(&sWord[iWordLen - 2], "ly", 2))) {
strcpy(sNewWord, sWord); strcpy(sNewWord, sWord);
sNewWord[iWordLen - 2] = '\0'; // cut "ly" sNewWord[iWordLen - 2] = '\0'; // cut "ly"
if (iWordLen > 5 && sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4] if (iWordLen > 5 && sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4]
&& !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { //doubled && !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { // doubled
sNewWord[iWordLen - 3] = '\0'; sNewWord[iWordLen - 3] = '\0';
if (oLib[iLib]->Lookup(sNewWord, iWordIndices)) if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
@@ -1123,7 +1109,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
g_free(casestr); g_free(casestr);
} }
if (!bFound) if (!bFound)
sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; //restore sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; // restore
} }
} }
if (!bFound) { if (!bFound) {
@@ -1141,14 +1127,14 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
} }
} }
//cut "ing" // cut "ing"
if (!bFound && iWordLen > 3) { if (!bFound && iWordLen > 3) {
isupcase = !strncmp(&sWord[iWordLen - 3], "ING", 3); isupcase = !strncmp(&sWord[iWordLen - 3], "ING", 3);
if (isupcase || !strncmp(&sWord[iWordLen - 3], "ing", 3)) { if (isupcase || !strncmp(&sWord[iWordLen - 3], "ing", 3)) {
strcpy(sNewWord, sWord); strcpy(sNewWord, sWord);
sNewWord[iWordLen - 3] = '\0'; sNewWord[iWordLen - 3] = '\0';
if (iWordLen > 6 && (sNewWord[iWordLen - 4] == sNewWord[iWordLen - 5]) if (iWordLen > 6 && (sNewWord[iWordLen - 4] == sNewWord[iWordLen - 5])
&& !bIsVowel(sNewWord[iWordLen - 5]) && bIsVowel(sNewWord[iWordLen - 6])) { //doubled && !bIsVowel(sNewWord[iWordLen - 5]) && bIsVowel(sNewWord[iWordLen - 6])) { // doubled
sNewWord[iWordLen - 4] = '\0'; sNewWord[iWordLen - 4] = '\0';
if (oLib[iLib]->Lookup(sNewWord, iWordIndices)) if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
bFound = true; bFound = true;
@@ -1162,7 +1148,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
g_free(casestr); g_free(casestr);
} }
if (!bFound) if (!bFound)
sNewWord[iWordLen - 4] = sNewWord[iWordLen - 5]; //restore sNewWord[iWordLen - 4] = sNewWord[iWordLen - 5]; // restore
} }
} }
if (!bFound) { if (!bFound) {
@@ -1196,7 +1182,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
} }
} }
//cut two char "es" // cut two char "es"
if (!bFound && iWordLen > 3) { if (!bFound && iWordLen > 3) {
isupcase = (!strncmp(&sWord[iWordLen - 2], "ES", 2) && (sWord[iWordLen - 3] == 'S' || sWord[iWordLen - 3] == 'X' || sWord[iWordLen - 3] == 'O' || (iWordLen > 4 && sWord[iWordLen - 3] == 'H' && (sWord[iWordLen - 4] == 'C' || sWord[iWordLen - 4] == 'S')))); isupcase = (!strncmp(&sWord[iWordLen - 2], "ES", 2) && (sWord[iWordLen - 3] == 'S' || sWord[iWordLen - 3] == 'X' || sWord[iWordLen - 3] == 'O' || (iWordLen > 4 && sWord[iWordLen - 3] == 'H' && (sWord[iWordLen - 4] == 'C' || sWord[iWordLen - 4] == 'S'))));
if (isupcase || (!strncmp(&sWord[iWordLen - 2], "es", 2) && (sWord[iWordLen - 3] == 's' || sWord[iWordLen - 3] == 'x' || sWord[iWordLen - 3] == 'o' || (iWordLen > 4 && sWord[iWordLen - 3] == 'h' && (sWord[iWordLen - 4] == 'c' || sWord[iWordLen - 4] == 's'))))) { if (isupcase || (!strncmp(&sWord[iWordLen - 2], "es", 2) && (sWord[iWordLen - 3] == 's' || sWord[iWordLen - 3] == 'x' || sWord[iWordLen - 3] == 'o' || (iWordLen > 4 && sWord[iWordLen - 3] == 'h' && (sWord[iWordLen - 4] == 'c' || sWord[iWordLen - 4] == 's'))))) {
@@ -1215,14 +1201,14 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
} }
} }
//cut "ed" // cut "ed"
if (!bFound && iWordLen > 3) { if (!bFound && iWordLen > 3) {
isupcase = !strncmp(&sWord[iWordLen - 2], "ED", 2); isupcase = !strncmp(&sWord[iWordLen - 2], "ED", 2);
if (isupcase || !strncmp(&sWord[iWordLen - 2], "ed", 2)) { if (isupcase || !strncmp(&sWord[iWordLen - 2], "ed", 2)) {
strcpy(sNewWord, sWord); strcpy(sNewWord, sWord);
sNewWord[iWordLen - 2] = '\0'; sNewWord[iWordLen - 2] = '\0';
if (iWordLen > 5 && (sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4]) if (iWordLen > 5 && (sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4])
&& !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { //doubled && !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { // doubled
sNewWord[iWordLen - 3] = '\0'; sNewWord[iWordLen - 3] = '\0';
if (oLib[iLib]->Lookup(sNewWord, iWordIndices)) if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
bFound = true; bFound = true;
@@ -1236,7 +1222,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
g_free(casestr); g_free(casestr);
} }
if (!bFound) if (!bFound)
sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; //restore sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; // restore
} }
} }
if (!bFound) { if (!bFound) {
@@ -1386,8 +1372,8 @@ bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_si
if (progress_func) if (progress_func)
progress_func(); progress_func();
//if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) { // if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) {
//there are Chinese dicts and English dicts... // there are Chinese dicts and English dicts...
const int iwords = narticles(iLib); const int iwords = narticles(iLib);
for (int index = 0; index < iwords; index++) { for (int index = 0; index < iwords; index++) {
@@ -1409,11 +1395,11 @@ bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_si
bool bAlreadyInList = false; bool bAlreadyInList = false;
int iMaxDistanceAt = 0; int iMaxDistanceAt = 0;
for (int j = 0; j < reslist_size; j++) { for (int j = 0; j < reslist_size; j++) {
if (oFuzzystruct[j].pMatchWord && strcmp(oFuzzystruct[j].pMatchWord, sCheck) == 0) { //already in list if (oFuzzystruct[j].pMatchWord && strcmp(oFuzzystruct[j].pMatchWord, sCheck) == 0) { // already in list
bAlreadyInList = true; bAlreadyInList = true;
break; break;
} }
//find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time. // find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time.
if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance) { if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance) {
iMaxDistanceAt = j; iMaxDistanceAt = j;
} }
@@ -1460,8 +1446,8 @@ gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord)
GPatternSpec *pspec = g_pattern_spec_new(word); GPatternSpec *pspec = g_pattern_spec_new(word);
for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) { for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) {
//if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib)) // if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
// -iMatchCount,so save time,but may got less result and the word may repeat. // -iMatchCount,so save time,but may got less result and the word may repeat.
if (oLib[iLib]->LookupWithRule(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB + 1)) { if (oLib[iLib]->LookupWithRule(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB + 1)) {
if (progress_func) if (progress_func)
@@ -1470,7 +1456,7 @@ gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord)
const gchar *sMatchWord = poGetWord(aiIndex[i], iLib); const gchar *sMatchWord = poGetWord(aiIndex[i], iLib);
bool bAlreadyInList = false; bool bAlreadyInList = false;
for (int j = 0; j < iMatchCount; j++) { for (int j = 0; j < iMatchCount; j++) {
if (strcmp(ppMatchWord[j], sMatchWord) == 0) { //already in list if (strcmp(ppMatchWord[j], sMatchWord) == 0) { // already in list
bAlreadyInList = true; bAlreadyInList = true;
break; break;
} }

View File

@@ -1,10 +1,8 @@
#pragma once #pragma once
#include <cstdio>
#include <cstring> #include <cstring>
#include <functional> #include <functional>
#include <list> #include <list>
#include <map>
#include <memory> #include <memory>
#include <set> #include <set>
#include <string> #include <string>
@@ -30,7 +28,7 @@ inline void set_uint32(gchar *addr, guint32 val)
struct cacheItem { struct cacheItem {
guint32 offset; guint32 offset;
gchar *data; gchar *data;
//write code here to make it inline // write code here to make it inline
cacheItem() { data = nullptr; } cacheItem() { data = nullptr; }
~cacheItem() { g_free(data); } ~cacheItem() { g_free(data); }
}; };
@@ -68,7 +66,7 @@ private:
gint cache_cur = 0; gint cache_cur = 0;
}; };
//this structure contain all information about dictionary // this structure contain all information about dictionary
struct DictInfo { struct DictInfo {
std::string ifo_file_name; std::string ifo_file_name;
guint32 wordcount; guint32 wordcount;
@@ -79,8 +77,8 @@ struct DictInfo {
std::string website; std::string website;
std::string date; std::string date;
std::string description; std::string description;
guint32 index_file_size; off_t index_file_size;
guint32 syn_file_size; off_t syn_file_size;
std::string sametypesequence; std::string sametypesequence;
bool load_from_ifo_file(const std::string &ifofilename, bool istreedict); bool load_from_ifo_file(const std::string &ifofilename, bool istreedict);
@@ -93,12 +91,13 @@ public:
guint32 wordentry_size; guint32 wordentry_size;
virtual ~IIndexFile() {} virtual ~IIndexFile() {}
virtual bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) = 0; virtual bool load(const std::string &url, gulong wc, off_t fsize, bool verbose) = 0;
virtual const gchar *get_key(glong idx) = 0; virtual const gchar *get_key(glong idx) = 0;
virtual void get_data(glong idx) = 0; virtual void get_data(glong idx) = 0;
virtual const gchar *get_key_and_data(glong idx) = 0; virtual const gchar *get_key_and_data(glong idx) = 0;
virtual bool lookup(const char *str, std::set<glong> &idxs, glong &next_idx) = 0; virtual bool lookup(const char *str, std::set<glong> &idxs, glong &next_idx) = 0;
virtual bool lookup(const char *str, std::set<glong> &idxs) { virtual bool lookup(const char *str, std::set<glong> &idxs)
{
glong unused_next_idx; glong unused_next_idx;
return lookup(str, idxs, unused_next_idx); return lookup(str, idxs, unused_next_idx);
}; };
@@ -144,7 +143,8 @@ public:
*size = idx_file->wordentry_size; *size = idx_file->wordentry_size;
} }
bool Lookup(const char *str, std::set<glong> &idxs, glong &next_idx); bool Lookup(const char *str, std::set<glong> &idxs, glong &next_idx);
bool Lookup(const char *str, std::set<glong> &idxs) { bool Lookup(const char *str, std::set<glong> &idxs)
{
glong unused_next_idx; glong unused_next_idx;
return Lookup(str, idxs, unused_next_idx); return Lookup(str, idxs, unused_next_idx);
} }
@@ -160,7 +160,7 @@ private:
std::unique_ptr<IIndexFile> idx_file; std::unique_ptr<IIndexFile> idx_file;
std::unique_ptr<SynFile> syn_file; std::unique_ptr<SynFile> syn_file;
bool load_ifofile(const std::string &ifofilename, gulong &idxfilesize); bool load_ifofile(const std::string &ifofilename, off_t &idxfilesize);
}; };
class Libs class Libs
@@ -169,7 +169,7 @@ public:
Libs(std::function<void(void)> f = std::function<void(void)>()) Libs(std::function<void(void)> f = std::function<void(void)>())
{ {
progress_func = f; progress_func = f;
iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg. iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; // need to read from cfg.
} }
void setVerbose(bool verbose) { verbose_ = verbose; } void setVerbose(bool verbose) { verbose_ = verbose; }
void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; } void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; }

View File

@@ -0,0 +1,9 @@
StarDict's dict ifo file
version=3.0.0
bookname=Russian-English Dictionary (ru-en)
wordcount=415144
idxfilesize=12344255
sametypesequence=h
synwordcount=1277580
author=Vuizur
description=

View File

@@ -18,7 +18,8 @@ test_json() {
fi fi
} }
test_json '[{"name": "Test synonyms", "wordcount": "2"}, test_json '[{"name": "Russian-English Dictionary (ru-en)", "wordcount": "415144"},
{"name": "Test synonyms", "wordcount": "2"},
{"name": "Test multiple results", "wordcount": "246"}, {"name": "Test multiple results", "wordcount": "246"},
{"name": "Sample 1 test dictionary", "wordcount": "1"}, {"name": "Sample 1 test dictionary", "wordcount": "1"},
{"name": "test_dict", "wordcount": "1"}]' -x -j -l -n --data-dir "$TEST_DIR" {"name": "test_dict", "wordcount": "1"}]' -x -j -l -n --data-dir "$TEST_DIR"

18
tests/t_newlines_in_ifo Executable file
View File

@@ -0,0 +1,18 @@
#!/bin/sh
# Runs sdcv against the "not-unix-newlines-ifo" data directory and checks
# the last line of its output.
#
# Arguments:
#   $1 - path to the sdcv executable
#   $2 - directory that contains the test data
#
# Exit status: 0 when the last output line equals the expected entry,
# 1 otherwise (the unexpected line is reported on stderr).
# NOTE(review): judging by the directory name, the fixture's .ifo file
# presumably uses non-Unix line endings - confirm against the test data.

set -e

PATH_TO_SDCV="$1"
TEST_DIR="$2"

# Drop these variables before invoking sdcv (presumably so the caller's
# pager and data-dir settings cannot affect the output).
unset SDCV_PAGER
unset STARDICT_DATA_DIR

EXPECTED="Russian-English Dictionary (ru-en) 415144"

# Only the final line printed by sdcv is compared.
RES=$("$PATH_TO_SDCV" -n -x --data-dir="$TEST_DIR/not-unix-newlines-ifo" -l | tail -n 1)

if [ "$RES" != "$EXPECTED" ]; then
    echo "test failed, unexpected result: $RES" >&2
    exit 1
fi

exit 0