version 0.5.5

fix CI build: ubuntu-18.04 is no longer supported by GitHub Actions
fix CI build
2025-12-15 17:31:56 +00:00 · 2023-04-18 21:47:55 +03:00 · 2023-04-18 21:44:18 +03:00 · 2023-01-16 16:44:09 +03:00 · 2022-09-16 18:48:08 +03:00 · 2022-09-16 18:48:08 +03:00
17 changed files with 211 additions and 176 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -15,7 +15,7 @@ BreakBeforeBinaryOperators: true
 BreakBeforeTernaryOperators: true
 BreakConstructorInitializersBeforeComma: true
 BinPackParameters: true
-ColumnLimit:     0
+ColumnLimit:     120
 ConstructorInitializerAllOnOneLineOrOnePerLine: false
 DerivePointerAlignment: false
 ExperimentalAutoDetectBinPacking: false
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -20,12 +20,13 @@ jobs:
      fail-fast: true
      matrix:
-        os: [ubuntu-latest]
+        os: [ubuntu-20.04, ubuntu-latest]
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
        with:
          submodules: 'recursive'
      - uses: jwlawson/actions-setup-cmake@v1.4
        if: matrix.os != 'ubuntu-latest'
        with:
          cmake-version: '3.5.1'
          github-api-token: ${{ secrets.GITHUB_TOKEN }}
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,10 @@ project(sdcv)
 cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
 cmake_policy(VERSION 3.5)
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CXX_STANDARD_REQUIRED True)
 set(CMAKE_CXX_EXTENSIONS False)
 include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/compiler.cmake")
 set(ZLIB_FIND_REQUIRED True)
@@ -91,7 +95,7 @@ set(CPACK_PACKAGE_VENDOR "Evgeniy Dushistov <dushistov@mail.ru>")
 set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.org")
 set(CPACK_PACKAGE_VERSION_MAJOR "0")
 set(CPACK_PACKAGE_VERSION_MINOR "5")
-set(CPACK_PACKAGE_VERSION_PATCH "3")
+set(CPACK_PACKAGE_VERSION_PATCH "5")
 set(sdcv_VERSION
 	"${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
@@ -144,5 +148,6 @@ if (BUILD_TESTS)
  add_sdcv_shell_test(t_datadir)
  add_sdcv_shell_test(t_return_code)
  add_sdcv_shell_test(t_multiple_results)
  add_sdcv_shell_test(t_newlines_in_ifo)
 endif (BUILD_TESTS)
--- a/12
+++ b/12
@@ -1,3 +1,13 @@
 Version 0.5.5
    - Avoid crashes when passing unknown dicts to the -u flag (by NiLuJe)
    - Use off_t for stuff mainly assigned to a stat.st_size value
 Version 0.5.4
    - Use binary search for synonyms
    - Various improvments in work with synonyms
    - Added --json (same as --json-output) to match man
    - Show all matched result
    - More robust parsing of ifo file
    - Prevent crash if file size of files not matched expecting one for .oft files
 Version 0.5.3
  - Use single quotes around JSON data to reduce need for escaping
  - Store integer magic in cache file
@@ -36,7 +46,7 @@ Version 0.4.2
 * Russian translation update
 Version 0.4.1
-* Recreate cache if idx file was modified 
+* Recreate cache if idx file was modified
 * Abbility to use pager(SDCV_PAGER)
 * Add Chinese (traditional)  translation
 * Add  Ukrainian translation
--- a/README.org
+++ b/README.org
@@ -1,6 +1,9 @@
 #+OPTIONS: ^:nil
 [[https://github.com/Dushistov/sdcv/actions?query=workflow%3ACI+branch%3Amaster][https://github.com/Dushistov/sdcv/workflows/CI/badge.svg]]
 [[https://github.com/Dushistov/sdcv/blob/master/LICENSE][https://img.shields.io/badge/license-GPL%202-brightgreen.svg]]
 * sdcv
 *sdcv* is a simple, cross-platform, text-based utility for working with dictionaries in [[http://stardict-4.sourceforge.net/][StarDict]] format.
 * How to compile and install
 #+BEGIN_SRC sh
 mkdir /tmp/build-sdcv
--- a/cmake/compiler.cmake
+++ b/cmake/compiler.cmake
@@ -16,19 +16,6 @@ if (NOT DEFINED SDCV_COMPILER_IS_GCC_COMPATIBLE)
  endif()
 endif()
 if (MSVC AND (MSVC_VERSION LESS 1900))
  message(FATAL_ERROR "MSVC version ${MSVC_VERSION} have no full c++11 support")
 elseif (MSVC)
  add_definitions(-DNOMINMAX)
 elseif (NOT MSVC)
  check_cxx_compiler_flag("-std=c++11" CXX_SUPPORTS_CXX11)
  if (CXX_SUPPORTS_CXX11)
    append("-std=c++11" CMAKE_CXX_FLAGS)
  else ()
    message(FATAL_ERROR "sdcv requires C++11 support but the '-std=c++11' flag isn't supported.")
  endif()
 endif ()
 if (SDCV_COMPILER_IS_GCC_COMPATIBLE)
  append("-Wall" "-Wextra" "-Wformat-security" "-Wcast-align" "-Werror=format" "-Wcast-qual" CMAKE_C_FLAGS)
  append("-Wall" "-pedantic" "-Wextra" "-Wformat-security" "-Wcast-align" "-Werror=format" "-Wcast-qual" CMAKE_CXX_FLAGS)
--- a/src/dictziplib.hpp
+++ b/src/dictziplib.hpp
@@ -27,7 +27,7 @@ public:
 private:
    const char *start; /* start of mmap'd area */
    const char *end; /* end of mmap'd area */
-    unsigned long size; /* size of mmap */
+    off_t size; /* size of mmap */
    int type;
    z_stream zStream;
@@ -47,7 +47,7 @@ private:
    std::string origFilename;
    std::string comment;
    unsigned long crc;
-    unsigned long length;
+    off_t length;
    unsigned long compressedLength;
    DictCache cache[DICT_CACHE_SIZE];
    MapFile mapfile;
--- a/src/mapfile.hpp
+++ b/src/mapfile.hpp
@@ -7,6 +7,7 @@
 #ifdef HAVE_MMAP
 #include <fcntl.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #endif
 #ifdef _WIN32
@@ -21,13 +22,13 @@ public:
    ~MapFile();
    MapFile(const MapFile &) = delete;
    MapFile &operator=(const MapFile &) = delete;
-    bool open(const char *file_name, unsigned long file_size);
+    bool open(const char *file_name, off_t file_size);
    gchar *begin() { return data; }
 private:
    char *data = nullptr;
    unsigned long size = 0ul;
 #ifdef HAVE_MMAP
    size_t size = 0u;
    int mmap_fd = -1;
 #elif defined(_WIN32)
    HANDLE hFile = 0;
@@ -35,25 +36,31 @@ private:
 #endif
 };
-inline bool MapFile::open(const char *file_name, unsigned long file_size)
+inline bool MapFile::open(const char *file_name, off_t file_size)
 {
    size = file_size;
 #ifdef HAVE_MMAP
    if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) {
-        //g_print("Open file %s failed!\n",fullfilename);
+        // g_print("Open file %s failed!\n",fullfilename);
        return false;
    }
-    data = (gchar *)mmap(nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0);
+    struct stat st;
    if (fstat(mmap_fd, &st) == -1 || st.st_size < 0 || (st.st_size == 0 && S_ISREG(st.st_mode))
        || st.st_size != file_size) {
        close(mmap_fd);
        return false;
    }
    size = static_cast<size_t>(st.st_size);
    data = (gchar *)mmap(nullptr, size, PROT_READ, MAP_SHARED, mmap_fd, 0);
    if ((void *)data == (void *)(-1)) {
-        //g_print("mmap file %s failed!\n",idxfilename);
+        // g_print("mmap file %s failed!\n",idxfilename);
        size = 0u;
        data = nullptr;
        return false;
    }
 #elif defined(_WIN32)
-    hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS,
+    hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0);
-                       FILE_ATTRIBUTE_NORMAL, 0);
+    hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0, file_size, nullptr);
    hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0,
                                 file_size, nullptr);
    data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size);
 #else
    gsize read_len;
--- a/src/sdcv.cpp
+++ b/src/sdcv.cpp
@@ -186,10 +186,13 @@ try {
        }
        // add bookname to list
-        gchar **p = get_impl(use_dict_list);
+        for (gchar **p = get_impl(use_dict_list); *p != nullptr; ++p) {
-        while (*p) {
+            auto it = bookname_to_ifo.find(*p);
-            order_list.push_back(bookname_to_ifo.at(*p));
+            if (it != bookname_to_ifo.end()) {
-            ++p;
+                order_list.push_back(it->second);
            } else {
                fprintf(stderr, _("Unknown dictionary: %s\n"), *p);
            }
        }
    } else {
        std::string ordering_cfg_file = std::string(g_get_user_config_dir()) + G_DIR_SEPARATOR_S "sdcv_ordering";
@@ -201,7 +204,12 @@ try {
        if (ordering_file != nullptr) {
            std::string line;
            while (stdio_getline(ordering_file, line)) {
-                order_list.push_back(bookname_to_ifo.at(line));
+                auto it = bookname_to_ifo.find(line);
                if (it != bookname_to_ifo.end()) {
                    order_list.push_back(it->second);
                } else {
                    fprintf(stderr, _("Unknown dictionary: %s\n"), line.c_str());
                }
            }
            fclose(ordering_file);
        }
--- a/src/stardict_lib.cpp
+++ b/src/stardict_lib.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <cctype>
 #include <cstring>
 #include <map>
 #include <stdexcept>
 #include <glib/gstdio.h>
@@ -47,9 +48,9 @@ static bool bIsPureEnglish(const gchar *str)
 {
    // i think this should work even when it is UTF8 string :).
    for (int i = 0; str[i] != 0; i++)
-        //if(str[i]<0)
+        // if(str[i]<0)
-        //if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK.
+        // if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK.
-        // Better use isascii() but not str[i]<0 while char is default unsigned in arm
+        //  Better use isascii() but not str[i]<0 while char is default unsigned in arm
        if (!isascii(str[i]))
            return false;
    return true;
@@ -78,108 +79,93 @@ bool DictInfo::load_from_ifo_file(const std::string &ifofilename,
 {
    ifo_file_name = ifofilename;
    glib::CharStr buffer;
-    if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), nullptr, nullptr))
+    gsize length = 0;
    if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), &length, nullptr)) {
        fprintf(stderr, "Can not read from %s\n", ifofilename.c_str());
        return false;
    }
    static const char TREEDICT_MAGIC_DATA[] = "StarDict's treedict ifo file";
    static const char DICT_MAGIC_DATA[] = "StarDict's dict ifo file";
    const gchar *magic_data = istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA;
-    static const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF, '\0' };
+    static const gchar utf8_bom[] = { (gchar)0xEF, (gchar)0xBB, (gchar)0xBF, '\0' };
-    if (!g_str_has_prefix(
+
-            g_str_has_prefix(get_impl(buffer), (const gchar *)(utf8_bom)) ? get_impl(buffer) + 3 : get_impl(buffer),
+    const gchar *p = get_impl(buffer);
-            magic_data)) {
+    const gchar *end = p + length;
    if (g_str_has_prefix(p, utf8_bom)) {
        p += strlen(utf8_bom);
    }
    if (!g_str_has_prefix(p, magic_data)) {
        fprintf(stderr, "No magic header(%s) in ifo file\n", magic_data);
        return false;
    }
    p += strlen(magic_data);
-    gchar *p1 = get_impl(buffer) + strlen(magic_data) - 1;
+    std::map<std::string, std::string> key_value_map;
    while (p != end) {
        auto key_it = std::find_if(p, end, [](gchar ch) { return !g_ascii_isspace(ch); });
        if (key_it == end) {
            break;
        }
        auto eq_it = std::find(key_it, end, gchar('='));
        if (eq_it == end) {
            fprintf(stderr, "Invalid part of ifo (no '=') here: %s\n", key_it);
            return false;
        }
        auto val_it = std::find_if(eq_it + 1, end, [](gchar ch) { return !g_ascii_isspace(ch); });
        if (val_it == end) {
            key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string()));
            break;
        }
-    gchar *p2 = strstr(p1, "\nwordcount=");
+        auto line_end_it = std::find_if(val_it, end, [](gchar ch) { return ch == '\r' || ch == '\n'; });
-    if (p2 == nullptr)
+        key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string(val_it, line_end_it)));
-        return false;
+        if (line_end_it == end)
            break;
        p = line_end_it + 1;
    }
-    gchar *p3 = strchr(p2 + sizeof("\nwordcount=") - 1, '\n');
+    std::map<std::string, std::string>::const_iterator it;
 #define FIND_KEY(_key_)                                            \
    it = key_value_map.find(_key_);                                \
    if (it == key_value_map.end()) {                               \
        fprintf(stderr, "Can not find '%s' in ifo file\n", _key_); \
        return false;                                              \
    }
-    wordcount = atol(std::string(p2 + sizeof("\nwordcount=") - 1, p3 - (p2 + sizeof("\nwordcount=") - 1)).c_str());
+    FIND_KEY("wordcount")
    wordcount = atol(it->second.c_str());
    if (istreedict) {
-        p2 = strstr(p1, "\ntdxfilesize=");
+        FIND_KEY("tdxfilesize")
-        if (p2 == nullptr)
+        index_file_size = atol(it->second.c_str());
            return false;
        p3 = strchr(p2 + sizeof("\ntdxfilesize=") - 1, '\n');
        index_file_size = atol(std::string(p2 + sizeof("\ntdxfilesize=") - 1, p3 - (p2 + sizeof("\ntdxfilesize=") - 1)).c_str());
    } else {
        FIND_KEY("idxfilesize")
        index_file_size = atol(it->second.c_str());
    }
    FIND_KEY("bookname")
    bookname = it->second;
-        p2 = strstr(p1, "\nidxfilesize=");
+#define SET_IF_EXISTS(_key_)         \
-        if (p2 == nullptr)
+    it = key_value_map.find(#_key_); \
-            return false;
+    if (it != key_value_map.end()) { \
-
+        _key_ = it->second;          \
        p3 = strchr(p2 + sizeof("\nidxfilesize=") - 1, '\n');
        index_file_size = atol(std::string(p2 + sizeof("\nidxfilesize=") - 1, p3 - (p2 + sizeof("\nidxfilesize=") - 1)).c_str());
    }
-    p2 = strstr(p1, "\nbookname=");
+    SET_IF_EXISTS(author)
-
+    SET_IF_EXISTS(email)
-    if (p2 == nullptr)
+    SET_IF_EXISTS(website)
-        return false;
+    SET_IF_EXISTS(date)
-
+    SET_IF_EXISTS(description)
-    p2 = p2 + sizeof("\nbookname=") - 1;
+    SET_IF_EXISTS(sametypesequence)
    p3 = strchr(p2, '\n');
    bookname.assign(p2, p3 - p2);
    p2 = strstr(p1, "\nauthor=");
    if (p2) {
        p2 = p2 + sizeof("\nauthor=") - 1;
        p3 = strchr(p2, '\n');
        author.assign(p2, p3 - p2);
    }
    p2 = strstr(p1, "\nemail=");
    if (p2) {
        p2 = p2 + sizeof("\nemail=") - 1;
        p3 = strchr(p2, '\n');
        email.assign(p2, p3 - p2);
    }
    p2 = strstr(p1, "\nwebsite=");
    if (p2) {
        p2 = p2 + sizeof("\nwebsite=") - 1;
        p3 = strchr(p2, '\n');
        website.assign(p2, p3 - p2);
    }
    p2 = strstr(p1, "\ndate=");
    if (p2) {
        p2 = p2 + sizeof("\ndate=") - 1;
        p3 = strchr(p2, '\n');
        date.assign(p2, p3 - p2);
    }
    p2 = strstr(p1, "\ndescription=");
    if (p2) {
        p2 = p2 + sizeof("\ndescription=") - 1;
        p3 = strchr(p2, '\n');
        description.assign(p2, p3 - p2);
    }
    p2 = strstr(p1, "\nsametypesequence=");
    if (p2) {
        p2 += sizeof("\nsametypesequence=") - 1;
        p3 = strchr(p2, '\n');
        sametypesequence.assign(p2, p3 - p2);
    }
    p2 = strstr(p1, "\nsynwordcount=");
    syn_wordcount = 0;
-    if (p2) {
+    it = key_value_map.find("synwordcount");
-        p2 += sizeof("\nsynwordcount=") - 1;
+    if (it != key_value_map.end())
-        p3 = strchr(p2, '\n');
+        syn_wordcount = atol(it->second.c_str());
-        syn_wordcount = atol(std::string(p2, p3 - p2).c_str());
+#undef FIND_KEY
-    }
+#undef SET_IF_EXISTS
    return true;
 }
@@ -204,10 +190,10 @@ gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
        guint32 data_size;
        gint sametypesequence_len = sametypesequence.length();
-        //there have sametypesequence_len char being omitted.
+        // there have sametypesequence_len char being omitted.
        data_size = idxitem_size + sizeof(guint32) + sametypesequence_len;
-        //if the last item's size is determined by the end up '\0',then +=sizeof(gchar);
+        // if the last item's size is determined by the end up '\0',then +=sizeof(gchar);
-        //if the last item's size is determined by the head guint32 type data,then +=sizeof(guint32);
+        // if the last item's size is determined by the head guint32 type data,then +=sizeof(guint32);
        switch (sametypesequence[sametypesequence_len - 1]) {
        case 'm':
        case 't':
@@ -234,7 +220,7 @@ gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
        p1 = data + sizeof(guint32);
        p2 = get_impl(origin_data);
        guint32 sec_size;
-        //copy the head items.
+        // copy the head items.
        for (int i = 0; i < sametypesequence_len - 1; i++) {
            *p1 = sametypesequence[i];
            p1 += sizeof(gchar);
@@ -272,7 +258,7 @@ gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
                break;
            }
        }
-        //calculate the last item 's size.
+        // calculate the last item 's size.
        sec_size = idxitem_size - (p2 - get_impl(origin_data));
        *p1 = sametypesequence[sametypesequence_len - 1];
        p1 += sizeof(gchar);
@@ -286,7 +272,7 @@ gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size)
        case 'k':
            memcpy(p1, p2, sec_size);
            p1 += sec_size;
-            *p1 = '\0'; //add the end up '\0';
+            *p1 = '\0'; // add the end up '\0';
            break;
        case 'W':
        case 'P':
@@ -443,7 +429,7 @@ public:
        if (idxfile)
            fclose(idxfile);
    }
-    bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) override;
+    bool load(const std::string &url, gulong wc, off_t fsize, bool verbose) override;
    const gchar *get_key(glong idx) override;
    void get_data(glong idx) override { get_key(idx); }
    const gchar *get_key_and_data(glong idx) override
@@ -503,7 +489,7 @@ public:
    {
    }
    ~WordListIndex() { g_free(idxdatabuf); }
-    bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) override;
+    bool load(const std::string &url, gulong wc, off_t fsize, bool verbose) override;
    const gchar *get_key(glong idx) override { return wordlist[idx]; }
    void get_data(glong idx) override;
    const gchar *get_key_and_data(glong idx) override
@@ -542,7 +528,7 @@ inline const gchar *OffsetIndex::read_first_on_page_key(glong page_idx)
                                std::min(sizeof(wordentry_buf), static_cast<size_t>(page_size)),
                                1, idxfile);
    THROW_IF_ERROR(nitems == 1);
-    //TODO: check returned values, deal with word entry that strlen>255.
+    // TODO: check returned values, deal with word entry that strlen>255.
    return wordentry_buf;
 }
@@ -629,12 +615,12 @@ bool OffsetIndex::save_cache(const std::string &url, bool verbose)
    return false;
 }
-bool OffsetIndex::load(const std::string &url, gulong wc, gulong fsize, bool verbose)
+bool OffsetIndex::load(const std::string &url, gulong wc, off_t fsize, bool verbose)
 {
    wordcount = wc;
    gulong npages = (wc - 1) / ENTR_PER_PAGE + 2;
    wordoffset.resize(npages);
-    if (!load_cache(url)) { //map file will close after finish of block
+    if (!load_cache(url)) { // map file will close after finish of block
        MapFile map_file;
        if (!map_file.open(url.c_str(), fsize))
            return false;
@@ -756,10 +742,10 @@ bool OffsetIndex::lookup(const char *str, std::set<glong> &idxs, glong &next_idx
    }
    if (!bFound)
-        next_idx = iPage*ENTR_PER_PAGE + iFrom; // next
+        next_idx = iPage * ENTR_PER_PAGE + iFrom; // next
    else {
        // Convert the found in-page index to the dict index.
-        iThisIndex = iPage*ENTR_PER_PAGE + iThisIndex;
+        iThisIndex = iPage * ENTR_PER_PAGE + iThisIndex;
        // In order to return all idxs that match the search string, walk
        // linearly behind and ahead of the found index.
        glong iHeadIndex = iThisIndex - 1; // do not include iThisIndex
@@ -772,7 +758,7 @@ bool OffsetIndex::lookup(const char *str, std::set<glong> &idxs, glong &next_idx
    return bFound;
 }
-bool WordListIndex::load(const std::string &url, gulong wc, gulong fsize, bool)
+bool WordListIndex::load(const std::string &url, gulong wc, off_t fsize, bool)
 {
    gzFile in = gzopen(url.c_str(), "rb");
    if (in == nullptr)
@@ -785,7 +771,7 @@ bool WordListIndex::load(const std::string &url, gulong wc, gulong fsize, bool)
    if (len < 0)
        return false;
-    if (gulong(len) != fsize)
+    if (static_cast<off_t>(len) != fsize)
        return false;
    wordlist.resize(wc + 1);
@@ -834,7 +820,7 @@ bool WordListIndex::lookup(const char *str, std::set<glong> &idxs, glong &next_i
            }
        }
        if (!bFound)
-            next_idx = iFrom; //next
+            next_idx = iFrom; // next
        else {
            // In order to return all idxs that match the search string, walk
            // linearly behind and ahead of the found index.
@@ -905,19 +891,19 @@ bool SynFile::lookup(const char *str, std::set<glong> &idxs, glong &next_idx)
            }
        }
        if (!bFound)
-            next_idx = iFrom; //next
+            next_idx = iFrom; // next
        else {
            // In order to return all idxs that match the search string, walk
            // linearly behind and ahead of the found index.
            glong iHeadIndex = iThisIndex - 1; // do not include iThisIndex
            while (iHeadIndex >= 0 && stardict_strcmp(str, get_key(iHeadIndex)) == 0) {
                const gchar *key = get_key(iHeadIndex--);
-                idxs.insert(g_ntohl(get_uint32(key+strlen(key)+1)));
+                idxs.insert(g_ntohl(get_uint32(key + strlen(key) + 1)));
            }
            do {
                // no need to double-check iThisIndex -- we know it's a match already
                const gchar *key = get_key(iThisIndex++);
-                idxs.insert(g_ntohl(get_uint32(key+strlen(key)+1)));
+                idxs.insert(g_ntohl(get_uint32(key + strlen(key) + 1)));
            } while (iThisIndex <= iLast && stardict_strcmp(str, get_key(iThisIndex)) == 0);
        }
    }
@@ -934,7 +920,7 @@ bool Dict::Lookup(const char *str, std::set<glong> &idxs, glong &next_idx)
 bool Dict::load(const std::string &ifofilename, bool verbose)
 {
-    gulong idxfilesize;
+    off_t idxfilesize;
    if (!load_ifofile(ifofilename, idxfilesize))
        return false;
@@ -944,14 +930,14 @@ bool Dict::load(const std::string &ifofilename, bool verbose)
    if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
        dictdzfile.reset(new DictData);
        if (!dictdzfile->open(fullfilename, 0)) {
-            //g_print("open file %s failed!\n",fullfilename);
+            // g_print("open file %s failed!\n",fullfilename);
            return false;
        }
    } else {
        fullfilename.erase(fullfilename.length() - sizeof(".dz") + 1, sizeof(".dz") - 1);
        dictfile = fopen(fullfilename.c_str(), "rb");
        if (!dictfile) {
-            //g_print("open file %s failed!\n",fullfilename);
+            // g_print("open file %s failed!\n",fullfilename);
            return false;
        }
    }
@@ -974,11 +960,11 @@ bool Dict::load(const std::string &ifofilename, bool verbose)
    syn_file.reset(new SynFile);
    syn_file->load(fullfilename, syn_wordcount);
-    //g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles());
+    // g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles());
    return true;
 }
-bool Dict::load_ifofile(const std::string &ifofilename, gulong &idxfilesize)
+bool Dict::load_ifofile(const std::string &ifofilename, off_t &idxfilesize)
 {
    DictInfo dict_info;
    if (!dict_info.load_from_ifo_file(ifofilename, false))
@@ -1082,7 +1068,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
        gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1);
-        //cut one char "s" or "d"
+        // cut one char "s" or "d"
        if (!bFound && iWordLen > 1) {
            isupcase = sWord[iWordLen - 1] == 'S' || !strncmp(&sWord[iWordLen - 2], "ED", 2);
            if (isupcase || sWord[iWordLen - 1] == 's' || !strncmp(&sWord[iWordLen - 2], "ed", 2)) {
@@ -1101,14 +1087,14 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
            }
        }
-        //cut "ly"
+        // cut "ly"
        if (!bFound && iWordLen > 2) {
            isupcase = !strncmp(&sWord[iWordLen - 2], "LY", 2);
            if (isupcase || (!strncmp(&sWord[iWordLen - 2], "ly", 2))) {
                strcpy(sNewWord, sWord);
                sNewWord[iWordLen - 2] = '\0'; // cut "ly"
                if (iWordLen > 5 && sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4]
-                    && !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { //doubled
+                    && !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { // doubled
                    sNewWord[iWordLen - 3] = '\0';
                    if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
@@ -1123,7 +1109,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
                            g_free(casestr);
                        }
                        if (!bFound)
-                            sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; //restore
+                            sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; // restore
                    }
                }
                if (!bFound) {
@@ -1141,14 +1127,14 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
            }
        }
-        //cut "ing"
+        // cut "ing"
        if (!bFound && iWordLen > 3) {
            isupcase = !strncmp(&sWord[iWordLen - 3], "ING", 3);
            if (isupcase || !strncmp(&sWord[iWordLen - 3], "ing", 3)) {
                strcpy(sNewWord, sWord);
                sNewWord[iWordLen - 3] = '\0';
                if (iWordLen > 6 && (sNewWord[iWordLen - 4] == sNewWord[iWordLen - 5])
-                    && !bIsVowel(sNewWord[iWordLen - 5]) && bIsVowel(sNewWord[iWordLen - 6])) { //doubled
+                    && !bIsVowel(sNewWord[iWordLen - 5]) && bIsVowel(sNewWord[iWordLen - 6])) { // doubled
                    sNewWord[iWordLen - 4] = '\0';
                    if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
                        bFound = true;
@@ -1162,7 +1148,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
                            g_free(casestr);
                        }
                        if (!bFound)
-                            sNewWord[iWordLen - 4] = sNewWord[iWordLen - 5]; //restore
+                            sNewWord[iWordLen - 4] = sNewWord[iWordLen - 5]; // restore
                    }
                }
                if (!bFound) {
@@ -1196,7 +1182,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
            }
        }
-        //cut two char "es"
+        // cut two char "es"
        if (!bFound && iWordLen > 3) {
            isupcase = (!strncmp(&sWord[iWordLen - 2], "ES", 2) && (sWord[iWordLen - 3] == 'S' || sWord[iWordLen - 3] == 'X' || sWord[iWordLen - 3] == 'O' || (iWordLen > 4 && sWord[iWordLen - 3] == 'H' && (sWord[iWordLen - 4] == 'C' || sWord[iWordLen - 4] == 'S'))));
            if (isupcase || (!strncmp(&sWord[iWordLen - 2], "es", 2) && (sWord[iWordLen - 3] == 's' || sWord[iWordLen - 3] == 'x' || sWord[iWordLen - 3] == 'o' || (iWordLen > 4 && sWord[iWordLen - 3] == 'h' && (sWord[iWordLen - 4] == 'c' || sWord[iWordLen - 4] == 's'))))) {
@@ -1215,14 +1201,14 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
            }
        }
-        //cut "ed"
+        // cut "ed"
        if (!bFound && iWordLen > 3) {
            isupcase = !strncmp(&sWord[iWordLen - 2], "ED", 2);
            if (isupcase || !strncmp(&sWord[iWordLen - 2], "ed", 2)) {
                strcpy(sNewWord, sWord);
                sNewWord[iWordLen - 2] = '\0';
                if (iWordLen > 5 && (sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4])
-                    && !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { //doubled
+                    && !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { // doubled
                    sNewWord[iWordLen - 3] = '\0';
                    if (oLib[iLib]->Lookup(sNewWord, iWordIndices))
                        bFound = true;
@@ -1236,7 +1222,7 @@ bool Libs::LookupSimilarWord(const gchar *sWord, std::set<glong> &iWordIndices,
                            g_free(casestr);
                        }
                        if (!bFound)
-                            sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; //restore
+                            sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; // restore
                    }
                }
                if (!bFound) {
@@ -1386,8 +1372,8 @@ bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_si
        if (progress_func)
            progress_func();
-        //if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) {
+        // if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) {
-        //there are Chinese dicts and English dicts...
+        // there are Chinese dicts and English dicts...
        const int iwords = narticles(iLib);
        for (int index = 0; index < iwords; index++) {
@@ -1409,11 +1395,11 @@ bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_si
                bool bAlreadyInList = false;
                int iMaxDistanceAt = 0;
                for (int j = 0; j < reslist_size; j++) {
-                    if (oFuzzystruct[j].pMatchWord && strcmp(oFuzzystruct[j].pMatchWord, sCheck) == 0) { //already in list
+                    if (oFuzzystruct[j].pMatchWord && strcmp(oFuzzystruct[j].pMatchWord, sCheck) == 0) { // already in list
                        bAlreadyInList = true;
                        break;
                    }
-                    //find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time.
+                    // find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time.
                    if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance) {
                        iMaxDistanceAt = j;
                    }
@@ -1460,8 +1446,8 @@ gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord)
    GPatternSpec *pspec = g_pattern_spec_new(word);
    for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) {
-        //if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
+        // if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
-        // -iMatchCount,so save time,but may got less result and the word may repeat.
+        //  -iMatchCount,so save time,but may got less result and the word may repeat.
        if (oLib[iLib]->LookupWithRule(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB + 1)) {
            if (progress_func)
@@ -1470,7 +1456,7 @@ gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord)
                const gchar *sMatchWord = poGetWord(aiIndex[i], iLib);
                bool bAlreadyInList = false;
                for (int j = 0; j < iMatchCount; j++) {
-                    if (strcmp(ppMatchWord[j], sMatchWord) == 0) { //already in list
+                    if (strcmp(ppMatchWord[j], sMatchWord) == 0) { // already in list
                        bAlreadyInList = true;
                        break;
                    }
--- a/src/stardict_lib.hpp
+++ b/src/stardict_lib.hpp
@@ -1,10 +1,8 @@
 #pragma once
 #include <cstdio>
 #include <cstring>
 #include <functional>
 #include <list>
 #include <map>
 #include <memory>
 #include <set>
 #include <string>
@@ -30,7 +28,7 @@ inline void set_uint32(gchar *addr, guint32 val)
 struct cacheItem {
    guint32 offset;
    gchar *data;
-    //write code here to make it inline
+    // write code here to make it inline
    cacheItem() { data = nullptr; }
    ~cacheItem() { g_free(data); }
 };
@@ -68,7 +66,7 @@ private:
    gint cache_cur = 0;
 };
-//this structure contain all information about dictionary
+// this structure contain all information about dictionary
 struct DictInfo {
    std::string ifo_file_name;
    guint32 wordcount;
@@ -79,8 +77,8 @@ struct DictInfo {
    std::string website;
    std::string date;
    std::string description;
-    guint32 index_file_size;
+    off_t index_file_size;
-    guint32 syn_file_size;
+    off_t syn_file_size;
    std::string sametypesequence;
    bool load_from_ifo_file(const std::string &ifofilename, bool istreedict);
@@ -93,12 +91,13 @@ public:
    guint32 wordentry_size;
    virtual ~IIndexFile() {}
-    virtual bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) = 0;
+    virtual bool load(const std::string &url, gulong wc, off_t fsize, bool verbose) = 0;
    virtual const gchar *get_key(glong idx) = 0;
    virtual void get_data(glong idx) = 0;
    virtual const gchar *get_key_and_data(glong idx) = 0;
    virtual bool lookup(const char *str, std::set<glong> &idxs, glong &next_idx) = 0;
-    virtual bool lookup(const char *str, std::set<glong> &idxs) {
+    virtual bool lookup(const char *str, std::set<glong> &idxs)
    {
        glong unused_next_idx;
        return lookup(str, idxs, unused_next_idx);
    };
@@ -144,7 +143,8 @@ public:
        *size = idx_file->wordentry_size;
    }
    bool Lookup(const char *str, std::set<glong> &idxs, glong &next_idx);
-    bool Lookup(const char *str, std::set<glong> &idxs) {
+    bool Lookup(const char *str, std::set<glong> &idxs)
    {
        glong unused_next_idx;
        return Lookup(str, idxs, unused_next_idx);
    }
@@ -160,7 +160,7 @@ private:
    std::unique_ptr<IIndexFile> idx_file;
    std::unique_ptr<SynFile> syn_file;
-    bool load_ifofile(const std::string &ifofilename, gulong &idxfilesize);
+    bool load_ifofile(const std::string &ifofilename, off_t &idxfilesize);
 };
 class Libs
@@ -169,7 +169,7 @@ public:
    Libs(std::function<void(void)> f = std::function<void(void)>())
    {
        progress_func = f;
-        iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg.
+        iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; // need to read from cfg.
    }
    void setVerbose(bool verbose) { verbose_ = verbose; }
    void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; }
--- a/tests/not-unix-newlines-ifo/russian/russian.dict
+++ b/tests/not-unix-newlines-ifo/russian/russian.dict
--- a/tests/not-unix-newlines-ifo/russian/russian.idx
+++ b/tests/not-unix-newlines-ifo/russian/russian.idx
--- a/tests/not-unix-newlines-ifo/russian/russian.ifo
+++ b/tests/not-unix-newlines-ifo/russian/russian.ifo
@@ -0,0 +1,9 @@
 StarDict's dict ifo file
 version=3.0.0
 bookname=Russian-English Dictionary (ru-en)
 wordcount=415144
 idxfilesize=12344255
 sametypesequence=h
 synwordcount=1277580
 author=Vuizur
 description=
--- a/tests/not-unix-newlines-ifo/russian/russian.syn
+++ b/tests/not-unix-newlines-ifo/russian/russian.syn
--- a/tests/t_json
+++ b/tests/t_json
@@ -18,7 +18,8 @@ test_json() {
    fi
 }
-test_json '[{"name": "Test synonyms", "wordcount": "2"},
+test_json '[{"name": "Russian-English Dictionary (ru-en)", "wordcount": "415144"},
            {"name": "Test synonyms", "wordcount": "2"},
            {"name": "Test multiple results", "wordcount": "246"},
            {"name": "Sample 1 test dictionary", "wordcount": "1"},
            {"name": "test_dict", "wordcount": "1"}]' -x -j -l -n --data-dir "$TEST_DIR"
--- a/tests/t_newlines_in_ifo
+++ b/tests/t_newlines_in_ifo
@@ -0,0 +1,18 @@
 #!/bin/sh
 # Test: run sdcv against the "not-unix-newlines-ifo" data directory and check
 # that the last line of its output is the expected dictionary name and word
 # count.
 #
 # Arguments:
 #   $1 - path to the sdcv executable under test
 #   $2 - directory containing the test data (parent of not-unix-newlines-ifo)
 #
 # Exit status: 0 when the output matches, 1 otherwise (message on stderr).

 # Abort on the first failing command.
 set -e
 PATH_TO_SDCV="$1"
 TEST_DIR="$2"
 # Clear these variables before invoking sdcv -- presumably so the caller's
 # environment cannot influence the result; confirm against sdcv's docs.
 unset SDCV_PAGER
 unset STARDICT_DATA_DIR
 # Keep only the final output line. NOTE(review): the pipeline's status is that
 # of `tail`, so a failing sdcv is detected by the comparison below rather than
 # by `set -e`.
 RES=$("$PATH_TO_SDCV" -n -x --data-dir="$TEST_DIR/not-unix-newlines-ifo" -l | tail -n 1)
 # Exact match: name and word count are separated by four spaces.
 if [ "$RES" = "Russian-English Dictionary (ru-en)    415144" ]; then
  exit 0
 else
  echo "test failed, unexpected result: $RES" >&2
  exit 1
 fi
Author	SHA1	Message	Date
Evgeniy A. Dushistov	49c8094b53	version 0.5.5	2023-04-18 21:47:55 +03:00
Evgeniy A. Dushistov	4346e65bd3	fix CI build: ubuntu-18.04 not supported by github actions anymore	2023-04-18 21:44:18 +03:00
Evgeniy A. Dushistov	d144e0310c	fix CI build	2023-01-16 16:44:09 +03:00
NiLuJe	6e36e7730c	Warn on unknown dicts	2022-09-16 18:48:08 +03:00
NiLuJe	abe5e9e72f	Check accesses to the bookname_to_ifo std::map. Avoid crashes when passing unknown dicts to the -u flag. Fix #87	2022-09-16 18:48:08 +03:00
NiLuJe	488ec68854	Use off_t for stuff mainly assigned to a stat.st_size value. Allows simplifying the mmap sanity checks in mapfile, and actually ensuring they won't break when -D_FILE_OFFSET_BITS=64	2022-09-14 22:12:29 +03:00
Marcelino Alberdi Pereira	b698445ead	Add a small summary of the project to the README	2022-09-07 17:51:13 +03:00
Evgeniy A. Dushistov	504e7807e6	add information about 0.5.4 into NEWS	2022-06-24 21:49:00 +03:00
Evgeniy A. Dushistov	6c80bf2d99	t_json: add data about new dictionary	2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov	8742575c33	fix bash syntax error	2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov	b294b76fb5	check file size before mapping on linux	2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov	823ec3d840	clang-format for mapfile	2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov	6ab8b51e6c	version 0.5.4	2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov	881657b336	Revert "replace deprecated g_pattern_match_string function". This reverts commit `452a4e07fb`.	2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov	911fc2f561	more robust parsing of ifo file; fixes #79, fixes #81	2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov	f488f5350b	stardict_lib.hpp: remove unused headers plus clang-format	2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov	e72220e748	use cmake to check if compiler supports c++11	2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov	b77c0e793a	replace deprecated g_pattern_match_string function	2022-06-24 21:34:47 +03:00
Evgeniy A. Dushistov	ebaa6f2136	clang-format for stardict_lib.cpp	2022-06-24 21:34:47 +03:00