t_json: add data about new dictionary

fix bash syntax error
check file size before mapping on linux
2025-12-15 17:31:56 +00:00 · 2022-06-24 21:33:33 +03:00 · 2022-06-24 21:26:10 +03:00 · 2022-06-24 21:25:55 +03:00 · 2022-06-24 21:24:03 +03:00 · 2022-06-24 20:57:57 +03:00
12 changed files with 162 additions and 150 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -15,7 +15,7 @@ BreakBeforeBinaryOperators: true
 BreakBeforeTernaryOperators: true
 BreakConstructorInitializersBeforeComma: true
 BinPackParameters: true
-ColumnLimit:     0
+ColumnLimit:     120
 ConstructorInitializerAllOnOneLineOrOnePerLine: false
 DerivePointerAlignment: false
 ExperimentalAutoDetectBinPacking: false
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,10 @@ project(sdcv)
 cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
 cmake_policy(VERSION 3.5)

+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED True)
+set(CMAKE_CXX_EXTENSIONS False)
+
 include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/compiler.cmake")

 set(ZLIB_FIND_REQUIRED True)
@@ -91,7 +95,7 @@ set(CPACK_PACKAGE_VENDOR "Evgeniy Dushistov <dushistov@mail.ru>")
 set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.org")
 set(CPACK_PACKAGE_VERSION_MAJOR "0")
 set(CPACK_PACKAGE_VERSION_MINOR "5")
-set(CPACK_PACKAGE_VERSION_PATCH "3")
+set(CPACK_PACKAGE_VERSION_PATCH "4")

 set(sdcv_VERSION
 	"${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
@@ -144,5 +148,6 @@ if (BUILD_TESTS)
  add_sdcv_shell_test(t_datadir)
  add_sdcv_shell_test(t_return_code)
  add_sdcv_shell_test(t_multiple_results)
+  add_sdcv_shell_test(t_newlines_in_ifo)

 endif (BUILD_TESTS)
--- a/cmake/compiler.cmake
+++ b/cmake/compiler.cmake
@@ -16,19 +16,6 @@ if (NOT DEFINED SDCV_COMPILER_IS_GCC_COMPATIBLE)
  endif()
 endif()

-if (MSVC AND (MSVC_VERSION LESS 1900))
-  message(FATAL_ERROR "MSVC version ${MSVC_VERSION} have no full c++11 support")
-elseif (MSVC)
-  add_definitions(-DNOMINMAX)
-elseif (NOT MSVC)
-  check_cxx_compiler_flag("-std=c++11" CXX_SUPPORTS_CXX11)
-  if (CXX_SUPPORTS_CXX11)
-    append("-std=c++11" CMAKE_CXX_FLAGS)
-  else ()
-    message(FATAL_ERROR "sdcv requires C++11 support but the '-std=c++11' flag isn't supported.")
-  endif()
-endif ()
-
 if (SDCV_COMPILER_IS_GCC_COMPATIBLE)
  append("-Wall" "-Wextra" "-Wformat-security" "-Wcast-align" "-Werror=format" "-Wcast-qual" CMAKE_C_FLAGS)
  append("-Wall" "-pedantic" "-Wextra" "-Wformat-security" "-Wcast-align" "-Werror=format" "-Wcast-qual" CMAKE_CXX_FLAGS)
--- a/src/mapfile.hpp
+++ b/src/mapfile.hpp
@@ -7,6 +7,7 @@
 #ifdef HAVE_MMAP
 #include <fcntl.h>
 #include <sys/mman.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #endif
 #ifdef _WIN32
@@ -43,6 +44,13 @@ inline bool MapFile::open(const char *file_name, unsigned long file_size)
        // g_print("Open file %s failed!\n",fullfilename);
        return false;
    }
+    struct stat st;
+    if (fstat(mmap_fd, &st) == -1 || st.st_size < 0 || (st.st_size == 0 && S_ISREG(st.st_mode))
+        || sizeof(st.st_size) > sizeof(file_size) || static_cast<unsigned long>(st.st_size) != file_size) {
+        close(mmap_fd);
+        return false;
+    }
+
    data = (gchar *)mmap(nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0);
    if ((void *)data == (void *)(-1)) {
        // g_print("mmap file %s failed!\n",idxfilename);
@@ -50,10 +58,8 @@ inline bool MapFile::open(const char *file_name, unsigned long file_size)
        return false;
    }
 #elif defined(_WIN32)
-    hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS,
-                       FILE_ATTRIBUTE_NORMAL, 0);
-    hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0,
-                                 file_size, nullptr);
+    hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0);
+    hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0, file_size, nullptr);
    data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size);
 #else
    gsize read_len;
--- a/src/stardict_lib.cpp
+++ b/src/stardict_lib.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <cctype>
 #include <cstring>
+#include <map>
 #include <stdexcept>

 #include <glib/gstdio.h>
@@ -78,108 +79,93 @@ bool DictInfo::load_from_ifo_file(const std::string &ifofilename,
 {
    ifo_file_name = ifofilename;
    glib::CharStr buffer;
-    if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), nullptr, nullptr))
+    gsize length = 0;
+    if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), &length, nullptr)) {
+        fprintf(stderr, "Can not read from %s\n", ifofilename.c_str());
        return false;
+    }

    static const char TREEDICT_MAGIC_DATA[] = "StarDict's treedict ifo file";
    static const char DICT_MAGIC_DATA[] = "StarDict's dict ifo file";

    const gchar *magic_data = istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA;
-    static const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF, '\0' };
-    if (!g_str_has_prefix(
-            g_str_has_prefix(get_impl(buffer), (const gchar *)(utf8_bom)) ? get_impl(buffer) + 3 : get_impl(buffer),
-            magic_data)) {
+    static const gchar utf8_bom[] = { (gchar)0xEF, (gchar)0xBB, (gchar)0xBF, '\0' };
+
+    const gchar *p = get_impl(buffer);
+    const gchar *end = p + length;
+
+    if (g_str_has_prefix(p, utf8_bom)) {
+        p += strlen(utf8_bom);
+    }
+    if (!g_str_has_prefix(p, magic_data)) {
+        fprintf(stderr, "No magic header(%s) in ifo file\n", magic_data);
        return false;
    }
+    p += strlen(magic_data);

-    gchar *p1 = get_impl(buffer) + strlen(magic_data) - 1;
-
-    gchar *p2 = strstr(p1, "\nwordcount=");
-    if (p2 == nullptr)
+    std::map<std::string, std::string> key_value_map;
+    while (p != end) {
+        auto key_it = std::find_if(p, end, [](gchar ch) { return !g_ascii_isspace(ch); });
+        if (key_it == end) {
+            break;
+        }
+        auto eq_it = std::find(key_it, end, gchar('='));
+        if (eq_it == end) {
+            fprintf(stderr, "Invalid part of ifo (no '=') here: %s\n", key_it);
            return false;
+        }
+        auto val_it = std::find_if(eq_it + 1, end, [](gchar ch) { return !g_ascii_isspace(ch); });
+        if (val_it == end) {
+            key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string()));
+            break;
+        }

-    gchar *p3 = strchr(p2 + sizeof("\nwordcount=") - 1, '\n');
+        auto line_end_it = std::find_if(val_it, end, [](gchar ch) { return ch == '\r' || ch == '\n'; });
+        key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string(val_it, line_end_it)));
+        if (line_end_it == end)
+            break;
+        p = line_end_it + 1;
+    }

-    wordcount = atol(std::string(p2 + sizeof("\nwordcount=") - 1, p3 - (p2 + sizeof("\nwordcount=") - 1)).c_str());
+    std::map<std::string, std::string>::const_iterator it;
+#define FIND_KEY(_key_)                                            \
+    it = key_value_map.find(_key_);                                \
+    if (it == key_value_map.end()) {                               \
+        fprintf(stderr, "Can not find '%s' in ifo file\n", _key_); \
+        return false;                                              \
+    }
+
+    FIND_KEY("wordcount")
+    wordcount = atol(it->second.c_str());

    if (istreedict) {
-        p2 = strstr(p1, "\ntdxfilesize=");
-        if (p2 == nullptr)
-            return false;
-
-        p3 = strchr(p2 + sizeof("\ntdxfilesize=") - 1, '\n');
-
-        index_file_size = atol(std::string(p2 + sizeof("\ntdxfilesize=") - 1, p3 - (p2 + sizeof("\ntdxfilesize=") - 1)).c_str());
-
+        FIND_KEY("tdxfilesize")
+        index_file_size = atol(it->second.c_str());
    } else {
+        FIND_KEY("idxfilesize")
+        index_file_size = atol(it->second.c_str());
+    }
+    FIND_KEY("bookname")
+    bookname = it->second;

-        p2 = strstr(p1, "\nidxfilesize=");
-        if (p2 == nullptr)
-            return false;
-
-        p3 = strchr(p2 + sizeof("\nidxfilesize=") - 1, '\n');
-        index_file_size = atol(std::string(p2 + sizeof("\nidxfilesize=") - 1, p3 - (p2 + sizeof("\nidxfilesize=") - 1)).c_str());
+#define SET_IF_EXISTS(_key_)         \
+    it = key_value_map.find(#_key_); \
+    if (it != key_value_map.end()) { \
+        _key_ = it->second;          \
    }

-    p2 = strstr(p1, "\nbookname=");
-
-    if (p2 == nullptr)
-        return false;
-
-    p2 = p2 + sizeof("\nbookname=") - 1;
-    p3 = strchr(p2, '\n');
-    bookname.assign(p2, p3 - p2);
-
-    p2 = strstr(p1, "\nauthor=");
-    if (p2) {
-        p2 = p2 + sizeof("\nauthor=") - 1;
-        p3 = strchr(p2, '\n');
-        author.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\nemail=");
-    if (p2) {
-        p2 = p2 + sizeof("\nemail=") - 1;
-        p3 = strchr(p2, '\n');
-        email.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\nwebsite=");
-    if (p2) {
-        p2 = p2 + sizeof("\nwebsite=") - 1;
-        p3 = strchr(p2, '\n');
-        website.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\ndate=");
-    if (p2) {
-        p2 = p2 + sizeof("\ndate=") - 1;
-        p3 = strchr(p2, '\n');
-        date.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\ndescription=");
-    if (p2) {
-        p2 = p2 + sizeof("\ndescription=") - 1;
-        p3 = strchr(p2, '\n');
-        description.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\nsametypesequence=");
-    if (p2) {
-        p2 += sizeof("\nsametypesequence=") - 1;
-        p3 = strchr(p2, '\n');
-        sametypesequence.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\nsynwordcount=");
+    SET_IF_EXISTS(author)
+    SET_IF_EXISTS(email)
+    SET_IF_EXISTS(website)
+    SET_IF_EXISTS(date)
+    SET_IF_EXISTS(description)
+    SET_IF_EXISTS(sametypesequence)
    syn_wordcount = 0;
-    if (p2) {
-        p2 += sizeof("\nsynwordcount=") - 1;
-        p3 = strchr(p2, '\n');
-        syn_wordcount = atol(std::string(p2, p3 - p2).c_str());
-    }
-
+    it = key_value_map.find("synwordcount");
+    if (it != key_value_map.end())
+        syn_wordcount = atol(it->second.c_str());
+#undef FIND_KEY
+#undef SET_IF_EXISTS
    return true;
 }

--- a/src/stardict_lib.hpp
+++ b/src/stardict_lib.hpp
@@ -1,10 +1,8 @@
 #pragma once

-#include <cstdio>
 #include <cstring>
 #include <functional>
 #include <list>
-#include <map>
 #include <memory>
 #include <set>
 #include <string>
@@ -98,7 +96,8 @@ public:
    virtual void get_data(glong idx) = 0;
    virtual const gchar *get_key_and_data(glong idx) = 0;
    virtual bool lookup(const char *str, std::set<glong> &idxs, glong &next_idx) = 0;
-    virtual bool lookup(const char *str, std::set<glong> &idxs) {
+    virtual bool lookup(const char *str, std::set<glong> &idxs)
+    {
        glong unused_next_idx;
        return lookup(str, idxs, unused_next_idx);
    };
@@ -144,7 +143,8 @@ public:
        *size = idx_file->wordentry_size;
    }
    bool Lookup(const char *str, std::set<glong> &idxs, glong &next_idx);
-    bool Lookup(const char *str, std::set<glong> &idxs) {
+    bool Lookup(const char *str, std::set<glong> &idxs)
+    {
        glong unused_next_idx;
        return Lookup(str, idxs, unused_next_idx);
    }
--- a/tests/not-unix-newlines-ifo/russian/russian.dict
+++ b/tests/not-unix-newlines-ifo/russian/russian.dict
--- a/tests/not-unix-newlines-ifo/russian/russian.idx
+++ b/tests/not-unix-newlines-ifo/russian/russian.idx
--- a/tests/not-unix-newlines-ifo/russian/russian.ifo
+++ b/tests/not-unix-newlines-ifo/russian/russian.ifo
@@ -0,0 +1,9 @@
+StarDict's dict ifo file
+version=3.0.0
+bookname=Russian-English Dictionary (ru-en)
+wordcount=415144
+idxfilesize=12344255
+sametypesequence=h
+synwordcount=1277580
+author=Vuizur
+description=
--- a/tests/not-unix-newlines-ifo/russian/russian.syn
+++ b/tests/not-unix-newlines-ifo/russian/russian.syn
--- a/tests/t_json
+++ b/tests/t_json
@@ -18,7 +18,8 @@ test_json() {
    fi
 }

-test_json '[{"name": "Test synonyms", "wordcount": "2"},
+test_json '[{"name": "Russian-English Dictionary (ru-en)", "wordcount": "415144"},
+            {"name": "Test synonyms", "wordcount": "2"},
            {"name": "Test multiple results", "wordcount": "246"},
            {"name": "Sample 1 test dictionary", "wordcount": "1"},
            {"name": "test_dict", "wordcount": "1"}]' -x -j -l -n --data-dir "$TEST_DIR"
--- a/tests/t_newlines_in_ifo
+++ b/tests/t_newlines_in_ifo
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+set -e  # abort the test at the first command that fails
+
+PATH_TO_SDCV="$1"  # first argument: the program that is executed below
+TEST_DIR="$2"  # second argument: directory that contains the not-unix-newlines-ifo fixture
+
+unset SDCV_PAGER  # presumably so a pager configured by the caller cannot affect the output - confirm
+unset STARDICT_DATA_DIR  # presumably so only the --data-dir given below is consulted - confirm
+
+RES=$("$PATH_TO_SDCV" -n -x --data-dir="$TEST_DIR/not-unix-newlines-ifo" -l | tail -n 1)  # keep only the last line printed
+
+if [ "$RES" = "Russian-English Dictionary (ru-en)    415144" ]; then  # expected: the fixture's bookname and wordcount values
+  exit 0  # the last output line matched: test passes
+else
+  echo "test failed, unexpected result: $RES" >&2  # report the mismatching line on stderr
+  exit 1  # non-zero status marks the test as failed
+fi
Author	SHA1	Message	Date
Evgeniy A. Dushistov	eeee360fb0	t_json: add data about new dictionary	2022-06-24 21:33:33 +03:00
Evgeniy A. Dushistov	f69973e1fa	fix bash syntax error	2022-06-24 21:26:10 +03:00
Evgeniy A. Dushistov	931fc98478	check file size before mapping on linux	2022-06-24 21:25:55 +03:00
Evgeniy A. Dushistov	6f30be7815	clang-format for mapfile	2022-06-24 21:24:03 +03:00
Evgeniy A. Dushistov	1a926d1b69	version 0.5.4	2022-06-24 20:57:57 +03:00
Evgeniy A. Dushistov	e89cfa18b1	Revert "replace deprecated g_pattern_match_string function" This reverts commit `452a4e07fb`.	2022-06-24 20:57:57 +03:00
Evgeniy A. Dushistov	12d9ea5b97	more robust parsing of ifo file fixes #79 fixes #81	2022-06-24 20:54:30 +03:00
Evgeniy A. Dushistov	920c2bafb9	stardict_lib.hpp: remove unused headers plus clang-format	2022-06-24 20:53:53 +03:00
Evgeniy A. Dushistov	5d2332b0cb	use cmake to check if compiler supports c++11	2022-06-24 20:10:43 +03:00
Evgeniy A. Dushistov	452a4e07fb	replace deprecated g_pattern_match_string function	2022-06-24 20:06:54 +03:00
Evgeniy A. Dushistov	59ef936288	clang-format for stardict_lib.cpp	2022-06-24 20:03:45 +03:00