Mirror of https://github.com/Dushistov/sdcv.git, synced 2025-12-15 17:31:56 +00:00
@@ -148,5 +148,6 @@ if (BUILD_TESTS)
     add_sdcv_shell_test(t_datadir)
     add_sdcv_shell_test(t_return_code)
     add_sdcv_shell_test(t_multiple_results)
+    add_sdcv_shell_test(t_newlines_in_ifo)

 endif (BUILD_TESTS)
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <cctype>
 #include <cstring>
+#include <map>
 #include <stdexcept>

 #include <glib/gstdio.h>
@@ -78,108 +79,93 @@ bool DictInfo::load_from_ifo_file(const std::string &ifofilename,
 {
     ifo_file_name = ifofilename;
     glib::CharStr buffer;
-    if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), nullptr, nullptr))
+    gsize length = 0;
+    if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), &length, nullptr)) {
+        fprintf(stderr, "Can not read from %s\n", ifofilename.c_str());
         return false;
+    }

     static const char TREEDICT_MAGIC_DATA[] = "StarDict's treedict ifo file";
     static const char DICT_MAGIC_DATA[] = "StarDict's dict ifo file";

     const gchar *magic_data = istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA;
-    static const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF, '\0' };
-    if (!g_str_has_prefix(
-            g_str_has_prefix(get_impl(buffer), (const gchar *)(utf8_bom)) ? get_impl(buffer) + 3 : get_impl(buffer),
-            magic_data)) {
+    static const gchar utf8_bom[] = { (gchar)0xEF, (gchar)0xBB, (gchar)0xBF, '\0' };
+    const gchar *p = get_impl(buffer);
+    const gchar *end = p + length;
+
+    if (g_str_has_prefix(p, utf8_bom)) {
+        p += strlen(utf8_bom);
+    }
+    if (!g_str_has_prefix(p, magic_data)) {
+        fprintf(stderr, "No magic header(%s) in ifo file\n", magic_data);
         return false;
     }
+    p += strlen(magic_data);

-    gchar *p1 = get_impl(buffer) + strlen(magic_data) - 1;
+    std::map<std::string, std::string> key_value_map;
+    while (p != end) {
+        auto key_it = std::find_if(p, end, [](gchar ch) { return !g_ascii_isspace(ch); });
+        if (key_it == end) {
+            break;
+        }
+        auto eq_it = std::find(key_it, end, gchar('='));
+        if (eq_it == end) {
+            fprintf(stderr, "Invalid part of ifo (no '=') here: %s\n", key_it);
+            return false;
+        }
+        auto val_it = std::find_if(eq_it + 1, end, [](gchar ch) { return !g_ascii_isspace(ch); });
+        if (val_it == end) {
+            key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string()));
+            break;
+        }
+        auto line_end_it = std::find_if(val_it, end, [](gchar ch) { return ch == '\r' || ch == '\n'; });
+        key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string(val_it, line_end_it)));
+        if (line_end_it == end)
+            break;
+        p = line_end_it + 1;
+    }

-    gchar *p2 = strstr(p1, "\nwordcount=");
-    if (p2 == nullptr)
-        return false;
-
-    gchar *p3 = strchr(p2 + sizeof("\nwordcount=") - 1, '\n');
-
-    wordcount = atol(std::string(p2 + sizeof("\nwordcount=") - 1, p3 - (p2 + sizeof("\nwordcount=") - 1)).c_str());
+    std::map<std::string, std::string>::const_iterator it;
+#define FIND_KEY(_key_) \
+    it = key_value_map.find(_key_); \
+    if (it == key_value_map.end()) { \
+        fprintf(stderr, "Can not find '%s' in ifo file\n", _key_); \
+        return false; \
+    }
+
+    FIND_KEY("wordcount")
+    wordcount = atol(it->second.c_str());

     if (istreedict) {
-        p2 = strstr(p1, "\ntdxfilesize=");
-        if (p2 == nullptr)
-            return false;
-
-        p3 = strchr(p2 + sizeof("\ntdxfilesize=") - 1, '\n');
-
-        index_file_size = atol(std::string(p2 + sizeof("\ntdxfilesize=") - 1, p3 - (p2 + sizeof("\ntdxfilesize=") - 1)).c_str());
-
+        FIND_KEY("tdxfilesize")
+        index_file_size = atol(it->second.c_str());
     } else {
-        p2 = strstr(p1, "\nidxfilesize=");
-        if (p2 == nullptr)
-            return false;
-
-        p3 = strchr(p2 + sizeof("\nidxfilesize=") - 1, '\n');
-        index_file_size = atol(std::string(p2 + sizeof("\nidxfilesize=") - 1, p3 - (p2 + sizeof("\nidxfilesize=") - 1)).c_str());
+        FIND_KEY("idxfilesize")
+        index_file_size = atol(it->second.c_str());
     }
+    FIND_KEY("bookname")
+    bookname = it->second;

-    p2 = strstr(p1, "\nbookname=");
-    if (p2 == nullptr)
-        return false;
-    p2 = p2 + sizeof("\nbookname=") - 1;
-    p3 = strchr(p2, '\n');
-    bookname.assign(p2, p3 - p2);
-
-    p2 = strstr(p1, "\nauthor=");
-    if (p2) {
-        p2 = p2 + sizeof("\nauthor=") - 1;
-        p3 = strchr(p2, '\n');
-        author.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\nemail=");
-    if (p2) {
-        p2 = p2 + sizeof("\nemail=") - 1;
-        p3 = strchr(p2, '\n');
-        email.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\nwebsite=");
-    if (p2) {
-        p2 = p2 + sizeof("\nwebsite=") - 1;
-        p3 = strchr(p2, '\n');
-        website.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\ndate=");
-    if (p2) {
-        p2 = p2 + sizeof("\ndate=") - 1;
-        p3 = strchr(p2, '\n');
-        date.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\ndescription=");
-    if (p2) {
-        p2 = p2 + sizeof("\ndescription=") - 1;
-        p3 = strchr(p2, '\n');
-        description.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\nsametypesequence=");
-    if (p2) {
-        p2 += sizeof("\nsametypesequence=") - 1;
-        p3 = strchr(p2, '\n');
-        sametypesequence.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\nsynwordcount=");
+#define SET_IF_EXISTS(_key_) \
+    it = key_value_map.find(#_key_); \
+    if (it != key_value_map.end()) { \
+        _key_ = it->second; \
+    }
+
+    SET_IF_EXISTS(author)
+    SET_IF_EXISTS(email)
+    SET_IF_EXISTS(website)
+    SET_IF_EXISTS(date)
+    SET_IF_EXISTS(description)
+    SET_IF_EXISTS(sametypesequence)
+
     syn_wordcount = 0;
-    if (p2) {
-        p2 += sizeof("\nsynwordcount=") - 1;
-        p3 = strchr(p2, '\n');
-        syn_wordcount = atol(std::string(p2, p3 - p2).c_str());
-    }
+    it = key_value_map.find("synwordcount");
+    if (it != key_value_map.end())
+        syn_wordcount = atol(it->second.c_str());
+#undef FIND_KEY
+#undef SET_IF_EXISTS

     return true;
 }
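For illustration only (not part of the commit): below is a minimal, self-contained sketch of the key=value scan the new code performs, written against a plain std::string buffer with std::isspace standing in for glib's g_ascii_isspace. The helper name parse_ifo_body and the sample input are made up for this example; the point is that stopping a value at either '\r' or '\n' lets .ifo files with CRLF line endings parse the same as Unix ones.

// Standalone sketch of the CRLF-tolerant key=value scan (assumptions noted above).
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <map>
#include <string>

static std::map<std::string, std::string> parse_ifo_body(const std::string &buf)
{
    std::map<std::string, std::string> kv;
    auto not_space = [](char ch) { return !std::isspace(static_cast<unsigned char>(ch)); };
    auto p = buf.begin();
    const auto end = buf.end();
    while (p != end) {
        // Skip leading whitespace (including the '\n' left over from the previous line).
        auto key_it = std::find_if(p, end, not_space);
        if (key_it == end)
            break;
        auto eq_it = std::find(key_it, end, '=');
        if (eq_it == end)
            break; // malformed line; the sdcv code reports an error and bails out here
        auto val_it = std::find_if(eq_it + 1, end, not_space);
        if (val_it == end) {
            kv.emplace(std::string(key_it, eq_it), std::string()); // trailing "key=" with empty value
            break;
        }
        // Stop the value at '\r' or '\n', so CRLF files yield the same values as LF files.
        auto line_end_it = std::find_if(val_it, end, [](char ch) { return ch == '\r' || ch == '\n'; });
        kv.emplace(std::string(key_it, eq_it), std::string(val_it, line_end_it));
        if (line_end_it == end)
            break;
        p = line_end_it + 1;
    }
    return kv;
}

int main()
{
    // Hypothetical CRLF-terminated .ifo body, mirroring the fields used by the test dictionary.
    const std::string body = "version=3.0.0\r\nbookname=Russian-English Dictionary (ru-en)\r\nwordcount=415144\r\n";
    for (const auto &e : parse_ifo_body(body))
        std::printf("%s -> %s\n", e.first.c_str(), e.second.c_str());
}

Compiled with any C++11 compiler, this prints the three keys with their values and no stray '\r' at the end of either value, which is exactly the failure mode the strstr/strchr-based parser had with non-Unix newlines.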
tests/not-unix-newlines-ifo/russian/russian.dict (Normal file, 0 lines)
tests/not-unix-newlines-ifo/russian/russian.idx (Normal file, 0 lines)
tests/not-unix-newlines-ifo/russian/russian.ifo (Normal file, 9 lines)
@@ -0,0 +1,9 @@
+StarDict's dict ifo file
+version=3.0.0
+bookname=Russian-English Dictionary (ru-en)
+wordcount=415144
+idxfilesize=12344255
+sametypesequence=h
+synwordcount=1277580
+author=Vuizur
+description=
tests/not-unix-newlines-ifo/russian/russian.syn (Normal file, 0 lines)
tests/t_newlines_in_ifo (Executable file, 16 lines)
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+set -e
+
+PATH_TO_SDCV="$1"
+TEST_DIR="$2"
+
+unset SDCV_PAGER
+
+RES=$("$PATH_TO_SDCV" -n -x --data-dir="$TEST_DIR/not-unix-newlines-ifo" -l | tail -n 1)
+if [ "$RES" == "Russian-English Dictionary (ru-en) 415144" ]; then
+    exit 0
+else
+    echo "test failed, unexpected result: $RES" >&2
+    exit 1
+fi