Mirror of https://github.com/Dushistov/sdcv.git, synced 2025-12-15 17:31:56 +00:00
@@ -148,5 +148,6 @@ if (BUILD_TESTS)
     add_sdcv_shell_test(t_datadir)
     add_sdcv_shell_test(t_return_code)
     add_sdcv_shell_test(t_multiple_results)
+    add_sdcv_shell_test(t_newlines_in_ifo)

 endif (BUILD_TESTS)
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <cctype>
 #include <cstring>
+#include <map>
 #include <stdexcept>

 #include <glib/gstdio.h>
@@ -78,108 +79,93 @@ bool DictInfo::load_from_ifo_file(const std::string &ifofilename,
 {
     ifo_file_name = ifofilename;
     glib::CharStr buffer;
-    if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), nullptr, nullptr))
+    gsize length = 0;
+    if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), &length, nullptr)) {
+        fprintf(stderr, "Can not read from %s\n", ifofilename.c_str());
         return false;
+    }

     static const char TREEDICT_MAGIC_DATA[] = "StarDict's treedict ifo file";
     static const char DICT_MAGIC_DATA[] = "StarDict's dict ifo file";

     const gchar *magic_data = istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA;
-    static const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF, '\0' };
-    if (!g_str_has_prefix(
-            g_str_has_prefix(get_impl(buffer), (const gchar *)(utf8_bom)) ? get_impl(buffer) + 3 : get_impl(buffer),
-            magic_data)) {
+    static const gchar utf8_bom[] = { (gchar)0xEF, (gchar)0xBB, (gchar)0xBF, '\0' };
+    const gchar *p = get_impl(buffer);
+    const gchar *end = p + length;
+
+    if (g_str_has_prefix(p, utf8_bom)) {
+        p += strlen(utf8_bom);
+    }
+    if (!g_str_has_prefix(p, magic_data)) {
+        fprintf(stderr, "No magic header(%s) in ifo file\n", magic_data);
         return false;
     }
+    p += strlen(magic_data);

-    gchar *p1 = get_impl(buffer) + strlen(magic_data) - 1;
+    std::map<std::string, std::string> key_value_map;
+    while (p != end) {
+        auto key_it = std::find_if(p, end, [](gchar ch) { return !g_ascii_isspace(ch); });
+        if (key_it == end) {
+            break;
+        }
+        auto eq_it = std::find(key_it, end, gchar('='));
+        if (eq_it == end) {
+            fprintf(stderr, "Invalid part of ifo (no '=') here: %s\n", key_it);
+            return false;
+        }
+        auto val_it = std::find_if(eq_it + 1, end, [](gchar ch) { return !g_ascii_isspace(ch); });
+        if (val_it == end) {
+            key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string()));
+            break;
+        }
+        auto line_end_it = std::find_if(val_it, end, [](gchar ch) { return ch == '\r' || ch == '\n'; });
+        key_value_map.insert(std::make_pair(std::string(key_it, eq_it), std::string(val_it, line_end_it)));
+        if (line_end_it == end)
+            break;
+        p = line_end_it + 1;
+    }

-    gchar *p2 = strstr(p1, "\nwordcount=");
-    if (p2 == nullptr)
-        return false;
-
-    gchar *p3 = strchr(p2 + sizeof("\nwordcount=") - 1, '\n');
-
-    wordcount = atol(std::string(p2 + sizeof("\nwordcount=") - 1, p3 - (p2 + sizeof("\nwordcount=") - 1)).c_str());
+    std::map<std::string, std::string>::const_iterator it;
+#define FIND_KEY(_key_) \
+    it = key_value_map.find(_key_); \
+    if (it == key_value_map.end()) { \
+        fprintf(stderr, "Can not find '%s' in ifo file\n", _key_); \
+        return false; \
+    }
+
+    FIND_KEY("wordcount")
+    wordcount = atol(it->second.c_str());

     if (istreedict) {
-        p2 = strstr(p1, "\ntdxfilesize=");
-        if (p2 == nullptr)
-            return false;
-
-        p3 = strchr(p2 + sizeof("\ntdxfilesize=") - 1, '\n');
-
-        index_file_size = atol(std::string(p2 + sizeof("\ntdxfilesize=") - 1, p3 - (p2 + sizeof("\ntdxfilesize=") - 1)).c_str());
-
+        FIND_KEY("tdxfilesize")
+        index_file_size = atol(it->second.c_str());
     } else {
-        p2 = strstr(p1, "\nidxfilesize=");
-        if (p2 == nullptr)
-            return false;
-
-        p3 = strchr(p2 + sizeof("\nidxfilesize=") - 1, '\n');
-        index_file_size = atol(std::string(p2 + sizeof("\nidxfilesize=") - 1, p3 - (p2 + sizeof("\nidxfilesize=") - 1)).c_str());
+        FIND_KEY("idxfilesize")
+        index_file_size = atol(it->second.c_str());
     }
+    FIND_KEY("bookname")
+    bookname = it->second;

-    p2 = strstr(p1, "\nbookname=");
-    if (p2 == nullptr)
-        return false;
-    p2 = p2 + sizeof("\nbookname=") - 1;
-    p3 = strchr(p2, '\n');
-    bookname.assign(p2, p3 - p2);
-
-    p2 = strstr(p1, "\nauthor=");
-    if (p2) {
-        p2 = p2 + sizeof("\nauthor=") - 1;
-        p3 = strchr(p2, '\n');
-        author.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\nemail=");
-    if (p2) {
-        p2 = p2 + sizeof("\nemail=") - 1;
-        p3 = strchr(p2, '\n');
-        email.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\nwebsite=");
-    if (p2) {
-        p2 = p2 + sizeof("\nwebsite=") - 1;
-        p3 = strchr(p2, '\n');
-        website.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\ndate=");
-    if (p2) {
-        p2 = p2 + sizeof("\ndate=") - 1;
-        p3 = strchr(p2, '\n');
-        date.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\ndescription=");
-    if (p2) {
-        p2 = p2 + sizeof("\ndescription=") - 1;
-        p3 = strchr(p2, '\n');
-        description.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\nsametypesequence=");
-    if (p2) {
-        p2 += sizeof("\nsametypesequence=") - 1;
-        p3 = strchr(p2, '\n');
-        sametypesequence.assign(p2, p3 - p2);
-    }
-
-    p2 = strstr(p1, "\nsynwordcount=");
+#define SET_IF_EXISTS(_key_) \
+    it = key_value_map.find(#_key_); \
+    if (it != key_value_map.end()) { \
+        _key_ = it->second; \
+    }
+
+    SET_IF_EXISTS(author)
+    SET_IF_EXISTS(email)
+    SET_IF_EXISTS(website)
+    SET_IF_EXISTS(date)
+    SET_IF_EXISTS(description)
+    SET_IF_EXISTS(sametypesequence)
+
     syn_wordcount = 0;
-    if (p2) {
-        p2 += sizeof("\nsynwordcount=") - 1;
-        p3 = strchr(p2, '\n');
-        syn_wordcount = atol(std::string(p2, p3 - p2).c_str());
-    }
+    it = key_value_map.find("synwordcount");
+    if (it != key_value_map.end())
+        syn_wordcount = atol(it->second.c_str());
+#undef FIND_KEY
+#undef SET_IF_EXISTS

     return true;
 }
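For illustration only (not part of the commit): below is a minimal, self-contained sketch of the key=value scan the new code performs, written against a plain std::string buffer with std::isspace standing in for glib's g_ascii_isspace. The helper name parse_ifo_body and the sample input are made up for this example; the point is that stopping a value at either '\r' or '\n' lets .ifo files with CRLF line endings parse the same as Unix ones.

// Standalone sketch of the CRLF-tolerant key=value scan (assumptions noted above).
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <map>
#include <string>

static std::map<std::string, std::string> parse_ifo_body(const std::string &buf)
{
    std::map<std::string, std::string> kv;
    auto not_space = [](char ch) { return !std::isspace(static_cast<unsigned char>(ch)); };
    auto p = buf.begin();
    const auto end = buf.end();
    while (p != end) {
        // Skip leading whitespace (including the '\n' left over from the previous line).
        auto key_it = std::find_if(p, end, not_space);
        if (key_it == end)
            break;
        auto eq_it = std::find(key_it, end, '=');
        if (eq_it == end)
            break; // malformed line; the sdcv code reports an error and bails out here
        auto val_it = std::find_if(eq_it + 1, end, not_space);
        if (val_it == end) {
            kv.emplace(std::string(key_it, eq_it), std::string()); // trailing "key=" with empty value
            break;
        }
        // Stop the value at '\r' or '\n', so CRLF files yield the same values as LF files.
        auto line_end_it = std::find_if(val_it, end, [](char ch) { return ch == '\r' || ch == '\n'; });
        kv.emplace(std::string(key_it, eq_it), std::string(val_it, line_end_it));
        if (line_end_it == end)
            break;
        p = line_end_it + 1;
    }
    return kv;
}

int main()
{
    // Hypothetical CRLF-terminated .ifo body, mirroring the fields used by the test dictionary.
    const std::string body = "version=3.0.0\r\nbookname=Russian-English Dictionary (ru-en)\r\nwordcount=415144\r\n";
    for (const auto &e : parse_ifo_body(body))
        std::printf("%s -> %s\n", e.first.c_str(), e.second.c_str());
}

Compiled with any C++11 compiler, this prints the three keys with their values and no stray '\r' at the end of either value, which is exactly the failure mode the strstr/strchr-based parser had with non-Unix newlines.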
tests/not-unix-newlines-ifo/russian/russian.dict (Normal file, 0 lines)
tests/not-unix-newlines-ifo/russian/russian.idx (Normal file, 0 lines)
tests/not-unix-newlines-ifo/russian/russian.ifo (Normal file, 9 lines)
@@ -0,0 +1,9 @@
+StarDict's dict ifo file
+version=3.0.0
+bookname=Russian-English Dictionary (ru-en)
+wordcount=415144
+idxfilesize=12344255
+sametypesequence=h
+synwordcount=1277580
+author=Vuizur
+description=
tests/not-unix-newlines-ifo/russian/russian.syn (Normal file, 0 lines)
tests/t_newlines_in_ifo (Executable file, 16 lines)
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+set -e
+
+PATH_TO_SDCV="$1"
+TEST_DIR="$2"
+
+unset SDCV_PAGER
+
+RES=$("$PATH_TO_SDCV" -n -x --data-dir="$TEST_DIR/not-unix-newlines-ifo" -l | tail -n 1)
+if [ "$RES" == "Russian-English Dictionary (ru-en) 415144" ]; then
+    exit 0
+else
+    echo "test failed, unexpected result: $RES" >&2
+    exit 1
+fi