refactoring: apply clang-format rules

This commit is contained in:
Evgeniy A. Dushistov
2017-08-09 07:46:27 +03:00
parent d0c0a0837f
commit 8f16ceae59
14 changed files with 2587 additions and 2537 deletions

View File

@@ -26,20 +26,19 @@
//#define HAVE_MMAP //it will be defined in config.h; this can be done by configure.in with an AC_FUNC_MMAP check. //#define HAVE_MMAP //it will be defined in config.h; this can be done by configure.in with an AC_FUNC_MMAP check.
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
# include "config.h" #include "config.h"
#endif #endif
#include <cassert> #include <cassert>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <unistd.h>
#include <limits.h>
#include <fcntl.h> #include <fcntl.h>
#include <limits.h>
#include <unistd.h>
#include <sys/stat.h> #include <sys/stat.h>
#include "dictziplib.hpp" #include "dictziplib.hpp"
#define USE_CACHE 1 #define USE_CACHE 1
@@ -57,426 +56,424 @@
/* For gzip-compatible header, as defined in RFC 1952 */ /* For gzip-compatible header, as defined in RFC 1952 */
/* Magic for GZIP (rfc1952) */ /* Magic for GZIP (rfc1952) */
#define GZ_MAGIC1 0x1f /* First magic byte */ #define GZ_MAGIC1 0x1f /* First magic byte */
#define GZ_MAGIC2 0x8b /* Second magic byte */ #define GZ_MAGIC2 0x8b /* Second magic byte */
/* FLaGs (bitmapped), from rfc1952 */ /* FLaGs (bitmapped), from rfc1952 */
#define GZ_FTEXT 0x01 /* Set for ASCII text */ #define GZ_FTEXT 0x01 /* Set for ASCII text */
#define GZ_FHCRC 0x02 /* Header CRC16 */ #define GZ_FHCRC 0x02 /* Header CRC16 */
#define GZ_FEXTRA 0x04 /* Optional field (random access index) */ #define GZ_FEXTRA 0x04 /* Optional field (random access index) */
#define GZ_FNAME 0x08 /* Original name */ #define GZ_FNAME 0x08 /* Original name */
#define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */ #define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */
#define GZ_MAX 2 /* Maximum compression */ #define GZ_MAX 2 /* Maximum compression */
#define GZ_FAST 4 /* Fastest compression */ #define GZ_FAST 4 /* Fastest compression */
/* These are from rfc1952 */ /* These are from rfc1952 */
#define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */ #define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */
#define GZ_OS_AMIGA 1 /* Amiga */ #define GZ_OS_AMIGA 1 /* Amiga */
#define GZ_OS_VMS 2 /* VMS (or OpenVMS) */ #define GZ_OS_VMS 2 /* VMS (or OpenVMS) */
#define GZ_OS_UNIX 3 /* Unix */ #define GZ_OS_UNIX 3 /* Unix */
#define GZ_OS_VMCMS 4 /* VM/CMS */ #define GZ_OS_VMCMS 4 /* VM/CMS */
#define GZ_OS_ATARI 5 /* Atari TOS */ #define GZ_OS_ATARI 5 /* Atari TOS */
#define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */ #define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */
#define GZ_OS_MAC 7 /* Macintosh */ #define GZ_OS_MAC 7 /* Macintosh */
#define GZ_OS_Z 8 /* Z-System */ #define GZ_OS_Z 8 /* Z-System */
#define GZ_OS_CPM 9 /* CP/M */ #define GZ_OS_CPM 9 /* CP/M */
#define GZ_OS_TOPS20 10 /* TOPS-20 */ #define GZ_OS_TOPS20 10 /* TOPS-20 */
#define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */ #define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */
#define GZ_OS_QDOS 12 /* QDOS */ #define GZ_OS_QDOS 12 /* QDOS */
#define GZ_OS_ACORN 13 /* Acorn RISCOS */ #define GZ_OS_ACORN 13 /* Acorn RISCOS */
#define GZ_OS_UNKNOWN 255 /* unknown */ #define GZ_OS_UNKNOWN 255 /* unknown */
#define GZ_RND_S1 'R' /* First magic for random access format */ #define GZ_RND_S1 'R' /* First magic for random access format */
#define GZ_RND_S2 'A' /* Second magic for random access format */ #define GZ_RND_S2 'A' /* Second magic for random access format */
#define GZ_ID1 0 /* GZ_MAGIC1 */ #define GZ_ID1 0 /* GZ_MAGIC1 */
#define GZ_ID2 1 /* GZ_MAGIC2 */ #define GZ_ID2 1 /* GZ_MAGIC2 */
#define GZ_CM 2 /* Compression Method (Z_DEFLATED) */ #define GZ_CM 2 /* Compression Method (Z_DEFLATED) */
#define GZ_FLG 3 /* FLaGs (see above) */ #define GZ_FLG 3 /* FLaGs (see above) */
#define GZ_MTIME 4 /* Modification TIME */ #define GZ_MTIME 4 /* Modification TIME */
#define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */ #define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */
#define GZ_OS 9 /* Operating System */ #define GZ_OS 9 /* Operating System */
#define GZ_XLEN 10 /* eXtra LENgth (16bit) */ #define GZ_XLEN 10 /* eXtra LENgth (16bit) */
#define GZ_FEXTRA_START 12 /* Start of extra fields */ #define GZ_FEXTRA_START 12 /* Start of extra fields */
#define GZ_SI1 12 /* Subfield ID1 */ #define GZ_SI1 12 /* Subfield ID1 */
#define GZ_SI2 13 /* Subfield ID2 */ #define GZ_SI2 13 /* Subfield ID2 */
#define GZ_SUBLEN 14 /* Subfield length (16bit) */ #define GZ_SUBLEN 14 /* Subfield length (16bit) */
#define GZ_VERSION 16 /* Version for subfield format */ #define GZ_VERSION 16 /* Version for subfield format */
#define GZ_CHUNKLEN 18 /* Chunk length (16bit) */ #define GZ_CHUNKLEN 18 /* Chunk length (16bit) */
#define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */ #define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */
#define GZ_RNDDATA 22 /* Random access data (16bit) */ #define GZ_RNDDATA 22 /* Random access data (16bit) */
#define DICT_UNKNOWN 0
#define DICT_TEXT 1
#define DICT_GZIP 2
#define DICT_DZIP 3
#define DICT_UNKNOWN 0
#define DICT_TEXT 1
#define DICT_GZIP 2
#define DICT_DZIP 3
int DictData::read_header(const std::string &fname, int computeCRC) int DictData::read_header(const std::string &fname, int computeCRC)
{ {
FILE *str; FILE *str;
int id1, id2, si1, si2; int id1, id2, si1, si2;
char buffer[BUFFERSIZE]; char buffer[BUFFERSIZE];
int extraLength, subLength; int extraLength, subLength;
int i; int i;
char *pt; char *pt;
int c; int c;
struct stat sb; struct stat sb;
unsigned long crc = crc32( 0L, Z_NULL, 0 ); unsigned long crc = crc32(0L, Z_NULL, 0);
int count; int count;
unsigned long offset; unsigned long offset;
if (!(str = fopen(fname.c_str(), "rb"))) { if (!(str = fopen(fname.c_str(), "rb"))) {
//err_fatal_errno( __FUNCTION__, //err_fatal_errno( __FUNCTION__,
// "Cannot open data file \"%s\" for read\n", filename ); // "Cannot open data file \"%s\" for read\n", filename );
return -1; return -1;
} }
this->headerLength = GZ_XLEN - 1; this->headerLength = GZ_XLEN - 1;
this->type = DICT_UNKNOWN; this->type = DICT_UNKNOWN;
id1 = getc( str ); id1 = getc(str);
id2 = getc( str ); id2 = getc(str);
if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2) { if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2) {
this->type = DICT_TEXT; this->type = DICT_TEXT;
fstat( fileno( str ), &sb ); fstat(fileno(str), &sb);
this->compressedLength = this->length = sb.st_size; this->compressedLength = this->length = sb.st_size;
this->origFilename = fname; this->origFilename = fname;
this->mtime = sb.st_mtime; this->mtime = sb.st_mtime;
if (computeCRC) { if (computeCRC) {
rewind( str ); rewind(str);
while (!feof( str )) { while (!feof(str)) {
if ((count = fread( buffer, 1, BUFFERSIZE, str ))) { if ((count = fread(buffer, 1, BUFFERSIZE, str))) {
crc = crc32(crc, (Bytef *)buffer, count); crc = crc32(crc, (Bytef *)buffer, count);
} }
} }
} }
this->crc = crc; this->crc = crc;
fclose( str ); fclose(str);
return 0; return 0;
} }
this->type = DICT_GZIP; this->type = DICT_GZIP;
this->method = getc( str ); this->method = getc(str);
this->flags = getc( str ); this->flags = getc(str);
this->mtime = getc( str ) << 0; this->mtime = getc(str) << 0;
this->mtime |= getc( str ) << 8; this->mtime |= getc(str) << 8;
this->mtime |= getc( str ) << 16; this->mtime |= getc(str) << 16;
this->mtime |= getc( str ) << 24; this->mtime |= getc(str) << 24;
this->extraFlags = getc( str ); this->extraFlags = getc(str);
this->os = getc( str ); this->os = getc(str);
if (this->flags & GZ_FEXTRA) { if (this->flags & GZ_FEXTRA) {
extraLength = getc( str ) << 0; extraLength = getc(str) << 0;
extraLength |= getc( str ) << 8; extraLength |= getc(str) << 8;
this->headerLength += extraLength + 2; this->headerLength += extraLength + 2;
si1 = getc( str ); si1 = getc(str);
si2 = getc( str ); si2 = getc(str);
if (si1 == GZ_RND_S1 || si2 == GZ_RND_S2) { if (si1 == GZ_RND_S1 || si2 == GZ_RND_S2) {
subLength = getc( str ) << 0; subLength = getc(str) << 0;
subLength |= getc( str ) << 8; subLength |= getc(str) << 8;
this->version = getc( str ) << 0; this->version = getc(str) << 0;
this->version |= getc( str ) << 8; this->version |= getc(str) << 8;
if (this->version != 1) { if (this->version != 1) {
//err_internal( __FUNCTION__, //err_internal( __FUNCTION__,
// "dzip header version %d not supported\n", // "dzip header version %d not supported\n",
// this->version ); // this->version );
} }
this->chunkLength = getc( str ) << 0; this->chunkLength = getc(str) << 0;
this->chunkLength |= getc( str ) << 8; this->chunkLength |= getc(str) << 8;
this->chunkCount = getc( str ) << 0; this->chunkCount = getc(str) << 0;
this->chunkCount |= getc( str ) << 8; this->chunkCount |= getc(str) << 8;
if (this->chunkCount <= 0) { if (this->chunkCount <= 0) {
fclose( str ); fclose(str);
return 5; return 5;
} }
this->chunks = (int *)malloc(sizeof( this->chunks[0] ) this->chunks = (int *)malloc(sizeof(this->chunks[0])
* this->chunkCount ); * this->chunkCount);
for (i = 0; i < this->chunkCount; i++) { for (i = 0; i < this->chunkCount; i++) {
this->chunks[i] = getc( str ) << 0; this->chunks[i] = getc(str) << 0;
this->chunks[i] |= getc( str ) << 8; this->chunks[i] |= getc(str) << 8;
} }
this->type = DICT_DZIP; this->type = DICT_DZIP;
} else { } else {
fseek( str, this->headerLength, SEEK_SET ); fseek(str, this->headerLength, SEEK_SET);
} }
} }
if (this->flags & GZ_FNAME) { /* FIXME! Add checking against header len */ if (this->flags & GZ_FNAME) { /* FIXME! Add checking against header len */
pt = buffer; pt = buffer;
while ((c = getc( str )) && c != EOF) while ((c = getc(str)) && c != EOF)
*pt++ = c; *pt++ = c;
*pt = '\0'; *pt = '\0';
this->origFilename = buffer; this->origFilename = buffer;
this->headerLength += this->origFilename.length() + 1; this->headerLength += this->origFilename.length() + 1;
} else { } else {
this->origFilename = ""; this->origFilename = "";
} }
if (this->flags & GZ_COMMENT) { /* FIXME! Add checking for header len */ if (this->flags & GZ_COMMENT) { /* FIXME! Add checking for header len */
pt = buffer; pt = buffer;
while ((c = getc( str )) && c != EOF) while ((c = getc(str)) && c != EOF)
*pt++ = c; *pt++ = c;
*pt = '\0'; *pt = '\0';
comment = buffer; comment = buffer;
headerLength += comment.length()+1; headerLength += comment.length() + 1;
} else { } else {
comment = ""; comment = "";
} }
if (this->flags & GZ_FHCRC) { if (this->flags & GZ_FHCRC) {
getc( str ); getc(str);
getc( str ); getc(str);
this->headerLength += 2; this->headerLength += 2;
} }
if (ftell( str ) != this->headerLength + 1) { if (ftell(str) != this->headerLength + 1) {
//err_internal( __FUNCTION__, //err_internal( __FUNCTION__,
// "File position (%lu) != header length + 1 (%d)\n", // "File position (%lu) != header length + 1 (%d)\n",
// ftell( str ), this->headerLength + 1 ); // ftell( str ), this->headerLength + 1 );
} }
fseek( str, -8, SEEK_END ); fseek(str, -8, SEEK_END);
this->crc = getc( str ) << 0; this->crc = getc(str) << 0;
this->crc |= getc( str ) << 8; this->crc |= getc(str) << 8;
this->crc |= getc( str ) << 16; this->crc |= getc(str) << 16;
this->crc |= getc( str ) << 24; this->crc |= getc(str) << 24;
this->length = getc( str ) << 0; this->length = getc(str) << 0;
this->length |= getc( str ) << 8; this->length |= getc(str) << 8;
this->length |= getc( str ) << 16; this->length |= getc(str) << 16;
this->length |= getc( str ) << 24; this->length |= getc(str) << 24;
this->compressedLength = ftell( str ); this->compressedLength = ftell(str);
/* Compute offsets */ /* Compute offsets */
this->offsets = (unsigned long *)malloc( sizeof( this->offsets[0] ) this->offsets = (unsigned long *)malloc(sizeof(this->offsets[0])
* this->chunkCount ); * this->chunkCount);
for (offset = this->headerLength + 1, i = 0; for (offset = this->headerLength + 1, i = 0;
i < this->chunkCount; i < this->chunkCount;
i++) { i++) {
this->offsets[i] = offset; this->offsets[i] = offset;
offset += this->chunks[i]; offset += this->chunks[i];
} }
fclose( str ); fclose(str);
return 0; return 0;
} }
bool DictData::open(const std::string& fname, int computeCRC) bool DictData::open(const std::string &fname, int computeCRC)
{ {
struct stat sb; struct stat sb;
int fd; int fd;
this->initialized = 0; this->initialized = 0;
if (stat(fname.c_str(), &sb) || !S_ISREG(sb.st_mode)) { if (stat(fname.c_str(), &sb) || !S_ISREG(sb.st_mode)) {
//err_warning( __FUNCTION__, //err_warning( __FUNCTION__,
// "%s is not a regular file -- ignoring\n", fname ); // "%s is not a regular file -- ignoring\n", fname );
return false; return false;
} }
if (read_header(fname, computeCRC)) { if (read_header(fname, computeCRC)) {
//err_fatal( __FUNCTION__, //err_fatal( __FUNCTION__,
// "\"%s\" not in text or dzip format\n", fname ); // "\"%s\" not in text or dzip format\n", fname );
return false; return false;
} }
if ((fd = ::open(fname.c_str(), O_RDONLY )) < 0) { if ((fd = ::open(fname.c_str(), O_RDONLY)) < 0) {
//err_fatal_errno( __FUNCTION__, //err_fatal_errno( __FUNCTION__,
// "Cannot open data file \"%s\"\n", fname ); // "Cannot open data file \"%s\"\n", fname );
return false; return false;
} }
if (fstat(fd, &sb)) { if (fstat(fd, &sb)) {
//err_fatal_errno( __FUNCTION__, //err_fatal_errno( __FUNCTION__,
// "Cannot stat data file \"%s\"\n", fname ); // "Cannot stat data file \"%s\"\n", fname );
return false; return false;
} }
this->size = sb.st_size; this->size = sb.st_size;
::close(fd); ::close(fd);
if (!mapfile.open(fname.c_str(), size)) if (!mapfile.open(fname.c_str(), size))
return false; return false;
this->start=mapfile.begin(); this->start = mapfile.begin();
this->end = this->start + this->size; this->end = this->start + this->size;
for (size_t j = 0; j < DICT_CACHE_SIZE; j++) { for (size_t j = 0; j < DICT_CACHE_SIZE; j++) {
cache[j].chunk = -1; cache[j].chunk = -1;
cache[j].stamp = -1; cache[j].stamp = -1;
cache[j].inBuffer = nullptr; cache[j].inBuffer = nullptr;
cache[j].count = 0; cache[j].count = 0;
} }
return true; return true;
} }
void DictData::close() void DictData::close()
{ {
if (this->chunks) if (this->chunks)
free(this->chunks); free(this->chunks);
if (this->offsets) if (this->offsets)
free(this->offsets); free(this->offsets);
if (this->initialized) { if (this->initialized) {
if (inflateEnd( &this->zStream )) { if (inflateEnd(&this->zStream)) {
//err_internal( __FUNCTION__, //err_internal( __FUNCTION__,
// "Cannot shut down inflation engine: %s\n", // "Cannot shut down inflation engine: %s\n",
// this->zStream.msg ); // this->zStream.msg );
} }
} }
for (size_t i = 0; i < DICT_CACHE_SIZE; ++i){ for (size_t i = 0; i < DICT_CACHE_SIZE; ++i) {
if (this -> cache [i].inBuffer) if (this->cache[i].inBuffer)
free (this -> cache [i].inBuffer); free(this->cache[i].inBuffer);
} }
} }
void DictData::read(char *buffer, unsigned long start, unsigned long size) void DictData::read(char *buffer, unsigned long start, unsigned long size)
{ {
char *pt; char *pt;
unsigned long end; unsigned long end;
int count; int count;
char *inBuffer; char *inBuffer;
char outBuffer[OUT_BUFFER_SIZE]; char outBuffer[OUT_BUFFER_SIZE];
int firstChunk, lastChunk; int firstChunk, lastChunk;
int firstOffset, lastOffset; int firstOffset, lastOffset;
int i; int i;
int found, target, lastStamp; int found, target, lastStamp;
static int stamp = 0; static int stamp = 0;
end = start + size; end = start + size;
//buffer = malloc( size + 1 ); //buffer = malloc( size + 1 );
//PRINTF(DBG_UNZIP, //PRINTF(DBG_UNZIP,
// ("dict_data_read( %p, %lu, %lu )\n", // ("dict_data_read( %p, %lu, %lu )\n",
//h, start, size )); //h, start, size ));
switch (this->type) {
case DICT_GZIP:
//err_fatal( __FUNCTION__,
// "Cannot seek on pure gzip format files.\n"
// "Use plain text (for performance)"
// " or dzip format (for space savings).\n" );
break;
case DICT_TEXT:
memcpy(buffer, this->start + start, size);
//buffer[size] = '\0';
break;
case DICT_DZIP:
if (!this->initialized) {
++this->initialized;
this->zStream.zalloc = nullptr;
this->zStream.zfree = nullptr;
this->zStream.opaque = nullptr;
this->zStream.next_in = 0;
this->zStream.avail_in = 0;
this->zStream.next_out = nullptr;
this->zStream.avail_out = 0;
if (inflateInit2(&this->zStream, -15) != Z_OK) {
//err_internal( __FUNCTION__,
// "Cannot initialize inflation engine: %s\n",
//this->zStream.msg );
}
}
firstChunk = start / this->chunkLength;
firstOffset = start - firstChunk * this->chunkLength;
lastChunk = end / this->chunkLength;
lastOffset = end - lastChunk * this->chunkLength;
//PRINTF(DBG_UNZIP,
// (" start = %lu, end = %lu\n"
//"firstChunk = %d, firstOffset = %d,"
//" lastChunk = %d, lastOffset = %d\n",
//start, end, firstChunk, firstOffset, lastChunk, lastOffset ));
for (pt = buffer, i = firstChunk; i <= lastChunk; i++) {
switch (this->type) { /* Access cache */
case DICT_GZIP: found = 0;
//err_fatal( __FUNCTION__, target = 0;
// "Cannot seek on pure gzip format files.\n" lastStamp = INT_MAX;
// "Use plain text (for performance)" for (size_t j = 0; j < DICT_CACHE_SIZE; j++) {
// " or dzip format (for space savings).\n" );
break;
case DICT_TEXT:
memcpy( buffer, this->start + start, size );
//buffer[size] = '\0';
break;
case DICT_DZIP:
if (!this->initialized) {
++this->initialized;
this->zStream.zalloc = nullptr;
this->zStream.zfree = nullptr;
this->zStream.opaque = nullptr;
this->zStream.next_in = 0;
this->zStream.avail_in = 0;
this->zStream.next_out = nullptr;
this->zStream.avail_out = 0;
if (inflateInit2( &this->zStream, -15 ) != Z_OK) {
//err_internal( __FUNCTION__,
// "Cannot initialize inflation engine: %s\n",
//this->zStream.msg );
}
}
firstChunk = start / this->chunkLength;
firstOffset = start - firstChunk * this->chunkLength;
lastChunk = end / this->chunkLength;
lastOffset = end - lastChunk * this->chunkLength;
//PRINTF(DBG_UNZIP,
// (" start = %lu, end = %lu\n"
//"firstChunk = %d, firstOffset = %d,"
//" lastChunk = %d, lastOffset = %d\n",
//start, end, firstChunk, firstOffset, lastChunk, lastOffset ));
for (pt = buffer, i = firstChunk; i <= lastChunk; i++) {
/* Access cache */
found = 0;
target = 0;
lastStamp = INT_MAX;
for (size_t j = 0; j < DICT_CACHE_SIZE; j++) {
#if USE_CACHE #if USE_CACHE
if (this->cache[j].chunk == i) { if (this->cache[j].chunk == i) {
found = 1; found = 1;
target = j; target = j;
break; break;
} }
#endif #endif
if (this->cache[j].stamp < lastStamp) { if (this->cache[j].stamp < lastStamp) {
lastStamp = this->cache[j].stamp; lastStamp = this->cache[j].stamp;
target = j; target = j;
} }
} }
this->cache[target].stamp = ++stamp; this->cache[target].stamp = ++stamp;
if (found) { if (found) {
count = this->cache[target].count; count = this->cache[target].count;
inBuffer = this->cache[target].inBuffer; inBuffer = this->cache[target].inBuffer;
} else { } else {
this->cache[target].chunk = i; this->cache[target].chunk = i;
if (!this->cache[target].inBuffer) if (!this->cache[target].inBuffer)
this->cache[target].inBuffer = (char *)malloc( IN_BUFFER_SIZE ); this->cache[target].inBuffer = (char *)malloc(IN_BUFFER_SIZE);
inBuffer = this->cache[target].inBuffer; inBuffer = this->cache[target].inBuffer;
if (this->chunks[i] >= OUT_BUFFER_SIZE ) { if (this->chunks[i] >= OUT_BUFFER_SIZE) {
//err_internal( __FUNCTION__, //err_internal( __FUNCTION__,
// "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n", // "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
// i, this->chunks[i], OUT_BUFFER_SIZE ); // i, this->chunks[i], OUT_BUFFER_SIZE );
} }
memcpy( outBuffer, this->start + this->offsets[i], this->chunks[i] ); memcpy(outBuffer, this->start + this->offsets[i], this->chunks[i]);
this->zStream.next_in = (Bytef *)outBuffer; this->zStream.next_in = (Bytef *)outBuffer;
this->zStream.avail_in = this->chunks[i]; this->zStream.avail_in = this->chunks[i];
this->zStream.next_out = (Bytef *)inBuffer; this->zStream.next_out = (Bytef *)inBuffer;
this->zStream.avail_out = IN_BUFFER_SIZE; this->zStream.avail_out = IN_BUFFER_SIZE;
if (inflate( &this->zStream, Z_PARTIAL_FLUSH ) != Z_OK) { if (inflate(&this->zStream, Z_PARTIAL_FLUSH) != Z_OK) {
//err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg ); //err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg );
} }
if (this->zStream.avail_in) { if (this->zStream.avail_in) {
//err_internal( __FUNCTION__, //err_internal( __FUNCTION__,
// "inflate did not flush (%d pending, %d avail)\n", // "inflate did not flush (%d pending, %d avail)\n",
// this->zStream.avail_in, this->zStream.avail_out ); // this->zStream.avail_in, this->zStream.avail_out );
} }
count = IN_BUFFER_SIZE - this->zStream.avail_out; count = IN_BUFFER_SIZE - this->zStream.avail_out;
this->cache[target].count = count; this->cache[target].count = count;
} }
if (i == firstChunk) { if (i == firstChunk) {
if (i == lastChunk) { if (i == lastChunk) {
memcpy( pt, inBuffer + firstOffset, lastOffset-firstOffset); memcpy(pt, inBuffer + firstOffset, lastOffset - firstOffset);
pt += lastOffset - firstOffset; pt += lastOffset - firstOffset;
} else { } else {
if (count != this->chunkLength ) { if (count != this->chunkLength) {
//err_internal( __FUNCTION__, //err_internal( __FUNCTION__,
// "Length = %d instead of %d\n", // "Length = %d instead of %d\n",
//count, this->chunkLength ); //count, this->chunkLength );
} }
memcpy( pt, inBuffer + firstOffset, memcpy(pt, inBuffer + firstOffset,
this->chunkLength - firstOffset ); this->chunkLength - firstOffset);
pt += this->chunkLength - firstOffset; pt += this->chunkLength - firstOffset;
} }
} else if (i == lastChunk) { } else if (i == lastChunk) {
memcpy( pt, inBuffer, lastOffset ); memcpy(pt, inBuffer, lastOffset);
pt += lastOffset; pt += lastOffset;
} else { } else {
assert( count == this->chunkLength ); assert(count == this->chunkLength);
memcpy( pt, inBuffer, this->chunkLength ); memcpy(pt, inBuffer, this->chunkLength);
pt += this->chunkLength; pt += this->chunkLength;
} }
} }
//*pt = '\0'; //*pt = '\0';
break; break;
case DICT_UNKNOWN: case DICT_UNKNOWN:
//err_fatal( __FUNCTION__, "Cannot read unknown file type\n" ); //err_fatal( __FUNCTION__, "Cannot read unknown file type\n" );
break; break;
} }
} }

View File

@@ -7,49 +7,50 @@
#include "mapfile.hpp" #include "mapfile.hpp"
/* One slot of the decompressed-chunk cache kept by DictData. */
struct DictCache {
    /* Index of the chunk held in this slot; DictData::open() sets it to -1. */
    int chunk;
    /* Buffer receiving the inflated chunk; allocated on first use. */
    char *inBuffer;
    /* Value of the access counter at the last use; the slot with the
       smallest stamp is the one that gets replaced. */
    int stamp;
    /* Number of decompressed bytes currently stored in inBuffer. */
    int count;
};
class DictData { class DictData
{
public: public:
static const size_t DICT_CACHE_SIZE = 5; static const size_t DICT_CACHE_SIZE = 5;
DictData() {}
~DictData() { close(); }
bool open(const std::string &filename, int computeCRC);
void close();
void read(char *buffer, unsigned long start, unsigned long size);
DictData() {}
~DictData() { close(); }
bool open(const std::string& filename, int computeCRC);
void close();
void read(char *buffer, unsigned long start, unsigned long size);
private: private:
const char *start; /* start of mmap'd area */ const char *start; /* start of mmap'd area */
const char *end; /* end of mmap'd area */ const char *end; /* end of mmap'd area */
unsigned long size; /* size of mmap */ unsigned long size; /* size of mmap */
int type;
z_stream zStream;
int initialized;
int headerLength;
int method;
int flags;
time_t mtime;
int extraFlags;
int os;
int version;
int chunkLength;
int chunkCount;
int *chunks;
unsigned long *offsets; /* Sum-scan of chunks. */
std::string origFilename;
std::string comment;
unsigned long crc;
unsigned long length;
unsigned long compressedLength;
DictCache cache[DICT_CACHE_SIZE];
MapFile mapfile;
int read_header(const std::string &filename, int computeCRC); int type;
z_stream zStream;
int initialized;
int headerLength;
int method;
int flags;
time_t mtime;
int extraFlags;
int os;
int version;
int chunkLength;
int chunkCount;
int *chunks;
unsigned long *offsets; /* Sum-scan of chunks. */
std::string origFilename;
std::string comment;
unsigned long crc;
unsigned long length;
unsigned long compressedLength;
DictCache cache[DICT_CACHE_SIZE];
MapFile mapfile;
int read_header(const std::string &filename, int computeCRC);
}; };

View File

@@ -33,7 +33,6 @@ The Levenshtein distance algorithm has been used in:
* Plagiarism detection * Plagiarism detection
*/ */
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
@@ -56,43 +55,39 @@ Enhanced Dynamic Programming ASM Algorithm"
/* Returns the smallest of the three integers a, b and c. */
static inline int minimum(const int a, const int b, const int c)
{
    const int smallerOfAB = (b < a) ? b : a;
    return (c < smallerOfAB) ? c : smallerOfAB;
}
int EditDistance::CalEditDistance(const gunichar *s,const gunichar *t,const int limit) int EditDistance::CalEditDistance(const gunichar *s, const gunichar *t, const int limit)
/*Compute levenshtein distance between s and t, this is using QUICK algorithm*/ /*Compute levenshtein distance between s and t, this is using QUICK algorithm*/
{ {
int n=0,m=0,iLenDif,k,i,j,cost; int n = 0, m = 0, iLenDif, k, i, j, cost;
// Remove leftmost matching portion of strings // Remove leftmost matching portion of strings
while ( *s && (*s==*t) ) while (*s && (*s == *t)) {
{
s++; s++;
t++; t++;
} }
while (s[n]) while (s[n]) {
{ n++;
n++;
}
while (t[m])
{
m++;
}
// Remove rightmost matching portion of strings by decrement n and m.
while ( n && m && (*(s+n-1)==*(t+m-1)) )
{
n--;m--;
} }
if ( m==0 || n==0 || d==nullptr ) while (t[m]) {
return (m+n); m++;
if ( m < n ) }
{
const gunichar * temp = s; // Remove rightmost matching portion of strings by decrement n and m.
while (n && m && (*(s + n - 1) == *(t + m - 1))) {
n--;
m--;
}
if (m == 0 || n == 0 || d == nullptr)
return (m + n);
if (m < n) {
const gunichar *temp = s;
int itemp = n; int itemp = n;
s = t; s = t;
t = temp; t = temp;
@@ -100,55 +95,51 @@ int EditDistance::CalEditDistance(const gunichar *s,const gunichar *t,const int
m = itemp; m = itemp;
} }
iLenDif = m - n; iLenDif = m - n;
if ( iLenDif >= limit ) if (iLenDif >= limit)
return iLenDif; return iLenDif;
// step 1 // step 1
n++;m++; n++;
// d=(int*)malloc(sizeof(int)*m*n); m++;
if ( m*n > currentelements ) // d=(int*)malloc(sizeof(int)*m*n);
{ if (m * n > currentelements) {
currentelements = m*n*2; // double the request currentelements = m * n * 2; // double the request
d = static_cast<int*>(realloc(d, sizeof(int) * currentelements)); d = static_cast<int *>(realloc(d, sizeof(int) * currentelements));
if ( nullptr == d ) if (nullptr == d)
return (m+n); return (m + n);
} }
// step 2, init matrix // step 2, init matrix
for (k=0;k<n;k++) for (k = 0; k < n; k++)
d[k] = k; d[k] = k;
for (k=1;k<m;k++) for (k = 1; k < m; k++)
d[k*n] = k; d[k * n] = k;
// step 3 // step 3
for (i=1;i<n;i++) for (i = 1; i < n; i++) {
{
// first calculate column, d(i,j) // first calculate column, d(i,j)
for ( j=1;j<iLenDif+i;j++ ) for (j = 1; j < iLenDif + i; j++) {
{ cost = s[i - 1] == t[j - 1] ? 0 : 1;
cost = s[i-1]==t[j-1]?0:1; d[j * n + i] = minimum(d[(j - 1) * n + i] + 1, d[j * n + i - 1] + 1, d[(j - 1) * n + i - 1] + cost);
d[j*n+i] = minimum(d[(j-1)*n+i]+1,d[j*n+i-1]+1,d[(j-1)*n+i-1]+cost);
#ifdef COVER_TRANSPOSITION #ifdef COVER_TRANSPOSITION
if ( i>=2 && j>=2 && (d[j*n+i]-d[(j-2)*n+i-2]==2) if (i >= 2 && j >= 2 && (d[j * n + i] - d[(j - 2) * n + i - 2] == 2)
&& (s[i-2]==t[j-1]) && (s[i-1]==t[j-2]) ) && (s[i - 2] == t[j - 1]) && (s[i - 1] == t[j - 2]))
d[j*n+i]--; d[j * n + i]--;
#endif #endif
} }
// second calculate row, d(k,j) // second calculate row, d(k,j)
// now j==iLenDif+i; // now j==iLenDif+i;
for ( k=1;k<=i;k++ ) for (k = 1; k <= i; k++) {
{ cost = s[k - 1] == t[j - 1] ? 0 : 1;
cost = s[k-1]==t[j-1]?0:1; d[j * n + k] = minimum(d[(j - 1) * n + k] + 1, d[j * n + k - 1] + 1, d[(j - 1) * n + k - 1] + cost);
d[j*n+k] = minimum(d[(j-1)*n+k]+1,d[j*n+k-1]+1,d[(j-1)*n+k-1]+cost);
#ifdef COVER_TRANSPOSITION #ifdef COVER_TRANSPOSITION
if ( k>=2 && j>=2 && (d[j*n+k]-d[(j-2)*n+k-2]==2) if (k >= 2 && j >= 2 && (d[j * n + k] - d[(j - 2) * n + k - 2] == 2)
&& (s[k-2]==t[j-1]) && (s[k-1]==t[j-2]) ) && (s[k - 2] == t[j - 1]) && (s[k - 1] == t[j - 2]))
d[j*n+k]--; d[j * n + k]--;
#endif #endif
} }
// test if d(i,j) limit gets equal or exceed // test if d(i,j) limit gets equal or exceed
if ( d[j*n+i] >= limit ) if (d[j * n + i] >= limit) {
{ return d[j * n + i];
return d[j*n+i];
} }
} }
// d(n-1,m-1) // d(n-1,m-1)
return d[n*m-1]; return d[n * m - 1];
} }

View File

@@ -3,21 +3,24 @@
#include <cstdlib> #include <cstdlib>
#include <glib.h> #include <glib.h>
class EditDistance { class EditDistance
{
public: public:
EditDistance() { EditDistance()
{
currentelements = 2500; // It's enough for most conditions :-) currentelements = 2500; // It's enough for most conditions :-)
d = static_cast<int *>(malloc(sizeof(int)*currentelements)); d = static_cast<int *>(malloc(sizeof(int) * currentelements));
} }
~EditDistance() { ~EditDistance()
{
if (d != nullptr) if (d != nullptr)
free(d); free(d);
} }
EditDistance(const EditDistance&) = delete; EditDistance(const EditDistance &) = delete;
EditDistance& operator=(const EditDistance&) = delete; EditDistance &operator=(const EditDistance &) = delete;
int CalEditDistance( const gunichar *s, const gunichar *t, const int limit ); int CalEditDistance(const gunichar *s, const gunichar *t, const int limit);
private: private:
int *d; int *d;
int currentelements; int currentelements;
}; };

View File

@@ -19,7 +19,7 @@
*/ */
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
# include "config.h" #include "config.h"
#endif #endif
#include <cstring> #include <cstring>
@@ -48,401 +48,405 @@ static const char *ABR_VISFMT = ESC_GREEN;
static std::string xdxf2text(const char *p, bool colorize_output) static std::string xdxf2text(const char *p, bool colorize_output)
{ {
std::string res; std::string res;
for (; *p; ++p) { for (; *p; ++p) {
if (*p != '<') { if (*p != '<') {
if (g_str_has_prefix(p, "&gt;")) { if (g_str_has_prefix(p, "&gt;")) {
res += ">"; res += ">";
p += 3; p += 3;
} else if (g_str_has_prefix(p, "&lt;")) { } else if (g_str_has_prefix(p, "&lt;")) {
res += "<"; res += "<";
p += 3; p += 3;
} else if (g_str_has_prefix(p, "&amp;")) { } else if (g_str_has_prefix(p, "&amp;")) {
res += "&"; res += "&";
p += 4; p += 4;
} else if (g_str_has_prefix(p, "&quot;")) { } else if (g_str_has_prefix(p, "&quot;")) {
res += "\""; res += "\"";
p += 5; p += 5;
} else if (g_str_has_prefix(p, "&apos;")) { } else if (g_str_has_prefix(p, "&apos;")) {
res += "\'"; res += "\'";
p += 5; p += 5;
} else } else
res += *p; res += *p;
continue; continue;
} }
const char *next = strchr(p, '>'); const char *next = strchr(p, '>');
if (!next) if (!next)
continue; continue;
const std::string name(p+1, next-p-1); const std::string name(p + 1, next - p - 1);
if (name == "abr") if (name == "abr")
res += colorize_output ? ABR_VISFMT : ""; res += colorize_output ? ABR_VISFMT : "";
else if (name=="/abr") else if (name == "/abr")
res += colorize_output ? ESC_END : ""; res += colorize_output ? ESC_END : "";
else if (name == "k") { else if (name == "k") {
const char *begin = next; const char *begin = next;
if ((next = strstr(begin, "</k>")) != nullptr) if ((next = strstr(begin, "</k>")) != nullptr)
next += sizeof("</k>") - 1 - 1; next += sizeof("</k>") - 1 - 1;
else else
next = begin; next = begin;
} else if (name == "kref") { } else if (name == "kref") {
res += colorize_output ? KREF_VISFMT : ""; res += colorize_output ? KREF_VISFMT : "";
} else if (name == "/kref") { } else if (name == "/kref") {
res += colorize_output ? ESC_END : ""; res += colorize_output ? ESC_END : "";
} else if (name == "b") } else if (name == "b")
res += colorize_output ? ESC_BOLD : ""; res += colorize_output ? ESC_BOLD : "";
else if (name=="/b") else if (name == "/b")
res += colorize_output ? ESC_END : ""; res += colorize_output ? ESC_END : "";
else if (name == "i") else if (name == "i")
res += colorize_output ? ESC_ITALIC : ""; res += colorize_output ? ESC_ITALIC : "";
else if (name == "/i") else if (name == "/i")
res += colorize_output ? ESC_END : ""; res += colorize_output ? ESC_END : "";
else if (name == "tr") { else if (name == "tr") {
if (colorize_output) if (colorize_output)
res += TRANSCRIPTION_VISFMT; res += TRANSCRIPTION_VISFMT;
res += "["; res += "[";
} else if (name == "/tr") { } else if (name == "/tr") {
res += "]"; res += "]";
if (colorize_output) if (colorize_output)
res += ESC_END; res += ESC_END;
} else if (name == "ex") } else if (name == "ex")
res += colorize_output ? EXAMPLE_VISFMT : ""; res += colorize_output ? EXAMPLE_VISFMT : "";
else if (name == "/ex") else if (name == "/ex")
res += colorize_output ? ESC_END : ""; res += colorize_output ? ESC_END : "";
else if (!name.empty() && name[0] == 'c' && name != "co") { else if (!name.empty() && name[0] == 'c' && name != "co") {
std::string::size_type pos = name.find("code"); std::string::size_type pos = name.find("code");
if (pos != std::string::npos) { if (pos != std::string::npos) {
pos += sizeof("code=\"") - 1; pos += sizeof("code=\"") - 1;
std::string::size_type end_pos = name.find("\""); std::string::size_type end_pos = name.find("\"");
const std::string color(name, pos, end_pos - pos); const std::string color(name, pos, end_pos - pos);
res += ""; res += "";
} else { } else {
res += ""; res += "";
} }
} else if (name == "/c") } else if (name == "/c")
res += ""; res += "";
p = next; p = next;
} }
return res; return res;
} }
static std::string parse_data(const gchar *data, bool colorize_output) static std::string parse_data(const gchar *data, bool colorize_output)
{ {
if (!data) if (!data)
return ""; return "";
std::string res; std::string res;
guint32 data_size, sec_size = 0; guint32 data_size, sec_size = 0;
gchar *m_str; gchar *m_str;
const gchar *p = data; const gchar *p = data;
data_size = get_uint32(p); data_size = get_uint32(p);
p += sizeof(guint32); p += sizeof(guint32);
while (guint32(p - data)<data_size) { while (guint32(p - data) < data_size) {
switch (*p++) { switch (*p++) {
case 'h': // HTML data case 'h': // HTML data
case 'w': // WikiMedia markup data case 'w': // WikiMedia markup data
case 'm': // plain text, utf-8 case 'm': // plain text, utf-8
case 'l': // not utf-8, some other locale encoding, discouraged, need more work... case 'l': // not utf-8, some other locale encoding, discouraged, need more work...
sec_size = strlen(p); sec_size = strlen(p);
if (sec_size) { if (sec_size) {
res+="\n"; res += "\n";
m_str = g_strndup(p, sec_size); m_str = g_strndup(p, sec_size);
res += m_str; res += m_str;
g_free(m_str); g_free(m_str);
} }
sec_size++; sec_size++;
break; break;
case 'g': // pango markup data case 'g': // pango markup data
case 'x': // xdxf case 'x': // xdxf
sec_size = strlen(p); sec_size = strlen(p);
if (sec_size) { if (sec_size) {
res+="\n"; res += "\n";
m_str = g_strndup(p, sec_size); m_str = g_strndup(p, sec_size);
res += xdxf2text(m_str, colorize_output); res += xdxf2text(m_str, colorize_output);
g_free(m_str); g_free(m_str);
} }
sec_size++; sec_size++;
break; break;
case 't': // english phonetic string case 't': // english phonetic string
sec_size = strlen(p); sec_size = strlen(p);
if(sec_size){ if (sec_size) {
res += "\n"; res += "\n";
if (colorize_output) if (colorize_output)
res += TRANSCRIPTION_VISFMT; res += TRANSCRIPTION_VISFMT;
res += "[" + std::string(p, sec_size) + "]"; res += "[" + std::string(p, sec_size) + "]";
if (colorize_output) if (colorize_output)
res += ESC_END; res += ESC_END;
} }
sec_size++; sec_size++;
break; break;
case 'k': // KingSoft PowerWord data case 'k': // KingSoft PowerWord data
case 'y': // chinese YinBiao or japanese kana, utf-8 case 'y': // chinese YinBiao or japanese kana, utf-8
sec_size = strlen(p); sec_size = strlen(p);
if (sec_size) if (sec_size)
res += std::string(p, sec_size); res += std::string(p, sec_size);
sec_size++; sec_size++;
break; break;
case 'W': // wav file case 'W': // wav file
case 'P': // picture data case 'P': // picture data
sec_size = get_uint32(p); sec_size = get_uint32(p);
sec_size += sizeof(guint32); sec_size += sizeof(guint32);
break; break;
} }
p += sec_size; p += sec_size;
} }
return res;
return res;
} }
void Library::SimpleLookup(const std::string &str, TSearchResultList& res_list) void Library::SimpleLookup(const std::string &str, TSearchResultList &res_list)
{ {
glong ind; glong ind;
res_list.reserve(ndicts()); res_list.reserve(ndicts());
for (gint idict = 0; idict < ndicts(); ++idict) for (gint idict = 0; idict < ndicts(); ++idict)
if (SimpleLookupWord(str.c_str(), ind, idict)) if (SimpleLookupWord(str.c_str(), ind, idict))
res_list.push_back( res_list.push_back(
TSearchResult(dict_name(idict), TSearchResult(dict_name(idict),
poGetWord(ind, idict), poGetWord(ind, idict),
parse_data(poGetWordData(ind, idict), colorize_output_))); parse_data(poGetWordData(ind, idict), colorize_output_)));
} }
void Library::LookupWithFuzzy(const std::string &str, TSearchResultList& res_list) void Library::LookupWithFuzzy(const std::string &str, TSearchResultList &res_list)
{ {
static const int MAXFUZZY=10; static const int MAXFUZZY = 10;
gchar *fuzzy_res[MAXFUZZY]; gchar *fuzzy_res[MAXFUZZY];
if (!Libs::LookupWithFuzzy(str.c_str(), fuzzy_res, MAXFUZZY)) if (!Libs::LookupWithFuzzy(str.c_str(), fuzzy_res, MAXFUZZY))
return; return;
for (gchar **p = fuzzy_res, **end = (fuzzy_res + MAXFUZZY); p != end && *p; ++p) { for (gchar **p = fuzzy_res, **end = (fuzzy_res + MAXFUZZY); p != end && *p; ++p) {
SimpleLookup(*p, res_list); SimpleLookup(*p, res_list);
g_free(*p); g_free(*p);
} }
} }
void Library::LookupWithRule(const std::string &str, TSearchResultList& res_list) void Library::LookupWithRule(const std::string &str, TSearchResultList &res_list)
{ {
std::vector<gchar *> match_res((MAX_MATCH_ITEM_PER_LIB) * ndicts()); std::vector<gchar *> match_res((MAX_MATCH_ITEM_PER_LIB)*ndicts());
const gint nfound = Libs::LookupWithRule(str.c_str(), &match_res[0]); const gint nfound = Libs::LookupWithRule(str.c_str(), &match_res[0]);
if (nfound == 0) if (nfound == 0)
return; return;
for (gint i = 0; i < nfound; ++i) { for (gint i = 0; i < nfound; ++i) {
SimpleLookup(match_res[i], res_list); SimpleLookup(match_res[i], res_list);
g_free(match_res[i]); g_free(match_res[i]);
} }
} }
void Library::LookupData(const std::string &str, TSearchResultList& res_list) void Library::LookupData(const std::string &str, TSearchResultList &res_list)
{ {
std::vector<std::vector<gchar *> > drl(ndicts()); std::vector<std::vector<gchar *>> drl(ndicts());
if (!Libs::LookupData(str.c_str(), &drl[0])) if (!Libs::LookupData(str.c_str(), &drl[0]))
return; return;
for (int idict = 0; idict < ndicts(); ++idict) for (int idict = 0; idict < ndicts(); ++idict)
for (gchar *res : drl[idict]) { for (gchar *res : drl[idict]) {
SimpleLookup(res, res_list); SimpleLookup(res, res_list);
g_free(res); g_free(res);
} }
} }
void Library::print_search_result(FILE *out, const TSearchResult & res, bool &first_result) void Library::print_search_result(FILE *out, const TSearchResult &res, bool &first_result)
{ {
std::string loc_bookname, loc_def, loc_exp; std::string loc_bookname, loc_def, loc_exp;
if (!utf8_output_){ if (!utf8_output_) {
loc_bookname = utf8_to_locale_ign_err(res.bookname); loc_bookname = utf8_to_locale_ign_err(res.bookname);
loc_def = utf8_to_locale_ign_err(res.def); loc_def = utf8_to_locale_ign_err(res.def);
loc_exp = utf8_to_locale_ign_err(res.exp); loc_exp = utf8_to_locale_ign_err(res.exp);
} }
if(json_) { if (json_) {
if(!first_result) { if (!first_result) {
fputs(",", out); fputs(",", out);
} else {
first_result=false;
}
fprintf(out,"{\"dict\": \"%s\",\"word\":\"%s\",\"definition\":\"%s\"}",
json_escape_string(res.bookname).c_str(),
json_escape_string(res.def).c_str(),
json_escape_string(res.exp).c_str());
} else { } else {
fprintf(out, first_result = false;
"-->%s%s%s\n"
"-->%s%s%s\n"
"%s\n\n",
colorize_output_ ? NAME_OF_DICT_VISFMT : "",
utf8_output_ ? res.bookname.c_str() : loc_bookname.c_str(),
colorize_output_ ? ESC_END : "",
colorize_output_ ? SEARCH_TERM_VISFMT : "",
utf8_output_ ? res.def.c_str() : loc_def.c_str(),
colorize_output_ ? ESC_END : "",
utf8_output_ ? res.exp.c_str() : loc_exp.c_str());
} }
fprintf(out, "{\"dict\": \"%s\",\"word\":\"%s\",\"definition\":\"%s\"}",
json_escape_string(res.bookname).c_str(),
json_escape_string(res.def).c_str(),
json_escape_string(res.exp).c_str());
} else {
fprintf(out,
"-->%s%s%s\n"
"-->%s%s%s\n"
"%s\n\n",
colorize_output_ ? NAME_OF_DICT_VISFMT : "",
utf8_output_ ? res.bookname.c_str() : loc_bookname.c_str(),
colorize_output_ ? ESC_END : "",
colorize_output_ ? SEARCH_TERM_VISFMT : "",
utf8_output_ ? res.def.c_str() : loc_def.c_str(),
colorize_output_ ? ESC_END : "",
utf8_output_ ? res.exp.c_str() : loc_exp.c_str());
}
} }
namespace { namespace
class sdcv_pager final { {
public: class sdcv_pager final
explicit sdcv_pager(bool ignore_env = false) { {
public:
explicit sdcv_pager(bool ignore_env = false)
{
output = stdout;
if (ignore_env) {
return;
}
const gchar *pager = g_getenv("SDCV_PAGER");
if (pager && (output = popen(pager, "w")) == nullptr) {
perror(_("popen failed"));
output = stdout; output = stdout;
if (ignore_env) {
return;
}
const gchar *pager = g_getenv("SDCV_PAGER");
if (pager && (output = popen(pager, "w")) == nullptr) {
perror(_("popen failed"));
output = stdout;
}
} }
sdcv_pager(const sdcv_pager&) = delete; }
sdcv_pager& operator=(const sdcv_pager&) = delete; sdcv_pager(const sdcv_pager &) = delete;
~sdcv_pager() { sdcv_pager &operator=(const sdcv_pager &) = delete;
if (output != stdout) { ~sdcv_pager()
pclose(output); {
} if (output != stdout) {
pclose(output);
} }
FILE *get_stream() { return output; } }
private: FILE *get_stream() { return output; }
FILE *output;
}; private:
FILE *output;
};
} }
bool Library::process_phrase(const char *loc_str, IReadLine &io, bool force) bool Library::process_phrase(const char *loc_str, IReadLine &io, bool force)
{ {
if (nullptr == loc_str) if (nullptr == loc_str)
return true; return true;
std::string query; std::string query;
analyze_query(loc_str, query); analyze_query(loc_str, query);
if (!query.empty()) if (!query.empty())
io.add_to_history(query.c_str()); io.add_to_history(query.c_str());
gsize bytes_read; gsize bytes_read;
gsize bytes_written; gsize bytes_written;
glib::Error err; glib::Error err;
glib::CharStr str; glib::CharStr str;
if (!utf8_input_) if (!utf8_input_)
str.reset(g_locale_to_utf8(loc_str, -1, &bytes_read, &bytes_written, get_addr(err))); str.reset(g_locale_to_utf8(loc_str, -1, &bytes_read, &bytes_written, get_addr(err)));
else else
str.reset(g_strdup(loc_str)); str.reset(g_strdup(loc_str));
if (nullptr == get_impl(str)) { if (nullptr == get_impl(str)) {
fprintf(stderr, _("Can not convert %s to utf8.\n"), loc_str); fprintf(stderr, _("Can not convert %s to utf8.\n"), loc_str);
fprintf(stderr, "%s\n", err->message); fprintf(stderr, "%s\n", err->message);
return false; return false;
} }
if (str[0] == '\0') if (str[0] == '\0')
return true; return true;
TSearchResultList res_list; TSearchResultList res_list;
switch (analyze_query(get_impl(str), query)) { switch (analyze_query(get_impl(str), query)) {
case qtFUZZY: case qtFUZZY:
LookupWithFuzzy(query, res_list); LookupWithFuzzy(query, res_list);
break; break;
case qtREGEXP: case qtREGEXP:
LookupWithRule(query, res_list); LookupWithRule(query, res_list);
break; break;
case qtSIMPLE: case qtSIMPLE:
SimpleLookup(get_impl(str), res_list); SimpleLookup(get_impl(str), res_list);
if (res_list.empty() && fuzzy_) if (res_list.empty() && fuzzy_)
LookupWithFuzzy(get_impl(str), res_list); LookupWithFuzzy(get_impl(str), res_list);
break; break;
case qtDATA: case qtDATA:
LookupData(query, res_list); LookupData(query, res_list);
break; break;
default: default:
/*nothing*/; /*nothing*/;
} }
bool first_result = true; bool first_result = true;
if (json_) { if (json_) {
fputc('[', stdout); fputc('[', stdout);
} }
if (!res_list.empty()) { if (!res_list.empty()) {
/* try to be more clever, if there are /* try to be more clever, if there are
one or zero results per dictionary show all one or zero results per dictionary show all
*/ */
bool show_all_results = true; bool show_all_results = true;
typedef std::map< std::string, int, std::less<std::string> > DictResMap; typedef std::map<std::string, int, std::less<std::string>> DictResMap;
if (!force) { if (!force) {
DictResMap res_per_dict; DictResMap res_per_dict;
for (const TSearchResult& search_res : res_list) { for (const TSearchResult &search_res : res_list) {
auto r = res_per_dict.equal_range(search_res.bookname); auto r = res_per_dict.equal_range(search_res.bookname);
DictResMap tmp(r.first, r.second); DictResMap tmp(r.first, r.second);
if (tmp.empty()) //there are no yet such bookname in map if (tmp.empty()) //there are no yet such bookname in map
res_per_dict.insert(DictResMap::value_type(search_res.bookname, 1)); res_per_dict.insert(DictResMap::value_type(search_res.bookname, 1));
else { else {
++((tmp.begin())->second); ++((tmp.begin())->second);
if (tmp.begin()->second > 1) { if (tmp.begin()->second > 1) {
show_all_results = false; show_all_results = false;
break; break;
} }
} }
} }
}//if (!force) } //if (!force)
if (!show_all_results && !force) { if (!show_all_results && !force) {
if (!json_) { if (!json_) {
printf(_("Found %zu items, similar to %s.\n"), res_list.size(), printf(_("Found %zu items, similar to %s.\n"), res_list.size(),
utf8_output_ ? get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str()); utf8_output_ ? get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str());
} }
for (size_t i = 0; i < res_list.size(); ++i) { for (size_t i = 0; i < res_list.size(); ++i) {
const std::string loc_bookname = utf8_to_locale_ign_err(res_list[i].bookname); const std::string loc_bookname = utf8_to_locale_ign_err(res_list[i].bookname);
const std::string loc_def = utf8_to_locale_ign_err(res_list[i].def); const std::string loc_def = utf8_to_locale_ign_err(res_list[i].def);
printf("%zu)%s%s%s-->%s%s%s\n", i, printf("%zu)%s%s%s-->%s%s%s\n", i,
colorize_output_ ? NAME_OF_DICT_VISFMT : "", colorize_output_ ? NAME_OF_DICT_VISFMT : "",
utf8_output_ ? res_list[i].bookname.c_str() : loc_bookname.c_str(), utf8_output_ ? res_list[i].bookname.c_str() : loc_bookname.c_str(),
colorize_output_ ? ESC_END : "", colorize_output_ ? ESC_END : "",
colorize_output_ ? SEARCH_TERM_VISFMT : "", colorize_output_ ? SEARCH_TERM_VISFMT : "",
utf8_output_ ? res_list[i].def.c_str() : loc_def.c_str(), utf8_output_ ? res_list[i].def.c_str() : loc_def.c_str(),
colorize_output_ ? ESC_END : ""); colorize_output_ ? ESC_END : "");
} }
int choise; int choise;
std::unique_ptr<IReadLine> choice_readline(create_readline_object()); std::unique_ptr<IReadLine> choice_readline(create_readline_object());
for (;;) { for (;;) {
std::string str_choise; std::string str_choise;
choice_readline->read(_("Your choice[-1 to abort]: "), str_choise); choice_readline->read(_("Your choice[-1 to abort]: "), str_choise);
sscanf(str_choise.c_str(), "%d", &choise); sscanf(str_choise.c_str(), "%d", &choise);
if (choise >= 0 && choise < int(res_list.size())) { if (choise >= 0 && choise < int(res_list.size())) {
sdcv_pager pager; sdcv_pager pager;
io.add_to_history(res_list[choise].def.c_str()); io.add_to_history(res_list[choise].def.c_str());
print_search_result(pager.get_stream(), res_list[choise], first_result); print_search_result(pager.get_stream(), res_list[choise], first_result);
break; break;
} else if (choise == -1){ } else if (choise == -1) {
break; break;
} else } else
printf(_("Invalid choice.\nIt must be from 0 to %zu or -1.\n"), printf(_("Invalid choice.\nIt must be from 0 to %zu or -1.\n"),
res_list.size()-1); res_list.size() - 1);
} }
} else { } else {
sdcv_pager pager(force || json_); sdcv_pager pager(force || json_);
if (!json_) { if (!json_) {
fprintf(pager.get_stream(), _("Found %zu items, similar to %s.\n"), fprintf(pager.get_stream(), _("Found %zu items, similar to %s.\n"),
res_list.size(), utf8_output_ ? get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str()); res_list.size(), utf8_output_ ? get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str());
} }
for (const TSearchResult& search_res : res_list) { for (const TSearchResult &search_res : res_list) {
print_search_result(pager.get_stream(), search_res, first_result); print_search_result(pager.get_stream(), search_res, first_result);
} }
} }
} else { } else {
std::string loc_str; std::string loc_str;
if (!utf8_output_) if (!utf8_output_)
loc_str = utf8_to_locale_ign_err(get_impl(str)); loc_str = utf8_to_locale_ign_err(get_impl(str));
if(!json_) if (!json_)
printf(_("Nothing similar to %s, sorry :(\n"), utf8_output_ ? get_impl(str) : loc_str.c_str()); printf(_("Nothing similar to %s, sorry :(\n"), utf8_output_ ? get_impl(str) : loc_str.c_str());
} }
if (json_) { if (json_) {
fputs("]\n", stdout); fputs("]\n", stdout);
} }
return true; return true;
} }

View File

@@ -3,45 +3,52 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "stardict_lib.hpp"
#include "readline.hpp" #include "readline.hpp"
#include "stardict_lib.hpp"
//this structure is wrapper and it need for unification //this structure is wrapper and it need for unification
//results of search whith return Dicts class //results of search whith return Dicts class
struct TSearchResult { struct TSearchResult {
std::string bookname; std::string bookname;
std::string def; std::string def;
std::string exp; std::string exp;
TSearchResult(const std::string& bookname_, const std::string& def_, const std::string& exp_) TSearchResult(const std::string &bookname_, const std::string &def_, const std::string &exp_)
: bookname(bookname_), def(def_), exp(exp_) : bookname(bookname_)
{ , def(def_)
} , exp(exp_)
{
}
}; };
typedef std::vector<TSearchResult> TSearchResultList; typedef std::vector<TSearchResult> TSearchResultList;
//this class is wrapper around Dicts class for easy use //this class is wrapper around Dicts class for easy use
//of it //of it
class Library : public Libs { class Library : public Libs
{
public: public:
Library(bool uinput, bool uoutput, bool colorize_output, bool use_json, bool no_fuzzy) Library(bool uinput, bool uoutput, bool colorize_output, bool use_json, bool no_fuzzy)
: utf8_input_(uinput), utf8_output_(uoutput), colorize_output_(colorize_output), json_(use_json) { : utf8_input_(uinput)
setVerbose(!use_json); , utf8_output_(uoutput)
setFuzzy(!no_fuzzy); , colorize_output_(colorize_output)
} , json_(use_json)
{
setVerbose(!use_json);
setFuzzy(!no_fuzzy);
}
bool process_phrase(const char *loc_str, IReadLine &io, bool force = false);
bool process_phrase(const char *loc_str, IReadLine &io, bool force = false);
private: private:
bool utf8_input_; bool utf8_input_;
bool utf8_output_; bool utf8_output_;
bool colorize_output_; bool colorize_output_;
bool json_; bool json_;
void SimpleLookup(const std::string &str, TSearchResultList& res_list); void SimpleLookup(const std::string &str, TSearchResultList &res_list);
void LookupWithFuzzy(const std::string &str, TSearchResultList& res_list); void LookupWithFuzzy(const std::string &str, TSearchResultList &res_list);
void LookupWithRule(const std::string &str, TSearchResultList& res_lsit); void LookupWithRule(const std::string &str, TSearchResultList &res_lsit);
void LookupData(const std::string &str, TSearchResultList& res_list); void LookupData(const std::string &str, TSearchResultList &res_list);
void print_search_result(FILE *out, const TSearchResult & res, bool &first_result); void print_search_result(FILE *out, const TSearchResult &res, bool &first_result);
}; };

View File

@@ -1,27 +1,29 @@
#pragma once #pragma once
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
# include "config.h" #include "config.h"
#endif #endif
#ifdef HAVE_MMAP #ifdef HAVE_MMAP
# include <sys/types.h> #include <fcntl.h>
# include <fcntl.h> #include <sys/mman.h>
# include <sys/mman.h> #include <sys/types.h>
#endif #endif
#ifdef _WIN32 #ifdef _WIN32
# include <windows.h> #include <windows.h>
#endif #endif
#include <glib.h> #include <glib.h>
class MapFile { class MapFile
{
public: public:
MapFile() {} MapFile() {}
~MapFile(); ~MapFile();
MapFile(const MapFile&) = delete; MapFile(const MapFile &) = delete;
MapFile& operator=(const MapFile&) = delete; MapFile &operator=(const MapFile &) = delete;
bool open(const char *file_name, unsigned long file_size); bool open(const char *file_name, unsigned long file_size);
gchar *begin() { return data; } gchar *begin() { return data; }
private: private:
char *data = nullptr; char *data = nullptr;
unsigned long size = 0ul; unsigned long size = 0ul;
@@ -35,51 +37,50 @@ private:
inline bool MapFile::open(const char *file_name, unsigned long file_size) inline bool MapFile::open(const char *file_name, unsigned long file_size)
{ {
size=file_size; size = file_size;
#ifdef HAVE_MMAP #ifdef HAVE_MMAP
if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) { if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) {
//g_print("Open file %s failed!\n",fullfilename); //g_print("Open file %s failed!\n",fullfilename);
return false; return false;
} }
data = (gchar *)mmap( nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0); data = (gchar *)mmap(nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0);
if ((void *)data == (void *)(-1)) { if ((void *)data == (void *)(-1)) {
//g_print("mmap file %s failed!\n",idxfilename); //g_print("mmap file %s failed!\n",idxfilename);
data=nullptr; data = nullptr;
return false; return false;
} }
#elif defined( _WIN32) #elif defined(_WIN32)
hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS, hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS,
FILE_ATTRIBUTE_NORMAL, 0); FILE_ATTRIBUTE_NORMAL, 0);
hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0, hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0,
file_size, nullptr); file_size, nullptr);
data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size); data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size);
#else #else
gsize read_len; gsize read_len;
if (!g_file_get_contents(file_name, &data, &read_len, nullptr)) if (!g_file_get_contents(file_name, &data, &read_len, nullptr))
return false; return false;
if (read_len != file_size) if (read_len != file_size)
return false; return false;
#endif #endif
return true; return true;
} }
inline MapFile::~MapFile() inline MapFile::~MapFile()
{ {
if (!data) if (!data)
return; return;
#ifdef HAVE_MMAP #ifdef HAVE_MMAP
munmap(data, size); munmap(data, size);
close(mmap_fd); close(mmap_fd);
#else #else
# ifdef _WIN32 #ifdef _WIN32
UnmapViewOfFile(data); UnmapViewOfFile(data);
CloseHandle(hFileMap); CloseHandle(hFileMap);
CloseHandle(hFile); CloseHandle(hFile);
# else #else
g_free(data); g_free(data);
# endif #endif
#endif #endif
} }

View File

@@ -19,14 +19,14 @@
*/ */
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
# include "config.h" #include "config.h"
#endif #endif
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#ifdef WITH_READLINE #ifdef WITH_READLINE
# include <readline/readline.h> #include <readline/history.h>
# include <readline/history.h> #include <readline/readline.h>
#endif #endif
#include <glib.h> #include <glib.h>
@@ -34,73 +34,82 @@
#include "readline.hpp" #include "readline.hpp"
bool stdio_getline(FILE *in, std::string & str) bool stdio_getline(FILE *in, std::string &str)
{ {
assert(in != nullptr); assert(in != nullptr);
str.clear(); str.clear();
int ch; int ch;
while ((ch=fgetc(in)) != EOF && ch != '\n') while ((ch = fgetc(in)) != EOF && ch != '\n')
str += ch; str += ch;
return EOF != ch; return EOF != ch;
} }
#ifndef WITH_READLINE #ifndef WITH_READLINE
namespace { namespace
class dummy_readline : public IReadLine { {
public: class dummy_readline : public IReadLine
bool read(const std::string &banner, std::string &line) override { {
printf("%s", banner.c_str()); public:
return stdio_getline(stdin, line); bool read(const std::string &banner, std::string &line) override
} {
}; printf("%s", banner.c_str());
return stdio_getline(stdin, line);
}
};
} }
#else #else
namespace { namespace
class real_readline : public IReadLine { {
class real_readline : public IReadLine
{
public: public:
real_readline() { real_readline()
rl_readline_name = "sdcv"; {
using_history(); rl_readline_name = "sdcv";
const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history"; using_history();
read_history(histname.c_str()); const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history";
} read_history(histname.c_str());
}
~real_readline() { ~real_readline()
const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history"; {
write_history(histname.c_str()); const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history";
const gchar *hist_size_str=g_getenv("SDCV_HISTSIZE"); write_history(histname.c_str());
int hist_size; const gchar *hist_size_str = g_getenv("SDCV_HISTSIZE");
if (!hist_size_str || sscanf(hist_size_str, "%d", &hist_size)<1) int hist_size;
hist_size = 2000; if (!hist_size_str || sscanf(hist_size_str, "%d", &hist_size) < 1)
history_truncate_file(histname.c_str(), hist_size); hist_size = 2000;
} history_truncate_file(histname.c_str(), hist_size);
}
bool read(const std::string &banner, std::string& line) override { bool read(const std::string &banner, std::string &line) override
char *phrase = nullptr; {
phrase = readline(banner.c_str()); char *phrase = nullptr;
if (phrase) { phrase = readline(banner.c_str());
line = phrase; if (phrase) {
free(phrase); line = phrase;
return true; free(phrase);
} return true;
return false;
} }
return false;
}
void add_to_history(const std::string& phrase) override { void add_to_history(const std::string &phrase) override
add_history(phrase.c_str()); {
} add_history(phrase.c_str());
}; }
};
} }
#endif//WITH_READLINE #endif //WITH_READLINE
IReadLine *create_readline_object() IReadLine *create_readline_object()
{ {
#ifdef WITH_READLINE #ifdef WITH_READLINE
return new real_readline; return new real_readline;
#else #else
return new dummy_readline; return new dummy_readline;
#endif #endif
} }

View File

@@ -2,11 +2,12 @@
#include <string> #include <string>
class IReadLine { class IReadLine
{
public: public:
virtual ~IReadLine() {} virtual ~IReadLine() {}
virtual bool read(const std::string &banner, std::string& line) = 0; virtual bool read(const std::string &banner, std::string &line) = 0;
virtual void add_to_history(const std::string&) {} virtual void add_to_history(const std::string &) {}
}; };
extern std::string sdcv_readline; extern std::string sdcv_readline;

View File

@@ -22,16 +22,16 @@
#include "config.h" #include "config.h"
#endif #endif
#include <algorithm>
#include <cerrno> #include <cerrno>
#include <clocale> #include <clocale>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <map>
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include <algorithm>
#include <map>
#include <glib.h> #include <glib.h>
#include <glib/gi18n.h> #include <glib/gi18n.h>
@@ -56,7 +56,7 @@ static void free_str_array(gchar **arr)
} }
namespace glib namespace glib
{ {
using StrArr = ResourceWrapper<gchar *, gchar *, free_str_array>; using StrArr = ResourceWrapper<gchar *, gchar *, free_str_array>;
} }
static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_json); static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_json);
@@ -131,12 +131,12 @@ int main(int argc, char *argv[]) try {
const gchar *stardict_data_dir = g_getenv("STARDICT_DATA_DIR"); const gchar *stardict_data_dir = g_getenv("STARDICT_DATA_DIR");
std::string data_dir; std::string data_dir;
if (!opt_data_dir) { if (!opt_data_dir) {
if (!only_data_dir) { if (!only_data_dir) {
if (stardict_data_dir) if (stardict_data_dir)
data_dir = stardict_data_dir; data_dir = stardict_data_dir;
else else
data_dir = "/usr/share/stardict/dic"; data_dir = "/usr/share/stardict/dic";
} }
} else { } else {
data_dir = get_impl(opt_data_dir); data_dir = get_impl(opt_data_dir);
} }
@@ -146,8 +146,8 @@ int main(int argc, char *argv[]) try {
homedir = g_get_home_dir(); homedir = g_get_home_dir();
std::list<std::string> dicts_dir_list; std::list<std::string> dicts_dir_list;
if(!only_data_dir) if (!only_data_dir)
dicts_dir_list.push_back(std::string(homedir) + G_DIR_SEPARATOR + ".stardict" + G_DIR_SEPARATOR + "dic"); dicts_dir_list.push_back(std::string(homedir) + G_DIR_SEPARATOR + ".stardict" + G_DIR_SEPARATOR + "dic");
dicts_dir_list.push_back(data_dir); dicts_dir_list.push_back(data_dir);
if (show_list_dicts) { if (show_list_dicts) {
list_dicts(dicts_dir_list, json_output); list_dicts(dicts_dir_list, json_output);
@@ -215,7 +215,7 @@ int main(int argc, char *argv[]) try {
std::string phrase; std::string phrase;
while (io->read(_("Enter word or phrase: "), phrase)) { while (io->read(_("Enter word or phrase: "), phrase)) {
if (!lib.process_phrase(phrase.c_str(), *io)) if (!lib.process_phrase(phrase.c_str(), *io))
return EXIT_FAILURE; return EXIT_FAILURE;
phrase.clear(); phrase.clear();
} }
@@ -232,30 +232,29 @@ int main(int argc, char *argv[]) try {
static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_json) static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_json)
{ {
bool first_entry = true; bool first_entry = true;
if(!use_json) if (!use_json)
printf(_("Dictionary's name Word count\n")); printf(_("Dictionary's name Word count\n"));
else else
fputc('[', stdout); fputc('[', stdout);
std::list<std::string> order_list, disable_list; std::list<std::string> order_list, disable_list;
for_each_file(dicts_dir_list, ".ifo", order_list, for_each_file(dicts_dir_list, ".ifo", order_list,
disable_list, [use_json, &first_entry](const std::string &filename, bool) -> void { disable_list, [use_json, &first_entry](const std::string &filename, bool) -> void {
DictInfo dict_info; DictInfo dict_info;
if (dict_info.load_from_ifo_file(filename, false)) { if (dict_info.load_from_ifo_file(filename, false)) {
const std::string bookname = utf8_to_locale_ign_err(dict_info.bookname); const std::string bookname = utf8_to_locale_ign_err(dict_info.bookname);
if(use_json) { if (use_json) {
if(first_entry) { if (first_entry) {
first_entry=false; first_entry = false;
} else { } else {
fputc(',', stdout); // comma between entries fputc(',', stdout); // comma between entries
}
printf("{\"name\": \"%s\", \"wordcount\": \"%d\"}", json_escape_string(bookname).c_str(), dict_info.wordcount);
} else {
printf("%s %d\n", bookname.c_str(), dict_info.wordcount);
}
} }
printf("{\"name\": \"%s\", \"wordcount\": \"%d\"}", json_escape_string(bookname).c_str(), dict_info.wordcount); });
} else { if (use_json)
printf("%s %d\n", bookname.c_str(), dict_info.wordcount); fputs("]\n", stdout);
}
}
});
if(use_json)
fputs("]\n", stdout);
} }

File diff suppressed because it is too large Load Diff

View File

@@ -2,17 +2,17 @@
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <functional>
#include <list> #include <list>
#include <map>
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include <functional>
#include <map>
#include "dictziplib.hpp" #include "dictziplib.hpp"
// Upper bound on the number of matches collected from a single dictionary.
const int MAX_MATCH_ITEM_PER_LIB = 100;
// At most MAX_FUZZY_DISTANCE-1 differences are allowed when finding similar words.
const int MAX_FUZZY_DISTANCE = 3;
inline guint32 get_uint32(const gchar *addr) inline guint32 get_uint32(const gchar *addr)
{ {
@@ -26,172 +26,190 @@ inline void set_uint32(gchar *addr, guint32 val)
memcpy(addr, &val, sizeof(guint32)); memcpy(addr, &val, sizeof(guint32));
} }
struct cacheItem { struct cacheItem {
guint32 offset; guint32 offset;
gchar *data; gchar *data;
//write code here to make it inline //write code here to make it inline
cacheItem() { data = nullptr;} cacheItem() { data = nullptr; }
~cacheItem() { g_free(data); } ~cacheItem() { g_free(data); }
}; };
// Number of cacheItem slots kept by each DictBase.
const int WORDDATA_CACHE_NUM = 10;
// Sentinel index; Libs::poGetWordData returns nullptr when given this value.
const int INVALID_INDEX = -100;
class DictBase { class DictBase
{
public: public:
DictBase() {} DictBase() {}
~DictBase() { ~DictBase()
{
if (dictfile) if (dictfile)
fclose(dictfile); fclose(dictfile);
} }
DictBase(const DictBase&) = delete; DictBase(const DictBase &) = delete;
DictBase& operator=(const DictBase&) = delete; DictBase &operator=(const DictBase &) = delete;
gchar * GetWordData(guint32 idxitem_offset, guint32 idxitem_size); gchar *GetWordData(guint32 idxitem_offset, guint32 idxitem_size);
bool containSearchData() const { bool containSearchData() const
{
if (sametypesequence.empty()) if (sametypesequence.empty())
return true; return true;
return sametypesequence.find_first_of("mlgxty") != std::string::npos; return sametypesequence.find_first_of("mlgxty") != std::string::npos;
} }
bool SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data); bool SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data);
protected: protected:
std::string sametypesequence; std::string sametypesequence;
FILE *dictfile = nullptr; FILE *dictfile = nullptr;
std::unique_ptr<DictData> dictdzfile; std::unique_ptr<DictData> dictdzfile;
private: private:
cacheItem cache[WORDDATA_CACHE_NUM]; cacheItem cache[WORDDATA_CACHE_NUM];
gint cache_cur = 0; gint cache_cur = 0;
}; };
//this structure contain all information about dictionary //this structure contain all information about dictionary
struct DictInfo { struct DictInfo {
std::string ifo_file_name; std::string ifo_file_name;
guint32 wordcount; guint32 wordcount;
guint32 syn_wordcount; guint32 syn_wordcount;
std::string bookname; std::string bookname;
std::string author; std::string author;
std::string email; std::string email;
std::string website; std::string website;
std::string date; std::string date;
std::string description; std::string description;
guint32 index_file_size; guint32 index_file_size;
guint32 syn_file_size; guint32 syn_file_size;
std::string sametypesequence; std::string sametypesequence;
bool load_from_ifo_file(const std::string& ifofilename, bool istreedict); bool load_from_ifo_file(const std::string &ifofilename, bool istreedict);
}; };
class IIndexFile { class IIndexFile
{
public: public:
guint32 wordentry_offset; guint32 wordentry_offset;
guint32 wordentry_size; guint32 wordentry_size;
virtual ~IIndexFile() {} virtual ~IIndexFile() {}
virtual bool load(const std::string& url, gulong wc, gulong fsize, bool verbose) = 0; virtual bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) = 0;
virtual const gchar *get_key(glong idx) = 0; virtual const gchar *get_key(glong idx) = 0;
virtual void get_data(glong idx) = 0; virtual void get_data(glong idx) = 0;
virtual const gchar *get_key_and_data(glong idx) = 0; virtual const gchar *get_key_and_data(glong idx) = 0;
virtual bool lookup(const char *str, glong &idx) = 0; virtual bool lookup(const char *str, glong &idx) = 0;
}; };
class SynFile { class SynFile
{
public: public:
bool load(const std::string& url, gulong wc); bool load(const std::string &url, gulong wc);
bool lookup(const char *str, glong &idx); bool lookup(const char *str, glong &idx);
private: private:
std::map<std::string, gulong> synonyms; std::map<std::string, gulong> synonyms;
}; };
class Dict : public DictBase { class Dict : public DictBase
{
public: public:
Dict() {} Dict() {}
Dict(const Dict&) = delete; Dict(const Dict &) = delete;
Dict& operator=(const Dict&) = delete; Dict &operator=(const Dict &) = delete;
bool load(const std::string& ifofilename, bool verbose); bool load(const std::string &ifofilename, bool verbose);
gulong narticles() const { return wordcount; } gulong narticles() const { return wordcount; }
const std::string& dict_name() const { return bookname; } const std::string &dict_name() const { return bookname; }
const std::string& ifofilename() const { return ifo_file_name; } const std::string &ifofilename() const { return ifo_file_name; }
const gchar *get_key(glong index) { return idx_file->get_key(index); } const gchar *get_key(glong index) { return idx_file->get_key(index); }
gchar *get_data(glong index) { gchar *get_data(glong index)
{
idx_file->get_data(index); idx_file->get_data(index);
return DictBase::GetWordData(idx_file->wordentry_offset, idx_file->wordentry_size); return DictBase::GetWordData(idx_file->wordentry_offset, idx_file->wordentry_size);
} }
void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size) { void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size)
{
*key = idx_file->get_key_and_data(index); *key = idx_file->get_key_and_data(index);
*offset = idx_file->wordentry_offset; *offset = idx_file->wordentry_offset;
*size = idx_file->wordentry_size; *size = idx_file->wordentry_size;
} }
bool Lookup(const char *str, glong &idx); bool Lookup(const char *str, glong &idx);
bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen); bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
private:
std::string ifo_file_name;
gulong wordcount;
gulong syn_wordcount;
std::string bookname;
std::unique_ptr<IIndexFile> idx_file;
std::unique_ptr<SynFile> syn_file;
bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize);
};
class Libs {
public:
Libs(std::function<void(void)> f = std::function<void(void)>()) {
progress_func = f;
iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg.
}
void setVerbose(bool verbose) { verbose_ = verbose; }
void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; }
~Libs();
Libs(const Libs&) = delete;
Libs& operator=(const Libs&) = delete;
void load_dict(const std::string& url);
void load(const std::list<std::string>& dicts_dirs,
const std::list<std::string>& order_list,
const std::list<std::string>& disable_list);
glong narticles(int idict) const { return oLib[idict]->narticles(); }
const std::string& dict_name(int idict) const { return oLib[idict]->dict_name(); }
gint ndicts() const { return oLib.size(); }
const gchar *poGetWord(glong iIndex, int iLib) {
return oLib[iLib]->get_key(iIndex);
}
gchar * poGetWordData(glong iIndex,int iLib) {
if (iIndex == INVALID_INDEX)
return nullptr;
return oLib[iLib]->get_data(iIndex);
}
const gchar *poGetCurrentWord(glong *iCurrent);
const gchar *poGetNextWord(const gchar *word, glong *iCurrent);
const gchar *poGetPreWord(glong *iCurrent);
bool LookupWord(const gchar* sWord, glong& iWordIndex, int iLib) {
return oLib[iLib]->Lookup(sWord, iWordIndex);
}
bool LookupSimilarWord(const gchar* sWord, glong & iWordIndex, int iLib);
bool SimpleLookupWord(const gchar* sWord, glong & iWordIndex, int iLib);
bool LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size);
gint LookupWithRule(const gchar *sWord, gchar *reslist[]);
bool LookupData(const gchar *sWord, std::vector<gchar *> *reslist);
protected:
bool fuzzy_;
private: private:
std::vector<Dict *> oLib; // word Libs. std::string ifo_file_name;
int iMaxFuzzyDistance; gulong wordcount;
std::function<void(void)> progress_func; gulong syn_wordcount;
bool verbose_; std::string bookname;
std::unique_ptr<IIndexFile> idx_file;
std::unique_ptr<SynFile> syn_file;
bool load_ifofile(const std::string &ifofilename, gulong &idxfilesize);
}; };
// The set of loaded dictionaries plus the lookup entry points that work
// across them. Per-dictionary calls take the dictionary's position in the
// set (idict / iLib); the index is not range-checked here.
class Libs
{
public:
    // f is stored as the progress callback; it may be empty.
    Libs(std::function<void(void)> f = std::function<void(void)>())
        : iMaxFuzzyDistance(MAX_FUZZY_DISTANCE) //need to read from cfg.
        , progress_func(f)
    {
    }
    void setVerbose(bool verbose) { verbose_ = verbose; }
    void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; }
    ~Libs();
    Libs(const Libs &) = delete;
    Libs &operator=(const Libs &) = delete;

    void load_dict(const std::string &url);
    void load(const std::list<std::string> &dicts_dirs,
              const std::list<std::string> &order_list,
              const std::list<std::string> &disable_list);
    glong narticles(int idict) const { return oLib[idict]->narticles(); }
    const std::string &dict_name(int idict) const { return oLib[idict]->dict_name(); }
    gint ndicts() const { return oLib.size(); }

    const gchar *poGetWord(glong iIndex, int iLib)
    {
        return oLib[iLib]->get_key(iIndex);
    }
    // Returns nullptr for INVALID_INDEX instead of querying the dictionary.
    gchar *poGetWordData(glong iIndex, int iLib)
    {
        if (iIndex == INVALID_INDEX)
            return nullptr;
        return oLib[iLib]->get_data(iIndex);
    }
    const gchar *poGetCurrentWord(glong *iCurrent);
    const gchar *poGetNextWord(const gchar *word, glong *iCurrent);
    const gchar *poGetPreWord(glong *iCurrent);
    bool LookupWord(const gchar *sWord, glong &iWordIndex, int iLib)
    {
        return oLib[iLib]->Lookup(sWord, iWordIndex);
    }
    bool LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib);
    bool SimpleLookupWord(const gchar *sWord, glong &iWordIndex, int iLib);

    bool LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size);
    gint LookupWithRule(const gchar *sWord, gchar *reslist[]);
    bool LookupData(const gchar *sWord, std::vector<gchar *> *reslist);

protected:
    // Defaults to false so the flag is well-defined even if setFuzzy() is never called.
    bool fuzzy_ = false;

private:
    std::vector<Dict *> oLib; // word Libs.
    int iMaxFuzzyDistance;
    std::function<void(void)> progress_func;
    // Defaults to false so the flag is well-defined even if setVerbose() is never called.
    bool verbose_ = false;
};
// Kind of query, as returned by analyze_query().
enum query_t {
    qtSIMPLE,
    qtREGEXP,
    qtFUZZY,
    qtDATA
};
// Classifies the query string s and writes the processed query text to res.
extern query_t analyze_query(const char *s, std::string &res);

View File

@@ -19,100 +19,113 @@
*/ */
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
# include "config.h" #include "config.h"
#endif #endif
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <glib.h> #include <glib.h>
#include <glib/gi18n.h> #include <glib/gi18n.h>
#include <cstdlib>
#include <cstdio>
#include <algorithm>
#include <sstream>
#include <iomanip> #include <iomanip>
#include <sstream>
#include "utils.hpp" #include "utils.hpp"
std::string utf8_to_locale_ign_err(const std::string& utf8_str) std::string utf8_to_locale_ign_err(const std::string &utf8_str)
{ {
std::string res; std::string res;
const char *charset; const char *charset;
if (g_get_charset(&charset)) if (g_get_charset(&charset))
res = utf8_str; res = utf8_str;
else { else {
gsize bytes_read, bytes_written; gsize bytes_read, bytes_written;
glib::Error err; glib::Error err;
glib::CharStr tmp(g_convert_with_fallback(utf8_str.c_str(), -1, charset, "UTF-8", nullptr, glib::CharStr tmp(g_convert_with_fallback(utf8_str.c_str(), -1, charset, "UTF-8", nullptr,
&bytes_read, &bytes_written, get_addr(err))); &bytes_read, &bytes_written, get_addr(err)));
if (nullptr == get_impl(tmp)){ if (nullptr == get_impl(tmp)) {
fprintf(stderr, _("Can not convert %s to current locale.\n"), utf8_str.c_str()); fprintf(stderr, _("Can not convert %s to current locale.\n"), utf8_str.c_str());
fprintf(stderr, "%s\n", err->message); fprintf(stderr, "%s\n", err->message);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
res = get_impl(tmp); res = get_impl(tmp);
} }
return res; return res;
} }
static void __for_each_file(const std::string& dirname, const std::string& suff, static void __for_each_file(const std::string &dirname, const std::string &suff,
const std::list<std::string>& order_list, const std::list<std::string>& disable_list, const std::list<std::string> &order_list, const std::list<std::string> &disable_list,
const std::function<void (const std::string&, bool)>& f) const std::function<void(const std::string &, bool)> &f)
{ {
GDir *dir = g_dir_open(dirname.c_str(), 0, nullptr); GDir *dir = g_dir_open(dirname.c_str(), 0, nullptr);
if (dir) { if (dir) {
const gchar *filename; const gchar *filename;
while ((filename = g_dir_read_name(dir))!=nullptr) { while ((filename = g_dir_read_name(dir)) != nullptr) {
const std::string fullfilename(dirname+G_DIR_SEPARATOR_S+filename); const std::string fullfilename(dirname + G_DIR_SEPARATOR_S + filename);
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_DIR)) if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_DIR))
__for_each_file(fullfilename, suff, order_list, disable_list, f); __for_each_file(fullfilename, suff, order_list, disable_list, f);
else if (g_str_has_suffix(filename, suff.c_str()) && else if (g_str_has_suffix(filename, suff.c_str()) && std::find(order_list.begin(), order_list.end(), fullfilename) == order_list.end()) {
std::find(order_list.begin(), order_list.end(), const bool disable = std::find(disable_list.begin(),
fullfilename)==order_list.end()) { disable_list.end(),
const bool disable = std::find(disable_list.begin(), fullfilename)
disable_list.end(), != disable_list.end();
fullfilename)!=disable_list.end();
f(fullfilename, disable); f(fullfilename, disable);
} }
} }
g_dir_close(dir); g_dir_close(dir);
} }
} }
// Calls f(path, disabled) first for every entry of order_list, in list order,
// and then for every file ending in suff found by walking each directory of
// dirs_list. `disabled` is true when the path appears in disable_list.
void for_each_file(const std::list<std::string> &dirs_list, const std::string &suff,
                   const std::list<std::string> &order_list, const std::list<std::string> &disable_list,
                   const std::function<void(const std::string &, bool)> &f)
{
    for (const std::string &item : order_list) {
        const bool disable = std::find(disable_list.begin(), disable_list.end(), item) != disable_list.end();
        f(item, disable);
    }
    for (const std::string &item : dirs_list)
        __for_each_file(item, suff, order_list, disable_list, f);
}
// based on https://stackoverflow.com/questions/7724448/simple-json-string-escape-for-c/33799784#33799784
// Returns s with the characters that may not appear raw inside a JSON string
// escaped: '"' and '\\' get a backslash, \b \f \n \r \t use their short
// forms, and any other byte in 0x00..0x1f becomes \u00XX. All remaining bytes
// are copied through unchanged.
std::string json_escape_string(const std::string &s)
{
    std::ostringstream o;
    for (const char c : s) {
        switch (c) {
        case '"':
            o << "\\\"";
            break;
        case '\\':
            o << "\\\\";
            break;
        case '\b':
            o << "\\b";
            break;
        case '\f':
            o << "\\f";
            break;
        case '\n':
            o << "\\n";
            break;
        case '\r':
            o << "\\r";
            break;
        case '\t':
            o << "\\t";
            break;
        default:
            if ('\x00' <= c && c <= '\x1f') {
                // Widen to int so the stream prints a number, not the raw byte.
                o << "\\u"
                  << std::hex << std::setw(4) << std::setfill('0') << static_cast<int>(c);
            } else {
                o << c;
            }
        }
    }
    return o.str();
}

View File

@@ -1,63 +1,78 @@
#pragma once #pragma once
#include <glib.h>
#include <cstddef>
#include <cassert> #include <cassert>
#include <string> #include <cstddef>
#include <list>
#include <functional> #include <functional>
#include <glib.h>
#include <list>
#include <string>
// Minimal owning wrapper around a raw pointer T*. The wrapped pointer is
// released with unref_res when the wrapper is destroyed or reset to a
// different pointer. Copying is disabled.
template <typename T, typename unref_res_t, void (*unref_res)(unref_res_t *)>
class ResourceWrapper
{
public:
    ResourceWrapper(T *p = nullptr)
        : p_(p)
    {
    }
    ~ResourceWrapper() { free_resource(); }
    ResourceWrapper(const ResourceWrapper &) = delete;
    ResourceWrapper &operator=(const ResourceWrapper &) = delete;

    T *operator->() const { return p_; }
    bool operator!() const { return p_ == nullptr; }
    const T &operator[](size_t idx) const
    {
        assert(p_ != nullptr);
        return p_[idx];
    }

    // Releases the current pointer and takes ownership of newp. Passing the
    // pointer already held is a no-op, so it is never freed while still owned.
    void reset(T *newp)
    {
        if (p_ != newp) {
            free_resource();
            p_ = newp;
        }
    }

    friend inline bool operator==(const ResourceWrapper &lhs, std::nullptr_t) noexcept
    {
        return !lhs.p_;
    }

    friend inline bool operator!=(const ResourceWrapper &lhs, std::nullptr_t) noexcept
    {
        return !!lhs.p_;
    }

    // Returns the raw pointer; ownership stays with the wrapper.
    friend inline T *get_impl(const ResourceWrapper &rw)
    {
        return rw.p_;
    }

    // Returns the address of the stored pointer so an out-parameter API can
    // fill it in. A pointer already held is not released by this call.
    friend inline T **get_addr(ResourceWrapper &rw)
    {
        return &rw.p_;
    }

private:
    T *p_;

    void free_resource()
    {
        if (p_)
            unref_res(p_);
    }
};
namespace glib { namespace glib
typedef ResourceWrapper<gchar, void, g_free> CharStr; {
typedef ResourceWrapper<GError, GError, g_error_free> Error; typedef ResourceWrapper<gchar, void, g_free> CharStr;
typedef ResourceWrapper<GError, GError, g_error_free> Error;
} }
// Converts a UTF-8 string to the current locale's charset.
extern std::string utf8_to_locale_ign_err(const std::string &utf8_str);
// Calls f(path, disabled) for the entries of order_list and for every file
// ending in suff found under the directories of dirs_list.
extern void for_each_file(const std::list<std::string> &dirs_list, const std::string &suff,
                          const std::list<std::string> &order_list, const std::list<std::string> &disable_list,
                          const std::function<void(const std::string &, bool)> &f);
// Escapes a string for embedding inside a JSON string literal.
extern std::string json_escape_string(const std::string &str);