refactoring: apply clang-format rules

This commit is contained in:
Evgeniy A. Dushistov
2017-08-09 07:46:27 +03:00
parent d0c0a0837f
commit 8f16ceae59
14 changed files with 2587 additions and 2537 deletions

View File

@@ -26,20 +26,19 @@
//#define HAVE_MMAP // it will be defined in config.h; configure.in can do this with AC_FUNC_MMAP.
#ifdef HAVE_CONFIG_H
# include "config.h"
#include "config.h"
#endif
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <unistd.h>
#include <limits.h>
#include <fcntl.h>
#include <limits.h>
#include <unistd.h>
#include <sys/stat.h>
#include "dictziplib.hpp"
#define USE_CACHE 1
@@ -57,426 +56,424 @@
/* Constants describing the gzip-compatible header, as defined in RFC 1952 */

/* Magic bytes that open every gzip stream (RFC 1952) */
#define GZ_MAGIC1 0x1f /* First magic byte */
#define GZ_MAGIC2 0x8b /* Second magic byte */

/* Bits of the FLG header byte, from RFC 1952 */
#define GZ_FTEXT 0x01 /* Set for ASCII text */
#define GZ_FHCRC 0x02 /* Header CRC16 */
#define GZ_FEXTRA 0x04 /* Optional field (random access index) */
#define GZ_FNAME 0x08 /* Original name */
#define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */

/* Values of the XFL (extra flags) header byte */
#define GZ_MAX 2 /* Maximum compression */
#define GZ_FAST 4 /* Fastest compression */

/* Values of the OS header byte, from RFC 1952 */
#define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */
#define GZ_OS_AMIGA 1 /* Amiga */
#define GZ_OS_VMS 2 /* VMS (or OpenVMS) */
#define GZ_OS_UNIX 3 /* Unix */
#define GZ_OS_VMCMS 4 /* VM/CMS */
#define GZ_OS_ATARI 5 /* Atari TOS */
#define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */
#define GZ_OS_MAC 7 /* Macintosh */
#define GZ_OS_Z 8 /* Z-System */
#define GZ_OS_CPM 9 /* CP/M */
#define GZ_OS_TOPS20 10 /* TOPS-20 */
#define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */
#define GZ_OS_QDOS 12 /* QDOS */
#define GZ_OS_ACORN 13 /* Acorn RISCOS */
#define GZ_OS_UNKNOWN 255 /* unknown */

/* Subfield ID bytes of the random access (dictzip) extra field */
#define GZ_RND_S1 'R' /* First magic for random access format */
#define GZ_RND_S2 'A' /* Second magic for random access format */

/* Byte offsets of the header fields from the start of the file */
#define GZ_ID1 0 /* GZ_MAGIC1 */
#define GZ_ID2 1 /* GZ_MAGIC2 */
#define GZ_CM 2 /* Compression Method (Z_DEFLATED) */
#define GZ_FLG 3 /* FLaGs (see above) */
#define GZ_MTIME 4 /* Modification TIME */
#define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */
#define GZ_OS 9 /* Operating System */
#define GZ_XLEN 10 /* eXtra LENgth (16bit) */
#define GZ_FEXTRA_START 12 /* Start of extra fields */
#define GZ_SI1 12 /* Subfield ID1 */
#define GZ_SI2 13 /* Subfield ID2 */
#define GZ_SUBLEN 14 /* Subfield length (16bit) */
#define GZ_VERSION 16 /* Version for subfield format */
#define GZ_CHUNKLEN 18 /* Chunk length (16bit) */
#define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */
#define GZ_RNDDATA 22 /* Random access data (16bit) */

/* Kinds of data file, stored in DictData::type */
#define DICT_UNKNOWN 0
#define DICT_TEXT 1
#define DICT_GZIP 2
#define DICT_DZIP 3
int DictData::read_header(const std::string &fname, int computeCRC)
{
FILE *str;
int id1, id2, si1, si2;
char buffer[BUFFERSIZE];
int extraLength, subLength;
int i;
char *pt;
int c;
struct stat sb;
unsigned long crc = crc32( 0L, Z_NULL, 0 );
int count;
unsigned long offset;
FILE *str;
int id1, id2, si1, si2;
char buffer[BUFFERSIZE];
int extraLength, subLength;
int i;
char *pt;
int c;
struct stat sb;
unsigned long crc = crc32(0L, Z_NULL, 0);
int count;
unsigned long offset;
if (!(str = fopen(fname.c_str(), "rb"))) {
//err_fatal_errno( __FUNCTION__,
// "Cannot open data file \"%s\" for read\n", filename );
if (!(str = fopen(fname.c_str(), "rb"))) {
//err_fatal_errno( __FUNCTION__,
// "Cannot open data file \"%s\" for read\n", filename );
return -1;
}
}
this->headerLength = GZ_XLEN - 1;
this->type = DICT_UNKNOWN;
this->headerLength = GZ_XLEN - 1;
this->type = DICT_UNKNOWN;
id1 = getc( str );
id2 = getc( str );
id1 = getc(str);
id2 = getc(str);
if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2) {
this->type = DICT_TEXT;
fstat( fileno( str ), &sb );
this->compressedLength = this->length = sb.st_size;
this->origFilename = fname;
this->mtime = sb.st_mtime;
if (computeCRC) {
rewind( str );
while (!feof( str )) {
if ((count = fread( buffer, 1, BUFFERSIZE, str ))) {
crc = crc32(crc, (Bytef *)buffer, count);
}
}
}
this->crc = crc;
fclose( str );
return 0;
}
this->type = DICT_GZIP;
if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2) {
this->type = DICT_TEXT;
fstat(fileno(str), &sb);
this->compressedLength = this->length = sb.st_size;
this->origFilename = fname;
this->mtime = sb.st_mtime;
if (computeCRC) {
rewind(str);
while (!feof(str)) {
if ((count = fread(buffer, 1, BUFFERSIZE, str))) {
crc = crc32(crc, (Bytef *)buffer, count);
}
}
}
this->crc = crc;
fclose(str);
return 0;
}
this->type = DICT_GZIP;
this->method = getc( str );
this->flags = getc( str );
this->mtime = getc( str ) << 0;
this->mtime |= getc( str ) << 8;
this->mtime |= getc( str ) << 16;
this->mtime |= getc( str ) << 24;
this->extraFlags = getc( str );
this->os = getc( str );
this->method = getc(str);
this->flags = getc(str);
this->mtime = getc(str) << 0;
this->mtime |= getc(str) << 8;
this->mtime |= getc(str) << 16;
this->mtime |= getc(str) << 24;
this->extraFlags = getc(str);
this->os = getc(str);
if (this->flags & GZ_FEXTRA) {
extraLength = getc( str ) << 0;
extraLength |= getc( str ) << 8;
this->headerLength += extraLength + 2;
si1 = getc( str );
si2 = getc( str );
if (this->flags & GZ_FEXTRA) {
extraLength = getc(str) << 0;
extraLength |= getc(str) << 8;
this->headerLength += extraLength + 2;
si1 = getc(str);
si2 = getc(str);
if (si1 == GZ_RND_S1 || si2 == GZ_RND_S2) {
subLength = getc( str ) << 0;
subLength |= getc( str ) << 8;
this->version = getc( str ) << 0;
this->version |= getc( str ) << 8;
if (si1 == GZ_RND_S1 || si2 == GZ_RND_S2) {
subLength = getc(str) << 0;
subLength |= getc(str) << 8;
this->version = getc(str) << 0;
this->version |= getc(str) << 8;
if (this->version != 1) {
//err_internal( __FUNCTION__,
// "dzip header version %d not supported\n",
// this->version );
}
if (this->version != 1) {
//err_internal( __FUNCTION__,
// "dzip header version %d not supported\n",
// this->version );
}
this->chunkLength = getc( str ) << 0;
this->chunkLength |= getc( str ) << 8;
this->chunkCount = getc( str ) << 0;
this->chunkCount |= getc( str ) << 8;
this->chunkLength = getc(str) << 0;
this->chunkLength |= getc(str) << 8;
this->chunkCount = getc(str) << 0;
this->chunkCount |= getc(str) << 8;
if (this->chunkCount <= 0) {
fclose( str );
return 5;
}
this->chunks = (int *)malloc(sizeof( this->chunks[0] )
* this->chunkCount );
for (i = 0; i < this->chunkCount; i++) {
this->chunks[i] = getc( str ) << 0;
this->chunks[i] |= getc( str ) << 8;
}
this->type = DICT_DZIP;
} else {
fseek( str, this->headerLength, SEEK_SET );
}
}
if (this->chunkCount <= 0) {
fclose(str);
return 5;
}
this->chunks = (int *)malloc(sizeof(this->chunks[0])
* this->chunkCount);
for (i = 0; i < this->chunkCount; i++) {
this->chunks[i] = getc(str) << 0;
this->chunks[i] |= getc(str) << 8;
}
this->type = DICT_DZIP;
} else {
fseek(str, this->headerLength, SEEK_SET);
}
}
if (this->flags & GZ_FNAME) { /* FIXME! Add checking against header len */
pt = buffer;
while ((c = getc( str )) && c != EOF)
*pt++ = c;
*pt = '\0';
if (this->flags & GZ_FNAME) { /* FIXME! Add checking against header len */
pt = buffer;
while ((c = getc(str)) && c != EOF)
*pt++ = c;
*pt = '\0';
this->origFilename = buffer;
this->headerLength += this->origFilename.length() + 1;
} else {
this->origFilename = "";
}
this->origFilename = buffer;
this->headerLength += this->origFilename.length() + 1;
} else {
this->origFilename = "";
}
if (this->flags & GZ_COMMENT) { /* FIXME! Add checking for header len */
pt = buffer;
while ((c = getc( str )) && c != EOF)
*pt++ = c;
*pt = '\0';
comment = buffer;
headerLength += comment.length()+1;
} else {
comment = "";
}
if (this->flags & GZ_COMMENT) { /* FIXME! Add checking for header len */
pt = buffer;
while ((c = getc(str)) && c != EOF)
*pt++ = c;
*pt = '\0';
comment = buffer;
headerLength += comment.length() + 1;
} else {
comment = "";
}
if (this->flags & GZ_FHCRC) {
getc( str );
getc( str );
this->headerLength += 2;
}
if (this->flags & GZ_FHCRC) {
getc(str);
getc(str);
this->headerLength += 2;
}
if (ftell( str ) != this->headerLength + 1) {
//err_internal( __FUNCTION__,
// "File position (%lu) != header length + 1 (%d)\n",
// ftell( str ), this->headerLength + 1 );
}
if (ftell(str) != this->headerLength + 1) {
//err_internal( __FUNCTION__,
// "File position (%lu) != header length + 1 (%d)\n",
// ftell( str ), this->headerLength + 1 );
}
fseek( str, -8, SEEK_END );
this->crc = getc( str ) << 0;
this->crc |= getc( str ) << 8;
this->crc |= getc( str ) << 16;
this->crc |= getc( str ) << 24;
this->length = getc( str ) << 0;
this->length |= getc( str ) << 8;
this->length |= getc( str ) << 16;
this->length |= getc( str ) << 24;
this->compressedLength = ftell( str );
fseek(str, -8, SEEK_END);
this->crc = getc(str) << 0;
this->crc |= getc(str) << 8;
this->crc |= getc(str) << 16;
this->crc |= getc(str) << 24;
this->length = getc(str) << 0;
this->length |= getc(str) << 8;
this->length |= getc(str) << 16;
this->length |= getc(str) << 24;
this->compressedLength = ftell(str);
/* Compute offsets */
this->offsets = (unsigned long *)malloc( sizeof( this->offsets[0] )
* this->chunkCount );
for (offset = this->headerLength + 1, i = 0;
i < this->chunkCount;
i++) {
this->offsets[i] = offset;
offset += this->chunks[i];
}
/* Compute offsets */
this->offsets = (unsigned long *)malloc(sizeof(this->offsets[0])
* this->chunkCount);
for (offset = this->headerLength + 1, i = 0;
i < this->chunkCount;
i++) {
this->offsets[i] = offset;
offset += this->chunks[i];
}
fclose( str );
return 0;
fclose(str);
return 0;
}
bool DictData::open(const std::string& fname, int computeCRC)
bool DictData::open(const std::string &fname, int computeCRC)
{
struct stat sb;
int fd;
struct stat sb;
int fd;
this->initialized = 0;
this->initialized = 0;
if (stat(fname.c_str(), &sb) || !S_ISREG(sb.st_mode)) {
//err_warning( __FUNCTION__,
// "%s is not a regular file -- ignoring\n", fname );
return false;
}
if (stat(fname.c_str(), &sb) || !S_ISREG(sb.st_mode)) {
//err_warning( __FUNCTION__,
// "%s is not a regular file -- ignoring\n", fname );
return false;
}
if (read_header(fname, computeCRC)) {
//err_fatal( __FUNCTION__,
// "\"%s\" not in text or dzip format\n", fname );
return false;
}
if (read_header(fname, computeCRC)) {
//err_fatal( __FUNCTION__,
// "\"%s\" not in text or dzip format\n", fname );
return false;
}
if ((fd = ::open(fname.c_str(), O_RDONLY )) < 0) {
//err_fatal_errno( __FUNCTION__,
// "Cannot open data file \"%s\"\n", fname );
return false;
}
if (fstat(fd, &sb)) {
//err_fatal_errno( __FUNCTION__,
// "Cannot stat data file \"%s\"\n", fname );
return false;
}
if ((fd = ::open(fname.c_str(), O_RDONLY)) < 0) {
//err_fatal_errno( __FUNCTION__,
// "Cannot open data file \"%s\"\n", fname );
return false;
}
if (fstat(fd, &sb)) {
//err_fatal_errno( __FUNCTION__,
// "Cannot stat data file \"%s\"\n", fname );
return false;
}
this->size = sb.st_size;
::close(fd);
if (!mapfile.open(fname.c_str(), size))
return false;
this->size = sb.st_size;
::close(fd);
if (!mapfile.open(fname.c_str(), size))
return false;
this->start=mapfile.begin();
this->end = this->start + this->size;
this->start = mapfile.begin();
this->end = this->start + this->size;
for (size_t j = 0; j < DICT_CACHE_SIZE; j++) {
cache[j].chunk = -1;
cache[j].stamp = -1;
cache[j].inBuffer = nullptr;
cache[j].count = 0;
}
for (size_t j = 0; j < DICT_CACHE_SIZE; j++) {
cache[j].chunk = -1;
cache[j].stamp = -1;
cache[j].inBuffer = nullptr;
cache[j].count = 0;
}
return true;
return true;
}
/*
 * Release everything read_header() and read() allocated: the chunk and
 * offset tables, the inflate state and the cached chunk buffers.
 *
 * Every pointer is cleared after it is freed and `initialized` is reset, so
 * calling close() more than once (for example explicitly and then again from
 * the destructor) does not free anything twice.
 */
void DictData::close()
{
    free(this->chunks); // free(nullptr) is a no-op
    this->chunks = nullptr;
    free(this->offsets);
    this->offsets = nullptr;

    if (this->initialized) {
        // The result of inflateEnd() is ignored: there is nothing left to
        // do with the stream whether or not it shut down cleanly.
        inflateEnd(&this->zStream);
        this->initialized = 0;
    }

    for (size_t i = 0; i < DICT_CACHE_SIZE; ++i) {
        free(this->cache[i].inBuffer);
        this->cache[i].inBuffer = nullptr;
        // Mark the slot empty so a later read() cannot "find" a chunk
        // whose buffer is gone.
        this->cache[i].chunk = -1;
        this->cache[i].count = 0;
    }
}
void DictData::read(char *buffer, unsigned long start, unsigned long size)
{
char *pt;
unsigned long end;
int count;
char *inBuffer;
char outBuffer[OUT_BUFFER_SIZE];
int firstChunk, lastChunk;
int firstOffset, lastOffset;
int i;
int found, target, lastStamp;
static int stamp = 0;
char *pt;
unsigned long end;
int count;
char *inBuffer;
char outBuffer[OUT_BUFFER_SIZE];
int firstChunk, lastChunk;
int firstOffset, lastOffset;
int i;
int found, target, lastStamp;
static int stamp = 0;
end = start + size;
end = start + size;
//buffer = malloc( size + 1 );
//buffer = malloc( size + 1 );
//PRINTF(DBG_UNZIP,
// ("dict_data_read( %p, %lu, %lu )\n",
//h, start, size ));
//PRINTF(DBG_UNZIP,
// ("dict_data_read( %p, %lu, %lu )\n",
//h, start, size ));
switch (this->type) {
case DICT_GZIP:
//err_fatal( __FUNCTION__,
// "Cannot seek on pure gzip format files.\n"
// "Use plain text (for performance)"
// " or dzip format (for space savings).\n" );
break;
case DICT_TEXT:
memcpy(buffer, this->start + start, size);
//buffer[size] = '\0';
break;
case DICT_DZIP:
if (!this->initialized) {
++this->initialized;
this->zStream.zalloc = nullptr;
this->zStream.zfree = nullptr;
this->zStream.opaque = nullptr;
this->zStream.next_in = 0;
this->zStream.avail_in = 0;
this->zStream.next_out = nullptr;
this->zStream.avail_out = 0;
if (inflateInit2(&this->zStream, -15) != Z_OK) {
//err_internal( __FUNCTION__,
// "Cannot initialize inflation engine: %s\n",
//this->zStream.msg );
}
}
firstChunk = start / this->chunkLength;
firstOffset = start - firstChunk * this->chunkLength;
lastChunk = end / this->chunkLength;
lastOffset = end - lastChunk * this->chunkLength;
//PRINTF(DBG_UNZIP,
// (" start = %lu, end = %lu\n"
//"firstChunk = %d, firstOffset = %d,"
//" lastChunk = %d, lastOffset = %d\n",
//start, end, firstChunk, firstOffset, lastChunk, lastOffset ));
for (pt = buffer, i = firstChunk; i <= lastChunk; i++) {
switch (this->type) {
case DICT_GZIP:
//err_fatal( __FUNCTION__,
// "Cannot seek on pure gzip format files.\n"
// "Use plain text (for performance)"
// " or dzip format (for space savings).\n" );
break;
case DICT_TEXT:
memcpy( buffer, this->start + start, size );
//buffer[size] = '\0';
break;
case DICT_DZIP:
if (!this->initialized) {
++this->initialized;
this->zStream.zalloc = nullptr;
this->zStream.zfree = nullptr;
this->zStream.opaque = nullptr;
this->zStream.next_in = 0;
this->zStream.avail_in = 0;
this->zStream.next_out = nullptr;
this->zStream.avail_out = 0;
if (inflateInit2( &this->zStream, -15 ) != Z_OK) {
//err_internal( __FUNCTION__,
// "Cannot initialize inflation engine: %s\n",
//this->zStream.msg );
}
}
firstChunk = start / this->chunkLength;
firstOffset = start - firstChunk * this->chunkLength;
lastChunk = end / this->chunkLength;
lastOffset = end - lastChunk * this->chunkLength;
//PRINTF(DBG_UNZIP,
// (" start = %lu, end = %lu\n"
//"firstChunk = %d, firstOffset = %d,"
//" lastChunk = %d, lastOffset = %d\n",
//start, end, firstChunk, firstOffset, lastChunk, lastOffset ));
for (pt = buffer, i = firstChunk; i <= lastChunk; i++) {
/* Access cache */
found = 0;
target = 0;
lastStamp = INT_MAX;
for (size_t j = 0; j < DICT_CACHE_SIZE; j++) {
/* Access cache */
found = 0;
target = 0;
lastStamp = INT_MAX;
for (size_t j = 0; j < DICT_CACHE_SIZE; j++) {
#if USE_CACHE
if (this->cache[j].chunk == i) {
found = 1;
target = j;
break;
}
if (this->cache[j].chunk == i) {
found = 1;
target = j;
break;
}
#endif
if (this->cache[j].stamp < lastStamp) {
lastStamp = this->cache[j].stamp;
target = j;
}
}
if (this->cache[j].stamp < lastStamp) {
lastStamp = this->cache[j].stamp;
target = j;
}
}
this->cache[target].stamp = ++stamp;
if (found) {
count = this->cache[target].count;
inBuffer = this->cache[target].inBuffer;
} else {
this->cache[target].chunk = i;
if (!this->cache[target].inBuffer)
this->cache[target].inBuffer = (char *)malloc( IN_BUFFER_SIZE );
inBuffer = this->cache[target].inBuffer;
this->cache[target].stamp = ++stamp;
if (found) {
count = this->cache[target].count;
inBuffer = this->cache[target].inBuffer;
} else {
this->cache[target].chunk = i;
if (!this->cache[target].inBuffer)
this->cache[target].inBuffer = (char *)malloc(IN_BUFFER_SIZE);
inBuffer = this->cache[target].inBuffer;
if (this->chunks[i] >= OUT_BUFFER_SIZE ) {
//err_internal( __FUNCTION__,
// "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
// i, this->chunks[i], OUT_BUFFER_SIZE );
}
memcpy( outBuffer, this->start + this->offsets[i], this->chunks[i] );
if (this->chunks[i] >= OUT_BUFFER_SIZE) {
//err_internal( __FUNCTION__,
// "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
// i, this->chunks[i], OUT_BUFFER_SIZE );
}
memcpy(outBuffer, this->start + this->offsets[i], this->chunks[i]);
this->zStream.next_in = (Bytef *)outBuffer;
this->zStream.avail_in = this->chunks[i];
this->zStream.next_out = (Bytef *)inBuffer;
this->zStream.avail_out = IN_BUFFER_SIZE;
if (inflate( &this->zStream, Z_PARTIAL_FLUSH ) != Z_OK) {
//err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg );
}
if (this->zStream.avail_in) {
//err_internal( __FUNCTION__,
// "inflate did not flush (%d pending, %d avail)\n",
// this->zStream.avail_in, this->zStream.avail_out );
}
this->zStream.next_in = (Bytef *)outBuffer;
this->zStream.avail_in = this->chunks[i];
this->zStream.next_out = (Bytef *)inBuffer;
this->zStream.avail_out = IN_BUFFER_SIZE;
if (inflate(&this->zStream, Z_PARTIAL_FLUSH) != Z_OK) {
//err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg );
}
if (this->zStream.avail_in) {
//err_internal( __FUNCTION__,
// "inflate did not flush (%d pending, %d avail)\n",
// this->zStream.avail_in, this->zStream.avail_out );
}
count = IN_BUFFER_SIZE - this->zStream.avail_out;
count = IN_BUFFER_SIZE - this->zStream.avail_out;
this->cache[target].count = count;
}
this->cache[target].count = count;
}
if (i == firstChunk) {
if (i == lastChunk) {
memcpy( pt, inBuffer + firstOffset, lastOffset-firstOffset);
pt += lastOffset - firstOffset;
} else {
if (count != this->chunkLength ) {
//err_internal( __FUNCTION__,
// "Length = %d instead of %d\n",
//count, this->chunkLength );
}
memcpy( pt, inBuffer + firstOffset,
this->chunkLength - firstOffset );
pt += this->chunkLength - firstOffset;
}
} else if (i == lastChunk) {
memcpy( pt, inBuffer, lastOffset );
pt += lastOffset;
} else {
assert( count == this->chunkLength );
memcpy( pt, inBuffer, this->chunkLength );
pt += this->chunkLength;
}
}
//*pt = '\0';
break;
case DICT_UNKNOWN:
//err_fatal( __FUNCTION__, "Cannot read unknown file type\n" );
break;
}
if (i == firstChunk) {
if (i == lastChunk) {
memcpy(pt, inBuffer + firstOffset, lastOffset - firstOffset);
pt += lastOffset - firstOffset;
} else {
if (count != this->chunkLength) {
//err_internal( __FUNCTION__,
// "Length = %d instead of %d\n",
//count, this->chunkLength );
}
memcpy(pt, inBuffer + firstOffset,
this->chunkLength - firstOffset);
pt += this->chunkLength - firstOffset;
}
} else if (i == lastChunk) {
memcpy(pt, inBuffer, lastOffset);
pt += lastOffset;
} else {
assert(count == this->chunkLength);
memcpy(pt, inBuffer, this->chunkLength);
pt += this->chunkLength;
}
}
//*pt = '\0';
break;
case DICT_UNKNOWN:
//err_fatal( __FUNCTION__, "Cannot read unknown file type\n" );
break;
}
}

View File

@@ -7,49 +7,50 @@
#include "mapfile.hpp"
/*
 * One slot of the decompressed-chunk cache kept by DictData.
 * The defaults describe an empty slot, the same state DictData::open() sets.
 */
struct DictCache {
    int chunk = -1; /* index of the chunk held in this slot, -1 if none */
    char *inBuffer = nullptr; /* malloc'ed buffer with the inflated chunk, or nullptr */
    int stamp = -1; /* use counter value; the slot with the smallest one is reused first */
    int count = 0; /* number of inflated bytes stored in inBuffer */
};
class DictData {
class DictData
{
public:
static const size_t DICT_CACHE_SIZE = 5;
static const size_t DICT_CACHE_SIZE = 5;
DictData() {}
~DictData() { close(); }
bool open(const std::string &filename, int computeCRC);
void close();
void read(char *buffer, unsigned long start, unsigned long size);
DictData() {}
~DictData() { close(); }
bool open(const std::string& filename, int computeCRC);
void close();
void read(char *buffer, unsigned long start, unsigned long size);
private:
const char *start; /* start of mmap'd area */
const char *end; /* end of mmap'd area */
unsigned long size; /* size of mmap */
int type;
z_stream zStream;
int initialized;
int headerLength;
int method;
int flags;
time_t mtime;
int extraFlags;
int os;
int version;
int chunkLength;
int chunkCount;
int *chunks;
unsigned long *offsets; /* Sum-scan of chunks. */
std::string origFilename;
std::string comment;
unsigned long crc;
unsigned long length;
unsigned long compressedLength;
DictCache cache[DICT_CACHE_SIZE];
MapFile mapfile;
const char *start; /* start of mmap'd area */
const char *end; /* end of mmap'd area */
unsigned long size; /* size of mmap */
int read_header(const std::string &filename, int computeCRC);
int type;
z_stream zStream;
int initialized;
int headerLength;
int method;
int flags;
time_t mtime;
int extraFlags;
int os;
int version;
int chunkLength;
int chunkCount;
int *chunks;
unsigned long *offsets; /* Sum-scan of chunks. */
std::string origFilename;
std::string comment;
unsigned long crc;
unsigned long length;
unsigned long compressedLength;
DictCache cache[DICT_CACHE_SIZE];
MapFile mapfile;
int read_header(const std::string &filename, int computeCRC);
};

View File

@@ -33,7 +33,6 @@ The Levenshtein distance algorithm has been used in:
* Plagiarism detection
*/
#include <cstdlib>
#include <cstring>
@@ -56,43 +55,39 @@ Enhanced Dynamic Programming ASM Algorithm"
/* Return the smallest of the three integers a, b and c. */
static inline int minimum(const int a, const int b, const int c)
{
    const int smaller_ab = (b < a) ? b : a;
    return (c < smaller_ab) ? c : smaller_ab;
}
int EditDistance::CalEditDistance(const gunichar *s,const gunichar *t,const int limit)
int EditDistance::CalEditDistance(const gunichar *s, const gunichar *t, const int limit)
/*Compute levenshtein distance between s and t, this is using QUICK algorithm*/
{
int n=0,m=0,iLenDif,k,i,j,cost;
int n = 0, m = 0, iLenDif, k, i, j, cost;
// Remove leftmost matching portion of strings
while ( *s && (*s==*t) )
{
while (*s && (*s == *t)) {
s++;
t++;
t++;
}
while (s[n])
{
n++;
}
while (t[m])
{
m++;
}
// Remove rightmost matching portion of strings by decrement n and m.
while ( n && m && (*(s+n-1)==*(t+m-1)) )
{
n--;m--;
while (s[n]) {
n++;
}
if ( m==0 || n==0 || d==nullptr )
return (m+n);
if ( m < n )
{
const gunichar * temp = s;
while (t[m]) {
m++;
}
// Remove rightmost matching portion of strings by decrement n and m.
while (n && m && (*(s + n - 1) == *(t + m - 1))) {
n--;
m--;
}
if (m == 0 || n == 0 || d == nullptr)
return (m + n);
if (m < n) {
const gunichar *temp = s;
int itemp = n;
s = t;
t = temp;
@@ -100,55 +95,51 @@ int EditDistance::CalEditDistance(const gunichar *s,const gunichar *t,const int
m = itemp;
}
iLenDif = m - n;
if ( iLenDif >= limit )
if (iLenDif >= limit)
return iLenDif;
// step 1
n++;m++;
// d=(int*)malloc(sizeof(int)*m*n);
if ( m*n > currentelements )
{
currentelements = m*n*2; // double the request
d = static_cast<int*>(realloc(d, sizeof(int) * currentelements));
if ( nullptr == d )
return (m+n);
n++;
m++;
// d=(int*)malloc(sizeof(int)*m*n);
if (m * n > currentelements) {
currentelements = m * n * 2; // double the request
d = static_cast<int *>(realloc(d, sizeof(int) * currentelements));
if (nullptr == d)
return (m + n);
}
// step 2, init matrix
for (k=0;k<n;k++)
for (k = 0; k < n; k++)
d[k] = k;
for (k=1;k<m;k++)
d[k*n] = k;
for (k = 1; k < m; k++)
d[k * n] = k;
// step 3
for (i=1;i<n;i++)
{
for (i = 1; i < n; i++) {
// first calculate column, d(i,j)
for ( j=1;j<iLenDif+i;j++ )
{
cost = s[i-1]==t[j-1]?0:1;
d[j*n+i] = minimum(d[(j-1)*n+i]+1,d[j*n+i-1]+1,d[(j-1)*n+i-1]+cost);
for (j = 1; j < iLenDif + i; j++) {
cost = s[i - 1] == t[j - 1] ? 0 : 1;
d[j * n + i] = minimum(d[(j - 1) * n + i] + 1, d[j * n + i - 1] + 1, d[(j - 1) * n + i - 1] + cost);
#ifdef COVER_TRANSPOSITION
if ( i>=2 && j>=2 && (d[j*n+i]-d[(j-2)*n+i-2]==2)
&& (s[i-2]==t[j-1]) && (s[i-1]==t[j-2]) )
d[j*n+i]--;
if (i >= 2 && j >= 2 && (d[j * n + i] - d[(j - 2) * n + i - 2] == 2)
&& (s[i - 2] == t[j - 1]) && (s[i - 1] == t[j - 2]))
d[j * n + i]--;
#endif
}
// second calculate row, d(k,j)
// now j==iLenDif+i;
for ( k=1;k<=i;k++ )
{
cost = s[k-1]==t[j-1]?0:1;
d[j*n+k] = minimum(d[(j-1)*n+k]+1,d[j*n+k-1]+1,d[(j-1)*n+k-1]+cost);
for (k = 1; k <= i; k++) {
cost = s[k - 1] == t[j - 1] ? 0 : 1;
d[j * n + k] = minimum(d[(j - 1) * n + k] + 1, d[j * n + k - 1] + 1, d[(j - 1) * n + k - 1] + cost);
#ifdef COVER_TRANSPOSITION
if ( k>=2 && j>=2 && (d[j*n+k]-d[(j-2)*n+k-2]==2)
&& (s[k-2]==t[j-1]) && (s[k-1]==t[j-2]) )
d[j*n+k]--;
if (k >= 2 && j >= 2 && (d[j * n + k] - d[(j - 2) * n + k - 2] == 2)
&& (s[k - 2] == t[j - 1]) && (s[k - 1] == t[j - 2]))
d[j * n + k]--;
#endif
}
// test if d(i,j) limit gets equal or exceed
if ( d[j*n+i] >= limit )
{
return d[j*n+i];
if (d[j * n + i] >= limit) {
return d[j * n + i];
}
}
// d(n-1,m-1)
return d[n*m-1];
return d[n * m - 1];
}

View File

@@ -3,21 +3,24 @@
#include <cstdlib>
#include <glib.h>
class EditDistance {
class EditDistance
{
public:
EditDistance() {
EditDistance()
{
currentelements = 2500; // It's enough for most conditions :-)
d = static_cast<int *>(malloc(sizeof(int)*currentelements));
d = static_cast<int *>(malloc(sizeof(int) * currentelements));
}
~EditDistance() {
~EditDistance()
{
if (d != nullptr)
free(d);
}
EditDistance(const EditDistance&) = delete;
EditDistance& operator=(const EditDistance&) = delete;
int CalEditDistance( const gunichar *s, const gunichar *t, const int limit );
EditDistance(const EditDistance &) = delete;
EditDistance &operator=(const EditDistance &) = delete;
int CalEditDistance(const gunichar *s, const gunichar *t, const int limit);
private:
int *d;
int currentelements;
};

View File

@@ -19,7 +19,7 @@
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#include "config.h"
#endif
#include <cstring>
@@ -48,401 +48,405 @@ static const char *ABR_VISFMT = ESC_GREEN;
/*
 * Convert the XDXF markup in the NUL-terminated string `p` to plain text.
 *
 * - The entities &gt; &lt; &amp; &quot; &apos; are decoded.
 * - <k>...</k> (the key) is dropped together with its content.
 * - <tr>...</tr> is wrapped in square brackets.
 * - <abr>, <kref>, <b>, <i>, <tr>, <ex> emit their terminal escape sequence
 *   and the matching end tags emit ESC_END, but only when `colorize_output`
 *   is true.
 * - Every other tag, including the <c ...> colour tags, is removed and
 *   produces no output.
 * - A '<' with no '>' after it is dropped.
 */
static std::string xdxf2text(const char *p, bool colorize_output)
{
    std::string res;
    for (; *p; ++p) {
        if (*p != '<') {
            // Outside a tag: decode entities, copy everything else through.
            // p is advanced to the last character of the entity; the loop
            // increment then steps past it.
            if (g_str_has_prefix(p, "&gt;")) {
                res += ">";
                p += 3;
            } else if (g_str_has_prefix(p, "&lt;")) {
                res += "<";
                p += 3;
            } else if (g_str_has_prefix(p, "&amp;")) {
                res += "&";
                p += 4;
            } else if (g_str_has_prefix(p, "&quot;")) {
                res += "\"";
                p += 5;
            } else if (g_str_has_prefix(p, "&apos;")) {
                res += "\'";
                p += 5;
            } else {
                res += *p;
            }
            continue;
        }

        const char *next = strchr(p, '>');
        if (!next)
            continue;

        // Text between '<' and '>', e.g. "abr" or "/abr".
        const std::string name(p + 1, next - p - 1);

        if (name == "abr") {
            if (colorize_output)
                res += ABR_VISFMT;
        } else if (name == "k") {
            // Jump to the last character before the '>' of "</k>" so the
            // key text is skipped; without a closing tag only <k> is dropped.
            const char *key_end = strstr(next, "</k>");
            if (key_end != nullptr)
                next = key_end + sizeof("</k>") - 1 - 1;
        } else if (name == "kref") {
            if (colorize_output)
                res += KREF_VISFMT;
        } else if (name == "b") {
            if (colorize_output)
                res += ESC_BOLD;
        } else if (name == "i") {
            if (colorize_output)
                res += ESC_ITALIC;
        } else if (name == "tr") {
            if (colorize_output)
                res += TRANSCRIPTION_VISFMT;
            res += "[";
        } else if (name == "/tr") {
            res += "]";
            if (colorize_output)
                res += ESC_END;
        } else if (name == "ex") {
            if (colorize_output)
                res += EXAMPLE_VISFMT;
        } else if (name == "/abr" || name == "/kref" || name == "/b"
                   || name == "/i" || name == "/ex") {
            if (colorize_output)
                res += ESC_END;
        }
        // Colour tags (<c>, <c code="...">, </c>) and unknown tags produce
        // no output. The colour attribute is deliberately not parsed: its
        // value was never used, and extracting it could throw
        // std::out_of_range for a tag such as <code>.

        p = next;
    }
    return res;
}
static std::string parse_data(const gchar *data, bool colorize_output)
{
if (!data)
return "";
if (!data)
return "";
std::string res;
guint32 data_size, sec_size = 0;
gchar *m_str;
const gchar *p = data;
data_size = get_uint32(p);
p += sizeof(guint32);
while (guint32(p - data)<data_size) {
switch (*p++) {
case 'h': // HTML data
case 'w': // WikiMedia markup data
case 'm': // plain text, utf-8
case 'l': // not utf-8, some other locale encoding, discouraged, need more work...
sec_size = strlen(p);
if (sec_size) {
res+="\n";
m_str = g_strndup(p, sec_size);
res += m_str;
g_free(m_str);
}
sec_size++;
break;
case 'g': // pango markup data
case 'x': // xdxf
sec_size = strlen(p);
if (sec_size) {
res+="\n";
m_str = g_strndup(p, sec_size);
res += xdxf2text(m_str, colorize_output);
g_free(m_str);
}
sec_size++;
break;
case 't': // english phonetic string
sec_size = strlen(p);
if(sec_size){
res += "\n";
std::string res;
guint32 data_size, sec_size = 0;
gchar *m_str;
const gchar *p = data;
data_size = get_uint32(p);
p += sizeof(guint32);
while (guint32(p - data) < data_size) {
switch (*p++) {
case 'h': // HTML data
case 'w': // WikiMedia markup data
case 'm': // plain text, utf-8
case 'l': // not utf-8, some other locale encoding, discouraged, need more work...
sec_size = strlen(p);
if (sec_size) {
res += "\n";
m_str = g_strndup(p, sec_size);
res += m_str;
g_free(m_str);
}
sec_size++;
break;
case 'g': // pango markup data
case 'x': // xdxf
sec_size = strlen(p);
if (sec_size) {
res += "\n";
m_str = g_strndup(p, sec_size);
res += xdxf2text(m_str, colorize_output);
g_free(m_str);
}
sec_size++;
break;
case 't': // english phonetic string
sec_size = strlen(p);
if (sec_size) {
res += "\n";
if (colorize_output)
res += TRANSCRIPTION_VISFMT;
res += "[" + std::string(p, sec_size) + "]";
res += "[" + std::string(p, sec_size) + "]";
if (colorize_output)
res += ESC_END;
}
sec_size++;
break;
case 'k': // KingSoft PowerWord data
case 'y': // chinese YinBiao or japanese kana, utf-8
sec_size = strlen(p);
}
sec_size++;
break;
case 'k': // KingSoft PowerWord data
case 'y': // chinese YinBiao or japanese kana, utf-8
sec_size = strlen(p);
if (sec_size)
res += std::string(p, sec_size);
sec_size++;
break;
case 'W': // wav file
case 'P': // picture data
sec_size = get_uint32(p);
sec_size += sizeof(guint32);
break;
}
p += sec_size;
}
sec_size++;
break;
case 'W': // wav file
case 'P': // picture data
sec_size = get_uint32(p);
sec_size += sizeof(guint32);
break;
}
p += sec_size;
}
return res;
return res;
}
void Library::SimpleLookup(const std::string &str, TSearchResultList& res_list)
void Library::SimpleLookup(const std::string &str, TSearchResultList &res_list)
{
glong ind;
res_list.reserve(ndicts());
for (gint idict = 0; idict < ndicts(); ++idict)
if (SimpleLookupWord(str.c_str(), ind, idict))
res_list.push_back(
TSearchResult(dict_name(idict),
glong ind;
res_list.reserve(ndicts());
for (gint idict = 0; idict < ndicts(); ++idict)
if (SimpleLookupWord(str.c_str(), ind, idict))
res_list.push_back(
TSearchResult(dict_name(idict),
poGetWord(ind, idict),
parse_data(poGetWordData(ind, idict), colorize_output_)));
}
void Library::LookupWithFuzzy(const std::string &str, TSearchResultList& res_list)
void Library::LookupWithFuzzy(const std::string &str, TSearchResultList &res_list)
{
static const int MAXFUZZY=10;
static const int MAXFUZZY = 10;
gchar *fuzzy_res[MAXFUZZY];
if (!Libs::LookupWithFuzzy(str.c_str(), fuzzy_res, MAXFUZZY))
return;
gchar *fuzzy_res[MAXFUZZY];
if (!Libs::LookupWithFuzzy(str.c_str(), fuzzy_res, MAXFUZZY))
return;
for (gchar **p = fuzzy_res, **end = (fuzzy_res + MAXFUZZY); p != end && *p; ++p) {
SimpleLookup(*p, res_list);
g_free(*p);
}
for (gchar **p = fuzzy_res, **end = (fuzzy_res + MAXFUZZY); p != end && *p; ++p) {
SimpleLookup(*p, res_list);
g_free(*p);
}
}
void Library::LookupWithRule(const std::string &str, TSearchResultList& res_list)
void Library::LookupWithRule(const std::string &str, TSearchResultList &res_list)
{
std::vector<gchar *> match_res((MAX_MATCH_ITEM_PER_LIB) * ndicts());
std::vector<gchar *> match_res((MAX_MATCH_ITEM_PER_LIB)*ndicts());
const gint nfound = Libs::LookupWithRule(str.c_str(), &match_res[0]);
if (nfound == 0)
return;
const gint nfound = Libs::LookupWithRule(str.c_str(), &match_res[0]);
if (nfound == 0)
return;
for (gint i = 0; i < nfound; ++i) {
SimpleLookup(match_res[i], res_list);
g_free(match_res[i]);
}
for (gint i = 0; i < nfound; ++i) {
SimpleLookup(match_res[i], res_list);
g_free(match_res[i]);
}
}
void Library::LookupData(const std::string &str, TSearchResultList& res_list)
void Library::LookupData(const std::string &str, TSearchResultList &res_list)
{
std::vector<std::vector<gchar *> > drl(ndicts());
if (!Libs::LookupData(str.c_str(), &drl[0]))
return;
for (int idict = 0; idict < ndicts(); ++idict)
for (gchar *res : drl[idict]) {
SimpleLookup(res, res_list);
g_free(res);
}
std::vector<std::vector<gchar *>> drl(ndicts());
if (!Libs::LookupData(str.c_str(), &drl[0]))
return;
for (int idict = 0; idict < ndicts(); ++idict)
for (gchar *res : drl[idict]) {
SimpleLookup(res, res_list);
g_free(res);
}
}
void Library::print_search_result(FILE *out, const TSearchResult & res, bool &first_result)
void Library::print_search_result(FILE *out, const TSearchResult &res, bool &first_result)
{
std::string loc_bookname, loc_def, loc_exp;
std::string loc_bookname, loc_def, loc_exp;
if (!utf8_output_){
loc_bookname = utf8_to_locale_ign_err(res.bookname);
loc_def = utf8_to_locale_ign_err(res.def);
loc_exp = utf8_to_locale_ign_err(res.exp);
}
if(json_) {
if(!first_result) {
if (!utf8_output_) {
loc_bookname = utf8_to_locale_ign_err(res.bookname);
loc_def = utf8_to_locale_ign_err(res.def);
loc_exp = utf8_to_locale_ign_err(res.exp);
}
if (json_) {
if (!first_result) {
fputs(",", out);
} else {
first_result=false;
}
fprintf(out,"{\"dict\": \"%s\",\"word\":\"%s\",\"definition\":\"%s\"}",
json_escape_string(res.bookname).c_str(),
json_escape_string(res.def).c_str(),
json_escape_string(res.exp).c_str());
} else {
fprintf(out,
"-->%s%s%s\n"
"-->%s%s%s\n"
"%s\n\n",
colorize_output_ ? NAME_OF_DICT_VISFMT : "",
utf8_output_ ? res.bookname.c_str() : loc_bookname.c_str(),
colorize_output_ ? ESC_END : "",
colorize_output_ ? SEARCH_TERM_VISFMT : "",
utf8_output_ ? res.def.c_str() : loc_def.c_str(),
colorize_output_ ? ESC_END : "",
utf8_output_ ? res.exp.c_str() : loc_exp.c_str());
first_result = false;
}
fprintf(out, "{\"dict\": \"%s\",\"word\":\"%s\",\"definition\":\"%s\"}",
json_escape_string(res.bookname).c_str(),
json_escape_string(res.def).c_str(),
json_escape_string(res.exp).c_str());
} else {
fprintf(out,
"-->%s%s%s\n"
"-->%s%s%s\n"
"%s\n\n",
colorize_output_ ? NAME_OF_DICT_VISFMT : "",
utf8_output_ ? res.bookname.c_str() : loc_bookname.c_str(),
colorize_output_ ? ESC_END : "",
colorize_output_ ? SEARCH_TERM_VISFMT : "",
utf8_output_ ? res.def.c_str() : loc_def.c_str(),
colorize_output_ ? ESC_END : "",
utf8_output_ ? res.exp.c_str() : loc_exp.c_str());
}
}
namespace {
class sdcv_pager final {
public:
explicit sdcv_pager(bool ignore_env = false) {
namespace
{
class sdcv_pager final
{
public:
explicit sdcv_pager(bool ignore_env = false)
{
output = stdout;
if (ignore_env) {
return;
}
const gchar *pager = g_getenv("SDCV_PAGER");
if (pager && (output = popen(pager, "w")) == nullptr) {
perror(_("popen failed"));
output = stdout;
if (ignore_env) {
return;
}
const gchar *pager = g_getenv("SDCV_PAGER");
if (pager && (output = popen(pager, "w")) == nullptr) {
perror(_("popen failed"));
output = stdout;
}
}
sdcv_pager(const sdcv_pager&) = delete;
sdcv_pager& operator=(const sdcv_pager&) = delete;
~sdcv_pager() {
if (output != stdout) {
pclose(output);
}
}
sdcv_pager(const sdcv_pager &) = delete;
sdcv_pager &operator=(const sdcv_pager &) = delete;
~sdcv_pager()
{
if (output != stdout) {
pclose(output);
}
FILE *get_stream() { return output; }
private:
FILE *output;
};
}
FILE *get_stream() { return output; }
private:
FILE *output;
};
}
bool Library::process_phrase(const char *loc_str, IReadLine &io, bool force)
{
if (nullptr == loc_str)
return true;
if (nullptr == loc_str)
return true;
std::string query;
std::string query;
analyze_query(loc_str, query);
if (!query.empty())
io.add_to_history(query.c_str());
analyze_query(loc_str, query);
if (!query.empty())
io.add_to_history(query.c_str());
gsize bytes_read;
gsize bytes_written;
gsize bytes_read;
gsize bytes_written;
glib::Error err;
glib::CharStr str;
if (!utf8_input_)
str.reset(g_locale_to_utf8(loc_str, -1, &bytes_read, &bytes_written, get_addr(err)));
else
str.reset(g_strdup(loc_str));
if (!utf8_input_)
str.reset(g_locale_to_utf8(loc_str, -1, &bytes_read, &bytes_written, get_addr(err)));
else
str.reset(g_strdup(loc_str));
if (nullptr == get_impl(str)) {
fprintf(stderr, _("Can not convert %s to utf8.\n"), loc_str);
fprintf(stderr, "%s\n", err->message);
return false;
}
if (nullptr == get_impl(str)) {
fprintf(stderr, _("Can not convert %s to utf8.\n"), loc_str);
fprintf(stderr, "%s\n", err->message);
return false;
}
if (str[0] == '\0')
return true;
if (str[0] == '\0')
return true;
TSearchResultList res_list;
TSearchResultList res_list;
switch (analyze_query(get_impl(str), query)) {
case qtFUZZY:
LookupWithFuzzy(query, res_list);
break;
case qtREGEXP:
LookupWithRule(query, res_list);
break;
case qtSIMPLE:
SimpleLookup(get_impl(str), res_list);
if (res_list.empty() && fuzzy_)
LookupWithFuzzy(get_impl(str), res_list);
break;
case qtDATA:
LookupData(query, res_list);
break;
default:
/*nothing*/;
}
switch (analyze_query(get_impl(str), query)) {
case qtFUZZY:
LookupWithFuzzy(query, res_list);
break;
case qtREGEXP:
LookupWithRule(query, res_list);
break;
case qtSIMPLE:
SimpleLookup(get_impl(str), res_list);
if (res_list.empty() && fuzzy_)
LookupWithFuzzy(get_impl(str), res_list);
break;
case qtDATA:
LookupData(query, res_list);
break;
default:
/*nothing*/;
}
bool first_result = true;
if (json_) {
bool first_result = true;
if (json_) {
fputc('[', stdout);
}
if (!res_list.empty()) {
/* try to be more clever, if there are
}
if (!res_list.empty()) {
/* try to be more clever, if there are
one or zero results per dictionary show all
*/
bool show_all_results = true;
typedef std::map< std::string, int, std::less<std::string> > DictResMap;
if (!force) {
DictResMap res_per_dict;
for (const TSearchResult& search_res : res_list) {
auto r = res_per_dict.equal_range(search_res.bookname);
DictResMap tmp(r.first, r.second);
if (tmp.empty()) //there are no yet such bookname in map
res_per_dict.insert(DictResMap::value_type(search_res.bookname, 1));
else {
++((tmp.begin())->second);
if (tmp.begin()->second > 1) {
show_all_results = false;
break;
}
}
}
}//if (!force)
bool show_all_results = true;
typedef std::map<std::string, int, std::less<std::string>> DictResMap;
if (!force) {
DictResMap res_per_dict;
for (const TSearchResult &search_res : res_list) {
auto r = res_per_dict.equal_range(search_res.bookname);
DictResMap tmp(r.first, r.second);
if (tmp.empty()) //there are no yet such bookname in map
res_per_dict.insert(DictResMap::value_type(search_res.bookname, 1));
else {
++((tmp.begin())->second);
if (tmp.begin()->second > 1) {
show_all_results = false;
break;
}
}
}
} //if (!force)
if (!show_all_results && !force) {
if (!show_all_results && !force) {
if (!json_) {
printf(_("Found %zu items, similar to %s.\n"), res_list.size(),
utf8_output_ ? get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str());
}
for (size_t i = 0; i < res_list.size(); ++i) {
for (size_t i = 0; i < res_list.size(); ++i) {
const std::string loc_bookname = utf8_to_locale_ign_err(res_list[i].bookname);
const std::string loc_def = utf8_to_locale_ign_err(res_list[i].def);
printf("%zu)%s%s%s-->%s%s%s\n", i,
printf("%zu)%s%s%s-->%s%s%s\n", i,
colorize_output_ ? NAME_OF_DICT_VISFMT : "",
utf8_output_ ? res_list[i].bookname.c_str() : loc_bookname.c_str(),
utf8_output_ ? res_list[i].bookname.c_str() : loc_bookname.c_str(),
colorize_output_ ? ESC_END : "",
colorize_output_ ? SEARCH_TERM_VISFMT : "",
utf8_output_ ? res_list[i].def.c_str() : loc_def.c_str(),
utf8_output_ ? res_list[i].def.c_str() : loc_def.c_str(),
colorize_output_ ? ESC_END : "");
}
int choise;
std::unique_ptr<IReadLine> choice_readline(create_readline_object());
for (;;) {
}
int choise;
std::unique_ptr<IReadLine> choice_readline(create_readline_object());
for (;;) {
std::string str_choise;
choice_readline->read(_("Your choice[-1 to abort]: "), str_choise);
sscanf(str_choise.c_str(), "%d", &choise);
if (choise >= 0 && choise < int(res_list.size())) {
choice_readline->read(_("Your choice[-1 to abort]: "), str_choise);
sscanf(str_choise.c_str(), "%d", &choise);
if (choise >= 0 && choise < int(res_list.size())) {
sdcv_pager pager;
io.add_to_history(res_list[choise].def.c_str());
print_search_result(pager.get_stream(), res_list[choise], first_result);
break;
} else if (choise == -1){
break;
} else
printf(_("Invalid choice.\nIt must be from 0 to %zu or -1.\n"),
res_list.size()-1);
}
} else {
print_search_result(pager.get_stream(), res_list[choise], first_result);
break;
} else if (choise == -1) {
break;
} else
printf(_("Invalid choice.\nIt must be from 0 to %zu or -1.\n"),
res_list.size() - 1);
}
} else {
sdcv_pager pager(force || json_);
if (!json_) {
fprintf(pager.get_stream(), _("Found %zu items, similar to %s.\n"),
res_list.size(), utf8_output_ ? get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str());
}
for (const TSearchResult& search_res : res_list) {
for (const TSearchResult &search_res : res_list) {
print_search_result(pager.get_stream(), search_res, first_result);
}
}
}
} else {
std::string loc_str;
if (!utf8_output_)
loc_str = utf8_to_locale_ign_err(get_impl(str));
if(!json_)
printf(_("Nothing similar to %s, sorry :(\n"), utf8_output_ ? get_impl(str) : loc_str.c_str());
}
} else {
std::string loc_str;
if (!utf8_output_)
loc_str = utf8_to_locale_ign_err(get_impl(str));
if (!json_)
printf(_("Nothing similar to %s, sorry :(\n"), utf8_output_ ? get_impl(str) : loc_str.c_str());
}
if (json_) {
fputs("]\n", stdout);
}
return true;
return true;
}

View File

@@ -3,45 +3,52 @@
#include <string>
#include <vector>
#include "stardict_lib.hpp"
#include "readline.hpp"
#include "stardict_lib.hpp"
// Plain value type that gives every kind of search the same result shape,
// so the lookup functions can all fill one common list.
struct TSearchResult {
    std::string bookname; // name of the dictionary the hit came from
    std::string def;      // the word that was found
    std::string exp;      // the rendered article for that word

    TSearchResult(const std::string &bookname_, const std::string &def_, const std::string &exp_)
        : bookname{bookname_}
        , def{def_}
        , exp{exp_}
    {
    }
};

using TSearchResultList = std::vector<TSearchResult>;
//this class is wrapper around Dicts class for easy use
//of it
class Library : public Libs {
class Library : public Libs
{
public:
Library(bool uinput, bool uoutput, bool colorize_output, bool use_json, bool no_fuzzy)
: utf8_input_(uinput), utf8_output_(uoutput), colorize_output_(colorize_output), json_(use_json) {
setVerbose(!use_json);
setFuzzy(!no_fuzzy);
}
Library(bool uinput, bool uoutput, bool colorize_output, bool use_json, bool no_fuzzy)
: utf8_input_(uinput)
, utf8_output_(uoutput)
, colorize_output_(colorize_output)
, json_(use_json)
{
setVerbose(!use_json);
setFuzzy(!no_fuzzy);
}
bool process_phrase(const char *loc_str, IReadLine &io, bool force = false);
bool process_phrase(const char *loc_str, IReadLine &io, bool force = false);
private:
bool utf8_input_;
bool utf8_output_;
bool colorize_output_;
bool json_;
bool utf8_input_;
bool utf8_output_;
bool colorize_output_;
bool json_;
void SimpleLookup(const std::string &str, TSearchResultList& res_list);
void LookupWithFuzzy(const std::string &str, TSearchResultList& res_list);
void LookupWithRule(const std::string &str, TSearchResultList& res_lsit);
void LookupData(const std::string &str, TSearchResultList& res_list);
void print_search_result(FILE *out, const TSearchResult & res, bool &first_result);
void SimpleLookup(const std::string &str, TSearchResultList &res_list);
void LookupWithFuzzy(const std::string &str, TSearchResultList &res_list);
void LookupWithRule(const std::string &str, TSearchResultList &res_lsit);
void LookupData(const std::string &str, TSearchResultList &res_list);
void print_search_result(FILE *out, const TSearchResult &res, bool &first_result);
};

View File

@@ -1,27 +1,29 @@
#pragma once
#ifdef HAVE_CONFIG_H
# include "config.h"
#include "config.h"
#endif
#ifdef HAVE_MMAP
# include <sys/types.h>
# include <fcntl.h>
# include <sys/mman.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
#endif
#ifdef _WIN32
# include <windows.h>
#include <windows.h>
#endif
#include <glib.h>
class MapFile {
class MapFile
{
public:
MapFile() {}
~MapFile();
MapFile(const MapFile&) = delete;
MapFile& operator=(const MapFile&) = delete;
MapFile(const MapFile &) = delete;
MapFile &operator=(const MapFile &) = delete;
bool open(const char *file_name, unsigned long file_size);
gchar *begin() { return data; }
private:
char *data = nullptr;
unsigned long size = 0ul;
@@ -35,51 +37,50 @@ private:
// Makes the contents of `file_name` available through begin().
//
// Depending on the build this maps the file with mmap(), with the Win32
// file-mapping API, or reads it into memory with g_file_get_contents().
//
// file_name - path of the file to open
// file_size - expected size in bytes; it is used as the mapping length and,
//             in the read-into-memory build, must equal the bytes read
// Returns true on success. On failure every handle opened here is released
// again, because the destructor does no cleanup while `data` is null.
inline bool MapFile::open(const char *file_name, unsigned long file_size)
{
    size = file_size;
#ifdef HAVE_MMAP
    if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) {
        //g_print("Open file %s failed!\n",fullfilename);
        return false;
    }
    data = (gchar *)mmap(nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0);
    if ((void *)data == (void *)(-1)) {
        //g_print("mmap file %s failed!\n",idxfilename);
        data = nullptr;
        // Without this the descriptor would leak: ~MapFile returns early
        // when data is null.
        close(mmap_fd);
        mmap_fd = -1;
        return false;
    }
#elif defined(_WIN32)
    hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS,
                       FILE_ATTRIBUTE_NORMAL, 0);
    if (hFile == INVALID_HANDLE_VALUE)
        return false;
    hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0,
                                 file_size, nullptr);
    if (hFileMap == nullptr) {
        CloseHandle(hFile);
        return false;
    }
    data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size);
    if (data == nullptr) {
        // Report the failure instead of returning success with no data.
        CloseHandle(hFileMap);
        CloseHandle(hFile);
        return false;
    }
#else
    gsize read_len;
    if (!g_file_get_contents(file_name, &data, &read_len, nullptr))
        return false;

    // data stays set here, so the destructor frees the buffer.
    if (read_len != file_size)
        return false;
#endif

    return true;
}
// Releases whatever open() acquired; does nothing when no data is held.
inline MapFile::~MapFile()
{
    if (data == nullptr)
        return;
#if defined(HAVE_MMAP)
    munmap(data, size);
    close(mmap_fd);
#elif defined(_WIN32)
    UnmapViewOfFile(data);
    CloseHandle(hFileMap);
    CloseHandle(hFile);
#else
    g_free(data);
#endif
}

View File

@@ -19,14 +19,14 @@
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#include "config.h"
#endif
#include <cstdio>
#include <cstdlib>
#ifdef WITH_READLINE
# include <readline/readline.h>
# include <readline/history.h>
#include <readline/history.h>
#include <readline/readline.h>
#endif
#include <glib.h>
@@ -34,73 +34,82 @@
#include "readline.hpp"
// Reads characters from `in` into `str` up to, but not including, the next
// newline. `str` is cleared first.
// Returns true when the line was ended by a newline and false when end of
// file (or a read error) was hit; in the latter case `str` still holds the
// characters read so far.
bool stdio_getline(FILE *in, std::string &str)
{
    assert(in != nullptr);
    str.clear();
    for (;;) {
        const int next = fgetc(in);
        if (next == EOF)
            return false;
        if (next == '\n')
            return true;
        str.push_back(static_cast<char>(next));
    }
}
#ifndef WITH_READLINE
namespace
{
// Fallback line reader for builds without readline support: prints the
// banner with printf and reads one line from stdin.
class dummy_readline : public IReadLine
{
public:
    bool read(const std::string &banner, std::string &line) override
    {
        const char *prompt = banner.c_str();
        printf("%s", prompt);
        const bool got_line = stdio_getline(stdin, line);
        return got_line;
    }
};
}
#else
namespace {
class real_readline : public IReadLine {
namespace
{
class real_readline : public IReadLine
{
public:
real_readline() {
rl_readline_name = "sdcv";
using_history();
const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history";
read_history(histname.c_str());
}
public:
real_readline()
{
rl_readline_name = "sdcv";
using_history();
const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history";
read_history(histname.c_str());
}
~real_readline() {
const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history";
write_history(histname.c_str());
const gchar *hist_size_str=g_getenv("SDCV_HISTSIZE");
int hist_size;
if (!hist_size_str || sscanf(hist_size_str, "%d", &hist_size)<1)
hist_size = 2000;
history_truncate_file(histname.c_str(), hist_size);
}
~real_readline()
{
const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history";
write_history(histname.c_str());
const gchar *hist_size_str = g_getenv("SDCV_HISTSIZE");
int hist_size;
if (!hist_size_str || sscanf(hist_size_str, "%d", &hist_size) < 1)
hist_size = 2000;
history_truncate_file(histname.c_str(), hist_size);
}
bool read(const std::string &banner, std::string& line) override {
char *phrase = nullptr;
phrase = readline(banner.c_str());
if (phrase) {
line = phrase;
free(phrase);
return true;
}
return false;
bool read(const std::string &banner, std::string &line) override
{
char *phrase = nullptr;
phrase = readline(banner.c_str());
if (phrase) {
line = phrase;
free(phrase);
return true;
}
return false;
}
void add_to_history(const std::string& phrase) override {
add_history(phrase.c_str());
}
};
void add_to_history(const std::string &phrase) override
{
add_history(phrase.c_str());
}
};
}
#endif//WITH_READLINE
#endif //WITH_READLINE
// Creates the line reader selected at build time: real_readline when
// WITH_READLINE is defined, dummy_readline otherwise. The object is
// allocated with new and returned as a raw pointer.
IReadLine *create_readline_object()
{
#ifndef WITH_READLINE
    IReadLine *reader = new dummy_readline;
#else
    IReadLine *reader = new real_readline;
#endif
    return reader;
}

View File

@@ -2,11 +2,12 @@
#include <string>
// Interface of a line reader used for interactive input.
class IReadLine
{
public:
    virtual ~IReadLine() = default;

    // Shows `banner` as the prompt and stores the line that was read in
    // `line`; the bool result tells the caller whether a line was obtained.
    virtual bool read(const std::string &banner, std::string &line) = 0;

    // Records a phrase in the input history; the default does nothing.
    virtual void add_to_history(const std::string &) {}
};
extern std::string sdcv_readline;

View File

@@ -22,16 +22,16 @@
#include "config.h"
#endif
#include <algorithm>
#include <cerrno>
#include <clocale>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <algorithm>
#include <map>
#include <glib.h>
#include <glib/gi18n.h>
@@ -56,7 +56,7 @@ static void free_str_array(gchar **arr)
}
namespace glib
{
using StrArr = ResourceWrapper<gchar *, gchar *, free_str_array>;
using StrArr = ResourceWrapper<gchar *, gchar *, free_str_array>;
}
static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_json);
@@ -131,12 +131,12 @@ int main(int argc, char *argv[]) try {
const gchar *stardict_data_dir = g_getenv("STARDICT_DATA_DIR");
std::string data_dir;
if (!opt_data_dir) {
if (!only_data_dir) {
if (stardict_data_dir)
data_dir = stardict_data_dir;
else
data_dir = "/usr/share/stardict/dic";
}
if (!only_data_dir) {
if (stardict_data_dir)
data_dir = stardict_data_dir;
else
data_dir = "/usr/share/stardict/dic";
}
} else {
data_dir = get_impl(opt_data_dir);
}
@@ -146,8 +146,8 @@ int main(int argc, char *argv[]) try {
homedir = g_get_home_dir();
std::list<std::string> dicts_dir_list;
if(!only_data_dir)
dicts_dir_list.push_back(std::string(homedir) + G_DIR_SEPARATOR + ".stardict" + G_DIR_SEPARATOR + "dic");
if (!only_data_dir)
dicts_dir_list.push_back(std::string(homedir) + G_DIR_SEPARATOR + ".stardict" + G_DIR_SEPARATOR + "dic");
dicts_dir_list.push_back(data_dir);
if (show_list_dicts) {
list_dicts(dicts_dir_list, json_output);
@@ -215,7 +215,7 @@ int main(int argc, char *argv[]) try {
std::string phrase;
while (io->read(_("Enter word or phrase: "), phrase)) {
if (!lib.process_phrase(phrase.c_str(), *io))
if (!lib.process_phrase(phrase.c_str(), *io))
return EXIT_FAILURE;
phrase.clear();
}
@@ -232,30 +232,29 @@ int main(int argc, char *argv[]) try {
static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_json)
{
bool first_entry = true;
if(!use_json)
printf(_("Dictionary's name Word count\n"));
else
fputc('[', stdout);
std::list<std::string> order_list, disable_list;
for_each_file(dicts_dir_list, ".ifo", order_list,
disable_list, [use_json, &first_entry](const std::string &filename, bool) -> void {
DictInfo dict_info;
if (dict_info.load_from_ifo_file(filename, false)) {
const std::string bookname = utf8_to_locale_ign_err(dict_info.bookname);
if(use_json) {
if(first_entry) {
first_entry=false;
} else {
fputc(',', stdout); // comma between entries
bool first_entry = true;
if (!use_json)
printf(_("Dictionary's name Word count\n"));
else
fputc('[', stdout);
std::list<std::string> order_list, disable_list;
for_each_file(dicts_dir_list, ".ifo", order_list,
disable_list, [use_json, &first_entry](const std::string &filename, bool) -> void {
DictInfo dict_info;
if (dict_info.load_from_ifo_file(filename, false)) {
const std::string bookname = utf8_to_locale_ign_err(dict_info.bookname);
if (use_json) {
if (first_entry) {
first_entry = false;
} else {
fputc(',', stdout); // comma between entries
}
printf("{\"name\": \"%s\", \"wordcount\": \"%d\"}", json_escape_string(bookname).c_str(), dict_info.wordcount);
} else {
printf("%s %d\n", bookname.c_str(), dict_info.wordcount);
}
}
printf("{\"name\": \"%s\", \"wordcount\": \"%d\"}", json_escape_string(bookname).c_str(), dict_info.wordcount);
} else {
printf("%s %d\n", bookname.c_str(), dict_info.wordcount);
}
}
});
if(use_json)
fputs("]\n", stdout);
});
if (use_json)
fputs("]\n", stdout);
}

File diff suppressed because it is too large Load Diff

View File

@@ -2,17 +2,17 @@
#include <cstdio>
#include <cstring>
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <functional>
#include <map>
#include "dictziplib.hpp"
const int MAX_MATCH_ITEM_PER_LIB=100;
const int MAX_FUZZY_DISTANCE= 3; // at most MAX_FUZZY_DISTANCE-1 differences allowed when find similar words
const int MAX_MATCH_ITEM_PER_LIB = 100;
const int MAX_FUZZY_DISTANCE = 3; // at most MAX_FUZZY_DISTANCE-1 differences allowed when find similar words
inline guint32 get_uint32(const gchar *addr)
{
@@ -26,172 +26,190 @@ inline void set_uint32(gchar *addr, guint32 val)
memcpy(addr, &val, sizeof(guint32));
}
struct cacheItem {
guint32 offset;
gchar *data;
//write code here to make it inline
cacheItem() { data = nullptr;}
~cacheItem() { g_free(data); }
guint32 offset;
gchar *data;
//write code here to make it inline
cacheItem() { data = nullptr; }
~cacheItem() { g_free(data); }
};
const int WORDDATA_CACHE_NUM = 10;
const int INVALID_INDEX=-100;
const int INVALID_INDEX = -100;
class DictBase {
class DictBase
{
public:
DictBase() {}
~DictBase() {
DictBase() {}
~DictBase()
{
if (dictfile)
fclose(dictfile);
}
DictBase(const DictBase&) = delete;
DictBase& operator=(const DictBase&) = delete;
gchar * GetWordData(guint32 idxitem_offset, guint32 idxitem_size);
bool containSearchData() const {
DictBase(const DictBase &) = delete;
DictBase &operator=(const DictBase &) = delete;
gchar *GetWordData(guint32 idxitem_offset, guint32 idxitem_size);
bool containSearchData() const
{
if (sametypesequence.empty())
return true;
return sametypesequence.find_first_of("mlgxty") != std::string::npos;
}
bool SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data);
bool SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data);
protected:
std::string sametypesequence;
FILE *dictfile = nullptr;
std::unique_ptr<DictData> dictdzfile;
std::string sametypesequence;
FILE *dictfile = nullptr;
std::unique_ptr<DictData> dictdzfile;
private:
cacheItem cache[WORDDATA_CACHE_NUM];
gint cache_cur = 0;
cacheItem cache[WORDDATA_CACHE_NUM];
gint cache_cur = 0;
};
// Holds all the information about a dictionary.
struct DictInfo {
    // where the information came from
    std::string ifo_file_name;

    // entry counts
    guint32 wordcount;
    guint32 syn_wordcount;

    // descriptive fields
    std::string bookname;
    std::string author;
    std::string email;
    std::string website;
    std::string date;
    std::string description;

    // sizes of the companion files
    guint32 index_file_size;
    guint32 syn_file_size;

    std::string sametypesequence;

    // Loads the fields from the given .ifo file.
    // NOTE(review): the bool result presumably signals success - confirm in the implementation.
    bool load_from_ifo_file(const std::string &ifofilename, bool istreedict);
};
class IIndexFile {
class IIndexFile
{
public:
guint32 wordentry_offset;
guint32 wordentry_size;
guint32 wordentry_offset;
guint32 wordentry_size;
virtual ~IIndexFile() {}
virtual bool load(const std::string& url, gulong wc, gulong fsize, bool verbose) = 0;
virtual const gchar *get_key(glong idx) = 0;
virtual void get_data(glong idx) = 0;
virtual const gchar *get_key_and_data(glong idx) = 0;
virtual bool lookup(const char *str, glong &idx) = 0;
virtual ~IIndexFile() {}
virtual bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) = 0;
virtual const gchar *get_key(glong idx) = 0;
virtual void get_data(glong idx) = 0;
virtual const gchar *get_key_and_data(glong idx) = 0;
virtual bool lookup(const char *str, glong &idx) = 0;
};
class SynFile {
class SynFile
{
public:
bool load(const std::string& url, gulong wc);
bool lookup(const char *str, glong &idx);
bool load(const std::string &url, gulong wc);
bool lookup(const char *str, glong &idx);
private:
std::map<std::string, gulong> synonyms;
std::map<std::string, gulong> synonyms;
};
class Dict : public DictBase {
class Dict : public DictBase
{
public:
Dict() {}
Dict(const Dict&) = delete;
Dict& operator=(const Dict&) = delete;
bool load(const std::string& ifofilename, bool verbose);
Dict() {}
Dict(const Dict &) = delete;
Dict &operator=(const Dict &) = delete;
bool load(const std::string &ifofilename, bool verbose);
gulong narticles() const { return wordcount; }
const std::string& dict_name() const { return bookname; }
const std::string& ifofilename() const { return ifo_file_name; }
gulong narticles() const { return wordcount; }
const std::string &dict_name() const { return bookname; }
const std::string &ifofilename() const { return ifo_file_name; }
const gchar *get_key(glong index) { return idx_file->get_key(index); }
gchar *get_data(glong index) {
const gchar *get_key(glong index) { return idx_file->get_key(index); }
gchar *get_data(glong index)
{
idx_file->get_data(index);
return DictBase::GetWordData(idx_file->wordentry_offset, idx_file->wordentry_size);
}
void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size) {
void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size)
{
*key = idx_file->get_key_and_data(index);
*offset = idx_file->wordentry_offset;
*size = idx_file->wordentry_size;
}
bool Lookup(const char *str, glong &idx);
bool Lookup(const char *str, glong &idx);
bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
private:
std::string ifo_file_name;
gulong wordcount;
gulong syn_wordcount;
std::string bookname;
bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
std::unique_ptr<IIndexFile> idx_file;
std::unique_ptr<SynFile> syn_file;
bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize);
};
// Set of loaded dictionaries (oLib) with lookup entry points that take a
// dictionary index (iLib / idict) and forward to the matching Dict.
// Copying is disabled.
class Libs {
public:
// Stores f as progress_func and sets iMaxFuzzyDistance to MAX_FUZZY_DISTANCE.
// NOTE(review): fuzzy_ and verbose_ are not assigned here; they only get a
// value through setFuzzy() / setVerbose().
Libs(std::function<void(void)> f = std::function<void(void)>()) {
progress_func = f;
iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg.
}
void setVerbose(bool verbose) { verbose_ = verbose; }
void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; }
~Libs();
Libs(const Libs&) = delete;
Libs& operator=(const Libs&) = delete;
void load_dict(const std::string& url);
void load(const std::list<std::string>& dicts_dirs,
const std::list<std::string>& order_list,
const std::list<std::string>& disable_list);
// The accessors below index oLib directly with operator[]; the index is not
// range-checked.
glong narticles(int idict) const { return oLib[idict]->narticles(); }
const std::string& dict_name(int idict) const { return oLib[idict]->dict_name(); }
gint ndicts() const { return oLib.size(); }
const gchar *poGetWord(glong iIndex, int iLib) {
return oLib[iLib]->get_key(iIndex);
}
// Returns nullptr for INVALID_INDEX, otherwise oLib[iLib]->get_data(iIndex).
gchar * poGetWordData(glong iIndex,int iLib) {
if (iIndex == INVALID_INDEX)
return nullptr;
return oLib[iLib]->get_data(iIndex);
}
const gchar *poGetCurrentWord(glong *iCurrent);
const gchar *poGetNextWord(const gchar *word, glong *iCurrent);
const gchar *poGetPreWord(glong *iCurrent);
// Forwards to Dict::Lookup of dictionary iLib.
bool LookupWord(const gchar* sWord, glong& iWordIndex, int iLib) {
return oLib[iLib]->Lookup(sWord, iWordIndex);
}
bool LookupSimilarWord(const gchar* sWord, glong & iWordIndex, int iLib);
bool SimpleLookupWord(const gchar* sWord, glong & iWordIndex, int iLib);
bool LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size);
gint LookupWithRule(const gchar *sWord, gchar *reslist[]);
bool LookupData(const gchar *sWord, std::vector<gchar *> *reslist);
protected:
bool fuzzy_;
private:
std::vector<Dict *> oLib; // word Libs.
int iMaxFuzzyDistance;
std::function<void(void)> progress_func;
bool verbose_;
// NOTE(review): the members from here to the closing brace repeat Dict's
// private section (ifo_file_name ... load_ifofile) -- presumably the
// reformatted side of this diff rather than members of Libs; confirm.
std::string ifo_file_name;
gulong wordcount;
gulong syn_wordcount;
std::string bookname;
std::unique_ptr<IIndexFile> idx_file;
std::unique_ptr<SynFile> syn_file;
bool load_ifofile(const std::string &ifofilename, gulong &idxfilesize);
};
// Set of loaded dictionaries (oLib) with lookup entry points that take a
// dictionary index (iLib / idict) and forward to the matching Dict.
// Copying is disabled.
class Libs
{
public:
    // Stores f as progress_func (an empty std::function by default) and sets
    // iMaxFuzzyDistance to MAX_FUZZY_DISTANCE.
    // fuzzy_ and verbose_ start as false so they are never read while
    // indeterminate; setFuzzy() / setVerbose() still override them.
    // Initializers are listed in member declaration order.
    Libs(std::function<void(void)> f = std::function<void(void)>())
        : fuzzy_(false)
        , iMaxFuzzyDistance(MAX_FUZZY_DISTANCE) //need to read from cfg.
        , progress_func(f)
        , verbose_(false)
    {
    }
    void setVerbose(bool verbose) { verbose_ = verbose; }
    void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; }
    ~Libs();
    Libs(const Libs &) = delete;
    Libs &operator=(const Libs &) = delete;

    void load_dict(const std::string &url);
    void load(const std::list<std::string> &dicts_dirs,
              const std::list<std::string> &order_list,
              const std::list<std::string> &disable_list);

    // The accessors below index oLib directly with operator[]; the index is
    // not range-checked.
    glong narticles(int idict) const { return oLib[idict]->narticles(); }
    const std::string &dict_name(int idict) const { return oLib[idict]->dict_name(); }
    // Number of entries in oLib; the size_t -> gint conversion is explicit.
    gint ndicts() const { return static_cast<gint>(oLib.size()); }

    const gchar *poGetWord(glong iIndex, int iLib)
    {
        return oLib[iLib]->get_key(iIndex);
    }
    // Returns nullptr for INVALID_INDEX, otherwise oLib[iLib]->get_data(iIndex).
    gchar *poGetWordData(glong iIndex, int iLib)
    {
        if (iIndex == INVALID_INDEX)
            return nullptr;
        return oLib[iLib]->get_data(iIndex);
    }
    const gchar *poGetCurrentWord(glong *iCurrent);
    const gchar *poGetNextWord(const gchar *word, glong *iCurrent);
    const gchar *poGetPreWord(glong *iCurrent);
    // Forwards to Dict::Lookup of dictionary iLib.
    bool LookupWord(const gchar *sWord, glong &iWordIndex, int iLib)
    {
        return oLib[iLib]->Lookup(sWord, iWordIndex);
    }
    bool LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib);
    bool SimpleLookupWord(const gchar *sWord, glong &iWordIndex, int iLib);

    bool LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size);
    gint LookupWithRule(const gchar *sWord, gchar *reslist[]);
    bool LookupData(const gchar *sWord, std::vector<gchar *> *reslist);

protected:
    bool fuzzy_;

private:
    std::vector<Dict *> oLib; // word Libs.
    int iMaxFuzzyDistance;
    std::function<void(void)> progress_func;
    bool verbose_;
};
// Query kinds; this is the return type of analyze_query() declared below.
// NOTE(review): judging by the names, these are plain, regular-expression,
// fuzzy and data queries -- confirm against analyze_query()'s definition.
enum query_t {
qtSIMPLE, qtREGEXP, qtFUZZY, qtDATA
qtSIMPLE,
qtREGEXP,
qtFUZZY,
qtDATA
};
extern query_t analyze_query(const char *s, std::string& res);
extern query_t analyze_query(const char *s, std::string &res);

View File

@@ -19,100 +19,113 @@
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#include "config.h"
#endif
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <glib.h>
#include <glib/gi18n.h>
#include <cstdlib>
#include <cstdio>
#include <algorithm>
#include <sstream>
#include <iomanip>
#include <sstream>
#include "utils.hpp"
// Converts utf8_str from UTF-8 to the charset reported by g_get_charset().
// When g_get_charset() returns true the input is returned unchanged;
// otherwise it is converted with g_convert_with_fallback().
// If the conversion yields a null pointer, a message plus the GError text is
// printed to stderr and the process exits with EXIT_FAILURE -- so despite
// the "ign_err" in the name, a failed conversion is fatal here.
std::string utf8_to_locale_ign_err(const std::string& utf8_str)
std::string utf8_to_locale_ign_err(const std::string &utf8_str)
{
std::string res;
std::string res;
const char *charset;
if (g_get_charset(&charset))
res = utf8_str;
else {
const char *charset;
if (g_get_charset(&charset))
res = utf8_str;
else {
gsize bytes_read, bytes_written;
glib::Error err;
// tmp and err are ResourceWrapper instances, so the converted buffer and any
// GError are released when they go out of scope.
glib::CharStr tmp(g_convert_with_fallback(utf8_str.c_str(), -1, charset, "UTF-8", nullptr,
&bytes_read, &bytes_written, get_addr(err)));
if (nullptr == get_impl(tmp)){
fprintf(stderr, _("Can not convert %s to current locale.\n"), utf8_str.c_str());
fprintf(stderr, "%s\n", err->message);
exit(EXIT_FAILURE);
}
res = get_impl(tmp);
}
if (nullptr == get_impl(tmp)) {
fprintf(stderr, _("Can not convert %s to current locale.\n"), utf8_str.c_str());
fprintf(stderr, "%s\n", err->message);
exit(EXIT_FAILURE);
}
// Copies the converted bytes into res before tmp frees the buffer.
res = get_impl(tmp);
}
return res;
return res;
}
// Recursive helper for for_each_file(): walks dirname and calls
// f(full path, disable) for every non-directory entry whose name ends in
// suff and whose full path is not in order_list.
// `disable` is true when the full path appears in disable_list.
// Subdirectories are visited recursively; a directory that g_dir_open()
// cannot open is skipped silently.
// NOTE(review): identifiers containing a double underscore are reserved to
// the implementation in C++; consider renaming this helper.
static void __for_each_file(const std::string& dirname, const std::string& suff,
const std::list<std::string>& order_list, const std::list<std::string>& disable_list,
const std::function<void (const std::string&, bool)>& f)
static void __for_each_file(const std::string &dirname, const std::string &suff,
const std::list<std::string> &order_list, const std::list<std::string> &disable_list,
const std::function<void(const std::string &, bool)> &f)
{
GDir *dir = g_dir_open(dirname.c_str(), 0, nullptr);
GDir *dir = g_dir_open(dirname.c_str(), 0, nullptr);
if (dir) {
const gchar *filename;
const gchar *filename;
while ((filename = g_dir_read_name(dir))!=nullptr) {
const std::string fullfilename(dirname+G_DIR_SEPARATOR_S+filename);
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_DIR))
__for_each_file(fullfilename, suff, order_list, disable_list, f);
else if (g_str_has_suffix(filename, suff.c_str()) &&
std::find(order_list.begin(), order_list.end(),
fullfilename)==order_list.end()) {
const bool disable = std::find(disable_list.begin(),
disable_list.end(),
fullfilename)!=disable_list.end();
while ((filename = g_dir_read_name(dir)) != nullptr) {
const std::string fullfilename(dirname + G_DIR_SEPARATOR_S + filename);
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_DIR))
__for_each_file(fullfilename, suff, order_list, disable_list, f);
// Paths listed in order_list are skipped here; for_each_file() passes
// every order_list entry to f itself before scanning directories.
else if (g_str_has_suffix(filename, suff.c_str()) && std::find(order_list.begin(), order_list.end(), fullfilename) == order_list.end()) {
const bool disable = std::find(disable_list.begin(),
disable_list.end(),
fullfilename)
!= disable_list.end();
f(fullfilename, disable);
}
}
g_dir_close(dir);
}
}
}
g_dir_close(dir);
}
}
// Calls f(path, disable) first for every entry of order_list, in list order,
// and then for the files found by scanning each directory in dirs_list with
// __for_each_file() (suffix suff, entries of order_list excluded).
// `disable` is true when the path appears in disable_list.
// order_list entries are passed to f as-is: they are not checked for
// existence or for the suffix here.
void for_each_file(const std::list<std::string>& dirs_list, const std::string& suff,
const std::list<std::string>& order_list, const std::list<std::string>& disable_list,
const std::function<void (const std::string&, bool)>& f)
void for_each_file(const std::list<std::string> &dirs_list, const std::string &suff,
const std::list<std::string> &order_list, const std::list<std::string> &disable_list,
const std::function<void(const std::string &, bool)> &f)
{
for (const std::string & item : order_list) {
const bool disable = std::find(disable_list.begin(), disable_list.end(), item) != disable_list.end();
f(item, disable);
}
for (const std::string& item : dirs_list)
__for_each_file(item, suff, order_list, disable_list, f);
for (const std::string &item : order_list) {
const bool disable = std::find(disable_list.begin(), disable_list.end(), item) != disable_list.end();
f(item, disable);
}
for (const std::string &item : dirs_list)
__for_each_file(item, suff, order_list, disable_list, f);
}
// based on https://stackoverflow.com/questions/7724448/simple-json-string-escape-for-c/33799784#33799784
// Returns a copy of s with JSON string escapes applied: the double quote,
// backslash, \b, \f, \n, \r and \t get their two-character escapes, any other
// character in the range 0x00..0x1f is written as \u followed by four hex
// digits, and every other character is copied unchanged.
// The surrounding quotes are not added.
// NOTE(review): the range test compares plain char values, so where char is
// signed, bytes >= 0x80 compare negative and are copied through unchanged.
std::string json_escape_string(const std::string &s) {
std::ostringstream o;
for (auto c = s.cbegin(); c != s.cend(); c++) {
switch (*c) {
case '"': o << "\\\""; break;
case '\\': o << "\\\\"; break;
case '\b': o << "\\b"; break;
case '\f': o << "\\f"; break;
case '\n': o << "\\n"; break;
case '\r': o << "\\r"; break;
case '\t': o << "\\t"; break;
default:
if ('\x00' <= *c && *c <= '\x1f') {
o << "\\u"
<< std::hex << std::setw(4) << std::setfill('0') << (int)*c;
} else {
o << *c;
}
std::string json_escape_string(const std::string &s)
{
std::ostringstream o;
for (auto c = s.cbegin(); c != s.cend(); c++) {
switch (*c) {
case '"':
o << "\\\"";
break;
case '\\':
o << "\\\\";
break;
case '\b':
o << "\\b";
break;
case '\f':
o << "\\f";
break;
case '\n':
o << "\\n";
break;
case '\r':
o << "\\r";
break;
case '\t':
o << "\\t";
break;
default:
if ('\x00' <= *c && *c <= '\x1f') {
// std::hex and std::setfill stay set on the stream afterwards; only chars
// and string literals are inserted into `o` elsewhere in this function.
o << "\\u"
<< std::hex << std::setw(4) << std::setfill('0') << (int)*c;
} else {
o << *c;
}
}
}
}
return o.str();
return o.str();
}

View File

@@ -1,63 +1,78 @@
#pragma once
#include <glib.h>
#include <cstddef>
#include <cassert>
#include <string>
#include <list>
#include <cstddef>
#include <functional>
#include <glib.h>
#include <list>
#include <string>
// Holds a raw T* and calls unref_res on it, when non-null, from the
// destructor and from reset(). Copying is disabled.
// Template parameters:
//   T           - pointee type of the held pointer;
//   unref_res_t - parameter pointee type of the release function;
//   unref_res   - release function called with the held pointer.
template <typename T, typename unref_res_t, void (*unref_res)(unref_res_t *)>
class ResourceWrapper {
class ResourceWrapper
{
public:
ResourceWrapper(T *p = nullptr) : p_(p) {}
~ResourceWrapper() { free_resource(); }
ResourceWrapper(const ResourceWrapper&) = delete;
ResourceWrapper& operator=(const ResourceWrapper&) = delete;
T *operator->() const { return p_; }
bool operator!() const { return p_ == nullptr; }
const T& operator[](size_t idx) const {
// Takes over p; the default is an empty (null) wrapper.
ResourceWrapper(T *p = nullptr)
: p_(p)
{
}
~ResourceWrapper() { free_resource(); }
ResourceWrapper(const ResourceWrapper &) = delete;
ResourceWrapper &operator=(const ResourceWrapper &) = delete;
T *operator->() const { return p_; }
bool operator!() const { return p_ == nullptr; }
// Element access on the held pointer; the only check is the assert that the
// pointer is non-null.
const T &operator[](size_t idx) const
{
assert(p_ != nullptr);
return p_[idx];
}
void reset(T *newp) {
if (p_ != newp) {
free_resource();
p_ = newp;
}
}
// Releases the current pointer and stores newp; does nothing when newp is
// the pointer already held.
void reset(T *newp)
{
if (p_ != newp) {
free_resource();
p_ = newp;
}
}
friend inline bool operator==(const ResourceWrapper& lhs, std::nullptr_t) noexcept {
friend inline bool operator==(const ResourceWrapper &lhs, std::nullptr_t) noexcept
{
return !lhs.p_;
}
friend inline bool operator!=(const ResourceWrapper& lhs, std::nullptr_t) noexcept {
friend inline bool operator!=(const ResourceWrapper &lhs, std::nullptr_t) noexcept
{
return !!lhs.p_;
}
friend inline T *get_impl(const ResourceWrapper& rw) {
return rw.p_;
}
// Returns the held pointer without giving up ownership.
friend inline T *get_impl(const ResourceWrapper &rw)
{
return rw.p_;
}
friend inline T **get_addr(ResourceWrapper& rw) {
return &rw.p_;
}
// Returns the address of the held pointer so a callee can store into it.
// NOTE(review): a pointer already held is not released when the callee
// overwrites it -- presumably only used on empty wrappers; confirm callers.
friend inline T **get_addr(ResourceWrapper &rw)
{
return &rw.p_;
}
private:
T *p_;
T *p_;
void free_resource() { if (p_) unref_res(p_); }
// Calls unref_res on p_ when it is non-null; p_ itself is left unchanged.
void free_resource()
{
if (p_)
unref_res(p_);
}
};
// ResourceWrapper aliases for GLib-allocated resources:
//   CharStr - gchar buffer released with g_free;
//   Error   - GError released with g_error_free.
namespace glib {
typedef ResourceWrapper<gchar, void, g_free> CharStr;
typedef ResourceWrapper<GError, GError, g_error_free> Error;
namespace glib
{
typedef ResourceWrapper<gchar, void, g_free> CharStr;
typedef ResourceWrapper<GError, GError, g_error_free> Error;
}
extern std::string utf8_to_locale_ign_err(const std::string& utf8_str);
extern std::string utf8_to_locale_ign_err(const std::string &utf8_str);
extern void for_each_file(const std::list<std::string>& dirs_list, const std::string& suff,
const std::list<std::string>& order_list, const std::list<std::string>& disable_list,
const std::function<void (const std::string&, bool)>& f);
extern void for_each_file(const std::list<std::string> &dirs_list, const std::string &suff,
const std::list<std::string> &order_list, const std::list<std::string> &disable_list,
const std::function<void(const std::string &, bool)> &f);
extern std::string json_escape_string(const std::string &str);