diff --git a/src/dictziplib.cpp b/src/dictziplib.cpp index 67f5e77..e8716bb 100644 --- a/src/dictziplib.cpp +++ b/src/dictziplib.cpp @@ -26,20 +26,19 @@ //#define HAVE_MMAP //it will defined in config.h. this can be done by configure.in with a AC_FUNC_MMAP. #ifdef HAVE_CONFIG_H -# include "config.h" +#include "config.h" #endif #include #include #include #include -#include -#include #include +#include +#include #include - #include "dictziplib.hpp" #define USE_CACHE 1 @@ -57,426 +56,424 @@ /* For gzip-compatible header, as defined in RFC 1952 */ - /* Magic for GZIP (rfc1952) */ -#define GZ_MAGIC1 0x1f /* First magic byte */ -#define GZ_MAGIC2 0x8b /* Second magic byte */ +/* Magic for GZIP (rfc1952) */ +#define GZ_MAGIC1 0x1f /* First magic byte */ +#define GZ_MAGIC2 0x8b /* Second magic byte */ - /* FLaGs (bitmapped), from rfc1952 */ -#define GZ_FTEXT 0x01 /* Set for ASCII text */ -#define GZ_FHCRC 0x02 /* Header CRC16 */ -#define GZ_FEXTRA 0x04 /* Optional field (random access index) */ -#define GZ_FNAME 0x08 /* Original name */ -#define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */ -#define GZ_MAX 2 /* Maximum compression */ -#define GZ_FAST 4 /* Fasted compression */ +/* FLaGs (bitmapped), from rfc1952 */ +#define GZ_FTEXT 0x01 /* Set for ASCII text */ +#define GZ_FHCRC 0x02 /* Header CRC16 */ +#define GZ_FEXTRA 0x04 /* Optional field (random access index) */ +#define GZ_FNAME 0x08 /* Original name */ +#define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */ +#define GZ_MAX 2 /* Maximum compression */ +#define GZ_FAST 4 /* Fasted compression */ - /* These are from rfc1952 */ -#define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */ -#define GZ_OS_AMIGA 1 /* Amiga */ -#define GZ_OS_VMS 2 /* VMS (or OpenVMS) */ -#define GZ_OS_UNIX 3 /* Unix */ -#define GZ_OS_VMCMS 4 /* VM/CMS */ -#define GZ_OS_ATARI 5 /* Atari TOS */ -#define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */ -#define GZ_OS_MAC 7 /* Macintosh */ -#define GZ_OS_Z 8 /* Z-System */ -#define GZ_OS_CPM 9 /* CP/M */ -#define GZ_OS_TOPS20 10 /* TOPS-20 */ -#define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */ -#define GZ_OS_QDOS 12 /* QDOS */ -#define GZ_OS_ACORN 13 /* Acorn RISCOS */ -#define GZ_OS_UNKNOWN 255 /* unknown */ +/* These are from rfc1952 */ +#define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */ +#define GZ_OS_AMIGA 1 /* Amiga */ +#define GZ_OS_VMS 2 /* VMS (or OpenVMS) */ +#define GZ_OS_UNIX 3 /* Unix */ +#define GZ_OS_VMCMS 4 /* VM/CMS */ +#define GZ_OS_ATARI 5 /* Atari TOS */ +#define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */ +#define GZ_OS_MAC 7 /* Macintosh */ +#define GZ_OS_Z 8 /* Z-System */ +#define GZ_OS_CPM 9 /* CP/M */ +#define GZ_OS_TOPS20 10 /* TOPS-20 */ +#define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */ +#define GZ_OS_QDOS 12 /* QDOS */ +#define GZ_OS_ACORN 13 /* Acorn RISCOS */ +#define GZ_OS_UNKNOWN 255 /* unknown */ -#define GZ_RND_S1 'R' /* First magic for random access format */ -#define GZ_RND_S2 'A' /* Second magic for random access format */ +#define GZ_RND_S1 'R' /* First magic for random access format */ +#define GZ_RND_S2 'A' /* Second magic for random access format */ -#define GZ_ID1 0 /* GZ_MAGIC1 */ -#define GZ_ID2 1 /* GZ_MAGIC2 */ -#define GZ_CM 2 /* Compression Method (Z_DEFALTED) */ -#define GZ_FLG 3 /* FLaGs (see above) */ -#define GZ_MTIME 4 /* Modification TIME */ -#define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */ -#define GZ_OS 9 /* Operating System */ -#define GZ_XLEN 10 /* eXtra LENgth (16bit) */ -#define GZ_FEXTRA_START 12 /* Start of extra fields */ -#define GZ_SI1 12 /* Subfield ID1 */ -#define GZ_SI2 13 /* Subfield ID2 */ -#define GZ_SUBLEN 14 /* Subfield length (16bit) */ -#define GZ_VERSION 16 /* Version for subfield format */ -#define GZ_CHUNKLEN 18 /* Chunk length (16bit) */ -#define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */ -#define GZ_RNDDATA 22 /* Random access data (16bit) */ - -#define DICT_UNKNOWN 0 -#define DICT_TEXT 1 -#define DICT_GZIP 2 -#define DICT_DZIP 3 +#define GZ_ID1 0 /* GZ_MAGIC1 */ +#define GZ_ID2 1 /* GZ_MAGIC2 */ +#define GZ_CM 2 /* Compression Method (Z_DEFALTED) */ +#define GZ_FLG 3 /* FLaGs (see above) */ +#define GZ_MTIME 4 /* Modification TIME */ +#define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */ +#define GZ_OS 9 /* Operating System */ +#define GZ_XLEN 10 /* eXtra LENgth (16bit) */ +#define GZ_FEXTRA_START 12 /* Start of extra fields */ +#define GZ_SI1 12 /* Subfield ID1 */ +#define GZ_SI2 13 /* Subfield ID2 */ +#define GZ_SUBLEN 14 /* Subfield length (16bit) */ +#define GZ_VERSION 16 /* Version for subfield format */ +#define GZ_CHUNKLEN 18 /* Chunk length (16bit) */ +#define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */ +#define GZ_RNDDATA 22 /* Random access data (16bit) */ +#define DICT_UNKNOWN 0 +#define DICT_TEXT 1 +#define DICT_GZIP 2 +#define DICT_DZIP 3 int DictData::read_header(const std::string &fname, int computeCRC) { - FILE *str; - int id1, id2, si1, si2; - char buffer[BUFFERSIZE]; - int extraLength, subLength; - int i; - char *pt; - int c; - struct stat sb; - unsigned long crc = crc32( 0L, Z_NULL, 0 ); - int count; - unsigned long offset; + FILE *str; + int id1, id2, si1, si2; + char buffer[BUFFERSIZE]; + int extraLength, subLength; + int i; + char *pt; + int c; + struct stat sb; + unsigned long crc = crc32(0L, Z_NULL, 0); + int count; + unsigned long offset; - if (!(str = fopen(fname.c_str(), "rb"))) { - //err_fatal_errno( __FUNCTION__, - // "Cannot open data file \"%s\" for read\n", filename ); + if (!(str = fopen(fname.c_str(), "rb"))) { + //err_fatal_errno( __FUNCTION__, + // "Cannot open data file \"%s\" for read\n", filename ); return -1; - } + } - this->headerLength = GZ_XLEN - 1; - this->type = DICT_UNKNOWN; + this->headerLength = GZ_XLEN - 1; + this->type = DICT_UNKNOWN; - id1 = getc( str ); - id2 = getc( str ); + id1 = getc(str); + id2 = getc(str); - if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2) { - this->type = DICT_TEXT; - fstat( fileno( str ), &sb ); - this->compressedLength = this->length = sb.st_size; - this->origFilename = fname; - this->mtime = sb.st_mtime; - if (computeCRC) { - rewind( str ); - while (!feof( str )) { - if ((count = fread( buffer, 1, BUFFERSIZE, str ))) { - crc = crc32(crc, (Bytef *)buffer, count); - } - } - } - this->crc = crc; - fclose( str ); - return 0; - } - this->type = DICT_GZIP; + if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2) { + this->type = DICT_TEXT; + fstat(fileno(str), &sb); + this->compressedLength = this->length = sb.st_size; + this->origFilename = fname; + this->mtime = sb.st_mtime; + if (computeCRC) { + rewind(str); + while (!feof(str)) { + if ((count = fread(buffer, 1, BUFFERSIZE, str))) { + crc = crc32(crc, (Bytef *)buffer, count); + } + } + } + this->crc = crc; + fclose(str); + return 0; + } + this->type = DICT_GZIP; - this->method = getc( str ); - this->flags = getc( str ); - this->mtime = getc( str ) << 0; - this->mtime |= getc( str ) << 8; - this->mtime |= getc( str ) << 16; - this->mtime |= getc( str ) << 24; - this->extraFlags = getc( str ); - this->os = getc( str ); + this->method = getc(str); + this->flags = getc(str); + this->mtime = getc(str) << 0; + this->mtime |= getc(str) << 8; + this->mtime |= getc(str) << 16; + this->mtime |= getc(str) << 24; + this->extraFlags = getc(str); + this->os = getc(str); - if (this->flags & GZ_FEXTRA) { - extraLength = getc( str ) << 0; - extraLength |= getc( str ) << 8; - this->headerLength += extraLength + 2; - si1 = getc( str ); - si2 = getc( str ); + if (this->flags & GZ_FEXTRA) { + extraLength = getc(str) << 0; + extraLength |= getc(str) << 8; + this->headerLength += extraLength + 2; + si1 = getc(str); + si2 = getc(str); - if (si1 == GZ_RND_S1 || si2 == GZ_RND_S2) { - subLength = getc( str ) << 0; - subLength |= getc( str ) << 8; - this->version = getc( str ) << 0; - this->version |= getc( str ) << 8; + if (si1 == GZ_RND_S1 || si2 == GZ_RND_S2) { + subLength = getc(str) << 0; + subLength |= getc(str) << 8; + this->version = getc(str) << 0; + this->version |= getc(str) << 8; - if (this->version != 1) { - //err_internal( __FUNCTION__, - // "dzip header version %d not supported\n", - // this->version ); - } + if (this->version != 1) { + //err_internal( __FUNCTION__, + // "dzip header version %d not supported\n", + // this->version ); + } - this->chunkLength = getc( str ) << 0; - this->chunkLength |= getc( str ) << 8; - this->chunkCount = getc( str ) << 0; - this->chunkCount |= getc( str ) << 8; + this->chunkLength = getc(str) << 0; + this->chunkLength |= getc(str) << 8; + this->chunkCount = getc(str) << 0; + this->chunkCount |= getc(str) << 8; - if (this->chunkCount <= 0) { - fclose( str ); - return 5; - } - this->chunks = (int *)malloc(sizeof( this->chunks[0] ) - * this->chunkCount ); - for (i = 0; i < this->chunkCount; i++) { - this->chunks[i] = getc( str ) << 0; - this->chunks[i] |= getc( str ) << 8; - } - this->type = DICT_DZIP; - } else { - fseek( str, this->headerLength, SEEK_SET ); - } - } + if (this->chunkCount <= 0) { + fclose(str); + return 5; + } + this->chunks = (int *)malloc(sizeof(this->chunks[0]) + * this->chunkCount); + for (i = 0; i < this->chunkCount; i++) { + this->chunks[i] = getc(str) << 0; + this->chunks[i] |= getc(str) << 8; + } + this->type = DICT_DZIP; + } else { + fseek(str, this->headerLength, SEEK_SET); + } + } - if (this->flags & GZ_FNAME) { /* FIXME! Add checking against header len */ - pt = buffer; - while ((c = getc( str )) && c != EOF) - *pt++ = c; - *pt = '\0'; + if (this->flags & GZ_FNAME) { /* FIXME! Add checking against header len */ + pt = buffer; + while ((c = getc(str)) && c != EOF) + *pt++ = c; + *pt = '\0'; - this->origFilename = buffer; - this->headerLength += this->origFilename.length() + 1; - } else { - this->origFilename = ""; - } + this->origFilename = buffer; + this->headerLength += this->origFilename.length() + 1; + } else { + this->origFilename = ""; + } - if (this->flags & GZ_COMMENT) { /* FIXME! Add checking for header len */ - pt = buffer; - while ((c = getc( str )) && c != EOF) - *pt++ = c; - *pt = '\0'; - comment = buffer; - headerLength += comment.length()+1; - } else { - comment = ""; - } + if (this->flags & GZ_COMMENT) { /* FIXME! Add checking for header len */ + pt = buffer; + while ((c = getc(str)) && c != EOF) + *pt++ = c; + *pt = '\0'; + comment = buffer; + headerLength += comment.length() + 1; + } else { + comment = ""; + } - if (this->flags & GZ_FHCRC) { - getc( str ); - getc( str ); - this->headerLength += 2; - } + if (this->flags & GZ_FHCRC) { + getc(str); + getc(str); + this->headerLength += 2; + } - if (ftell( str ) != this->headerLength + 1) { - //err_internal( __FUNCTION__, - // "File position (%lu) != header length + 1 (%d)\n", - // ftell( str ), this->headerLength + 1 ); - } + if (ftell(str) != this->headerLength + 1) { + //err_internal( __FUNCTION__, + // "File position (%lu) != header length + 1 (%d)\n", + // ftell( str ), this->headerLength + 1 ); + } - fseek( str, -8, SEEK_END ); - this->crc = getc( str ) << 0; - this->crc |= getc( str ) << 8; - this->crc |= getc( str ) << 16; - this->crc |= getc( str ) << 24; - this->length = getc( str ) << 0; - this->length |= getc( str ) << 8; - this->length |= getc( str ) << 16; - this->length |= getc( str ) << 24; - this->compressedLength = ftell( str ); + fseek(str, -8, SEEK_END); + this->crc = getc(str) << 0; + this->crc |= getc(str) << 8; + this->crc |= getc(str) << 16; + this->crc |= getc(str) << 24; + this->length = getc(str) << 0; + this->length |= getc(str) << 8; + this->length |= getc(str) << 16; + this->length |= getc(str) << 24; + this->compressedLength = ftell(str); - /* Compute offsets */ - this->offsets = (unsigned long *)malloc( sizeof( this->offsets[0] ) - * this->chunkCount ); - for (offset = this->headerLength + 1, i = 0; - i < this->chunkCount; - i++) { - this->offsets[i] = offset; - offset += this->chunks[i]; - } + /* Compute offsets */ + this->offsets = (unsigned long *)malloc(sizeof(this->offsets[0]) + * this->chunkCount); + for (offset = this->headerLength + 1, i = 0; + i < this->chunkCount; + i++) { + this->offsets[i] = offset; + offset += this->chunks[i]; + } - fclose( str ); - return 0; + fclose(str); + return 0; } -bool DictData::open(const std::string& fname, int computeCRC) +bool DictData::open(const std::string &fname, int computeCRC) { - struct stat sb; - int fd; + struct stat sb; + int fd; - this->initialized = 0; + this->initialized = 0; - if (stat(fname.c_str(), &sb) || !S_ISREG(sb.st_mode)) { - //err_warning( __FUNCTION__, - // "%s is not a regular file -- ignoring\n", fname ); - return false; - } + if (stat(fname.c_str(), &sb) || !S_ISREG(sb.st_mode)) { + //err_warning( __FUNCTION__, + // "%s is not a regular file -- ignoring\n", fname ); + return false; + } - if (read_header(fname, computeCRC)) { - //err_fatal( __FUNCTION__, - // "\"%s\" not in text or dzip format\n", fname ); - return false; - } + if (read_header(fname, computeCRC)) { + //err_fatal( __FUNCTION__, + // "\"%s\" not in text or dzip format\n", fname ); + return false; + } - if ((fd = ::open(fname.c_str(), O_RDONLY )) < 0) { - //err_fatal_errno( __FUNCTION__, - // "Cannot open data file \"%s\"\n", fname ); - return false; - } - if (fstat(fd, &sb)) { - //err_fatal_errno( __FUNCTION__, - // "Cannot stat data file \"%s\"\n", fname ); - return false; - } + if ((fd = ::open(fname.c_str(), O_RDONLY)) < 0) { + //err_fatal_errno( __FUNCTION__, + // "Cannot open data file \"%s\"\n", fname ); + return false; + } + if (fstat(fd, &sb)) { + //err_fatal_errno( __FUNCTION__, + // "Cannot stat data file \"%s\"\n", fname ); + return false; + } - this->size = sb.st_size; - ::close(fd); - if (!mapfile.open(fname.c_str(), size)) - return false; + this->size = sb.st_size; + ::close(fd); + if (!mapfile.open(fname.c_str(), size)) + return false; - this->start=mapfile.begin(); - this->end = this->start + this->size; + this->start = mapfile.begin(); + this->end = this->start + this->size; - for (size_t j = 0; j < DICT_CACHE_SIZE; j++) { - cache[j].chunk = -1; - cache[j].stamp = -1; - cache[j].inBuffer = nullptr; - cache[j].count = 0; - } + for (size_t j = 0; j < DICT_CACHE_SIZE; j++) { + cache[j].chunk = -1; + cache[j].stamp = -1; + cache[j].inBuffer = nullptr; + cache[j].count = 0; + } - return true; + return true; } void DictData::close() { - if (this->chunks) - free(this->chunks); - if (this->offsets) - free(this->offsets); + if (this->chunks) + free(this->chunks); + if (this->offsets) + free(this->offsets); - if (this->initialized) { - if (inflateEnd( &this->zStream )) { - //err_internal( __FUNCTION__, - // "Cannot shut down inflation engine: %s\n", - // this->zStream.msg ); - } - } + if (this->initialized) { + if (inflateEnd(&this->zStream)) { + //err_internal( __FUNCTION__, + // "Cannot shut down inflation engine: %s\n", + // this->zStream.msg ); + } + } - for (size_t i = 0; i < DICT_CACHE_SIZE; ++i){ - if (this -> cache [i].inBuffer) - free (this -> cache [i].inBuffer); - } + for (size_t i = 0; i < DICT_CACHE_SIZE; ++i) { + if (this->cache[i].inBuffer) + free(this->cache[i].inBuffer); + } } void DictData::read(char *buffer, unsigned long start, unsigned long size) { - char *pt; - unsigned long end; - int count; - char *inBuffer; - char outBuffer[OUT_BUFFER_SIZE]; - int firstChunk, lastChunk; - int firstOffset, lastOffset; - int i; - int found, target, lastStamp; - static int stamp = 0; + char *pt; + unsigned long end; + int count; + char *inBuffer; + char outBuffer[OUT_BUFFER_SIZE]; + int firstChunk, lastChunk; + int firstOffset, lastOffset; + int i; + int found, target, lastStamp; + static int stamp = 0; - end = start + size; + end = start + size; - //buffer = malloc( size + 1 ); + //buffer = malloc( size + 1 ); - //PRINTF(DBG_UNZIP, - // ("dict_data_read( %p, %lu, %lu )\n", - //h, start, size )); + //PRINTF(DBG_UNZIP, + // ("dict_data_read( %p, %lu, %lu )\n", + //h, start, size )); + switch (this->type) { + case DICT_GZIP: + //err_fatal( __FUNCTION__, + // "Cannot seek on pure gzip format files.\n" + // "Use plain text (for performance)" + // " or dzip format (for space savings).\n" ); + break; + case DICT_TEXT: + memcpy(buffer, this->start + start, size); + //buffer[size] = '\0'; + break; + case DICT_DZIP: + if (!this->initialized) { + ++this->initialized; + this->zStream.zalloc = nullptr; + this->zStream.zfree = nullptr; + this->zStream.opaque = nullptr; + this->zStream.next_in = 0; + this->zStream.avail_in = 0; + this->zStream.next_out = nullptr; + this->zStream.avail_out = 0; + if (inflateInit2(&this->zStream, -15) != Z_OK) { + //err_internal( __FUNCTION__, + // "Cannot initialize inflation engine: %s\n", + //this->zStream.msg ); + } + } + firstChunk = start / this->chunkLength; + firstOffset = start - firstChunk * this->chunkLength; + lastChunk = end / this->chunkLength; + lastOffset = end - lastChunk * this->chunkLength; + //PRINTF(DBG_UNZIP, + // (" start = %lu, end = %lu\n" + //"firstChunk = %d, firstOffset = %d," + //" lastChunk = %d, lastOffset = %d\n", + //start, end, firstChunk, firstOffset, lastChunk, lastOffset )); + for (pt = buffer, i = firstChunk; i <= lastChunk; i++) { - switch (this->type) { - case DICT_GZIP: - //err_fatal( __FUNCTION__, - // "Cannot seek on pure gzip format files.\n" - // "Use plain text (for performance)" - // " or dzip format (for space savings).\n" ); - break; - case DICT_TEXT: - memcpy( buffer, this->start + start, size ); - //buffer[size] = '\0'; - break; - case DICT_DZIP: - if (!this->initialized) { - ++this->initialized; - this->zStream.zalloc = nullptr; - this->zStream.zfree = nullptr; - this->zStream.opaque = nullptr; - this->zStream.next_in = 0; - this->zStream.avail_in = 0; - this->zStream.next_out = nullptr; - this->zStream.avail_out = 0; - if (inflateInit2( &this->zStream, -15 ) != Z_OK) { - //err_internal( __FUNCTION__, - // "Cannot initialize inflation engine: %s\n", - //this->zStream.msg ); - } - } - firstChunk = start / this->chunkLength; - firstOffset = start - firstChunk * this->chunkLength; - lastChunk = end / this->chunkLength; - lastOffset = end - lastChunk * this->chunkLength; - //PRINTF(DBG_UNZIP, - // (" start = %lu, end = %lu\n" - //"firstChunk = %d, firstOffset = %d," - //" lastChunk = %d, lastOffset = %d\n", - //start, end, firstChunk, firstOffset, lastChunk, lastOffset )); - for (pt = buffer, i = firstChunk; i <= lastChunk; i++) { - - /* Access cache */ - found = 0; - target = 0; - lastStamp = INT_MAX; - for (size_t j = 0; j < DICT_CACHE_SIZE; j++) { + /* Access cache */ + found = 0; + target = 0; + lastStamp = INT_MAX; + for (size_t j = 0; j < DICT_CACHE_SIZE; j++) { #if USE_CACHE - if (this->cache[j].chunk == i) { - found = 1; - target = j; - break; - } + if (this->cache[j].chunk == i) { + found = 1; + target = j; + break; + } #endif - if (this->cache[j].stamp < lastStamp) { - lastStamp = this->cache[j].stamp; - target = j; - } - } + if (this->cache[j].stamp < lastStamp) { + lastStamp = this->cache[j].stamp; + target = j; + } + } - this->cache[target].stamp = ++stamp; - if (found) { - count = this->cache[target].count; - inBuffer = this->cache[target].inBuffer; - } else { - this->cache[target].chunk = i; - if (!this->cache[target].inBuffer) - this->cache[target].inBuffer = (char *)malloc( IN_BUFFER_SIZE ); - inBuffer = this->cache[target].inBuffer; + this->cache[target].stamp = ++stamp; + if (found) { + count = this->cache[target].count; + inBuffer = this->cache[target].inBuffer; + } else { + this->cache[target].chunk = i; + if (!this->cache[target].inBuffer) + this->cache[target].inBuffer = (char *)malloc(IN_BUFFER_SIZE); + inBuffer = this->cache[target].inBuffer; - if (this->chunks[i] >= OUT_BUFFER_SIZE ) { - //err_internal( __FUNCTION__, - // "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n", - // i, this->chunks[i], OUT_BUFFER_SIZE ); - } - memcpy( outBuffer, this->start + this->offsets[i], this->chunks[i] ); + if (this->chunks[i] >= OUT_BUFFER_SIZE) { + //err_internal( __FUNCTION__, + // "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n", + // i, this->chunks[i], OUT_BUFFER_SIZE ); + } + memcpy(outBuffer, this->start + this->offsets[i], this->chunks[i]); - this->zStream.next_in = (Bytef *)outBuffer; - this->zStream.avail_in = this->chunks[i]; - this->zStream.next_out = (Bytef *)inBuffer; - this->zStream.avail_out = IN_BUFFER_SIZE; - if (inflate( &this->zStream, Z_PARTIAL_FLUSH ) != Z_OK) { - //err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg ); - } - if (this->zStream.avail_in) { - //err_internal( __FUNCTION__, - // "inflate did not flush (%d pending, %d avail)\n", - // this->zStream.avail_in, this->zStream.avail_out ); - } + this->zStream.next_in = (Bytef *)outBuffer; + this->zStream.avail_in = this->chunks[i]; + this->zStream.next_out = (Bytef *)inBuffer; + this->zStream.avail_out = IN_BUFFER_SIZE; + if (inflate(&this->zStream, Z_PARTIAL_FLUSH) != Z_OK) { + //err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg ); + } + if (this->zStream.avail_in) { + //err_internal( __FUNCTION__, + // "inflate did not flush (%d pending, %d avail)\n", + // this->zStream.avail_in, this->zStream.avail_out ); + } - count = IN_BUFFER_SIZE - this->zStream.avail_out; + count = IN_BUFFER_SIZE - this->zStream.avail_out; - this->cache[target].count = count; - } + this->cache[target].count = count; + } - if (i == firstChunk) { - if (i == lastChunk) { - memcpy( pt, inBuffer + firstOffset, lastOffset-firstOffset); - pt += lastOffset - firstOffset; - } else { - if (count != this->chunkLength ) { - //err_internal( __FUNCTION__, - // "Length = %d instead of %d\n", - //count, this->chunkLength ); - } - memcpy( pt, inBuffer + firstOffset, - this->chunkLength - firstOffset ); - pt += this->chunkLength - firstOffset; - } - } else if (i == lastChunk) { - memcpy( pt, inBuffer, lastOffset ); - pt += lastOffset; - } else { - assert( count == this->chunkLength ); - memcpy( pt, inBuffer, this->chunkLength ); - pt += this->chunkLength; - } - } - //*pt = '\0'; - break; - case DICT_UNKNOWN: - //err_fatal( __FUNCTION__, "Cannot read unknown file type\n" ); - break; - } + if (i == firstChunk) { + if (i == lastChunk) { + memcpy(pt, inBuffer + firstOffset, lastOffset - firstOffset); + pt += lastOffset - firstOffset; + } else { + if (count != this->chunkLength) { + //err_internal( __FUNCTION__, + // "Length = %d instead of %d\n", + //count, this->chunkLength ); + } + memcpy(pt, inBuffer + firstOffset, + this->chunkLength - firstOffset); + pt += this->chunkLength - firstOffset; + } + } else if (i == lastChunk) { + memcpy(pt, inBuffer, lastOffset); + pt += lastOffset; + } else { + assert(count == this->chunkLength); + memcpy(pt, inBuffer, this->chunkLength); + pt += this->chunkLength; + } + } + //*pt = '\0'; + break; + case DICT_UNKNOWN: + //err_fatal( __FUNCTION__, "Cannot read unknown file type\n" ); + break; + } } diff --git a/src/dictziplib.hpp b/src/dictziplib.hpp index 9d48aad..859c295 100644 --- a/src/dictziplib.hpp +++ b/src/dictziplib.hpp @@ -7,49 +7,50 @@ #include "mapfile.hpp" struct DictCache { - int chunk; - char *inBuffer; - int stamp; - int count; + int chunk; + char *inBuffer; + int stamp; + int count; }; -class DictData { +class DictData +{ public: - static const size_t DICT_CACHE_SIZE = 5; + static const size_t DICT_CACHE_SIZE = 5; + + DictData() {} + ~DictData() { close(); } + bool open(const std::string &filename, int computeCRC); + void close(); + void read(char *buffer, unsigned long start, unsigned long size); - DictData() {} - ~DictData() { close(); } - bool open(const std::string& filename, int computeCRC); - void close(); - void read(char *buffer, unsigned long start, unsigned long size); private: - const char *start; /* start of mmap'd area */ - const char *end; /* end of mmap'd area */ - unsigned long size; /* size of mmap */ - - int type; - z_stream zStream; - int initialized; - - int headerLength; - int method; - int flags; - time_t mtime; - int extraFlags; - int os; - int version; - int chunkLength; - int chunkCount; - int *chunks; - unsigned long *offsets; /* Sum-scan of chunks. */ - std::string origFilename; - std::string comment; - unsigned long crc; - unsigned long length; - unsigned long compressedLength; - DictCache cache[DICT_CACHE_SIZE]; - MapFile mapfile; + const char *start; /* start of mmap'd area */ + const char *end; /* end of mmap'd area */ + unsigned long size; /* size of mmap */ - int read_header(const std::string &filename, int computeCRC); + int type; + z_stream zStream; + int initialized; + + int headerLength; + int method; + int flags; + time_t mtime; + int extraFlags; + int os; + int version; + int chunkLength; + int chunkCount; + int *chunks; + unsigned long *offsets; /* Sum-scan of chunks. */ + std::string origFilename; + std::string comment; + unsigned long crc; + unsigned long length; + unsigned long compressedLength; + DictCache cache[DICT_CACHE_SIZE]; + MapFile mapfile; + + int read_header(const std::string &filename, int computeCRC); }; - diff --git a/src/distance.cpp b/src/distance.cpp index 1c9ae7f..8e5b553 100644 --- a/src/distance.cpp +++ b/src/distance.cpp @@ -33,7 +33,6 @@ The Levenshtein distance algorithm has been used in: * Plagiarism detection */ - #include #include @@ -56,43 +55,39 @@ Enhanced Dynamic Programming ASM Algorithm" static inline int minimum(const int a, const int b, const int c) { int min = a; - if ( b < min ) + if (b < min) min = b; - if ( c < min ) + if (c < min) min = c; return min; } -int EditDistance::CalEditDistance(const gunichar *s,const gunichar *t,const int limit) +int EditDistance::CalEditDistance(const gunichar *s, const gunichar *t, const int limit) /*Compute levenshtein distance between s and t, this is using QUICK algorithm*/ { - int n=0,m=0,iLenDif,k,i,j,cost; + int n = 0, m = 0, iLenDif, k, i, j, cost; // Remove leftmost matching portion of strings - while ( *s && (*s==*t) ) - { + while (*s && (*s == *t)) { s++; - t++; + t++; } - while (s[n]) - { - n++; - } - while (t[m]) - { - m++; - } - - // Remove rightmost matching portion of strings by decrement n and m. - while ( n && m && (*(s+n-1)==*(t+m-1)) ) - { - n--;m--; + while (s[n]) { + n++; } - if ( m==0 || n==0 || d==nullptr ) - return (m+n); - if ( m < n ) - { - const gunichar * temp = s; + while (t[m]) { + m++; + } + + // Remove rightmost matching portion of strings by decrement n and m. + while (n && m && (*(s + n - 1) == *(t + m - 1))) { + n--; + m--; + } + if (m == 0 || n == 0 || d == nullptr) + return (m + n); + if (m < n) { + const gunichar *temp = s; int itemp = n; s = t; t = temp; @@ -100,55 +95,51 @@ int EditDistance::CalEditDistance(const gunichar *s,const gunichar *t,const int m = itemp; } iLenDif = m - n; - if ( iLenDif >= limit ) + if (iLenDif >= limit) return iLenDif; // step 1 - n++;m++; -// d=(int*)malloc(sizeof(int)*m*n); - if ( m*n > currentelements ) - { - currentelements = m*n*2; // double the request - d = static_cast(realloc(d, sizeof(int) * currentelements)); - if ( nullptr == d ) - return (m+n); + n++; + m++; + // d=(int*)malloc(sizeof(int)*m*n); + if (m * n > currentelements) { + currentelements = m * n * 2; // double the request + d = static_cast(realloc(d, sizeof(int) * currentelements)); + if (nullptr == d) + return (m + n); } // step 2, init matrix - for (k=0;k=2 && j>=2 && (d[j*n+i]-d[(j-2)*n+i-2]==2) - && (s[i-2]==t[j-1]) && (s[i-1]==t[j-2]) ) - d[j*n+i]--; + if (i >= 2 && j >= 2 && (d[j * n + i] - d[(j - 2) * n + i - 2] == 2) + && (s[i - 2] == t[j - 1]) && (s[i - 1] == t[j - 2])) + d[j * n + i]--; #endif } // second calculate row, d(k,j) // now j==iLenDif+i; - for ( k=1;k<=i;k++ ) - { - cost = s[k-1]==t[j-1]?0:1; - d[j*n+k] = minimum(d[(j-1)*n+k]+1,d[j*n+k-1]+1,d[(j-1)*n+k-1]+cost); + for (k = 1; k <= i; k++) { + cost = s[k - 1] == t[j - 1] ? 0 : 1; + d[j * n + k] = minimum(d[(j - 1) * n + k] + 1, d[j * n + k - 1] + 1, d[(j - 1) * n + k - 1] + cost); #ifdef COVER_TRANSPOSITION - if ( k>=2 && j>=2 && (d[j*n+k]-d[(j-2)*n+k-2]==2) - && (s[k-2]==t[j-1]) && (s[k-1]==t[j-2]) ) - d[j*n+k]--; + if (k >= 2 && j >= 2 && (d[j * n + k] - d[(j - 2) * n + k - 2] == 2) + && (s[k - 2] == t[j - 1]) && (s[k - 1] == t[j - 2])) + d[j * n + k]--; #endif } // test if d(i,j) limit gets equal or exceed - if ( d[j*n+i] >= limit ) - { - return d[j*n+i]; + if (d[j * n + i] >= limit) { + return d[j * n + i]; } } // d(n-1,m-1) - return d[n*m-1]; + return d[n * m - 1]; } diff --git a/src/distance.hpp b/src/distance.hpp index 1c1e9e1..d472d5b 100644 --- a/src/distance.hpp +++ b/src/distance.hpp @@ -3,21 +3,24 @@ #include #include -class EditDistance { +class EditDistance +{ public: - EditDistance() { + EditDistance() + { currentelements = 2500; // It's enough for most conditions :-) - d = static_cast(malloc(sizeof(int)*currentelements)); + d = static_cast(malloc(sizeof(int) * currentelements)); } - ~EditDistance() { + ~EditDistance() + { if (d != nullptr) free(d); } - EditDistance(const EditDistance&) = delete; - EditDistance& operator=(const EditDistance&) = delete; - int CalEditDistance( const gunichar *s, const gunichar *t, const int limit ); + EditDistance(const EditDistance &) = delete; + EditDistance &operator=(const EditDistance &) = delete; + int CalEditDistance(const gunichar *s, const gunichar *t, const int limit); + private: int *d; int currentelements; }; - diff --git a/src/libwrapper.cpp b/src/libwrapper.cpp index b49a292..1544b18 100644 --- a/src/libwrapper.cpp +++ b/src/libwrapper.cpp @@ -19,7 +19,7 @@ */ #ifdef HAVE_CONFIG_H -# include "config.h" +#include "config.h" #endif #include @@ -48,401 +48,405 @@ static const char *ABR_VISFMT = ESC_GREEN; static std::string xdxf2text(const char *p, bool colorize_output) { - std::string res; - for (; *p; ++p) { - if (*p != '<') { - if (g_str_has_prefix(p, ">")) { - res += ">"; - p += 3; - } else if (g_str_has_prefix(p, "<")) { - res += "<"; - p += 3; - } else if (g_str_has_prefix(p, "&")) { - res += "&"; - p += 4; - } else if (g_str_has_prefix(p, """)) { - res += "\""; - p += 5; - } else if (g_str_has_prefix(p, "'")) { - res += "\'"; - p += 5; - } else - res += *p; - continue; - } + std::string res; + for (; *p; ++p) { + if (*p != '<') { + if (g_str_has_prefix(p, ">")) { + res += ">"; + p += 3; + } else if (g_str_has_prefix(p, "<")) { + res += "<"; + p += 3; + } else if (g_str_has_prefix(p, "&")) { + res += "&"; + p += 4; + } else if (g_str_has_prefix(p, """)) { + res += "\""; + p += 5; + } else if (g_str_has_prefix(p, "'")) { + res += "\'"; + p += 5; + } else + res += *p; + continue; + } - const char *next = strchr(p, '>'); - if (!next) - continue; + const char *next = strchr(p, '>'); + if (!next) + continue; - const std::string name(p+1, next-p-1); + const std::string name(p + 1, next - p - 1); - if (name == "abr") - res += colorize_output ? ABR_VISFMT : ""; - else if (name=="/abr") - res += colorize_output ? ESC_END : ""; - else if (name == "k") { - const char *begin = next; - if ((next = strstr(begin, "")) != nullptr) - next += sizeof("") - 1 - 1; - else - next = begin; + if (name == "abr") + res += colorize_output ? ABR_VISFMT : ""; + else if (name == "/abr") + res += colorize_output ? ESC_END : ""; + else if (name == "k") { + const char *begin = next; + if ((next = strstr(begin, "")) != nullptr) + next += sizeof("") - 1 - 1; + else + next = begin; } else if (name == "kref") { res += colorize_output ? KREF_VISFMT : ""; } else if (name == "/kref") { res += colorize_output ? ESC_END : ""; - } else if (name == "b") - res += colorize_output ? ESC_BOLD : ""; - else if (name=="/b") - res += colorize_output ? ESC_END : ""; - else if (name == "i") - res += colorize_output ? ESC_ITALIC : ""; - else if (name == "/i") - res += colorize_output ? ESC_END : ""; - else if (name == "tr") { + } else if (name == "b") + res += colorize_output ? ESC_BOLD : ""; + else if (name == "/b") + res += colorize_output ? ESC_END : ""; + else if (name == "i") + res += colorize_output ? ESC_ITALIC : ""; + else if (name == "/i") + res += colorize_output ? ESC_END : ""; + else if (name == "tr") { if (colorize_output) res += TRANSCRIPTION_VISFMT; - res += "["; - } else if (name == "/tr") { - res += "]"; + res += "["; + } else if (name == "/tr") { + res += "]"; if (colorize_output) res += ESC_END; - } else if (name == "ex") - res += colorize_output ? EXAMPLE_VISFMT : ""; - else if (name == "/ex") - res += colorize_output ? ESC_END : ""; - else if (!name.empty() && name[0] == 'c' && name != "co") { - std::string::size_type pos = name.find("code"); - if (pos != std::string::npos) { - pos += sizeof("code=\"") - 1; - std::string::size_type end_pos = name.find("\""); - const std::string color(name, pos, end_pos - pos); - res += ""; - } else { - res += ""; - } - } else if (name == "/c") - res += ""; + } else if (name == "ex") + res += colorize_output ? EXAMPLE_VISFMT : ""; + else if (name == "/ex") + res += colorize_output ? ESC_END : ""; + else if (!name.empty() && name[0] == 'c' && name != "co") { + std::string::size_type pos = name.find("code"); + if (pos != std::string::npos) { + pos += sizeof("code=\"") - 1; + std::string::size_type end_pos = name.find("\""); + const std::string color(name, pos, end_pos - pos); + res += ""; + } else { + res += ""; + } + } else if (name == "/c") + res += ""; - p = next; - } - return res; + p = next; + } + return res; } static std::string parse_data(const gchar *data, bool colorize_output) { - if (!data) - return ""; + if (!data) + return ""; - std::string res; - guint32 data_size, sec_size = 0; - gchar *m_str; - const gchar *p = data; - data_size = get_uint32(p); - p += sizeof(guint32); - while (guint32(p - data) match_res((MAX_MATCH_ITEM_PER_LIB) * ndicts()); + std::vector match_res((MAX_MATCH_ITEM_PER_LIB)*ndicts()); - const gint nfound = Libs::LookupWithRule(str.c_str(), &match_res[0]); - if (nfound == 0) - return; + const gint nfound = Libs::LookupWithRule(str.c_str(), &match_res[0]); + if (nfound == 0) + return; - for (gint i = 0; i < nfound; ++i) { - SimpleLookup(match_res[i], res_list); - g_free(match_res[i]); - } + for (gint i = 0; i < nfound; ++i) { + SimpleLookup(match_res[i], res_list); + g_free(match_res[i]); + } } -void Library::LookupData(const std::string &str, TSearchResultList& res_list) +void Library::LookupData(const std::string &str, TSearchResultList &res_list) { - std::vector > drl(ndicts()); - if (!Libs::LookupData(str.c_str(), &drl[0])) - return; - for (int idict = 0; idict < ndicts(); ++idict) - for (gchar *res : drl[idict]) { - SimpleLookup(res, res_list); - g_free(res); - } + std::vector> drl(ndicts()); + if (!Libs::LookupData(str.c_str(), &drl[0])) + return; + for (int idict = 0; idict < ndicts(); ++idict) + for (gchar *res : drl[idict]) { + SimpleLookup(res, res_list); + g_free(res); + } } -void Library::print_search_result(FILE *out, const TSearchResult & res, bool &first_result) +void Library::print_search_result(FILE *out, const TSearchResult &res, bool &first_result) { - std::string loc_bookname, loc_def, loc_exp; + std::string loc_bookname, loc_def, loc_exp; - if (!utf8_output_){ - loc_bookname = utf8_to_locale_ign_err(res.bookname); - loc_def = utf8_to_locale_ign_err(res.def); - loc_exp = utf8_to_locale_ign_err(res.exp); - } - if(json_) { - if(!first_result) { + if (!utf8_output_) { + loc_bookname = utf8_to_locale_ign_err(res.bookname); + loc_def = utf8_to_locale_ign_err(res.def); + loc_exp = utf8_to_locale_ign_err(res.exp); + } + if (json_) { + if (!first_result) { fputs(",", out); - } else { - first_result=false; - } - fprintf(out,"{\"dict\": \"%s\",\"word\":\"%s\",\"definition\":\"%s\"}", - json_escape_string(res.bookname).c_str(), - json_escape_string(res.def).c_str(), - json_escape_string(res.exp).c_str()); - } else { - fprintf(out, - "-->%s%s%s\n" - "-->%s%s%s\n" - "%s\n\n", - colorize_output_ ? NAME_OF_DICT_VISFMT : "", - utf8_output_ ? res.bookname.c_str() : loc_bookname.c_str(), - colorize_output_ ? ESC_END : "", - colorize_output_ ? SEARCH_TERM_VISFMT : "", - utf8_output_ ? res.def.c_str() : loc_def.c_str(), - colorize_output_ ? ESC_END : "", - utf8_output_ ? res.exp.c_str() : loc_exp.c_str()); + first_result = false; } + fprintf(out, "{\"dict\": \"%s\",\"word\":\"%s\",\"definition\":\"%s\"}", + json_escape_string(res.bookname).c_str(), + json_escape_string(res.def).c_str(), + json_escape_string(res.exp).c_str()); + + } else { + fprintf(out, + "-->%s%s%s\n" + "-->%s%s%s\n" + "%s\n\n", + colorize_output_ ? NAME_OF_DICT_VISFMT : "", + utf8_output_ ? res.bookname.c_str() : loc_bookname.c_str(), + colorize_output_ ? ESC_END : "", + colorize_output_ ? SEARCH_TERM_VISFMT : "", + utf8_output_ ? res.def.c_str() : loc_def.c_str(), + colorize_output_ ? ESC_END : "", + utf8_output_ ? res.exp.c_str() : loc_exp.c_str()); + } } -namespace { - class sdcv_pager final { - public: - explicit sdcv_pager(bool ignore_env = false) { +namespace +{ +class sdcv_pager final +{ +public: + explicit sdcv_pager(bool ignore_env = false) + { + output = stdout; + if (ignore_env) { + return; + } + const gchar *pager = g_getenv("SDCV_PAGER"); + if (pager && (output = popen(pager, "w")) == nullptr) { + perror(_("popen failed")); output = stdout; - if (ignore_env) { - return; - } - const gchar *pager = g_getenv("SDCV_PAGER"); - if (pager && (output = popen(pager, "w")) == nullptr) { - perror(_("popen failed")); - output = stdout; - } } - sdcv_pager(const sdcv_pager&) = delete; - sdcv_pager& operator=(const sdcv_pager&) = delete; - ~sdcv_pager() { - if (output != stdout) { - pclose(output); - } + } + sdcv_pager(const sdcv_pager &) = delete; + sdcv_pager &operator=(const sdcv_pager &) = delete; + ~sdcv_pager() + { + if (output != stdout) { + pclose(output); } - FILE *get_stream() { return output; } - private: - FILE *output; - }; + } + FILE *get_stream() { return output; } + +private: + FILE *output; +}; } bool Library::process_phrase(const char *loc_str, IReadLine &io, bool force) { - if (nullptr == loc_str) - return true; + if (nullptr == loc_str) + return true; - std::string query; + std::string query; - analyze_query(loc_str, query); - if (!query.empty()) - io.add_to_history(query.c_str()); + analyze_query(loc_str, query); + if (!query.empty()) + io.add_to_history(query.c_str()); - gsize bytes_read; - gsize bytes_written; + gsize bytes_read; + gsize bytes_written; glib::Error err; glib::CharStr str; - if (!utf8_input_) - str.reset(g_locale_to_utf8(loc_str, -1, &bytes_read, &bytes_written, get_addr(err))); - else - str.reset(g_strdup(loc_str)); + if (!utf8_input_) + str.reset(g_locale_to_utf8(loc_str, -1, &bytes_read, &bytes_written, get_addr(err))); + else + str.reset(g_strdup(loc_str)); - if (nullptr == get_impl(str)) { - fprintf(stderr, _("Can not convert %s to utf8.\n"), loc_str); - fprintf(stderr, "%s\n", err->message); - return false; - } + if (nullptr == get_impl(str)) { + fprintf(stderr, _("Can not convert %s to utf8.\n"), loc_str); + fprintf(stderr, "%s\n", err->message); + return false; + } - if (str[0] == '\0') - return true; + if (str[0] == '\0') + return true; - TSearchResultList res_list; + TSearchResultList res_list; - switch (analyze_query(get_impl(str), query)) { - case qtFUZZY: - LookupWithFuzzy(query, res_list); - break; - case qtREGEXP: - LookupWithRule(query, res_list); - break; - case qtSIMPLE: - SimpleLookup(get_impl(str), res_list); - if (res_list.empty() && fuzzy_) - LookupWithFuzzy(get_impl(str), res_list); - break; - case qtDATA: - LookupData(query, res_list); - break; - default: - /*nothing*/; - } + switch (analyze_query(get_impl(str), query)) { + case qtFUZZY: + LookupWithFuzzy(query, res_list); + break; + case qtREGEXP: + LookupWithRule(query, res_list); + break; + case qtSIMPLE: + SimpleLookup(get_impl(str), res_list); + if (res_list.empty() && fuzzy_) + LookupWithFuzzy(get_impl(str), res_list); + break; + case qtDATA: + LookupData(query, res_list); + break; + default: + /*nothing*/; + } - bool first_result = true; - if (json_) { + bool first_result = true; + if (json_) { fputc('[', stdout); - } - if (!res_list.empty()) { - /* try to be more clever, if there are + } + if (!res_list.empty()) { + /* try to be more clever, if there are one or zero results per dictionary show all */ - bool show_all_results = true; - typedef std::map< std::string, int, std::less > DictResMap; - if (!force) { - DictResMap res_per_dict; - for (const TSearchResult& search_res : res_list) { - auto r = res_per_dict.equal_range(search_res.bookname); - DictResMap tmp(r.first, r.second); - if (tmp.empty()) //there are no yet such bookname in map - res_per_dict.insert(DictResMap::value_type(search_res.bookname, 1)); - else { - ++((tmp.begin())->second); - if (tmp.begin()->second > 1) { - show_all_results = false; - break; - } - } - } - }//if (!force) + bool show_all_results = true; + typedef std::map> DictResMap; + if (!force) { + DictResMap res_per_dict; + for (const TSearchResult &search_res : res_list) { + auto r = res_per_dict.equal_range(search_res.bookname); + DictResMap tmp(r.first, r.second); + if (tmp.empty()) //there are no yet such bookname in map + res_per_dict.insert(DictResMap::value_type(search_res.bookname, 1)); + else { + ++((tmp.begin())->second); + if (tmp.begin()->second > 1) { + show_all_results = false; + break; + } + } + } + } //if (!force) - if (!show_all_results && !force) { + if (!show_all_results && !force) { if (!json_) { printf(_("Found %zu items, similar to %s.\n"), res_list.size(), utf8_output_ ? get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str()); } - for (size_t i = 0; i < res_list.size(); ++i) { + for (size_t i = 0; i < res_list.size(); ++i) { const std::string loc_bookname = utf8_to_locale_ign_err(res_list[i].bookname); const std::string loc_def = utf8_to_locale_ign_err(res_list[i].def); - printf("%zu)%s%s%s-->%s%s%s\n", i, + printf("%zu)%s%s%s-->%s%s%s\n", i, colorize_output_ ? NAME_OF_DICT_VISFMT : "", - utf8_output_ ? res_list[i].bookname.c_str() : loc_bookname.c_str(), + utf8_output_ ? res_list[i].bookname.c_str() : loc_bookname.c_str(), colorize_output_ ? ESC_END : "", colorize_output_ ? SEARCH_TERM_VISFMT : "", - utf8_output_ ? res_list[i].def.c_str() : loc_def.c_str(), + utf8_output_ ? res_list[i].def.c_str() : loc_def.c_str(), colorize_output_ ? ESC_END : ""); - } - int choise; - std::unique_ptr choice_readline(create_readline_object()); - for (;;) { + } + int choise; + std::unique_ptr choice_readline(create_readline_object()); + for (;;) { std::string str_choise; - choice_readline->read(_("Your choice[-1 to abort]: "), str_choise); - sscanf(str_choise.c_str(), "%d", &choise); - if (choise >= 0 && choise < int(res_list.size())) { + choice_readline->read(_("Your choice[-1 to abort]: "), str_choise); + sscanf(str_choise.c_str(), "%d", &choise); + if (choise >= 0 && choise < int(res_list.size())) { sdcv_pager pager; io.add_to_history(res_list[choise].def.c_str()); - print_search_result(pager.get_stream(), res_list[choise], first_result); - break; - } else if (choise == -1){ - break; - } else - printf(_("Invalid choice.\nIt must be from 0 to %zu or -1.\n"), - res_list.size()-1); - } - } else { + print_search_result(pager.get_stream(), res_list[choise], first_result); + break; + } else if (choise == -1) { + break; + } else + printf(_("Invalid choice.\nIt must be from 0 to %zu or -1.\n"), + res_list.size() - 1); + } + } else { sdcv_pager pager(force || json_); if (!json_) { fprintf(pager.get_stream(), _("Found %zu items, similar to %s.\n"), res_list.size(), utf8_output_ ? get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str()); } - for (const TSearchResult& search_res : res_list) { + for (const TSearchResult &search_res : res_list) { print_search_result(pager.get_stream(), search_res, first_result); } - } + } - } else { - std::string loc_str; - if (!utf8_output_) - loc_str = utf8_to_locale_ign_err(get_impl(str)); - if(!json_) - printf(_("Nothing similar to %s, sorry :(\n"), utf8_output_ ? get_impl(str) : loc_str.c_str()); - } + } else { + std::string loc_str; + if (!utf8_output_) + loc_str = utf8_to_locale_ign_err(get_impl(str)); + if (!json_) + printf(_("Nothing similar to %s, sorry :(\n"), utf8_output_ ? get_impl(str) : loc_str.c_str()); + } if (json_) { fputs("]\n", stdout); } - return true; + return true; } diff --git a/src/libwrapper.hpp b/src/libwrapper.hpp index 2a20309..31783b6 100644 --- a/src/libwrapper.hpp +++ b/src/libwrapper.hpp @@ -3,45 +3,52 @@ #include #include -#include "stardict_lib.hpp" #include "readline.hpp" +#include "stardict_lib.hpp" //this structure is wrapper and it need for unification //results of search whith return Dicts class struct TSearchResult { - std::string bookname; - std::string def; - std::string exp; + std::string bookname; + std::string def; + std::string exp; - TSearchResult(const std::string& bookname_, const std::string& def_, const std::string& exp_) - : bookname(bookname_), def(def_), exp(exp_) - { - } + TSearchResult(const std::string &bookname_, const std::string &def_, const std::string &exp_) + : bookname(bookname_) + , def(def_) + , exp(exp_) + { + } }; typedef std::vector TSearchResultList; //this class is wrapper around Dicts class for easy use //of it -class Library : public Libs { +class Library : public Libs +{ public: - Library(bool uinput, bool uoutput, bool colorize_output, bool use_json, bool no_fuzzy) - : utf8_input_(uinput), utf8_output_(uoutput), colorize_output_(colorize_output), json_(use_json) { - setVerbose(!use_json); - setFuzzy(!no_fuzzy); - } + Library(bool uinput, bool uoutput, bool colorize_output, bool use_json, bool no_fuzzy) + : utf8_input_(uinput) + , utf8_output_(uoutput) + , colorize_output_(colorize_output) + , json_(use_json) + { + setVerbose(!use_json); + setFuzzy(!no_fuzzy); + } + + bool process_phrase(const char *loc_str, IReadLine &io, bool force = false); - bool process_phrase(const char *loc_str, IReadLine &io, bool force = false); private: - bool utf8_input_; - bool utf8_output_; - bool colorize_output_; - bool json_; + bool utf8_input_; + bool utf8_output_; + bool colorize_output_; + bool json_; - void SimpleLookup(const std::string &str, TSearchResultList& res_list); - void LookupWithFuzzy(const std::string &str, TSearchResultList& res_list); - void LookupWithRule(const std::string &str, TSearchResultList& res_lsit); - void LookupData(const std::string &str, TSearchResultList& res_list); - void print_search_result(FILE *out, const TSearchResult & res, bool &first_result); + void SimpleLookup(const std::string &str, TSearchResultList &res_list); + void LookupWithFuzzy(const std::string &str, TSearchResultList &res_list); + void LookupWithRule(const std::string &str, TSearchResultList &res_lsit); + void LookupData(const std::string &str, TSearchResultList &res_list); + void print_search_result(FILE *out, const TSearchResult &res, bool &first_result); }; - diff --git a/src/mapfile.hpp b/src/mapfile.hpp index 8dc6d36..ca5a681 100644 --- a/src/mapfile.hpp +++ b/src/mapfile.hpp @@ -1,27 +1,29 @@ #pragma once #ifdef HAVE_CONFIG_H -# include "config.h" +#include "config.h" #endif #ifdef HAVE_MMAP -# include -# include -# include +#include +#include +#include #endif #ifdef _WIN32 -# include +#include #endif #include -class MapFile { +class MapFile +{ public: MapFile() {} ~MapFile(); - MapFile(const MapFile&) = delete; - MapFile& operator=(const MapFile&) = delete; + MapFile(const MapFile &) = delete; + MapFile &operator=(const MapFile &) = delete; bool open(const char *file_name, unsigned long file_size); gchar *begin() { return data; } + private: char *data = nullptr; unsigned long size = 0ul; @@ -35,51 +37,50 @@ private: inline bool MapFile::open(const char *file_name, unsigned long file_size) { - size=file_size; + size = file_size; #ifdef HAVE_MMAP - if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) { - //g_print("Open file %s failed!\n",fullfilename); - return false; - } - data = (gchar *)mmap( nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0); - if ((void *)data == (void *)(-1)) { - //g_print("mmap file %s failed!\n",idxfilename); - data=nullptr; - return false; - } -#elif defined( _WIN32) - hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS, - FILE_ATTRIBUTE_NORMAL, 0); - hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0, - file_size, nullptr); - data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size); + if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) { + //g_print("Open file %s failed!\n",fullfilename); + return false; + } + data = (gchar *)mmap(nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0); + if ((void *)data == (void *)(-1)) { + //g_print("mmap file %s failed!\n",idxfilename); + data = nullptr; + return false; + } +#elif defined(_WIN32) + hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS, + FILE_ATTRIBUTE_NORMAL, 0); + hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0, + file_size, nullptr); + data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size); #else - gsize read_len; - if (!g_file_get_contents(file_name, &data, &read_len, nullptr)) - return false; + gsize read_len; + if (!g_file_get_contents(file_name, &data, &read_len, nullptr)) + return false; - if (read_len != file_size) - return false; + if (read_len != file_size) + return false; #endif - return true; + return true; } inline MapFile::~MapFile() { - if (!data) - return; + if (!data) + return; #ifdef HAVE_MMAP - munmap(data, size); - close(mmap_fd); + munmap(data, size); + close(mmap_fd); #else -# ifdef _WIN32 - UnmapViewOfFile(data); - CloseHandle(hFileMap); - CloseHandle(hFile); -# else - g_free(data); -# endif -#endif +#ifdef _WIN32 + UnmapViewOfFile(data); + CloseHandle(hFileMap); + CloseHandle(hFile); +#else + g_free(data); +#endif +#endif } - diff --git a/src/readline.cpp b/src/readline.cpp index c769bf5..ab443ff 100644 --- a/src/readline.cpp +++ b/src/readline.cpp @@ -19,14 +19,14 @@ */ #ifdef HAVE_CONFIG_H -# include "config.h" +#include "config.h" #endif #include #include #ifdef WITH_READLINE -# include -# include +#include +#include #endif #include @@ -34,73 +34,82 @@ #include "readline.hpp" -bool stdio_getline(FILE *in, std::string & str) +bool stdio_getline(FILE *in, std::string &str) { assert(in != nullptr); str.clear(); int ch; - while ((ch=fgetc(in)) != EOF && ch != '\n') + while ((ch = fgetc(in)) != EOF && ch != '\n') str += ch; return EOF != ch; } #ifndef WITH_READLINE -namespace { - class dummy_readline : public IReadLine { - public: - bool read(const std::string &banner, std::string &line) override { - printf("%s", banner.c_str()); - return stdio_getline(stdin, line); - } - }; +namespace +{ +class dummy_readline : public IReadLine +{ +public: + bool read(const std::string &banner, std::string &line) override + { + printf("%s", banner.c_str()); + return stdio_getline(stdin, line); + } +}; } #else -namespace { - class real_readline : public IReadLine { +namespace +{ +class real_readline : public IReadLine +{ - public: - real_readline() { - rl_readline_name = "sdcv"; - using_history(); - const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history"; - read_history(histname.c_str()); - } +public: + real_readline() + { + rl_readline_name = "sdcv"; + using_history(); + const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history"; + read_history(histname.c_str()); + } - ~real_readline() { - const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history"; - write_history(histname.c_str()); - const gchar *hist_size_str=g_getenv("SDCV_HISTSIZE"); - int hist_size; - if (!hist_size_str || sscanf(hist_size_str, "%d", &hist_size)<1) - hist_size = 2000; - history_truncate_file(histname.c_str(), hist_size); - } + ~real_readline() + { + const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history"; + write_history(histname.c_str()); + const gchar *hist_size_str = g_getenv("SDCV_HISTSIZE"); + int hist_size; + if (!hist_size_str || sscanf(hist_size_str, "%d", &hist_size) < 1) + hist_size = 2000; + history_truncate_file(histname.c_str(), hist_size); + } - bool read(const std::string &banner, std::string& line) override { - char *phrase = nullptr; - phrase = readline(banner.c_str()); - if (phrase) { - line = phrase; - free(phrase); - return true; - } - return false; + bool read(const std::string &banner, std::string &line) override + { + char *phrase = nullptr; + phrase = readline(banner.c_str()); + if (phrase) { + line = phrase; + free(phrase); + return true; } + return false; + } - void add_to_history(const std::string& phrase) override { - add_history(phrase.c_str()); - } - }; + void add_to_history(const std::string &phrase) override + { + add_history(phrase.c_str()); + } +}; } -#endif//WITH_READLINE +#endif //WITH_READLINE IReadLine *create_readline_object() { #ifdef WITH_READLINE - return new real_readline; + return new real_readline; #else - return new dummy_readline; + return new dummy_readline; #endif } diff --git a/src/readline.hpp b/src/readline.hpp index b418f2b..e14ae00 100644 --- a/src/readline.hpp +++ b/src/readline.hpp @@ -2,11 +2,12 @@ #include -class IReadLine { +class IReadLine +{ public: - virtual ~IReadLine() {} - virtual bool read(const std::string &banner, std::string& line) = 0; - virtual void add_to_history(const std::string&) {} + virtual ~IReadLine() {} + virtual bool read(const std::string &banner, std::string &line) = 0; + virtual void add_to_history(const std::string &) {} }; extern std::string sdcv_readline; diff --git a/src/sdcv.cpp b/src/sdcv.cpp index 6573d58..a8302b9 100644 --- a/src/sdcv.cpp +++ b/src/sdcv.cpp @@ -22,16 +22,16 @@ #include "config.h" #endif +#include #include #include #include #include #include +#include #include #include #include -#include -#include #include #include @@ -56,7 +56,7 @@ static void free_str_array(gchar **arr) } namespace glib { - using StrArr = ResourceWrapper; +using StrArr = ResourceWrapper; } static void list_dicts(const std::list &dicts_dir_list, bool use_json); @@ -131,12 +131,12 @@ int main(int argc, char *argv[]) try { const gchar *stardict_data_dir = g_getenv("STARDICT_DATA_DIR"); std::string data_dir; if (!opt_data_dir) { - if (!only_data_dir) { - if (stardict_data_dir) - data_dir = stardict_data_dir; - else - data_dir = "/usr/share/stardict/dic"; - } + if (!only_data_dir) { + if (stardict_data_dir) + data_dir = stardict_data_dir; + else + data_dir = "/usr/share/stardict/dic"; + } } else { data_dir = get_impl(opt_data_dir); } @@ -146,8 +146,8 @@ int main(int argc, char *argv[]) try { homedir = g_get_home_dir(); std::list dicts_dir_list; - if(!only_data_dir) - dicts_dir_list.push_back(std::string(homedir) + G_DIR_SEPARATOR + ".stardict" + G_DIR_SEPARATOR + "dic"); + if (!only_data_dir) + dicts_dir_list.push_back(std::string(homedir) + G_DIR_SEPARATOR + ".stardict" + G_DIR_SEPARATOR + "dic"); dicts_dir_list.push_back(data_dir); if (show_list_dicts) { list_dicts(dicts_dir_list, json_output); @@ -215,7 +215,7 @@ int main(int argc, char *argv[]) try { std::string phrase; while (io->read(_("Enter word or phrase: "), phrase)) { - if (!lib.process_phrase(phrase.c_str(), *io)) + if (!lib.process_phrase(phrase.c_str(), *io)) return EXIT_FAILURE; phrase.clear(); } @@ -232,30 +232,29 @@ int main(int argc, char *argv[]) try { static void list_dicts(const std::list &dicts_dir_list, bool use_json) { - bool first_entry = true; - if(!use_json) - printf(_("Dictionary's name Word count\n")); - else - fputc('[', stdout); - std::list order_list, disable_list; - for_each_file(dicts_dir_list, ".ifo", order_list, - disable_list, [use_json, &first_entry](const std::string &filename, bool) -> void { - DictInfo dict_info; - if (dict_info.load_from_ifo_file(filename, false)) { - const std::string bookname = utf8_to_locale_ign_err(dict_info.bookname); - if(use_json) { - if(first_entry) { - first_entry=false; - } else { - fputc(',', stdout); // comma between entries + bool first_entry = true; + if (!use_json) + printf(_("Dictionary's name Word count\n")); + else + fputc('[', stdout); + std::list order_list, disable_list; + for_each_file(dicts_dir_list, ".ifo", order_list, + disable_list, [use_json, &first_entry](const std::string &filename, bool) -> void { + DictInfo dict_info; + if (dict_info.load_from_ifo_file(filename, false)) { + const std::string bookname = utf8_to_locale_ign_err(dict_info.bookname); + if (use_json) { + if (first_entry) { + first_entry = false; + } else { + fputc(',', stdout); // comma between entries + } + printf("{\"name\": \"%s\", \"wordcount\": \"%d\"}", json_escape_string(bookname).c_str(), dict_info.wordcount); + } else { + printf("%s %d\n", bookname.c_str(), dict_info.wordcount); + } } - printf("{\"name\": \"%s\", \"wordcount\": \"%d\"}", json_escape_string(bookname).c_str(), dict_info.wordcount); - } else { - printf("%s %d\n", bookname.c_str(), dict_info.wordcount); - } - } - }); - if(use_json) - fputs("]\n", stdout); - + }); + if (use_json) + fputs("]\n", stdout); } diff --git a/src/stardict_lib.cpp b/src/stardict_lib.cpp index f3077b7..63e23f5 100644 --- a/src/stardict_lib.cpp +++ b/src/stardict_lib.cpp @@ -1,15 +1,15 @@ #ifdef HAVE_CONFIG_H -# include "config.h" +#include "config.h" #endif -#include #include -#include #include +#include +#include +#include #include #include -#include #include "distance.hpp" #include "mapfile.hpp" @@ -17,800 +17,807 @@ #include "stardict_lib.hpp" - #define TO_STR2(xstr) #xstr #define TO_STR1(xstr) TO_STR2(xstr) -#define THROW_IF_ERROR(expr) do { \ - assert((expr)); \ - if (!(expr)) \ - throw std::runtime_error(#expr " not true at " __FILE__ ": " TO_STR1(__LINE__)); \ +#define THROW_IF_ERROR(expr) \ + do { \ + assert((expr)); \ + if (!(expr)) \ + throw std::runtime_error(#expr " not true at " __FILE__ ": " TO_STR1(__LINE__)); \ } while (false) // Notice: read src/tools/DICTFILE_FORMAT for the dictionary // file's format information! -namespace { - struct Fuzzystruct { - char * pMatchWord; - int iMatchWordDistance; - }; - - static inline bool bIsVowel(gchar inputchar) - { - gchar ch = g_ascii_toupper(inputchar); - return( ch=='A' || ch=='E' || ch=='I' || ch=='O' || ch=='U' ); - } - - static bool bIsPureEnglish(const gchar *str) - { - // i think this should work even when it is UTF8 string :). - for (int i=0; str[i]!=0; i++) - //if(str[i]<0) - //if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK. - // Better use isascii() but not str[i]<0 while char is default unsigned in arm - if (!isascii(str[i])) - return false; - return true; - } - - static inline gint stardict_strcmp(const gchar *s1, const gchar *s2) - { - const gint a = g_ascii_strcasecmp(s1, s2); - if (a == 0) - return strcmp(s1, s2); - else - return a; - } - - static void unicode_strdown(gunichar *str) - { - while (*str) { - *str = g_unichar_tolower(*str); - ++str; - } - } +namespace +{ +struct Fuzzystruct { + char *pMatchWord; + int iMatchWordDistance; +}; +static inline bool bIsVowel(gchar inputchar) +{ + gchar ch = g_ascii_toupper(inputchar); + return (ch == 'A' || ch == 'E' || ch == 'I' || ch == 'O' || ch == 'U'); } -bool DictInfo::load_from_ifo_file(const std::string& ifofilename, +static bool bIsPureEnglish(const gchar *str) +{ + // i think this should work even when it is UTF8 string :). + for (int i = 0; str[i] != 0; i++) + //if(str[i]<0) + //if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK. + // Better use isascii() but not str[i]<0 while char is default unsigned in arm + if (!isascii(str[i])) + return false; + return true; +} + +static inline gint stardict_strcmp(const gchar *s1, const gchar *s2) +{ + const gint a = g_ascii_strcasecmp(s1, s2); + if (a == 0) + return strcmp(s1, s2); + else + return a; +} + +static void unicode_strdown(gunichar *str) +{ + while (*str) { + *str = g_unichar_tolower(*str); + ++str; + } +} +} + +bool DictInfo::load_from_ifo_file(const std::string &ifofilename, bool istreedict) { - ifo_file_name = ifofilename; - glib::CharStr buffer; - if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), nullptr, nullptr)) - return false; + ifo_file_name = ifofilename; + glib::CharStr buffer; + if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), nullptr, nullptr)) + return false; - static const char TREEDICT_MAGIC_DATA[] = "StarDict's treedict ifo file"; - static const char DICT_MAGIC_DATA[] = "StarDict's dict ifo file"; + static const char TREEDICT_MAGIC_DATA[] = "StarDict's treedict ifo file"; + static const char DICT_MAGIC_DATA[] = "StarDict's dict ifo file"; - const gchar *magic_data = istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA; - static const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF, '\0'}; - if (!g_str_has_prefix( - g_str_has_prefix(get_impl(buffer), (const gchar *)(utf8_bom)) ? get_impl(buffer) + 3 : get_impl(buffer), - magic_data)) { - return false; - } + const gchar *magic_data = istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA; + static const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF, '\0' }; + if (!g_str_has_prefix( + g_str_has_prefix(get_impl(buffer), (const gchar *)(utf8_bom)) ? get_impl(buffer) + 3 : get_impl(buffer), + magic_data)) { + return false; + } - gchar *p1 = get_impl(buffer) + strlen(magic_data)-1; + gchar *p1 = get_impl(buffer) + strlen(magic_data) - 1; - gchar *p2 = strstr(p1, "\nwordcount="); - if (p2 == nullptr) - return false; - - gchar *p3 = strchr(p2 + sizeof("\nwordcount=") - 1, '\n'); - - wordcount = atol(std::string(p2+sizeof("\nwordcount=")-1, p3-(p2+sizeof("\nwordcount=")-1)).c_str()); - - if (istreedict) { - p2 = strstr(p1,"\ntdxfilesize="); + gchar *p2 = strstr(p1, "\nwordcount="); if (p2 == nullptr) - return false; + return false; - p3 = strchr(p2+ sizeof("\ntdxfilesize=")-1,'\n'); + gchar *p3 = strchr(p2 + sizeof("\nwordcount=") - 1, '\n'); - index_file_size = atol(std::string(p2+sizeof("\ntdxfilesize=")-1, p3-(p2+sizeof("\ntdxfilesize=")-1)).c_str()); + wordcount = atol(std::string(p2 + sizeof("\nwordcount=") - 1, p3 - (p2 + sizeof("\nwordcount=") - 1)).c_str()); - } else { + if (istreedict) { + p2 = strstr(p1, "\ntdxfilesize="); + if (p2 == nullptr) + return false; + + p3 = strchr(p2 + sizeof("\ntdxfilesize=") - 1, '\n'); + + index_file_size = atol(std::string(p2 + sizeof("\ntdxfilesize=") - 1, p3 - (p2 + sizeof("\ntdxfilesize=") - 1)).c_str()); + + } else { + + p2 = strstr(p1, "\nidxfilesize="); + if (p2 == nullptr) + return false; + + p3 = strchr(p2 + sizeof("\nidxfilesize=") - 1, '\n'); + index_file_size = atol(std::string(p2 + sizeof("\nidxfilesize=") - 1, p3 - (p2 + sizeof("\nidxfilesize=") - 1)).c_str()); + } + + p2 = strstr(p1, "\nbookname="); - p2 = strstr(p1,"\nidxfilesize="); if (p2 == nullptr) - return false; + return false; - p3 = strchr(p2+ sizeof("\nidxfilesize=")-1,'\n'); - index_file_size = atol(std::string(p2+sizeof("\nidxfilesize=")-1, p3-(p2+sizeof("\nidxfilesize=")-1)).c_str()); - } - - p2 = strstr(p1,"\nbookname="); - - if (p2 == nullptr) - return false; - - p2 = p2 + sizeof("\nbookname=") -1; - p3 = strchr(p2, '\n'); - bookname.assign(p2, p3-p2); - - p2 = strstr(p1,"\nauthor="); - if (p2) { - p2 = p2 + sizeof("\nauthor=") -1; + p2 = p2 + sizeof("\nbookname=") - 1; p3 = strchr(p2, '\n'); - author.assign(p2, p3-p2); - } + bookname.assign(p2, p3 - p2); - p2 = strstr(p1,"\nemail="); - if (p2) { - p2 = p2 + sizeof("\nemail=") -1; - p3 = strchr(p2, '\n'); - email.assign(p2, p3-p2); - } + p2 = strstr(p1, "\nauthor="); + if (p2) { + p2 = p2 + sizeof("\nauthor=") - 1; + p3 = strchr(p2, '\n'); + author.assign(p2, p3 - p2); + } - p2 = strstr(p1,"\nwebsite="); - if (p2) { - p2 = p2 + sizeof("\nwebsite=") -1; - p3 = strchr(p2, '\n'); - website.assign(p2, p3-p2); - } + p2 = strstr(p1, "\nemail="); + if (p2) { + p2 = p2 + sizeof("\nemail=") - 1; + p3 = strchr(p2, '\n'); + email.assign(p2, p3 - p2); + } - p2 = strstr(p1,"\ndate="); - if (p2) { - p2 = p2 + sizeof("\ndate=") -1; - p3 = strchr(p2, '\n'); - date.assign(p2, p3-p2); - } + p2 = strstr(p1, "\nwebsite="); + if (p2) { + p2 = p2 + sizeof("\nwebsite=") - 1; + p3 = strchr(p2, '\n'); + website.assign(p2, p3 - p2); + } - p2 = strstr(p1,"\ndescription="); - if (p2) { - p2 = p2 + sizeof("\ndescription=")-1; - p3 = strchr(p2, '\n'); - description.assign(p2, p3-p2); - } + p2 = strstr(p1, "\ndate="); + if (p2) { + p2 = p2 + sizeof("\ndate=") - 1; + p3 = strchr(p2, '\n'); + date.assign(p2, p3 - p2); + } - p2 = strstr(p1,"\nsametypesequence="); - if (p2) { - p2+=sizeof("\nsametypesequence=")-1; - p3 = strchr(p2, '\n'); - sametypesequence.assign(p2, p3-p2); - } + p2 = strstr(p1, "\ndescription="); + if (p2) { + p2 = p2 + sizeof("\ndescription=") - 1; + p3 = strchr(p2, '\n'); + description.assign(p2, p3 - p2); + } - p2 = strstr(p1,"\nsynwordcount="); - syn_wordcount = 0; - if (p2) { - p2+=sizeof("\nsynwordcount=")-1; - p3 = strchr(p2, '\n'); - syn_wordcount = atol(std::string(p2, p3-p2).c_str()); - } + p2 = strstr(p1, "\nsametypesequence="); + if (p2) { + p2 += sizeof("\nsametypesequence=") - 1; + p3 = strchr(p2, '\n'); + sametypesequence.assign(p2, p3 - p2); + } - return true; + p2 = strstr(p1, "\nsynwordcount="); + syn_wordcount = 0; + if (p2) { + p2 += sizeof("\nsynwordcount=") - 1; + p3 = strchr(p2, '\n'); + syn_wordcount = atol(std::string(p2, p3 - p2).c_str()); + } + + return true; } -gchar* DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size) +gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size) { - for (int i=0; iread(get_impl(origin_data), idxitem_offset, idxitem_size); + if (dictfile) { + const size_t nitems = fread(get_impl(origin_data), idxitem_size, 1, dictfile); + THROW_IF_ERROR(nitems == 1); + } else + dictdzfile->read(get_impl(origin_data), idxitem_offset, idxitem_size); - guint32 data_size; - gint sametypesequence_len = sametypesequence.length(); - //there have sametypesequence_len char being omitted. - data_size = idxitem_size + sizeof(guint32) + sametypesequence_len; - //if the last item's size is determined by the end up '\0',then +=sizeof(gchar); - //if the last item's size is determined by the head guint32 type data,then +=sizeof(guint32); - switch (sametypesequence[sametypesequence_len-1]) { - case 'm': - case 't': - case 'y': - case 'l': - case 'g': - case 'x': - case 'k': - data_size += sizeof(gchar); - break; - case 'W': - case 'P': - data_size += sizeof(guint32); - break; - default: - if (g_ascii_isupper(sametypesequence[sametypesequence_len-1])) - data_size += sizeof(guint32); - else - data_size += sizeof(gchar); - break; + guint32 data_size; + gint sametypesequence_len = sametypesequence.length(); + //there have sametypesequence_len char being omitted. + data_size = idxitem_size + sizeof(guint32) + sametypesequence_len; + //if the last item's size is determined by the end up '\0',then +=sizeof(gchar); + //if the last item's size is determined by the head guint32 type data,then +=sizeof(guint32); + switch (sametypesequence[sametypesequence_len - 1]) { + case 'm': + case 't': + case 'y': + case 'l': + case 'g': + case 'x': + case 'k': + data_size += sizeof(gchar); + break; + case 'W': + case 'P': + data_size += sizeof(guint32); + break; + default: + if (g_ascii_isupper(sametypesequence[sametypesequence_len - 1])) + data_size += sizeof(guint32); + else + data_size += sizeof(gchar); + break; + } + data = (gchar *)g_malloc(data_size); + gchar *p1, *p2; + p1 = data + sizeof(guint32); + p2 = get_impl(origin_data); + guint32 sec_size; + //copy the head items. + for (int i = 0; i < sametypesequence_len - 1; i++) { + *p1 = sametypesequence[i]; + p1 += sizeof(gchar); + switch (sametypesequence[i]) { + case 'm': + case 't': + case 'y': + case 'l': + case 'g': + case 'x': + case 'k': + sec_size = strlen(p2) + 1; + memcpy(p1, p2, sec_size); + p1 += sec_size; + p2 += sec_size; + break; + case 'W': + case 'P': + sec_size = get_uint32(p2); + sec_size += sizeof(guint32); + memcpy(p1, p2, sec_size); + p1 += sec_size; + p2 += sec_size; + break; + default: + if (g_ascii_isupper(sametypesequence[i])) { + sec_size = get_uint32(p2); + sec_size += sizeof(guint32); + } else { + sec_size = strlen(p2) + 1; + } + memcpy(p1, p2, sec_size); + p1 += sec_size; + p2 += sec_size; + break; + } + } + //calculate the last item 's size. + sec_size = idxitem_size - (p2 - get_impl(origin_data)); + *p1 = sametypesequence[sametypesequence_len - 1]; + p1 += sizeof(gchar); + switch (sametypesequence[sametypesequence_len - 1]) { + case 'm': + case 't': + case 'y': + case 'l': + case 'g': + case 'x': + case 'k': + memcpy(p1, p2, sec_size); + p1 += sec_size; + *p1 = '\0'; //add the end up '\0'; + break; + case 'W': + case 'P': + set_uint32(p1, sec_size); + p1 += sizeof(guint32); + memcpy(p1, p2, sec_size); + break; + default: + if (g_ascii_isupper(sametypesequence[sametypesequence_len - 1])) { + set_uint32(p1, sec_size); + p1 += sizeof(guint32); + memcpy(p1, p2, sec_size); + } else { + memcpy(p1, p2, sec_size); + p1 += sec_size; + *p1 = '\0'; + } + break; + } + set_uint32(data, data_size); + } else { + data = (gchar *)g_malloc(idxitem_size + sizeof(guint32)); + if (dictfile) { + const size_t nitems = fread(data + sizeof(guint32), idxitem_size, 1, dictfile); + THROW_IF_ERROR(nitems == 1); + } else + dictdzfile->read(data + sizeof(guint32), idxitem_offset, idxitem_size); + set_uint32(data, idxitem_size + sizeof(guint32)); } - data = (gchar *)g_malloc(data_size); - gchar *p1,*p2; - p1 = data + sizeof(guint32); - p2 = get_impl(origin_data); - guint32 sec_size; - //copy the head items. - for (int i=0; iread(data+sizeof(guint32), idxitem_offset, idxitem_size); - set_uint32(data, idxitem_size+sizeof(guint32)); - } - g_free(cache[cache_cur].data); + g_free(cache[cache_cur].data); - cache[cache_cur].data = data; - cache[cache_cur].offset = idxitem_offset; - cache_cur++; - if (cache_cur==WORDDATA_CACHE_NUM) - cache_cur = 0; - return data; + cache[cache_cur].data = data; + cache[cache_cur].offset = idxitem_offset; + cache_cur++; + if (cache_cur == WORDDATA_CACHE_NUM) + cache_cur = 0; + return data; } bool DictBase::SearchData(std::vector &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data) { - int nWord = SearchWords.size(); - std::vector WordFind(nWord, false); - int nfound=0; + int nWord = SearchWords.size(); + std::vector WordFind(nWord, false); + int nfound = 0; - if (dictfile) - fseek(dictfile, idxitem_offset, SEEK_SET); - if (dictfile) { - const size_t nitems = fread(origin_data, idxitem_size, 1, dictfile); + if (dictfile) + fseek(dictfile, idxitem_offset, SEEK_SET); + if (dictfile) { + const size_t nitems = fread(origin_data, idxitem_size, 1, dictfile); THROW_IF_ERROR(nitems == 1); - } else - dictdzfile->read(origin_data, idxitem_offset, idxitem_size); - gchar *p = origin_data; - guint32 sec_size; - int j; - if (!sametypesequence.empty()) { - gint sametypesequence_len = sametypesequence.length(); - for (int i=0; iread(origin_data, idxitem_offset, idxitem_size); + gchar *p = origin_data; + guint32 sec_size; + int j; + if (!sametypesequence.empty()) { + gint sametypesequence_len = sametypesequence.length(); + for (int i = 0; i < sametypesequence_len - 1; i++) { + switch (sametypesequence[i]) { + case 'm': + case 't': + case 'y': + case 'l': + case 'g': + case 'x': case 'k': - for (j=0; j wordoffset; + FILE *idxfile; + gulong wordcount; + + gchar wordentry_buf[256 + sizeof(guint32) * 2]; // The length of "word_str" should be less than 256. See src/tools/DICTFILE_FORMAT. + struct index_entry { + glong idx; + std::string keystr; + void assign(glong i, const std::string &str) + { + idx = i; + keystr.assign(str); } - bool load(const std::string& url, gulong wc, gulong fsize, bool verbose) override; - const gchar *get_key(glong idx) override; - void get_data(glong idx) override { get_key(idx); } - const gchar *get_key_and_data(glong idx) override { - return get_key(idx); - } - bool lookup(const char *str, glong &idx) override; - private: - static const gint ENTR_PER_PAGE = 32; - static const char *CACHE_MAGIC; - - std::vector wordoffset; - FILE *idxfile; - gulong wordcount; - - gchar wordentry_buf[256+sizeof(guint32)*2]; // The length of "word_str" should be less than 256. See src/tools/DICTFILE_FORMAT. - struct index_entry { - glong idx; - std::string keystr; - void assign(glong i, const std::string& str) { - idx = i; - keystr.assign(str); - } - }; - index_entry first, last, middle, real_last; - - struct page_entry { - gchar *keystr; - guint32 off, size; - }; - std::vector page_data; - struct page_t { - glong idx = -1; - page_entry entries[ENTR_PER_PAGE]; - - page_t() {} - void fill(gchar *data, gint nent, glong idx_); - } page; - gulong load_page(glong page_idx); - const gchar *read_first_on_page_key(glong page_idx); - const gchar *get_first_on_page_key(glong page_idx); - bool load_cache(const std::string& url); - bool save_cache(const std::string& url, bool verbose); - static std::list get_cache_variant(const std::string& url); }; + index_entry first, last, middle, real_last; - const char *OffsetIndex::CACHE_MAGIC = "StarDict's Cache, Version: 0.1"; - - - class WordListIndex : public IIndexFile { - public: - WordListIndex() : idxdatabuf(nullptr) {} - ~WordListIndex() { g_free(idxdatabuf); } - bool load(const std::string& url, gulong wc, gulong fsize, bool verbose) override; - const gchar *get_key(glong idx) override { return wordlist[idx]; } - void get_data(glong idx) override; - const gchar *get_key_and_data(glong idx) override { - get_data(idx); - return get_key(idx); - } - bool lookup(const char *str, glong &idx) override; - private: - gchar *idxdatabuf; - std::vector wordlist; + struct page_entry { + gchar *keystr; + guint32 off, size; }; + std::vector page_data; + struct page_t { + glong idx = -1; + page_entry entries[ENTR_PER_PAGE]; - void OffsetIndex::page_t::fill(gchar *data, gint nent, glong idx_) + page_t() {} + void fill(gchar *data, gint nent, glong idx_); + } page; + gulong load_page(glong page_idx); + const gchar *read_first_on_page_key(glong page_idx); + const gchar *get_first_on_page_key(glong page_idx); + bool load_cache(const std::string &url); + bool save_cache(const std::string &url, bool verbose); + static std::list get_cache_variant(const std::string &url); +}; + +const char *OffsetIndex::CACHE_MAGIC = "StarDict's Cache, Version: 0.1"; + +class WordListIndex : public IIndexFile +{ +public: + WordListIndex() + : idxdatabuf(nullptr) { - idx=idx_; - gchar *p=data; - glong len; - for (gint i=0; i wordlist; +}; + +void OffsetIndex::page_t::fill(gchar *data, gint nent, glong idx_) +{ + idx = idx_; + gchar *p = data; + glong len; + for (gint i = 0; i < nent; ++i) { + entries[i].keystr = p; + len = strlen(p); + p += len + 1; + entries[i].off = g_ntohl(get_uint32(p)); + p += sizeof(guint32); + entries[i].size = g_ntohl(get_uint32(p)); + p += sizeof(guint32); + } +} + +inline const gchar *OffsetIndex::read_first_on_page_key(glong page_idx) +{ + fseek(idxfile, wordoffset[page_idx], SEEK_SET); + guint32 page_size = wordoffset[page_idx + 1] - wordoffset[page_idx]; + const size_t nitems = fread(wordentry_buf, + std::min(sizeof(wordentry_buf), static_cast(page_size)), + 1, idxfile); + THROW_IF_ERROR(nitems == 1); + //TODO: check returned values, deal with word entry that strlen>255. + return wordentry_buf; +} + +inline const gchar *OffsetIndex::get_first_on_page_key(glong page_idx) +{ + if (page_idx < middle.idx) { + if (page_idx == first.idx) + return first.keystr.c_str(); + return read_first_on_page_key(page_idx); + } else if (page_idx > middle.idx) { + if (page_idx == last.idx) + return last.keystr.c_str(); + return read_first_on_page_key(page_idx); + } else + return middle.keystr.c_str(); +} + +bool OffsetIndex::load_cache(const std::string &url) +{ + const std::list vars = get_cache_variant(url); + + for (const std::string &item : vars) { + struct ::stat idxstat, cachestat; + if (g_stat(url.c_str(), &idxstat) != 0 || g_stat(item.c_str(), &cachestat) != 0) + continue; + if (cachestat.st_mtime < idxstat.st_mtime) + continue; + MapFile mf; + if (!mf.open(item.c_str(), cachestat.st_size)) + continue; + if (strncmp(mf.begin(), CACHE_MAGIC, strlen(CACHE_MAGIC)) != 0) + continue; + memcpy(&wordoffset[0], mf.begin() + strlen(CACHE_MAGIC), wordoffset.size() * sizeof(wordoffset[0])); + return true; } - inline const gchar *OffsetIndex::read_first_on_page_key(glong page_idx) - { - fseek(idxfile, wordoffset[page_idx], SEEK_SET); - guint32 page_size = wordoffset[page_idx + 1] - wordoffset[page_idx]; - const size_t nitems = fread(wordentry_buf, - std::min(sizeof(wordentry_buf), static_cast(page_size)), - 1, idxfile); - THROW_IF_ERROR(nitems == 1); - //TODO: check returned values, deal with word entry that strlen>255. - return wordentry_buf; - } + return false; +} - inline const gchar *OffsetIndex::get_first_on_page_key(glong page_idx) - { - if (page_idxmiddle.idx) { - if (page_idx==last.idx) - return last.keystr.c_str(); - return read_first_on_page_key(page_idx); - } else - return middle.keystr.c_str(); - } - - bool OffsetIndex::load_cache(const std::string& url) - { - const std::list vars = get_cache_variant(url); - - for (const std::string& item : vars) { - struct ::stat idxstat, cachestat; - if (g_stat(url.c_str(), &idxstat)!=0 || - g_stat(item.c_str(), &cachestat)!=0) - continue; - if (cachestat.st_mtime OffsetIndex::get_cache_variant(const std::string& url) - { - std::list res = {url + ".oft"}; - if (!g_file_test(g_get_user_cache_dir(), G_FILE_TEST_EXISTS) && - g_mkdir(g_get_user_cache_dir(), 0700)==-1) - return res; - - const std::string cache_dir = std::string(g_get_user_cache_dir())+G_DIR_SEPARATOR_S+"sdcv"; - - if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_EXISTS)) { - if (g_mkdir(cache_dir.c_str(), 0700)==-1) - return res; - } else if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_IS_DIR)) - return res; - - gchar *base = g_path_get_basename(url.c_str()); - res.push_back(cache_dir+G_DIR_SEPARATOR_S+base+".oft"); - g_free(base); +std::list OffsetIndex::get_cache_variant(const std::string &url) +{ + std::list res = { url + ".oft" }; + if (!g_file_test(g_get_user_cache_dir(), G_FILE_TEST_EXISTS) && g_mkdir(g_get_user_cache_dir(), 0700) == -1) return res; + + const std::string cache_dir = std::string(g_get_user_cache_dir()) + G_DIR_SEPARATOR_S + "sdcv"; + + if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_EXISTS)) { + if (g_mkdir(cache_dir.c_str(), 0700) == -1) + return res; + } else if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_IS_DIR)) + return res; + + gchar *base = g_path_get_basename(url.c_str()); + res.push_back(cache_dir + G_DIR_SEPARATOR_S + base + ".oft"); + g_free(base); + return res; +} + +bool OffsetIndex::save_cache(const std::string &url, bool verbose) +{ + const std::list vars = get_cache_variant(url); + for (const std::string &item : vars) { + FILE *out = fopen(item.c_str(), "wb"); + if (!out) + continue; + if (fwrite(CACHE_MAGIC, 1, strlen(CACHE_MAGIC), out) != strlen(CACHE_MAGIC)) + continue; + if (fwrite(&wordoffset[0], sizeof(wordoffset[0]), wordoffset.size(), out) != wordoffset.size()) + continue; + fclose(out); + if (verbose) { + printf("save to cache %s\n", url.c_str()); + } + return true; + } + return false; +} + +bool OffsetIndex::load(const std::string &url, gulong wc, gulong fsize, bool verbose) +{ + wordcount = wc; + gulong npages = (wc - 1) / ENTR_PER_PAGE + 2; + wordoffset.resize(npages); + if (!load_cache(url)) { //map file will close after finish of block + MapFile map_file; + if (!map_file.open(url.c_str(), fsize)) + return false; + const gchar *idxdatabuffer = map_file.begin(); + + const gchar *p1 = idxdatabuffer; + gulong index_size; + guint32 j = 0; + for (guint32 i = 0; i < wc; i++) { + index_size = strlen(p1) + 1 + 2 * sizeof(guint32); + if (i % ENTR_PER_PAGE == 0) { + wordoffset[j] = p1 - idxdatabuffer; + ++j; + } + p1 += index_size; + } + wordoffset[j] = p1 - idxdatabuffer; + if (!save_cache(url, verbose)) + fprintf(stderr, "cache update failed\n"); } - bool OffsetIndex::save_cache(const std::string& url, bool verbose) - { - const std::list vars = get_cache_variant(url); - for (const std::string& item : vars) { - FILE *out=fopen(item.c_str(), "wb"); - if (!out) - continue; - if (fwrite(CACHE_MAGIC, 1, strlen(CACHE_MAGIC), out)!=strlen(CACHE_MAGIC)) - continue; - if (fwrite(&wordoffset[0], sizeof(wordoffset[0]), wordoffset.size(), out)!=wordoffset.size()) - continue; - fclose(out); - if(verbose) { - printf("save to cache %s\n", url.c_str()); - } - return true; - } + if (!(idxfile = fopen(url.c_str(), "rb"))) { + wordoffset.resize(0); return false; } - bool OffsetIndex::load(const std::string& url, gulong wc, gulong fsize, bool verbose) - { - wordcount=wc; - gulong npages=(wc-1)/ENTR_PER_PAGE+2; - wordoffset.resize(npages); - if (!load_cache(url)) {//map file will close after finish of block - MapFile map_file; - if (!map_file.open(url.c_str(), fsize)) - return false; - const gchar *idxdatabuffer=map_file.begin(); + first.assign(0, read_first_on_page_key(0)); + last.assign(wordoffset.size() - 2, read_first_on_page_key(wordoffset.size() - 2)); + middle.assign((wordoffset.size() - 2) / 2, read_first_on_page_key((wordoffset.size() - 2) / 2)); + real_last.assign(wc - 1, get_key(wc - 1)); - const gchar *p1 = idxdatabuffer; - gulong index_size; - guint32 j=0; - for (guint32 i=0; i 0) { + idx = INVALID_INDEX; + return false; + } else { + iFrom = 0; + iThisIndex = 0; + while (iFrom <= iTo) { + iThisIndex = (iFrom + iTo) / 2; + cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex)); + if (cmpint > 0) + iFrom = iThisIndex + 1; + else if (cmpint < 0) + iTo = iThisIndex - 1; + else { + bFound = true; + break; } - wordoffset[j]=p1-idxdatabuffer; - if (!save_cache(url, verbose)) - fprintf(stderr, "cache update failed\n"); } - - if (!(idxfile = fopen(url.c_str(), "rb"))) { - wordoffset.resize(0); - return false; + if (!bFound) + idx = iTo; //prev + else + idx = iThisIndex; + } + if (!bFound) { + gulong netr = load_page(idx); + iFrom = 1; // Needn't search the first word anymore. + iTo = netr - 1; + iThisIndex = 0; + while (iFrom <= iTo) { + iThisIndex = (iFrom + iTo) / 2; + cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr); + if (cmpint > 0) + iFrom = iThisIndex + 1; + else if (cmpint < 0) + iTo = iThisIndex - 1; + else { + bFound = true; + break; + } } - - first.assign(0, read_first_on_page_key(0)); - last.assign(wordoffset.size()-2, read_first_on_page_key(wordoffset.size()-2)); - middle.assign((wordoffset.size()-2)/2, read_first_on_page_key((wordoffset.size()-2)/2)); - real_last.assign(wc-1, get_key(wc-1)); - - return true; + idx *= ENTR_PER_PAGE; + if (!bFound) + idx += iFrom; //next + else + idx += iThisIndex; + } else { + idx *= ENTR_PER_PAGE; } + return bFound; +} - inline gulong OffsetIndex::load_page(glong page_idx) - { - gulong nentr = ENTR_PER_PAGE; - if (page_idx == glong(wordoffset.size()-2)) - if ((nentr = (wordcount % ENTR_PER_PAGE)) == 0) - nentr = ENTR_PER_PAGE; +bool WordListIndex::load(const std::string &url, gulong wc, gulong fsize, bool verbose) +{ + gzFile in = gzopen(url.c_str(), "rb"); + if (in == nullptr) + return false; + idxdatabuf = (gchar *)g_malloc(fsize); - if (page_idx != page.idx) { - page_data.resize(wordoffset[page_idx+1]-wordoffset[page_idx]); - fseek(idxfile, wordoffset[page_idx], SEEK_SET); - const size_t nitems = fread(&page_data[0], 1, page_data.size(), idxfile); - THROW_IF_ERROR(nitems == page_data.size()); - - page.fill(&page_data[0], nentr, page_idx); - } + const int len = gzread(in, idxdatabuf, fsize); + gzclose(in); + if (len < 0) + return false; - return nentr; + if (gulong(len) != fsize) + return false; + + wordlist.resize(wc + 1); + gchar *p1 = idxdatabuf; + guint32 i; + for (i = 0; i < wc; i++) { + wordlist[i] = p1; + p1 += strlen(p1) + 1 + 2 * sizeof(guint32); } + wordlist[wc] = p1; - const gchar *OffsetIndex::get_key(glong idx) - { - load_page(idx/ENTR_PER_PAGE); - glong idx_in_page=idx%ENTR_PER_PAGE; - wordentry_offset=page.entries[idx_in_page].off; - wordentry_size=page.entries[idx_in_page].size; + return true; +} - return page.entries[idx_in_page].keystr; - } +void WordListIndex::get_data(glong idx) +{ + gchar *p1 = wordlist[idx] + strlen(wordlist[idx]) + sizeof(gchar); + wordentry_offset = g_ntohl(get_uint32(p1)); + p1 += sizeof(guint32); + wordentry_size = g_ntohl(get_uint32(p1)); +} - bool OffsetIndex::lookup(const char *str, glong &idx) - { - bool bFound=false; - glong iFrom; - glong iTo=wordoffset.size()-2; +bool WordListIndex::lookup(const char *str, glong &idx) +{ + bool bFound = false; + glong iTo = wordlist.size() - 2; + + if (stardict_strcmp(str, get_key(0)) < 0) { + idx = 0; + } else if (stardict_strcmp(str, get_key(iTo)) > 0) { + idx = INVALID_INDEX; + } else { + glong iThisIndex = 0; + glong iFrom = 0; gint cmpint; - glong iThisIndex; - if (stardict_strcmp(str, first.keystr.c_str())<0) { - idx = 0; - return false; - } else if (stardict_strcmp(str, real_last.keystr.c_str()) >0) { - idx = INVALID_INDEX; - return false; - } else { - iFrom=0; - iThisIndex=0; - while (iFrom<=iTo) { - iThisIndex=(iFrom+iTo)/2; - cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex)); - if (cmpint>0) - iFrom=iThisIndex+1; - else if (cmpint<0) - iTo=iThisIndex-1; - else { - bFound=true; - break; - } + while (iFrom <= iTo) { + iThisIndex = (iFrom + iTo) / 2; + cmpint = stardict_strcmp(str, get_key(iThisIndex)); + if (cmpint > 0) + iFrom = iThisIndex + 1; + else if (cmpint < 0) + iTo = iThisIndex - 1; + else { + bFound = true; + break; } - if (!bFound) - idx = iTo; //prev - else - idx = iThisIndex; } - if (!bFound) { - gulong netr = load_page(idx); - iFrom = 1; // Needn't search the first word anymore. - iTo = netr-1; - iThisIndex = 0; - while (iFrom <= iTo) { - iThisIndex = (iFrom + iTo) / 2; - cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr); - if (cmpint > 0) - iFrom = iThisIndex+1; - else if (cmpint < 0) - iTo = iThisIndex-1; - else { - bFound = true; - break; - } - } - idx *= ENTR_PER_PAGE; - if (!bFound) - idx += iFrom; //next - else - idx += iThisIndex; - } else { - idx *= ENTR_PER_PAGE; - } - return bFound; - } - - bool WordListIndex::load(const std::string& url, gulong wc, gulong fsize, bool verbose) - { - gzFile in = gzopen(url.c_str(), "rb"); - if (in == nullptr) - return false; - - idxdatabuf = (gchar *)g_malloc(fsize); - - const int len = gzread(in, idxdatabuf, fsize); - gzclose(in); - if (len < 0) - return false; - - if (gulong(len) != fsize) - return false; - - wordlist.resize(wc+1); - gchar *p1 = idxdatabuf; - guint32 i; - for (i=0; i0) { - idx = INVALID_INDEX; - } else { - glong iThisIndex=0; - glong iFrom=0; - gint cmpint; - while (iFrom<=iTo) { - iThisIndex=(iFrom+iTo)/2; - cmpint = stardict_strcmp(str, get_key(iThisIndex)); - if (cmpint>0) - iFrom=iThisIndex+1; - else if (cmpint<0) - iTo=iThisIndex-1; - else { - bFound=true; - break; - } - } - if (!bFound) - idx = iFrom; //next - else - idx = iThisIndex; - } - return bFound; + if (!bFound) + idx = iFrom; //next + else + idx = iThisIndex; } + return bFound; +} } bool SynFile::load(const std::string &url, gulong wc) @@ -825,8 +832,8 @@ bool SynFile::load(const std::string &url, gulong wc) // each entry in a syn-file is: // - 0-terminated string // 4-byte index into .dict file in network byte order - glib::CharStr lower_string{g_utf8_casefold(current, -1)}; - std::string synonym{get_impl(lower_string)}; + glib::CharStr lower_string{ g_utf8_casefold(current, -1) }; + std::string synonym{ get_impl(lower_string) }; current += synonym.length() + 1; const guint32 idx = g_ntohl(get_uint32(current)); current += sizeof(idx); @@ -840,7 +847,7 @@ bool SynFile::load(const std::string &url, gulong wc) bool SynFile::lookup(const char *str, glong &idx) { - glib::CharStr lower_string{g_utf8_casefold(str, -1)}; + glib::CharStr lower_string{ g_utf8_casefold(str, -1) }; auto it = synonyms.find(get_impl(lower_string)); if (it != synonyms.end()) { idx = it->second; @@ -849,548 +856,534 @@ bool SynFile::lookup(const char *str, glong &idx) return false; } -bool Dict::Lookup(const char *str, glong &idx) { - return syn_file->lookup(str, idx) || idx_file->lookup(str, idx); +bool Dict::Lookup(const char *str, glong &idx) +{ + return syn_file->lookup(str, idx) || idx_file->lookup(str, idx); } -bool Dict::load(const std::string& ifofilename, bool verbose) +bool Dict::load(const std::string &ifofilename, bool verbose) { - gulong idxfilesize; - if (!load_ifofile(ifofilename, idxfilesize)) - return false; + gulong idxfilesize; + if (!load_ifofile(ifofilename, idxfilesize)) + return false; - std::string fullfilename(ifofilename); - fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "dict.dz"); + std::string fullfilename(ifofilename); + fullfilename.replace(fullfilename.length() - sizeof("ifo") + 1, sizeof("ifo") - 1, "dict.dz"); - if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) { - dictdzfile.reset(new DictData); - if (!dictdzfile->open(fullfilename, 0)) { - //g_print("open file %s failed!\n",fullfilename); - return false; - } - } else { - fullfilename.erase(fullfilename.length()-sizeof(".dz")+1, sizeof(".dz")-1); - dictfile = fopen(fullfilename.c_str(),"rb"); - if (!dictfile) { - //g_print("open file %s failed!\n",fullfilename); - return false; - } - } + if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) { + dictdzfile.reset(new DictData); + if (!dictdzfile->open(fullfilename, 0)) { + //g_print("open file %s failed!\n",fullfilename); + return false; + } + } else { + fullfilename.erase(fullfilename.length() - sizeof(".dz") + 1, sizeof(".dz") - 1); + dictfile = fopen(fullfilename.c_str(), "rb"); + if (!dictfile) { + //g_print("open file %s failed!\n",fullfilename); + return false; + } + } - fullfilename=ifofilename; - fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "idx.gz"); + fullfilename = ifofilename; + fullfilename.replace(fullfilename.length() - sizeof("ifo") + 1, sizeof("ifo") - 1, "idx.gz"); - if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) { - idx_file.reset(new WordListIndex); - } else { - fullfilename.erase(fullfilename.length()-sizeof(".gz")+1, sizeof(".gz")-1); - idx_file.reset(new OffsetIndex); - } + if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) { + idx_file.reset(new WordListIndex); + } else { + fullfilename.erase(fullfilename.length() - sizeof(".gz") + 1, sizeof(".gz") - 1); + idx_file.reset(new OffsetIndex); + } - if (!idx_file->load(fullfilename, wordcount, idxfilesize, verbose)) - return false; + if (!idx_file->load(fullfilename, wordcount, idxfilesize, verbose)) + return false; - fullfilename=ifofilename; - fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "syn"); - syn_file.reset(new SynFile); - syn_file->load(fullfilename, syn_wordcount); + fullfilename = ifofilename; + fullfilename.replace(fullfilename.length() - sizeof("ifo") + 1, sizeof("ifo") - 1, "syn"); + syn_file.reset(new SynFile); + syn_file->load(fullfilename, syn_wordcount); - //g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles()); - return true; + //g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles()); + return true; } -bool Dict::load_ifofile(const std::string& ifofilename, gulong &idxfilesize) +bool Dict::load_ifofile(const std::string &ifofilename, gulong &idxfilesize) { - DictInfo dict_info; - if (!dict_info.load_from_ifo_file(ifofilename, false)) - return false; - if (dict_info.wordcount==0) - return false; + DictInfo dict_info; + if (!dict_info.load_from_ifo_file(ifofilename, false)) + return false; + if (dict_info.wordcount == 0) + return false; - ifo_file_name=dict_info.ifo_file_name; - wordcount=dict_info.wordcount; - syn_wordcount=dict_info.syn_wordcount; - bookname=dict_info.bookname; + ifo_file_name = dict_info.ifo_file_name; + wordcount = dict_info.wordcount; + syn_wordcount = dict_info.syn_wordcount; + bookname = dict_info.bookname; - idxfilesize=dict_info.index_file_size; + idxfilesize = dict_info.index_file_size; - sametypesequence=dict_info.sametypesequence; + sametypesequence = dict_info.sametypesequence; - return true; + return true; } bool Dict::LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen) { - int iIndexCount = 0; + int iIndexCount = 0; - for (guint32 i=0; i < narticles() && iIndexCount < (iBuffLen - 1); i++) + for (guint32 i = 0; i < narticles() && iIndexCount < (iBuffLen - 1); i++) if (g_pattern_match_string(pspec, get_key(i))) - aIndex[iIndexCount++] = i; + aIndex[iIndexCount++] = i; aIndex[iIndexCount] = -1; // -1 is the end. - return iIndexCount > 0; + return iIndexCount > 0; } Libs::~Libs() { - for (Dict *p : oLib) - delete p; + for (Dict *p : oLib) + delete p; } -void Libs::load_dict(const std::string& url) +void Libs::load_dict(const std::string &url) { - Dict *lib=new Dict; - if (lib->load(url, verbose_)) - oLib.push_back(lib); - else - delete lib; + Dict *lib = new Dict; + if (lib->load(url, verbose_)) + oLib.push_back(lib); + else + delete lib; } -void Libs::load(const std::list& dicts_dirs, - const std::list& order_list, - const std::list& disable_list) +void Libs::load(const std::list &dicts_dirs, + const std::list &order_list, + const std::list &disable_list) { - for_each_file(dicts_dirs, ".ifo", order_list, disable_list, - [this](const std::string& url, bool disable) -> void { + for_each_file(dicts_dirs, ".ifo", order_list, disable_list, + [this](const std::string &url, bool disable) -> void { if (!disable) load_dict(url); }); } -const gchar *Libs::poGetCurrentWord(glong * iCurrent) +const gchar *Libs::poGetCurrentWord(glong *iCurrent) { - const gchar *poCurrentWord = nullptr; - const gchar *word; - for (std::vector::size_type iLib=0; iLib=narticles(iLib) || iCurrent[iLib]<0) - continue; - if ( poCurrentWord == nullptr ) { - poCurrentWord = poGetWord(iCurrent[iLib],iLib); - } else { - word = poGetWord(iCurrent[iLib],iLib); + const gchar *poCurrentWord = nullptr; + const gchar *word; + for (std::vector::size_type iLib = 0; iLib < oLib.size(); iLib++) { + if (iCurrent[iLib] == INVALID_INDEX) + continue; + if (iCurrent[iLib] >= narticles(iLib) || iCurrent[iLib] < 0) + continue; + if (poCurrentWord == nullptr) { + poCurrentWord = poGetWord(iCurrent[iLib], iLib); + } else { + word = poGetWord(iCurrent[iLib], iLib); - if (stardict_strcmp(poCurrentWord, word) > 0 ) - poCurrentWord = word; + if (stardict_strcmp(poCurrentWord, word) > 0) + poCurrentWord = word; + } } - } - return poCurrentWord; + return poCurrentWord; } const gchar *Libs::poGetNextWord(const gchar *sWord, glong *iCurrent) { - // the input can be: - // (word,iCurrent),read word,write iNext to iCurrent,and return next word. used by TopWin::NextCallback(); - // (nullptr,iCurrent),read iCurrent,write iNext to iCurrent,and return next word. used by AppCore::ListWords(); - const gchar *poCurrentWord = nullptr; + // the input can be: + // (word,iCurrent),read word,write iNext to iCurrent,and return next word. used by TopWin::NextCallback(); + // (nullptr,iCurrent),read iCurrent,write iNext to iCurrent,and return next word. used by AppCore::ListWords(); + const gchar *poCurrentWord = nullptr; size_t iCurrentLib = 0; - const gchar *word; + const gchar *word; - for (size_t iLib = 0; iLib < oLib.size(); ++iLib) { - if (sWord) - oLib[iLib]->Lookup(sWord, iCurrent[iLib]); - if (iCurrent[iLib]==INVALID_INDEX) - continue; - if (iCurrent[iLib]>=narticles(iLib) || iCurrent[iLib]<0) - continue; - if (poCurrentWord == nullptr ) { - poCurrentWord = poGetWord(iCurrent[iLib],iLib); - iCurrentLib = iLib; - } else { - word = poGetWord(iCurrent[iLib],iLib); + for (size_t iLib = 0; iLib < oLib.size(); ++iLib) { + if (sWord) + oLib[iLib]->Lookup(sWord, iCurrent[iLib]); + if (iCurrent[iLib] == INVALID_INDEX) + continue; + if (iCurrent[iLib] >= narticles(iLib) || iCurrent[iLib] < 0) + continue; + if (poCurrentWord == nullptr) { + poCurrentWord = poGetWord(iCurrent[iLib], iLib); + iCurrentLib = iLib; + } else { + word = poGetWord(iCurrent[iLib], iLib); - if (stardict_strcmp(poCurrentWord, word) > 0 ) { - poCurrentWord = word; - iCurrentLib = iLib; - } - } - } - if (poCurrentWord) { - iCurrent[iCurrentLib]++; - for (std::vector::size_type iLib=0;iLib=narticles(iLib) || iCurrent[iLib]<0) - continue; - if (strcmp(poCurrentWord, poGetWord(iCurrent[iLib],iLib)) == 0 ) - iCurrent[iLib]++; - } - poCurrentWord = poGetCurrentWord(iCurrent); - } - return poCurrentWord; + if (stardict_strcmp(poCurrentWord, word) > 0) { + poCurrentWord = word; + iCurrentLib = iLib; + } + } + } + if (poCurrentWord) { + iCurrent[iCurrentLib]++; + for (std::vector::size_type iLib = 0; iLib < oLib.size(); iLib++) { + if (iLib == iCurrentLib) + continue; + if (iCurrent[iLib] == INVALID_INDEX) + continue; + if (iCurrent[iLib] >= narticles(iLib) || iCurrent[iLib] < 0) + continue; + if (strcmp(poCurrentWord, poGetWord(iCurrent[iLib], iLib)) == 0) + iCurrent[iLib]++; + } + poCurrentWord = poGetCurrentWord(iCurrent); + } + return poCurrentWord; } - const gchar * -Libs::poGetPreWord(glong * iCurrent) +Libs::poGetPreWord(glong *iCurrent) { - // used by TopWin::PreviousCallback(); the iCurrent is cached by AppCore::TopWinWordChange(); - const gchar *poCurrentWord = nullptr; - std::vector::size_type iCurrentLib=0; - const gchar *word; + // used by TopWin::PreviousCallback(); the iCurrent is cached by AppCore::TopWinWordChange(); + const gchar *poCurrentWord = nullptr; + std::vector::size_type iCurrentLib = 0; + const gchar *word; - for (std::vector::size_type iLib=0;iLibnarticles(iLib) || iCurrent[iLib]<=0) - continue; - } - if ( poCurrentWord == nullptr ) { - poCurrentWord = poGetWord(iCurrent[iLib]-1,iLib); - iCurrentLib = iLib; - } else { - word = poGetWord(iCurrent[iLib]-1,iLib); - if (stardict_strcmp(poCurrentWord, word) < 0 ) { - poCurrentWord = word; - iCurrentLib = iLib; - } - } - } + for (std::vector::size_type iLib = 0; iLib < oLib.size(); iLib++) { + if (iCurrent[iLib] == INVALID_INDEX) + iCurrent[iLib] = narticles(iLib); + else { + if (iCurrent[iLib] > narticles(iLib) || iCurrent[iLib] <= 0) + continue; + } + if (poCurrentWord == nullptr) { + poCurrentWord = poGetWord(iCurrent[iLib] - 1, iLib); + iCurrentLib = iLib; + } else { + word = poGetWord(iCurrent[iLib] - 1, iLib); + if (stardict_strcmp(poCurrentWord, word) < 0) { + poCurrentWord = word; + iCurrentLib = iLib; + } + } + } - if (poCurrentWord) { - iCurrent[iCurrentLib]--; - for (std::vector::size_type iLib=0;iLibnarticles(iLib) || iCurrent[iLib]<=0) - continue; - if (strcmp(poCurrentWord, poGetWord(iCurrent[iLib]-1,iLib)) == 0) { - iCurrent[iLib]--; - } else { - if (iCurrent[iLib]==narticles(iLib)) - iCurrent[iLib]=INVALID_INDEX; - } - } - } - return poCurrentWord; + if (poCurrentWord) { + iCurrent[iCurrentLib]--; + for (std::vector::size_type iLib = 0; iLib < oLib.size(); iLib++) { + if (iLib == iCurrentLib) + continue; + if (iCurrent[iLib] > narticles(iLib) || iCurrent[iLib] <= 0) + continue; + if (strcmp(poCurrentWord, poGetWord(iCurrent[iLib] - 1, iLib)) == 0) { + iCurrent[iLib]--; + } else { + if (iCurrent[iLib] == narticles(iLib)) + iCurrent[iLib] = INVALID_INDEX; + } + } + } + return poCurrentWord; } -bool Libs::LookupSimilarWord(const gchar* sWord, glong & iWordIndex, int iLib) +bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib) { - glong iIndex; + glong iIndex; bool bFound = false; - gchar *casestr; + gchar *casestr; - if (!bFound) { - // to lower case. - casestr = g_utf8_strdown(sWord, -1); - if (strcmp(casestr, sWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - // to upper case. - if (!bFound) { - casestr = g_utf8_strup(sWord, -1); - if (strcmp(casestr, sWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - // Upper the first character and lower others. - if (!bFound) { - gchar *nextchar = g_utf8_next_char(sWord); - gchar *firstchar = g_utf8_strup(sWord, nextchar - sWord); - nextchar = g_utf8_strdown(nextchar, -1); - casestr = g_strdup_printf("%s%s", firstchar, nextchar); - g_free(firstchar); - g_free(nextchar); - if (strcmp(casestr, sWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - } + if (!bFound) { + // to lower case. + casestr = g_utf8_strdown(sWord, -1); + if (strcmp(casestr, sWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + // to upper case. + if (!bFound) { + casestr = g_utf8_strup(sWord, -1); + if (strcmp(casestr, sWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + // Upper the first character and lower others. + if (!bFound) { + gchar *nextchar = g_utf8_next_char(sWord); + gchar *firstchar = g_utf8_strup(sWord, nextchar - sWord); + nextchar = g_utf8_strdown(nextchar, -1); + casestr = g_strdup_printf("%s%s", firstchar, nextchar); + g_free(firstchar); + g_free(nextchar); + if (strcmp(casestr, sWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } - if (bIsPureEnglish(sWord)) { - // If not Found , try other status of sWord. - int iWordLen=strlen(sWord); + if (bIsPureEnglish(sWord)) { + // If not Found , try other status of sWord. + int iWordLen = strlen(sWord); bool isupcase; - gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1); + gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1); - //cut one char "s" or "d" - if(!bFound && iWordLen>1) { - isupcase = sWord[iWordLen-1]=='S' || !strncmp(&sWord[iWordLen-2],"ED",2); - if (isupcase || sWord[iWordLen-1]=='s' || !strncmp(&sWord[iWordLen-2],"ed",2)) { - strcpy(sNewWord,sWord); - sNewWord[iWordLen-1]='\0'; // cut "s" or "d" - if (oLib[iLib]->Lookup(sNewWord, iIndex)) - bFound=true; - else if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - } - } + //cut one char "s" or "d" + if (!bFound && iWordLen > 1) { + isupcase = sWord[iWordLen - 1] == 'S' || !strncmp(&sWord[iWordLen - 2], "ED", 2); + if (isupcase || sWord[iWordLen - 1] == 's' || !strncmp(&sWord[iWordLen - 2], "ed", 2)) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 1] = '\0'; // cut "s" or "d" + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } - //cut "ly" - if(!bFound && iWordLen>2) { - isupcase = !strncmp(&sWord[iWordLen-2],"LY",2); - if (isupcase || (!strncmp(&sWord[iWordLen-2],"ly",2))) { - strcpy(sNewWord,sWord); - sNewWord[iWordLen-2]='\0'; // cut "ly" - if (iWordLen>5 && sNewWord[iWordLen-3]==sNewWord[iWordLen-4] - && !bIsVowel(sNewWord[iWordLen-4]) && - bIsVowel(sNewWord[iWordLen-5])) {//doubled + //cut "ly" + if (!bFound && iWordLen > 2) { + isupcase = !strncmp(&sWord[iWordLen - 2], "LY", 2); + if (isupcase || (!strncmp(&sWord[iWordLen - 2], "ly", 2))) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 2] = '\0'; // cut "ly" + if (iWordLen > 5 && sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4] + && !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { //doubled - sNewWord[iWordLen-3]='\0'; - if( oLib[iLib]->Lookup(sNewWord, iIndex) ) - bFound=true; - else { - if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - if (!bFound) - sNewWord[iWordLen-3]=sNewWord[iWordLen-4]; //restore - } - } - if (!bFound) { - if (oLib[iLib]->Lookup(sNewWord, iIndex)) - bFound=true; - else if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - } - } - } + sNewWord[iWordLen - 3] = '\0'; + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else { + if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + if (!bFound) + sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; //restore + } + } + if (!bFound) { + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } + } - //cut "ing" - if(!bFound && iWordLen>3) { - isupcase = !strncmp(&sWord[iWordLen-3],"ING",3); - if (isupcase || !strncmp(&sWord[iWordLen-3],"ing",3) ) { - strcpy(sNewWord,sWord); - sNewWord[iWordLen-3]='\0'; - if ( iWordLen>6 && (sNewWord[iWordLen-4]==sNewWord[iWordLen-5]) - && !bIsVowel(sNewWord[iWordLen-5]) && - bIsVowel(sNewWord[iWordLen-6])) { //doubled - sNewWord[iWordLen-4]='\0'; - if (oLib[iLib]->Lookup(sNewWord, iIndex)) - bFound=true; - else { - if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - if (!bFound) - sNewWord[iWordLen-4]=sNewWord[iWordLen-5]; //restore - } - } - if( !bFound ) { - if (oLib[iLib]->Lookup(sNewWord, iIndex)) - bFound=true; - else if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - } - if(!bFound) { - if (isupcase) - strcat(sNewWord,"E"); // add a char "E" - else - strcat(sNewWord,"e"); // add a char "e" - if(oLib[iLib]->Lookup(sNewWord, iIndex)) - bFound=true; - else if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - } - } - } + //cut "ing" + if (!bFound && iWordLen > 3) { + isupcase = !strncmp(&sWord[iWordLen - 3], "ING", 3); + if (isupcase || !strncmp(&sWord[iWordLen - 3], "ing", 3)) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 3] = '\0'; + if (iWordLen > 6 && (sNewWord[iWordLen - 4] == sNewWord[iWordLen - 5]) + && !bIsVowel(sNewWord[iWordLen - 5]) && bIsVowel(sNewWord[iWordLen - 6])) { //doubled + sNewWord[iWordLen - 4] = '\0'; + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else { + if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + if (!bFound) + sNewWord[iWordLen - 4] = sNewWord[iWordLen - 5]; //restore + } + } + if (!bFound) { + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + if (!bFound) { + if (isupcase) + strcat(sNewWord, "E"); // add a char "E" + else + strcat(sNewWord, "e"); // add a char "e" + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } + } - //cut two char "es" - if(!bFound && iWordLen>3) { - isupcase = (!strncmp(&sWord[iWordLen-2],"ES",2) && - (sWord[iWordLen-3] == 'S' || - sWord[iWordLen-3] == 'X' || - sWord[iWordLen-3] == 'O' || - (iWordLen >4 && sWord[iWordLen-3] == 'H' && - (sWord[iWordLen-4] == 'C' || - sWord[iWordLen-4] == 'S')))); - if (isupcase || - (!strncmp(&sWord[iWordLen-2],"es",2) && - (sWord[iWordLen-3] == 's' || sWord[iWordLen-3] == 'x' || - sWord[iWordLen-3] == 'o' || - (iWordLen >4 && sWord[iWordLen-3] == 'h' && - (sWord[iWordLen-4] == 'c' || sWord[iWordLen-4] == 's'))))) { - strcpy(sNewWord,sWord); - sNewWord[iWordLen-2]='\0'; - if(oLib[iLib]->Lookup(sNewWord, iIndex)) - bFound=true; - else if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - } - } + //cut two char "es" + if (!bFound && iWordLen > 3) { + isupcase = (!strncmp(&sWord[iWordLen - 2], "ES", 2) && (sWord[iWordLen - 3] == 'S' || sWord[iWordLen - 3] == 'X' || sWord[iWordLen - 3] == 'O' || (iWordLen > 4 && sWord[iWordLen - 3] == 'H' && (sWord[iWordLen - 4] == 'C' || sWord[iWordLen - 4] == 'S')))); + if (isupcase || (!strncmp(&sWord[iWordLen - 2], "es", 2) && (sWord[iWordLen - 3] == 's' || sWord[iWordLen - 3] == 'x' || sWord[iWordLen - 3] == 'o' || (iWordLen > 4 && sWord[iWordLen - 3] == 'h' && (sWord[iWordLen - 4] == 'c' || sWord[iWordLen - 4] == 's'))))) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 2] = '\0'; + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } - //cut "ed" - if (!bFound && iWordLen>3) { - isupcase = !strncmp(&sWord[iWordLen-2],"ED",2); - if (isupcase || !strncmp(&sWord[iWordLen-2],"ed",2)) { - strcpy(sNewWord,sWord); - sNewWord[iWordLen-2]='\0'; - if (iWordLen>5 && (sNewWord[iWordLen-3]==sNewWord[iWordLen-4]) - && !bIsVowel(sNewWord[iWordLen-4]) && - bIsVowel(sNewWord[iWordLen-5])) {//doubled - sNewWord[iWordLen-3]='\0'; - if (oLib[iLib]->Lookup(sNewWord, iIndex)) - bFound=true; - else { - if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - if (!bFound) - sNewWord[iWordLen-3]=sNewWord[iWordLen-4]; //restore - } - } - if (!bFound) { - if (oLib[iLib]->Lookup(sNewWord, iIndex)) - bFound=true; - else if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - } - } - } + //cut "ed" + if (!bFound && iWordLen > 3) { + isupcase = !strncmp(&sWord[iWordLen - 2], "ED", 2); + if (isupcase || !strncmp(&sWord[iWordLen - 2], "ed", 2)) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 2] = '\0'; + if (iWordLen > 5 && (sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4]) + && !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { //doubled + sNewWord[iWordLen - 3] = '\0'; + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else { + if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + if (!bFound) + sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; //restore + } + } + if (!bFound) { + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } + } - // cut "ied" , add "y". - if (!bFound && iWordLen>3) { - isupcase = !strncmp(&sWord[iWordLen-3],"IED",3); - if (isupcase || (!strncmp(&sWord[iWordLen-3],"ied",3))) { - strcpy(sNewWord,sWord); - sNewWord[iWordLen-3]='\0'; - if (isupcase) - strcat(sNewWord,"Y"); // add a char "Y" - else - strcat(sNewWord,"y"); // add a char "y" - if (oLib[iLib]->Lookup(sNewWord, iIndex)) - bFound=true; - else if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - } - } + // cut "ied" , add "y". + if (!bFound && iWordLen > 3) { + isupcase = !strncmp(&sWord[iWordLen - 3], "IED", 3); + if (isupcase || (!strncmp(&sWord[iWordLen - 3], "ied", 3))) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 3] = '\0'; + if (isupcase) + strcat(sNewWord, "Y"); // add a char "Y" + else + strcat(sNewWord, "y"); // add a char "y" + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } - // cut "ies" , add "y". - if (!bFound && iWordLen>3) { - isupcase = !strncmp(&sWord[iWordLen-3],"IES",3); - if (isupcase || (!strncmp(&sWord[iWordLen-3],"ies",3))) { - strcpy(sNewWord,sWord); - sNewWord[iWordLen-3]='\0'; - if (isupcase) - strcat(sNewWord,"Y"); // add a char "Y" - else - strcat(sNewWord,"y"); // add a char "y" - if(oLib[iLib]->Lookup(sNewWord, iIndex)) - bFound=true; - else if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - } - } + // cut "ies" , add "y". + if (!bFound && iWordLen > 3) { + isupcase = !strncmp(&sWord[iWordLen - 3], "IES", 3); + if (isupcase || (!strncmp(&sWord[iWordLen - 3], "ies", 3))) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 3] = '\0'; + if (isupcase) + strcat(sNewWord, "Y"); // add a char "Y" + else + strcat(sNewWord, "y"); // add a char "y" + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } - // cut "er". - if (!bFound && iWordLen>2) { - isupcase = !strncmp(&sWord[iWordLen-2],"ER",2); - if (isupcase || (!strncmp(&sWord[iWordLen-2],"er",2))) { - strcpy(sNewWord,sWord); - sNewWord[iWordLen-2]='\0'; - if(oLib[iLib]->Lookup(sNewWord, iIndex)) - bFound=true; - else if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - } - } + // cut "er". + if (!bFound && iWordLen > 2) { + isupcase = !strncmp(&sWord[iWordLen - 2], "ER", 2); + if (isupcase || (!strncmp(&sWord[iWordLen - 2], "er", 2))) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 2] = '\0'; + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } - // cut "est". - if (!bFound && iWordLen>3) { - isupcase = !strncmp(&sWord[iWordLen-3], "EST", 3); - if (isupcase || (!strncmp(&sWord[iWordLen-3],"est", 3))) { - strcpy(sNewWord,sWord); - sNewWord[iWordLen-3]='\0'; - if(oLib[iLib]->Lookup(sNewWord, iIndex)) - bFound=true; - else if (isupcase || g_ascii_isupper(sWord[0])) { - casestr = g_ascii_strdown(sNewWord, -1); - if (strcmp(casestr, sNewWord)) { - if(oLib[iLib]->Lookup(casestr, iIndex)) - bFound=true; - } - g_free(casestr); - } - } - } + // cut "est". + if (!bFound && iWordLen > 3) { + isupcase = !strncmp(&sWord[iWordLen - 3], "EST", 3); + if (isupcase || (!strncmp(&sWord[iWordLen - 3], "est", 3))) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 3] = '\0'; + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } - g_free(sNewWord); - } + g_free(sNewWord); + } - if (bFound) - iWordIndex = iIndex; + if (bFound) + iWordIndex = iIndex; #if 0 else { //don't change iWordIndex here. @@ -1398,254 +1391,252 @@ bool Libs::LookupSimilarWord(const gchar* sWord, glong & iWordIndex, int iLib) //iWordIndex = INVALID_INDEX; } #endif - return bFound; + return bFound; } -bool Libs::SimpleLookupWord(const gchar* sWord, glong & iWordIndex, int iLib) +bool Libs::SimpleLookupWord(const gchar *sWord, glong &iWordIndex, int iLib) { bool bFound = oLib[iLib]->Lookup(sWord, iWordIndex); - if (!bFound && fuzzy_) - bFound = LookupSimilarWord(sWord, iWordIndex, iLib); - return bFound; + if (!bFound && fuzzy_) + bFound = LookupSimilarWord(sWord, iWordIndex, iLib); + return bFound; } bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size) { - if (sWord[0] == '\0') - return false; + if (sWord[0] == '\0') + return false; - Fuzzystruct oFuzzystruct[reslist_size]; + Fuzzystruct oFuzzystruct[reslist_size]; - for (int i = 0; i < reslist_size; i++) { - oFuzzystruct[i].pMatchWord = nullptr; - oFuzzystruct[i].iMatchWordDistance = iMaxFuzzyDistance; - } - int iMaxDistance = iMaxFuzzyDistance; - int iDistance; - bool Found = false; - EditDistance oEditDistance; + for (int i = 0; i < reslist_size; i++) { + oFuzzystruct[i].pMatchWord = nullptr; + oFuzzystruct[i].iMatchWordDistance = iMaxFuzzyDistance; + } + int iMaxDistance = iMaxFuzzyDistance; + int iDistance; + bool Found = false; + EditDistance oEditDistance; - glong iCheckWordLen; - const char *sCheck; - gunichar *ucs4_str1, *ucs4_str2; - glong ucs4_str2_len; + glong iCheckWordLen; + const char *sCheck; + gunichar *ucs4_str1, *ucs4_str2; + glong ucs4_str2_len; - ucs4_str2 = g_utf8_to_ucs4_fast(sWord, -1, &ucs4_str2_len); - unicode_strdown(ucs4_str2); + ucs4_str2 = g_utf8_to_ucs4_fast(sWord, -1, &ucs4_str2_len); + unicode_strdown(ucs4_str2); - for (size_t iLib = 0; iLib < oLib.size(); ++iLib) { - if (progress_func) - progress_func(); + for (size_t iLib = 0; iLib < oLib.size(); ++iLib) { + if (progress_func) + progress_func(); - //if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) { - //there are Chinese dicts and English dicts... + //if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) { + //there are Chinese dicts and English dicts... - const int iwords = narticles(iLib); - for (int index=0; index=iMaxDistance || - ucs4_str2_len-iCheckWordLen>=iMaxDistance) - continue; - ucs4_str1 = g_utf8_to_ucs4_fast(sCheck, -1, nullptr); - if (iCheckWordLen > ucs4_str2_len) - ucs4_str1[ucs4_str2_len]=0; - unicode_strdown(ucs4_str1); + const int iwords = narticles(iLib); + for (int index = 0; index < iwords; index++) { + sCheck = poGetWord(index, iLib); + // tolower and skip too long or too short words + iCheckWordLen = g_utf8_strlen(sCheck, -1); + if (iCheckWordLen - ucs4_str2_len >= iMaxDistance || ucs4_str2_len - iCheckWordLen >= iMaxDistance) + continue; + ucs4_str1 = g_utf8_to_ucs4_fast(sCheck, -1, nullptr); + if (iCheckWordLen > ucs4_str2_len) + ucs4_str1[ucs4_str2_len] = 0; + unicode_strdown(ucs4_str1); - iDistance = oEditDistance.CalEditDistance(ucs4_str1, ucs4_str2, iMaxDistance); - g_free(ucs4_str1); - if (iDistance iMaxDistance) - iMaxDistance = oFuzzystruct[j].iMatchWordDistance; - } // calc new iMaxDistance - } // add to list - } // find one - } // each word + if (!bAlreadyInList) { + if (oFuzzystruct[iMaxDistanceAt].pMatchWord) + g_free(oFuzzystruct[iMaxDistanceAt].pMatchWord); + oFuzzystruct[iMaxDistanceAt].pMatchWord = g_strdup(sCheck); + oFuzzystruct[iMaxDistanceAt].iMatchWordDistance = iDistance; + // calc new iMaxDistance + iMaxDistance = iDistance; + for (int j = 0; j < reslist_size; j++) { + if (oFuzzystruct[j].iMatchWordDistance > iMaxDistance) + iMaxDistance = oFuzzystruct[j].iMatchWordDistance; + } // calc new iMaxDistance + } // add to list + } // find one + } // each word - } // each lib - g_free(ucs4_str2); + } // each lib + g_free(ucs4_str2); - if (Found)// sort with distance - std::sort(oFuzzystruct, oFuzzystruct + reslist_size, [](const Fuzzystruct& lh, const Fuzzystruct& rh) -> bool { - if (lh.iMatchWordDistance!=rh.iMatchWordDistance) - return lh.iMatchWordDistance bool { + if (lh.iMatchWordDistance != rh.iMatchWordDistance) + return lh.iMatchWordDistance < rh.iMatchWordDistance; - if (lh.pMatchWord && rh.pMatchWord) - return stardict_strcmp(lh.pMatchWord, rh.pMatchWord)<0; + if (lh.pMatchWord && rh.pMatchWord) + return stardict_strcmp(lh.pMatchWord, rh.pMatchWord) < 0; - return false; - }); + return false; + }); - for (gint i = 0; i < reslist_size; ++i) - reslist[i] = oFuzzystruct[i].pMatchWord; + for (gint i = 0; i < reslist_size; ++i) + reslist[i] = oFuzzystruct[i].pMatchWord; - return Found; + return Found; } gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord) { - glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1]; - gint iMatchCount = 0; - GPatternSpec *pspec = g_pattern_spec_new(word); + glong aiIndex[MAX_MATCH_ITEM_PER_LIB + 1]; + gint iMatchCount = 0; + GPatternSpec *pspec = g_pattern_spec_new(word); - for (std::vector::size_type iLib=0; iLib::size_type iLib = 0; iLib < oLib.size(); iLib++) { + //if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib)) + // -iMatchCount,so save time,but may got less result and the word may repeat. - if (oLib[iLib]->LookupWithRule(pspec,aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) { - if (progress_func) - progress_func(); - for (int i=0; aiIndex[i]!=-1; i++) { - const gchar * sMatchWord = poGetWord(aiIndex[i],iLib); - bool bAlreadyInList = false; - for (int j=0; jLookupWithRule(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB + 1)) { + if (progress_func) + progress_func(); + for (int i = 0; aiIndex[i] != -1; i++) { + const gchar *sMatchWord = poGetWord(aiIndex[i], iLib); + bool bAlreadyInList = false; + for (int j = 0; j < iMatchCount; j++) { + if (strcmp(ppMatchWord[j], sMatchWord) == 0) { //already in list + bAlreadyInList = true; + break; + } + } + if (!bAlreadyInList) + ppMatchWord[iMatchCount++] = g_strdup(sMatchWord); + } + } } - } - g_pattern_spec_free(pspec); + g_pattern_spec_free(pspec); - if (iMatchCount)// sort it. - std::sort(ppMatchWord, ppMatchWord+iMatchCount, [](const char *lh, const char *rh) -> bool { - return stardict_strcmp(lh, rh)<0; - }); + if (iMatchCount) // sort it. + std::sort(ppMatchWord, ppMatchWord + iMatchCount, [](const char *lh, const char *rh) -> bool { + return stardict_strcmp(lh, rh) < 0; + }); - return iMatchCount; + return iMatchCount; } bool Libs::LookupData(const gchar *sWord, std::vector *reslist) { - std::vector SearchWords; - std::string SearchWord; - const char *p=sWord; - while (*p) { - if (*p=='\\') { - p++; - switch (*p) { - case ' ': - SearchWord+=' '; - break; - case '\\': - SearchWord+='\\'; - break; - case 't': - SearchWord+='\t'; - break; - case 'n': - SearchWord+='\n'; - break; - default: - SearchWord+=*p; - } - } else if (*p == ' ') { - if (!SearchWord.empty()) { - SearchWords.push_back(SearchWord); - SearchWord.clear(); - } - } else { - SearchWord+=*p; - } - p++; + std::vector SearchWords; + std::string SearchWord; + const char *p = sWord; + while (*p) { + if (*p == '\\') { + p++; + switch (*p) { + case ' ': + SearchWord += ' '; + break; + case '\\': + SearchWord += '\\'; + break; + case 't': + SearchWord += '\t'; + break; + case 'n': + SearchWord += '\n'; + break; + default: + SearchWord += *p; + } + } else if (*p == ' ') { + if (!SearchWord.empty()) { + SearchWords.push_back(SearchWord); + SearchWord.clear(); + } + } else { + SearchWord += *p; } - if (!SearchWord.empty()) { - SearchWords.push_back(SearchWord); - SearchWord.clear(); - } - if (SearchWords.empty()) - return false; + p++; + } + if (!SearchWord.empty()) { + SearchWords.push_back(SearchWord); + SearchWord.clear(); + } + if (SearchWords.empty()) + return false; - guint32 max_size =0; - gchar *origin_data = nullptr; - for (std::vector::size_type i=0; icontainSearchData()) - continue; - if (progress_func) - progress_func(); - const gulong iwords = narticles(i); - const gchar *key; - guint32 offset, size; - for (gulong j=0; jget_key_and_data(j, &key, &offset, &size); - if (size>max_size) { - origin_data = (gchar *)g_realloc(origin_data, size); - max_size = size; - } - if (oLib[i]->SearchData(SearchWords, offset, size, origin_data)) - reslist[i].push_back(g_strdup(key)); - } - } - g_free(origin_data); + guint32 max_size = 0; + gchar *origin_data = nullptr; + for (std::vector::size_type i = 0; i < oLib.size(); ++i) { + if (!oLib[i]->containSearchData()) + continue; + if (progress_func) + progress_func(); + const gulong iwords = narticles(i); + const gchar *key; + guint32 offset, size; + for (gulong j = 0; j < iwords; ++j) { + oLib[i]->get_key_and_data(j, &key, &offset, &size); + if (size > max_size) { + origin_data = (gchar *)g_realloc(origin_data, size); + max_size = size; + } + if (oLib[i]->SearchData(SearchWords, offset, size, origin_data)) + reslist[i].push_back(g_strdup(key)); + } + } + g_free(origin_data); - std::vector::size_type i; - for (i = 0; i < oLib.size(); ++i) - if (!reslist[i].empty()) - break; + std::vector::size_type i; + for (i = 0; i < oLib.size(); ++i) + if (!reslist[i].empty()) + break; - return i != oLib.size(); + return i != oLib.size(); } /**************************************************/ -query_t analyze_query(const char *s, std::string& res) +query_t analyze_query(const char *s, std::string &res) { - if (!s || !*s) { - res=""; - return qtSIMPLE; - } - if (*s=='/') { - res=s+1; - return qtFUZZY; - } + if (!s || !*s) { + res = ""; + return qtSIMPLE; + } + if (*s == '/') { + res = s + 1; + return qtFUZZY; + } - if (*s=='|') { - res=s+1; - return qtDATA; - } + if (*s == '|') { + res = s + 1; + return qtDATA; + } - bool regexp=false; - const char *p=s; - res=""; - for (; *p; res+=*p, ++p) { - if (*p=='\\') { - ++p; - if (!*p) - break; - continue; - } - if (*p=='*' || *p=='?') - regexp=true; - } - if (regexp) - return qtREGEXP; + bool regexp = false; + const char *p = s; + res = ""; + for (; *p; res += *p, ++p) { + if (*p == '\\') { + ++p; + if (!*p) + break; + continue; + } + if (*p == '*' || *p == '?') + regexp = true; + } + if (regexp) + return qtREGEXP; - return qtSIMPLE; + return qtSIMPLE; } diff --git a/src/stardict_lib.hpp b/src/stardict_lib.hpp index abcbe56..a629cbe 100644 --- a/src/stardict_lib.hpp +++ b/src/stardict_lib.hpp @@ -2,17 +2,17 @@ #include #include +#include #include +#include #include #include #include -#include -#include #include "dictziplib.hpp" -const int MAX_MATCH_ITEM_PER_LIB=100; -const int MAX_FUZZY_DISTANCE= 3; // at most MAX_FUZZY_DISTANCE-1 differences allowed when find similar words +const int MAX_MATCH_ITEM_PER_LIB = 100; +const int MAX_FUZZY_DISTANCE = 3; // at most MAX_FUZZY_DISTANCE-1 differences allowed when find similar words inline guint32 get_uint32(const gchar *addr) { @@ -26,172 +26,190 @@ inline void set_uint32(gchar *addr, guint32 val) memcpy(addr, &val, sizeof(guint32)); } - struct cacheItem { - guint32 offset; - gchar *data; - //write code here to make it inline - cacheItem() { data = nullptr;} - ~cacheItem() { g_free(data); } + guint32 offset; + gchar *data; + //write code here to make it inline + cacheItem() { data = nullptr; } + ~cacheItem() { g_free(data); } }; const int WORDDATA_CACHE_NUM = 10; -const int INVALID_INDEX=-100; +const int INVALID_INDEX = -100; -class DictBase { +class DictBase +{ public: - DictBase() {} - ~DictBase() { + DictBase() {} + ~DictBase() + { if (dictfile) fclose(dictfile); } - DictBase(const DictBase&) = delete; - DictBase& operator=(const DictBase&) = delete; - gchar * GetWordData(guint32 idxitem_offset, guint32 idxitem_size); - bool containSearchData() const { + DictBase(const DictBase &) = delete; + DictBase &operator=(const DictBase &) = delete; + gchar *GetWordData(guint32 idxitem_offset, guint32 idxitem_size); + bool containSearchData() const + { if (sametypesequence.empty()) return true; return sametypesequence.find_first_of("mlgxty") != std::string::npos; } - bool SearchData(std::vector &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data); + bool SearchData(std::vector &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data); + protected: - std::string sametypesequence; - FILE *dictfile = nullptr; - std::unique_ptr dictdzfile; + std::string sametypesequence; + FILE *dictfile = nullptr; + std::unique_ptr dictdzfile; + private: - cacheItem cache[WORDDATA_CACHE_NUM]; - gint cache_cur = 0; + cacheItem cache[WORDDATA_CACHE_NUM]; + gint cache_cur = 0; }; //this structure contain all information about dictionary struct DictInfo { - std::string ifo_file_name; - guint32 wordcount; - guint32 syn_wordcount; - std::string bookname; - std::string author; - std::string email; - std::string website; - std::string date; - std::string description; - guint32 index_file_size; - guint32 syn_file_size; - std::string sametypesequence; + std::string ifo_file_name; + guint32 wordcount; + guint32 syn_wordcount; + std::string bookname; + std::string author; + std::string email; + std::string website; + std::string date; + std::string description; + guint32 index_file_size; + guint32 syn_file_size; + std::string sametypesequence; - bool load_from_ifo_file(const std::string& ifofilename, bool istreedict); + bool load_from_ifo_file(const std::string &ifofilename, bool istreedict); }; -class IIndexFile { +class IIndexFile +{ public: - guint32 wordentry_offset; - guint32 wordentry_size; + guint32 wordentry_offset; + guint32 wordentry_size; - virtual ~IIndexFile() {} - virtual bool load(const std::string& url, gulong wc, gulong fsize, bool verbose) = 0; - virtual const gchar *get_key(glong idx) = 0; - virtual void get_data(glong idx) = 0; - virtual const gchar *get_key_and_data(glong idx) = 0; - virtual bool lookup(const char *str, glong &idx) = 0; + virtual ~IIndexFile() {} + virtual bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) = 0; + virtual const gchar *get_key(glong idx) = 0; + virtual void get_data(glong idx) = 0; + virtual const gchar *get_key_and_data(glong idx) = 0; + virtual bool lookup(const char *str, glong &idx) = 0; }; -class SynFile { +class SynFile +{ public: - bool load(const std::string& url, gulong wc); - bool lookup(const char *str, glong &idx); + bool load(const std::string &url, gulong wc); + bool lookup(const char *str, glong &idx); + private: - std::map synonyms; + std::map synonyms; }; -class Dict : public DictBase { +class Dict : public DictBase +{ public: - Dict() {} - Dict(const Dict&) = delete; - Dict& operator=(const Dict&) = delete; - bool load(const std::string& ifofilename, bool verbose); + Dict() {} + Dict(const Dict &) = delete; + Dict &operator=(const Dict &) = delete; + bool load(const std::string &ifofilename, bool verbose); - gulong narticles() const { return wordcount; } - const std::string& dict_name() const { return bookname; } - const std::string& ifofilename() const { return ifo_file_name; } + gulong narticles() const { return wordcount; } + const std::string &dict_name() const { return bookname; } + const std::string &ifofilename() const { return ifo_file_name; } - const gchar *get_key(glong index) { return idx_file->get_key(index); } - gchar *get_data(glong index) { + const gchar *get_key(glong index) { return idx_file->get_key(index); } + gchar *get_data(glong index) + { idx_file->get_data(index); return DictBase::GetWordData(idx_file->wordentry_offset, idx_file->wordentry_size); } - void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size) { + void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size) + { *key = idx_file->get_key_and_data(index); *offset = idx_file->wordentry_offset; *size = idx_file->wordentry_size; } - bool Lookup(const char *str, glong &idx); + bool Lookup(const char *str, glong &idx); - bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen); -private: - std::string ifo_file_name; - gulong wordcount; - gulong syn_wordcount; - std::string bookname; + bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen); - std::unique_ptr idx_file; - std::unique_ptr syn_file; - - bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize); -}; - -class Libs { -public: - Libs(std::function f = std::function()) { - progress_func = f; - iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg. - } - void setVerbose(bool verbose) { verbose_ = verbose; } - void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; } - ~Libs(); - Libs(const Libs&) = delete; - Libs& operator=(const Libs&) = delete; - - void load_dict(const std::string& url); - void load(const std::list& dicts_dirs, - const std::list& order_list, - const std::list& disable_list); - glong narticles(int idict) const { return oLib[idict]->narticles(); } - const std::string& dict_name(int idict) const { return oLib[idict]->dict_name(); } - gint ndicts() const { return oLib.size(); } - - const gchar *poGetWord(glong iIndex, int iLib) { - return oLib[iLib]->get_key(iIndex); - } - gchar * poGetWordData(glong iIndex,int iLib) { - if (iIndex == INVALID_INDEX) - return nullptr; - return oLib[iLib]->get_data(iIndex); - } - const gchar *poGetCurrentWord(glong *iCurrent); - const gchar *poGetNextWord(const gchar *word, glong *iCurrent); - const gchar *poGetPreWord(glong *iCurrent); - bool LookupWord(const gchar* sWord, glong& iWordIndex, int iLib) { - return oLib[iLib]->Lookup(sWord, iWordIndex); - } - bool LookupSimilarWord(const gchar* sWord, glong & iWordIndex, int iLib); - bool SimpleLookupWord(const gchar* sWord, glong & iWordIndex, int iLib); - - - bool LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size); - gint LookupWithRule(const gchar *sWord, gchar *reslist[]); - bool LookupData(const gchar *sWord, std::vector *reslist); -protected: - bool fuzzy_; private: - std::vector oLib; // word Libs. - int iMaxFuzzyDistance; - std::function progress_func; - bool verbose_; + std::string ifo_file_name; + gulong wordcount; + gulong syn_wordcount; + std::string bookname; + + std::unique_ptr idx_file; + std::unique_ptr syn_file; + + bool load_ifofile(const std::string &ifofilename, gulong &idxfilesize); }; +class Libs +{ +public: + Libs(std::function f = std::function()) + { + progress_func = f; + iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg. + } + void setVerbose(bool verbose) { verbose_ = verbose; } + void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; } + ~Libs(); + Libs(const Libs &) = delete; + Libs &operator=(const Libs &) = delete; + + void load_dict(const std::string &url); + void load(const std::list &dicts_dirs, + const std::list &order_list, + const std::list &disable_list); + glong narticles(int idict) const { return oLib[idict]->narticles(); } + const std::string &dict_name(int idict) const { return oLib[idict]->dict_name(); } + gint ndicts() const { return oLib.size(); } + + const gchar *poGetWord(glong iIndex, int iLib) + { + return oLib[iLib]->get_key(iIndex); + } + gchar *poGetWordData(glong iIndex, int iLib) + { + if (iIndex == INVALID_INDEX) + return nullptr; + return oLib[iLib]->get_data(iIndex); + } + const gchar *poGetCurrentWord(glong *iCurrent); + const gchar *poGetNextWord(const gchar *word, glong *iCurrent); + const gchar *poGetPreWord(glong *iCurrent); + bool LookupWord(const gchar *sWord, glong &iWordIndex, int iLib) + { + return oLib[iLib]->Lookup(sWord, iWordIndex); + } + bool LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib); + bool SimpleLookupWord(const gchar *sWord, glong &iWordIndex, int iLib); + + bool LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size); + gint LookupWithRule(const gchar *sWord, gchar *reslist[]); + bool LookupData(const gchar *sWord, std::vector *reslist); + +protected: + bool fuzzy_; + +private: + std::vector oLib; // word Libs. + int iMaxFuzzyDistance; + std::function progress_func; + bool verbose_; +}; enum query_t { - qtSIMPLE, qtREGEXP, qtFUZZY, qtDATA + qtSIMPLE, + qtREGEXP, + qtFUZZY, + qtDATA }; - -extern query_t analyze_query(const char *s, std::string& res); +extern query_t analyze_query(const char *s, std::string &res); diff --git a/src/utils.cpp b/src/utils.cpp index 211110c..33bfeaa 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -19,100 +19,113 @@ */ #ifdef HAVE_CONFIG_H -# include "config.h" +#include "config.h" #endif +#include +#include +#include #include #include -#include -#include -#include -#include #include +#include #include "utils.hpp" -std::string utf8_to_locale_ign_err(const std::string& utf8_str) +std::string utf8_to_locale_ign_err(const std::string &utf8_str) { - std::string res; + std::string res; - const char *charset; - if (g_get_charset(&charset)) - res = utf8_str; - else { + const char *charset; + if (g_get_charset(&charset)) + res = utf8_str; + else { gsize bytes_read, bytes_written; glib::Error err; glib::CharStr tmp(g_convert_with_fallback(utf8_str.c_str(), -1, charset, "UTF-8", nullptr, &bytes_read, &bytes_written, get_addr(err))); - if (nullptr == get_impl(tmp)){ - fprintf(stderr, _("Can not convert %s to current locale.\n"), utf8_str.c_str()); - fprintf(stderr, "%s\n", err->message); - exit(EXIT_FAILURE); - } - res = get_impl(tmp); - } + if (nullptr == get_impl(tmp)) { + fprintf(stderr, _("Can not convert %s to current locale.\n"), utf8_str.c_str()); + fprintf(stderr, "%s\n", err->message); + exit(EXIT_FAILURE); + } + res = get_impl(tmp); + } - return res; + return res; } -static void __for_each_file(const std::string& dirname, const std::string& suff, - const std::list& order_list, const std::list& disable_list, - const std::function& f) +static void __for_each_file(const std::string &dirname, const std::string &suff, + const std::list &order_list, const std::list &disable_list, + const std::function &f) { - GDir *dir = g_dir_open(dirname.c_str(), 0, nullptr); + GDir *dir = g_dir_open(dirname.c_str(), 0, nullptr); if (dir) { - const gchar *filename; + const gchar *filename; - while ((filename = g_dir_read_name(dir))!=nullptr) { - const std::string fullfilename(dirname+G_DIR_SEPARATOR_S+filename); - if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_DIR)) - __for_each_file(fullfilename, suff, order_list, disable_list, f); - else if (g_str_has_suffix(filename, suff.c_str()) && - std::find(order_list.begin(), order_list.end(), - fullfilename)==order_list.end()) { - const bool disable = std::find(disable_list.begin(), - disable_list.end(), - fullfilename)!=disable_list.end(); + while ((filename = g_dir_read_name(dir)) != nullptr) { + const std::string fullfilename(dirname + G_DIR_SEPARATOR_S + filename); + if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_DIR)) + __for_each_file(fullfilename, suff, order_list, disable_list, f); + else if (g_str_has_suffix(filename, suff.c_str()) && std::find(order_list.begin(), order_list.end(), fullfilename) == order_list.end()) { + const bool disable = std::find(disable_list.begin(), + disable_list.end(), + fullfilename) + != disable_list.end(); f(fullfilename, disable); - } - } - g_dir_close(dir); - } + } + } + g_dir_close(dir); + } } - -void for_each_file(const std::list& dirs_list, const std::string& suff, - const std::list& order_list, const std::list& disable_list, - const std::function& f) +void for_each_file(const std::list &dirs_list, const std::string &suff, + const std::list &order_list, const std::list &disable_list, + const std::function &f) { - for (const std::string & item : order_list) { - const bool disable = std::find(disable_list.begin(), disable_list.end(), item) != disable_list.end(); - f(item, disable); - } - for (const std::string& item : dirs_list) - __for_each_file(item, suff, order_list, disable_list, f); + for (const std::string &item : order_list) { + const bool disable = std::find(disable_list.begin(), disable_list.end(), item) != disable_list.end(); + f(item, disable); + } + for (const std::string &item : dirs_list) + __for_each_file(item, suff, order_list, disable_list, f); } // based on https://stackoverflow.com/questions/7724448/simple-json-string-escape-for-c/33799784#33799784 -std::string json_escape_string(const std::string &s) { - std::ostringstream o; - for (auto c = s.cbegin(); c != s.cend(); c++) { - switch (*c) { - case '"': o << "\\\""; break; - case '\\': o << "\\\\"; break; - case '\b': o << "\\b"; break; - case '\f': o << "\\f"; break; - case '\n': o << "\\n"; break; - case '\r': o << "\\r"; break; - case '\t': o << "\\t"; break; - default: - if ('\x00' <= *c && *c <= '\x1f') { - o << "\\u" - << std::hex << std::setw(4) << std::setfill('0') << (int)*c; - } else { - o << *c; - } +std::string json_escape_string(const std::string &s) +{ + std::ostringstream o; + for (auto c = s.cbegin(); c != s.cend(); c++) { + switch (*c) { + case '"': + o << "\\\""; + break; + case '\\': + o << "\\\\"; + break; + case '\b': + o << "\\b"; + break; + case '\f': + o << "\\f"; + break; + case '\n': + o << "\\n"; + break; + case '\r': + o << "\\r"; + break; + case '\t': + o << "\\t"; + break; + default: + if ('\x00' <= *c && *c <= '\x1f') { + o << "\\u" + << std::hex << std::setw(4) << std::setfill('0') << (int)*c; + } else { + o << *c; + } + } } - } - return o.str(); + return o.str(); } diff --git a/src/utils.hpp b/src/utils.hpp index 8b1419f..1081fd3 100644 --- a/src/utils.hpp +++ b/src/utils.hpp @@ -1,63 +1,78 @@ #pragma once -#include -#include #include -#include -#include +#include #include +#include +#include +#include template -class ResourceWrapper { +class ResourceWrapper +{ public: - ResourceWrapper(T *p = nullptr) : p_(p) {} - ~ResourceWrapper() { free_resource(); } - ResourceWrapper(const ResourceWrapper&) = delete; - ResourceWrapper& operator=(const ResourceWrapper&) = delete; - T *operator->() const { return p_; } - bool operator!() const { return p_ == nullptr; } - const T& operator[](size_t idx) const { + ResourceWrapper(T *p = nullptr) + : p_(p) + { + } + ~ResourceWrapper() { free_resource(); } + ResourceWrapper(const ResourceWrapper &) = delete; + ResourceWrapper &operator=(const ResourceWrapper &) = delete; + T *operator->() const { return p_; } + bool operator!() const { return p_ == nullptr; } + const T &operator[](size_t idx) const + { assert(p_ != nullptr); return p_[idx]; } - void reset(T *newp) { - if (p_ != newp) { - free_resource(); - p_ = newp; - } - } + void reset(T *newp) + { + if (p_ != newp) { + free_resource(); + p_ = newp; + } + } - friend inline bool operator==(const ResourceWrapper& lhs, std::nullptr_t) noexcept { + friend inline bool operator==(const ResourceWrapper &lhs, std::nullptr_t) noexcept + { return !lhs.p_; } - friend inline bool operator!=(const ResourceWrapper& lhs, std::nullptr_t) noexcept { + friend inline bool operator!=(const ResourceWrapper &lhs, std::nullptr_t) noexcept + { return !!lhs.p_; } - friend inline T *get_impl(const ResourceWrapper& rw) { - return rw.p_; - } + friend inline T *get_impl(const ResourceWrapper &rw) + { + return rw.p_; + } - friend inline T **get_addr(ResourceWrapper& rw) { - return &rw.p_; - } + friend inline T **get_addr(ResourceWrapper &rw) + { + return &rw.p_; + } private: - T *p_; + T *p_; - void free_resource() { if (p_) unref_res(p_); } + void free_resource() + { + if (p_) + unref_res(p_); + } }; -namespace glib { - typedef ResourceWrapper CharStr; - typedef ResourceWrapper Error; +namespace glib +{ +typedef ResourceWrapper CharStr; +typedef ResourceWrapper Error; } -extern std::string utf8_to_locale_ign_err(const std::string& utf8_str); +extern std::string utf8_to_locale_ign_err(const std::string &utf8_str); -extern void for_each_file(const std::list& dirs_list, const std::string& suff, - const std::list& order_list, const std::list& disable_list, - const std::function& f); +extern void for_each_file(const std::list &dirs_list, const std::string &suff, + const std::list &order_list, const std::list &disable_list, + const std::function &f); extern std::string json_escape_string(const std::string &str);