mirror of
https://github.com/Dushistov/sdcv.git
synced 2025-12-15 17:31:56 +00:00
refactoring: apply clang-format rules
This commit is contained in:
@@ -26,20 +26,19 @@
|
||||
|
||||
//#define HAVE_MMAP // it will be defined in config.h; this can be done by configure.in with AC_FUNC_MMAP.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <fcntl.h>
|
||||
#include <limits.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "dictziplib.hpp"
|
||||
|
||||
#define USE_CACHE 1
|
||||
@@ -57,11 +56,11 @@
|
||||
|
||||
/* For gzip-compatible header, as defined in RFC 1952 */
|
||||
|
||||
/* Magic for GZIP (rfc1952) */
|
||||
/* Magic for GZIP (rfc1952) */
|
||||
#define GZ_MAGIC1 0x1f /* First magic byte */
|
||||
#define GZ_MAGIC2 0x8b /* Second magic byte */
|
||||
|
||||
/* FLaGs (bitmapped), from rfc1952 */
|
||||
/* FLaGs (bitmapped), from rfc1952 */
|
||||
#define GZ_FTEXT 0x01 /* Set for ASCII text */
|
||||
#define GZ_FHCRC 0x02 /* Header CRC16 */
|
||||
#define GZ_FEXTRA 0x04 /* Optional field (random access index) */
|
||||
@@ -70,7 +69,7 @@
|
||||
#define GZ_MAX 2 /* Maximum compression */
|
||||
#define GZ_FAST 4 /* Fastest compression */
|
||||
|
||||
/* These are from rfc1952 */
|
||||
/* These are from rfc1952 */
|
||||
#define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */
|
||||
#define GZ_OS_AMIGA 1 /* Amiga */
|
||||
#define GZ_OS_VMS 2 /* VMS (or OpenVMS) */
|
||||
@@ -112,7 +111,6 @@
|
||||
#define DICT_GZIP 2
|
||||
#define DICT_DZIP 3
|
||||
|
||||
|
||||
int DictData::read_header(const std::string &fname, int computeCRC)
|
||||
{
|
||||
FILE *str;
|
||||
@@ -123,7 +121,7 @@ int DictData::read_header(const std::string &fname, int computeCRC)
|
||||
char *pt;
|
||||
int c;
|
||||
struct stat sb;
|
||||
unsigned long crc = crc32( 0L, Z_NULL, 0 );
|
||||
unsigned long crc = crc32(0L, Z_NULL, 0);
|
||||
int count;
|
||||
unsigned long offset;
|
||||
|
||||
@@ -136,50 +134,50 @@ int DictData::read_header(const std::string &fname, int computeCRC)
|
||||
this->headerLength = GZ_XLEN - 1;
|
||||
this->type = DICT_UNKNOWN;
|
||||
|
||||
id1 = getc( str );
|
||||
id2 = getc( str );
|
||||
id1 = getc(str);
|
||||
id2 = getc(str);
|
||||
|
||||
if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2) {
|
||||
this->type = DICT_TEXT;
|
||||
fstat( fileno( str ), &sb );
|
||||
fstat(fileno(str), &sb);
|
||||
this->compressedLength = this->length = sb.st_size;
|
||||
this->origFilename = fname;
|
||||
this->mtime = sb.st_mtime;
|
||||
if (computeCRC) {
|
||||
rewind( str );
|
||||
while (!feof( str )) {
|
||||
if ((count = fread( buffer, 1, BUFFERSIZE, str ))) {
|
||||
rewind(str);
|
||||
while (!feof(str)) {
|
||||
if ((count = fread(buffer, 1, BUFFERSIZE, str))) {
|
||||
crc = crc32(crc, (Bytef *)buffer, count);
|
||||
}
|
||||
}
|
||||
}
|
||||
this->crc = crc;
|
||||
fclose( str );
|
||||
fclose(str);
|
||||
return 0;
|
||||
}
|
||||
this->type = DICT_GZIP;
|
||||
|
||||
this->method = getc( str );
|
||||
this->flags = getc( str );
|
||||
this->mtime = getc( str ) << 0;
|
||||
this->mtime |= getc( str ) << 8;
|
||||
this->mtime |= getc( str ) << 16;
|
||||
this->mtime |= getc( str ) << 24;
|
||||
this->extraFlags = getc( str );
|
||||
this->os = getc( str );
|
||||
this->method = getc(str);
|
||||
this->flags = getc(str);
|
||||
this->mtime = getc(str) << 0;
|
||||
this->mtime |= getc(str) << 8;
|
||||
this->mtime |= getc(str) << 16;
|
||||
this->mtime |= getc(str) << 24;
|
||||
this->extraFlags = getc(str);
|
||||
this->os = getc(str);
|
||||
|
||||
if (this->flags & GZ_FEXTRA) {
|
||||
extraLength = getc( str ) << 0;
|
||||
extraLength |= getc( str ) << 8;
|
||||
extraLength = getc(str) << 0;
|
||||
extraLength |= getc(str) << 8;
|
||||
this->headerLength += extraLength + 2;
|
||||
si1 = getc( str );
|
||||
si2 = getc( str );
|
||||
si1 = getc(str);
|
||||
si2 = getc(str);
|
||||
|
||||
if (si1 == GZ_RND_S1 || si2 == GZ_RND_S2) {
|
||||
subLength = getc( str ) << 0;
|
||||
subLength |= getc( str ) << 8;
|
||||
this->version = getc( str ) << 0;
|
||||
this->version |= getc( str ) << 8;
|
||||
subLength = getc(str) << 0;
|
||||
subLength |= getc(str) << 8;
|
||||
this->version = getc(str) << 0;
|
||||
this->version |= getc(str) << 8;
|
||||
|
||||
if (this->version != 1) {
|
||||
//err_internal( __FUNCTION__,
|
||||
@@ -187,30 +185,30 @@ int DictData::read_header(const std::string &fname, int computeCRC)
|
||||
// this->version );
|
||||
}
|
||||
|
||||
this->chunkLength = getc( str ) << 0;
|
||||
this->chunkLength |= getc( str ) << 8;
|
||||
this->chunkCount = getc( str ) << 0;
|
||||
this->chunkCount |= getc( str ) << 8;
|
||||
this->chunkLength = getc(str) << 0;
|
||||
this->chunkLength |= getc(str) << 8;
|
||||
this->chunkCount = getc(str) << 0;
|
||||
this->chunkCount |= getc(str) << 8;
|
||||
|
||||
if (this->chunkCount <= 0) {
|
||||
fclose( str );
|
||||
fclose(str);
|
||||
return 5;
|
||||
}
|
||||
this->chunks = (int *)malloc(sizeof( this->chunks[0] )
|
||||
* this->chunkCount );
|
||||
this->chunks = (int *)malloc(sizeof(this->chunks[0])
|
||||
* this->chunkCount);
|
||||
for (i = 0; i < this->chunkCount; i++) {
|
||||
this->chunks[i] = getc( str ) << 0;
|
||||
this->chunks[i] |= getc( str ) << 8;
|
||||
this->chunks[i] = getc(str) << 0;
|
||||
this->chunks[i] |= getc(str) << 8;
|
||||
}
|
||||
this->type = DICT_DZIP;
|
||||
} else {
|
||||
fseek( str, this->headerLength, SEEK_SET );
|
||||
fseek(str, this->headerLength, SEEK_SET);
|
||||
}
|
||||
}
|
||||
|
||||
if (this->flags & GZ_FNAME) { /* FIXME! Add checking against header len */
|
||||
pt = buffer;
|
||||
while ((c = getc( str )) && c != EOF)
|
||||
while ((c = getc(str)) && c != EOF)
|
||||
*pt++ = c;
|
||||
*pt = '\0';
|
||||
|
||||
@@ -222,41 +220,41 @@ int DictData::read_header(const std::string &fname, int computeCRC)
|
||||
|
||||
if (this->flags & GZ_COMMENT) { /* FIXME! Add checking for header len */
|
||||
pt = buffer;
|
||||
while ((c = getc( str )) && c != EOF)
|
||||
while ((c = getc(str)) && c != EOF)
|
||||
*pt++ = c;
|
||||
*pt = '\0';
|
||||
comment = buffer;
|
||||
headerLength += comment.length()+1;
|
||||
headerLength += comment.length() + 1;
|
||||
} else {
|
||||
comment = "";
|
||||
}
|
||||
|
||||
if (this->flags & GZ_FHCRC) {
|
||||
getc( str );
|
||||
getc( str );
|
||||
getc(str);
|
||||
getc(str);
|
||||
this->headerLength += 2;
|
||||
}
|
||||
|
||||
if (ftell( str ) != this->headerLength + 1) {
|
||||
if (ftell(str) != this->headerLength + 1) {
|
||||
//err_internal( __FUNCTION__,
|
||||
// "File position (%lu) != header length + 1 (%d)\n",
|
||||
// ftell( str ), this->headerLength + 1 );
|
||||
}
|
||||
|
||||
fseek( str, -8, SEEK_END );
|
||||
this->crc = getc( str ) << 0;
|
||||
this->crc |= getc( str ) << 8;
|
||||
this->crc |= getc( str ) << 16;
|
||||
this->crc |= getc( str ) << 24;
|
||||
this->length = getc( str ) << 0;
|
||||
this->length |= getc( str ) << 8;
|
||||
this->length |= getc( str ) << 16;
|
||||
this->length |= getc( str ) << 24;
|
||||
this->compressedLength = ftell( str );
|
||||
fseek(str, -8, SEEK_END);
|
||||
this->crc = getc(str) << 0;
|
||||
this->crc |= getc(str) << 8;
|
||||
this->crc |= getc(str) << 16;
|
||||
this->crc |= getc(str) << 24;
|
||||
this->length = getc(str) << 0;
|
||||
this->length |= getc(str) << 8;
|
||||
this->length |= getc(str) << 16;
|
||||
this->length |= getc(str) << 24;
|
||||
this->compressedLength = ftell(str);
|
||||
|
||||
/* Compute offsets */
|
||||
this->offsets = (unsigned long *)malloc( sizeof( this->offsets[0] )
|
||||
* this->chunkCount );
|
||||
this->offsets = (unsigned long *)malloc(sizeof(this->offsets[0])
|
||||
* this->chunkCount);
|
||||
for (offset = this->headerLength + 1, i = 0;
|
||||
i < this->chunkCount;
|
||||
i++) {
|
||||
@@ -264,11 +262,11 @@ int DictData::read_header(const std::string &fname, int computeCRC)
|
||||
offset += this->chunks[i];
|
||||
}
|
||||
|
||||
fclose( str );
|
||||
fclose(str);
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool DictData::open(const std::string& fname, int computeCRC)
|
||||
bool DictData::open(const std::string &fname, int computeCRC)
|
||||
{
|
||||
struct stat sb;
|
||||
int fd;
|
||||
@@ -287,7 +285,7 @@ bool DictData::open(const std::string& fname, int computeCRC)
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((fd = ::open(fname.c_str(), O_RDONLY )) < 0) {
|
||||
if ((fd = ::open(fname.c_str(), O_RDONLY)) < 0) {
|
||||
//err_fatal_errno( __FUNCTION__,
|
||||
// "Cannot open data file \"%s\"\n", fname );
|
||||
return false;
|
||||
@@ -303,7 +301,7 @@ bool DictData::open(const std::string& fname, int computeCRC)
|
||||
if (!mapfile.open(fname.c_str(), size))
|
||||
return false;
|
||||
|
||||
this->start=mapfile.begin();
|
||||
this->start = mapfile.begin();
|
||||
this->end = this->start + this->size;
|
||||
|
||||
for (size_t j = 0; j < DICT_CACHE_SIZE; j++) {
|
||||
@@ -324,16 +322,16 @@ void DictData::close()
|
||||
free(this->offsets);
|
||||
|
||||
if (this->initialized) {
|
||||
if (inflateEnd( &this->zStream )) {
|
||||
if (inflateEnd(&this->zStream)) {
|
||||
//err_internal( __FUNCTION__,
|
||||
// "Cannot shut down inflation engine: %s\n",
|
||||
// this->zStream.msg );
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < DICT_CACHE_SIZE; ++i){
|
||||
if (this -> cache [i].inBuffer)
|
||||
free (this -> cache [i].inBuffer);
|
||||
for (size_t i = 0; i < DICT_CACHE_SIZE; ++i) {
|
||||
if (this->cache[i].inBuffer)
|
||||
free(this->cache[i].inBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -358,7 +356,6 @@ void DictData::read(char *buffer, unsigned long start, unsigned long size)
|
||||
// ("dict_data_read( %p, %lu, %lu )\n",
|
||||
//h, start, size ));
|
||||
|
||||
|
||||
switch (this->type) {
|
||||
case DICT_GZIP:
|
||||
//err_fatal( __FUNCTION__,
|
||||
@@ -367,7 +364,7 @@ void DictData::read(char *buffer, unsigned long start, unsigned long size)
|
||||
// " or dzip format (for space savings).\n" );
|
||||
break;
|
||||
case DICT_TEXT:
|
||||
memcpy( buffer, this->start + start, size );
|
||||
memcpy(buffer, this->start + start, size);
|
||||
//buffer[size] = '\0';
|
||||
break;
|
||||
case DICT_DZIP:
|
||||
@@ -380,7 +377,7 @@ void DictData::read(char *buffer, unsigned long start, unsigned long size)
|
||||
this->zStream.avail_in = 0;
|
||||
this->zStream.next_out = nullptr;
|
||||
this->zStream.avail_out = 0;
|
||||
if (inflateInit2( &this->zStream, -15 ) != Z_OK) {
|
||||
if (inflateInit2(&this->zStream, -15) != Z_OK) {
|
||||
//err_internal( __FUNCTION__,
|
||||
// "Cannot initialize inflation engine: %s\n",
|
||||
//this->zStream.msg );
|
||||
@@ -422,21 +419,21 @@ void DictData::read(char *buffer, unsigned long start, unsigned long size)
|
||||
} else {
|
||||
this->cache[target].chunk = i;
|
||||
if (!this->cache[target].inBuffer)
|
||||
this->cache[target].inBuffer = (char *)malloc( IN_BUFFER_SIZE );
|
||||
this->cache[target].inBuffer = (char *)malloc(IN_BUFFER_SIZE);
|
||||
inBuffer = this->cache[target].inBuffer;
|
||||
|
||||
if (this->chunks[i] >= OUT_BUFFER_SIZE ) {
|
||||
if (this->chunks[i] >= OUT_BUFFER_SIZE) {
|
||||
//err_internal( __FUNCTION__,
|
||||
// "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
|
||||
// i, this->chunks[i], OUT_BUFFER_SIZE );
|
||||
}
|
||||
memcpy( outBuffer, this->start + this->offsets[i], this->chunks[i] );
|
||||
memcpy(outBuffer, this->start + this->offsets[i], this->chunks[i]);
|
||||
|
||||
this->zStream.next_in = (Bytef *)outBuffer;
|
||||
this->zStream.avail_in = this->chunks[i];
|
||||
this->zStream.next_out = (Bytef *)inBuffer;
|
||||
this->zStream.avail_out = IN_BUFFER_SIZE;
|
||||
if (inflate( &this->zStream, Z_PARTIAL_FLUSH ) != Z_OK) {
|
||||
if (inflate(&this->zStream, Z_PARTIAL_FLUSH) != Z_OK) {
|
||||
//err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg );
|
||||
}
|
||||
if (this->zStream.avail_in) {
|
||||
@@ -452,24 +449,24 @@ void DictData::read(char *buffer, unsigned long start, unsigned long size)
|
||||
|
||||
if (i == firstChunk) {
|
||||
if (i == lastChunk) {
|
||||
memcpy( pt, inBuffer + firstOffset, lastOffset-firstOffset);
|
||||
memcpy(pt, inBuffer + firstOffset, lastOffset - firstOffset);
|
||||
pt += lastOffset - firstOffset;
|
||||
} else {
|
||||
if (count != this->chunkLength ) {
|
||||
if (count != this->chunkLength) {
|
||||
//err_internal( __FUNCTION__,
|
||||
// "Length = %d instead of %d\n",
|
||||
//count, this->chunkLength );
|
||||
}
|
||||
memcpy( pt, inBuffer + firstOffset,
|
||||
this->chunkLength - firstOffset );
|
||||
memcpy(pt, inBuffer + firstOffset,
|
||||
this->chunkLength - firstOffset);
|
||||
pt += this->chunkLength - firstOffset;
|
||||
}
|
||||
} else if (i == lastChunk) {
|
||||
memcpy( pt, inBuffer, lastOffset );
|
||||
memcpy(pt, inBuffer, lastOffset);
|
||||
pt += lastOffset;
|
||||
} else {
|
||||
assert( count == this->chunkLength );
|
||||
memcpy( pt, inBuffer, this->chunkLength );
|
||||
assert(count == this->chunkLength);
|
||||
memcpy(pt, inBuffer, this->chunkLength);
|
||||
pt += this->chunkLength;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,15 +13,17 @@ struct DictCache {
|
||||
int count;
|
||||
};
|
||||
|
||||
class DictData {
|
||||
class DictData
|
||||
{
|
||||
public:
|
||||
static const size_t DICT_CACHE_SIZE = 5;
|
||||
|
||||
DictData() {}
|
||||
~DictData() { close(); }
|
||||
bool open(const std::string& filename, int computeCRC);
|
||||
bool open(const std::string &filename, int computeCRC);
|
||||
void close();
|
||||
void read(char *buffer, unsigned long start, unsigned long size);
|
||||
|
||||
private:
|
||||
const char *start; /* start of mmap'd area */
|
||||
const char *end; /* end of mmap'd area */
|
||||
@@ -52,4 +54,3 @@ private:
|
||||
|
||||
int read_header(const std::string &filename, int computeCRC);
|
||||
};
|
||||
|
||||
|
||||
@@ -33,7 +33,6 @@ The Levenshtein distance algorithm has been used in:
|
||||
* Plagiarism detection
|
||||
*/
|
||||
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
@@ -56,43 +55,39 @@ Enhanced Dynamic Programming ASM Algorithm"
|
||||
// Returns the smallest of the three integers a, b and c.
static inline int minimum(const int a, const int b, const int c)
{
    int min = a;
    if (b < min)
        min = b;
    if (c < min)
        min = c;
    return min;
}
|
||||
|
||||
int EditDistance::CalEditDistance(const gunichar *s,const gunichar *t,const int limit)
|
||||
int EditDistance::CalEditDistance(const gunichar *s, const gunichar *t, const int limit)
|
||||
/*Compute levenshtein distance between s and t, this is using QUICK algorithm*/
|
||||
{
|
||||
int n=0,m=0,iLenDif,k,i,j,cost;
|
||||
int n = 0, m = 0, iLenDif, k, i, j, cost;
|
||||
// Remove leftmost matching portion of strings
|
||||
while ( *s && (*s==*t) )
|
||||
{
|
||||
while (*s && (*s == *t)) {
|
||||
s++;
|
||||
t++;
|
||||
}
|
||||
|
||||
while (s[n])
|
||||
{
|
||||
while (s[n]) {
|
||||
n++;
|
||||
}
|
||||
while (t[m])
|
||||
{
|
||||
while (t[m]) {
|
||||
m++;
|
||||
}
|
||||
|
||||
// Remove rightmost matching portion of strings by decrement n and m.
|
||||
while ( n && m && (*(s+n-1)==*(t+m-1)) )
|
||||
{
|
||||
n--;m--;
|
||||
while (n && m && (*(s + n - 1) == *(t + m - 1))) {
|
||||
n--;
|
||||
m--;
|
||||
}
|
||||
if ( m==0 || n==0 || d==nullptr )
|
||||
return (m+n);
|
||||
if ( m < n )
|
||||
{
|
||||
const gunichar * temp = s;
|
||||
if (m == 0 || n == 0 || d == nullptr)
|
||||
return (m + n);
|
||||
if (m < n) {
|
||||
const gunichar *temp = s;
|
||||
int itemp = n;
|
||||
s = t;
|
||||
t = temp;
|
||||
@@ -100,55 +95,51 @@ int EditDistance::CalEditDistance(const gunichar *s,const gunichar *t,const int
|
||||
m = itemp;
|
||||
}
|
||||
iLenDif = m - n;
|
||||
if ( iLenDif >= limit )
|
||||
if (iLenDif >= limit)
|
||||
return iLenDif;
|
||||
// step 1
|
||||
n++;m++;
|
||||
// d=(int*)malloc(sizeof(int)*m*n);
|
||||
if ( m*n > currentelements )
|
||||
{
|
||||
currentelements = m*n*2; // double the request
|
||||
d = static_cast<int*>(realloc(d, sizeof(int) * currentelements));
|
||||
if ( nullptr == d )
|
||||
return (m+n);
|
||||
n++;
|
||||
m++;
|
||||
// d=(int*)malloc(sizeof(int)*m*n);
|
||||
if (m * n > currentelements) {
|
||||
currentelements = m * n * 2; // double the request
|
||||
d = static_cast<int *>(realloc(d, sizeof(int) * currentelements));
|
||||
if (nullptr == d)
|
||||
return (m + n);
|
||||
}
|
||||
// step 2, init matrix
|
||||
for (k=0;k<n;k++)
|
||||
for (k = 0; k < n; k++)
|
||||
d[k] = k;
|
||||
for (k=1;k<m;k++)
|
||||
d[k*n] = k;
|
||||
for (k = 1; k < m; k++)
|
||||
d[k * n] = k;
|
||||
// step 3
|
||||
for (i=1;i<n;i++)
|
||||
{
|
||||
for (i = 1; i < n; i++) {
|
||||
// first calculate column, d(i,j)
|
||||
for ( j=1;j<iLenDif+i;j++ )
|
||||
{
|
||||
cost = s[i-1]==t[j-1]?0:1;
|
||||
d[j*n+i] = minimum(d[(j-1)*n+i]+1,d[j*n+i-1]+1,d[(j-1)*n+i-1]+cost);
|
||||
for (j = 1; j < iLenDif + i; j++) {
|
||||
cost = s[i - 1] == t[j - 1] ? 0 : 1;
|
||||
d[j * n + i] = minimum(d[(j - 1) * n + i] + 1, d[j * n + i - 1] + 1, d[(j - 1) * n + i - 1] + cost);
|
||||
#ifdef COVER_TRANSPOSITION
|
||||
if ( i>=2 && j>=2 && (d[j*n+i]-d[(j-2)*n+i-2]==2)
|
||||
&& (s[i-2]==t[j-1]) && (s[i-1]==t[j-2]) )
|
||||
d[j*n+i]--;
|
||||
if (i >= 2 && j >= 2 && (d[j * n + i] - d[(j - 2) * n + i - 2] == 2)
|
||||
&& (s[i - 2] == t[j - 1]) && (s[i - 1] == t[j - 2]))
|
||||
d[j * n + i]--;
|
||||
#endif
|
||||
}
|
||||
// second calculate row, d(k,j)
|
||||
// now j==iLenDif+i;
|
||||
for ( k=1;k<=i;k++ )
|
||||
{
|
||||
cost = s[k-1]==t[j-1]?0:1;
|
||||
d[j*n+k] = minimum(d[(j-1)*n+k]+1,d[j*n+k-1]+1,d[(j-1)*n+k-1]+cost);
|
||||
for (k = 1; k <= i; k++) {
|
||||
cost = s[k - 1] == t[j - 1] ? 0 : 1;
|
||||
d[j * n + k] = minimum(d[(j - 1) * n + k] + 1, d[j * n + k - 1] + 1, d[(j - 1) * n + k - 1] + cost);
|
||||
#ifdef COVER_TRANSPOSITION
|
||||
if ( k>=2 && j>=2 && (d[j*n+k]-d[(j-2)*n+k-2]==2)
|
||||
&& (s[k-2]==t[j-1]) && (s[k-1]==t[j-2]) )
|
||||
d[j*n+k]--;
|
||||
if (k >= 2 && j >= 2 && (d[j * n + k] - d[(j - 2) * n + k - 2] == 2)
|
||||
&& (s[k - 2] == t[j - 1]) && (s[k - 1] == t[j - 2]))
|
||||
d[j * n + k]--;
|
||||
#endif
|
||||
}
|
||||
// test if d(i,j) limit gets equal or exceed
|
||||
if ( d[j*n+i] >= limit )
|
||||
{
|
||||
return d[j*n+i];
|
||||
if (d[j * n + i] >= limit) {
|
||||
return d[j * n + i];
|
||||
}
|
||||
}
|
||||
// d(n-1,m-1)
|
||||
return d[n*m-1];
|
||||
return d[n * m - 1];
|
||||
}
|
||||
|
||||
@@ -3,21 +3,24 @@
|
||||
#include <cstdlib>
|
||||
#include <glib.h>
|
||||
|
||||
class EditDistance {
|
||||
class EditDistance
|
||||
{
|
||||
public:
|
||||
EditDistance() {
|
||||
EditDistance()
|
||||
{
|
||||
currentelements = 2500; // It's enough for most conditions :-)
|
||||
d = static_cast<int *>(malloc(sizeof(int)*currentelements));
|
||||
d = static_cast<int *>(malloc(sizeof(int) * currentelements));
|
||||
}
|
||||
~EditDistance() {
|
||||
~EditDistance()
|
||||
{
|
||||
if (d != nullptr)
|
||||
free(d);
|
||||
}
|
||||
EditDistance(const EditDistance&) = delete;
|
||||
EditDistance& operator=(const EditDistance&) = delete;
|
||||
int CalEditDistance( const gunichar *s, const gunichar *t, const int limit );
|
||||
EditDistance(const EditDistance &) = delete;
|
||||
EditDistance &operator=(const EditDistance &) = delete;
|
||||
int CalEditDistance(const gunichar *s, const gunichar *t, const int limit);
|
||||
|
||||
private:
|
||||
int *d;
|
||||
int currentelements;
|
||||
};
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <cstring>
|
||||
@@ -75,11 +75,11 @@ static std::string xdxf2text(const char *p, bool colorize_output)
|
||||
if (!next)
|
||||
continue;
|
||||
|
||||
const std::string name(p+1, next-p-1);
|
||||
const std::string name(p + 1, next - p - 1);
|
||||
|
||||
if (name == "abr")
|
||||
res += colorize_output ? ABR_VISFMT : "";
|
||||
else if (name=="/abr")
|
||||
else if (name == "/abr")
|
||||
res += colorize_output ? ESC_END : "";
|
||||
else if (name == "k") {
|
||||
const char *begin = next;
|
||||
@@ -93,7 +93,7 @@ static std::string xdxf2text(const char *p, bool colorize_output)
|
||||
res += colorize_output ? ESC_END : "";
|
||||
} else if (name == "b")
|
||||
res += colorize_output ? ESC_BOLD : "";
|
||||
else if (name=="/b")
|
||||
else if (name == "/b")
|
||||
res += colorize_output ? ESC_END : "";
|
||||
else if (name == "i")
|
||||
res += colorize_output ? ESC_ITALIC : "";
|
||||
@@ -140,7 +140,7 @@ static std::string parse_data(const gchar *data, bool colorize_output)
|
||||
const gchar *p = data;
|
||||
data_size = get_uint32(p);
|
||||
p += sizeof(guint32);
|
||||
while (guint32(p - data)<data_size) {
|
||||
while (guint32(p - data) < data_size) {
|
||||
switch (*p++) {
|
||||
case 'h': // HTML data
|
||||
case 'w': // WikiMedia markup data
|
||||
@@ -148,7 +148,7 @@ static std::string parse_data(const gchar *data, bool colorize_output)
|
||||
case 'l': // not utf-8, some other locale encoding, discouraged, need more work...
|
||||
sec_size = strlen(p);
|
||||
if (sec_size) {
|
||||
res+="\n";
|
||||
res += "\n";
|
||||
m_str = g_strndup(p, sec_size);
|
||||
res += m_str;
|
||||
g_free(m_str);
|
||||
@@ -159,7 +159,7 @@ static std::string parse_data(const gchar *data, bool colorize_output)
|
||||
case 'x': // xdxf
|
||||
sec_size = strlen(p);
|
||||
if (sec_size) {
|
||||
res+="\n";
|
||||
res += "\n";
|
||||
m_str = g_strndup(p, sec_size);
|
||||
res += xdxf2text(m_str, colorize_output);
|
||||
g_free(m_str);
|
||||
@@ -168,7 +168,7 @@ static std::string parse_data(const gchar *data, bool colorize_output)
|
||||
break;
|
||||
case 't': // english phonetic string
|
||||
sec_size = strlen(p);
|
||||
if(sec_size){
|
||||
if (sec_size) {
|
||||
res += "\n";
|
||||
if (colorize_output)
|
||||
res += TRANSCRIPTION_VISFMT;
|
||||
@@ -194,11 +194,10 @@ static std::string parse_data(const gchar *data, bool colorize_output)
|
||||
p += sec_size;
|
||||
}
|
||||
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void Library::SimpleLookup(const std::string &str, TSearchResultList& res_list)
|
||||
void Library::SimpleLookup(const std::string &str, TSearchResultList &res_list)
|
||||
{
|
||||
glong ind;
|
||||
res_list.reserve(ndicts());
|
||||
@@ -210,9 +209,9 @@ void Library::SimpleLookup(const std::string &str, TSearchResultList& res_list)
|
||||
parse_data(poGetWordData(ind, idict), colorize_output_)));
|
||||
}
|
||||
|
||||
void Library::LookupWithFuzzy(const std::string &str, TSearchResultList& res_list)
|
||||
void Library::LookupWithFuzzy(const std::string &str, TSearchResultList &res_list)
|
||||
{
|
||||
static const int MAXFUZZY=10;
|
||||
static const int MAXFUZZY = 10;
|
||||
|
||||
gchar *fuzzy_res[MAXFUZZY];
|
||||
if (!Libs::LookupWithFuzzy(str.c_str(), fuzzy_res, MAXFUZZY))
|
||||
@@ -224,9 +223,9 @@ void Library::LookupWithFuzzy(const std::string &str, TSearchResultList& res_lis
|
||||
}
|
||||
}
|
||||
|
||||
void Library::LookupWithRule(const std::string &str, TSearchResultList& res_list)
|
||||
void Library::LookupWithRule(const std::string &str, TSearchResultList &res_list)
|
||||
{
|
||||
std::vector<gchar *> match_res((MAX_MATCH_ITEM_PER_LIB) * ndicts());
|
||||
std::vector<gchar *> match_res((MAX_MATCH_ITEM_PER_LIB)*ndicts());
|
||||
|
||||
const gint nfound = Libs::LookupWithRule(str.c_str(), &match_res[0]);
|
||||
if (nfound == 0)
|
||||
@@ -238,9 +237,9 @@ void Library::LookupWithRule(const std::string &str, TSearchResultList& res_list
|
||||
}
|
||||
}
|
||||
|
||||
void Library::LookupData(const std::string &str, TSearchResultList& res_list)
|
||||
void Library::LookupData(const std::string &str, TSearchResultList &res_list)
|
||||
{
|
||||
std::vector<std::vector<gchar *> > drl(ndicts());
|
||||
std::vector<std::vector<gchar *>> drl(ndicts());
|
||||
if (!Libs::LookupData(str.c_str(), &drl[0]))
|
||||
return;
|
||||
for (int idict = 0; idict < ndicts(); ++idict)
|
||||
@@ -250,22 +249,22 @@ void Library::LookupData(const std::string &str, TSearchResultList& res_list)
|
||||
}
|
||||
}
|
||||
|
||||
void Library::print_search_result(FILE *out, const TSearchResult & res, bool &first_result)
|
||||
void Library::print_search_result(FILE *out, const TSearchResult &res, bool &first_result)
|
||||
{
|
||||
std::string loc_bookname, loc_def, loc_exp;
|
||||
|
||||
if (!utf8_output_){
|
||||
if (!utf8_output_) {
|
||||
loc_bookname = utf8_to_locale_ign_err(res.bookname);
|
||||
loc_def = utf8_to_locale_ign_err(res.def);
|
||||
loc_exp = utf8_to_locale_ign_err(res.exp);
|
||||
}
|
||||
if(json_) {
|
||||
if(!first_result) {
|
||||
if (json_) {
|
||||
if (!first_result) {
|
||||
fputs(",", out);
|
||||
} else {
|
||||
first_result=false;
|
||||
first_result = false;
|
||||
}
|
||||
fprintf(out,"{\"dict\": \"%s\",\"word\":\"%s\",\"definition\":\"%s\"}",
|
||||
fprintf(out, "{\"dict\": \"%s\",\"word\":\"%s\",\"definition\":\"%s\"}",
|
||||
json_escape_string(res.bookname).c_str(),
|
||||
json_escape_string(res.def).c_str(),
|
||||
json_escape_string(res.exp).c_str());
|
||||
@@ -285,10 +284,13 @@ void Library::print_search_result(FILE *out, const TSearchResult & res, bool &fi
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
class sdcv_pager final {
|
||||
public:
|
||||
explicit sdcv_pager(bool ignore_env = false) {
|
||||
namespace
|
||||
{
|
||||
class sdcv_pager final
|
||||
{
|
||||
public:
|
||||
explicit sdcv_pager(bool ignore_env = false)
|
||||
{
|
||||
output = stdout;
|
||||
if (ignore_env) {
|
||||
return;
|
||||
@@ -299,17 +301,19 @@ namespace {
|
||||
output = stdout;
|
||||
}
|
||||
}
|
||||
sdcv_pager(const sdcv_pager&) = delete;
|
||||
sdcv_pager& operator=(const sdcv_pager&) = delete;
|
||||
~sdcv_pager() {
|
||||
sdcv_pager(const sdcv_pager &) = delete;
|
||||
sdcv_pager &operator=(const sdcv_pager &) = delete;
|
||||
~sdcv_pager()
|
||||
{
|
||||
if (output != stdout) {
|
||||
pclose(output);
|
||||
}
|
||||
}
|
||||
FILE *get_stream() { return output; }
|
||||
private:
|
||||
|
||||
private:
|
||||
FILE *output;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
bool Library::process_phrase(const char *loc_str, IReadLine &io, bool force)
|
||||
@@ -371,10 +375,10 @@ bool Library::process_phrase(const char *loc_str, IReadLine &io, bool force)
|
||||
one or zero results per dictionary show all
|
||||
*/
|
||||
bool show_all_results = true;
|
||||
typedef std::map< std::string, int, std::less<std::string> > DictResMap;
|
||||
typedef std::map<std::string, int, std::less<std::string>> DictResMap;
|
||||
if (!force) {
|
||||
DictResMap res_per_dict;
|
||||
for (const TSearchResult& search_res : res_list) {
|
||||
for (const TSearchResult &search_res : res_list) {
|
||||
auto r = res_per_dict.equal_range(search_res.bookname);
|
||||
DictResMap tmp(r.first, r.second);
|
||||
if (tmp.empty()) //there are no yet such bookname in map
|
||||
@@ -387,7 +391,7 @@ bool Library::process_phrase(const char *loc_str, IReadLine &io, bool force)
|
||||
}
|
||||
}
|
||||
}
|
||||
}//if (!force)
|
||||
} //if (!force)
|
||||
|
||||
if (!show_all_results && !force) {
|
||||
if (!json_) {
|
||||
@@ -416,11 +420,11 @@ bool Library::process_phrase(const char *loc_str, IReadLine &io, bool force)
|
||||
io.add_to_history(res_list[choise].def.c_str());
|
||||
print_search_result(pager.get_stream(), res_list[choise], first_result);
|
||||
break;
|
||||
} else if (choise == -1){
|
||||
} else if (choise == -1) {
|
||||
break;
|
||||
} else
|
||||
printf(_("Invalid choice.\nIt must be from 0 to %zu or -1.\n"),
|
||||
res_list.size()-1);
|
||||
res_list.size() - 1);
|
||||
}
|
||||
} else {
|
||||
sdcv_pager pager(force || json_);
|
||||
@@ -428,7 +432,7 @@ bool Library::process_phrase(const char *loc_str, IReadLine &io, bool force)
|
||||
fprintf(pager.get_stream(), _("Found %zu items, similar to %s.\n"),
|
||||
res_list.size(), utf8_output_ ? get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str());
|
||||
}
|
||||
for (const TSearchResult& search_res : res_list) {
|
||||
for (const TSearchResult &search_res : res_list) {
|
||||
print_search_result(pager.get_stream(), search_res, first_result);
|
||||
}
|
||||
}
|
||||
@@ -437,7 +441,7 @@ bool Library::process_phrase(const char *loc_str, IReadLine &io, bool force)
|
||||
std::string loc_str;
|
||||
if (!utf8_output_)
|
||||
loc_str = utf8_to_locale_ign_err(get_impl(str));
|
||||
if(!json_)
|
||||
if (!json_)
|
||||
printf(_("Nothing similar to %s, sorry :(\n"), utf8_output_ ? get_impl(str) : loc_str.c_str());
|
||||
}
|
||||
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "stardict_lib.hpp"
|
||||
#include "readline.hpp"
|
||||
#include "stardict_lib.hpp"
|
||||
|
||||
//this structure is a wrapper needed to unify
|
||||
//the search results that the Dicts class returns
|
||||
@@ -13,8 +13,10 @@ struct TSearchResult {
|
||||
std::string def;
|
||||
std::string exp;
|
||||
|
||||
TSearchResult(const std::string& bookname_, const std::string& def_, const std::string& exp_)
|
||||
: bookname(bookname_), def(def_), exp(exp_)
|
||||
TSearchResult(const std::string &bookname_, const std::string &def_, const std::string &exp_)
|
||||
: bookname(bookname_)
|
||||
, def(def_)
|
||||
, exp(exp_)
|
||||
{
|
||||
}
|
||||
};
|
||||
@@ -23,25 +25,30 @@ typedef std::vector<TSearchResult> TSearchResultList;
|
||||
|
||||
//this class is a wrapper around the Dicts class that makes
|
||||
//it easier to use
|
||||
class Library : public Libs {
|
||||
class Library : public Libs
|
||||
{
|
||||
public:
|
||||
Library(bool uinput, bool uoutput, bool colorize_output, bool use_json, bool no_fuzzy)
|
||||
: utf8_input_(uinput), utf8_output_(uoutput), colorize_output_(colorize_output), json_(use_json) {
|
||||
: utf8_input_(uinput)
|
||||
, utf8_output_(uoutput)
|
||||
, colorize_output_(colorize_output)
|
||||
, json_(use_json)
|
||||
{
|
||||
setVerbose(!use_json);
|
||||
setFuzzy(!no_fuzzy);
|
||||
}
|
||||
|
||||
bool process_phrase(const char *loc_str, IReadLine &io, bool force = false);
|
||||
|
||||
private:
|
||||
bool utf8_input_;
|
||||
bool utf8_output_;
|
||||
bool colorize_output_;
|
||||
bool json_;
|
||||
|
||||
void SimpleLookup(const std::string &str, TSearchResultList& res_list);
|
||||
void LookupWithFuzzy(const std::string &str, TSearchResultList& res_list);
|
||||
void LookupWithRule(const std::string &str, TSearchResultList& res_lsit);
|
||||
void LookupData(const std::string &str, TSearchResultList& res_list);
|
||||
void print_search_result(FILE *out, const TSearchResult & res, bool &first_result);
|
||||
void SimpleLookup(const std::string &str, TSearchResultList &res_list);
|
||||
void LookupWithFuzzy(const std::string &str, TSearchResultList &res_list);
|
||||
void LookupWithRule(const std::string &str, TSearchResultList &res_lsit);
|
||||
void LookupData(const std::string &str, TSearchResultList &res_list);
|
||||
void print_search_result(FILE *out, const TSearchResult &res, bool &first_result);
|
||||
};
|
||||
|
||||
|
||||
@@ -1,27 +1,29 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MMAP
|
||||
# include <sys/types.h>
|
||||
# include <fcntl.h>
|
||||
# include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#include <glib.h>
|
||||
|
||||
class MapFile {
|
||||
class MapFile
|
||||
{
|
||||
public:
|
||||
MapFile() {}
|
||||
~MapFile();
|
||||
MapFile(const MapFile&) = delete;
|
||||
MapFile& operator=(const MapFile&) = delete;
|
||||
MapFile(const MapFile &) = delete;
|
||||
MapFile &operator=(const MapFile &) = delete;
|
||||
bool open(const char *file_name, unsigned long file_size);
|
||||
gchar *begin() { return data; }
|
||||
|
||||
private:
|
||||
char *data = nullptr;
|
||||
unsigned long size = 0ul;
|
||||
@@ -35,19 +37,19 @@ private:
|
||||
|
||||
inline bool MapFile::open(const char *file_name, unsigned long file_size)
|
||||
{
|
||||
size=file_size;
|
||||
size = file_size;
|
||||
#ifdef HAVE_MMAP
|
||||
if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) {
|
||||
//g_print("Open file %s failed!\n",fullfilename);
|
||||
return false;
|
||||
}
|
||||
data = (gchar *)mmap( nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0);
|
||||
data = (gchar *)mmap(nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0);
|
||||
if ((void *)data == (void *)(-1)) {
|
||||
//g_print("mmap file %s failed!\n",idxfilename);
|
||||
data=nullptr;
|
||||
data = nullptr;
|
||||
return false;
|
||||
}
|
||||
#elif defined( _WIN32)
|
||||
#elif defined(_WIN32)
|
||||
hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS,
|
||||
FILE_ATTRIBUTE_NORMAL, 0);
|
||||
hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0,
|
||||
@@ -73,13 +75,12 @@ inline MapFile::~MapFile()
|
||||
munmap(data, size);
|
||||
close(mmap_fd);
|
||||
#else
|
||||
# ifdef _WIN32
|
||||
#ifdef _WIN32
|
||||
UnmapViewOfFile(data);
|
||||
CloseHandle(hFileMap);
|
||||
CloseHandle(hFile);
|
||||
# else
|
||||
#else
|
||||
g_free(data);
|
||||
# endif
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -19,14 +19,14 @@
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#ifdef WITH_READLINE
|
||||
# include <readline/readline.h>
|
||||
# include <readline/history.h>
|
||||
#include <readline/history.h>
|
||||
#include <readline/readline.h>
|
||||
#endif
|
||||
#include <glib.h>
|
||||
|
||||
@@ -34,51 +34,59 @@
|
||||
|
||||
#include "readline.hpp"
|
||||
|
||||
bool stdio_getline(FILE *in, std::string & str)
|
||||
bool stdio_getline(FILE *in, std::string &str)
|
||||
{
|
||||
assert(in != nullptr);
|
||||
str.clear();
|
||||
int ch;
|
||||
while ((ch=fgetc(in)) != EOF && ch != '\n')
|
||||
while ((ch = fgetc(in)) != EOF && ch != '\n')
|
||||
str += ch;
|
||||
|
||||
return EOF != ch;
|
||||
}
|
||||
|
||||
#ifndef WITH_READLINE
|
||||
namespace {
|
||||
class dummy_readline : public IReadLine {
|
||||
public:
|
||||
bool read(const std::string &banner, std::string &line) override {
|
||||
namespace
|
||||
{
|
||||
class dummy_readline : public IReadLine
|
||||
{
|
||||
public:
|
||||
bool read(const std::string &banner, std::string &line) override
|
||||
{
|
||||
printf("%s", banner.c_str());
|
||||
return stdio_getline(stdin, line);
|
||||
}
|
||||
};
|
||||
};
|
||||
}
|
||||
#else
|
||||
|
||||
namespace {
|
||||
class real_readline : public IReadLine {
|
||||
namespace
|
||||
{
|
||||
class real_readline : public IReadLine
|
||||
{
|
||||
|
||||
public:
|
||||
real_readline() {
|
||||
public:
|
||||
real_readline()
|
||||
{
|
||||
rl_readline_name = "sdcv";
|
||||
using_history();
|
||||
const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history";
|
||||
read_history(histname.c_str());
|
||||
}
|
||||
|
||||
~real_readline() {
|
||||
~real_readline()
|
||||
{
|
||||
const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history";
|
||||
write_history(histname.c_str());
|
||||
const gchar *hist_size_str=g_getenv("SDCV_HISTSIZE");
|
||||
const gchar *hist_size_str = g_getenv("SDCV_HISTSIZE");
|
||||
int hist_size;
|
||||
if (!hist_size_str || sscanf(hist_size_str, "%d", &hist_size)<1)
|
||||
if (!hist_size_str || sscanf(hist_size_str, "%d", &hist_size) < 1)
|
||||
hist_size = 2000;
|
||||
history_truncate_file(histname.c_str(), hist_size);
|
||||
}
|
||||
|
||||
bool read(const std::string &banner, std::string& line) override {
|
||||
bool read(const std::string &banner, std::string &line) override
|
||||
{
|
||||
char *phrase = nullptr;
|
||||
phrase = readline(banner.c_str());
|
||||
if (phrase) {
|
||||
@@ -89,12 +97,13 @@ namespace {
|
||||
return false;
|
||||
}
|
||||
|
||||
void add_to_history(const std::string& phrase) override {
|
||||
void add_to_history(const std::string &phrase) override
|
||||
{
|
||||
add_history(phrase.c_str());
|
||||
}
|
||||
};
|
||||
};
|
||||
}
|
||||
#endif//WITH_READLINE
|
||||
#endif //WITH_READLINE
|
||||
|
||||
IReadLine *create_readline_object()
|
||||
{
|
||||
|
||||
@@ -2,11 +2,12 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
class IReadLine {
|
||||
class IReadLine
|
||||
{
|
||||
public:
|
||||
virtual ~IReadLine() {}
|
||||
virtual bool read(const std::string &banner, std::string& line) = 0;
|
||||
virtual void add_to_history(const std::string&) {}
|
||||
virtual bool read(const std::string &banner, std::string &line) = 0;
|
||||
virtual void add_to_history(const std::string &) {}
|
||||
};
|
||||
|
||||
extern std::string sdcv_readline;
|
||||
|
||||
19
src/sdcv.cpp
19
src/sdcv.cpp
@@ -22,16 +22,16 @@
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <clocale>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
|
||||
#include <glib.h>
|
||||
#include <glib/gi18n.h>
|
||||
@@ -56,7 +56,7 @@ static void free_str_array(gchar **arr)
|
||||
}
|
||||
namespace glib
|
||||
{
|
||||
using StrArr = ResourceWrapper<gchar *, gchar *, free_str_array>;
|
||||
using StrArr = ResourceWrapper<gchar *, gchar *, free_str_array>;
|
||||
}
|
||||
|
||||
static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_json);
|
||||
@@ -146,7 +146,7 @@ int main(int argc, char *argv[]) try {
|
||||
homedir = g_get_home_dir();
|
||||
|
||||
std::list<std::string> dicts_dir_list;
|
||||
if(!only_data_dir)
|
||||
if (!only_data_dir)
|
||||
dicts_dir_list.push_back(std::string(homedir) + G_DIR_SEPARATOR + ".stardict" + G_DIR_SEPARATOR + "dic");
|
||||
dicts_dir_list.push_back(data_dir);
|
||||
if (show_list_dicts) {
|
||||
@@ -233,7 +233,7 @@ int main(int argc, char *argv[]) try {
|
||||
static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_json)
|
||||
{
|
||||
bool first_entry = true;
|
||||
if(!use_json)
|
||||
if (!use_json)
|
||||
printf(_("Dictionary's name Word count\n"));
|
||||
else
|
||||
fputc('[', stdout);
|
||||
@@ -243,9 +243,9 @@ static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_js
|
||||
DictInfo dict_info;
|
||||
if (dict_info.load_from_ifo_file(filename, false)) {
|
||||
const std::string bookname = utf8_to_locale_ign_err(dict_info.bookname);
|
||||
if(use_json) {
|
||||
if(first_entry) {
|
||||
first_entry=false;
|
||||
if (use_json) {
|
||||
if (first_entry) {
|
||||
first_entry = false;
|
||||
} else {
|
||||
fputc(',', stdout); // comma between entries
|
||||
}
|
||||
@@ -255,7 +255,6 @@ static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_js
|
||||
}
|
||||
}
|
||||
});
|
||||
if(use_json)
|
||||
if (use_json)
|
||||
fputs("]\n", stdout);
|
||||
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,17 +2,17 @@
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
|
||||
#include "dictziplib.hpp"
|
||||
|
||||
const int MAX_MATCH_ITEM_PER_LIB=100;
|
||||
const int MAX_FUZZY_DISTANCE= 3; // at most MAX_FUZZY_DISTANCE-1 differences allowed when find similar words
|
||||
const int MAX_MATCH_ITEM_PER_LIB = 100;
|
||||
const int MAX_FUZZY_DISTANCE = 3; // at most MAX_FUZZY_DISTANCE-1 differences allowed when find similar words
|
||||
|
||||
inline guint32 get_uint32(const gchar *addr)
|
||||
{
|
||||
@@ -26,38 +26,42 @@ inline void set_uint32(gchar *addr, guint32 val)
|
||||
memcpy(addr, &val, sizeof(guint32));
|
||||
}
|
||||
|
||||
|
||||
struct cacheItem {
|
||||
guint32 offset;
|
||||
gchar *data;
|
||||
//write code here to make it inline
|
||||
cacheItem() { data = nullptr;}
|
||||
cacheItem() { data = nullptr; }
|
||||
~cacheItem() { g_free(data); }
|
||||
};
|
||||
|
||||
const int WORDDATA_CACHE_NUM = 10;
|
||||
const int INVALID_INDEX=-100;
|
||||
const int INVALID_INDEX = -100;
|
||||
|
||||
class DictBase {
|
||||
class DictBase
|
||||
{
|
||||
public:
|
||||
DictBase() {}
|
||||
~DictBase() {
|
||||
~DictBase()
|
||||
{
|
||||
if (dictfile)
|
||||
fclose(dictfile);
|
||||
}
|
||||
DictBase(const DictBase&) = delete;
|
||||
DictBase& operator=(const DictBase&) = delete;
|
||||
gchar * GetWordData(guint32 idxitem_offset, guint32 idxitem_size);
|
||||
bool containSearchData() const {
|
||||
DictBase(const DictBase &) = delete;
|
||||
DictBase &operator=(const DictBase &) = delete;
|
||||
gchar *GetWordData(guint32 idxitem_offset, guint32 idxitem_size);
|
||||
bool containSearchData() const
|
||||
{
|
||||
if (sametypesequence.empty())
|
||||
return true;
|
||||
return sametypesequence.find_first_of("mlgxty") != std::string::npos;
|
||||
}
|
||||
bool SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data);
|
||||
|
||||
protected:
|
||||
std::string sametypesequence;
|
||||
FILE *dictfile = nullptr;
|
||||
std::unique_ptr<DictData> dictdzfile;
|
||||
|
||||
private:
|
||||
cacheItem cache[WORDDATA_CACHE_NUM];
|
||||
gint cache_cur = 0;
|
||||
@@ -78,47 +82,53 @@ struct DictInfo {
|
||||
guint32 syn_file_size;
|
||||
std::string sametypesequence;
|
||||
|
||||
bool load_from_ifo_file(const std::string& ifofilename, bool istreedict);
|
||||
bool load_from_ifo_file(const std::string &ifofilename, bool istreedict);
|
||||
};
|
||||
|
||||
class IIndexFile {
|
||||
class IIndexFile
|
||||
{
|
||||
public:
|
||||
guint32 wordentry_offset;
|
||||
guint32 wordentry_size;
|
||||
|
||||
virtual ~IIndexFile() {}
|
||||
virtual bool load(const std::string& url, gulong wc, gulong fsize, bool verbose) = 0;
|
||||
virtual bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) = 0;
|
||||
virtual const gchar *get_key(glong idx) = 0;
|
||||
virtual void get_data(glong idx) = 0;
|
||||
virtual const gchar *get_key_and_data(glong idx) = 0;
|
||||
virtual bool lookup(const char *str, glong &idx) = 0;
|
||||
};
|
||||
|
||||
class SynFile {
|
||||
class SynFile
|
||||
{
|
||||
public:
|
||||
bool load(const std::string& url, gulong wc);
|
||||
bool load(const std::string &url, gulong wc);
|
||||
bool lookup(const char *str, glong &idx);
|
||||
|
||||
private:
|
||||
std::map<std::string, gulong> synonyms;
|
||||
};
|
||||
|
||||
class Dict : public DictBase {
|
||||
class Dict : public DictBase
|
||||
{
|
||||
public:
|
||||
Dict() {}
|
||||
Dict(const Dict&) = delete;
|
||||
Dict& operator=(const Dict&) = delete;
|
||||
bool load(const std::string& ifofilename, bool verbose);
|
||||
Dict(const Dict &) = delete;
|
||||
Dict &operator=(const Dict &) = delete;
|
||||
bool load(const std::string &ifofilename, bool verbose);
|
||||
|
||||
gulong narticles() const { return wordcount; }
|
||||
const std::string& dict_name() const { return bookname; }
|
||||
const std::string& ifofilename() const { return ifo_file_name; }
|
||||
const std::string &dict_name() const { return bookname; }
|
||||
const std::string &ifofilename() const { return ifo_file_name; }
|
||||
|
||||
const gchar *get_key(glong index) { return idx_file->get_key(index); }
|
||||
gchar *get_data(glong index) {
|
||||
gchar *get_data(glong index)
|
||||
{
|
||||
idx_file->get_data(index);
|
||||
return DictBase::GetWordData(idx_file->wordentry_offset, idx_file->wordentry_size);
|
||||
}
|
||||
void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size) {
|
||||
void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size)
|
||||
{
|
||||
*key = idx_file->get_key_and_data(index);
|
||||
*offset = idx_file->wordentry_offset;
|
||||
*size = idx_file->wordentry_size;
|
||||
@@ -126,6 +136,7 @@ public:
|
||||
bool Lookup(const char *str, glong &idx);
|
||||
|
||||
bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
|
||||
|
||||
private:
|
||||
std::string ifo_file_name;
|
||||
gulong wordcount;
|
||||
@@ -135,33 +146,37 @@ private:
|
||||
std::unique_ptr<IIndexFile> idx_file;
|
||||
std::unique_ptr<SynFile> syn_file;
|
||||
|
||||
bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize);
|
||||
bool load_ifofile(const std::string &ifofilename, gulong &idxfilesize);
|
||||
};
|
||||
|
||||
class Libs {
|
||||
class Libs
|
||||
{
|
||||
public:
|
||||
Libs(std::function<void(void)> f = std::function<void(void)>()) {
|
||||
Libs(std::function<void(void)> f = std::function<void(void)>())
|
||||
{
|
||||
progress_func = f;
|
||||
iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg.
|
||||
}
|
||||
void setVerbose(bool verbose) { verbose_ = verbose; }
|
||||
void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; }
|
||||
~Libs();
|
||||
Libs(const Libs&) = delete;
|
||||
Libs& operator=(const Libs&) = delete;
|
||||
Libs(const Libs &) = delete;
|
||||
Libs &operator=(const Libs &) = delete;
|
||||
|
||||
void load_dict(const std::string& url);
|
||||
void load(const std::list<std::string>& dicts_dirs,
|
||||
const std::list<std::string>& order_list,
|
||||
const std::list<std::string>& disable_list);
|
||||
void load_dict(const std::string &url);
|
||||
void load(const std::list<std::string> &dicts_dirs,
|
||||
const std::list<std::string> &order_list,
|
||||
const std::list<std::string> &disable_list);
|
||||
glong narticles(int idict) const { return oLib[idict]->narticles(); }
|
||||
const std::string& dict_name(int idict) const { return oLib[idict]->dict_name(); }
|
||||
const std::string &dict_name(int idict) const { return oLib[idict]->dict_name(); }
|
||||
gint ndicts() const { return oLib.size(); }
|
||||
|
||||
const gchar *poGetWord(glong iIndex, int iLib) {
|
||||
const gchar *poGetWord(glong iIndex, int iLib)
|
||||
{
|
||||
return oLib[iLib]->get_key(iIndex);
|
||||
}
|
||||
gchar * poGetWordData(glong iIndex,int iLib) {
|
||||
gchar *poGetWordData(glong iIndex, int iLib)
|
||||
{
|
||||
if (iIndex == INVALID_INDEX)
|
||||
return nullptr;
|
||||
return oLib[iLib]->get_data(iIndex);
|
||||
@@ -169,18 +184,20 @@ public:
|
||||
const gchar *poGetCurrentWord(glong *iCurrent);
|
||||
const gchar *poGetNextWord(const gchar *word, glong *iCurrent);
|
||||
const gchar *poGetPreWord(glong *iCurrent);
|
||||
bool LookupWord(const gchar* sWord, glong& iWordIndex, int iLib) {
|
||||
bool LookupWord(const gchar *sWord, glong &iWordIndex, int iLib)
|
||||
{
|
||||
return oLib[iLib]->Lookup(sWord, iWordIndex);
|
||||
}
|
||||
bool LookupSimilarWord(const gchar* sWord, glong & iWordIndex, int iLib);
|
||||
bool SimpleLookupWord(const gchar* sWord, glong & iWordIndex, int iLib);
|
||||
|
||||
bool LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib);
|
||||
bool SimpleLookupWord(const gchar *sWord, glong &iWordIndex, int iLib);
|
||||
|
||||
bool LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size);
|
||||
gint LookupWithRule(const gchar *sWord, gchar *reslist[]);
|
||||
bool LookupData(const gchar *sWord, std::vector<gchar *> *reslist);
|
||||
|
||||
protected:
|
||||
bool fuzzy_;
|
||||
|
||||
private:
|
||||
std::vector<Dict *> oLib; // word Libs.
|
||||
int iMaxFuzzyDistance;
|
||||
@@ -188,10 +205,11 @@ private:
|
||||
bool verbose_;
|
||||
};
|
||||
|
||||
|
||||
enum query_t {
|
||||
qtSIMPLE, qtREGEXP, qtFUZZY, qtDATA
|
||||
qtSIMPLE,
|
||||
qtREGEXP,
|
||||
qtFUZZY,
|
||||
qtDATA
|
||||
};
|
||||
|
||||
extern query_t analyze_query(const char *s, std::string& res);
|
||||
|
||||
extern query_t analyze_query(const char *s, std::string &res);
|
||||
|
||||
@@ -19,20 +19,20 @@
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <glib.h>
|
||||
#include <glib/gi18n.h>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
#include "utils.hpp"
|
||||
|
||||
std::string utf8_to_locale_ign_err(const std::string& utf8_str)
|
||||
std::string utf8_to_locale_ign_err(const std::string &utf8_str)
|
||||
{
|
||||
std::string res;
|
||||
|
||||
@@ -44,7 +44,7 @@ std::string utf8_to_locale_ign_err(const std::string& utf8_str)
|
||||
glib::Error err;
|
||||
glib::CharStr tmp(g_convert_with_fallback(utf8_str.c_str(), -1, charset, "UTF-8", nullptr,
|
||||
&bytes_read, &bytes_written, get_addr(err)));
|
||||
if (nullptr == get_impl(tmp)){
|
||||
if (nullptr == get_impl(tmp)) {
|
||||
fprintf(stderr, _("Can not convert %s to current locale.\n"), utf8_str.c_str());
|
||||
fprintf(stderr, "%s\n", err->message);
|
||||
exit(EXIT_FAILURE);
|
||||
@@ -55,24 +55,23 @@ std::string utf8_to_locale_ign_err(const std::string& utf8_str)
|
||||
return res;
|
||||
}
|
||||
|
||||
static void __for_each_file(const std::string& dirname, const std::string& suff,
|
||||
const std::list<std::string>& order_list, const std::list<std::string>& disable_list,
|
||||
const std::function<void (const std::string&, bool)>& f)
|
||||
static void __for_each_file(const std::string &dirname, const std::string &suff,
|
||||
const std::list<std::string> &order_list, const std::list<std::string> &disable_list,
|
||||
const std::function<void(const std::string &, bool)> &f)
|
||||
{
|
||||
GDir *dir = g_dir_open(dirname.c_str(), 0, nullptr);
|
||||
if (dir) {
|
||||
const gchar *filename;
|
||||
|
||||
while ((filename = g_dir_read_name(dir))!=nullptr) {
|
||||
const std::string fullfilename(dirname+G_DIR_SEPARATOR_S+filename);
|
||||
while ((filename = g_dir_read_name(dir)) != nullptr) {
|
||||
const std::string fullfilename(dirname + G_DIR_SEPARATOR_S + filename);
|
||||
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_DIR))
|
||||
__for_each_file(fullfilename, suff, order_list, disable_list, f);
|
||||
else if (g_str_has_suffix(filename, suff.c_str()) &&
|
||||
std::find(order_list.begin(), order_list.end(),
|
||||
fullfilename)==order_list.end()) {
|
||||
else if (g_str_has_suffix(filename, suff.c_str()) && std::find(order_list.begin(), order_list.end(), fullfilename) == order_list.end()) {
|
||||
const bool disable = std::find(disable_list.begin(),
|
||||
disable_list.end(),
|
||||
fullfilename)!=disable_list.end();
|
||||
fullfilename)
|
||||
!= disable_list.end();
|
||||
f(fullfilename, disable);
|
||||
}
|
||||
}
|
||||
@@ -80,31 +79,45 @@ static void __for_each_file(const std::string& dirname, const std::string& suff,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void for_each_file(const std::list<std::string>& dirs_list, const std::string& suff,
|
||||
const std::list<std::string>& order_list, const std::list<std::string>& disable_list,
|
||||
const std::function<void (const std::string&, bool)>& f)
|
||||
void for_each_file(const std::list<std::string> &dirs_list, const std::string &suff,
|
||||
const std::list<std::string> &order_list, const std::list<std::string> &disable_list,
|
||||
const std::function<void(const std::string &, bool)> &f)
|
||||
{
|
||||
for (const std::string & item : order_list) {
|
||||
for (const std::string &item : order_list) {
|
||||
const bool disable = std::find(disable_list.begin(), disable_list.end(), item) != disable_list.end();
|
||||
f(item, disable);
|
||||
}
|
||||
for (const std::string& item : dirs_list)
|
||||
for (const std::string &item : dirs_list)
|
||||
__for_each_file(item, suff, order_list, disable_list, f);
|
||||
}
|
||||
|
||||
// based on https://stackoverflow.com/questions/7724448/simple-json-string-escape-for-c/33799784#33799784
|
||||
std::string json_escape_string(const std::string &s) {
|
||||
std::string json_escape_string(const std::string &s)
|
||||
{
|
||||
std::ostringstream o;
|
||||
for (auto c = s.cbegin(); c != s.cend(); c++) {
|
||||
switch (*c) {
|
||||
case '"': o << "\\\""; break;
|
||||
case '\\': o << "\\\\"; break;
|
||||
case '\b': o << "\\b"; break;
|
||||
case '\f': o << "\\f"; break;
|
||||
case '\n': o << "\\n"; break;
|
||||
case '\r': o << "\\r"; break;
|
||||
case '\t': o << "\\t"; break;
|
||||
case '"':
|
||||
o << "\\\"";
|
||||
break;
|
||||
case '\\':
|
||||
o << "\\\\";
|
||||
break;
|
||||
case '\b':
|
||||
o << "\\b";
|
||||
break;
|
||||
case '\f':
|
||||
o << "\\f";
|
||||
break;
|
||||
case '\n':
|
||||
o << "\\n";
|
||||
break;
|
||||
case '\r':
|
||||
o << "\\r";
|
||||
break;
|
||||
case '\t':
|
||||
o << "\\t";
|
||||
break;
|
||||
default:
|
||||
if ('\x00' <= *c && *c <= '\x1f') {
|
||||
o << "\\u"
|
||||
|
||||
@@ -1,63 +1,78 @@
|
||||
#pragma once
|
||||
|
||||
#include <glib.h>
|
||||
#include <cstddef>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <glib.h>
|
||||
#include <list>
|
||||
#include <string>
|
||||
|
||||
template <typename T, typename unref_res_t, void (*unref_res)(unref_res_t *)>
|
||||
class ResourceWrapper {
|
||||
class ResourceWrapper
|
||||
{
|
||||
public:
|
||||
ResourceWrapper(T *p = nullptr) : p_(p) {}
|
||||
ResourceWrapper(T *p = nullptr)
|
||||
: p_(p)
|
||||
{
|
||||
}
|
||||
~ResourceWrapper() { free_resource(); }
|
||||
ResourceWrapper(const ResourceWrapper&) = delete;
|
||||
ResourceWrapper& operator=(const ResourceWrapper&) = delete;
|
||||
ResourceWrapper(const ResourceWrapper &) = delete;
|
||||
ResourceWrapper &operator=(const ResourceWrapper &) = delete;
|
||||
T *operator->() const { return p_; }
|
||||
bool operator!() const { return p_ == nullptr; }
|
||||
const T& operator[](size_t idx) const {
|
||||
const T &operator[](size_t idx) const
|
||||
{
|
||||
assert(p_ != nullptr);
|
||||
return p_[idx];
|
||||
}
|
||||
|
||||
void reset(T *newp) {
|
||||
void reset(T *newp)
|
||||
{
|
||||
if (p_ != newp) {
|
||||
free_resource();
|
||||
p_ = newp;
|
||||
}
|
||||
}
|
||||
|
||||
friend inline bool operator==(const ResourceWrapper& lhs, std::nullptr_t) noexcept {
|
||||
friend inline bool operator==(const ResourceWrapper &lhs, std::nullptr_t) noexcept
|
||||
{
|
||||
return !lhs.p_;
|
||||
}
|
||||
|
||||
friend inline bool operator!=(const ResourceWrapper& lhs, std::nullptr_t) noexcept {
|
||||
friend inline bool operator!=(const ResourceWrapper &lhs, std::nullptr_t) noexcept
|
||||
{
|
||||
return !!lhs.p_;
|
||||
}
|
||||
|
||||
friend inline T *get_impl(const ResourceWrapper& rw) {
|
||||
friend inline T *get_impl(const ResourceWrapper &rw)
|
||||
{
|
||||
return rw.p_;
|
||||
}
|
||||
|
||||
friend inline T **get_addr(ResourceWrapper& rw) {
|
||||
friend inline T **get_addr(ResourceWrapper &rw)
|
||||
{
|
||||
return &rw.p_;
|
||||
}
|
||||
|
||||
private:
|
||||
T *p_;
|
||||
|
||||
void free_resource() { if (p_) unref_res(p_); }
|
||||
void free_resource()
|
||||
{
|
||||
if (p_)
|
||||
unref_res(p_);
|
||||
}
|
||||
};
|
||||
|
||||
namespace glib {
|
||||
typedef ResourceWrapper<gchar, void, g_free> CharStr;
|
||||
typedef ResourceWrapper<GError, GError, g_error_free> Error;
|
||||
namespace glib
|
||||
{
|
||||
typedef ResourceWrapper<gchar, void, g_free> CharStr;
|
||||
typedef ResourceWrapper<GError, GError, g_error_free> Error;
|
||||
}
|
||||
|
||||
extern std::string utf8_to_locale_ign_err(const std::string& utf8_str);
|
||||
extern std::string utf8_to_locale_ign_err(const std::string &utf8_str);
|
||||
|
||||
extern void for_each_file(const std::list<std::string>& dirs_list, const std::string& suff,
|
||||
const std::list<std::string>& order_list, const std::list<std::string>& disable_list,
|
||||
const std::function<void (const std::string&, bool)>& f);
|
||||
extern void for_each_file(const std::list<std::string> &dirs_list, const std::string &suff,
|
||||
const std::list<std::string> &order_list, const std::list<std::string> &disable_list,
|
||||
const std::function<void(const std::string &, bool)> &f);
|
||||
extern std::string json_escape_string(const std::string &str);
|
||||
|
||||
Reference in New Issue
Block a user