mirror of
https://github.com/Dushistov/sdcv.git
synced 2025-12-16 18:01:54 +00:00
refactoring: apply clang-format rules
This commit is contained in:
107
src/distance.cpp
107
src/distance.cpp
@@ -33,7 +33,6 @@ The Levenshtein distance algorithm has been used in:
|
||||
* Plagiarism detection
|
||||
*/
|
||||
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
@@ -56,43 +55,39 @@ Enhanced Dynamic Programming ASM Algorithm"
|
||||
static inline int minimum(const int a, const int b, const int c)
|
||||
{
|
||||
int min = a;
|
||||
if ( b < min )
|
||||
if (b < min)
|
||||
min = b;
|
||||
if ( c < min )
|
||||
if (c < min)
|
||||
min = c;
|
||||
return min;
|
||||
}
|
||||
|
||||
int EditDistance::CalEditDistance(const gunichar *s,const gunichar *t,const int limit)
|
||||
int EditDistance::CalEditDistance(const gunichar *s, const gunichar *t, const int limit)
|
||||
/*Compute levenshtein distance between s and t, this is using QUICK algorithm*/
|
||||
{
|
||||
int n=0,m=0,iLenDif,k,i,j,cost;
|
||||
int n = 0, m = 0, iLenDif, k, i, j, cost;
|
||||
// Remove leftmost matching portion of strings
|
||||
while ( *s && (*s==*t) )
|
||||
{
|
||||
while (*s && (*s == *t)) {
|
||||
s++;
|
||||
t++;
|
||||
t++;
|
||||
}
|
||||
|
||||
while (s[n])
|
||||
{
|
||||
n++;
|
||||
}
|
||||
while (t[m])
|
||||
{
|
||||
m++;
|
||||
}
|
||||
|
||||
// Remove rightmost matching portion of strings by decrement n and m.
|
||||
while ( n && m && (*(s+n-1)==*(t+m-1)) )
|
||||
{
|
||||
n--;m--;
|
||||
while (s[n]) {
|
||||
n++;
|
||||
}
|
||||
if ( m==0 || n==0 || d==nullptr )
|
||||
return (m+n);
|
||||
if ( m < n )
|
||||
{
|
||||
const gunichar * temp = s;
|
||||
while (t[m]) {
|
||||
m++;
|
||||
}
|
||||
|
||||
// Remove rightmost matching portion of strings by decrement n and m.
|
||||
while (n && m && (*(s + n - 1) == *(t + m - 1))) {
|
||||
n--;
|
||||
m--;
|
||||
}
|
||||
if (m == 0 || n == 0 || d == nullptr)
|
||||
return (m + n);
|
||||
if (m < n) {
|
||||
const gunichar *temp = s;
|
||||
int itemp = n;
|
||||
s = t;
|
||||
t = temp;
|
||||
@@ -100,55 +95,51 @@ int EditDistance::CalEditDistance(const gunichar *s,const gunichar *t,const int
|
||||
m = itemp;
|
||||
}
|
||||
iLenDif = m - n;
|
||||
if ( iLenDif >= limit )
|
||||
if (iLenDif >= limit)
|
||||
return iLenDif;
|
||||
// step 1
|
||||
n++;m++;
|
||||
// d=(int*)malloc(sizeof(int)*m*n);
|
||||
if ( m*n > currentelements )
|
||||
{
|
||||
currentelements = m*n*2; // double the request
|
||||
d = static_cast<int*>(realloc(d, sizeof(int) * currentelements));
|
||||
if ( nullptr == d )
|
||||
return (m+n);
|
||||
n++;
|
||||
m++;
|
||||
// d=(int*)malloc(sizeof(int)*m*n);
|
||||
if (m * n > currentelements) {
|
||||
currentelements = m * n * 2; // double the request
|
||||
d = static_cast<int *>(realloc(d, sizeof(int) * currentelements));
|
||||
if (nullptr == d)
|
||||
return (m + n);
|
||||
}
|
||||
// step 2, init matrix
|
||||
for (k=0;k<n;k++)
|
||||
for (k = 0; k < n; k++)
|
||||
d[k] = k;
|
||||
for (k=1;k<m;k++)
|
||||
d[k*n] = k;
|
||||
for (k = 1; k < m; k++)
|
||||
d[k * n] = k;
|
||||
// step 3
|
||||
for (i=1;i<n;i++)
|
||||
{
|
||||
for (i = 1; i < n; i++) {
|
||||
// first calculate column, d(i,j)
|
||||
for ( j=1;j<iLenDif+i;j++ )
|
||||
{
|
||||
cost = s[i-1]==t[j-1]?0:1;
|
||||
d[j*n+i] = minimum(d[(j-1)*n+i]+1,d[j*n+i-1]+1,d[(j-1)*n+i-1]+cost);
|
||||
for (j = 1; j < iLenDif + i; j++) {
|
||||
cost = s[i - 1] == t[j - 1] ? 0 : 1;
|
||||
d[j * n + i] = minimum(d[(j - 1) * n + i] + 1, d[j * n + i - 1] + 1, d[(j - 1) * n + i - 1] + cost);
|
||||
#ifdef COVER_TRANSPOSITION
|
||||
if ( i>=2 && j>=2 && (d[j*n+i]-d[(j-2)*n+i-2]==2)
|
||||
&& (s[i-2]==t[j-1]) && (s[i-1]==t[j-2]) )
|
||||
d[j*n+i]--;
|
||||
if (i >= 2 && j >= 2 && (d[j * n + i] - d[(j - 2) * n + i - 2] == 2)
|
||||
&& (s[i - 2] == t[j - 1]) && (s[i - 1] == t[j - 2]))
|
||||
d[j * n + i]--;
|
||||
#endif
|
||||
}
|
||||
// second calculate row, d(k,j)
|
||||
// now j==iLenDif+i;
|
||||
for ( k=1;k<=i;k++ )
|
||||
{
|
||||
cost = s[k-1]==t[j-1]?0:1;
|
||||
d[j*n+k] = minimum(d[(j-1)*n+k]+1,d[j*n+k-1]+1,d[(j-1)*n+k-1]+cost);
|
||||
for (k = 1; k <= i; k++) {
|
||||
cost = s[k - 1] == t[j - 1] ? 0 : 1;
|
||||
d[j * n + k] = minimum(d[(j - 1) * n + k] + 1, d[j * n + k - 1] + 1, d[(j - 1) * n + k - 1] + cost);
|
||||
#ifdef COVER_TRANSPOSITION
|
||||
if ( k>=2 && j>=2 && (d[j*n+k]-d[(j-2)*n+k-2]==2)
|
||||
&& (s[k-2]==t[j-1]) && (s[k-1]==t[j-2]) )
|
||||
d[j*n+k]--;
|
||||
if (k >= 2 && j >= 2 && (d[j * n + k] - d[(j - 2) * n + k - 2] == 2)
|
||||
&& (s[k - 2] == t[j - 1]) && (s[k - 1] == t[j - 2]))
|
||||
d[j * n + k]--;
|
||||
#endif
|
||||
}
|
||||
// test if d(i,j) limit gets equal or exceed
|
||||
if ( d[j*n+i] >= limit )
|
||||
{
|
||||
return d[j*n+i];
|
||||
if (d[j * n + i] >= limit) {
|
||||
return d[j * n + i];
|
||||
}
|
||||
}
|
||||
// d(n-1,m-1)
|
||||
return d[n*m-1];
|
||||
return d[n * m - 1];
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user