0.4.2 release

This commit is contained in:
Evgeniy Dushistov
2007-08-14 18:18:20 +00:00
commit 3f241bb6bb
118 changed files with 29884 additions and 0 deletions

10
src/lib/Makefile.am Normal file
View File

@@ -0,0 +1,10 @@
# Static library holding the dictionary-access code. The noinst_ prefix
# means the archive is built but not installed.
noinst_LIBRARIES = libstardict.a
# Files that make up the library. mapfile.hpp and file.hpp are header-only
# (no matching .cpp is listed).
libstardict_a_SOURCES = \
lib.cpp lib.h \
dictziplib.cpp dictziplib.hpp \
distance.cpp distance.h \
mapfile.hpp file.hpp
# Compiler flags for this directory; the value is substituted by configure.
INCLUDES = @LIB_STARDICT_CFLAGS@

428
src/lib/Makefile.in Normal file
View File

@@ -0,0 +1,428 @@
# Makefile.in generated by automake 1.9.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004, 2005 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
VPATH = @srcdir@
pkgdatadir = $(datadir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
top_builddir = ../..
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
INSTALL = @INSTALL@
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = src/lib
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/gettext.m4 \
$(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/lib-ld.m4 \
$(top_srcdir)/m4/lib-link.m4 $(top_srcdir)/m4/lib-prefix.m4 \
$(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \
$(top_srcdir)/m4/progtest.m4 $(top_srcdir)/m4/readline.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
LIBRARIES = $(noinst_LIBRARIES)
AR = ar
ARFLAGS = cru
libstardict_a_AR = $(AR) $(ARFLAGS)
libstardict_a_LIBADD =
am_libstardict_a_OBJECTS = lib.$(OBJEXT) dictziplib.$(OBJEXT) \
distance.$(OBJEXT)
libstardict_a_OBJECTS = $(am_libstardict_a_OBJECTS)
DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
CXXLD = $(CXX)
CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
-o $@
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
SOURCES = $(libstardict_a_SOURCES)
DIST_SOURCES = $(libstardict_a_SOURCES)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMDEP_FALSE = @AMDEP_FALSE@
AMDEP_TRUE = @AMDEP_TRUE@
AMTAR = @AMTAR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
GETTEXT_PACKAGE = @GETTEXT_PACKAGE@
GMSGFMT = @GMSGFMT@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTLLIBS = @INTLLIBS@
LDFLAGS = @LDFLAGS@
LIBICONV = @LIBICONV@
LIBINTL = @LIBINTL@
LIBOBJS = @LIBOBJS@
LIBREADLINE = @LIBREADLINE@
LIBS = @LIBS@
LIB_STARDICT_CFLAGS = @LIB_STARDICT_CFLAGS@
LIB_STARDICT_LIBS = @LIB_STARDICT_LIBS@
LTLIBICONV = @LTLIBICONV@
LTLIBINTL = @LTLIBINTL@
LTLIBOBJS = @LTLIBOBJS@
MAINT = @MAINT@
MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@
MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@
MAKEINFO = @MAKEINFO@
MKINSTALLDIRS = @MKINSTALLDIRS@
MSGFMT = @MSGFMT@
MSGMERGE = @MSGMERGE@
OBJEXT = @OBJEXT@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PKG_CONFIG = @PKG_CONFIG@
POSUB = @POSUB@
RANLIB = @RANLIB@
SDCV_CFLAGS = @SDCV_CFLAGS@
SDCV_LIBS = @SDCV_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
USE_NLS = @USE_NLS@
VERSION = @VERSION@
XGETTEXT = @XGETTEXT@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_RANLIB = @ac_ct_RANLIB@
ac_ct_STRIP = @ac_ct_STRIP@
ac_pt_PKG_CONFIG = @ac_pt_PKG_CONFIG@
am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
datadir = @datadir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
prefix = @prefix@
program_transform_name = @program_transform_name@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
noinst_LIBRARIES = libstardict.a
libstardict_a_SOURCES = \
lib.cpp lib.h \
dictziplib.cpp dictziplib.hpp \
distance.cpp distance.h \
mapfile.hpp file.hpp
INCLUDES = @LIB_STARDICT_CFLAGS@
all: all-am
.SUFFIXES:
.SUFFIXES: .cpp .o .obj
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
&& exit 0; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/lib/Makefile'; \
cd $(top_srcdir) && \
$(AUTOMAKE) --gnu src/lib/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
clean-noinstLIBRARIES:
-test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES)
libstardict.a: $(libstardict_a_OBJECTS) $(libstardict_a_DEPENDENCIES)
-rm -f libstardict.a
$(libstardict_a_AR) libstardict.a $(libstardict_a_OBJECTS) $(libstardict_a_LIBADD)
$(RANLIB) libstardict.a
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dictziplib.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/distance.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lib.Po@am__quote@
.cpp.o:
@am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \
@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
.cpp.obj:
@am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \
@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
uninstall-info-am:
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
mkid -fID $$unique
tags: TAGS
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
tags=; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$tags $$unique; \
fi
ctags: CTAGS
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
tags=; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
test -z "$(CTAGS_ARGS)$$tags$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$tags $$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& cd $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) $$here
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
list='$(DISTFILES)'; for file in $$list; do \
case $$file in \
$(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
$(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
esac; \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
if test "$$dir" != "$$file" && test "$$dir" != "."; then \
dir="/$$dir"; \
$(mkdir_p) "$(distdir)$$dir"; \
else \
dir=''; \
fi; \
if test -d $$d/$$file; then \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
fi; \
cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
else \
test -f $(distdir)/$$file \
|| cp -p $$d/$$file $(distdir)/$$file \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LIBRARIES)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
install-strip:
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
`test -z '$(STRIP)' || \
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
clean-am: clean-generic clean-noinstLIBRARIES mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-am
dvi-am:
html: html-am
info: info-am
info-am:
install-data-am:
install-exec-am:
install-info: install-info-am
install-man:
installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-compile mostlyclean-generic
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
uninstall-am: uninstall-info-am
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-noinstLIBRARIES ctags distclean distclean-compile \
distclean-generic distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-exec install-exec-am install-info \
install-info-am install-man install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \
uninstall-am uninstall-info-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

484
src/lib/dictziplib.cpp Normal file
View File

@@ -0,0 +1,484 @@
/* dictziplib.c --
* http://stardict.sourceforge.net
* Copyright (C) 2003-2003 Hu Zheng <huzheng_001@163.com>
* This file is a modified version of dictd-1.9.7's data.c
*
* data.c --
* Created: Tue Jul 16 12:45:41 1996 by faith@dict.org
* Revised: Sat Mar 30 10:46:06 2002 by faith@dict.org
* Copyright 1996, 1997, 1998, 2000, 2002 Rickard E. Faith (faith@dict.org)
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//#define HAVE_MMAP //it will defined in config.h. this can be done by configure.in with a AC_FUNC_MMAP.
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <unistd.h>
#include <limits.h>
#include <fcntl.h>
#include <sys/stat.h>
#include "dictziplib.hpp"
#define USE_CACHE 1
#define BUFFERSIZE 10240
/*
* Output buffer must be greater than or
* equal to 110% of input buffer size, plus
* 12 bytes.
*/
#define OUT_BUFFER_SIZE 0xffffL
#define IN_BUFFER_SIZE ((unsigned long)((double)(OUT_BUFFER_SIZE - 12) * 0.89))
/* For gzip-compatible header, as defined in RFC 1952 */
/* Magic for GZIP (rfc1952) */
#define GZ_MAGIC1 0x1f /* First magic byte */
#define GZ_MAGIC2 0x8b /* Second magic byte */
/* FLaGs (bitmapped), from rfc1952 */
#define GZ_FTEXT 0x01 /* Set for ASCII text */
#define GZ_FHCRC 0x02 /* Header CRC16 */
#define GZ_FEXTRA 0x04 /* Optional field (random access index) */
#define GZ_FNAME 0x08 /* Original name */
#define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */
#define GZ_MAX 2 /* Maximum compression */
#define GZ_FAST 4 /* Fasted compression */
/* These are from rfc1952 */
#define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */
#define GZ_OS_AMIGA 1 /* Amiga */
#define GZ_OS_VMS 2 /* VMS (or OpenVMS) */
#define GZ_OS_UNIX 3 /* Unix */
#define GZ_OS_VMCMS 4 /* VM/CMS */
#define GZ_OS_ATARI 5 /* Atari TOS */
#define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */
#define GZ_OS_MAC 7 /* Macintosh */
#define GZ_OS_Z 8 /* Z-System */
#define GZ_OS_CPM 9 /* CP/M */
#define GZ_OS_TOPS20 10 /* TOPS-20 */
#define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */
#define GZ_OS_QDOS 12 /* QDOS */
#define GZ_OS_ACORN 13 /* Acorn RISCOS */
#define GZ_OS_UNKNOWN 255 /* unknown */
#define GZ_RND_S1 'R' /* First magic for random access format */
#define GZ_RND_S2 'A' /* Second magic for random access format */
#define GZ_ID1 0 /* GZ_MAGIC1 */
#define GZ_ID2 1 /* GZ_MAGIC2 */
#define GZ_CM 2 /* Compression Method (Z_DEFALTED) */
#define GZ_FLG 3 /* FLaGs (see above) */
#define GZ_MTIME 4 /* Modification TIME */
#define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */
#define GZ_OS 9 /* Operating System */
#define GZ_XLEN 10 /* eXtra LENgth (16bit) */
#define GZ_FEXTRA_START 12 /* Start of extra fields */
#define GZ_SI1 12 /* Subfield ID1 */
#define GZ_SI2 13 /* Subfield ID2 */
#define GZ_SUBLEN 14 /* Subfield length (16bit) */
#define GZ_VERSION 16 /* Version for subfield format */
#define GZ_CHUNKLEN 18 /* Chunk length (16bit) */
#define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */
#define GZ_RNDDATA 22 /* Random access data (16bit) */
#define DICT_UNKNOWN 0
#define DICT_TEXT 1
#define DICT_GZIP 2
#define DICT_DZIP 3
int dictData::read_header(const std::string &fname, int computeCRC)
{
FILE *str;
int id1, id2, si1, si2;
char buffer[BUFFERSIZE];
int extraLength, subLength;
int i;
char *pt;
int c;
struct stat sb;
unsigned long crc = crc32( 0L, Z_NULL, 0 );
int count;
unsigned long offset;
if (!(str = fopen(fname.c_str(), "rb"))) {
//err_fatal_errno( __FUNCTION__,
// "Cannot open data file \"%s\" for read\n", filename );
}
this->headerLength = GZ_XLEN - 1;
this->type = DICT_UNKNOWN;
id1 = getc( str );
id2 = getc( str );
if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2) {
this->type = DICT_TEXT;
fstat( fileno( str ), &sb );
this->compressedLength = this->length = sb.st_size;
this->origFilename = fname;
this->mtime = sb.st_mtime;
if (computeCRC) {
rewind( str );
while (!feof( str )) {
if ((count = fread( buffer, 1, BUFFERSIZE, str ))) {
crc = crc32(crc, (Bytef *)buffer, count);
}
}
}
this->crc = crc;
fclose( str );
return 0;
}
this->type = DICT_GZIP;
this->method = getc( str );
this->flags = getc( str );
this->mtime = getc( str ) << 0;
this->mtime |= getc( str ) << 8;
this->mtime |= getc( str ) << 16;
this->mtime |= getc( str ) << 24;
this->extraFlags = getc( str );
this->os = getc( str );
if (this->flags & GZ_FEXTRA) {
extraLength = getc( str ) << 0;
extraLength |= getc( str ) << 8;
this->headerLength += extraLength + 2;
si1 = getc( str );
si2 = getc( str );
if (si1 == GZ_RND_S1 || si2 == GZ_RND_S2) {
subLength = getc( str ) << 0;
subLength |= getc( str ) << 8;
this->version = getc( str ) << 0;
this->version |= getc( str ) << 8;
if (this->version != 1) {
//err_internal( __FUNCTION__,
// "dzip header version %d not supported\n",
// this->version );
}
this->chunkLength = getc( str ) << 0;
this->chunkLength |= getc( str ) << 8;
this->chunkCount = getc( str ) << 0;
this->chunkCount |= getc( str ) << 8;
if (this->chunkCount <= 0) {
fclose( str );
return 5;
}
this->chunks = (int *)malloc(sizeof( this->chunks[0] )
* this->chunkCount );
for (i = 0; i < this->chunkCount; i++) {
this->chunks[i] = getc( str ) << 0;
this->chunks[i] |= getc( str ) << 8;
}
this->type = DICT_DZIP;
} else {
fseek( str, this->headerLength, SEEK_SET );
}
}
if (this->flags & GZ_FNAME) { /* FIXME! Add checking against header len */
pt = buffer;
while ((c = getc( str )) && c != EOF)
*pt++ = c;
*pt = '\0';
this->origFilename = buffer;
this->headerLength += this->origFilename.length() + 1;
} else {
this->origFilename = "";
}
if (this->flags & GZ_COMMENT) { /* FIXME! Add checking for header len */
pt = buffer;
while ((c = getc( str )) && c != EOF)
*pt++ = c;
*pt = '\0';
comment = buffer;
headerLength += comment.length()+1;
} else {
comment = "";
}
if (this->flags & GZ_FHCRC) {
getc( str );
getc( str );
this->headerLength += 2;
}
if (ftell( str ) != this->headerLength + 1) {
//err_internal( __FUNCTION__,
// "File position (%lu) != header length + 1 (%d)\n",
// ftell( str ), this->headerLength + 1 );
}
fseek( str, -8, SEEK_END );
this->crc = getc( str ) << 0;
this->crc |= getc( str ) << 8;
this->crc |= getc( str ) << 16;
this->crc |= getc( str ) << 24;
this->length = getc( str ) << 0;
this->length |= getc( str ) << 8;
this->length |= getc( str ) << 16;
this->length |= getc( str ) << 24;
this->compressedLength = ftell( str );
/* Compute offsets */
this->offsets = (unsigned long *)malloc( sizeof( this->offsets[0] )
* this->chunkCount );
for (offset = this->headerLength + 1, i = 0;
i < this->chunkCount;
i++) {
this->offsets[i] = offset;
offset += this->chunks[i];
}
fclose( str );
return 0;
}
/*
 * Open the dictionary data file `fname`: check that it is a regular file,
 * parse its header with read_header(), map it into memory and reset the
 * chunk cache.
 *
 * `computeCRC` is passed through to read_header().
 * Returns true on success, false if any step fails.
 */
bool dictData::open(const std::string& fname, int computeCRC)
{
    struct stat sb;

    this->initialized = 0;

    if (stat(fname.c_str(), &sb) || !S_ISREG(sb.st_mode))
        return false;

    if (read_header(fname, computeCRC))
        return false;

    const int fd = ::open(fname.c_str(), O_RDONLY);
    if (fd < 0)
        return false;

    /* Close the descriptor on every path; it is only needed for fstat(). */
    const bool statOk = (fstat(fd, &sb) == 0);
    ::close(fd);
    if (!statOk)
        return false;

    this->size = sb.st_size;

    if (!mapfile.open(fname.c_str(), size))
        return false;

    this->start = mapfile.begin();
    this->end = this->start + this->size;

    /* Mark every cache slot as empty. */
    for (int j = 0; j < DICT_CACHE_SIZE; j++) {
        cache[j].chunk = -1;
        cache[j].stamp = -1;
        cache[j].inBuffer = NULL;
        cache[j].count = 0;
    }

    return true;
}
/*
 * Release the chunk tables, the inflate state and the cache buffers.
 *
 * Every released resource is reset to its empty value, so calling close()
 * again is harmless. This matters because the destructor also calls
 * close(), so an explicit close() is always followed by a second one.
 */
void dictData::close()
{
    free(this->chunks); /* free(NULL) is a no-op */
    this->chunks = NULL;

    free(this->offsets);
    this->offsets = NULL;

    if (this->initialized) {
        inflateEnd( &this->zStream ); /* failure is ignored, as before */
        this->initialized = 0;
    }

    for (int i = 0; i < DICT_CACHE_SIZE; ++i) {
        free(this->cache[i].inBuffer);
        this->cache[i].inBuffer = NULL;
        /* Forget the cached chunk so a later read() cannot hit a slot
           whose buffer is gone. */
        this->cache[i].chunk = -1;
        this->cache[i].stamp = -1;
        this->cache[i].count = 0;
    }
}
void dictData::read(char *buffer, unsigned long start, unsigned long size)
{
char *pt;
unsigned long end;
int count;
char *inBuffer;
char outBuffer[OUT_BUFFER_SIZE];
int firstChunk, lastChunk;
int firstOffset, lastOffset;
int i, j;
int found, target, lastStamp;
static int stamp = 0;
end = start + size;
//buffer = malloc( size + 1 );
//PRINTF(DBG_UNZIP,
// ("dict_data_read( %p, %lu, %lu )\n",
//h, start, size ));
switch (this->type) {
case DICT_GZIP:
//err_fatal( __FUNCTION__,
// "Cannot seek on pure gzip format files.\n"
// "Use plain text (for performance)"
// " or dzip format (for space savings).\n" );
break;
case DICT_TEXT:
memcpy( buffer, this->start + start, size );
//buffer[size] = '\0';
break;
case DICT_DZIP:
if (!this->initialized) {
++this->initialized;
this->zStream.zalloc = NULL;
this->zStream.zfree = NULL;
this->zStream.opaque = NULL;
this->zStream.next_in = 0;
this->zStream.avail_in = 0;
this->zStream.next_out = NULL;
this->zStream.avail_out = 0;
if (inflateInit2( &this->zStream, -15 ) != Z_OK) {
//err_internal( __FUNCTION__,
// "Cannot initialize inflation engine: %s\n",
//this->zStream.msg );
}
}
firstChunk = start / this->chunkLength;
firstOffset = start - firstChunk * this->chunkLength;
lastChunk = end / this->chunkLength;
lastOffset = end - lastChunk * this->chunkLength;
//PRINTF(DBG_UNZIP,
// (" start = %lu, end = %lu\n"
//"firstChunk = %d, firstOffset = %d,"
//" lastChunk = %d, lastOffset = %d\n",
//start, end, firstChunk, firstOffset, lastChunk, lastOffset ));
for (pt = buffer, i = firstChunk; i <= lastChunk; i++) {
/* Access cache */
found = 0;
target = 0;
lastStamp = INT_MAX;
for (j = 0; j < DICT_CACHE_SIZE; j++) {
#if USE_CACHE
if (this->cache[j].chunk == i) {
found = 1;
target = j;
break;
}
#endif
if (this->cache[j].stamp < lastStamp) {
lastStamp = this->cache[j].stamp;
target = j;
}
}
this->cache[target].stamp = ++stamp;
if (found) {
count = this->cache[target].count;
inBuffer = this->cache[target].inBuffer;
} else {
this->cache[target].chunk = i;
if (!this->cache[target].inBuffer)
this->cache[target].inBuffer = (char *)malloc( IN_BUFFER_SIZE );
inBuffer = this->cache[target].inBuffer;
if (this->chunks[i] >= OUT_BUFFER_SIZE ) {
//err_internal( __FUNCTION__,
// "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
// i, this->chunks[i], OUT_BUFFER_SIZE );
}
memcpy( outBuffer, this->start + this->offsets[i], this->chunks[i] );
this->zStream.next_in = (Bytef *)outBuffer;
this->zStream.avail_in = this->chunks[i];
this->zStream.next_out = (Bytef *)inBuffer;
this->zStream.avail_out = IN_BUFFER_SIZE;
if (inflate( &this->zStream, Z_PARTIAL_FLUSH ) != Z_OK) {
//err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg );
}
if (this->zStream.avail_in) {
//err_internal( __FUNCTION__,
// "inflate did not flush (%d pending, %d avail)\n",
// this->zStream.avail_in, this->zStream.avail_out );
}
count = IN_BUFFER_SIZE - this->zStream.avail_out;
this->cache[target].count = count;
}
if (i == firstChunk) {
if (i == lastChunk) {
memcpy( pt, inBuffer + firstOffset, lastOffset-firstOffset);
pt += lastOffset - firstOffset;
} else {
if (count != this->chunkLength ) {
//err_internal( __FUNCTION__,
// "Length = %d instead of %d\n",
//count, this->chunkLength );
}
memcpy( pt, inBuffer + firstOffset,
this->chunkLength - firstOffset );
pt += this->chunkLength - firstOffset;
}
} else if (i == lastChunk) {
memcpy( pt, inBuffer, lastOffset );
pt += lastOffset;
} else {
assert( count == this->chunkLength );
memcpy( pt, inBuffer, this->chunkLength );
pt += this->chunkLength;
}
}
//*pt = '\0';
break;
case DICT_UNKNOWN:
//err_fatal( __FUNCTION__, "Cannot read unknown file type\n" );
break;
}
}

57
src/lib/dictziplib.hpp Normal file
View File

@@ -0,0 +1,57 @@
#ifndef __DICT_ZIP_LIB_H__
#define __DICT_ZIP_LIB_H__
#include <ctime>
#include <string>
#include <zlib.h>
#include "mapfile.hpp"
#define DICT_CACHE_SIZE 5
struct dictCache {
int chunk;
char *inBuffer;
int stamp;
int count;
};
struct dictData {
dictData() {}
bool open(const std::string& filename, int computeCRC);
void close();
void read(char *buffer, unsigned long start, unsigned long size);
~dictData() { close(); }
private:
const char *start; /* start of mmap'd area */
const char *end; /* end of mmap'd area */
unsigned long size; /* size of mmap */
int type;
z_stream zStream;
int initialized;
int headerLength;
int method;
int flags;
time_t mtime;
int extraFlags;
int os;
int version;
int chunkLength;
int chunkCount;
int *chunks;
unsigned long *offsets; /* Sum-scan of chunks. */
std::string origFilename;
std::string comment;
unsigned long crc;
unsigned long length;
unsigned long compressedLength;
dictCache cache[DICT_CACHE_SIZE];
MapFile mapfile;
int read_header(const std::string &filename, int computeCRC);
};
#endif//!__DICT_ZIP_LIB_H__

203
src/lib/distance.cpp Normal file
View File

@@ -0,0 +1,203 @@
/*
writer : Opera Wang
E-Mail : wangvisual AT sohu DOT com
License: GPL
*/
/* filename: distance.cpp */
/*
http://www.merriampark.com/ld.htm
What is Levenshtein Distance?
Levenshtein distance (LD) is a measure of the similarity between two strings,
which we will refer to as the source string (s) and the target string (t).
The distance is the number of deletions, insertions, or substitutions required
to transform s into t. For example,
* If s is "test" and t is "test", then LD(s,t) = 0, because no transformations are needed.
The strings are already identical.
* If s is "test" and t is "tent", then LD(s,t) = 1, because one substitution
(change "s" to "n") is sufficient to transform s into t.
The greater the Levenshtein distance, the more different the strings are.
Levenshtein distance is named after the Russian scientist Vladimir Levenshtein,
who devised the algorithm in 1965. If you can't spell or pronounce Levenshtein,
the metric is also sometimes called edit distance.
The Levenshtein distance algorithm has been used in:
* Spell checking
* Speech recognition
* DNA analysis
* Plagiarism detection
*/
#include <stdlib.h>
#include <string.h>
//#include <stdio.h>
#include "distance.h"
#define OPTIMIZE_ED
/*
Cover transposition, in addition to deletion,
insertion and substitution. This step is taken from:
Berghel, Hal ; Roach, David : "An Extension of Ukkonen's
Enhanced Dynamic Programming ASM Algorithm"
(http://www.acm.org/~hlb/publications/asm/asm.html)
*/
#define COVER_TRANSPOSITION
/****************************************/
/*Implementation of Levenshtein distance*/
/****************************************/
/*
 * Start with a work matrix of 2500 cells, which is expected to cover most
 * word pairs; CalEditDistance() enlarges it on demand. If the allocation
 * fails, d stays NULL.
 */
EditDistance::EditDistance()
    : d(NULL), currentelements(2500)
{
    d = static_cast<int *>(malloc(currentelements * sizeof(int)));
}
/* Release the work matrix. */
EditDistance::~EditDistance()
{
    free(d); // free() accepts a null pointer, so no check is needed
}
#ifdef OPTIMIZE_ED
/*
 * Compute the edit distance between the zero-terminated strings s and t
 * (insertions, deletions, substitutions and, with COVER_TRANSPOSITION,
 * swaps of adjacent characters). This is the "quick" variant.
 *
 * Common prefixes and suffixes are stripped first. The calculation stops
 * early and returns the current value as soon as it reaches `limit`, so
 * results >= limit only mean "at least this far apart".
 *
 * If the work matrix is missing or cannot be enlarged, a length-based
 * fallback (m+n) is returned instead.
 */
int EditDistance::CalEditDistance(const gunichar *s,const gunichar *t,const int limit)
{
    int n=0,m=0,iLenDif,k,i,j,cost;

    // Remove leftmost matching portion of strings
    while ( *s && (*s==*t) )
    {
        s++;
        t++;
    }

    while (s[n])
        n++;
    while (t[m])
        m++;

    // Remove rightmost matching portion of strings by decrement n and m.
    while ( n && m && (*(s+n-1)==*(t+m-1)) )
    {
        n--;m--;
    }

    if ( m==0 || n==0 || d==(int*)0 )
        return (m+n);

    // Make s the shorter string (n <= m).
    if ( m < n )
    {
        const gunichar * temp = s;
        int itemp = n;
        s = t;
        t = temp;
        n = m;
        m = itemp;
    }

    iLenDif = m - n;
    if ( iLenDif >= limit )
        return iLenDif;

    // step 1
    n++;m++;
    if ( m*n > currentelements )
    {
        // Grow through a temporary so that a failed realloc neither leaks
        // the old matrix nor leaves d null for all later calls.
        const int wanted = m*n*2; // double the request
        int *grown = (int*)realloc(d,sizeof(int)*wanted);
        if ( (int*)0 == grown )
            return (m+n);
        d = grown;
        currentelements = wanted;
    }

    // step 2, init matrix
    for (k=0;k<n;k++)
        d[k] = k;
    for (k=1;k<m;k++)
        d[k*n] = k;

    // step 3
    for (i=1;i<n;i++)
    {
        // first calculate column, d(i,j)
        for ( j=1;j<iLenDif+i;j++ )
        {
            cost = s[i-1]==t[j-1]?0:1;
            d[j*n+i] = minimum(d[(j-1)*n+i]+1,d[j*n+i-1]+1,d[(j-1)*n+i-1]+cost);
#ifdef COVER_TRANSPOSITION
            if ( i>=2 && j>=2 && (d[j*n+i]-d[(j-2)*n+i-2]==2)
                 && (s[i-2]==t[j-1]) && (s[i-1]==t[j-2]) )
                d[j*n+i]--;
#endif
        }
        // second calculate row, d(k,j)
        // now j==iLenDif+i;
        for ( k=1;k<=i;k++ )
        {
            cost = s[k-1]==t[j-1]?0:1;
            d[j*n+k] = minimum(d[(j-1)*n+k]+1,d[j*n+k-1]+1,d[(j-1)*n+k-1]+cost);
#ifdef COVER_TRANSPOSITION
            if ( k>=2 && j>=2 && (d[j*n+k]-d[(j-2)*n+k-2]==2)
                 && (s[k-2]==t[j-1]) && (s[k-1]==t[j-2]) )
                d[j*n+k]--;
#endif
        }
        // test if d(i,j) limit gets equal or exceed
        if ( d[j*n+i] >= limit )
        {
            return d[j*n+i];
        }
    }

    // d(n-1,m-1)
    return d[n*m-1];
}
#else
int EditDistance::CalEditDistance(const char *s,const char *t,const int limit)
{
//Step 1
int k,i,j,n,m,cost;
n=strlen(s);
m=strlen(t);
if( n!=0 && m!=0 && d!=(int*)0 )
{
m++;n++;
if ( m*n > currentelements )
{
currentelements = m*n*2;
d = (int*)realloc(d,sizeof(int)*currentelements);
if ( (int*)0 == d )
return (m+n);
}
//Step 2
for(k=0;k<n;k++)
d[k]=k;
for(k=0;k<m;k++)
d[k*n]=k;
//Step 3 and 4
for(i=1;i<n;i++)
for(j=1;j<m;j++)
{
//Step 5
if(s[i-1]==t[j-1])
cost=0;
else
cost=1;
//Step 6
d[j*n+i]=minimum(d[(j-1)*n+i]+1,d[j*n+i-1]+1,d[(j-1)*n+i-1]+cost);
#ifdef COVER_TRANSPOSITION
if ( i>=2 && j>=2 && (d[j*n+i]-d[(j-2)*n+i-2]==2)
&& (s[i-2]==t[j-1]) && (s[i-1]==t[j-2]) )
d[j*n+i]--;
#endif
}
return d[n*m-1];
}
else
return (n+m);
}
#endif

26
src/lib/distance.h Normal file
View File

@@ -0,0 +1,26 @@
#ifndef DISTANCE_H
#define DISTANCE_H
#include <glib.h>
/*
 * Edit-distance calculator that keeps its work matrix between calls.
 *
 * NOTE: the destructor frees `d` and no copy operations are declared, so
 * a copied instance would share (and later free) the same buffer.
 */
class EditDistance {
private:
    int *d;              /* work matrix, stored row by row */
    int currentelements; /* number of ints allocated for d */

    /* Returns the smallest of the three arguments. */
    inline int minimum( const int a, const int b, const int c )
    {
        const int smallerOfAB = ( b < a ) ? b : a;
        return ( c < smallerOfAB ) ? c : smallerOfAB;
    }

public:
    EditDistance( );
    ~EditDistance( );
    /* Distance between the zero-terminated strings s and t; see distance.cpp for the role of limit. */
    int CalEditDistance( const gunichar *s, const gunichar *t, const int limit );
};
#endif

53
src/lib/file.hpp Normal file
View File

@@ -0,0 +1,53 @@
#ifndef _FILE_HPP_
#define _FILE_HPP_
#include <algorithm>
#include <glib.h>
#include <list>
#include <string>
typedef std::list<std::string> List;
template<typename Function>
void __for_each_file(const std::string& dirname, const std::string& suff,
const List& order_list, const List& disable_list,
Function f)
{
GDir *dir = g_dir_open(dirname.c_str(), 0, NULL);
if (dir) {
const gchar *filename;
while ((filename = g_dir_read_name(dir))!=NULL) {
std::string fullfilename(dirname+G_DIR_SEPARATOR_S+filename);
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_DIR))
__for_each_file(fullfilename, suff, order_list, disable_list, f);
else if (g_str_has_suffix(filename, suff.c_str()) &&
std::find(order_list.begin(), order_list.end(),
fullfilename)==order_list.end()) {
bool disable=std::find(disable_list.begin(),
disable_list.end(),
fullfilename)!=disable_list.end();
f(fullfilename, disable);
}
}
g_dir_close(dir);
}
}
template<typename Function>
void for_each_file(const List& dirs_list, const std::string& suff,
const List& order_list, const List& disable_list,
Function f)
{
List::const_iterator it;
for (it=order_list.begin(); it!=order_list.end(); ++it) {
bool disable=std::find(disable_list.begin(), disable_list.end(),
*it)!=disable_list.end();
f(*it, disable);
}
for (it=dirs_list.begin(); it!=dirs_list.end(); ++it)
__for_each_file(*it, suff, order_list, disable_list, f);
}
#endif//!_FILE_HPP_

1693
src/lib/lib.cpp Normal file

File diff suppressed because it is too large Load Diff

158
src/lib/lib.h Normal file
View File

@@ -0,0 +1,158 @@
#ifndef __SD_LIB_H__
#define __SD_LIB_H__
#include <cstdio>
#include <list>
#include <memory>
#include <string>
#include <vector>
#include "dictziplib.hpp"
// NOTE(review): presumably the maximum number of matches collected from a
// single dictionary; it is not referenced in this header - confirm in lib.cpp.
const int MAX_MATCH_ITEM_PER_LIB=100;
const int MAX_FUZZY_DISTANCE= 3; // at most MAX_FUZZY_DISTANCE-1 differences allowed when finding similar words
struct cacheItem {
guint32 offset;
gchar *data;
//write code here to make it inline
cacheItem() {data= NULL;}
~cacheItem() {g_free(data);}
};
// Number of cacheItem slots in DictBase's cache array.
const int WORDDATA_CACHE_NUM = 10;
// Sentinel index value: Libs::poGetWordData() returns NULL when given it.
const int INVALID_INDEX=-100;
// Base class for dictionary objects (Dict derives from it below).  It holds
// the handles of the dictionary data file and a small array of cached word
// data.  The member functions are defined outside this header.
class DictBase {
public:
DictBase();
~DictBase();
// NOTE(review): presumably returns the article stored at idxitem_offset with
// length idxitem_size; who owns the returned buffer is not visible here -
// confirm in lib.cpp.
gchar * GetWordData(guint32 idxitem_offset, guint32 idxitem_size);
// NOTE(review): presumably tells whether this dictionary's data can be
// searched with SearchData() - confirm in lib.cpp.
bool containSearchData();
// NOTE(review): presumably checks whether the entry at the given offset/size
// contains the words in SearchWords; the role of origin_data is not visible
// here - confirm in lib.cpp.
bool SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data);
protected:
// NOTE(review): presumably mirrors DictInfo::sametypesequence read from the
// .ifo file - confirm.
std::string sametypesequence;
// Plain stdio handle of the data file.
FILE *dictfile;
// Owning pointer to a dictData object (declared in dictziplib.hpp).
std::auto_ptr<dictData> dictdzfile;
private:
// Fixed-size array of WORDDATA_CACHE_NUM cache slots.
cacheItem cache[WORDDATA_CACHE_NUM];
// NOTE(review): presumably the index of the next slot of `cache` to use - confirm.
gint cache_cur;
};
// This structure contains all the information about a dictionary.
// The fields are filled by load_from_ifo_file().
struct DictInfo {
std::string ifo_file_name;
guint32 wordcount;
std::string bookname;
std::string author;
std::string email;
std::string website;
std::string date;
std::string description;
guint32 index_file_size;
std::string sametypesequence;
// Loads the fields from the .ifo file `ifofilename`; `istreedict` selects
// the tree-dictionary flavour.  NOTE(review): the return value presumably
// reports success - confirm in lib.cpp.
bool load_from_ifo_file(const std::string& ifofilename, bool istreedict);
};
// Abstract interface to a dictionary index.  Every operation is pure
// virtual; concrete implementations are defined elsewhere.
class index_file {
public:
// Offset and size of a word entry in the data file.  Dict reads both fields
// right after calling get_data() or get_key_and_data(), so implementations
// are expected to store the looked-up entry's values here.
guint32 wordentry_offset;
guint32 wordentry_size;
virtual ~index_file() {}
// Loads the index from `url`; `wc` and `fsize` are passed by the caller.
// NOTE(review): presumably the word count and the index file size - confirm.
virtual bool load(const std::string& url, gulong wc, gulong fsize) = 0;
// Returns the key (word) stored at index idx.
virtual const gchar *get_key(glong idx) = 0;
// Returns nothing itself; see the note on wordentry_offset/wordentry_size.
virtual void get_data(glong idx) = 0;
// Combination of get_key() and get_data() for index idx.
virtual const gchar *get_key_and_data(glong idx) = 0;
// Looks up str and reports a position through idx.
// NOTE(review): the meaning of idx when the lookup fails is not visible
// here - confirm in lib.cpp.
virtual bool lookup(const char *str, glong &idx) = 0;
};
class Dict : public DictBase {
private:
std::string ifo_file_name;
gulong wordcount;
std::string bookname;
std::auto_ptr<index_file> idx_file;
bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize);
public:
Dict() {}
bool load(const std::string& ifofilename);
gulong narticles() { return wordcount; }
const std::string& dict_name() { return bookname; }
const std::string& ifofilename() { return ifo_file_name; }
const gchar *get_key(glong index) { return idx_file->get_key(index); }
gchar *get_data(glong index)
{
idx_file->get_data(index);
return DictBase::GetWordData(idx_file->wordentry_offset, idx_file->wordentry_size);
}
void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size)
{
*key = idx_file->get_key_and_data(index);
*offset = idx_file->wordentry_offset;
*size = idx_file->wordentry_size;
}
bool Lookup(const char *str, glong &idx) { return idx_file->lookup(str, idx); }
bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
};
// List of strings; Libs::load()/reload() take the dictionary directories,
// the order list and the disable list in this form.
typedef std::list<std::string> strlist_t;
// Collection of dictionaries addressed by position (the idict / iLib
// arguments index the internal vector).  None of the inline accessors checks
// that the position is in range.
class Libs {
public:
    // Callback type taken by the constructor.
    typedef void (*progress_func_t)(void);

    Libs(progress_func_t f=NULL);
    ~Libs();

    void load_dict(const std::string& url);
    void load(const strlist_t& dicts_dirs,
              const strlist_t& order_list,
              const strlist_t& disable_list);
    void reload(const strlist_t& dicts_dirs,
                const strlist_t& order_list,
                const strlist_t& disable_list);

    // Word count of dictionary number idict.
    glong narticles(int idict) {
        Dict *dict = oLib[idict];
        return dict->narticles();
    }
    // Name of dictionary number idict.
    const std::string& dict_name(int idict) {
        Dict *dict = oLib[idict];
        return dict->dict_name();
    }
    // Number of dictionaries held.
    gint ndicts() { return oLib.size(); }

    // Key stored at position iIndex of dictionary iLib.
    const gchar * poGetWord(glong iIndex,int iLib) {
        Dict *dict = oLib[iLib];
        return dict->get_key(iIndex);
    }
    // Word data at position iIndex of dictionary iLib, or NULL when iIndex
    // is the INVALID_INDEX sentinel.
    gchar * poGetWordData(glong iIndex,int iLib) {
        if (iIndex!=INVALID_INDEX)
            return oLib[iLib]->get_data(iIndex);
        return NULL;
    }

    const gchar *poGetCurrentWord(glong *iCurrent);
    const gchar *poGetNextWord(const gchar *word, glong *iCurrent);
    const gchar *poGetPreWord(glong *iCurrent);

    // Exact lookup of sWord in dictionary iLib; forwards to Dict::Lookup().
    bool LookupWord(const gchar* sWord, glong& iWordIndex, int iLib) {
        Dict *dict = oLib[iLib];
        return dict->Lookup(sWord, iWordIndex);
    }
    bool LookupSimilarWord(const gchar* sWord, glong & iWordIndex, int iLib);
    bool SimpleLookupWord(const gchar* sWord, glong & iWordIndex, int iLib);
    bool LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size);
    gint LookupWithRule(const gchar *sWord, gchar *reslist[]);
    bool LookupData(const gchar *sWord, std::vector<gchar *> *reslist);
private:
    std::vector<Dict *> oLib; // word Libs.
    int iMaxFuzzyDistance;
    progress_func_t progress_func;
};
// Query kinds returned by analyze_query().
// NOTE(review): the names suggest plain, pattern, fuzzy and data
// (full-text) lookups respectively - confirm in lib.cpp.
typedef enum {
qtSIMPLE, qtREGEXP, qtFUZZY, qtDATA
} query_t;
// Classifies the query string s.  NOTE(review): res is presumably an output
// parameter receiving the processed query text - confirm in lib.cpp.
extern query_t analyze_query(const char *s, std::string& res);
#endif//!__SD_LIB_H__

94
src/lib/mapfile.hpp Normal file
View File

@@ -0,0 +1,94 @@
#ifndef _MAPFILE_HPP_
#define _MAPFILE_HPP_
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#ifdef HAVE_MMAP
# include <sys/types.h>
# include <fcntl.h>
# include <sys/mman.h>
# include <unistd.h>
#endif
#ifdef _WIN32
# include <windows.h>
#endif
#include <glib.h>
// Read-only view of a whole file.  Depending on the build configuration the
// contents are memory-mapped with mmap() (HAVE_MMAP), mapped through the
// Win32 file-mapping API (_WIN32), or read into a heap buffer with
// g_file_get_contents() (neither macro defined).
//
// The object owns the mapping/buffer and releases it in the destructor.  No
// copy constructor or assignment is declared, so copying an opened MapFile
// would release the same resource twice - do not copy.
class MapFile {
public:
    // Starts with nothing mapped.  Each platform-specific initialiser
    // carries its own leading comma so the list is also well-formed when
    // neither HAVE_MMAP nor _WIN32 is defined (it used to end in a dangling
    // comma in that configuration).
    MapFile(void) :
        data(NULL),
        size(0)
#ifdef HAVE_MMAP
        , mmap_fd(-1)
#elif defined(_WIN32)
        , hFile(0),
        hFileMap(0)
#endif
    {
    }
    ~MapFile();

    // Makes the first file_size bytes of file_name available through
    // begin().  Returns false on failure.
    bool open(const char *file_name, unsigned long file_size);

    // Start of the file contents, or NULL when nothing is mapped.
    inline gchar *begin(void) { return data; }
private:
    char *data;           // mapped/loaded contents, NULL when nothing is open
    unsigned long size;   // size passed to open(); used by munmap()
#ifdef HAVE_MMAP
    int mmap_fd;          // descriptor backing the mapping
#elif defined(_WIN32)
    HANDLE hFile;         // file handle
    HANDLE hFileMap;      // file-mapping handle
#endif
};
inline bool MapFile::open(const char *file_name, unsigned long file_size)
{
size=file_size;
#ifdef HAVE_MMAP
if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) {
//g_print("Open file %s failed!\n",fullfilename);
return false;
}
data = (gchar *)mmap( NULL, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0);
if ((void *)data == (void *)(-1)) {
//g_print("mmap file %s failed!\n",idxfilename);
data=NULL;
return false;
}
#elif defined( _WIN32)
hFile = CreateFile(file_name, GENERIC_READ, 0, NULL, OPEN_ALWAYS,
FILE_ATTRIBUTE_NORMAL, 0);
hFileMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0,
file_size, NULL);
data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size);
#else
gsize read_len;
if (!g_file_get_contents(file_name, &data, &read_len, NULL))
return false;
if (read_len!=file_size)
return false;
#endif
return true;
}
// Releases whatever open() acquired.  Nothing is done when no data is
// mapped (data is NULL).
inline MapFile::~MapFile()
{
    if (data) {
#if defined(HAVE_MMAP)
        // mmap build: drop the mapping, then the descriptor behind it.
        munmap(data, size);
        close(mmap_fd);
#elif defined(_WIN32)
        // Win32 build: unmap the view, then close both handles.
        UnmapViewOfFile(data);
        CloseHandle(hFileMap);
        CloseHandle(hFile);
#else
        // Fallback build: the contents live in a glib-allocated buffer.
        g_free(data);
#endif
    }
}
#endif//!_MAPFILE_HPP_