From dcd966ba50fa18853c5ae06125a5b08b0ee6b10d Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Wed, 3 Aug 2005 01:48:35 +0000 Subject: Language Fixes and Improvements - rccmutex - Language autodetection fixes and improvements - Language translation fixes and improvements - The current state is near to be usable --- ToDo | 15 +++-- configure.in | 41 ++++++++++++ external/rcclibtranslate.c | 84 +++++++++++++++++++++-- src/Makefile.am | 5 +- src/rccexternal.c | 9 ++- src/rccexternal.h | 2 - src/rccmutex.c | 73 ++++++++++++++++++++ src/rccmutex.h | 27 ++++++++ src/rccstring.c | 1 + src/rcctranslate.c | 133 ++++++++++++++++++++++++++++++------- src/rcctranslate.h | 8 ++- src/recode.c | 162 ++++++++++++++++++++++++++++++++------------- 12 files changed, 473 insertions(+), 87 deletions(-) create mode 100644 src/rccmutex.c create mode 100644 src/rccmutex.h diff --git a/ToDo b/ToDo index fdb843f..026888d 100644 --- a/ToDo +++ b/ToDo @@ -1,4 +1,13 @@ 0.3.x: + - Buffer managment: + + SetBufferSize ( 0 - autogrow ) + - Language autodetection and translation improvements + + Look on ofline translation libraries and other possibilities to improove + translation and language detection. + + Implement ispell support + + Configurable timeouts + +1.x: - Common encodings: + Provide way to add to all languages several default Unicode encodings (UTF8, UTF16, UTF16BE) + Special type of classes to select only from Unicode encodings (or even just specified subset of encodings) @@ -6,10 +15,8 @@ * rccToEncoding(current_language, *new_language, buf, size)? * rccFromEncoding(current_language, utf8_language, buf, size)? * Code some options in charset name. (SpecialEncodingPrefix_Encoding_EncodingOptions) - - Buffer managment: - + SetBufferSize ( 0 - autogrow ) - - Look on ofline translation libraries and other possibilities to improove - translation and language detection. + - Recoding options: + + Skip Translation on request: - Multibyte(not-UTF8) support for FS classes diff --git a/configure.in b/configure.in index 16051b5..013e9ae 100644 --- a/configure.in +++ b/configure.in @@ -201,6 +201,45 @@ AM_PATH_ASPELL([ HAVE_ASPELL=no ]) + +PTHREAD_LIBS=error +AC_EGREP_CPP(yes,[ +#if (defined(__FreeBSD_cc_version) && __FreeBSD_cc_version <= 500001) || defined(__OpenBSD__) + yes +#endif +], + PTHREAD_CFLAGS="-D_THREAD_SAFE" + PTHREAD_LIBS="-pthread" +) +if test "x$PTHREAD_LIBS" = "xerror"; then + AC_CHECK_LIB(pthread, pthread_mutex_lock, PTHREAD_LIBS="-lpthread") +fi +if test "x$PTHREAD_LIBS" = xerror; then + AC_CHECK_LIB(pthreads, pthread_mutex_lock, PTHREAD_LIBS="-lpthreads") +fi +if test "x$PTHREAD_LIBS" = xerror; then + AC_CHECK_LIB(c_r, pthread_mutex_lock, PTHREAD_LIBS="-lc_r") +fi +if test "x$PTHREAD_LIBS" = xerror; then + AC_CHECK_FUNC(pthread_mutex_lock, PTHREAD_LIBS="") +fi +if test "x$PTHREAD_LIBS" = xerror; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +else + AC_CHECK_HEADER(pthread.h, [ + AC_DEFINE(HAVE_PTHREAD,1,[Defines if pthread is available]) + HAVE_PTHREAD=yes + ],[ + HAVE_PTHREAD=no + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + ]) +fi +AC_SUBST(PTHREAD_CFLAGS) +AC_SUBST(PTHREAD_LIBS) + + dnl Checks for typedefs, structures, and compiler characteristics. AC_C_CONST @@ -211,6 +250,8 @@ AC_OUTPUT(src/Makefile engines/Makefile external/Makefile ui/Makefile examples/M echo "" echo "Configuration:" +echo " POSIX Threading Support: $HAVE_PTHREAD" +echo "" echo " Dynamic Engine Loading Support: $HAVE_DLOPEN" echo " Enca Charset Detection Support: $HAVE_ENCA" echo " LibRCD Charset Detection Support: $HAVE_RCD" diff --git a/external/rcclibtranslate.c b/external/rcclibtranslate.c index 56ce8a2..46fcd6e 100644 --- a/external/rcclibtranslate.c +++ b/external/rcclibtranslate.c @@ -45,11 +45,34 @@ static char *rccCreateKey(const char *from, const char *to, const char *data, si return res; } +static char *rccTranslateFixEOL(char *result, const char *text) { + size_t i,j; + char *res; + + if (!result) return result; + if (strstr(text, "\r\n")) return result; + + res = (char*)malloc((strlen(result)+1)*sizeof(char)); + if (!res) { + free(result); + return NULL; + } + + for (i=0, j=0;result[i];i++) { + if ((result[i]=='\r')&&(result[i+1]=='\n')) i++; + else res[j++] = result[i]; + } + res[j] = 0; + free(result); + return res; +} + static void *rccLibPostponed(void *info) { char *result; char *data; char from[3]; char to[3]; + size_t datalen; from[2] = 0; to[2] = 0; @@ -60,13 +83,21 @@ static void *rccLibPostponed(void *info) { if (data) { g_mutex_unlock(mutex); + datalen = strlen(data); + memcpy(from, data, 2); memcpy(to, data + 2, 2); - result = translate_session_translate_text(session, data + 4, from, to, NULL, NULL, NULL); - if (result) { - rccDb4SetKey(db4ctx, data, strlen(data), result); - free(result); + result = rccDb4GetKey(db4ctx, data, datalen); + if (result) free(result); + else { + result = translate_session_translate_text(session, data + 4, from, to, NULL, NULL, NULL); + + if (result) { + result = rccTranslateFixEOL(result, data+4); + rccDb4SetKey(db4ctx, data, datalen, result); + free(result); + } } free(data); @@ -164,6 +195,26 @@ void rccLibTranslateFree() { } +static void rccLibTranslateQueue(const char *from, const char *to, const char *text) { +#ifdef HAVE_LIBTRANSLATE + char *key = NULL; + size_t keysize; + + if ((!session)||(!from)||(!to)||(!text)) return; + if ((strlen(from)!=2)||(strlen(to)!=2)) return; + + if (db4ctx) { + key = rccCreateKey(from,to,text,&keysize); + if (key) { + g_mutex_lock(mutex); + g_queue_push_tail(queue, key); + g_mutex_unlock(mutex); + g_cond_signal(cond); + } + } +#endif /* HAVE_LIBTRANSLATE */ +} + static char *rccLibTranslateDo(const char *from, const char *to, const char *text, unsigned long timeout) { #ifdef HAVE_LIBTRANSLATE char *result; @@ -188,6 +239,8 @@ static char *rccLibTranslateDo(const char *from, const char *to, const char *tex # else result = translate_session_translate_text(session, text, from, to, NULL, NULL, NULL); # endif /* HAVE_LIBTRANSLATE_TIMED_TRANSLATE */ + + result = rccTranslateFixEOL(result, text); if ((db4ctx)&&(key)) { if (result) { @@ -242,6 +295,7 @@ void *rccLibTranslate(void *info) { res = read(s, buffer + readed, size - readed); if (res<=0) connected = 0; } + if (!connected) goto clear; prefix.cmd.cmd = 0; prefix.cmd.size = 0; @@ -264,14 +318,30 @@ respond: } else connected = 0; if (prefix.cmd.size) free(translated); +clear: + free(buffer); + } else connected = 0; + break; + case RCC_EXTERNAL_COMMAND_TRANSLATE_QUEUE: + size = 1 + prefix.cmd.size + sizeof(rcc_external_command_s) - sizeof(rcc_translate_prefix_s); + buffer = (char*)malloc(size); + if (buffer) { + for (readed = 0; (readed < size)&&(connected); readed += res) { + res = read(s, buffer + readed, size - readed); + if (res<=0) connected = 0; + } + if ((connected)&&(!prefix.from[2])&&(!prefix.to[2])&&(!buffer[readed-1])) { + rccLibTranslateQueue(prefix.from, prefix.to, buffer); + } free(buffer); } else connected = 0; break; default: - buffer = (char*)malloc(prefix.cmd.size); + size = 1 + prefix.cmd.size + sizeof(rcc_external_command_s) - sizeof(rcc_translate_prefix_s); + buffer = (char*)malloc(size); if (buffer) { - for (readed = 0; (readed < prefix.cmd.size)&&(connected); readed += res) { - res = read(s, buffer + readed, prefix.cmd.size - readed); + for (readed = 0; (readed < size)&&(connected); readed += res) { + res = read(s, buffer + readed, size - readed); if (res<=0) connected = 0; } free(buffer); diff --git a/src/Makefile.am b/src/Makefile.am index 4ba3c35..0a1fdc1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -7,6 +7,7 @@ librcc_la_SOURCES = librcc.c \ curconfig.c curconfig.h \ rccconfig.c rccconfig.h \ rcclist.c rcclist.h \ + rccmutex.c rccmutex.h \ plugin.c plugin.h \ rccexternal.c rccexternal.h \ fake_enca.h fake_rcd.h \ @@ -23,7 +24,7 @@ librcc_la_SOURCES = librcc.c \ internal.h include_HEADERS = librcc.h -AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ @ASPELL_CFLAGS@ -librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ @ASPELL_LIBS@ +AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ @ASPELL_CFLAGS@ @PTHREAD_CFLAGS@ +librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ @ASPELL_LIBS@ @PTHREAD_LIBS@ librcc_la_LDFLAGS = -version-info @LIBRCC_VERSION_INFO@ diff --git a/src/rccexternal.c b/src/rccexternal.c index 4a09948..6a81c56 100644 --- a/src/rccexternal.c +++ b/src/rccexternal.c @@ -42,6 +42,7 @@ #include "internal.h" #define RCC_EXT_PROG_NAME "rccexternal" +#define RCC_EXTERNAL_TIMEOUT 250 /* 100us */ static pid_t pid = (pid_t)-1; static char *addr = NULL; @@ -88,9 +89,13 @@ void rccExternalFree() { } static int rccExternalSetDeadline(struct timeval *tv, unsigned long timeout) { +/* gettimeofday(tv, NULL); tv->tv_sec += (tv->tv_usec + timeout + RCC_EXTERNAL_TIMEOUT) / 1000000; tv->tv_usec = (tv->tv_usec + timeout + RCC_EXTERNAL_TIMEOUT) % 1000000; +*/ + tv->tv_sec = (timeout + RCC_EXTERNAL_TIMEOUT) / 1000000; + tv->tv_usec = (timeout + RCC_EXTERNAL_TIMEOUT) % 1000000; return 0; } @@ -103,7 +108,7 @@ size_t rccExternalWrite(int s, const char *buffer, ssize_t size, unsigned long t if (s == -1) return -1; - for (writed = 0; (writed < size)&&(connected); writed += connected?res:0) { + for (writed = 0; ((writed < size)&&(connected)); writed += connected?res:0) { FD_ZERO(&fdcon); FD_SET(s, &fdcon); rccExternalSetDeadline(&tv, timeout); @@ -127,7 +132,7 @@ size_t rccExternalRead(int s, char *buffer, ssize_t size, unsigned long timeout) if (s == -1) return -1; - for (readed = 0; (readed < size)&&(connected); readed += connected?res:0) { + for (readed = 0; ((readed < size)&&(connected)); readed += connected?res:0) { FD_ZERO(&fdcon); FD_SET(s, &fdcon); rccExternalSetDeadline(&tv, timeout); diff --git a/src/rccexternal.h b/src/rccexternal.h index bffd6b3..236e2df 100644 --- a/src/rccexternal.h +++ b/src/rccexternal.h @@ -1,8 +1,6 @@ #ifndef _RCC_EXTERNAL_H #define _RCC_EXTERNAL_H -#define RCC_EXTERNAL_TIMEOUT 1000000 - typedef enum rcc_external_module_t { RCC_EXTERNAL_MODULE_CONTROL = 0, RCC_EXTERNAL_MODULE_LIBRTRANSLATE, diff --git a/src/rccmutex.c b/src/rccmutex.c new file mode 100644 index 0000000..e2690fa --- /dev/null +++ b/src/rccmutex.c @@ -0,0 +1,73 @@ +#include +#include + +#include "rccmutex.h" + +#define RCC_MUTEX_SLEEP 500 + +rcc_mutex rccMutexCreate() { + rcc_mutex mutex; + + mutex = (rcc_mutex)malloc(sizeof(rcc_mutex_s)); + if (mutex) { +#ifdef HAVE_PTHREAD + pthread_mutex_init(&mutex->mutex, NULL); +#else + mutex->mutex = 0; +#endif /* HAVE_PTHREAD */ + } + return mutex; +} + +void rccMutexFree(rcc_mutex mutex) { + if (mutex) { +#ifdef HAVE_PTHREAD + pthread_mutex_destroy(&mutex->mutex); +#endif /* HAVE_PTHREAD */ + free(mutex); + } +} + +int rccMutexLock(rcc_mutex mutex) { +#ifndef HAVE_PTHREAD + struct timespec ts; +#endif /* !HAVE_PTHREAD */ + + if (!mutex) return -1; + +#ifdef HAVE_PTHREAD + return pthread_mutex_lock(&mutex->mutex); +#else + while (mutex->mutex) { + ts.tv_sec = RCC_MUTEX_SLEEP / 1000000; + ts.tv_nsec = (RCC_MUTEX_SLEEP % 1000000)*1000; + nanosleep(&ts, NULL); + } + mutex->mutex = 1; + + return 0; +#endif /* HAVE_PTHREAD */ +} + +int rccMutexTryLock(rcc_mutex mutex) { + if (!mutex) return -1; + +#ifdef HAVE_PTHREAD + return pthread_mutex_trylock(&mutex->mutex); +#else + if (mutex->mutex) return -1; + mutex->mutex = 1; + return 0; +#endif /* HAVE_PTHREAD */ +} + +void rccMutexUnLock(rcc_mutex mutex) { + if (!mutex) return; +#ifdef HAVE_PTHREAD + pthread_mutex_unlock(&mutex->mutex); +#else + mutex->mutex = 0; +#endif /* HAVE_PTHREAD */ +} + + diff --git a/src/rccmutex.h b/src/rccmutex.h new file mode 100644 index 0000000..8585621 --- /dev/null +++ b/src/rccmutex.h @@ -0,0 +1,27 @@ +#ifndef _RCC_MUTEX_H +#define _RCC_MUTEX_H + +#include "../config.h" + +#ifdef HAVE_PTHREAD +# include +#endif /* HAVE_PTHREAD */ + +struct rcc_mutex_t { +#ifdef HAVE_PTHREAD + pthread_mutex_t mutex; +#else + unsigned char mutex; +#endif /* HAVE_PTHREAD */ +}; +typedef struct rcc_mutex_t rcc_mutex_s; +typedef struct rcc_mutex_t *rcc_mutex; + +rcc_mutex rccMutexCreate(); +void rccMutexFree(rcc_mutex mutex); + +int rccMutexLock(rcc_mutex mutex); +int rccMutexTryLock(rcc_mutex mutex); +void rccMutexUnLock(rcc_mutex mutex); + +#endif /* _RCC_MUTEX_H */ diff --git a/src/rccstring.c b/src/rccstring.c index 9c4c19f..aa92407 100644 --- a/src/rccstring.c +++ b/src/rccstring.c @@ -61,6 +61,7 @@ int rccStringFixID(rcc_string string, rcc_context ctx) { int rccStringChangeID(rcc_string string, rcc_language_id language_id) { if ((!string)&&(language_id != (rcc_language_id)-1)) return -1; +// printf("ChangingID %lu: %s\n", language_id, string); ((rcc_string_header*)string)->language_id = language_id; return 0; } diff --git a/src/rcctranslate.c b/src/rcctranslate.c index d7bb4e4..9dcf411 100644 --- a/src/rcctranslate.c +++ b/src/rcctranslate.c @@ -3,10 +3,12 @@ #include #include "internal.h" +#include "rccconfig.h" #include "rccexternal.h" +#include "rccmutex.h" #include "rcctranslate.h" - +#define RCC_TRANSLATE_DEFAULT_TIMEOUT 1000000 /* 1s */ int rccTranslateInit() { @@ -26,18 +28,37 @@ rcc_translate rccTranslateOpen(const char *from, const char *to) { translate = (rcc_translate)malloc(sizeof(rcc_translate_s)); if (!translate) return NULL; + + translate->mutex = rccMutexCreate(); + translate->wmutex = rccMutexCreate(); + if ((!translate->mutex)||(!translate->wmutex)) { + if (translate->mutex) rccMutexFree(translate->mutex); + if (translate->wmutex) rccMutexFree(translate->wmutex); + free(translate); + return NULL; + } translate->sock = rccExternalConnect(RCC_EXTERNAL_MODULE_LIBRTRANSLATE); if (translate->sock == -1) { + rccMutexFree(translate->mutex); + rccMutexFree(translate->wmutex); free(translate); return NULL; } translate->remaining = 0; + translate->werror = 0; + translate->prefix.cmd.cmd = RCC_EXTERNAL_COMMAND_TRANSLATE; translate->prefix.cmd.size = sizeof(rcc_translate_prefix_s); memcpy(translate->prefix.from, from, 3*sizeof(char)); memcpy(translate->prefix.to, to, 3*sizeof(char)); + + translate->wprefix.cmd.cmd = RCC_EXTERNAL_COMMAND_TRANSLATE_QUEUE; + translate->wprefix.cmd.size = sizeof(rcc_translate_prefix_s); + memcpy(translate->wprefix.from, from, 3*sizeof(char)); + memcpy(translate->wprefix.to, to, 3*sizeof(char)); + rccTranslateSetTimeout(translate, RCC_TRANSLATE_DEFAULT_TIMEOUT); return translate; @@ -50,18 +71,40 @@ void rccTranslateClose(rcc_translate translate) { #ifdef HAVE_LIBTRANSLATE if (!translate) return; if (translate->sock != -1) rccExternalClose(translate->sock); + rccMutexFree(translate->mutex); + rccMutexFree(translate->wmutex); free(translate); #endif /* HAVE_LIBTRANSLATE */ } int rccTranslateSetTimeout(rcc_translate translate, unsigned long us) { -#ifdef HAVE_LIBTRANSLATE_TIMED_TRANSLATE if (!translate) return -1; translate->prefix.timeout = us; return 0; -#else - return -1; -#endif /* HAVE_LIBTRANSLATE_TIMED_TRANSLATE */ +} + +#define RCC_UNLOCK_W 1 +#define RCC_UNLOCK_R 2 +#define RCC_UNLOCK_RW 3 +#define RCC_UNLOCK_WR 3 +static char *rccTranslateReturn(rcc_translate translate, char *ret, int unlock) { + if (unlock&RCC_UNLOCK_R) rccMutexUnLock(translate->mutex); + if (unlock&RCC_UNLOCK_W) rccMutexUnLock(translate->wmutex); + return ret; +} +#define rccTranslateReturnNULL(translate, unlock) rccTranslateReturn(translate, NULL, unlock) + +static int rccTranslateQueue(rcc_translate translate, const char *buf) { + size_t len, err; + + + len = strlen(buf); + translate->wprefix.cmd.size = sizeof(rcc_translate_prefix_s) + len - sizeof(rcc_external_command_s); + + err = rccExternalWrite(translate->sock, (char*)&translate->wprefix, sizeof(rcc_translate_prefix_s) - 1, 0); + if (!err) err = rccExternalWrite(translate->sock, buf, len + 1, 0); + fsync(translate->sock); + return err?1:0; } char *rccTranslate(rcc_translate translate, const char *buf) { @@ -69,27 +112,57 @@ char *rccTranslate(rcc_translate translate, const char *buf) { rcc_external_command_s resp; size_t err, len; char *buffer; -/* size_t i; -*/ - + if ((!translate)||(!buf)) return NULL; -/* - if (!strcmp(translate->prefix.to, "en")) { - for (i=0;buf[i];i++) + if (!strcmp(translate->prefix.to, rcc_english_language_sn)) { + for (i=0;buf[i];i++) { if ((unsigned char)buf[i]>0x7F) break; + if ((buf[i]>='A')&&(buf[i]<='Z')) break; + if ((buf[i]>='a')&&(buf[i]<='z')) break; + } if (!buf[i]) return NULL; } -*/ + + rccMutexLock(translate->wmutex); + + if (rccMutexTryLock(translate->mutex)) { + if ((translate->werror)||(translate->sock == -1)) return rccTranslateReturnNULL(translate,RCC_UNLOCK_W); + + if (rccTranslateQueue(translate, buf)) translate->werror = 1; + return rccTranslateReturnNULL(translate, RCC_UNLOCK_W); + } + + if (translate->werror) { + rccExternalClose(translate->sock); + translate->sock = -1; + translate->werror = 0; + } if (translate->sock == -1) { translate->sock = rccExternalConnect(RCC_EXTERNAL_MODULE_LIBRTRANSLATE); - if (translate->sock == -1) return NULL; + if (translate->sock == -1) { + return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); + } else { + translate->werror = 0; + translate->remaining = 0; + } } else if (translate->remaining) { if (translate->remaining == (size_t)-1) { err = rccExternalRead(translate->sock, (char*)&resp, sizeof(rcc_external_command_s), 0); - if (err) return NULL; + if (err) { + if (err == sizeof(rcc_external_command_s)) { + if (rccTranslateQueue(translate, buf)) { + rccExternalClose(translate->sock); + translate->sock = -1; + } + } else { + rccExternalClose(translate->sock); + translate->sock = -1; + } + return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); + } translate->remaining = resp.size; } @@ -97,13 +170,18 @@ char *rccTranslate(rcc_translate translate, const char *buf) { if (!buffer) { rccExternalClose(translate->sock); translate->sock = -1; - return NULL; + return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); } + err = rccExternalRead(translate->sock, buffer, translate->remaining, 0); free(buffer); if (err) { translate->remaining = err; - return NULL; + if (rccTranslateQueue(translate, buf)) { + rccExternalClose(translate->sock); + translate->sock = -1; + } + return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); } translate->remaining = 0; } @@ -114,41 +192,50 @@ char *rccTranslate(rcc_translate translate, const char *buf) { if (err) { rccExternalClose(translate->sock); translate->sock = -1; - return NULL; + return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); } err = rccExternalWrite(translate->sock, buf, len + 1, 0); if (err) { rccExternalClose(translate->sock); translate->sock = -1; - return NULL; + return rccTranslateReturnNULL(translate,RCC_UNLOCK_RW); } + rccMutexUnLock(translate->wmutex); err = rccExternalRead(translate->sock, (char*)&resp, sizeof(rcc_external_command_s), translate->prefix.timeout); if (err) { if (err == sizeof(rcc_external_command_s)) { translate->remaining = (size_t)-1; } else { + rccMutexLock(translate->wmutex); rccExternalClose(translate->sock); translate->sock = -1; + rccMutexUnLock(translate->wmutex); } - return NULL; + return rccTranslateReturnNULL(translate,RCC_UNLOCK_R); } - if ((resp.cmd!=RCC_EXTERNAL_COMMAND_TRANSLATE)||(!resp.size)) return NULL; + + if ((resp.cmd!=RCC_EXTERNAL_COMMAND_TRANSLATE)||(!resp.size)) + return rccTranslateReturnNULL(translate,RCC_UNLOCK_R); buffer = (char*)malloc(resp.size*sizeof(char)); if (!buffer) { + rccMutexLock(translate->wmutex); rccExternalClose(translate->sock); translate->sock = -1; - return NULL; + rccMutexUnLock(translate->wmutex); + + return rccTranslateReturnNULL(translate,RCC_UNLOCK_R); } + err = rccExternalRead(translate->sock, buffer, resp.size, 0); if (err) { translate->remaining = err; free(buffer); - return NULL; + return rccTranslateReturnNULL(translate,RCC_UNLOCK_R); } - return buffer; + return rccTranslateReturn(translate, buffer, RCC_UNLOCK_R); #else return NULL; #endif /* HAVE_LIBTRANSLATE */ diff --git a/src/rcctranslate.h b/src/rcctranslate.h index 961af6f..b00cdfd 100644 --- a/src/rcctranslate.h +++ b/src/rcctranslate.h @@ -1,9 +1,10 @@ #ifndef _RCC_TRANSLATE_H #define _RCC_TRANSLATE_H +#include "rccmutex.h" #include "rccexternal.h" -#define RCC_TRANSLATE_DEFAULT_TIMEOUT 5000000 /* 5s */ #define RCC_EXTERNAL_COMMAND_TRANSLATE 0x80 +#define RCC_EXTERNAL_COMMAND_TRANSLATE_QUEUE 0x81 struct rcc_translate_prefix_t { @@ -19,8 +20,13 @@ typedef struct rcc_translate_prefix_t *rcc_translate_prefix; struct rcc_translate_t { rcc_translate_prefix_s prefix; + rcc_translate_prefix_s wprefix; size_t remaining; + rcc_mutex mutex; + rcc_mutex wmutex; int sock; + + unsigned char werror; }; typedef struct rcc_translate_t rcc_translate_s; diff --git a/src/recode.c b/src/recode.c index 7e12343..d337164 100644 --- a/src/recode.c +++ b/src/recode.c @@ -15,21 +15,34 @@ #include "rccspell.h" #define isSpace(ch) ((ch<0x7F)&&((ch<'A')||(ch>'z')||((ch>'Z')&&(ch<'a')))) -#define RCC_REQUIRED_PROBABILITY 0.66 +#define RCC_PROBABILITY_STEP 0.10 +#define RCC_REQUIRED_PROBABILITY 0.33 +#define RCC_REQUIRED_LENGTH 5 +#define RCC_ACCEPTABLE_PROBABILITY 0 +#define RCC_ACCEPTABLE_LENGTH 3 rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len, rcc_string *retstring) { - rcc_speller speller; + rcc_speller speller = NULL, english_speller = NULL; unsigned long i, nlanguages; rcc_language_config config, config0 = NULL; rcc_string recoded; unsigned char *utf8; size_t j, mode; - unsigned long words, english, result; + unsigned long spres, words, english, result; + size_t longest; unsigned char english_mode, english_word = 1; + char *english_string = NULL; rcc_language_id english_lang = (rcc_language_id)-1; + size_t english_longest = 0; + unsigned char is_english_string = 1; double res, english_res = 0; rcc_option_value usedb4; - + rcc_language_id bestlang = (rcc_language_id)-1; + unsigned long bestlongest = RCC_ACCEPTABLE_LENGTH; + double bestres = RCC_ACCEPTABLE_PROBABILITY; + char *best_string = NULL; + + unsigned long accepted_nonenglish_langs = 0; usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); @@ -50,6 +63,15 @@ rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id nlanguages = ctx->n_languages; + english_lang = rccGetLanguageByName(ctx, rcc_english_language_sn); + if (english_lang != (rcc_language_id)-1) { + config = rccGetUsableConfig(ctx, english_lang); + if (config) { + english_speller = rccConfigGetSpeller(config); + if (rccSpellerGetError(english_speller)) english_speller = NULL; + } + } + for (i=0;ilanguage->sn); + + for (result=0,english=0,words=0,longest=0,mode=0,j=0;utf8[j];j++) { if (isSpace(utf8[j])) { if (mode) { - if ((!english_mode)&&(english_word)) english++; - result+=rccSpellerSized(speller, utf8 + mode - 1, j - mode + 1)?1:0; + if ((!english_mode)&&(english_word)&&(rccSpellerSized(english_speller, utf8 + mode -1, j - mode + 1))) + english++; + else { + if ((english_mode)&&(!english_word)) is_english_string = 0; + spres = rccSpellerSized(speller, utf8 + mode - 1, j - mode + 1)?1:0; + printf("%.*s %s\n",j-mode+1,utf8+mode-1, spres?"<======":""); + if ((spres)&&((j - mode + 1)>longest)) longest = j - mode + 1; + result+=spres; + } words++; mode = 0; } else continue; @@ -85,40 +116,89 @@ rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id } } } + if (mode) { - result+=rccSpeller(speller, utf8 + mode - 1)?1:0; + if ((!english_mode)&&(english_word)&&(rccSpeller(english_speller, utf8 + mode -1))) + english++; + else { + if ((english_mode)&&(!english_word)) is_english_string = 0; + spres = rccSpeller(speller, utf8 + mode - 1)?1:0; + if ((spres)&&((j-mode+1)>longest)) longest = j - mode + 1; + result += spres; + } words++; } if (english_mode) { + if (english_string) free(english_string); + printf("%u %u\n", result, words); + english_res = 1.*result/words; - english_lang = (rcc_language_id)i; - } else if (words) { - res = 1.*result/words; - if (res > RCC_REQUIRED_PROBABILITY) { + english_lang = (rcc_language_id)i; + english_longest = longest; + english_string = recoded; + } else if (words>english) { + res = 1.*result/(words - english); + printf("%u %u %u\n", result, words, english); + if ((res > RCC_REQUIRED_PROBABILITY)&&(longest > RCC_REQUIRED_LENGTH)) { + if (best_string) free(best_string); + if (english_string) free(english_string); + if (retstring) *retstring = recoded; else free(recoded); return (rcc_language_id)i; - } - if (words > english) { - res = 1.*(result - english)/(words - english); - if (res > RCC_REQUIRED_PROBABILITY) { - if (retstring) *retstring = recoded; - else free(recoded); - return (rcc_language_id)i; - } - } - } - - free(recoded); + } else if ((res > bestres + RCC_PROBABILITY_STEP)|| + ((res > bestres - RCC_PROBABILITY_STEP)&&(longest > bestlongest))|| + ((res > bestres)&&(longest == bestlongest))) { + + if (best_string) free(best_string); + + bestres = res; + bestlang = (rcc_language_id)i; + bestlongest = longest; + best_string = recoded; + } else if (!accepted_nonenglish_langs) { + bestlang = (rcc_language_id)i; + best_string = recoded; + } else free(recoded); + + accepted_nonenglish_langs++; + } else free(recoded); } - if (english_res > RCC_REQUIRED_PROBABILITY) { - if (retstring) { - *retstring = rccCreateString(english_lang, buf, len); - } + if ((is_english_string)&&(english_res > RCC_REQUIRED_PROBABILITY)&&(english_longest > RCC_REQUIRED_LENGTH)) { + if (best_string) free(best_string); + if (retstring) *retstring = english_string; + else if (english_string) free(english_string); return english_lang; } + + if ((bestres > RCC_ACCEPTABLE_PROBABILITY)&&(bestlongest > RCC_ACCEPTABLE_LENGTH)) { + if (english_string) free(english_string); + if (retstring) *retstring = best_string; + else if (best_string) free(best_string); + return bestlang; + } + + if ((is_english_string)&&(english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH)) { + if (best_string) free(best_string); + if (retstring) *retstring = english_string; + else if (english_string) free(english_string); + return english_lang; + } + + if (best_string) { + if (english_string) free(english_string); + if (retstring) *retstring = best_string; + else if (best_string) free(best_string); + return bestlang; + } else if (best_string) free(best_string); + + if ((english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH)) { + if (retstring) *retstring = english_string; + else if (english_string) free(english_string); + return english_lang; + } else if (english_string) free(english_string); return (rcc_language_id)-1; } @@ -206,9 +286,12 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, */ detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len, &result); - if (detected_language_id != (rcc_language_id)-1) return result; + if (detected_language_id != (rcc_language_id)-1) { + printf("Language %i: %s\n", rccStringGetLanguage(result), result); + return result; + } + - err = rccConfigure(ctx); if (err) return NULL; @@ -316,7 +399,6 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s } if ((translate == RCC_OPTION_TRANSLATE_TO_ENGLISH)||((config->trans)&&(!translated))) { - puts("entrans"); if (!config->entrans) { config->entrans = rccTranslateOpen(rccGetLanguageName(ctx, language_id), rcc_english_language_sn); } @@ -384,7 +466,6 @@ char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const const char *from_charset, *to_charset; rcc_charset_id from_charset_id, to_charset_id; rcc_class_type class_type; - rcc_option_value usedb4; if (!ctx) { if (rcc_default_ctx) ctx = rcc_default_ctx; @@ -394,20 +475,9 @@ char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const class_type = rccGetClassType(ctx, to); if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) goto recoding; - if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)&RCC_OPTION_LEARNING_FLAG_LEARN) goto recoding; - - usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE); - if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) { - stmp = rccDb4GetKey(ctx->db4ctx, buf, len); - if (stmp) { - if (rccStringFixID(stmp, ctx)) free(stmp); - else { - result = rccSizedTo(ctx, to, stmp, rlen); - free(stmp); - return result; - } - } - } + if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)) goto recoding; + if (rccGetOption(ctx, RCC_OPTION_AUTODETECT_LANGUAGE)) goto recoding; + if (rccGetOption(ctx, RCC_OPTION_TRANSLATE)) goto recoding; err = rccConfigure(ctx); if (err) return NULL; -- cgit v1.2.3