From 35381569403e90b8d34b223f524519521bc81598 Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Wed, 27 Jun 2007 09:28:22 +0000 Subject: Engines rework - LibGuess support for far east language autodetection - Support for LibRCD 0.1.9 supporting ISO-8859-1 strings - Fixing wrong encodings names returned by Enca - Engine plugins naming scheme is altered - New API functions: rccEngineGetInfo, rccEngineGetAutoCharsetByName - Most of languages are no more hardcoded, but moved to the configuration - RCD engine is added to Belarusian language (I hope it should work) - Some encoding names are fixed in configuration - Support for external libiconv - Support for libcharset - Find UI interface language from LC_MESSAGES locale - Simple compilation fix (Thanx to D. Panov) --- configure.in | 51 +++++++- docs/encodings.txt | 2 + engines/Makefile.am | 22 ++-- engines/libguess.c | 80 +++++++++++++ engines/librcd.c | 9 +- examples/rcc.xml | 296 +++++++++++++++++++++++++++++++++++++++++++++++ examples/rcc.xml.chinese | 28 ++++- examples/rcc.xml.eastern | 107 +++++++++++++++++ examples/rcc.xml.western | 25 ++++ src/Makefile.am | 2 +- src/engine.c | 27 +++++ src/engine.h | 1 + src/librcc.h | 3 + src/lngconfig.c | 2 +- src/plugin.c | 8 +- src/rccconfig.c | 25 ++-- src/rccenca.c | 48 +++++++- src/rccenca.h | 8 ++ src/rccexternal.h | 3 + src/rcclocale.c | 9 +- src/recode.c | 22 +++- ui/librccui.c | 2 +- ui/rccnames.c | 2 + 23 files changed, 746 insertions(+), 36 deletions(-) create mode 100644 docs/encodings.txt create mode 100644 engines/libguess.c create mode 100644 examples/rcc.xml.eastern create mode 100644 examples/rcc.xml.western diff --git a/configure.in b/configure.in index 7f85408..ce87ce8 100644 --- a/configure.in +++ b/configure.in @@ -41,6 +41,10 @@ AC_ARG_ENABLE( bdb, [ --disable-bdb disable usage of berkeleydb for recodings caching],, disable_bdb="yes") +AC_ARG_ENABLE( force-system-iconv, + [ --enable-force-system-iconv force usage of iconv library from glibc],, + 
enable_force_system_iconv="no") + AC_PROG_CC AM_PROG_CC_C_O AC_PROG_INSTALL @@ -118,13 +122,35 @@ AC_CHECK_HEADER(dlfcn.h, [AC_CHECK_LIB(dl, dlopen, [ ]) ])]) +# Link against external libiconv/libcharset unless --enable-force-system-iconv is given +EXTRA_LIBS="" +EXTERNAL_ICONV=no +HAVE_LIBCHARSET=no + +if test "x$enable_force_system_iconv" != "xyes"; then +AC_CHECK_LIB(iconv, iconv_open, [ + EXTERNAL_ICONV=yes + EXTRA_LIBS="$EXTRA_LIBS -liconv" +]) +AC_CHECK_HEADER(libcharset.h, [AC_CHECK_LIB(charset, locale_charset, [ + AC_DEFINE(HAVE_LIBCHARSET,1,[Defines if libcharset is available]) + HAVE_LIBCHARSET=yes + EXTRA_LIBS="$EXTRA_LIBS -lcharset" +])]) +fi +AM_CONDITIONAL(HAVE_LIBCHARSET, [ test "x$HAVE_LIBCHARSET" = "xyes" ]) +AC_SUBST(EXTRA_LIBS) + + RCD_LIBS="" RCD_INCLUDES="" HAVE_RCD=no ENCA_LIBS="" ENCA_INCLUDES="" HAVE_ENCA=no - +LIBGUESS_LIBS="" +LIBGUESS_INCLUDES="" +HAVE_LIBGUESS=no if test "x$enable_force_dynamic_engines" != "xyes"; then AC_CHECK_HEADER(librcd.h, [AC_CHECK_LIB(rcd, rcdGetRussianCharset, [ @@ -141,13 +167,23 @@ AC_CHECK_HEADER(enca.h, [AC_CHECK_LIB(enca, enca_analyse, [ ])]) fi +AC_CHECK_HEADER(libguess.h, [AC_CHECK_LIB(guess, guess_jp, [ + AC_DEFINE(HAVE_LIBGUESS,1,[Defines if libguess is available]) + LIBGUESS_LIBS="-lguess" + LIBGUESS_INCLUDES="" + HAVE_LIBGUESS=yes +])]) + AM_CONDITIONAL(HAVE_RCD, [ test "x$HAVE_RCD" = "xyes" ]) AM_CONDITIONAL(HAVE_ENCA, [ test "x$HAVE_ENCA" = "xyes" ]) +AM_CONDITIONAL(HAVE_LIBGUESS, [ test "x$HAVE_LIBGUESS" = "xyes" ]) AC_SUBST(RCD_LIBS) AC_SUBST(RCD_INCLUDES) AC_SUBST(ENCA_LIBS) AC_SUBST(ENCA_INCLUDES) +AC_SUBST(LIBGUESS_LIBS) +AC_SUBST(LIBGUESS_INCLUDES) USE_DLOPEN=no if test "x$HAVE_DLOPEN" = "xyes"; then @@ -272,13 +308,23 @@ AC_CHECK_FUNCS(strcasecmp strncasecmp strdup strnlen) AC_OUTPUT(src/Makefile engines/Makefile external/Makefile ui/Makefile examples/Makefile Makefile librcc.spec) +rccdir=${pkgdatadir} +while expr "x$rccdir" : 'x\$' >/dev/null 2>&1; do + rccdir=`eval echo $rccdir` +done + + echo "" echo "Configuration:" echo " POSIX Threading Support: $HAVE_PTHREAD" echo "" +echo " External IConv
Library: $EXTERNAL_ICONV" +echo " LibCharset Library: $HAVE_LIBCHARSET" +echo "" echo " Dynamic Engine Loading Support: $HAVE_DLOPEN" echo " Enca Charset Detection Support: $HAVE_ENCA" echo " LibRCD Charset Detection Support: $HAVE_RCD" +echo " LibGUESS Charset Detection Support: $HAVE_LIBGUESS" echo "" echo " Multilanguage support with DB4: $HAVE_BDB" echo " Language autodetection using aspell: $HAVE_ASPELL" @@ -289,5 +335,8 @@ echo "User Interfaces:" echo " GTK User Interface: $HAVE_GTK" echo " GTK2 User Interface: $HAVE_GTK2" echo "" +echo "Directories:" +echo " RCC Data Directory: ${rccdir}" +echo "" echo "" echo "" diff --git a/docs/encodings.txt b/docs/encodings.txt new file mode 100644 index 0000000..aa2355d --- /dev/null +++ b/docs/encodings.txt @@ -0,0 +1,2 @@ +Enca supports the HZ Chinese encoding, which is not supported by the iconv shipped +with glibc. The portable iconv library seems to support it. diff --git a/engines/Makefile.am b/engines/Makefile.am index 2b7bb26..93e490a 100644 --- a/engines/Makefile.am +++ b/engines/Makefile.am @@ -1,14 +1,22 @@ -lib_LTLIBRARIES = libwestern.la +lib_LTLIBRARIES = western_engine.la libdir = $(pkgdatadir)/engines if HAVE_RCD -lib_LTLIBRARIES += librcd.la -librcd_la_SOURCES = librcd.c -librcd_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo" +lib_LTLIBRARIES += librcd_engine.la +librcd_engine_la_SOURCES = librcd.c +librcd_engine_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo" +librcd_engine_la_LIBADD = @RCD_LIBS@ endif -libwestern_la_SOURCES = western.c -libwestern_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo" +if HAVE_LIBGUESS +lib_LTLIBRARIES += libguess_engine.la +libguess_engine_la_SOURCES = libguess.c +libguess_engine_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo" +libguess_engine_la_LIBADD = @LIBGUESS_LIBS@ +endif + +western_engine_la_SOURCES = western.c +western_engine_la_LDFLAGS = -module -avoid-version -export-symbols-regex
"rccGetInfo" -AM_CPPFLAGS = -I../src @RCD_INCLUDES@ +AM_CPPFLAGS = -I../src @RCD_INCLUDES@ @LIBGUESS_INCLUDES@ diff --git a/engines/libguess.c b/engines/libguess.c new file mode 100644 index 0000000..7f13b50 --- /dev/null +++ b/engines/libguess.c @@ -0,0 +1,80 @@ +#include +#include + +#include +#include + +#define UTF8_ID 0 +#define UTF16_ID 1 + +typedef const char *(*guess_function)(const char *buf, int buflen); + +struct rcc_guess_engine_t { + struct rcc_engine_t engine; + guess_function func; +}; +typedef struct rcc_guess_engine_t rcc_guess_engine; +/* Detects the charset of buf (len bytes, or NUL-terminated when len is 0); returns (rcc_autocharset_id)-1 when nothing is detected */ +rcc_autocharset_id guessDetect(rcc_engine_context ctx, const char *buf, int len) { + const char *res; + rcc_guess_engine *info; + + if (!buf) return (rcc_autocharset_id)-1; + + info = (rcc_guess_engine*)rccEngineGetInfo(ctx); + if (info) { + if (info->func) res = info->func(buf, len?len:strlen(buf)); + else { + if (!len) len = strlen(buf); + /* No per-language detector is set: try guess_cn, then fall back to guess_tw */ + res = guess_cn(buf, len); + if (!res) res = guess_tw(buf, len); + } + } else + res = NULL; + + if (!res) return (rcc_autocharset_id)-1; + return rccEngineGetAutoCharsetByName(ctx, res); +} + + +struct rcc_guess_engine_t guessJPEngine = { + { + "LibGUESS", + NULL, /* Constructor */ + NULL, /* Destructor */ + &guessDetect, + {"UTF-8", "UTF-16", "ISO-2022-JP", "EUC-JP", "SJIS", NULL} + }, + &guess_jp +}; + +struct rcc_guess_engine_t guessCNEngine = { + { + "LibGUESS", + NULL, /* Constructor */ + NULL, /* Destructor */ + &guessDetect, + {"UTF-8", "UTF-16", "ISO-2022-CN", "GB2312", "GB18030", "BIG5", NULL} + }, + NULL +}; + +struct rcc_guess_engine_t guessKREngine = { + { + "LibGUESS", + NULL, /* Constructor */ + NULL, /* Destructor */ + &guessDetect, + {"UTF-8", "UTF-16", "ISO-2022-KR", "EUC-KR", "JOHAB", NULL} + }, + &guess_kr +}; + + +rcc_engine *rccGetInfo(const char *lang) { + if (!strcmp(lang, "zh")) return (rcc_engine*)&guessCNEngine; + if (!strcmp(lang, "ja")) return (rcc_engine*)&guessJPEngine; + if (!strcmp(lang, "ko")) return
(rcc_engine*)&guessKREngine; + return NULL; +} diff --git a/engines/librcd.c b/engines/librcd.c index c24d244..bfb14b0 100644 --- a/engines/librcd.c +++ b/engines/librcd.c @@ -9,11 +9,15 @@ static rcc_autocharset_id AutoengineRussian(rcc_engine_context ctx, const char * } static rcc_engine russian_engine = { - "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-R","UTF-8","IBM866", NULL} + "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-R","UTF-8","IBM866", "ISO8859-1", NULL} }; static rcc_engine ukrainian_engine = { - "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-U","UTF-8","IBM865", NULL} + "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-U","UTF-8","IBM865", "ISO8859-1", NULL} +}; + +static rcc_engine belarussian_engine = { + "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","ISO-IR-111","UTF-8","IBM866", "ISO8859-1", NULL} }; rcc_engine *rccGetInfo(const char *lang) { @@ -21,6 +25,7 @@ rcc_engine *rccGetInfo(const char *lang) { if (!strcmp(lang, "ru")) return &russian_engine; if (!strcmp(lang, "uk")) return &ukrainian_engine; + if (!strcmp(lang, "be")) return &belarussian_engine; return NULL; } diff --git a/examples/rcc.xml b/examples/rcc.xml index 12f667b..eda97f1 100644 --- a/examples/rcc.xml +++ b/examples/rcc.xml @@ -1,8 +1,189 @@ + + Автоопределение + + + Отключить + + + Русский + + + Украинский + + + Беларуский + + + Английский + + ISO8859-1 + UTF-8 + + + western + + + + Болгарский + + CP1251 + UTF-8 + ISO-8859-1 + IBM855 + MACCYRILLIC + ISO-IR-111 + + + + Чешский + + CP1250 + UTF-8 + ISO-8859-2 + IBM852 + MACCENTRALEUROPE + CSKOI8R + + + + Эстонский + + CP1257 + UTF-8 + ISO-8859-4 + ISO-8859-13 + IBM755 + MACCENTRALEUROPE + BALTIC + + + + Хорватский + + CP1250 + UTF-8 + ISO-8859-2 + IBM852 + MACCENTRALEUROPE + + + + Венгерский + + CP1250 + UTF-8 + ISO-8859-2 + IBM852 + MACCENTRALEUROPE + + + + Латвийский + + CP1257 + UTF-8 + ISO-8859-4 + ISO-8859-13 + IBM755 + MACCENTRALEUROPE + BALTIC + + + + Литовский + + CP1257 + 
UTF-8 + ISO-8859-4 + ISO-8859-13 + IBM755 + MACCENTRALEUROPE + BALTIC + + + + Польский + + CP1250 + UTF-8 + ISO-8859-2 + ISO-8859-13 + ISO-8859-16 + IBM852 + MACCENTRALEUROPE + BALTIC + + + + Словацкий + + CP1250 + UTF-8 + ISO-8859-2 + IBM852 + MACCENTRALEUROPE + CSKOI8R + + + + Cловенский + + CP1250 + UTF-8 + ISO-8859-2 + IBM852 + MACCENTRALEUROPE + + + + Японский + + UTF-8 + UTF-16 + ISO-2022-JP + EUC-JP + SJIS + + + libguess + + + + Корейский + + UTF-8 + UTF-16 + ISO-2022-KR + EUC-KR + JOHAB + + + libguess + + + + Китайский + + UTF-8 + UTF-16 + ISO-2022-CN + GB2312 + GB18030 + GBK + BIG5 + EUC-CN + HZ + + + libguess + + German + Немецкий ISO8859-1 UTF-8 @@ -13,6 +194,7 @@ French + Французский ISO8859-1 UTF-8 @@ -22,4 +204,118 @@ + + + + + + + + + + + Кодировка ID3 Тэгов + + + Кодировка Списка Песен + + + Кодировка Файлов в Списке + + + Кодировка Файловой Системы + + + Отображение + + + + + Авто-определение + + + + + Отключить + + + Библиотека LibRCD + + + Библиотека Enca + + + Библиотека LibGUESS + + + + + РусXMMS + + + Язык + + + Текущий Язык + + + + + Авто-определение + + + Движок + + + + + + + Кодировки + + + + \ No newline at end of file diff --git a/examples/rcc.xml.chinese b/examples/rcc.xml.chinese index 0cc914c..ea916c6 100644 --- a/examples/rcc.xml.chinese +++ b/examples/rcc.xml.chinese @@ -5,24 +5,44 @@ Japanese UTF-8 + UTF-16 ISO-2022-JP EUC-JP - SHIFT-JIS + SJIS + + libguess + + + + Korean + + UTF-8 + UTF-16 + ISO-2022-KR + EUC-KR + JOHAB + + + libguess + Chinese UTF-8 + UTF-16 + ISO-2022-CN GB2312 GB18030 GBK - ISO-2022-CN BIG5 - BIG5-HKSCS EUC-CN - EUC-TW + HZ + + libguess + diff --git a/examples/rcc.xml.eastern b/examples/rcc.xml.eastern new file mode 100644 index 0000000..52fbfcb --- /dev/null +++ b/examples/rcc.xml.eastern @@ -0,0 +1,107 @@ + + + + + + CP1251 + UTF-8 + ISO-8859-1 + IBM855 + MACCYRILLIC + ISO-IR-111 + + + + + CP1250 + UTF-8 + ISO-8859-2 + IBM852 + MACCENTRALEUROPE + CSKOI8R + + + + + CP1257 + UTF-8 + ISO-8859-4 + ISO-8859-13 + 
IBM755 + MACCENTRALEUROPE + BALTIC + + + + + CP1250 + UTF-8 + ISO-8859-2 + IBM852 + MACCENTRALEUROPE + + + + + CP1250 + UTF-8 + ISO-8859-2 + IBM852 + MACCENTRALEUROPE + + + + + CP1257 + UTF-8 + ISO-8859-4 + ISO-8859-13 + IBM755 + MACCENTRALEUROPE + BALTIC + + + + + CP1257 + UTF-8 + ISO-8859-4 + ISO-8859-13 + IBM755 + MACCENTRALEUROPE + BALTIC + + + + + CP1250 + UTF-8 + ISO-8859-2 + ISO-8859-13 + ISO-8859-16 + IBM852 + MACCENTRALEUROPE + BALTIC + + + + + CP1250 + UTF-8 + ISO-8859-2 + IBM852 + MACCENTRALEUROPE + CSKOI8R + + + + + CP1250 + UTF-8 + ISO-8859-2 + IBM852 + MACCENTRALEUROPE + + + + \ No newline at end of file diff --git a/examples/rcc.xml.western b/examples/rcc.xml.western new file mode 100644 index 0000000..12f667b --- /dev/null +++ b/examples/rcc.xml.western @@ -0,0 +1,25 @@ + + + + + German + + ISO8859-1 + UTF-8 + + + western + + + + French + + ISO8859-1 + UTF-8 + + + western + + + + \ No newline at end of file diff --git a/src/Makefile.am b/src/Makefile.am index 79976c6..42c5966 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -28,6 +28,6 @@ librcc_la_SOURCES = librcc.c \ include_HEADERS = librcc.h AM_CPPFLAGS = -I../src -DLIBRCC_DATA_DIR=\"${pkgdatadir}\" @XML_INCLUDES@ @DLOPEN_INCLUDES@ @RCD_INCLUDES@ @ENCA_INCLUDES@ @BDB_INCLUDES@ @ASPELL_CFLAGS@ @PTHREAD_CFLAGS@ -librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ @ASPELL_LIBS@ @PTHREAD_LIBS@ +librcc_la_LIBADD = @XML_LIBS@ @DLOPEN_LIBS@ @RCD_LIBS@ @ENCA_LIBS@ @BDB_LIBS@ @ASPELL_LIBS@ @PTHREAD_LIBS@ @EXTRA_LIBS@ librcc_la_LDFLAGS = -version-info @LIBRCC_VERSION_INFO@ diff --git a/src/engine.c b/src/engine.c index f9c2284..3d3e023 100644 --- a/src/engine.c +++ b/src/engine.c @@ -125,6 +125,7 @@ int rccEngineConfigure(rcc_engine_context ctx) { engine = ctx->config->language->engines[engine_id]; + ctx->id = engine_id; ctx->free_func = engine->free_func; ctx->func = engine->func; @@ -134,6 +135,30 @@ int rccEngineConfigure(rcc_engine_context ctx) { return 0; } + 
+rcc_engine *rccEngineGetInfo(rcc_engine_context ctx) { + if (!ctx) return NULL; + return ctx->config->language->engines[ctx->id]; +} + +rcc_autocharset_id rccEngineGetAutoCharsetByName(rcc_engine_context ctx, const char *name) { + unsigned int i; + rcc_engine *info; + rcc_charset *charsets; + + if ((!ctx)||(!name)) return (rcc_autocharset_id)-1; + + info = rccEngineGetInfo(ctx); + if (info) { + charsets = info->charsets; + + for (i=0;charsets[i];i++) + if (!strcasecmp(charsets[i],name)) return (rcc_autocharset_id)i; + } + + return (rcc_autocharset_id)-1; +} + rcc_engine_internal rccEngineGetInternal(rcc_engine_context ctx) { if (!ctx) return NULL; @@ -186,6 +211,8 @@ static int CheckWestern(const unsigned char *buf, int len) { rcc_autocharset_id rccEngineDetectCharset(rcc_engine_context ctx, const char *buf, size_t len) { rcc_autocharset_id utf; + /* DS: This should be done directly in autoengines, otherwise we will + fail to detect 7bit encodings */ if (CheckWestern(buf, len)) { utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF-8"); if (utf != (rcc_autocharset_id)-1) return utf; diff --git a/src/engine.h b/src/engine.h index 96e6db6..3213f2b 100644 --- a/src/engine.h +++ b/src/engine.h @@ -26,6 +26,7 @@ struct rcc_engine_context_t { rcc_engine_function func; rcc_engine_free_function free_func; + rcc_engine_id id; rcc_engine_internal internal; }; typedef struct rcc_engine_context_t rcc_engine_context_s; diff --git a/src/librcc.h b/src/librcc.h index e5749cd..88cc802 100644 --- a/src/librcc.h +++ b/src/librcc.h @@ -1481,6 +1481,9 @@ typedef rcc_engine *(*rcc_plugin_engine_info_function)(const char *lang); rcc_engine_internal rccEngineGetInternal(rcc_engine_context ctx); rcc_language *rccEngineGetLanguage(rcc_engine_context ctx); rcc_context rccEngineGetRccContext(rcc_engine_context ctx); +rcc_engine *rccEngineGetInfo(rcc_engine_context ctx); +rcc_autocharset_id rccEngineGetAutoCharsetByName(rcc_engine_context ctx, const char *name); + 
/******************************************************************************* **************************** Configuration ************************************* diff --git a/src/lngconfig.c b/src/lngconfig.c index 67e05c6..670d97f 100644 --- a/src/lngconfig.c +++ b/src/lngconfig.c @@ -405,7 +405,7 @@ rcc_language_config rccGetUsableConfig(rcc_context ctx, rcc_language_id language rcc_language_config rccGetConfig(rcc_context ctx, rcc_language_id language_id) { rcc_language_config config; - + config = rccGetConfigPointer(ctx, language_id, &language_id); if (config) { if ((!config->charset)&&(rccConfigInit(config, ctx))) return NULL; diff --git a/src/plugin.c b/src/plugin.c index 38337fb..c53726f 100644 --- a/src/plugin.c +++ b/src/plugin.c @@ -121,13 +121,13 @@ rcc_plugin_handle rccPluginLoad(rcc_plugin_type type, const char *name) { switch (type) { case RCC_PLUGIN_TYPE_ENGINE: - pluginfn = (char*)malloc((32 + strlen(rcc_home_dir) + strlen(name))*sizeof(char)); + pluginfn = (char*)malloc((48 + strlen(rcc_home_dir) + strlen(name))*sizeof(char)); if (!pluginfn) return NULL; - sprintf(pluginfn, "%s/.rcc/engines/lib%s.so", rcc_home_dir, name); + sprintf(pluginfn, "%s/.rcc/engines/%s_engine.so", rcc_home_dir, name); res = rccLibraryOpen(pluginfn); if (!res) { - sprintf(pluginfn, LIBRCC_DATA_DIR "/engines/lib%s.so", name); + sprintf(pluginfn, LIBRCC_DATA_DIR "/engines/%s_engine.so", name); res = rccLibraryOpen(pluginfn); } free(pluginfn); @@ -156,7 +156,7 @@ rcc_plugin_handle rccPluginLoad(rcc_plugin_type type, const char *name) { rcc_engine *rccPluginEngineGetInfo(const char *name, const char *language) { rcc_plugin_handle handle; rcc_plugin_engine_info_function infofunc; - + handle = rccPluginLoad(RCC_PLUGIN_TYPE_ENGINE, name); if (!handle) return NULL; diff --git a/src/rccconfig.c b/src/rccconfig.c index 6723825..d5546c7 100644 --- a/src/rccconfig.c +++ b/src/rccconfig.c @@ -10,8 +10,8 @@ rcc_language_alias rcc_default_aliases[RCC_MAX_ALIASES + 1]; rcc_language_alias 
rcc_default_aliases_embeded[RCC_MAX_ALIASES + 1] = { - { "cs_SK", "sk" }, - { "ru_UA", "uk" }, +/* { "cs_SK", "sk" }, + { "ru_UA", "uk" },*/ { NULL, NULL } }; @@ -45,11 +45,15 @@ rcc_engine rcc_default_engine = { }; rcc_engine rcc_russian_engine = { - "LibRCD", NULL, NULL, &rccAutoengineRussian, {"CP1251","KOI8-R","UTF-8","IBM866", NULL} + "LibRCD", NULL, NULL, &rccAutoengineRussian, {"CP1251","KOI8-R","UTF-8","IBM866", "ISO8859-1", NULL} }; rcc_engine rcc_ukrainian_engine = { - "LibRCD", NULL, NULL, &rccAutoengineRussian, {"CP1251","KOI8-U","UTF-8","IBM865", NULL} + "LibRCD", NULL, NULL, &rccAutoengineRussian, {"CP1251","KOI8-U","UTF-8","IBM865", "ISO8859-1", NULL} +}; +/* Belarusian charsets for the LibRCD detector; keep in sync with belarussian_engine in engines/librcd.c */ +rcc_engine rcc_belarussian_engine = { + "LibRCD", NULL, NULL, &rccAutoengineRussian, {"CP1251","ISO-IR-111","UTF-8","IBM866", "ISO8859-1", NULL} }; rcc_language rcc_default_languages[RCC_MAX_LANGUAGES + 1]; @@ -81,11 +85,14 @@ rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = { #endif /* RCC_RCD_SUPPORT */ NULL }}, -{"be", {rcc_default_charset, rcc_utf8_charset, "CP1251", "IBM866", "ISO-8859-5", "KOI8-UNI", "maccyr" "IBM855", NULL},{ +{"be", {rcc_default_charset, rcc_utf8_charset, "CP1251", "IBM866", "ISO-8859-5", "ISO-IR-111", "MACCYRILLIC", "IBM855", NULL},{ &rcc_default_engine, +#ifdef RCC_RCD_SUPPORT + &rcc_belarussian_engine, +#endif /* RCC_RCD_SUPPORT */ NULL }}, -{"bg", {rcc_default_charset, rcc_utf8_charset, "CP1251", "ISO-8859-5", "IBM855", "maccyr", "ECMA-113", NULL},{ +/*{"bg", {rcc_default_charset, rcc_utf8_charset, "CP1251", "ISO-8859-5", "IBM855", "maccyr", "ECMA-113", NULL},{ &rcc_default_engine, NULL }}, @@ -124,11 +131,7 @@ rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = { {"sl", {rcc_default_charset, rcc_utf8_charset, "ISO-8859-2", "CP1250", "IBM852", "macce", "CORK", NULL},{ &rcc_default_engine, NULL -}}, -{"zh", {rcc_default_charset, rcc_utf8_charset, "GB2312", "GBK", "GB18030", "BIG5", NULL},{ - &rcc_default_engine, - NULL -}},
+}},*/ {NULL} }; rcc_option_value_name rcc_sn_boolean[] = { "OFF", "ON", NULL }; diff --git a/src/rccenca.c b/src/rccenca.c index 28d3ccf..e46847e 100644 --- a/src/rccenca.c +++ b/src/rccenca.c @@ -20,6 +20,41 @@ static rcc_library_handle enca_handle = NULL; #endif /* RCC_ENCA_DYNAMIC */ static rcc_engine *enca_engines = NULL; + +/* CORK, KEYBCS2 is missing */ +rcc_enca_corrections rcc_enca_missing_corrections[] = { + { "be", "KOI8-UNI", "ISO-IR-111" }, + { NULL, "macce", "MACCENTRALEUROPE" }, + { "zh", "HZ", "HZ" }, + { "sk", "KOI-8_CS_2", "CSKOI8R" }, + { NULL, NULL, NULL } +}; + +rcc_enca_corrections rcc_enca_error_corrections[] = { + { NULL, "ECMA-cyrillic", "ISO-IR-111" }, + { NULL, NULL, NULL } +}; + + +static const char *rccEncaGetCorrection(const char *lang, const char *charset) { + int i; + for (i=0;rcc_enca_error_corrections[i].enca_charset;i++) { + if (((!rcc_enca_error_corrections[i].lang)||((lang)&&(!strcmp(lang, rcc_enca_error_corrections[i].lang))))&&(!strcmp(charset, rcc_enca_error_corrections[i].enca_charset))) + return rcc_enca_error_corrections[i].iconv_charset; + } + return charset; +} + +static const char *rccEncaGetMissing(const char *lang, const char *charset) { + int i; + for (i=0;rcc_enca_missing_corrections[i].enca_charset;i++) { + if (((!rcc_enca_missing_corrections[i].lang)||((lang)&&(!strcmp(lang, rcc_enca_missing_corrections[i].lang))))&&(!strcmp(charset, rcc_enca_missing_corrections[i].enca_charset))) + return rcc_enca_missing_corrections[i].iconv_charset; + } + return charset; +} + + rcc_engine_internal rccEncaInitContext(rcc_engine_context ctx) { #ifdef RCC_ENCA_SUPPORT EncaAnalyser enca; @@ -65,7 +100,12 @@ rcc_autocharset_id rccEnca(rcc_engine_context ctx, const char *buf, int len) { if (ee.charset<0) return (rcc_charset_id)-1; charset = enca_charset_name(ee.charset, ENCA_NAME_STYLE_ICONV); - return rccGetAutoCharsetByName(ctx->config->ctx, charset); + if (charset) { + charset = rccEncaGetCorrection(rccEngineGetLanguage(ctx)->sn, 
charset); + } else { + charset = rccEncaGetMissing(rccEngineGetLanguage(ctx)->sn, enca_charset_name(ee.charset, ENCA_NAME_STYLE_ENCA)); + } + return rccEngineGetAutoCharsetByName(ctx, charset); #else /* RCC_ENCA_SUPPORT */ return (rcc_charset_id)-1; #endif /* RCC_ENCA_SUPPORT */ @@ -160,7 +200,11 @@ int rccEncaInit() { for (l=0;l +#endif /* HAVE_SYS_TYPES_H */ typedef enum rcc_external_module_t { RCC_EXTERNAL_MODULE_CONTROL = 0, diff --git a/src/rcclocale.c b/src/rcclocale.c index 99d2b8f..9869a72 100644 --- a/src/rcclocale.c +++ b/src/rcclocale.c @@ -5,10 +5,14 @@ #include "../config.h" +#ifdef HAVE_LIBCHARSET +# include +#endif /* HAVE_LIBCHARSET */ #ifdef HAVE_CODESET # include #endif + #include "rccconfig.h" int rccLocaleGetClassByName(const char *locale) { @@ -80,9 +84,12 @@ int rccLocaleGetCharset(char *result, const char *lv, unsigned int n) { if (locale_class == LC_CTYPE) { l = getenv("CHARSET"); +#ifdef HAVE_LIBCHARSET + if (!l) l = locale_charset(); +#endif /* HAVE_LIBCHARSET */ #ifdef HAVE_CODESET if (!l) l = nl_langinfo(CODESET); -#endif +#endif /* HAVE_CODESET */ if (l) { if (strlen(l)>=n) return -1; strcpy(result, l); diff --git a/src/recode.c b/src/recode.c index e1e8e81..1d98306 100644 --- a/src/recode.c +++ b/src/recode.c @@ -742,7 +742,12 @@ rcc_string rccSizedFromCharset(rcc_context ctx, const char *charset, const char rcc_string ret; if ((!buf)||(!charset)) return NULL; - + + if (!ctx) { + if (rcc_default_ctx) ctx = rcc_default_ctx; + else return NULL; + } + config = rccGetCurrentConfig(ctx); if (!config) return NULL; @@ -768,6 +773,11 @@ char *rccSizedToCharset(rcc_context ctx, const char *charset, rcc_const_string b if ((!buf)||(!charset)) return NULL; + if (!ctx) { + if (rcc_default_ctx) ctx = rcc_default_ctx; + else return NULL; + } + res = rccStringCheck(buf); if (!res) return NULL; @@ -799,6 +809,11 @@ char *rccSizedRecodeToCharset(rcc_context ctx, rcc_class_id class_id, const char char *utf8, *extracted; if (!charset) return NULL; + + if 
(!ctx) { + if (rcc_default_ctx) ctx = rcc_default_ctx; + else return NULL; + } utf8 = rccSizedFrom(ctx, class_id, buf, len); if (!utf8) return utf8; @@ -839,6 +854,11 @@ char *rccSizedRecodeFromCharset(rcc_context ctx, rcc_class_id class_id, const ch char *extracted; if (!charset) return NULL; + + if (!ctx) { + if (rcc_default_ctx) ctx = rcc_default_ctx; + else return NULL; + } icnv = rccIConvOpen("UTF-8", charset); if (icnv) { diff --git a/ui/librccui.c b/ui/librccui.c index 17e7281..1979899 100644 --- a/ui/librccui.c +++ b/ui/librccui.c @@ -219,7 +219,7 @@ int rccUiInit() { else icnv = rccIConvOpen(ctype_charset, "UTF-8"); } - if (!rccLocaleGetLanguage(locale, "LANGUAGE", 32)) { + if (!rccLocaleGetLanguage(locale, "LC_MESSAGES", 32)) { search[0] = strdup(locale); if (!search[0]) goto clean; lpos = strrchr(search[0], '@'); diff --git a/ui/rccnames.c b/ui/rccnames.c index 7f4f912..3a8ade1 100644 --- a/ui/rccnames.c +++ b/ui/rccnames.c @@ -26,6 +26,8 @@ rcc_name rcc_default_language_names_embeded[RCC_MAX_LANGUAGES+1] = { {"sk","Slovak"}, {"sl","Slovenian"}, {"zh","Chinese"}, +{"ko","Korean"}, +{"ja","Japanese"}, {NULL, NULL} }; -- cgit v1.2.3