diff options
author | Suren A. Chilingaryan <csa@dside.dyndns.org> | 2005-08-11 01:06:56 +0000 |
---|---|---|
committer | Suren A. Chilingaryan <csa@dside.dyndns.org> | 2005-08-11 01:06:56 +0000 |
commit | 3736c5f3635863e54ab2cc47860628d26855c749 (patch) | |
tree | 3c1dadec1b75557463fcc740429cceb6e948f998 | |
parent | 63bf2a90a6d6fb0859e4c9dd9fcac85de9adc0f1 (diff) | |
download | librcc-3736c5f3635863e54ab2cc47860628d26855c749.tar.gz librcc-3736c5f3635863e54ab2cc47860628d26855c749.tar.bz2 librcc-3736c5f3635863e54ab2cc47860628d26855c749.tar.xz librcc-3736c5f3635863e54ab2cc47860628d26855c749.zip |
Transliteration and Documentation Update
- Fix: Autodetection of disabled charsets.
- Fix: Cleanly terminate external process if parent thread terminated.
- Transliteration for Russian, Ukrainian, and other languages using IConv.
- Documentation Update.
-rw-r--r-- | INSTALL | 37 | ||||
-rw-r--r-- | NEWS | 6 | ||||
-rw-r--r-- | README | 162 | ||||
-rw-r--r-- | ToDo | 63 | ||||
-rw-r--r-- | examples/rcc-gtk-config.c | 2 | ||||
-rw-r--r-- | external/rccexternal.c | 42 | ||||
-rw-r--r-- | src/librcc.h | 10 | ||||
-rw-r--r-- | src/lngconfig.c | 59 | ||||
-rw-r--r-- | src/rccconfig.c | 11 | ||||
-rw-r--r-- | src/rccconfig.h | 3 | ||||
-rw-r--r-- | src/recode.c | 47 | ||||
-rw-r--r-- | ui/rccnames.c | 2 |
12 files changed, 382 insertions, 62 deletions
@@ -0,0 +1,37 @@ +Dependencies +============ + LibRCC depends on the LibXML2 library. However, it requires some other + libraries to provide the services listed below. + + * LibRCD and Enca libraries are used to provide encoding autodetection. + * DB4 is used to cache translations and recodings. + * Aspell is required for language autodetection. + * LibTranslate is required for translation. + o Libtranslate uses online services to translate the text. In the + default version there is no way to limit translation time. + LibRCC will respect the maximum time for recoding in any case, but + nevertheless it is a good idea to use a patched version of Libtranslate, + providing an API call for time-limited translation. The patch can be + downloaded from the RusXMMS (http://RusXMMS.sf.net) project page. + + The configure script will report which options are available. + + +Build +===== + LibRCC uses the standard GNU autoconf build system. To build a LibRCC release + you should type: + ./configure + make + + Installation can be performed using the following command: + make install + + The configure script accepts the following options: + --enable-force-dynamic-engines: + Will force encoding autodetection to load Enca and LibRCD libraries + dynamically, rather than compiling them in. + --disable-libtranslate: + Will not compile support for language translation, + even if the Libtranslate library is available. +
\ No newline at end of file @@ -0,0 +1,6 @@ +LibRCC-0.2.0: + + Language AutoDetection + + Language Translation + + Language Transliteration for Russian, Ukrainian and Languages supported by IConv. + + Support for per-class Charset Configuration + @@ -1,7 +1,161 @@ +RusXMMS Project +=============== -Language Autodetection ----------------------- +Originally, the project was aimed to provide means to work with multiple encodings +of the same language through adapting encoding of ID3 tags, M3U and PLS playlists +(including file names) to local settings on-the-fly. Both the tag reading and +writing back using any selected encoding was supported. +Nowadays there are library available providing the same functionality for almost +any program with just a few lines of code. The library is not limited to ID3 tags, +it can be useful for any program working with small titles or file names in +different languages and encodings. The patches for several music players, ID3 tag +libraries and some other programs are available on the project page. +The Abilities of LibRCC Library +=============================== -Translation ------------ + * Language Autodetection + * On the fly translation between languages, using online-services! + * Encoding Autodetection for most of European Languages1 + * Support for encoding detection plugins (besides Enca and LibRCD) + * Recoding/translation of multi-language playlists! + * Cache to speed-up re-recoding. + * Possibility to configure new languages and encodings. + * Shared configuration file. For example mentioned TagLib and LibID3 patches + do not have their own user interface, but will utilize the same recoding + configuration as XMMS. + * As well the separate program for configuration adjustment is available. + * GTK/GTK2 UI Library: you can add properties page to your GTK application + with 3 lines of code. 
+ * Menu localization opportunity + +The Available Patches +===================== + + * RusXMMS: Visualization and editing of the whole range of ID3 tags using any + of eight byte or unicode encodings. Support for playlists with non-english + filenames. The translation of foreign languages to english or locale one is + supported as well. The embeded properties page. The patch makes XMMS the best + player to work with ID3 titles. + * TagLib: Visualization and editing of ID3 v.1 and v.2 titles. Any TagLib + based application will correctly work with ID3 tags out of the box. The + properties page can be added to application with several lines of code. + Additionaly, after applying the patch, the 'tagwriter' program from the + TagLib examples can be used to convert titles of all your MP3 files to + unicode ID3 v.2 tags just using command: "tagwriter *.mp3". + * LibID3: Visualization of ID3 v.1 and v.2 titles. Any LibID3 based + application will correctly display (but not edit) ID3 tags out of the box. + * Mpg123: Visualization of ID3 v.1 titles. + * GFtp: Recoding file names between FTP servers using different encodings. + * Unzip: Recoding file names from Windows created archives. + + +Gratitudes +========== + * Me ;) + * Michael Shigorin - Ideas and great help in wiping bugs + * Dmitry A. Koptev - Slackware packages + * IPE, ForschungsZentrum, Karlsruhe + * CRD, Yerevan Physics Institute + * Georgian and Spanish winemakers :) + +Important Notes +=============== + +1. It have much more sense to report problems here, then just claiming nothing is + working on miscellaneous forums and mailing lists. +2. If you want patches presented here to be included in the correspondent project + trees, please, ask authors. The same thing should be concerned about inclusion + of LibRCC and LibRCD in the official Gentoo portage tree. +3. Most of the patches will modify configure.in and Makefile.am files, so the full + autoconfiguration should be performed. 
+ You should run "aclocal; automake; autoconf" prior to using configure script. +4. Output encoding normally must correspond to current "LC_CTYPE" locale. If + you would set it to another value without really knowing what are you doing, + it can raise problems. + +Preferences +=========== + + * Current Language. The English, Russian, Ukrainian, Belarussian, Bulgarian, + Czech, Estonian, Croatian, Hungarian, Lithuanian, Latvian, Polish, Slovak, + Slovenian and Chinese are embeded in the library. To get other languages you + should configure them in the "/etc/rcc.xml" or in user-defined configuration + "~/.rcc/rcc.xml". By default the language will be determined using LC_CTYPE + environmental variable. + * Current encoding for supported encoding classes. For RusXMMS the following + classes are defined: + o ID3 Encoding + o ID3 v.2 Encoding (uses ID3 by default) + o PlayList Encoding (uses ID3 by default) + o Encoding for Filenames in the Playlists (defaults to FS encoding) + o FileSystem Encoding (uses locale encoding by default) + o Output Encoding (uses locale encoding by default) + The default encoding will be resolved using: + o The unicode encoding selected for english language. + o The encoding of the parrent class if any. + o The unicode encoding defined by locale variable or any locale encoding + in the case of locale language is used. + o First available usable encoding. + * Encoding autodetection engine. First available is used by default. + * Mode for recoding cache: + o Off: Do not use recoding cache + o On: Use recoding cache to find out encoding and language + o ReLearn: Fill recoding cache with detected values + o Learn: Try to use recoding cache to find out encoding. If there are + no cached encoding for current title try to detect it and store in the + cache. + * Autodetect File Name: If option is switched on the encoding of the file + will be resolved using search over file system. 
+ * Autoengine Set Current Encoding: The encoding autodetection engine will + automatically set the detected encoding to be used by default. + * Autodetect Language: Try to autodetect the language used. Quite slow. + * Translate Text: Translate text from the detected language to the locale + language. Very slow and requires an internet connection. Different modes are + available. In the full mode the string will be translated to the current + locale language. If translation to the locale language fails, the string will + be translated to English. With the "Skip Related" and "Skip parrent" options the + translation between related languages will be omitted (A language is + considered to be a parent language when words from that language are + expected to appear in the strings of the child language. The English + language is considered to be a parent language for any other.). + With the "Translate to English" option it is possible to translate all strings + to English. + * Since translation is slow, it is possible to limit the maximum time used to + recode/translate a string. In this case, if the translation is not finished in the + specified amount of time, the untranslated string will be returned. However, the + string will be queued for translation, and on the next access the translated + and cached value will be returned. + * Additionally, for RusXMMS only, it is possible to specify the font used by the shade + form of the xmms playlist. + +Using Multi-Language Playlist +============================= +There are two ways of using multi-language playlists. The first is to use +language autodetection: + + * The UTF-8 locale should be set. + * LibRCC should be compiled with aspell support. + * The aspell dictionaries for all languages used should be installed. + +However, this is quite slow, and error-prone in cases where mostly non-dictionary +words are used. The second option is to use the recoding cache: + + * The UTF-8 locale should be set.
+ * Select "Learn" mode for recoding caching policy in the preferences prior + to loading new files. + * Afterwards the titles for the loaded files will be recoded correctly + whenever the recoding caching is enabled. + +Using Language Translation +========================== +It is possible to translate titles to your languages using libtranslate. +The LibRCC should be compiled with LibTranslate support (It will be quite good +idea to use patched libtranslated with posibility to limit maximum amount of +time spent for translation) and you should have internet connection. Since +libtranslate utilizes online translation services it takes a lot of time to get +translation. To solve this problem the translation queueing and caching is used. +If translation is failed in considered amount of time error will be returned to +caller and string will be queued for translation. The translated string will be +stored in the DB4 cache and will be returned to caller on the following requests +for translation. @@ -1,40 +1,39 @@ 0.3.x: - Buffer managment: + SetBufferSize ( 0 - autogrow ) - - Language autodetection and translation improvements - + Look on ofline translation libraries and other possibilities to improove - translation and language detection. - + Implement ispell support - + Configurable timeouts - - Move all recoding functionality on rccConfig level - - Revise locking subsystem - - Libtranslate can leave translated message partly in old language. This causes problems - because of recoding from UTF8 to Current language. (With UTF-8 encoding should be Okey). - - Lating languages. If in the string all characters < 0x7F then we have one of the Latin - languages? - - Statistic approach of language detection. - - LibRCD autolearning using db4 - + Charset detection - + Language detection (same as charsets, but for UTF8...) 
- * Consider word recognition based on probability - + Autolearning is triggered by large enough dictionary words - - Configurable common classes + - Move all recoding functionality on the rccConfig Level + - Revise Locking Subsystem + - Load class configurations from the XML files. -1.x: - - Common encodings: - + Provide way to add to all languages several default Unicode encodings (UTF8, UTF16, UTF16BE) - + Special type of classes to select only from Unicode encodings (or even just specified subset of encodings) - + Special pluggable encodings. For example translate to english. - * rccToEncoding(current_language, *new_language, buf, size)? - * rccFromEncoding(current_language, utf8_language, buf, size)? - * Code some options in charset name. (SpecialEncodingPrefix_Encoding_EncodingOptions) - - Recoding options: - + Skip Translation - - Switch to Get/Ref/UnRef system + +0.4.x: + - Language and Encoding autodetection improvements. + + LibRCD should use DB4 with statistic for different languages + + The statistic should be gathered using: + * Aspell dictionaries. + * Special program getting text on the standard input. + * From LibRCC when language is preciesely detected. + + The LibRCD engine should be used to fast language detection as well. + * Just analyze output UTF8 string + + Add ispell support + - Translation improvemtns + + Look if there are any offline translation libraries available. + + Use stardict (or other dictionary) to translate on per-word basis. + + Try to translate to first parrent encoding if translation to the current one is failed. + + Transliterate translation mode + +0.5.x: + - Special encoding. + + Instead of IConv call considered function. + * For example: Transliterate + * For example: Translate to English + + The options for encoding should be passed as a part of encoding name. + * Develope naming conventions + + Pluggable special encodings. + +1.0.x: + - Switch to Get/Ref/UnRef calls. 
- Drop down 'Class' keywords in all 'ClassCharset' function. Make it default behaviour. on request: - Multibyte(not-UTF8) support for FS classes - - If there are neccessity in western-european language relating. - + Check for correctness between related western-european languages while - invalid translation checking (rccTo). Can be done with rccSpeller. diff --git a/examples/rcc-gtk-config.c b/examples/rcc-gtk-config.c index 9a3f988..da73608 100644 --- a/examples/rcc-gtk-config.c +++ b/examples/rcc-gtk-config.c @@ -16,7 +16,7 @@ static rcc_class classes[] = { { "ftp", RCC_CLASS_STANDARD, NULL, NULL, "FTP Encoding", 0 }, { "http", RCC_CLASS_STANDARD, NULL, NULL, "HTTP Encoding", 0 }, { "ssh", RCC_CLASS_STANDARD, NULL, NULL, "SSH Encoding", 0 }, - { "out", RCC_CLASS_STANDARD, "LC_CTYPE", NULL, NULL, 0 }, + { "out", RCC_CLASS_STANDARD, "LC_CTYPE", NULL, "Output Encoding", 0 }, { NULL } }; diff --git a/external/rccexternal.c b/external/rccexternal.c index 47f628a..292ee5d 100644 --- a/external/rccexternal.c +++ b/external/rccexternal.c @@ -1,6 +1,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <errno.h> #include "../config.h" @@ -24,19 +25,37 @@ #ifdef HAVE_SYS_UN_H # include <sys/un.h> #endif /* HAVE_SYS_UN_H */ +#ifdef HAVE_SYS_TIME_H +# include <sys/time.h> +#endif /* JAVE_SYS_TIME_H */ + +#ifdef HAVE_SIGNAL_H +# include <signal.h> +#endif /* HAVE_SIGNAL_H */ #include <glib/gthread.h> #include "../src/rccexternal.h" #include "rcclibtranslate.h" +#define RCC_EXIT_CHECK_TIMEOUT 10 /* seconds */ + int main() { +#ifdef HAVE_SIGNAL_H + struct sigaction act; +#endif /* HAVE_PWD_H */ + int err; + struct timeval tv; + fd_set fdcon; + int s, sd; char addr[376]; const char *rcc_home_dir; struct sockaddr_un mysock, clisock; socklen_t socksize; + + pid_t parentpid; pid_t mypid; unsigned char loopflag = 1; @@ -49,6 +68,8 @@ int main() { struct passwd *pw; #endif /* HAVE_PWD_H */ + + parentpid = getppid(); mypid = getpid(); rcc_home_dir = getenv 
("HOME"); @@ -78,13 +99,34 @@ int main() { mysock.sun_path[sizeof(mysock.sun_path)-1]=0; unlink(addr); + if (bind(s,(struct sockaddr*)&mysock,sizeof(mysock))==-1) return -1; if (listen(s,1)<0) { unlink(addr); return -1; } +#ifdef HAVE_SIGNAL_H + act.sa_handler = SIG_IGN; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + sigaction(SIGPIPE,&act,NULL); + sigaction(SIGINT,&act,NULL); +#endif /* HAVE_SIGNAL_H */ + while (loopflag) { + tv.tv_sec = RCC_EXIT_CHECK_TIMEOUT; + tv.tv_usec = 0; + + FD_ZERO(&fdcon); + FD_SET(s, &fdcon); + + err = select(s+1, &fdcon, NULL, NULL, &tv); + if (err<=0) { + if (getppid() != parentpid) break; + continue; + } + sd = accept(s,(struct sockaddr*)&clisock,&socksize); if (sd < 0) continue; diff --git a/src/librcc.h b/src/librcc.h index 9b064d1..98ca1a6 100644 --- a/src/librcc.h +++ b/src/librcc.h @@ -427,6 +427,7 @@ typedef int rcc_option_value; typedef enum rcc_option_translate_t { RCC_OPTION_TRANSLATE_OFF = 0, /**< Switch translation off. */ + RCC_OPTION_TRANSLATE_TRANSLITERATE, /**< Transliterate data. */ RCC_OPTION_TRANSLATE_TO_ENGLISH, /**< Translate data to english language (Current language don't matter). */ RCC_OPTION_TRANSLATE_SKIP_RELATED, /**< Skip translation of the text's between related languages. */ RCC_OPTION_TRANSLATE_SKIP_PARRENT, /**< Skip translation of the text's from parrent languages (from english). */ @@ -821,7 +822,7 @@ rcc_charset_id rccConfigGetClassCharsetByName(rcc_language_config config, rcc_cl * Checks if charset is disabled for the specified class. * @param config is language configuration * @param class_id is class id. - * @param charset is charset name. + * @param charset_id is charset id. * @return 1 if charset is disabled, 0 if charset is enabled, -1 in the case of error. 
*/ int rccConfigIsDisabledCharset(rcc_language_config config, rcc_class_id class_id, rcc_charset_id charset_id); @@ -885,10 +886,13 @@ const char *rccConfigGetSelectedCharsetName(rcc_language_config config, rcc_clas /** * Return current encoding_id. The default value will be resolved to paticular encoding id. * The following procedure is used to detect default encoding: + * - If Unicode encoding selected for the same class english language. Return this encoding. * - If the parrent class is defined in #defcharset, - return current encoding of parrent class. - * - If the locale variable is defined in #defcharset and config language coincide with locale language, use locale encoding. + * - If the locale variable is defined in #defcharset and either config language coincide with locale language or unciode encoding defined, use locale encoding. * - If the default value for config language is defined in #defvalue return that default value. - * - Return language with id 0. Normally this should be dummy language which indicates that RCC library is not used. + * - If the default value for all languages is defined in #defvalue return that default value. + * - If either config language is coincide with locale language or unicode locale is used, return locale encoding. + * - Return first by the list non-dissabled encoding. 
* * @param config is language configuration * @param class_id is encoding class diff --git a/src/lngconfig.c b/src/lngconfig.c index 20aff63..631abd1 100644 --- a/src/lngconfig.c +++ b/src/lngconfig.c @@ -567,9 +567,11 @@ const char *rccConfigGetSelectedCharsetName(rcc_language_config config, rcc_clas } rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_id class_id) { + rcc_language_config enconfig; unsigned int i, max; rcc_charset_id charset_id; rcc_charset_id all_charset_id = (rcc_language_id)-1; + const char *charset; rcc_class_default_charset *defcharset; const char *lang; @@ -582,10 +584,19 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_ const char *defvalue; if ((!config)||(!config->ctx)||(class_id<0)||(class_id>=config->ctx->n_classes)) return -1; - + charset_id = config->charset[class_id]; if (charset_id) return charset_id; + enconfig = rccGetConfigByName(config->ctx, rcc_english_language_sn); + if ((enconfig)&&(enconfig!=config)) { + charset_id = enconfig->charset[class_id]; + if (charset_id) { + charset = rccConfigGetClassCharsetName(enconfig, class_id, charset_id); + if ((charset)&&(rccIsUnicode(charset))) return charset_id; + } + } + if (!config->language) return (rcc_charset_id)-1; else language = config->language; @@ -598,23 +609,27 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_ if (!strcmp(classes[i]->name, defvalue)) return rccConfigGetCurrentCharset(config, i); } - } else defvalue = config->ctx->locale_variable; + } if (config->default_charset[class_id]) return config->default_charset[class_id]; if (cl->defvalue) { charset_id = rccConfigGetLocaleClassCharset(config, class_id, defvalue); if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) { - config->default_charset[class_id] = charset_id; - return charset_id; + if (!rccConfigIsDisabledCharset(config, class_id, charset_id)) { + config->default_charset[class_id] = charset_id; + return 
charset_id; + } } } if (cl->defvalue) { charset_id = rccConfigGetClassCharsetByName(config, class_id, defvalue); if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) { - config->default_charset[class_id] = charset_id; - return charset_id; + if (!rccConfigIsDisabledCharset(config, class_id, charset_id)) { + config->default_charset[class_id] = charset_id; + return charset_id; + } } } @@ -626,9 +641,17 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_ if (!strcasecmp(lang, defcharset[i].lang)) { charset_id = rccConfigGetClassCharsetByName(config, class_id, defcharset[i].charset); if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) { - config->default_charset[class_id] = charset_id; - return charset_id; - } else break; + if (!rccConfigIsDisabledCharset(config, class_id, charset_id)) { + config->default_charset[class_id] = charset_id; + return charset_id; + } else { + all_charset_id = (rcc_charset_id)-1; + break; + } + } else { + all_charset_id = (rcc_charset_id)-1; + break; + } } else if (!strcasecmp(rcc_default_all, defcharset[i].lang)) { charset_id = rccConfigGetClassCharsetByName(config, class_id, defcharset[i].charset); if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) { @@ -638,20 +661,26 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_ } if (all_charset_id != (rcc_language_id)-1) { - config->default_charset[class_id] = all_charset_id; - return all_charset_id; + if (!rccConfigIsDisabledCharset(config, class_id, all_charset_id)) { + config->default_charset[class_id] = all_charset_id; + return all_charset_id; + } } } - charset_id = rccConfigGetLocaleClassCharset(config, class_id, defvalue); + charset_id = rccConfigGetLocaleClassCharset(config, class_id, config->ctx->locale_variable); if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) { - config->default_charset[class_id] = charset_id; - return charset_id; + if (!rccConfigIsDisabledCharset(config, class_id, charset_id)) 
{ + config->default_charset[class_id] = charset_id; + return charset_id; + } } max = rccConfigGetClassCharsetNumber(config, class_id); for (i = 1; i< max; i++) - if (!rccConfigIsDisabledCharset(config, class_id, (rcc_charset_id)i)) return (rcc_charset_id)i; + if (!rccConfigIsDisabledCharset(config, class_id, (rcc_charset_id)i)) { + return (rcc_charset_id)i; + } return (rcc_charset_id)-1; } diff --git a/src/rccconfig.c b/src/rccconfig.c index 0752ee3..ae47a63 100644 --- a/src/rccconfig.c +++ b/src/rccconfig.c @@ -31,6 +31,8 @@ const char rcc_default_all[] = "all"; const char rcc_default_language_sn[] = "default"; const char rcc_disabled_language_sn[] = "Off"; const char rcc_english_language_sn[] = "en"; +const char rcc_russian_language_sn[] = "ru"; +const char rcc_ukrainian_language_sn[] = "uk"; const char rcc_disabled_engine_sn[] = "Off"; const char rcc_default_charset[] = "Default"; @@ -61,18 +63,18 @@ rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = { &rcc_default_engine, NULL }}, -{"en", {rcc_default_charset, rcc_utf8_charset, NULL}, { +{rcc_english_language_sn, {rcc_default_charset, rcc_utf8_charset, "ISO8859-1", NULL}, { &rcc_default_engine, NULL }}, -{"ru", {rcc_default_charset,"KOI8-R","CP1251",rcc_utf8_charset,"IBM866","MACCYRILLIC","ISO8859-5", NULL}, { +{rcc_russian_language_sn, {rcc_default_charset,"KOI8-R","CP1251",rcc_utf8_charset,"IBM866","MACCYRILLIC","ISO8859-5", NULL}, { &rcc_default_engine, #ifdef RCC_RCD_SUPPORT &rcc_russian_engine, #endif /* RCC_RCD_SUPPORT */ NULL }}, -{"uk", {rcc_default_charset,"KOI8-U","CP1251",rcc_utf8_charset,"IBM855","MACCYRILLIC","ISO8859-5","CP1125", NULL}, { +{rcc_ukrainian_language_sn, {rcc_default_charset,"KOI8-U","CP1251",rcc_utf8_charset,"IBM855","MACCYRILLIC","ISO8859-5","CP1125", NULL}, { &rcc_default_engine, #ifdef RCC_RCD_SUPPORT &rcc_ukrainian_engine, @@ -129,11 +131,10 @@ rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = { }}, {NULL} }; - rcc_option_value_name 
rcc_sn_boolean[] = { "OFF", "ON", NULL }; rcc_option_value_name rcc_sn_learning[] = { "OFF", "ON", "RELEARN", "LEARN", NULL }; rcc_option_value_name rcc_sn_clo[] = { "ALL", "CONFIGURED_AND_AUTO", "CONFIGURED_ONLY", NULL }; -rcc_option_value_name rcc_sn_translate[] = { "OFF", "TO_ENGLISH", "SKIP_RELATED", "SKIP_PARRENT", "FULL", NULL }; +rcc_option_value_name rcc_sn_translate[] = { "OFF", "TRANSLITERATE", "TO_ENGLISH", "SKIP_RELATED", "SKIP_PARRENT", "FULL", NULL }; rcc_option_description rcc_option_descriptions[RCC_MAX_OPTIONS+1]; rcc_option_description rcc_option_descriptions_embeded[RCC_MAX_OPTIONS+1] = { diff --git a/src/rccconfig.h b/src/rccconfig.h index f7f70dd..8b5ac0d 100644 --- a/src/rccconfig.h +++ b/src/rccconfig.h @@ -10,6 +10,9 @@ extern const char rcc_default_all[]; extern const char rcc_default_language_sn[]; extern const char rcc_english_language_sn[]; +extern const char rcc_russian_language_sn[]; +extern const char rcc_ukrainian_language_sn[]; + extern const char rcc_disabled_language_sn[]; extern const char rcc_disabled_engine_sn[]; diff --git a/src/recode.c b/src/recode.c index a528481..9e19078 100644 --- a/src/recode.c +++ b/src/recode.c @@ -322,7 +322,9 @@ static char *rccRecodeTranslate(rcc_language_config *config, rcc_class_id class_ rcc_translate trans, entrans; + unsigned int i; char *translated; + unsigned char change_case; ctx = (*config)->ctx; @@ -336,7 +338,7 @@ static char *rccRecodeTranslate(rcc_language_config *config, rcc_class_id class_ english_language_id = rccGetLanguageByName(ctx, rcc_english_language_sn); - if (translate == RCC_OPTION_TRANSLATE_TO_ENGLISH) { + if ((translate == RCC_OPTION_TRANSLATE_TO_ENGLISH)||(translate == RCC_OPTION_TRANSLATE_TRANSLITERATE)) { current_language_id = english_language_id ; } else { if (ctype == RCC_CLASS_TRANSLATE_LOCALE) { @@ -356,6 +358,49 @@ static char *rccRecodeTranslate(rcc_language_config *config, rcc_class_id class_ if (rccConfigConfigure(curconfig)) return NULL; + if (translate == 
RCC_OPTION_TRANSLATE_TRANSLITERATE) { + if (!strcasecmp((*config)->language->sn, rcc_russian_language_sn)) { + translated = rccSizedRecodeCharsets(ctx, "UTF-8", "KOI8-R", utfstring, 0, NULL); + if (!translated) return NULL; + for (i=0;translated[i];i++) { + if (translated[i]&0x80) change_case = 1; + else change_case = 0; + + translated[i]=translated[i]&0x7F; + if (change_case) { + if ((translated[i]<'Z')&&(translated[i]>'A')) + translated[i]=translated[i]-'A'+'a'; + else if ((translated[i]<'z')&&(translated[i]>'a')) + translated[i]=translated[i]-'a'+'A'; + } + } + *config = curconfig; + return translated; + } + if (!strcasecmp((*config)->language->sn, rcc_ukrainian_language_sn)) { + translated = rccSizedRecodeCharsets(ctx, "UTF-8", "KOI8-U", utfstring, 0, NULL); + if (!translated) return NULL; + for (i=0;translated[i];i++) { + if (translated[i]&0x80) change_case = 1; + else change_case = 0; + + translated[i]=translated[i]&0x7F; + if (change_case) { + if ((translated[i]<'Z')&&(translated[i]>'A')) + translated[i]=translated[i]-'A'+'a'; + else if ((translated[i]<'z')&&(translated[i]>'a')) + translated[i]=translated[i]-'a'+'A'; + } + } + *config = curconfig; + return translated; + } + + translated = rccSizedRecodeCharsets(ctx, "UTF-8", "US-ASCII//TRANSLIT", utfstring, 0, NULL); + if (translated) *config = curconfig; + return translated; + } + if (translate == RCC_OPTION_TRANSLATE_SKIP_RELATED) { if (rccAreRelatedLanguages(curconfig, *config)) return NULL; } diff --git a/ui/rccnames.c b/ui/rccnames.c index d3d54d7..7f4f912 100644 --- a/ui/rccnames.c +++ b/ui/rccnames.c @@ -32,7 +32,7 @@ rcc_name rcc_default_language_names_embeded[RCC_MAX_LANGUAGES+1] = { rcc_option_value_name rcc_default_option_boolean_names[] = { "Off", "On", NULL }; rcc_option_value_name rcc_default_option_learning_names[] = { "Off", "On", "Relearn", "Learn", NULL }; rcc_option_value_name rcc_default_option_clo_names[] = { "All Languages", "Configured / AutoEngine", "Configured Only", NULL }; 
-rcc_option_value_name rcc_default_option_translate_names[] = { "Off", "Translate to English", "Skip Translation between Related Languages", "Skip Translation from Parrent Languages", "Full", NULL }; +rcc_option_value_name rcc_default_option_translate_names[] = { "Off", "Transliterate", "Translate to English", "Skip Translation between Related Languages", "Skip Translation from Parrent Languages", "Full", NULL }; rcc_option_name rcc_default_option_names[RCC_MAX_OPTIONS+1]; rcc_option_name rcc_default_option_names_embeded[RCC_MAX_OPTIONS+1] = { |