diff options
author | Suren A. Chilingaryan <csa@dside.dyndns.org> | 2005-06-16 23:14:30 +0000 |
---|---|---|
committer | Suren A. Chilingaryan <csa@dside.dyndns.org> | 2005-06-16 23:14:30 +0000 |
commit | 3aa2acb1aa6931d9a5cab87fe9bef94086e25d16 (patch) | |
tree | c3b86c2f004e7a8498efbe41e72f42d81acde9ea /src | |
download | librcc-3aa2acb1aa6931d9a5cab87fe9bef94086e25d16.tar.gz librcc-3aa2acb1aa6931d9a5cab87fe9bef94086e25d16.tar.bz2 librcc-3aa2acb1aa6931d9a5cab87fe9bef94086e25d16.tar.xz librcc-3aa2acb1aa6931d9a5cab87fe9bef94086e25d16.zip |
Initial Import
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 6 | ||||
-rw-r--r-- | src/config.c | 104 | ||||
-rw-r--r-- | src/config.h | 17 | ||||
-rw-r--r-- | src/db4.c | 0 | ||||
-rw-r--r-- | src/db4.h | 5 | ||||
-rw-r--r-- | src/enca.c | 161 | ||||
-rw-r--r-- | src/enca.h | 19 | ||||
-rw-r--r-- | src/engine.c | 56 | ||||
-rw-r--r-- | src/engine.h | 11 | ||||
-rw-r--r-- | src/fake_enca.h | 289 | ||||
-rw-r--r-- | src/fs.c | 172 | ||||
-rw-r--r-- | src/fs.h | 9 | ||||
-rw-r--r-- | src/librcc.c | 312 | ||||
-rw-r--r-- | src/librcc.h | 271 | ||||
-rw-r--r-- | src/lng.c | 135 | ||||
-rw-r--r-- | src/lngconfig.c | 363 | ||||
-rw-r--r-- | src/plugin.c | 31 | ||||
-rw-r--r-- | src/plugin.h | 15 | ||||
-rw-r--r-- | src/recode.c | 228 | ||||
-rw-r--r-- | src/string.c | 74 | ||||
-rw-r--r-- | src/xml.c | 8 |
21 files changed, 2286 insertions, 0 deletions
diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..d555299 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,6 @@ +lib_LTLIBRARIES = librcc.la +librcc_la_SOURCES = librcc.c lng.c lngconfig.c recode.c config.c config.h plugin.c plugin.h enca.c enca.h engine.c engine.h +include_HEADERS = librcc.h + +librcc_la_LDFLAGS = -version-info @LIBRCC_VERSION_INFO@ + diff --git a/src/config.c b/src/config.c new file mode 100644 index 0000000..885f416 --- /dev/null +++ b/src/config.c @@ -0,0 +1,104 @@ +#include <stdio.h> +#include <librcd.h> + +static rcc_charset_id rcc_autoengine_russian(rcc_engine_context ctx, char *buf, int len) { + return (int)get_russian_charset(buf,len); +} + +rcc_language_alias rcc_default_aliases[] = { + { "cs_SK", "sk" }, + { "ru_UA", "uk" }, + { NULL, NULL} +}; + +const char rcc_engine_nonconfigured[] = "NonConfigured"; + +rcc_engine rcc_default_engine = { + "Off", NULL, NULL, NULL, {NULL} +}; + +rcc_engine rcc_russian_engine = { + "Russian", NULL, NULL, &rcc_autoengine_russian, {"CP1251","KOI8-R","UTF-8","IBM866", NULL} +}; + +rcc_language rcc_default_languages[] = { +{"default", "Autodetect", {"Default", NULL} { + &default_engine, + NULL +}}, +{"off", "Dissable", {"Default", NULL} { + &default_engine, + NULL +}}, +{"ru","Russian",{"Default","KOI8-R","CP1251","UTF-8","IBM866","MACCYRILLIC","ISO8859-5", NULL},{ + &default_engine, + &russian_engine, + NULL +}}, +{"uk","Ukrainian",{"Default","KOI8-U","CP1251","UTF-8","IBM855","MACCYRILLIC","ISO8859-5","CP1125", NULL},{ + &default_engine, + &russian_engine, + NULL +}}, +{"be","Belarussian",{"Default", "UTF-8", "CP1251", "IBM866", "ISO-8859-5", "KOI8-UNI", "maccyr" "IBM855", NULL},{ + &default_engine, + NULL +}}, +{"bg","Bulgarian",{"Default", "UTF-8", "CP1251", "ISO-8859-5", "IBM855", "maccyr", "ECMA-113", NULL},{ + &default_engine, + NULL +}}, +{"cz","Czech",{"Default", "UTF-8", "ISO-8859-2", "CP1250", "IBM852", "KEYBCS2", "macce", "KOI-8_CS_2", "CORK", NULL},{ + 
&default_engine, + NULL +}}, +{"es","Estonian",{"Default", "UTF-8", "ISO-8859-4", "CP1257", "IBM775", "ISO-8859-13", "macce", "baltic", NULL},{ + &default_engine, + NULL +}}, +{"hr","Croatian",{"Default", "UTF-8", "CP1250", "ISO-8859-2", "IBM852", "macce", "CORK", NULL},{ + &default_engine, + NULL +}}, +{"hu","Hungarian",{"Default", "UTF-8", "ISO-8859-2", "CP1250", "IBM852", "macce", "CORK", NULL},{ + &default_engine, + NULL +}}, +{"lt","Lithuanian",{"Default", "UTF-8", "CP1257", "ISO-8859-4", "IBM775", "ISO-8859-13", "macce", "baltic", NULL},{ + &default_engine, + NULL +}}, +{"lv","Latvian",{"Default", "UTF-8", "CP1257", "ISO-8859-4", "IBM775", "ISO-8859-13", "macce", "baltic", NULL},{ + &default_engine, + NULL +}}, +{"pl","Polish",{"Default", "UTF-8", "ISO-8859-2", "CP1250", "IBM852", "macce", "ISO-8859-13", "ISO-8859-16", "baltic", "CORK", NULL},{ + &default_engine, + NULL +}}, +{"sk","Slovak",{"Default", "UTF-8", "CP1250", "ISO-8859-2", "IBM852", "KEYBCS2", "macce", "KOI-8_CS_2", "CORK", NULL},{ + &default_engine, + NULL +}}, +{"sl","Slovenian",{"Default", "UTF-8", "ISO-8859-2", "CP1250", "IBM852", "macce", "CORK", NULL},{ + &default_engine, + NULL +}}, +{"zh","Chinese",{"Default", "UTF-8", "GB2312", "GBK", "GB18030", "BIG5", NULL},{ + &default_engine, + NULL +}}, +NULL +}; + +/* +const charset_list_t charset_list_default = { "Default", NULL }; +charset_t *charset_list=(charset_t*)charset_list_default; +#define autocharset_list_ni_default 1 +autocharset_list_t autocharset_list_default = { + {"Off", NULL, {NULL}} +}; + +int autocharset_list_ni=autocharset_list_ni_default; +autocharset_t *autocharset_list=(autocharset_t*)autocharset_list_default; +*/
\ No newline at end of file diff --git a/src/config.h b/src/config.h new file mode 100644 index 0000000..ac74dbe --- /dev/null +++ b/src/config.h @@ -0,0 +1,17 @@ +#ifndef _RCC_CONFIG_H +#define _RCC_CONFIG_H +#include "librcc.h" + +#undef RCC_DEBUG +#define RCC_LOCALE_VARIABLE "LC_CTYPE" + +extern rcc_language_alias rcc_default_aliases[]; + +extern const char rcc_engine_nonconfigured[]; + +extern rcc_engine rcc_default_engine; +extern rcc_engine rcc_russian_engine; + +extern rcc_language rcc_default_languages[]; + +#endif /* _RCC_CONFIG_H */ diff --git a/src/db4.c b/src/db4.c new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/db4.c diff --git a/src/db4.h b/src/db4.h new file mode 100644 index 0000000..e13dd5f --- /dev/null +++ b/src/db4.h @@ -0,0 +1,5 @@ +#ifndef _RCC_DB4_H +#define _RCC_DB4_H + + +#endif /* _RCC_DB4_H */ diff --git a/src/enca.c b/src/enca.c new file mode 100644 index 0000000..95e2f49 --- /dev/null +++ b/src/enca.c @@ -0,0 +1,161 @@ +#include <stdio.h> + +#include "librcc.h" + +#include "plugin.h" +#include "enca.h" + +rcc_engine rcc_enca_engine = { + "Enca Library", &rccEncaInitContext, &rccEncaFreeContext, &rccEnca, {"UTF-8", NULL} +}; + +static rcc_library_handle enca_handle = NULL; +static rcc_engine enca_engines[sizeof(rcc_default_languages)/sizeof(rcc_language)]; + + +static int rccEncaLibraryLoad() { +#ifdef RCC_ENCA_DYNAMIC + if (enca_handle) return 0; + + enca_handle = rccLibraryLoad(RCC_ENCA_LIB); + if (!enca_handle) return -1; + + enca_set_multibyte=rccLibraryFind(enca_handle,"enca_set_multibyte"); + enca_set_interpreted_surfaces=rccLibraryFind(enca_handle,"enca_set_interpreted_surfaces"); + enca_set_ambiguity=rccLibraryFind(enca_handle,"enca_set_ambiguity"); + enca_set_filtering=rccLibraryFind(enca_handle,"enca_set_filtering"); + enca_set_garbage_test=rccLibraryFind(enca_handle,"enca_set_garbage_test"); + enca_set_termination_strictness=rccLibraryFind(enca_handle,"enca_set_termination_strictness"); + 
enca_set_significant=rccLibraryFind(enca_handle,"enca_set_significant"); + enca_set_threshold=rccLibraryFind(enca_handle,"enca_set_threshold"); + enca_charset_name=rccLibraryFind(enca_handle,"enca_charset_name"); + enca_get_language_charsets=rccLibraryFind(enca_handle,"enca_get_language_charsets"); + enca_analyser_alloc=rccLibraryFind(enca_handle,"enca_analyser_alloc"); + enca_analyser_free=rccLibraryFind(enca_handle,"enca_analyser_free"); + enca_analyse_const=rccLibraryFind(enca_handle,"enca_analyse_const"); + + if ((!enca_set_multibyte)||(!enca_set_interpreted_surfaces)||(!enca_set_ambiguity)|| + (!enca_set_filtering)||(!enca_set_garbage_test)||(!enca_set_termination_strictness)|| + (!enca_set_significant)||(!enca_set_threshold)||(!enca_charset_name)|| + (!enca_get_language_charsets)||(!enca_analyser_alloc)||(!enca_analyser_free)|| + (!enca_analyse_const)) { + rccLibraryClose(enca_handle); + enca_handle = NULL; +# ifdef RCC_DEBUG + perror( "rccEnca. Incomplete function set in library" ); +# endif /* RCC_DEBUG */ + } + +#endif /* RCC_ENCA_DYNAMIC */ + return 0; +} + +static void rccEncaLibraryUnload() { +#ifdef RCC_ENCA_DYNAMIC + if (enca_handle) { + rccLibraryUnload(enca_handle); + enca_handle = NULL; + } +#endif /* RCC_ENCA_DYNAMIC */ +} + + +int rccEncaInit() { + int err; + unsigned int i,j,k,l; + + rcc_engine_list engines; + + int *charsets; + size_t ncharsets; + +#ifdef RCC_ENCA_SUPPORT + err = rccEncaLibraryLoad(); + if (err) return err; + + for (i=0;rcc_default_languages[i];i++) { + engines = rcc_default_languages[i].engines; + for (j=0;engines[j];j++) + if (j >= RCC_MAX_ENGINES) continue; + + charsets = enca_get_language_charsets(rcc_default_languages[i].sn, &ncharsets); + if (charsets) { + memcpy(enca_engines+i, &rcc_enca_engine, sizeof(rcc_engine)); + for (k=0;enca_engines[i].charsets[k];k++); + if (n_charsets+k>=RCC_MAX_CHARSETS) n_charsets = RCC_MAX_CHARSETS-k; + + for (l=0;l<n_charsets;l++) + enca_engines[j].charset[k++] = 
enca_charset_name(charsets[l], ENCA_NAME_STYLE_ICONV); + enca_engines[j].charset[k] = NULL; + + engines[j] = enca_engines + i; + engines[j+1] = NULL; + + free(charsets); + } + } +#endif /* RCC_ENCA_SUPPORT */ + + return 0; +} + +void rccEncaFree() { +#ifdef RCC_ENCA_SUPPORT + rccEncaLibraryUnload(); +#endif /* RCC_ENCA_SUPPORT */ +} + + +rcc_engine_internal rccEncaInitContext(rcc_engine_context *ctx) { +#ifdef RCC_ENCA_SUPPORT + EncaAnalyser enca; + + if ((!ctx)||(!ctx->language)) return NULL; + + enca = enca_analyser_alloc(ctx->lanuage->sn); + if (!enca) return NULL; + + enca_set_threshold(enca, 1); + enca_set_multibyte(enca, 1); + enca_set_ambiguity(enca, 1); + enca_set_garbage_test(enca, 0); + enca_set_filtering(enca, 0); + enca_set_significant(enca,1); + enca_set_termination_strictness(enca,0); + + return (rcc_engine_internal)enca; +#else /* RCC_ENCA_SUPPORT */ + return NULL; +#endif /* RCC_ENCA_SUPPORT */ +} + +void rccEncaFreeContext(rcc_engine_context ctx) { + rcc_engine_internal internal; +#ifdef RCC_ENCA_SUPPORT + internal = rccEngineGetInternal(ctx); + if (internal) + enca_analyser_free(internal); +#endif /* RCC_ENCA_SUPPORT */ +} + +rcc_charset_id rccEnca(rcc_engine_context ctx, char *buf, int len) { +#ifdef RCC_ENCA_SUPPORT + rcc_engine_internal internal; + const char *charset; + EncaEncoding ee; + + internal = rccEngineGetInternal(ctx); + if ((!internal)||(!buf)) return -1; + + + len = STRLEN(buf, len); + + ee = enca_analyse_const((EncaAnalyser)ctx->internal,buf,len); + if (ee.charset<0) return -1; + + charset = enca_charset_name(ee.charset, ENCA_NAME_STYLE_ICONV); + return rccGetAutoCharsetByName(ctx->ctx, charset); +#else /* RCC_ENCA_SUPPORT */ + return -1; +#endif /* RCC_ENCA_SUPPORT */ +} diff --git a/src/enca.h b/src/enca.h new file mode 100644 index 0000000..7e3d139 --- /dev/null +++ b/src/enca.h @@ -0,0 +1,19 @@ +#ifndef _RCC_ENCA_H +#define _RCC_ENCA_H + +#define RCC_ENCA_SUPPORT +#define RCC_ENCA_DYNAMIC +#define RCC_ENCA_LIB "libenca.so.0" + 
+#ifdef HPUX +# undef RCC_ENCA_DYNAMIC +#endif + +#ifdef RCC_ENCA_DYNAMIC +# define RCC_ENCA_SUPPORT +#endif + +int rccEncaInit(); +void rccEncaFree(); + +#endif /* _RCC_ENCA_H */
\ No newline at end of file diff --git a/src/engine.c b/src/engine.c new file mode 100644 index 0000000..646d46e --- /dev/null +++ b/src/engine.c @@ -0,0 +1,56 @@ +#include <stdio.h> +#include <string.h> + +#include "librcc.h" + +int rccEngineInit(rcc_engine_context *engine_ctx, rcc_context *ctx) { + if ((!ctx)||(!engine_ctx)) return -1; + + engine_ctx->ctx = ctx; + engine_ctx->free_func = NULL; + engine_ctx->func = NULL; + return 0; +} + +void rccFreeEngine(rcc_engine_context *engine_ctx) { + if (!engine_ctx) return; + + if (engine_ctx->free_func) { + engine_ctx->free_func(engine_ctx); + engine_ctx->free_func = NULL; + } + + engine_ctx->func = NULL; + engine_ctx->internal = NULL; +} + +int rccEngineConfigure(rcc_engine_context *ctx) { + rcc_language_id language_id; + rcc_engine_id engine_id; + rcc_engine *engine; + + if ((!ctx)||(!ctx->ctx)) return -1; + + rccEngineFree(&ctx); + + language_id = rccGetCurrentLanguage(ctx->ctx); + if (language_id<0) return -1; + + engine_id = rccGetCurrentEngine(ctx->ctx); + if (engine_id<0) return -1; + + engine = ctx->ctx->languages[language_id]->engines[engine_id]; + + ctx->free_func = engine->free_func; + ctx->func = engine->func; + ctx->language = ctx->languages[language_id]; + + ctx->internal = engine->init_func(ctx); + return 0; +} + +rcc_engine_internal rccEngineGetInternal(rcc_engine_context *ctx) { + if (!ctx) return NULL; + + return ctx->internal; +} diff --git a/src/engine.h b/src/engine.h new file mode 100644 index 0000000..8f7400a --- /dev/null +++ b/src/engine.h @@ -0,0 +1,11 @@ +#ifndef _RCC_ENGINE_H +#defien _RCC_ENGINE_H + +int rccEngineInit(rcc_engine_context *engine_ctx, rcc_context *ctx); +void rccFreeEngine(rcc_engine_context *engine_ctx); + +int rccConfigure(rcc_engine_context *ctx); + +rcc_engine_internal rccEngineGetInternal(rcc_engine_context *ctx); + +#endif /* _RCC_ENGINE_H */ diff --git a/src/fake_enca.h b/src/fake_enca.h new file mode 100644 index 0000000..4483efb --- /dev/null +++ b/src/fake_enca.h @@ 
-0,0 +1,289 @@ +/* This header file is in the public domain. */ +#ifndef ENCA_H +#define ENCA_H + +#include <stdlib.h> +/* According to autoconf stdlib may not be enough for size_t */ +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* Enumerated types */ + +/** + * EncaSurface: + * @ENCA_SURFACE_EOL_CR: End-of-lines are represented with CR's. + * @ENCA_SURFACE_EOL_LF: End-of-lines are represented with LF's. + * @ENCA_SURFACE_EOL_CRLF: End-of-lines are represented with CRLF's. + * @ENCA_SURFACE_EOL_MIX: Several end-of-line types, mixed. + * @ENCA_SURFACE_EOL_BIN: End-of-line concept not applicable (binary data). + * @ENCA_SURFACE_MASK_EOL: Mask for end-of-line surfaces. + * @ENCA_SURFACE_PERM_21: Odd and even bytes swapped. + * @ENCA_SURFACE_PERM_4321: Reversed byte sequence in 4byte words. + * @ENCA_SURFACE_PERM_MIX: Chunks with both endianess, concatenated. + * @ENCA_SURFACE_MASK_PERM: Mask for permutation surfaces. + * @ENCA_SURFACE_QP: Quoted printables. + * @ENCA_SURFACE_REMOVE: Recode `remove' surface. + * @ENCA_SURFACE_UNKNOWN: Unknown surface. + * @ENCA_SURFACE_MASK_ALL: Mask for all bits, withnout #ENCA_SURFACE_UNKNOWN. + * + * Surface flags. 
+ **/ +typedef enum { /*< flags >*/ + ENCA_SURFACE_EOL_CR = 1 << 0, + ENCA_SURFACE_EOL_LF = 1 << 1, + ENCA_SURFACE_EOL_CRLF = 1 << 2, + ENCA_SURFACE_EOL_MIX = 1 << 3, + ENCA_SURFACE_EOL_BIN = 1 << 4, + ENCA_SURFACE_MASK_EOL = (ENCA_SURFACE_EOL_CR + | ENCA_SURFACE_EOL_LF + | ENCA_SURFACE_EOL_CRLF + | ENCA_SURFACE_EOL_MIX + | ENCA_SURFACE_EOL_BIN), + ENCA_SURFACE_PERM_21 = 1 << 5, + ENCA_SURFACE_PERM_4321 = 1 << 6, + ENCA_SURFACE_PERM_MIX = 1 << 7, + ENCA_SURFACE_MASK_PERM = (ENCA_SURFACE_PERM_21 + | ENCA_SURFACE_PERM_4321 + | ENCA_SURFACE_PERM_MIX), + ENCA_SURFACE_QP = 1 << 8, + ENCA_SURFACE_REMOVE = 1 << 13, + ENCA_SURFACE_UNKNOWN = 1 << 14, + ENCA_SURFACE_MASK_ALL = (ENCA_SURFACE_MASK_EOL + | ENCA_SURFACE_MASK_PERM + | ENCA_SURFACE_QP + | ENCA_SURFACE_REMOVE) +} EncaSurface; + +/** + * EncaNameStyle: + * @ENCA_NAME_STYLE_ENCA: Default, implicit charset name in Enca. + * @ENCA_NAME_STYLE_RFC1345: RFC 1345 charset name. + * @ENCA_NAME_STYLE_CSTOCS: Cstocs charset name. + * @ENCA_NAME_STYLE_ICONV: Iconv charset name. + * @ENCA_NAME_STYLE_HUMAN: Human comprehensible description. + * + * Charset naming styles and conventions. + **/ +typedef enum { + ENCA_NAME_STYLE_ENCA, + ENCA_NAME_STYLE_RFC1345, + ENCA_NAME_STYLE_CSTOCS, + ENCA_NAME_STYLE_ICONV, + ENCA_NAME_STYLE_HUMAN +} EncaNameStyle; + +/** + * EncaCharsetFlags: + * @ENCA_CHARSET_7BIT: Characters are represented with 7bit characters. + * @ENCA_CHARSET_8BIT: Characters are represented with bytes. + * @ENCA_CHARSET_16BIT: Characters are represented with 2byte words. + * @ENCA_CHARSET_32BIT: Characters are represented with 4byte words. + * @ENCA_CHARSET_FIXED: One characters consists of one fundamental piece. + * @ENCA_CHARSET_VARIABLE: One character consists of variable number of + * fundamental pieces. + * @ENCA_CHARSET_BINARY: Charset is binary from ASCII viewpoint. + * @ENCA_CHARSET_REGULAR: Language dependent (8bit) charset. + * @ENCA_CHARSET_MULTIBYTE: Multibyte charset. + * + * Charset properties. 
+ * + * Flags %ENCA_CHARSET_7BIT, %ENCA_CHARSET_8BIT, %ENCA_CHARSET_16BIT, + * %ENCA_CHARSET_32BIT tell how many bits a `fundamental piece' consists of. + * This is different from bits per character; r.g. UTF-8 consists of 8bit + * pieces (bytes), but character can be composed from 1 to 6 of them. + **/ +typedef enum { /*< flags >*/ + ENCA_CHARSET_7BIT = 1 << 0, + ENCA_CHARSET_8BIT = 1 << 1, + ENCA_CHARSET_16BIT = 1 << 2, + ENCA_CHARSET_32BIT = 1 << 3, + ENCA_CHARSET_FIXED = 1 << 4, + ENCA_CHARSET_VARIABLE = 1 << 5, + ENCA_CHARSET_BINARY = 1 << 6, + ENCA_CHARSET_REGULAR = 1 << 7, + ENCA_CHARSET_MULTIBYTE = 1 << 8 +} EncaCharsetFlags; + +/** + * EncaErrno: + * @ENCA_EOK: OK. + * @ENCA_EINVALUE: Invalid value (usually of an option). + * @ENCA_EEMPTY: Sample is empty. + * @ENCA_EFILTERED: After filtering, (almost) nothing remained. + * @ENCA_ENOCS8: Mulitibyte tests failed and language contains no 8bit charsets. + * @ENCA_ESIGNIF: Too few significant characters. + * @ENCA_EWINNER: No clear winner. + * @ENCA_EGARBAGE: Sample is garbage. + * + * Error codes. + **/ +typedef enum { + ENCA_EOK = 0, + ENCA_EINVALUE, + ENCA_EEMPTY, + ENCA_EFILTERED, + ENCA_ENOCS8, + ENCA_ESIGNIF, + ENCA_EWINNER, + ENCA_EGARBAGE +} EncaErrno; + +/** + * ENCA_CS_UNKNOWN: + * + * Unknown character set id. + * + * Use enca_charset_is_known() to check for unknown charset instead of direct + * comparsion. + **/ +#define ENCA_CS_UNKNOWN (-1) + +/** + * ENCA_NOT_A_CHAR: + * + * Not-a-character in unicode tables. + **/ +#define ENCA_NOT_A_CHAR 0xffff + +/* Published (opaque) typedefs */ +typedef struct _EncaAnalyserState *EncaAnalyser; + +/* Public (transparent) typedefs */ +typedef struct _EncaEncoding EncaEncoding; + +/** + * EncaEncoding: + * @charset: Numeric charset identifier. + * @surface: Surface flags. + * + * Encoding, i.e. charset and surface. + * + * This is what enca_analyse() and enca_analyse_const() return. 
+ * + * The @charset field is an opaque numerical charset identifier, which has no + * meaning outside Enca library. + * You will probably want to use it only as enca_charset_name() argument. + * It is only guaranteed not to change meaning + * during program execution time; change of its interpretation (e.g. due to + * addition of new charsets) is not considered API change. + * + * The @surface field is a combination of #EncaSurface flags. You may want + * to ignore it completely; you should use enca_set_interpreted_surfaces() + * to disable weird surfaces then. + **/ +struct _EncaEncoding { int charset; EncaSurface surface; }; + +void (*enca_set_multibyte) (EncaAnalyser analyser, int multibyte); +void (*enca_set_interpreted_surfaces) (EncaAnalyser analyser, int interpreted_surfaces); +void (*enca_set_ambiguity) (EncaAnalyser analyser, int ambiguity); +void (*enca_set_filtering) (EncaAnalyser analyser, int filtering); +void (*enca_set_garbage_test) (EncaAnalyser analyser, int garabage_test); +void (*enca_set_termination_strictness) (EncaAnalyser analyser, int termination_strictness); +int (*enca_set_significant) (EncaAnalyser analyser, size_t significant); +int (*enca_set_threshold) (EncaAnalyser analyser, double threshold); +const char* (*enca_charset_name) (int charset, EncaNameStyle whatname); +int* (*enca_get_language_charsets) (const char *langname, size_t *n); +EncaAnalyser (*enca_analyser_alloc) (const char *langname); +void (*enca_analyser_free) (EncaAnalyser analyser); +EncaEncoding (*enca_analyse_const) (EncaAnalyser analyser,const unsigned char *buffer, size_t size); + +/** + * enca_charset_is_known: + * @cs: Charset id. + * + * Expands to nonzero when the charset is known (i.e. it's not + * ENCA_CS_UNKNOWN). + **/ +#define enca_charset_is_known(cs) \ + ((cs) != ENCA_CS_UNKNOWN) + +/** + * enca_charset_is_7bit: + * @cs: Charset id. + * + * Expands to nonzero when characters are represented with 7bit characters. 
+ **/ +#define enca_charset_is_7bit(cs) \ + (enca_charset_properties(cs) & ENCA_CHARSET_7BIT) + +/** + * enca_charset_is_8bit: + * @cs: Charset id. + * + * Expands to nonzero when characters are represented with bytes. + **/ +#define enca_charset_is_8bit(cs) \ + (enca_charset_properties(cs) & ENCA_CHARSET_8BIT) + +/** + * enca_charset_is_16bit: + * @cs: Charset id. + * + * Expands to nonzero when characters are represented with 2byte words. + **/ +#define enca_charset_is_16bit(cs) \ + (enca_charset_properties(cs) & ENCA_CHARSET_16BIT) + +/** + * enca_charset_is_32bit: + * @cs: Charset id. + * + * Expands to nonzero when characters are represented with 4byte words. + **/ +#define enca_charset_is_32bit(cs) \ + (enca_charset_properties(cs) & ENCA_CHARSET_32BIT) + +/** + * enca_charset_is_fixed: + * @cs: Charset id. + * + * Expands to nonzero when one characters consists of one fundamental piece. + **/ +#define enca_charset_is_fixed(cs) \ + (enca_charset_properties(cs) & ENCA_CHARSET_FIXED) + +/** + * enca_charset_is_variable: + * @cs: Charset id. + * + * Expands to nonzero when one character consists of variable number of + * fundamental pieces. + **/ +#define enca_charset_is_variable(cs) \ + (enca_charset_properties(cs) & ENCA_CHARSET_VARIABLE) + +/** + * enca_charset_is_binary: + * @cs: Charset id. + * + * Expands to nonzero when charset is binary from ASCII viewpoint. + **/ +#define enca_charset_is_binary(cs) \ + (enca_charset_properties(cs) & ENCA_CHARSET_BINARY) + +/** + * enca_charset_is_regular: + * @cs: Charset id. + * + * Expands to nonzero when charset is language dependent (8bit) charset. + **/ +#define enca_charset_is_regular(cs) \ + (enca_charset_properties(cs) & ENCA_CHARSET_REGULAR) + +/** + * enca_charset_is_multibyte: + * @cs: Charset id. + * + * Expands to nonzero when charset is multibyte. 
+ **/ +#define enca_charset_is_multibyte(cs) \ + (enca_charset_properties(cs) & ENCA_CHARSET_MULTIBYTE) + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif diff --git a/src/fs.c b/src/fs.c new file mode 100644 index 0000000..6acb05b --- /dev/null +++ b/src/fs.c @@ -0,0 +1,172 @@ +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <mntent.h> + +#include "librcc.h" + +static char *rccCreateFullName(const char *path, const char *filename) { + unsigned int i; + char *name; + + if (!path) { + if (filename) return strdup(filename); + else return strdup("/"); + } else if (!filename) return strdup(path); + + + i = strlen(path); + name = (char*)malloc(i+strlen(filename)+2)*sizeof(char)); + if (!name) return NULL; + + if ((path[i-1]=='/')||(filename[0]=='/')) + sprintf(name, "%s%s", path, filename); + else + sprintf(name, "%s/%s", path, filename); + + return name; +} + +static int rccIsFile(const char *filename) { + struct stat st; + + stat(filename,&st); + if (S_ISREG(st.st_mode)) return 1; + return 0; +} + +static char *rccCheckFile(const char *prefix, const char *name) { + char *temp; + + temp = rccCreateFullName(prefix, name); + if ((!temp)||(rccIsFile(temp))) return temp; + + free(temp); + return NULL; +} + +int rccFS0(const char *fspath, const char *filename, char **prefix, char **name) { + FILE *mtab; + struct mntent *fsentry; + char *tmp; + + if (fspath) { + tmp = strstr(filename, fspath); + if (tmp) tmp = filename + strlen(fspath); + } else { + mtab = setmntent(_PATH_MNTTAB, "r"); + if (mtab) { + while (!feof(mtab)) { + fsentry = getmntent(mtab); + if ((fsentry)&&(fsentry->mnt_dir)) { + tmp = strstr(filename, fsentry->mnt_dir); + if (tmp) tmp = filename + strlen(fsentry->mnt_dir); + } + } + endmntent(mtab); + } + } + + if (!tmp) tmp = filename; + + *name = strdup(tmp); + *prefix = strndup(filename, (tmp-filename)); + + if ((!*name)||(!*prefix)) { + if (*name) free(*name); + if (*prefix) 
free(*prefix); + return -1; + } + + return 0; +} + +int rccFS1(rcc_context *ctx, const char *fspath, char **prefix, char **name) { + int prefix_size; + char *result, *tmp; + char *path, *filename; + + path = *prefix; + filename = *name; + + + if ((path)&&(filename)) { + result = rccCreateFullName(path, filename); + if (!result) return -1; + } else if (filename) result = filename; + else if (path) result = path; + else return -1; + + + // Checking without recoding in case of autodetection + if (rccGetOption(ctx, RCC_AUTODETECT_FS_NAMES)) { + if (rccIsFile(name)) { + if ((path)&&(filename)) *name = result; + else if (filename) *name = strdup(filename); + else *name = strdup(path); + return 1; + } + } + + err = rccFS0(fspath, result, &prefix, &name); + if ((path)&&(filename)) free(name); + + return err; +} + +char *rccFS2(rcc_context *ctx, iconv_t icnv, const char *prefix, const char *name) { + if (icnv == (iconv_t)-1) return NULL; + if (icnv == (iconv_t)-2) { + strcpy(ctx->tmpbuffer, name); + ctx->tmpbuffer[len] = 0; + } else { + err = rccIConv(ctx, icnv, name, 0); + if (err<=0) return NULL; + } + + return rccCheckFile(prefix, ctx->tmpbuffer); +} + +char *rccFS3(rcc_context *ctx, rcc_language_id language_id, rcc_class_id class_id, const char *prefix, const char *name) { + rcc_charset charset; + rcc_language *language; + iconv_t icnv = ctx->fsiconv; + + if ((rccGetOption(ctx, RCC_AUTODETECT_FS_NAMES))&&(icnv != (iconv_t)-1)) { + result = rccFS2(ctx, icnv, prefix, name); + if (result) return result; + } + + result = rccFS2(ctx, ctx->iconv_to[class_id], prefix, name); + if (result) { + if ((icnv != (iconv_t)-1)||(icnv != (iconv_t)-2)) iconv_close(icnv); + ctx->fsiconv = (iconv_t)-1; + return result; + } + + if (rccGetOption(ctx, RCC_AUTODETECT_FS_NAMES)) { + language = ctx->language[language_id]; + if (language->charset[0]) { + for (i=1;(!result);i++) { + charset = language->charsets[i]; + if (!charset) break; + + if ((icnv != (iconv_t)-1)&&(icnv != (iconv_t)-2)) 
iconv_close(icnv); + + if (strcmp(charset, "UTF-8")&&strcmp(charset, "UTF8")) icnv = (iconv_t)-2; + else icnv = iconv_open(charset, "UTF-8"); + + result = rccFS2(ctx, icnv, prefix, name); + } + } + } + if (result) ctx->fsiconv = icnv; + else { + if ((icnv != (iconv_t)-1)&&(icnv != (iconv_t)-2)) iconv_close(icnv); + ctx->fsiconv = (iconv_t)-1; + } + + return result; +} diff --git a/src/fs.h b/src/fs.h new file mode 100644 index 0000000..5e31507 --- /dev/null +++ b/src/fs.h @@ -0,0 +1,9 @@ +#ifndef _RCC_FS_H +#define _RCC_FS_H + +int rccFS0(const char *fspath, const char *filename, char **prefix, char **name); +int rccFS1(rcc_context *ctx, const char *fspath, char **prefix, char **name); +char *rccFS2(rcc_context *ctx, iconv_t icnv, const char *prefix, const char *name); +char *rccFS3(rcc_context *ctx, rcc_language_id language_id, rcc_class_id class_id, const char *prefix, const char *name); + +#endif /* _RCC_FS_H */ diff --git a/src/librcc.c b/src/librcc.c new file mode 100644 index 0000000..6f621fc --- /dev/null +++ b/src/librcc.c @@ -0,0 +1,312 @@ +#include <stdio.h> +#include <string.h> + +#include <librcd.h> +#include "librcc.h" +#include "enca.h" + +#include "config.h" + +int rccInit() { + return rccEncaInit(); +} + +void rccFree() { + rccEncaFree(); +} + +rcc_context rccInitContext(rcc_init_flags flags, unsigned int max_languages, unsigned int max_classes, const char *locale) { + unsigned int i; + + rcc_context *ctx; + rcc_language_ptr *languages; + rcc_class_ptr *classes; + rcc_language_config *configs; + iconv_t *from, *to; + + if (!max_languages) max_languages = RCC_MAX_LANGUAGES; + if (!max_classes) max_classes = RCC_MAX_CLASSES; + + ctx = (rcc_context*)malloc(sizeof(rcc_context)); + languages = (rcc_language_ptr*)malloc((max_languages+1)*sizeof(rcc_language_ptr)); + classes = (rcc_class_ptr*)malloc((max_classes+1)*sizeof(rcc_class_ptr)); + from = (iconv_t*)malloc((max_classes)*sizeof(iconv_t)); + to = (iconv_t*)malloc((max_classes)*sizeof(iconv_t)); + + 
configs = (rcc_language_config*)malloc((max_languages)*sizeof(rcc_language_config)); + + if ((!ctx)||(!languages)||(!classes)) { + if (from) free(from); + if (to) free(to); + if (configs) free(configs); + if (classes) free(classes); + if (languages) free(languages); + if (ctx) free(ctx); + return NULL; + } + + ctx->languages = languages; + ctx->max_languages = max_languages; + ctx->n_languages = 0; + languages[0] = NULL; + + ctx->classes = classes; + ctx->max_classes = max_classes; + ctx->n_classes = 0; + classes[0] = NULL; + + ctx->fsiconv = (iconv_t)-1; + + ctx->iconv_from = from; + ctx->iconv_to = to; + for (i=0;i<max_classes;i++) { + from[i] = (iconv_t)-1; + to[i] = (iconv_t)-1; + } + + for (i=0;i<RCC_MAX_CHARSETS;i++) + ctx->iconv_auto[i] = (iconv_t)-1; + + ctx->configs = configs; + for (i=0;i<max_languages;i++) + configs[i].charset = NULL; + + err = rccEngineInit(&ctx->engine_ctx, ctx); + if (err) { + rccFree(ctx); + return NULL; + } + + ctx->current_language = 0; + + if (locale) { + if (strlen(locale)>=RCC_MAX_VARIABLE_CHARS) { + rccFree(ctx); + return NULL; + } + strcpy(ctx->locale_variable, locale); + } else { + strcpy(ctx->locale_variable, RCC_LOCALE_VARIABLE); + } + + if (flags&RCC_DEFAULT_CONFIGURATION) { + if (sizeof(languages)<sizeof(rcc_default_languages)) { + rccFree(ctx); + return NULL; + } + + for (i=0;rcc_default_languages[i];i++) + rccRegisterLanguage(ctx, rcc_default_language[i]); + + ctx->current_config = rccGetCurrentConfig(ctx); + } else { + rccRegisterLanguage(ctx, rcc_default_language[0]); + ctx->current_config = NULL; + } + + ctx->configure = 1; + + return ctx; +} + +static void rccFreeIConv(rcc_context *ctx) { + unsigned int i; + + if ((!ctx)||(!ctx->iconv_from)||(!ctx->iconv_to)) return; + + if ((ctx->fsiconv_t != (iconv_t)-1)&&(ctx->fsiconv_t != (iconv_t)-2)) { + iconv_close(ctx->fsiconv); + ctx->fsiconv = (iconv_t)-1; + } + + for (i=0;i<ctx->n_classes;i++) { + if ((ctx->iconv_from[i] != (iconv_t)-1)&&(ctx->iconv_from[i] != 
(iconv_t)-2)) { + iconv_close(ctx->iconv_from[i]); + ctx->iconv_from[i] = (iconv_t)-1; + } + if ((ctx->iconv_to[i] != (iconv_t)-1)&&(ctx->iconv_to[i] != (iconv_t)-2)) { + iconv_close(ctx->iconv_to[i]); + ctx->iconv_to[i] = (iconv_t)-1; + } + } + for (i=0;i<RCC_MAX_CHARSETS;i++) { + if ((ctx->iconv_auto[i] != (iconv_t)-1)&&(ctx->iconv_auto[i] != (iconv_t)-2)) { + iconv_close(ctx->iconv_auto[i]); + ctx->iconv_auto[i] = (iconv_t)-1; + } + } +} + +void rccFreeContext(rcc_context *ctx) { + if (ctx) { + rccFreeEngine(&ctx->engine_ctx); + rccFreeIConv(ctx); + if (ctx->iconv_from) free(ctx->iconv_from); + if (ctx->iconv_to) free(ctx->iconv_to); + + if (ctx->configs) { + for (i=0;i<ctx->max_languages;i++) + rccFreeConfig(configs+i); + free(ctx->configs); + } + if (ctx->charsets) free(ctx->charsets); + if (ctx->classes) free(ctx->classes); + if (ctx->languages) free(ctx->languages); + free(ctx); + } +} + +rcc_language_id rccRegisterLanguage(rcc_context *ctx, rcc_language *language) { + if ((!ctx)||(!language)) return -1; + if (ctx->n_languages == ctx->max_languages) return -2; + ctx->languages[ctx->n_languages++] = language; + ctx->languages[ctx->n_languages] = NULL; + + if (!ctx->current_language) + ctx->current_config = rccGetCurrentConfig(ctx); + + return ctx->n_languages-1; +} + +rcc_charset_id rccLanguageRegisterCharset(rcc_language *language, rcc_charset charset) { + unsigned int i; + + if ((!language)||(!charset)) return -1; + for (i=0;language->charsets[i];i++); + if (i>=RCC_MAX_CHARSETS) return -2; + language->charsets[i++] = charset; + language->charsets[i] = NULL; + return i-1; +} + +rcc_engine_id rccLanguageRegisterEngine(rcc_language *language, rcc_engine *engine) { + unsigned int i; + + if ((!language)||(!engine)) return -1; + for (i=0;language->engines[i];i++); + if (i>=RCC_MAX_ENGINES) return -2; + language->engines[i++] = engine; + language->engines[i] = NULL; + return i-1; +} + +rcc_class_id rccRegisterClass(rcc_context *ctx, rcc_class *cl) { + if 
((!ctx)||(!cl)) return -1; + if (ctx->n_classes == ctx->max_classes) return -2; + ctx->configure = 1; + ctx->classes[ctx->n_languages++] = cl; + ctx->classes[ctx->n_languages] = NULL; + return ctx->n_classes-1; +} + + +rcc_class_type rccGetClassType(rcc_context *ctx, rcc_class_id class_id) { + rcc_class cl; + + if (!ctx)||(class_id<0)||(class_id>=ctx->n_classes)) return RCC_CLASS_INVALID; + + cl = rcc->classes[class_id]; + return cl->class_type; +} + +static rcc_language *rccGetLanguageList(rcc_context *ctx) { + if (!ctx) return NULL; + return ctx->languages; +} + +static rcc_charset *rccGetCharsetList(rcc_context *ctx, rcc_language_id language_id) { + if ((!ctx)||(language_id<0)||(language_id>=ctx->n_languages)) return NULL; + return ctx->languages[language_id]->charsets; +} + +static rcc_engine *rccGetEngineList(rcc_context *ctx, rcc_language_id language_id) { + if ((!ctx)||(language_id<0)||(language_id>=ctx->n_languages)) return NULL; + return ctx->languages[language_id]->engines; +} + +static rcc_charset *rccGetCurrentCharsetList(rcc_context *ctx) { + rcc_language_id language_id; + + if (!ctx) return NULL; + + language_id = rccGetCurrentLanguage(ctx); + if (language_id<0) return NULL; + + return rccGetCharsetList(ctx, language_id); +} + +static rcc_charset *rccGetCurrentEngineList(rcc_context *ctx) { + rcc_language_id language_id; + + if (!ctx) return NULL; + + language_id = rccGetCurrentLanguage(ctx); + if (language_id<0) return NULL; + + return rccGetEngineList(ctx, language_id); +} + +static rcc_charset *rccGetCurrentAutoCharsetList(rcc_context *ctx) { + rcc_language_id language_id; + rcc_engine_id engine_id; + + if (!ctx) return NULL; + + language_id = rccGetCurrentLanguage(ctx); + engine_id = rccGetCurrentEngine(ctx); + if ((language_id<0)||(engine_id<0)) return NULL; + + + return ctx->languages[language_id]->engine[engine_id]->charsets; +} + + +int rccConfigure(rcc_engine_context *ctx) { + unsigned int i; + rcc_charset *charsets; + char *charset; + + if 
(!ctx) return -1; + if (!ctx->configure) return 0; + + rccFreeIConv(ctx); + for (i=0;i<ctx->n_classes;i++) { + charset = rccGetCurrentCharsetName(ctx, i); + if (strcmp(charset, "UTF-8")&&strcmp(charset, "UTF8")) { + iconv_from = iconv_open("UTF-8", charset); + iconv_to = iconv_open(charset, "UTF-8"); + } else { + iconv_from = (iconv_t)-2; + iconv_to = (iconv_t)-2; + } + } + + charsets = rccGetCurrentAutoCharsetList(ctx); + for (i=0;charsets[i];i++) { + charset = charsets[i]; + if (strcmp(charset, "UTF-8")&&strcmp(charset, "UTF8")) + iconv_auto = iconv_open("UTF-8", charset); + else + iconv_auto = (iconv_t)-2; + } + + rccEngineConfigure(&ctx->engine_ctx); + + return 0; +} + +char *rccCreateResult(rcc_context *ctx, int len, int *rlen) { + char *res; + + if (!len) len = strlen(ctx->tmpbuffer); + + res = (char*)malloc(len+1); + if (!res) return NULL; + + memcpy(res, ctx->tmpbuffer, len); + res[len] = 0; + + if (rlen) *rlen = len; + + return res; +} diff --git a/src/librcc.h b/src/librcc.h new file mode 100644 index 0000000..c3d6f17 --- /dev/null +++ b/src/librcc.h @@ -0,0 +1,271 @@ +#ifndef LIBRCC_H +#define LIBRCC_H + +#include <iconv.h> + +#define RCC_MAX_CHARSETS 16 +#define RCC_MAX_ENGINES 5 +#define RCC_MAX_LANGUAGES 64 +#define RCC_MAX_CLASSES 16 + +#define RCC_MAX_ERRORS 3 + +#define RCC_MAX_CHARSET_CHARS 16 +#define RCC_MAX_LANGUAGE_CHARS 16 +#define RCC_MAX_VARIABLE_CHARS 16 + +#define RCC_MAX_STRING_CHARS 1024 + +#define RCC_STRING_MAGIC 0xFF7F01FF +/* + class = charset class + engine = auto engine + selected - which is selected + current - resolves default values +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef unsigned int rcc_init_flags; +#define RCC_DEFAULT_CONFIGURATION 1 + +typedef int rcc_option_value; +typedef enum rcc_option_t { + RCC_LEARNING_MODE = 0, + RCC_AUTODETECT_FS_TITLES, + RCC_AUTODETECT_FS_NAMES, + RCC_USE_HEADERS, + RCC_MAX_OPTIONS +} rcc_option; + +typedef enum rcc_class_type_t { + RCC_CLASS_INVALID = 0, + RCC_CLASS_STANDARD, + 
RCC_CLASS_FS +} rcc_class_type; + + +struct rcc_string_header_t { + unsigned int magic; + rcc_language_id language_id; +}; +typedef struct rcc_string_header_t rcc_string_header; + +typedef char *rcc_string; +typedef char rcc_language_id; +typedef char rcc_charset_id; +typedef char rcc_engine_id; +typedef int rcc_class_id; + +typedef struct rcc_context_t rcc_context; +typedef struct rcc_engine_context_t rcc_engine_ctx; +typedef const struct rcc_class_t rcc_class; +typedef struct rcc_language_t rcc_language; +typedef struct rcc_engine_t rcc_engine; +typedef const char *rcc_charset; + +typedef struct rcc_language_config_t rcc_language_config; +typedef const struct rcc_language_alias_t rcc_language_alias; + +typedef void *rcc_engine_internal; +typedef rcc_engine_internal (*rcc_engine_init_function)(rcc_engine_context *ctx); +typedef rcc_charset_id (*rcc_engine_function)(rcc_engine_context ctx, char *buf, int len); +typedef void (rcc_engine_free_function)(rcc_engine_context ctx); + +typedef rcc_charset rcc_charset_list[RCC_MAX_CHARSETS+1]; + +struct rcc_engine_t { + const char *title; + rcc_engine_init_function init_func; + rcc_engine_free_function free_func; + rcc_engine_function func; + rcc_charset_list charsets; +}; +typedef rcc_engine *rcc_engine_ptr; +typedef rcc_engine_ptr rcc_engine_list[RCC_MAX_ENGINES+1]; + +struct rcc_language_t { + const char *sn; + const char *name; + rcc_charset_list charsets; + rcc_engine_list engines; +}; +typedef rcc_language *rcc_language_ptr; +typedef rcc_language_ptr rcc_language_list[RCC_MAX_LANGUAGES+1]; + +struct rcc_language_alias_t { + const char *alias; + const char *lang; +}; + +struct rcc_class_t { + const char *name; + const char *defvalue; /* locale variable name or parrent name */ + const rcc_class_type class_type; +}; +typedef rcc_class *rcc_class_ptr; +typedef rcc_class_ptr rcc_class_list[RCC_MAX_CLASSES+1]; + +struct rcc_language_config_t { + rcc_context *ctx; + rcc_language *language; + + rcc_engine_id engine; + 
rcc_charset_id *charset; + rcc_option_value options[RCC_MAX_OPTIONS]; +}; + +struct rcc_engine_context_t { + rcc_context *ctx; + rcc_language *language; + + rcc_engine_function func; + rcc_engine_free_function free_func; + + rcc_engine_internal internal; +}; + +struct rcc_context_t { + char locale_variable[RCC_MAX_VARIABLE_CHARS+1]; + + unsigned int max_languages; + unsigned int n_languages; + rcc_language_ptr *languages; + rcc_language_config *configs; + + unsigned int max_classes; + unsigned int n_classes; + rcc_class_ptr *classes; + + rcc_engine_ctx engine_ctx; + + iconv_t *iconv_from; + iconv_t *iconv_to; + iconv_t iconv_auto[RCC_MAX_CHARSETS]; + + char tmpbuffer[RCC_MAX_STRING_CHARS+sizeof(rcc_string_footer)+1]; + iconv_t fsiconv; + + unsigned char configure; + rcc_language_config *current_config; + rcc_language_id current_language; +}; + +int rccInit(); +void rccFree(); + +rcc_context rccInitContext(rcc_init_flags flags, unsigned int max_languages, unsigned int max_classes, const char *locale); +void rccFreeContext(rcc_context *ctx); + +rcc_language_id rccRegisterLanguage(rcc_context *ctx, rcc_language *language); +rcc_charset_id rccLanguageRegisterCharset(rcc_language *language, rcc_charset charset); +rcc_engine_id rccLanguageRegisterEngine(rcc_language *language, rcc_engine *engine); +rcc_class_id rccRegisterClass(rcc_context *ctx, rcc_class *cl); + +rcc_class_type rccGetClassType(rcc_context *ctx, rcc_class_id class_id); + +int rccConfigure(rcc_context *ctx); +char *rccCreateResult(rcc_context *ctx, int len, int *rlen); + +/* lng.c */ +const char *rccGetLanguageName(rcc_context *ctx, rcc_language_id language_id); +rcc_language_id rccGetLanguageByName(rcc_context *ctx, const char *name); +rcc_language_id rccGetRealLanguage(rcc_context *ctx, rcc_language_id language_id); +const char *rccGetRealLanguageName(rcc_context *ctx, rcc_language_id language_id); +rcc_language_id rccGetSelectedLanguage(rcc_context *ctx); +const char 
*rccGetSelectedLanguageName(rcc_context *ctx); +rcc_language_id rccGetCurrentLanguage(rcc_context *ctx); +const char *rccGetCurrentLanguageName(rcc_context *ctx); + +int rccSetLanguage(rcc_context *ctx, rcc_language_id language_id); +int rccSetLanguageByName(rcc_context *ctx, const char *name); + +/* lngconfig.c */ +int rccConfigInit(rcc_language_config *config, rcc_context *ctx); +int rccConfigFree(rcc_language_config *config); + +const char *rccConfigGetEngineName(rcc_language_config config, rcc_engine_id engine_id); +const char *rccConfigGetCharsetName(rcc_language_config config, rcc_charset_id charset_id); +const char *rccConfigGetAutoCharsetName(rcc_language_config config, rcc_charset_id charset_id); +rcc_engine_id rccConfigGetEngineByName(rcc_language_config *config, const char *name); +rcc_charset_id rccConfigGetCharsetByName(rcc_language_config *config, const char *name); +rcc_charset_id rccConfigGetAutoCharsetByName(rcc_language_config *config, const char *name); + +rcc_language_config *rccGetConfig(rcc_context *ctx, rcc_language_id language_id); +rcc_language_config *rccGetConfigByName(rcc_context *ctx, const char *name); +rcc_language_config *rccGetCurrentConfig(rcc_context *ctx); + +rcc_engine_id rccConfigGetSelectedEngine(rcc_language_config config); +const char *rccConfigGetSelectedEngineName(rcc_language_config config); +rcc_engine_id rccConfigGetCurrentEngine(rcc_language_config config); +const char *rccConfigGetCurrentEngineName(rcc_language_config config); +rcc_charset_id rccConfigGetSelectedCharset(rcc_language_config config, rcc_class_id class_id); +const char *rccConfigGetSelectedCharsetName(rcc_language_config config, rcc_class_id class_id); +rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_id class_id); +const char *rccConfigGetCurrentCharsetName(rcc_language_config config, rcc_class_id class_id); +rcc_option_value rccConfigGetOption(rcc_language_config config, rcc_option option); + +int 
rccConfigSetEngine(rcc_language_config *config, rcc_engine_id engine_id); +int rccConfigSetCharset(rcc_language_config *config, rcc_class_id class_id, rcc_charset_id charset_id); +int rccConfigSetEngineByName(rcc_language_config *config, const char *name); +int rccConfigSetCharsetByName(rcc_language_config *config, rcc_class_id class_id, const char *name); +int rccConfigSetOption(rcc_language_config *config, rcc_option option, rcc_option_value value); + +rcc_charset_id rccConfigGetLocaleCharset(rcc_language_config *config, const char *locale_variable); + +/* curconfig.c */ +#define rccGetEngineName(ctx, engine_id) rccConfigGetEngineName(ctx->current_config, engine_id) +#define rccGetCharsetName(ctx, charset_id) rccConfigGetCharsetName(ctx->current_config, charset_id) +#define rccGetAutoCharsetName(ctx, charset_id) rccConfGetAutoCharsetName(ctx->current_config, charset_id) +#define rccGetEngineByName(ctx, name) rccConfigGetEngineByName(ctx->current_config, name) +#define rccGetCharsetByName(ctx, name) rccConfigGetCharsetByName(ctx->current_config, name) +#define rccGetAutoCharsetByName(ctx, name) rccConfigGetAutoCharsetByName(ctx->current_config, name) + +#define rccGetSelectedEngine(ctx) rccConfigGetSelectedEngine(ctx->current_config) +#define rccGetSelectedEngineName(ctx) rccConfigGetSelectedEngineName(ctx->current_config) +#define rccGetCurrentEngine(ctx) rccConfigGetCurrentEngine(ctx->current_config) +#define rccGetCurrentEngineName(ctx) rccConfigGetCurrentEngineName(ctx->current_config) +#define rccGetSelectedCharset(ctx,class_id) rccConfigGetSelectedCharset(ctx->current_config, class_id) +#define rccGetSelectedCharsetName(ctx,class_id) rccConfigGetSelectedCharsetName(ctx->current_config, class_id) +#define rccGetCurrentCharset(ctx,class_id) rccConfigGetCurrentCharset(ctx->current_config, class_id) +#define rccGetCurrentCharsetName(ctx,class_id) rccConfigGetCurrentCharsetName(ctx->current_config, class_id) +#define rccGetOption(ctx, option) 
rccConfigGetOption(ctx->current_config, option) + +#define rccSetEngine(ctx, engine_id) rccConfigSetEngine(ctx->current_config, engine_id) +#define rccSetCharset(ctx, class_id, charset_id) rccConfigSetCharset(ctx->current_config, class_id, charset_id) +#define rccSetOption(ctx,option,value) rccConfigSetOption(ctx->current_config, option, value) +#define rccSetEngineByName(ctx, name) rccConfigSetEngineByName(ctx->current_config, name) +#define rccSetCharsetByName(ctx, class_id, name) rccConfigSetCharsetByName(ctx->current_config, class_id, name) + +#define rccGetLocaleCharset(ctx, locale_variable) rccConfigGetLocaleCharset(ctx->current_config, locale_variable) + +/* recode.c */ +char *rccFrom(rcc_context *ctx, rcc_class_id class_id, char *buf, int len, int *rlen); +char *rccTo(rcc_context *ctx, rcc_class_id class_id, char *buf, int len, int *rlen); +char *rccRecode(rcc_context *ctx, rcc_class_id from, rcc_class_id to, char *buf, int len, int *rlen); +char *rccFS(rcc_context *ctx, char *fspath, char *path, char *filename, int len, int *rlen); + +/* string.c */ +rcc_string rccStringInit(rcc_language_id language_id, const char *buf, int len, int *rlen); +void rccStringFree(rcc_string str); + +rcc_language_id rccStringCheck(const rcc_string str); +const char *rccStringGet(const rcc_string str); +char *rccStringExtract(const rcc_string buf, int len, int *rlen); + +char *rccStringCmp(const rcc_string str1, const rcc_string str2); +char *rccStringNCmp(const rcc_string str1, const rcc_string str2, size_t n); +char *rccStringCaseCmp(const rcc_string str1, const rcc_string str2); +char *rccStringNCaseCmp(const rcc_string str1, const rcc_string str2, size_t n); + +/* xml.c */ +int rccSave(rcc_context *ctx); +int rccLoad(rcc_context *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBRCC_H */ diff --git a/src/lng.c b/src/lng.c new file mode 100644 index 0000000..607c69d --- /dev/null +++ b/src/lng.c @@ -0,0 +1,135 @@ +#include <stdio.h> +#include <string.h> +#include 
<locale.h> + +#include <librcd.h> +#include "librcc.h" + +const char *rccGetLanguageName(rcc_context *ctx, rcc_language_id language_id) { + if ((!ctx)||(language_id<0)||(language_id>=ctx->n_languages)) return NULL; + return ctx->languages[language_id]->sn; +} + +language_id rccGetLanguageByName(rcc_context *ctx, const char *name) { + unsigned int i; + if ((!ctx)||(!name)) return 0; + + for (i=0;ctx->languages[i];i++) + if (!strcmp(ctx->languages[i]->sn, name)) return i; + + return 0; +} + +static int rccGetLocaleLanguage(char *result, const char *lv, unsigned int n) { + charset_list_t *enc; + char *l; + + if (!lv) return -1; + + l = setlocale(lv, NULL); + if (!l) return -1; + else if ((strcmp(l,"C")==0)||(strcmp(l,"POSIX")==0)) return -1; + + for (i=0;((l[i])&&(l[i]!='.'));i++); + + for (i=0;rcc_default_aliases[i].alias;i++) + if (strncmp(l,rcc_default_aliases[i].alias,i)==0) { + l = rcc_default_aliases[i].alias; + break; + } + + for (i=0;((l[i])&&(l[i]!='.')&&(l[i]!='_'));i++); + if (i>=n) return -1; + + strncpy(result,l,i); + result[i]=0; + + return 0; +} + +static rcc_language_id rccGetDefaultLanguage(rc_context *ctx) { + int err; + unsigned int i; + char stmp[RCC_MAX_LANGUAGE_CHARS+1]; + + if (!ctx) return -1; + + err = rccGetLocaleLanguage(stmp, ctx->locale_variable, RCC_MAX_LANGUAGE_CHARS); + if (err) { + if (ctx->n_languages>1) return 1; + return -1; + } + + for (i=0;ctx->languages[i];i++) + if (!strcmp(ctx->languages[i]->sn, stmp)) return i; + + if (i>1) return 1; + return -1; +} + +rcc_language_id rccGetRealLanguage(rcc_context *ctx, rcc_language_id language_id) { + if ((!ctx)||(language_id<0)||(language_id>=ctx->n_languages)) return -1; + if (language_id) return language_id; + return rccGetDefaultLanguage(ctx); +} + +const char *rccGetRealLanguageName(rcc_context *ctx, rcc_language_id language_id) { + language_id = rccGetRealLanguage(ctx, language_id); + if (language_id<0) return NULL; + + return rccGetLanguageName(ctx, language_id); +} + +rcc_language_id 
rccGetSelectedLanguage(rcc_context *ctx) { + if (!ctx) return NULL; + return ctx->current_language; +} + +const char *rccGetSelectedLanguageName(rcc_context *ctx) { + rcc_language_id language_id; + + language_id = rccGetSelectedLanguage(ctx); + if (language_id<0) return NULL; + + return rccGetLanguageName(ctx, language_id); +} + +rcc_language_id rccGetCurrentLanguage(rcc_context *ctx) { + if (!ctx) return -1; + return rccGetRealLanguage(ctx, ctx->current_language); +} + +const char *rccGetCurrentLanguageName(rcc_context *ctx) { + rcc_language_id language_id; + + language_id = rccGetCurrentLanguage(ctx); + if (language_id<0) return NULL; + + return rccGetLanguageName(ctx, language_id); +} + + +int rccSetLanguage(rcc_context *ctx, rcc_language_id language_id) { + rcc_language_config config; + + if ((!ctx)||(language_id < 0)||(language_id >= ctx->n_languages)) return -1; + if ((!ctx->languages[language_id]->engines[0])||(!ctx->languages[language_id]->charsets[0])) return -2; + + if (ctx->current_language != language_id) { + config = rccGetConfig(ctx, language_id); + if (!config) return -1; + + ctx->configure = 1; + ctx->current_language = language_id; + ctx->current_config = config; + } +} + +int rccSetLanguageByName(rcc_context *ctx, const char *name) { + rcc_language_id language_id; + + language_id = rccGetLanguageByName(ctx, name); + if (language_id < 0) return -1; + + return rccSetLanguage(ctx, language_id); +} diff --git a/src/lngconfig.c b/src/lngconfig.c new file mode 100644 index 0000000..ba904ec --- /dev/null +++ b/src/lngconfig.c @@ -0,0 +1,363 @@ +#include <stdio.h> +#include <string.h> + +#include <librcd.h> +#include "librcc.h" +#include "config.h" + +const char *rccConfigGetEngineName(rcc_language_config config, rcc_engine_id engine_id) { + rcc_engine_ptr *engines; + + if ((!config)||(!config->language)||(engine_id<0)) return NULL; + + engines = config->language->engines; + + for (i=0;engines[i];i++); + if (engine_id>=i) return NULL; + + return 
engines[engine_id]->title; +} + +const char *rccConfigGetCharsetName(rcc_language_config config, rcc_charset_id charset_id) { + rcc_charset_ptr *charsets; + + if ((!config)||(!config->language)||(charset_id<0)) return NULL; + + charsets = config->language->charsets; + + for (i=0;charsets[i];i++); + if (charset_id>=i) return NULL; + + return charsets[charset_id]; +} + +const char *rccConfigGetAutoCharsetName(rcc_language_config config, rcc_charset_id charset_id) { + rcc_charset_ptr *charsets; + rcc_engine_ptr *engines; + + if ((!config)||(!config->language)||(engine_id<0)) return NULL; + + engines = config->language->engines; + charsets = engines[config->engine]->charsets; + + for (i=0;charsets[i];i++); + if (charset_id>=i) return NULL; + + return charsets[charset_id]; +} + + +rcc_engine_id rccConfigGetEngineByName(rcc_language_config *config, const char *name) { + unsigned int i; + rcc_engine *engines; + + if ((!config)||(!config->language)||(!name)) return -1; + + engines = config->language->engines; + for (i=0;engines[i];i++) + if (!strcmp(engines[i]->title,name)) return i; + + return -1; +} + +rcc_charset_id rccConfigGetCharsetByName(rcc_language_config *config, const char *name) { + unsigned int i; + rcc_charset *charsets; + + if ((!config)||(!config->language)||(!name)) return -1; + + charsets = config->language->charsets; + for (i=0;charsets[i];i++) + if (!strcmp(charsets[i],name)) return i; + + return 0; +} + +rcc_charset_id rccConfigGetAutoCharsetByName(rcc_language_config *config, const char *name) { + unsigned int i; + rcc_charset *charsets; + rcc_engine_ptr *engines; + + if ((!config)||(!config->language)||(!name)) return -1; + + engines = config->language->engines; + charsets = engines[config->engine]->charsets; + + for (i=0;charsets[i];i++) + if (!strcmp(charsets[i],name)) return i; + + return -1; +} + +int rccConfigInit(rcc_language_config *config, rcc_context *ctx) { + rcc_charset_id *charsets; + + if ((!ctx)||(!config)) return -1; + + charsets = 
(rcc_charset_id*)malloc((ctx->max_classes)*sizeof(rcc_charset_id)); + if (!charsets) return -1; + + for (i=0;i<ctx->max_classes;i++) + charsets[i] = 0; + + config->ctx = ctx; + config->language = NULL; + config->charset = charsets; + config->engine = -1; + for (i=0;i<RCC_MAX_OPTIONS;i++) + config->options[i] = 0; + + return 0; +} + +int rccConfigFree(rcc_language_config *config) { + if (config->charset) { + free(config->charset); + config->charset = NULL; + } +} + + +rcc_language_config *rccGetConfig(rcc_context *ctx, rcc_language_id language_id) { + int err; + + language_id = rccGetRealLanguage(ctx, language_id); + if (language_id < 0) return NULL; + if (!ctx->configs[language_id].charsets) { + if (rccInitConfig(ctx->configs+language_id, ctx)) return NULL; + } + + ctx->configs[language_id] = ctx->languages[language_id]; + return ctx->configs + language_id; +} + +rcc_language_config *rccGetConfigByName(rcc_context *ctx, const char *name) { + rcc_language_id language_id; + + language_id = rccGetLanguageByName(ctx, name); + if (language_id < 0) return NULL; + + return rccGetConfig(ctx, language_id); +} + +rcc_language_config *rccGetCurrentConfig(rcc_context *ctx) { + rcc_language_id language_id; + + language_id = rccGetCurrentLanguage(ctx); + if (language_id < 0) return NULL; + + return rccGetConfig(ctx, language_id); +} + + +rcc_engine_id rccConfigGetSelectedEngine(rcc_language_config config) { + if (!config) return -1; + + return config->engine; +} + +const char *rccConfigGetSelectedEngineName(rcc_language_config config) { + rcc_engine_id engine_id; + + engine_id = rccConfigGetSelectedEngine(config); + if (engine_id == -1) return rcc_engine_nonconfigured; + if ((engine_id < 0)||(!config->language)) return NULL; + + return rccConfigGetEngineName(config, engine_id); +} + +rcc_engine_id rccConfigGetCurrentEngine(rcc_language_config config) { + rcc_engine_list enginelist; + rcc_engine_id engine_id; + + engine_id = rccConfigGetSelectedEngine(config); + if (engine_id>=0) 
return engine_id; + + if (!config->language) return NULL; + else enginelist = config->language->engines; + + if (enginelist[0]) { + if (enginelist[1]) return 1; + return 0; + } + return -1; +} + +const char *rccConfigGetCurrentEngineName(rcc_language_config config) { + rcc_engine_id engine_id; + + engine_id = rccConfigGetCurrentEngine(config); + if ((engine_id < 0)||(!config->language)) return NULL; + + return rccConfigGetEngineName(config, engine_id); +} + + +static int rccGetLocaleCharset(char *result, const char *lv, unsigned int n) { + char *l; + + if (!lv) return -1; + l = setlocale(lv, NULL); + if (!l) return -1; + + for (i=0;((l[i])&&(l[i]!='.')&&(l[i]!='_'));i++); + if (i>=n) return -1; + + l = strrchr(l, '.'); + if (!l) return -1; + + for (i=0;((l[i])&&(l[i]!='@'));i++); + if (i>=n) return -1; + + strncpy(result,l,i); + result[i]=0; + + return 0; +} + +rcc_charset_id rccConfigGetSelectedCharset(rcc_language_config config, rcc_class_id class_id) { + if ((!config)||(!config->ctx)||(class_id<0)||(class_id>=ctx->n_classes)) return -1; + + return config->charset[class_id]; +} + +const char *rccConfigGetSelectedCharsetName(rcc_language_config config, rcc_class_id class_id) { + rcc_charset_id charset_id; + + charset_id = rccConfigGetSelectedCharset(config, class_id); + if ((charset_id < 0)||(!config->language)) return NULL; + + return rccConfigGetCharsetName(config, charset_id); +} + +rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_id class_id) { + int err; + unsigned int i; + rcc_charset_id charset_id; + + rcc_language *language; + rcc_class *classes; + rcc_charset *charsets; + + char stmp[RCC_MAX_CHARSET_CHARS + 1]; + char *defvalue; + + if ((!config)||(!config->ctx)||(class_id<0)||(class_id>=ctx->n_classes)) return NULL; + + charset_id = ctx->config->charset[class_id]; + if (charset_id) return charset_id; + + if (!config->language) return -1; + else language = config->language; + + classes = config->ctx->classes; + + cl = 
classes[class_id]; + defvalue = cl->defvalue; + if (defvalue) { + for (i=0;classes[i];i++) { + if (!strcmp(classes[i]->name, defvalue)) + return rccConfigGetCurrentCharset(config, i); + } + } else defvalue = config->ctx->locale_variable; + + err = rccGetLocaleCharset(stmp, defvalue, RCC_MAX_CHARSET_CHARS); + if (err) { + charsets=ctx->languages[language_id]->charsets; + if ((charsets[0])&&(charsets[1])) return 1; + return -1; + } + + return rccConfigGetCharsetByName(config, stmp); +} + +const char *rccConfigGetCurrentCharsetName(rcc_language_config config, rcc_class_id class_id) { + rcc_charset_id charset_id; + + charset_id = rccConfigGetCurrentCharset(config, class_id); + if ((charset_id < 0)||(!config->language)) return NULL; + + return rccConfigGetCharsetName(config, charset_id); +} + +rcc_option_value rccConfigGetOption(rcc_language_config config, rcc_option option) { + if ((!config)||(option<0)||(option>=RCC_MAX_OPTIONS)) return -1; + + return config->options[option]; +} + +int rccConfigSetEngine(rcc_language_config *config, rcc_engine_id engine_id) { + unsigned int i; + + if ((!config)||(!config->language)||(engine_id < 0)) return -1; + + for (i=0;config->language->engines[i];i++); + if (engine_id >= i) return -1; + + if (config->engine != engine_id) { + if (config->ctx->current_config == config) config->ctx->configure = 1; + config->engine = engine_id; + } + return 0; +} + +int rccConfigSetEngineByName(rcc_language_config *config, const char *name) { + rcc_engine_id engine_id; + + engine_id = rccConfigGetEngineByName(config, name); + if (engine_id < 0) return -1; + + return rccConfigSetEngine(config, engine_id); +} + +int rccConfigSetCharset(rcc_language_config *config, rcc_class_id class_id, rcc_charset_id charset_id) { + unsigned int i; + + if ((!config)||(!config->language)||(class_id < 0)||(class_id >= config->ctx->n_classes)||(charset_id<0)) return -1; + + for (i=0;config->language->charsets[i];i++); + if (charset_id >= i) return -1; + + if 
(config->charset[class_id] != charset_id) { + if (config->ctx->current_config == config) config->ctx->configure = 1; + config->charset[class_id] = charset_id; + } + + return 0; +} + +int rccConfigSetCharsetByName(rcc_language_config *config, rcc_class_id class_id, const char *name) { + rcc_charset_id charset_id; + + charset_id = rccConfigGetCharsetByName(config, name); + if (charset_id < 0) return -1; + + return rccConfigSetCharset(config, class_id, charset_id); +} + +int rccConfigSetOption(rcc_language_config *config, rcc_option option, rcc_option_value value) { + if ((!config)||(option>=RCC_MAX_OPTIONS)) return -1; + if (config->options[option] != value) { + if (config->ctx->current_config == config) config->ctx->configure = 1; + config->options[option]=value; + } + + return 0; +} + +rcc_charset_id rccConfigGetLocaleCharset(rcc_language_config *config, const char *locale_variable) { + int err; + rcc_charset *charsets; + char stmp[RCC_MAX_CHARSET_CHARS+1]; + + if ((!config)||(!config->language)) return -1; + + err = rccGetLocaleCharset(stmp, locale_variable?locale_variable:config->ctx->locale_variable, RCC_MAX_CHARSET_CHARS); + if (err) { + charsets=config->language->charsets; + if ((charsets[0])&&(charsets[1])) return 1; + return -1; + } + + return rccConfigGetCharsetByName(config, stmp); +} diff --git a/src/plugin.c b/src/plugin.c new file mode 100644 index 0000000..53ff00c --- /dev/null +++ b/src/plugin.c @@ -0,0 +1,31 @@ +#ifdef RCC_PLUGINS +# include <dlfcn.h> +# ifndef RTLD_NOW +# define RTLD_NOW 0 +# endif +#endif /* RCC_PLUGINS */ + +rcc_library_handle rccLibraryOpen(char *filename) +{ +#ifdef RCC_PLUGINS + return (rcc_library_handle)dlopen(filename, RTLD_NOW); +#else + return NULL; +#endif /* RCC_PLUGINS */ +} + +void rccLibraryClose(rcc_library_handle handle) +{ +#ifdef RCC_PLUGINS + dlclose(handle); +#endif /* RCC_PLUGINS */ +} + +void* rccLibraryFind(rcc_library_handle handle, const char *symbol) +{ +#ifdef RCC_PLUGINS + return dlsym(handle, symbol); 
+#else + return NULL; +#endif /* RCC_PLUGINS */ +} diff --git a/src/plugin.h b/src/plugin.h new file mode 100644 index 0000000..c6ed7b1 --- /dev/null +++ b/src/plugin.h @@ -0,0 +1,15 @@ +#ifndef _RCC_PLUGIN_H +#define _RCC_PLUGIN_H +#include "enca.h" + +#ifdef RCC_ENCA_DYNAMIC +# define RCC_PLUGINS +#endif /* RCC_ENCA_DYNAMIC */ + +typedef void *rcc_library_handle; + +rcc_library_handle rccLibraryOpen(char *filename); +void rccLibraryClose(rcc_library_handle handle); +void* rccLibraryFind(rcc_library_handle handle, const char *symbol); + +#endif /* _RCC_PLUGIN_H */ diff --git a/src/recode.c b/src/recode.c new file mode 100644 index 0000000..6d82daa --- /dev/null +++ b/src/recode.c @@ -0,0 +1,228 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <locale.h> + +#include <librcd.h> +#include "librcc.h" + +#include "fs.h" +#include "config.h" + + +static void rccIConvCopySymbol(char **in_buf, int *in_left, char **out_buf, int *out_left) { + if ((out_left>0)&&(in_left>0)) { + (**out_buf)=(**in_buf); + (*out_buf)++; + (*in_buf)++; + (*in_left)--; + (*out_left)--; + } +} + +static int rccIConvUTFBytes(unsigned char c) { + int j; + if (c<128) return 1; + + for (j=6;j>=0;j--) + if ((c&bit(j))==0) break; + + if ((j==0)||(j==6)) return 1; + return 6-j; +} + +static int rccIConv(rcc_context *ctx, iconv_t icnv, char *buf, int len) { + char *in_buf, *out_buf, *res, err; + int in_left, out_left, olen; + int ub, utf_mode=0; + int errors=0; + + if ((!buf)||(!ctx)||(icnv == (iconv_t)-1)) return -1; + + len = STRNLEN(buf,len); + + if (iconv(icnv, NULL, NULL, NULL, NULL) == -1) return -1; + +loop_restart: + errors = 0; + in_buf = buf; + in_left = len; + out_buf = ctx->tmpbuffer; + out_left = RCC_MAX_STRING_CHARS; + +loop: + err=iconv(icnv, &in_buf, &in_left, &out_buf, &out_left); + if (err<0) { + if (errno==E2BIG) { + *(int*)(ctx->tmpbuffer+(CHARSET_MAX_STRING_SIZE-sizeof(int)))=0; + } else if (errno==EILSEQ) { + if (errors++<CHARSET_MAX_ERRORS) { + for 
(ub=utf_mode?rccIConvUTFBytes(*in_buf):1;ub>0;ub--)
+                rccIConvCopySymbol(&in_buf, &in_left, &out_buf, &out_left);
+            if (in_left>0) goto loop;
+        } else if (!utf_mode) {
+            utf_mode = 1;
+            goto loop_restart;
+        } else {
+            return -1;
+        }
+    } else {
+        return -1;
+    }
+    }
+
+    return CHARSET_MAX_STRING_SIZE - out_left;
+}
+
+
+/*
+ * Ask the current autodetection engine which charset `buf` is encoded in.
+ *
+ * Detection is attempted only for RCC_CLASS_STANDARD classes, or for
+ * RCC_CLASS_FS classes when the RCC_AUTODETECT_FS_TITLES option is set.
+ *
+ * Returns the value produced by the engine callback, or -1 when detection
+ * does not apply: NULL ctx/buf, unsuitable class, no current engine, an
+ * engine without a callback, or an engine titled "off" / "dissable"
+ * (both compared case-insensitively).
+ */
+static charset_id rccIConvAuto(rcc_context *ctx, rcc_class_id class_id, char *buf, int len) {
+    rcc_class_type class_type;
+    /* NOTE(review): type inferred from the ->title / ->func accesses below
+       and the rcc_engine definitions in config.c; confirm against the
+       return type of rccGetCurrentEngine(). */
+    rcc_engine *engine;
+
+    if ((!ctx)||(!buf)) return -1;
+
+    class_type = rccGetClassType(ctx, class_id);
+    if ((class_type == RCC_CLASS_STANDARD)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_AUTODETECT_FS_TITLES)))) {
+        engine = rccGetCurrentEngine(ctx);
+        if ((!engine)||(!engine->func)) return -1;
+        if ((!stricmp(engine->title, "off"))||(!stricmp(engine->title, "dissable"))) return -1;
+
+        /* NOTE(review): rcc_autoengine_russian() in config.c takes an extra
+           engine-context argument; confirm which callback signature is
+           the intended one. */
+        return engine->func(buf, len);
+    }
+
+    return -1;
+}
+
+/*
+ * Convert `buf`, encoded in the charset of class `class_id` (or in an
+ * autodetected charset), into a newly created rcc_string tagged with the
+ * current language.
+ *
+ * ctx      - library context; configured on demand through rccConfigure()
+ * class_id - source class, must be within [0, ctx->n_classes)
+ * buf, len - input text and its length, passed through to the converters
+ * rlen     - forwarded to rccCreateString()
+ *
+ * Returns the new string, or NULL on invalid arguments, configuration
+ * failure, missing converter ((iconv_t)-1) or conversion failure.
+ * A converter handle of (iconv_t)-2 means the input is wrapped as-is,
+ * without going through rccIConv().
+ */
+rcc_string rccFrom(rcc_context *ctx, rcc_class_id class_id, const char *buf, int len, int *rlen) {
+    int err;
+    rcc_language_id language_id;
+    rcc_charset_id charset_id;
+    iconv_t icnv = (iconv_t)-1;
+    char *result;
+
+    if ((!ctx)||(class_id<0)||(class_id>=ctx->n_classes)||(!buf)) return NULL;
+
+    err = rccConfigure(ctx);
+    if (err) return NULL;
+
+
+    language_id = rccGetCurrentLanguage(ctx);
+    // DS: Learning. check database (language_id)
+
+    /* The class must be passed along: rccIConvAuto() decides from it
+       whether autodetection applies at all. */
+    charset_id = rccIConvAuto(ctx, class_id, buf, len);
+    if (charset_id > 0) icnv = ctx->iconv_auto[charset_id];
+    if (icnv == (iconv_t)-1) {
+        /* No usable autodetected converter: fall back to the class one. */
+        icnv = ctx->iconv_from[class_id];
+        if (icnv == (iconv_t)-1) return NULL;
+    }
+
+    if (icnv == (iconv_t)-2) {
+        result = rccCreateString(language_id, buf, len, rlen);
+    } else {
+        err = rccIConv(ctx, icnv, buf, len);
+        if (err<=0) return NULL;
+        /* rccIConv() leaves its output in ctx->tmpbuffer; err is its size. */
+        result = rccCreateString(language_id, ctx->tmpbuffer, err, rlen);
+    }
+
+    // DS: Learning. write database
+
+    return result;
+}
+
+/*
+ * Convert the rcc_string `buf` into the charset of class `class_id`.
+ *
+ * ctx      - library context; configured on demand through rccConfigure()
+ * class_id - target class, must be within [0, ctx->n_classes)
+ * buf, len - rcc_string (header included) and its length; len == 0 is
+ *            forwarded to rccIConv() as 0
+ * rlen     - forwarded to the function producing the result
+ *
+ * For RCC_CLASS_FS classes with RCC_AUTODETECT_FS_NAMES set, the file-name
+ * path (rccFS0/rccFS3) is tried first and its result returned on success.
+ *
+ * Returns the converted text, or NULL on invalid arguments, when
+ * rccCheckString() rejects `buf`, on configuration failure, missing
+ * converter ((iconv_t)-1) or conversion failure.
+ */
+char *rccTo(rcc_context *ctx, rcc_class_id class_id, const rcc_string buf, int len, int *rlen) {
+    int err;
+    char *result;
+    char *prefix, *name;
+    rcc_language_id language_id;
+    rcc_class_type class_type;
+    iconv_t icnv;
+
+    if ((!ctx)||(class_id<0)||(class_id>=ctx->n_classes)||(!buf)) return NULL;
+
+    language_id = rccCheckString(ctx, buf);
+    if (!language_id) return NULL;
+
+    err = rccConfigure(ctx);
+    if (err) return NULL;
+
+    icnv = ctx->iconv_to[class_id];
+
+    class_type = rccGetClassType(ctx, class_id);
+    if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_AUTODETECT_FS_NAMES))) {
+        // DS: file_names (autodetect fspath)
+        prefix = NULL; name = buf + sizeof(rcc_string_header);
+        err = rccFS0(NULL, buf, &prefix, &name);
+        if (!err) {
+            result = rccFS3(ctx, language_id, class_id, prefix, name, 0, rlen);
+            return result;
+        }
+    }
+
+    if (icnv == (iconv_t)-1) return NULL;
+    if (icnv == (iconv_t)-2) {
+        result = rccParseString(ctx, buf, len, rlen);
+    } else {
+        /* Skip the rcc_string header: only the payload is converted. */
+        err = rccIConv(ctx, icnv, buf + sizeof(rcc_string_header), len?len-sizeof(rcc_string_header):0);
+        if (err<=0) return NULL;
+
+        result = rccCreateAnswer(ctx, err, rlen);
+    }
+
+    return result;
+}
+
+/*
+ * Recode `buf` from the charset of class `from` into the charset of class
+ * `to` by chaining rccFrom() and rccTo().
+ *
+ * Returns NULL on invalid arguments and also when the source and target
+ * charsets turn out to be the same (presumably "nothing to recode" -
+ * callers should confirm how they tell that apart from failure).
+ * The same-charset shortcut is skipped when `to` is an RCC_CLASS_FS class
+ * with RCC_AUTODETECT_FS_NAMES set, mirroring the check rccTo() performs
+ * on its target class.
+ */
+char *rccRecode(rcc_context *ctx, rcc_class_id from, rcc_class_id to, const char *buf, int len, int *rlen) {
+    int nlen;
+    rcc_string stmp;
+    rcc_class_type class_type;
+    charset_id from_charset_id, to_charset_id;
+    const char *from_charset, *to_charset;
+    char *result;
+
+    if ((!ctx)||(from<0)||(from>=ctx->n_classes)||(to<0)||(to>=ctx->n_classes)||(!buf)) return NULL;
+
+    class_type = rccGetClassType(ctx, to);
+    if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_AUTODETECT_FS_NAMES))) goto recoding;
+
+    from_charset_id = rccIConvAuto(ctx, from, buf, len);
+    if (from_charset_id>0) {
+        /* Autodetected and configured charsets are numbered separately,
+           so they are compared by name. */
+        from_charset = rccGetAutoCharsetName(ctx, from_charset_id);
+        to_charset = rccGetCurrentCharsetName(ctx, to);
+        if ((from_charset)&&(to_charset)&&(!stricmp(from_charset, to_charset))) return NULL;
+    } else {
+        from_charset_id = rccGetCurrentCharset(ctx, from);
+        to_charset_id = rccGetCurrentCharset(ctx, to);
+        if (from_charset_id == to_charset_id) return NULL;
+    }
+
+recoding:
+    stmp = rccFrom(ctx, from, buf, len, &nlen);
+    if (stmp) {
+        result = rccTo(ctx, to, stmp, nlen, rlen);
+        free(stmp);
+        return result;
+    }
+
+    /* NOTE(review): here `buf` is the raw input, not an rcc_string; rccTo()
+       returns NULL for it unless rccCheckString() accepts it. */
+    return rccTo(ctx, to, buf, len, rlen);
+}
+
+/*
+ * Recode a file name from class `from` to class `to`.
+ *
+ * rccFS1() is handed `fspath` together with prefix/name pointers that
+ * start out as `path` and `filename`. A negative rccFS1() result yields
+ * NULL, a positive one returns `name` as rccFS1() left it. Otherwise
+ * `name` is converted with rccFrom() and the result is produced by
+ * rccFS3().
+ *
+ * Returns the resulting name, or NULL on failure.
+ */
+char *rccFS(rcc_context *ctx, rcc_class_id from, rcc_class_id to, const char *fspath, const char *path, const char *filename) {
+    int err;
+    char *prefix = path, *name = filename;
+    rcc_string string;
+    rcc_language_id language_id;
+    char *result = NULL;
+
+
+    err = rccFS1(ctx, fspath, &prefix, &name);
+    if (err) {
+        if (err<0) return NULL;
+        return name;
+    }
+
+    /* This function has no length parameters. NOTE(review): 0 / NULL are
+       passed on the assumption that a zero length means "NUL-terminated",
+       as in the rccFS3() call below - confirm. */
+    string = rccFrom(ctx, from, name, 0, NULL);
+    if (string) {
+        language_id = rccGetCurrentLanguage(ctx);
+        result = rccFS3(ctx, language_id, to, prefix, string + sizeof(rcc_string_header), 0, NULL);
+        free(string);
+    }
+
+    /* NOTE(review): assumes rccFS1() replaced prefix/name with heap copies
+       when it returned 0; otherwise these free the caller's strings. */
+    free(prefix);
+    free(name);
+
+    return result;
+}
diff --git a/src/string.c b/src/string.c
new file mode 100644
index 0000000..85a5767
--- /dev/null
+++ b/src/string.c
@@ -0,0 +1,74 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+/*
+ * Build an rcc_string: an rcc_string_header (RCC_STRING_MAGIC plus
+ * language_id) immediately followed by a NUL-terminated copy of `buf`.
+ *
+ * len  - length hint, normalised through STRNLEN(buf, len)
+ * rlen - if non-NULL, receives header size + text length (terminator
+ *        not counted)
+ *
+ * Returns a malloc()ed string the caller must release (rccStringFree),
+ * or NULL when `buf` is NULL or the allocation fails.
+ */
+rcc_string rccStringInit(rcc_language_id language_id, const char *buf, int len, int *rlen) {
+    rcc_string_header header = {RCC_STRING_MAGIC, language_id};
+    char *res;
+
+    if (!buf) return NULL;
+
+    len = STRNLEN(buf, len);
+
+    res = (char*)malloc(len+sizeof(rcc_string_header)+1);
+    if (!res) return NULL;
+
+    strncpy(res + sizeof(rcc_string_header), buf, len);
+    /* The last allocated byte is at header + len; that is where the
+       terminator belongs. */
+    res[sizeof(rcc_string_header) + len] = 0;
+
+    memcpy(res, &header, sizeof(rcc_string_header));
+
+    if (rlen) *rlen = len + sizeof(rcc_string_header);
+    return (rcc_string)res;
+}
+
+/* Release a string obtained from rccStringInit(); NULL is accepted. */
+void rccStringFree(rcc_string str) {
+    free(str);
+}
+
+/*
+ * Tell whether `str` starts with an rcc_string header.
+ *
+ * Returns the language id stored in the header, or 0 when `str` is NULL,
+ * is not longer than the magic number, or does not start with
+ * RCC_STRING_MAGIC.
+ */
+rcc_language_id rccStringCheck(const rcc_string str) {
+    size_t len;
+    rcc_string_header *header;
+
+    /* Must precede strlen(): calling it on NULL is undefined. */
+    if (!str) return 0;
+
+    len = strlen(str);
+    if ((len<=sizeof(unsigned int))||(*((unsigned int*)(str))!=RCC_STRING_MAGIC)) return 0;
+
+    header = (rcc_string_header*)(str);
+    return header->language_id;
+}
+
+/*
+ * Return the text of `str`: the bytes after the header when
+ * rccStringCheck() recognises one, otherwise `str` itself.
+ */
+const char *rccStringGet(const rcc_string str) {
+    if (rccStringCheck(str)) return str + sizeof(rcc_string_header);
+    return (const char *)str;
+}
+
+/*
+ * Copy the text that follows the header of `buf` into a new buffer.
+ *
+ * len  - length hint for the whole rcc_string, normalised through
+ *        STRNLEN(buf, len)
+ * rlen - if non-NULL, receives the length of the returned text
+ *
+ * Returns a malloc()ed, NUL-terminated copy the caller must free(), or
+ * NULL when `buf` is NULL, shorter than a header, or allocation fails.
+ */
+char *rccStringExtract(const rcc_string buf, int len, int *rlen) {
+    char *res;
+
+    if (!buf) return NULL;
+
+    len = STRNLEN(buf, len);
+    /* Compare before subtracting so the test does not depend on how an
+       unsigned wrap-around converts back to int. */
+    if (len < (int)sizeof(rcc_string_header)) return NULL;
+    len -= (int)sizeof(rcc_string_header);
+
+    res = (char*)malloc(len+1);
+    if (!res) return NULL;
+
+    strncpy(res, buf + sizeof(rcc_string_header), len);
+    res[len] = 0;
+
+    if (rlen) *rlen = len;
+
+    return res;
+}
+
+/*
+ * Comparison helpers: each applies the matching C library routine to the
+ * text of both arguments (see rccStringGet) and returns its int result.
+ */
+int rccStringCmp(const rcc_string str1, const rcc_string str2) {
+    return strcmp(rccStringGet(str1), rccStringGet(str2));
+}
+
+int rccStringNCmp(const rcc_string str1, const rcc_string str2, size_t n) {
+    return strncmp(rccStringGet(str1), rccStringGet(str2), n);
+}
+
+int rccStringCaseCmp(const rcc_string str1, const rcc_string str2) {
+    return strcasecmp(rccStringGet(str1), rccStringGet(str2));
+}
+
+int rccStringNCaseCmp(const rcc_string str1, const rcc_string str2, size_t n) {
+    return strncasecmp(rccStringGet(str1), rccStringGet(str2), n);
+}
+
diff --git a/src/xml.c b/src/xml.c
new file mode 100644
index 0000000..a2225f2
--- /dev/null
+++ b/src/xml.c
@@ -0,0 +1,8 @@
+
+/* Placeholder: stores nothing yet and always returns 0. */
+int rccSave(rcc_context *ctx) {
+    return 0;
+}
+
+/* Placeholder: loads nothing yet and always returns 0. */
+int rccLoad(rcc_context *ctx) {
+    return 0;
+} |