From 60bd665e74cfeeaf70882173a0dd56c883e2014a Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Fri, 12 Mar 2021 03:55:34 +0100 Subject: Added to git tree --- fixes/libguess-fixes/README | 1 + fixes/libguess-fixes/libguess-ds-cn.patch | 62 +++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 fixes/libguess-fixes/README create mode 100644 fixes/libguess-fixes/libguess-ds-cn.patch (limited to 'fixes/libguess-fixes') diff --git a/fixes/libguess-fixes/README b/fixes/libguess-fixes/README new file mode 100644 index 0000000..bad7594 --- /dev/null +++ b/fixes/libguess-fixes/README @@ -0,0 +1 @@ +This patch adds BIG5 encoding detection into the Chinese detection engine. diff --git a/fixes/libguess-fixes/libguess-ds-cn.patch b/fixes/libguess-fixes/libguess-ds-cn.patch new file mode 100644 index 0000000..7c2384b --- /dev/null +++ b/fixes/libguess-fixes/libguess-ds-cn.patch @@ -0,0 +1,62 @@ +diff -dPNur libguess-0.2.0-d7/guess.c libguess-0.2.0-d7-new/guess.c +--- libguess-0.2.0-d7/guess.c 2006-12-05 17:59:32.000000000 +0100 ++++ libguess-0.2.0-d7-new/guess.c 2007-06-26 19:56:59.000000000 +0200 +@@ -44,7 +44,7 @@ + /* ORDER_** &highest, &second, ... &lowest */ + #define ORDER_JP &utf8, &sjis, &eucj + #define ORDER_TW &utf8, &big5 +-#define ORDER_CN &utf8, &gb2312, &gb18030 ++#define ORDER_CN &utf8, &gb2312, &gb18030, &big5 + #define ORDER_KR &utf8, &euck, &johab + + /* workaround for that glib's g_convert can't convert +@@ -252,6 +252,8 @@ + guess_dfa gb2312 = DFA_INIT(guess_gb2312_st, guess_gb2312_ar); + guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); + guess_dfa gb18030 = DFA_INIT(guess_gb18030_st, guess_gb18030_ar); ++ guess_dfa big5 = DFA_INIT(guess_big5_st, guess_big5_ar); ++ + guess_dfa *top = NULL; + + guess_dfa *order[] = { ORDER_CN, NULL }; +@@ -287,22 +289,27 @@ + } + + if (DFA_ALIVE(gb2312)) { +- if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030)) ++ if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030) && !DFA_ALIVE(big5)) + return "GB2312"; + DFA_NEXT(gb2312, c); + } + if (DFA_ALIVE(utf8)) { +- if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(gb18030)) ++ if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(gb18030) && !DFA_ALIVE(big5)) + return "UTF-8"; + DFA_NEXT(utf8, c); + } + if (DFA_ALIVE(gb18030)) { +- if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb2312)) ++ if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb2312) && !DFA_ALIVE(big5)) + return "GB18030"; + DFA_NEXT(gb18030, c); + } ++ if (DFA_ALIVE(big5)) { ++ if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb2312) && !DFA_ALIVE(gb18030)) ++ return "big5"; ++ DFA_NEXT(big5, c); ++ } + +- if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030)) { ++ if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030) && !DFA_ALIVE(big5)) { + /* we ran out the possibilities */ + return NULL; + } +@@ -323,6 +330,8 @@ + return "UTF-8"; + if (top == &gb18030) + return "GB18030"; ++ if (top == &big5) ++ return "BIG5"; + return NULL; + } + -- cgit v1.2.3