#include <stdio.h>
#include <string.h>

#include <librcc.h>

/* Mask with only bit number i set.  The argument is parenthesised so that
 * compound expressions such as bit(a|b) expand as intended. */
#define bit(i) (1<<(i))

/*
 * Latin Unicode subsets (code points):
 * 0x100 - 0x17E    (within Latin Extended-A)
 * 0x180 - 0x24F    (Latin Extended-B)
 * 0x1E00 - 0x1EFF  (Latin Extended Additional)
 */

/*
 * Guess whether a buffer holds UTF-8 text or text in a single-byte charset.
 *
 * The buffer is scanned once and a score is kept:
 *   +1 for each continuation byte of a sequence whose lead byte is 0xC2 or
 *      0xC3 (the UTF-8 encodings of U+0080..U+00FF, i.e. the accented
 *      Latin-1 letters and symbols);
 *   -1 for each byte that breaks the UTF-8 structure (a lead byte that is
 *      not followed by a continuation byte, a stray continuation byte, or
 *      the bytes 0xFE / 0xFF which never occur in UTF-8).
 * ASCII bytes (< 128) are skipped and influence neither the score nor the
 * pending-sequence state.
 *
 * ctx  - engine context; not used by this engine.
 * sbuf - text to analyse; must be NUL-terminated when len is 0.
 * len  - number of bytes in sbuf, or 0 to measure it with strlen().
 *
 * Returns 0 when the score is positive and 1 otherwise.
 * NOTE(review): these values presumably index the charset list of
 * western_engine ("UTF-8", "ISO8859-1") - confirm against the LibRCC API.
 */
static rcc_autocharset_id AutoengineWestern(rcc_engine_context ctx, const char *sbuf, int len) {
    /* Work on unsigned bytes so that comparisons with 0x80..0xFF are valid
     * regardless of the signedness of plain char. */
    const unsigned char *buf = (const unsigned char *)sbuf;
    long i;
    int j;
    int bytes = 0;  /* >0: continuation bytes still expected;
                       <0: stray continuation bytes tolerated after a break */
    int rflag = 0;  /* non-zero while inside a sequence started by C2/C3 */
    int res = 0;    /* score: positive means "looks like UTF-8" */

    (void)ctx;

    if (!len) len = (int)strlen(sbuf);

    for (i = 0; i < len; i++) {
        if (buf[i] < 128) continue;

        if (bytes > 0) {
            /* Inside a multi-byte sequence: expect 10xxxxxx. */
            if ((buf[i] & 0xC0) == 0x80) {
                if (rflag) res++;
                bytes--;
            } else {
                /* Sequence broken: penalise and remember how many trailing
                 * continuation bytes may still follow without penalty. */
                res--;
                bytes = 1 - bytes;
                rflag = 0;
            }
            continue;
        }

        /* Find the highest clear bit below bit 7; its position gives the
         * sequence length announced by a lead byte. */
        for (j = 6; j >= 0; j--)
            if ((buf[i] & bit(j)) == 0) break;

        /* j == 6       : 10xxxxxx, a continuation byte without a lead byte.
         * j == 0 / -1  : 0xFE / 0xFF, never valid in UTF-8. */
        if ((j <= 0) || (j == 6)) {
            if ((j == 6) && (bytes < 0)) bytes++;
            else res--;
            continue;
        }

        bytes = 6 - j;
        /* The flag describes the current sequence only; clear whatever a
         * previous, already completed sequence left behind. */
        rflag = 0;
        if (bytes == 1) {
            /* Two-byte sequences led by C2/C3 encode U+0080..U+00FF. */
            if (buf[i] == 0xC2) rflag = 1;
            else if (buf[i] == 0xC3) rflag = 2;
        }
    }

    if (res > 0) return (rcc_autocharset_id)0;
    return (rcc_autocharset_id)1;
}

/*
 * Engine descriptor handed out by rccGetInfo().
 * NOTE(review): the meaning of each member is defined by rcc_engine in
 * librcc.h, which is not visible here; the last member is presumably the
 * list of charsets that the ids returned by AutoengineWestern (0 and 1)
 * refer to - confirm against the header.
 */
static rcc_engine western_engine = {
    "Western",
    NULL,
    NULL,
    AutoengineWestern,
    { "UTF-8", "ISO8859-1", NULL }
};

/*
 * Plugin entry point: hand out the descriptor of the Western engine.
 *
 * lang - language name; only checked for being non-NULL, its contents are
 *        not inspected.
 *
 * Returns a pointer to the statically allocated engine descriptor, or NULL
 * when lang is NULL.
 */
rcc_engine *rccGetInfo(const char *lang) {
    return lang ? &western_engine : NULL;
}