/*///////////////////////////////////////////////////////////////////////
Copyright (c) 1993-1999 Electrotechnical Laboratry (ETL), AIST, MITI
Copyright (c) 1993-1999 Yutaka Sato

Permission to use, copy, and distribute this material for any purpose
and without fee is hereby granted, provided that the above copyright
notice and this permission notice appear in all copies.
ETL MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY OF THIS
MATERIAL FOR ANY PURPOSE.  IT IS PROVIDED "AS IS", WITHOUT ANY EXPRESS
OR IMPLIED WARRANTIES.
/////////////////////////////////////////////////////////////////////////
Content-Type:	program/C; charset=US-ASCII
Program:	JIS.c
Author:		Yutaka Sato <ysato@etl.go.jp>
Description:
History:
	930923	extracted from codeconv.c of cosmos
//////////////////////////////////////////////////////////////////////#*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include "ystring.h"
#include "file.h"

/* When non-zero, guessCode() prints its counters to stderr on every call. */
int CCX_debug = 0;

/*
 *	STATUS CHANGE CODES
 */
/* ESC followed by TO_2BCODE / TO_1BCODE introduces the escape sequences
 * recognized by FIX_2022() and strip_ISO2022JP().
 */
#define ESC		'\033'
#define TO_2BCODE	'$'
#define TO_1BCODE	'('

#define TO_KANA		'\016'
#define TO_KANAOUT	'\017'

/* complete escape sequences as strings */
#define TO_KANJI	"\033$B"
#define TO_ASCII	"\033(B"

/*
 * Shift_JIS byte classification.
 * The arguments of IS_SJIS_LO/HI1/HI2 are expanded without parentheses
 * and more than once, so pass simple side-effect free expressions.
 */
#define IS_SJIS_LO(lo)	((0x40<=lo)&&(lo!=0x7F)&&(lo<=0xFC))
#define IS_SJIS_HI1(hi) ((0x81<=hi)&&(hi<=0x9F))	/* 1st lev. */
#define IS_SJIS_HI2(hi) ((0xE0<=hi)&&(hi<=0xEF))	/* 2nd lev. */
#define IS_SJIS_HI(hi)	(IS_SJIS_HI1(hi)||IS_SJIS_HI2(hi))
/*
 * True when (hi,lo) is acceptable as a 2-byte SJIS character.
 * Side effect: in_sjis is ASSIGNED 1 when hi is a 1st level lead byte,
 * so in_sjis must be a modifiable lvalue.  A 2nd level lead byte is
 * accepted only when in_sjis is already non-zero.
 */
#define IS_SJIS_2B(hi,lo,in_sjis)\
	(!IS_SJIS_LO(lo) ? 0:\
	IS_SJIS_HI1(hi) ? (in_sjis = 1):\
	in_sjis && IS_SJIS_HI2(hi))

/* 1-byte (8bit) kana range; also the index range of the SJIS_1B_TO_* tables */
#define IS_SJIS_1B(ch)	(0xA1 <= (ch) && (ch) <= 0xDF)

/* 1-byte marks that sjis_1b_to_jis() tries to merge into the preceding kana */
#define SJIS_DAKUTEN	0xDE
#define SJIS_HANDAKU	0xDF
/*
 * Conversion tables for 1-byte kana, indexed by (code - 0xA1) for the
 * codes 0xA1..0xDF (see sjis_1b_to_jis()).  Each non-null entry is a
 * two character string whose two bytes are emitted as one 2-byte
 * character by the callers.
 *
 * SJIS_1B_TO_JIS:	the kana alone (every slot is filled).
 */
static const char *SJIS_1B_TO_JIS[] = {
/* 0xA1-A7 */       "!#", "!V", "!W", "!\"","!%", "%r", "%!",
/* 0xA8-AF */ "%#", "%%", "%'", "%)", "%c", "%e", "%g", "%C",
/* 0xB0-B7 */ "!<", "%\"","%$", "%&", "%(", "%*", "%+", "%-",
/* 0xB8-BF */ "%/", "%1", "%3", "%5", "%7", "%9", "%;", "%=",
/* 0xC0-C7 */ "%?", "%A", "%D", "%F", "%H", "%J", "%K", "%L",
/* 0xC8-CF */ "%M", "%N", "%O", "%R", "%U", "%X", "%[", "%^",
/* 0xD0-D7 */ "%_", "%`", "%a", "%b", "%d", "%f", "%h", "%i",
/* 0xD8-DF */ "%j", "%k", "%l", "%m", "%o", "%s", "!+", "!,"
};
/*
 * SJIS_1B_TO_DAKUTEN:	the kana merged with a following SJIS_DAKUTEN (0xDE);
 * 0 where no merged form is defined.
 */
static const char *SJIS_1B_TO_DAKUTEN[] = {
/* 0xA1-A7 */       0,    0,    0,    0,    0,    0,    0,
/* 0xA8-AF */ 0,    0,    0,    0,    0,    0,    0,    0,
/* 0xB0-B7 */ 0,    0,    0,    "%t", 0,    0,    "%,", "%.",
/* 0xB8-BF */ "%0", "%2", "%4", "%6", "%8", "%:", "%<", "%>",
/* 0xC0-C7 */ "%@", "%B", "%E", "%G", "%I", 0,    0,    0,
/* 0xC8-CF */ 0,    0,    "%P", "%S", "%V", "%Y", "%\\",0,
/* 0xD0-D7 */ 0,    0,    0,    0,    0,    0,    0,    0,
/* 0xD8-DF */ 0,    0,    0,    0,    0,    0,    0,    0
};
/*
 * SJIS_1B_TO_HANDAKU:	the kana merged with a following SJIS_HANDAKU (0xDF);
 * 0 where no merged form is defined.
 */
static const char *SJIS_1B_TO_HANDAKU[] = {
/* 0xA1-A7 */       0,    0,    0,    0,    0,    0,    0,
/* 0xA8-AF */ 0,    0,    0,    0,    0,    0,    0,    0,
/* 0xB0-B7 */ 0,    0,    0,    0,    0,    0,    0,    0,
/* 0xB8-BF */ 0,    0,    0,    0,    0,    0,    0,    0,
/* 0xC0-C7 */ 0,    0,    0,    0,    0,    0,    0,    0,
/* 0xC8-CF */ 0,    0,    "%Q", "%T", "%W", "%Z", "%]", 0,
/* 0xD0-D7 */ 0,    0,    0,    0,    0,    0,    0,    0,
/* 0xD8-DF */ 0,    0,    0,    0,    0,    0,    0,    0
};

/*
 * EUC byte classification.
 * A 1-byte kana is represented in EUC as EUC_HANKAKU_HI followed by a
 * byte in the IS_SJIS_1B() range.
 * Macro arguments are expanded without parentheses; pass simple expressions.
 */
#define EUC_HANKAKU_HI	0x8E
#define IS_EUC_HANKAKU(hi,lo)	(hi==EUC_HANKAKU_HI && IS_SJIS_1B(lo))

#define IS_EUC_LOS(lo)	((0x21<=lo)&&(lo<=0x7E))	/* standard */
#define IS_EUC_LOX(lo)	((0xA1<=lo)&&(lo<=0xFE))	/* extended */
#define IS_EUC_HI(hi)	((0xA1<=hi)&&(hi<=0xFE))
/* True for a hankaku pair, or for a lead byte in A1..FE followed by a
 * second byte in either the 7bit (21..7E) or the 8bit (A1..FE) range.
 */
#define IS_EUC(hi,lo)\
	(IS_EUC_HANKAKU(hi,lo) || \
	IS_EUC_HI(hi) && (IS_EUC_LOS(lo) || IS_EUC_LOX(lo)))


/*
 * Classify the byte pair (ch1,ch2) as Shift_JIS.
 * Returns 2 for a 2-byte character, 1 for a 1-byte kana (accepted only
 * when in_sjis is non-zero), or 0 when ch1 does not start a SJIS character.
 * A 2nd level lead byte (E0..EF) is accepted only when in_sjis is non-zero.
 */
int IS_SJIS_CHAR(int ch1,int ch2,int in_sjis)
{
	int lead_ok;

	/* 7bit bytes are never SJIS lead bytes */
	if( (ch1 & 0x80) == 0 )
		return 0;

	lead_ok = IS_SJIS_HI1(ch1) || (in_sjis && IS_SJIS_HI2(ch1));
	if( lead_ok && IS_SJIS_LO(ch2) )
		return 2;

	return (in_sjis && IS_SJIS_1B(ch1)) ? 1 : 0;
}
/*
 * Scan the NUL terminated string str and return 1 as soon as an 8bit
 * byte and its successor form a 2-byte SJIS character (IS_SJIS_2B) that
 * is not an EUC hankaku pair; return 0 when no such pair is found.
 * The scan advances one byte at a time.
 */
int IS_SJIS_STR(unsigned char *str)
{	const unsigned char *cur;
	int is_sjis = 0;

	for( cur = str; *cur != 0; cur++ ){
		unsigned char lead = cur[0];
		unsigned char next = cur[1]; /* at worst the terminating NUL */

		if( (lead & 0x80) == 0 )
			continue;
		if( IS_EUC_HANKAKU(lead,next) )
			continue;
		if( IS_SJIS_2B(lead,next,is_sjis) )
			return 1;
	}
	return 0;
}

/*
 * Convert one 2-byte Shift_JIS character (HI,LO) to its 7bit JIS code.
 * The two result bytes are stored into JCODE[0] and JCODE[1];
 * JCODE is returned.  No validation of the input is done.
 */
unsigned char *
SJIS_TO_JIS1(unsigned char HI, unsigned char LO, unsigned char *JCODE)
{
	int row = HI;
	int cell = LO;

	/* fold the two lead byte ranges and spread them onto odd rows */
	if( row <= 0x9F )
		row -= 0x71;
	else	row -= 0xB1;
	row = row * 2 + 1;

	/* 0x7F is skipped in the second byte */
	if( cell > 0x7F )
		cell -= 1;

	/* the upper half of the second byte range belongs to the next row */
	if( cell >= 0x9E ){
		cell -= 0x7D;
		row += 1;
	}else{
		cell -= 0x1F;
	}

	JCODE[0] = (unsigned char)row;
	JCODE[1] = (unsigned char)cell;
	return JCODE;
}
/*
 * Convert one 7bit JIS 2-byte code (HI,LO) to Shift_JIS.
 * The two result bytes are stored into SJCODE[0] and SJCODE[1];
 * SJCODE is returned.  No validation of the input is done.
 */
unsigned char *
JIS_TO_SJIS1(unsigned char HI,unsigned char LO,unsigned char *SJCODE)
{
	unsigned char sj_hi;
	unsigned char sj_lo;

	/* odd rows use the lower half of the second byte range */
	sj_lo = LO + ((HI & 1) ? 0x1F : 0x7D);
	if( sj_lo >= 0x7F )
		sj_lo++;	/* 0x7F is not used as a second byte */

	/* two JIS rows share one lead byte; skip the gap after 0x9F */
	sj_hi = (((int)HI - 0x21) >> 1) + 0x81;
	if( sj_hi > 0x9F )
		sj_hi += 0x40;

	SJCODE[0] = sj_hi;
	SJCODE[1] = sj_lo;
	return SJCODE;
}

/*
 * Map a 1-byte kana ch1 (the 0x80 bit is forced on) to its 2-byte form.
 * ch2 is the following byte: when it is the dakuten/handakuten mark and
 * a merged form exists for ch1, the merged form is returned and *cat is
 * set to 1 so that the caller can consume ch2 as well; otherwise *cat is
 * set to 0 and the plain form is returned.
 * The caller must make sure that (0x80|ch1) is within 0xA1..0xDF;
 * the table index is not range checked here.
 */
static const char *sjis_1b_to_jis(int ch1,int ch2,int *cat)
{	unsigned char kana_ix,mark;
	const char *merged = 0;

	kana_ix = (0x80 | ch1) - 0xA1;
	mark =  0x80 | ch2;

	if( mark == SJIS_DAKUTEN )
		merged = SJIS_1B_TO_DAKUTEN[kana_ix];
	else
	if( mark == SJIS_HANDAKU )
		merged = SJIS_1B_TO_HANDAKU[kana_ix];

	if( merged ){
		*cat = 1;
		return merged;
	}
	*cat = 0;
	return SJIS_1B_TO_JIS[kana_ix];
}
/*
 * Convert one EUC hankaku kana (0x8E + kana) at *spp into the two byte
 * code copied to *dpp.
 * Returns 0 without touching anything when *spp does not point to an
 * EUC hankaku pair; otherwise returns 1 after advancing *dpp by 2 and
 * *spp by 2, or by 4 when a following hankaku (han)dakuten was merged
 * into the kana.
 */
static int EUC_hankaku_TO_JIS(unsigned char **spp,unsigned char **dpp,PVStr(db))
{	const unsigned char *sp;
	unsigned char *dp; /**/
	unsigned char ch1,ch2;
	const char *js;
	int cat;

	sp = *spp;
	dp = *dpp;
	if( !IS_EUC_HANKAKU(sp[0],sp[1]) )
		return 0;

	ch1 = sp[1];
	/*
	 * Only a following byte pair that is itself an EUC hankaku pair
	 * (0x8E prefix) can carry a voiced mark.  Without checking the
	 * prefix, the second byte of an unrelated 2-byte character which
	 * happens to be 0xDE/0xDF would be merged and consumed.
	 * sp[3] is read only when sp[2] is 0x8E, thus not past the NUL.
	 */
	if( IS_EUC_HANKAKU(sp[2],sp[3]) )
		ch2 = sp[3];
	else	ch2 = 0;
	js = sjis_1b_to_jis(ch1,ch2,&cat);
	if( cat )
		sp += 2;	/* the mark has been merged; skip it too */

	Xstrcpy(QVStr(dp,db),js);
	dp += 2; *dpp = dp;
	sp += 2; *spp = (unsigned char*)sp;
	return 1;
}

/* 7bit JIS 2-byte code: both bytes are within 0x21..0x7E */
#define IS_JIS_HI(c1)	(0x20 < (c1) && (c1) < 0x7F)
#define IS_JIS_LO(c1)	(0x20 < (c1) && (c1) < 0x7F)
#define	IS_JIS7(c1,c2)	(IS_JIS_HI(c1) && IS_JIS_LO(c2))
/* control codes 0x0E and 0x0F */
#define SO		('N'-0x40)
#define SI		('O'-0x40)
#define NBSP		0xA0	/* non-breaking space */


/* Store ch at dp and advance dp; when dp is null nothing is stored and
 * the value of the expression is just ch.
 */
#define sputc(dp,ch)	(dp?(*(char*)dp++ = ch):ch)

/*
 * Return non-zero when str begins with a tag name: one or more
 * alphabetic characters terminated by '>' or by white space.
 * Returns 0 for an empty name, for a name including a non-alphabetic
 * character, or when the string ends before a terminator is found.
 */
static int istag(PCStr(str))
{	const char *s;
	unsigned char ch;

	/*
	 * The byte is fetched as unsigned char: passing a negative plain
	 * char (any 8bit byte where char is signed) to isspace()/isalpha()
	 * is undefined behavior.
	 */
	for( s = str; (ch = (unsigned char)*s) != 0; s++ ){
		if( ch == '>' || isspace(ch) )
			return str < s;
		if( !isalpha(ch) )
			return 0;
	}
	return 0;
}

/*
 * Copy src to dst repairing broken 2-byte sequences in ISO-2022 text.
 *
 * While in 2-byte mode (after ESC $ B or ESC $ @):
 *  - if either byte of the next pair is <= 0x20, or (for "text/html")
 *    a closing tag "</name" starts at either byte, ESC ( B is inserted
 *    to return to 1-byte mode before the byte is copied;
 *  - for "text/html", a byte '&' which starts something accepted by
 *    isHTMLentity() (defined elsewhere) is replaced with the character
 *    that function reports, unless the entity is the last thing in src.
 * Returns 1 when any such repair was made, otherwise 0.
 * dst is always NUL terminated.
 */
int FIX_2022(PCStr(src),PVStr(dst),PCStr(ctype))
{	int in2B;
	const char *sp;
	char ch1,ch2;
	refQStr(dp,dst); /**/
	int bad;
	int isHTML,len,ech;

	in2B = 0;
	sp = src;
	bad = 0;

	isHTML = strcasecmp(ctype,"text/html") == 0;

	while( ch1 = *sp++ ){
		/* NOTE(review): up to 4 bytes can be stored in one iteration
		 * (ESC ( B + ch1) while dp+3 is asserted here -- confirm that
		 * this is within what assertVStr() guarantees.
		 */
		assertVStr(dst,dp+3);
		if( ch1 == ESC ){
			/* a recognized designation is copied as is and
			 * switches the mode; anything else falls through
			 */
			if( *sp == TO_2BCODE ){
				if( sp[1] == 'B' || sp[1] == '@' ){
					in2B = 1;
					sputc(dp, ch1);
					sputc(dp, *sp++);
					sputc(dp, *sp++);
					continue;
				}
			}else
			if( *sp == TO_1BCODE ){
				if( sp[1] == 'B' || sp[1] == 'J' ){
					in2B = 0;
					sputc(dp, ch1);
					sputc(dp, *sp++);
					sputc(dp, *sp++);
					continue;
				}
			}
		}

		if( in2B ){
			/* peek the second byte; sp[1] and sp[2] below are read
			 * only after the preceding byte was found to be non-NUL
			 */
			ch2 = sp[0];
			/* ch1,ch2 are plain char: where char is signed, 8bit
			 * bytes are negative and also satisfy "<= 0x20"
			 */
			if( ch1 <= 0x20
			||  ch2 <= 0x20
			||  isHTML && ch1=='<' && sp[0]=='/' && istag(&sp[1])
			||  isHTML && ch2=='<' && sp[1]=='/' && istag(&sp[2]) ){
				/* leave the 2-byte mode before this byte; only ch1
				 * is consumed, ch2 is rescanned in 1-byte mode
				 */
				in2B = 0;
				sputc(dp, ESC);
				sputc(dp, TO_1BCODE);
				sputc(dp, 'B');
				sputc(dp, ch1);
				bad = 1;
				continue;
			}

			/* first byte written as an entity */
			if( isHTML && ch1 == '&' )
			if( len = isHTMLentity(sp,&ech) )
			if( sp[len] != 0 ){
				ch1 = ech;
				sp += len;
				bad = 1;
			}

			ch2 = *sp++;

			/* second byte written as an entity */
			if( isHTML && ch2 == '&' )
			if( len = isHTMLentity(sp,&ech) )
			if( sp[len] != 0 ){
				ch2 = ech;
				sp += len;
				bad = 1;
			}

			sputc(dp, ch1);
			sputc(dp, ch2);
		}else{
			sputc(dp, ch1);
		}
	}
	sputc(dp, 0);
	return bad;
}

/*
 * Return 1 when every 8bit byte in the NUL terminated string euc starts
 * a pair accepted by IS_EUC(), otherwise 0.
 * A string without any 8bit byte is accepted.
 */
static int is_EUC_JP(PCStr(euc))
{	const char *cp = euc;
	int lead,trail;

	while( (lead = *cp & 0xFF) != 0 ){
		if( lead & 0x80 ){
			trail = cp[1] & 0xFF;
			if( !IS_EUC(lead,trail) )
				return 0;
			cp += 2;	/* skip the pair */
		}else{
			cp += 1;
		}
	}
	return 1;
}


/*
 * Character code identifiers, used for the input/output state in CCX
 * (cc_OUT, cc_out, cc_in, ...) and as the index of cc_inswitch[].
 */
#define CC_THRU		0	/* as cc_OUT: conversion is not active (CCXactive()) */
#define CC_ASCII	1
#define CC_JIS2B7	2
#define CC_JIS7K	3	/* with 7bit/1byte kana by ESC(I */
#define CC_SJIS		4
#define CC_EUCJP	5
#define CC_JIS7KANA	6
#define CC_JIS7ROMA	7
#define CC_UTF8		8
#define CC_GUESS	9
#define CC_ASIS		10
#define CC_NONJP	11
#define CC_EURO8	12
/* 8bit 2-byte codes / any 2-byte code */
#define JIS2B8(code)	(code == CC_SJIS || code == CC_EUCJP)
#define JIS2B(code)	(code == CC_JIS2B7 || JIS2B8(code))

/*
 * Known charset names, searched by ccx_codex() from index 1 up to the
 * terminating null entry.  Index 0 is a placeholder so that a zero index
 * can mean "no entry" (see CCXcharset()).
 */
static struct {
  const	char   *cs_name;	/* short name */
  const	char   *cs_formname;	/* formal name returned by CCXcharset() */
	short	cs_charcode;	/* CC_* code */
} charsets[] = {
	{0},
	{"jis",		"ISO-2022-JP",	CC_JIS2B7	},
	{"x-euc-jp",	"x-euc-jp",	CC_EUCJP	},
	{"euc",		"EUC-JP",	CC_EUCJP	},
	{"x-sjis",	"x-sjis",	CC_SJIS		},
	{"sjis",	"Shift_JIS",	CC_SJIS		},
	{"utf8",	"UTF-8",	CC_UTF8		},
	{0},
}; 

typedef struct _CCX {
	short	cc_size;
	short	cc_OUT; /* target charcode of conversion */
	short	cc_out;	/* current output charcode */
	short	cc_outx; /* chaset-name index in chasets[] */
	short	cc_indef; /* default input charcode */
	short	cc_in;	/* current input charcode */
	short	cc_previn; /* previous non-ASCII input charcode */
	char	cc_symconv;
	char	cc_inswitch[16]; /**//* detected input character type */
unsigned char	cc_pinb[8];
	int	cc_pinx;
	int	cc_id;
	int	cc_nonASCII;
	short	cc_sjis;
	short	cc_euc;
	short	cc_nonJP;
	short	cc_nonjp8;
unsigned char	cc_thru8[4]; /* total size of CCX must be <= 64 */
} CCX;
#define cc_UTF8		cc_inswitch[CC_UTF8]
#define cc_SJIS		cc_inswitch[CC_SJIS]
#define cc_JIS7		cc_inswitch[CC_JIS2B7]
#define cc_EUCJP	cc_inswitch[CC_EUCJP]

/* flag bits of cc_symconv, parsed from the charset name by scanFlags() */
#define SCtoASCII	0x04 /* multibyte alnum/symbol into to ASCII */
#define SCtoJIS2B	0x08
#define SCtoLF		0x10
#define SCtoCRLF	0x20
#define SCtoIGNBAD	0x40 /* ignore bad JIS sequence */
#define IGN_bad(ccx)	(ccx->cc_symconv & SCtoIGNBAD)

/*
 * Switch the current input code to cc: the code being left is saved in
 * cc_previn when it is a 2-byte code, cc_nonASCII is counted up unless
 * cc is CC_ASCII, and cc is flagged as seen in cc_inswitch[].
 * This is a brace block (not an expression); cc is expanded several times.
 */
#define setccin(ccx,cc)	{ \
	if( ccx->cc_in != ccx->cc_previn && JIS2B(ccx->cc_in) )\
		ccx->cc_previn = ccx->cc_in; \
	ccx->cc_in = cc; \
	ccx->cc_nonASCII += (cc != CC_ASCII); \
	ccx->cc_inswitch[cc] = 1; \
}

/* append ch to the pending input; no bound check against cc_pinb[] */
#define pushpending(ccx,ch) (ccx->cc_pinb[ccx->cc_pinx++] = ch)
#define EOB	-1
/*
 * Byte at offset pix+n of the logical input, which is the pin pending
 * bytes of cc_pinb[] followed by cinb[]; EOB beyond pilen.
 * Uses the variables ccx, cinb, pix, pin and pilen of the enclosing function.
 */
#define CHn(n) \
	((pilen <= pix+n) ? EOB : \
	(pix+n < pin) ? ccx->cc_pinb[pix+n] : cinb[pix+n-pin])
/* CH1 and CH2 are both CHn(0): CCXexec() advances pix after fetching
 * CH1, thus CH2..CH4 are relative to the byte next to CH1.
 */
#define CH1 CHn(0)
#define CH2 CHn(0)
#define CH3 CHn(1)
#define CH4 CHn(2)

/* currently in code, or in ASCII after having been in code */
#define inNonASCII(ccx,code) \
	(  ccx->cc_in == code \
	|| ccx->cc_in == CC_ASCII && ccx->cc_previn == code )

/* thresholds used by guessCode() */
#define CC_CHECKLEN	8
#define CC_BEEUC	4

/*
 * Print to stderr, on a single line, the "seen" flag of each input code
 * together with the guess counters for SJIS and EUC-JP.
 */
void CCXcounts(CCX *ccx)
{
	FILE *log = stderr;

	fprintf(log,"UTF8[%d]",ccx->cc_UTF8);
	fprintf(log,"JIS7[%d]",ccx->cc_JIS7);
	fprintf(log,"SJIS[%d/%d]",ccx->cc_SJIS,ccx->cc_sjis);
	fprintf(log,"EUCJP[%d/%d]\n",ccx->cc_EUCJP,ccx->cc_euc);
}

/*
 * distinguish SJIS from EUC
 *
 * Looks ahead in the logical input (pending bytes + cinb, see CHn())
 * starting at pix and counts how many 8bit characters are acceptable
 * as SJIS and as EUC.
 * Returns CC_SJIS, CC_EUCJP, CC_JIS2B7 (an ESC is found before any 8bit
 * byte), CC_NONJP, CC_EURO8, or -1 when nothing could be decided.
 * Side effect: cc_sjis, cc_euc or cc_nonJP of ccx is incremented
 * according to the answer (not on the two early returns).
 */
static int guessCode(CCX *ccx, const unsigned char *cinb, int pix, int pin, int pilen)
{	int cn,ci,ch1,ch2;
	int cs = -1;
	int n2K = 0,n1K = 0,n2B = 0; /* EUC hankaku, SJIS 1-byte kana, pairs with 8bit 2nd byte */
	int nJ8 = 0,nSJ = 0,nEJ = 0; /* 8bit chars. examined, acceptable as SJIS, as EUC */
	int nL8;
	int as_sjis = 1;
	int is2B;
	int nES = 0; /* non-kanji symbols in EUC */
	int preL8 = -1;
	int isJP = ccx->cc_sjis + ccx->cc_euc;

	nL8 = ccx->cc_nonJP; /* bias for tie break */
	if( ccx->cc_in == CC_EURO8 && 0 < nL8 ){
		return CC_EURO8;
	}
	if( 5 <= nL8 )
		return CC_NONJP;
	if( ccx->cc_in == CC_SJIS || ccx->cc_in == CC_EUCJP )
		isJP = 1;

	/* look further ahead while no non-ASCII has been seen yet */
	if( ccx->cc_nonASCII == 0 )
		cn = 1024;
	else	cn = 128;

	for( ci = 0; ci < cn; ){
		ch1 = CHn(ci);
		ch2 = 0;
		ci++;
		if( ch1 == EOB )
			break;
		if( ch1 == 033 ){
			if( nJ8 == 0 ){
				cs = CC_JIS2B7;
				break;
			}
		}
		/* once non-ASCII has been seen, do not look beyond the line */
		if( ch1 == '\r' || ch1 == '\n' ){
			if( 0 < nJ8 && ccx->cc_nonASCII != 0 )
				break;
		}

		if( (ch1 & 0x80) == 0 )
			continue;

		ch2 = CHn(ci);
		ci++;
		if( ch2 == EOB )
			break;

		is2B = 0;
		/* count as EUC only while every 8bit char. so far was EUC */
		if( nJ8 == nEJ ){
			if( IS_EUC(ch1,ch2) ){
				is2B = 1;
				nEJ++;
				if( ch2 & 0x80 )
					n2B++;
				if( IS_EUC_HANKAKU(ch1,ch2) )
					n2K++;
				if( ch1 == 0xA3 || ch1 == 0xA4 || ch1 == 0xA5 )
					nES++;
			}
		}
		/* count as SJIS only while every 8bit char. so far was SJIS */
		if( nJ8 == nSJ ){
			if( IS_SJIS_2B(ch1,ch2,as_sjis) ){
				is2B = 1;
				nSJ++;
				if( ch2 & 0x80 )
				n2B++;
			}
			if( IS_SJIS_1B(ch1) ){
				nSJ++;
				n1K++;
				/* a 1-byte kana: give ch2 back to be rescanned */
				if( !is2B )
					ci--;
			}
		}
		if( n2B == 0 )
		if( isJP == 0 )
		{
			/* distinguish from SJIS where 8bit char. appear closely */
			if( preL8 < 0 || 2 < (ci-preL8) ){
				preL8 = ci;
				nL8++;
			}
		}
		nJ8++;

		/* stop when the counts diverged or enough chars. were seen */
		if( 0 < n2B )
		if( nSJ != nEJ || nSJ < nJ8 || CC_CHECKLEN <= nJ8 )
			break;
	}
	if( 0 < nJ8 ){
		if( n2B == 0 && nJ8 <= nL8 && nSJ <= nL8 && nEJ <= nL8 ){
/*
 fprintf(stderr,"#### EURO8? nJ8=%d n2B=%d nSJ=%d nEJ=%d nL8=%d %d/%d\n",
nJ8,n2B,nSJ,nEJ,nL8,ci,cn);
*/
			cs = CC_NONJP;
		}else
		if( nSJ == nEJ ){
			/* a tie: prefer the code currently in use, then the
			 * default input code, then heuristics
			 */
			if( inNonASCII(ccx,CC_SJIS) ) cs = CC_SJIS; else
			if( inNonASCII(ccx,CC_EUCJP) ) cs = CC_EUCJP; else
			if( nJ8 == nSJ ){
				if( ccx->cc_indef == CC_EUCJP
				 || ccx->cc_indef == CC_SJIS )
					cs = ccx->cc_indef;
				else
				if( n1K == nES )
					cs = CC_EUCJP;
				else
				/* long indistinguish SJIS is not likely to be */
				if( CC_BEEUC < nJ8 )
					cs = CC_EUCJP;
				else	cs = CC_SJIS;
			}
		}
		if( cs == -1 ){
			if( nJ8 == nSJ ){ cs = CC_SJIS; }else
			if( nJ8 == nEJ ){ cs = CC_EUCJP; }
		}
	}

	if( CCX_debug )
	fprintf(stderr,"#### %3d %3d <%02X %02X> J%d S%d(%d) E%d(%d)%d [%s]\n",
		ccx->cc_nonASCII,ci,ch1,ch2,
		nJ8, nSJ,n1K, nEJ,n2K,nES,
		cs==CC_SJIS?"SJIS":(cs==CC_EUCJP?"EUC":"?"));

/*
if( ccx->cc_nonJP && (cs == CC_SJIS|cs == CC_EUCJP) )
 fprintf(stderr,"## MISSED GUESS-A nJ8=%d n2B=%d nSJ=%d nEJ=%d nL8=%d %d\n",
nJ8,n2B,nSJ,nEJ,nL8,ci);
if( isJP && cs == CC_NONJP )
 fprintf(stderr,"## MISSED GUESS-B nJ8=%d n2B=%d nSJ=%d nEJ=%d nL8=%d %d\n",
nJ8,n2B,nSJ,nEJ,nL8,ci);
*/

	/* record the answer as the bias for later guesses */
	if( cs == CC_SJIS )
		ccx->cc_sjis++;
	if( cs == CC_EUCJP )
		ccx->cc_euc++;
	if( cs == CC_NONJP )
		ccx->cc_nonJP++;

	return cs;
}
/*
 * True when guessCode() answers CC_SJIS for the input at pix.
 * As a side effect of guessCode(), the guess counters in ccx are updated.
 */
static int is_SJIS(CCX *ccx,const unsigned char *cinb,int pix,int pin,int pilen)
{
	return guessCode(ccx,cinb,pix,pin,pilen) == CC_SJIS;
}

static int UTF8toLocal(int ch1,int ch2,int ch3,int charset,unsigned char *op,const unsigned char *ox);
/*
 * Return the ASCII character corresponding to a multi-byte symbol or
 * alphanumeric, or 0 when there is no ASCII counterpart.
 * (ch1,ch2[,ch3]) is a character in the code given by chset: CC_UTF8
 * (3 bytes, first mapped through UTF8toLocal()), CC_EUCJP, CC_SJIS, or
 * anything else for a 7bit JIS pair taken as is.
 * Row '!' is translated through a table; row '#' yields its second
 * byte when that is alphanumeric.
 */
int JIS_TO_ASCII(int chset,int ch1,int ch2,int ch3)
{	static const struct {
		char	sym;	/* second byte in row '!' */
		char	asc;	/* ASCII counterpart */
	} row1[] = {
		{'!',' '}, {'$',','}, {'%','.'}, {'#','.'},
		{'\'',':'},{'(',';'}, {')','?'}, {'-','\''},
		{'.','\''},{'*','!'}, {'0','^'}, {'1','~'},
		{'2','_'}, {'=','-'}, {'?','/'}, {'@','\\'},
		{'A','~'}, {'C','|'}, {'F','`'}, {'G','\''},
		{'H','"'}, {'I','"'}, {'J','('}, {'K',')'},
		{'N','['}, {'O',']'}, {'P','{'}, {'Q','}'},
		{'R','<'}, {'S','>'}, {'\\','+'},{'Z','['},
		{'[',']'}, {']','-'}, {'a','='}, {'c','<'},
		{'d','>'}, {'o','\\'},{'p','$'}, {'s','%'},
		{'t','#'}, {'u','&'}, {'v','*'}, {'w','@'}
	};
	unsigned char jis[3];
	int hi,lo,ix;

	hi = ch1;
	lo = ch2;

	/* normalize the character to a 7bit JIS pair (hi,lo) */
	if( chset == CC_UTF8 ){
		if( UTF8toLocal(ch1,ch2,ch3,CC_EUCJP,jis,jis+1) == 2 ){
			hi = jis[0];
			lo = jis[1];
			chset = CC_EUCJP;
		}
	}
	if( chset == CC_EUCJP ){
		hi &= 0x7F;
		lo &= 0x7F;
	}else
	if( chset == CC_SJIS ){
		SJIS_TO_JIS1(hi,lo,jis);
		hi = jis[0];
		lo = jis[1];
	}

	if( hi == '#' )
		return isalnum(lo) ? lo : 0;
	if( hi != '!' )
		return 0;

	for( ix = 0; ix < (int)(sizeof(row1)/sizeof(row1[0])); ix++ ){
		if( row1[ix].sym == lo )
			return row1[ix].asc;
	}
	return 0;
}

/* Non-zero when Shift_JIS has been seen in the input of ccx. */
int CCXwithSJIS(CCX *ccx)
{
	int seen = ccx->cc_SJIS;

	return seen;
}
/* Non-zero when any of SJIS, EUC-JP, 7bit JIS or UTF-8 has been seen
 * in the input of ccx (bitwise OR of the four "seen" flags).
 */
int CCXwithJP(CCX *ccx)
{
	int seen;

	seen  = ccx->cc_SJIS;
	seen |= ccx->cc_EUCJP;
	seen |= ccx->cc_JIS7;
	seen |= ccx->cc_UTF8;
	return seen;
}
/*
 * Set the list of 8bit bytes to be passed through (see isthru8()) from
 * the string thru8.  At most sizeof(cc_thru8) bytes are taken; the list
 * is NUL terminated only when it is shorter than that.
 */
void CCXthru8(CCX *ccx,PCStr(thru8))
{	const int cap = sizeof(ccx->cc_thru8);
	int ix = 0;

	while( ix < cap && thru8[ix] != 0 ){
		ccx->cc_thru8[ix] = thru8[ix];
		ix++;
	}
	if( ix < cap )
		ccx->cc_thru8[ix] = 0;
}
/* Return ch when it is listed in cc_thru8[] of ccx, otherwise 0.
 * The list ends at the first zero byte or at the end of the array.
 */
static int isthru8(CCX *ccx, unsigned char ch)
{	const unsigned char *tp = ccx->cc_thru8;
	const unsigned char *tx = tp + sizeof(ccx->cc_thru8);

	for( ; tp < tx && *tp != 0; tp++ ){
		if( *tp == ch )
			return ch;
	}
	return 0;
}
/* Zero the first cc_size bytes of ccx (cc_size itself included). */
void CCXclear(CCX *ccx)
{
	int nbytes = ccx->cc_size;

	bzero(ccx,nbytes);
}
/* True unless the conversion target of ccx is CC_THRU. */
int CCXactive(CCX *ccx)
{
	int target = ccx->cc_OUT;

	return target != CC_THRU;
}
/* True when the conversion target of ccx is CC_GUESS. */
int CCXguessing(CCX *ccx)
{
	int target = ccx->cc_OUT;

	return target == CC_GUESS;
}
/*
 * Report the name of the conversion target of ccx.
 * When xcode is not null, *xcode is set to the name, or to 0 when the
 * target has no name here.
 * Returns 0 when there is no name, otherwise (1 | cc_symconv).
 */
int CCXoutcharset(CCX *ccx,const char **xcode)
{	const char *code;
	int target = ccx->cc_OUT;

	if( target == CC_ASIS   ) code = ""; else
	if( target == CC_ASCII  ) code = "US-ASCII"; else
	if( target == CC_UTF8   ) code = "utf8"; else
	if( target == CC_JIS2B7 ) code = "iso-2022-jp"; else
	if( target == CC_SJIS   ) code = "Shift_JIS"; else
	if( target == CC_EUCJP  ) code = "EUC-JP"; else
	if( target == CC_GUESS  ) code = "guess"; else
				  code = 0;

	if( xcode )
		*xcode = code;

	if( code == 0 )
		return 0;
	return 1 | ccx->cc_symconv;
}
/* Formal name of the output charset from charsets[], or 0 when ccx has
 * no charsets[] index (cc_outx is 0).
 */
const char *CCXcharset(CCX *ccx)
{
	int ox = ccx->cc_outx;

	if( ox == 0 )
		return 0;
	return charsets[ox].cs_formname;
}
/*
 * Name the charset identified in the input seen so far by ccx.
 * UTF-8 wins when seen; Shift_JIS is answered only when it has been
 * guessed at least as often as EUC; then EUC-JP, then ISO-2022-JP;
 * "US-ASCII" when nothing else was seen.
 */
const char *CCXident(CCX *ccx)
{
	if( ccx->cc_UTF8 ){
		return "UTF-8";
	}
	if( ccx->cc_SJIS && ccx->cc_euc <= ccx->cc_sjis ){
		return "Shift_JIS";
	}
	if( ccx->cc_EUCJP ){
		return "EUC-JP";
	}
	if( ccx->cc_JIS7 ){
		return "ISO-2022-JP";
	}
	return "US-ASCII";
}
/*
 * Parse the leading two-character option prefixes of a charset name:
 *   "a-" SCtoASCII, "b-" SCtoIGNBAD, "A-" SCtoJIS2B,
 *   "r+" SCtoCRLF,  "r-" SCtoLF
 * Returns the OR of the flags found.  When nccn is not null, *nccn is
 * set to the first position which is not such a prefix.
 */
static int scanFlags(PCStr(ccn),const char **nccn)
{	const char *cp = ccn;
	int flags = 0;
	int bit;

	while( *cp ){
		if( strneq(cp,"a-",2) ) bit = SCtoASCII; else
		if( strneq(cp,"b-",2) ) bit = SCtoIGNBAD; else
		if( strneq(cp,"A-",2) ) bit = SCtoJIS2B; else
		if( strneq(cp,"r+",2) ) bit = SCtoCRLF; else
		if( strneq(cp,"r-",2) ) bit = SCtoLF; else
					bit = 0;
		if( bit == 0 )
			break;
		flags |= bit;
		cp += 2;
	}
	if( nccn )
		*nccn = cp;
	return flags;
}
/*
 * Translate a charset name (after skipping the option prefixes handled
 * by scanFlags()) into a CC_* code.
 * The name is looked up in charsets[] first (case insensitive, short or
 * formal name), then as "guess", then by its first character.
 * Returns the code, or -1 for an unknown name.
 * *cxp receives the index reached in charsets[] (the index of the
 * terminating entry for "guess") or 0 when the name was resolved by its
 * first character or begins with "koi8"; it is left untouched when an
 * unknown first character makes the function return -1.
 */
static int ccx_codex(PCStr(ccn),int *cxp)
{	int ix,code;

	scanFlags(ccn,&ccn);

	code = -1;
	for( ix = 1; charsets[ix].cs_name != 0; ix++ ){
		if( strcaseeq(ccn,charsets[ix].cs_name)
		 || strcaseeq(ccn,charsets[ix].cs_formname) ){
			code = charsets[ix].cs_charcode;
			break;
		}
	}
	if( code == -1 && strcaseeq(ccn,"guess") )
		code = CC_GUESS;

	if( code == -1 ){
		ix = 0;
		/* "koi8..." must not be taken as 'k' (CC_JIS7K) */
		if( !strncaseeq(ccn,"koi8",4) ){
			switch( *ccn ){
			    case 0:
				code = CC_ASIS;
				break;
			    case 'a':
				code = CC_ASCII;
				break;
			    case 'u':
				code = CC_UTF8;
				break;
			    case 'j': case 'J':
				code = CC_JIS2B7;
				break;
			    case 'k': case 'K':
				code = CC_JIS7K;
				break;
			    case 's': case 'S':
				code = CC_SJIS;
				break;
			    case 'e': case 'E': case 'U':
				code = CC_EUCJP;
				break;
			    case 't': case 'T':
				code = CC_THRU;
				break;
			    case '+':
				code = CC_GUESS;
				break;
			    default:
				return -1;
			}
		}
	}
	*cxp = ix;
	return code;
}
/* Compare two charset names by their CC_* codes as given by ccx_codex():
 * returns 0 when both names resolve to the same code.
 */
int CCXcharsetcmp(PCStr(cs1),PCStr(cs2))
{	int unused_cx;
	int left,right;

	left = ccx_codex(cs1,&unused_cx);
	right = ccx_codex(cs2,&unused_cx);
	return left - right;
}
/* the name most recently given to CCX_setincode() */
static char incode[64]; /**/
/*
 * Declare the charset of the input of ccx by name.
 * A known name sets the current (except for 7bit JIS, which starts in
 * ASCII mode) and the previous input code.  An unknown name marks the
 * input as not to be converted: CC_EURO8 for "iso-8859*" and "koi8*",
 * CC_NONJP for anything else.
 */
void CCX_setincode(CCX *ccx,PCStr(ccn))
{	int code,cx;
	int isEuro;

	XStrncpy(ZVStr(incode,sizeof(incode)),ccn,sizeof(incode));
	code = ccx_codex(ccn,&cx);

	if( code < 0 ){
		/* not to be converted */
		ccx->cc_nonJP = 10;
		isEuro = strncasecmp(ccn,"iso-8859",8) == 0
		      || strncasecmp(ccn,"koi8",4) == 0;
		ccx->cc_in = isEuro ? CC_EURO8 : CC_NONJP;
		ccx->cc_previn = ccx->cc_in;
		return;
	}

	/* don't set current mode to be in 2B7 */
	if( code != CC_JIS2B7 )
		ccx->cc_in = code;
	ccx->cc_previn = code;
	ccx->cc_nonASCII += (code != CC_ASCII);
}
/* serial number of the most recently created context */
static int CCXid;
/*
 * Initialize *ccx for the conversion from the charset named "from" to
 * the one named "to" (which may carry the option prefixes of scanFlags()).
 * Returns 0, leaving *ccx untouched, when "to" is unknown; otherwise
 * returns sizeof(CCX).  An unknown "from" is treated as ASCII.
 */
int CCXcreate(PCStr(from),PCStr(to),CCX *ccx)
{	int outcode,outx;
	int incc,inx;

	outcode = ccx_codex(to,&outx);
	if( outcode < 0 )
		return 0;

	incode[0] = 0;
	bzero(ccx,sizeof(CCX));

	/* output side */
	ccx->cc_size = sizeof(CCX);
	ccx->cc_id = ++CCXid;
	ccx->cc_symconv = scanFlags(to,NULL);
	ccx->cc_OUT = outcode;
	ccx->cc_out = CC_ASCII;
	ccx->cc_outx = outx;

	/* input side */
	incc = ccx_codex(from,&inx);
	if( incc < 0 )
		incc = CC_ASCII;
	ccx->cc_indef = ccx->cc_in = ccx->cc_previn = incc;

	ccx->cc_nonASCII = 0;
	return sizeof(CCX);
}
/*
 * Allocate and initialize a conversion context (see CCXcreate()).
 * Returns NULL when the target charset "to" is unknown or when memory
 * cannot be allocated.  The caller owns the returned context, which was
 * obtained with malloc().
 */
CCX *CCXnew(PCStr(from),PCStr(to))
{	CCX ccxbuf,*ccx;
	int size;

	size = CCXcreate(from,to,&ccxbuf);
	if( size == 0 )
		return NULL;

	ccx = (CCX*)malloc(size);
	if( ccx == NULL )	/* do not copy into a failed allocation */
		return NULL;

	bcopy(&ccxbuf,ccx,size);
	return ccx;
}

/*
 * Output mode switching.  Each macro is a brace block which stores an
 * escape sequence at op (advancing it) when the current output mode
 * cc_out requires it, and then records the new mode in cc_out.
 */
/* ESC $ B unless already in 2-byte JIS */
#define toJIS7(ccx,op) {\
	if( ccx->cc_out != CC_JIS2B7 ){ \
		*op++ = 033; \
		*op++ = '$'; \
		*op++ = 'B'; \
		ccx->cc_out = CC_JIS2B7; \
	}}

/* ESC ( I unless already in 7bit kana */
#define toJIS7K(ccx,op) {\
	if( ccx->cc_out != CC_JIS7K ){ \
		*op++ = 033; \
		*op++ = '('; \
		*op++ = 'I'; \
		ccx->cc_out = CC_JIS7K; \
	}}

/* ESC ( B when leaving 2-byte JIS or 7bit kana, but not when the target
 * is an 8bit code (SJIS/EUC); cc_out becomes CC_ASCII in any case
 */
#define toASCII(ccx,op) { \
	if( ccx->cc_out == CC_JIS2B7 || ccx->cc_out == CC_JIS7K ) \
	if( !JIS2B8(ccx->cc_OUT) ){ \
		*op++ = 033; \
		*op++ = '('; \
		*op++ = 'B'; \
	} \
	ccx->cc_out = CC_ASCII; }

/* ch is expanded without parentheses; pass a simple expression */
#define IS_UTF8_CONT(ch)	((ch & 0xC0) == 0x80)

static int EUCtoUTF8(int euc,unsigned char *us,const unsigned char *ux);

/*
 * Convert ilen bytes at scinb into sout (of size osiz) according to the
 * state kept in ccx, and return the number of bytes stored, not
 * counting the terminating NUL which is always appended.
 *
 * - The bytes left pending in cc_pinb[] by the previous call are
 *   logically prepended to the input (see CHn()); a sequence which is
 *   incomplete at the end of the input is pushed to cc_pinb[] for the
 *   next call.
 * - When sout and scinb are the same buffer, the output is built in a
 *   temporary buffer (outbuf, or malloc'ed when osiz is larger) and
 *   copied back at the end.
 * - Calling with ilen == 0 emits the escape to return to ASCII if the
 *   output is in a JIS mode.
 *
 * NOTE(review): the output bound "op < ox" is tested only at the top of
 * each iteration while a single iteration can store several bytes
 * (e.g. '\r', a 3 byte escape and a 2 byte character, or a longer
 * diagnostic string), so osiz presumably needs some slack -- confirm.
 * NOTE(review): the result of malloc() is not checked.
 */
int CCXexec(CCX *ccx,PCStr(scinb),int ilen,PVStr(sout),int osiz)
{	const unsigned char *cinb = (unsigned char *)scinb;
	unsigned char *out = (unsigned char *)sout; /**/
	int ch1,ch2,ch3,ch4;
	int pch;
	const unsigned char *pinb;
	unsigned char *op; /**/
	const unsigned char *ox;
	int pilen,pix,pin;
	int insjis;
	const char *js;
	int cat;
	int codesw;
	int outlen;
	unsigned char *outtop; /**/
	unsigned char outbuf[2048]; /**/
	int chA;
	int badout = 0;

	/* in-place conversion goes through a temporary output buffer */
	if( out == cinb ){
		if( osiz <= sizeof(outbuf) )
			outtop = outbuf;
		else	outtop = (unsigned char*)malloc(osiz); 
	}else	outtop = out;
	op = outtop;
	ox = outtop + (osiz - 1);

	/* take over the pending bytes: pin of them precede the ilen new bytes */
	pin = ccx->cc_pinx; ccx->cc_pinx = 0;
	pinb = ccx->cc_pinb;
	pilen = pin + ilen;
	pix = 0;
	pch = -1;
	ch1 = -1;

	while( pix < pilen && op < ox ){
		/* ch1 is the current byte; after pix++, ch2 (and CH3,CH4)
		 * are the bytes following it
		 */
		pch = ch1;
		ch1 = CH1;
		pix++;
		ch2 = CH2;

		/* line end normalization: drop CR before LF / add CR before LF */
		if( ccx->cc_symconv & SCtoLF ){
			if( ch1 == '\r' && ch2 == '\n' ){
				continue;
			}
		}
		if( ccx->cc_symconv & SCtoCRLF ){
			if( pch != '\r' && ch1 == '\n' ){
				*op++ = '\r';
			}
		}

		/* ---- escape sequences switching the input code ---- */
		if( ch1 == 033 ){
			/* single ESC from keyboard input must be passed
			 * thru ...  */
			if( ch2 == EOB /* && not buffer full ... */ ){
				pushpending(ccx,ch1);
				break;
			}
			if( ch2 == '$' || ch2 == '(' ){
				ch3 = CH3;
				if( ch3 == EOB ){
					pushpending(ccx,ch1);
					pushpending(ccx,ch2);
					break;
				}

				codesw = 0;
				if( ch2 == '$' ){
					if( ch3 == 'B' || ch3 == '@' )
						codesw = CC_JIS2B7;
				}else
				if( ch2 == '(' ){
					if( ch3 == 'B' ){
						codesw = CC_ASCII;
					}else
					if( ch3 == 'J' ){
						codesw = CC_ASCII;
						/* CC_JIS7ROMA */
					}else
					if( ch3 == 'I' ){
						codesw = CC_JIS7KANA;
					}
				}
				if( codesw ){
					setccin(ccx,codesw);
					/* swallow the escape when the output does
					 * not carry it over as is
					 */
					if( JIS2B8(ccx->cc_OUT)
					 || ccx->cc_OUT == CC_UTF8
					 || ccx->cc_OUT == CC_JIS2B7
					 && codesw == CC_JIS7KANA
					){
						pix += 2;
						continue;
					}
					else
					if( ccx->cc_symconv & SCtoASCII ){
						pix += 2;
						continue;
					}
				}
			}
		}

		/* ---- 7bit 1-byte kana (after ESC ( I) ---- */
		if( ccx->cc_in == CC_JIS7KANA )
		if( (ch1 & 0x80) == 0 && IS_SJIS_1B(ch1 | 0x80) ){
			switch( ccx->cc_OUT ){
			    case CC_JIS7K:
				*op++ = ch1;
				break;

			    case CC_JIS2B7:
				toJIS7(ccx,op);
				js = sjis_1b_to_jis(ch1,ch2,&cat);
				*op++ = js[0];
				*op++ = js[1];
				/* the following voiced mark was merged */
				if( cat ) pix++;
				break;

			    case CC_EUCJP:
				*op++ = EUC_HANKAKU_HI;
				*op++ = 0x80 | ch1;
				break;

			    default:
				*op++ = 0x80 | ch1;
				break;
			}
			continue;
		}

		/* ---- 7bit 2-byte JIS (after ESC $ B) ---- */
		if( ccx->cc_in == CC_JIS2B7 && IS_JIS_HI(ch1) ){
			if( ch2 == EOB ){
				pushpending(ccx,ch1);
				break;
			}
			if( IS_JIS7(ch1,ch2) ){
				if( (ccx->cc_symconv & SCtoASCII)
				 && (chA = JIS_TO_ASCII(CC_JIS2B7,ch1,ch2,0)) )
				{
					toASCII(ccx,op);
					*op++ = chA;
				}else
				switch( ccx->cc_OUT ){
					case CC_SJIS:
						JIS_TO_SJIS1(ch1,ch2,op);
						op += 2;
						break;
					case CC_EUCJP:
						*op++ = 0x80 | ch1;
						*op++ = 0x80 | ch2;
						break;
					case CC_UTF8:
						op += EUCtoUTF8((ch1<<8)|ch2,op,ox);
						break;
					default:
						toJIS7(ccx,op);
						*op++ = ch1;
						*op++ = ch2;
						break;
				}
				pix++;
				continue;
			}
		}

		/* ---- 3 byte UTF-8 (lead byte Ex), tried only while neither
		 * SJIS nor EUC has been seen in the input ---- */
		if( !ccx->cc_SJIS && !ccx->cc_EUCJP )
		if( (ch1&0xF0) == 0xE0 ){
			int isUTF8 = 0,insch = 0;

			ch3 = CH3;
			ch4 = CH4;
			/* already in UTF-8: wait for the rest of the sequence */
			if( ccx->cc_UTF8 )
			if( ch2 == EOB || ch3 == EOB
			 || ch4 == EOB && (ch2 == '\n' || ch3 == '\n') ){
				pushpending(ccx,ch1);
				if( ch2 != EOB ) pushpending(ccx,ch2);
				if( ch3 != EOB ) pushpending(ccx,ch3);
				break;
			}

			/* a newline inserted inside the sequence: take the
			 * sequence without it and emit the newline after it
			 */
			if( ch2 == '\n'
			 && ch3 != EOB && IS_UTF8_CONT(ch3)
			 && ch4 != EOB && IS_UTF8_CONT(ch4)
			){
				syslog_DEBUG("##CCX UTF8-RECOVER[%x %x %x %x]\n",
					ch1,ch2,ch3,ch4);
				ch2 = ch3;
				ch3 = ch4;
				insch = CH2;
				isUTF8 = 1;
			}else
			if( ch3 == '\n'
			 && ch2 != EOB && IS_UTF8_CONT(ch2)
			 && ch4 != EOB && IS_UTF8_CONT(ch4)
			){
				syslog_DEBUG("##CCX UTF8-RECOVER[%x %x %x %x]\n",
					ch1,ch2,ch3,ch4);
				ch3 = ch4;
				insch = CH3;
				isUTF8 = 1;
			}else
			if( ch2 != EOB && IS_UTF8_CONT(ch2) )
			if( ch3 != EOB && IS_UTF8_CONT(ch3) )
			if( ch4 == EOB || (ch4&0x80)==0 || (ch4&0xF0)==0xE0 ){
				isUTF8 = 1;
			}
			if( isUTF8 ){
			    setccin(ccx,CC_UTF8);

			    if( (ccx->cc_symconv & SCtoASCII)
			    && (chA=JIS_TO_ASCII(CC_UTF8,ch1,ch2,ch3)) ){
				toASCII(ccx,op);
				*op++ = chA;
			    }else
			    switch( ccx->cc_OUT ){
				case CC_JIS2B7:
				    toJIS7(ccx,op);
				    op += UTF8toLocal(ch1,ch2,ch3,CC_JIS2B7,op,ox);
				    break;
				case CC_EUCJP:
				    op += UTF8toLocal(ch1,ch2,ch3,CC_EUCJP,op,ox);
				    break;
				case CC_SJIS:
				    op += UTF8toLocal(ch1,ch2,ch3,CC_SJIS,op,ox);
				    break;
				default:
				    *op++ = ch1;
				    *op++ = ch2;
				    *op++ = ch3;
				    break;
			    }
			    pix += 2;
			    if( insch ){
				*op++ = insch;
				pix += 1;
			    }
			    continue;
			}
		}

		/* ---- 8bit codes: SJIS or EUC ---- */
		if( IS_SJIS_HI(ch1) || IS_EUC_HI(ch1) ){
			if( ch2 == EOB ){
				pushpending(ccx,ch1);
				break;
			}

			/* SJIS 1-byte kana */
			if( IS_SJIS_1B(ch1) )
			if( ccx->cc_in != CC_EUCJP )
			if( inNonASCII(ccx,CC_SJIS)
			 || is_SJIS(ccx,cinb,pix-1,pin,pilen) ){
				setccin(ccx,CC_SJIS);

				if( (ccx->cc_symconv & SCtoASCII)
				 && (chA = JIS_TO_ASCII(CC_SJIS,ch1,ch2,0)) )
				{
					toASCII(ccx,op);
					*op++ = chA;
				}
				else
				switch( ccx->cc_OUT ){
				    case CC_JIS7K:
					toJIS7K(ccx,op);
					*op++ = 0x7F & ch1;
					break;
				    case CC_JIS2B7:
					toJIS7(ccx,op);
					js = sjis_1b_to_jis(ch1,ch2,&cat);
					*op++ = js[0];
					*op++ = js[1];
					if( cat ) pix++;
					break;

				    case CC_EUCJP:
					*op++ = EUC_HANKAKU_HI;
					*op++ = ch1;
					break;

				    default:
					*op++ = ch1;
					break;
				}
				continue;
			}

			/* EUC 1-byte kana (0x8E + kana) */
			if( IS_EUC_HANKAKU(ch1,ch2) )
			if( inNonASCII(ccx,CC_EUCJP)
			 || !is_SJIS(ccx,cinb,pix-1,pin,pilen) ){
				setccin(ccx,CC_EUCJP);
				ch3 = CH3;
				ch4 = CH4;
				pix++;

				switch( ccx->cc_OUT ){
				    case CC_SJIS:
					*op++ = ch2;
					break;

				    case CC_JIS7K:
					toJIS7K(ccx,op);
					*op++ = 0x7F & ch2;
					break;

				    case CC_JIS2B7:
					/* a voiced mark is merged only when it is
					 * itself an EUC hankaku pair
					 */
					ch1 = ch2;
					if( IS_EUC_HANKAKU(ch3,ch4) )
						ch2 = ch4;
					else	ch2 = -1;

					toJIS7(ccx,op);
					js = sjis_1b_to_jis(ch1,ch2,&cat);
					*op++ = js[0];
					*op++ = js[1];
					if( cat ) pix += 2;
					break;

				    default:
					*op++ = ch1;
					*op++ = ch2;
					break;
				}
				continue;
			}

			/* EUC 2-byte character */
			if( IS_EUC(ch1,ch2) )
			if( inNonASCII(ccx,CC_EUCJP)
/*
			 || !is_SJIS(ccx,cinb,pix-1,pin,pilen) ){
}
*/
			 || guessCode(ccx,cinb,pix-1,pin,pilen)==CC_EUCJP ){
				setccin(ccx,CC_EUCJP);
				pix++;

				if( (ccx->cc_symconv & SCtoASCII)
				 && (chA = JIS_TO_ASCII(CC_EUCJP,ch1,ch2,0)) )
				{
					toASCII(ccx,op);
					*op++ = chA;
				}
				else
				switch( ccx->cc_OUT ){
				    case CC_JIS2B7:
				    case CC_JIS7K:
					toJIS7(ccx,op);
					*op++ = 0x7F & ch1;
					*op++ = 0x7F & ch2;
					break;

				    case CC_SJIS:
					JIS_TO_SJIS1(0x7F&ch1,0x7F&ch2,op);
					op += 2;
					break;

				    case CC_UTF8:
					op += EUCtoUTF8((ch1<<8)|ch2,op,ox);
					break;

				    default:
					*op++ = ch1;
					*op++ = ch2;
					break;
				}
				continue;
			}

			/* SJIS 2-byte character */
			insjis = inNonASCII(ccx,CC_SJIS);
			if( !insjis && ccx->cc_nonASCII == 0 )
				insjis = is_SJIS(ccx,cinb,pix-1,pin,pilen);

			if( IS_SJIS_2B(ch1,ch2,insjis) )
			if( inNonASCII(ccx,CC_SJIS)
			 || is_SJIS(ccx,cinb,pix-1,pin,pilen) ){
				setccin(ccx,CC_SJIS);
				pix++;

				if( (ccx->cc_symconv & SCtoASCII)
				 && (chA = JIS_TO_ASCII(CC_SJIS,ch1,ch2,0))){
					toASCII(ccx,op);
					*op++ = chA;
				}else
				switch( ccx->cc_OUT ){
				    case CC_JIS2B7:
				    case CC_JIS7K:
					toJIS7(ccx,op);
					SJIS_TO_JIS1(ch1,ch2,op);
					op += 2;
					break;

				    case CC_EUCJP:
					SJIS_TO_JIS1(ch1,ch2,op);
					*op++ |= 0x80;
					*op++ |= 0x80;
					break;

				    case CC_UTF8:
					/* the JIS code is built at op and then
					 * replaced with its UTF-8 form
					 */
					SJIS_TO_JIS1(ch1,ch2,op);
					op += EUCtoUTF8((op[0]<<8)|op[1],op,ox);
					break;

				    default:
					*op++ = ch1;
					*op++ = ch2;
					break;
				}
				continue;
			}
		}
		/* ---- anything else is output as a single byte in ASCII mode ---- */
		if( ccx->cc_in != CC_EURO8 )
		setccin(ccx,CC_ASCII);
		toASCII(ccx,op);

		/* with the ignore-bad option, an unconverted 8bit byte is
		 * dropped unless it is listed in cc_thru8[]
		 */
		if( IGN_bad(ccx) && (ch1&0x80) ){
			if( 0 < ccx->cc_UTF8 && (ch1&0xF0) != 0xE0 ){
			}else{

if( ccx->cc_nonJP<10 && CCXwithJP(ccx) ){
	if( ccx->cc_nonjp8 == 0 ){
		/*
		fprintf(stderr,
			"---- [%s] BadJP8 %X %o %o nJ=%d utf=%d[%X]\n",
			incode,0xFF&ch1,0xFF&ch1,0xFF&ch2,
			ccx->cc_nonJP,ccx->cc_UTF8,(ch1&0xF0));
		*/
	}
	/* leave a visible mark in the output when there is room */
	if( 16 < ox-op ){
		Xsprintf(ZVStr((char*)op,ox-op),"{BadJP8:%X,%X}",0xFF&ch1,0xFF&ch2);
		op += strlen((char*)op);
	}
}
				if( badout == 0 && isthru8(ccx,ch1) ){
					*op++ = ch1;
				}else{
					badout++;
				}
			}
			ccx->cc_nonjp8++;
		}else{
		*op++ = ch1;
		}
	}
	/* an empty input is the request to flush the output mode */
	if( ilen == 0 )
		toASCII(ccx,op);
	*op = 0;

	outlen = op - outtop;
	if( out == cinb ){
		/* copy back the result of the in-place conversion */
		bcopy(outtop,out,outlen+1);
		if( outtop != outbuf )
			free(outtop);
	}else	outlen = op - out;
	return outlen;
}

/*
 * Remove, in place, the escape sequences ESC $ @, ESC $ B, ESC ( J and
 * ESC ( B from the NUL terminated string str.  Any other byte,
 * including an ESC which does not start one of those, is kept.
 */
void strip_ISO2022JP(PVStr(str))
{	const char *rp;
	char cur;
	int isDesig;
	refQStr(dp,str); /**/

	rp = str;
	while( (cur = *rp++) != 0 ){
		isDesig = 0;
		if( cur == 033 ){
			if( rp[0] == TO_2BCODE )
				isDesig = (rp[1] == '@' || rp[1] == 'B');
			else
			if( rp[0] == TO_1BCODE )
				isDesig = (rp[1] == 'J' || rp[1] == 'B');
		}
		if( isDesig ){
			rp += 2;	/* skip the rest of the sequence */
			continue;
		}
		setVStrPtrInc(dp,cur);
	}
	setVStrEnd(dp,0);
}


int utf8n;
int utf8err;	/* counted up by fromUTF8() on a stray continuation byte */

/*
 * Check the bytes us[1] .. us[leng-1] of a UTF-8 sequence to be
 * continuation bytes (10xxxxxx).
 * A '\n' found in place of a continuation byte is swapped with the
 * following byte when that one is a continuation byte, moving the
 * newline behind it.  Note that this reads, and may write, us[cx+1]
 * which can be us[leng].
 * Returns 0 when the sequence is (now) well formed, 1 on the first
 * byte which cannot be repaired.
 */
int fixUTF8(unsigned char *us,int leng)
{	int pos;

	for( pos = 1; pos < leng; pos++ ){
		int cur = us[pos];

		if( (cur & 0xC0) == 0x80 )
			continue;

		if( cur == '\n' && (us[pos+1] & 0xC0) == 0x80 ){
syslog_DEBUG("{#CCX:UTF-8:FIXED:%d/%d:%x,%x#}\n",pos+1,leng,cur,us[pos+1]);
			us[pos] = us[pos+1];
			us[pos+1] = cur;
			continue;
		}

syslog_DEBUG("{#CCX:UTF-8:ERROR:%d/%d:%x,%x#}\n",pos+1,leng,cur,us[pos+1]);
		return 1;
	}
	return 0;
}
/*
 * Decode one UTF-8 sequence at us and return its code value.
 *
 * The sequence length (1..6) is taken from the lead byte and stored
 * into *lengp.  On input, *lengp is used as the size of the buffer at
 * *spp for the error message below.
 * - A lead byte which is a continuation byte (10xxxxxx) or is 0xFE/0xFF
 *   is taken as a single byte.  For a stray continuation byte, when spp
 *   is not null, an error message is written at *spp (which is advanced
 *   past it), utf8err is counted up and '?' is returned.
 * - fixUTF8() is applied first, so us[] may be modified.
 * - When a following byte is not a continuation byte, the lead byte is
 *   returned and *lengp is NOT updated.
 */
static unsigned int fromUTF8(unsigned char *us,int *lengp,char **spp)
{	unsigned char uc0,ucx;
	int leng,mask,lx;
	unsigned int uc4;
	int len;
	CStr(buf,64);

	uc4 = 0;
	uc0 = *us;
	len = *lengp;
	/* the number of leading 1 bits of the lead byte is the length */
	if( (uc0 & 0x80) == 0 ){ leng = 1; }else
	if( (uc0 & 0x40) == 0 ){ leng = 1;
			if( spp ){
sprintf(buf,"{#CCX:UTF-8:ERROR:%d/%d:%x,%x#}",1,1,uc0,us[1]);
XStrncpy(ZVStr(*spp,len),buf,len); /**/
			utf8err++;
			*spp += strlen(*spp);
			uc0 = '?';
			}
	}else
	if( (uc0 & 0x20) == 0 ){ leng = 2; }else
	if( (uc0 & 0x10) == 0 ){ leng = 3; }else
	if( (uc0 & 0x08) == 0 ){ leng = 4; }else
	if( (uc0 & 0x04) == 0 ){ leng = 5; }else
	if( (uc0 & 0x02) == 0 ){ leng = 6; }else{
		leng = 1;
	}

	fixUTF8(us,leng);
	if( leng == 1 ){
		uc4 = uc0;
	}else{
		/* payload bits of the lead byte, then 6 bits per byte */
		mask = 0xFF >> (leng+1);
		uc4 = uc0 & mask;
		for( lx = 1; lx < leng; lx++ ){
			ucx = us[lx];
			if( (ucx & 0xC0) != 0x80 ){
				return uc0;
			}
			uc4 = (uc4 << 6) | (0x3F & ucx);
		}
	}
	*lengp = leng;
	return uc4;
}
/*
 * Encode the code value uc as UTF-8 (the original 1 to 6 byte scheme)
 * into us[] and return the number of bytes stored.
 * No terminating NUL is stored.
 */
int toUTF8(unsigned int uc,unsigned char *us)
{	int nbytes,ix;
	unsigned int rest;

	if( uc < 0x0000080 ){
		us[0] = uc;
		return 1;
	}

	if( uc < 0x0000800 ) nbytes = 2; else
	if( uc < 0x0010000 ) nbytes = 3; else
	if( uc < 0x0200000 ) nbytes = 4; else
	if( uc < 0x4000000 ) nbytes = 5; else
			     nbytes = 6;

	/* continuation bytes, from the last one backward, 6 bits each */
	rest = uc;
	ix = nbytes;
	while( --ix > 0 ){
		us[ix] = 0x80 | (rest & 0x3F);
		rest >>= 6;
	}

	/* the lead byte: the length tag (truncated to 8 bits by the
	 * store) and the remaining high bits
	 */
	us[0] = (0x3F << (8-nbytes)) | rest;
	return nbytes;
}

/* name of the mapping file given to loadUnicodeMappings() */
#define JUMAP	"sjis-jis-ucs.ccx"
/*
 * Mapping tables between a local 2-byte code and UCS, each indexed by a
 * 16bit code; a zero entry is treated as "no mapping" by the users.
 */
typedef struct {
unsigned short	u_l2umap[0x10000];	/* local code -> UCS */
unsigned short	u_u2lmap[0x10000];	/* UCS -> local code */
	int	u_init;	/* 0: not loaded yet, -1: load failed, else the value from the loader */
} UcsX;
/* shorthands; they expect a variable named ucx in scope */
#define L2Umap	ucx->u_l2umap
#define U2Lmap	ucx->u_u2lmap
static UcsX **UcsXtab;
/*
 * UCSx -- return the shared UcsX instance, creating it on first use.
 *
 * The pointer table (8 slots) and the instance in slot 0 are allocated
 * lazily with StructAlloc()/NewStruct(); only slot 0 is handed out here.
 */
UcsX *UCSx(){
	UcsX *first;

	if( UcsXtab == 0 ){
		UcsXtab = (UcsX**)StructAlloc(8*sizeof(UcsX*));
	}
	first = UcsXtab[0];
	if( first == 0 ){
		first = NewStruct(UcsX);
		UcsXtab[0] = first;
	}
	return first;
}

static int loadUnicodeMappings(PCStr(jumap),UcsX *ucx,unsigned short l2ucnt[],unsigned short u2lcnt[]);

/*
 * UcxInit -- load the Unicode mapping tables into ucx once.
 *
 * Nothing is done when ucx->u_init is already non-zero.  Otherwise the
 * tables are loaded with loadUnicodeMappings(JUMAP,...), and u_init is
 * set to the positive result, or to -1 when nothing could be loaded so
 * that the load is not retried.
 *
 * Returns the resulting ucx->u_init.
 */
static int UcxInit(UcsX *ucx)
{	unsigned short l2ucnt[0x10000];
	unsigned short u2lcnt[0x10000];
	int nloaded;

	if( ucx->u_init != 0 )
		return ucx->u_init;

	/* scratch counters used only while loading */
	bzero(l2ucnt,sizeof(l2ucnt));
	bzero(u2lcnt,sizeof(u2lcnt));
	nloaded = loadUnicodeMappings(JUMAP,ucx,l2ucnt,u2lcnt);
	ucx->u_init = (0 < nloaded) ? nloaded : -1;
	return ucx->u_init;
}
/*
 * UCSinit -- make sure the shared mapping tables are loaded.
 * Returns the value of UcxInit() for the shared instance.
 */
int UCSinit(){
	return UcxInit(UCSx());
}
void UCSreset(){
	UcsX *ucx = UCSx();
	ucx->u_init = 0;
}

/*
 * UTF8toLocal -- convert one UTF-8 character, given as up to three
 * bytes ch1..ch3, to a two-byte local code written at op.
 *
 *   charset  CC_JIS2B7, CC_EUCJP or CC_SJIS select how the two bytes
 *            of the table entry are emitted; any other value emits
 *            nothing for a mapped character
 *   op, ox   output position and the end of the output buffer; ox is
 *            used only to size the text written for an unmapped
 *            character
 *
 * Returns the number of bytes stored at op, not counting the NUL that
 * is put after a mapped character.  When the table has no entry, a
 * substitute string is stored instead and its strlen() is returned.
 */
static int UTF8toLocal(int ch1,int ch2,int ch3,int charset,unsigned char *op,const unsigned char *ox)
{	UcsX *ucx = UCSx();
	CStr(buf,64);
	int len;
	unsigned int ucs,euc;
	unsigned int hi,lo;

	UcxInit(ucx);

	/* rebuild the byte sequence so that fromUTF8() can decode it */
	buf[0] = ch1;
	buf[1] = ch2;
	buf[2] = ch3;
	buf[3] = 0;
	len = sizeof(buf);
	ucs = fromUTF8((unsigned char*)buf,&len,NULL);
	euc = U2Lmap[ucs];

	if( euc == 0 ){
		/* no entry: emit a placeholder in place of the character */
		if( charset == CC_JIS2B7 ){
			Xsprintf(ZVStr((char*)op,ox-op),"\".");
		}else{
sprintf(buf,"{#CCX:UTF-8:NOMAP:UTF8toLocal:0x%04x:%x,%x,%x#}",ucs,ch1,ch2,ch3);
XStrncpy(ZVStr((char*)op,ox-op),(char*)buf,ox-op);
		}
		return strlen((char*)op);
	}

	hi = euc >> 8;
	lo = euc;
	if( charset == CC_JIS2B7 ){
		op[0] = 0x7F & hi;
		op[1] = 0x7F & lo;
		len = 2;
	}else
	if( charset == CC_EUCJP ){
		op[0] = 0x80 | hi;
		op[1] = 0x80 | lo;
		len = 2;
	}else
	if( charset == CC_SJIS ){
		op[0] = hi;
		op[1] = lo;
		len = 2;
	}else{
		len = 0;
	}
	op[len] = 0;
	return len;
}
/*
 * EUCtoUTF8 -- convert one two-byte local code to UTF-8 at us.
 *
 * The code is looked up as given; when that entry is empty the lookup
 * is retried with both 0x80 bits set (if they were clear) or with both
 * cleared (otherwise).
 *
 *   euc     16-bit code; values outside 0..0xFFFF are treated as
 *           unmapped instead of indexing past the table
 *   us, ux  output position and the end of the output buffer; ux is
 *           used only to size the text written for an unmapped code
 *
 * Returns the number of bytes written by toUTF8() for a mapped code
 * (no NUL is added), or the strlen() of the "NOMAP" note stored at us
 * when there is no mapping.
 */
static int EUCtoUTF8(int euc,unsigned char *us,const unsigned char *ux)
{	UcsX *ucx = UCSx();
	unsigned int ucs;
	CStr(buf,64);

	UcxInit(ucx);
	if( euc < 0 || 0xFFFF < euc ){
		/* the tables have exactly 0x10000 entries */
		ucs = 0;
	}else{
		ucs = L2Umap[euc];
		if( ucs == 0 ){
			if( (euc & 0x8080) == 0 )
				ucs = L2Umap[ 0x8080 | euc];
			else	ucs = L2Umap[~0x8080 & euc];
		}
	}
	if( ucs )
		return toUTF8(ucs,us);

sprintf(buf,"{#CCX:UTF-8:NOMAP:LocalToUTF8:%x#}",euc);
XStrncpy(ZVStr((char*)us,ux-us),buf,ux-us);
	return strlen((char*)us);
}

/*
 * dumpCharMapCompiled -- write the non-empty entries of L2Umap to file
 * in the compiled form read back by loadCharMap().
 *
 * Each entry is four bytes: local code (high, low) followed by the
 * Unicode value (high, low).  The whole image is built in memory first;
 * 0x10000 entries of 4 bytes fit the buffer exactly.
 *
 * Nothing is reported to the caller.  If the file cannot be opened the
 * call is a no-op.  If the data cannot be written completely, the
 * partial file is removed so that it is not later taken as a valid map.
 *
 * NOTE(review): the file is opened in text mode ("w") although the
 * content is binary; that matters on platforms that translate line
 * ends -- confirm together with the mode used for loading.
 */
static void dumpCharMapCompiled(PCStr(file),UcsX *ucx)
{	FILE *mfp;
	int lch,uch;
	int size,wcc,failed;
	CStr(buff,0x40000);

	if( (mfp = fopen(file,"w")) != NULL ){
		char *bp = buff; /**/
		for( lch = 0; lch < 0x10000; lch++ )
		if( (uch = L2Umap[lch]) != 0 ){
			*bp++ = lch >> 8; *bp++ = lch;
			*bp++ = uch >> 8; *bp++ = uch;
		}
		/* at most 0x40000, so it fits an int for the "%d" below */
		size = (int)(bp - buff);
		wcc = (int)fwrite(buff,1,size,mfp);
		failed = (wcc != size);
		if( fclose(mfp) != 0 )
			failed = 1;
		if( failed ){
			syslog_DEBUG("##CCX FAILED DUMPING %s\n",file);
			remove(file);
			return;
		}
		syslog_DEBUG("##CCX DUMPED %s %d\n",file,size);
		chmodIfShared(file,0644);
	}
}
/*
 * loadCharMap -- load a compiled mapping file into both tables of ucx.
 *
 * The file is a sequence of 4-byte records: local code (high, low)
 * followed by the Unicode value (high, low).  Each record sets
 * L2Umap[local] and U2Lmap[unicode].  Only complete records are used;
 * trailing bytes of a truncated file are ignored.
 *
 * Returns 1 when at least one record was loaded, 0 when the file could
 * not be opened or held no complete record (so the caller can fall
 * back to other sources).
 */
static int loadCharMap(PCStr(file),UcsX *ucx)
{	FILE *mfp;
	CStr(xpath,1024);
	int rcc,nrec;
	unsigned int lch,uch;
	unsigned char buff[0x40000];
	const unsigned char *bp;
	const unsigned char *end;

	mfp = fopen_LIBPATH(file,"r",AVStr(xpath));
	if( mfp == NULL ){
		syslog_DEBUG("##CCX FAILED LOADING %s\n",file);
		return 0;
	}
	rcc = fread(buff,1,sizeof(buff),mfp);
	fclose(mfp);
	syslog_DEBUG("##CCX LOADED %s %d\n",xpath,rcc);

	/* stop at the last whole record; the rest was never filled by fread */
	nrec = rcc / 4;
	end = buff + nrec*4;
	for( bp = buff; bp < end; bp += 4 ){
		lch = (bp[0] << 8) | bp[1];
		uch = (bp[2] << 8) | bp[3];
		L2Umap[lch] = uch;
		U2Lmap[uch] = lch;
	}
	return 0 < nrec;
}
/*
 * statDump -- log two counts for a mapping table and its counters.
 *
 *   UNIFIED  entries of l2umap whose value is in 0x01..0xFF
 *   FILLED   entries of l2ucnt that are exactly 1
 *
 * Both arrays are scanned over all 0x10000 indices; the result is
 * written with syslog_ERROR().
 */
static void statDump(unsigned short l2umap[], unsigned short l2ucnt[])
{	int nunified = 0;
	int nfilled = 0;
	unsigned int code;

	for( code = 0; code < 0x10000; code++ ){
		unsigned short mapped = l2umap[code];

		if( mapped != 0 && mapped < 0x100 )
			nunified++;
		if( l2ucnt[code] == 1 )
			nfilled++;
	}
syslog_ERROR("##CCX UNIFIED=%d FILLED=%d\n",nunified,nfilled);
}
/*
 * loadMapping1 -- read a textual mapping table from fp into ucx.
 *
 * Two line formats are accepted; any other line is skipped:
 *   0xAAAA 0xBBBB 0xUUUU   the second column with 0x8080 or-ed in is
 *                          the local code, the third the Unicode value
 *   0xLLLL 0xUUUU          local code and Unicode value
 *
 * Every accepted pair is stored in both L2Umap and U2Lmap (a later pair
 * overrides an earlier one, which is logged) and counted in l2ucnt /
 * u2lcnt.  Pairs whose local code or Unicode value does not fit the
 * 16-bit tables are logged and skipped.
 *
 *   file   name used only in the log output
 *
 * Always returns 1.
 */
static int loadMapping1(FILE *fp,PCStr(file),UcsX *ucx,unsigned short l2ucnt[],unsigned short u2lcnt[])
{	CStr(line,1024);
	unsigned int lc1,lc2,uc1;

	while( fgets(line,sizeof(line),fp) ){
		if( sscanf(line,"0x%x 0x%x 0x%x",&lc1,&lc2,&uc1) == 3 ){
			lc1 = lc2 | 0x8080;
		}else
		if( sscanf(line,"0x%x 0x%x",&lc1,&uc1) != 2 )
			continue;

		/* both values are used as indices into 0x10000-entry arrays */
		if( 0xFFFF < lc1 || 0xFFFF < uc1 ){
			syslog_ERROR("##CCX OUT OF RANGE: %x -> %x\n",lc1,uc1);
			continue;
		}

		if( L2Umap[lc1] && L2Umap[lc1] != uc1 ){
			syslog_ERROR("##CCX L2U OVR: %x -> %x %x\n",
				lc1,L2Umap[lc1],uc1);
		}
		l2ucnt[lc1] += 1;
		L2Umap[lc1] = uc1;

		if( U2Lmap[uc1] && U2Lmap[uc1] != lc1 ){
			syslog_ERROR("##CCX U2L OVR: %x -> %x %x\n",
				uc1,U2Lmap[uc1],lc1);
		}
		u2lcnt[uc1] += 1;
		U2Lmap[uc1] = lc1;
	}
	syslog_ERROR("##CCX Local to Unicode:\n"); statDump(L2Umap,l2ucnt);
	syslog_ERROR("##CCX Unicode to Local:\n"); statDump(U2Lmap,u2lcnt);
	syslog_ERROR("##CCX loaded: %s\n",file);
	return 1;
}
/*
 * loadMap1 -- open a textual mapping file through fopen_LIBPATH() and
 * feed it to loadMapping1().
 *
 * Returns 1 when the file could be opened, 0 otherwise.
 */
static int loadMap1(PCStr(file),UcsX *ucx,unsigned short l2ucnt[],unsigned short u2lcnt[])
{	FILE *mapfp;
	CStr(xpath,1024);

	mapfp = fopen_LIBPATH(file,"r",AVStr(xpath));
	if( mapfp == NULL )
		return 0;

	/* xpath is the path that was actually opened */
	loadMapping1(mapfp,xpath,ucx,l2ucnt,u2lcnt);
	fclose(mapfp);
	return 1;
}
/*
 * loadUnicodeMappings -- fill the tables of ucx from the best source
 * available.
 *
 * 1. A compiled copy is tried first.  Its path is jumap itself when
 *    that is a full path, else jumap under getTMPDIR() ("/tmp" when
 *    that returns 0), with the extension replaced by (or extended
 *    with) ".ccb".
 * 2. Otherwise the textual maps are read: the file named by the
 *    MAPUNICODE environment variable if set, then jumap, and
 *    "JIS0208.TXT" only when neither could be opened.
 * 3. When any textual map was read, the compiled copy is (re)written.
 *
 * Returns 1 when the compiled copy was used, else the number of textual
 * maps opened (0 when nothing could be loaded).
 *
 * NOTE(review): bpath is built without a visible length check and, for
 * a relative jumap, lives in the temporary directory -- confirm that
 * the string helpers used here are bounded and that the location is
 * acceptable.
 */
static int loadUnicodeMappings(PCStr(jumap),UcsX *ucx,unsigned short l2ucnt[],unsigned short u2lcnt[])
{	const char *map;
	const char *tmpdir;
	int loaded;
	CStr(bpath,1024);
	refQStr(xp,bpath); /**/

	if( !isFullpath(jumap) ){
		tmpdir = getTMPDIR();
		if( tmpdir == 0 )
			tmpdir = "/tmp";
		sprintf(bpath,"%s/%s",tmpdir,jumap);
	}else{
		strcpy(bpath,jumap);
	}

	/* swap the extension only when the last of . / \ is a dot */
	xp = strrpbrk(bpath,"./\\");
	if( xp != 0 && *xp == '.' )
		strcpy(xp,".ccb");
	else	strcat(bpath,".ccb");
	syslog_ERROR("##CCX %s %s\n",jumap,bpath);

	if( loadCharMap(bpath,ucx) )
		return 1;

	loaded = 0;
	map = getenv("MAPUNICODE");
	if( map != 0 )
		loaded += loadMap1(map,ucx,l2ucnt,u2lcnt);

	loaded += loadMap1(jumap,ucx,l2ucnt,u2lcnt);
	if( loaded == 0 )
		loaded += loadMap1("JIS0208.TXT",ucx,l2ucnt,u2lcnt);

	if( loaded != 0 )
		dumpCharMapCompiled(bpath,ucx);

	return loaded;
}

/*
 * scanCharMapping -- read mapping items of the form "XXXX:<utf8>" from
 * in and enter them into the tables of ucx.
 *
 * Each line is split into blank-separated tokens.  A token is used when
 * it starts with a hex digit and has ':' as its fifth character: the
 * four characters before the colon are the local code in hex, the bytes
 * after it one UTF-8 character.  A local code with a non-zero high byte
 * and neither 0x80 bit set gets 0x8080 or-ed in.  Tokens that are too
 * short, do not parse, or decode to a value that does not fit the
 * 16-bit tables are skipped.
 *
 * A table entry is written only when its counter goes from 0 to 1, so
 * the first item wins.  The statistics of the result are logged with
 * statDump().
 *
 * NOTE(review): l2ucnt is indexed by the Unicode value and u2lcnt by
 * the local code here, which is the reverse of how loadMapping1()
 * indexes the same counters -- confirm which is intended.
 */
static void scanCharMapping(FILE *in,UcsX *ucx,unsigned short l2ucnt[],unsigned short u2lcnt[])
{	CStr(line,1024);
	unsigned CStr(map1,1024);
	const char *sp;
	unsigned int uc4,lc4;
	int len;

	while( fgets(line,sizeof(line),in) ){
	    sp = line;
	    for(;;){
		/* skip the whole run of blanks so each token is read once */
		while( isspace((unsigned char)*sp) )
			sp++;
		if( *sp == 0 )
			break;
		*map1 = 0;
		Xsscanf(sp,"%s",AVStr(map1));
		if( *map1 == 0 )
			break;
		/* step over the token that was just copied */
		while( *sp && !isspace((unsigned char)*sp) )
			sp++;

		/* look at map1[4] only when the token really is that long */
		if( !isxdigit(map1[0]) )
			continue;
		if( map1[1] == 0 || map1[2] == 0 || map1[3] == 0 )
			continue;
		if( map1[4] != ':' )
			continue;

		map1[4] = 0;
		if( sscanf((char*)map1,"%x",&lc4) != 1 )
			continue;
		len = sizeof(map1) - 5;
		uc4 = fromUTF8(&map1[5],&len,NULL);

		/* both values index 0x10000-entry arrays */
		if( 0xFFFF < lc4 || 0xFFFF < uc4 )
			continue;

		if( (lc4 & 0x8080) == 0 && (lc4 & 0xFF00) != 0 )
			lc4 |= 0x8080;

		l2ucnt[uc4] += 1;
		if( l2ucnt[uc4] == 1 )
			L2Umap[lc4] = uc4;
		u2lcnt[lc4] += 1;
		if( u2lcnt[lc4] == 1 )
			U2Lmap[uc4] = lc4;
	    }
	}
	statDump(L2Umap,l2ucnt);
}
/*
 * dumpUTF8mapping -- print every value from 0x0001 to 0xFFFF as
 * "XXXX:" followed by its 1-, 2- or 3-byte UTF-8 form and CR LF.
 * No value in that range is skipped.
 */
void dumpUTF8mapping(){
	unsigned int cp;
	unsigned int b0,b1,b2;

	for( cp = 0x001; cp < 0x10000; cp++ ){
		if( cp < 0x080 ){
			printf("%04x:%c\r\n",cp,cp);
		}else
		if( cp < 0x800 ){
			b0 = 0xC0 | (0x1F & (cp >> 6));
			b1 = 0x80 | (0x3F & cp);
			printf("%04x:%c%c\r\n",cp,b0,b1);
		}else{
			b0 = 0xE0 | (0x0F & (cp >> 12));
			b1 = 0x80 | (0x3F & (cp >> 6));
			b2 = 0x80 | (0x3F & cp);
			printf("%04x:%c%c%c\r\n",cp,b0,b1,b2);
		}
	}
}
/*
 * dumpJIS7mapping -- print every byte pair accepted by IS_JIS7() as
 * "HHLL:" followed by the pair wrapped in ESC $ B ... ESC ( B, eight
 * items per line, lines ended with CR LF.
 */
void dumpJIS7mapping()
{	int hi,lo;
	int nprinted = 0;

	/* same order as counting 0x0000..0x7FFF and splitting into bytes */
	for( hi = 0; hi < 0x80; hi++ ){
		for( lo = 0; lo < 0x100; lo++ ){
			if( !IS_JIS7(hi,lo) )
				continue;
			if( nprinted != 0 && nprinted % 8 == 0 )
				printf("\r\n");
			nprinted++;
			printf("%02x%02x:\033$B%c%c\033(B ",hi,lo,hi,lo);
		}
	}
	printf("\r\n");
}
/*
 * dumpEUCJPmapping -- print every byte pair in 0x8080..0xFFFF accepted
 * by IS_EUC() as "HHLL:" followed by the two raw bytes, eight items
 * per line, lines ended with CR LF.
 */
void dumpEUCJPmapping()
{	int code,hi,lo;
	int nprinted = 0;

	code = 0x8080;
	while( code < 0x10000 ){
		hi = (code >> 8) & 0xFF;
		lo = code & 0xFF;
		code++;
		if( !IS_EUC(hi,lo) )
			continue;
		if( nprinted != 0 && nprinted % 8 == 0 )
			printf("\r\n");
		nprinted++;
		printf("%02x%02x:%c%c ",hi,lo,hi,lo);
	}
	printf("\r\n");
}
/*
 * dumpCharMapping -- print a code table chosen by the first character
 * of code: 'u' UTF-8, 'j' 7-bit JIS, 'e' EUC-JP.  Any other value does
 * nothing.
 */
void dumpCharMapping(PCStr(code))
{	int which = *code;

	if( which == 'u' )
		dumpUTF8mapping();
	else
	if( which == 'j' )
		dumpJIS7mapping();
	else
	if( which == 'e' )
		dumpEUCJPmapping();
}
/*
 * loadCharMapping -- build the mapping tables of the shared instance
 * from the items read from ifp and write them out in compiled form
 * under the name JUMAP.
 *
 *   code  not used here
 *   ifp   input in the format accepted by scanCharMapping()
 *
 * The two counter arrays are scratch space for scanCharMapping(); they
 * must be the zeroed local arrays, not the mapping tables themselves,
 * or counting would overwrite table entries.
 */
void loadCharMapping(PCStr(code),FILE *ifp)
{	UcsX *ucx = UCSx();
	unsigned short l2ucnt[0x10000];
	unsigned short u2lcnt[0x10000];

	bzero(l2ucnt,sizeof(l2ucnt));
	bzero(u2lcnt,sizeof(u2lcnt));
	scanCharMapping(ifp,ucx,l2ucnt,u2lcnt);
	dumpCharMapCompiled(JUMAP,ucx);
}

/*
 * CCX_TOXX -- convert slen bytes at src to the charset named by chset
 * into dst (capacity dsiz), using a converter created for this call
 * with "*" as the input charset.
 *
 *   ctype  not used here
 */
void CCX_TOXX(PCStr(src),int slen,PVStr(dst),int dsiz,PCStr(ctype),PCStr(chset))
{	CCX ccx[1];

	CCXcreate("*",chset,ccx);
	CCXexec(ccx,(char*)src,slen,AVStr(dst),dsiz);
}
/*
 * CCX_TOX -- convert the NUL-terminated string src to the charset
 * named by chset into dst.
 *
 * The output capacity handed on is 1024 + 2*strlen(src); it is computed
 * from the input length, not from the real size of dst.
 */
void CCX_TOX(PCStr(src),PVStr(dst),PCStr(ctype),PCStr(chset))
{	int slen = strlen((char*)src);
	int dsiz = 1024 + slen*2;

	CCX_TOXX(src,slen,AVStr(dst),dsiz,ctype,chset);
}
/*
 * TO_EUC -- convert the NUL-terminated string any into euc by calling
 * CCX_TOX() with the charset name "euc"; ctype is passed on unchanged.
 */
void TO_EUC(PCStr(any),PVStr(euc),PCStr(ctype))
{
	CCX_TOX(any,AVStr(euc),ctype,"euc");
}
/*
 * TO_JIS -- convert the NUL-terminated string any into jis by calling
 * CCX_TOX() with the charset name "jis"; ctype is passed on unchanged.
 */
void TO_JIS(PCStr(any),PVStr(jis),PCStr(ctype))
{
	CCX_TOX(any,AVStr(jis),ctype,"jis");
}
/*
 * TO_SJIS -- convert the NUL-terminated string any into sjis by calling
 * CCX_TOX() with the charset name "sjis"; ctype is passed on unchanged.
 */
void TO_SJIS(PCStr(any),PVStr(sjis),PCStr(ctype))
{
	CCX_TOX(any,AVStr(sjis),ctype,"sjis");
}
/*
 * TO_UTF8 -- convert the NUL-terminated string any into utf8 by calling
 * CCX_TOX() with the charset name "utf8"; ctype is passed on unchanged.
 */
void TO_UTF8(PCStr(any),PVStr(utf8),PCStr(ctype))
{
	CCX_TOX(any,AVStr(utf8),ctype,"utf8");
}
/*
 * TO_euc -- convert the NUL-terminated string in to "euc" into out,
 * whose capacity is given by osiz, using a converter created for this
 * call with "*" as the input charset.
 */
void TO_euc(PCStr(in),PVStr(out),int osiz)
{	CCX ccx;
	int ilen;

	CCXcreate("*","euc",&ccx);
	ilen = strlen(in);
	CCXexec(&ccx,(char*)in,ilen,AVStr(out),osiz);
}
/*
 * CCXfile -- copy the stream in to out, converting it from icharset to
 * ocharset.
 *
 * The input is read in chunks of up to 2KB and each chunk is converted
 * with the same converter, so state is carried across chunk borders by
 * CCXexec().  Copying stops at end of input, on a read error, or as
 * soon as a chunk cannot be written completely.  Nothing is reported
 * to the caller; out is neither flushed nor closed here.
 *
 * NOTE(review): nothing is done after the last chunk; confirm whether
 * the converter needs a final call to emit pending output.
 */
void CCXfile(PCStr(icharset),PCStr(ocharset),FILE *in,FILE *out)
{	CCX ccx[1]; /**/
	int rcc,wcc;
	CStr(buf,2*1024);
	CStr(xbuf,8*1024);

	CCXcreate(icharset,ocharset,ccx);
	while( 0 < (rcc = fread(buf,1,sizeof(buf),in)) ){
		wcc = CCXexec(ccx,buf,rcc,AVStr(xbuf),sizeof(xbuf));
		/* a non-positive count must not reach fwrite as a size_t */
		if( wcc <= 0 )
			continue;
		if( fwrite(xbuf,1,wcc,out) != (size_t)wcc )
			break;
	}
}
